diff --git a/.circleci/config.yml b/.circleci/config.yml
index b5636fa6c..7157949b7 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -38,6 +38,11 @@ jobs:
           name: Run the python tests
           command: |
             python3 -m unittest discover python/tests
+      # TODO: Reenable when extension api becomes stable
+      # - run:
+      #     name: Build example extension
+      #     command: |
+      #       cd examples/extensions && python3 -m pip install .
       - run:
           name: Build CPP only
           command: |
@@ -78,6 +83,13 @@ jobs:
             conda activate runner-env
             DEVICE=cpu python -m xmlrunner discover -v python/tests -o test-results/cpu
             DEVICE=gpu python -m xmlrunner discover -v python/tests -o test-results/gpu
+      # TODO: Reenable when extension api becomes stable
+      # - run:
+      #     name: Build example extension
+      #     command: |
+      #       eval "$(conda shell.bash hook)"
+      #       conda activate runner-env
+      #       cd examples/extensions && python -m pip install .
       - store_test_results:
           path: test-results
       - run:
diff --git a/examples/extensions/axpby/axpby.cpp b/examples/extensions/axpby/axpby.cpp
index 6da2ff591..732dc43b6 100644
--- a/examples/extensions/axpby/axpby.cpp
+++ b/examples/extensions/axpby/axpby.cpp
@@ -104,7 +104,10 @@ void axpby_impl(
 }
 
 /** Fall back implementation for evaluation on CPU */
-void Axpby::eval(const std::vector<array>& inputs, array& out) {
+void Axpby::eval(
+    const std::vector<array>& inputs,
+    std::vector<array>& out_arr) {
+  auto out = out_arr[0];
   // Check the inputs (registered in the op while constructing the out array)
   assert(inputs.size() == 2);
   auto& x = inputs[0];
@@ -175,7 +178,10 @@ void axpby_impl_accelerate(
 }
 
 /** Evaluate primitive on CPU using accelerate specializations */
-void Axpby::eval_cpu(const std::vector<array>& inputs, array& out) {
+void Axpby::eval_cpu(
+    const std::vector<array>& inputs,
+    std::vector<array>& outarr) {
+  auto out = outarr[0];
   assert(inputs.size() == 2);
   auto& x = inputs[0];
   auto& y = inputs[1];
@@ -189,13 +195,15 @@ void Axpby::eval_cpu(const std::vector<array>& inputs, array& out) {
   }
 
   // Fall back to common backend if specializations are not available
-  eval(inputs, out);
+  eval(inputs, outarr);
 }
 
 #else // Accelerate not available
 
 /** Evaluate primitive on CPU falling back to common backend */
-void Axpby::eval_cpu(const std::vector<array>& inputs, array& out) {
+void Axpby::eval_cpu(
+    const std::vector<array>& inputs,
+    std::vector<array>& out) {
   eval(inputs, out);
 }
 
@@ -208,8 +216,11 @@ void Axpby::eval_cpu(const std::vector<array>& inputs, array& out) {
 #ifdef _METAL_
 
 /** Evaluate primitive on GPU */
-void Axpby::eval_gpu(const std::vector<array>& inputs, array& out) {
+void Axpby::eval_gpu(
+    const std::vector<array>& inputs,
+    std::vector<array>& outarr) {
   // Prepare inputs
+  auto out = outarr[0];
   assert(inputs.size() == 2);
   auto& x = inputs[0];
   auto& y = inputs[1];
@@ -295,7 +306,9 @@ void Axpby::eval_gpu(const std::vector<array>& inputs, array& out) {
 #else // Metal is not available
 
 /** Fail evaluation on GPU */
-void Axpby::eval_gpu(const std::vector<array>& inputs, array& out) {
+void Axpby::eval_gpu(
+    const std::vector<array>& inputs,
+    std::vector<array>& out) {
   throw std::runtime_error("Axpby has no GPU implementation.");
 }
 
@@ -306,7 +319,7 @@ void Axpby::eval_gpu(const std::vector<array>& inputs, array& out) {
 ///////////////////////////////////////////////////////////////////////////////
 
 /** The Jacobian-vector product. */
-array Axpby::jvp(
+std::vector<array> Axpby::jvp(
     const std::vector<array>& primals,
     const std::vector<array>& tangents,
     const std::vector<int>& argnums) {
@@ -321,32 +334,33 @@ array Axpby::jvp(
   if (argnums.size() > 1) {
     auto scale = argnums[0] == 0 ? alpha_ : beta_;
     auto scale_arr = array(scale, tangents[0].dtype());
-    return multiply(scale_arr, tangents[0], stream());
+    return {multiply(scale_arr, tangents[0], stream())};
   }
   // If, argnums = {0, 1}, we take contributions from both
   // which gives us jvp = tangent_x * alpha + tangent_y * beta
   else {
-    return axpby(tangents[0], tangents[1], alpha_, beta_, stream());
+    return {axpby(tangents[0], tangents[1], alpha_, beta_, stream())};
   }
 }
 
 /** The vector-Jacobian product. */
 std::vector<array> Axpby::vjp(
     const std::vector<array>& primals,
-    const array& cotan,
-    const std::vector<int>& argnums) {
+    const std::vector<array>& cotangents,
+    const std::vector<int>& argnums,
+    const std::vector<array>&) {
   // Reverse mode diff
   std::vector<array> vjps;
   for (auto arg : argnums) {
     auto scale = arg == 0 ? alpha_ : beta_;
-    auto scale_arr = array(scale, cotan.dtype());
-    vjps.push_back(multiply(scale_arr, cotan, stream()));
+    auto scale_arr = array(scale, cotangents[0].dtype());
+    vjps.push_back(multiply(scale_arr, cotangents[0], stream()));
   }
   return vjps;
 }
 
 /** Vectorize primitive along given axis */
-std::pair<array, int> Axpby::vmap(
+std::pair<std::vector<array>, std::vector<int>> Axpby::vmap(
     const std::vector<array>& inputs,
     const std::vector<int>& axes) {
   throw std::runtime_error("Axpby has no vmap implementation.");
diff --git a/examples/extensions/axpby/axpby.h b/examples/extensions/axpby/axpby.h
index 2b85dadb2..649d9600a 100644
--- a/examples/extensions/axpby/axpby.h
+++ b/examples/extensions/axpby/axpby.h
@@ -42,11 +42,13 @@ class Axpby : public Primitive {
    * To avoid unnecessary allocations, the evaluation function
    * is responsible for allocating space for the array.
    */
-  void eval_cpu(const std::vector<array>& inputs, array& out) override;
-  void eval_gpu(const std::vector<array>& inputs, array& out) override;
+  void eval_cpu(const std::vector<array>& inputs, std::vector<array>& out)
+      override;
+  void eval_gpu(const std::vector<array>& inputs, std::vector<array>& out)
+      override;
 
   /** The Jacobian-vector product. */
-  array jvp(
+  std::vector<array> jvp(
       const std::vector<array>& primals,
       const std::vector<array>& tangents,
       const std::vector<int>& argnums) override;
@@ -54,8 +56,9 @@ class Axpby : public Primitive {
   /** The vector-Jacobian product. */
   std::vector<array> vjp(
       const std::vector<array>& primals,
-      const array& cotan,
-      const std::vector<int>& argnums) override;
+      const std::vector<array>& cotangents,
+      const std::vector<int>& argnums,
+      const std::vector<array>& outputs) override;
 
   /**
    * The primitive must know how to vectorize itself across
   * the given axis. The output is a pair containing the array
   * representing the vectorized computation and the axis which
   * corresponds to the output vectorized dimension.
    */
-  std::pair<array, int> vmap(
+  std::pair<std::vector<array>, std::vector<int>> vmap(
      const std::vector<array>& inputs,
      const std::vector<int>& axes) override;
 
@@ -80,7 +83,7 @@ class Axpby : public Primitive {
   float beta_;
 
   /** Fall back implementation for evaluation on CPU */
-  void eval(const std::vector<array>& inputs, array& out);
+  void eval(const std::vector<array>& inputs, std::vector<array>& out);
 };
 
 } // namespace mlx::core
\ No newline at end of file
diff --git a/examples/extensions/pyproject.toml b/examples/extensions/pyproject.toml
new file mode 100644
index 000000000..1c5302936
--- /dev/null
+++ b/examples/extensions/pyproject.toml
@@ -0,0 +1,3 @@
+[build-system]
+requires = ["setuptools>=42", "pybind11>=2.10", "cmake>=3.24", "mlx @ git+https://github.com/ml-explore/mlx@main"]
+build-backend = "setuptools.build_meta"
\ No newline at end of file
diff --git a/mlx/io/CMakeLists.txt b/mlx/io/CMakeLists.txt
index f12b2bd85..8e80cc4c5 100644
--- a/mlx/io/CMakeLists.txt
+++ b/mlx/io/CMakeLists.txt
@@ -14,6 +14,11 @@ target_include_directories(
   $<BUILD_INTERFACE:${json_SOURCE_DIR}/single_include/nlohmann>
   $<INSTALL_INTERFACE:include/json>
 )
+install(
+  DIRECTORY ${json_SOURCE_DIR}/
+  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/json
+  COMPONENT json_source
+)
 
 MESSAGE(STATUS "Downloading gguflib")
 FetchContent_Declare(gguflib
@@ -26,6 +31,12 @@ target_include_directories(
   $<BUILD_INTERFACE:${gguflib_SOURCE_DIR}>
   $<INSTALL_INTERFACE:include/gguflib>
 )
+install(
+  DIRECTORY ${gguflib_SOURCE_DIR}/
+  DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/gguflib
+  COMPONENT gguflib_source
+)
+
 add_library(
   gguflib STATIC
   ${gguflib_SOURCE_DIR}/fp16.c
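
Note: taken together, this diff moves the Axpby example to the multi-output Primitive
interface (outputs are passed as std::vector<array>, with single-output primitives
reading element 0) and adds a pyproject.toml so the extension builds with a plain
pip install. Once the disabled "Build example extension" CI steps above are
re-enabled, the installed extension could be smoke-tested roughly as below. This is
a minimal sketch, assuming the example package is exposed as mlx_sample_extensions
and binds the axpby operation from axpby.cpp; those names come from the MLX
extensions example, not from this diff:

    # Hypothetical usage after `cd examples/extensions && pip install .`
    import mlx.core as mx
    from mlx_sample_extensions import axpby  # assumed package/binding names

    x = mx.ones((3, 4))
    y = mx.ones((3, 4))

    # axpby(x, y, alpha, beta) computes alpha * x + beta * y,
    # here 4.0 * 1 + 2.0 * 1 = 6.0 elementwise.
    z = axpby(x, y, 4.0, 2.0)
    print(z)  # expected: a (3, 4) array filled with 6.0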