diff --git a/.circleci/config.yml b/.circleci/config.yml index b2e7794c1..7af33e64d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -31,7 +31,7 @@ jobs: name: Install dependencies command: | pip install --upgrade cmake - pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f + pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4 pip install numpy sudo apt-get update sudo apt-get install libblas-dev liblapack-dev liblapacke-dev @@ -80,7 +80,7 @@ jobs: source env/bin/activate pip install --upgrade pip pip install --upgrade cmake - pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f + pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4 pip install numpy pip install torch pip install tensorflow @@ -143,7 +143,7 @@ jobs: source env/bin/activate pip install --upgrade pip pip install --upgrade cmake - pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f + pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4 pip install --upgrade setuptools pip install numpy pip install twine @@ -207,7 +207,7 @@ jobs: source env/bin/activate pip install --upgrade pip pip install --upgrade cmake - pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f + pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4 pip install --upgrade setuptools pip install numpy pip install auditwheel diff --git a/docs/src/install.rst b/docs/src/install.rst index 7001d896f..f34db7270 100644 --- a/docs/src/install.rst +++ b/docs/src/install.rst @@ -74,7 +74,7 @@ Install `nanobind `_ with: .. code-block:: shell - pip install git+https://github.com/wjakob/nanobind.git + pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4 Then simply build and install MLX using pip: @@ -120,7 +120,7 @@ Create a build directory and run CMake and make: .. code-block:: shell mkdir -p build && cd build - cmake .. && make -j + cmake .. && make -j Run tests with: @@ -139,7 +139,7 @@ directory as the executable statically linked to ``libmlx.a`` or the preprocessor constant ``METAL_PATH`` should be defined at build time and it should point to the path to the built metal library. -.. list-table:: Build Options +.. list-table:: Build Options :widths: 25 8 :header-rows: 1 @@ -161,15 +161,15 @@ should point to the path to the built metal library. .. note:: - If you have multiple Xcode installations and wish to use - a specific one while building, you can do so by adding the - following environment variable before building + If you have multiple Xcode installations and wish to use + a specific one while building, you can do so by adding the + following environment variable before building .. code-block:: shell export DEVELOPER_DIR="/path/to/Xcode.app/Contents/Developer/" - Further, you can use the following command to find out which + Further, you can use the following command to find out which macOS SDK will be used .. code-block:: shell @@ -201,7 +201,7 @@ Then set the active developer directory: sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer -x86 Shell +x86 Shell ~~~~~~~~~ .. _build shell: diff --git a/examples/extensions/pyproject.toml b/examples/extensions/pyproject.toml index c71470da1..3c27696a7 100644 --- a/examples/extensions/pyproject.toml +++ b/examples/extensions/pyproject.toml @@ -3,6 +3,6 @@ requires = [ "setuptools>=42", "cmake>=3.24", "mlx>=0.9.0", - "nanobind@git+https://github.com/wjakob/nanobind.git#egg=4148debcf91f5ccab0c3b8d67b5c3cabd61f407f", + "nanobind@git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4", ] build-backend = "setuptools.build_meta" diff --git a/mlx/io/gguf_quants.cpp b/mlx/io/gguf_quants.cpp index e0eb73ad1..7c4e87253 100644 --- a/mlx/io/gguf_quants.cpp +++ b/mlx/io/gguf_quants.cpp @@ -2,6 +2,7 @@ #include #include +#include #include @@ -118,16 +119,21 @@ void gguf_load_quantized( std::vector weights_shape = shape; weights_shape.back() /= (weights_per_byte * 4); + auto w_nbytes = uint32.size * + std::accumulate(weights_shape.begin(), + weights_shape.end(), + 1, + std::multiplies()); - array weights(std::move(weights_shape), uint32, nullptr, {}); - weights.set_data(allocator::malloc(weights.nbytes())); + array weights(allocator::malloc(w_nbytes), std::move(weights_shape), uint32); // For scales and bias shape[shape.size() - 1] = shape[shape.size() - 1] / weights_per_block; - array scales(shape, float16, nullptr, {}); - array biases(std::move(shape), float16, nullptr, {}); - scales.set_data(allocator::malloc(scales.nbytes())); - biases.set_data(allocator::malloc(biases.nbytes())); + auto sb_nbytes = float16.size * + std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); + + array scales(allocator::malloc(sb_nbytes), shape, float16); + array biases(allocator::malloc(sb_nbytes), std::move(shape), float16); if (tensor.type == GGUF_TYPE_Q4_0) { extract_q4_0_data(tensor, weights, scales, biases); diff --git a/pyproject.toml b/pyproject.toml index b9511111e..07c15ff3e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [build-system] requires = [ "setuptools>=42", - "nanobind@git+https://github.com/wjakob/nanobind.git#egg=4148debcf91f5ccab0c3b8d67b5c3cabd61f407f", + "nanobind@git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4", "cmake>=3.24", ] build-backend = "setuptools.build_meta"