diff --git a/.circleci/config.yml b/.circleci/config.yml
index b2e7794c1..7af33e64d 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -31,7 +31,7 @@ jobs:
name: Install dependencies
command: |
pip install --upgrade cmake
- pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f
+ pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4
pip install numpy
sudo apt-get update
sudo apt-get install libblas-dev liblapack-dev liblapacke-dev
@@ -80,7 +80,7 @@ jobs:
source env/bin/activate
pip install --upgrade pip
pip install --upgrade cmake
- pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f
+ pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4
pip install numpy
pip install torch
pip install tensorflow
@@ -143,7 +143,7 @@ jobs:
source env/bin/activate
pip install --upgrade pip
pip install --upgrade cmake
- pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f
+ pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4
pip install --upgrade setuptools
pip install numpy
pip install twine
@@ -207,7 +207,7 @@ jobs:
source env/bin/activate
pip install --upgrade pip
pip install --upgrade cmake
- pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f
+ pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4
pip install --upgrade setuptools
pip install numpy
pip install auditwheel
diff --git a/docs/src/install.rst b/docs/src/install.rst
index 7001d896f..f34db7270 100644
--- a/docs/src/install.rst
+++ b/docs/src/install.rst
@@ -74,7 +74,7 @@ Install `nanobind <https://nanobind.readthedocs.io/en/latest/>`_ with:
.. code-block:: shell
- pip install git+https://github.com/wjakob/nanobind.git
+ pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4
Then simply build and install MLX using pip:
@@ -120,7 +120,7 @@ Create a build directory and run CMake and make:
.. code-block:: shell
mkdir -p build && cd build
- cmake .. && make -j
+ cmake .. && make -j
Run tests with:
@@ -139,7 +139,7 @@ directory as the executable statically linked to ``libmlx.a`` or the
preprocessor constant ``METAL_PATH`` should be defined at build time and it
should point to the path to the built metal library.
-.. list-table:: Build Options
+.. list-table:: Build Options
:widths: 25 8
:header-rows: 1
@@ -161,15 +161,15 @@ should point to the path to the built metal library.
.. note::
- If you have multiple Xcode installations and wish to use
- a specific one while building, you can do so by adding the
- following environment variable before building
+ If you have multiple Xcode installations and wish to use
+ a specific one while building, you can do so by adding the
+ following environment variable before building
.. code-block:: shell
export DEVELOPER_DIR="/path/to/Xcode.app/Contents/Developer/"
- Further, you can use the following command to find out which
+ Further, you can use the following command to find out which
macOS SDK will be used
.. code-block:: shell
@@ -201,7 +201,7 @@ Then set the active developer directory:
sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer
-x86 Shell
+x86 Shell
~~~~~~~~~
.. _build shell:
diff --git a/examples/extensions/pyproject.toml b/examples/extensions/pyproject.toml
index c71470da1..3c27696a7 100644
--- a/examples/extensions/pyproject.toml
+++ b/examples/extensions/pyproject.toml
@@ -3,6 +3,6 @@ requires = [
"setuptools>=42",
"cmake>=3.24",
"mlx>=0.9.0",
- "nanobind@git+https://github.com/wjakob/nanobind.git#egg=4148debcf91f5ccab0c3b8d67b5c3cabd61f407f",
+ "nanobind@git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4",
]
build-backend = "setuptools.build_meta"
diff --git a/mlx/io/gguf_quants.cpp b/mlx/io/gguf_quants.cpp
index e0eb73ad1..7c4e87253 100644
--- a/mlx/io/gguf_quants.cpp
+++ b/mlx/io/gguf_quants.cpp
@@ -2,6 +2,7 @@
#include
#include
+#include <numeric>
#include
@@ -118,16 +119,21 @@ void gguf_load_quantized(
std::vector weights_shape = shape;
weights_shape.back() /= (weights_per_byte * 4);
+ auto w_nbytes = uint32.size *
+ std::accumulate(weights_shape.begin(),
+ weights_shape.end(),
+ 1,
+ std::multiplies());
- array weights(std::move(weights_shape), uint32, nullptr, {});
- weights.set_data(allocator::malloc(weights.nbytes()));
+ array weights(allocator::malloc(w_nbytes), std::move(weights_shape), uint32);
// For scales and bias
shape[shape.size() - 1] = shape[shape.size() - 1] / weights_per_block;
- array scales(shape, float16, nullptr, {});
- array biases(std::move(shape), float16, nullptr, {});
- scales.set_data(allocator::malloc(scales.nbytes()));
- biases.set_data(allocator::malloc(biases.nbytes()));
+ auto sb_nbytes = float16.size *
+ std::accumulate(shape.begin(), shape.end(), 1, std::multiplies());
+
+ array scales(allocator::malloc(sb_nbytes), shape, float16);
+ array biases(allocator::malloc(sb_nbytes), std::move(shape), float16);
if (tensor.type == GGUF_TYPE_Q4_0) {
extract_q4_0_data(tensor, weights, scales, biases);
diff --git a/pyproject.toml b/pyproject.toml
index b9511111e..07c15ff3e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
[build-system]
requires = [
"setuptools>=42",
- "nanobind@git+https://github.com/wjakob/nanobind.git#egg=4148debcf91f5ccab0c3b8d67b5c3cabd61f407f",
+ "nanobind@git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4",
"cmake>=3.24",
]
build-backend = "setuptools.build_meta"