fix gguf loading quants (#1014)

* fix gguf loading quants

* fix nanobind install

* actual fix
This commit is contained in:
Awni Hannun 2024-04-19 12:24:07 -07:00 committed by GitHub
parent ef5f7d1aea
commit ed83908931
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 26 additions and 20 deletions

View File

@ -31,7 +31,7 @@ jobs:
name: Install dependencies name: Install dependencies
command: | command: |
pip install --upgrade cmake pip install --upgrade cmake
pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4
pip install numpy pip install numpy
sudo apt-get update sudo apt-get update
sudo apt-get install libblas-dev liblapack-dev liblapacke-dev sudo apt-get install libblas-dev liblapack-dev liblapacke-dev
@ -80,7 +80,7 @@ jobs:
source env/bin/activate source env/bin/activate
pip install --upgrade pip pip install --upgrade pip
pip install --upgrade cmake pip install --upgrade cmake
pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4
pip install numpy pip install numpy
pip install torch pip install torch
pip install tensorflow pip install tensorflow
@ -143,7 +143,7 @@ jobs:
source env/bin/activate source env/bin/activate
pip install --upgrade pip pip install --upgrade pip
pip install --upgrade cmake pip install --upgrade cmake
pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4
pip install --upgrade setuptools pip install --upgrade setuptools
pip install numpy pip install numpy
pip install twine pip install twine
@ -207,7 +207,7 @@ jobs:
source env/bin/activate source env/bin/activate
pip install --upgrade pip pip install --upgrade pip
pip install --upgrade cmake pip install --upgrade cmake
pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4
pip install --upgrade setuptools pip install --upgrade setuptools
pip install numpy pip install numpy
pip install auditwheel pip install auditwheel

View File

@ -74,7 +74,7 @@ Install `nanobind <https://nanobind.readthedocs.io/en/latest/>`_ with:
.. code-block:: shell .. code-block:: shell
pip install git+https://github.com/wjakob/nanobind.git pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4
Then simply build and install MLX using pip: Then simply build and install MLX using pip:
@ -120,7 +120,7 @@ Create a build directory and run CMake and make:
.. code-block:: shell .. code-block:: shell
mkdir -p build && cd build mkdir -p build && cd build
cmake .. && make -j cmake .. && make -j
Run tests with: Run tests with:
@ -139,7 +139,7 @@ directory as the executable statically linked to ``libmlx.a`` or the
preprocessor constant ``METAL_PATH`` should be defined at build time and it preprocessor constant ``METAL_PATH`` should be defined at build time and it
should point to the path to the built metal library. should point to the path to the built metal library.
.. list-table:: Build Options .. list-table:: Build Options
:widths: 25 8 :widths: 25 8
:header-rows: 1 :header-rows: 1
@ -161,15 +161,15 @@ should point to the path to the built metal library.
.. note:: .. note::
If you have multiple Xcode installations and wish to use If you have multiple Xcode installations and wish to use
a specific one while building, you can do so by adding the a specific one while building, you can do so by adding the
following environment variable before building following environment variable before building
.. code-block:: shell .. code-block:: shell
export DEVELOPER_DIR="/path/to/Xcode.app/Contents/Developer/" export DEVELOPER_DIR="/path/to/Xcode.app/Contents/Developer/"
Further, you can use the following command to find out which Further, you can use the following command to find out which
macOS SDK will be used macOS SDK will be used
.. code-block:: shell .. code-block:: shell
@ -201,7 +201,7 @@ Then set the active developer directory:
sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer
x86 Shell x86 Shell
~~~~~~~~~ ~~~~~~~~~
.. _build shell: .. _build shell:

View File

@ -3,6 +3,6 @@ requires = [
"setuptools>=42", "setuptools>=42",
"cmake>=3.24", "cmake>=3.24",
"mlx>=0.9.0", "mlx>=0.9.0",
"nanobind@git+https://github.com/wjakob/nanobind.git#egg=4148debcf91f5ccab0c3b8d67b5c3cabd61f407f", "nanobind@git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4",
] ]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"

View File

@ -2,6 +2,7 @@
#include <cstdint> #include <cstdint>
#include <cstring> #include <cstring>
#include <numeric>
#include <mlx/io/gguf.h> #include <mlx/io/gguf.h>
@ -118,16 +119,21 @@ void gguf_load_quantized(
std::vector<int> weights_shape = shape; std::vector<int> weights_shape = shape;
weights_shape.back() /= (weights_per_byte * 4); weights_shape.back() /= (weights_per_byte * 4);
auto w_nbytes = uint32.size *
std::accumulate(weights_shape.begin(),
weights_shape.end(),
1,
std::multiplies<size_t>());
array weights(std::move(weights_shape), uint32, nullptr, {}); array weights(allocator::malloc(w_nbytes), std::move(weights_shape), uint32);
weights.set_data(allocator::malloc(weights.nbytes()));
// For scales and bias // For scales and bias
shape[shape.size() - 1] = shape[shape.size() - 1] / weights_per_block; shape[shape.size() - 1] = shape[shape.size() - 1] / weights_per_block;
array scales(shape, float16, nullptr, {}); auto sb_nbytes = float16.size *
array biases(std::move(shape), float16, nullptr, {}); std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>());
scales.set_data(allocator::malloc(scales.nbytes()));
biases.set_data(allocator::malloc(biases.nbytes())); array scales(allocator::malloc(sb_nbytes), shape, float16);
array biases(allocator::malloc(sb_nbytes), std::move(shape), float16);
if (tensor.type == GGUF_TYPE_Q4_0) { if (tensor.type == GGUF_TYPE_Q4_0) {
extract_q4_0_data(tensor, weights, scales, biases); extract_q4_0_data(tensor, weights, scales, biases);

View File

@ -1,7 +1,7 @@
[build-system] [build-system]
requires = [ requires = [
"setuptools>=42", "setuptools>=42",
"nanobind@git+https://github.com/wjakob/nanobind.git#egg=4148debcf91f5ccab0c3b8d67b5c3cabd61f407f", "nanobind@git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4",
"cmake>=3.24", "cmake>=3.24",
] ]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"