mirror of
https://github.com/ml-explore/mlx.git
synced 2025-06-24 09:21:16 +08:00
fix gguf loading quants (#1014)
* fix gguf loading quants * fix nanobind install * actual fix
This commit is contained in:
parent
ef5f7d1aea
commit
ed83908931
@ -31,7 +31,7 @@ jobs:
|
|||||||
name: Install dependencies
|
name: Install dependencies
|
||||||
command: |
|
command: |
|
||||||
pip install --upgrade cmake
|
pip install --upgrade cmake
|
||||||
pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f
|
pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4
|
||||||
pip install numpy
|
pip install numpy
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get install libblas-dev liblapack-dev liblapacke-dev
|
sudo apt-get install libblas-dev liblapack-dev liblapacke-dev
|
||||||
@ -80,7 +80,7 @@ jobs:
|
|||||||
source env/bin/activate
|
source env/bin/activate
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
pip install --upgrade cmake
|
pip install --upgrade cmake
|
||||||
pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f
|
pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4
|
||||||
pip install numpy
|
pip install numpy
|
||||||
pip install torch
|
pip install torch
|
||||||
pip install tensorflow
|
pip install tensorflow
|
||||||
@ -143,7 +143,7 @@ jobs:
|
|||||||
source env/bin/activate
|
source env/bin/activate
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
pip install --upgrade cmake
|
pip install --upgrade cmake
|
||||||
pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f
|
pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4
|
||||||
pip install --upgrade setuptools
|
pip install --upgrade setuptools
|
||||||
pip install numpy
|
pip install numpy
|
||||||
pip install twine
|
pip install twine
|
||||||
@ -207,7 +207,7 @@ jobs:
|
|||||||
source env/bin/activate
|
source env/bin/activate
|
||||||
pip install --upgrade pip
|
pip install --upgrade pip
|
||||||
pip install --upgrade cmake
|
pip install --upgrade cmake
|
||||||
pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f
|
pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4
|
||||||
pip install --upgrade setuptools
|
pip install --upgrade setuptools
|
||||||
pip install numpy
|
pip install numpy
|
||||||
pip install auditwheel
|
pip install auditwheel
|
||||||
|
@ -74,7 +74,7 @@ Install `nanobind <https://nanobind.readthedocs.io/en/latest/>`_ with:
|
|||||||
|
|
||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
|
|
||||||
pip install git+https://github.com/wjakob/nanobind.git
|
pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4
|
||||||
|
|
||||||
Then simply build and install MLX using pip:
|
Then simply build and install MLX using pip:
|
||||||
|
|
||||||
@ -120,7 +120,7 @@ Create a build directory and run CMake and make:
|
|||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
|
|
||||||
mkdir -p build && cd build
|
mkdir -p build && cd build
|
||||||
cmake .. && make -j
|
cmake .. && make -j
|
||||||
|
|
||||||
Run tests with:
|
Run tests with:
|
||||||
|
|
||||||
@ -139,7 +139,7 @@ directory as the executable statically linked to ``libmlx.a`` or the
|
|||||||
preprocessor constant ``METAL_PATH`` should be defined at build time and it
|
preprocessor constant ``METAL_PATH`` should be defined at build time and it
|
||||||
should point to the path to the built metal library.
|
should point to the path to the built metal library.
|
||||||
|
|
||||||
.. list-table:: Build Options
|
.. list-table:: Build Options
|
||||||
:widths: 25 8
|
:widths: 25 8
|
||||||
:header-rows: 1
|
:header-rows: 1
|
||||||
|
|
||||||
@ -161,15 +161,15 @@ should point to the path to the built metal library.
|
|||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
|
|
||||||
If you have multiple Xcode installations and wish to use
|
If you have multiple Xcode installations and wish to use
|
||||||
a specific one while building, you can do so by adding the
|
a specific one while building, you can do so by adding the
|
||||||
following environment variable before building
|
following environment variable before building
|
||||||
|
|
||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
|
|
||||||
export DEVELOPER_DIR="/path/to/Xcode.app/Contents/Developer/"
|
export DEVELOPER_DIR="/path/to/Xcode.app/Contents/Developer/"
|
||||||
|
|
||||||
Further, you can use the following command to find out which
|
Further, you can use the following command to find out which
|
||||||
macOS SDK will be used
|
macOS SDK will be used
|
||||||
|
|
||||||
.. code-block:: shell
|
.. code-block:: shell
|
||||||
@ -201,7 +201,7 @@ Then set the active developer directory:
|
|||||||
|
|
||||||
sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer
|
sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer
|
||||||
|
|
||||||
x86 Shell
|
x86 Shell
|
||||||
~~~~~~~~~
|
~~~~~~~~~
|
||||||
|
|
||||||
.. _build shell:
|
.. _build shell:
|
||||||
|
@ -3,6 +3,6 @@ requires = [
|
|||||||
"setuptools>=42",
|
"setuptools>=42",
|
||||||
"cmake>=3.24",
|
"cmake>=3.24",
|
||||||
"mlx>=0.9.0",
|
"mlx>=0.9.0",
|
||||||
"nanobind@git+https://github.com/wjakob/nanobind.git#egg=4148debcf91f5ccab0c3b8d67b5c3cabd61f407f",
|
"nanobind@git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4",
|
||||||
]
|
]
|
||||||
build-backend = "setuptools.build_meta"
|
build-backend = "setuptools.build_meta"
|
||||||
|
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <numeric>
|
||||||
|
|
||||||
#include <mlx/io/gguf.h>
|
#include <mlx/io/gguf.h>
|
||||||
|
|
||||||
@ -118,16 +119,21 @@ void gguf_load_quantized(
|
|||||||
|
|
||||||
std::vector<int> weights_shape = shape;
|
std::vector<int> weights_shape = shape;
|
||||||
weights_shape.back() /= (weights_per_byte * 4);
|
weights_shape.back() /= (weights_per_byte * 4);
|
||||||
|
auto w_nbytes = uint32.size *
|
||||||
|
std::accumulate(weights_shape.begin(),
|
||||||
|
weights_shape.end(),
|
||||||
|
1,
|
||||||
|
std::multiplies<size_t>());
|
||||||
|
|
||||||
array weights(std::move(weights_shape), uint32, nullptr, {});
|
array weights(allocator::malloc(w_nbytes), std::move(weights_shape), uint32);
|
||||||
weights.set_data(allocator::malloc(weights.nbytes()));
|
|
||||||
|
|
||||||
// For scales and bias
|
// For scales and bias
|
||||||
shape[shape.size() - 1] = shape[shape.size() - 1] / weights_per_block;
|
shape[shape.size() - 1] = shape[shape.size() - 1] / weights_per_block;
|
||||||
array scales(shape, float16, nullptr, {});
|
auto sb_nbytes = float16.size *
|
||||||
array biases(std::move(shape), float16, nullptr, {});
|
std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>());
|
||||||
scales.set_data(allocator::malloc(scales.nbytes()));
|
|
||||||
biases.set_data(allocator::malloc(biases.nbytes()));
|
array scales(allocator::malloc(sb_nbytes), shape, float16);
|
||||||
|
array biases(allocator::malloc(sb_nbytes), std::move(shape), float16);
|
||||||
|
|
||||||
if (tensor.type == GGUF_TYPE_Q4_0) {
|
if (tensor.type == GGUF_TYPE_Q4_0) {
|
||||||
extract_q4_0_data(tensor, weights, scales, biases);
|
extract_q4_0_data(tensor, weights, scales, biases);
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
[build-system]
|
[build-system]
|
||||||
requires = [
|
requires = [
|
||||||
"setuptools>=42",
|
"setuptools>=42",
|
||||||
"nanobind@git+https://github.com/wjakob/nanobind.git#egg=4148debcf91f5ccab0c3b8d67b5c3cabd61f407f",
|
"nanobind@git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4",
|
||||||
"cmake>=3.24",
|
"cmake>=3.24",
|
||||||
]
|
]
|
||||||
build-backend = "setuptools.build_meta"
|
build-backend = "setuptools.build_meta"
|
||||||
|
Loading…
Reference in New Issue
Block a user