mirror of https://github.com/ml-explore/mlx.git, synced 2025-11-01 00:28:11 +08:00
			
		
		
		
	fix gguf loading quants (#1014)
* fix gguf loading quants
* fix nanobind install
* actual fix
This commit is contained in:
		| @@ -31,7 +31,7 @@ jobs: | |||||||
|           name: Install dependencies |           name: Install dependencies | ||||||
|           command: | |           command: | | ||||||
|             pip install --upgrade cmake |             pip install --upgrade cmake | ||||||
|             pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f |             pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4 | ||||||
|             pip install numpy |             pip install numpy | ||||||
|             sudo apt-get update |             sudo apt-get update | ||||||
|             sudo apt-get install libblas-dev liblapack-dev liblapacke-dev |             sudo apt-get install libblas-dev liblapack-dev liblapacke-dev | ||||||
| @@ -80,7 +80,7 @@ jobs: | |||||||
|             source env/bin/activate |             source env/bin/activate | ||||||
|             pip install --upgrade pip |             pip install --upgrade pip | ||||||
|             pip install --upgrade cmake |             pip install --upgrade cmake | ||||||
|             pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f |             pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4 | ||||||
|             pip install numpy |             pip install numpy | ||||||
|             pip install torch |             pip install torch | ||||||
|             pip install tensorflow |             pip install tensorflow | ||||||
| @@ -143,7 +143,7 @@ jobs: | |||||||
|             source env/bin/activate |             source env/bin/activate | ||||||
|             pip install --upgrade pip |             pip install --upgrade pip | ||||||
|             pip install --upgrade cmake |             pip install --upgrade cmake | ||||||
|             pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f |             pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4 | ||||||
|             pip install --upgrade setuptools |             pip install --upgrade setuptools | ||||||
|             pip install numpy |             pip install numpy | ||||||
|             pip install twine |             pip install twine | ||||||
| @@ -207,7 +207,7 @@ jobs: | |||||||
|             source env/bin/activate |             source env/bin/activate | ||||||
|             pip install --upgrade pip |             pip install --upgrade pip | ||||||
|             pip install --upgrade cmake |             pip install --upgrade cmake | ||||||
|             pip install git+https://github.com/wjakob/nanobind.git@4148debcf91f5ccab0c3b8d67b5c3cabd61f407f |             pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4 | ||||||
|             pip install --upgrade setuptools |             pip install --upgrade setuptools | ||||||
|             pip install numpy |             pip install numpy | ||||||
|             pip install auditwheel |             pip install auditwheel | ||||||
|   | |||||||
| @@ -74,7 +74,7 @@ Install `nanobind <https://nanobind.readthedocs.io/en/latest/>`_ with: | |||||||
|  |  | ||||||
| .. code-block:: shell | .. code-block:: shell | ||||||
|  |  | ||||||
|     pip install git+https://github.com/wjakob/nanobind.git |     pip install git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4 | ||||||
|  |  | ||||||
| Then simply build and install MLX using pip: | Then simply build and install MLX using pip: | ||||||
|  |  | ||||||
| @@ -120,7 +120,7 @@ Create a build directory and run CMake and make: | |||||||
| .. code-block:: shell | .. code-block:: shell | ||||||
|  |  | ||||||
|    mkdir -p build && cd build |    mkdir -p build && cd build | ||||||
|    cmake .. && make -j  |    cmake .. && make -j | ||||||
|  |  | ||||||
| Run tests with: | Run tests with: | ||||||
|  |  | ||||||
| @@ -139,7 +139,7 @@ directory as the executable statically linked to ``libmlx.a`` or the | |||||||
| preprocessor constant ``METAL_PATH`` should be defined at build time and it | preprocessor constant ``METAL_PATH`` should be defined at build time and it | ||||||
| should point to the path to the built metal library. | should point to the path to the built metal library. | ||||||
|  |  | ||||||
| .. list-table:: Build Options  | .. list-table:: Build Options | ||||||
|    :widths: 25 8 |    :widths: 25 8 | ||||||
|    :header-rows: 1 |    :header-rows: 1 | ||||||
|  |  | ||||||
| @@ -161,15 +161,15 @@ should point to the path to the built metal library. | |||||||
|  |  | ||||||
| .. note:: | .. note:: | ||||||
|  |  | ||||||
|     If you have multiple Xcode installations and wish to use  |     If you have multiple Xcode installations and wish to use | ||||||
|     a specific one while building, you can do so by adding the  |     a specific one while building, you can do so by adding the | ||||||
|     following environment variable before building  |     following environment variable before building | ||||||
|  |  | ||||||
|     .. code-block:: shell |     .. code-block:: shell | ||||||
|  |  | ||||||
|       export DEVELOPER_DIR="/path/to/Xcode.app/Contents/Developer/" |       export DEVELOPER_DIR="/path/to/Xcode.app/Contents/Developer/" | ||||||
|  |  | ||||||
|     Further, you can use the following command to find out which  |     Further, you can use the following command to find out which | ||||||
|     macOS SDK will be used |     macOS SDK will be used | ||||||
|  |  | ||||||
|     .. code-block:: shell |     .. code-block:: shell | ||||||
| @@ -201,7 +201,7 @@ Then set the active developer directory: | |||||||
|  |  | ||||||
|   sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer |   sudo xcode-select --switch /Applications/Xcode.app/Contents/Developer | ||||||
|  |  | ||||||
| x86 Shell  | x86 Shell | ||||||
| ~~~~~~~~~ | ~~~~~~~~~ | ||||||
|  |  | ||||||
| .. _build shell: | .. _build shell: | ||||||
|   | |||||||
| @@ -3,6 +3,6 @@ requires = [ | |||||||
|   "setuptools>=42", |   "setuptools>=42", | ||||||
|   "cmake>=3.24", |   "cmake>=3.24", | ||||||
|   "mlx>=0.9.0", |   "mlx>=0.9.0", | ||||||
|   "nanobind@git+https://github.com/wjakob/nanobind.git#egg=4148debcf91f5ccab0c3b8d67b5c3cabd61f407f", |   "nanobind@git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4", | ||||||
| ] | ] | ||||||
| build-backend = "setuptools.build_meta" | build-backend = "setuptools.build_meta" | ||||||
|   | |||||||
| @@ -2,6 +2,7 @@ | |||||||
|  |  | ||||||
| #include <cstdint> | #include <cstdint> | ||||||
| #include <cstring> | #include <cstring> | ||||||
|  | #include <numeric> | ||||||
|  |  | ||||||
| #include <mlx/io/gguf.h> | #include <mlx/io/gguf.h> | ||||||
|  |  | ||||||
| @@ -118,16 +119,21 @@ void gguf_load_quantized( | |||||||
|  |  | ||||||
|   std::vector<int> weights_shape = shape; |   std::vector<int> weights_shape = shape; | ||||||
|   weights_shape.back() /= (weights_per_byte * 4); |   weights_shape.back() /= (weights_per_byte * 4); | ||||||
|  |   auto w_nbytes = uint32.size * | ||||||
|  |       std::accumulate(weights_shape.begin(), | ||||||
|  |                       weights_shape.end(), | ||||||
|  |                       1, | ||||||
|  |                       std::multiplies<size_t>()); | ||||||
|  |  | ||||||
|   array weights(std::move(weights_shape), uint32, nullptr, {}); |   array weights(allocator::malloc(w_nbytes), std::move(weights_shape), uint32); | ||||||
|   weights.set_data(allocator::malloc(weights.nbytes())); |  | ||||||
|  |  | ||||||
|   // For scales and bias |   // For scales and bias | ||||||
|   shape[shape.size() - 1] = shape[shape.size() - 1] / weights_per_block; |   shape[shape.size() - 1] = shape[shape.size() - 1] / weights_per_block; | ||||||
|   array scales(shape, float16, nullptr, {}); |   auto sb_nbytes = float16.size * | ||||||
|   array biases(std::move(shape), float16, nullptr, {}); |       std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>()); | ||||||
|   scales.set_data(allocator::malloc(scales.nbytes())); |  | ||||||
|   biases.set_data(allocator::malloc(biases.nbytes())); |   array scales(allocator::malloc(sb_nbytes), shape, float16); | ||||||
|  |   array biases(allocator::malloc(sb_nbytes), std::move(shape), float16); | ||||||
|  |  | ||||||
|   if (tensor.type == GGUF_TYPE_Q4_0) { |   if (tensor.type == GGUF_TYPE_Q4_0) { | ||||||
|     extract_q4_0_data(tensor, weights, scales, biases); |     extract_q4_0_data(tensor, weights, scales, biases); | ||||||
|   | |||||||
| @@ -1,7 +1,7 @@ | |||||||
| [build-system] | [build-system] | ||||||
| requires = [ | requires = [ | ||||||
|   "setuptools>=42", |   "setuptools>=42", | ||||||
|   "nanobind@git+https://github.com/wjakob/nanobind.git#egg=4148debcf91f5ccab0c3b8d67b5c3cabd61f407f", |   "nanobind@git+https://github.com/wjakob/nanobind.git@2f04eac452a6d9142dedb957701bdb20125561e4", | ||||||
|   "cmake>=3.24", |   "cmake>=3.24", | ||||||
| ] | ] | ||||||
| build-backend = "setuptools.build_meta" | build-backend = "setuptools.build_meta" | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Awni Hannun
					Awni Hannun