From df124e018abc4f4bea3d20eb9aeddf7188651de3 Mon Sep 17 00:00:00 2001
From: Awni Hannun
Date: Thu, 18 Jul 2024 07:35:35 -0700
Subject: [PATCH] fix gguf (#1273)

* fix gguf

* comment
---
 mlx/io/gguf.cpp        | 13 ++++++++++---
 mlx/io/gguf_quants.cpp |  6 +++---
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/mlx/io/gguf.cpp b/mlx/io/gguf.cpp
index c452886fd..934bcee82 100644
--- a/mlx/io/gguf.cpp
+++ b/mlx/io/gguf.cpp
@@ -217,13 +217,11 @@ std::unordered_map<std::string, array> load_arrays(gguf_ctx* ctx) {
   };
 
   while (gguf_get_tensor(ctx, &tensor)) {
-    std::string name(tensor.name, tensor.namelen);
     if (tensor.type == GGUF_TYPE_Q4_0 || tensor.type == GGUF_TYPE_Q4_1 ||
         tensor.type == GGUF_TYPE_Q8_0) {
       gguf_load_quantized(array_map, tensor);
     } else {
-      std::string name = std::string(tensor.name, tensor.namelen);
-
+      std::string name(tensor.name, tensor.namelen);
       const auto& [data, dtype] = extract_tensor_data(&tensor);
       array loaded_array = array(data, get_shape(tensor), dtype);
       check_insert(array_map.insert({name, loaded_array}));
@@ -233,6 +231,15 @@ std::unordered_map<std::string, array> load_arrays(gguf_ctx* ctx) {
 }
 
 GGUFLoad load_gguf(const std::string& file, StreamOrDevice s) {
+  bool exists;
+  {
+    std::ifstream f(file.c_str());
+    exists = f.good();
+  }
+  if (!exists) {
+    throw std::invalid_argument("[load_gguf] Failed to open " + file);
+  }
+
   gguf_ctx* ctx = gguf_open(file.data());
   if (!ctx) {
     throw std::runtime_error("[load_gguf] gguf_init failed");
diff --git a/mlx/io/gguf_quants.cpp b/mlx/io/gguf_quants.cpp
index a06ccfe65..4f89b2278 100644
--- a/mlx/io/gguf_quants.cpp
+++ b/mlx/io/gguf_quants.cpp
@@ -9,7 +9,8 @@
 namespace mlx::core {
 
 void unpack_32_4(uint8_t* data, int8_t* dst) {
-  for (int64_t j = 0; j < 16; ++j) {
+  std::fill_n(dst, 16, 0);
+  for (int j = 0; j < 16; ++j) {
     uint8_t x = (data[j + 2] & 0x0F); // j+2 to skip scale bytes.
     if (j % 2 != 0) {
       x <<= 4;
@@ -17,7 +18,7 @@ void unpack_32_4(uint8_t* data, int8_t* dst) {
     dst[j / 2] += x;
   }
   // Last 16 weights are in the higher bits
-  for (int64_t j = 0; j < 16; ++j) {
+  for (int j = 0; j < 16; ++j) {
     uint8_t x = (data[j + 2] >> 4);
     if (j % 2 != 0) {
       x <<= 4;
@@ -134,7 +135,6 @@ void gguf_load_quantized(
 
   array scales(allocator::malloc(sb_nbytes), shape, float16);
   array biases(allocator::malloc(sb_nbytes), std::move(shape), float16);
-
  if (tensor.type == GGUF_TYPE_Q4_0) {
    extract_q4_0_data(tensor, weights, scales, biases);
  } else if (tensor.type == GGUF_TYPE_Q4_1) {
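
Note on the gguf_quants.cpp change (editorial annotation, not part of the
applied diff): unpack_32_4 builds each output byte by accumulating two
4-bit nibbles with +=, so the destination must start zeroed; the added
std::fill_n makes the function correct even if the caller hands it
uninitialized memory. Below is a self-contained sketch of the fixed
unpacking pattern. The second loop's destination index (dst[8 + j / 2])
is taken from the surrounding function body that the hunk context cuts
off, and the main() driver with its sample block values is purely
illustrative.

  #include <algorithm>
  #include <cstdint>
  #include <cstdio>

  // A Q4_0 block stores 2 scale bytes, then 16 data bytes, each data
  // byte holding two 4-bit weights. Unpacking re-pairs the 32 nibbles
  // into 16 output bytes, accumulating with +=, so dst must be zeroed.
  void unpack_32_4_sketch(const uint8_t* data, int8_t* dst) {
    std::fill_n(dst, 16, 0); // stale bytes would corrupt the += below
    for (int j = 0; j < 16; ++j) {
      uint8_t x = (data[j + 2] & 0x0F); // j+2 to skip scale bytes.
      if (j % 2 != 0) {
        x <<= 4;
      }
      dst[j / 2] += x; // low nibbles fill the first 8 output bytes
    }
    // Last 16 weights are in the higher bits
    for (int j = 0; j < 16; ++j) {
      uint8_t x = (data[j + 2] >> 4);
      if (j % 2 != 0) {
        x <<= 4;
      }
      dst[8 + j / 2] += x; // high nibbles fill the last 8 output bytes
    }
  }

  int main() {
    uint8_t block[18] = {0}; // 2 scale bytes + 16 data bytes
    block[2] = 0x21;         // low nibble 0x1, high nibble 0x2
    block[3] = 0x43;         // low nibble 0x3, high nibble 0x4
    int8_t out[16];
    // Calling twice shows why the fill matters: with it the function
    // is idempotent; without it the second call doubles every value.
    unpack_32_4_sketch(block, out);
    unpack_32_4_sketch(block, out);
    std::printf("%02x %02x\n", out[0] & 0xFF, out[8] & 0xFF); // 31 42
    return 0;
  }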
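
Note on the gguf.cpp change (editorial annotation, not part of the
applied diff): the pre-check with std::ifstream lets a missing file fail
with a descriptive std::invalid_argument instead of falling through to
gguf_open, whose null return only produces the generic "gguf_init
failed" runtime_error. A minimal sketch of the resulting error contract;
load_model is a hypothetical stand-in, since the real load_gguf returns
the loaded arrays and takes a StreamOrDevice:

  #include <fstream>
  #include <iostream>
  #include <stdexcept>
  #include <string>

  // Mimics the patched load_gguf entry: probe the path first so a
  // missing file raises a distinct exception type.
  void load_model(const std::string& file) {
    bool exists;
    {
      std::ifstream f(file.c_str());
      exists = f.good();
    } // probe handle closes here, before any real open
    if (!exists) {
      throw std::invalid_argument("[load_gguf] Failed to open " + file);
    }
    // A real loader would parse the GGUF container here and raise
    // std::runtime_error if the file exists but is malformed.
  }

  int main() {
    try {
      load_model("definitely-missing.gguf");
    } catch (const std::invalid_argument& e) {
      std::cerr << e.what() << "\n"; // clear "file not found" error
    }
    return 0;
  }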