Fix quant in gguf (#698)

* fix quant in gguf
* fix whisper
2025-08-10 19:26:46 +08:00 · 2024-04-19 20:07:11 -07:00 · 2024-04-19 20:07:11 -07:00 · 6abdbe3be8
commit 6abdbe3be8
parent 574ad7f6fe
2 changed files with 7 additions and 3 deletions
--- a/llms/gguf_llm/models.py
+++ b/llms/gguf_llm/models.py
@@ -285,7 +285,7 @@ def load(gguf_file: str, repo: str = None):
            and f"{p}.scales" in weights
        )
        nn.quantize(
-            qm,
+            model,
            **quantization,
            class_predicate=class_predicate,
        )
--- a/whisper/whisper/load_models.py
+++ b/whisper/whisper/load_models.py
@@ -27,13 +27,17 @@ def load_model(
    model_args = whisper.ModelDimensions(**config)

    weights = mx.load(str(model_path / "weights.npz"))
-    weights = tree_unflatten(list(weights.items()))

    model = whisper.Whisper(model_args, dtype)

    if quantization is not None:
-        nn.quantize(model, **quantization)
+        class_predicate = (
+            lambda p, m: isinstance(m, (nn.Linear, nn.Embedding))
+            and f"{p}.scales" in weights
+        )
+        nn.quantize(model, **quantization, class_predicate=class_predicate)

+    weights = tree_unflatten(list(weights.items()))
    model.update(weights)
    mx.eval(model.parameters())
    return model