Fix quant in gguf (#698)

* fix quant in gguf
* fix whisper
2025-10-24 14:38:09 +08:00 · 2024-04-19 20:07:11 -07:00
parent 574ad7f6fe
commit 6abdbe3be8
2 changed files with 7 additions and 3 deletions
--- a/llms/gguf_llm/models.py
+++ b/llms/gguf_llm/models.py
@@ -285,7 +285,7 @@ def load(gguf_file: str, repo: str = None):
            and f"{p}.scales" in weights
        )
        nn.quantize(
-            qm,
+            model,
            **quantization,
            class_predicate=class_predicate,
        )