one more quant fix (#708)

commit ecbc6ff1e3 (parent 8d5cf5b0c8)
Awni Hannun, 2024-04-22 18:12:52 -07:00, committed by GitHub
2 changed files with 7 additions and 6 deletions


@@ -18,12 +18,10 @@ def quantize(weights, config, args):
     model.load_weights(list(weights.items()))
     # Quantize the model:
-    nn.QuantizedLinear.quantize_module(
+    nn.quantize(
         model,
         args.q_group_size,
         args.q_bits,
-        linear_class_predicate=lambda m: isinstance(m, nn.Linear)
-        and m.weight.shape[0] != 8,
     )
     # Update the config:
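
For reference, a minimal sketch of the replacement API on a toy model (the layer sizes and group size below are assumptions, not from the commit): nn.quantize walks the module tree and swaps each quantizable layer for its quantized counterpart in place, which is why the per-class QuantizedLinear.quantize_module entry point and its linear_class_predicate are no longer needed here.

import mlx.core as mx
import mlx.nn as nn

# Toy model; the group size must divide each layer's input dimension.
model = nn.Sequential(nn.Linear(64, 64), nn.ReLU(), nn.Linear(64, 8))

# Quantize in place: with no predicate, every quantizable layer
# (nn.Linear, nn.Embedding) is replaced by its quantized equivalent.
nn.quantize(model, group_size=32, bits=4)

x = mx.random.normal((1, 64))
print(model(x).shape)  # (1, 8)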


@@ -147,11 +147,14 @@ def load(path_or_hf_repo: str):
     model_args = models.ModelArgs.from_dict(config)
     model = models.Model(model_args)
     if quantization is not None:
-        nn.QuantizedLinear.quantize_module(
+        class_predicate = (
+            lambda p, m: isinstance(m, (nn.Linear, nn.Embedding))
+            and f"{p}.scales" in weights
+        )
+        nn.quantize(
             model,
             **quantization,
-            linear_class_predicate=lambda m: isinstance(m, nn.Linear)
-            and m.weight.shape[0] != 8,
+            class_predicate=class_predicate,
         )
     model.load_weights(list(weights.items()))
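
The predicate now receives both the module path p and the module m, so loading can consult the checkpoint itself: only modules whose quantization scales were actually saved get swapped out, and everything else stays in full precision. A self-contained sketch of that pattern (toy architecture assumed; tree_flatten stands in for reading a saved checkpoint):

import mlx.nn as nn
from mlx.utils import tree_flatten

# Build and quantize a toy model, then flatten its parameters into the
# dotted-path dict a checkpoint would contain ("layers.0.scales", ...).
quantized = nn.Sequential(nn.Linear(64, 64), nn.Linear(64, 8))
nn.quantize(quantized, group_size=32, bits=4)
weights = dict(tree_flatten(quantized.parameters()))

# Fresh full-precision copy of the same architecture.
model = nn.Sequential(nn.Linear(64, 64), nn.Linear(64, 8))

# Quantize only the modules whose scales exist in the checkpoint for
# path p, then load the weights into the now-matching structure.
class_predicate = (
    lambda p, m: isinstance(m, (nn.Linear, nn.Embedding))
    and f"{p}.scales" in weights
)
nn.quantize(model, group_size=32, bits=4, class_predicate=class_predicate)
model.load_weights(list(weights.items()))

The scales-based check replaces the old shape heuristic (m.weight.shape[0] != 8) with ground truth from the checkpoint: a module is quantized exactly when its quantized parameters were saved.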