chore(lora): support mixtral in lora example (#343)

2025-10-23 22:18:06 +08:00 · 2024-01-20 06:07:45 -08:00
parent 527cea4027
commit 1415595409
6 changed files with 279 additions and 4 deletions
--- a/lora/convert.py
+++ b/lora/convert.py
@@ -20,7 +20,13 @@ def quantize(weights, config, args):
    model.load_weights(list(weights.items()))

    # Quantize the model:
-    nn.QuantizedLinear.quantize_module(model, args.q_group_size, args.q_bits)
+    nn.QuantizedLinear.quantize_module(
+        model,
+        args.q_group_size,
+        args.q_bits,
+        linear_class_predicate=lambda m: isinstance(m, nn.Linear)
+        and m.weight.shape[0] != 8,
+    )

    # Update the config:
    quantized_config["quantization"] = {