quantize linear

Awni Hannun committed 2024-01-07 11:31:12 -08:00
parent bf9926489e
commit 94d7bd2ac3
2 changed files with 0 additions and 5 deletions


@@ -28,8 +28,6 @@ def quantize(weights, config, args):
         model,
         args.q_group_size,
         args.q_bits,
-        linear_class_predicate=lambda m: isinstance(m, nn.Linear)
-        and m.weight.shape[0] != config["vocab_size"],
     )

     # Update the config:
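
At convert time, the effect of this change is that the vocab-sized output projection is no longer excluded from quantization. A minimal before/after sketch, assuming the older MLX API used in this diff (nn.QuantizedLinear.quantize_module); the quantize_all_linears helper, its arguments, and the skip_output_head flag are illustrative names, not code from the repository:

import mlx.nn as nn

def quantize_all_linears(model, group_size, bits, vocab_size, skip_output_head=False):
    # Hypothetical helper contrasting the call before and after this commit.
    if skip_output_head:
        # Before: any nn.Linear whose output dimension equals the vocabulary
        # size (i.e. the output head) was left in full precision.
        nn.QuantizedLinear.quantize_module(
            model,
            group_size,
            bits,
            linear_class_predicate=lambda m: isinstance(m, nn.Linear)
            and m.weight.shape[0] != vocab_size,
        )
    else:
        # After: the default predicate is used, so every nn.Linear in the
        # model, including the output head, is quantized.
        nn.QuantizedLinear.quantize_module(model, group_size, bits)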

@@ -339,9 +339,6 @@ def load_model(folder: str):
         model_args = ModelArgs(**config)
     model = Model(model_args)
     if quantization is not None:
-        quantization["linear_class_predicate"] = lambda m: isinstance(
-            m, nn.Linear
-        ) and (m.weight.shape[0] != model_args.vocab_size)
         nn.QuantizedLinear.quantize_module(model, **quantization)
     weights = mx.load(str(model_path / "weights.npz"))
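
On the load side, the quantization settings saved by the convert step are now applied as-is, with no predicate override, so the output head is restored as a quantized layer as well. A minimal sketch under the same assumptions; quantize_on_load and the example settings dict are illustrative, not code from the repository:

import mlx.nn as nn

def quantize_on_load(model, quantization):
    # Hypothetical sketch of the load-time step after this commit.
    if quantization is not None:
        # quantization holds the settings written by the convert step,
        # e.g. {"group_size": 64, "bits": 4}. With no linear_class_predicate,
        # every nn.Linear (output head included) becomes a QuantizedLinear.
        nn.QuantizedLinear.quantize_module(model, **quantization)
    return model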