mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-10-23 22:18:06 +08:00
quantize linear (#250)
This commit is contained in:
@@ -339,9 +339,6 @@ def load_model(folder: str):
|
||||
model_args = ModelArgs(**config)
|
||||
model = Model(model_args)
|
||||
if quantization is not None:
|
||||
quantization["linear_class_predicate"] = lambda m: isinstance(
|
||||
m, nn.Linear
|
||||
) and (m.weight.shape[0] != model_args.vocab_size)
|
||||
nn.QuantizedLinear.quantize_module(model, **quantization)
|
||||
|
||||
weights = mx.load(str(model_path / "weights.npz"))
|
||||
|
Reference in New Issue
Block a user