mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-10-24 06:28:07 +08:00
Quantize embedding / Update quantize API (#680)
* more async eval
* quantize embedding / update quantize api
* more updates for quantize
* update for quantize embeddings
* update sd quant API
* update sdxl quants
* error for datasets < batch_size
* async
* fix config loading
* fix quant
* fix tests
* fix req
* remove lm head if tie weights is true
* fix test
This commit is contained in:
@@ -339,7 +339,7 @@ def load_model(model_path):
|
||||
quantization = config.pop("quantization", None)
|
||||
model = Llama(ModelArgs(**config))
|
||||
if quantization is not None:
|
||||
- nn.QuantizedLinear.quantize_module(model, **quantization)
|
||||
+ nn.quantize(model, **quantization)
|
||||
model.update(tree_unflatten(list(weights.items())))
|
||||
tokenizer = SentencePieceProcessor(model_file=str(model_path / "tokenizer.model"))
|
||||
return model, tokenizer
|
||||
|
||||
Reference in New Issue
Block a user