feat(mlx-lm): add de-quant for fuse.py (#365)

* feat(mlx-lm): add de-quant for fuse

* chore: disable quantization in to_linear when de-quant is enabled

* chore: improve error handling when the adapter file is not found
This commit is contained in:
Anchen
2024-01-26 13:59:32 +11:00
committed by GitHub
parent f51e98fcf1
commit 854ad8747a
4 changed files with 70 additions and 10 deletions

View File

@@ -24,7 +24,7 @@ MODEL_MAPPING = {
"qwen": qwen,
"plamo": plamo,
}
MAX_FILE_SIZE_GB = 15
MAX_FILE_SIZE_GB = 5
linear_class_predicate = (
lambda m: isinstance(m, nn.Linear)