feat(mlx-lm): add de-quant for fuse.py (#365)

* feat(mlx-lm): add de-quant for fuse
* chore: disable quant in to linear when de-quant enabled
* chore: add better error handling for adapter file not found
2025-12-15 17:58:54 +08:00 · 2024-01-26 13:59:32 +11:00
parent f51e98fcf1
commit 854ad8747a
4 changed files with 70 additions and 10 deletions
--- a/llms/mlx_lm/utils.py
+++ b/llms/mlx_lm/utils.py
@@ -24,7 +24,7 @@ MODEL_MAPPING = {
    "qwen": qwen,
    "plamo": plamo,
 }
-MAX_FILE_SIZE_GB = 15
+MAX_FILE_SIZE_GB = 5

 linear_class_predicate = (
    lambda m: isinstance(m, nn.Linear)