feat(mlx-lm): add de-quant for fuse.py (#365)

* feat(mlx-lm): add de-quant for fuse

* chore: disable quant in to_linear when de-quant enabled

* chore: add better error handling for adapter file not found
This commit is contained in:
Anchen
2024-01-26 13:59:32 +11:00
committed by GitHub
parent f51e98fcf1
commit 854ad8747a
4 changed files with 70 additions and 10 deletions

View File

@@ -29,7 +29,7 @@ class LoRALinear(nn.Module):
lora_lin.linear = linear
return lora_lin
def to_linear(self):
def to_linear(self, de_quantize: bool = False):
linear = self.linear
bias = "bias" in linear
weight = linear.weight
@@ -56,7 +56,7 @@ class LoRALinear(nn.Module):
if bias:
fused_linear.bias = linear.bias
if is_quantized:
if is_quantized and not de_quantize:
fused_linear = nn.QuantizedLinear.from_linear(
fused_linear,
linear.group_size,