feat(lora): add de-quantization support to fuse.py (#351)

* feat(lora): add de-quantization support to fuse.py

* address review comments
This commit is contained in:
Anchen
2024-01-22 17:32:24 -08:00
committed by GitHub
parent 30be4c4734
commit 8022083979
3 changed files with 48 additions and 6 deletions

View File

@@ -16,7 +16,7 @@ class LoRALinear(nn.Module):
lora_lin.linear = linear
return lora_lin
def to_linear(self):
def to_linear(self, de_quantize: bool = False):
linear = self.linear
bias = "bias" in linear
weight = linear.weight
@@ -43,7 +43,7 @@ class LoRALinear(nn.Module):
if bias:
fused_linear.bias = linear.bias
if is_quantized:
if is_quantized and not de_quantize:
fused_linear = nn.QuantizedLinear.from_linear(
fused_linear,
linear.group_size,