use lower precision base weights

Awni Hannun 2023-12-15 10:29:42 -08:00
parent d108c558fc
commit 84f02ef58b
2 changed files with 3 additions and 2 deletions

@@ -184,6 +184,7 @@ def load(args):
 def loss(model, inputs, targets, lengths):
     # Run model on inputs
     logits, _ = model(inputs)
+    logits = logits.astype(mx.float32)

     # Mask padding tokens
     length_mask = mx.arange(inputs.shape[1])[None, :] < lengths[:, None]
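Casting the logits up to float32 before the loss keeps the cross-entropy and its reduction numerically stable even though the base model now runs in float16. A minimal sketch of a masked loss in this style, assuming MLX's nn.losses.cross_entropy; the lines after the length mask are not shown in the hunk above and are an assumption here:

import mlx.core as mx
import mlx.nn as nn

def loss(model, inputs, targets, lengths):
    # Forward pass may run in float16; upcast logits before the loss
    logits, _ = model(inputs)
    logits = logits.astype(mx.float32)

    # Mask out positions past each sequence's true length
    length_mask = mx.arange(inputs.shape[1])[None, :] < lengths[:, None]

    # Per-token cross-entropy, zeroed on padding, averaged over real tokens
    # (this reduction is an assumption, not shown in the diff)
    ce = nn.losses.cross_entropy(logits, targets) * length_mask
    ntoks = length_mask.sum()
    return ce.sum() / ntoks, ntoks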
@@ -326,7 +327,7 @@ def generate(model, prompt, tokenizer, args):
     print(s, flush=True)


-def load_model(folder: str, dtype=mx.float32):
+def load_model(folder: str, dtype=mx.float16):
     model_path = Path(folder)
     tokenizer = Tokenizer(str(model_path / "tokenizer.model"))
     with open(model_path / "params.json", "r") as f:
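Switching the default dtype to mx.float16 means the frozen base weights are held in half precision, roughly halving their memory footprint, while the LoRA parameters and the loss above stay in float32. The hunk does not show where dtype is applied; a plausible sketch, assuming the parameters live in a weights.npz file and are cast with mlx.utils.tree_map (both the file name and the helper's exact shape are assumptions):

import json
from pathlib import Path

import mlx.core as mx
from mlx.utils import tree_map, tree_unflatten

def load_weights(folder: str, dtype=mx.float16):
    # Hypothetical helper: read the config and cast every weight to `dtype`
    model_path = Path(folder)
    with open(model_path / "params.json", "r") as f:
        config = json.load(f)

    weights = mx.load(str(model_path / "weights.npz"))  # assumed file name
    weights = tree_unflatten(list(weights.items()))
    weights = tree_map(lambda p: p.astype(dtype), weights)
    return config, weights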

@@ -47,7 +47,7 @@ class LoRALinear(nn.Module):
         self.lora_b = mx.zeros(shape=(lora_rank, output_dims))

     def __call__(self, x):
-        y = self.linear(x)
+        y = self.linear(x.astype(self.linear.weight.dtype)).astype(x.dtype)
         z = (x @ self.lora_a) @ self.lora_b
         return y + 2.0 * z
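Together with the float16 default above, this change runs the frozen base matmul in the weight's dtype and casts the result back, so the activations, the LoRA update, and the loss stay in the higher precision. A self-contained sketch of the layer; the __call__ body and the lora_b initialization come from the diff, while the lora_a initialization and the rank default are assumptions:

import math

import mlx.core as mx
import mlx.nn as nn

class LoRALinear(nn.Module):
    def __init__(self, input_dims: int, output_dims: int, lora_rank: int = 8):
        super().__init__()
        # Frozen base projection; its weights may be stored in float16
        self.linear = nn.Linear(input_dims, output_dims, bias=False)

        # Low-rank adapter weights, kept in the default float32 precision
        # (the uniform init scale here is an assumption, not from the diff)
        scale = 1.0 / math.sqrt(input_dims)
        self.lora_a = mx.random.uniform(
            low=-scale, high=scale, shape=(input_dims, lora_rank)
        )
        self.lora_b = mx.zeros(shape=(lora_rank, output_dims))

    def __call__(self, x):
        # Run the frozen matmul in the base weight dtype, then cast back
        # so the rest of the graph keeps the dtype of x
        y = self.linear(x.astype(self.linear.weight.dtype)).astype(x.dtype)
        # The low-rank update stays in x's dtype
        z = (x @ self.lora_a) @ self.lora_b
        return y + 2.0 * z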