mirror of https://github.com/ml-explore/mlx-examples.git
use lower precision base weights
This commit is contained in:
parent d108c558fc
commit 84f02ef58b
@@ -184,6 +184,7 @@ def load(args):
 def loss(model, inputs, targets, lengths):
     # Run model on inputs
     logits, _ = model(inputs)
+    logits = logits.astype(mx.float32)
 
     # Mask padding tokens
     length_mask = mx.arange(inputs.shape[1])[None, :] < lengths[:, None]
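Upcasting the logits to float32 here keeps the cross-entropy numerically stable even though, with this commit, the base weights (and hence the activations) can be float16. For context, a minimal sketch of how the rest of the loss plausibly continues; the nn.losses.cross_entropy call and the token-count reduction below are assumptions based on MLX's API, not lines from this diff:

import mlx.core as mx
import mlx.nn as nn

def loss(model, inputs, targets, lengths):
    # Run model on inputs; activations may now be float16.
    logits, _ = model(inputs)
    logits = logits.astype(mx.float32)  # upcast before the log-softmax

    # Mask padding tokens beyond each sequence's true length.
    length_mask = mx.arange(inputs.shape[1])[None, :] < lengths[:, None]

    # Assumed continuation: mean cross-entropy over non-padding tokens.
    ce = nn.losses.cross_entropy(logits, targets) * length_mask
    ntoks = length_mask.sum()
    return ce.sum() / ntoks, ntoks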
@@ -326,7 +327,7 @@ def generate(model, prompt, tokenizer, args):
     print(s, flush=True)
 
 
-def load_model(folder: str, dtype=mx.float32):
+def load_model(folder: str, dtype=mx.float16):
     model_path = Path(folder)
     tokenizer = Tokenizer(str(model_path / "tokenizer.model"))
     with open(model_path / "params.json", "r") as f:
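Flipping the default dtype to mx.float16 halves the memory footprint of the frozen base weights, which is the point of the commit. The hunk cuts off before the weights are actually read, so the following is only a hedged sketch of the casting pattern such a load_model typically applies; the helper name and file path are hypothetical:

import mlx.core as mx

def load_weights(path: str, dtype=mx.float16):
    # Hypothetical helper: read the saved arrays, then downcast each
    # tensor to the requested dtype (float16 by default after this commit).
    weights = mx.load(path)
    return {k: v.astype(dtype) for k, v in weights.items()}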
@@ -47,7 +47,7 @@ class LoRALinear(nn.Module):
         self.lora_b = mx.zeros(shape=(lora_rank, output_dims))
 
     def __call__(self, x):
-        y = self.linear(x)
+        y = self.linear(x.astype(self.linear.weight.dtype)).astype(x.dtype)
         z = (x @ self.lora_a) @ self.lora_b
         return y + 2.0 * z
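The cast-in/cast-out around self.linear is what makes the mixed precision work: the frozen base matmul runs in the weights' (now float16) dtype, while x, lora_a, lora_b, and therefore the trainable gradients, all stay in float32. Below is a self-contained sketch of the layer under that reading; the constructor arguments and the lora_a initialization are assumptions, since the hunk only shows lora_b onward:

import math
import mlx.core as mx
import mlx.nn as nn

class LoRALinear(nn.Module):
    def __init__(self, input_dims: int, output_dims: int, lora_rank: int = 8):
        super().__init__()
        # Frozen base projection; its weights may be float16 after this commit.
        self.linear = nn.Linear(input_dims, output_dims, bias=False)
        # Trainable low-rank factors, kept in float32 (assumed init scheme).
        scale = 1.0 / math.sqrt(input_dims)
        self.lora_a = mx.random.uniform(-scale, scale, (input_dims, lora_rank))
        self.lora_b = mx.zeros(shape=(lora_rank, output_dims))

    def __call__(self, x):
        # Base path: cast x down to the weight dtype for the matmul, then
        # cast back up so the rest of the graph stays in x's precision.
        y = self.linear(x.astype(self.linear.weight.dtype)).astype(x.dtype)
        # LoRA path: runs entirely in x's (float32) precision.
        z = (x @ self.lora_a) @ self.lora_b
        return y + 2.0 * z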