Mirror of https://github.com/ml-explore/mlx-examples.git (last synced 2025-10-23 22:18:06 +08:00)
Use lower-precision base weights
This commit is contained in:
@@ -184,6 +184,7 @@ def load(args):
|
||||
def loss(model, inputs, targets, lengths):
|
||||
# Run model on inputs
|
||||
logits, _ = model(inputs)
|
||||
+logits = logits.astype(mx.float32)
|
||||
|
||||
# Mask padding tokens
|
||||
length_mask = mx.arange(inputs.shape[1])[None, :] < lengths[:, None]
|
||||
@@ -326,7 +327,7 @@ def generate(model, prompt, tokenizer, args):
|
||||
print(s, flush=True)
|
||||
|
||||
|
||||
-def load_model(folder: str, dtype=mx.float32):
|
||||
+def load_model(folder: str, dtype=mx.float16):
|
||||
model_path = Path(folder)
|
||||
tokenizer = Tokenizer(str(model_path / "tokenizer.model"))
|
||||
with open(model_path / "params.json", "r") as f:
|
||||
|
Reference in New Issue
Block a user