mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-12-16 02:08:55 +08:00
Merge branch 'ml-explore:main' into adding-support-for-mamba2
This commit is contained in:
@@ -159,8 +159,8 @@ def evaluate(
|
||||
ntokens += toks
|
||||
mx.eval(all_losses, ntokens)
|
||||
|
||||
all_losses = mx.distributed.all_sum(all_losses)
|
||||
ntokens = mx.distributed.all_sum(ntokens)
|
||||
all_losses = mx.distributed.all_sum(all_losses, stream=mx.cpu)
|
||||
ntokens = mx.distributed.all_sum(ntokens, stream=mx.cpu)
|
||||
|
||||
return (all_losses / ntokens).item()
|
||||
|
||||
@@ -272,9 +272,9 @@ def train(
|
||||
if it % args.steps_per_report == 0 or it == args.iters:
|
||||
stop = time.perf_counter()
|
||||
|
||||
train_loss = mx.distributed.all_sum(losses).item()
|
||||
train_loss = mx.distributed.all_sum(losses, stream=mx.cpu).item()
|
||||
train_loss /= steps * mx.distributed.init().size()
|
||||
n_tokens = mx.distributed.all_sum(n_tokens).item()
|
||||
n_tokens = mx.distributed.all_sum(n_tokens, stream=mx.cpu).item()
|
||||
learning_rate = optimizer.learning_rate.item()
|
||||
it_sec = args.steps_per_report / (stop - start)
|
||||
tokens_sec = float(n_tokens) / (stop - start)
|
||||
|
||||
@@ -100,6 +100,7 @@ def linear_to_lora_layers(
|
||||
"minicpm",
|
||||
"deepseek",
|
||||
"olmo2",
|
||||
"internlm3",
|
||||
]:
|
||||
keys = set(["self_attn.q_proj", "self_attn.v_proj"])
|
||||
if model.model_type in ["mixtral", "phimoe"]:
|
||||
|
||||
Reference in New Issue
Block a user