Mirror of https://github.com/ml-explore/mlx-examples.git (synced 2025-07-19 09:31:13 +08:00)
rebase loss calculation
This commit is contained in:
parent a527cdb39b
commit 00712522ba
@@ -317,7 +317,9 @@ def grpo_loss(
|
||||
per_token_loss = -((policy_ratio * advantages.reshape(-1, 1) - beta * kl_div) * length_mask)
|
||||
|
||||
# Average over tokens
|
||||
loss = per_token_loss.sum().mean()
|
||||
sequence_sums = per_token_loss.sum(axis=1)
|
||||
sequence_lengths = length_mask.sum(axis=1)
|
||||
loss = (sequence_sums / sequence_lengths).mean()
|
||||
|
||||
# Calculate mean KL divergence for metrics
|
||||
mean_kl = ((kl_div * length_mask).sum(axis=1) / length_mask.sum(axis=1)).mean()
|
||||
@@ -343,7 +345,7 @@ def grpo_loss(
|
||||
}
|
||||
mx.metal.clear_cache()
|
||||
|
||||
return loss, length_mask.sum(axis=1).sum(), metrics
|
||||
return loss, sequence_lengths.sum(), metrics
|
||||
|
||||
|
||||
def iterate_grpo_batches(dataset, tokenizer, batch_size, max_seq_length, train=False):
|
||||
|
Loading…
Reference in New Issue
Block a user