From 06ff47012f795ee8d3d54adae21868882a34d22d Mon Sep 17 00:00:00 2001 From: Goekdeniz-Guelmez Date: Tue, 11 Mar 2025 09:00:21 +0100 Subject: [PATCH] match pytorch implementation for loss calculation --- llms/mlx_lm/tuner/grpo_trainer.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llms/mlx_lm/tuner/grpo_trainer.py b/llms/mlx_lm/tuner/grpo_trainer.py index a5307bd3..69603702 100644 --- a/llms/mlx_lm/tuner/grpo_trainer.py +++ b/llms/mlx_lm/tuner/grpo_trainer.py @@ -381,9 +381,7 @@ def grpo_loss( ) # Average over tokens - sequence_sums = per_token_loss.sum(axis=1) - sequence_lengths = length_mask.sum(axis=1) - loss = (sequence_sums / sequence_lengths).mean() + loss = (per_token_loss * length_mask).sum() / length_mask.sum() # Matches the PyTorch implementation # Calculate mean KL divergence for metrics mean_kl = ((kl_div * length_mask).sum(axis=1) / length_mask.sum(axis=1)).mean() @@ -454,7 +452,7 @@ def grpo_loss( mx.metal.clear_cache() - return loss, sequence_lengths.sum(), metrics + return loss, length_mask.sum(axis=1).sum(), metrics def iterate_grpo_batches(dataset, batch_size, max_seq_length, train=False):