From e5aa2c3b5d7ee6416e0f730d4e4422c7116fbb56 Mon Sep 17 00:00:00 2001 From: Goekdeniz-Guelmez Date: Mon, 10 Feb 2025 17:51:14 +0100 Subject: [PATCH] nits --- llms/mlx_lm/tuner/grpo_trainer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/llms/mlx_lm/tuner/grpo_trainer.py b/llms/mlx_lm/tuner/grpo_trainer.py index ca6192ad..36b44ac2 100644 --- a/llms/mlx_lm/tuner/grpo_trainer.py +++ b/llms/mlx_lm/tuner/grpo_trainer.py @@ -322,6 +322,7 @@ def grpo_loss( answer=expanded_answers )) reward_metrics[f'{func_name}_mean'] = mx.mean(func_rewards) + reward_metrics[f'{func_name}_std'] = mx.std(func_rewards) metrics = { 'total_rewards_mean': mx.mean(rewards),