This commit is contained in:
Goekdeniz-Guelmez 2025-02-10 17:51:14 +01:00
parent f88e897019
commit e5aa2c3b5d

View File

@@ -322,6 +322,7 @@ def grpo_loss(
answer=expanded_answers answer=expanded_answers
)) ))
reward_metrics[f'{func_name}_mean'] = mx.mean(func_rewards) reward_metrics[f'{func_name}_mean'] = mx.mean(func_rewards)
reward_metrics[f'{func_name}_std'] = mx.std(func_rewards)
metrics = { metrics = {
'total_rewards_mean': mx.mean(rewards), 'total_rewards_mean': mx.mean(rewards),