mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-07-03 07:11:13 +08:00
nits
This commit is contained in:
parent
f88e897019
commit
e5aa2c3b5d
@ -322,6 +322,7 @@ def grpo_loss(
|
|||||||
answer=expanded_answers
|
answer=expanded_answers
|
||||||
))
|
))
|
||||||
reward_metrics[f'{func_name}_mean'] = mx.mean(func_rewards)
|
reward_metrics[f'{func_name}_mean'] = mx.mean(func_rewards)
|
||||||
|
reward_metrics[f'{func_name}_std'] = mx.std(func_rewards)
|
||||||
|
|
||||||
metrics = {
|
metrics = {
|
||||||
'total_rewards_mean': mx.mean(rewards),
|
'total_rewards_mean': mx.mean(rewards),
|
||||||
|
Loading…
Reference in New Issue
Block a user