updates

2025-12-16 02:08:55 +08:00 · 2025-03-05 14:49:56 +01:00
parent 9a36452519
commit d723ddfeda
1 changed file with 1 addition and 0 deletions
--- a/llms/mlx_lm/tuner/grpo_trainer.py
+++ b/llms/mlx_lm/tuner/grpo_trainer.py
@@ -343,6 +343,7 @@ def grpo_loss(
        # Convert to tensor
        prompt_tensor = mx.array(padded_prompts)
        prompt_tensor = mx.stop_gradient(prompt_tensor) # Explicitly stop gradient on input
        try:
            mx.metal.clear_cache()