Merge branch 'main' into adding-GRPO-training

2025-12-16 02:08:55 +08:00 · 2025-02-28 21:16:02 +01:00
parent 15d53279ae 845cd8c01e
commit 80e10a59d7
5 changed files with 55 additions and 29 deletions
--- a/llms/mlx_lm/lora.py
+++ b/llms/mlx_lm/lora.py
@@ -64,6 +64,7 @@ CONFIG_DEFAULTS = {
    "grad_checkpoint": False,
    "lr_schedule": None,
    "lora_parameters": {"rank": 8, "alpha": 16, "dropout": 0.0, "scale": 10.0},
+    "mask_prompt": False,

    # GRPO args
    "reference_model_path": None,
@@ -74,7 +75,7 @@ CONFIG_DEFAULTS = {
    "use_chat_template": False,
    "use_prompt": False,
    "temperature": 1.0,
-    "reward_weights": None,
+    "reward_weights": None
 }


@@ -112,7 +113,7 @@ def build_parser():
        "--mask-prompt",
        action="store_true",
        help="Mask the prompt in the loss when training",
-        default=False,
+        default=None,
    )

    parser.add_argument(