updates

2025-12-16 02:08:55 +08:00 · 2025-02-12 11:07:53 +01:00
parent c42e858d7e
commit e33d9d509b
3 changed files with 70 additions and 111 deletions
--- a/llms/mlx_lm/tuner/datasets.py
+++ b/llms/mlx_lm/tuner/datasets.py
@@ -2,9 +2,8 @@ import itertools
 import json
 import types
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Tuple

-from .utils import GRPOExample
 from transformers import PreTrainedTokenizer


@@ -12,7 +11,7 @@ class GRPODataset:
    """
    Dataset wrapper for GRPO training data.
    Each example should have a 'prompt' and 'answer' field.
-    Returns data as GRPOExample instances.
+    Returns data in (prompt_tokens, answer_tokens, prompt_str, answer_str) tuple format.
    """
    def __init__(
        self,
@@ -23,40 +22,33 @@ class GRPODataset:
        use_chat_template: bool = False,
        use_prompt: bool = False
    ):
-        self._data: List[GRPOExample] = []
+        self._data = []
        for item in data:
            prompt_str = str(item[prompt_key])
            answer_str = str(item[answer_key])
-            
            if use_chat_template:
                prompt_tokens = tokenizer.apply_chat_template(
                    [
                        {'role': 'system', 'content': """A conversation between User and Assistant. The user asks a question, and the Assistant solves it.
-                        The assistantfirst thinks about the reasoning process in the mind and then provides the user with the answer.
-                        The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>."""},
-                        {'role': 'user', 'content': prompt_str}
+                The assistantfirst thinks about the reasoning process in the mind and then provides the user with the answer.
+                The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>."""},
+                    {'role': 'user', 'content': prompt_str}
                    ],
                )
                answer_tokens = tokenizer.encode(answer_str)
            else:
                if use_prompt:
                    prompt_tokens = tokenizer.encode(f"""A conversation between User and Assistant. The user asks a question, and the Assistant solves it.
-                    The assistantfirst thinks about the reasoning process in the mind and then provides the user with the answer.
-                    The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>.
-                    User: {prompt_str} Assistant: """)
+            The assistantfirst thinks about the reasoning process in the mind and then provides the user with the answer.
+            The reasoning process and answer are enclosed within <think> </think> and <answer> </answer> tags, respectively, i.e., <think> reasoning process here </think><answer> answer here </answer>.
+            User: {prompt_str} Assistant: """)
                else:
                    prompt_tokens = tokenizer.encode(prompt_str)
                answer_tokens = tokenizer.encode(answer_str)
-            
-            self._data.append(GRPOExample(
-                prompt_tokens=prompt_tokens,
-                answer_tokens=answer_tokens,
-                prompt_text=prompt_str,
-                answer_text=answer_str
-            ))
+            self._data.append((prompt_tokens, answer_tokens, prompt_str, answer_str))

-    def __getitem__(self, idx: int) -> GRPOExample:
-        """Returns a GRPOExample instance."""
+    def __getitem__(self, idx: int) -> Tuple[List[int], List[int], str, str]:
+        """Returns a (prompt_tokens, answer_tokens, prompt_str, answer_str) tuple."""
        return self._data[idx]

    def __len__(self) -> int:
@@ -318,7 +310,7 @@ def load_dataset(args, tokenizer: PreTrainedTokenizer):
            train, valid, test = load_local_dataset(args, data_path, tokenizer, args)
        else:
            print(f"Loading Hugging Face dataset {args.data}.")
-            train, valid, test = load_hf_dataset(args, args.data, tokenizer, args)
+            train, valid, test = load_hf_dataset(args.data, tokenizer, args)

    if args.train and len(train) == 0:
        raise ValueError(