last update, gn

This commit is contained in:
Goekdeniz-Guelmez
2025-02-24 22:20:07 +01:00
parent e4eac9c97b
commit 53185c7f3d
3 changed files with 30 additions and 16 deletions

View File

@@ -55,7 +55,7 @@ def r1_soft_format_reward_func(prompts: list, completions: list, answer: list, *
def r1_strict_format_reward_func(prompts: list, completions: list, answer: list, **kwargs) -> list[float]:
    """Reward completions that follow the strict ``<think>``/``<answer>`` layout.

    A completion earns 0.5 when it contains, anywhere in its text, the
    sequence ``<think> ... </think><answer> ... </answer>`` with a single
    space just inside each tag; otherwise it earns 0.0.

    Args:
        prompts: Only its length is used, to size the all-zero result when
            ``completions`` is empty.
        completions: Generated texts to score. Falsy entries (``""``,
            ``None``) score 0.0 without being matched.
        answer: Unused; kept for a uniform reward-function signature.
        **kwargs: Ignored.

    Returns:
        One float per completion (0.5 or 0.0), or ``[0.0] * len(prompts)``
        when ``completions`` is empty.
    """
    if not completions:
        return [0.0] * len(prompts)
    # No regex flags are passed, so "." does not match a newline: the think
    # and answer bodies must each stay on a single line to earn the reward.
    pattern = r"<think> .*? </think><answer> .*? </answer>"
    matches = [bool(re.search(pattern, r)) if r else False for r in completions]
    return [0.5 if match else 0.0 for match in matches]