From d352074e739183e8d8a25d99f148eae39086d145 Mon Sep 17 00:00:00 2001 From: Chime Ogbuji Date: Mon, 23 Dec 2024 10:45:17 -0500 Subject: [PATCH] Add support for multiturn fewshot examples and chat templates Added two new arguments to the evaluation script, `--fewshot-as-multiturn` and `--apply-chat-template`, which correspond to lm_eval options of similar names and are very often used to ensure apples-to-apples comparisons of lm_eval results. --- llms/mlx_lm/evaluate.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/llms/mlx_lm/evaluate.py b/llms/mlx_lm/evaluate.py index bf7bf4d4..850055f6 100644 --- a/llms/mlx_lm/evaluate.py +++ b/llms/mlx_lm/evaluate.py @@ -332,6 +332,18 @@ def main(): type=float, ) parser.add_argument("--seed", type=int, default=123, help="Random seed.") + parser.add_argument( + "--fewshot-as-multiturn", + action="store_true", + help="Whether to provide the fewshot examples as a multiturn conversation or a single user turn.", + default=False, + ) + parser.add_argument( + "--apply-chat-template", + action="store_true", + help="Specifies whether to apply a chat template to the prompt", + default=False, + ) args = parser.parse_args() output_dir = Path(args.output_dir) @@ -347,6 +359,8 @@ def main(): results = lm_eval.simple_evaluate( model=lm, tasks=args.tasks, + fewshot_as_multiturn=args.fewshot_as_multiturn, + apply_chat_template=args.apply_chat_template, num_fewshot=args.num_shots, limit=args.limit, random_seed=args.seed,