From d352074e739183e8d8a25d99f148eae39086d145 Mon Sep 17 00:00:00 2001
From: Chime Ogbuji <chimezie@gmail.com>
Date: Mon, 23 Dec 2024 10:45:17 -0500
Subject: [PATCH] Add support for multiturn fewshot examples and chat templates

Added two new arguments to the evaluation script, `--fewshot-as-multiturn` and `--apply-chat-template`, which correspond to the lm_eval options of similar names and are very often used to ensure apples-to-apples comparisons of lm-evaluation-harness results.
---
 llms/mlx_lm/evaluate.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/llms/mlx_lm/evaluate.py b/llms/mlx_lm/evaluate.py
index bf7bf4d4..850055f6 100644
--- a/llms/mlx_lm/evaluate.py
+++ b/llms/mlx_lm/evaluate.py
@@ -332,6 +332,18 @@ def main():
         type=float,
     )
     parser.add_argument("--seed", type=int, default=123, help="Random seed.")
+    parser.add_argument(
+        "--fewshot-as-multiturn",
+        action="store_true",
+        help="Whether to provide the fewshot examples as a multiturn conversation or a single user turn.",
+        default=False,
+    )
+    parser.add_argument(
+        "--apply-chat-template",
+        action="store_true",
+        help="Specifies whether to apply a chat template to the prompt",
+        default=False,
+    )
     args = parser.parse_args()
 
     output_dir = Path(args.output_dir)
@@ -347,6 +359,8 @@ def main():
     results = lm_eval.simple_evaluate(
         model=lm,
         tasks=args.tasks,
+        fewshot_as_multiturn=args.fewshot_as_multiturn,
+        apply_chat_template=args.apply_chat_template,
         num_fewshot=args.num_shots,
         limit=args.limit,
         random_seed=args.seed,