From 25ec2d8c4496be68acf7e0c9ea1ae4269e1a2a19 Mon Sep 17 00:00:00 2001
From: Angelos Katharopoulos
Date: Sun, 5 Jan 2025 22:26:05 -0800
Subject: [PATCH 1/4] Change the eos-token argument for mlx_lm.generate (#1176)

---
 llms/mlx_lm/generate.py        |  9 +++++----
 llms/mlx_lm/tokenizer_utils.py | 12 ++++++++++++
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/llms/mlx_lm/generate.py b/llms/mlx_lm/generate.py
index 1ea66384..3301edae 100644
--- a/llms/mlx_lm/generate.py
+++ b/llms/mlx_lm/generate.py
@@ -43,10 +43,11 @@ def setup_arg_parser():
         help="Optional path for the trained adapter weights and config.",
     )
     parser.add_argument(
-        "--eos-token",
+        "--extra-eos-token",
         type=str,
         default=None,
-        help="End of sequence token for tokenizer",
+        nargs="+",
+        help="Add tokens in the list of eos tokens that stop generation.",
     )
     parser.add_argument(
         "--system-prompt",
@@ -161,8 +162,6 @@ def main():
         {} if not using_cache else json.loads(metadata["tokenizer_config"])
     )
     tokenizer_config["trust_remote_code"] = True
-    if args.eos_token is not None:
-        tokenizer_config["eos_token"] = args.eos_token
 
     model_path = args.model
     if using_cache:
@@ -181,6 +180,8 @@
         adapter_path=args.adapter_path,
         tokenizer_config=tokenizer_config,
     )
+    for eos_token in args.extra_eos_token:
+        tokenizer.add_eos_token(eos_token)
 
     if args.use_default_chat_template:
         if tokenizer.chat_template is None:
diff --git a/llms/mlx_lm/tokenizer_utils.py b/llms/mlx_lm/tokenizer_utils.py
index ca3d6c06..1b5bdd77 100644
--- a/llms/mlx_lm/tokenizer_utils.py
+++ b/llms/mlx_lm/tokenizer_utils.py
@@ -266,6 +266,18 @@ class TokenizerWrapper:
             else {tokenizer.eos_token_id}
         )
 
+    def add_eos_token(self, token: str):
+        token_id = None
+        try:
+            token_id = int(token)
+        except ValueError:
+            token_id = self._tokenizer.convert_tokens_to_ids(token)
+
+        if token_id is None:
+            raise ValueError(f"'{token}' is not a token for this tokenizer")
+
+        self._eos_token_ids.add(token_id)
+
     def __getattr__(self, attr):
         if attr == "detokenizer":
             return self._detokenizer
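
For quick reference, a minimal sketch of how the stop-token handling added by this
patch can be exercised from Python. The repo id, token string, and token id below
are illustrative only; the equivalent command-line form is
`mlx_lm.generate --extra-eos-token ...`.

    # Sketch only: an extra EOS token may be given either as a literal token
    # string or as an integer id passed as a string; per the patch, a token
    # that cannot be resolved to an id raises ValueError.
    from mlx_lm import load, generate

    model, tokenizer = load("mlx-community/Example-Model-4bit")  # illustrative repo id
    tokenizer.add_eos_token("<|eot_id|>")  # stop on a named special token
    tokenizer.add_eos_token("128009")      # or on a raw token id
    text = generate(model, tokenizer, prompt="Hello", max_tokens=64)
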
From f2619f507c7dcde70410cc2cbb1d4715476d79ee Mon Sep 17 00:00:00 2001
From: Chime Ogbuji
Date: Mon, 6 Jan 2025 10:58:43 -0500
Subject: [PATCH 2/4] Add support for fewshot and apply chat template lm_eval functionality (#1180)

* Add support for multiturn fewshot examples and chat templates

Added two new arguments to the evaluation script: `--fewshot-as-multiturn`
and `--apply-chat-template` which correspond to lm_eval options of similar
names and are very often used to ensure apples-to-apples comparisons of
lm_evaluation results

* Add HF overrides for methods needed by added options

* don't add duplicate bos

---------

Co-authored-by: Awni Hannun
---
 .circleci/config.yml    |  2 +-
 llms/mlx_lm/evaluate.py | 59 +++++++++++++++++++++++++++++------------
 llms/setup.py           |  4 +--
 3 files changed, 45 insertions(+), 20 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index cecd2d57..8367281e 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -32,7 +32,7 @@ jobs:
             pip install --upgrade pip
             pip install unittest-xml-reporting
             cd llms/
-            pip install -e ".[testing]"
+            pip install -e ".[test]"
       - run:
           name: Run Python tests
           command: |
diff --git a/llms/mlx_lm/evaluate.py b/llms/mlx_lm/evaluate.py
index bf7bf4d4..ca5e83bb 100644
--- a/llms/mlx_lm/evaluate.py
+++ b/llms/mlx_lm/evaluate.py
@@ -77,15 +77,19 @@ class MLXLM(LM):
         path_or_hf_repo: str,
         batch_size: int = 16,
         max_tokens: Optional[int] = None,
+        use_chat_template: Optional[bool] = None,
     ) -> None:
         super().__init__()
         self._batch_size = batch_size
-        self._model, self._tokenizer = load(path_or_hf_repo)
-        self._max_tokens = max_tokens or self._tokenizer.model_max_length
+        self._model, self.tokenizer = load(path_or_hf_repo)
+        self._max_tokens = max_tokens or self.tokenizer.model_max_length
+        self.use_chat_template = use_chat_template or (
+            self.tokenizer.chat_template is not None
+        )
 
     def _score_fn(self, inputs, tokenize=True, step_size=32):
         if tokenize:
-            inputs = self._tokenizer.encode(inputs)
+            inputs = self._tokenize(inputs)
         inputs = _pad_inputs(inputs, self._max_tokens, truncate=False)
         inputs = mx.array(inputs)
         inputs, targets = inputs[..., :-1], inputs[..., 1:]
@@ -149,7 +153,12 @@ class MLXLM(LM):
         return results
 
     def _tokenize(self, texts):
-        return [tuple(self._tokenizer.encode(t)) for t in texts]
+        return [
+            tuple(
+                self.tokenizer.encode(t, add_special_tokens=not self.use_chat_template)
+            )
+            for t in texts
+        ]
 
     def loglikelihood(self, requests) -> list[tuple[float, bool]]:
         """Compute log-likelihood of generating a continuation from a context.
@@ -221,6 +230,9 @@ class MLXLM(LM):
         )
         return [(r[0], r[1] == r[2]) for r in results]
 
+    tokenizer_name = lm_eval.models.huggingface.HFLM.tokenizer_name
+    apply_chat_template = lm_eval.models.huggingface.HFLM.apply_chat_template
+
     def loglikelihood_rolling(self, requests) -> list[float]:
         """Compute full log-likelihood of a string, with no truncation, for perplexity
         computation - We will use the full max context length of the model.
@@ -283,21 +295,14 @@ class MLXLM(LM):
         completions = []
 
         for context, until in tqdm(zip(contexts, untils), total=len(contexts)):
-            if self._tokenizer.chat_template is not None:
-                messages = [{"role": "user", "content": context}]
-                context = self._tokenizer.apply_chat_template(
-                    messages, add_generation_prompt=True
-                )
-            else:
-                context = self._tokenizer.encode(context)
-
+            context = self._tokenize(context)
             max_tokens = min(
                 self._max_tokens,
-                self._tokenizer.model_max_length - len(context),
+                self.tokenizer.model_max_length - len(context),
             )
             text = ""
             for response in stream_generate(
-                self._model, self._tokenizer, prompt=context, max_tokens=max_tokens
+                self._model, self.tokenizer, prompt=context, max_tokens=max_tokens
             ):
                 text += response.text
                 if any(u in text for u in until):
@@ -332,6 +337,21 @@
         type=float,
     )
     parser.add_argument("--seed", type=int, default=123, help="Random seed.")
+    parser.add_argument(
+        "--fewshot-as-multiturn",
+        action="store_true",
+        help="Whether to provide the fewshot examples as a multiturn "
+        "conversation or a single user turn.",
+        default=False,
+    )
+    parser.add_argument(
+        "--apply-chat-template",
+        action=argparse.BooleanOptionalAction,
+        help="Specifies whether to apply a chat template to the prompt. If "
+        "the model has a chat template, this defaults to `True`, "
+        "otherwise `False`.",
+        default=None,
+    )
     args = parser.parse_args()
 
     output_dir = Path(args.output_dir)
@@ -342,18 +362,23 @@
 
     mx.random.seed(args.seed)
 
-    lm = MLXLM(args.model, batch_size=args.batch_size, max_tokens=args.max_tokens)
-
+    lm = MLXLM(
+        args.model,
+        batch_size=args.batch_size,
+        max_tokens=args.max_tokens,
+        use_chat_template=args.apply_chat_template,
+    )
     results = lm_eval.simple_evaluate(
         model=lm,
         tasks=args.tasks,
+        fewshot_as_multiturn=args.fewshot_as_multiturn,
+        apply_chat_template=lm.use_chat_template,
         num_fewshot=args.num_shots,
         limit=args.limit,
         random_seed=args.seed,
         numpy_random_seed=args.seed,
         torch_random_seed=args.seed,
         fewshot_random_seed=args.seed,
-        apply_chat_template=True,
     )
 
     model_name = args.model.replace("/", "_")
diff --git a/llms/setup.py b/llms/setup.py
index b88dcd33..e6fddbae 100644
--- a/llms/setup.py
+++ b/llms/setup.py
@@ -27,8 +27,8 @@ setup(
     packages=["mlx_lm", "mlx_lm.models", "mlx_lm.tuner"],
     python_requires=">=3.8",
     extras_require={
-        "testing": ["datasets"],
-        "evaluation": ["lm-eval"],
+        "test": ["datasets"],
+        "evaluate": ["lm-eval", "tqdm"],
    },
     entry_points={
         "console_scripts": [
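
For reference, a rough sketch of the evaluation flow these two options enable,
mirroring the patched main(). The model path and task name are placeholders, and
lm-eval (the `evaluate` extra) must be installed.

    # Sketch only: with use_chat_template=None, MLXLM enables the chat template
    # whenever the tokenizer defines one.
    import lm_eval
    from mlx_lm.evaluate import MLXLM

    lm = MLXLM("mlx-community/Example-Model-4bit", batch_size=16, use_chat_template=None)
    results = lm_eval.simple_evaluate(
        model=lm,
        tasks=["arc_easy"],  # placeholder task
        num_fewshot=5,
        fewshot_as_multiturn=True,  # lm_eval expects apply_chat_template to be enabled with this
        apply_chat_template=lm.use_chat_template,
    )
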
From 9183fe8b6d6b5e86cac0f47b54675f272c9f3591 Mon Sep 17 00:00:00 2001
From: Awni Hannun
Date: Mon, 6 Jan 2025 10:12:07 -0800
Subject: [PATCH 3/4] fix (#1192)

---
 llms/mlx_lm/generate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llms/mlx_lm/generate.py b/llms/mlx_lm/generate.py
index 3301edae..26481d6b 100644
--- a/llms/mlx_lm/generate.py
+++ b/llms/mlx_lm/generate.py
@@ -45,7 +45,7 @@ def setup_arg_parser():
     parser.add_argument(
         "--extra-eos-token",
         type=str,
-        default=None,
+        default=(),
         nargs="+",
         help="Add tokens in the list of eos tokens that stop generation.",
     )

From b8f0cacfa8dd08aaca7025351a7afddd481ca490 Mon Sep 17 00:00:00 2001
From: Pedro Cuenca
Date: Tue, 7 Jan 2025 18:18:31 +0100
Subject: [PATCH 4/4] Use upload_large_folder (#1193)

---
 llms/mlx_lm/utils.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/llms/mlx_lm/utils.py b/llms/mlx_lm/utils.py
index 0c35d07f..ad79349e 100644
--- a/llms/mlx_lm/utils.py
+++ b/llms/mlx_lm/utils.py
@@ -673,12 +673,10 @@ def upload_to_hub(path: str, upload_repo: str, hf_path: str):
 
     api = HfApi()
     api.create_repo(repo_id=upload_repo, exist_ok=True)
-    api.upload_folder(
+    api.upload_large_folder(
         folder_path=path,
         repo_id=upload_repo,
         repo_type="model",
-        multi_commits=True,
-        multi_commits_verbose=True,
     )
     print(f"Upload successful, go to https://huggingface.co/{upload_repo} for details.")
 
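
For completeness, a minimal sketch of the upload path after this change, with
placeholder repo and folder names. `upload_large_folder` is the huggingface_hub
call shown in the diff; it is designed for large, resumable uploads and takes
over from the removed `multi_commits` flags.

    # Sketch only: mirrors the patched call in llms/mlx_lm/utils.py.
    from huggingface_hub import HfApi

    api = HfApi()
    api.create_repo(repo_id="my-user/my-mlx-model", exist_ok=True)  # placeholder repo
    api.upload_large_folder(
        folder_path="path/to/converted_model",  # placeholder local folder
        repo_id="my-user/my-mlx-model",
        repo_type="model",
    )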