From d8b073e3a71a89b80d58f02f48cb17711642b2d1 Mon Sep 17 00:00:00 2001
From: Awni Hannun
Date: Wed, 12 Jun 2024 07:44:21 -0700
Subject: [PATCH] Add eos token to lora fine-tunes (#818)

* add eos token to lora fine-tunes

* Comment
---
 llms/mlx_lm/tuner/trainer.py | 6 ++++++
 llms/mlx_lm/version.py       | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/llms/mlx_lm/tuner/trainer.py b/llms/mlx_lm/tuner/trainer.py
index feecf523..24fcc5c6 100644
--- a/llms/mlx_lm/tuner/trainer.py
+++ b/llms/mlx_lm/tuner/trainer.py
@@ -92,6 +92,12 @@ def iterate_batches(dataset, tokenizer, batch_size, max_seq_length, train=False)
         for i in indices:
             # Encode batch
             batch = [tokenizer.encode(dataset[j]) for j in batch_idx[i]]
+            for b in batch:
+                if b[-1] == tokenizer.eos_token_id:
+                    print("[WARNING] Example already has an EOS token appended")
+                else:
+                    b.append(tokenizer.eos_token_id)
+
             lengths = [len(x) for x in batch]
 
             if max(lengths) > max_seq_length:
diff --git a/llms/mlx_lm/version.py b/llms/mlx_lm/version.py
index 086e3505..88c3e75e 100644
--- a/llms/mlx_lm/version.py
+++ b/llms/mlx_lm/version.py
@@ -1,3 +1,3 @@
 # Copyright © 2023-2024 Apple Inc.
 
-__version__ = "0.14.2"
+__version__ = "0.15.0"