diff --git a/llms/mlx_lm/tuner/trainer.py b/llms/mlx_lm/tuner/trainer.py
index feecf523..24fcc5c6 100644
--- a/llms/mlx_lm/tuner/trainer.py
+++ b/llms/mlx_lm/tuner/trainer.py
@@ -92,6 +92,12 @@ def iterate_batches(dataset, tokenizer, batch_size, max_seq_length, train=False)
         for i in indices:
             # Encode batch
             batch = [tokenizer.encode(dataset[j]) for j in batch_idx[i]]
+            for b in batch:
+                if b[-1] == tokenizer.eos_token_id:
+                    print("[WARNING] Example already has an EOS token appended")
+                else:
+                    b.append(tokenizer.eos_token_id)
+
             lengths = [len(x) for x in batch]

             if max(lengths) > max_seq_length:
diff --git a/llms/mlx_lm/version.py b/llms/mlx_lm/version.py
index 086e3505..88c3e75e 100644
--- a/llms/mlx_lm/version.py
+++ b/llms/mlx_lm/version.py
@@ -1,3 +1,3 @@
 # Copyright © 2023-2024 Apple Inc.

-__version__ = "0.14.2"
+__version__ = "0.15.0"
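
For context, the trainer change ensures every encoded training example ends with the tokenizer's EOS token (warning instead of double-appending when one is already present), so the fine-tuned model learns where sequences end. Below is a minimal runnable sketch of that behavior in isolation; `StubTokenizer` and `append_eos` are hypothetical stand-ins for illustration, and only the check-and-append loop mirrors the patched code:

```python
# Sketch of the EOS handling added to iterate_batches (illustration only).
# StubTokenizer is a hypothetical stand-in, not the real tokenizer wrapper.

class StubTokenizer:
    eos_token_id = 2

    def encode(self, text):
        # Hypothetical encoding: one token id per character, for demo purposes.
        return [ord(c) % 100 for c in text]


def append_eos(batch, tokenizer):
    # Mirrors the patch: warn if an example already ends in EOS,
    # otherwise append EOS in place.
    for b in batch:
        if b[-1] == tokenizer.eos_token_id:
            print("[WARNING] Example already has an EOS token appended")
        else:
            b.append(tokenizer.eos_token_id)
    return batch


tokenizer = StubTokenizer()
batch = [tokenizer.encode("hi"), [5, 7, tokenizer.eos_token_id]]
append_eos(batch, tokenizer)
# First example gains an EOS token; second triggers the warning and is unchanged.
print(batch)  # -> [[4, 5, 2], [5, 7, 2]]
```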