mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-06-24 09:21:18 +08:00
Add eos token to lora fine-tunes (#818)
* add eos token to lora fine-tunes
* Comment
This commit is contained in:
parent
3cc58e17fb
commit
d8b073e3a7
@@ -92,6 +92,12 @@ def iterate_batches(dataset, tokenizer, batch_size, max_seq_length, train=False)
|
|||||||
for i in indices:
|
for i in indices:
|
||||||
# Encode batch
|
# Encode batch
|
||||||
batch = [tokenizer.encode(dataset[j]) for j in batch_idx[i]]
|
batch = [tokenizer.encode(dataset[j]) for j in batch_idx[i]]
|
||||||
|
for b in batch:
|
||||||
|
if b[-1] == tokenizer.eos_token_id:
|
||||||
|
print("[WARNING] Example already has an EOS token appended")
|
||||||
|
else:
|
||||||
|
b.append(tokenizer.eos_token_id)
|
||||||
|
|
||||||
lengths = [len(x) for x in batch]
|
lengths = [len(x) for x in batch]
|
||||||
|
|
||||||
if max(lengths) > max_seq_length:
|
if max(lengths) > max_seq_length:
|
||||||
|
@@ -1,3 +1,3 @@
|
|||||||
# Copyright © 2023-2024 Apple Inc.
|
# Copyright © 2023-2024 Apple Inc.
|
||||||
|
|
||||||
__version__ = "0.14.2"
|
__version__ = "0.15.0"
|
||||||
|
Loading…
Reference in New Issue
Block a user