Add EOS token to LoRA fine-tunes (#818)

* add EOS token to LoRA fine-tunes

* Comment
This commit is contained in:
Awni Hannun 2024-06-12 07:44:21 -07:00 committed by GitHub
parent 3cc58e17fb
commit d8b073e3a7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 7 additions and 1 deletions

View File

@ -92,6 +92,12 @@ def iterate_batches(dataset, tokenizer, batch_size, max_seq_length, train=False)
for i in indices: for i in indices:
# Encode batch # Encode batch
batch = [tokenizer.encode(dataset[j]) for j in batch_idx[i]] batch = [tokenizer.encode(dataset[j]) for j in batch_idx[i]]
for b in batch:
if b[-1] == tokenizer.eos_token_id:
print("[WARNING] Example already has an EOS token appended")
else:
b.append(tokenizer.eos_token_id)
lengths = [len(x) for x in batch] lengths = [len(x) for x in batch]
if max(lengths) > max_seq_length: if max(lengths) > max_seq_length:

View File

@ -1,3 +1,3 @@
# Copyright © 2023-2024 Apple Inc. # Copyright © 2023-2024 Apple Inc.
__version__ = "0.14.2" __version__ = "0.15.0"