mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-08-29 06:54:39 +08:00
Synch use of special tokens with iterate_batches
This commit is contained in:
parent
f989401881
commit
6df285ef6c
@@ -166,7 +166,7 @@ def iterate_completion_batches(
 for j in batch_idx[i]:
     prompt, completion = dataset.get_prompt_and_completion(j)
     prompt_lengths.append(input_length(prompt, completion, tokenizer))
-    full_sequence = tokenizer.encode(dataset[j], add_special_tokens=False)
+    full_sequence = tokenizer.encode(dataset[j])
     if full_sequence[-1] != tokenizer.eos_token_id:
         full_sequence.append(tokenizer.eos_token_id)
     batch.append(full_sequence)
Loading…
Reference in New Issue
Block a user