mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-08-29 01:46:09 +08:00
Change the last_segment logic in tokenizer utils
This commit is contained in:
parent
b444acfd69
commit
6ef5ca4ce5
@ -3,8 +3,6 @@ from functools import partial
|
||||
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
REPLACEMENT_CHAR = "\ufffd"
|
||||
|
||||
|
||||
class StreamingDetokenizer:
|
||||
"""The streaming detokenizer interface so that we can detokenize one token at a time.
|
||||
@ -51,11 +49,9 @@ class StreamingDetokenizer:
|
||||
def last_segment(self):
    """Return the text produced since this was last read.

    Slices ``self.text`` from ``self.offset`` to the end, then moves
    ``self.offset`` to the end of the text so that the next read starts
    where this one stopped.

    Returns:
        The newly available text, or ``""`` if nothing has been added
        since the previous read.
    """
    text = self.text
    segment = text[self.offset :]
    # Advance the cursor before returning so the same characters are never
    # handed out twice.
    self.offset = len(text)
    # Return only the new slice: returning ``text`` here would repeat
    # everything already emitted on every read.
    return segment
|
||||
|
||||
|
||||
class NaiveStreamingDetokenizer(StreamingDetokenizer):
|
||||
|
Loading…
Reference in New Issue
Block a user