This commit is contained in:
Awni Hannun
2024-06-27 06:38:19 -07:00
committed by GitHub
parent 7979b84a9e
commit 9f10728145

View File

@@ -120,7 +120,7 @@ class SPMStreamingDetokenizer(StreamingDetokenizer):
self.trim_space = trim_space
# Build a list, indexed by token id, that holds each token's text
self.tokenmap = [None] * len(tokenizer.vocab)
self.tokenmap = [""] * (max(tokenizer.vocab.values()) + 1)
for value, tokenid in tokenizer.vocab.items():
self.tokenmap[tokenid] = value