some cleanup, warnings, tests

This commit is contained in:
Awni Hannun
2024-11-22 16:51:22 -08:00
parent 9986787303
commit f82e49aad9
9 changed files with 57 additions and 61 deletions

View File

@@ -34,10 +34,11 @@ class TestTokenizers(unittest.TestCase):
detokenizer = tokenizer.detokenizer
detokenizer.reset()
text = ""
for t in tokens:
for e, t in enumerate(tokens):
detokenizer.add_token(t)
seg = detokenizer.last_segment
text += seg
self.assertEqual(detokenizer.tokens, tokens[: e + 1])
detokenizer.finalize()
text += detokenizer.last_segment
self.assertEqual(text, expected_text)