mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-09-01 04:14:38 +08:00
fix encoding with special tokens + chat template (#1189)
This commit is contained in:
@@ -36,7 +36,8 @@ class TestDatasets(unittest.TestCase):
|
||||
data = {"text": "This is an example for the model."}
|
||||
self.save_data(4 * [data])
|
||||
args = types.SimpleNamespace(train=True, test=False, data=self.test_dir)
|
||||
- train, valid, test = datasets.load_dataset(args, None)
|
||||
+ tokenizer = AutoTokenizer.from_pretrained(HF_MODEL_PATH)
|
||||
+ train, valid, test = datasets.load_dataset(args, tokenizer)
|
||||
self.assertEqual(len(train), 4)
|
||||
self.assertEqual(len(valid), 4)
|
||||
self.assertEqual(len(test), 0)
|
||||
@@ -82,6 +83,8 @@ class TestDatasets(unittest.TestCase):
|
||||
"name": "billsum",
|
||||
"prompt_feature": "text",
|
||||
"completion_feature": "summary",
|
||||
+ "train_split": "train[:2%]",
|
||||
+ "valid_split": "train[-2%:]",
|
||||
},
|
||||
test=False,
|
||||
train=True,
|
||||
|
Reference in New Issue
Block a user