mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-06-24 17:31:18 +08:00
Make it easier to know in which file we have bad JSON data (#458)
* Make it easier to know which file we have bad JSON data in. * Use a loop rather than repeat code sections. I previously had these as separate cut-n-drooled sections of code. This change makes it a clean loop. Co-authored-by: Awni Hannun <awni.hannun@gmail.com> * Small fix to previous code suggestion to restore a missing variable. --------- Co-authored-by: Awni Hannun <awni.hannun@gmail.com>
This commit is contained in:
parent
88458c4e40
commit
8c9148a8fd
11
lora/lora.py
11
lora/lora.py
@ -138,8 +138,17 @@ class Dataset:
|
||||
|
||||
|
||||
def load(args):
|
||||
def load_and_check(name):
    """Build and return the Dataset for the ``<name>.jsonl`` file in ``args.data``.

    ``args`` comes from the enclosing ``load(args)`` call.

    Args:
        name: Stem of the JSONL file to load (for example ``"train"``).

    Returns:
        The ``Dataset`` constructed from ``Path(args.data) / f"{name}.jsonl"``.

    Raises:
        Exception: Whatever ``Dataset`` raised, re-raised unchanged after the
            offending path has been printed.
    """
    dataset_path = Path(args.data) / f"{name}.jsonl"
    try:
        # Return the dataset: the caller unpacks the results of this helper
        # into train/valid/test, so falling off the end would hand it None.
        return Dataset(dataset_path)
    except Exception as e:
        # Report which file failed before propagating, so bad JSON data can
        # be traced to a specific file.
        print(f"Unable to build dataset {dataset_path} ({e})")
        raise
|
||||
|
||||
names = ("train", "valid", "test")
|
||||
train, valid, test = (Dataset(Path(args.data) / f"{n}.jsonl") for n in names)
|
||||
train, valid, test = (load_and_check(n) for n in names)
|
||||
|
||||
if args.train and len(train) == 0:
|
||||
raise ValueError(
|
||||
"Training set not found or empty. Must provide training set for fine-tuning."
|
||||
|
Loading…
Reference in New Issue
Block a user