This commit is contained in:
Goekdeniz-Guelmez 2025-03-01 12:42:39 +01:00
parent 6a3912be7f
commit bb261aadcb

View File

@ -2,18 +2,9 @@ import itertools
import json
import types
from pathlib import Path
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Union
from transformers import PreTrainedTokenizer
from typing import List, Dict, Union
from transformers import PreTrainedTokenizer
from typing import List, Dict, Union
from transformers import PreTrainedTokenizer
from typing import List, Dict, Union
from transformers import PreTrainedTokenizer
class ORPODataset:
def __init__(
@ -368,7 +359,7 @@ def load_dataset(args, tokenizer: PreTrainedTokenizer):
train, valid, test = load_local_dataset(args, data_path, tokenizer, args)
else:
print(f"Loading Hugging Face dataset {args.data}.")
train, valid, test = load_hf_dataset(args.data, tokenizer, args)
train, valid, test = load_hf_dataset(args, args.data, tokenizer, args)
if args.train and len(train) == 0:
raise ValueError(