This commit is contained in:
Goekdeniz-Guelmez 2025-03-01 12:42:39 +01:00
parent 6a3912be7f
commit bb261aadcb

View File

@@ -2,18 +2,9 @@ import itertools
import json import json
import types import types
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Optional, Union from typing import Any, Dict, List, Union
from transformers import PreTrainedTokenizer from transformers import PreTrainedTokenizer
from typing import List, Dict, Union
from transformers import PreTrainedTokenizer
from typing import List, Dict, Union
from transformers import PreTrainedTokenizer
from typing import List, Dict, Union
from transformers import PreTrainedTokenizer
class ORPODataset: class ORPODataset:
def __init__( def __init__(
@@ -368,7 +359,7 @@ def load_dataset(args, tokenizer: PreTrainedTokenizer):
train, valid, test = load_local_dataset(args, data_path, tokenizer, args) train, valid, test = load_local_dataset(args, data_path, tokenizer, args)
else: else:
print(f"Loading Hugging Face dataset {args.data}.") print(f"Loading Hugging Face dataset {args.data}.")
train, valid, test = load_hf_dataset(args.data, tokenizer, args) train, valid, test = load_hf_dataset(args, args.data, tokenizer, args)
if args.train and len(train) == 0: if args.train and len(train) == 0:
raise ValueError( raise ValueError(