diff --git a/transformer_lm/datasets.py b/transformer_lm/datasets.py index b741c220..f0e8ff51 100644 --- a/transformer_lm/datasets.py +++ b/transformer_lm/datasets.py @@ -40,7 +40,9 @@ def _load(save_dir, filenames): def wikitext(dataset="2", save_dir="/tmp"): """ Load the WikiText-* language modeling dataset: - https://paperswithcode.com/dataset/penn-treebank + https://paperswithcode.com/dataset/wikitext-2 + https://paperswithcode.com/dataset/wikitext-103 + """ if dataset not in ("2", "103"): raise ValueError(f'Dataset must be either "2" or "103", got {dataset}')