From 11a2daebf3ab9ed9f8d0d96c24a7d7e95456e653 Mon Sep 17 00:00:00 2001
From: Awni Hannun
Date: Sat, 7 Sep 2024 08:23:49 -0700
Subject: [PATCH] fix bug in upload + docs nit

---
 llms/mlx_lm/LORA.md  | 30 +++++++-----------------------
 llms/mlx_lm/utils.py |  2 +-
 2 files changed, 8 insertions(+), 24 deletions(-)

diff --git a/llms/mlx_lm/LORA.md b/llms/mlx_lm/LORA.md
index 2e739d0f..3bf1ac68 100644
--- a/llms/mlx_lm/LORA.md
+++ b/llms/mlx_lm/LORA.md
@@ -166,44 +166,28 @@ Currently, `*.jsonl` files support three data formats: `chat`,
 `chat`:
 
 ```jsonl
-{
-  "messages": [
-    {
-      "role": "system",
-      "content": "You are a helpful assistant."
-    },
-    {
-      "role": "user",
-      "content": "Hello."
-    },
-    {
-      "role": "assistant",
-      "content": "How can I assistant you today."
-    }
-  ]
-}
+{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello."}, {"role": "assistant", "content": "How can I assist you today."}]}
 ```
 
 `completions`:
 
 ```jsonl
-{
-  "prompt": "What is the capital of France?",
-  "completion": "Paris."
-}
+{"prompt": "What is the capital of France?", "completion": "Paris."}
 ```
 
 `text`:
 
 ```jsonl
-{
-  "text": "This is an example for the model."
-}
+{"text": "This is an example for the model."}
 ```
 
 Note, the format is automatically determined by the dataset. Note also, keys
 in each line not expected by the loader will be ignored.
 
+> [!NOTE]
+> Each example in the datasets must be on a single line. Do not put more than
+> one example per line and do not split an example across multiple lines.
+
 ### Hugging Face Datasets
 
 To use Hugging Face datasets, first install the `datasets` package:
diff --git a/llms/mlx_lm/utils.py b/llms/mlx_lm/utils.py
index ad9b3221..b4a2ea51 100644
--- a/llms/mlx_lm/utils.py
+++ b/llms/mlx_lm/utils.py
@@ -581,7 +581,7 @@ def upload_to_hub(path: str, upload_repo: str, hf_path: str):
         prompt="hello"
 
         if hasattr(tokenizer, "apply_chat_template") and tokenizer.chat_template is not None:
-            messages = [{"role": "user", "content": prompt}]
+            messages = [{{"role": "user", "content": prompt}}]
             prompt = tokenizer.apply_chat_template(
                 messages, tokenize=False, add_generation_prompt=True
             )
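
A quick illustration of the new note in LORA.md: because the loader reads one JSON object per line, serializing each example with `json.dumps` guarantees the single-line requirement. This is a minimal sketch, not part of the patch; the file name `train.jsonl` and the example data are hypothetical.

```python
import json

# Hypothetical examples in the `chat` format described in LORA.md.
examples = [
    {"messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello."},
        {"role": "assistant", "content": "How can I assist you today."},
    ]},
]

# json.dumps never emits raw newlines, so each example lands on one line.
with open("train.jsonl", "w") as f:
    for example in examples:
        f.write(json.dumps(example) + "\n")

# Sanity check: every line must parse as a complete JSON object on its own.
with open("train.jsonl") as f:
    for lineno, line in enumerate(f, 1):
        json.loads(line)  # raises json.JSONDecodeError if an example was split
```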
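For context on the `utils.py` change: `upload_to_hub` emits the sample code as part of an f-string, where a literal `{` must be written `{{`. The snippet below is a standalone sketch of that pitfall, not the repository's code.

```python
prompt = "hello"

# Doubled braces render as literal "{" and "}" in the generated text.
snippet = f'messages = [{{"role": "user", "content": prompt}}]'
assert snippet == 'messages = [{"role": "user", "content": prompt}]'

# With single braces, Python parses {"role": ...} as a replacement field and
# raises a ValueError (invalid format specifier) when the f-string is evaluated.
try:
    f'messages = [{"role": "user", "content": prompt}]'
except ValueError as err:
    print(err)
```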