From 6c2369e4b97f49fb5906ec46033497b39931b25d Mon Sep 17 00:00:00 2001
From: Awni Hannun
Date: Sat, 7 Sep 2024 14:46:57 -0700
Subject: [PATCH] Fix bug in upload + docs nit (#981)

* fix bug in upload + docs nit

* nit
---
 llms/mlx_lm/LORA.md  | 30 +++++++-----------------------
 llms/mlx_lm/utils.py |  2 +-
 2 files changed, 8 insertions(+), 24 deletions(-)

diff --git a/llms/mlx_lm/LORA.md b/llms/mlx_lm/LORA.md
index 2e739d0f..2d9a2553 100644
--- a/llms/mlx_lm/LORA.md
+++ b/llms/mlx_lm/LORA.md
@@ -166,44 +166,28 @@ Currently, `*.jsonl` files support three data formats: `chat`,
 `chat`:
 
 ```jsonl
-{
-  "messages": [
-    {
-      "role": "system",
-      "content": "You are a helpful assistant."
-    },
-    {
-      "role": "user",
-      "content": "Hello."
-    },
-    {
-      "role": "assistant",
-      "content": "How can I assistant you today."
-    }
-  ]
-}
+{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello."}, {"role": "assistant", "content": "How can I assist you today."}]}
 ```
 
 `completions`:
 
 ```jsonl
-{
-  "prompt": "What is the capital of France?",
-  "completion": "Paris."
-}
+{"prompt": "What is the capital of France?", "completion": "Paris."}
 ```
 
 `text`:
 
 ```jsonl
-{
-  "text": "This is an example for the model."
-}
+{"text": "This is an example for the model."}
 ```
 
 Note, the format is automatically determined by the dataset. Note also, keys
 in each line not expected by the loader will be ignored.
 
+> [!NOTE]
+> Each example in the datasets must be on a single line. Do not put more than
+> one example per line and do not split an example across multiple lines.
+
 ### Hugging Face Datasets
 
 To use Hugging Face datasets, first install the `datasets` package:
diff --git a/llms/mlx_lm/utils.py b/llms/mlx_lm/utils.py
index ad9b3221..b4a2ea51 100644
--- a/llms/mlx_lm/utils.py
+++ b/llms/mlx_lm/utils.py
@@ -581,7 +581,7 @@ def upload_to_hub(path: str, upload_repo: str, hf_path: str):
 prompt="hello"
 
 if hasattr(tokenizer, "apply_chat_template") and tokenizer.chat_template is not None:
-    messages = [{"role": "user", "content": prompt}]
+    messages = [{{"role": "user", "content": prompt}}]
     prompt = tokenizer.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
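
Note on the LORA.md change: for anyone converting existing pretty-printed JSON into the one-example-per-line form the new `[!NOTE]` requires, here is a minimal sketch (not part of the patch; the file name and examples are hypothetical) showing that `json.dumps` already produces valid single-line JSONL:

```python
import json

# Hypothetical training examples in the three supported formats.
examples = [
    {"messages": [{"role": "user", "content": "Hello."},
                  {"role": "assistant", "content": "How can I assist you today."}]},
    {"prompt": "What is the capital of France?", "completion": "Paris."},
    {"text": "This is an example for the model."},
]

# json.dumps emits no newlines by default, so each example lands on one line.
with open("train.jsonl", "w") as f:
    for example in examples:
        f.write(json.dumps(example) + "\n")

# Sanity check: every line must parse as a complete JSON object on its own.
# An example split across multiple lines fails here with a JSONDecodeError.
with open("train.jsonl") as f:
    for line in f:
        json.loads(line)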
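```

Note on the `utils.py` hunk: the doubled braces indicate the patched line sits inside a Python f-string template (the model-card text that `upload_to_hub` generates), where a single `{` opens a replacement field. A minimal sketch of the escaping rule under that assumption, with a hypothetical repo name:

```python
# Inside an f-string, "{{" and "}}" render as literal braces, while single
# braces mark replacement fields. The doubled braces in the patched line
# therefore render as a plain dict literal in the generated README.
upload_repo = "mlx-community/my-model"  # hypothetical repo name for the demo

card_snippet = f"""\
model, tokenizer = load("{upload_repo}")
messages = [{{"role": "user", "content": prompt}}]
"""
print(card_snippet)
# model, tokenizer = load("mlx-community/my-model")
# messages = [{"role": "user", "content": prompt}]
```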