mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-06-24 09:21:18 +08:00
Fix bug in upload + docs nit (#981)
* fix bug in upload + docs nit * nit
This commit is contained in:
parent
c3e3411756
commit
6c2369e4b9
@ -166,44 +166,28 @@ Currently, `*.jsonl` files support three data formats: `chat`,
|
|||||||
`chat`:
|
`chat`:
|
||||||
|
|
||||||
```jsonl
|
```jsonl
|
||||||
{
|
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello."}, {"role": "assistant", "content": "How can I assist you today."}]}
|
||||||
"messages": [
|
|
||||||
{
|
|
||||||
"role": "system",
|
|
||||||
"content": "You are a helpful assistant."
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"role": "user",
|
|
||||||
"content": "Hello."
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"role": "assistant",
|
|
||||||
"content": "How can I assistant you today."
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
```
|
```
|
||||||
|
|
||||||
`completions`:
|
`completions`:
|
||||||
|
|
||||||
```jsonl
|
```jsonl
|
||||||
{
|
{"prompt": "What is the capital of France?", "completion": "Paris."}
|
||||||
"prompt": "What is the capital of France?",
|
|
||||||
"completion": "Paris."
|
|
||||||
}
|
|
||||||
```
|
```
|
||||||
|
|
||||||
`text`:
|
`text`:
|
||||||
|
|
||||||
```jsonl
|
```jsonl
|
||||||
{
|
{"text": "This is an example for the model."}
|
||||||
"text": "This is an example for the model."
|
|
||||||
}
|
|
||||||
```
|
```
|
||||||
|
|
||||||
Note, the format is automatically determined by the dataset. Note also, keys in
|
Note, the format is automatically determined by the dataset. Note also, keys in
|
||||||
each line not expected by the loader will be ignored.
|
each line not expected by the loader will be ignored.
|
||||||
|
|
||||||
|
> [!NOTE]
|
||||||
|
> Each example in the datasets must be on a single line. Do not put more than
|
||||||
|
> one example per line and do not split an example across multiple lines.
|
||||||
|
|
||||||
### Hugging Face Datasets
|
### Hugging Face Datasets
|
||||||
|
|
||||||
To use Hugging Face datasets, first install the `datasets` package:
|
To use Hugging Face datasets, first install the `datasets` package:
|
||||||
|
@ -581,7 +581,7 @@ def upload_to_hub(path: str, upload_repo: str, hf_path: str):
|
|||||||
prompt="hello"
|
prompt="hello"
|
||||||
|
|
||||||
if hasattr(tokenizer, "apply_chat_template") and tokenizer.chat_template is not None:
|
if hasattr(tokenizer, "apply_chat_template") and tokenizer.chat_template is not None:
|
||||||
messages = [{"role": "user", "content": prompt}]
|
messages = [{{"role": "user", "content": prompt}}]
|
||||||
prompt = tokenizer.apply_chat_template(
|
prompt = tokenizer.apply_chat_template(
|
||||||
messages, tokenize=False, add_generation_prompt=True
|
messages, tokenize=False, add_generation_prompt=True
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user