Update LLM generation docs to use chat template (#973)

* fix docs

* add template to model cards as well

* revert

* version
Awni Hannun authored on 2024-09-07 06:06:15 -07:00; committed by GitHub
parent 324184d670
commit c3e3411756
3 changed files with 24 additions and 3 deletions

--- a/llms/README.md
+++ b/llms/README.md

@@ -29,7 +29,14 @@ from mlx_lm import load, generate

 model, tokenizer = load("mlx-community/Mistral-7B-Instruct-v0.3-4bit")

-response = generate(model, tokenizer, prompt="hello", verbose=True)
+prompt = "Write a story about Einstein"
+
+messages = [{"role": "user", "content": prompt}]
+prompt = tokenizer.apply_chat_template(
+    messages, tokenize=False, add_generation_prompt=True
+)
+
+response = generate(model, tokenizer, prompt=prompt, verbose=True)
 ```

 To see a description of all the arguments you can do:
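A note for readers of this diff: with `tokenize=False`, `apply_chat_template` returns the fully formatted prompt string rather than token ids, and `add_generation_prompt=True` appends the markers that cue the assistant's turn. A minimal sketch of what the updated README snippet produces; the exact markers come from the model's chat template, so the output shown in the comment is illustrative only:

```python
from mlx_lm import load

model, tokenizer = load("mlx-community/Mistral-7B-Instruct-v0.3-4bit")

messages = [{"role": "user", "content": "Write a story about Einstein"}]

# tokenize=False -> a string, not token ids;
# add_generation_prompt=True -> append the assistant-turn cue.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
# Mistral-style templates render roughly:
# <s>[INST] Write a story about Einstein[/INST]
```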
@@ -79,6 +86,11 @@ model, tokenizer = load(repo)

 prompt = "Write a story about Einstein"
+
+messages = [{"role": "user", "content": prompt}]
+prompt = tokenizer.apply_chat_template(
+    messages, tokenize=False, add_generation_prompt=True
+)
 for t in stream_generate(model, tokenizer, prompt, max_tokens=512):
     print(t, end="", flush=True)
 print()
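The same pattern carries over to multi-turn chat, since the template also renders conversation history. A hedged sketch building on the snippet above; the assistant message here is a placeholder for a previously generated reply:

```python
from mlx_lm import load, stream_generate

model, tokenizer = load("mlx-community/Mistral-7B-Instruct-v0.3-4bit")

# Prior turns go into the same messages list; the template decides
# how to interleave user and assistant markers.
messages = [
    {"role": "user", "content": "Write a story about Einstein"},
    {"role": "assistant", "content": "Once upon a time ..."},  # placeholder reply
    {"role": "user", "content": "Now retell it in two sentences."},
]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

# stream_generate yields text chunks as they are produced.
for t in stream_generate(model, tokenizer, prompt, max_tokens=512):
    print(t, end="", flush=True)
print()
```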

--- a/llms/mlx_lm/version.py
+++ b/llms/mlx_lm/version.py

@@ -1,3 +1,3 @@
 # Copyright © 2023-2024 Apple Inc.

-__version__ = "0.18.1"
+__version__ = "0.18.2"

--- a/llms/mlx_lm/utils.py
+++ b/llms/mlx_lm/utils.py

@@ -577,7 +577,16 @@ def upload_to_hub(path: str, upload_repo: str, hf_path: str):
 from mlx_lm import load, generate

 model, tokenizer = load("{upload_repo}")
-response = generate(model, tokenizer, prompt="hello", verbose=True)
+
+prompt = "hello"
+
+if hasattr(tokenizer, "apply_chat_template") and tokenizer.chat_template is not None:
+    messages = [{{"role": "user", "content": prompt}}]
+    prompt = tokenizer.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+
+response = generate(model, tokenizer, prompt=prompt, verbose=True)
 ```
 """
 )
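Because the card text is an f-string (note the `{upload_repo}` placeholder), the literal braces in the `messages` dict are doubled in the source. After substitution, the snippet embedded in a generated model card reads roughly as below; the repo name is a hypothetical stand-in:

```python
from mlx_lm import load, generate

model, tokenizer = load("mlx-community/My-Model-4bit")  # stand-in for {upload_repo}

prompt = "hello"

# Guard: base models may ship no chat template, in which case the
# raw prompt is passed through unchanged.
if hasattr(tokenizer, "apply_chat_template") and tokenizer.chat_template is not None:
    messages = [{"role": "user", "content": prompt}]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

response = generate(model, tokenizer, prompt=prompt, verbose=True)
```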