diff --git a/llms/README.md b/llms/README.md
index 79f26d41..b8e1914d 100644
--- a/llms/README.md
+++ b/llms/README.md
@@ -29,7 +29,14 @@ from mlx_lm import load, generate
 
 model, tokenizer = load("mlx-community/Mistral-7B-Instruct-v0.3-4bit")
 
-response = generate(model, tokenizer, prompt="hello", verbose=True)
+prompt = "Write a story about Einstein"
+
+messages = [{"role": "user", "content": prompt}]
+prompt = tokenizer.apply_chat_template(
+    messages, tokenize=False, add_generation_prompt=True
+)
+
+response = generate(model, tokenizer, prompt=prompt, verbose=True)
 ```
 
 To see a description of all the arguments you can do:
@@ -79,6 +86,11 @@ model, tokenizer = load(repo)
 
 prompt = "Write a story about Einstein"
 
+messages = [{"role": "user", "content": prompt}]
+prompt = tokenizer.apply_chat_template(
+    messages, tokenize=False, add_generation_prompt=True
+)
+
 for t in stream_generate(model, tokenizer, prompt, max_tokens=512):
     print(t, end="", flush=True)
 print()
diff --git a/llms/mlx_lm/_version.py b/llms/mlx_lm/_version.py
index a2eb9a25..8110c823 100644
--- a/llms/mlx_lm/_version.py
+++ b/llms/mlx_lm/_version.py
@@ -1,3 +1,3 @@
 # Copyright © 2023-2024 Apple Inc.
 
-__version__ = "0.18.1"
+__version__ = "0.18.2"
diff --git a/llms/mlx_lm/utils.py b/llms/mlx_lm/utils.py
index eee28c9c..ad9b3221 100644
--- a/llms/mlx_lm/utils.py
+++ b/llms/mlx_lm/utils.py
@@ -577,7 +577,16 @@ def upload_to_hub(path: str, upload_repo: str, hf_path: str):
         from mlx_lm import load, generate
 
         model, tokenizer = load("{upload_repo}")
-        response = generate(model, tokenizer, prompt="hello", verbose=True)
+
+        prompt = "hello"
+
+        if hasattr(tokenizer, "apply_chat_template") and tokenizer.chat_template is not None:
+            messages = [{{"role": "user", "content": prompt}}]
+            prompt = tokenizer.apply_chat_template(
+                messages, tokenize=False, add_generation_prompt=True
+            )
+
+        response = generate(model, tokenizer, prompt=prompt, verbose=True)
         ```
         """
     )
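
For context, the hunks above all make the same change: instead of passing raw user text to `generate`, the prompt is wrapped in a `messages` list and run through `tokenizer.apply_chat_template` first, so instruction-tuned models see the chat format they were trained on. Pieced together from the diff (a sketch assembled from the hunks above, not a file in the patch), the end-to-end flow looks like this:

```python
# Sketch of the chat-template flow this patch documents, using the
# mlx-community 4-bit Mistral model from the README; any Hub chat
# model works the same way.
from mlx_lm import load, generate

model, tokenizer = load("mlx-community/Mistral-7B-Instruct-v0.3-4bit")

prompt = "Write a story about Einstein"

# Only apply the template when the tokenizer actually has one,
# mirroring the guard added to the generated model card in utils.py.
if hasattr(tokenizer, "apply_chat_template") and tokenizer.chat_template is not None:
    messages = [{"role": "user", "content": prompt}]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

response = generate(model, tokenizer, prompt=prompt, verbose=True)
```

The `hasattr`/`chat_template` guard from the `utils.py` hunk keeps the generated model-card snippet working for base models whose tokenizers ship without a chat template; the README examples omit it because they target a chat model.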