diff --git a/llms/README.md b/llms/README.md
index 4f7451c1..3c5a0b3d 100644
--- a/llms/README.md
+++ b/llms/README.md
@@ -64,6 +64,29 @@ prompt = tokenizer.apply_chat_template(
 text = generate(model, tokenizer, prompt=prompt, verbose=True)
 ```
 
+To set the temperature or other sampling parameters, make a sampler and pass it to `generate`:
+
+```
+from mlx_lm import load, generate
+from mlx_lm.sample_utils import make_sampler
+
+model, tokenizer = load("mlx-community/Mistral-7B-Instruct-v0.3-4bit")
+
+temp = 0.7
+top_p = 0.9
+top_k = 25
+sampler = make_sampler(temp=temp, top_p=top_p, top_k=top_k)
+
+prompt = "Write a story about Ada Lovelace"
+
+messages = [{"role": "user", "content": prompt}]
+prompt = tokenizer.apply_chat_template(
+    messages, add_generation_prompt=True
+)
+
+text = generate(model, tokenizer, prompt=prompt, sampler=sampler, verbose=True)
+```
+
 To see a description of all the arguments you can do:
 
 ```