# Example from mlx-examples (https://github.com/ml-explore/mlx-examples):
# generate a response with mlx_lm from Mistral-7B-Instruct-v0.3, passing
# optional generation arguments. Requires `transformers` and `sentencepiece`.
from mlx_lm import generate, load

# Specify the checkpoint
checkpoint = "mistralai/Mistral-7B-Instruct-v0.3"

# Load the corresponding model and tokenizer
model, tokenizer = load(path_or_hf_repo=checkpoint)

# Specify the prompt and conversation history
prompt = "Why is the sky blue?"
conversation = [{"role": "user", "content": prompt}]

# Transform the prompt into the chat template
prompt = tokenizer.apply_chat_template(
    conversation=conversation, tokenize=False, add_generation_prompt=True
)

# Specify the maximum number of tokens
max_tokens = 1_000

# Specify if tokens and timing information will be printed
verbose = True

# Some optional arguments for causal language model generation
generation_args = {
    "temp": 0.7,
    "repetition_penalty": 1.2,
    "repetition_context_size": 20,
    "top_p": 0.95,
}

# Generate a response with the specified settings
response = generate(
    model=model,
    tokenizer=tokenizer,
    prompt=prompt,
    max_tokens=max_tokens,
    verbose=verbose,
    **generation_args,
)
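
# --- Not part of the original example ---------------------------------------
# `generate` returns the generated text (verbose=True only adds printed tokens
# and timing information), so the response can also be used programmatically,
# for example to extend the conversation with a follow-up turn. A minimal
# sketch using only the APIs already used above; the follow-up question is a
# hypothetical placeholder.
print(response)

# Append the assistant reply and a follow-up question, then rebuild the prompt
# with the chat template for another call to `generate`.
conversation.append({"role": "assistant", "content": response})
conversation.append({"role": "user", "content": "Explain it more simply."})
follow_up_prompt = tokenizer.apply_chat_template(
    conversation=conversation, tokenize=False, add_generation_prompt=True
)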
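
# NOTE (assumption, not from the original example): depending on the installed
# mlx_lm version, sampling options such as `temp`, `top_p`, and the repetition
# settings may no longer be accepted as keyword arguments of `generate` and are
# instead passed via a sampler and logits processors. A rough sketch of that
# style, assuming `mlx_lm.sample_utils` provides `make_sampler` and
# `make_logits_processors`:
#
#   from mlx_lm.sample_utils import make_logits_processors, make_sampler
#
#   sampler = make_sampler(temp=0.7, top_p=0.95)
#   logits_processors = make_logits_processors(
#       repetition_penalty=1.2, repetition_context_size=20
#   )
#   response = generate(
#       model=model,
#       tokenizer=tokenizer,
#       prompt=prompt,
#       max_tokens=max_tokens,
#       verbose=verbose,
#       sampler=sampler,
#       logits_processors=logits_processors,
#   )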