# Copyright © 2024 Apple Inc.

from mlx_lm import generate, load

# Specify the checkpoint
checkpoint = "mistralai/Mistral-7B-Instruct-v0.3"

# Load the corresponding model and tokenizer
model, tokenizer = load(path_or_hf_repo=checkpoint)
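# Note: load() fetches the checkpoint from the Hugging Face Hub on first run
# and caches it locally. If memory is tight, a pre-quantized community
# conversion such as "mlx-community/Mistral-7B-Instruct-v0.3-4bit" (an
# alternative, not used here) can be substituted for the checkpoint above.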

# Specify the prompt and conversation history
prompt = "Why is the sky blue?"
conversation = [{"role": "user", "content": prompt}]

# Transform the prompt into the chat template
prompt = tokenizer.apply_chat_template(
    conversation=conversation, add_generation_prompt=True
)
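# With the default tokenize=True, apply_chat_template returns token ids rather
# than a plain string; generate() accepts the ids directly, as this example
# itself demonstrates.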

# Specify the maximum number of tokens
max_tokens = 1_000

# Specify if tokens and timing information will be printed
verbose = True

# Generate a response with the specified settings
response = generate(
    model=model,
    tokenizer=tokenizer,
    prompt=prompt,
    max_tokens=max_tokens,
    verbose=verbose,
)
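
# generate() also returns the full decoded response, so it can be used
# programmatically in addition to the verbose console output:
print(response)

# An alternative sketch: stream the response as it is produced with
# mlx_lm.stream_generate. The exact type yielded varies across mlx_lm
# versions; recent versions yield chunks with a `.text` field.
#
# from mlx_lm import stream_generate
#
# for chunk in stream_generate(model, tokenizer, prompt, max_tokens=max_tokens):
#     print(chunk.text, end="", flush=True)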