mlx-examples/llms/d.py

12 lines
283 B
Python
Raw Normal View History

"""Demo: stream-generate completions for two prompts in one batched call.

Loads a local 4-bit quantized Llama 3.1 8B model with mlx_lm and streams
up to 20 tokens for a pair of prompts simultaneously, printing the two
partial completions side by side as columns.
"""
import mlx_lm

# NOTE(review): hard-coded local model path — adjust for your machine.
model, tokenizer = mlx_lm.load("/Users/llwu/models/mlx/Meta-Llama-3.1-8B-4bit")

for s in mlx_lm.stream_generate(
    model,
    tokenizer,
    # Batched prompts: stream_generate yields one partial string per prompt
    # at each step — presumably as an indexable pair; verify against the
    # mlx_lm version in use.
    prompt=["Meta Llama 3.1 is a ", "Google Gemma 2 is a "],
    max_tokens=20,
):
    # Left-pad the first completion to a fixed 30-char column so the two
    # streams line up; flush every step so output appears as it streams.
    print(s[0].ljust(30) + s[1], flush=True)