mlx-examples/llms/c.py
2024-10-09 15:13:12 -04:00

12 lines
246 B
Python

"""Stream a completion from a local 4-bit Meta-Llama-3.1-8B checkpoint via mlx_lm.

Loads the quantized model and tokenizer from a local MLX directory, then
streams up to 100 generated tokens for a fixed prompt, printing each piece
as soon as it arrives (unbuffered) so output appears incrementally.
"""
import mlx_lm

# Load the quantized model weights and matching tokenizer from the local
# MLX checkpoint directory.  NOTE(review): path is machine-specific; make it
# a CLI argument if this grows beyond a scratch script.
model, tokenizer = mlx_lm.load("/Users/llwu/models/mlx/Meta-Llama-3.1-8B-4bit")

# Stream pieces of the completion as they are generated.  end="" keeps the
# pieces on one line; flush=True forces each piece to the terminal
# immediately instead of waiting for a newline.
# NOTE(review): assumes stream_generate yields plain text chunks (true for
# mlx_lm of this era); newer versions yield GenerationResponse objects,
# which would need `s.text` here — confirm against the installed version.
for s in mlx_lm.stream_generate(
    model,
    tokenizer,
    prompt="Meta Llama 3.1 is a ",
    max_tokens=100,
):
    print(s, end="", flush=True)