mlx-examples/llms/a.py

import mlx_lm
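# Scratch script (presumably for quick experiments): generate from a small instruct
# model, with a draft model loaded for optional speculative decoding below.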
# model, tokenizer = mlx_lm.load("mlx-community/SmolLM-1.7B-Instruct-fp16")
model, tokenizer = mlx_lm.load("/Users/llwu/models/mlx/Qwen2-0.5B-8bit-Instruct")
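# Smaller draft model, loaded for speculative decoding via the commented-out
# draft_model argument below; draft_tokenizer is unused here.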
draft_model, draft_tokenizer = mlx_lm.load("mlx-community/SmolLM-135M-Instruct-4bit")
# https://github.com/hemingkx/Spec-Bench/blob/main/data/spec_bench/question.jsonl
prompt = "Develop a Python program that reads all the text files under a directory and returns top-5 words with the most number of occurrences."
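# Wrap the raw prompt in the tokenizer's chat template and append the
# generation prompt so the instruct model sees a proper user turn.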
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": prompt}],
    tokenize=False,
    add_generation_prompt=True,
)
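# Sampling settings: temperature 1.0 with min-p filtering and a repetition
# penalty; uncomment draft_model to enable drafting with the smaller model
# (assuming this mlx_lm version accepts a draft_model argument).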
mlx_lm.generate(
    model,
    tokenizer,
    prompt=prompt,
    verbose=True,
    max_tokens=500,
    temp=1.0,
    min_p=0.1,
    repetition_penalty=1.2,
    # draft_model=draft_model,
)