Mirror of https://github.com/ml-explore/mlx-examples.git (synced 2025-12-16 02:08:55 +08:00)
support kimi + more options in chat mode (#1312)
@@ -65,12 +65,25 @@ def main():
         tokenizer_config={"trust_remote_code": True},
     )
 
-    print(f"[INFO] Starting chat session with {args.model}. To exit, enter 'q'.")
+    def print_help():
+        print("The command list:")
+        print("- 'q' to exit")
+        print("- 'r' to reset the chat")
+        print("- 'h' to display these commands")
+
+    print(f"[INFO] Starting chat session with {args.model}.")
+    print_help()
     prompt_cache = make_prompt_cache(model, args.max_kv_size)
     while True:
         query = input(">> ")
         if query == "q":
            break
+        if query == "r":
+            prompt_cache = make_prompt_cache(model, args.max_kv_size)
+            continue
+        if query == "h":
+            print_help()
+            continue
         messages = [{"role": "user", "content": query}]
         prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
         for response in stream_generate(
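
As a quick illustration, a session using the new commands might look like the following (the input is hypothetical; the help text is exactly what print_help above prints). Entering 'r' rebuilds the prompt cache, so the next query starts a fresh conversation; 'q' exits as before:

>> h
The command list:
- 'q' to exit
- 'r' to reset the chat
- 'h' to display these commands
>> r
>> q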
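
For context, the reset command works because make_prompt_cache builds a fresh KV cache, discarding all accumulated conversation state. Below is a minimal standalone sketch of that pattern, assuming mlx-lm is installed (pip install mlx-lm); the model name and max_tokens value are illustrative placeholders, not part of this commit:

from mlx_lm import load, stream_generate
from mlx_lm.models.cache import make_prompt_cache

# Load any chat model (placeholder repo; substitute your own).
model, tokenizer = load("mlx-community/Llama-3.2-1B-Instruct-4bit")

# The prompt cache holds the KV state of the conversation so far;
# rebuilding it (what the 'r' command does) starts from a clean slate.
prompt_cache = make_prompt_cache(model)

prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello!"}], add_generation_prompt=True
)
for response in stream_generate(
    model, tokenizer, prompt, max_tokens=64, prompt_cache=prompt_cache
):
    print(response.text, end="", flush=True)
print()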