mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-09-01 04:14:38 +08:00
Fix prompt cache for models without chat template (#1250)
* fix deepseek sharding (#1242)
* fix prompt cache with no chat template
This commit is contained in:
@@ -199,7 +199,7 @@ def main():
     if tokenizer.chat_template is None:
         tokenizer.chat_template = tokenizer.default_chat_template
     elif using_cache:
-        tokenizer.chat_template = metadata["chat_template"]
+        tokenizer.chat_template = json.loads(metadata["chat_template"])

     prompt = args.prompt.replace("\\n", "\n").replace("\\t", "\t")
     prompt = sys.stdin.read() if prompt == "-" else prompt
Reference in New Issue
Block a user