Fix prompt cache for models without chat template (#1250)

* fix deepseek sharding (#1242)

* fix prompt cache with no chat template
Author: Awni Hannun
Date: 2025-02-06 11:10:58 -08:00
Committed by: GitHub
Parent: 747c08e202
Commit: 52c41b5b5a
3 changed files with 7 additions and 7 deletions


@@ -199,7 +199,7 @@ def main():
     if tokenizer.chat_template is None:
         tokenizer.chat_template = tokenizer.default_chat_template
     elif using_cache:
-        tokenizer.chat_template = metadata["chat_template"]
+        tokenizer.chat_template = json.loads(metadata["chat_template"])
     prompt = args.prompt.replace("\\n", "\n").replace("\\t", "\t")
     prompt = sys.stdin.read() if prompt == "-" else prompt
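
A minimal sketch of why the json.loads is needed, assuming the cache-writing side serializes the template with json.dumps (so a model without a chat template is stored as the JSON literal null rather than as Python's None); the variable names here are illustrative, not the actual code:

import json

# saving side: the template (possibly None) is serialized into the
# string-valued metadata dict; json.dumps(None) yields the string 'null'
metadata = {"chat_template": json.dumps(None)}

# before the fix: the raw string 'null' was assigned directly, so the
# later `tokenizer.chat_template is None` check never fired
broken = metadata["chat_template"]             # 'null' (a str, not None)

# after the fix: json.loads recovers the original value, so a model
# without a chat template round-trips correctly through the cache
fixed = json.loads(metadata["chat_template"])  # None

assert broken == "null" and fixed is None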