Fix prompt cache issue in server.py. When the cache was invalidated and re-created, the stale prompt_cache.tokens list was never cleared, so newly processed tokens were appended after tokens from the previous prompt and the bookkeeping no longer matched the actual KV cache contents. Resetting the token list whenever the cache is rebuilt makes prompt_cache functional.

Jeonghyun Lee 2025-03-13 17:24:58 +09:00
parent 3e5baf583b
commit 4c60cb8ef9


@@ -452,17 +452,24 @@ class APIHandler(BaseHTTPRequestHandler):
     def get_prompt_cache(self, prompt):
         cache_len = len(self.prompt_cache.tokens)
 
+        # Check if the cache is valid for the current prompt
         if (
             self.prompt_cache.model_key != self.model_provider.model_key
             or cache_len >= len(prompt)
             or self.prompt_cache.tokens != prompt[:cache_len]
         ):
+            # Reinitialize the cache entirely
             self.prompt_cache.model_key = self.model_provider.model_key
             self.prompt_cache.cache = make_prompt_cache(self.model_provider.model)
+            # Reset the cache tokens to be empty because the cache was re-created
+            self.prompt_cache.tokens = []
+            new_prompt = prompt
         else:
-            prompt = prompt[cache_len:]
-        self.prompt_cache.tokens.extend(prompt)
-        return prompt
+            # Use the already cached tokens; only process the tail of the prompt
+            new_prompt = prompt[cache_len:]
+        # Update the cache tokens with the new tokens being processed
+        self.prompt_cache.tokens.extend(new_prompt)
+        return new_prompt
 
     def handle_completion(
         self,
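
For illustration, here is a minimal, self-contained sketch of the patched bookkeeping. The PromptCache dataclass and get_prompt_cache_fixed helper are hypothetical stand-ins for the server's state, token IDs are plain ints, and the rebuild of the actual KV cache via make_prompt_cache is elided; only the token-list logic from the diff is reproduced.

from dataclasses import dataclass, field

@dataclass
class PromptCache:
    model_key: str = ""                         # which model the cache belongs to
    tokens: list = field(default_factory=list)  # tokens already in the KV cache

def get_prompt_cache_fixed(cache, model_key, prompt):
    """Mirrors the patched logic: clear `tokens` whenever the cache is rebuilt."""
    cache_len = len(cache.tokens)
    if (
        cache.model_key != model_key
        or cache_len >= len(prompt)
        or cache.tokens != prompt[:cache_len]
    ):
        cache.model_key = model_key
        cache.tokens = []  # the fix: stale tokens go away with the old cache
        new_prompt = prompt
    else:
        new_prompt = prompt[cache_len:]
    cache.tokens.extend(new_prompt)
    return new_prompt

cache = PromptCache()
# First request: nothing is cached, so the whole prompt is processed.
assert get_prompt_cache_fixed(cache, "m", [1, 2, 3]) == [1, 2, 3]
# A follow-up that extends the first prompt reuses the cached prefix.
assert get_prompt_cache_fixed(cache, "m", [1, 2, 3, 4, 5]) == [4, 5]
# An unrelated prompt invalidates the cache. Without the token reset, the old
# code would have left cache.tokens as [1, 2, 3, 4, 5, 9, 9, 9, 9, 9, 9],
# and every later prefix comparison would be wrong.
assert get_prompt_cache_fixed(cache, "m", [9] * 6) == [9] * 6
assert cache.tokens == [9] * 6

The third case is exactly where the old code went wrong: it re-created the KV cache but extended the stale token list with the full prompt, leaving the tokens permanently out of sync with what the cache actually held.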