From 373dd6f2a290dfa45b3f41b455817eb1bf252d55 Mon Sep 17 00:00:00 2001 From: Matt Wronkiewicz Date: Tue, 19 Mar 2024 20:21:26 -0700 Subject: [PATCH] Set finish_reason in response (#592) --- llms/mlx_lm/server.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llms/mlx_lm/server.py b/llms/mlx_lm/server.py index 83df9e29..2540abd2 100644 --- a/llms/mlx_lm/server.py +++ b/llms/mlx_lm/server.py @@ -238,6 +238,7 @@ class APIHandler(BaseHTTPRequestHandler): A list of stop words passed to the stopping_criteria function """ tokens = [] + finish_reason = "length" for (token, _), _ in zip( generate_step( prompt=prompt, @@ -255,12 +256,13 @@ class APIHandler(BaseHTTPRequestHandler): tokens, stop_id_sequences, TOKENIZER.eos_token_id ) if stop_condition.stop_met: + finish_reason = "stop" if stop_condition.trim_length: tokens = tokens[: -stop_condition.trim_length] break text = TOKENIZER.decode(tokens) - response = self.generate_response(text, "stop", len(prompt), len(tokens)) + response = self.generate_response(text, finish_reason, len(prompt), len(tokens)) response_json = json.dumps(response).encode()