mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-08-30 02:53:41 +08:00
Fix object property value in mlx_lm.server chat completions response to match OpenAI spec
These were "chat.completions" and "chat.completions.chunk" but should be "chat.completion" and "chat.completion.chunk" for compatibility with clients expecting an OpenAI API. In particular, this solves a problem in which aider 0.64.1 reports hitting a token limit on any completion request, no matter how small, despite apparently correct counts in the usage property. Refer to: https://platform.openai.com/docs/api-reference/chat/object > object string > The object type, which is always chat.completion. https://platform.openai.com/docs/api-reference/chat/streaming > object string > The object type, which is always chat.completion.chunk.
This commit is contained in:
parent
0f135396ae
commit
ec494a97ec
@@ -92,7 +92,7 @@ curl localhost:8080/v1/chat/completions \
|
|||||||
|
|
||||||
- `system_fingerprint`: A unique identifier for the system.
|
- `system_fingerprint`: A unique identifier for the system.
|
||||||
|
|
||||||
- `object`: Any of "chat.completions", "chat.completions.chunk" (for
|
- `object`: Any of "chat.completion", "chat.completion.chunk" (for
|
||||||
streaming), or "text.completion".
|
streaming), or "text.completion".
|
||||||
|
|
||||||
- `model`: The model repo or path (e.g. `"mlx-community/Llama-3.2-3B-Instruct-4bit"`).
|
- `model`: The model repo or path (e.g. `"mlx-community/Llama-3.2-3B-Instruct-4bit"`).
|
||||||
|
@@ -589,9 +589,7 @@ class APIHandler(BaseHTTPRequestHandler):
|
|||||||
|
|
||||||
# Determine response type
|
# Determine response type
|
||||||
self.request_id = f"chatcmpl-{uuid.uuid4()}"
|
self.request_id = f"chatcmpl-{uuid.uuid4()}"
|
||||||
self.object_type = (
|
self.object_type = "chat.completion.chunk" if self.stream else "chat.completion"
|
||||||
"chat.completions.chunk" if self.stream else "chat.completions"
|
|
||||||
)
|
|
||||||
if (
|
if (
|
||||||
hasattr(self.tokenizer, "apply_chat_template")
|
hasattr(self.tokenizer, "apply_chat_template")
|
||||||
and self.tokenizer.chat_template
|
and self.tokenizer.chat_template
|
||||||
|
Loading…
Reference in New Issue
Block a user