mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-10-23 22:18:06 +08:00
Add /v1/models endpoint to mlx_lm.server (#984)
* Add 'models' endpoint to server
* Add test for new 'models' server endpoint
* Check hf_cache for mlx models
* Update tests to check hf_cache for models
* Simplify test
* doc

---------

Co-authored-by: Awni Hannun <awni@apple.com>
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
# Copyright © 2024 Apple Inc.
|
||||
|
||||
import http
|
||||
import json
|
||||
import threading
|
||||
import unittest
|
||||
|
||||
@@ -77,6 +79,19 @@ class TestServer(unittest.TestCase):
|
||||
self.assertIn("id", response_body)
|
||||
self.assertIn("choices", response_body)
|
||||
|
||||
def test_handle_models(self):
    """Check that GET /v1/models returns a non-empty model listing.

    Sends a GET request to the ``/v1/models`` endpoint of the server
    listening on ``self.port`` and verifies that:

    * the response status is 200,
    * the body is a JSON object whose ``"object"`` field is ``"list"``,
    * ``"data"`` is a non-empty list, and
    * the first entry carries ``"id"`` and ``"created"`` keys and has
      ``"object"`` equal to ``"model"``.
    """
    url = f"http://localhost:{self.port}/v1/models"

    # Bound the request so a wedged or unresponsive server fails this test
    # instead of hanging the whole test run indefinitely.
    response = requests.get(url, timeout=30)
    self.assertEqual(response.status_code, 200)

    response_body = json.loads(response.text)
    self.assertEqual(response_body["object"], "list")
    self.assertIsInstance(response_body["data"], list)
    self.assertGreater(len(response_body["data"]), 0)

    # Only the first entry is inspected; the listing was asserted
    # non-empty just above, so the index is safe.
    model = response_body["data"][0]
    self.assertIn("id", model)
    self.assertEqual(model["object"], "model")
    self.assertIn("created", model)
|
||||
|
||||
def test_sequence_overlap(self):
|
||||
from mlx_lm.server import sequence_overlap
|
||||
|
||||
|
Reference in New Issue
Block a user