[mlx-lm] Use top p in server (#1144)

* use top p in server

* couple other fixes
This commit is contained in:
Awni Hannun
2024-12-12 11:12:21 -08:00
committed by GitHub
parent 19abf3dcaa
commit 2ba0e36683
3 changed files with 5 additions and 2 deletions

View File

@@ -465,7 +465,7 @@ class APIHandler(BaseHTTPRequestHandler):
text = ""
tic = time.perf_counter()
-sampler = make_sampler(self.temperature)
+sampler = make_sampler(self.temperature, top_p=self.top_p)
logits_processors = make_logits_processors(
self.logit_bias, self.repetition_penalty, self.repetition_context_size
)