use top p in server

This commit is contained in:
Awni Hannun 2024-12-08 20:16:44 -08:00
parent 06af3c9b0e
commit 2277033a24
2 changed files with 2 additions and 2 deletions

View File

@@ -190,7 +190,7 @@ def make_repetition_penalty(penalty: float, context_size: int = 20):
Callable[[mx.array, List[int]], mx.array]:
The repetition penalty processor.
"""
-if penalty < 0 or not isinstance(penalty, float):
+if penalty < 0 or not isinstance(penalty, (int, float)):
raise ValueError(f"penalty must be a non-negative float, got {penalty}")
def repetition_penalty_processor(tokens, logits):

View File

@@ -465,7 +465,7 @@ class APIHandler(BaseHTTPRequestHandler):
text = ""
tic = time.perf_counter()
-sampler = make_sampler(self.temperature)
+sampler = make_sampler(self.temperature, top_p=self.top_p)
logits_processors = make_logits_processors(
self.logit_bias, self.repetition_penalty, self.repetition_context_size
)