[mlx-lm] Use top p in server (#1144)

* use top p in server

* couple other fixes
This commit is contained in:
Awni Hannun
2024-12-12 11:12:21 -08:00
committed by GitHub
parent 19abf3dcaa
commit 2ba0e36683
3 changed files with 5 additions and 2 deletions

View File

@@ -190,7 +190,7 @@ def make_repetition_penalty(penalty: float, context_size: int = 20):
Callable[[mx.array, List[int]], mx.array]:
The repetition penalty processor.
"""
- if penalty < 0 or not isinstance(penalty, float):
+ if penalty < 0 or not isinstance(penalty, (int, float)):
raise ValueError(f"penalty must be a non-negative float, got {penalty}")
def repetition_penalty_processor(tokens, logits):