mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-06-24 17:31:18 +08:00
fix long prompt generations (#1023)
This commit is contained in:
parent
fca087be49
commit
b7373cb44f
@ -239,8 +239,8 @@ def generate_step(
|
|||||||
return y, logprobs.squeeze(0)
|
return y, logprobs.squeeze(0)
|
||||||
|
|
||||||
while y.size > prefill_step_size:
|
while y.size > prefill_step_size:
|
||||||
model(y[:prefill_step_size][None], cache=cache)
|
model(y[:prefill_step_size][None], cache=prompt_cache)
|
||||||
mx.eval([c.state for c in cache])
|
mx.eval([c.state for c in prompt_cache])
|
||||||
y = y[prefill_step_size:]
|
y = y[prefill_step_size:]
|
||||||
|
|
||||||
y, logprobs = _step(y)
|
y, logprobs = _step(y)
|
||||||
|
Loading…
Reference in New Issue
Block a user