mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-08-29 09:56:24 +08:00
couple other fixes
This commit is contained in:
parent
2277033a24
commit
2024181b7c
@@ -299,6 +299,9 @@ def generate_step(
     prompt_processed_tokens = 0
     while y.size > prefill_step_size:
         model(y[:prefill_step_size][None], cache=prompt_cache)
+        maybe_quantize_kv_cache(
+            prompt_cache, quantized_kv_start, kv_group_size, kv_bits
+        )
         mx.eval([c.state for c in prompt_cache])
         prompt_progress_callback(prompt_processed_tokens, total_prompt_tokens)
         prompt_processed_tokens += prefill_step_size
Loading…
Reference in New Issue
Block a user