couple other fixes

commit 2024181b7c
parent 2277033a24
Author: Awni Hannun
Date:   2024-12-12 10:51:38 -08:00


@@ -299,6 +299,9 @@ def generate_step(
     prompt_processed_tokens = 0
     while y.size > prefill_step_size:
         model(y[:prefill_step_size][None], cache=prompt_cache)
+        maybe_quantize_kv_cache(
+            prompt_cache, quantized_kv_start, kv_group_size, kv_bits
+        )
         mx.eval([c.state for c in prompt_cache])
         prompt_progress_callback(prompt_processed_tokens, total_prompt_tokens)
         prompt_processed_tokens += prefill_step_size
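
The added call quantizes the KV cache while the prompt is still being prefilled in prefill_step_size chunks, instead of only during the decode loop, so long prompts also benefit from a quantized cache. Below is a minimal sketch of what a helper like maybe_quantize_kv_cache plausibly does, assuming a QuantizedKVCache type with a to_quantized() conversion; those names and the exact threshold logic are assumptions for illustration, not taken from this diff.

# Hypothetical sketch only: QuantizedKVCache and to_quantized() are assumed,
# not shown in this commit.
from mlx_lm.models.cache import QuantizedKVCache  # assumed import path

def maybe_quantize_kv_cache(prompt_cache, quantized_kv_start, kv_group_size, kv_bits):
    # Skip when quantization is disabled or the cache is already quantized.
    if kv_bits is None or isinstance(prompt_cache[0], QuantizedKVCache):
        return
    # Convert each layer's cache once enough tokens have been processed.
    if prompt_cache[0].offset > quantized_kv_start:
        for i in range(len(prompt_cache)):
            prompt_cache[i] = prompt_cache[i].to_quantized(
                group_size=kv_group_size, bits=kv_bits
            )

Calling this inside the prefill loop (as the diff does) means the conversion happens as soon as the processed-token count crosses quantized_kv_start, rather than waiting until generation starts.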