Kv cache (#643)

* in place kv_cache * fix * fix kv cache size * partially fix kv cache dtype * step kv cache * multiple of step size * more tests + kv cache * more kv cache * update all models to use kv cache
2025-12-14 17:28:59 +08:00 · 2024-05-08 08:18:13 -07:00
parent bfbc0e434a
commit ee60e2a9d5
22 changed files with 534 additions and 298 deletions
--- a/llms/mlx_lm/tokenizer_utils.py
+++ b/llms/mlx_lm/tokenizer_utils.py
@@ -314,7 +314,8 @@ def load_tokenizer(model_path, tokenizer_config_extra={}):

    tokenizer_file = model_path / "tokenizer.json"
    if tokenizer_file.exists():
-        tokenizer_content = json.load(tokenizer_file.open())
+        with open(tokenizer_file, "r") as fid:
+            tokenizer_content = json.load(fid)
        if "decoder" in tokenizer_content:
            if _is_spm_decoder(tokenizer_content["decoder"]):
                detokenizer_class = SPMStreamingDetokenizer