mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-09-01 12:49:50 +08:00
Kv cache (#643)
* in place kv_cache * fix * fix kv cache size * partially fix kv cache dtype * step kv cache * multiple of step size * more tests + kv cache * more kv cache * update all models to use kv cache
This commit is contained in:
@@ -314,7 +314,8 @@ def load_tokenizer(model_path, tokenizer_config_extra={}):
|
||||
|
||||
tokenizer_file = model_path / "tokenizer.json"
|
||||
if tokenizer_file.exists():
|
||||
tokenizer_content = json.load(tokenizer_file.open())
|
||||
with open(tokenizer_file, "r") as fid:
|
||||
tokenizer_content = json.load(fid)
|
||||
if "decoder" in tokenizer_content:
|
||||
if _is_spm_decoder(tokenizer_content["decoder"]):
|
||||
detokenizer_class = SPMStreamingDetokenizer
|
||||
|
Reference in New Issue
Block a user