* in place kv_cache

* fix

* fix kv cache size

* partially fix kv cache dtype

* step kv cache

* multiple of step size

* more teests + kv cache

* more kv cache

* udpate all models to use kv cache
This commit is contained in:
Awni Hannun
2024-05-08 08:18:13 -07:00
committed by GitHub
parent bfbc0e434a
commit ee60e2a9d5
22 changed files with 534 additions and 298 deletions

View File

@@ -314,7 +314,8 @@ def load_tokenizer(model_path, tokenizer_config_extra={}):
tokenizer_file = model_path / "tokenizer.json"
if tokenizer_file.exists():
tokenizer_content = json.load(tokenizer_file.open())
with open(tokenizer_file, "r") as fid:
tokenizer_content = json.load(fid)
if "decoder" in tokenizer_content:
if _is_spm_decoder(tokenizer_content["decoder"]):
detokenizer_class = SPMStreamingDetokenizer