mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-06-24 17:31:18 +08:00
Make sure to use UTF-8 when loading tokenizer.json
This commit is contained in:
parent
4c3df00162
commit
d8e6996254
@@ -352,7 +352,7 @@ def load_tokenizer(model_path, tokenizer_config_extra={}, eos_token_ids=None):
|
|||||||
|
|
||||||
tokenizer_file = model_path / "tokenizer.json"
|
tokenizer_file = model_path / "tokenizer.json"
|
||||||
if tokenizer_file.exists():
|
if tokenizer_file.exists():
|
||||||
with open(tokenizer_file, "r") as fid:
|
with open(tokenizer_file, "r", encoding="utf-8") as fid:
|
||||||
tokenizer_content = json.load(fid)
|
tokenizer_content = json.load(fid)
|
||||||
if "decoder" in tokenizer_content:
|
if "decoder" in tokenizer_content:
|
||||||
if _is_spm_decoder(tokenizer_content["decoder"]):
|
if _is_spm_decoder(tokenizer_content["decoder"]):
|
||||||
|
Loading…
Reference in New Issue
Block a user