mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-06-24 17:31:18 +08:00
Fix detokenizer space match for quote (#1072)
* fix + test
* remove transformer flax/torch warning
* format
This commit is contained in:
parent
ab4bf05c6e
commit
8fe9539af7
@ -1,4 +1,9 @@
|
|||||||
# Copyright © 2023-2024 Apple Inc.
|
# Copyright © 2023-2024 Apple Inc.
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
from ._version import __version__
|
from ._version import __version__
|
||||||
|
|
||||||
|
os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"
|
||||||
|
|
||||||
from .utils import convert, generate, load, stream_generate
|
from .utils import convert, generate, load, stream_generate
|
||||||
|
@ -169,7 +169,7 @@ class BPEStreamingDetokenizer(StreamingDetokenizer):
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
_byte_decoder = None
|
_byte_decoder = None
|
||||||
_space_matches = (".", "?", "!", ",", "'", "n't", "'m", "'s", "'ve", "'re")
|
_space_matches = (".", "?", "!", ",", "n't", "'m", "'s", "'ve", "'re")
|
||||||
|
|
||||||
def __init__(self, tokenizer):
|
def __init__(self, tokenizer):
|
||||||
|
|
||||||
|
@ -51,6 +51,9 @@ class TestTokenizers(unittest.TestCase):
|
|||||||
tokens = tokenizer.encode("3 3")
|
tokens = tokenizer.encode("3 3")
|
||||||
check(tokens)
|
check(tokens)
|
||||||
|
|
||||||
|
tokens = tokenizer.encode("import 'package:flutter/material.dart';")
|
||||||
|
check(tokens)
|
||||||
|
|
||||||
def test_tokenizers(self):
|
def test_tokenizers(self):
|
||||||
tokenizer_repos = [
|
tokenizer_repos = [
|
||||||
("mlx-community/Qwen1.5-0.5B-Chat-4bit", BPEStreamingDetokenizer),
|
("mlx-community/Qwen1.5-0.5B-Chat-4bit", BPEStreamingDetokenizer),
|
||||||
|
Loading…
Reference in New Issue
Block a user