diff --git a/llms/mlx_lm/utils.py b/llms/mlx_lm/utils.py index 402f0a9f..9e0e51ea 100644 --- a/llms/mlx_lm/utils.py +++ b/llms/mlx_lm/utils.py @@ -133,9 +133,9 @@ def generate( print(s[skip:], end="", flush=True) skip = len(s) - tokens = tokenizer.decode(tokens)[skip:] + tokens = tokenizer.decode(tokens) if verbose: - print(tokens, flush=True) + print(tokens[skip:], flush=True) return tokens diff --git a/llms/setup.py b/llms/setup.py index 78839b2b..c9fd6c46 100644 --- a/llms/setup.py +++ b/llms/setup.py @@ -8,7 +8,7 @@ with open(Path(__file__).parent / "mlx_lm/requirements.txt") as fid: requirements = [str(r) for r in pkg_resources.parse_requirements(fid)] setup( name="mlx-lm", - version="0.0.2", + version="0.0.3", description="LLMs on Apple silicon with MLX and the Hugging Face Hub", long_description=open("README.md", encoding="utf-8").read(), long_description_content_type="text/markdown",