Lllvvuu 2024-12-27 15:52:42 -08:00
parent fdd16caf7a
commit 30e98c85c1

@@ -296,9 +296,9 @@ def generate_step(
     prompt_progress_callback = prompt_progress_callback or (lambda *_: None)
 
     def _step(y):
-        if y.ndim == 1:
-            y = mx.expand_dims(y, axis=-1)
         with mx.stream(generation_stream):
+            if y.ndim == 1:
+                y = mx.expand_dims(y, axis=-1)
             logits = model(
                 y,
                 cache=prompt_cache,
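
The hunk above moves the dimension bump onto the generation stream: a 1-D token vector is expanded to (batch, 1) inside `with mx.stream(generation_stream):`, so the reshape is scheduled on the same stream as the model's forward pass rather than on the default stream. A minimal sketch of that pattern, assuming only MLX's public stream API (the doubling stands in for the model call, and `generation_stream` mirrors the module-level stream used in this file):

import mlx.core as mx

# Dedicated stream for generation work.
generation_stream = mx.new_stream(mx.default_device())

def _step(y):
    with mx.stream(generation_stream):
        # Ops issued in this block run on generation_stream.
        if y.ndim == 1:
            y = mx.expand_dims(y, axis=-1)  # (batch,) -> (batch, 1)
        return y * 2  # stand-in for model(y, cache=prompt_cache)

print(_step(mx.array([1, 2, 3])).shape)  # (3, 1)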
@@ -514,6 +514,7 @@ def batch_generate(
         # we have <pad>text<pad>text. Should involve taking `prompt_cache_lens`
         # to extend `mask` below, and handling position_ids (see TODO below)
         raise ValueError("Batch generation does not support prompt_cache yet.")
+    tokenizer = copy.deepcopy(tokenizer)
     if not isinstance(tokenizer, TokenizerWrapper):
         tokenizer = TokenizerWrapper(tokenizer)
     # TODO: left-shift position_ids for absolute/rotary positional encodings
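
The added `copy.deepcopy(tokenizer)` makes batch_generate work on a private copy, presumably so that any in-place adjustments it needs for batching (padding configuration, detokenizer state) do not leak back into the caller's tokenizer. A hedged sketch of that idea, using illustrative Hugging Face-style attributes (`padding_side`, `pad_token`) rather than the exact fields the real function touches:

import copy

def batch_generate_sketch(tokenizer, prompts):
    # Work on a private copy so the caller's tokenizer is left untouched.
    tokenizer = copy.deepcopy(tokenizer)
    tokenizer.padding_side = "left"  # left-pad so generation starts aligned
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    return tokenizer(prompts, padding=True)

After the call, the caller's tokenizer keeps its original padding side and pad token.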