Merge branch 'ml-explore:main' into adding-full-finetuning

Gökdeniz Gülmez
2024-09-21 23:59:50 +02:00
committed by GitHub
6 changed files with 48 additions and 43 deletions

View File

@@ -29,7 +29,14 @@ from mlx_lm import load, generate
model, tokenizer = load("mlx-community/Mistral-7B-Instruct-v0.3-4bit")
response = generate(model, tokenizer, prompt="hello", verbose=True)
prompt = "Write a story about Einstein"
messages = [{"role": "user", "content": prompt}]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
response = generate(model, tokenizer, prompt=prompt, verbose=True)
```
To see a description of all the arguments you can do:
@@ -79,6 +86,11 @@ model, tokenizer = load(repo)
prompt = "Write a story about Einstein"
messages = [{"role": "user", "content": prompt}]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
for t in stream_generate(model, tokenizer, prompt, max_tokens=512):
print(t, end="", flush=True)
print()
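
Assembled into a standalone script, the updated streaming example from this hunk looks roughly like the sketch below; the import line and the repo name are taken from the surrounding README (the same model repo as in the earlier example), not from this diff.

```python
from mlx_lm import load, stream_generate

# Illustrative model repo, matching the one used in the README's generate example.
repo = "mlx-community/Mistral-7B-Instruct-v0.3-4bit"
model, tokenizer = load(repo)

prompt = "Write a story about Einstein"
messages = [{"role": "user", "content": prompt}]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

# Stream the response piece by piece.
for t in stream_generate(model, tokenizer, prompt, max_tokens=512):
    print(t, end="", flush=True)
print()
```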

View File

@@ -179,44 +179,28 @@ Currently, `*.jsonl` files support three data formats: `chat`,
`chat`:
```jsonl
{
  "messages": [
    {
      "role": "system",
      "content": "You are a helpful assistant."
    },
    {
      "role": "user",
      "content": "Hello."
    },
    {
      "role": "assistant",
      "content": "How can I assist you today?"
    }
  ]
}
{"messages": [{"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello."}, {"role": "assistant", "content": "How can I assist you today?"}]}
```
`completions`:
```jsonl
{
  "prompt": "What is the capital of France?",
  "completion": "Paris."
}
{"prompt": "What is the capital of France?", "completion": "Paris."}
```
`text`:
```jsonl
{
  "text": "This is an example for the model."
}
{"text": "This is an example for the model."}
```
Note that the format is automatically determined by the dataset. Also note that keys in
each line not expected by the loader will be ignored.
> [!NOTE]
> Each example in the datasets must be on a single line. Do not put more than
> one example per line and do not split an example across multiple lines.
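
As a quick illustration of the single-line rule, a small `completions`-format file can be written with `json.dumps`, which never emits newlines inside an entry (the file name and second example below are made up):

```python
import json

# Hypothetical examples in the `completions` format.
examples = [
    {"prompt": "What is the capital of France?", "completion": "Paris."},
    {"prompt": "What is the capital of Italy?", "completion": "Rome."},
]

# One JSON object per line; json.dumps keeps each example on a single line.
with open("train.jsonl", "w") as f:
    for example in examples:
        f.write(json.dumps(example) + "\n")
```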
### Hugging Face Datasets
To use Hugging Face datasets, first install the `datasets` package:

View File

@@ -1,3 +1,3 @@
# Copyright © 2023-2024 Apple Inc.
__version__ = "0.18.1"
__version__ = "0.18.2"

View File

@@ -139,8 +139,8 @@ def main():
print("Saving...")
cache_dict = {}
for i, c in enumerate(cache):
cache_dict[f"{i}_keys"] = c.state[0]
cache_dict[f"{i}_values"] = c.state[1]
cache_dict[f"{i}_keys"] = c.state[0][..., : c.offset, :]
cache_dict[f"{i}_values"] = c.state[1][..., : c.offset, :]
metadata = {}
metadata["model"] = args.model
metadata["chat_template"] = tokenizer.chat_template

View File

@@ -67,7 +67,7 @@ class HfVocab:
def get_token_type(
    self, token_id: int, token_text: bytes, special_ids: Set[int]
) -> TokenType:
    if re.fullmatch(rb"<0x[0-9A-Fa-f]{2}>", token_text.encode("utf-8")):
    if re.fullmatch(r"<0x[0-9A-Fa-f]{2}>", token_text):
        return TokenType.BYTE
    return TokenType.CONTROL if token_id in special_ids else TokenType.NORMAL
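
Since `token_text` is now matched as a plain string rather than re-encoded bytes, the byte-token check can be illustrated directly (the token strings below are examples):

```python
import re

BYTE_TOKEN = r"<0x[0-9A-Fa-f]{2}>"

# SentencePiece-style byte tokens look like "<0x0A>"; ordinary tokens do not match.
print(bool(re.fullmatch(BYTE_TOKEN, "<0x0A>")))  # True
print(bool(re.fullmatch(BYTE_TOKEN, "hello")))   # False
```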
@@ -77,9 +77,7 @@ class HfVocab:
def added_tokens(self) -> Iterable[Tuple[bytes, float, TokenType]]:
    for text in self.added_tokens_list:
        if text in self.specials:
            toktype = self.get_token_type(
                self.specials[text], b"", self.special_ids
            )
            toktype = self.get_token_type(self.specials[text], "", self.special_ids)
            score = self.get_token_score(self.specials[text])
        else:
            toktype = TokenType.USER_DEFINED
@@ -243,15 +241,18 @@ def prepare_metadata(config, vocab):
metadata["tokenizer.ggml.tokens"] = tokens
metadata["tokenizer.ggml.scores"] = mx.array(scores, dtype=mx.float32)
metadata["tokenizer.ggml.token_type"] = mx.array(toktypes, dtype=mx.uint32)
metadata["tokenizer.ggml.bos_token_id"] = mx.array(
vocab.tokenizer.bos_token_id, dtype=mx.uint32
)
metadata["tokenizer.ggml.eos_token_id"] = mx.array(
vocab.tokenizer.eos_token_id, dtype=mx.uint32
)
metadata["tokenizer.ggml.unknown_token_id"] = mx.array(
vocab.tokenizer.unk_token_id, dtype=mx.uint32
)
if vocab.tokenizer.bos_token_id is not None:
metadata["tokenizer.ggml.bos_token_id"] = mx.array(
vocab.tokenizer.bos_token_id, dtype=mx.uint32
)
if vocab.tokenizer.eos_token_id is not None:
metadata["tokenizer.ggml.eos_token_id"] = mx.array(
vocab.tokenizer.eos_token_id, dtype=mx.uint32
)
if vocab.tokenizer.unk_token_id is not None:
metadata["tokenizer.ggml.unknown_token_id"] = mx.array(
vocab.tokenizer.unk_token_id, dtype=mx.uint32
)
metadata = {k: v for k, v in metadata.items() if v is not None}
return metadata
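
The new guards skip special-token ids that the tokenizer does not define (reported as `None`), which `mx.array` cannot convert; a minimal sketch of the pattern, with a made-up id value:

```python
import mlx.core as mx

unk_token_id = None  # e.g. a tokenizer that defines no unknown token

metadata = {}
if unk_token_id is not None:
    metadata["tokenizer.ggml.unknown_token_id"] = mx.array(unk_token_id, dtype=mx.uint32)
# With the guard in place, the key is simply omitted instead of failing on None.
```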

View File

@@ -14,7 +14,6 @@ from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Type,
import mlx.core as mx
import mlx.nn as nn
from huggingface_hub import snapshot_download
from huggingface_hub.utils._errors import RepositoryNotFoundError
from mlx.utils import tree_flatten
from transformers import PreTrainedTokenizer
@@ -91,7 +90,7 @@ def get_model_path(path_or_hf_repo: str, revision: Optional[str] = None) -> Path
],
)
)
except RepositoryNotFoundError:
except:
raise ModelNotFoundError(
f"Model not found for path or HF repo: {path_or_hf_repo}.\n"
"Please make sure you specified the local path or Hugging Face"
@@ -577,7 +576,16 @@ def upload_to_hub(path: str, upload_repo: str, hf_path: str):
from mlx_lm import load, generate
model, tokenizer = load("{upload_repo}")
response = generate(model, tokenizer, prompt="hello", verbose=True)
prompt="hello"
if hasattr(tokenizer, "apply_chat_template") and tokenizer.chat_template is not None:
    messages = [{{"role": "user", "content": prompt}}]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
response = generate(model, tokenizer, prompt=prompt, verbose=True)
```
"""
)
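
For a concrete (hypothetical) upload repo, the card snippet generated by the updated f-string would render roughly as follows, with the doubled braces resolving to literal braces:

```python
from mlx_lm import load, generate

model, tokenizer = load("mlx-community/my-model-4bit")  # hypothetical upload repo

prompt = "hello"

# Apply the chat template only when the tokenizer defines one.
if hasattr(tokenizer, "apply_chat_template") and tokenizer.chat_template is not None:
    messages = [{"role": "user", "content": prompt}]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

response = generate(model, tokenizer, prompt=prompt, verbose=True)
```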