deepseek v3 model with pipeline parallelism (#1191)

* deepseekv3

* use upload_large_file instead of deprecated multi commit

* add pipeline generation and example

* comment

* get fp16 working

* use mlx==0.22
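The commit message above refers to pipeline parallelism: the model's decoder layers are split into contiguous stages, each rank holds only its slice, and activations flow from one rank to the next during generation. As a minimal, framework-free sketch of the partitioning idea (not the actual mlx-lm implementation; `pipeline_slice` is a hypothetical helper):

```python
# Hypothetical sketch of how a pipeline schedule might assign decoder
# layers to ranks. This is NOT the mlx-lm code from this commit; it only
# illustrates the contiguous-slice partitioning that pipeline parallelism
# relies on.

def pipeline_slice(num_layers: int, rank: int, world_size: int) -> tuple[int, int]:
    """Return the half-open [start, end) layer range owned by `rank`.

    Layers are divided as evenly as possible; when num_layers is not
    divisible by world_size, the first (num_layers % world_size) ranks
    each take one extra layer.
    """
    per_rank = num_layers // world_size
    extra = num_layers % world_size
    start = rank * per_rank + min(rank, extra)
    end = start + per_rank + (1 if rank < extra else 0)
    return start, end

# Example: 61 decoder layers (DeepSeek-V3's depth) over 8 ranks.
# The first 61 % 8 = 5 ranks own 8 layers each, the remaining 3 own 7,
# and together the slices cover all 61 layers exactly once.
```

At generation time each rank would run only the layers in its slice and send the resulting hidden states to the next rank, so the full model never has to fit on one device.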
This commit is contained in:
Awni Hannun
2025-01-09 15:55:53 -08:00
committed by GitHub
parent 40b88eff48
commit 5cae0a60e6
7 changed files with 577 additions and 5 deletions


@@ -561,7 +561,7 @@ def load(
Defaults to an empty dictionary.
adapter_path (str, optional): Path to the LoRA adapters. If provided, applies LoRA layers
to the model. Default: ``None``.
-    lazy (bool): If False eval the model parameters to make sure they are
+    lazy (bool): If ``False`` eval the model parameters to make sure they are
loaded in memory before returning, otherwise they will be loaded
when needed. Default: ``False``
Returns:
@@ -655,7 +655,7 @@ def upload_to_hub(path: str, upload_repo: str, hf_path: str):
model, tokenizer = load("{upload_repo}")
-    prompt="hello"
+    prompt = "hello"
if tokenizer.chat_template is not None:
messages = [{{"role": "user", "content": prompt}}]