Mirror of https://github.com/ml-explore/mlx-examples.git (synced 2025-09-01 12:49:50 +08:00)
add deepseek coder example (#172)
* feat: add example for deepseek coder
* chore: remove hardcoded rope_scaling_factor
* feat: add quantization support
* chore: update readme
* chore: clean up the rope scaling factor param in create cos sin theta
* feat: add repetition_penalty
* style/consistency changes to ease future integration
* nits in README
* one more typo

---------

Co-authored-by: Awni Hannun <awni@apple.com>
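For context on the repetition_penalty bullet above: the sketch below shows how such a penalty is commonly applied to logits before sampling. The function name, signature, and call site are illustrative assumptions, not the exact code added by this commit.

    import mlx.core as mx

    def apply_repetition_penalty(logits: mx.array, generated_tokens, penalty: float) -> mx.array:
        # Illustrative sketch (not the commit's exact code): dampen the logits
        # of tokens that were already generated so they are less likely to repeat.
        if len(generated_tokens) == 0:
            return logits
        indices = mx.array(list(generated_tokens))
        selected = logits[:, indices]
        # Positive logits are divided by the penalty and negative ones multiplied,
        # so previously seen tokens lose probability in both cases (CTRL-style).
        logits[:, indices] = mx.where(selected < 0, selected * penalty, selected / penalty)
        return logits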
@@ -15,6 +15,7 @@ import torch
 from llama import Llama, ModelArgs, sanitize_config
+from mlx.utils import tree_flatten, tree_map, tree_unflatten
 
 
 def llama(model_path):
     SHARD_FIRST = ["wv", "wq", "wk", "w1", "w3", "output"]
     SHARD_SECOND = ["tok_embeddings", "wo", "w2"]
@@ -185,13 +186,13 @@ if __name__ == "__main__":
         action="store_true",
     )
     parser.add_argument(
-        "--q_group_size",
+        "--q-group-size",
         help="Group size for quantization.",
         type=int,
         default=64,
     )
     parser.add_argument(
-        "--q_bits",
+        "--q-bits",
         help="Bits per weight for quantization.",
         type=int,
         default=4,
     )
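The second hunk only renames the quantization flags to dashed form; argparse still exposes them as args.q_group_size and args.q_bits. As a hedged sketch of how such flags typically drive a quantization pass in MLX, assuming a loaded model object and the mlx.nn.quantize helper (this call site is not part of the diff):

    import mlx.nn as nn

    # Sketch only: quantize the model's linear layers using the parsed CLI flags.
    nn.quantize(model, group_size=args.q_group_size, bits=args.q_bits)

An invocation with the renamed flags would then look like `python convert.py --q-group-size 64 --q-bits 4` (the script name is an assumption based on repo conventions, not shown in this diff).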