mirror of
https://github.com/ml-explore/mlx-examples.git
synced 2025-09-24 15:58:11 +08:00
add deepseek coder example (#172)
* feat: add example for deepseek coder * chore: remove hardcoded rope_scaling_factor * feat: add quantization support * chore: update readme * chore: clean up the rope scaling factor param in create cos sin theta * feat: add repetition_penalty * style/consistency changes to ease future integration * nits in README * one more typo --------- Co-authored-by: Awni Hannun <awni@apple.com>
This commit is contained in:
@@ -60,7 +60,12 @@ def convert(args):
|
||||
args.model, trust_remote_code=True, torch_dtype=torch.float16
|
||||
)
|
||||
state_dict = model.state_dict()
|
||||
weights = {replace_key(k): (v.numpy() if v.dtype != torch.bfloat16 else v.to(torch.float32).numpy()) for k, v in state_dict.items()}
|
||||
weights = {
|
||||
replace_key(k): (
|
||||
v.numpy() if v.dtype != torch.bfloat16 else v.to(torch.float32).numpy()
|
||||
)
|
||||
for k, v in state_dict.items()
|
||||
}
|
||||
config = model.config.to_dict()
|
||||
|
||||
if args.quantize:
|
||||
@@ -95,13 +100,13 @@ if __name__ == "__main__":
|
||||
action="store_true",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--q_group_size",
|
||||
"--q-group-size",
|
||||
help="Group size for quantization.",
|
||||
type=int,
|
||||
default=64,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--q_bits",
|
||||
"--q-bits",
|
||||
help="Bits per weight for quantization.",
|
||||
type=int,
|
||||
default=4,
|
||||
|
Reference in New Issue
Block a user