diff --git a/llms/llama/convert.py b/llms/llama/convert.py
index 9c05c8b7..33610f44 100644
--- a/llms/llama/convert.py
+++ b/llms/llama/convert.py
@@ -8,6 +8,7 @@ import json
 import shutil
 from pathlib import Path
 from typing import Dict
+
 import mlx.core as mx
 import mlx.nn as nn
 import torch
@@ -149,7 +150,7 @@ def quantize(weights, config, args):
 def make_shards(weights: dict, max_file_size_gibibyte: int = 15):
     max_file_size_bytes = max_file_size_gibibyte << 30
     shards = []
-    shard : Dict[str, mx.array] = {}
+    shard: Dict[str, mx.array] = {}
     shard_size = 0
     for k, v in weights.items():
         if shard_size + v.nbytes > max_file_size_bytes:
diff --git a/llms/mixtral/mixtral.py b/llms/mixtral/mixtral.py
index 8520b87c..807d3b23 100644
--- a/llms/mixtral/mixtral.py
+++ b/llms/mixtral/mixtral.py
@@ -91,7 +91,7 @@ class FeedForward(nn.Module):
 class MOEFeedForward(nn.Module):
     def __init__(self, args: ModelArgs):
         super().__init__()
-
+        if args.moe is None:
+            raise ValueError("args.moe must not be None for MOEFeedForward")
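
For context, a minimal runnable sketch of the sharding behavior that make_shards implements (accumulate tensors into the current shard until the next one would push it past the size cap, then start a new shard). The bytes stand-ins for mx.array, the scaled-down 1 KiB cap, and the final flush of the last partial shard are assumptions for illustration, since the hunk above is truncated before the function's tail:

from typing import Dict, List


def make_shards_sketch(
    weights: Dict[str, bytes], max_file_size_kibibyte: int = 1
) -> List[Dict[str, bytes]]:
    # Mirrors the `<< 30` GiB shift above, scaled down to KiB for the demo.
    max_file_size_bytes = max_file_size_kibibyte << 10
    shards: List[Dict[str, bytes]] = []
    shard: Dict[str, bytes] = {}
    shard_size = 0
    for k, v in weights.items():
        # Start a new shard once the next tensor would overflow the cap.
        if shard_size + len(v) > max_file_size_bytes:
            shards.append(shard)
            shard, shard_size = {}, 0
        shard[k] = v
        shard_size += len(v)
    shards.append(shard)  # assumed tail: flush the final partial shard
    return shards


weights = {f"layers.{i}.weight": bytes(600) for i in range(4)}
print([list(s) for s in make_shards_sketch(weights)])
# Four 600-byte tensors with a 1024-byte cap -> one tensor per shard.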