Add llms subdir + update README (#145)

* add llms subdir + update README * nits * use same pre-commit as mlx * update readmes a bit * format
2025-12-11 06:55:00 +08:00 · 2023-12-20 10:22:25 -08:00
parent aed14618ca
commit 27c0a8c002
62 changed files with 164 additions and 146 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,5 +1,11 @@
 repos:
-   repo: https://github.com/psf/black
+-   repo: https://github.com/psf/black-pre-commit-mirror
    rev: 22.10.0
    hooks:
    -   id: black
 -   repo: https://github.com/pycqa/isort
    rev: 5.12.0
    hooks:
    -   id: isort
        args:
            - --profile=black
--- a/README.md
+++ b/README.md
@@ -5,18 +5,37 @@ framework](https://github.com/ml-explore/mlx).
 The [MNIST](mnist) example is a good starting point to learn how to use MLX.
-Some more useful examples include:
+Some more useful examples are listed below.
 ### Text Models 
 - [Transformer language model](transformer_lm) training.
- Large scale text generation with [LLaMA](llama), [Mistral](mistral) or [Phi](phi2).  
+- Large scale text generation with [LLaMA](llms/llama),
- Mixture-of-experts (MoE) language model with [Mixtral 8x7B](mixtral)
+  [Mistral](llms/mistral), [Phi-2](llms/phi2), and more in the [LLMs](llms)
  directory.
 - A mixture-of-experts (MoE) language model with [Mixtral 8x7B](llms/mixtral).
 - Parameter efficient fine-tuning with [LoRA](lora).
 - Text-to-text multi-task Transformers with [T5](t5).
 - Bidirectional language understanding with [BERT](bert).
 ### Image Models 
 - Generating images with [Stable Diffusion](stable_diffusion).
 ### Audio Models
 - Speech recognition with [OpenAI's Whisper](whisper).
- Bidirectional language understanding with [BERT](bert)
+
 ### Other Models 
 - Semi-supervised learning on graph-structured data with [GCN](gcn).
-Note: You can now directly download a few converted checkpoints from the [MLX Community](https://huggingface.co/mlx-community) organisation on Hugging Face.
+### Hugging Face
 Note: You can now directly download a few converted checkpoints from the [MLX
 Community](https://huggingface.co/mlx-community) organization on Hugging Face.
 We encourage you to join the community and [contribute new
 models](https://github.com/ml-explore/mlx-examples/issues/155).
 ## Contributing 
--- a/bert/convert.py
+++ b/bert/convert.py
@@ -1,7 +1,7 @@
 from transformers import BertModel
 import argparse
 import numpy
 from transformers import BertModel
 def replace_key(key: str) -> str:
--- a/bert/hf_model.py
+++ b/bert/hf_model.py
@@ -1,7 +1,7 @@
 from transformers import AutoModel, AutoTokenizer
 import argparse
 from transformers import AutoModel, AutoTokenizer
 def run(bert_model: str):
    batch = [
--- a/bert/model.py
+++ b/bert/model.py
@@ -1,13 +1,13 @@
-import numpy as np
+import argparse
 from typing import Optional
 from dataclasses import dataclass
-from transformers import BertTokenizer
+from typing import Optional
 from mlx.utils import tree_unflatten
 import mlx.core as mx
 import mlx.nn as nn
 import argparse
 import numpy
 import numpy as np
 from mlx.utils import tree_unflatten
 from transformers import BertTokenizer
@dataclass
--- a/cifar/dataset.py
+++ b/cifar/dataset.py
@@ -1,6 +1,7 @@
 import math
 import mlx.core as mx
 from mlx.data.datasets import load_cifar10
 import math
 def get_cifar10(batch_size, root=None):
--- a/cifar/main.py
+++ b/cifar/main.py
@@ -1,11 +1,11 @@
 import argparse
 import time
 import resnet
 import mlx.nn as nn
 import mlx.core as mx
 import mlx.optimizers as optim
 from dataset import get_cifar10
 import mlx.core as mx
 import mlx.nn as nn
 import mlx.optimizers as optim
 import resnet
 from dataset import get_cifar10
 parser = argparse.ArgumentParser(add_help=True)
 parser.add_argument(
--- a/cifar/resnet.py
+++ b/cifar/resnet.py
@@ -6,11 +6,11 @@ There's no BatchNorm is mlx==0.0.4, using LayerNorm instead.
 """
 from typing import Any
 import mlx.core as mx
 import mlx.nn as nn
 from mlx.utils import tree_flatten
 __all__ = [
    "ResNet",
    "resnet20",
--- a/gcn/datasets.py
+++ b/gcn/datasets.py
@@ -1,9 +1,9 @@
 import os
 import requests
 import tarfile
 import mlx.core as mx
 import numpy as np
 import requests
 import scipy.sparse as sparse
 """
--- a/gcn/main.py
+++ b/gcn/main.py
@@ -3,10 +3,10 @@ from argparse import ArgumentParser
 import mlx.core as mx
 import mlx.nn as nn
 import mlx.optimizers as optim
 from datasets import download_cora, load_data, train_val_test_mask
 from mlx.nn.losses import cross_entropy
 from mlx.utils import tree_flatten
 from datasets import download_cora, load_data, train_val_test_mask
 from gcn import GCN
--- a/llms/llama/README.md
+++ b/llms/llama/README.md
@@ -20,8 +20,9 @@ weights you will need to request access from Meta:
 - [Request Llama v1](https://docs.google.com/forms/d/e/1FAIpQLSfqNECQnMkycAp2jP4Z9TFX0cGR4uf7b_fBxjY_OjhJILlKGA/viewform)
 - [Request Llama v2](https://ai.meta.com/resources/models-and-libraries/llama-downloads/)
-> [!TIP]
+> [!TIP] Alternatively, you can also download a few converted checkpoints from
-> Alternatively, you can also download a few converted checkpoints from the the [MLX Community](https://huggingface.co/mlx-community) organisation on Hugging Face and skip the conversion step.
+> the [MLX Community](https://huggingface.co/mlx-community) organization on
 > Hugging Face and skip the conversion step.
 You can download the TinyLlama models directly from [Hugging
 Face](https://huggingface.co/TinyLlama).
--- a/llms/llama/convert.py
+++ b/llms/llama/convert.py
@@ -4,8 +4,9 @@ import argparse
 import collections
 import glob
 import json
 import numpy as np
 from pathlib import Path
 import numpy as np
 import torch
--- a/llms/llama/llama.py
+++ b/llms/llama/llama.py
@@ -1,16 +1,16 @@
 # Copyright © 2023 Apple Inc.
 import argparse
 from dataclasses import dataclass
 import json
 from pathlib import Path
 from typing import Optional, Tuple, List
 from sentencepiece import SentencePieceProcessor
 import time
 from dataclasses import dataclass
 from pathlib import Path
 from typing import List, Optional, Tuple
 import mlx.core as mx
 import mlx.nn as nn
 from mlx.utils import tree_unflatten
 from sentencepiece import SentencePieceProcessor
@dataclass
--- a/llms/llama/requirements.txt
+++ b/llms/llama/requirements.txt
--- a/llms/llama/sample_prompt.txt
+++ b/llms/llama/sample_prompt.txt
--- a/llms/mistral/.gitignore
+++ b/llms/mistral/.gitignore
--- a/llms/mistral/README.md
+++ b/llms/mistral/README.md
@@ -29,7 +29,10 @@ python convert.py
 The conversion script will save the converted weights in the same location.
 > [!TIP]
-> Alternatively, you can also download a few converted checkpoints from the the [MLX Community](https://huggingface.co/mlx-community) organisation on Hugging Face and skip the conversion step.
+> Alternatively, you can also download a few converted checkpoints from the
 > [MLX Community](https://huggingface.co/mlx-community) organization on Hugging
 > Face and skip the conversion step.
 ### Run
--- a/llms/mistral/convert.py
+++ b/llms/mistral/convert.py
@@ -2,10 +2,10 @@
 import argparse
 import json
 import numpy as np
 from pathlib import Path
 import torch
 import numpy as np
 import torch
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Convert Mistral weights to MLX.")
--- a/llms/mistral/mistral.py
+++ b/llms/mistral/mistral.py
@@ -1,15 +1,15 @@
 # Copyright © 2023 Apple Inc.
 import argparse
 from dataclasses import dataclass
 import json
 from dataclasses import dataclass
 from pathlib import Path
-from typing import Optional, Tuple, List
+from typing import List, Optional, Tuple
 from sentencepiece import SentencePieceProcessor
 import mlx.core as mx
 import mlx.nn as nn
 from mlx.utils import tree_map, tree_unflatten
 from sentencepiece import SentencePieceProcessor
@dataclass
--- a/llms/mistral/requirements.txt
+++ b/llms/mistral/requirements.txt
--- a/llms/mistral/test.py
+++ b/llms/mistral/test.py
@@ -2,11 +2,10 @@
 import unittest
 import mistral
 import mlx.core as mx
 from mlx.utils import tree_map
 import mistral
 class TestMistral(unittest.TestCase):
    def test_model(self):
--- a/llms/mixtral/README.md
+++ b/llms/mixtral/README.md
--- a/llms/mixtral/convert.py
+++ b/llms/mixtral/convert.py
@@ -3,8 +3,9 @@
 import argparse
 import glob
 import json
 import numpy as np
 from pathlib import Path
 import numpy as np
 import torch
--- a/llms/mixtral/mixtral.py
+++ b/llms/mixtral/mixtral.py
@@ -1,17 +1,17 @@
 # Copyright © 2023 Apple Inc.
 import argparse
 from dataclasses import dataclass
 import glob
 import json
-import numpy as np
+from dataclasses import dataclass
 from pathlib import Path
-from typing import Optional, Tuple, List
+from typing import List, Optional, Tuple
 from sentencepiece import SentencePieceProcessor
 import mlx.core as mx
 import mlx.nn as nn
 import numpy as np
 from mlx.utils import tree_map, tree_unflatten
 from sentencepiece import SentencePieceProcessor
@dataclass
--- a/llms/mixtral/params.json
+++ b/llms/mixtral/params.json
--- a/llms/mixtral/requirements.txt
+++ b/llms/mixtral/requirements.txt
--- a/llms/phi2/.gitignore
+++ b/llms/phi2/.gitignore
--- a/llms/phi2/README.md
+++ b/llms/phi2/README.md
@@ -17,8 +17,10 @@ python convert.py
 This will make the `weights.npz` file which MLX can read.
-> [!TIP]
+> [!TIP] Alternatively, you can also download a few converted checkpoints from
-> Alternatively, you can also download a few converted checkpoints from the the [MLX Community](https://huggingface.co/mlx-community) organisation on Hugging Face and skip the conversion step.
+> the [MLX Community](https://huggingface.co/mlx-community) organization on
 > Hugging Face and skip the conversion step.
 ## Generate 
--- a/llms/phi2/convert.py
+++ b/llms/phi2/convert.py
@@ -1,5 +1,5 @@
 from transformers import AutoModelForCausalLM
 import numpy as np
 from transformers import AutoModelForCausalLM
 def replace_key(key: str) -> str:
--- a/llms/phi2/phi2.py
+++ b/llms/phi2/phi2.py
@@ -1,13 +1,13 @@
 import argparse
 import math
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Optional
 from dataclasses import dataclass
 from mlx.utils import tree_unflatten
 from transformers import AutoTokenizer
 import mlx.core as mx
 import mlx.nn as nn
-import math
+from mlx.utils import tree_unflatten
 from transformers import AutoTokenizer
@dataclass
@@ -167,7 +167,7 @@ def load_model(model_path: str):
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Phi-2 inference script")
    parser.add_argument(
-        "--model_path",
+        "--model-path",
        type=str,
        default="phi-2",
        help="The path to the model weights",
@@ -178,7 +178,7 @@ if __name__ == "__main__":
        default="Write a detailed analogy between mathematics and a lighthouse.",
    )
    parser.add_argument(
-        "--max_tokens",
+        "--max-tokens",
        "-m",
        type=int,
        default=100,
--- a/llms/phi2/requirements.txt
+++ b/llms/phi2/requirements.txt
--- a/llms/qwen/convert.py
+++ b/llms/qwen/convert.py
@@ -1,8 +1,9 @@
 import argparse
-from transformers import AutoModelForCausalLM
+import json
 import numpy as np
 import torch
-import json
+from transformers import AutoModelForCausalLM
 def replace_key(key: str) -> str:
--- a/llms/qwen/qwen.py
+++ b/llms/qwen/qwen.py
@@ -1,6 +1,7 @@
 import argparse
 from dataclasses import dataclass
 import json
 from dataclasses import dataclass
 import mlx.core as mx
 import mlx.nn as nn
 from mlx.utils import tree_unflatten
--- a/lora/convert.py
+++ b/lora/convert.py
@@ -2,12 +2,12 @@
 import argparse
 import json
 import numpy as np
 from pathlib import Path
 import shutil
 import os
-import torch
+import shutil
 from pathlib import Path
 import numpy as np
 import torch
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(
--- a/lora/data/wikisql.py
+++ b/lora/data/wikisql.py
@@ -32,8 +32,8 @@ class WikiSQL:
    def _maybe_download(self, data_dir):
        if not os.path.exists(data_dir):
            import io
            from urllib import request
            import tarfile
            from urllib import request
            url = "https://raw.githubusercontent.com/salesforce/WikiSQL/master/data.tar.bz2"
            r = request.urlopen(url)
--- a/lora/lora.py
+++ b/lora/lora.py
@@ -3,19 +3,17 @@
 import argparse
 import json
 import math
 import numpy as np
 from pathlib import Path
 from sentencepiece import SentencePieceProcessor
 import time
-from typing import Optional, Tuple, List
+from pathlib import Path
 from typing import List, Optional, Tuple
 import mlx.core as mx
 import mlx.nn as nn
 import mlx.optimizers as optim
-from mlx.utils import tree_map, tree_flatten, tree_unflatten
+import numpy as np
-
+from mlx.utils import tree_flatten, tree_map, tree_unflatten
-
+from models import LoRALinear, Model, ModelArgs
-from models import ModelArgs, Model, LoRALinear
+from sentencepiece import SentencePieceProcessor
 def build_parser():
--- a/lora/models.py
+++ b/lora/models.py
@@ -1,8 +1,8 @@
 # Copyright © 2023 Apple Inc.
 from dataclasses import dataclass
 import math
-from typing import Optional, Tuple, List
+from dataclasses import dataclass
 from typing import List, Optional, Tuple
 import mlx.core as mx
 import mlx.nn as nn
--- a/mnist/jax_main.py
+++ b/mnist/jax_main.py
@@ -1,9 +1,10 @@
 # Copyright © 2023 Apple Inc.
 import functools
 import time
 import jax
 import jax.numpy as jnp
 import functools
 import time
 import mnist
--- a/mnist/main.py
+++ b/mnist/main.py
@@ -3,11 +3,10 @@
 import argparse
 import time
 import numpy as np
 import mlx.core as mx
 import mlx.nn as nn
 import mlx.optimizers as optim
 import numpy as np
 import mnist
--- a/mnist/mnist.py
+++ b/mnist/mnist.py
@@ -1,11 +1,12 @@
 # Copyright © 2023 Apple Inc.
 import gzip
 import numpy as np
 import os
 import pickle
 from urllib import request
 import numpy as np
 def mnist(save_dir="/tmp"):
    """
--- a/mnist/torch_main.py
+++ b/mnist/torch_main.py
@@ -1,9 +1,10 @@
 # Copyright © 2023 Apple Inc.
 import argparse
 import torch
 import time
 import torch
 import mnist
--- a/speechcommands/kwt.py
+++ b/speechcommands/kwt.py
@@ -1,9 +1,9 @@
 from typing import Any
 import mlx.core as mx
 import mlx.nn as nn
 from mlx.utils import tree_flatten
 __all__ = ["KWT", "kwt1", "kwt2", "kwt3"]
--- a/speechcommands/main.py
+++ b/speechcommands/main.py
@@ -1,13 +1,13 @@
 import argparse
 import time
 import kwt
 import mlx.nn as nn
 import mlx.data as dx
 import mlx.core as mx
 import mlx.optimizers as optim
 from mlx.data.features import mfsc
 from mlx.data.datasets import load_speechcommands
 import kwt
 import mlx.core as mx
 import mlx.data as dx
 import mlx.nn as nn
 import mlx.optimizers as optim
 from mlx.data.datasets import load_speechcommands
 from mlx.data.features import mfsc
 parser = argparse.ArgumentParser(add_help=True)
 parser.add_argument(
--- a/stable_diffusion/stable_diffusion/init.py
+++ b/stable_diffusion/stable_diffusion/init.py
@@ -6,12 +6,12 @@ from typing import Tuple
 import mlx.core as mx
 from .model_io import (
-    load_unet,
+    _DEFAULT_MODEL,
    load_text_encoder,
    load_autoencoder,
    load_diffusion_config,
    load_text_encoder,
    load_tokenizer,
-    _DEFAULT_MODEL,
+    load_unet,
 )
 from .sampler import SimpleEulerSampler
--- a/stable_diffusion/stable_diffusion/model_io.py
+++ b/stable_diffusion/stable_diffusion/model_io.py
@@ -3,20 +3,18 @@
 import json
 from functools import partial
 import mlx.core as mx
 import numpy as np
 from huggingface_hub import hf_hub_download
 from mlx.utils import tree_unflatten
 from safetensors import safe_open as safetensor_open
 import mlx.core as mx
 from mlx.utils import tree_unflatten
 from .clip import CLIPTextModel
-from .config import UNetConfig, CLIPTextModelConfig, AutoencoderConfig, DiffusionConfig
+from .config import AutoencoderConfig, CLIPTextModelConfig, DiffusionConfig, UNetConfig
 from .tokenizer import Tokenizer
 from .unet import UNetModel
 from .vae import Autoencoder
 _DEFAULT_MODEL = "stabilityai/stable-diffusion-2-1-base"
 _MODELS = {
    # See https://huggingface.co/stabilityai/stable-diffusion-2-1-base for the model details and license
--- a/stable_diffusion/stable_diffusion/sampler.py
+++ b/stable_diffusion/stable_diffusion/sampler.py
@@ -1,9 +1,9 @@
 # Copyright © 2023 Apple Inc.
 from .config import DiffusionConfig
 import mlx.core as mx
 from .config import DiffusionConfig
 def _linspace(a, b, num):
    x = mx.arange(0, num) / (num - 1)
--- a/stable_diffusion/txt2image.py
+++ b/stable_diffusion/txt2image.py
@@ -1,14 +1,13 @@
 # Copyright © 2023 Apple Inc.
 import argparse
 import mlx.core as mx
 from PIL import Image
 from tqdm import tqdm
 import mlx.core as mx
 from stable_diffusion import StableDiffusion
 if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Generate images from a textual prompt using stable diffusion"
--- a/t5/convert.py
+++ b/t5/convert.py
@@ -1,6 +1,5 @@
 from transformers import T5ForConditionalGeneration
 import numpy as np
-
+from transformers import T5ForConditionalGeneration
 SHARED_REPLACEMENT_PATTERNS = [
    (".block.", ".layers."),
@@ -48,8 +47,7 @@ def convert(model_name, dtype):
    dtype = getattr(np, dtype)
    model = T5ForConditionalGeneration.from_pretrained(model_name, torch_dtype="auto")
    weights = {
-        replace_key(k): v.numpy().astype(dtype)
+        replace_key(k): v.numpy().astype(dtype) for k, v in model.state_dict().items()
        for k, v in model.state_dict().items()
    }
    file_name = model_name.replace("/", "-")
    print(f"Saving weights to {file_name}.npz")
--- a/t5/hf_t5.py
+++ b/t5/hf_t5.py
@@ -1,7 +1,7 @@
 from transformers import T5ForConditionalGeneration, T5EncoderModel, AutoTokenizer
 import argparse
 from transformers import AutoTokenizer, T5EncoderModel, T5ForConditionalGeneration
 def embed(t5_model: str):
    batch = [
@@ -51,4 +51,3 @@ if __name__ == "__main__":
        embed(args.model)
    else:
        generate(args.model)
--- a/t5/t5.py
+++ b/t5/t5.py
@@ -1,11 +1,11 @@
 import argparse
 from typing import Optional, Tuple, List
 from time import perf_counter_ns
 from typing import List, Optional, Tuple
 import numpy as np
 import mlx.core as mx
 import mlx.nn as nn
-from mlx.utils import tree_unflatten, tree_map
+import numpy as np
 from mlx.utils import tree_map, tree_unflatten
 from transformers import T5Config, T5Tokenizer
@@ -166,7 +166,7 @@ class DenseActivation(nn.Module):
            self.act = nn.relu
        elif activation == "gelu":
            self.act = nn.gelu
-        elif activation ==  "silu":
+        elif activation == "silu":
            self.act = nn.silu
        else:
            raise ValueError(f"Unknown activation: {activation}")
@@ -337,7 +337,7 @@ class Tokenizer:
        self._tokenizer = T5Tokenizer.from_pretrained(
            args.model,
            legacy=False,
-            model_max_length=getattr(config, 'n_positions', 512)
+            model_max_length=getattr(config, "n_positions", 512),
        )
    @property
--- a/transformer_lm/datasets.py
+++ b/transformer_lm/datasets.py
@@ -2,10 +2,11 @@
 import io
 import itertools
 import numpy as np
 import os
 from urllib import request
 import zipfile
 from urllib import request
 import numpy as np
 def load_dataset(dataname):
--- a/transformer_lm/jax_main.py
+++ b/transformer_lm/jax_main.py
@@ -1,14 +1,14 @@
 # Copyright © 2023 Apple Inc.
 import functools
 import jax
 import jax.numpy as jnp
 import math
 import numpy as np
 import time
 from collections import namedtuple
 import datasets
 import jax
 import jax.numpy as jnp
 import numpy as np
 from tree_utils import tree_flatten
 """
--- a/transformer_lm/main.py
+++ b/transformer_lm/main.py
@@ -3,15 +3,13 @@
 import math
 import time
-import numpy as np
+import datasets
 import mlx.core as mx
 import mlx.nn as nn
 import mlx.optimizers as optim
 import numpy as np
 from mlx.utils import tree_flatten
 import datasets
 class TransformerLM(nn.Module):
    def __init__(self, vocab_size: int, num_layers: int, dims: int, num_heads: int):
--- a/transformer_lm/tf_main.py
+++ b/transformer_lm/tf_main.py
@@ -3,11 +3,10 @@
 import math
 import time
 import datasets
 import numpy as np
 import tensorflow as tf
 import datasets
 def to_samples(context_size, dataset):
    tokens = dataset.size
--- a/transformer_lm/torch_main.py
+++ b/transformer_lm/torch_main.py
@@ -3,11 +3,10 @@
 import math
 import time
 import datasets
 import numpy as np
 import torch
 import datasets
 def to_samples(context_size, dataset):
    tokens = dataset.size
--- a/whisper/benchmark.py
+++ b/whisper/benchmark.py
@@ -5,10 +5,7 @@ import time
 import mlx.core as mx
-from whisper import load_models
+from whisper import audio, decoding, load_models, transcribe
 from whisper import audio
 from whisper import decoding
 from whisper import transcribe
 audio_file = "whisper/assets/ls_test.flac"
--- a/whisper/test.py
+++ b/whisper/test.py
@@ -1,19 +1,18 @@
 # Copyright © 2023 Apple Inc.
 import os
 import subprocess
 import unittest
 import mlx.core as mx
 import numpy as np
 import os
 import subprocess
 import torch
 import whisper
 import whisper.audio as audio
 import whisper.decoding as decoding
 import whisper.load_models as load_models
 import whisper.torch_whisper as torch_whisper
 import whisper.decoding as decoding
 TEST_AUDIO = "whisper/assets/ls_test.flac"
--- a/whisper/whisper/init.py
+++ b/whisper/whisper/init.py
@@ -1,6 +1,4 @@
 # Copyright © 2023 Apple Inc.
-from . import load_models
+from . import audio, decoding, load_models
 from . import audio
 from . import decoding
 from .transcribe import transcribe
--- a/whisper/whisper/decoding.py
+++ b/whisper/whisper/decoding.py
@@ -1,12 +1,13 @@
 # Copyright © 2023 Apple Inc.
 import zlib
 from dataclasses import dataclass, field, replace
 from typing import Dict, Iterable, List, Optional, Sequence, Tuple, Union
 import mlx.core as mx
 from mlx.utils import tree_map
 import mlx.nn as nn
 import numpy as np
-import zlib
+from mlx.utils import tree_map
 from .audio import CHUNK_LENGTH
 from .tokenizer import Tokenizer, get_tokenizer
--- a/whisper/whisper/load_models.py
+++ b/whisper/whisper/load_models.py
@@ -7,13 +7,11 @@ import warnings
 from typing import List
 import mlx.core as mx
 from mlx.utils import tree_map
 import torch
 from mlx.utils import tree_map
 from tqdm import tqdm
-from . import whisper
+from . import torch_whisper, whisper
 from . import torch_whisper
 _MODELS = {
    "tiny.en": "https://openaipublic.azureedge.net/main/whisper/models/d3dd57d32accea0b295c96e26691aa14d8822fac7d9d27d5dc00b4ca2826dd03/tiny.en.pt",
--- a/whisper/whisper/transcribe.py
+++ b/whisper/whisper/transcribe.py
@@ -1,12 +1,12 @@
 # Copyright © 2023 Apple Inc.
 import sys
 from typing import Optional, Tuple, Union
 import mlx.core as mx
 import numpy as np
 import sys
 from typing import Optional, Tuple, Union
 import tqdm
 from .audio import (
    FRAMES_PER_SECOND,
    HOP_LENGTH,
--- a/whisper/whisper/whisper.py
+++ b/whisper/whisper/whisper.py
@@ -2,13 +2,12 @@
 import base64
 import gzip
 from dataclasses import dataclass
 import math
 from dataclasses import dataclass
 from typing import Dict, Iterable, Optional
 import mlx.core as mx
 import mlx.nn as nn
 import numpy as np
 from .decoding import decode as decode_function