diff --git a/.circleci/config.yml b/.circleci/config.yml
index 7279e652..aec28e77 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1,5 +1,8 @@
 version: 2.1
 
+orbs:
+  apple: ml-explore/pr-approval@0.1.0
+
 jobs:
   linux_build_and_test:
     docker:
@@ -31,5 +34,7 @@ workflows:
     jobs:
       - hold:
           type: approval
+      - apple/authenticate:
+          context: pr-approval
       - linux_build_and_test:
           requires: [ hold ]
diff --git a/llms/mlx_lm/__init__.py b/llms/mlx_lm/__init__.py
index 82960423..ecf69c6d 100644
--- a/llms/mlx_lm/__init__.py
+++ b/llms/mlx_lm/__init__.py
@@ -1,4 +1,4 @@
-from .convert import convert
-from .utils import generate, load
+# Copyright © 2023-2024 Apple Inc.
 
-__version__ = "0.0.14"
+from .utils import convert, generate, load
+from .version import __version__
diff --git a/llms/mlx_lm/convert.py b/llms/mlx_lm/convert.py
index e68d3af1..7eac34aa 100644
--- a/llms/mlx_lm/convert.py
+++ b/llms/mlx_lm/convert.py
@@ -1,22 +1,8 @@
+# Copyright © 2023-2024 Apple Inc.
+
 import argparse
-import copy
-import glob
-import json
-import shutil
-from pathlib import Path
-from typing import Tuple
 
-import mlx.core as mx
-import mlx.nn as nn
-from mlx.utils import tree_flatten
-
-from .utils import (
-    fetch_from_hub,
-    get_model_path,
-    linear_class_predicate,
-    save_weights,
-    upload_to_hub,
-)
+from .utils import convert
 
 
 def configure_parser() -> argparse.ArgumentParser:
@@ -59,73 +45,6 @@ def configure_parser() -> argparse.ArgumentParser:
     return parser
 
 
-def quantize_model(
-    model: nn.Module, config: dict, q_group_size: int, q_bits: int
-) -> Tuple:
-    """
-    Applies quantization to the model weights.
-
-    Args:
-        model (nn.Module): The model to be quantized.
-        config (dict): Model configuration.
-        q_group_size (int): Group size for quantization.
-        q_bits (int): Bits per weight for quantization.
-
-    Returns:
-        Tuple: Tuple containing quantized weights and config.
-    """
-    quantized_config = copy.deepcopy(config)
-
-    nn.QuantizedLinear.quantize_module(
-        model, q_group_size, q_bits, linear_class_predicate=linear_class_predicate
-    )
-    quantized_config["quantization"] = {"group_size": q_group_size, "bits": q_bits}
-    quantized_weights = dict(tree_flatten(model.parameters()))
-
-    return quantized_weights, quantized_config
-
-
-def convert(
-    hf_path: str,
-    mlx_path: str = "mlx_model",
-    quantize: bool = False,
-    q_group_size: int = 64,
-    q_bits: int = 4,
-    dtype: str = "float16",
-    upload_repo: str = None,
-):
-    print("[INFO] Loading")
-    model_path = get_model_path(hf_path)
-    model, config, tokenizer = fetch_from_hub(model_path, lazy=True)
-
-    weights = dict(tree_flatten(model.parameters()))
-    dtype = mx.float16 if quantize else getattr(mx, dtype)
-    weights = {k: v.astype(dtype) for k, v in weights.items()}
-
-    if quantize:
-        print("[INFO] Quantizing")
-        model.load_weights(list(weights.items()))
-        weights, config = quantize_model(model, config, q_group_size, q_bits)
-
-    if isinstance(mlx_path, str):
-        mlx_path = Path(mlx_path)
-
-    del model
-    save_weights(mlx_path, weights, donate_weights=True)
-
-    py_files = glob.glob(str(model_path / "*.py"))
-    for file in py_files:
-        shutil.copy(file, mlx_path)
-
-    tokenizer.save_pretrained(mlx_path)
-
-    with open(mlx_path / "config.json", "w") as fid:
-        json.dump(config, fid, indent=4)
-
-    if upload_repo is not None:
-        upload_to_hub(mlx_path, upload_repo, hf_path)
-
-
 if __name__ == "__main__":
     parser = configure_parser()
     args = parser.parse_args()
diff --git a/llms/mlx_lm/generate.py b/llms/mlx_lm/generate.py
index eab5e792..cf159c21 100644
--- a/llms/mlx_lm/generate.py
+++ b/llms/mlx_lm/generate.py
@@ -1,3 +1,5 @@
+# Copyright © 2023-2024 Apple Inc.
+
 import argparse
 
 import mlx.core as mx
diff --git a/llms/mlx_lm/merge.py b/llms/mlx_lm/merge.py
index 2603653d..efdc726d 100644
--- a/llms/mlx_lm/merge.py
+++ b/llms/mlx_lm/merge.py
@@ -1,3 +1,5 @@
+# Copyright © 2023-2024 Apple Inc.
+
 import argparse
 import glob
 import json
diff --git a/llms/mlx_lm/server.py b/llms/mlx_lm/server.py
index e8f35325..894c2e37 100644
--- a/llms/mlx_lm/server.py
+++ b/llms/mlx_lm/server.py
@@ -1,3 +1,5 @@
+# Copyright © 2023-2024 Apple Inc.
+
 import argparse
 import json
 import time
diff --git a/llms/mlx_lm/utils.py b/llms/mlx_lm/utils.py
index 814beca4..ca78088c 100644
--- a/llms/mlx_lm/utils.py
+++ b/llms/mlx_lm/utils.py
@@ -1,9 +1,12 @@
+# Copyright © 2023-2024 Apple Inc.
+
 import copy
 import gc
 import glob
 import importlib
 import json
 import logging
+import shutil
 import time
 from pathlib import Path
 from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union
@@ -11,6 +14,7 @@ from typing import Any, Callable, Dict, Generator, List, Optional, Tuple, Union
 import mlx.core as mx
 import mlx.nn as nn
 from huggingface_hub import snapshot_download
+from mlx.utils import tree_flatten
 from transformers import AutoConfig, AutoTokenizer, PreTrainedTokenizer
 
 # Local imports
@@ -515,3 +519,70 @@ def save_weights(
         f,
         indent=4,
     )
+
+
+def quantize_model(
+    model: nn.Module, config: dict, q_group_size: int, q_bits: int
+) -> Tuple:
+    """
+    Applies quantization to the model weights.
+
+    Args:
+        model (nn.Module): The model to be quantized.
+        config (dict): Model configuration.
+        q_group_size (int): Group size for quantization.
+        q_bits (int): Bits per weight for quantization.
+
+    Returns:
+        Tuple: Tuple containing quantized weights and config.
+    """
+    quantized_config = copy.deepcopy(config)
+
+    nn.QuantizedLinear.quantize_module(
+        model, q_group_size, q_bits, linear_class_predicate=linear_class_predicate
+    )
+    quantized_config["quantization"] = {"group_size": q_group_size, "bits": q_bits}
+    quantized_weights = dict(tree_flatten(model.parameters()))
+
+    return quantized_weights, quantized_config
+
+
+def convert(
+    hf_path: str,
+    mlx_path: str = "mlx_model",
+    quantize: bool = False,
+    q_group_size: int = 64,
+    q_bits: int = 4,
+    dtype: str = "float16",
+    upload_repo: str = None,
+):
+    print("[INFO] Loading")
+    model_path = get_model_path(hf_path)
+    model, config, tokenizer = fetch_from_hub(model_path, lazy=True)
+
+    weights = dict(tree_flatten(model.parameters()))
+    dtype = mx.float16 if quantize else getattr(mx, dtype)
+    weights = {k: v.astype(dtype) for k, v in weights.items()}
+
+    if quantize:
+        print("[INFO] Quantizing")
+        model.load_weights(list(weights.items()))
+        weights, config = quantize_model(model, config, q_group_size, q_bits)
+
+    if isinstance(mlx_path, str):
+        mlx_path = Path(mlx_path)
+
+    del model
+    save_weights(mlx_path, weights, donate_weights=True)
+
+    py_files = glob.glob(str(model_path / "*.py"))
+    for file in py_files:
+        shutil.copy(file, mlx_path)
+
+    tokenizer.save_pretrained(mlx_path)
+
+    with open(mlx_path / "config.json", "w") as fid:
+        json.dump(config, fid, indent=4)
+
+    if upload_repo is not None:
+        upload_to_hub(mlx_path, upload_repo, hf_path)
diff --git a/llms/mlx_lm/version.py b/llms/mlx_lm/version.py
new file mode 100644
index 00000000..b5f92ab1
--- /dev/null
+++ b/llms/mlx_lm/version.py
@@ -0,0 +1,3 @@
+# Copyright © 2023-2024 Apple Inc.
+
+__version__ = "0.0.14"
diff --git a/llms/setup.py b/llms/setup.py
index f8cad116..58d02291 100644
--- a/llms/setup.py
+++ b/llms/setup.py
@@ -1,15 +1,18 @@
 import sys
 from pathlib import Path
 
-import mlx_lm
-import pkg_resources
 from setuptools import setup
 
-with open(Path(__file__).parent / "mlx_lm/requirements.txt") as fid:
-    requirements = [str(r) for r in pkg_resources.parse_requirements(fid)]
+package_dir = Path(__file__).parent / "mlx_lm"
+with open(package_dir / "requirements.txt") as fid:
+    requirements = [l.strip() for l in fid.readlines()]
+
+sys.path.append(str(package_dir))
+from version import __version__
+
 
 setup(
     name="mlx-lm",
-    version=mlx_lm.__version__,
+    version=__version__,
     description="LLMs on Apple silicon with MLX and the Hugging Face Hub",
     long_description=open("README.md", encoding="utf-8").read(),
     long_description_content_type="text/markdown",