diff --git a/.circleci/config.yml b/.circleci/config.yml
index 94e4e909f..26305ea2d 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -63,16 +63,20 @@ jobs:
           command: ./build/tests/tests
 
   mac_build_and_test:
+    parameters:
+      xcode_version:
+        type: string
+        default: "15.2.0"
     macos:
-      xcode: "15.2.0"
+      xcode: << parameters.xcode_version >>
     resource_class: macos.m1.large.gen1
     steps:
       - checkout
       - run:
           name: Install dependencies
           command: |
-            brew install python@3.9
-            python3.9 -m venv env
+            brew install python@3.8
+            python3.8 -m venv env
             source env/bin/activate
             pip install --upgrade pip
             pip install --upgrade cmake
@@ -97,7 +101,7 @@ jobs:
           command: |
             source env/bin/activate
             LOW_MEMORY=1 DEVICE=cpu python -m xmlrunner discover -v python/tests -o test-results/cpu
-            LOW_MEMORY=1 DEVICE=gpu python3.9 -m xmlrunner discover -v python/tests -o test-results/gpu
+            LOW_MEMORY=1 DEVICE=gpu METAL_DEVICE_WRAPPER_TYPE=1 METAL_DEBUG_ERROR_MODE=0 python -m xmlrunner discover -v python/tests -o test-results/gpu
       # TODO: Reenable when extension api becomes stable
       # - run:
       #     name: Build example extension
@@ -235,7 +239,10 @@ workflows:
         - not: << pipeline.parameters.weekly_build >>
         - not: << pipeline.parameters.test_release >>
     jobs:
-      - mac_build_and_test
+      - mac_build_and_test:
+          matrix:
+            parameters:
+              xcode_version: ["15.0.0", "15.2.0"]
       - linux_build_and_test
 
   build_pypi_release:
@@ -254,7 +261,7 @@ workflows:
           matrix:
             parameters:
               python_version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
-              xcode_version: ["14.3.1", "15.2.0"]
+              xcode_version: ["15.0.0", "15.2.0"]
               build_env: ["PYPI_RELEASE=1"]
   prb:
     when:
@@ -280,7 +287,7 @@ workflows:
           matrix:
             parameters:
               python_version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
-              xcode_version: ["14.3.1", "15.2.0"]
+              xcode_version: ["15.0.0", "15.2.0"]
   weekly_build:
     when:
       and:
@@ -291,7 +298,7 @@ workflows:
           matrix:
             parameters:
               python_version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
-              xcode_version: ["14.3.1", "15.2.0"]
+              xcode_version: ["15.0.0", "15.2.0"]
               build_env: ["DEV_RELEASE=1"]
   linux_test_release:
     when:
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a5245ce57..c04d4a0db 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -77,10 +77,8 @@ elseif (MLX_BUILD_METAL)
     set(METAL_CPP_URL https://developer.apple.com/metal/cpp/files/metal-cpp_macOS14.2_iOS17.2.zip)
   elseif (${MACOS_VERSION} GREATER_EQUAL 14.0)
     set(METAL_CPP_URL https://developer.apple.com/metal/cpp/files/metal-cpp_macOS14_iOS17-beta.zip)
-  elseif (${MACOS_VERSION} GREATER_EQUAL 13.3)
-    set(METAL_CPP_URL https://developer.apple.com/metal/cpp/files/metal-cpp_macOS13.3_iOS16.4.zip)
-  else()
-    message(FATAL_ERROR "MLX requires macOS >= 13.4 to be built with MLX_BUILD_METAL=ON" )
+  else ()
+    message(FATAL_ERROR "MLX requires macOS >= 13.5 to be built with MLX_BUILD_METAL=ON")
   endif()
 
   FetchContent_Declare(
diff --git a/docs/src/install.rst b/docs/src/install.rst
index 6c0535079..ee4267457 100644
--- a/docs/src/install.rst
+++ b/docs/src/install.rst
@@ -15,7 +15,7 @@ To install from PyPI you must meet the following requirements:
 
 - Using an M series chip (Apple silicon)
 - Using a native Python >= 3.8
-- macOS >= 13.3
+- macOS >= 13.5
 
 .. note::
-    MLX is only available on devices running macOS >= 13.3 
+    MLX is only available on devices running macOS >= 13.5
@@ -54,7 +54,7 @@ Build Requirements
 
 - A C++ compiler with C++17 support (e.g. Clang >= 5.0)
 - `cmake <https://cmake.org/>`_ -- version 3.24 or later, and ``make``
-- Xcode >= 14.3 (Xcode >= 15.0 for macOS 14 and above)
+- Xcode >= 15.0 and macOS >= 13.5
 
 .. note::
    Ensure your shell environment is native ``arm``, not ``x86`` via Rosetta. If
diff --git a/mlx/backend/metal/matmul.cpp b/mlx/backend/metal/matmul.cpp
index 76b192d35..d8de3d832 100644
--- a/mlx/backend/metal/matmul.cpp
+++ b/mlx/backend/metal/matmul.cpp
@@ -428,12 +428,21 @@ void Matmul::eval_gpu(const std::vector<array>& inputs, array& out) {
     throw std::runtime_error(
         "[matmul] Does not yet support non-floating point types.");
   }
-  out.set_data(allocator::malloc_or_wait(out.nbytes()));
   auto& s = stream();
   auto& d = metal::device(s.device);
 
   auto& a_pre = inputs[0];
   auto& b_pre = inputs[1];
+  // Return 0s if either input is empty
+  if (a_pre.size() == 0 || b_pre.size() == 0) {
+    array zero = array(0, a_pre.dtype());
+    copy_gpu(zero, out, CopyType::Scalar, s);
+    auto command_buffer = d.get_command_buffer(s.index);
+    command_buffer->addCompletedHandler([zero](MTL::CommandBuffer*) {});
+    return;
+  }
+
+  out.set_data(allocator::malloc_or_wait(out.nbytes()));
 
   /////////////////////////////////////////////////////////////////////////////
   // Init checks and prep
@@ -573,7 +582,6 @@ void Matmul::eval_gpu(const std::vector<array>& inputs, array& out) {
         [copies](MTL::CommandBuffer*) mutable { copies.clear(); });
     return;
   }
-
   /////////////////////////////////////////////////////////////////////////////
   // Gemm specialization