diff --git a/.circleci/config.yml b/.circleci/config.yml index 94e4e909f..26305ea2d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -63,16 +63,20 @@ jobs: command: ./build/tests/tests mac_build_and_test: + parameters: + xcode_version: + type: string + default: "15.2.0" macos: - xcode: "15.2.0" + xcode: << parameters.xcode_version >> resource_class: macos.m1.large.gen1 steps: - checkout - run: name: Install dependencies command: | - brew install python@3.9 - python3.9 -m venv env + brew install python@3.8 + python3.8 -m venv env source env/bin/activate pip install --upgrade pip pip install --upgrade cmake @@ -97,7 +101,7 @@ jobs: command: | source env/bin/activate LOW_MEMORY=1 DEVICE=cpu python -m xmlrunner discover -v python/tests -o test-results/cpu - LOW_MEMORY=1 DEVICE=gpu python3.9 -m xmlrunner discover -v python/tests -o test-results/gpu + LOW_MEMORY=1 DEVICE=gpu METAL_DEVICE_WRAPPER_TYPE=1 METAL_DEBUG_ERROR_MODE=0 python -m xmlrunner discover -v python/tests -o test-results/gpu # TODO: Reenable when extension api becomes stable # - run: # name: Build example extension @@ -235,7 +239,10 @@ workflows: - not: << pipeline.parameters.weekly_build >> - not: << pipeline.parameters.test_release >> jobs: - - mac_build_and_test + - mac_build_and_test: + matrix: + parameters: + xcode_version: ["15.0.0", "15.2.0"] - linux_build_and_test build_pypi_release: @@ -254,7 +261,7 @@ workflows: matrix: parameters: python_version: ["3.8", "3.9", "3.10", "3.11", "3.12"] - xcode_version: ["14.3.1", "15.2.0"] + xcode_version: ["15.0.0", "15.2.0"] build_env: ["PYPI_RELEASE=1"] prb: when: @@ -280,7 +287,7 @@ workflows: matrix: parameters: python_version: ["3.8", "3.9", "3.10", "3.11", "3.12"] - xcode_version: ["14.3.1", "15.2.0"] + xcode_version: ["15.0.0", "15.2.0"] weekly_build: when: and: @@ -291,7 +298,7 @@ workflows: matrix: parameters: python_version: ["3.8", "3.9", "3.10", "3.11", "3.12"] - xcode_version: ["14.3.1", "15.2.0"] + xcode_version: ["15.0.0", "15.2.0"] build_env: ["DEV_RELEASE=1"] linux_test_release: when: diff --git a/CMakeLists.txt b/CMakeLists.txt index a5245ce57..c04d4a0db 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,10 +77,8 @@ elseif (MLX_BUILD_METAL) set(METAL_CPP_URL https://developer.apple.com/metal/cpp/files/metal-cpp_macOS14.2_iOS17.2.zip) elseif (${MACOS_VERSION} GREATER_EQUAL 14.0) set(METAL_CPP_URL https://developer.apple.com/metal/cpp/files/metal-cpp_macOS14_iOS17-beta.zip) - elseif (${MACOS_VERSION} GREATER_EQUAL 13.3) - set(METAL_CPP_URL https://developer.apple.com/metal/cpp/files/metal-cpp_macOS13.3_iOS16.4.zip) - else() - message(FATAL_ERROR "MLX requires macOS >= 13.4 to be built with MLX_BUILD_METAL=ON" ) + else () + message(FATAL_ERROR "MLX requires macOS >= 13.5 to be built with MLX_BUILD_METAL=ON") endif() FetchContent_Declare( diff --git a/docs/src/install.rst b/docs/src/install.rst index 6c0535079..ee4267457 100644 --- a/docs/src/install.rst +++ b/docs/src/install.rst @@ -15,7 +15,7 @@ To install from PyPI you must meet the following requirements: - Using an M series chip (Apple silicon) - Using a native Python >= 3.8 -- macOS >= 13.3 +- macOS >= 13.5 .. note:: MLX is only available on devices running macOS >= 13.3 @@ -54,7 +54,7 @@ Build Requirements - A C++ compiler with C++17 support (e.g. Clang >= 5.0) - `cmake `_ -- version 3.24 or later, and ``make`` -- Xcode >= 14.3 (Xcode >= 15.0 for macOS 14 and above) +- Xcode >= 15.0 and macOS >= 13.5 .. note:: Ensure your shell environment is native ``arm``, not ``x86`` via Rosetta. If diff --git a/mlx/backend/metal/matmul.cpp b/mlx/backend/metal/matmul.cpp index 76b192d35..d8de3d832 100644 --- a/mlx/backend/metal/matmul.cpp +++ b/mlx/backend/metal/matmul.cpp @@ -428,12 +428,21 @@ void Matmul::eval_gpu(const std::vector& inputs, array& out) { throw std::runtime_error( "[matmul] Does not yet support non-floating point types."); } - out.set_data(allocator::malloc_or_wait(out.nbytes())); auto& s = stream(); auto& d = metal::device(s.device); auto& a_pre = inputs[0]; auto& b_pre = inputs[1]; + // Return 0s if either input is empty + if (a_pre.size() == 0 || b_pre.size() == 0) { + array zero = array(0, a_pre.dtype()); + copy_gpu(zero, out, CopyType::Scalar, s); + auto command_buffer = d.get_command_buffer(s.index); + command_buffer->addCompletedHandler([zero](MTL::CommandBuffer*) {}); + return; + } + + out.set_data(allocator::malloc_or_wait(out.nbytes())); ///////////////////////////////////////////////////////////////////////////// // Init checks and prep @@ -573,7 +582,6 @@ void Matmul::eval_gpu(const std::vector& inputs, array& out) { [copies](MTL::CommandBuffer*) mutable { copies.clear(); }); return; } - ///////////////////////////////////////////////////////////////////////////// // Gemm specialization