diff --git a/.github/actions/build-macos/action.yml b/.github/actions/build-macos/action.yml index c7421f147..9890838de 100644 --- a/.github/actions/build-macos/action.yml +++ b/.github/actions/build-macos/action.yml @@ -107,11 +107,6 @@ runs: -v python/tests \ -o test-results/gpu_jit - - name: Build macOS 13 package - if: inputs.build-type == 'release' - uses: ./.github/actions/build-macos-release - with: - macos-target: 13.0 - name: Build macOS 14 package if: inputs.build-type == 'release' uses: ./.github/actions/build-macos-release @@ -121,4 +116,4 @@ runs: if: inputs.build-type == 'release' uses: ./.github/actions/build-macos-release with: - macos-target: 15.0 \ No newline at end of file + macos-target: 15.0 diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index d4468c12d..0446476b2 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -53,8 +53,7 @@ jobs: if: github.repository == 'ml-explore/mlx' strategy: matrix: - python-version: ["3.10", "3.13"] - # TODO: 3.14 had issues finding a compatible tensorflow + python-version: ["3.10", "3.14"] env: MACOSX_DEPLOYMENT_TARGET: "15.0" runs-on: [self-hosted, macos] diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9f2ae05f0..e7bf3f3d3 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -73,8 +73,7 @@ jobs: if: github.repository == 'ml-explore/mlx' strategy: matrix: - python-version: ["3.10", "3.11", "3.12", "3.13"] - # TODO: 3.14 had issues finding a compatible tensorflow + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] runs-on: [self-hosted, macos] env: PYPI_RELEASE: 1 diff --git a/CMakeLists.txt b/CMakeLists.txt index 0dbc0b51b..d8889d1e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,9 +127,12 @@ if(MLX_BUILD_METAL) message(STATUS "Building with macOS SDK version ${MACOS_SDK_VERSION}") set(METAL_CPP_URL - https://developer.apple.com/metal/cpp/files/metal-cpp_macOS15_iOS18.zip) + 
https://developer.apple.com/metal/cpp/files/metal-cpp_26.zip) if(NOT CMAKE_OSX_DEPLOYMENT_TARGET STREQUAL "") + if(${CMAKE_OSX_DEPLOYMENT_TARGET} LESS 14.0) + message(FATAL_ERROR "MLX requires macOS >= 14.0") + endif() set(XCRUN_FLAGS "-mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}") endif() execute_process( @@ -138,7 +141,6 @@ if(MLX_BUILD_METAL) "echo \"__METAL_VERSION__\" | xcrun -sdk macosx metal ${XCRUN_FLAGS} -E -x metal -P - | tail -1 | tr -d '\n'" OUTPUT_VARIABLE MLX_METAL_VERSION COMMAND_ERROR_IS_FATAL ANY) FetchContent_Declare(metal_cpp URL ${METAL_CPP_URL}) - FetchContent_MakeAvailable(metal_cpp) target_include_directories( mlx PUBLIC $ diff --git a/docs/src/install.rst b/docs/src/install.rst index a237613de..9f0ab67bc 100644 --- a/docs/src/install.rst +++ b/docs/src/install.rst @@ -17,11 +17,10 @@ To install from PyPI your system must meet the following requirements: - Using an M series chip (Apple silicon) - Using a native Python >= 3.10 -- macOS >= 13.5 +- macOS >= 14.0 .. note:: - MLX is only available on devices running macOS >= 13.5 - It is highly recommended to use macOS 14 (Sonoma) + MLX is only available on devices running macOS 14.0 or higher.
CUDA ^^^^ diff --git a/mlx/backend/metal/CMakeLists.txt b/mlx/backend/metal/CMakeLists.txt index 0fd1834f6..2baa6c05b 100644 --- a/mlx/backend/metal/CMakeLists.txt +++ b/mlx/backend/metal/CMakeLists.txt @@ -21,14 +21,8 @@ function(make_jit_source SRC_FILE) target_sources(mlx PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/jit/${SRC_NAME}.cpp) endfunction(make_jit_source) -make_jit_source( - utils - kernels/jit/bf16.h - kernels/metal_3_0/bf16.h - kernels/metal_3_1/bf16.h - kernels/bf16_math.h - kernels/complex.h - kernels/defines.h) +make_jit_source(utils kernels/bf16.h kernels/bf16_math.h kernels/complex.h + kernels/defines.h) make_jit_source(unary_ops kernels/erf.h kernels/expm1f.h kernels/fp8.h) make_jit_source(binary_ops) make_jit_source(ternary_ops) diff --git a/mlx/backend/metal/device.cpp b/mlx/backend/metal/device.cpp index d1a427299..6d4d2841d 100644 --- a/mlx/backend/metal/device.cpp +++ b/mlx/backend/metal/device.cpp @@ -21,12 +21,12 @@ constexpr const char* default_mtllib_path = METAL_PATH; auto get_metal_version() { auto get_metal_version_ = []() { - if (__builtin_available(macOS 15, iOS 18, tvOS 18, visionOS 2, *)) { + if (__builtin_available(macOS 26, iOS 26, tvOS 26, visionOS 26, *)) { + return MTL::LanguageVersion4_0; + } else if (__builtin_available(macOS 15, iOS 18, tvOS 18, visionOS 2, *)) { return MTL::LanguageVersion3_2; - } else if (__builtin_available(macOS 14, iOS 17, tvOS 17, visionOS 1, *)) { - return MTL::LanguageVersion3_1; } else { - return MTL::LanguageVersion3_0; + return MTL::LanguageVersion3_1; } }; static auto metal_version_ = get_metal_version_(); diff --git a/mlx/backend/metal/kernels/CMakeLists.txt b/mlx/backend/metal/kernels/CMakeLists.txt index 69ac2a5e9..c2842d534 100644 --- a/mlx/backend/metal/kernels/CMakeLists.txt +++ b/mlx/backend/metal/kernels/CMakeLists.txt @@ -1,6 +1,5 @@ set(BASE_HEADERS - metal_3_1/bf16.h - metal_3_0/bf16.h + bf16.h bf16_math.h complex.h defines.h @@ -18,16 +17,9 @@ function(build_kernel_base TARGET SRCFILE 
DEPS) set(METAL_FLAGS ${METAL_FLAGS} "-mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}") endif() - if(MLX_METAL_VERSION GREATER_EQUAL 310) - set(VERSION_INCLUDES - ${PROJECT_SOURCE_DIR}/mlx/backend/metal/kernels/metal_3_1) - else() - set(VERSION_INCLUDES - ${PROJECT_SOURCE_DIR}/mlx/backend/metal/kernels/metal_3_0) - endif() add_custom_command( COMMAND xcrun -sdk macosx metal ${METAL_FLAGS} -c ${SRCFILE} - -I${PROJECT_SOURCE_DIR} -I${VERSION_INCLUDES} -o ${TARGET}.air + -I${PROJECT_SOURCE_DIR} -o ${TARGET}.air DEPENDS ${SRCFILE} ${DEPS} ${BASE_HEADERS} OUTPUT ${TARGET}.air COMMENT "Building ${TARGET}.air" diff --git a/mlx/backend/metal/kernels/metal_3_1/bf16.h b/mlx/backend/metal/kernels/bf16.h similarity index 100% rename from mlx/backend/metal/kernels/metal_3_1/bf16.h rename to mlx/backend/metal/kernels/bf16.h diff --git a/mlx/backend/metal/kernels/jit/bf16.h b/mlx/backend/metal/kernels/jit/bf16.h deleted file mode 100644 index 702e8a4eb..000000000 --- a/mlx/backend/metal/kernels/jit/bf16.h +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright © 2024 Apple Inc. - -// clang-format off -#define jit_if #if -#define jit_else #else -#define jit_endif #endif - -jit_if (__METAL_VERSION__ >= 310) - -#include "mlx/backend/metal/kernels/metal_3_1/bf16.h" - -jit_else - -#include "mlx/backend/metal/kernels/metal_3_0/bf16.h" - -jit_endif // clang-format on diff --git a/mlx/backend/metal/kernels/metal_3_0/bf16.h b/mlx/backend/metal/kernels/metal_3_0/bf16.h deleted file mode 100644 index f5d486706..000000000 --- a/mlx/backend/metal/kernels/metal_3_0/bf16.h +++ /dev/null @@ -1,314 +0,0 @@ -// Copyright © 2023 Apple Inc. 
- -#pragma once - -#include - -using namespace metal; - -///////////////////////////////////////////////////////////////////////////// -// Helpers -///////////////////////////////////////////////////////////////////////////// - -constexpr METAL_FUNC uint16_t float_to_bfloat_bits(float x) { - // Check for nan - if ((as_type(x) & ~_fp_encoding_traits::sign_mask) > - _fp_encoding_traits::inf_mask) { - return uint16_t(as_type(0x7FC0)); - } - // Take bits - uint32_t float_bits = as_type(x); - - // Round to nearest even - float_bits += ((float_bits >> 16) & 1) + as_type(0x7FFF); - - // Take upper 16 bits - return float_bits >> 16; -} - -constexpr METAL_FUNC float bfloat_bits_to_float(uint16_t x) { - // Upper 16 bits are the data and lower 16 bits are 0s - return as_type((uint32_t)x << 16); -} - -struct _MLX_BFloat16; - -template -static constexpr constant bool can_convert_to_bfloat = - !is_same_v && is_convertible_v; - -template -static constexpr constant bool can_convert_from_bfloat = - !is_same_v && is_convertible_v; - -///////////////////////////////////////////////////////////////////////////// -// Bfloat struct -///////////////////////////////////////////////////////////////////////////// - -struct _MLX_BFloat16 { - ///////////////////////////////////////////////////////////////////////////// - // Constructors - uint16_t bits_; - _MLX_BFloat16() thread = default; - _MLX_BFloat16() threadgroup = default; - _MLX_BFloat16() device = default; - _MLX_BFloat16() constant = default; - - struct bits_to_bfloat_struct {}; - static constexpr METAL_FUNC bits_to_bfloat_struct bits_to_bfloat() { - return bits_to_bfloat_struct(); - } - constexpr METAL_FUNC _MLX_BFloat16(uint16_t bits, bits_to_bfloat_struct) - : bits_(bits) {} - - ///////////////////////////////////////////////////////////////////////////// - // Conversions to bfloat - - template < - typename T, - typename = typename enable_if>::type> - constexpr METAL_FUNC _MLX_BFloat16(T x) thread - : 
bits_(float_to_bfloat_bits(static_cast(x))) {} - - template < - typename T, - typename = typename enable_if>::type> - constexpr METAL_FUNC _MLX_BFloat16(T x) threadgroup - : bits_(float_to_bfloat_bits(static_cast(x))) {} - - template < - typename T, - typename = typename enable_if>::type> - constexpr METAL_FUNC _MLX_BFloat16(T x) device - : bits_(float_to_bfloat_bits(static_cast(x))) {} - - template < - typename T, - typename = typename enable_if>::type> - constexpr METAL_FUNC _MLX_BFloat16(T x) constant - : bits_(float_to_bfloat_bits(static_cast(x))) {} - - ///////////////////////////////////////////////////////////////////////////// - // Conversions from bfloat - - template < - typename T, - typename = typename enable_if>::type> - constexpr METAL_FUNC operator T() const thread { - return static_cast(bfloat_bits_to_float(bits_)); - } - - template < - typename T, - typename = typename enable_if>::type> - constexpr METAL_FUNC operator T() const threadgroup { - return static_cast(bfloat_bits_to_float(bits_)); - } - - template < - typename T, - typename = typename enable_if>::type> - constexpr METAL_FUNC operator T() const device { - return static_cast(bfloat_bits_to_float(bits_)); - } - - template < - typename T, - typename = typename enable_if>::type> - constexpr METAL_FUNC operator T() const constant { - return static_cast(bfloat_bits_to_float(bits_)); - } -}; - -///////////////////////////////////////////////////////////////////////////// -// Bfloat operators -///////////////////////////////////////////////////////////////////////////// - -///////////////////////////////////////////////////////////////////////////// -// Unary ops -constexpr METAL_FUNC _MLX_BFloat16 operator-(_MLX_BFloat16 x) { - return -static_cast(x); -} - -///////////////////////////////////////////////////////////////////////////// -// Binary operators -#define bfloat_binop_base(__op__, __operator__, otype, atype, btype, ctype) \ - constexpr METAL_FUNC otype __operator__(atype lhs, btype rhs) { 
\ - return static_cast(lhs) __op__ static_cast(rhs); \ - } - -#define bfloat_binop_helper(__op__, __operator__, otype, itype, ctype) \ - constexpr METAL_FUNC otype __operator__(_MLX_BFloat16 lhs, itype rhs) { \ - return static_cast(lhs) __op__ static_cast(rhs); \ - } \ - constexpr METAL_FUNC otype __operator__(itype lhs, _MLX_BFloat16 rhs) { \ - return static_cast(lhs) __op__ static_cast(rhs); \ - } - -///////////////////////////////////////////////////////////////////////////// -// Arithmetic Operators -#define bfloat_binop(_op_, _operator_) \ - bfloat_binop_base( \ - _op_, _operator_, _MLX_BFloat16, _MLX_BFloat16, _MLX_BFloat16, float); \ - bfloat_binop_helper(_op_, _operator_, float, float, float); \ - bfloat_binop_helper(_op_, _operator_, float, half, float); \ - bfloat_binop_helper(_op_, _operator_, _MLX_BFloat16, int32_t, float); \ - bfloat_binop_helper(_op_, _operator_, _MLX_BFloat16, uint32_t, float); \ - bfloat_binop_helper(_op_, _operator_, _MLX_BFloat16, int64_t, float); \ - bfloat_binop_helper(_op_, _operator_, _MLX_BFloat16, uint64_t, float); - -bfloat_binop(+, operator+); -bfloat_binop(-, operator-); -bfloat_binop(*, operator*); -bfloat_binop(/, operator/); - -///////////////////////////////////////////////////////////////////////////// -// Comparison ops -#define bfloat_compop(__op__, __operator__) \ - bfloat_binop_base( \ - __op__, __operator__, bool, _MLX_BFloat16, _MLX_BFloat16, float); \ - bfloat_binop_helper(__op__, __operator__, bool, float, float); \ - bfloat_binop_helper(__op__, __operator__, bool, half, float); \ - bfloat_binop_helper(__op__, __operator__, bool, int32_t, float); \ - bfloat_binop_helper(__op__, __operator__, bool, uint32_t, float); \ - bfloat_binop_helper(__op__, __operator__, bool, int64_t, float); \ - bfloat_binop_helper(__op__, __operator__, bool, uint64_t, float); - -bfloat_compop(>, operator>); -bfloat_compop(<, operator<); -bfloat_compop(>=, operator>=); -bfloat_compop(<=, operator<=); -bfloat_compop(==, operator==); 
-bfloat_compop(!=, operator!=); - -#undef bfloat_compop -#undef bfloat_binop_base -#undef bfloat_binop_helper -#undef bfloat_binop - -///////////////////////////////////////////////////////////////////////////// -// Inplace Operators -#define bfloat_inplace_op_helper(__op__, __operator__, itype, addr_space) \ - constexpr METAL_FUNC addr_space _MLX_BFloat16& __operator__( \ - addr_space _MLX_BFloat16& lhs, itype rhs) { \ - lhs = static_cast(lhs) __op__ static_cast(rhs); \ - return lhs; \ - } \ - constexpr METAL_FUNC addr_space itype& __operator__( \ - addr_space itype& lhs, _MLX_BFloat16 rhs) { \ - lhs = static_cast(lhs) __op__ static_cast(rhs); \ - return lhs; \ - } - -#define bfloat_inplace_op_addr_space_helper(__op__, __operator__, itype) \ - bfloat_inplace_op_helper(__op__, __operator__, itype, device); \ - bfloat_inplace_op_helper(__op__, __operator__, itype, thread); \ - bfloat_inplace_op_helper(__op__, __operator__, itype, threadgroup); - -#define bfloat_inplace_op(itype) \ - bfloat_inplace_op_addr_space_helper(+, operator+=, itype); \ - bfloat_inplace_op_addr_space_helper(-, operator-=, itype); \ - bfloat_inplace_op_addr_space_helper(*, operator*=, itype); \ - bfloat_inplace_op_addr_space_helper(/, operator/=, itype); - -bfloat_inplace_op(float); -bfloat_inplace_op(half); -bfloat_inplace_op(int16_t); -bfloat_inplace_op(int32_t); -bfloat_inplace_op(int64_t); -bfloat_inplace_op(uint16_t); -bfloat_inplace_op(uint32_t); -bfloat_inplace_op(uint64_t); - -#undef bfloat_inplace_op_helper -#undef bfloat_inplace_op_addr_space_helper -#undef bfloat_inplace_op - -#define bfloat_inplace_op_helper(__op__, __operator__, addr_space) \ - constexpr METAL_FUNC addr_space _MLX_BFloat16& __operator__( \ - addr_space _MLX_BFloat16& lhs, _MLX_BFloat16 rhs) { \ - lhs = static_cast(lhs) __op__ static_cast(rhs); \ - return lhs; \ - } - -#define bfloat_inplace_op_addr_space_helper(__op__, __operator__) \ - bfloat_inplace_op_helper(__op__, __operator__, device); \ - 
bfloat_inplace_op_helper(__op__, __operator__, thread); \ - bfloat_inplace_op_helper(__op__, __operator__, threadgroup); - -bfloat_inplace_op_addr_space_helper(+, operator+=); -bfloat_inplace_op_addr_space_helper(-, operator-=); -bfloat_inplace_op_addr_space_helper(*, operator*=); -bfloat_inplace_op_addr_space_helper(/, operator/=); - -#undef bfloat_inplace_op_helper -#undef bfloat_inplace_op_addr_space_helper - -///////////////////////////////////////////////////////////////////////////// -// Bfloat typedef -///////////////////////////////////////////////////////////////////////////// - -typedef struct _MLX_BFloat16 bfloat16_t; - -///////////////////////////////////////////////////////////////////////////// -// Bfloat numeric limits -///////////////////////////////////////////////////////////////////////////// - -#pragma METAL internals : enable - -namespace metal { - -template <> -struct _numeric_limits_impl : _fp_numeric_limits_impl_base { - static constexpr constant int digits = 8; - static constexpr constant int digits10 = 2; - static constexpr constant int max_digits10 = 4; - static constexpr constant int radix = 2; - static constexpr constant int min_exponent = -125; - static constexpr constant int min_exponent10 = -37; - static constexpr constant int max_exponent = 128; - static constexpr constant int max_exponent10 = 38; - - static constexpr bfloat16_t min() { - return _MLX_BFloat16(0x0080, _MLX_BFloat16::bits_to_bfloat()); - } - static constexpr bfloat16_t lowest() { - return _MLX_BFloat16(0xFF7F, _MLX_BFloat16::bits_to_bfloat()); - } - static constexpr bfloat16_t max() { - return _MLX_BFloat16(0x7F7F, _MLX_BFloat16::bits_to_bfloat()); - } - static constexpr bfloat16_t epsilon() { - return _MLX_BFloat16(0x3C00, _MLX_BFloat16::bits_to_bfloat()); - } - static constexpr bfloat16_t round_error() { - return _MLX_BFloat16(0x3F00, _MLX_BFloat16::bits_to_bfloat()); - } - static constexpr bfloat16_t infinity() { - return _MLX_BFloat16(0x7F80, 
_MLX_BFloat16::bits_to_bfloat()); - } - static constexpr bfloat16_t quiet_NaN() { - return _MLX_BFloat16(0x7FC0, _MLX_BFloat16::bits_to_bfloat()); - } - static constexpr bfloat16_t signaling_NaN() { - return _MLX_BFloat16(0x7F80, _MLX_BFloat16::bits_to_bfloat()); - } - static constexpr bfloat16_t denorm_min() { - return _MLX_BFloat16(0x0001, _MLX_BFloat16::bits_to_bfloat()); - } -}; - -METAL_FUNC bool isnan(_MLX_BFloat16 x) { - return x != x; -} - -} // namespace metal - -#pragma METAL internals : disable -inline uint16_t bfloat16_to_uint16(const bfloat16_t x) { - return x.bits_; -} - -inline bfloat16_t uint16_to_bfloat16(const uint16_t x) { - return _MLX_BFloat16(x, _MLX_BFloat16::bits_to_bfloat()); -} diff --git a/mlx/backend/metal/kernels/utils.h b/mlx/backend/metal/kernels/utils.h index c30d186b8..acdbc6ad5 100644 --- a/mlx/backend/metal/kernels/utils.h +++ b/mlx/backend/metal/kernels/utils.h @@ -4,11 +4,7 @@ #include -// The correct bf16.h is included based on the metal version -// by giving the correct path to -I during compilation -// e.g. 
mlx/backend/metal/kernels/metal_3_0/ for Metal 3.0 -#include "bf16.h" - +#include "mlx/backend/metal/kernels/bf16.h" #include "mlx/backend/metal/kernels/bf16_math.h" #include "mlx/backend/metal/kernels/complex.h" #include "mlx/backend/metal/kernels/defines.h" diff --git a/mlx/backend/metal/metal.cpp b/mlx/backend/metal/metal.cpp index 888207322..078ea70d9 100644 --- a/mlx/backend/metal/metal.cpp +++ b/mlx/backend/metal/metal.cpp @@ -13,7 +13,7 @@ bool is_available() { return true; } -void start_capture(std::string path, id object) { +void start_capture(std::string path, NS::Object* object) { auto pool = new_scoped_memory_pool(); auto descriptor = MTL::CaptureDescriptor::alloc()->init(); diff --git a/python/mlx/_os_warning.py b/python/mlx/_os_warning.py deleted file mode 100644 index 7d40b9f4e..000000000 --- a/python/mlx/_os_warning.py +++ /dev/null @@ -1,9 +0,0 @@ -import platform - -if platform.system() == "Darwin": - version = tuple(map(int, platform.mac_ver()[0].split("."))) - major, minor = version[0], version[1] - if (major, minor) < (13, 5): - raise ImportError( - f"Only macOS 13.5 and newer are supported, not {major}.{minor}" - ) diff --git a/python/src/mlx.cpp b/python/src/mlx.cpp index 0921d2cea..2829b3219 100644 --- a/python/src/mlx.cpp +++ b/python/src/mlx.cpp @@ -28,7 +28,6 @@ NB_MODULE(core, m) { m.doc() = "mlx: A framework for machine learning on Apple silicon."; auto reprlib_fix = nb::module_::import_("mlx._reprlib_fix"); - nb::module_::import_("mlx._os_warning"); nb::set_leak_warnings(false); init_mlx_func(m);