From 4b30015872bcb45e300caa999600ccdbe3bd39f6 Mon Sep 17 00:00:00 2001
From: Awni Hannun
Date: Sun, 27 Jul 2025 08:05:23 -0700
Subject: [PATCH] add cuda sm 90

---
Reviewer notes (this section is not part of the commit message):

* CMakeLists.txt: the default value of the MLX_CUDA_ARCHITECTURES cache
  variable changes from "70;80" to "70;80;90". It is a CACHE STRING, so a
  value already present in an existing build cache is not overwritten.
* atomic_ops.cuh: the `__CUDA_ARCH__ < 900` preprocessor switch in the
  complex64_t overload of atomic_add is removed; the overload now calls
  atomic_add_general(out, val) unconditionally instead of calling
  atomicAdd(out, val) when __CUDA_ARCH__ >= 900.
* NOTE(review): presumably the removed atomicAdd branch is what blocked
  building for architecture 90 -- the patch does not say so; confirm.
* NOTE(review): line breaks and indentation in the hunks below were
  reconstructed from a whitespace-collapsed copy of this patch. Verify the
  context lines against the tree (or apply with --ignore-whitespace).

 mlx/backend/cuda/CMakeLists.txt        | 2 +-
 mlx/backend/cuda/device/atomic_ops.cuh | 4 ----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/mlx/backend/cuda/CMakeLists.txt b/mlx/backend/cuda/CMakeLists.txt
index 1a394e580..bb539c9ec 100644
--- a/mlx/backend/cuda/CMakeLists.txt
+++ b/mlx/backend/cuda/CMakeLists.txt
@@ -108,7 +108,7 @@ endif()
 # Compute capability 7 is required for synchronization between CPU/GPU with
 # managed memory. TODO: Add more architectures for potential performance gain.
 set(MLX_CUDA_ARCHITECTURES
-    "70;80"
+    "70;80;90"
     CACHE STRING "CUDA architectures")
 message(STATUS "CUDA architectures: ${MLX_CUDA_ARCHITECTURES}")
 set_target_properties(mlx PROPERTIES CUDA_ARCHITECTURES
diff --git a/mlx/backend/cuda/device/atomic_ops.cuh b/mlx/backend/cuda/device/atomic_ops.cuh
index 5df246c0e..cfb185c43 100644
--- a/mlx/backend/cuda/device/atomic_ops.cuh
+++ b/mlx/backend/cuda/device/atomic_ops.cuh
@@ -49,11 +49,7 @@ inline __device__ void atomic_add(__half* out, __half val) {
 }
 
 inline __device__ void atomic_add(complex64_t* out, complex64_t val) {
-#if __CUDA_ARCH__ < 900
   atomic_add_general(out, val);
-#else
-  atomicAdd(out, val);
-#endif
 }
 
 inline __device__ void atomic_add(__nv_bfloat16* out, __nv_bfloat16 val) {