Fail when __nvcc_device_query fails

This commit is contained in:
Cheng
2025-11-22 11:48:38 +09:00
parent 0f8c8c8a11
commit d2069c0a3c
2 changed files with 14 additions and 24 deletions

View File

@@ -123,14 +123,24 @@ if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8.0)
mlx PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:--compress-mode=size>")
endif()
# Compute capability >= 7.0 is required for synchronization between CPU/GPU with
# managed memory.
# Use the native CUDA arch of the current device when none is specified.
if(NOT DEFINED MLX_CUDA_ARCHITECTURES)
  # Query the compute capability with __nvcc_device_query. It must be the only
  # COMMAND here: several COMMANDs in one execute_process() call are chained
  # as a pipeline rather than being tried one after another.
  execute_process(
    COMMAND __nvcc_device_query
    RESULT_VARIABLE MLX_CUDA_ARCH_QUERY_RESULT
    OUTPUT_VARIABLE MLX_CUDA_ARCHITECTURES
    OUTPUT_STRIP_TRAILING_WHITESPACE)
  message(STATUS "__nvcc_device_query: ${MLX_CUDA_ARCHITECTURES}")
  # Architectures for which the "a" suffixed variant is requested below.
  set(UPGRADABLE_COMPUTE_COMPATIBILITIES "90;100;121")
  # The result variable holds the exit code of the command, or an error string
  # when the command could not be started; anything other than "0" is a
  # failure, and so is an empty answer.
  if(NOT "${MLX_CUDA_ARCH_QUERY_RESULT}" STREQUAL "0"
     OR "${MLX_CUDA_ARCHITECTURES}" STREQUAL "")
    message(
      FATAL_ERROR
        "Can not get native CUDA arch, must set MLX_CUDA_ARCHITECTURES")
  elseif(MLX_CUDA_ARCHITECTURES IN_LIST UPGRADABLE_COMPUTE_COMPATIBILITIES)
    # Use arch-specific compute capability whenever possible.
    set(MLX_CUDA_ARCHITECTURES "${MLX_CUDA_ARCHITECTURES}a")
  else()
    # Any other reported value: let CMake target the native architecture.
    set(MLX_CUDA_ARCHITECTURES "native")
  endif()
endif()
message(STATUS "CUDA architectures: ${MLX_CUDA_ARCHITECTURES}")
set_target_properties(mlx PROPERTIES CUDA_ARCHITECTURES

View File

@@ -1,20 +0,0 @@
#!/bin/bash
# Prints the CUDA architecture to build for on standard output:
# "90a", "100a", "121a" or "native".

# If nvidia-smi fails (no NVIDIA GPU or driver), default to 90a.
nvidia-smi >/dev/null 2>&1 || { echo "90a"; exit 0; }

# Otherwise, query the native architecture.
detected="$(__nvcc_device_query)"

# The three architectures below get the "a" suffix appended; every other
# answer (including an empty one) maps to "native".
if [ "$detected" = "90" ] || [ "$detected" = "100" ] || [ "$detected" = "121" ]; then
  echo "${detected}a"
else
  echo "native"
fi