use arch specific targets when possible (#2771 )

Separate test-linux from build-linux/cuda in GitHub Actions (#2765 )
* Separate test-linux from build-linux/cuda in GitHub Actions * Prefer unittest when possible Co-authored-by: Mike Drob <mdrob@apache.org> --------- Co-authored-by: Mike Drob <mdrob@apache.org>
2025-12-16 01:49:05 +08:00 · 2025-11-14 20:04:18 -08:00 · 2025-11-15 11:14:09 +09:00
10 changed files with 105 additions and 40 deletions
--- a/.github/actions/build-cuda/action.yml
+++ b/.github/actions/build-cuda/action.yml
@@ -17,20 +17,6 @@ runs:
        CMAKE_ARGS: -DMLX_BUILD_CUDA=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DCMAKE_CUDA_COMPILER=${{ inputs.nvcc-location }}
      run: pip install -e ".[dev]" -v
    - name: Run Python tests - CPU
      shell: bash
      env:
        LOW_MEMORY: 1
        DEVICE: cpu
      run: python -m unittest discover python/tests -v
    - name: Run Python tests - GPU
      shell: bash
      env:
        LOW_MEMORY: 1
        DEVICE: gpu
      run: python -m tests discover python/tests -v
    - name: Build CPP only
      shell: bash
      run: |
@@ -39,7 +25,3 @@ runs:
          -DCMAKE_CUDA_COMPILER=${{ inputs.nvcc-location }} \
          -DCMAKE_BUILD_TYPE=DEBUG
        cmake --build build -j $(nproc)
    - name: Run CPP tests
      shell: bash
      run: ./build/tests/tests -sfe="*fft_tests.cpp,*linalg_tests.cpp"
--- a/.github/actions/build-linux/action.yml
+++ b/.github/actions/build-linux/action.yml
@@ -17,25 +17,9 @@ runs:
        pip install typing_extensions
        python setup.py generate_stubs
    - name: Run Python tests
      shell: bash
      run: |
        python -m unittest discover python/tests -v
        mpirun --bind-to none --allow-run-as-root -host localhost:8 -np 8 python python/tests/mpi_test_distributed.py
        mlx.launch --verbose -n 8 python/tests/ring_test_distributed.py -v 2> >(tee -a stderr.log >&2)
        if grep -Fq '[WARN]' stderr.log ; then
          grep -F '[WARN]' stderr.log
          echo "Distributed ring test failed";
          exit 1;
        fi
    - name: Build CPP only
      shell: bash
      run: |
        mkdir -p build && cd build
        cmake .. -DMLX_BUILD_METAL=OFF -DCMAKE_BUILD_TYPE=DEBUG
        make -j $(nproc)
    - name: Run CPP tests
      shell: sh
      run: ./build/tests/tests
--- a/.github/actions/setup-linux/action.yml
+++ b/.github/actions/setup-linux/action.yml
@@ -46,7 +46,6 @@ runs:
        pip install --upgrade pip cmake
    - name: Install MPI
      if: inputs.runner-type == 'linux'
      shell: bash
      run: sudo apt-get install -y openmpi-bin openmpi-common libopenmpi-dev
--- a/.github/actions/test-linux/action.yml
+++ b/.github/actions/test-linux/action.yml
@@ -0,0 +1,63 @@
 # Composite action that runs the Linux test suites: distributed tests,
 # Python tests and the C++ test binary (./build/tests/tests).
 name: 'Run Linux tests'
 inputs:
  cpu-only:
    description: 'Skip GPU tests'
    required: false
    default: false
 runs:
  using: "composite"
  steps:
    - name: Run distributed tests
      # FIXME: This test fails with CUDA build.
      if: ${{ inputs.cpu-only == 'true' }}
      shell: bash
      run: |
        echo "::group::Distributed tests"
        mpirun --bind-to none --allow-run-as-root -host localhost:8 -np 8 python python/tests/mpi_test_distributed.py
        mlx.launch --verbose -n 8 python/tests/ring_test_distributed.py -v 2> >(tee -a stderr.log >&2)
        if grep -Fq '[WARN]' stderr.log ; then
          grep -F '[WARN]' stderr.log
          echo "Distributed ring test failed";
          exit 1;
        fi
        echo "::endgroup::"
    - name: Run Python tests - CPU
      shell: bash
      env:
        DEVICE: cpu
      run: |
        echo "::group::Python tests - CPU"
        python -m unittest discover python/tests -v
        echo "::endgroup::"
    - name: Run Python tests - GPU
      # Composite-action inputs are always strings, so `!inputs.cpu-only` is
      # false even when the value is 'false'. Compare against the string.
      if: ${{ inputs.cpu-only != 'true' }}
      shell: bash
      env:
        DEVICE: gpu
      run: |
        echo "::group::Python tests - GPU"
        python -m tests discover python/tests -v
        echo "::endgroup::"
    - name: Run CPP tests - CPU
      shell: bash
      env:
        DEVICE: cpu
      run: |
        echo "::group::CPP tests - CPU"
        ./build/tests/tests
        echo "::endgroup::"
    - name: Run CPP tests - GPU
      # Same string comparison as the Python GPU step above.
      if: ${{ inputs.cpu-only != 'true' }}
      shell: bash
      env:
        DEVICE: gpu
      run: |
        echo "::group::CPP tests - GPU"
        ./build/tests/tests -sfe="*fft_tests.cpp,*linalg_tests.cpp"
        echo "::endgroup::"
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -51,6 +51,9 @@ jobs:
        with:
          python-version: ${{ matrix.python_version }}
      - uses: ./.github/actions/build-linux
      - uses: ./.github/actions/test-linux
        with:
          cpu-only: true
  build_mac_release:
    if: github.repository == 'ml-explore/mlx'
@@ -85,6 +88,7 @@ jobs:
        with:
          runner-type: 'cuda'
      - uses: ./.github/actions/build-cuda
      - uses: ./.github/actions/test-linux
  build_cuda_release:
    if: github.repository == 'ml-explore/mlx'
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -25,6 +25,9 @@ jobs:
      - uses: actions/checkout@v5
      - uses: ./.github/actions/setup-linux
      - uses: ./.github/actions/build-linux
      - uses: ./.github/actions/test-linux
        with:
          cpu-only: true
  mac_build_and_test:
    if: github.repository == 'ml-explore/mlx'
@@ -50,6 +53,7 @@ jobs:
        with:
          runner-type: 'cuda'
      - uses: ./.github/actions/build-cuda
      - uses: ./.github/actions/test-linux
  build_documentation:
    if: github.repository == 'ml-explore/mlx'
--- a/mlx/backend/cuda/CMakeLists.txt
+++ b/mlx/backend/cuda/CMakeLists.txt
@@ -126,7 +126,11 @@ endif()
 # Compute capability >= 7.0 is required for synchronization between CPU/GPU with
 # managed memory.
 if(NOT DEFINED MLX_CUDA_ARCHITECTURES)
-  set(MLX_CUDA_ARCHITECTURES "native")
+  execute_process(
    COMMAND bash detect_cuda_arch.sh
    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
    OUTPUT_VARIABLE MLX_CUDA_ARCHITECTURES
    OUTPUT_STRIP_TRAILING_WHITESPACE)
 endif()
 message(STATUS "CUDA architectures: ${MLX_CUDA_ARCHITECTURES}")
 set_target_properties(mlx PROPERTIES CUDA_ARCHITECTURES
--- a/mlx/backend/cuda/detect_cuda_arch.sh
+++ b/mlx/backend/cuda/detect_cuda_arch.sh
@@ -0,0 +1,13 @@
 #!/bin/bash
 # Prints the CUDA architecture to build for on stdout.
 # The value reported by __nvcc_device_query is mapped to its arch-specific
 # ("a") variant for 90, 100 and 121; anything else (including empty output)
 # falls back to "native".
 # NOTE(review): presumably __nvcc_device_query prints the local GPU's compute
 # capability as a bare number - confirm, especially for multi-GPU hosts.
 detected="$(__nvcc_device_query)"
 if [ "$detected" = "90" ]; then
     echo "90a"
 elif [ "$detected" = "100" ]; then
     echo "100a"
 elif [ "$detected" = "121" ]; then
     echo "121a"
 else
     echo "native"
 fi
--- a/mlx/backend/cuda/jit_module.cpp
+++ b/mlx/backend/cuda/jit_module.cpp
@@ -279,11 +279,14 @@ void compile(
  // Compile program.
  std::vector<const char*> args;
  bool use_sass = compiler_supports_device_sass(device);
  // compute_capability_major() is only the major version (it is formatted
  // next to the minor version below), so combine both before checking for the
  // architectures that have an arch-specific ("a") target.
  auto cc = device.compute_capability_major();
  auto cc_full = cc * 10 + device.compute_capability_minor();
  std::string arch_tag =
      (cc_full == 90 || cc_full == 100 || cc_full == 121) ? "a" : "";
  std::string compute = fmt::format(
-      "--gpu-architecture={}_{}{}",
+      "--gpu-architecture={}_{}{}{}",
      use_sass ? "sm" : "compute",
-      device.compute_capability_major(),
+      cc,
-      device.compute_capability_minor());
+      device.compute_capability_minor(),
      arch_tag);
  args.push_back(compute.c_str());
  std::string cccl_include = cccl_dir();
  if (!cccl_include.empty()) {
--- a/setup.py
+++ b/setup.py
@@ -89,7 +89,16 @@ class CMakeBuild(build_ext):
        ]
        if build_stage == 2 and build_cuda:
            # Last arch is always real and virtual for forward-compatibility
-            cuda_archs = ";".join(("70-real", "80-real", "90-real", "100-real", "120"))
+            cuda_archs = ";".join(
                (
                    "75-real",
                    "80-real",
                    "90a-real",
                    "100a-real",
                    "120a-real",
                    "120-virtual",
                )
            )
            cmake_args += [f"-DMLX_CUDA_ARCHITECTURES={cuda_archs}"]
        # Some generators require explicitly passing config when building.
Author	SHA1	Message	Date
Awni Hannun	1bf605d56d	use arch specific targets when possible (#2771 )	2025-11-14 20:04:18 -08:00
Cheng	3c622ddd1d	Separate test-linux from build-linux/cuda in GitHub Actions (#2765 ) * Separate test-linux from build-linux/cuda in GitHub Actions * Prefer unittest when possible Co-authored-by: Mike Drob <mdrob@apache.org> --------- Co-authored-by: Mike Drob <mdrob@apache.org>	2025-11-15 11:14:09 +09:00