Compare commits

...

2 Commits

Author SHA1 Message Date
Awni Hannun
1bf605d56d use arch specific targets when possible (#2771) 2025-11-14 20:04:18 -08:00
Cheng
3c622ddd1d Separate test-linux from build-linux/cuda in GitHub Actions (#2765)
* Separate test-linux from build-linux/cuda in GitHub Actions

* Prefer unittest when possible

Co-authored-by: Mike Drob <mdrob@apache.org>

---------

Co-authored-by: Mike Drob <mdrob@apache.org>
2025-11-15 11:14:09 +09:00
10 changed files with 105 additions and 40 deletions

View File

@@ -17,20 +17,6 @@ runs:
CMAKE_ARGS: -DMLX_BUILD_CUDA=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DCMAKE_CUDA_COMPILER=${{ inputs.nvcc-location }} CMAKE_ARGS: -DMLX_BUILD_CUDA=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DCMAKE_CUDA_COMPILER=${{ inputs.nvcc-location }}
run: pip install -e ".[dev]" -v run: pip install -e ".[dev]" -v
- name: Run Python tests - CPU
shell: bash
env:
LOW_MEMORY: 1
DEVICE: cpu
run: python -m unittest discover python/tests -v
- name: Run Python tests - GPU
shell: bash
env:
LOW_MEMORY: 1
DEVICE: gpu
run: python -m tests discover python/tests -v
- name: Build CPP only - name: Build CPP only
shell: bash shell: bash
run: | run: |
@@ -39,7 +25,3 @@ runs:
-DCMAKE_CUDA_COMPILER=${{ inputs.nvcc-location }} \ -DCMAKE_CUDA_COMPILER=${{ inputs.nvcc-location }} \
-DCMAKE_BUILD_TYPE=DEBUG -DCMAKE_BUILD_TYPE=DEBUG
cmake --build build -j $(nproc) cmake --build build -j $(nproc)
- name: Run CPP tests
shell: bash
run: ./build/tests/tests -sfe="*fft_tests.cpp,*linalg_tests.cpp"

View File

@@ -17,25 +17,9 @@ runs:
pip install typing_extensions pip install typing_extensions
python setup.py generate_stubs python setup.py generate_stubs
- name: Run Python tests
shell: bash
run: |
python -m unittest discover python/tests -v
mpirun --bind-to none --allow-run-as-root -host localhost:8 -np 8 python python/tests/mpi_test_distributed.py
mlx.launch --verbose -n 8 python/tests/ring_test_distributed.py -v 2> >(tee -a stderr.log >&2)
if grep -Fq '[WARN]' stderr.log ; then
grep -F '[WARN]' stderr.log
echo "Distributed ring test failed";
exit 1;
fi
- name: Build CPP only - name: Build CPP only
shell: bash shell: bash
run: | run: |
mkdir -p build && cd build mkdir -p build && cd build
cmake .. -DMLX_BUILD_METAL=OFF -DCMAKE_BUILD_TYPE=DEBUG cmake .. -DMLX_BUILD_METAL=OFF -DCMAKE_BUILD_TYPE=DEBUG
make -j $(nproc) make -j $(nproc)
- name: Run CPP tests
shell: sh
run: ./build/tests/tests

View File

@@ -46,7 +46,6 @@ runs:
pip install --upgrade pip cmake pip install --upgrade pip cmake
- name: Install MPI - name: Install MPI
if: inputs.runner-type == 'linux'
shell: bash shell: bash
run: sudo apt-get install -y openmpi-bin openmpi-common libopenmpi-dev run: sudo apt-get install -y openmpi-bin openmpi-common libopenmpi-dev

63
.github/actions/test-linux/action.yml vendored Normal file
View File

@@ -0,0 +1,63 @@
name: 'Run Linux tests'

# Composite action that runs the distributed, Python and C++ test suites.
# It invokes ./build/tests/tests and the tests under python/tests, so the
# build steps must have run earlier in the same job.
inputs:
  cpu-only:
    description: 'Skip GPU tests'
    required: false
    # Composite-action inputs are always delivered as strings, so the default
    # is spelled as a string to make the comparisons below unambiguous.
    default: 'false'

runs:
  using: "composite"
  steps:
    - name: Run distributed tests
      # FIXME: This test fails with CUDA build.
      if: ${{ inputs.cpu-only == 'true' }}
      shell: bash
      run: |
        echo "::group::Distributed tests"
        mpirun --bind-to none --allow-run-as-root -host localhost:8 -np 8 python python/tests/mpi_test_distributed.py
        mlx.launch --verbose -n 8 python/tests/ring_test_distributed.py -v 2> >(tee -a stderr.log >&2)
        # Any [WARN] line emitted on stderr by the ring test is treated as a failure.
        if grep -Fq '[WARN]' stderr.log ; then
          grep -F '[WARN]' stderr.log
          echo "Distributed ring test failed";
          exit 1;
        fi
        echo "::endgroup::"

    - name: Run Python tests - CPU
      shell: bash
      env:
        DEVICE: cpu
      run: |
        echo "::group::Python tests - CPU"
        python -m unittest discover python/tests -v
        echo "::endgroup::"

    - name: Run Python tests - GPU
      # Compare against the string 'true': `!inputs.cpu-only` is always false
      # here because any non-empty string (including 'false') is truthy.
      if: ${{ inputs.cpu-only != 'true' }}
      shell: bash
      env:
        DEVICE: gpu
      run: |
        echo "::group::Python tests - GPU"
        python -m tests discover python/tests -v
        echo "::endgroup::"

    - name: Run CPP tests - CPU
      shell: bash
      env:
        DEVICE: cpu
      run: |
        echo "::group::CPP tests - CPU"
        ./build/tests/tests
        echo "::endgroup::"

    - name: Run CPP tests - GPU
      # Same string comparison as the Python GPU step above.
      if: ${{ inputs.cpu-only != 'true' }}
      shell: bash
      env:
        DEVICE: gpu
      run: |
        echo "::group::CPP tests - GPU"
        ./build/tests/tests -sfe="*fft_tests.cpp,*linalg_tests.cpp"
        echo "::endgroup::"

View File

@@ -51,6 +51,9 @@ jobs:
with: with:
python-version: ${{ matrix.python_version }} python-version: ${{ matrix.python_version }}
- uses: ./.github/actions/build-linux - uses: ./.github/actions/build-linux
- uses: ./.github/actions/test-linux
with:
cpu-only: true
build_mac_release: build_mac_release:
if: github.repository == 'ml-explore/mlx' if: github.repository == 'ml-explore/mlx'
@@ -85,6 +88,7 @@ jobs:
with: with:
runner-type: 'cuda' runner-type: 'cuda'
- uses: ./.github/actions/build-cuda - uses: ./.github/actions/build-cuda
- uses: ./.github/actions/test-linux
build_cuda_release: build_cuda_release:
if: github.repository == 'ml-explore/mlx' if: github.repository == 'ml-explore/mlx'

View File

@@ -25,6 +25,9 @@ jobs:
- uses: actions/checkout@v5 - uses: actions/checkout@v5
- uses: ./.github/actions/setup-linux - uses: ./.github/actions/setup-linux
- uses: ./.github/actions/build-linux - uses: ./.github/actions/build-linux
- uses: ./.github/actions/test-linux
with:
cpu-only: true
mac_build_and_test: mac_build_and_test:
if: github.repository == 'ml-explore/mlx' if: github.repository == 'ml-explore/mlx'
@@ -50,6 +53,7 @@ jobs:
with: with:
runner-type: 'cuda' runner-type: 'cuda'
- uses: ./.github/actions/build-cuda - uses: ./.github/actions/build-cuda
- uses: ./.github/actions/test-linux
build_documentation: build_documentation:
if: github.repository == 'ml-explore/mlx' if: github.repository == 'ml-explore/mlx'

View File

@@ -126,7 +126,11 @@ endif()
# Compute capability >= 7.0 is required for synchronization between CPU/GPU with # Compute capability >= 7.0 is required for synchronization between CPU/GPU with
# managed memory. # managed memory.
if(NOT DEFINED MLX_CUDA_ARCHITECTURES) if(NOT DEFINED MLX_CUDA_ARCHITECTURES)
set(MLX_CUDA_ARCHITECTURES "native") execute_process(
COMMAND bash detect_cuda_arch.sh
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
OUTPUT_VARIABLE MLX_CUDA_ARCHITECTURES
OUTPUT_STRIP_TRAILING_WHITESPACE)
endif() endif()
message(STATUS "CUDA architectures: ${MLX_CUDA_ARCHITECTURES}") message(STATUS "CUDA architectures: ${MLX_CUDA_ARCHITECTURES}")
set_target_properties(mlx PROPERTIES CUDA_ARCHITECTURES set_target_properties(mlx PROPERTIES CUDA_ARCHITECTURES

View File

@@ -0,0 +1,13 @@
#!/bin/bash
# Print the CUDA architecture string for the value reported by
# `__nvcc_device_query`.
#
# Reported values 90, 100 and 121 are printed with an "a" suffix appended;
# any other output (including none at all) prints "native".
detected_arch="$(__nvcc_device_query)"

case "${detected_arch}" in
  90 | 100 | 121)
    echo "${detected_arch}a"
    ;;
  *)
    echo "native"
    ;;
esac

View File

@@ -279,11 +279,14 @@ void compile(
// Compile program. // Compile program.
std::vector<const char*> args; std::vector<const char*> args;
bool use_sass = compiler_supports_device_sass(device); bool use_sass = compiler_supports_device_sass(device);
auto cc = device.compute_capability_major();
std::string arch_tag = (cc == 90 || cc == 100 || cc == 121) ? "a" : "";
std::string compute = fmt::format( std::string compute = fmt::format(
"--gpu-architecture={}_{}{}", "--gpu-architecture={}_{}{}{}",
use_sass ? "sm" : "compute", use_sass ? "sm" : "compute",
device.compute_capability_major(), cc,
device.compute_capability_minor()); device.compute_capability_minor(),
arch_tag);
args.push_back(compute.c_str()); args.push_back(compute.c_str());
std::string cccl_include = cccl_dir(); std::string cccl_include = cccl_dir();
if (!cccl_include.empty()) { if (!cccl_include.empty()) {

View File

@@ -89,7 +89,16 @@ class CMakeBuild(build_ext):
] ]
if build_stage == 2 and build_cuda: if build_stage == 2 and build_cuda:
# Last arch is always real and virtual for forward-compatibility # Last arch is always real and virtual for forward-compatibility
cuda_archs = ";".join(("70-real", "80-real", "90-real", "100-real", "120")) cuda_archs = ";".join(
(
"75-real",
"80-real",
"90a-real",
"100a-real",
"120a-real",
"120-virtual",
)
)
cmake_args += [f"-DMLX_CUDA_ARCHITECTURES={cuda_archs}"] cmake_args += [f"-DMLX_CUDA_ARCHITECTURES={cuda_archs}"]
# Some generators require explicitly passing config when building.