mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
Compare commits
2 Commits
27ff069175
...
1bf605d56d
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1bf605d56d | ||
|
|
3c622ddd1d |
18
.github/actions/build-cuda/action.yml
vendored
18
.github/actions/build-cuda/action.yml
vendored
@@ -17,20 +17,6 @@ runs:
|
||||
CMAKE_ARGS: -DMLX_BUILD_CUDA=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DCMAKE_CUDA_COMPILER=${{ inputs.nvcc-location }}
|
||||
run: pip install -e ".[dev]" -v
|
||||
|
||||
- name: Run Python tests - CPU
|
||||
shell: bash
|
||||
env:
|
||||
LOW_MEMORY: 1
|
||||
DEVICE: cpu
|
||||
run: python -m unittest discover python/tests -v
|
||||
|
||||
- name: Run Python tests - GPU
|
||||
shell: bash
|
||||
env:
|
||||
LOW_MEMORY: 1
|
||||
DEVICE: gpu
|
||||
run: python -m tests discover python/tests -v
|
||||
|
||||
- name: Build CPP only
|
||||
shell: bash
|
||||
run: |
|
||||
@@ -39,7 +25,3 @@ runs:
|
||||
-DCMAKE_CUDA_COMPILER=${{ inputs.nvcc-location }} \
|
||||
-DCMAKE_BUILD_TYPE=DEBUG
|
||||
cmake --build build -j $(nproc)
|
||||
|
||||
- name: Run CPP tests
|
||||
shell: bash
|
||||
run: ./build/tests/tests -sfe="*fft_tests.cpp,*linalg_tests.cpp"
|
||||
|
||||
16
.github/actions/build-linux/action.yml
vendored
16
.github/actions/build-linux/action.yml
vendored
@@ -17,25 +17,9 @@ runs:
|
||||
pip install typing_extensions
|
||||
python setup.py generate_stubs
|
||||
|
||||
- name: Run Python tests
|
||||
shell: bash
|
||||
run: |
|
||||
python -m unittest discover python/tests -v
|
||||
mpirun --bind-to none --allow-run-as-root -host localhost:8 -np 8 python python/tests/mpi_test_distributed.py
|
||||
mlx.launch --verbose -n 8 python/tests/ring_test_distributed.py -v 2> >(tee -a stderr.log >&2)
|
||||
if grep -Fq '[WARN]' stderr.log ; then
|
||||
grep -F '[WARN]' stderr.log
|
||||
echo "Distributed ring test failed";
|
||||
exit 1;
|
||||
fi
|
||||
|
||||
- name: Build CPP only
|
||||
shell: bash
|
||||
run: |
|
||||
mkdir -p build && cd build
|
||||
cmake .. -DMLX_BUILD_METAL=OFF -DCMAKE_BUILD_TYPE=DEBUG
|
||||
make -j $(nproc)
|
||||
|
||||
- name: Run CPP tests
|
||||
shell: sh
|
||||
run: ./build/tests/tests
|
||||
|
||||
1
.github/actions/setup-linux/action.yml
vendored
1
.github/actions/setup-linux/action.yml
vendored
@@ -46,7 +46,6 @@ runs:
|
||||
pip install --upgrade pip cmake
|
||||
|
||||
- name: Install MPI
|
||||
if: inputs.runner-type == 'linux'
|
||||
shell: bash
|
||||
run: sudo apt-get install -y openmpi-bin openmpi-common libopenmpi-dev
|
||||
|
||||
|
||||
63
.github/actions/test-linux/action.yml
vendored
Normal file
63
.github/actions/test-linux/action.yml
vendored
Normal file
@@ -0,0 +1,63 @@
|
||||
name: 'Run Linux tests'
|
||||
|
||||
inputs:
|
||||
cpu-only:
|
||||
description: 'Skip GPU tests'
|
||||
required: false
|
||||
default: false
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Run distributed tests
|
||||
# FIXME: This test fails with CUDA build.
|
||||
if: ${{ inputs.cpu-only == 'true' }}
|
||||
shell: bash
|
||||
run: |
|
||||
echo "::group::Distributed tests"
|
||||
mpirun --bind-to none --allow-run-as-root -host localhost:8 -np 8 python python/tests/mpi_test_distributed.py
|
||||
mlx.launch --verbose -n 8 python/tests/ring_test_distributed.py -v 2> >(tee -a stderr.log >&2)
|
||||
if grep -Fq '[WARN]' stderr.log ; then
|
||||
grep -F '[WARN]' stderr.log
|
||||
echo "Distributed ring test failed";
|
||||
exit 1;
|
||||
fi
|
||||
echo "::endgroup::"
|
||||
|
||||
- name: Run Python tests - CPU
|
||||
shell: bash
|
||||
env:
|
||||
DEVICE: cpu
|
||||
run: |
|
||||
echo "::group::Python tests - CPU"
|
||||
python -m unittest discover python/tests -v
|
||||
echo "::endgroup::"
|
||||
|
||||
- name: Run Python tests - GPU
|
||||
if: ${{ !inputs.cpu-only }}
|
||||
shell: bash
|
||||
env:
|
||||
DEVICE: gpu
|
||||
run: |
|
||||
echo "::group::Python tests - GPU"
|
||||
python -m tests discover python/tests -v
|
||||
echo "::endgroup::"
|
||||
|
||||
- name: Run CPP tests - CPU
|
||||
shell: bash
|
||||
env:
|
||||
DEVICE: cpu
|
||||
run: |
|
||||
echo "::group::CPP tests - CPU"
|
||||
./build/tests/tests
|
||||
echo "::endgroup::"
|
||||
|
||||
- name: Run CPP tests - GPU
|
||||
if: ${{ !inputs.cpu-only }}
|
||||
shell: bash
|
||||
env:
|
||||
DEVICE: gpu
|
||||
run: |
|
||||
echo "::group::CPP tests - GPU"
|
||||
./build/tests/tests -sfe="*fft_tests.cpp,*linalg_tests.cpp"
|
||||
echo "::endgroup::"
|
||||
4
.github/workflows/nightly.yml
vendored
4
.github/workflows/nightly.yml
vendored
@@ -51,6 +51,9 @@ jobs:
|
||||
with:
|
||||
python-version: ${{ matrix.python_version }}
|
||||
- uses: ./.github/actions/build-linux
|
||||
- uses: ./.github/actions/test-linux
|
||||
with:
|
||||
cpu-only: true
|
||||
|
||||
build_mac_release:
|
||||
if: github.repository == 'ml-explore/mlx'
|
||||
@@ -85,6 +88,7 @@ jobs:
|
||||
with:
|
||||
runner-type: 'cuda'
|
||||
- uses: ./.github/actions/build-cuda
|
||||
- uses: ./.github/actions/test-linux
|
||||
|
||||
build_cuda_release:
|
||||
if: github.repository == 'ml-explore/mlx'
|
||||
|
||||
4
.github/workflows/pull_request.yml
vendored
4
.github/workflows/pull_request.yml
vendored
@@ -25,6 +25,9 @@ jobs:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: ./.github/actions/setup-linux
|
||||
- uses: ./.github/actions/build-linux
|
||||
- uses: ./.github/actions/test-linux
|
||||
with:
|
||||
cpu-only: true
|
||||
|
||||
mac_build_and_test:
|
||||
if: github.repository == 'ml-explore/mlx'
|
||||
@@ -50,6 +53,7 @@ jobs:
|
||||
with:
|
||||
runner-type: 'cuda'
|
||||
- uses: ./.github/actions/build-cuda
|
||||
- uses: ./.github/actions/test-linux
|
||||
|
||||
build_documentation:
|
||||
if: github.repository == 'ml-explore/mlx'
|
||||
|
||||
@@ -126,7 +126,11 @@ endif()
|
||||
# Compute capability >= 7.0 is required for synchronization between CPU/GPU with
|
||||
# managed memory.
|
||||
if(NOT DEFINED MLX_CUDA_ARCHITECTURES)
|
||||
set(MLX_CUDA_ARCHITECTURES "native")
|
||||
execute_process(
|
||||
COMMAND bash detect_cuda_arch.sh
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE MLX_CUDA_ARCHITECTURES
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
endif()
|
||||
message(STATUS "CUDA architectures: ${MLX_CUDA_ARCHITECTURES}")
|
||||
set_target_properties(mlx PROPERTIES CUDA_ARCHITECTURES
|
||||
|
||||
13
mlx/backend/cuda/detect_cuda_arch.sh
Normal file
13
mlx/backend/cuda/detect_cuda_arch.sh
Normal file
@@ -0,0 +1,13 @@
|
||||
#!/bin/bash
|
||||
|
||||
arch=`__nvcc_device_query`
|
||||
case "$arch" in
|
||||
"90")
|
||||
echo "90a" ;;
|
||||
"100")
|
||||
echo "100a" ;;
|
||||
"121")
|
||||
echo "121a" ;;
|
||||
*)
|
||||
echo "native" ;;
|
||||
esac
|
||||
@@ -279,11 +279,14 @@ void compile(
|
||||
// Compile program.
|
||||
std::vector<const char*> args;
|
||||
bool use_sass = compiler_supports_device_sass(device);
|
||||
auto cc = device.compute_capability_major();
|
||||
std::string arch_tag = (cc == 90 || cc == 100 || cc == 121) ? "a" : "";
|
||||
std::string compute = fmt::format(
|
||||
"--gpu-architecture={}_{}{}",
|
||||
"--gpu-architecture={}_{}{}{}",
|
||||
use_sass ? "sm" : "compute",
|
||||
device.compute_capability_major(),
|
||||
device.compute_capability_minor());
|
||||
cc,
|
||||
device.compute_capability_minor(),
|
||||
arch_tag);
|
||||
args.push_back(compute.c_str());
|
||||
std::string cccl_include = cccl_dir();
|
||||
if (!cccl_include.empty()) {
|
||||
|
||||
11
setup.py
11
setup.py
@@ -89,7 +89,16 @@ class CMakeBuild(build_ext):
|
||||
]
|
||||
if build_stage == 2 and build_cuda:
|
||||
# Last arch is always real and virtual for forward-compatibility
|
||||
cuda_archs = ";".join(("70-real", "80-real", "90-real", "100-real", "120"))
|
||||
cuda_archs = ";".join(
|
||||
(
|
||||
"75-real",
|
||||
"80-real",
|
||||
"90a-real",
|
||||
"100a-real",
|
||||
"120a-real",
|
||||
"120-virtual",
|
||||
)
|
||||
)
|
||||
cmake_args += [f"-DMLX_CUDA_ARCHITECTURES={cuda_archs}"]
|
||||
|
||||
# Some generators require explcitly passing config when building.
|
||||
|
||||
Reference in New Issue
Block a user