mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
Merge build-cuda and build-linux actions (#2783)
Some checks failed
Build and Test / Check Lint (push) Has been cancelled
Build and Test / Linux (cpu, aarch64) (push) Has been cancelled
Build and Test / Linux (cpu, x86_64) (push) Has been cancelled
Build and Test / Linux (cuda-12.6, aarch64) (push) Has been cancelled
Build and Test / Linux (cuda-12.9, aarch64) (push) Has been cancelled
Build and Test / Linux (cuda-12.6, x86_64) (push) Has been cancelled
Build and Test / Linux (cuda-12.9, x86_64) (push) Has been cancelled
Build and Test / macOS (14.0) (push) Has been cancelled
Build and Test / macOS (15.0) (push) Has been cancelled
Build and Test / Build Documentation (push) Has been cancelled
Build and Test / Linux Fedora (aarch64) (push) Has been cancelled
Build and Test / Linux Fedora (x86_64) (push) Has been cancelled
Some checks failed
Build and Test / Check Lint (push) Has been cancelled
Build and Test / Linux (cpu, aarch64) (push) Has been cancelled
Build and Test / Linux (cpu, x86_64) (push) Has been cancelled
Build and Test / Linux (cuda-12.6, aarch64) (push) Has been cancelled
Build and Test / Linux (cuda-12.9, aarch64) (push) Has been cancelled
Build and Test / Linux (cuda-12.6, x86_64) (push) Has been cancelled
Build and Test / Linux (cuda-12.9, x86_64) (push) Has been cancelled
Build and Test / macOS (14.0) (push) Has been cancelled
Build and Test / macOS (15.0) (push) Has been cancelled
Build and Test / Build Documentation (push) Has been cancelled
Build and Test / Linux Fedora (aarch64) (push) Has been cancelled
Build and Test / Linux Fedora (x86_64) (push) Has been cancelled
This commit is contained in:
@@ -1,18 +1,13 @@
|
||||
name: 'Build CUDA wheel'
|
||||
description: 'Build CUDA wheel'
|
||||
|
||||
inputs:
|
||||
toolkit:
|
||||
description: 'The CUDA toolkit'
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Build package
|
||||
shell: bash
|
||||
env:
|
||||
CMAKE_ARGS: -DMLX_BUILD_CUDA=ON -DCMAKE_CUDA_COMPILER=/usr/local/${{ inputs.toolkit }}/bin/nvcc
|
||||
CMAKE_ARGS: -DMLX_BUILD_CUDA=ON
|
||||
run: |
|
||||
pip install auditwheel build patchelf setuptools
|
||||
python setup.py clean --all
|
||||
|
||||
26
.github/actions/build-cuda/action.yml
vendored
26
.github/actions/build-cuda/action.yml
vendored
@@ -1,26 +0,0 @@
|
||||
name: 'Build and Test with CUDA'
|
||||
description: 'Build and test MLX with CUDA'
|
||||
|
||||
inputs:
|
||||
toolkit:
|
||||
description: 'The CUDA toolkit'
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Install Python package
|
||||
shell: bash
|
||||
env:
|
||||
DEBUG: 1
|
||||
CMAKE_ARGS: -DMLX_BUILD_CUDA=ON -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DCMAKE_CUDA_COMPILER=/usr/local/${{ inputs.toolkit }}/bin/nvcc
|
||||
run: pip install --no-build-isolation -e ".[dev]" -v
|
||||
|
||||
- name: Build CPP only
|
||||
shell: bash
|
||||
run: |
|
||||
cmake . -B build \
|
||||
-DMLX_BUILD_CUDA=ON \
|
||||
-DCMAKE_CUDA_COMPILER=/usr/local/${{ inputs.toolkit }}/bin/nvcc \
|
||||
-DCMAKE_BUILD_TYPE=DEBUG
|
||||
cmake --build build -j $(nproc)
|
||||
28
.github/actions/build-linux/action.yml
vendored
28
.github/actions/build-linux/action.yml
vendored
@@ -1,15 +1,32 @@
|
||||
name: 'Build and Test on Linux'
|
||||
description: 'Build and test MLX on Linux'
|
||||
|
||||
inputs:
|
||||
toolkit:
|
||||
description: 'The toolkit to build with'
|
||||
required: false
|
||||
default: 'cpu'
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
- name: Install Python package
|
||||
id: python_build
|
||||
shell: sh
|
||||
env:
|
||||
CMAKE_ARGS: "-DCMAKE_COMPILE_WARNING_AS_ERROR=ON"
|
||||
DEBUG: 1
|
||||
run: pip install --no-build-isolation -e ".[dev]" -v
|
||||
CMAKE_ARGS: >-
|
||||
-DCMAKE_COMPILE_WARNING_AS_ERROR=ON
|
||||
-DMLX_BUILD_CUDA=${{ startsWith(inputs.toolkit, 'cuda') && 'ON' || 'OFF' }}
|
||||
run: |
|
||||
if ${{ startsWith(inputs.toolkit, 'cuda') && runner.arch == 'arm64' }} ; then
|
||||
# There is no GPU in arm64 runner, use a common arch.
|
||||
CMAKE_ARGS="$CMAKE_ARGS -DMLX_CUDA_ARCHITECTURES=90a"
|
||||
# Can not build tests when the built executables can not run.
|
||||
CMAKE_ARGS="$CMAKE_ARGS -DMLX_BUILD_TESTS=OFF"
|
||||
fi
|
||||
pip install --no-build-isolation -e ".[dev]" -v
|
||||
# Pass the CMAKE_ARGS to following steps.
|
||||
echo CMAKE_ARGS="$CMAKE_ARGS" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Generate package stubs
|
||||
shell: sh
|
||||
@@ -20,6 +37,5 @@ runs:
|
||||
- name: Build CPP only
|
||||
shell: bash
|
||||
run: |
|
||||
mkdir -p build && cd build
|
||||
cmake .. -DMLX_BUILD_METAL=OFF -DCMAKE_BUILD_TYPE=DEBUG
|
||||
make -j $(nproc)
|
||||
cmake . -B build -DCMAKE_BUILD_TYPE=Debug ${{ steps.python_build.outputs.CMAKE_ARGS }}
|
||||
cmake --build build -j $(nproc)
|
||||
|
||||
7
.github/actions/setup-linux/action.yml
vendored
7
.github/actions/setup-linux/action.yml
vendored
@@ -51,8 +51,6 @@ runs:
|
||||
# Note: the CI machine does not meet CUDA 13's driver requirement.
|
||||
# Compatibility matrix:
|
||||
# https://docs.nvidia.com/deeplearning/cudnn/backend/latest/reference/support-matrix.html
|
||||
# The `nvcc` is installed into `/usr/local/cuda-VERSION/bin/nvcc` - but
|
||||
# it's *not* on the default toolkit path.
|
||||
PACKAGES: |
|
||||
{
|
||||
"cuda-12.6": "libcudnn9-dev-cuda-12 cuda-toolkit-12-6",
|
||||
@@ -60,13 +58,16 @@ runs:
|
||||
"cuda-13.0": "libcudnn9-dev-cuda-13 cuda-toolkit-13-0"
|
||||
}
|
||||
run: |
|
||||
export ARCH=${{ runner.arch == 'arm64' && 'arm64' || 'x86_64' }}
|
||||
# The CUDA binaries are hosted in the "sbsa" repo, the "arm64" repo is
|
||||
# Jetson specific. SBSA means Arm Server Base System Architecture.
|
||||
ARCH=${{ runner.arch == 'arm64' && 'sbsa' || 'x86_64' }}
|
||||
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/$ARCH/cuda-keyring_1.1-1_all.deb
|
||||
sudo dpkg -i cuda-keyring_1.1-1_all.deb
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y \
|
||||
libnccl2 libnccl-dev \
|
||||
${{ fromJson(env.PACKAGES)[inputs.toolkit] }}
|
||||
echo "/usr/local/${{ inputs.toolkit }}/bin" >> $GITHUB_PATH
|
||||
|
||||
- name: CUDA packages and driver report
|
||||
if: ${{ startsWith(inputs.toolkit, 'cuda') }}
|
||||
|
||||
12
.github/actions/test-linux/action.yml
vendored
12
.github/actions/test-linux/action.yml
vendored
@@ -1,8 +1,8 @@
|
||||
name: 'Run Linux tests'
|
||||
|
||||
inputs:
|
||||
cpu-only:
|
||||
description: 'Skip GPU tests'
|
||||
has-gpu:
|
||||
description: 'Run GPU tests'
|
||||
required: false
|
||||
default: false
|
||||
|
||||
@@ -17,7 +17,7 @@ runs:
|
||||
echo "::endgroup::"
|
||||
|
||||
- name: Run distributed tests
|
||||
if: ${{ inputs.cpu-only == 'true' }}
|
||||
if: ${{ inputs.has-gpu == 'false' }}
|
||||
shell: bash
|
||||
run: |
|
||||
echo "::group::Distributed tests"
|
||||
@@ -30,7 +30,7 @@ runs:
|
||||
echo "::endgroup::"
|
||||
|
||||
- name: Run Python tests - CPU
|
||||
if: ${{ inputs.cpu-only == 'true' }}
|
||||
if: ${{ inputs.has-gpu == 'false' }}
|
||||
shell: bash
|
||||
env:
|
||||
DEVICE: cpu
|
||||
@@ -40,7 +40,7 @@ runs:
|
||||
echo "::endgroup::"
|
||||
|
||||
- name: Run Python tests - GPU
|
||||
if: ${{ inputs.cpu-only == 'false' }}
|
||||
if: ${{ inputs.has-gpu == 'true' }}
|
||||
shell: bash
|
||||
env:
|
||||
DEVICE: gpu
|
||||
@@ -59,7 +59,7 @@ runs:
|
||||
echo "::endgroup::"
|
||||
|
||||
- name: Run CPP tests - GPU
|
||||
if: ${{ inputs.cpu-only == 'false' }}
|
||||
if: ${{ inputs.has-gpu == 'true' }}
|
||||
shell: bash
|
||||
env:
|
||||
DEVICE: gpu
|
||||
|
||||
@@ -17,29 +17,51 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
check_lint:
|
||||
name: Check Lint
|
||||
runs-on: ubuntu-22.04
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: pre-commit/action@v3.0.1
|
||||
|
||||
linux_build_and_test:
|
||||
name: Linux (cpu, ${{ matrix.arch }})
|
||||
needs: check_lint
|
||||
strategy:
|
||||
matrix:
|
||||
runner:
|
||||
- ubuntu-22.04
|
||||
- ubuntu-22.04-arm
|
||||
fail-fast: false
|
||||
runs-on: ${{ matrix.runner }}
|
||||
matrix:
|
||||
arch: ['x86_64', 'aarch64']
|
||||
runs-on: ${{ matrix.arch == 'x86_64' && 'ubuntu-22.04' || 'ubuntu-22.04-arm' }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: ./.github/actions/setup-linux
|
||||
- uses: ./.github/actions/build-linux
|
||||
- uses: ./.github/actions/test-linux
|
||||
|
||||
cuda_build_and_test:
|
||||
name: Linux (${{ matrix.toolkit }}, ${{ matrix.arch }})
|
||||
if: github.repository == 'ml-explore/mlx'
|
||||
needs: check_lint
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
arch: ['x86_64', 'aarch64']
|
||||
toolkit: ['cuda-12.6', 'cuda-12.9']
|
||||
runs-on: ${{ matrix.arch == 'x86_64' && 'gpu-t4-4-core' || 'ubuntu-22.04-arm' }}
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: ./.github/actions/setup-linux
|
||||
with:
|
||||
cpu-only: true
|
||||
toolkit: ${{ matrix.toolkit }}
|
||||
- uses: ./.github/actions/build-linux
|
||||
with:
|
||||
toolkit: ${{ matrix.toolkit }}
|
||||
- uses: ./.github/actions/test-linux
|
||||
if: matrix.arch == 'x86_64'
|
||||
with:
|
||||
has-gpu: true
|
||||
|
||||
mac_build_and_test:
|
||||
name: macOS (${{ matrix.macos-target }})
|
||||
if: github.repository == 'ml-explore/mlx'
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -53,25 +75,8 @@ jobs:
|
||||
- uses: ./.github/actions/setup-macos
|
||||
- uses: ./.github/actions/build-macos
|
||||
|
||||
cuda_build_and_test:
|
||||
if: github.repository == 'ml-explore/mlx'
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
toolkit: ['cuda-12.6', 'cuda-12.9']
|
||||
runs-on: gpu-t4-4-core
|
||||
needs: check_lint
|
||||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
- uses: ./.github/actions/setup-linux
|
||||
with:
|
||||
toolkit: ${{ matrix.toolkit }}
|
||||
- uses: ./.github/actions/build-cuda
|
||||
with:
|
||||
toolkit: ${{ matrix.toolkit }}
|
||||
- uses: ./.github/actions/test-linux
|
||||
|
||||
build_documentation:
|
||||
name: Build Documentation
|
||||
if: github.repository == 'ml-explore/mlx'
|
||||
runs-on: ubuntu-22.04
|
||||
needs: check_lint
|
||||
@@ -80,7 +85,7 @@ jobs:
|
||||
- uses: ./.github/actions/build-docs
|
||||
|
||||
linux_fedora_build_cpp:
|
||||
name: Linux Fedora CPP Build (${{ matrix.arch }})
|
||||
name: Linux Fedora (${{ matrix.arch }})
|
||||
needs: check_lint
|
||||
strategy:
|
||||
fail-fast: false
|
||||
2
.github/workflows/nightly.yml
vendored
2
.github/workflows/nightly.yml
vendored
@@ -52,8 +52,6 @@ jobs:
|
||||
python-version: ${{ matrix.python_version }}
|
||||
- uses: ./.github/actions/build-linux
|
||||
- uses: ./.github/actions/test-linux
|
||||
with:
|
||||
cpu-only: true
|
||||
|
||||
build_mac_release:
|
||||
if: github.repository == 'ml-explore/mlx'
|
||||
|
||||
2
.github/workflows/release.yml
vendored
2
.github/workflows/release.yml
vendored
@@ -139,8 +139,6 @@ jobs:
|
||||
toolkit: 'cuda-12.9'
|
||||
- name: Build Python package
|
||||
uses: ./.github/actions/build-cuda-release
|
||||
with:
|
||||
toolkit: 'cuda-12.9'
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v5
|
||||
with:
|
||||
|
||||
@@ -123,14 +123,21 @@ if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8.0)
|
||||
mlx PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:--compress-mode=size>")
|
||||
endif()
|
||||
|
||||
# Compute capability >= 7.0 is required for synchronization between CPU/GPU with
|
||||
# managed memory.
|
||||
# Use native CUDA arch by default.
|
||||
if(NOT DEFINED MLX_CUDA_ARCHITECTURES)
|
||||
execute_process(
|
||||
COMMAND bash detect_cuda_arch.sh
|
||||
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
COMMAND __nvcc_device_query
|
||||
OUTPUT_VARIABLE MLX_CUDA_ARCHITECTURES
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
set(UPGRADABLE_ARCHITECTURES "90;100;121")
|
||||
if(MLX_CUDA_ARCHITECTURES STREQUAL "")
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"Can not get native CUDA arch, must set MLX_CUDA_ARCHITECTURES")
|
||||
elseif(MLX_CUDA_ARCHITECTURES IN_LIST UPGRADABLE_ARCHITECTURES)
|
||||
# Use arch-specific compute capability whenever possible.
|
||||
set(MLX_CUDA_ARCHITECTURES "${MLX_CUDA_ARCHITECTURES}a")
|
||||
endif()
|
||||
endif()
|
||||
message(STATUS "CUDA architectures: ${MLX_CUDA_ARCHITECTURES}")
|
||||
set_target_properties(mlx PROPERTIES CUDA_ARCHITECTURES
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
arch=`__nvcc_device_query`
|
||||
case "$arch" in
|
||||
"90")
|
||||
echo "90a" ;;
|
||||
"100")
|
||||
echo "100a" ;;
|
||||
"121")
|
||||
echo "121a" ;;
|
||||
*)
|
||||
echo "native" ;;
|
||||
esac
|
||||
@@ -5,3 +5,48 @@
|
||||
ncclResult_t ncclGetUniqueId(ncclUniqueId*) {
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
const char* ncclGetErrorString(ncclResult_t result) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ncclResult_t
|
||||
ncclCommInitRank(ncclComm_t* comm, int nranks, ncclUniqueId commId, int rank) {
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
ncclResult_t ncclCommDestroy(ncclComm_t comm) {
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
ncclResult_t ncclAllGather(
|
||||
const void* sendbuff,
|
||||
void* recvbuff,
|
||||
size_t sendcount,
|
||||
ncclDataType_t datatype,
|
||||
ncclComm_t comm,
|
||||
cudaStream_t stream) {
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
ncclResult_t ncclAllReduce(
|
||||
const void* sendbuff,
|
||||
void* recvbuff,
|
||||
size_t count,
|
||||
ncclDataType_t datatype,
|
||||
ncclRedOp_t op,
|
||||
ncclComm_t comm,
|
||||
cudaStream_t stream) {
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
ncclResult_t ncclReduceScatter(
|
||||
const void* sendbuff,
|
||||
void* recvbuff,
|
||||
size_t recvcount,
|
||||
ncclDataType_t datatype,
|
||||
ncclRedOp_t op,
|
||||
ncclComm_t comm,
|
||||
cudaStream_t stream) {
|
||||
return ncclSuccess;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user