Compare commits

...

5 Commits

Author SHA1 Message Date
AN Long
1ff2b713b6 Check isnan in maximum / minimum with CPU backend (#2652)
* Check isnan in maximum / minimum with CPU backend

* Add tests

* fix

---------

Co-authored-by: Awni Hannun <awni@apple.com>
2025-11-03 08:51:14 -08:00
Mike Drob
50514a6146 Set up publishing to PyPI and Test-PyPI (#2721) 2025-11-03 07:20:11 -08:00
Awni Hannun
93d76b0f30 Fix compile multi capture (#2678)
* fix compile when compiling multiple lambdas with the same capture

* add test
2025-11-03 06:33:43 -08:00
David Koski
78678de0cd add null check -- the bundleIdentifier is optional (#2709)
* add null check -- the bundleIdentifier is optional

* use variable
2025-11-03 06:33:21 -08:00
Melissa Kilby
ed9c6b1117 update: add linux fedora container CI - CPP build test only (#2722)
* update: add linux_fedora_build_cpp CI - CPP build test only - x86-64

Signed-off-by: Melissa Kilby <mkilby@apple.com>

* update: add linux_fedora_build_cpp_aarch64 CI - CPP build test only - arm64

Co-authored-by: Mike Drob <mdrob@apple.com>
Signed-off-by: Melissa Kilby <mkilby@apple.com>

* update: convert linux_fedora_build_cpp to matrix.arch loop

Co-authored-by: Mike Drob <mdrob@apple.com>
Signed-off-by: Melissa Kilby <mkilby@apple.com>

---------

Signed-off-by: Melissa Kilby <mkilby@apple.com>
Co-authored-by: Mike Drob <mdrob@apple.com>
2025-11-03 06:33:00 -08:00
12 changed files with 287 additions and 72 deletions

View File

@@ -0,0 +1,27 @@
#!/bin/bash
# CI sanity check for the C++ build inside a Fedora container: installs the
# toolchain, configures a CPU-only debug build, compiles it and runs the C++
# tests. Not used to produce release artifacts.
set -ex

# [Setup] Install dependencies inside the container.
dnf update -y
dnf install -y \
  blas-devel \
  lapack-devel \
  openblas-devel \
  make \
  cmake \
  clang \
  git
dnf clean all

# [C++] CI Build Sanity Check: Verifies code compilation, not for release.
# NOTE(review): CMAKE_ARGS and DEBUG are exported but are not passed to the
# direct cmake invocation below, so warnings-as-errors is not in effect here.
# Confirm whether that is intended before wiring it in.
export CMAKE_ARGS="-DCMAKE_COMPILE_WARNING_AS_ERROR=ON"
export DEBUG=1
export CMAKE_C_COMPILER=/usr/bin/clang
export CMAKE_CXX_COMPILER=/usr/bin/clang++
# CMake picks its compilers from the CC / CXX environment variables; it does
# not read CMAKE_C_COMPILER / CMAKE_CXX_COMPILER from the environment.
export CC="${CMAKE_C_COMPILER}"
export CXX="${CMAKE_CXX_COMPILER}"

mkdir -p build
pushd build
cmake .. -DMLX_BUILD_METAL=OFF -DCMAKE_BUILD_TYPE=DEBUG
make -j "$(nproc)"
./tests/tests
popd

View File

@@ -91,3 +91,24 @@ jobs:
path: wheelhouse/mlx_cuda-*.whl
retention-days: 7
# C++ build sanity check inside a Fedora 42 container, run once per
# host/architecture pair listed in the matrix below.
linux_fedora_build_cpp:
name: Linux Fedora CPP Build (${{ matrix.arch }})
strategy:
# Keep the other matrix entry running if one of them fails.
fail-fast: false
matrix:
include:
- host: ubuntu-22.04
arch: x86_64
- host: ubuntu-22.04-arm
arch: aarch64
runs-on: ${{ matrix.host }}
container:
image: fedora:42
steps:
- name: Checkout code
uses: actions/checkout@v5
# Delegates dependency installation, build and test run to the helper script.
- name: CPP Build Test - No Release
run: |
bash ./.github/scripts/setup+build-cpp-linux-fedora-container.sh

View File

@@ -44,3 +44,25 @@ jobs:
steps:
- uses: actions/checkout@v5
- uses: ./.github/actions/build-docs
# C++ build sanity check inside a Fedora 42 container, run once per
# host/architecture pair listed in the matrix below.
linux_fedora_build_cpp:
name: Linux Fedora CPP Build (${{ matrix.arch }})
strategy:
# Keep the other matrix entry running if one of them fails.
fail-fast: false
matrix:
include:
- host: ubuntu-22.04
arch: x86_64
- host: ubuntu-22.04-arm
arch: aarch64
runs-on: ${{ matrix.host }}
container:
image: fedora:42
steps:
- name: Checkout code
uses: actions/checkout@v5
# Delegates dependency installation, build and test run to the helper script.
- name: CPP Build Test - No Release
run: |
bash ./.github/scripts/setup+build-cpp-linux-fedora-container.sh

View File

@@ -10,6 +10,15 @@ permissions:
contents: read
jobs:
# Chooses the publishing target for the publish jobs: a `push` event selects
# the `pypi` environment and upload URL, any other event selects test-pypi.
setup:
runs-on: ubuntu-latest
outputs:
# `cond && a || b` acts as a ternary in workflow expressions.
pypi_env: ${{ github.event_name == 'push' && 'pypi' || 'test-pypi' }}
pypi_url: ${{ github.event_name == 'push' && 'https://upload.pypi.org/legacy/' || 'https://test.pypi.org/legacy/' }}
steps:
# Placeholder step: the outputs above come from expressions, not from this step.
- name: Set publishing variables
run: echo "Publishing setup complete"
build_documentation:
runs-on: [self-hosted, macos]
steps:
@@ -108,81 +117,90 @@ jobs:
# Publishes the combined Linux and macOS wheels. The environment name and the
# upload URL are taken from the `setup` job outputs.
pypi-publish:
  name: Upload release to PyPI
  runs-on: ubuntu-latest
  needs: [setup, build_linux_release, build_mac_release]
  permissions:
    id-token: write
  environment:
    name: ${{ needs.setup.outputs.pypi_env }}
    # NOTE(review): this URL always points at pypi.org, even when the
    # test-pypi environment is selected; confirm that is acceptable.
    url: https://pypi.org/p/mlx
  steps:
    # Both artifact sets are merged into the same `dist` directory.
    - uses: actions/download-artifact@v6
      with:
        pattern: linux-wheels-*
        merge-multiples: true
        path: dist
    - uses: actions/download-artifact@v6
      with:
        pattern: mac-wheels-*
        merge-multiples: true
        path: dist
    - name: Display structure of downloaded files
      run: ls -R dist
    - name: Publish package distributions to PyPI
      uses: pypa/gh-action-pypi-publish@release/v1
      with:
        repository-url: ${{ needs.setup.outputs.pypi_url }}
# Publishes the `mlx-cuda` artifact. The environment name and the upload URL
# are taken from the `setup` job outputs.
pypi-publish-cuda:
  name: Upload CUDA release to PyPI
  runs-on: ubuntu-latest
  needs: [setup, build_cuda_release]
  permissions:
    id-token: write
  environment:
    name: ${{ needs.setup.outputs.pypi_env }}
    # NOTE(review): this URL always points at pypi.org, even when the
    # test-pypi environment is selected; confirm that is acceptable.
    url: https://pypi.org/p/mlx-cuda
  steps:
    - uses: actions/download-artifact@v6
      with:
        name: mlx-cuda
        path: dist
    - name: Display structure of downloaded files
      run: ls -R dist
    - name: Publish package distributions to PyPI
      uses: pypa/gh-action-pypi-publish@release/v1
      with:
        repository-url: ${{ needs.setup.outputs.pypi_url }}
# Publishes the `mlx-cpu` artifact. The environment name and the upload URL
# are taken from the `setup` job outputs.
pypi-publish-cpu:
  name: Upload CPU release to PyPI
  runs-on: ubuntu-latest
  needs: [setup, build_linux_release]
  permissions:
    id-token: write
  environment:
    name: ${{ needs.setup.outputs.pypi_env }}
    # NOTE(review): this URL always points at pypi.org, even when the
    # test-pypi environment is selected; confirm that is acceptable.
    url: https://pypi.org/p/mlx-cpu
  steps:
    - uses: actions/download-artifact@v6
      with:
        name: mlx-cpu
        path: dist
    - name: Display structure of downloaded files
      run: ls -R dist
    - name: Publish package distributions to PyPI
      uses: pypa/gh-action-pypi-publish@release/v1
      with:
        repository-url: ${{ needs.setup.outputs.pypi_url }}
# Publishes the `mlx-metal` artifact. The environment name and the upload URL
# are taken from the `setup` job outputs.
pypi-publish-metal:
  name: Upload Metal release to PyPI
  runs-on: ubuntu-latest
  needs: [setup, build_mac_release]
  permissions:
    id-token: write
  environment:
    name: ${{ needs.setup.outputs.pypi_env }}
    # NOTE(review): this URL always points at pypi.org, even when the
    # test-pypi environment is selected; confirm that is acceptable.
    url: https://pypi.org/p/mlx-metal
  steps:
    - uses: actions/download-artifact@v6
      with:
        name: mlx-metal
        path: dist
    - name: Display structure of downloaded files
      run: ls -R dist
    - name: Publish package distributions to PyPI
      uses: pypa/gh-action-pypi-publish@release/v1
      with:
        repository-url: ${{ needs.setup.outputs.pypi_url }}

View File

@@ -294,6 +294,11 @@ class array {
return array_desc_->siblings;
}
/** Index of this array within its sibling list, as stored by set_siblings. */
int sibling_position() const {
  const auto& desc = *array_desc_;
  return desc.position;
}
void set_siblings(std::vector<array> siblings, uint16_t position) {
array_desc_->siblings = std::move(siblings);
array_desc_->position = position;

View File

@@ -217,14 +217,20 @@ Simd<T, N> atan2(Simd<T, N> a, Simd<T, N> b) {
/**
 * Element-wise maximum of two SIMD vectors.
 *
 * For non-integral element types the result is overridden with the operand
 * itself wherever that operand is NaN, so NaN inputs are propagated instead of
 * being dropped by the underlying max. (Assumes select(mask, x, y) yields x
 * where mask is set.) Integral types take the plain max.
 */
template <typename T, int N>
Simd<T, N> maximum(Simd<T, N> a, Simd<T, N> b) {
  auto out = Simd<T, N>(asd::max(a.value, b.value));
  if constexpr (!std::is_integral_v<T>) {
    // A NaN in b takes precedence, then a NaN in a, otherwise the plain max.
    out = select(isnan(b), b, select(isnan(a), a, out));
  }
  return out;
}
/**
 * Element-wise minimum of two SIMD vectors.
 *
 * For non-integral element types the result is overridden with the operand
 * itself wherever that operand is NaN, so NaN inputs are propagated instead of
 * being dropped by the underlying min. (Assumes select(mask, x, y) yields x
 * where mask is set.) Integral types take the plain min.
 */
template <typename T, int N>
Simd<T, N> minimum(Simd<T, N> a, Simd<T, N> b) {
  auto out = Simd<T, N>(asd::min(a.value, b.value));
  if constexpr (!std::is_integral_v<T>) {
    // A NaN in b takes precedence, then a NaN in a, otherwise the plain min.
    out = select(isnan(b), b, select(isnan(a), a, out));
  }
  return out;
}
template <typename T, int N>

View File

@@ -119,8 +119,10 @@ std::pair<MTL::Library*, NS::Error*> load_swiftpm_library(
// if SWIFTPM_BUNDLE is a framework identifier, try loading from that
auto frameworks = NS::Bundle::allFrameworks();
for (int i = 0, c = (int)frameworks->count(); i < c; i++) {
auto bundle = reinterpret_cast<NS::Bundle*>(frameworks->object(i));
if (!strcmp(bundle->bundleIdentifier()->utf8String(), SWIFTPM_BUNDLE)) {
const auto bundle = reinterpret_cast<NS::Bundle*>(frameworks->object(i));
const auto identifier = bundle->bundleIdentifier();
if (identifier != nullptr &&
!strcmp(identifier->utf8String(), SWIFTPM_BUNDLE)) {
library = try_load_framework(device, bundle->resourceURL(), lib_name);
if (library != nullptr) {
return {library, nullptr};

View File

@@ -412,51 +412,121 @@ compile_trace(
// Traverses the graph to build a tape and a map of array ids to their parents.
//
// Phase 1 walks depth-first from `outputs`, appending every visited array to
// the tape after its inputs and recording, for each input id, the arrays (and
// their siblings) that consume it together with the input index.
// Phase 2 rebuilds the tape from fresh array objects: arrays without a
// primitive, and the given inputs/outputs, are kept as-is, while every other
// node is re-created from its primitive and remapped inputs. The inputs of
// `outputs` and the parents map are then rewired to the new arrays, which is
// why `outputs` is taken by mutable reference.
//
// Throws std::invalid_argument if the walk reaches one of `original_inputs`.
// NOTE(review): `original_inputs` is indexed with `inputs.size()`, so it is
// presumably at least as long as `inputs` -- confirm against the caller.
std::pair<std::vector<array>, ParentsMap> compile_dfs(
    const std::vector<array>& inputs,
    std::vector<array>& outputs,
    const std::vector<array>& original_inputs) {
  std::vector<array> tape;
  std::unordered_map<std::uintptr_t, std::vector<std::pair<array, int>>>
      parents_map;
  {
    std::function<void(const array&)> recurse;
    std::unordered_set<std::uintptr_t> input_set;
    std::unordered_set<std::uintptr_t> original_input_set;
    for (int i = 0; i < inputs.size(); ++i) {
      input_set.insert(inputs[i].id());
      original_input_set.insert(original_inputs[i].id());
    }

    // DFS the graph to build the tape, and log parents and scalars
    std::unordered_set<std::uintptr_t> cache;
    recurse = [&](const array& a) {
      auto id = a.id();
      if (original_input_set.find(id) != original_input_set.end()) {
        throw std::invalid_argument(
            "[compile] Attempting to compile a function with uncaptured inputs is not allowed.");
      }
      if (cache.find(id) != cache.end()) {
        return;
      }
      for (int i = 0; i < a.inputs().size(); i++) {
        auto& in = a.inputs()[i];
        parents_map[in.id()].push_back({a, i});
        for (auto& s : a.siblings()) {
          parents_map[in.id()].push_back({s, i});
        }
        // Don't recurse on inputs (but add them to the tape for the purpose
        // of future optimizations)
        if (input_set.find(a.id()) == input_set.end()) {
          recurse(in);
        }
      }
      // Siblings are marked visited together so they are taped only once.
      cache.insert(id);
      for (auto& s : a.siblings()) {
        cache.insert(s.id());
      }
      tape.push_back(a);
    };
    for (auto& a : outputs) {
      recurse(a);
    }
  }

  // Deep copy the tape and parents map while preserving inputs and outputs
  std::vector<array> new_tape;
  std::unordered_set<uintptr_t> io_set;
  std::unordered_map<uintptr_t, array> old_to_new;
  for (auto& o : outputs) {
    old_to_new.insert({o.id(), o});
    io_set.insert(o.id());
  }
  for (auto& i : inputs) {
    io_set.insert(i.id());
    old_to_new.insert({i.id(), i});
  }

  new_tape.reserve(tape.size());
  for (auto& arr : tape) {
    // Arrays without a primitive and the function's own inputs/outputs keep
    // their identity; they map to themselves.
    if (!arr.has_primitive() || (io_set.find(arr.id()) != io_set.end())) {
      old_to_new.insert({arr.id(), arr});
      new_tape.push_back(arr);
      continue;
    }

    // The tape lists inputs before their consumers, so every input of `arr`
    // is expected to already have an entry in old_to_new.
    std::vector<array> new_inputs;
    new_inputs.reserve(arr.inputs().size());
    for (auto& i : arr.inputs()) {
      new_inputs.push_back(old_to_new.find(i.id())->second);
    }

    if (arr.siblings().size() > 0) {
      // Multi-output primitive: re-create all outputs at once and keep the
      // one that sits at the same sibling position as `arr`.
      std::vector<Dtype> types;
      std::vector<Shape> shapes;
      auto out = arr.outputs();
      for (auto& o : out) {
        types.push_back(o.dtype());
        shapes.push_back(o.shape());
      }
      auto as = array::make_arrays(
          std::move(shapes), types, arr.primitive_ptr(), std::move(new_inputs));
      for (int i = 0; i < out.size(); ++i) {
        old_to_new.insert({out[i].id(), as[i]});
      }
      new_tape.push_back(as[arr.sibling_position()]);
    } else {
      auto a = array(
          arr.shape(), arr.dtype(), arr.primitive_ptr(), std::move(new_inputs));
      old_to_new.insert({arr.id(), a});
      new_tape.push_back(a);
    }
  }

  // Point each distinct output at the copied graph; io_set is reused here to
  // skip outputs that appear more than once.
  io_set.clear();
  for (auto& o : outputs) {
    if (!(io_set.insert(o.id()).second)) {
      continue;
    }
    for (auto& i : o.inputs()) {
      i = old_to_new.find(i.id())->second;
    }
  }

  tape = std::move(new_tape);

  // Re-key the parents map by the new ids and remap each recorded parent.
  std::unordered_map<std::uintptr_t, std::vector<std::pair<array, int>>>
      new_parents_map;
  for (auto& [id, vec] : parents_map) {
    for (auto& [a, _] : vec) {
      a = old_to_new.find(a.id())->second;
    }
    new_parents_map[old_to_new.find(id)->second.id()] = std::move(vec);
  }
  parents_map = std::move(new_parents_map);

  return {std::move(tape), std::move(parents_map)};
}

View File

@@ -47,7 +47,7 @@ using ParentsMap =
// Traverses the graph to build a tape and a map of array ids to their parents.
// `outputs` is taken by mutable reference because the definition takes it that
// way and rewrites the outputs' inputs.
std::pair<std::vector<array>, ParentsMap> compile_dfs(
    const std::vector<array>& inputs,
    std::vector<array>& outputs,
    const std::vector<array>& original_inputs);
// Simplify the tape.

View File

@@ -1134,6 +1134,30 @@ class TestCompile(mlx_tests.MLXTestCase):
a = fun2(mx.array(-1.0))
self.assertEqual(a.item(), 1.0)
def test_multiple_compile_same_capture(self):
    """Two compiled lambdas that capture related arrays must match eager mode.

    `c` is derived from `b`, so the two compiled lambdas capture arrays
    that share part of the same graph. The compiled result is compared
    against the same computation done without compilation.
    """

    def fun(do_compile):
        t = mx.ones((10,))
        u = (1.0 - t) * 0.0 + t * 3.0
        o = mx.ones((6,))
        b = o[:, None] * u
        c = b * mx.ones_like(u)
        a = mx.ones((6,))
        if do_compile:
            # Each lambda captures an array from the enclosing scope.
            d = mx.compile(lambda x: x @ b)(a)
            e = mx.compile(lambda x: x @ c.T)(d)
        else:
            d = a @ b
            e = d @ c.T
        return e

    out = fun(True)
    mx.eval(out)
    expected = fun(False)
    self.assertTrue(mx.allclose(out, expected))
if __name__ == "__main__":
mlx_tests.MLXTestRunner()

View File

@@ -194,8 +194,7 @@ auto multi_one(const std::vector<array>&) {
// Test helper that ignores its arguments and returns the outputs of a
// divmod on two scalar arrays.
auto multi_two(const std::vector<array>&) {
  auto a = array(1.0);
  auto b = array(1.0);
  // Return divmod's result directly instead of re-wrapping it in a braced
  // std::vector<array> initializer.
  return divmod(a, b);
}
auto multi_three(const std::vector<array>&) {

View File

@@ -4052,3 +4052,24 @@ TEST_CASE("test fp8 conversion") {
auto expected = array({-448.0f, 448.0f});
CHECK(array_equal(out, expected, true).item<bool>());
}
TEST_CASE("test max min with nan") {
  // maximum and minimum are expected to yield NaN wherever either operand is
  // NaN. Index 0 has a NaN in only one operand, index 2 has a NaN in both.
  auto x = array({0.0f, 1.0f, NAN, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f});
  auto y = array({NAN, 1.0f, NAN, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f});
  auto expected_max = array({NAN, 1.0f, NAN, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f});
  auto expected_min = array({NAN, 1.0f, NAN, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f});
  auto max_result = maximum(x, y);
  auto min_result = minimum(x, y);
  CHECK(array_equal(max_result, expected_max, true).item<bool>());
  CHECK(array_equal(min_result, expected_min, true).item<bool>());

  // Swap the operands so the lone NaN is in the first argument as well.
  max_result = maximum(y, x);
  min_result = minimum(y, x);
  CHECK(array_equal(max_result, expected_max, true).item<bool>());
  CHECK(array_equal(min_result, expected_min, true).item<bool>());

  // Test with all NaN values
  x = array({NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN});
  y = array({NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN});
  max_result = maximum(x, y);
  min_result = minimum(x, y);
  auto expected = array({NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN});
  CHECK(array_equal(max_result, expected, true).item<bool>());
  CHECK(array_equal(min_result, expected, true).item<bool>());
}