mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
Compare commits
5 Commits
39b04ce638
...
1ff2b713b6
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1ff2b713b6 | ||
|
|
50514a6146 | ||
|
|
93d76b0f30 | ||
|
|
78678de0cd | ||
|
|
ed9c6b1117 |
27
.github/scripts/setup+build-cpp-linux-fedora-container.sh
vendored
Executable file
27
.github/scripts/setup+build-cpp-linux-fedora-container.sh
vendored
Executable file
@@ -0,0 +1,27 @@
|
||||
#!/bin/bash
# CI helper: installs a C++ toolchain with dnf, then configures, builds and
# runs the C++ test binary from a fresh ./build directory.
# -e aborts on the first failing command, -x echoes each command to the log.
set -ex

# [Setup] Install dependencies inside the container.
dnf update -y
dnf install -y \
  blas-devel \
  lapack-devel \
  openblas-devel \
  make \
  cmake \
  clang \
  git
dnf clean all

# [C++] CI Build Sanity Check: Verifies code compilation, not for release.
# NOTE(review): CMAKE_ARGS and DEBUG are not consumed by the direct `cmake`
# call below; presumably they target another build entry point. Kept as-is.
export CMAKE_ARGS="-DCMAKE_COMPILE_WARNING_AS_ERROR=ON"
export DEBUG=1
export CMAKE_C_COMPILER=/usr/bin/clang
export CMAKE_CXX_COMPILER=/usr/bin/clang++
# CMake picks its compilers from the CC/CXX environment variables on the first
# configure; environment variables named CMAKE_C_COMPILER/CMAKE_CXX_COMPILER
# are ignored. Mirror them so clang is really the compiler being exercised.
export CC="${CMAKE_C_COMPILER}"
export CXX="${CMAKE_CXX_COMPILER}"

mkdir -p build
pushd build
cmake .. -DMLX_BUILD_METAL=OFF -DCMAKE_BUILD_TYPE=DEBUG
make -j "$(nproc)"
./tests/tests
popd
|
||||
21
.github/workflows/nightly.yml
vendored
21
.github/workflows/nightly.yml
vendored
@@ -91,3 +91,24 @@ jobs:
|
||||
path: wheelhouse/mlx_cuda-*.whl
|
||||
retention-days: 7
|
||||
|
||||
# Runs the Fedora container build script once per matrix entry
# (x86_64 and aarch64 hosts), inside a fedora:42 container.
linux_fedora_build_cpp:
  name: Linux Fedora CPP Build (${{ matrix.arch }})
  runs-on: ${{ matrix.host }}
  # String form of `container:` is shorthand for `container.image`.
  container: fedora:42
  strategy:
    # Let the other architecture finish even if one fails.
    fail-fast: false
    matrix:
      include:
        - { host: ubuntu-22.04, arch: x86_64 }
        - { host: ubuntu-22.04-arm, arch: aarch64 }
  steps:
    - name: Checkout code
      uses: actions/checkout@v5
    - name: CPP Build Test - No Release
      run: bash ./.github/scripts/setup+build-cpp-linux-fedora-container.sh
|
||||
|
||||
22
.github/workflows/pull_request.yml
vendored
22
.github/workflows/pull_request.yml
vendored
@@ -44,3 +44,25 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: ./.github/actions/build-docs
|
||||
|
||||
# Runs the Fedora container build script once per matrix entry
# (x86_64 and aarch64 hosts), inside a fedora:42 container.
linux_fedora_build_cpp:
  name: Linux Fedora CPP Build (${{ matrix.arch }})
  runs-on: ${{ matrix.host }}
  # String form of `container:` is shorthand for `container.image`.
  container: fedora:42
  strategy:
    # Let the other architecture finish even if one fails.
    fail-fast: false
    matrix:
      include:
        - { host: ubuntu-22.04, arch: x86_64 }
        - { host: ubuntu-22.04-arm, arch: aarch64 }
  steps:
    - name: Checkout code
      uses: actions/checkout@v5
    - name: CPP Build Test - No Release
      run: bash ./.github/scripts/setup+build-cpp-linux-fedora-container.sh
|
||||
|
||||
68
.github/workflows/release.yml
vendored
68
.github/workflows/release.yml
vendored
@@ -10,6 +10,15 @@ permissions:
|
||||
contents: read
|
||||
|
||||
jobs:
|
||||
# Exposes the publishing target as job outputs: a `push` event selects the
# `pypi` environment and upload URL, any other event selects `test-pypi`.
setup:
  outputs:
    pypi_env: ${{ github.event_name == 'push' && 'pypi' || 'test-pypi' }}
    pypi_url: ${{ github.event_name == 'push' && 'https://upload.pypi.org/legacy/' || 'https://test.pypi.org/legacy/' }}
  runs-on: ubuntu-latest
  steps:
    # The step only logs a message; the outputs above are pure expressions.
    - name: Set publishing variables
      run: echo "Publishing setup complete"
|
||||
|
||||
build_documentation:
|
||||
runs-on: [self-hosted, macos]
|
||||
steps:
|
||||
@@ -108,81 +117,90 @@ jobs:
|
||||
# Downloads the Linux and macOS wheel artifacts into ./dist and publishes them.
# The environment name and upload URL come from the `setup` job outputs.
pypi-publish:
  name: Upload release to PyPI
  runs-on: ubuntu-latest
  needs: [setup, build_linux_release, build_mac_release]
  permissions:
    id-token: write
  environment:
    name: ${{ needs.setup.outputs.pypi_env }}
    url: https://pypi.org/p/mlx
  steps:
    # Both downloads land in the same directory, which is then published.
    - uses: actions/download-artifact@v6
      with:
        pattern: linux-wheels-*
        merge-multiples: true
        path: dist
    - uses: actions/download-artifact@v6
      with:
        pattern: mac-wheels-*
        merge-multiples: true
        path: dist
    - name: Display structure of downloaded files
      run: ls -R dist
    - name: Publish package distributions to PyPI
      uses: pypa/gh-action-pypi-publish@release/v1
      with:
        repository-url: ${{ needs.setup.outputs.pypi_url }}
|
||||
|
||||
# Downloads the `mlx-cuda` artifact into ./dist and publishes it.
# The environment name and upload URL come from the `setup` job outputs.
pypi-publish-cuda:
  name: Upload CUDA release to PyPI
  runs-on: ubuntu-latest
  needs: [setup, build_cuda_release]
  permissions:
    id-token: write
  environment:
    name: ${{ needs.setup.outputs.pypi_env }}
    url: https://pypi.org/p/mlx-cuda
  steps:
    - uses: actions/download-artifact@v6
      with:
        name: mlx-cuda
        path: dist
    - name: Display structure of downloaded files
      run: ls -R dist
    - name: Publish package distributions to PyPI
      uses: pypa/gh-action-pypi-publish@release/v1
      with:
        repository-url: ${{ needs.setup.outputs.pypi_url }}
|
||||
|
||||
# Downloads the `mlx-cpu` artifact into ./dist and publishes it.
# The environment name and upload URL come from the `setup` job outputs.
pypi-publish-cpu:
  name: Upload CPU release to PyPI
  runs-on: ubuntu-latest
  needs: [setup, build_linux_release]
  permissions:
    id-token: write
  environment:
    name: ${{ needs.setup.outputs.pypi_env }}
    url: https://pypi.org/p/mlx-cpu
  steps:
    - uses: actions/download-artifact@v6
      with:
        name: mlx-cpu
        path: dist
    - name: Display structure of downloaded files
      run: ls -R dist
    - name: Publish package distributions to PyPI
      uses: pypa/gh-action-pypi-publish@release/v1
      with:
        repository-url: ${{ needs.setup.outputs.pypi_url }}
|
||||
|
||||
# Downloads the `mlx-metal` artifact into ./dist and publishes it.
# The environment name and upload URL come from the `setup` job outputs.
pypi-publish-metal:
  name: Upload Metal release to PyPI
  runs-on: ubuntu-latest
  needs: [setup, build_mac_release]
  permissions:
    id-token: write
  environment:
    name: ${{ needs.setup.outputs.pypi_env }}
    url: https://pypi.org/p/mlx-metal
  steps:
    - uses: actions/download-artifact@v6
      with:
        name: mlx-metal
        path: dist
    - name: Display structure of downloaded files
      run: ls -R dist
    - name: Publish package distributions to PyPI
      uses: pypa/gh-action-pypi-publish@release/v1
      with:
        repository-url: ${{ needs.setup.outputs.pypi_url }}
|
||||
|
||||
|
||||
@@ -294,6 +294,11 @@ class array {
|
||||
return array_desc_->siblings;
|
||||
}
|
||||
|
||||
/** The array's position in the sibling list. */
|
||||
int sibling_position() const {
|
||||
return array_desc_->position;
|
||||
}
|
||||
|
||||
void set_siblings(std::vector<array> siblings, uint16_t position) {
|
||||
array_desc_->siblings = std::move(siblings);
|
||||
array_desc_->position = position;
|
||||
|
||||
@@ -217,14 +217,20 @@ Simd<T, N> atan2(Simd<T, N> a, Simd<T, N> b) {
|
||||
|
||||
template <typename T, int N>
|
||||
Simd<T, N> maximum(Simd<T, N> a, Simd<T, N> b) {
|
||||
// TODO add isnan
|
||||
return asd::max(a.value, b.value);
|
||||
auto out = Simd<T, N>(asd::max(a.value, b.value));
|
||||
if constexpr (!std::is_integral_v<T>) {
|
||||
out = select(isnan(b), b, select(isnan(a), a, out));
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
template <typename T, int N>
|
||||
Simd<T, N> minimum(Simd<T, N> a, Simd<T, N> b) {
|
||||
// TODO add isnan
|
||||
return asd::min(a.value, b.value);
|
||||
auto out = Simd<T, N>(asd::min(a.value, b.value));
|
||||
if constexpr (!std::is_integral_v<T>) {
|
||||
out = select(isnan(b), b, select(isnan(a), a, out));
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
template <typename T, int N>
|
||||
|
||||
@@ -119,8 +119,10 @@ std::pair<MTL::Library*, NS::Error*> load_swiftpm_library(
|
||||
// if SWIFTPM_BUNDLE is a framework identifier, try loading from that
|
||||
auto frameworks = NS::Bundle::allFrameworks();
|
||||
for (int i = 0, c = (int)frameworks->count(); i < c; i++) {
|
||||
auto bundle = reinterpret_cast<NS::Bundle*>(frameworks->object(i));
|
||||
if (!strcmp(bundle->bundleIdentifier()->utf8String(), SWIFTPM_BUNDLE)) {
|
||||
const auto bundle = reinterpret_cast<NS::Bundle*>(frameworks->object(i));
|
||||
const auto identifier = bundle->bundleIdentifier();
|
||||
if (identifier != nullptr &&
|
||||
!strcmp(identifier->utf8String(), SWIFTPM_BUNDLE)) {
|
||||
library = try_load_framework(device, bundle->resourceURL(), lib_name);
|
||||
if (library != nullptr) {
|
||||
return {library, nullptr};
|
||||
|
||||
146
mlx/compile.cpp
146
mlx/compile.cpp
@@ -412,51 +412,121 @@ compile_trace(
|
||||
// Traverses the graph to build a tape and a map of array ids to their parents
|
||||
std::pair<std::vector<array>, ParentsMap> compile_dfs(
|
||||
const std::vector<array>& inputs,
|
||||
const std::vector<array>& outputs,
|
||||
std::vector<array>& outputs,
|
||||
const std::vector<array>& original_inputs) {
|
||||
std::function<void(const array&)> recurse;
|
||||
std::vector<array> tape;
|
||||
std::unordered_set<std::uintptr_t> input_set;
|
||||
std::unordered_set<std::uintptr_t> original_input_set;
|
||||
std::unordered_map<std::uintptr_t, std::vector<std::pair<array, int>>>
|
||||
parents_map;
|
||||
for (int i = 0; i < inputs.size(); ++i) {
|
||||
input_set.insert(inputs[i].id());
|
||||
original_input_set.insert(original_inputs[i].id());
|
||||
{
|
||||
std::function<void(const array&)> recurse;
|
||||
std::unordered_set<std::uintptr_t> input_set;
|
||||
std::unordered_set<std::uintptr_t> original_input_set;
|
||||
for (int i = 0; i < inputs.size(); ++i) {
|
||||
input_set.insert(inputs[i].id());
|
||||
original_input_set.insert(original_inputs[i].id());
|
||||
}
|
||||
|
||||
// DFS the graph to build the tape, and log parents and scalars
|
||||
std::unordered_set<std::uintptr_t> cache;
|
||||
recurse = [&](const array& a) {
|
||||
auto id = a.id();
|
||||
if (original_input_set.find(id) != original_input_set.end()) {
|
||||
throw std::invalid_argument(
|
||||
"[compile] Attempting to compile a function with uncaptured inputs is not allowed.");
|
||||
}
|
||||
if (cache.find(id) != cache.end()) {
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < a.inputs().size(); i++) {
|
||||
auto& in = a.inputs()[i];
|
||||
parents_map[in.id()].push_back({a, i});
|
||||
for (auto& s : a.siblings()) {
|
||||
parents_map[in.id()].push_back({s, i});
|
||||
}
|
||||
// Don't recurse on inputs (but add them to the tape for the purpose
|
||||
// of future optimizations)
|
||||
if (input_set.find(a.id()) == input_set.end()) {
|
||||
recurse(in);
|
||||
}
|
||||
}
|
||||
cache.insert(id);
|
||||
for (auto& s : a.siblings()) {
|
||||
cache.insert(s.id());
|
||||
}
|
||||
tape.push_back(a);
|
||||
};
|
||||
for (auto& a : outputs) {
|
||||
recurse(a);
|
||||
}
|
||||
}
|
||||
|
||||
// DFS the graph to build the tape, and log parents and scalars
|
||||
std::unordered_set<std::uintptr_t> cache;
|
||||
recurse = [&](const array& a) {
|
||||
auto id = a.id();
|
||||
if (original_input_set.find(id) != original_input_set.end()) {
|
||||
throw std::invalid_argument(
|
||||
"[compile] Attempting to compile a function with uncaptured inputs is not allowed.");
|
||||
}
|
||||
if (cache.find(id) != cache.end()) {
|
||||
return;
|
||||
}
|
||||
for (int i = 0; i < a.inputs().size(); i++) {
|
||||
auto& in = a.inputs()[i];
|
||||
parents_map[in.id()].push_back({a, i});
|
||||
for (auto& s : a.siblings()) {
|
||||
parents_map[in.id()].push_back({s, i});
|
||||
}
|
||||
// Don't recurse on inputs (but add them to the tape for the purpose
|
||||
// of future optimizations)
|
||||
if (input_set.find(a.id()) == input_set.end()) {
|
||||
recurse(in);
|
||||
}
|
||||
}
|
||||
cache.insert(id);
|
||||
for (auto& s : a.siblings()) {
|
||||
cache.insert(s.id());
|
||||
}
|
||||
tape.push_back(a);
|
||||
};
|
||||
for (auto& a : outputs) {
|
||||
recurse(a);
|
||||
// Deep copy the tape and parents map while preserving inputs and outputs
|
||||
std::vector<array> new_tape;
|
||||
std::unordered_set<uintptr_t> io_set;
|
||||
std::unordered_map<uintptr_t, array> old_to_new;
|
||||
for (auto& o : outputs) {
|
||||
old_to_new.insert({o.id(), o});
|
||||
io_set.insert(o.id());
|
||||
}
|
||||
for (auto& i : inputs) {
|
||||
io_set.insert(i.id());
|
||||
old_to_new.insert({i.id(), i});
|
||||
}
|
||||
|
||||
new_tape.reserve(tape.size());
|
||||
for (auto& arr : tape) {
|
||||
if (!arr.has_primitive() || (io_set.find(arr.id()) != io_set.end())) {
|
||||
old_to_new.insert({arr.id(), arr});
|
||||
new_tape.push_back(arr);
|
||||
continue;
|
||||
}
|
||||
std::vector<array> inputs;
|
||||
inputs.reserve(arr.inputs().size());
|
||||
for (auto& i : arr.inputs()) {
|
||||
inputs.push_back(old_to_new.find(i.id())->second);
|
||||
}
|
||||
if (arr.siblings().size() > 0) {
|
||||
std::vector<Dtype> types;
|
||||
std::vector<Shape> shapes;
|
||||
auto out = arr.outputs();
|
||||
for (auto& o : out) {
|
||||
types.push_back(o.dtype());
|
||||
shapes.push_back(o.shape());
|
||||
}
|
||||
auto as = array::make_arrays(
|
||||
std::move(shapes), types, arr.primitive_ptr(), std::move(inputs));
|
||||
for (int i = 0; i < out.size(); ++i) {
|
||||
old_to_new.insert({out[i].id(), as[i]});
|
||||
}
|
||||
new_tape.push_back(as[arr.sibling_position()]);
|
||||
} else {
|
||||
auto a = array(
|
||||
arr.shape(), arr.dtype(), arr.primitive_ptr(), std::move(inputs));
|
||||
old_to_new.insert({arr.id(), a});
|
||||
new_tape.push_back(a);
|
||||
}
|
||||
}
|
||||
io_set.clear();
|
||||
for (auto& o : outputs) {
|
||||
if (!(io_set.insert(o.id()).second)) {
|
||||
continue;
|
||||
}
|
||||
for (auto& i : o.inputs()) {
|
||||
i = old_to_new.find(i.id())->second;
|
||||
}
|
||||
}
|
||||
tape = std::move(new_tape);
|
||||
|
||||
std::unordered_map<std::uintptr_t, std::vector<std::pair<array, int>>>
|
||||
new_parents_map;
|
||||
for (auto& [id, vec] : parents_map) {
|
||||
for (auto& [a, _] : vec) {
|
||||
a = old_to_new.find(a.id())->second;
|
||||
}
|
||||
new_parents_map[old_to_new.find(id)->second.id()] = std::move(vec);
|
||||
}
|
||||
parents_map = std::move(new_parents_map);
|
||||
|
||||
return {tape, parents_map};
|
||||
}
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ using ParentsMap =
|
||||
// Traverses the graph to build a tape and a map of array ids to their parents
|
||||
std::pair<std::vector<array>, ParentsMap> compile_dfs(
|
||||
const std::vector<array>& inputs,
|
||||
const std::vector<array>& outputs,
|
||||
std::vector<array>& outputs,
|
||||
const std::vector<array>& original_inputs);
|
||||
|
||||
// Simplify the tape.
|
||||
|
||||
@@ -1134,6 +1134,30 @@ class TestCompile(mlx_tests.MLXTestCase):
|
||||
a = fun2(mx.array(-1.0))
|
||||
self.assertEqual(a.item(), 1.0)
|
||||
|
||||
def test_multiple_compile_same_capture(self):
    """Two compiled lambdas capturing arrays derived from the same
    intermediate must match the uncompiled computation."""

    def fun(do_compile):
        # `scaled` feeds both captured arrays below.
        base = mx.ones((10,))
        scaled = (1.0 - base) * 0.0 + base * 3.0

        col = mx.ones((6,))
        b = col[:, None] * scaled

        c = b * mx.ones_like(scaled)

        a = mx.ones((6,))
        if not do_compile:
            first = a @ b
            return first @ c.T

        # Each lambda captures a different array built from `scaled`.
        first = mx.compile(lambda x: x @ b)(a)
        return mx.compile(lambda x: x @ c.T)(first)

    compiled = fun(True)
    mx.eval(compiled)
    reference = fun(False)
    self.assertTrue(mx.allclose(compiled, reference))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
mlx_tests.MLXTestRunner()
|
||||
|
||||
@@ -194,8 +194,7 @@ auto multi_one(const std::vector<array>&) {
|
||||
auto multi_two(const std::vector<array>&) {
|
||||
auto a = array(1.0);
|
||||
auto b = array(1.0);
|
||||
auto c = divmod(a, b);
|
||||
return std::vector<array>{c};
|
||||
return divmod(a, b);
|
||||
}
|
||||
|
||||
auto multi_three(const std::vector<array>&) {
|
||||
|
||||
@@ -4052,3 +4052,24 @@ TEST_CASE("test fp8 conversion") {
|
||||
auto expected = array({-448.0f, 448.0f});
|
||||
CHECK(array_equal(out, expected, true).item<bool>());
|
||||
}
|
||||
|
||||
TEST_CASE("test max min with nan") {
  // Mixed inputs: a NaN in either operand must appear in the result of both
  // maximum and minimum (array_equal is called with its third argument true).
  {
    auto lhs = array({0.0f, 1.0f, NAN, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f});
    auto rhs = array({NAN, 1.0f, NAN, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f});
    auto want_max = array({NAN, 1.0f, NAN, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f});
    auto want_min = array({NAN, 1.0f, NAN, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f});
    auto got_max = maximum(lhs, rhs);
    auto got_min = minimum(lhs, rhs);
    CHECK(array_equal(got_max, want_max, true).item<bool>());
    CHECK(array_equal(got_min, want_min, true).item<bool>());
  }

  // Both operands entirely NaN: the result is entirely NaN.
  {
    auto lhs = array({NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN});
    auto rhs = array({NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN});
    auto got_max = maximum(lhs, rhs);
    auto got_min = minimum(lhs, rhs);
    auto want = array({NAN, NAN, NAN, NAN, NAN, NAN, NAN, NAN});
    CHECK(array_equal(got_max, want, true).item<bool>());
    CHECK(array_equal(got_min, want, true).item<bool>());
  }
}
|
||||
|
||||
Reference in New Issue
Block a user