From 02efb310cac667bc547d1b96f21596c221f84fe7 Mon Sep 17 00:00:00 2001 From: Awni Hannun Date: Tue, 10 Sep 2024 15:15:17 -0700 Subject: [PATCH] Xcode 160 (#1384) * xcode 16.0 with debug tests * limit nproc for builds * vmap bug * assert bug * run python tests in debug mode * fix view, bool copies preserve bits' * actual view fix --- .circleci/config.yml | 47 ++++++++++++++++++++----------- docs/src/install.rst | 6 ++-- mlx/backend/common/primitives.cpp | 13 +++++++-- mlx/backend/metal/rope.cpp | 1 - python/src/transforms.cpp | 2 +- 5 files changed, 45 insertions(+), 24 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index bd207ae03..8548c15c4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -38,8 +38,12 @@ jobs: - run: name: Install Python package command: | - CMAKE_ARGS="-DMLX_BUILD_METAL=OFF" CMAKE_BUILD_PARALLEL_LEVEL="" python3 setup.py build_ext --inplace - CMAKE_ARGS="-DMLX_BUILD_METAL=OFF" CMAKE_BUILD_PARALLEL_LEVEL="" python3 setup.py develop + CMAKE_ARGS="-DMLX_BUILD_METAL=OFF" \ + CMAKE_BUILD_PARALLEL_LEVEL=`nproc` \ + python3 setup.py build_ext --inplace + CMAKE_ARGS="-DMLX_BUILD_METAL=OFF" \ + CMAKE_BUILD_PARALLEL_LEVEL=`nproc` \ + python3 setup.py develop - run: name: Generate package stubs command: | @@ -53,7 +57,9 @@ jobs: - run: name: Build CPP only command: | - mkdir -p build && cd build && cmake .. -DMLX_BUILD_METAL=OFF && make -j + mkdir -p build && cd build + cmake .. -DMLX_BUILD_METAL=OFF -DCMAKE_BUILD_TYPE=DEBUG + make -j `nproc` - run: name: Run CPP tests command: ./build/tests/tests @@ -86,7 +92,7 @@ jobs: name: Install Python package command: | source env/bin/activate - CMAKE_BUILD_PARALLEL_LEVEL="" pip install -e . -v + DEBUG=1 CMAKE_BUILD_PARALLEL_LEVEL=`sysctl -n hw.ncpu` pip install -e . -v - run: name: Generate package stubs command: | @@ -113,7 +119,7 @@ jobs: name: Build CPP only command: | source env/bin/activate - mkdir -p build && cd build && cmake .. && make -j + mkdir -p build && cd build && cmake .. && make -j `sysctl -n hw.ncpu` - run: name: Run CPP tests command: | @@ -123,14 +129,23 @@ jobs: command: | source env/bin/activate cd build/ - cmake .. -DCMAKE_BUILD_TYPE=MinSizeRel -DBUILD_SHARED_LIBS=ON -DMLX_BUILD_CPU=OFF -DMLX_BUILD_SAFETENSORS=OFF -DMLX_BUILD_GGUF=OFF -DMLX_METAL_JIT=ON - make -j + cmake .. -DCMAKE_BUILD_TYPE=MinSizeRel \ + -DBUILD_SHARED_LIBS=ON \ + -DMLX_BUILD_CPU=OFF \ + -DMLX_BUILD_SAFETENSORS=OFF \ + -DMLX_BUILD_GGUF=OFF \ + -DMLX_METAL_JIT=ON + make -j `sysctl -n hw.ncpu` - run: name: Run Python tests with JIT command: | source env/bin/activate - CMAKE_BUILD_PARALLEL_LEVEL="" CMAKE_ARGS="-DMLX_METAL_JIT=ON" pip install -e . -v - LOW_MEMORY=1 DEVICE=gpu METAL_DEVICE_WRAPPER_TYPE=1 METAL_DEBUG_ERROR_MODE=0 python -m xmlrunner discover -v python/tests -o test-results/gpu_jit + CMAKE_BUILD_PARALLEL_LEVEL=`sysctl -n hw.ncpu` \ + CMAKE_ARGS="-DMLX_METAL_JIT=ON" \ + pip install -e . -v + LOW_MEMORY=1 DEVICE=gpu METAL_DEVICE_WRAPPER_TYPE=1 \ + METAL_DEBUG_ERROR_MODE=0 \ + python -m xmlrunner discover -v python/tests -o test-results/gpu_jit build_release: parameters: @@ -167,7 +182,7 @@ jobs: command: | source env/bin/activate DEV_RELEASE=1 \ - CMAKE_BUILD_PARALLEL_LEVEL="" \ + CMAKE_BUILD_PARALLEL_LEVEL=`sysctl -n hw.ncpu` \ pip install . -v - run: name: Generate package stubs @@ -180,7 +195,7 @@ jobs: command: | source env/bin/activate << parameters.build_env >> \ - CMAKE_BUILD_PARALLEL_LEVEL="" \ + CMAKE_BUILD_PARALLEL_LEVEL=`sysctl -n hw.ncpu` \ python -m build -w - when: condition: << parameters.build_env >> @@ -229,12 +244,12 @@ jobs: pip install patchelf pip install build << parameters.extra_env >> \ - CMAKE_BUILD_PARALLEL_LEVEL="" \ + CMAKE_BUILD_PARALLEL_LEVEL=`nproc` \ pip install . -v pip install typing_extensions python setup.py generate_stubs << parameters.extra_env >> \ - CMAKE_BUILD_PARALLEL_LEVEL="" \ + CMAKE_BUILD_PARALLEL_LEVEL=`nproc` \ python -m build --wheel auditwheel show dist/* auditwheel repair dist/* --plat manylinux_2_31_x86_64 @@ -255,7 +270,7 @@ workflows: - mac_build_and_test: matrix: parameters: - xcode_version: ["15.0.0", "15.2.0"] + xcode_version: ["15.0.0", "15.2.0", "16.0.0"] - linux_build_and_test build_pypi_release: @@ -290,7 +305,7 @@ workflows: requires: [ hold ] matrix: parameters: - xcode_version: ["15.0.0", "15.2.0"] + xcode_version: ["15.0.0", "15.2.0", "16.0.0"] - linux_build_and_test: requires: [ hold ] nightly_build: @@ -314,7 +329,7 @@ workflows: matrix: parameters: python_version: ["3.8", "3.9", "3.10", "3.11", "3.12"] - xcode_version: ["15.0.0", "15.2.0"] + xcode_version: ["15.0.0", "15.2.0", "16.0.0"] build_env: ["DEV_RELEASE=1"] linux_test_release: when: diff --git a/docs/src/install.rst b/docs/src/install.rst index c8cf5723b..edc3d6143 100644 --- a/docs/src/install.rst +++ b/docs/src/install.rst @@ -74,20 +74,20 @@ Then simply build and install MLX using pip: .. code-block:: shell - CMAKE_BUILD_PARALLEL_LEVEL="" pip install . + CMAKE_BUILD_PARALLEL_LEVEL=8 pip install . For developing, install the package with development dependencies, and use an editable install: .. code-block:: shell - CMAKE_BUILD_PARALLEL_LEVEL="" pip install -e ".[dev]" + CMAKE_BUILD_PARALLEL_LEVEL=8 pip install -e ".[dev]" Once the development dependencies are installed, you can build faster with: .. code-block:: shell - CMAKE_BUILD_PARALLEL_LEVEL="" python setup.py build_ext -j --inplace + CMAKE_BUILD_PARALLEL_LEVEL=8 python setup.py build_ext --inplace Run the tests with: diff --git a/mlx/backend/common/primitives.cpp b/mlx/backend/common/primitives.cpp index 14aa52bad..23c5efa19 100644 --- a/mlx/backend/common/primitives.cpp +++ b/mlx/backend/common/primitives.cpp @@ -612,11 +612,18 @@ void View::eval_cpu(const std::vector& inputs, array& out) { strides[i] /= obytes; } out.copy_shared_buffer( - in, strides, in.flags(), in.data_size() * obytes / ibytes); + in, strides, in.flags(), in.data_size() * ibytes / obytes); } else { - auto tmp = array(in.shape(), in.dtype(), nullptr, {}); + auto tmp = array( + in.shape(), in.dtype() == bool_ ? uint8 : in.dtype(), nullptr, {}); tmp.set_data(allocator::malloc_or_wait(tmp.nbytes())); - copy_inplace(in, tmp, CopyType::General); + if (in.dtype() == bool_) { + auto in_tmp = array(in.shape(), uint8, nullptr, {}); + in_tmp.copy_shared_buffer(in); + copy_inplace(in_tmp, tmp, CopyType::General); + } else { + copy_inplace(in, tmp, CopyType::General); + } auto flags = out.flags(); flags.contiguous = true; diff --git a/mlx/backend/metal/rope.cpp b/mlx/backend/metal/rope.cpp index d1d07df2c..fc6aa347c 100644 --- a/mlx/backend/metal/rope.cpp +++ b/mlx/backend/metal/rope.cpp @@ -10,7 +10,6 @@ constexpr int n_per_thread = 4; void RoPE::eval_gpu( const std::vector& inputs, std::vector& outputs) { - assert(inputs.size() == 1); assert(outputs.size() == 1); auto& in = inputs[0]; auto& out = outputs[0]; diff --git a/python/src/transforms.cpp b/python/src/transforms.cpp index 32c5b94b8..82759cfcc 100644 --- a/python/src/transforms.cpp +++ b/python/src/transforms.cpp @@ -803,7 +803,7 @@ class PyCustomFunction { "[custom vmap] Function should only accept positional arguments"); } - int arr_index; + int arr_index = 0; auto new_axes = nb::cast(tree_map(args, [&](nb::handle element) { int axis = axes[arr_index++];