From 44c1ce5e6af2625571cd384e5be49e9778770ffc Mon Sep 17 00:00:00 2001
From: Josh Soref <2119212+jsoref@users.noreply.github.com>
Date: Tue, 2 Jan 2024 00:08:17 -0500
Subject: [PATCH] Spelling (#342)

* spelling: accumulates

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: across

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: additional

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: against

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: among

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: array

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: at least

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: available

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: axes

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: basically

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: bfloat

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: bounds

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: broadcast

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: buffer

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: class

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: coefficients

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: collision

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: combinations

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: committing

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: computation

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: consider

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: constructing

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: conversions

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: correctly

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: corresponding

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: declaration

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: default

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: dependency

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: destination

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: destructor

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: dimensions

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: divided

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: element-wise

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: elements

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: endianness

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: equivalent

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: explicitly

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: github

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: indices

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: irregularly

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: memory

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: metallib

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: negative

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: notable

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: optional

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: otherwise

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: overridden

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: partially

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: partition

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: perform

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: perturbations

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: positively

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: primitive

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: repeat

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: repeats

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: respect

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: respectively

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: result

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: rounding

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: separate

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: skipping

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: structure

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: the

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: transpose

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: unnecessary

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: unneeded

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

* spelling: unsupported

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>

---------

Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com>
---
 benchmarks/python/blas/bench_gemv.py | 4 +--
 benchmarks/python/comparative/compare.py | 2 +-
 cmake/extension.cmake | 4 +--
 docs/README.md | 2 +-
 docs/src/dev/extensions.rst | 32 +++++++++++-----------
 docs/src/index.rst | 2 +-
 examples/cpp/tutorial.cpp | 2 +-
 examples/extensions/axpby/axpby.cpp | 20 +++++++-------
 examples/extensions/axpby/axpby.h | 4 +--
 examples/extensions/axpby/axpby.metal | 2 +-
 examples/extensions/bindings.cpp | 2 +-
 mlx/allocator.h | 2 +-
 mlx/array.cpp | 2 +-
 mlx/backend/common/load.cpp | 8 +++---
 mlx/backend/metal/allocator.cpp | 2 +-
 mlx/backend/metal/conv.cpp | 4 +--
 mlx/backend/metal/indexing.cpp | 4 +--
 mlx/backend/metal/kernels/arg_reduce.metal | 2 +-
 mlx/backend/metal/kernels/complex.h | 2 +-
 mlx/backend/metal/kernels/gemm/conv.h | 4 +--
 mlx/backend/metal/kernels/gemm/gemm.h | 6 ++--
 mlx/backend/metal/kernels/gemv.metal | 8 +++---
 mlx/backend/metal/kernels/reduce.metal | 2 +-
 mlx/backend/metal/kernels/sort.metal | 6 ++--
 mlx/backend/metal/matmul.cpp | 2 +-
 mlx/backend/metal/reduce.cpp | 4 +--
 mlx/backend/metal/sort.cpp | 4 +--
 mlx/backend/metal/utils.h | 2 +-
 mlx/fft.cpp | 2 +-
 mlx/io/load.cpp | 2 +-
 mlx/ops.cpp | 12 ++++----
 mlx/ops.h | 2 +-
 mlx/primitives.h | 4 +--
 mlx/random.cpp | 2 +-
 mlx/random.h | 2 +-
 mlx/transforms.h | 4 +--
 mlx/types/fp16.h | 2 +-
 python/mlx/nn/layers/normalization.py | 2 +-
 python/mlx/optimizers.py | 2 +-
 python/src/array.cpp | 4 +--
 python/src/ops.cpp | 12 ++++----
 python/src/random.cpp | 2 +-
 python/tests/test_array.py | 2 +-
 python/tests/test_blas.py | 16 +++++------
 python/tests/test_ops.py | 2 +-
 tests/arg_reduce_tests.cpp | 8 +++---
 tests/autograd_tests.cpp | 4 +--
 tests/creations_tests.cpp | 2 +-
 tests/ops_tests.cpp | 6 ++--
 49 files changed, 117 insertions(+), 117 deletions(-)

diff --git a/benchmarks/python/blas/bench_gemv.py b/benchmarks/python/blas/bench_gemv.py
index 5f491ffc8..2b564a78a 100644
--- a/benchmarks/python/blas/bench_gemv.py
+++ b/benchmarks/python/blas/bench_gemv.py
@@ -133,7 +133,7 @@ def get_gbyte_size(in_vec_len, out_vec_len, np_dtype):
     return float(N_iter_bench * N_iter_func * n_elem * item_size) / float(1024**3)
 
 
-def bench_with_in_len(ax, in_vec_len, out_vector_lens, dtype, tranpose):
+def bench_with_in_len(ax, in_vec_len, out_vector_lens, dtype, transpose):
     np_dtype = getattr(np, dtype)
     mlx_gb_s = []
     mlx_gflops = []
@@ -164,7 +164,7 @@ def bench_with_in_len(ax, in_vec_len, out_vector_lens, dtype, tranpose):
     ax.legend()
 
 
-def bench_with_out_len(ax, out_vec_len, in_vector_lens, dtype, tranpose):
+def bench_with_out_len(ax, out_vec_len, in_vector_lens, dtype, transpose):
     np_dtype = getattr(np, dtype)
     mlx_gb_s = []
     mlx_gflops = []
diff --git a/benchmarks/python/comparative/compare.py b/benchmarks/python/comparative/compare.py
index 4adde50bc..a9d3df22d 100644
--- a/benchmarks/python/comparative/compare.py
+++ b/benchmarks/python/comparative/compare.py
@@ -62,7 +62,7 @@ def make_predicate(positive_filter, negative_filter):
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Run comparisons agains PyTorch")
+    parser = argparse.ArgumentParser(description="Run comparisons against PyTorch")
     parser.add_argument(
         "--filter", "-f", help="Regex filter to select benchmarks", nargs="+"
     )
diff --git a/cmake/extension.cmake b/cmake/extension.cmake
index 383656d37..ffb02ee41 100644
--- a/cmake/extension.cmake
+++ b/cmake/extension.cmake
@@ -12,7 +12,7 @@ include(CMakeParseArguments)
 # OUTPUT_DIRECTORY: Where to place ${TITLE}.metallib
 # SOURCES: List of source files
 # INCLUDE_DIRS: List of include dirs
-# DEPS: List of depedency files (like headers)
+# DEPS: List of dependency files (like headers)
 #
 macro(mlx_build_metallib)
   # Parse args
@@ -32,7 +32,7 @@ macro(mlx_build_metallib)
   # Collect compile options
   set(MTLLIB_COMPILE_OPTIONS -Wall -Wextra -fno-fast-math)
 
-  # Prepare metllib build command
+  # Prepare metallib build command
   add_custom_command(
     OUTPUT ${MTLLIB_BUILD_TARGET}
     COMMAND xcrun -sdk macosx metal
diff --git a/docs/README.md b/docs/README.md
index f197ecf43..01d41d697 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -26,7 +26,7 @@ python -m http.server
 
 and point your browser to `http://localhost:`.
 
-### Push to Github Pages
+### Push to GitHub Pages
 
 Check-out the `gh-pages` branch (`git switch gh-pages`) and build the docs.
Then force add the `build/html` directory: diff --git a/docs/src/dev/extensions.rst b/docs/src/dev/extensions.rst index 9aae931a3..0a134e7f5 100644 --- a/docs/src/dev/extensions.rst +++ b/docs/src/dev/extensions.rst @@ -15,7 +15,7 @@ Introducing the Example ----------------------- Let's say that you would like an operation that takes in two arrays, -``x`` and ``y``, scales them both by some coefficents ``alpha`` and ``beta`` +``x`` and ``y``, scales them both by some coefficients ``alpha`` and ``beta`` respectively, and then adds them together to get the result ``z = alpha * x + beta * y``. Well, you can very easily do that by just writing out a function as follows: @@ -69,7 +69,7 @@ C++ API: .. code-block:: C++ /** - * Scale and sum two vectors elementwise + * Scale and sum two vectors element-wise * z = alpha * x + beta * y * * Follow numpy style broadcasting between x and y @@ -230,7 +230,7 @@ Let's re-implement our operation now in terms of our :class:`Axpby` primitive. This operation now handles the following: -#. Upcast inputs and resolve the the output data type. +#. Upcast inputs and resolve the output data type. #. Broadcast the inputs and resolve the output shape. #. Construct the primitive :class:`Axpby` using the given stream, ``alpha``, and ``beta``. #. Construct the output :class:`array` using the primitive and the inputs. @@ -284,14 +284,14 @@ pointwise. This is captured in the templated function :meth:`axpby_impl`. T alpha = static_cast(alpha_); T beta = static_cast(beta_); - // Do the elementwise operation for each output + // Do the element-wise operation for each output for (size_t out_idx = 0; out_idx < out.size(); out_idx++) { // Map linear indices to offsets in x and y auto x_offset = elem_to_loc(out_idx, x.shape(), x.strides()); auto y_offset = elem_to_loc(out_idx, y.shape(), y.strides()); // We allocate the output to be contiguous and regularly strided - // (defaults to row major) and hence it doesn't need additonal mapping + // (defaults to row major) and hence it doesn't need additional mapping out_ptr[out_idx] = alpha * x_ptr[x_offset] + beta * y_ptr[y_offset]; } } @@ -305,7 +305,7 @@ if we encounter an unexpected type. /** Fall back implementation for evaluation on CPU */ void Axpby::eval(const std::vector& inputs, array& out) { - // Check the inputs (registered in the op while contructing the out array) + // Check the inputs (registered in the op while constructing the out array) assert(inputs.size() == 2); auto& x = inputs[0]; auto& y = inputs[1]; @@ -485,7 +485,7 @@ each data type. instantiate_axpby(float32, float); instantiate_axpby(float16, half); - instantiate_axpby(bflot16, bfloat16_t); + instantiate_axpby(bfloat16, bfloat16_t); instantiate_axpby(complex64, complex64_t); This kernel will be compiled into a metal library ``mlx_ext.metallib`` as we @@ -537,7 +537,7 @@ below. compute_encoder->setComputePipelineState(kernel); // Kernel parameters are registered with buffer indices corresponding to - // those in the kernel decelaration at axpby.metal + // those in the kernel declaration at axpby.metal int ndim = out.ndim(); size_t nelem = out.size(); @@ -568,7 +568,7 @@ below. 
// Fix the 3D size of the launch grid (in terms of threads) MTL::Size grid_dims = MTL::Size(nelem, 1, 1); - // Launch the grid with the given number of threads divded among + // Launch the grid with the given number of threads divided among // the given threadgroups compute_encoder->dispatchThreads(grid_dims, group_dims); } @@ -581,7 +581,7 @@ to give us the active metal compute command encoder instead of building a new one and calling :meth:`compute_encoder->end_encoding` at the end. MLX keeps adding kernels (compute pipelines) to the active command encoder until some specified limit is hit or the compute encoder needs to be flushed -for synchronization. MLX also handles enqueuing and commiting the associated +for synchronization. MLX also handles enqueuing and committing the associated command buffers as needed. We suggest taking a deeper dive into :class:`metal::Device` if you would like to study this routine further. @@ -601,8 +601,8 @@ us the following :meth:`Axpby::jvp` and :meth:`Axpby::vjp` implementations. const std::vector& tangents, const std::vector& argnums) { // Forward mode diff that pushes along the tangents - // The jvp transform on the the primitive can built with ops - // that are scheduled on the same stream as the primtive + // The jvp transform on the primitive can built with ops + // that are scheduled on the same stream as the primitive // If argnums = {0}, we only push along x in which case the // jvp is just the tangent scaled by alpha @@ -642,7 +642,7 @@ own :class:`Primitive`. .. code-block:: C++ - /** Vectorize primitve along given axis */ + /** Vectorize primitive along given axis */ std::pair Axpby::vmap( const std::vector& inputs, const std::vector& axes) { @@ -666,7 +666,7 @@ Let's look at the overall directory structure first. | └── setup.py * ``extensions/axpby/`` defines the C++ extension library -* ``extensions/mlx_sample_extensions`` sets out the strucutre for the +* ``extensions/mlx_sample_extensions`` sets out the structure for the associated python package * ``extensions/bindings.cpp`` provides python bindings for our operation * ``extensions/CMakeLists.txt`` holds CMake rules to build the library and @@ -697,7 +697,7 @@ are already provided, adding our :meth:`axpby` becomes very simple! py::kw_only(), "stream"_a = py::none(), R"pbdoc( - Scale and sum two vectors elementwise + Scale and sum two vectors element-wise ``z = alpha * x + beta * y`` Follows numpy style broadcasting between ``x`` and ``y`` @@ -840,7 +840,7 @@ This will result in a directory structure as follows: | ... When you try to install using the command ``python -m pip install .`` -(in ``extensions/``), the package will be installed with the same strucutre as +(in ``extensions/``), the package will be installed with the same structure as ``extensions/mlx_sample_extensions`` and the C++ and metal library will be copied along with the python binding since they are specified as ``package_data``. diff --git a/docs/src/index.rst b/docs/src/index.rst index 207238f37..9f0445a18 100644 --- a/docs/src/index.rst +++ b/docs/src/index.rst @@ -19,7 +19,7 @@ The main differences between MLX and NumPy are: The design of MLX is inspired by frameworks like `PyTorch `_, `Jax `_, and -`ArrayFire `_. A noteable difference from these +`ArrayFire `_. A notable difference from these frameworks and MLX is the *unified memory model*. Arrays in MLX live in shared memory. Operations on MLX arrays can be performed on any of the supported device types without performing data copies. 
Currently supported device types diff --git a/examples/cpp/tutorial.cpp b/examples/cpp/tutorial.cpp index 5dc0e0472..091dfab2d 100644 --- a/examples/cpp/tutorial.cpp +++ b/examples/cpp/tutorial.cpp @@ -57,7 +57,7 @@ void array_basics() { assert(z.shape(0) == 2); assert(z.shape(1) == 2); - // To actually run the compuation you must evaluate `z`. + // To actually run the computation you must evaluate `z`. // Under the hood, mlx records operations in a graph. // The variable `z` is a node in the graph which points to its operation // and inputs. When `eval` is called on an array (or arrays), the array and diff --git a/examples/extensions/axpby/axpby.cpp b/examples/extensions/axpby/axpby.cpp index 56a09f34e..6da2ff591 100644 --- a/examples/extensions/axpby/axpby.cpp +++ b/examples/extensions/axpby/axpby.cpp @@ -26,7 +26,7 @@ namespace mlx::core { /////////////////////////////////////////////////////////////////////////////// /** - * Scale and sum two vectors elementwise + * Scale and sum two vectors element-wise * z = alpha * x + beta * y * * Follow numpy style broadcasting between x and y @@ -91,21 +91,21 @@ void axpby_impl( T alpha = static_cast(alpha_); T beta = static_cast(beta_); - // Do the elementwise operation for each output + // Do the element-wise operation for each output for (size_t out_idx = 0; out_idx < out.size(); out_idx++) { // Map linear indices to offsets in x and y auto x_offset = elem_to_loc(out_idx, x.shape(), x.strides()); auto y_offset = elem_to_loc(out_idx, y.shape(), y.strides()); // We allocate the output to be contiguous and regularly strided - // (defaults to row major) and hence it doesn't need additonal mapping + // (defaults to row major) and hence it doesn't need additional mapping out_ptr[out_idx] = alpha * x_ptr[x_offset] + beta * y_ptr[y_offset]; } } /** Fall back implementation for evaluation on CPU */ void Axpby::eval(const std::vector& inputs, array& out) { - // Check the inputs (registered in the op while contructing the out array) + // Check the inputs (registered in the op while constructing the out array) assert(inputs.size() == 2); auto& x = inputs[0]; auto& y = inputs[1]; @@ -192,7 +192,7 @@ void Axpby::eval_cpu(const std::vector& inputs, array& out) { eval(inputs, out); } -#else // Accelerate not avaliable +#else // Accelerate not available /** Evaluate primitive on CPU falling back to common backend */ void Axpby::eval_cpu(const std::vector& inputs, array& out) { @@ -254,7 +254,7 @@ void Axpby::eval_gpu(const std::vector& inputs, array& out) { compute_encoder->setComputePipelineState(kernel); // Kernel parameters are registered with buffer indices corresponding to - // those in the kernel decelaration at axpby.metal + // those in the kernel declaration at axpby.metal int ndim = out.ndim(); size_t nelem = out.size(); @@ -287,7 +287,7 @@ void Axpby::eval_gpu(const std::vector& inputs, array& out) { // Fix the 3D size of the launch grid (in terms of threads) MTL::Size grid_dims = MTL::Size(nelem, 1, 1); - // Launch the grid with the given number of threads divded among + // Launch the grid with the given number of threads divided among // the given threadgroups compute_encoder->dispatchThreads(grid_dims, group_dims); } @@ -311,8 +311,8 @@ array Axpby::jvp( const std::vector& tangents, const std::vector& argnums) { // Forward mode diff that pushes along the tangents - // The jvp transform on the the primitive can built with ops - // that are scheduled on the same stream as the primtive + // The jvp transform on the primitive can built with ops + // 
that are scheduled on the same stream as the primitive // If argnums = {0}, we only push along x in which case the // jvp is just the tangent scaled by alpha @@ -345,7 +345,7 @@ std::vector Axpby::vjp( return vjps; } -/** Vectorize primitve along given axis */ +/** Vectorize primitive along given axis */ std::pair Axpby::vmap( const std::vector& inputs, const std::vector& axes) { diff --git a/examples/extensions/axpby/axpby.h b/examples/extensions/axpby/axpby.h index 9ff6af0b1..2b85dadb2 100644 --- a/examples/extensions/axpby/axpby.h +++ b/examples/extensions/axpby/axpby.h @@ -12,7 +12,7 @@ namespace mlx::core { /////////////////////////////////////////////////////////////////////////////// /** - * Scale and sum two vectors elementwise + * Scale and sum two vectors element-wise * z = alpha * x + beta * y * * Follow numpy style broadcasting between x and y @@ -39,7 +39,7 @@ class Axpby : public Primitive { * A primitive must know how to evaluate itself on the CPU/GPU * for the given inputs and populate the output array. * - * To avoid unecessary allocations, the evaluation function + * To avoid unnecessary allocations, the evaluation function * is responsible for allocating space for the array. */ void eval_cpu(const std::vector& inputs, array& out) override; diff --git a/examples/extensions/axpby/axpby.metal b/examples/extensions/axpby/axpby.metal index 64980578f..03b373c99 100644 --- a/examples/extensions/axpby/axpby.metal +++ b/examples/extensions/axpby/axpby.metal @@ -59,5 +59,5 @@ template instantiate_axpby(float32, float); instantiate_axpby(float16, half); -instantiate_axpby(bflot16, bfloat16_t); +instantiate_axpby(bfloat16, bfloat16_t); instantiate_axpby(complex64, complex64_t); \ No newline at end of file diff --git a/examples/extensions/bindings.cpp b/examples/extensions/bindings.cpp index 661ddcbaf..d05e6b636 100644 --- a/examples/extensions/bindings.cpp +++ b/examples/extensions/bindings.cpp @@ -23,7 +23,7 @@ PYBIND11_MODULE(mlx_sample_extensions, m) { py::kw_only(), "stream"_a = py::none(), R"pbdoc( - Scale and sum two vectors elementwise + Scale and sum two vectors element-wise ``z = alpha * x + beta * y`` Follows numpy style broadcasting between ``x`` and ``y`` diff --git a/mlx/allocator.h b/mlx/allocator.h index 2c3adadf4..ce0c1cd00 100644 --- a/mlx/allocator.h +++ b/mlx/allocator.h @@ -37,7 +37,7 @@ void free(Buffer buffer); Buffer malloc_or_wait(size_t size); class Allocator { - /** Abstract base clase for a memory allocator. */ + /** Abstract base class for a memory allocator. */ public: virtual Buffer malloc(size_t size) = 0; virtual void free(Buffer buffer) = 0; diff --git a/mlx/array.cpp b/mlx/array.cpp index 0a7b52a94..a70cb43a0 100644 --- a/mlx/array.cpp +++ b/mlx/array.cpp @@ -129,7 +129,7 @@ array::ArrayDesc::ArrayDesc( } // Needed because the Primitive type used in array.h is incomplete and the -// compiler needs to see the call to the desctructor after the type is complete. +// compiler needs to see the call to the destructor after the type is complete. 
array::ArrayDesc::~ArrayDesc() = default; array::ArrayIterator::reference array::ArrayIterator::operator*() const { diff --git a/mlx/backend/common/load.cpp b/mlx/backend/common/load.cpp index 6cf8ffe53..91f4cee62 100644 --- a/mlx/backend/common/load.cpp +++ b/mlx/backend/common/load.cpp @@ -13,7 +13,7 @@ namespace mlx::core { namespace { template -void swap_endianess(uint8_t* data_bytes, size_t N) { +void swap_endianness(uint8_t* data_bytes, size_t N) { struct Elem { uint8_t bytes[scalar_size]; }; @@ -39,13 +39,13 @@ void Load::eval(const std::vector& inputs, array& out) { if (swap_endianness_) { switch (out.itemsize()) { case 2: - swap_endianess<2>(out.data(), out.data_size()); + swap_endianness<2>(out.data(), out.data_size()); break; case 4: - swap_endianess<4>(out.data(), out.data_size()); + swap_endianness<4>(out.data(), out.data_size()); break; case 8: - swap_endianess<8>(out.data(), out.data_size()); + swap_endianness<8>(out.data(), out.data_size()); break; } } diff --git a/mlx/backend/metal/allocator.cpp b/mlx/backend/metal/allocator.cpp index a55690947..af4dd2e36 100644 --- a/mlx/backend/metal/allocator.cpp +++ b/mlx/backend/metal/allocator.cpp @@ -165,7 +165,7 @@ Buffer MetalAllocator::malloc(size_t size) { // Prepare to allocate new memory as needed if (!buf) { - // If we are under very high memoory pressure, we don't allocate further + // If we are under very high memory pressure, we don't allocate further if (device_->currentAllocatedSize() >= block_limit_) { return Buffer{nullptr}; } diff --git a/mlx/backend/metal/conv.cpp b/mlx/backend/metal/conv.cpp index e25599caf..3377939ba 100644 --- a/mlx/backend/metal/conv.cpp +++ b/mlx/backend/metal/conv.cpp @@ -68,7 +68,7 @@ void explicit_gemm_conv_1D_gpu( array in_strided(strided_reshape, in_strided_view.dtype(), nullptr, {}); copy_gpu(in_strided_view, in_strided, CopyType::General, s); - // Peform gemm + // Perform gemm std::vector copies = {in_padded, in_strided}; mlx_matmul( s, @@ -260,7 +260,7 @@ void explicit_gemm_conv_2D_gpu( array in_strided(strided_reshape, in_strided_view.dtype(), nullptr, {}); copy_gpu(in_strided_view, in_strided, CopyType::General, s); - // Peform gemm + // Perform gemm std::vector copies = {in_padded, in_strided}; mlx_matmul( s, diff --git a/mlx/backend/metal/indexing.cpp b/mlx/backend/metal/indexing.cpp index eb9a8efb6..1f905db1a 100644 --- a/mlx/backend/metal/indexing.cpp +++ b/mlx/backend/metal/indexing.cpp @@ -102,7 +102,7 @@ void Gather::eval_gpu(const std::vector& inputs, array& out) { static_cast(idx_strides_buf.raw_ptr()) + i * idx_ndim); } - // Allocate the argument bufer + // Allocate the argument buffer auto arg_buf = allocator::malloc_or_wait(arg_enc->encodedLength()); // Register data with the encoder @@ -246,7 +246,7 @@ void Scatter::eval_gpu(const std::vector& inputs, array& out) { static_cast(idx_strides_buf.raw_ptr()) + i * idx_ndim); } - // Allocate the argument bufer + // Allocate the argument buffer auto arg_buf = allocator::malloc_or_wait(arg_enc->encodedLength()); // Register data with the encoder diff --git a/mlx/backend/metal/kernels/arg_reduce.metal b/mlx/backend/metal/kernels/arg_reduce.metal index 31bcbfa05..467e768d6 100644 --- a/mlx/backend/metal/kernels/arg_reduce.metal +++ b/mlx/backend/metal/kernels/arg_reduce.metal @@ -114,7 +114,7 @@ template // 4. Reduce among them and go to 3 // 4. Reduce in each simd_group // 6. Write in the thread local memory - // 6. Reduce them accross thread group + // 6. Reduce them across thread group // 7. 
Write the output without need for atomic Op op; diff --git a/mlx/backend/metal/kernels/complex.h b/mlx/backend/metal/kernels/complex.h index c9fedb797..ac966a293 100644 --- a/mlx/backend/metal/kernels/complex.h +++ b/mlx/backend/metal/kernels/complex.h @@ -45,7 +45,7 @@ struct complex64_t { typename = typename enable_if>::type> constexpr complex64_t(T x) constant : real(x), imag(0) {} - // Converstions from complex64_t + // Conversions from complex64_t template < typename T, typename = typename enable_if>::type> diff --git a/mlx/backend/metal/kernels/gemm/conv.h b/mlx/backend/metal/kernels/gemm/conv.h index 2c4a7074a..1db3ebac8 100644 --- a/mlx/backend/metal/kernels/gemm/conv.h +++ b/mlx/backend/metal/kernels/gemm/conv.h @@ -105,7 +105,7 @@ struct Conv2DInputBlockLoader { } } - // Zero pad otherwize + // Zero pad otherwise else { #pragma clang loop unroll(full) for (short j = 0; j < vec_size; ++j) { @@ -334,7 +334,7 @@ struct Conv2DBlockMMA { } simdgroup_barrier(mem_flags::mem_none); -// Multiply and accumulate into resulr simdgroup matrices +// Multiply and accumulate into result simdgroup matrices #pragma clang loop unroll(full) for (short i = 0; i < TM; i++) { #pragma clang loop unroll(full) diff --git a/mlx/backend/metal/kernels/gemm/gemm.h b/mlx/backend/metal/kernels/gemm/gemm.h index f551947dd..95d2e6497 100644 --- a/mlx/backend/metal/kernels/gemm/gemm.h +++ b/mlx/backend/metal/kernels/gemm/gemm.h @@ -93,13 +93,13 @@ struct BlockLoader { tmp_idx[j] = bj + j < src_tile_dim.x ? j : 0; } - // Read all valid indcies into tmp_val + // Read all valid indices into tmp_val #pragma clang loop unroll(full) for (short j = 0; j < vec_size; j++) { tmp_val[j] = src[i * src_ld + tmp_idx[j]]; } - // Zero out uneeded values + // Zero out unneeded values #pragma clang loop unroll(full) for (short j = 0; j < vec_size; j++) { tmp_val[j] = bj + j < src_tile_dim.x ? 
tmp_val[j] : T(0); @@ -241,7 +241,7 @@ struct BlockMMA { } simdgroup_barrier(mem_flags::mem_none); -// Multiply and accumulate into resulr simdgroup matrices +// Multiply and accumulate into result simdgroup matrices #pragma clang loop unroll(full) for (short i = 0; i < TM; i++) { #pragma clang loop unroll(full) diff --git a/mlx/backend/metal/kernels/gemv.metal b/mlx/backend/metal/kernels/gemv.metal index 3b4c0a30a..d85d72d9e 100644 --- a/mlx/backend/metal/kernels/gemv.metal +++ b/mlx/backend/metal/kernels/gemv.metal @@ -28,7 +28,7 @@ struct GEMVKernel { static_assert(BN == SIMD_SIZE, "gemv block must have a width of SIMD_SIZE"); // - The matrix of size (M = out_vec_size, N = in_vec_size) is divided up - // into blocks of (BM * TM, BN * TN) divided amoung threadgroups + // into blocks of (BM * TM, BN * TN) divided among threadgroups // - Every thread works on a block of (TM, TN) // - We assume each thead group is launched with (BN, BM, 1) threads // @@ -42,7 +42,7 @@ struct GEMVKernel { // Edge case handling: // - The threadgroup with the largest tid will have blocks that exceed the matrix // * The blocks that start outside the matrix are never read (thread results remain zero) - // * The last thread that partialy overlaps with the matrix is shifted inwards + // * The last thread that partially overlaps with the matrix is shifted inwards // such that the thread block fits exactly in the matrix MLX_MTL_CONST short tgp_mem_size = BN * TN * 2; @@ -166,7 +166,7 @@ template < struct GEMVTKernel { // - The matrix of size (M = in_vec_size, N = out_vec_size) is divided up - // into blocks of (BM * TM, BN * TN) divided amoung threadgroups + // into blocks of (BM * TM, BN * TN) divided among threadgroups // - Every thread works on a block of (TM, TN) // - We assume each thead group is launched with (BN, BM, 1) threads // @@ -180,7 +180,7 @@ struct GEMVTKernel { // Edge case handling: // - The threadgroup with the largest tid will have blocks that exceed the matrix // * The blocks that start outside the matrix are never read (thread results remain zero) - // * The last thread that partialy overlaps with the matrix is shifted inwards + // * The last thread that partially overlaps with the matrix is shifted inwards // such that the thread block fits exactly in the matrix diff --git a/mlx/backend/metal/kernels/reduce.metal b/mlx/backend/metal/kernels/reduce.metal index 85ff41f44..4182184c2 100644 --- a/mlx/backend/metal/kernels/reduce.metal +++ b/mlx/backend/metal/kernels/reduce.metal @@ -65,7 +65,7 @@ template in += grid_size * N_READS; } - // Sepate case for the last set as we close the reduction size + // Separate case for the last set as we close the reduction size size_t curr_idx = (gid + r * (size_t)grid_size) * N_READS; if (curr_idx < in_size) { int max_reads = in_size - curr_idx; diff --git a/mlx/backend/metal/kernels/sort.metal b/mlx/backend/metal/kernels/sort.metal index 3aa54de3e..50b1cfbb6 100644 --- a/mlx/backend/metal/kernels/sort.metal +++ b/mlx/backend/metal/kernels/sort.metal @@ -592,7 +592,7 @@ template < bool ARG_SORT, short BLOCK_THREADS, short N_PER_THREAD> -[[kernel, max_total_threads_per_threadgroup(BLOCK_THREADS)]] void mb_block_partiton( +[[kernel, max_total_threads_per_threadgroup(BLOCK_THREADS)]] void mb_block_partition( device idx_t* block_partitions [[buffer(0)]], const device val_t* dev_vals [[buffer(1)]], const device idx_t* dev_idxs [[buffer(2)]], @@ -777,8 +777,8 @@ template < const device size_t* nc_strides [[buffer(7)]], \ uint3 tid [[threadgroup_position_in_grid]], \ 
uint3 lid [[thread_position_in_threadgroup]]); \ - template [[host_name("mb_block_partiton_" #vtname "_" #itname "_bn" #bn "_tn" #tn)]] \ - [[kernel]] void mb_block_partiton( \ + template [[host_name("mb_block_partition_" #vtname "_" #itname "_bn" #bn "_tn" #tn)]] \ + [[kernel]] void mb_block_partition( \ device itype* block_partitions [[buffer(0)]], \ const device vtype* dev_vals [[buffer(1)]], \ const device itype* dev_idxs [[buffer(2)]], \ diff --git a/mlx/backend/metal/matmul.cpp b/mlx/backend/metal/matmul.cpp index 864181da9..0bce599d3 100644 --- a/mlx/backend/metal/matmul.cpp +++ b/mlx/backend/metal/matmul.cpp @@ -61,7 +61,7 @@ inline void mps_matmul( // 2. Only one of a or b has batch_size_out matrices worth of data and // the other has matrix worth of data - // The matrix dimsenisons of a and b are sure to be regularly strided + // The matrix dimensions of a and b are sure to be regularly strided if (batch_size_out > 1) { // No broadcasting defaults auto batch_size_a = a.data_size() / (M * K); diff --git a/mlx/backend/metal/reduce.cpp b/mlx/backend/metal/reduce.cpp index 6a2ce084b..9da5c79bf 100644 --- a/mlx/backend/metal/reduce.cpp +++ b/mlx/backend/metal/reduce.cpp @@ -40,7 +40,7 @@ void all_reduce_dispatch( // Set grid dimensions // We make sure each thread has enough to do by making it read in - // atleast n_reads inputs + // at least n_reads inputs int n_reads = REDUCE_N_READS; // mod_in_size gives us the groups of n_reads needed to go over the entire @@ -176,7 +176,7 @@ void strided_reduce_general_dispatch( // We spread outputs over the x dimension and inputs over the y dimension // Threads with the same lid.x in a given threadgroup work on the same - // output and each thread in the y dimension accumlates for that output + // output and each thread in the y dimension accumulates for that output uint threadgroup_dim_x = std::min(out_size, 128ul); uint threadgroup_dim_y = kernel->maxTotalThreadsPerThreadgroup() / threadgroup_dim_x; diff --git a/mlx/backend/metal/sort.cpp b/mlx/backend/metal/sort.cpp index befbf2d81..9eb9960e0 100644 --- a/mlx/backend/metal/sort.cpp +++ b/mlx/backend/metal/sort.cpp @@ -165,10 +165,10 @@ void multi_block_sort( dev_idxs_out = ping ? dev_idxs_0 : dev_idxs_1; ping = !ping; - // Do partiton + // Do partition { std::ostringstream kname; - kname << "mb_block_partiton_" << type_to_name(dev_vals_in) << "_" + kname << "mb_block_partition_" << type_to_name(dev_vals_in) << "_" << type_to_name(dev_idxs_in) << "_bn" << bn << "_tn" << tn; auto kernel = d.get_kernel(kname.str()); diff --git a/mlx/backend/metal/utils.h b/mlx/backend/metal/utils.h index 6fa08e42a..378850802 100644 --- a/mlx/backend/metal/utils.h +++ b/mlx/backend/metal/utils.h @@ -18,7 +18,7 @@ void set_array_buffer( auto offset = a.data() - static_cast(const_cast(a_buf)->contents()); enc->setBuffer(a_buf, offset, idx); - // MTL::Resource usage through argument buffer needs to be explicity + // MTL::Resource usage through argument buffer needs to be explicitly // flagged to enable hazard tracking compute_encoder->useResource(a_buf, MTL::ResourceUsageRead); } diff --git a/mlx/fft.cpp b/mlx/fft.cpp index 6cb33048d..96d0424ab 100644 --- a/mlx/fft.cpp +++ b/mlx/fft.cpp @@ -45,7 +45,7 @@ array fft_impl( throw std::invalid_argument(msg.str()); } - // In the following shape manipulations there are three cases to consdier: + // In the following shape manipulations there are three cases to consider: // 1. In a complex to complex transform (fftn / ifftn) the output // and input shapes are the same. // 2. 
In a real to complex transform (rfftn) n specifies the input dims diff --git a/mlx/io/load.cpp b/mlx/io/load.cpp index 856cf17a2..74e0784f8 100644 --- a/mlx/io/load.cpp +++ b/mlx/io/load.cpp @@ -155,7 +155,7 @@ array load(std::shared_ptr in_stream, StreamOrDevice s) { // Read and check version if (read_magic_and_ver[6] != 1 && read_magic_and_ver[6] != 2) { throw std::runtime_error( - "[load] Unsupport npy format version in " + in_stream->label()); + "[load] Unsupported npy format version in " + in_stream->label()); } // Read header len and header diff --git a/mlx/ops.cpp b/mlx/ops.cpp index f4f6b922d..014707b38 100644 --- a/mlx/ops.cpp +++ b/mlx/ops.cpp @@ -247,7 +247,7 @@ array tri(int n, int m, int k, Dtype type, StreamOrDevice s /* = {} */) { array tril(array x, int k, StreamOrDevice s /* = {} */) { if (x.ndim() < 2) { - throw std::invalid_argument("[tril] array must be atleast 2-D"); + throw std::invalid_argument("[tril] array must be at least 2-D"); } auto mask = tri(x.shape(-2), x.shape(-1), k, x.dtype(), s); return where(mask, x, zeros_like(x, s), s); @@ -255,7 +255,7 @@ array tril(array x, int k, StreamOrDevice s /* = {} */) { array triu(array x, int k, StreamOrDevice s /* = {} */) { if (x.ndim() < 2) { - throw std::invalid_argument("[triu] array must be atleast 2-D"); + throw std::invalid_argument("[triu] array must be at least 2-D"); } auto mask = tri(x.shape(-2), x.shape(-1), k - 1, x.dtype(), s); return where(mask, zeros_like(x, s), x, s); @@ -350,7 +350,7 @@ array squeeze( ax = ax < 0 ? ax + a.ndim() : ax; if (ax < 0 || ax >= a.ndim()) { std::ostringstream msg; - msg << "[squeeze] Invalid axies " << ax << " for array with " << a.ndim() + msg << "[squeeze] Invalid axes " << ax << " for array with " << a.ndim() << " dimensions."; throw std::invalid_argument(msg.str()); } @@ -405,7 +405,7 @@ array expand_dims( ax = ax < 0 ? ax + out_ndim : ax; if (ax < 0 || ax >= out_ndim) { std::ostringstream msg; - msg << "[squeeze] Invalid axies " << ax << " for output array with " + msg << "[squeeze] Invalid axes " << ax << " for output array with " << a.ndim() << " dimensions."; throw std::invalid_argument(msg.str()); } @@ -478,7 +478,7 @@ array slice( // If strides are negative, slice and then make a copy with axes flipped if (negatively_strided_axes.size() > 0) { - // First, take the slice of the positvely strided axes + // First, take the slice of the positively strided axes auto out = array( out_shape, a.dtype(), @@ -517,7 +517,7 @@ array slice( // Gather moves the axis up, remainder needs to be squeezed out_reshape[i] = indices[i].size(); - // Gather moves the axis up, needs to be tranposed + // Gather moves the axis up, needs to be transposed t_axes[ax] = i; } diff --git a/mlx/ops.h b/mlx/ops.h index a99465f3e..c888c80cd 100644 --- a/mlx/ops.h +++ b/mlx/ops.h @@ -214,7 +214,7 @@ array concatenate(const std::vector& arrays, StreamOrDevice s = {}); array stack(const std::vector& arrays, int axis, StreamOrDevice s = {}); array stack(const std::vector& arrays, StreamOrDevice s = {}); -/** Repeate an array along an axis. */ +/** Repeat an array along an axis. */ array repeat(const array& arr, int repeats, int axis, StreamOrDevice s = {}); array repeat(const array& arr, int repeats, StreamOrDevice s = {}); diff --git a/mlx/primitives.h b/mlx/primitives.h index 747b26c10..deeb498fa 100644 --- a/mlx/primitives.h +++ b/mlx/primitives.h @@ -49,7 +49,7 @@ class Primitive { * A primitive must know how to evaluate itself on * the CPU/GPU for the given inputs and populate the output array. 
* - * To avoid unecessary allocations, the evaluation function + * To avoid unnecessary allocations, the evaluation function * is responsible for allocating space for the array. */ virtual void eval_cpu(const std::vector& inputs, array& out) = 0; @@ -84,7 +84,7 @@ class Primitive { /** Print the primitive. */ virtual void print(std::ostream& os) = 0; - /** Equivalence check defaults to false unless overriden by the primitive */ + /** Equivalence check defaults to false unless overridden by the primitive */ virtual bool is_equivalent(const Primitive& other) const { return false; } diff --git a/mlx/random.cpp b/mlx/random.cpp index ef11f8c65..63e39cdcc 100644 --- a/mlx/random.cpp +++ b/mlx/random.cpp @@ -232,7 +232,7 @@ array truncated_normal( auto u = uniform(a, b, shape, dtype, key, s); auto out = multiply(sqrt2, erfinv(u, s), s); - // Clip in bouds + // Clip in bounds return maximum(minimum(upper_t, out, s), lower_t, s); } diff --git a/mlx/random.h b/mlx/random.h index e684464bc..360bdbdb1 100644 --- a/mlx/random.h +++ b/mlx/random.h @@ -16,7 +16,7 @@ class KeySequence { void seed(uint64_t seed); array next(); - // static defualt + // static default static KeySequence& default_() { static KeySequence ks(0); return ks; diff --git a/mlx/transforms.h b/mlx/transforms.h index ff47638bf..caf648163 100644 --- a/mlx/transforms.h +++ b/mlx/transforms.h @@ -80,7 +80,7 @@ ValueAndGradFn value_and_grad( /** * Returns a function which computes the value and gradient of the input - * function with repsect to a single input array. + * function with respect to a single input array. **/ ValueAndGradFn inline value_and_grad( const std::function(const std::vector&)>& fun, @@ -132,7 +132,7 @@ std::function(const std::vector&)> inline grad( /** * Returns a function which computes the gradient of the input function with - * repsect to a single input array. + * respect to a single input array. * * The function being differentiated takes a vector of arrays and returns an * array. The optional `argnum` index specifies which the argument to compute diff --git a/mlx/types/fp16.h b/mlx/types/fp16.h index 58e1bc088..c174afebc 100644 --- a/mlx/types/fp16.h +++ b/mlx/types/fp16.h @@ -68,7 +68,7 @@ struct _MLX_Float16 { inf_scale.u = uint32_t(0x77800000); zero_scale.u = uint32_t(0x08800000); - // Combine with magic and let addition do rouding + // Combine with magic and let addition do rounding magic_bits.u = x_expo_32; magic_bits.f += (std::abs(x) * inf_scale.f) * zero_scale.f; diff --git a/python/mlx/nn/layers/normalization.py b/python/mlx/nn/layers/normalization.py index d5e1a1c6e..9c77667e7 100644 --- a/python/mlx/nn/layers/normalization.py +++ b/python/mlx/nn/layers/normalization.py @@ -198,7 +198,7 @@ class BatchNorm(Module): batch, ``C`` is the number of features or channels, and ``L`` is the sequence length. The output has the same shape as the input. For four-dimensional arrays, the shape is ``NHWC``, where ``H`` and ``W`` are - the height and width respecitvely. + the height and width respectively. For more information on Batch Normalization, see the original paper `Batch Normalization: Accelerating Deep Network Training by Reducing Internal diff --git a/python/mlx/optimizers.py b/python/mlx/optimizers.py index 17a16c459..601d87b03 100644 --- a/python/mlx/optimizers.py +++ b/python/mlx/optimizers.py @@ -253,7 +253,7 @@ class AdaDelta(Optimizer): rho (float, optional): The coefficient :math:`\rho` used for computing a running average of squared gradients. 
Default: ``0.9`` eps (float, optional): The term :math:`\epsilon` added to the denominator to improve - numerical stability. Ddefault: `1e-8` + numerical stability. Default: `1e-8` """ def __init__(self, learning_rate: float, rho: float = 0.9, eps: float = 1e-6): diff --git a/python/src/array.cpp b/python/src/array.cpp index f8a1a27cd..1c6f724f4 100644 --- a/python/src/array.cpp +++ b/python/src/array.cpp @@ -507,7 +507,7 @@ void init_array(py::module_& m) { array_class .def_property_readonly( - "size", &array::size, R"pbdoc(Number of elments in the array.)pbdoc") + "size", &array::size, R"pbdoc(Number of elements in the array.)pbdoc") .def_property_readonly( "ndim", &array::ndim, R"pbdoc(The array's dimension.)pbdoc") .def_property_readonly( @@ -559,7 +559,7 @@ void init_array(py::module_& m) { If the array has more than one dimension then the result is a nested list of lists. - The value type of the list correpsonding to the last dimension is either + The value type of the list corresponding to the last dimension is either ``bool``, ``int`` or ``float`` depending on the ``dtype`` of the array. )pbdoc") .def("__array__", &mlx_array_to_np) diff --git a/python/src/ops.cpp b/python/src/ops.cpp index 8da45e0eb..1f60c6444 100644 --- a/python/src/ops.cpp +++ b/python/src/ops.cpp @@ -1263,7 +1263,7 @@ void init_ops(py::module_& m) { If the axis is not specified the array is treated as a flattened 1-D array prior to performing the take. - As an example, if the ``axis=1`` this is equialent to ``a[:, indices, ...]``. + As an example, if the ``axis=1`` this is equivalent to ``a[:, indices, ...]``. Args: a (array): Input array. @@ -1742,7 +1742,7 @@ void init_ops(py::module_& m) { "a"_a, py::pos_only(), "source"_a, - "destiantion"_a, + "destination"_a, py::kw_only(), "stream"_a = none, R"pbdoc( @@ -2253,7 +2253,7 @@ void init_ops(py::module_& m) { will be of elements less or equal to the element at the ``kth`` index and all indices after will be of elements greater or equal to the element at the ``kth`` index. - axis (int or None, optional): Optional axis to partiton over. + axis (int or None, optional): Optional axis to partition over. If ``None``, this partitions over the flattened array. If unspecified, it defaults to ``-1``. @@ -2426,13 +2426,13 @@ void init_ops(py::module_& m) { R"pbdoc( repeat(array: array, repeats: int, axis: Optional[int] = None, *, stream: Union[None, Stream, Device] = None) -> array - Repeate an array along a specified axis. + Repeat an array along a specified axis. Args: array (array): Input array. repeats (int): The number of repetitions for each element. axis (int, optional): The axis in which to repeat the array along. If - unspecified it uses the flattened array of the input and repeates + unspecified it uses the flattened array of the input and repeats along axis 0. stream (Stream, optional): Stream or device. Defaults to ``None``. @@ -3050,7 +3050,7 @@ void init_ops(py::module_& m) { Round to the given number of decimals. - Bascially performs: + Basically performs: .. code-block:: python diff --git a/python/src/random.cpp b/python/src/random.cpp index 943370db5..f648a2714 100644 --- a/python/src/random.cpp +++ b/python/src/random.cpp @@ -212,7 +212,7 @@ void init_random(py::module_& parent_module) { upper (scalar or array): Upper bound of the domain. shape (list(int), optional): The shape of the output. Default is ``()``. - dtype (Dtype, optinoal): The data type of the output. + dtype (Dtype, optional): The data type of the output. Default is ``float32``. 
key (array, optional): A PRNG key. Default: None. diff --git a/python/tests/test_array.py b/python/tests/test_array.py index b6471cdbd..9016f3ae4 100644 --- a/python/tests/test_array.py +++ b/python/tests/test_array.py @@ -952,7 +952,7 @@ class TestArray(mlx_tests.MLXTestCase): b_mx = a_mx[25:-50:-3] self.assertTrue(np.array_equal(b_np, b_mx)) - # Negatie slice and ascending bounds + # Negative slice and ascending bounds b_np = a_np[0:20:-3] b_mx = a_mx[0:20:-3] self.assertTrue(np.array_equal(b_np, b_mx)) diff --git a/python/tests/test_blas.py b/python/tests/test_blas.py index b2a762681..8a7d632fa 100644 --- a/python/tests/test_blas.py +++ b/python/tests/test_blas.py @@ -53,10 +53,10 @@ class TestBlas(mlx_tests.MLXTestCase): for dtype in self.dtypes: np_dtype = getattr(np, dtype) base_shapes = [4, 8, 16, 32, 64, 128] - pertubations = [-2, -1, 0, 1, 2] + perturbations = [-2, -1, 0, 1, 2] for dim in base_shapes: - for p in pertubations: + for p in perturbations: shape_a = (dim + p, dim + p) shape_b = (dim + p, dim + p) self.__gemm_test(shape_a, shape_b, np_dtype) @@ -81,12 +81,12 @@ class TestBlas(mlx_tests.MLXTestCase): for B, M, N, K in shapes: - with self.subTest(tranpose="nn"): + with self.subTest(transpose="nn"): shape_a = (B, M, K) shape_b = (B, K, N) self.__gemm_test(shape_a, shape_b, np_dtype) - with self.subTest(tranpose="nt"): + with self.subTest(transpose="nt"): shape_a = (B, M, K) shape_b = (B, N, K) self.__gemm_test( @@ -97,7 +97,7 @@ class TestBlas(mlx_tests.MLXTestCase): f_mx_b=lambda x: mx.transpose(x, (0, 2, 1)), ) - with self.subTest(tranpose="tn"): + with self.subTest(transpose="tn"): shape_a = (B, K, M) shape_b = (B, K, N) self.__gemm_test( @@ -108,7 +108,7 @@ class TestBlas(mlx_tests.MLXTestCase): f_mx_a=lambda x: mx.transpose(x, (0, 2, 1)), ) - with self.subTest(tranpose="tt"): + with self.subTest(transpose="tt"): shape_a = (B, K, M) shape_b = (B, N, K) self.__gemm_test( @@ -191,7 +191,7 @@ class TestBlas(mlx_tests.MLXTestCase): self.assertListEqual(list(c_npy.shape), list(c_mlx.shape)) self.assertTrue(np.allclose(c_mlx, c_npy, atol=1e-6)) - # Batched matmul with simple broadast + # Batched matmul with simple broadcast a_npy = np.random.normal(0.0, 1.0 / 128, (32, 128, 16)).astype(np.float32) b_npy = np.random.normal(0.0, 1.0 / 128, (16, 16)).astype(np.float32) c_npy = a_npy @ b_npy @@ -213,7 +213,7 @@ class TestBlas(mlx_tests.MLXTestCase): self.assertListEqual(list(e_npy.shape), list(e_mlx.shape)) self.assertTrue(np.allclose(e_mlx, e_npy, atol=1e-6)) - # Batched and transposed matmul with simple broadast + # Batched and transposed matmul with simple broadcast a_npy = np.random.normal(0.0, 1.0 / 128, (32, 128, 16)).astype(np.float32) b_npy = np.random.normal(0.0, 1.0 / 128, (128, 16)).astype(np.float32) a_mlx = mx.array(a_npy) diff --git a/python/tests/test_ops.py b/python/tests/test_ops.py index 89edfdefa..782249b56 100644 --- a/python/tests/test_ops.py +++ b/python/tests/test_ops.py @@ -88,7 +88,7 @@ class TestOps(mlx_tests.MLXTestCase): self.assertEqual(a.dtype, mx.float32) self.assertEqual(a.item(), 3.0) - # Check comibinations with mlx arrays + # Check combinations with mlx arrays a = mx.add(mx.array(True), False) self.assertEqual(a.dtype, mx.bool_) self.assertEqual(a.item(), True) diff --git a/tests/arg_reduce_tests.cpp b/tests/arg_reduce_tests.cpp index 7e3750e7b..b571c8f61 100644 --- a/tests/arg_reduce_tests.cpp +++ b/tests/arg_reduce_tests.cpp @@ -76,7 +76,7 @@ TEST_CASE("test arg reduce small") { {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}); if 
(!metal::is_available()) { - INFO("Skiping arg reduction gpu tests"); + INFO("Skipping arg reduction gpu tests"); return; } @@ -106,7 +106,7 @@ TEST_CASE("test arg reduce small") { TEST_CASE("test arg reduce against cpu") { if (!metal::is_available()) { - INFO("Skiping arg reduction gpu tests"); + INFO("Skipping arg reduction gpu tests"); return; } @@ -148,7 +148,7 @@ void test_arg_reduce_small_bool( TEST_CASE("test arg reduce bool") { if (!metal::is_available()) { - INFO("Skiping arg reduction gpu tests"); + INFO("Skipping arg reduction gpu tests"); return; } auto x = array( @@ -201,7 +201,7 @@ TEST_CASE("test arg reduce irregular strides") { Device::cpu, x, ArgReduce::ArgMin, {4, 2}, 2, {0, 0, 1, 1, 1, 1, 2, 2}); if (!metal::is_available()) { - INFO("Skiping arg reduction gpu tests"); + INFO("Skipping arg reduction gpu tests"); return; } } diff --git a/tests/autograd_tests.cpp b/tests/autograd_tests.cpp index 85dad9160..a7b7e7fca 100644 --- a/tests/autograd_tests.cpp +++ b/tests/autograd_tests.cpp @@ -989,7 +989,7 @@ TEST_CASE("test as_strided grads") { } TEST_CASE("test jvp from vjp") { - // Unary elementwise ops + // Unary element-wise ops { auto x = random::uniform({5, 10}); eval(x); @@ -1022,7 +1022,7 @@ TEST_CASE("test jvp from vjp") { CHECK(compute_derivs(mlx::core::rsqrt)); } - // Binary elementwise ops + // Binary element-wise ops { auto x = random::uniform({5, 10}); auto y = random::uniform({5, 10}); diff --git a/tests/creations_tests.cpp b/tests/creations_tests.cpp index edb40a9fe..ea28638af 100644 --- a/tests/creations_tests.cpp +++ b/tests/creations_tests.cpp @@ -7,7 +7,7 @@ using namespace mlx::core; TEST_CASE("test arange") { - // Check type is inferred correclty + // Check type is inferred correctly { auto x = arange(10); CHECK_EQ(x.dtype(), int32); diff --git a/tests/ops_tests.cpp b/tests/ops_tests.cpp index 2c6c8e8ef..f6443bc7e 100644 --- a/tests/ops_tests.cpp +++ b/tests/ops_tests.cpp @@ -1411,7 +1411,7 @@ TEST_CASE("test broadcast") { x.eval(); CHECK_EQ(x.strides(), std::vector{0, 0, 1}); - // Broadcast on transposed arrray works + // Broadcast on transposed array works x = array({0, 1, 2, 3, 4, 5}, {2, 3}); x = broadcast_to(transpose(x), {2, 3, 2}); CHECK_EQ(x.shape(), std::vector{2, 3, 2}); @@ -1733,7 +1733,7 @@ TEST_CASE("test scatter") { out = scatter(in, inds, updates, 0); CHECK(array_equal(out, reshape(arange(16, float32), {4, 4})).item()); - // Irregular strided index and reduce collison test + // Irregular strided index and reduce collision test in = zeros({10}, float32); inds = broadcast_to(array(3), {10}); updates = ones({10, 1}, float32); @@ -1750,7 +1750,7 @@ TEST_CASE("test scatter") { out = scatter_max(array(1), {}, array(2), std::vector{}); CHECK_EQ(out.item(), 2); - // Irregularaly strided updates test + // Irregularly strided updates test in = ones({3, 3}); updates = broadcast_to(array({0, 0, 0}), {1, 3, 3}); inds = array({0});