Extensions (#962)

* start to fix extensions * mostly fixed extensions * fix extension build * couple more nits
2025-10-19 16:48:10 +08:00 · 2024-04-09 08:50:36 -07:00
parent 42afe27e12
commit b63ef10a7f
9 changed files with 292 additions and 309 deletions
--- a/examples/extensions/axpby/axpby.cpp
+++ b/examples/extensions/axpby/axpby.cpp
@@ -1,4 +1,4 @@
-// Copyright © 2023 Apple Inc.
+// Copyright © 2023-2024 Apple Inc.

 #include <cassert>
 #include <iostream>
@@ -43,7 +43,7 @@ array axpby(
  auto promoted_dtype = promote_types(x.dtype(), y.dtype());

  // Upcast to float32 for non-floating point inputs x and y
-  auto out_dtype = is_floating_point(promoted_dtype)
+  auto out_dtype = issubdtype(promoted_dtype, float32)
      ? promoted_dtype
      : promote_types(promoted_dtype, float32);

@@ -106,12 +106,12 @@ void axpby_impl(
 /** Fall back implementation for evaluation on CPU */
 void Axpby::eval(
    const std::vector<array>& inputs,
-    std::vector<array>& out_arr) {
-  auto out = out_arr[0];
+    std::vector<array>& outputs) {
  // Check the inputs (registered in the op while constructing the out array)
  assert(inputs.size() == 2);
  auto& x = inputs[0];
  auto& y = inputs[1];
+  auto& out = outputs[0];

  // Dispatch to the correct dtype
  if (out.dtype() == float32) {
@@ -150,11 +150,7 @@ void axpby_impl_accelerate(
  // The data in the output array is allocated to match the strides in y
  // such that x, y, and out are contiguous in the same mode and
  // no transposition is needed
-  out.set_data(
-      allocator::malloc_or_wait(y.data_size() * out.itemsize()),
-      y.data_size(),
-      y.strides(),
-      y.flags());
+  out.set_data(allocator::malloc_or_wait(out.nbytes()));

  // We then copy over the elements using the contiguous vector specialization
  copy_inplace(y, out, CopyType::Vector);
@@ -180,11 +176,11 @@ void axpby_impl_accelerate(
 /** Evaluate primitive on CPU using accelerate specializations */
 void Axpby::eval_cpu(
    const std::vector<array>& inputs,
-    std::vector<array>& outarr) {
-  auto out = outarr[0];
+    std::vector<array>& outputs) {
  assert(inputs.size() == 2);
  auto& x = inputs[0];
  auto& y = inputs[1];
+  auto& out = outputs[0];

  // Accelerate specialization for contiguous single precision float arrays
  if (out.dtype() == float32 &&
@@ -195,7 +191,7 @@ void Axpby::eval_cpu(
  }

  // Fall back to common backend if specializations are not available
-  eval(inputs, outarr);
+  eval(inputs, outputs);
 }

 #else // Accelerate not available
@@ -203,8 +199,8 @@ void Axpby::eval_cpu(
 /** Evaluate primitive on CPU falling back to common backend */
 void Axpby::eval_cpu(
    const std::vector<array>& inputs,
-    std::vector<array>& out) {
-  eval(inputs, out);
+    const std::vector<array>& outputs) {
+  eval(inputs, outputs);
 }

 #endif
@@ -218,12 +214,12 @@ void Axpby::eval_cpu(
 /** Evaluate primitive on GPU */
 void Axpby::eval_gpu(
    const std::vector<array>& inputs,
-    std::vector<array>& outarr) {
+    std::vector<array>& outputs) {
  // Prepare inputs
-  auto out = outarr[0];
  assert(inputs.size() == 2);
  auto& x = inputs[0];
  auto& y = inputs[1];
+  auto& out = outputs[0];

  // Each primitive carries the stream it should execute on
  // and each stream carries its device identifiers
@@ -372,4 +368,4 @@ bool Axpby::is_equivalent(const Primitive& other) const {
  return alpha_ == r_other.alpha_ && beta_ == r_other.beta_;
 }

-} // namespace mlx::core
+} // namespace mlx::core
--- a/examples/extensions/axpby/axpby.h
+++ b/examples/extensions/axpby/axpby.h
@@ -42,9 +42,9 @@ class Axpby : public Primitive {
   * To avoid unnecessary allocations, the evaluation function
   * is responsible for allocating space for the array.
   */
-  void eval_cpu(const std::vector<array>& inputs, std::vector<array>& out)
+  void eval_cpu(const std::vector<array>& inputs, std::vector<array>& outputs)
      override;
-  void eval_gpu(const std::vector<array>& inputs, std::vector<array>& out)
+  void eval_gpu(const std::vector<array>& inputs, std::vector<array>& outputs)
      override;

  /** The Jacobian-vector product. */
@@ -83,7 +83,7 @@ class Axpby : public Primitive {
  float beta_;

  /** Fall back implementation for evaluation on CPU */
-  void eval(const std::vector<array>& inputs, std::vector<array>& out);
+  void eval(const std::vector<array>& inputs, std::vector<array>& outputs);
 };

-} // namespace mlx::core
+} // namespace mlx::core