Update GEMM (#424)

* Organize and collect metal subroutine templates and elements in `metal/kernels/steel/`
* Update gemm elements for better performance
* Add split-K specialization for gemm
* Add `addmm` primitive, op and bindings for fused matmul and bias addition
* Update tests and benchmarks as needed
2025-12-14 17:12:49 +08:00 · 2024-01-17 12:42:39 -08:00
parent 556cdf0e06
commit 78102a47ad
30 changed files with 2361 additions and 646 deletions
--- a/python/src/ops.cpp
+++ b/python/src/ops.cpp
@@ -3476,4 +3476,34 @@ void init_ops(py::module_& m) {
      Returns:
        result (array): The tiled array.
    )pbdoc");
+  m.def(  // Register the Python-facing `addmm` function on module `m`.
+      "addmm",  // Name exposed to Python.
+      &addmm,  // Bound callable; its declaration is outside this hunk.
+      "c"_a,
+      "a"_a,
+      "b"_a,
+      py::pos_only(),  // Arguments listed above (c, a, b) are positional-only ("/" in the signature below).
+      "alpha"_a = 1.0f,  // Positional-or-keyword, defaults to 1.0.
+      "beta"_a = 1.0f,  // Positional-or-keyword, defaults to 1.0.
+      py::kw_only(),  // Arguments listed below (stream) are keyword-only ("*" in the signature below).
+      "stream"_a = none,  // `none` is defined outside this hunk.
+      R"pbdoc(
+        addmm(c: array, a: array, b: array, /, alpha: float = 1.0, beta: float = 1.0,  *, stream: Union[None, Stream, Device] = None) -> array
+
+        Matrix multiplication with addition and optional scaling.
+
+        Perform the (possibly batched) matrix multiplication of two arrays and add to the result
+        with optional scaling factors.
+
+        Args:
+            c (array): Input array or scalar.
+            a (array): Input array or scalar.
+            b (array): Input array or scalar.
+            alpha (float, optional): Scaling factor for the 
+                matrix product of ``a`` and ``b`` (default: ``1``)
+            beta (float, optional): Scaling factor for ``c`` (default: ``1``)
+
+        Returns:
+            array: ``alpha * (a @ b)  + beta * c``
+      )pbdoc");
 }