Mirror of https://github.com/ml-explore/mlx.git (synced 2025-06-25 01:41:17 +08:00)
Optimizing Complex Matrix Multiplication using Karatsuba’s Algorithm (#2220)
* Implementing complex matmul using Karatsuba's algorithm
* Implemented Karatsuba's algorithm for complex matmul and ran pre-commit
* fix

Co-authored-by: Awni Hannun <awni@apple.com>
commit 0408ba0a76
parent cbad6c3093
mlx/ops.cpp (25 lines changed)
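For context (an editor's note, not part of the commit): a direct complex product costs four real multiplications, since (a_r + i*a_i)(b_r + i*b_i) = (a_r*b_r - a_i*b_i) + i*(a_r*b_i + a_i*b_r). Karatsuba's rearrangement computes m1 = a_r*b_r, m2 = a_i*b_i, and m3 = (a_r + a_i)(b_r + b_i), then recovers the real part as m1 - m2 and the imaginary part as m3 - m1 - m2. Applied blockwise to matrices, this replaces four real matmuls with three. A minimal NumPy sketch of the identity the new code path relies on (shapes mirror the test's M, K, N; the seed is arbitrary):

import numpy as np

# Editor's sketch (not from the commit): verify the Karatsuba identity
# behind the new complex-matmul path, with NumPy standing in for MLX.
rng = np.random.default_rng(0)
a = rng.standard_normal((16, 50)) + 1j * rng.standard_normal((16, 50))
b = rng.standard_normal((50, 32)) + 1j * rng.standard_normal((50, 32))

a_real, a_imag = a.real, a.imag
b_real, b_imag = b.real, b.imag

# Three real matmuls instead of four
m1 = a_real @ b_real
m2 = a_imag @ b_imag
m3 = (a_real + a_imag) @ (b_real + b_imag)

c = (m1 - m2) + 1j * (m3 - m1 - m2)
assert np.allclose(c, a @ b)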
mlx/ops.cpp
@@ -2862,21 +2862,30 @@ array matmul(
         << " second input with shape " << b.shape() << ".";
     throw std::invalid_argument(msg.str());
   }
-  // Type promotion
-  auto out_type = promote_types(a.dtype(), b.dtype());
 
-  // Complex matmul in terms of real matmuls
-  if (out_type == complex64) {
+  // complex matmul using Karatsuba's Algorithm
+  if (a.dtype() == complex64 || b.dtype() == complex64) {
     // Extract real and imaginary parts
     auto a_real = real(a, s);
-    auto b_real = real(b, s);
     auto a_imag = imag(a, s);
+    auto b_real = real(b, s);
     auto b_imag = imag(b, s);
-    auto c_real =
-        subtract(matmul(a_real, b_real, s), matmul(a_imag, b_imag, s), s);
-    auto c_imag = add(matmul(a_real, b_imag, s), matmul(a_imag, b_real, s), s);
+
+    // Compute real and imaginary components of the result
+    auto m1 = matmul(a_real, b_real, s);
+    auto m2 = matmul(a_imag, b_imag, s);
+    auto m3 = matmul(add(a_real, a_imag, s), add(b_real, b_imag, s), s);
+
+    auto c_real = subtract(m1, m2, s);
+    auto c_imag = subtract(m3, add(m1, m2, s), s);
+
     return add(
         c_real, multiply(array(complex64_t{0, 1}, complex64), c_imag, s), s);
   }
+
+  // Type promotion
+  auto out_type = promote_types(a.dtype(), b.dtype());
+
   if (!issubdtype(out_type, floating)) {
     std::ostringstream msg;
     msg << "[matmul] Only real floating point types are supported but "
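Two details of this hunk are worth calling out (editor's note). First, the complex branch now tests the input dtypes directly and runs before type promotion, so the issubdtype(out_type, floating) guard below it only ever sees the real code path. Second, Karatsuba trades the fourth real matmul for two extra matrix additions and an extra subtraction; since a matmul does on the order of M*K*N work while an addition does only M*K or K*N, this should be a net win for all but very small matrices. One standard caveat of the trick: forming the imaginary part as m3 - m1 - m2 can incur slightly more rounding error than the direct a_r*b_i + a_i*b_r form.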
python/tests/test_blas.py
@@ -1210,13 +1210,6 @@ class TestBlas(mlx_tests.MLXTestCase):
         self.assertTrue(np.allclose(c, c_np))
 
         # Test addmm
-        M = 16
-        K = 50
-        N = 32
-
-        def rand(shape):
-            return mx.random.uniform(shape=shape) + 1j * mx.random.uniform(shape=shape)
-
         a = rand((M, K))
         b = rand((K, N))
         c = rand((M, N))
@@ -1224,6 +1217,13 @@ class TestBlas(mlx_tests.MLXTestCase):
         out_np = 2.0 * np.matmul(a, b) + 2.0 * c
         self.assertTrue(np.allclose(out, out_np))
 
+        # complex with real
+        a = rand((M, K)).real
+        b = rand((K, N))
+        c = mx.matmul(a, b)
+        c_np = np.matmul(a, b)
+        self.assertTrue(np.allclose(c, c_np))
+
 
 if __name__ == "__main__":
     unittest.main()
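Finally, a usage sketch from the Python side (editor's illustration, mirroring the tests' rand() helper; the tolerance is loosened for float32 precision and is an assumption, not from the commit):

import mlx.core as mx
import numpy as np

# Editor's sketch: complex64 matmul as exercised by the tests above.
def rand(shape):
    return mx.random.uniform(shape=shape) + 1j * mx.random.uniform(shape=shape)

a = rand((16, 50))
b = rand((50, 32))
c = mx.matmul(a, b)  # now computed internally with three real matmuls

print(np.allclose(c, np.matmul(a, b), atol=1e-5))  # expected: True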