From 0408ba0a768a3493fc3e12262162eca2e55346f0 Mon Sep 17 00:00:00 2001
From: Suryash Malviya <71389351+thesuryash@users.noreply.github.com>
Date: Mon, 2 Jun 2025 18:58:46 -0400
Subject: [PATCH] =?UTF-8?q?Optimizing=20Complex=20Matrix=20Multiplication?=
 =?UTF-8?q?=20using=20Karatsuba=E2=80=99s=20Algorithm=20=20(#2220)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Implementing Complex Matmul using Karatsuba Algorithm

* Implemented Karatsuba's algorithm for complex matmul and ran pre-commit

* fix

---------

Co-authored-by: Awni Hannun <awni@apple.com>
---
 mlx/ops.cpp               | 25 +++++++++++++++++--------
 python/tests/test_blas.py | 14 +++++++-------
 2 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/mlx/ops.cpp b/mlx/ops.cpp
index a72c2bc85..9602f667a 100644
--- a/mlx/ops.cpp
+++ b/mlx/ops.cpp
@@ -2862,21 +2862,30 @@ array matmul(
         << " second input with shape " << b.shape() << ".";
     throw std::invalid_argument(msg.str());
   }
-  // Type promotion
-  auto out_type = promote_types(a.dtype(), b.dtype());
-  // Complex matmul in terms of real matmuls
-  if (out_type == complex64) {
+
+  // complex matmul using Karatsuba's Algorithm
+  if (a.dtype() == complex64 || b.dtype() == complex64) {
+    // Extract real and imaginary parts
     auto a_real = real(a, s);
-    auto b_real = real(b, s);
     auto a_imag = imag(a, s);
+    auto b_real = real(b, s);
     auto b_imag = imag(b, s);
-    auto c_real =
-        subtract(matmul(a_real, b_real, s), matmul(a_imag, b_imag, s), s);
-    auto c_imag = add(matmul(a_real, b_imag, s), matmul(a_imag, b_real, s), s);
+
+    // Compute real and imaginary components of the result
+    auto m1 = matmul(a_real, b_real, s);
+    auto m2 = matmul(a_imag, b_imag, s);
+    auto m3 = matmul(add(a_real, a_imag, s), add(b_real, b_imag, s), s);
+
+    auto c_real = subtract(m1, m2, s);
+    auto c_imag = subtract(m3, add(m1, m2, s), s);
+
     return add(
         c_real, multiply(array(complex64_t{0, 1}, complex64), c_imag, s), s);
   }
 
+  // Type promotion
+  auto out_type = promote_types(a.dtype(), b.dtype());
+
   if (!issubdtype(out_type, floating)) {
     std::ostringstream msg;
     msg << "[matmul] Only real floating point types are supported but "
diff --git a/python/tests/test_blas.py b/python/tests/test_blas.py
index df459eadc..8c7a97ba8 100644
--- a/python/tests/test_blas.py
+++ b/python/tests/test_blas.py
@@ -1210,13 +1210,6 @@ class TestBlas(mlx_tests.MLXTestCase):
         self.assertTrue(np.allclose(c, c_np))
 
         # Test addmm
-        M = 16
-        K = 50
-        N = 32
-
-        def rand(shape):
-            return mx.random.uniform(shape=shape) + 1j * mx.random.uniform(shape=shape)
-
         a = rand((M, K))
         b = rand((K, N))
         c = rand((M, N))
@@ -1224,6 +1217,13 @@ class TestBlas(mlx_tests.MLXTestCase):
         out_np = 2.0 * np.matmul(a, b) + 2.0 * c
         self.assertTrue(np.allclose(out, out_np))
 
+        # complex with real: compare this product (c), not the addmm result above
+        a = rand((M, K)).real
+        b = rand((K, N))
+        c = mx.matmul(a, b)
+        c_np = np.matmul(a, b)
+        self.assertTrue(np.allclose(c, c_np))
+
 
 if __name__ == "__main__":
     unittest.main()