Gather qmm batched kernel and refactoring of quantized (#2078)

2025-12-16 01:49:05 +08:00 · 2025-04-17 13:53:11 -07:00
parent 99eefd2ec0
commit 5de6d94a90
15 changed files with 1479 additions and 449 deletions
--- a/mlx/backend/metal/jit_kernels.cpp
+++ b/mlx/backend/metal/jit_kernels.cpp
@@ -752,4 +752,43 @@ MTL::ComputePipelineState* get_quantized_kernel(
  return d.get_kernel(kernel_name, lib);
 }

+MTL::ComputePipelineState* get_gather_qmm_kernel( // Returns the kernel `kernel_name` from a library whose source instantiates "gather_qmm_rhs".
+    metal::Device& d, // queried for both the library and the kernel
+    const std::string& kernel_name, // kernel to fetch; also reused as the library name
+    const std::string& hash_name, // forwarded unchanged to d.get_kernel
+    const metal::MTLFCList& func_consts, // forwarded unchanged to d.get_kernel (presumably Metal function constants -- confirm)
+    const array& x, // only x.dtype() is read, to choose the type string
+    int group_size, // template argument (presumably the quantization group size -- confirm)
+    int bits, // template argument (presumably bits per quantized value -- confirm)
+    int bm, // bm, bn, bk, wm, wn are passed straight through as template arguments;
+    int bn, // NOTE(review): they look like block-tile sizes (bm/bn/bk) and
+    int bk, // per-threadgroup simdgroup counts (wm/wn) -- confirm against the kernel source
+    int wm,
+    int wn,
+    bool transpose) { // last template argument
+  const auto& lib_name = kernel_name; // the library is registered under the kernel's own name
+  auto lib = d.get_library(lib_name, [&]() { // lambda supplies the source; presumably invoked only if the library is not already cached -- confirm in Device::get_library
+    std::string kernel_source;
+    concatenate( // appends the shared headers, then the template definition, into kernel_source
+        kernel_source,
+        metal::utils(),
+        metal::gemm(),
+        metal::quantized(),
+        get_template_definition( // arguments: name, template function, then the template arguments in order
+            lib_name,
+            "gather_qmm_rhs",
+            get_type_string(x.dtype()),
+            group_size,
+            bits,
+            bm,
+            bn,
+            bk,
+            wm,
+            wn,
+            transpose));
+    return kernel_source;
+  });
+  return d.get_kernel(kernel_name, lib, hash_name, func_consts); // hash_name and func_consts are consumed only here, not when building the source
+}
+
 } // namespace mlx::core