Rename block sparse (#1149)

* block_sparse_mm to gather_mm

* rename

* nit

* nit
Awni Hannun
2024-05-22 07:48:34 -07:00
committed by GitHub
parent e6fecbb3e1
commit d568c7ee36
16 changed files with 120 additions and 111 deletions
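Downstream, the rename surfaces as a new op name: block_sparse_mm becomes gather_mm (and the quantized primitive BlockSparseQMM becomes GatherQMM, in the second file below). As a rough illustration only, here is a minimal sketch of calling the renamed op from C++ — the signature, the optional index arguments, and the random::normal usage are assumptions based on the commit message, not taken from this diff:

#include "mlx/mlx.h"

using namespace mlx::core;

int main() {
  // Two stacks of matrices: a is (4, 8, 16), b is (4, 16, 8).
  auto a = random::normal({4, 8, 16});
  auto b = random::normal({4, 16, 8});

  // Indices choosing which matrix from each stack joins each matmul.
  auto lhs_indices = array({0, 2});
  auto rhs_indices = array({1, 3});

  // Renamed op (formerly block_sparse_mm); argument order is an
  // assumption, not shown in this diff.
  auto out = gather_mm(a, b, lhs_indices, rhs_indices);
  eval(out);
}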

View File

@@ -324,12 +324,12 @@ void steel_matmul_conv_groups(
   };
   // clang-format off
-  kname << "_has_batch_" << (has_batch ? 't' : 'n')
-        << "_use_out_source_" << (use_out_source ? 't' : 'n')
-        << "_do_axpby_" << (do_axpby ? 't' : 'n')
+  kname << "_has_batch_" << (has_batch ? 't' : 'n')
+        << "_use_out_source_" << (use_out_source ? 't' : 'n')
+        << "_do_axpby_" << (do_axpby ? 't' : 'n')
         << "_align_M_" << (align_M ? 't' : 'n')
-        << "_align_N_" << (align_N ? 't' : 'n')
-        << "_align_K_" << (align_K ? 't' : 'n')
+        << "_align_N_" << (align_N ? 't' : 'n')
+        << "_align_K_" << (align_K ? 't' : 'n')
         << "_do_gather_" << (do_gather ? 't' : 'n'); // clang-format on
   std::string hash_name = kname.str();
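All of the kname hunks in this file touch the same pattern: boolean specialization flags are folded into the kernel name, and the resulting string doubles as the cache key (hash_name), so each flag combination maps to its own compiled kernel variant. A self-contained sketch of that pattern, with a hypothetical base name:

#include <iostream>
#include <sstream>
#include <string>

// Every combination of specialization flags yields a distinct name,
// so each variant is looked up (and cached) separately.
std::string specialized_kernel_name(
    bool has_batch,
    bool use_out_source,
    bool do_axpby,
    bool align_M,
    bool align_N,
    bool align_K,
    bool do_gather) {
  std::ostringstream kname;
  kname << "steel_gemm_fused";  // hypothetical base name
  kname << "_has_batch_" << (has_batch ? 't' : 'n')
        << "_use_out_source_" << (use_out_source ? 't' : 'n')
        << "_do_axpby_" << (do_axpby ? 't' : 'n')
        << "_align_M_" << (align_M ? 't' : 'n')
        << "_align_N_" << (align_N ? 't' : 'n')
        << "_align_K_" << (align_K ? 't' : 'n')
        << "_do_gather_" << (do_gather ? 't' : 'n');
  return kname.str();
}

int main() {
  // Prints e.g. steel_gemm_fused_has_batch_t_..._do_gather_t
  std::cout << specialized_kernel_name(true, false, false, true, true, false, true)
            << "\n";
}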
@@ -575,12 +575,12 @@ void steel_matmul(
   };
   // clang-format off
-  kname << "_has_batch_" << (has_batch ? 't' : 'n')
-        << "_use_out_source_" << (use_out_source ? 't' : 'n')
-        << "_do_axpby_" << (do_axpby ? 't' : 'n')
+  kname << "_has_batch_" << (has_batch ? 't' : 'n')
+        << "_use_out_source_" << (use_out_source ? 't' : 'n')
+        << "_do_axpby_" << (do_axpby ? 't' : 'n')
         << "_align_M_" << (align_M ? 't' : 'n')
-        << "_align_N_" << (align_N ? 't' : 'n')
-        << "_align_K_" << (align_K ? 't' : 'n')
+        << "_align_N_" << (align_N ? 't' : 'n')
+        << "_align_K_" << (align_K ? 't' : 'n')
         << "_do_gather_" << (do_gather ? 't' : 'n'); // clang-format on
   std::string hash_name = kname.str();
@@ -1170,12 +1170,12 @@ void AddMM::eval_gpu(const std::vector<array>& inputs, array& out) {
   };
   // clang-format off
-  kname << "_has_batch_" << (has_batch ? 't' : 'n')
-        << "_use_out_source_" << (use_out_source ? 't' : 'n')
-        << "_do_axpby_" << (do_axpby ? 't' : 'n')
+  kname << "_has_batch_" << (has_batch ? 't' : 'n')
+        << "_use_out_source_" << (use_out_source ? 't' : 'n')
+        << "_do_axpby_" << (do_axpby ? 't' : 'n')
         << "_align_M_" << (align_M ? 't' : 'n')
-        << "_align_N_" << (align_N ? 't' : 'n')
-        << "_align_K_" << (align_K ? 't' : 'n')
+        << "_align_N_" << (align_N ? 't' : 'n')
+        << "_align_K_" << (align_K ? 't' : 'n')
         << "_do_gather_" << (do_gather ? 't' : 'n'); // clang-format on
   std::string hash_name = kname.str();
@@ -1435,12 +1435,12 @@ void BlockMaskedMM::eval_gpu(const std::vector<array>& inputs, array& out) {
   return;
 }

-void BlockSparseMM::eval_gpu(const std::vector<array>& inputs, array& out) {
+void GatherMM::eval_gpu(const std::vector<array>& inputs, array& out) {
   using namespace mlx::steel;
   // assert(inputs.size() == 2);
   if (!issubdtype(out.dtype(), floating)) {
     throw std::runtime_error(
-        "[matmul] Does not yet support non-floating point types.");
+        "[GatherMM] Does not yet support non-floating point types.");
   }
   auto& s = stream();
   auto& d = metal::device(s.device);
@@ -1700,12 +1700,12 @@ void BlockSparseMM::eval_gpu(const std::vector<array>& inputs, array& out) {
   };
   // clang-format off
-  kname << "_has_batch_" << (has_batch ? 't' : 'n')
-        << "_use_out_source_" << (use_out_source ? 't' : 'n')
-        << "_do_axpby_" << (do_axpby ? 't' : 'n')
+  kname << "_has_batch_" << (has_batch ? 't' : 'n')
+        << "_use_out_source_" << (use_out_source ? 't' : 'n')
+        << "_do_axpby_" << (do_axpby ? 't' : 'n')
         << "_align_M_" << (align_M ? 't' : 'n')
-        << "_align_N_" << (align_N ? 't' : 'n')
-        << "_align_K_" << (align_K ? 't' : 'n')
+        << "_align_N_" << (align_N ? 't' : 'n')
+        << "_align_K_" << (align_K ? 't' : 'n')
         << "_do_gather_" << (do_gather ? 't' : 'n'); // clang-format on
   std::string hash_name = kname.str();

View File

@@ -196,7 +196,7 @@ void QuantizedMatmul::eval_gpu(const std::vector<array>& inputs, array& out) {
       [copies](MTL::CommandBuffer*) mutable { copies.clear(); });
 }

-void BlockSparseQMM::eval_gpu(const std::vector<array>& inputs, array& out) {
+void GatherQMM::eval_gpu(const std::vector<array>& inputs, array& out) {
   assert(inputs.size() == 6);
   out.set_data(allocator::malloc_or_wait(out.nbytes()));
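Both renamed entry points keep the same eval_gpu prologue: validate what the kernels support, then reserve the output buffer before any GPU work is encoded. A standalone sketch of that shape, using hypothetical stand-ins rather than the real MLX types:

#include <cstddef>
#include <cstdlib>
#include <stdexcept>
#include <vector>

// Hypothetical stand-ins for the MLX types in the diff above.
struct Array {
  bool is_floating = true;
  std::size_t nbytes = 0;
  void* data = nullptr;
};

// Stand-in for allocator::malloc_or_wait, which in MLX blocks until
// the allocator can serve the request.
void* malloc_or_wait(std::size_t n) {
  return std::malloc(n);
}

void gather_mm_eval_gpu(const std::vector<Array>& inputs, Array& out) {
  // Guard first: the kernels only cover floating-point types, so fail
  // early with an op-specific message (the "[GatherMM]" prefix above).
  if (!out.is_floating) {
    throw std::runtime_error(
        "[GatherMM] Does not yet support non-floating point types.");
  }
  // Reserve the output buffer before encoding any GPU work.
  out.data = malloc_or_wait(out.nbytes);
  // ... build the specialized kernel name and dispatch here ...
}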