[CUDA] Implement DynamicSlice/DynamicSliceUpdate (#2533)

* Move DynamicSlice to gpu/primitives
* Implement compute_dynamic_offset in CUDA
2025-12-16 01:49:05 +08:00 · 2025-08-26 07:31:39 +09:00
parent 2ca75bb529
commit 4822c3dbe9
12 changed files with 226 additions and 134 deletions
--- a/mlx/backend/cuda/jit_module.h
+++ b/mlx/backend/cuda/jit_module.h
@@ -46,6 +46,11 @@ struct KernelArgs {
    append_ptr(std::get<SmallVector<T>>(storage_.back()).data());
  }

+  template <typename T>  // Convenience overload for std::vector arguments.
+  void append(const std::vector<T>& vec) {  // vec is only read, not kept.
+    append(SmallVector<T>(vec.begin(), vec.end()));  // Copy, then forward.
+  }
+
  // Make sure the arg is copied to an array with size of NDIM.
  template <size_t NDIM = MAX_NDIM, typename T>
  void append_ndim(SmallVector<T> vec) {