[CUDA] Implement DynamicSlice/DynamicSliceUpdate (#2533)

* Move DynamicSlice to gpu/primitives

* Implement compute_dynamic_offset in CUDA
This commit is contained in:
Cheng
2025-08-26 07:31:39 +09:00
committed by GitHub
parent 2ca75bb529
commit 4822c3dbe9
12 changed files with 226 additions and 134 deletions

View File

@@ -46,6 +46,11 @@ struct KernelArgs {
append_ptr(std::get<SmallVector<T>>(storage_.back()).data());
}
template <typename T>
void append(const std::vector<T>& vec) {
append(SmallVector<T>(vec.begin(), vec.end()));
}
// Make sure the arg is copied to an array with size of NDIM.
template <size_t NDIM = MAX_NDIM, typename T>
void append_ndim(SmallVector<T> vec) {