RoPE for CUDA (#2293)

* First working CUDA rope

* Fix random
This commit is contained in:
Angelos Katharopoulos
2025-06-15 06:08:07 -07:00
committed by GitHub
parent a14aaa7c9d
commit 580776559b
8 changed files with 443 additions and 29 deletions

View File

@@ -95,6 +95,9 @@ Dims get_2d_grid_dims_common(
const Strides& strides,
size_t divisor);
// Get both the block and a grid of blocks that covers dim0, dim1 and dim2.
//
// Takes the three extents (dim0, dim1, dim2) as ints and returns the result
// as a pair of Dims, one for the block and one for the grid of blocks.
// NOTE(review): which pair element is the grid and which is the block is not
// visible from this declaration -- confirm against the definition before
// destructuring the result.
std::pair<Dims, Dims> get_grid_and_block_common(int dim0, int dim1, int dim2);
struct ContiguousIterator {
inline void step() {
int dims = shape_.size();