Implement compute_dynamic_offset in CUDA

This commit is contained in:
Cheng
2025-08-23 18:36:58 -07:00
parent 5746c0c658
commit 57b2b8817a
9 changed files with 101 additions and 17 deletions

View File

@@ -1,7 +1,6 @@
cuda_skip = {
"TestLoad.test_load_f8_e4m3",
"TestLayers.test_quantized_embedding",
-"TestOps.test_dynamic_slicing",
# Block masked matmul NYI
"TestBlas.test_block_masked_matmul",
# Gather matmul NYI