[CUDA] Implement Scan kernel (#2347)

* Contiguous scan

* Strided scan

* Enable tests

* Fix failing logaddexp test

* Use cexpf in Metal
This commit is contained in:
Cheng
2025-07-11 08:54:12 +09:00
committed by GitHub
parent b6eec20260
commit 8347575ba1
13 changed files with 815 additions and 64 deletions

View File

@@ -13,11 +13,6 @@ cuda_skip = {
"TestBlas.test_gather_mm_sorted",
# Segmented matmul NYI
"TestBlas.test_segmented_mm",
-# Scan NYI
-"TestArray.test_api",
-"TestAutograd.test_cumprod_grad",
-"TestOps.test_scans",
-"TestOps.test_logcumsumexp",
# Hadamard NYI
"TestOps.test_hadamard",
"TestOps.test_hadamard_grad_vmap",