[CUDA] Fix reductions (#2314)

2025-12-11 23:14:50 +08:00 · 2025-06-27 12:59:20 -07:00
parent 2c11d10f8d
commit 772f471ff2
16 changed files with 862 additions and 419 deletions
--- a/python/tests/cuda_skip.py
+++ b/python/tests/cuda_skip.py
@@ -1,7 +1,6 @@
 cuda_skip = {
    "TestArray.test_api",
    "TestBF16.test_arg_reduction_ops",
-    "TestBF16.test_reduction_ops",
    "TestBlas.test_complex_gemm",
    "TestEinsum.test_ellipses",
    "TestEinsum.test_opt_einsum_test_cases",
@@ -13,11 +12,7 @@ cuda_skip = {
    "TestLayers.test_upsample",
    "TestOps.test_complex_ops",
    "TestOps.test_dynamic_slicing",
-    "TestOps.test_softmax",
-    "TestReduce.test_axis_permutation_sums",
    "TestReduce.test_dtypes",
-    "TestReduce.test_expand_sums",
-    "TestReduce.test_many_reduction_axes",
    "TestUpsample.test_torch_upsample",
    # Block masked matmul NYI
    "TestBlas.test_block_masked_matmul",