# Fully qualified test names ("TestClass.test_method") collected in a set.
# NOTE(review): judging by the name, a test runner presumably consults this
# set to skip these tests on the CUDA backend -- confirm against the caller.
# Entries are grouped by the reason for skipping and sorted within a group.
cuda_skip = {
    # --- Block-masked matmul: not yet implemented ---
    "TestBlas.test_block_masked_matmul",
    # --- Gather matmul: not yet implemented ---
    "TestBlas.test_gather_matmul",
    "TestBlas.test_gather_matmul_grad",
    # --- Scans: not yet implemented ---
    "TestAutograd.test_cumprod_grad",
    "TestOps.test_logcumsumexp",
    "TestOps.test_scans",
    # --- Hadamard transform: not yet implemented ---
    "TestOps.test_hadamard",
    "TestOps.test_hadamard_grad_vmap",
    # --- Convolutions: not yet implemented ---
    "TestConv.test_1d_conv_with_2d",
    "TestConv.test_asymmetric_padding",
    "TestConv.test_basic_grad_shapes",
    "TestConv.test_conv2d_unaligned_channels",
    "TestConv.test_conv_1d_groups_flipped",
    "TestConv.test_conv_general_flip_grad",
    "TestConv.test_conv_groups_grad",
    "TestConv.test_numpy_conv",
    "TestConv.test_repeated_conv",
    "TestConv.test_torch_conv_1D",
    "TestConv.test_torch_conv_1D_grad",
    "TestConv.test_torch_conv_2D",
    "TestConv.test_torch_conv_2D_grad",
    "TestConv.test_torch_conv_3D",
    "TestConv.test_torch_conv_3D_grad",
    "TestConv.test_torch_conv_depthwise",
    "TestConv.test_torch_conv_general",
    # The "tranpose" spelling below is intentional: the string is kept exactly
    # as listed so it keeps matching whatever name it is compared against.
    "TestConvTranspose.test_torch_conv_tranpose_1d_output_padding",
    "TestConvTranspose.test_torch_conv_transpose_1D",
    "TestConvTranspose.test_torch_conv_transpose_1D_grad",
    "TestConvTranspose.test_torch_conv_transpose_2D",
    "TestConvTranspose.test_torch_conv_transpose_2D_grad",
    "TestConvTranspose.test_torch_conv_transpose_2d_output_padding",
    "TestConvTranspose.test_torch_conv_transpose_3D",
    "TestConvTranspose.test_torch_conv_transpose_3D_grad",
    "TestConvTranspose.test_torch_conv_transpose_3d_output_padding",
    "TestExportImport.test_export_conv",
    "TestLayers.test_conv1d",
    "TestLayers.test_conv2d",
    "TestVmap.test_vmap_conv",
    # --- FFTs: not yet implemented ---
    "TestFFT.test_fft",
    "TestFFT.test_fft_big_powers_of_two",
    "TestFFT.test_fft_contiguity",
    "TestFFT.test_fft_exhaustive",
    "TestFFT.test_fft_grads",
    "TestFFT.test_fft_into_ifft",
    "TestFFT.test_fft_large_numbers",
    "TestFFT.test_fft_shared_mem",
    "TestFFT.test_fftn",
    # --- LAPACK-backed ops: not yet implemented ---
    "TestInit.test_orthogonal",
    "TestLinalg.test_cholesky",
    "TestLinalg.test_cholesky_inv",
    "TestLinalg.test_eig",
    "TestLinalg.test_eigh",
    "TestLinalg.test_inverse",
    "TestLinalg.test_lu",
    "TestLinalg.test_lu_factor",
    "TestLinalg.test_pseudo_inverse",
    "TestLinalg.test_qr_factorization",
    "TestLinalg.test_svd_decomposition",
    "TestLinalg.test_tri_inverse",
    "TestVmap.test_vmap_inverse",
    "TestVmap.test_vmap_svd",
    # --- Quantization: not yet implemented ---
    "TestQuantized.test_gather_matmul_grad",
    "TestQuantized.test_gather_qmm",
    "TestQuantized.test_gather_qmm_sorted",
    "TestQuantized.test_non_multiples",
    "TestQuantized.test_qmm",
    "TestQuantized.test_qmm_jvp",
    "TestQuantized.test_qmm_shapes",
    "TestQuantized.test_qmm_vjp",
    "TestQuantized.test_qmv",
    "TestQuantized.test_quantize_dequantize",
    "TestQuantized.test_qvm",
    "TestQuantized.test_qvm_splitk",
    "TestQuantized.test_small_matrix",
    "TestQuantized.test_throw",
    "TestQuantized.test_vjp_scales_biases",
    # --- Individual tests with no skip reason recorded in this list ---
    "TestArray.test_api",
    "TestBF16.test_arg_reduction_ops",
    "TestBF16.test_reduction_ops",
    "TestBlas.test_complex_gemm",
    "TestEinsum.test_ellipses",
    "TestEinsum.test_opt_einsum_test_cases",
    "TestLayers.test_group_norm",
    "TestLayers.test_pooling",
    "TestLayers.test_quantized_embedding",
    "TestLayers.test_sin_pe",
    "TestLayers.test_upsample",
    "TestLoad.test_load_f8_e4m3",
    "TestOps.test_complex_ops",
    "TestOps.test_dynamic_slicing",
    "TestReduce.test_axis_permutation_sums",
    "TestReduce.test_dtypes",
    "TestReduce.test_expand_sums",
    "TestReduce.test_many_reduction_axes",
    "TestUpsample.test_torch_upsample",
}