# Set of test identifiers, each written as "TestClassName.test_method_name".
# NOTE(review): judging by the name, these are presumably the tests a runner
# skips on the CUDA backend — confirm against the code that reads `cuda_skip`.
#
# Layout matters here: every entry and every category comment lives on its own
# line. A `#` comment extends to the end of the physical line, so a comment
# placed in front of entries on the same line would silently drop them (and
# could swallow the closing brace).
cuda_skip = {
    "TestArray.test_api",
    "TestBF16.test_arg_reduction_ops",
    "TestBF16.test_reduction_ops",
    "TestBlas.test_complex_gemm",
    "TestEinsum.test_ellipses",
    "TestEinsum.test_opt_einsum_test_cases",
    "TestLoad.test_load_f8_e4m3",
    "TestLayers.test_group_norm",
    "TestLayers.test_pooling",
    "TestLayers.test_quantized_embedding",
    "TestLayers.test_sin_pe",
    "TestLayers.test_upsample",
    "TestOps.test_complex_ops",
    "TestOps.test_dynamic_slicing",
    "TestReduce.test_axis_permutation_sums",
    "TestReduce.test_dtypes",
    "TestReduce.test_expand_sums",
    "TestReduce.test_many_reduction_axes",
    "TestUpsample.test_torch_upsample",
    # Block masked matmul NYI
    "TestBlas.test_block_masked_matmul",
    # Gather matmul NYI
    "TestBlas.test_gather_matmul",
    "TestBlas.test_gather_matmul_grad",
    # Scan NYI
    "TestAutograd.test_cumprod_grad",
    "TestOps.test_scans",
    "TestOps.test_logcumsumexp",
    # Hadamard NYI
    "TestOps.test_hadamard",
    "TestOps.test_hadamard_grad_vmap",
    # Convolutions NYI
    "TestConv.test_1d_conv_with_2d",
    "TestConv.test_asymmetric_padding",
    "TestConv.test_basic_grad_shapes",
    "TestConv.test_conv2d_unaligned_channels",
    "TestConv.test_conv_1d_groups_flipped",
    "TestConv.test_conv_general_flip_grad",
    "TestConv.test_conv_groups_grad",
    "TestConv.test_numpy_conv",
    "TestConv.test_repeated_conv",
    "TestConv.test_torch_conv_1D",
    "TestConv.test_torch_conv_1D_grad",
    "TestConv.test_torch_conv_2D",
    "TestConv.test_torch_conv_2D_grad",
    "TestConv.test_torch_conv_3D",
    "TestConv.test_torch_conv_3D_grad",
    "TestConv.test_torch_conv_depthwise",
    "TestConv.test_torch_conv_general",
    # The "tranpose" spelling below is kept exactly as it appeared in this
    # set; presumably it mirrors the test method's real name — verify before
    # "correcting" it, since a mismatch would stop the entry from matching.
    "TestConvTranspose.test_torch_conv_tranpose_1d_output_padding",
    "TestConvTranspose.test_torch_conv_transpose_1D",
    "TestConvTranspose.test_torch_conv_transpose_1D_grad",
    "TestConvTranspose.test_torch_conv_transpose_2D",
    "TestConvTranspose.test_torch_conv_transpose_2D_grad",
    "TestConvTranspose.test_torch_conv_transpose_2d_output_padding",
    "TestConvTranspose.test_torch_conv_transpose_3D",
    "TestConvTranspose.test_torch_conv_transpose_3D_grad",
    "TestConvTranspose.test_torch_conv_transpose_3d_output_padding",
    "TestExportImport.test_export_conv",
    "TestLayers.test_conv1d",
    "TestLayers.test_conv2d",
    "TestVmap.test_vmap_conv",
    # FFTs NYI
    "TestFFT.test_fft",
    "TestFFT.test_fft_big_powers_of_two",
    "TestFFT.test_fft_contiguity",
    "TestFFT.test_fft_exhaustive",
    "TestFFT.test_fft_grads",
    "TestFFT.test_fft_into_ifft",
    "TestFFT.test_fft_large_numbers",
    "TestFFT.test_fft_shared_mem",
    "TestFFT.test_fftn",
    # Lapack ops NYI
    "TestLinalg.test_cholesky",
    "TestLinalg.test_cholesky_inv",
    "TestLinalg.test_eig",
    "TestLinalg.test_eigh",
    "TestLinalg.test_inverse",
    "TestVmap.test_vmap_inverse",
    "TestLinalg.test_lu",
    "TestLinalg.test_lu_factor",
    "TestLinalg.test_pseudo_inverse",
    "TestLinalg.test_qr_factorization",
    "TestInit.test_orthogonal",
    "TestLinalg.test_svd_decomposition",
    "TestVmap.test_vmap_svd",
    "TestLinalg.test_tri_inverse",
    # Quantization NYI
    "TestQuantized.test_gather_matmul_grad",
    "TestQuantized.test_gather_qmm",
    "TestQuantized.test_gather_qmm_sorted",
    "TestQuantized.test_non_multiples",
    "TestQuantized.test_qmm",
    "TestQuantized.test_qmm_jvp",
    "TestQuantized.test_qmm_shapes",
    "TestQuantized.test_qmm_vjp",
    "TestQuantized.test_qmv",
    "TestQuantized.test_quantize_dequantize",
    "TestQuantized.test_qvm",
    "TestQuantized.test_qvm_splitk",
    "TestQuantized.test_small_matrix",
    "TestQuantized.test_throw",
    "TestQuantized.test_vjp_scales_biases",
}