Cuda bug fixes 2 (#2298)

* more bug fixes

* more bug fixes

* format
This commit is contained in:
Awni Hannun
2025-06-16 13:14:46 -07:00
committed by GitHub
parent c552ff2451
commit bc53f8293f
11 changed files with 143 additions and 107 deletions

View File

@@ -1,24 +1,50 @@
cuda_skip = {
"TestArray.test_api",
"TestAutograd.test_cumprod_grad",
"TestAutograd.test_slice_grads",
"TestAutograd.test_split_against_slice",
"TestAutograd.test_stop_gradient",
"TestAutograd.test_topk_grad",
"TestAutograd.test_update_state",
"TestAutograd.test_vjp",
"TestBF16.test_arg_reduction_ops",
"TestBF16.test_binary_ops",
"TestBF16.test_reduction_ops",
"TestBlas.test_block_masked_matmul",
"TestBlas.test_complex_gemm",
"TestCompile.test_compile_dynamic_dims",
"TestEinsum.test_ellipses",
"TestEinsum.test_opt_einsum_test_cases",
"TestLoad.test_load_f8_e4m3",
"TestMemory.test_memory_info",
"TestLayers.test_group_norm",
"TestLayers.test_pooling",
"TestLayers.test_quantized_embedding",
"TestLayers.test_sin_pe",
"TestLayers.test_upsample",
"TestOps.test_array_equal",
"TestOps.test_complex_ops",
"TestOps.test_dynamic_slicing",
"TestOps.test_softmax",
"TestOps.test_sort",
"TestOps.test_tile",
"TestReduce.test_axis_permutation_sums",
"TestReduce.test_dtypes",
"TestReduce.test_expand_sums",
"TestReduce.test_many_reduction_axes",
"TestUpsample.test_torch_upsample",
# DivMod NYI
"TestOps.test_divmod",
"TestEval.test_multi_output_eval_during_transform",
# Partition NYI
"TestAutograd.test_topk_grad",
"TestOps.test_argpartition",
"TestOps.test_partition",
# Block masked matmul NYI
"TestBlas.test_block_masked_matmul",
# Gather matmul NYI
"TestBlas.test_gather_matmul",
"TestBlas.test_gather_matmul_grad",
"TestBlas.test_matmul_batched",
"TestBlas.test_matrix_vector_attn",
"TestCompile.test_compile_dynamic_dims",
"TestCompile.test_compile_inf",
"TestCompile.test_inf_constant",
# Scan NYI
"TestAutograd.test_cumprod_grad",
"TestOps.test_scans",
"TestOps.test_logcumsumexp",
# Hadamard NYI
"TestOps.test_hadamard",
"TestOps.test_hadamard_grad_vmap",
# Convolutions NYI
"TestConv.test_1d_conv_with_2d",
"TestConv.test_asymmetric_padding",
"TestConv.test_basic_grad_shapes",
@@ -45,11 +71,11 @@ cuda_skip = {
"TestConvTranspose.test_torch_conv_transpose_3D",
"TestConvTranspose.test_torch_conv_transpose_3D_grad",
"TestConvTranspose.test_torch_conv_transpose_3d_output_padding",
"TestEinsum.test_attention",
"TestEinsum.test_ellipses",
"TestEinsum.test_opt_einsum_test_cases",
"TestEval.test_multi_output_eval_during_transform",
"TestExportImport.test_export_conv",
"TestLayers.test_conv1d",
"TestLayers.test_conv2d",
"TestVmap.test_vmap_conv",
# FFTs NYI
"TestFFT.test_fft",
"TestFFT.test_fft_big_powers_of_two",
"TestFFT.test_fft_contiguity",
@@ -59,52 +85,22 @@ cuda_skip = {
"TestFFT.test_fft_large_numbers",
"TestFFT.test_fft_shared_mem",
"TestFFT.test_fftn",
"TestInit.test_orthogonal",
# Lapack ops NYI
"TestLinalg.test_cholesky",
"TestLinalg.test_cholesky_inv",
"TestLinalg.test_eig",
"TestLinalg.test_eigh",
"TestLinalg.test_inverse",
"TestVmap.test_vmap_inverse",
"TestLinalg.test_lu",
"TestLinalg.test_lu_factor",
"TestLinalg.test_pseudo_inverse",
"TestLinalg.test_qr_factorization",
"TestInit.test_orthogonal",
"TestLinalg.test_svd_decomposition",
"TestVmap.test_vmap_svd",
"TestLinalg.test_tri_inverse",
"TestLoad.test_load_f8_e4m3",
"TestLosses.test_binary_cross_entropy",
"TestMemory.test_memory_info",
"TestLayers.test_conv1d",
"TestLayers.test_conv2d",
"TestLayers.test_elu",
"TestLayers.test_group_norm",
"TestLayers.test_hard_shrink",
"TestLayers.test_pooling",
"TestLayers.test_quantized_embedding",
"TestLayers.test_sin_pe",
"TestLayers.test_softshrink",
"TestLayers.test_upsample",
"TestOps.test_argpartition",
"TestOps.test_array_equal",
"TestOps.test_as_strided",
"TestOps.test_binary_ops",
"TestOps.test_bitwise_grad",
"TestOps.test_complex_ops",
"TestOps.test_divmod",
"TestOps.test_dynamic_slicing",
"TestOps.test_hadamard",
"TestOps.test_hadamard_grad_vmap",
"TestOps.test_irregular_binary_ops",
"TestOps.test_kron",
"TestOps.test_log1p",
"TestOps.test_logaddexp",
"TestOps.test_logcumsumexp",
"TestOps.test_partition",
"TestOps.test_scans",
"TestOps.test_softmax",
"TestOps.test_sort",
"TestOps.test_tensordot",
"TestOps.test_tile",
# Quantization NYI
"TestQuantized.test_gather_matmul_grad",
"TestQuantized.test_gather_qmm",
"TestQuantized.test_gather_qmm_sorted",
@@ -120,12 +116,4 @@ cuda_skip = {
"TestQuantized.test_small_matrix",
"TestQuantized.test_throw",
"TestQuantized.test_vjp_scales_biases",
"TestReduce.test_axis_permutation_sums",
"TestReduce.test_dtypes",
"TestReduce.test_expand_sums",
"TestReduce.test_many_reduction_axes",
"TestUpsample.test_torch_upsample",
"TestVmap.test_vmap_conv",
"TestVmap.test_vmap_inverse",
"TestVmap.test_vmap_svd",
}

View File

@@ -83,14 +83,14 @@ class TestLosses(mlx_tests.MLXTestCase):
logits, targets, reduction="mean"
)
expected_mean = mx.mean(expected_none)
self.assertEqual(losses_mean, expected_mean)
self.assertTrue(mx.allclose(losses_mean, expected_mean))
# Test with reduction 'sum'
losses_sum = nn.losses.binary_cross_entropy(
logits, targets, reduction="sum"
)
expected_sum = mx.sum(expected_none)
self.assertEqual(losses_sum, expected_sum)
self.assertTrue(mx.allclose(losses_sum, expected_sum))
# With weights, no label smoothing
weights = mx.array([1.0, 2.0, 1.0, 2.0])