[CUDA] Fix back-end bugs and enable corresponding tests (#2296)

* Fix some cuda back-end bugs and enable corresponding tests

* more fixes

* enable more tests

* format
This commit is contained in:
Awni Hannun
2025-06-16 08:45:40 -07:00
committed by GitHub
parent 4fda5fbdf9
commit c552ff2451
16 changed files with 115 additions and 98 deletions

View File

@@ -116,7 +116,7 @@ void ternary_op_gpu_inplace(
b.data<DType>(),
c.data<DType>(),
out.data<DType>(),
out.data_size(),
out.size(),
const_param<NDIM>(shape),
const_param<NDIM>(a_strides),
const_param<NDIM>(b_strides),
@@ -142,7 +142,8 @@ void ternary_op_gpu_inplace(
MLX_SWITCH_BOOL(out.data_size() > UINT32_MAX, LARGE, {
using IdxT = std::conditional_t<LARGE, int64_t, uint32_t>;
auto kernel = cu::ternary_v<Op, DType, IdxT>;
auto [num_blocks, block_dims] = get_launch_args(kernel, out, LARGE);
auto [num_blocks, block_dims] = get_launch_args(
kernel, out.data_size(), out.shape(), out.strides(), LARGE);
kernel<<<num_blocks, block_dims, 0, stream>>>(
a.data<bool>(),
b.data<DType>(),