mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
add cuda gemv (#2400)
This commit is contained in:
@@ -37,7 +37,7 @@ __global__ void copy_gg(
|
||||
int ndim) {
|
||||
IdxT index = cg::this_grid().thread_rank();
|
||||
if (index < size) {
|
||||
auto [idx_in, idx_out] = elem_to_loc_4d(
|
||||
auto [idx_in, idx_out] = elem_to_loc(
|
||||
index, shape.data(), strides_in.data(), strides_out.data(), ndim);
|
||||
out[idx_out] = CastOp<In, Out>{}(in[idx_in]);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user