mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
fix bw for elementwise ops (#2151)
* fix bw for elementwise ops * add compile * fix * fix * fix * fix
This commit is contained in:
@@ -45,7 +45,7 @@ void ternary_op_gpu_inplace(
|
||||
work_per_thread = large ? 4 : 2;
|
||||
} else {
|
||||
large = out.data_size() > INT32_MAX;
|
||||
work_per_thread = 1;
|
||||
work_per_thread = get_work_per_thread(b.dtype());
|
||||
}
|
||||
std::string kernel_name;
|
||||
if (topt == TernaryOpType::General) {
|
||||
@@ -106,13 +106,19 @@ void ternary_op_gpu_inplace(
|
||||
compute_encoder.dispatch_threads(grid_dims, group_dims);
|
||||
} else {
|
||||
// Launch a 1D or 2D grid of threads
|
||||
size_t nthreads = out.data_size();
|
||||
size_t nthreads = ceildiv(out.data_size(), work_per_thread);
|
||||
if (thread_group_size > nthreads) {
|
||||
thread_group_size = nthreads;
|
||||
}
|
||||
MTL::Size group_dims = MTL::Size(thread_group_size, 1, 1);
|
||||
MTL::Size grid_dims = large ? get_2d_grid_dims(out.shape(), out.strides())
|
||||
: MTL::Size(nthreads, 1, 1);
|
||||
MTL::Size grid_dims;
|
||||
if (large) {
|
||||
compute_encoder.set_bytes<int64_t>(out.data_size(), 4);
|
||||
grid_dims = get_2d_grid_dims(out.shape(), out.strides(), work_per_thread);
|
||||
} else {
|
||||
compute_encoder.set_bytes<int>(out.data_size(), 4);
|
||||
grid_dims = MTL::Size(nthreads, 1, 1);
|
||||
}
|
||||
compute_encoder.dispatch_threads(grid_dims, group_dims);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user