mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
Faster metal compiled kernels + some fixes (#1486)
* bump mac tests to use py39
* work per thread for compiled kernels
* fix for large arrays
* fix
This commit is contained in:
@@ -38,8 +38,7 @@ void ternary_op_gpu_inplace(
|
||||
|
||||
bool use_2d = out.data_size() > UINT_MAX;
|
||||
auto ndim = shape.size();
|
||||
int work_per_thread =
|
||||
(topt == TernaryOpType::General && shape[ndim - 1] > 4) ? 4 : 1;
|
||||
int work_per_thread = (topt == TernaryOpType::General) ? 4 : 1;
|
||||
std::string kernel_name;
|
||||
{
|
||||
std::ostringstream kname;
|
||||
|
||||
Reference in New Issue
Block a user