mirror of
https://github.com/ml-explore/mlx.git
synced 2025-12-16 01:49:05 +08:00
Improve metal elementwise kernels (#2247)
* improve metal elementwise kernels
* compile and copy
* fix jit
This commit is contained in:
@@ -72,6 +72,10 @@ void concatenate(std::string& acc, T first, Args... args) {
|
||||
// Returns how many elements a single thread should handle for `dtype`.
//
// The result is 8 divided by `dtype.size()` (integer division), clamped so
// that it is never smaller than 1.
// NOTE(review): presumably `dtype.size()` is the element width in bytes, so
// this targets roughly 8 bytes of work per thread -- confirm against Dtype.
inline int get_work_per_thread(Dtype dtype) {
  const int per_thread = 8 / dtype.size();
  // Wide types make the quotient drop to 0; always do at least one element.
  return per_thread < 1 ? 1 : per_thread;
}
|
||||
// Size-aware variant: returns how many elements a single thread should
// handle for `dtype` given a total of `size`.
//
// When `size` is below 2^16 the answer is always 1. Otherwise it is
// 8 divided by `dtype.size()` (integer division), clamped to at least 1.
// NOTE(review): presumably `size` is an element count and `dtype.size()` a
// byte width -- confirm against callers.
inline int get_work_per_thread(Dtype dtype, size_t size) {
  constexpr size_t small_input_limit = size_t{1} << 16;
  if (size < small_input_limit) {
    return 1;
  }
  const int per_thread = 8 / dtype.size();
  return per_thread > 1 ? per_thread : 1;
}
|
||||
|
||||
inline size_t ceildiv(size_t n, size_t m) {
|
||||
return (n + m - 1) / m;
|
||||
|
||||
Reference in New Issue
Block a user