mirror of
https://github.com/ml-explore/mlx.git
synced 2025-10-22 02:58:16 +08:00
LogCumSumExp (#2069)
This commit is contained in:
@@ -2,6 +2,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "mlx/backend/metal/kernels/binary_ops.h"
|
||||
|
||||
#define DEFINE_SIMD_SCAN() \
|
||||
template <typename T, metal::enable_if_t<sizeof(T) < 8, bool> = true> \
|
||||
T simd_scan(T val) { \
|
||||
@@ -139,6 +141,29 @@ struct CumMin {
|
||||
}
|
||||
};
|
||||
|
||||
template <typename U>
|
||||
struct CumLogaddexp {
|
||||
static constexpr constant U init = Limits<U>::min;
|
||||
|
||||
template <typename T>
|
||||
U operator()(U a, T b) {
|
||||
return LogAddExp{}(a, static_cast<U>(b));
|
||||
}
|
||||
|
||||
U simd_scan(U x) {
|
||||
for (int i = 1; i <= 16; i *= 2) {
|
||||
U other = simd_shuffle_and_fill_up(x, init, i);
|
||||
x = LogAddExp{}(x, other);
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
U simd_exclusive_scan(U x) {
|
||||
x = simd_scan(x);
|
||||
return simd_shuffle_and_fill_up(x, init, 1);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename U, int N_READS, bool reverse>
|
||||
inline void load_unsafe(U values[N_READS], const device T* input) {
|
||||
if (reverse) {
|
||||
|
@@ -101,4 +101,7 @@ instantiate_scan_helper(min_int64_int64, int64_t, int64_t, CumMi
|
||||
instantiate_scan_helper(min_float16_float16, half, half, CumMin, 4)
|
||||
instantiate_scan_helper(min_float32_float32, float, float, CumMin, 4)
|
||||
instantiate_scan_helper(min_bfloat16_bfloat16, bfloat16_t, bfloat16_t, CumMin, 4)
|
||||
instantiate_scan_helper(min_complex64_complex64, complex64_t, complex64_t, CumMin, 2) // clang-format on
|
||||
instantiate_scan_helper(min_complex64_complex64, complex64_t, complex64_t, CumMin, 2)
|
||||
// Scan kernel instantiations for CumLogaddexp, one per floating-point type
// (half, float, bfloat16_t); no integer or complex variants are instantiated.
// NOTE(review): the "logaddexp_<in>_<out>" name prefix presumably has to match
// the kernel name assembled on the host side (reduce_type = "logaddexp" in
// Scan::eval_gpu) -- confirm against type_to_name().
instantiate_scan_helper(logaddexp_float16_float16, half, half, CumLogaddexp, 4)
|
||||
instantiate_scan_helper(logaddexp_float32_float32, float, float, CumLogaddexp, 4)
|
||||
instantiate_scan_helper(logaddexp_bfloat16_bfloat16, bfloat16_t, bfloat16_t, CumLogaddexp, 4) // clang-format on
|
||||
|
@@ -60,6 +60,9 @@ void Scan::eval_gpu(const std::vector<array>& inputs, array& out) {
|
||||
case Scan::Min:
|
||||
reduce_type = "min";
|
||||
break;
|
||||
case Scan::LogAddExp:
|
||||
reduce_type = "logaddexp";
|
||||
break;
|
||||
}
|
||||
kname << reduce_type << "_" << type_to_name(in) << "_" << type_to_name(out);
|
||||
auto kernel = get_scan_kernel(
|
||||
|
Reference in New Issue
Block a user