mirror of
https://github.com/ml-explore/mlx.git
synced 2025-07-16 22:11:15 +08:00
Align mlx::core::max op nan propagation with NumPy (#2339)
* Make max op NaN propagation rules align with numpy * Adding benchmarks and testing for max op nanpropagation * Pre-commit formatting * Fix max complex64 nan propagation and add test * Improve the cpp unittest * Only check nans on non-integral types in simd_reduce_impl. * Cleanup using namespace alias * Add cpu Max nanpropagation. Fix a small fib in cpu max dispatch data types for int8/int16. * Make the max nanpropagation test more meaningful for integer types * Remove tuple unpacking syntax to comply with earlier python versions. Add cuda skip to nanpropagation tests, fix cuda implementation in a separate PR.
This commit is contained in:
parent
fb4e8b896b
commit
8b9a3f3cea
@ -192,6 +192,17 @@ void time_reductions() {
|
||||
|
||||
auto argmin_along_1 = [&a]() { return mx::argmin(a, 1, false); };
|
||||
TIME(argmin_along_1);
|
||||
|
||||
auto indices = mx::array({1});
|
||||
auto updates = mx::reshape(mx::array({NAN}), {1, 1, 1});
|
||||
std::vector<int> axes{0};
|
||||
auto b = scatter(a, {indices}, updates, axes);
|
||||
mx::eval(b);
|
||||
|
||||
auto max_along_0 = [&b]() { return mx::max(b, 0, false); };
|
||||
TIME(max_along_0);
|
||||
auto max_along_1 = [&b]() { return mx::max(b, 1, false); };
|
||||
TIME(max_along_1);
|
||||
}
|
||||
|
||||
void time_gather_scatter() {
|
||||
|
@ -51,6 +51,13 @@ def time_maximum():
|
||||
time_fn(mx.maximum, a, b)
|
||||
|
||||
|
||||
def time_max():
|
||||
a = mx.random.uniform(shape=(32, 1024, 1024))
|
||||
a[1, 1] = mx.nan
|
||||
mx.eval(a)
|
||||
time_fn(mx.max, a, 0)
|
||||
|
||||
|
||||
def time_negative():
|
||||
a = mx.random.uniform(shape=(10000, 1000))
|
||||
mx.eval(a)
|
||||
@ -108,6 +115,7 @@ if __name__ == "__main__":
|
||||
|
||||
time_add()
|
||||
time_matmul()
|
||||
time_max()
|
||||
time_maximum()
|
||||
time_exp()
|
||||
time_negative()
|
||||
|
@ -325,7 +325,15 @@ struct MaxReduce {
|
||||
};
|
||||
|
||||
template <int N, typename T>
|
||||
T operator()(simd::Simd<T, N> x) {
|
||||
std::enable_if_t<std::is_integral_v<T>, T> operator()(simd::Simd<T, N> x) {
|
||||
return simd::max(x);
|
||||
};
|
||||
|
||||
template <int N, typename T>
|
||||
std::enable_if_t<!std::is_integral_v<T>, T> operator()(simd::Simd<T, N> x) {
|
||||
if (simd::any(x != x)) {
|
||||
return static_cast<T>(NAN);
|
||||
}
|
||||
return simd::max(x);
|
||||
};
|
||||
};
|
||||
@ -527,10 +535,10 @@ void Reduce::eval_cpu(const std::vector<array>& inputs, array& out) {
|
||||
reduce_dispatch_min_max<uint64_t>(in, out, reduce_type_, axes_);
|
||||
break;
|
||||
case int8:
|
||||
reduce_dispatch_min_max<uint8_t>(in, out, reduce_type_, axes_);
|
||||
reduce_dispatch_min_max<int8_t>(in, out, reduce_type_, axes_);
|
||||
break;
|
||||
case int16:
|
||||
reduce_dispatch_min_max<uint16_t>(in, out, reduce_type_, axes_);
|
||||
reduce_dispatch_min_max<int16_t>(in, out, reduce_type_, axes_);
|
||||
break;
|
||||
case int32:
|
||||
reduce_dispatch_min_max<int32_t>(in, out, reduce_type_, axes_);
|
||||
|
@ -186,7 +186,15 @@ struct Max {
|
||||
DEFINE_SIMD_REDUCE()
|
||||
|
||||
template <typename T>
|
||||
T simd_reduce_impl(T val) {
|
||||
metal::enable_if_t<metal::is_integral_v<T>, T> simd_reduce_impl(T val) {
|
||||
return simd_max(val);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
metal::enable_if_t<!metal::is_integral_v<T>, T> simd_reduce_impl(T val) {
|
||||
if (simd_any(val != val)) {
|
||||
return static_cast<T>(NAN);
|
||||
}
|
||||
return simd_max(val);
|
||||
}
|
||||
|
||||
@ -198,7 +206,35 @@ struct Max {
|
||||
}
|
||||
|
||||
// Operator
|
||||
U operator()(U a, U b) {
|
||||
template <typename T>
|
||||
metal::enable_if_t<metal::is_integral_v<T>, T> operator()(T a, T b) {
|
||||
return a > b ? a : b;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
metal::enable_if_t<!metal::is_integral_v<T>, T> operator()(T a, T b) {
|
||||
if (metal::isnan(a) || metal::isnan(b)) {
|
||||
return static_cast<T>(NAN);
|
||||
} else {
|
||||
return a > b ? a : b;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
complex64_t operator()(complex64_t a, complex64_t b) {
|
||||
bool real_is_nan = metal::isnan(a.real) || metal::isnan(b.real);
|
||||
bool imag_is_nan = metal::isnan(a.imag) || metal::isnan(b.imag);
|
||||
|
||||
if (!real_is_nan && !imag_is_nan) {
|
||||
return a > b ? a : b;
|
||||
} else if (real_is_nan && !imag_is_nan) {
|
||||
return complex64_t(
|
||||
static_cast<float>(NAN), a.imag > b.imag ? a.imag : b.imag);
|
||||
} else if (!real_is_nan && imag_is_nan) {
|
||||
return complex64_t(
|
||||
a.real > b.real ? a.real : b.real, static_cast<float>(NAN));
|
||||
} else {
|
||||
return complex64_t(static_cast<float>(NAN), static_cast<float>(NAN));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
@ -3,6 +3,8 @@ cuda_skip = {
|
||||
"TestLayers.test_quantized_embedding",
|
||||
"TestOps.test_dynamic_slicing",
|
||||
"TestReduce.test_dtypes",
|
||||
"TestReduce.test_nanpropagation",
|
||||
"TestReduce.test_nanpropagation_complex64",
|
||||
# Block masked matmul NYI
|
||||
"TestBlas.test_block_masked_matmul",
|
||||
# Gather matmul NYI
|
||||
|
@ -153,6 +153,63 @@ class TestReduce(mlx_tests.MLXTestCase):
|
||||
x = x.transpose(1, 0, 2, 3, 4, 5, 6, 7, 8, 9)
|
||||
check(x, (1, 3, 5, 7, 9))
|
||||
|
||||
def test_nanpropagation(self):
|
||||
dtypes = [
|
||||
"uint8",
|
||||
"uint16",
|
||||
"uint32",
|
||||
"int8",
|
||||
"int16",
|
||||
"int32",
|
||||
"float16",
|
||||
"float32",
|
||||
]
|
||||
|
||||
for dtype in dtypes:
|
||||
with self.subTest(dtype=dtype):
|
||||
x = (mx.random.normal((4, 4)) * 10).astype(getattr(mx, dtype))
|
||||
indices = mx.random.randint(0, 4, shape=(6,)).reshape(3, 2)
|
||||
for idx in indices:
|
||||
x[idx[0], idx[1]] = mx.nan
|
||||
x_np = np.array(x)
|
||||
|
||||
for op in ["max"]:
|
||||
for axis in [0, 1]:
|
||||
out = getattr(mx, op)(x, axis=axis)
|
||||
ref = getattr(np, op)(x_np, axis=axis)
|
||||
self.assertTrue(np.array_equal(out, ref, equal_nan=True))
|
||||
|
||||
def test_nanpropagation_complex64(self):
|
||||
complex_array_1 = mx.array(
|
||||
[1 + 1j, 2 + 2j, 3 + 3j, mx.nan + 4j], dtype=mx.complex64
|
||||
).reshape(2, 2)
|
||||
complex_array_2 = mx.array(
|
||||
[1 + 1j, 2 + 2j, 3 + mx.nan * 1j, 4 + 4j], dtype=mx.complex64
|
||||
).reshape(2, 2)
|
||||
complex_array_3 = mx.array(
|
||||
[1 + 1j, 2 + mx.nan * 1j, 3 + 3j, 4 + 4j], dtype=mx.complex64
|
||||
).reshape(2, 2)
|
||||
complex_array_4 = mx.array(
|
||||
[mx.nan + 1j, 2 + 2j, 3 + 3j, 4 + 4j], dtype=mx.complex64
|
||||
).reshape(2, 2)
|
||||
|
||||
np_arrays = [
|
||||
np.array(complex_array_1),
|
||||
np.array(complex_array_2),
|
||||
np.array(complex_array_3),
|
||||
np.array(complex_array_4),
|
||||
]
|
||||
|
||||
for mx_arr, np_arr in zip(
|
||||
[complex_array_1, complex_array_2, complex_array_3, complex_array_4],
|
||||
np_arrays,
|
||||
):
|
||||
for axis in [0, 1]:
|
||||
for op in ["max"]:
|
||||
out = getattr(mx, op)(mx_arr, axis=axis)
|
||||
ref = getattr(np, op)(np_arr, axis=axis)
|
||||
self.assertTrue(np.array_equal(out, ref, equal_nan=True))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
mlx_tests.MLXTestRunner(failfast=True)
|
||||
|
@ -1024,6 +1024,10 @@ TEST_CASE("test reduction ops") {
|
||||
x = array({true, true, true, false, true, false}, {2, 3});
|
||||
CHECK(array_equal(min(x, 1), array({true, false})).item<bool>());
|
||||
CHECK(array_equal(min(x, 0), array({false, true, false})).item<bool>());
|
||||
|
||||
x = array({1.0f, NAN, 3.0f, 4.0f, 5.0f, 6.0f}, {2, 3});
|
||||
CHECK(array_equal(max(x, 0), array({4.0f, NAN, 6.0f}), true).item<bool>());
|
||||
CHECK(array_equal(max(x, 1), array({NAN, 6.0f}), true).item<bool>());
|
||||
}
|
||||
|
||||
// Test logsumexp
|
||||
|
Loading…
Reference in New Issue
Block a user