Compare commits

...

10 Commits

Author  SHA1  Message  Date

Joona Havukainen  9a742090ae  Remove tuple unpacking syntax to comply with earlier python versions. Add cuda skip to nanpropagation tests, fix cuda implementation in a separate PR.  2025-07-08 20:56:06 -07:00
Joona Havukainen  aca7fac9ef  Make the max nanpropagation test more meaningful for integer types  2025-07-08 16:42:19 -07:00
Joona Havukainen  8b15773206  Add cpu Max nanpropagation. Fix a small fib in cpu max dispatch data types for int8/int16.  2025-07-08 16:41:56 -07:00
Joona Havukainen  3e885f583a  Cleanup using namespace alias  2025-07-07 18:25:57 -07:00
Joona Havukainen  c7af3016eb  Only check nans on non-integral types in simd_reduce_impl.  2025-07-07 18:24:30 -07:00
Joona Havukainen  9794ec6b8e  Improve the cpp unittest  2025-07-06 16:04:50 -07:00
Joona Havukainen  e0bb9f3ef8  Fix max complex64 nan propagation and add test  2025-07-06 15:53:50 -07:00
Joona Havukainen  5b089dc5da  Pre-commit formatting  2025-07-06 15:50:16 -07:00
Joona Havukainen  af74818528  Adding benchmarks and testing for max op nanpropagation  2025-07-06 15:50:16 -07:00
Joona Havukainen  0d30e9e8ec  Make max op NaN propagation rules align with numpy  2025-07-06 15:50:16 -07:00
7 changed files with 131 additions and 5 deletions
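
Background for the changes below (not part of the diff): every ordered comparison involving NaN evaluates to false, so a plain "a > b ? a : b" reduction silently drops NaN values, whereas NumPy's max returns NaN whenever any element being reduced is NaN. A minimal standalone C++ sketch of the difference between the two behaviors:

#include <cmath>
#include <iostream>

int main() {
  float a = NAN, b = 1.0f;
  // Every ordered comparison with NaN is false, so a naive max keeps b and
  // the NaN silently disappears from the reduction.
  float naive = a > b ? a : b;  // 1.0f
  // NaN-propagating max, the rule these commits adopt for max reductions:
  float propagating = (std::isnan(a) || std::isnan(b)) ? NAN : (a > b ? a : b);
  std::cout << naive << " " << propagating << "\n";  // prints "1 nan"
  return 0;
}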

View File

@@ -192,6 +192,17 @@ void time_reductions() {
   auto argmin_along_1 = [&a]() { return mx::argmin(a, 1, false); };
   TIME(argmin_along_1);
+
+  auto indices = mx::array({1});
+  auto updates = mx::reshape(mx::array({NAN}), {1, 1, 1});
+  std::vector<int> axes{0};
+  auto b = scatter(a, {indices}, updates, axes);
+  mx::eval(b);
+
+  auto max_along_0 = [&b]() { return mx::max(b, 0, false); };
+  TIME(max_along_0);
+  auto max_along_1 = [&b]() { return mx::max(b, 1, false); };
+  TIME(max_along_1);
 }
 
 void time_gather_scatter() {

View File

@@ -51,6 +51,13 @@ def time_maximum():
     time_fn(mx.maximum, a, b)
 
 
+def time_max():
+    a = mx.random.uniform(shape=(32, 1024, 1024))
+    a[1, 1] = mx.nan
+    mx.eval(a)
+    time_fn(mx.max, a, 0)
+
+
 def time_negative():
     a = mx.random.uniform(shape=(10000, 1000))
     mx.eval(a)
@@ -108,6 +115,7 @@ if __name__ == "__main__":
     time_add()
     time_matmul()
+    time_max()
     time_maximum()
     time_exp()
     time_negative()

View File

@@ -325,7 +325,15 @@ struct MaxReduce {
   };
 
   template <int N, typename T>
-  T operator()(simd::Simd<T, N> x) {
+  std::enable_if_t<std::is_integral_v<T>, T> operator()(simd::Simd<T, N> x) {
     return simd::max(x);
   };
+
+  template <int N, typename T>
+  std::enable_if_t<!std::is_integral_v<T>, T> operator()(simd::Simd<T, N> x) {
+    if (simd::any(x != x)) {
+      return static_cast<T>(NAN);
+    }
+    return simd::max(x);
+  };
 };
@@ -527,10 +535,10 @@ void Reduce::eval_cpu(const std::vector<array>& inputs, array& out) {
       reduce_dispatch_min_max<uint64_t>(in, out, reduce_type_, axes_);
       break;
     case int8:
-      reduce_dispatch_min_max<uint8_t>(in, out, reduce_type_, axes_);
+      reduce_dispatch_min_max<int8_t>(in, out, reduce_type_, axes_);
       break;
     case int16:
-      reduce_dispatch_min_max<uint16_t>(in, out, reduce_type_, axes_);
+      reduce_dispatch_min_max<int16_t>(in, out, reduce_type_, axes_);
       break;
     case int32:
       reduce_dispatch_min_max<int32_t>(in, out, reduce_type_, axes_);
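
A note on the pattern used in this file (and in the Metal kernel below): the reduction operator is split into an integral and a non-integral overload via std::enable_if_t, so the NaN check is only compiled for floating-point element types, and the check itself relies on NaN being the only value for which x != x is true. A reduced sketch of the same dispatch technique on a plain std::vector, independent of MLX's simd types:

#include <cmath>
#include <iostream>
#include <type_traits>
#include <vector>

// Integral types cannot hold NaN, so no check is needed (assumes non-empty input).
template <typename T>
std::enable_if_t<std::is_integral_v<T>, T> max_reduce(const std::vector<T>& v) {
  T m = v[0];
  for (T x : v) m = x > m ? x : m;
  return m;
}

// Non-integral types: bail out with NaN as soon as one is seen.
template <typename T>
std::enable_if_t<!std::is_integral_v<T>, T> max_reduce(const std::vector<T>& v) {
  T m = v[0];
  for (T x : v) {
    if (x != x) return static_cast<T>(NAN);  // x != x is true only for NaN
    m = x > m ? x : m;
  }
  return m;
}

int main() {
  std::cout << max_reduce(std::vector<int>{3, 7, 2}) << "\n";          // 7
  std::cout << max_reduce(std::vector<float>{3.f, NAN, 2.f}) << "\n";  // nan
}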

View File

@@ -186,7 +186,15 @@ struct Max {
   DEFINE_SIMD_REDUCE()
 
   template <typename T>
-  T simd_reduce_impl(T val) {
+  metal::enable_if_t<metal::is_integral_v<T>, T> simd_reduce_impl(T val) {
     return simd_max(val);
   }
+
+  template <typename T>
+  metal::enable_if_t<!metal::is_integral_v<T>, T> simd_reduce_impl(T val) {
+    if (simd_any(val != val)) {
+      return static_cast<T>(NAN);
+    }
+    return simd_max(val);
+  }
@@ -198,7 +206,35 @@ struct Max {
   }
 
   // Operator
-  U operator()(U a, U b) {
+  template <typename T>
+  metal::enable_if_t<metal::is_integral_v<T>, T> operator()(T a, T b) {
     return a > b ? a : b;
   }
+
+  template <typename T>
+  metal::enable_if_t<!metal::is_integral_v<T>, T> operator()(T a, T b) {
+    if (metal::isnan(a) || metal::isnan(b)) {
+      return static_cast<T>(NAN);
+    } else {
+      return a > b ? a : b;
+    }
+  }
+
+  template <>
+  complex64_t operator()(complex64_t a, complex64_t b) {
+    bool real_is_nan = metal::isnan(a.real) || metal::isnan(b.real);
+    bool imag_is_nan = metal::isnan(a.imag) || metal::isnan(b.imag);
+
+    if (!real_is_nan && !imag_is_nan) {
+      return a > b ? a : b;
+    } else if (real_is_nan && !imag_is_nan) {
+      return complex64_t(
+          static_cast<float>(NAN), a.imag > b.imag ? a.imag : b.imag);
+    } else if (!real_is_nan && imag_is_nan) {
+      return complex64_t(
+          a.real > b.real ? a.real : b.real, static_cast<float>(NAN));
+    } else {
+      return complex64_t(static_cast<float>(NAN), static_cast<float>(NAN));
+    }
+  }
 };
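
The complex64 specialization above propagates NaN per component: if only the real parts contain a NaN, the imaginary parts are still reduced with max (and vice versa), and only when both components are affected does the result become NaN in both. A plain C++ restatement of those branches using std::complex<float>; the explicit real-then-imaginary comparison in the first branch is an assumption standing in for complex64_t's operator>, which this diff does not show:

#include <cmath>
#include <complex>
#include <iostream>

// Same branch structure as the Metal operator above, expressed with
// std::complex<float> instead of complex64_t (illustrative only).
std::complex<float> complex_max(std::complex<float> a, std::complex<float> b) {
  bool real_is_nan = std::isnan(a.real()) || std::isnan(b.real());
  bool imag_is_nan = std::isnan(a.imag()) || std::isnan(b.imag());
  if (!real_is_nan && !imag_is_nan) {
    // Assumed stand-in for complex64_t's operator>: compare real parts first,
    // then break ties on the imaginary parts.
    bool a_gt_b =
        a.real() != b.real() ? a.real() > b.real() : a.imag() > b.imag();
    return a_gt_b ? a : b;
  } else if (real_is_nan && !imag_is_nan) {
    // Real component is poisoned; the imaginary component is still reduced.
    return {NAN, a.imag() > b.imag() ? a.imag() : b.imag()};
  } else if (!real_is_nan && imag_is_nan) {
    return {a.real() > b.real() ? a.real() : b.real(), NAN};
  } else {
    return {NAN, NAN};
  }
}

int main() {
  std::complex<float> a(NAN, 4.0f), b(3.0f, 3.0f);
  std::cout << complex_max(a, b) << "\n";  // prints "(nan,4)"
}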

View File

@@ -3,6 +3,8 @@ cuda_skip = {
     "TestLayers.test_quantized_embedding",
     "TestOps.test_dynamic_slicing",
     "TestReduce.test_dtypes",
+    "TestReduce.test_nanpropagation",
+    "TestReduce.test_nanpropagation_complex64",
     # Block masked matmul NYI
     "TestBlas.test_block_masked_matmul",
     # Gather matmul NYI

View File

@@ -153,6 +153,63 @@ class TestReduce(mlx_tests.MLXTestCase):
         x = x.transpose(1, 0, 2, 3, 4, 5, 6, 7, 8, 9)
         check(x, (1, 3, 5, 7, 9))
 
+    def test_nanpropagation(self):
+        dtypes = [
+            "uint8",
+            "uint16",
+            "uint32",
+            "int8",
+            "int16",
+            "int32",
+            "float16",
+            "float32",
+        ]
+
+        for dtype in dtypes:
+            with self.subTest(dtype=dtype):
+                x = (mx.random.normal((4, 4)) * 10).astype(getattr(mx, dtype))
+                indices = mx.random.randint(0, 4, shape=(6,)).reshape(3, 2)
+                for idx in indices:
+                    x[idx[0], idx[1]] = mx.nan
+                x_np = np.array(x)
+
+                for op in ["max"]:
+                    for axis in [0, 1]:
+                        out = getattr(mx, op)(x, axis=axis)
+                        ref = getattr(np, op)(x_np, axis=axis)
+                        self.assertTrue(np.array_equal(out, ref, equal_nan=True))
+
+    def test_nanpropagation_complex64(self):
+        complex_array_1 = mx.array(
+            [1 + 1j, 2 + 2j, 3 + 3j, mx.nan + 4j], dtype=mx.complex64
+        ).reshape(2, 2)
+        complex_array_2 = mx.array(
+            [1 + 1j, 2 + 2j, 3 + mx.nan * 1j, 4 + 4j], dtype=mx.complex64
+        ).reshape(2, 2)
+        complex_array_3 = mx.array(
+            [1 + 1j, 2 + mx.nan * 1j, 3 + 3j, 4 + 4j], dtype=mx.complex64
+        ).reshape(2, 2)
+        complex_array_4 = mx.array(
+            [mx.nan + 1j, 2 + 2j, 3 + 3j, 4 + 4j], dtype=mx.complex64
+        ).reshape(2, 2)
+
+        np_arrays = [
+            np.array(complex_array_1),
+            np.array(complex_array_2),
+            np.array(complex_array_3),
+            np.array(complex_array_4),
+        ]
+
+        for mx_arr, np_arr in zip(
+            [complex_array_1, complex_array_2, complex_array_3, complex_array_4],
+            np_arrays,
+        ):
+            for axis in [0, 1]:
+                for op in ["max"]:
+                    out = getattr(mx, op)(mx_arr, axis=axis)
+                    ref = getattr(np, op)(np_arr, axis=axis)
+                    self.assertTrue(np.array_equal(out, ref, equal_nan=True))
+
 
 if __name__ == "__main__":
     mlx_tests.MLXTestRunner(failfast=True)

View File

@@ -1024,6 +1024,10 @@ TEST_CASE("test reduction ops") {
     x = array({true, true, true, false, true, false}, {2, 3});
     CHECK(array_equal(min(x, 1), array({true, false})).item<bool>());
     CHECK(array_equal(min(x, 0), array({false, true, false})).item<bool>());
+
+    x = array({1.0f, NAN, 3.0f, 4.0f, 5.0f, 6.0f}, {2, 3});
+    CHECK(array_equal(max(x, 0), array({4.0f, NAN, 6.0f}), true).item<bool>());
+    CHECK(array_equal(max(x, 1), array({NAN, 6.0f}), true).item<bool>());
   }
 
   // Test logsumexp