Compare commits

...

3 Commits

Author       SHA1        Message                                Date

Awni Hannun  630350ad3e  Precise sigmoid (#2659)                2025-10-10 10:05:23 -07:00
                         * bump patch
                         * Sigmoid matches PyTorch and is more precise on tails

Awni Hannun  380aeb58ae  enable admm low-precision cpu (#2661)  2025-10-10 09:50:54 -07:00

Awni Hannun  f37389d100  bump patch (#2658)                     2025-10-10 08:36:41 -07:00
7 changed files with 22 additions and 10 deletions

View File

@@ -131,10 +131,6 @@ void Matmul::eval_cpu(const std::vector<array>& inputs, array& out) {
 }
 
 void AddMM::eval_cpu(const std::vector<array>& inputs, array& out) {
-  if (out.dtype() != float32) {
-    throw std::runtime_error(
-        "[AddMM::eval_cpu] Currently only supports float32.");
-  }
   if (out.size() == 0) {
     out.set_data(allocator::malloc(out.nbytes()));
     return;

View File

@@ -77,7 +77,8 @@ struct Real {
 struct Sigmoid {
   template <int N, typename T>
   Simd<T, N> operator()(Simd<T, N> x) {
-    return 1.0f / (1.0f + simd::exp(-x));
+    auto y = 1.0f / (1.0f + simd::exp(simd::abs(x)));
+    return simd::select(x < Simd<T, N>{0}, y, Simd<T, N>{1} - y);
   }
   SINGLE()
 };

View File

@@ -257,8 +257,8 @@ struct Round {
 
 struct Sigmoid {
   template <typename T>
   __device__ T operator()(T x) {
-    T y = 1 / (1 + exp(-abs(x)));
-    return (x < 0) ? 1 - y : y;
+    T y = 1 / (1 + exp(abs(x)));
+    return (x < 0) ? y : 1 - y;
   }
 };

View File

@@ -309,8 +309,8 @@ struct Round {
 
 struct Sigmoid {
   template <typename T>
   T operator()(T x) {
-    auto y = 1 / (1 + metal::exp(-metal::abs(x)));
-    return (x < 0) ? 1 - y : y;
+    auto y = 1 / (1 + metal::exp(metal::abs(x)));
+    return (x < 0) ? y : 1 - y;
   }
 };
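
For context: the three Sigmoid kernels above (SIMD, CUDA, Metal) now agree on the same numerically stable arrangement. y = 1/(1 + exp(|x|)) is the directly computed tail value, returned as-is for x < 0, while 1 - y is used only where sigmoid saturates toward 1. Previously the SIMD kernel used the single-branch textbook formula, and the CUDA/Metal kernels returned the tail as 1 - y, which cancels badly in low precision. A minimal NumPy sketch of that cancellation at float16 (illustration only, not MLX source):

import numpy as np

x = np.float16(-8.0)  # true sigmoid(-8) ~= 3.354e-4, representable in float16

# Old two-branch form: near the tail, y rounds to exactly 1.0 (float16
# spacing just above 1 is ~9.8e-4), so the cancellation 1 - y wipes it out.
y_old = np.float16(1) / (np.float16(1) + np.exp(-np.abs(x)))
old = np.float16(1) - y_old if x < 0 else y_old

# New form: the tail value is the directly computed quotient; 1 - y is
# only taken where the result saturates toward 1 regardless.
y_new = np.float16(1) / (np.float16(1) + np.exp(np.abs(x)))
new = y_new if x < 0 else np.float16(1) - y_new

print(old)  # 0.0       -- tail lost to rounding
print(new)  # ~0.000335 -- tail preserved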

View File

@@ -4,7 +4,7 @@
 
 #define MLX_VERSION_MAJOR 0
 #define MLX_VERSION_MINOR 29
-#define MLX_VERSION_PATCH 2
+#define MLX_VERSION_PATCH 3
 
 #define MLX_VERSION_NUMERIC \
   (100000 * MLX_VERSION_MAJOR + 1000 * MLX_VERSION_MINOR + MLX_VERSION_PATCH)
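
For reference, with this bump the packed macro evaluates to 29003 (100000*0 + 1000*29 + 3). A one-line Python check of the same arithmetic:

# Same packing arithmetic as MLX_VERSION_NUMERIC, for v0.29.3:
major, minor, patch = 0, 29, 3
assert 100000 * major + 1000 * minor + patch == 29003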

View File

@@ -712,6 +712,15 @@ class TestBlas(mlx_tests.MLXTestCase):
         expected = beta * c + alpha * (a @ b)
         self.assertTrue(mx.allclose(expected, out))
 
+        # Test half precision
+        for t, tol in [(mx.float16, 1e-3), (mx.bfloat16, 1e-2)]:
+            c = mx.ones((32, 32)).astype(t)
+            a = mx.random.uniform(shape=(32, 32)).astype(t)
+            b = mx.random.uniform(shape=(32, 32)).astype(t)
+            out = mx.addmm(c, a, b)
+            expected = a @ b + c
+            self.assertTrue(mx.allclose(out, expected, rtol=tol, atol=tol))
+
     def test_addmm_grad(self):
         def make_ref_addmm(alpha, beta):
             return lambda c, a, b: alpha * (a @ b) + beta * c
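
This test exercises what the removed float32 guard in AddMM::eval_cpu (first diff above) used to block: half-precision mx.addmm on the CPU backend. A standalone sketch with the same shapes and tolerances as the test; the set_default_device call is added here only to force the CPU path:

import mlx.core as mx

mx.set_default_device(mx.cpu)  # exercise the CPU backend the guard used to reject

a = mx.random.uniform(shape=(32, 32)).astype(mx.float16)
b = mx.random.uniform(shape=(32, 32)).astype(mx.float16)
c = mx.ones((32, 32)).astype(mx.float16)

# Before this change, the line below raised:
#   "[AddMM::eval_cpu] Currently only supports float32."
out = mx.addmm(c, a, b)
print(mx.allclose(out, a @ b + c, rtol=1e-3, atol=1e-3))  # expect True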

View File

@@ -1041,6 +1041,12 @@ class TestOps(mlx_tests.MLXTestCase):
         expected = 1 / (1 + np.exp(-a, dtype=np.float32))
         self.assertTrue(np.allclose(result, expected))
 
+        # Low precision
+        a = mx.array(-8.0).astype(mx.float16)
+        self.assertNotEqual(mx.sigmoid(a).item(), 0.0)
+        a = mx.array(8.0).astype(mx.float16)
+        self.assertNotEqual(mx.sigmoid(a).item(), 1.0)
+
     def test_allclose(self):
         a = mx.array(1.0)
         b = mx.array(1.0)
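
Standalone, the new check amounts to the sketch below: with the previous kernels the float16 results could collapse to exactly 0.0 and 1.0 at x = ∓8, and per the commit message the fixed tails now match PyTorch.

import mlx.core as mx

a = mx.array(-8.0).astype(mx.float16)
print(mx.sigmoid(a).item())  # ~3.35e-4 after the fix, not 0.0

a = mx.array(8.0).astype(mx.float16)
print(mx.sigmoid(a).item())  # just below 1.0 (~0.9995), not exactly 1.0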