From 60cd4a5a6ff42f72c182f513c622a1c862d9d353 Mon Sep 17 00:00:00 2001
From: Goekdeniz-Guelmez
Date: Mon, 16 Jun 2025 22:33:24 +0200
Subject: [PATCH 1/6] initial commit

---
 python/mlx/nn/layers/activations.py | 33 +++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/python/mlx/nn/layers/activations.py b/python/mlx/nn/layers/activations.py
index 8eafd75d3..5bff0ad14 100644
--- a/python/mlx/nn/layers/activations.py
+++ b/python/mlx/nn/layers/activations.py
@@ -132,6 +132,20 @@ def silu(x):
     return x * mx.sigmoid(x)
 
 
+@partial(mx.compile, shapeless=True)
+def clipped_silu(x, a_min=-100, a_max=100):
+    r"""Applies the Clipped Sigmoid Linear Unit.
+
+    Applies :math:`\text{clip}(x \sigma(x), a\_min, a\_max)` element wise, where 
+    :math:`\sigma(\cdot)` is the logistic sigmoid.
+    
+    Args:
+        a_min: minimum value for clipping. Default: ``-100``
+        a_max: maximum value for clipping. Default: ``100``
+    """
+    return mx.clip(x * mx.sigmoid(x), a_min=a_min, a_max=a_max)
+
+
 @partial(mx.compile, shapeless=True)
 def log_sigmoid(x):
     r"""Applies the Log Sigmoid function.
@@ -488,6 +502,25 @@ class SiLU(Module):
     """
 
 
+class ClippedSiLU(Module):
+    r"""Applies the Clipped Sigmoid Linear Unit.
+
+    See :func:`clipped_silu` for the functional equivalent.
+    
+    Args:
+        a_min: minimum value for clipping. Default: ``-100``
+        a_max: maximum value for clipping. Default: ``100``
+    """
+
+    def __init__(self, a_min=-100, a_max=100):
+        super().__init__()
+        self.a_min = a_min
+        self.a_max = a_max
+
+    def __call__(self, x):
+        return clipped_silu(x, self.a_min, self.a_max)
+
+
 @_make_activation_module(log_softmax)
 class LogSoftmax(Module):
     r"""Applies the Log Softmax function.

From a426880bafb50b8ee921486eca7c6ff7dd6d89f7 Mon Sep 17 00:00:00 2001
From: Goekdeniz-Guelmez
Date: Mon, 16 Jun 2025 22:36:04 +0200
Subject: [PATCH 2/6] format

---
 python/mlx/nn/layers/activations.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/mlx/nn/layers/activations.py b/python/mlx/nn/layers/activations.py
index 5bff0ad14..2076b192a 100644
--- a/python/mlx/nn/layers/activations.py
+++ b/python/mlx/nn/layers/activations.py
@@ -136,9 +136,9 @@ def silu(x):
 def clipped_silu(x, a_min=-100, a_max=100):
     r"""Applies the Clipped Sigmoid Linear Unit.
 
-    Applies :math:`\text{clip}(x \sigma(x), a\_min, a\_max)` element wise, where 
+    Applies :math:`\text{clip}(x \sigma(x), a\_min, a\_max)` element wise, where
     :math:`\sigma(\cdot)` is the logistic sigmoid.
-    
+
     Args:
         a_min: minimum value for clipping. Default: ``-100``
         a_max: maximum value for clipping. Default: ``100``
@@ -506,7 +506,7 @@ class ClippedSiLU(Module):
     r"""Applies the Clipped Sigmoid Linear Unit.
 
     See :func:`clipped_silu` for the functional equivalent.
-    
+
     Args:
         a_min: minimum value for clipping. Default: ``-100``
         a_max: maximum value for clipping. Default: ``100``
From 9cb6df5960834afb36901284cbb2ec3432981021 Mon Sep 17 00:00:00 2001
From: Goekdeniz-Guelmez
Date: Mon, 16 Jun 2025 22:36:35 +0200
Subject: [PATCH 3/6] adding to __init__.py

---
 python/mlx/nn/layers/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/mlx/nn/layers/__init__.py b/python/mlx/nn/layers/__init__.py
index 26f77917f..205a755fa 100644
--- a/python/mlx/nn/layers/__init__.py
+++ b/python/mlx/nn/layers/__init__.py
@@ -18,6 +18,7 @@ from mlx.nn.layers.activations import (
     ReLU6,
     Sigmoid,
     SiLU,
+    ClippedSiLU,
     Softmax,
     Softmin,
     Softplus,
@@ -44,6 +45,7 @@ from mlx.nn.layers.activations import (
     selu,
     sigmoid,
     silu,
+    clipped_silu,
     softmax,
     softmin,
     softplus,

From 3713832e5e002d310421bd67aa56e69f35b713b9 Mon Sep 17 00:00:00 2001
From: Goekdeniz-Guelmez
Date: Mon, 16 Jun 2025 22:44:13 +0200
Subject: [PATCH 4/6] adding test for silu and clipped silu

---
 python/tests/test_nn.py | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/python/tests/test_nn.py b/python/tests/test_nn.py
index 10bbe821e..2934dbd9d 100644
--- a/python/tests/test_nn.py
+++ b/python/tests/test_nn.py
@@ -955,6 +955,43 @@ class TestLayers(mlx_tests.MLXTestCase):
         self.assertTrue(mx.all(mx.abs(y - expected_y) < epsilon))
         self.assertEqual(y.shape, (3,))
         self.assertEqual(y.dtype, mx.float32)
+    
+    def test_silu(self):
+        x = mx.array([1.0, -1.0, 0.0])
+        y = nn.silu(x)
+        epsilon = 1e-4
+        expected_y = mx.array([0.7311, -0.2689, 0.0])
+        self.assertTrue(mx.all(mx.abs(y - expected_y) < epsilon))
+        self.assertEqual(y.shape, (3,))
+        self.assertEqual(y.dtype, mx.float32)
+
+        y = nn.SiLU()(x)
+        self.assertTrue(mx.all(mx.abs(y - expected_y) < epsilon))
+        self.assertEqual(y.shape, (3,))
+        self.assertEqual(y.dtype, mx.float32)
+
+    def test_clipped_silu(self):
+        x = mx.array([1.0, -1.0, 0.0])
+        y = nn.clipped_silu(x, a_min=-100, a_max=100)
+        epsilon = 1e-4
+        expected_y = mx.array([0.7311, -0.2689, 0.0])
+        self.assertTrue(mx.all(mx.abs(y - expected_y) < epsilon))
+        self.assertEqual(y.shape, (3,))
+        self.assertEqual(y.dtype, mx.float32)
+
+        y = nn.ClippedSiLU(a_min=-100, a_max=100)(x)
+        self.assertTrue(mx.all(mx.abs(y - expected_y) < epsilon))
+        self.assertEqual(y.shape, (3,))
+        self.assertEqual(y.dtype, mx.float32)
+    
+        x_extreme = mx.array([200.0, -200.0])
+        y_clipped = nn.clipped_silu(x_extreme, a_min=-50, a_max=50)
+        expected_clipped = mx.array([50.0, 0.0])
+        self.assertTrue(mx.all(mx.abs(y_clipped - expected_clipped) < epsilon))
+    
+        y_custom = nn.ClippedSiLU(a_min=-10, a_max=10)(x_extreme)
+        expected_custom = mx.array([10.0, 0.0])
+        self.assertTrue(mx.all(mx.abs(y_custom - expected_custom) < epsilon))
 
     def test_log_softmax(self):
         x = mx.array([1.0, 2.0, 3.0])
From a315af8981600600b7c52bc0ed433b5e62a517c8 Mon Sep 17 00:00:00 2001
From: Goekdeniz-Guelmez
Date: Mon, 16 Jun 2025 22:45:09 +0200
Subject: [PATCH 5/6] format

---
 python/mlx/nn/layers/__init__.py | 4 ++--
 python/tests/test_nn.py          | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/mlx/nn/layers/__init__.py b/python/mlx/nn/layers/__init__.py
index 205a755fa..929d3b032 100644
--- a/python/mlx/nn/layers/__init__.py
+++ b/python/mlx/nn/layers/__init__.py
@@ -6,6 +6,7 @@ from mlx.nn.layers.activations import (
     GELU,
     GLU,
     SELU,
+    ClippedSiLU,
     HardShrink,
     Hardswish,
     HardTanh,
@@ -18,7 +19,6 @@ from mlx.nn.layers.activations import (
     ReLU6,
     Sigmoid,
     SiLU,
-    ClippedSiLU,
     Softmax,
     Softmin,
     Softplus,
@@ -27,6 +27,7 @@ from mlx.nn.layers.activations import (
     Step,
     Tanh,
     celu,
+    clipped_silu,
     elu,
     gelu,
     gelu_approx,
@@ -45,7 +46,6 @@ from mlx.nn.layers.activations import (
     selu,
     sigmoid,
     silu,
-    clipped_silu,
     softmax,
     softmin,
     softplus,
diff --git a/python/tests/test_nn.py b/python/tests/test_nn.py
index 2934dbd9d..4bec32d2c 100644
--- a/python/tests/test_nn.py
+++ b/python/tests/test_nn.py
@@ -955,7 +955,7 @@ class TestLayers(mlx_tests.MLXTestCase):
         self.assertTrue(mx.all(mx.abs(y - expected_y) < epsilon))
         self.assertEqual(y.shape, (3,))
         self.assertEqual(y.dtype, mx.float32)
-    
+
     def test_silu(self):
         x = mx.array([1.0, -1.0, 0.0])
         y = nn.silu(x)
@@ -983,12 +983,12 @@ class TestLayers(mlx_tests.MLXTestCase):
         self.assertTrue(mx.all(mx.abs(y - expected_y) < epsilon))
         self.assertEqual(y.shape, (3,))
         self.assertEqual(y.dtype, mx.float32)
-    
+
         x_extreme = mx.array([200.0, -200.0])
         y_clipped = nn.clipped_silu(x_extreme, a_min=-50, a_max=50)
         expected_clipped = mx.array([50.0, 0.0])
         self.assertTrue(mx.all(mx.abs(y_clipped - expected_clipped) < epsilon))
-    
+
         y_custom = nn.ClippedSiLU(a_min=-10, a_max=10)(x_extreme)
         expected_custom = mx.array([10.0, 0.0])
         self.assertTrue(mx.all(mx.abs(y_custom - expected_custom) < epsilon))

From 74d6ebd4bdf9bd410a189e2629a6c49accd84bc6 Mon Sep 17 00:00:00 2001
From: Goekdeniz-Guelmez
Date: Mon, 16 Jun 2025 22:49:53 +0200
Subject: [PATCH 6/6] update ackn.

---
 ACKNOWLEDGMENTS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ACKNOWLEDGMENTS.md b/ACKNOWLEDGMENTS.md
index 4b0cea123..fe7b86939 100644
--- a/ACKNOWLEDGMENTS.md
+++ b/ACKNOWLEDGMENTS.md
@@ -19,6 +19,7 @@ MLX was developed with contributions from the following individuals:
 - Gleb Pobudzey: Added the `where` primitive, and groups in 1D and 2D convolutions.
 - Paul Paczuski: Improved stability of BCE loss calculation
 - Max-Heinrich Laves: Added `conv_transpose1d`, `conv_transpose2d`, and `conv_transpose3d` ops.
+- Gökdeniz Gülmez: Added the `clipped_silu` activation.
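
A minimal usage sketch of the new activation, assuming an MLX build that already contains the `clipped_silu` / `ClippedSiLU` additions from the patches above; the sample values are illustrative and are not part of the patch series:

    import mlx.core as mx
    import mlx.nn as nn

    x = mx.array([-2.0, 0.0, 2.0, 200.0])

    # Functional form: SiLU followed by clipping to [a_min, a_max].
    # silu(x) is roughly [-0.238, 0.0, 1.762, 200.0]; only the last
    # value exceeds a_max and is clipped to 50.
    y = nn.clipped_silu(x, a_min=-50, a_max=50)

    # Module form, e.g. for use inside a model definition.
    act = nn.ClippedSiLU(a_min=-50, a_max=50)
    print(mx.allclose(act(x), y))  # expected: True

    # The same result expressed with plain ops, matching the docstring formula.
    reference = mx.clip(x * mx.sigmoid(x), -50, 50)
    print(mx.allclose(y, reference))  # expected: True

Because x * sigmoid(x) is bounded below by roughly -0.28, the `a_min` bound only takes effect when it is set above that value; in practice the clip mainly caps the unbounded positive side for very large activations, which is also why the extreme-input test above expects 0 rather than the lower bound for the large negative input.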