mirror of
https://github.com/ml-explore/mlx.git
synced 2025-07-23 18:11:17 +08:00
Add Step, ELU, SELU, Swish activation functions (#117)
* Add Step, ELU, SELU, Swish activation functions

  This commit adds the Step, ELU, SELU and Swish activation functions.

* add to the docs

* review
This commit is contained in:
parent b9226c367c
commit f5df47ec6e
@@ -223,6 +223,20 @@ def topk(axis, x):
    mx.eval(ys)


def step_function(x):
    y = x
    for i in range(100):
        y = nn.step(x)
    mx.eval(y)


def selu(x):
    y = x
    for i in range(100):
        y = nn.selu(x)
    mx.eval(y)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("benchmark", help="Choose the benchmark to run")

@@ -372,5 +386,11 @@ if __name__ == "__main__":
    elif args.benchmark == "topk":
        print(bench(topk, axis, x))

    elif args.benchmark == "step":
        print(bench(step_function, x))

    elif args.benchmark == "selu":
        print(bench(selu, x))

    else:
        raise ValueError("Unknown benchmark")

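The benchmark loops above end with mx.eval because MLX arrays are evaluated lazily; without it, nothing would actually be computed. A standalone sketch of the same pattern with a wall-clock timer (the timer and array size are illustrative, not the repo's bench helper):

import time
import mlx.core as mx
import mlx.nn as nn

x = mx.random.uniform(shape=(32, 16, 1024))
start = time.perf_counter()
y = x
for _ in range(100):
    y = nn.selu(y)
mx.eval(y)  # force the lazy computation to run before stopping the clock
print(f"{time.perf_counter() - start:.4f} s")
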
@@ -257,6 +257,14 @@ def topk(axis, x):
    sync_if_needed(x)


@torch.no_grad()
def selu(x):
    y = x
    for i in range(100):
        y = torch.nn.functional.selu(y)
    sync_if_needed(x)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("benchmark", help="Choose the benchmark to run")

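As a quick cross-check on the PyTorch side (illustrative, not part of this diff), torch.nn.functional.selu on the same inputs produces values matching the MLX test expectations further below:

import torch

x = torch.arange(-3, 4, dtype=torch.float32)
print(torch.nn.functional.selu(x))
# approximately tensor([-1.6706, -1.5202, -1.1113, 0.0000, 1.0507, 2.1014, 3.1521])
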
@@ -205,6 +205,10 @@ if __name__ == "__main__":
        compare_filtered("celu --size 32x16x1024 --cpu")
        compare_filtered("log_sigmoid --size 32x16x1024")
        compare_filtered("log_sigmoid --size 32x16x1024 --cpu")
        compare_filtered("step --size 32x16x1024")
        compare_filtered("step --size 32x16x1024 --cpu")
        compare_filtered("selu --size 32x16x1024")
        compare_filtered("selu --size 32x16x1024 --cpu")
        compare_filtered("scalar_mul --size 32x16x1024")
        compare_filtered("scalar_mul --size 32x16x1024 --cpu")
        compare_filtered("cross_entropy --size 256x1024")

@@ -97,7 +97,7 @@ Updating the parameters

MLX modules allow accessing and updating individual parameters. However, most
times we need to update large subsets of a module's parameters. This action is
performed by :meth:`Module.update`.
performed by :meth:`Module.update`.

Value and grad
--------------

@@ -148,6 +148,8 @@ Neural Network Layers
   ReLU
   GELU
   SiLU
   Step
   SELU
   Linear
   Conv1d
   Conv2d

@@ -170,6 +172,8 @@ simple functions.
   gelu_fast_approx
   relu
   silu
   step
   selu

Loss Functions
--------------

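The context paragraph above refers to :meth:`Module.update`, which merges a (possibly nested) dict of arrays into a module's parameters. A minimal sketch, assuming an nn.Linear layer whose parameters are named weight and bias:

import mlx.core as mx
import mlx.nn as nn

layer = nn.Linear(4, 4)
# Keys mirror the module's attribute names; matching parameters are replaced.
layer.update({"weight": mx.zeros([4, 4]), "bias": mx.zeros([4])})
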
@@ -4,12 +4,14 @@ from mlx.nn.layers.activations import (
    CELU,
    ELU,
    GELU,
    SELU,
    LeakyReLU,
    LogSigmoid,
    ReLU,
    ReLU6,
    SiLU,
    Softplus,
    Step,
    celu,
    elu,
    gelu,
@@ -19,8 +21,10 @@ from mlx.nn.layers.activations import (
    log_sigmoid,
    relu,
    relu6,
    selu,
    silu,
    softplus,
    step,
)
from mlx.nn.layers.base import Module
from mlx.nn.layers.containers import Sequential

@@ -74,7 +74,7 @@ def celu(x, alpha=1.0):


def silu(x):
    r"""Applies the Sigmoid Linear Unit.
    r"""Applies the Sigmoid Linear Unit. Also known as Swish.

    Applies :math:`x \sigma(x)` element wise, where :math:`\sigma(\cdot)` is
    the logistic sigmoid.
@@ -143,6 +143,41 @@ class Sigmoid(Module):
    pass


def step(x: mx.array, threshold: float = 0.0):
    r"""Applies the Step Activation Function.

    This function implements a binary step activation, where the output is set
    to 1 if the input is greater than a specified threshold, and 0 otherwise.

    .. math::
        \text{step}(x) = \begin{cases}
        0 & \text{if } x < \text{threshold} \\
        1 & \text{if } x \geq \text{threshold}
        \end{cases}

    Args:
        threshold: The value to threshold at.
    """

    return mx.where(x > threshold, 1, 0)


def selu(x):
    r"""Applies the Scaled Exponential Linear Unit.

    .. math::
        \text{selu}(x) = \begin{cases}
        \lambda x & \text{if } x > 0 \\
        \lambda \alpha (\exp(x) - 1) & \text{if } x \leq 0
        \end{cases}

    where :math:`\lambda = 1.0507` and :math:`\alpha = 1.67326`.

    See also :func:`elu`.
    """
    return elu(x, 1.67326) * 1.0507


@_make_activation_module(relu)
class ReLU(Module):
    pass
@@ -274,3 +309,32 @@ def tanh(x):
@_make_activation_module(tanh)
class Tanh(Module):
    pass


class Step(Module):
    r"""Applies the Step Activation Function.

    This function implements a binary step activation, where the output is set
    to 1 if the input is greater than a specified threshold, and 0 otherwise.

    .. math::
        \text{step}(x) = \begin{cases}
        0 & \text{if } x < \text{threshold} \\
        1 & \text{if } x \geq \text{threshold}
        \end{cases}

    Args:
        threshold: The value to threshold at.
    """

    def __init__(self, threshold: float = 0.0):
        super().__init__()
        self.threshold = threshold

    def __call__(self, x: mx.array):
        return step(x, self.threshold)


@_make_activation_module(selu)
class SELU(Module):
    pass

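Putting the new pieces together, a minimal usage sketch of the activations added here, in both functional and module form (only names introduced or referenced in this commit are used; the Sequential layout is illustrative):

import mlx.core as mx
import mlx.nn as nn

x = mx.array([-2.0, -0.5, 0.0, 1.5])

# Functional forms
print(nn.step(x, threshold=0.5))  # binary 0/1 output
print(nn.selu(x))                 # scaled ELU, lambda ~ 1.0507, alpha ~ 1.67326
print(nn.silu(x))                 # also known as Swish

# Module forms, usable inside nn.Sequential
model = nn.Sequential(nn.Linear(4, 8), nn.SELU(), nn.Linear(8, 1), nn.Step())
print(model(mx.zeros([2, 4])))
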
@@ -449,6 +449,32 @@ class TestNN(mlx_tests.MLXTestCase):
        self.assertEqual(y.shape, [3])
        self.assertEqual(y.dtype, mx.float32)

    def test_step_activation(self):
        x = mx.arange(-3, 4)
        expected = mx.array([0, 0, 0, 0, 0, 1, 1])
        y = nn.Step()(x)
        self.assertTrue(mx.array_equal(y, expected))

        y = nn.Step(2)(x)
        expected = mx.array([0, 0, 0, 0, 0, 0, 1])
        self.assertTrue(mx.array_equal(y, expected))

    def test_selu(self):
        x = mx.arange(-3, 4)
        expected = mx.array(
            [
                -1.670563817024231,
                -1.5201621055603027,
                -1.1113275289535522,
                0.0,
                1.0506999492645264,
                2.1013998985290527,
                3.152099847793579,
            ]
        )
        y = nn.SELU()(x)
        self.assertTrue(mx.allclose(y, expected))


if __name__ == "__main__":
    unittest.main()

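The expected values in test_selu follow directly from the closed-form definition in the activations file above; a small pure-Python check that reproduces them (no MLX required):

import math

LAMBDA, ALPHA = 1.0507, 1.67326

def selu_ref(v):
    # selu(v) = lambda * v                  if v > 0
    #           lambda * alpha * (e^v - 1)  otherwise
    return LAMBDA * v if v > 0 else LAMBDA * ALPHA * (math.exp(v) - 1.0)

print([round(selu_ref(v), 4) for v in range(-3, 4)])
# [-1.6706, -1.5202, -1.1113, 0.0, 1.0507, 2.1014, 3.1521]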