Add cyclic_lr scheduler

@@ -9,6 +9,7 @@ Schedulers
    :toctree: _autosummary
 
    cosine_decay
+   cyclic_lr
    exponential_decay
    join_schedules
    linear_schedule

@@ -1,7 +1,7 @@
 # Copyright © 2023-2024 Apple Inc.
 
 import math
-from typing import Callable, List
+from typing import Callable, List, Optional
 
 import mlx.core as mx
 

@@ -156,3 +156,64 @@ def linear_schedule(init: float, end: float, steps: int) -> Callable:
         return step * ((end - init) / steps) + init
 
     return schedule
+
+
+def cyclic_lr(
+    base_lr: float,
+    max_lr: float,
+    step_size_up: int = 2000,
+    step_size_down: Optional[int] = None,
+    mode: str = "triangular",
+    gamma: float = 1.0,
+) -> Callable:
+    r"""Make a cyclic learning rate scheduler.
+
+    Args:
+        base_lr (float): Lower learning rate boundary.
+        max_lr (float): Upper learning rate boundary.
+        step_size_up (int): Number of steps in the increasing half of a cycle. Default: ``2000``.
+        step_size_down (int, optional): Number of steps in the decreasing half.
+            If ``None``, equals ``step_size_up``. Default: ``None``.
+        mode (str): One of ``"triangular"``, ``"triangular2"``, ``"exp_range"``. Default: ``"triangular"``.
+        gamma (float): Scaling factor for ``"exp_range"`` mode. Default: ``1.0``.
+
+    Example:
+        >>> lr_schedule = optim.cyclic_lr(0.001, 0.1, step_size_up=10)
+        >>> optimizer = optim.SGD(learning_rate=lr_schedule)
+        >>> optimizer.learning_rate
+        array(0.001, dtype=float32)
+        >>>
+        >>> for _ in range(5): optimizer.update({}, {})
+        ...
+        >>> optimizer.learning_rate
+        array(0.0505, dtype=float32)
+    """
+    step_size_down = step_size_down if step_size_down is not None else step_size_up
+    total_size = step_size_up + step_size_down
+    step_ratio = step_size_up / total_size
+
+    def schedule(step):
+        if isinstance(step, mx.array):
+            step_val = step.item() if step.size == 1 else step
+        else:
+            step_val = step
+
+        cycle = math.floor(1 + step_val / total_size)
+        x = 1.0 + step_val / total_size - cycle
+
+        if x <= step_ratio:
+            scale_factor = x / step_ratio
+        else:
+            scale_factor = (x - 1) / (step_ratio - 1)
+
+        if mode == "triangular":
+            scale_fn_val = 1.0
+        elif mode == "triangular2":
+            scale_fn_val = 1 / (2.0 ** (cycle - 1))
+        else:  # exp_range
+            scale_fn_val = gamma ** (cycle - 1)
+
+        base_height = (max_lr - base_lr) * scale_factor
+        return base_lr + base_height * scale_fn_val
+
+    return schedule
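
The body of schedule above is a triangular wave: cycle counts which cycle the step falls in, x is the fractional position within that cycle, and scale_factor rises linearly to 1.0 over the first step_ratio of the cycle, then falls back to 0. To trace the waveform without MLX installed, here is a minimal pure-Python sketch of the same arithmetic (cyclic_lr_value is a hypothetical helper name for illustration, not part of the MLX API; the mx.array handling is omitted):

    import math

    def cyclic_lr_value(step, base_lr, max_lr, step_size_up=2000,
                        step_size_down=None, mode="triangular", gamma=1.0):
        # Mirrors the scalar arithmetic of the schedule in the diff above.
        step_size_down = step_size_down if step_size_down is not None else step_size_up
        total_size = step_size_up + step_size_down
        step_ratio = step_size_up / total_size

        cycle = math.floor(1 + step / total_size)  # 1-based cycle index
        x = 1.0 + step / total_size - cycle        # position within the cycle, in [0, 1)

        # Rise linearly over the first step_ratio of the cycle, then fall back.
        if x <= step_ratio:
            scale_factor = x / step_ratio
        else:
            scale_factor = (x - 1) / (step_ratio - 1)

        if mode == "triangular":
            scale_fn_val = 1.0                       # constant peak every cycle
        elif mode == "triangular2":
            scale_fn_val = 1 / (2.0 ** (cycle - 1))  # peak halves each cycle
        else:  # exp_range
            scale_fn_val = gamma ** (cycle - 1)      # peak decays by gamma per cycle

        return base_lr + (max_lr - base_lr) * scale_factor * scale_fn_val

    # One full 20-step cycle with step_size_up=10:
    for step in (0, 5, 10, 15, 20):
        print(step, round(cyclic_lr_value(step, 0.001, 0.1, step_size_up=10), 4))
    # 0 0.001, 5 0.0505, 10 0.1, 15 0.0505, 20 0.001

The printed values match the docstring example: step 5 of a 20-step cycle sits halfway up the rising edge, hence 0.0505.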

@@ -446,6 +446,23 @@ class TestSchedulers(mlx_tests.MLXTestCase):
         lr = lr_schedule(20)
         self.assertEqual(lr, expected_end_lr)
 
+    def test_cyclic_lr(self):
+        lr_schedule = opt.cyclic_lr(0.001, 0.1, step_size_up=10)
+
+        lr = lr_schedule(0)
+        self.assertAlmostEqual(lr, 0.001, delta=1e-7)
+
+        lr = lr_schedule(10)
+        self.assertAlmostEqual(lr, 0.1, delta=1e-7)
+
+        lr = lr_schedule(20)
+        self.assertAlmostEqual(lr, 0.001, delta=1e-7)
+
+        lr_schedule = opt.cyclic_lr(0.001, 0.1, step_size_up=5, mode="triangular2")
+        lr = lr_schedule(15)
+        expected_lr = 0.001 + (0.1 - 0.001) * 0.5
+        self.assertAlmostEqual(lr, expected_lr, delta=1e-6)
+
     def test_schedule_joiner(self):
         boundaries = [2, 3, 4]
         schedules = [lambda _: 3, lambda _: 4, lambda _: 5]
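
The triangular2 assertion in the test reads most naturally as a worked calculation. A short sketch (plain Python, variable names chosen to mirror the implementation) showing why expected_lr comes out to 0.001 + (0.1 - 0.001) * 0.5:

    # Step 15 with step_size_up=5 (step_size_down defaults to 5):
    total_size = 5 + 5                     # full cycle length: 10 steps
    cycle = 1 + 15 // total_size           # = 2: step 15 is in the second cycle
    x = 1.0 + 15 / total_size - cycle      # = 0.5: exactly at the cycle's peak
    scale_factor = x / 0.5                 # = 1.0 (x == step_ratio, top of the rise)
    scale_fn_val = 1 / 2.0 ** (cycle - 1)  # = 0.5: triangular2 halves the peak each cycle
    lr = 0.001 + (0.1 - 0.001) * scale_factor * scale_fn_val
    print(lr)  # 0.0505, i.e. 0.001 + (0.1 - 0.001) * 0.5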