From 7d9d6ef456f391efb764fa2cc5691a17fe90e19e Mon Sep 17 00:00:00 2001
From: Skonor <82577632+Skonor@users.noreply.github.com>
Date: Thu, 24 Jul 2025 16:40:45 -0700
Subject: [PATCH] docs: fix adam and adamw eps placement (#2416)

Co-authored-by: Mikhail Gorbunov
---
 python/mlx/optimizers/optimizers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/mlx/optimizers/optimizers.py b/python/mlx/optimizers/optimizers.py
index 07b68cc5b..bc300e523 100644
--- a/python/mlx/optimizers/optimizers.py
+++ b/python/mlx/optimizers/optimizers.py
@@ -477,7 +477,7 @@ class Adam(Optimizer):
 
         m_{t+1} &= \beta_1 m_t + (1 - \beta_1) g_t \\
         v_{t+1} &= \beta_2 v_t + (1 - \beta_2) g_t^2 \\
-        w_{t+1} &= w_t - \lambda \frac{m_{t+1}}{\sqrt{v_{t+1} + \epsilon}}
+        w_{t+1} &= w_t - \lambda \frac{m_{t+1}}{\sqrt{v_{t+1}} + \epsilon}
 
     Args:
         learning_rate (float or callable): The learning rate :math:`\lambda`.
@@ -546,7 +546,7 @@ class AdamW(Adam):
 
         m_{t+1} &= \beta_1 m_t + (1 - \beta_1) g_t \\
         v_{t+1} &= \beta_2 v_t + (1 - \beta_2) g_t^2 \\
-        w_{t+1} &= w_t - \alpha (\frac{m_{t+1}}{\sqrt{v_{t+1} + \epsilon}} + \lambda w_t)
+        w_{t+1} &= w_t - \alpha (\frac{m_{t+1}}{\sqrt{v_{t+1}} + \epsilon} + \lambda w_t)
 
     Args:
         learning_rate (float or callable): The learning rate :math:`\alpha`.
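
For context, a minimal NumPy sketch of the update rule the corrected docstrings describe (illustrative only, not the MLX implementation; `adam_step`, its defaults, and the `weight_decay` argument are made up for this example): epsilon is added to the square root of the second-moment estimate, not placed under it.

```python
# Illustrative sketch (plain NumPy), not the MLX code. The point of the doc fix
# is that eps is added to sqrt(v); it does not go inside the square root.
import numpy as np


def adam_step(w, g, m, v, lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-8, weight_decay=0.0):
    """One Adam/AdamW-style step matching the corrected docstring equations."""
    m = beta1 * m + (1 - beta1) * g           # first-moment estimate
    v = beta2 * v + (1 - beta2) * g**2        # second-moment estimate
    update = m / (np.sqrt(v) + eps)           # eps outside the sqrt, as in the fixed docs
    w = w - lr * (update + weight_decay * w)  # weight_decay > 0 gives the AdamW form
    return w, m, v


# Hypothetical usage: a few steps on a toy parameter vector.
w = np.zeros(3)
m = np.zeros(3)
v = np.zeros(3)
for _ in range(3):
    g = np.array([0.1, -0.2, 0.3])
    w, m, v = adam_step(w, g, m, v)
```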