Doc fix (#1615)

2025-12-15 09:29:26 +08:00 · 2024-11-22 11:12:25 -08:00
parent 02bec0bb6d
commit 7cbb4aef17
2 changed files with 40 additions and 100 deletions
--- a/docs/src/python/nn/layers.rst
+++ b/docs/src/python/nn/layers.rst
@@ -12,6 +12,7 @@ Layers
   ALiBi
   AvgPool1d
   AvgPool2d
+   AvgPool3d
   BatchNorm
   CELU
   Conv1d
@@ -41,6 +42,7 @@ Layers
   LSTM
   MaxPool1d
   MaxPool2d
+   MaxPool3d
   Mish
   MultiHeadAttention
   PReLU
--- a/python/mlx/nn/layers/pooling.py
+++ b/python/mlx/nn/layers/pooling.py
@@ -185,16 +185,8 @@ class _Pool3d(_Pool):
 class MaxPool1d(_Pool1d):
    r"""Applies 1-dimensional max pooling.

-    Assuming an input of shape :math:`(N, L, C)` and ``kernel_size`` is
-    :math:`k`, the output is a tensor of shape :math:`(N, L_{out}, C)`, given
-    by:
-
-        .. math::
-            \text{out}(N_i, t, C_j) = \max_{m=0, \ldots, k - 1}
-                    \text{input}(N_i, \text{stride} \times t + m, C_j),
-
-    where :math:`L_{out} = \left\lfloor \frac{L + 2 \times \text{padding} -
-    \text{kernel\_size}}{\text{stride}}\right\rfloor + 1`.
+    Spatially downsamples the input by taking the maximum of a sliding window
+    of size ``kernel_size`` and sliding stride ``stride``.

    Args:
        kernel_size (int or tuple(int)): The size of the pooling window kernel.
@@ -224,16 +216,8 @@ class MaxPool1d(_Pool1d):
 class AvgPool1d(_Pool1d):
    r"""Applies 1-dimensional average pooling.

-    Assuming an input of shape :math:`(N, L, C)` and ``kernel_size`` is
-    :math:`k`, the output is a tensor of shape :math:`(N, L_{out}, C)`, given
-    by:
-
-        .. math::
-            \text{out}(N_i, t, C_j) = \frac{1}{k} \sum_{m=0, \ldots, k - 1}
-                    \text{input}(N_i, \text{stride} \times t + m, C_j),
-
-    where :math:`L_{out} = \left\lfloor \frac{L + 2 \times \text{padding} -
-    \text{kernel\_size}}{\text{stride}}\right\rfloor + 1`.
+    Spatially downsamples the input by taking the average of a sliding window
+    of size ``kernel_size`` and sliding stride ``stride``.

    Args:
        kernel_size (int or tuple(int)): The size of the pooling window kernel.
@@ -263,25 +247,14 @@ class AvgPool1d(_Pool1d):
 class MaxPool2d(_Pool2d):
    r"""Applies 2-dimensional max pooling.

-    Assuming an input of shape :math:`(N, H, W, C)` and ``kernel_size`` is
-    :math:`(k_H, k_W)`, the output is a tensor of shape :math:`(N, H_{out},
-    W_{out}, C)`, given by:
+    Spatially downsamples the input by taking the maximum of a sliding window
+    of size ``kernel_size`` and sliding stride ``stride``.

-    .. math::
-        \begin{aligned}
-            \text{out}(N_i, h, w, C_j) = & \max_{m=0, \ldots, k_H-1} \max_{n=0, \ldots, k_W-1} \\
-                                    & \text{input}(N_i, \text{stride[0]} \times h + m,
-                                                \text{stride[1]} \times w + n, C_j),
-        \end{aligned}
+    The parameters ``kernel_size``, ``stride``, and ``padding`` can either be:

-    where :math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[0]} - \text{kernel\_size[0]}}{\text{stride[0]}}\right\rfloor + 1`,
-    :math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[1]} - \text{kernel\_size[1]}}{\text{stride[1]}}\right\rfloor + 1`.
-
-    The parameters ``kernel_size``, ``stride``, ``padding``, can either be:
-
-        - a single ``int`` -- in which case the same value is used for both the
-          height and width axis;
-        - a ``tuple`` of two ``int`` s -- in which case, the first ``int`` is
+    * a single ``int`` -- in which case the same value is used for both the
+      height and width axes.
+    * a ``tuple`` of two ``int`` s -- in which case, the first ``int`` is
      used for the height axis, the second ``int`` for the width axis.

    Args:
@@ -312,25 +285,14 @@ class MaxPool2d(_Pool2d):
 class AvgPool2d(_Pool2d):
    r"""Applies 2-dimensional average pooling.

-    Assuming an input of shape :math:`(N, H, W, C)` and ``kernel_size`` is
-    :math:`(k_H, k_W)`, the output is a tensor of shape :math:`(N, H_{out},
-    W_{out}, C)`, given by:
+    Spatially downsamples the input by taking the average of a sliding window
+    of size ``kernel_size`` and sliding stride ``stride``.

-    .. math::
-        \begin{aligned}
-            \text{out}(N_i, h, w, C_j) = & \frac{1}{k_H k_W} \sum_{m=0, \ldots, k_H-1} \sum_{n=0, \ldots, k_W-1} \\
-                                    & \text{input}(N_i, \text{stride[0]} \times h + m,
-                                                \text{stride[1]} \times w + n, C_j),
-        \end{aligned}
+    The parameters ``kernel_size``, ``stride``, and ``padding`` can either be:

-    where :math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[0]} - \text{kernel\_size[0]}}{\text{stride[0]}}\right\rfloor + 1`,
-    :math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[1]} - \text{kernel\_size[1]}}{\text{stride[1]}}\right\rfloor + 1`.
-
-    The parameters ``kernel_size``, ``stride``, ``padding``, can either be:
-
-        - a single ``int`` -- in which case the same value is used for both the
-          height and width axis;
-        - a ``tuple`` of two ``int`` s -- in which case, the first ``int`` is
+    * a single ``int`` -- in which case the same value is used for both the
+      height and width axes.
+    * a ``tuple`` of two ``int`` s -- in which case, the first ``int`` is
      used for the height axis, the second ``int`` for the width axis.

    Args:
@@ -359,28 +321,16 @@ class AvgPool2d(_Pool2d):


 class MaxPool3d(_Pool3d):
-    """
-        Assuming an input of shape :math:`(N, D, H, W, C)` and ``kernel_size`` is
-    :math:`(k_D, k_H, k_W)`, the output is a tensor of shape :math:`(N, D_{out},
-    H_{out}, W_{out}, C)`, given by:
+    r"""Applies 3-dimensional max pooling.

-    .. math::
-        \begin{aligned}
-            \text{out}(N_i, d, h, w, C_j) = & \max_{l=0, \ldots, k_D-1} \max_{m=0, \ldots, k_H-1} \max_{n=0, \ldots, k_W-1} \\
-                                    & \text{input}(N_i, \text{stride[0]} \times d + l,
-                                                \text{stride[1]} \times h + m,
-                                                \text{stride[2]} \times w + n, C_j),
-        \end{aligned}
+    Spatially downsamples the input by taking the maximum of a sliding window
+    of size ``kernel_size`` and sliding stride ``stride``.

-    where :math:`D_{out} = \left\lfloor\frac{D + 2 * \text{padding[0]} - \text{kernel\_size[0]}}{\text{stride[0]}}\right\rfloor + 1`,
-    :math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[1]} - \text{kernel\_size[1]}}{\text{stride[1]}}\right\rfloor + 1`,
-    :math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[2]} - \text{kernel\_size[2]}}{\text{stride[2]}}\right\rfloor + 1`.
+    The parameters ``kernel_size``, ``stride``, and ``padding`` can either be:

-    The parameters ``kernel_size``, ``stride``, ``padding``, can either be:
-
-        - a single ``int`` -- in which case the same value is used for the depth,
-        height and width axis;
-        - a ``tuple`` of three ``int`` s -- in which case, the first ``int`` is used
+    * a single ``int`` -- in which case the same value is used for the depth,
+      height, and width axes.
+    * a ``tuple`` of three ``int`` s -- in which case, the first ``int`` is used
      for the depth axis, the second ``int`` for the height axis, and the third
      ``int`` for the width axis.

@@ -410,28 +360,16 @@ class MaxPool3d(_Pool3d):


 class AvgPool3d(_Pool3d):
-    """
-        Assuming an input of shape :math:`(N, D, H, W, C)` and ``kernel_size`` is
-        :math:`(k_D, k_H, k_W)`, the output is a tensor of shape :math:`(N, D_{out},
-        H_{out}, W_{out}, C)`, given by:
+    r"""Applies 3-dimensional average pooling.

-        .. math::
-        \begin{aligned}
-            \text{out}(N_i, d, h, w, C_j) = & \frac{1}{k_D k_H k_W} \sum_{l=0, \ldots, k_D-1} \sum_{m=0, \ldots, k_H-1} \sum_{n=0, \ldots, k_W-1} \\
-                                    & \text{input}(N_i, \text{stride[0]} \times d + l,
-                                                \text{stride[1]} \times h + m,
-                                                \text{stride[2]} \times w + n, C_j),
-        \end{aligned}
+    Spatially downsamples the input by taking the average of a sliding window
+    of size ``kernel_size`` and sliding stride ``stride``.

-        where :math:`D_{out} = \left\lfloor\frac{D + 2 * \text{padding[0]} - \text{kernel\_size[0]}}{\text{stride[0]}}\right\rfloor + 1`,
-        :math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[1]} - \text{kernel\_size[1]}}{\text{stride[1]}}\right\rfloor + 1`,
-        :math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[2]} - \text{kernel\_size[2]}}{\text{stride[2]}}\right\rfloor + 1`.
+    The parameters ``kernel_size``, ``stride``, and ``padding`` can either be:

-        The parameters ``kernel_size``, ``stride``, ``padding``, can either be:
-
-        - a single ``int`` -- in which case the same value is used for the depth,
-            height and width axis;
-        - a ``tuple`` of three ``int`` s -- in which case, the first ``int`` is used
+    * a single ``int`` -- in which case the same value is used for the depth,
+      height, and width axes.
+    * a ``tuple`` of three ``int`` s -- in which case, the first ``int`` is used
      for the depth axis, the second ``int`` for the height axis, and the third
      ``int`` for the width axis.