This commit is contained in:
Awni Hannun 2024-11-22 11:12:25 -08:00 committed by GitHub
parent 02bec0bb6d
commit 7cbb4aef17
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 40 additions and 100 deletions

View File

@@ -12,6 +12,7 @@ Layers
ALiBi ALiBi
AvgPool1d AvgPool1d
AvgPool2d AvgPool2d
AvgPool3d
BatchNorm BatchNorm
CELU CELU
Conv1d Conv1d
@@ -41,6 +42,7 @@ Layers
LSTM LSTM
MaxPool1d MaxPool1d
MaxPool2d MaxPool2d
MaxPool3d
Mish Mish
MultiHeadAttention MultiHeadAttention
PReLU PReLU

View File

@@ -185,16 +185,8 @@ class _Pool3d(_Pool):
class MaxPool1d(_Pool1d): class MaxPool1d(_Pool1d):
r"""Applies 1-dimensional max pooling. r"""Applies 1-dimensional max pooling.
Assuming an input of shape :math:`(N, L, C)` and ``kernel_size`` is Spatially downsamples the input by taking the maximum of a sliding window
:math:`k`, the output is a tensor of shape :math:`(N, L_{out}, C)`, given of size ``kernel_size`` and sliding stride ``stride``.
by:
.. math::
\text{out}(N_i, t, C_j) = \max_{m=0, \ldots, k - 1}
\text{input}(N_i, \text{stride} \times t + m, C_j),
where :math:`L_{out} = \left\lfloor \frac{L + 2 \times \text{padding} -
\text{kernel\_size}}{\text{stride}}\right\rfloor + 1`.
Args: Args:
kernel_size (int or tuple(int)): The size of the pooling window kernel. kernel_size (int or tuple(int)): The size of the pooling window kernel.
@@ -224,16 +216,8 @@ class MaxPool1d(_Pool1d):
class AvgPool1d(_Pool1d): class AvgPool1d(_Pool1d):
r"""Applies 1-dimensional average pooling. r"""Applies 1-dimensional average pooling.
Assuming an input of shape :math:`(N, L, C)` and ``kernel_size`` is Spatially downsamples the input by taking the average of a sliding window
:math:`k`, the output is a tensor of shape :math:`(N, L_{out}, C)`, given of size ``kernel_size`` and sliding stride ``stride``.
by:
.. math::
\text{out}(N_i, t, C_j) = \frac{1}{k} \sum_{m=0, \ldots, k - 1}
\text{input}(N_i, \text{stride} \times t + m, C_j),
where :math:`L_{out} = \left\lfloor \frac{L + 2 \times \text{padding} -
\text{kernel\_size}}{\text{stride}}\right\rfloor + 1`.
Args: Args:
kernel_size (int or tuple(int)): The size of the pooling window kernel. kernel_size (int or tuple(int)): The size of the pooling window kernel.
@@ -263,26 +247,15 @@ class AvgPool1d(_Pool1d):
class MaxPool2d(_Pool2d): class MaxPool2d(_Pool2d):
r"""Applies 2-dimensional max pooling. r"""Applies 2-dimensional max pooling.
Assuming an input of shape :math:`(N, H, W, C)` and ``kernel_size`` is Spatially downsamples the input by taking the maximum of a sliding window
:math:`(k_H, k_W)`, the output is a tensor of shape :math:`(N, H_{out}, of size ``kernel_size`` and sliding stride ``stride``.
W_{out}, C)`, given by:
.. math:: The parameters ``kernel_size``, ``stride``, and ``padding`` can either be:
\begin{aligned}
\text{out}(N_i, h, w, C_j) = & \max_{m=0, \ldots, k_H-1} \max_{n=0, \ldots, k_W-1} \\
& \text{input}(N_i, \text{stride[0]} \times h + m,
\text{stride[1]} \times w + n, C_j),
\end{aligned}
where :math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[0]} - \text{kernel\_size[0]}}{\text{stride[0]}}\right\rfloor + 1`, * a single ``int`` -- in which case the same value is used for both the
:math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[1]} - \text{kernel\_size[1]}}{\text{stride[1]}}\right\rfloor + 1`. height and width axes.
* a ``tuple`` of two ``int`` s -- in which case, the first ``int`` is
The parameters ``kernel_size``, ``stride``, ``padding``, can either be: used for the height axis, the second ``int`` for the width axis.
- a single ``int`` -- in which case the same value is used for both the
height and width axis;
- a ``tuple`` of two ``int`` s -- in which case, the first ``int`` is
used for the height axis, the second ``int`` for the width axis.
Args: Args:
kernel_size (int or tuple(int, int)): The size of the pooling window. kernel_size (int or tuple(int, int)): The size of the pooling window.
@@ -312,26 +285,15 @@ class MaxPool2d(_Pool2d):
class AvgPool2d(_Pool2d): class AvgPool2d(_Pool2d):
r"""Applies 2-dimensional average pooling. r"""Applies 2-dimensional average pooling.
Assuming an input of shape :math:`(N, H, W, C)` and ``kernel_size`` is Spatially downsamples the input by taking the average of a sliding window
:math:`(k_H, k_W)`, the output is a tensor of shape :math:`(N, H_{out}, of size ``kernel_size`` and sliding stride ``stride``.
W_{out}, C)`, given by:
.. math:: The parameters ``kernel_size``, ``stride``, and ``padding`` can either be:
\begin{aligned}
\text{out}(N_i, h, w, C_j) = & \frac{1}{k_H k_W} \sum_{m=0, \ldots, k_H-1} \sum_{n=0, \ldots, k_W-1} \\
& \text{input}(N_i, \text{stride[0]} \times h + m,
\text{stride[1]} \times w + n, C_j),
\end{aligned}
where :math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[0]} - \text{kernel\_size[0]}}{\text{stride[0]}}\right\rfloor + 1`, * a single ``int`` -- in which case the same value is used for both the
:math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[1]} - \text{kernel\_size[1]}}{\text{stride[1]}}\right\rfloor + 1`. height and width axes.
* a ``tuple`` of two ``int`` s -- in which case, the first ``int`` is
The parameters ``kernel_size``, ``stride``, ``padding``, can either be: used for the height axis, the second ``int`` for the width axis.
- a single ``int`` -- in which case the same value is used for both the
height and width axis;
- a ``tuple`` of two ``int`` s -- in which case, the first ``int`` is
used for the height axis, the second ``int`` for the width axis.
Args: Args:
kernel_size (int or tuple(int, int)): The size of the pooling window. kernel_size (int or tuple(int, int)): The size of the pooling window.
@@ -359,30 +321,18 @@ class AvgPool2d(_Pool2d):
class MaxPool3d(_Pool3d): class MaxPool3d(_Pool3d):
""" r"""Applies 3-dimensional max pooling.
Assuming an input of shape :math:`(N, D, H, W, C)` and ``kernel_size`` is
:math:`(k_D, k_H, k_W)`, the output is a tensor of shape :math:`(N, D_{out},
H_{out}, W_{out}, C)`, given by:
.. math:: Spatially downsamples the input by taking the maximum of a sliding window
\begin{aligned} of size ``kernel_size`` and sliding stride ``stride``.
\text{out}(N_i, d, h, w, C_j) = & \max_{l=0, \ldots, k_D-1} \max_{m=0, \ldots, k_H-1} \max_{n=0, \ldots, k_W-1} \\
& \text{input}(N_i, \text{stride[0]} \times d + l,
\text{stride[1]} \times h + m,
\text{stride[2]} \times w + n, C_j),
\end{aligned}
where :math:`D_{out} = \left\lfloor\frac{D + 2 * \text{padding[0]} - \text{kernel\_size[0]}}{\text{stride[0]}}\right\rfloor + 1`, The parameters ``kernel_size``, ``stride``, and ``padding`` can either be:
:math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[1]} - \text{kernel\_size[1]}}{\text{stride[1]}}\right\rfloor + 1`,
:math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[2]} - \text{kernel\_size[2]}}{\text{stride[2]}}\right\rfloor + 1`.
The parameters ``kernel_size``, ``stride``, ``padding``, can either be: * a single ``int`` -- in which case the same value is used for the depth,
height, and width axis.
- a single ``int`` -- in which case the same value is used for the depth, * a ``tuple`` of three ``int`` s -- in which case, the first ``int`` is used
height and width axis; for the depth axis, the second ``int`` for the height axis, and the third
- a ``tuple`` of three ``int`` s -- in which case, the first ``int`` is used ``int`` for the width axis.
for the depth axis, the second ``int`` for the height axis, and the third
``int`` for the width axis.
Args: Args:
kernel_size (int or tuple(int, int, int)): The size of the pooling window. kernel_size (int or tuple(int, int, int)): The size of the pooling window.
@@ -410,32 +360,20 @@ class MaxPool3d(_Pool3d):
class AvgPool3d(_Pool3d): class AvgPool3d(_Pool3d):
""" r"""Applies 3-dimensional average pooling.
Assuming an input of shape :math:`(N, D, H, W, C)` and ``kernel_size`` is
:math:`(k_D, k_H, k_W)`, the output is a tensor of shape :math:`(N, D_{out},
H_{out}, W_{out}, C)`, given by:
.. math:: Spatially downsamples the input by taking the average of a sliding window
\begin{aligned} of size ``kernel_size`` and sliding stride ``stride``.
\text{out}(N_i, d, h, w, C_j) = & \frac{1}{k_D k_H k_W} \sum_{l=0, \ldots, k_D-1} \sum_{m=0, \ldots, k_H-1} \sum_{n=0, \ldots, k_W-1} \\
& \text{input}(N_i, \text{stride[0]} \times d + l,
\text{stride[1]} \times h + m,
\text{stride[2]} \times w + n, C_j),
\end{aligned}
where :math:`D_{out} = \left\lfloor\frac{D + 2 * \text{padding[0]} - \text{kernel\_size[0]}}{\text{stride[0]}}\right\rfloor + 1`, The parameters ``kernel_size``, ``stride``, and ``padding`` can either be:
:math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[1]} - \text{kernel\_size[1]}}{\text{stride[1]}}\right\rfloor + 1`,
:math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[2]} - \text{kernel\_size[2]}}{\text{stride[2]}}\right\rfloor + 1`.
The parameters ``kernel_size``, ``stride``, ``padding``, can either be: * a single ``int`` -- in which case the same value is used for the depth,
height, and width axis.
* a ``tuple`` of three ``int`` s -- in which case, the first ``int`` is used
for the depth axis, the second ``int`` for the height axis, and the third
``int`` for the width axis.
- a single ``int`` -- in which case the same value is used for the depth, Args:
height and width axis;
- a ``tuple`` of three ``int`` s -- in which case, the first ``int`` is used
for the depth axis, the second ``int`` for the height axis, and the third
``int`` for the width axis.
Args:
kernel_size (int or tuple(int, int, int)): The size of the pooling window. kernel_size (int or tuple(int, int, int)): The size of the pooling window.
stride (int or tuple(int, int, int), optional): The stride of the pooling stride (int or tuple(int, int, int), optional): The stride of the pooling
window. Default: ``kernel_size``. window. Default: ``kernel_size``.
@@ -443,7 +381,7 @@ class AvgPool3d(_Pool3d):
padding to apply to the input. The padding is applied on both sides padding to apply to the input. The padding is applied on both sides
of the depth, height and width axes. Default: ``0``. of the depth, height and width axes. Default: ``0``.
Examples: Examples:
>>> import mlx.core as mx >>> import mlx.core as mx
>>> import mlx.nn.layers as nn >>> import mlx.nn.layers as nn
>>> x = mx.random.normal(shape=(8, 16, 32, 32, 4)) >>> x = mx.random.normal(shape=(8, 16, 32, 32, 4))