mirror of
				https://github.com/ml-explore/mlx.git
				synced 2025-11-01 00:28:11 +08:00 
			
		
		
		
	Doc fix (#1615)
This commit is contained in:
		| @@ -12,6 +12,7 @@ Layers | |||||||
|    ALiBi |    ALiBi | ||||||
|    AvgPool1d |    AvgPool1d | ||||||
|    AvgPool2d |    AvgPool2d | ||||||
|  |    AvgPool3d | ||||||
|    BatchNorm |    BatchNorm | ||||||
|    CELU |    CELU | ||||||
|    Conv1d |    Conv1d | ||||||
| @@ -41,6 +42,7 @@ Layers | |||||||
|    LSTM |    LSTM | ||||||
|    MaxPool1d |    MaxPool1d | ||||||
|    MaxPool2d |    MaxPool2d | ||||||
|  |    MaxPool3d | ||||||
|    Mish |    Mish | ||||||
|    MultiHeadAttention |    MultiHeadAttention | ||||||
|    PReLU |    PReLU | ||||||
|   | |||||||
| @@ -185,16 +185,8 @@ class _Pool3d(_Pool): | |||||||
| class MaxPool1d(_Pool1d): | class MaxPool1d(_Pool1d): | ||||||
|     r"""Applies 1-dimensional max pooling. |     r"""Applies 1-dimensional max pooling. | ||||||
|  |  | ||||||
|     Assuming an input of shape :math:`(N, L, C)` and ``kernel_size`` is |     Spatially downsamples the input by taking the maximum of a sliding window | ||||||
|     :math:`k`, the output is a tensor of shape :math:`(N, L_{out}, C)`, given |     of size ``kernel_size`` and sliding stride ``stride``. | ||||||
|     by: |  | ||||||
|  |  | ||||||
|         .. math:: |  | ||||||
|             \text{out}(N_i, t, C_j) = \max_{m=0, \ldots, k - 1} |  | ||||||
|                     \text{input}(N_i, \text{stride} \times t + m, C_j), |  | ||||||
|  |  | ||||||
|     where :math:`L_{out} = \left\lfloor \frac{L + 2 \times \text{padding} - |  | ||||||
|     \text{kernel\_size}}{\text{stride}}\right\rfloor + 1`. |  | ||||||
|  |  | ||||||
|     Args: |     Args: | ||||||
|         kernel_size (int or tuple(int)): The size of the pooling window kernel. |         kernel_size (int or tuple(int)): The size of the pooling window kernel. | ||||||
| @@ -224,16 +216,8 @@ class MaxPool1d(_Pool1d): | |||||||
| class AvgPool1d(_Pool1d): | class AvgPool1d(_Pool1d): | ||||||
|     r"""Applies 1-dimensional average pooling. |     r"""Applies 1-dimensional average pooling. | ||||||
|  |  | ||||||
|     Assuming an input of shape :math:`(N, L, C)` and ``kernel_size`` is |     Spatially downsamples the input by taking the average of a sliding window | ||||||
|     :math:`k`, the output is a tensor of shape :math:`(N, L_{out}, C)`, given |     of size ``kernel_size`` and sliding stride ``stride``. | ||||||
|     by: |  | ||||||
|  |  | ||||||
|         .. math:: |  | ||||||
|             \text{out}(N_i, t, C_j) = \frac{1}{k} \sum_{m=0, \ldots, k - 1} |  | ||||||
|                     \text{input}(N_i, \text{stride} \times t + m, C_j), |  | ||||||
|  |  | ||||||
|     where :math:`L_{out} = \left\lfloor \frac{L + 2 \times \text{padding} - |  | ||||||
|     \text{kernel\_size}}{\text{stride}}\right\rfloor + 1`. |  | ||||||
|  |  | ||||||
|     Args: |     Args: | ||||||
|         kernel_size (int or tuple(int)): The size of the pooling window kernel. |         kernel_size (int or tuple(int)): The size of the pooling window kernel. | ||||||
| @@ -263,25 +247,14 @@ class AvgPool1d(_Pool1d): | |||||||
| class MaxPool2d(_Pool2d): | class MaxPool2d(_Pool2d): | ||||||
|     r"""Applies 2-dimensional max pooling. |     r"""Applies 2-dimensional max pooling. | ||||||
|  |  | ||||||
|     Assuming an input of shape :math:`(N, H, W, C)` and ``kernel_size`` is |     Spatially downsamples the input by taking the maximum of a sliding window | ||||||
|     :math:`(k_H, k_W)`, the output is a tensor of shape :math:`(N, H_{out}, |     of size ``kernel_size`` and sliding stride ``stride``. | ||||||
|     W_{out}, C)`, given by: |  | ||||||
|  |  | ||||||
|     .. math:: |     The parameters ``kernel_size``, ``stride``, and ``padding`` can either be: | ||||||
|         \begin{aligned} |  | ||||||
|             \text{out}(N_i, h, w, C_j) = & \max_{m=0, \ldots, k_H-1} \max_{n=0, \ldots, k_W-1} \\ |  | ||||||
|                                     & \text{input}(N_i, \text{stride[0]} \times h + m, |  | ||||||
|                                                 \text{stride[1]} \times w + n, C_j), |  | ||||||
|         \end{aligned} |  | ||||||
|  |  | ||||||
|     where :math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[0]} - \text{kernel\_size[0]}}{\text{stride[0]}}\right\rfloor + 1`, |     * a single ``int`` -- in which case the same value is used for both the | ||||||
|     :math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[1]} - \text{kernel\_size[1]}}{\text{stride[1]}}\right\rfloor + 1`. |       height and width axis. | ||||||
|  |     * a ``tuple`` of two ``int`` s -- in which case, the first ``int`` is | ||||||
|     The parameters ``kernel_size``, ``stride``, ``padding``, can either be: |  | ||||||
|  |  | ||||||
|         - a single ``int`` -- in which case the same value is used for both the |  | ||||||
|           height and width axis; |  | ||||||
|         - a ``tuple`` of two ``int`` s -- in which case, the first ``int`` is |  | ||||||
|       used for the height axis, the second ``int`` for the width axis. |       used for the height axis, the second ``int`` for the width axis. | ||||||
|  |  | ||||||
|     Args: |     Args: | ||||||
| @@ -312,25 +285,14 @@ class MaxPool2d(_Pool2d): | |||||||
| class AvgPool2d(_Pool2d): | class AvgPool2d(_Pool2d): | ||||||
|     r"""Applies 2-dimensional average pooling. |     r"""Applies 2-dimensional average pooling. | ||||||
|  |  | ||||||
|     Assuming an input of shape :math:`(N, H, W, C)` and ``kernel_size`` is |     Spatially downsamples the input by taking the average of a sliding window | ||||||
|     :math:`(k_H, k_W)`, the output is a tensor of shape :math:`(N, H_{out}, |     of size ``kernel_size`` and sliding stride ``stride``. | ||||||
|     W_{out}, C)`, given by: |  | ||||||
|  |  | ||||||
|     .. math:: |     The parameters ``kernel_size``, ``stride``, and ``padding`` can either be: | ||||||
|         \begin{aligned} |  | ||||||
|             \text{out}(N_i, h, w, C_j) = & \frac{1}{k_H k_W} \sum_{m=0, \ldots, k_H-1} \sum_{n=0, \ldots, k_W-1} \\ |  | ||||||
|                                     & \text{input}(N_i, \text{stride[0]} \times h + m, |  | ||||||
|                                                 \text{stride[1]} \times w + n, C_j), |  | ||||||
|         \end{aligned} |  | ||||||
|  |  | ||||||
|     where :math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[0]} - \text{kernel\_size[0]}}{\text{stride[0]}}\right\rfloor + 1`, |     * a single ``int`` -- in which case the same value is used for both the | ||||||
|     :math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[1]} - \text{kernel\_size[1]}}{\text{stride[1]}}\right\rfloor + 1`. |       height and width axis. | ||||||
|  |     * a ``tuple`` of two ``int`` s -- in which case, the first ``int`` is | ||||||
|     The parameters ``kernel_size``, ``stride``, ``padding``, can either be: |  | ||||||
|  |  | ||||||
|         - a single ``int`` -- in which case the same value is used for both the |  | ||||||
|           height and width axis; |  | ||||||
|         - a ``tuple`` of two ``int`` s -- in which case, the first ``int`` is |  | ||||||
|       used for the height axis, the second ``int`` for the width axis. |       used for the height axis, the second ``int`` for the width axis. | ||||||
|  |  | ||||||
|     Args: |     Args: | ||||||
| @@ -359,28 +321,16 @@ class AvgPool2d(_Pool2d): | |||||||
|  |  | ||||||
|  |  | ||||||
| class MaxPool3d(_Pool3d): | class MaxPool3d(_Pool3d): | ||||||
|     """ |     r"""Applies 3-dimensional max pooling. | ||||||
|         Assuming an input of shape :math:`(N, D, H, W, C)` and ``kernel_size`` is |  | ||||||
|     :math:`(k_D, k_H, k_W)`, the output is a tensor of shape :math:`(N, D_{out}, |  | ||||||
|     H_{out}, W_{out}, C)`, given by: |  | ||||||
|  |  | ||||||
|     .. math:: |     Spatially downsamples the input by taking the maximum of a sliding window | ||||||
|         \begin{aligned} |     of size ``kernel_size`` and sliding stride ``stride``. | ||||||
|             \text{out}(N_i, d, h, w, C_j) = & \max_{l=0, \ldots, k_D-1} \max_{m=0, \ldots, k_H-1} \max_{n=0, \ldots, k_W-1} \\ |  | ||||||
|                                     & \text{input}(N_i, \text{stride[0]} \times d + l, |  | ||||||
|                                                 \text{stride[1]} \times h + m, |  | ||||||
|                                                 \text{stride[2]} \times w + n, C_j), |  | ||||||
|         \end{aligned} |  | ||||||
|  |  | ||||||
|     where :math:`D_{out} = \left\lfloor\frac{D + 2 * \text{padding[0]} - \text{kernel\_size[0]}}{\text{stride[0]}}\right\rfloor + 1`, |     The parameters ``kernel_size``, ``stride``, and ``padding`` can either be: | ||||||
|     :math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[1]} - \text{kernel\_size[1]}}{\text{stride[1]}}\right\rfloor + 1`, |  | ||||||
|     :math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[2]} - \text{kernel\_size[2]}}{\text{stride[2]}}\right\rfloor + 1`. |  | ||||||
|  |  | ||||||
|     The parameters ``kernel_size``, ``stride``, ``padding``, can either be: |     * a single ``int`` -- in which case the same value is used for the depth, | ||||||
|  |       height, and width axis. | ||||||
|         - a single ``int`` -- in which case the same value is used for the depth, |     * a ``tuple`` of three ``int`` s -- in which case, the first ``int`` is used | ||||||
|         height and width axis; |  | ||||||
|         - a ``tuple`` of three ``int`` s -- in which case, the first ``int`` is used |  | ||||||
|       for the depth axis, the second ``int`` for the height axis, and the third |       for the depth axis, the second ``int`` for the height axis, and the third | ||||||
|       ``int`` for the width axis. |       ``int`` for the width axis. | ||||||
|  |  | ||||||
| @@ -410,28 +360,16 @@ class MaxPool3d(_Pool3d): | |||||||
|  |  | ||||||
|  |  | ||||||
| class AvgPool3d(_Pool3d): | class AvgPool3d(_Pool3d): | ||||||
|     """ |     r"""Applies 3-dimensional average pooling. | ||||||
|         Assuming an input of shape :math:`(N, D, H, W, C)` and ``kernel_size`` is |  | ||||||
|         :math:`(k_D, k_H, k_W)`, the output is a tensor of shape :math:`(N, D_{out}, |  | ||||||
|         H_{out}, W_{out}, C)`, given by: |  | ||||||
|  |  | ||||||
|         .. math:: |     Spatially downsamples the input by taking the average of a sliding window | ||||||
|         \begin{aligned} |     of size ``kernel_size`` and sliding stride ``stride``. | ||||||
|             \text{out}(N_i, d, h, w, C_j) = & \frac{1}{k_D k_H k_W} \sum_{l=0, \ldots, k_D-1} \sum_{m=0, \ldots, k_H-1} \sum_{n=0, \ldots, k_W-1} \\ |  | ||||||
|                                     & \text{input}(N_i, \text{stride[0]} \times d + l, |  | ||||||
|                                                 \text{stride[1]} \times h + m, |  | ||||||
|                                                 \text{stride[2]} \times w + n, C_j), |  | ||||||
|         \end{aligned} |  | ||||||
|  |  | ||||||
|         where :math:`D_{out} = \left\lfloor\frac{D + 2 * \text{padding[0]} - \text{kernel\_size[0]}}{\text{stride[0]}}\right\rfloor + 1`, |     The parameters ``kernel_size``, ``stride``, and ``padding`` can either be: | ||||||
|         :math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[1]} - \text{kernel\_size[1]}}{\text{stride[1]}}\right\rfloor + 1`, |  | ||||||
|         :math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[2]} - \text{kernel\_size[2]}}{\text{stride[2]}}\right\rfloor + 1`. |  | ||||||
|  |  | ||||||
|         The parameters ``kernel_size``, ``stride``, ``padding``, can either be: |     * a single ``int`` -- in which case the same value is used for the depth, | ||||||
|  |       height, and width axis. | ||||||
|         - a single ``int`` -- in which case the same value is used for the depth, |     * a ``tuple`` of three ``int`` s -- in which case, the first ``int`` is used | ||||||
|             height and width axis; |  | ||||||
|         - a ``tuple`` of three ``int`` s -- in which case, the first ``int`` is used |  | ||||||
|       for the depth axis, the second ``int`` for the height axis, and the third |       for the depth axis, the second ``int`` for the height axis, and the third | ||||||
|       ``int`` for the width axis. |       ``int`` for the width axis. | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Awni Hannun
					Awni Hannun