post nanobind docs fixes and some updates (#889)

* post nanobind docs fixes and some updates

* one more doc nit

* fix for stubs and latex
This commit is contained in:
Awni Hannun
2024-03-24 15:03:27 -07:00
committed by GitHub
parent be98f4ab6b
commit 1e16331d9c
16 changed files with 185 additions and 118 deletions

View File

@@ -166,7 +166,7 @@ class MaxPool1d(_Pool1d):
\text{input}(N_i, \text{stride} \times t + m, C_j),
where :math:`L_{out} = \left\lfloor \frac{L + 2 \times \text{padding} -
\text{kernel_size}}{\text{stride}}\right\rfloor + 1`.
\text{kernel\_size}}{\text{stride}}\right\rfloor + 1`.
Args:
kernel_size (int or tuple(int)): The size of the pooling window kernel.
@@ -205,7 +205,7 @@ class AvgPool1d(_Pool1d):
\text{input}(N_i, \text{stride} \times t + m, C_j),
where :math:`L_{out} = \left\lfloor \frac{L + 2 \times \text{padding} -
\text{kernel_size}}{\text{stride}}\right\rfloor + 1`.
\text{kernel\_size}}{\text{stride}}\right\rfloor + 1`.
Args:
kernel_size (int or tuple(int)): The size of the pooling window kernel.
@@ -246,8 +246,8 @@ class MaxPool2d(_Pool2d):
\text{stride[1]} \times w + n, C_j),
\end{aligned}
where :math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[0]} - \text{kernel_size[0]}}{\text{stride[0]}}\right\rfloor + 1`,
:math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[1]} - \text{kernel_size[1]}}{\text{stride[1]}}\right\rfloor + 1`.
where :math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[0]} - \text{kernel\_size[0]}}{\text{stride[0]}}\right\rfloor + 1`,
:math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[1]} - \text{kernel\_size[1]}}{\text{stride[1]}}\right\rfloor + 1`.
The parameters ``kernel_size``, ``stride``, ``padding``, can either be:
@@ -295,8 +295,8 @@ class AvgPool2d(_Pool2d):
\text{stride[1]} \times w + n, C_j),
\end{aligned}
where :math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[0]} - \text{kernel_size[0]}}{\text{stride[0]}}\right\rfloor + 1`,
:math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[1]} - \text{kernel_size[1]}}{\text{stride[1]}}\right\rfloor + 1`.
where :math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[0]} - \text{kernel\_size[0]}}{\text{stride[0]}}\right\rfloor + 1`,
:math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[1]} - \text{kernel\_size[1]}}{\text{stride[1]}}\right\rfloor + 1`.
The parameters ``kernel_size``, ``stride``, ``padding``, can either be:

View File

@@ -103,12 +103,12 @@ class GRU(Module):
.. math::
\begin{align*}
\begin{aligned}
r_t &= \sigma (W_{xr}x_t + W_{hr}h_t + b_{r}) \\
z_t &= \sigma (W_{xz}x_t + W_{hz}h_t + b_{z}) \\
n_t &= \text{tanh}(W_{xn}x_t + b_{n} + r_t \odot (W_{hn}h_t + b_{hn})) \\
h_{t + 1} &= (1 - z_t) \odot n_t + z_t \odot h_t
\end{align*}
\end{aligned}
The hidden state :math:`h` has shape ``NH`` or ``H`` depending on
whether the input is batched or not. Returns the hidden state at each
@@ -206,14 +206,14 @@ class LSTM(Module):
Concretely, for each element of the sequence, this layer computes:
.. math::
\begin{align*}
\begin{aligned}
i_t &= \sigma (W_{xi}x_t + W_{hi}h_t + b_{i}) \\
f_t &= \sigma (W_{xf}x_t + W_{hf}h_t + b_{f}) \\
g_t &= \text{tanh} (W_{xg}x_t + W_{hg}h_t + b_{g}) \\
o_t &= \sigma (W_{xo}x_t + W_{ho}h_t + b_{o}) \\
c_{t + 1} &= f_t \odot c_t + i_t \odot g_t \\
h_{t + 1} &= o_t \text{tanh}(c_{t + 1})
\end{align*}
\end{aligned}
The hidden state :math:`h` and cell state :math:`c` have shape ``NH``
or ``H``, depending on whether the input is batched or not.