post nanobind docs fixes and some updates (#889)

* post nanobind docs fixes and some updates

* one more doc nit

* fix for stubs and latex
Awni Hannun
2024-03-24 15:03:27 -07:00
committed by GitHub
parent be98f4ab6b
commit 1e16331d9c
16 changed files with 185 additions and 118 deletions


@@ -156,7 +156,7 @@ def glorot_normal(
(``fan_out``) units according to:
.. math::
-\sigma = \gamma \sqrt{\frac{2.0}{\text{fan_in} + \text{fan_out}}}
+\sigma = \gamma \sqrt{\frac{2.0}{\text{fan\_in} + \text{fan\_out}}}
For more details see the original reference: `Understanding the difficulty
of training deep feedforward neural networks
@@ -199,7 +199,7 @@ def glorot_uniform(
units according to:
.. math::
-\sigma = \gamma \sqrt{\frac{6.0}{\text{fan_in} + \text{fan_out}}}
+\sigma = \gamma \sqrt{\frac{6.0}{\text{fan\_in} + \text{fan\_out}}}
For more details see the original reference: `Understanding the difficulty
of training deep feedforward neural networks
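
Both hunks make the same fix: underscores inside ``\text{...}`` must be escaped (``fan\_in``, ``fan\_out``) or LaTeX reads ``_`` as a subscript command and the math fails to render. The two formulas differ only in the constant under the square root: 2.0 for ``glorot_normal`` and 6.0 for ``glorot_uniform``. A minimal sanity check of the documented math in plain Python (the helper name is illustrative, not part of the MLX API):

    import math

    def glorot_sigma(fan_in: int, fan_out: int, constant: float, gain: float = 1.0) -> float:
        # sigma = gain * sqrt(constant / (fan_in + fan_out))
        return gain * math.sqrt(constant / (fan_in + fan_out))

    print(glorot_sigma(256, 128, 2.0))  # std for glorot_normal, ~0.0722
    print(glorot_sigma(256, 128, 6.0))  # range bound for glorot_uniform, 0.125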


@@ -166,7 +166,7 @@ class MaxPool1d(_Pool1d):
\text{input}(N_i, \text{stride} \times t + m, C_j),
where :math:`L_{out} = \left\lfloor \frac{L + 2 \times \text{padding} -
-\text{kernel_size}}{\text{stride}}\right\rfloor + 1`.
+\text{kernel\_size}}{\text{stride}}\right\rfloor + 1`.
Args:
kernel_size (int or tuple(int)): The size of the pooling window kernel.
@@ -205,7 +205,7 @@ class AvgPool1d(_Pool1d):
\text{input}(N_i, \text{stride} \times t + m, C_j),
where :math:`L_{out} = \left\lfloor \frac{L + 2 \times \text{padding} -
-\text{kernel_size}}{\text{stride}}\right\rfloor + 1`.
+\text{kernel\_size}}{\text{stride}}\right\rfloor + 1`.
Args:
kernel_size (int or tuple(int)): The size of the pooling window kernel.
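
Same escaping fix for ``kernel_size`` in the output-length formula shared by ``MaxPool1d`` and ``AvgPool1d``. The formula is easy to check numerically; a small sketch with illustrative sizes:

    def pool_out_length(L: int, kernel_size: int, stride: int, padding: int = 0) -> int:
        # L_out = floor((L + 2 * padding - kernel_size) / stride) + 1
        return (L + 2 * padding - kernel_size) // stride + 1

    # A length-10 input, window 3, stride 2, no padding gives 4 output positions.
    print(pool_out_length(10, kernel_size=3, stride=2))  # 4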
@@ -246,8 +246,8 @@ class MaxPool2d(_Pool2d):
\text{stride[1]} \times w + n, C_j),
\end{aligned}
-where :math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[0]} - \text{kernel_size[0]}}{\text{stride[0]}}\right\rfloor + 1`,
-:math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[1]} - \text{kernel_size[1]}}{\text{stride[1]}}\right\rfloor + 1`.
+where :math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[0]} - \text{kernel\_size[0]}}{\text{stride[0]}}\right\rfloor + 1`,
+:math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[1]} - \text{kernel\_size[1]}}{\text{stride[1]}}\right\rfloor + 1`.
The parameters ``kernel_size``, ``stride``, ``padding``, can either be:
@@ -295,8 +295,8 @@ class AvgPool2d(_Pool2d):
\text{stride[1]} \times w + n, C_j),
\end{aligned}
-where :math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[0]} - \text{kernel_size[0]}}{\text{stride[0]}}\right\rfloor + 1`,
-:math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[1]} - \text{kernel_size[1]}}{\text{stride[1]}}\right\rfloor + 1`.
+where :math:`H_{out} = \left\lfloor\frac{H + 2 * \text{padding[0]} - \text{kernel\_size[0]}}{\text{stride[0]}}\right\rfloor + 1`,
+:math:`W_{out} = \left\lfloor\frac{W + 2 * \text{padding[1]} - \text{kernel\_size[1]}}{\text{stride[1]}}\right\rfloor + 1`.
The parameters ``kernel_size``, ``stride``, ``padding``, can either be:
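
The 2-D pooling docstrings apply the same length formula independently to each spatial axis. With illustrative sizes and no padding:

    # Per-axis: out = floor((size + 2 * padding - kernel) / stride) + 1
    H, W, kernel, stride = 32, 48, 2, 2
    H_out = (H - kernel) // stride + 1  # 16
    W_out = (W - kernel) // stride + 1  # 24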


@@ -103,12 +103,12 @@ class GRU(Module):
.. math::
-\begin{align*}
+\begin{aligned}
r_t &= \sigma (W_{xr}x_t + W_{hr}h_t + b_{r}) \\
z_t &= \sigma (W_{xz}x_t + W_{hz}h_t + b_{z}) \\
n_t &= \text{tanh}(W_{xn}x_t + b_{n} + r_t \odot (W_{hn}h_t + b_{hn})) \\
h_{t + 1} &= (1 - z_t) \odot n_t + z_t \odot h_t
-\end{align*}
+\end{aligned}
The hidden state :math:`h` has shape ``NH`` or ``H`` depending on
whether the input is batched or not. Returns the hidden state at each
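
The ``align*`` to ``aligned`` swap is presumably the latex fix from the commit message: ``align*`` is a top-level environment and cannot be nested inside the display-math block that ``.. math::`` emits, while ``aligned`` can. The GRU equations themselves are unchanged; a NumPy sketch of one step as documented (names and shapes are illustrative, this is not the MLX implementation):

    import numpy as np

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    def gru_step(x_t, h_t, W_xr, W_hr, b_r, W_xz, W_hz, b_z, W_xn, b_n, W_hn, b_hn):
        r_t = sigmoid(W_xr @ x_t + W_hr @ h_t + b_r)                 # reset gate
        z_t = sigmoid(W_xz @ x_t + W_hz @ h_t + b_z)                 # update gate
        n_t = np.tanh(W_xn @ x_t + b_n + r_t * (W_hn @ h_t + b_hn))  # candidate
        return (1.0 - z_t) * n_t + z_t * h_t                         # h_{t+1}

    # Illustrative shapes: input dim 4, hidden dim 3.
    rng = np.random.default_rng(0)
    D, H = 4, 3
    shapes = [(H, D), (H, H), (H,), (H, D), (H, H), (H,), (H, D), (H,), (H, H), (H,)]
    h_next = gru_step(rng.standard_normal(D), np.zeros(H),
                      *[rng.standard_normal(s) for s in shapes])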
@@ -206,14 +206,14 @@ class LSTM(Module):
Concretely, for each element of the sequence, this layer computes:
.. math::
-\begin{align*}
+\begin{aligned}
i_t &= \sigma (W_{xi}x_t + W_{hi}h_t + b_{i}) \\
f_t &= \sigma (W_{xf}x_t + W_{hf}h_t + b_{f}) \\
g_t &= \text{tanh} (W_{xg}x_t + W_{hg}h_t + b_{g}) \\
o_t &= \sigma (W_{xo}x_t + W_{ho}h_t + b_{o}) \\
c_{t + 1} &= f_t \odot c_t + i_t \odot g_t \\
h_{t + 1} &= o_t \text{tanh}(c_{t + 1})
-\end{align*}
+\end{aligned}
The hidden state :math:`h` and cell state :math:`c` have shape ``NH``
or ``H``, depending on whether the input is batched or not.
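
Same ``aligned`` fix for ``LSTM``. The documented step, as a self-contained NumPy sketch (again illustrative, not the MLX implementation):

    import numpy as np

    sigmoid = lambda x: 1.0 / (1.0 + np.exp(-x))

    def lstm_step(x_t, h_t, c_t, Wx, Wh, b):
        # Wx: (H, D) input weights, Wh: (H, H) recurrent weights, b: (H,) biases;
        # each is a dict keyed by gate name: "i", "f", "g", "o".
        gate = lambda k, act: act(Wx[k] @ x_t + Wh[k] @ h_t + b[k])
        i_t, f_t, o_t = (gate(k, sigmoid) for k in ("i", "f", "o"))
        g_t = gate("g", np.tanh)
        c_next = f_t * c_t + i_t * g_t    # c_{t+1} = f_t * c_t + i_t * g_t
        h_next = o_t * np.tanh(c_next)    # h_{t+1} = o_t * tanh(c_{t+1})
        return h_next, c_next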


@@ -343,10 +343,9 @@ def smooth_l1_loss(
.. math::
-l =
-\begin{cases}
-0.5 (x - y)^2, & \text{ if } & (x - y) < \beta \\
-|x - y| - 0.5 \beta, & & \text{otherwise}
+l = \begin{cases}
+0.5 (x - y)^2, & \text{if } (x - y) < \beta \\
+|x - y| - 0.5 \beta, & \text{otherwise}
\end{cases}
Args:
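
Besides joining ``l =`` onto the ``\begin{cases}`` line, this hunk removes the stray second ``&`` in each row; ``cases`` allows exactly one alignment tab per row, so the extra column is a LaTeX error. The resulting piecewise loss, as a NumPy sketch; note that the conventional smooth-L1 condition compares ``|x - y|`` to beta, which is what the sketch uses, while the rendered docstring writes ``(x - y)``:

    import numpy as np

    def smooth_l1(x, y, beta: float = 1.0):
        # 0.5 * (x - y)^2        where |x - y| < beta
        # |x - y| - 0.5 * beta   otherwise
        diff = np.abs(x - y)
        return np.where(diff < beta, 0.5 * diff**2, diff - 0.5 * beta)

    print(smooth_l1(np.array([0.0, 0.5, 2.0]), np.zeros(3)))  # [0., 0.125, 1.5]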