diff --git a/docs/src/install.rst b/docs/src/install.rst
index 213e04f64..693385e2c 100644
--- a/docs/src/install.rst
+++ b/docs/src/install.rst
@@ -191,14 +191,14 @@ The MLX CMake build has several additional options to make smaller binaries.
 For example, if you don't need the CPU backend or support for safetensors and
 GGUF, you can do:
 
-```shell
-cmake .. \
-  -DCMAKE_BUILD_TYPE=MinSizeRel \
-  -DBUILD_SHARED_LIBS=ON \
-  -DMLX_BUILD_CPU=ON \
-  -DMLX_BUILD_SAFETENSORS=OFF \
-  -DMLX_BUILD_GGUF=OFF
-```
+.. code-block:: shell
+
+  cmake .. \
+    -DCMAKE_BUILD_TYPE=MinSizeRel \
+    -DBUILD_SHARED_LIBS=ON \
+    -DMLX_BUILD_CPU=OFF \
+    -DMLX_BUILD_SAFETENSORS=OFF \
+    -DMLX_BUILD_GGUF=OFF
 
 Troubleshooting
 ^^^^^^^^^^^^^^^
diff --git a/docs/src/python/linalg.rst b/docs/src/python/linalg.rst
index b96ed35df..3c34cb3f7 100644
--- a/docs/src/python/linalg.rst
+++ b/docs/src/python/linalg.rst
@@ -10,5 +10,6 @@ Linear Algebra
 
     inv
     norm
+    cholesky
     qr
     svd
diff --git a/python/mlx/nn/layers/convolution.py b/python/mlx/nn/layers/convolution.py
index 0126c6d2a..3202ebb73 100644
--- a/python/mlx/nn/layers/convolution.py
+++ b/python/mlx/nn/layers/convolution.py
@@ -11,9 +11,10 @@ class Conv1d(Module):
     """Applies a 1-dimensional convolution over the multi-channel input sequence.
 
     The channels are expected to be last i.e. the input shape should be ``NLC`` where:
-        - ``N`` is the batch dimension
-        - ``L`` is the sequence length
-        - ``C`` is the number of input channels
+
+    * ``N`` is the batch dimension
+    * ``L`` is the sequence length
+    * ``C`` is the number of input channels
 
     Args:
         in_channels (int): The number of input channels
@@ -72,10 +73,11 @@ class Conv2d(Module):
     """Applies a 2-dimensional convolution over the multi-channel input image.
 
     The channels are expected to be last i.e. the input shape should be ``NHWC`` where:
-        - ``N`` is the batch dimension
-        - ``H`` is the input image height
-        - ``W`` is the input image width
-        - ``C`` is the number of input channels
+
+    * ``N`` is the batch dimension
+    * ``H`` is the input image height
+    * ``W`` is the input image width
+    * ``C`` is the number of input channels
 
     Args:
         in_channels (int): The number of input channels.
@@ -136,12 +138,15 @@ class Conv2d(Module):
 
 class Conv3d(Module):
     """Applies a 3-dimensional convolution over the multi-channel input image.
+
     The channels are expected to be last i.e. the input shape should be ``NDHWC`` where:
-        - ``N`` is the batch dimension
-        - ``D`` is the input image depth
-        - ``H`` is the input image height
-        - ``W`` is the input image width
-        - ``C`` is the number of input channels
+
+    * ``N`` is the batch dimension
+    * ``D`` is the input image depth
+    * ``H`` is the input image height
+    * ``W`` is the input image width
+    * ``C`` is the number of input channels
+
     Args:
         in_channels (int): The number of input channels.
         out_channels (int): The number of output channels.
diff --git a/python/src/linalg.cpp b/python/src/linalg.cpp
index eed8fe53f..3ba94e9f8 100644
--- a/python/src/linalg.cpp
+++ b/python/src/linalg.cpp
@@ -235,7 +235,7 @@ void init_linalg(nb::module_& parent_module) {
 
         Returns:
             tuple(array, array, array): The ``U``, ``S``, and ``Vt`` matrices, such that
-              ``A = U @ diag(S) @ Vt``
+            ``A = U @ diag(S) @ Vt``
       )pbdoc");
   m.def(
       "inv",
@@ -286,7 +286,8 @@ void init_linalg(nb::module_& parent_module) {
               in which case the default stream of the default device is used.
 
         Returns:
-            array: if ``upper = False``, it returns a lower trinagular ``L``matrix such that ``dot(L, L.T) = a``.
-              If ``upper = True``, it returns an upper triangular ``U`` matrix such that ``dot(U.T, U) = a``.
+          array: If ``upper = False``, it returns a lower triangular ``L`` matrix such
+          that ``dot(L, L.T) = a``.  If ``upper = True``, it returns an upper triangular
+          ``U`` matrix such that ``dot(U.T, U) = a``.
       )pbdoc");
 }
diff --git a/python/src/ops.cpp b/python/src/ops.cpp
index 23d503d4b..0f8f680a7 100644
--- a/python/src/ops.cpp
+++ b/python/src/ops.cpp
@@ -3501,7 +3501,7 @@ void init_ops(nb::module_& m) {
               support matadata. The metadata will be returned as an
               additional dictionary.
         Returns:
-            result (array, dict):
+            array or dict:
                 A single array if loading from a ``.npy`` file or a dict
                 mapping names to arrays if loading from a ``.npz`` or
                 ``.safetensors`` file. If ``return_metadata` is ``True`` an
@@ -3584,7 +3584,7 @@ void init_ops(nb::module_& m) {
           y (array): The input selected from where condition is ``False``.
 
         Returns:
-            result (array): The output containing elements selected from
+            array: The output containing elements selected from
             ``x`` and ``y``.
       )pbdoc");
   m.def(
@@ -3613,7 +3613,7 @@ void init_ops(nb::module_& m) {
           decimals (int): Number of decimal places to round to. (default: 0)
 
         Returns:
-          result (array): An array of the same type as ``a`` rounded to the
+          array: An array of the same type as ``a`` rounded to the
           given number of decimals.
       )pbdoc");
   m.def(
@@ -3650,7 +3650,7 @@ void init_ops(nb::module_& m) {
             ``w``. (default: ``4``)
 
         Returns:
-          result (array): The result of the multiplication of ``x`` with ``w``.
+          array: The result of the multiplication of ``x`` with ``w``.
       )pbdoc");
   m.def(
       "quantize",
@@ -3705,11 +3705,11 @@ void init_ops(nb::module_& m) {
             ``w`` in the returned quantized matrix. (default: ``4``)
 
         Returns:
-          (tuple): A tuple containing
+          tuple: A tuple containing
 
-            - w_q (array): The quantized version of ``w``
-            - scales (array): The scale to multiply each element with, namely :math:`s`
-            - biases (array): The biases to add to each element, namely :math:`\beta`
+          * w_q (array): The quantized version of ``w``
+          * scales (array): The scale to multiply each element with, namely :math:`s`
+          * biases (array): The biases to add to each element, namely :math:`\beta`
       )pbdoc");
   m.def(
       "dequantize",
@@ -3745,7 +3745,7 @@ void init_ops(nb::module_& m) {
             ``w``. (default: ``4``)
 
         Returns:
-          result (array): The dequantized version of ``w``
+          array: The dequantized version of ``w``
       )pbdoc");
   m.def(
       "block_sparse_qmm",
@@ -3790,7 +3790,7 @@ void init_ops(nb::module_& m) {
             ``w``. (default: ``4``)
 
         Returns:
-          result (array): The result of the multiplication of ``x`` with ``w``
+          array: The result of the multiplication of ``x`` with ``w``
             after gathering using ``lhs_indices`` and ``rhs_indices``.
       )pbdoc");
   m.def(
@@ -3830,7 +3830,7 @@ void init_ops(nb::module_& m) {
             corresponding dimensions of ``a`` and ``b``. (default: 2)
 
         Returns:
-          result (array): The tensor dot product.
+          array: The tensor dot product.
       )pbdoc");
   m.def(
       "inner",
@@ -3849,7 +3849,7 @@ void init_ops(nb::module_& m) {
         b (array): Input array
 
       Returns:
-        result (array): The inner product.
+        array: The inner product.
     )pbdoc");
   m.def(
       "outer",
@@ -3868,7 +3868,7 @@ void init_ops(nb::module_& m) {
         b (array): Input array
 
       Returns:
-        result (array): The outer product.
+        array: The outer product.
     )pbdoc");
   m.def(
       "tile",
@@ -3895,7 +3895,7 @@ void init_ops(nb::module_& m) {
         reps (int or list(int)): The number of times to repeat ``a`` along each axis.
 
       Returns:
-        result (array): The tiled array.
+        array: The tiled array.
     )pbdoc");
   m.def(
       "addmm",