diff --git a/python/mlx/nn/layers/convolution.py b/python/mlx/nn/layers/convolution.py
index c6928e188..6e1c9780e 100644
--- a/python/mlx/nn/layers/convolution.py
+++ b/python/mlx/nn/layers/convolution.py
@@ -23,6 +23,7 @@ class Conv1d(Module):
             Default: 1.
         padding (int, optional): How many positions to 0-pad the input with.
             Default: 0.
+        dilation (int, optional): The dilation of the convolution. Default: 1.
         bias (bool, optional): If ``True`` add a learnable bias to the output.
             Default: ``True``
     """
@@ -34,6 +35,7 @@ class Conv1d(Module):
         kernel_size: int,
         stride: int = 1,
         padding: int = 0,
+        dilation: int = 1,
         bias: bool = True,
     ):
         super().__init__()
@@ -48,17 +50,19 @@ class Conv1d(Module):
             self.bias = mx.zeros((out_channels,))
 
         self.padding = padding
+        self.dilation = dilation
         self.stride = stride
 
     def _extra_repr(self):
         return (
             f"{self.weight.shape[-1]}, {self.weight.shape[0]}, "
             f"kernel_size={self.weight.shape[1]}, stride={self.stride}, "
-            f"padding={self.padding}, bias={'bias' in self}"
+            f"padding={self.padding}, dilation={self.dilation}, "
+            f"bias={'bias' in self}"
         )
 
     def __call__(self, x):
-        y = mx.conv1d(x, self.weight, self.stride, self.padding)
+        y = mx.conv1d(x, self.weight, self.stride, self.padding, self.dilation)
         if "bias" in self:
             y = y + self.bias
         return y
@@ -81,6 +85,7 @@ class Conv2d(Module):
             applying the filter. Default: 1.
         padding (int or tuple, optional): How many positions to 0-pad
             the input with. Default: 0.
+        dilation (int or tuple, optional): Convolution dilation. Default: 1.
         bias (bool, optional): If ``True`` add a learnable bias to the
             output. Default: ``True``
     """
@@ -92,6 +97,7 @@ class Conv2d(Module):
         kernel_size: Union[int, tuple],
         stride: Union[int, tuple] = 1,
         padding: Union[int, tuple] = 0,
+        dilation: Union[int, tuple] = 1,
         bias: bool = True,
     ):
         super().__init__()
@@ -111,16 +117,18 @@ class Conv2d(Module):
 
         self.padding = padding
         self.stride = stride
+        self.dilation = dilation
 
     def _extra_repr(self):
         return (
             f"{self.weight.shape[-1]}, {self.weight.shape[0]}, "
-            f"kernel_size={self.weight.shape[1:2]}, stride={self.stride}, "
-            f"padding={self.padding}, bias={'bias' in self}"
+            f"kernel_size={self.weight.shape[1:3]}, stride={self.stride}, "
+            f"padding={self.padding}, dilation={self.dilation}, "
+            f"bias={'bias' in self}"
         )
 
     def __call__(self, x):
-        y = mx.conv2d(x, self.weight, self.stride, self.padding)
+        y = mx.conv2d(x, self.weight, self.stride, self.padding, self.dilation)
         if "bias" in self:
             y = y + self.bias
         return y
diff --git a/python/tests/test_nn.py b/python/tests/test_nn.py
index 99154d3f6..678acfd5b 100644
--- a/python/tests/test_nn.py
+++ b/python/tests/test_nn.py
@@ -586,6 +586,13 @@ class TestLayers(mlx_tests.MLXTestCase):
         self.assertEqual(y.shape, (N, (L - ks + 1) // 2, C_out))
         self.assertTrue("bias" in c.parameters())
 
+        dil = 2
+        c = nn.Conv1d(
+            in_channels=C_in, out_channels=C_out, kernel_size=ks, dilation=dil
+        )
+        y = c(x)
+        self.assertEqual(y.shape, (N, L - (ks - 1) * dil, C_out))
+
         c = nn.Conv1d(in_channels=C_in, out_channels=C_out, kernel_size=ks, bias=False)
         self.assertTrue("bias" not in c.parameters())
 
@@ -632,6 +639,11 @@ class TestLayers(mlx_tests.MLXTestCase):
         self.assertEqual(y.shape, (4, 3, 3, 8))
         self.assertLess(mx.abs(y - c.weight.sum((1, 2, 3))).max(), 1e-4)
 
+        c = nn.Conv2d(3, 8, 3, dilation=2)
+        y = c(x)
+        self.assertEqual(y.shape, (4, 4, 4, 8))
+        self.assertLess(mx.abs(y - c.weight.sum((1, 2, 3))).max(), 1e-4)
+
     def test_sequential(self):
         x = mx.ones((10, 2))
         m = nn.Sequential(nn.Linear(2, 10), nn.ReLU(), nn.Linear(10, 1))