diff --git a/llms/mlx_lm/sample_utils.py b/llms/mlx_lm/sample_utils.py
index c27b52d8..f9868422 100644
--- a/llms/mlx_lm/sample_utils.py
+++ b/llms/mlx_lm/sample_utils.py
@@ -1,5 +1,6 @@
 # Copyright © 2023-2024 Apple Inc.
 
+import math
 from functools import partial
 from typing import Callable, Dict, Optional
 
@@ -80,7 +81,7 @@ def make_logits_processors(
 
 @partial(mx.compile, inputs=mx.random.state, outputs=mx.random.state)
 def min_p_sampling(
-    logits: mx.array,
+    logprobs: mx.array,
     min_p: float,
     min_tokens_to_keep: int = 1,
     temperature=1.0,
@@ -93,7 +94,7 @@ def min_p_sampling(
     aggressive given a very high-probability token.
 
     Args:
-        logits: The logits from the model's output.
+        logprobs: A vector of log probabilities.
         min_p (float): Minimum token probability. Typical values are in the
             0.01-0.2 range, comparably selective as setting `top_p` in the
             0.99-0.8 range.
@@ -111,28 +112,27 @@ def min_p_sampling(
         )
     # reference implementation: https://github.com/huggingface/transformers/blob/main/src/transformers/generation/logits_process.py#L531-L605
 
-    # Softmax probabilities
-    probs = mx.softmax(logits * (1 / temperature), axis=-1)
+    logprobs = logprobs * (1 / temperature)
 
     # Indices sorted in decreasing order
-    sorted_indices = mx.argsort(-logits).squeeze(0)
-    sorted_probs = probs[..., sorted_indices]
+    sorted_indices = mx.argsort(-logprobs).squeeze(0)
+    sorted_logprobs = logprobs[..., sorted_indices]
 
     # Top probability
-    top_probs = probs[..., sorted_indices[0]]
+    top_logprobs = logprobs[..., sorted_indices[0]]
 
     # Calculate the min_p threshold
-    scaled_min_p = min_p * top_probs
+    scaled_min_p = top_logprobs + (math.log(min_p) if min_p > 0 else -float("inf"))
 
     # Mask tokens that have a probability less than the scaled min_p
-    tokens_to_remove = sorted_probs < scaled_min_p
+    tokens_to_remove = sorted_logprobs < scaled_min_p
     tokens_to_remove[..., :min_tokens_to_keep] = False
 
     # Create pool of tokens with probability less than scaled min_p
-    selected_probs = mx.where(tokens_to_remove, 0, sorted_probs)
+    selected_logprobs = mx.where(tokens_to_remove, -float("inf"), sorted_logprobs)
 
     # Return sampled token
-    sorted_token = mx.random.categorical(mx.log(selected_probs))
+    sorted_token = mx.random.categorical(selected_logprobs)
     return sorted_indices[sorted_token]
 
 
diff --git a/llms/tests/test_generate.py b/llms/tests/test_generate.py
index e0a372a9..f2345394 100644
--- a/llms/tests/test_generate.py
+++ b/llms/tests/test_generate.py
@@ -2,6 +2,7 @@
 
 import unittest
 
+from mlx_lm.sample_utils import make_logits_processors
 from mlx_lm.utils import generate, load
 
 
@@ -25,8 +26,8 @@ class TestGenerate(unittest.TestCase):
             self.tokenizer,
             "hello",
             max_tokens=5,
+            logits_processors=make_logits_processors(logit_bias),
             verbose=False,
-            logit_bias=logit_bias,
         )
         self.assertEqual(text, "!!!!!")
 
diff --git a/llms/tests/test_sample_utils.py b/llms/tests/test_sample_utils.py
index ec0e2cb7..ebc90ce8 100644
--- a/llms/tests/test_sample_utils.py
+++ b/llms/tests/test_sample_utils.py
@@ -1,10 +1,10 @@
 import unittest
 
 import mlx.core as mx
-from mlx_lm.sample_utils import top_p_sampling
+from mlx_lm.sample_utils import min_p_sampling, top_p_sampling
 
 
-class TestSamplingUtils(unittest.TestCase):
+class TestSampleUtils(unittest.TestCase):
     def test_top_p_sampling(self):
         probs = mx.array([0.9, 0.0, 0.0, 0.1])[None]
         logits = mx.log(probs)
@@ -28,6 +28,20 @@ class TestSamplingUtils(unittest.TestCase):
         token = top_p_sampling(logits, 0.95, temperature).item()
         self.assertTrue(token in (1, 2, 3))
 
+    def test_min_p_sampling(self):
+        """Only tokens with p >= min_p * max(p) may be sampled."""
+        probs = mx.array([0.9, 0.0, 0.0, 0.1])[None]
+        logprobs = mx.log(probs)
+
+        # Threshold 0.8 * 0.9 = 0.72: only token 0 survives.
+        token = min_p_sampling(logprobs, 0.8).item()
+        self.assertEqual(token, 0)
+
+        # Threshold 0.05 * 0.9 = 0.045: tokens 0 and 3 (p = 0.1) survive.
+        for _ in range(5):
+            token = min_p_sampling(logprobs, 0.05).item()
+            self.assertIn(token, (0, 3))
+
 
 if __name__ == "__main__":
     unittest.main()