# mlx-examples/llms/tests/test_sample_utils.py

import unittest

import mlx.core as mx
from mlx_lm.sample_utils import min_p_sampling, top_k_sampling, top_p_sampling


class TestSampleUtils(unittest.TestCase):
    """Tests for the top-p, min-p and top-k helpers from ``mlx_lm.sample_utils``.

    Every test builds a small hand-written probability distribution, converts
    it to logits with ``mx.log`` (zero probabilities become ``-inf`` logits)
    and asserts on what the helper under test returns.
    """

    def test_top_p_sampling(self):
        """Softmax the output of ``top_p_sampling`` and compare to expected probs."""
        probs = mx.array([0.9, 0.0, 0.0, 0.1])[None]
        logits = mx.log(probs)

        # Threshold below the largest probability: only token 0 is expected
        # to keep any mass.
        actual_logits = top_p_sampling(logits, 0.3)
        actual_probs = mx.softmax(actual_logits.squeeze())
        self.assertEqual(actual_probs.tolist(), [1.0, 0.0, 0.0, 0.0])

        # Threshold high enough that the distribution is expected to come back
        # unchanged. Compare rounded values, like the assertions further down,
        # so the log/softmax float round-trip cannot cause a spurious failure.
        actual_logits = top_p_sampling(logits, 0.95)
        actual_probs = mx.softmax(actual_logits.squeeze())
        self.assertEqual(
            [round(p, 4) for p in actual_probs.tolist()], [0.9, 0.0, 0.0, 0.1]
        )

        probs = mx.array([0.0, 0.5, 0.4, 0.1])[None]
        logits = mx.log(probs)
        actual_logits = top_p_sampling(logits, 0.4)
        actual_probs = mx.softmax(actual_logits.squeeze())
        self.assertEqual(actual_probs.tolist(), [0.0, 1.0, 0.0, 0.0])

        # Two tokens are expected to survive; after the softmax they are
        # renormalized to 0.5 / 0.9 and 0.4 / 0.9.
        actual_logits = top_p_sampling(logits, 0.6)
        actual_probs = mx.softmax(actual_logits.squeeze())
        self.assertEqual(
            [round(p, 4) for p in actual_probs.tolist()], [0.0, 0.5556, 0.4444, 0.0]
        )

        actual_logits = top_p_sampling(logits, 0.95)
        actual_probs = mx.softmax(actual_logits.squeeze())
        actual_rounded = [round(p, 4) for p in actual_probs.tolist()]
        expected_rounded = [0.0, 0.5, 0.4, 0.1]
        self.assertEqual(actual_rounded, expected_rounded)
        self.assertAlmostEqual(sum(actual_probs.tolist()), 1.0)

        # Batch mode works
        probs = mx.array([[0.9, 0.0, 0.0, 0.1], [0.0, 0.8, 0.1, 0.1]])
        logits = mx.log(probs)
        actual_logits = top_p_sampling(logits, 0.5)
        actual_probs = mx.softmax(actual_logits, axis=-1)
        self.assertEqual(
            actual_probs.tolist(), [[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0]]
        )

    def test_min_p_sampling(self):
        """Compare the result of ``min_p_sampling`` against expected token indices."""
        probs = mx.array([0.9, 0.0, 0.0, 0.1])[None]
        logits = mx.log(probs)

        # With min_p = 0.8 only token 0 is expected to be selectable.
        token = min_p_sampling(logits, 0.8)
        self.assertEqual(token, 0)

        # With min_p = 0.05 either non-zero token may come back, so repeat the
        # call a few times and only check that the result is one of them.
        for _ in range(5):
            token = min_p_sampling(logits, 0.05)
            self.assertIn(token, (0, 3))

        # Batch mode works
        probs = mx.array([[0.9, 0.0, 0.0, 0.1], [0.0, 0.8, 0.0, 0.1]])
        logits = mx.log(probs)
        tokens = min_p_sampling(logits, 0.7)
        self.assertEqual(tokens.tolist(), [0, 1])

    def test_top_k_sampling(self):
        """Check that ``top_k_sampling`` only returns tokens among the k most likely."""
        probs = mx.array([0.9, 0.0, 0.0, 0.1])[None]
        logits = mx.log(probs)

        # k = 1 leaves a single candidate: the most likely token.
        token = top_k_sampling(logits, 1).item()
        self.assertEqual(token, 0)

        probs = mx.array([0.5, 0.0, 0.0, 0.5])[None]
        # Recompute the logits for the new distribution; without this the loop
        # below would keep sampling from the previous 0.9 / 0.1 logits.
        logits = mx.log(probs)
        tokens = set()
        # Sampling is random, so draw many times and collect every token seen.
        for _ in range(100):
            token = top_k_sampling(logits, 2)
            tokens.add(token.item())
        self.assertEqual(tokens, {0, 3})

        # Batch mode works
        probs = mx.array([[0.9, 0.0, 0.0, 0.1], [0.0, 0.8, 0.0, 0.1]])
        logits = mx.log(probs)

        tokens = top_k_sampling(logits, 1)
        self.assertEqual(tokens.tolist(), [0, 1])


# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
chore(mlx-lm): fix the top_p implementation. (#602) * chore(mlx-lm): clean up the top p imp * chore: clean up * chore: add test * chore: address comments * chore: clean up docs string * chore: clean up test 2024-03-22 03:18:23 +08:00			`import unittest`

			`import mlx.core as mx`
Fix no template prompt + top_k sampling (#1166) * fix no template prompt * add top_k sampling * fix chinese 2024-12-19 10:46:50 +08:00			`from mlx_lm.sample_utils import min_p_sampling, top_k_sampling, top_p_sampling`
chore(mlx-lm): fix the top_p implementation. (#602) * chore(mlx-lm): clean up the top p imp * chore: clean up * chore: add test * chore: address comments * chore: clean up docs string * chore: clean up test 2024-03-22 03:18:23 +08:00

Generation refactor: part 2 (#1099) * unify with stream_generate * fixes * nit * some cleanup, warnings, tests * fix test + faster min p + test * version 2024-11-24 03:47:06 +08:00			`class TestSampleUtils(unittest.TestCase):`
Faster sampling with `mx.compile` (#937) * faster sampling with compile * fix test 2024-08-16 02:29:09 +08:00			`def test_top_p_sampling(self):`
			`probs = mx.array([0.9, 0.0, 0.0, 0.1])[None]`
			`logits = mx.log(probs)`

top_p refactor 2025-03-08 21:55:49 +08:00			`actual_logits = top_p_sampling(logits, 0.3)`
			`actual_probs = mx.softmax(actual_logits.squeeze())`
			`self.assertEqual(actual_probs.tolist(), [1.0, 0.0, 0.0, 0.0])`
Faster sampling with `mx.compile` (#937) * faster sampling with compile * fix test 2024-08-16 02:29:09 +08:00
top_p refactor 2025-03-08 21:55:49 +08:00			`actual_logits = top_p_sampling(logits, 0.95)`
			`actual_probs = mx.softmax(actual_logits.squeeze())`
			`self.assertEqual(probs.squeeze().tolist(), actual_probs.tolist())`
Faster sampling with `mx.compile` (#937) * faster sampling with compile * fix test 2024-08-16 02:29:09 +08:00
			`probs = mx.array([0.0, 0.5, 0.4, 0.1])[None]`
			`logits = mx.log(probs)`
top_p refactor 2025-03-08 21:55:49 +08:00			`actual_logits = top_p_sampling(logits, 0.4)`
			`actual_probs = mx.softmax(actual_logits.squeeze())`
			`self.assertEqual(actual_probs.tolist(), [0.0, 1.0, 0.0, 0.0])`

			`actual_logits = top_p_sampling(logits, 0.6)`
			`actual_probs = mx.softmax(actual_logits.squeeze())`
			`self.assertEqual(`
			`[round(p, 4) for p in actual_probs.tolist()], [0.0, 0.5556, 0.4444, 0.0]`
			`)`

			`actual_logits = top_p_sampling(logits, 0.95)`
			`actual_probs = mx.softmax(actual_logits.squeeze())`
			`actual_rounded = [round(p, 4) for p in actual_probs.tolist()]`
			`expected_rounded = [0.0, 0.5, 0.4, 0.1]`
			`self.assertEqual(actual_rounded, expected_rounded)`
			`self.assertAlmostEqual(sum(actual_probs.tolist()), 1.0)`
chore(mlx-lm): fix the top_p implementation. (#602) * chore(mlx-lm): clean up the top p imp * chore: clean up * chore: add test * chore: address comments * chore: clean up docs string * chore: clean up test 2024-03-22 03:18:23 +08:00
batched min p and fix spec gen sampling (#1222) 2025-01-28 07:40:31 +08:00			`# Batch mode works`
top_p refactor 2025-03-08 21:55:49 +08:00			`probs = mx.array([[0.9, 0.0, 0.0, 0.1], [0.0, 0.8, 0.1, 0.1]])`
batched min p and fix spec gen sampling (#1222) 2025-01-28 07:40:31 +08:00			`logits = mx.log(probs)`
top_p refactor 2025-03-08 21:55:49 +08:00			`actual_logits = top_p_sampling(logits, 0.5)`
			`actual_probs = mx.softmax(actual_logits, axis=-1)`
			`self.assertEqual(`
			`actual_probs.tolist(), [[1.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 0.0]]`
			`)`
batched min p and fix spec gen sampling (#1222) 2025-01-28 07:40:31 +08:00
Generation refactor: part 2 (#1099) * unify with stream_generate * fixes * nit * some cleanup, warnings, tests * fix test + faster min p + test * version 2024-11-24 03:47:06 +08:00			`def test_min_p_sampling(self):`
			`probs = mx.array([0.9, 0.0, 0.0, 0.1])[None]`
			`logits = mx.log(probs)`
			`temperature = 1.0`
			`token = min_p_sampling(logits, 0.8)`
			`self.assertEqual(token, 0)`

			`probs = mx.array([0.9, 0.0, 0.0, 0.1])[None]`
			`logits = mx.log(probs)`
			`temperature = 1.0`
			`for _ in range(5):`
			`token = min_p_sampling(logits, 0.05)`
			`self.assertTrue(token in (0, 3))`

batched min p and fix spec gen sampling (#1222) 2025-01-28 07:40:31 +08:00			`# Batch mode works`
			`probs = mx.array([[0.9, 0.0, 0.0, 0.1], [0.0, 0.8, 0.0, 0.1]])`
			`logits = mx.log(probs)`
			`tokens = min_p_sampling(logits, 0.7)`
			`self.assertEqual(tokens.tolist(), [0, 1])`

Fix no template prompt + top_k sampling (#1166) * fix no template prompt * add top_k sampling * fix chinese 2024-12-19 10:46:50 +08:00			`def test_top_k_sampling(self):`
			`probs = mx.array([0.9, 0.0, 0.0, 0.1])[None]`
			`logits = mx.log(probs)`

			`token = top_k_sampling(logits, 1).item()`
			`self.assertEqual(token, 0)`

			`probs = mx.array([0.5, 0.0, 0.0, 0.5])[None]`
			`tokens = set()`
			`for _ in range(100):`
			`token = top_k_sampling(logits, 2)`
			`tokens.add(token.item())`
			`self.assertEqual(tokens, {0, 3})`

			`# Batch mode works`
			`probs = mx.array([[0.9, 0.0, 0.0, 0.1], [0.0, 0.8, 0.0, 0.1]])`
			`logits = mx.log(probs)`

			`tokens = top_k_sampling(logits, 1)`
			`self.assertEqual(tokens.tolist(), [0, 1])`

chore(mlx-lm): fix the top_p implementation. (#602) * chore(mlx-lm): clean up the top p imp * chore: clean up * chore: add test * chore: address comments * chore: clean up docs string * chore: clean up test 2024-03-22 03:18:23 +08:00
			`if __name__ == "__main__":`
			`unittest.main()`