This commit is contained in:
CircleCI Docs
2025-08-29 17:13:50 +00:00
parent 9a878bae64
commit ec3f487eed
547 changed files with 10067 additions and 2222 deletions

View File

@@ -8,7 +8,7 @@
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<title>mlx.nn.quantize &#8212; MLX 0.28.0 documentation</title>
<title>mlx.nn.quantize &#8212; MLX 0.29.0 documentation</title>
@@ -30,15 +30,18 @@
<link rel="stylesheet" type="text/css" href="../../_static/pygments.css?v=03e43079" />
<link rel="stylesheet" type="text/css" href="../../_static/styles/sphinx-book-theme.css?v=eba8b062" />
<link rel="stylesheet" type="text/css" href="../../_static/copybutton.css?v=76b2166b" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../../_static/scripts/bootstrap.js?digest=dfe6caa3a7d634c4db9b" />
<link rel="preload" as="script" href="../../_static/scripts/pydata-sphinx-theme.js?digest=dfe6caa3a7d634c4db9b" />
<script src="../../_static/vendor/fontawesome/6.5.2/js/all.min.js?digest=dfe6caa3a7d634c4db9b"></script>
<script src="../../_static/documentation_options.js?v=60827de6"></script>
<script src="../../_static/documentation_options.js?v=9c58480a"></script>
<script src="../../_static/doctools.js?v=9a2dae69"></script>
<script src="../../_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="../../_static/clipboard.min.js?v=a7894cd8"></script>
<script src="../../_static/copybutton.js?v=f281be69"></script>
<script src="../../_static/scripts/sphinx-book-theme.js?v=887ef09a"></script>
<script>DOCUMENTATION_OPTIONS.pagename = 'python/_autosummary/mlx.nn.quantize';</script>
<link rel="icon" href="../../_static/mlx_logo.png"/>
@@ -137,8 +140,8 @@
<img src="../../_static/mlx_logo.png" class="logo__image only-light" alt="MLX 0.28.0 documentation - Home"/>
<script>document.write(`<img src="../../_static/mlx_logo_dark.png" class="logo__image only-dark" alt="MLX 0.28.0 documentation - Home"/>`);</script>
<img src="../../_static/mlx_logo.png" class="logo__image only-light" alt="MLX 0.29.0 documentation - Home"/>
<script>document.write(`<img src="../../_static/mlx_logo_dark.png" class="logo__image only-dark" alt="MLX 0.29.0 documentation - Home"/>`);</script>
</a></div>
@@ -477,6 +480,7 @@
<li class="toctree-l2"><a class="reference internal" href="mlx.core.fast.rope.html">mlx.core.fast.rope</a></li>
<li class="toctree-l2"><a class="reference internal" href="mlx.core.fast.scaled_dot_product_attention.html">mlx.core.fast.scaled_dot_product_attention</a></li>
<li class="toctree-l2"><a class="reference internal" href="mlx.core.fast.metal_kernel.html">mlx.core.fast.metal_kernel</a></li>
<li class="toctree-l2"><a class="reference internal" href="mlx.core.fast.cuda_kernel.html">mlx.core.fast.cuda_kernel</a></li>
</ul>
</details></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../fft.html">FFT</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
@@ -523,6 +527,10 @@
<li class="toctree-l2"><a class="reference internal" href="mlx.core.metal.stop_capture.html">mlx.core.metal.stop_capture</a></li>
</ul>
</details></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../cuda.html">CUDA</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="mlx.core.cuda.is_available.html">mlx.core.cuda.is_available</a></li>
</ul>
</details></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../memory_management.html">Memory Management</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="mlx.core.get_active_memory.html">mlx.core.get_active_memory</a></li>
<li class="toctree-l2"><a class="reference internal" href="mlx.core.get_peak_memory.html">mlx.core.get_peak_memory</a></li>
@@ -922,7 +930,7 @@ document.write(`
<h1>mlx.nn.quantize<a class="headerlink" href="#mlx-nn-quantize" title="Link to this heading">#</a></h1>
<dl class="py function">
<dt class="sig sig-object py" id="mlx.nn.quantize">
<span class="sig-name descname"><span class="pre">quantize</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../nn/module.html#mlx.nn.Module" title="mlx.nn.layers.base.Module"><span class="pre">Module</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">group_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><span class="pre">int</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">64</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bits</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><span class="pre">int</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">4</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">class_predicate</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Callable" title="(in Python v3.13)"><span class="pre">Callable</span></a><span class="p"><span class="pre">[</span></span><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="../nn/module.html#mlx.nn.Module" title="mlx.nn.layers.base.Module"><span class="pre">Module</span></a><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><span class="pre">bool</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><span class="pre">dict</span></a><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#mlx.nn.quantize" title="Link to this definition">#</a></dt>
<span class="sig-name descname"><span class="pre">quantize</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">model</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference internal" href="../nn/module.html#mlx.nn.Module" title="mlx.nn.layers.base.Module"><span class="pre">Module</span></a></span></em>, <em class="sig-param"><span class="n"><span class="pre">group_size</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><span class="pre">int</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">64</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">bits</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><span class="pre">int</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">4</span></span></em>, <em class="sig-param"><span class="o"><span class="pre">*</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">mode</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">'affine'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">class_predicate</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><a class="reference external" href="https://docs.python.org/3/library/typing.html#typing.Callable" title="(in Python v3.13)"><span class="pre">Callable</span></a><span class="p"><span class="pre">[</span></span><span class="p"><span class="pre">[</span></span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><span class="pre">str</span></a><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference internal" href="../nn/module.html#mlx.nn.Module" title="mlx.nn.layers.base.Module"><span class="pre">Module</span></a><span class="p"><span class="pre">]</span></span><span class="p"><span class="pre">,</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.13)"><span class="pre">bool</span></a><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#dict" title="(in Python v3.13)"><span class="pre">dict</span></a><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><a class="reference external" href="https://docs.python.org/3/library/constants.html#None" title="(in Python v3.13)"><span class="pre">None</span></a></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#mlx.nn.quantize" title="Link to this definition">#</a></dt>
<dd><p>Quantize the sub-modules of a module according to a predicate.</p>
<p>By default all layers that define a <code class="docutils literal notranslate"><span class="pre">to_quantized(group_size,</span> <span class="pre">bits)</span></code>
method will be quantized. Both <a class="reference internal" href="../nn/_autosummary/mlx.nn.Linear.html#mlx.nn.Linear" title="mlx.nn.Linear"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Linear</span></code></a> and <a class="reference internal" href="../nn/_autosummary/mlx.nn.Embedding.html#mlx.nn.Embedding" title="mlx.nn.Embedding"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Embedding</span></code></a> layers
@@ -935,6 +943,8 @@ will be quantized. Note also, the module is updated in-place.</p>
<a class="reference internal" href="mlx.core.quantize.html#mlx.core.quantize" title="mlx.core.quantize"><code class="xref py py-func docutils literal notranslate"><span class="pre">mlx.core.quantize()</span></code></a>). Default: <code class="docutils literal notranslate"><span class="pre">64</span></code>.</p></li>
<li><p><strong>bits</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.13)"><em>int</em></a>) The number of bits per parameter (see
<a class="reference internal" href="mlx.core.quantize.html#mlx.core.quantize" title="mlx.core.quantize"><code class="xref py py-func docutils literal notranslate"><span class="pre">mlx.core.quantize()</span></code></a>). Default: <code class="docutils literal notranslate"><span class="pre">4</span></code>.</p></li>
<li><p><strong>mode</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.13)"><em>str</em></a>) The quantization method to use (see
<a class="reference internal" href="mlx.core.quantize.html#mlx.core.quantize" title="mlx.core.quantize"><code class="xref py py-func docutils literal notranslate"><span class="pre">mlx.core.quantize()</span></code></a>). Default: <code class="docutils literal notranslate"><span class="pre">&quot;affine&quot;</span></code>.</p></li>
<li><p><strong>class_predicate</strong> (<em>Optional</em><em>[</em><em>Callable</em><em>]</em>) A callable which receives the
<a class="reference internal" href="../nn/module.html#mlx.nn.Module" title="mlx.nn.Module"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Module</span></code></a> path and <a class="reference internal" href="../nn/module.html#mlx.nn.Module" title="mlx.nn.Module"><code class="xref py py-obj docutils literal notranslate"><span class="pre">Module</span></code></a> itself and returns <code class="docutils literal notranslate"><span class="pre">True</span></code> or a
dict of params for <cite>to_quantized</cite> if it should be quantized and