mlx/docs/build/html/quantized_8h.html
2025-06-04 01:01:51 +00:00

2088 lines
106 KiB
HTML

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US">
<head>
<meta http-equiv="Content-Type" content="text/html;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=11"/>
<meta name="generator" content="Doxygen 1.10.0"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>MLX: mlx/backend/metal/kernels/quantized.h File Reference</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<script type="text/javascript" src="clipboard.js"></script>
<script type="text/javascript" src="cookie.js"></script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/searchdata.js"></script>
<script type="text/javascript" src="search/search.js"></script>
<link href="doxygen.css" rel="stylesheet" type="text/css" />
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr id="projectrow">
<td id="projectalign">
<div id="projectname">MLX
</div>
</td>
</tr>
</tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.10.0 -->
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
// Page-wide search box controller. It is kept as a global because the inline
// onmouseover / onmouseout / onkeydown handlers on the MSearchSelectWindow
// element further down call methods on `searchBox` by name.
// NOTE(review): SearchBox is not defined in this file; presumably it is
// provided by search/search.js (loaded in the head) - confirm.
var searchBox = new SearchBox(
  "searchBox",
  "search/",
  ".html"
);
/* @license-end */
</script>
<script type="text/javascript" src="menudata.js"></script>
<script type="text/javascript" src="menu.js"></script>
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
// Registers a callback through `$` (presumably jQuery, loaded from jquery.js
// in the head - confirm), which in turn sets up the menu and the search UI.
$(function() {
  // NOTE(review): initMenu is defined outside this file (presumably menu.js,
  // included just above); the meaning of each argument is not visible here.
  initMenu("", true, false, "search.php", "Search");

  // init_search is deliberately registered as its own nested callback rather
  // than being called inline, so the original callback ordering is preserved.
  $(function() {
    init_search();
  });
});
/* @license-end */
</script>
<div id="main-nav"></div>
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
</div>
<!-- container (div) showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<div id="MSearchResults">
<div class="SRPage">
<div id="SRIndex">
<div id="SRResults"></div>
<div class="SRStatus" id="Loading">Loading...</div>
<div class="SRStatus" id="Searching">Searching...</div>
<div class="SRStatus" id="NoMatches">No Matches</div>
</div>
</div>
</div>
</div>
<div id="nav-path" class="navpath">
<ul>
<li class="navelem"><a class="el" href="dir_938ab0ecf10b8b860ff766c820f665fd.html">mlx</a></li><li class="navelem"><a class="el" href="dir_1d446c9bd3c99228254c9484e0bc5c06.html">backend</a></li><li class="navelem"><a class="el" href="dir_d0c977ea65824390717cdb7efc36c157.html">metal</a></li><li class="navelem"><a class="el" href="dir_70a37effa88bcbd6b791977fa1e64356.html">kernels</a></li> </ul>
</div>
</div><!-- top -->
<div class="header">
<div class="summary">
<a href="#nested-classes">Classes</a> &#124;
<a href="#define-members">Macros</a> &#124;
<a href="#func-members">Functions</a> &#124;
<a href="#var-members">Variables</a> </div>
<div class="headertitle"><div class="title">quantized.h File Reference</div></div>
</div><!--header-->
<div class="contents">
<div class="textblock"><code>#include &lt;metal_simdgroup&gt;</code><br />
<code>#include &lt;metal_stdlib&gt;</code><br />
</div>
<p><a href="quantized_8h_source.html">Go to the source code of this file.</a></p>
<table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a id="nested-classes" name="nested-classes"></a>
Classes</h2></td></tr>
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="struct_quantized_block_loader.html">QuantizedBlockLoader&lt; T, BROWS, BCOLS, dst_ld, reduction_dim, tgp_size, group_size, bits &gt;</a></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a id="define-members" name="define-members"></a>
Macros</h2></td></tr>
<tr class="memitem:a0386011c52d03e60885a31e6fbd903dd" id="r_a0386011c52d03e60885a31e6fbd903dd"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="#a0386011c52d03e60885a31e6fbd903dd">MLX_MTL_CONST</a>&#160;&#160;&#160;static constant constexpr const</td></tr>
<tr class="separator:a0386011c52d03e60885a31e6fbd903dd"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a id="func-members" name="func-members"></a>
Functions</h2></td></tr>
<tr class="memitem:a8dbace41de9e1e21dd59d016db11b3e9" id="r_a8dbace41de9e1e21dd59d016db11b3e9"><td class="memTemplParams" colspan="2">template&lt;typename T , typename U , int values_per_thread, int bits&gt; </td></tr>
<tr class="memitem:a8dbace41de9e1e21dd59d016db11b3e9"><td class="memTemplItemLeft" align="right" valign="top">U&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#a8dbace41de9e1e21dd59d016db11b3e9">load_vector</a> (const device T *x, thread U *x_thread)</td></tr>
<tr class="separator:a8dbace41de9e1e21dd59d016db11b3e9"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:aa69e143d646fad332c1a53e8c9b337b7" id="r_aa69e143d646fad332c1a53e8c9b337b7"><td class="memTemplParams" colspan="2">template&lt;typename T , typename U , int values_per_thread, int bits&gt; </td></tr>
<tr class="memitem:aa69e143d646fad332c1a53e8c9b337b7"><td class="memTemplItemLeft" align="right" valign="top">U&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#aa69e143d646fad332c1a53e8c9b337b7">load_vector_safe</a> (const device T *x, thread U *x_thread, int N)</td></tr>
<tr class="separator:aa69e143d646fad332c1a53e8c9b337b7"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ab364d58ab652e3ad87a8f80910556071" id="r_ab364d58ab652e3ad87a8f80910556071"><td class="memTemplParams" colspan="2">template&lt;typename U , int values_per_thread, int bits&gt; </td></tr>
<tr class="memitem:ab364d58ab652e3ad87a8f80910556071"><td class="memTemplItemLeft" align="right" valign="top">U&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#ab364d58ab652e3ad87a8f80910556071">qdot</a> (const device uint8_t *w, const thread U *x_thread, U scale, U bias, U sum)</td></tr>
<tr class="separator:ab364d58ab652e3ad87a8f80910556071"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a07b26d2d0b0d65dfe925c452c453fa42" id="r_a07b26d2d0b0d65dfe925c452c453fa42"><td class="memTemplParams" colspan="2">template&lt;typename U , int values_per_thread, int bits&gt; </td></tr>
<tr class="memitem:a07b26d2d0b0d65dfe925c452c453fa42"><td class="memTemplItemLeft" align="right" valign="top">U&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#a07b26d2d0b0d65dfe925c452c453fa42">qdot_safe</a> (const device uint8_t *w, const thread U *x_thread, U scale, U bias, U sum, int N)</td></tr>
<tr class="separator:a07b26d2d0b0d65dfe925c452c453fa42"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ae756f6817b584c60f5dcdd1d9c6b4f58" id="r_ae756f6817b584c60f5dcdd1d9c6b4f58"><td class="memTemplParams" colspan="2">template&lt;typename U , int values_per_thread, int bits&gt; </td></tr>
<tr class="memitem:ae756f6817b584c60f5dcdd1d9c6b4f58"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#ae756f6817b584c60f5dcdd1d9c6b4f58">qouter</a> (const thread uint8_t *w, U x, U scale, U bias, thread U *result)</td></tr>
<tr class="separator:ae756f6817b584c60f5dcdd1d9c6b4f58"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:aecff265b63566d0d5689cfc4e5b037d2" id="r_aecff265b63566d0d5689cfc4e5b037d2"><td class="memTemplParams" colspan="2">template&lt;typename U , int N, int bits&gt; </td></tr>
<tr class="memitem:aecff265b63566d0d5689cfc4e5b037d2"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#aecff265b63566d0d5689cfc4e5b037d2">dequantize</a> (const device uint8_t *w, U scale, U bias, threadgroup U *w_local)</td></tr>
<tr class="separator:aecff265b63566d0d5689cfc4e5b037d2"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:aba7687e6f8f1d29c0a1b2a3db150bd81" id="r_aba7687e6f8f1d29c0a1b2a3db150bd81"><td class="memTemplParams" colspan="2">template&lt;typename T , int group_size, int bits&gt; </td></tr>
<tr class="memitem:aba7687e6f8f1d29c0a1b2a3db150bd81"><td class="memTemplItemLeft" align="right" valign="top">METAL_FUNC void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#aba7687e6f8f1d29c0a1b2a3db150bd81">qmv_fast_impl</a> (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, const constant int &amp;in_vec_size, const constant int &amp;out_vec_size, uint3 tid, uint simd_gid, uint simd_lid)</td></tr>
<tr class="separator:aba7687e6f8f1d29c0a1b2a3db150bd81"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a8e13c7d895624f738d2a6d9893b687fd" id="r_a8e13c7d895624f738d2a6d9893b687fd"><td class="memTemplParams" colspan="2">template&lt;typename T , int group_size, int bits&gt; </td></tr>
<tr class="memitem:a8e13c7d895624f738d2a6d9893b687fd"><td class="memTemplItemLeft" align="right" valign="top">METAL_FUNC void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#a8e13c7d895624f738d2a6d9893b687fd">qmv_impl</a> (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, const constant int &amp;in_vec_size, const constant int &amp;out_vec_size, uint3 tid, uint simd_gid, uint simd_lid)</td></tr>
<tr class="separator:a8e13c7d895624f738d2a6d9893b687fd"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a9d466e3e3af0d75cfc4eddcf46cb13e4" id="r_a9d466e3e3af0d75cfc4eddcf46cb13e4"><td class="memTemplParams" colspan="2">template&lt;typename T , const int group_size, const int bits&gt; </td></tr>
<tr class="memitem:a9d466e3e3af0d75cfc4eddcf46cb13e4"><td class="memTemplItemLeft" align="right" valign="top">METAL_FUNC void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#a9d466e3e3af0d75cfc4eddcf46cb13e4">qvm_impl</a> (const device T *x, const device uint32_t *w, const device T *scales, const device T *biases, device T *y, const constant int &amp;in_vec_size, const constant int &amp;out_vec_size, uint3 tid, uint simd_gid, uint simd_lid)</td></tr>
<tr class="separator:a9d466e3e3af0d75cfc4eddcf46cb13e4"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ac7b6accc41b026ef8efc312987aa9efe" id="r_ac7b6accc41b026ef8efc312987aa9efe"><td class="memTemplParams" colspan="2">template&lt;typename T , const int group_size, const int bits, const bool aligned_N, const int BM = 32, const int BK = 32, const int BN = 32&gt; </td></tr>
<tr class="memitem:ac7b6accc41b026ef8efc312987aa9efe"><td class="memTemplItemLeft" align="right" valign="top">METAL_FUNC void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#ac7b6accc41b026ef8efc312987aa9efe">qmm_t_impl</a> (const device T *x, const device uint32_t *w, const device T *scales, const device T *biases, device T *y, threadgroup T *Xs, threadgroup T *Ws, const constant int &amp;M, const constant int &amp;N, const constant int &amp;K, uint3 tid, uint lid, uint simd_gid, uint simd_lid)</td></tr>
<tr class="separator:ac7b6accc41b026ef8efc312987aa9efe"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a5f7b71eef2e328af3225d7c777ffb339" id="r_a5f7b71eef2e328af3225d7c777ffb339"><td class="memTemplParams" colspan="2">template&lt;typename T , const int group_size, const int bits, const int BM = 32, const int BK = 32, const int BN = 32&gt; </td></tr>
<tr class="memitem:a5f7b71eef2e328af3225d7c777ffb339"><td class="memTemplItemLeft" align="right" valign="top">METAL_FUNC void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#a5f7b71eef2e328af3225d7c777ffb339">qmm_n_impl</a> (const device T *x, const device uint32_t *w, const device T *scales, const device T *biases, device T *y, threadgroup T *Xs, threadgroup T *Ws, const constant int &amp;M, const constant int &amp;N, const constant int &amp;K, uint3 tid, uint lid, uint simd_gid, uint simd_lid)</td></tr>
<tr class="separator:a5f7b71eef2e328af3225d7c777ffb339"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a3ab400746ad77be89c30d25638e01698" id="r_a3ab400746ad77be89c30d25638e01698"><td class="memTemplParams" colspan="2">template&lt;typename T &gt; </td></tr>
<tr class="memitem:a3ab400746ad77be89c30d25638e01698"><td class="memTemplItemLeft" align="right" valign="top">METAL_FUNC void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#a3ab400746ad77be89c30d25638e01698">adjust_matrix_offsets</a> (const device T *&amp;x, const device uint32_t *&amp;w, const device T *&amp;scales, const device T *&amp;biases, const device uint32_t *lhs_indices, const device uint32_t *rhs_indices, device T *&amp;y, int output_stride, const constant int &amp;batch_ndims, const constant int *batch_shape, const constant size_t *lhs_strides, const constant size_t *rhs_strides, const constant int &amp;x_batch_ndims, const constant int *x_shape, const constant size_t *x_strides, const constant int &amp;w_batch_ndims, const constant int *w_shape, const constant size_t *w_strides, const constant size_t *s_strides, const constant size_t *b_strides, uint3 tid)</td></tr>
<tr class="separator:a3ab400746ad77be89c30d25638e01698"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ad6d0aa9b080358581844d6583aa2f4ea" id="r_ad6d0aa9b080358581844d6583aa2f4ea"><td class="memTemplParams" colspan="2">template&lt;typename T , int group_size, int bits&gt; </td></tr>
<tr class="memitem:ad6d0aa9b080358581844d6583aa2f4ea"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#ad6d0aa9b080358581844d6583aa2f4ea">qmv_fast</a> (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, const constant int &amp;in_vec_size, const constant int &amp;out_vec_size, uint3 tid, uint simd_gid, uint simd_lid)</td></tr>
<tr class="separator:ad6d0aa9b080358581844d6583aa2f4ea"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ae024e11c3e38dda71fef772ff0a82fe2" id="r_ae024e11c3e38dda71fef772ff0a82fe2"><td class="memTemplParams" colspan="2">template&lt;typename T , const int group_size, const int bits&gt; </td></tr>
<tr class="memitem:ae024e11c3e38dda71fef772ff0a82fe2"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#ae024e11c3e38dda71fef772ff0a82fe2">qmv</a> (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, const constant int &amp;in_vec_size, const constant int &amp;out_vec_size, uint3 tid, uint simd_gid, uint simd_lid)</td></tr>
<tr class="separator:ae024e11c3e38dda71fef772ff0a82fe2"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a2cce78e6692cfcbab8f6aa89afa61580" id="r_a2cce78e6692cfcbab8f6aa89afa61580"><td class="memTemplParams" colspan="2">template&lt;typename T , const int group_size, const int bits&gt; </td></tr>
<tr class="memitem:a2cce78e6692cfcbab8f6aa89afa61580"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#a2cce78e6692cfcbab8f6aa89afa61580">qvm</a> (const device T *x, const device uint32_t *w, const device T *scales, const device T *biases, device T *y, const constant int &amp;in_vec_size, const constant int &amp;out_vec_size, uint3 tid, uint simd_gid, uint simd_lid)</td></tr>
<tr class="separator:a2cce78e6692cfcbab8f6aa89afa61580"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:af9b25269ab95632e343631eeef79dc8d" id="r_af9b25269ab95632e343631eeef79dc8d"><td class="memTemplParams" colspan="2">template&lt;typename T , const int group_size, const int bits, const bool aligned_N, const int BM = 32, const int BK = 32, const int BN = 32&gt; </td></tr>
<tr class="memitem:af9b25269ab95632e343631eeef79dc8d"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#af9b25269ab95632e343631eeef79dc8d">qmm_t</a> (const device T *x, const device uint32_t *w, const device T *scales, const device T *biases, device T *y, const constant int &amp;M, const constant int &amp;N, const constant int &amp;K, uint3 tid, uint lid, uint simd_gid, uint simd_lid)</td></tr>
<tr class="separator:af9b25269ab95632e343631eeef79dc8d"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:ab5c4cfe531d7a0e1c19353060e3d3a6c" id="r_ab5c4cfe531d7a0e1c19353060e3d3a6c"><td class="memTemplParams" colspan="2">template&lt;typename T , const int group_size, const int bits, const int BM = 32, const int BK = 32, const int BN = 32&gt; </td></tr>
<tr class="memitem:ab5c4cfe531d7a0e1c19353060e3d3a6c"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#ab5c4cfe531d7a0e1c19353060e3d3a6c">qmm_n</a> (const device T *x, const device uint32_t *w, const device T *scales, const device T *biases, device T *y, const constant int &amp;M, const constant int &amp;N, const constant int &amp;K, uint3 tid, uint lid, uint simd_gid, uint simd_lid)</td></tr>
<tr class="separator:ab5c4cfe531d7a0e1c19353060e3d3a6c"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a6f92bb9f3d29d707bfc680bebe1c80f7" id="r_a6f92bb9f3d29d707bfc680bebe1c80f7"><td class="memTemplParams" colspan="2">template&lt;typename T , int group_size, int bits&gt; </td></tr>
<tr class="memitem:a6f92bb9f3d29d707bfc680bebe1c80f7"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#a6f92bb9f3d29d707bfc680bebe1c80f7">bs_qmv_fast</a> (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, const device uint32_t *lhs_indices, const device uint32_t *rhs_indices, device T *y, const constant int &amp;in_vec_size, const constant int &amp;out_vec_size, const constant int &amp;batch_ndims, const constant int *batch_shape, const constant size_t *lhs_strides, const constant size_t *rhs_strides, const constant int &amp;x_batch_ndims, const constant int *x_shape, const constant size_t *x_strides, const constant int &amp;w_batch_ndims, const constant int *w_shape, const constant size_t *w_strides, const constant size_t *s_strides, const constant size_t *b_strides, uint3 tid, uint simd_gid, uint simd_lid)</td></tr>
<tr class="separator:a6f92bb9f3d29d707bfc680bebe1c80f7"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a03ba4a4a5fe9955319b0aa477d2d7d98" id="r_a03ba4a4a5fe9955319b0aa477d2d7d98"><td class="memTemplParams" colspan="2">template&lt;typename T , int group_size, int bits&gt; </td></tr>
<tr class="memitem:a03ba4a4a5fe9955319b0aa477d2d7d98"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#a03ba4a4a5fe9955319b0aa477d2d7d98">bs_qmv</a> (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, const device uint32_t *lhs_indices, const device uint32_t *rhs_indices, device T *y, const constant int &amp;in_vec_size, const constant int &amp;out_vec_size, const constant int &amp;batch_ndims, const constant int *batch_shape, const constant size_t *lhs_strides, const constant size_t *rhs_strides, const constant int &amp;x_batch_ndims, const constant int *x_shape, const constant size_t *x_strides, const constant int &amp;w_batch_ndims, const constant int *w_shape, const constant size_t *w_strides, const constant size_t *s_strides, const constant size_t *b_strides, uint3 tid, uint simd_gid, uint simd_lid)</td></tr>
<tr class="separator:a03ba4a4a5fe9955319b0aa477d2d7d98"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a3af1c89416632c9275b8218a33cb8a04" id="r_a3af1c89416632c9275b8218a33cb8a04"><td class="memTemplParams" colspan="2">template&lt;typename T , int group_size, int bits&gt; </td></tr>
<tr class="memitem:a3af1c89416632c9275b8218a33cb8a04"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#a3af1c89416632c9275b8218a33cb8a04">bs_qvm</a> (const device T *x, const device uint32_t *w, const device T *scales, const device T *biases, const device uint32_t *lhs_indices, const device uint32_t *rhs_indices, device T *y, const constant int &amp;in_vec_size, const constant int &amp;out_vec_size, const constant int &amp;batch_ndims, const constant int *batch_shape, const constant size_t *lhs_strides, const constant size_t *rhs_strides, const constant int &amp;x_batch_ndims, const constant int *x_shape, const constant size_t *x_strides, const constant int &amp;w_batch_ndims, const constant int *w_shape, const constant size_t *w_strides, const constant size_t *s_strides, const constant size_t *b_strides, uint3 tid, uint simd_gid, uint simd_lid)</td></tr>
<tr class="separator:a3af1c89416632c9275b8218a33cb8a04"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a3acc2ace766cb855f13da2d1834e7dc7" id="r_a3acc2ace766cb855f13da2d1834e7dc7"><td class="memTemplParams" colspan="2">template&lt;typename T , const int group_size, const int bits, const bool aligned_N, const int BM = 32, const int BK = 32, const int BN = 32&gt; </td></tr>
<tr class="memitem:a3acc2ace766cb855f13da2d1834e7dc7"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#a3acc2ace766cb855f13da2d1834e7dc7">bs_qmm_t</a> (const device T *x, const device uint32_t *w, const device T *scales, const device T *biases, const device uint32_t *lhs_indices, const device uint32_t *rhs_indices, device T *y, const constant int &amp;M, const constant int &amp;N, const constant int &amp;K, const constant int &amp;batch_ndims, const constant int *batch_shape, const constant size_t *lhs_strides, const constant size_t *rhs_strides, const constant int &amp;x_batch_ndims, const constant int *x_shape, const constant size_t *x_strides, const constant int &amp;w_batch_ndims, const constant int *w_shape, const constant size_t *w_strides, const constant size_t *s_strides, const constant size_t *b_strides, uint3 tid, uint lid, uint simd_gid, uint simd_lid)</td></tr>
<tr class="separator:a3acc2ace766cb855f13da2d1834e7dc7"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:acfc43edcf0c742ddc090d22cb7229284" id="r_acfc43edcf0c742ddc090d22cb7229284"><td class="memTemplParams" colspan="2">template&lt;typename T , const int group_size, const int bits, const int BM = 32, const int BK = 32, const int BN = 32&gt; </td></tr>
<tr class="memitem:acfc43edcf0c742ddc090d22cb7229284"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#acfc43edcf0c742ddc090d22cb7229284">bs_qmm_n</a> (const device T *x, const device uint32_t *w, const device T *scales, const device T *biases, const device uint32_t *lhs_indices, const device uint32_t *rhs_indices, device T *y, const constant int &amp;M, const constant int &amp;N, const constant int &amp;K, const constant int &amp;batch_ndims, const constant int *batch_shape, const constant size_t *lhs_strides, const constant size_t *rhs_strides, const constant int &amp;x_batch_ndims, const constant int *x_shape, const constant size_t *x_strides, const constant int &amp;w_batch_ndims, const constant int *w_shape, const constant size_t *w_strides, const constant size_t *s_strides, const constant size_t *b_strides, uint3 tid, uint lid, uint simd_gid, uint simd_lid)</td></tr>
<tr class="separator:acfc43edcf0c742ddc090d22cb7229284"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a47610f886f988d84c3e789eb564a6c44" id="r_a47610f886f988d84c3e789eb564a6c44"><td class="memTemplParams" colspan="2">template&lt;typename T , const int group_size, const int bits&gt; </td></tr>
<tr class="memitem:a47610f886f988d84c3e789eb564a6c44"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#a47610f886f988d84c3e789eb564a6c44">affine_quantize</a> (const device T *w, device uint8_t *out, device T *scales, device T *biases, uint index)</td></tr>
<tr class="separator:a47610f886f988d84c3e789eb564a6c44"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a57cd320aab5fa1abb65f79b0b781f575" id="r_a57cd320aab5fa1abb65f79b0b781f575"><td class="memTemplParams" colspan="2">template&lt;typename T , const int group_size, const int bits&gt; </td></tr>
<tr class="memitem:a57cd320aab5fa1abb65f79b0b781f575"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#a57cd320aab5fa1abb65f79b0b781f575">affine_quantize_scales_biases</a> (const device T *w, const device T *scales, const device T *biases, device uint8_t *out, uint index)</td></tr>
<tr class="separator:a57cd320aab5fa1abb65f79b0b781f575"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:aecc3db76dcae742776e71ec8fca470c5" id="r_aecc3db76dcae742776e71ec8fca470c5"><td class="memTemplParams" colspan="2">template&lt;typename T , const int group_size, const int bits&gt; </td></tr>
<tr class="memitem:aecc3db76dcae742776e71ec8fca470c5"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#aecc3db76dcae742776e71ec8fca470c5">affine_dequantize</a> (const device uint8_t *w, const device T *scales, const device T *biases, device T *out, uint index)</td></tr>
<tr class="separator:aecc3db76dcae742776e71ec8fca470c5"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table><table class="memberdecls">
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a id="var-members" name="var-members"></a>
Variables</h2></td></tr>
<tr class="memitem:a62969a218d93680f5e35d0c61b160b99" id="r_a62969a218d93680f5e35d0c61b160b99"><td class="memItemLeft" align="right" valign="top">static constant constexpr const int&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="#a62969a218d93680f5e35d0c61b160b99">SIMD_SIZE</a> = 32</td></tr>
<tr class="separator:a62969a218d93680f5e35d0c61b160b99"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table>
<h2 class="groupheader">Macro Definition Documentation</h2>
<a id="a0386011c52d03e60885a31e6fbd903dd" name="a0386011c52d03e60885a31e6fbd903dd"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a0386011c52d03e60885a31e6fbd903dd">&#9670;&#160;</a></span>MLX_MTL_CONST</h2>
<div class="memitem">
<div class="memproto">
<table class="memname">
<tr>
<td class="memname">#define MLX_MTL_CONST&#160;&#160;&#160;static constant constexpr const</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<h2 class="groupheader">Function Documentation</h2>
<a id="a3ab400746ad77be89c30d25638e01698" name="a3ab400746ad77be89c30d25638e01698"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a3ab400746ad77be89c30d25638e01698">&#9670;&#160;</a></span>adjust_matrix_offsets()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T &gt; </div>
<table class="memname">
<tr>
<td class="memname">METAL_FUNC void adjust_matrix_offsets </td>
<td>(</td>
<td class="paramtype">const device T *&amp;</td> <td class="paramname"><span class="paramname"><em>x</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *&amp;</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *&amp;</td> <td class="paramname"><span class="paramname"><em>scales</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *&amp;</td> <td class="paramname"><span class="paramname"><em>biases</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>lhs_indices</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>rhs_indices</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device T *&amp;</td> <td class="paramname"><span class="paramname"><em>y</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int</td> <td class="paramname"><span class="paramname"><em>output_stride</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>batch_ndims</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int *</td> <td class="paramname"><span class="paramname"><em>batch_shape</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>lhs_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>rhs_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>x_batch_ndims</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int *</td> <td class="paramname"><span class="paramname"><em>x_shape</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>x_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>w_batch_ndims</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int *</td> <td class="paramname"><span class="paramname"><em>w_shape</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>w_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>s_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>b_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint3</td> <td class="paramname"><span class="paramname"><em>tid</em></span>&#160;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="aecc3db76dcae742776e71ec8fca470c5" name="aecc3db76dcae742776e71ec8fca470c5"></a>
<h2 class="memtitle"><span class="permalink"><a href="#aecc3db76dcae742776e71ec8fca470c5">&#9670;&#160;</a></span>affine_dequantize()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , const int group_size, const int bits&gt; </div>
<table class="memname">
<tr>
<td class="memname">void affine_dequantize </td>
<td>(</td>
<td class="paramtype">const device uint8_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>scales</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>biases</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device T *</td> <td class="paramname"><span class="paramname"><em>out</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>index</em></span>&#160;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a47610f886f988d84c3e789eb564a6c44" name="a47610f886f988d84c3e789eb564a6c44"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a47610f886f988d84c3e789eb564a6c44">&#9670;&#160;</a></span>affine_quantize()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , const int group_size, const int bits&gt; </div>
<table class="memname">
<tr>
<td class="memname">void affine_quantize </td>
<td>(</td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device uint8_t *</td> <td class="paramname"><span class="paramname"><em>out</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device T *</td> <td class="paramname"><span class="paramname"><em>scales</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device T *</td> <td class="paramname"><span class="paramname"><em>biases</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>index</em></span>&#160;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a57cd320aab5fa1abb65f79b0b781f575" name="a57cd320aab5fa1abb65f79b0b781f575"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a57cd320aab5fa1abb65f79b0b781f575">&#9670;&#160;</a></span>affine_quantize_scales_biases()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , const int group_size, const int bits&gt; </div>
<table class="memname">
<tr>
<td class="memname">void affine_quantize_scales_biases </td>
<td>(</td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>scales</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>biases</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device uint8_t *</td> <td class="paramname"><span class="paramname"><em>out</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>index</em></span>&#160;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="acfc43edcf0c742ddc090d22cb7229284" name="acfc43edcf0c742ddc090d22cb7229284"></a>
<h2 class="memtitle"><span class="permalink"><a href="#acfc43edcf0c742ddc090d22cb7229284">&#9670;&#160;</a></span>bs_qmm_n()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , const int group_size, const int bits, const int BM = 32, const int BK = 32, const int BN = 32&gt; </div>
<table class="memname">
<tr>
<td class="memname">void bs_qmm_n </td>
<td>(</td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>x</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>scales</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>biases</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>lhs_indices</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>rhs_indices</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device T *</td> <td class="paramname"><span class="paramname"><em>y</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>M</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>N</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>K</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>batch_ndims</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int *</td> <td class="paramname"><span class="paramname"><em>batch_shape</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>lhs_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>rhs_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>x_batch_ndims</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int *</td> <td class="paramname"><span class="paramname"><em>x_shape</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>x_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>w_batch_ndims</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int *</td> <td class="paramname"><span class="paramname"><em>w_shape</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>w_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>s_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>b_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint3</td> <td class="paramname"><span class="paramname"><em>tid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>lid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_gid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_lid</em></span>&#160;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a3acc2ace766cb855f13da2d1834e7dc7" name="a3acc2ace766cb855f13da2d1834e7dc7"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a3acc2ace766cb855f13da2d1834e7dc7">&#9670;&#160;</a></span>bs_qmm_t()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , const int group_size, const int bits, const bool aligned_N, const int BM = 32, const int BK = 32, const int BN = 32&gt; </div>
<table class="memname">
<tr>
<td class="memname">void bs_qmm_t </td>
<td>(</td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>x</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>scales</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>biases</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>lhs_indices</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>rhs_indices</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device T *</td> <td class="paramname"><span class="paramname"><em>y</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>M</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>N</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>K</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>batch_ndims</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int *</td> <td class="paramname"><span class="paramname"><em>batch_shape</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>lhs_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>rhs_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>x_batch_ndims</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int *</td> <td class="paramname"><span class="paramname"><em>x_shape</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>x_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>w_batch_ndims</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int *</td> <td class="paramname"><span class="paramname"><em>w_shape</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>w_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>s_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>b_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint3</td> <td class="paramname"><span class="paramname"><em>tid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>lid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_gid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_lid</em></span>&#160;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a03ba4a4a5fe9955319b0aa477d2d7d98" name="a03ba4a4a5fe9955319b0aa477d2d7d98"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a03ba4a4a5fe9955319b0aa477d2d7d98">&#9670;&#160;</a></span>bs_qmv()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , int group_size, int bits&gt; </div>
<table class="memname">
<tr>
<td class="memname">void bs_qmv </td>
<td>(</td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>scales</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>biases</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>x</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>lhs_indices</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>rhs_indices</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device T *</td> <td class="paramname"><span class="paramname"><em>y</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>in_vec_size</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>out_vec_size</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>batch_ndims</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int *</td> <td class="paramname"><span class="paramname"><em>batch_shape</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>lhs_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>rhs_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>x_batch_ndims</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int *</td> <td class="paramname"><span class="paramname"><em>x_shape</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>x_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>w_batch_ndims</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int *</td> <td class="paramname"><span class="paramname"><em>w_shape</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>w_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>s_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>b_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint3</td> <td class="paramname"><span class="paramname"><em>tid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_gid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_lid</em></span>&#160;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a6f92bb9f3d29d707bfc680bebe1c80f7" name="a6f92bb9f3d29d707bfc680bebe1c80f7"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a6f92bb9f3d29d707bfc680bebe1c80f7">&#9670;&#160;</a></span>bs_qmv_fast()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , int group_size, int bits&gt; </div>
<table class="memname">
<tr>
<td class="memname">void bs_qmv_fast </td>
<td>(</td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>scales</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>biases</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>x</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>lhs_indices</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>rhs_indices</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device T *</td> <td class="paramname"><span class="paramname"><em>y</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>in_vec_size</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>out_vec_size</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>batch_ndims</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int *</td> <td class="paramname"><span class="paramname"><em>batch_shape</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>lhs_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>rhs_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>x_batch_ndims</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int *</td> <td class="paramname"><span class="paramname"><em>x_shape</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>x_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>w_batch_ndims</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int *</td> <td class="paramname"><span class="paramname"><em>w_shape</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>w_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>s_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>b_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint3</td> <td class="paramname"><span class="paramname"><em>tid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_gid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_lid</em></span>&#160;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a3af1c89416632c9275b8218a33cb8a04" name="a3af1c89416632c9275b8218a33cb8a04"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a3af1c89416632c9275b8218a33cb8a04">&#9670;&#160;</a></span>bs_qvm()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , int group_size, int bits&gt; </div>
<table class="memname">
<tr>
<td class="memname">void bs_qvm </td>
<td>(</td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>x</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>scales</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>biases</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>lhs_indices</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>rhs_indices</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device T *</td> <td class="paramname"><span class="paramname"><em>y</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>in_vec_size</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>out_vec_size</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>batch_ndims</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int *</td> <td class="paramname"><span class="paramname"><em>batch_shape</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>lhs_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>rhs_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>x_batch_ndims</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int *</td> <td class="paramname"><span class="paramname"><em>x_shape</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>x_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>w_batch_ndims</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int *</td> <td class="paramname"><span class="paramname"><em>w_shape</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>w_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>s_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant size_t *</td> <td class="paramname"><span class="paramname"><em>b_strides</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint3</td> <td class="paramname"><span class="paramname"><em>tid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_gid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_lid</em></span>&#160;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="aecff265b63566d0d5689cfc4e5b037d2" name="aecff265b63566d0d5689cfc4e5b037d2"></a>
<h2 class="memtitle"><span class="permalink"><a href="#aecff265b63566d0d5689cfc4e5b037d2">&#9670;&#160;</a></span>dequantize()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename U , int N, int bits&gt; </div>
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">void dequantize </td>
<td>(</td>
<td class="paramtype">const device uint8_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">U</td> <td class="paramname"><span class="paramname"><em>scale</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">U</td> <td class="paramname"><span class="paramname"><em>bias</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">threadgroup U *</td> <td class="paramname"><span class="paramname"><em>w_local</em></span>&#160;)</td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a8dbace41de9e1e21dd59d016db11b3e9" name="a8dbace41de9e1e21dd59d016db11b3e9"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a8dbace41de9e1e21dd59d016db11b3e9">&#9670;&#160;</a></span>load_vector()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , typename U , int values_per_thread, int bits&gt; </div>
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">U load_vector </td>
<td>(</td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>x</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">thread U *</td> <td class="paramname"><span class="paramname"><em>x_thread</em></span>&#160;)</td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="aa69e143d646fad332c1a53e8c9b337b7" name="aa69e143d646fad332c1a53e8c9b337b7"></a>
<h2 class="memtitle"><span class="permalink"><a href="#aa69e143d646fad332c1a53e8c9b337b7">&#9670;&#160;</a></span>load_vector_safe()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , typename U , int values_per_thread, int bits&gt; </div>
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">U load_vector_safe </td>
<td>(</td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>x</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">thread U *</td> <td class="paramname"><span class="paramname"><em>x_thread</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int</td> <td class="paramname"><span class="paramname"><em>N</em></span>&#160;)</td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="ab364d58ab652e3ad87a8f80910556071" name="ab364d58ab652e3ad87a8f80910556071"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ab364d58ab652e3ad87a8f80910556071">&#9670;&#160;</a></span>qdot()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename U , int values_per_thread, int bits&gt; </div>
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">U qdot </td>
<td>(</td>
<td class="paramtype">const device uint8_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const thread U *</td> <td class="paramname"><span class="paramname"><em>x_thread</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">U</td> <td class="paramname"><span class="paramname"><em>scale</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">U</td> <td class="paramname"><span class="paramname"><em>bias</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">U</td> <td class="paramname"><span class="paramname"><em>sum</em></span>&#160;)</td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a07b26d2d0b0d65dfe925c452c453fa42" name="a07b26d2d0b0d65dfe925c452c453fa42"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a07b26d2d0b0d65dfe925c452c453fa42">&#9670;&#160;</a></span>qdot_safe()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename U , int values_per_thread, int bits&gt; </div>
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">U qdot_safe </td>
<td>(</td>
<td class="paramtype">const device uint8_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const thread U *</td> <td class="paramname"><span class="paramname"><em>x_thread</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">U</td> <td class="paramname"><span class="paramname"><em>scale</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">U</td> <td class="paramname"><span class="paramname"><em>bias</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">U</td> <td class="paramname"><span class="paramname"><em>sum</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">int</td> <td class="paramname"><span class="paramname"><em>N</em></span>&#160;)</td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="ab5c4cfe531d7a0e1c19353060e3d3a6c" name="ab5c4cfe531d7a0e1c19353060e3d3a6c"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ab5c4cfe531d7a0e1c19353060e3d3a6c">&#9670;&#160;</a></span>qmm_n()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , const int group_size, const int bits, const int BM = 32, const int BK = 32, const int BN = 32&gt; </div>
<table class="memname">
<tr>
<td class="memname">void qmm_n </td>
<td>(</td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>x</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>scales</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>biases</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device T *</td> <td class="paramname"><span class="paramname"><em>y</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>M</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>N</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>K</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint3</td> <td class="paramname"><span class="paramname"><em>tid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>lid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_gid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_lid</em></span>&#160;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a5f7b71eef2e328af3225d7c777ffb339" name="a5f7b71eef2e328af3225d7c777ffb339"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a5f7b71eef2e328af3225d7c777ffb339">&#9670;&#160;</a></span>qmm_n_impl()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , const int group_size, const int bits, const int BM = 32, const int BK = 32, const int BN = 32&gt; </div>
<table class="memname">
<tr>
<td class="memname">METAL_FUNC void qmm_n_impl </td>
<td>(</td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>x</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>scales</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>biases</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device T *</td> <td class="paramname"><span class="paramname"><em>y</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">threadgroup T *</td> <td class="paramname"><span class="paramname"><em>Xs</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">threadgroup T *</td> <td class="paramname"><span class="paramname"><em>Ws</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>M</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>N</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>K</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint3</td> <td class="paramname"><span class="paramname"><em>tid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>lid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_gid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_lid</em></span>&#160;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="af9b25269ab95632e343631eeef79dc8d" name="af9b25269ab95632e343631eeef79dc8d"></a>
<h2 class="memtitle"><span class="permalink"><a href="#af9b25269ab95632e343631eeef79dc8d">&#9670;&#160;</a></span>qmm_t()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , const int group_size, const int bits, const bool aligned_N, const int BM = 32, const int BK = 32, const int BN = 32&gt; </div>
<table class="memname">
<tr>
<td class="memname">void qmm_t </td>
<td>(</td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>x</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>scales</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>biases</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device T *</td> <td class="paramname"><span class="paramname"><em>y</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>M</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>N</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>K</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint3</td> <td class="paramname"><span class="paramname"><em>tid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>lid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_gid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_lid</em></span>&#160;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="ac7b6accc41b026ef8efc312987aa9efe" name="ac7b6accc41b026ef8efc312987aa9efe"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ac7b6accc41b026ef8efc312987aa9efe">&#9670;&#160;</a></span>qmm_t_impl()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , const int group_size, const int bits, const bool aligned_N, const int BM = 32, const int BK = 32, const int BN = 32&gt; </div>
<table class="memname">
<tr>
<td class="memname">METAL_FUNC void qmm_t_impl </td>
<td>(</td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>x</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>scales</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>biases</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device T *</td> <td class="paramname"><span class="paramname"><em>y</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">threadgroup T *</td> <td class="paramname"><span class="paramname"><em>Xs</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">threadgroup T *</td> <td class="paramname"><span class="paramname"><em>Ws</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>M</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>N</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>K</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint3</td> <td class="paramname"><span class="paramname"><em>tid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>lid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_gid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_lid</em></span>&#160;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="ae024e11c3e38dda71fef772ff0a82fe2" name="ae024e11c3e38dda71fef772ff0a82fe2"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ae024e11c3e38dda71fef772ff0a82fe2">&#9670;&#160;</a></span>qmv()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , const int group_size, const int bits&gt; </div>
<table class="memname">
<tr>
<td class="memname">void qmv </td>
<td>(</td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>scales</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>biases</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>x</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device T *</td> <td class="paramname"><span class="paramname"><em>y</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>in_vec_size</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>out_vec_size</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint3</td> <td class="paramname"><span class="paramname"><em>tid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_gid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_lid</em></span>&#160;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="ad6d0aa9b080358581844d6583aa2f4ea" name="ad6d0aa9b080358581844d6583aa2f4ea"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ad6d0aa9b080358581844d6583aa2f4ea">&#9670;&#160;</a></span>qmv_fast()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , int group_size, int bits&gt; </div>
<table class="memname">
<tr>
<td class="memname">void qmv_fast </td>
<td>(</td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>scales</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>biases</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>x</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device T *</td> <td class="paramname"><span class="paramname"><em>y</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>in_vec_size</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>out_vec_size</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint3</td> <td class="paramname"><span class="paramname"><em>tid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_gid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_lid</em></span>&#160;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="aba7687e6f8f1d29c0a1b2a3db150bd81" name="aba7687e6f8f1d29c0a1b2a3db150bd81"></a>
<h2 class="memtitle"><span class="permalink"><a href="#aba7687e6f8f1d29c0a1b2a3db150bd81">&#9670;&#160;</a></span>qmv_fast_impl()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , int group_size, int bits&gt; </div>
<table class="memname">
<tr>
<td class="memname">METAL_FUNC void qmv_fast_impl </td>
<td>(</td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>scales</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>biases</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>x</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device T *</td> <td class="paramname"><span class="paramname"><em>y</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>in_vec_size</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>out_vec_size</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint3</td> <td class="paramname"><span class="paramname"><em>tid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_gid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_lid</em></span>&#160;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a8e13c7d895624f738d2a6d9893b687fd" name="a8e13c7d895624f738d2a6d9893b687fd"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a8e13c7d895624f738d2a6d9893b687fd">&#9670;&#160;</a></span>qmv_impl()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , int group_size, int bits&gt; </div>
<table class="memname">
<tr>
<td class="memname">METAL_FUNC void qmv_impl </td>
<td>(</td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>scales</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>biases</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>x</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device T *</td> <td class="paramname"><span class="paramname"><em>y</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>in_vec_size</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>out_vec_size</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint3</td> <td class="paramname"><span class="paramname"><em>tid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_gid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_lid</em></span>&#160;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="ae756f6817b584c60f5dcdd1d9c6b4f58" name="ae756f6817b584c60f5dcdd1d9c6b4f58"></a>
<h2 class="memtitle"><span class="permalink"><a href="#ae756f6817b584c60f5dcdd1d9c6b4f58">&#9670;&#160;</a></span>qouter()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename U , int values_per_thread, int bits&gt; </div>
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">void qouter </td>
<td>(</td>
<td class="paramtype">const thread uint8_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">U</td> <td class="paramname"><span class="paramname"><em>x</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">U</td> <td class="paramname"><span class="paramname"><em>scale</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">U</td> <td class="paramname"><span class="paramname"><em>bias</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">thread U *</td> <td class="paramname"><span class="paramname"><em>result</em></span>&#160;)</td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">inline</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a2cce78e6692cfcbab8f6aa89afa61580" name="a2cce78e6692cfcbab8f6aa89afa61580"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a2cce78e6692cfcbab8f6aa89afa61580">&#9670;&#160;</a></span>qvm()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , const int group_size, const int bits&gt; </div>
<table class="memname">
<tr>
<td class="memname">void qvm </td>
<td>(</td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>x</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>scales</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>biases</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device T *</td> <td class="paramname"><span class="paramname"><em>y</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>in_vec_size</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>out_vec_size</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint3</td> <td class="paramname"><span class="paramname"><em>tid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_gid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_lid</em></span>&#160;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<a id="a9d466e3e3af0d75cfc4eddcf46cb13e4" name="a9d466e3e3af0d75cfc4eddcf46cb13e4"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a9d466e3e3af0d75cfc4eddcf46cb13e4">&#9670;&#160;</a></span>qvm_impl()</h2>
<div class="memitem">
<div class="memproto">
<div class="memtemplate">
template&lt;typename T , const int group_size, const int bits&gt; </div>
<table class="memname">
<tr>
<td class="memname">METAL_FUNC void qvm_impl </td>
<td>(</td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>x</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device uint32_t *</td> <td class="paramname"><span class="paramname"><em>w</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>scales</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const device T *</td> <td class="paramname"><span class="paramname"><em>biases</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">device T *</td> <td class="paramname"><span class="paramname"><em>y</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>in_vec_size</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">const constant int &amp;</td> <td class="paramname"><span class="paramname"><em>out_vec_size</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint3</td> <td class="paramname"><span class="paramname"><em>tid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_gid</em>, </span></td>
</tr>
<tr>
<td class="paramkey"></td>
<td></td>
<td class="paramtype">uint</td> <td class="paramname"><span class="paramname"><em>simd_lid</em></span>&#160;)</td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
<h2 class="groupheader">Variable Documentation</h2>
<a id="a62969a218d93680f5e35d0c61b160b99" name="a62969a218d93680f5e35d0c61b160b99"></a>
<h2 class="memtitle"><span class="permalink"><a href="#a62969a218d93680f5e35d0c61b160b99">&#9670;&#160;</a></span>SIMD_SIZE</h2>
<div class="memitem">
<div class="memproto">
<table class="mlabels">
<tr>
<td class="mlabels-left">
<table class="memname">
<tr>
<td class="memname">constant constexpr const int SIMD_SIZE = 32</td>
</tr>
</table>
</td>
<td class="mlabels-right">
<span class="mlabels"><span class="mlabel">static</span><span class="mlabel">constexpr</span></span> </td>
</tr>
</table>
</div><div class="memdoc">
</div>
</div>
</div><!-- contents -->
<!-- start footer part -->
<hr class="footer"/><address class="footer"><small>
Generated by&#160;<a href="https://www.doxygen.org/index.html"><img class="footer" src="doxygen.svg" width="104" height="31" alt="doxygen"/></a> 1.10.0
</small></address>
</body>
</html>