<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US">
<head>
<!-- Page metadata, followed by the stylesheets and scripts this page loads. -->
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=11"/>
<meta name="generator" content="Doxygen 1.12.0"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>MLX: mlx/backend/metal/kernels/quantized.h File Reference</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="jquery.js"></script>
<script type="text/javascript" src="dynsections.js"></script>
<script type="text/javascript" src="clipboard.js"></script>
<link href="navtree.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="resize.js"></script>
<script type="text/javascript" src="cookie.js"></script>
<link href="search/search.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="search/searchdata.js"></script>
<script type="text/javascript" src="search/search.js"></script>
<link href="doxygen.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<!-- Project banner -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
 <tbody>
 <tr id="projectrow">
  <td id="projectalign">
   <div id="projectname">MLX
   </div>
  </td>
 </tr>
 </tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.12.0 -->
<!-- Creates the global searchBox object referenced by the inline handlers on MSearchSelectWindow below. -->
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
var searchBox = new SearchBox("searchBox", "search/",'.html');
/* @license-end */
</script>
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
$(function() { codefold.init(0); });
/* @license-end */
</script>
<script type="text/javascript" src="menudata.js"></script>
<script type="text/javascript" src="menu.js"></script>
<!-- On DOM ready: run initMenu, then register init_search as a further ready callback. -->
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
$(function() {
  initMenu('',true,false,'search.php','Search',false);
  $(function() { init_search(); });
});
/* @license-end */
</script>
<div id="main-nav"></div>
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
$(function(){ initResizable(false); });
/* @license-end */
</script>
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
     onmouseover="return searchBox.OnSearchSelectShow()"
     onmouseout="return searchBox.OnSearchSelectHide()"
     onkeydown="return searchBox.OnSearchSelectKey(event)">
</div>
<!-- iframe showing the search results (closed by default) -->
<div id="MSearchResultsWindow">
<div id="MSearchResults">
<div class="SRPage">
<div id="SRIndex">
<div id="SRResults"></div>
<div class="SRStatus" id="Loading">Loading...</div>
<div class="SRStatus" id="Searching">Searching...</div>
<div class="SRStatus" id="NoMatches">No Matches</div>
</div>
</div>
</div>
</div>
<!-- Breadcrumb: directory path of the documented file -->
<div id="nav-path" class="navpath">
  <ul>
<li class="navelem"><a class="el" href="dir_938ab0ecf10b8b860ff766c820f665fd.html">mlx</a></li><li class="navelem"><a class="el" href="dir_1d446c9bd3c99228254c9484e0bc5c06.html">backend</a></li><li class="navelem"><a class="el" href="dir_d0c977ea65824390717cdb7efc36c157.html">metal</a></li><li class="navelem"><a class="el" href="dir_70a37effa88bcbd6b791977fa1e64356.html">kernels</a></li>  </ul>
</div>
</div><!-- top -->
<div id="doc-content">
<div class="header">
<!-- In-page links to the member sections of this file reference -->
<div class="summary">
<a href="#nested-classes">Classes</a> |
<a href="#define-members">Macros</a> |
<a href="#func-members">Functions</a> |
<a href="#var-members">Variables</a></div>
<div class="headertitle"><div class="title">quantized.h File Reference</div></div>
</div><!-- header -->
<div class="contents">
<!-- Include directives of the documented file. The angle brackets must stay escaped, otherwise the header names are parsed as tags and vanish from the page. -->
<div class="textblock"><code>#include &lt;metal_simdgroup&gt;</code><br />
<code>#include &lt;metal_stdlib&gt;</code><br />
</div>
<p><a href="quantized_8h_source.html">Go to the source code of this file.</a></p>
<table class="memberdecls">
<!-- Member summary: classes -->
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a id="nested-classes" name="nested-classes"></a>
Classes</h2></td></tr>
<tr class="memitem:"><td class="memItemLeft" align="right" valign="top">struct &#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="struct_quantized_block_loader.html">QuantizedBlockLoader&lt; T, BROWS, BCOLS, dst_ld, reduction_dim, tgp_size, group_size, bits &gt;</a></td></tr>
<tr class="separator:"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table><table class="memberdecls">
<!-- Member summary: macros -->
<tr class="heading"><td colspan="2"><h2 class="groupheader"><a id="define-members" name="define-members"></a>
Macros</h2></td></tr>
<tr class="memitem:a0386011c52d03e60885a31e6fbd903dd" id="r_a0386011c52d03e60885a31e6fbd903dd"><td class="memItemLeft" align="right" valign="top">#define&#160;</td><td class="memItemRight" valign="bottom"><a class="el" href="#a0386011c52d03e60885a31e6fbd903dd">MLX_MTL_CONST</a>&#160;&#160;&#160;static constant constexpr const</td></tr>
<tr class="separator:a0386011c52d03e60885a31e6fbd903dd"><td class="memSeparator" colspan="2">&#160;</td></tr>
</table><table class="memberdecls">
< tr class = "heading" > < td colspan = "2" > < h2 class = "groupheader" > < a id = "func-members" name = "func-members" > < / a >
Functions< / h2 > < / td > < / tr >
< tr class = "memitem:a8dbace41de9e1e21dd59d016db11b3e9" id = "r_a8dbace41de9e1e21dd59d016db11b3e9" > < td class = "memTemplParams" colspan = "2" > template< typename T , typename U , int values_per_thread, int bits> < / td > < / tr >
< tr class = "memitem:a8dbace41de9e1e21dd59d016db11b3e9" > < td class = "memTemplItemLeft" align = "right" valign = "top" > U  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#a8dbace41de9e1e21dd59d016db11b3e9" > load_vector< / a > (const device T *x, thread U *x_thread)< / td > < / tr >
< tr class = "separator:a8dbace41de9e1e21dd59d016db11b3e9" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:aa69e143d646fad332c1a53e8c9b337b7" id = "r_aa69e143d646fad332c1a53e8c9b337b7" > < td class = "memTemplParams" colspan = "2" > template< typename T , typename U , int values_per_thread, int bits> < / td > < / tr >
< tr class = "memitem:aa69e143d646fad332c1a53e8c9b337b7" > < td class = "memTemplItemLeft" align = "right" valign = "top" > U  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#aa69e143d646fad332c1a53e8c9b337b7" > load_vector_safe< / a > (const device T *x, thread U *x_thread, int N)< / td > < / tr >
< tr class = "separator:aa69e143d646fad332c1a53e8c9b337b7" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:ab364d58ab652e3ad87a8f80910556071" id = "r_ab364d58ab652e3ad87a8f80910556071" > < td class = "memTemplParams" colspan = "2" > template< typename U , int values_per_thread, int bits> < / td > < / tr >
< tr class = "memitem:ab364d58ab652e3ad87a8f80910556071" > < td class = "memTemplItemLeft" align = "right" valign = "top" > U  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#ab364d58ab652e3ad87a8f80910556071" > qdot< / a > (const device uint8_t *w, const thread U *x_thread, U scale, U bias, U sum)< / td > < / tr >
< tr class = "separator:ab364d58ab652e3ad87a8f80910556071" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:a07b26d2d0b0d65dfe925c452c453fa42" id = "r_a07b26d2d0b0d65dfe925c452c453fa42" > < td class = "memTemplParams" colspan = "2" > template< typename U , int values_per_thread, int bits> < / td > < / tr >
< tr class = "memitem:a07b26d2d0b0d65dfe925c452c453fa42" > < td class = "memTemplItemLeft" align = "right" valign = "top" > U  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#a07b26d2d0b0d65dfe925c452c453fa42" > qdot_safe< / a > (const device uint8_t *w, const thread U *x_thread, U scale, U bias, U sum, int N)< / td > < / tr >
< tr class = "separator:a07b26d2d0b0d65dfe925c452c453fa42" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:ae756f6817b584c60f5dcdd1d9c6b4f58" id = "r_ae756f6817b584c60f5dcdd1d9c6b4f58" > < td class = "memTemplParams" colspan = "2" > template< typename U , int values_per_thread, int bits> < / td > < / tr >
< tr class = "memitem:ae756f6817b584c60f5dcdd1d9c6b4f58" > < td class = "memTemplItemLeft" align = "right" valign = "top" > void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#ae756f6817b584c60f5dcdd1d9c6b4f58" > qouter< / a > (const thread uint8_t *w, U x, U scale, U bias, thread U *result)< / td > < / tr >
< tr class = "separator:ae756f6817b584c60f5dcdd1d9c6b4f58" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:aecff265b63566d0d5689cfc4e5b037d2" id = "r_aecff265b63566d0d5689cfc4e5b037d2" > < td class = "memTemplParams" colspan = "2" > template< typename U , int N, int bits> < / td > < / tr >
< tr class = "memitem:aecff265b63566d0d5689cfc4e5b037d2" > < td class = "memTemplItemLeft" align = "right" valign = "top" > void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#aecff265b63566d0d5689cfc4e5b037d2" > dequantize< / a > (const device uint8_t *w, U scale, U bias, threadgroup U *w_local)< / td > < / tr >
< tr class = "separator:aecff265b63566d0d5689cfc4e5b037d2" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:ad5cf1cf63656bc1780685d22169cd4ef" id = "r_ad5cf1cf63656bc1780685d22169cd4ef" > < td class = "memTemplParams" colspan = "2" > template< typename T , int group_size, int bits, int D> < / td > < / tr >
< tr class = "memitem:ad5cf1cf63656bc1780685d22169cd4ef" > < td class = "memTemplItemLeft" align = "right" valign = "top" > METAL_FUNC void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#ad5cf1cf63656bc1780685d22169cd4ef" > qmv_quad_impl< / a > (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, constant int & in_vec_size, const constant int & out_vec_size, uint3 tid, uint quad_gid, uint quad_lid)< / td > < / tr >
< tr class = "separator:ad5cf1cf63656bc1780685d22169cd4ef" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:aba7687e6f8f1d29c0a1b2a3db150bd81" id = "r_aba7687e6f8f1d29c0a1b2a3db150bd81" > < td class = "memTemplParams" colspan = "2" > template< typename T , int group_size, int bits> < / td > < / tr >
< tr class = "memitem:aba7687e6f8f1d29c0a1b2a3db150bd81" > < td class = "memTemplItemLeft" align = "right" valign = "top" > METAL_FUNC void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#aba7687e6f8f1d29c0a1b2a3db150bd81" > qmv_fast_impl< / a > (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, const constant int & in_vec_size, const constant int & out_vec_size, uint3 tid, uint simd_gid, uint simd_lid)< / td > < / tr >
< tr class = "separator:aba7687e6f8f1d29c0a1b2a3db150bd81" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:a8e13c7d895624f738d2a6d9893b687fd" id = "r_a8e13c7d895624f738d2a6d9893b687fd" > < td class = "memTemplParams" colspan = "2" > template< typename T , int group_size, int bits> < / td > < / tr >
< tr class = "memitem:a8e13c7d895624f738d2a6d9893b687fd" > < td class = "memTemplItemLeft" align = "right" valign = "top" > METAL_FUNC void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#a8e13c7d895624f738d2a6d9893b687fd" > qmv_impl< / a > (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, const constant int & in_vec_size, const constant int & out_vec_size, uint3 tid, uint simd_gid, uint simd_lid)< / td > < / tr >
< tr class = "separator:a8e13c7d895624f738d2a6d9893b687fd" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:a4a8c8db7d5d480733726fd6d1a645e12" id = "r_a4a8c8db7d5d480733726fd6d1a645e12" > < td class = "memTemplParams" colspan = "2" > template< typename T , const int group_size, const int bits> < / td > < / tr >
< tr class = "memitem:a4a8c8db7d5d480733726fd6d1a645e12" > < td class = "memTemplItemLeft" align = "right" valign = "top" > METAL_FUNC void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#a4a8c8db7d5d480733726fd6d1a645e12" > qvm_impl< / a > (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, const constant int & in_vec_size, const constant int & out_vec_size, uint3 tid, uint simd_gid, uint simd_lid)< / td > < / tr >
< tr class = "separator:a4a8c8db7d5d480733726fd6d1a645e12" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:af5750a35e8f5462218effba719f7f5b8" id = "r_af5750a35e8f5462218effba719f7f5b8" > < td class = "memTemplParams" colspan = "2" > template< typename T , const int group_size, const int bits, const bool aligned_N, const int BM = 32, const int BK = 32, const int BN = 32> < / td > < / tr >
< tr class = "memitem:af5750a35e8f5462218effba719f7f5b8" > < td class = "memTemplItemLeft" align = "right" valign = "top" > METAL_FUNC void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#af5750a35e8f5462218effba719f7f5b8" > qmm_t_impl< / a > (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, threadgroup T *Xs, threadgroup T *Ws, const constant int & K, const constant int & N, const constant int & M, uint3 tid, uint lid, uint simd_gid, uint simd_lid)< / td > < / tr >
< tr class = "separator:af5750a35e8f5462218effba719f7f5b8" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:a0ba59096494f1001c195312571523ae9" id = "r_a0ba59096494f1001c195312571523ae9" > < td class = "memTemplParams" colspan = "2" > template< typename T , const int group_size, const int bits, const int BM = 32, const int BK = 32, const int BN = 32> < / td > < / tr >
< tr class = "memitem:a0ba59096494f1001c195312571523ae9" > < td class = "memTemplItemLeft" align = "right" valign = "top" > METAL_FUNC void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#a0ba59096494f1001c195312571523ae9" > qmm_n_impl< / a > (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, threadgroup T *Xs, threadgroup T *Ws, const constant int & K, const constant int & N, const constant int & M, uint3 tid, uint lid, uint simd_gid, uint simd_lid)< / td > < / tr >
< tr class = "separator:a0ba59096494f1001c195312571523ae9" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:accab1f9e17a65242347c051f98e4c0be" id = "r_accab1f9e17a65242347c051f98e4c0be" > < td class = "memTemplParams" colspan = "2" > template< typename T > < / td > < / tr >
< tr class = "memitem:accab1f9e17a65242347c051f98e4c0be" > < td class = "memTemplItemLeft" align = "right" valign = "top" > METAL_FUNC void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#accab1f9e17a65242347c051f98e4c0be" > adjust_matrix_offsets< / a > (const device T *& x, const device uint32_t *& w, const device T *& scales, const device T *& biases, device T *& y, int output_stride, const constant int & x_batch_ndims, const constant int *x_shape, const constant size_t *x_strides, const constant int & w_batch_ndims, const constant int *w_shape, const constant size_t *w_strides, const constant size_t *s_strides, const constant size_t *b_strides, uint3 tid)< / td > < / tr >
< tr class = "separator:accab1f9e17a65242347c051f98e4c0be" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:a3ab400746ad77be89c30d25638e01698" id = "r_a3ab400746ad77be89c30d25638e01698" > < td class = "memTemplParams" colspan = "2" > template< typename T > < / td > < / tr >
< tr class = "memitem:a3ab400746ad77be89c30d25638e01698" > < td class = "memTemplItemLeft" align = "right" valign = "top" > METAL_FUNC void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#a3ab400746ad77be89c30d25638e01698" > adjust_matrix_offsets< / a > (const device T *& x, const device uint32_t *& w, const device T *& scales, const device T *& biases, const device uint32_t *lhs_indices, const device uint32_t *rhs_indices, device T *& y, int output_stride, const constant int & batch_ndims, const constant int *batch_shape, const constant size_t *lhs_strides, const constant size_t *rhs_strides, const constant int & x_batch_ndims, const constant int *x_shape, const constant size_t *x_strides, const constant int & w_batch_ndims, const constant int *w_shape, const constant size_t *w_strides, const constant size_t *s_strides, const constant size_t *b_strides, uint3 tid)< / td > < / tr >
< tr class = "separator:a3ab400746ad77be89c30d25638e01698" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:a7ce5f53a4d6d1555e9402d545408d0ad" id = "r_a7ce5f53a4d6d1555e9402d545408d0ad" > < td class = "memTemplParams" colspan = "2" > template< typename T , int group_size, int bits, int D, bool batched> < / td > < / tr >
< tr class = "memitem:a7ce5f53a4d6d1555e9402d545408d0ad" > < td class = "memTemplItemLeft" align = "right" valign = "top" > void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#a7ce5f53a4d6d1555e9402d545408d0ad" > qmv_quad< / a > (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, const constant int & in_vec_size, const constant int & out_vec_size, const constant int & x_batch_ndims, const constant int *x_shape, const constant size_t *x_strides, const constant int & w_batch_ndims, const constant int *w_shape, const constant size_t *w_strides, const constant size_t *s_strides, const constant size_t *b_strides, uint3 tid, uint quad_gid, uint quad_lid)< / td > < / tr >
< tr class = "separator:a7ce5f53a4d6d1555e9402d545408d0ad" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:a7bd1d9f17c86c8fd34ec13678cff755f" id = "r_a7bd1d9f17c86c8fd34ec13678cff755f" > < td class = "memTemplParams" colspan = "2" > template< typename T , int group_size, int bits, bool batched> < / td > < / tr >
< tr class = "memitem:a7bd1d9f17c86c8fd34ec13678cff755f" > < td class = "memTemplItemLeft" align = "right" valign = "top" > void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#a7bd1d9f17c86c8fd34ec13678cff755f" > qmv_fast< / a > (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, const constant int & in_vec_size, const constant int & out_vec_size, const constant int & x_batch_ndims, const constant int *x_shape, const constant size_t *x_strides, const constant int & w_batch_ndims, const constant int *w_shape, const constant size_t *w_strides, const constant size_t *s_strides, const constant size_t *b_strides, uint3 tid, uint simd_gid, uint simd_lid)< / td > < / tr >
< tr class = "separator:a7bd1d9f17c86c8fd34ec13678cff755f" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:a639c50a08b5cf57e8be5279a116274bd" id = "r_a639c50a08b5cf57e8be5279a116274bd" > < td class = "memTemplParams" colspan = "2" > template< typename T , const int group_size, const int bits, bool batched> < / td > < / tr >
< tr class = "memitem:a639c50a08b5cf57e8be5279a116274bd" > < td class = "memTemplItemLeft" align = "right" valign = "top" > void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#a639c50a08b5cf57e8be5279a116274bd" > qmv< / a > (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, const constant int & in_vec_size, const constant int & out_vec_size, const constant int & x_batch_ndims, const constant int *x_shape, const constant size_t *x_strides, const constant int & w_batch_ndims, const constant int *w_shape, const constant size_t *w_strides, const constant size_t *s_strides, const constant size_t *b_strides, uint3 tid, uint simd_gid, uint simd_lid)< / td > < / tr >
< tr class = "separator:a639c50a08b5cf57e8be5279a116274bd" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:ad84f7d5ab9e32dbbe3ca759ae5d5d5c5" id = "r_ad84f7d5ab9e32dbbe3ca759ae5d5d5c5" > < td class = "memTemplParams" colspan = "2" > template< typename T , const int group_size, const int bits, bool batched> < / td > < / tr >
< tr class = "memitem:ad84f7d5ab9e32dbbe3ca759ae5d5d5c5" > < td class = "memTemplItemLeft" align = "right" valign = "top" > void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#ad84f7d5ab9e32dbbe3ca759ae5d5d5c5" > qvm< / a > (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, const constant int & in_vec_size, const constant int & out_vec_size, const constant int & x_batch_ndims, const constant int *x_shape, const constant size_t *x_strides, const constant int & w_batch_ndims, const constant int *w_shape, const constant size_t *w_strides, const constant size_t *s_strides, const constant size_t *b_strides, uint3 tid, uint simd_gid, uint simd_lid)< / td > < / tr >
< tr class = "separator:ad84f7d5ab9e32dbbe3ca759ae5d5d5c5" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:abe2e3ef0ee4ec2cb61dc5330ad463d10" id = "r_abe2e3ef0ee4ec2cb61dc5330ad463d10" > < td class = "memTemplParams" colspan = "2" > template< typename T , const int group_size, const int bits, const bool aligned_N, const bool batched, const int BM = 32, const int BK = 32, const int BN = 32> < / td > < / tr >
< tr class = "memitem:abe2e3ef0ee4ec2cb61dc5330ad463d10" > < td class = "memTemplItemLeft" align = "right" valign = "top" > void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#abe2e3ef0ee4ec2cb61dc5330ad463d10" > qmm_t< / a > (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, const constant int & K, const constant int & N, const constant int & M, const constant int & x_batch_ndims, const constant int *x_shape, const constant size_t *x_strides, const constant int & w_batch_ndims, const constant int *w_shape, const constant size_t *w_strides, const constant size_t *s_strides, const constant size_t *b_strides, uint3 tid, uint lid, uint simd_gid, uint simd_lid)< / td > < / tr >
< tr class = "separator:abe2e3ef0ee4ec2cb61dc5330ad463d10" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:a2ce135e392dbf9a3e5180fb083792ed7" id = "r_a2ce135e392dbf9a3e5180fb083792ed7" > < td class = "memTemplParams" colspan = "2" > template< typename T , const int group_size, const int bits, const bool batched, const int BM = 32, const int BK = 32, const int BN = 32> < / td > < / tr >
< tr class = "memitem:a2ce135e392dbf9a3e5180fb083792ed7" > < td class = "memTemplItemLeft" align = "right" valign = "top" > void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#a2ce135e392dbf9a3e5180fb083792ed7" > qmm_n< / a > (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, const constant int & K, const constant int & N, const constant int & M, const constant int & x_batch_ndims, const constant int *x_shape, const constant size_t *x_strides, const constant int & w_batch_ndims, const constant int *w_shape, const constant size_t *w_strides, const constant size_t *s_strides, const constant size_t *b_strides, uint3 tid, uint lid, uint simd_gid, uint simd_lid)< / td > < / tr >
< tr class = "separator:a2ce135e392dbf9a3e5180fb083792ed7" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:a530b720e123e59d73ea89a0a2d0946b7" id = "r_a530b720e123e59d73ea89a0a2d0946b7" > < td class = "memTemplParams" colspan = "2" > template< typename T , int group_size, int bits> < / td > < / tr >
< tr class = "memitem:a530b720e123e59d73ea89a0a2d0946b7" > < td class = "memTemplItemLeft" align = "right" valign = "top" > void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#a530b720e123e59d73ea89a0a2d0946b7" > bs_qmv_fast< / a > (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, const constant int & in_vec_size, const constant int & out_vec_size, const constant int & x_batch_ndims, const constant int *x_shape, const constant size_t *x_strides, const constant int & w_batch_ndims, const constant int *w_shape, const constant size_t *w_strides, const constant size_t *s_strides, const constant size_t *b_strides, const constant int & batch_ndims, const constant int *batch_shape, const device uint32_t *lhs_indices, const device uint32_t *rhs_indices, const constant size_t *lhs_strides, const constant size_t *rhs_strides, uint3 tid, uint simd_gid, uint simd_lid)< / td > < / tr >
< tr class = "separator:a530b720e123e59d73ea89a0a2d0946b7" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:acf4c7fc77821a83b31aedfb48443d3ed" id = "r_acf4c7fc77821a83b31aedfb48443d3ed" > < td class = "memTemplParams" colspan = "2" > template< typename T , int group_size, int bits> < / td > < / tr >
< tr class = "memitem:acf4c7fc77821a83b31aedfb48443d3ed" > < td class = "memTemplItemLeft" align = "right" valign = "top" > void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#acf4c7fc77821a83b31aedfb48443d3ed" > bs_qmv< / a > (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, const constant int & in_vec_size, const constant int & out_vec_size, const constant int & x_batch_ndims, const constant int *x_shape, const constant size_t *x_strides, const constant int & w_batch_ndims, const constant int *w_shape, const constant size_t *w_strides, const constant size_t *s_strides, const constant size_t *b_strides, const constant int & batch_ndims, const constant int *batch_shape, const device uint32_t *lhs_indices, const device uint32_t *rhs_indices, const constant size_t *lhs_strides, const constant size_t *rhs_strides, uint3 tid, uint simd_gid, uint simd_lid)< / td > < / tr >
< tr class = "separator:acf4c7fc77821a83b31aedfb48443d3ed" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:a6d6e3c31e44f232e58ae9d605e1f4494" id = "r_a6d6e3c31e44f232e58ae9d605e1f4494" > < td class = "memTemplParams" colspan = "2" > template< typename T , int group_size, int bits> < / td > < / tr >
< tr class = "memitem:a6d6e3c31e44f232e58ae9d605e1f4494" > < td class = "memTemplItemLeft" align = "right" valign = "top" > void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#a6d6e3c31e44f232e58ae9d605e1f4494" > bs_qvm< / a > (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, const constant int & in_vec_size, const constant int & out_vec_size, const constant int & x_batch_ndims, const constant int *x_shape, const constant size_t *x_strides, const constant int & w_batch_ndims, const constant int *w_shape, const constant size_t *w_strides, const constant size_t *s_strides, const constant size_t *b_strides, const constant int & batch_ndims, const constant int *batch_shape, const device uint32_t *lhs_indices, const device uint32_t *rhs_indices, const constant size_t *lhs_strides, const constant size_t *rhs_strides, uint3 tid, uint simd_gid, uint simd_lid)< / td > < / tr >
< tr class = "separator:a6d6e3c31e44f232e58ae9d605e1f4494" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:ab1ae143eba2afceb8df63f38b26f9a84" id = "r_ab1ae143eba2afceb8df63f38b26f9a84" > < td class = "memTemplParams" colspan = "2" > template< typename T , const int group_size, const int bits, const bool aligned_N, const int BM = 32, const int BK = 32, const int BN = 32> < / td > < / tr >
< tr class = "memitem:ab1ae143eba2afceb8df63f38b26f9a84" > < td class = "memTemplItemLeft" align = "right" valign = "top" > void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#ab1ae143eba2afceb8df63f38b26f9a84" > bs_qmm_t< / a > (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, const constant int & K, const constant int & N, const constant int & M, const constant int & x_batch_ndims, const constant int *x_shape, const constant size_t *x_strides, const constant int & w_batch_ndims, const constant int *w_shape, const constant size_t *w_strides, const constant size_t *s_strides, const constant size_t *b_strides, const constant int & batch_ndims, const constant int *batch_shape, const device uint32_t *lhs_indices, const device uint32_t *rhs_indices, const constant size_t *lhs_strides, const constant size_t *rhs_strides, uint3 tid, uint lid, uint simd_gid, uint simd_lid)< / td > < / tr >
< tr class = "separator:ab1ae143eba2afceb8df63f38b26f9a84" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:a1a66b061c46383952a0f067c3848971f" id = "r_a1a66b061c46383952a0f067c3848971f" > < td class = "memTemplParams" colspan = "2" > template< typename T , const int group_size, const int bits, const int BM = 32, const int BK = 32, const int BN = 32> < / td > < / tr >
< tr class = "memitem:a1a66b061c46383952a0f067c3848971f" > < td class = "memTemplItemLeft" align = "right" valign = "top" > void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#a1a66b061c46383952a0f067c3848971f" > bs_qmm_n< / a > (const device uint32_t *w, const device T *scales, const device T *biases, const device T *x, device T *y, const constant int & K, const constant int & N, const constant int & M, const constant int & x_batch_ndims, const constant int *x_shape, const constant size_t *x_strides, const constant int & w_batch_ndims, const constant int *w_shape, const constant size_t *w_strides, const constant size_t *s_strides, const constant size_t *b_strides, const constant int & batch_ndims, const constant int *batch_shape, const device uint32_t *lhs_indices, const device uint32_t *rhs_indices, const constant size_t *lhs_strides, const constant size_t *rhs_strides, uint3 tid, uint lid, uint simd_gid, uint simd_lid)< / td > < / tr >
< tr class = "separator:a1a66b061c46383952a0f067c3848971f" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
2024-10-15 23:12:17 +08:00
< tr class = "memitem:a47bcf4a14566e01e14bd3c155811db59" id = "r_a47bcf4a14566e01e14bd3c155811db59" > < td class = "memTemplParams" colspan = "2" > template< typename T , const int group_size, const int bits> < / td > < / tr >
< tr class = "memitem:a47bcf4a14566e01e14bd3c155811db59" > < td class = "memTemplItemLeft" align = "right" valign = "top" > void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#a47bcf4a14566e01e14bd3c155811db59" > affine_quantize< / a > (const device T *w, device uint8_t *out, device T *scales, device T *biases, uint2 index, uint2 grid_dim)< / td > < / tr >
< tr class = "separator:a47bcf4a14566e01e14bd3c155811db59" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:a7561acefd7b55e7e2b25393be08bb99c" id = "r_a7561acefd7b55e7e2b25393be08bb99c" > < td class = "memTemplParams" colspan = "2" > template< typename T , const int group_size, const int bits> < / td > < / tr >
< tr class = "memitem:a7561acefd7b55e7e2b25393be08bb99c" > < td class = "memTemplItemLeft" align = "right" valign = "top" > void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#a7561acefd7b55e7e2b25393be08bb99c" > affine_quantize_scales_biases< / a > (const device T *w, const device T *scales, const device T *biases, device uint8_t *out, uint2 index, uint2 grid_dim)< / td > < / tr >
< tr class = "separator:a7561acefd7b55e7e2b25393be08bb99c" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< tr class = "memitem:a6076203615038eb06816158f7b3869c6" id = "r_a6076203615038eb06816158f7b3869c6" > < td class = "memTemplParams" colspan = "2" > template< typename T , const int group_size, const int bits> < / td > < / tr >
< tr class = "memitem:a6076203615038eb06816158f7b3869c6" > < td class = "memTemplItemLeft" align = "right" valign = "top" > void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#a6076203615038eb06816158f7b3869c6" > affine_dequantize< / a > (const device uint8_t *w, const device T *scales, const device T *biases, device T *out, uint2 index, uint2 grid_dim)< / td > < / tr >
< tr class = "separator:a6076203615038eb06816158f7b3869c6" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
< / table > < table class = "memberdecls" >
< tr class = "heading" > < td colspan = "2" > < h2 class = "groupheader" > < a id = "var-members" name = "var-members" > < / a >
Variables< / h2 > < / td > < / tr >
< tr class = "memitem:a62969a218d93680f5e35d0c61b160b99" id = "r_a62969a218d93680f5e35d0c61b160b99" > < td class = "memItemLeft" align = "right" valign = "top" > static constant constexpr const int  < / td > < td class = "memItemRight" valign = "bottom" > < a class = "el" href = "#a62969a218d93680f5e35d0c61b160b99" > SIMD_SIZE< / a > = 32< / td > < / tr >
< tr class = "separator:a62969a218d93680f5e35d0c61b160b99" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
2024-10-26 04:23:45 +08:00
< tr class = "memitem:a803e4d5a1459844ba647aea5b004e133" id = "r_a803e4d5a1459844ba647aea5b004e133" > < td class = "memItemLeft" align = "right" valign = "top" > static constant constexpr const int  < / td > < td class = "memItemRight" valign = "bottom" > < a class = "el" href = "#a803e4d5a1459844ba647aea5b004e133" > QUAD_SIZE< / a > = 4< / td > < / tr >
< tr class = "separator:a803e4d5a1459844ba647aea5b004e133" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
2024-10-15 23:12:17 +08:00
< / table >
< h2 class = "groupheader" > Macro Definition Documentation< / h2 >
< a id = "a0386011c52d03e60885a31e6fbd903dd" name = "a0386011c52d03e60885a31e6fbd903dd" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a0386011c52d03e60885a31e6fbd903dd" > ◆   < / a > < / span > MLX_MTL_CONST< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< table class = "memname" >
< tr >
< td class = "memname" > #define MLX_MTL_CONST      static constant constexpr const< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
< h2 class = "groupheader" > Function Documentation< / h2 >
< a id = "a3ab400746ad77be89c30d25638e01698" name = "a3ab400746ad77be89c30d25638e01698" > < / a >
2024-10-26 04:23:45 +08:00
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a3ab400746ad77be89c30d25638e01698" > ◆   < / a > < / span > adjust_matrix_offsets() < span class = "overload" > [1/2]< / span > < / h2 >
2024-10-15 23:12:17 +08:00
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T > < / div >
< table class = "memname" >
< tr >
< td class = "memname" > METAL_FUNC void adjust_matrix_offsets < / td >
< td > (< / td >
< td class = "paramtype" > const device T *& < / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device uint32_t *& < / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *& < / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *& < / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > lhs_indices< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > rhs_indices< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device T *& < / td > < td class = "paramname" > < span class = "paramname" > < em > y< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > int< / td > < td class = "paramname" > < span class = "paramname" > < em > output_stride< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > batch_ndims< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > batch_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > lhs_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > rhs_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > x_batch_ndims< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > w_batch_ndims< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > s_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > b_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
2024-10-26 04:23:45 +08:00
< / div >
< / div >
< a id = "accab1f9e17a65242347c051f98e4c0be" name = "accab1f9e17a65242347c051f98e4c0be" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#accab1f9e17a65242347c051f98e4c0be" > ◆   < / a > < / span > adjust_matrix_offsets() < span class = "overload" > [2/2]< / span > < / h2 >
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T > < / div >
< table class = "memname" >
< tr >
< td class = "memname" > METAL_FUNC void adjust_matrix_offsets < / td >
< td > (< / td >
< td class = "paramtype" > const device T *& < / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device uint32_t *& < / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *& < / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *& < / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device T *& < / td > < td class = "paramname" > < span class = "paramname" > < em > y< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > int< / td > < td class = "paramname" > < span class = "paramname" > < em > output_stride< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > x_batch_ndims< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > w_batch_ndims< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > s_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > b_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
2024-10-15 23:12:17 +08:00
< / div >
< / div >
< a id = "a6076203615038eb06816158f7b3869c6" name = "a6076203615038eb06816158f7b3869c6" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a6076203615038eb06816158f7b3869c6" > ◆   < / a > < / span > affine_dequantize()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T , const int group_size, const int bits> < / div >
< table class = "memname" >
< tr >
< td class = "memname" > void affine_dequantize < / td >
< td > (< / td >
< td class = "paramtype" > const device uint8_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > out< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint2< / td > < td class = "paramname" > < span class = "paramname" > < em > index< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint2< / td > < td class = "paramname" > < span class = "paramname" > < em > grid_dim< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
< a id = "a47bcf4a14566e01e14bd3c155811db59" name = "a47bcf4a14566e01e14bd3c155811db59" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a47bcf4a14566e01e14bd3c155811db59" > ◆   < / a > < / span > affine_quantize()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T , const int group_size, const int bits> < / div >
< table class = "memname" >
< tr >
< td class = "memname" > void affine_quantize < / td >
< td > (< / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device uint8_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > out< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint2< / td > < td class = "paramname" > < span class = "paramname" > < em > index< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint2< / td > < td class = "paramname" > < span class = "paramname" > < em > grid_dim< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
< a id = "a7561acefd7b55e7e2b25393be08bb99c" name = "a7561acefd7b55e7e2b25393be08bb99c" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a7561acefd7b55e7e2b25393be08bb99c" > ◆   < / a > < / span > affine_quantize_scales_biases()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T , const int group_size, const int bits> < / div >
< table class = "memname" >
< tr >
< td class = "memname" > void affine_quantize_scales_biases < / td >
< td > (< / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device uint8_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > out< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint2< / td > < td class = "paramname" > < span class = "paramname" > < em > index< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint2< / td > < td class = "paramname" > < span class = "paramname" > < em > grid_dim< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
2024-10-26 04:23:45 +08:00
< a id = "a1a66b061c46383952a0f067c3848971f" name = "a1a66b061c46383952a0f067c3848971f" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a1a66b061c46383952a0f067c3848971f" > ◆   < / a > < / span > bs_qmm_n()< / h2 >
2024-10-15 23:12:17 +08:00
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T , const int group_size, const int bits, const int BM = 32, const int BK = 32, const int BN = 32> < / div >
< table class = "memname" >
< tr >
< td class = "memname" > void bs_qmm_n < / td >
< td > (< / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > y< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > K< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > N< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > M< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > x_batch_ndims< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_shape< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > w_batch_ndims< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_shape< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > s_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > b_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > batch_ndims< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > batch_shape< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > lhs_indices< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > rhs_indices< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > lhs_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > rhs_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > lid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_lid< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
2024-10-26 04:23:45 +08:00
< a id = "ab1ae143eba2afceb8df63f38b26f9a84" name = "ab1ae143eba2afceb8df63f38b26f9a84" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#ab1ae143eba2afceb8df63f38b26f9a84" > ◆   < / a > < / span > bs_qmm_t()< / h2 >
2024-10-15 23:12:17 +08:00
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T , const int group_size, const int bits, const bool aligned_N, const int BM = 32, const int BK = 32, const int BN = 32> < / div >
< table class = "memname" >
< tr >
< td class = "memname" > void bs_qmm_t < / td >
< td > (< / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > y< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > K< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > N< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > M< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > x_batch_ndims< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_shape< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > w_batch_ndims< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_shape< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > s_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > b_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > batch_ndims< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > batch_shape< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > lhs_indices< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > rhs_indices< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > lhs_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > rhs_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > lid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_lid< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
2024-10-26 04:23:45 +08:00
< a id = "acf4c7fc77821a83b31aedfb48443d3ed" name = "acf4c7fc77821a83b31aedfb48443d3ed" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#acf4c7fc77821a83b31aedfb48443d3ed" > ◆   < / a > < / span > bs_qmv()< / h2 >
2024-10-15 23:12:17 +08:00
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T , int group_size, int bits> < / div >
< table class = "memname" >
< tr >
< td class = "memname" > void bs_qmv < / td >
< td > (< / td >
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > y< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > in_vec_size< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > out_vec_size< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > x_batch_ndims< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_shape< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > w_batch_ndims< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_shape< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > s_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > b_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > batch_ndims< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > batch_shape< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > lhs_indices< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > rhs_indices< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > lhs_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > rhs_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_lid< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
2024-10-26 04:23:45 +08:00
< a id = "a530b720e123e59d73ea89a0a2d0946b7" name = "a530b720e123e59d73ea89a0a2d0946b7" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a530b720e123e59d73ea89a0a2d0946b7" > ◆   < / a > < / span > bs_qmv_fast()< / h2 >
2024-10-15 23:12:17 +08:00
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T , int group_size, int bits> < / div >
< table class = "memname" >
< tr >
< td class = "memname" > void bs_qmv_fast < / td >
< td > (< / td >
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > y< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > in_vec_size< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > out_vec_size< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > x_batch_ndims< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_shape< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > w_batch_ndims< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_shape< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > s_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > b_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > batch_ndims< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > batch_shape< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > lhs_indices< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > rhs_indices< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > lhs_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > rhs_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_lid< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
2024-10-26 04:23:45 +08:00
< a id = "a6d6e3c31e44f232e58ae9d605e1f4494" name = "a6d6e3c31e44f232e58ae9d605e1f4494" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a6d6e3c31e44f232e58ae9d605e1f4494" > ◆   < / a > < / span > bs_qvm()< / h2 >
2024-10-15 23:12:17 +08:00
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T , int group_size, int bits> < / div >
< table class = "memname" >
< tr >
< td class = "memname" > void bs_qvm < / td >
< td > (< / td >
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > y< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > in_vec_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > out_vec_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > x_batch_ndims< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_shape< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > w_batch_ndims< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_shape< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > s_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > b_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > batch_ndims< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > batch_shape< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > lhs_indices< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > rhs_indices< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > lhs_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > rhs_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_lid< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
< a id = "aecff265b63566d0d5689cfc4e5b037d2" name = "aecff265b63566d0d5689cfc4e5b037d2" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#aecff265b63566d0d5689cfc4e5b037d2" > ◆   < / a > < / span > dequantize()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename U , int N, int bits> < / div >
< table class = "mlabels" >
< tr >
< td class = "mlabels-left" >
< table class = "memname" >
< tr >
< td class = "memname" > void dequantize < / td >
< td > (< / td >
< td class = "paramtype" > const device uint8_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > U< / td > < td class = "paramname" > < span class = "paramname" > < em > scale< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > U< / td > < td class = "paramname" > < span class = "paramname" > < em > bias< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > threadgroup U *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_local< / em > < / span >   )< / td >
< / tr >
< / table >
< / td >
< td class = "mlabels-right" >
< span class = "mlabels" > < span class = "mlabel" > inline< / span > < / span > < / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
< a id = "a8dbace41de9e1e21dd59d016db11b3e9" name = "a8dbace41de9e1e21dd59d016db11b3e9" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a8dbace41de9e1e21dd59d016db11b3e9" > ◆   < / a > < / span > load_vector()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T , typename U , int values_per_thread, int bits> < / div >
< table class = "mlabels" >
< tr >
< td class = "mlabels-left" >
< table class = "memname" >
< tr >
< td class = "memname" > U load_vector < / td >
< td > (< / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > thread U *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_thread< / em > < / span >   )< / td >
< / tr >
< / table >
< / td >
< td class = "mlabels-right" >
< span class = "mlabels" > < span class = "mlabel" > inline< / span > < / span > < / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
< a id = "aa69e143d646fad332c1a53e8c9b337b7" name = "aa69e143d646fad332c1a53e8c9b337b7" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#aa69e143d646fad332c1a53e8c9b337b7" > ◆   < / a > < / span > load_vector_safe()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T , typename U , int values_per_thread, int bits> < / div >
< table class = "mlabels" >
< tr >
< td class = "mlabels-left" >
< table class = "memname" >
< tr >
< td class = "memname" > U load_vector_safe < / td >
< td > (< / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > thread U *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_thread< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > int< / td > < td class = "paramname" > < span class = "paramname" > < em > N< / em > < / span >   )< / td >
< / tr >
< / table >
< / td >
< td class = "mlabels-right" >
< span class = "mlabels" > < span class = "mlabel" > inline< / span > < / span > < / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
< a id = "ab364d58ab652e3ad87a8f80910556071" name = "ab364d58ab652e3ad87a8f80910556071" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#ab364d58ab652e3ad87a8f80910556071" > ◆   < / a > < / span > qdot()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename U , int values_per_thread, int bits> < / div >
< table class = "mlabels" >
< tr >
< td class = "mlabels-left" >
< table class = "memname" >
< tr >
< td class = "memname" > U qdot < / td >
< td > (< / td >
< td class = "paramtype" > const device uint8_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const thread U *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_thread< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > U< / td > < td class = "paramname" > < span class = "paramname" > < em > scale< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > U< / td > < td class = "paramname" > < span class = "paramname" > < em > bias< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > U< / td > < td class = "paramname" > < span class = "paramname" > < em > sum< / em > < / span >   )< / td >
< / tr >
< / table >
< / td >
< td class = "mlabels-right" >
< span class = "mlabels" > < span class = "mlabel" > inline< / span > < / span > < / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
< a id = "a07b26d2d0b0d65dfe925c452c453fa42" name = "a07b26d2d0b0d65dfe925c452c453fa42" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a07b26d2d0b0d65dfe925c452c453fa42" > ◆   < / a > < / span > qdot_safe()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename U , int values_per_thread, int bits> < / div >
< table class = "mlabels" >
< tr >
< td class = "mlabels-left" >
< table class = "memname" >
< tr >
< td class = "memname" > U qdot_safe < / td >
< td > (< / td >
< td class = "paramtype" > const device uint8_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const thread U *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_thread< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > U< / td > < td class = "paramname" > < span class = "paramname" > < em > scale< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > U< / td > < td class = "paramname" > < span class = "paramname" > < em > bias< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > U< / td > < td class = "paramname" > < span class = "paramname" > < em > sum< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > int< / td > < td class = "paramname" > < span class = "paramname" > < em > N< / em > < / span >   )< / td >
< / tr >
< / table >
< / td >
< td class = "mlabels-right" >
< span class = "mlabels" > < span class = "mlabel" > inline< / span > < / span > < / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
2024-10-26 04:23:45 +08:00
< a id = "a2ce135e392dbf9a3e5180fb083792ed7" name = "a2ce135e392dbf9a3e5180fb083792ed7" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a2ce135e392dbf9a3e5180fb083792ed7" > ◆   < / a > < / span > qmm_n()< / h2 >
2024-10-15 23:12:17 +08:00
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
2024-10-26 04:23:45 +08:00
template< typename T , const int group_size, const int bits, const bool batched, const int BM = 32, const int BK = 32, const int BN = 32> < / div >
2024-10-15 23:12:17 +08:00
< table class = "memname" >
< tr >
< td class = "memname" > void qmm_n < / td >
< td > (< / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > y< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > K< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > N< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > M< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > x_batch_ndims< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > w_batch_ndims< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > s_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > b_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > lid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_lid< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
2024-10-26 04:23:45 +08:00
< a id = "a0ba59096494f1001c195312571523ae9" name = "a0ba59096494f1001c195312571523ae9" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a0ba59096494f1001c195312571523ae9" > ◆   < / a > < / span > qmm_n_impl()< / h2 >
2024-10-15 23:12:17 +08:00
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T , const int group_size, const int bits, const int BM = 32, const int BK = 32, const int BN = 32> < / div >
< table class = "memname" >
< tr >
< td class = "memname" > METAL_FUNC void qmm_n_impl < / td >
< td > (< / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > y< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > threadgroup T *< / td > < td class = "paramname" > < span class = "paramname" > < em > Xs< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > threadgroup T *< / td > < td class = "paramname" > < span class = "paramname" > < em > Ws< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > K< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > N< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > M< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > lid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_lid< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
2024-10-26 04:23:45 +08:00
< a id = "abe2e3ef0ee4ec2cb61dc5330ad463d10" name = "abe2e3ef0ee4ec2cb61dc5330ad463d10" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#abe2e3ef0ee4ec2cb61dc5330ad463d10" > ◆   < / a > < / span > qmm_t()< / h2 >
2024-10-15 23:12:17 +08:00
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
2024-10-26 04:23:45 +08:00
template< typename T , const int group_size, const int bits, const bool aligned_N, const bool batched, const int BM = 32, const int BK = 32, const int BN = 32> < / div >
2024-10-15 23:12:17 +08:00
< table class = "memname" >
< tr >
< td class = "memname" > void qmm_t < / td >
< td > (< / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > y< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > K< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > N< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > M< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > x_batch_ndims< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > w_batch_ndims< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > s_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > b_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > lid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_lid< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
2024-10-26 04:23:45 +08:00
< a id = "af5750a35e8f5462218effba719f7f5b8" name = "af5750a35e8f5462218effba719f7f5b8" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#af5750a35e8f5462218effba719f7f5b8" > ◆   < / a > < / span > qmm_t_impl()< / h2 >
2024-10-15 23:12:17 +08:00
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T , const int group_size, const int bits, const bool aligned_N, const int BM = 32, const int BK = 32, const int BN = 32> < / div >
< table class = "memname" >
< tr >
< td class = "memname" > METAL_FUNC void qmm_t_impl < / td >
< td > (< / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > y< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > threadgroup T *< / td > < td class = "paramname" > < span class = "paramname" > < em > Xs< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > threadgroup T *< / td > < td class = "paramname" > < span class = "paramname" > < em > Ws< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > K< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > N< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-10-26 04:23:45 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > M< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > lid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_lid< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
2024-10-26 04:23:45 +08:00
< a id = "a639c50a08b5cf57e8be5279a116274bd" name = "a639c50a08b5cf57e8be5279a116274bd" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a639c50a08b5cf57e8be5279a116274bd" > ◆   < / a > < / span > qmv()< / h2 >
2024-10-15 23:12:17 +08:00
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
2024-10-26 04:23:45 +08:00
template< typename T , const int group_size, const int bits, bool batched> < / div >
2024-10-15 23:12:17 +08:00
< table class = "memname" >
< tr >
< td class = "memname" > void qmv < / td >
< td > (< / td >
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > y< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > in_vec_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > out_vec_size< / em > < / span > , < / td >
< / tr >
2024-10-26 04:23:45 +08:00
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > x_batch_ndims< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > w_batch_ndims< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > s_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > b_strides< / em > < / span > , < / td >
< / tr >
2024-10-15 23:12:17 +08:00
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_lid< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
2024-10-26 04:23:45 +08:00
< a id = "a7bd1d9f17c86c8fd34ec13678cff755f" name = "a7bd1d9f17c86c8fd34ec13678cff755f" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a7bd1d9f17c86c8fd34ec13678cff755f" > ◆   < / a > < / span > qmv_fast()< / h2 >
2024-10-15 23:12:17 +08:00
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
2024-10-26 04:23:45 +08:00
template< typename T , int group_size, int bits, bool batched> < / div >
2024-10-15 23:12:17 +08:00
< table class = "memname" >
< tr >
< td class = "memname" > void qmv_fast < / td >
< td > (< / td >
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > y< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > in_vec_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > out_vec_size< / em > < / span > , < / td >
< / tr >
2024-10-26 04:23:45 +08:00
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > x_batch_ndims< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > w_batch_ndims< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > s_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > b_strides< / em > < / span > , < / td >
< / tr >
2024-10-15 23:12:17 +08:00
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_lid< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
< a id = "aba7687e6f8f1d29c0a1b2a3db150bd81" name = "aba7687e6f8f1d29c0a1b2a3db150bd81" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#aba7687e6f8f1d29c0a1b2a3db150bd81" > ◆   < / a > < / span > qmv_fast_impl()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T , int group_size, int bits> < / div >
< table class = "memname" >
< tr >
< td class = "memname" > METAL_FUNC void qmv_fast_impl < / td >
< td > (< / td >
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > y< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > in_vec_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > out_vec_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_lid< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
< a id = "a8e13c7d895624f738d2a6d9893b687fd" name = "a8e13c7d895624f738d2a6d9893b687fd" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a8e13c7d895624f738d2a6d9893b687fd" > ◆   < / a > < / span > qmv_impl()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T , int group_size, int bits> < / div >
< table class = "memname" >
< tr >
< td class = "memname" > METAL_FUNC void qmv_impl < / td >
< td > (< / td >
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > y< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > in_vec_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > out_vec_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_lid< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
2024-10-26 04:23:45 +08:00
< / div >
< / div >
< a id = "a7ce5f53a4d6d1555e9402d545408d0ad" name = "a7ce5f53a4d6d1555e9402d545408d0ad" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a7ce5f53a4d6d1555e9402d545408d0ad" > ◆   < / a > < / span > qmv_quad()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T , int group_size, int bits, int D, bool batched> < / div >
< table class = "memname" >
< tr >
< td class = "memname" > void qmv_quad < / td >
< td > (< / td >
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > y< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > in_vec_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > out_vec_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > x_batch_ndims< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > w_batch_ndims< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > s_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > b_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > quad_gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > quad_lid< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
< a id = "ad5cf1cf63656bc1780685d22169cd4ef" name = "ad5cf1cf63656bc1780685d22169cd4ef" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#ad5cf1cf63656bc1780685d22169cd4ef" > ◆   < / a > < / span > qmv_quad_impl()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T , int group_size, int bits, int D> < / div >
< table class = "memname" >
< tr >
< td class = "memname" > METAL_FUNC void qmv_quad_impl < / td >
< td > (< / td >
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > y< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > in_vec_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > out_vec_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > quad_gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > quad_lid< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
< a id = "ae756f6817b584c60f5dcdd1d9c6b4f58" name = "ae756f6817b584c60f5dcdd1d9c6b4f58" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#ae756f6817b584c60f5dcdd1d9c6b4f58" > ◆   < / a > < / span > qouter()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename U , int values_per_thread, int bits> < / div >
< table class = "mlabels" >
< tr >
< td class = "mlabels-left" >
< table class = "memname" >
< tr >
< td class = "memname" > void qouter < / td >
< td > (< / td >
< td class = "paramtype" > const thread uint8_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > U< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > U< / td > < td class = "paramname" > < span class = "paramname" > < em > scale< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > U< / td > < td class = "paramname" > < span class = "paramname" > < em > bias< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > thread U *< / td > < td class = "paramname" > < span class = "paramname" > < em > result< / em > < / span >   )< / td >
< / tr >
< / table >
< / td >
< td class = "mlabels-right" >
< span class = "mlabels" > < span class = "mlabel" > inline< / span > < / span > < / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
< a id = "ad84f7d5ab9e32dbbe3ca759ae5d5d5c5" name = "ad84f7d5ab9e32dbbe3ca759ae5d5d5c5" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#ad84f7d5ab9e32dbbe3ca759ae5d5d5c5" > ◆   < / a > < / span > qvm()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T , const int group_size, const int bits, bool batched> < / div >
< table class = "memname" >
< tr >
< td class = "memname" > void qvm < / td >
< td > (< / td >
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > y< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > in_vec_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > out_vec_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > x_batch_ndims< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > x_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > w_batch_ndims< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > s_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > b_strides< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_lid< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
< a id = "a4a8c8db7d5d480733726fd6d1a645e12" name = "a4a8c8db7d5d480733726fd6d1a645e12" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a4a8c8db7d5d480733726fd6d1a645e12" > ◆   < / a > < / span > qvm_impl()< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
template< typename T , const int group_size, const int bits> < / div >
< table class = "memname" >
< tr >
< td class = "memname" > METAL_FUNC void qvm_impl < / td >
< td > (< / td >
< td class = "paramtype" > const device uint32_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > w< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > scales< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > biases< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > x< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > y< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > in_vec_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > out_vec_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_lid< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
< h2 class = "groupheader" > Variable Documentation< / h2 >
< a id = "a803e4d5a1459844ba647aea5b004e133" name = "a803e4d5a1459844ba647aea5b004e133" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a803e4d5a1459844ba647aea5b004e133" > ◆   < / a > < / span > QUAD_SIZE< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< table class = "mlabels" >
< tr >
< td class = "mlabels-left" >
< table class = "memname" >
< tr >
< td class = "memname" > constant constexpr const int QUAD_SIZE = 4< / td >
< / tr >
< / table >
< / td >
< td class = "mlabels-right" >
< span class = "mlabels" > < span class = "mlabel" > static< / span > < span class = "mlabel" > constexpr< / span > < / span > < / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
< a id = "a62969a218d93680f5e35d0c61b160b99" name = "a62969a218d93680f5e35d0c61b160b99" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a62969a218d93680f5e35d0c61b160b99" > ◆   < / a > < / span > SIMD_SIZE< / h2 >
< div class = "memitem" >
< div class = "memproto" >
< table class = "mlabels" >
< tr >
< td class = "mlabels-left" >
< table class = "memname" >
< tr >
< td class = "memname" > constant constexpr const int SIMD_SIZE = 32< / td >
< / tr >
< / table >
< / td >
< td class = "mlabels-right" >
< span class = "mlabels" > < span class = "mlabel" > static< / span > < span class = "mlabel" > constexpr< / span > < / span > < / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
< / div > <!-- contents -->
<!-- start footer part -->
< hr class = "footer" / > < address class = "footer" > < small >
Generated by  < a href = "https://www.doxygen.org/index.html" > < img class = "footer" src = "doxygen.svg" width = "104" height = "31" alt = "doxygen" / > < / a > 1.12.0
< / small > < / address >
< / div > <!-- doc - content -->
< / body >
< / html >