2024-06-07 11:28:06 +08:00
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
< html xmlns = "http://www.w3.org/1999/xhtml" lang = "en-US" >
< head >
< meta http-equiv = "Content-Type" content = "text/xhtml;charset=UTF-8" / >
< meta http-equiv = "X-UA-Compatible" content = "IE=11" / >
< meta name = "generator" content = "Doxygen 1.10.0" / >
< meta name = "viewport" content = "width=device-width, initial-scale=1" / >
< title > MLX: mlx/backend/metal/kernels/reduction/reduce_col.h File Reference< / title >
< link href = "tabs.css" rel = "stylesheet" type = "text/css" / >
< script type = "text/javascript" src = "jquery.js" > < / script >
< script type = "text/javascript" src = "dynsections.js" > < / script >
< script type = "text/javascript" src = "clipboard.js" > < / script >
< script type = "text/javascript" src = "cookie.js" > < / script >
< link href = "search/search.css" rel = "stylesheet" type = "text/css" / >
< script type = "text/javascript" src = "search/searchdata.js" > < / script >
< script type = "text/javascript" src = "search/search.js" > < / script >
< link href = "doxygen.css" rel = "stylesheet" type = "text/css" / >
< / head >
< body >
< div id = "top" > <!-- do not remove this div, it is closed by doxygen! -->
< div id = "titlearea" >
< table cellspacing = "0" cellpadding = "0" >
< tbody >
< tr id = "projectrow" >
< td id = "projectalign" >
< div id = "projectname" > MLX
< / div >
< / td >
< / tr >
< / tbody >
< / table >
< / div >
<!-- end header part -->
<!-- Generated by Doxygen 1.10.0 -->
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
var searchBox = new SearchBox("searchBox", "search/",'.html');
/* @license-end */
< / script >
< script type = "text/javascript" src = "menudata.js" > < / script >
< script type = "text/javascript" src = "menu.js" > < / script >
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
$(function() {
initMenu('',true,false,'search.php','Search');
$(function() { init_search(); });
});
/* @license-end */
< / script >
< div id = "main-nav" > < / div >
<!-- window showing the filter options -->
< div id = "MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
< / div >
<!-- div showing the search results (closed by default) -->
< div id = "MSearchResultsWindow" >
< div id = "MSearchResults" >
< div class = "SRPage" >
< div id = "SRIndex" >
< div id = "SRResults" > < / div >
< div class = "SRStatus" id = "Loading" > Loading...< / div >
< div class = "SRStatus" id = "Searching" > Searching...< / div >
< div class = "SRStatus" id = "NoMatches" > No Matches< / div >
< / div >
< / div >
< / div >
< / div >
< div id = "nav-path" class = "navpath" >
< ul >
< li class = "navelem" > < a class = "el" href = "dir_938ab0ecf10b8b860ff766c820f665fd.html" > mlx< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_1d446c9bd3c99228254c9484e0bc5c06.html" > backend< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_d0c977ea65824390717cdb7efc36c157.html" > metal< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_70a37effa88bcbd6b791977fa1e64356.html" > kernels< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_f60cd69d27fd3faa641c79056fff0e2d.html" > reduction< / a > < / li > < / ul >
< / div >
< / div > <!-- top -->
< div class = "header" >
< div class = "summary" >
< a href = "#func-members" > Functions< / a > < / div >
< div class = "headertitle" > < div class = "title" > reduce_col.h File Reference< / div > < / div >
< / div > <!-- header -->
< div class = "contents" >
< p > < a href = "reduce__col_8h_source.html" > Go to the source code of this file.< / a > < / p >
< table class = "memberdecls" >
< tr class = "heading" > < td colspan = "2" > < h2 class = "groupheader" > < a id = "func-members" name = "func-members" > < / a >
Functions< / h2 > < / td > < / tr >
2024-08-24 03:14:53 +08:00
<tr class="memitem:adf7aeb18cd1d5042cf6d9b46b582d8ce" id="r_adf7aeb18cd1d5042cf6d9b46b582d8ce"><td class="memTemplParams" colspan="2">template&lt;typename T, typename U, typename Op, int NDIMS = 0, int N_READS = REDUCE_N_READS&gt;</td></tr>
< tr class = "memitem:adf7aeb18cd1d5042cf6d9b46b582d8ce" > < td class = "memTemplItemLeft" align = "right" valign = "top" > void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#adf7aeb18cd1d5042cf6d9b46b582d8ce" > col_reduce_small< / a > (const device T *in, device U *out, const constant size_t & reduction_size, const constant size_t & reduction_stride, const constant int *shape, const constant size_t *strides, const constant int & ndim, const constant int *reduce_shape, const constant size_t *reduce_strides, const constant int & reduce_ndim, const constant size_t & non_col_reductions, uint3 gid, uint3 gsize, uint simd_lane_id, uint simd_group_id, uint3 tid, uint3 tsize)< / td > < / tr >
< tr class = "separator:adf7aeb18cd1d5042cf6d9b46b582d8ce" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
<tr class="memitem:a11bfc6112ae2386ac03f5ea7b7d93385" id="r_a11bfc6112ae2386ac03f5ea7b7d93385"><td class="memTemplParams" colspan="2">template&lt;typename T, typename U, typename Op, int NDIMS = 0, int BM = 8, int BN = 128&gt;</td></tr>
< tr class = "memitem:a11bfc6112ae2386ac03f5ea7b7d93385" > < td class = "memTemplItemLeft" align = "right" valign = "top" > void  < / td > < td class = "memTemplItemRight" valign = "bottom" > < a class = "el" href = "#a11bfc6112ae2386ac03f5ea7b7d93385" > col_reduce_looped< / a > (const device T *in, device U *out, const constant size_t & reduction_size, const constant size_t & reduction_stride, const constant int *shape, const constant size_t *strides, const constant int & ndim, const constant int *reduce_shape, const constant size_t *reduce_strides, const constant int & reduce_ndim, const constant size_t & non_col_reductions, uint3 gid, uint3 gsize, uint simd_lane_id, uint simd_group_id)< / td > < / tr >
<tr class="memdesc:a11bfc6112ae2386ac03f5ea7b7d93385"><td class="mdescLeft">&#160;</td><td class="mdescRight">Performs the column reduction with a simple looped approach.<br /></td></tr>
< tr class = "separator:a11bfc6112ae2386ac03f5ea7b7d93385" > < td class = "memSeparator" colspan = "2" >   < / td > < / tr >
2024-06-07 11:28:06 +08:00
< / table >
< h2 class = "groupheader" > Function Documentation< / h2 >
2024-08-24 03:14:53 +08:00
< a id = "a11bfc6112ae2386ac03f5ea7b7d93385" name = "a11bfc6112ae2386ac03f5ea7b7d93385" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a11bfc6112ae2386ac03f5ea7b7d93385" > ◆   < / a > < / span > col_reduce_looped()< / h2 >
2024-06-07 11:28:06 +08:00
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
2024-08-24 03:14:53 +08:00
template&lt;typename T, typename U, typename Op, int NDIMS = 0, int BM = 8, int BN = 128&gt;</div>
2024-06-07 11:28:06 +08:00
< table class = "memname" >
< tr >
2024-08-24 03:14:53 +08:00
< td class = "memname" > void col_reduce_looped < / td >
2024-06-07 11:28:06 +08:00
< td > (< / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > in< / em > , < / span > < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > device U *< / td > < td class = "paramname" > < span class = "paramname" > < em > out< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > const constant size_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > reduction_size< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > const constant size_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > reduction_stride< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > shape< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > strides< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > ndim< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > reduce_shape< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > reduce_strides< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > reduce_ndim< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > const constant size_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > non_col_reductions< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > gid< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > gsize< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_lane_id< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_group_id< / em > < / span >   )< / td >
2024-06-07 11:28:06 +08:00
< / tr >
< / table >
< / div > < div class = "memdoc" >
2024-08-24 03:14:53 +08:00
<p>The function uses the following simple looped approach:</p>
< ol type = "1" >
<li>Each thread keeps running totals for BN / n_simdgroups outputs.</li>
<li>Load a BM × BN tile into registers and accumulate it into the running totals.</li>
<li>Move ahead by BM steps until the column axis and the non-column reductions are exhausted.</li>
<li>If BM == 32, transpose in shared memory (SM) and SIMD-reduce the running totals. Otherwise, write to shared memory and have BN threads accumulate the running totals with a loop.</li>
<li>Write the results to the output.</li>
< / ol >
2024-06-07 11:28:06 +08:00
< / div >
< / div >
2024-08-24 03:14:53 +08:00
< a id = "adf7aeb18cd1d5042cf6d9b46b582d8ce" name = "adf7aeb18cd1d5042cf6d9b46b582d8ce" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#adf7aeb18cd1d5042cf6d9b46b582d8ce" > ◆   < / a > < / span > col_reduce_small()< / h2 >
2024-06-07 11:28:06 +08:00
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
2024-08-24 03:14:53 +08:00
template&lt;typename T, typename U, typename Op, int NDIMS = 0, int N_READS = REDUCE_N_READS&gt;</div>
2024-06-07 11:28:06 +08:00
< table class = "memname" >
< tr >
2024-08-24 03:14:53 +08:00
< td class = "memname" > void col_reduce_small < / td >
2024-06-07 11:28:06 +08:00
< td > (< / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > in< / em > , < / span > < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device U *< / td > < td class = "paramname" > < span class = "paramname" > < em > out< / em > , < / span > < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > reduction_size< / em > , < / span > < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > reduction_stride< / em > , < / span > < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > shape< / em > , < / span > < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > strides< / em > , < / span > < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > ndim< / em > , < / span > < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > reduce_shape< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > const constant size_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > reduce_strides< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > reduce_ndim< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > const constant size_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > non_col_reductions< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > gid< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > gsize< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_lane_id< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_group_id< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tid< / em > , < / span > < / td >
2024-06-07 11:28:06 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-08-24 03:14:53 +08:00
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > tsize< / em > < / span >   )< / td >
2024-06-07 11:28:06 +08:00
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
< / div > <!-- contents -->
<!-- start footer part -->
< hr class = "footer" / > < address class = "footer" > < small >
Generated by  < a href = "https://www.doxygen.org/index.html" > < img class = "footer" src = "doxygen.svg" width = "104" height = "31" alt = "doxygen" / > < / a > 1.10.0
< / small > < / address >
< / body >
< / html >