2024-10-15 23:12:17 +08:00
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
< html xmlns = "http://www.w3.org/1999/xhtml" lang = "en-US" >
< head >
< meta http-equiv = "Content-Type" content = "text/xhtml;charset=UTF-8" / >
< meta http-equiv = "X-UA-Compatible" content = "IE=11" / >
2025-01-10 05:56:20 +08:00
< meta name = "generator" content = "Doxygen 1.13.1" / >
2024-10-15 23:12:17 +08:00
< meta name = "viewport" content = "width=device-width, initial-scale=1" / >
< title > MLX: mlx/backend/metal/kernels/reduction/reduce_col.h File Reference< / title >
< link href = "tabs.css" rel = "stylesheet" type = "text/css" / >
< script type = "text/javascript" src = "jquery.js" > < / script >
< script type = "text/javascript" src = "dynsections.js" > < / script >
< script type = "text/javascript" src = "clipboard.js" > < / script >
< link href = "navtree.css" rel = "stylesheet" type = "text/css" / >
2025-01-10 05:56:20 +08:00
< script type = "text/javascript" src = "navtreedata.js" > < / script >
< script type = "text/javascript" src = "navtree.js" > < / script >
2024-10-15 23:12:17 +08:00
< script type = "text/javascript" src = "resize.js" > < / script >
< script type = "text/javascript" src = "cookie.js" > < / script >
< link href = "search/search.css" rel = "stylesheet" type = "text/css" / >
< script type = "text/javascript" src = "search/searchdata.js" > < / script >
< script type = "text/javascript" src = "search/search.js" > < / script >
2025-01-10 05:56:20 +08:00
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
$(function() { init_search(); });
/* @license-end */
< / script >
2024-10-15 23:12:17 +08:00
< link href = "doxygen.css" rel = "stylesheet" type = "text/css" / >
< / head >
< body >
< div id = "top" > <!-- do not remove this div, it is closed by doxygen! -->
< div id = "titlearea" >
< table cellspacing = "0" cellpadding = "0" >
< tbody >
< tr id = "projectrow" >
< td id = "projectalign" >
< div id = "projectname" > MLX
< / div >
< / td >
2025-01-10 05:56:20 +08:00
< td > < div id = "MSearchBox" class = "MSearchBoxInactive" >
< span class = "left" >
< span id = "MSearchSelect" onmouseover = "return searchBox.OnSearchSelectShow()" onmouseout = "return searchBox.OnSearchSelectHide()" >   < / span >
< input type = "text" id = "MSearchField" value = "" placeholder = "Search" accesskey = "S"
onfocus="searchBox.OnSearchFieldFocus(true)"
onblur="searchBox.OnSearchFieldFocus(false)"
onkeyup="searchBox.OnSearchFieldChange(event)"/>
< / span > < span class = "right" >
< a id = "MSearchClose" href = "javascript:searchBox.CloseResultsWindow()" > < img id = "MSearchCloseImg" border = "0" src = "search/close.svg" alt = "" / > < / a >
< / span >
< / div >
< / td >
2024-10-15 23:12:17 +08:00
< / tr >
< / tbody >
< / table >
< / div >
<!-- end header part -->
2025-01-10 05:56:20 +08:00
<!-- Generated by Doxygen 1.13.1 -->
2024-10-15 23:12:17 +08:00
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
var searchBox = new SearchBox("searchBox", "search/",'.html');
/* @license-end */
< / script >
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
$(function() { codefold.init(0); });
/* @license-end */
< / script >
2025-01-10 05:56:20 +08:00
< / div > <!-- top -->
< div id = "side-nav" class = "ui-resizable side-nav-resizable" >
< div id = "nav-tree" >
< div id = "nav-tree-contents" >
< div id = "nav-sync" class = "sync" > < / div >
< / div >
< / div >
< div id = "splitbar" style = "-moz-user-select:none;"
class="ui-resizable-handle">
< / div >
< / div >
2024-10-15 23:12:17 +08:00
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
2025-01-10 05:56:20 +08:00
$(function(){initNavTree('reduce__col_8h.html',''); initResizable(true); });
2024-10-15 23:12:17 +08:00
/* @license-end */
< / script >
2025-01-10 05:56:20 +08:00
< div id = "doc-content" >
2024-10-15 23:12:17 +08:00
<!-- window showing the filter options -->
< div id = "MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
< / div >
<!-- iframe showing the search results (closed by default) -->
< div id = "MSearchResultsWindow" >
< div id = "MSearchResults" >
< div class = "SRPage" >
< div id = "SRIndex" >
< div id = "SRResults" > < / div >
< div class = "SRStatus" id = "Loading" > Loading...< / div >
< div class = "SRStatus" id = "Searching" > Searching...< / div >
< div class = "SRStatus" id = "NoMatches" > No Matches< / div >
< / div >
< / div >
< / div >
< / div >
< div class = "header" >
< div class = "summary" >
< a href = "#func-members" > Functions< / a > < / div >
< div class = "headertitle" > < div class = "title" > reduce_col.h File Reference< / div > < / div >
< / div > <!-- header -->
< div class = "contents" >
< p > < a href = "reduce__col_8h_source.html" > Go to the source code of this file.< / a > < / p >
< table class = "memberdecls" >
< tr class = "heading" > < td colspan = "2" > < h2 class = "groupheader" > < a id = "func-members" name = "func-members" > < / a >
Functions< / h2 > < / td > < / tr >
2025-01-10 05:56:20 +08:00
<tr class="memitem:a674f4b6075bab1b89778e10ab24c557e" id="r_a674f4b6075bab1b89778e10ab24c557e"><td class="memTemplParams" colspan="2">template&lt;typename T, typename U, typename Op, typename IdxT, int NDIMS&gt;</td></tr>
<tr class="memitem:a674f4b6075bab1b89778e10ab24c557e"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#a674f4b6075bab1b89778e10ab24c557e">col_reduce_small</a> (const device T *in, device U *out, const constant size_t &amp;reduction_size, const constant int64_t &amp;reduction_stride, const constant int *shape, const constant int64_t *strides, const constant int &amp;ndim, const constant int *reduce_shape, const constant int64_t *reduce_strides, const constant int &amp;reduce_ndim, const constant size_t &amp;non_col_reductions, uint3 gid, uint3 gsize, uint3 lid, uint3 lsize)</td></tr>
<tr class="separator:a674f4b6075bab1b89778e10ab24c557e"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a00bb1ed154aef8c1d2cb15d0c4231a51" id="r_a00bb1ed154aef8c1d2cb15d0c4231a51"><td class="memTemplParams" colspan="2">template&lt;typename T, typename U, typename Op, typename IdxT, int NDIMS&gt;</td></tr>
<tr class="memitem:a00bb1ed154aef8c1d2cb15d0c4231a51"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#a00bb1ed154aef8c1d2cb15d0c4231a51">col_reduce_longcolumn</a> (const device T *in, device U *out, const constant size_t &amp;reduction_size, const constant size_t &amp;reduction_stride, const constant int *shape, const constant int64_t *strides, const constant int &amp;ndim, const constant int *reduce_shape, const constant int64_t *reduce_strides, const constant int &amp;reduce_ndim, const constant size_t &amp;non_col_reductions, const constant size_t &amp;out_size, uint3 gid, uint3 gsize, uint3 lid, uint3 lsize)</td></tr>
<tr class="separator:a00bb1ed154aef8c1d2cb15d0c4231a51"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a59be8d8c4bca2217f62ea868550393c0" id="r_a59be8d8c4bca2217f62ea868550393c0"><td class="memTemplParams" colspan="2">template&lt;typename T, typename U, typename Op, typename IdxT, int NDIMS, int BM, int BN&gt;</td></tr>
<tr class="memitem:a59be8d8c4bca2217f62ea868550393c0"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#a59be8d8c4bca2217f62ea868550393c0">col_reduce_looped</a> (const device T *in, device U *out, const constant size_t &amp;reduction_size, const constant int64_t &amp;reduction_stride, const constant int *shape, const constant int64_t *strides, const constant int &amp;ndim, const constant int *reduce_shape, const constant int64_t *reduce_strides, const constant int &amp;reduce_ndim, const constant size_t &amp;non_col_reductions, uint3 gid, uint3 gsize, uint simd_lane_id, uint simd_group_id)</td></tr>
<tr class="memdesc:a59be8d8c4bca2217f62ea868550393c0"><td class="mdescLeft">&#160;</td><td class="mdescRight">Column reduction using a simple looped approach (the steps are listed in the detailed description of this function).<br /></td></tr>
<tr class="separator:a59be8d8c4bca2217f62ea868550393c0"><td class="memSeparator" colspan="2">&#160;</td></tr>
<tr class="memitem:a5a3d4847e4ae940d3a532d790c0b9b27" id="r_a5a3d4847e4ae940d3a532d790c0b9b27"><td class="memTemplParams" colspan="2">template&lt;typename T, typename U, typename Op, typename IdxT, int NDIMS, int BM, int BN&gt;</td></tr>
<tr class="memitem:a5a3d4847e4ae940d3a532d790c0b9b27"><td class="memTemplItemLeft" align="right" valign="top">void&#160;</td><td class="memTemplItemRight" valign="bottom"><a class="el" href="#a5a3d4847e4ae940d3a532d790c0b9b27">col_reduce_2pass</a> (const device T *in, device U *out, const constant size_t &amp;reduction_size, const constant int64_t &amp;reduction_stride, const constant int *shape, const constant int64_t *strides, const constant int &amp;ndim, const constant int *reduce_shape, const constant int64_t *reduce_strides, const constant int &amp;reduce_ndim, const constant size_t &amp;non_col_reductions, const constant size_t &amp;out_size, uint3 gid, uint3 gsize, uint simd_lane_id, uint simd_group_id)</td></tr>
<tr class="separator:a5a3d4847e4ae940d3a532d790c0b9b27"><td class="memSeparator" colspan="2">&#160;</td></tr>
2024-10-15 23:12:17 +08:00
< / table >
< h2 class = "groupheader" > Function Documentation< / h2 >
2025-01-10 05:56:20 +08:00
< a id = "a5a3d4847e4ae940d3a532d790c0b9b27" name = "a5a3d4847e4ae940d3a532d790c0b9b27" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a5a3d4847e4ae940d3a532d790c0b9b27" > ◆   < / a > < / span > col_reduce_2pass()< / h2 >
2024-11-06 03:54:16 +08:00
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
2025-01-10 05:56:20 +08:00
template&lt;typename T, typename U, typename Op, typename IdxT, int NDIMS, int BM, int BN&gt;</div>
2024-11-06 03:54:16 +08:00
< table class = "memname" >
< tr >
< td class = "memname" > void col_reduce_2pass < / td >
< td > (< / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > in< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device U *< / td > < td class = "paramname" > < span class = "paramname" > < em > out< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > reduction_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2025-01-10 05:56:20 +08:00
< td class = "paramtype" > const constant int64_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > reduction_stride< / em > < / span > , < / td >
2024-11-06 03:54:16 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2025-01-10 05:56:20 +08:00
< td class = "paramtype" > const constant int64_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > strides< / em > < / span > , < / td >
2024-11-06 03:54:16 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > ndim< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > reduce_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2025-01-10 05:56:20 +08:00
< td class = "paramtype" > const constant int64_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > reduce_strides< / em > < / span > , < / td >
2024-11-06 03:54:16 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > reduce_ndim< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > non_col_reductions< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > out_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > gsize< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_lane_id< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_group_id< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
2025-01-10 05:56:20 +08:00
< a id = "a00bb1ed154aef8c1d2cb15d0c4231a51" name = "a00bb1ed154aef8c1d2cb15d0c4231a51" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a00bb1ed154aef8c1d2cb15d0c4231a51" > ◆   < / a > < / span > col_reduce_longcolumn()< / h2 >
2024-11-06 03:54:16 +08:00
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
2025-01-10 05:56:20 +08:00
template&lt;typename T, typename U, typename Op, typename IdxT, int NDIMS&gt;</div>
2024-11-06 03:54:16 +08:00
< table class = "memname" >
< tr >
< td class = "memname" > void col_reduce_longcolumn < / td >
< td > (< / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > in< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device U *< / td > < td class = "paramname" > < span class = "paramname" > < em > out< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > reduction_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > reduction_stride< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2025-01-10 05:56:20 +08:00
< td class = "paramtype" > const constant int64_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > strides< / em > < / span > , < / td >
2024-11-06 03:54:16 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > ndim< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > reduce_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2025-01-10 05:56:20 +08:00
< td class = "paramtype" > const constant int64_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > reduce_strides< / em > < / span > , < / td >
2024-11-06 03:54:16 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > reduce_ndim< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > non_col_reductions< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > out_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > gsize< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > lid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > lsize< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
2025-01-10 05:56:20 +08:00
< a id = "a59be8d8c4bca2217f62ea868550393c0" name = "a59be8d8c4bca2217f62ea868550393c0" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a59be8d8c4bca2217f62ea868550393c0" > ◆   < / a > < / span > col_reduce_looped()< / h2 >
2024-10-15 23:12:17 +08:00
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
2025-01-10 05:56:20 +08:00
template&lt;typename T, typename U, typename Op, typename IdxT, int NDIMS, int BM, int BN&gt;</div>
2024-10-15 23:12:17 +08:00
< table class = "memname" >
< tr >
< td class = "memname" > void col_reduce_looped < / td >
< td > (< / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > in< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device U *< / td > < td class = "paramname" > < span class = "paramname" > < em > out< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > reduction_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2025-01-10 05:56:20 +08:00
< td class = "paramtype" > const constant int64_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > reduction_stride< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2025-01-10 05:56:20 +08:00
< td class = "paramtype" > const constant int64_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > ndim< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > reduce_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2025-01-10 05:56:20 +08:00
< td class = "paramtype" > const constant int64_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > reduce_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > reduce_ndim< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > non_col_reductions< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > gsize< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_lane_id< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint< / td > < td class = "paramname" > < span class = "paramname" > < em > simd_group_id< / em > < / span >   )< / td >
< / tr >
< / table >
< / div > < div class = "memdoc" >
<p>The kernel uses a simple looped approach:</p>
<ol type="1">
<li>Each thread keeps running totals for BN / n_simdgroups outputs.</li>
<li>Load a (BM, BN) tile into registers and accumulate it into the running totals.</li>
<li>Move ahead by BM steps until the column axis and the non-column reductions are exhausted.</li>
<li>If BM == 32, transpose in shared memory (SM) and SIMD-reduce the running totals. Otherwise, write to shared memory and have BN threads accumulate the running totals with a loop.</li>
<li>Write the results to the output.</li>
</ol>
< / div >
< / div >
2025-01-10 05:56:20 +08:00
< a id = "a674f4b6075bab1b89778e10ab24c557e" name = "a674f4b6075bab1b89778e10ab24c557e" > < / a >
< h2 class = "memtitle" > < span class = "permalink" > < a href = "#a674f4b6075bab1b89778e10ab24c557e" > ◆   < / a > < / span > col_reduce_small()< / h2 >
2024-10-15 23:12:17 +08:00
< div class = "memitem" >
< div class = "memproto" >
< div class = "memtemplate" >
2025-01-10 05:56:20 +08:00
template&lt;typename T, typename U, typename Op, typename IdxT, int NDIMS&gt;</div>
2024-10-15 23:12:17 +08:00
< table class = "memname" >
< tr >
< td class = "memname" > void col_reduce_small < / td >
< td > (< / td >
< td class = "paramtype" > const device T *< / td > < td class = "paramname" > < span class = "paramname" > < em > in< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > device U *< / td > < td class = "paramname" > < span class = "paramname" > < em > out< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > reduction_size< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2025-01-10 05:56:20 +08:00
< td class = "paramtype" > const constant int64_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > reduction_stride< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2025-01-10 05:56:20 +08:00
< td class = "paramtype" > const constant int64_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > ndim< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int *< / td > < td class = "paramname" > < span class = "paramname" > < em > reduce_shape< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2025-01-10 05:56:20 +08:00
< td class = "paramtype" > const constant int64_t *< / td > < td class = "paramname" > < span class = "paramname" > < em > reduce_strides< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant int & < / td > < td class = "paramname" > < span class = "paramname" > < em > reduce_ndim< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > const constant size_t & < / td > < td class = "paramname" > < span class = "paramname" > < em > non_col_reductions< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > gid< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > gsize< / em > < / span > , < / td >
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-11-06 03:54:16 +08:00
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > lid< / em > < / span > , < / td >
2024-10-15 23:12:17 +08:00
< / tr >
< tr >
< td class = "paramkey" > < / td >
< td > < / td >
2024-11-06 03:54:16 +08:00
< td class = "paramtype" > uint3< / td > < td class = "paramname" > < span class = "paramname" > < em > lsize< / em > < / span >   )< / td >
2024-10-15 23:12:17 +08:00
< / tr >
< / table >
< / div > < div class = "memdoc" >
< / div >
< / div >
< / div > <!-- contents -->
</div><!-- doc-content -->
2025-01-10 05:56:20 +08:00
<!-- start footer part -->
< div id = "nav-path" class = "navpath" > <!-- id is needed for treeview function! -->
< ul >
< li class = "navelem" > < a class = "el" href = "dir_938ab0ecf10b8b860ff766c820f665fd.html" > mlx< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_1d446c9bd3c99228254c9484e0bc5c06.html" > backend< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_d0c977ea65824390717cdb7efc36c157.html" > metal< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_70a37effa88bcbd6b791977fa1e64356.html" > kernels< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_f60cd69d27fd3faa641c79056fff0e2d.html" > reduction< / a > < / li > < li class = "navelem" > < a class = "el" href = "reduce__col_8h.html" > reduce_col.h< / a > < / li >
< li class = "footer" > Generated by < a href = "https://www.doxygen.org/index.html" > < img class = "footer" src = "doxygen.svg" width = "104" height = "31" alt = "doxygen" / > < / a > 1.13.1 < / li >
< / ul >
< / div >
2024-10-15 23:12:17 +08:00
< / body >
< / html >