2024-10-15 23:12:17 +08:00
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
< html xmlns = "http://www.w3.org/1999/xhtml" lang = "en-US" >
< head >
< meta http-equiv = "Content-Type" content = "text/xhtml;charset=UTF-8" / >
< meta http-equiv = "X-UA-Compatible" content = "IE=11" / >
2025-01-10 05:56:20 +08:00
< meta name = "generator" content = "Doxygen 1.13.1" / >
2024-10-15 23:12:17 +08:00
< meta name = "viewport" content = "width=device-width, initial-scale=1" / >
2024-11-23 04:24:16 +08:00
< title > MLX: mlx/backend/metal/kernels/steel/attn/mma.h Source File< / title >
2024-10-15 23:12:17 +08:00
< link href = "tabs.css" rel = "stylesheet" type = "text/css" / >
< script type = "text/javascript" src = "jquery.js" > < / script >
< script type = "text/javascript" src = "dynsections.js" > < / script >
< script type = "text/javascript" src = "clipboard.js" > < / script >
< link href = "navtree.css" rel = "stylesheet" type = "text/css" / >
2025-01-10 05:56:20 +08:00
< script type = "text/javascript" src = "navtreedata.js" > < / script >
< script type = "text/javascript" src = "navtree.js" > < / script >
2024-10-15 23:12:17 +08:00
< script type = "text/javascript" src = "resize.js" > < / script >
< script type = "text/javascript" src = "cookie.js" > < / script >
< link href = "search/search.css" rel = "stylesheet" type = "text/css" / >
< script type = "text/javascript" src = "search/searchdata.js" > < / script >
< script type = "text/javascript" src = "search/search.js" > < / script >
2025-01-10 05:56:20 +08:00
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699& dn=expat.txt MIT */
$(function() { init_search(); });
/* @license-end */
< / script >
2024-10-15 23:12:17 +08:00
< link href = "doxygen.css" rel = "stylesheet" type = "text/css" / >
< / head >
< body >
< div id = "top" > <!-- do not remove this div, it is closed by doxygen! -->
< div id = "titlearea" >
< table cellspacing = "0" cellpadding = "0" >
< tbody >
< tr id = "projectrow" >
< td id = "projectalign" >
< div id = "projectname" > MLX
< / div >
< / td >
2025-01-10 05:56:20 +08:00
< td > < div id = "MSearchBox" class = "MSearchBoxInactive" >
< span class = "left" >
< span id = "MSearchSelect" onmouseover = "return searchBox.OnSearchSelectShow()" onmouseout = "return searchBox.OnSearchSelectHide()" >   < / span >
< input type = "text" id = "MSearchField" value = "" placeholder = "Search" accesskey = "S"
onfocus="searchBox.OnSearchFieldFocus(true)"
onblur="searchBox.OnSearchFieldFocus(false)"
onkeyup="searchBox.OnSearchFieldChange(event)"/>
< / span > < span class = "right" >
< a id = "MSearchClose" href = "javascript:searchBox.CloseResultsWindow()" > < img id = "MSearchCloseImg" border = "0" src = "search/close.svg" alt = "" / > < / a >
< / span >
< / div >
< / td >
2024-10-15 23:12:17 +08:00
< / tr >
< / tbody >
< / table >
< / div >
<!-- end header part -->
2025-01-10 05:56:20 +08:00
<!-- Generated by Doxygen 1.13.1 -->
2024-10-15 23:12:17 +08:00
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699& dn=expat.txt MIT */
var searchBox = new SearchBox("searchBox", "search/",'.html');
/* @license-end */
< / script >
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699& dn=expat.txt MIT */
$(function() { codefold.init(0); });
/* @license-end */
< / script >
2025-01-10 05:56:20 +08:00
< / div > <!-- top -->
< div id = "side-nav" class = "ui-resizable side-nav-resizable" >
< div id = "nav-tree" >
< div id = "nav-tree-contents" >
< div id = "nav-sync" class = "sync" > < / div >
< / div >
< / div >
< div id = "splitbar" style = "-moz-user-select:none;"
class="ui-resizable-handle">
< / div >
< / div >
2024-10-15 23:12:17 +08:00
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699& dn=expat.txt MIT */
2025-01-10 05:56:20 +08:00
$(function(){initNavTree('attn_2mma_8h_source.html',''); initResizable(true); });
2024-10-15 23:12:17 +08:00
/* @license-end */
< / script >
2025-01-10 05:56:20 +08:00
< div id = "doc-content" >
2024-10-15 23:12:17 +08:00
<!-- window showing the filter options -->
< div id = "MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
< / div >
<!-- iframe showing the search results (closed by default) -->
< div id = "MSearchResultsWindow" >
< div id = "MSearchResults" >
< div class = "SRPage" >
< div id = "SRIndex" >
< div id = "SRResults" > < / div >
< div class = "SRStatus" id = "Loading" > Loading...< / div >
< div class = "SRStatus" id = "Searching" > Searching...< / div >
< div class = "SRStatus" id = "NoMatches" > No Matches< / div >
< / div >
< / div >
< / div >
< / div >
< div class = "header" >
< div class = "headertitle" > < div class = "title" > mma.h< / div > < / div >
< / div > <!-- header -->
< div class = "contents" >
2024-11-23 04:24:16 +08:00
< a href = "attn_2mma_8h.html" > Go to the documentation of this file.< / a > < div class = "fragment" > < div class = "line" > < a id = "l00001" name = "l00001" > < / a > < span class = "lineno" > 1< / span > < span class = "comment" > // Copyright © 2024 Apple Inc.< / span > < / div >
2024-10-15 23:12:17 +08:00
< div class = "line" > < a id = "l00002" name = "l00002" > < / a > < span class = "lineno" > 2< / span > < / div >
< div class = "line" > < a id = "l00003" name = "l00003" > < / a > < span class = "lineno" > 3< / span > < span class = "preprocessor" > #pragma once< / span > < / div >
< div class = "line" > < a id = "l00004" name = "l00004" > < / a > < span class = "lineno" > 4< / span > < / div >
< div class = "line" > < a id = "l00005" name = "l00005" > < / a > < span class = "lineno" > 5< / span > < span class = "preprocessor" > #include < metal_simdgroup> < / span > < / div >
< div class = "line" > < a id = "l00006" name = "l00006" > < / a > < span class = "lineno" > 6< / span > < span class = "preprocessor" > #include < metal_simdgroup_matrix> < / span > < / div >
< div class = "line" > < a id = "l00007" name = "l00007" > < / a > < span class = "lineno" > 7< / span > < span class = "preprocessor" > #include < metal_stdlib> < / span > < / div >
< div class = "line" > < a id = "l00008" name = "l00008" > < / a > < span class = "lineno" > 8< / span > < / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00009" name = "l00009" > < / a > < span class = "lineno" > 9< / span > < span class = "preprocessor" > #include " < a class = "code" href = "backend_2metal_2kernels_2steel_2attn_2transforms_8h.html" > mlx/backend/metal/kernels/steel/attn/transforms.h< / a > " < / span > < / div >
< div class = "line" > < a id = "l00010" name = "l00010" > < / a > < span class = "lineno" > 10< / span > < span class = "preprocessor" > #include " < a class = "code" href = "steel_2defines_8h.html" > mlx/backend/metal/kernels/steel/defines.h< / a > " < / span > < / div >
2024-10-31 11:00:19 +08:00
< div class = "line" > < a id = "l00011" name = "l00011" > < / a > < span class = "lineno" > 11< / span > < span class = "preprocessor" > #include " < a class = "code" href = "integral__constant_8h.html" > mlx/backend/metal/kernels/steel/utils/integral_constant.h< / a > " < / span > < / div >
< div class = "line" > < a id = "l00012" name = "l00012" > < / a > < span class = "lineno" > 12< / span > < / div >
< div class = "line" > < a id = "l00013" name = "l00013" > < / a > < span class = "lineno" > 13< / span > < span class = "keyword" > using namespace < / span > < a class = "code hl_namespace" href = "namespacemetal.html" > metal< / a > ;< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00014" name = "l00014" > < / a > < span class = "lineno" > 14< / span > < / div >
2024-10-31 11:00:19 +08:00
< div class = "line" > < a id = "l00016" name = "l00016" > < / a > < span class = "lineno" > 16< / span > < span class = "comment" > // MMA helper< / span > < / div >
< div class = "line" > < a id = "l00018" name = "l00018" > < / a > < span class = "lineno" > 18< / span > < / div >
< div class = "line" > < a id = "l00019" name = "l00019" > < / a > < span class = "lineno" > 19< / span > < span class = "keyword" > namespace < / span > < a class = "code hl_namespace" href = "namespacemlx.html" > mlx< / a > {< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00020" name = "l00020" > < / a > < span class = "lineno" > 20< / span > < span class = "keyword" > namespace < / span > < a class = "code hl_namespace" href = "namespacemlx_1_1steel.html" > steel< / a > {< / div >
2024-10-31 11:00:19 +08:00
< div class = "line" > < a id = "l00021" name = "l00021" > < / a > < span class = "lineno" > 21< / span > < / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00022" name = "l00022" > < / a > < span class = "lineno" > 22< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > RInt, < span class = "keyword" > typename< / span > CInt> < / div >
2024-10-31 11:00:19 +08:00
< div class = "foldopen" id = "foldopen00023" data-start = "{" data-end = "};" >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00023" name = "l00023" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_shape2_d.html" > 23< / a > < / span > < span class = "keyword" > struct < / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_shape2_d.html#a070ce70eb6d84361c7f313159c438a5c" > Shape2D< / a > {< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00024" name = "l00024" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_shape2_d.html#a6e9e8d56782fc8772bc432c7f58393fe" > 24< / a > < / span > RInt < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_shape2_d.html#a6e9e8d56782fc8772bc432c7f58393fe" > r< / a > ;< / div >
< div class = "line" > < a id = "l00025" name = "l00025" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_shape2_d.html#ae51347b2131647f2ed735ed43840d26e" > 25< / a > < / span > CInt < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_shape2_d.html#ae51347b2131647f2ed735ed43840d26e" > c< / a > ;< / div >
< div class = "line" > < a id = "l00026" name = "l00026" > < / a > < span class = "lineno" > 26< / span > < / div >
< div class = "line" > < a id = "l00027" name = "l00027" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_shape2_d.html#a070ce70eb6d84361c7f313159c438a5c" > 27< / a > < / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_shape2_d.html#a070ce70eb6d84361c7f313159c438a5c" > Shape2D< / a > (RInt r_, CInt c_) : < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_shape2_d.html#a6e9e8d56782fc8772bc432c7f58393fe" > r< / a > (r_), < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_shape2_d.html#ae51347b2131647f2ed735ed43840d26e" > c< / a > (c_) {}< / div >
< div class = "line" > < a id = "l00028" name = "l00028" > < / a > < span class = "lineno" > 28< / span > };< / div >
< / div >
< div class = "line" > < a id = "l00029" name = "l00029" > < / a > < span class = "lineno" > 29< / span > < / div >
< div class = "line" > < a id = "l00030" name = "l00030" > < / a > < span class = "lineno" > 30< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > Shape, < span class = "keyword" > typename< / span > Layout> < / div >
< div class = "foldopen" id = "foldopen00031" data-start = "{" data-end = "};" >
< div class = "line" > < a id = "l00031" name = "l00031" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_layout2_d.html" > 31< / a > < / span > < span class = "keyword" > struct < / span > < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_layout2_d.html" > Layout2D< / a > {< / div >
< div class = "line" > < a id = "l00032" name = "l00032" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_layout2_d.html#a23183747ab1ddbdd3f1fcac6d0faa2cd" > 32< / a > < / span > Shape < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_layout2_d.html#a23183747ab1ddbdd3f1fcac6d0faa2cd" > shape< / a > ;< / div >
< div class = "line" > < a id = "l00033" name = "l00033" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_layout2_d.html#a6beedf1677ee1b192fb48c83a29ac8a1" > 33< / a > < / span > Layout < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_layout2_d.html#a6beedf1677ee1b192fb48c83a29ac8a1" > layout< / a > ;< / div >
< div class = "line" > < a id = "l00034" name = "l00034" > < / a > < span class = "lineno" > 34< / span > };< / div >
< / div >
< div class = "line" > < a id = "l00035" name = "l00035" > < / a > < span class = "lineno" > 35< / span > < / div >
< div class = "line" > < a id = "l00036" name = "l00036" > < / a > < span class = "lineno" > 36< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > T, < span class = "keywordtype" > int< / span > kFragRows_, < span class = "keywordtype" > int< / span > kFragCols_> < / div >
< div class = "line" > < a id = "l00037" name = "l00037" > < / a > < span class = "lineno" > 37< / span > < span class = "keyword" > struct < / span > < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html" > BaseMMAFrag< / a > {< / div >
< div class = "line" > < a id = "l00038" name = "l00038" > < / a > < span class = "lineno" > 38< / span > < span class = "keyword" > static_assert< / span > (< / div >
< div class = "line" > < a id = "l00039" name = "l00039" > < / a > < span class = "lineno" > 39< / span > kFragRows_ == 8,< / div >
< div class = "line" > < a id = "l00040" name = "l00040" > < / a > < span class = "lineno" > 40< / span > < span class = "stringliteral" > " Only 8 x 8 fragment matrices are currently supported" < / span > );< / div >
< div class = "line" > < a id = "l00041" name = "l00041" > < / a > < span class = "lineno" > 41< / span > < span class = "keyword" > static_assert< / span > (< / div >
< div class = "line" > < a id = "l00042" name = "l00042" > < / a > < span class = "lineno" > 42< / span > kFragCols_ == 8,< / div >
< div class = "line" > < a id = "l00043" name = "l00043" > < / a > < span class = "lineno" > 43< / span > < span class = "stringliteral" > " Only 8 x 8 fragment matrices are currently supported" < / span > );< / div >
< div class = "line" > < a id = "l00044" name = "l00044" > < / a > < span class = "lineno" > 44< / span > };< / div >
2024-10-15 23:12:17 +08:00
< div class = "line" > < a id = "l00045" name = "l00045" > < / a > < span class = "lineno" > 45< / span > < / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00046" name = "l00046" > < / a > < span class = "lineno" > 46< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > T> < / div >
< div class = "line" > < a id = "l00047" name = "l00047" > < / a > < span class = "lineno" > 47< / span > < span class = "keyword" > struct < / span > BaseMMAFrag< T, 8, 8> {< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00048" name = "l00048" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a2fe53db449c692226f23f6b99fb2c0d4" > 48< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a2fe53db449c692226f23f6b99fb2c0d4" > kFragRows< / a > = 8;< / div >
< div class = "line" > < a id = "l00049" name = "l00049" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a211102315e2afbcfcd2e2c201b638e9f" > 49< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a211102315e2afbcfcd2e2c201b638e9f" > kFragCols< / a > = 8;< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00050" name = "l00050" > < / a > < span class = "lineno" > 50< / span > < / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00051" name = "l00051" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a3c34dfdc944db110f4735f1b25307cf0" > 51< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a3c34dfdc944db110f4735f1b25307cf0" > kElemsPerFrag< / a > = (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a2fe53db449c692226f23f6b99fb2c0d4" > kFragRows< / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a211102315e2afbcfcd2e2c201b638e9f" > kFragCols< / a > ) / 32;< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00052" name = "l00052" > < / a > < span class = "lineno" > 52< / span > < / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00053" name = "l00053" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a76aa5aa690dbcc954e957d767fad661f" > 53< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a76aa5aa690dbcc954e957d767fad661f" > kElemRows< / a > = 1;< / div >
< div class = "line" > < a id = "l00054" name = "l00054" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7c212200d86b4e93f274d99addf668bd" > 54< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7c212200d86b4e93f274d99addf668bd" > kElemCols< / a > = 2;< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00055" name = "l00055" > < / a > < span class = "lineno" > 55< / span > < / div >
< div class = "line" > < a id = "l00056" name = "l00056" > < / a > < span class = "lineno" > 56< / span > < span class = "keyword" > static_assert< / span > (< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00057" name = "l00057" > < / a > < span class = "lineno" > 57< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a76aa5aa690dbcc954e957d767fad661f" > kElemRows< / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7c212200d86b4e93f274d99addf668bd" > kElemCols< / a > == < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a3c34dfdc944db110f4735f1b25307cf0" > kElemsPerFrag< / a > ,< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00058" name = "l00058" > < / a > < span class = "lineno" > 58< / span > < span class = "stringliteral" > " MMAFrag shape is not consistent with MMAFrag size" < / span > );< / div >
< div class = "line" > < a id = "l00059" name = "l00059" > < / a > < span class = "lineno" > 59< / span > < / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00060" name = "l00060" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9" > 60< / a > < / span > < span class = "keyword" > typedef< / span > metal::simdgroup_matrix< T, kFragRows, kFragCols> < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9" > mat_type< / a > ;< / div >
< div class = "line" > < a id = "l00061" name = "l00061" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b" > 61< / a > < / span > < span class = "keyword" > typedef< / span > metal::vec< T, kElemsPerFrag> < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b" > frag_type< / a > ;< / div >
< div class = "line" > < a id = "l00062" name = "l00062" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a3dcd4301390937f89ed1dde6d28e341f" > 62< / a > < / span > < span class = "keyword" > typedef< / span > metal::vec< T, kElemRows> < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a3dcd4301390937f89ed1dde6d28e341f" > row_frag_type< / a > ;< / div >
< div class = "line" > < a id = "l00063" name = "l00063" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#adbb262a3c872e26533b68a39db16459e" > 63< / a > < / span > < span class = "keyword" > typedef< / span > metal::vec< T, kElemCols> < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#adbb262a3c872e26533b68a39db16459e" > col_frag_type< / a > ;< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00064" name = "l00064" > < / a > < span class = "lineno" > 64< / span > < / div >
< div class = "foldopen" id = "foldopen00065" data-start = "{" data-end = "}" >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00065" name = "l00065" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7331fff1d12f2f8b72b0006a3ad0dd83" > 65< / a > < / span > METAL_FUNC < span class = "keyword" > static< / span > < span class = "keyword" > constexpr< / span > short2 < a class = "code hl_function" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7331fff1d12f2f8b72b0006a3ad0dd83" > get_coord< / a > (ushort simd_lane_id< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00066" name = "l00066" > < / a > < span class = "lineno" > 66< / span > [[thread_index_in_simdgroup]]) {< / div >
< div class = "line" > < a id = "l00067" name = "l00067" > < / a > < span class = "lineno" > 67< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > qid = simd_lane_id / 4;< / div >
< div class = "line" > < a id = "l00068" name = "l00068" > < / a > < span class = "lineno" > 68< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > fm = (qid & 4) + ((simd_lane_id / 2) % 4);< / div >
< div class = "line" > < a id = "l00069" name = "l00069" > < / a > < span class = "lineno" > 69< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > fn = (qid & 2) * 2 + (simd_lane_id % 2) * 2;< / div >
< div class = "line" > < a id = "l00070" name = "l00070" > < / a > < span class = "lineno" > 70< / span > < span class = "keywordflow" > return< / span > short2{fn, fm};< / div >
< div class = "line" > < a id = "l00071" name = "l00071" > < / a > < span class = "lineno" > 71< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00072" name = "l00072" > < / a > < span class = "lineno" > 72< / span > < / div >
< div class = "line" > < a id = "l00073" name = "l00073" > < / a > < span class = "lineno" > 73< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > SrcPtrType, < span class = "keyword" > typename< / span > StrX, < span class = "keyword" > typename< / span > StrY> < / div >
< div class = "line" > < a id = "l00074" name = "l00074" > < / a > < span class = "lineno" > 74< / span > METAL_FUNC < span class = "keyword" > static< / span > < span class = "keyword" > constexpr< / span > < span class = "keywordtype" > void< / span > < / div >
< div class = "foldopen" id = "foldopen00075" data-start = "{" data-end = "}" >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00075" name = "l00075" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#ac73006b36fc710feda3a7c796e21415c" > 75< / a > < / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#ac73006b36fc710feda3a7c796e21415c" > load< / a > (thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b" > frag_type< / a > & dst, SrcPtrType src, StrX str_x, StrY str_y) {< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00076" name = "l00076" > < / a > < span class = "lineno" > 76< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00077" name = "l00077" > < / a > < span class = "lineno" > 77< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a76aa5aa690dbcc954e957d767fad661f" > kElemRows< / a > ; i++) {< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00078" name = "l00078" > < / a > < span class = "lineno" > 78< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00079" name = "l00079" > < / a > < span class = "lineno" > 79< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7c212200d86b4e93f274d99addf668bd" > kElemCols< / a > ; j++) {< / div >
< div class = "line" > < a id = "l00080" name = "l00080" > < / a > < span class = "lineno" > 80< / span > dst[i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7c212200d86b4e93f274d99addf668bd" > kElemCols< / a > + j] = < span class = "keyword" > static_cast< < / span > T< span class = "keyword" > > < / span > (src[i * str_x + j * str_y]);< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00081" name = "l00081" > < / a > < span class = "lineno" > 81< / span > }< / div >
< div class = "line" > < a id = "l00082" name = "l00082" > < / a > < span class = "lineno" > 82< / span > }< / div >
< div class = "line" > < a id = "l00083" name = "l00083" > < / a > < span class = "lineno" > 83< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00084" name = "l00084" > < / a > < span class = "lineno" > 84< / span > < / div >
< div class = "line" > < a id = "l00085" name = "l00085" > < / a > < span class = "lineno" > 85< / span > < span class = "keyword" > template< / span > < < / div >
< div class = "line" > < a id = "l00086" name = "l00086" > < / a > < span class = "lineno" > 86< / span > < span class = "keyword" > typename< / span > SrcPtrType,< / div >
< div class = "line" > < a id = "l00087" name = "l00087" > < / a > < span class = "lineno" > 87< / span > < span class = "keyword" > typename< / span > StrX,< / div >
< div class = "line" > < a id = "l00088" name = "l00088" > < / a > < span class = "lineno" > 88< / span > < span class = "keyword" > typename< / span > StrY,< / div >
< div class = "line" > < a id = "l00089" name = "l00089" > < / a > < span class = "lineno" > 89< / span > < span class = "keyword" > typename< / span > LimX,< / div >
< div class = "line" > < a id = "l00090" name = "l00090" > < / a > < span class = "lineno" > 90< / span > < span class = "keyword" > typename< / span > LimY,< / div >
< div class = "line" > < a id = "l00091" name = "l00091" > < / a > < span class = "lineno" > 91< / span > < span class = "keyword" > typename< / span > OffX,< / div >
< div class = "line" > < a id = "l00092" name = "l00092" > < / a > < span class = "lineno" > 92< / span > < span class = "keyword" > typename< / span > OffY> < / div >
< div class = "foldopen" id = "foldopen00093" data-start = "{" data-end = "}" >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00093" name = "l00093" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#ad22aaee4a2938cbdd315b39eda84e07d" > 93< / a > < / span > METAL_FUNC < span class = "keyword" > static< / span > < span class = "keyword" > constexpr< / span > < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#ad22aaee4a2938cbdd315b39eda84e07d" > load_safe< / a > (< / div >
< div class = "line" > < a id = "l00094" name = "l00094" > < / a > < span class = "lineno" > 94< / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b" > frag_type< / a > & dst,< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00095" name = "l00095" > < / a > < span class = "lineno" > 95< / span > SrcPtrType src,< / div >
< div class = "line" > < a id = "l00096" name = "l00096" > < / a > < span class = "lineno" > 96< / span > StrX str_x,< / div >
< div class = "line" > < a id = "l00097" name = "l00097" > < / a > < span class = "lineno" > 97< / span > StrY str_y,< / div >
< div class = "line" > < a id = "l00098" name = "l00098" > < / a > < span class = "lineno" > 98< / span > LimX lim_x,< / div >
< div class = "line" > < a id = "l00099" name = "l00099" > < / a > < span class = "lineno" > 99< / span > LimY lim_y,< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00100" name = "l00100" > < / a > < span class = "lineno" > 100< / span > OffX off_x = < a class = "code hl_typedef" href = "namespacemlx_1_1steel.html#afe36ddf6725498d273e5eef4f1579891" > Int< 0> < / a > {},< / div >
< div class = "line" > < a id = "l00101" name = "l00101" > < / a > < span class = "lineno" > 101< / span > OffY off_y = < a class = "code hl_typedef" href = "namespacemlx_1_1steel.html#afe36ddf6725498d273e5eef4f1579891" > Int< 0> < / a > {}) {< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00102" name = "l00102" > < / a > < span class = "lineno" > 102< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00103" name = "l00103" > < / a > < span class = "lineno" > 103< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < kElemRows; i++) {< / div >
< div class = "line" > < a id = "l00104" name = "l00104" > < / a > < span class = "lineno" > 104< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00105" name = "l00105" > < / a > < span class = "lineno" > 105< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < kElemCols; j++) {< / div >
< div class = "line" > < a id = "l00106" name = "l00106" > < / a > < span class = "lineno" > 106< / span > < span class = "keywordflow" > if< / span > ((off_x + i) < lim_x & & (off_y + j) < lim_y) {< / div >
< div class = "line" > < a id = "l00107" name = "l00107" > < / a > < span class = "lineno" > 107< / span > dst[i * kElemCols + j] =< / div >
< div class = "line" > < a id = "l00108" name = "l00108" > < / a > < span class = "lineno" > 108< / span > < span class = "keyword" > static_cast< < / span > T< span class = "keyword" > > < / span > (src[(off_x + i) * str_x + (off_x + j) * str_y]);< / div >
< div class = "line" > < a id = "l00109" name = "l00109" > < / a > < span class = "lineno" > 109< / span > } < span class = "keywordflow" > else< / span > {< / div >
< div class = "line" > < a id = "l00110" name = "l00110" > < / a > < span class = "lineno" > 110< / span > dst[i * kElemCols + j] = T(0);< / div >
< div class = "line" > < a id = "l00111" name = "l00111" > < / a > < span class = "lineno" > 111< / span > }< / div >
< div class = "line" > < a id = "l00112" name = "l00112" > < / a > < span class = "lineno" > 112< / span > }< / div >
< div class = "line" > < a id = "l00113" name = "l00113" > < / a > < span class = "lineno" > 113< / span > }< / div >
< div class = "line" > < a id = "l00114" name = "l00114" > < / a > < span class = "lineno" > 114< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00115" name = "l00115" > < / a > < span class = "lineno" > 115< / span > < / div >
< div class = "line" > < a id = "l00116" name = "l00116" > < / a > < span class = "lineno" > 116< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > DstPtrType, < span class = "keyword" > typename< / span > StrX, < span class = "keyword" > typename< / span > StrY> < / div >
< div class = "line" > < a id = "l00117" name = "l00117" > < / a > < span class = "lineno" > 117< / span > METAL_FUNC < span class = "keyword" > static< / span > < span class = "keyword" > constexpr< / span > < span class = "keywordtype" > void< / span > < / div >
< div class = "foldopen" id = "foldopen00118" data-start = "{" data-end = "}" >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00118" name = "l00118" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#aa8f50ea8961ec5b35c1b81366d64f2cb" > 118< / a > < / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#aa8f50ea8961ec5b35c1b81366d64f2cb" > store< / a > (< span class = "keyword" > const< / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b" > frag_type< / a > & src, DstPtrType dst, StrX str_x, StrY str_y) {< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00119" name = "l00119" > < / a > < span class = "lineno" > 119< / span > < span class = "keyword" > using < / span > U = < a class = "code hl_typedef" href = "namespacemetal.html#ac82ee6c3fbe9ec5c78c07329424aaec9" > pointer_element_t< DstPtrType> < / a > ;< / div >
< div class = "line" > < a id = "l00120" name = "l00120" > < / a > < span class = "lineno" > 120< / span > < / div >
< div class = "line" > < a id = "l00121" name = "l00121" > < / a > < span class = "lineno" > 121< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00122" name = "l00122" > < / a > < span class = "lineno" > 122< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a76aa5aa690dbcc954e957d767fad661f" > kElemRows< / a > ; i++) {< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00123" name = "l00123" > < / a > < span class = "lineno" > 123< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00124" name = "l00124" > < / a > < span class = "lineno" > 124< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7c212200d86b4e93f274d99addf668bd" > kElemCols< / a > ; j++) {< / div >
< div class = "line" > < a id = "l00125" name = "l00125" > < / a > < span class = "lineno" > 125< / span > dst[i * str_x + j * str_y] = < span class = "keyword" > static_cast< < / span > U< span class = "keyword" > > < / span > (src[i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7c212200d86b4e93f274d99addf668bd" > kElemCols< / a > + j]);< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00126" name = "l00126" > < / a > < span class = "lineno" > 126< / span > }< / div >
< div class = "line" > < a id = "l00127" name = "l00127" > < / a > < span class = "lineno" > 127< / span > }< / div >
< div class = "line" > < a id = "l00128" name = "l00128" > < / a > < span class = "lineno" > 128< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00129" name = "l00129" > < / a > < span class = "lineno" > 129< / span > < / div >
< div class = "line" > < a id = "l00130" name = "l00130" > < / a > < span class = "lineno" > 130< / span > < span class = "keyword" > template< / span > < < / div >
< div class = "line" > < a id = "l00131" name = "l00131" > < / a > < span class = "lineno" > 131< / span > < span class = "keyword" > typename< / span > DstPtrType,< / div >
< div class = "line" > < a id = "l00132" name = "l00132" > < / a > < span class = "lineno" > 132< / span > < span class = "keyword" > typename< / span > StrX,< / div >
< div class = "line" > < a id = "l00133" name = "l00133" > < / a > < span class = "lineno" > 133< / span > < span class = "keyword" > typename< / span > StrY,< / div >
< div class = "line" > < a id = "l00134" name = "l00134" > < / a > < span class = "lineno" > 134< / span > < span class = "keyword" > typename< / span > LimX,< / div >
< div class = "line" > < a id = "l00135" name = "l00135" > < / a > < span class = "lineno" > 135< / span > < span class = "keyword" > typename< / span > LimY,< / div >
< div class = "line" > < a id = "l00136" name = "l00136" > < / a > < span class = "lineno" > 136< / span > < span class = "keyword" > typename< / span > OffX,< / div >
< div class = "line" > < a id = "l00137" name = "l00137" > < / a > < span class = "lineno" > 137< / span > < span class = "keyword" > typename< / span > OffY> < / div >
< div class = "foldopen" id = "foldopen00138" data-start = "{" data-end = "}" >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00138" name = "l00138" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a1f0b00daad8eba2f855bb306e70d2328" > 138< / a > < / span > METAL_FUNC < span class = "keyword" > static< / span > < span class = "keyword" > constexpr< / span > < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a1f0b00daad8eba2f855bb306e70d2328" > store_safe< / a > (< / div >
< div class = "line" > < a id = "l00139" name = "l00139" > < / a > < span class = "lineno" > 139< / span > < span class = "keyword" > const< / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b" > frag_type< / a > & src,< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00140" name = "l00140" > < / a > < span class = "lineno" > 140< / span > DstPtrType dst,< / div >
< div class = "line" > < a id = "l00141" name = "l00141" > < / a > < span class = "lineno" > 141< / span > StrX str_x,< / div >
< div class = "line" > < a id = "l00142" name = "l00142" > < / a > < span class = "lineno" > 142< / span > StrY str_y,< / div >
< div class = "line" > < a id = "l00143" name = "l00143" > < / a > < span class = "lineno" > 143< / span > LimX lim_x,< / div >
< div class = "line" > < a id = "l00144" name = "l00144" > < / a > < span class = "lineno" > 144< / span > LimY lim_y,< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00145" name = "l00145" > < / a > < span class = "lineno" > 145< / span > OffX off_x = < a class = "code hl_typedef" href = "namespacemlx_1_1steel.html#afe36ddf6725498d273e5eef4f1579891" > Int< 0> < / a > {},< / div >
< div class = "line" > < a id = "l00146" name = "l00146" > < / a > < span class = "lineno" > 146< / span > OffY off_y = < a class = "code hl_typedef" href = "namespacemlx_1_1steel.html#afe36ddf6725498d273e5eef4f1579891" > Int< 0> < / a > {}) {< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00147" name = "l00147" > < / a > < span class = "lineno" > 147< / span > < span class = "keyword" > using < / span > U = < a class = "code hl_typedef" href = "namespacemetal.html#ac82ee6c3fbe9ec5c78c07329424aaec9" > pointer_element_t< DstPtrType> < / a > ;< / div >
< div class = "line" > < a id = "l00148" name = "l00148" > < / a > < span class = "lineno" > 148< / span > < / div >
< div class = "line" > < a id = "l00149" name = "l00149" > < / a > < span class = "lineno" > 149< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00150" name = "l00150" > < / a > < span class = "lineno" > 150< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < kElemRows; i++) {< / div >
< div class = "line" > < a id = "l00151" name = "l00151" > < / a > < span class = "lineno" > 151< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00152" name = "l00152" > < / a > < span class = "lineno" > 152< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < kElemCols; j++) {< / div >
< div class = "line" > < a id = "l00153" name = "l00153" > < / a > < span class = "lineno" > 153< / span > < span class = "keywordflow" > if< / span > ((off_x + i) < lim_x & & (off_y + j) < lim_y) {< / div >
< div class = "line" > < a id = "l00154" name = "l00154" > < / a > < span class = "lineno" > 154< / span > dst[(off_x + i) * str_x + (off_y + j) * str_y] =< / div >
< div class = "line" > < a id = "l00155" name = "l00155" > < / a > < span class = "lineno" > 155< / span > < span class = "keyword" > static_cast< < / span > U< span class = "keyword" > > < / span > (src[i * kElemCols + j]);< / div >
< div class = "line" > < a id = "l00156" name = "l00156" > < / a > < span class = "lineno" > 156< / span > }< / div >
< div class = "line" > < a id = "l00157" name = "l00157" > < / a > < span class = "lineno" > 157< / span > }< / div >
< div class = "line" > < a id = "l00158" name = "l00158" > < / a > < span class = "lineno" > 158< / span > }< / div >
< div class = "line" > < a id = "l00159" name = "l00159" > < / a > < span class = "lineno" > 159< / span > }< / div >
< / div >
2024-10-31 11:00:19 +08:00
< div class = "line" > < a id = "l00160" name = "l00160" > < / a > < span class = "lineno" > 160< / span > < / div >
2024-11-23 04:24:16 +08:00
< div class = "foldopen" id = "foldopen00161" data-start = "{" data-end = "}" >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00161" name = "l00161" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8028512f5a3d2b6acaf966be529627a3" > 161< / a > < / span > METAL_FUNC < span class = "keyword" > static< / span > < span class = "keyword" > constexpr< / span > < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8028512f5a3d2b6acaf966be529627a3" > mma< / a > (< / div >
< div class = "line" > < a id = "l00162" name = "l00162" > < / a > < span class = "lineno" > 162< / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b" > frag_type< / a > & D,< / div >
< div class = "line" > < a id = "l00163" name = "l00163" > < / a > < span class = "lineno" > 163< / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b" > frag_type< / a > & A,< / div >
< div class = "line" > < a id = "l00164" name = "l00164" > < / a > < span class = "lineno" > 164< / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b" > frag_type< / a > & B,< / div >
< div class = "line" > < a id = "l00165" name = "l00165" > < / a > < span class = "lineno" > 165< / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b" > frag_type< / a > & C) {< / div >
< div class = "line" > < a id = "l00166" name = "l00166" > < / a > < span class = "lineno" > 166< / span > < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9" > mat_type< / a > D_mat;< / div >
< div class = "line" > < a id = "l00167" name = "l00167" > < / a > < span class = "lineno" > 167< / span > < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9" > mat_type< / a > A_mat;< / div >
< div class = "line" > < a id = "l00168" name = "l00168" > < / a > < span class = "lineno" > 168< / span > < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9" > mat_type< / a > B_mat;< / div >
< div class = "line" > < a id = "l00169" name = "l00169" > < / a > < span class = "lineno" > 169< / span > < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9" > mat_type< / a > C_mat;< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00170" name = "l00170" > < / a > < span class = "lineno" > 170< / span > < / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00171" name = "l00171" > < / a > < span class = "lineno" > 171< / span > < span class = "keyword" > reinterpret_cast< < / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b" > frag_type< / a > & < span class = "keyword" > > < / span > (A_mat.thread_elements()) = A;< / div >
< div class = "line" > < a id = "l00172" name = "l00172" > < / a > < span class = "lineno" > 172< / span > < span class = "keyword" > reinterpret_cast< < / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b" > frag_type< / a > & < span class = "keyword" > > < / span > (B_mat.thread_elements()) = B;< / div >
< div class = "line" > < a id = "l00173" name = "l00173" > < / a > < span class = "lineno" > 173< / span > < span class = "keyword" > reinterpret_cast< < / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b" > frag_type< / a > & < span class = "keyword" > > < / span > (C_mat.thread_elements()) = C;< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00174" name = "l00174" > < / a > < span class = "lineno" > 174< / span > < / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00175" name = "l00175" > < / a > < span class = "lineno" > 175< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8028512f5a3d2b6acaf966be529627a3" > mma< / a > (D_mat, A_mat, B_mat, C_mat);< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00176" name = "l00176" > < / a > < span class = "lineno" > 176< / span > < / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00177" name = "l00177" > < / a > < span class = "lineno" > 177< / span > D = < span class = "keyword" > reinterpret_cast< < / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b" > frag_type< / a > & < span class = "keyword" > > < / span > (D_mat.thread_elements());< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00178" name = "l00178" > < / a > < span class = "lineno" > 178< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00179" name = "l00179" > < / a > < span class = "lineno" > 179< / span > < / div >
< div class = "foldopen" id = "foldopen00180" data-start = "{" data-end = "}" >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00180" name = "l00180" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a1868f57d57c8adedab2c58492ec76946" > 180< / a > < / span > METAL_FUNC < span class = "keyword" > static< / span > < span class = "keyword" > constexpr< / span > < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a1868f57d57c8adedab2c58492ec76946" > mma< / a > (< / div >
< div class = "line" > < a id = "l00181" name = "l00181" > < / a > < span class = "lineno" > 181< / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9" > mat_type< / a > & D,< / div >
< div class = "line" > < a id = "l00182" name = "l00182" > < / a > < span class = "lineno" > 182< / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9" > mat_type< / a > & A,< / div >
< div class = "line" > < a id = "l00183" name = "l00183" > < / a > < span class = "lineno" > 183< / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9" > mat_type< / a > & B,< / div >
< div class = "line" > < a id = "l00184" name = "l00184" > < / a > < span class = "lineno" > 184< / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9" > mat_type< / a > & C) {< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00185" name = "l00185" > < / a > < span class = "lineno" > 185< / span > simdgroup_multiply_accumulate(D, A, B, C);< / div >
< div class = "line" > < a id = "l00186" name = "l00186" > < / a > < span class = "lineno" > 186< / span > }< / div >
< / div >
2024-10-31 11:00:19 +08:00
< div class = "line" > < a id = "l00187" name = "l00187" > < / a > < span class = "lineno" > 187< / span > < / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00188" name = "l00188" > < / a > < span class = "lineno" > 188< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > Op> < / div >
< div class = "foldopen" id = "foldopen00189" data-start = "{" data-end = "}" >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00189" name = "l00189" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a51d662e4cff88b5ad17d7c44bb6b6970" > 189< / a > < / span > METAL_FUNC < span class = "keyword" > static< / span > < span class = "keyword" > constexpr< / span > < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a51d662e4cff88b5ad17d7c44bb6b6970" > row_reduce< / a > (< / div >
< div class = "line" > < a id = "l00190" name = "l00190" > < / a > < span class = "lineno" > 190< / span > thread < span class = "keyword" > const< / span > < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b" > frag_type< / a > & inp_vals,< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00191" name = "l00191" > < / a > < span class = "lineno" > 191< / span > thread T* reduced_vals) {< / div >
< div class = "line" > < a id = "l00192" name = "l00192" > < / a > < span class = "lineno" > 192< / span > T thr_reduce = Op::apply(inp_vals.x, inp_vals.y);< / div >
2024-10-31 11:00:19 +08:00
< div class = "line" > < a id = "l00193" name = "l00193" > < / a > < span class = "lineno" > 193< / span > < / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00194" name = "l00194" > < / a > < span class = "lineno" > 194< / span > T qgr_reduce = < a class = "code hl_function" href = "namespacemetal.html#a5017efc9605e069cfb507137cd1a1852" > simd_shuffle_xor< / a > (thr_reduce, ushort(1));< / div >
< div class = "line" > < a id = "l00195" name = "l00195" > < / a > < span class = "lineno" > 195< / span > qgr_reduce = Op::apply(thr_reduce, qgr_reduce);< / div >
2024-10-31 11:00:19 +08:00
< div class = "line" > < a id = "l00196" name = "l00196" > < / a > < span class = "lineno" > 196< / span > < / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00197" name = "l00197" > < / a > < span class = "lineno" > 197< / span > T sgr_reduce = < a class = "code hl_function" href = "namespacemetal.html#a5017efc9605e069cfb507137cd1a1852" > simd_shuffle_xor< / a > (qgr_reduce, ushort(8));< / div >
< div class = "line" > < a id = "l00198" name = "l00198" > < / a > < span class = "lineno" > 198< / span > sgr_reduce = Op::apply(qgr_reduce, sgr_reduce);< / div >
< div class = "line" > < a id = "l00199" name = "l00199" > < / a > < span class = "lineno" > 199< / span > < / div >
< div class = "line" > < a id = "l00200" name = "l00200" > < / a > < span class = "lineno" > 200< / span > reduced_vals[0] = Op::apply(reduced_vals[0], sgr_reduce);< / div >
< div class = "line" > < a id = "l00201" name = "l00201" > < / a > < span class = "lineno" > 201< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00202" name = "l00202" > < / a > < span class = "lineno" > 202< / span > < / div >
< div class = "line" > < a id = "l00203" name = "l00203" > < / a > < span class = "lineno" > 203< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > Op> < / div >
< div class = "foldopen" id = "foldopen00204" data-start = "{" data-end = "}" >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00204" name = "l00204" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a318c4279bdc7b39b7919f108b1cd8010" > 204< / a > < / span > METAL_FUNC < span class = "keyword" > static< / span > < span class = "keyword" > constexpr< / span > < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a318c4279bdc7b39b7919f108b1cd8010" > row_bin_op< / a > (< / div >
< div class = "line" > < a id = "l00205" name = "l00205" > < / a > < span class = "lineno" > 205< / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b" > frag_type< / a > & inp_vals,< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00206" name = "l00206" > < / a > < span class = "lineno" > 206< / span > thread T* row_vals) {< / div >
< div class = "line" > < a id = "l00207" name = "l00207" > < / a > < span class = "lineno" > 207< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00208" name = "l00208" > < / a > < span class = "lineno" > 208< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a76aa5aa690dbcc954e957d767fad661f" > kElemRows< / a > ; i++) {< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00209" name = "l00209" > < / a > < span class = "lineno" > 209< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00210" name = "l00210" > < / a > < span class = "lineno" > 210< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7c212200d86b4e93f274d99addf668bd" > kElemCols< / a > ; j++) {< / div >
< div class = "line" > < a id = "l00211" name = "l00211" > < / a > < span class = "lineno" > 211< / span > inp_vals[i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7c212200d86b4e93f274d99addf668bd" > kElemCols< / a > + j] =< / div >
< div class = "line" > < a id = "l00212" name = "l00212" > < / a > < span class = "lineno" > 212< / span > Op::apply(inp_vals[i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7c212200d86b4e93f274d99addf668bd" > kElemCols< / a > + j], row_vals[i]);< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00213" name = "l00213" > < / a > < span class = "lineno" > 213< / span > }< / div >
< div class = "line" > < a id = "l00214" name = "l00214" > < / a > < span class = "lineno" > 214< / span > }< / div >
< div class = "line" > < a id = "l00215" name = "l00215" > < / a > < span class = "lineno" > 215< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00216" name = "l00216" > < / a > < span class = "lineno" > 216< / span > };< / div >
2024-10-31 11:00:19 +08:00
< div class = "line" > < a id = "l00217" name = "l00217" > < / a > < span class = "lineno" > 217< / span > < / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00218" name = "l00218" > < / a > < span class = "lineno" > 218< / span > < span class = "keyword" > template< / span > < < / div >
< div class = "line" > < a id = "l00219" name = "l00219" > < / a > < span class = "lineno" > 219< / span > < span class = "keyword" > typename< / span > T,< / div >
< div class = "line" > < a id = "l00220" name = "l00220" > < / a > < span class = "lineno" > 220< / span > < span class = "keywordtype" > int< / span > kTileRows_,< / div >
< div class = "line" > < a id = "l00221" name = "l00221" > < / a > < span class = "lineno" > 221< / span > < span class = "keywordtype" > int< / span > kTileCols_,< / div >
< div class = "line" > < a id = "l00222" name = "l00222" > < / a > < span class = "lineno" > 222< / span > < span class = "keyword" > class < / span > MMAFrag_ = BaseMMAFrag< T, 8, 8> > < / div >
< div class = "line" > < a id = "l00223" name = "l00223" > < / a > < span class = "lineno" > 223< / span > < span class = "keyword" > struct < / span > MMATile {< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00224" name = "l00224" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#abe33de70e34300745bad9aa822fd0382" > 224< / a > < / span > < span class = "keyword" > using < / span > < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#abe33de70e34300745bad9aa822fd0382" > MMAFrag_t< / a > = MMAFrag_;< / div >
< div class = "line" > < a id = "l00225" name = "l00225" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a80078f0dfa4c225e79d9b460202d5e2c" > 225< / a > < / span > < span class = "keyword" > using < / span > < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a80078f0dfa4c225e79d9b460202d5e2c" > elem_type< / a > = T;< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00226" name = "l00226" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a594142f957ffb99296a243f7af7b59e7" > 226< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a594142f957ffb99296a243f7af7b59e7" > kFragRows< / a > = MMAFrag_t::kFragRows;< / div >
< div class = "line" > < a id = "l00227" name = "l00227" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ad095371db98e7c335ec41ca77c10f906" > 227< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ad095371db98e7c335ec41ca77c10f906" > kFragCols< / a > = MMAFrag_t::kFragCols;< / div >
< div class = "line" > < a id = "l00228" name = "l00228" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aef0ea2387e1ff5767bff8563b2d36bd6" > 228< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aef0ea2387e1ff5767bff8563b2d36bd6" > kElemsPerFrag< / a > = MMAFrag_t::kElemsPerFrag;< / div >
< div class = "line" > < a id = "l00229" name = "l00229" > < / a > < span class = "lineno" > 229< / span > < / div >
< div class = "line" > < a id = "l00230" name = "l00230" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1d126b14910385ab644e224ac1d0307a" > 230< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1d126b14910385ab644e224ac1d0307a" > kTileRows< / a > = kTileRows_;< / div >
< div class = "line" > < a id = "l00231" name = "l00231" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a46324d40f8ad61cade08a1ebad6d9ad4" > 231< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a46324d40f8ad61cade08a1ebad6d9ad4" > kTileCols< / a > = kTileCols_;< / div >
< div class = "line" > < a id = "l00232" name = "l00232" > < / a > < span class = "lineno" > 232< / span > < / div >
< div class = "line" > < a id = "l00233" name = "l00233" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a60ea6b8ff2923b7fe6f598e74ac54323" > 233< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a60ea6b8ff2923b7fe6f598e74ac54323" > kRows< / a > = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1d126b14910385ab644e224ac1d0307a" > kTileRows< / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a594142f957ffb99296a243f7af7b59e7" > kFragRows< / a > ;< / div >
< div class = "line" > < a id = "l00234" name = "l00234" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a948784652e93830887ee8ad506ec3257" > 234< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a948784652e93830887ee8ad506ec3257" > kCols< / a > = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a46324d40f8ad61cade08a1ebad6d9ad4" > kTileCols< / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ad095371db98e7c335ec41ca77c10f906" > kFragCols< / a > ;< / div >
< div class = "line" > < a id = "l00235" name = "l00235" > < / a > < span class = "lineno" > 235< / span > < / div >
< div class = "line" > < a id = "l00236" name = "l00236" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ae326e7693eb77c22d5a6e3e9219019d3" > 236< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ae326e7693eb77c22d5a6e3e9219019d3" > kNumFrags< / a > = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1d126b14910385ab644e224ac1d0307a" > kTileRows< / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a46324d40f8ad61cade08a1ebad6d9ad4" > kTileCols< / a > ;< / div >
< div class = "line" > < a id = "l00237" name = "l00237" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a98357339ec98f804a1b12597937b318f" > 237< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a98357339ec98f804a1b12597937b318f" > kElemsPerTile< / a > = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ae326e7693eb77c22d5a6e3e9219019d3" > kNumFrags< / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aef0ea2387e1ff5767bff8563b2d36bd6" > kElemsPerFrag< / a > ;< / div >
< div class = "line" > < a id = "l00238" name = "l00238" > < / a > < span class = "lineno" > 238< / span > < / div >
< div class = "line" > < a id = "l00239" name = "l00239" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a5b1d1c85a5046108a4e38bdc5a0ea74e" > 239< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a5b1d1c85a5046108a4e38bdc5a0ea74e" > kRowsPerThread< / a > = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1d126b14910385ab644e224ac1d0307a" > kTileRows< / a > * MMAFrag_t::kElemRows;< / div >
< div class = "line" > < a id = "l00240" name = "l00240" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1ea49efd92696b15302ee4b52ecd548c" > 240< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1ea49efd92696b15302ee4b52ecd548c" > kColsPerThread< / a > = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a46324d40f8ad61cade08a1ebad6d9ad4" > kTileCols< / a > * MMAFrag_t::kElemCols;< / div >
< div class = "line" > < a id = "l00241" name = "l00241" > < / a > < span class = "lineno" > 241< / span > < / div >
< div class = "line" > < a id = "l00242" name = "l00242" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1eeb197c9bdf4db42892a39cdb9bd73a" > 242< / a > < / span > < span class = "keyword" > typedef< / span > < span class = "keyword" > typename< / span > MMAFrag_t::mat_type < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1eeb197c9bdf4db42892a39cdb9bd73a" > mat_type< / a > ;< / div >
< div class = "line" > < a id = "l00243" name = "l00243" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aac25cd0a9bdf24aa2af809c95f0bd171" > 243< / a > < / span > < span class = "keyword" > typedef< / span > < span class = "keyword" > typename< / span > MMAFrag_t::frag_type < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aac25cd0a9bdf24aa2af809c95f0bd171" > frag_type< / a > ;< / div >
< div class = "line" > < a id = "l00244" name = "l00244" > < / a > < span class = "lineno" > 244< / span > < / div >
< div class = "line" > < a id = "l00245" name = "l00245" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a684e6c6d9f00f583994285b60aaa3b62" > 245< / a > < / span > < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aac25cd0a9bdf24aa2af809c95f0bd171" > frag_type< / a > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a684e6c6d9f00f583994285b60aaa3b62" > val_frags< / a > [< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ae326e7693eb77c22d5a6e3e9219019d3" > kNumFrags< / a > ] = {< a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aac25cd0a9bdf24aa2af809c95f0bd171" > frag_type< / a > (0)};< / div >
< div class = "line" > < a id = "l00246" name = "l00246" > < / a > < span class = "lineno" > 246< / span > < / div >
< div class = "line" > < a id = "l00247" name = "l00247" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aa3fb310dd08ec23c334511f7b316d1b6" > 247< / a > < / span > METAL_FUNC < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aa3fb310dd08ec23c334511f7b316d1b6" > MMATile< / a > () thread {}< / div >
< div class = "line" > < a id = "l00248" name = "l00248" > < / a > < span class = "lineno" > 248< / span > < / div >
< div class = "foldopen" id = "foldopen00249" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00249" name = "l00249" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aa97a98e423827a889c13a92217626ec7" > 249< / a > < / span > METAL_FUNC < span class = "keyword" > constexpr< / span > < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aa97a98e423827a889c13a92217626ec7" > clear< / a > () {< / div >
< div class = "line" > < a id = "l00250" name = "l00250" > < / a > < span class = "lineno" > 250< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00251" name = "l00251" > < / a > < span class = "lineno" > 251< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ae326e7693eb77c22d5a6e3e9219019d3" > kNumFrags< / a > ; ++i) {< / div >
< div class = "line" > < a id = "l00252" name = "l00252" > < / a > < span class = "lineno" > 252< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a684e6c6d9f00f583994285b60aaa3b62" > val_frags< / a > [i] = < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aac25cd0a9bdf24aa2af809c95f0bd171" > frag_type< / a > (0);< / div >
< div class = "line" > < a id = "l00253" name = "l00253" > < / a > < span class = "lineno" > 253< / span > }< / div >
< div class = "line" > < a id = "l00254" name = "l00254" > < / a > < span class = "lineno" > 254< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00255" name = "l00255" > < / a > < span class = "lineno" > 255< / span > < / div >
< div class = "foldopen" id = "foldopen00256" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00256" name = "l00256" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1a6b1446e8c8da46885bbaa8e8fdc7e4" > 256< / a > < / span > METAL_FUNC < span class = "keyword" > constexpr< / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aac25cd0a9bdf24aa2af809c95f0bd171" > frag_type< / a > & < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1a6b1446e8c8da46885bbaa8e8fdc7e4" > frag_at< / a > (< span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > i, < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > j) {< / div >
< div class = "line" > < a id = "l00257" name = "l00257" > < / a > < span class = "lineno" > 257< / span > < span class = "keywordflow" > return< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a684e6c6d9f00f583994285b60aaa3b62" > val_frags< / a > [i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a46324d40f8ad61cade08a1ebad6d9ad4" > kTileCols< / a > + j];< / div >
< div class = "line" > < a id = "l00258" name = "l00258" > < / a > < span class = "lineno" > 258< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00259" name = "l00259" > < / a > < span class = "lineno" > 259< / span > < / div >
< div class = "foldopen" id = "foldopen00260" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00260" name = "l00260" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ad476e1d9a12178fb35c207312339e485" > 260< / a > < / span > METAL_FUNC < span class = "keyword" > constexpr< / span > < span class = "keyword" > const< / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aac25cd0a9bdf24aa2af809c95f0bd171" > frag_type< / a > & < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ad476e1d9a12178fb35c207312339e485" > frag_at< / a > (< / div >
< div class = "line" > < a id = "l00261" name = "l00261" > < / a > < span class = "lineno" > 261< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > i,< / div >
< div class = "line" > < a id = "l00262" name = "l00262" > < / a > < span class = "lineno" > 262< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > j)< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00263" name = "l00263" > < / a > < span class = "lineno" > 263< / span > < span class = "keywordflow" > return< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a684e6c6d9f00f583994285b60aaa3b62" > val_frags< / a > [i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a46324d40f8ad61cade08a1ebad6d9ad4" > kTileCols< / a > + j];< / div >
< div class = "line" > < a id = "l00264" name = "l00264" > < / a > < span class = "lineno" > 264< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00265" name = "l00265" > < / a > < span class = "lineno" > 265< / span > < / div >
< div class = "foldopen" id = "foldopen00266" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00266" name = "l00266" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a323a4f38cd0693bf333832bb4258b28e" > 266< / a > < / span > METAL_FUNC < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1eeb197c9bdf4db42892a39cdb9bd73a" > mat_type< / a > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a323a4f38cd0693bf333832bb4258b28e" > mat_at< / a > (< span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > i, < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > j) {< / div >
< div class = "line" > < a id = "l00267" name = "l00267" > < / a > < span class = "lineno" > 267< / span > < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1eeb197c9bdf4db42892a39cdb9bd73a" > mat_type< / a > val_mat;< / div >
< div class = "line" > < a id = "l00268" name = "l00268" > < / a > < span class = "lineno" > 268< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00269" name = "l00269" > < / a > < span class = "lineno" > 269< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > ii = 0; ii < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aef0ea2387e1ff5767bff8563b2d36bd6" > kElemsPerFrag< / a > ; ++ii) {< / div >
< div class = "line" > < a id = "l00270" name = "l00270" > < / a > < span class = "lineno" > 270< / span > val_mat.thread_elements()[ii] = < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1a6b1446e8c8da46885bbaa8e8fdc7e4" > frag_at< / a > (i, j)[ii];< / div >
< div class = "line" > < a id = "l00271" name = "l00271" > < / a > < span class = "lineno" > 271< / span > }< / div >
< div class = "line" > < a id = "l00272" name = "l00272" > < / a > < span class = "lineno" > 272< / span > < span class = "keywordflow" > return< / span > val_mat;< / div >
< div class = "line" > < a id = "l00273" name = "l00273" > < / a > < span class = "lineno" > 273< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00274" name = "l00274" > < / a > < span class = "lineno" > 274< / span > < / div >
< div class = "foldopen" id = "foldopen00275" data-start = "{" data-end = "}" >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00275" name = "l00275" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a865ece5ad0b9a56937b6d77a18b5a1dc" > 275< / a > < / span > METAL_FUNC thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a80078f0dfa4c225e79d9b460202d5e2c" > elem_type< / a > * < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a865ece5ad0b9a56937b6d77a18b5a1dc" > elems< / a > () {< / div >
< div class = "line" > < a id = "l00276" name = "l00276" > < / a > < span class = "lineno" > 276< / span > < span class = "keywordflow" > return< / span > < span class = "keyword" > reinterpret_cast< < / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a80078f0dfa4c225e79d9b460202d5e2c" > elem_type< / a > *< span class = "keyword" > > < / span > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a684e6c6d9f00f583994285b60aaa3b62" > val_frags< / a > );< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00277" name = "l00277" > < / a > < span class = "lineno" > 277< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00278" name = "l00278" > < / a > < span class = "lineno" > 278< / span > < / div >
< div class = "foldopen" id = "foldopen00279" data-start = "{" data-end = "}" >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00279" name = "l00279" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ae21bb7cce701290de84c6015e064d8a1" > 279< / a > < / span > METAL_FUNC < span class = "keyword" > const< / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a80078f0dfa4c225e79d9b460202d5e2c" > elem_type< / a > * < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ae21bb7cce701290de84c6015e064d8a1" > elems< / a > ()< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00280" name = "l00280" > < / a > < span class = "lineno" > 280< / span > < span class = "keywordflow" > return< / span > < span class = "keyword" > reinterpret_cast< < / span > < span class = "keyword" > const < / span > thread < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a80078f0dfa4c225e79d9b460202d5e2c" > elem_type< / a > *< span class = "keyword" > > < / span > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a684e6c6d9f00f583994285b60aaa3b62" > val_frags< / a > );< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00281" name = "l00281" > < / a > < span class = "lineno" > 281< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00282" name = "l00282" > < / a > < span class = "lineno" > 282< / span > < / div >
< div class = "line" > < a id = "l00283" name = "l00283" > < / a > < span class = "lineno" > 283< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > Op> < / div >
< div class = "foldopen" id = "foldopen00284" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00284" name = "l00284" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aa0ad5cb750ace934bf230385d8bd9f88" > 284< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aa0ad5cb750ace934bf230385d8bd9f88" > row_reduce< / a > (thread T vals[< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a5b1d1c85a5046108a4e38bdc5a0ea74e" > kRowsPerThread< / a > ])< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00285" name = "l00285" > < / a > < span class = "lineno" > 285< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00286" name = "l00286" > < / a > < span class = "lineno" > 286< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1d126b14910385ab644e224ac1d0307a" > kTileRows< / a > ; ++i) {< / div >
< div class = "line" > < a id = "l00287" name = "l00287" > < / a > < span class = "lineno" > 287< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00288" name = "l00288" > < / a > < span class = "lineno" > 288< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a46324d40f8ad61cade08a1ebad6d9ad4" > kTileCols< / a > ; ++j) {< / div >
< div class = "line" > < a id = "l00289" name = "l00289" > < / a > < span class = "lineno" > 289< / span > MMAFrag_t::template < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aa0ad5cb750ace934bf230385d8bd9f88" > row_reduce< Op> < / a > (< / div >
< div class = "line" > < a id = "l00290" name = "l00290" > < / a > < span class = "lineno" > 290< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1a6b1446e8c8da46885bbaa8e8fdc7e4" > frag_at< / a > (i, j), & vals[i * MMAFrag_t::kElemRows]);< / div >
< div class = "line" > < a id = "l00291" name = "l00291" > < / a > < span class = "lineno" > 291< / span > }< / div >
< div class = "line" > < a id = "l00292" name = "l00292" > < / a > < span class = "lineno" > 292< / span > }< / div >
< div class = "line" > < a id = "l00293" name = "l00293" > < / a > < span class = "lineno" > 293< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00294" name = "l00294" > < / a > < span class = "lineno" > 294< / span > < / div >
< div class = "line" > < a id = "l00295" name = "l00295" > < / a > < span class = "lineno" > 295< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > Op> < / div >
< div class = "foldopen" id = "foldopen00296" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00296" name = "l00296" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a3d0d5b9c7962658cc6d5afbbbb2f19e2" > 296< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a3d0d5b9c7962658cc6d5afbbbb2f19e2" > row_bin_op< / a > (thread T vals[< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a5b1d1c85a5046108a4e38bdc5a0ea74e" > kRowsPerThread< / a > ]) {< / div >
< div class = "line" > < a id = "l00297" name = "l00297" > < / a > < span class = "lineno" > 297< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00298" name = "l00298" > < / a > < span class = "lineno" > 298< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1d126b14910385ab644e224ac1d0307a" > kTileRows< / a > ; ++i) {< / div >
< div class = "line" > < a id = "l00299" name = "l00299" > < / a > < span class = "lineno" > 299< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00300" name = "l00300" > < / a > < span class = "lineno" > 300< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a46324d40f8ad61cade08a1ebad6d9ad4" > kTileCols< / a > ; ++j) {< / div >
< div class = "line" > < a id = "l00301" name = "l00301" > < / a > < span class = "lineno" > 301< / span > MMAFrag_t::template < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a3d0d5b9c7962658cc6d5afbbbb2f19e2" > row_bin_op< Op> < / a > (< / div >
< div class = "line" > < a id = "l00302" name = "l00302" > < / a > < span class = "lineno" > 302< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1a6b1446e8c8da46885bbaa8e8fdc7e4" > frag_at< / a > (i, j), & vals[i * MMAFrag_t::kElemRows]);< / div >
< div class = "line" > < a id = "l00303" name = "l00303" > < / a > < span class = "lineno" > 303< / span > }< / div >
< div class = "line" > < a id = "l00304" name = "l00304" > < / a > < span class = "lineno" > 304< / span > }< / div >
< div class = "line" > < a id = "l00305" name = "l00305" > < / a > < span class = "lineno" > 305< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00306" name = "l00306" > < / a > < span class = "lineno" > 306< / span > < / div >
< div class = "line" > < a id = "l00307" name = "l00307" > < / a > < span class = "lineno" > 307< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > U, < span class = "keywordtype" > int< / span > w_x, < span class = "keywordtype" > int< / span > w_y, < span class = "keywordtype" > int< / span > str_x, < span class = "keywordtype" > int< / span > str_y> < / div >
< div class = "foldopen" id = "foldopen00308" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00308" name = "l00308" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aa5426c6beabfb3ee41b58f01b3392a96" > 308< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aa5426c6beabfb3ee41b58f01b3392a96" > load< / a > (< span class = "keyword" > const< / span > threadgroup U* src) {< / div >
< div class = "line" > < a id = "l00309" name = "l00309" > < / a > < span class = "lineno" > 309< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00310" name = "l00310" > < / a > < span class = "lineno" > 310< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1d126b14910385ab644e224ac1d0307a" > kTileRows< / a > ; ++i) {< / div >
< div class = "line" > < a id = "l00311" name = "l00311" > < / a > < span class = "lineno" > 311< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00312" name = "l00312" > < / a > < span class = "lineno" > 312< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a46324d40f8ad61cade08a1ebad6d9ad4" > kTileCols< / a > ; ++j) {< / div >
< div class = "line" > < a id = "l00313" name = "l00313" > < / a > < span class = "lineno" > 313< / span > MMAFrag_t::load(< / div >
< div class = "line" > < a id = "l00314" name = "l00314" > < / a > < span class = "lineno" > 314< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1a6b1446e8c8da46885bbaa8e8fdc7e4" > frag_at< / a > (i, j),< / div >
< div class = "line" > < a id = "l00315" name = "l00315" > < / a > < span class = "lineno" > 315< / span > & (< / div >
< div class = "line" > < a id = "l00316" name = "l00316" > < / a > < span class = "lineno" > 316< / span > src[(i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a594142f957ffb99296a243f7af7b59e7" > kFragRows< / a > ) * w_x * str_x +< / div >
< div class = "line" > < a id = "l00317" name = "l00317" > < / a > < span class = "lineno" > 317< / span > (j * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ad095371db98e7c335ec41ca77c10f906" > kFragCols< / a > ) * w_y * str_y]),< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00318" name = "l00318" > < / a > < span class = "lineno" > 318< / span > < a class = "code hl_typedef" href = "namespacemlx_1_1steel.html#afe36ddf6725498d273e5eef4f1579891" > Int< str_x> < / a > {},< / div >
< div class = "line" > < a id = "l00319" name = "l00319" > < / a > < span class = "lineno" > 319< / span > < a class = "code hl_typedef" href = "namespacemlx_1_1steel.html#afe36ddf6725498d273e5eef4f1579891" > Int< str_y> < / a > {});< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00320" name = "l00320" > < / a > < span class = "lineno" > 320< / span > }< / div >
< div class = "line" > < a id = "l00321" name = "l00321" > < / a > < span class = "lineno" > 321< / span > }< / div >
< div class = "line" > < a id = "l00322" name = "l00322" > < / a > < span class = "lineno" > 322< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00323" name = "l00323" > < / a > < span class = "lineno" > 323< / span > < / div >
< div class = "line" > < a id = "l00324" name = "l00324" > < / a > < span class = "lineno" > 324< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > U, < span class = "keywordtype" > int< / span > w_x, < span class = "keywordtype" > int< / span > w_y, < span class = "keywordtype" > int< / span > str_x, < span class = "keywordtype" > int< / span > str_y> < / div >
< div class = "foldopen" id = "foldopen00325" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00325" name = "l00325" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a2aadaa3239cb3af0c2ee8af9b88c8a98" > 325< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a2aadaa3239cb3af0c2ee8af9b88c8a98" > store< / a > (threadgroup U* dst)< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00326" name = "l00326" > < / a > < span class = "lineno" > 326< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00327" name = "l00327" > < / a > < span class = "lineno" > 327< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1d126b14910385ab644e224ac1d0307a" > kTileRows< / a > ; ++i) {< / div >
< div class = "line" > < a id = "l00328" name = "l00328" > < / a > < span class = "lineno" > 328< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00329" name = "l00329" > < / a > < span class = "lineno" > 329< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a46324d40f8ad61cade08a1ebad6d9ad4" > kTileCols< / a > ; ++j) {< / div >
< div class = "line" > < a id = "l00330" name = "l00330" > < / a > < span class = "lineno" > 330< / span > MMAFrag_t::store(< / div >
< div class = "line" > < a id = "l00331" name = "l00331" > < / a > < span class = "lineno" > 331< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1a6b1446e8c8da46885bbaa8e8fdc7e4" > frag_at< / a > (i, j),< / div >
< div class = "line" > < a id = "l00332" name = "l00332" > < / a > < span class = "lineno" > 332< / span > & (< / div >
< div class = "line" > < a id = "l00333" name = "l00333" > < / a > < span class = "lineno" > 333< / span > dst[(i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a594142f957ffb99296a243f7af7b59e7" > kFragRows< / a > ) * w_x * str_x +< / div >
< div class = "line" > < a id = "l00334" name = "l00334" > < / a > < span class = "lineno" > 334< / span > (j * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ad095371db98e7c335ec41ca77c10f906" > kFragCols< / a > ) * w_y * str_y]),< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00335" name = "l00335" > < / a > < span class = "lineno" > 335< / span > < a class = "code hl_typedef" href = "namespacemlx_1_1steel.html#afe36ddf6725498d273e5eef4f1579891" > Int< str_x> < / a > {},< / div >
< div class = "line" > < a id = "l00336" name = "l00336" > < / a > < span class = "lineno" > 336< / span > < a class = "code hl_typedef" href = "namespacemlx_1_1steel.html#afe36ddf6725498d273e5eef4f1579891" > Int< str_y> < / a > {});< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00337" name = "l00337" > < / a > < span class = "lineno" > 337< / span > }< / div >
< div class = "line" > < a id = "l00338" name = "l00338" > < / a > < span class = "lineno" > 338< / span > }< / div >
< div class = "line" > < a id = "l00339" name = "l00339" > < / a > < span class = "lineno" > 339< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00340" name = "l00340" > < / a > < span class = "lineno" > 340< / span > < / div >
< div class = "line" > < a id = "l00341" name = "l00341" > < / a > < span class = "lineno" > 341< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > U, < span class = "keywordtype" > int< / span > w_x, < span class = "keywordtype" > int< / span > w_y> < / div >
< div class = "foldopen" id = "foldopen00342" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00342" name = "l00342" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aa9e484d8cae936503898d5b772c573f9" > 342< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aa9e484d8cae936503898d5b772c573f9" > load< / a > (< span class = "keyword" > const< / span > device U* src, < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > ld) {< / div >
< div class = "line" > < a id = "l00343" name = "l00343" > < / a > < span class = "lineno" > 343< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00344" name = "l00344" > < / a > < span class = "lineno" > 344< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1d126b14910385ab644e224ac1d0307a" > kTileRows< / a > ; ++i) {< / div >
< div class = "line" > < a id = "l00345" name = "l00345" > < / a > < span class = "lineno" > 345< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00346" name = "l00346" > < / a > < span class = "lineno" > 346< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a46324d40f8ad61cade08a1ebad6d9ad4" > kTileCols< / a > ; ++j) {< / div >
< div class = "line" > < a id = "l00347" name = "l00347" > < / a > < span class = "lineno" > 347< / span > MMAFrag_t::load(< / div >
< div class = "line" > < a id = "l00348" name = "l00348" > < / a > < span class = "lineno" > 348< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1a6b1446e8c8da46885bbaa8e8fdc7e4" > frag_at< / a > (i, j),< / div >
< div class = "line" > < a id = "l00349" name = "l00349" > < / a > < span class = "lineno" > 349< / span > & (src[(i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a594142f957ffb99296a243f7af7b59e7" > kFragRows< / a > ) * w_x * ld + (j * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ad095371db98e7c335ec41ca77c10f906" > kFragCols< / a > ) * w_y]),< / div >
< div class = "line" > < a id = "l00350" name = "l00350" > < / a > < span class = "lineno" > 350< / span > ld,< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00351" name = "l00351" > < / a > < span class = "lineno" > 351< / span > < a class = "code hl_typedef" href = "namespacemlx_1_1steel.html#afe36ddf6725498d273e5eef4f1579891" > Int< 1> < / a > {});< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00352" name = "l00352" > < / a > < span class = "lineno" > 352< / span > }< / div >
< div class = "line" > < a id = "l00353" name = "l00353" > < / a > < span class = "lineno" > 353< / span > }< / div >
< div class = "line" > < a id = "l00354" name = "l00354" > < / a > < span class = "lineno" > 354< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00355" name = "l00355" > < / a > < span class = "lineno" > 355< / span > < / div >
< div class = "line" > < a id = "l00356" name = "l00356" > < / a > < span class = "lineno" > 356< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > U, < span class = "keywordtype" > int< / span > w_x, < span class = "keywordtype" > int< / span > w_y> < / div >
< div class = "foldopen" id = "foldopen00357" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00357" name = "l00357" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a752f708e4fe5ef37fdd902dae153179f" > 357< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a752f708e4fe5ef37fdd902dae153179f" > store< / a > (device U* dst, < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > ld)< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00358" name = "l00358" > < / a > < span class = "lineno" > 358< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00359" name = "l00359" > < / a > < span class = "lineno" > 359< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1d126b14910385ab644e224ac1d0307a" > kTileRows< / a > ; ++i) {< / div >
< div class = "line" > < a id = "l00360" name = "l00360" > < / a > < span class = "lineno" > 360< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00361" name = "l00361" > < / a > < span class = "lineno" > 361< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a46324d40f8ad61cade08a1ebad6d9ad4" > kTileCols< / a > ; ++j) {< / div >
< div class = "line" > < a id = "l00362" name = "l00362" > < / a > < span class = "lineno" > 362< / span > MMAFrag_t::store(< / div >
< div class = "line" > < a id = "l00363" name = "l00363" > < / a > < span class = "lineno" > 363< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1a6b1446e8c8da46885bbaa8e8fdc7e4" > frag_at< / a > (i, j),< / div >
< div class = "line" > < a id = "l00364" name = "l00364" > < / a > < span class = "lineno" > 364< / span > & (dst[(i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a594142f957ffb99296a243f7af7b59e7" > kFragRows< / a > ) * w_x * ld + (j * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ad095371db98e7c335ec41ca77c10f906" > kFragCols< / a > ) * w_y]),< / div >
< div class = "line" > < a id = "l00365" name = "l00365" > < / a > < span class = "lineno" > 365< / span > ld,< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00366" name = "l00366" > < / a > < span class = "lineno" > 366< / span > < a class = "code hl_typedef" href = "namespacemlx_1_1steel.html#afe36ddf6725498d273e5eef4f1579891" > Int< 1> < / a > {});< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00367" name = "l00367" > < / a > < span class = "lineno" > 367< / span > }< / div >
< div class = "line" > < a id = "l00368" name = "l00368" > < / a > < span class = "lineno" > 368< / span > }< / div >
< div class = "line" > < a id = "l00369" name = "l00369" > < / a > < span class = "lineno" > 369< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00370" name = "l00370" > < / a > < span class = "lineno" > 370< / span > < / div >
< div class = "line" > < a id = "l00371" name = "l00371" > < / a > < span class = "lineno" > 371< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > U, < span class = "keywordtype" > int< / span > w_x, < span class = "keywordtype" > int< / span > w_y> < / div >
< div class = "line" > < a id = "l00372" name = "l00372" > < / a > < span class = "lineno" > 372< / span > METAL_FUNC < span class = "keywordtype" > void< / span > < / div >
< div class = "foldopen" id = "foldopen00373" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00373" name = "l00373" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aa3a4af67813908109da08ce7352f82da" > 373< / a > < / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aa3a4af67813908109da08ce7352f82da" > load_safe< / a > (< span class = "keyword" > const< / span > device U* src, < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > ld, < span class = "keyword" > const< / span > short2 src_tile_dims) {< / div >
< div class = "line" > < a id = "l00374" name = "l00374" > < / a > < span class = "lineno" > 374< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00375" name = "l00375" > < / a > < span class = "lineno" > 375< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1d126b14910385ab644e224ac1d0307a" > kTileRows< / a > ; ++i) {< / div >
< div class = "line" > < a id = "l00376" name = "l00376" > < / a > < span class = "lineno" > 376< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00377" name = "l00377" > < / a > < span class = "lineno" > 377< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a46324d40f8ad61cade08a1ebad6d9ad4" > kTileCols< / a > ; ++j) {< / div >
< div class = "line" > < a id = "l00378" name = "l00378" > < / a > < span class = "lineno" > 378< / span > MMAFrag_t::load_safe(< / div >
< div class = "line" > < a id = "l00379" name = "l00379" > < / a > < span class = "lineno" > 379< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1a6b1446e8c8da46885bbaa8e8fdc7e4" > frag_at< / a > (i, j),< / div >
< div class = "line" > < a id = "l00380" name = "l00380" > < / a > < span class = "lineno" > 380< / span > src,< / div >
< div class = "line" > < a id = "l00381" name = "l00381" > < / a > < span class = "lineno" > 381< / span > ld,< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00382" name = "l00382" > < / a > < span class = "lineno" > 382< / span > < a class = "code hl_typedef" href = "namespacemlx_1_1steel.html#afe36ddf6725498d273e5eef4f1579891" > Int< 1> < / a > {},< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00383" name = "l00383" > < / a > < span class = "lineno" > 383< / span > src_tile_dims.y,< / div >
< div class = "line" > < a id = "l00384" name = "l00384" > < / a > < span class = "lineno" > 384< / span > src_tile_dims.x,< / div >
< div class = "line" > < a id = "l00385" name = "l00385" > < / a > < span class = "lineno" > 385< / span > (i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a594142f957ffb99296a243f7af7b59e7" > kFragRows< / a > ) * w_x,< / div >
< div class = "line" > < a id = "l00386" name = "l00386" > < / a > < span class = "lineno" > 386< / span > (j * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ad095371db98e7c335ec41ca77c10f906" > kFragCols< / a > ) * w_y);< / div >
< div class = "line" > < a id = "l00387" name = "l00387" > < / a > < span class = "lineno" > 387< / span > }< / div >
< div class = "line" > < a id = "l00388" name = "l00388" > < / a > < span class = "lineno" > 388< / span > }< / div >
< div class = "line" > < a id = "l00389" name = "l00389" > < / a > < span class = "lineno" > 389< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00390" name = "l00390" > < / a > < span class = "lineno" > 390< / span > < / div >
< div class = "line" > < a id = "l00391" name = "l00391" > < / a > < span class = "lineno" > 391< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > U, < span class = "keywordtype" > int< / span > w_x, < span class = "keywordtype" > int< / span > w_y> < / div >
< div class = "line" > < a id = "l00392" name = "l00392" > < / a > < span class = "lineno" > 392< / span > METAL_FUNC < span class = "keywordtype" > void< / span > < / div >
< div class = "foldopen" id = "foldopen00393" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00393" name = "l00393" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a57703f522c7409dbe2c0a68bb7acc2ba" > 393< / a > < / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a57703f522c7409dbe2c0a68bb7acc2ba" > store_safe< / a > (device U* dst, < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > ld, < span class = "keyword" > const< / span > short2 dst_tile_dims)< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00394" name = "l00394" > < / a > < span class = "lineno" > 394< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00395" name = "l00395" > < / a > < span class = "lineno" > 395< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1d126b14910385ab644e224ac1d0307a" > kTileRows< / a > ; ++i) {< / div >
< div class = "line" > < a id = "l00396" name = "l00396" > < / a > < span class = "lineno" > 396< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00397" name = "l00397" > < / a > < span class = "lineno" > 397< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a46324d40f8ad61cade08a1ebad6d9ad4" > kTileCols< / a > ; ++j) {< / div >
< div class = "line" > < a id = "l00398" name = "l00398" > < / a > < span class = "lineno" > 398< / span > MMAFrag_t::store_safe(< / div >
< div class = "line" > < a id = "l00399" name = "l00399" > < / a > < span class = "lineno" > 399< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1a6b1446e8c8da46885bbaa8e8fdc7e4" > frag_at< / a > (i, j),< / div >
< div class = "line" > < a id = "l00400" name = "l00400" > < / a > < span class = "lineno" > 400< / span > dst,< / div >
< div class = "line" > < a id = "l00401" name = "l00401" > < / a > < span class = "lineno" > 401< / span > ld,< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00402" name = "l00402" > < / a > < span class = "lineno" > 402< / span > < a class = "code hl_typedef" href = "namespacemlx_1_1steel.html#afe36ddf6725498d273e5eef4f1579891" > Int< 1> < / a > {},< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00403" name = "l00403" > < / a > < span class = "lineno" > 403< / span > dst_tile_dims.y,< / div >
< div class = "line" > < a id = "l00404" name = "l00404" > < / a > < span class = "lineno" > 404< / span > dst_tile_dims.x,< / div >
< div class = "line" > < a id = "l00405" name = "l00405" > < / a > < span class = "lineno" > 405< / span > (i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a594142f957ffb99296a243f7af7b59e7" > kFragRows< / a > ) * w_x,< / div >
< div class = "line" > < a id = "l00406" name = "l00406" > < / a > < span class = "lineno" > 406< / span > (j * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ad095371db98e7c335ec41ca77c10f906" > kFragCols< / a > ) * w_y);< / div >
< div class = "line" > < a id = "l00407" name = "l00407" > < / a > < span class = "lineno" > 407< / span > }< / div >
< div class = "line" > < a id = "l00408" name = "l00408" > < / a > < span class = "lineno" > 408< / span > }< / div >
< div class = "line" > < a id = "l00409" name = "l00409" > < / a > < span class = "lineno" > 409< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00410" name = "l00410" > < / a > < span class = "lineno" > 410< / span > };< / div >
< div class = "line" > < a id = "l00411" name = "l00411" > < / a > < span class = "lineno" > 411< / span > < / div >
< div class = "line" > < a id = "l00412" name = "l00412" > < / a > < span class = "lineno" > 412< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > T, < span class = "keyword" > typename< / span > U, < span class = "keywordtype" > int< / span > M, < span class = "keywordtype" > int< / span > N, < span class = "keywordtype" > int< / span > K> < / div >
< div class = "foldopen" id = "foldopen00413" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00413" name = "l00413" > < / a > < span class = "lineno" > < a class = "line" href = "namespacemlx_1_1steel.html#ad583e6038efc119542410f43b603d4ad" > 413< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "namespacemlx_1_1steel.html#ad583e6038efc119542410f43b603d4ad" > tile_matmad< / a > (< / div >
< div class = "line" > < a id = "l00414" name = "l00414" > < / a > < span class = "lineno" > 414< / span > thread < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_m_m_a_tile.html" > MMATile< T, M, N> < / a > & D,< / div >
< div class = "line" > < a id = "l00415" name = "l00415" > < / a > < span class = "lineno" > 415< / span > thread < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_m_m_a_tile.html" > MMATile< U, M, K> < / a > & A,< / div >
< div class = "line" > < a id = "l00416" name = "l00416" > < / a > < span class = "lineno" > 416< / span > thread < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_m_m_a_tile.html" > MMATile< U, K, N> < / a > & B,< / div >
< div class = "line" > < a id = "l00417" name = "l00417" > < / a > < span class = "lineno" > 417< / span > thread < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_m_m_a_tile.html" > MMATile< T, M, N> < / a > & C) {< / div >
< div class = "line" > < a id = "l00418" name = "l00418" > < / a > < span class = "lineno" > 418< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00419" name = "l00419" > < / a > < span class = "lineno" > 419< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > k = 0; k < K; ++k) {< / div >
< div class = "line" > < a id = "l00420" name = "l00420" > < / a > < span class = "lineno" > 420< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00421" name = "l00421" > < / a > < span class = "lineno" > 421< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > m = 0; m < M; ++m) {< / div >
< div class = "line" > < a id = "l00422" name = "l00422" > < / a > < span class = "lineno" > 422< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00423" name = "l00423" > < / a > < span class = "lineno" > 423< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > n = 0; n < N; ++n) {< / div >
< div class = "line" > < a id = "l00424" name = "l00424" > < / a > < span class = "lineno" > 424< / span > < span class = "keywordtype" > short< / span > n_serp = (m % 2) ? (N - 1 - n) : n;< / div >
< div class = "line" > < a id = "l00425" name = "l00425" > < / a > < span class = "lineno" > 425< / span > < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_m_m_a_tile.html" > MMATile< T, M, N> ::MMAFrag_t::mma< / a > (< / div >
< div class = "line" > < a id = "l00426" name = "l00426" > < / a > < span class = "lineno" > 426< / span > D.frag_at(m, n_serp),< / div >
< div class = "line" > < a id = "l00427" name = "l00427" > < / a > < span class = "lineno" > 427< / span > A.frag_at(m, k),< / div >
< div class = "line" > < a id = "l00428" name = "l00428" > < / a > < span class = "lineno" > 428< / span > B.frag_at(k, n_serp),< / div >
< div class = "line" > < a id = "l00429" name = "l00429" > < / a > < span class = "lineno" > 429< / span > C.frag_at(m, n_serp));< / div >
< div class = "line" > < a id = "l00430" name = "l00430" > < / a > < span class = "lineno" > 430< / span > }< / div >
< div class = "line" > < a id = "l00431" name = "l00431" > < / a > < span class = "lineno" > 431< / span > }< / div >
< div class = "line" > < a id = "l00432" name = "l00432" > < / a > < span class = "lineno" > 432< / span > }< / div >
< div class = "line" > < a id = "l00433" name = "l00433" > < / a > < span class = "lineno" > 433< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00434" name = "l00434" > < / a > < span class = "lineno" > 434< / span > < / div >
< div class = "line" > < a id = "l00435" name = "l00435" > < / a > < span class = "lineno" > 435< / span > < span class = "keyword" > template< / span > < < / div >
< div class = "line" > < a id = "l00436" name = "l00436" > < / a > < span class = "lineno" > 436< / span > < span class = "keyword" > typename< / span > T,< / div >
< div class = "line" > < a id = "l00437" name = "l00437" > < / a > < span class = "lineno" > 437< / span > < span class = "keyword" > typename< / span > U,< / div >
< div class = "line" > < a id = "l00438" name = "l00438" > < / a > < span class = "lineno" > 438< / span > < span class = "keywordtype" > int< / span > BM,< / div >
< div class = "line" > < a id = "l00439" name = "l00439" > < / a > < span class = "lineno" > 439< / span > < span class = "keywordtype" > int< / span > BN,< / div >
< div class = "line" > < a id = "l00440" name = "l00440" > < / a > < span class = "lineno" > 440< / span > < span class = "keywordtype" > int< / span > BK,< / div >
< div class = "line" > < a id = "l00441" name = "l00441" > < / a > < span class = "lineno" > 441< / span > < span class = "keywordtype" > int< / span > WM,< / div >
< div class = "line" > < a id = "l00442" name = "l00442" > < / a > < span class = "lineno" > 442< / span > < span class = "keywordtype" > int< / span > WN,< / div >
< div class = "line" > < a id = "l00443" name = "l00443" > < / a > < span class = "lineno" > 443< / span > < span class = "keywordtype" > bool< / span > transpose_a,< / div >
< div class = "line" > < a id = "l00444" name = "l00444" > < / a > < span class = "lineno" > 444< / span > < span class = "keywordtype" > bool< / span > transpose_b,< / div >
< div class = "line" > < a id = "l00445" name = "l00445" > < / a > < span class = "lineno" > 445< / span > < span class = "keywordtype" > short< / span > lda_tgp,< / div >
< div class = "line" > < a id = "l00446" name = "l00446" > < / a > < span class = "lineno" > 446< / span > < span class = "keywordtype" > short< / span > ldb_tgp,< / div >
< div class = "line" > < a id = "l00447" name = "l00447" > < / a > < span class = "lineno" > 447< / span > < span class = "keyword" > typename< / span > AccumType = float,< / div >
< div class = "line" > < a id = "l00448" name = "l00448" > < / a > < span class = "lineno" > 448< / span > < span class = "keyword" > typename< / span > Epilogue = TransformNone< U, AccumType> > < / div >
< div class = "line" > < a id = "l00449" name = "l00449" > < / a > < span class = "lineno" > 449< / span > < span class = "keyword" > struct < / span > BlockMMA {< / div >
< div class = "line" > < a id = "l00450" name = "l00450" > < / a > < span class = "lineno" > 450< / span > < span class = "comment" > // MMAFrag size< / span > < / div >
< div class = "line" > < a id = "l00451" name = "l00451" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aee8caec45c1f9e4428586effbfe6137d" > 451< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aee8caec45c1f9e4428586effbfe6137d" > kFragSize< / a > = 8;< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00452" name = "l00452" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ae2c42cb6d0dde785859164c195f4d13c" > 452< / a > < / span > < span class = "keyword" > using < / span > < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ae2c42cb6d0dde785859164c195f4d13c" > MMAFrag_acc_t< / a > = < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html" > BaseMMAFrag< AccumType, kFragSize, kFragSize> < / a > ;< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00453" name = "l00453" > < / a > < span class = "lineno" > 453< / span > < / div >
< div class = "line" > < a id = "l00454" name = "l00454" > < / a > < span class = "lineno" > 454< / span > < span class = "comment" > // Warp tile simdgroup matrix strides along M< / span > < / div >
< div class = "line" > < a id = "l00455" name = "l00455" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a5b0029866f493363942133b55bff7307" > 455< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a5b0029866f493363942133b55bff7307" > TM_stride< / a > = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aee8caec45c1f9e4428586effbfe6137d" > kFragSize< / a > * WM;< / div >
< div class = "line" > < a id = "l00456" name = "l00456" > < / a > < span class = "lineno" > 456< / span > < span class = "comment" > // Warp tile simdgroup matrix strides along N< / span > < / div >
< div class = "line" > < a id = "l00457" name = "l00457" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a8b3690b383afd26563efb38f9c375e50" > 457< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a8b3690b383afd26563efb38f9c375e50" > TN_stride< / a > = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aee8caec45c1f9e4428586effbfe6137d" > kFragSize< / a > * WN;< / div >
< div class = "line" > < a id = "l00458" name = "l00458" > < / a > < span class = "lineno" > 458< / span > < / div >
< div class = "line" > < a id = "l00459" name = "l00459" > < / a > < span class = "lineno" > 459< / span > < span class = "comment" > // Warp tile size along M< / span > < / div >
< div class = "line" > < a id = "l00460" name = "l00460" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aba5f749fdf32d8bd9d9e29f2a9ae4591" > 460< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aba5f749fdf32d8bd9d9e29f2a9ae4591" > TM< / a > = BM / < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a5b0029866f493363942133b55bff7307" > TM_stride< / a > ;< / div >
< div class = "line" > < a id = "l00461" name = "l00461" > < / a > < span class = "lineno" > 461< / span > < span class = "comment" > // Warp tile size along N< / span > < / div >
< div class = "line" > < a id = "l00462" name = "l00462" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a706ae779c1f8d2eb18f19c248567d424" > 462< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a706ae779c1f8d2eb18f19c248567d424" > TN< / a > = BN / < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a8b3690b383afd26563efb38f9c375e50" > TN_stride< / a > ;< / div >
< div class = "line" > < a id = "l00463" name = "l00463" > < / a > < span class = "lineno" > 463< / span > < / div >
< div class = "line" > < a id = "l00464" name = "l00464" > < / a > < span class = "lineno" > 464< / span > < span class = "comment" > // Threadgroup A strides< / span > < / div >
< div class = "line" > < a id = "l00465" name = "l00465" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ab9c7f5386594497f5f4df7e59670b877" > 465< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ab9c7f5386594497f5f4df7e59670b877" > A_str_m< / a > = transpose_a ? 1 : lda_tgp; < span class = "comment" > // M< / span > < / div >
< div class = "line" > < a id = "l00466" name = "l00466" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a257287702dc849d0d8a078fced453142" > 466< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a257287702dc849d0d8a078fced453142" > A_str_k< / a > = transpose_a ? lda_tgp : 1; < span class = "comment" > // K< / span > < / div >
< div class = "line" > < a id = "l00467" name = "l00467" > < / a > < span class = "lineno" > 467< / span > < / div >
< div class = "line" > < a id = "l00468" name = "l00468" > < / a > < span class = "lineno" > 468< / span > < span class = "comment" > // Threadgroup B strides< / span > < / div >
< div class = "line" > < a id = "l00469" name = "l00469" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa71400922babd388177f228c2c82b211" > 469< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa71400922babd388177f228c2c82b211" > B_str_k< / a > = transpose_b ? 1 : ldb_tgp; < span class = "comment" > // K< / span > < / div >
< div class = "line" > < a id = "l00470" name = "l00470" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a49538190209e522ddbef45fe95563d17" > 470< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a49538190209e522ddbef45fe95563d17" > B_str_n< / a > = transpose_b ? ldb_tgp : 1; < span class = "comment" > // N< / span > < / div >
< div class = "line" > < a id = "l00471" name = "l00471" > < / a > < span class = "lineno" > 471< / span > < / div >
< div class = "line" > < a id = "l00472" name = "l00472" > < / a > < span class = "lineno" > 472< / span > < span class = "comment" > // Threadgroup strides along K< / span > < / div >
< div class = "line" > < a id = "l00473" name = "l00473" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a8fddaa78913cdc8eea5e1cf7d2776330" > 473< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a8fddaa78913cdc8eea5e1cf7d2776330" > tile_stride_a< / a > = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aee8caec45c1f9e4428586effbfe6137d" > kFragSize< / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a257287702dc849d0d8a078fced453142" > A_str_k< / a > ;< / div >
< div class = "line" > < a id = "l00474" name = "l00474" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ae3f35453b3afbaac9df64ad5966b34a4" > 474< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ae3f35453b3afbaac9df64ad5966b34a4" > tile_stride_b< / a > = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aee8caec45c1f9e4428586effbfe6137d" > kFragSize< / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa71400922babd388177f228c2c82b211" > B_str_k< / a > ;< / div >
< div class = "line" > < a id = "l00475" name = "l00475" > < / a > < span class = "lineno" > 475< / span > < / div >
< div class = "line" > < a id = "l00476" name = "l00476" > < / a > < span class = "lineno" > 476< / span > < span class = "comment" > // Simdgroup matrices< / span > < / div >
< div class = "line" > < a id = "l00477" name = "l00477" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a47e614120c650f7479db79f23a0df586" > 477< / a > < / span > < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_m_m_a_tile.html" > MMATile< AccumType, TM, 1, MMAFrag_acc_t> < / a > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a47e614120c650f7479db79f23a0df586" > Atile< / a > ;< / div >
< div class = "line" > < a id = "l00478" name = "l00478" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a44fca27c821764317263047a780977b0" > 478< / a > < / span > < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_m_m_a_tile.html" > MMATile< AccumType, 1, TN, MMAFrag_acc_t> < / a > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a44fca27c821764317263047a780977b0" > Btile< / a > ;< / div >
< div class = "line" > < a id = "l00479" name = "l00479" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > 479< / a > < / span > < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_m_m_a_tile.html" > MMATile< AccumType, TM, TN, MMAFrag_acc_t> < / a > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > ;< / div >
< div class = "line" > < a id = "l00480" name = "l00480" > < / a > < span class = "lineno" > 480< / span > < / div >
< div class = "line" > < a id = "l00481" name = "l00481" > < / a > < span class = "lineno" > 481< / span > < span class = "comment" > // Offsets within threadgroup< / span > < / div >
< div class = "line" > < a id = "l00482" name = "l00482" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa85451edf6900fd6af164d4d50889ae3" > 482< / a > < / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa85451edf6900fd6af164d4d50889ae3" > sm< / a > ;< / div >
< div class = "line" > < a id = "l00483" name = "l00483" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ade420e8b811d597345783c324c23a34a" > 483< / a > < / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ade420e8b811d597345783c324c23a34a" > sn< / a > ;< / div >
2024-10-31 11:00:19 +08:00
< div class = "line" > < a id = "l00484" name = "l00484" > < / a > < span class = "lineno" > 484< / span > < / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00485" name = "l00485" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a138ed1bbad2ca88d3a3c7d162cd36562" > 485< / a > < / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a138ed1bbad2ca88d3a3c7d162cd36562" > As_offset< / a > ;< / div >
< div class = "line" > < a id = "l00486" name = "l00486" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a92f6aeee432f53638447eac842f43eca" > 486< / a > < / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a92f6aeee432f53638447eac842f43eca" > Bs_offset< / a > ;< / div >
< div class = "line" > < a id = "l00487" name = "l00487" > < / a > < span class = "lineno" > 487< / span > < / div >
< div class = "line" > < a id = "l00488" name = "l00488" > < / a > < span class = "lineno" > 488< / span > < span class = "comment" > /* Constructor */< / span > < / div >
< div class = "foldopen" id = "foldopen00489" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00489" name = "l00489" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa14406b7298456ac45d23dd3c4642dd8" > 489< / a > < / span > METAL_FUNC < a class = "code hl_function" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa14406b7298456ac45d23dd3c4642dd8" > BlockMMA< / a > (< / div >
< div class = "line" > < a id = "l00490" name = "l00490" > < / a > < span class = "lineno" > 490< / span > ushort simd_group_id [[simdgroup_index_in_threadgroup]],< / div >
< div class = "line" > < a id = "l00491" name = "l00491" > < / a > < span class = "lineno" > 491< / span > ushort simd_lane_id [[thread_index_in_simdgroup]]) {< / div >
< div class = "line" > < a id = "l00492" name = "l00492" > < / a > < span class = "lineno" > 492< / span > < span class = "comment" > // Determine thread position in simdgroup matrix< / span > < / div >
< div class = "line" > < a id = "l00493" name = "l00493" > < / a > < span class = "lineno" > 493< / span > < span class = "keywordtype" > short< / span > tm = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aee8caec45c1f9e4428586effbfe6137d" > kFragSize< / a > * (simd_group_id / WN);< / div >
< div class = "line" > < a id = "l00494" name = "l00494" > < / a > < span class = "lineno" > 494< / span > < span class = "keywordtype" > short< / span > tn = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aee8caec45c1f9e4428586effbfe6137d" > kFragSize< / a > * (simd_group_id % WN);< / div >
< div class = "line" > < a id = "l00495" name = "l00495" > < / a > < span class = "lineno" > 495< / span > < / div >
< div class = "line" > < a id = "l00496" name = "l00496" > < / a > < span class = "lineno" > 496< / span > short2 simd_coord = MMAFrag_acc_t::get_coord(simd_lane_id);< / div >
< div class = "line" > < a id = "l00497" name = "l00497" > < / a > < span class = "lineno" > 497< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa85451edf6900fd6af164d4d50889ae3" > sm< / a > = simd_coord.y;< / div >
< div class = "line" > < a id = "l00498" name = "l00498" > < / a > < span class = "lineno" > 498< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ade420e8b811d597345783c324c23a34a" > sn< / a > = simd_coord.x;< / div >
< div class = "line" > < a id = "l00499" name = "l00499" > < / a > < span class = "lineno" > 499< / span > < / div >
< div class = "line" > < a id = "l00500" name = "l00500" > < / a > < span class = "lineno" > 500< / span > < span class = "comment" > // Determine thread and simdgroup offset< / span > < / div >
< div class = "line" > < a id = "l00501" name = "l00501" > < / a > < span class = "lineno" > 501< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a138ed1bbad2ca88d3a3c7d162cd36562" > As_offset< / a > = (tm + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa85451edf6900fd6af164d4d50889ae3" > sm< / a > ) * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ab9c7f5386594497f5f4df7e59670b877" > A_str_m< / a > + (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ade420e8b811d597345783c324c23a34a" > sn< / a > )*< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a257287702dc849d0d8a078fced453142" > A_str_k< / a > ; < span class = "comment" > // M, K< / span > < / div >
< div class = "line" > < a id = "l00502" name = "l00502" > < / a > < span class = "lineno" > 502< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a92f6aeee432f53638447eac842f43eca" > Bs_offset< / a > = (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa85451edf6900fd6af164d4d50889ae3" > sm< / a > )*< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa71400922babd388177f228c2c82b211" > B_str_k< / a > + (tn + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ade420e8b811d597345783c324c23a34a" > sn< / a > ) * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a49538190209e522ddbef45fe95563d17" > B_str_n< / a > ; < span class = "comment" > // K, N< / span > < / div >
< div class = "line" > < a id = "l00503" name = "l00503" > < / a > < span class = "lineno" > 503< / span > < / div >
< div class = "line" > < a id = "l00504" name = "l00504" > < / a > < span class = "lineno" > 504< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa85451edf6900fd6af164d4d50889ae3" > sm< / a > += tm;< / div >
< div class = "line" > < a id = "l00505" name = "l00505" > < / a > < span class = "lineno" > 505< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ade420e8b811d597345783c324c23a34a" > sn< / a > += tn;< / div >
< div class = "line" > < a id = "l00506" name = "l00506" > < / a > < span class = "lineno" > 506< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00507" name = "l00507" > < / a > < span class = "lineno" > 507< / span > < / div >
< div class = "line" > < a id = "l00508" name = "l00508" > < / a > < span class = "lineno" > 508< / span > < span class = "comment" > /* (BM, BK) X (BK, BN) multiply accumulate function */< / span > < / div >
< div class = "foldopen" id = "foldopen00509" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00509" name = "l00509" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a6a2c2a6d5e767d52c41b42a9d36086b0" > 509< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a6a2c2a6d5e767d52c41b42a9d36086b0" > mma< / a > (< span class = "keyword" > const< / span > threadgroup T* As, < span class = "keyword" > const< / span > threadgroup T* Bs) {< / div >
< div class = "line" > < a id = "l00510" name = "l00510" > < / a > < span class = "lineno" > 510< / span > < span class = "comment" > // Adjust for simdgroup and thread location< / span > < / div >
< div class = "line" > < a id = "l00511" name = "l00511" > < / a > < span class = "lineno" > 511< / span > As += < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a138ed1bbad2ca88d3a3c7d162cd36562" > As_offset< / a > ;< / div >
< div class = "line" > < a id = "l00512" name = "l00512" > < / a > < span class = "lineno" > 512< / span > Bs += < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a92f6aeee432f53638447eac842f43eca" > Bs_offset< / a > ;< / div >
< div class = "line" > < a id = "l00513" name = "l00513" > < / a > < span class = "lineno" > 513< / span > < / div >
< div class = "line" > < a id = "l00514" name = "l00514" > < / a > < span class = "lineno" > 514< / span > < span class = "comment" > // Iterate over BK in blocks of kFragSize< / span > < / div >
< div class = "line" > < a id = "l00515" name = "l00515" > < / a > < span class = "lineno" > 515< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00516" name = "l00516" > < / a > < span class = "lineno" > 516< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > kk = 0; kk < BK; kk += < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aee8caec45c1f9e4428586effbfe6137d" > kFragSize< / a > ) {< / div >
< div class = "line" > < a id = "l00517" name = "l00517" > < / a > < span class = "lineno" > 517< / span > simdgroup_barrier(mem_flags::mem_none);< / div >
< div class = "line" > < a id = "l00518" name = "l00518" > < / a > < span class = "lineno" > 518< / span > < / div >
< div class = "line" > < a id = "l00519" name = "l00519" > < / a > < span class = "lineno" > 519< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a47e614120c650f7479db79f23a0df586" > Atile< / a > .template load< T, WM, 1, A_str_m, A_str_k> (As);< / div >
< div class = "line" > < a id = "l00520" name = "l00520" > < / a > < span class = "lineno" > 520< / span > < / div >
< div class = "line" > < a id = "l00521" name = "l00521" > < / a > < span class = "lineno" > 521< / span > simdgroup_barrier(mem_flags::mem_none);< / div >
< div class = "line" > < a id = "l00522" name = "l00522" > < / a > < span class = "lineno" > 522< / span > < / div >
< div class = "line" > < a id = "l00523" name = "l00523" > < / a > < span class = "lineno" > 523< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a44fca27c821764317263047a780977b0" > Btile< / a > .template load< T, 1, WN, B_str_k, B_str_n> (Bs);< / div >
< div class = "line" > < a id = "l00524" name = "l00524" > < / a > < span class = "lineno" > 524< / span > < / div >
< div class = "line" > < a id = "l00525" name = "l00525" > < / a > < span class = "lineno" > 525< / span > simdgroup_barrier(mem_flags::mem_none);< / div >
< div class = "line" > < a id = "l00526" name = "l00526" > < / a > < span class = "lineno" > 526< / span > < / div >
< div class = "line" > < a id = "l00527" name = "l00527" > < / a > < span class = "lineno" > 527< / span > < a class = "code hl_function" href = "namespacemlx_1_1steel.html#ad583e6038efc119542410f43b603d4ad" > tile_matmad< / a > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > , < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a47e614120c650f7479db79f23a0df586" > Atile< / a > , < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a44fca27c821764317263047a780977b0" > Btile< / a > , < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > );< / div >
< div class = "line" > < a id = "l00528" name = "l00528" > < / a > < span class = "lineno" > 528< / span > < / div >
< div class = "line" > < a id = "l00529" name = "l00529" > < / a > < span class = "lineno" > 529< / span > < span class = "comment" > // Progress to next simdgroup tile< / span > < / div >
< div class = "line" > < a id = "l00530" name = "l00530" > < / a > < span class = "lineno" > 530< / span > As += < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a8fddaa78913cdc8eea5e1cf7d2776330" > tile_stride_a< / a > ;< / div >
< div class = "line" > < a id = "l00531" name = "l00531" > < / a > < span class = "lineno" > 531< / span > Bs += < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ae3f35453b3afbaac9df64ad5966b34a4" > tile_stride_b< / a > ;< / div >
< div class = "line" > < a id = "l00532" name = "l00532" > < / a > < span class = "lineno" > 532< / span > }< / div >
< div class = "line" > < a id = "l00533" name = "l00533" > < / a > < span class = "lineno" > 533< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00534" name = "l00534" > < / a > < span class = "lineno" > 534< / span > < / div >
< div class = "line" > < a id = "l00535" name = "l00535" > < / a > < span class = "lineno" > 535< / span > < span class = "comment" > /* Store results from simdgroup_matrix results into device memory */< / span > < / div >
< div class = "foldopen" id = "foldopen00536" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00536" name = "l00536" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a0461451ffb5041b6a916ea17ed34288b" > 536< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a0461451ffb5041b6a916ea17ed34288b" > store_result< / a > (device U* D, < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > ldd) {< / div >
< div class = "line" > < a id = "l00537" name = "l00537" > < / a > < span class = "lineno" > 537< / span > < span class = "comment" > // Apply epilogue< / span > < / div >
< div class = "line" > < a id = "l00538" name = "l00538" > < / a > < span class = "lineno" > 538< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00539" name = "l00539" > < / a > < span class = "lineno" > 539< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < span class = "keyword" > decltype< / span > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > )::kElemsPerTile; i++) {< / div >
< div class = "line" > < a id = "l00540" name = "l00540" > < / a > < span class = "lineno" > 540< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > .elems()[i] = Epilogue::apply(< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > .elems()[i]);< / div >
< div class = "line" > < a id = "l00541" name = "l00541" > < / a > < span class = "lineno" > 541< / span > }< / div >
< div class = "line" > < a id = "l00542" name = "l00542" > < / a > < span class = "lineno" > 542< / span > < / div >
< div class = "line" > < a id = "l00543" name = "l00543" > < / a > < span class = "lineno" > 543< / span > < span class = "comment" > // Adjust for simdgroup and thread location< / span > < / div >
< div class = "line" > < a id = "l00544" name = "l00544" > < / a > < span class = "lineno" > 544< / span > D += < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa85451edf6900fd6af164d4d50889ae3" > sm< / a > * ldd + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ade420e8b811d597345783c324c23a34a" > sn< / a > ;< / div >
< div class = "line" > < a id = "l00545" name = "l00545" > < / a > < span class = "lineno" > 545< / span > < / div >
< div class = "line" > < a id = "l00546" name = "l00546" > < / a > < span class = "lineno" > 546< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > .template store< U, WM, WN> (D, ldd);< / div >
< div class = "line" > < a id = "l00547" name = "l00547" > < / a > < span class = "lineno" > 547< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00548" name = "l00548" > < / a > < span class = "lineno" > 548< / span > < / div >
< div class = "line" > < a id = "l00549" name = "l00549" > < / a > < span class = "lineno" > 549< / span > METAL_FUNC < span class = "keywordtype" > void< / span > < / div >
< div class = "foldopen" id = "foldopen00550" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00550" name = "l00550" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a081ba538d30d1d02498a7f341e6bd611" > 550< / a > < / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a081ba538d30d1d02498a7f341e6bd611" > store_result_safe< / a > (device U* D, < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > ldd, short2 dst_tile_dims) {< / div >
< div class = "line" > < a id = "l00551" name = "l00551" > < / a > < span class = "lineno" > 551< / span > < span class = "comment" > // Apply epilogue< / span > < / div >
< div class = "line" > < a id = "l00552" name = "l00552" > < / a > < span class = "lineno" > 552< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00553" name = "l00553" > < / a > < span class = "lineno" > 553< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < span class = "keyword" > decltype< / span > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > )::kElemsPerTile; i++) {< / div >
< div class = "line" > < a id = "l00554" name = "l00554" > < / a > < span class = "lineno" > 554< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > .elems()[i] = Epilogue::apply(< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > .elems()[i]);< / div >
< div class = "line" > < a id = "l00555" name = "l00555" > < / a > < span class = "lineno" > 555< / span > }< / div >
2024-10-31 11:00:19 +08:00
< div class = "line" > < a id = "l00556" name = "l00556" > < / a > < span class = "lineno" > 556< / span > < / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00557" name = "l00557" > < / a > < span class = "lineno" > 557< / span > < span class = "comment" > // Adjust for simdgroup and thread location< / span > < / div >
< div class = "line" > < a id = "l00558" name = "l00558" > < / a > < span class = "lineno" > 558< / span > D += < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa85451edf6900fd6af164d4d50889ae3" > sm< / a > * ldd + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ade420e8b811d597345783c324c23a34a" > sn< / a > ;< / div >
< div class = "line" > < a id = "l00559" name = "l00559" > < / a > < span class = "lineno" > 559< / span > dst_tile_dims -= short2(< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ade420e8b811d597345783c324c23a34a" > sn< / a > , < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa85451edf6900fd6af164d4d50889ae3" > sm< / a > );< / div >
< div class = "line" > < a id = "l00560" name = "l00560" > < / a > < span class = "lineno" > 560< / span > < / div >
< div class = "line" > < a id = "l00561" name = "l00561" > < / a > < span class = "lineno" > 561< / span > < span class = "keywordflow" > if< / span > (dst_tile_dims.x < = 0 || dst_tile_dims.y < = 0)< / div >
< div class = "line" > < a id = "l00562" name = "l00562" > < / a > < span class = "lineno" > 562< / span > < span class = "keywordflow" > return< / span > ;< / div >
< div class = "line" > < a id = "l00563" name = "l00563" > < / a > < span class = "lineno" > 563< / span > < / div >
< div class = "line" > < a id = "l00564" name = "l00564" > < / a > < span class = "lineno" > 564< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > .template store_safe< U, WM, WN> (D, ldd, dst_tile_dims);< / div >
< div class = "line" > < a id = "l00565" name = "l00565" > < / a > < span class = "lineno" > 565< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00566" name = "l00566" > < / a > < span class = "lineno" > 566< / span > < / div >
< div class = "line" > < a id = "l00567" name = "l00567" > < / a > < span class = "lineno" > 567< / span > < span class = "comment" > /* Apply epilogue */< / span > < / div >
< div class = "line" > < a id = "l00568" name = "l00568" > < / a > < span class = "lineno" > 568< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > UnaryEpilogue> < / div >
< div class = "foldopen" id = "foldopen00569" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00569" name = "l00569" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#af653c0808ba4fa9a25286f1febb7baff" > 569< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_block_m_m_a.html#af653c0808ba4fa9a25286f1febb7baff" > apply_epilogue< / a > (thread < span class = "keyword" > const< / span > UnaryEpilogue& epilogue_op) {< / div >
< div class = "line" > < a id = "l00570" name = "l00570" > < / a > < span class = "lineno" > 570< / span > < span class = "comment" > // Loop over all simdgroup tiles< / span > < / div >
< div class = "line" > < a id = "l00571" name = "l00571" > < / a > < span class = "lineno" > 571< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00572" name = "l00572" > < / a > < span class = "lineno" > 572< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < span class = "keyword" > decltype< / span > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > )::kElemsPerTile; i++) {< / div >
< div class = "line" > < a id = "l00573" name = "l00573" > < / a > < span class = "lineno" > 573< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > .elems()[i] = epilogue_op.apply(< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > .elems()[i]);< / div >
< div class = "line" > < a id = "l00574" name = "l00574" > < / a > < span class = "lineno" > 574< / span > }< / div >
< div class = "line" > < a id = "l00575" name = "l00575" > < / a > < span class = "lineno" > 575< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00576" name = "l00576" > < / a > < span class = "lineno" > 576< / span > < / div >
< div class = "line" > < a id = "l00577" name = "l00577" > < / a > < span class = "lineno" > 577< / span > < span class = "comment" > /* Apply epilogue */< / span > < / div >
< div class = "line" > < a id = "l00578" name = "l00578" > < / a > < span class = "lineno" > 578< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > BinaryEpilogue> < / div >
2024-10-31 11:00:19 +08:00
< div class = "foldopen" id = "foldopen00579" data-start = "{" data-end = "}" >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00579" name = "l00579" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a823c56cbd2086f10272df7284a5247ae" > 579< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a823c56cbd2086f10272df7284a5247ae" > apply_epilogue< / a > (< / div >
< div class = "line" > < a id = "l00580" name = "l00580" > < / a > < span class = "lineno" > 580< / span > < span class = "keyword" > const< / span > device U* C,< / div >
< div class = "line" > < a id = "l00581" name = "l00581" > < / a > < span class = "lineno" > 581< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > ldc,< / div >
< div class = "line" > < a id = "l00582" name = "l00582" > < / a > < span class = "lineno" > 582< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > fdc,< / div >
< div class = "line" > < a id = "l00583" name = "l00583" > < / a > < span class = "lineno" > 583< / span > thread < span class = "keyword" > const< / span > BinaryEpilogue& epilogue_op) {< / div >
< div class = "line" > < a id = "l00584" name = "l00584" > < / a > < span class = "lineno" > 584< / span > < span class = "comment" > // Adjust for simdgroup and thread location< / span > < / div >
< div class = "line" > < a id = "l00585" name = "l00585" > < / a > < span class = "lineno" > 585< / span > C += (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa85451edf6900fd6af164d4d50889ae3" > sm< / a > )*ldc + (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ade420e8b811d597345783c324c23a34a" > sn< / a > )*fdc;< / div >
< div class = "line" > < a id = "l00586" name = "l00586" > < / a > < span class = "lineno" > 586< / span > < / div >
< div class = "line" > < a id = "l00587" name = "l00587" > < / a > < span class = "lineno" > 587< / span > < span class = "comment" > // Loop over all simdgroup tiles< / span > < / div >
< div class = "line" > < a id = "l00588" name = "l00588" > < / a > < span class = "lineno" > 588< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00589" name = "l00589" > < / a > < span class = "lineno" > 589< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aba5f749fdf32d8bd9d9e29f2a9ae4591" > TM< / a > ; i++) {< / div >
< div class = "line" > < a id = "l00590" name = "l00590" > < / a > < span class = "lineno" > 590< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00591" name = "l00591" > < / a > < span class = "lineno" > 591< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a706ae779c1f8d2eb18f19c248567d424" > TN< / a > ; j++) {< / div >
< div class = "line" > < a id = "l00592" name = "l00592" > < / a > < span class = "lineno" > 592< / span > < span class = "comment" > // Get accumulated result and associated offset in C< / span > < / div >
< div class = "line" > < a id = "l00593" name = "l00593" > < / a > < span class = "lineno" > 593< / span > thread < span class = "keyword" > auto< / span > & accum = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > .frag_at(i, j);< / div >
< div class = "line" > < a id = "l00594" name = "l00594" > < / a > < span class = "lineno" > 594< / span > < span class = "keywordtype" > int< / span > offset_c = (i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a5b0029866f493363942133b55bff7307" > TM_stride< / a > ) * ldc + (j * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a8b3690b383afd26563efb38f9c375e50" > TN_stride< / a > ) * fdc;< / div >
< div class = "line" > < a id = "l00595" name = "l00595" > < / a > < span class = "lineno" > 595< / span > < / div >
< div class = "line" > < a id = "l00596" name = "l00596" > < / a > < span class = "lineno" > 596< / span > < span class = "comment" > // Apply epilogue< / span > < / div >
< div class = "line" > < a id = "l00597" name = "l00597" > < / a > < span class = "lineno" > 597< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00598" name = "l00598" > < / a > < span class = "lineno" > 598< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > k = 0; k < < span class = "keyword" > decltype< / span > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > )::kElemsPerFrag; k++) {< / div >
< div class = "line" > < a id = "l00599" name = "l00599" > < / a > < span class = "lineno" > 599< / span > accum[k] = epilogue_op.apply(accum[k], C[offset_c + k * fdc]);< / div >
< div class = "line" > < a id = "l00600" name = "l00600" > < / a > < span class = "lineno" > 600< / span > }< / div >
< div class = "line" > < a id = "l00601" name = "l00601" > < / a > < span class = "lineno" > 601< / span > }< / div >
< div class = "line" > < a id = "l00602" name = "l00602" > < / a > < span class = "lineno" > 602< / span > }< / div >
< div class = "line" > < a id = "l00603" name = "l00603" > < / a > < span class = "lineno" > 603< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00604" name = "l00604" > < / a > < span class = "lineno" > 604< / span > < / div >
< div class = "line" > < a id = "l00605" name = "l00605" > < / a > < span class = "lineno" > 605< / span > < span class = "comment" > /* Apply epilogue */< / span > < / div >
< div class = "line" > < a id = "l00606" name = "l00606" > < / a > < span class = "lineno" > 606< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > BinaryEpilogue> < / div >
< div class = "foldopen" id = "foldopen00607" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00607" name = "l00607" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a9e48f2d51099ec00171506724faab54a" > 607< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a9e48f2d51099ec00171506724faab54a" > apply_epilogue_safe< / a > (< / div >
< div class = "line" > < a id = "l00608" name = "l00608" > < / a > < span class = "lineno" > 608< / span > < span class = "keyword" > const< / span > device U* C,< / div >
< div class = "line" > < a id = "l00609" name = "l00609" > < / a > < span class = "lineno" > 609< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > ldc,< / div >
< div class = "line" > < a id = "l00610" name = "l00610" > < / a > < span class = "lineno" > 610< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > fdc,< / div >
< div class = "line" > < a id = "l00611" name = "l00611" > < / a > < span class = "lineno" > 611< / span > short2 dst_tile_dims,< / div >
< div class = "line" > < a id = "l00612" name = "l00612" > < / a > < span class = "lineno" > 612< / span > thread < span class = "keyword" > const< / span > BinaryEpilogue& epilogue_op) {< / div >
< div class = "line" > < a id = "l00613" name = "l00613" > < / a > < span class = "lineno" > 613< / span > < span class = "comment" > // Adjust for simdgroup and thread location< / span > < / div >
< div class = "line" > < a id = "l00614" name = "l00614" > < / a > < span class = "lineno" > 614< / span > C += (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa85451edf6900fd6af164d4d50889ae3" > sm< / a > )*ldc + (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ade420e8b811d597345783c324c23a34a" > sn< / a > )*fdc;< / div >
< div class = "line" > < a id = "l00615" name = "l00615" > < / a > < span class = "lineno" > 615< / span > dst_tile_dims -= short2(< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ade420e8b811d597345783c324c23a34a" > sn< / a > , < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa85451edf6900fd6af164d4d50889ae3" > sm< / a > );< / div >
< div class = "line" > < a id = "l00616" name = "l00616" > < / a > < span class = "lineno" > 616< / span > < / div >
< div class = "line" > < a id = "l00617" name = "l00617" > < / a > < span class = "lineno" > 617< / span > < span class = "keywordflow" > if< / span > (dst_tile_dims.x < = 0 || dst_tile_dims.y < = 0)< / div >
< div class = "line" > < a id = "l00618" name = "l00618" > < / a > < span class = "lineno" > 618< / span > < span class = "keywordflow" > return< / span > ;< / div >
< div class = "line" > < a id = "l00619" name = "l00619" > < / a > < span class = "lineno" > 619< / span > < / div >
< div class = "line" > < a id = "l00620" name = "l00620" > < / a > < span class = "lineno" > 620< / span > < span class = "comment" > // Loop over all simdgroup tiles< / span > < / div >
< div class = "line" > < a id = "l00621" name = "l00621" > < / a > < span class = "lineno" > 621< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00622" name = "l00622" > < / a > < span class = "lineno" > 622< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aba5f749fdf32d8bd9d9e29f2a9ae4591" > TM< / a > ; i++) {< / div >
< div class = "line" > < a id = "l00623" name = "l00623" > < / a > < span class = "lineno" > 623< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00624" name = "l00624" > < / a > < span class = "lineno" > 624< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a706ae779c1f8d2eb18f19c248567d424" > TN< / a > ; j++) {< / div >
< div class = "line" > < a id = "l00625" name = "l00625" > < / a > < span class = "lineno" > 625< / span > < span class = "comment" > // Get accumulated result and associated offset in C< / span > < / div >
< div class = "line" > < a id = "l00626" name = "l00626" > < / a > < span class = "lineno" > 626< / span > thread < span class = "keyword" > auto< / span > & accum = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > .frag_at(i, j);< / div >
< div class = "line" > < a id = "l00627" name = "l00627" > < / a > < span class = "lineno" > 627< / span > < span class = "keywordtype" > int< / span > offset_c = (i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a5b0029866f493363942133b55bff7307" > TM_stride< / a > ) * ldc + (j * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a8b3690b383afd26563efb38f9c375e50" > TN_stride< / a > ) * fdc;< / div >
2024-10-31 11:00:19 +08:00
< div class = "line" > < a id = "l00628" name = "l00628" > < / a > < span class = "lineno" > 628< / span > < / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00629" name = "l00629" > < / a > < span class = "lineno" > 629< / span > < span class = "keyword" > constexpr< / span > < span class = "keywordtype" > short< / span > kelems = < span class = "keyword" > decltype< / span > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > )::kElemsPerFrag;< / div >
< div class = "line" > < a id = "l00630" name = "l00630" > < / a > < span class = "lineno" > 630< / span > < / div >
< div class = "line" > < a id = "l00631" name = "l00631" > < / a > < span class = "lineno" > 631< / span > < span class = "comment" > // Read C< / span > < / div >
< div class = "line" > < a id = "l00632" name = "l00632" > < / a > < span class = "lineno" > 632< / span > U c_elems[kelems] = {0};< / div >
< div class = "line" > < a id = "l00633" name = "l00633" > < / a > < span class = "lineno" > 633< / span > < / div >
< div class = "line" > < a id = "l00634" name = "l00634" > < / a > < span class = "lineno" > 634< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00635" name = "l00635" > < / a > < span class = "lineno" > 635< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > k = 0; k < kelems; k++) {< / div >
< div class = "line" > < a id = "l00636" name = "l00636" > < / a > < span class = "lineno" > 636< / span > < span class = "keywordflow" > if< / span > ((j * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a8b3690b383afd26563efb38f9c375e50" > TN_stride< / a > + k) < dst_tile_dims.x) {< / div >
< div class = "line" > < a id = "l00637" name = "l00637" > < / a > < span class = "lineno" > 637< / span > c_elems[k] = C[offset_c + k * fdc];< / div >
< div class = "line" > < a id = "l00638" name = "l00638" > < / a > < span class = "lineno" > 638< / span > }< / div >
< div class = "line" > < a id = "l00639" name = "l00639" > < / a > < span class = "lineno" > 639< / span > }< / div >
< div class = "line" > < a id = "l00640" name = "l00640" > < / a > < span class = "lineno" > 640< / span > < / div >
< div class = "line" > < a id = "l00641" name = "l00641" > < / a > < span class = "lineno" > 641< / span > < span class = "comment" > // Apply epilogue< / span > < / div >
< div class = "line" > < a id = "l00642" name = "l00642" > < / a > < span class = "lineno" > 642< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00643" name = "l00643" > < / a > < span class = "lineno" > 643< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > k = 0; k < kelems; k++) {< / div >
< div class = "line" > < a id = "l00644" name = "l00644" > < / a > < span class = "lineno" > 644< / span > accum[k] = epilogue_op.apply(accum[k], c_elems[k]);< / div >
< div class = "line" > < a id = "l00645" name = "l00645" > < / a > < span class = "lineno" > 645< / span > }< / div >
< div class = "line" > < a id = "l00646" name = "l00646" > < / a > < span class = "lineno" > 646< / span > }< / div >
< div class = "line" > < a id = "l00647" name = "l00647" > < / a > < span class = "lineno" > 647< / span > }< / div >
< div class = "line" > < a id = "l00648" name = "l00648" > < / a > < span class = "lineno" > 648< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00649" name = "l00649" > < / a > < span class = "lineno" > 649< / span > < / div >
< div class = "line" > < a id = "l00650" name = "l00650" > < / a > < span class = "lineno" > 650< / span > < span class = "comment" > /* Store results from simdgroup_matrix results into device memory */< / span > < / div >
< div class = "foldopen" id = "foldopen00651" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00651" name = "l00651" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a7cf757e9785e23997b1417e024559ed3" > 651< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a7cf757e9785e23997b1417e024559ed3" > store_result< / a > (< / div >
< div class = "line" > < a id = "l00652" name = "l00652" > < / a > < span class = "lineno" > 652< / span > device U* D,< / div >
< div class = "line" > < a id = "l00653" name = "l00653" > < / a > < span class = "lineno" > 653< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > ldd,< / div >
< div class = "line" > < a id = "l00654" name = "l00654" > < / a > < span class = "lineno" > 654< / span > < span class = "keyword" > const< / span > device U* C,< / div >
< div class = "line" > < a id = "l00655" name = "l00655" > < / a > < span class = "lineno" > 655< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > ldc,< / div >
< div class = "line" > < a id = "l00656" name = "l00656" > < / a > < span class = "lineno" > 656< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > fdc,< / div >
< div class = "line" > < a id = "l00657" name = "l00657" > < / a > < span class = "lineno" > 657< / span > thread < span class = "keyword" > const< / span > Epilogue& epilogue_op)< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00658" name = "l00658" > < / a > < span class = "lineno" > 658< / span > < span class = "comment" > // Adjust for simdgroup and thread location< / span > < / div >
< div class = "line" > < a id = "l00659" name = "l00659" > < / a > < span class = "lineno" > 659< / span > C += (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa85451edf6900fd6af164d4d50889ae3" > sm< / a > )*ldc + (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ade420e8b811d597345783c324c23a34a" > sn< / a > )*fdc;< / div >
< div class = "line" > < a id = "l00660" name = "l00660" > < / a > < span class = "lineno" > 660< / span > D += (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa85451edf6900fd6af164d4d50889ae3" > sm< / a > )*ldd + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ade420e8b811d597345783c324c23a34a" > sn< / a > ;< / div >
< div class = "line" > < a id = "l00661" name = "l00661" > < / a > < span class = "lineno" > 661< / span > < / div >
< div class = "line" > < a id = "l00662" name = "l00662" > < / a > < span class = "lineno" > 662< / span > < span class = "keyword" > constexpr< / span > < span class = "keywordtype" > short< / span > kelems = < span class = "keyword" > decltype< / span > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > )::kElemsPerFrag;< / div >
< div class = "line" > < a id = "l00663" name = "l00663" > < / a > < span class = "lineno" > 663< / span > < / div >
< div class = "line" > < a id = "l00664" name = "l00664" > < / a > < span class = "lineno" > 664< / span > < span class = "comment" > // Loop over all simdgroup tiles< / span > < / div >
< div class = "line" > < a id = "l00665" name = "l00665" > < / a > < span class = "lineno" > 665< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00666" name = "l00666" > < / a > < span class = "lineno" > 666< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aba5f749fdf32d8bd9d9e29f2a9ae4591" > TM< / a > ; i++) {< / div >
< div class = "line" > < a id = "l00667" name = "l00667" > < / a > < span class = "lineno" > 667< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00668" name = "l00668" > < / a > < span class = "lineno" > 668< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a706ae779c1f8d2eb18f19c248567d424" > TN< / a > ; j++) {< / div >
< div class = "line" > < a id = "l00669" name = "l00669" > < / a > < span class = "lineno" > 669< / span > < span class = "comment" > // Get accumulated result and associated offset in C< / span > < / div >
< div class = "line" > < a id = "l00670" name = "l00670" > < / a > < span class = "lineno" > 670< / span > thread < span class = "keyword" > const< / span > < span class = "keyword" > auto< / span > & accum = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > .frag_at(i, j);< / div >
< div class = "line" > < a id = "l00671" name = "l00671" > < / a > < span class = "lineno" > 671< / span > < span class = "keywordtype" > int< / span > offset_c = (i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a5b0029866f493363942133b55bff7307" > TM_stride< / a > ) * ldc + (j * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a8b3690b383afd26563efb38f9c375e50" > TN_stride< / a > ) * fdc;< / div >
< div class = "line" > < a id = "l00672" name = "l00672" > < / a > < span class = "lineno" > 672< / span > < span class = "keywordtype" > int< / span > offset_d = (i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a5b0029866f493363942133b55bff7307" > TM_stride< / a > ) * ldd + (j * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a8b3690b383afd26563efb38f9c375e50" > TN_stride< / a > );< / div >
< div class = "line" > < a id = "l00673" name = "l00673" > < / a > < span class = "lineno" > 673< / span > < / div >
< div class = "line" > < a id = "l00674" name = "l00674" > < / a > < span class = "lineno" > 674< / span > < span class = "comment" > // Apply epilogue< / span > < / div >
< div class = "line" > < a id = "l00675" name = "l00675" > < / a > < span class = "lineno" > 675< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00676" name = "l00676" > < / a > < span class = "lineno" > 676< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > k = 0; k < kelems; k++) {< / div >
< div class = "line" > < a id = "l00677" name = "l00677" > < / a > < span class = "lineno" > 677< / span > D[offset_d + k] = epilogue_op.apply(accum[k], C[offset_c + k * fdc]);< / div >
< div class = "line" > < a id = "l00678" name = "l00678" > < / a > < span class = "lineno" > 678< / span > }< / div >
< div class = "line" > < a id = "l00679" name = "l00679" > < / a > < span class = "lineno" > 679< / span > }< / div >
< div class = "line" > < a id = "l00680" name = "l00680" > < / a > < span class = "lineno" > 680< / span > }< / div >
< div class = "line" > < a id = "l00681" name = "l00681" > < / a > < span class = "lineno" > 681< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00682" name = "l00682" > < / a > < span class = "lineno" > 682< / span > < / div >
< div class = "foldopen" id = "foldopen00683" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00683" name = "l00683" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a7b324c992750ed3aaa4c485f15b2f391" > 683< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a7b324c992750ed3aaa4c485f15b2f391" > store_result_safe< / a > (< / div >
< div class = "line" > < a id = "l00684" name = "l00684" > < / a > < span class = "lineno" > 684< / span > device U* D,< / div >
< div class = "line" > < a id = "l00685" name = "l00685" > < / a > < span class = "lineno" > 685< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > ldd,< / div >
< div class = "line" > < a id = "l00686" name = "l00686" > < / a > < span class = "lineno" > 686< / span > < span class = "keyword" > const< / span > device U* C,< / div >
< div class = "line" > < a id = "l00687" name = "l00687" > < / a > < span class = "lineno" > 687< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > ldc,< / div >
< div class = "line" > < a id = "l00688" name = "l00688" > < / a > < span class = "lineno" > 688< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > fdc,< / div >
< div class = "line" > < a id = "l00689" name = "l00689" > < / a > < span class = "lineno" > 689< / span > short2 dst_tile_dims,< / div >
< div class = "line" > < a id = "l00690" name = "l00690" > < / a > < span class = "lineno" > 690< / span > thread < span class = "keyword" > const< / span > Epilogue& epilogue_op)< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00691" name = "l00691" > < / a > < span class = "lineno" > 691< / span > < span class = "comment" > // Adjust for simdgroup and thread location< / span > < / div >
< div class = "line" > < a id = "l00692" name = "l00692" > < / a > < span class = "lineno" > 692< / span > C += (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa85451edf6900fd6af164d4d50889ae3" > sm< / a > )*ldc + (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ade420e8b811d597345783c324c23a34a" > sn< / a > )*fdc;< / div >
< div class = "line" > < a id = "l00693" name = "l00693" > < / a > < span class = "lineno" > 693< / span > D += (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa85451edf6900fd6af164d4d50889ae3" > sm< / a > )*ldd + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ade420e8b811d597345783c324c23a34a" > sn< / a > ;< / div >
< div class = "line" > < a id = "l00694" name = "l00694" > < / a > < span class = "lineno" > 694< / span > dst_tile_dims -= short2(< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#ade420e8b811d597345783c324c23a34a" > sn< / a > , < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa85451edf6900fd6af164d4d50889ae3" > sm< / a > );< / div >
< div class = "line" > < a id = "l00695" name = "l00695" > < / a > < span class = "lineno" > 695< / span > < / div >
< div class = "line" > < a id = "l00696" name = "l00696" > < / a > < span class = "lineno" > 696< / span > < span class = "keywordflow" > if< / span > (dst_tile_dims.x < = 0 || dst_tile_dims.y < = 0)< / div >
< div class = "line" > < a id = "l00697" name = "l00697" > < / a > < span class = "lineno" > 697< / span > < span class = "keywordflow" > return< / span > ;< / div >
< div class = "line" > < a id = "l00698" name = "l00698" > < / a > < span class = "lineno" > 698< / span > < / div >
< div class = "line" > < a id = "l00699" name = "l00699" > < / a > < span class = "lineno" > 699< / span > < span class = "keyword" > constexpr< / span > < span class = "keywordtype" > short< / span > kelems = < span class = "keyword" > decltype< / span > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > )::kElemsPerFrag;< / div >
< div class = "line" > < a id = "l00700" name = "l00700" > < / a > < span class = "lineno" > 700< / span > < / div >
< div class = "line" > < a id = "l00701" name = "l00701" > < / a > < span class = "lineno" > 701< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00702" name = "l00702" > < / a > < span class = "lineno" > 702< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#aba5f749fdf32d8bd9d9e29f2a9ae4591" > TM< / a > ; i++) {< / div >
< div class = "line" > < a id = "l00703" name = "l00703" > < / a > < span class = "lineno" > 703< / span > < span class = "keywordflow" > if< / span > (i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a5b0029866f493363942133b55bff7307" > TM_stride< / a > < dst_tile_dims.y) {< / div >
< div class = "line" > < a id = "l00704" name = "l00704" > < / a > < span class = "lineno" > 704< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00705" name = "l00705" > < / a > < span class = "lineno" > 705< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a706ae779c1f8d2eb18f19c248567d424" > TN< / a > ; j++) {< / div >
< div class = "line" > < a id = "l00706" name = "l00706" > < / a > < span class = "lineno" > 706< / span > < span class = "comment" > // Get accumulated result and associated offset in C< / span > < / div >
< div class = "line" > < a id = "l00707" name = "l00707" > < / a > < span class = "lineno" > 707< / span > thread < span class = "keyword" > const< / span > < span class = "keyword" > auto< / span > & accum = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > Ctile< / a > .frag_at(i, j);< / div >
< div class = "line" > < a id = "l00708" name = "l00708" > < / a > < span class = "lineno" > 708< / span > < span class = "keywordtype" > int< / span > offset_c = (i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a5b0029866f493363942133b55bff7307" > TM_stride< / a > ) * ldc + (j * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a8b3690b383afd26563efb38f9c375e50" > TN_stride< / a > ) * fdc;< / div >
< div class = "line" > < a id = "l00709" name = "l00709" > < / a > < span class = "lineno" > 709< / span > < span class = "keywordtype" > int< / span > offset_d = (i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a5b0029866f493363942133b55bff7307" > TM_stride< / a > ) * ldd + (j * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a8b3690b383afd26563efb38f9c375e50" > TN_stride< / a > );< / div >
< div class = "line" > < a id = "l00710" name = "l00710" > < / a > < span class = "lineno" > 710< / span > < / div >
< div class = "line" > < a id = "l00711" name = "l00711" > < / a > < span class = "lineno" > 711< / span > < span class = "comment" > // Apply epilogue< / span > < / div >
< div class = "line" > < a id = "l00712" name = "l00712" > < / a > < span class = "lineno" > 712< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00713" name = "l00713" > < / a > < span class = "lineno" > 713< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > k = 0; k < kelems; k++) {< / div >
< div class = "line" > < a id = "l00714" name = "l00714" > < / a > < span class = "lineno" > 714< / span > < span class = "keywordflow" > if< / span > ((j * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_m_m_a.html#a8b3690b383afd26563efb38f9c375e50" > TN_stride< / a > + k) < dst_tile_dims.x) {< / div >
< div class = "line" > < a id = "l00715" name = "l00715" > < / a > < span class = "lineno" > 715< / span > D[offset_d + k] =< / div >
< div class = "line" > < a id = "l00716" name = "l00716" > < / a > < span class = "lineno" > 716< / span > epilogue_op.apply(accum[k], C[offset_c + k * fdc]);< / div >
< div class = "line" > < a id = "l00717" name = "l00717" > < / a > < span class = "lineno" > 717< / span > }< / div >
< div class = "line" > < a id = "l00718" name = "l00718" > < / a > < span class = "lineno" > 718< / span > }< / div >
< div class = "line" > < a id = "l00719" name = "l00719" > < / a > < span class = "lineno" > 719< / span > }< / div >
< div class = "line" > < a id = "l00720" name = "l00720" > < / a > < span class = "lineno" > 720< / span > }< / div >
< div class = "line" > < a id = "l00721" name = "l00721" > < / a > < span class = "lineno" > 721< / span > }< / div >
< div class = "line" > < a id = "l00722" name = "l00722" > < / a > < span class = "lineno" > 722< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00723" name = "l00723" > < / a > < span class = "lineno" > 723< / span > };< / div >
< div class = "line" > < a id = "l00724" name = "l00724" > < / a > < span class = "lineno" > 724< / span > < / div >
< div class = "line" > < a id = "l00725" name = "l00725" > < / a > < span class = "lineno" > 725< / span > } < span class = "comment" > // namespace steel< / span > < / div >
< div class = "line" > < a id = "l00726" name = "l00726" > < / a > < span class = "lineno" > 726< / span > } < span class = "comment" > // namespace mlx< / span > < / div >
< div class = "ttc" id = "abackend_2metal_2kernels_2steel_2attn_2transforms_8h_html" > < div class = "ttname" > < a href = "backend_2metal_2kernels_2steel_2attn_2transforms_8h.html" > transforms.h< / a > < / div > < / div >
2024-10-31 11:00:19 +08:00
< div class = "ttc" id = "aintegral__constant_8h_html" > < div class = "ttname" > < a href = "integral__constant_8h.html" > integral_constant.h< / a > < / div > < / div >
2024-11-23 04:24:16 +08:00
< div class = "ttc" id = "anamespacemetal_html" > < div class = "ttname" > < a href = "namespacemetal.html" > metal< / a > < / div > < div class = "ttdef" > < b > Definition< / b > bf16_math.h:226< / div > < / div >
< div class = "ttc" id = "anamespacemetal_html_a5017efc9605e069cfb507137cd1a1852" > < div class = "ttname" > < a href = "namespacemetal.html#a5017efc9605e069cfb507137cd1a1852" > metal::simd_shuffle_xor< / a > < / div > < div class = "ttdeci" > METAL_FUNC bfloat16_t simd_shuffle_xor(bfloat16_t data, ushort mask)< / div > < div class = "ttdef" > < b > Definition< / b > bf16_math.h:377< / div > < / div >
2024-10-31 11:00:19 +08:00
< div class = "ttc" id = "anamespacemetal_html_ac82ee6c3fbe9ec5c78c07329424aaec9" > < div class = "ttname" > < a href = "namespacemetal.html#ac82ee6c3fbe9ec5c78c07329424aaec9" > metal::pointer_element_t< / a > < / div > < div class = "ttdeci" > typename pointer_element< remove_cv_t< T > > ::type pointer_element_t< / div > < div class = "ttdef" > < b > Definition< / b > type_traits.h:51< / div > < / div >
2025-01-10 05:56:20 +08:00
< div class = "ttc" id = "anamespacemlx_1_1steel_html" > < div class = "ttname" > < a href = "namespacemlx_1_1steel.html" > mlx::steel< / a > < / div > < div class = "ttdef" > < b > Definition< / b > attn.h:19< / div > < / div >
2024-11-23 04:24:16 +08:00
< div class = "ttc" id = "anamespacemlx_1_1steel_html_ad583e6038efc119542410f43b603d4ad" > < div class = "ttname" > < a href = "namespacemlx_1_1steel.html#ad583e6038efc119542410f43b603d4ad" > mlx::steel::tile_matmad< / a > < / div > < div class = "ttdeci" > METAL_FUNC void tile_matmad(thread MMATile< T, M, N > & D, thread MMATile< U, M, K > & A, thread MMATile< U, K, N > & B, thread MMATile< T, M, N > & C)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:413< / div > < / div >
2025-01-10 05:56:20 +08:00
< div class = "ttc" id = "anamespacemlx_1_1steel_html_afe36ddf6725498d273e5eef4f1579891" > < div class = "ttname" > < a href = "namespacemlx_1_1steel.html#afe36ddf6725498d273e5eef4f1579891" > mlx::steel::Int< / a > < / div > < div class = "ttdeci" > integral_constant< int, val > Int< / div > < div class = "ttdef" > < b > Definition< / b > integral_constant.h:48< / div > < / div >
2024-10-15 23:12:17 +08:00
< div class = "ttc" id = "anamespacemlx_html" > < div class = "ttname" > < a href = "namespacemlx.html" > mlx< / a > < / div > < div class = "ttdef" > < b > Definition< / b > allocator.h:7< / div > < / div >
< div class = "ttc" id = "asteel_2defines_8h_html" > < div class = "ttname" > < a href = "steel_2defines_8h.html" > defines.h< / a > < / div > < / div >
< div class = "ttc" id = "asteel_2defines_8h_html_a5a5c3095b132a7589bc19cd5cb80e2c6" > < div class = "ttname" > < a href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div > < div class = "ttdeci" > #define STEEL_PRAGMA_UNROLL< / div > < div class = "ttdef" > < b > Definition< / b > defines.h:4< / div > < / div >
< div class = "ttc" id = "asteel_2defines_8h_html_a90b91c866313ffa46eff6d9cc944ad2b" > < div class = "ttname" > < a href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < / div > < div class = "ttdeci" > #define STEEL_CONST< / div > < div class = "ttdef" > < b > Definition< / b > defines.h:3< / div > < / div >
2024-10-31 11:00:19 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_base_m_m_a_frag_html" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html" > mlx::steel::BaseMMAFrag< / a > < / div > < div class = "ttdef" > < b > Definition< / b > mma.h:23< / div > < / div >
2025-01-10 05:56:20 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a1868f57d57c8adedab2c58492ec76946" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a1868f57d57c8adedab2c58492ec76946" > mlx::steel::BaseMMAFrag< T, 8, 8 > ::mma< / a > < / div > < div class = "ttdeci" > static METAL_FUNC constexpr void mma(thread mat_type & D, thread mat_type & A, thread mat_type & B, thread mat_type & C)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:180< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a1f0b00daad8eba2f855bb306e70d2328" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a1f0b00daad8eba2f855bb306e70d2328" > mlx::steel::BaseMMAFrag< T, 8, 8 > ::store_safe< / a > < / div > < div class = "ttdeci" > static METAL_FUNC constexpr void store_safe(const thread frag_type & src, DstPtrType dst, StrX str_x, StrY str_y, LimX lim_x, LimY lim_y, OffX off_x=Int< 0 > {}, OffY off_y=Int< 0 > {})< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:138< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a211102315e2afbcfcd2e2c201b638e9f" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a211102315e2afbcfcd2e2c201b638e9f" > mlx::steel::BaseMMAFrag< T, 8, 8 > ::kFragCols< / a > < / div > < div class = "ttdeci" > STEEL_CONST int kFragCols< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:49< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a25675ae18947a97c6e04157b540103a9" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9" > mlx::steel::BaseMMAFrag< T, 8, 8 > ::mat_type< / a > < / div > < div class = "ttdeci" > metal::simdgroup_matrix< T, kFragRows, kFragCols > mat_type< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:60< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a2fe53db449c692226f23f6b99fb2c0d4" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a2fe53db449c692226f23f6b99fb2c0d4" > mlx::steel::BaseMMAFrag< T, 8, 8 > ::kFragRows< / a > < / div > < div class = "ttdeci" > STEEL_CONST int kFragRows< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:48< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a318c4279bdc7b39b7919f108b1cd8010" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a318c4279bdc7b39b7919f108b1cd8010" > mlx::steel::BaseMMAFrag< T, 8, 8 > ::row_bin_op< / a > < / div > < div class = "ttdeci" > static METAL_FUNC constexpr void row_bin_op(thread frag_type & inp_vals, thread T *row_vals)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:204< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a3c34dfdc944db110f4735f1b25307cf0" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a3c34dfdc944db110f4735f1b25307cf0" > mlx::steel::BaseMMAFrag< T, 8, 8 > ::kElemsPerFrag< / a > < / div > < div class = "ttdeci" > STEEL_CONST int kElemsPerFrag< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:51< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a3dcd4301390937f89ed1dde6d28e341f" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a3dcd4301390937f89ed1dde6d28e341f" > mlx::steel::BaseMMAFrag< T, 8, 8 > ::row_frag_type< / a > < / div > < div class = "ttdeci" > metal::vec< T, kElemRows > row_frag_type< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:62< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a51d662e4cff88b5ad17d7c44bb6b6970" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a51d662e4cff88b5ad17d7c44bb6b6970" > mlx::steel::BaseMMAFrag< T, 8, 8 > ::row_reduce< / a > < / div > < div class = "ttdeci" > static METAL_FUNC constexpr void row_reduce(thread const frag_type & inp_vals, thread T *reduced_vals)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:189< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a7331fff1d12f2f8b72b0006a3ad0dd83" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7331fff1d12f2f8b72b0006a3ad0dd83" > mlx::steel::BaseMMAFrag< T, 8, 8 > ::get_coord< / a > < / div > < div class = "ttdeci" > static METAL_FUNC constexpr short2 get_coord(ushort simd_lane_id)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:65< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a76aa5aa690dbcc954e957d767fad661f" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a76aa5aa690dbcc954e957d767fad661f" > mlx::steel::BaseMMAFrag< T, 8, 8 > ::kElemRows< / a > < / div > < div class = "ttdeci" > STEEL_CONST int kElemRows< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:53< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a7c212200d86b4e93f274d99addf668bd" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7c212200d86b4e93f274d99addf668bd" > mlx::steel::BaseMMAFrag< T, 8, 8 > ::kElemCols< / a > < / div > < div class = "ttdeci" > STEEL_CONST int kElemCols< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:54< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a8028512f5a3d2b6acaf966be529627a3" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8028512f5a3d2b6acaf966be529627a3" > mlx::steel::BaseMMAFrag< T, 8, 8 > ::mma< / a > < / div > < div class = "ttdeci" > static METAL_FUNC constexpr void mma(thread frag_type & D, thread frag_type & A, thread frag_type & B, thread frag_type & C)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:161< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a8536bfaa108031c2ea3e9ccdc766ee5b" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b" > mlx::steel::BaseMMAFrag< T, 8, 8 > ::frag_type< / a > < / div > < div class = "ttdeci" > metal::vec< T, kElemsPerFrag > frag_type< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:61< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_base_m_m_a_frag_html_aa8f50ea8961ec5b35c1b81366d64f2cb" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#aa8f50ea8961ec5b35c1b81366d64f2cb" > mlx::steel::BaseMMAFrag< T, 8, 8 > ::store< / a > < / div > < div class = "ttdeci" > static METAL_FUNC constexpr void store(const thread frag_type & src, DstPtrType dst, StrX str_x, StrY str_y)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:118< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_base_m_m_a_frag_html_ac73006b36fc710feda3a7c796e21415c" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#ac73006b36fc710feda3a7c796e21415c" > mlx::steel::BaseMMAFrag< T, 8, 8 > ::load< / a > < / div > < div class = "ttdeci" > static METAL_FUNC constexpr void load(thread frag_type & dst, SrcPtrType src, StrX str_x, StrY str_y)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:75< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_base_m_m_a_frag_html_ad22aaee4a2938cbdd315b39eda84e07d" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#ad22aaee4a2938cbdd315b39eda84e07d" > mlx::steel::BaseMMAFrag< T, 8, 8 > ::load_safe< / a > < / div > < div class = "ttdeci" > static METAL_FUNC constexpr void load_safe(thread frag_type & dst, SrcPtrType src, StrX str_x, StrY str_y, LimX lim_x, LimY lim_y, OffX off_x=Int< 0 > {}, OffY off_y=Int< 0 > {})< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:93< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_base_m_m_a_frag_html_adbb262a3c872e26533b68a39db16459e" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_base_m_m_a_frag.html#adbb262a3c872e26533b68a39db16459e" > mlx::steel::BaseMMAFrag< T, 8, 8 > ::col_frag_type< / a > < / div > < div class = "ttdeci" > metal::vec< T, kElemCols > col_frag_type< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:63< / div > < / div >
2024-11-23 04:24:16 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_a0461451ffb5041b6a916ea17ed34288b" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#a0461451ffb5041b6a916ea17ed34288b" > mlx::steel::BlockMMA::store_result< / a > < / div > < div class = "ttdeci" > METAL_FUNC void store_result(device U *D, const int ldd)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:536< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_a081ba538d30d1d02498a7f341e6bd611" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#a081ba538d30d1d02498a7f341e6bd611" > mlx::steel::BlockMMA::store_result_safe< / a > < / div > < div class = "ttdeci" > METAL_FUNC void store_result_safe(device U *D, const int ldd, short2 dst_tile_dims)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:550< / div > < / div >
2025-01-10 05:56:20 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_a138ed1bbad2ca88d3a3c7d162cd36562" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#a138ed1bbad2ca88d3a3c7d162cd36562" > mlx::steel::BlockMMA< T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue > ::As_offset< / a > < / div > < div class = "ttdeci" > short As_offset< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:485< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_a21b0c40d16eced109bd3196186170bc6" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#a21b0c40d16eced109bd3196186170bc6" > mlx::steel::BlockMMA< T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue > ::Ctile< / a > < / div > < div class = "ttdeci" > MMATile< AccumType, TM, TN, MMAFrag_acc_t > Ctile< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:479< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_a257287702dc849d0d8a078fced453142" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#a257287702dc849d0d8a078fced453142" > mlx::steel::BlockMMA< T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue > ::A_str_k< / a > < / div > < div class = "ttdeci" > STEEL_CONST short A_str_k< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:466< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_a44fca27c821764317263047a780977b0" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#a44fca27c821764317263047a780977b0" > mlx::steel::BlockMMA< T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue > ::Btile< / a > < / div > < div class = "ttdeci" > MMATile< AccumType, 1, TN, MMAFrag_acc_t > Btile< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:478< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_a47e614120c650f7479db79f23a0df586" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#a47e614120c650f7479db79f23a0df586" > mlx::steel::BlockMMA< T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue > ::Atile< / a > < / div > < div class = "ttdeci" > MMATile< AccumType, TM, 1, MMAFrag_acc_t > Atile< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:477< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_a49538190209e522ddbef45fe95563d17" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#a49538190209e522ddbef45fe95563d17" > mlx::steel::BlockMMA< T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue > ::B_str_n< / a > < / div > < div class = "ttdeci" > STEEL_CONST short B_str_n< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:470< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_a5b0029866f493363942133b55bff7307" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#a5b0029866f493363942133b55bff7307" > mlx::steel::BlockMMA< T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue > ::TM_stride< / a > < / div > < div class = "ttdeci" > STEEL_CONST short TM_stride< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:455< / div > < / div >
2024-11-23 04:24:16 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_a6a2c2a6d5e767d52c41b42a9d36086b0" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#a6a2c2a6d5e767d52c41b42a9d36086b0" > mlx::steel::BlockMMA::mma< / a > < / div > < div class = "ttdeci" > METAL_FUNC void mma(const threadgroup T *As, const threadgroup T *Bs)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:509< / div > < / div >
2025-01-10 05:56:20 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_a706ae779c1f8d2eb18f19c248567d424" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#a706ae779c1f8d2eb18f19c248567d424" > mlx::steel::BlockMMA< T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue > ::TN< / a > < / div > < div class = "ttdeci" > STEEL_CONST short TN< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:462< / div > < / div >
2024-11-23 04:24:16 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_a7b324c992750ed3aaa4c485f15b2f391" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#a7b324c992750ed3aaa4c485f15b2f391" > mlx::steel::BlockMMA::store_result_safe< / a > < / div > < div class = "ttdeci" > METAL_FUNC void store_result_safe(device U *D, const int ldd, const device U *C, const int ldc, const int fdc, short2 dst_tile_dims, thread const Epilogue & epilogue_op) const< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:683< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_a7cf757e9785e23997b1417e024559ed3" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#a7cf757e9785e23997b1417e024559ed3" > mlx::steel::BlockMMA::store_result< / a > < / div > < div class = "ttdeci" > METAL_FUNC void store_result(device U *D, const int ldd, const device U *C, const int ldc, const int fdc, thread const Epilogue & epilogue_op) const< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:651< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_a823c56cbd2086f10272df7284a5247ae" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#a823c56cbd2086f10272df7284a5247ae" > mlx::steel::BlockMMA::apply_epilogue< / a > < / div > < div class = "ttdeci" > METAL_FUNC void apply_epilogue(const device U *C, const int ldc, const int fdc, thread const BinaryEpilogue & epilogue_op)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:579< / div > < / div >
2025-01-10 05:56:20 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_a8b3690b383afd26563efb38f9c375e50" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#a8b3690b383afd26563efb38f9c375e50" > mlx::steel::BlockMMA< T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue > ::TN_stride< / a > < / div > < div class = "ttdeci" > STEEL_CONST short TN_stride< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:457< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_a8fddaa78913cdc8eea5e1cf7d2776330" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#a8fddaa78913cdc8eea5e1cf7d2776330" > mlx::steel::BlockMMA< T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue > ::tile_stride_a< / a > < / div > < div class = "ttdeci" > STEEL_CONST short tile_stride_a< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:473< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_a92f6aeee432f53638447eac842f43eca" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#a92f6aeee432f53638447eac842f43eca" > mlx::steel::BlockMMA< T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue > ::Bs_offset< / a > < / div > < div class = "ttdeci" > short Bs_offset< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:486< / div > < / div >
2024-11-23 04:24:16 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_a9e48f2d51099ec00171506724faab54a" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#a9e48f2d51099ec00171506724faab54a" > mlx::steel::BlockMMA::apply_epilogue_safe< / a > < / div > < div class = "ttdeci" > METAL_FUNC void apply_epilogue_safe(const device U *C, const int ldc, const int fdc, short2 dst_tile_dims, thread const BinaryEpilogue & epilogue_op)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:607< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_aa14406b7298456ac45d23dd3c4642dd8" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa14406b7298456ac45d23dd3c4642dd8" > mlx::steel::BlockMMA::BlockMMA< / a > < / div > < div class = "ttdeci" > METAL_FUNC BlockMMA(ushort simd_group_id, ushort simd_lane_id)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:489< / div > < / div >
2025-01-10 05:56:20 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_aa71400922babd388177f228c2c82b211" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa71400922babd388177f228c2c82b211" > mlx::steel::BlockMMA< T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue > ::B_str_k< / a > < / div > < div class = "ttdeci" > STEEL_CONST short B_str_k< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:469< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_aa85451edf6900fd6af164d4d50889ae3" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#aa85451edf6900fd6af164d4d50889ae3" > mlx::steel::BlockMMA< T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue > ::sm< / a > < / div > < div class = "ttdeci" > short sm< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:482< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_ab9c7f5386594497f5f4df7e59670b877" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#ab9c7f5386594497f5f4df7e59670b877" > mlx::steel::BlockMMA< T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue > ::A_str_m< / a > < / div > < div class = "ttdeci" > STEEL_CONST short A_str_m< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:465< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_aba5f749fdf32d8bd9d9e29f2a9ae4591" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#aba5f749fdf32d8bd9d9e29f2a9ae4591" > mlx::steel::BlockMMA< T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue > ::TM< / a > < / div > < div class = "ttdeci" > STEEL_CONST short TM< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:460< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_ade420e8b811d597345783c324c23a34a" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#ade420e8b811d597345783c324c23a34a" > mlx::steel::BlockMMA< T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue > ::sn< / a > < / div > < div class = "ttdeci" > short sn< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:483< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_ae2c42cb6d0dde785859164c195f4d13c" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#ae2c42cb6d0dde785859164c195f4d13c" > mlx::steel::BlockMMA< T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue > ::MMAFrag_acc_t< / a > < / div > < div class = "ttdeci" > BaseMMAFrag< AccumType, kFragSize, kFragSize > MMAFrag_acc_t< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:452< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_ae3f35453b3afbaac9df64ad5966b34a4" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#ae3f35453b3afbaac9df64ad5966b34a4" > mlx::steel::BlockMMA< T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue > ::tile_stride_b< / a > < / div > < div class = "ttdeci" > STEEL_CONST short tile_stride_b< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:474< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_aee8caec45c1f9e4428586effbfe6137d" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#aee8caec45c1f9e4428586effbfe6137d" > mlx::steel::BlockMMA< T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue > ::kFragSize< / a > < / div > < div class = "ttdeci" > STEEL_CONST short kFragSize< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:451< / div > < / div >
2024-11-23 04:24:16 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_m_m_a_html_af653c0808ba4fa9a25286f1febb7baff" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_m_m_a.html#af653c0808ba4fa9a25286f1febb7baff" > mlx::steel::BlockMMA::apply_epilogue< / a > < / div > < div class = "ttdeci" > METAL_FUNC void apply_epilogue(thread const UnaryEpilogue & epilogue_op)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:569< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_layout2_d_html" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_layout2_d.html" > mlx::steel::Layout2D< / a > < / div > < div class = "ttdef" > < b > Definition< / b > mma.h:31< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_layout2_d_html_a23183747ab1ddbdd3f1fcac6d0faa2cd" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_layout2_d.html#a23183747ab1ddbdd3f1fcac6d0faa2cd" > mlx::steel::Layout2D::shape< / a > < / div > < div class = "ttdeci" > Shape shape< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:32< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_layout2_d_html_a6beedf1677ee1b192fb48c83a29ac8a1" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_layout2_d.html#a6beedf1677ee1b192fb48c83a29ac8a1" > mlx::steel::Layout2D::layout< / a > < / div > < div class = "ttdeci" > Layout layout< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:33< / div > < / div >
2024-10-31 11:00:19 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html" > mlx::steel::MMATile< / a > < / div > < div class = "ttdef" > < b > Definition< / b > mma.h:178< / div > < / div >
2024-11-23 04:24:16 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_a1a6b1446e8c8da46885bbaa8e8fdc7e4" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1a6b1446e8c8da46885bbaa8e8fdc7e4" > mlx::steel::MMATile::frag_at< / a > < / div > < div class = "ttdeci" > METAL_FUNC constexpr thread frag_type & frag_at(const short i, const short j)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:256< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_a1d126b14910385ab644e224ac1d0307a" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1d126b14910385ab644e224ac1d0307a" > mlx::steel::MMATile::kTileRows< / a > < / div > < div class = "ttdeci" > STEEL_CONST int kTileRows< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:230< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_a1ea49efd92696b15302ee4b52ecd548c" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1ea49efd92696b15302ee4b52ecd548c" > mlx::steel::MMATile::kColsPerThread< / a > < / div > < div class = "ttdeci" > STEEL_CONST int kColsPerThread< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:240< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_a1eeb197c9bdf4db42892a39cdb9bd73a" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a1eeb197c9bdf4db42892a39cdb9bd73a" > mlx::steel::MMATile::mat_type< / a > < / div > < div class = "ttdeci" > MMAFrag_t::mat_type mat_type< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:242< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_a2aadaa3239cb3af0c2ee8af9b88c8a98" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a2aadaa3239cb3af0c2ee8af9b88c8a98" > mlx::steel::MMATile::store< / a > < / div > < div class = "ttdeci" > METAL_FUNC void store(threadgroup U *dst) const< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:325< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_a323a4f38cd0693bf333832bb4258b28e" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a323a4f38cd0693bf333832bb4258b28e" > mlx::steel::MMATile::mat_at< / a > < / div > < div class = "ttdeci" > METAL_FUNC mat_type mat_at(const short i, const short j)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:266< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_a3d0d5b9c7962658cc6d5afbbbb2f19e2" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a3d0d5b9c7962658cc6d5afbbbb2f19e2" > mlx::steel::MMATile::row_bin_op< / a > < / div > < div class = "ttdeci" > METAL_FUNC void row_bin_op(thread T vals[kRowsPerThread])< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:296< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_a46324d40f8ad61cade08a1ebad6d9ad4" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a46324d40f8ad61cade08a1ebad6d9ad4" > mlx::steel::MMATile::kTileCols< / a > < / div > < div class = "ttdeci" > STEEL_CONST int kTileCols< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:231< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_a57703f522c7409dbe2c0a68bb7acc2ba" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a57703f522c7409dbe2c0a68bb7acc2ba" > mlx::steel::MMATile::store_safe< / a > < / div > < div class = "ttdeci" > METAL_FUNC void store_safe(device U *dst, const int ld, const short2 dst_tile_dims) const< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:393< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_a594142f957ffb99296a243f7af7b59e7" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a594142f957ffb99296a243f7af7b59e7" > mlx::steel::MMATile::kFragRows< / a > < / div > < div class = "ttdeci" > STEEL_CONST int kFragRows< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:226< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_a5b1d1c85a5046108a4e38bdc5a0ea74e" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a5b1d1c85a5046108a4e38bdc5a0ea74e" > mlx::steel::MMATile::kRowsPerThread< / a > < / div > < div class = "ttdeci" > STEEL_CONST int kRowsPerThread< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:239< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_a60ea6b8ff2923b7fe6f598e74ac54323" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a60ea6b8ff2923b7fe6f598e74ac54323" > mlx::steel::MMATile::kRows< / a > < / div > < div class = "ttdeci" > STEEL_CONST int kRows< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:233< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_a684e6c6d9f00f583994285b60aaa3b62" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a684e6c6d9f00f583994285b60aaa3b62" > mlx::steel::MMATile::val_frags< / a > < / div > < div class = "ttdeci" > frag_type val_frags[kNumFrags]< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:245< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_a752f708e4fe5ef37fdd902dae153179f" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a752f708e4fe5ef37fdd902dae153179f" > mlx::steel::MMATile::store< / a > < / div > < div class = "ttdeci" > METAL_FUNC void store(device U *dst, const int ld) const< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:357< / div > < / div >
2025-01-10 05:56:20 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_a80078f0dfa4c225e79d9b460202d5e2c" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a80078f0dfa4c225e79d9b460202d5e2c" > mlx::steel::MMATile::elem_type< / a > < / div > < div class = "ttdeci" > T elem_type< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:225< / div > < / div >
2024-11-23 04:24:16 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_a865ece5ad0b9a56937b6d77a18b5a1dc" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a865ece5ad0b9a56937b6d77a18b5a1dc" > mlx::steel::MMATile::elems< / a > < / div > < div class = "ttdeci" > METAL_FUNC thread elem_type * elems()< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:275< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_a948784652e93830887ee8ad506ec3257" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a948784652e93830887ee8ad506ec3257" > mlx::steel::MMATile::kCols< / a > < / div > < div class = "ttdeci" > STEEL_CONST int kCols< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:234< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_a98357339ec98f804a1b12597937b318f" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#a98357339ec98f804a1b12597937b318f" > mlx::steel::MMATile::kElemsPerTile< / a > < / div > < div class = "ttdeci" > STEEL_CONST int kElemsPerTile< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:237< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_aa0ad5cb750ace934bf230385d8bd9f88" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aa0ad5cb750ace934bf230385d8bd9f88" > mlx::steel::MMATile::row_reduce< / a > < / div > < div class = "ttdeci" > METAL_FUNC void row_reduce(thread T vals[kRowsPerThread]) const< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:284< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_aa3a4af67813908109da08ce7352f82da" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aa3a4af67813908109da08ce7352f82da" > mlx::steel::MMATile::load_safe< / a > < / div > < div class = "ttdeci" > METAL_FUNC void load_safe(const device U *src, const int ld, const short2 src_tile_dims)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:373< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_aa3fb310dd08ec23c334511f7b316d1b6" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aa3fb310dd08ec23c334511f7b316d1b6" > mlx::steel::MMATile::MMATile< / a > < / div > < div class = "ttdeci" > METAL_FUNC MMATile() thread< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:247< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_aa5426c6beabfb3ee41b58f01b3392a96" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aa5426c6beabfb3ee41b58f01b3392a96" > mlx::steel::MMATile::load< / a > < / div > < div class = "ttdeci" > METAL_FUNC void load(const threadgroup U *src)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:308< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_aa97a98e423827a889c13a92217626ec7" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aa97a98e423827a889c13a92217626ec7" > mlx::steel::MMATile::clear< / a > < / div > < div class = "ttdeci" > METAL_FUNC constexpr void clear()< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:249< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_aa9e484d8cae936503898d5b772c573f9" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aa9e484d8cae936503898d5b772c573f9" > mlx::steel::MMATile::load< / a > < / div > < div class = "ttdeci" > METAL_FUNC void load(const device U *src, const int ld)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:342< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_aac25cd0a9bdf24aa2af809c95f0bd171" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aac25cd0a9bdf24aa2af809c95f0bd171" > mlx::steel::MMATile::frag_type< / a > < / div > < div class = "ttdeci" > MMAFrag_t::frag_type frag_type< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:243< / div > < / div >
2025-01-10 05:56:20 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_abe33de70e34300745bad9aa822fd0382" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#abe33de70e34300745bad9aa822fd0382" > mlx::steel::MMATile::MMAFrag_t< / a > < / div > < div class = "ttdeci" > MMAFrag_ MMAFrag_t< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:224< / div > < / div >
2024-11-23 04:24:16 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_ad095371db98e7c335ec41ca77c10f906" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ad095371db98e7c335ec41ca77c10f906" > mlx::steel::MMATile::kFragCols< / a > < / div > < div class = "ttdeci" > STEEL_CONST int kFragCols< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:227< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_ad476e1d9a12178fb35c207312339e485" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ad476e1d9a12178fb35c207312339e485" > mlx::steel::MMATile::frag_at< / a > < / div > < div class = "ttdeci" > METAL_FUNC constexpr const thread frag_type & frag_at(const short i, const short j) const< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:260< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_ae21bb7cce701290de84c6015e064d8a1" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ae21bb7cce701290de84c6015e064d8a1" > mlx::steel::MMATile::elems< / a > < / div > < div class = "ttdeci" > METAL_FUNC const thread elem_type * elems() const< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:279< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_ae326e7693eb77c22d5a6e3e9219019d3" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#ae326e7693eb77c22d5a6e3e9219019d3" > mlx::steel::MMATile::kNumFrags< / a > < / div > < div class = "ttdeci" > STEEL_CONST int kNumFrags< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:236< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_m_m_a_tile_html_aef0ea2387e1ff5767bff8563b2d36bd6" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_m_m_a_tile.html#aef0ea2387e1ff5767bff8563b2d36bd6" > mlx::steel::MMATile::kElemsPerFrag< / a > < / div > < div class = "ttdeci" > STEEL_CONST int kElemsPerFrag< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:228< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_shape2_d_html_a070ce70eb6d84361c7f313159c438a5c" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_shape2_d.html#a070ce70eb6d84361c7f313159c438a5c" > mlx::steel::Shape2D::Shape2D< / a > < / div > < div class = "ttdeci" > Shape2D(RInt r_, CInt c_)< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:27< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_shape2_d_html_a6e9e8d56782fc8772bc432c7f58393fe" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_shape2_d.html#a6e9e8d56782fc8772bc432c7f58393fe" > mlx::steel::Shape2D::r< / a > < / div > < div class = "ttdeci" > RInt r< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:24< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_shape2_d_html_ae51347b2131647f2ed735ed43840d26e" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_shape2_d.html#ae51347b2131647f2ed735ed43840d26e" > mlx::steel::Shape2D::c< / a > < / div > < div class = "ttdeci" > CInt c< / div > < div class = "ttdef" > < b > Definition< / b > mma.h:25< / div > < / div >
2024-10-15 23:12:17 +08:00
< / div > <!-- fragment --> < / div > <!-- contents -->
< / div > <!-- doc - content -->
2025-01-10 05:56:20 +08:00
<!-- start footer part -->
< div id = "nav-path" class = "navpath" > <!-- id is needed for treeview function! -->
< ul >
< li class = "navelem" > < a class = "el" href = "dir_938ab0ecf10b8b860ff766c820f665fd.html" > mlx< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_1d446c9bd3c99228254c9484e0bc5c06.html" > backend< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_d0c977ea65824390717cdb7efc36c157.html" > metal< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_70a37effa88bcbd6b791977fa1e64356.html" > kernels< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_76215a6c54e2b67053e723fc2395583c.html" > steel< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_e1756c7634b0c14aead026895ad71c6d.html" > attn< / a > < / li > < li class = "navelem" > < a class = "el" href = "attn_2mma_8h.html" > mma.h< / a > < / li >
< li class = "footer" > Generated by < a href = "https://www.doxygen.org/index.html" > < img class = "footer" src = "doxygen.svg" width = "104" height = "31" alt = "doxygen" / > < / a > 1.13.1 < / li >
< / ul >
< / div >
2024-10-15 23:12:17 +08:00
< / body >
< / html >