This commit is contained in:
CircleCI Docs
2025-02-06 20:16:29 +00:00
parent d8d647015b
commit cc43b2d401
2299 changed files with 64934 additions and 28640 deletions

View File

@@ -3,7 +3,7 @@
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=11"/>
<meta name="generator" content="Doxygen 1.13.1"/>
<meta name="generator" content="Doxygen 1.13.2"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<title>MLX: mlx/backend/metal/kernels/steel/gemm/mma.h Source File</title>
<link href="tabs.css" rel="stylesheet" type="text/css"/>
@@ -52,7 +52,7 @@
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.13.1 -->
<!-- Generated by Doxygen 1.13.2 -->
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
var searchBox = new SearchBox("searchBox", "search/",'.html');
@@ -125,8 +125,7 @@ $(function(){initNavTree('gemm_2mma_8h_source.html',''); initResizable(true); })
<div class="line"><a id="l00020" name="l00020"></a><span class="lineno"> 20</span><span class="keyword">namespace </span><a class="code hl_namespace" href="namespacemlx_1_1steel.html">steel</a> {</div>
<div class="line"><a id="l00021" name="l00021"></a><span class="lineno"> 21</span> </div>
<div class="line"><a id="l00022" name="l00022"></a><span class="lineno"> 22</span><span class="keyword">template</span> &lt;<span class="keyword">typename</span> T, <span class="keywordtype">int</span> kFragRows_, <span class="keywordtype">int</span> kFragCols_&gt;</div>
<div class="foldopen" id="foldopen00023" data-start="{" data-end="};">
<div class="line"><a id="l00023" name="l00023"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html"> 23</a></span><span class="keyword">struct </span><a class="code hl_struct" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html">BaseMMAFrag</a> {</div>
<div class="line"><a id="l00023" name="l00023"></a><span class="lineno"> 23</span><span class="keyword">struct </span><a class="code hl_struct" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html">BaseMMAFrag</a> {</div>
<div class="line"><a id="l00024" name="l00024"></a><span class="lineno"> 24</span> <span class="keyword">static_assert</span>(</div>
<div class="line"><a id="l00025" name="l00025"></a><span class="lineno"> 25</span> kFragRows_ == 8,</div>
<div class="line"><a id="l00026" name="l00026"></a><span class="lineno"> 26</span> <span class="stringliteral">&quot;Only 8 x 8 fragment matrices are currently supported&quot;</span>);</div>
@@ -134,28 +133,26 @@ $(function(){initNavTree('gemm_2mma_8h_source.html',''); initResizable(true); })
<div class="line"><a id="l00028" name="l00028"></a><span class="lineno"> 28</span> kFragCols_ == 8,</div>
<div class="line"><a id="l00029" name="l00029"></a><span class="lineno"> 29</span> <span class="stringliteral">&quot;Only 8 x 8 fragment matrices are currently supported&quot;</span>);</div>
<div class="line"><a id="l00030" name="l00030"></a><span class="lineno"> 30</span>};</div>
</div>
<div class="line"><a id="l00031" name="l00031"></a><span class="lineno"> 31</span> </div>
<div class="line"><a id="l00032" name="l00032"></a><span class="lineno"> 32</span><span class="keyword">template</span> &lt;<span class="keyword">typename</span> T&gt;</div>
<div class="foldopen" id="foldopen00033" data-start="{" data-end="};">
<div class="line"><a id="l00033" name="l00033"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html"> 33</a></span><span class="keyword">struct </span><a class="code hl_struct" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html">BaseMMAFrag</a>&lt;T, 8, 8&gt; {</div>
<div class="line"><a id="l00034" name="l00034"></a><span class="lineno"> 34</span> <a class="code hl_define" href="steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b">STEEL_CONST</a> <span class="keywordtype">int</span> <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a2fe53db449c692226f23f6b99fb2c0d4">kFragRows</a> = 8;</div>
<div class="line"><a id="l00035" name="l00035"></a><span class="lineno"> 35</span> <a class="code hl_define" href="steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b">STEEL_CONST</a> <span class="keywordtype">int</span> <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a211102315e2afbcfcd2e2c201b638e9f">kFragCols</a> = 8;</div>
<div class="line"><a id="l00033" name="l00033"></a><span class="lineno"> 33</span><span class="keyword">struct </span><a class="code hl_struct" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html">BaseMMAFrag</a>&lt;T, 8, 8&gt; {</div>
<div class="line"><a id="l00034" name="l00034"></a><span class="lineno"> 34</span> <a class="code hl_define" href="steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b">STEEL_CONST</a> <span class="keywordtype">int</span> kFragRows = 8;</div>
<div class="line"><a id="l00035" name="l00035"></a><span class="lineno"> 35</span> <a class="code hl_define" href="steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b">STEEL_CONST</a> <span class="keywordtype">int</span> kFragCols = 8;</div>
<div class="line"><a id="l00036" name="l00036"></a><span class="lineno"> 36</span> </div>
<div class="line"><a id="l00037" name="l00037"></a><span class="lineno"> 37</span> <a class="code hl_define" href="steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b">STEEL_CONST</a> <span class="keywordtype">int</span> <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a3c34dfdc944db110f4735f1b25307cf0">kElemsPerFrag</a> = (<a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a2fe53db449c692226f23f6b99fb2c0d4">kFragRows</a> * <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a211102315e2afbcfcd2e2c201b638e9f">kFragCols</a>) / 32;</div>
<div class="line"><a id="l00037" name="l00037"></a><span class="lineno"> 37</span> <a class="code hl_define" href="steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b">STEEL_CONST</a> <span class="keywordtype">int</span> kElemsPerFrag = (kFragRows * kFragCols) / 32;</div>
<div class="line"><a id="l00038" name="l00038"></a><span class="lineno"> 38</span> </div>
<div class="line"><a id="l00039" name="l00039"></a><span class="lineno"> 39</span> <a class="code hl_define" href="steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b">STEEL_CONST</a> <span class="keywordtype">int</span> <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a76aa5aa690dbcc954e957d767fad661f">kElemRows</a> = 1;</div>
<div class="line"><a id="l00040" name="l00040"></a><span class="lineno"> 40</span> <a class="code hl_define" href="steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b">STEEL_CONST</a> <span class="keywordtype">int</span> <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7c212200d86b4e93f274d99addf668bd">kElemCols</a> = 2;</div>
<div class="line"><a id="l00039" name="l00039"></a><span class="lineno"> 39</span> <a class="code hl_define" href="steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b">STEEL_CONST</a> <span class="keywordtype">int</span> kElemRows = 1;</div>
<div class="line"><a id="l00040" name="l00040"></a><span class="lineno"> 40</span> <a class="code hl_define" href="steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b">STEEL_CONST</a> <span class="keywordtype">int</span> kElemCols = 2;</div>
<div class="line"><a id="l00041" name="l00041"></a><span class="lineno"> 41</span> </div>
<div class="line"><a id="l00042" name="l00042"></a><span class="lineno"> 42</span> <span class="keyword">static_assert</span>(</div>
<div class="line"><a id="l00043" name="l00043"></a><span class="lineno"> 43</span> <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a76aa5aa690dbcc954e957d767fad661f">kElemRows</a> * <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7c212200d86b4e93f274d99addf668bd">kElemCols</a> == <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a3c34dfdc944db110f4735f1b25307cf0">kElemsPerFrag</a>,</div>
<div class="line"><a id="l00043" name="l00043"></a><span class="lineno"> 43</span> kElemRows * kElemCols == kElemsPerFrag,</div>
<div class="line"><a id="l00044" name="l00044"></a><span class="lineno"> 44</span> <span class="stringliteral">&quot;MMAFrag shape is not consistent with MMAFrag size&quot;</span>);</div>
<div class="line"><a id="l00045" name="l00045"></a><span class="lineno"> 45</span> </div>
<div class="line"><a id="l00046" name="l00046"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9"> 46</a></span> <span class="keyword">typedef</span> metal::simdgroup_matrix&lt;T, kFragRows, kFragCols&gt; <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9">mat_type</a>;</div>
<div class="line"><a id="l00047" name="l00047"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b"> 47</a></span> <span class="keyword">typedef</span> metal::vec&lt;T, kElemsPerFrag&gt; <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>;</div>
<div class="line"><a id="l00046" name="l00046"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a25675ae18947a97c6e04157b540103a9"> 46</a></span> <span class="keyword">typedef</span> metal::simdgroup_matrix&lt;T, kFragRows, kFragCols&gt; <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a25675ae18947a97c6e04157b540103a9">mat_type</a>;</div>
<div class="line"><a id="l00047" name="l00047"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a8536bfaa108031c2ea3e9ccdc766ee5b"> 47</a></span> <span class="keyword">typedef</span> metal::vec&lt;T, kElemsPerFrag&gt; <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>;</div>
<div class="line"><a id="l00048" name="l00048"></a><span class="lineno"> 48</span> </div>
<div class="foldopen" id="foldopen00049" data-start="{" data-end="}">
<div class="line"><a id="l00049" name="l00049"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7331fff1d12f2f8b72b0006a3ad0dd83"> 49</a></span> METAL_FUNC <span class="keyword">static</span> <span class="keyword">constexpr</span> short2 <a class="code hl_function" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7331fff1d12f2f8b72b0006a3ad0dd83">get_coord</a>(ushort simd_lane_id</div>
<div class="line"><a id="l00049" name="l00049"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a7331fff1d12f2f8b72b0006a3ad0dd83"> 49</a></span> METAL_FUNC <span class="keyword">static</span> <span class="keyword">constexpr</span> short2 <a class="code hl_function" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a7331fff1d12f2f8b72b0006a3ad0dd83">get_coord</a>(ushort simd_lane_id</div>
<div class="line"><a id="l00050" name="l00050"></a><span class="lineno"> 50</span> [[thread_index_in_simdgroup]]) {</div>
<div class="line"><a id="l00051" name="l00051"></a><span class="lineno"> 51</span> <span class="keyword">const</span> <span class="keywordtype">short</span> qid = simd_lane_id / 4;</div>
<div class="line"><a id="l00052" name="l00052"></a><span class="lineno"> 52</span> <span class="keyword">const</span> <span class="keywordtype">short</span> fm = (qid &amp; 4) + ((simd_lane_id / 2) % 4);</div>
@@ -167,12 +164,12 @@ $(function(){initNavTree('gemm_2mma_8h_source.html',''); initResizable(true); })
<div class="line"><a id="l00057" name="l00057"></a><span class="lineno"> 57</span> <span class="keyword">template</span> &lt;<span class="keyword">typename</span> SrcPtrType, <span class="keyword">typename</span> StrX, <span class="keyword">typename</span> StrY&gt;</div>
<div class="line"><a id="l00058" name="l00058"></a><span class="lineno"> 58</span> METAL_FUNC <span class="keyword">static</span> <span class="keyword">constexpr</span> <span class="keywordtype">void</span></div>
<div class="foldopen" id="foldopen00059" data-start="{" data-end="}">
<div class="line"><a id="l00059" name="l00059"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#ac73006b36fc710feda3a7c796e21415c"> 59</a></span> <a class="code hl_function" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#ac73006b36fc710feda3a7c796e21415c">load</a>(thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp; dst, SrcPtrType src, StrX str_x, StrY str_y) {</div>
<div class="line"><a id="l00059" name="l00059"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#ac73006b36fc710feda3a7c796e21415c"> 59</a></span> <a class="code hl_function" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#ac73006b36fc710feda3a7c796e21415c">load</a>(thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp; dst, SrcPtrType src, StrX str_x, StrY str_y) {</div>
<div class="line"><a id="l00060" name="l00060"></a><span class="lineno"> 60</span> <a class="code hl_define" href="steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6">STEEL_PRAGMA_UNROLL</a></div>
<div class="line"><a id="l00061" name="l00061"></a><span class="lineno"> 61</span> <span class="keywordflow">for</span> (<span class="keywordtype">short</span> i = 0; i &lt; <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a76aa5aa690dbcc954e957d767fad661f">kElemRows</a>; i++) {</div>
<div class="line"><a id="l00061" name="l00061"></a><span class="lineno"> 61</span> <span class="keywordflow">for</span> (<span class="keywordtype">short</span> i = 0; i &lt; <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a76aa5aa690dbcc954e957d767fad661f">kElemRows</a>; i++) {</div>
<div class="line"><a id="l00062" name="l00062"></a><span class="lineno"> 62</span> <a class="code hl_define" href="steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6">STEEL_PRAGMA_UNROLL</a></div>
<div class="line"><a id="l00063" name="l00063"></a><span class="lineno"> 63</span> <span class="keywordflow">for</span> (<span class="keywordtype">short</span> j = 0; j &lt; <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7c212200d86b4e93f274d99addf668bd">kElemCols</a>; j++) {</div>
<div class="line"><a id="l00064" name="l00064"></a><span class="lineno"> 64</span> dst[i * <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7c212200d86b4e93f274d99addf668bd">kElemCols</a> + j] = <span class="keyword">static_cast&lt;</span>T<span class="keyword">&gt;</span>(src[i * str_x + j * str_y]);</div>
<div class="line"><a id="l00063" name="l00063"></a><span class="lineno"> 63</span> <span class="keywordflow">for</span> (<span class="keywordtype">short</span> j = 0; j &lt; <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a7c212200d86b4e93f274d99addf668bd">kElemCols</a>; j++) {</div>
<div class="line"><a id="l00064" name="l00064"></a><span class="lineno"> 64</span> dst[i * <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a7c212200d86b4e93f274d99addf668bd">kElemCols</a> + j] = <span class="keyword">static_cast&lt;</span>T<span class="keyword">&gt;</span>(src[i * str_x + j * str_y]);</div>
<div class="line"><a id="l00065" name="l00065"></a><span class="lineno"> 65</span> }</div>
<div class="line"><a id="l00066" name="l00066"></a><span class="lineno"> 66</span> }</div>
<div class="line"><a id="l00067" name="l00067"></a><span class="lineno"> 67</span> }</div>
@@ -187,8 +184,8 @@ $(function(){initNavTree('gemm_2mma_8h_source.html',''); initResizable(true); })
<div class="line"><a id="l00075" name="l00075"></a><span class="lineno"> 75</span> <span class="keyword">typename</span> OffX,</div>
<div class="line"><a id="l00076" name="l00076"></a><span class="lineno"> 76</span> <span class="keyword">typename</span> OffY&gt;</div>
<div class="foldopen" id="foldopen00077" data-start="{" data-end="}">
<div class="line"><a id="l00077" name="l00077"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#ad22aaee4a2938cbdd315b39eda84e07d"> 77</a></span> METAL_FUNC <span class="keyword">static</span> <span class="keyword">constexpr</span> <span class="keywordtype">void</span> <a class="code hl_function" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#ad22aaee4a2938cbdd315b39eda84e07d">load_safe</a>(</div>
<div class="line"><a id="l00078" name="l00078"></a><span class="lineno"> 78</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp; dst,</div>
<div class="line"><a id="l00077" name="l00077"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#ad22aaee4a2938cbdd315b39eda84e07d"> 77</a></span> METAL_FUNC <span class="keyword">static</span> <span class="keyword">constexpr</span> <span class="keywordtype">void</span> <a class="code hl_function" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#ad22aaee4a2938cbdd315b39eda84e07d">load_safe</a>(</div>
<div class="line"><a id="l00078" name="l00078"></a><span class="lineno"> 78</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp; dst,</div>
<div class="line"><a id="l00079" name="l00079"></a><span class="lineno"> 79</span> SrcPtrType src,</div>
<div class="line"><a id="l00080" name="l00080"></a><span class="lineno"> 80</span> StrX str_x,</div>
<div class="line"><a id="l00081" name="l00081"></a><span class="lineno"> 81</span> StrY str_y,</div>
@@ -214,14 +211,14 @@ $(function(){initNavTree('gemm_2mma_8h_source.html',''); initResizable(true); })
<div class="line"><a id="l00100" name="l00100"></a><span class="lineno"> 100</span> <span class="keyword">template</span> &lt;<span class="keyword">typename</span> DstPtrType, <span class="keyword">typename</span> StrX, <span class="keyword">typename</span> StrY&gt;</div>
<div class="line"><a id="l00101" name="l00101"></a><span class="lineno"> 101</span> METAL_FUNC <span class="keyword">static</span> <span class="keyword">constexpr</span> <span class="keywordtype">void</span></div>
<div class="foldopen" id="foldopen00102" data-start="{" data-end="}">
<div class="line"><a id="l00102" name="l00102"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#aa8f50ea8961ec5b35c1b81366d64f2cb"> 102</a></span> <a class="code hl_function" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#aa8f50ea8961ec5b35c1b81366d64f2cb">store</a>(<span class="keyword">const</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp; src, DstPtrType dst, StrX str_x, StrY str_y) {</div>
<div class="line"><a id="l00102" name="l00102"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#aa8f50ea8961ec5b35c1b81366d64f2cb"> 102</a></span> <a class="code hl_function" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#aa8f50ea8961ec5b35c1b81366d64f2cb">store</a>(<span class="keyword">const</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp; src, DstPtrType dst, StrX str_x, StrY str_y) {</div>
<div class="line"><a id="l00103" name="l00103"></a><span class="lineno"> 103</span> <span class="keyword">using </span>U = <a class="code hl_typedef" href="namespacemetal.html#ac82ee6c3fbe9ec5c78c07329424aaec9">pointer_element_t&lt;DstPtrType&gt;</a>;</div>
<div class="line"><a id="l00104" name="l00104"></a><span class="lineno"> 104</span> </div>
<div class="line"><a id="l00105" name="l00105"></a><span class="lineno"> 105</span> <a class="code hl_define" href="steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6">STEEL_PRAGMA_UNROLL</a></div>
<div class="line"><a id="l00106" name="l00106"></a><span class="lineno"> 106</span> <span class="keywordflow">for</span> (<span class="keywordtype">short</span> i = 0; i &lt; <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a76aa5aa690dbcc954e957d767fad661f">kElemRows</a>; i++) {</div>
<div class="line"><a id="l00106" name="l00106"></a><span class="lineno"> 106</span> <span class="keywordflow">for</span> (<span class="keywordtype">short</span> i = 0; i &lt; <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a76aa5aa690dbcc954e957d767fad661f">kElemRows</a>; i++) {</div>
<div class="line"><a id="l00107" name="l00107"></a><span class="lineno"> 107</span> <a class="code hl_define" href="steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6">STEEL_PRAGMA_UNROLL</a></div>
<div class="line"><a id="l00108" name="l00108"></a><span class="lineno"> 108</span> <span class="keywordflow">for</span> (<span class="keywordtype">short</span> j = 0; j &lt; <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7c212200d86b4e93f274d99addf668bd">kElemCols</a>; j++) {</div>
<div class="line"><a id="l00109" name="l00109"></a><span class="lineno"> 109</span> dst[i * str_x + j * str_y] = <span class="keyword">static_cast&lt;</span>U<span class="keyword">&gt;</span>(src[i * <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7c212200d86b4e93f274d99addf668bd">kElemCols</a> + j]);</div>
<div class="line"><a id="l00108" name="l00108"></a><span class="lineno"> 108</span> <span class="keywordflow">for</span> (<span class="keywordtype">short</span> j = 0; j &lt; <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a7c212200d86b4e93f274d99addf668bd">kElemCols</a>; j++) {</div>
<div class="line"><a id="l00109" name="l00109"></a><span class="lineno"> 109</span> dst[i * str_x + j * str_y] = <span class="keyword">static_cast&lt;</span>U<span class="keyword">&gt;</span>(src[i * <a class="code hl_variable" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a7c212200d86b4e93f274d99addf668bd">kElemCols</a> + j]);</div>
<div class="line"><a id="l00110" name="l00110"></a><span class="lineno"> 110</span> }</div>
<div class="line"><a id="l00111" name="l00111"></a><span class="lineno"> 111</span> }</div>
<div class="line"><a id="l00112" name="l00112"></a><span class="lineno"> 112</span> }</div>
@@ -236,8 +233,8 @@ $(function(){initNavTree('gemm_2mma_8h_source.html',''); initResizable(true); })
<div class="line"><a id="l00120" name="l00120"></a><span class="lineno"> 120</span> <span class="keyword">typename</span> OffX,</div>
<div class="line"><a id="l00121" name="l00121"></a><span class="lineno"> 121</span> <span class="keyword">typename</span> OffY&gt;</div>
<div class="foldopen" id="foldopen00122" data-start="{" data-end="}">
<div class="line"><a id="l00122" name="l00122"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a1f0b00daad8eba2f855bb306e70d2328"> 122</a></span> METAL_FUNC <span class="keyword">static</span> <span class="keyword">constexpr</span> <span class="keywordtype">void</span> <a class="code hl_function" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a1f0b00daad8eba2f855bb306e70d2328">store_safe</a>(</div>
<div class="line"><a id="l00123" name="l00123"></a><span class="lineno"> 123</span> <span class="keyword">const</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp; src,</div>
<div class="line"><a id="l00122" name="l00122"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a1f0b00daad8eba2f855bb306e70d2328"> 122</a></span> METAL_FUNC <span class="keyword">static</span> <span class="keyword">constexpr</span> <span class="keywordtype">void</span> <a class="code hl_function" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a1f0b00daad8eba2f855bb306e70d2328">store_safe</a>(</div>
<div class="line"><a id="l00123" name="l00123"></a><span class="lineno"> 123</span> <span class="keyword">const</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp; src,</div>
<div class="line"><a id="l00124" name="l00124"></a><span class="lineno"> 124</span> DstPtrType dst,</div>
<div class="line"><a id="l00125" name="l00125"></a><span class="lineno"> 125</span> StrX str_x,</div>
<div class="line"><a id="l00126" name="l00126"></a><span class="lineno"> 126</span> StrY str_y,</div>
@@ -261,45 +258,43 @@ $(function(){initNavTree('gemm_2mma_8h_source.html',''); initResizable(true); })
</div>
<div class="line"><a id="l00144" name="l00144"></a><span class="lineno"> 144</span> </div>
<div class="foldopen" id="foldopen00145" data-start="{" data-end="}">
<div class="line"><a id="l00145" name="l00145"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8028512f5a3d2b6acaf966be529627a3"> 145</a></span> METAL_FUNC <span class="keyword">static</span> <span class="keyword">constexpr</span> <span class="keywordtype">void</span> <a class="code hl_function" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8028512f5a3d2b6acaf966be529627a3">mma</a>(</div>
<div class="line"><a id="l00146" name="l00146"></a><span class="lineno"> 146</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp; D,</div>
<div class="line"><a id="l00147" name="l00147"></a><span class="lineno"> 147</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp; A,</div>
<div class="line"><a id="l00148" name="l00148"></a><span class="lineno"> 148</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp; B,</div>
<div class="line"><a id="l00149" name="l00149"></a><span class="lineno"> 149</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp; C) {</div>
<div class="line"><a id="l00150" name="l00150"></a><span class="lineno"> 150</span> <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9">mat_type</a> D_mat;</div>
<div class="line"><a id="l00151" name="l00151"></a><span class="lineno"> 151</span> <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9">mat_type</a> A_mat;</div>
<div class="line"><a id="l00152" name="l00152"></a><span class="lineno"> 152</span> <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9">mat_type</a> B_mat;</div>
<div class="line"><a id="l00153" name="l00153"></a><span class="lineno"> 153</span> <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9">mat_type</a> C_mat;</div>
<div class="line"><a id="l00145" name="l00145"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a8028512f5a3d2b6acaf966be529627a3"> 145</a></span> METAL_FUNC <span class="keyword">static</span> <span class="keyword">constexpr</span> <span class="keywordtype">void</span> <a class="code hl_function" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a8028512f5a3d2b6acaf966be529627a3">mma</a>(</div>
<div class="line"><a id="l00146" name="l00146"></a><span class="lineno"> 146</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp; D,</div>
<div class="line"><a id="l00147" name="l00147"></a><span class="lineno"> 147</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp; A,</div>
<div class="line"><a id="l00148" name="l00148"></a><span class="lineno"> 148</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp; B,</div>
<div class="line"><a id="l00149" name="l00149"></a><span class="lineno"> 149</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp; C) {</div>
<div class="line"><a id="l00150" name="l00150"></a><span class="lineno"> 150</span> <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a25675ae18947a97c6e04157b540103a9">mat_type</a> D_mat;</div>
<div class="line"><a id="l00151" name="l00151"></a><span class="lineno"> 151</span> <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a25675ae18947a97c6e04157b540103a9">mat_type</a> A_mat;</div>
<div class="line"><a id="l00152" name="l00152"></a><span class="lineno"> 152</span> <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a25675ae18947a97c6e04157b540103a9">mat_type</a> B_mat;</div>
<div class="line"><a id="l00153" name="l00153"></a><span class="lineno"> 153</span> <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a25675ae18947a97c6e04157b540103a9">mat_type</a> C_mat;</div>
<div class="line"><a id="l00154" name="l00154"></a><span class="lineno"> 154</span> </div>
<div class="line"><a id="l00155" name="l00155"></a><span class="lineno"> 155</span> <span class="keyword">reinterpret_cast&lt;</span>thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp;<span class="keyword">&gt;</span>(A_mat.thread_elements()) = A;</div>
<div class="line"><a id="l00156" name="l00156"></a><span class="lineno"> 156</span> <span class="keyword">reinterpret_cast&lt;</span>thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp;<span class="keyword">&gt;</span>(B_mat.thread_elements()) = B;</div>
<div class="line"><a id="l00157" name="l00157"></a><span class="lineno"> 157</span> <span class="keyword">reinterpret_cast&lt;</span>thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp;<span class="keyword">&gt;</span>(C_mat.thread_elements()) = C;</div>
<div class="line"><a id="l00155" name="l00155"></a><span class="lineno"> 155</span> <span class="keyword">reinterpret_cast&lt;</span>thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp;<span class="keyword">&gt;</span>(A_mat.thread_elements()) = A;</div>
<div class="line"><a id="l00156" name="l00156"></a><span class="lineno"> 156</span> <span class="keyword">reinterpret_cast&lt;</span>thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp;<span class="keyword">&gt;</span>(B_mat.thread_elements()) = B;</div>
<div class="line"><a id="l00157" name="l00157"></a><span class="lineno"> 157</span> <span class="keyword">reinterpret_cast&lt;</span>thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp;<span class="keyword">&gt;</span>(C_mat.thread_elements()) = C;</div>
<div class="line"><a id="l00158" name="l00158"></a><span class="lineno"> 158</span> </div>
<div class="line"><a id="l00159" name="l00159"></a><span class="lineno"> 159</span> <a class="code hl_function" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8028512f5a3d2b6acaf966be529627a3">mma</a>(D_mat, A_mat, B_mat, C_mat);</div>
<div class="line"><a id="l00159" name="l00159"></a><span class="lineno"> 159</span> <a class="code hl_function" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a8028512f5a3d2b6acaf966be529627a3">mma</a>(D_mat, A_mat, B_mat, C_mat);</div>
<div class="line"><a id="l00160" name="l00160"></a><span class="lineno"> 160</span> </div>
<div class="line"><a id="l00161" name="l00161"></a><span class="lineno"> 161</span> D = <span class="keyword">reinterpret_cast&lt;</span>thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp;<span class="keyword">&gt;</span>(D_mat.thread_elements());</div>
<div class="line"><a id="l00161" name="l00161"></a><span class="lineno"> 161</span> D = <span class="keyword">reinterpret_cast&lt;</span>thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a8536bfaa108031c2ea3e9ccdc766ee5b">frag_type</a>&amp;<span class="keyword">&gt;</span>(D_mat.thread_elements());</div>
<div class="line"><a id="l00162" name="l00162"></a><span class="lineno"> 162</span> }</div>
</div>
<div class="line"><a id="l00163" name="l00163"></a><span class="lineno"> 163</span> </div>
<div class="foldopen" id="foldopen00164" data-start="{" data-end="}">
<div class="line"><a id="l00164" name="l00164"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a1868f57d57c8adedab2c58492ec76946"> 164</a></span> METAL_FUNC <span class="keyword">static</span> <span class="keyword">constexpr</span> <span class="keywordtype">void</span> <a class="code hl_function" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a1868f57d57c8adedab2c58492ec76946">mma</a>(</div>
<div class="line"><a id="l00165" name="l00165"></a><span class="lineno"> 165</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9">mat_type</a>&amp; D,</div>
<div class="line"><a id="l00166" name="l00166"></a><span class="lineno"> 166</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9">mat_type</a>&amp; A,</div>
<div class="line"><a id="l00167" name="l00167"></a><span class="lineno"> 167</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9">mat_type</a>&amp; B,</div>
<div class="line"><a id="l00168" name="l00168"></a><span class="lineno"> 168</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9">mat_type</a>&amp; C) {</div>
<div class="line"><a id="l00164" name="l00164"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a1868f57d57c8adedab2c58492ec76946"> 164</a></span> METAL_FUNC <span class="keyword">static</span> <span class="keyword">constexpr</span> <span class="keywordtype">void</span> <a class="code hl_function" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a1868f57d57c8adedab2c58492ec76946">mma</a>(</div>
<div class="line"><a id="l00165" name="l00165"></a><span class="lineno"> 165</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a25675ae18947a97c6e04157b540103a9">mat_type</a>&amp; D,</div>
<div class="line"><a id="l00166" name="l00166"></a><span class="lineno"> 166</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a25675ae18947a97c6e04157b540103a9">mat_type</a>&amp; A,</div>
<div class="line"><a id="l00167" name="l00167"></a><span class="lineno"> 167</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a25675ae18947a97c6e04157b540103a9">mat_type</a>&amp; B,</div>
<div class="line"><a id="l00168" name="l00168"></a><span class="lineno"> 168</span> thread <a class="code hl_typedef" href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a25675ae18947a97c6e04157b540103a9">mat_type</a>&amp; C) {</div>
<div class="line"><a id="l00169" name="l00169"></a><span class="lineno"> 169</span> simdgroup_multiply_accumulate(D, A, B, C);</div>
<div class="line"><a id="l00170" name="l00170"></a><span class="lineno"> 170</span> }</div>
</div>
<div class="line"><a id="l00171" name="l00171"></a><span class="lineno"> 171</span>};</div>
</div>
<div class="line"><a id="l00172" name="l00172"></a><span class="lineno"> 172</span> </div>
<div class="line"><a id="l00173" name="l00173"></a><span class="lineno"> 173</span><span class="keyword">template</span> &lt;</div>
<div class="line"><a id="l00174" name="l00174"></a><span class="lineno"> 174</span> <span class="keyword">typename</span> T,</div>
<div class="line"><a id="l00175" name="l00175"></a><span class="lineno"> 175</span> <span class="keywordtype">int</span> kTileRows_,</div>
<div class="line"><a id="l00176" name="l00176"></a><span class="lineno"> 176</span> <span class="keywordtype">int</span> kTileCols_,</div>
<div class="line"><a id="l00177" name="l00177"></a><span class="lineno"> 177</span> <span class="keyword">class </span>MMAFrag_ = BaseMMAFrag&lt;T, 8, 8&gt;&gt;</div>
<div class="foldopen" id="foldopen00178" data-start="{" data-end="};">
<div class="line"><a id="l00178" name="l00178"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_m_m_a_tile.html"> 178</a></span><span class="keyword">struct </span><a class="code hl_function" href="structmlx_1_1steel_1_1_m_m_a_tile.html#aa3fb310dd08ec23c334511f7b316d1b6">MMATile</a> {</div>
<div class="line"><a id="l00178" name="l00178"></a><span class="lineno"> 178</span><span class="keyword">struct </span>MMATile {</div>
<div class="line"><a id="l00179" name="l00179"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_m_m_a_tile.html#abe33de70e34300745bad9aa822fd0382"> 179</a></span> <span class="keyword">using </span><a class="code hl_typedef" href="structmlx_1_1steel_1_1_m_m_a_tile.html#abe33de70e34300745bad9aa822fd0382">MMAFrag_t</a> = MMAFrag_;</div>
<div class="line"><a id="l00180" name="l00180"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_m_m_a_tile.html#a80078f0dfa4c225e79d9b460202d5e2c"> 180</a></span> <span class="keyword">using </span><a class="code hl_typedef" href="structmlx_1_1steel_1_1_m_m_a_tile.html#a80078f0dfa4c225e79d9b460202d5e2c">elem_type</a> = T;</div>
<div class="line"><a id="l00181" name="l00181"></a><span class="lineno"> 181</span> <a class="code hl_define" href="steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b">STEEL_CONST</a> <span class="keywordtype">int</span> <a class="code hl_variable" href="structmlx_1_1steel_1_1_m_m_a_tile.html#a594142f957ffb99296a243f7af7b59e7">kFragRows</a> = MMAFrag_t::kFragRows;</div>
@@ -484,7 +479,6 @@ $(function(){initNavTree('gemm_2mma_8h_source.html',''); initResizable(true); })
<div class="line"><a id="l00337" name="l00337"></a><span class="lineno"> 337</span> }</div>
</div>
<div class="line"><a id="l00338" name="l00338"></a><span class="lineno"> 338</span>};</div>
</div>
<div class="line"><a id="l00339" name="l00339"></a><span class="lineno"> 339</span> </div>
<div class="line"><a id="l00340" name="l00340"></a><span class="lineno"> 340</span><span class="keyword">template</span> &lt;<span class="keyword">typename</span> T, <span class="keyword">typename</span> U, <span class="keywordtype">int</span> M, <span class="keywordtype">int</span> N, <span class="keywordtype">int</span> K&gt;</div>
<div class="line"><a id="l00341" name="l00341"></a><span class="lineno"> 341</span>METAL_FUNC <span class="keywordtype">void</span> <a class="code hl_function" href="namespacemlx_1_1steel.html#ad583e6038efc119542410f43b603d4ad">tile_matmad</a>(</div>
@@ -523,8 +517,7 @@ $(function(){initNavTree('gemm_2mma_8h_source.html',''); initResizable(true); })
<div class="line"><a id="l00374" name="l00374"></a><span class="lineno"> 374</span> <span class="keywordtype">short</span> ldb_tgp,</div>
<div class="line"><a id="l00375" name="l00375"></a><span class="lineno"> 375</span> <span class="keyword">typename</span> AccumType = float,</div>
<div class="line"><a id="l00376" name="l00376"></a><span class="lineno"> 376</span> <span class="keyword">typename</span> Epilogue = <a class="code hl_struct" href="structmlx_1_1steel_1_1_transform_none.html">TransformNone&lt;U, AccumType&gt;</a>&gt;</div>
<div class="foldopen" id="foldopen00377" data-start="{" data-end="};">
<div class="line"><a id="l00377" name="l00377"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_block_m_m_a.html"> 377</a></span><span class="keyword">struct </span><a class="code hl_function" href="structmlx_1_1steel_1_1_block_m_m_a.html#aa14406b7298456ac45d23dd3c4642dd8">BlockMMA</a> {</div>
<div class="line"><a id="l00377" name="l00377"></a><span class="lineno"> 377</span><span class="keyword">struct </span><a class="code hl_struct" href="structmlx_1_1steel_1_1_block_m_m_a.html">BlockMMA</a> {</div>
<div class="line"><a id="l00378" name="l00378"></a><span class="lineno"> 378</span> <span class="comment">// MMAFrag size</span></div>
<div class="line"><a id="l00379" name="l00379"></a><span class="lineno"> 379</span> <a class="code hl_define" href="steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b">STEEL_CONST</a> <span class="keywordtype">short</span> <a class="code hl_variable" href="structmlx_1_1steel_1_1_block_m_m_a.html#aee8caec45c1f9e4428586effbfe6137d">kFragSize</a> = 8;</div>
<div class="line"><a id="l00380" name="l00380"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_block_m_m_a.html#ae2c42cb6d0dde785859164c195f4d13c"> 380</a></span> <span class="keyword">using </span><a class="code hl_typedef" href="structmlx_1_1steel_1_1_block_m_m_a.html#ae2c42cb6d0dde785859164c195f4d13c">MMAFrag_acc_t</a> = <a class="code hl_struct" href="structmlx_1_1steel_1_1_base_m_m_a_frag.html">BaseMMAFrag&lt;AccumType, kFragSize, kFragSize&gt;</a>;</div>
@@ -817,7 +810,6 @@ $(function(){initNavTree('gemm_2mma_8h_source.html',''); initResizable(true); })
<div class="line"><a id="l00650" name="l00650"></a><span class="lineno"> 650</span> }</div>
</div>
<div class="line"><a id="l00651" name="l00651"></a><span class="lineno"> 651</span>};</div>
</div>
<div class="line"><a id="l00652" name="l00652"></a><span class="lineno"> 652</span> </div>
<div class="line"><a id="l00653" name="l00653"></a><span class="lineno"> 653</span>} <span class="comment">// namespace steel</span></div>
<div class="line"><a id="l00654" name="l00654"></a><span class="lineno"> 654</span>} <span class="comment">// namespace mlx</span></div>
@@ -832,21 +824,19 @@ $(function(){initNavTree('gemm_2mma_8h_source.html',''); initResizable(true); })
<div class="ttc" id="asteel_2defines_8h_html"><div class="ttname"><a href="steel_2defines_8h.html">defines.h</a></div></div>
<div class="ttc" id="asteel_2defines_8h_html_a5a5c3095b132a7589bc19cd5cb80e2c6"><div class="ttname"><a href="steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6">STEEL_PRAGMA_UNROLL</a></div><div class="ttdeci">#define STEEL_PRAGMA_UNROLL</div><div class="ttdef"><b>Definition</b> defines.h:4</div></div>
<div class="ttc" id="asteel_2defines_8h_html_a90b91c866313ffa46eff6d9cc944ad2b"><div class="ttname"><a href="steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b">STEEL_CONST</a></div><div class="ttdeci">#define STEEL_CONST</div><div class="ttdef"><b>Definition</b> defines.h:3</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_html"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag.html">mlx::steel::BaseMMAFrag</a></div><div class="ttdef"><b>Definition</b> mma.h:23</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a1868f57d57c8adedab2c58492ec76946"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a1868f57d57c8adedab2c58492ec76946">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::mma</a></div><div class="ttdeci">static METAL_FUNC constexpr void mma(thread mat_type &amp;D, thread mat_type &amp;A, thread mat_type &amp;B, thread mat_type &amp;C)</div><div class="ttdef"><b>Definition</b> mma.h:164</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a1f0b00daad8eba2f855bb306e70d2328"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a1f0b00daad8eba2f855bb306e70d2328">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::store_safe</a></div><div class="ttdeci">static METAL_FUNC constexpr void store_safe(const thread frag_type &amp;src, DstPtrType dst, StrX str_x, StrY str_y, LimX lim_x, LimY lim_y, OffX off_x=Int&lt; 0 &gt;{}, OffY off_y=Int&lt; 0 &gt;{})</div><div class="ttdef"><b>Definition</b> mma.h:122</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a211102315e2afbcfcd2e2c201b638e9f"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a211102315e2afbcfcd2e2c201b638e9f">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::kFragCols</a></div><div class="ttdeci">STEEL_CONST int kFragCols</div><div class="ttdef"><b>Definition</b> mma.h:49</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a25675ae18947a97c6e04157b540103a9"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a25675ae18947a97c6e04157b540103a9">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::mat_type</a></div><div class="ttdeci">metal::simdgroup_matrix&lt; T, kFragRows, kFragCols &gt; mat_type</div><div class="ttdef"><b>Definition</b> mma.h:60</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a2fe53db449c692226f23f6b99fb2c0d4"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a2fe53db449c692226f23f6b99fb2c0d4">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::kFragRows</a></div><div class="ttdeci">STEEL_CONST int kFragRows</div><div class="ttdef"><b>Definition</b> mma.h:48</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a3c34dfdc944db110f4735f1b25307cf0"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a3c34dfdc944db110f4735f1b25307cf0">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::kElemsPerFrag</a></div><div class="ttdeci">STEEL_CONST int kElemsPerFrag</div><div class="ttdef"><b>Definition</b> mma.h:51</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a7331fff1d12f2f8b72b0006a3ad0dd83"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7331fff1d12f2f8b72b0006a3ad0dd83">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::get_coord</a></div><div class="ttdeci">static METAL_FUNC constexpr short2 get_coord(ushort simd_lane_id)</div><div class="ttdef"><b>Definition</b> mma.h:49</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a76aa5aa690dbcc954e957d767fad661f"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a76aa5aa690dbcc954e957d767fad661f">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::kElemRows</a></div><div class="ttdeci">STEEL_CONST int kElemRows</div><div class="ttdef"><b>Definition</b> mma.h:53</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a7c212200d86b4e93f274d99addf668bd"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a7c212200d86b4e93f274d99addf668bd">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::kElemCols</a></div><div class="ttdeci">STEEL_CONST int kElemCols</div><div class="ttdef"><b>Definition</b> mma.h:54</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a8028512f5a3d2b6acaf966be529627a3"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8028512f5a3d2b6acaf966be529627a3">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::mma</a></div><div class="ttdeci">static METAL_FUNC constexpr void mma(thread frag_type &amp;D, thread frag_type &amp;A, thread frag_type &amp;B, thread frag_type &amp;C)</div><div class="ttdef"><b>Definition</b> mma.h:145</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_html_a8536bfaa108031c2ea3e9ccdc766ee5b"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#a8536bfaa108031c2ea3e9ccdc766ee5b">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::frag_type</a></div><div class="ttdeci">metal::vec&lt; T, kElemsPerFrag &gt; frag_type</div><div class="ttdef"><b>Definition</b> mma.h:61</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_html_aa8f50ea8961ec5b35c1b81366d64f2cb"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#aa8f50ea8961ec5b35c1b81366d64f2cb">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::store</a></div><div class="ttdeci">static METAL_FUNC constexpr void store(const thread frag_type &amp;src, DstPtrType dst, StrX str_x, StrY str_y)</div><div class="ttdef"><b>Definition</b> mma.h:102</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_html_ac73006b36fc710feda3a7c796e21415c"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#ac73006b36fc710feda3a7c796e21415c">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::load</a></div><div class="ttdeci">static METAL_FUNC constexpr void load(thread frag_type &amp;dst, SrcPtrType src, StrX str_x, StrY str_y)</div><div class="ttdef"><b>Definition</b> mma.h:59</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_html_ad22aaee4a2938cbdd315b39eda84e07d"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag.html#ad22aaee4a2938cbdd315b39eda84e07d">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::load_safe</a></div><div class="ttdeci">static METAL_FUNC constexpr void load_safe(thread frag_type &amp;dst, SrcPtrType src, StrX str_x, StrY str_y, LimX lim_x, LimY lim_y, OffX off_x=Int&lt; 0 &gt;{}, OffY off_y=Int&lt; 0 &gt;{})</div><div class="ttdef"><b>Definition</b> mma.h:77</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4_html_a1868f57d57c8adedab2c58492ec76946"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a1868f57d57c8adedab2c58492ec76946">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::mma</a></div><div class="ttdeci">static METAL_FUNC constexpr void mma(thread mat_type &amp;D, thread mat_type &amp;A, thread mat_type &amp;B, thread mat_type &amp;C)</div><div class="ttdef"><b>Definition</b> mma.h:164</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4_html_a1f0b00daad8eba2f855bb306e70d2328"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a1f0b00daad8eba2f855bb306e70d2328">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::store_safe</a></div><div class="ttdeci">static METAL_FUNC constexpr void store_safe(const thread frag_type &amp;src, DstPtrType dst, StrX str_x, StrY str_y, LimX lim_x, LimY lim_y, OffX off_x=Int&lt; 0 &gt;{}, OffY off_y=Int&lt; 0 &gt;{})</div><div class="ttdef"><b>Definition</b> mma.h:122</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4_html_a25675ae18947a97c6e04157b540103a9"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a25675ae18947a97c6e04157b540103a9">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::mat_type</a></div><div class="ttdeci">metal::simdgroup_matrix&lt; T, kFragRows, kFragCols &gt; mat_type</div><div class="ttdef"><b>Definition</b> mma.h:60</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4_html_a7331fff1d12f2f8b72b0006a3ad0dd83"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a7331fff1d12f2f8b72b0006a3ad0dd83">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::get_coord</a></div><div class="ttdeci">static METAL_FUNC constexpr short2 get_coord(ushort simd_lane_id)</div><div class="ttdef"><b>Definition</b> mma.h:49</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4_html_a76aa5aa690dbcc954e957d767fad661f"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a76aa5aa690dbcc954e957d767fad661f">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::kElemRows</a></div><div class="ttdeci">STEEL_CONST int kElemRows</div><div class="ttdef"><b>Definition</b> mma.h:53</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4_html_a7c212200d86b4e93f274d99addf668bd"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a7c212200d86b4e93f274d99addf668bd">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::kElemCols</a></div><div class="ttdeci">STEEL_CONST int kElemCols</div><div class="ttdef"><b>Definition</b> mma.h:54</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4_html_a8028512f5a3d2b6acaf966be529627a3"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a8028512f5a3d2b6acaf966be529627a3">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::mma</a></div><div class="ttdeci">static METAL_FUNC constexpr void mma(thread frag_type &amp;D, thread frag_type &amp;A, thread frag_type &amp;B, thread frag_type &amp;C)</div><div class="ttdef"><b>Definition</b> mma.h:145</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4_html_a8536bfaa108031c2ea3e9ccdc766ee5b"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#a8536bfaa108031c2ea3e9ccdc766ee5b">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::frag_type</a></div><div class="ttdeci">metal::vec&lt; T, kElemsPerFrag &gt; frag_type</div><div class="ttdef"><b>Definition</b> mma.h:61</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4_html_aa8f50ea8961ec5b35c1b81366d64f2cb"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#aa8f50ea8961ec5b35c1b81366d64f2cb">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::store</a></div><div class="ttdeci">static METAL_FUNC constexpr void store(const thread frag_type &amp;src, DstPtrType dst, StrX str_x, StrY str_y)</div><div class="ttdef"><b>Definition</b> mma.h:102</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4_html_ac73006b36fc710feda3a7c796e21415c"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#ac73006b36fc710feda3a7c796e21415c">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::load</a></div><div class="ttdeci">static METAL_FUNC constexpr void load(thread frag_type &amp;dst, SrcPtrType src, StrX str_x, StrY str_y)</div><div class="ttdef"><b>Definition</b> mma.h:59</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4_html_ad22aaee4a2938cbdd315b39eda84e07d"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag_3_01_t_00_018_00_018_01_4.html#ad22aaee4a2938cbdd315b39eda84e07d">mlx::steel::BaseMMAFrag&lt; T, 8, 8 &gt;::load_safe</a></div><div class="ttdeci">static METAL_FUNC constexpr void load_safe(thread frag_type &amp;dst, SrcPtrType src, StrX str_x, StrY str_y, LimX lim_x, LimY lim_y, OffX off_x=Int&lt; 0 &gt;{}, OffY off_y=Int&lt; 0 &gt;{})</div><div class="ttdef"><b>Definition</b> mma.h:77</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_base_m_m_a_frag_html"><div class="ttname"><a href="structmlx_1_1steel_1_1_base_m_m_a_frag.html">mlx::steel::BaseMMAFrag</a></div><div class="ttdef"><b>Definition</b> mma.h:37</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_block_m_m_a_html"><div class="ttname"><a href="structmlx_1_1steel_1_1_block_m_m_a.html">mlx::steel::BlockMMA</a></div><div class="ttdef"><b>Definition</b> mma.h:449</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_block_m_m_a_html_a0461451ffb5041b6a916ea17ed34288b"><div class="ttname"><a href="structmlx_1_1steel_1_1_block_m_m_a.html#a0461451ffb5041b6a916ea17ed34288b">mlx::steel::BlockMMA::store_result</a></div><div class="ttdeci">METAL_FUNC void store_result(device U *D, const int ldd)</div><div class="ttdef"><b>Definition</b> mma.h:464</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_block_m_m_a_html_a081ba538d30d1d02498a7f341e6bd611"><div class="ttname"><a href="structmlx_1_1steel_1_1_block_m_m_a.html#a081ba538d30d1d02498a7f341e6bd611">mlx::steel::BlockMMA::store_result_safe</a></div><div class="ttdeci">METAL_FUNC void store_result_safe(device U *D, const int ldd, short2 dst_tile_dims)</div><div class="ttdef"><b>Definition</b> mma.h:478</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_block_m_m_a_html_a138ed1bbad2ca88d3a3c7d162cd36562"><div class="ttname"><a href="structmlx_1_1steel_1_1_block_m_m_a.html#a138ed1bbad2ca88d3a3c7d162cd36562">mlx::steel::BlockMMA&lt; T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue &gt;::As_offset</a></div><div class="ttdeci">short As_offset</div><div class="ttdef"><b>Definition</b> mma.h:485</div></div>
@@ -865,7 +855,7 @@ $(function(){initNavTree('gemm_2mma_8h_source.html',''); initResizable(true); })
<div class="ttc" id="astructmlx_1_1steel_1_1_block_m_m_a_html_a8fddaa78913cdc8eea5e1cf7d2776330"><div class="ttname"><a href="structmlx_1_1steel_1_1_block_m_m_a.html#a8fddaa78913cdc8eea5e1cf7d2776330">mlx::steel::BlockMMA&lt; T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue &gt;::tile_stride_a</a></div><div class="ttdeci">STEEL_CONST short tile_stride_a</div><div class="ttdef"><b>Definition</b> mma.h:473</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_block_m_m_a_html_a92f6aeee432f53638447eac842f43eca"><div class="ttname"><a href="structmlx_1_1steel_1_1_block_m_m_a.html#a92f6aeee432f53638447eac842f43eca">mlx::steel::BlockMMA&lt; T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue &gt;::Bs_offset</a></div><div class="ttdeci">short Bs_offset</div><div class="ttdef"><b>Definition</b> mma.h:486</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_block_m_m_a_html_a9e48f2d51099ec00171506724faab54a"><div class="ttname"><a href="structmlx_1_1steel_1_1_block_m_m_a.html#a9e48f2d51099ec00171506724faab54a">mlx::steel::BlockMMA::apply_epilogue_safe</a></div><div class="ttdeci">METAL_FUNC void apply_epilogue_safe(const device U *C, const int ldc, const int fdc, short2 dst_tile_dims, thread const BinaryEpilogue &amp;epilogue_op)</div><div class="ttdef"><b>Definition</b> mma.h:535</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_block_m_m_a_html_aa14406b7298456ac45d23dd3c4642dd8"><div class="ttname"><a href="structmlx_1_1steel_1_1_block_m_m_a.html#aa14406b7298456ac45d23dd3c4642dd8">mlx::steel::BlockMMA::BlockMMA</a></div><div class="ttdeci">METAL_FUNC BlockMMA(ushort simd_group_id, ushort simd_lane_id)</div><div class="ttdef"><b>Definition</b> mma.h:489</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_block_m_m_a_html_aa14406b7298456ac45d23dd3c4642dd8"><div class="ttname"><a href="structmlx_1_1steel_1_1_block_m_m_a.html#aa14406b7298456ac45d23dd3c4642dd8">mlx::steel::BlockMMA::BlockMMA</a></div><div class="ttdeci">METAL_FUNC BlockMMA(ushort simd_group_id, ushort simd_lane_id)</div><div class="ttdef"><b>Definition</b> mma.h:417</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_block_m_m_a_html_aa71400922babd388177f228c2c82b211"><div class="ttname"><a href="structmlx_1_1steel_1_1_block_m_m_a.html#aa71400922babd388177f228c2c82b211">mlx::steel::BlockMMA&lt; T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue &gt;::B_str_k</a></div><div class="ttdeci">STEEL_CONST short B_str_k</div><div class="ttdef"><b>Definition</b> mma.h:469</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_block_m_m_a_html_aa85451edf6900fd6af164d4d50889ae3"><div class="ttname"><a href="structmlx_1_1steel_1_1_block_m_m_a.html#aa85451edf6900fd6af164d4d50889ae3">mlx::steel::BlockMMA&lt; T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue &gt;::sm</a></div><div class="ttdeci">short sm</div><div class="ttdef"><b>Definition</b> mma.h:482</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_block_m_m_a_html_ab9c7f5386594497f5f4df7e59670b877"><div class="ttname"><a href="structmlx_1_1steel_1_1_block_m_m_a.html#ab9c7f5386594497f5f4df7e59670b877">mlx::steel::BlockMMA&lt; T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue &gt;::A_str_m</a></div><div class="ttdeci">STEEL_CONST short A_str_m</div><div class="ttdef"><b>Definition</b> mma.h:465</div></div>
@@ -875,7 +865,7 @@ $(function(){initNavTree('gemm_2mma_8h_source.html',''); initResizable(true); })
<div class="ttc" id="astructmlx_1_1steel_1_1_block_m_m_a_html_ae3f35453b3afbaac9df64ad5966b34a4"><div class="ttname"><a href="structmlx_1_1steel_1_1_block_m_m_a.html#ae3f35453b3afbaac9df64ad5966b34a4">mlx::steel::BlockMMA&lt; T, U, BM, BN, BK, WM, WN, transpose_a, transpose_b, transpose_a ? BM+tgp_padding_a :BK+tgp_padding_a, transpose_b ? BK+tgp_padding_b :BN+tgp_padding_b, AccumType, Epilogue &gt;::tile_stride_b</a></div><div class="ttdeci">STEEL_CONST short tile_stride_b</div><div class="ttdef"><b>Definition</b> mma.h:474</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_block_m_m_a_html_aee8caec45c1f9e4428586effbfe6137d"><div class="ttname"><a href="structmlx_1_1steel_1_1_block_m_m_a.html#aee8caec45c1f9e4428586effbfe6137d">mlx::steel::BlockMMA::kFragSize</a></div><div class="ttdeci">STEEL_CONST short kFragSize</div><div class="ttdef"><b>Definition</b> mma.h:451</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_block_m_m_a_html_af653c0808ba4fa9a25286f1febb7baff"><div class="ttname"><a href="structmlx_1_1steel_1_1_block_m_m_a.html#af653c0808ba4fa9a25286f1febb7baff">mlx::steel::BlockMMA::apply_epilogue</a></div><div class="ttdeci">METAL_FUNC void apply_epilogue(thread const UnaryEpilogue &amp;epilogue_op)</div><div class="ttdef"><b>Definition</b> mma.h:497</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_m_m_a_tile_html"><div class="ttname"><a href="structmlx_1_1steel_1_1_m_m_a_tile.html">mlx::steel::MMATile</a></div><div class="ttdef"><b>Definition</b> mma.h:178</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_m_m_a_tile_html"><div class="ttname"><a href="structmlx_1_1steel_1_1_m_m_a_tile.html">mlx::steel::MMATile</a></div><div class="ttdef"><b>Definition</b> mma.h:223</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_m_m_a_tile_html_a1a6b1446e8c8da46885bbaa8e8fdc7e4"><div class="ttname"><a href="structmlx_1_1steel_1_1_m_m_a_tile.html#a1a6b1446e8c8da46885bbaa8e8fdc7e4">mlx::steel::MMATile::frag_at</a></div><div class="ttdeci">METAL_FUNC constexpr thread frag_type &amp; frag_at(const short i, const short j)</div><div class="ttdef"><b>Definition</b> mma.h:208</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_m_m_a_tile_html_a1d126b14910385ab644e224ac1d0307a"><div class="ttname"><a href="structmlx_1_1steel_1_1_m_m_a_tile.html#a1d126b14910385ab644e224ac1d0307a">mlx::steel::MMATile::kTileRows</a></div><div class="ttdeci">STEEL_CONST int kTileRows</div><div class="ttdef"><b>Definition</b> mma.h:230</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_m_m_a_tile_html_a1eeb197c9bdf4db42892a39cdb9bd73a"><div class="ttname"><a href="structmlx_1_1steel_1_1_m_m_a_tile.html#a1eeb197c9bdf4db42892a39cdb9bd73a">mlx::steel::MMATile::mat_type</a></div><div class="ttdeci">MMAFrag_t::mat_type mat_type</div><div class="ttdef"><b>Definition</b> mma.h:242</div></div>
@@ -892,7 +882,7 @@ $(function(){initNavTree('gemm_2mma_8h_source.html',''); initResizable(true); })
<div class="ttc" id="astructmlx_1_1steel_1_1_m_m_a_tile_html_a948784652e93830887ee8ad506ec3257"><div class="ttname"><a href="structmlx_1_1steel_1_1_m_m_a_tile.html#a948784652e93830887ee8ad506ec3257">mlx::steel::MMATile::kCols</a></div><div class="ttdeci">STEEL_CONST int kCols</div><div class="ttdef"><b>Definition</b> mma.h:234</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_m_m_a_tile_html_a98357339ec98f804a1b12597937b318f"><div class="ttname"><a href="structmlx_1_1steel_1_1_m_m_a_tile.html#a98357339ec98f804a1b12597937b318f">mlx::steel::MMATile::kElemsPerTile</a></div><div class="ttdeci">STEEL_CONST int kElemsPerTile</div><div class="ttdef"><b>Definition</b> mma.h:237</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_m_m_a_tile_html_aa3a4af67813908109da08ce7352f82da"><div class="ttname"><a href="structmlx_1_1steel_1_1_m_m_a_tile.html#aa3a4af67813908109da08ce7352f82da">mlx::steel::MMATile::load_safe</a></div><div class="ttdeci">METAL_FUNC void load_safe(const device U *src, const int ld, const short2 src_tile_dims)</div><div class="ttdef"><b>Definition</b> mma.h:301</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_m_m_a_tile_html_aa3fb310dd08ec23c334511f7b316d1b6"><div class="ttname"><a href="structmlx_1_1steel_1_1_m_m_a_tile.html#aa3fb310dd08ec23c334511f7b316d1b6">mlx::steel::MMATile::MMATile</a></div><div class="ttdeci">METAL_FUNC MMATile() thread</div><div class="ttdef"><b>Definition</b> mma.h:247</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_m_m_a_tile_html_aa3fb310dd08ec23c334511f7b316d1b6"><div class="ttname"><a href="structmlx_1_1steel_1_1_m_m_a_tile.html#aa3fb310dd08ec23c334511f7b316d1b6">mlx::steel::MMATile::MMATile</a></div><div class="ttdeci">METAL_FUNC MMATile() thread</div><div class="ttdef"><b>Definition</b> mma.h:199</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_m_m_a_tile_html_aa5426c6beabfb3ee41b58f01b3392a96"><div class="ttname"><a href="structmlx_1_1steel_1_1_m_m_a_tile.html#aa5426c6beabfb3ee41b58f01b3392a96">mlx::steel::MMATile::load</a></div><div class="ttdeci">METAL_FUNC void load(const threadgroup U *src)</div><div class="ttdef"><b>Definition</b> mma.h:236</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_m_m_a_tile_html_aa97a98e423827a889c13a92217626ec7"><div class="ttname"><a href="structmlx_1_1steel_1_1_m_m_a_tile.html#aa97a98e423827a889c13a92217626ec7">mlx::steel::MMATile::clear</a></div><div class="ttdeci">METAL_FUNC constexpr void clear()</div><div class="ttdef"><b>Definition</b> mma.h:201</div></div>
<div class="ttc" id="astructmlx_1_1steel_1_1_m_m_a_tile_html_aa9e484d8cae936503898d5b772c573f9"><div class="ttname"><a href="structmlx_1_1steel_1_1_m_m_a_tile.html#aa9e484d8cae936503898d5b772c573f9">mlx::steel::MMATile::load</a></div><div class="ttdeci">METAL_FUNC void load(const device U *src, const int ld)</div><div class="ttdef"><b>Definition</b> mma.h:270</div></div>
@@ -910,7 +900,7 @@ $(function(){initNavTree('gemm_2mma_8h_source.html',''); initResizable(true); })
<div id="nav-path" class="navpath"><!-- id is needed for treeview function! -->
<ul>
<li class="navelem"><a class="el" href="dir_938ab0ecf10b8b860ff766c820f665fd.html">mlx</a></li><li class="navelem"><a class="el" href="dir_1d446c9bd3c99228254c9484e0bc5c06.html">backend</a></li><li class="navelem"><a class="el" href="dir_d0c977ea65824390717cdb7efc36c157.html">metal</a></li><li class="navelem"><a class="el" href="dir_70a37effa88bcbd6b791977fa1e64356.html">kernels</a></li><li class="navelem"><a class="el" href="dir_76215a6c54e2b67053e723fc2395583c.html">steel</a></li><li class="navelem"><a class="el" href="dir_6768c99e6145fb9510ccdb40db8ede25.html">gemm</a></li><li class="navelem"><a class="el" href="gemm_2mma_8h.html">mma.h</a></li>
<li class="footer">Generated by <a href="https://www.doxygen.org/index.html"><img class="footer" src="doxygen.svg" width="104" height="31" alt="doxygen"/></a> 1.13.1 </li>
<li class="footer">Generated by <a href="https://www.doxygen.org/index.html"><img class="footer" src="doxygen.svg" width="104" height="31" alt="doxygen"/></a> 1.13.2 </li>
</ul>
</div>
</body>