2024-10-15 23:12:17 +08:00
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
< html xmlns = "http://www.w3.org/1999/xhtml" lang = "en-US" >
< head >
<!-- Document metadata: content type and charset, IE compatibility mode, generator tag, viewport, and page title. -->
< meta http-equiv = "Content-Type" content = "text/xhtml;charset=UTF-8" / >
< meta http-equiv = "X-UA-Compatible" content = "IE=11" / >
2025-01-10 05:56:20 +08:00
< meta name = "generator" content = "Doxygen 1.13.1" / >
2024-10-15 23:12:17 +08:00
< meta name = "viewport" content = "width=device-width, initial-scale=1" / >
< title > MLX: mlx/backend/metal/kernels/steel/gemm/loader.h Source File< / title >
<!-- Stylesheets and scripts are referenced by relative path. jquery.js is the first script listed, ahead of the inline scripts that call $(...). -->
< link href = "tabs.css" rel = "stylesheet" type = "text/css" / >
< script type = "text/javascript" src = "jquery.js" > < / script >
< script type = "text/javascript" src = "dynsections.js" > < / script >
< script type = "text/javascript" src = "clipboard.js" > < / script >
< link href = "navtree.css" rel = "stylesheet" type = "text/css" / >
2025-01-10 05:56:20 +08:00
< script type = "text/javascript" src = "navtreedata.js" > < / script >
< script type = "text/javascript" src = "navtree.js" > < / script >
2024-10-15 23:12:17 +08:00
< script type = "text/javascript" src = "resize.js" > < / script >
< script type = "text/javascript" src = "cookie.js" > < / script >
< link href = "search/search.css" rel = "stylesheet" type = "text/css" / >
< script type = "text/javascript" src = "search/searchdata.js" > < / script >
< script type = "text/javascript" src = "search/search.js" > < / script >
2025-01-10 05:56:20 +08:00
<!-- Inline script: passes a callback to $(...) that calls init_search(). Presumably this is the jQuery DOM-ready shorthand and init_search comes from one of the search scripts above (TODO confirm). The @license / @license-end markers bracket the inline code with a license reference. -->
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699& dn=expat.txt MIT */
$(function() { init_search(); });
/* @license-end */
< / script >
2024-10-15 23:12:17 +08:00
< link href = "doxygen.css" rel = "stylesheet" type = "text/css" / >
< / head >
< body >
< div id = "top" > <!-- do not remove this div, it is closed by doxygen! -->
<!-- Title area: a single table row holding the project name cell ("MLX") and the search box cell. -->
<!-- The search box markup delegates hover, focus, blur and key-up events to a global object named searchBox; the close link calls searchBox.CloseResultsWindow(). -->
< div id = "titlearea" >
< table cellspacing = "0" cellpadding = "0" >
< tbody >
< tr id = "projectrow" >
< td id = "projectalign" >
< div id = "projectname" > MLX
< / div >
< / td >
2025-01-10 05:56:20 +08:00
< td > < div id = "MSearchBox" class = "MSearchBoxInactive" >
< span class = "left" >
< span id = "MSearchSelect" onmouseover = "return searchBox.OnSearchSelectShow()" onmouseout = "return searchBox.OnSearchSelectHide()" >   < / span >
< input type = "text" id = "MSearchField" value = "" placeholder = "Search" accesskey = "S"
onfocus="searchBox.OnSearchFieldFocus(true)"
onblur="searchBox.OnSearchFieldFocus(false)"
onkeyup="searchBox.OnSearchFieldChange(event)"/>
< / span > < span class = "right" >
< a id = "MSearchClose" href = "javascript:searchBox.CloseResultsWindow()" > < img id = "MSearchCloseImg" border = "0" src = "search/close.svg" alt = "" / > < / a >
< / span >
< / div >
< / td >
2024-10-15 23:12:17 +08:00
< / tr >
< / tbody >
< / table >
< / div >
<!-- end header part -->
2025-01-10 05:56:20 +08:00
<!-- Generated by Doxygen 1.13.1 -->
2024-10-15 23:12:17 +08:00
<!-- Inline script: creates the global searchBox object that the inline search handlers in this page refer to. The three constructor arguments look like the object name, the search data directory and the result page extension (TODO confirm against search/search.js). -->
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699& dn=expat.txt MIT */
var searchBox = new SearchBox("searchBox", "search/",'.html');
/* @license-end */
< / script >
<!-- Inline script: passes a callback to $(...) that calls codefold.init(0). Presumably this sets up the collapsible "foldopen" regions of the source listing once the DOM is ready; codefold is defined outside this page (TODO confirm). -->
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699& dn=expat.txt MIT */
$(function() { codefold.init(0); });
/* @license-end */
< / script >
2025-01-10 05:56:20 +08:00
< / div > <!-- top -->
<!-- Side navigation panel: a nav-tree container whose only static child is the nav-sync element, followed by the splitbar element carrying the ui-resizable-handle class. The tree contents are not present in the static markup; presumably they are filled in by script (TODO confirm). -->
< div id = "side-nav" class = "ui-resizable side-nav-resizable" >
< div id = "nav-tree" >
< div id = "nav-tree-contents" >
< div id = "nav-sync" class = "sync" > < / div >
< / div >
< / div >
< div id = "splitbar" style = "-moz-user-select:none;"
class="ui-resizable-handle">
< / div >
< / div >
2024-10-15 23:12:17 +08:00
<!-- Inline script: passes a callback to $(...) that calls initNavTree with this page's file name ('gemm_2loader_8h_source.html') and an empty string, then calls initResizable(true). Both functions are defined outside this page, presumably in navtree.js and resize.js (TODO confirm). -->
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699& dn=expat.txt MIT */
2025-01-10 05:56:20 +08:00
$(function(){initNavTree('gemm_2loader_8h_source.html',''); initResizable(true); });
2024-10-15 23:12:17 +08:00
/* @license-end */
< / script >
2025-01-10 05:56:20 +08:00
< div id = "doc-content" >
2024-10-15 23:12:17 +08:00
<!-- window showing the filter options -->
<!-- MSearchSelectWindow is empty in the static markup. Its mouse-over, mouse-out and key-down events are forwarded to the global searchBox object, and each handler's return value is returned from the inline attribute. -->
< div id = "MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
< / div >
<!-- panel showing the search results (closed by default) -->
<!-- Search results structure: MSearchResultsWindow > MSearchResults > SRPage > SRIndex. SRResults is empty in the static markup; the three SRStatus elements carry the fixed "Loading...", "Searching..." and "No Matches" messages. Which of them is visible at a given time is decided outside this markup. -->
< div id = "MSearchResultsWindow" >
< div id = "MSearchResults" >
< div class = "SRPage" >
< div id = "SRIndex" >
< div id = "SRResults" > < / div >
< div class = "SRStatus" id = "Loading" > Loading...< / div >
< div class = "SRStatus" id = "Searching" > Searching...< / div >
< div class = "SRStatus" id = "NoMatches" > No Matches< / div >
< / div >
< / div >
< / div >
< / div >
<!-- Page header: the title block shows the name of the documented file, loader.h. -->
< div class = "header" >
< div class = "headertitle" > < div class = "title" > loader.h< / div > < / div >
< / div > <!-- header -->
< div class = "contents" >
< a href = "gemm_2loader_8h.html" > Go to the documentation of this file.< / a > < div class = "fragment" > < div class = "line" > < a id = "l00001" name = "l00001" > < / a > < span class = "lineno" > 1< / span > < span class = "comment" > // Copyright © 2024 Apple Inc.< / span > < / div >
< div class = "line" > < a id = "l00002" name = "l00002" > < / a > < span class = "lineno" > 2< / span > < / div >
< div class = "line" > < a id = "l00003" name = "l00003" > < / a > < span class = "lineno" > 3< / span > < span class = "preprocessor" > #pragma once< / span > < / div >
< div class = "line" > < a id = "l00004" name = "l00004" > < / a > < span class = "lineno" > 4< / span > < / div >
< div class = "line" > < a id = "l00005" name = "l00005" > < / a > < span class = "lineno" > 5< / span > < span class = "preprocessor" > #include " < a class = "code" href = "steel_2defines_8h.html" > mlx/backend/metal/kernels/steel/defines.h< / a > " < / span > < / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00006" name = "l00006" > < / a > < span class = "lineno" > 6< / span > < / div >
2024-10-15 23:12:17 +08:00
< div class = "line" > < a id = "l00008" name = "l00008" > < / a > < span class = "lineno" > 8< / span > < span class = "comment" > // Loading helper< / span > < / div >
< div class = "line" > < a id = "l00010" name = "l00010" > < / a > < span class = "lineno" > 10< / span > < / div >
< div class = "line" > < a id = "l00011" name = "l00011" > < / a > < span class = "lineno" > 11< / span > < span class = "keyword" > namespace < / span > < a class = "code hl_namespace" href = "namespacemlx.html" > mlx< / a > {< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00012" name = "l00012" > < / a > < span class = "lineno" > 12< / span > < span class = "keyword" > namespace < / span > < a class = "code hl_namespace" href = "namespacemlx_1_1steel.html" > steel< / a > {< / div >
2024-10-15 23:12:17 +08:00
< div class = "line" > < a id = "l00013" name = "l00013" > < / a > < span class = "lineno" > 13< / span > < / div >
< div class = "line" > < a id = "l00014" name = "l00014" > < / a > < span class = "lineno" > 14< / span > < span class = "keyword" > template< / span > < < / div >
< div class = "line" > < a id = "l00015" name = "l00015" > < / a > < span class = "lineno" > 15< / span > < span class = "keyword" > typename< / span > T,< / div >
< div class = "line" > < a id = "l00016" name = "l00016" > < / a > < span class = "lineno" > 16< / span > < span class = "keywordtype" > short< / span > BROWS,< / div >
< div class = "line" > < a id = "l00017" name = "l00017" > < / a > < span class = "lineno" > 17< / span > < span class = "keywordtype" > short< / span > BCOLS,< / div >
< div class = "line" > < a id = "l00018" name = "l00018" > < / a > < span class = "lineno" > 18< / span > < span class = "keywordtype" > short< / span > dst_ld,< / div >
< div class = "line" > < a id = "l00019" name = "l00019" > < / a > < span class = "lineno" > 19< / span > < span class = "keywordtype" > short< / span > reduction_dim,< / div >
< div class = "line" > < a id = "l00020" name = "l00020" > < / a > < span class = "lineno" > 20< / span > < span class = "keywordtype" > short< / span > tgp_size,< / div >
< div class = "line" > < a id = "l00021" name = "l00021" > < / a > < span class = "lineno" > 21< / span > < span class = "keywordtype" > short< / span > alignment = 1,< / div >
< div class = "line" > < a id = "l00022" name = "l00022" > < / a > < span class = "lineno" > 22< / span > < span class = "keywordtype" > short< / span > n_reads = (BCOLS * BROWS) / (tgp_size),< / div >
< div class = "line" > < a id = "l00023" name = "l00023" > < / a > < span class = "lineno" > 23< / span > < span class = "keywordtype" > short< / span > TCOLS = BCOLS / n_reads,< / div >
< div class = "line" > < a id = "l00024" name = "l00024" > < / a > < span class = "lineno" > 24< / span > < span class = "keywordtype" > short< / span > TROWS = tgp_size / TCOLS> < / div >
< div class = "foldopen" id = "foldopen00025" data-start = "{" data-end = "};" >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00025" name = "l00025" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_loader.html" > 25< / a > < / span > < span class = "keyword" > struct < / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_block_loader.html#a37aca066e63dff238865b5923a2d4335" > BlockLoader< / a > {< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00026" name = "l00026" > < / a > < span class = "lineno" > 26< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a973804e5b1d418c98c90861cda1a6fb5" > n_rows< / a > = (BROWS + TROWS - 1) / TROWS;< / div >
< div class = "line" > < a id = "l00027" name = "l00027" > < / a > < span class = "lineno" > 27< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a58bdf9b9c81962733e22ecdeae28c092" > vec_size< / a > = n_reads;< / div >
2024-10-15 23:12:17 +08:00
< div class = "line" > < a id = "l00028" name = "l00028" > < / a > < span class = "lineno" > 28< / span > < / div >
< div class = "line" > < a id = "l00029" name = "l00029" > < / a > < span class = "lineno" > 29< / span > < span class = "comment" > // Leading dimension for src< / span > < / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00030" name = "l00030" > < / a > < span class = "lineno" > 30< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#aadafc50f7f06af434149d7469df4714d" > src_ld< / a > ;< / div >
< div class = "line" > < a id = "l00031" name = "l00031" > < / a > < span class = "lineno" > 31< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#ab87876699d55473620c7ea99f9da911d" > tile_stride< / a > ;< / div >
2024-10-15 23:12:17 +08:00
< div class = "line" > < a id = "l00032" name = "l00032" > < / a > < span class = "lineno" > 32< / span > < / div >
< div class = "line" > < a id = "l00033" name = "l00033" > < / a > < span class = "lineno" > 33< / span > < span class = "comment" > // Thread location indices< / span > < / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00034" name = "l00034" > < / a > < span class = "lineno" > 34< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a064e2cc77e0b1cf0f8027929e031775b" > thread_idx< / a > ;< / div >
< div class = "line" > < a id = "l00035" name = "l00035" > < / a > < span class = "lineno" > 35< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a9ef13742bcdf07532d8f09394928a8af" > bi< / a > ;< / div >
< div class = "line" > < a id = "l00036" name = "l00036" > < / a > < span class = "lineno" > 36< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a78c326e75ee35a484685771143047cd4" > bj< / a > ;< / div >
2024-10-15 23:12:17 +08:00
< div class = "line" > < a id = "l00037" name = "l00037" > < / a > < span class = "lineno" > 37< / span > < / div >
< div class = "line" > < a id = "l00038" name = "l00038" > < / a > < span class = "lineno" > 38< / span > < span class = "comment" > // threadgroup and device memory< / span > < / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00039" name = "l00039" > < / a > < span class = "lineno" > 39< / span > threadgroup T* < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#af1c6c35a42e9da4408c1013ff1741bc2" > dst< / a > ;< / div >
< div class = "line" > < a id = "l00040" name = "l00040" > < / a > < span class = "lineno" > 40< / span > < span class = "keyword" > const< / span > device T* < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#ad1db14517568ae9eddfb6986ef31c7aa" > src< / a > ;< / div >
2024-10-15 23:12:17 +08:00
< div class = "line" > < a id = "l00041" name = "l00041" > < / a > < span class = "lineno" > 41< / span > < / div >
< div class = "foldopen" id = "foldopen00042" data-start = "{" data-end = "};" >
< div class = "line" > < a id = "l00042" name = "l00042" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_loader_1_1_read_vector.html" > 42< / a > < / span > < span class = "keyword" > struct < / span > < span class = "keyword" > alignas< / span > (alignment * sizeof(T)) < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_block_loader_1_1_read_vector.html" > ReadVector< / a > {< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00043" name = "l00043" > < / a > < span class = "lineno" > 43< / span > uint8_t < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader_1_1_read_vector.html#a20963f7191251defca48bf8a843d019d" > v< / a > [< span class = "keyword" > sizeof< / span > (T) * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a58bdf9b9c81962733e22ecdeae28c092" > vec_size< / a > ];< / div >
2024-10-15 23:12:17 +08:00
< div class = "line" > < a id = "l00044" name = "l00044" > < / a > < span class = "lineno" > 44< / span > };< / div >
< / div >
< div class = "line" > < a id = "l00045" name = "l00045" > < / a > < span class = "lineno" > 45< / span > < / div >
< div class = "line" > < a id = "l00046" name = "l00046" > < / a > < span class = "lineno" > 46< / span > < span class = "comment" > /* Constructor */< / span > < / div >
< div class = "foldopen" id = "foldopen00047" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00047" name = "l00047" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_loader.html#a37aca066e63dff238865b5923a2d4335" > 47< / a > < / span > METAL_FUNC < a class = "code hl_function" href = "structmlx_1_1steel_1_1_block_loader.html#a37aca066e63dff238865b5923a2d4335" > BlockLoader< / a > (< / div >
< div class = "line" > < a id = "l00048" name = "l00048" > < / a > < span class = "lineno" > 48< / span > < span class = "keyword" > const< / span > device T* src_,< / div >
< div class = "line" > < a id = "l00049" name = "l00049" > < / a > < span class = "lineno" > 49< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > src_ld_,< / div >
< div class = "line" > < a id = "l00050" name = "l00050" > < / a > < span class = "lineno" > 50< / span > threadgroup T* dst_,< / div >
< div class = "line" > < a id = "l00051" name = "l00051" > < / a > < span class = "lineno" > 51< / span > ushort simd_group_id [[simdgroup_index_in_threadgroup]],< / div >
< div class = "line" > < a id = "l00052" name = "l00052" > < / a > < span class = "lineno" > 52< / span > ushort simd_lane_id [[thread_index_in_simdgroup]])< / div >
< div class = "line" > < a id = "l00053" name = "l00053" > < / a > < span class = "lineno" > 53< / span > : < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#aadafc50f7f06af434149d7469df4714d" > src_ld< / a > (src_ld_),< / div >
< div class = "line" > < a id = "l00054" name = "l00054" > < / a > < span class = "lineno" > 54< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#ab87876699d55473620c7ea99f9da911d" > tile_stride< / a > (reduction_dim ? BCOLS : BROWS * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#aadafc50f7f06af434149d7469df4714d" > src_ld< / a > ),< / div >
< div class = "line" > < a id = "l00055" name = "l00055" > < / a > < span class = "lineno" > 55< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a064e2cc77e0b1cf0f8027929e031775b" > thread_idx< / a > (simd_group_id * 32 + simd_lane_id),< / div >
< div class = "line" > < a id = "l00056" name = "l00056" > < / a > < span class = "lineno" > 56< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a9ef13742bcdf07532d8f09394928a8af" > bi< / a > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a064e2cc77e0b1cf0f8027929e031775b" > thread_idx< / a > / TCOLS),< / div >
< div class = "line" > < a id = "l00057" name = "l00057" > < / a > < span class = "lineno" > 57< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a78c326e75ee35a484685771143047cd4" > bj< / a > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a58bdf9b9c81962733e22ecdeae28c092" > vec_size< / a > * (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a064e2cc77e0b1cf0f8027929e031775b" > thread_idx< / a > % TCOLS)),< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00058" name = "l00058" > < / a > < span class = "lineno" > 58< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#af1c6c35a42e9da4408c1013ff1741bc2" > dst< / a > (dst_ + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a9ef13742bcdf07532d8f09394928a8af" > bi< / a > * dst_ld + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a78c326e75ee35a484685771143047cd4" > bj< / a > ),< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00059" name = "l00059" > < / a > < span class = "lineno" > 59< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#ad1db14517568ae9eddfb6986ef31c7aa" > src< / a > (src_ + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a9ef13742bcdf07532d8f09394928a8af" > bi< / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#aadafc50f7f06af434149d7469df4714d" > src_ld< / a > + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a78c326e75ee35a484685771143047cd4" > bj< / a > ) {}< / div >
2024-10-15 23:12:17 +08:00
< / div >
< div class = "line" > < a id = "l00060" name = "l00060" > < / a > < span class = "lineno" > 60< / span > < / div >
< div class = "line" > < a id = "l00061" name = "l00061" > < / a > < span class = "lineno" > 61< / span > < span class = "comment" > /* Apply operation to threadgroup without bound checking */< / span > < / div >
< div class = "line" > < a id = "l00062" name = "l00062" > < / a > < span class = "lineno" > 62< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > UnaryOp> < / div >
< div class = "foldopen" id = "foldopen00063" data-start = "{" data-end = "}" >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00063" name = "l00063" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_loader.html#adb4ca2cc193630a779de552fa8847ddf" > 63< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_block_loader.html#adb4ca2cc193630a779de552fa8847ddf" > apply_inplace_op< / a > (thread < span class = "keyword" > const< / span > UnaryOp& op)< span class = "keyword" > const < / span > {< / div >
2024-10-15 23:12:17 +08:00
< div class = "line" > < a id = "l00064" name = "l00064" > < / a > < span class = "lineno" > 64< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00065" name = "l00065" > < / a > < span class = "lineno" > 65< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < BROWS; i += TROWS) {< / div >
< div class = "line" > < a id = "l00066" name = "l00066" > < / a > < span class = "lineno" > 66< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00067" name = "l00067" > < / a > < span class = "lineno" > 67< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a58bdf9b9c81962733e22ecdeae28c092" > vec_size< / a > ; j++) {< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00068" name = "l00068" > < / a > < span class = "lineno" > 68< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#af1c6c35a42e9da4408c1013ff1741bc2" > dst< / a > [i * dst_ld + j] = op.apply(< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#af1c6c35a42e9da4408c1013ff1741bc2" > dst< / a > [i * dst_ld + j]);< / div >
2024-10-15 23:12:17 +08:00
< div class = "line" > < a id = "l00069" name = "l00069" > < / a > < span class = "lineno" > 69< / span > }< / div >
< div class = "line" > < a id = "l00070" name = "l00070" > < / a > < span class = "lineno" > 70< / span > }< / div >
< div class = "line" > < a id = "l00071" name = "l00071" > < / a > < span class = "lineno" > 71< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00072" name = "l00072" > < / a > < span class = "lineno" > 72< / span > < / div >
< div class = "line" > < a id = "l00073" name = "l00073" > < / a > < span class = "lineno" > 73< / span > < span class = "comment" > /* Load from device memory into threadgroup memory - without bound checking */< / span > < / div >
< div class = "foldopen" id = "foldopen00074" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00074" name = "l00074" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_loader.html#a6c9e27f11f48b34580ed2c7e9cad9a27" > 74< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_block_loader.html#a6c9e27f11f48b34580ed2c7e9cad9a27" > load_unsafe< / a > ()< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00075" name = "l00075" > < / a > < span class = "lineno" > 75< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00076" name = "l00076" > < / a > < span class = "lineno" > 76< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < BROWS; i += TROWS) {< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00077" name = "l00077" > < / a > < span class = "lineno" > 77< / span > *((threadgroup ReadVector*)(& < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#af1c6c35a42e9da4408c1013ff1741bc2" > dst< / a > [i * dst_ld])) =< / div >
< div class = "line" > < a id = "l00078" name = "l00078" > < / a > < span class = "lineno" > 78< / span > *((< span class = "keyword" > const< / span > device ReadVector*)(& < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#ad1db14517568ae9eddfb6986ef31c7aa" > src< / a > [i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#aadafc50f7f06af434149d7469df4714d" > src_ld< / a > ]));< / div >
2024-10-15 23:12:17 +08:00
< div class = "line" > < a id = "l00079" name = "l00079" > < / a > < span class = "lineno" > 79< / span > }< / div >
< div class = "line" > < a id = "l00080" name = "l00080" > < / a > < span class = "lineno" > 80< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00081" name = "l00081" > < / a > < span class = "lineno" > 81< / span > < / div >
< div class = "line" > < a id = "l00082" name = "l00082" > < / a > < span class = "lineno" > 82< / span > < span class = "comment" > /* Load from device memory into threadgroup memory - with bound checking */< / span > < / div >
< div class = "foldopen" id = "foldopen00083" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00083" name = "l00083" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_loader.html#abb0f4f66ec8b123627beb8eb4fbb609d" > 83< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_block_loader.html#abb0f4f66ec8b123627beb8eb4fbb609d" > load_safe< / a > (short2 src_tile_dim)< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00084" name = "l00084" > < / a > < span class = "lineno" > 84< / span > src_tile_dim = src_tile_dim - short2(< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a78c326e75ee35a484685771143047cd4" > bj< / a > , < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a9ef13742bcdf07532d8f09394928a8af" > bi< / a > );< / div >
< div class = "line" > < a id = "l00085" name = "l00085" > < / a > < span class = "lineno" > 85< / span > < / div >
< div class = "line" > < a id = "l00086" name = "l00086" > < / a > < span class = "lineno" > 86< / span > < span class = "comment" > // Skip loading if thread has no valid reads< / span > < / div >
< div class = "line" > < a id = "l00087" name = "l00087" > < / a > < span class = "lineno" > 87< / span > < span class = "keywordflow" > if< / span > (src_tile_dim.x < = 0 || src_tile_dim.y < = 0) {< / div >
< div class = "line" > < a id = "l00088" name = "l00088" > < / a > < span class = "lineno" > 88< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00089" name = "l00089" > < / a > < span class = "lineno" > 89< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < BROWS; i += TROWS) {< / div >
< div class = "line" > < a id = "l00090" name = "l00090" > < / a > < span class = "lineno" > 90< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00091" name = "l00091" > < / a > < span class = "lineno" > 91< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a58bdf9b9c81962733e22ecdeae28c092" > vec_size< / a > ; j++) {< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00092" name = "l00092" > < / a > < span class = "lineno" > 92< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#af1c6c35a42e9da4408c1013ff1741bc2" > dst< / a > [i * dst_ld + j] = T(0);< / div >
2024-10-15 23:12:17 +08:00
< div class = "line" > < a id = "l00093" name = "l00093" > < / a > < span class = "lineno" > 93< / span > }< / div >
< div class = "line" > < a id = "l00094" name = "l00094" > < / a > < span class = "lineno" > 94< / span > }< / div >
< div class = "line" > < a id = "l00095" name = "l00095" > < / a > < span class = "lineno" > 95< / span > < span class = "keywordflow" > return< / span > ;< / div >
< div class = "line" > < a id = "l00096" name = "l00096" > < / a > < span class = "lineno" > 96< / span > }< / div >
< div class = "line" > < a id = "l00097" name = "l00097" > < / a > < span class = "lineno" > 97< / span > < / div >
< div class = "line" > < a id = "l00098" name = "l00098" > < / a > < span class = "lineno" > 98< / span > < span class = "comment" > // Use fast thread memory for bound checks< / span > < / div >
< div class = "line" > < a id = "l00099" name = "l00099" > < / a > < span class = "lineno" > 99< / span > < span class = "keywordtype" > bool< / span > tmp_idx[< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a58bdf9b9c81962733e22ecdeae28c092" > vec_size< / a > ];< / div >
< div class = "line" > < a id = "l00100" name = "l00100" > < / a > < span class = "lineno" > 100< / span > T tmp_val[< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a58bdf9b9c81962733e22ecdeae28c092" > vec_size< / a > ];< / div >
< div class = "line" > < a id = "l00101" name = "l00101" > < / a > < span class = "lineno" > 101< / span > < / div >
< div class = "line" > < a id = "l00102" name = "l00102" > < / a > < span class = "lineno" > 102< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00103" name = "l00103" > < / a > < span class = "lineno" > 103< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < BROWS; i += TROWS) {< / div >
< div class = "line" > < a id = "l00104" name = "l00104" > < / a > < span class = "lineno" > 104< / span > < span class = "comment" > // Make sure tmp_idx only contains valid indices< / span > < / div >
< div class = "line" > < a id = "l00105" name = "l00105" > < / a > < span class = "lineno" > 105< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00106" name = "l00106" > < / a > < span class = "lineno" > 106< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a58bdf9b9c81962733e22ecdeae28c092" > vec_size< / a > ; j++) {< / div >
< div class = "line" > < a id = "l00107" name = "l00107" > < / a > < span class = "lineno" > 107< / span > tmp_idx[j] = (i < src_tile_dim.y) & & (j < src_tile_dim.x);< / div >
< div class = "line" > < a id = "l00108" name = "l00108" > < / a > < span class = "lineno" > 108< / span > }< / div >
< div class = "line" > < a id = "l00109" name = "l00109" > < / a > < span class = "lineno" > 109< / span > < / div >
< div class = "line" > < a id = "l00110" name = "l00110" > < / a > < span class = "lineno" > 110< / span > < span class = "comment" > // Read valid indices into tmp_val< / span > < / div >
< div class = "line" > < a id = "l00111" name = "l00111" > < / a > < span class = "lineno" > 111< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00112" name = "l00112" > < / a > < span class = "lineno" > 112< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a58bdf9b9c81962733e22ecdeae28c092" > vec_size< / a > ; j++) {< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00113" name = "l00113" > < / a > < span class = "lineno" > 113< / span > tmp_val[j] = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#ad1db14517568ae9eddfb6986ef31c7aa" > src< / a > [(tmp_idx[j] ? i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#aadafc50f7f06af434149d7469df4714d" > src_ld< / a > + j : 0)];< / div >
2024-10-15 23:12:17 +08:00
< div class = "line" > < a id = "l00114" name = "l00114" > < / a > < span class = "lineno" > 114< / span > }< / div >
< div class = "line" > < a id = "l00115" name = "l00115" > < / a > < span class = "lineno" > 115< / span > < / div >
< div class = "line" > < a id = "l00116" name = "l00116" > < / a > < span class = "lineno" > 116< / span > < span class = "comment" > // Zero out unneeded values< / span > < / div >
< div class = "line" > < a id = "l00117" name = "l00117" > < / a > < span class = "lineno" > 117< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00118" name = "l00118" > < / a > < span class = "lineno" > 118< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a58bdf9b9c81962733e22ecdeae28c092" > vec_size< / a > ; j++) {< / div >
< div class = "line" > < a id = "l00119" name = "l00119" > < / a > < span class = "lineno" > 119< / span > tmp_val[j] = tmp_idx[j] ? tmp_val[j] : T(0);< / div >
< div class = "line" > < a id = "l00120" name = "l00120" > < / a > < span class = "lineno" > 120< / span > }< / div >
< div class = "line" > < a id = "l00121" name = "l00121" > < / a > < span class = "lineno" > 121< / span > < / div >
< div class = "line" > < a id = "l00122" name = "l00122" > < / a > < span class = "lineno" > 122< / span > < span class = "comment" > // Copy values to threadgroup memory< / span > < / div >
< div class = "line" > < a id = "l00123" name = "l00123" > < / a > < span class = "lineno" > 123< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00124" name = "l00124" > < / a > < span class = "lineno" > 124< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#a58bdf9b9c81962733e22ecdeae28c092" > vec_size< / a > ; j++) {< / div >
2025-01-10 05:56:20 +08:00
< div class = "line" > < a id = "l00125" name = "l00125" > < / a > < span class = "lineno" > 125< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#af1c6c35a42e9da4408c1013ff1741bc2" > dst< / a > [i * dst_ld + j] = tmp_val[j];< / div >
2024-10-15 23:12:17 +08:00
< div class = "line" > < a id = "l00126" name = "l00126" > < / a > < span class = "lineno" > 126< / span > }< / div >
< div class = "line" > < a id = "l00127" name = "l00127" > < / a > < span class = "lineno" > 127< / span > }< / div >
< div class = "line" > < a id = "l00128" name = "l00128" > < / a > < span class = "lineno" > 128< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00129" name = "l00129" > < / a > < span class = "lineno" > 129< / span > < / div >
< div class = "line" > < a id = "l00130" name = "l00130" > < / a > < span class = "lineno" > 130< / span > < span class = "comment" > /* Iteration helper */< / span > < / div >
< div class = "foldopen" id = "foldopen00131" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00131" name = "l00131" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_block_loader.html#a6af21428f0e7c17b48ddedf4dd20a1e8" > 131< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_block_loader.html#a6af21428f0e7c17b48ddedf4dd20a1e8" > next< / a > () {< / div >
2024-11-23 04:24:16 +08:00
< div class = "line" > < a id = "l00132" name = "l00132" > < / a > < span class = "lineno" > 132< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#ad1db14517568ae9eddfb6986ef31c7aa" > src< / a > += < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_block_loader.html#ab87876699d55473620c7ea99f9da911d" > tile_stride< / a > ;< / div >
2024-10-15 23:12:17 +08:00
< div class = "line" > < a id = "l00133" name = "l00133" > < / a > < span class = "lineno" > 133< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00134" name = "l00134" > < / a > < span class = "lineno" > 134< / span > };< / div >
< / div >
< div class = "line" > < a id = "l00135" name = "l00135" > < / a > < span class = "lineno" > 135< / span > < / div >
< div class = "line" > < a id = "l00136" name = "l00136" > < / a > < span class = "lineno" > 136< / span > } < span class = "comment" > // namespace steel< / span > < / div >
< div class = "line" > < a id = "l00137" name = "l00137" > < / a > < span class = "lineno" > 137< / span > } < span class = "comment" > // namespace mlx< / span > < / div >
2025-01-10 05:56:20 +08:00
< div class = "ttc" id = "anamespacemlx_1_1steel_html" > < div class = "ttname" > < a href = "namespacemlx_1_1steel.html" > mlx::steel< / a > < / div > < div class = "ttdef" > < b > Definition< / b > attn.h:19< / div > < / div >
2024-10-15 23:12:17 +08:00
< div class = "ttc" id = "anamespacemlx_html" > < div class = "ttname" > < a href = "namespacemlx.html" > mlx< / a > < / div > < div class = "ttdef" > < b > Definition< / b > allocator.h:7< / div > < / div >
< div class = "ttc" id = "asteel_2defines_8h_html" > < div class = "ttname" > < a href = "steel_2defines_8h.html" > defines.h< / a > < / div > < / div >
< div class = "ttc" id = "asteel_2defines_8h_html_a5a5c3095b132a7589bc19cd5cb80e2c6" > < div class = "ttname" > < a href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div > < div class = "ttdeci" > #define STEEL_PRAGMA_UNROLL< / div > < div class = "ttdef" > < b > Definition< / b > defines.h:4< / div > < / div >
< div class = "ttc" id = "asteel_2defines_8h_html_a90b91c866313ffa46eff6d9cc944ad2b" > < div class = "ttname" > < a href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < / div > < div class = "ttdeci" > #define STEEL_CONST< / div > < div class = "ttdef" > < b > Definition< / b > defines.h:3< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_loader_1_1_read_vector_html" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_loader_1_1_read_vector.html" > mlx::steel::BlockLoader::ReadVector< / a > < / div > < div class = "ttdef" > < b > Definition< / b > loader.h:42< / div > < / div >
2024-11-23 04:24:16 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_loader_1_1_read_vector_html_a20963f7191251defca48bf8a843d019d" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_loader_1_1_read_vector.html#a20963f7191251defca48bf8a843d019d" > mlx::steel::BlockLoader::ReadVector::v< / a > < / div > < div class = "ttdeci" > uint8_t v[sizeof(T) *vec_size]< / div > < div class = "ttdef" > < b > Definition< / b > loader.h:43< / div > < / div >
2024-10-15 23:12:17 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_loader_html_a064e2cc77e0b1cf0f8027929e031775b" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_loader.html#a064e2cc77e0b1cf0f8027929e031775b" > mlx::steel::BlockLoader::thread_idx< / a > < / div > < div class = "ttdeci" > const short thread_idx< / div > < div class = "ttdef" > < b > Definition< / b > loader.h:34< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_loader_html_a37aca066e63dff238865b5923a2d4335" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_loader.html#a37aca066e63dff238865b5923a2d4335" > mlx::steel::BlockLoader::BlockLoader< / a > < / div > < div class = "ttdeci" > METAL_FUNC BlockLoader(const device T *src_, const int src_ld_, threadgroup T *dst_, ushort simd_group_id, ushort simd_lane_id)< / div > < div class = "ttdef" > < b > Definition< / b > loader.h:47< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_loader_html_a58bdf9b9c81962733e22ecdeae28c092" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_loader.html#a58bdf9b9c81962733e22ecdeae28c092" > mlx::steel::BlockLoader::vec_size< / a > < / div > < div class = "ttdeci" > STEEL_CONST short vec_size< / div > < div class = "ttdef" > < b > Definition< / b > loader.h:27< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_loader_html_a6af21428f0e7c17b48ddedf4dd20a1e8" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_loader.html#a6af21428f0e7c17b48ddedf4dd20a1e8" > mlx::steel::BlockLoader::next< / a > < / div > < div class = "ttdeci" > METAL_FUNC void next()< / div > < div class = "ttdef" > < b > Definition< / b > loader.h:131< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_loader_html_a6c9e27f11f48b34580ed2c7e9cad9a27" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_loader.html#a6c9e27f11f48b34580ed2c7e9cad9a27" > mlx::steel::BlockLoader::load_unsafe< / a > < / div > < div class = "ttdeci" > METAL_FUNC void load_unsafe() const< / div > < div class = "ttdef" > < b > Definition< / b > loader.h:74< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_loader_html_a78c326e75ee35a484685771143047cd4" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_loader.html#a78c326e75ee35a484685771143047cd4" > mlx::steel::BlockLoader::bj< / a > < / div > < div class = "ttdeci" > const short bj< / div > < div class = "ttdef" > < b > Definition< / b > loader.h:36< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_loader_html_a973804e5b1d418c98c90861cda1a6fb5" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_loader.html#a973804e5b1d418c98c90861cda1a6fb5" > mlx::steel::BlockLoader::n_rows< / a > < / div > < div class = "ttdeci" > STEEL_CONST short n_rows< / div > < div class = "ttdef" > < b > Definition< / b > loader.h:26< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_loader_html_a9ef13742bcdf07532d8f09394928a8af" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_loader.html#a9ef13742bcdf07532d8f09394928a8af" > mlx::steel::BlockLoader::bi< / a > < / div > < div class = "ttdeci" > const short bi< / div > < div class = "ttdef" > < b > Definition< / b > loader.h:35< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_loader_html_aadafc50f7f06af434149d7469df4714d" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_loader.html#aadafc50f7f06af434149d7469df4714d" > mlx::steel::BlockLoader::src_ld< / a > < / div > < div class = "ttdeci" > const int src_ld< / div > < div class = "ttdef" > < b > Definition< / b > loader.h:30< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_loader_html_ab87876699d55473620c7ea99f9da911d" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_loader.html#ab87876699d55473620c7ea99f9da911d" > mlx::steel::BlockLoader::tile_stride< / a > < / div > < div class = "ttdeci" > const int tile_stride< / div > < div class = "ttdef" > < b > Definition< / b > loader.h:31< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_loader_html_abb0f4f66ec8b123627beb8eb4fbb609d" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_loader.html#abb0f4f66ec8b123627beb8eb4fbb609d" > mlx::steel::BlockLoader::load_safe< / a > < / div > < div class = "ttdeci" > METAL_FUNC void load_safe(short2 src_tile_dim) const< / div > < div class = "ttdef" > < b > Definition< / b > loader.h:83< / div > < / div >
2024-11-23 04:24:16 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_loader_html_ad1db14517568ae9eddfb6986ef31c7aa" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_loader.html#ad1db14517568ae9eddfb6986ef31c7aa" > mlx::steel::BlockLoader::src< / a > < / div > < div class = "ttdeci" > const device T * src< / div > < div class = "ttdef" > < b > Definition< / b > loader.h:40< / div > < / div >
2024-10-15 23:12:17 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_loader_html_adb4ca2cc193630a779de552fa8847ddf" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_loader.html#adb4ca2cc193630a779de552fa8847ddf" > mlx::steel::BlockLoader::apply_inplace_op< / a > < / div > < div class = "ttdeci" > METAL_FUNC void apply_inplace_op(thread const UnaryOp & op) const< / div > < div class = "ttdef" > < b > Definition< / b > loader.h:63< / div > < / div >
2024-11-23 04:24:16 +08:00
< div class = "ttc" id = "astructmlx_1_1steel_1_1_block_loader_html_af1c6c35a42e9da4408c1013ff1741bc2" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_block_loader.html#af1c6c35a42e9da4408c1013ff1741bc2" > mlx::steel::BlockLoader::dst< / a > < / div > < div class = "ttdeci" > threadgroup T * dst< / div > < div class = "ttdef" > < b > Definition< / b > loader.h:39< / div > < / div >
2024-10-15 23:12:17 +08:00
< / div > <!-- fragment --> < / div > <!-- contents -->
< / div > <!-- doc - content -->
2025-01-10 05:56:20 +08:00
<!-- start footer part -->
< div id = "nav-path" class = "navpath" > <!-- id is needed for treeview function! -->
< ul >
< li class = "navelem" > < a class = "el" href = "dir_938ab0ecf10b8b860ff766c820f665fd.html" > mlx< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_1d446c9bd3c99228254c9484e0bc5c06.html" > backend< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_d0c977ea65824390717cdb7efc36c157.html" > metal< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_70a37effa88bcbd6b791977fa1e64356.html" > kernels< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_76215a6c54e2b67053e723fc2395583c.html" > steel< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_6768c99e6145fb9510ccdb40db8ede25.html" > gemm< / a > < / li > < li class = "navelem" > < a class = "el" href = "gemm_2loader_8h.html" > loader.h< / a > < / li >
< li class = "footer" > Generated by < a href = "https://www.doxygen.org/index.html" > < img class = "footer" src = "doxygen.svg" width = "104" height = "31" alt = "doxygen" / > < / a > 1.13.1 < / li >
< / ul >
< / div >
2024-10-15 23:12:17 +08:00
< / body >
< / html >