2024-06-07 11:28:06 +08:00
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
< html xmlns = "http://www.w3.org/1999/xhtml" lang = "en-US" >
<head>
  <!--
    Document metadata plus the stylesheets and scripts this page loads.
    Generated by Doxygen 1.10.0. Self-closing tags are kept in XHTML form to
    match the XHTML 1.0 Transitional doctype declared above.
  -->
  <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
  <meta http-equiv="X-UA-Compatible" content="IE=11"/>
  <meta name="generator" content="Doxygen 1.10.0"/>
  <meta name="viewport" content="width=device-width, initial-scale=1"/>
  <title>MLX: mlx/backend/metal/kernels/steel/conv/loaders/loader_channel_l.h Source File</title>
  <link href="tabs.css" rel="stylesheet" type="text/css"/>
  <script type="text/javascript" src="jquery.js"></script>
  <script type="text/javascript" src="dynsections.js"></script>
  <script type="text/javascript" src="clipboard.js"></script>
  <script type="text/javascript" src="cookie.js"></script>
  <link href="search/search.css" rel="stylesheet" type="text/css"/>
  <script type="text/javascript" src="search/searchdata.js"></script>
  <script type="text/javascript" src="search/search.js"></script>
  <link href="doxygen.css" rel="stylesheet" type="text/css"/>
</head>
< body >
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
  <!--
    Page header region: project title, menu/search bootstrap scripts,
    the two search pop-up containers, and the breadcrumb trail for this file.
  -->
  <div id="titlearea">
    <table cellspacing="0" cellpadding="0">
      <tbody>
        <tr id="projectrow">
          <td id="projectalign">
            <div id="projectname">MLX</div>
          </td>
        </tr>
      </tbody>
    </table>
  </div>
  <!-- end header part -->
  <!-- Generated by Doxygen 1.10.0 -->
  <!-- Creates the global searchBox object; the inline handlers on #MSearchSelectWindow below call its methods. -->
  <script type="text/javascript">
  /* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
  var searchBox = new SearchBox("searchBox", "search/",'.html');
  /* @license-end */
  </script>
  <script type="text/javascript" src="menudata.js"></script>
  <script type="text/javascript" src="menu.js"></script>
  <!-- On DOM ready: build the menu, then queue search initialisation. -->
  <script type="text/javascript">
  /* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
  $(function() {
    initMenu('',true,false,'search.php','Search');
    $(function() { init_search(); });
  });
  /* @license-end */
  </script>
  <!-- Empty placeholder element; NOTE(review): presumably filled in by initMenu() - confirm in menu.js. -->
  <div id="main-nav"></div>
  <!-- On DOM ready: initialise code folding for the source listing. -->
  <script type="text/javascript">
  /* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
  $(function() { codefold.init(0); });
  /* @license-end */
  </script>
  <!-- window showing the filter options -->
  <div id="MSearchSelectWindow"
       onmouseover="return searchBox.OnSearchSelectShow()"
       onmouseout="return searchBox.OnSearchSelectHide()"
       onkeydown="return searchBox.OnSearchSelectKey(event)">
  </div>
  <!-- container showing the search results (closed by default) -->
  <div id="MSearchResultsWindow">
    <div id="MSearchResults">
      <div class="SRPage">
        <div id="SRIndex">
          <div id="SRResults"></div>
          <div class="SRStatus" id="Loading">Loading...</div>
          <div class="SRStatus" id="Searching">Searching...</div>
          <div class="SRStatus" id="NoMatches">No Matches</div>
        </div>
      </div>
    </div>
  </div>
  <!-- Breadcrumb trail: one entry per directory level leading to this file. -->
  <div id="nav-path" class="navpath">
    <ul>
      <li class="navelem"><a class="el" href="dir_938ab0ecf10b8b860ff766c820f665fd.html">mlx</a></li>
      <li class="navelem"><a class="el" href="dir_1d446c9bd3c99228254c9484e0bc5c06.html">backend</a></li>
      <li class="navelem"><a class="el" href="dir_d0c977ea65824390717cdb7efc36c157.html">metal</a></li>
      <li class="navelem"><a class="el" href="dir_70a37effa88bcbd6b791977fa1e64356.html">kernels</a></li>
      <li class="navelem"><a class="el" href="dir_76215a6c54e2b67053e723fc2395583c.html">steel</a></li>
      <li class="navelem"><a class="el" href="dir_df9494e83ef22ae6150a0e080d9709ed.html">conv</a></li>
      <li class="navelem"><a class="el" href="dir_ba4426224ef60f409462a2a12fa18f06.html">loaders</a></li>
    </ul>
  </div>
</div><!-- top -->
<div class="header">
  <!-- Page title: the name of the source file listed on this page. -->
  <div class="headertitle"><div class="title">loader_channel_l.h</div></div>
</div><!-- header -->
< div class = "contents" >
< a href = "loader__channel__l_8h.html" > Go to the documentation of this file.< / a > < div class = "fragment" > < div class = "line" > < a id = "l00001" name = "l00001" > < / a > < span class = "lineno" > 1< / span > < span class = "comment" > // Copyright © 2024 Apple Inc.< / span > < / div >
< div class = "line" > < a id = "l00002" name = "l00002" > < / a > < span class = "lineno" > 2< / span > < / div >
< div class = "line" > < a id = "l00003" name = "l00003" > < / a > < span class = "lineno" > 3< / span > < span class = "preprocessor" > #pragma once< / span > < / div >
< div class = "line" > < a id = "l00004" name = "l00004" > < / a > < span class = "lineno" > 4< / span > < / div >
< div class = "line" > < a id = "l00005" name = "l00005" > < / a > < span class = "lineno" > 5< / span > < span class = "preprocessor" > #include " < a class = "code" href = "backend_2metal_2kernels_2steel_2utils_8h.html" > mlx/backend/metal/kernels/steel/utils.h< / a > " < / span > < / div >
< div class = "line" > < a id = "l00006" name = "l00006" > < / a > < span class = "lineno" > 6< / span > < / div >
< div class = "line" > < a id = "l00007" name = "l00007" > < / a > < span class = "lineno" > 7< / span > < span class = "preprocessor" > #include " < a class = "code" href = "conv_2params_8h.html" > mlx/backend/metal/kernels/steel/conv/params.h< / a > " < / span > < / div >
< div class = "line" > < a id = "l00008" name = "l00008" > < / a > < span class = "lineno" > 8< / span > < / div >
< div class = "line" > < a id = "l00010" name = "l00010" > < / a > < span class = "lineno" > 10< / span > < span class = "comment" > // Loading helper< / span > < / div >
< div class = "line" > < a id = "l00012" name = "l00012" > < / a > < span class = "lineno" > 12< / span > < / div >
< div class = "line" > < a id = "l00013" name = "l00013" > < / a > < span class = "lineno" > 13< / span > < span class = "keyword" > namespace < / span > < a class = "code hl_namespace" href = "namespacemlx.html" > mlx< / a > {< / div >
< div class = "foldopen" id = "foldopen00014" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00014" name = "l00014" > < / a > < span class = "lineno" > < a class = "line" href = "namespacemlx_1_1steel.html" > 14< / a > < / span > < span class = "keyword" > namespace < / span > steel {< / div >
< div class = "line" > < a id = "l00015" name = "l00015" > < / a > < span class = "lineno" > 15< / span > < / div >
< div class = "line" > < a id = "l00016" name = "l00016" > < / a > < span class = "lineno" > 16< / span > < span class = "keyword" > template< / span > < < / div >
< div class = "line" > < a id = "l00017" name = "l00017" > < / a > < span class = "lineno" > 17< / span > < span class = "keyword" > typename< / span > T,< / div >
< div class = "line" > < a id = "l00018" name = "l00018" > < / a > < span class = "lineno" > 18< / span > < span class = "keywordtype" > short< / span > BM,< / div >
< div class = "line" > < a id = "l00019" name = "l00019" > < / a > < span class = "lineno" > 19< / span > < span class = "keywordtype" > short< / span > BN,< / div >
< div class = "line" > < a id = "l00020" name = "l00020" > < / a > < span class = "lineno" > 20< / span > < span class = "keywordtype" > short< / span > BK,< / div >
< div class = "line" > < a id = "l00021" name = "l00021" > < / a > < span class = "lineno" > 21< / span > < span class = "keywordtype" > short< / span > tgp_size,< / div >
< div class = "line" > < a id = "l00022" name = "l00022" > < / a > < span class = "lineno" > 22< / span > < span class = "keywordtype" > short< / span > tgp_padding = 0> < / div >
< div class = "foldopen" id = "foldopen00023" data-start = "{" data-end = "};" >
< div class = "line" > < a id = "l00023" name = "l00023" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html" > 23< / a > < / span > < span class = "keyword" > struct < / span > < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html" > Conv2DInputBlockLoaderLargeFilter< / a > {< / div >
< div class = "line" > < a id = "l00024" name = "l00024" > < / a > < span class = "lineno" > 24< / span > < span class = "comment" > // Destination dimensions< / span > < / div >
< div class = "line" > < a id = "l00025" name = "l00025" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#ac070c6bd5be85b1ae805e18890db4fd4" > 25< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#ac070c6bd5be85b1ae805e18890db4fd4" > BROWS< / a > = BM;< / div >
< div class = "line" > < a id = "l00026" name = "l00026" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a29fbeeacdf5b6feeb74815ced255fa5a" > 26< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a29fbeeacdf5b6feeb74815ced255fa5a" > BCOLS< / a > = BK;< / div >
< div class = "line" > < a id = "l00027" name = "l00027" > < / a > < span class = "lineno" > 27< / span > < / div >
< div class = "line" > < a id = "l00028" name = "l00028" > < / a > < span class = "lineno" > 28< / span > < span class = "comment" > // Read dimensions< / span > < / div >
< div class = "line" > < a id = "l00029" name = "l00029" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a91192d512e7a18c2d16a139065000959" > 29< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a91192d512e7a18c2d16a139065000959" > dst_ld< / a > = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a29fbeeacdf5b6feeb74815ced255fa5a" > BCOLS< / a > + tgp_padding;< / div >
< div class = "line" > < a id = "l00030" name = "l00030" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#adcc83bf6c02391cc2375e55c06a1c9a4" > 30< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#adcc83bf6c02391cc2375e55c06a1c9a4" > vec_size< / a > = tgp_size / (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#ac070c6bd5be85b1ae805e18890db4fd4" > BROWS< / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a29fbeeacdf5b6feeb74815ced255fa5a" > BCOLS< / a > ) > = 8 ? 8 : 4;< / div >
< div class = "line" > < a id = "l00031" name = "l00031" > < / a > < span class = "lineno" > 31< / span > < / div >
< div class = "line" > < a id = "l00032" name = "l00032" > < / a > < span class = "lineno" > 32< / span > < span class = "comment" > // Thread read shape< / span > < / div >
< div class = "line" > < a id = "l00033" name = "l00033" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a0b5303f3258e0a21862dead8e3f5401e" > 33< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a0b5303f3258e0a21862dead8e3f5401e" > TCOLS< / a > = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a29fbeeacdf5b6feeb74815ced255fa5a" > BCOLS< / a > / < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#adcc83bf6c02391cc2375e55c06a1c9a4" > vec_size< / a > ;< / div >
< div class = "line" > < a id = "l00034" name = "l00034" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a2528ff5ed472e4ed35415ada42276b07" > 34< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a2528ff5ed472e4ed35415ada42276b07" > TROWS< / a > = tgp_size / < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a0b5303f3258e0a21862dead8e3f5401e" > TCOLS< / a > ;< / div >
< div class = "line" > < a id = "l00035" name = "l00035" > < / a > < span class = "lineno" > 35< / span > < / div >
< div class = "line" > < a id = "l00036" name = "l00036" > < / a > < span class = "lineno" > 36< / span > < span class = "comment" > // Rows / strided reads within the block< / span > < / div >
< div class = "line" > < a id = "l00037" name = "l00037" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a097c48a23e1bd7d8cf3e9d531397602f" > 37< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a097c48a23e1bd7d8cf3e9d531397602f" > n_rows< / a > = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#ac070c6bd5be85b1ae805e18890db4fd4" > BROWS< / a > / < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a2528ff5ed472e4ed35415ada42276b07" > TROWS< / a > ;< / div >
< div class = "line" > < a id = "l00038" name = "l00038" > < / a > < span class = "lineno" > 38< / span > < / div >
< div class = "line" > < a id = "l00039" name = "l00039" > < / a > < span class = "lineno" > 39< / span > < span class = "comment" > // Thread location indices< / span > < / div >
< div class = "line" > < a id = "l00040" name = "l00040" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a70da26a715135d973f88371a70255be9" > 40< / a > < / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a70da26a715135d973f88371a70255be9" > thread_idx< / a > ;< / div >
< div class = "line" > < a id = "l00041" name = "l00041" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a8e53b0a9951cb840d922cc285b257ee3" > 41< / a > < / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a8e53b0a9951cb840d922cc285b257ee3" > bi< / a > ;< / div >
< div class = "line" > < a id = "l00042" name = "l00042" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a7ae9e41f50c0c63c35b63086a1c22cc3" > 42< / a > < / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a7ae9e41f50c0c63c35b63086a1c22cc3" > bj< / a > ;< / div >
< div class = "line" > < a id = "l00043" name = "l00043" > < / a > < span class = "lineno" > 43< / span > < / div >
< div class = "line" > < a id = "l00044" name = "l00044" > < / a > < span class = "lineno" > 44< / span > < span class = "comment" > // threadgroup and device memory< / span > < / div >
< div class = "line" > < a id = "l00045" name = "l00045" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#ae048eb79f8b8d98f0fe8805c30fbb09f" > 45< / a > < / span > threadgroup T* < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#ae048eb79f8b8d98f0fe8805c30fbb09f" > dst< / a > ;< / div >
< div class = "line" > < a id = "l00046" name = "l00046" > < / a > < span class = "lineno" > 46< / span > < / div >
< div class = "line" > < a id = "l00047" name = "l00047" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > 47< / a > < / span > < span class = "keyword" > const< / span > constant < a class = "code hl_struct" href = "struct_m_l_x_conv_params.html" > MLXConvParams< 2> < / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > params< / a > ;< / div >
< div class = "line" > < a id = "l00048" name = "l00048" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#ab0724eb3ef52ee773b6607f6433b9f2c" > 48< / a > < / span > < span class = "keyword" > const< / span > constant < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_implicit_gemm_conv2_d_params.html" > ImplicitGemmConv2DParams< / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#ab0724eb3ef52ee773b6607f6433b9f2c" > gemm_params< / a > ;< / div >
< div class = "line" > < a id = "l00049" name = "l00049" > < / a > < span class = "lineno" > 49< / span > < / div >
< div class = "line" > < a id = "l00050" name = "l00050" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a3be4815d4090cb27ebe2f9bad1a39e95" > 50< / a > < / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a3be4815d4090cb27ebe2f9bad1a39e95" > weight_h< / a > ;< / div >
< div class = "line" > < a id = "l00051" name = "l00051" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#add1186c7accb62bfa8a4a7e87fc4cc84" > 51< / a > < / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#add1186c7accb62bfa8a4a7e87fc4cc84" > weight_w< / a > ;< / div >
< div class = "line" > < a id = "l00052" name = "l00052" > < / a > < span class = "lineno" > 52< / span > < / div >
< div class = "line" > < a id = "l00053" name = "l00053" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a656a46ee27486482b45ff90b3d626255" > 53< / a > < / span > < span class = "keyword" > const< / span > device T* < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a656a46ee27486482b45ff90b3d626255" > src< / a > [< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a097c48a23e1bd7d8cf3e9d531397602f" > n_rows< / a > ];< / div >
< div class = "line" > < a id = "l00054" name = "l00054" > < / a > < span class = "lineno" > 54< / span > < / div >
< div class = "line" > < a id = "l00055" name = "l00055" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#aeb67767e2d60d5ff0279a55553f3184e" > 55< / a > < / span > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#aeb67767e2d60d5ff0279a55553f3184e" > read_n< / a > [< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a097c48a23e1bd7d8cf3e9d531397602f" > n_rows< / a > ];< / div >
< div class = "line" > < a id = "l00056" name = "l00056" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a82dd8230e1f37500f1a562177c3ad692" > 56< / a > < / span > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a82dd8230e1f37500f1a562177c3ad692" > read_ih< / a > [< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a097c48a23e1bd7d8cf3e9d531397602f" > n_rows< / a > ];< / div >
< div class = "line" > < a id = "l00057" name = "l00057" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a17550360cae0a942a9552d7a67827512" > 57< / a > < / span > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a17550360cae0a942a9552d7a67827512" > read_iw< / a > [< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a097c48a23e1bd7d8cf3e9d531397602f" > n_rows< / a > ];< / div >
< div class = "line" > < a id = "l00058" name = "l00058" > < / a > < span class = "lineno" > 58< / span > < / div >
< div class = "line" > < a id = "l00059" name = "l00059" > < / a > < span class = "lineno" > 59< / span > < span class = "comment" > /* Constructor */< / span > < / div >
< div class = "foldopen" id = "foldopen00060" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00060" name = "l00060" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a8755116a535539744e4947bc69f9c50f" > 60< / a > < / span > METAL_FUNC < a class = "code hl_function" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a8755116a535539744e4947bc69f9c50f" > Conv2DInputBlockLoaderLargeFilter< / a > (< / div >
< div class = "line" > < a id = "l00061" name = "l00061" > < / a > < span class = "lineno" > 61< / span > < span class = "keyword" > const< / span > device T* src_,< / div >
< div class = "line" > < a id = "l00062" name = "l00062" > < / a > < span class = "lineno" > 62< / span > threadgroup T* dst_,< / div >
< div class = "line" > < a id = "l00063" name = "l00063" > < / a > < span class = "lineno" > 63< / span > < span class = "keyword" > const< / span > int2 offsets,< / div >
< div class = "line" > < a id = "l00064" name = "l00064" > < / a > < span class = "lineno" > 64< / span > < span class = "keyword" > const< / span > constant < a class = "code hl_struct" href = "struct_m_l_x_conv_params.html" > MLXConvParams< 2> < / a > * params_,< / div >
< div class = "line" > < a id = "l00065" name = "l00065" > < / a > < span class = "lineno" > 65< / span > < span class = "keyword" > const< / span > constant < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_implicit_gemm_conv2_d_params.html" > ImplicitGemmConv2DParams< / a > * gemm_params_,< / div >
< div class = "line" > < a id = "l00066" name = "l00066" > < / a > < span class = "lineno" > 66< / span > uint simd_group_id [[simdgroup_index_in_threadgroup]],< / div >
< div class = "line" > < a id = "l00067" name = "l00067" > < / a > < span class = "lineno" > 67< / span > uint simd_lane_id [[thread_index_in_simdgroup]])< / div >
< div class = "line" > < a id = "l00068" name = "l00068" > < / a > < span class = "lineno" > 68< / span > : < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a70da26a715135d973f88371a70255be9" > thread_idx< / a > (simd_group_id * 32 + simd_lane_id),< / div >
< div class = "line" > < a id = "l00069" name = "l00069" > < / a > < span class = "lineno" > 69< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a8e53b0a9951cb840d922cc285b257ee3" > bi< / a > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a70da26a715135d973f88371a70255be9" > thread_idx< / a > / < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a0b5303f3258e0a21862dead8e3f5401e" > TCOLS< / a > ),< / div >
< div class = "line" > < a id = "l00070" name = "l00070" > < / a > < span class = "lineno" > 70< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a7ae9e41f50c0c63c35b63086a1c22cc3" > bj< / a > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#adcc83bf6c02391cc2375e55c06a1c9a4" > vec_size< / a > * (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a70da26a715135d973f88371a70255be9" > thread_idx< / a > % < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a0b5303f3258e0a21862dead8e3f5401e" > TCOLS< / a > )),< / div >
< div class = "line" > < a id = "l00071" name = "l00071" > < / a > < span class = "lineno" > 71< / span > dst(dst_ + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a8e53b0a9951cb840d922cc285b257ee3" > bi< / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a91192d512e7a18c2d16a139065000959" > dst_ld< / a > + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a7ae9e41f50c0c63c35b63086a1c22cc3" > bj< / a > ),< / div >
< div class = "line" > < a id = "l00072" name = "l00072" > < / a > < span class = "lineno" > 72< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > params< / a > (params_),< / div >
< div class = "line" > < a id = "l00073" name = "l00073" > < / a > < span class = "lineno" > 73< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#ab0724eb3ef52ee773b6607f6433b9f2c" > gemm_params< / a > (gemm_params_),< / div >
< div class = "line" > < a id = "l00074" name = "l00074" > < / a > < span class = "lineno" > 74< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a3be4815d4090cb27ebe2f9bad1a39e95" > weight_h< / a > (0),< / div >
< div class = "line" > < a id = "l00075" name = "l00075" > < / a > < span class = "lineno" > 75< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#add1186c7accb62bfa8a4a7e87fc4cc84" > weight_w< / a > (0) {< / div >
< div class = "line" > < a id = "l00076" name = "l00076" > < / a > < span class = "lineno" > 76< / span > < span class = "keywordtype" > int< / span > out_n_pixels = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a19ccb9fecfccdc18b6a7f0cc43adbc6e" > oS< / a > [0] * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a19ccb9fecfccdc18b6a7f0cc43adbc6e" > oS< / a > [1];< / div >
< div class = "line" > < a id = "l00077" name = "l00077" > < / a > < span class = "lineno" > 77< / span > < / div >
< div class = "line" > < a id = "l00078" name = "l00078" > < / a > < span class = "lineno" > 78< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00079" name = "l00079" > < / a > < span class = "lineno" > 79< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a097c48a23e1bd7d8cf3e9d531397602f" > n_rows< / a > ; ++i) {< / div >
< div class = "line" > < a id = "l00080" name = "l00080" > < / a > < span class = "lineno" > 80< / span > < span class = "keywordtype" > int< / span > offset_nhw = offsets.y + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a8e53b0a9951cb840d922cc285b257ee3" > bi< / a > + i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a2528ff5ed472e4ed35415ada42276b07" > TROWS< / a > ;< / div >
< div class = "line" > < a id = "l00081" name = "l00081" > < / a > < span class = "lineno" > 81< / span > < span class = "keywordtype" > int< / span > n = offset_nhw / out_n_pixels;< / div >
< div class = "line" > < a id = "l00082" name = "l00082" > < / a > < span class = "lineno" > 82< / span > < span class = "keywordtype" > int< / span > hw = offset_nhw % out_n_pixels;< / div >
< div class = "line" > < a id = "l00083" name = "l00083" > < / a > < span class = "lineno" > 83< / span > < span class = "keywordtype" > int< / span > oh = hw / < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a19ccb9fecfccdc18b6a7f0cc43adbc6e" > oS< / a > [1];< / div >
< div class = "line" > < a id = "l00084" name = "l00084" > < / a > < span class = "lineno" > 84< / span > < span class = "keywordtype" > int< / span > ow = hw % < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a19ccb9fecfccdc18b6a7f0cc43adbc6e" > oS< / a > [1];< / div >
< div class = "line" > < a id = "l00085" name = "l00085" > < / a > < span class = "lineno" > 85< / span > < / div >
< div class = "line" > < a id = "l00086" name = "l00086" > < / a > < span class = "lineno" > 86< / span > < span class = "keywordtype" > int< / span > ih = oh * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a862191e8ab1bc8a47aa1396b36d46058" > str< / a > [0] - < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#ae84a9afb3a95b57e0b763bb0ebda0753" > pad< / a > [0];< / div >
< div class = "line" > < a id = "l00087" name = "l00087" > < / a > < span class = "lineno" > 87< / span > < span class = "keywordtype" > int< / span > iw = ow * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a862191e8ab1bc8a47aa1396b36d46058" > str< / a > [1] - < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#ae84a9afb3a95b57e0b763bb0ebda0753" > pad< / a > [1];< / div >
< div class = "line" > < a id = "l00088" name = "l00088" > < / a > < span class = "lineno" > 88< / span > < / div >
< div class = "line" > < a id = "l00089" name = "l00089" > < / a > < span class = "lineno" > 89< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#aeb67767e2d60d5ff0279a55553f3184e" > read_n< / a > [i] = n;< / div >
< div class = "line" > < a id = "l00090" name = "l00090" > < / a > < span class = "lineno" > 90< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a82dd8230e1f37500f1a562177c3ad692" > read_ih< / a > [i] = ih;< / div >
< div class = "line" > < a id = "l00091" name = "l00091" > < / a > < span class = "lineno" > 91< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a17550360cae0a942a9552d7a67827512" > read_iw< / a > [i] = iw;< / div >
< div class = "line" > < a id = "l00092" name = "l00092" > < / a > < span class = "lineno" > 92< / span > < / div >
< div class = "line" > < a id = "l00093" name = "l00093" > < / a > < span class = "lineno" > 93< / span > < span class = "comment" > // Adjust for flip< / span > < / div >
< div class = "line" > < a id = "l00094" name = "l00094" > < / a > < span class = "lineno" > 94< / span > < span class = "keywordflow" > if< / span > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a8b30cda15eda20f84f12db868f21d0ef" > flip< / a > ) {< / div >
< div class = "line" > < a id = "l00095" name = "l00095" > < / a > < span class = "lineno" > 95< / span > ih += (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#aba2074189644b1b59567d018409277a9" > wS< / a > [0] - 1) * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a7611db8f1621c7e09fc685ed44073b14" > kdil< / a > [0];< / div >
< div class = "line" > < a id = "l00096" name = "l00096" > < / a > < span class = "lineno" > 96< / span > iw += (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#aba2074189644b1b59567d018409277a9" > wS< / a > [1] - 1) * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a7611db8f1621c7e09fc685ed44073b14" > kdil< / a > [1];< / div >
< div class = "line" > < a id = "l00097" name = "l00097" > < / a > < span class = "lineno" > 97< / span > }< / div >
< div class = "line" > < a id = "l00098" name = "l00098" > < / a > < span class = "lineno" > 98< / span > < / div >
< div class = "line" > < a id = "l00099" name = "l00099" > < / a > < span class = "lineno" > 99< / span > < span class = "comment" > // Read from input if in bounds< / span > < / div >
< div class = "line" > < a id = "l00100" name = "l00100" > < / a > < span class = "lineno" > 100< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a656a46ee27486482b45ff90b3d626255" > src< / a > [i] = src_ + n * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#ab25eade6573784985dbea1216f9068cf" > in_strides< / a > [0] + ih * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#ab25eade6573784985dbea1216f9068cf" > in_strides< / a > [1] +< / div >
< div class = "line" > < a id = "l00101" name = "l00101" > < / a > < span class = "lineno" > 101< / span > iw * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#ab25eade6573784985dbea1216f9068cf" > in_strides< / a > [2] + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a7ae9e41f50c0c63c35b63086a1c22cc3" > bj< / a > ;< / div >
< div class = "line" > < a id = "l00102" name = "l00102" > < / a > < span class = "lineno" > 102< / span > }< / div >
< div class = "line" > < a id = "l00103" name = "l00103" > < / a > < span class = "lineno" > 103< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00104" name = "l00104" > < / a > < span class = "lineno" > 104< / span > < / div >
< div class = "line" > < a id = "l00105" name = "l00105" > < / a > < span class = "lineno" > 105< / span > < span class = "comment" > /* Load from device memory into threadgroup memory - without bound checking */< / span > < / div >
<!-- Fold region for listing lines 106 to 131: the load_unsafe() member documented in structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html. Tags are well-formed XHTML; less-than, greater-than and ampersand characters in the listed code are written as character references so the page parses under its XHTML doctype. Listing indentation follows the brace nesting of the code. -->
<div class="foldopen" id="foldopen00106" data-start="{" data-end="}">
<div class="line"><a id="l00106" name="l00106"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a961836be363409744e48e595d5e0c2ec">  106</a></span>  METAL_FUNC <span class="keywordtype">void</span> <a class="code hl_function" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a961836be363409744e48e595d5e0c2ec">load_unsafe</a>()<span class="keyword"> const </span>{</div>
<div class="line"><a id="l00107" name="l00107"></a><span class="lineno">  107</span>    <a class="code hl_define" href="steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6">STEEL_PRAGMA_UNROLL</a></div>
<div class="line"><a id="l00108" name="l00108"></a><span class="lineno">  108</span>    <span class="keywordflow">for</span> (<span class="keywordtype">short</span> i = 0, is = 0; i &lt; <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a097c48a23e1bd7d8cf3e9d531397602f">n_rows</a>; ++i, is += <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a2528ff5ed472e4ed35415ada42276b07">TROWS</a>) {</div>
<div class="line"><a id="l00109" name="l00109"></a><span class="lineno">  109</span>      <span class="comment">// Find bounds</span></div>
<div class="line"><a id="l00110" name="l00110"></a><span class="lineno">  110</span>      <span class="keywordtype">int</span> n = <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#aeb67767e2d60d5ff0279a55553f3184e">read_n</a>[i];</div>
<div class="line"><a id="l00111" name="l00111"></a><span class="lineno">  111</span>      <span class="keywordtype">int</span> ih = <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a82dd8230e1f37500f1a562177c3ad692">read_ih</a>[i] + <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a3be4815d4090cb27ebe2f9bad1a39e95">weight_h</a> * <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02">params</a>-&gt;<a class="code hl_variable" href="struct_m_l_x_conv_params.html#a7611db8f1621c7e09fc685ed44073b14">kdil</a>[0];</div>
<div class="line"><a id="l00112" name="l00112"></a><span class="lineno">  112</span>      <span class="keywordtype">int</span> iw = <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a17550360cae0a942a9552d7a67827512">read_iw</a>[i] + <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#add1186c7accb62bfa8a4a7e87fc4cc84">weight_w</a> * <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02">params</a>-&gt;<a class="code hl_variable" href="struct_m_l_x_conv_params.html#a7611db8f1621c7e09fc685ed44073b14">kdil</a>[1];</div>
<div class="line"><a id="l00113" name="l00113"></a><span class="lineno">  113</span> </div>
<div class="line"><a id="l00114" name="l00114"></a><span class="lineno">  114</span>      <span class="comment">// Read from input if in bounds</span></div>
<div class="line"><a id="l00115" name="l00115"></a><span class="lineno">  115</span>      <span class="keywordflow">if</span> ((n &lt; params-&gt;N) &amp;&amp; (ih &gt;= 0 &amp;&amp; ih &lt; params-&gt;iS[0]) &amp;&amp;</div>
<div class="line"><a id="l00116" name="l00116"></a><span class="lineno">  116</span>          (iw &gt;= 0 &amp;&amp; iw &lt; params-&gt;iS[1])) {</div>
<div class="line"><a id="l00117" name="l00117"></a><span class="lineno">  117</span>        <a class="code hl_define" href="steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6">STEEL_PRAGMA_UNROLL</a></div>
<div class="line"><a id="l00118" name="l00118"></a><span class="lineno">  118</span>        <span class="keywordflow">for</span> (<span class="keywordtype">short</span> j = 0; j &lt; <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#adcc83bf6c02391cc2375e55c06a1c9a4">vec_size</a>; ++j) {</div>
<div class="line"><a id="l00119" name="l00119"></a><span class="lineno">  119</span>          dst[is * <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a91192d512e7a18c2d16a139065000959">dst_ld</a> + j] = <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a656a46ee27486482b45ff90b3d626255">src</a>[i][j];</div>
<div class="line"><a id="l00120" name="l00120"></a><span class="lineno">  120</span>        }</div>
<div class="line"><a id="l00121" name="l00121"></a><span class="lineno">  121</span>      }</div>
<div class="line"><a id="l00122" name="l00122"></a><span class="lineno">  122</span> </div>
<div class="line"><a id="l00123" name="l00123"></a><span class="lineno">  123</span>      <span class="comment">// Zero pad otherwise</span></div>
<div class="line"><a id="l00124" name="l00124"></a><span class="lineno">  124</span>      <span class="keywordflow">else</span> {</div>
<div class="line"><a id="l00125" name="l00125"></a><span class="lineno">  125</span>        <a class="code hl_define" href="steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6">STEEL_PRAGMA_UNROLL</a></div>
<div class="line"><a id="l00126" name="l00126"></a><span class="lineno">  126</span>        <span class="keywordflow">for</span> (<span class="keywordtype">short</span> j = 0; j &lt; <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#adcc83bf6c02391cc2375e55c06a1c9a4">vec_size</a>; ++j) {</div>
<div class="line"><a id="l00127" name="l00127"></a><span class="lineno">  127</span>          dst[is * <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a91192d512e7a18c2d16a139065000959">dst_ld</a> + j] = T(0);</div>
<div class="line"><a id="l00128" name="l00128"></a><span class="lineno">  128</span>        }</div>
<div class="line"><a id="l00129" name="l00129"></a><span class="lineno">  129</span>      }</div>
<div class="line"><a id="l00130" name="l00130"></a><span class="lineno">  130</span>    }</div>
<div class="line"><a id="l00131" name="l00131"></a><span class="lineno">  131</span>  }</div>
</div>
< div class = "line" > < a id = "l00132" name = "l00132" > < / a > < span class = "lineno" > 132< / span > < / div >
< div class = "line" > < a id = "l00133" name = "l00133" > < / a > < span class = "lineno" > 133< / span > < span class = "comment" > /* Iteration helper */< / span > < / div >
<!-- Fold region for listing lines 134 to 161: the next() member documented in structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html. Tags are well-formed XHTML; less-than, greater-than and ampersand characters in the listed code are written as character references so the page parses under its XHTML doctype. Listing indentation follows the brace nesting of the code. -->
<div class="foldopen" id="foldopen00134" data-start="{" data-end="}">
<div class="line"><a id="l00134" name="l00134"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a3b71f379ff9baf39830c92f4f1ecde52">  134</a></span>  METAL_FUNC <span class="keywordtype">void</span> <a class="code hl_function" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a3b71f379ff9baf39830c92f4f1ecde52">next</a>() {</div>
<div class="line"><a id="l00135" name="l00135"></a><span class="lineno">  135</span>    <span class="keywordflow">if</span> (++weight_w &lt; params-&gt;wS[1]) {</div>
<div class="line"><a id="l00136" name="l00136"></a><span class="lineno">  136</span>      <a class="code hl_define" href="steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6">STEEL_PRAGMA_UNROLL</a></div>
<div class="line"><a id="l00137" name="l00137"></a><span class="lineno">  137</span>      <span class="keywordflow">for</span> (<span class="keywordtype">short</span> i = 0; i &lt; <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a097c48a23e1bd7d8cf3e9d531397602f">n_rows</a>; i++) {</div>
<div class="line"><a id="l00138" name="l00138"></a><span class="lineno">  138</span>        <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a656a46ee27486482b45ff90b3d626255">src</a>[i] += <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#ab0724eb3ef52ee773b6607f6433b9f2c">gemm_params</a>-&gt;<a class="code hl_variable" href="structmlx_1_1steel_1_1_implicit_gemm_conv2_d_params.html#acf168c72f4a86b72b8f5f386f07c9d8c">inp_jump_w</a>;</div>
<div class="line"><a id="l00139" name="l00139"></a><span class="lineno">  139</span>      }</div>
<div class="line"><a id="l00140" name="l00140"></a><span class="lineno">  140</span> </div>
<div class="line"><a id="l00141" name="l00141"></a><span class="lineno">  141</span>      <span class="keywordflow">return</span>;</div>
<div class="line"><a id="l00142" name="l00142"></a><span class="lineno">  142</span>    }</div>
<div class="line"><a id="l00143" name="l00143"></a><span class="lineno">  143</span> </div>
<div class="line"><a id="l00144" name="l00144"></a><span class="lineno">  144</span>    <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#add1186c7accb62bfa8a4a7e87fc4cc84">weight_w</a> = 0;</div>
<div class="line"><a id="l00145" name="l00145"></a><span class="lineno">  145</span> </div>
<div class="line"><a id="l00146" name="l00146"></a><span class="lineno">  146</span>    <span class="keywordflow">if</span> (++weight_h &lt; params-&gt;wS[0]) {</div>
<div class="line"><a id="l00147" name="l00147"></a><span class="lineno">  147</span>      <a class="code hl_define" href="steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6">STEEL_PRAGMA_UNROLL</a></div>
<div class="line"><a id="l00148" name="l00148"></a><span class="lineno">  148</span>      <span class="keywordflow">for</span> (<span class="keywordtype">short</span> i = 0; i &lt; <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a097c48a23e1bd7d8cf3e9d531397602f">n_rows</a>; i++) {</div>
<div class="line"><a id="l00149" name="l00149"></a><span class="lineno">  149</span>        <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a656a46ee27486482b45ff90b3d626255">src</a>[i] += <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#ab0724eb3ef52ee773b6607f6433b9f2c">gemm_params</a>-&gt;<a class="code hl_variable" href="structmlx_1_1steel_1_1_implicit_gemm_conv2_d_params.html#a03685a4066cdb11ffb647408e2c5b122">inp_jump_h</a>;</div>
<div class="line"><a id="l00150" name="l00150"></a><span class="lineno">  150</span>      }</div>
<div class="line"><a id="l00151" name="l00151"></a><span class="lineno">  151</span> </div>
<div class="line"><a id="l00152" name="l00152"></a><span class="lineno">  152</span>      <span class="keywordflow">return</span>;</div>
<div class="line"><a id="l00153" name="l00153"></a><span class="lineno">  153</span>    }</div>
<div class="line"><a id="l00154" name="l00154"></a><span class="lineno">  154</span> </div>
<div class="line"><a id="l00155" name="l00155"></a><span class="lineno">  155</span>    <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a3be4815d4090cb27ebe2f9bad1a39e95">weight_h</a> = 0;</div>
<div class="line"><a id="l00156" name="l00156"></a><span class="lineno">  156</span> </div>
<div class="line"><a id="l00157" name="l00157"></a><span class="lineno">  157</span>    <a class="code hl_define" href="steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6">STEEL_PRAGMA_UNROLL</a></div>
<div class="line"><a id="l00158" name="l00158"></a><span class="lineno">  158</span>    <span class="keywordflow">for</span> (<span class="keywordtype">short</span> i = 0; i &lt; <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a097c48a23e1bd7d8cf3e9d531397602f">n_rows</a>; i++) {</div>
<div class="line"><a id="l00159" name="l00159"></a><span class="lineno">  159</span>      <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a656a46ee27486482b45ff90b3d626255">src</a>[i] += <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#ab0724eb3ef52ee773b6607f6433b9f2c">gemm_params</a>-&gt;<a class="code hl_variable" href="structmlx_1_1steel_1_1_implicit_gemm_conv2_d_params.html#a78d30e843d65d1829623afb0b607f0a5">inp_jump_c</a>;</div>
<div class="line"><a id="l00160" name="l00160"></a><span class="lineno">  160</span>    }</div>
<div class="line"><a id="l00161" name="l00161"></a><span class="lineno">  161</span>  }</div>
</div>
< div class = "line" > < a id = "l00162" name = "l00162" > < / a > < span class = "lineno" > 162< / span > };< / div >
< / div >
< div class = "line" > < a id = "l00163" name = "l00163" > < / a > < span class = "lineno" > 163< / span > < / div >
< div class = "line" > < a id = "l00164" name = "l00164" > < / a > < span class = "lineno" > 164< / span > < span class = "keyword" > template< / span > < < / div >
< div class = "line" > < a id = "l00165" name = "l00165" > < / a > < span class = "lineno" > 165< / span > < span class = "keyword" > typename< / span > T,< / div >
< div class = "line" > < a id = "l00166" name = "l00166" > < / a > < span class = "lineno" > 166< / span > < span class = "keywordtype" > short< / span > BM,< / div >
< div class = "line" > < a id = "l00167" name = "l00167" > < / a > < span class = "lineno" > 167< / span > < span class = "keywordtype" > short< / span > BN,< / div >
< div class = "line" > < a id = "l00168" name = "l00168" > < / a > < span class = "lineno" > 168< / span > < span class = "keywordtype" > short< / span > BK,< / div >
< div class = "line" > < a id = "l00169" name = "l00169" > < / a > < span class = "lineno" > 169< / span > < span class = "keywordtype" > short< / span > tgp_size,< / div >
< div class = "line" > < a id = "l00170" name = "l00170" > < / a > < span class = "lineno" > 170< / span > < span class = "keywordtype" > short< / span > tgp_padding = 0> < / div >
< div class = "foldopen" id = "foldopen00171" data-start = "{" data-end = "};" >
< div class = "line" > < a id = "l00171" name = "l00171" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html" > 171< / a > < / span > < span class = "keyword" > struct < / span > < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html" > Conv2DInputBlockLoaderSmallFilter< / a > {< / div >
< div class = "line" > < a id = "l00172" name = "l00172" > < / a > < span class = "lineno" > 172< / span > < span class = "comment" > // Destination dimensions< / span > < / div >
< div class = "line" > < a id = "l00173" name = "l00173" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a10591ea957605a9c662f93d59ff3410d" > 173< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a10591ea957605a9c662f93d59ff3410d" > BROWS< / a > = BM;< / div >
< div class = "line" > < a id = "l00174" name = "l00174" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#ac3b40db720055350bba59d614ea1dd79" > 174< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#ac3b40db720055350bba59d614ea1dd79" > BCOLS< / a > = BK;< / div >
< div class = "line" > < a id = "l00175" name = "l00175" > < / a > < span class = "lineno" > 175< / span > < / div >
< div class = "line" > < a id = "l00176" name = "l00176" > < / a > < span class = "lineno" > 176< / span > < span class = "comment" > // Read dimensions< / span > < / div >
< div class = "line" > < a id = "l00177" name = "l00177" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a9e59da7e4436e61b2d3c3f982355910b" > 177< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a9e59da7e4436e61b2d3c3f982355910b" > dst_ld< / a > = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#ac3b40db720055350bba59d614ea1dd79" > BCOLS< / a > + tgp_padding;< / div >
< div class = "line" > < a id = "l00178" name = "l00178" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a71c313e1597a2bb99f7b07d434e119d2" > 178< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a71c313e1597a2bb99f7b07d434e119d2" > vec_size< / a > = tgp_size / (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a10591ea957605a9c662f93d59ff3410d" > BROWS< / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#ac3b40db720055350bba59d614ea1dd79" > BCOLS< / a > ) > = 8 ? 8 : 4;< / div >
< div class = "line" > < a id = "l00179" name = "l00179" > < / a > < span class = "lineno" > 179< / span > < / div >
< div class = "line" > < a id = "l00180" name = "l00180" > < / a > < span class = "lineno" > 180< / span > < span class = "comment" > // Thread read shape< / span > < / div >
< div class = "line" > < a id = "l00181" name = "l00181" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a5adbd51e9adb6f7853724d83de4ff755" > 181< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a5adbd51e9adb6f7853724d83de4ff755" > TCOLS< / a > = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#ac3b40db720055350bba59d614ea1dd79" > BCOLS< / a > / < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a71c313e1597a2bb99f7b07d434e119d2" > vec_size< / a > ;< / div >
< div class = "line" > < a id = "l00182" name = "l00182" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3957fb263fe040fe70683fd1d7b06487" > 182< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3957fb263fe040fe70683fd1d7b06487" > TROWS< / a > = tgp_size / < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a5adbd51e9adb6f7853724d83de4ff755" > TCOLS< / a > ;< / div >
< div class = "line" > < a id = "l00183" name = "l00183" > < / a > < span class = "lineno" > 183< / span > < / div >
< div class = "line" > < a id = "l00184" name = "l00184" > < / a > < span class = "lineno" > 184< / span > < span class = "comment" > // Rows / strided reads within the block< / span > < / div >
< div class = "line" > < a id = "l00185" name = "l00185" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3ec8a92c9e6643c1d5bf8af278026fe8" > 185< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3ec8a92c9e6643c1d5bf8af278026fe8" > n_rows< / a > = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a10591ea957605a9c662f93d59ff3410d" > BROWS< / a > / < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3957fb263fe040fe70683fd1d7b06487" > TROWS< / a > ;< / div >
< div class = "line" > < a id = "l00186" name = "l00186" > < / a > < span class = "lineno" > 186< / span > < / div >
< div class = "line" > < a id = "l00187" name = "l00187" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a270ab3da7c98a12525a59952742cc97d" > 187< / a > < / span > < span class = "keyword" > using < / span > < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a270ab3da7c98a12525a59952742cc97d" > mask_t< / a > = short;< / div >
< div class = "line" > < a id = "l00188" name = "l00188" > < / a > < span class = "lineno" > 188< / span > < / div >
< div class = "line" > < a id = "l00189" name = "l00189" > < / a > < span class = "lineno" > 189< / span > < span class = "comment" > // Thread location indices< / span > < / div >
< div class = "line" > < a id = "l00190" name = "l00190" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#ac18de37cde1459595bfe18b0d5ef146d" > 190< / a > < / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#ac18de37cde1459595bfe18b0d5ef146d" > thread_idx< / a > ;< / div >
< div class = "line" > < a id = "l00191" name = "l00191" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#ae3af75287f279d2cdeef189126740d4c" > 191< / a > < / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#ae3af75287f279d2cdeef189126740d4c" > bi< / a > ;< / div >
< div class = "line" > < a id = "l00192" name = "l00192" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a6fd3dd7b74d91609fa9dd61c657a0e32" > 192< / a > < / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a6fd3dd7b74d91609fa9dd61c657a0e32" > bj< / a > ;< / div >
< div class = "line" > < a id = "l00193" name = "l00193" > < / a > < span class = "lineno" > 193< / span > < / div >
< div class = "line" > < a id = "l00194" name = "l00194" > < / a > < span class = "lineno" > 194< / span > < span class = "comment" > // threadgroup and device memory< / span > < / div >
< div class = "line" > < a id = "l00195" name = "l00195" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a8598bf23a2bce6af13c876cbfa76449f" > 195< / a > < / span > threadgroup T* < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a8598bf23a2bce6af13c876cbfa76449f" > dst< / a > ;< / div >
< div class = "line" > < a id = "l00196" name = "l00196" > < / a > < span class = "lineno" > 196< / span > < / div >
< div class = "line" > < a id = "l00197" name = "l00197" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > 197< / a > < / span > < span class = "keyword" > const< / span > constant < a class = "code hl_struct" href = "struct_m_l_x_conv_params.html" > MLXConvParams< 2> < / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > ;< / div >
< div class = "line" > < a id = "l00198" name = "l00198" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#acc778b3c0b7ec38a43e8ea943df8704c" > 198< / a > < / span > < span class = "keyword" > const< / span > constant < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_implicit_gemm_conv2_d_params.html" > ImplicitGemmConv2DParams< / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#acc778b3c0b7ec38a43e8ea943df8704c" > gemm_params< / a > ;< / div >
< div class = "line" > < a id = "l00199" name = "l00199" > < / a > < span class = "lineno" > 199< / span > < / div >
< div class = "line" > < a id = "l00200" name = "l00200" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a366c3cee4ed1165545287c8d5ce49445" > 200< / a > < / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a366c3cee4ed1165545287c8d5ce49445" > weight_h< / a > ;< / div >
< div class = "line" > < a id = "l00201" name = "l00201" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4744bd79fb05e81eaa53d2eabe017446" > 201< / a > < / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4744bd79fb05e81eaa53d2eabe017446" > weight_w< / a > ;< / div >
< div class = "line" > < a id = "l00202" name = "l00202" > < / a > < span class = "lineno" > 202< / span > < / div >
< div class = "line" > < a id = "l00203" name = "l00203" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a57552e9cfbafad71d47b2f3a8e027bdf" > 203< / a > < / span > < span class = "keyword" > const< / span > device T* < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a57552e9cfbafad71d47b2f3a8e027bdf" > src< / a > [< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3ec8a92c9e6643c1d5bf8af278026fe8" > n_rows< / a > ];< / div >
< div class = "line" > < a id = "l00204" name = "l00204" > < / a > < span class = "lineno" > 204< / span > < / div >
< div class = "line" > < a id = "l00205" name = "l00205" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a0b892c1a7edb9ed20c076d8945855c19" > 205< / a > < / span > < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a270ab3da7c98a12525a59952742cc97d" > mask_t< / a > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a0b892c1a7edb9ed20c076d8945855c19" > mask_h< / a > [< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3ec8a92c9e6643c1d5bf8af278026fe8" > n_rows< / a > ];< / div >
< div class = "line" > < a id = "l00206" name = "l00206" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a19ddba7259c3c2c02ed90f3f635557be" > 206< / a > < / span > < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a270ab3da7c98a12525a59952742cc97d" > mask_t< / a > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a19ddba7259c3c2c02ed90f3f635557be" > mask_w< / a > [< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3ec8a92c9e6643c1d5bf8af278026fe8" > n_rows< / a > ];< / div >
< div class = "line" > < a id = "l00207" name = "l00207" > < / a > < span class = "lineno" > 207< / span > < / div >
< div class = "line" > < a id = "l00208" name = "l00208" > < / a > < span class = "lineno" > 208< / span > < span class = "comment" > /* Constructor */< / span > < / div >
< div class = "foldopen" id = "foldopen00209" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00209" name = "l00209" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a0a2cbf57c51cd928722e3f06aafcf933" > 209< / a > < / span > METAL_FUNC < a class = "code hl_function" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a0a2cbf57c51cd928722e3f06aafcf933" > Conv2DInputBlockLoaderSmallFilter< / a > (< / div >
< div class = "line" > < a id = "l00210" name = "l00210" > < / a > < span class = "lineno" > 210< / span > < span class = "keyword" > const< / span > device T* src_,< / div >
< div class = "line" > < a id = "l00211" name = "l00211" > < / a > < span class = "lineno" > 211< / span > threadgroup T* dst_,< / div >
< div class = "line" > < a id = "l00212" name = "l00212" > < / a > < span class = "lineno" > 212< / span > < span class = "keyword" > const< / span > int2 offsets,< / div >
< div class = "line" > < a id = "l00213" name = "l00213" > < / a > < span class = "lineno" > 213< / span > < span class = "keyword" > const< / span > constant < a class = "code hl_struct" href = "struct_m_l_x_conv_params.html" > MLXConvParams< 2> < / a > * params_,< / div >
< div class = "line" > < a id = "l00214" name = "l00214" > < / a > < span class = "lineno" > 214< / span > < span class = "keyword" > const< / span > constant < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_implicit_gemm_conv2_d_params.html" > ImplicitGemmConv2DParams< / a > * gemm_params_,< / div >
< div class = "line" > < a id = "l00215" name = "l00215" > < / a > < span class = "lineno" > 215< / span > uint simd_group_id [[simdgroup_index_in_threadgroup]],< / div >
< div class = "line" > < a id = "l00216" name = "l00216" > < / a > < span class = "lineno" > 216< / span > uint simd_lane_id [[thread_index_in_simdgroup]])< / div >
< div class = "line" > < a id = "l00217" name = "l00217" > < / a > < span class = "lineno" > 217< / span > : < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#ac18de37cde1459595bfe18b0d5ef146d" > thread_idx< / a > (simd_group_id * 32 + simd_lane_id),< / div >
< div class = "line" > < a id = "l00218" name = "l00218" > < / a > < span class = "lineno" > 218< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#ae3af75287f279d2cdeef189126740d4c" > bi< / a > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#ac18de37cde1459595bfe18b0d5ef146d" > thread_idx< / a > / < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a5adbd51e9adb6f7853724d83de4ff755" > TCOLS< / a > ),< / div >
< div class = "line" > < a id = "l00219" name = "l00219" > < / a > < span class = "lineno" > 219< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a6fd3dd7b74d91609fa9dd61c657a0e32" > bj< / a > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a71c313e1597a2bb99f7b07d434e119d2" > vec_size< / a > * (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#ac18de37cde1459595bfe18b0d5ef146d" > thread_idx< / a > % < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a5adbd51e9adb6f7853724d83de4ff755" > TCOLS< / a > )),< / div >
< div class = "line" > < a id = "l00220" name = "l00220" > < / a > < span class = "lineno" > 220< / span > dst(dst_ + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#ae3af75287f279d2cdeef189126740d4c" > bi< / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a9e59da7e4436e61b2d3c3f982355910b" > dst_ld< / a > + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a6fd3dd7b74d91609fa9dd61c657a0e32" > bj< / a > ),< / div >
< div class = "line" > < a id = "l00221" name = "l00221" > < / a > < span class = "lineno" > 221< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > (params_),< / div >
< div class = "line" > < a id = "l00222" name = "l00222" > < / a > < span class = "lineno" > 222< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#acc778b3c0b7ec38a43e8ea943df8704c" > gemm_params< / a > (gemm_params_),< / div >
< div class = "line" > < a id = "l00223" name = "l00223" > < / a > < span class = "lineno" > 223< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a366c3cee4ed1165545287c8d5ce49445" > weight_h< / a > (0),< / div >
< div class = "line" > < a id = "l00224" name = "l00224" > < / a > < span class = "lineno" > 224< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4744bd79fb05e81eaa53d2eabe017446" > weight_w< / a > (0) {< / div >
< div class = "line" > < a id = "l00225" name = "l00225" > < / a > < span class = "lineno" > 225< / span > < span class = "keywordtype" > int< / span > out_n_pixels = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a19ccb9fecfccdc18b6a7f0cc43adbc6e" > oS< / a > [0] * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a19ccb9fecfccdc18b6a7f0cc43adbc6e" > oS< / a > [1];< / div >
< div class = "line" > < a id = "l00226" name = "l00226" > < / a > < span class = "lineno" > 226< / span > < / div >
< div class = "line" > < a id = "l00227" name = "l00227" > < / a > < span class = "lineno" > 227< / span > < span class = "keywordtype" > int< / span > read_n[< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3ec8a92c9e6643c1d5bf8af278026fe8" > n_rows< / a > ];< / div >
< div class = "line" > < a id = "l00228" name = "l00228" > < / a > < span class = "lineno" > 228< / span > < span class = "keywordtype" > int< / span > read_ih[< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3ec8a92c9e6643c1d5bf8af278026fe8" > n_rows< / a > ];< / div >
< div class = "line" > < a id = "l00229" name = "l00229" > < / a > < span class = "lineno" > 229< / span > < span class = "keywordtype" > int< / span > read_iw[< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3ec8a92c9e6643c1d5bf8af278026fe8" > n_rows< / a > ];< / div >
< div class = "line" > < a id = "l00230" name = "l00230" > < / a > < span class = "lineno" > 230< / span > < / div >
< div class = "line" > < a id = "l00231" name = "l00231" > < / a > < span class = "lineno" > 231< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00232" name = "l00232" > < / a > < span class = "lineno" > 232< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3ec8a92c9e6643c1d5bf8af278026fe8" > n_rows< / a > ; ++i) {< / div >
< div class = "line" > < a id = "l00233" name = "l00233" > < / a > < span class = "lineno" > 233< / span > < span class = "keywordtype" > int< / span > offset_nhw = offsets.y + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#ae3af75287f279d2cdeef189126740d4c" > bi< / a > + i * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3957fb263fe040fe70683fd1d7b06487" > TROWS< / a > ;< / div >
< div class = "line" > < a id = "l00234" name = "l00234" > < / a > < span class = "lineno" > 234< / span > < span class = "keywordtype" > int< / span > n = offset_nhw / out_n_pixels;< / div >
< div class = "line" > < a id = "l00235" name = "l00235" > < / a > < span class = "lineno" > 235< / span > < span class = "keywordtype" > int< / span > hw = offset_nhw % out_n_pixels;< / div >
< div class = "line" > < a id = "l00236" name = "l00236" > < / a > < span class = "lineno" > 236< / span > < span class = "keywordtype" > int< / span > oh = hw / < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a19ccb9fecfccdc18b6a7f0cc43adbc6e" > oS< / a > [1];< / div >
< div class = "line" > < a id = "l00237" name = "l00237" > < / a > < span class = "lineno" > 237< / span > < span class = "keywordtype" > int< / span > ow = hw % < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a19ccb9fecfccdc18b6a7f0cc43adbc6e" > oS< / a > [1];< / div >
< div class = "line" > < a id = "l00238" name = "l00238" > < / a > < span class = "lineno" > 238< / span > < / div >
< div class = "line" > < a id = "l00239" name = "l00239" > < / a > < span class = "lineno" > 239< / span > < span class = "keywordtype" > int< / span > ih = oh * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a862191e8ab1bc8a47aa1396b36d46058" > str< / a > [0] - < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#ae84a9afb3a95b57e0b763bb0ebda0753" > pad< / a > [0];< / div >
< div class = "line" > < a id = "l00240" name = "l00240" > < / a > < span class = "lineno" > 240< / span > < span class = "keywordtype" > int< / span > iw = ow * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a862191e8ab1bc8a47aa1396b36d46058" > str< / a > [1] - < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#ae84a9afb3a95b57e0b763bb0ebda0753" > pad< / a > [1];< / div >
< div class = "line" > < a id = "l00241" name = "l00241" > < / a > < span class = "lineno" > 241< / span > < / div >
< div class = "line" > < a id = "l00242" name = "l00242" > < / a > < span class = "lineno" > 242< / span > read_n[i] = n;< / div >
< div class = "line" > < a id = "l00243" name = "l00243" > < / a > < span class = "lineno" > 243< / span > read_ih[i] = ih;< / div >
< div class = "line" > < a id = "l00244" name = "l00244" > < / a > < span class = "lineno" > 244< / span > read_iw[i] = iw;< / div >
< div class = "line" > < a id = "l00245" name = "l00245" > < / a > < span class = "lineno" > 245< / span > < / div >
< div class = "line" > < a id = "l00246" name = "l00246" > < / a > < span class = "lineno" > 246< / span > < span class = "comment" > // Adjust for flip< / span > < / div >
< div class = "line" > < a id = "l00247" name = "l00247" > < / a > < span class = "lineno" > 247< / span > < span class = "keywordflow" > if< / span > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a8b30cda15eda20f84f12db868f21d0ef" > flip< / a > ) {< / div >
< div class = "line" > < a id = "l00248" name = "l00248" > < / a > < span class = "lineno" > 248< / span > ih += (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#aba2074189644b1b59567d018409277a9" > wS< / a > [0] - 1) * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a7611db8f1621c7e09fc685ed44073b14" > kdil< / a > [0];< / div >
< div class = "line" > < a id = "l00249" name = "l00249" > < / a > < span class = "lineno" > 249< / span > iw += (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#aba2074189644b1b59567d018409277a9" > wS< / a > [1] - 1) * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a7611db8f1621c7e09fc685ed44073b14" > kdil< / a > [1];< / div >
< div class = "line" > < a id = "l00250" name = "l00250" > < / a > < span class = "lineno" > 250< / span > }< / div >
< div class = "line" > < a id = "l00251" name = "l00251" > < / a > < span class = "lineno" > 251< / span > < / div >
< div class = "line" > < a id = "l00252" name = "l00252" > < / a > < span class = "lineno" > 252< / span > < span class = "comment" > // Read from input if in bounds< / span > < / div >
< div class = "line" > < a id = "l00253" name = "l00253" > < / a > < span class = "lineno" > 253< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a57552e9cfbafad71d47b2f3a8e027bdf" > src< / a > [i] = src_ + n * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#ab25eade6573784985dbea1216f9068cf" > in_strides< / a > [0] + ih * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#ab25eade6573784985dbea1216f9068cf" > in_strides< / a > [1] +< / div >
< div class = "line" > < a id = "l00254" name = "l00254" > < / a > < span class = "lineno" > 254< / span > iw * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#ab25eade6573784985dbea1216f9068cf" > in_strides< / a > [2] + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a6fd3dd7b74d91609fa9dd61c657a0e32" > bj< / a > ;< / div >
< div class = "line" > < a id = "l00255" name = "l00255" > < / a > < span class = "lineno" > 255< / span > }< / div >
< div class = "line" > < a id = "l00256" name = "l00256" > < / a > < span class = "lineno" > 256< / span > < / div >
< div class = "line" > < a id = "l00257" name = "l00257" > < / a > < span class = "lineno" > 257< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00258" name = "l00258" > < / a > < span class = "lineno" > 258< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3ec8a92c9e6643c1d5bf8af278026fe8" > n_rows< / a > ; ++i) {< / div >
< div class = "line" > < a id = "l00259" name = "l00259" > < / a > < span class = "lineno" > 259< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a0b892c1a7edb9ed20c076d8945855c19" > mask_h< / a > [i] = 0;< / div >
< div class = "line" > < a id = "l00260" name = "l00260" > < / a > < span class = "lineno" > 260< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a19ddba7259c3c2c02ed90f3f635557be" > mask_w< / a > [i] = 0;< / div >
< div class = "line" > < a id = "l00261" name = "l00261" > < / a > < span class = "lineno" > 261< / span > }< / div >
< div class = "line" > < a id = "l00262" name = "l00262" > < / a > < span class = "lineno" > 262< / span > < / div >
< div class = "line" > < a id = "l00263" name = "l00263" > < / a > < span class = "lineno" > 263< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > kh = 0; kh < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#aba2074189644b1b59567d018409277a9" > wS< / a > [0]; kh++) {< / div >
< div class = "line" > < a id = "l00264" name = "l00264" > < / a > < span class = "lineno" > 264< / span > < span class = "keywordtype" > short< / span > flip_h = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a8b30cda15eda20f84f12db868f21d0ef" > flip< / a > ? < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#aba2074189644b1b59567d018409277a9" > wS< / a > [0] - kh - 1 : kh;< / div >
< div class = "line" > < a id = "l00265" name = "l00265" > < / a > < span class = "lineno" > 265< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00266" name = "l00266" > < / a > < span class = "lineno" > 266< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3ec8a92c9e6643c1d5bf8af278026fe8" > n_rows< / a > ; ++i) {< / div >
< div class = "line" > < a id = "l00267" name = "l00267" > < / a > < span class = "lineno" > 267< / span > < span class = "keywordtype" > int< / span > n = read_n[i];< / div >
< div class = "line" > < a id = "l00268" name = "l00268" > < / a > < span class = "lineno" > 268< / span > < span class = "keywordtype" > int< / span > ih = read_ih[i] + flip_h * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a7611db8f1621c7e09fc685ed44073b14" > kdil< / a > [0];< / div >
< div class = "line" > < a id = "l00269" name = "l00269" > < / a > < span class = "lineno" > 269< / span > < / div >
< div class = "line" > < a id = "l00270" name = "l00270" > < / a > < span class = "lineno" > 270< / span > < span class = "keywordtype" > bool< / span > in_bounds = n < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#ae6b7054dc3cffa8e6aedeb29fa7da932" > N< / a > & & ih > = 0 & & ih < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a72e1c3b4da0f70622cf18036bbf97fe6" > iS< / a > [0];< / div >
< div class = "line" > < a id = "l00271" name = "l00271" > < / a > < span class = "lineno" > 271< / span > < / div >
< div class = "line" > < a id = "l00272" name = "l00272" > < / a > < span class = "lineno" > 272< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a0b892c1a7edb9ed20c076d8945855c19" > mask_h< / a > [i] |= (in_bounds < < kh);< / div >
< div class = "line" > < a id = "l00273" name = "l00273" > < / a > < span class = "lineno" > 273< / span > }< / div >
< div class = "line" > < a id = "l00274" name = "l00274" > < / a > < span class = "lineno" > 274< / span > }< / div >
< div class = "line" > < a id = "l00275" name = "l00275" > < / a > < span class = "lineno" > 275< / span > < / div >
< div class = "line" > < a id = "l00276" name = "l00276" > < / a > < span class = "lineno" > 276< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > kw = 0; kw < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#aba2074189644b1b59567d018409277a9" > wS< / a > [1]; kw++) {< / div >
< div class = "line" > < a id = "l00277" name = "l00277" > < / a > < span class = "lineno" > 277< / span > < span class = "keywordtype" > short< / span > flip_w = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a8b30cda15eda20f84f12db868f21d0ef" > flip< / a > ? < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#aba2074189644b1b59567d018409277a9" > wS< / a > [1] - kw - 1 : kw;< / div >
< div class = "line" > < a id = "l00278" name = "l00278" > < / a > < span class = "lineno" > 278< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00279" name = "l00279" > < / a > < span class = "lineno" > 279< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3ec8a92c9e6643c1d5bf8af278026fe8" > n_rows< / a > ; ++i) {< / div >
< div class = "line" > < a id = "l00280" name = "l00280" > < / a > < span class = "lineno" > 280< / span > < span class = "keywordtype" > int< / span > iw = read_iw[i] + flip_w * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a7611db8f1621c7e09fc685ed44073b14" > kdil< / a > [1];< / div >
< div class = "line" > < a id = "l00281" name = "l00281" > < / a > < span class = "lineno" > 281< / span > < / div >
< div class = "line" > < a id = "l00282" name = "l00282" > < / a > < span class = "lineno" > 282< / span > < span class = "keywordtype" > bool< / span > in_bounds = iw > = 0 & & iw < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > params< / a > -> < a class = "code hl_variable" href = "struct_m_l_x_conv_params.html#a72e1c3b4da0f70622cf18036bbf97fe6" > iS< / a > [1];< / div >
< div class = "line" > < a id = "l00283" name = "l00283" > < / a > < span class = "lineno" > 283< / span > < / div >
< div class = "line" > < a id = "l00284" name = "l00284" > < / a > < span class = "lineno" > 284< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a19ddba7259c3c2c02ed90f3f635557be" > mask_w< / a > [i] |= (in_bounds < < kw);< / div >
< div class = "line" > < a id = "l00285" name = "l00285" > < / a > < span class = "lineno" > 285< / span > }< / div >
< div class = "line" > < a id = "l00286" name = "l00286" > < / a > < span class = "lineno" > 286< / span > }< / div >
< div class = "line" > < a id = "l00287" name = "l00287" > < / a > < span class = "lineno" > 287< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00288" name = "l00288" > < / a > < span class = "lineno" > 288< / span > < / div >
< div class = "line" > < a id = "l00289" name = "l00289" > < / a > < span class = "lineno" > 289< / span > < span class = "comment" > /* Load from device memory into threadgroup memory - without bound checking */< / span > < / div >
< div class = "foldopen" id = "foldopen00290" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00290" name = "l00290" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a8034abc10483487fc94313e3674d1111" > 290< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a8034abc10483487fc94313e3674d1111" > load_unsafe< / a > ()< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00291" name = "l00291" > < / a > < span class = "lineno" > 291< / span > < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a270ab3da7c98a12525a59952742cc97d" > mask_t< / a > h_mask = < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a270ab3da7c98a12525a59952742cc97d" > mask_t< / a > (1) < < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a366c3cee4ed1165545287c8d5ce49445" > weight_h< / a > ;< / div >
< div class = "line" > < a id = "l00292" name = "l00292" > < / a > < span class = "lineno" > 292< / span > < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a270ab3da7c98a12525a59952742cc97d" > mask_t< / a > w_mask = < a class = "code hl_typedef" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a270ab3da7c98a12525a59952742cc97d" > mask_t< / a > (1) < < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4744bd79fb05e81eaa53d2eabe017446" > weight_w< / a > ;< / div >
< div class = "line" > < a id = "l00293" name = "l00293" > < / a > < span class = "lineno" > 293< / span > < / div >
< div class = "line" > < a id = "l00294" name = "l00294" > < / a > < span class = "lineno" > 294< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00295" name = "l00295" > < / a > < span class = "lineno" > 295< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0, is = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3ec8a92c9e6643c1d5bf8af278026fe8" > n_rows< / a > ; ++i, is += < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3957fb263fe040fe70683fd1d7b06487" > TROWS< / a > ) {< / div >
< div class = "line" > < a id = "l00296" name = "l00296" > < / a > < span class = "lineno" > 296< / span > < span class = "comment" > // Read from input if in bounds< / span > < / div >
< div class = "line" > < a id = "l00297" name = "l00297" > < / a > < span class = "lineno" > 297< / span > < span class = "keywordflow" > if< / span > ((< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a0b892c1a7edb9ed20c076d8945855c19" > mask_h< / a > [i] & h_mask) & & (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a19ddba7259c3c2c02ed90f3f635557be" > mask_w< / a > [i] & w_mask)) {< / div >
< div class = "line" > < a id = "l00298" name = "l00298" > < / a > < span class = "lineno" > 298< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00299" name = "l00299" > < / a > < span class = "lineno" > 299< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a71c313e1597a2bb99f7b07d434e119d2" > vec_size< / a > ; ++j) {< / div >
< div class = "line" > < a id = "l00300" name = "l00300" > < / a > < span class = "lineno" > 300< / span > dst[is * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a9e59da7e4436e61b2d3c3f982355910b" > dst_ld< / a > + j] = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a57552e9cfbafad71d47b2f3a8e027bdf" > src< / a > [i][j];< / div >
< div class = "line" > < a id = "l00301" name = "l00301" > < / a > < span class = "lineno" > 301< / span > }< / div >
< div class = "line" > < a id = "l00302" name = "l00302" > < / a > < span class = "lineno" > 302< / span > }< / div >
< div class = "line" > < a id = "l00303" name = "l00303" > < / a > < span class = "lineno" > 303< / span > < / div >
< div class = "line" > < a id = "l00304" name = "l00304" > < / a > < span class = "lineno" > 304< / span > < span class = "comment" > // Zero pad otherwise< / span > < / div >
< div class = "line" > < a id = "l00305" name = "l00305" > < / a > < span class = "lineno" > 305< / span > < span class = "keywordflow" > else< / span > {< / div >
< div class = "line" > < a id = "l00306" name = "l00306" > < / a > < span class = "lineno" > 306< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00307" name = "l00307" > < / a > < span class = "lineno" > 307< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > j = 0; j < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a71c313e1597a2bb99f7b07d434e119d2" > vec_size< / a > ; ++j) {< / div >
< div class = "line" > < a id = "l00308" name = "l00308" > < / a > < span class = "lineno" > 308< / span > dst[is * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a9e59da7e4436e61b2d3c3f982355910b" > dst_ld< / a > + j] = T(0);< / div >
< div class = "line" > < a id = "l00309" name = "l00309" > < / a > < span class = "lineno" > 309< / span > }< / div >
< div class = "line" > < a id = "l00310" name = "l00310" > < / a > < span class = "lineno" > 310< / span > }< / div >
< div class = "line" > < a id = "l00311" name = "l00311" > < / a > < span class = "lineno" > 311< / span > }< / div >
< div class = "line" > < a id = "l00312" name = "l00312" > < / a > < span class = "lineno" > 312< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00313" name = "l00313" > < / a > < span class = "lineno" > 313< / span > < / div >
< div class = "line" > < a id = "l00314" name = "l00314" > < / a > < span class = "lineno" > 314< / span > < span class = "comment" > /* Iteration helper */< / span > < / div >
< div class = "foldopen" id = "foldopen00315" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00315" name = "l00315" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a78d2b0098311a278be8394edbd5fc731" > 315< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a78d2b0098311a278be8394edbd5fc731" > next< / a > () {< / div >
< div class = "line" > < a id = "l00316" name = "l00316" > < / a > < span class = "lineno" > 316< / span > < span class = "keywordflow" > if< / span > (++weight_w < params-> wS[1]) {< / div >
< div class = "line" > < a id = "l00317" name = "l00317" > < / a > < span class = "lineno" > 317< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00318" name = "l00318" > < / a > < span class = "lineno" > 318< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3ec8a92c9e6643c1d5bf8af278026fe8" > n_rows< / a > ; i++) {< / div >
< div class = "line" > < a id = "l00319" name = "l00319" > < / a > < span class = "lineno" > 319< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a57552e9cfbafad71d47b2f3a8e027bdf" > src< / a > [i] += < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#acc778b3c0b7ec38a43e8ea943df8704c" > gemm_params< / a > -> < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_implicit_gemm_conv2_d_params.html#acf168c72f4a86b72b8f5f386f07c9d8c" > inp_jump_w< / a > ;< / div >
< div class = "line" > < a id = "l00320" name = "l00320" > < / a > < span class = "lineno" > 320< / span > }< / div >
< div class = "line" > < a id = "l00321" name = "l00321" > < / a > < span class = "lineno" > 321< / span > < / div >
< div class = "line" > < a id = "l00322" name = "l00322" > < / a > < span class = "lineno" > 322< / span > < span class = "keywordflow" > return< / span > ;< / div >
< div class = "line" > < a id = "l00323" name = "l00323" > < / a > < span class = "lineno" > 323< / span > }< / div >
< div class = "line" > < a id = "l00324" name = "l00324" > < / a > < span class = "lineno" > 324< / span > < / div >
< div class = "line" > < a id = "l00325" name = "l00325" > < / a > < span class = "lineno" > 325< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4744bd79fb05e81eaa53d2eabe017446" > weight_w< / a > = 0;< / div >
< div class = "line" > < a id = "l00326" name = "l00326" > < / a > < span class = "lineno" > 326< / span > < / div >
< div class = "line" > < a id = "l00327" name = "l00327" > < / a > < span class = "lineno" > 327< / span > < span class = "keywordflow" > if< / span > (++weight_h < params-> wS[0]) {< / div >
< div class = "line" > < a id = "l00328" name = "l00328" > < / a > < span class = "lineno" > 328< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00329" name = "l00329" > < / a > < span class = "lineno" > 329< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3ec8a92c9e6643c1d5bf8af278026fe8" > n_rows< / a > ; i++) {< / div >
< div class = "line" > < a id = "l00330" name = "l00330" > < / a > < span class = "lineno" > 330< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a57552e9cfbafad71d47b2f3a8e027bdf" > src< / a > [i] += < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#acc778b3c0b7ec38a43e8ea943df8704c" > gemm_params< / a > -> < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_implicit_gemm_conv2_d_params.html#a03685a4066cdb11ffb647408e2c5b122" > inp_jump_h< / a > ;< / div >
< div class = "line" > < a id = "l00331" name = "l00331" > < / a > < span class = "lineno" > 331< / span > }< / div >
< div class = "line" > < a id = "l00332" name = "l00332" > < / a > < span class = "lineno" > 332< / span > < / div >
< div class = "line" > < a id = "l00333" name = "l00333" > < / a > < span class = "lineno" > 333< / span > < span class = "keywordflow" > return< / span > ;< / div >
< div class = "line" > < a id = "l00334" name = "l00334" > < / a > < span class = "lineno" > 334< / span > }< / div >
< div class = "line" > < a id = "l00335" name = "l00335" > < / a > < span class = "lineno" > 335< / span > < / div >
< div class = "line" > < a id = "l00336" name = "l00336" > < / a > < span class = "lineno" > 336< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a366c3cee4ed1165545287c8d5ce49445" > weight_h< / a > = 0;< / div >
< div class = "line" > < a id = "l00337" name = "l00337" > < / a > < span class = "lineno" > 337< / span > < / div >
< div class = "line" > < a id = "l00338" name = "l00338" > < / a > < span class = "lineno" > 338< / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div >
< div class = "line" > < a id = "l00339" name = "l00339" > < / a > < span class = "lineno" > 339< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > i = 0; i < < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3ec8a92c9e6643c1d5bf8af278026fe8" > n_rows< / a > ; i++) {< / div >
< div class = "line" > < a id = "l00340" name = "l00340" > < / a > < span class = "lineno" > 340< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a57552e9cfbafad71d47b2f3a8e027bdf" > src< / a > [i] += < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#acc778b3c0b7ec38a43e8ea943df8704c" > gemm_params< / a > -> < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_implicit_gemm_conv2_d_params.html#a78d30e843d65d1829623afb0b607f0a5" > inp_jump_c< / a > ;< / div >
< div class = "line" > < a id = "l00341" name = "l00341" > < / a > < span class = "lineno" > 341< / span > }< / div >
< div class = "line" > < a id = "l00342" name = "l00342" > < / a > < span class = "lineno" > 342< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00343" name = "l00343" > < / a > < span class = "lineno" > 343< / span > };< / div >
< / div >
< div class = "line" > < a id = "l00344" name = "l00344" > < / a > < span class = "lineno" > 344< / span > < / div >
< div class = "line" > < a id = "l00345" name = "l00345" > < / a > < span class = "lineno" > 345< / span > < span class = "keyword" > template< / span > < < / div >
< div class = "line" > < a id = "l00346" name = "l00346" > < / a > < span class = "lineno" > 346< / span > < span class = "keyword" > typename< / span > T,< / div >
< div class = "line" > < a id = "l00347" name = "l00347" > < / a > < span class = "lineno" > 347< / span > < span class = "keywordtype" > short< / span > BM,< / div >
< div class = "line" > < a id = "l00348" name = "l00348" > < / a > < span class = "lineno" > 348< / span > < span class = "keywordtype" > short< / span > BN,< / div >
< div class = "line" > < a id = "l00349" name = "l00349" > < / a > < span class = "lineno" > 349< / span > < span class = "keywordtype" > short< / span > BK,< / div >
< div class = "line" > < a id = "l00350" name = "l00350" > < / a > < span class = "lineno" > 350< / span > < span class = "keywordtype" > short< / span > tgp_size,< / div >
< div class = "line" > < a id = "l00351" name = "l00351" > < / a > < span class = "lineno" > 351< / span > < span class = "keywordtype" > short< / span > tgp_padding = 0> < / div >
< div class = "foldopen" id = "foldopen00352" data-start = "{" data-end = "};" >
< div class = "line" > < a id = "l00352" name = "l00352" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html" > 352< / a > < / span > < span class = "keyword" > struct < / span > < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html" > Conv2DWeightBlockLoader< / a > {< / div >
< div class = "line" > < a id = "l00353" name = "l00353" > < / a > < span class = "lineno" > 353< / span > < span class = "comment" > // Destination dimensions< / span > < / div >
< div class = "line" > < a id = "l00354" name = "l00354" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#ae9b86b05b23153ea1abaeead456c491c" > 354< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#ae9b86b05b23153ea1abaeead456c491c" > BROWS< / a > = BN;< / div >
< div class = "line" > < a id = "l00355" name = "l00355" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a86519729ef0561686bb86e474c95b93d" > 355< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a86519729ef0561686bb86e474c95b93d" > BCOLS< / a > = BK;< / div >
< div class = "line" > < a id = "l00356" name = "l00356" > < / a > < span class = "lineno" > 356< / span > < / div >
< div class = "line" > < a id = "l00357" name = "l00357" > < / a > < span class = "lineno" > 357< / span > < span class = "comment" > // Read dimensions< / span > < / div >
< div class = "line" > < a id = "l00358" name = "l00358" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a0ff5a6d503e0bbac4634030a75ab818d" > 358< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a0ff5a6d503e0bbac4634030a75ab818d" > dst_ld< / a > = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a86519729ef0561686bb86e474c95b93d" > BCOLS< / a > + tgp_padding;< / div >
< div class = "line" > < a id = "l00359" name = "l00359" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a10109dc9553207f5a365799e4969c6d2" > 359< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a10109dc9553207f5a365799e4969c6d2" > vec_size< / a > =< / div >
< div class = "line" > < a id = "l00360" name = "l00360" > < / a > < span class = "lineno" > 360< / span > (BN == 8) ? 1 : (tgp_size / (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#ae9b86b05b23153ea1abaeead456c491c" > BROWS< / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a86519729ef0561686bb86e474c95b93d" > BCOLS< / a > ) > = 8 ? 8 : 4);< / div >
< div class = "line" > < a id = "l00361" name = "l00361" > < / a > < span class = "lineno" > 361< / span > < / div >
< div class = "line" > < a id = "l00362" name = "l00362" > < / a > < span class = "lineno" > 362< / span > < span class = "comment" > // Thread read shape< / span > < / div >
< div class = "line" > < a id = "l00363" name = "l00363" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a80cb90674f839d5d4ecfde384fa0a7a2" > 363< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a80cb90674f839d5d4ecfde384fa0a7a2" > TCOLS< / a > = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a86519729ef0561686bb86e474c95b93d" > BCOLS< / a > / < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a10109dc9553207f5a365799e4969c6d2" > vec_size< / a > ;< / div >
< div class = "line" > < a id = "l00364" name = "l00364" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a94f0ce5bb7d87bc1fb6a7c2ba2b892d4" > 364< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a94f0ce5bb7d87bc1fb6a7c2ba2b892d4" > TROWS< / a > = tgp_size / < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a80cb90674f839d5d4ecfde384fa0a7a2" > TCOLS< / a > ;< / div >
< div class = "line" > < a id = "l00365" name = "l00365" > < / a > < span class = "lineno" > 365< / span > < / div >
< div class = "line" > < a id = "l00366" name = "l00366" > < / a > < span class = "lineno" > 366< / span > < span class = "comment" > // Rows / strided reads within the block< / span > < / div >
< div class = "line" > < a id = "l00367" name = "l00367" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a593ec140370d53f8c968f6240116d38b" > 367< / a > < / span > < a class = "code hl_define" href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a593ec140370d53f8c968f6240116d38b" > n_rows< / a > = < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#ae9b86b05b23153ea1abaeead456c491c" > BROWS< / a > / < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a94f0ce5bb7d87bc1fb6a7c2ba2b892d4" > TROWS< / a > ;< / div >
< div class = "line" > < a id = "l00368" name = "l00368" > < / a > < span class = "lineno" > 368< / span > < / div >
< div class = "line" > < a id = "l00369" name = "l00369" > < / a > < span class = "lineno" > 369< / span > < span class = "comment" > // Leading dimension for src< / span > < / div >
< div class = "line" > < a id = "l00370" name = "l00370" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a7464ec687323fa79050702952ed9084f" > 370< / a > < / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a7464ec687323fa79050702952ed9084f" > src_ld< / a > ;< / div >
< div class = "line" > < a id = "l00371" name = "l00371" > < / a > < span class = "lineno" > 371< / span > < / div >
< div class = "line" > < a id = "l00372" name = "l00372" > < / a > < span class = "lineno" > 372< / span > < span class = "comment" > // Thread location indices< / span > < / div >
< div class = "line" > < a id = "l00373" name = "l00373" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#ab1cb2ade639787243e0325dcd3dc0a11" > 373< / a > < / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#ab1cb2ade639787243e0325dcd3dc0a11" > thread_idx< / a > ;< / div >
< div class = "line" > < a id = "l00374" name = "l00374" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a8c5e74003600132954cb953616e1a026" > 374< / a > < / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a8c5e74003600132954cb953616e1a026" > bi< / a > ;< / div >
< div class = "line" > < a id = "l00375" name = "l00375" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a6f2fdcaf5a67567cca38ae3d8120ab37" > 375< / a > < / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a6f2fdcaf5a67567cca38ae3d8120ab37" > bj< / a > ;< / div >
< div class = "line" > < a id = "l00376" name = "l00376" > < / a > < span class = "lineno" > 376< / span > < / div >
< div class = "line" > < a id = "l00377" name = "l00377" > < / a > < span class = "lineno" > 377< / span > < span class = "comment" > // threadgroup and device memory< / span > < / div >
< div class = "line" > < a id = "l00378" name = "l00378" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#aea6494838175225d02cbc7768a646ec7" > 378< / a > < / span > threadgroup T* < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#aea6494838175225d02cbc7768a646ec7" > dst< / a > ;< / div >
< div class = "line" > < a id = "l00379" name = "l00379" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a7bfbcc4a1e3eef7aef5dd8e8c374a95f" > 379< / a > < / span > < span class = "keyword" > const< / span > device T* < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a7bfbcc4a1e3eef7aef5dd8e8c374a95f" > src< / a > ;< / div >
< div class = "line" > < a id = "l00380" name = "l00380" > < / a > < span class = "lineno" > 380< / span > < / div >
< div class = "line" > < a id = "l00381" name = "l00381" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a1fed11be2e8d9d594dcdf60e32b936b1" > 381< / a > < / span > < span class = "keyword" > const< / span > constant < a class = "code hl_struct" href = "struct_m_l_x_conv_params.html" > MLXConvParams< 2> < / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a1fed11be2e8d9d594dcdf60e32b936b1" > params< / a > ;< / div >
< div class = "line" > < a id = "l00382" name = "l00382" > < / a > < span class = "lineno" > 382< / span > < / div >
< div class = "line" > < a id = "l00383" name = "l00383" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#ae39d43f741c9c87cce9c6d3144dc8b94" > 383< / a > < / span > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#ae39d43f741c9c87cce9c6d3144dc8b94" > weight_hw< / a > ;< / div >
< div class = "line" > < a id = "l00384" name = "l00384" > < / a > < span class = "lineno" > 384< / span > < / div >
< div class = "line" > < a id = "l00385" name = "l00385" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a5afa232b7c84b5025247ac4f83eb9ca9" > 385< / a > < / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a5afa232b7c84b5025247ac4f83eb9ca9" > read_n< / a > ;< / div >
< div class = "line" > < a id = "l00386" name = "l00386" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a13eb86acf6abe288c19645935a47d2ad" > 386< / a > < / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > bool< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a13eb86acf6abe288c19645935a47d2ad" > do_read< / a > ;< / div >
< div class = "line" > < a id = "l00387" name = "l00387" > < / a > < span class = "lineno" > 387< / span > < / div >
< div class = "line" > < a id = "l00388" name = "l00388" > < / a > < span class = "lineno" > 388< / span > < span class = "comment" > /* Constructor */< / span > < / div >
< div class = "foldopen" id = "foldopen00389" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00389" name = "l00389" > < / a > < span class = "lineno" > < a class = "line" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a9a7dca3512b64cffb6eac305d795831c" > 389< / a > < / span > METAL_FUNC < a class = "code hl_function" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a9a7dca3512b64cffb6eac305d795831c" > Conv2DWeightBlockLoader< / a > (< / div >
< div class = "line" > < a id = "l00390" name = "l00390" > < / a > < span class = "lineno" > 390< / span > < span class = "keyword" > const< / span > device T* src_,< / div >
< div class = "line" > < a id = "l00391" name = "l00391" > < / a > < span class = "lineno" > 391< / span > threadgroup T* dst_,< / div >
< div class = "line" > < a id = "l00392" name = "l00392" > < / a > < span class = "lineno" > 392< / span > < span class = "keyword" > const< / span > int2 offsets,< / div >
< div class = "line" > < a id = "l00393" name = "l00393" > < / a > < span class = "lineno" > 393< / span > < span class = "keyword" > const< / span > constant < a class = "code hl_struct" href = "struct_m_l_x_conv_params.html" > MLXConvParams< 2> < / a > * params_,< / div >
< div class = "line" > < a id = "l00394" name = "l00394" > < / a > < span class = "lineno" > 394< / span > < span class = "keyword" > const< / span > constant < a class = "code hl_struct" href = "structmlx_1_1steel_1_1_implicit_gemm_conv2_d_params.html" > ImplicitGemmConv2DParams< / a > * gemm_params_,< / div >
< div class = "line" > < a id = "l00395" name = "l00395" > < / a > < span class = "lineno" > 395< / span > uint simd_group_id [[simdgroup_index_in_threadgroup]],< / div >
< div class = "line" > < a id = "l00396" name = "l00396" > < / a > < span class = "lineno" > 396< / span > uint simd_lane_id [[thread_index_in_simdgroup]])< / div >
< div class = "line" > < a id = "l00397" name = "l00397" > < / a > < span class = "lineno" > 397< / span > : < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a7464ec687323fa79050702952ed9084f" > src_ld< / a > (params_-> wt_strides[0]),< / div >
< div class = "line" > < a id = "l00398" name = "l00398" > < / a > < span class = "lineno" > 398< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#ab1cb2ade639787243e0325dcd3dc0a11" > thread_idx< / a > (simd_group_id * 32 + simd_lane_id),< / div >
< div class = "line" > < a id = "l00399" name = "l00399" > < / a > < span class = "lineno" > 399< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a8c5e74003600132954cb953616e1a026" > bi< / a > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#ab1cb2ade639787243e0325dcd3dc0a11" > thread_idx< / a > / < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a80cb90674f839d5d4ecfde384fa0a7a2" > TCOLS< / a > ),< / div >
< div class = "line" > < a id = "l00400" name = "l00400" > < / a > < span class = "lineno" > 400< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a6f2fdcaf5a67567cca38ae3d8120ab37" > bj< / a > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a10109dc9553207f5a365799e4969c6d2" > vec_size< / a > * (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#ab1cb2ade639787243e0325dcd3dc0a11" > thread_idx< / a > % < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a80cb90674f839d5d4ecfde384fa0a7a2" > TCOLS< / a > )),< / div >
< div class = "line" > < a id = "l00401" name = "l00401" > < / a > < span class = "lineno" > 401< / span > dst(dst_ + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a8c5e74003600132954cb953616e1a026" > bi< / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a0ff5a6d503e0bbac4634030a75ab818d" > dst_ld< / a > + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a6f2fdcaf5a67567cca38ae3d8120ab37" > bj< / a > ),< / div >
< div class = "line" > < a id = "l00402" name = "l00402" > < / a > < span class = "lineno" > 402< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a7bfbcc4a1e3eef7aef5dd8e8c374a95f" > src< / a > (src_ + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a8c5e74003600132954cb953616e1a026" > bi< / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a7464ec687323fa79050702952ed9084f" > src_ld< / a > + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a6f2fdcaf5a67567cca38ae3d8120ab37" > bj< / a > ),< / div >
< div class = "line" > < a id = "l00403" name = "l00403" > < / a > < span class = "lineno" > 403< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a1fed11be2e8d9d594dcdf60e32b936b1" > params< / a > (params_),< / div >
< div class = "line" > < a id = "l00404" name = "l00404" > < / a > < span class = "lineno" > 404< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#ae39d43f741c9c87cce9c6d3144dc8b94" > weight_hw< / a > (0),< / div >
< div class = "line" > < a id = "l00405" name = "l00405" > < / a > < span class = "lineno" > 405< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a5afa232b7c84b5025247ac4f83eb9ca9" > read_n< / a > (offsets.y + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a8c5e74003600132954cb953616e1a026" > bi< / a > ),< / div >
< div class = "line" > < a id = "l00406" name = "l00406" > < / a > < span class = "lineno" > 406< / span > < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a13eb86acf6abe288c19645935a47d2ad" > do_read< / a > (< a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a5afa232b7c84b5025247ac4f83eb9ca9" > read_n< / a > + < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a593ec140370d53f8c968f6240116d38b" > n_rows< / a > * < a class = "code hl_variable" href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a94f0ce5bb7d87bc1fb6a7c2ba2b892d4" > TROWS< / a > < = gemm_params_-> N) {}< / div >
< / div >
< div class = "line" > < a id = "l00407" name = "l00407" > < / a > < span class = "lineno" > 407< / span > < / div >
<!-- Listing lines 408-433: load_unsafe(). One div.line per line of the listed source; the foldopen wrapper groups the function body from its opening brace to its closing brace. The C++ less-than and arrow operators are entity-escaped so the fragment stays well-formed under the XHTML doctype declared at the top of the page. -->
<div class="line"><a id="l00408" name="l00408"></a><span class="lineno">  408</span>  <span class="comment">/* Load from device memory into threadgroup memory - without bound checking */</span></div>
<div class="foldopen" id="foldopen00409" data-start="{" data-end="}">
<div class="line"><a id="l00409" name="l00409"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a69e2f7c9814d1cc1c5c267be8618dc55">  409</a></span>  METAL_FUNC <span class="keywordtype">void</span> <a class="code hl_function" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a69e2f7c9814d1cc1c5c267be8618dc55">load_unsafe</a>()<span class="keyword"> const </span>{</div>
<div class="line"><a id="l00410" name="l00410"></a><span class="lineno">  410</span>    <span class="keywordflow">if</span> (BN != 8 || <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a13eb86acf6abe288c19645935a47d2ad">do_read</a>) {</div>
<div class="line"><a id="l00411" name="l00411"></a><span class="lineno">  411</span>      <a class="code hl_define" href="steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6">STEEL_PRAGMA_UNROLL</a></div>
<div class="line"><a id="l00412" name="l00412"></a><span class="lineno">  412</span>      <span class="keywordflow">for</span> (<span class="keywordtype">short</span> i = 0; i &lt; BN; i += <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a94f0ce5bb7d87bc1fb6a7c2ba2b892d4">TROWS</a>) {</div>
<div class="line"><a id="l00413" name="l00413"></a><span class="lineno">  413</span>        <a class="code hl_define" href="steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6">STEEL_PRAGMA_UNROLL</a></div>
<div class="line"><a id="l00414" name="l00414"></a><span class="lineno">  414</span>        <span class="keywordflow">for</span> (<span class="keywordtype">short</span> j = 0; j &lt; <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a10109dc9553207f5a365799e4969c6d2">vec_size</a>; j++) {</div>
<div class="line"><a id="l00415" name="l00415"></a><span class="lineno">  415</span>          dst[i * <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a0ff5a6d503e0bbac4634030a75ab818d">dst_ld</a> + j] = <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a7bfbcc4a1e3eef7aef5dd8e8c374a95f">src</a>[i * <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a7464ec687323fa79050702952ed9084f">src_ld</a> + j];</div>
<div class="line"><a id="l00416" name="l00416"></a><span class="lineno">  416</span>        }</div>
<div class="line"><a id="l00417" name="l00417"></a><span class="lineno">  417</span>      }</div>
<div class="line"><a id="l00418" name="l00418"></a><span class="lineno">  418</span>    } <span class="keywordflow">else</span> {</div>
<div class="line"><a id="l00419" name="l00419"></a><span class="lineno">  419</span>      <span class="keywordflow">for</span> (<span class="keywordtype">short</span> i = 0; i &lt; BN; i += <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a94f0ce5bb7d87bc1fb6a7c2ba2b892d4">TROWS</a>) {</div>
<div class="line"><a id="l00420" name="l00420"></a><span class="lineno">  420</span>        <span class="keywordflow">if</span> ((<a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a5afa232b7c84b5025247ac4f83eb9ca9">read_n</a> + i) &lt; <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a1fed11be2e8d9d594dcdf60e32b936b1">params</a>-&gt;<a class="code hl_variable" href="struct_m_l_x_conv_params.html#ad55ff586d30072d8154865f9dfe92d97">O</a>) {</div>
<div class="line"><a id="l00421" name="l00421"></a><span class="lineno">  421</span>          <a class="code hl_define" href="steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6">STEEL_PRAGMA_UNROLL</a></div>
<div class="line"><a id="l00422" name="l00422"></a><span class="lineno">  422</span>          <span class="keywordflow">for</span> (<span class="keywordtype">short</span> j = 0; j &lt; <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a10109dc9553207f5a365799e4969c6d2">vec_size</a>; j++) {</div>
<div class="line"><a id="l00423" name="l00423"></a><span class="lineno">  423</span>            dst[i * <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a0ff5a6d503e0bbac4634030a75ab818d">dst_ld</a> + j] = <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a7bfbcc4a1e3eef7aef5dd8e8c374a95f">src</a>[i * <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a7464ec687323fa79050702952ed9084f">src_ld</a> + j];</div>
<div class="line"><a id="l00424" name="l00424"></a><span class="lineno">  424</span>          }</div>
<div class="line"><a id="l00425" name="l00425"></a><span class="lineno">  425</span>        } <span class="keywordflow">else</span> {</div>
<div class="line"><a id="l00426" name="l00426"></a><span class="lineno">  426</span>          <a class="code hl_define" href="steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6">STEEL_PRAGMA_UNROLL</a></div>
<div class="line"><a id="l00427" name="l00427"></a><span class="lineno">  427</span>          <span class="keywordflow">for</span> (<span class="keywordtype">short</span> j = 0; j &lt; <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a10109dc9553207f5a365799e4969c6d2">vec_size</a>; j++) {</div>
<div class="line"><a id="l00428" name="l00428"></a><span class="lineno">  428</span>            dst[i * <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a0ff5a6d503e0bbac4634030a75ab818d">dst_ld</a> + j] = T(0);</div>
<div class="line"><a id="l00429" name="l00429"></a><span class="lineno">  429</span>          }</div>
<div class="line"><a id="l00430" name="l00430"></a><span class="lineno">  430</span>        }</div>
<div class="line"><a id="l00431" name="l00431"></a><span class="lineno">  431</span>      }</div>
<div class="line"><a id="l00432" name="l00432"></a><span class="lineno">  432</span>    }</div>
<div class="line"><a id="l00433" name="l00433"></a><span class="lineno">  433</span>  }</div>
</div>
< div class = "line" > < a id = "l00434" name = "l00434" > < / a > < span class = "lineno" > 434< / span > < / div >
<!-- Listing lines 435-445: next(). One div.line per line of the listed source; the foldopen wrapper groups the function body from its opening brace to its closing brace. The C++ less-than and arrow operators are entity-escaped so the fragment stays well-formed under the XHTML doctype declared at the top of the page. -->
<div class="line"><a id="l00435" name="l00435"></a><span class="lineno">  435</span>  <span class="comment">/* Iteration helper */</span></div>
<div class="foldopen" id="foldopen00436" data-start="{" data-end="}">
<div class="line"><a id="l00436" name="l00436"></a><span class="lineno"><a class="line" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#aae56c19bb562219770fec38e5666c6ce">  436</a></span>  METAL_FUNC <span class="keywordtype">void</span> <a class="code hl_function" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#aae56c19bb562219770fec38e5666c6ce">next</a>() {</div>
<div class="line"><a id="l00437" name="l00437"></a><span class="lineno">  437</span>    <span class="keywordflow">if</span> (++<a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#ae39d43f741c9c87cce9c6d3144dc8b94">weight_hw</a> &lt; (<a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a1fed11be2e8d9d594dcdf60e32b936b1">params</a>-&gt;<a class="code hl_variable" href="struct_m_l_x_conv_params.html#aba2074189644b1b59567d018409277a9">wS</a>[1] * <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a1fed11be2e8d9d594dcdf60e32b936b1">params</a>-&gt;<a class="code hl_variable" href="struct_m_l_x_conv_params.html#aba2074189644b1b59567d018409277a9">wS</a>[0])) {</div>
<div class="line"><a id="l00438" name="l00438"></a><span class="lineno">  438</span>      <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a7bfbcc4a1e3eef7aef5dd8e8c374a95f">src</a> += <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a1fed11be2e8d9d594dcdf60e32b936b1">params</a>-&gt;<a class="code hl_variable" href="struct_m_l_x_conv_params.html#a887fee0da1494d038526fb0f59faff45">wt_strides</a>[2];</div>
<div class="line"><a id="l00439" name="l00439"></a><span class="lineno">  439</span>      <span class="keywordflow">return</span>;</div>
<div class="line"><a id="l00440" name="l00440"></a><span class="lineno">  440</span>    }</div>
<div class="line"><a id="l00441" name="l00441"></a><span class="lineno">  441</span> </div>
<div class="line"><a id="l00442" name="l00442"></a><span class="lineno">  442</span>    <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#ae39d43f741c9c87cce9c6d3144dc8b94">weight_hw</a> = 0;</div>
<div class="line"><a id="l00443" name="l00443"></a><span class="lineno">  443</span> </div>
<div class="line"><a id="l00444" name="l00444"></a><span class="lineno">  444</span>    <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a7bfbcc4a1e3eef7aef5dd8e8c374a95f">src</a> += BK - (<a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a1fed11be2e8d9d594dcdf60e32b936b1">params</a>-&gt;<a class="code hl_variable" href="struct_m_l_x_conv_params.html#aba2074189644b1b59567d018409277a9">wS</a>[1] * <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a1fed11be2e8d9d594dcdf60e32b936b1">params</a>-&gt;<a class="code hl_variable" href="struct_m_l_x_conv_params.html#aba2074189644b1b59567d018409277a9">wS</a>[0] - 1) * <a class="code hl_variable" href="structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a1fed11be2e8d9d594dcdf60e32b936b1">params</a>-&gt;<a class="code hl_variable" href="struct_m_l_x_conv_params.html#a887fee0da1494d038526fb0f59faff45">wt_strides</a>[2];</div>
<div class="line"><a id="l00445" name="l00445"></a><span class="lineno">  445</span>  }</div>
</div>
< div class = "line" > < a id = "l00446" name = "l00446" > < / a > < span class = "lineno" > 446< / span > };< / div >
< / div >
< div class = "line" > < a id = "l00447" name = "l00447" > < / a > < span class = "lineno" > 447< / span > < / div >
< div class = "line" > < a id = "l00448" name = "l00448" > < / a > < span class = "lineno" > 448< / span > } < span class = "comment" > // namespace steel< / span > < / div >
< / div >
< div class = "line" > < a id = "l00449" name = "l00449" > < / a > < span class = "lineno" > 449< / span > } < span class = "comment" > // namespace mlx< / span > < / div >
< div class = "ttc" id = "abackend_2metal_2kernels_2steel_2utils_8h_html" > < div class = "ttname" > < a href = "backend_2metal_2kernels_2steel_2utils_8h.html" > utils.h< / a > < / div > < / div >
< div class = "ttc" id = "aconv_2params_8h_html" > < div class = "ttname" > < a href = "conv_2params_8h.html" > params.h< / a > < / div > < / div >
< div class = "ttc" id = "anamespacemlx_html" > < div class = "ttname" > < a href = "namespacemlx.html" > mlx< / a > < / div > < div class = "ttdef" > < b > Definition< / b > allocator.h:7< / div > < / div >
< div class = "ttc" id = "asteel_2defines_8h_html_a5a5c3095b132a7589bc19cd5cb80e2c6" > < div class = "ttname" > < a href = "steel_2defines_8h.html#a5a5c3095b132a7589bc19cd5cb80e2c6" > STEEL_PRAGMA_UNROLL< / a > < / div > < div class = "ttdeci" > #define STEEL_PRAGMA_UNROLL< / div > < div class = "ttdef" > < b > Definition< / b > defines.h:4< / div > < / div >
< div class = "ttc" id = "asteel_2defines_8h_html_a90b91c866313ffa46eff6d9cc944ad2b" > < div class = "ttname" > < a href = "steel_2defines_8h.html#a90b91c866313ffa46eff6d9cc944ad2b" > STEEL_CONST< / a > < / div > < div class = "ttdeci" > #define STEEL_CONST< / div > < div class = "ttdef" > < b > Definition< / b > defines.h:3< / div > < / div >
< div class = "ttc" id = "astruct_m_l_x_conv_params_html" > < div class = "ttname" > < a href = "struct_m_l_x_conv_params.html" > MLXConvParams< / a > < / div > < div class = "ttdef" > < b > Definition< / b > params.h:6< / div > < / div >
< div class = "ttc" id = "astruct_m_l_x_conv_params_html_a19ccb9fecfccdc18b6a7f0cc43adbc6e" > < div class = "ttname" > < a href = "struct_m_l_x_conv_params.html#a19ccb9fecfccdc18b6a7f0cc43adbc6e" > MLXConvParams::oS< / a > < / div > < div class = "ttdeci" > const int oS[NDIM]< / div > < div class = "ttdef" > < b > Definition< / b > params.h:12< / div > < / div >
< div class = "ttc" id = "astruct_m_l_x_conv_params_html_a72e1c3b4da0f70622cf18036bbf97fe6" > < div class = "ttname" > < a href = "struct_m_l_x_conv_params.html#a72e1c3b4da0f70622cf18036bbf97fe6" > MLXConvParams::iS< / a > < / div > < div class = "ttdeci" > const int iS[NDIM]< / div > < div class = "ttdef" > < b > Definition< / b > params.h:10< / div > < / div >
< div class = "ttc" id = "astruct_m_l_x_conv_params_html_a7611db8f1621c7e09fc685ed44073b14" > < div class = "ttname" > < a href = "struct_m_l_x_conv_params.html#a7611db8f1621c7e09fc685ed44073b14" > MLXConvParams::kdil< / a > < / div > < div class = "ttdeci" > const int kdil[NDIM]< / div > < div class = "ttdef" > < b > Definition< / b > params.h:15< / div > < / div >
< div class = "ttc" id = "astruct_m_l_x_conv_params_html_a862191e8ab1bc8a47aa1396b36d46058" > < div class = "ttname" > < a href = "struct_m_l_x_conv_params.html#a862191e8ab1bc8a47aa1396b36d46058" > MLXConvParams::str< / a > < / div > < div class = "ttdeci" > const int str[NDIM]< / div > < div class = "ttdef" > < b > Definition< / b > params.h:13< / div > < / div >
< div class = "ttc" id = "astruct_m_l_x_conv_params_html_a887fee0da1494d038526fb0f59faff45" > < div class = "ttname" > < a href = "struct_m_l_x_conv_params.html#a887fee0da1494d038526fb0f59faff45" > MLXConvParams::wt_strides< / a > < / div > < div class = "ttdeci" > const size_t wt_strides[NDIM+2]< / div > < div class = "ttdef" > < b > Definition< / b > params.h:18< / div > < / div >
< div class = "ttc" id = "astruct_m_l_x_conv_params_html_a8b30cda15eda20f84f12db868f21d0ef" > < div class = "ttname" > < a href = "struct_m_l_x_conv_params.html#a8b30cda15eda20f84f12db868f21d0ef" > MLXConvParams::flip< / a > < / div > < div class = "ttdeci" > const bool flip< / div > < div class = "ttdef" > < b > Definition< / b > params.h:21< / div > < / div >
< div class = "ttc" id = "astruct_m_l_x_conv_params_html_ab25eade6573784985dbea1216f9068cf" > < div class = "ttname" > < a href = "struct_m_l_x_conv_params.html#ab25eade6573784985dbea1216f9068cf" > MLXConvParams::in_strides< / a > < / div > < div class = "ttdeci" > const size_t in_strides[NDIM+2]< / div > < div class = "ttdef" > < b > Definition< / b > params.h:17< / div > < / div >
< div class = "ttc" id = "astruct_m_l_x_conv_params_html_aba2074189644b1b59567d018409277a9" > < div class = "ttname" > < a href = "struct_m_l_x_conv_params.html#aba2074189644b1b59567d018409277a9" > MLXConvParams::wS< / a > < / div > < div class = "ttdeci" > const int wS[NDIM]< / div > < div class = "ttdef" > < b > Definition< / b > params.h:11< / div > < / div >
< div class = "ttc" id = "astruct_m_l_x_conv_params_html_ad55ff586d30072d8154865f9dfe92d97" > < div class = "ttname" > < a href = "struct_m_l_x_conv_params.html#ad55ff586d30072d8154865f9dfe92d97" > MLXConvParams::O< / a > < / div > < div class = "ttdeci" > const int O< / div > < div class = "ttdef" > < b > Definition< / b > params.h:9< / div > < / div >
< div class = "ttc" id = "astruct_m_l_x_conv_params_html_ae6b7054dc3cffa8e6aedeb29fa7da932" > < div class = "ttname" > < a href = "struct_m_l_x_conv_params.html#ae6b7054dc3cffa8e6aedeb29fa7da932" > MLXConvParams::N< / a > < / div > < div class = "ttdeci" > const int N< / div > < div class = "ttdef" > < b > Definition< / b > params.h:7< / div > < / div >
< div class = "ttc" id = "astruct_m_l_x_conv_params_html_ae84a9afb3a95b57e0b763bb0ebda0753" > < div class = "ttname" > < a href = "struct_m_l_x_conv_params.html#ae84a9afb3a95b57e0b763bb0ebda0753" > MLXConvParams::pad< / a > < / div > < div class = "ttdeci" > const int pad[NDIM]< / div > < div class = "ttdef" > < b > Definition< / b > params.h:14< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html" > mlx::steel::Conv2DInputBlockLoaderLargeFilter< / a > < / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:23< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_a097c48a23e1bd7d8cf3e9d531397602f" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a097c48a23e1bd7d8cf3e9d531397602f" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::n_rows< / a > < / div > < div class = "ttdeci" > STEEL_CONST short n_rows< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:37< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_a09b4719415c5bddb0bb70c704b1d8d02" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a09b4719415c5bddb0bb70c704b1d8d02" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::params< / a > < / div > < div class = "ttdeci" > const constant MLXConvParams< 2 > * params< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:47< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_a0b5303f3258e0a21862dead8e3f5401e" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a0b5303f3258e0a21862dead8e3f5401e" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::TCOLS< / a > < / div > < div class = "ttdeci" > STEEL_CONST short TCOLS< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:33< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_a17550360cae0a942a9552d7a67827512" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a17550360cae0a942a9552d7a67827512" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::read_iw< / a > < / div > < div class = "ttdeci" > int read_iw[n_rows]< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:57< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_a2528ff5ed472e4ed35415ada42276b07" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a2528ff5ed472e4ed35415ada42276b07" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::TROWS< / a > < / div > < div class = "ttdeci" > STEEL_CONST short TROWS< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:34< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_a29fbeeacdf5b6feeb74815ced255fa5a" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a29fbeeacdf5b6feeb74815ced255fa5a" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::BCOLS< / a > < / div > < div class = "ttdeci" > STEEL_CONST short BCOLS< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:26< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_a3b71f379ff9baf39830c92f4f1ecde52" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a3b71f379ff9baf39830c92f4f1ecde52" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::next< / a > < / div > < div class = "ttdeci" > METAL_FUNC void next()< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:134< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_a3be4815d4090cb27ebe2f9bad1a39e95" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a3be4815d4090cb27ebe2f9bad1a39e95" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::weight_h< / a > < / div > < div class = "ttdeci" > short weight_h< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:50< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_a656a46ee27486482b45ff90b3d626255" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a656a46ee27486482b45ff90b3d626255" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::src< / a > < / div > < div class = "ttdeci" > const device T * src[n_rows]< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:53< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_a70da26a715135d973f88371a70255be9" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a70da26a715135d973f88371a70255be9" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::thread_idx< / a > < / div > < div class = "ttdeci" > const short thread_idx< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:40< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_a7ae9e41f50c0c63c35b63086a1c22cc3" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a7ae9e41f50c0c63c35b63086a1c22cc3" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::bj< / a > < / div > < div class = "ttdeci" > const short bj< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:42< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_a82dd8230e1f37500f1a562177c3ad692" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a82dd8230e1f37500f1a562177c3ad692" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::read_ih< / a > < / div > < div class = "ttdeci" > int read_ih[n_rows]< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:56< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_a8755116a535539744e4947bc69f9c50f" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a8755116a535539744e4947bc69f9c50f" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::Conv2DInputBlockLoaderLargeFilter< / a > < / div > < div class = "ttdeci" > METAL_FUNC Conv2DInputBlockLoaderLargeFilter(const device T *src_, threadgroup T *dst_, const int2 offsets, const constant MLXConvParams< 2 > *params_, const constant ImplicitGemmConv2DParams *gemm_params_, uint simd_group_id, uint simd_lane_id)< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:60< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_a8e53b0a9951cb840d922cc285b257ee3" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a8e53b0a9951cb840d922cc285b257ee3" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::bi< / a > < / div > < div class = "ttdeci" > const short bi< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:41< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_a91192d512e7a18c2d16a139065000959" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a91192d512e7a18c2d16a139065000959" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::dst_ld< / a > < / div > < div class = "ttdeci" > STEEL_CONST short dst_ld< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:29< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_a961836be363409744e48e595d5e0c2ec" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#a961836be363409744e48e595d5e0c2ec" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::load_unsafe< / a > < / div > < div class = "ttdeci" > METAL_FUNC void load_unsafe() const< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:106< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_ab0724eb3ef52ee773b6607f6433b9f2c" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#ab0724eb3ef52ee773b6607f6433b9f2c" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::gemm_params< / a > < / div > < div class = "ttdeci" > const constant ImplicitGemmConv2DParams * gemm_params< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:48< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_ac070c6bd5be85b1ae805e18890db4fd4" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#ac070c6bd5be85b1ae805e18890db4fd4" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::BROWS< / a > < / div > < div class = "ttdeci" > STEEL_CONST short BROWS< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:25< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_adcc83bf6c02391cc2375e55c06a1c9a4" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#adcc83bf6c02391cc2375e55c06a1c9a4" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::vec_size< / a > < / div > < div class = "ttdeci" > STEEL_CONST short vec_size< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:30< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_add1186c7accb62bfa8a4a7e87fc4cc84" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#add1186c7accb62bfa8a4a7e87fc4cc84" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::weight_w< / a > < / div > < div class = "ttdeci" > short weight_w< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:51< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_ae048eb79f8b8d98f0fe8805c30fbb09f" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#ae048eb79f8b8d98f0fe8805c30fbb09f" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::dst< / a > < / div > < div class = "ttdeci" > threadgroup T * dst< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:45< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter_html_aeb67767e2d60d5ff0279a55553f3184e" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_large_filter.html#aeb67767e2d60d5ff0279a55553f3184e" > mlx::steel::Conv2DInputBlockLoaderLargeFilter::read_n< / a > < / div > < div class = "ttdeci" > int read_n[n_rows]< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:55< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html" > mlx::steel::Conv2DInputBlockLoaderSmallFilter< / a > < / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:171< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_a0a2cbf57c51cd928722e3f06aafcf933" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a0a2cbf57c51cd928722e3f06aafcf933" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::Conv2DInputBlockLoaderSmallFilter< / a > < / div > < div class = "ttdeci" > METAL_FUNC Conv2DInputBlockLoaderSmallFilter(const device T *src_, threadgroup T *dst_, const int2 offsets, const constant MLXConvParams< 2 > *params_, const constant ImplicitGemmConv2DParams *gemm_params_, uint simd_group_id, uint simd_lane_id)< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:209< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_a0b892c1a7edb9ed20c076d8945855c19" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a0b892c1a7edb9ed20c076d8945855c19" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::mask_h< / a > < / div > < div class = "ttdeci" > mask_t mask_h[n_rows]< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:205< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_a10591ea957605a9c662f93d59ff3410d" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a10591ea957605a9c662f93d59ff3410d" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::BROWS< / a > < / div > < div class = "ttdeci" > STEEL_CONST short BROWS< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:173< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_a19ddba7259c3c2c02ed90f3f635557be" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a19ddba7259c3c2c02ed90f3f635557be" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::mask_w< / a > < / div > < div class = "ttdeci" > mask_t mask_w[n_rows]< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:206< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_a270ab3da7c98a12525a59952742cc97d" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a270ab3da7c98a12525a59952742cc97d" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::mask_t< / a > < / div > < div class = "ttdeci" > short mask_t< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:187< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_a366c3cee4ed1165545287c8d5ce49445" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a366c3cee4ed1165545287c8d5ce49445" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::weight_h< / a > < / div > < div class = "ttdeci" > short weight_h< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:200< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_a3957fb263fe040fe70683fd1d7b06487" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3957fb263fe040fe70683fd1d7b06487" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::TROWS< / a > < / div > < div class = "ttdeci" > STEEL_CONST short TROWS< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:182< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_a3ec8a92c9e6643c1d5bf8af278026fe8" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a3ec8a92c9e6643c1d5bf8af278026fe8" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::n_rows< / a > < / div > < div class = "ttdeci" > STEEL_CONST short n_rows< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:185< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_a4744bd79fb05e81eaa53d2eabe017446" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4744bd79fb05e81eaa53d2eabe017446" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::weight_w< / a > < / div > < div class = "ttdeci" > short weight_w< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:201< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_a4f8c792ede675d14b70dd19fcf3c5aee" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a4f8c792ede675d14b70dd19fcf3c5aee" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::params< / a > < / div > < div class = "ttdeci" > const constant MLXConvParams&lt; 2 &gt; * params< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:197< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_a57552e9cfbafad71d47b2f3a8e027bdf" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a57552e9cfbafad71d47b2f3a8e027bdf" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::src< / a > < / div > < div class = "ttdeci" > const device T * src[n_rows]< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:203< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_a5adbd51e9adb6f7853724d83de4ff755" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a5adbd51e9adb6f7853724d83de4ff755" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::TCOLS< / a > < / div > < div class = "ttdeci" > STEEL_CONST short TCOLS< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:181< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_a6fd3dd7b74d91609fa9dd61c657a0e32" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a6fd3dd7b74d91609fa9dd61c657a0e32" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::bj< / a > < / div > < div class = "ttdeci" > const short bj< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:192< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_a71c313e1597a2bb99f7b07d434e119d2" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a71c313e1597a2bb99f7b07d434e119d2" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::vec_size< / a > < / div > < div class = "ttdeci" > STEEL_CONST short vec_size< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:178< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_a78d2b0098311a278be8394edbd5fc731" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a78d2b0098311a278be8394edbd5fc731" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::next< / a > < / div > < div class = "ttdeci" > METAL_FUNC void next()< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:315< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_a8034abc10483487fc94313e3674d1111" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a8034abc10483487fc94313e3674d1111" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::load_unsafe< / a > < / div > < div class = "ttdeci" > METAL_FUNC void load_unsafe() const< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:290< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_a8598bf23a2bce6af13c876cbfa76449f" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a8598bf23a2bce6af13c876cbfa76449f" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::dst< / a > < / div > < div class = "ttdeci" > threadgroup T * dst< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:195< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_a9e59da7e4436e61b2d3c3f982355910b" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#a9e59da7e4436e61b2d3c3f982355910b" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::dst_ld< / a > < / div > < div class = "ttdeci" > STEEL_CONST short dst_ld< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:177< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_ac18de37cde1459595bfe18b0d5ef146d" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#ac18de37cde1459595bfe18b0d5ef146d" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::thread_idx< / a > < / div > < div class = "ttdeci" > const short thread_idx< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:190< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_ac3b40db720055350bba59d614ea1dd79" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#ac3b40db720055350bba59d614ea1dd79" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::BCOLS< / a > < / div > < div class = "ttdeci" > STEEL_CONST short BCOLS< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:174< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_acc778b3c0b7ec38a43e8ea943df8704c" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#acc778b3c0b7ec38a43e8ea943df8704c" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::gemm_params< / a > < / div > < div class = "ttdeci" > const constant ImplicitGemmConv2DParams * gemm_params< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:198< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter_html_ae3af75287f279d2cdeef189126740d4c" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_input_block_loader_small_filter.html#ae3af75287f279d2cdeef189126740d4c" > mlx::steel::Conv2DInputBlockLoaderSmallFilter::bi< / a > < / div > < div class = "ttdeci" > const short bi< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:191< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html" > mlx::steel::Conv2DWeightBlockLoader< / a > < / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:352< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_a0ff5a6d503e0bbac4634030a75ab818d" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a0ff5a6d503e0bbac4634030a75ab818d" > mlx::steel::Conv2DWeightBlockLoader::dst_ld< / a > < / div > < div class = "ttdeci" > STEEL_CONST short dst_ld< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:358< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_a10109dc9553207f5a365799e4969c6d2" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a10109dc9553207f5a365799e4969c6d2" > mlx::steel::Conv2DWeightBlockLoader::vec_size< / a > < / div > < div class = "ttdeci" > STEEL_CONST short vec_size< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:359< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_a13eb86acf6abe288c19645935a47d2ad" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a13eb86acf6abe288c19645935a47d2ad" > mlx::steel::Conv2DWeightBlockLoader::do_read< / a > < / div > < div class = "ttdeci" > const bool do_read< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:386< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_a1fed11be2e8d9d594dcdf60e32b936b1" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a1fed11be2e8d9d594dcdf60e32b936b1" > mlx::steel::Conv2DWeightBlockLoader::params< / a > < / div > < div class = "ttdeci" > const constant MLXConvParams&lt; 2 &gt; * params< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:381< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_a593ec140370d53f8c968f6240116d38b" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a593ec140370d53f8c968f6240116d38b" > mlx::steel::Conv2DWeightBlockLoader::n_rows< / a > < / div > < div class = "ttdeci" > STEEL_CONST short n_rows< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:367< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_a5afa232b7c84b5025247ac4f83eb9ca9" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a5afa232b7c84b5025247ac4f83eb9ca9" > mlx::steel::Conv2DWeightBlockLoader::read_n< / a > < / div > < div class = "ttdeci" > const int read_n< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:385< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_a69e2f7c9814d1cc1c5c267be8618dc55" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a69e2f7c9814d1cc1c5c267be8618dc55" > mlx::steel::Conv2DWeightBlockLoader::load_unsafe< / a > < / div > < div class = "ttdeci" > METAL_FUNC void load_unsafe() const< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:409< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_a6f2fdcaf5a67567cca38ae3d8120ab37" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a6f2fdcaf5a67567cca38ae3d8120ab37" > mlx::steel::Conv2DWeightBlockLoader::bj< / a > < / div > < div class = "ttdeci" > const short bj< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:375< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_a7464ec687323fa79050702952ed9084f" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a7464ec687323fa79050702952ed9084f" > mlx::steel::Conv2DWeightBlockLoader::src_ld< / a > < / div > < div class = "ttdeci" > const int src_ld< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:370< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_a7bfbcc4a1e3eef7aef5dd8e8c374a95f" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a7bfbcc4a1e3eef7aef5dd8e8c374a95f" > mlx::steel::Conv2DWeightBlockLoader::src< / a > < / div > < div class = "ttdeci" > const device T * src< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:379< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_a80cb90674f839d5d4ecfde384fa0a7a2" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a80cb90674f839d5d4ecfde384fa0a7a2" > mlx::steel::Conv2DWeightBlockLoader::TCOLS< / a > < / div > < div class = "ttdeci" > STEEL_CONST short TCOLS< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:363< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_a86519729ef0561686bb86e474c95b93d" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a86519729ef0561686bb86e474c95b93d" > mlx::steel::Conv2DWeightBlockLoader::BCOLS< / a > < / div > < div class = "ttdeci" > STEEL_CONST short BCOLS< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:355< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_a8c5e74003600132954cb953616e1a026" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a8c5e74003600132954cb953616e1a026" > mlx::steel::Conv2DWeightBlockLoader::bi< / a > < / div > < div class = "ttdeci" > const short bi< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:374< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_a94f0ce5bb7d87bc1fb6a7c2ba2b892d4" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a94f0ce5bb7d87bc1fb6a7c2ba2b892d4" > mlx::steel::Conv2DWeightBlockLoader::TROWS< / a > < / div > < div class = "ttdeci" > STEEL_CONST short TROWS< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:364< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_a9a7dca3512b64cffb6eac305d795831c" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#a9a7dca3512b64cffb6eac305d795831c" > mlx::steel::Conv2DWeightBlockLoader::Conv2DWeightBlockLoader< / a > < / div > < div class = "ttdeci" > METAL_FUNC Conv2DWeightBlockLoader(const device T *src_, threadgroup T *dst_, const int2 offsets, const constant MLXConvParams&lt; 2 &gt; *params_, const constant ImplicitGemmConv2DParams *gemm_params_, uint simd_group_id, uint simd_lane_id)< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:389< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_aae56c19bb562219770fec38e5666c6ce" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#aae56c19bb562219770fec38e5666c6ce" > mlx::steel::Conv2DWeightBlockLoader::next< / a > < / div > < div class = "ttdeci" > METAL_FUNC void next()< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:436< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_ab1cb2ade639787243e0325dcd3dc0a11" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#ab1cb2ade639787243e0325dcd3dc0a11" > mlx::steel::Conv2DWeightBlockLoader::thread_idx< / a > < / div > < div class = "ttdeci" > const short thread_idx< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:373< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_ae39d43f741c9c87cce9c6d3144dc8b94" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#ae39d43f741c9c87cce9c6d3144dc8b94" > mlx::steel::Conv2DWeightBlockLoader::weight_hw< / a > < / div > < div class = "ttdeci" > int weight_hw< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:383< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_ae9b86b05b23153ea1abaeead456c491c" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#ae9b86b05b23153ea1abaeead456c491c" > mlx::steel::Conv2DWeightBlockLoader::BROWS< / a > < / div > < div class = "ttdeci" > STEEL_CONST short BROWS< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:354< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_conv2_d_weight_block_loader_html_aea6494838175225d02cbc7768a646ec7" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_conv2_d_weight_block_loader.html#aea6494838175225d02cbc7768a646ec7" > mlx::steel::Conv2DWeightBlockLoader::dst< / a > < / div > < div class = "ttdeci" > threadgroup T * dst< / div > < div class = "ttdef" > < b > Definition< / b > loader_channel_l.h:378< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_implicit_gemm_conv2_d_params_html" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_implicit_gemm_conv2_d_params.html" > mlx::steel::ImplicitGemmConv2DParams< / a > < / div > < div class = "ttdef" > < b > Definition< / b > params.h:27< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_implicit_gemm_conv2_d_params_html_a03685a4066cdb11ffb647408e2c5b122" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_implicit_gemm_conv2_d_params.html#a03685a4066cdb11ffb647408e2c5b122" > mlx::steel::ImplicitGemmConv2DParams::inp_jump_h< / a > < / div > < div class = "ttdeci" > const int inp_jump_h< / div > < div class = "ttdef" > < b > Definition< / b > params.h:35< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_implicit_gemm_conv2_d_params_html_a78d30e843d65d1829623afb0b607f0a5" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_implicit_gemm_conv2_d_params.html#a78d30e843d65d1829623afb0b607f0a5" > mlx::steel::ImplicitGemmConv2DParams::inp_jump_c< / a > < / div > < div class = "ttdeci" > const int inp_jump_c< / div > < div class = "ttdef" > < b > Definition< / b > params.h:36< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1steel_1_1_implicit_gemm_conv2_d_params_html_acf168c72f4a86b72b8f5f386f07c9d8c" > < div class = "ttname" > < a href = "structmlx_1_1steel_1_1_implicit_gemm_conv2_d_params.html#acf168c72f4a86b72b8f5f386f07c9d8c" > mlx::steel::ImplicitGemmConv2DParams::inp_jump_w< / a > < / div > < div class = "ttdeci" > const int inp_jump_w< / div > < div class = "ttdef" > < b > Definition< / b > params.h:34< / div > < / div >
< / div > <!-- fragment --> < / div > <!-- contents -->
<!-- start footer part -->
< hr class = "footer" / > < address class = "footer" > < small >
Generated by  < a href = "https://www.doxygen.org/index.html" > < img class = "footer" src = "doxygen.svg" width = "104" height = "31" alt = "doxygen" / > < / a > 1.10.0
< / small > < / address >
< / body >
< / html >