<!-- 2024-10-15 23:12:17 +08:00 -->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
< html xmlns = "http://www.w3.org/1999/xhtml" lang = "en-US" >
< head >
< meta http-equiv = "Content-Type" content = "text/xhtml;charset=UTF-8" / >
< meta http-equiv = "X-UA-Compatible" content = "IE=11" / >
< meta name = "generator" content = "Doxygen 1.12.0" / >
< meta name = "viewport" content = "width=device-width, initial-scale=1" / >
< title > MLX: mlx/backend/metal/kernels/fft/readwrite.h Source File< / title >
< link href = "tabs.css" rel = "stylesheet" type = "text/css" / >
< script type = "text/javascript" src = "jquery.js" > < / script >
< script type = "text/javascript" src = "dynsections.js" > < / script >
< script type = "text/javascript" src = "clipboard.js" > < / script >
< link href = "navtree.css" rel = "stylesheet" type = "text/css" / >
< script type = "text/javascript" src = "resize.js" > < / script >
< script type = "text/javascript" src = "cookie.js" > < / script >
< link href = "search/search.css" rel = "stylesheet" type = "text/css" / >
< script type = "text/javascript" src = "search/searchdata.js" > < / script >
< script type = "text/javascript" src = "search/search.js" > < / script >
< link href = "doxygen.css" rel = "stylesheet" type = "text/css" / >
< / head >
< body >
< div id = "top" > <!-- do not remove this div, it is closed by doxygen! -->
< div id = "titlearea" >
< table cellspacing = "0" cellpadding = "0" >
< tbody >
< tr id = "projectrow" >
< td id = "projectalign" >
< div id = "projectname" > MLX
< / div >
< / td >
< / tr >
< / tbody >
< / table >
< / div >
<!-- end header part -->
<!-- Generated by Doxygen 1.12.0 -->
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699& dn=expat.txt MIT */
var searchBox = new SearchBox("searchBox", "search/",'.html');
/* @license-end */
< / script >
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699& dn=expat.txt MIT */
$(function() { codefold.init(0); });
/* @license-end */
< / script >
< script type = "text/javascript" src = "menudata.js" > < / script >
< script type = "text/javascript" src = "menu.js" > < / script >
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699& dn=expat.txt MIT */
$(function() {
initMenu('',true,false,'search.php','Search',false);
$(function() { init_search(); });
});
/* @license-end */
< / script >
< div id = "main-nav" > < / div >
< script type = "text/javascript" >
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699& dn=expat.txt MIT */
$(function(){ initResizable(false); });
/* @license-end */
< / script >
<!-- window showing the filter options -->
< div id = "MSearchSelectWindow"
onmouseover="return searchBox.OnSearchSelectShow()"
onmouseout="return searchBox.OnSearchSelectHide()"
onkeydown="return searchBox.OnSearchSelectKey(event)">
< / div >
<!-- iframe showing the search results (closed by default) -->
< div id = "MSearchResultsWindow" >
< div id = "MSearchResults" >
< div class = "SRPage" >
< div id = "SRIndex" >
< div id = "SRResults" > < / div >
< div class = "SRStatus" id = "Loading" > Loading...< / div >
< div class = "SRStatus" id = "Searching" > Searching...< / div >
< div class = "SRStatus" id = "NoMatches" > No Matches< / div >
< / div >
< / div >
< / div >
< / div >
< div id = "nav-path" class = "navpath" >
< ul >
< li class = "navelem" > < a class = "el" href = "dir_938ab0ecf10b8b860ff766c820f665fd.html" > mlx< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_1d446c9bd3c99228254c9484e0bc5c06.html" > backend< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_d0c977ea65824390717cdb7efc36c157.html" > metal< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_70a37effa88bcbd6b791977fa1e64356.html" > kernels< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_8c751ccfa9f494753d976761a9d60a84.html" > fft< / a > < / li > < / ul >
< / div >
< / div > <!-- top -->
< div id = "doc-content" >
< div class = "header" >
< div class = "headertitle" > < div class = "title" > readwrite.h< / div > < / div >
< / div > <!-- header -->
< div class = "contents" >
< a href = "readwrite_8h.html" > Go to the documentation of this file.< / a > < div class = "fragment" > < div class = "line" > < a id = "l00001" name = "l00001" > < / a > < span class = "lineno" > 1< / span > < span class = "comment" > // Copyright © 2024 Apple Inc.< / span > < / div >
< div class = "line" > < a id = "l00002" name = "l00002" > < / a > < span class = "lineno" > 2< / span > < / div >
< div class = "line" > < a id = "l00003" name = "l00003" > < / a > < span class = "lineno" > 3< / span > < span class = "preprocessor" > #include < metal_common> < / span > < / div >
< div class = "line" > < a id = "l00004" name = "l00004" > < / a > < span class = "lineno" > 4< / span > < / div >
< div class = "line" > < a id = "l00005" name = "l00005" > < / a > < span class = "lineno" > 5< / span > < span class = "preprocessor" > #include " < a class = "code" href = "radix_8h.html" > mlx/backend/metal/kernels/fft/radix.h< / a > " < / span > < / div >
< div class = "line" > < a id = "l00006" name = "l00006" > < / a > < span class = "lineno" > 6< / span > < / div >
< div class = "line" > < a id = "l00007" name = "l00007" > < / a > < span class = "lineno" > 7< / span > < span class = "comment" > /* FFT helpers for reading and writing from/to device memory.< / span > < / div >
< div class = "line" > < a id = "l00008" name = "l00008" > < / a > < span class = "lineno" > 8< / span > < span class = "comment" > < / span > < / div >
< div class = "line" > < a id = "l00009" name = "l00009" > < / a > < span class = "lineno" > 9< / span > < span class = "comment" > For many sizes, GPU FFTs are memory bandwidth bound so< / span > < / div >
< div class = "line" > < a id = "l00010" name = "l00010" > < / a > < span class = "lineno" > 10< / span > < span class = "comment" > read/write performance is important.< / span > < / div >
< div class = "line" > < a id = "l00011" name = "l00011" > < / a > < span class = "lineno" > 11< / span > < span class = "comment" > < / span > < / div >
< div class = "line" > < a id = "l00012" name = "l00012" > < / a > < span class = "lineno" > 12< / span > < span class = "comment" > Where possible, we read 128 bits sequentially in each thread,< / span > < / div >
< div class = "line" > < a id = "l00013" name = "l00013" > < / a > < span class = "lineno" > 13< / span > < span class = "comment" > coalesced with accesses from adjacent threads for optimal performance.< / span > < / div >
< div class = "line" > < a id = "l00014" name = "l00014" > < / a > < span class = "lineno" > 14< / span > < span class = "comment" > < / span > < / div >
< div class = "line" > < a id = "l00015" name = "l00015" > < / a > < span class = "lineno" > 15< / span > < span class = "comment" > We implement specialized reading/writing for:< / span > < / div >
< div class = "line" > < a id = "l00016" name = "l00016" > < / a > < span class = "lineno" > 16< / span > < span class = "comment" > - FFT< / span > < / div >
< div class = "line" > < a id = "l00017" name = "l00017" > < / a > < span class = "lineno" > 17< / span > < span class = "comment" > - RFFT< / span > < / div >
< div class = "line" > < a id = "l00018" name = "l00018" > < / a > < span class = "lineno" > 18< / span > < span class = "comment" > - IRFFT< / span > < / div >
< div class = "line" > < a id = "l00019" name = "l00019" > < / a > < span class = "lineno" > 19< / span > < span class = "comment" > < / span > < / div >
< div class = "line" > < a id = "l00020" name = "l00020" > < / a > < span class = "lineno" > 20< / span > < span class = "comment" > Each with support for:< / span > < / div >
< div class = "line" > < a id = "l00021" name = "l00021" > < / a > < span class = "lineno" > 21< / span > < span class = "comment" > - Contiguous reads< / span > < / div >
< div class = "line" > < a id = "l00022" name = "l00022" > < / a > < span class = "lineno" > 22< / span > < span class = "comment" > - Padded reads< / span > < / div >
< div class = "line" > < a id = "l00023" name = "l00023" > < / a > < span class = "lineno" > 23< / span > < span class = "comment" > - Strided reads< / span > < / div >
< div class = "line" > < a id = "l00024" name = "l00024" > < / a > < span class = "lineno" > 24< / span > < span class = "comment" > */< / span > < / div >
< div class = "line" > < a id = "l00025" name = "l00025" > < / a > < span class = "lineno" > 25< / span > < / div >
< div class = "line" > < a id = "l00026" name = "l00026" > < / a > < span class = "lineno" > < a class = "line" href = "readwrite_8h.html#a7b6e56afa21f022c5e754b000955735a" > 26< / a > < / span > < span class = "preprocessor" > #define MAX_RADIX 13< / span > < / div >
< div class = "line" > < a id = "l00027" name = "l00027" > < / a > < span class = "lineno" > 27< / span > < / div >
< div class = "line" > < a id = "l00028" name = "l00028" > < / a > < span class = "lineno" > 28< / span > < span class = "keyword" > using namespace < / span > < a class = "code hl_namespace" href = "namespacemetal.html" > metal< / a > ;< / div >
< div class = "line" > < a id = "l00029" name = "l00029" > < / a > < span class = "lineno" > 29< / span > < / div >
< div class = "line" > < a id = "l00030" name = "l00030" > < / a > < span class = "lineno" > 30< / span > < span class = "keyword" > template< / span > < < / div >
< div class = "line" > < a id = "l00031" name = "l00031" > < / a > < span class = "lineno" > 31< / span > < span class = "keyword" > typename< / span > in_T,< / div >
< div class = "line" > < a id = "l00032" name = "l00032" > < / a > < span class = "lineno" > 32< / span > < span class = "keyword" > typename< / span > out_T,< / div >
< div class = "line" > < a id = "l00033" name = "l00033" > < / a > < span class = "lineno" > 33< / span > < span class = "keywordtype" > int< / span > step = 0,< / div >
< div class = "line" > < a id = "l00034" name = "l00034" > < / a > < span class = "lineno" > 34< / span > < span class = "keywordtype" > bool< / span > four_step_real = < span class = "keyword" > false< / span > > < / div >
< div class = "foldopen" id = "foldopen00035" data-start = "{" data-end = "};" >
< div class = "line" > < a id = "l00035" name = "l00035" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html" > 35< / a > < / span > < span class = "keyword" > struct < / span > < a class = "code hl_struct" href = "struct_read_writer.html" > ReadWriter< / a > {< / div >
< div class = "line" > < a id = "l00036" name = "l00036" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#ab6057215920138f28fd00f0e7ea8afa4" > 36< / a > < / span > < span class = "keyword" > const< / span > device in_T* < a class = "code hl_variable" href = "struct_read_writer.html#ab6057215920138f28fd00f0e7ea8afa4" > in< / a > ;< / div >
< div class = "line" > < a id = "l00037" name = "l00037" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a23bac3c96dd0265ddbee1f256be45ff5" > 37< / a > < / span > threadgroup float2* < a class = "code hl_variable" href = "struct_read_writer.html#a23bac3c96dd0265ddbee1f256be45ff5" > buf< / a > ;< / div >
< div class = "line" > < a id = "l00038" name = "l00038" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#abea3b913c952c505d0ca4e529c7316ef" > 38< / a > < / span > device out_T* < a class = "code hl_variable" href = "struct_read_writer.html#abea3b913c952c505d0ca4e529c7316ef" > out< / a > ;< / div >
< div class = "line" > < a id = "l00039" name = "l00039" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a655346c9ebfc33a69da3f1c1d4238dfb" > 39< / a > < / span > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a655346c9ebfc33a69da3f1c1d4238dfb" > n< / a > ;< / div >
< div class = "line" > < a id = "l00040" name = "l00040" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a689f4890c1d2ce33fc6da7550beec735" > 40< / a > < / span > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a689f4890c1d2ce33fc6da7550beec735" > batch_size< / a > ;< / div >
< div class = "line" > < a id = "l00041" name = "l00041" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a444230a0182ce6ba1898c04ce6e669a7" > 41< / a > < / span > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a444230a0182ce6ba1898c04ce6e669a7" > elems_per_thread< / a > ;< / div >
< div class = "line" > < a id = "l00042" name = "l00042" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > 42< / a > < / span > uint3 < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > ;< / div >
< div class = "line" > < a id = "l00043" name = "l00043" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > 43< / a > < / span > uint3 < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > ;< / div >
< div class = "line" > < a id = "l00044" name = "l00044" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a64c58e358da22358df3075448ea23893" > 44< / a > < / span > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a64c58e358da22358df3075448ea23893" > threads_per_tg< / a > ;< / div >
< div class = "line" > < a id = "l00045" name = "l00045" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a773fa8524515bdc2ff8b0e2060741813" > 45< / a > < / span > < span class = "keywordtype" > bool< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a773fa8524515bdc2ff8b0e2060741813" > inv< / a > ;< / div >
< div class = "line" > < a id = "l00046" name = "l00046" > < / a > < span class = "lineno" > 46< / span > < / div >
< div class = "line" > < a id = "l00047" name = "l00047" > < / a > < span class = "lineno" > 47< / span > < span class = "comment" > // Used for strided access< / span > < / div >
< div class = "line" > < a id = "l00048" name = "l00048" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a4c0b12484aac4fd6759d67c190391989" > 48< / a > < / span > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a4c0b12484aac4fd6759d67c190391989" > strided_device_idx< / a > = 0;< / div >
< div class = "line" > < a id = "l00049" name = "l00049" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#ace40adb02cfb33d89c98353327c251fc" > 49< / a > < / span > < span class = "keywordtype" > int< / span > < a class = "code hl_variable" href = "struct_read_writer.html#ace40adb02cfb33d89c98353327c251fc" > strided_shared_idx< / a > = 0;< / div >
< div class = "line" > < a id = "l00050" name = "l00050" > < / a > < span class = "lineno" > 50< / span > < / div >
< div class = "foldopen" id = "foldopen00051" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00051" name = "l00051" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a1aa07e41d7ac286ad79bd26a072dfa0c" > 51< / a > < / span > METAL_FUNC < a class = "code hl_function" href = "struct_read_writer.html#a1aa07e41d7ac286ad79bd26a072dfa0c" > ReadWriter< / a > (< / div >
< div class = "line" > < a id = "l00052" name = "l00052" > < / a > < span class = "lineno" > 52< / span > < span class = "keyword" > const< / span > device in_T* in_,< / div >
< div class = "line" > < a id = "l00053" name = "l00053" > < / a > < span class = "lineno" > 53< / span > threadgroup float2* buf_,< / div >
< div class = "line" > < a id = "l00054" name = "l00054" > < / a > < span class = "lineno" > 54< / span > device out_T* out_,< / div >
< div class = "line" > < a id = "l00055" name = "l00055" > < / a > < span class = "lineno" > 55< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > n_,< / div >
< div class = "line" > < a id = "l00056" name = "l00056" > < / a > < span class = "lineno" > 56< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > int< / span > batch_size_,< / div >
< div class = "line" > < a id = "l00057" name = "l00057" > < / a > < span class = "lineno" > 57< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > short< / span > < a class = "code hl_variable" href = "backend_2metal_2kernels_2fft_8h.html#ad395c11e6f2aee72cd1928fba93a35a3" > elems_per_thread_< / a > ,< / div >
< div class = "line" > < a id = "l00058" name = "l00058" > < / a > < span class = "lineno" > 58< / span > < span class = "keyword" > const< / span > uint3 elem_,< / div >
< div class = "line" > < a id = "l00059" name = "l00059" > < / a > < span class = "lineno" > 59< / span > < span class = "keyword" > const< / span > uint3 grid_,< / div >
< div class = "line" > < a id = "l00060" name = "l00060" > < / a > < span class = "lineno" > 60< / span > < span class = "keyword" > const< / span > < span class = "keywordtype" > bool< / span > < a class = "code hl_variable" href = "backend_2metal_2kernels_2fft_8h.html#a7a83318497519ff3ff0141b7d511ed38" > inv_< / a > )< / div >
< div class = "line" > < a id = "l00061" name = "l00061" > < / a > < span class = "lineno" > 61< / span > : < a class = "code hl_variable" href = "struct_read_writer.html#ab6057215920138f28fd00f0e7ea8afa4" > in< / a > (in_),< / div >
< div class = "line" > < a id = "l00062" name = "l00062" > < / a > < span class = "lineno" > 62< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a23bac3c96dd0265ddbee1f256be45ff5" > buf< / a > (buf_),< / div >
< div class = "line" > < a id = "l00063" name = "l00063" > < / a > < span class = "lineno" > 63< / span > < a class = "code hl_variable" href = "struct_read_writer.html#abea3b913c952c505d0ca4e529c7316ef" > out< / a > (out_),< / div >
< div class = "line" > < a id = "l00064" name = "l00064" > < / a > < span class = "lineno" > 64< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a655346c9ebfc33a69da3f1c1d4238dfb" > n< / a > (n_),< / div >
< div class = "line" > < a id = "l00065" name = "l00065" > < / a > < span class = "lineno" > 65< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a689f4890c1d2ce33fc6da7550beec735" > batch_size< / a > (batch_size_),< / div >
< div class = "line" > < a id = "l00066" name = "l00066" > < / a > < span class = "lineno" > 66< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a444230a0182ce6ba1898c04ce6e669a7" > elems_per_thread< / a > (< a class = "code hl_variable" href = "backend_2metal_2kernels_2fft_8h.html#ad395c11e6f2aee72cd1928fba93a35a3" > elems_per_thread_< / a > ),< / div >
< div class = "line" > < a id = "l00067" name = "l00067" > < / a > < span class = "lineno" > 67< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > (elem_),< / div >
< div class = "line" > < a id = "l00068" name = "l00068" > < / a > < span class = "lineno" > 68< / span > < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > (grid_),< / div >
< div class = "line" > < a id = "l00069" name = "l00069" > < / a > < span class = "lineno" > 69< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a773fa8524515bdc2ff8b0e2060741813" > inv< / a > (< a class = "code hl_variable" href = "backend_2metal_2kernels_2fft_8h.html#a7a83318497519ff3ff0141b7d511ed38" > inv_< / a > ) {< / div >
< div class = "line" > < a id = "l00070" name = "l00070" > < / a > < span class = "lineno" > 70< / span > < span class = "comment" > // Account for padding on last threadgroup< / span > < / div >
< div class = "line" > < a id = "l00071" name = "l00071" > < / a > < span class = "lineno" > 71< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a64c58e358da22358df3075448ea23893" > threads_per_tg< / a > = < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .x == < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > .x - 1< / div >
< div class = "line" > < a id = "l00072" name = "l00072" > < / a > < span class = "lineno" > 72< / span > ? (< a class = "code hl_variable" href = "struct_read_writer.html#a689f4890c1d2ce33fc6da7550beec735" > batch_size< / a > - (< a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > .x - 1) * < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > .y) * < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > .z< / div >
< div class = "line" > < a id = "l00073" name = "l00073" > < / a > < span class = "lineno" > 73< / span > : < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > .y * < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > .z;< / div >
< div class = "line" > < a id = "l00074" name = "l00074" > < / a > < span class = "lineno" > 74< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00075" name = "l00075" > < / a > < span class = "lineno" > 75< / span > < / div >
< div class = "line" > < a id = "l00076" name = "l00076" > < / a > < span class = "lineno" > 76< / span > < span class = "comment" > // ifft(x) = 1/n * conj(fft(conj(x)))< / span > < / div >
< div class = "foldopen" id = "foldopen00077" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00077" name = "l00077" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#ab555cec93b66eead607e6a03d9324e1c" > 77< / a > < / span > METAL_FUNC float2 < a class = "code hl_function" href = "struct_read_writer.html#ab555cec93b66eead607e6a03d9324e1c" > post_in< / a > (float2 < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > )< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00078" name = "l00078" > < / a > < span class = "lineno" > 78< / span > < span class = "keywordflow" > return< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a773fa8524515bdc2ff8b0e2060741813" > inv< / a > ? float2(< a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .x, -< a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .y) : < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > ;< / div >
< div class = "line" > < a id = "l00079" name = "l00079" > < / a > < span class = "lineno" > 79< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00080" name = "l00080" > < / a > < span class = "lineno" > 80< / span > < / div >
< div class = "line" > < a id = "l00081" name = "l00081" > < / a > < span class = "lineno" > 81< / span > < span class = "comment" > // Handle float case for generic RFFT alg< / span > < / div >
< div class = "foldopen" id = "foldopen00082" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00082" name = "l00082" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a6c47a25b2135393045fa5f95ada59d9d" > 82< / a > < / span > METAL_FUNC float2 < a class = "code hl_function" href = "struct_read_writer.html#a6c47a25b2135393045fa5f95ada59d9d" > post_in< / a > (< span class = "keywordtype" > float< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > )< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00083" name = "l00083" > < / a > < span class = "lineno" > 83< / span > < span class = "keywordflow" > return< / span > float2(< a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > , 0);< / div >
< div class = "line" > < a id = "l00084" name = "l00084" > < / a > < span class = "lineno" > 84< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00085" name = "l00085" > < / a > < span class = "lineno" > 85< / span > < / div >
< div class = "foldopen" id = "foldopen00086" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00086" name = "l00086" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a94da8aa85fa2916eaa3eaeb11499234a" > 86< / a > < / span > METAL_FUNC float2 < a class = "code hl_function" href = "struct_read_writer.html#a94da8aa85fa2916eaa3eaeb11499234a" > pre_out< / a > (float2 < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > )< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00087" name = "l00087" > < / a > < span class = "lineno" > 87< / span > < span class = "keywordflow" > return< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a773fa8524515bdc2ff8b0e2060741813" > inv< / a > ? float2(< a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .x / < a class = "code hl_variable" href = "struct_read_writer.html#a655346c9ebfc33a69da3f1c1d4238dfb" > n< / a > , -< a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .y / < a class = "code hl_variable" href = "struct_read_writer.html#a655346c9ebfc33a69da3f1c1d4238dfb" > n< / a > ) : < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > ;< / div >
< div class = "line" > < a id = "l00088" name = "l00088" > < / a > < span class = "lineno" > 88< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00089" name = "l00089" > < / a > < span class = "lineno" > 89< / span > < / div >
< div class = "foldopen" id = "foldopen00090" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00090" name = "l00090" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a185553204b07a407ef02c41dd78e8239" > 90< / a > < / span > METAL_FUNC float2 < a class = "code hl_function" href = "struct_read_writer.html#a185553204b07a407ef02c41dd78e8239" > pre_out< / a > (float2 < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > , < span class = "keywordtype" > int< / span > length)< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00091" name = "l00091" > < / a > < span class = "lineno" > 91< / span > < span class = "keywordflow" > return< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a773fa8524515bdc2ff8b0e2060741813" > inv< / a > ? float2(< a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .x / length, -< a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .y / length) : < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > ;< / div >
< div class = "line" > < a id = "l00092" name = "l00092" > < / a > < span class = "lineno" > 92< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00093" name = "l00093" > < / a > < span class = "lineno" > 93< / span > < / div >
< div class = "foldopen" id = "foldopen00094" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00094" name = "l00094" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a08e10626fbc789b6dff9172fd6c36f7c" > 94< / a > < / span > METAL_FUNC < span class = "keywordtype" > bool< / span > < a class = "code hl_function" href = "struct_read_writer.html#a08e10626fbc789b6dff9172fd6c36f7c" > out_of_bounds< / a > ()< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00095" name = "l00095" > < / a > < span class = "lineno" > 95< / span > < span class = "comment" > // Account for possible extra threadgroups< / span > < / div >
< div class = "line" > < a id = "l00096" name = "l00096" > < / a > < span class = "lineno" > 96< / span > < span class = "keywordtype" > int< / span > grid_index = < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .x * < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > .y + < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .y;< / div >
< div class = "line" > < a id = "l00097" name = "l00097" > < / a > < span class = "lineno" > 97< / span > < span class = "keywordflow" > return< / span > grid_index > = < a class = "code hl_variable" href = "struct_read_writer.html#a689f4890c1d2ce33fc6da7550beec735" > batch_size< / a > ;< / div >
< div class = "line" > < a id = "l00098" name = "l00098" > < / a > < span class = "lineno" > 98< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00099" name = "l00099" > < / a > < span class = "lineno" > 99< / span > < / div >
< div class = "foldopen" id = "foldopen00100" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00100" name = "l00100" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a120eaf4b5f32e80972a18d14e82a2d75" > 100< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "struct_read_writer.html#a120eaf4b5f32e80972a18d14e82a2d75" > load< / a > ()< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00101" name = "l00101" > < / a > < span class = "lineno" > 101< / span > < span class = "keywordtype" > int< / span > batch_idx = < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .x * < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > .y * < a class = "code hl_variable" href = "struct_read_writer.html#a655346c9ebfc33a69da3f1c1d4238dfb" > n< / a > ;< / div >
< div class = "line" > < a id = "l00102" name = "l00102" > < / a > < span class = "lineno" > 102< / span > < span class = "keywordtype" > short< / span > tg_idx = < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .y * < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > .z + < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .z;< / div >
< div class = "line" > < a id = "l00103" name = "l00103" > < / a > < span class = "lineno" > 103< / span > < span class = "keywordtype" > short< / span > max_index = < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > .y * < a class = "code hl_variable" href = "struct_read_writer.html#a655346c9ebfc33a69da3f1c1d4238dfb" > n< / a > - 2;< / div >
< div class = "line" > < a id = "l00104" name = "l00104" > < / a > < span class = "lineno" > 104< / span > < / div >
< div class = "line" > < a id = "l00105" name = "l00105" > < / a > < span class = "lineno" > 105< / span > < span class = "comment" > // 2 complex64s = 128 bits< / span > < / div >
< div class = "line" > < a id = "l00106" name = "l00106" > < / a > < span class = "lineno" > 106< / span > < span class = "keyword" > constexpr< / span > < span class = "keywordtype" > int< / span > read_width = 2;< / div >
< div class = "line" > < a id = "l00107" name = "l00107" > < / a > < span class = "lineno" > 107< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > e = 0; e &lt; (< a class = "code hl_variable" href = "struct_read_writer.html#a444230a0182ce6ba1898c04ce6e669a7" > elems_per_thread< / a > / read_width); e++) {< / div >
< div class = "line" > < a id = "l00108" name = "l00108" > < / a > < span class = "lineno" > 108< / span > < span class = "keywordtype" > short< / span > index = read_width * tg_idx + read_width * < a class = "code hl_variable" href = "struct_read_writer.html#a64c58e358da22358df3075448ea23893" > threads_per_tg< / a > * e;< / div >
< div class = "line" > < a id = "l00109" name = "l00109" > < / a > < span class = "lineno" > 109< / span > index = < a class = "code hl_function" href = "namespacemetal.html#a6653b28c9473087141eddce39878d4d3" > metal::min< / a > (index, max_index);< / div >
< div class = "line" > < a id = "l00110" name = "l00110" > < / a > < span class = "lineno" > 110< / span > < span class = "comment" > // vectorized reads< / span > < / div >
< div class = "line" > < a id = "l00111" name = "l00111" > < / a > < span class = "lineno" > 111< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a23bac3c96dd0265ddbee1f256be45ff5" > buf< / a > [index] = < a class = "code hl_function" href = "struct_read_writer.html#ab555cec93b66eead607e6a03d9324e1c" > post_in< / a > (< a class = "code hl_variable" href = "struct_read_writer.html#ab6057215920138f28fd00f0e7ea8afa4" > in< / a > [batch_idx + index]);< / div >
< div class = "line" > < a id = "l00112" name = "l00112" > < / a > < span class = "lineno" > 112< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a23bac3c96dd0265ddbee1f256be45ff5" > buf< / a > [index + 1] = < a class = "code hl_function" href = "struct_read_writer.html#ab555cec93b66eead607e6a03d9324e1c" > post_in< / a > (< a class = "code hl_variable" href = "struct_read_writer.html#ab6057215920138f28fd00f0e7ea8afa4" > in< / a > [batch_idx + index + 1]);< / div >
< div class = "line" > < a id = "l00113" name = "l00113" > < / a > < span class = "lineno" > 113< / span > }< / div >
< div class = "line" > < a id = "l00114" name = "l00114" > < / a > < span class = "lineno" > 114< / span > max_index += 1;< / div >
< div class = "line" > < a id = "l00115" name = "l00115" > < / a > < span class = "lineno" > 115< / span > < span class = "keywordflow" > if< / span > (< a class = "code hl_variable" href = "struct_read_writer.html#a444230a0182ce6ba1898c04ce6e669a7" > elems_per_thread< / a > % 2 != 0) {< / div >
< div class = "line" > < a id = "l00116" name = "l00116" > < / a > < span class = "lineno" > 116< / span > < span class = "keywordtype" > short< / span > index = tg_idx +< / div >
< div class = "line" > < a id = "l00117" name = "l00117" > < / a > < span class = "lineno" > 117< / span > read_width * < a class = "code hl_variable" href = "struct_read_writer.html#a64c58e358da22358df3075448ea23893" > threads_per_tg< / a > * (< a class = "code hl_variable" href = "struct_read_writer.html#a444230a0182ce6ba1898c04ce6e669a7" > elems_per_thread< / a > / read_width);< / div >
< div class = "line" > < a id = "l00118" name = "l00118" > < / a > < span class = "lineno" > 118< / span > index = < a class = "code hl_function" href = "namespacemetal.html#a6653b28c9473087141eddce39878d4d3" > metal::min< / a > (index, max_index);< / div >
< div class = "line" > < a id = "l00119" name = "l00119" > < / a > < span class = "lineno" > 119< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a23bac3c96dd0265ddbee1f256be45ff5" > buf< / a > [index] = < a class = "code hl_function" href = "struct_read_writer.html#ab555cec93b66eead607e6a03d9324e1c" > post_in< / a > (< a class = "code hl_variable" href = "struct_read_writer.html#ab6057215920138f28fd00f0e7ea8afa4" > in< / a > [batch_idx + index]);< / div >
< div class = "line" > < a id = "l00120" name = "l00120" > < / a > < span class = "lineno" > 120< / span > }< / div >
< div class = "line" > < a id = "l00121" name = "l00121" > < / a > < span class = "lineno" > 121< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00122" name = "l00122" > < / a > < span class = "lineno" > 122< / span > < / div >
< div class = "foldopen" id = "foldopen00123" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00123" name = "l00123" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#ac2ea71e41740ddc863890e3e8e6f09d0" > 123< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "struct_read_writer.html#ac2ea71e41740ddc863890e3e8e6f09d0" > write< / a > ()< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00124" name = "l00124" > < / a > < span class = "lineno" > 124< / span > < span class = "keywordtype" > int< / span > batch_idx = < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .x * < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > .y * < a class = "code hl_variable" href = "struct_read_writer.html#a655346c9ebfc33a69da3f1c1d4238dfb" > n< / a > ;< / div >
< div class = "line" > < a id = "l00125" name = "l00125" > < / a > < span class = "lineno" > 125< / span > < span class = "keywordtype" > short< / span > tg_idx = < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .y * < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > .z + < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .z;< / div >
< div class = "line" > < a id = "l00126" name = "l00126" > < / a > < span class = "lineno" > 126< / span > < span class = "keywordtype" > short< / span > max_index = < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > .y * < a class = "code hl_variable" href = "struct_read_writer.html#a655346c9ebfc33a69da3f1c1d4238dfb" > n< / a > - 2;< / div >
< div class = "line" > < a id = "l00127" name = "l00127" > < / a > < span class = "lineno" > 127< / span > < / div >
< div class = "line" > < a id = "l00128" name = "l00128" > < / a > < span class = "lineno" > 128< / span > < span class = "keyword" > constexpr< / span > < span class = "keywordtype" > int< / span > read_width = 2;< / div >
< div class = "line" > < a id = "l00129" name = "l00129" > < / a > < span class = "lineno" > 129< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > short< / span > e = 0; e &lt; (< a class = "code hl_variable" href = "struct_read_writer.html#a444230a0182ce6ba1898c04ce6e669a7" > elems_per_thread< / a > / read_width); e++) {< / div >
< div class = "line" > < a id = "l00130" name = "l00130" > < / a > < span class = "lineno" > 130< / span > < span class = "keywordtype" > short< / span > index = read_width * tg_idx + read_width * < a class = "code hl_variable" href = "struct_read_writer.html#a64c58e358da22358df3075448ea23893" > threads_per_tg< / a > * e;< / div >
< div class = "line" > < a id = "l00131" name = "l00131" > < / a > < span class = "lineno" > 131< / span > index = < a class = "code hl_function" href = "namespacemetal.html#a6653b28c9473087141eddce39878d4d3" > metal::min< / a > (index, max_index);< / div >
< div class = "line" > < a id = "l00132" name = "l00132" > < / a > < span class = "lineno" > 132< / span > < span class = "comment" > // vectorized reads< / span > < / div >
< div class = "line" > < a id = "l00133" name = "l00133" > < / a > < span class = "lineno" > 133< / span > < a class = "code hl_variable" href = "struct_read_writer.html#abea3b913c952c505d0ca4e529c7316ef" > out< / a > [batch_idx + index] = < a class = "code hl_function" href = "struct_read_writer.html#a94da8aa85fa2916eaa3eaeb11499234a" > pre_out< / a > (< a class = "code hl_variable" href = "struct_read_writer.html#a23bac3c96dd0265ddbee1f256be45ff5" > buf< / a > [index]);< / div >
< div class = "line" > < a id = "l00134" name = "l00134" > < / a > < span class = "lineno" > 134< / span > < a class = "code hl_variable" href = "struct_read_writer.html#abea3b913c952c505d0ca4e529c7316ef" > out< / a > [batch_idx + index + 1] = < a class = "code hl_function" href = "struct_read_writer.html#a94da8aa85fa2916eaa3eaeb11499234a" > pre_out< / a > (< a class = "code hl_variable" href = "struct_read_writer.html#a23bac3c96dd0265ddbee1f256be45ff5" > buf< / a > [index + 1]);< / div >
< div class = "line" > < a id = "l00135" name = "l00135" > < / a > < span class = "lineno" > 135< / span > }< / div >
< div class = "line" > < a id = "l00136" name = "l00136" > < / a > < span class = "lineno" > 136< / span > max_index += 1;< / div >
< div class = "line" > < a id = "l00137" name = "l00137" > < / a > < span class = "lineno" > 137< / span > < span class = "keywordflow" > if< / span > (< a class = "code hl_variable" href = "struct_read_writer.html#a444230a0182ce6ba1898c04ce6e669a7" > elems_per_thread< / a > % 2 != 0) {< / div >
< div class = "line" > < a id = "l00138" name = "l00138" > < / a > < span class = "lineno" > 138< / span > < span class = "keywordtype" > short< / span > index = tg_idx +< / div >
< div class = "line" > < a id = "l00139" name = "l00139" > < / a > < span class = "lineno" > 139< / span > read_width * < a class = "code hl_variable" href = "struct_read_writer.html#a64c58e358da22358df3075448ea23893" > threads_per_tg< / a > * (< a class = "code hl_variable" href = "struct_read_writer.html#a444230a0182ce6ba1898c04ce6e669a7" > elems_per_thread< / a > / read_width);< / div >
< div class = "line" > < a id = "l00140" name = "l00140" > < / a > < span class = "lineno" > 140< / span > index = < a class = "code hl_function" href = "namespacemetal.html#a6653b28c9473087141eddce39878d4d3" > metal::min< / a > (index, max_index);< / div >
< div class = "line" > < a id = "l00141" name = "l00141" > < / a > < span class = "lineno" > 141< / span > < a class = "code hl_variable" href = "struct_read_writer.html#abea3b913c952c505d0ca4e529c7316ef" > out< / a > [batch_idx + index] = < a class = "code hl_function" href = "struct_read_writer.html#a94da8aa85fa2916eaa3eaeb11499234a" > pre_out< / a > (< a class = "code hl_variable" href = "struct_read_writer.html#a23bac3c96dd0265ddbee1f256be45ff5" > buf< / a > [index]);< / div >
< div class = "line" > < a id = "l00142" name = "l00142" > < / a > < span class = "lineno" > 142< / span > }< / div >
< div class = "line" > < a id = "l00143" name = "l00143" > < / a > < span class = "lineno" > 143< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00144" name = "l00144" > < / a > < span class = "lineno" > 144< / span > < / div >
< div class = "line" > < a id = "l00145" name = "l00145" > < / a > < span class = "lineno" > 145< / span > < span class = "comment" > // Padded IO for Bluestein's algorithm< / span > < / div >
< div class = "foldopen" id = "foldopen00146" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00146" name = "l00146" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#add5bd3f647793a5a19d63197a19df73c" > 146< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "struct_read_writer.html#add5bd3f647793a5a19d63197a19df73c" > load_padded< / a > (< span class = "keywordtype" > int< / span > length, < span class = "keyword" > const< / span > device float2* w_k)< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00147" name = "l00147" > < / a > < span class = "lineno" > 147< / span > < span class = "keywordtype" > int< / span > batch_idx = < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .x * < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > .y * length + < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .y * length;< / div >
< div class = "line" > < a id = "l00148" name = "l00148" > < / a > < span class = "lineno" > 148< / span > < span class = "keywordtype" > int< / span > fft_idx = < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .z;< / div >
< div class = "line" > < a id = "l00149" name = "l00149" > < / a > < span class = "lineno" > 149< / span > < span class = "keywordtype" > int< / span > m = < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > .z;< / div >
< div class = "line" > < a id = "l00150" name = "l00150" > < / a > < span class = "lineno" > 150< / span > < / div >
< div class = "line" > < a id = "l00151" name = "l00151" > < / a > < span class = "lineno" > 151< / span > threadgroup float2* seq_buf = < a class = "code hl_variable" href = "struct_read_writer.html#a23bac3c96dd0265ddbee1f256be45ff5" > buf< / a > + < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .y * < a class = "code hl_variable" href = "struct_read_writer.html#a655346c9ebfc33a69da3f1c1d4238dfb" > n< / a > ;< / div >
< div class = "line" > < a id = "l00152" name = "l00152" > < / a > < span class = "lineno" > 152< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > e = 0; e &lt; < a class = "code hl_variable" href = "struct_read_writer.html#a444230a0182ce6ba1898c04ce6e669a7" > elems_per_thread< / a > ; e++) {< / div >
< div class = "line" > < a id = "l00153" name = "l00153" > < / a > < span class = "lineno" > 153< / span > < span class = "keywordtype" > int< / span > index = < a class = "code hl_function" href = "namespacemetal.html#a6653b28c9473087141eddce39878d4d3" > metal::min< / a > (fft_idx + e * m, < a class = "code hl_variable" href = "struct_read_writer.html#a655346c9ebfc33a69da3f1c1d4238dfb" > n< / a > - 1);< / div >
< div class = "line" > < a id = "l00154" name = "l00154" > < / a > < span class = "lineno" > 154< / span > < span class = "keywordflow" > if< / span > (index &lt; length) {< / div >
< div class = "line" > < a id = "l00155" name = "l00155" > < / a > < span class = "lineno" > 155< / span > float2 < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > = < a class = "code hl_function" href = "struct_read_writer.html#ab555cec93b66eead607e6a03d9324e1c" > post_in< / a > (< a class = "code hl_variable" href = "struct_read_writer.html#ab6057215920138f28fd00f0e7ea8afa4" > in< / a > [batch_idx + index]);< / div >
< div class = "line" > < a id = "l00156" name = "l00156" > < / a > < span class = "lineno" > 156< / span > seq_buf[index] = < a class = "code hl_function" href = "radix_8h.html#a5bfc53b531214c9ce277bebc18aa67d6" > complex_mul< / a > (< a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > , w_k[index]);< / div >
< div class = "line" > < a id = "l00157" name = "l00157" > < / a > < span class = "lineno" > 157< / span > } < span class = "keywordflow" > else< / span > {< / div >
< div class = "line" > < a id = "l00158" name = "l00158" > < / a > < span class = "lineno" > 158< / span > seq_buf[index] = 0.0;< / div >
< div class = "line" > < a id = "l00159" name = "l00159" > < / a > < span class = "lineno" > 159< / span > }< / div >
< div class = "line" > < a id = "l00160" name = "l00160" > < / a > < span class = "lineno" > 160< / span > }< / div >
< div class = "line" > < a id = "l00161" name = "l00161" > < / a > < span class = "lineno" > 161< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00162" name = "l00162" > < / a > < span class = "lineno" > 162< / span > < / div >
< div class = "foldopen" id = "foldopen00163" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00163" name = "l00163" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a95367307acace2aa88226cf8956d2d88" > 163< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "struct_read_writer.html#a95367307acace2aa88226cf8956d2d88" > write_padded< / a > (< span class = "keywordtype" > int< / span > length, < span class = "keyword" > const< / span > device float2* w_k)< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00164" name = "l00164" > < / a > < span class = "lineno" > 164< / span > < span class = "keywordtype" > int< / span > batch_idx = < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .x * < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > .y * length + < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .y * length;< / div >
< div class = "line" > < a id = "l00165" name = "l00165" > < / a > < span class = "lineno" > 165< / span > < span class = "keywordtype" > int< / span > fft_idx = < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .z;< / div >
< div class = "line" > < a id = "l00166" name = "l00166" > < / a > < span class = "lineno" > 166< / span > < span class = "keywordtype" > int< / span > m = < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > .z;< / div >
< div class = "line" > < a id = "l00167" name = "l00167" > < / a > < span class = "lineno" > 167< / span > float2 inv_factor = {1.0f / < a class = "code hl_variable" href = "struct_read_writer.html#a655346c9ebfc33a69da3f1c1d4238dfb" > n< / a > , -1.0f / < a class = "code hl_variable" href = "struct_read_writer.html#a655346c9ebfc33a69da3f1c1d4238dfb" > n< / a > };< / div >
< div class = "line" > < a id = "l00168" name = "l00168" > < / a > < span class = "lineno" > 168< / span > < / div >
< div class = "line" > < a id = "l00169" name = "l00169" > < / a > < span class = "lineno" > 169< / span > threadgroup float2* seq_buf = < a class = "code hl_variable" href = "struct_read_writer.html#a23bac3c96dd0265ddbee1f256be45ff5" > buf< / a > + < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .y * < a class = "code hl_variable" href = "struct_read_writer.html#a655346c9ebfc33a69da3f1c1d4238dfb" > n< / a > ;< / div >
< div class = "line" > < a id = "l00170" name = "l00170" > < / a > < span class = "lineno" > 170< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > e = 0; e &lt; < a class = "code hl_variable" href = "struct_read_writer.html#a444230a0182ce6ba1898c04ce6e669a7" > elems_per_thread< / a > ; e++) {< / div >
< div class = "line" > < a id = "l00171" name = "l00171" > < / a > < span class = "lineno" > 171< / span > < span class = "keywordtype" > int< / span > index = < a class = "code hl_function" href = "namespacemetal.html#a6653b28c9473087141eddce39878d4d3" > metal::min< / a > (fft_idx + e * m, < a class = "code hl_variable" href = "struct_read_writer.html#a655346c9ebfc33a69da3f1c1d4238dfb" > n< / a > - 1);< / div >
< div class = "line" > < a id = "l00172" name = "l00172" > < / a > < span class = "lineno" > 172< / span > < span class = "keywordflow" > if< / span > (index &lt; length) {< / div >
< div class = "line" > < a id = "l00173" name = "l00173" > < / a > < span class = "lineno" > 173< / span > float2 < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > = seq_buf[index + length - 1] * inv_factor;< / div >
< div class = "line" > < a id = "l00174" name = "l00174" > < / a > < span class = "lineno" > 174< / span > < a class = "code hl_variable" href = "struct_read_writer.html#abea3b913c952c505d0ca4e529c7316ef" > out< / a > [batch_idx + index] = < a class = "code hl_function" href = "struct_read_writer.html#a94da8aa85fa2916eaa3eaeb11499234a" > pre_out< / a > (< a class = "code hl_function" href = "radix_8h.html#a5bfc53b531214c9ce277bebc18aa67d6" > complex_mul< / a > (< a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > , w_k[index]), length);< / div >
< div class = "line" > < a id = "l00175" name = "l00175" > < / a > < span class = "lineno" > 175< / span > }< / div >
< div class = "line" > < a id = "l00176" name = "l00176" > < / a > < span class = "lineno" > 176< / span > }< / div >
< div class = "line" > < a id = "l00177" name = "l00177" > < / a > < span class = "lineno" > 177< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00178" name = "l00178" > < / a > < span class = "lineno" > 178< / span > < / div >
< div class = "line" > < a id = "l00179" name = "l00179" > < / a > < span class = "lineno" > 179< / span > < span class = "comment" > // Strided IO for four step FFT< / span > < / div >
< div class = "foldopen" id = "foldopen00180" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00180" name = "l00180" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a7c903fbb8b85a856ba5564d7df537cdf" > 180< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "struct_read_writer.html#a7c903fbb8b85a856ba5564d7df537cdf" > compute_strided_indices< / a > (< span class = "keywordtype" > int< / span > stride, < span class = "keywordtype" > int< / span > overall_n) {< / div >
< div class = "line" > < a id = "l00181" name = "l00181" > < / a > < span class = "lineno" > 181< / span > < span class = "comment" > // Use the batch threadgroup dimension to coalesce memory accesses:< / span > < / div >
< div class = "line" > < a id = "l00182" name = "l00182" > < / a > < span class = "lineno" > 182< / span > < span class = "comment" > // e.g. stride = 12< / span > < / div >
< div class = "line" > < a id = "l00183" name = "l00183" > < / a > < span class = "lineno" > 183< / span > < span class = "comment" > // device | shared mem< / span > < / div >
< div class = "line" > < a id = "l00184" name = "l00184" > < / a > < span class = "lineno" > 184< / span > < span class = "comment" > // 0 1 2 3 | 0 12 - -< / span > < / div >
< div class = "line" > < a id = "l00185" name = "l00185" > < / a > < span class = "lineno" > 185< / span > < span class = "comment" > // - - - - | 1 13 - -< / span > < / div >
< div class = "line" > < a id = "l00186" name = "l00186" > < / a > < span class = "lineno" > 186< / span > < span class = "comment" > // - - - - | 2 14 - -< / span > < / div >
< div class = "line" > < a id = "l00187" name = "l00187" > < / a > < span class = "lineno" > 187< / span > < span class = "comment" > // 12 13 14 15 | 3 15 - -< / span > < / div >
< div class = "line" > < a id = "l00188" name = "l00188" > < / a > < span class = "lineno" > 188< / span > < span class = "keywordtype" > int< / span > coalesce_width = < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > .y;< / div >
< div class = "line" > < a id = "l00189" name = "l00189" > < / a > < span class = "lineno" > 189< / span > < span class = "keywordtype" > int< / span > tg_idx = < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .y * < a class = "code hl_variable" href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > grid< / a > .z + < a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .z;< / div >
< div class = "line" > < a id = "l00190" name = "l00190" > < / a > < span class = "lineno" > 190< / span > < span class = "keywordtype" > int< / span > outer_batch_size = stride / coalesce_width;< / div >
< div class = "line" > < a id = "l00191" name = "l00191" > < / a > < span class = "lineno" > 191< / span > < / div >
< div class = "line" > < a id = "l00192" name = "l00192" > < / a > < span class = "lineno" > 192< / span > < span class = "keywordtype" > int< / span > strided_batch_idx = (< a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .x % outer_batch_size) * coalesce_width +< / div >
< div class = "line" > < a id = "l00193" name = "l00193" > < / a > < span class = "lineno" > 193< / span > overall_n * (< a class = "code hl_variable" href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > elem< / a > .x / outer_batch_size);< / div >
< div class = "line" > < a id = "l00194" name = "l00194" > < / a > < span class = "lineno" > 194< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a4c0b12484aac4fd6759d67c190391989" > strided_device_idx< / a > = strided_batch_idx +< / div >
< div class = "line" > < a id = "l00195" name = "l00195" > < / a > < span class = "lineno" > 195< / span > tg_idx / coalesce_width * < a class = "code hl_variable" href = "struct_read_writer.html#a444230a0182ce6ba1898c04ce6e669a7" > elems_per_thread< / a > * stride +< / div >
< div class = "line" > < a id = "l00196" name = "l00196" > < / a > < span class = "lineno" > 196< / span > tg_idx % coalesce_width;< / div >
< div class = "line" > < a id = "l00197" name = "l00197" > < / a > < span class = "lineno" > 197< / span > < a class = "code hl_variable" href = "struct_read_writer.html#ace40adb02cfb33d89c98353327c251fc" > strided_shared_idx< / a > = (tg_idx % coalesce_width) * < a class = "code hl_variable" href = "struct_read_writer.html#a655346c9ebfc33a69da3f1c1d4238dfb" > n< / a > +< / div >
< div class = "line" > < a id = "l00198" name = "l00198" > < / a > < span class = "lineno" > 198< / span > tg_idx / coalesce_width * < a class = "code hl_variable" href = "struct_read_writer.html#a444230a0182ce6ba1898c04ce6e669a7" > elems_per_thread< / a > ;< / div >
< div class = "line" > < a id = "l00199" name = "l00199" > < / a > < span class = "lineno" > 199< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00200" name = "l00200" > < / a > < span class = "lineno" > 200< / span > < / div >
< div class = "line" > < a id = "l00201" name = "l00201" > < / a > < span class = "lineno" > 201< / span > < span class = "comment" > // Four Step FFT First Step< / span > < / div >
< div class = "foldopen" id = "foldopen00202" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00202" name = "l00202" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a998ef484bade81f726b9edfc6b878197" > 202< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "struct_read_writer.html#a998ef484bade81f726b9edfc6b878197" > load_strided< / a > (< span class = "keywordtype" > int< / span > stride, < span class = "keywordtype" > int< / span > overall_n) {< / div >
< div class = "line" > < a id = "l00203" name = "l00203" > < / a > < span class = "lineno" > 203< / span > < a class = "code hl_function" href = "struct_read_writer.html#a7c903fbb8b85a856ba5564d7df537cdf" > compute_strided_indices< / a > (stride, overall_n);< / div >
< div class = "line" > < a id = "l00204" name = "l00204" > < / a > < span class = "lineno" > 204< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > e = 0; e &lt; < a class = "code hl_variable" href = "struct_read_writer.html#a444230a0182ce6ba1898c04ce6e669a7" > elems_per_thread< / a > ; e++) {< / div >
< div class = "line" > < a id = "l00205" name = "l00205" > < / a > < span class = "lineno" > 205< / span > < a class = "code hl_variable" href = "struct_read_writer.html#a23bac3c96dd0265ddbee1f256be45ff5" > buf< / a > [< a class = "code hl_variable" href = "struct_read_writer.html#ace40adb02cfb33d89c98353327c251fc" > strided_shared_idx< / a > + e] =< / div >
< div class = "line" > < a id = "l00206" name = "l00206" > < / a > < span class = "lineno" > 206< / span > < a class = "code hl_function" href = "struct_read_writer.html#ab555cec93b66eead607e6a03d9324e1c" > post_in< / a > (< a class = "code hl_variable" href = "struct_read_writer.html#ab6057215920138f28fd00f0e7ea8afa4" > in< / a > [< a class = "code hl_variable" href = "struct_read_writer.html#a4c0b12484aac4fd6759d67c190391989" > strided_device_idx< / a > + e * stride]);< / div >
< div class = "line" > < a id = "l00207" name = "l00207" > < / a > < span class = "lineno" > 207< / span > }< / div >
< div class = "line" > < a id = "l00208" name = "l00208" > < / a > < span class = "lineno" > 208< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00209" name = "l00209" > < / a > < span class = "lineno" > 209< / span > < / div >
< div class = "foldopen" id = "foldopen00210" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00210" name = "l00210" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a77a4d7eac217305e22a3c25b3756ef67" > 210< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "struct_read_writer.html#a77a4d7eac217305e22a3c25b3756ef67" > write_strided< / a > (< span class = "keywordtype" > int< / span > stride, < span class = "keywordtype" > int< / span > overall_n) {< / div >
< div class = "line" > < a id = "l00211" name = "l00211" > < / a > < span class = "lineno" > 211< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > e = 0; e &lt; < a class = "code hl_variable" href = "struct_read_writer.html#a444230a0182ce6ba1898c04ce6e669a7" > elems_per_thread< / a > ; e++) {< / div >
< div class = "line" > < a id = "l00212" name = "l00212" > < / a > < span class = "lineno" > 212< / span > float2 output = < a class = "code hl_variable" href = "struct_read_writer.html#a23bac3c96dd0265ddbee1f256be45ff5" > buf< / a > [< a class = "code hl_variable" href = "struct_read_writer.html#ace40adb02cfb33d89c98353327c251fc" > strided_shared_idx< / a > + e];< / div >
< div class = "line" > < a id = "l00213" name = "l00213" > < / a > < span class = "lineno" > 213< / span > < span class = "keywordtype" > int< / span > combined_idx = (< a class = "code hl_variable" href = "struct_read_writer.html#a4c0b12484aac4fd6759d67c190391989" > strided_device_idx< / a > + e * stride) % overall_n;< / div >
< div class = "line" > < a id = "l00214" name = "l00214" > < / a > < span class = "lineno" > 214< / span > < span class = "keywordtype" > int< / span > ij = (combined_idx / stride) * (combined_idx % stride);< / div >
< div class = "line" > < a id = "l00215" name = "l00215" > < / a > < span class = "lineno" > 215< / span > < span class = "comment" > // Apply four step twiddles at end of first step< / span > < / div >
< div class = "line" > < a id = "l00216" name = "l00216" > < / a > < span class = "lineno" > 216< / span > float2 twiddle = < a class = "code hl_function" href = "radix_8h.html#ac5cf950316b9445296ee9ecfc56a56bd" > get_twiddle< / a > (ij, overall_n);< / div >
< div class = "line" > < a id = "l00217" name = "l00217" > < / a > < span class = "lineno" > 217< / span > < a class = "code hl_variable" href = "struct_read_writer.html#abea3b913c952c505d0ca4e529c7316ef" > out< / a > [< a class = "code hl_variable" href = "struct_read_writer.html#a4c0b12484aac4fd6759d67c190391989" > strided_device_idx< / a > + e * stride] = < a class = "code hl_function" href = "radix_8h.html#a5bfc53b531214c9ce277bebc18aa67d6" > complex_mul< / a > (output, twiddle);< / div >
< div class = "line" > < a id = "l00218" name = "l00218" > < / a > < span class = "lineno" > 218< / span > }< / div >
< div class = "line" > < a id = "l00219" name = "l00219" > < / a > < span class = "lineno" > 219< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00220" name = "l00220" > < / a > < span class = "lineno" > 220< / span > };< / div >
< / div >
< div class = "line" > < a id = "l00221" name = "l00221" > < / a > < span class = "lineno" > 221< / span > < / div >
< div class = "line" > < a id = "l00222" name = "l00222" > < / a > < span class = "lineno" > 222< / span > < span class = "comment" > // Four Step FFT Second Step< / span > < / div >
< div class = "line" > < a id = "l00223" name = "l00223" > < / a > < span class = "lineno" > 223< / span > < span class = "keyword" > template< / span > &lt;&gt;< / div >
< div class = "foldopen" id = "foldopen00224" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00224" name = "l00224" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a3d9c8cbc582cad6b5218339d0f721559" > 224< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_struct" href = "struct_read_writer.html" > ReadWriter< / a > &lt;float2, float2, < span class = "comment" > /*step=*/< / span > 1&gt;::load_strided(< / div >
< div class = "line" > < a id = "l00225" name = "l00225" > < / a > < span class = "lineno" > 225< / span > < span class = "keywordtype" > int< / span > stride,< / div >
< div class = "line" > < a id = "l00226" name = "l00226" > < / a > < span class = "lineno" > 226< / span > < span class = "keywordtype" > int< / span > overall_n) {< / div >
< div class = "line" > < a id = "l00227" name = "l00227" > < / a > < span class = "lineno" > 227< / span > < span class = "comment" > // Silence compiler warnings< / span > < / div >
< div class = "line" > < a id = "l00228" name = "l00228" > < / a > < span class = "lineno" > 228< / span > (void)stride;< / div >
< div class = "line" > < a id = "l00229" name = "l00229" > < / a > < span class = "lineno" > 229< / span > (void)overall_n;< / div >
< div class = "line" > < a id = "l00230" name = "l00230" > < / a > < span class = "lineno" > 230< / span > < span class = "comment" > // Don't invert between steps< / span > < / div >
< div class = "line" > < a id = "l00231" name = "l00231" > < / a > < span class = "lineno" > 231< / span > < span class = "keywordtype" > bool< / span > default_inv = inv;< / div >
< div class = "line" > < a id = "l00232" name = "l00232" > < / a > < span class = "lineno" > 232< / span > inv = < span class = "keyword" > false< / span > ;< / div >
< div class = "line" > < a id = "l00233" name = "l00233" > < / a > < span class = "lineno" > 233< / span > load();< / div >
< div class = "line" > < a id = "l00234" name = "l00234" > < / a > < span class = "lineno" > 234< / span > inv = default_inv;< / div >
< div class = "line" > < a id = "l00235" name = "l00235" > < / a > < span class = "lineno" > 235< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00236" name = "l00236" > < / a > < span class = "lineno" > 236< / span > < / div >
< div class = "line" > < a id = "l00237" name = "l00237" > < / a > < span class = "lineno" > 237< / span > < span class = "keyword" > template< / span > &lt;&gt;< / div >
< div class = "foldopen" id = "foldopen00238" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00238" name = "l00238" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a12e7f43cd9de2d9990054184c0a32839" > 238< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_struct" href = "struct_read_writer.html" > ReadWriter< / a > &lt;float2, float2, < span class = "comment" > /*step=*/< / span > 1&gt;::write_strided(< / div >
< div class = "line" > < a id = "l00239" name = "l00239" > < / a > < span class = "lineno" > 239< / span > < span class = "keywordtype" > int< / span > stride,< / div >
< div class = "line" > < a id = "l00240" name = "l00240" > < / a > < span class = "lineno" > 240< / span > < span class = "keywordtype" > int< / span > overall_n) {< / div >
< div class = "line" > < a id = "l00241" name = "l00241" > < / a > < span class = "lineno" > 241< / span > compute_strided_indices(stride, overall_n);< / div >
< div class = "line" > < a id = "l00242" name = "l00242" > < / a > < span class = "lineno" > 242< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > e = 0; e &lt; elems_per_thread; e++) {< / div >
< div class = "line" > < a id = "l00243" name = "l00243" > < / a > < span class = "lineno" > 243< / span > float2 output = < a class = "code hl_variable" href = "backend_2metal_2allocator_8h.html#a15aa5cc1baf29be08d55cca88509e697" > buf< / a > [strided_shared_idx + e];< / div >
< div class = "line" > < a id = "l00244" name = "l00244" > < / a > < span class = "lineno" > 244< / span > out[strided_device_idx + e * stride] = pre_out(output, overall_n);< / div >
< div class = "line" > < a id = "l00245" name = "l00245" > < / a > < span class = "lineno" > 245< / span > }< / div >
< div class = "line" > < a id = "l00246" name = "l00246" > < / a > < span class = "lineno" > 246< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00247" name = "l00247" > < / a > < span class = "lineno" > 247< / span > < / div >
< div class = "line" > < a id = "l00248" name = "l00248" > < / a > < span class = "lineno" > 248< / span > < span class = "comment" > // For RFFT, we interleave batches of two real sequences into one complex one:< / span > < / div >
< div class = "line" > < a id = "l00249" name = "l00249" > < / a > < span class = "lineno" > 249< / span > < span class = "comment" > //< / span > < / div >
< div class = "line" > < a id = "l00250" name = "l00250" > < / a > < span class = "lineno" > 250< / span > < span class = "comment" > // z_k = x_k + j.y_k< / span > < / div >
< div class = "line" > < a id = "l00251" name = "l00251" > < / a > < span class = "lineno" > 251< / span > < span class = "comment" > // X_k = (Z_k + Z_(N-k)*) / 2< / span > < / div >
< div class = "line" > < a id = "l00252" name = "l00252" > < / a > < span class = "lineno" > 252< / span > < span class = "comment" > // Y_k = -j * ((Z_k - Z_(N-k)*) / 2)< / span > < / div >
< div class = "line" > < a id = "l00253" name = "l00253" > < / a > < span class = "lineno" > 253< / span > < span class = "comment" > //< / span > < / div >
< div class = "line" > < a id = "l00254" name = "l00254" > < / a > < span class = "lineno" > 254< / span > < span class = "comment" > // This roughly doubles the throughput over the regular FFT.< / span > < / div >
< div class = "line" > < a id = "l00255" name = "l00255" > < / a > < span class = "lineno" > 255< / span > < span class = "keyword" > template< / span > &lt;&gt;< / div >
< div class = "foldopen" id = "foldopen00256" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00256" name = "l00256" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a6f946aea5452109dca7fc70ed39c6efe" > 256< / a > < / span > METAL_FUNC < span class = "keywordtype" > bool< / span > < a class = "code hl_function" href = "struct_read_writer.html#a08e10626fbc789b6dff9172fd6c36f7c" > ReadWriter&lt;float, float2&gt;::out_of_bounds< / a > ()< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00257" name = "l00257" > < / a > < span class = "lineno" > 257< / span > < span class = "keywordtype" > int< / span > grid_index = elem.x * grid.y + elem.y;< / div >
< div class = "line" > < a id = "l00258" name = "l00258" > < / a > < span class = "lineno" > 258< / span > < span class = "comment" > // We pack two sequences into one for RFFTs< / span > < / div >
< div class = "line" > < a id = "l00259" name = "l00259" > < / a > < span class = "lineno" > 259< / span > < span class = "keywordflow" > return< / span > grid_index * 2 &gt;= batch_size;< / div >
< div class = "line" > < a id = "l00260" name = "l00260" > < / a > < span class = "lineno" > 260< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00261" name = "l00261" > < / a > < span class = "lineno" > 261< / span > < / div >
< div class = "line" > < a id = "l00262" name = "l00262" > < / a > < span class = "lineno" > 262< / span > < span class = "keyword" > template< / span > &lt;&gt;< / div >
< div class = "foldopen" id = "foldopen00263" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00263" name = "l00263" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a8a97ba42db5692898ef7391db08d8fd0" > 263< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "struct_read_writer.html#a120eaf4b5f32e80972a18d14e82a2d75" > ReadWriter&lt;float, float2&gt;::load< / a > ()< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00264" name = "l00264" > < / a > < span class = "lineno" > 264< / span > < span class = "keywordtype" > int< / span > batch_idx = elem.x * grid.y * n * 2 + elem.y * n * 2;< / div >
< div class = "line" > < a id = "l00265" name = "l00265" > < / a > < span class = "lineno" > 265< / span > threadgroup float2* seq_buf = < a class = "code hl_variable" href = "backend_2metal_2allocator_8h.html#a15aa5cc1baf29be08d55cca88509e697" > buf< / a > + elem.y * n;< / div >
< div class = "line" > < a id = "l00266" name = "l00266" > < / a > < span class = "lineno" > 266< / span > < / div >
< div class = "line" > < a id = "l00267" name = "l00267" > < / a > < span class = "lineno" > 267< / span > < span class = "comment" > // No out of bounds accesses on odd batch sizes< / span > < / div >
< div class = "line" > < a id = "l00268" name = "l00268" > < / a > < span class = "lineno" > 268< / span > < span class = "keywordtype" > int< / span > grid_index = elem.x * grid.y + elem.y;< / div >
< div class = "line" > < a id = "l00269" name = "l00269" > < / a > < span class = "lineno" > 269< / span > < span class = "keywordtype" > short< / span > next_in =< / div >
< div class = "line" > < a id = "l00270" name = "l00270" > < / a > < span class = "lineno" > 270< / span > batch_size % 2 == 1 &amp;&amp; grid_index * 2 == batch_size - 1 ? 0 : n;< / div >
< div class = "line" > < a id = "l00271" name = "l00271" > < / a > < span class = "lineno" > 271< / span > < / div >
< div class = "line" > < a id = "l00272" name = "l00272" > < / a > < span class = "lineno" > 272< / span > < span class = "keywordtype" > short< / span > m = grid.z;< / div >
< div class = "line" > < a id = "l00273" name = "l00273" > < / a > < span class = "lineno" > 273< / span > < span class = "keywordtype" > short< / span > fft_idx = elem.z;< / div >
< div class = "line" > < a id = "l00274" name = "l00274" > < / a > < span class = "lineno" > 274< / span > < / div >
< div class = "line" > < a id = "l00275" name = "l00275" > < / a > < span class = "lineno" > 275< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > e = 0; e &lt; elems_per_thread; e++) {< / div >
< div class = "line" > < a id = "l00276" name = "l00276" > < / a > < span class = "lineno" > 276< / span > < span class = "keywordtype" > int< / span > index = < a class = "code hl_function" href = "namespacemetal.html#a6653b28c9473087141eddce39878d4d3" > metal::min< / a > (fft_idx + e * m, n - 1);< / div >
< div class = "line" > < a id = "l00277" name = "l00277" > < / a > < span class = "lineno" > 277< / span > seq_buf[index].x = in[batch_idx + index];< / div >
< div class = "line" > < a id = "l00278" name = "l00278" > < / a > < span class = "lineno" > 278< / span > seq_buf[index].y = in[batch_idx + index + next_in];< / div >
< div class = "line" > < a id = "l00279" name = "l00279" > < / a > < span class = "lineno" > 279< / span > }< / div >
< div class = "line" > < a id = "l00280" name = "l00280" > < / a > < span class = "lineno" > 280< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00281" name = "l00281" > < / a > < span class = "lineno" > 281< / span > < / div >
< div class = "line" > < a id = "l00282" name = "l00282" > < / a > < span class = "lineno" > 282< / span > < span class = "keyword" > template< / span > &lt;&gt;< / div >
< div class = "foldopen" id = "foldopen00283" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00283" name = "l00283" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a7a3d1396b0f83aa7506207bd6e7336bf" > 283< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "struct_read_writer.html#ac2ea71e41740ddc863890e3e8e6f09d0" > ReadWriter&lt;float, float2&gt;::write< / a > ()< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00284" name = "l00284" > < / a > < span class = "lineno" > 284< / span > < span class = "keywordtype" > short< / span > n_over_2 = (n / 2) + 1;< / div >
< div class = "line" > < a id = "l00285" name = "l00285" > < / a > < span class = "lineno" > 285< / span > < / div >
< div class = "line" > < a id = "l00286" name = "l00286" > < / a > < span class = "lineno" > 286< / span > < span class = "keywordtype" > int< / span > batch_idx = elem.x * grid.y * n_over_2 * 2 + elem.y * n_over_2 * 2;< / div >
< div class = "line" > < a id = "l00287" name = "l00287" > < / a > < span class = "lineno" > 287< / span > threadgroup float2* seq_buf = < a class = "code hl_variable" href = "backend_2metal_2allocator_8h.html#a15aa5cc1baf29be08d55cca88509e697" > buf< / a > + elem.y * n;< / div >
< div class = "line" > < a id = "l00288" name = "l00288" > < / a > < span class = "lineno" > 288< / span > < / div >
< div class = "line" > < a id = "l00289" name = "l00289" > < / a > < span class = "lineno" > 289< / span > < span class = "keywordtype" > int< / span > grid_index = elem.x * grid.y + elem.y;< / div >
< div class = "line" > < a id = "l00290" name = "l00290" > < / a > < span class = "lineno" > 290< / span > < span class = "keywordtype" > short< / span > next_out =< / div >
< div class = "line" > < a id = "l00291" name = "l00291" > < / a > < span class = "lineno" > 291< / span > batch_size % 2 == 1 &amp;&amp; grid_index * 2 == batch_size - 1 ? 0 : n_over_2;< / div >
< div class = "line" > < a id = "l00292" name = "l00292" > < / a > < span class = "lineno" > 292< / span > < / div >
< div class = "line" > < a id = "l00293" name = "l00293" > < / a > < span class = "lineno" > 293< / span > float2 conj = {1, -1};< / div >
< div class = "line" > < a id = "l00294" name = "l00294" > < / a > < span class = "lineno" > 294< / span > float2 minus_j = {0, -1};< / div >
< div class = "line" > < a id = "l00295" name = "l00295" > < / a > < span class = "lineno" > 295< / span > < / div >
< div class = "line" > < a id = "l00296" name = "l00296" > < / a > < span class = "lineno" > 296< / span > < span class = "keywordtype" > short< / span > m = grid.z;< / div >
< div class = "line" > < a id = "l00297" name = "l00297" > < / a > < span class = "lineno" > 297< / span > < span class = "keywordtype" > short< / span > fft_idx = elem.z;< / div >
< div class = "line" > < a id = "l00298" name = "l00298" > < / a > < span class = "lineno" > 298< / span > < / div >
< div class = "line" > < a id = "l00299" name = "l00299" > < / a > < span class = "lineno" > 299< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > e = 0; e &lt; elems_per_thread / 2 + 1; e++) {< / div >
< div class = "line" > < a id = "l00300" name = "l00300" > < / a > < span class = "lineno" > 300< / span > < span class = "keywordtype" > int< / span > index = < a class = "code hl_function" href = "namespacemetal.html#a6653b28c9473087141eddce39878d4d3" > metal::min< / a > (fft_idx + e * m, n_over_2 - 1);< / div >
< div class = "line" > < a id = "l00301" name = "l00301" > < / a > < span class = "lineno" > 301< / span > < span class = "comment" > // x_0 = z_0.real< / span > < / div >
< div class = "line" > < a id = "l00302" name = "l00302" > < / a > < span class = "lineno" > 302< / span > < span class = "comment" > // y_0 = z_0.imag< / span > < / div >
< div class = "line" > < a id = "l00303" name = "l00303" > < / a > < span class = "lineno" > 303< / span > < span class = "keywordflow" > if< / span > (index == 0) {< / div >
< div class = "line" > < a id = "l00304" name = "l00304" > < / a > < span class = "lineno" > 304< / span > out[batch_idx + index] = {seq_buf[index].x, 0};< / div >
< div class = "line" > < a id = "l00305" name = "l00305" > < / a > < span class = "lineno" > 305< / span > out[batch_idx + index + next_out] = {seq_buf[index].y, 0};< / div >
< div class = "line" > < a id = "l00306" name = "l00306" > < / a > < span class = "lineno" > 306< / span > } < span class = "keywordflow" > else< / span > {< / div >
< div class = "line" > < a id = "l00307" name = "l00307" > < / a > < span class = "lineno" > 307< / span > float2 x_k = seq_buf[index];< / div >
< div class = "line" > < a id = "l00308" name = "l00308" > < / a > < span class = "lineno" > 308< / span > float2 x_n_minus_k = seq_buf[n - index] * conj;< / div >
< div class = "line" > < a id = "l00309" name = "l00309" > < / a > < span class = "lineno" > 309< / span > out[batch_idx + index] = (x_k + x_n_minus_k) / 2;< / div >
< div class = "line" > < a id = "l00310" name = "l00310" > < / a > < span class = "lineno" > 310< / span > out[batch_idx + index + next_out] =< / div >
< div class = "line" > < a id = "l00311" name = "l00311" > < / a > < span class = "lineno" > 311< / span > < a class = "code hl_function" href = "radix_8h.html#a5bfc53b531214c9ce277bebc18aa67d6" > complex_mul< / a > (((x_k - x_n_minus_k) / 2), minus_j);< / div >
< div class = "line" > < a id = "l00312" name = "l00312" > < / a > < span class = "lineno" > 312< / span > }< / div >
< div class = "line" > < a id = "l00313" name = "l00313" > < / a > < span class = "lineno" > 313< / span > }< / div >
< div class = "line" > < a id = "l00314" name = "l00314" > < / a > < span class = "lineno" > 314< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00315" name = "l00315" > < / a > < span class = "lineno" > 315< / span > < / div >
< div class = "line" > < a id = "l00316" name = "l00316" > < / a > < span class = "lineno" > 316< / span > < span class = "keyword" > template< / span > &lt;&gt;< / div >
< div class = "foldopen" id = "foldopen00317" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00317" name = "l00317" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#af3ce6bbb1a8dfb3bab1ae18d3eb45bc0" > 317< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "struct_read_writer.html#add5bd3f647793a5a19d63197a19df73c" > ReadWriter&lt;float, float2&gt;::load_padded< / a > (< / div >
< div class = "line" > < a id = "l00318" name = "l00318" > < / a > < span class = "lineno" > 318< / span > < span class = "keywordtype" > int< / span > length,< / div >
< div class = "line" > < a id = "l00319" name = "l00319" > < / a > < span class = "lineno" > 319< / span > < span class = "keyword" > const< / span > device float2* w_k)< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00320" name = "l00320" > < / a > < span class = "lineno" > 320< / span > < span class = "keywordtype" > int< / span > batch_idx = elem.x * grid.y * length * 2 + elem.y * length * 2;< / div >
< div class = "line" > < a id = "l00321" name = "l00321" > < / a > < span class = "lineno" > 321< / span > threadgroup float2* seq_buf = < a class = "code hl_variable" href = "backend_2metal_2allocator_8h.html#a15aa5cc1baf29be08d55cca88509e697" > buf< / a > + elem.y * n;< / div >
< div class = "line" > < a id = "l00322" name = "l00322" > < / a > < span class = "lineno" > 322< / span > < / div >
< div class = "line" > < a id = "l00323" name = "l00323" > < / a > < span class = "lineno" > 323< / span > < span class = "comment" > // No out of bounds accesses on odd batch sizes< / span > < / div >
< div class = "line" > < a id = "l00324" name = "l00324" > < / a > < span class = "lineno" > 324< / span > < span class = "keywordtype" > int< / span > grid_index = elem.x * grid.y + elem.y;< / div >
< div class = "line" > < a id = "l00325" name = "l00325" > < / a > < span class = "lineno" > 325< / span > < span class = "keywordtype" > short< / span > next_in =< / div >
< div class = "line" > < a id = "l00326" name = "l00326" > < / a > < span class = "lineno" > 326< / span > batch_size % 2 == 1 &amp;&amp; grid_index * 2 == batch_size - 1 ? 0 : length;< / div >
< div class = "line" > < a id = "l00327" name = "l00327" > < / a > < span class = "lineno" > 327< / span > < / div >
< div class = "line" > < a id = "l00328" name = "l00328" > < / a > < span class = "lineno" > 328< / span > < span class = "keywordtype" > short< / span > m = grid.z;< / div >
< div class = "line" > < a id = "l00329" name = "l00329" > < / a > < span class = "lineno" > 329< / span > < span class = "keywordtype" > short< / span > fft_idx = elem.z;< / div >
< div class = "line" > < a id = "l00330" name = "l00330" > < / a > < span class = "lineno" > 330< / span > < / div >
< div class = "line" > < a id = "l00331" name = "l00331" > < / a > < span class = "lineno" > 331< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > e = 0; e &lt; elems_per_thread; e++) {< / div >
< div class = "line" > < a id = "l00332" name = "l00332" > < / a > < span class = "lineno" > 332< / span > < span class = "keywordtype" > int< / span > index = < a class = "code hl_function" href = "namespacemetal.html#a6653b28c9473087141eddce39878d4d3" > metal::min< / a > (fft_idx + e * m, n - 1);< / div >
< div class = "line" > < a id = "l00333" name = "l00333" > < / a > < span class = "lineno" > 333< / span > < span class = "keywordflow" > if< / span > (index &lt; length) {< / div >
< div class = "line" > < a id = "l00334" name = "l00334" > < / a > < span class = "lineno" > 334< / span > float2 elem =< / div >
< div class = "line" > < a id = "l00335" name = "l00335" > < / a > < span class = "lineno" > 335< / span > float2(in[batch_idx + index], in[batch_idx + index + next_in]);< / div >
< div class = "line" > < a id = "l00336" name = "l00336" > < / a > < span class = "lineno" > 336< / span > seq_buf[index] = < a class = "code hl_function" href = "radix_8h.html#a5bfc53b531214c9ce277bebc18aa67d6" > complex_mul< / a > (elem, w_k[index]);< / div >
< div class = "line" > < a id = "l00337" name = "l00337" > < / a > < span class = "lineno" > 337< / span > } < span class = "keywordflow" > else< / span > {< / div >
< div class = "line" > < a id = "l00338" name = "l00338" > < / a > < span class = "lineno" > 338< / span > seq_buf[index] = 0;< / div >
< div class = "line" > < a id = "l00339" name = "l00339" > < / a > < span class = "lineno" > 339< / span > }< / div >
< div class = "line" > < a id = "l00340" name = "l00340" > < / a > < span class = "lineno" > 340< / span > }< / div >
< div class = "line" > < a id = "l00341" name = "l00341" > < / a > < span class = "lineno" > 341< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00342" name = "l00342" > < / a > < span class = "lineno" > 342< / span > < / div >
< div class = "line" > < a id = "l00343" name = "l00343" > < / a > < span class = "lineno" > 343< / span > < span class = "keyword" > template< / span > &lt;&gt;< / div >
< div class = "foldopen" id = "foldopen00344" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00344" name = "l00344" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#abaf2a6ad4c88bd9f65fe1db1f73a8d87" > 344< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "struct_read_writer.html#a95367307acace2aa88226cf8956d2d88" > ReadWriter&lt;float, float2&gt;::write_padded< / a > (< / div >
< div class = "line" > < a id = "l00345" name = "l00345" > < / a > < span class = "lineno" > 345< / span > < span class = "keywordtype" > int< / span > length,< / div >
< div class = "line" > < a id = "l00346" name = "l00346" > < / a > < span class = "lineno" > 346< / span > < span class = "keyword" > const< / span > device float2* w_k)< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00347" name = "l00347" > < / a > < span class = "lineno" > 347< / span > < span class = "keywordtype" > int< / span > length_over_2 = (length / 2) + 1;< / div >
< div class = "line" > < a id = "l00348" name = "l00348" > < / a > < span class = "lineno" > 348< / span > < span class = "keywordtype" > int< / span > batch_idx =< / div >
< div class = "line" > < a id = "l00349" name = "l00349" > < / a > < span class = "lineno" > 349< / span > elem.x * grid.y * length_over_2 * 2 + elem.y * length_over_2 * 2;< / div >
< div class = "line" > < a id = "l00350" name = "l00350" > < / a > < span class = "lineno" > 350< / span > threadgroup float2* seq_buf = < a class = "code hl_variable" href = "backend_2metal_2allocator_8h.html#a15aa5cc1baf29be08d55cca88509e697" > buf< / a > + elem.y * n + length - 1;< / div >
< div class = "line" > < a id = "l00351" name = "l00351" > < / a > < span class = "lineno" > 351< / span > < / div >
< div class = "line" > < a id = "l00352" name = "l00352" > < / a > < span class = "lineno" > 352< / span > < span class = "keywordtype" > int< / span > grid_index = elem.x * grid.y + elem.y;< / div >
< div class = "line" > < a id = "l00353" name = "l00353" > < / a > < span class = "lineno" > 353< / span > < span class = "keywordtype" > short< / span > next_out = batch_size % 2 == 1 &amp;&amp; grid_index * 2 == batch_size - 1< / div >
< div class = "line" > < a id = "l00354" name = "l00354" > < / a > < span class = "lineno" > 354< / span > ? 0< / div >
< div class = "line" > < a id = "l00355" name = "l00355" > < / a > < span class = "lineno" > 355< / span > : length_over_2;< / div >
< div class = "line" > < a id = "l00356" name = "l00356" > < / a > < span class = "lineno" > 356< / span > < / div >
< div class = "line" > < a id = "l00357" name = "l00357" > < / a > < span class = "lineno" > 357< / span > float2 conj = {1, -1};< / div >
< div class = "line" > < a id = "l00358" name = "l00358" > < / a > < span class = "lineno" > 358< / span > float2 inv_factor = {1.0f / n, -1.0f / n};< / div >
< div class = "line" > < a id = "l00359" name = "l00359" > < / a > < span class = "lineno" > 359< / span > float2 minus_j = {0, -1};< / div >
< div class = "line" > < a id = "l00360" name = "l00360" > < / a > < span class = "lineno" > 360< / span > < / div >
< div class = "line" > < a id = "l00361" name = "l00361" > < / a > < span class = "lineno" > 361< / span > < span class = "keywordtype" > short< / span > m = grid.z;< / div >
< div class = "line" > < a id = "l00362" name = "l00362" > < / a > < span class = "lineno" > 362< / span > < span class = "keywordtype" > short< / span > fft_idx = elem.z;< / div >
< div class = "line" > < a id = "l00363" name = "l00363" > < / a > < span class = "lineno" > 363< / span > < / div >
< div class = "line" > < a id = "l00364" name = "l00364" > < / a > < span class = "lineno" > 364< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > e = 0; e &lt; elems_per_thread / 2 + 1; e++) {< / div >
< div class = "line" > < a id = "l00365" name = "l00365" > < / a > < span class = "lineno" > 365< / span > < span class = "keywordtype" > int< / span > index = < a class = "code hl_function" href = "namespacemetal.html#a6653b28c9473087141eddce39878d4d3" > metal::min< / a > (fft_idx + e * m, length_over_2 - 1);< / div >
< div class = "line" > < a id = "l00366" name = "l00366" > < / a > < span class = "lineno" > 366< / span > < span class = "comment" > // x_0 = z_0.real< / span > < / div >
< div class = "line" > < a id = "l00367" name = "l00367" > < / a > < span class = "lineno" > 367< / span > < span class = "comment" > // y_0 = z_0.imag< / span > < / div >
< div class = "line" > < a id = "l00368" name = "l00368" > < / a > < span class = "lineno" > 368< / span > < span class = "keywordflow" > if< / span > (index == 0) {< / div >
< div class = "line" > < a id = "l00369" name = "l00369" > < / a > < span class = "lineno" > 369< / span > float2 elem = < a class = "code hl_function" href = "radix_8h.html#a5bfc53b531214c9ce277bebc18aa67d6" > complex_mul< / a > (w_k[index], seq_buf[index] * inv_factor);< / div >
< div class = "line" > < a id = "l00370" name = "l00370" > < / a > < span class = "lineno" > 370< / span > out[batch_idx + index] = float2(elem.x, 0);< / div >
< div class = "line" > < a id = "l00371" name = "l00371" > < / a > < span class = "lineno" > 371< / span > out[batch_idx + index + next_out] = float2(elem.y, 0);< / div >
< div class = "line" > < a id = "l00372" name = "l00372" > < / a > < span class = "lineno" > 372< / span > } < span class = "keywordflow" > else< / span > {< / div >
< div class = "line" > < a id = "l00373" name = "l00373" > < / a > < span class = "lineno" > 373< / span > float2 x_k = < a class = "code hl_function" href = "radix_8h.html#a5bfc53b531214c9ce277bebc18aa67d6" > complex_mul< / a > (w_k[index], seq_buf[index] * inv_factor);< / div >
< div class = "line" > < a id = "l00374" name = "l00374" > < / a > < span class = "lineno" > 374< / span > float2 x_n_minus_k = < a class = "code hl_function" href = "radix_8h.html#a5bfc53b531214c9ce277bebc18aa67d6" > complex_mul< / a > (< / div >
< div class = "line" > < a id = "l00375" name = "l00375" > < / a > < span class = "lineno" > 375< / span > w_k[length - index], seq_buf[length - index] * inv_factor);< / div >
< div class = "line" > < a id = "l00376" name = "l00376" > < / a > < span class = "lineno" > 376< / span > x_n_minus_k *= conj;< / div >
< div class = "line" > < a id = "l00377" name = "l00377" > < / a > < span class = "lineno" > 377< / span > < span class = "comment" > // w_k should happen before this extraction< / span > < / div >
< div class = "line" > < a id = "l00378" name = "l00378" > < / a > < span class = "lineno" > 378< / span > out[batch_idx + index] = (x_k + x_n_minus_k) / 2;< / div >
< div class = "line" > < a id = "l00379" name = "l00379" > < / a > < span class = "lineno" > 379< / span > out[batch_idx + index + next_out] =< / div >
< div class = "line" > < a id = "l00380" name = "l00380" > < / a > < span class = "lineno" > 380< / span > < a class = "code hl_function" href = "radix_8h.html#a5bfc53b531214c9ce277bebc18aa67d6" > complex_mul< / a > (((x_k - x_n_minus_k) / 2), minus_j);< / div >
< div class = "line" > < a id = "l00381" name = "l00381" > < / a > < span class = "lineno" > 381< / span > }< / div >
< div class = "line" > < a id = "l00382" name = "l00382" > < / a > < span class = "lineno" > 382< / span > }< / div >
< div class = "line" > < a id = "l00383" name = "l00383" > < / a > < span class = "lineno" > 383< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00384" name = "l00384" > < / a > < span class = "lineno" > 384< / span > < / div >
< div class = "line" > < a id = "l00385" name = "l00385" > < / a > < span class = "lineno" > 385< / span > < span class = "comment" > // For IRFFT, we do the opposite< / span > < / div >
< div class = "line" > < a id = "l00386" name = "l00386" > < / a > < span class = "lineno" > 386< / span > < span class = "comment" > //< / span > < / div >
< div class = "line" > < a id = "l00387" name = "l00387" > < / a > < span class = "lineno" > 387< / span > < span class = "comment" > // Z_k = X_k + j.Y_k< / span > < / div >
< div class = "line" > < a id = "l00388" name = "l00388" > < / a > < span class = "lineno" > 388< / span > < span class = "comment" > // x_k = Re(Z_k)< / span > < / div >
< div class = "line" > < a id = "l00389" name = "l00389" > < / a > < span class = "lineno" > 389< / span > < span class = "comment" > // y_k = Im(Z_k)< / span > < / div >
< div class = "line" > < a id = "l00390" name = "l00390" > < / a > < span class = "lineno" > 390< / span > < span class = "keyword" > template< / span > &lt;&gt;< / div >
< div class = "foldopen" id = "foldopen00391" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00391" name = "l00391" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a8f40d7f343d32134fe27a694abfde6bf" > 391< / a > < / span > METAL_FUNC < span class = "keywordtype" > bool< / span > < a class = "code hl_function" href = "struct_read_writer.html#a08e10626fbc789b6dff9172fd6c36f7c" > ReadWriter&lt;float2, float&gt;::out_of_bounds< / a > ()< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00392" name = "l00392" > < / a > < span class = "lineno" > 392< / span > < span class = "keywordtype" > int< / span > grid_index = elem.x * grid.y + elem.y;< / div >
< div class = "line" > < a id = "l00393" name = "l00393" > < / a > < span class = "lineno" > 393< / span > < span class = "comment" > // We pack two sequences into one for IRFFTs< / span > < / div >
< div class = "line" > < a id = "l00394" name = "l00394" > < / a > < span class = "lineno" > 394< / span > < span class = "keywordflow" > return< / span > grid_index * 2 &gt;= batch_size;< / div >
< div class = "line" > < a id = "l00395" name = "l00395" > < / a > < span class = "lineno" > 395< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00396" name = "l00396" > < / a > < span class = "lineno" > 396< / span > < / div >
< div class = "line" > < a id = "l00397" name = "l00397" > < / a > < span class = "lineno" > 397< / span > < span class = "keyword" > template< / span > &lt;&gt;< / div >
< div class = "foldopen" id = "foldopen00398" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00398" name = "l00398" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a2506ee61be67826ac9494efb12a81900" > 398< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "struct_read_writer.html#a120eaf4b5f32e80972a18d14e82a2d75" > ReadWriter&lt;float2, float&gt;::load< / a > ()< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00399" name = "l00399" > < / a > < span class = "lineno" > 399< / span > < span class = "keywordtype" > short< / span > n_over_2 = (n / 2) + 1;< / div >
< div class = "line" > < a id = "l00400" name = "l00400" > < / a > < span class = "lineno" > 400< / span > < span class = "keywordtype" > int< / span > batch_idx = elem.x * grid.y * n_over_2 * 2 + elem.y * n_over_2 * 2;< / div >
< div class = "line" > < a id = "l00401" name = "l00401" > < / a > < span class = "lineno" > 401< / span > threadgroup float2* seq_buf = < a class = "code hl_variable" href = "backend_2metal_2allocator_8h.html#a15aa5cc1baf29be08d55cca88509e697" > buf< / a > + elem.y * n;< / div >
< div class = "line" > < a id = "l00402" name = "l00402" > < / a > < span class = "lineno" > 402< / span > < / div >
< div class = "line" > < a id = "l00403" name = "l00403" > < / a > < span class = "lineno" > 403< / span > < span class = "comment" > // No out of bounds accesses on odd batch sizes< / span > < / div >
< div class = "line" > < a id = "l00404" name = "l00404" > < / a > < span class = "lineno" > 404< / span > < span class = "keywordtype" > int< / span > grid_index = elem.x * grid.y + elem.y;< / div >
< div class = "line" > < a id = "l00405" name = "l00405" > < / a > < span class = "lineno" > 405< / span > < span class = "keywordtype" > short< / span > next_in =< / div >
< div class = "line" > < a id = "l00406" name = "l00406" > < / a > < span class = "lineno" > 406< / span > batch_size % 2 == 1 &amp;&amp; grid_index * 2 == batch_size - 1 ? 0 : n_over_2;< / div >
< div class = "line" > < a id = "l00407" name = "l00407" > < / a > < span class = "lineno" > 407< / span > < / div >
< div class = "line" > < a id = "l00408" name = "l00408" > < / a > < span class = "lineno" > 408< / span > < span class = "keywordtype" > short< / span > m = grid.z;< / div >
< div class = "line" > < a id = "l00409" name = "l00409" > < / a > < span class = "lineno" > 409< / span > < span class = "keywordtype" > short< / span > fft_idx = elem.z;< / div >
< div class = "line" > < a id = "l00410" name = "l00410" > < / a > < span class = "lineno" > 410< / span > < / div >
< div class = "line" > < a id = "l00411" name = "l00411" > < / a > < span class = "lineno" > 411< / span > float2 conj = {1, -1};< / div >
< div class = "line" > < a id = "l00412" name = "l00412" > < / a > < span class = "lineno" > 412< / span > float2 plus_j = {0, 1};< / div >
< div class = "line" > < a id = "l00413" name = "l00413" > < / a > < span class = "lineno" > 413< / span > < / div >
< div class = "line" > < a id = "l00414" name = "l00414" > < / a > < span class = "lineno" > 414< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > t = 0; t < elems_per_thread / 2 + 1; t++) {< / div >
< div class = "line" > < a id = "l00415" name = "l00415" > < / a > < span class = "lineno" > 415< / span > < span class = "keywordtype" > int< / span > index = < a class = "code hl_function" href = "namespacemetal.html#a6653b28c9473087141eddce39878d4d3" > metal::min< / a > (fft_idx + t * m, n_over_2 - 1);< / div >
< div class = "line" > < a id = "l00416" name = "l00416" > < / a > < span class = "lineno" > 416< / span > float2 x = in[batch_idx + index];< / div >
< div class = "line" > < a id = "l00417" name = "l00417" > < / a > < span class = "lineno" > 417< / span > float2 y = in[batch_idx + index + next_in];< / div >
< div class = "line" > < a id = "l00418" name = "l00418" > < / a > < span class = "lineno" > 418< / span > < span class = "comment" > // NumPy forces first input to be real< / span > < / div >
< div class = "line" > < a id = "l00419" name = "l00419" > < / a > < span class = "lineno" > 419< / span > < span class = "keywordtype" > bool< / span > first_val = index == 0;< / div >
< div class = "line" > < a id = "l00420" name = "l00420" > < / a > < span class = "lineno" > 420< / span > < span class = "comment" > // NumPy forces last input on even irffts to be real< / span > < / div >
< div class = "line" > < a id = "l00421" name = "l00421" > < / a > < span class = "lineno" > 421< / span > < span class = "keywordtype" > bool< / span > last_val = n % 2 == 0 &amp;&amp; index == n_over_2 - 1;< / div >
< div class = "line" > < a id = "l00422" name = "l00422" > < / a > < span class = "lineno" > 422< / span > < span class = "keywordflow" > if< / span > (first_val || last_val) {< / div >
< div class = "line" > < a id = "l00423" name = "l00423" > < / a > < span class = "lineno" > 423< / span > x = float2(x.x, 0);< / div >
< div class = "line" > < a id = "l00424" name = "l00424" > < / a > < span class = "lineno" > 424< / span > y = float2(y.x, 0);< / div >
< div class = "line" > < a id = "l00425" name = "l00425" > < / a > < span class = "lineno" > 425< / span > }< / div >
< div class = "line" > < a id = "l00426" name = "l00426" > < / a > < span class = "lineno" > 426< / span > seq_buf[index] = x + < a class = "code hl_function" href = "radix_8h.html#a5bfc53b531214c9ce277bebc18aa67d6" > complex_mul< / a > (y, plus_j);< / div >
< div class = "line" > < a id = "l00427" name = "l00427" > < / a > < span class = "lineno" > 427< / span > seq_buf[index].y = -seq_buf[index].y;< / div >
< div class = "line" > < a id = "l00428" name = "l00428" > < / a > < span class = "lineno" > 428< / span > < span class = "keywordflow" > if< / span > (index > 0 &amp;&amp; !last_val) {< / div >
< div class = "line" > < a id = "l00429" name = "l00429" > < / a > < span class = "lineno" > 429< / span > seq_buf[n - index] = (x * conj) + < a class = "code hl_function" href = "radix_8h.html#a5bfc53b531214c9ce277bebc18aa67d6" > complex_mul< / a > (y * conj, plus_j);< / div >
< div class = "line" > < a id = "l00430" name = "l00430" > < / a > < span class = "lineno" > 430< / span > seq_buf[n - index].y = -seq_buf[n - index].y;< / div >
< div class = "line" > < a id = "l00431" name = "l00431" > < / a > < span class = "lineno" > 431< / span > }< / div >
< div class = "line" > < a id = "l00432" name = "l00432" > < / a > < span class = "lineno" > 432< / span > }< / div >
< div class = "line" > < a id = "l00433" name = "l00433" > < / a > < span class = "lineno" > 433< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00434" name = "l00434" > < / a > < span class = "lineno" > 434< / span > < / div >
< div class = "line" > < a id = "l00435" name = "l00435" > < / a > < span class = "lineno" > 435< / span > < span class = "keyword" > template< / span > &lt;&gt;< / div >
< div class = "foldopen" id = "foldopen00436" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00436" name = "l00436" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#ae1f0d3555b74998cc2d2288bce72a1f4" > 436< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "struct_read_writer.html#ac2ea71e41740ddc863890e3e8e6f09d0" > ReadWriter&lt;float2, float&gt;::write< / a > ()< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00437" name = "l00437" > < / a > < span class = "lineno" > 437< / span > < span class = "keywordtype" > int< / span > batch_idx = elem.x * grid.y * n * 2 + elem.y * n * 2;< / div >
< div class = "line" > < a id = "l00438" name = "l00438" > < / a > < span class = "lineno" > 438< / span > threadgroup float2* seq_buf = < a class = "code hl_variable" href = "backend_2metal_2allocator_8h.html#a15aa5cc1baf29be08d55cca88509e697" > buf< / a > + elem.y * n;< / div >
< div class = "line" > < a id = "l00439" name = "l00439" > < / a > < span class = "lineno" > 439< / span > < / div >
< div class = "line" > < a id = "l00440" name = "l00440" > < / a > < span class = "lineno" > 440< / span > < span class = "keywordtype" > int< / span > grid_index = elem.x * grid.y + elem.y;< / div >
< div class = "line" > < a id = "l00441" name = "l00441" > < / a > < span class = "lineno" > 441< / span > < span class = "keywordtype" > short< / span > next_out =< / div >
< div class = "line" > < a id = "l00442" name = "l00442" > < / a > < span class = "lineno" > 442< / span > batch_size % 2 == 1 &amp;&amp; grid_index * 2 == batch_size - 1 ? 0 : n;< / div >
< div class = "line" > < a id = "l00443" name = "l00443" > < / a > < span class = "lineno" > 443< / span > < / div >
< div class = "line" > < a id = "l00444" name = "l00444" > < / a > < span class = "lineno" > 444< / span > < span class = "keywordtype" > short< / span > m = grid.z;< / div >
< div class = "line" > < a id = "l00445" name = "l00445" > < / a > < span class = "lineno" > 445< / span > < span class = "keywordtype" > short< / span > fft_idx = elem.z;< / div >
< div class = "line" > < a id = "l00446" name = "l00446" > < / a > < span class = "lineno" > 446< / span > < / div >
< div class = "line" > < a id = "l00447" name = "l00447" > < / a > < span class = "lineno" > 447< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > e = 0; e < elems_per_thread; e++) {< / div >
< div class = "line" > < a id = "l00448" name = "l00448" > < / a > < span class = "lineno" > 448< / span > < span class = "keywordtype" > int< / span > index = < a class = "code hl_function" href = "namespacemetal.html#a6653b28c9473087141eddce39878d4d3" > metal::min< / a > (fft_idx + e * m, n - 1);< / div >
< div class = "line" > < a id = "l00449" name = "l00449" > < / a > < span class = "lineno" > 449< / span > out[batch_idx + index] = seq_buf[index].x / n;< / div >
< div class = "line" > < a id = "l00450" name = "l00450" > < / a > < span class = "lineno" > 450< / span > out[batch_idx + index + next_out] = seq_buf[index].y / -n;< / div >
< div class = "line" > < a id = "l00451" name = "l00451" > < / a > < span class = "lineno" > 451< / span > }< / div >
< div class = "line" > < a id = "l00452" name = "l00452" > < / a > < span class = "lineno" > 452< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00453" name = "l00453" > < / a > < span class = "lineno" > 453< / span > < / div >
< div class = "line" > < a id = "l00454" name = "l00454" > < / a > < span class = "lineno" > 454< / span > < span class = "keyword" > template< / span > &lt;&gt;< / div >
< div class = "foldopen" id = "foldopen00455" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00455" name = "l00455" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#ab116f4569bb9dc6eaef0d8d08472e239" > 455< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "struct_read_writer.html#add5bd3f647793a5a19d63197a19df73c" > ReadWriter&lt;float2, float&gt;::load_padded< / a > (< / div >
< div class = "line" > < a id = "l00456" name = "l00456" > < / a > < span class = "lineno" > 456< / span > < span class = "keywordtype" > int< / span > length,< / div >
< div class = "line" > < a id = "l00457" name = "l00457" > < / a > < span class = "lineno" > 457< / span > < span class = "keyword" > const< / span > device float2* w_k)< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00458" name = "l00458" > < / a > < span class = "lineno" > 458< / span > < span class = "keywordtype" > int< / span > n_over_2 = (n / 2) + 1;< / div >
< div class = "line" > < a id = "l00459" name = "l00459" > < / a > < span class = "lineno" > 459< / span > < span class = "keywordtype" > int< / span > length_over_2 = (length / 2) + 1;< / div >
< div class = "line" > < a id = "l00460" name = "l00460" > < / a > < span class = "lineno" > 460< / span > < / div >
< div class = "line" > < a id = "l00461" name = "l00461" > < / a > < span class = "lineno" > 461< / span > < span class = "keywordtype" > int< / span > batch_idx =< / div >
< div class = "line" > < a id = "l00462" name = "l00462" > < / a > < span class = "lineno" > 462< / span > elem.x * grid.y * length_over_2 * 2 + elem.y * length_over_2 * 2;< / div >
< div class = "line" > < a id = "l00463" name = "l00463" > < / a > < span class = "lineno" > 463< / span > threadgroup float2* seq_buf = < a class = "code hl_variable" href = "backend_2metal_2allocator_8h.html#a15aa5cc1baf29be08d55cca88509e697" > buf< / a > + elem.y * n;< / div >
< div class = "line" > < a id = "l00464" name = "l00464" > < / a > < span class = "lineno" > 464< / span > < / div >
< div class = "line" > < a id = "l00465" name = "l00465" > < / a > < span class = "lineno" > 465< / span > < span class = "comment" > // No out of bounds accesses on odd batch sizes< / span > < / div >
< div class = "line" > < a id = "l00466" name = "l00466" > < / a > < span class = "lineno" > 466< / span > < span class = "keywordtype" > int< / span > grid_index = elem.x * grid.y + elem.y;< / div >
< div class = "line" > < a id = "l00467" name = "l00467" > < / a > < span class = "lineno" > 467< / span > < span class = "keywordtype" > short< / span > next_in = batch_size % 2 == 1 &amp;&amp; grid_index * 2 == batch_size - 1< / div >
< div class = "line" > < a id = "l00468" name = "l00468" > < / a > < span class = "lineno" > 468< / span > ? 0< / div >
< div class = "line" > < a id = "l00469" name = "l00469" > < / a > < span class = "lineno" > 469< / span > : length_over_2;< / div >
< div class = "line" > < a id = "l00470" name = "l00470" > < / a > < span class = "lineno" > 470< / span > < / div >
< div class = "line" > < a id = "l00471" name = "l00471" > < / a > < span class = "lineno" > 471< / span > < span class = "keywordtype" > short< / span > m = grid.z;< / div >
< div class = "line" > < a id = "l00472" name = "l00472" > < / a > < span class = "lineno" > 472< / span > < span class = "keywordtype" > short< / span > fft_idx = elem.z;< / div >
< div class = "line" > < a id = "l00473" name = "l00473" > < / a > < span class = "lineno" > 473< / span > < / div >
< div class = "line" > < a id = "l00474" name = "l00474" > < / a > < span class = "lineno" > 474< / span > float2 conj = {1, -1};< / div >
< div class = "line" > < a id = "l00475" name = "l00475" > < / a > < span class = "lineno" > 475< / span > float2 plus_j = {0, 1};< / div >
< div class = "line" > < a id = "l00476" name = "l00476" > < / a > < span class = "lineno" > 476< / span > < / div >
< div class = "line" > < a id = "l00477" name = "l00477" > < / a > < span class = "lineno" > 477< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > t = 0; t < elems_per_thread / 2 + 1; t++) {< / div >
< div class = "line" > < a id = "l00478" name = "l00478" > < / a > < span class = "lineno" > 478< / span > < span class = "keywordtype" > int< / span > index = < a class = "code hl_function" href = "namespacemetal.html#a6653b28c9473087141eddce39878d4d3" > metal::min< / a > (fft_idx + t * m, n_over_2 - 1);< / div >
< div class = "line" > < a id = "l00479" name = "l00479" > < / a > < span class = "lineno" > 479< / span > float2 x = in[batch_idx + index];< / div >
< div class = "line" > < a id = "l00480" name = "l00480" > < / a > < span class = "lineno" > 480< / span > float2 y = in[batch_idx + index + next_in];< / div >
< div class = "line" > < a id = "l00481" name = "l00481" > < / a > < span class = "lineno" > 481< / span > < span class = "keywordflow" > if< / span > (index < length_over_2) {< / div >
< div class = "line" > < a id = "l00482" name = "l00482" > < / a > < span class = "lineno" > 482< / span > < span class = "keywordtype" > bool< / span > last_val = length % 2 == 0 &amp;&amp; index == length_over_2 - 1;< / div >
< div class = "line" > < a id = "l00483" name = "l00483" > < / a > < span class = "lineno" > 483< / span > < span class = "keywordflow" > if< / span > (last_val) {< / div >
< div class = "line" > < a id = "l00484" name = "l00484" > < / a > < span class = "lineno" > 484< / span > x = float2(x.x, 0);< / div >
< div class = "line" > < a id = "l00485" name = "l00485" > < / a > < span class = "lineno" > 485< / span > y = float2(y.x, 0);< / div >
< div class = "line" > < a id = "l00486" name = "l00486" > < / a > < span class = "lineno" > 486< / span > }< / div >
< div class = "line" > < a id = "l00487" name = "l00487" > < / a > < span class = "lineno" > 487< / span > float2 elem1 = x + < a class = "code hl_function" href = "radix_8h.html#a5bfc53b531214c9ce277bebc18aa67d6" > complex_mul< / a > (y, plus_j);< / div >
< div class = "line" > < a id = "l00488" name = "l00488" > < / a > < span class = "lineno" > 488< / span > seq_buf[index] = < a class = "code hl_function" href = "radix_8h.html#a5bfc53b531214c9ce277bebc18aa67d6" > complex_mul< / a > (elem1 * conj, w_k[index]);< / div >
< div class = "line" > < a id = "l00489" name = "l00489" > < / a > < span class = "lineno" > 489< / span > < span class = "keywordflow" > if< / span > (index > 0 &amp;&amp; !last_val) {< / div >
< div class = "line" > < a id = "l00490" name = "l00490" > < / a > < span class = "lineno" > 490< / span > float2 elem2 = (x * conj) + < a class = "code hl_function" href = "radix_8h.html#a5bfc53b531214c9ce277bebc18aa67d6" > complex_mul< / a > (y * conj, plus_j);< / div >
< div class = "line" > < a id = "l00491" name = "l00491" > < / a > < span class = "lineno" > 491< / span > seq_buf[length - index] =< / div >
< div class = "line" > < a id = "l00492" name = "l00492" > < / a > < span class = "lineno" > 492< / span > < a class = "code hl_function" href = "radix_8h.html#a5bfc53b531214c9ce277bebc18aa67d6" > complex_mul< / a > (elem2 * conj, w_k[length - index]);< / div >
< div class = "line" > < a id = "l00493" name = "l00493" > < / a > < span class = "lineno" > 493< / span > }< / div >
< div class = "line" > < a id = "l00494" name = "l00494" > < / a > < span class = "lineno" > 494< / span > } < span class = "keywordflow" > else< / span > {< / div >
< div class = "line" > < a id = "l00495" name = "l00495" > < / a > < span class = "lineno" > 495< / span > < span class = "keywordtype" > short< / span > pad_index = < a class = "code hl_function" href = "namespacemetal.html#a6653b28c9473087141eddce39878d4d3" > metal::min< / a > (length + (index - length_over_2) * 2, n - 2);< / div >
< div class = "line" > < a id = "l00496" name = "l00496" > < / a > < span class = "lineno" > 496< / span > seq_buf[pad_index] = 0;< / div >
< div class = "line" > < a id = "l00497" name = "l00497" > < / a > < span class = "lineno" > 497< / span > seq_buf[pad_index + 1] = 0;< / div >
< div class = "line" > < a id = "l00498" name = "l00498" > < / a > < span class = "lineno" > 498< / span > }< / div >
< div class = "line" > < a id = "l00499" name = "l00499" > < / a > < span class = "lineno" > 499< / span > }< / div >
< div class = "line" > < a id = "l00500" name = "l00500" > < / a > < span class = "lineno" > 500< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00501" name = "l00501" > < / a > < span class = "lineno" > 501< / span > < / div >
< div class = "line" > < a id = "l00502" name = "l00502" > < / a > < span class = "lineno" > 502< / span > < span class = "keyword" > template< / span > &lt;&gt;< / div >
< div class = "foldopen" id = "foldopen00503" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00503" name = "l00503" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a420453a56e77d6b3891ed4b5f178af9c" > 503< / a > < / span > METAL_FUNC < span class = "keywordtype" > void< / span > < a class = "code hl_function" href = "struct_read_writer.html#a95367307acace2aa88226cf8956d2d88" > ReadWriter&lt;float2, float&gt;::write_padded< / a > (< / div >
< div class = "line" > < a id = "l00504" name = "l00504" > < / a > < span class = "lineno" > 504< / span > < span class = "keywordtype" > int< / span > length,< / div >
< div class = "line" > < a id = "l00505" name = "l00505" > < / a > < span class = "lineno" > 505< / span > < span class = "keyword" > const< / span > device float2* w_k)< span class = "keyword" > const < / span > {< / div >
< div class = "line" > < a id = "l00506" name = "l00506" > < / a > < span class = "lineno" > 506< / span > < span class = "keywordtype" > int< / span > batch_idx = elem.x * grid.y * length * 2 + elem.y * length * 2;< / div >
< div class = "line" > < a id = "l00507" name = "l00507" > < / a > < span class = "lineno" > 507< / span > threadgroup float2* seq_buf = < a class = "code hl_variable" href = "backend_2metal_2allocator_8h.html#a15aa5cc1baf29be08d55cca88509e697" > buf< / a > + elem.y * n + length - 1;< / div >
< div class = "line" > < a id = "l00508" name = "l00508" > < / a > < span class = "lineno" > 508< / span > < / div >
< div class = "line" > < a id = "l00509" name = "l00509" > < / a > < span class = "lineno" > 509< / span > < span class = "keywordtype" > int< / span > grid_index = elem.x * grid.y + elem.y;< / div >
< div class = "line" > < a id = "l00510" name = "l00510" > < / a > < span class = "lineno" > 510< / span > < span class = "keywordtype" > short< / span > next_out =< / div >
< div class = "line" > < a id = "l00511" name = "l00511" > < / a > < span class = "lineno" > 511< / span > batch_size % 2 == 1 &amp;&amp; grid_index * 2 == batch_size - 1 ? 0 : length;< / div >
< div class = "line" > < a id = "l00512" name = "l00512" > < / a > < span class = "lineno" > 512< / span > < / div >
< div class = "line" > < a id = "l00513" name = "l00513" > < / a > < span class = "lineno" > 513< / span > < span class = "keywordtype" > short< / span > m = grid.z;< / div >
< div class = "line" > < a id = "l00514" name = "l00514" > < / a > < span class = "lineno" > 514< / span > < span class = "keywordtype" > short< / span > fft_idx = elem.z;< / div >
< div class = "line" > < a id = "l00515" name = "l00515" > < / a > < span class = "lineno" > 515< / span > < / div >
< div class = "line" > < a id = "l00516" name = "l00516" > < / a > < span class = "lineno" > 516< / span > float2 inv_factor = {1.0f / n, -1.0f / n};< / div >
< div class = "line" > < a id = "l00517" name = "l00517" > < / a > < span class = "lineno" > 517< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > e = 0; e < elems_per_thread; e++) {< / div >
< div class = "line" > < a id = "l00518" name = "l00518" > < / a > < span class = "lineno" > 518< / span > < span class = "keywordtype" > int< / span > index = fft_idx + e * m;< / div >
< div class = "line" > < a id = "l00519" name = "l00519" > < / a > < span class = "lineno" > 519< / span > < span class = "keywordflow" > if< / span > (index < length) {< / div >
< div class = "line" > < a id = "l00520" name = "l00520" > < / a > < span class = "lineno" > 520< / span > float2 output = < a class = "code hl_function" href = "radix_8h.html#a5bfc53b531214c9ce277bebc18aa67d6" > complex_mul< / a > (seq_buf[index] * inv_factor, w_k[index]);< / div >
< div class = "line" > < a id = "l00521" name = "l00521" > < / a > < span class = "lineno" > 521< / span > out[batch_idx + index] = output.x / length;< / div >
< div class = "line" > < a id = "l00522" name = "l00522" > < / a > < span class = "lineno" > 522< / span > out[batch_idx + index + next_out] = output.y / -length;< / div >
< div class = "line" > < a id = "l00523" name = "l00523" > < / a > < span class = "lineno" > 523< / span > }< / div >
< div class = "line" > < a id = "l00524" name = "l00524" > < / a > < span class = "lineno" > 524< / span > }< / div >
< div class = "line" > < a id = "l00525" name = "l00525" > < / a > < span class = "lineno" > 525< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00526" name = "l00526" > < / a > < span class = "lineno" > 526< / span > < / div >
< div class = "line" > < a id = "l00527" name = "l00527" > < / a > < span class = "lineno" > 527< / span > < span class = "comment" > // Four Step RFFT< / span > < / div >
< div class = "line" > < a id = "l00528" name = "l00528" > < / a > < span class = "lineno" > 528< / span > < span class = "keyword" > template< / span > &lt;&gt;< / div >
< div class = "line" > < a id = "l00529" name = "l00529" > < / a > < span class = "lineno" > 529< / span > METAL_FUNC < span class = "keywordtype" > void< / span > < / div >
< div class = "foldopen" id = "foldopen00530" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00530" name = "l00530" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a795a71a8e1f154a5af415ebe1b3f0713" > 530< / a > < / span > < a class = "code hl_struct" href = "struct_read_writer.html" > ReadWriter< / a > &lt;float2, float2, < span class = "comment" > /*step=*/< / span > 1, < span class = "comment" > /*real=*/< / span > < span class = "keyword" > true< / span > &gt;::load_strided(< / div >
< div class = "line" > < a id = "l00531" name = "l00531" > < / a > < span class = "lineno" > 531< / span > < span class = "keywordtype" > int< / span > stride,< / div >
< div class = "line" > < a id = "l00532" name = "l00532" > < / a > < span class = "lineno" > 532< / span > < span class = "keywordtype" > int< / span > overall_n) {< / div >
< div class = "line" > < a id = "l00533" name = "l00533" > < / a > < span class = "lineno" > 533< / span > < span class = "comment" > // Silence compiler warnings< / span > < / div >
< div class = "line" > < a id = "l00534" name = "l00534" > < / a > < span class = "lineno" > 534< / span > (void)stride;< / div >
< div class = "line" > < a id = "l00535" name = "l00535" > < / a > < span class = "lineno" > 535< / span > (void)overall_n;< / div >
< div class = "line" > < a id = "l00536" name = "l00536" > < / a > < span class = "lineno" > 536< / span > < span class = "comment" > // Don't invert between steps< / span > < / div >
< div class = "line" > < a id = "l00537" name = "l00537" > < / a > < span class = "lineno" > 537< / span > < span class = "keywordtype" > bool< / span > default_inv = inv;< / div >
< div class = "line" > < a id = "l00538" name = "l00538" > < / a > < span class = "lineno" > 538< / span > inv = < span class = "keyword" > false< / span > ;< / div >
< div class = "line" > < a id = "l00539" name = "l00539" > < / a > < span class = "lineno" > 539< / span > load();< / div >
< div class = "line" > < a id = "l00540" name = "l00540" > < / a > < span class = "lineno" > 540< / span > inv = default_inv;< / div >
< div class = "line" > < a id = "l00541" name = "l00541" > < / a > < span class = "lineno" > 541< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00542" name = "l00542" > < / a > < span class = "lineno" > 542< / span > < / div >
< div class = "line" > < a id = "l00543" name = "l00543" > < / a > < span class = "lineno" > 543< / span > < span class = "keyword" > template< / span > &lt;&gt;< / div >
< div class = "line" > < a id = "l00544" name = "l00544" > < / a > < span class = "lineno" > 544< / span > METAL_FUNC < span class = "keywordtype" > void< / span > < / div >
< div class = "foldopen" id = "foldopen00545" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00545" name = "l00545" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a959ccaa08f2999c50cea063b01e492e4" > 545< / a > < / span > < a class = "code hl_struct" href = "struct_read_writer.html" > ReadWriter< / a > &lt;float2, float2, < span class = "comment" > /*step=*/< / span > 1, < span class = "comment" > /*real=*/< / span > < span class = "keyword" > true< / span > &gt;::write_strided(< / div >
< div class = "line" > < a id = "l00546" name = "l00546" > < / a > < span class = "lineno" > 546< / span > < span class = "keywordtype" > int< / span > stride,< / div >
< div class = "line" > < a id = "l00547" name = "l00547" > < / a > < span class = "lineno" > 547< / span > < span class = "keywordtype" > int< / span > overall_n) {< / div >
< div class = "line" > < a id = "l00548" name = "l00548" > < / a > < span class = "lineno" > 548< / span > < span class = "keywordtype" > int< / span > overall_n_over_2 = overall_n / 2 + 1;< / div >
< div class = "line" > < a id = "l00549" name = "l00549" > < / a > < span class = "lineno" > 549< / span > < span class = "keywordtype" > int< / span > coalesce_width = grid.y;< / div >
< div class = "line" > < a id = "l00550" name = "l00550" > < / a > < span class = "lineno" > 550< / span > < span class = "keywordtype" > int< / span > tg_idx = elem.y * grid.z + elem.z;< / div >
< div class = "line" > < a id = "l00551" name = "l00551" > < / a > < span class = "lineno" > 551< / span > < span class = "keywordtype" > int< / span > outer_batch_size = stride / coalesce_width;< / div >
< div class = "line" > < a id = "l00552" name = "l00552" > < / a > < span class = "lineno" > 552< / span > < / div >
< div class = "line" > < a id = "l00553" name = "l00553" > < / a > < span class = "lineno" > 553< / span > < span class = "keywordtype" > int< / span > strided_batch_idx = (elem.x % outer_batch_size) * coalesce_width +< / div >
< div class = "line" > < a id = "l00554" name = "l00554" > < / a > < span class = "lineno" > 554< / span > overall_n_over_2 * (elem.x / outer_batch_size);< / div >
< div class = "line" > < a id = "l00555" name = "l00555" > < / a > < span class = "lineno" > 555< / span > strided_device_idx = strided_batch_idx +< / div >
< div class = "line" > < a id = "l00556" name = "l00556" > < / a > < span class = "lineno" > 556< / span > tg_idx / coalesce_width * elems_per_thread / 2 * stride +< / div >
< div class = "line" > < a id = "l00557" name = "l00557" > < / a > < span class = "lineno" > 557< / span > tg_idx % coalesce_width;< / div >
< div class = "line" > < a id = "l00558" name = "l00558" > < / a > < span class = "lineno" > 558< / span > strided_shared_idx = (tg_idx % coalesce_width) * n +< / div >
< div class = "line" > < a id = "l00559" name = "l00559" > < / a > < span class = "lineno" > 559< / span > tg_idx / coalesce_width * elems_per_thread / 2;< / div >
< div class = "line" > < a id = "l00560" name = "l00560" > < / a > < span class = "lineno" > 560< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > e = 0; e < elems_per_thread / 2; e++) {< / div >
< div class = "line" > < a id = "l00561" name = "l00561" > < / a > < span class = "lineno" > 561< / span > float2 output = < a class = "code hl_variable" href = "backend_2metal_2allocator_8h.html#a15aa5cc1baf29be08d55cca88509e697" > buf< / a > [strided_shared_idx + e];< / div >
< div class = "line" > < a id = "l00562" name = "l00562" > < / a > < span class = "lineno" > 562< / span > out[strided_device_idx + e * stride] = output;< / div >
< div class = "line" > < a id = "l00563" name = "l00563" > < / a > < span class = "lineno" > 563< / span > }< / div >
< div class = "line" > < a id = "l00564" name = "l00564" > < / a > < span class = "lineno" > 564< / span > < / div >
< div class = "line" > < a id = "l00565" name = "l00565" > < / a > < span class = "lineno" > 565< / span > < span class = "comment" > // Add on n/2 + 1 element< / span > < / div >
< div class = "line" > < a id = "l00566" name = "l00566" > < / a > < span class = "lineno" > 566< / span > < span class = "keywordflow" > if< / span > (tg_idx == 0 & & elem.x % outer_batch_size == 0) {< / div >
< div class = "line" > < a id = "l00567" name = "l00567" > < / a > < span class = "lineno" > 567< / span > out[strided_batch_idx + overall_n / 2] = < a class = "code hl_variable" href = "backend_2metal_2allocator_8h.html#a15aa5cc1baf29be08d55cca88509e697" > buf< / a > [n / 2];< / div >
< div class = "line" > < a id = "l00568" name = "l00568" > < / a > < span class = "lineno" > 568< / span > }< / div >
< div class = "line" > < a id = "l00569" name = "l00569" > < / a > < span class = "lineno" > 569< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00570" name = "l00570" > < / a > < span class = "lineno" > 570< / span > < / div >
< div class = "line" > < a id = "l00571" name = "l00571" > < / a > < span class = "lineno" > 571< / span > < span class = "comment" > // Four Step IRFFT< / span > < / div >
< div class = "line" > < a id = "l00572" name = "l00572" > < / a > < span class = "lineno" > 572< / span > < span class = "keyword" > template< / span > < > < / div >
< div class = "line" > < a id = "l00573" name = "l00573" > < / a > < span class = "lineno" > 573< / span > METAL_FUNC < span class = "keywordtype" > void< / span > < / div >
< div class = "foldopen" id = "foldopen00574" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00574" name = "l00574" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a0935b946b8bf2e769427fcbf2da2f7be" > 574< / a > < / span > < a class = "code hl_struct" href = "struct_read_writer.html" > ReadWriter< / a > < float2, float2, < span class = "comment" > /*step=*/< / span > 0, < span class = "comment" > /*real=*/< / span > < span class = "keyword" > true< / span > > ::load_strided(< / div >
< div class = "line" > < a id = "l00575" name = "l00575" > < / a > < span class = "lineno" > 575< / span > < span class = "keywordtype" > int< / span > stride,< / div >
< div class = "line" > < a id = "l00576" name = "l00576" > < / a > < span class = "lineno" > 576< / span > < span class = "keywordtype" > int< / span > overall_n) {< / div >
< div class = "line" > < a id = "l00577" name = "l00577" > < / a > < span class = "lineno" > 577< / span > < span class = "keywordtype" > int< / span > overall_n_over_2 = overall_n / 2 + 1;< / div >
< div class = "line" > < a id = "l00578" name = "l00578" > < / a > < span class = "lineno" > 578< / span > < span class = "keyword" > auto< / span > conj = float2(1, -1);< / div >
< div class = "line" > < a id = "l00579" name = "l00579" > < / a > < span class = "lineno" > 579< / span > < / div >
< div class = "line" > < a id = "l00580" name = "l00580" > < / a > < span class = "lineno" > 580< / span > compute_strided_indices(stride, overall_n);< / div >
< div class = "line" > < a id = "l00581" name = "l00581" > < / a > < span class = "lineno" > 581< / span > < span class = "comment" > // Translate indices in terms of N - k< / span > < / div >
< div class = "line" > < a id = "l00582" name = "l00582" > < / a > < span class = "lineno" > 582< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > e = 0; e < elems_per_thread; e++) {< / div >
< div class = "line" > < a id = "l00583" name = "l00583" > < / a > < span class = "lineno" > 583< / span > < span class = "keywordtype" > int< / span > device_idx = strided_device_idx + e * stride;< / div >
< div class = "line" > < a id = "l00584" name = "l00584" > < / a > < span class = "lineno" > 584< / span > < span class = "keywordtype" > int< / span > overall_batch = device_idx / overall_n;< / div >
< div class = "line" > < a id = "l00585" name = "l00585" > < / a > < span class = "lineno" > 585< / span > < span class = "keywordtype" > int< / span > overall_index = device_idx % overall_n;< / div >
< div class = "line" > < a id = "l00586" name = "l00586" > < / a > < span class = "lineno" > 586< / span > < span class = "keywordflow" > if< / span > (overall_index < overall_n_over_2) {< / div >
< div class = "line" > < a id = "l00587" name = "l00587" > < / a > < span class = "lineno" > 587< / span > device_idx -= overall_batch * (overall_n - overall_n_over_2);< / div >
< div class = "line" > < a id = "l00588" name = "l00588" > < / a > < span class = "lineno" > 588< / span > < a class = "code hl_variable" href = "backend_2metal_2allocator_8h.html#a15aa5cc1baf29be08d55cca88509e697" > buf< / a > [strided_shared_idx + e] = in[device_idx] * conj;< / div >
< div class = "line" > < a id = "l00589" name = "l00589" > < / a > < span class = "lineno" > 589< / span > } < span class = "keywordflow" > else< / span > {< / div >
< div class = "line" > < a id = "l00590" name = "l00590" > < / a > < span class = "lineno" > 590< / span > < span class = "keywordtype" > int< / span > conj_idx = overall_n - overall_index;< / div >
< div class = "line" > < a id = "l00591" name = "l00591" > < / a > < span class = "lineno" > 591< / span > device_idx = overall_batch * overall_n_over_2 + conj_idx;< / div >
< div class = "line" > < a id = "l00592" name = "l00592" > < / a > < span class = "lineno" > 592< / span > < a class = "code hl_variable" href = "backend_2metal_2allocator_8h.html#a15aa5cc1baf29be08d55cca88509e697" > buf< / a > [strided_shared_idx + e] = in[device_idx];< / div >
< div class = "line" > < a id = "l00593" name = "l00593" > < / a > < span class = "lineno" > 593< / span > }< / div >
< div class = "line" > < a id = "l00594" name = "l00594" > < / a > < span class = "lineno" > 594< / span > }< / div >
< div class = "line" > < a id = "l00595" name = "l00595" > < / a > < span class = "lineno" > 595< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00596" name = "l00596" > < / a > < span class = "lineno" > 596< / span > < / div >
< div class = "line" > < a id = "l00597" name = "l00597" > < / a > < span class = "lineno" > 597< / span > < span class = "keyword" > template< / span > < > < / div >
< div class = "line" > < a id = "l00598" name = "l00598" > < / a > < span class = "lineno" > 598< / span > METAL_FUNC < span class = "keywordtype" > void< / span > < / div >
< div class = "foldopen" id = "foldopen00599" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00599" name = "l00599" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a7d45368c74a8b7c632659504b3273a13" > 599< / a > < / span > < a class = "code hl_struct" href = "struct_read_writer.html" > ReadWriter< / a > < float2, float, < span class = "comment" > /*step=*/< / span > 1, < span class = "comment" > /*real=*/< / span > < span class = "keyword" > true< / span > > ::load_strided(< / div >
< div class = "line" > < a id = "l00600" name = "l00600" > < / a > < span class = "lineno" > 600< / span > < span class = "keywordtype" > int< / span > stride,< / div >
< div class = "line" > < a id = "l00601" name = "l00601" > < / a > < span class = "lineno" > 601< / span > < span class = "keywordtype" > int< / span > overall_n) {< / div >
< div class = "line" > < a id = "l00602" name = "l00602" > < / a > < span class = "lineno" > 602< / span > < span class = "comment" > // Silence compiler warnings< / span > < / div >
< div class = "line" > < a id = "l00603" name = "l00603" > < / a > < span class = "lineno" > 603< / span > (void)stride;< / div >
< div class = "line" > < a id = "l00604" name = "l00604" > < / a > < span class = "lineno" > 604< / span > (void)overall_n;< / div >
< div class = "line" > < a id = "l00605" name = "l00605" > < / a > < span class = "lineno" > 605< / span > < span class = "keywordtype" > bool< / span > default_inv = inv;< / div >
< div class = "line" > < a id = "l00606" name = "l00606" > < / a > < span class = "lineno" > 606< / span > inv = < span class = "keyword" > false< / span > ;< / div >
< div class = "line" > < a id = "l00607" name = "l00607" > < / a > < span class = "lineno" > 607< / span > load();< / div >
< div class = "line" > < a id = "l00608" name = "l00608" > < / a > < span class = "lineno" > 608< / span > inv = default_inv;< / div >
< div class = "line" > < a id = "l00609" name = "l00609" > < / a > < span class = "lineno" > 609< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00610" name = "l00610" > < / a > < span class = "lineno" > 610< / span > < / div >
< div class = "line" > < a id = "l00611" name = "l00611" > < / a > < span class = "lineno" > 611< / span > < span class = "keyword" > template< / span > < > < / div >
< div class = "line" > < a id = "l00612" name = "l00612" > < / a > < span class = "lineno" > 612< / span > METAL_FUNC < span class = "keywordtype" > void< / span > < / div >
< div class = "foldopen" id = "foldopen00613" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00613" name = "l00613" > < / a > < span class = "lineno" > < a class = "line" href = "struct_read_writer.html#a5592b24dad5ad030a1e4769b0a278f35" > 613< / a > < / span > < a class = "code hl_struct" href = "struct_read_writer.html" > ReadWriter< / a > < float2, float, < span class = "comment" > /*step=*/< / span > 1, < span class = "comment" > /*real=*/< / span > < span class = "keyword" > true< / span > > ::write_strided(< / div >
< div class = "line" > < a id = "l00614" name = "l00614" > < / a > < span class = "lineno" > 614< / span > < span class = "keywordtype" > int< / span > stride,< / div >
< div class = "line" > < a id = "l00615" name = "l00615" > < / a > < span class = "lineno" > 615< / span > < span class = "keywordtype" > int< / span > overall_n) {< / div >
< div class = "line" > < a id = "l00616" name = "l00616" > < / a > < span class = "lineno" > 616< / span > compute_strided_indices(stride, overall_n);< / div >
< div class = "line" > < a id = "l00617" name = "l00617" > < / a > < span class = "lineno" > 617< / span > < / div >
< div class = "line" > < a id = "l00618" name = "l00618" > < / a > < span class = "lineno" > 618< / span > < span class = "keywordflow" > for< / span > (< span class = "keywordtype" > int< / span > e = 0; e < elems_per_thread; e++) {< / div >
< div class = "line" > < a id = "l00619" name = "l00619" > < / a > < span class = "lineno" > 619< / span > out[strided_device_idx + e * stride] =< / div >
< div class = "line" > < a id = "l00620" name = "l00620" > < / a > < span class = "lineno" > 620< / span > pre_out(< a class = "code hl_variable" href = "backend_2metal_2allocator_8h.html#a15aa5cc1baf29be08d55cca88509e697" > buf< / a > [strided_shared_idx + e], overall_n).x;< / div >
< div class = "line" > < a id = "l00621" name = "l00621" > < / a > < span class = "lineno" > 621< / span > }< / div >
< div class = "line" > < a id = "l00622" name = "l00622" > < / a > < span class = "lineno" > 622< / span > }< / div >
< / div >
2024-10-26 04:23:45 +08:00
< div class = "ttc" id = "abackend_2metal_2allocator_8h_html_a15aa5cc1baf29be08d55cca88509e697" > < div class = "ttname" > < a href = "backend_2metal_2allocator_8h.html#a15aa5cc1baf29be08d55cca88509e697" > buf< / a > < / div > < div class = "ttdeci" > MTL::Buffer * buf< / div > < div class = "ttdef" > < b > Definition< / b > allocator.h:39< / div > < / div >
2024-10-15 23:12:17 +08:00
< div class = "ttc" id = "abackend_2metal_2kernels_2fft_8h_html_a7a83318497519ff3ff0141b7d511ed38" > < div class = "ttname" > < a href = "backend_2metal_2kernels_2fft_8h.html#a7a83318497519ff3ff0141b7d511ed38" > inv_< / a > < / div > < div class = "ttdeci" > static constant constexpr const bool inv_< / div > < div class = "ttdef" > < b > Definition< / b > fft.h:23< / div > < / div >
< div class = "ttc" id = "abackend_2metal_2kernels_2fft_8h_html_ad395c11e6f2aee72cd1928fba93a35a3" > < div class = "ttname" > < a href = "backend_2metal_2kernels_2fft_8h.html#ad395c11e6f2aee72cd1928fba93a35a3" > elems_per_thread_< / a > < / div > < div class = "ttdeci" > static constant constexpr const int elems_per_thread_< / div > < div class = "ttdef" > < b > Definition< / b > fft.h:25< / div > < / div >
< div class = "ttc" id = "anamespacemetal_html" > < div class = "ttname" > < a href = "namespacemetal.html" > metal< / a > < / div > < div class = "ttdef" > < b > Definition< / b > bf16.h:265< / div > < / div >
< div class = "ttc" id = "anamespacemetal_html_a6653b28c9473087141eddce39878d4d3" > < div class = "ttname" > < a href = "namespacemetal.html#a6653b28c9473087141eddce39878d4d3" > metal::min< / a > < / div > < div class = "ttdeci" > METAL_FUNC bfloat16_t min(bfloat16_t x, bfloat16_t y)< / div > < div class = "ttdef" > < b > Definition< / b > bf16_math.h:234< / div > < / div >
< div class = "ttc" id = "aradix_8h_html" > < div class = "ttname" > < a href = "radix_8h.html" > radix.h< / a > < / div > < / div >
< div class = "ttc" id = "aradix_8h_html_a5bfc53b531214c9ce277bebc18aa67d6" > < div class = "ttname" > < a href = "radix_8h.html#a5bfc53b531214c9ce277bebc18aa67d6" > complex_mul< / a > < / div > < div class = "ttdeci" > METAL_FUNC float2 complex_mul(float2 a, float2 b)< / div > < div class = "ttdef" > < b > Definition< / b > radix.h:19< / div > < / div >
< div class = "ttc" id = "aradix_8h_html_ac5cf950316b9445296ee9ecfc56a56bd" > < div class = "ttname" > < a href = "radix_8h.html#ac5cf950316b9445296ee9ecfc56a56bd" > get_twiddle< / a > < / div > < div class = "ttdeci" > METAL_FUNC float2 get_twiddle(int k, int p)< / div > < div class = "ttdef" > < b > Definition< / b > radix.h:29< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html" > < div class = "ttname" > < a href = "struct_read_writer.html" > ReadWriter< / a > < / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:35< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_a08e10626fbc789b6dff9172fd6c36f7c" > < div class = "ttname" > < a href = "struct_read_writer.html#a08e10626fbc789b6dff9172fd6c36f7c" > ReadWriter::out_of_bounds< / a > < / div > < div class = "ttdeci" > METAL_FUNC bool out_of_bounds() const< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:94< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_a120eaf4b5f32e80972a18d14e82a2d75" > < div class = "ttname" > < a href = "struct_read_writer.html#a120eaf4b5f32e80972a18d14e82a2d75" > ReadWriter::load< / a > < / div > < div class = "ttdeci" > METAL_FUNC void load() const< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:100< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_a185553204b07a407ef02c41dd78e8239" > < div class = "ttname" > < a href = "struct_read_writer.html#a185553204b07a407ef02c41dd78e8239" > ReadWriter::pre_out< / a > < / div > < div class = "ttdeci" > METAL_FUNC float2 pre_out(float2 elem, int length) const< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:90< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_a1aa07e41d7ac286ad79bd26a072dfa0c" > < div class = "ttname" > < a href = "struct_read_writer.html#a1aa07e41d7ac286ad79bd26a072dfa0c" > ReadWriter::ReadWriter< / a > < / div > < div class = "ttdeci" > METAL_FUNC ReadWriter(const device in_T *in_, threadgroup float2 *buf_, device out_T *out_, const short n_, const int batch_size_, const short elems_per_thread_, const uint3 elem_, const uint3 grid_, const bool inv_)< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:51< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_a23bac3c96dd0265ddbee1f256be45ff5" > < div class = "ttname" > < a href = "struct_read_writer.html#a23bac3c96dd0265ddbee1f256be45ff5" > ReadWriter::buf< / a > < / div > < div class = "ttdeci" > threadgroup float2 * buf< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:37< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_a32b82adcd6ed324ce235e9f5ad780ede" > < div class = "ttname" > < a href = "struct_read_writer.html#a32b82adcd6ed324ce235e9f5ad780ede" > ReadWriter::elem< / a > < / div > < div class = "ttdeci" > uint3 elem< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:42< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_a444230a0182ce6ba1898c04ce6e669a7" > < div class = "ttname" > < a href = "struct_read_writer.html#a444230a0182ce6ba1898c04ce6e669a7" > ReadWriter::elems_per_thread< / a > < / div > < div class = "ttdeci" > int elems_per_thread< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:41< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_a4c0b12484aac4fd6759d67c190391989" > < div class = "ttname" > < a href = "struct_read_writer.html#a4c0b12484aac4fd6759d67c190391989" > ReadWriter::strided_device_idx< / a > < / div > < div class = "ttdeci" > int strided_device_idx< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:48< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_a64c58e358da22358df3075448ea23893" > < div class = "ttname" > < a href = "struct_read_writer.html#a64c58e358da22358df3075448ea23893" > ReadWriter::threads_per_tg< / a > < / div > < div class = "ttdeci" > int threads_per_tg< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:44< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_a655346c9ebfc33a69da3f1c1d4238dfb" > < div class = "ttname" > < a href = "struct_read_writer.html#a655346c9ebfc33a69da3f1c1d4238dfb" > ReadWriter::n< / a > < / div > < div class = "ttdeci" > int n< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:39< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_a689f4890c1d2ce33fc6da7550beec735" > < div class = "ttname" > < a href = "struct_read_writer.html#a689f4890c1d2ce33fc6da7550beec735" > ReadWriter::batch_size< / a > < / div > < div class = "ttdeci" > int batch_size< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:40< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_a6c47a25b2135393045fa5f95ada59d9d" > < div class = "ttname" > < a href = "struct_read_writer.html#a6c47a25b2135393045fa5f95ada59d9d" > ReadWriter::post_in< / a > < / div > < div class = "ttdeci" > METAL_FUNC float2 post_in(float elem) const< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:82< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_a773fa8524515bdc2ff8b0e2060741813" > < div class = "ttname" > < a href = "struct_read_writer.html#a773fa8524515bdc2ff8b0e2060741813" > ReadWriter::inv< / a > < / div > < div class = "ttdeci" > bool inv< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:45< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_a77a4d7eac217305e22a3c25b3756ef67" > < div class = "ttname" > < a href = "struct_read_writer.html#a77a4d7eac217305e22a3c25b3756ef67" > ReadWriter::write_strided< / a > < / div > < div class = "ttdeci" > METAL_FUNC void write_strided(int stride, int overall_n)< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:210< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_a7c903fbb8b85a856ba5564d7df537cdf" > < div class = "ttname" > < a href = "struct_read_writer.html#a7c903fbb8b85a856ba5564d7df537cdf" > ReadWriter::compute_strided_indices< / a > < / div > < div class = "ttdeci" > METAL_FUNC void compute_strided_indices(int stride, int overall_n)< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:180< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_a94da8aa85fa2916eaa3eaeb11499234a" > < div class = "ttname" > < a href = "struct_read_writer.html#a94da8aa85fa2916eaa3eaeb11499234a" > ReadWriter::pre_out< / a > < / div > < div class = "ttdeci" > METAL_FUNC float2 pre_out(float2 elem) const< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:86< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_a95367307acace2aa88226cf8956d2d88" > < div class = "ttname" > < a href = "struct_read_writer.html#a95367307acace2aa88226cf8956d2d88" > ReadWriter::write_padded< / a > < / div > < div class = "ttdeci" > METAL_FUNC void write_padded(int length, const device float2 *w_k) const< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:163< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_a998ef484bade81f726b9edfc6b878197" > < div class = "ttname" > < a href = "struct_read_writer.html#a998ef484bade81f726b9edfc6b878197" > ReadWriter::load_strided< / a > < / div > < div class = "ttdeci" > METAL_FUNC void load_strided(int stride, int overall_n)< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:202< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_ab555cec93b66eead607e6a03d9324e1c" > < div class = "ttname" > < a href = "struct_read_writer.html#ab555cec93b66eead607e6a03d9324e1c" > ReadWriter::post_in< / a > < / div > < div class = "ttdeci" > METAL_FUNC float2 post_in(float2 elem) const< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:77< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_ab6057215920138f28fd00f0e7ea8afa4" > < div class = "ttname" > < a href = "struct_read_writer.html#ab6057215920138f28fd00f0e7ea8afa4" > ReadWriter::in< / a > < / div > < div class = "ttdeci" > const device in_T * in< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:36< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_abea3b913c952c505d0ca4e529c7316ef" > < div class = "ttname" > < a href = "struct_read_writer.html#abea3b913c952c505d0ca4e529c7316ef" > ReadWriter::out< / a > < / div > < div class = "ttdeci" > device out_T * out< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:38< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_ac2ea71e41740ddc863890e3e8e6f09d0" > < div class = "ttname" > < a href = "struct_read_writer.html#ac2ea71e41740ddc863890e3e8e6f09d0" > ReadWriter::write< / a > < / div > < div class = "ttdeci" > METAL_FUNC void write() const< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:123< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_ac7a957f99873d3797081f5d620f3b2c8" > < div class = "ttname" > < a href = "struct_read_writer.html#ac7a957f99873d3797081f5d620f3b2c8" > ReadWriter::grid< / a > < / div > < div class = "ttdeci" > uint3 grid< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:43< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_ace40adb02cfb33d89c98353327c251fc" > < div class = "ttname" > < a href = "struct_read_writer.html#ace40adb02cfb33d89c98353327c251fc" > ReadWriter::strided_shared_idx< / a > < / div > < div class = "ttdeci" > int strided_shared_idx< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:49< / div > < / div >
< div class = "ttc" id = "astruct_read_writer_html_add5bd3f647793a5a19d63197a19df73c" > < div class = "ttname" > < a href = "struct_read_writer.html#add5bd3f647793a5a19d63197a19df73c" > ReadWriter::load_padded< / a > < / div > < div class = "ttdeci" > METAL_FUNC void load_padded(int length, const device float2 *w_k) const< / div > < div class = "ttdef" > < b > Definition< / b > readwrite.h:146< / div > < / div >
< / div > <!-- fragment --> < / div > <!-- contents -->
<!-- start footer part -->
< hr class = "footer" / > < address class = "footer" > < small >
Generated by  < a href = "https://www.doxygen.org/index.html" > < img class = "footer" src = "doxygen.svg" width = "104" height = "31" alt = "doxygen" / > < / a > 1.12.0
< / small > < / address >
< / div > <!-- doc - content -->
< / body >
< / html >