<!-- 2025-02-07 04:16:29 +08:00 -->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US">
<head>
  <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
  <meta http-equiv="X-UA-Compatible" content="IE=11"/>
  <meta name="generator" content="Doxygen 1.13.2"/>
  <meta name="viewport" content="width=device-width, initial-scale=1"/>
  <title>MLX: mlx/backend/cpu/simd/math.h Source File</title>
  <!-- Stylesheets and scripts for the tabs, navigation tree and search box.
       All paths are relative to this page. -->
  <link href="tabs.css" rel="stylesheet" type="text/css"/>
  <script type="text/javascript" src="jquery.js"></script>
  <script type="text/javascript" src="dynsections.js"></script>
  <script type="text/javascript" src="clipboard.js"></script>
  <link href="navtree.css" rel="stylesheet" type="text/css"/>
  <script type="text/javascript" src="navtreedata.js"></script>
  <script type="text/javascript" src="navtree.js"></script>
  <script type="text/javascript" src="resize.js"></script>
  <script type="text/javascript" src="cookie.js"></script>
  <link href="search/search.css" rel="stylesheet" type="text/css"/>
  <script type="text/javascript" src="search/searchdata.js"></script>
  <script type="text/javascript" src="search/search.js"></script>
  <!-- Calls init_search() through jQuery's ready shorthand. -->
  <script type="text/javascript">
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
  $(function() { init_search(); });
/* @license-end */
  </script>
  <link href="doxygen.css" rel="stylesheet" type="text/css"/>
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->
<!-- Title area: project name on the left, search box on the right.
     The ids and inline handlers below are referenced by the searchBox
     object created in the script that follows, so they are kept as-is. -->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
 <tbody>
 <tr id="projectrow">
  <td id="projectalign">
   <div id="projectname">MLX
   </div>
  </td>
    <td>        <div id="MSearchBox" class="MSearchBoxInactive">
        <span class="left">
          <span id="MSearchSelect" onmouseover="return searchBox.OnSearchSelectShow()" onmouseout="return searchBox.OnSearchSelectHide()">&#160;</span>
          <input type="text" id="MSearchField" value="" placeholder="Search" accesskey="S"
               onfocus="searchBox.OnSearchFieldFocus(true)"
               onblur="searchBox.OnSearchFieldFocus(false)"
               onkeyup="searchBox.OnSearchFieldChange(event)"/>
          </span><span class="right">
            <a id="MSearchClose" href="javascript:searchBox.CloseResultsWindow()"><img id="MSearchCloseImg" border="0" src="search/close.svg" alt=""/></a>
          </span>
        </div>
</td>
 </tr>
 </tbody>
</table>
</div>
<!-- end header part -->
<!-- Generated by Doxygen 1.13.2 -->
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
var searchBox = new SearchBox("searchBox", "search/",'.html');
/* @license-end */
</script>
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
$(function() { codefold.init(0); });
/* @license-end */
</script>
</div><!-- top -->
<!-- Side navigation panel: empty containers for the navigation tree, plus
     the splitbar handle. -->
<div id="side-nav" class="ui-resizable side-nav-resizable">
  <div id="nav-tree">
    <div id="nav-tree-contents">
      <div id="nav-sync" class="sync"></div>
    </div>
  </div>
  <div id="splitbar" style="-moz-user-select:none;"
       class="ui-resizable-handle">
  </div>
</div>
<!-- Calls initNavTree() for this page and initResizable(true) through
     jQuery's ready shorthand. -->
<script type="text/javascript">
/* @license magnet:?xt=urn:btih:d3d9a9a6595521f9666a5e94cc830dab83b65699&amp;dn=expat.txt MIT */
$(function(){initNavTree('math_8h_source.html',''); initResizable(true); });
/* @license-end */
</script>
<div id="doc-content">
<!-- window showing the filter options -->
<div id="MSearchSelectWindow"
     onmouseover="return searchBox.OnSearchSelectShow()"
     onmouseout="return searchBox.OnSearchSelectHide()"
     onkeydown="return searchBox.OnSearchSelectKey(event)">
</div>
<!-- iframe showing the search results (closed by default) -->
<!-- Search results container: an empty SRResults div followed by three
     status messages (Loading, Searching, NoMatches). -->
<div id="MSearchResultsWindow">
<div id="MSearchResults">
<div class="SRPage">
<div id="SRIndex">
<div id="SRResults"></div>
<div class="SRStatus" id="Loading">Loading...</div>
<div class="SRStatus" id="Searching">Searching...</div>
<div class="SRStatus" id="NoMatches">No Matches</div>
</div>
</div>
</div>
</div>
<!-- Page heading: name of the file whose source is listed below. -->
<div class="header">
  <div class="headertitle"><div class="title">math.h</div></div>
</div><!--header-->
<div class="contents">
<a href="math_8h.html">Go to the documentation of this file.</a><div class="fragment"><div class="line"><a id="l00001" name="l00001"></a><span class="lineno">    1</span><span class="comment">// Copyright © 2024 Apple Inc.</span></div>
<!-- Listing lines 2 to 10: pragma, include, namespace opening and the inf constant. -->
<div class="line"><a id="l00002" name="l00002"></a><span class="lineno">    2</span> </div>
<div class="line"><a id="l00003" name="l00003"></a><span class="lineno">    3</span><span class="preprocessor">#pragma once</span></div>
<div class="line"><a id="l00004" name="l00004"></a><span class="lineno">    4</span> </div>
<div class="line"><a id="l00005" name="l00005"></a><span class="lineno">    5</span><span class="preprocessor">#include &quot;<a class="code" href="type_8h.html">mlx/backend/cpu/simd/type.h</a>&quot;</span></div>
<div class="line"><a id="l00006" name="l00006"></a><span class="lineno">    6</span> </div>
<div class="line"><a id="l00007" name="l00007"></a><span class="lineno">    7</span><span class="keyword">namespace </span><a class="code hl_namespace" href="namespacemlx_1_1core_1_1simd.html">mlx::core::simd</a> {</div>
<div class="line"><a id="l00008" name="l00008"></a><span class="lineno">    8</span> </div>
<div class="line"><a id="l00009" name="l00009"></a><span class="lineno"><a class="line" href="namespacemlx_1_1core_1_1simd.html#a771b6597803beb800ff5e7560c41e341">    9</a></span><span class="keyword">constexpr</span> <span class="keywordtype">float</span> <a class="code hl_variable" href="namespacemlx_1_1core_1_1simd.html#a771b6597803beb800ff5e7560c41e341">inf</a> = std::numeric_limits&lt;float&gt;::infinity();</div>
<div class="line"><a id="l00010" name="l00010"></a><span class="lineno">   10</span> </div>
<!-- Listing lines 27 to 57: the exp() template, wrapped in a foldable block. -->
<div class="line"><a id="l00027" name="l00027"></a><span class="lineno">   27</span><span class="keyword">template</span> &lt;<span class="keyword">typename</span> T, <span class="keywordtype">int</span> N&gt;</div>
<div class="foldopen" id="foldopen00028" data-start="{" data-end="}">
<div class="line"><a id="l00028" name="l00028"></a><span class="lineno"><a class="line" href="namespacemlx_1_1core_1_1simd.html#a835d71dd0bb2f9494a397d9939696ec2">   28</a></span><a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;T, N&gt;</a> <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#a835d71dd0bb2f9494a397d9939696ec2">exp</a>(<a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;T, N&gt;</a> in) {</div>
<div class="line"><a id="l00029" name="l00029"></a><span class="lineno">   29</span>  <span class="keywordflow">if</span> <span class="keyword">constexpr</span> (<a class="code hl_variable" href="namespacemlx_1_1core_1_1simd.html#a64e80f096a8baf99ba8d396414473cc7">is_complex&lt;T&gt;</a>) {</div>
<div class="line"><a id="l00030" name="l00030"></a><span class="lineno">   30</span>    <span class="keywordflow">return</span> <a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;T, 1&gt;</a>{std::exp(in.<a class="code hl_variable" href="structmlx_1_1core_1_1simd_1_1_simd.html#a36e2b7db5ce6eb4dd456e99a4cd2c2cf">value</a>)};</div>
<div class="line"><a id="l00031" name="l00031"></a><span class="lineno">   31</span>  } <span class="keywordflow">else</span> {</div>
<div class="line"><a id="l00032" name="l00032"></a><span class="lineno">   32</span>    <a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;float, N&gt;</a> x_init = in;</div>
<div class="line"><a id="l00033" name="l00033"></a><span class="lineno">   33</span>    <span class="keyword">auto</span> x = x_init * 1.442695f; <span class="comment">// multiply with log_2(e)</span></div>
<div class="line"><a id="l00034" name="l00034"></a><span class="lineno">   34</span>    <a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;float, N&gt;</a> ipart, fpart;</div>
<div class="line"><a id="l00035" name="l00035"></a><span class="lineno">   35</span>    ipart = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#a8e22c484298d9af10b6604c835e52052">floor</a>(x + 0.5);</div>
<div class="line"><a id="l00036" name="l00036"></a><span class="lineno">   36</span>    fpart = x - ipart;</div>
<div class="line"><a id="l00037" name="l00037"></a><span class="lineno">   37</span> </div>
<div class="line"><a id="l00038" name="l00038"></a><span class="lineno">   38</span>    x = 1.535336188319500e-4f;</div>
<div class="line"><a id="l00039" name="l00039"></a><span class="lineno">   39</span>    x = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70">fma</a>(x, fpart, 1.339887440266574e-3f);</div>
<div class="line"><a id="l00040" name="l00040"></a><span class="lineno">   40</span>    x = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70">fma</a>(x, fpart, 9.618437357674640e-3f);</div>
<div class="line"><a id="l00041" name="l00041"></a><span class="lineno">   41</span>    x = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70">fma</a>(x, fpart, 5.550332471162809e-2f);</div>
<div class="line"><a id="l00042" name="l00042"></a><span class="lineno">   42</span>    x = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70">fma</a>(x, fpart, 2.402264791363012e-1f);</div>
<div class="line"><a id="l00043" name="l00043"></a><span class="lineno">   43</span>    x = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70">fma</a>(x, fpart, 6.931472028550421e-1f);</div>
<div class="line"><a id="l00044" name="l00044"></a><span class="lineno">   44</span>    x = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70">fma</a>(x, fpart, 1.000000000000000f);</div>
<div class="line"><a id="l00045" name="l00045"></a><span class="lineno">   45</span> </div>
<div class="line"><a id="l00046" name="l00046"></a><span class="lineno">   46</span>    <span class="comment">// generate 2**ipart in the floating point representation using integer</span></div>
<div class="line"><a id="l00047" name="l00047"></a><span class="lineno">   47</span>    <span class="comment">// bitshifting</span></div>
<div class="line"><a id="l00048" name="l00048"></a><span class="lineno">   48</span>    <a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;int, N&gt;</a> epart = (<a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;int, N&gt;</a>(ipart) + 127) &lt;&lt; 23;</div>
<div class="line"><a id="l00049" name="l00049"></a><span class="lineno">   49</span> </div>
<div class="line"><a id="l00050" name="l00050"></a><span class="lineno">   50</span>    <span class="comment">// Deal with NaN and Inf</span></div>
<div class="line"><a id="l00051" name="l00051"></a><span class="lineno">   51</span>    <span class="keyword">auto</span> result = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#afb3bcbd8d8b34128cd0c8eb677a170ef">select</a>(<a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#a05f4422a037c3bef343fb11f71363b65">isnan</a>(x_init), x_init, (*(<a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;float, N&gt;</a>*)&amp;epart) * x);</div>
<div class="line"><a id="l00052" name="l00052"></a><span class="lineno">   52</span>    result = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#afb3bcbd8d8b34128cd0c8eb677a170ef">select</a>(x_init &gt; 88.0f, <a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;float, N&gt;</a>(<a class="code hl_variable" href="namespacemlx_1_1core_1_1simd.html#a771b6597803beb800ff5e7560c41e341">inf</a>), result);</div>
<div class="line"><a id="l00053" name="l00053"></a><span class="lineno">   53</span>    result = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#afb3bcbd8d8b34128cd0c8eb677a170ef">select</a>(x_init &lt; -88.0f, <a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;float, N&gt;</a>(0), result);</div>
<div class="line"><a id="l00054" name="l00054"></a><span class="lineno">   54</span>    <span class="keywordflow">return</span> <a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;T, N&gt;</a>(result);</div>
<div class="line"><a id="l00055" name="l00055"></a><span class="lineno">   55</span>  }</div>
<div class="line"><a id="l00056" name="l00056"></a><span class="lineno">   56</span>}</div>
</div>
<div class="line"><a id="l00057" name="l00057"></a><span class="lineno">   57</span> </div>
<!-- Listing lines 58 to 117: attribution comment and the sincos() template, wrapped in a foldable block. -->
<div class="line"><a id="l00058" name="l00058"></a><span class="lineno">   58</span><span class="comment">/* Implementation from:</span></div>
<div class="line"><a id="l00059" name="l00059"></a><span class="lineno">   59</span><span class="comment"> * https://github.com/JishinMaster/simd_utils/blob/3c1433a86fb38edcc9b02039f3c9a65b16640976/neon_mathfun.h#L357</span></div>
<div class="line"><a id="l00060" name="l00060"></a><span class="lineno">   60</span><span class="comment"> * which originally came from the Cephes math library.</span></div>
<div class="line"><a id="l00061" name="l00061"></a><span class="lineno">   61</span><span class="comment"> */</span></div>
<div class="line"><a id="l00062" name="l00062"></a><span class="lineno">   62</span><span class="keyword">template</span> &lt;<span class="keywordtype">bool</span> Sine, <span class="keyword">typename</span> T, <span class="keywordtype">int</span> N&gt;</div>
<div class="foldopen" id="foldopen00063" data-start="{" data-end="}">
<div class="line"><a id="l00063" name="l00063"></a><span class="lineno"><a class="line" href="namespacemlx_1_1core_1_1simd.html#ad78056685c9732c3465c0d8b8ec1bef7">   63</a></span><a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;T, N&gt;</a> <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#ad78056685c9732c3465c0d8b8ec1bef7">sincos</a>(<a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;T, N&gt;</a> in) {</div>
<div class="line"><a id="l00064" name="l00064"></a><span class="lineno">   64</span>  <span class="keyword">auto</span> sign_mask_sin = in &lt; 0;</div>
<div class="line"><a id="l00065" name="l00065"></a><span class="lineno">   65</span>  in = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#a4f3cc8b2493586e83fd65640df3b60ad">abs</a>(in);</div>
<div class="line"><a id="l00066" name="l00066"></a><span class="lineno">   66</span>  <a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;float, N&gt;</a> x = in;</div>
<div class="line"><a id="l00067" name="l00067"></a><span class="lineno">   67</span> </div>
<div class="line"><a id="l00068" name="l00068"></a><span class="lineno">   68</span>  <span class="comment">// scale by 4/Pi</span></div>
<div class="line"><a id="l00069" name="l00069"></a><span class="lineno">   69</span>  <span class="keyword">auto</span> y = x * 1.27323954473516f;</div>
<div class="line"><a id="l00070" name="l00070"></a><span class="lineno">   70</span> </div>
<div class="line"><a id="l00071" name="l00071"></a><span class="lineno">   71</span>  <span class="comment">// store the integer part of y in mm0</span></div>
<div class="line"><a id="l00072" name="l00072"></a><span class="lineno">   72</span>  <a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;uint32_t, N&gt;</a> emm2 = y;</div>
<div class="line"><a id="l00073" name="l00073"></a><span class="lineno">   73</span> </div>
<div class="line"><a id="l00074" name="l00074"></a><span class="lineno">   74</span>  <span class="comment">// j=(j+1) &amp; (~1) (see the cephes sources)</span></div>
<div class="line"><a id="l00075" name="l00075"></a><span class="lineno">   75</span>  emm2 = emm2 + 1;</div>
<div class="line"><a id="l00076" name="l00076"></a><span class="lineno">   76</span>  emm2 = emm2 &amp; ~1;</div>
<div class="line"><a id="l00077" name="l00077"></a><span class="lineno">   77</span> </div>
<div class="line"><a id="l00078" name="l00078"></a><span class="lineno">   78</span>  y = emm2;</div>
<div class="line"><a id="l00079" name="l00079"></a><span class="lineno">   79</span> </div>
<div class="line"><a id="l00080" name="l00080"></a><span class="lineno">   80</span>  <span class="comment">// Get the polynom selection mask. There is one polynom for 0 &lt;= x &lt;= Pi/4</span></div>
<div class="line"><a id="l00081" name="l00081"></a><span class="lineno">   81</span>  <span class="comment">// and another one for Pi/4&lt;x&lt;=Pi/2. Both branches will be computed.</span></div>
<div class="line"><a id="l00082" name="l00082"></a><span class="lineno">   82</span>  <span class="keyword">auto</span> poly_mask = (emm2 &amp; 2) != 0;</div>
<div class="line"><a id="l00083" name="l00083"></a><span class="lineno">   83</span> </div>
<div class="line"><a id="l00084" name="l00084"></a><span class="lineno">   84</span>  <span class="comment">// The magic pass: &quot;Extended precision modular arithmetic&quot;</span></div>
<div class="line"><a id="l00085" name="l00085"></a><span class="lineno">   85</span>  <span class="comment">// x = ((x - y * DP1) - y * DP2) - y * DP3</span></div>
<div class="line"><a id="l00086" name="l00086"></a><span class="lineno">   86</span>  x = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70">fma</a>(y, <a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;float, N&gt;</a>(-0.78515625f), x);</div>
<div class="line"><a id="l00087" name="l00087"></a><span class="lineno">   87</span>  x = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70">fma</a>(y, <a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;float, N&gt;</a>(-2.4187564849853515625e-4f), x);</div>
<div class="line"><a id="l00088" name="l00088"></a><span class="lineno">   88</span>  x = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70">fma</a>(y, <a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;float, N&gt;</a>(-3.77489497744594108e-8f), x);</div>
<div class="line"><a id="l00089" name="l00089"></a><span class="lineno">   89</span> </div>
<div class="line"><a id="l00090" name="l00090"></a><span class="lineno">   90</span>  sign_mask_sin = sign_mask_sin ^ ((emm2 &amp; 4) != 0);</div>
<div class="line"><a id="l00091" name="l00091"></a><span class="lineno">   91</span>  <span class="keyword">auto</span> sign_mask_cos = ((emm2 - 2) &amp; 4) != 0;</div>
<div class="line"><a id="l00092" name="l00092"></a><span class="lineno">   92</span> </div>
<div class="line"><a id="l00093" name="l00093"></a><span class="lineno">   93</span>  <span class="comment">// Evaluate the first polynom  (0 &lt;= x &lt;= Pi/4) in y1,</span></div>
<div class="line"><a id="l00094" name="l00094"></a><span class="lineno">   94</span>  <span class="comment">// and the second polynom      (Pi/4 &lt;= x &lt;= 0) in y2</span></div>
<div class="line"><a id="l00095" name="l00095"></a><span class="lineno">   95</span>  <span class="keyword">auto</span> z = x * x;</div>
<div class="line"><a id="l00096" name="l00096"></a><span class="lineno">   96</span> </div>
<div class="line"><a id="l00097" name="l00097"></a><span class="lineno">   97</span>  <span class="keyword">auto</span> y1 =</div>
<div class="line"><a id="l00098" name="l00098"></a><span class="lineno">   98</span>      <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70">fma</a>(z, <a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;float, N&gt;</a>(2.443315711809948e-5f), -1.388731625493765e-3f);</div>
<div class="line"><a id="l00099" name="l00099"></a><span class="lineno">   99</span>  <span class="keyword">auto</span> y2 = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70">fma</a>(z, <a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;float, N&gt;</a>(-1.9515295891e-4f), 8.3321608736e-3f);</div>
<div class="line"><a id="l00100" name="l00100"></a><span class="lineno">  100</span>  y1 = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70">fma</a>(y1, z, 4.166664568298827e-2f);</div>
<div class="line"><a id="l00101" name="l00101"></a><span class="lineno">  101</span>  y2 = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70">fma</a>(y2, z, -1.6666654611e-1f);</div>
<div class="line"><a id="l00102" name="l00102"></a><span class="lineno">  102</span>  y1 = y1 * z;</div>
<div class="line"><a id="l00103" name="l00103"></a><span class="lineno">  103</span>  y2 = y2 * z;</div>
<div class="line"><a id="l00104" name="l00104"></a><span class="lineno">  104</span>  y1 = y1 * z;</div>
<div class="line"><a id="l00105" name="l00105"></a><span class="lineno">  105</span>  y2 = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70">fma</a>(x, y2, x);</div>
<div class="line"><a id="l00106" name="l00106"></a><span class="lineno">  106</span>  y1 = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70">fma</a>(z, <a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;float, N&gt;</a>(-0.5f), y1);</div>
<div class="line"><a id="l00107" name="l00107"></a><span class="lineno">  107</span>  y1 = y1 + 1.0f;</div>
<div class="line"><a id="l00108" name="l00108"></a><span class="lineno">  108</span> </div>
<div class="line"><a id="l00109" name="l00109"></a><span class="lineno">  109</span>  <span class="keywordflow">if</span> <span class="keyword">constexpr</span> (Sine) {</div>
<div class="line"><a id="l00110" name="l00110"></a><span class="lineno">  110</span>    <span class="keyword">auto</span> ys = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#afb3bcbd8d8b34128cd0c8eb677a170ef">select</a>(poly_mask, y1, y2);</div>
<div class="line"><a id="l00111" name="l00111"></a><span class="lineno">  111</span>    <span class="keywordflow">return</span> <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#afb3bcbd8d8b34128cd0c8eb677a170ef">select</a>(sign_mask_sin, -ys, ys);</div>
<div class="line"><a id="l00112" name="l00112"></a><span class="lineno">  112</span>  } <span class="keywordflow">else</span> {</div>
<div class="line"><a id="l00113" name="l00113"></a><span class="lineno">  113</span>    <span class="keyword">auto</span> yc = <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#afb3bcbd8d8b34128cd0c8eb677a170ef">select</a>(poly_mask, y2, y1);</div>
<div class="line"><a id="l00114" name="l00114"></a><span class="lineno">  114</span>    <span class="keywordflow">return</span> <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#afb3bcbd8d8b34128cd0c8eb677a170ef">select</a>(sign_mask_cos, yc, -yc);</div>
<div class="line"><a id="l00115" name="l00115"></a><span class="lineno">  115</span>  }</div>
<div class="line"><a id="l00116" name="l00116"></a><span class="lineno">  116</span>}</div>
</div>
<div class="line"><a id="l00117" name="l00117"></a><span class="lineno">  117</span> </div>
<!-- Listing lines 118 to 126: the sin() template, wrapped in a foldable block. -->
<div class="line"><a id="l00118" name="l00118"></a><span class="lineno">  118</span><span class="keyword">template</span> &lt;<span class="keyword">typename</span> T, <span class="keywordtype">int</span> N&gt;</div>
<div class="foldopen" id="foldopen00119" data-start="{" data-end="}">
<div class="line"><a id="l00119" name="l00119"></a><span class="lineno"><a class="line" href="namespacemlx_1_1core_1_1simd.html#ab4d582d72c0a7ee313e19c906e43cef1">  119</a></span><a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;T, N&gt;</a> <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#ab4d582d72c0a7ee313e19c906e43cef1">sin</a>(<a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;T, N&gt;</a> x) {</div>
<div class="line"><a id="l00120" name="l00120"></a><span class="lineno">  120</span>  <span class="keywordflow">if</span> <span class="keyword">constexpr</span> (<a class="code hl_variable" href="namespacemlx_1_1core_1_1simd.html#a64e80f096a8baf99ba8d396414473cc7">is_complex&lt;T&gt;</a>) {</div>
<div class="line"><a id="l00121" name="l00121"></a><span class="lineno">  121</span>    <span class="keywordflow">return</span> std::sin(x.<a class="code hl_variable" href="structmlx_1_1core_1_1simd_1_1_simd.html#a36e2b7db5ce6eb4dd456e99a4cd2c2cf">value</a>);</div>
<div class="line"><a id="l00122" name="l00122"></a><span class="lineno">  122</span>  } <span class="keywordflow">else</span> {</div>
<div class="line"><a id="l00123" name="l00123"></a><span class="lineno">  123</span>    <span class="keywordflow">return</span> <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#ad78056685c9732c3465c0d8b8ec1bef7">sincos&lt;true&gt;</a>(x);</div>
<div class="line"><a id="l00124" name="l00124"></a><span class="lineno">  124</span>  }</div>
<div class="line"><a id="l00125" name="l00125"></a><span class="lineno">  125</span>}</div>
</div>
<div class="line"><a id="l00126" name="l00126"></a><span class="lineno">  126</span> </div>
<!-- Listing lines 127 to 135: the cos() template, wrapped in a foldable block. -->
<div class="line"><a id="l00127" name="l00127"></a><span class="lineno">  127</span><span class="keyword">template</span> &lt;<span class="keyword">typename</span> T, <span class="keywordtype">int</span> N&gt;</div>
<div class="foldopen" id="foldopen00128" data-start="{" data-end="}">
<div class="line"><a id="l00128" name="l00128"></a><span class="lineno"><a class="line" href="namespacemlx_1_1core_1_1simd.html#ab179f429e34cd6d5c37050ea7e7c54ad">  128</a></span><a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;T, N&gt;</a> <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#ab179f429e34cd6d5c37050ea7e7c54ad">cos</a>(<a class="code hl_struct" href="structmlx_1_1core_1_1simd_1_1_simd.html">Simd&lt;T, N&gt;</a> x) {</div>
<div class="line"><a id="l00129" name="l00129"></a><span class="lineno">  129</span>  <span class="keywordflow">if</span> <span class="keyword">constexpr</span> (<a class="code hl_variable" href="namespacemlx_1_1core_1_1simd.html#a64e80f096a8baf99ba8d396414473cc7">is_complex&lt;T&gt;</a>) {</div>
<div class="line"><a id="l00130" name="l00130"></a><span class="lineno">  130</span>    <span class="keywordflow">return</span> std::cos(x.<a class="code hl_variable" href="structmlx_1_1core_1_1simd_1_1_simd.html#a36e2b7db5ce6eb4dd456e99a4cd2c2cf">value</a>);</div>
<div class="line"><a id="l00131" name="l00131"></a><span class="lineno">  131</span>  } <span class="keywordflow">else</span> {</div>
<div class="line"><a id="l00132" name="l00132"></a><span class="lineno">  132</span>    <span class="keywordflow">return</span> <a class="code hl_function" href="namespacemlx_1_1core_1_1simd.html#ad78056685c9732c3465c0d8b8ec1bef7">sincos&lt;false&gt;</a>(x);</div>
<div class="line"><a id="l00133" name="l00133"></a><span class="lineno">  133</span>  }</div>
<div class="line"><a id="l00134" name="l00134"></a><span class="lineno">  134</span>}</div>
</div>
<div class="line"><a id="l00135" name="l00135"></a><span class="lineno">  135</span> </div>
< div class = "line" > < a id = "l00136" name = "l00136" > < / a > < span class = "lineno" > 136< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > T, < span class = "keywordtype" > int< / span > N> < / div >
< div class = "foldopen" id = "foldopen00137" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00137" name = "l00137" > < / a > < span class = "lineno" > < a class = "line" href = "namespacemlx_1_1core_1_1simd.html#a60e33ebb16d9bab375a64aec8015a5c2" > 137< / a > < / span > < a class = "code hl_struct" href = "structmlx_1_1core_1_1simd_1_1_simd.html" > Simd< T, N> < / a > < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a60e33ebb16d9bab375a64aec8015a5c2" > erf< / a > (< a class = "code hl_struct" href = "structmlx_1_1core_1_1simd_1_1_simd.html" > Simd< T, N> < / a > x) {< / div >
< div class = "line" > < a id = "l00138" name = "l00138" > < / a > < span class = "lineno" > 138< / span > < span class = "comment" > // https://github.com/pytorch/pytorch/blob/abf28982a8cb43342e7669d859de9543fd804cc9/aten/src/ATen/cpu/vec/vec256/vec256_float.h#L175< / span > < / div >
< div class = "line" > < a id = "l00139" name = "l00139" > < / a > < span class = "lineno" > 139< / span > < a class = "code hl_struct" href = "structmlx_1_1core_1_1simd_1_1_simd.html" > Simd< float, N> < / a > v = x;< / div >
< div class = "line" > < a id = "l00140" name = "l00140" > < / a > < span class = "lineno" > 140< / span > < span class = "keyword" > auto< / span > t = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#ae344abefc91c7d9c0a9506c868a84d61" > recip< / a > (< a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (< a class = "code hl_struct" href = "structmlx_1_1core_1_1simd_1_1_simd.html" > Simd< float, N> < / a > (0.3275911f), < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a4f3cc8b2493586e83fd65640df3b60ad" > abs< / a > (v), 1.0f));< / div >
< div class = "line" > < a id = "l00141" name = "l00141" > < / a > < span class = "lineno" > 141< / span > < span class = "keyword" > auto< / span > r = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (< a class = "code hl_struct" href = "structmlx_1_1core_1_1simd_1_1_simd.html" > Simd< float, N> < / a > (1.061405429f), t, -1.453152027f);< / div >
< div class = "line" > < a id = "l00142" name = "l00142" > < / a > < span class = "lineno" > 142< / span > r = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (r, t, 1.421413741f);< / div >
< div class = "line" > < a id = "l00143" name = "l00143" > < / a > < span class = "lineno" > 143< / span > r = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (r, t, -0.284496736f);< / div >
< div class = "line" > < a id = "l00144" name = "l00144" > < / a > < span class = "lineno" > 144< / span > r = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (r, t, 0.254829592f);< / div >
< div class = "line" > < a id = "l00145" name = "l00145" > < / a > < span class = "lineno" > 145< / span > < span class = "keyword" > auto< / span > e = -< a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a835d71dd0bb2f9494a397d9939696ec2" > exp< / a > (-v * v);< / div >
< div class = "line" > < a id = "l00146" name = "l00146" > < / a > < span class = "lineno" > 146< / span > < span class = "keyword" > auto< / span > result = < a class = "code hl_struct" href = "structmlx_1_1core_1_1simd_1_1_simd.html" > Simd< T, N> < / a > (< a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (e * t, r, 1.0f));< / div >
< div class = "line" > < a id = "l00147" name = "l00147" > < / a > < span class = "lineno" > 147< / span > < span class = "keywordflow" > return< / span > < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#afb3bcbd8d8b34128cd0c8eb677a170ef" > select< / a > (x > 0, result, -result);< / div >
< div class = "line" > < a id = "l00148" name = "l00148" > < / a > < span class = "lineno" > 148< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00149" name = "l00149" > < / a > < span class = "lineno" > 149< / span > < / div >
< div class = "line" > < a id = "l00150" name = "l00150" > < / a > < span class = "lineno" > 150< / span > < span class = "keyword" > template< / span > < < span class = "keyword" > typename< / span > T, < span class = "keywordtype" > int< / span > N> < / div >
< div class = "foldopen" id = "foldopen00151" data-start = "{" data-end = "}" >
< div class = "line" > < a id = "l00151" name = "l00151" > < / a > < span class = "lineno" > < a class = "line" href = "namespacemlx_1_1core_1_1simd.html#a7687f3d14077b51fb421f0efb5b626db" > 151< / a > < / span > < a class = "code hl_struct" href = "structmlx_1_1core_1_1simd_1_1_simd.html" > Simd< T, N> < / a > < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a7687f3d14077b51fb421f0efb5b626db" > erfinv< / a > (< a class = "code hl_struct" href = "structmlx_1_1core_1_1simd_1_1_simd.html" > Simd< T, N> < / a > a_) {< / div >
< div class = "line" > < a id = "l00152" name = "l00152" > < / a > < span class = "lineno" > 152< / span > < a class = "code hl_struct" href = "structmlx_1_1core_1_1simd_1_1_simd.html" > Simd< float, N> < / a > a = a_;< / div >
< div class = "line" > < a id = "l00153" name = "l00153" > < / a > < span class = "lineno" > 153< / span > < span class = "keyword" > auto< / span > t = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (a, 0.0f - a, 1.0f);< / div >
< div class = "line" > < a id = "l00154" name = "l00154" > < / a > < span class = "lineno" > 154< / span > t = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a8cec82f4fb15bfd31d7554c6c09ceed4" > log< / a > (t);< / div >
< div class = "line" > < a id = "l00155" name = "l00155" > < / a > < span class = "lineno" > 155< / span > < span class = "keyword" > auto< / span > lhs = [](< span class = "keyword" > auto< / span > t) {< / div >
< div class = "line" > < a id = "l00156" name = "l00156" > < / a > < span class = "lineno" > 156< / span > < a class = "code hl_struct" href = "structmlx_1_1core_1_1simd_1_1_simd.html" > Simd< float, N> < / a > p;< / div >
< div class = "line" > < a id = "l00157" name = "l00157" > < / a > < span class = "lineno" > 157< / span > p = 3.03697567e-10f; < span class = "comment" > // 0x1.4deb44p-32< / span > < / div >
< div class = "line" > < a id = "l00158" name = "l00158" > < / a > < span class = "lineno" > 158< / span > p = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (p, t, 2.93243101e-8f); < span class = "comment" > // 0x1.f7c9aep-26< / span > < / div >
< div class = "line" > < a id = "l00159" name = "l00159" > < / a > < span class = "lineno" > 159< / span > p = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (p, t, 1.22150334e-6f); < span class = "comment" > // 0x1.47e512p-20< / span > < / div >
< div class = "line" > < a id = "l00160" name = "l00160" > < / a > < span class = "lineno" > 160< / span > p = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (p, t, 2.84108955e-5f); < span class = "comment" > // 0x1.dca7dep-16< / span > < / div >
< div class = "line" > < a id = "l00161" name = "l00161" > < / a > < span class = "lineno" > 161< / span > p = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (p, t, 3.93552968e-4f); < span class = "comment" > // 0x1.9cab92p-12< / span > < / div >
< div class = "line" > < a id = "l00162" name = "l00162" > < / a > < span class = "lineno" > 162< / span > p = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (p, t, 3.02698812e-3f); < span class = "comment" > // 0x1.8cc0dep-9< / span > < / div >
< div class = "line" > < a id = "l00163" name = "l00163" > < / a > < span class = "lineno" > 163< / span > p = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (p, t, 4.83185798e-3f); < span class = "comment" > // 0x1.3ca920p-8< / span > < / div >
< div class = "line" > < a id = "l00164" name = "l00164" > < / a > < span class = "lineno" > 164< / span > p = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (p, t, -2.64646143e-1f); < span class = "comment" > // -0x1.0eff66p-2< / span > < / div >
< div class = "line" > < a id = "l00165" name = "l00165" > < / a > < span class = "lineno" > 165< / span > < span class = "keywordflow" > return< / span > < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (p, t, 8.40016484e-1f); < span class = "comment" > // 0x1.ae16a4p-1< / span > < / div >
< div class = "line" > < a id = "l00166" name = "l00166" > < / a > < span class = "lineno" > 166< / span > };< / div >
< div class = "line" > < a id = "l00167" name = "l00167" > < / a > < span class = "lineno" > 167< / span > < span class = "keyword" > auto< / span > rhs = [](< span class = "keyword" > auto< / span > t) {< / div >
< div class = "line" > < a id = "l00168" name = "l00168" > < / a > < span class = "lineno" > 168< / span > < a class = "code hl_struct" href = "structmlx_1_1core_1_1simd_1_1_simd.html" > Simd< float, N> < / a > p;< / div >
< div class = "line" > < a id = "l00169" name = "l00169" > < / a > < span class = "lineno" > 169< / span > p = 5.43877832e-9f; < span class = "comment" > // 0x1.75c000p-28< / span > < / div >
< div class = "line" > < a id = "l00170" name = "l00170" > < / a > < span class = "lineno" > 170< / span > p = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (p, t, 1.43285448e-7f); < span class = "comment" > // 0x1.33b402p-23< / span > < / div >
< div class = "line" > < a id = "l00171" name = "l00171" > < / a > < span class = "lineno" > 171< / span > p = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (p, t, 1.22774793e-6f); < span class = "comment" > // 0x1.499232p-20< / span > < / div >
< div class = "line" > < a id = "l00172" name = "l00172" > < / a > < span class = "lineno" > 172< / span > p = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (p, t, 1.12963626e-7f); < span class = "comment" > // 0x1.e52cd2p-24< / span > < / div >
< div class = "line" > < a id = "l00173" name = "l00173" > < / a > < span class = "lineno" > 173< / span > p = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (p, t, -5.61530760e-5f); < span class = "comment" > // -0x1.d70bd0p-15< / span > < / div >
< div class = "line" > < a id = "l00174" name = "l00174" > < / a > < span class = "lineno" > 174< / span > p = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (p, t, -1.47697632e-4f); < span class = "comment" > // -0x1.35be90p-13< / span > < / div >
< div class = "line" > < a id = "l00175" name = "l00175" > < / a > < span class = "lineno" > 175< / span > p = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (p, t, 2.31468678e-3f); < span class = "comment" > // 0x1.2f6400p-9< / span > < / div >
< div class = "line" > < a id = "l00176" name = "l00176" > < / a > < span class = "lineno" > 176< / span > p = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (p, t, 1.15392581e-2f); < span class = "comment" > // 0x1.7a1e50p-7< / span > < / div >
< div class = "line" > < a id = "l00177" name = "l00177" > < / a > < span class = "lineno" > 177< / span > p = < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (p, t, -2.32015476e-1f); < span class = "comment" > // -0x1.db2aeep-3< / span > < / div >
< div class = "line" > < a id = "l00178" name = "l00178" > < / a > < span class = "lineno" > 178< / span > < span class = "keywordflow" > return< / span > < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > fma< / a > (p, t, 8.86226892e-1f); < span class = "comment" > // 0x1.c5bf88p-1< / span > < / div >
< div class = "line" > < a id = "l00179" name = "l00179" > < / a > < span class = "lineno" > 179< / span > };< / div >
< div class = "line" > < a id = "l00180" name = "l00180" > < / a > < span class = "lineno" > 180< / span > < span class = "keyword" > auto< / span > thresh = 6.125f;< / div >
< div class = "line" > < a id = "l00181" name = "l00181" > < / a > < span class = "lineno" > 181< / span > < span class = "comment" > // Compute both branches and select if N > 1< / span > < / div >
< div class = "line" > < a id = "l00182" name = "l00182" > < / a > < span class = "lineno" > 182< / span > < span class = "keywordflow" > if< / span > < span class = "keyword" > constexpr< / span > (< a class = "code hl_variable" href = "namespacemlx_1_1core_1_1simd.html#a12b1553495a0c99d52472bd2a6626ddb" > N< / a > == 1) {< / div >
< div class = "line" > < a id = "l00183" name = "l00183" > < / a > < span class = "lineno" > 183< / span > < span class = "keywordflow" > if< / span > ((< a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#a4f3cc8b2493586e83fd65640df3b60ad" > abs< / a > (t) > thresh).value) { < span class = "comment" > // maximum ulp error = 2.35793< / span > < / div >
< div class = "line" > < a id = "l00184" name = "l00184" > < / a > < span class = "lineno" > 184< / span > < span class = "keywordflow" > return< / span > a * lhs(t);< / div >
< div class = "line" > < a id = "l00185" name = "l00185" > < / a > < span class = "lineno" > 185< / span > } < span class = "keywordflow" > else< / span > { < span class = "comment" > // maximum ulp error = 2.35002< / span > < / div >
< div class = "line" > < a id = "l00186" name = "l00186" > < / a > < span class = "lineno" > 186< / span > < span class = "keywordflow" > return< / span > a * rhs(t);< / div >
< div class = "line" > < a id = "l00187" name = "l00187" > < / a > < span class = "lineno" > 187< / span > }< / div >
< div class = "line" > < a id = "l00188" name = "l00188" > < / a > < span class = "lineno" > 188< / span > } < span class = "keywordflow" > else< / span > {< / div >
< div class = "line" > < a id = "l00189" name = "l00189" > < / a > < span class = "lineno" > 189< / span > < span class = "keywordflow" > return< / span > a * < a class = "code hl_function" href = "namespacemlx_1_1core_1_1simd.html#afb3bcbd8d8b34128cd0c8eb677a170ef" > select< / a > (t > thresh, lhs(t), rhs(t));< / div >
< div class = "line" > < a id = "l00190" name = "l00190" > < / a > < span class = "lineno" > 190< / span > }< / div >
< div class = "line" > < a id = "l00191" name = "l00191" > < / a > < span class = "lineno" > 191< / span > }< / div >
< / div >
< div class = "line" > < a id = "l00192" name = "l00192" > < / a > < span class = "lineno" > 192< / span > < / div >
< div class = "line" > < a id = "l00193" name = "l00193" > < / a > < span class = "lineno" > 193< / span > } < span class = "comment" > // namespace mlx::core::simd< / span > < / div >
< div class = "ttc" id = "anamespacemlx_1_1core_1_1simd_html" > < div class = "ttname" > < a href = "namespacemlx_1_1core_1_1simd.html" > mlx::core::simd< / a > < / div > < div class = "ttdef" > < b > Definition< / b > accelerate_fp16_simd.h:9< / div > < / div >
2025-02-15 05:44:39 +08:00
< div class = "ttc" id = "anamespacemlx_1_1core_1_1simd_html_a05f4422a037c3bef343fb11f71363b65" > < div class = "ttname" > < a href = "namespacemlx_1_1core_1_1simd.html#a05f4422a037c3bef343fb11f71363b65" > mlx::core::simd::isnan< / a > < / div > < div class = "ttdeci" > Simd< bool, N > isnan(Simd< T, N > v)< / div > < div class = "ttdef" > < b > Definition< / b > accelerate_simd.h:146< / div > < / div >
2025-02-07 04:16:29 +08:00
< div class = "ttc" id = "anamespacemlx_1_1core_1_1simd_html_a12b1553495a0c99d52472bd2a6626ddb" > < div class = "ttname" > < a href = "namespacemlx_1_1core_1_1simd.html#a12b1553495a0c99d52472bd2a6626ddb" > mlx::core::simd::N< / a > < / div > < div class = "ttdeci" > constexpr int N< / div > < div class = "ttdef" > < b > Definition< / b > neon_fp16_simd.h:9< / div > < / div >
< div class = "ttc" id = "anamespacemlx_1_1core_1_1simd_html_a4f3cc8b2493586e83fd65640df3b60ad" > < div class = "ttname" > < a href = "namespacemlx_1_1core_1_1simd.html#a4f3cc8b2493586e83fd65640df3b60ad" > mlx::core::simd::abs< / a > < / div > < div class = "ttdeci" > Simd< T, N > abs(Simd< T, N > v)< / div > < div class = "ttdef" > < b > Definition< / b > accelerate_simd.h:112< / div > < / div >
< div class = "ttc" id = "anamespacemlx_1_1core_1_1simd_html_a60e33ebb16d9bab375a64aec8015a5c2" > < div class = "ttname" > < a href = "namespacemlx_1_1core_1_1simd.html#a60e33ebb16d9bab375a64aec8015a5c2" > mlx::core::simd::erf< / a > < / div > < div class = "ttdeci" > Simd< T, N > erf(Simd< T, N > x)< / div > < div class = "ttdef" > < b > Definition< / b > math.h:137< / div > < / div >
2025-02-15 05:44:39 +08:00
< div class = "ttc" id = "anamespacemlx_1_1core_1_1simd_html_a64e80f096a8baf99ba8d396414473cc7" > < div class = "ttname" > < a href = "namespacemlx_1_1core_1_1simd.html#a64e80f096a8baf99ba8d396414473cc7" > mlx::core::simd::is_complex< / a > < / div > < div class = "ttdeci" > constexpr bool is_complex< / div > < div class = "ttdef" > < b > Definition< / b > base_simd.h:43< / div > < / div >
2025-02-07 04:16:29 +08:00
< div class = "ttc" id = "anamespacemlx_1_1core_1_1simd_html_a7687f3d14077b51fb421f0efb5b626db" > < div class = "ttname" > < a href = "namespacemlx_1_1core_1_1simd.html#a7687f3d14077b51fb421f0efb5b626db" > mlx::core::simd::erfinv< / a > < / div > < div class = "ttdeci" > Simd< T, N > erfinv(Simd< T, N > a_)< / div > < div class = "ttdef" > < b > Definition< / b > math.h:151< / div > < / div >
< div class = "ttc" id = "anamespacemlx_1_1core_1_1simd_html_a771b6597803beb800ff5e7560c41e341" > < div class = "ttname" > < a href = "namespacemlx_1_1core_1_1simd.html#a771b6597803beb800ff5e7560c41e341" > mlx::core::simd::inf< / a > < / div > < div class = "ttdeci" > constexpr float inf< / div > < div class = "ttdef" > < b > Definition< / b > math.h:9< / div > < / div >
< div class = "ttc" id = "anamespacemlx_1_1core_1_1simd_html_a835d71dd0bb2f9494a397d9939696ec2" > < div class = "ttname" > < a href = "namespacemlx_1_1core_1_1simd.html#a835d71dd0bb2f9494a397d9939696ec2" > mlx::core::simd::exp< / a > < / div > < div class = "ttdeci" > Simd< T, N > exp(Simd< T, N > in)< / div > < div class = "ttdoc" > Compute exp(x) in an optimizer friendly way as follows:< / div > < div class = "ttdef" > < b > Definition< / b > math.h:28< / div > < / div >
< div class = "ttc" id = "anamespacemlx_1_1core_1_1simd_html_a8cec82f4fb15bfd31d7554c6c09ceed4" > < div class = "ttname" > < a href = "namespacemlx_1_1core_1_1simd.html#a8cec82f4fb15bfd31d7554c6c09ceed4" > mlx::core::simd::log< / a > < / div > < div class = "ttdeci" > Simd< float16_t, N > log(Simd< float16_t, N > v)< / div > < div class = "ttdef" > < b > Definition< / b > accelerate_fp16_simd.h:37< / div > < / div >
< div class = "ttc" id = "anamespacemlx_1_1core_1_1simd_html_a8e22c484298d9af10b6604c835e52052" > < div class = "ttname" > < a href = "namespacemlx_1_1core_1_1simd.html#a8e22c484298d9af10b6604c835e52052" > mlx::core::simd::floor< / a > < / div > < div class = "ttdeci" > Simd< T, N > floor(Simd< T, N > v)< / div > < div class = "ttdef" > < b > Definition< / b > accelerate_simd.h:113< / div > < / div >
2025-02-15 05:44:39 +08:00
< div class = "ttc" id = "anamespacemlx_1_1core_1_1simd_html_a9ddc7f119cc1dc04372ec1adcaf55f70" > < div class = "ttname" > < a href = "namespacemlx_1_1core_1_1simd.html#a9ddc7f119cc1dc04372ec1adcaf55f70" > mlx::core::simd::fma< / a > < / div > < div class = "ttdeci" > Simd< T, N > fma(Simd< T, N > x, Simd< T, N > y, U z)< / div > < div class = "ttdef" > < b > Definition< / b > accelerate_simd.h:269< / div > < / div >
2025-02-07 04:16:29 +08:00
< div class = "ttc" id = "anamespacemlx_1_1core_1_1simd_html_ab179f429e34cd6d5c37050ea7e7c54ad" > < div class = "ttname" > < a href = "namespacemlx_1_1core_1_1simd.html#ab179f429e34cd6d5c37050ea7e7c54ad" > mlx::core::simd::cos< / a > < / div > < div class = "ttdeci" > Simd< T, N > cos(Simd< T, N > x)< / div > < div class = "ttdef" > < b > Definition< / b > math.h:128< / div > < / div >
< div class = "ttc" id = "anamespacemlx_1_1core_1_1simd_html_ab4d582d72c0a7ee313e19c906e43cef1" > < div class = "ttname" > < a href = "namespacemlx_1_1core_1_1simd.html#ab4d582d72c0a7ee313e19c906e43cef1" > mlx::core::simd::sin< / a > < / div > < div class = "ttdeci" > Simd< T, N > sin(Simd< T, N > x)< / div > < div class = "ttdef" > < b > Definition< / b > math.h:119< / div > < / div >
< div class = "ttc" id = "anamespacemlx_1_1core_1_1simd_html_ad78056685c9732c3465c0d8b8ec1bef7" > < div class = "ttname" > < a href = "namespacemlx_1_1core_1_1simd.html#ad78056685c9732c3465c0d8b8ec1bef7" > mlx::core::simd::sincos< / a > < / div > < div class = "ttdeci" > Simd< T, N > sincos(Simd< T, N > in)< / div > < div class = "ttdef" > < b > Definition< / b > math.h:63< / div > < / div >
< div class = "ttc" id = "anamespacemlx_1_1core_1_1simd_html_ae344abefc91c7d9c0a9506c868a84d61" > < div class = "ttname" > < a href = "namespacemlx_1_1core_1_1simd.html#ae344abefc91c7d9c0a9506c868a84d61" > mlx::core::simd::recip< / a > < / div > < div class = "ttdeci" > Simd< T, N > recip(Simd< T, N > v)< / div > < div class = "ttdef" > < b > Definition< / b > accelerate_simd.h:131< / div > < / div >
2025-02-15 05:44:39 +08:00
< div class = "ttc" id = "anamespacemlx_1_1core_1_1simd_html_afb3bcbd8d8b34128cd0c8eb677a170ef" > < div class = "ttname" > < a href = "namespacemlx_1_1core_1_1simd.html#afb3bcbd8d8b34128cd0c8eb677a170ef" > mlx::core::simd::select< / a > < / div > < div class = "ttdeci" > Simd< T1, N > select(Simd< MaskT, N > mask, Simd< T1, N > x, Simd< T2, N > y)< / div > < div class = "ttdef" > < b > Definition< / b > accelerate_simd.h:236< / div > < / div >
2025-02-07 04:16:29 +08:00
< div class = "ttc" id = "astructmlx_1_1core_1_1simd_1_1_simd_html" > < div class = "ttname" > < a href = "structmlx_1_1core_1_1simd_1_1_simd.html" > mlx::core::simd::Simd< / a > < / div > < div class = "ttdef" > < b > Definition< / b > accelerate_simd.h:55< / div > < / div >
< div class = "ttc" id = "astructmlx_1_1core_1_1simd_1_1_simd_html_a36e2b7db5ce6eb4dd456e99a4cd2c2cf" > < div class = "ttname" > < a href = "structmlx_1_1core_1_1simd_1_1_simd.html#a36e2b7db5ce6eb4dd456e99a4cd2c2cf" > mlx::core::simd::Simd::value< / a > < / div > < div class = "ttdeci" > asd::Vector< scalar_t, N > ::packed_t value< / div > < div class = "ttdef" > < b > Definition< / b > accelerate_simd.h:80< / div > < / div >
< div class = "ttc" id = "atype_8h_html" > < div class = "ttname" > < a href = "type_8h.html" > type.h< / a > < / div > < / div >
< / div > <!-- fragment --> < / div > <!-- contents -->
< / div > <!-- doc - content -->
<!-- start footer part -->
< div id = "nav-path" class = "navpath" > <!-- id is needed for treeview function! -->
< ul >
< li class = "navelem" > < a class = "el" href = "dir_938ab0ecf10b8b860ff766c820f665fd.html" > mlx< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_1d446c9bd3c99228254c9484e0bc5c06.html" > backend< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_48c8bf40aae7e42226b4fe31ea48af19.html" > cpu< / a > < / li > < li class = "navelem" > < a class = "el" href = "dir_777905fddc177f731a39846ae16b0314.html" > simd< / a > < / li > < li class = "navelem" > < a class = "el" href = "math_8h.html" > math.h< / a > < / li >
< li class = "footer" > Generated by < a href = "https://www.doxygen.org/index.html" > < img class = "footer" src = "doxygen.svg" width = "104" height = "31" alt = "doxygen" / > < / a > 1.13.2 < / li >
< / ul >
< / div >
< / body >
< / html >