9constexpr float inf = std::numeric_limits<float>::infinity();
27template <
typename T,
int N>
33 auto x = x_init * 1.442695f;
35 ipart =
floor(x + 0.5);
38 x = 1.535336188319500e-4f;
39 x =
fma(x, fpart, 1.339887440266574e-3f);
40 x =
fma(x, fpart, 9.618437357674640e-3f);
41 x =
fma(x, fpart, 5.550332471162809e-2f);
42 x =
fma(x, fpart, 2.402264791363012e-1f);
43 x =
fma(x, fpart, 6.931472028550421e-1f);
44 x =
fma(x, fpart, 1.000000000000000f);
62template <
bool Sine,
typename T,
int N>
64 auto sign_mask_sin = in < 0;
69 auto y = x * 1.27323954473516f;
82 auto poly_mask = (emm2 & 2) != 0;
90 sign_mask_sin = sign_mask_sin ^ ((emm2 & 4) != 0);
91 auto sign_mask_cos = ((emm2 - 2) & 4) != 0;
100 y1 =
fma(y1, z, 4.166664568298827e-2f);
101 y2 =
fma(y2, z, -1.6666654611e-1f);
109 if constexpr (Sine) {
110 auto ys =
select(poly_mask, y1, y2);
111 return select(sign_mask_sin, -ys, ys);
113 auto yc =
select(poly_mask, y2, y1);
114 return select(sign_mask_cos, yc, -yc);
118template <
typename T,
int N>
121 return std::sin(x.
value);
127template <
typename T,
int N>
130 return std::cos(x.
value);
136template <
typename T,
int N>
142 r =
fma(r, t, 1.421413741f);
143 r =
fma(r, t, -0.284496736f);
144 r =
fma(r, t, 0.254829592f);
145 auto e = -
exp(-v * v);
147 return select(x > 0, result, -result);
150template <
typename T,
int N>
153 auto t =
fma(a, 0.0f - a, 1.0f);
155 auto lhs = [](
auto t) {
158 p =
fma(p, t, 2.93243101e-8f);
159 p =
fma(p, t, 1.22150334e-6f);
160 p =
fma(p, t, 2.84108955e-5f);
161 p =
fma(p, t, 3.93552968e-4f);
162 p =
fma(p, t, 3.02698812e-3f);
163 p =
fma(p, t, 4.83185798e-3f);
164 p =
fma(p, t, -2.64646143e-1f);
165 return fma(p, t, 8.40016484e-1f);
167 auto rhs = [](
auto t) {
170 p =
fma(p, t, 1.43285448e-7f);
171 p =
fma(p, t, 1.22774793e-6f);
172 p =
fma(p, t, 1.12963626e-7f);
173 p =
fma(p, t, -5.61530760e-5f);
174 p =
fma(p, t, -1.47697632e-4f);
175 p =
fma(p, t, 2.31468678e-3f);
176 p =
fma(p, t, 1.15392581e-2f);
177 p =
fma(p, t, -2.32015476e-1f);
178 return fma(p, t, 8.86226892e-1f);
180 auto thresh = 6.125f;
182 if constexpr (
N == 1) {
183 if ((
abs(t) > thresh).value) {
189 return a *
select(t > thresh, lhs(t), rhs(t));
Definition accelerate_fp16_simd.h:9
Simd< bool, N > isnan(Simd< T, N > v)
Definition accelerate_simd.h:146
constexpr int N
Definition neon_fp16_simd.h:9
Simd< T, N > abs(Simd< T, N > v)
Definition accelerate_simd.h:112
Simd< T, N > erf(Simd< T, N > x)
Definition math.h:137
constexpr bool is_complex
Definition base_simd.h:43
Simd< T, N > erfinv(Simd< T, N > a_)
Definition math.h:151
constexpr float inf
Definition math.h:9
Simd< T, N > exp(Simd< T, N > in)
Compute exp(x) in an optimizer friendly way as follows:
Definition math.h:28
Simd< float16_t, N > log(Simd< float16_t, N > v)
Definition accelerate_fp16_simd.h:37
Simd< T, N > floor(Simd< T, N > v)
Definition accelerate_simd.h:113
Simd< T, N > fma(Simd< T, N > x, Simd< T, N > y, U z)
Definition accelerate_simd.h:269
Simd< T, N > cos(Simd< T, N > x)
Definition math.h:128
Simd< T, N > sin(Simd< T, N > x)
Definition math.h:119
Simd< T, N > sincos(Simd< T, N > in)
Definition math.h:63
Simd< T, N > recip(Simd< T, N > v)
Definition accelerate_simd.h:131
Simd< T1, N > select(Simd< MaskT, N > mask, Simd< T1, N > x, Simd< T2, N > y)
Definition accelerate_simd.h:236
Definition accelerate_simd.h:55
asd::Vector< scalar_t, N >::packed_t value
Definition accelerate_simd.h:80