// radix3: 3-point butterfly reading x[0..2]; the second pointer y is
// presumably the output buffer.
// NOTE(review): only the intermediate terms are visible in this view; the
// stores to y and the closing brace are not shown - confirm against the full file.
41METAL_FUNC
void radix3(thread float2* x, thread float2* y) {
// Numerically -sqrt(3)/2 (i.e. -sin(pi/3)); the name suggests it relates to
// the 2*pi/3 angle.
42 float pi_2_3 = -0.8660254037844387;
// Sum and difference of the two non-zero-index inputs.
44 float2 a_1 = x[1] + x[2];
45 float2 a_2 = x[1] - x[2];
// x[0] minus half of the pair sum.
48 float2 b_1 = x[0] - 0.5 * a_1;
// Pair difference scaled by the (negative) constant above.
49 float2 b_2 = pi_2_3 * a_2;
// (-y, x) swap of b_2: equals multiplication by +i if float2 holds
// (real, imag) - TODO confirm the complex layout.
51 float2 b_2_j = {-b_2.y, b_2.x};
// radix4: 4-point butterfly reading x[0..3]; y is presumably the output buffer.
// NOTE(review): the stores to y are not visible in this view.
56METAL_FUNC
void radix4(thread float2* x, thread float2* y) {
// Sum/difference of the even-index pair (x[0], x[2]).
57 float2 z_0 = x[0] + x[2];
58 float2 z_1 = x[0] - x[2];
// Sum/difference of the odd-index pair (x[1], x[3]).
59 float2 z_2 = x[1] + x[3];
60 float2 z_3 = x[1] - x[3];
// (y, -x) swap of z_3: equals multiplication by -i if float2 holds
// (real, imag) - TODO confirm the complex layout.
61 float2 z_3_i = {z_3.y, -z_3.x};
// radix5: 5-point butterfly reading x[0..4]; y is presumably the output buffer.
// NOTE(review): the stores to y are not visible in this view.
69METAL_FUNC
void radix5(thread float2* x, thread float2* y) {
// Constants are declared as float2 but initialised from scalar literals;
// presumably the scalar is replicated into both components so the products
// below are component-wise scalings - confirm against the shading-language spec.
// Numerically: sqrt(5)/4, sin(2*pi/5), sin(pi/5).
70 float2 root_5_4 = 0.5590169943749475;
71 float2 sin_2pi_5 = 0.9510565162951535;
72 float2 sin_1pi_5 = 0.5877852522924731;
// Sums and differences of the mirrored input pairs (1,4) and (2,3).
74 float2 a_1 = x[1] + x[4];
75 float2 a_2 = x[2] + x[3];
76 float2 a_3 = x[1] - x[4];
77 float2 a_4 = x[2] - x[3];
// a_5 is the sum of x[1..4]; a_6 is the scaled difference of the pair sums.
79 float2 a_5 = a_1 + a_2;
80 float2 a_6 = root_5_4 * (a_1 - a_2);
// x[0] minus a quarter of the sum of the other four inputs.
81 float2 a_7 = x[0] - a_5 / 4;
82 float2 a_8 = a_7 + a_6;
83 float2 a_9 = a_7 - a_6;
// Sine-weighted combinations of the pair differences.
84 float2 a_10 = sin_2pi_5 * a_3 + sin_1pi_5 * a_4;
85 float2 a_11 = sin_1pi_5 * a_3 - sin_2pi_5 * a_4;
// (y, -x) swaps: equal to multiplication by -i if float2 holds
// (real, imag) - TODO confirm the complex layout.
86 float2 a_10_j = {a_10.y, -a_10.x};
87 float2 a_11_j = {a_11.y, -a_11.x};
// radix6: 6-point butterfly reading x[0..5]; y is presumably the output buffer.
// The visible terms treat the even inputs (0, 2, 4) and the odd inputs
// (3, 5, 1) as two groups of three.
// NOTE(review): the stores to y are not visible in this view.
96METAL_FUNC
void radix6(thread float2* x, thread float2* y) {
// Numerically sqrt(3)/2.
97 float sin_pi_3 = 0.8660254037844387;
// Even group: pair sum, x[0] minus half the pair sum, scaled pair difference.
98 float2 a_1 = x[2] + x[4];
99 float2 a_2 = x[0] - a_1 / 2;
100 float2 a_3 = sin_pi_3 * (x[2] - x[4]);
// Odd group, led by x[3], with the pair (x[5], x[1]).
101 float2 a_4 = x[5] + x[1];
102 float2 a_5 = x[3] - a_4 / 2;
103 float2 a_6 = sin_pi_3 * (x[5] - x[1]);
// Sum of the three even inputs.
104 float2 a_7 = x[0] + a_1;
// (y, -x) swaps: equal to multiplication by -i if float2 holds
// (real, imag) - TODO confirm the complex layout.
106 float2 a_3_i = {a_3.y, -a_3.x};
107 float2 a_6_i = {a_6.y, -a_6.x};
108 float2 a_8 = a_2 + a_3_i;
109 float2 a_9 = a_2 - a_3_i;
// Sum of the three odd inputs.
110 float2 a_10 = x[3] + a_4;
111 float2 a_11 = a_5 + a_6_i;
112 float2 a_12 = a_5 - a_6_i;
// radix7: 7-point transform reading x[0..6] and writing y[1..6] in the
// visible lines.
// NOTE(review): the statements between the in1 declaration and the final
// stores (and any store to y[0]) are not visible in this view. The index
// permutations are consistent with Rader's algorithm - confirm.
122METAL_FUNC
void radix7(thread float2* x, thread float2* y) {
// Per-component factor (1/6, -1/6): multiplying by it scales the first
// component by 1/6 and negates-and-scales the second (a scaled conjugate if
// float2 is (real, imag) and '*' is component-wise - TODO confirm).
124 float2 inv = {1 / 6.0, -1 / 6.0};
// Inputs 1..6 gathered in the order 1, 3, 2, 6, 4, 5, i.e. successive
// powers of 3 modulo 7.
127 float2 in1[6] = {x[1], x[3], x[2], x[6], x[4], x[5]};
// Scatter: x[1..6] are scaled by inv, offset by x[0], and stored at output
// indices 1, 5, 4, 6, 2, 3, i.e. successive powers of 5 modulo 7
// (5 is the inverse of 3 mod 7).
// NOTE(review): x is presumably overwritten by the missing lines above.
143 y[1] = x[1] * inv + x[0];
144 y[5] = x[2] * inv + x[0];
145 y[4] = x[3] * inv + x[0];
146 y[6] = x[4] * inv + x[0];
147 y[2] = x[5] * inv + x[0];
148 y[3] = x[6] * inv + x[0];
// radix8: 8-point butterfly reading x[0..7]; y is presumably the output buffer.
// NOTE(review): most of the body, including all stores to y, is not visible
// in this view.
151METAL_FUNC
void radix8(thread float2* x, thread float2* y) {
// Numerically sqrt(2)/2.
152 float cos_pi_4 = 0.7071067811865476;
// Constant factors (c, -c) and (-c, -c); if float2 is (real, imag) these are
// the unit complex numbers at angles -pi/4 and -3*pi/4 - TODO confirm.
153 float2 w_0 = {cos_pi_4, -cos_pi_4};
154 float2 w_1 = {-cos_pi_4, -cos_pi_4};
// Inputs reordered so the even-index elements come first, then the odd ones.
155 float2 temp[8] = {x[0], x[2], x[4], x[6], x[1], x[3], x[5], x[7]};
// (y, -x) swap of x[6] (multiplication by -i under the (real, imag) layout).
// NOTE(review): x[6] may have been modified by lines not visible here.
164 float2 x_6 = {x[6].y, -x[6].x};
// radix10: 10-point transform over x, with y presumably the output buffer.
// NOTE(review): this view shows only fragments. The declaration of w, the
// array declarations that own the two index lists below, the loop body and
// the stores are missing. Callers elsewhere in this file invoke it as
// radix10<true>/radix10<false>, so a template header presumably precedes it.
173METAL_FUNC
void radix10(thread float2* x, thread float2* y) {
// Constant factors: (cos(pi/5), -sin(pi/5)) and (cos(2*pi/5), -sin(2*pi/5))
// numerically; w[2] and w[3] reuse them with the first component negated.
175 w[0] = {0.8090169943749475, -0.5877852522924731};
176 w[1] = {0.30901699437494745, -0.9510565162951535};
177 w[2] = {-w[1].x, w[1].y};
178 w[3] = {-w[0].x, w[0].y};
// Initialiser fragment: inputs in the order 0, 3, 4, 8, 2, 1, 7, 9, 6, 5.
182 x[0], x[3], x[4], x[8], x[2], x[1], x[7], x[9], x[6], x[5]};
// Initialiser fragment: even-index inputs first, then odd-index inputs.
187 x[0], x[2], x[4], x[6], x[8], x[1], x[3], x[5], x[7], x[9]};
// Loop over t = 1..4; its body is not visible in this view.
194 for (
int t = 1; t < 5; t++) {
// radix11: 11-point transform reading x[0..10] and writing y[1..10] in the
// visible lines, built on two 10-point radix10 calls.
// NOTE(review): the statements between the two radix10 calls (and any store
// to y[0]) are not visible in this view. The structure is consistent with
// Rader's algorithm - confirm.
201METAL_FUNC
void radix11(thread float2* x, thread float2* y) {
// Per-component factor (1/10, -1/10): scales the first component by 1/10 and
// negates-and-scales the second (assuming component-wise '*' - TODO confirm).
203 float2 inv = {1 / 10.0, -1 / 10.0};
// First 10-point pass over elements 1..10, from x into y.
206 radix10<true>(x + 1, y + 1);
// Second 10-point pass in the opposite direction, from y back into x.
223 radix10<false>(y + 1, x + 1);
// Scatter: x[1..10] are scaled by inv, offset by x[0], and stored at output
// indices 1, 6, 3, 7, 9, 10, 5, 8, 4, 2, i.e. successive powers of 6
// modulo 11.
225 y[1] = x[1] * inv + x[0];
226 y[6] = x[2] * inv + x[0];
227 y[3] = x[3] * inv + x[0];
228 y[7] = x[4] * inv + x[0];
229 y[9] = x[5] * inv + x[0];
230 y[10] = x[6] * inv + x[0];
231 y[5] = x[7] * inv + x[0];
232 y[8] = x[8] * inv + x[0];
233 y[4] = x[9] * inv + x[0];
234 y[2] = x[10] * inv + x[0];
// radix12: 12-point transform over x, with y presumably the output buffer.
// NOTE(review): this view shows only fragments. The declaration of w, the
// assignment to w[2], the loop body and the stores are missing. Callers
// elsewhere in this file invoke it as radix12<true>/radix12<false>, so a
// template header presumably precedes it.
238METAL_FUNC
void radix12(thread float2* x, thread float2* y) {
// Numerically sqrt(3)/2.
240 float sin_pi_3 = 0.8660254037844387;
// Constant factors; if float2 is (real, imag) these are unit complex numbers
// at angles -pi/6, -pi/3, -2*pi/3 and -5*pi/6 respectively - TODO confirm.
241 w[0] = {sin_pi_3, -0.5};
242 w[1] = {0.5, -sin_pi_3};
244 w[3] = {-0.5, -sin_pi_3};
245 w[4] = {-sin_pi_3, -0.5};
// Loop over t = 1..5; its body is not visible in this view.
283 for (
int t = 1; t < 6; t++) {
// radix13: 13-point transform reading x[0..12] and writing y[1..12] in the
// visible lines, built on two 12-point radix12 calls.
// NOTE(review): the statements between the two radix12 calls (and any store
// to y[0]) are not visible in this view. The structure is consistent with
// Rader's algorithm - confirm.
290METAL_FUNC
void radix13(thread float2* x, thread float2* y) {
// Per-component factor (1/12, -1/12): scales the first component by 1/12 and
// negates-and-scales the second (assuming component-wise '*' - TODO confirm).
292 float2 inv = {1 / 12.0, -1 / 12.0};
// First 12-point pass over elements 1..12, from x into y.
295 radix12<true>(x + 1, y + 1);
// Second 12-point pass in the opposite direction, from y back into x.
314 radix12<false>(y + 1, x + 1);
// Scatter: x[1..12] are scaled by inv, offset by x[0], and stored at output
// indices 1, 7, 10, 5, 9, 11, 12, 6, 3, 8, 4, 2, i.e. successive powers of 7
// modulo 13.
316 y[1] = x[1] * inv + x[0];
317 y[7] = x[2] * inv + x[0];
318 y[10] = x[3] * inv + x[0];
319 y[5] = x[4] * inv + x[0];
320 y[9] = x[5] * inv + x[0];
321 y[11] = x[6] * inv + x[0];
322 y[12] = x[7] * inv + x[0];
323 y[6] = x[8] * inv + x[0];
324 y[3] = x[9] * inv + x[0];
325 y[8] = x[10] * inv + x[0];
326 y[4] = x[11] * inv + x[0];
327 y[2] = x[12] * inv + x[0];