// radix3: the lines shown read x[0..2] and build intermediate float2 values;
// no store to `y` appears in this excerpt.
// NOTE(review): the numeric prefixes (41, 42, 44, ...) look like line numbers
// left by an extraction step, and the gaps between them suggest body lines
// (presumably the writes to y and the closing brace) are missing -- confirm
// against the full file.
41METAL_FUNC 
void radix3(thread float2* x, thread float2* y) {
 
       // Numerically -sqrt(3)/2.
   42  float pi_2_3 = -0.8660254037844387;
 
       // Sum and difference of inputs 1 and 2.
   44  float2 a_1 = x[1] + x[2];
 
   45  float2 a_2 = x[1] - x[2];
 
       // x[0] minus half of the sum.
   48  float2 b_1 = x[0] - 0.5 * a_1;
 
       // Difference scaled by -sqrt(3)/2.
   49  float2 b_2 = pi_2_3 * a_2;
 
       // (x, y) -> (-y, x); a multiply by +i if float2 holds (re, im) -- TODO confirm.
   51  float2 b_2_j = {-b_2.y, b_2.x};
 
 
   // radix4: the lines shown combine x[0..3] into sums/differences of the
   // pairs (x[0], x[2]) and (x[1], x[3]); no store to `y` is visible here.
   // NOTE(review): the numeric prefixes (56, 57, ...) look like extraction
   // line numbers; the lines after 61 (presumably the writes to y and the
   // closing brace) are not part of this excerpt -- confirm against the file.
   56METAL_FUNC 
void radix4(thread float2* x, thread float2* y) {
 
       // Even-indexed pair: sum and difference.
   57  float2 z_0 = x[0] + x[2];
 
   58  float2 z_1 = x[0] - x[2];
 
       // Odd-indexed pair: sum and difference.
   59  float2 z_2 = x[1] + x[3];
 
   60  float2 z_3 = x[1] - x[3];
 
       // (x, y) -> (y, -x); a multiply by -i if float2 holds (re, im) -- TODO confirm.
   61  float2 z_3_i = {z_3.y, -z_3.x};
 
 
   // radix5: the lines shown read x[0..4] and build intermediates a_1..a_11
   // plus two rotated copies; no store to `y` is visible in this excerpt.
   // NOTE(review): the numeric prefixes (69, 70, ...) look like extraction
   // line numbers; lines after 87 (presumably the writes to y and the closing
   // brace) are missing here -- confirm against the full file.
   69METAL_FUNC 
void radix5(thread float2* x, thread float2* y) {
 
       // The three constants are float2 initialised from one scalar each;
       // presumably the scalar is broadcast to both components -- TODO confirm.
       // Numerically sqrt(5)/4.
   70  float2 root_5_4 = 0.5590169943749475;
 
       // Numerically sin(2*pi/5).
   71  float2 sin_2pi_5 = 0.9510565162951535;
 
       // Numerically sin(pi/5).
   72  float2 sin_1pi_5 = 0.5877852522924731;
 
       // Sums of the pairs (1, 4) and (2, 3).
   74  float2 a_1 = x[1] + x[4];
 
   75  float2 a_2 = x[2] + x[3];
 
       // Differences of the same pairs.
   76  float2 a_3 = x[1] - x[4];
 
   77  float2 a_4 = x[2] - x[3];
 
       // Sum of x[1..4].
   79  float2 a_5 = a_1 + a_2;
 
   80  float2 a_6 = root_5_4 * (a_1 - a_2);
 
       // x[0] minus a quarter of the sum of the other four inputs.
   81  float2 a_7 = x[0] - a_5 / 4;
 
   82  float2 a_8 = a_7 + a_6;
 
   83  float2 a_9 = a_7 - a_6;
 
       // Two sine-weighted combinations of the pair differences.
   84  float2 a_10 = sin_2pi_5 * a_3 + sin_1pi_5 * a_4;
 
   85  float2 a_11 = sin_1pi_5 * a_3 - sin_2pi_5 * a_4;
 
       // (x, y) -> (y, -x) for both; a multiply by -i if float2 holds
       // (re, im) -- TODO confirm.
   86  float2 a_10_j = {a_10.y, -a_10.x};
 
   87  float2 a_11_j = {a_11.y, -a_11.x};
 
 
   // radix6: the lines shown read x[0..5] and build intermediates from two
   // index groups, (0, 2, 4) and (3, 5, 1); no store to `y` is visible here.
   // NOTE(review): the numeric prefixes (96, 97, ...) look like extraction
   // line numbers; lines after 112 (presumably the writes to y and the
   // closing brace) are missing from this excerpt -- confirm against the file.
   96METAL_FUNC 
void radix6(thread float2* x, thread float2* y) {
 
       // Numerically sqrt(3)/2, i.e. sin(pi/3).
   97  float sin_pi_3 = 0.8660254037844387;
 
       // Group (0, 2, 4): sum of the last two, x[0] minus half that sum,
       // and the scaled difference of the last two.
   98  float2 a_1 = x[2] + x[4];
 
   99  float2 a_2 = x[0] - a_1 / 2;
 
  100  float2 a_3 = sin_pi_3 * (x[2] - x[4]);
 
       // Group (3, 5, 1): the same three quantities.
  101  float2 a_4 = x[5] + x[1];
 
  102  float2 a_5 = x[3] - a_4 / 2;
 
  103  float2 a_6 = sin_pi_3 * (x[5] - x[1]);
 
       // Plain sum of the first group.
  104  float2 a_7 = x[0] + a_1;
 
       // (x, y) -> (y, -x) for both; a multiply by -i if float2 holds
       // (re, im) -- TODO confirm.
  106  float2 a_3_i = {a_3.y, -a_3.x};
 
  107  float2 a_6_i = {a_6.y, -a_6.x};
 
  108  float2 a_8 = a_2 + a_3_i;
 
  109  float2 a_9 = a_2 - a_3_i;
 
       // Plain sum of the second group.
  110  float2 a_10 = x[3] + a_4;
 
  111  float2 a_11 = a_5 + a_6_i;
 
  112  float2 a_12 = a_5 - a_6_i;
 
 
  // radix7: the lines shown build a permuted copy of x[1..6] and then write
  // y[1..6] from x[1..6], `inv` and x[0]. No write to y[0] is visible.
  // NOTE(review): the numeric prefixes jump from 127 to 143, so the lines
  // that use `in1` (and possibly overwrite x before the final stores) are
  // missing from this excerpt, as is the closing brace. Presumably this is
  // Rader's algorithm for a prime length -- confirm against the full file.
  122METAL_FUNC 
void radix7(thread float2* x, thread float2* y) {
 
       // (1/6, -1/6): used below as a per-component scale with a sign flip
       // on the second component.
  124  float2 inv = {1 / 6.0, -1 / 6.0};
 
       // x[1..6] reordered as 1, 3, 2, 6, 4, 5 (successive powers of 3 mod 7).
       // Not referenced by any line visible in this excerpt.
  127  float2 in1[6] = {x[1], x[3], x[2], x[6], x[4], x[5]};
 
       // Each store is x[k] * inv + x[0]; assuming float2 is Metal's built-in
       // vector type, `*` is component-wise -- TODO confirm.
       // Destination order 1, 5, 4, 6, 2, 3 is the successive powers of 5 mod 7.
  143  y[1] = x[1] * inv + x[0];
 
  144  y[5] = x[2] * inv + x[0];
 
  145  y[4] = x[3] * inv + x[0];
 
  146  y[6] = x[4] * inv + x[0];
 
  147  y[2] = x[5] * inv + x[0];
 
  148  y[3] = x[6] * inv + x[0];
 
 
  // radix8: the lines shown define two constant float2 factors, a reordered
  // copy of x[0..7], and a rotated copy of x[6]; no store to `y` is visible.
  // NOTE(review): the numeric prefixes jump from 155 to 164 and stop there,
  // so most of the body (including any use of w_0, w_1, temp and x_6, and
  // the closing brace) is missing from this excerpt -- confirm against the
  // full file.
  151METAL_FUNC 
void radix8(thread float2* x, thread float2* y) {
 
       // Numerically sqrt(2)/2, i.e. cos(pi/4).
  152  float cos_pi_4 = 0.7071067811865476;
 
       // (c, -c) and (-c, -c) with c = cos_pi_4.
  153  float2 w_0 = {cos_pi_4, -cos_pi_4};
 
  154  float2 w_1 = {-cos_pi_4, -cos_pi_4};
 
       // Even-indexed inputs first, then odd-indexed inputs.
  155  float2 temp[8] = {x[0], x[2], x[4], x[6], x[1], x[3], x[5], x[7]};
 
       // (x, y) -> (y, -x) applied to x[6]; a multiply by -i if float2 holds
       // (re, im) -- TODO confirm.
  164  float2 x_6 = {x[6].y, -x[6].x};
 
 
  // radix10: the lines shown fill four entries of an array `w`, show the
  // tails of two 10-element initializer lists over x[0..9], and open a loop
  // over t = 1..4. No store to `y` is visible.
  // NOTE(review): the numeric prefixes skip 174, 179-181, 183-186, 188-193
  // and everything after 194, so the declaration of `w`, the heads of both
  // initializer lists, the loop body and the closing braces are all missing
  // from this excerpt -- confirm against the full file.
  173METAL_FUNC 
void radix10(thread float2* x, thread float2* y) {
 
       // Numerically (cos(pi/5), -sin(pi/5)).
  175  w[0] = {0.8090169943749475, -0.5877852522924731};
 
       // Numerically (cos(2*pi/5), -sin(2*pi/5)).
  176  w[1] = {0.30901699437494745, -0.9510565162951535};
 
       // w[2] and w[3] are w[1] and w[0] with the first component negated.
  177  w[2] = {-w[1].x, w[1].y};
 
  178  w[3] = {-w[0].x, w[0].y};
 
       // Tail of an initializer listing x[0..9] in the order
       // 0, 3, 4, 8, 2, 1, 7, 9, 6, 5; the variable being initialised is
       // not visible here.
  182        x[0], x[3], x[4], x[8], x[2], x[1], x[7], x[9], x[6], x[5]};
 
       // Tail of a second initializer: even indices first, then odd indices.
  187        x[0], x[2], x[4], x[6], x[8], x[1], x[3], x[5], x[7], x[9]};
 
       // Loop over t = 1, 2, 3, 4; its body is not visible in this excerpt.
  194  for (
int t = 1; t < 5; t++) {
 
 
  // radix11: the lines shown define `inv` and then write y[1..10] from
  // x[1..10], `inv` and x[0]. No write to y[0] is visible.
  // NOTE(review): the numeric prefixes jump from 203 to 225, so whatever
  // happens between (possibly including writes to x before the final
  // stores) and the closing brace are missing from this excerpt. Presumably
  // this is Rader's algorithm for a prime length -- confirm against the
  // full file.
  201METAL_FUNC 
void radix11(thread float2* x, thread float2* y) {
 
       // (1/10, -1/10): per-component scale with a sign flip on the second
       // component.
  203  float2 inv = {1 / 10.0, -1 / 10.0};
 
       // Each store is x[k] * inv + x[0]; assuming float2 is Metal's built-in
       // vector type, `*` is component-wise -- TODO confirm.
       // Destination order 1, 6, 3, 7, 9, 10, 5, 8, 4, 2 is the successive
       // powers of 6 mod 11.
  225  y[1] = x[1] * inv + x[0];
 
  226  y[6] = x[2] * inv + x[0];
 
  227  y[3] = x[3] * inv + x[0];
 
  228  y[7] = x[4] * inv + x[0];
 
  229  y[9] = x[5] * inv + x[0];
 
  230  y[10] = x[6] * inv + x[0];
 
  231  y[5] = x[7] * inv + x[0];
 
  232  y[8] = x[8] * inv + x[0];
 
  233  y[4] = x[9] * inv + x[0];
 
  234  y[2] = x[10] * inv + x[0];
 
 
  // radix12: the lines shown fill entries 0, 1, 3 and 4 of an array `w` and
  // open a loop over t = 1..5. No read of `x` or store to `y` is visible.
  // NOTE(review): the numeric prefixes skip 239, 243, 246-282 and everything
  // after 283, so the declaration of `w`, any assignment to w[2], the main
  // computation, the loop body and the closing braces are missing from this
  // excerpt -- confirm against the full file.
  238METAL_FUNC 
void radix12(thread float2* x, thread float2* y) {
 
       // Numerically sqrt(3)/2, i.e. sin(pi/3).
  240  float sin_pi_3 = 0.8660254037844387;
 
       // Each entry pairs +/-0.5 with +/-sin_pi_3; the second component is
       // negative in all four visible entries.
  241  w[0] = {sin_pi_3, -0.5};
 
  242  w[1] = {0.5, -sin_pi_3};
 
  244  w[3] = {-0.5, -sin_pi_3};
 
  245  w[4] = {-sin_pi_3, -0.5};
 
       // Loop over t = 1, ..., 5; its body is not visible in this excerpt.
  283  for (
int t = 1; t < 6; t++) {
 
 
  // radix13: the lines shown define `inv` and then write y[1..12] from
  // x[1..12], `inv` and x[0]. No write to y[0] is visible.
  // NOTE(review): the numeric prefixes jump from 292 to 316, so whatever
  // happens between (possibly including writes to x before the final
  // stores) and the closing brace are missing from this excerpt. Presumably
  // this is Rader's algorithm for a prime length -- confirm against the
  // full file.
  290METAL_FUNC 
void radix13(thread float2* x, thread float2* y) {
 
       // (1/12, -1/12): per-component scale with a sign flip on the second
       // component.
  292  float2 inv = {1 / 12.0, -1 / 12.0};
 
       // Each store is x[k] * inv + x[0]; assuming float2 is Metal's built-in
       // vector type, `*` is component-wise -- TODO confirm.
       // Destination order 1, 7, 10, 5, 9, 11, 12, 6, 3, 8, 4, 2 is the
       // successive powers of 7 mod 13.
  316  y[1] = x[1] * inv + x[0];
 
  317  y[7] = x[2] * inv + x[0];
 
  318  y[10] = x[3] * inv + x[0];
 
  319  y[5] = x[4] * inv + x[0];
 
  320  y[9] = x[5] * inv + x[0];
 
  321  y[11] = x[6] * inv + x[0];
 
  322  y[12] = x[7] * inv + x[0];
 
  323  y[6] = x[8] * inv + x[0];
 
  324  y[3] = x[9] * inv + x[0];
 
  325  y[8] = x[10] * inv + x[0];
 
  326  y[4] = x[11] * inv + x[0];
 
  327  y[2] = x[12] * inv + x[0];