837 std::vector<fctdata> fact;
839 void add_factor(
size_t factor)
840 { fact.push_back({factor,
nullptr,
nullptr}); }
842template<
bool fwd,
typename T>
void pass2 (
size_t ido,
size_t l1,
846 auto CH = [ch,ido,l1](
size_t a,
size_t b,
size_t c) -> T&
847 {
return ch[a+ido*(b+l1*c)]; };
848 auto CC = [cc,ido](
size_t a,
size_t b,
size_t c) ->
const T&
849 {
return cc[a+ido*(b+2*c)]; };
850 auto WA = [wa, ido](
size_t x,
size_t i)
851 {
return wa[i-1+x*(ido-1)]; };
854 for (
size_t k=0; k<l1; ++k)
856 CH(0,k,0) = CC(0,0,k)+CC(0,1,k);
857 CH(0,k,1) = CC(0,0,k)-CC(0,1,k);
860 for (
size_t k=0; k<l1; ++k)
862 CH(0,k,0) = CC(0,0,k)+CC(0,1,k);
863 CH(0,k,1) = CC(0,0,k)-CC(0,1,k);
864 for (
size_t i=1; i<ido; ++i)
866 CH(i,k,0) = CC(i,0,k)+CC(i,1,k);
872#define POCKETFFT_PREP3(idx) \
873 T t0 = CC(idx,0,k), t1, t2; \
874 PM (t1,t2,CC(idx,1,k),CC(idx,2,k)); \
876#define POCKETFFT_PARTSTEP3a(u1,u2,twr,twi) \
879 T cb{-t2.i*twi, t2.r*twi}; \
880 PM(CH(0,k,u1),CH(0,k,u2),ca,cb) ;\
882#define POCKETFFT_PARTSTEP3b(u1,u2,twr,twi) \
885 T cb{-t2.i*twi, t2.r*twi}; \
886 special_mul<fwd>(ca+cb,WA(u1-1,i),CH(i,k,u1)); \
887 special_mul<fwd>(ca-cb,WA(u2-1,i),CH(i,k,u2)); \
889template<
bool fwd,
typename T>
void pass3 (
size_t ido,
size_t l1,
893 constexpr T0 tw1r=-0.5,
894 tw1i= (fwd ? -1: 1) * T0(0.8660254037844386467637231707529362L);
896 auto CH = [ch,ido,l1](
size_t a,
size_t b,
size_t c) -> T&
897 {
return ch[a+ido*(b+l1*c)]; };
898 auto CC = [cc,ido](
size_t a,
size_t b,
size_t c) ->
const T&
899 {
return cc[a+ido*(b+3*c)]; };
900 auto WA = [wa, ido](
size_t x,
size_t i)
901 {
return wa[i-1+x*(ido-1)]; };
904 for (
size_t k=0; k<l1; ++k)
910 for (
size_t k=0; k<l1; ++k)
916 for (
size_t i=1; i<ido; ++i)
924#undef POCKETFFT_PARTSTEP3b
925#undef POCKETFFT_PARTSTEP3a
926#undef POCKETFFT_PREP3
928template<
bool fwd,
typename T>
void pass4 (
size_t ido,
size_t l1,
932 auto CH = [ch,ido,l1](
size_t a,
size_t b,
size_t c) -> T&
933 {
return ch[a+ido*(b+l1*c)]; };
934 auto CC = [cc,ido](
size_t a,
size_t b,
size_t c) ->
const T&
935 {
return cc[a+ido*(b+4*c)]; };
936 auto WA = [wa, ido](
size_t x,
size_t i)
937 {
return wa[i-1+x*(ido-1)]; };
940 for (
size_t k=0; k<l1; ++k)
943 PM(t2,t1,CC(0,0,k),CC(0,2,k));
944 PM(t3,t4,CC(0,1,k),CC(0,3,k));
946 PM(CH(0,k,0),CH(0,k,2),t2,t3);
947 PM(CH(0,k,1),CH(0,k,3),t1,t4);
950 for (
size_t k=0; k<l1; ++k)
954 PM(t2,t1,CC(0,0,k),CC(0,2,k));
955 PM(t3,t4,CC(0,1,k),CC(0,3,k));
957 PM(CH(0,k,0),CH(0,k,2),t2,t3);
958 PM(CH(0,k,1),CH(0,k,3),t1,t4);
960 for (
size_t i=1; i<ido; ++i)
963 T cc0=CC(i,0,k), cc1=CC(i,1,k),cc2=CC(i,2,k),cc3=CC(i,3,k);
968 special_mul<fwd>(t1+t4,WA(0,i),CH(i,k,1));
969 special_mul<fwd>(t2-t3,WA(1,i),CH(i,k,2));
970 special_mul<fwd>(t1-t4,WA(2,i),CH(i,k,3));
975#define POCKETFFT_PREP5(idx) \
976 T t0 = CC(idx,0,k), t1, t2, t3, t4; \
977 PM (t1,t4,CC(idx,1,k),CC(idx,4,k)); \
978 PM (t2,t3,CC(idx,2,k),CC(idx,3,k)); \
979 CH(idx,k,0).r=t0.r+t1.r+t2.r; \
980 CH(idx,k,0).i=t0.i+t1.i+t2.i;
982#define POCKETFFT_PARTSTEP5a(u1,u2,twar,twbr,twai,twbi) \
985 ca.r=t0.r+twar*t1.r+twbr*t2.r; \
986 ca.i=t0.i+twar*t1.i+twbr*t2.i; \
987 cb.i=twai*t4.r twbi*t3.r; \
988 cb.r=-(twai*t4.i twbi*t3.i); \
989 PM(CH(0,k,u1),CH(0,k,u2),ca,cb); \
992#define POCKETFFT_PARTSTEP5b(u1,u2,twar,twbr,twai,twbi) \
995 ca.r=t0.r+twar*t1.r+twbr*t2.r; \
996 ca.i=t0.i+twar*t1.i+twbr*t2.i; \
997 cb.i=twai*t4.r twbi*t3.r; \
998 cb.r=-(twai*t4.i twbi*t3.i); \
999 special_mul<fwd>(ca+cb,WA(u1-1,i),CH(i,k,u1)); \
1000 special_mul<fwd>(ca-cb,WA(u2-1,i),CH(i,k,u2)); \
1002template<
bool fwd,
typename T>
void pass5 (
size_t ido,
size_t l1,
1006 constexpr T0 tw1r= T0(0.3090169943749474241022934171828191L),
1007 tw1i= (fwd ? -1: 1) * T0(0.9510565162951535721164393333793821L),
1008 tw2r= T0(-0.8090169943749474241022934171828191L),
1009 tw2i= (fwd ? -1: 1) * T0(0.5877852522924731291687059546390728L);
1011 auto CH = [ch,ido,l1](
size_t a,
size_t b,
size_t c) -> T&
1012 {
return ch[a+ido*(b+l1*c)]; };
1013 auto CC = [cc,ido](
size_t a,
size_t b,
size_t c) ->
const T&
1014 {
return cc[a+ido*(b+5*c)]; };
1015 auto WA = [wa, ido](
size_t x,
size_t i)
1016 {
return wa[i-1+x*(ido-1)]; };
1019 for (
size_t k=0; k<l1; ++k)
1026 for (
size_t k=0; k<l1; ++k)
1033 for (
size_t i=1; i<ido; ++i)
1042#undef POCKETFFT_PARTSTEP5b
1043#undef POCKETFFT_PARTSTEP5a
1044#undef POCKETFFT_PREP5
1046#define POCKETFFT_PREP7(idx) \
1047 T t1 = CC(idx,0,k), t2, t3, t4, t5, t6, t7; \
1048 PM (t2,t7,CC(idx,1,k),CC(idx,6,k)); \
1049 PM (t3,t6,CC(idx,2,k),CC(idx,5,k)); \
1050 PM (t4,t5,CC(idx,3,k),CC(idx,4,k)); \
1051 CH(idx,k,0).r=t1.r+t2.r+t3.r+t4.r; \
1052 CH(idx,k,0).i=t1.i+t2.i+t3.i+t4.i;
1054#define POCKETFFT_PARTSTEP7a0(u1,u2,x1,x2,x3,y1,y2,y3,out1,out2) \
1057 ca.r=t1.r+x1*t2.r+x2*t3.r+x3*t4.r; \
1058 ca.i=t1.i+x1*t2.i+x2*t3.i+x3*t4.i; \
1059 cb.i=y1*t7.r y2*t6.r y3*t5.r; \
1060 cb.r=-(y1*t7.i y2*t6.i y3*t5.i); \
1061 PM(out1,out2,ca,cb); \
1063#define POCKETFFT_PARTSTEP7a(u1,u2,x1,x2,x3,y1,y2,y3) \
1064 POCKETFFT_PARTSTEP7a0(u1,u2,x1,x2,x3,y1,y2,y3,CH(0,k,u1),CH(0,k,u2))
1065#define POCKETFFT_PARTSTEP7(u1,u2,x1,x2,x3,y1,y2,y3) \
1068 POCKETFFT_PARTSTEP7a0(u1,u2,x1,x2,x3,y1,y2,y3,da,db) \
1069 special_mul<fwd>(da,WA(u1-1,i),CH(i,k,u1)); \
1070 special_mul<fwd>(db,WA(u2-1,i),CH(i,k,u2)); \
1073template<
bool fwd,
typename T>
void pass7(
size_t ido,
size_t l1,
1077 constexpr T0 tw1r= T0(0.6234898018587335305250048840042398L),
1078 tw1i= (fwd ? -1 : 1) * T0(0.7818314824680298087084445266740578L),
1079 tw2r= T0(-0.2225209339563144042889025644967948L),
1080 tw2i= (fwd ? -1 : 1) * T0(0.9749279121818236070181316829939312L),
1081 tw3r= T0(-0.9009688679024191262361023195074451L),
1082 tw3i= (fwd ? -1 : 1) * T0(0.433883739117558120475768332848359L);
1084 auto CH = [ch,ido,l1](
size_t a,
size_t b,
size_t c) -> T&
1085 {
return ch[a+ido*(b+l1*c)]; };
1086 auto CC = [cc,ido](
size_t a,
size_t b,
size_t c) ->
const T&
1087 {
return cc[a+ido*(b+7*c)]; };
1088 auto WA = [wa, ido](
size_t x,
size_t i)
1089 {
return wa[i-1+x*(ido-1)]; };
1092 for (
size_t k=0; k<l1; ++k)
1100 for (
size_t k=0; k<l1; ++k)
1108 for (
size_t i=1; i<ido; ++i)
1118#undef POCKETFFT_PARTSTEP7
1119#undef POCKETFFT_PARTSTEP7a0
1120#undef POCKETFFT_PARTSTEP7a
1121#undef POCKETFFT_PREP7
1123template <
bool fwd,
typename T>
void ROTX45(T &a)
const
1125 constexpr T0 hsqt2=T0(0.707106781186547524400844362104849L);
1127 {
auto tmp_=a.r; a.r=hsqt2*(a.r+a.i); a.i=hsqt2*(a.i-tmp_); }
1129 {
auto tmp_=a.r; a.r=hsqt2*(a.r-a.i); a.i=hsqt2*(a.i+tmp_); }
1131template <
bool fwd,
typename T>
void ROTX135(T &a)
const
1133 constexpr T0 hsqt2=T0(0.707106781186547524400844362104849L);
1135 {
auto tmp_=a.r; a.r=hsqt2*(a.i-a.r); a.i=hsqt2*(-tmp_-a.i); }
1137 {
auto tmp_=a.r; a.r=hsqt2*(-a.r-a.i); a.i=hsqt2*(tmp_-a.i); }
1140template<
bool fwd,
typename T>
void pass8 (
size_t ido,
size_t l1,
1144 auto CH = [ch,ido,l1](
size_t a,
size_t b,
size_t c) -> T&
1145 {
return ch[a+ido*(b+l1*c)]; };
1146 auto CC = [cc,ido](
size_t a,
size_t b,
size_t c) ->
const T&
1147 {
return cc[a+ido*(b+8*c)]; };
1148 auto WA = [wa, ido](
size_t x,
size_t i)
1149 {
return wa[i-1+x*(ido-1)]; };
1152 for (
size_t k=0; k<l1; ++k)
1154 T a0, a1, a2, a3, a4, a5, a6, a7;
1155 PM(a1,a5,CC(0,1,k),CC(0,5,k));
1156 PM(a3,a7,CC(0,3,k),CC(0,7,k));
1165 PM(a0,a4,CC(0,0,k),CC(0,4,k));
1166 PM(a2,a6,CC(0,2,k),CC(0,6,k));
1167 PM(CH(0,k,0),CH(0,k,4),a0+a2,a1);
1168 PM(CH(0,k,2),CH(0,k,6),a0-a2,a3);
1170 PM(CH(0,k,1),CH(0,k,5),a4+a6,a5);
1171 PM(CH(0,k,3),CH(0,k,7),a4-a6,a7);
1174 for (
size_t k=0; k<l1; ++k)
1177 T a0, a1, a2, a3, a4, a5, a6, a7;
1178 PM(a1,a5,CC(0,1,k),CC(0,5,k));
1179 PM(a3,a7,CC(0,3,k),CC(0,7,k));
1188 PM(a0,a4,CC(0,0,k),CC(0,4,k));
1189 PM(a2,a6,CC(0,2,k),CC(0,6,k));
1190 PM(CH(0,k,0),CH(0,k,4),a0+a2,a1);
1191 PM(CH(0,k,2),CH(0,k,6),a0-a2,a3);
1193 PM(CH(0,k,1),CH(0,k,5),a4+a6,a5);
1194 PM(CH(0,k,3),CH(0,k,7),a4-a6,a7);
1196 for (
size_t i=1; i<ido; ++i)
1198 T a0, a1, a2, a3, a4, a5, a6, a7;
1199 PM(a1,a5,CC(i,1,k),CC(i,5,k));
1200 PM(a3,a7,CC(i,3,k),CC(i,7,k));
1207 PM(a0,a4,CC(i,0,k),CC(i,4,k));
1208 PM(a2,a6,CC(i,2,k),CC(i,6,k));
1211 special_mul<fwd>(a0-a1,WA(3,i),CH(i,k,4));
1212 special_mul<fwd>(a2+a3,WA(1,i),CH(i,k,2));
1213 special_mul<fwd>(a2-a3,WA(5,i),CH(i,k,6));
1216 special_mul<fwd>(a4+a5,WA(0,i),CH(i,k,1));
1217 special_mul<fwd>(a4-a5,WA(4,i),CH(i,k,5));
1218 special_mul<fwd>(a6+a7,WA(2,i),CH(i,k,3));
1219 special_mul<fwd>(a6-a7,WA(6,i),CH(i,k,7));
1225#define POCKETFFT_PREP11(idx) \
1226 T t1 = CC(idx,0,k), t2, t3, t4, t5, t6, t7, t8, t9, t10, t11; \
1227 PM (t2,t11,CC(idx,1,k),CC(idx,10,k)); \
1228 PM (t3,t10,CC(idx,2,k),CC(idx, 9,k)); \
1229 PM (t4,t9 ,CC(idx,3,k),CC(idx, 8,k)); \
1230 PM (t5,t8 ,CC(idx,4,k),CC(idx, 7,k)); \
1231 PM (t6,t7 ,CC(idx,5,k),CC(idx, 6,k)); \
1232 CH(idx,k,0).r=t1.r+t2.r+t3.r+t4.r+t5.r+t6.r; \
1233 CH(idx,k,0).i=t1.i+t2.i+t3.i+t4.i+t5.i+t6.i;
1235#define POCKETFFT_PARTSTEP11a0(u1,u2,x1,x2,x3,x4,x5,y1,y2,y3,y4,y5,out1,out2) \
1237 T ca = t1 + t2*x1 + t3*x2 + t4*x3 + t5*x4 +t6*x5, \
1239 cb.i=y1*t11.r y2*t10.r y3*t9.r y4*t8.r y5*t7.r; \
1240 cb.r=-(y1*t11.i y2*t10.i y3*t9.i y4*t8.i y5*t7.i ); \
1241 PM(out1,out2,ca,cb); \
1243#define POCKETFFT_PARTSTEP11a(u1,u2,x1,x2,x3,x4,x5,y1,y2,y3,y4,y5) \
1244 POCKETFFT_PARTSTEP11a0(u1,u2,x1,x2,x3,x4,x5,y1,y2,y3,y4,y5,CH(0,k,u1),CH(0,k,u2))
1245#define POCKETFFT_PARTSTEP11(u1,u2,x1,x2,x3,x4,x5,y1,y2,y3,y4,y5) \
1248 POCKETFFT_PARTSTEP11a0(u1,u2,x1,x2,x3,x4,x5,y1,y2,y3,y4,y5,da,db) \
1249 special_mul<fwd>(da,WA(u1-1,i),CH(i,k,u1)); \
1250 special_mul<fwd>(db,WA(u2-1,i),CH(i,k,u2)); \
1253template<
bool fwd,
typename T>
void pass11 (
size_t ido,
size_t l1,
1257 constexpr T0 tw1r= T0(0.8412535328311811688618116489193677L),
1258 tw1i= (fwd ? -1 : 1) * T0(0.5406408174555975821076359543186917L),
1259 tw2r= T0(0.4154150130018864255292741492296232L),
1260 tw2i= (fwd ? -1 : 1) * T0(0.9096319953545183714117153830790285L),
1261 tw3r= T0(-0.1423148382732851404437926686163697L),
1262 tw3i= (fwd ? -1 : 1) * T0(0.9898214418809327323760920377767188L),
1263 tw4r= T0(-0.6548607339452850640569250724662936L),
1264 tw4i= (fwd ? -1 : 1) * T0(0.7557495743542582837740358439723444L),
1265 tw5r= T0(-0.9594929736144973898903680570663277L),
1266 tw5i= (fwd ? -1 : 1) * T0(0.2817325568414296977114179153466169L);
1268 auto CH = [ch,ido,l1](
size_t a,
size_t b,
size_t c) -> T&
1269 {
return ch[a+ido*(b+l1*c)]; };
1270 auto CC = [cc,ido](
size_t a,
size_t b,
size_t c) ->
const T&
1271 {
return cc[a+ido*(b+11*c)]; };
1272 auto WA = [wa, ido](
size_t x,
size_t i)
1273 {
return wa[i-1+x*(ido-1)]; };
1276 for (
size_t k=0; k<l1; ++k)
1279 POCKETFFT_PARTSTEP11a(1,10,tw1r,tw2r,tw3r,tw4r,tw5r,+tw1i,+tw2i,+tw3i,+tw4i,+tw5i)
1280 POCKETFFT_PARTSTEP11a(2, 9,tw2r,tw4r,tw5r,tw3r,tw1r,+tw2i,+tw4i,-tw5i,-tw3i,-tw1i)
1281 POCKETFFT_PARTSTEP11a(3, 8,tw3r,tw5r,tw2r,tw1r,tw4r,+tw3i,-tw5i,-tw2i,+tw1i,+tw4i)
1282 POCKETFFT_PARTSTEP11a(4, 7,tw4r,tw3r,tw1r,tw5r,tw2r,+tw4i,-tw3i,+tw1i,+tw5i,-tw2i)
1283 POCKETFFT_PARTSTEP11a(5, 6,tw5r,tw1r,tw4r,tw2r,tw3r,+tw5i,-tw1i,+tw4i,-tw2i,+tw3i)
1286 for (
size_t k=0; k<l1; ++k)
1290 POCKETFFT_PARTSTEP11a(1,10,tw1r,tw2r,tw3r,tw4r,tw5r,+tw1i,+tw2i,+tw3i,+tw4i,+tw5i)
1291 POCKETFFT_PARTSTEP11a(2, 9,tw2r,tw4r,tw5r,tw3r,tw1r,+tw2i,+tw4i,-tw5i,-tw3i,-tw1i)
1292 POCKETFFT_PARTSTEP11a(3, 8,tw3r,tw5r,tw2r,tw1r,tw4r,+tw3i,-tw5i,-tw2i,+tw1i,+tw4i)
1293 POCKETFFT_PARTSTEP11a(4, 7,tw4r,tw3r,tw1r,tw5r,tw2r,+tw4i,-tw3i,+tw1i,+tw5i,-tw2i)
1294 POCKETFFT_PARTSTEP11a(5, 6,tw5r,tw1r,tw4r,tw2r,tw3r,+tw5i,-tw1i,+tw4i,-tw2i,+tw3i)
1296 for (
size_t i=1; i<ido; ++i)
1299 POCKETFFT_PARTSTEP11(1,10,tw1r,tw2r,tw3r,tw4r,tw5r,+tw1i,+tw2i,+tw3i,+tw4i,+tw5i)
1300 POCKETFFT_PARTSTEP11(2, 9,tw2r,tw4r,tw5r,tw3r,tw1r,+tw2i,+tw4i,-tw5i,-tw3i,-tw1i)
1301 POCKETFFT_PARTSTEP11(3, 8,tw3r,tw5r,tw2r,tw1r,tw4r,+tw3i,-tw5i,-tw2i,+tw1i,+tw4i)
1302 POCKETFFT_PARTSTEP11(4, 7,tw4r,tw3r,tw1r,tw5r,tw2r,+tw4i,-tw3i,+tw1i,+tw5i,-tw2i)
1303 POCKETFFT_PARTSTEP11(5, 6,tw5r,tw1r,tw4r,tw2r,tw3r,+tw5i,-tw1i,+tw4i,-tw2i,+tw3i)
1308#undef POCKETFFT_PARTSTEP11
1309#undef POCKETFFT_PARTSTEP11a0
1310#undef POCKETFFT_PARTSTEP11a
1311#undef POCKETFFT_PREP11
1313template<
bool fwd,
typename T>
void passg (
size_t ido,
size_t ip,
1318 const size_t cdim=ip;
1319 size_t ipph = (ip+1)/2;
1320 size_t idl1 = ido*l1;
1322 auto CH = [ch,ido,l1](
size_t a,
size_t b,
size_t c) -> T&
1323 {
return ch[a+ido*(b+l1*c)]; };
1324 auto CC = [cc,ido,cdim](
size_t a,
size_t b,
size_t c) ->
const T&
1325 {
return cc[a+ido*(b+cdim*c)]; };
1326 auto CX = [cc, ido, l1](
size_t a,
size_t b,
size_t c) -> T&
1327 {
return cc[a+ido*(b+l1*c)]; };
1328 auto CX2 = [cc, idl1](
size_t a,
size_t b) -> T&
1329 {
return cc[a+idl1*b]; };
1330 auto CH2 = [ch, idl1](
size_t a,
size_t b) ->
const T&
1331 {
return ch[a+idl1*b]; };
1333 arr<cmplx<T0>> wal(ip);
1334 wal[0] = cmplx<T0>(1., 0.);
1335 for (
size_t i=1; i<ip; ++i)
1336 wal[i]=cmplx<T0>(csarr[i].r,fwd ? -csarr[i].i : csarr[i].i);
1338 for (
size_t k=0; k<l1; ++k)
1339 for (
size_t i=0; i<ido; ++i)
1340 CH(i,k,0) = CC(i,0,k);
1341 for (
size_t j=1, jc=ip-1; j<ipph; ++j, --jc)
1342 for (
size_t k=0; k<l1; ++k)
1343 for (
size_t i=0; i<ido; ++i)
1344 PM(CH(i,k,j),CH(i,k,jc),CC(i,j,k),CC(i,jc,k));
1345 for (
size_t k=0; k<l1; ++k)
1346 for (
size_t i=0; i<ido; ++i)
1349 for (
size_t j=1; j<ipph; ++j)
1353 for (
size_t l=1, lc=ip-1; l<ipph; ++l, --lc)
1356 for (
size_t ik=0; ik<idl1; ++ik)
1358 CX2(ik,l).r = CH2(ik,0).r+wal[l].r*CH2(ik,1).r+wal[2*l].r*CH2(ik,2).r;
1359 CX2(ik,l).i = CH2(ik,0).i+wal[l].r*CH2(ik,1).i+wal[2*l].r*CH2(ik,2).i;
1360 CX2(ik,lc).r=-wal[l].i*CH2(ik,ip-1).i-wal[2*l].i*CH2(ik,ip-2).i;
1361 CX2(ik,lc).i=wal[l].i*CH2(ik,ip-1).r+wal[2*l].i*CH2(ik,ip-2).r;
1365 size_t j=3, jc=ip-3;
1366 for (; j<ipph-1; j+=2, jc-=2)
1368 iwal+=l;
if (iwal>ip) iwal-=ip;
1369 cmplx<T0> xwal=wal[iwal];
1370 iwal+=l;
if (iwal>ip) iwal-=ip;
1371 cmplx<T0> xwal2=wal[iwal];
1372 for (
size_t ik=0; ik<idl1; ++ik)
1374 CX2(ik,l).r += CH2(ik,j).r*xwal.r+CH2(ik,j+1).r*xwal2.r;
1375 CX2(ik,l).i += CH2(ik,j).i*xwal.r+CH2(ik,j+1).i*xwal2.r;
1376 CX2(ik,lc).r -= CH2(ik,jc).i*xwal.i+CH2(ik,jc-1).i*xwal2.i;
1377 CX2(ik,lc).i += CH2(ik,jc).r*xwal.i+CH2(ik,jc-1).r*xwal2.i;
1380 for (; j<ipph; ++j, --jc)
1382 iwal+=l;
if (iwal>ip) iwal-=ip;
1383 cmplx<T0> xwal=wal[iwal];
1384 for (
size_t ik=0; ik<idl1; ++ik)
1386 CX2(ik,l).r += CH2(ik,j).r*xwal.r;
1387 CX2(ik,l).i += CH2(ik,j).i*xwal.r;
1388 CX2(ik,lc).r -= CH2(ik,jc).i*xwal.i;
1389 CX2(ik,lc).i += CH2(ik,jc).r*xwal.i;
1396 for (
size_t j=1, jc=ip-1; j<ipph; ++j, --jc)
1397 for (
size_t ik=0; ik<idl1; ++ik)
1399 T t1=CX2(ik,j), t2=CX2(ik,jc);
1400 PM(CX2(ik,j),CX2(ik,jc),t1,t2);
1404 for (
size_t j=1, jc=ip-1; j<ipph; ++j,--jc)
1405 for (
size_t k=0; k<l1; ++k)
1407 T t1=CX(0,k,j), t2=CX(0,k,jc);
1408 PM(CX(0,k,j),CX(0,k,jc),t1,t2);
1409 for (
size_t i=1; i<ido; ++i)
1412 PM(x1,x2,CX(i,k,j),CX(i,k,jc));
1413 size_t idij=(j-1)*(ido-1)+i-1;
1414 special_mul<fwd>(x1,wa[idij],CX(i,k,j));
1415 idij=(jc-1)*(ido-1)+i-1;
1416 special_mul<fwd>(x2,wa[idij],CX(i,k,jc));
1422template<
bool fwd,
typename T>
void pass_all(T c[], T0 fct)
const
1424 if (length==1) { c[0]*=fct;
return; }
1427 T *p1=c, *p2=ch.data();
1429 for(
size_t k1=0; k1<fact.size(); k1++)
1431 size_t ip=fact[k1].fct;
1433 size_t ido = length/l2;
1435 pass4<fwd> (ido, l1, p1, p2, fact[k1].tw);
1437 pass8<fwd>(ido, l1, p1, p2, fact[k1].tw);
1439 pass2<fwd>(ido, l1, p1, p2, fact[k1].tw);
1441 pass3<fwd> (ido, l1, p1, p2, fact[k1].tw);
1443 pass5<fwd> (ido, l1, p1, p2, fact[k1].tw);
1445 pass7<fwd> (ido, l1, p1, p2, fact[k1].tw);
1447 pass11<fwd> (ido, l1, p1, p2, fact[k1].tw);
1450 passg<fwd>(ido, ip, l1, p1, p2, fact[k1].tw, fact[k1].tws);
1459 for (
size_t i=0; i<length; ++i)
1462 std::copy_n (p1, length, c);
1466 for (
size_t i=0; i<length; ++i)
1471 template<
typename T>
void exec(T c[], T0 fct,
bool fwd)
const
1472 { fwd ? pass_all<true>(c, fct) : pass_all<false>(c, fct); }
1479 { add_factor(8); len>>=3; }
1481 { add_factor(4); len>>=2; }
1487 std::swap(fact[0].fct, fact.back().fct);
1489 for (
size_t divisor=3; divisor*divisor<=len; divisor+=2)
1490 while ((len%divisor)==0)
1492 add_factor(divisor);
1495 if (len>1) add_factor(len);
1498 size_t twsize()
const
1500 size_t twsize=0, l1=1;
1501 for (
size_t k=0; k<fact.size(); ++k)
1503 size_t ip=fact[k].fct, ido= length/(l1*ip);
1504 twsize+=(ip-1)*(ido-1);
1514 sincos_2pibyn<T0> twiddle(length);
1517 for (
size_t k=0; k<fact.size(); ++k)
1519 size_t ip=fact[k].fct, ido=length/(l1*ip);
1520 fact[k].tw=mem.
data()+memofs;
1521 memofs+=(ip-1)*(ido-1);
1522 for (
size_t j=1; j<ip; ++j)
1523 for (
size_t i=1; i<ido; ++i)
1524 fact[k].tw[(j-1)*(ido-1)+i-1] = twiddle[j*l1*i];
1527 fact[k].tws=mem.
data()+memofs;
1529 for (
size_t j=0; j<ip; ++j)
1530 fact[k].tws[j] = twiddle[j*l1*ido];
1540 if (length==0)
throw std::runtime_error(
"zero-length FFT requested");
1541 if (length==1)
return;
1543 mem.resize(twsize());
1563 std::vector<fctdata> fact;
1565 void add_factor(
size_t factor)
1566 { fact.push_back({factor,
nullptr,
nullptr}); }
1569template<
typename T1,
typename T2,
typename T3>
inline void MULPM
1570 (T1 &a, T1 &b, T2 c, T2 d, T3 e, T3 f)
const
1571 { a=c*e+d*f; b=c*f-d*e; }
1573template<
typename T>
void radf2 (
size_t ido,
size_t l1,
1577 auto WA = [wa,ido](
size_t x,
size_t i) {
return wa[i+x*(ido-1)]; };
1578 auto CC = [cc,ido,l1](
size_t a,
size_t b,
size_t c) ->
const T&
1579 {
return cc[a+ido*(b+l1*c)]; };
1580 auto CH = [ch,ido](
size_t a,
size_t b,
size_t c) -> T&
1581 {
return ch[a+ido*(b+2*c)]; };
1583 for (
size_t k=0; k<l1; k++)
1584 PM (CH(0,0,k),CH(ido-1,1,k),CC(0,k,0),CC(0,k,1));
1586 for (
size_t k=0; k<l1; k++)
1588 CH( 0,1,k) = -CC(ido-1,k,1);
1589 CH(ido-1,0,k) = CC(ido-1,k,0);
1592 for (
size_t k=0; k<l1; k++)
1593 for (
size_t i=2; i<ido; i+=2)
1597 MULPM (tr2,ti2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1));
1598 PM (CH(i-1,0,k),CH(ic-1,1,k),CC(i-1,k,0),tr2);
1599 PM (CH(i ,0,k),CH(ic ,1,k),ti2,CC(i ,k,0));
1604#define POCKETFFT_REARRANGE(rx, ix, ry, iy) \
1606 auto t1=rx+ry, t2=ry-rx, t3=ix+iy, t4=ix-iy; \
1607 rx=t1; ix=t3; ry=t4; iy=t2; \
1610template<
typename T>
void radf3(
size_t ido,
size_t l1,
1614 constexpr T0 taur=-0.5, taui=T0(0.8660254037844386467637231707529362L);
1616 auto WA = [wa,ido](
size_t x,
size_t i) {
return wa[i+x*(ido-1)]; };
1617 auto CC = [cc,ido,l1](
size_t a,
size_t b,
size_t c) ->
const T&
1618 {
return cc[a+ido*(b+l1*c)]; };
1619 auto CH = [ch,ido](
size_t a,
size_t b,
size_t c) -> T&
1620 {
return ch[a+ido*(b+3*c)]; };
1622 for (
size_t k=0; k<l1; k++)
1624 T cr2=CC(0,k,1)+CC(0,k,2);
1625 CH(0,0,k) = CC(0,k,0)+cr2;
1626 CH(0,2,k) = taui*(CC(0,k,2)-CC(0,k,1));
1627 CH(ido-1,1,k) = CC(0,k,0)+taur*cr2;
1630 for (
size_t k=0; k<l1; k++)
1631 for (
size_t i=2; i<ido; i+=2)
1634 T di2, di3, dr2, dr3;
1635 MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1));
1636 MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2));
1638 CH(i-1,0,k) = CC(i-1,k,0)+dr2;
1639 CH(i ,0,k) = CC(i ,k,0)+di2;
1640 T tr2 = CC(i-1,k,0)+taur*dr2;
1641 T ti2 = CC(i ,k,0)+taur*di2;
1644 PM(CH(i-1,2,k),CH(ic-1,1,k),tr2,tr3);
1645 PM(CH(i ,2,k),CH(ic ,1,k),ti3,ti2);
1649template<
typename T>
void radf4(
size_t ido,
size_t l1,
1653 constexpr T0 hsqt2=T0(0.707106781186547524400844362104849L);
1655 auto WA = [wa,ido](
size_t x,
size_t i) {
return wa[i+x*(ido-1)]; };
1656 auto CC = [cc,ido,l1](
size_t a,
size_t b,
size_t c) ->
const T&
1657 {
return cc[a+ido*(b+l1*c)]; };
1658 auto CH = [ch,ido](
size_t a,
size_t b,
size_t c) -> T&
1659 {
return ch[a+ido*(b+4*c)]; };
1661 for (
size_t k=0; k<l1; k++)
1664 PM (tr1,CH(0,2,k),CC(0,k,3),CC(0,k,1));
1665 PM (tr2,CH(ido-1,1,k),CC(0,k,0),CC(0,k,2));
1666 PM (CH(0,0,k),CH(ido-1,3,k),tr2,tr1);
1669 for (
size_t k=0; k<l1; k++)
1671 T ti1=-hsqt2*(CC(ido-1,k,1)+CC(ido-1,k,3));
1672 T tr1= hsqt2*(CC(ido-1,k,1)-CC(ido-1,k,3));
1673 PM (CH(ido-1,0,k),CH(ido-1,2,k),CC(ido-1,k,0),tr1);
1674 PM (CH( 0,3,k),CH( 0,1,k),ti1,CC(ido-1,k,2));
1677 for (
size_t k=0; k<l1; k++)
1678 for (
size_t i=2; i<ido; i+=2)
1681 T ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
1682 MULPM(cr2,ci2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1));
1683 MULPM(cr3,ci3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2));
1684 MULPM(cr4,ci4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3));
1685 PM(tr1,tr4,cr4,cr2);
1686 PM(ti1,ti4,ci2,ci4);
1687 PM(tr2,tr3,CC(i-1,k,0),cr3);
1688 PM(ti2,ti3,CC(i ,k,0),ci3);
1689 PM(CH(i-1,0,k),CH(ic-1,3,k),tr2,tr1);
1690 PM(CH(i ,0,k),CH(ic ,3,k),ti1,ti2);
1691 PM(CH(i-1,2,k),CH(ic-1,1,k),tr3,ti4);
1692 PM(CH(i ,2,k),CH(ic ,1,k),tr4,ti3);
1696template<
typename T>
void radf5(
size_t ido,
size_t l1,
1700 constexpr T0 tr11= T0(0.3090169943749474241022934171828191L),
1701 ti11= T0(0.9510565162951535721164393333793821L),
1702 tr12= T0(-0.8090169943749474241022934171828191L),
1703 ti12= T0(0.5877852522924731291687059546390728L);
1705 auto WA = [wa,ido](
size_t x,
size_t i) {
return wa[i+x*(ido-1)]; };
1706 auto CC = [cc,ido,l1](
size_t a,
size_t b,
size_t c) ->
const T&
1707 {
return cc[a+ido*(b+l1*c)]; };
1708 auto CH = [ch,ido](
size_t a,
size_t b,
size_t c) -> T&
1709 {
return ch[a+ido*(b+5*c)]; };
1711 for (
size_t k=0; k<l1; k++)
1713 T cr2, cr3, ci4, ci5;
1714 PM (cr2,ci5,CC(0,k,4),CC(0,k,1));
1715 PM (cr3,ci4,CC(0,k,3),CC(0,k,2));
1716 CH(0,0,k)=CC(0,k,0)+cr2+cr3;
1717 CH(ido-1,1,k)=CC(0,k,0)+tr11*cr2+tr12*cr3;
1718 CH(0,2,k)=ti11*ci5+ti12*ci4;
1719 CH(ido-1,3,k)=CC(0,k,0)+tr12*cr2+tr11*cr3;
1720 CH(0,4,k)=ti12*ci5-ti11*ci4;
1723 for (
size_t k=0; k<l1;++k)
1724 for (
size_t i=2, ic=ido-2; i<ido; i+=2, ic-=2)
1726 T di2, di3, di4, di5, dr2, dr3, dr4, dr5;
1727 MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1));
1728 MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2));
1729 MULPM (dr4,di4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3));
1730 MULPM (dr5,di5,WA(3,i-2),WA(3,i-1),CC(i-1,k,4),CC(i,k,4));
1733 CH(i-1,0,k)=CC(i-1,k,0)+dr2+dr3;
1734 CH(i ,0,k)=CC(i ,k,0)+di2+di3;
1735 T tr2=CC(i-1,k,0)+tr11*dr2+tr12*dr3;
1736 T ti2=CC(i ,k,0)+tr11*di2+tr12*di3;
1737 T tr3=CC(i-1,k,0)+tr12*dr2+tr11*dr3;
1738 T ti3=CC(i ,k,0)+tr12*di2+tr11*di3;
1739 T tr5 = ti11*dr5 + ti12*dr4;
1740 T ti5 = ti11*di5 + ti12*di4;
1741 T tr4 = ti12*dr5 - ti11*dr4;
1742 T ti4 = ti12*di5 - ti11*di4;
1743 PM(CH(i-1,2,k),CH(ic-1,1,k),tr2,tr5);
1744 PM(CH(i ,2,k),CH(ic ,1,k),ti5,ti2);
1745 PM(CH(i-1,4,k),CH(ic-1,3,k),tr3,tr4);
1746 PM(CH(i ,4,k),CH(ic ,3,k),ti4,ti3);
1750#undef POCKETFFT_REARRANGE
1752template<
typename T>
void radfg(
size_t ido,
size_t ip,
size_t l1,
1756 const size_t cdim=ip;
1757 size_t ipph=(ip+1)/2;
1758 size_t idl1 = ido*l1;
1760 auto CC = [cc,ido,cdim](
size_t a,
size_t b,
size_t c) -> T&
1761 {
return cc[a+ido*(b+cdim*c)]; };
1762 auto CH = [ch,ido,l1](
size_t a,
size_t b,
size_t c) ->
const T&
1763 {
return ch[a+ido*(b+l1*c)]; };
1764 auto C1 = [cc,ido,l1] (
size_t a,
size_t b,
size_t c) -> T&
1765 {
return cc[a+ido*(b+l1*c)]; };
1766 auto C2 = [cc,idl1] (
size_t a,
size_t b) -> T&
1767 {
return cc[a+idl1*b]; };
1768 auto CH2 = [ch,idl1] (
size_t a,
size_t b) -> T&
1769 {
return ch[a+idl1*b]; };
1773 for (
size_t j=1, jc=ip-1; j<ipph; ++j,--jc)
1775 size_t is=(j-1)*(ido-1),
1777 for (
size_t k=0; k<l1; ++k)
1781 for (
size_t i=1; i<=ido-2; i+=2)
1783 T t1=C1(i,k,j ), t2=C1(i+1,k,j ),
1784 t3=C1(i,k,jc), t4=C1(i+1,k,jc);
1785 T x1=wa[idij]*t1 + wa[idij+1]*t2,
1786 x2=wa[idij]*t2 - wa[idij+1]*t1,
1787 x3=wa[idij2]*t3 + wa[idij2+1]*t4,
1788 x4=wa[idij2]*t4 - wa[idij2+1]*t3;
1789 PM(C1(i,k,j),C1(i+1,k,jc),x3,x1);
1790 PM(C1(i+1,k,j),C1(i,k,jc),x2,x4);
1798 for (
size_t j=1, jc=ip-1; j<ipph; ++j,--jc)
1799 for (
size_t k=0; k<l1; ++k)
1805 for (
size_t l=1,lc=ip-1; l<ipph; ++l,--lc)
1807 for (
size_t ik=0; ik<idl1; ++ik)
1809 CH2(ik,l ) = C2(ik,0)+csarr[2*l]*C2(ik,1)+csarr[4*l]*C2(ik,2);
1810 CH2(ik,lc) = csarr[2*l+1]*C2(ik,ip-1)+csarr[4*l+1]*C2(ik,ip-2);
1813 size_t j=3, jc=ip-3;
1814 for (; j<ipph-3; j+=4,jc-=4)
1816 iang+=l;
if (iang>=ip) iang-=ip;
1817 T0 ar1=csarr[2*iang], ai1=csarr[2*iang+1];
1818 iang+=l;
if (iang>=ip) iang-=ip;
1819 T0 ar2=csarr[2*iang], ai2=csarr[2*iang+1];
1820 iang+=l;
if (iang>=ip) iang-=ip;
1821 T0 ar3=csarr[2*iang], ai3=csarr[2*iang+1];
1822 iang+=l;
if (iang>=ip) iang-=ip;
1823 T0 ar4=csarr[2*iang], ai4=csarr[2*iang+1];
1824 for (
size_t ik=0; ik<idl1; ++ik)
1826 CH2(ik,l ) += ar1*C2(ik,j )+ar2*C2(ik,j +1)
1827 +ar3*C2(ik,j +2)+ar4*C2(ik,j +3);
1828 CH2(ik,lc) += ai1*C2(ik,jc)+ai2*C2(ik,jc-1)
1829 +ai3*C2(ik,jc-2)+ai4*C2(ik,jc-3);
1832 for (; j<ipph-1; j+=2,jc-=2)
1834 iang+=l;
if (iang>=ip) iang-=ip;
1835 T0 ar1=csarr[2*iang], ai1=csarr[2*iang+1];
1836 iang+=l;
if (iang>=ip) iang-=ip;
1837 T0 ar2=csarr[2*iang], ai2=csarr[2*iang+1];
1838 for (
size_t ik=0; ik<idl1; ++ik)
1840 CH2(ik,l ) += ar1*C2(ik,j )+ar2*C2(ik,j +1);
1841 CH2(ik,lc) += ai1*C2(ik,jc)+ai2*C2(ik,jc-1);
1844 for (; j<ipph; ++j,--jc)
1846 iang+=l;
if (iang>=ip) iang-=ip;
1847 T0 ar=csarr[2*iang], ai=csarr[2*iang+1];
1848 for (
size_t ik=0; ik<idl1; ++ik)
1850 CH2(ik,l ) += ar*C2(ik,j );
1851 CH2(ik,lc) += ai*C2(ik,jc);
1855 for (
size_t ik=0; ik<idl1; ++ik)
1856 CH2(ik,0) = C2(ik,0);
1857 for (
size_t j=1; j<ipph; ++j)
1858 for (
size_t ik=0; ik<idl1; ++ik)
1859 CH2(ik,0) += C2(ik,j);
1864 for (
size_t k=0; k<l1; ++k)
1865 for (
size_t i=0; i<ido; ++i)
1866 CC(i,0,k) = CH(i,k,0);
1868 for (
size_t j=1, jc=ip-1; j<ipph; ++j,--jc)
1871 for (
size_t k=0; k<l1; ++k)
1873 CC(ido-1,j2,k) = CH(0,k,j);
1874 CC(0,j2+1,k) = CH(0,k,jc);
1880 for (
size_t j=1, jc=ip-1; j<ipph; ++j,--jc)
1883 for(
size_t k=0; k<l1; ++k)
1884 for(
size_t i=1, ic=ido-i-2; i<=ido-2; i+=2, ic-=2)
1886 CC(i ,j2+1,k) = CH(i ,k,j )+CH(i ,k,jc);
1887 CC(ic ,j2 ,k) = CH(i ,k,j )-CH(i ,k,jc);
1888 CC(i+1 ,j2+1,k) = CH(i+1,k,j )+CH(i+1,k,jc);
1889 CC(ic+1,j2 ,k) = CH(i+1,k,jc)-CH(i+1,k,j );
1894template<
typename T>
void radb2(
size_t ido,
size_t l1,
1898 auto WA = [wa,ido](
size_t x,
size_t i) {
return wa[i+x*(ido-1)]; };
1899 auto CC = [cc,ido](
size_t a,
size_t b,
size_t c) ->
const T&
1900 {
return cc[a+ido*(b+2*c)]; };
1901 auto CH = [ch,ido,l1](
size_t a,
size_t b,
size_t c) -> T&
1902 {
return ch[a+ido*(b+l1*c)]; };
1904 for (
size_t k=0; k<l1; k++)
1905 PM (CH(0,k,0),CH(0,k,1),CC(0,0,k),CC(ido-1,1,k));
1907 for (
size_t k=0; k<l1; k++)
1909 CH(ido-1,k,0) = 2*CC(ido-1,0,k);
1910 CH(ido-1,k,1) =-2*CC(0 ,1,k);
1913 for (
size_t k=0; k<l1;++k)
1914 for (
size_t i=2; i<ido; i+=2)
1918 PM (CH(i-1,k,0),tr2,CC(i-1,0,k),CC(ic-1,1,k));
1919 PM (ti2,CH(i ,k,0),CC(i ,0,k),CC(ic ,1,k));
1920 MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ti2,tr2);
1924template<
typename T>
void radb3(
size_t ido,
size_t l1,
1928 constexpr T0 taur=-0.5, taui=T0(0.8660254037844386467637231707529362L);
1930 auto WA = [wa,ido](
size_t x,
size_t i) {
return wa[i+x*(ido-1)]; };
1931 auto CC = [cc,ido](
size_t a,
size_t b,
size_t c) ->
const T&
1932 {
return cc[a+ido*(b+3*c)]; };
1933 auto CH = [ch,ido,l1](
size_t a,
size_t b,
size_t c) -> T&
1934 {
return ch[a+ido*(b+l1*c)]; };
1936 for (
size_t k=0; k<l1; k++)
1938 T tr2=2*CC(ido-1,1,k);
1939 T cr2=CC(0,0,k)+taur*tr2;
1940 CH(0,k,0)=CC(0,0,k)+tr2;
1941 T ci3=2*taui*CC(0,2,k);
1942 PM (CH(0,k,2),CH(0,k,1),cr2,ci3);
1945 for (
size_t k=0; k<l1; k++)
1946 for (
size_t i=2, ic=ido-2; i<ido; i+=2, ic-=2)
1948 T tr2=CC(i-1,2,k)+CC(ic-1,1,k);
1949 T ti2=CC(i ,2,k)-CC(ic ,1,k);
1950 T cr2=CC(i-1,0,k)+taur*tr2;
1951 T ci2=CC(i ,0,k)+taur*ti2;
1952 CH(i-1,k,0)=CC(i-1,0,k)+tr2;
1953 CH(i ,k,0)=CC(i ,0,k)+ti2;
1954 T cr3=taui*(CC(i-1,2,k)-CC(ic-1,1,k));
1955 T ci3=taui*(CC(i ,2,k)+CC(ic ,1,k));
1956 T di2, di3, dr2, dr3;
1957 PM(dr3,dr2,cr2,ci3);
1958 PM(di2,di3,ci2,cr3);
1959 MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2);
1960 MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3);
1964template<
typename T>
void radb4(
size_t ido,
size_t l1,
1968 constexpr T0 sqrt2=T0(1.414213562373095048801688724209698L);
1970 auto WA = [wa,ido](
size_t x,
size_t i) {
return wa[i+x*(ido-1)]; };
1971 auto CC = [cc,ido](
size_t a,
size_t b,
size_t c) ->
const T&
1972 {
return cc[a+ido*(b+4*c)]; };
1973 auto CH = [ch,ido,l1](
size_t a,
size_t b,
size_t c) -> T&
1974 {
return ch[a+ido*(b+l1*c)]; };
1976 for (
size_t k=0; k<l1; k++)
1979 PM (tr2,tr1,CC(0,0,k),CC(ido-1,3,k));
1980 T tr3=2*CC(ido-1,1,k);
1982 PM (CH(0,k,0),CH(0,k,2),tr2,tr3);
1983 PM (CH(0,k,3),CH(0,k,1),tr1,tr4);
1986 for (
size_t k=0; k<l1; k++)
1989 PM (ti1,ti2,CC(0 ,3,k),CC(0 ,1,k));
1990 PM (tr2,tr1,CC(ido-1,0,k),CC(ido-1,2,k));
1991 CH(ido-1,k,0)=tr2+tr2;
1992 CH(ido-1,k,1)=sqrt2*(tr1-ti1);
1993 CH(ido-1,k,2)=ti2+ti2;
1994 CH(ido-1,k,3)=-sqrt2*(tr1+ti1);
1997 for (
size_t k=0; k<l1;++k)
1998 for (
size_t i=2; i<ido; i+=2)
2000 T ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
2002 PM (tr2,tr1,CC(i-1,0,k),CC(ic-1,3,k));
2003 PM (ti1,ti2,CC(i ,0,k),CC(ic ,3,k));
2004 PM (tr4,ti3,CC(i ,2,k),CC(ic ,1,k));
2005 PM (tr3,ti4,CC(i-1,2,k),CC(ic-1,1,k));
2006 PM (CH(i-1,k,0),cr3,tr2,tr3);
2007 PM (CH(i ,k,0),ci3,ti2,ti3);
2008 PM (cr4,cr2,tr1,tr4);
2009 PM (ci2,ci4,ti1,ti4);
2010 MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ci2,cr2);
2011 MULPM (CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),ci3,cr3);
2012 MULPM (CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),ci4,cr4);
2016template<
typename T>
void radb5(
size_t ido,
size_t l1,
2020 constexpr T0 tr11= T0(0.3090169943749474241022934171828191L),
2021 ti11= T0(0.9510565162951535721164393333793821L),
2022 tr12= T0(-0.8090169943749474241022934171828191L),
2023 ti12= T0(0.5877852522924731291687059546390728L);
2025 auto WA = [wa,ido](
size_t x,
size_t i) {
return wa[i+x*(ido-1)]; };
2026 auto CC = [cc,ido](
size_t a,
size_t b,
size_t c) ->
const T&
2027 {
return cc[a+ido*(b+5*c)]; };
2028 auto CH = [ch,ido,l1](
size_t a,
size_t b,
size_t c) -> T&
2029 {
return ch[a+ido*(b+l1*c)]; };
2031 for (
size_t k=0; k<l1; k++)
2033 T ti5=CC(0,2,k)+CC(0,2,k);
2034 T ti4=CC(0,4,k)+CC(0,4,k);
2035 T tr2=CC(ido-1,1,k)+CC(ido-1,1,k);
2036 T tr3=CC(ido-1,3,k)+CC(ido-1,3,k);
2037 CH(0,k,0)=CC(0,0,k)+tr2+tr3;
2038 T cr2=CC(0,0,k)+tr11*tr2+tr12*tr3;
2039 T cr3=CC(0,0,k)+tr12*tr2+tr11*tr3;
2041 MULPM(ci5,ci4,ti5,ti4,ti11,ti12);
2042 PM(CH(0,k,4),CH(0,k,1),cr2,ci5);
2043 PM(CH(0,k,3),CH(0,k,2),cr3,ci4);
2046 for (
size_t k=0; k<l1;++k)
2047 for (
size_t i=2, ic=ido-2; i<ido; i+=2, ic-=2)
2049 T tr2, tr3, tr4, tr5, ti2, ti3, ti4, ti5;
2050 PM(tr2,tr5,CC(i-1,2,k),CC(ic-1,1,k));
2051 PM(ti5,ti2,CC(i ,2,k),CC(ic ,1,k));
2052 PM(tr3,tr4,CC(i-1,4,k),CC(ic-1,3,k));
2053 PM(ti4,ti3,CC(i ,4,k),CC(ic ,3,k));
2054 CH(i-1,k,0)=CC(i-1,0,k)+tr2+tr3;
2055 CH(i ,k,0)=CC(i ,0,k)+ti2+ti3;
2056 T cr2=CC(i-1,0,k)+tr11*tr2+tr12*tr3;
2057 T ci2=CC(i ,0,k)+tr11*ti2+tr12*ti3;
2058 T cr3=CC(i-1,0,k)+tr12*tr2+tr11*tr3;
2059 T ci3=CC(i ,0,k)+tr12*ti2+tr11*ti3;
2060 T ci4, ci5, cr5, cr4;
2061 MULPM(cr5,cr4,tr5,tr4,ti11,ti12);
2062 MULPM(ci5,ci4,ti5,ti4,ti11,ti12);
2063 T dr2, dr3, dr4, dr5, di2, di3, di4, di5;
2064 PM(dr4,dr3,cr3,ci4);
2065 PM(di3,di4,ci3,cr4);
2066 PM(dr5,dr2,cr2,ci5);
2067 PM(di2,di5,ci2,cr5);
2068 MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2);
2069 MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3);
2070 MULPM(CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),di4,dr4);
2071 MULPM(CH(i,k,4),CH(i-1,k,4),WA(3,i-2),WA(3,i-1),di5,dr5);
2075template<
typename T>
void radbg(
size_t ido,
size_t ip,
size_t l1,
2079 const size_t cdim=ip;
2080 size_t ipph=(ip+1)/ 2;
2081 size_t idl1 = ido*l1;
2083 auto CC = [cc,ido,cdim](
size_t a,
size_t b,
size_t c) ->
const T&
2084 {
return cc[a+ido*(b+cdim*c)]; };
2085 auto CH = [ch,ido,l1](
size_t a,
size_t b,
size_t c) -> T&
2086 {
return ch[a+ido*(b+l1*c)]; };
2087 auto C1 = [cc,ido,l1](
size_t a,
size_t b,
size_t c) ->
const T&
2088 {
return cc[a+ido*(b+l1*c)]; };
2089 auto C2 = [cc,idl1](
size_t a,
size_t b) -> T&
2090 {
return cc[a+idl1*b]; };
2091 auto CH2 = [ch,idl1](
size_t a,
size_t b) -> T&
2092 {
return ch[a+idl1*b]; };
2094 for (
size_t k=0; k<l1; ++k)
2095 for (
size_t i=0; i<ido; ++i)
2096 CH(i,k,0) = CC(i,0,k);
2097 for (
size_t j=1, jc=ip-1; j<ipph; ++j, --jc)
2100 for (
size_t k=0; k<l1; ++k)
2102 CH(0,k,j ) = 2*CC(ido-1,j2,k);
2103 CH(0,k,jc) = 2*CC(0,j2+1,k);
2109 for (
size_t j=1, jc=ip-1; j<ipph; ++j,--jc)
2112 for (
size_t k=0; k<l1; ++k)
2113 for (
size_t i=1, ic=ido-i-2; i<=ido-2; i+=2, ic-=2)
2115 CH(i ,k,j ) = CC(i ,j2+1,k)+CC(ic ,j2,k);
2116 CH(i ,k,jc) = CC(i ,j2+1,k)-CC(ic ,j2,k);
2117 CH(i+1,k,j ) = CC(i+1,j2+1,k)-CC(ic+1,j2,k);
2118 CH(i+1,k,jc) = CC(i+1,j2+1,k)+CC(ic+1,j2,k);
2122 for (
size_t l=1,lc=ip-1; l<ipph; ++l,--lc)
2124 for (
size_t ik=0; ik<idl1; ++ik)
2126 C2(ik,l ) = CH2(ik,0)+csarr[2*l]*CH2(ik,1)+csarr[4*l]*CH2(ik,2);
2127 C2(ik,lc) = csarr[2*l+1]*CH2(ik,ip-1)+csarr[4*l+1]*CH2(ik,ip-2);
2131 for(; j<ipph-3; j+=4,jc-=4)
2133 iang+=l;
if(iang>ip) iang-=ip;
2134 T0 ar1=csarr[2*iang], ai1=csarr[2*iang+1];
2135 iang+=l;
if(iang>ip) iang-=ip;
2136 T0 ar2=csarr[2*iang], ai2=csarr[2*iang+1];
2137 iang+=l;
if(iang>ip) iang-=ip;
2138 T0 ar3=csarr[2*iang], ai3=csarr[2*iang+1];
2139 iang+=l;
if(iang>ip) iang-=ip;
2140 T0 ar4=csarr[2*iang], ai4=csarr[2*iang+1];
2141 for (
size_t ik=0; ik<idl1; ++ik)
2143 C2(ik,l ) += ar1*CH2(ik,j )+ar2*CH2(ik,j +1)
2144 +ar3*CH2(ik,j +2)+ar4*CH2(ik,j +3);
2145 C2(ik,lc) += ai1*CH2(ik,jc)+ai2*CH2(ik,jc-1)
2146 +ai3*CH2(ik,jc-2)+ai4*CH2(ik,jc-3);
2149 for(; j<ipph-1; j+=2,jc-=2)
2151 iang+=l;
if(iang>ip) iang-=ip;
2152 T0 ar1=csarr[2*iang], ai1=csarr[2*iang+1];
2153 iang+=l;
if(iang>ip) iang-=ip;
2154 T0 ar2=csarr[2*iang], ai2=csarr[2*iang+1];
2155 for (
size_t ik=0; ik<idl1; ++ik)
2157 C2(ik,l ) += ar1*CH2(ik,j )+ar2*CH2(ik,j +1);
2158 C2(ik,lc) += ai1*CH2(ik,jc)+ai2*CH2(ik,jc-1);
2161 for(; j<ipph; ++j,--jc)
2163 iang+=l;
if(iang>ip) iang-=ip;
2164 T0 war=csarr[2*iang], wai=csarr[2*iang+1];
2165 for (
size_t ik=0; ik<idl1; ++ik)
2167 C2(ik,l ) += war*CH2(ik,j );
2168 C2(ik,lc) += wai*CH2(ik,jc);
2172 for (
size_t j=1; j<ipph; ++j)
2173 for (
size_t ik=0; ik<idl1; ++ik)
2174 CH2(ik,0) += CH2(ik,j);
2175 for (
size_t j=1, jc=ip-1; j<ipph; ++j,--jc)
2176 for (
size_t k=0; k<l1; ++k)
2177 PM(CH(0,k,jc),CH(0,k,j),C1(0,k,j),C1(0,k,jc));
2181 for (
size_t j=1, jc=ip-1; j<ipph; ++j, --jc)
2182 for (
size_t k=0; k<l1; ++k)
2183 for (
size_t i=1; i<=ido-2; i+=2)
2185 CH(i ,k,j ) = C1(i ,k,j)-C1(i+1,k,jc);
2186 CH(i ,k,jc) = C1(i ,k,j)+C1(i+1,k,jc);
2187 CH(i+1,k,j ) = C1(i+1,k,j)+C1(i ,k,jc);
2188 CH(i+1,k,jc) = C1(i+1,k,j)-C1(i ,k,jc);
2193 for (
size_t j=1; j<ip; ++j)
2195 size_t is = (j-1)*(ido-1);
2196 for (
size_t k=0; k<l1; ++k)
2199 for (
size_t i=1; i<=ido-2; i+=2)
2201 T t1=CH(i,k,j), t2=CH(i+1,k,j);
2202 CH(i ,k,j) = wa[idij]*t1-wa[idij+1]*t2;
2203 CH(i+1,k,j) = wa[idij]*t2+wa[idij+1]*t1;
2210 template<
typename T>
void copy_and_norm(T *c, T *p1, T0 fct)
const
2215 for (
size_t i=0; i<length; ++i)
2218 std::copy_n (p1, length, c);
2222 for (
size_t i=0; i<length; ++i)
2227 template<
typename T>
void exec(T c[], T0 fct,
bool r2hc)
const
2229 if (length==1) { c[0]*=fct;
return; }
2230 size_t nf=fact.size();
2232 T *p1=c, *p2=ch.
data();
2235 for(
size_t k1=0, l1=length; k1<nf;++k1)
2238 size_t ip=fact[k].fct;
2239 size_t ido=length / l1;
2242 radf4(ido, l1, p1, p2, fact[k].tw);
2244 radf2(ido, l1, p1, p2, fact[k].tw);
2246 radf3(ido, l1, p1, p2, fact[k].tw);
2248 radf5(ido, l1, p1, p2, fact[k].tw);
2250 { radfg(ido, ip, l1, p1, p2, fact[k].tw, fact[k].tws); std::swap (p1,p2); }
2254 for(
size_t k=0, l1=1; k<nf; k++)
2256 size_t ip = fact[k].fct,
2257 ido= length/(ip*l1);
2259 radb4(ido, l1, p1, p2, fact[k].tw);
2261 radb2(ido, l1, p1, p2, fact[k].tw);
2263 radb3(ido, l1, p1, p2, fact[k].tw);
2265 radb5(ido, l1, p1, p2, fact[k].tw);
2267 radbg(ido, ip, l1, p1, p2, fact[k].tw, fact[k].tws);
2272 copy_and_norm(c,p1,fct);
2280 { add_factor(4); len>>=2; }
2286 std::swap(fact[0].fct, fact.back().fct);
2288 for (
size_t divisor=3; divisor*divisor<=len; divisor+=2)
2289 while ((len%divisor)==0)
2291 add_factor(divisor);
2294 if (len>1) add_factor(len);
2297 size_t twsize()
const
2299 size_t twsz=0, l1=1;
2300 for (
size_t k=0; k<fact.size(); ++k)
2302 size_t ip=fact[k].fct, ido=length/(l1*ip);
2303 twsz+=(ip-1)*(ido-1);
2304 if (ip>5) twsz+=2*ip;
2312 sincos_2pibyn<T0> twid(length);
2315 for (
size_t k=0; k<fact.size(); ++k)
2317 size_t ip=fact[k].fct, ido=length/(l1*ip);
2318 if (k<fact.size()-1)
2320 fact[k].tw=ptr; ptr+=(ip-1)*(ido-1);
2321 for (
size_t j=1; j<ip; ++j)
2322 for (
size_t i=1; i<=(ido-1)/2; ++i)
2324 fact[k].tw[(j-1)*(ido-1)+2*i-2] = twid[j*l1*i].r;
2325 fact[k].tw[(j-1)*(ido-1)+2*i-1] = twid[j*l1*i].i;
2330 fact[k].tws=ptr; ptr+=2*ip;
2331 fact[k].tws[0] = 1.;
2332 fact[k].tws[1] = 0.;
2333 for (
size_t i=2, ic=2*ip-2; i<=ic; i+=2, ic-=2)
2335 fact[k].tws[i ] = twid[i/2*(length/ip)].r;
2336 fact[k].tws[i+1] = twid[i/2*(length/ip)].i;
2337 fact[k].tws[ic] = twid[i/2*(length/ip)].r;
2338 fact[k].tws[ic+1] = -twid[i/2*(length/ip)].i;
2349 if (length==0)
throw std::runtime_error(
"zero-length FFT requested");
2350 if (length==1)
return;
2352 mem.resize(twsize());