initial upload

This commit is contained in:
2025-12-17 10:53:43 +08:00
commit f3f1778f77
308 changed files with 129940 additions and 0 deletions

397
include/1d_models.h Normal file
View File

@@ -0,0 +1,397 @@
#ifndef ONE_D_MODELS_H
#define ONE_D_MODELS_H
#include <vector>
#include "config.h"
// ak135 http://rses.anu.edu.au/seismology/ak135/ak135t.html
// depth(km), P velocity(km/s)
inline const std::vector<std::vector<CUSTOMREAL>> model_1d_ak135 \
{{ 0.000, 5.8000},
{ 20.000, 5.8000},
{ 20.000, 6.5000},
{ 35.000, 6.5000},
{ 35.000, 8.0400},
{ 77.500, 8.0450},
{ 120.000, 8.0500},
{ 165.000, 8.1750},
{ 210.000, 8.3000},
{ 210.000, 8.3000},
{ 260.000, 8.4825},
{ 310.000, 8.6650},
{ 360.000, 8.8475},
{ 410.000, 9.0300},
{ 410.000, 9.3600},
{ 460.000, 9.5280},
{ 510.000, 9.6960},
{ 560.000, 9.8640},
{ 610.000, 10.0320},
{ 660.000, 10.2000},
{ 660.000, 10.7900},
{ 710.000, 10.9229},
{ 760.000, 11.0558},
{ 809.500, 11.1353},
{ 859.000, 11.2221},
{ 908.500, 11.3068},
{ 958.000, 11.3896},
{ 1007.500, 11.4705},
{ 1057.000, 11.5495},
{ 1106.500, 11.6269},
{ 1156.000, 11.7026},
{ 1205.500, 11.7766},
{ 1255.000, 11.8491},
{ 1304.500, 11.9200},
{ 1354.000, 11.9895},
{ 1403.500, 12.0577},
{ 1453.000, 12.1245},
{ 1502.500, 12.1912},
{ 1552.000, 12.2550},
{ 1601.500, 12.3185},
{ 1651.000, 12.3819},
{ 1700.500, 12.4426},
{ 1750.000, 12.5031},
{ 1799.500, 12.5631},
{ 1849.000, 12.6221},
{ 1898.500, 12.6804},
{ 1948.000, 12.7382},
{ 1997.500, 12.7956},
{ 2047.000, 12.8526},
{ 2096.500, 12.9096},
{ 2146.000, 12.9668},
{ 2195.500, 13.0222},
{ 2245.000, 13.0783},
{ 2294.500, 13.1336},
{ 2344.000, 13.1894},
{ 2393.500, 13.2465},
{ 2443.000, 13.3018},
{ 2492.500, 13.3585},
{ 2542.000, 13.4156},
{ 2591.500, 13.4741},
{ 2640.000, 13.5312},
{ 2690.000, 13.5900},
{ 2740.000, 13.6494},
{ 2740.000, 13.6494},
{ 2789.670, 13.6530},
{ 2839.330, 13.6566},
{ 2891.500, 13.6602},
{ 2891.500, 8.0000},
{ 2939.330, 8.0382},
{ 2989.660, 8.1283},
{ 3039.990, 8.2213},
{ 3090.320, 8.3122},
{ 3140.660, 8.4001},
{ 3190.990, 8.4861},
{ 3241.320, 8.5692},
{ 3291.650, 8.6496},
{ 3341.980, 8.7283},
{ 3392.310, 8.8036},
{ 3442.640, 8.8761},
{ 3492.970, 8.9461},
{ 3543.300, 9.0138},
{ 3593.640, 9.0792},
{ 3643.970, 9.1426},
{ 3694.300, 9.2042},
{ 3744.630, 9.2634},
{ 3794.960, 9.3205},
{ 3845.290, 9.3760},
{ 3895.620, 9.4297},
{ 3945.950, 9.4814},
{ 3996.280, 9.5306},
{ 4046.620, 9.5777},
{ 4096.950, 9.6232},
{ 4147.280, 9.6673},
{ 4197.610, 9.7100},
{ 4247.940, 9.7513},
{ 4298.270, 9.7914},
{ 4348.600, 9.8304},
{ 4398.930, 9.8682},
{ 4449.260, 9.9051},
{ 4499.600, 9.9410},
{ 4549.930, 9.9761},
{ 4600.260, 10.0103},
{ 4650.590, 10.0439},
{ 4700.920, 10.0768},
{ 4801.580, 10.1415},
{ 4851.910, 10.1739},
{ 4902.240, 10.2049},
{ 4952.580, 10.2329},
{ 5002.910, 10.2565},
{ 5053.240, 10.2745},
{ 5103.570, 10.2854},
{ 5153.500, 10.2890},
{ 5153.500, 11.0427},
{ 5204.610, 11.0585},
{ 5255.320, 11.0718},
{ 5306.040, 11.0850},
{ 5356.750, 11.0983},
{ 5407.460, 11.1166},
{ 5458.170, 11.1316},
{ 5508.890, 11.1457},
{ 5559.600, 11.1590},
{ 5610.310, 11.1715},
{ 5661.020, 11.1832},
{ 5711.740, 11.1941},
{ 5813.160, 11.2134},
{ 5863.870, 11.2219},
{ 5914.590, 11.2295},
{ 5965.300, 11.2364},
{ 6016.010, 11.2424},
{ 6066.720, 11.2477},
{ 6117.440, 11.2521},
{ 6168.150, 11.2557},
{ 6218.860, 11.2586},
{ 6269.570, 11.2606},
{ 6320.290, 11.2618},
{ 6371.000, 11.2622}};
// iasp91 model from http://ds.iris.edu/spud/earthmodel/9991809
inline const std::vector<std::vector<CUSTOMREAL>> model_1d_iasp91 \
{{0.00, 5.8000},
{1.00, 5.8000},
{2.00, 5.8000},
{3.00, 5.8000},
{4.00, 5.8000},
{5.00, 5.8000},
{6.00, 5.8000},
{7.00, 5.8000},
{8.00, 5.8000},
{9.00, 5.8000},
{10.00, 5.8000},
{11.00, 5.8000},
{12.00, 5.8000},
{13.00, 5.8000},
{14.00, 5.8000},
{15.00, 5.8000},
{16.00, 5.8000},
{17.00, 5.8000},
{18.00, 5.8000},
{19.00, 5.8000},
{20.00, 5.8000},
{20.00, 6.5000},
{21.00, 6.5000},
{22.00, 6.5000},
{23.00, 6.5000},
{24.00, 6.5000},
{25.00, 6.5000},
{26.00, 6.5000},
{27.00, 6.5000},
{28.00, 6.5000},
{29.00, 6.5000},
{30.00, 6.5000},
{31.00, 6.5000},
{32.00, 6.5000},
{33.00, 6.5000},
{34.00, 6.5000},
{35.00, 6.5000},
{35.00, 8.0400},
{40.00, 8.0406},
{45.00, 8.0412},
{50.00, 8.0418},
{60.00, 8.0429},
{70.00, 8.0441},
{80.00, 8.0453},
{90.00, 8.0465},
{100.00, 8.0476},
{110.00, 8.0488},
{120.00, 8.0500},
{120.00, 8.0500},
{130.00, 8.0778},
{140.00, 8.1056},
{150.00, 8.1333},
{160.00, 8.1611},
{170.00, 8.1889},
{180.00, 8.2167},
{190.00, 8.2444},
{200.00, 8.2722},
{210.00, 8.3000},
{210.00, 8.3000},
{220.00, 8.3365},
{230.00, 8.3730},
{240.00, 8.4095},
{250.00, 8.4460},
{260.00, 8.4825},
{270.00, 8.5190},
{280.00, 8.5555},
{290.00, 8.5920},
{300.00, 8.6285},
{310.00, 8.6650},
{320.00, 8.7015},
{330.00, 8.7380},
{340.00, 8.7745},
{350.00, 8.8110},
{360.00, 8.8475},
{370.00, 8.8840},
{380.00, 8.9205},
{390.00, 8.9570},
{400.00, 8.9935},
{410.00, 9.0300},
{410.00, 9.3600},
{420.00, 9.3936},
{430.00, 9.4272},
{440.00, 9.4608},
{450.00, 9.4944},
{460.00, 9.5280},
{470.00, 9.5616},
{480.00, 9.5952},
{490.00, 9.6288},
{500.00, 9.6624},
{510.00, 9.6960},
{520.00, 9.7296},
{530.00, 9.7632},
{540.00, 9.7968},
{550.00, 9.8304},
{560.00, 9.8640},
{570.00, 9.8976},
{580.00, 9.9312},
{590.00, 9.9648},
{600.00, 9.9984},
{610.00, 10.0320},
{620.00, 10.0656},
{630.00, 10.0992},
{640.00, 10.1328},
{650.00, 10.1664},
{660.00, 10.2000},
{660.00, 10.7900},
{670.00, 10.8166},
{680.00, 10.8432},
{690.00, 10.8697},
{700.00, 10.8963},
{710.00, 10.9229},
{720.00, 10.9495},
{730.00, 10.9761},
{740.00, 11.0026},
{750.00, 11.0292},
{760.00, 11.0558},
{760.00, 11.0558},
{770.00, 11.0738},
{780.00, 11.0917},
{790.00, 11.1095},
{800.00, 11.1272},
{900.00, 11.2997},
{1000.00, 11.4640},
{1100.00, 11.6208},
{1200.00, 11.7707},
{1300.00, 11.9142},
{1400.00, 12.0521},
{1500.00, 12.1849},
{2000.00, 12.7944},
{2500.00, 13.3697},
{2700.00, 13.6076},
{2740.00, 13.6564},
{2740.00, 13.6564},
{2750.00, 13.6587},
{2800.00, 13.6703},
{2850.00, 13.6818},
{2889.00, 13.6908},
{2889.00, 8.0088},
{2900.00, 8.0280},
{3000.00, 8.1995},
{3100.00, 8.3642},
{3200.00, 8.5222},
{3300.00, 8.6735},
{3400.00, 8.8180},
{3500.00, 8.9558},
{4000.00, 9.5437},
{4500.00, 9.9633},
{5153.90, 10.2578},
{5153.90, 11.0914},
{5500.00, 11.1644},
{6000.00, 11.2270},
{6371.00, 11.2409}};
// fortran test model
inline const std::vector<std::vector<CUSTOMREAL>> model_1d_prem \
= {{ 0.00, 5.80000},
{ 15.00, 5.80000},
{ 15.00, 6.80000},
{ 24.40, 6.80000},
{ 24.40, 8.11061},
{ 40.00, 8.10119},
{ 60.00, 8.08907},
{ 80.00, 8.07688},
{ 115.00, 8.05540},
{ 150.00, 8.03370},
{ 185.00, 8.01180},
{ 220.00, 7.98970},
{ 220.00, 8.55896},
{ 265.00, 8.64552},
{ 310.00, 8.73209},
{ 355.00, 8.81867},
{ 400.00, 8.90522},
{ 400.00, 9.13397},
{ 450.00, 9.38990},
{ 500.00, 9.64588},
{ 550.00, 9.90185},
{ 600.00, 10.15782},
{ 635.00, 10.21203},
{ 670.00, 10.26622},
{ 670.00, 10.75131},
{ 721.00, 10.91005},
{ 771.00, 11.06557},
{ 871.00, 11.24490},
{ 971.00, 11.41560},
{ 1071.00, 11.57828},
{ 1171.00, 11.73357},
{ 1271.00, 11.88209},
{ 1371.00, 12.02445},
{ 1471.00, 12.16126},
{ 1571.00, 12.29316},
{ 1671.00, 12.42075},
{ 1771.00, 12.54466},
{ 1871.00, 12.66550},
{ 1971.00, 12.78389},
{ 2071.00, 12.90045},
{ 2171.00, 13.01579},
{ 2271.00, 13.13055},
{ 2371.00, 13.24532},
{ 2471.00, 13.36074},
{ 2571.00, 13.47742},
{ 2671.00, 13.59597},
{ 2741.00, 13.68041},
{ 2771.00, 13.68753},
{ 2871.00, 13.71168},
{ 2891.00, 13.71660},
{ 2891.00, 8.06482},
{ 2971.00, 8.19939},
{ 3071.00, 8.36019},
{ 3171.00, 8.51298},
{ 3271.00, 8.65805},
{ 3371.00, 8.79573},
{ 3471.00, 8.92632},
{ 3571.00, 9.05015},
{ 3671.00, 9.16752},
{ 3771.00, 9.27867},
{ 3871.00, 9.38418},
{ 3971.00, 9.48409},
{ 4071.00, 9.57881},
{ 4171.00, 9.66865},
{ 4271.00, 9.75393},
{ 4371.00, 9.83496},
{ 4471.00, 9.91206},
{ 4571.00, 9.98554},
{ 4671.00, 10.05572},
{ 4771.00, 10.12291},
{ 4871.00, 10.18743},
{ 4971.00, 10.24959},
{ 5071.00, 10.30971},
{ 5149.50, 10.35568},
{ 5149.50, 11.02827},
{ 5171.00, 11.03643},
{ 5271.00, 11.07249},
{ 5371.00, 11.10542},
{ 5471.00, 11.13521},
{ 5571.00, 11.16186},
{ 5671.00, 11.18538},
{ 5771.00, 11.20576},
{ 5871.00, 11.22301},
{ 5971.00, 11.23712},
{ 6071.00, 11.24809},
{ 6171.00, 11.25593},
{ 6271.00, 11.26064},
{ 6371.00, 11.26220}};
#endif

332
include/config.h Normal file
View File

@@ -0,0 +1,332 @@
#ifndef CONFIG_H
#define CONFIG_H
#include <math.h>
#include <mpi.h>
#include <string>
#include <vector>
#include <limits>
#include <iostream>
#include "version.h"
// custom floating point accuracy
#ifdef SINGLE_PRECISION
#define CUSTOMREAL float
#define MPI_CR MPI_FLOAT
#else
#define CUSTOMREAL double
#define MPI_CR MPI_DOUBLE
#endif
#define MPI_DUMMY_TAG 1000
// version
inline const std::string TOMOATT_VERSION = std::to_string(PROJECT_VERSION_MAJOR) +
"." + std::to_string(PROJECT_VERSION_MINOR) +
"." + std::to_string(PROJECT_VERSION_PATCH);
inline int loc_I, loc_J, loc_K;
inline int loc_I_vis, loc_J_vis, loc_K_vis;
inline int loc_I_excl_ghost, loc_J_excl_ghost, loc_K_excl_ghost;
inline int n_inv_grids;
inline int n_inv_I_loc, n_inv_J_loc, n_inv_K_loc;
inline int n_inv_I_loc_ani, n_inv_J_loc_ani, n_inv_K_loc_ani;
// 3d indices to 1d index
#define I2V(A,B,C) ((C)*loc_I*loc_J + (B)*loc_I + A)
inline void V2I(const int& ijk, int& i, int& j, int& k) {
k = ijk / (loc_I * loc_J);
j = (ijk - k * loc_I * loc_J) / loc_I;
i = ijk - k * loc_I * loc_J - j * loc_I;
}
#define I2V_INV_GRIDS(A,B,C,D) ((D)*n_inv_I_loc*n_inv_J_loc*n_inv_K_loc + (C)*n_inv_I_loc*n_inv_J_loc + (B)*n_inv_I_loc + A)
#define I2V_INV_KNL(A,B,C) ((C)*n_inv_I_loc*n_inv_J_loc + (B)*n_inv_I_loc + A)
#define I2V_INV_GRIDS_1DK(A,B) ((B)*n_inv_K_loc + (A))
#define I2V_INV_GRIDS_2DJ(A,B,C) ((C)*n_inv_J_loc*n_inv_K_loc + (B)*n_inv_J_loc + (A))
#define I2V_INV_GRIDS_2DI(A,B,C) ((C)*n_inv_I_loc*n_inv_K_loc + (B)*n_inv_I_loc + (A))
#define I2V_INV_ANI_GRIDS(A,B,C,D) ((D)*n_inv_I_loc_ani*n_inv_J_loc_ani*n_inv_K_loc_ani + (C)*n_inv_I_loc_ani*n_inv_J_loc_ani + (B)*n_inv_I_loc_ani + A)
#define I2V_INV_ANI_KNL(A,B,C) ((C)*n_inv_I_loc_ani*n_inv_J_loc_ani + (B)*n_inv_I_loc_ani + A)
#define I2V_INV_ANI_GRIDS_1DK(A,B) ((B)*n_inv_K_loc_ani + (A))
#define I2V_INV_ANI_GRIDS_1DJ(A,B,C) ((C)*n_inv_J_loc_ani*n_inv_K_loc_ani + (B)*n_inv_J_loc_ani + (A))
#define I2V_INV_ANI_GRIDS_1DI(A,B,C) ((C)*n_inv_I_loc_ani*n_inv_K_loc_ani + (B)*n_inv_I_loc_ani + (A))
#define I2V_INV_GRIDS_1D_GENERIC(A,B,C_ninv) ((B)*C_ninv + (A))
#define I2V_INV_GRIDS_2D_GENERIC(A,B,C,D_ninv,E_inv) ((C)*D_ninv*E_inv + (B)*D_ninv + (A))
#define I2V_EXCL_GHOST(A,B,C) ((C)* loc_I_excl_ghost * loc_J_excl_ghost + (B)* loc_I_excl_ghost + A)
#define I2V_VIS(A,B,C) ((C)* loc_I_vis * loc_J_vis + (B)* loc_I_vis + A)
#define I2V_3D(A,B,C) ((C)* loc_I_excl_ghost*loc_J_excl_ghost + (B)* loc_I_excl_ghost + A)
#define I2V_ELM_CONN(A,B,C) ((C)*(loc_I_vis-1)*(loc_J_vis-1) + (B)*(loc_I_vis-1) + A)
#define I2VLBFGS(A,B,C,D) ((A)*loc_I*loc_J*loc_K + (D)*loc_I*loc_J + (C)*loc_I + B)
// 2d indices to 1d index
#define IJ2V(A,B,C) (C*loc_I*loc_J + (B)*loc_I + A)
#define JK2V(A,B,C) (C*loc_J*loc_K + (B)*loc_J + A)
#define IK2V(A,B,C) (C*loc_I*loc_K + (B)*loc_I + A)
inline const CUSTOMREAL eps = 1e-12;
inline const CUSTOMREAL epsAdj = 1e-6;
inline bool isZero(CUSTOMREAL x) {
return fabs(x) < eps;
}
inline bool isNegative(CUSTOMREAL x) {
return (x) < -eps;
}
inline bool isPositive(CUSTOMREAL x) {
return (x) > eps;
}
inline bool isZeroAdj(CUSTOMREAL x) {
return fabs(x) < epsAdj;
}
// constants
inline const CUSTOMREAL PI = 3.14159265358979323846264338327950288;
inline const CUSTOMREAL DEG2RAD = PI/180.0;
inline const CUSTOMREAL RAD2DEG = 180.0/PI;
inline const CUSTOMREAL M2KM = 0.001;
inline const CUSTOMREAL _0_CR = 0.0;
inline const CUSTOMREAL _0_5_CR = 0.5;
inline const CUSTOMREAL _1_CR = 1.0;
inline const CUSTOMREAL _1_5_CR = 1.5;
inline const CUSTOMREAL _2_CR = 2.0;
inline const CUSTOMREAL _3_CR = 3.0;
inline const CUSTOMREAL _4_CR = 4.0;
inline const CUSTOMREAL _8_CR = 8.0;
inline const CUSTOMREAL TAU_INITIAL_VAL = 1.0;
inline const CUSTOMREAL TAU_INF_VAL = 20.0;
inline const int SWEEPING_COEFF = 1.0; // coefficient for calculationg sigr/sigt/sigp for cpu
// ASCII output precision
typedef std::numeric_limits< CUSTOMREAL > dbl;
inline const int ASCII_OUTPUT_PRECISION = dbl::max_digits10;
// radious of the earth in km
//inline CUSTOMREAL R_earth = 6378.1370;
inline CUSTOMREAL R_earth = 6371.0; // for compatibility with fortran code
inline const CUSTOMREAL GAMMA = 0.0;
inline const CUSTOMREAL r_kermel_mask = 40.0;
inline CUSTOMREAL step_length_init = 0.01; // update step size limit
inline CUSTOMREAL step_length_init_sc = 0.001; // update step size limit (for station correction)
inline CUSTOMREAL step_length_decay = 0.9;
inline CUSTOMREAL step_length_down = 0.5;
inline CUSTOMREAL step_length_up = 1.2;
inline CUSTOMREAL step_length_lbfgs;
inline int step_method = 1; // 0modulate according to obj, 1: modulate according to gradient direction.
inline CUSTOMREAL step_length_gradient_angle = 120.0;
inline const int OBJ_DEFINED = 0;
inline const int GRADIENT_DEFINED = 1;
// halve steping params
inline const CUSTOMREAL HALVE_STEP_RATIO = 0.7;
inline const CUSTOMREAL MAX_DIFF_RATIO_VOBJ = 0.8; // maximum difference ratio between vobj_t+1 and vobj_t
inline const CUSTOMREAL HALVE_STEP_RESTORAION_RATIO = 0.7; // no restoration if == HALVE_STEP_RATIO
// RUN MODE TYPE FLAG
inline const int ONLY_FORWARD = 0;
inline const int DO_INVERSION = 1;
inline const int SRC_RELOCATION = 2;
inline const int INV_RELOC = 3;
inline const int ONED_INVERSION = 4;
inline const int TELESEIS_PREPROCESS = -1; // hiden function
// SWEEPING TYPE FLAG
inline const int SWEEP_TYPE_LEGACY = 0;
inline const int SWEEP_TYPE_LEVEL = 1;
inline bool hybrid_stencil_order = false; // if true, code at first run 1st order, then change to 3rd order (Dong Cui 2020)
// STENCIL TYPE
inline const int NON_UPWIND = 0;
inline const int UPWIND = 1;
// convert depth <-> radius
inline CUSTOMREAL depth2radius(CUSTOMREAL depth) {
return R_earth - depth;
}
inline CUSTOMREAL radius2depth(CUSTOMREAL radius) {
return R_earth - radius;
}
// parallel calculation parameters
// here we use so called "inline variables" for defining global variables
inline int ngrid_i = 0; // number of grid points in i direction
inline int ngrid_j = 0; // number of grid points in j direction
inline int ngrid_k = 0; // number of grid points in k direction
//inline int ngrid_i_inv = 0; // number of inversion grid points in i direction # DUPLICATED with n_inv_I_loc as all the subdomains have the same number of inversion grid points
//inline int ngrid_j_inv = 0; // number of inversion grid points in j direction # DUPLICATED with n_inv_J_loc
//inline int ngrid_k_inv = 0; // number of inversion grid points in k direction # DUPLICATED with n_inv_K_loc
//inline int ngrid_i_inv_ani = 0; // number of inversion grid points in i direction for anisotropy # DUPLICATED with n_inv_I_loc_ani
//inline int ngrid_j_inv_ani = 0; // number of inversion grid points in j direction for anisotropy # DUPLICATED with n_inv_J_loc_ani
//inline int ngrid_k_inv_ani = 0; // number of inversion grid points in k direction for anisotropy # DUPLICATED with n_inv_K_loc_ani
// inversion grid type flags
#define INV_GRID_REGULAR 0
#define INV_GRID_FLEX 1
#define INV_GRID_TRAPE 2
// mpi parameters
inline int world_nprocs; // total number of processes (all groups)
inline int world_rank; // mpi rank of this process (all groups)
inline int sim_nprocs; // number of processes in this simulation group
inline int sim_rank; // mpi rank of this process in this simulation group
inline int inter_sim_rank;
inline int sub_nprocs; // total number of processes
inline int sub_rank; // mpi rank of this process
inline int inter_sub_rank; // mpi rank of this process in the inter-subdomain communicator
inline int inter_sub_nprocs; // number of processes in the inter-subdomain communicator
inline int nprocs; // = n subdomains
inline int myrank; // = id subdomain
inline MPI_Comm sim_comm, inter_sim_comm, sub_comm, inter_sub_comm; // mpi communicator for simulation, inter-simulation, subdomain, and inter subdomains
inline int n_sims = 1; // number of mpi groups for simultaneous runs
inline int n_procs_each_sim = 1; // number of processes in each simulation group
inline int n_subdomains = 1; // number of subdomains
inline int n_subprocs = 1; // number of sub processes in each subdomain
inline int ndiv_i = 1; // number of divisions in x direction
inline int ndiv_j = 1; // number of divisions in y direction
inline int ndiv_k = 1; // number of divisions in z direction
inline int id_sim = 0; // simultaneous run id (not equal to src id) (parallel level 1)
inline int id_subdomain = 0; // subdomain id (parallel level 2)
inline bool subdom_main = false; // true if this process is main process in subdomain (parallel level 3)
// flags for explaining the process's role
inline bool proc_read_srcrec = false; // true if this process is reading source file
inline bool proc_store_srcrec = false; // true if this process is storing srcrec file
// MNMN stop using these global variable for avoiding misleadings during the sources' iteration loop
//inline int id_sim_src = 0; // id of current target source
//inline std::string name_sim_src = "unknown"; //name of current target source
// rule to distribute source to simultaneous group
inline int select_id_sim_for_src(const int& i_src, const int& n_sims){
return i_src % n_sims;
}
// read input yaml file
inline std::string input_file="input_params.yaml";
// output directory name
inline std::string output_dir="./OUTPUT_FILES/";
// output file format
#define OUTPUT_FORMAT_HDF5 11111
#define OUTPUT_FORMAT_ASCII 22222
//#define OUTPUT_FORMAT_BINARY 33333 //#TODO: add
inline int output_format = OUTPUT_FORMAT_HDF5; // 0 - ascii, 1 - hdf5, 2 - binary
// HDF5 io mode (independent or collective)
#ifdef USE_HDF5_IO_COLLECTIVE
#define HDF5_IO_MODE H5FD_MPIO_COLLECTIVE
#else
#define HDF5_IO_MODE H5FD_MPIO_INDEPENDENT
#endif
// smooth parameters
inline int smooth_method = 0; // 0: multi grid parametrization, 1: laplacian smoothing
inline CUSTOMREAL smooth_lp = 1.0;
inline CUSTOMREAL smooth_lt = 1.0;
inline CUSTOMREAL smooth_lr = 1.0;
inline CUSTOMREAL regul_lp = 1.0;
inline CUSTOMREAL regul_lt = 1.0;
inline CUSTOMREAL regul_lr = 1.0;
inline const int GRADIENT_DESCENT = 0;
inline const int HALVE_STEPPING_MODE = 1;
inline const int LBFGS_MODE = 2;
inline int optim_method = 0; // 0: gradient descent, 1: halve_stepping, 2: LBFGS
inline const CUSTOMREAL wolfe_c1 = 1e-4;
inline const CUSTOMREAL wolfe_c2 = 0.9;
inline const int Mbfgs = 5; // number of gradients/models stored in memory
inline CUSTOMREAL regularization_weight = 0.5; // regularization weight
inline int max_sub_iterations = 20; // maximum number of sub-iterations
inline const CUSTOMREAL LBFGS_RELATIVE_step_length = 3.0; // relative step size for the second and later iteration
inline CUSTOMREAL Kdensity_coe = 0.0; // a preconditioner \in [0,1], kernel -> kernel / pow(kernel density, Kdensity_coe)
// variables for test
inline bool if_test = false;
// verboose mode
inline bool if_verbose = false;
//inline bool if_verbose = true;
// if use gpu
inline bool use_gpu;
// total number of sources in the srcrec file
inline int nsrc_total = 0;
// flag if common receiver double difference data is used
inline bool src_pair_exists = false;
// weight of different typs of data
inline CUSTOMREAL abs_time_local_weight = 1.0; // weight of absolute traveltime data for local earthquake, default: 1.0
inline CUSTOMREAL cr_dif_time_local_weight = 1.0; // weight of common receiver differential traveltime data for local earthquake, default: 1.0
inline CUSTOMREAL cs_dif_time_local_weight = 1.0; // weight of common source differential traveltime data for local earthquake, default: 1.0 (not ready)
inline CUSTOMREAL teleseismic_weight = 1.0; // weight of teleseismic data default: 1.0 (not ready)
inline CUSTOMREAL abs_time_local_weight_reloc = 1.0; // weight of absolute traveltime data for local earthquake, default: 1.0
inline CUSTOMREAL cs_dif_time_local_weight_reloc = 1.0; // weight of common source differential traveltime data for local earthquake, default: 1.0
inline CUSTOMREAL cr_dif_time_local_weight_reloc = 1.0; // weight of common receiver differential traveltime data for local earthquake, default: 1.0
inline CUSTOMREAL teleseismic_weight_reloc = 1.0; // weight of teleseismic data default: 1.0 (not ready)
// misfit balance
inline bool balance_data_weight = false; // add the weight to normalize the initial objective function of different types of data. 1 for yes and 0 for no
inline CUSTOMREAL total_abs_local_data_weight = 0.0;
inline CUSTOMREAL total_cr_dif_local_data_weight = 0.0;
inline CUSTOMREAL total_cs_dif_local_data_weight = 0.0;
inline CUSTOMREAL total_teleseismic_data_weight = 0.0;
inline bool balance_data_weight_reloc = false; // add the weight to normalize the initial objective function of different types of data for relocation. 1 for yes and 0 for no
inline CUSTOMREAL total_abs_local_data_weight_reloc = 0.0;
inline CUSTOMREAL total_cr_dif_local_data_weight_reloc = 0.0;
inline CUSTOMREAL total_cs_dif_local_data_weight_reloc = 0.0;
inline CUSTOMREAL total_teleseismic_data_weight_reloc = 0.0;
// 2d solver parameters for teleseismic data
// use fixed domain size for all 2d simulations in teleseismic data
inline CUSTOMREAL rmin_2d = 3370.5;
inline CUSTOMREAL rmax_2d = 6471.5;
inline CUSTOMREAL tmin_2d = -5.0/180.0*PI;
inline CUSTOMREAL tmax_2d = 120.0/180.0*PI;
inline CUSTOMREAL dr_2d = 2.0;
inline CUSTOMREAL dt_2d = PI/1000.0;
inline const CUSTOMREAL TOL_2D_SOLVER = 1e-5;
inline const CUSTOMREAL MAX_ITER_2D_SOLVER = 4000;
inline const CUSTOMREAL SWEEPING_COEFF_TELE = 1.05; // coefficient for calculationg sigr/sigt/sigp
inline const int N_LAYER_SRC_BOUND = 1; // number of layers for source boundary
inline CUSTOMREAL DIST_SRC_DDT = 2.5*DEG2RAD; // distance threshold of two stations
inline const std::string OUTPUT_DIR_2D = "/2D_TRAVEL_TIME_FIELD/"; // output directory for 2d solver
// earthquake relocation
inline CUSTOMREAL step_length_src_reloc = 0.01; // step length for source relocation
inline CUSTOMREAL step_length_decay_src_reloc = 0.9;
inline int N_ITER_MAX_SRC_RELOC = 501; // max iteration for source location
inline CUSTOMREAL TOL_SRC_RELOC = 1e-3; // threshold of the norm of gradient for stopping single earthquake location
inline const CUSTOMREAL TOL_step_length = 1e-4; // threshold of the max step size for stopping single earthquake location
inline CUSTOMREAL rescaling_dep = 10.0;
inline CUSTOMREAL rescaling_lat = 5.0;
inline CUSTOMREAL rescaling_lon = 5.0;
inline CUSTOMREAL rescaling_ortime = 0.5;
inline CUSTOMREAL max_change_dep = 10.0; // default: max change is 10 km
inline CUSTOMREAL max_change_lat = 5.0; // default: max change is 5 km
inline CUSTOMREAL max_change_lon = 5.0; // default: max change is 5 km
inline CUSTOMREAL max_change_ortime = 0.5; // default: max change is 0.5 s
inline bool ortime_local_search = true;
inline int min_Ndata_reloc = 4; // if an earthquake is recorded by less than <min_Ndata> times, relocation is not allowed.
// inversion strategy parameters
inline int inv_mode = 1;
inline int model_update_N_iter = 1;
inline int relocation_N_iter = 1;
inline int max_loop_mode0 = 10;
inline int max_loop_mode1 = 10;
// INV MODE TYPE FLAG
inline const int ITERATIVE = 0;
inline const int SIMULTANEOUS = 1;
// source receiver weight calculation
inline CUSTOMREAL ref_value = 1.0; // reference value for source receiver weight calculation
inline std::string output_file_weight = "srcrec_weight.txt"; // output file name for source receiver weight calculation
#endif // CONFIG_H

View File

@@ -0,0 +1,79 @@
#ifndef EIKONAL_SOLVER_2D_H
#define EIKONAL_SOLVER_2D_H
#include <fstream>
#include <string>
#include <iostream>
#include "utils.h"
#include "input_params.h"
#include "grid.h"
#include "1d_models.h"
#include "mpi_funcs.h"
#include "io.h"
class PlainGrid {
public:
PlainGrid(Source& src, InputParams& IP);
~PlainGrid();
void run_iteration(InputParams& IP);
void run_iteration_upwind(InputParams& IP);
//private:
// grid parameters
CUSTOMREAL src_r, src_t, src_p;
CUSTOMREAL src_t_dummy;
CUSTOMREAL rmin_3d, rmax_3d, tmin_3d, tmax_3d, pmin_3d, pmax_3d;
CUSTOMREAL max_degree;
int nr_2d, nt_2d;
// for eikonal solver
int count_cand;
int ii,ii_nr,ii_n2r,ii_pr,ii_p2r,ii_nt,ii_n2t,ii_pt,ii_p2t;
CUSTOMREAL ar,br,at,bt,ar1,ar2,at1,at2,br1,br2,bt1,bt2;
CUSTOMREAL eqn_a, eqn_b, eqn_c, eqn_Delta;
CUSTOMREAL tmp_tau, T_r, T_t, charact_r, charact_t;
bool is_causality;
std::vector<CUSTOMREAL> canditate = std::vector<CUSTOMREAL>(60);
// arrays
// azimuth
CUSTOMREAL *azimuth_2d;
// epicentral distance
CUSTOMREAL *epicentral_distance_2d;
// activated boundaries
//bool *activated_boundaries;
// grid
CUSTOMREAL *r_2d, *t_2d; // coordinates
CUSTOMREAL *fun_2d, *fac_a_2d, *fac_b_2d, *u_2d, *T_2d; // functions
CUSTOMREAL *T0v_2d, *T0r_2d, *T0t_2d, *tau_2d, *tau_old_2d;
bool *is_changed_2d;
// member functions
void allocate_arrays();
void deallocate_arrays();
void load_1d_model(std::string);
void select_1d_model(std::string);
void calculate_stencil_2d(int&, int&);
void calculate_stencil_upwind_2d(int&, int&);
CUSTOMREAL eps_2d = 1e-12;
private:
// # TODO: make id model type selectable by input file
// 1D model used for 2D solver
std::vector<std::vector<CUSTOMREAL>> model_1d_ref = model_1d_prem;
};
void prepare_teleseismic_boundary_conditions(InputParams&, Grid&, IO_utils&);
void run_2d_solver(InputParams&, Source&, IO_utils&);
void interp2d(PlainGrid&, CUSTOMREAL, CUSTOMREAL, CUSTOMREAL&);
void load_2d_traveltime(InputParams&, Source&, Grid&, IO_utils&);
std::string get_2d_tt_filename(const std::string&, Source&);
#endif // EIKONAL_SOLVER_2D_H

2
include/grid.fwd.h Normal file
View File

@@ -0,0 +1,2 @@
#pragma once
class Grid;

445
include/grid.h Normal file
View File

@@ -0,0 +1,445 @@
#ifndef GRID_H
#define GRID_H
// to handle circular dependency
#pragma once
#include "grid.fwd.h"
#include "source.fwd.h"
#include "io.fwd.h"
#include <vector>
#include <array>
#include <algorithm>
#include <cstring>
#include <iomanip>
#include "utils.h"
#include "config.h"
#include "mpi_funcs.h"
#include "input_params.h"
#include "source.h"
#include "io.h"
#include "inv_grid.h"
#ifdef USE_CUDA
#include <cuda_runtime.h>
#include <cuda.h>
#endif
class Grid {
public:
Grid(InputParams&, IO_utils&);
~Grid();
//
// setter
//
// set physical parameters
void setup_grid_params(InputParams&, IO_utils&); // setup grid parameters
// set factors
void setup_factors(Source &);
// calculate initial fields T0 T0r T0t T0p and initialize tau
void initialize_fields(Source &, InputParams&);
// calculate initial fields T0 T0r T0t T0p and initialize tau for teleseismic source
void initialize_fields_teleseismic();
// calculate L1 and Linf diff (sum of value change on the nodes)
void calc_L1_and_Linf_diff(CUSTOMREAL&, CUSTOMREAL&);
// calculate L1 and Linf diff for teleseismic source (sum of value change on the nodes)
void calc_L1_and_Linf_diff_tele(CUSTOMREAL&, CUSTOMREAL&);
// FOR TEST: calculate L1 and Linf error between analytic solution and current T
void calc_L1_and_Linf_error(CUSTOMREAL&, CUSTOMREAL&);
// calculate L1 and Linf diff for adjoint field
void calc_L1_and_Linf_diff_adj(CUSTOMREAL&, CUSTOMREAL&);
// calculate L1 and Linf diff for density of adjoint field
void calc_L1_and_Linf_diff_adj_density(CUSTOMREAL&, CUSTOMREAL&);
// send and receive data to/from other subdomains
void send_recev_boundary_data(CUSTOMREAL*);
void prepare_boundary_data_to_send(CUSTOMREAL*);
void assign_received_data_to_ghost(CUSTOMREAL*);
void send_recev_boundary_data_av(CUSTOMREAL*);
void assign_received_data_to_ghost_av(CUSTOMREAL*);
// send and receive data to/from the neighbor subdomains which contact on a line or point
void send_recev_boundary_data_kosumi(CUSTOMREAL*);
void prepare_boundary_data_to_send_kosumi(CUSTOMREAL*);
void assign_received_data_to_ghost_kosumi(CUSTOMREAL*);
// inversion methods
InvGrid* inv_grid; // inversion grid definitions
// check model discontinuity
void check_velocity_discontinuity();
void reinitialize_abcf(); // reinitialize factors
void rejuvenate_abcf(); // reinitialize factors for earthquake relocation
void initialize_kernels(); // fill 0 to kernels
//
// getters
//
int get_ngrid_total_excl_ghost(){return loc_nnodes;};
int get_nelms_total_excl_ghost(){return loc_nelms;};
int get_offset_nnodes() {return offset_nnodes;};
int get_offset_elms() {return offset_nelms;};
void get_offsets_3d(int* arr) {
arr[0] = offset_k;
arr[1] = offset_j;
arr[2] = offset_i;
};
int get_ngrid_total_vis() {return loc_nnodes_vis;};
int get_nelms_total_vis() {return loc_nelms_vis;};
int get_offset_nnodes_vis(){return offset_nnodes_vis;};
int get_offset_elms_vis() {return offset_nelms_vis;};
// return dimensions of node coordinates arra and connectivity array
std::array<int,2> get_dim_nodes_coords() {return {offset_nnodes,3};};
std::array<int,2> get_dim_elms_conn() {return {offset_nelms,9};};
// return node coordinates array and connectivity array
int* get_elms_conn() {return elms_conn;};
CUSTOMREAL* get_nodes_coords_x(){return x_loc_3d;}; // lon
CUSTOMREAL* get_nodes_coords_y(){return y_loc_3d;}; // lat
CUSTOMREAL* get_nodes_coords_z(){return z_loc_3d;}; // r
CUSTOMREAL* get_nodes_coords_p(){return p_loc_3d;}; // lon
CUSTOMREAL* get_nodes_coords_t(){return t_loc_3d;}; // lat
CUSTOMREAL* get_nodes_coords_r(){return r_loc_3d;}; // r
int* get_proc_dump() {return my_proc_dump;}; // array storing the process ids
CUSTOMREAL* get_true_solution(){return get_array_for_vis(u_loc, false);}; // true solution
CUSTOMREAL* get_fun() {return get_array_for_vis(fun_loc, false);}; //
CUSTOMREAL* get_xi() {return get_array_for_vis(xi_loc, false);}; //
CUSTOMREAL* get_eta() {return get_array_for_vis(eta_loc, false);}; //
CUSTOMREAL* get_a() {return get_array_for_vis(fac_a_loc, false);}; //
CUSTOMREAL* get_b() {return get_array_for_vis(fac_b_loc, false);}; //
CUSTOMREAL* get_c() {return get_array_for_vis(fac_c_loc, false);}; //
CUSTOMREAL* get_f() {return get_array_for_vis(fac_f_loc, false);}; //
CUSTOMREAL* get_vel() {return get_array_for_vis(fun_loc, true);}; // true velocity field
CUSTOMREAL* get_T0v() {return get_array_for_vis(T0v_loc, false);}; // initial T0
CUSTOMREAL* get_u() {return get_array_for_vis(u_loc, false);}; // current solution
CUSTOMREAL* get_tau() {return get_array_for_vis(tau_loc, false);}; // current tau
CUSTOMREAL* get_T() {return get_array_for_vis(T_loc, false);};
CUSTOMREAL* get_residual() { calc_residual();
return get_array_for_vis(u_loc, false);}; // calculate residual (T_loc over written!!)
CUSTOMREAL* get_Tadj() {return get_array_for_vis(Tadj_loc, false);}; // adjoint solution
CUSTOMREAL* get_Ks() {return get_array_for_vis(Ks_loc, false);}; // Ks
CUSTOMREAL* get_Kxi() {return get_array_for_vis(Kxi_loc, false);}; // Kxi
CUSTOMREAL* get_Keta() {return get_array_for_vis(Keta_loc, false);}; // Keta
CUSTOMREAL* get_Ks_density() {return get_array_for_vis(Ks_density_loc, false);}; // Ks_density
CUSTOMREAL* get_Kxi_density() {return get_array_for_vis(Kxi_density_loc, false);}; // Kxi_density
CUSTOMREAL* get_Keta_density() {return get_array_for_vis(Keta_density_loc, false);}; // Keta_density
CUSTOMREAL* get_Ks_update() {return get_array_for_vis(Ks_update_loc, false);}; // Ks_update
CUSTOMREAL* get_Kxi_update() {return get_array_for_vis(Kxi_update_loc, false);}; // Kxi_update
CUSTOMREAL* get_Keta_update() {return get_array_for_vis(Keta_update_loc, false);}; // Keta_update
CUSTOMREAL* get_Ks_density_update() {return get_array_for_vis(Ks_density_update_loc,false);}; // Ks_density_update
CUSTOMREAL* get_Kxi_density_update() {return get_array_for_vis(Kxi_density_update_loc,false);}; // Kxi_density_update
CUSTOMREAL* get_Keta_density_update() {return get_array_for_vis(Keta_density_update_loc,false);}; // Keta_density_update
CUSTOMREAL* get_Ks_descent_dir() {return get_array_for_vis(Ks_descent_dir_loc, false);}; // Ks_descent_dir
CUSTOMREAL* get_Kxi_descent_dir(){return get_array_for_vis(Kxi_descent_dir_loc,false);}; // Kxi_descent_dir
CUSTOMREAL* get_Keta_descent_dir(){return get_array_for_vis(Keta_descent_dir_loc,false);}; // Keta_descent_dir
// get physical parameters
CUSTOMREAL get_r_min() {return r_min;};
CUSTOMREAL get_r_max() {return r_max;};
CUSTOMREAL get_lat_min() {return lat_min;};
CUSTOMREAL get_lat_max() {return lat_max;};
CUSTOMREAL get_lon_min() {return lon_min;};
CUSTOMREAL get_lon_max() {return lon_max;};
CUSTOMREAL get_delta_lon() {return dlon;};
CUSTOMREAL get_delta_lat() {return dlat;};
CUSTOMREAL get_delta_r() {return dr;};
CUSTOMREAL get_r_min_loc_excl_gl() {return r_loc_1d[k_start_loc];};
CUSTOMREAL get_r_max_loc_excl_gl() {return r_loc_1d[k_end_loc];};
CUSTOMREAL get_lat_min_loc_excl_gl() {return t_loc_1d[j_start_loc];};
CUSTOMREAL get_lat_max_loc_excl_gl() {return t_loc_1d[j_end_loc];};
CUSTOMREAL get_lon_min_loc_excl_gl() {return p_loc_1d[i_start_loc];};
CUSTOMREAL get_lon_max_loc_excl_gl() {return p_loc_1d[i_end_loc];};
CUSTOMREAL get_r_min_loc() {return r_loc_1d[0];};
CUSTOMREAL get_r_max_loc() {return r_loc_1d[loc_K-1];};
CUSTOMREAL get_lat_min_loc() {return t_loc_1d[0];};
CUSTOMREAL get_lat_max_loc() {return t_loc_1d[loc_J-1];};
CUSTOMREAL get_lon_min_loc() {return p_loc_1d[0];};
CUSTOMREAL get_lon_max_loc() {return p_loc_1d[loc_I-1];};
CUSTOMREAL get_r_by_index(int i) {return r_loc_1d[i];};
CUSTOMREAL get_lat_by_index(int j) {return t_loc_1d[j];};
CUSTOMREAL get_lon_by_index(int i) {return p_loc_1d[i];};
int get_offset_i() {return offset_i-i_start_loc;}; // return offset index (global index of the first node)
int get_offset_j() {return offset_j-j_start_loc;};
int get_offset_k() {return offset_k-k_start_loc;};
int get_offset_i_excl_gl() {return offset_i;}; // return offset index
int get_offset_j_excl_gl() {return offset_j;};
int get_offset_k_excl_gl() {return offset_k;};
// return index of the first node excluding ghost layer
int get_k_start_loc() {return k_start_loc;};
int get_k_end_loc() {return k_end_loc;};
int get_j_start_loc() {return j_start_loc;};
int get_j_end_loc() {return j_end_loc;};
int get_i_start_loc() {return i_start_loc;};
int get_i_end_loc() {return i_end_loc;};
// copy tau to tau old
void tau2tau_old();
// copy T to tau old
void T2tau_old();
// copy tau to Tadj
void update_Tadj();
// copy tau to Tadj
void update_Tadj_density();
// back up fun xi eta
void back_up_fun_xi_eta_bcf();
// restore fun xi eta
void restore_fun_xi_eta_bcf();
// write out inversion grid file
void write_inversion_grid_file();
private:
//
// member variables
//
// i: longitude, phi,
// j: latitude, theta,
// k: radius, r
// starting node id for each direction expect the boundary
int i_start_loc=0, j_start_loc=0, k_start_loc=0;
// end node id for each direction expect the boundary
int i_end_loc=0, j_end_loc=0, k_end_loc=0;
// neighbors domain_id (-1 if no neighbor)
// order of directions: -i,+i,-j,+j,-k,+k
std::vector<int> neighbors_id{-1,-1,-1,-1,-1,-1};
// neighbors domain_id line and point contact
// ij plane nn, np, pn, pp
std::vector<int> neighbors_id_ij{-1,-1,-1,-1};
// jk plane nn, np, pn, pp
std::vector<int> neighbors_id_jk{-1,-1,-1,-1};
// ik plane nn, np, pn, pp
std::vector<int> neighbors_id_ik{-1,-1,-1,-1};
// ijk plane nnn, nnp, npn, npp, pnn, pnp, ppn, ppp
std::vector<int> neighbors_id_ijk{-1,-1,-1,-1,-1,-1,-1,-1};
// layer id of this domain
int domain_i, domain_j, domain_k;
// number of points on each boundary surface
int n_grid_bound_i, n_grid_bound_j, n_grid_bound_k;
// mpi parameters
int offset_nnodes=0, offset_nelms=0;
int offset_i=0, offset_j=0, offset_k=0; // offset of this domain in the global grid
// number of nodes and elements on each subdomain
int loc_nnodes, loc_nelms;
int *nnodes, *nelms;
// arrays for visualization
//int loc_I_vis, loc_J_vis, loc_K_vis;
int i_start_vis, j_start_vis, k_start_vis;
int i_end_vis, j_end_vis, k_end_vis;
int loc_nnodes_vis, loc_nelms_vis;
int *nnodes_vis, *nelms_vis;
int offset_nnodes_vis=0, offset_nelms_vis=0;
public:
// 3d arrays
CUSTOMREAL *xi_loc; // local xi
CUSTOMREAL *eta_loc; // local eta
CUSTOMREAL *zeta_loc; // local zeta
CUSTOMREAL *fac_a_loc; // factor a
CUSTOMREAL *fac_b_loc; // factor b
CUSTOMREAL *fac_c_loc; // factor c
CUSTOMREAL *fac_f_loc; // factor f
CUSTOMREAL *fun_loc;
CUSTOMREAL *T_loc;
CUSTOMREAL *T0v_loc, *T0r_loc, *T0p_loc, *T0t_loc;
CUSTOMREAL *tau_loc;
CUSTOMREAL *tau_old_loc;
bool *is_changed;
// for inversion backup
CUSTOMREAL *fun_loc_back;
CUSTOMREAL *xi_loc_back;
CUSTOMREAL *eta_loc_back;
CUSTOMREAL *fac_b_loc_back;
CUSTOMREAL *fac_c_loc_back;
CUSTOMREAL *fac_f_loc_back;
// for lbfgs
CUSTOMREAL *Ks_grad_store_loc, *Keta_grad_store_loc, *Kxi_grad_store_loc;
CUSTOMREAL *Ks_model_store_loc, *Keta_model_store_loc, *Kxi_model_store_loc;
CUSTOMREAL *Ks_descent_dir_loc, *Keta_descent_dir_loc, *Kxi_descent_dir_loc;
CUSTOMREAL *fun_regularization_penalty_loc, *eta_regularization_penalty_loc, *xi_regularization_penalty_loc;
CUSTOMREAL *fun_gradient_regularization_penalty_loc, *eta_gradient_regularization_penalty_loc, *xi_gradient_regularization_penalty_loc;
CUSTOMREAL *fun_prior_loc, *eta_prior_loc, *xi_prior_loc; // *zeta_prior_loc; TODO
// tmp array for file IO
CUSTOMREAL *vis_data;
private:
// windows for shm arrays
MPI_Win win_fac_a_loc, win_fac_b_loc, win_fac_c_loc, win_fac_f_loc;
MPI_Win win_T0r_loc, win_T0p_loc, win_T0t_loc, win_T0v_loc;
MPI_Win win_tau_loc, win_fun_loc;
MPI_Win win_is_changed;
MPI_Win win_T_loc, win_tau_old_loc;
MPI_Win win_xi_loc, win_eta_loc, win_zeta_loc;
MPI_Win win_r_loc_1d, win_t_loc_1d, win_p_loc_1d;
CUSTOMREAL *x_loc_3d; // local (lon) x (global position)
CUSTOMREAL *y_loc_3d; // local (lat) y (global position)
CUSTOMREAL *z_loc_3d; // local (r ) z (global position)
CUSTOMREAL *p_loc_3d; // local lon (x) (global position)
CUSTOMREAL *t_loc_3d; // local lat (y) (global position)
CUSTOMREAL *r_loc_3d; // local r (z) (global position)
int *elms_conn; // connectivity array
int *my_proc_dump; // dump process id for each node DEBUG
public:
// 1d arrays for coordinates, storing only subdomain's local coordinates
CUSTOMREAL *r_loc_1d; // radius z in kilo meter
CUSTOMREAL *t_loc_1d; // theta lat y in radian
CUSTOMREAL *p_loc_1d; // phi lon x in radian
private:
// arrays for inversion
// TODO: check if those arrays can use shm
bool inverse_flag = false;
//int n_inv_grids; // in config.h
//int n_inv_I_loc, n_inv_J_loc, n_inv_K_loc; // in config.h
public:
CUSTOMREAL *Ks_loc;
CUSTOMREAL *Kxi_loc;
CUSTOMREAL *Keta_loc;
CUSTOMREAL *Ks_density_loc;
CUSTOMREAL *Kxi_density_loc;
CUSTOMREAL *Keta_density_loc;
CUSTOMREAL *Tadj_loc; // timetable for adjoint source
CUSTOMREAL *Tadj_density_loc; // timetable for density of adjoint source
CUSTOMREAL *Ks_inv_loc;
CUSTOMREAL *Kxi_inv_loc;
CUSTOMREAL *Keta_inv_loc;
CUSTOMREAL *Ks_density_inv_loc;
CUSTOMREAL *Kxi_density_inv_loc;
CUSTOMREAL *Keta_density_inv_loc;
// model update para
CUSTOMREAL *Ks_update_loc;
CUSTOMREAL *Kxi_update_loc;
CUSTOMREAL *Keta_update_loc;
CUSTOMREAL *Ks_density_update_loc;
CUSTOMREAL *Kxi_density_update_loc;
CUSTOMREAL *Keta_density_update_loc;
// model update para of the previous step
CUSTOMREAL *Ks_update_loc_previous;
CUSTOMREAL *Kxi_update_loc_previous;
CUSTOMREAL *Keta_update_loc_previous;
private:
MPI_Win win_Tadj_loc;
MPI_Win win_Tadj_density_loc;
// boundary layer for mpi communication
// storing not the value but the pointers for the elements of arrays
// to send
CUSTOMREAL *bin_s; // boundary i axis negative
CUSTOMREAL *bip_s; // boundary i axis positive
CUSTOMREAL *bjn_s; // boundary j axis negative
CUSTOMREAL *bjp_s; // boundary j axis positive
CUSTOMREAL *bkn_s; // boundary k axis negative
CUSTOMREAL *bkp_s; // boundary k axis positive
// to receive
CUSTOMREAL *bin_r; // boundary i axis negative
CUSTOMREAL *bip_r; // boundary i axis positive
CUSTOMREAL *bjn_r; // boundary j axis negative
CUSTOMREAL *bjp_r; // boundary j axis positive
CUSTOMREAL *bkn_r; // boundary k axis negative
CUSTOMREAL *bkp_r; // boundary k axis positive
// kosumi boundaries (touching on lines and points)
CUSTOMREAL *bij_pp_s, *bij_pp_r;
CUSTOMREAL *bij_pn_s, *bij_pn_r;
CUSTOMREAL *bij_np_s, *bij_np_r;
CUSTOMREAL *bij_nn_s, *bij_nn_r;
CUSTOMREAL *bjk_pp_s, *bjk_pp_r;
CUSTOMREAL *bjk_pn_s, *bjk_pn_r;
CUSTOMREAL *bjk_np_s, *bjk_np_r;
CUSTOMREAL *bjk_nn_s, *bjk_nn_r;
CUSTOMREAL *bik_pp_s, *bik_pp_r;
CUSTOMREAL *bik_pn_s, *bik_pn_r;
CUSTOMREAL *bik_np_s, *bik_np_r;
CUSTOMREAL *bik_nn_s, *bik_nn_r;
CUSTOMREAL *bijk_ppp_s,*bijk_ppp_r;
CUSTOMREAL *bijk_ppn_s,*bijk_ppn_r;
CUSTOMREAL *bijk_pnn_s,*bijk_pnn_r;
CUSTOMREAL *bijk_npp_s,*bijk_npp_r;
CUSTOMREAL *bijk_npn_s,*bijk_npn_r;
CUSTOMREAL *bijk_nnp_s,*bijk_nnp_r;
CUSTOMREAL *bijk_nnn_s,*bijk_nnn_r;
CUSTOMREAL *bijk_pnp_s,*bijk_pnp_r;
// store MPI_Request for sending and receiving
MPI_Request *mpi_send_reqs;
MPI_Request *mpi_recv_reqs;
MPI_Request *mpi_send_reqs_kosumi;
MPI_Request *mpi_recv_reqs_kosumi;
//
// domain definition
//
CUSTOMREAL r_min; // minimum radius of global domain
CUSTOMREAL r_max; // maximum radius of global domain
CUSTOMREAL lat_min; // minimum latitude of global domain
CUSTOMREAL lat_max; // maximum latitude of global domain
CUSTOMREAL lon_min; // minimum longitude of global domain
CUSTOMREAL lon_max; // maximum longitude of global domain
public:
CUSTOMREAL dr ;
CUSTOMREAL dlat,dt;
CUSTOMREAL dlon,dp;
private:
//
// members for test
//
CUSTOMREAL *u_loc; // true solution # TODO: erase for no testing
//CUSTOMREAL *velo_loc; // velocity field, # TODO: use this for storing an intial model
// anisotropic factors
CUSTOMREAL a0, b0, c0, f0, fun0;
CUSTOMREAL source_width;
//
// member functions
//
void init_decomposition(InputParams&); // initialize domain decomposition
void memory_allocation(); // allocate memory for arrays
void memory_deallocation(); // deallocate memory from arrays
void shm_memory_allocation(); // allocate memory for shared memory
void shm_memory_deallocation(); // deallocate memory from shared memory
CUSTOMREAL *get_array_for_vis(CUSTOMREAL *arr, bool); // get array for visualization
public:
void get_array_for_3d_output(const CUSTOMREAL *arr_in, CUSTOMREAL* arr_out, bool inverse_field); // get array for 3d output
void set_array_from_vis(CUSTOMREAL *arr); // set vis array to local array
// finalize the Time table
void calc_T_plus_tau();
private:
// check difference between true solution and computed solution
void calc_residual();
//
// utilities
//
public:
bool i_first(){ return neighbors_id[0] == -1;}
bool j_first(){ return neighbors_id[2] == -1;}
bool k_first(){ return neighbors_id[4] == -1;}
bool i_last(){ return neighbors_id[1] == -1;}
bool j_last(){ return neighbors_id[3] == -1;}
bool k_last(){ return neighbors_id[5] == -1;}
private:
// number of the layers for ghost nodes
const int n_ghost_layers = 1;
// vtk format cell type
const int cell_type = 9;
};
#endif // GRID_H

574
include/input_params.h Normal file
View File

@@ -0,0 +1,574 @@
#ifndef INPUT_PARAMS_H
#define INPUT_PARAMS_H
#include <string>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <algorithm>
#include <iomanip>
#include <unordered_set>
#include <map>
#include <stdexcept>
#include "config.h"
#include "yaml-cpp/yaml.h"
#include "utils.h"
#include "mpi_funcs.h"
#include "timer.h"
#include "src_rec.h"
// InputParams class for reading/storing and outputing input parameters and src_rec information
class InputParams {
public:
InputParams(std::string&);
~InputParams();
// write parameters to output file
void write_params_to_file();
//
// getter/setter
//
//
// bondary information
//
CUSTOMREAL get_min_dep(){return min_dep;};
CUSTOMREAL get_max_dep(){return max_dep;};
CUSTOMREAL get_min_lat(){return min_lat*DEG2RAD;};
CUSTOMREAL get_max_lat(){return max_lat*DEG2RAD;};
CUSTOMREAL get_min_lon(){return min_lon*DEG2RAD;};
CUSTOMREAL get_max_lon(){return max_lon*DEG2RAD;};
//
// source receiver data
//
// source receiver information for processes which stores source receiver information
std::string get_src_rec_file() {return src_rec_file;};
bool get_src_rec_file_exist(){return src_rec_file_exist;};
SrcRecInfo& get_src_point(const std::string&); // return SrcRec object
SrcRecInfo& get_rec_point(const std::string&); // return receivers for the current source
// source receiver information with broadcast to all subdom_main processes
SrcRecInfo get_src_point_bcast(const std::string&); // return SrcRec object
SrcRecInfo get_rec_point_bcast(const std::string&); // return receivers for the current source
CUSTOMREAL get_src_radius( const std::string&);
CUSTOMREAL get_src_lat( const std::string&);
CUSTOMREAL get_src_lon( const std::string&);
SrcRecInfo get_src_point_bcast_2d(const std::string&); // return SrcRec object
CUSTOMREAL get_src_radius_2d(const std::string&);
CUSTOMREAL get_src_lat_2d( const std::string&);
CUSTOMREAL get_src_lon_2d( const std::string&);
std::string get_src_name(const int&); // return source name from in-sim_group id
std::string get_rec_name(const int&); // return receiver name from in-sim_group id
std::string get_src_name_comm(const int&); // return source name in common receiver list
int get_src_id(const std::string&); // return src global id from src name
bool get_if_src_teleseismic(const std::string&); // return true if the source is teleseismic
//
// others
//
CUSTOMREAL get_conv_tol() {return conv_tol;};
void set_conv_tol(CUSTOMREAL conv_tol_){conv_tol = conv_tol_;};
CUSTOMREAL get_max_iter() {return max_iter;};
int get_stencil_order() {return stencil_order;};
void set_stencil_order(int stencil_order_){stencil_order = stencil_order_;};
int get_stencil_type() {return stencil_type;};
int get_sweep_type() {return sweep_type;};
std::string get_init_model_path(){return init_model_path;};
std::string get_model_1d_name() {return model_1d_name;};
int get_run_mode() {return run_mode;};
// invgrid for vel
int get_n_inversion_grid(){return n_inversion_grid;};
int get_n_inv_r() {return n_inv_r;};
int get_n_inv_t() {return n_inv_t;};
int get_n_inv_p() {return n_inv_p;};
int get_n_inv_r_ani(){return n_inv_r_ani;};
int get_n_inv_t_ani(){return n_inv_t_ani;};
int get_n_inv_p_ani(){return n_inv_p_ani;};
CUSTOMREAL get_min_dep_inv(){return min_dep_inv;};
CUSTOMREAL get_max_dep_inv(){return max_dep_inv;};
CUSTOMREAL get_min_lat_inv(){return min_lat_inv*DEG2RAD;};
CUSTOMREAL get_max_lat_inv(){return max_lat_inv*DEG2RAD;};
CUSTOMREAL get_min_lon_inv(){return min_lon_inv*DEG2RAD;};
CUSTOMREAL get_max_lon_inv(){return max_lon_inv*DEG2RAD;};
CUSTOMREAL get_min_dep_inv_ani(){return min_dep_inv_ani;};
CUSTOMREAL get_max_dep_inv_ani(){return max_dep_inv_ani;};
CUSTOMREAL get_min_lat_inv_ani(){return min_lat_inv_ani*DEG2RAD;};
CUSTOMREAL get_max_lat_inv_ani(){return max_lat_inv_ani*DEG2RAD;};
CUSTOMREAL get_min_lon_inv_ani(){return min_lon_inv_ani*DEG2RAD;};
CUSTOMREAL get_max_lon_inv_ani(){return max_lon_inv_ani*DEG2RAD;};
int get_n_inv_r_flex(){return n_inv_r_flex;};
int get_n_inv_t_flex(){return n_inv_t_flex;};
int get_n_inv_p_flex(){return n_inv_p_flex;};
CUSTOMREAL* get_dep_inv() {return dep_inv;};
CUSTOMREAL* get_lat_inv() {return lat_inv;};
CUSTOMREAL* get_lon_inv() {return lon_inv;};
CUSTOMREAL* get_trapezoid() {return trapezoid;};
bool get_ignore_velocity_discontinuity(){return ignore_velocity_discontinuity;};
// invgrid for ani
bool get_invgrid_ani() {return invgrid_ani;};
int get_n_inv_r_flex_ani(){return n_inv_r_flex_ani;};
int get_n_inv_t_flex_ani(){return n_inv_t_flex_ani;};
int get_n_inv_p_flex_ani(){return n_inv_p_flex_ani;};
CUSTOMREAL* get_dep_inv_ani() {return dep_inv_ani;};
CUSTOMREAL* get_lat_inv_ani() {return lat_inv_ani;};
CUSTOMREAL* get_lon_inv_ani() {return lon_inv_ani;};
CUSTOMREAL* get_trapezoid_ani() {return trapezoid_ani;};
//
void check_inv_grid();
private:
void check_increasing(CUSTOMREAL*, int, std::string);
void check_lower_bound(CUSTOMREAL*&, int&, CUSTOMREAL, std::string);
void check_upper_bound(CUSTOMREAL*&, int&, CUSTOMREAL, std::string);
public:
// void check_dep_inv();
// void check_lat_inv();
// void check_lon_inv();
// void check_dep_inv_ani();
// void check_lat_inv_ani();
// void check_lon_inv_ani();
// // adjust inv grid
// void insertAtFront(CUSTOMREAL*&, int&, CUSTOMREAL);
// void insertAtBack(CUSTOMREAL*&, int&, CUSTOMREAL);
// bool get_invgrid_volume_rescale(){return invgrid_volume_rescale;}
int get_max_iter_inv() {return max_iter_inv;};
int get_model_update_N_iter(){return model_update_N_iter;};
int get_relocation_N_iter() {return relocation_N_iter;};
int get_max_loop_mode0() {return max_loop_mode0;};
int get_max_loop_mode1() {return max_loop_mode1;};
bool get_is_srcrec_swap() {return swap_src_rec;};
bool get_if_output_source_field() {return output_source_field;};
bool get_if_output_kernel() {return output_kernel;};
bool get_if_output_final_model() {return output_final_model;};
bool get_if_output_middle_model() {return output_middle_model;};
bool get_if_output_in_process() {return output_in_process;};
bool get_if_output_in_process_data() {return output_in_process_data;};
bool get_if_single_precision_output(){return single_precision_output;};
int get_verbose_output_level() {return verbose_output_level;}; // #TODO: modify codes for verbose_output_level > 1
bool get_update_slowness() {return update_slowness;};
bool get_update_azi_ani() {return update_azi_ani;};
bool get_update_rad_ani() {return update_rad_ani;};
CUSTOMREAL * get_depth_taper() {return depth_taper;};
bool get_have_tele_data() {return have_tele_data;};
bool get_use_abs() {return use_abs;};
bool get_use_cs() {return use_cs;};
bool get_use_cr() {return use_cr;};
bool get_use_abs_reloc() {return use_abs_reloc;};
bool get_use_cs_reloc() {return use_cs_reloc;};
bool get_use_cr_reloc() {return use_cr_reloc;};
CUSTOMREAL* get_residual_weight_abs() {return residual_weight_abs;};
CUSTOMREAL* get_distance_weight_abs() {return distance_weight_abs;};
CUSTOMREAL* get_residual_weight_cs() {return residual_weight_cs;};
CUSTOMREAL* get_azimuthal_weight_cs() {return azimuthal_weight_cs;};
CUSTOMREAL* get_residual_weight_cr() {return residual_weight_cr;};
CUSTOMREAL* get_azimuthal_weight_cr() {return azimuthal_weight_cr;};
CUSTOMREAL* get_residual_weight_abs_reloc() {return residual_weight_abs_reloc;};
CUSTOMREAL* get_distance_weight_abs_reloc() {return distance_weight_abs_reloc;};
CUSTOMREAL* get_residual_weight_cs_reloc() {return residual_weight_cs_reloc;};
CUSTOMREAL* get_azimuthal_weight_cs_reloc() {return azimuthal_weight_cs_reloc;};
CUSTOMREAL* get_residual_weight_cr_reloc() {return residual_weight_cr_reloc;};
CUSTOMREAL* get_azimuthal_weight_cr_reloc() {return azimuthal_weight_cr_reloc;};
// get if the T field is written into the file
bool get_is_T_written_into_file(const std::string&);
// prepare source list for this simulation group
void prepare_src_map();
// (relocation) modify (swapped source) receiver's location and time
void modify_swapped_source_location();
// write out src_rec_file
void write_src_rec_file(int,int);
// write out station_correction_file
void write_station_correction_file(int);
// station correction
void station_correction_update( CUSTOMREAL );
int n_src_this_sim_group = 0; // number of sources in this simultaneous group
int n_src_comm_rec_this_sim_group = 0; // number of sources with common receiver in this simultaneous group
int n_src_2d_this_sim_group = 0; // number of sources assigned for 2d solver in this simultaneous group
int n_rec_this_sim_group = 0; // number of receivers in this simultaneous group
std::map<std::string, SrcRecInfo> src_map_all; // map of all sources (full information is only stored by the main process)
std::map<std::string, SrcRecInfo> src_map; // map of sources belonging to this simultaneous group
std::map<std::string, SrcRecInfo> src_map_comm_rec; // map of sources with common receiver
std::map<std::string, SrcRecInfo> src_map_2d; // map of sources assigned for 2d solver
std::map<std::string, SrcRecInfo> src_map_tele; // source list for teleseismic
std::map<std::string, SrcRecInfo> rec_map_all; // map of all receivers (full information is only stored by the main process)
std::map<std::string, SrcRecInfo> rec_map; // map of receivers belonging to this simultaneous group
std::map<std::string, SrcRecInfo> rec_map_tele; // rec list for teleseismic
// datainfo-vector maps <src_name, rec_name>
std::map< std::string, std::map<std::string, std::vector<DataInfo>>> data_map_all; // data list for all data (full information is only stored by the main process)
std::map< std::string, std::map<std::string, std::vector<DataInfo>>> data_map; // data list for this simultaneous group
std::map< std::string, std::map<std::string, std::vector<DataInfo>>> data_map_tele; // data list for teleseismic
std::vector<std::string> name_for_reloc; // name list of receivers (swarpped sources) for location
// src id <-> src name relations
std::vector<std::string> src_id2name; // name list of sources belonging to this simultaneous group
std::vector<std::string> rec_id2name; // name list of receivers belongig to this simultaneous group
std::vector<std::string> src_id2name_comm_rec; // name list of sources with common receiver
std::vector<std::string> src_id2name_2d; // name list of sources assigned for 2d solver.
std::vector<std::string> src_id2name_all; // name list of all sources (store the order of sources in src_rec file)
std::vector<std::string> src_id2name_back; // back up of name list of all sources (this will not be swapped)
std::vector<std::vector<std::vector<std::string>>> rec_id2name_back; // back up of the name list of all receivers for each source (this will not be swapped)
// backups used when outputing the data
std::map<std::string, SrcRecInfo> src_map_back;
std::map<std::string, SrcRecInfo> rec_map_back;
std::map<std::string, std::map<std::string, std::vector<DataInfo>>> data_map_back;
// the number of data
int N_abs_local_data = 0;
int N_cr_dif_local_data = 0;
int N_cs_dif_local_data = 0;
int N_teleseismic_data = 0;
int N_data = 0;
// the number of the types of data
int N_data_type = 0;
std::map<std::string, int> data_type; // element: "abs", "cs_dif", "cr_dif", "tele"
// initialize_adjoint_source
void initialize_adjoint_source();
// set adjoint source && adjoint source density
void set_adjoint_source(std::string, CUSTOMREAL);
void set_adjoint_source_density(std::string, CUSTOMREAL);
// gather traveltimes and calculate differences of synthetic data
void gather_traveltimes_and_calc_syn_diff();
// reduce necessary data in rec_map, which has differed elements in each sim group.
template <typename T>
void allreduce_rec_map_var(T&);
void allreduce_rec_map_vobj_src_reloc();
void allreduce_rec_map_grad_src();
private:
// boundary information
CUSTOMREAL min_dep; // minimum depth in km
CUSTOMREAL max_dep; // maximum depth in km
CUSTOMREAL min_lat; // minimum latitude in degree
CUSTOMREAL max_lat; // maximum latitude in degree
CUSTOMREAL min_lon; // minimum longitude in degree
CUSTOMREAL max_lon; // maximum longitude in degree
// source information
CUSTOMREAL src_dep; // source depth in km
CUSTOMREAL src_lat; // source latitude in degrees
CUSTOMREAL src_lon; // source longitude in degrees
std::string src_rec_file; // source receiver file
std::string src_rec_file_out; // source receiver file to be output
std::string sta_correction_file; // station correction file to be input
std::string station_correction_file_out; // station correction file to be output
bool src_rec_file_exist = false; // source receiver file exist
bool sta_correction_file_exist = false; // station correction file exist
bool swap_src_rec = false; // whether the src/rec are swapped or not
// model input files
std::string init_model_path; // model file path init
std::string model_1d_name; // name of 1d model for teleseismic tomography
// inversion
int run_mode=0; // do inversion or not (0: no, 1: yes)
int n_inversion_grid=1; // number of inversion grid
// inversion grid setting
static const int n_trapezoid = 3;
CUSTOMREAL trapezoid[n_trapezoid] = {1.0, 0.0, 50.0};
CUSTOMREAL trapezoid_ani[n_trapezoid] = {1.0, 0.0, 50.0};
// uniform or flexible inversion grid (0: uniform, 1: flexible, 2: trapezoid)
// int type_invgrid_dep=0;
// int type_invgrid_lat=0;
// int type_invgrid_lon=0;
// uniform or flexible anisotropic inversion grid (0: uniform, 1: flexible, 2: trapezoid)
// int type_invgrid_dep_ani=0;
// int type_invgrid_lat_ani=0;
// int type_invgrid_lon_ani=0;
//
// variables for type = 0: uniform inversion grid
//
bool uniform_inv_grid_dep = false; // automatically generate inversion grid for dep or not
bool uniform_inv_grid_lat = false; // automatically generate inversion grid for lat or not
bool uniform_inv_grid_lon = false; // automatically generate inversion grid for lon or not
// number of uniform inversion grid nodes in r, t, p direction
int n_inv_r=0;
int n_inv_t=0;
int n_inv_p=0;
// number of uniform anisotropic inversion grid nodes in r, t, p direction
int n_inv_r_ani=0;
int n_inv_t_ani=0;
int n_inv_p_ani=0;
// type = 0: uniform inversion grid
// inversion grid
CUSTOMREAL min_dep_inv=-99999; // minimum depth in km
CUSTOMREAL max_dep_inv=-99999; // maximum depth in km
CUSTOMREAL min_lat_inv=-99999; // minimum latitude
CUSTOMREAL max_lat_inv=-99999; // maximum latitude
CUSTOMREAL min_lon_inv=-99999; // minimum longitude
CUSTOMREAL max_lon_inv=-99999; // maximum longitude
// inversion grid for anisotropic parameters
CUSTOMREAL min_dep_inv_ani=-99999; // minimum depth in km
CUSTOMREAL max_dep_inv_ani=-99999; // maximum depth in km
CUSTOMREAL min_lat_inv_ani=-99999; // minimum latitude
CUSTOMREAL max_lat_inv_ani=-99999; // maximum latitude
CUSTOMREAL min_lon_inv_ani=-99999; // minimum longitude
CUSTOMREAL max_lon_inv_ani=-99999; // maximum longitude
void setup_uniform_inv_grid();
//
// variables fo type = 1: flexible inversion grid
//
CUSTOMREAL *dep_inv = nullptr; // array for storing inversion grid points in depth
CUSTOMREAL *lat_inv = nullptr; // array for storing inversion grid points in latitude
CUSTOMREAL *lon_inv = nullptr; // array for storing inversion grid points in longitude
CUSTOMREAL *dep_inv_ani = nullptr; // array for storing inversion grid points in depth for anisotropy
CUSTOMREAL *lat_inv_ani = nullptr; // array for storing inversion grid points in latitude for anisotropy
CUSTOMREAL *lon_inv_ani = nullptr; // array for storing inversion grid points in longitude for anisotropy
// number of flexibly designed inversion grid in r, t, p direction
int n_inv_r_flex=0;
int n_inv_t_flex=0;
int n_inv_p_flex=0;
// number of flexibly designed inversion grid in r, t, p direction
int n_inv_r_flex_ani=0;
int n_inv_t_flex_ani=0;
int n_inv_p_flex_ani=0;
// if true, use defined inversion grid for anisotropy. Otherwise, use the same inversion grid of velocity for anisotropy
bool invgrid_ani = false;
// inversion grid volume rescale (kernel -> kernel / volume of inversion grid mesh)
// bool invgrid_volume_rescale = false;
// date usage setting and weights
bool use_abs = false; // use absolute travel time or not
bool use_cs = false; // use common source double difference or not
bool use_cr = false; // use common receiver double difference or not
static const int n_weight = 4;
CUSTOMREAL residual_weight_abs[n_weight] = { 1.0, 3.0, 1.0, 1.0};
CUSTOMREAL residual_weight_cs[n_weight] = { 1.0, 3.0, 1.0, 1.0};
CUSTOMREAL residual_weight_cr[n_weight] = { 1.0, 3.0, 1.0, 1.0};
CUSTOMREAL distance_weight_abs[n_weight] = { 100.0, 200.0, 1.0, 1.0};
CUSTOMREAL azimuthal_weight_cs[n_weight] = { 15.0, 30.0, 1.0, 1.0};
CUSTOMREAL azimuthal_weight_cr[n_weight] = { 15.0, 30.0, 1.0, 1.0};
// for relocation
bool use_abs_reloc = false; // use absolute travel time or not
bool use_cs_reloc = false; // use common source double difference or not
bool use_cr_reloc = false; // use common source double difference or not
CUSTOMREAL residual_weight_abs_reloc[n_weight] = { 1.0, 3.0, 1.0, 1.0};
CUSTOMREAL distance_weight_abs_reloc[n_weight] = { 1.0, 3.0, 1.0, 1.0};
CUSTOMREAL residual_weight_cs_reloc[n_weight] = { 1.0, 3.0, 1.0, 1.0};
CUSTOMREAL azimuthal_weight_cs_reloc[n_weight] = { 100.0, 200.0, 1.0, 1.0};
CUSTOMREAL residual_weight_cr_reloc[n_weight] = { 15.0, 30.0, 1.0, 1.0};
CUSTOMREAL azimuthal_weight_cr_reloc[n_weight] = { 15.0, 30.0, 1.0, 1.0};
// convergence setting
CUSTOMREAL conv_tol = 1e-05; // convergence tolerance
int max_iter = 500; // maximum number of iterations
int max_iter_inv=1; // maximum number of iterations for inversion
// calculation method
int stencil_order = 1; // stencil order
int stencil_type = 1; // stencil type (0: non upwind; 1: upwind)
int sweep_type = 0; // sweep type (0: legacy, 1: cuthil-mckee with shm parallelization)
// gather all arrival times to a main process
void gather_all_arrival_times_to_main();
// gather rec info to main process
void gather_rec_info_to_main();
// generate a map of sources which include common receiver double difference data
void generate_src_map_with_common_receiver(std::map<std::string, std::map<std::string, std::vector<DataInfo>>>&,
std::map<std::string, SrcRecInfo>&,
std::vector<std::string>&);
bool i_first=false, i_last=false, \
j_first=false, j_last=false, \
k_first=false; // store info if this subdomain has outer boundary
// check contradictions in input parameters
void check_contradictions();
// output setting
bool output_source_field = false; // output out_data_sim_X.h or not.
bool output_kernel = false; // output kernel or not.
bool output_final_model = true; // output merged final model or not.
bool output_middle_model = false; // output merged model at each inv iteration or not.
bool output_in_process = true; // True: output at each inv iteration, otherwise, only output step 0, Niter-1, Niter. Default: true. File: "out_data_sim_group_0". Keys: "Kdensity_update_inv_XXXX", "vel_inv_0000", "xi_inv_0000", "eta_inv_0000"
bool output_in_process_data = true; // output src_rec_file at each inv iteration or not.
int verbose_output_level = 0; // output verbose information or not.
// inversion setting
bool update_slowness = true; // update slowness (velocity) or not.
bool update_azi_ani = false; // update azimuthal anisotropy (xi, eta) or not.
bool update_rad_ani = false; // update radial anisotropy (in future) or not.
// have have_tele_data or not?
bool have_tele_data = false; // default is false. Error will occur if teleseismic data is include but false.
// ignore velocity discontinuity
bool ignore_velocity_discontinuity = false; // default is false. Error will occur if model velocity has discontinuity ( v[i+1] > v[i] * 1.2 or v[i+1] < v[i] * 0.8).
CUSTOMREAL depth_taper[2] = {-9999999, -9999998}; // kernel weight: 0: -inf ~ taper[0]; 0 ~ 1 : taper[0] ~ taper[1]; 1 : taper[1] ~ inf
bool use_sta_correction = false; // apply station correction or not.
// single precision (float) output mode
bool single_precision_output = false;
};
//
// utils
//
inline DataInfo& get_data_src_rec(std::vector<DataInfo>& v){
// return the first element in the vector with is_rec_pair = true
for (auto it = v.begin(); it != v.end(); it++){
if (it->is_src_rec)
return *it;
}
// error if no rec pair is found
std::cerr << "Error: no src/rec is found in get_data_src_rec" << std::endl;
exit(1);
// return the first element in the vector as a dummy
return v[0];
}
inline DataInfo& get_data_rec_pair(std::map<std::string, std::map<std::string, std::vector<DataInfo>>>& v,
const std::string& name_src,
const std::string& name_rec1,
const std::string& name_rec2){
// return the first element in the vector with is_rec_pair = true
auto& map1 = v[name_src][name_rec1];
auto& map2 = v[name_src][name_rec2];
for (auto it = map1.begin(); it != map1.end(); it++){
if (it->is_rec_pair && it->name_rec_pair[0] == name_rec1 && it->name_rec_pair[1] == name_rec2)
return *it;
}
for (auto it = map2.begin(); it != map2.end(); it++){
if (it->is_rec_pair && it->name_rec_pair[0] == name_rec1 && it->name_rec_pair[1] == name_rec2)
return *it;
}
// error if no rec pair is found
std::cerr << "Error: no rec pair is found in get_data_rec_pair" << std::endl;
exit(1);
// return the first element in the vector as a dummy
return v[name_src][name_rec1][0];
}
inline DataInfo& get_data_src_pair(std::map<std::string, std::map<std::string, std::vector<DataInfo>>>& v,
const std::string& name_src1,
const std::string& name_src2,
const std::string& name_rec){
// return the first element in the vector with is_rec_pair = true
auto& map1 = v[name_src1][name_rec];
auto& map2 = v[name_src2][name_rec];
for (auto it = map1.begin(); it != map1.end(); it++){
if (it->is_src_pair && it->name_src_pair[0] == name_src1 && it->name_src_pair[1] == name_src2)
return *it;
}
for (auto it = map2.begin(); it != map2.end(); it++){
if (it->is_src_pair && it->name_src_pair[0] == name_src2 && it->name_src_pair[1] == name_src1)
return *it;
}
// error if no src pair is found
std::cerr << "Error: no src pair is found in get_data_src_pair" << std::endl;
exit(1);
// return the first element in the vector as a dummy
return v[name_src1][name_rec][0];
}
inline void set_cr_dif_to_src_pair(std::map<std::string, std::map< std::string, std::vector<DataInfo>>>& v,
std::string& name_src1,
std::string& name_src2,
std::string& name_rec,
CUSTOMREAL& cr_dif){
// return the first element in the vector with is_rec_pair = true
std::vector<DataInfo>& vdata = v[name_src1][name_rec];
for (auto it = vdata.begin(); it != vdata.end(); it++){
if (it->is_src_pair
&& ( (it->name_src_pair[0] == name_src1 && it->name_src_pair[1] == name_src2)
|| (it->name_src_pair[0] == name_src2 && it->name_src_pair[1] == name_src1) )) {
it->cr_dif_travel_time = cr_dif;
}
}
}
inline bool get_if_any_src_pair(std::vector<DataInfo>& v){
// return the first element in the vector with is_rec_pair = true
for (auto it = v.begin(); it != v.end(); it++){
if (it->is_src_pair)
return true;
}
// or maybe this will be enough
//return v.size() > 1;
// return false if no rec pair is found
return false;
}
#endif

75
include/inv_grid.h Normal file
View File

@@ -0,0 +1,75 @@
#ifndef INV_GRID_H
#define INV_GRID_H
#include <algorithm>
#include "config.h"
#include "input_params.h"
// Base class for 1D inversion grid ()
class InvGrid1dBase {
public:
InvGrid1dBase(){};
virtual ~InvGrid1dBase(){
delete[] arr;
};
// copy constructor
InvGrid1dBase(const InvGrid1dBase& other){
n = other.n;
dinv = other.dinv;
dinv_l = other.dinv_l;
arr = new CUSTOMREAL[n];
std::copy(other.arr, other.arr+n, arr);
}
// assignment operator
InvGrid1dBase& operator=(const InvGrid1dBase& other){
if (this != &other){
n = other.n;
dinv = other.dinv;
dinv_l = other.dinv_l;
arr = new CUSTOMREAL[n];
std::copy(other.arr, other.arr+n, arr);
}
return *this;
}
CUSTOMREAL* arr = nullptr; // 1d or 2d array storing the grid coordinates
int n; // number of grid points
CUSTOMREAL dinv; // grid spacing
CUSTOMREAL dinv_l; // amount of shift for each inversion grid
};
// Derived class for 1D inversion grid
class InvGrid1d : public InvGrid1dBase {
public:
InvGrid1d(InputParams&, const int, const CUSTOMREAL*); // for r grid
InvGrid1d(InputParams&, const int, const CUSTOMREAL*, const CUSTOMREAL*, const int, const CUSTOMREAL*); // function overload for t and p grids
~InvGrid1d() override {};
};
// Base class for 3D inversion grid
class InvGrid {
public:
InvGrid(InputParams&);
~InvGrid();
void write_inversion_grid_to_file();
InvGrid1dBase r;
InvGrid1dBase t;
InvGrid1dBase p;
InvGrid1dBase r_ani;
InvGrid1dBase t_ani;
InvGrid1dBase p_ani;
private:
void get_inv_grid_params(InputParams&);
};
#endif // INV_GRID_H

2
include/io.fwd.h Normal file
View File

@@ -0,0 +1,2 @@
#pragma once
class IO_utils;

284
include/io.h Normal file
View File

@@ -0,0 +1,284 @@
#ifndef IO_H
#define IO_H
#include <fstream>
#include <vector>
#if USE_HDF5
#include "hdf5.h"
#endif
#include "tinyxml2.h"
// to handle circular dependency
#pragma once
#include "grid.fwd.h"
//#include "source.fwd.h"
#include "io.fwd.h"
#include "utils.h"
#include "mpi_funcs.h"
#include "grid.h"
#include "config.h"
#include "input_params.h"
#include "eikonal_solver_2d.h"
class IO_utils {
public:
IO_utils(InputParams&);
~IO_utils();
// initialize data output file
void init_data_output_file();
// finalize data/grid outpiut file (xdmf)
void finalize_data_output_file();
// change the output file name and xdmf objects
void change_xdmf_obj(int);
// output for updating xdmf file
void update_xdmf_file();
// change group name for source
void reset_source_info(const int&, const std::string&);
// change group name for model
void change_group_name_for_model();
// prepare grid for each inversion iteration
void prepare_grid_inv_xdmf(int);
// set id_sim
void set_id_src(const int& id_src_){id_sim_src = id_src_;};
// set source name
void set_name_src(const std::string& name_src_){name_sim_src = name_src_;};
//
// write functions
//
// xdmf write out for grid data
void write_xdmf_file_grid();
// write grid data to output file
void write_grid(Grid&);
// general function for write out data in hdf5
void write_data_h5(Grid&, std::string&, std::string&, CUSTOMREAL*, int, bool, bool for_tmp_db=false);
// general function for write out data in hdf5 with merging subdomains
void write_data_merged_h5(Grid&, std::string&, std::string&, std::string&, CUSTOMREAL*, bool, bool no_group=true);
// general function for write out data in ascii
void write_data_ascii(Grid&, std::string&, CUSTOMREAL*);
// general function for write out data in ascii with merging subdomains
void write_data_merged_ascii(Grid&, std::string&);
// write true solution
void write_true_solution(Grid&);
// write velocity model
void write_vel(Grid&, int);
// write T0v
void write_T0v(Grid&, int);
// write u
void write_u(Grid&);
// write tau
void write_tau(Grid&, int);
// write temporal tau fields
//void write_tmp_tau(Grid&, int);
// write result timetable T
void write_T(Grid&, int);
// write temporal timetable T in a separated file
void write_T_tmp(Grid&);
// write residual (resudual = true_solution - result)
void write_residual(Grid&);
// write adjoint field
void write_adjoint_field(Grid&, int);
// write fun_loc
void write_fun(Grid&, int);
// write xi
void write_xi(Grid&, int);
// write eta
void write_eta(Grid&, int);
// write a
void write_a(Grid&, int);
// write b
void write_b(Grid&, int);
// write c
void write_c(Grid&, int);
// write f
void write_f(Grid&, int);
// Ks
void write_Ks(Grid&, int);
// Kxi
void write_Kxi(Grid&, int);
// Keta
void write_Keta(Grid&, int);
// Ks_density
void write_Ks_density(Grid&, int);
// Kxi_density
void write_Kxi_density(Grid&, int);
// Keta_density
void write_Keta_density(Grid&, int);
// Ks_over_Kden
void write_Ks_over_Kden(Grid&, int);
// Kxi_over_Kden
void write_Kxi_over_Kden(Grid&, int);
// Keta_over_Kden
void write_Keta_over_Kden(Grid&, int);
// Ks_update
void write_Ks_update(Grid&, int);
// Kxi_update
void write_Kxi_update(Grid&, int);
// Keta_update
void write_Keta_update(Grid&, int);
// Ks_density_update
void write_Ks_density_update(Grid&, int);
// Kxi_density_update
void write_Kxi_density_update(Grid&, int);
// Keta_density_update
void write_Keta_density_update(Grid&, int);
// Ks_descent_dir_loc
void write_Ks_descent_dir(Grid&, int);
// Kxi_descent_dir_loc
void write_Kxi_descent_dir(Grid&, int);
// Keta_descent_dir_loc
void write_Keta_descent_dir(Grid&, int);
// write all concerning parameters
std::vector<CUSTOMREAL> get_grid_data(CUSTOMREAL * data);
// void write_concerning_parameters(Grid&, int, InputParams&);
// 2d traveltime field for teleseismic source
void write_2d_travel_time_field(CUSTOMREAL*, CUSTOMREAL*, CUSTOMREAL*, int, int, CUSTOMREAL);
void h5_create_and_write_dataset_2d(std::string&, int, int*, int, CUSTOMREAL*);
void read_2d_travel_time_field(std::string&, CUSTOMREAL*, int, int);
// 2d field for 1dinv
void write_1dinv_field(CUSTOMREAL*, CUSTOMREAL*, CUSTOMREAL*, int, int, std::string&);
// merged model
void write_T_merged(Grid&, InputParams&, int);
void write_merged_model(Grid&, InputParams&, std::string);
bool node_of_this_subdomain(int*, const int&, const int&, const int&);
//
// read functions
//
// read model data
void read_model(std::string&, const char*, CUSTOMREAL*, int, int, int);
// read Travel time from file for earthquake relocation and common receiver double-difference
void read_T(Grid&);
// read Travel time from a temporal file for earthquake relocation and common receiver double-difference
void read_T_tmp(Grid&);
void read_data_ascii(Grid&, std::string&);
//
// delete functions
//
// Any deletion of data should be done on a created hdf5. Because the design of hdf5 is that
// it is not possible to delete data from an existing hdf5 file. The only way to do so is to
// create a new hdf5 file and copy the necessary data from the old one to the new one.
// Therefore, the deletion of data is not implemented in this class.
// Reference : https://stackoverflow.com/questions/1124994/removing-data-from-a-hdf5-file
private:
// member variables
std::string h5_output_grid_fname = "out_data_grid.h5"; // output file name
std::string h5_output_fname = "out_data.h5"; // output file name
std::string h5_output_fname_tmp = "out_data_tmp.h5"; // output file name
std::string xdmf_output_fname = "out_data.xmf"; // output file name
std::string h5_group_name_grid = "Mesh"; // group name for grid data
std::string h5_group_name_event = "Event"; // group name for event data
std::string h5_group_name_data = "Data"; // group name for event data
std::string h5_dset_name_node_coords_x = "node_coords_x"; // dataset name for node coordinates
std::string h5_dset_name_node_coords_y = "node_coords_y"; // dataset name for node coordinates
std::string h5_dset_name_node_coords_z = "node_coords_z"; // dataset name for node coordinates
std::string h5_dset_name_node_coords_p = "node_coords_p"; // dataset name for node coordinates
std::string h5_dset_name_node_coords_t = "node_coords_t"; // dataset name for node coordinates
std::string h5_dset_name_node_coords_r = "node_coords_r"; // dataset name for node coordinates
std::string h5_dset_name_elem_conn = "elem_conn"; // dataset name for element connectivity
std::string h5_dset_name_procid = "procid"; // dataset name for my_procs
std::string h5_file_and_group = h5_output_grid_fname + ":/" + h5_group_name_grid;
std::string h5_whole_name_conn = h5_file_and_group + "/" + h5_dset_name_elem_conn;
std::string h5_whole_name_x = h5_file_and_group + "/" + h5_dset_name_node_coords_x;
std::string h5_whole_name_y = h5_file_and_group + "/" + h5_dset_name_node_coords_y;
std::string h5_whole_name_z = h5_file_and_group + "/" + h5_dset_name_node_coords_z;
std::string h5_whole_name_proc = h5_file_and_group + "/" + h5_dset_name_procid;
// index of current simulation used for naming output files and datasets
int id_sim_src;
// name of current simulation used for naming output files and datasets
std::string name_sim_src;
int nnodes_glob = 0;
int nelms_glob = 0;
int array_rank_default = 2; // default array rank
int custom_real_flag; // custom real type
// custom data type for real
std::string type_name = "Float";
std::string type_size = "4";
// helper for xdmf/xml file io
tinyxml2::XMLDocument* doc;
tinyxml2::XMLNode* xdmf, *domain, *grid;
tinyxml2::XMLNode* inversions, *inv_grid;
void init_xdmf_file();
void write_xdmf_file();
void finalize_xdmf_file();
void insert_data_xdmf(std::string&, std::string&, std::string&);
#ifdef USE_HDF5
// h5 variables
hid_t file_id;
hid_t group_id;
hid_t space_id;
hid_t dset_id;
hid_t plist_id, plist_id_dset;
hid_t file_dspace_id, mem_dspace_id;
herr_t status;
hid_t plist_id_2d;
hid_t file_id_2d;
// h5 low level utilities
void h5_create_file_by_group_main(std::string&);
void h5_open_file_by_group_main(std::string&);
void h5_open_file_collective(std::string&);
void h5_open_file_collective_input(std::string&);
void h5_close_file_by_group_main();
void h5_close_file_collective();
void h5_create_group_by_group_main(std::string& );
void h5_open_group_by_group_main(std::string& );
void h5_open_group_collective(std::string& );
void h5_close_group_by_group_main();
void h5_close_group_collective();
void h5_create_dataset(std::string&, int, int*, int, bool no_group = false);
void h5_open_dataset(std::string&);
void h5_open_dataset_no_group(std::string&);
void h5_close_dataset();
template <typename T>
void h5_write_array(std::string&, int, int*, T*, int offset_one, int offset_two=0, int offset_three=0, bool no_group=false);
template <typename T>
void h5_read_array(std::string&, int, int*, T*, int*, bool);
template <typename T>
void h5_read_array_simple(std::string&, T*);
void read_data_h5(Grid&, CUSTOMREAL*, std::string, std::string);
#endif // HDF_HDF5
// utilities
std::string create_fname_ascii(std::string&);
std::string create_fname_ascii_model(std::string&);
// flag for data type
const bool model_data = true;
const bool src_data = false;
};
#endif // IO_H

161
include/iterator.h Normal file
View File

@@ -0,0 +1,161 @@
#ifndef ITERATOR_H
#define ITERATOR_H
#include <algorithm>
#include <initializer_list>
#include <vector>
#include <math.h>
#include "grid.h"
#include "input_params.h"
#include "utils.h"
#include "source.h"
#include "io.h"
#include "timer.h"
#include "eikonal_solver_2d.h"
#ifdef USE_CUDA
#include "grid_wrapper.cuh"
#include "iterator_wrapper.cuh"
#endif
//#ifdef USE_SIMD
#include "simd_conf.h"
//#endif
class Iterator {
public:
Iterator(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
virtual ~Iterator();
// regional source
void run_iteration_forward(InputParams&, Grid&, IO_utils&, bool&); // run forward iteration till convergence
void run_iteration_adjoint(InputParams&, Grid&, IO_utils&, int); // run adjoint iteration till convergence
void initialize_arrays(InputParams&, IO_utils&, Grid&, Source&, const std::string&); // initialize factors etc.
protected:
void assign_processes_for_levels(Grid&, InputParams&); // assign intra-node processes for each sweeping level
void set_sweep_direction(int); // set sweep direction
// regional source
virtual void do_sweep(int, Grid&, InputParams&){}; // do sweeping with ordinal method
void calculate_stencil_1st_order(Grid&, int&, int&, int&); // calculate stencil for 1st order
void calculate_stencil_3rd_order(Grid&, int&, int&, int&); // calculate stencil for 3rd order
void calculate_stencil_1st_order_upwind(Grid&, int&, int&, int&); // calculate stencil for 1st order in upwind form
void calculate_boundary_nodes(Grid&); // calculate boundary values
// teleseismic source
void calculate_stencil_1st_order_tele(Grid&, int&, int&, int&); // calculate stencil for 1st order
void calculate_stencil_3rd_order_tele(Grid&, int&, int&, int&); // calculate stencil for 3rd order
void calculate_stencil_1st_order_upwind_tele(Grid&, int&, int&, int&); // calculate stencil for 1st order in upwind form
void calculate_boundary_nodes_tele(Grid&, int&, int&, int&); // calculate boundary values for teleseismic source
void calculate_boundary_nodes_adj(Grid&, int&, int&, int&); // calculate boundary values for adjoint source (all zeros)
// Hamiltonian calculation
inline CUSTOMREAL calc_LF_Hamiltonian(Grid&, CUSTOMREAL& ,CUSTOMREAL& , \
CUSTOMREAL& ,CUSTOMREAL& , \
CUSTOMREAL& ,CUSTOMREAL&, \
int&, int&, int& );
inline CUSTOMREAL calc_LF_Hamiltonian_tele(Grid&, CUSTOMREAL& ,CUSTOMREAL& , \
CUSTOMREAL& ,CUSTOMREAL& , \
CUSTOMREAL& ,CUSTOMREAL&, \
int&, int&, int& );
// methods for adjoint field calculation
void init_delta_and_Tadj(Grid&, InputParams&); // initialize delta and Tadj
void init_delta_and_Tadj_density(Grid&, InputParams&); // initialize delta and Tadj_density
void fix_boundary_Tadj(Grid&); // fix boundary values for Tadj
virtual void do_sweep_adj(int, Grid&, InputParams&){}; // do sweeping with ordinal method for adjoint field
void calculate_stencil_adj(Grid&, int&, int&, int&); // calculate stencil for 1st order for adjoint field
// grid point information
int* _nr, *_nt, *_np; // number of grid points on the direction r, theta, phi
CUSTOMREAL* _dr, *_dt, *_dp; // grid spacing on the direction r, theta, phi
int nr, nt, np; // number of grid points on the direction r, theta, phi
CUSTOMREAL dr, dt, dp; // grid spacing on the direction r, theta, phi
int st_level; // start level for sweeping
int ed_level; // end level for sweeping
std::vector< std::vector<int> > ijk_for_this_subproc; // ijk=I2V(i,j,k) for this process (level, ijk)
int max_n_nodes_plane; // maximum number of nodes on a plane
#if defined USE_SIMD || defined USE_CUDA
// stencil dumps
// first orders
CUSTOMREAL *dump_c__;// center of C
// all grid data expect tau pre-load strategy (iswap, ilevel, inodes)
#if USE_AVX512 || USE_AVX || USE_NEON || defined USE_CUDA
std::vector<std::vector<int*>> vv_i__j__k__, vv_ip1j__k__, vv_im1j__k__, vv_i__jp1k__, vv_i__jm1k__, vv_i__j__kp1, vv_i__j__km1;
std::vector<std::vector<int*>> vv_ip2j__k__, vv_im2j__k__, vv_i__jp2k__, vv_i__jm2k__, vv_i__j__kp2, vv_i__j__km2;
#elif USE_ARM_SVE
std::vector<std::vector<uint64_t*>> vv_i__j__k__, vv_ip1j__k__, vv_im1j__k__, vv_i__jp1k__, vv_i__jm1k__, vv_i__j__kp1, vv_i__j__km1;
std::vector<std::vector<uint64_t*>> vv_ip2j__k__, vv_im2j__k__, vv_i__jp2k__, vv_i__jm2k__, vv_i__j__kp2, vv_i__j__km2;
#endif
std::vector<std::vector<CUSTOMREAL*>> vv_iip, vv_jjt, vv_kkr;
std::vector<std::vector<CUSTOMREAL*>> vv_fac_a, vv_fac_b, vv_fac_c, vv_fac_f, vv_T0v, vv_T0r, vv_T0t, vv_T0p, vv_fun;
std::vector<std::vector<CUSTOMREAL*>>vv_change;
std::vector<std::vector<bool*>>vv_change_bl;
template <typename T>
void preload_indices(std::vector<std::vector<T*>> &vi, std::vector<std::vector<T*>> &, std::vector<std::vector<T*>> &, int, int, int);
template <typename T>
void preload_indices_1d(std::vector<std::vector<T*>> &, int, int, int);
template <typename T>
std::vector<std::vector<CUSTOMREAL*>> preload_array(T* a);
std::vector<std::vector<bool*>> preload_array_bl(bool* a);
template <typename T>
void free_preloaded_array(std::vector<std::vector<T*>> &vvv){
for (int iswap = 0; iswap < 8; iswap++){
for (auto& vv : vvv.at(iswap)) free(vv);
}
}
// flag for deallocation
bool simd_allocated = false;
bool simd_allocated_3rd = false;
#endif // USE_SIMD || USE_CUDA
#ifdef USE_CUDA
Grid_on_device *gpu_grid;
#endif
const int nswp = 8; // number of sweeping directions
int r_dirc, t_dirc, p_dirc; // sweeping directions
CUSTOMREAL sigr, sigt, sigp; //
CUSTOMREAL coe;
CUSTOMREAL wp1, pp1, wp2, pp2;
CUSTOMREAL wt1, pt1, wt2, pt2;
CUSTOMREAL wr1, pr1, wr2, pr2;
CUSTOMREAL Htau, tpT;
CUSTOMREAL ap1, bp1, ap2, bp2, ap, bp;
CUSTOMREAL at1, bt1, at2, bt2, at, bt;
CUSTOMREAL ar1, br1, ar2, br2, ar, br;
CUSTOMREAL bc_f2, eqn_a, eqn_b, eqn_c, eqn_Delta;
CUSTOMREAL tmp_tau, tmp_T;
CUSTOMREAL T_r, T_t, T_p, charact_r, charact_t, charact_p;
bool is_causality;
int count_cand;
std::vector<CUSTOMREAL> canditate = std::vector<CUSTOMREAL>(60);
// iteration control
int iter_count = 0;
CUSTOMREAL ini_diff_L1 = HUGE_VAL, ini_diff_Linf = HUGE_VAL;
CUSTOMREAL ini_err_L1 = HUGE_VAL, ini_err_Linf = HUGE_VAL;
CUSTOMREAL cur_diff_L1 = HUGE_VAL, cur_diff_Linf = HUGE_VAL;
CUSTOMREAL cur_err_L1 = HUGE_VAL, cur_err_Linf = HUGE_VAL;
// teleseismic flag
bool is_teleseismic = false;
// second run for hybrid order method
bool is_second_run = false;
};
// define derived classes for each iteration scheme
#endif // ITERATOR_H

82
include/iterator_legacy.h Normal file
View File

@@ -0,0 +1,82 @@
#ifndef ITERATOR_legacy_H
#define ITERATOR_legacy_H
#include "iterator.h"
class Iterator_legacy : public Iterator {
public:
Iterator_legacy(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
protected:
void do_sweep_adj(int, Grid&, InputParams&) override ; // do sweeping for adjoint routine
virtual void do_sweep(int, Grid&, InputParams&) {}; // do sweeping
};
class Iterator_legacy_tele : public Iterator {
public:
Iterator_legacy_tele(InputParams& , Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
protected:
void do_sweep_adj(int, Grid&, InputParams&) override ; // do sweeping for adjoint routine
virtual void do_sweep(int, Grid&, InputParams&) {}; // do sweeping
};
class Iterator_legacy_1st_order : public Iterator_legacy {
public:
Iterator_legacy_1st_order(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
private:
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
};
class Iterator_legacy_3rd_order : public Iterator_legacy {
public:
Iterator_legacy_3rd_order(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
//void run_iteration_forward(InputParams&, Grid&, IO_utils&, bool&);
private:
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
};
class Iterator_legacy_1st_order_upwind : public Iterator_legacy {
public:
Iterator_legacy_1st_order_upwind(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
private:
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
};
class Iterator_legacy_1st_order_tele : public Iterator_legacy_tele {
public:
Iterator_legacy_1st_order_tele(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
//void run_iteration_forward(InputParams&, Grid&, IO_utils&, bool&);
private:
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
};
class Iterator_legacy_3rd_order_tele : public Iterator_legacy_tele {
public:
Iterator_legacy_3rd_order_tele(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
//void run_iteration_forward(InputParams&, Grid&, IO_utils&, bool&);
private:
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
};
class Iterator_legacy_1st_order_upwind_tele : public Iterator_legacy_tele {
public:
Iterator_legacy_1st_order_upwind_tele(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
//void run_iteration_forward(InputParams&, Grid&, IO_utils&, bool&);
private:
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
};
#endif // ITERATOR_LEGACY_H

65
include/iterator_level.h Normal file
View File

@@ -0,0 +1,65 @@
#ifndef ITERATOR_LEVEL_H
#define ITERATOR_LEVEL_H
#include "iterator.h"
class Iterator_level : public Iterator {
public:
Iterator_level(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
protected:
void do_sweep_adj(int, Grid&, InputParams&) override ; // do sweeping for adjoint routine
virtual void do_sweep(int, Grid&, InputParams&) {}; // do sweeping
};
class Iterator_level_tele : public Iterator {
public:
Iterator_level_tele(InputParams& , Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
protected:
void do_sweep_adj(int, Grid&, InputParams&) override ; // do sweeping for adjoint routine
virtual void do_sweep(int, Grid&, InputParams&) {}; // do sweeping
};
class Iterator_level_1st_order : public Iterator_level {
public:
Iterator_level_1st_order(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
private:
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
};
class Iterator_level_3rd_order : public Iterator_level {
public:
Iterator_level_3rd_order(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
private:
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
};
class Iterator_level_1st_order_upwind : public Iterator_level {
public:
Iterator_level_1st_order_upwind(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
private:
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
};
class Iterator_level_1st_order_tele : public Iterator_level_tele {
public:
Iterator_level_1st_order_tele(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
private:
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
};
class Iterator_level_3rd_order_tele : public Iterator_level_tele {
public:
Iterator_level_3rd_order_tele(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
private:
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
};
class Iterator_level_1st_order_upwind_tele : public Iterator_level_tele {
public:
Iterator_level_1st_order_upwind_tele(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
private:
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
};
#endif // ITERATOR_LEVEL_H

View File

@@ -0,0 +1,98 @@
#ifndef ITERATOR_SELECTOR_H
#define ITERATOR_SELECTOR_H
#include <memory>
#include "iterator.h"
#include "iterator_legacy.h"
#include "iterator_level.h"
void select_iterator(InputParams& IP, Grid& grid, Source& src, IO_utils& io, const std::string& src_name, \
bool first_init, bool is_teleseismic, std::unique_ptr<Iterator>& It, bool is_second_run) {
// initialize iterator object
if (!is_teleseismic){
if (IP.get_sweep_type() == SWEEP_TYPE_LEGACY) {
if (IP.get_stencil_order() == 1){
if (IP.get_stencil_type() == UPWIND){
It = std::make_unique<Iterator_legacy_1st_order_upwind>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
} else {
It = std::make_unique<Iterator_legacy_1st_order>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
}
} else if (IP.get_stencil_order() == 3){
It = std::make_unique<Iterator_legacy_3rd_order>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
if (IP.get_stencil_type() == UPWIND){
std::cout << "WARNING: Upwind Stencil type not supported, using 3rd order non upwind scheme (LF)" << std::endl;
}
} else{
std::cout << "ERROR: Stencil order not supported" << std::endl;
exit(1);
}
} else if (IP.get_sweep_type() == SWEEP_TYPE_LEVEL){
if (IP.get_stencil_order() == 1){
if (IP.get_stencil_type() == UPWIND){
// std::cout << "WARNING: Upwind Stencil type not supported, using non upwind scheme (LF)" << std::endl;
It = std::make_unique<Iterator_level_1st_order_upwind>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
} else {
It = std::make_unique<Iterator_level_1st_order>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
}
} else if (IP.get_stencil_order() == 3){
It = std::make_unique<Iterator_level_3rd_order>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
if (IP.get_stencil_type() == UPWIND){
std::cout << "WARNING: Upwind Stencil type not supported, using 3rd order non upwind scheme (LF)" << std::endl;
}
} else{
std::cout << "ERROR: Stencil order not supported" << std::endl;
exit(1);
}
} else {
std::cout << "ERROR: Sweep type not supported" << std::endl;
exit(1);
}
} else { // teleseismic event
if (IP.get_sweep_type() == SWEEP_TYPE_LEGACY) {
if (IP.get_stencil_order() == 1){
if (IP.get_stencil_type() == UPWIND){
It = std::make_unique<Iterator_legacy_1st_order_upwind_tele>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
} else {
It = std::make_unique<Iterator_legacy_1st_order_tele>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
}
} else if (IP.get_stencil_order() == 3){
It = std::make_unique<Iterator_legacy_3rd_order_tele>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
if (IP.get_stencil_type() == UPWIND){
std::cout << "WARNING: Upwind Stencil type not supported, using non upwind scheme (LF)" << std::endl;
}
} else{
std::cout << "ERROR: Stencil order not supported" << std::endl;
exit(1);
}
} else if (IP.get_sweep_type() == SWEEP_TYPE_LEVEL){
if (IP.get_stencil_order() == 1){
if (IP.get_stencil_type() == UPWIND){
It = std::make_unique<Iterator_level_1st_order_upwind_tele>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
} else {
It = std::make_unique<Iterator_level_1st_order_tele>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
std::cout << "WARNING: SWEEP_TYPE_LEVEL for non-unwind solver does not work. Index wrong currently" << std::endl;
exit(1);
}
} else if (IP.get_stencil_order() == 3){
It = std::make_unique<Iterator_level_3rd_order_tele>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
std::cout << "WARNING: SWEEP_TYPE_LEVEL for non-unwind solver does not work. Index wrong currently" << std::endl;
exit(1);
if (IP.get_stencil_type() == UPWIND){
std::cout << "WARNING: Upwind Stencil type not supported, using non upwind scheme (LF)" << std::endl;
}
} else{
std::cout << "ERROR: Stencil order not supported" << std::endl;
exit(1);
}
} else {
std::cout << "ERROR: Sweep type not supported" << std::endl;
exit(1);
}
}
}
#endif

226
include/kernel.h Normal file
View File

@@ -0,0 +1,226 @@
#ifndef KERNEL_H
#define KERNEL_H
#include "config.h"
#include "grid.h"
#include "input_params.h"
void calculate_sensitivity_kernel(Grid& grid, InputParams& IP, const std::string& name_sim_src){
// calculate sensitivity kernel
// kernel calculation will be done only by the subdom_main
if (subdom_main) {
// get the necessary parameters
int np = loc_I;
int nt = loc_J;
int nr = loc_K;
CUSTOMREAL dr = grid.dr;
CUSTOMREAL dt = grid.dt;
CUSTOMREAL dp = grid.dp;
CUSTOMREAL src_lon = IP.get_src_lon( name_sim_src);
CUSTOMREAL src_lat = IP.get_src_lat( name_sim_src);
CUSTOMREAL src_r = IP.get_src_radius(name_sim_src);
CUSTOMREAL weight = _1_CR;
// inner points
for (int kkr = 1; kkr < nr-1; kkr++) {
for (int jjt = 1; jjt < nt-1; jjt++) {
for (int iip = 1; iip < np-1; iip++) {
// calculate the kernel
CUSTOMREAL Tr_km = (grid.T_loc[I2V(iip,jjt,kkr+1)] - grid.T_loc[I2V(iip,jjt,kkr-1)]) / (_2_CR * dr);
CUSTOMREAL Ttheta_km = (grid.T_loc[I2V(iip,jjt+1,kkr)] - grid.T_loc[I2V(iip,jjt-1,kkr)]) / (_2_CR * dt) / grid.r_loc_1d[kkr];
CUSTOMREAL Tphi_km = (grid.T_loc[I2V(iip+1,jjt,kkr)] - grid.T_loc[I2V(iip-1,jjt,kkr)]) / (_2_CR * dp) / (grid.r_loc_1d[kkr]*std::cos(grid.t_loc_1d[jjt]));
CUSTOMREAL azi_ratio = std::sqrt((my_square(Ttheta_km) + my_square(Tphi_km))/(my_square(Tr_km) + my_square(Ttheta_km) + my_square(Tphi_km)));
// mask within one grid around the source
if ((std::abs(grid.r_loc_1d[kkr] - src_r) >= dr) ||
(std::abs(grid.t_loc_1d[jjt] - src_lat) >= dt) ||
(std::abs(grid.p_loc_1d[iip] - src_lon) >= dp)) {
// density of ks
grid.Ks_density_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_density_loc[I2V(iip,jjt,kkr)];
// density of kxi
// grid.Kxi_density_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_density_loc[I2V(iip,jjt,kkr)];
grid.Kxi_density_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_density_loc[I2V(iip,jjt,kkr)] * azi_ratio;
// density of keta
// grid.Keta_density_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_density_loc[I2V(iip,jjt,kkr)];
grid.Keta_density_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_density_loc[I2V(iip,jjt,kkr)] * azi_ratio;
if (IP.get_update_slowness()==1){ // we need to update slowness
// Kernel w r t slowness s
grid.Ks_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_loc[I2V(iip,jjt,kkr)] * my_square(grid.fun_loc[I2V(iip,jjt,kkr)]);
} else {
grid.Ks_loc[I2V(iip,jjt,kkr)] = _0_CR;
}
if (IP.get_update_azi_ani()){ // we need to update azimuthal anisotropy
// Kernel w r t anisotrophy xi
if (isZero(std::sqrt(my_square(grid.xi_loc[I2V(iip,jjt,kkr)])+my_square(grid.eta_loc[I2V(iip,jjt,kkr)])))) {
grid.Kxi_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_loc[I2V(iip,jjt,kkr)] \
* (my_square(Ttheta_km) - my_square(Tphi_km));
grid.Keta_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_loc[I2V(iip,jjt,kkr)] \
* ( -_2_CR * Ttheta_km * Tphi_km );
} else {
grid.Kxi_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_loc[I2V(iip,jjt,kkr)] \
* ((- GAMMA * grid.xi_loc[I2V(iip,jjt,kkr)] / \
std::sqrt(my_square(grid.xi_loc[ I2V(iip,jjt,kkr)]) + my_square(grid.eta_loc[I2V(iip,jjt,kkr)]))) * my_square(Tr_km) \
+ my_square(Ttheta_km)
- my_square(Tphi_km));
grid.Keta_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_loc[I2V(iip,jjt,kkr)] \
* (( - GAMMA * grid.eta_loc[I2V(iip,jjt,kkr)]/ \
std::sqrt(my_square(grid.xi_loc[I2V(iip,jjt,kkr)]) + my_square(grid.eta_loc[I2V(iip,jjt,kkr)]))) * my_square(Tr_km) \
- _2_CR * Ttheta_km * Tphi_km );
}
} else {
grid.Kxi_loc[I2V(iip,jjt,kkr)] = _0_CR;
grid.Keta_loc[I2V(iip,jjt,kkr)] = _0_CR;
}
} else{
grid.Ks_loc[I2V(iip,jjt,kkr)] += _0_CR;
grid.Kxi_loc[I2V(iip,jjt,kkr)] += _0_CR;
grid.Keta_loc[I2V(iip,jjt,kkr)] += _0_CR;
grid.Ks_density_loc[I2V(iip,jjt,kkr)] += _0_CR;
grid.Kxi_density_loc[I2V(iip,jjt,kkr)] += _0_CR;
grid.Keta_density_loc[I2V(iip,jjt,kkr)] += _0_CR;
}
}
}
}
// boundary
for (int kkr = 0; kkr < nr; kkr++) {
for (int jjt = 0; jjt < nt; jjt++) {
// set Ks Kxi Keta to zero
if (grid.i_first()){
grid.Ks_loc[I2V(0,jjt,kkr)] = _0_CR;
grid.Kxi_loc[I2V(0,jjt,kkr)] = _0_CR;
grid.Keta_loc[I2V(0,jjt,kkr)] = _0_CR;
grid.Ks_density_loc[I2V(0,jjt,kkr)] = _0_CR;
grid.Kxi_density_loc[I2V(0,jjt,kkr)] = _0_CR;
grid.Keta_density_loc[I2V(0,jjt,kkr)] = _0_CR;
}
if (grid.i_last()){
grid.Ks_loc[I2V(np-1,jjt,kkr)] = _0_CR;
grid.Kxi_loc[I2V(np-1,jjt,kkr)] = _0_CR;
grid.Keta_loc[I2V(np-1,jjt,kkr)] = _0_CR;
grid.Ks_density_loc[I2V(np-1,jjt,kkr)] = _0_CR;
grid.Kxi_density_loc[I2V(np-1,jjt,kkr)] = _0_CR;
grid.Keta_density_loc[I2V(np-1,jjt,kkr)]= _0_CR;
}
}
}
for (int kkr = 0; kkr < nr; kkr++) {
for (int iip = 0; iip < np; iip++) {
// set Ks Kxi Keta to zero
if (grid.j_first()){
grid.Ks_loc[I2V(iip,0,kkr)] = _0_CR;
grid.Kxi_loc[I2V(iip,0,kkr)] = _0_CR;
grid.Keta_loc[I2V(iip,0,kkr)] = _0_CR;
grid.Ks_density_loc[I2V(iip,0,kkr)] = _0_CR;
grid.Kxi_density_loc[I2V(iip,0,kkr)] = _0_CR;
grid.Keta_density_loc[I2V(iip,0,kkr)] = _0_CR;
}
if (grid.j_last()){
grid.Ks_loc[I2V(iip,nt-1,kkr)] = _0_CR;
grid.Kxi_loc[I2V(iip,nt-1,kkr)] = _0_CR;
grid.Keta_loc[I2V(iip,nt-1,kkr)] = _0_CR;
grid.Ks_density_loc[I2V(iip,nt-1,kkr)] = _0_CR;
grid.Kxi_density_loc[I2V(iip,nt-1,kkr)] = _0_CR;
grid.Keta_density_loc[I2V(iip,nt-1,kkr)]= _0_CR;
}
}
}
for (int jjt = 0; jjt < nt; jjt++) {
for (int iip = 0; iip < np; iip++) {
// set Ks Kxi Keta to zero
if (grid.k_first()){
grid.Ks_loc[I2V(iip,jjt,0)] = _0_CR;
grid.Kxi_loc[I2V(iip,jjt,0)] = _0_CR;
grid.Keta_loc[I2V(iip,jjt,0)] = _0_CR;
grid.Ks_density_loc[I2V(iip,jjt,0)] = _0_CR;
grid.Kxi_density_loc[I2V(iip,jjt,0)] = _0_CR;
grid.Keta_density_loc[I2V(iip,jjt,0)] = _0_CR;
}
if (grid.k_last()){
grid.Ks_loc[I2V(iip,jjt,nr-1)] = _0_CR;
grid.Kxi_loc[I2V(iip,jjt,nr-1)] = _0_CR;
grid.Keta_loc[I2V(iip,jjt,nr-1)] = _0_CR;
grid.Ks_density_loc[I2V(iip,jjt,nr-1)] = _0_CR;
grid.Kxi_density_loc[I2V(iip,jjt,nr-1)] = _0_CR;
grid.Keta_density_loc[I2V(iip,jjt,nr-1)]= _0_CR;
}
}
}
} // end if subdom_main
}
void check_kernel_density(Grid& grid, InputParams& IP) {
if(subdom_main){
// check local kernel density positivity
for (int i_loc = 0; i_loc < loc_I; i_loc++) {
for (int j_loc = 0; j_loc < loc_J; j_loc++) {
for (int k_loc = 0; k_loc < loc_K; k_loc++) {
if (isNegative(grid.Ks_density_loc[I2V(i_loc,j_loc,k_loc)])){
std::cout << "Warning, id_sim: " << id_sim << ", grid.Ks_density_loc[I2V(" << i_loc << "," << j_loc << "," << k_loc << ")] is less than 0, = "
<< grid.Ks_density_loc[I2V(i_loc,j_loc,k_loc)]
<< std::endl;
// // print the source name
// for (int i_src = 0; i_src < IP.n_src_this_sim_group; i_src++){
// // get source info
// const std::string name_sim_src = IP.get_src_name(i_src); // get_src_name must be run by all processes
// std::cout << "id_sim: " << id_sim << ", "
// << i_src + 1 << "/" << IP.n_src_this_sim_group << ", source name: " << name_sim_src << std::endl;
// }
}
}
}
}
}
}
void sumup_kernels(Grid& grid) {
if(subdom_main){
int n_grids = loc_I*loc_J*loc_K;
allreduce_cr_sim_inplace(grid.Ks_loc, n_grids);
allreduce_cr_sim_inplace(grid.Kxi_loc, n_grids);
allreduce_cr_sim_inplace(grid.Keta_loc, n_grids);
allreduce_cr_sim_inplace(grid.Ks_density_loc, n_grids);
allreduce_cr_sim_inplace(grid.Kxi_density_loc, n_grids);
allreduce_cr_sim_inplace(grid.Keta_density_loc, n_grids);
// share the values on boundary
grid.send_recev_boundary_data(grid.Ks_loc);
grid.send_recev_boundary_data(grid.Kxi_loc);
grid.send_recev_boundary_data(grid.Keta_loc);
grid.send_recev_boundary_data(grid.Ks_density_loc);
grid.send_recev_boundary_data(grid.Kxi_density_loc);
grid.send_recev_boundary_data(grid.Keta_density_loc);
grid.send_recev_boundary_data_kosumi(grid.Ks_loc);
grid.send_recev_boundary_data_kosumi(grid.Kxi_loc);
grid.send_recev_boundary_data_kosumi(grid.Keta_loc);
grid.send_recev_boundary_data_kosumi(grid.Ks_density_loc);
grid.send_recev_boundary_data_kosumi(grid.Kxi_density_loc);
grid.send_recev_boundary_data_kosumi(grid.Keta_density_loc);
}
synchronize_all_world();
}
#endif

577
include/lbfgs.h Normal file
View File

@@ -0,0 +1,577 @@
#ifndef LBFGS_H
#define LBFGS_H
#include "config.h"
#include "utils.h"
#include "grid.h"
#include "mpi_funcs.h"
inline CUSTOMREAL volume_domain = _0_CR; // volume of the domain
inline CUSTOMREAL weight_Tikonov = _0_CR; // weight of the regularization term
inline CUSTOMREAL weight_Tikonov_ani = _0_CR; // weight of the regularization term for anisotropic regularization
inline int N_params = 3; // number of parameters to invert
inline CUSTOMREAL q_0 = _0_CR; // initial cost function
inline CUSTOMREAL qp_0 = _0_CR; // store initial p_k * grad(f_k) (descent_direction * gradient)
inline int damp_weight_fun = _1_CR; // damp weights for fun
inline int damp_weight_eta = _1_CR; // damp weights for eta
inline int damp_weight_xi = _1_CR; // damp weights for xi
void store_model_and_gradient(Grid& grid, int i_inv) {
if (subdom_main && id_sim==0) {
if (i_inv < Mbfgs){
for (int k = 0; k < loc_K; k++) {
for (int j = 0; j < loc_J; j++) {
for (int i = 0; i < loc_I; i++) {
grid.Ks_model_store_loc[ I2VLBFGS(i_inv,i,j,k)] = grid.fun_loc[I2V(i,j,k)];
grid.Keta_model_store_loc[I2VLBFGS(i_inv,i,j,k)] = grid.eta_loc[I2V(i,j,k)];
grid.Kxi_model_store_loc[ I2VLBFGS(i_inv,i,j,k)] = grid.xi_loc[ I2V(i,j,k)];
grid.Ks_grad_store_loc[ I2VLBFGS(i_inv,i,j,k)] = grid.Ks_update_loc[ I2V(i,j,k)];
grid.Keta_grad_store_loc[ I2VLBFGS(i_inv,i,j,k)] = grid.Keta_update_loc[I2V(i,j,k)];
grid.Kxi_grad_store_loc[ I2VLBFGS(i_inv,i,j,k)] = grid.Kxi_update_loc[ I2V(i,j,k)];
}
}
}
} else {
// replace the current stored models and gradients
for (int istore = 0; istore < Mbfgs-1; istore++){
for (int k = 0; k < loc_K; k++) {
for (int j = 0; j < loc_J; j++) {
for (int i = 0; i < loc_I; i++) {
grid.Ks_model_store_loc[ I2VLBFGS(istore,i,j,k)] = grid.Ks_model_store_loc[ I2VLBFGS(istore+1,i,j,k)];
grid.Keta_model_store_loc[I2VLBFGS(istore,i,j,k)] = grid.Keta_model_store_loc[I2VLBFGS(istore+1,i,j,k)];
grid.Kxi_model_store_loc[ I2VLBFGS(istore,i,j,k)] = grid.Kxi_model_store_loc[ I2VLBFGS(istore+1,i,j,k)];
grid.Ks_grad_store_loc[ I2VLBFGS(istore,i,j,k)] = grid.Ks_grad_store_loc[ I2VLBFGS(istore+1,i,j,k)];
grid.Keta_grad_store_loc[ I2VLBFGS(istore,i,j,k)] = grid.Keta_grad_store_loc[ I2VLBFGS(istore+1,i,j,k)];
grid.Kxi_grad_store_loc[ I2VLBFGS(istore,i,j,k)] = grid.Kxi_grad_store_loc[ I2VLBFGS(istore+1,i,j,k)];
}
}
}
}
// store new model and gradient
for (int k = 0; k < loc_K; k++) {
for (int j = 0; j < loc_J; j++) {
for (int i = 0; i < loc_I; i++) {
grid.Ks_model_store_loc[ I2VLBFGS(Mbfgs-1,i,j,k)] = grid.fun_loc[I2V(i,j,k)];
grid.Keta_model_store_loc[I2VLBFGS(Mbfgs-1,i,j,k)] = grid.eta_loc[I2V(i,j,k)];
grid.Kxi_model_store_loc[ I2VLBFGS(Mbfgs-1,i,j,k)] = grid.xi_loc[ I2V(i,j,k)];
grid.Ks_grad_store_loc[ I2VLBFGS(Mbfgs-1,i,j,k)] = grid.Ks_update_loc[ I2V(i,j,k)];
grid.Keta_grad_store_loc[ I2VLBFGS(Mbfgs-1,i,j,k)] = grid.Keta_update_loc[I2V(i,j,k)];
grid.Kxi_grad_store_loc[ I2VLBFGS(Mbfgs-1,i,j,k)] = grid.Kxi_update_loc[ I2V(i,j,k)];
}
}
}
}
} // end subdom main and id_sim==0
}
void calculate_descent_direction_lbfgs(Grid& grid, int i_inv) {
if (subdom_main && id_sim==0) {
int imin = 0;
int imax = 0;
if (i_inv >= Mbfgs)
imax = Mbfgs-2;
else
imax = i_inv-1;
CUSTOMREAL* ak_store = allocateMemory<CUSTOMREAL>(imax-imin+1, 2000);
CUSTOMREAL* pk_store = allocateMemory<CUSTOMREAL>(imax-imin+1, 2001);
CUSTOMREAL* desc_wks_Ks = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 2002);
CUSTOMREAL* desc_wks_Keta = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 2003);
CUSTOMREAL* desc_wks_Kxi = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 2004);
CUSTOMREAL* wks_1_Ks = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 2005);
CUSTOMREAL* wks_1_Keta = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 2006);
CUSTOMREAL* wks_1_Kxi = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 2007);
CUSTOMREAL* wks_2_Ks = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 2008);
CUSTOMREAL* wks_2_Keta = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 2009);
CUSTOMREAL* wks_2_Kxi = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 2010);
std::cout << "imax, imin: " << imax << ", " << imin << std::endl;
// initialize
for (int i = 0; i < imax-imin+1; i++) {
ak_store[i] = 0.0; // alpha
pk_store[i] = 0.0; //rho
}
for (int k = 0; k < loc_K; k++) {
for (int j = 0; j < loc_J; j++) {
for (int i = 0; i < loc_I; i++) {
desc_wks_Ks[I2V(i,j,k)] = grid.Ks_grad_store_loc[ I2VLBFGS(imax+1,i,j,k)]; // d
desc_wks_Keta[I2V(i,j,k)] = grid.Keta_grad_store_loc[I2VLBFGS(imax+1,i,j,k)]; // d
desc_wks_Kxi[I2V(i,j,k)] = grid.Kxi_grad_store_loc[ I2VLBFGS(imax+1,i,j,k)]; // d
wks_1_Ks[I2V(i,j,k)] = grid.Ks_grad_store_loc[ I2VLBFGS(imax+1,i,j,k)] - grid.Ks_grad_store_loc[ I2VLBFGS(imax,i,j,k)];
wks_1_Keta[I2V(i,j,k)] = grid.Keta_grad_store_loc[I2VLBFGS(imax+1,i,j,k)] - grid.Keta_grad_store_loc[I2VLBFGS(imax,i,j,k)];
wks_1_Kxi[I2V(i,j,k)] = grid.Kxi_grad_store_loc[ I2VLBFGS(imax+1,i,j,k)] - grid.Kxi_grad_store_loc[ I2VLBFGS(imax,i,j,k)];
}
}
}
//calculate l2 norms
CUSTOMREAL norm_yiter = 0.0;
norm_yiter += calc_l2norm(wks_1_Ks,loc_I*loc_J*loc_K);
norm_yiter += calc_l2norm(wks_1_Keta,loc_I*loc_J*loc_K);
norm_yiter += calc_l2norm(wks_1_Kxi,loc_I*loc_J*loc_K);
CUSTOMREAL tmp = norm_yiter;
allreduce_cr_single(tmp, norm_yiter);
for (int iinv = imax; iinv >= imin; iinv--){
for (int k = 0; k < loc_K; k++) {
for (int j = 0; j < loc_J; j++) {
for (int i = 0; i < loc_I; i++) {
wks_1_Ks[I2V(i,j,k)] = grid.Ks_grad_store_loc[ I2VLBFGS(iinv+1,i,j,k)] - grid.Ks_grad_store_loc[I2VLBFGS(iinv,i,j,k)];
wks_1_Keta[I2V(i,j,k)] = grid.Keta_grad_store_loc[I2VLBFGS(iinv+1,i,j,k)] - grid.Keta_grad_store_loc[I2VLBFGS(iinv,i,j,k)];
wks_1_Kxi[I2V(i,j,k)] = grid.Kxi_grad_store_loc[ I2VLBFGS(iinv+1,i,j,k)] - grid.Kxi_grad_store_loc[I2VLBFGS(iinv,i,j,k)];
wks_2_Ks[I2V(i,j,k)] = grid.Ks_model_store_loc[ I2VLBFGS(iinv+1,i,j,k)] - grid.Ks_model_store_loc[I2VLBFGS(iinv,i,j,k)];
wks_2_Keta[I2V(i,j,k)] = grid.Keta_model_store_loc[I2VLBFGS(iinv+1,i,j,k)] - grid.Keta_model_store_loc[I2VLBFGS(iinv,i,j,k)];
wks_2_Kxi[I2V(i,j,k)] = grid.Kxi_model_store_loc[ I2VLBFGS(iinv+1,i,j,k)] - grid.Kxi_model_store_loc[I2VLBFGS(iinv,i,j,k)];
}
}
}
CUSTOMREAL pk = _0_CR;
pk += dot_product(wks_1_Ks,wks_2_Ks,loc_I*loc_J*loc_K);
pk += dot_product(wks_1_Keta,wks_2_Keta,loc_I*loc_J*loc_K);
pk += dot_product(wks_1_Kxi,wks_2_Kxi,loc_I*loc_J*loc_K);
CUSTOMREAL tmp = pk;
allreduce_cr_single(tmp, pk);
pk_store[iinv] = _1_CR / pk;
CUSTOMREAL ak = _0_CR;
ak += dot_product(wks_2_Ks, desc_wks_Ks,loc_I*loc_J*loc_K);
ak += dot_product(wks_2_Keta, desc_wks_Keta,loc_I*loc_J*loc_K);
ak += dot_product(wks_2_Kxi, desc_wks_Kxi,loc_I*loc_J*loc_K);
// print ak
if(myrank== 0) std::cout << "ak: " << ak << std::endl;
tmp = ak;
allreduce_cr_single(tmp, ak);
// print ak
if(myrank== 0) std::cout << "ak gathered: " << ak << std::endl;
ak_store[iinv] = pk_store[iinv] * ak;
for (int k = 0; k < loc_K; k++) {
for (int j = 0; j < loc_J; j++) {
for (int i = 0; i < loc_I; i++) {
desc_wks_Ks[I2V(i,j,k)] -= ak_store[iinv] * wks_1_Ks[I2V(i,j,k)];
desc_wks_Keta[I2V(i,j,k)] -= ak_store[iinv] * wks_1_Keta[I2V(i,j,k)];
desc_wks_Kxi[I2V(i,j,k)] -= ak_store[iinv] * wks_1_Kxi[I2V(i,j,k)];
}
}
}
} // end loop iinv
// Nocedal's default preconditionning
CUSTOMREAL pk = _1_CR / (pk_store[imax] * norm_yiter);
for (int k = 0; k < loc_K; k++) {
for (int j = 0; j < loc_J; j++) {
for (int i = 0; i < loc_I; i++) {
desc_wks_Ks[I2V(i,j,k)] *= pk;
desc_wks_Keta[I2V(i,j,k)] *= pk;
desc_wks_Kxi[I2V(i,j,k)] *= pk;
}
}
}
// print ak_store and pk_store
if (myrank == 0) {
std::cout << "ak_store: ";
for (int i = 0; i < imax+1; i++) {
std::cout << ak_store[i] << " ";
}
std::cout << std::endl;
std::cout << "pk_store: ";
for (int i = 0; i < imax+1; i++) {
std::cout << pk_store[i] << " ";
}
std::cout << std::endl;
}
// custom preconditionning
// diagonal preconditionner
for (int iinv = imin; iinv <= imax; iinv++){
for (int k = 0; k < loc_K; k++) {
for (int j = 0; j < loc_J; j++) {
for (int i = 0; i < loc_I; i++) {
wks_1_Ks[I2V(i,j,k)] = grid.Ks_grad_store_loc[ I2VLBFGS(iinv+1,i,j,k)] - grid.Ks_grad_store_loc[I2VLBFGS(iinv,i,j,k)];
wks_1_Keta[I2V(i,j,k)] = grid.Keta_grad_store_loc[I2VLBFGS(iinv+1,i,j,k)] - grid.Keta_grad_store_loc[I2VLBFGS(iinv,i,j,k)];
wks_1_Kxi[I2V(i,j,k)] = grid.Kxi_grad_store_loc[ I2VLBFGS(iinv+1,i,j,k)] - grid.Kxi_grad_store_loc[I2VLBFGS(iinv,i,j,k)];
wks_2_Ks[I2V(i,j,k)] = grid.Ks_model_store_loc[ I2VLBFGS(iinv+1,i,j,k)] - grid.Ks_model_store_loc[I2VLBFGS(iinv,i,j,k)];
wks_2_Keta[I2V(i,j,k)] = grid.Keta_model_store_loc[I2VLBFGS(iinv+1,i,j,k)] - grid.Keta_model_store_loc[I2VLBFGS(iinv,i,j,k)];
wks_2_Kxi[I2V(i,j,k)] = grid.Kxi_model_store_loc[ I2VLBFGS(iinv+1,i,j,k)] - grid.Kxi_model_store_loc[I2VLBFGS(iinv,i,j,k)];
}
}
}
CUSTOMREAL beta = _0_CR;
beta += dot_product(wks_1_Ks,desc_wks_Ks,loc_I*loc_J*loc_K);
beta += dot_product(wks_1_Keta,desc_wks_Keta,loc_I*loc_J*loc_K);
beta += dot_product(wks_1_Kxi,desc_wks_Kxi,loc_I*loc_J*loc_K);
CUSTOMREAL tmp = beta;
allreduce_cr_single(tmp, beta);
beta = pk_store[iinv] * beta;
for (int k = 0; k < loc_K; k++) {
for (int j = 0; j < loc_J; j++) {
for (int i = 0; i < loc_I; i++) {
desc_wks_Ks[I2V(i,j,k)] += (ak_store[iinv]-beta) * wks_2_Ks[I2V(i,j,k)];
desc_wks_Keta[I2V(i,j,k)] += (ak_store[iinv]-beta) * wks_2_Keta[I2V(i,j,k)];
desc_wks_Kxi[I2V(i,j,k)] += (ak_store[iinv]-beta) * wks_2_Kxi[I2V(i,j,k)];
}
}
}
}
// descent directions
for (int k = 0; k < loc_K; k++) {
for (int j = 0; j < loc_J; j++) {
for (int i = 0; i < loc_I; i++) {
grid.Ks_descent_dir_loc[I2V(i,j,k)] = - _1_CR * desc_wks_Ks[I2V(i,j,k)];
grid.Keta_descent_dir_loc[I2V(i,j,k)] = - _1_CR * desc_wks_Keta[I2V(i,j,k)];
grid.Kxi_descent_dir_loc[I2V(i,j,k)] = - _1_CR * desc_wks_Kxi[I2V(i,j,k)];
//grid.Ks_descent_dir_loc[I2V(i,j,k)] = desc_wks_Ks[I2V(i,j,k)];
//grid.Keta_descent_dir_loc[I2V(i,j,k)] = desc_wks_Keta[I2V(i,j,k)];
//grid.Kxi_descent_dir_loc[I2V(i,j,k)] = desc_wks_Kxi[I2V(i,j,k)];
}
}
}
delete [] ak_store;
delete [] pk_store;
delete [] desc_wks_Ks;
delete [] desc_wks_Keta;
delete [] desc_wks_Kxi;
delete [] wks_1_Ks;
delete [] wks_1_Keta;
delete [] wks_1_Kxi;
delete [] wks_2_Ks;
delete [] wks_2_Keta;
delete [] wks_2_Kxi;
} // end of subdom_main
// share with all simultaneous run
if (subdom_main){
broadcast_cr_inter_sim(grid.Ks_descent_dir_loc, loc_I*loc_J*loc_K, 0);
broadcast_cr_inter_sim(grid.Keta_descent_dir_loc, loc_I*loc_J*loc_K, 0);
broadcast_cr_inter_sim(grid.Kxi_descent_dir_loc, loc_I*loc_J*loc_K, 0);
}
}
/*
// laplacian approximation in cartesian coordinates
inline void calc_laplacian_field(Grid& grid, CUSTOMREAL* arr_in, CUSTOMREAL* arr_res) {
if (subdom_main) {
//CUSTOMREAL lr = 1.0;
//CUSTOMREAL lt = 1.0;
//CUSTOMREAL lp = 1.0;
CUSTOMREAL lr = regul_lp;
CUSTOMREAL lt = regul_lt;
CUSTOMREAL lp = regul_lp;
// calculate L(m)
for (int k = 1; k < loc_K-1; k++) {
for (int j = 1; j < loc_J-1; j++) {
for (int i = 1; i < loc_I-1; i++) {
arr_res[I2V(i,j,k)] = \
- _2_CR * (lr+lt+lp) * arr_in[I2V(i,j,k)] \
+ lr * (arr_in[I2V(i,j,k+1)] + arr_in[I2V(i,j,k-1)]) \
+ lt * (arr_in[I2V(i,j+1,k)] + arr_in[I2V(i,j-1,k)]) \
+ lp * (arr_in[I2V(i+1,j,k)] + arr_in[I2V(i-1,j,k)]);
}
}
}
// communicate with adjacent subdomains
grid.send_recev_boundary_data(arr_res);
}
}
*/
// laplacian approximation in spherical coordinates
inline void calc_laplacian_field(Grid& grid, CUSTOMREAL* d, CUSTOMREAL* arr_out){
if (subdom_main) {
CUSTOMREAL lr = regul_lp;
CUSTOMREAL lt = regul_lt;
CUSTOMREAL lp = regul_lp;
CUSTOMREAL dr = grid.dr;
CUSTOMREAL dt = grid.dt;
CUSTOMREAL dp = grid.dp;
CUSTOMREAL r,t;
// calculate L(m) in sphercal coordinates
for (int k = 1; k < loc_K-1; k++) {
for (int j = 1; j < loc_J-1; j++) {
for (int i = 1; i < loc_I-1; i++) {
r = grid.r_loc_1d[k];
t = grid.t_loc_1d[j];
// finite difference approximation of laplacian spherical coordinates
arr_out[I2V(i,j,k)] = \
lr*lr*((d[I2V(i,j,k+1)] - _2_CR*d[I2V(i,j,k)] + d[I2V(i,j,k-1)])/(dr*dr) \
+(_2_CR/r)*(d[I2V(i,j,k+1)]-d[I2V(i,j,k-1)])/dr) \
+ lt*lt*((_1_CR/(r*r*std::sin(t)))*(std::cos(t)*(d[I2V(i,j+1,k)]-d[I2V(i,j-1,k)])/dt \
- (std::sin(t)*(d[I2V(i,j+1,k)]-_2_CR*d[I2V(i,j,k)]+d[I2V(i,j-1,k)])/(dt*dt)))) \
+ lp*lp*((_1_CR/(r*r*std::sin(t)*std::sin(t)))*(d[I2V(i+1,j,k)]-_2_CR*d[I2V(i,j,k)]+d[I2V(i-1,j,k)])/(dp*dp));
}
}
}
// communicate with adjacent subdomains
grid.send_recev_boundary_data(arr_out);
}
}
// add 1/2 * L(m)^2 to the objective function
inline CUSTOMREAL calculate_regularization_obj(Grid& grid) {
CUSTOMREAL regularization_term = _0_CR;
if (subdom_main && id_sim==0) {
int ngrid = loc_I*loc_J*loc_K;
regularization_term += weight_Tikonov * calc_l2norm(grid.fun_regularization_penalty_loc, ngrid);
regularization_term += weight_Tikonov_ani * calc_l2norm(grid.eta_regularization_penalty_loc, ngrid);
regularization_term += weight_Tikonov_ani * calc_l2norm(grid.xi_regularization_penalty_loc, ngrid);
// gather from all subdomain
CUSTOMREAL tmp = regularization_term;
allreduce_cr_single(tmp, regularization_term);
}
// share retularization_term with all simultaneous run (may be unnecessary)
broadcast_cr_single_inter_sim(regularization_term, 0);
return _0_5_CR * regularization_term;
}
inline void init_regularization_penalty(Grid& grid) {
if (subdom_main && id_sim==0) {
int ngrid = loc_I*loc_J*loc_K;
// initialize regularization penalties
std::fill(grid.fun_regularization_penalty_loc, grid.fun_regularization_penalty_loc + ngrid, _0_CR);
std::fill(grid.eta_regularization_penalty_loc, grid.eta_regularization_penalty_loc + ngrid, _0_CR);
std::fill(grid.xi_regularization_penalty_loc, grid.xi_regularization_penalty_loc + ngrid, _0_CR);
std::fill(grid.fun_gradient_regularization_penalty_loc, grid.fun_gradient_regularization_penalty_loc + ngrid, _0_CR);
std::fill(grid.eta_gradient_regularization_penalty_loc, grid.eta_gradient_regularization_penalty_loc + ngrid, _0_CR);
std::fill(grid.xi_gradient_regularization_penalty_loc, grid.xi_gradient_regularization_penalty_loc + ngrid, _0_CR);
}
}
inline void init_regulaization_penalty_with_one(Grid& grid) {
if (subdom_main && id_sim==0) {
int ngrid = loc_I*loc_J*loc_K;
// initialize regularization penalties
std::fill(grid.fun_regularization_penalty_loc, grid.fun_regularization_penalty_loc + ngrid, _1_CR);
std::fill(grid.eta_regularization_penalty_loc, grid.eta_regularization_penalty_loc + ngrid, _1_CR);
std::fill(grid.xi_regularization_penalty_loc, grid.xi_regularization_penalty_loc + ngrid, _1_CR);
std::fill(grid.fun_gradient_regularization_penalty_loc, grid.fun_gradient_regularization_penalty_loc + ngrid, _1_CR);
std::fill(grid.eta_gradient_regularization_penalty_loc, grid.eta_gradient_regularization_penalty_loc + ngrid, _1_CR);
std::fill(grid.xi_gradient_regularization_penalty_loc, grid.xi_gradient_regularization_penalty_loc + ngrid, _1_CR);
}
}
inline void calculate_regularization_penalty(Grid& grid) {
if (subdom_main && id_sim==0) {
int n_grid = loc_I*loc_J*loc_K;
CUSTOMREAL* tmp_fun = allocateMemory<CUSTOMREAL>(n_grid, 2011);
CUSTOMREAL* tmp_eta = allocateMemory<CUSTOMREAL>(n_grid, 2012);
CUSTOMREAL* tmp_xi = allocateMemory<CUSTOMREAL>(n_grid, 2013);
for (int i = 0; i < n_grid; i++) {
tmp_fun[i] = grid.fun_loc[i] - grid.fun_prior_loc[i];
tmp_eta[i] = grid.eta_loc[i] - grid.eta_prior_loc[i];
tmp_xi[i] = grid.xi_loc[i] - grid.xi_prior_loc[i];
}
init_regularization_penalty(grid);
// calculate LL(m) on fun (Ks)
calc_laplacian_field(grid, tmp_fun, grid.fun_regularization_penalty_loc);
calc_laplacian_field(grid, grid.fun_regularization_penalty_loc, grid.fun_gradient_regularization_penalty_loc);
// calculate LL(m) on eta (Keta)
calc_laplacian_field(grid, tmp_eta, grid.eta_regularization_penalty_loc);
calc_laplacian_field(grid, grid.eta_regularization_penalty_loc, grid.eta_gradient_regularization_penalty_loc);
// calculate LL(m) on xi (Kxi)
calc_laplacian_field(grid, tmp_xi, grid.xi_regularization_penalty_loc);
calc_laplacian_field(grid, grid.xi_regularization_penalty_loc, grid.xi_gradient_regularization_penalty_loc);
for (int i = 0; i < n_grid; i++){
// calculate gradient_regularization_penalty first for avoiding using overwrited value in regularization_penalty
grid.fun_gradient_regularization_penalty_loc[i] = damp_weight_fun*damp_weight_fun*(tmp_fun[i] - _2_CR * grid.fun_regularization_penalty_loc[i] + grid.fun_gradient_regularization_penalty_loc[i]);
grid.eta_gradient_regularization_penalty_loc[i] = damp_weight_eta*damp_weight_eta*(tmp_eta[i] - _2_CR * grid.eta_regularization_penalty_loc[i] + grid.eta_gradient_regularization_penalty_loc[i]);
grid.xi_gradient_regularization_penalty_loc[i] = damp_weight_xi *damp_weight_xi *(tmp_xi[i] - _2_CR * grid.xi_regularization_penalty_loc[i] + grid.xi_gradient_regularization_penalty_loc[i]);
}
grid.send_recev_boundary_data(grid.fun_gradient_regularization_penalty_loc);
grid.send_recev_boundary_data(grid.eta_gradient_regularization_penalty_loc);
grid.send_recev_boundary_data(grid.xi_gradient_regularization_penalty_loc);
for (int i = 0; i < n_grid; i++){
// calculate gradient_regularization_penalty first for avoiding using overwrited value in regularization_penalty
grid.fun_regularization_penalty_loc[i] = damp_weight_fun*(tmp_fun[i] - grid.fun_regularization_penalty_loc[i]);
grid.eta_regularization_penalty_loc[i] = damp_weight_eta*(tmp_eta[i] - grid.eta_regularization_penalty_loc[i]);
grid.xi_regularization_penalty_loc[i] = damp_weight_xi *(tmp_xi[i] - grid.xi_regularization_penalty_loc[i]);
}
grid.send_recev_boundary_data(grid.fun_regularization_penalty_loc);
grid.send_recev_boundary_data(grid.eta_regularization_penalty_loc);
grid.send_recev_boundary_data(grid.xi_regularization_penalty_loc);
delete [] tmp_fun;
delete [] tmp_eta;
delete [] tmp_xi;
}
}
// add grad(L(m)) to gradient
inline void add_regularization_grad(Grid& grid) {
if (subdom_main){
if(id_sim==0){
// add LL(m) to gradient
for (int k = 0; k < loc_K; k++) {
for (int j = 0; j < loc_J; j++) {
for (int i = 0; i < loc_I; i++) {
//grid.fun_gradient_regularization_penalty_loc[I2V(i,j,k)] /= Linf_Ks;
//grid.eta_gradient_regularization_penalty_loc[I2V(i,j,k)] /= Linf_Keta;
//grid.xi_gradient_regularization_penalty_loc[I2V(i,j,k)] /= Linf_Kxi;
grid.Ks_update_loc[I2V(i,j,k)] += weight_Tikonov * grid.fun_gradient_regularization_penalty_loc[I2V(i,j,k)];
grid.Keta_update_loc[I2V(i,j,k)] += weight_Tikonov_ani * grid.eta_gradient_regularization_penalty_loc[I2V(i,j,k)];
grid.Kxi_update_loc[I2V(i,j,k)] += weight_Tikonov_ani * grid.xi_gradient_regularization_penalty_loc[I2V(i,j,k)];
}
}
}
grid.send_recev_boundary_data(grid.Ks_update_loc);
grid.send_recev_boundary_data(grid.Keta_update_loc);
grid.send_recev_boundary_data(grid.Kxi_update_loc);
}// end id_sim==0
// share with all simultaneous run (may be unnecessary)
broadcast_cr_inter_sim(grid.Ks_update_loc, loc_I*loc_J*loc_K, 0);
broadcast_cr_inter_sim(grid.Keta_update_loc, loc_I*loc_J*loc_K, 0);
broadcast_cr_inter_sim(grid.Kxi_update_loc, loc_I*loc_J*loc_K, 0);
}// end subdom_main
}
// compute initial guess for step length to try for line search
void initial_guess_step(Grid& grid, CUSTOMREAL& step_length, CUSTOMREAL SC_VAL) {
// find the max value in descent_direction
CUSTOMREAL max_val = _0_CR, max_val_all = _0_CR;
const CUSTOMREAL max_relative_pert = 0.02;
if(subdom_main && id_sim==0) {
for (int k = 0; k < loc_K; k++) {
for (int j = 0; j < loc_J; j++) {
for (int i = 0; i < loc_I; i++) {
if (std::abs(grid.Ks_descent_dir_loc[I2V(i,j,k)] ) > max_val) max_val = std::abs(grid.Ks_descent_dir_loc[I2V(i,j,k)]);
if (std::abs(grid.Keta_descent_dir_loc[I2V(i,j,k)]) > max_val) max_val = std::abs(grid.Keta_descent_dir_loc[I2V(i,j,k)]);
if (std::abs(grid.Kxi_descent_dir_loc[I2V(i,j,k)] ) > max_val) max_val = std::abs(grid.Kxi_descent_dir_loc[I2V(i,j,k)]);
}
}
}
// reduce
allreduce_cr_single_max(max_val, max_val_all);
// debug print max_val
std::cout << "max_val: " << max_val_all << std::endl;
// step size
step_length = max_relative_pert / SC_VAL / max_val_all;
}
// broadcast to all simultaneous run (may be unnecessary)
if (subdom_main)
broadcast_cr_single_inter_sim(step_length, 0);
}
inline CUSTOMREAL compute_volume_domain(Grid& grid) {
CUSTOMREAL volume = _0_CR;
if (subdom_main && id_sim==0) {
volume += calc_l2norm(grid.fun_regularization_penalty_loc, loc_I*loc_J*loc_K);
volume += calc_l2norm(grid.eta_regularization_penalty_loc, loc_I*loc_J*loc_K);
volume += calc_l2norm(grid.xi_regularization_penalty_loc, loc_I*loc_J*loc_K);
// reduce
allreduce_cr_single(volume, volume);
}
// broadcast to all simultaneous run (may be unnecessary)
broadcast_cr_single_inter_sim(volume, 0);
return volume;
}
inline void compute_damp_weights(const Grid& grid) {
// (size/sum)**2
if(subdom_main){
CUSTOMREAL sum_fun = std::accumulate(grid.fun_prior_loc, grid.fun_prior_loc + loc_I*loc_J*loc_K, _0_CR);
//CUSTOMREAL sum_eta = std::accumulate(grid.eta_prior_loc, grid.eta_prior_loc + loc_I*loc_J*loc_K, _0_CR); // do not use
//CUSTOMREAL sum_xi = std::accumulate(grid.xi_prior_loc, grid.xi_prior_loc + loc_I*loc_J*loc_K, _0_CR);
allreduce_cr_single(sum_fun, sum_fun);
damp_weight_fun = (volume_domain / sum_fun)*(volume_domain / sum_fun);
//damp_weight_eta = (volume / sum_eta)*(volume / sum_eta);
//damp_weight_xi = (volume / sum_xi)*(volume / sum_xi);
damp_weight_eta = _1_CR;
damp_weight_xi = _1_CR;
}
}
#endif

View File

@@ -0,0 +1,923 @@
#ifndef MAIN_ROUTINES_CALLING_H
#define MAIN_ROUTINES_CALLING_H
#include <iostream>
#include <memory>
#include "mpi_funcs.h"
#include "config.h"
#include "utils.h"
#include "input_params.h"
#include "grid.h"
#include "io.h"
#include "main_routines_inversion_mode.h"
#include "model_optimization_routines.h"
#include "main_routines_earthquake_relocation.h"
#include "iterator_selector.h"
#include "iterator.h"
#include "iterator_legacy.h"
#include "iterator_level.h"
#include "source.h"
#include "receiver.h"
#include "kernel.h"
#include "model_update.h"
#include "lbfgs.h"
#include "objective_function_utils.h"
#include "timer.h"
#include "oneD_inversion.h"
// run forward-only or inversion mode
inline void run_forward_only_or_inversion(InputParams &IP, Grid &grid, IO_utils &io) {
// for check if the current source is the first source
bool first_src = true;
if(myrank == 0)
std::cout << "id_sim: " << id_sim << ", size of src_map: " << IP.src_map.size() << std::endl;
// estimate running time
Timer timer("Forward_or_inversion", true);
// prepare objective_function file
std::ofstream out_main; // close() is not mandatory
prepare_header_line(IP, out_main);
if (id_sim==0) {
io.prepare_grid_inv_xdmf(0);
//io.change_xdmf_obj(0); // change xmf file for next src
io.change_group_name_for_model();
// write out model info
io.write_vel(grid, 0);
io.write_xi( grid, 0);
io.write_eta(grid, 0);
//io.write_zeta(grid, i_inv); // TODO
if (IP.get_verbose_output_level()){
io.write_a(grid, 0);
io.write_b(grid, 0);
io.write_c(grid, 0);
io.write_f(grid, 0);
io.write_fun(grid, 0);
}
// // output model_parameters_inv_0000.dat
// if (IP.get_if_output_model_dat())
// io.write_concerning_parameters(grid, 0, IP);
}
// output station correction file (only for teleseismic differential data)
IP.write_station_correction_file(0);
synchronize_all_world();
/////////////////////
// loop for inversion
/////////////////////
bool line_search_mode = false; // if true, run_simulation_one_step skips adjoint simulation and only calculates objective function value
// objective function for all src
CUSTOMREAL v_obj = 0.0, old_v_obj = 0.0;
std::vector<CUSTOMREAL> v_obj_misfit(20, 0.0);
for (int i_inv = 0; i_inv < IP.get_max_iter_inv(); i_inv++) {
if(myrank == 0 && id_sim ==0){
std::cout << "iteration " << i_inv << " starting ... " << std::endl;
}
old_v_obj = v_obj;
// prepare inverstion iteration group in xdmf file
io.prepare_grid_inv_xdmf(i_inv);
///////////////////////////////////////////////////////
// run (forward and adjoint) simulation for each source
///////////////////////////////////////////////////////
// run forward and adjoint simulation and calculate current objective function value and sensitivity kernel for all sources
line_search_mode = false;
// skip for the mode with sub-iteration
if (i_inv > 0 && optim_method != GRADIENT_DESCENT) {
} else {
bool is_save_T = false;
v_obj_misfit = run_simulation_one_step(IP, grid, io, i_inv, first_src, line_search_mode, is_save_T);
v_obj = v_obj_misfit[0];
}
// wait for all processes to finish
synchronize_all_world();
// check if v_obj is nan
if (std::isnan(v_obj)) {
if (myrank == 0)
std::cout << "v_obj is nan, stop inversion" << std::endl;
// stop inversion
break;
}
// output src rec file with the result arrival times
if (IP.get_if_output_in_process_data() || i_inv == IP.get_max_iter_inv()-1 || i_inv==0) {
IP.write_src_rec_file(i_inv,0);
}
///////////////
// model update
///////////////
if(myrank == 0 && id_sim ==0)
std::cout << "model update starting ... " << std::endl;
if (IP.get_run_mode() == DO_INVERSION) {
if (optim_method == GRADIENT_DESCENT)
model_optimize(IP, grid, io, i_inv, v_obj, old_v_obj, first_src, out_main);
else if (optim_method == HALVE_STEPPING_MODE)
v_obj_misfit = model_optimize_halve_stepping(IP, grid, io, i_inv, v_obj, first_src, out_main);
else if (optim_method == LBFGS_MODE) {
bool found_next_step = model_optimize_lbfgs(IP, grid, io, i_inv, v_obj, first_src, out_main);
if (!found_next_step)
goto end_of_inversion;
}
}
// output station correction file (only for teleseismic differential data)
IP.write_station_correction_file(i_inv + 1);
// output objective function
write_objective_function(IP, i_inv, v_obj_misfit, out_main, "model update");
// since model is update. The written traveltime field should be discraded.
// initialize is_T_written_into_file
for (int i_src = 0; i_src < IP.n_src_this_sim_group; i_src++){
const std::string name_sim_src = IP.get_src_name(i_src);
if (proc_store_srcrec) // only proc_store_srcrec has the src_map object
IP.src_map[name_sim_src].is_T_written_into_file = false;
}
// output updated model
if (id_sim==0) {
//io.change_xdmf_obj(0); // change xmf file for next src
io.change_group_name_for_model();
// write out model info
if (IP.get_if_output_in_process() || i_inv >= IP.get_max_iter_inv() - 2){
io.write_vel(grid, i_inv+1);
io.write_xi( grid, i_inv+1);
io.write_eta(grid, i_inv+1);
}
//io.write_zeta(grid, i_inv); // TODO
if (IP.get_verbose_output_level()){
io.write_a(grid, i_inv+1);
io.write_b(grid, i_inv+1);
io.write_c(grid, i_inv+1);
io.write_f(grid, i_inv+1);
io.write_fun(grid, i_inv+1);
}
// // output model_parameters_inv_0000.dat
// if (IP.get_if_output_model_dat()
// && (IP.get_if_output_in_process() || i_inv >= IP.get_max_iter_inv() - 2))
// io.write_concerning_parameters(grid, i_inv + 1, IP);
}
// writeout temporary xdmf file
io.update_xdmf_file();
// wait for all processes to finish
synchronize_all_world();
// estimate running time
CUSTOMREAL time_elapsed = timer.get_t();
if (id_sim == 0 && myrank == 0 && i_inv < IP.get_max_iter_inv()-1) {
const time_t end_time_estimated = time_elapsed / (i_inv + 1) * (IP.get_max_iter_inv() - i_inv - 1) + timer.get_start();
auto will_run_time = (int)(time_elapsed/(i_inv + 1) * (IP.get_max_iter_inv() - i_inv - 1));
std::cout << std::endl;
std::cout << "The program begins at " << timer.get_start_t() << std::endl;
std::cout << "Iteration (" << i_inv + 1 << "/" << IP.get_max_iter_inv() << ") finished at " << time_elapsed << " seconds" << std::endl;
std::cout << i_inv + 1 << " iterations run " << timer.get_t() << " seconds, the rest of " << IP.get_max_iter_inv() - i_inv - 1 << " iterations require " << will_run_time << " seconds." << std::endl;
std::cout << "The program is estimated to stop at " << timer.get_utc_from_time_t(end_time_estimated) << std::endl;
std::cout << std::endl;
}
// output current state of the model
if (IP.get_if_output_final_model()) {
// io.write_final_model(grid, IP);
io.write_merged_model(grid, IP, "final_model.h5");
}
if (IP.get_if_output_middle_model()) {
std::string tmp_fname = "middle_model_step_" + int2string_zero_fill(i_inv + 1) + ".h5";
io.write_merged_model(grid, IP, tmp_fname);
}
} // end loop inverse
end_of_inversion:
// close xdmf file
io.finalize_data_output_file();
timer.stop_timer();
if (id_sim == 0 && myrank == 0) {
std::cout << std::endl;
std::cout << "The program begin at " << timer.get_start_t() << std::endl;
std::cout << "Forward_or_inversion end at " << timer.get_end_t() << std::endl;
std::cout << "It has run " << timer.get_elapsed_t() << " seconds in total." << std::endl;
std::cout << std::endl;
}
}
// run earthquake relocation mode
inline void run_earthquake_relocation(InputParams& IP, Grid& grid, IO_utils& io) {
Receiver recs;
// calculate traveltime for each receiver (swapped from source) and write in output file
calculate_traveltime_for_all_src_rec(IP, grid, io);
// prepare output for iteration status
std::ofstream out_main; // close() is not mandatory
prepare_header_line(IP, out_main);
// objective function and its gradient
CUSTOMREAL v_obj = 999999999.0;
int i_iter = 0;
std::vector<CUSTOMREAL> v_obj_misfit;
// iterate
while (true) {
v_obj = 0.0;
// calculate gradient of objective function at sources
v_obj_misfit = calculate_gradient_objective_function(IP, grid, io, i_iter);
v_obj = v_obj_misfit[0];
// update source location
recs.update_source_location(IP, grid);
synchronize_all_world();
// check convergence
bool finished = false;
if (subdom_main && id_subdomain==0) {
if (i_iter >= N_ITER_MAX_SRC_RELOC){
std::cout << "Finished relocation because iteration number exceeds the maximum " << N_ITER_MAX_SRC_RELOC << std::endl;
finished = true;
}
allreduce_bool_single_inplace_sim(finished); //LAND
}
synchronize_all_world();
// check if all processes have finished
broadcast_bool_inter_and_intra_sim(finished, 0);
synchronize_all_world();
// output location information
if(id_sim == 0 && myrank == 0){
// write objective function
std::cout << "iteration: " << i_iter << ", objective function: " << v_obj << std::endl;
}
// write objective functions
write_objective_function(IP, i_iter, v_obj_misfit, out_main, "relocation");
// write out new src_rec_file
if (IP.get_if_output_in_process_data() || i_iter == N_ITER_MAX_SRC_RELOC-1 || i_iter==0){
IP.write_src_rec_file(0,i_iter);
}
// modify the receiver's location for output
IP.modify_swapped_source_location();
if (finished)
break;
// new iteration
i_iter++;
}
// modify the receiver's location
IP.modify_swapped_source_location();
// write out new src_rec_file
IP.write_src_rec_file(0,i_iter);
// close xdmf file
io.finalize_data_output_file();
}
// run earthquake relocation mode
inline void run_inversion_and_relocation(InputParams& IP, Grid& grid, IO_utils& io) {
Timer timer("Inv_and_reloc", true);
/////////////////////
// preparation of model update
/////////////////////
// for check if the current source is the first source
bool first_src = true;
if(myrank == 0)
std::cout << "id_sim: " << id_sim << ", size of src_map: " << IP.src_map.size() << std::endl;
// prepare objective_function file
std::ofstream out_main; // close() is not mandatory
prepare_header_line(IP, out_main);
if (id_sim==0) {
// prepare inverstion iteration group in xdmf file
io.prepare_grid_inv_xdmf(0);
//io.change_xdmf_obj(0); // change xmf file for next src
io.change_group_name_for_model();
// write out model info
io.write_vel(grid, 0);
io.write_xi( grid, 0);
io.write_eta(grid, 0);
//io.write_zeta(grid, i_inv); // TODO
if (IP.get_verbose_output_level()){
io.write_a(grid, 0);
io.write_b(grid, 0);
io.write_c(grid, 0);
io.write_f(grid, 0);
io.write_fun(grid, 0);
}
// // output model_parameters_inv_0000.dat
// if (IP.get_if_output_model_dat())
// io.write_concerning_parameters(grid, 0, IP);
}
// output station correction file (only for teleseismic differential data)
IP.write_station_correction_file(0);
synchronize_all_world();
bool line_search_mode = false; // if true, run_simulation_one_step skips adjoint simulation and only calculates objective function value
/////////////////////
// preparation of relocation
/////////////////////
Receiver recs;
/////////////////////
// initializae objective function
/////////////////////
CUSTOMREAL v_obj = 0.0, old_v_obj = 10000000000.0;
std::vector<CUSTOMREAL> v_obj_misfit(20, 0.0);
if (inv_mode == ITERATIVE){
/////////////////////
// main loop for model update and relocation
/////////////////////
int model_update_step = 0;
int relocation_step = 0;
for (int i_loop = 0; i_loop < IP.get_max_loop_mode0(); i_loop++){
/////////////////////
// stage 1, update model parameters
/////////////////////
for (int one_loop_i_inv = 0; one_loop_i_inv < IP.get_model_update_N_iter(); one_loop_i_inv++){
// the actural step index of model update
int i_inv = i_loop * IP.get_model_update_N_iter() + one_loop_i_inv;
if(myrank == 0 && id_sim ==0){
std::cout << std::endl;
std::cout << "loop " << i_loop+1 << ", model update iteration " << one_loop_i_inv+1
<< " ( the " << i_inv+1 << "-th model update) starting ... " << std::endl;
std::cout << std::endl;
}
// prepare inverstion iteration group in xdmf file
io.prepare_grid_inv_xdmf(i_inv);
///////////////////////////////////////////////////////
// run (forward and adjoint) simulation for each source
///////////////////////////////////////////////////////
// run forward and adjoint simulation and calculate current objective function value and sensitivity kernel for all sources
line_search_mode = false;
// skip for the mode with sub-iteration
if (i_inv > 0 && optim_method != GRADIENT_DESCENT) {
} else {
bool is_save_T = false;
v_obj_misfit = run_simulation_one_step(IP, grid, io, i_inv, first_src, line_search_mode, is_save_T);
v_obj = v_obj_misfit[0];
}
// wait for all processes to finish
synchronize_all_world();
// check if v_obj is nan
if (std::isnan(v_obj)) {
if (myrank == 0)
std::cout << "v_obj is nan, stop inversion" << std::endl;
// stop inversion
break;
}
///////////////
// model update
///////////////
if (optim_method == GRADIENT_DESCENT)
model_optimize(IP, grid, io, i_inv, v_obj, old_v_obj, first_src, out_main);
else if (optim_method == HALVE_STEPPING_MODE)
v_obj_misfit = model_optimize_halve_stepping(IP, grid, io, i_inv, v_obj, first_src, out_main);
else if (optim_method == LBFGS_MODE) {
bool found_next_step = model_optimize_lbfgs(IP, grid, io, i_inv, v_obj, first_src, out_main);
if (!found_next_step)
break;
}
// define old_v_obj
old_v_obj = v_obj;
// output objective function
write_objective_function(IP, i_inv, v_obj_misfit, out_main, "model update");
// since model is update. The written traveltime field should be discraded.
// initialize is_T_written_into_file
for (int i_src = 0; i_src < IP.n_src_this_sim_group; i_src++){
const std::string name_sim_src = IP.get_src_name(i_src);
if (proc_store_srcrec) // only proc_store_srcrec has the src_map object
IP.src_map[name_sim_src].is_T_written_into_file = false;
}
// output updated model
if (id_sim==0) {
//io.change_xdmf_obj(0); // change xmf file for next src
io.change_group_name_for_model();
// write out model info
if (IP.get_if_output_in_process() || i_inv >= IP.get_max_loop_mode0()*IP.get_model_update_N_iter() - 2){
io.write_vel(grid, i_inv+1);
io.write_xi( grid, i_inv+1);
io.write_eta(grid, i_inv+1);
}
//io.write_zeta(grid, i_inv); // TODO
if (IP.get_verbose_output_level()){
io.write_a(grid, i_inv+1);
io.write_b(grid, i_inv+1);
io.write_c(grid, i_inv+1);
io.write_f(grid, i_inv+1);
io.write_fun(grid, i_inv+1);
}
// // output model_parameters_inv_0000.dat
// if (IP.get_if_output_model_dat()
// && (IP.get_if_output_in_process() || i_inv >= IP.get_max_loop_mode0()*IP.get_model_update_N_iter() - 2))
// io.write_concerning_parameters(grid, i_inv + 1, IP);
} // end output updated model
// writeout temporary xdmf file
io.update_xdmf_file();
// write src rec files
if (IP.get_if_output_in_process_data()){
IP.write_src_rec_file(model_update_step,relocation_step);
} else if (i_inv == IP.get_max_loop_mode0() * IP.get_model_update_N_iter()-1 || i_inv==0) {
IP.write_src_rec_file(model_update_step,relocation_step);
}
model_update_step += 1;
// wait for all processes to finish
synchronize_all_world();
// output current state of the model
if (IP.get_if_output_final_model()) {
// io.write_final_model(grid, IP);
io.write_merged_model(grid, IP, "final_model.h5");
}
if (IP.get_if_output_middle_model()) {
std::string tmp_fname = "middle_model_step_" + int2string_zero_fill(i_inv + 1) + ".h5";
io.write_merged_model(grid, IP, tmp_fname);
}
} // end model update in one loop
/////////////////////
// stage 2, update earthquake locations
/////////////////////
if(myrank == 0 && id_sim ==0){
std::cout << std::endl;
std::cout << "loop " << i_loop+1 << ", computing traveltime field for relocation" << std::endl;
std::cout << std::endl;
}
// calculate traveltime for each receiver (swapped from source) and write in output file
if (IP.get_relocation_N_iter() > 0) // we really do relocation
calculate_traveltime_for_all_src_rec(IP, grid, io);
// initilize all earthquakes
if (proc_store_srcrec){
for(auto iter = IP.rec_map.begin(); iter != IP.rec_map.end(); iter++){
iter->second.is_stop = false;
}
}
// iterate
for (int one_loop_i_iter = 0; one_loop_i_iter < IP.get_relocation_N_iter(); one_loop_i_iter++){
int i_iter = i_loop * IP.get_relocation_N_iter() + one_loop_i_iter;
if(myrank == 0 && id_sim ==0){
std::cout << std::endl;
std::cout << "loop " << i_loop+1 << ", relocation iteration " << one_loop_i_iter+1
<< " ( the " << i_iter+1 << "-th relocation) starting ... " << std::endl;
std::cout << std::endl;
}
// calculate gradient of objective function at sources
v_obj_misfit = calculate_gradient_objective_function(IP, grid, io, i_iter);
v_obj = v_obj_misfit[0];
// update source location
recs.update_source_location(IP, grid);
synchronize_all_world();
// output location information
if(id_sim == 0 && myrank == 0){
std::cout << "objective function: " << v_obj << std::endl;
}
// write objective functions
write_objective_function(IP, i_iter, v_obj_misfit, out_main, "relocation");
// write out new src_rec_file
if (IP.get_if_output_in_process_data()){
IP.write_src_rec_file(model_update_step,relocation_step);
} else if (i_iter == IP.get_max_loop_mode0() * IP.get_relocation_N_iter()-1 || i_iter==0) {
IP.write_src_rec_file(model_update_step,relocation_step);
}
// modify the receiver's location for output
IP.modify_swapped_source_location();
relocation_step += 1;
} // end relocation loop
grid.rejuvenate_abcf(); // (a,b/r^2,c/(r^2*cos^2),f/(r^2*cos)) -> (a,b,c,f)
// estimate running time
CUSTOMREAL time_elapsed = timer.get_t();
if (id_sim == 0 && myrank == 0 && i_loop < IP.get_max_loop_mode0()-1) {
const time_t end_time_estimated = time_elapsed / (i_loop + 1) * (IP.get_max_loop_mode0() - i_loop - 1) + timer.get_start();
auto will_run_time = (int)(time_elapsed/(i_loop + 2) * (IP.get_max_loop_mode0() - i_loop - 1));
std::cout << std::endl;
std::cout << "The program begins at " << timer.get_start_t() << std::endl;
std::cout << "Loop (" << i_loop + 1 << "/" << IP.get_max_loop_mode0() << ") finished at " << time_elapsed << " seconds" << std::endl;
std::cout << i_loop + 1 << " loop run " << timer.get_t() << " seconds, the rest of " << IP.get_max_loop_mode0() - i_loop - 1 << " iterations require " << will_run_time << " seconds." << std::endl;
std::cout << "The program is estimated to stop at " << timer.get_utc_from_time_t(end_time_estimated) << std::endl;
std::cout << std::endl;
}
} // end loop for model update and relocation
} else if (inv_mode == SIMULTANEOUS) {
for (int i_loop = 0; i_loop < IP.get_max_loop_mode1(); i_loop++){
if(myrank == 0 && id_sim ==0){
std::cout << std::endl;
std::cout << "loop " << i_loop+1 << ", model update and relocation starting ... " << std::endl;
std::cout << std::endl;
}
/////////////////////
// stage 1, update model parameters
/////////////////////
// prepare inversion iteration group in xdmf file
io.prepare_grid_inv_xdmf(i_loop);
///////////////////////////////////////////////////////
// run (forward and adjoint) simulation for each source
///////////////////////////////////////////////////////
// run forward and adjoint simulation and calculate current objective function value and sensitivity kernel for all sources
line_search_mode = false;
// skip for the mode with sub-iteration
if (i_loop > 0 && optim_method != GRADIENT_DESCENT) {
} else {
bool is_save_T = true;
v_obj_misfit = run_simulation_one_step(IP, grid, io, i_loop, first_src, line_search_mode, is_save_T);
v_obj = v_obj_misfit[0];
}
// wait for all processes to finish
synchronize_all_world();
// check if v_obj is nan
if (std::isnan(v_obj)) {
if (myrank == 0)
std::cout << "v_obj is nan, stop inversion" << std::endl;
// stop inversion
break;
}
///////////////
// model update
///////////////
if (optim_method == GRADIENT_DESCENT)
model_optimize(IP, grid, io, i_loop, v_obj, old_v_obj, first_src, out_main);
else if (optim_method == HALVE_STEPPING_MODE)
v_obj_misfit = model_optimize_halve_stepping(IP, grid, io, i_loop, v_obj, first_src, out_main);
else if (optim_method == LBFGS_MODE) {
bool found_next_step = model_optimize_lbfgs(IP, grid, io, i_loop, v_obj, first_src, out_main);
if (!found_next_step)
break;
}
// output objective function (model update part)
write_objective_function(IP, i_loop, v_obj_misfit, out_main, "model update");
/////////////////////
// stage 2, update earthquake locations
/////////////////////
// initilize all earthquakes
if (proc_store_srcrec){
for(auto iter = IP.rec_map.begin(); iter != IP.rec_map.end(); iter++){
iter->second.is_stop = false;
}
}
// calculate gradient of objective function at sources
v_obj_misfit = calculate_gradient_objective_function(IP, grid, io, i_loop);
// update source location
recs.update_source_location(IP, grid);
synchronize_all_world();
// output objective function (relocation part)
write_objective_function(IP, i_loop, v_obj_misfit, out_main, "relocation");
// define old_v_obj
old_v_obj = v_obj;
// since model is update. The written traveltime field should be discraded.
// initialize is_T_written_into_file
for (int i_src = 0; i_src < IP.n_src_this_sim_group; i_src++){
const std::string name_sim_src = IP.get_src_name(i_src);
if (proc_store_srcrec) // only proc_store_srcrec has the src_map object
IP.src_map[name_sim_src].is_T_written_into_file = false;
}
// output updated model
if (id_sim==0) {
//io.change_xdmf_obj(0); // change xmf file for next src
io.change_group_name_for_model();
// write out model info
if (IP.get_if_output_in_process() || i_loop >= IP.get_max_loop_mode1() - 2){
io.write_vel(grid, i_loop+1);
io.write_xi( grid, i_loop+1);
io.write_eta(grid, i_loop+1);
}
//io.write_zeta(grid, i_inv); // TODO
if (IP.get_verbose_output_level()){
io.write_a(grid, i_loop+1);
io.write_b(grid, i_loop+1);
io.write_c(grid, i_loop+1);
io.write_f(grid, i_loop+1);
io.write_fun(grid, i_loop+1);
}
// // output model_parameters_inv_0000.dat
// if (IP.get_if_output_model_dat()
// && (IP.get_if_output_in_process() || i_loop >= IP.get_max_loop_mode1() - 2))
// io.write_concerning_parameters(grid, i_loop + 1, IP);
} // end output updated model
// writeout temporary xdmf file
io.update_xdmf_file();
// write src rec files
if (IP.get_if_output_in_process_data()){
IP.write_src_rec_file(i_loop,i_loop);
} else if (i_loop == IP.get_max_loop_mode1() -1 || i_loop==0) {
IP.write_src_rec_file(i_loop,i_loop);
}
// modify the receiver's location for output
IP.modify_swapped_source_location();
// wait for all processes to finish
synchronize_all_world();
grid.rejuvenate_abcf(); // (a,b/r^2,c/(r^2*cos^2),f/(r^2*cos)) -> (a,b,c,f)
// estimate running time
CUSTOMREAL time_elapsed = timer.get_t();
if (id_sim == 0 && myrank == 0 && i_loop < IP.get_max_loop_mode1()-1) {
const time_t end_time_estimated = time_elapsed / (i_loop + 1) * (IP.get_max_loop_mode1() - i_loop - 1) + timer.get_start();
auto will_run_time = (int)(time_elapsed/(i_loop + 2) * (IP.get_max_loop_mode1() - i_loop - 1));
std::cout << std::endl;
std::cout << "The program begins at " << timer.get_start_t() << std::endl;
std::cout << "Loop (" << i_loop + 1 << "/" << IP.get_max_loop_mode1() << ") finished at " << time_elapsed << " seconds" << std::endl;
std::cout << i_loop + 1 << " loop run " << timer.get_t() << " seconds, the rest of " << IP.get_max_loop_mode1() - i_loop - 1 << " iterations require " << will_run_time << " seconds." << std::endl;
std::cout << "The program is estimated to stop at " << timer.get_utc_from_time_t(end_time_estimated) << std::endl;
std::cout << std::endl;
}
// output current state of the model
if (IP.get_if_output_final_model()) {
// io.write_final_model(grid, IP);
io.write_merged_model(grid, IP, "final_model.h5");
}
if (IP.get_if_output_middle_model()) {
std::string tmp_fname = "middle_model_step_" + int2string_zero_fill(i_loop + 1) + ".h5";
io.write_merged_model(grid, IP, tmp_fname);
}
} // end loop for model update and relocation
} else {
std::cout << "Error: inv_mode is not defined" << std::endl;
exit(1);
}
// close xdmf file
io.finalize_data_output_file();
timer.stop_timer();
if (id_sim == 0 && myrank == 0) {
std::cout << std::endl;
std::cout << "The program begin at " << timer.get_start_t();
std::cout << "Inv_and_reloc end at " << timer.get_end_t();
std::cout << "It has run " << timer.get_elapsed_t() << " seconds in total." << std::endl;
std::cout << std::endl;
}
}
// run 1D inversion mode
inline void run_1d_inversion(InputParams& IP, Grid& grid, IO_utils& io) {
OneDInversion oneDInv(IP, grid);
if(myrank == 0)
std::cout << "id_sim: " << id_sim << ", size of src_map: " << IP.src_map.size() << std::endl;
// estimate running time
Timer timer("1D_inversion", true);
// prepare objective_function file
std::ofstream out_main; // close() is not mandatory
prepare_header_line(IP, out_main);
synchronize_all_world();
/////////////////////
// loop for inversion
/////////////////////
// objective function for all src
std::vector<CUSTOMREAL> v_obj_misfit(20, 0.0);
for (int i_inv = 0; i_inv < IP.get_max_iter_inv(); i_inv++) {
if(myrank == 0 && id_sim ==0){
std::cout << "iteration " << i_inv << " starting ... " << std::endl;
}
// prepare inverstion iteration group in xdmf file
io.prepare_grid_inv_xdmf(i_inv);
///////////////////////////////////////////////////////
// run (forward and adjoint) simulation for each source
///////////////////////////////////////////////////////
v_obj_misfit = oneDInv.run_simulation_one_step_1dinv(IP, io, i_inv);
// wait for all processes to finish
synchronize_all_world();
// check if v_obj is nan
if (std::isnan(v_obj_misfit[0])) {
if (myrank == 0)
std::cout << "v_obj is nan, stop inversion" << std::endl;
// stop inversion
break;
}
// output src rec file with the result arrival times
if (IP.get_if_output_in_process_data()){
IP.write_src_rec_file(i_inv,0);
} else if (i_inv == IP.get_max_iter_inv()-1 || i_inv==0) {
IP.write_src_rec_file(i_inv,0);
}
///////////////
// model update
///////////////
if(myrank == 0 && id_sim ==0)
std::cout << "model update starting ... " << std::endl;
oneDInv.model_optimize_1dinv(IP, grid, io, i_inv);
// output objective function
write_objective_function(IP, i_inv, v_obj_misfit, out_main, "1d inversion");
// output updated model
if (id_sim==0) {
//io.change_xdmf_obj(0); // change xmf file for next src
io.change_group_name_for_model();
// write out model info
if (IP.get_if_output_in_process() || i_inv >= IP.get_max_iter_inv() - 2){
io.write_vel(grid, i_inv+1);
io.write_xi( grid, i_inv+1);
io.write_eta(grid, i_inv+1);
}
}
// writeout temporary xdmf file
io.update_xdmf_file();
// wait for all processes to finish
synchronize_all_world();
// estimate running time
CUSTOMREAL time_elapsed = timer.get_t();
if (id_sim == 0 && myrank == 0 && i_inv < IP.get_max_iter_inv()-1) {
const time_t end_time_estimated = time_elapsed / (i_inv + 1) * (IP.get_max_iter_inv() - i_inv - 1) + timer.get_start();
auto will_run_time = (int)(time_elapsed/(i_inv + 1) * (IP.get_max_iter_inv() - i_inv - 1));
std::cout << std::endl;
std::cout << "The program begins at " << timer.get_start_t() << std::endl;
std::cout << "Iteration (" << i_inv + 1 << "/" << IP.get_max_iter_inv() << ") finished at " << time_elapsed << " seconds" << std::endl;
std::cout << i_inv + 1 << " iterations run " << timer.get_t() << " seconds, the rest of " << IP.get_max_iter_inv() - i_inv - 1 << " iterations require " << will_run_time << " seconds." << std::endl;
std::cout << "The program is estimated to stop at " << timer.get_utc_from_time_t(end_time_estimated) << std::endl;
std::cout << std::endl;
}
// output current state of the model
if (IP.get_if_output_final_model()) {
// io.write_final_model(grid, IP);
io.write_merged_model(grid, IP, "final_model.h5");
}
if (IP.get_if_output_middle_model()) {
std::string tmp_fname = "middle_model_step_" + int2string_zero_fill(i_inv + 1) + ".h5";
io.write_merged_model(grid, IP, tmp_fname);
}
// wait for all processes to finish
synchronize_all_world();
} // end loop inverse
// close xdmf file
io.finalize_data_output_file();
timer.stop_timer();
if (id_sim == 0 && myrank == 0) {
std::cout << std::endl;
std::cout << "The program begin at " << timer.get_start_t() << std::endl;
std::cout << "1d model inversion ended at " << timer.get_end_t() << std::endl;
std::cout << "It has run " << timer.get_elapsed_t() << " seconds in total." << std::endl;
std::cout << std::endl;
}
}
#endif // MAIN_ROUTINES_CALLING_H

View File

@@ -0,0 +1,149 @@
#ifndef MAIN_ROUTINES_EARTHQUAKE_RELOCATION_H
#define MAIN_ROUTINES_EARTHQUAKE_RELOCATION_H
#include <iostream>
#include <memory>
#include "mpi_funcs.h"
#include "config.h"
#include "utils.h"
#include "input_params.h"
#include "grid.h"
#include "io.h"
#include "iterator_selector.h"
#include "iterator.h"
#include "iterator_legacy.h"
#include "iterator_level.h"
#include "source.h"
#include "receiver.h"
#include "main_routines_inversion_mode.h"
// calculate traveltime field for all sources-receivers and write to file
void calculate_traveltime_for_all_src_rec(InputParams& IP, Grid& grid, IO_utils& io){
// check if this run is in source receiver swap mode
// if not, stop the program
if (IP.get_is_srcrec_swap() == false){
std::cout << "Error: Source relocation mode must run with swap_src_rec = true" << std::endl;
exit(1);
}
// reinitialize factors
grid.reinitialize_abcf();
// prepare inverstion iteration group in xdmf file
io.prepare_grid_inv_xdmf(0);
///////////////////////
// loop for each source
///////////////////////
Source src;
// iterate over sources
for (int i_src = 0; i_src < IP.n_src_this_sim_group; i_src++){
const std::string name_sim_src = IP.get_src_name(i_src);
const int id_sim_src = IP.get_src_id(name_sim_src); // global source id
// check if the source is teleseismic or not
// because teleseismic source is not supported in this mode
bool is_teleseismic = IP.get_if_src_teleseismic(name_sim_src);
if (is_teleseismic){
std::cout << "Error: Teleseismic source is not supported in source relocation mode." << std::endl;
exit(1);
}
// set simu group id and source name for output files/dataset names
io.reset_source_info(id_sim_src, name_sim_src);
// set source position
src.set_source_position(IP, grid, is_teleseismic, name_sim_src);
// initialize iterator object
bool first_init = (i_src==0);
// initialize iterator object
std::unique_ptr<Iterator> It;
select_iterator(IP, grid, src, io, name_sim_src, first_init, is_teleseismic, It, false);
/////////////////////////
// run forward simulation
/////////////////////////
if (proc_store_srcrec){
auto srcmap_this = IP.get_src_point(name_sim_src);
std::cout << "calculating source (" << i_src+1 << "/" << IP.n_src_this_sim_group << "), name: "
<< name_sim_src << ", lat: " << srcmap_this.lat
<< ", lon: " << srcmap_this.lon << ", dep: " << srcmap_this.dep
<< std::endl;
}
bool write_tmp_T = true;
calculate_or_read_traveltime_field(IP, grid, io, i_src, IP.n_src_this_sim_group, first_init, It, name_sim_src, write_tmp_T);
}
// wait for all processes to finish traveltime calculation
synchronize_all_world();
}
std::vector<CUSTOMREAL> calculate_gradient_objective_function(InputParams& IP, Grid& grid, IO_utils& io, int i_iter){
Receiver recs; // here the source is swapped to receiver
// initialize source parameters (obj, kernel, )
recs.init_vars_src_reloc(IP);
// iterate over sources (to obtain gradient of traveltime field T and absolute traveltime, common source differential traveltime, and common receiver differential traveltime)
for (int i_src = 0; i_src < IP.n_src_this_sim_group; i_src++){
const std::string name_sim_src = IP.get_src_name(i_src);
const int id_sim_src = IP.get_src_id(name_sim_src); // global source id
// set simu group id and source name for output files/dataset names
io.reset_source_info(id_sim_src, name_sim_src);
// load travel time field on grid.T_loc
io.read_T_tmp(grid);
// calculate travel time at the actual source location (absolute traveltime, common source differential traveltime)
recs.interpolate_and_store_arrival_times_at_rec_position(IP, grid, name_sim_src);
// calculate gradient at the actual source location
recs.calculate_T_gradient(IP, grid, name_sim_src);
}
// wait for all processes to finish
synchronize_all_world();
// gather all the traveltime to the main process and distribute to all processes
// for calculating the synthetic (common receiver differential traveltime)
IP.gather_traveltimes_and_calc_syn_diff();
// iterate over sources for calculating gradient of objective function
for (int i_src = 0; i_src < IP.n_src_this_sim_group; i_src++){
const std::string name_sim_src = IP.get_src_name(i_src);
// calculate gradient of objective function with respect to location and ortime
recs.calculate_grad_obj_src_reloc(IP, name_sim_src);
}
// compute the objective function
std::vector<CUSTOMREAL> obj_residual = recs.calculate_obj_reloc(IP, i_iter);
// sum grad_tau and grad_chi_k of all simulation groups
IP.allreduce_rec_map_grad_src();
//synchronize_all_world(); // not necessary here because allreduce is already synchronizing communication
return obj_residual;
}
#endif // MAIN_ROUTINES_EARTHQUAKE_RELOCATION_H

View File

@@ -0,0 +1,293 @@
#ifndef MAIN_ROUTINES_INVERSION_MODE_H
#define MAIN_ROUTINES_INVERSION_MODE_H
#include <iostream>
#include <memory>
#include "mpi_funcs.h"
#include "config.h"
#include "utils.h"
#include "input_params.h"
#include "grid.h"
#include "io.h"
#include "iterator_selector.h"
#include "iterator.h"
#include "iterator_legacy.h"
#include "iterator_level.h"
#include "source.h"
#include "receiver.h"
#include "kernel.h"
#include "model_update.h"
#include "lbfgs.h"
inline void calculate_or_read_traveltime_field(InputParams& IP, Grid& grid, IO_utils& io, const int i_src, const int N_src, bool first_init,
std::unique_ptr<Iterator>& It, const std::string& name_sim_src, const bool& prerun=false){
if (IP.get_is_T_written_into_file(name_sim_src)){
// load travel time field on grid.T_loc
if (myrank == 0){
std::cout << "id_sim: " << id_sim << ", reading source (" << i_src+1 << "/" << N_src
<< "), name: "
<< name_sim_src << ", lat: " << IP.src_map[name_sim_src].lat
<< ", lon: " << IP.src_map[name_sim_src].lon << ", dep: " << IP.src_map[name_sim_src].dep
<< std::endl;
}
io.read_T_tmp(grid);
} else {
// We need to solve eikonal equation
if (myrank == 0){
std::cout << "id_sim: " << id_sim << ", calculating source (" << i_src+1 << "/" << N_src
<< "), name: "
<< name_sim_src << ", lat: " << IP.src_map[name_sim_src].lat
<< ", lon: " << IP.src_map[name_sim_src].lon << ", dep: " << IP.src_map[name_sim_src].dep
<< std::endl;
}
// solve travel time field on grid.T_loc
It->run_iteration_forward(IP, grid, io, first_init);
// writeout travel time field
if (prerun) {
// write the temporary traveltime field into file for later use
io.write_T_tmp(grid);
if (proc_store_srcrec) // only proc_store_srcrec has the src_map object
IP.src_map[name_sim_src].is_T_written_into_file = true;
}
}
}
inline void pre_run_forward_only(InputParams& IP, Grid& grid, IO_utils& io, int i_inv){
if(world_rank == 0)
std::cout << "preparing traveltimes of common receiver data ..." << std::endl;
Source src;
Receiver recs;
// noted that src_map_comm_rec is the subset of src_map
for (int i_src = 0; i_src < IP.n_src_comm_rec_this_sim_group; i_src++){
// check if this is the first iteration of entire inversion process
bool first_init = (i_inv == 0 && i_src==0);
// get source info
std::string name_sim_src = IP.get_src_name_comm(i_src);
int id_sim_src = IP.get_src_id(name_sim_src); // global source id
bool is_teleseismic = IP.get_if_src_teleseismic(name_sim_src); // get is_teleseismic flag
// set simu group id and source name for output files/dataset names
io.reset_source_info(id_sim_src, name_sim_src);
// set source position
src.set_source_position(IP, grid, is_teleseismic, name_sim_src);
// initialize iterator object
std::unique_ptr<Iterator> It;
select_iterator(IP, grid, src, io, name_sim_src, first_init, is_teleseismic, It, false);
// calculate or read traveltime field
bool prerun_mode = true;
calculate_or_read_traveltime_field(IP, grid, io, i_src, IP.n_src_comm_rec_this_sim_group, first_init, It, name_sim_src, prerun_mode);
// interpolate and store traveltime, cs_dif. For cr_dif, only store the traveltime.
recs.interpolate_and_store_arrival_times_at_rec_position(IP, grid, name_sim_src);
// CHS: At this point, all the synthesised arrival times for all the co-located stations are recorded in syn_time_map_sr. When you need to use it later, you can just look it up.
}
// wait for all processes to finish
synchronize_all_world();
if(world_rank == 0)
std::cout << "synthetic traveltimes of common receiver data have been prepared." << std::endl;
// gather all the traveltime to the main process and distribute to all processes
// for calculating cr_dif
IP.gather_traveltimes_and_calc_syn_diff();
}
// run forward and adjoint simulation and calculate current objective function value and sensitivity kernel if requested
inline std::vector<CUSTOMREAL> run_simulation_one_step(InputParams& IP, Grid& grid, IO_utils& io, int i_inv, bool& first_src, bool line_search_mode, bool is_save_T){
// initialize kernel arrays
if (IP.get_run_mode() == DO_INVERSION || IP.get_run_mode() == INV_RELOC)
grid.initialize_kernels();
// reinitialize factors
grid.reinitialize_abcf();
///////////////////////////////////////////////////////////////////////
// compute the synthetic common receiver differential traveltime first
///////////////////////////////////////////////////////////////////////
// prepare synthetic traveltime for all earthquakes, if
// 1. common receiver data exists;
// 2. we use common receiver data to update model; (cr + not swap) or (cs + swap)
// 3. we do inversion
if ( src_pair_exists &&
((IP.get_use_cr() && !IP.get_is_srcrec_swap())
|| (IP.get_use_cs() && IP.get_is_srcrec_swap()) )
&& (IP.get_run_mode() == DO_INVERSION || IP.get_run_mode() == INV_RELOC)){
pre_run_forward_only(IP, grid, io, i_inv);
}
//
// loop over all sources
//
Source src;
Receiver recs;
if(world_rank == 0)
std::cout << "computing traveltime field, adjoint field and kernel ..." << std::endl;
// iterate over sources
for (int i_src = 0; i_src < IP.n_src_this_sim_group; i_src++){
// check if this is the first iteration of entire inversion process
bool first_init = (i_inv == 0 && i_src==0);
// get source info
const std::string name_sim_src = IP.get_src_name(i_src); // source name
const int id_sim_src = IP.get_src_id(name_sim_src); // global source id
bool is_teleseismic = IP.get_if_src_teleseismic(name_sim_src); // get is_teleseismic flag
// set simu group id and source name for output files/dataset names
io.reset_source_info(id_sim_src, name_sim_src);
// output initial field
if(first_src && IP.get_if_output_source_field()) {
// write true solution
if (if_test){
io.write_true_solution(grid);
}
first_src = false;
}
/////////////////////////
// run forward simulation
/////////////////////////
// (re) initialize source object and set to grid
src.set_source_position(IP, grid, is_teleseismic, name_sim_src);
// initialize iterator object
std::unique_ptr<Iterator> It;
if (!hybrid_stencil_order){
select_iterator(IP, grid, src, io, name_sim_src, first_init, is_teleseismic, It, false);
// if traveltime field has been wriiten into the file, we choose to read the traveltime data.
calculate_or_read_traveltime_field(IP, grid, io, i_src, IP.n_src_this_sim_group, first_init, It, name_sim_src, is_save_T);
} else {
// hybrid stencil mode
std::cout << "\nrunnning in hybrid stencil mode\n" << std::endl;
// run 1st order forward simulation
std::unique_ptr<Iterator> It_pre;
IP.set_stencil_order(1);
IP.set_conv_tol(IP.get_conv_tol()*100.0);
select_iterator(IP, grid, src, io, name_sim_src, first_init, is_teleseismic, It_pre, false);
calculate_or_read_traveltime_field(IP, grid, io, i_src, IP.n_src_this_sim_group, first_init, It_pre, name_sim_src, is_save_T);
// run 3rd order forward simulation
IP.set_stencil_order(3);
IP.set_conv_tol(IP.get_conv_tol()/100.0);
select_iterator(IP, grid, src, io, name_sim_src, first_init, is_teleseismic, It, true);
calculate_or_read_traveltime_field(IP, grid, io, i_src, IP.n_src_this_sim_group, first_init, It, name_sim_src, is_save_T);
}
// output the result of forward simulation
// ignored for inversion mode.
if (!line_search_mode && IP.get_if_output_source_field()) {
// output T (result timetable)
io.write_T(grid, i_inv);
// output T0v
//io.write_T0v(grid,i_inv); // initial Timetable
// output u (true solution)
//if (if_test)
// io.write_u(grid); // true Timetable
// output tau
//io.write_tau(grid, i_inv); // calculated deviation
// output residual (residual = true_solution - result)
//if (if_test)
// io.write_residual(grid); // this will over write the u_loc, so we need to call write_u_h5 first
}
// calculate the arrival times at each receivers
recs.interpolate_and_store_arrival_times_at_rec_position(IP, grid, name_sim_src);
/////////////////////////
// run adjoint simulation
/////////////////////////
// if (myrank == 0){
// std::cout << "calculating adjoint field, source (" << i_src+1 << "/" << (int)IP.src_id2name.size() << "), name: "
// << name_sim_src << ", lat: " << IP.src_map[name_sim_src].lat
// << ", lon: " << IP.src_map[name_sim_src].lon << ", dep: " << IP.src_map[name_sim_src].dep
// << std::endl;
// }
if (IP.get_run_mode()==DO_INVERSION || IP.get_run_mode()==INV_RELOC){
// calculate adjoint source
recs.calculate_adjoint_source(IP, name_sim_src);
// run iteration for adjoint field calculation
int adj_type = 0; // compute adjoint field
It->run_iteration_adjoint(IP, grid, io, adj_type);
// run iteration for density of the adjoint field
adj_type = 1; // compute adjoint field
It->run_iteration_adjoint(IP, grid, io, adj_type);
// calculate sensitivity kernel
calculate_sensitivity_kernel(grid, IP, name_sim_src);
if (subdom_main && !line_search_mode && IP.get_if_output_source_field()) {
// adjoint field will be output only at the end of subiteration
// output the result of adjoint simulation
io.write_adjoint_field(grid,i_inv);
}
// io.write_adjoint_field(grid,i_inv);
// check adjoint source
// if (proc_store_srcrec){
// for (auto iter = IP.rec_map.begin(); iter != IP.rec_map.end(); iter++){
// std::cout << "rec id: " << iter->second.id << ", rec name: " << iter->second.name << ", adjoint source: " << iter->second.adjoint_source << std::endl;
// }
// }
} // end if run_mode == DO_INVERSION
// wait for all processes to finish
// this should not be called here, for the case that the simultaneous run group has different number of sources
//synchronize_all_world();
} // end for i_src
// synchronize all processes
synchronize_all_world();
// gather all the traveltime to the main process and distribute to all processes
// for calculating the synthetic common receiver differential traveltime
if ( IP.get_run_mode()==ONLY_FORWARD || // case 1. if we are doing forward modeling, traveltime is not prepared for computing cr_dif data. Now we need to compute it
(!IP.get_use_cr() && !IP.get_is_srcrec_swap()) || // case 2-1, we do inversion, but we do not use cr data (cr + no swap)
(!IP.get_use_cs() && IP.get_is_srcrec_swap())){ // case 2-2, we do inversion, but we do not use cr data (cs + swap)
IP.gather_traveltimes_and_calc_syn_diff();
}
// compute all residual and obj
std::vector<CUSTOMREAL> obj_residual = recs.calculate_obj_and_residual(IP);
// return current objective function value
return obj_residual;
}
#endif // MAIN_ROUTINES_INVERSION_MODE_H

View File

@@ -0,0 +1,496 @@
#ifndef MODEL_OPTIMIZATION_ROUTINES_H
#define MODEL_OPTIMIZATION_ROUTINES_H
#include <iostream>
#include <memory>
#include "mpi_funcs.h"
#include "config.h"
#include "utils.h"
#include "input_params.h"
#include "grid.h"
#include "io.h"
#include "main_routines_calling.h"
#include "iterator_selector.h"
#include "iterator.h"
#include "iterator_legacy.h"
#include "iterator_level.h"
#include "source.h"
#include "receiver.h"
#include "kernel.h"
#include "model_update.h"
#include "lbfgs.h"
#include "objective_function_utils.h"
// do model update by gradient descent
inline void model_optimize(InputParams& IP, Grid& grid, IO_utils& io, int i_inv, \
CUSTOMREAL& v_obj_inout, CUSTOMREAL& old_v_obj, bool& first_src, std::ofstream& out_main) {
// check kernel density
check_kernel_density(grid, IP);
// sum kernels among all simultaneous runs
sumup_kernels(grid);
// write out original kernels
if (id_sim==0 && subdom_main && IP.get_if_output_kernel() && (IP.get_if_output_in_process() || i_inv >= IP.get_max_iter_inv() - 2 || i_inv == 0)) {
// store kernel only in the first src datafile
io.change_group_name_for_model();
// write kernel
io.write_Ks(grid, i_inv);
io.write_Keta(grid, i_inv);
io.write_Kxi(grid, i_inv);
// write kernel density
io.write_Ks_density(grid, i_inv);
io.write_Kxi_density(grid, i_inv);
io.write_Keta_density(grid, i_inv);
}
// smooth kernels (multigrid, and kdensity normalization)
smooth_kernels(grid, IP);
// write out modified kernels
if (id_sim==0 && subdom_main && IP.get_if_output_kernel() && (IP.get_if_output_in_process() || i_inv >= IP.get_max_iter_inv() - 2 || i_inv == 0)) {
// store kernel only in the first src datafile
io.change_group_name_for_model();
// // kernel over density
// io.write_Ks_over_Kden(grid, i_inv);
// io.write_Keta_over_Kden(grid, i_inv);
// io.write_Kxi_over_Kden(grid, i_inv);
// kernel over density with smoothing
io.write_Ks_update(grid, i_inv);
io.write_Keta_update(grid, i_inv);
io.write_Kxi_update(grid, i_inv);
// density with smoothing
io.write_Ks_density_update(grid, i_inv);
io.write_Kxi_density_update(grid, i_inv);
io.write_Keta_density_update(grid, i_inv);
}
// change stepsize
// Option 1: the step length is modulated when obj changes.
if (step_method == OBJ_DEFINED){
if(i_inv != 0){
if (v_obj_inout < old_v_obj) {
step_length_init = std::min((CUSTOMREAL)0.02, step_length_init);
if(myrank == 0 && id_sim == 0){
std::cout << std::endl;
std::cout << "The obj keeps decreasing, from " << old_v_obj << " to " << v_obj_inout
<< ", the step length is " << step_length_init << std::endl;
std::cout << std::endl;
}
} else if (v_obj_inout >= old_v_obj) {
step_length_init = std::max((CUSTOMREAL)0.0001, step_length_init*step_length_decay);
if(myrank == 0 && id_sim == 0){
std::cout << std::endl;
std::cout << "The obj keep increases, from " << old_v_obj << " to " << v_obj_inout
<< ", the step length decreases from " << step_length_init/step_length_decay
<< " to " << step_length_init << std::endl;
std::cout << std::endl;
}
}
} else {
if(myrank == 0 && id_sim == 0){
std::cout << std::endl;
std::cout << "At the first iteration, the step length is " << step_length_init << std::endl;
std::cout << std::endl;
}
}
} else if (step_method == GRADIENT_DEFINED){
// Option 2: we modulate the step length according to the angle between the previous and current gradient directions.
// If the angle is less than XX degree, which means the model update direction is successive, we should enlarge the step size
// Otherwise, the step length should decrease
CUSTOMREAL angle = direction_change_of_model_update(grid);
if(i_inv != 0){
if (angle > step_length_gradient_angle){
CUSTOMREAL old_step_length = step_length_init;
step_length_init = std::max((CUSTOMREAL)0.0001, step_length_init * step_length_down);
if(myrank == 0 && id_sim == 0){
std::cout << std::endl;
std::cout << "The angle between two update darections is " << angle
<< ". Because the angle is greater than " << step_length_gradient_angle << " degree, the step length decreases from "
<< old_step_length << " to " << step_length_init << std::endl;
std::cout << std::endl;
}
} else if (angle <= step_length_gradient_angle) {
CUSTOMREAL old_step_length = step_length_init;
step_length_init = std::min((CUSTOMREAL)0.02, step_length_init * step_length_up);
if(myrank == 0 && id_sim == 0){
std::cout << std::endl;
std::cout << "The angle between two update darections is " << angle
<< ". Because the angle is less than " << step_length_gradient_angle << " degree, the step length increases from "
<< old_step_length << " to " << step_length_init << std::endl;
std::cout << std::endl;
}
}
} else {
if(myrank == 0 && id_sim == 0){
std::cout << std::endl;
std::cout << "At the first iteration, the step length is " << step_length_init << std::endl;
std::cout << std::endl;
}
}
} else {
std::cout << std::endl;
std::cout << "No supported method for step size change, step keep the same: " << step_length_init << std::endl;
std::cout << std::endl;
}
// broadcast the step_length
broadcast_cr_single(step_length_init,0);
// update the model with the initial step size
set_new_model(grid, step_length_init);
// make station correction
IP.station_correction_update(step_length_init_sc);
// writeout temporary xdmf file
if (IP.get_verbose_output_level())
io.update_xdmf_file();
synchronize_all_world();
}
inline std::vector<CUSTOMREAL> model_optimize_halve_stepping(InputParams& IP, Grid& grid, IO_utils& io, int i_inv, CUSTOMREAL& v_obj_inout, bool& first_src, std::ofstream& out_main) {
CUSTOMREAL step_length = step_length_init; // step size init is global variable
CUSTOMREAL diff_obj = - 9999999999;
CUSTOMREAL v_obj_old = v_obj_inout;
CUSTOMREAL v_obj_new = 0.0;
std::vector<CUSTOMREAL> v_obj_misfit_new = std::vector<CUSTOMREAL>(10, 0.0);
int sub_iter_count = 0;
// sum kernels among all simultaneous runs
sumup_kernels(grid);
// smooth kernels
smooth_kernels(grid, IP);
// backup the initial model
grid.back_up_fun_xi_eta_bcf();
// update the model with the initial step size
set_new_model(grid, step_length);
if (IP.get_verbose_output_level()) {
// store kernel only in the first src datafile
io.change_group_name_for_model();
// output updated velocity models
io.write_Ks(grid, i_inv);
io.write_Keta(grid, i_inv);
io.write_Kxi(grid, i_inv);
// output descent direction
io.write_Ks_update(grid, i_inv);
io.write_Keta_update(grid, i_inv);
io.write_Kxi_update(grid, i_inv);
}
// writeout temporary xdmf file
if (IP.get_verbose_output_level())
io.update_xdmf_file();
synchronize_all_world();
// loop till
while (sub_iter_count < max_sub_iterations) {
// check the new objective function value
// v_obj_new = run_simulation_one_step(IP, grid, io, i_inv, first_src, true); // run simulations with line search mode
v_obj_misfit_new = run_simulation_one_step(IP, grid, io, i_inv, first_src, true, false); // run simulations with line search mode
v_obj_new = v_obj_misfit_new[0];
// if the new objective function value is larger than the old one, make the step width to be half of the previous one
diff_obj = v_obj_new - v_obj_old;
if ( diff_obj > _0_CR // if the objective function value is larger than the old one
|| std::abs(diff_obj/v_obj_old) > MAX_DIFF_RATIO_VOBJ) { // if the objective function reduced too much ) {
// set step length just for writing out
step_length_init = step_length;
// print status
write_objective_function(IP, i_inv, v_obj_misfit_new, out_main, "sub iter");
grid.restore_fun_xi_eta_bcf();
step_length *= HALVE_STEP_RATIO;
set_new_model(grid, step_length);
sub_iter_count++;
} else {
// if the new objective function value is smaller than the old one, make the step width to be twice of the previous one
goto end_of_sub_iteration;
}
}
end_of_sub_iteration:
v_obj_inout = v_obj_new;
step_length_init = step_length/(HALVE_STEP_RATIO)*HALVE_STEP_RESTORAION_RATIO; // use this step size for the next inversion
// write adjoint field
int next_i_inv = i_inv + 1;
if (subdom_main && IP.get_if_output_source_field())
io.write_adjoint_field(grid,next_i_inv);
return v_obj_misfit_new;
}
// do model update
inline bool model_optimize_lbfgs(InputParams& IP, Grid& grid, IO_utils& io, int i_inv, CUSTOMREAL& v_obj_inout, bool& first_src, std::ofstream& out_main) {
int subiter_count = 0; // subiteration count
//CUSTOMREAL qp_0 = _0_CR; // store initial p_k * grad(f_k) (descent_direction * gradient)
CUSTOMREAL qp_t = _0_CR; // store p_k * grad(f_k)
CUSTOMREAL td = _0_CR; // wolfes right step
CUSTOMREAL tg = _0_CR; // wolfes left step
CUSTOMREAL step_length = step_length_init; // step size init is global variable
CUSTOMREAL v_obj_reg = _0_CR; // regularization term == q_t
//CUSTOMREAL q_0 = _0_CR; // initial cost function
CUSTOMREAL q_t = _0_CR; // current cost function
std::vector<CUSTOMREAL> v_obj_misfit_new = std::vector<CUSTOMREAL>(10, 0.0);
//
// initialize
//
if (i_inv == 0) {
// initialize regularization
// regularization_penalty[:] = 1.0
init_regulaization_penalty_with_one(grid);
// calculate volume_domain = SquaredL2Norm(regularization_penalty)
volume_domain = compute_volume_domain(grid);
// volume_domain /= N_params
volume_domain /= N_params;
// weight_Tikonov = penalty_weight / volume_domain
weight_Tikonov = regularization_weight / volume_domain;
weight_Tikonov_ani = regularization_weight;
std::cout << "DEBUG: weight_Tikonov: " << weight_Tikonov << std::endl;
// calculate damp_weights
compute_damp_weights(grid);
// calculate gradient (run onestep forward+adjoint) not necessary to do here. already done
//v_obj_misfit_new = run_simulation_one_step(IP, grid, io, i_inv, first_src, true);
// Q0 = initial obj
q_0 = v_obj_inout;
// sum kernels among all simultaneous runs
sumup_kernels(grid);
// smooth gradient
smooth_kernels(grid, IP);
// calc regularization penalties
calculate_regularization_penalty(grid);
//// smooth calculated grad regularization penalty
//smooth_gradient_regularization(grid);
// calc (update) obj_regl = squaredL2Norm(regularization_penalty)
v_obj_reg = calculate_regularization_obj(grid);
// Q0 += 0.5*weight_Tiknov*obj_regl
q_0 += v_obj_reg;
// init_grad += weight_Tikonov * gadient_regularization_penalty
add_regularization_grad(grid);
// store model and gradient
store_model_and_gradient(grid, i_inv);
// log out
if(myrank == 0 && id_sim ==0) {
out_main \
<< std::setw(5) << i_inv \
<< "," << std::setw(5) << subiter_count \
<< "," << std::setw(15) << step_length \
<< "," << std::setw(15) << q_0 \
<< "," << std::setw(15) << q_t \
<< "," << std::setw(15) << v_obj_reg \
<< "," << std::setw(15) << qp_0 \
<< "," << std::setw(15) << qp_t \
<< "," << std::setw(15) << td \
<< "," << std::setw(15) << tg \
<< "," << std::setw(15) << wolfe_c1*qp_0 \
<< "," << std::setw(15) << wolfe_c2*qp_0 \
<< "," << std::setw(15) << "init" << std::endl;
}
}
// backup the initial model
grid.back_up_fun_xi_eta_bcf();
// update the model with the initial step size
if (IP.get_verbose_output_level() && id_sim==0) {
// store kernel only in the first src datafile
io.change_group_name_for_model();
// output updated velocity models
io.write_Ks(grid, i_inv);
io.write_Keta(grid, i_inv);
io.write_Kxi(grid, i_inv);
// output descent direction
io.write_Ks_descent_dir(grid, i_inv);
io.write_Keta_descent_dir(grid, i_inv);
io.write_Kxi_descent_dir(grid, i_inv);
}
// writeout temporary xdmf file
if (IP.get_verbose_output_level()&& id_sim==0)
io.update_xdmf_file();
//
// iteration
//
if (i_inv > 0) {
// sum kernels among all simultaneous runs
sumup_kernels(grid);
// smooth kernels
smooth_kernels(grid, IP);
}
// compute descent direction
calc_descent_direction(grid, i_inv, IP);
// smooth descent direction
smooth_descent_direction(grid);
// calc qp_0 = inital grad * descent direction
qp_0 = compute_q_k(grid);
// loop wolfes subiterations
bool wolfe_cond_ok = false;
while (wolfe_cond_ok != true) {
// initial guess for isub = 0
if (i_inv == 0 && subiter_count == 0)
initial_guess_step(grid, step_length, 1.0);
else if (i_inv == 1 && subiter_count == 0)
initial_guess_step(grid, step_length, LBFGS_RELATIVE_step_length);
//// Update model
set_new_model(grid, step_length);
//// calculate gradient (run onestep forward+adjoint)
v_obj_misfit_new = run_simulation_one_step(IP, grid, io, i_inv, first_src, true, false);
//// Qt update (=current obj)
q_t = v_obj_misfit_new[0];
// sum kernels among all simultaneous runs
sumup_kernels(grid);
//// smooth gradient
smooth_kernels(grid, IP);
//// calculate regularization to grad
calculate_regularization_penalty(grid);
//// smooth calculated grad regularization penalty
//smooth_gradient_regularization(grid);
//// add regularization term to Qt (Qt += 0.5 * obj_regl )
v_obj_reg = calculate_regularization_obj(grid);
q_t += v_obj_reg;
//// current grad += weight_Tikonov * gadient_regularization_penalty
add_regularization_grad(grid);
//// Qpt = Inner product(current_grad * descent_direction)
qp_t = compute_q_k(grid);
//// check wolfe conditions
wolfe_cond_ok = check_wolfe_cond(grid, q_0, q_t, qp_0, qp_t, td, tg, step_length);
// update the model with the initial step size
if (IP.get_verbose_output_level() && id_sim==0) {
// store kernel only in the first src datafile
io.change_group_name_for_model();
// gradient
io.write_Ks_update(grid, i_inv*100+subiter_count);
io.write_Keta_update(grid, i_inv*100+subiter_count);
io.write_Kxi_update(grid, i_inv*100+subiter_count);
// output descent direction
io.write_Ks_descent_dir(grid, i_inv*100+subiter_count);
io.write_Keta_descent_dir(grid, i_inv*100+subiter_count);
io.write_Kxi_descent_dir(grid, i_inv*100+subiter_count);
}
// writeout temporary xdmf file
if (IP.get_verbose_output_level()&& id_sim==0)
io.update_xdmf_file();
// log out
if(myrank == 0 && id_sim ==0)
out_main \
<< std::setw(5) << i_inv \
<< "," << std::setw(5) << subiter_count \
<< "," << std::setw(15) << step_length \
<< "," << std::setw(15) << q_0 \
<< "," << std::setw(15) << q_t \
<< "," << std::setw(15) << v_obj_reg \
<< "," << std::setw(15) << qp_0 \
<< "," << std::setw(15) << qp_t \
<< "," << std::setw(15) << td \
<< "," << std::setw(15) << tg \
<< "," << std::setw(15) << wolfe_c1*qp_0 \
<< "," << std::setw(15) << wolfe_c2*qp_0 \
<< "," << std::setw(15) << wolfe_cond_ok << std::endl;
if (wolfe_cond_ok) {
goto end_of_subiteration;
} else if (subiter_count > max_sub_iterations){
// reached max subiter
// exit
std::cout << "reached max subiterations" << std::endl;
return false;
} else {
// wolfe conditions not satisfied
subiter_count++;
grid.restore_fun_xi_eta_bcf();
}
}
end_of_subiteration:
//// store model and gradient,
store_model_and_gradient(grid, i_inv+1);
//// Q0=Qt
q_0 = q_t;
qp_0 = qp_t;
step_length_init = step_length; // use current step size for the next iteration
if (myrank == 0)
std::cout << "Wolfe conditions satisfied at iteration " << subiter_count << std::endl;
return true;
}
#endif // MODEL_OPTIMIZATION_ROUTINES_H

485
include/model_update.h Normal file
View File

@@ -0,0 +1,485 @@
#ifndef MODEL_UPDATE_H
#define MODEL_UPDATE_H
#include <cmath>
#include "config.h"
#include "grid.h"
#include "smooth.h"
#include "smooth_descent_dir.h"
#include "smooth_grad_regul.h"
#include "lbfgs.h"
// generate smoothed kernels (K*_update_loc) from the kernels (K*_loc)
// before doing this, K*_loc should be summed up among all simultaneous runs (by calling sumup_kernels)
// before doing this, K*_update_loc has no meaning (unavailable)
void smooth_kernels(Grid& grid, InputParams& IP) {
if (subdom_main){ // parallel level 3
if (id_sim==0){ // parallel level 1
// initiaize update params
for (int k = 0; k < loc_K; k++) {
for (int j = 0; j < loc_J; j++) {
for (int i = 0; i < loc_I; i++) {
grid.Ks_update_loc_previous[I2V(i,j,k)] = grid.Ks_update_loc[I2V(i,j,k)];
grid.Keta_update_loc_previous[I2V(i,j,k)] = grid.Keta_update_loc[I2V(i,j,k)];
grid.Kxi_update_loc_previous[I2V(i,j,k)] = grid.Kxi_update_loc[I2V(i,j,k)];
grid.Ks_update_loc[I2V(i,j,k)] = _0_CR;
grid.Keta_update_loc[I2V(i,j,k)] = _0_CR;
grid.Kxi_update_loc[I2V(i,j,k)] = _0_CR;
// grid.Ks_density_update_loc[I2V(i,j,k)] = _0_CR;
// grid.Kxi_density_update_loc[I2V(i,j,k)] = _0_CR;
// grid.Keta_density_update_loc[I2V(i,j,k)] = _0_CR;
}
}
}
// check kernel
CUSTOMREAL max_kernel = _0_CR;
for (int k = 0; k < loc_K; k++) {
for (int j = 0; j < loc_J; j++) {
for (int i = 0; i < loc_I; i++) {
max_kernel = std::max(max_kernel, std::abs(grid.Ks_loc[I2V(i,j,k)]));
}
}
}
CUSTOMREAL tmp = 0;
allreduce_cr_single_max(max_kernel, tmp);
max_kernel = tmp;
if (max_kernel <= eps) {
std::cout << "Error: max_kernel is near zero (less than 10^-12), check data residual and whether no data is used" << std::endl;
exit(1);
}
if (smooth_method == 0) {
// Ks_loc, Keta_loc, Kxi_loc to be 0 for ghost layers to eliminate the effect of overlapping boundary
for (int k = 0; k < loc_K; k++) {
for (int j = 0; j < loc_J; j++) {
for (int i = 0; i < loc_I; i++) {
if (i == 0 && !grid.i_first()) {
grid.Ks_loc[I2V(i,j,k)] = _0_CR;
grid.Keta_loc[I2V(i,j,k)] = _0_CR;
grid.Kxi_loc[I2V(i,j,k)] = _0_CR;
grid.Ks_density_loc[I2V(i,j,k)] = _0_CR;
grid.Kxi_density_loc[I2V(i,j,k)] = _0_CR;
grid.Keta_density_loc[I2V(i,j,k)] = _0_CR;
}
if (i == loc_I-1 && !grid.i_last()) {
grid.Ks_loc[I2V(i,j,k)] = _0_CR;
grid.Keta_loc[I2V(i,j,k)] = _0_CR;
grid.Kxi_loc[I2V(i,j,k)] = _0_CR;
grid.Ks_density_loc[I2V(i,j,k)] = _0_CR;
grid.Kxi_density_loc[I2V(i,j,k)] = _0_CR;
grid.Keta_density_loc[I2V(i,j,k)] = _0_CR;
}
if (j == 0 && !grid.j_first()) {
grid.Ks_loc[I2V(i,j,k)] = _0_CR;
grid.Keta_loc[I2V(i,j,k)] = _0_CR;
grid.Kxi_loc[I2V(i,j,k)] = _0_CR;
grid.Ks_density_loc[I2V(i,j,k)] = _0_CR;
grid.Kxi_density_loc[I2V(i,j,k)] = _0_CR;
grid.Keta_density_loc[I2V(i,j,k)] = _0_CR;
}
if (j == loc_J-1 && !grid.j_last()) {
grid.Ks_loc[I2V(i,j,k)] = _0_CR;
grid.Keta_loc[I2V(i,j,k)] = _0_CR;
grid.Kxi_loc[I2V(i,j,k)] = _0_CR;
grid.Ks_density_loc[I2V(i,j,k)] = _0_CR;
grid.Kxi_density_loc[I2V(i,j,k)] = _0_CR;
grid.Keta_density_loc[I2V(i,j,k)] = _0_CR;
}
if (k == 0 && !grid.k_first()) {
grid.Ks_loc[I2V(i,j,k)] = _0_CR;
grid.Keta_loc[I2V(i,j,k)] = _0_CR;
grid.Kxi_loc[I2V(i,j,k)] = _0_CR;
grid.Ks_density_loc[I2V(i,j,k)] = _0_CR;
grid.Kxi_density_loc[I2V(i,j,k)] = _0_CR;
grid.Keta_density_loc[I2V(i,j,k)] = _0_CR;
}
if (k == loc_K-1 && !grid.k_last()) {
grid.Ks_loc[I2V(i,j,k)] = _0_CR;
grid.Keta_loc[I2V(i,j,k)] = _0_CR;
grid.Kxi_loc[I2V(i,j,k)] = _0_CR;
grid.Ks_density_loc[I2V(i,j,k)] = _0_CR;
grid.Kxi_density_loc[I2V(i,j,k)] = _0_CR;
grid.Keta_density_loc[I2V(i,j,k)] = _0_CR;
}
}
}
}
// grid based smoothing
smooth_inv_kernels_orig(grid, IP);
} else if (smooth_method == 1) {
// CG smoothing
smooth_inv_kernels_CG(grid, smooth_lr, smooth_lt, smooth_lp);
}
// shared values on the boundary
grid.send_recev_boundary_data(grid.Ks_update_loc);
grid.send_recev_boundary_data(grid.Keta_update_loc);
grid.send_recev_boundary_data(grid.Kxi_update_loc);
grid.send_recev_boundary_data(grid.Ks_density_update_loc);
grid.send_recev_boundary_data(grid.Kxi_density_update_loc);
grid.send_recev_boundary_data(grid.Keta_density_update_loc);
grid.send_recev_boundary_data_kosumi(grid.Ks_update_loc);
grid.send_recev_boundary_data_kosumi(grid.Keta_update_loc);
grid.send_recev_boundary_data_kosumi(grid.Kxi_update_loc);
grid.send_recev_boundary_data_kosumi(grid.Ks_density_update_loc);
grid.send_recev_boundary_data_kosumi(grid.Kxi_density_update_loc);
grid.send_recev_boundary_data_kosumi(grid.Keta_density_update_loc);
} // end if id_sim == 0
// send the updated model to all the simultaneous run
broadcast_cr_inter_sim(grid.Ks_update_loc, loc_I*loc_J*loc_K, 0);
broadcast_cr_inter_sim(grid.Kxi_update_loc, loc_I*loc_J*loc_K, 0);
broadcast_cr_inter_sim(grid.Keta_update_loc, loc_I*loc_J*loc_K, 0);
broadcast_cr_inter_sim(grid.Ks_density_update_loc, loc_I*loc_J*loc_K, 0);
broadcast_cr_inter_sim(grid.Kxi_density_update_loc, loc_I*loc_J*loc_K, 0);
broadcast_cr_inter_sim(grid.Keta_density_update_loc, loc_I*loc_J*loc_K, 0);
// send the previous updated model to all the simultaneous run
broadcast_cr_inter_sim(grid.Ks_update_loc_previous, loc_I*loc_J*loc_K, 0);
broadcast_cr_inter_sim(grid.Kxi_update_loc_previous, loc_I*loc_J*loc_K, 0);
broadcast_cr_inter_sim(grid.Keta_update_loc_previous, loc_I*loc_J*loc_K, 0);
} // end if subdom_main
synchronize_all_world();
}
// smooth gradient regularization term
void smooth_gradient_regularization(Grid& grid) {
if (subdom_main && id_sim==0){ // only id_sim==0 has values for these arrays
if (smooth_method == 0) {
// grid based smoothing
smooth_gradient_regularization_orig(grid);
} else if (smooth_method == 1) {
// CG smoothing
smooth_gradient_regularization_CG(grid, smooth_lr, smooth_lt, smooth_lp);
}
grid.send_recev_boundary_data(grid.fun_gradient_regularization_penalty_loc);
grid.send_recev_boundary_data(grid.eta_gradient_regularization_penalty_loc);
grid.send_recev_boundary_data(grid.xi_gradient_regularization_penalty_loc);
}
}
void smooth_descent_direction(Grid& grid){
if (subdom_main){
if (id_sim == 0) { // calculation of the update model is only done in the main simultaneous run
if (smooth_method == 0) {
// grid based smoothing
smooth_descent_dir(grid);
} else if (smooth_method == 1) {
// CG smoothing
smooth_descent_dir_CG(grid, smooth_lr, smooth_lt, smooth_lp);
}
// shared values on the boundary
grid.send_recev_boundary_data( grid.Ks_descent_dir_loc);
grid.send_recev_boundary_data(grid.Keta_descent_dir_loc);
grid.send_recev_boundary_data( grid.Kxi_descent_dir_loc);
} // end if id_sim == 0
// send the updated model to all the simultaneous run
broadcast_cr_inter_sim( grid.Ks_descent_dir_loc, loc_I*loc_J*loc_K, 0);
broadcast_cr_inter_sim( grid.Kxi_descent_dir_loc, loc_I*loc_J*loc_K, 0);
broadcast_cr_inter_sim(grid.Keta_descent_dir_loc, loc_I*loc_J*loc_K, 0);
}
}
void calc_descent_direction(Grid& grid, int i_inv, InputParams& IP) {
if (subdom_main) {
// routines for LBFGS
if (optim_method == LBFGS_MODE) {
// calculate the descent direction
if (i_inv > 0) {
calculate_descent_direction_lbfgs(grid, i_inv);
// use gradient for the first iteration
} else {
int n_grid = loc_I*loc_J*loc_K;
// first time, descent direction = - precond * gradient
// inverse the gradient to fit the update scheme for LBFGS
for (int i = 0; i < n_grid; i++){
grid.Ks_descent_dir_loc[i] = - _1_CR* grid.Ks_update_loc[i];
grid.Keta_descent_dir_loc[i] = - _1_CR* grid.Keta_update_loc[i];
grid.Kxi_descent_dir_loc[i] = - _1_CR* grid.Kxi_update_loc[i];
//grid.Ks_descent_dir_loc[i] = - _1_CR* grid.Ks_loc[i];
//grid.Keta_descent_dir_loc[i] = - _1_CR* grid.Keta_loc[i];
//grid.Kxi_descent_dir_loc[i] = - _1_CR* grid.Kxi_loc[i];
}
}
} else {
// return error
std::cout << "Error: optim_method is not set to LBFGS_MODE (=2)" << std::endl;
exit(1);
}
} // end if subdom_main
synchronize_all_world();
}
CUSTOMREAL direction_change_of_model_update(Grid& grid){
CUSTOMREAL norm_grad = _0_CR;
CUSTOMREAL norm_grad_previous = _0_CR;
CUSTOMREAL inner_product = _0_CR;
CUSTOMREAL cos_angle = _0_CR;
CUSTOMREAL angle = _0_CR;
if (subdom_main) {
// initiaize update params
inner_product = dot_product(grid.Ks_update_loc_previous, grid.Ks_update_loc, loc_I*loc_J*loc_K);
norm_grad = dot_product(grid.Ks_update_loc, grid.Ks_update_loc, loc_I*loc_J*loc_K);
norm_grad_previous = dot_product(grid.Ks_update_loc_previous, grid.Ks_update_loc_previous, loc_I*loc_J*loc_K);
CUSTOMREAL tmp;
allreduce_cr_single(inner_product,tmp);
inner_product = tmp;
allreduce_cr_single(norm_grad,tmp);
norm_grad = tmp;
allreduce_cr_single(norm_grad_previous,tmp);
norm_grad_previous = tmp;
cos_angle = inner_product / (std::sqrt(norm_grad) * std::sqrt(norm_grad_previous));
angle = acos(cos_angle) * RAD2DEG;
}
return angle;
}
void set_new_model(Grid& grid, CUSTOMREAL step_length_new, bool init_bfgs=false) {
if (subdom_main) {
// for LBFGS mode. K*_update_loc is not directly the descent direction but smoothed gradient
// so for non LBFGS_mode, nextstep will be calculated with K*_update_loc
if (optim_method != LBFGS_MODE) {
// // get the scaling factor
// CUSTOMREAL Linf_Ks = _0_CR, Linf_Keta = _0_CR, Linf_Kxi = _0_CR;
// for (int k = 1; k < loc_K-1; k++) {
// for (int j = 1; j < loc_J-1; j++) {
// for (int i = 1; i < loc_I-1; i++) {
// Linf_Ks = std::max(Linf_Ks, std::abs(grid.Ks_update_loc[I2V(i,j,k)]));
// Linf_Keta = std::max(Linf_Keta, std::abs(grid.Keta_update_loc[I2V(i,j,k)]));
// Linf_Kxi = std::max(Linf_Kxi, std::abs(grid.Kxi_update_loc[I2V(i,j,k)]));
// }
// }
// }
// // get the maximum scaling factor among subdomains
// CUSTOMREAL Linf_tmp;
// allreduce_cr_single_max(Linf_Ks, Linf_tmp); Linf_Ks = Linf_tmp;
// allreduce_cr_single_max(Linf_Keta, Linf_tmp); Linf_Keta = Linf_tmp;
// allreduce_cr_single_max(Linf_Kxi, Linf_tmp); Linf_Kxi = Linf_tmp;
// CUSTOMREAL Linf_all = _0_CR;
// Linf_all = std::max(Linf_Ks, std::max(Linf_Keta, Linf_Kxi));
// // if (myrank == 0 && id_sim == 0)
// // std::cout << "Scaling factor for all kernels: " << Linf_all << std::endl;
// // std::cout << "Scaling factor for model update for Ks, Keta, Kx, stepsize: " << Linf_Ks << ", " << Linf_Keta << ", " << Linf_Kxi << ", " << step_length_new << std::endl;
// Linf_Ks = Linf_all;
// Linf_Keta = Linf_all;
// Linf_Kxi = Linf_all;
// kernel update has been rescaled in "smooth_inv_kernels_orig"
// update the model
for (int k = 0; k < loc_K; k++) {
for (int j = 0; j < loc_J; j++) {
for (int i = 0; i < loc_I; i++) {
// // update
// grid.fun_loc[I2V(i,j,k)] *= (_1_CR - grid.Ks_update_loc[I2V(i,j,k) ] / (Linf_Ks /step_length_new) );
// grid.xi_loc[I2V(i,j,k)] -= grid.Kxi_update_loc[I2V(i,j,k) ] / (Linf_Kxi /step_length_new) ;
// grid.eta_loc[I2V(i,j,k)] -= grid.Keta_update_loc[I2V(i,j,k)] / (Linf_Keta /step_length_new) ;
// update
grid.fun_loc[I2V(i,j,k)] *= (_1_CR - grid.Ks_update_loc[I2V(i,j,k) ] * step_length_new);
grid.xi_loc[I2V(i,j,k)] -= grid.Kxi_update_loc[I2V(i,j,k) ] * step_length_new;
grid.eta_loc[I2V(i,j,k)] -= grid.Keta_update_loc[I2V(i,j,k)] * step_length_new;
// grid.fac_b_loc[I2V(i,j,k)] = _1_CR - _2_CR * grid.xi_loc[I2V(i,j,k)];
// grid.fac_c_loc[I2V(i,j,k)] = _1_CR + _2_CR * grid.xi_loc[I2V(i,j,k)];
// grid.fac_f_loc[I2V(i,j,k)] = - _2_CR * grid.eta_loc[I2V(i,j,k)];
// grid.fun_loc[I2V(i,j,k)] *= (_1_CR);
// grid.xi_loc[I2V(i,j,k)] -= (_0_CR);
// grid.eta_loc[I2V(i,j,k)] -= (_0_CR);
// grid.fac_b_loc[I2V(i,j,k)] = _1_CR - _2_CR * grid.xi_loc[I2V(i,j,k)];
// grid.fac_c_loc[I2V(i,j,k)] = _1_CR + _2_CR * grid.xi_loc[I2V(i,j,k)];
// grid.fac_f_loc[I2V(i,j,k)] = - _2_CR * grid.eta_loc[I2V(i,j,k)];
}
}
}
grid.rejuvenate_abcf();
} else { // for LBFGS routine
// here all the simultaneous runs have the same values used in this routine.
// thus we don't need to if(id_sim==0)
CUSTOMREAL step_length = step_length_new;
const CUSTOMREAL factor = - _1_CR;
// update the model
for (int k = 0; k < loc_K; k++) {
for (int j = 0; j < loc_J; j++) {
for (int i = 0; i < loc_I; i++) {
// update
grid.fun_loc[I2V(i,j,k)] *= (_1_CR - factor * grid.Ks_descent_dir_loc[I2V(i,j,k)] * step_length);
grid.xi_loc[I2V(i,j,k)] -= factor * grid.Kxi_descent_dir_loc[I2V(i,j,k) ] * step_length;
grid.eta_loc[I2V(i,j,k)] -= factor * grid.Keta_descent_dir_loc[I2V(i,j,k)] * step_length;
//grid.fun_loc[I2V(i,j,k)] += grid.Ks_descent_dir_loc[I2V(i,j,k)] *step_length_new;
//grid.xi_loc[I2V(i,j,k)] += grid.Kxi_descent_dir_loc[I2V(i,j,k)] *step_length_new;
//grid.eta_loc[I2V(i,j,k)] += grid.Keta_descent_dir_loc[I2V(i,j,k)]*step_length_new;
grid.fac_b_loc[I2V(i,j,k)] = _1_CR - _2_CR * grid.xi_loc[I2V(i,j,k)];
grid.fac_c_loc[I2V(i,j,k)] = _1_CR + _2_CR * grid.xi_loc[I2V(i,j,k)];
grid.fac_f_loc[I2V(i,j,k)] = - _2_CR * grid.eta_loc[I2V(i,j,k)];
}
}
}
} // end LBFGS model update
// shared values on the boundary
grid.send_recev_boundary_data(grid.fun_loc);
grid.send_recev_boundary_data(grid.xi_loc);
grid.send_recev_boundary_data(grid.eta_loc);
grid.send_recev_boundary_data(grid.fac_b_loc);
grid.send_recev_boundary_data(grid.fac_c_loc);
grid.send_recev_boundary_data(grid.fac_f_loc);
} // end if subdom_main
}
// compute p_k * grad(f_k)
CUSTOMREAL compute_q_k(Grid& grid) {
CUSTOMREAL tmp_qk = _0_CR;
if (subdom_main && id_sim == 0) {
// grad * descent_direction
for (int k = 0; k < loc_K; k++) {
for (int j = 0; j < loc_J; j++) {
for (int i = 0; i < loc_I; i++) {
// we can add here a precondition (here use *_update_loc for storing descent direction)
tmp_qk += grid.Ks_update_loc[I2V(i,j,k)] * grid.Ks_descent_dir_loc[I2V(i,j,k)]
+ grid.Keta_update_loc[I2V(i,j,k)] * grid.Keta_descent_dir_loc[I2V(i,j,k)]
+ grid.Kxi_update_loc[I2V(i,j,k)] * grid.Kxi_descent_dir_loc[I2V(i,j,k)];
}
}
}
// add tmp_qk among all subdomain
allreduce_cr_single(tmp_qk, tmp_qk);
}
// share tmp_qk among all simultaneous runs
if (subdom_main)
broadcast_cr_single_inter_sim(tmp_qk,0);
return tmp_qk;
}
// check if the wolfe conditions are satisfied
bool check_wolfe_cond(Grid& grid, \
CUSTOMREAL q_0, CUSTOMREAL q_t, \
CUSTOMREAL qp_0, CUSTOMREAL qp_t, \
CUSTOMREAL& td, CUSTOMREAL& tg, CUSTOMREAL& step_length_sub) {
/*
q_0 : initial cost function
q_t : current cost function
q_p0 : initial grad * descent_dir
q_pt : current grad * descent_dir
td : right step size
tg : left step size
step_length_sub : current step size
*/
bool step_accepted = false;
// check if the wolfe conditions are satisfied and update the step_length_sub
CUSTOMREAL qpt = (q_t - q_0) / step_length_sub;
if (subdom_main) {
// good step size
if (qpt <= wolfe_c1*qp_0 \
&& wolfe_c2*qp_0 <= qp_t \
){
if (myrank==0)
std::cout << "Wolfe rules: step accepted" << std::endl;
step_accepted = true;
} else {
// modify the stepsize
if (wolfe_c1*qp_0 < qpt) {
td = step_length_sub;
if (myrank==0)
std::cout << "Wolfe rules: right step size updated." << std::endl;
}
if (qpt <= wolfe_c1*qp_0 && qp_t < wolfe_c2*qp_0) {
tg = step_length_sub;
if (myrank==0)
std::cout << "Wolfe rules: left step size updated." << std::endl;
}
//if (isZero(td)) {
if (td == _0_CR) {
step_length_sub = _2_CR * step_length_sub;
if (myrank==0)
std::cout << "Wolfe rules: step size too small. Increased to " << step_length_sub << std::endl;
} else {
step_length_sub = _0_5_CR * (td+tg);
if (myrank==0)
std::cout << "Wolfe rules: step size too large. Decreased to " << step_length_sub << std::endl;
}
}
// share step_accepted among all subdomains
broadcast_bool_single(step_accepted,0);
broadcast_cr_single(step_length_sub,0);
} // end if subdom_main
broadcast_bool_single_sub(step_accepted,0);
broadcast_cr_single_sub(step_length_sub,0);
return step_accepted;
}
#endif // MODEL_UPDATE_H

1062
include/mpi_funcs.h Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,181 @@
#ifndef OBJECTIVE_FUNCTION_UTILS_H
#define OBJECTIVE_FUNCTION_UTILS_H
#include <iostream>
#include <vector>
#include "config.h"
#include "input_params.h"
// prepare header line of objective_funciton.txt
inline void prepare_header_line(InputParams &IP, std::ofstream &out_main) {
// prepare output for iteration status
if(myrank == 0 && id_sim ==0){
out_main.open(output_dir + "/objective_function.txt");
if (optim_method == GRADIENT_DESCENT || optim_method == HALVE_STEPPING_MODE){
out_main << std::setw(8) << std::right << "# iter,";
out_main << std::setw(13) << std::right << " type,";
// if (optim_method == HALVE_STEPPING_MODE)
// out_main << std::setw(8) << std::right << "subiter,"; (TODO in the future)
std::string tmp = "obj(";
if(IP.get_run_mode() == ONED_INVERSION){
tmp.append(std::to_string(IP.N_abs_local_data)); // for 1d inversion, only absolute time residual is considered in obj
} else {
tmp.append(std::to_string(IP.N_data));
}
tmp.append("),");
out_main << std::setw(20) << tmp;
tmp = "obj_abs(";
tmp.append(std::to_string(IP.N_abs_local_data));
tmp.append("),");
out_main << std::setw(20) << tmp;
tmp = "obj_cs_dif(";
if (IP.get_is_srcrec_swap())
tmp.append(std::to_string(IP.N_cr_dif_local_data));
else
tmp.append(std::to_string(IP.N_cs_dif_local_data));
tmp.append("),");
out_main << std::setw(20) << tmp;
tmp = "obj_cr_dif(";
if (IP.get_is_srcrec_swap())
tmp.append(std::to_string(IP.N_cs_dif_local_data));
else
tmp.append(std::to_string(IP.N_cr_dif_local_data));
tmp.append("),");
out_main << std::setw(20) << tmp;
tmp = "obj_tele(";
tmp.append(std::to_string(IP.N_teleseismic_data));
tmp.append("),");
out_main << std::setw(20) << tmp;
out_main << std::setw(25) << "res(mean/std),";
out_main << std::setw(25) << "res_abs(mean/std),";
out_main << std::setw(25) << "res_cs_dif(mean/std),";
out_main << std::setw(25) << "res_cr_dif(mean/std),";
out_main << std::setw(25) << "res_tele(mean/std),";
out_main << std::setw(20) << "step_length," << std::endl;
} else if (optim_method == LBFGS_MODE)
out_main << std::setw(6) << "it," << std::setw(6) << "subit," << std::setw(16) << "step_length," << std::setw(16) << "q_0," << std::setw(16) << "q_t," \
<< std::setw(16) << "v_obj_reg," << std::setw(16) << "qp_0," << std::setw(16) << "qp_t," << std::setw(16) << "td," << std::setw(16) << "tg," \
<< std::setw(16) << "c1*qp_0," << std::setw(16) << "c2*qp_0," << std::setw(6) << "step ok," << std::endl;
}
}
inline void write_objective_function(InputParams& IP, int i_inv, std::vector<CUSTOMREAL>& v_misfit_inout, std::ofstream& out_main, std::string type) {
// output objective function
if (myrank==0 && id_sim==0) {
out_main << std::setw(8) << std::to_string(i_inv)+",";
out_main << std::setw(13) << type;
out_main << "," << std::setw(19) << v_misfit_inout[0];
out_main << "," << std::setw(19) << v_misfit_inout[1];
if (IP.get_is_srcrec_swap())
out_main << "," << std::setw(19) << v_misfit_inout[3];
else
out_main << "," << std::setw(19) << v_misfit_inout[2];
if (IP.get_is_srcrec_swap())
out_main << "," << std::setw(19) << v_misfit_inout[2];
else
out_main << "," << std::setw(19) << v_misfit_inout[3];
out_main << "," << std::setw(19) << v_misfit_inout[4];
// res
CUSTOMREAL mean;
CUSTOMREAL std;
std::string tmp;
if (IP.N_data > 0) {
mean = v_misfit_inout[5]/IP.N_data;
std = sqrt(v_misfit_inout[6]/IP.N_data - my_square(mean));
tmp = std::to_string(mean);
tmp.append("/");
tmp.append(std::to_string(std));
out_main << "," << std::setw(24) << tmp;
} else {
out_main << "," << std::setw(24) << "0.0/0.0";
}
// res_abs
if (IP.N_abs_local_data > 0) {
mean = v_misfit_inout[7]/IP.N_abs_local_data;
std = sqrt(v_misfit_inout[8]/IP.N_abs_local_data - my_square(mean));
tmp = std::to_string(mean);
tmp.append("/");
tmp.append(std::to_string(std));
out_main << "," << std::setw(24) << tmp;
} else {
out_main << "," << std::setw(24) << "0.0/0.0";
}
if (IP.get_is_srcrec_swap()){
if (IP.N_cr_dif_local_data > 0) {
mean = v_misfit_inout[11]/IP.N_cr_dif_local_data;
std = sqrt(v_misfit_inout[12]/IP.N_cr_dif_local_data - my_square(mean));
tmp = std::to_string(mean);
tmp.append("/");
tmp.append(std::to_string(std));
out_main << "," << std::setw(24) << tmp;
} else {
out_main << "," << std::setw(24) << "0.0/0.0";
}
if (IP.N_cs_dif_local_data > 0) {
mean = v_misfit_inout[9]/IP.N_cs_dif_local_data;
std = sqrt(v_misfit_inout[10]/IP.N_cs_dif_local_data - my_square(mean));
tmp = std::to_string(mean);
tmp.append("/");
tmp.append(std::to_string(std));
out_main << "," << std::setw(24) << tmp;
} else {
out_main << "," << std::setw(24) << "0.0/0.0";
}
} else {
if (IP.N_cs_dif_local_data > 0) {
mean = v_misfit_inout[9]/IP.N_cs_dif_local_data;
std = sqrt(v_misfit_inout[10]/IP.N_cs_dif_local_data - my_square(mean));
tmp = std::to_string(mean);
tmp.append("/");
tmp.append(std::to_string(std));
out_main << "," << std::setw(24) << tmp;
} else {
out_main << "," << std::setw(24) << "0.0/0.0";
}
if (IP.N_cr_dif_local_data > 0) {
mean = v_misfit_inout[11]/IP.N_cr_dif_local_data;
std = sqrt(v_misfit_inout[12]/IP.N_cr_dif_local_data - my_square(mean));
tmp = std::to_string(mean);
tmp.append("/");
tmp.append(std::to_string(std));
out_main << "," << std::setw(24) << tmp;
} else {
out_main << "," << std::setw(24) << "0.0/0.0";
}
}
if (IP.N_teleseismic_data > 0) {
mean = v_misfit_inout[13]/IP.N_teleseismic_data;
std = sqrt(v_misfit_inout[14]/IP.N_teleseismic_data - my_square(mean));
tmp = std::to_string(mean);
tmp.append("/");
tmp.append(std::to_string(std));
out_main << "," << std::setw(24) << tmp;
} else {
out_main << "," << std::setw(24) << "0.0/0.0";
}
if(type == "model update" || type == "1d inversion")
out_main << "," << std::setw(19) << step_length_init << ",";
out_main << std::endl;
}
}
#endif // OBJECTIVE_FUNCTION_UTILS_H

109
include/oneD_inversion.h Normal file
View File

@@ -0,0 +1,109 @@
#ifndef ONED_INVERSION_H
#define ONED_INVERSION_H
#include <fstream>
#include <string>
#include <iostream>
#include "input_params.h"
#include "grid.h"
#include "utils.h"
#include "config.h"
#include "receiver.h"
class OneDInversion {
public:
OneDInversion(InputParams&, Grid&);
~OneDInversion();
// grid parameters
int nr_1dinv, nt_1dinv; // number of grid points in r and epicenter distance
CUSTOMREAL *r_1dinv, *t_1dinv; // coordinates
CUSTOMREAL dr_1dinv, dt_1dinv; // grid spacing
// parameters on grid nodes (for model)
CUSTOMREAL *slowness_1dinv; // slowness
CUSTOMREAL *fac_a_1dinv; // fac_a
CUSTOMREAL *fac_b_1dinv; // fac_b
// parameters on grid nodes (for eikonal solver and adjoint solver)
CUSTOMREAL *tau_1dinv; // tau
CUSTOMREAL *tau_old_1dinv; // tau_old
CUSTOMREAL *T_1dinv; // T
CUSTOMREAL *Tadj_1dinv; // Tadj
CUSTOMREAL *Tadj_density_1dinv; // Tadj
CUSTOMREAL *T0v_1dinv; // T0v
CUSTOMREAL *T0r_1dinv; // T0r
CUSTOMREAL *T0t_1dinv; // T0t
bool *is_changed_1dinv; // is_changed
CUSTOMREAL *delta_1dinv; // delta
// parameters on grid nodes (for inversion)
CUSTOMREAL *Ks_1dinv; // Ks
CUSTOMREAL *Ks_density_1dinv; // Ks_density
CUSTOMREAL *Ks_over_Kden_1dinv; // Ks_over_Kdensity
CUSTOMREAL *Ks_multigrid_1dinv; // (Ks_over_Kdensity) parameterized by multigrid
CUSTOMREAL *Ks_multigrid_previous_1dinv; // Ks_multigrid at previous iteration
// parameters for optimization
CUSTOMREAL v_obj;
CUSTOMREAL old_v_obj;
// functions
std::vector<CUSTOMREAL> run_simulation_one_step_1dinv(InputParams&, IO_utils&, const int&);
void model_optimize_1dinv(InputParams&, Grid&, IO_utils&, const int&);
// h5 file output
void write_T_1dinv(IO_utils&, const std::string&, const int&);
void write_Tadj_1dinv(IO_utils&, const std::string&, const int&);
void write_vel_1dinv(IO_utils&, const int&);
private:
// for eikonal solver
int count_cand;
int ii,ii_nr,ii_n2r,ii_pr,ii_p2r,ii_nt,ii_n2t,ii_pt,ii_p2t;
CUSTOMREAL ar,br,at,bt,ar1,ar2,at1,at2,br1,br2,bt1,bt2;
CUSTOMREAL eqn_a, eqn_b, eqn_c, eqn_Delta;
CUSTOMREAL tmp_tau, T_r, T_t, charact_r, charact_t;
bool is_causality;
std::vector<CUSTOMREAL> canditate = std::vector<CUSTOMREAL>(60);
// member functions
// class functions
void generate_2d_mesh(InputParams&);
void deallocate_arrays();
void allocate_arrays();
void load_1d_model(Grid&);
int I2V_1DINV(const int&,const int&);
// "run_simulation_one_step_1dinv" subfunctions:
void eikonal_solver_2d(InputParams&, int& );
void initialize_eikonal_array(CUSTOMREAL);
void FSM_2d();
void calculate_stencil(const int&, const int&);
void calculate_synthetic_traveltime_and_adjoint_source(InputParams&, int& );
CUSTOMREAL interpolate_2d_traveltime(const CUSTOMREAL&, const CUSTOMREAL&);
void adjoint_solver_2d(InputParams&, const int&, const int&);
void initialize_adjoint_array(InputParams&, const int&, const int&);
void FSM_2d_adjoint(const int&);
void calculate_stencil_adj(const int&, const int&);
void initialize_kernel_1d();
void calculate_kernel_1d();
// "model_optimize_1dinv" subfunctions:
void kernel_processing_1dinv(Grid&);
void density_normalization_1dinv();
void multi_grid_parameterization_1dinv(Grid&);
void model_update_1dinv(const int&);
void determine_step_size_1dinv(const int&);
CUSTOMREAL norm_1dinv(const CUSTOMREAL*, const int&);
void generate_3d_model(Grid&);
};
#endif // ONED_INVERSION_H

36
include/receiver.h Normal file
View File

@@ -0,0 +1,36 @@
#ifndef RECEIVER_H
#define RECEIVER_H
#include <vector>
#include "input_params.h"
#include "grid.h"
class Receiver {
public:
Receiver();
~Receiver();
void interpolate_and_store_arrival_times_at_rec_position(InputParams&, Grid&, const std::string&); // only for common receiver differential traveltime
// adjoint source
void calculate_adjoint_source(InputParams&, const std::string&);
// objective function and residual
std::vector<CUSTOMREAL> calculate_obj_and_residual(InputParams&);
// Gradient of traveltime
void calculate_T_gradient(InputParams&, Grid&, const std::string&);
// initialize variables for source relocation
void init_vars_src_reloc(InputParams&);
// Gradient of objective function
void calculate_grad_obj_src_reloc(InputParams&, const std::string&);
// objective function
std::vector<CUSTOMREAL> calculate_obj_reloc(InputParams&, int);
// update source location
void update_source_location(InputParams&, Grid&);
private:
CUSTOMREAL interpolate_travel_time(Grid&, InputParams&, std::string, std::string);
void calculate_T_gradient_one_rec(Grid&, InputParams&, std::string, CUSTOMREAL*);
bool check_if_receiver_is_in_this_subdomain(Grid&, const CUSTOMREAL&, const CUSTOMREAL&, const CUSTOMREAL&);
};
#endif // RECEIVER_H

194
include/simd_conf.h Normal file
View File

@@ -0,0 +1,194 @@
#ifndef SIMD_CONF_H
#define SIMD_CONF_H
#ifdef USE_SIMD
// flags for self controling the SIMD usage
#define USE_AVX __AVX__
#define USE_AVX512 __AVX512F__
#define USE_ARM_SVE __ARM_FEATURE_SVE
#define USE_NEON __ARM_NEON__
// forcely make USE_AVX and USE_AVX512 to be false if USE_ARM_SVE is true
#if USE_ARM_SVE
#undef USE_AVX
#define USE_AVX 0
#undef USE_AVX512
#define USE_AVX512 0
#endif
#if USE_AVX || USE_AVX512
#include <immintrin.h>
#elif USE_ARM_SVE
#include <arm_sve.h>
#elif USE_NEON
#include <arm_neon.h>
#endif
#ifdef SINGLE_PRECISION
#if USE_AVX && !USE_AVX512
const int ALIGN = 32;
const int NSIMD = 8;
#define __mT __m256
#define _mmT_set1_pT _mm256_set1_ps
#define _mmT_loadu_pT _mm256_loadu_ps
#define _mmT_mul_pT _mm256_mul_ps
#define _mmT_sub_pT _mm256_sub_ps
#define _mmT_add_pT _mm256_add_ps
#define _mmT_div_pT _mm256_div_ps
#define _mmT_min_pT _mm256_min_ps
#define _mmT_sqrt_pT _mm256_sqrt_ps
#define _mmT_store_pT _mm256_store_ps
#define _mmT_fmadd_pT _mm256_fmadd_ps
#define _mm256_cmp_pT _mm256_cmp_ps
#elif USE_AVX512
const int ALIGN = 64;
const int NSIMD = 16;
#define __mT __m512
#define _mmT_set1_pT _mm512_set1_ps
#define _mmT_loadu_pT _mm512_loadu_ps
#define _mmT_mul_pT _mm512_mul_ps
#define _mmT_sub_pT _mm512_sub_ps
#define _mmT_add_pT _mm512_add_ps
#define _mmT_div_pT _mm512_div_ps
#define _mmT_min_pT _mm512_min_ps
#define _mmT_max_pT _mm512_max_ps
#define _mmT_sqrt_pT _mm512_sqrt_ps
#define _mmT_store_pT _mm512_store_ps
#define _mmT_fmadd_pT _mm512_fmadd_ps
#define __mmaskT __mmask16
#define _mm512_cmp_pT_mask _mm512_cmp_ps_mask
#define _mm512_mask_blend_pT _mm512_mask_blend_ps
#define _kand_maskT _kand_mask16
#elif USE_ARM_SVE // NOT TESTED YET
const int ALIGN = 8*svcntd(); // use svcntw() for float
const int NSIMD = svcntd(); // Vector Length
#define __mT svfloat32_t
//#define _mmT_set1_pT svdup_f64
//#define _mmT_loadu_pT svld1_f64_f64_f64
//#define _mmT_mul_pT svmul_f64_m
//#define _mmT_sub_pT svsub_f64_m
//#define _mmT_add_pT svadd_f64_m
//#define _mmT_div_pT svdiv_f64_m
//#define _mmT_min_pT svmin_f64_m
//#define _mmT_sqrt_pT svsqrt_f64_m
//#define _mmT_store_pT svst1_f64
#elif USE_NEON
const int ALIGN = 16;
const int NSIMD = 4;
#define __mT float32x4_t
#define _mmT_set1_pT vdupq_n_f32
#define _mmT_loadu_pT vld1q_f32
#define _mmT_mul_pT vmulq_f32
#define _mmT_sub_pT vsubq_f32
#define _mmT_add_pT vaddq_f32
#define _mmT_div_pT vdivq_f32
#define _mmT_min_pT vminq_f32
#define _mmT_sqrt_pT vsqrtq_f32
#define _mmT_store_pT vst1q_f32
#endif // __ARM_FEATURE_SVE
#else // DOUBLE_PRECISION
#if USE_AVX && !USE_AVX512
const int ALIGN = 32;
const int NSIMD = 4;
#define __mT __m256d
#define _mmT_set1_pT _mm256_set1_pd
#define _mmT_loadu_pT _mm256_loadu_pd
#define _mmT_mul_pT _mm256_mul_pd
#define _mmT_sub_pT _mm256_sub_pd
#define _mmT_add_pT _mm256_add_pd
#define _mmT_div_pT _mm256_div_pd
#define _mmT_min_pT _mm256_min_pd
#define _mmT_sqrt_pT _mm256_sqrt_pd
#define _mmT_store_pT _mm256_store_pd
#define _mmT_fmadd_pT _mm256_fmadd_pd
#define _mm256_cmp_pT _mm256_cmp_pd
#elif USE_AVX512
const int ALIGN = 64;
const int NSIMD = 8;
#define __mT __m512d
#define _mmT_set1_pT _mm512_set1_pd
#define _mmT_loadu_pT _mm512_loadu_pd
#define _mmT_mul_pT _mm512_mul_pd
#define _mmT_sub_pT _mm512_sub_pd
#define _mmT_add_pT _mm512_add_pd
#define _mmT_div_pT _mm512_div_pd
#define _mmT_min_pT _mm512_min_pd
#define _mmT_max_pT _mm512_max_pd
#define _mmT_sqrt_pT _mm512_sqrt_pd
#define _mmT_store_pT _mm512_store_pd
#define _mmT_fmadd_pT _mm512_fmadd_pd
#define __mmaskT __mmask8
#define _mm512_cmp_pT_mask _mm512_cmp_pd_mask
#define _mm512_mask_blend_pT _mm512_mask_blend_pd
#define _kand_maskT _kand_mask8
#elif USE_ARM_SVE
const int ALIGN = 8*svcntd(); // use svcntw() for float
const int NSIMD = svcntd(); // Vector Length
#define __mT svfloat64_t
//#define _mmT_set1_pT svdup_f64
//#define _mmT_loadu_pT svld1_f64_f64_f64
//#define _mmT_mul_pT svmul_f64_m
//#define _mmT_sub_pT svsub_f64_m
//#define _mmT_add_pT svadd_f64_m
//#define _mmT_div_pT svdiv_f64_m
//#define _mmT_min_pT svmin_f64_m
//#define _mmT_sqrt_pT svsqrt_f64_m
//#define _mmT_store_pT svst1_f64
#elif USE_NEON
const int ALIGN = 16;
const int NSIMD = 2;
#define __mT float64x2_t
#define _mmT_set1_pT vdupq_n_f64
#define _mmT_loadu_pT vld1q_f64
#define _mmT_mul_pT vmulq_f64
#define _mmT_sub_pT vsubq_f64
#define _mmT_add_pT vaddq_f64
#define _mmT_div_pT vdivq_f64
#define _mmT_min_pT vminq_f64
#define _mmT_sqrt_pT vsqrtq_f64
#define _mmT_store_pT vst1q_f64
#endif // __ARM_FEATURE_SVE
#endif // DOUBLE_PRECISION
inline void print_simd_type() {
std::cout << "SIMD type: ";
#if USE_AVX && !USE_AVX512
std::cout << "AVX" << std::endl;
#elif USE_AVX512
std::cout << "AVX512" << std::endl;
#elif USE_ARM_SVE
std::cout << "ARM SVE" << std::endl;
#elif USE_NEON
std::cout << "NEON" << std::endl;
#endif // __ARM_FEATURE_SVE
}
#else // USE_CUDA but not AVX dummy
const int ALIGN = 32;
const int NSIMD = 4;
#endif // USE_SIMD
#endif // SIMD_CONF_H

773
include/smooth.h Normal file
View File

@@ -0,0 +1,773 @@
#ifndef SMOOTH_H
#define SMOOTH_H
#include <iostream>
#include "grid.h"
#include "config.h"
#include "utils.h"
void calc_inversed_laplacian(CUSTOMREAL* d, CUSTOMREAL* Ap,
const int i, const int j, const int k,
const CUSTOMREAL lr, const CUSTOMREAL lt, const CUSTOMREAL lp,
const CUSTOMREAL dr, const CUSTOMREAL dt, const CUSTOMREAL dp) {
// calculate inversed laplacian operator
CUSTOMREAL termx = _0_CR, termy = _0_CR, termz = _0_CR;
if (i==0) {
termx = dp*lp/3.0 * (1/(lp*dp)*(d[I2V(i,j,k)]) - lp/(dp*dp*dp)*(-2.0*d[I2V(i,j,k)]+2.0*d[I2V(i+1,j,k)]));
} else if (i==loc_I-1) {
termx = dp*lp/3.0 * (1/(lp*dp)*(d[I2V(i,j,k)]) - lp/(dp*dp*dp)*(-2.0*d[I2V(i,j,k)]+2.0*d[I2V(i-1,j,k)]));
} else {
termx = dp*lp/3.0 * (1/(lp*dp)*(d[I2V(i,j,k)]) - lp/(dp*dp*dp)*(-2.0*d[I2V(i,j,k)]+d[I2V(i-1,j,k)]+d[I2V(i+1,j,k)]));
}
if (j==0) {
termy = dt*lt/3.0 * (1/(lt*dt)*(d[I2V(i,j,k)]) - lt/(dt*dt*dt)*(-2.0*d[I2V(i,j,k)]+2.0*d[I2V(i,j+1,k)]));
} else if (j==loc_J-1) {
termy = dt*lt/3.0 * (1/(lt*dt)*(d[I2V(i,j,k)]) - lt/(dt*dt*dt)*(-2.0*d[I2V(i,j,k)]+2.0*d[I2V(i,j-1,k)]));
} else {
termy = dt*lt/3.0 * (1/(lt*dt)*(d[I2V(i,j,k)]) - lt/(dt*dt*dt)*(-2.0*d[I2V(i,j,k)]+d[I2V(i,j-1,k)]+d[I2V(i,j+1,k)]));
}
if (k==0) {
termz = dr*lr/3.0 * (1/(lr*dr)*(d[I2V(i,j,k)]) - lr/(dr*dr*dr)*(-2.0*d[I2V(i,j,k)]+2.0*d[I2V(i,j,k+1)]));
} else if (k==loc_K-1) {
termz = dr*lr/3.0 * (1/(lr*dr)*(d[I2V(i,j,k)]) - lr/(dr*dr*dr)*(-2.0*d[I2V(i,j,k)]+2.0*d[I2V(i,j,k-1)]));
} else {
termz = dr*lr/3.0 * (1/(lr*dr)*(d[I2V(i,j,k)]) - lr/(dr*dr*dr)*(-2.0*d[I2V(i,j,k)]+d[I2V(i,j,k-1)]+d[I2V(i,j,k+1)]));
}
Ap[I2V(i,j,k)] = termx+termy+termz;
}
void CG_smooth(Grid& grid, CUSTOMREAL* arr_in, CUSTOMREAL* arr_out, CUSTOMREAL lr, CUSTOMREAL lt, CUSTOMREAL lp) {
// arr: array to be smoothed
// lr: smooth length on r
// lt: smooth length on theta
// lp: smooth length on phi
// arrays :
// x_array model
// g_array gradient
// d_array descent direction
// Ap stiffness scalar (laplacian)
// pAp = d_array*Ap
// rr = g_array * g_array (dot product)
const int max_iter_cg = 1000;
const CUSTOMREAL xtol = 0.001;
const bool use_scaling = true;
CUSTOMREAL dr=grid.dr, dt=grid.dt, dp=grid.dp; // in km, rad, rad
//CUSTOMREAL dr=_1_CR,dt=_1_CR,dp=_1_CR;
//debug
std::cout << "dr, dt, dp, lr, lt, lp = " << dr << " " << dt << " " << dp << " " << lr << " " << lt << " " << lp << std::endl;
// allocate memory
CUSTOMREAL* x_array = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 3000);
CUSTOMREAL* r_array = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 3001);
CUSTOMREAL* p_array = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 3002);
CUSTOMREAL* Ap = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 3003);
CUSTOMREAL pAp=_0_CR, rr_0=_0_CR, rr=_0_CR, rr_new=_0_CR, aa=_0_CR, bb=_0_CR, tmp=_0_CR;
CUSTOMREAL scaling_A=_1_CR, scaling_coeff = _1_CR;
if (use_scaling) {
// calculate scaling factor
//scaling_A = std::sqrt(_1_CR / (_8_CR * PI * lr * lt * lp));
// scaling coefficient for gradient
scaling_coeff = find_absmax(arr_in, loc_I*loc_J*loc_K);
tmp = scaling_coeff;
allreduce_cr_single_max(tmp, scaling_coeff);
//if (scaling_coeff == _0_CR)
if (isZero(scaling_coeff))
scaling_coeff = _1_CR;
}
// std out scaling factors
if (myrank == 0) {
std::cout << "scaling_A = " << scaling_A << std::endl;
std::cout << "scaling_coeff = " << scaling_coeff << std::endl;
}
// array initialization
for (int i=0; i<loc_I*loc_J*loc_K; i++) {
x_array[i] = _0_CR; // x
r_array[i] = arr_in[i]/scaling_coeff; // r = b - Ax
p_array[i] = r_array[i]; // p
Ap[i] = _0_CR;
}
// initial rr
rr = dot_product(r_array, r_array, loc_I*loc_J*loc_K);
tmp = rr;
// sum all rr among all processors
allreduce_cr_single(tmp, rr);
rr_0 = rr; // record initial rr
int k_start=0, k_end=loc_K, j_start=0, j_end=loc_J, i_start=0, i_end=loc_I;
// CG loop
for (int iter=0; iter<max_iter_cg; iter++) {
// calculate laplacian
for (int k = k_start; k < k_end; k++) {
for (int j = j_start; j < j_end; j++) {
for (int i = i_start; i < i_end; i++) {
//scaling_coeff = std::max(scaling_coeff, std::abs(arr[I2V(i,j,k)]));
// calculate inversed laplacian operator
calc_inversed_laplacian(p_array,Ap,i,j,k,lr,lt,lp,dr,dt,dp);
// scaling
Ap[I2V(i,j,k)] = Ap[I2V(i,j,k)]*scaling_A;
}
}
}
// get the values on the boundaries
grid.send_recev_boundary_data(Ap);
// calculate pAp
pAp = dot_product(p_array, Ap, loc_I*loc_J*loc_K);
tmp = pAp;
// sum all pAp among all processors
allreduce_cr_single(tmp, pAp);
// compute step length
aa = rr / pAp;
// update x_array (model)
for (int i=0; i<loc_I*loc_J*loc_K; i++) {
x_array[i] += aa * p_array[i];
}
// update r_array (gradient)
for (int i=0; i<loc_I*loc_J*loc_K; i++) {
r_array[i] -= aa * Ap[i];
}
// update rr
rr_new = dot_product(r_array, r_array, loc_I*loc_J*loc_K);
tmp = rr_new;
// sum all rr among all processors
allreduce_cr_single(tmp, rr_new);
// update d_array (descent direction)
bb = rr_new / rr;
for (int i=0; i<loc_I*loc_J*loc_K; i++) {
p_array[i] = r_array[i] + bb * p_array[i];
}
if (myrank == 0 && iter%100==0){//} && if_verbose) {
std::cout << "iter: " << iter << " rr: " << rr << " rr_new: " << rr_new << " rr/rr_0: " << rr/rr_0 << " pAp: " << pAp << " aa: " << aa << " bb: " << bb << std::endl;
}
// update rr
rr = rr_new;
// check convergence
if (rr / rr_0 < xtol) {
std::cout << "CG converged in " << iter << " iterations." << std::endl;
break;
}
} // end of CG loop
// copy x_array to arr_out
for (int i=0; i<loc_I*loc_J*loc_K; i++) {
arr_out[i] = x_array[i]*scaling_coeff;
}
// deallocate
delete[] x_array;
delete[] r_array;
delete[] p_array;
delete[] Ap;
//delete[] Ap_tmp;
}
void smooth_inv_kernels_CG(Grid& grid, CUSTOMREAL lr, CUSTOMREAL lt, CUSTOMREAL lp) {
// smoothing kernels with Conjugate Gradient method
// lr,lt,lp: smoothing length in r, theta, phi direction
// smooth Ks
CG_smooth(grid, grid.Ks_loc, grid.Ks_update_loc, lr, lt, lp);
// smooth Keta
CG_smooth(grid, grid.Keta_loc, grid.Keta_update_loc, lr, lt, lp);
// smooth Kxi
CG_smooth(grid, grid.Kxi_loc, grid.Kxi_update_loc, lr, lt, lp);
}
// original method for smoothing kernels
inline void smooth_inv_kernels_orig(Grid& grid, InputParams& IP) {
// necessary params
CUSTOMREAL r_r, r_t, r_p;
CUSTOMREAL r_r_ani, r_t_ani, r_p_ani;
int kdr = 0, jdt = 0, idp = 0;
int kdr_ani = 0, jdt_ani = 0, idp_ani = 0;
int k_start = 0;
int j_start = 0;
int i_start = 0;
int k_end = ngrid_k;
int j_end = ngrid_j;
int i_end = ngrid_i;
CUSTOMREAL weight = _1_CR;
CUSTOMREAL * taper = IP.get_depth_taper();
// check final kernel density
for (int i_loc = 0; i_loc < loc_I; i_loc++) {
for (int j_loc = 0; j_loc < loc_J; j_loc++) {
for (int k_loc = 0; k_loc < loc_K; k_loc++) {
if (isNegative(grid.Ks_density_loc[I2V(i_loc,j_loc,k_loc)])){
std::cout << "Warning: grid.Ks_density_loc[I2V(" << i_loc << "," << j_loc << "," << k_loc << ")] is less than 0, = " << grid.Ks_density_loc[I2V(i_loc,j_loc,k_loc)] << std::endl;
}
if (isNegative(grid.Kxi_density_loc[I2V(i_loc,j_loc,k_loc)])){
std::cout << "Warning: grid.Kxi_density_loc[I2V(" << i_loc << "," << j_loc << "," << k_loc << ")] is less than 0, = " << grid.Kxi_density_loc[I2V(i_loc,j_loc,k_loc)] << std::endl;
}
if (isNegative(grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)])){
std::cout << "Warning: grid.Keta_density_loc[I2V(" << i_loc << "," << j_loc << "," << k_loc << ")] is less than 0, = " << grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] << std::endl;
}
}
}
}
// remark: kernel density normalization is shifted from kernel to kernel_inv.
// Because high kernel density may concentrated below station at single grid point. This part should be strenghthened.
// However, low kernel density may be distributed widely at middle depth. This part should be weakened.
// kernel_inv considers the integration, whereas kernel only considers one grid point.
// Therefore, kernel density normalization should be applied to kernel_inv, not kernel.
// // kernel density normalization
// for (int i_loc = 0; i_loc < loc_I; i_loc++) {
// for (int j_loc = 0; j_loc < loc_J; j_loc++) {
// for (int k_loc = 0; k_loc < loc_K; k_loc++) {
// if(isZero(grid.Ks_density_loc[I2V(i_loc,j_loc,k_loc)])){
// // do nothing
// } else {
// if (grid.Ks_density_loc[I2V(i_loc,j_loc,k_loc)] < 0){
// std::cout << "Warning: grid.Ks_density_loc[I2V(" << i_loc << "," << j_loc << "," << k_loc << ")] is less than 0, = "
// << grid.Ks_density_loc[I2V(i_loc,j_loc,k_loc)]
// << std::endl;
// }
// grid.Ks_loc[I2V(i_loc,j_loc,k_loc)] /= std::pow(std::abs(grid.Ks_density_loc[I2V(i_loc,j_loc,k_loc)]),Kdensity_coe);
// }
// if(isZero(grid.Kxi_density_loc[I2V(i_loc,j_loc,k_loc)])){
// // do nothing
// } else {
// if (grid.Kxi_density_loc[I2V(i_loc,j_loc,k_loc)] < 0){
// std::cout << "Warning: grid.Kxi_density_loc[I2V(" << i_loc << "," << j_loc << "," << k_loc << ")] is less than 0, = " << grid.Kxi_density_loc[I2V(i_loc,j_loc,k_loc)] << std::endl;
// }
// grid.Kxi_loc[ I2V(i_loc,j_loc,k_loc)] /= std::pow(std::abs(grid.Kxi_density_loc[I2V(i_loc,j_loc,k_loc)]),Kdensity_coe);
// }
// if(isZero(grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)])){
// // do nothing
// } else {
// if (grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] < 0){
// std::cout << "Warning: grid.Keta_density_loc[I2V(" << i_loc << "," << j_loc << "," << k_loc << ")] is less than 0, = " << grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] << std::endl;
// }
// grid.Keta_loc[I2V(i_loc,j_loc,k_loc)] /= std::pow(std::abs(grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)]),Kdensity_coe);
// }
// }
// }
// }
//
// sum the kernel values on the inversion grid
//
for (int i_grid = 0; i_grid < n_inv_grids; i_grid++) {
// init coarser kernel arrays
// velocity
for (int k = 0; k < n_inv_K_loc; k++) {
for (int j = 0; j < n_inv_J_loc; j++) {
for (int i = 0; i < n_inv_I_loc; i++) {
grid.Ks_inv_loc[ I2V_INV_KNL(i,j,k)] = _0_CR;
grid.Ks_density_inv_loc[I2V_INV_KNL(i,j,k)] = _0_CR;
}
}
}
// anisotropy
for (int k = 0; k < n_inv_K_loc_ani; k++) {
for (int j = 0; j < n_inv_J_loc_ani; j++) {
for (int i = 0; i < n_inv_I_loc_ani; i++) {
grid.Keta_inv_loc[I2V_INV_ANI_KNL(i,j,k)] = _0_CR;
grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)] = _0_CR;
grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(i,j,k)] = _0_CR;
grid.Kxi_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)] = _0_CR;
}
}
}
for (int k = k_start; k < k_end; k++) {
CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
r_r = -_1_CR;
for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
// increasing or decreasing order
if (in_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)])) {
kdr = ii_invr;
r_r = calc_ratio_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]);
break;
}
}
r_r_ani = -_1_CR;
for (int ii_invr = 0; ii_invr < n_inv_K_loc_ani-1; ii_invr++){
// increasing or decreasing order
if (in_between(r_glob, grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr+1,i_grid)])) {
kdr_ani = ii_invr;
r_r_ani = calc_ratio_between(r_glob, grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr+1,i_grid)]);
break;
}
}
// continue if r is out of the inversion grid
if (r_r < _0_CR || r_r_ani < _0_CR) continue;
for (int j = j_start; j < j_end; j++) {
CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat(); // global coordinate of t (latitude)
r_t = -_1_CR;
for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
CUSTOMREAL left = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr+1,i_grid)]*(r_r);
CUSTOMREAL right = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr+1,i_grid)]*(r_r);
if (in_between(t_glob, left, right)) {
jdt = ii_invt;
r_t = calc_ratio_between(t_glob, left, right);
break;
}
}
r_t_ani = -_1_CR;
for (int ii_invt = 0; ii_invt < n_inv_J_loc_ani-1; ii_invt++){
//if ((t_glob - grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt,i_grid)]) * (t_glob - grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt+1,i_grid)]) <= _0_CR) {
CUSTOMREAL left = grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt, kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt, kdr_ani+1,i_grid)]*(r_r_ani);
CUSTOMREAL right = grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt+1,kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt+1,kdr_ani+1,i_grid)]*(r_r_ani);
if (in_between(t_glob, left, right)) {
jdt_ani = ii_invt;
r_t_ani = calc_ratio_between(t_glob, left, right);
break;
}
}
// continue if t is out of the inversion grid
if (r_t < _0_CR || r_t_ani < _0_CR) continue;
for (int i = i_start; i < i_end; i++) {
CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon(); // global coordinate of p (longitude)
r_p = -_1_CR;
for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
CUSTOMREAL left = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr+1,i_grid)]*(r_r);
CUSTOMREAL right = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr+1,i_grid)]*(r_r);
if (in_between(p_glob, left, right)) {
idp = ii_invp;
r_p = calc_ratio_between(p_glob, left, right);
break;
}
}
r_p_ani = -_1_CR;
for (int ii_invp = 0; ii_invp < n_inv_I_loc_ani-1; ii_invp++){
CUSTOMREAL left = grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp, kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp, kdr_ani+1,i_grid)]*(r_r_ani);
CUSTOMREAL right = grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp+1,kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp+1,kdr_ani+1,i_grid)]*(r_r_ani);
if (in_between(p_glob, left, right)) {
idp_ani = ii_invp;
r_p_ani = calc_ratio_between(p_glob, left, right);
break;
}
}
// continue if p is out of the inversion grid
if (r_p < _0_CR || r_p_ani < _0_CR) continue;
// global grid id to local id
int k_loc = k - grid.get_offset_k();
int j_loc = j - grid.get_offset_j();
int i_loc = i - grid.get_offset_i();
// check if *_loc are inside the local subdomain
if (k_loc < 0 || k_loc > loc_K-1) continue;
if (j_loc < 0 || j_loc > loc_J-1) continue;
if (i_loc < 0 || i_loc > loc_I-1) continue;
// check if Ks_density_loc is 0 (no contributary kernel here)
// if(isZero(grid.Ks_density_loc[I2V(i_loc,j_loc,k_loc)])) continue;
CUSTOMREAL dxdydz = (grid.dr) * (r_glob*grid.dt) * (r_glob*cos(t_glob)*grid.dp); // dx*dy*dz = r^2*cos(t)dr*dt*dp
// update Ks_inv Keta_inv Kxi_inv
// -p, -t, -r
grid.Ks_inv_loc[ I2V_INV_KNL( idp, jdt, kdr)] += (_1_CR-r_r) *(_1_CR-r_t) *(_1_CR-r_p) *grid.Ks_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)] += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)] += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Ks_density_inv_loc[ I2V_INV_KNL( idp, jdt, kdr)] += (_1_CR-r_r) *(_1_CR-r_t) *(_1_CR-r_p) *grid.Ks_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)] += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)] += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
// -p, -t, +r
grid.Ks_inv_loc[ I2V_INV_KNL( idp, jdt, kdr +1)] += r_r *(_1_CR-r_t) *(_1_CR-r_p) *grid.Ks_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)] += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)] += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Ks_density_inv_loc[ I2V_INV_KNL( idp, jdt, kdr +1)] += r_r *(_1_CR-r_t) *(_1_CR-r_p) *grid.Ks_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)] += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)] += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
// -p, +t, -r
grid.Ks_inv_loc[ I2V_INV_KNL( idp, jdt +1,kdr)] += (_1_CR-r_r) *r_t *(_1_CR-r_p) *grid.Ks_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)] += (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)] += (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Ks_density_inv_loc[ I2V_INV_KNL( idp, jdt +1,kdr)] += (_1_CR-r_r) *r_t *(_1_CR-r_p) *grid.Ks_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)] += (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)] += (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
// +p, -t, -r
grid.Ks_inv_loc[ I2V_INV_KNL( idp +1,jdt, kdr)] += (_1_CR-r_r) *(_1_CR-r_t) *r_p *grid.Ks_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)] += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)] += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Ks_density_inv_loc[ I2V_INV_KNL( idp +1,jdt, kdr)] += (_1_CR-r_r) *(_1_CR-r_t) *r_p *grid.Ks_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)] += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)] += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
// -p, +t, +r
grid.Ks_inv_loc[ I2V_INV_KNL( idp, jdt +1,kdr +1)] += r_r *r_t *(_1_CR-r_p) *grid.Ks_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)] += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)] += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Ks_density_inv_loc[ I2V_INV_KNL( idp, jdt +1,kdr +1)] += r_r *r_t *(_1_CR-r_p) *grid.Ks_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)] += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)] += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
// +p, -t, +r
grid.Ks_inv_loc[ I2V_INV_KNL( idp +1,jdt, kdr +1)] += r_r *(_1_CR-r_t) *r_p *grid.Ks_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)] += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)] += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Ks_density_inv_loc[ I2V_INV_KNL( idp +1,jdt, kdr +1)] += r_r *(_1_CR-r_t) *r_p *grid.Ks_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)] += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)] += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
// +p, +t, -r
grid.Ks_inv_loc[ I2V_INV_KNL( idp +1,jdt +1,kdr)] += (_1_CR-r_r) *r_t *r_p *grid.Ks_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)] += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Keta_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)] += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Kxi_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Ks_density_inv_loc[ I2V_INV_KNL( idp +1,jdt +1,kdr)] += (_1_CR-r_r) *r_t *r_p *grid.Ks_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)] += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)] += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Kxi_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
// +p, +t, +r
grid.Ks_inv_loc[ I2V_INV_KNL( idp +1,jdt +1,kdr +1)] += r_r *r_t *r_p *grid.Ks_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)] += r_r_ani*r_t_ani*r_p_ani*grid.Keta_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)] += r_r_ani*r_t_ani*r_p_ani*grid.Kxi_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Ks_density_inv_loc[ I2V_INV_KNL( idp +1,jdt +1,kdr +1)] += r_r *r_t *r_p *grid.Ks_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)] += r_r_ani*r_t_ani*r_p_ani*grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)] += r_r_ani*r_t_ani*r_p_ani*grid.Kxi_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
} // end for i
} // end for j
} // end for k
// sum over all sub-domains
allreduce_cr_inplace(grid.Ks_inv_loc, n_inv_I_loc*n_inv_J_loc*n_inv_K_loc);
allreduce_cr_inplace(grid.Keta_inv_loc, n_inv_I_loc_ani*n_inv_J_loc_ani*n_inv_K_loc_ani);
allreduce_cr_inplace(grid.Kxi_inv_loc, n_inv_I_loc_ani*n_inv_J_loc_ani*n_inv_K_loc_ani);
allreduce_cr_inplace(grid.Ks_density_inv_loc, n_inv_I_loc*n_inv_J_loc*n_inv_K_loc);
allreduce_cr_inplace(grid.Keta_density_inv_loc, n_inv_I_loc_ani*n_inv_J_loc_ani*n_inv_K_loc_ani);
allreduce_cr_inplace(grid.Kxi_density_inv_loc, n_inv_I_loc_ani*n_inv_J_loc_ani*n_inv_K_loc_ani);
// kernel density normalization
// velocity
for (int k = 0; k < n_inv_K_loc; k++) {
for (int j = 0; j < n_inv_J_loc; j++) {
for (int i = 0; i < n_inv_I_loc; i++) {
if(isZero(grid.Ks_density_inv_loc[I2V_INV_KNL(i,j,k)])){
// do nothing
} else {
if (isNegative(grid.Ks_density_inv_loc[I2V_INV_KNL(i,j,k)])){
std::cout << "Warning: grid.Ks_density_inv_loc[I2V_INV_KNL(" << i << "," << j << "," << k << ")] is less than 0, = "
<< grid.Ks_density_inv_loc[I2V_INV_KNL(i,j,k)]
<< ", using absolute value instead."
<< std::endl;
}
grid.Ks_inv_loc[I2V_INV_KNL(i,j,k)] /= std::pow(std::abs(grid.Ks_density_inv_loc[I2V_INV_KNL(i,j,k)]),Kdensity_coe);
}
}
}
}
// anisotropy
for (int k = 0; k < n_inv_K_loc_ani; k++) {
for (int j = 0; j < n_inv_J_loc_ani; j++) {
for (int i = 0; i < n_inv_I_loc_ani; i++) {
if(isZero(grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)])){
// do nothing
} else {
if (isNegative(grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)])){
std::cout << "Warning: grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(" << i << "," << j << "," << k << ")] is less than 0, = "
<< grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)]
<< ", using absolute value instead."
<< std::endl;
}
grid.Keta_inv_loc[I2V_INV_ANI_KNL(i,j,k)] /= std::pow(std::abs(grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)]),Kdensity_coe);
}
if(isZero(grid.Kxi_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)])){
// do nothing
} else {
if (isNegative(grid.Kxi_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)])){
std::cout << "Warning: grid.Kxi_density_inv_loc[I2V_INV_ANI_KNL(" << i << "," << j << "," << k << ")] is less than 0, = "
<< grid.Kxi_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)]
<< ", using absolute value instead."
<< std::endl;
}
grid.Kxi_inv_loc[I2V_INV_ANI_KNL(i,j,k)] /= std::pow(std::abs(grid.Kxi_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)]),Kdensity_coe);
}
}
}
}
//
// update factors
//
for (int k = k_start; k < k_end; k++) {
CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
r_r = -_1_CR;
for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
if (in_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)])) {
kdr = ii_invr;
r_r = calc_ratio_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]);
break;
}
}
r_r_ani = -_1_CR;
for (int ii_invr = 0; ii_invr < n_inv_K_loc_ani-1; ii_invr++){
// increasing or decreasing order
if (in_between(r_glob, grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr+1,i_grid)])) {
kdr_ani = ii_invr;
r_r_ani = calc_ratio_between(r_glob, grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr+1,i_grid)]);
break;
}
}
// depth model update taper
CUSTOMREAL depth = radius2depth(r_glob);
if (depth < taper[0]) { // weight = 0;
weight = _0_CR;
} else if (depth < taper[1]) {
weight = (_1_CR - std::cos(PI*(depth - taper[0])/(taper[1] - taper[0]))) / _2_CR;
} else {
weight = _1_CR;
}
// continue if r is out of the inversion grid
if (r_r < _0_CR || r_r_ani < _0_CR) continue;
for (int j = j_start; j < j_end; j++) {
CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat();
r_t = -_1_CR;
for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
CUSTOMREAL left = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr+1,i_grid)]*(r_r);
CUSTOMREAL right = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr+1,i_grid)]*(r_r);
if (in_between(t_glob, left, right)) {
jdt = ii_invt;
r_t = calc_ratio_between(t_glob, left, right);
break;
}
}
r_t_ani = -_1_CR;
for (int ii_invt = 0; ii_invt < n_inv_J_loc_ani-1; ii_invt++){
CUSTOMREAL left = grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt, kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt, kdr_ani+1,i_grid)]*(r_r_ani);
CUSTOMREAL right = grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt+1,kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt+1,kdr_ani+1,i_grid)]*(r_r_ani);
if (in_between(t_glob, left, right)) {
jdt_ani = ii_invt;
r_t_ani = calc_ratio_between(t_glob, left, right);
break;
}
}
// continue if t is out of the inversion grid
if (r_t < _0_CR || r_t_ani < _0_CR) continue;
for (int i = i_start; i < i_end; i++) {
CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon();
r_p = -_1_CR;
for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
CUSTOMREAL left = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr+1,i_grid)]*(r_r);
CUSTOMREAL right = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr+1,i_grid)]*(r_r);
if (in_between(p_glob, left, right)) {
idp = ii_invp;
r_p = calc_ratio_between(p_glob, left, right);
break;
}
}
r_p_ani = -_1_CR;
for (int ii_invp = 0; ii_invp < n_inv_I_loc_ani-1; ii_invp++){
CUSTOMREAL left = grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp, kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp, kdr_ani+1,i_grid)]*(r_r_ani);
CUSTOMREAL right = grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp+1,kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp+1,kdr_ani+1,i_grid)]*(r_r_ani);
if (in_between(p_glob, left, right)) {
idp_ani = ii_invp;
r_p_ani = calc_ratio_between(p_glob, left, right);
break;
}
}
// continue if p is out of the inversion grid
if (r_p < _0_CR || r_p_ani < _0_CR) continue;
// global grid id to local id
int k_loc = k - grid.get_offset_k();
int j_loc = j - grid.get_offset_j();
int i_loc = i - grid.get_offset_i();
// check if *_loc are inside the local subdomain
if (k_loc < 0 || k_loc > loc_K-1) continue;
if (j_loc < 0 || j_loc > loc_J-1) continue;
if (i_loc < 0 || i_loc > loc_I-1) continue;
CUSTOMREAL pert_Ks = 0.0;
CUSTOMREAL pert_Keta = 0.0;
CUSTOMREAL pert_Kxi = 0.0;
CUSTOMREAL pert_Ks_density = 0.0;
CUSTOMREAL pert_Keta_density = 0.0;
CUSTOMREAL pert_Kxi_density = 0.0;
pert_Ks += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)];
pert_Keta += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)];
pert_Kxi += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)];
pert_Ks_density += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)];
pert_Keta_density += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)];
pert_Kxi_density += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)];
pert_Ks += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)];
pert_Keta += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)];
pert_Kxi += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)];
pert_Ks_density += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)];
pert_Keta_density += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)];
pert_Kxi_density += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)];
pert_Ks += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)];
pert_Keta += (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)];
pert_Kxi += (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)];
pert_Ks_density += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)];
pert_Keta_density = (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)];
pert_Kxi_density += (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)];
pert_Ks += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)];
pert_Keta += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)];
pert_Kxi += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)];
pert_Ks_density += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)];
pert_Keta_density += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)];
pert_Kxi_density += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)];
pert_Ks += r_r*r_t*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)];
pert_Keta += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)];
pert_Kxi += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)];
pert_Ks_density += r_r*r_t*(_1_CR-r_p)*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)];
pert_Keta_density += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)];
pert_Kxi_density += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)];
pert_Ks += r_r*(_1_CR-r_t)*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)];
pert_Keta += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)];
pert_Kxi += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)];
pert_Ks_density += r_r*(_1_CR-r_t)*r_p*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)];
pert_Keta_density += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)];
pert_Kxi_density += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)];
pert_Ks += (_1_CR-r_r)*r_t*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)];
pert_Keta += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)];
pert_Kxi += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)];
pert_Ks_density += (_1_CR-r_r)*r_t*r_p*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)];
pert_Keta_density += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)];
pert_Kxi_density += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)];
pert_Ks += r_r*r_t*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)];
pert_Keta += r_r_ani*r_t_ani*r_p_ani*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)];
pert_Kxi += r_r_ani*r_t_ani*r_p_ani*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)];
pert_Ks_density += r_r*r_t*r_p*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)];
pert_Keta_density += r_r_ani*r_t_ani*r_p_ani*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)];
pert_Kxi_density += r_r_ani*r_t_ani*r_p_ani*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)];
// update para
grid.Ks_update_loc[ I2V(i_loc,j_loc,k_loc)] += weight * pert_Ks;
grid.Keta_update_loc[ I2V(i_loc,j_loc,k_loc)] += weight * pert_Keta;
grid.Kxi_update_loc[ I2V(i_loc,j_loc,k_loc)] += weight * pert_Kxi;
grid.Ks_density_update_loc[ I2V(i_loc,j_loc,k_loc)] += weight * pert_Ks_density;
grid.Keta_density_update_loc[ I2V(i_loc,j_loc,k_loc)] += weight * pert_Keta_density;
grid.Kxi_density_update_loc[ I2V(i_loc,j_loc,k_loc)] += weight * pert_Kxi_density;
} // end for i
} // end for j
} // end for k
} // end i_grid
//
// rescale kernel update to -1 ~ 1
//
// get the scaling factor
CUSTOMREAL Linf_Ks = _0_CR, Linf_Keta = _0_CR, Linf_Kxi = _0_CR;
// CUSTOMREAL Linf_Kden = _0_CR;
for (int k = 1; k < loc_K-1; k++) {
for (int j = 1; j < loc_J-1; j++) {
for (int i = 1; i < loc_I-1; i++) {
Linf_Ks = std::max(Linf_Ks, std::abs(grid.Ks_update_loc[I2V(i,j,k)]));
Linf_Keta = std::max(Linf_Keta, std::abs(grid.Keta_update_loc[I2V(i,j,k)]));
Linf_Kxi = std::max(Linf_Kxi, std::abs(grid.Kxi_update_loc[I2V(i,j,k)]));
}
}
}
// get the maximum scaling factor among subdomains
CUSTOMREAL Linf_tmp;
allreduce_cr_single_max(Linf_Ks, Linf_tmp); Linf_Ks = Linf_tmp;
allreduce_cr_single_max(Linf_Keta, Linf_tmp); Linf_Keta = Linf_tmp;
allreduce_cr_single_max(Linf_Kxi, Linf_tmp); Linf_Kxi = Linf_tmp;
CUSTOMREAL Linf_all = _0_CR;
Linf_all = std::max(Linf_Ks, std::max(Linf_Keta, Linf_Kxi));
Linf_Ks = Linf_all;
Linf_Keta = Linf_all;
Linf_Kxi = Linf_all;
// rescale the kernel update
for (int k = 0; k < loc_K; k++) {
for (int j = 0; j < loc_J; j++) {
for (int i = 0; i < loc_I; i++) {
grid.Ks_update_loc[I2V(i,j,k)] /= Linf_Ks;
grid.Keta_update_loc[I2V(i,j,k)] /= Linf_Keta;
grid.Kxi_update_loc[I2V(i,j,k)] /= Linf_Kxi;
}
}
}
}
#endif

View File

@@ -0,0 +1,319 @@
#ifndef SMOOTH_DESCENT_DIR_H
#define SMOOTH_DESCENT_DIR_H
#include <iostream>
#include "grid.h"
#include "config.h"
#include "smooth.h"
inline void smooth_descent_dir_CG(Grid& grid, const CUSTOMREAL lr, const CUSTOMREAL lt, const CUSTOMREAL lp) {
// smoothing kernels with Conjugate Gradient method
// lr,lt,lp: smoothing length in r, theta, phi direction
// smooth Ks
CG_smooth(grid, grid.Ks_descent_dir_loc, grid.Ks_descent_dir_loc, lr, lt, lp);
// smooth Keta
CG_smooth(grid, grid.Keta_descent_dir_loc, grid.Keta_descent_dir_loc, lr, lt, lp);
// smooth Kxi
CG_smooth(grid, grid.Kxi_descent_dir_loc, grid.Kxi_descent_dir_loc, lr, lt, lp);
}
// original method for smoothing kernels
inline void smooth_descent_dir(Grid& grid){
// necessary params
CUSTOMREAL r_r, r_t, r_p;
int kdr, jdt, idp;
int k_start = 0;
int j_start = 0;
int i_start = 0;
int k_end = ngrid_k;
int j_end = ngrid_j;
int i_end = ngrid_i;
//
// sum the kernel values on the inversion grid
//
for (int i_grid = 0; i_grid < n_inv_grids; i_grid++) {
// init coarser kernel arrays
for (int k = 0; k < n_inv_K_loc; k++) {
for (int j = 0; j < n_inv_J_loc; j++) {
for (int i = 0; i < n_inv_I_loc; i++) {
grid.Ks_inv_loc[ I2V_INV_KNL(i,j,k)] = _0_CR;
grid.Keta_inv_loc[I2V_INV_KNL(i,j,k)] = _0_CR;
grid.Kxi_inv_loc[ I2V_INV_KNL(i,j,k)] = _0_CR;
}
}
}
for (int k = k_start; k < k_end; k++) {
// CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
// r_r = -_1_CR;
// for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
// if (r_glob > grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)] && r_glob <= grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]) {
// kdr = ii_invr;
// r_r = (r_glob - grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)]) / (grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)] - grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)]);
// break;
// }
// }
CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
r_r = -_1_CR;
for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
// increasing or decreasing order
if (in_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)])) {
kdr = ii_invr;
r_r = calc_ratio_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]);
break;
}
}
// continue if r is out of the inversion grid
if (r_r < 0) continue;
for (int j = j_start; j < j_end; j++) {
// CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat();
// r_t = -_1_CR;
// for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
// if (t_glob > grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)] && t_glob <= grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,i_grid)]) {
// jdt = ii_invt;
// r_t = (t_glob - grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)]) / (grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,i_grid)] - grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)]);
// break;
// }
// }
CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat(); // global coordinate of t (latitude)
r_t = -_1_CR;
for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
CUSTOMREAL left = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr+1,i_grid)]*(r_r);
CUSTOMREAL right = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr+1,i_grid)]*(r_r);
if (in_between(t_glob, left, right)) {
jdt = ii_invt;
r_t = calc_ratio_between(t_glob, left, right);
break;
}
}
// continue if t is out of the inversion grid
if (r_t < 0) continue;
for (int i = i_start; i < i_end; i++) {
// CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon();
// r_p = -_1_CR;
// for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
// if (p_glob > grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)] && p_glob <= grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,i_grid)]) {
// idp = ii_invp;
// r_p = (p_glob - grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)]) / (grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,i_grid)] - grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)]);
// break;
// }
// }
CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon(); // global coordinate of p (longitude)
r_p = -_1_CR;
for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
CUSTOMREAL left = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(r_r);
CUSTOMREAL right = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(r_r);
if (in_between(p_glob, left, right)) {
idp = ii_invp;
r_p = calc_ratio_between(p_glob, left, right);
break;
}
}
// continue if p is out of the inversion grid
if (r_p < 0) continue;
// global grid id to local id
int k_loc = k - grid.get_offset_k();
int j_loc = j - grid.get_offset_j();
int i_loc = i - grid.get_offset_i();
// check if *_loc are inside the local subdomain
if (k_loc < 0 || k_loc > loc_K-1) continue;
if (j_loc < 0 || j_loc > loc_J-1) continue;
if (i_loc < 0 || i_loc > loc_I-1) continue;
// update Ks_inv Keta_inv Kxi_inv
grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Keta_descent_dir_loc[I2V(i_loc,j_loc,k_loc)];
grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Kxi_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)] += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt,kdr+1)] += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Keta_descent_dir_loc[I2V(i_loc,j_loc,k_loc)];
grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)] += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Kxi_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)] += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Ks_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt+1,kdr)] += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Keta_descent_dir_loc[I2V(i_loc,j_loc,k_loc)];
grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)] += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Kxi_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Ks_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Keta_descent_dir_loc[I2V(i_loc,j_loc,k_loc)];
grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Kxi_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)] += r_r*r_t*(_1_CR-r_p)*grid.Ks_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt+1,kdr+1)] += r_r*r_t*(_1_CR-r_p)*grid.Keta_descent_dir_loc[I2V(i_loc,j_loc,k_loc)];
grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)] += r_r*r_t*(_1_CR-r_p)*grid.Kxi_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)] += r_r*(_1_CR-r_t)*r_p*grid.Ks_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt,kdr+1)] += r_r*(_1_CR-r_t)*r_p*grid.Keta_descent_dir_loc[I2V(i_loc,j_loc,k_loc)];
grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)] += r_r*(_1_CR-r_t)*r_p*grid.Kxi_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)] += (_1_CR-r_r)*r_t*r_p*grid.Ks_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt+1,kdr)] += (_1_CR-r_r)*r_t*r_p*grid.Keta_descent_dir_loc[I2V(i_loc,j_loc,k_loc)];
grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)] += (_1_CR-r_r)*r_t*r_p*grid.Kxi_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)] += r_r*r_t*r_p*grid.Ks_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt+1,kdr+1)] += r_r*r_t*r_p*grid.Keta_descent_dir_loc[I2V(i_loc,j_loc,k_loc)];
grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)] += r_r*r_t*r_p*grid.Kxi_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
} // end for i
} // end for j
} // end for k
// sum over all sub-domains
allreduce_cr_inplace(grid.Ks_inv_loc, n_inv_I_loc*n_inv_J_loc*n_inv_K_loc);
allreduce_cr_inplace(grid.Keta_inv_loc, n_inv_I_loc*n_inv_J_loc*n_inv_K_loc);
allreduce_cr_inplace(grid.Kxi_inv_loc, n_inv_I_loc*n_inv_J_loc*n_inv_K_loc);
//
// update factors
//
for (int k = k_start; k < k_end; k++) {
// CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
// r_r = -_1_CR;
// for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
// if (r_glob > grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)] && r_glob <= grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]) {
// kdr = ii_invr;
// r_r = (r_glob - grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)]) / (grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)] - grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)]);
// break;
// }
// }
CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
r_r = -_1_CR;
for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
if (in_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)])) {
kdr = ii_invr;
r_r = calc_ratio_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]);
break;
}
}
// continue if r is out of the inversion grid
if (r_r < 0) continue;
for (int j = j_start; j < j_end; j++) {
// CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat();
// r_t = -_1_CR;
// for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
// if (t_glob > grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)] && t_glob <= grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,i_grid)]) {
// jdt = ii_invt;
// r_t = (t_glob - grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)]) / (grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,i_grid)] - grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)]);
// break;
// }
// }
CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat();
r_t = -_1_CR;
for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
CUSTOMREAL left = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr+1,i_grid)]*(r_r);
CUSTOMREAL right = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr+1,i_grid)]*(r_r);
if (in_between(t_glob, left, right)) {
jdt = ii_invt;
r_t = calc_ratio_between(t_glob, left, right);
break;
}
}
// continue if t is out of the inversion grid
if (r_t < 0) continue;
for (int i = i_start; i < i_end; i++) {
// CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon();
// r_p = -_1_CR;
// for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
// if (p_glob > grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)] && p_glob <= grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,i_grid)]) {
// idp = ii_invp;
// r_p = (p_glob - grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)]) / (grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,i_grid)] - grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)]);
// break;
// }
// }
CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon();
r_p = -_1_CR;
for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
CUSTOMREAL left = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(r_r);
CUSTOMREAL right = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(r_r);
if (in_between(p_glob, left, right)) {
idp = ii_invp;
r_p = calc_ratio_between(p_glob, left, right);
break;
}
}
// continue if p is out of the inversion grid
if (r_p < 0) continue;
// global grid id to local id
int k_loc = k - grid.get_offset_k();
int j_loc = j - grid.get_offset_j();
int i_loc = i - grid.get_offset_i();
// check if *_loc are inside the local subdomain
if (k_loc < 0 || k_loc > loc_K-1) continue;
if (j_loc < 0 || j_loc > loc_J-1) continue;
if (i_loc < 0 || i_loc > loc_I-1) continue;
CUSTOMREAL pert_Ks = 0.0;
CUSTOMREAL pert_Keta = 0.0;
CUSTOMREAL pert_Kxi = 0.0;
pert_Ks += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)];
pert_Keta += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt,kdr)];
pert_Kxi += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)];
pert_Ks += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)];
pert_Keta += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt,kdr+1)];
pert_Kxi += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)];
pert_Ks += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)];
pert_Keta += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt+1,kdr)];
pert_Kxi += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)];
pert_Ks += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)];
pert_Keta += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt,kdr)];
pert_Kxi += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)];
pert_Ks += r_r*r_t*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)];
pert_Keta += r_r*r_t*(_1_CR-r_p)*grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt+1,kdr+1)];
pert_Kxi += r_r*r_t*(_1_CR-r_p)*grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)];
pert_Ks += r_r*(_1_CR-r_t)*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)];
pert_Keta += r_r*(_1_CR-r_t)*r_p*grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt,kdr+1)];
pert_Kxi += r_r*(_1_CR-r_t)*r_p*grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)];
pert_Ks += (_1_CR-r_r)*r_t*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)];
pert_Keta += (_1_CR-r_r)*r_t*r_p*grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt+1,kdr)];
pert_Kxi += (_1_CR-r_r)*r_t*r_p*grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)];
pert_Ks += r_r*r_t*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)];
pert_Keta += r_r*r_t*r_p*grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt+1,kdr+1)];
pert_Kxi += r_r*r_t*r_p*grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)];
// update para
grid.Ks_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)] += pert_Ks;
grid.Keta_descent_dir_loc[I2V(i_loc,j_loc,k_loc)] += pert_Keta;
grid.Kxi_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)] += pert_Kxi;
} // end for i
} // end for j
} // end for k
} // end i_grid
}
#endif

321
include/smooth_grad_regul.h Normal file
View File

@@ -0,0 +1,321 @@
#ifndef SMOOTH_GRAD_REGUL_H
#define SMOOTH_GRAD_REGUL_H
#include <iostream>
#include "grid.h"
#include "config.h"
#include "smooth.h"
inline void smooth_gradient_regularization_CG(Grid& grid, const CUSTOMREAL lr, const CUSTOMREAL lt, const CUSTOMREAL lp) {
// smoothing kernels with Conjugate Gradient method
// lr,lt,lp: smoothing length in r, theta, phi direction
// smooth Ks
CG_smooth(grid, grid.fun_gradient_regularization_penalty_loc, \
grid.fun_gradient_regularization_penalty_loc, lr, lt, lp);
// smooth Keta
CG_smooth(grid, grid.eta_gradient_regularization_penalty_loc, \
grid.eta_gradient_regularization_penalty_loc, lr, lt, lp);
// smooth Kxi
CG_smooth(grid, grid.xi_gradient_regularization_penalty_loc, \
grid.xi_gradient_regularization_penalty_loc, lr, lt, lp);
}
// original method for smoothing kernels
inline void smooth_gradient_regularization_orig(Grid& grid){
// necessary params
CUSTOMREAL r_r, r_t, r_p;
int kdr, jdt, idp;
int k_start = 0;
int j_start = 0;
int i_start = 0;
int k_end = ngrid_k;
int j_end = ngrid_j;
int i_end = ngrid_i;
//
// sum the kernel values on the inversion grid
//
for (int i_grid = 0; i_grid < n_inv_grids; i_grid++) {
// init coarser kernel arrays
for (int k = 0; k < n_inv_K_loc; k++) {
for (int j = 0; j < n_inv_J_loc; j++) {
for (int i = 0; i < n_inv_I_loc; i++) {
grid.Ks_inv_loc[ I2V_INV_KNL(i,j,k)] = _0_CR;
grid.Keta_inv_loc[I2V_INV_KNL(i,j,k)] = _0_CR;
grid.Kxi_inv_loc[ I2V_INV_KNL(i,j,k)] = _0_CR;
}
}
}
for (int k = k_start; k < k_end; k++) {
// CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
// r_r = -_1_CR;
// for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
// if (r_glob > grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)] && r_glob <= grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]) {
// kdr = ii_invr;
// r_r = (r_glob - grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)]) / (grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)] - grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)]);
// break;
// }
// }
CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
r_r = -_1_CR;
for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
// increasing or decreasing order
if (in_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)])) {
kdr = ii_invr;
r_r = calc_ratio_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]);
break;
}
}
// continue if r is out of the inversion grid
if (r_r < 0) continue;
for (int j = j_start; j < j_end; j++) {
// CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat();
// r_t = -_1_CR;
// for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
// if (t_glob > grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)] && t_glob <= grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,i_grid)]) {
// jdt = ii_invt;
// r_t = (t_glob - grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)]) / (grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,i_grid)] - grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)]);
// break;
// }
// }
CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat(); // global coordinate of t (latitude)
r_t = -_1_CR;
for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
CUSTOMREAL left = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr+1,i_grid)]*(r_r);
CUSTOMREAL right = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr+1,i_grid)]*(r_r);
if (in_between(t_glob, left, right)) {
jdt = ii_invt;
r_t = calc_ratio_between(t_glob, left, right);
break;
}
}
// continue if t is out of the inversion grid
if (r_t < 0) continue;
for (int i = i_start; i < i_end; i++) {
// CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon();
// r_p = -_1_CR;
// for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
// if (p_glob > grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)] && p_glob <= grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,i_grid)]) {
// idp = ii_invp;
// r_p = (p_glob - grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)]) / (grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,i_grid)] - grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)]);
// break;
// }
// }
CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon(); // global coordinate of p (longitude)
r_p = -_1_CR;
for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
CUSTOMREAL left = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(r_r);
CUSTOMREAL right = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(r_r);
if (in_between(p_glob, left, right)) {
idp = ii_invp;
r_p = calc_ratio_between(p_glob, left, right);
break;
}
}
// continue if p is out of the inversion grid
if (r_p < 0) continue;
// global grid id to local id
int k_loc = k - grid.get_offset_k();
int j_loc = j - grid.get_offset_j();
int i_loc = i - grid.get_offset_i();
// check if *_loc are inside the local subdomain
if (k_loc < 0 || k_loc > loc_K-1) continue;
if (j_loc < 0 || j_loc > loc_J-1) continue;
if (i_loc < 0 || i_loc > loc_I-1) continue;
// update Ks_inv Keta_inv Kxi_inv
grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.fun_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.eta_gradient_regularization_penalty_loc[I2V(i_loc,j_loc,k_loc)];
grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.xi_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)] += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.fun_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt,kdr+1)] += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.eta_gradient_regularization_penalty_loc[I2V(i_loc,j_loc,k_loc)];
grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)] += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.xi_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)] += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.fun_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt+1,kdr)] += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.eta_gradient_regularization_penalty_loc[I2V(i_loc,j_loc,k_loc)];
grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)] += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.xi_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.fun_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.eta_gradient_regularization_penalty_loc[I2V(i_loc,j_loc,k_loc)];
grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.xi_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)] += r_r*r_t*(_1_CR-r_p)*grid.fun_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt+1,kdr+1)] += r_r*r_t*(_1_CR-r_p)*grid.eta_gradient_regularization_penalty_loc[I2V(i_loc,j_loc,k_loc)];
grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)] += r_r*r_t*(_1_CR-r_p)*grid.xi_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)] += r_r*(_1_CR-r_t)*r_p*grid.fun_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt,kdr+1)] += r_r*(_1_CR-r_t)*r_p*grid.eta_gradient_regularization_penalty_loc[I2V(i_loc,j_loc,k_loc)];
grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)] += r_r*(_1_CR-r_t)*r_p*grid.xi_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)] += (_1_CR-r_r)*r_t*r_p*grid.fun_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt+1,kdr)] += (_1_CR-r_r)*r_t*r_p*grid.eta_gradient_regularization_penalty_loc[I2V(i_loc,j_loc,k_loc)];
grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)] += (_1_CR-r_r)*r_t*r_p*grid.xi_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)] += r_r*r_t*r_p*grid.fun_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt+1,kdr+1)] += r_r*r_t*r_p*grid.eta_gradient_regularization_penalty_loc[I2V(i_loc,j_loc,k_loc)];
grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)] += r_r*r_t*r_p*grid.xi_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
} // end for i
} // end for j
} // end for k
// sum over all sub-domains
allreduce_cr_inplace(grid.Ks_inv_loc, n_inv_I_loc*n_inv_J_loc*n_inv_K_loc);
allreduce_cr_inplace(grid.Keta_inv_loc, n_inv_I_loc*n_inv_J_loc*n_inv_K_loc);
allreduce_cr_inplace(grid.Kxi_inv_loc, n_inv_I_loc*n_inv_J_loc*n_inv_K_loc);
//
// update factors
//
for (int k = k_start; k < k_end; k++) {
// CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
// r_r = -_1_CR;
// for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
// if (r_glob > grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)] && r_glob <= grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]) {
// kdr = ii_invr;
// r_r = (r_glob - grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)]) / (grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)] - grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)]);
// break;
// }
// }
CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
r_r = -_1_CR;
for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
// increasing or decreasing order
if (in_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)])) {
kdr = ii_invr;
r_r = calc_ratio_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]);
break;
}
}
// continue if r is out of the inversion grid
if (r_r < 0) continue;
for (int j = j_start; j < j_end; j++) {
// CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat();
// r_t = -_1_CR;
// for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
// if (t_glob > grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)] && t_glob <= grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,i_grid)]) {
// jdt = ii_invt;
// r_t = (t_glob - grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)]) / (grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,i_grid)] - grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)]);
// break;
// }
// }
CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat(); // global coordinate of t (latitude)
r_t = -_1_CR;
for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
CUSTOMREAL left = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr+1,i_grid)]*(r_r);
CUSTOMREAL right = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr+1,i_grid)]*(r_r);
if (in_between(t_glob, left, right)) {
jdt = ii_invt;
r_t = calc_ratio_between(t_glob, left, right);
break;
}
}
// continue if t is out of the inversion grid
if (r_t < 0) continue;
for (int i = i_start; i < i_end; i++) {
// CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon();
// r_p = -_1_CR;
// for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
// if (p_glob > grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)] && p_glob <= grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,i_grid)]) {
// idp = ii_invp;
// r_p = (p_glob - grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)]) / (grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,i_grid)] - grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)]);
// break;
// }
// }
CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon(); // global coordinate of p (longitude)
r_p = -_1_CR;
for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
CUSTOMREAL left = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(r_r);
CUSTOMREAL right = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(r_r);
if (in_between(p_glob, left, right)) {
idp = ii_invp;
r_p = calc_ratio_between(p_glob, left, right);
break;
}
}
// continue if p is out of the inversion grid
if (r_p < 0) continue;
// global grid id to local id
int k_loc = k - grid.get_offset_k();
int j_loc = j - grid.get_offset_j();
int i_loc = i - grid.get_offset_i();
// check if *_loc are inside the local subdomain
if (k_loc < 0 || k_loc > loc_K-1) continue;
if (j_loc < 0 || j_loc > loc_J-1) continue;
if (i_loc < 0 || i_loc > loc_I-1) continue;
CUSTOMREAL pert_Ks = 0.0;
CUSTOMREAL pert_Keta = 0.0;
CUSTOMREAL pert_Kxi = 0.0;
pert_Ks += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)];
pert_Keta += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt,kdr)];
pert_Kxi += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)];
pert_Ks += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)];
pert_Keta += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt,kdr+1)];
pert_Kxi += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)];
pert_Ks += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)];
pert_Keta += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt+1,kdr)];
pert_Kxi += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)];
pert_Ks += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)];
pert_Keta += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt,kdr)];
pert_Kxi += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)];
pert_Ks += r_r*r_t*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)];
pert_Keta += r_r*r_t*(_1_CR-r_p)*grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt+1,kdr+1)];
pert_Kxi += r_r*r_t*(_1_CR-r_p)*grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)];
pert_Ks += r_r*(_1_CR-r_t)*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)];
pert_Keta += r_r*(_1_CR-r_t)*r_p*grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt,kdr+1)];
pert_Kxi += r_r*(_1_CR-r_t)*r_p*grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)];
pert_Ks += (_1_CR-r_r)*r_t*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)];
pert_Keta += (_1_CR-r_r)*r_t*r_p*grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt+1,kdr)];
pert_Kxi += (_1_CR-r_r)*r_t*r_p*grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)];
pert_Ks += r_r*r_t*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)];
pert_Keta += r_r*r_t*r_p*grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt+1,kdr+1)];
pert_Kxi += r_r*r_t*r_p*grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)];
// update para
grid.fun_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)] += pert_Ks;
grid.eta_gradient_regularization_penalty_loc[I2V(i_loc,j_loc,k_loc)] += pert_Keta;
grid.xi_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)] += pert_Kxi;
} // end for i
} // end for j
} // end for k
} // end i_grid
}
#endif

2
include/source.fwd.h Normal file
View File

@@ -0,0 +1,2 @@
#pragma once
class Source;

71
include/source.h Normal file
View File

@@ -0,0 +1,71 @@
#ifndef SOURCE_H
#define SOURCE_H
// to handle circular dependency
#pragma once
#include "grid.fwd.h"
#include "source.fwd.h"
#include "input_params.h"
#include "grid.h"
#include "config.h"
#include "mpi_funcs.h"
class Source {
public:
Source(){};
~Source();
// set source information
void set_source_position(InputParams &, Grid &, bool&, const std::string&, bool for_2d_solver=false);
//
// getters
//
CUSTOMREAL get_ds_lat(){return dis_src_lat;};
CUSTOMREAL get_ds_lon(){return dis_src_lon;};
CUSTOMREAL get_ds_r (){return dis_src_r;};
CUSTOMREAL get_src_r(){return src_r;}; // radius
CUSTOMREAL get_src_t(){return src_lat;}; // radian
CUSTOMREAL get_src_p(){return src_lon;}; // radian
CUSTOMREAL get_src_dep(){return radius2depth(src_r);}; // km
//
// parallel getters
//
CUSTOMREAL get_fac_at_source(CUSTOMREAL*, bool check=false);
CUSTOMREAL get_fac_at_point(CUSTOMREAL*, int, int, int);
private:
// positions
CUSTOMREAL src_lat;
CUSTOMREAL src_lon;
CUSTOMREAL src_r;
// discretize source position ids (LOCAL)
int i_src_loc;
int j_src_loc;
int k_src_loc;
// discretized source position
CUSTOMREAL dis_src_lat;
CUSTOMREAL dis_src_lon;
CUSTOMREAL dis_src_r;
CUSTOMREAL dis_src_err_r; // r1
CUSTOMREAL dis_src_err_lat; // r2
CUSTOMREAL dis_src_err_lon; // r3
// if source is in this subdomain
bool is_in_subdomain = false;
int src_rank;
bool *src_flags;
int n_dom_src = 0;
// position error by discretization for each direction
CUSTOMREAL error_lon, error_lat, error_r;
// grid gaps
CUSTOMREAL delta_lon, delta_lat, delta_r;
};
#endif // SOURCE_H

211
include/src_rec.h Normal file
View File

@@ -0,0 +1,211 @@
#ifndef SRC_REC_H
#define SRC_REC_H
#include <string>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <map>
#include <algorithm>
#include "config.h"
#include "utils.h"
#include "timer.h"
#include "mpi_funcs.h"
// information of source or receiver
class SrcRecInfo {
public:
int id = -1;
std::string name = "unknown";
CUSTOMREAL dep; // stored as depth (km), so need to depth2radious function when using it in other part of the code
CUSTOMREAL lat; // stored in degree, but convarted to radian when passed through get_src_* function
CUSTOMREAL lon; // stored in degree, but convarted to radian when passed through get_src_* function
int year = -1;
int month = -1;
int day = -1;
int hour = -1;
int min = -1;
CUSTOMREAL sec = -1.0;
CUSTOMREAL mag = -1.0;
CUSTOMREAL adjoint_source = 0.0;
CUSTOMREAL adjoint_source_density = 0.0;
int n_data = 0;
// arrays for storing arrival times on boundary surfaces, calculated by 2D Eikonal solver
bool is_out_of_region = false; // is the source or receiver in the region; false: in the refion; true: teleseismic
//CUSTOMREAL* arr_times_bound_N = nullptr; // arrival time of the receiver at the north boundary of the subdomain
//CUSTOMREAL* arr_times_bound_E = nullptr; // arrival time of the receiver at the east boundary of the subdomain
//CUSTOMREAL* arr_times_bound_W = nullptr; // arrival time of the receiver at the west boundary of the subdomain
//CUSTOMREAL* arr_times_bound_S = nullptr; // arrival time of the receiver at the south boundary of the subdomain
//CUSTOMREAL* arr_times_bound_Bot = nullptr; // arrival time of the receiver at the bottom boundary of the subdomain
//bool* is_bound_src; // true if the source is on the boundary surface
// kernel
CUSTOMREAL sta_correct = 0.0;
CUSTOMREAL sta_correct_kernel = 0.0;
// relocation
bool is_stop = false;
CUSTOMREAL tau_opt = 0.0;
CUSTOMREAL grad_chi_i = 0.0;
CUSTOMREAL grad_chi_j = 0.0;
CUSTOMREAL grad_chi_k = 0.0;
CUSTOMREAL grad_tau = 0.0;
CUSTOMREAL sum_weight = 0.0;
CUSTOMREAL vobj_src_reloc_old = 99999999.9;
CUSTOMREAL vobj_src_reloc = 0.0;
CUSTOMREAL vobj_grad_norm_src_reloc = 0.0;
CUSTOMREAL step_length_max = step_length_src_reloc; // 2 km default, step length for relocation
CUSTOMREAL change_dep = 0.0;
CUSTOMREAL change_lat = 0.0;
CUSTOMREAL change_lon = 0.0;
CUSTOMREAL change_tau = 0.0;
int Ndata = 0.0;
bool is_T_written_into_file = false;
};
class DataInfo {
public:
CUSTOMREAL data_weight = 1.0; // the weight in the src_rec file
CUSTOMREAL weight = 1.0; // the actual weight in the inversion, equal data_weight * weight about the data type;
CUSTOMREAL weight_reloc = 1.0; // the actual weight for relocation, equal data_weight * weight about the data type;
std::string phase = "unknown";
bool dual_data = false; // if true, this data is a dual data, used for generating kernel, but not for obj estimation (if true, data type = 2 or 3)
bool is_src_rec = false; // absolute traveltime, single source - receiver
// source information
int id_src = -1;
std::string name_src = "unknown";
// receiver information
int id_rec = -1;
std::string name_rec = "unknown";
// traveltime
CUSTOMREAL travel_time = -999.0;
CUSTOMREAL travel_time_obs = -999.0;
// receiver pair infomation
bool is_rec_pair = false; // common source differential traveltime
std::vector<int> id_rec_pair = {-1,-1};
std::vector<std::string> name_rec_pair = {"unknown","unknown"};
// common source differential travel time
CUSTOMREAL cs_dif_travel_time = -999.0;
CUSTOMREAL cs_dif_travel_time_obs = -999.0;
// source pair infomation
bool is_src_pair = false; // common receiver differential traveltime (for future)
std::vector<int> id_src_pair = {-1,-1};
std::vector<std::string> name_src_pair = {"unknown","unknown"};
// common receiver differential travel time
CUSTOMREAL cr_dif_travel_time = -999.0;
CUSTOMREAL cr_dif_travel_time_obs = -999.0;
// source relocation
CUSTOMREAL DTi = 0.0;
CUSTOMREAL DTj = 0.0;
CUSTOMREAL DTk = 0.0;
std::vector<CUSTOMREAL> DTi_pair = {0.0, 0.0};
std::vector<CUSTOMREAL> DTj_pair = {0.0, 0.0};
std::vector<CUSTOMREAL> DTk_pair = {0.0, 0.0};
};
// methods for managing SrcRec objects/lists
// parse src_rec_file
void parse_src_rec_file(std::string& , \
std::map<std::string, SrcRecInfo>&, \
std::map<std::string, SrcRecInfo>&, \
std::map<std::string, std::map<std::string,std::vector<DataInfo>>>&, \
std::vector<std::string>&, \
std::vector<std::vector<std::vector<std::string>>>&);
// parse sta_correctoion_file
void parse_sta_correction_file(std::string&, \
std::map<std::string, SrcRecInfo>&);
// swap the sources and receivers
void do_swap_src_rec(std::map<std::string, SrcRecInfo> &, \
std::map<std::string, SrcRecInfo> &, \
std::map<std::string, std::map<std::string, std::vector<DataInfo>>> &, \
std::vector<std::string>&);
// do not swap the sources and receivers
void do_not_swap_src_rec(std::map<std::string, SrcRecInfo> &, \
std::map<std::string, SrcRecInfo> &, \
std::map<std::string, std::map<std::string, std::vector<DataInfo>>> &, \
std::vector<std::string>&);
// tele seismic source management
void separate_region_and_tele_src_rec_data(std::map<std::string, SrcRecInfo> &,
std::map<std::string, SrcRecInfo> &,
std::map<std::string, std::map<std::string, std::vector<DataInfo>>>&,
std::map<std::string, SrcRecInfo> &,
std::map<std::string, SrcRecInfo> &,
std::map<std::string, std::map<std::string, std::vector<DataInfo>>>&,
std::map<std::string, SrcRecInfo> &,
std::map<std::string, SrcRecInfo> &,
std::map<std::string, std::map<std::string, std::vector<DataInfo>>>&,
std::map<std::string, int> &,
int &,
int &,
int &,
int &,
int &,
const CUSTOMREAL, const CUSTOMREAL,
const CUSTOMREAL, const CUSTOMREAL,
const CUSTOMREAL, const CUSTOMREAL,
bool);
void merge_region_and_tele_src(std::map<std::string, SrcRecInfo> &,
std::map<std::string, SrcRecInfo> &,
std::map<std::string, std::map<std::string,std::vector<DataInfo>>>&,
std::vector<std::string>&,
std::map<std::string, SrcRecInfo> &,
std::map<std::string, SrcRecInfo> &,
std::map<std::string, std::map<std::string,std::vector<DataInfo>>>&);
// distribute srcrec data to all simulation groups
void distribute_src_rec_data(std::map<std::string, SrcRecInfo>&, \
std::map<std::string, SrcRecInfo>&, \
std::map<std::string, std::map<std::string,std::vector<DataInfo>>>&, \
std::vector<std::string>&, \
std::map<std::string, SrcRecInfo>&, \
std::map<std::string, SrcRecInfo>&, \
std::map<std::string, std::map<std::string,std::vector<DataInfo>>>&, \
std::vector<std::string>&, \
std::vector<std::string>&);
void prepare_src_map_for_2d_solver(std::map<std::string, SrcRecInfo>&, \
std::map<std::string, SrcRecInfo>&, \
std::vector<std::string>&, \
std::map<std::string, SrcRecInfo>&);
void send_src_info_inter_sim(SrcRecInfo&, int);
void recv_src_info_inter_sim(SrcRecInfo&, int);
void broadcast_src_info_intra_sim(SrcRecInfo&, int);
void send_rec_info_inter_sim(SrcRecInfo&, int);
void recv_rec_info_inter_sim(SrcRecInfo&, int);
void broadcast_rec_info_intra_sim(SrcRecInfo&, int);
void send_data_info_inter_sim(DataInfo&, int);
void recv_data_info_inter_sim(DataInfo&, int);
#endif //SRC_REC_H

98
include/timer.h Normal file
View File

@@ -0,0 +1,98 @@
#ifndef TIMER_H
#define TIMER_H
#include <iostream>
#include <chrono>
#include <iomanip>
#include <ctime>
#include <string>
#include "config.h"
class Timer {
public:
Timer(std::string name, bool show_start_time = false) : name(name), show_start_time(show_start_time) {
start_timer();
}
~Timer() {}
std::string name;
std::chrono::high_resolution_clock::time_point start;
std::chrono::high_resolution_clock::time_point end;
std::chrono::duration<double> elapsed_time;
bool show_start_time = false;
void start_timer() {
start = std::chrono::high_resolution_clock::now();
time_back = start;
if (show_start_time && proc_read_srcrec) {
std::cout << this->name << " started at " << get_current_utc_time() << std::endl;
}
}
void stop_timer() {
end = std::chrono::high_resolution_clock::now();
elapsed_time = end - start;
print_elapsed_time();
}
double get_start() {
return std::chrono::duration<double>(start.time_since_epoch()).count();
}
double get_t_delta() {
auto current_time = std::chrono::high_resolution_clock::now();
auto delta = current_time - time_back;
time_back = current_time;
return std::chrono::duration<double>(delta).count();
}
double get_t() {
return std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - start).count();
}
std::string get_start_t() {
return get_current_utc_time(start);
}
std::string get_end_t() {
return get_current_utc_time(end);
}
std::string get_elapsed_t() {
return std::to_string(elapsed_time.count()) + " sec";
}
std::string get_utc_from_time_t(std::time_t time_t) {
std::tm utc_tm = *std::gmtime(&time_t);
std::ostringstream oss;
oss << std::put_time(&utc_tm, "%F %T") << " UTC";
return oss.str();
}
private:
std::chrono::high_resolution_clock::time_point time_back;
void print_elapsed_time() {
if (if_verbose)
std::cout << "Time for " << name << ": " << elapsed_time.count() << " sec" << std::endl;
}
std::string get_current_utc_time(std::chrono::high_resolution_clock::time_point time = std::chrono::high_resolution_clock::now()) {
std::chrono::milliseconds ms = std::chrono::duration_cast<std::chrono::milliseconds>(time.time_since_epoch());
std::time_t time_t = ms.count() / 1000;
std::tm utc_tm = *std::gmtime(&time_t);
std::ostringstream oss;
oss << std::put_time(&utc_tm, "%F %T") << " UTC";
return oss.str();
}
};
#endif // TIMER_H

401
include/utils.h Normal file
View File

@@ -0,0 +1,401 @@
#ifndef UTILS_H
#define UTILS_H
#include <iostream>
#include <math.h>
#include <string.h>
#include <fstream>
#include <sys/stat.h>
#include <iomanip>
#include <sstream>
#include <cmath>
#include <new> // for std::bad_alloc
#include <cstdlib> // for std::exit
// chec if compiler is gcc 7.5 or older
//#if __GNUC__ == 7 && __GNUC_MINOR__ <= 5
//#include <experimental/filesystem>
//#define GNUC_7_5
//#elif __cplusplus > 201402L // compilers supporting c++17
//#include <filesystem>
//#endif
#include "config.h"
inline int mkpath(std::string s,mode_t mode) {
size_t pos=0;
std::string dir;
int mdret = 0;
if(s[s.size()-1]!='/'){
// force trailing / so we can handle everything in loop
s+='/';
}
while((pos=s.find_first_of('/',pos))!=std::string::npos){
dir=s.substr(0,pos++);
if(dir.size()==0) continue; // if leading / first time is 0 length
if((mdret=mkdir(dir.c_str(),mode)) && errno!=EEXIST){
return mdret;
}
}
return mdret;
}
inline void create_output_dir(std::string dir_path){
// create output directory if not exists (directories tree)
if (world_rank == 0) {
//#if __cplusplus > 201402L && !defined(GNUC_7_5)
// // this function requires c++17
// if (!std::filesystem::exists(dir_path)){
// std::filesystem::create_directories(dir_path);
// } else {
// std::cout << "Output directory already exists. Overwriting..." << std::endl;
// }
//#else // compilers not supporting std++17
// check if directory exists
struct stat info;
if (stat(dir_path.c_str(), &info) != 0){
// directory does not exist
// create directory
int status = mkpath(dir_path, 0755);
if (status != 0){
std::cout << "Error: cannot create output directory" << std::endl;
std::cout << "Exiting..." << std::endl;
exit(1);
}
} else {
// directory exists
std::cout << "Output directory already exists. Overwriting..." << std::endl;
}
//#endif
}
}
inline bool is_file_exist(const char* fileName)
{
return static_cast<bool>(std::ifstream(fileName));
}
inline void stdout_by_main(char const* str){
if (sim_rank == 0 && inter_sub_rank == 0 && sub_rank == 0 && id_sim == 0)
std::cout << str << std::endl;
}
inline void stdout_by_rank_zero(char const* str){
if(world_rank == 0)
std::cout << str << std::endl;
}
inline void parse_options(int argc, char* argv[]){
bool input_file_found = false;
for (int i = 1; i < argc; i++){
if(strcmp(argv[i], "-v") == 0)
if_verbose = true;
else if (strcmp(argv[i],"-i") == 0){
input_file = argv[i+1];
input_file_found = true;
}
else if (strcmp(argv[i], "-h") == 0){
stdout_by_main("usage: mpirun -np 4 TOMOATT -i input_params.yaml");
exit(EXIT_SUCCESS);
}
}
// error if input_file is not found
if(!input_file_found){
stdout_by_main("usage: mpirun -np 4 TOMOATT -i input_params.yaml");
std::cout << "Error: input parameter file not found" << std::endl;
exit(EXIT_FAILURE);
}
}
inline void parse_options_srcrec_weight(int argc, char* argv[]){
bool input_file_found = false;
for (int i = 1; i < argc; i++){
if(strcmp(argv[i], "-v") == 0)
if_verbose = true;
else if (strcmp(argv[i],"-i") == 0){
input_file = argv[i+1];
input_file_found = true;
} else if (strcmp(argv[i],"-r") == 0){
// reference value
ref_value = atof(argv[i+1]);
} else if (strcmp(argv[i], "-o") == 0){
// get output filename
output_file_weight = argv[i+1];
}
}
// error if input_file is not found
if(!input_file_found){
stdout_by_main("usage: ./SrcRecWeight -i srcrec_file.txt -r 10.0 -o srcrec_weight.txt");
std::cout << argc << std::endl;
exit(EXIT_FAILURE);
}
}
template <typename T>
inline int check_data_type(T const& data){
if (std::is_same<T,bool>::value){
return 0;
} else if (std::is_same<T, int>::value){
return 1;
} else if (std::is_same<T, float>::value){
return 2;
} else if (std::is_same<T, double>::value){
return 3;
} else {
std::cout << "Error: custom real type is not float or double" << std::endl;
std::cout << "Please check the definition of CUSTOMREAL in io.h" << std::endl;
std::cout << "Exiting..." << std::endl;
exit(1);
}
}
template <typename T>
inline T my_square(T const& a){
return a * a;
}
// defined function is more than 2 times slower than inline function
//#define my_square(a) (a * a)
template <typename T>
T* allocateMemory(size_t size, int allocationID) {
try {
T* ptr = new T[size];
return ptr;
} catch (const std::bad_alloc& e) {
// Handle memory allocation failure
std::cerr << "Memory allocation failed for ID " << allocationID << ": " << e.what() << std::endl;
std::exit(EXIT_FAILURE); // Exit the program on allocation failure
}
}
inline void RLonLat2xyz(CUSTOMREAL lon, CUSTOMREAL lat, CUSTOMREAL R, CUSTOMREAL& x, CUSTOMREAL& y, CUSTOMREAL& z){
/*
lon : longitude in radian
lat : latitude in radian
R : radius
x : x coordinate in m
y : y coordinate in m
z : z coordinate in m
*/
//x = R*cos(lat*DEG2RAD)*cos(lon*DEG2RAD);
//y = R*cos(lat*DEG2RAD)*sin(lon*DEG2RAD);
//z = R*sin(lat*DEG2RAD);
x = R*cos(lat)*cos(lon);
y = R*cos(lat)*sin(lon);
z = R*sin(lat);
}
// calculate epicentral distance in radian from lon lat in radian
inline void Epicentral_distance_sphere(const CUSTOMREAL lat1, const CUSTOMREAL lon1, \
const CUSTOMREAL lat2, const CUSTOMREAL lon2, \
CUSTOMREAL& dist) {
// calculate epicentral distance in radian
//dist = std::abs(acos(sin(lat1)*sin(lat2)+cos(lat1)*cos(lat2)*cos(lon2-lon1)));
CUSTOMREAL lon_dif = lon2 - lon1;
dist = std::atan2(std::sqrt((my_square(std::cos(lat2) * std::sin(lon_dif)) + \
my_square(std::cos(lat1) * std::sin(lat2) - std::sin(lat1) * std::cos(lat2) * std::cos(lon_dif)))), \
std::sin(lat1) * std::sin(lat2) + std::cos(lat1) * std::cos(lat2) * std::cos(lon_dif));
}
//calculate azimuth
inline void Azimuth_sphere(CUSTOMREAL lat1, CUSTOMREAL lon1, \
CUSTOMREAL lat2, CUSTOMREAL lon2, \
CUSTOMREAL& azi) {
if (isZero(my_square((lat1-lat2)) \
&& + my_square((lon1-lon2)))){
azi = _0_CR;
} else {
azi = atan2(sin(lon2-lon1)*cos(lat2), \
cos(lat1)*sin(lat2)-sin(lat1)*cos(lat2)*cos(lon2-lon1));
if (azi < _0_CR)
azi += _2_CR * PI;
}
}
inline void WGS84ToCartesian(CUSTOMREAL& lon, CUSTOMREAL& lat, CUSTOMREAL R, \
CUSTOMREAL& x, CUSTOMREAL& y, CUSTOMREAL& z, \
CUSTOMREAL& lon_center, CUSTOMREAL& lat_center){
// equatorial radius WGS84 major axis
const static CUSTOMREAL equRadius = R_earth; // in m in this function
const static CUSTOMREAL flattening = 1.0 / 298.257222101;
const static CUSTOMREAL sqrEccentricity = flattening * (2.0 - flattening);
const CUSTOMREAL lat_rad = lat - lat_center; // input should be already in radian
const CUSTOMREAL lon_rad = lon - lon_center;
const CUSTOMREAL alt = R - equRadius; // altitude (height above sea level)
const CUSTOMREAL sinLat = sin(lat_rad);
const CUSTOMREAL cosLat = cos(lat_rad);
const CUSTOMREAL sinLon = sin(lon_rad);
const CUSTOMREAL cosLon = cos(lon_rad);
// Normalized radius
const CUSTOMREAL normRadius = equRadius / sqrt(1.0 - sqrEccentricity * sinLat * sinLat);
x = (normRadius + alt) * cosLat * cosLon;
y = (normRadius + alt) * cosLat * sinLon;
z = (normRadius * (1.0 - sqrEccentricity) + alt) * sinLat;
}
template <typename T>
inline T dot_product(T const* const a, T const* const b, int const& n){
CUSTOMREAL result = _0_CR;
for (int i = 0; i < n; i++){
result += a[i] * b[i];
}
return result;
}
template <typename T>
inline T find_max(T const* const a, int const& n){
T max = a[0];
for (int i = 1; i < n; i++){
if (a[i] > max){
max = a[i];
}
}
return max;
}
template <typename T>
inline T find_absmax(T const* const a, int const& n){
T max = fabs(a[0]);
for (int i = 1; i < n; i++){
if (fabs(a[i]) > max){
max = fabs(a[i]);
}
}
return max;
}
template <typename T>
inline T calc_l2norm(T const* const a, int const& n){
T result = _0_CR;
for (int i = 0; i < n; i++){
result += a[i] * a[i];
}
return result;
}
inline std::string int2string_zero_fill(int i) {
std::stringstream ss;
ss << std::setfill('0') << std::setw(4) << i;
return ss.str();
}
inline bool in_between(CUSTOMREAL const& a, CUSTOMREAL const& b, CUSTOMREAL const& c){
// check if a is between b and c
if ((a-b)*(a-c) <= _0_CR){
return true;
} else {
return false;
}
}
inline CUSTOMREAL calc_ratio_between(CUSTOMREAL const& a, CUSTOMREAL const& b, CUSTOMREAL const& c){
// calculate ratio of a between b and c
//if (b < c)
return (a - b) / (c - b);
//else
// return (a - b) / (b - c);
}
inline void linear_interpolation_1d_sorted(const CUSTOMREAL* x1, const CUSTOMREAL* y1, const int& n1, const CUSTOMREAL* x2, CUSTOMREAL* y2, const int& n2){
// linear interpolation for sorted 1d array (monotonely increasing)
// x1 : positions of the first array
// y1 : values of the first array
// n1 : size of the first array
// x2 : positions of the second array
// y2 : values of the second array
// n2 : size of the second array
int start = 0;
for(int pt2=0; pt2<n2; pt2++){ // x2[pt2] <= x1[0]
if (x2[pt2] <= x1[0]){
y2[pt2] = y1[0];
} else if (x2[pt2] >= x1[n1-1]){ // x2[pt2] >= x1[n1-1]
y2[pt2] = y1[n1-1];
} else { // x1[0] < x2[pt2] < x1[n1-1]
for(int pt1=start; pt1<n1-1; pt1++){
if (x2[pt2] >= x1[pt1] && x2[pt2] <= x1[pt1+1]){
y2[pt2] = y1[pt1] + (y1[pt1+1] - y1[pt1]) * (x2[pt2] - x1[pt1]) / (x1[pt1+1] - x1[pt1]);
start = pt1;
break;
}
}
}
}
}
template<typename T>
std::vector<CUSTOMREAL> linspace(T start_in, T end_in, int num_in)
{
std::vector<CUSTOMREAL> linspaced;
double start = static_cast<CUSTOMREAL>(start_in);
double end = static_cast<CUSTOMREAL>(end_in);
double num = static_cast<CUSTOMREAL>(num_in);
if (num == 0) { return linspaced; }
if (num == 1)
{
linspaced.push_back(start);
return linspaced;
}
double delta = (end - start) / (num - 1);
for(int i=0; i < num-1; ++i)
{
linspaced.push_back(start + delta * i);
}
linspaced.push_back(end); // I want to ensure that start and end
// are exactly the same as the input
return linspaced;
}
#endif // UTILS_H

825
include/vectorized_sweep.h Normal file
View File

@@ -0,0 +1,825 @@
#ifndef VECTORIZED_SWEEP_H
#define VECTORIZED_SWEEP_H
#include<vector>
#include "config.h"
#ifdef USE_SIMD // closed at the end of this file
#include "simd_conf.h"
#if USE_AVX || USE_AVX512
__mT COEF = _mmT_set1_pT(1.0);
__mT v_1 = _mmT_set1_pT(1.0);
__mT v_0 = _mmT_set1_pT(0.0);
__mT v_half = _mmT_set1_pT(0.5);
__mT v_2 = _mmT_set1_pT(2.0);
__mT v_m2 = _mmT_set1_pT(-2.0);
__mT v_4 = _mmT_set1_pT(4.0);
__mT v_m3 = _mmT_set1_pT(-3.0);
__mT coe_max = _mmT_set1_pT(1e19);
__mT v_eps = _mmT_set1_pT(1e-12);
__mT COEF_TELE = _mmT_set1_pT(1.05);
// square of __mT
inline __mT my_square_v(__mT const& a){
return _mmT_mul_pT(a, a);
}
// pp = (a - b) * Dinv
inline __mT calc_1d_stencil(__mT const& a, __mT const& b, __mT const& Dinv){
return _mmT_mul_pT(_mmT_sub_pT(a,b),Dinv);
}
/*
ww = _1_CR/(_1_CR+_2_CR*my_square_v((eps + my_square_v( a \
-_2_CR*b \
+ c) ) \
/ (eps + my_square_v( d \
-_2_CR*a \
+ b) )) );
pp = sign * (_1_CR - ww) * ( b \ <-- sign = +1 or -1
- d) * 0.5 * Dinv \
+ ww * ( -3.0 * a \
+ 4.0 * b \
- 1.0 * c ) * 0.5 * Dinv );
*/
inline __mT calc_3d_stencil(__mT const& a, __mT const& b, __mT const&c, __mT const& d, __mT const& Dinv_half, int const& sign){
#ifdef __FMA__
// v_eps + square(a - 2.0*b + c)
__mT tmp1 = _mmT_add_pT(v_eps,my_square_v(_mmT_add_pT(a,_mmT_fmadd_pT(v_m2,b,c))));
// v_eps + square(d - 2.0*a + b)
__mT tmp2 = _mmT_add_pT(v_eps,my_square_v(_mmT_add_pT(d,_mmT_fmadd_pT(v_m2,a,b))));
// ww = 1.0/(1.0 + 2.0 * square(tmp1/tmp2))
__mT ww = _mmT_div_pT(v_1,_mmT_fmadd_pT(v_2,my_square_v(_mmT_div_pT(tmp1,tmp2)),v_1));
// pp = sign* ((1.0 - ww) * (b - d) / 2.0 / D
// + ww * (-3.0* a + 4.0 * b - c) * Dinv_half)
/*
*/
return _mmT_mul_pT(_mmT_set1_pT(sign), \
_mmT_add_pT(\
_mmT_mul_pT(_mmT_sub_pT(v_1,ww),_mmT_mul_pT(_mmT_sub_pT(b,d),Dinv_half)),\
_mmT_mul_pT(ww,_mmT_mul_pT(_mmT_sub_pT(_mmT_fmadd_pT(v_4,b,_mmT_mul_pT(v_m3,a)),c),Dinv_half))\
)\
);
#else
__mT tmp1 = _mmT_add_pT(v_eps,my_square_v(_mmT_add_pT(a,_mmT_add_pT(_mmT_mul_pT(v_m2,b),c))));
__mT tmp2 = _mmT_add_pT(v_eps,my_square_v(_mmT_add_pT(d,_mmT_add_pT(_mmT_mul_pT(v_m2,a),b))));
__mT ww = _mmT_div_pT(v_1,_mmT_add_pT(v_1,_mmT_mul_pT(v_2,my_square_v(_mmT_div_pT(tmp1,tmp2)))));
return _mmT_mul_pT(_mmT_set1_pT(sign), \
_mmT_add_pT(\
_mmT_mul_pT(_mmT_sub_pT(v_1,ww),_mmT_mul_pT(_mmT_sub_pT(b,d),Dinv_half)),\
_mmT_mul_pT(ww,_mmT_mul_pT(_mmT_sub_pT(_mmT_add_pT(_mmT_mul_pT(v_4,b),_mmT_mul_pT(v_m3,a)),c),Dinv_half))\
)\
);
#endif
}
#elif USE_ARM_SVE
inline __mT my_square_v(svbool_t const& pg, __mT const& a){
return svmul_f64_z(pg, a, a);
}
inline __mT calc_1d_stencil(svbool_t const& pg, __mT const& a, __mT const& b, __mT const& Dinv){
return svmul_f64_z(pg, svsub_f64_z(pg, a, b), Dinv);
}
inline __mT calc_3d_stencil(svbool_t const& pg, __mT const& a, __mT const& b, __mT const&c, __mT const& d, __mT const& Dinv_half, int const& sign){
__mT v_1 = svdup_f64(1.0);
__mT v_2 = svdup_f64(2.0);
__mT v_m2 = svdup_f64(-2.0);
__mT v_4 = svdup_f64(4.0);
__mT v_m3 = svdup_f64(-3.0);
__mT v_eps = svdup_f64(1e-12);
// v_eps + square(a - 2.0*b + c)
//__mT tmp1 = svadd_f64_z(pg, v_eps, my_square_v(pg, svadd_f64_z(pg, a, svadd_f64_z(pg, svmul_f64_z(pg, v_m2, b), c))));
__mT tmp1 = svadd_f64_z(pg, v_eps, my_square_v(pg, svadd_f64_z(pg, a, svmad_f64_z(pg, v_m2, b, c))));
// v_eps + square(d - 2.0*a + b)
//__mT tmp2 = svadd_f64_z(pg, v_eps, my_square_v(pg, svadd_f64_z(pg, d, svadd_f64_z(pg, svmul_f64_z(pg, v_m2, a), b))));
__mT tmp2 = svadd_f64_z(pg, v_eps, my_square_v(pg, svadd_f64_z(pg, d, svmad_f64_z(pg, v_m2, a, b))));
// ww = 1.0/(1.0 + 2.0 * square(tmp1/tmp2))
//__mT ww = svdiv_f64_z(pg, v_1, svadd_f64_z(pg, v_1, svmul_f64_z(pg, v_2, my_square_v(pg, svdiv_f64_z(pg, tmp1, tmp2)))));
__mT ww = svdiv_f64_z(pg, v_1, svmad_f64_z(pg, v_2, my_square_v(pg, svdiv_f64_z(pg, tmp1, tmp2)), v_1));
// pp = sign* ((1.0 - ww) * (b - d) / 2.0 / D
// + ww * (-3.0* a + 4.0 * b - c) * Dinv_half)
/*
return svmul_f64_m(pg, svdup_f64(sign), \
svadd_f64_z(pg, \
svmul_f64_z(pg, svsub_f64_z(pg, v_1, ww), svmul_f64_z(pg, svsub_f64_z(pg, b, d), Dinv_half)),\
svmul_f64_z(pg, ww, svmul_f64_z(pg, svsub_f64_z(pg, svadd_f64_z(pg, svmul_f64_z(pg, v_4, b), svmul_f64_z(pg, v_m3, a)), c), Dinv_half))\
)\
);
*/
return svmul_f64_m(pg, svdup_f64(sign), \
svadd_f64_z(pg, \
svmul_f64_z(pg, svsub_f64_z(pg, v_1, ww), svmul_f64_z(pg, svsub_f64_z(pg, b, d), Dinv_half)),\
svmul_f64_z(pg, ww, svmul_f64_z(pg, svsub_f64_z(pg, svmad_f64_z(pg, v_4, b, svmul_f64_z(pg, v_m3, a)), c), Dinv_half))\
)\
);
}
#endif
inline void vect_stencil_1st_pre_simd(
#if USE_ARM_SVE
svbool_t const& pg,
#endif
__mT const& v_iip, __mT const& v_jjt, __mT const& v_kkr,
__mT const& v_c__,
__mT const& v_p__, __mT const& v_m__, __mT const& v__p_, __mT const& v__m_, __mT const& v___p, __mT const& v___m,
__mT& v_pp1, __mT& v_pp2, __mT& v_pt1, __mT& v_pt2, __mT& v_pr1, __mT& v_pr2,
__mT const& v_DP_inv, __mT const& v_DT_inv, __mT const& v_DR_inv,
__mT const& v_DP_inv_half, __mT const& v_DT_inv_half, __mT const& v_DR_inv_half,
int const& NP, int const& NT, int const& NR){
#if USE_AVX512 || USE_AVX
v_pp1 = calc_1d_stencil(v_c__, v_m__, v_DP_inv);
v_pp2 = calc_1d_stencil(v_p__, v_c__, v_DP_inv);
v_pt1 = calc_1d_stencil(v_c__, v__m_, v_DT_inv);
v_pt2 = calc_1d_stencil(v__p_, v_c__, v_DT_inv);
v_pr1 = calc_1d_stencil(v_c__, v___m, v_DR_inv);
v_pr2 = calc_1d_stencil(v___p, v_c__, v_DR_inv);
#elif USE_ARM_SVE
v_pp1 = calc_1d_stencil(pg, v_c__, v_m__, v_DP_inv);
v_pp2 = calc_1d_stencil(pg, v_p__, v_c__, v_DP_inv);
v_pt1 = calc_1d_stencil(pg, v_c__, v__m_, v_DT_inv);
v_pt2 = calc_1d_stencil(pg, v__p_, v_c__, v_DT_inv);
v_pr1 = calc_1d_stencil(pg, v_c__, v___m, v_DR_inv);
v_pr2 = calc_1d_stencil(pg, v___p, v_c__, v_DR_inv);
#endif
}
inline void vect_stencil_3rd_pre_simd(
#if USE_ARM_SVE
svbool_t const& pg,
#endif
__mT const& v_iip, __mT const& v_jjt, __mT const& v_kkr,
__mT const& v_c__,
__mT const& v_p__, __mT const& v_m__, __mT const& v__p_, __mT const& v__m_, __mT const& v___p, __mT const& v___m,
__mT const& v_pp____, __mT const& v_mm____, __mT const& v___pp__, __mT const& v___mm__, __mT const& v_____pp, __mT const& v_____mm,
__mT& v_pp1, __mT& v_pp2, __mT& v_pt1, __mT& v_pt2, __mT& v_pr1, __mT& v_pr2,
__mT const& v_DP_inv, __mT const& v_DT_inv, __mT const& v_DR_inv,
__mT const& v_DP_inv_half, __mT const& v_DT_inv_half, __mT const& v_DR_inv_half,
int const& NP, int const& NT, int const& NR){
const int PLUS = 1;
const int MINUS = -1;
#if USE_AVX512
__mmaskT mask_i_eq_1 = _mm512_cmp_pT_mask(v_iip, v_1,_CMP_EQ_OQ); // if iip == 1
__mmaskT mask_j_eq_1 = _mm512_cmp_pT_mask(v_jjt, v_1,_CMP_EQ_OQ); // if jjt == 1
__mmaskT mask_k_eq_1 = _mm512_cmp_pT_mask(v_kkr, v_1,_CMP_EQ_OQ); // if kkr == 1
__mmaskT mask_i_eq_N_minus_2 = _mm512_cmp_pT_mask(v_iip, _mmT_set1_pT(NP-2),_CMP_EQ_OQ); // if iip == N-2
__mmaskT mask_j_eq_N_minus_2 = _mm512_cmp_pT_mask(v_jjt, _mmT_set1_pT(NT-2),_CMP_EQ_OQ); // if jjt == N-2
__mmaskT mask_k_eq_N_minus_2 = _mm512_cmp_pT_mask(v_kkr, _mmT_set1_pT(NR-2),_CMP_EQ_OQ); // if kkr == N-2
// 1 < iip < N-2
__mmaskT mask_i_else = _kand_maskT(
_mm512_cmp_pT_mask(v_iip, v_1 , _CMP_GT_OQ),
_mm512_cmp_pT_mask(_mmT_set1_pT(NP-2), v_iip, _CMP_GT_OQ)
);
// 1 < jjt < N-2
__mmaskT mask_j_else = _kand_maskT(
_mm512_cmp_pT_mask(v_jjt, v_1 , _CMP_GT_OQ),
_mm512_cmp_pT_mask(_mmT_set1_pT(NT-2), v_jjt, _CMP_GT_OQ)
);
// 1 < kkr < N-2
__mmaskT mask_k_else = _kand_maskT(
_mm512_cmp_pT_mask(v_kkr, v_1 , _CMP_GT_OQ),
_mm512_cmp_pT_mask(_mmT_set1_pT(NR-2), v_kkr, _CMP_GT_OQ)
);
// if _i_eq_1 == true
v_pp1 = _mm512_mask_blend_pT(mask_i_eq_1, calc_1d_stencil(v_c__, v_m__, v_DP_inv) , v_pp1 );
v_pp2 = _mm512_mask_blend_pT(mask_i_eq_1, calc_3d_stencil(v_c__, v_p__, v_pp____, v_m__, v_DP_inv_half, PLUS), v_pp2 );
// if_i_eq_N_minus_2 == true
v_pp1 = _mm512_mask_blend_pT(mask_i_eq_N_minus_2, calc_3d_stencil(v_c__, v_m__, v_mm____, v_p__, v_DP_inv_half, MINUS), v_pp1);
v_pp2 = _mm512_mask_blend_pT(mask_i_eq_N_minus_2, calc_1d_stencil(v_p__, v_c__, v_DP_inv) , v_pp2);
// else
v_pp1 = _mm512_mask_blend_pT(mask_i_else, calc_3d_stencil(v_c__, v_m__, v_mm____, v_p__, v_DP_inv_half, MINUS), v_pp1);
v_pp2 = _mm512_mask_blend_pT(mask_i_else, calc_3d_stencil(v_c__, v_p__, v_pp____, v_m__, v_DP_inv_half, PLUS) , v_pp2);
// if _j_eq_1 == true
v_pt1 = _mm512_mask_blend_pT(mask_j_eq_1, calc_1d_stencil(v_c__, v__m_, v_DT_inv) , v_pt1);
v_pt2 = _mm512_mask_blend_pT(mask_j_eq_1, calc_3d_stencil(v_c__, v__p_, v___pp__, v__m_, v_DT_inv_half, PLUS), v_pt2);
// if _j_eq_N_minus_2 == true
v_pt1 = _mm512_mask_blend_pT(mask_j_eq_N_minus_2, calc_3d_stencil(v_c__, v__m_, v___mm__, v__p_, v_DT_inv_half, MINUS), v_pt1);
v_pt2 = _mm512_mask_blend_pT(mask_j_eq_N_minus_2, calc_1d_stencil(v__p_, v_c__, v_DT_inv) , v_pt2);
// else
v_pt1 = _mm512_mask_blend_pT(mask_j_else, calc_3d_stencil(v_c__, v__m_, v___mm__, v__p_, v_DT_inv_half, MINUS), v_pt1);
v_pt2 = _mm512_mask_blend_pT(mask_j_else, calc_3d_stencil(v_c__, v__p_, v___pp__, v__m_, v_DT_inv_half, PLUS) , v_pt2);
// if _k_eq_1 == true
v_pr1 = _mm512_mask_blend_pT(mask_k_eq_1, calc_1d_stencil(v_c__, v___m, v_DR_inv) , v_pr1 );
v_pr2 = _mm512_mask_blend_pT(mask_k_eq_1, calc_3d_stencil(v_c__, v___p, v_____pp, v___m, v_DR_inv_half, PLUS), v_pr2 );
// if _k_eq_N_minus_2 == true
v_pr1 = _mm512_mask_blend_pT(mask_k_eq_N_minus_2, calc_3d_stencil(v_c__, v___m, v_____mm, v___p, v_DR_inv_half, MINUS), v_pr1);
v_pr2 = _mm512_mask_blend_pT(mask_k_eq_N_minus_2, calc_1d_stencil(v___p, v_c__, v_DR_inv) , v_pr2);
// else
v_pr1 = _mm512_mask_blend_pT(mask_k_else, calc_3d_stencil(v_c__, v___m, v_____mm, v___p, v_DR_inv_half, MINUS), v_pr1);
v_pr2 = _mm512_mask_blend_pT(mask_k_else, calc_3d_stencil(v_c__, v___p, v_____pp, v___m, v_DR_inv_half, PLUS) , v_pr2);
#elif USE_AVX
__mT mask_i_eq_1 = _mm256_cmp_pT(v_iip, v_1,_CMP_EQ_OQ); // if iip == 1
__mT mask_j_eq_1 = _mm256_cmp_pT(v_jjt, v_1,_CMP_EQ_OQ); // if jjt == 1
__mT mask_k_eq_1 = _mm256_cmp_pT(v_kkr, v_1,_CMP_EQ_OQ); // if kkr == 1
__mT mask_i_eq_N_minus_2 = _mm256_cmp_pT(v_iip, _mmT_set1_pT(NP-2),_CMP_EQ_OQ); // if iip == N-2
__mT mask_j_eq_N_minus_2 = _mm256_cmp_pT(v_jjt, _mmT_set1_pT(NT-2),_CMP_EQ_OQ); // if jjt == N-2
__mT mask_k_eq_N_minus_2 = _mm256_cmp_pT(v_kkr, _mmT_set1_pT(NR-2),_CMP_EQ_OQ); // if kkr == N-2
// 1 < iip < N-2
__m256d mask_i_else = _mm256_and_pd(
_mm256_cmp_pT(v_iip, v_1 , _CMP_GT_OQ),
_mm256_cmp_pT(_mmT_set1_pT(NP-2),v_iip, _CMP_GT_OQ)
);
// 1 < jjt < N-2
__m256d mask_j_else = _mm256_and_pd(
_mm256_cmp_pT(v_jjt, v_1 , _CMP_GT_OQ),
_mm256_cmp_pT(_mmT_set1_pT(NT-2),v_jjt, _CMP_GT_OQ)
);
// 1 < kkr < N-2
__m256d mask_k_else = _mm256_and_pd(
_mm256_cmp_pT(v_kkr, v_1 , _CMP_GT_OQ),
_mm256_cmp_pT(_mmT_set1_pT(NR-2),v_kkr, _CMP_GT_OQ)
);
// if _i_eq_1 == true
v_pp1 = _mm256_blendv_pd(v_pp1, calc_1d_stencil(v_c__, v_m__, v_DP_inv), mask_i_eq_1);
v_pp2 = _mm256_blendv_pd(v_pp2, calc_3d_stencil(v_c__, v_p__, v_pp____, v_m__, v_DP_inv_half, PLUS), mask_i_eq_1);
// if_i_eq_N_minus_2 == true
v_pp1 = _mm256_blendv_pd(v_pp1, calc_3d_stencil(v_c__, v_m__, v_mm____, v_p__, v_DP_inv_half, MINUS), mask_i_eq_N_minus_2);
v_pp2 = _mm256_blendv_pd(v_pp2, calc_1d_stencil(v_p__, v_c__, v_DP_inv), mask_i_eq_N_minus_2);
// else
v_pp1 = _mm256_blendv_pd(v_pp1, calc_3d_stencil(v_c__, v_m__, v_mm____, v_p__, v_DP_inv_half, MINUS), mask_i_else);
v_pp2 = _mm256_blendv_pd(v_pp2, calc_3d_stencil(v_c__, v_p__, v_pp____, v_m__, v_DP_inv_half, PLUS), mask_i_else);
// if _j_eq_1 == true
v_pt1 = _mm256_blendv_pd(v_pt1, calc_1d_stencil(v_c__, v__m_, v_DT_inv), mask_j_eq_1);
v_pt2 = _mm256_blendv_pd(v_pt2, calc_3d_stencil(v_c__, v__p_, v___pp__, v__m_, v_DT_inv_half, PLUS), mask_j_eq_1);
// if _j_eq_N_minus_2 == true
v_pt1 = _mm256_blendv_pd(v_pt1, calc_3d_stencil(v_c__, v__m_, v___mm__, v__p_, v_DT_inv_half, MINUS), mask_j_eq_N_minus_2);
v_pt2 = _mm256_blendv_pd(v_pt2, calc_1d_stencil(v__p_, v_c__, v_DT_inv), mask_j_eq_N_minus_2);
// else
v_pt1 = _mm256_blendv_pd(v_pt1, calc_3d_stencil(v_c__, v__m_, v___mm__, v__p_, v_DT_inv_half, MINUS), mask_j_else);
v_pt2 = _mm256_blendv_pd(v_pt2, calc_3d_stencil(v_c__, v__p_, v___pp__, v__m_, v_DT_inv_half, PLUS), mask_j_else);
// if _k_eq_1 == true
v_pr1 = _mm256_blendv_pd(v_pr1, calc_1d_stencil(v_c__, v___m, v_DR_inv), mask_k_eq_1);
v_pr2 = _mm256_blendv_pd(v_pr2, calc_3d_stencil(v_c__, v___p, v_____pp, v___m, v_DR_inv_half, PLUS), mask_k_eq_1);
// if _k_eq_N_minus_2 == true
v_pr1 = _mm256_blendv_pd(v_pr1, calc_3d_stencil(v_c__, v___m, v_____mm, v___p, v_DR_inv_half, MINUS), mask_k_eq_N_minus_2);
v_pr2 = _mm256_blendv_pd(v_pr2, calc_1d_stencil(v___p, v_c__, v_DR_inv), mask_k_eq_N_minus_2);
// else
v_pr1 = _mm256_blendv_pd(v_pr1, calc_3d_stencil(v_c__, v___m, v_____mm, v___p, v_DR_inv_half, MINUS), mask_k_else);
v_pr2 = _mm256_blendv_pd(v_pr2, calc_3d_stencil(v_c__, v___p, v_____pp, v___m, v_DR_inv_half, PLUS), mask_k_else);
#elif USE_ARM_SVE
svfloat64_t v_1 = svdup_f64(1.0);
svfloat64_t v_NP_minus_2 = svdup_f64(NP-2);
svfloat64_t v_NT_minus_2 = svdup_f64(NT-2);
svfloat64_t v_NR_minus_2 = svdup_f64(NR-2);
svbool_t mask_i_eq_1 = svcmpeq_f64(pg, v_iip, v_1); // if iip == 1
svbool_t mask_j_eq_1 = svcmpeq_f64(pg, v_jjt, v_1); // if jjt == 1
svbool_t mask_k_eq_1 = svcmpeq_f64(pg, v_kkr, v_1); // if kkr == 1
svbool_t mask_i_eq_N_minus_2 = svcmpeq_f64(pg, v_iip, v_NP_minus_2); // if iip == N-2
svbool_t mask_j_eq_N_minus_2 = svcmpeq_f64(pg, v_jjt, v_NT_minus_2); // if jjt == N-2
svbool_t mask_k_eq_N_minus_2 = svcmpeq_f64(pg, v_kkr, v_NR_minus_2); // if kkr == N-2
// 1 < iip < N-2
svbool_t mask_i_else = svand_b_z(pg,
svcmpgt_f64(pg, v_iip, v_1),
svcmplt_f64(pg, v_iip, v_NP_minus_2)
);
// 1 < jjt < N-2
svbool_t mask_j_else = svand_b_z(pg,
svcmpgt_f64(pg, v_jjt, v_1),
svcmplt_f64(pg, v_jjt, v_NT_minus_2)
);
// 1 < kkr < N-2
svbool_t mask_k_else = svand_b_z(pg,
svcmpgt_f64(pg, v_kkr, v_1),
svcmplt_f64(pg, v_kkr, v_NR_minus_2)
);
// if _i_eq_1 == true
v_pp1 = svsel_f64(mask_i_eq_1, calc_1d_stencil(mask_i_eq_1, v_c__, v_m__, v_DP_inv) , v_pp1);
v_pp2 = svsel_f64(mask_i_eq_1, calc_3d_stencil(mask_i_eq_1, v_c__, v_p__, v_pp____, v_m__, v_DP_inv_half, PLUS), v_pp2);
// if_i_eq_N_minus_2 == true
v_pp1 = svsel_f64(mask_i_eq_N_minus_2, calc_3d_stencil(mask_i_eq_N_minus_2, v_c__, v_m__, v_mm____, v_p__, v_DP_inv_half, MINUS), v_pp1);
v_pp2 = svsel_f64(mask_i_eq_N_minus_2, calc_1d_stencil(mask_i_eq_N_minus_2, v_p__, v_c__, v_DP_inv) , v_pp2);
// else
v_pp1 = svsel_f64(mask_i_else, calc_3d_stencil(mask_i_else, v_c__, v_m__, v_mm____, v_p__, v_DP_inv_half, MINUS), v_pp1);
v_pp2 = svsel_f64(mask_i_else, calc_3d_stencil(mask_i_else, v_c__, v_p__, v_pp____, v_m__, v_DP_inv_half, PLUS) , v_pp2);
// if _j_eq_1 == true
v_pt1 = svsel_f64(mask_j_eq_1, calc_1d_stencil(mask_j_eq_1, v_c__, v__m_, v_DT_inv) , v_pt1);
v_pt2 = svsel_f64(mask_j_eq_1, calc_3d_stencil(mask_j_eq_1, v_c__, v__p_, v___pp__, v__m_, v_DT_inv_half, PLUS), v_pt2);
// if _j_eq_N_minus_2 == true
v_pt1 = svsel_f64(mask_j_eq_N_minus_2, calc_3d_stencil(mask_j_eq_N_minus_2, v_c__, v__m_, v___mm__, v__p_, v_DT_inv_half, MINUS), v_pt1);
v_pt2 = svsel_f64(mask_j_eq_N_minus_2, calc_1d_stencil(mask_j_eq_N_minus_2, v__p_, v_c__, v_DT_inv) , v_pt2);
// else
v_pt1 = svsel_f64(mask_j_else, calc_3d_stencil(mask_j_else, v_c__, v__m_, v___mm__, v__p_, v_DT_inv_half, MINUS), v_pt1);
v_pt2 = svsel_f64(mask_j_else, calc_3d_stencil(mask_j_else, v_c__, v__p_, v___pp__, v__m_, v_DT_inv_half, PLUS) , v_pt2);
// if _k_eq_1 == true
v_pr1 = svsel_f64(mask_k_eq_1, calc_1d_stencil(mask_k_eq_1, v_c__, v___m, v_DR_inv) , v_pr1);
v_pr2 = svsel_f64(mask_k_eq_1, calc_3d_stencil(mask_k_eq_1, v_c__, v___p, v_____pp, v___m, v_DR_inv_half, PLUS), v_pr2);
// if _k_eq_N_minus_2 == true
v_pr1 = svsel_f64(mask_k_eq_N_minus_2, calc_3d_stencil(mask_k_eq_N_minus_2, v_c__, v___m, v_____mm, v___p, v_DR_inv_half, MINUS), v_pr1);
v_pr2 = svsel_f64(mask_k_eq_N_minus_2, calc_1d_stencil(mask_k_eq_N_minus_2, v___p, v_c__, v_DR_inv) , v_pr2);
// else
v_pr1 = svsel_f64(mask_k_else, calc_3d_stencil(mask_k_else, v_c__, v___m, v_____mm, v___p, v_DR_inv_half, MINUS), v_pr1);
v_pr2 = svsel_f64(mask_k_else, calc_3d_stencil(mask_k_else, v_c__, v___p, v_____pp, v___m, v_DR_inv_half, PLUS) , v_pr2);
#endif
}
inline void vect_stencil_1st_3rd_apre_simd(
#if USE_ARM_SVE
svbool_t const& pg,
#endif
__mT& v_tau, __mT const& v_fac_a,__mT const& v_fac_b, __mT const& v_fac_c, __mT const& v_fac_f,
__mT const& v_T0v, __mT const& v_T0p, __mT const& v_T0t, __mT const& v_T0r, __mT const& v_fun, __mT const& v_change,
__mT const& v_pp1, __mT const& v_pp2, __mT const& v_pt1, __mT const& v_pt2, __mT const& v_pr1, __mT const& v_pr2,
__mT const& DP_inv, __mT const& DT_inv, __mT const& DR_inv){
#if USE_AVX512 || USE_AVX
// sigr = COEF * sqrt(v_fac_a)*v_T0v;
__mT sigr = _mmT_mul_pT(_mmT_mul_pT(COEF,_mmT_sqrt_pT(v_fac_a)),v_T0v);
// sigt = COEF * sqrt(v_fac_b)*v_T0v;
__mT sigt = _mmT_mul_pT(_mmT_mul_pT(COEF,_mmT_sqrt_pT(v_fac_b)),v_T0v);
// sigp = COEF * sqrt(v_fac_c)*v_T0v;
__mT sigp = _mmT_mul_pT(_mmT_mul_pT(COEF,_mmT_sqrt_pT(v_fac_c)),v_T0v);
// coe = 1.0 / (sigr/D + sigt/D + sigz/D);
__mT coe = _mmT_div_pT(v_1,_mmT_add_pT(_mmT_add_pT(_mmT_mul_pT(sigr,DR_inv),_mmT_mul_pT(sigt,DT_inv)),_mmT_mul_pT(sigp,DP_inv)));
// coe becomes inf as sig* goes to 0
// if coe > 1e19, set coe = 1e19
coe = _mmT_min_pT(coe,coe_max);
// Htau = v_fac_a * square(v_T0r * v_tau + v_T0v * (v_pr1 + v_pr2)*0.5);
__mT Htau = _mmT_mul_pT(v_fac_a,my_square_v(_mmT_add_pT(_mmT_mul_pT(v_T0r,v_tau),_mmT_mul_pT(v_T0v,_mmT_mul_pT(_mmT_add_pT(v_pr1,v_pr2),v_half)))));
// Htau += v_fac_b * square(v_T0t * v_tau + v_T0v * (v_pt1 + v_pt2)*0.5))
Htau = _mmT_add_pT(Htau,_mmT_mul_pT(v_fac_b,my_square_v(_mmT_add_pT(_mmT_mul_pT(v_T0t,v_tau),_mmT_mul_pT(v_T0v,_mmT_mul_pT(_mmT_add_pT(v_pt1,v_pt2), v_half))))));
// Htau += v_fac_c * square(v_T0p * v_tau + v_T0v * (v_pp1 + v_pp2)*0.5))
Htau = _mmT_add_pT(Htau,_mmT_mul_pT(v_fac_c,my_square_v(_mmT_add_pT(_mmT_mul_pT(v_T0p,v_tau),_mmT_mul_pT(v_T0v,_mmT_mul_pT(_mmT_add_pT(v_pp1,v_pp2), v_half))))));
// tmp1 = ( v_T0t * v_tau + v_T0v * (v_pt1 + v_pt2)*0.5)
__mT tmp1 = _mmT_add_pT(_mmT_mul_pT(v_T0t,v_tau),_mmT_mul_pT(v_T0v,_mmT_mul_pT(_mmT_add_pT(v_pt1,v_pt2), v_half)));
// tmp2 = ( v_T0p * v_tau + v_T0v * (v_pp1 + v_pp2)*0.5))
__mT tmp2 = _mmT_add_pT(_mmT_mul_pT(v_T0p,v_tau),_mmT_mul_pT(v_T0v,_mmT_mul_pT(_mmT_add_pT(v_pp1,v_pp2), v_half)));
// tmp3 = -2.0 * v_fac_f * tmp1 * tmp2;
__mT tmp3 = _mmT_mul_pT(v_m2,_mmT_mul_pT(v_fac_f,_mmT_mul_pT(tmp1,tmp2)));
// Htau = sqrt(Htau + tmp3); // # becamse nan as Htau - tmp3 goes to 0
Htau = _mmT_sqrt_pT(_mmT_add_pT(Htau,tmp3));
// tmp = (sigr*(v_pr2 - v_pr1) + sigt*(v_pt2 - v_pt1) + sigz*(v_pp2 - v_pp1))*0.5
__mT tmp = _mmT_mul_pT(v_half,_mmT_add_pT(_mmT_add_pT(_mmT_mul_pT(sigr,_mmT_sub_pT(v_pr2,v_pr1)),_mmT_mul_pT(sigt,_mmT_sub_pT(v_pt2,v_pt1))),_mmT_mul_pT(sigp,_mmT_sub_pT(v_pp2,v_pp1))));
// v_tau += coe * ((v_fun - Htau) + tmp) if mask is true
v_tau = _mmT_add_pT(v_tau,_mmT_mul_pT(coe,_mmT_add_pT(_mmT_sub_pT(v_fun,Htau),tmp)));
#if USE_AVX512
// mask if v_change != 1.0
__mmaskT mask = _mm512_cmp_pT_mask(v_change,v_1,_CMP_NEQ_OQ);
// set 1 if mask is true
v_tau = _mm512_mask_blend_pT(mask,v_tau,v_1);
#elif USE_AVX
__m256d mask = _mm256_cmp_pT(v_change, v_1,_CMP_NEQ_OQ);
v_tau = _mm256_blendv_pd(v_tau, v_1,mask);
#endif
#elif USE_ARM_SVE
__mT COEF = svdup_f64(1.0);
__mT v_1 = svdup_f64(1.0);
__mT v_half = svdup_f64(0.5);
__mT v_m2 = svdup_f64(-2.0);
__mT coe_max = svdup_f64(1e19);
// sigr = COEF * sqrt(v_fac_a)*v_T0v;
__mT sigr = svmul_f64_z(pg, svmul_f64_z(pg,COEF,svsqrt_f64_z(pg,v_fac_a)), v_T0v);
// sigt = COEF * sqrt(v_fac_b)*v_T0v;
__mT sigt = svmul_f64_z(pg, svmul_f64_z(pg,COEF,svsqrt_f64_z(pg,v_fac_b)), v_T0v);
// sigp = COEF * sqrt(v_fac_c)*v_T0v;
__mT sigp = svmul_f64_z(pg, svmul_f64_z(pg,COEF,svsqrt_f64_z(pg,v_fac_c)), v_T0v);
// coe = 1.0 / (sigr/D + sigt/D + sigz/D);
__mT coe = svdiv_f64_z(pg, v_1, svadd_f64_z(pg, svadd_f64_z(pg, svmul_f64_z(pg, sigr, DR_inv), svmul_f64_z(pg, sigt, DT_inv)), svmul_f64_z(pg, sigp, DP_inv)));
// coe becomes inf as sig* goes to 0
// if coe > 1e19, set coe = 1e19
coe = svmin_f64_z(pg, coe, coe_max);
// Htau = v_fac_a * square(v_T0r * v_tau + v_T0v * (v_pr1 + v_pr2)*0.5);
__mT Htau = svmul_f64_z(pg, v_fac_a, my_square_v(pg, svadd_f64_z(pg, \
svmul_f64_z(pg, v_T0r, v_tau), \
svmul_f64_z(pg, v_T0v, svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pr1, v_pr2))))\
)\
);
// Htau += v_fac_b * square(v_T0t * v_tau + v_T0v * (v_pt1 + v_pt2)*0.5))
Htau = svadd_f64_z(pg, Htau, svmul_f64_z(pg, v_fac_b, my_square_v(pg, svadd_f64_z(pg, \
svmul_f64_z(pg, v_T0t, v_tau), \
svmul_f64_z(pg, v_T0v, svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pt1, v_pt2))))\
)\
)\
);
// Htau += v_fac_c * square(v_T0p * v_tau + v_T0v * (v_pp1 + v_pp2)*0.5))
Htau = svadd_f64_z(pg, Htau, svmul_f64_z(pg, v_fac_c, my_square_v(pg, svadd_f64_z(pg, \
svmul_f64_z(pg, v_T0p, v_tau), \
svmul_f64_z(pg, v_T0v, svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pp1, v_pp2))))\
)\
)\
);
// tmp1 = ( v_T0t * v_tau + v_T0v * (v_pt1 + v_pt2)*0.5)
__mT tmp1 = svadd_f64_z(pg, \
svmul_f64_z(pg, v_T0t, v_tau), \
svmul_f64_z(pg, v_T0v, svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pt1, v_pt2)))\
);
// tmp2 = ( v_T0p * v_tau + v_T0v * (v_pp1 + v_pp2)*0.5))
__mT tmp2 = svadd_f64_z(pg, \
svmul_f64_z(pg, v_T0p, v_tau), \
svmul_f64_z(pg, v_T0v, svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pp1, v_pp2)))\
);
// tmp3 = -2.0 * v_fac_f * tmp1 * tmp2;
__mT tmp3 = svmul_f64_z(pg, v_m2, svmul_f64_z(pg, v_fac_f, svmul_f64_z(pg, tmp1, tmp2)));
// Htau = sqrt(Htau + tmp3); // # becamse nan as Htau - tmp3 goes to 0
Htau = svsqrt_f64_z(pg, svadd_f64_z(pg, Htau, tmp3));
// tmp = (sigr*(v_pr2 - v_pr1) + sigt*(v_pt2 - v_pt1) + sigz*(v_pp2 - v_pp1))*0.5
__mT tmp = svmul_f64_z(pg, v_half, svadd_f64_z(pg, \
svadd_f64_z(pg, \
svmul_f64_z(pg, sigr, svsub_f64_z(pg, v_pr2, v_pr1)), \
svmul_f64_z(pg, sigt, svsub_f64_z(pg, v_pt2, v_pt1))\
), \
svmul_f64_z(pg, sigp, svsub_f64_z(pg, v_pp2, v_pp1))\
)\
);
// v_tau += coe * ((v_fun - Htau) + tmp) if mask is true
v_tau = svadd_f64_z(pg, v_tau, svmul_f64_z(pg, coe, svadd_f64_z(pg, svsub_f64_z(pg, v_fun, Htau), tmp)));
// mask = v_change != 1.0
svbool_t mask = svcmpne_f64(pg, v_change, v_1);
// v_tau = v_1 if mask is true (!= 1.0)
v_tau = svsel_f64(mask, v_1, v_tau);
#endif
}
inline void vect_stencil_1st_3rd_apre_simd_tele(
#if USE_ARM_SVE
svbool_t const& pg,
#endif
__mT& v_tau, __mT const& v_fac_a,__mT const& v_fac_b, __mT const& v_fac_c, __mT const& v_fac_f,
__mT const& v_fun, __mT const& v_change,
__mT const& v_pp1, __mT const& v_pp2, __mT const& v_pt1, __mT const& v_pt2, __mT const& v_pr1, __mT const& v_pr2,
__mT const& DP_inv, __mT const& DT_inv, __mT const& DR_inv){
#if USE_AVX512 || USE_AVX
// sigr = COEF * sqrt(v_fac_a);
__mT sigr = _mmT_mul_pT(COEF_TELE,_mmT_sqrt_pT(v_fac_a));
// sigt = COEF * sqrt(v_fac_b);
__mT sigt = _mmT_mul_pT(COEF_TELE,_mmT_sqrt_pT(v_fac_b));
// sigp = COEF * sqrt(v_fac_c);
__mT sigp = _mmT_mul_pT(COEF_TELE,_mmT_sqrt_pT(v_fac_c));
// coe = 1.0 / (sigr/D + sigt/D + sigz/D);
__mT coe = _mmT_div_pT(v_1,_mmT_add_pT(_mmT_add_pT(_mmT_mul_pT(sigr,DR_inv),_mmT_mul_pT(sigt,DT_inv)),_mmT_mul_pT(sigp,DP_inv)));
// coe becomes inf as sig* goes to 0
// if coe > 1e19, set coe = 1e19
coe = _mmT_min_pT(coe,coe_max);
// Htau = v_fac_a * square((v_pr1 + v_pr2)*0.5);
__mT Htau = _mmT_mul_pT(v_fac_a,my_square_v(_mmT_mul_pT(_mmT_add_pT(v_pr1,v_pr2),v_half)));
// Htau += v_fac_b * square((v_pt1 + v_pt2)*0.5))
Htau = _mmT_add_pT(Htau,_mmT_mul_pT(v_fac_b,my_square_v(_mmT_mul_pT(_mmT_add_pT(v_pt1,v_pt2), v_half))));
// Htau += v_fac_c * square((v_pp1 + v_pp2)*0.5))
Htau = _mmT_add_pT(Htau,_mmT_mul_pT(v_fac_c,my_square_v(_mmT_mul_pT(_mmT_add_pT(v_pp1,v_pp2), v_half))));
// tmp1 = (v_pt1 + v_pt2)*0.5
__mT tmp1 = _mmT_mul_pT(_mmT_add_pT(v_pt1,v_pt2), v_half);
// tmp2 = (v_pp1 + v_pp2)*0.5
__mT tmp2 = _mmT_mul_pT(_mmT_add_pT(v_pp1,v_pp2), v_half);
// tmp3 = -2.0 * v_fac_f * tmp1 * tmp2;
__mT tmp3 = _mmT_mul_pT(v_m2,_mmT_mul_pT(v_fac_f,_mmT_mul_pT(tmp1,tmp2)));
// Htau = sqrt(Htau + tmp3); // # becamse nan as Htau - tmp3 goes to 0
Htau = _mmT_sqrt_pT(_mmT_add_pT(Htau,tmp3));
// tmp = (sigr*(v_pr2 - v_pr1) + sigt*(v_pt2 - v_pt1) + sigz*(v_pp2 - v_pp1))*0.5
__mT tmp = _mmT_mul_pT(v_half,_mmT_add_pT(_mmT_add_pT(_mmT_mul_pT(sigr,_mmT_sub_pT(v_pr2,v_pr1)),_mmT_mul_pT(sigt,_mmT_sub_pT(v_pt2,v_pt1))),_mmT_mul_pT(sigp,_mmT_sub_pT(v_pp2,v_pp1))));
// v_tau += coe * ((v_fun - Htau) + tmp) if mask is true
v_tau = _mmT_add_pT(v_tau,_mmT_mul_pT(coe,_mmT_add_pT(_mmT_sub_pT(v_fun,Htau),tmp)));
#if USE_AVX512
// mask if v_change != 1.0
__mmaskT mask = _mm512_cmp_pT_mask(v_change,v_1,_CMP_NEQ_OQ);
// set 1 if mask is true
v_tau = _mm512_mask_blend_pT(mask,v_tau,v_1);
#elif USE_AVX
__m256d mask = _mm256_cmp_pT(v_change, v_1,_CMP_NEQ_OQ);
v_tau = _mm256_blendv_pd(v_tau, v_1,mask);
#endif
#elif USE_ARM_SVE
__mT COEF_TELE = svdup_f64(1.05);
__mT v_1 = svdup_f64(1.0);
__mT v_half = svdup_f64(0.5);
__mT v_m2 = svdup_f64(-2.0);
__mT coe_max = svdup_f64(1e19);
// sigr = COEF * sqrt(v_fac_a);
__mT sigr = svmul_f64_z(pg,COEF_TELE,svsqrt_f64_z(pg,v_fac_a));
// sigt = COEF * sqrt(v_fac_b);
__mT sigt = svmul_f64_z(pg,COEF_TELE,svsqrt_f64_z(pg,v_fac_b));
// sigp = COEF * sqrt(v_fac_c);
__mT sigp = svmul_f64_z(pg,COEF_TELE,svsqrt_f64_z(pg,v_fac_c));
// coe = 1.0 / (sigr/D + sigt/D + sigz/D);
__mT coe = svdiv_f64_z(pg, v_1, svadd_f64_z(pg, svadd_f64_z(pg, svmul_f64_z(pg, sigr, DR_inv), svmul_f64_z(pg, sigt, DT_inv)), svmul_f64_z(pg, sigp, DP_inv)));
// coe becomes inf as sig* goes to 0
// if coe > 1e19, set coe = 1e19
coe = svmin_f64_z(pg, coe, coe_max);
// Htau = v_fac_a * square((v_pr1 + v_pr2)*0.5);
__mT Htau = svmul_f64_z(pg, v_fac_a, my_square_v(pg, svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pr1, v_pr2))));
// Htau += v_fac_b * square((v_pt1 + v_pt2)*0.5))
Htau = svadd_f64_z(pg, Htau, svmul_f64_z(pg, v_fac_b, my_square_v(pg, svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pt1, v_pt2)))));
// Htau += v_fac_c * square((v_pp1 + v_pp2)*0.5))
Htau = svadd_f64_z(pg, Htau, svmul_f64_z(pg, v_fac_c, my_square_v(pg, svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pp1, v_pp2)))));
// tmp1 = (v_pt1 + v_pt2)*0.5
__mT tmp1 = svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pt1, v_pt2));
// tmp2 = (v_pp1 + v_pp2)*0.5
__mT tmp2 = svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pp1, v_pp2));
// tmp3 = -2.0 * v_fac_f * tmp1 * tmp2;
__mT tmp3 = svmul_f64_z(pg, v_m2, svmul_f64_z(pg, v_fac_f, svmul_f64_z(pg, tmp1, tmp2)));
// Htau = sqrt(Htau + tmp3); // # becamse nan as Htau - tmp3 goes to 0
Htau = svsqrt_f64_z(pg, svadd_f64_z(pg, Htau, tmp3));
// tmp = (sigr*(v_pr2 - v_pr1) + sigt*(v_pt2 - v_pt1) + sigz*(v_pp2 - v_pp1))*0.5
__mT tmp = svmul_f64_z(pg, v_half, svadd_f64_z(pg, \
svadd_f64_z(pg, \
svmul_f64_z(pg, sigr, svsub_f64_z(pg, v_pr2, v_pr1)), \
svmul_f64_z(pg, sigt, svsub_f64_z(pg, v_pt2, v_pt1))\
), \
svmul_f64_z(pg, sigp, svsub_f64_z(pg, v_pp2, v_pp1))\
)\
);
// v_tau += coe * ((v_fun - Htau) + tmp) if mask is true
v_tau = svadd_f64_z(pg, v_tau, svmul_f64_z(pg, coe, svadd_f64_z(pg, svsub_f64_z(pg, v_fun, Htau), tmp)));
// mask = v_change != 1.0
svbool_t mask = svcmpne_f64(pg, v_change, v_1);
// v_tau = v_1 if mask is true (!= 1.0)
v_tau = svsel_f64(mask, v_1, v_tau);
#endif
}
// function for calculating the values on boundaries
inline void calculate_boundary_nodes_tele_simd(
#if USE_ARM_SVE
svbool_t const& pg,
#endif
__mT const& v_iip, __mT const& v_jjt, __mT const& v_kkr,
__mT & v_c__,
__mT const& v_p__, __mT const& v_m__, __mT const& v__p_, __mT const& v__m_, __mT const& v___p, __mT const& v___m,
__mT const& v_pp____, __mT const& v_mm____, __mT const& v___pp__, __mT const& v___mm__, __mT const& v_____pp, __mT const& v_____mm,
__mT const& v_change,
int const& loc_I, int const& loc_J, int const& loc_K
){
#if USE_AVX512
// mask for bottom boundary (v_kkr == 0 && v_change == 1.0)
__mmaskT mask_bot = _kand_maskT(\
_mm512_cmp_pT_mask(v_kkr, v_0, _CMP_EQ_OQ), \
_mm512_cmp_pT_mask(v_change, v_1, _CMP_EQ_OQ));
// mask for top boundary (v_kkr == loc_K-1 && v_change == 1.0)
__mmaskT mask_top = _kand_maskT(\
_mm512_cmp_pT_mask(v_kkr, _mmT_set1_pT(loc_K-1), _CMP_EQ_OQ), \
_mm512_cmp_pT_mask(v_change, v_1, _CMP_EQ_OQ));
// mask for south boundary (v_jjt == 0 && v_change == 1.0)
__mmaskT mask_south = _kand_maskT(\
_mm512_cmp_pT_mask(v_jjt, v_0, _CMP_EQ_OQ), \
_mm512_cmp_pT_mask(v_change, v_1, _CMP_EQ_OQ));
// mask for north boundary (v_jjt == loc_J-1 && v_change == 1.0)
__mmaskT mask_north = _kand_maskT(\
_mm512_cmp_pT_mask(v_jjt, _mmT_set1_pT(loc_J-1), _CMP_EQ_OQ), \
_mm512_cmp_pT_mask(v_change, v_1, _CMP_EQ_OQ));
// mask for west boundary (v_iip == 0 && v_change == 1.0)
__mmaskT mask_west = _kand_maskT(\
_mm512_cmp_pT_mask(v_iip, v_0, _CMP_EQ_OQ), \
_mm512_cmp_pT_mask(v_change, v_1, _CMP_EQ_OQ));
// mask for east boundary (v_iip == loc_I-1 && v_change == 1.0)
__mmaskT mask_east = _kand_maskT(\
_mm512_cmp_pT_mask(v_iip, _mmT_set1_pT(loc_I-1), _CMP_EQ_OQ), \
_mm512_cmp_pT_mask(v_change, v_1, _CMP_EQ_OQ));
// if mask_bot, v_c__ = max(2*v___p - v_____pp, v_____pp)
v_c__ = _mm512_mask_blend_pT(mask_bot, v_c__, _mmT_max_pT(_mmT_sub_pT(_mmT_mul_pT(v_2, v___p), v_____pp), v_____pp));
// if mask_top, v_c__ = max(2*v___m - v_____mm, v_____mm)
v_c__ = _mm512_mask_blend_pT(mask_top, v_c__, _mmT_max_pT(_mmT_sub_pT(_mmT_mul_pT(v_2, v___m), v_____mm), v_____mm));
// if mask_south, v_c__ = max(2*v__p_ - v___pp__, v___pp__)
v_c__ = _mm512_mask_blend_pT(mask_south, v_c__, _mmT_max_pT(_mmT_sub_pT(_mmT_mul_pT(v_2, v__p_), v___pp__), v___pp__));
// if mask_north, v_c__ = max(2*v__m_ - v___mm__, v___mm__)
v_c__ = _mm512_mask_blend_pT(mask_north, v_c__, _mmT_max_pT(_mmT_sub_pT(_mmT_mul_pT(v_2, v__m_), v___mm__), v___mm__));
// if mask_west, v_c__ = max(2*v_p__ - v_pp____, v_pp____)
v_c__ = _mm512_mask_blend_pT(mask_west, v_c__, _mmT_max_pT(_mmT_sub_pT(_mmT_mul_pT(v_2, v_p__), v_pp____), v_pp____));
// if mask_east, v_c__ = max(2*v_m__ - v_mm____, v_mm____)
v_c__ = _mm512_mask_blend_pT(mask_east, v_c__, _mmT_max_pT(_mmT_sub_pT(_mmT_mul_pT(v_2, v_m__), v_mm____), v_mm____));
#elif USE_AVX
// mask for bottom boundary (v_kkr == 0 && v_change == 1.0)
__m256d mask_bot = _mm256_and_pd(\
_mm256_cmp_pT(v_kkr, v_0, _CMP_EQ_OQ), \
_mm256_cmp_pT(v_change, v_1, _CMP_EQ_OQ));
// mask for top boundary (v_kkr == loc_K-1 && v_change == 1.0)
__m256d mask_top = _mm256_and_pd(\
_mm256_cmp_pT(v_kkr, _mmT_set1_pT(loc_K-1), _CMP_EQ_OQ), \
_mm256_cmp_pT(v_change, v_1, _CMP_EQ_OQ));
// mask for south boundary (v_jjt == 0 && v_change == 1.0)
__m256d mask_south = _mm256_and_pd(\
_mm256_cmp_pT(v_jjt, v_0, _CMP_EQ_OQ), \
_mm256_cmp_pT(v_change, v_1, _CMP_EQ_OQ));
// mask for north boundary (v_jjt == loc_J-1 && v_change == 1.0)
__m256d mask_north = _mm256_and_pd(\
_mm256_cmp_pT(v_jjt, _mmT_set1_pT(loc_J-1), _CMP_EQ_OQ), \
_mm256_cmp_pT(v_change, v_1, _CMP_EQ_OQ));
// mask for west boundary (v_iip == 0 && v_change == 1.0)
__m256d mask_west = _mm256_and_pd(\
_mm256_cmp_pT(v_iip, v_0, _CMP_EQ_OQ), \
_mm256_cmp_pT(v_change, v_1, _CMP_EQ_OQ));
// mask for east boundary (v_iip == loc_I-1 && v_change == 1.0)
__m256d mask_east = _mm256_and_pd(\
_mm256_cmp_pT(v_iip, _mmT_set1_pT(loc_I-1), _CMP_EQ_OQ), \
_mm256_cmp_pT(v_change, v_1, _CMP_EQ_OQ));
// if mask_bot, v_c__ = max(2*v___p - v_____pp, v_____pp)
v_c__ = _mm256_blendv_pd(v_c__, _mm256_max_pd(_mm256_sub_pd(_mm256_mul_pd(v_2, v___p), v_____pp), v_____pp), mask_bot);
// if mask_top, v_c__ = max(2*v___m - v_____mm, v_____mm)
v_c__ = _mm256_blendv_pd(v_c__, _mm256_max_pd(_mm256_sub_pd(_mm256_mul_pd(v_2, v___m), v_____mm), v_____mm), mask_top);
// if mask_south, v_c__ = max(2*v__p_ - v___pp__, v___pp__)
v_c__ = _mm256_blendv_pd(v_c__, _mm256_max_pd(_mm256_sub_pd(_mm256_mul_pd(v_2, v__p_), v___pp__), v___pp__), mask_south);
// if mask_north, v_c__ = max(2*v__m_ - v___mm__, v___mm__)
v_c__ = _mm256_blendv_pd(v_c__, _mm256_max_pd(_mm256_sub_pd(_mm256_mul_pd(v_2, v__m_), v___mm__), v___mm__), mask_north);
// if mask_west, v_c__ = max(2*v_p__ - v_pp____, v_pp____)
v_c__ = _mm256_blendv_pd(v_c__, _mm256_max_pd(_mm256_sub_pd(_mm256_mul_pd(v_2, v_p__), v_pp____), v_pp____), mask_west);
// if mask_east, v_c__ = max(2*v_m__ - v_mm____, v_mm____)
v_c__ = _mm256_blendv_pd(v_c__, _mm256_max_pd(_mm256_sub_pd(_mm256_mul_pd(v_2, v_m__), v_mm____), v_mm____), mask_east);
#elif USE_ARM_SVE
svfloat64_t v_0 = svdup_f64(0.0);
svfloat64_t v_1 = svdup_f64(1.0);
svfloat64_t v_2 = svdup_f64(2.0);
svfloat64_t v_loc_I_minus_1 = svdup_f64(loc_I-1);
svfloat64_t v_loc_J_minus_1 = svdup_f64(loc_J-1);
svfloat64_t v_loc_K_minus_1 = svdup_f64(loc_K-1);
// mask for bottom boundary
svbool_t mask_bot = svand_b_z(pg, \
svcmpeq_f64(pg, v_kkr, v_0), \
svcmpeq_f64(pg, v_change, v_1));
// mask for top boundary
svbool_t mask_top = svand_b_z(pg, \
svcmpeq_f64(pg, v_kkr, v_loc_K_minus_1), \
svcmpeq_f64(pg, v_change, v_1));
// mask for south boundary
svbool_t mask_south = svand_b_z(pg, \
svcmpeq_f64(pg, v_jjt, v_0), \
svcmpeq_f64(pg, v_change, v_1));
// mask for north boundary
svbool_t mask_north = svand_b_z(pg, \
svcmpeq_f64(pg, v_jjt, v_loc_J_minus_1), \
svcmpeq_f64(pg, v_change, v_1));
// mask for west boundary
svbool_t mask_west = svand_b_z(pg, \
svcmpeq_f64(pg, v_iip, v_0), \
svcmpeq_f64(pg, v_change, v_1));
// mask for east boundary
svbool_t mask_east = svand_b_z(pg, \
svcmpeq_f64(pg, v_iip, v_loc_I_minus_1), \
svcmpeq_f64(pg, v_change, v_1));
// if mask_bot, v_c__ = max(2*v___p - v_____pp, v_____pp)
v_c__ = svsel_f64(mask_bot, \
svmax_f64_z(pg, svsub_f64_z(pg, svmul_f64_z(pg, v_2, v___p), v_____pp), v_____pp), \
v_c__);
// if mask_top, v_c__ = max(2*v___m - v_____mm, v_____mm)
v_c__ = svsel_f64(mask_top, \
svmax_f64_z(pg, svsub_f64_z(pg, svmul_f64_z(pg, v_2, v___m), v_____mm), v_____mm), \
v_c__);
// if mask_south, v_c__ = max(2*v__p_ - v___pp__, v___pp__)
v_c__ = svsel_f64(mask_south, \
svmax_f64_z(pg, svsub_f64_z(pg, svmul_f64_z(pg, v_2, v__p_), v___pp__), v___pp__), \
v_c__);
// if mask_north, v_c__ = max(2*v__m_ - v___mm__, v___mm__)
v_c__ = svsel_f64(mask_north, \
svmax_f64_z(pg, svsub_f64_z(pg, svmul_f64_z(pg, v_2, v__m_), v___mm__), v___mm__), \
v_c__);
// if mask_west, v_c__ = max(2*v_p__ - v_pp____, v_pp____)
v_c__ = svsel_f64(mask_west, \
svmax_f64_z(pg, svsub_f64_z(pg, svmul_f64_z(pg, v_2, v_p__), v_pp____), v_pp____), \
v_c__);
// if mask_east, v_c__ = max(2*v_m__ - v_mm____, v_mm____)
v_c__ = svsel_f64(mask_east, \
svmax_f64_z(pg, svsub_f64_z(pg, svmul_f64_z(pg, v_2, v_m__), v_mm____), v_mm____), \
v_c__);
#endif
}
#if USE_AVX512 || USE_AVX
inline __mT load_mem_gen_to_mTd(CUSTOMREAL* a, int* ijk){
CUSTOMREAL dump_[NSIMD];
for (int i=0; i<NSIMD; i++){
dump_[i] = a[ijk[i]];
}
return _mmT_loadu_pT(dump_);
}
inline __mT load_mem_bool_to_mTd(bool* a, int* ijk){
CUSTOMREAL dump_[NSIMD];
for (int i=0; i<NSIMD; i++){
dump_[i] = (CUSTOMREAL) a[ijk[i]];
}
return _mmT_loadu_pT(dump_);
}
#elif USE_ARM_SVE
inline __mT load_mem_gen_to_mTd(svbool_t const& pg, CUSTOMREAL* a, uint64_t* ijk){
svuint64_t v_ijk = svld1_u64(pg, ijk);
return svld1_gather_u64index_f64(pg, a, v_ijk);
}
inline __mT load_mem_bool_to_mTd(svbool_t const& pg, bool* a, int* ijk){
CUSTOMREAL dump_[NSIMD];
for (int i=0; i<NSIMD; i++){
dump_[i] = (CUSTOMREAL) a[ijk[i]];
}
return svld1_f64(pg, dump_); // change this to gather load
}
#endif // __ARM_FEATURE_SVE
#endif // USE_SIMD
#endif // VECTORIZATED_SWEEP_H

10
include/version.h.in Normal file
View File

@@ -0,0 +1,10 @@
#ifndef VERSION_H
#define VERSION_H
#define PROJECT_VERSION_MAJOR @PROJECT_VERSION_MAJOR@
#define PROJECT_VERSION_MINOR @PROJECT_VERSION_MINOR@
#define PROJECT_VERSION_PATCH @PROJECT_VERSION_PATCH@
#define GIT_COMMIT "@GIT_COMMIT_HASH@"
#endif // VERSION_H