initial upload
This commit is contained in:
397
include/1d_models.h
Normal file
397
include/1d_models.h
Normal file
@@ -0,0 +1,397 @@
|
||||
#ifndef ONE_D_MODELS_H
|
||||
#define ONE_D_MODELS_H
|
||||
|
||||
#include <vector>
|
||||
#include "config.h"
|
||||
|
||||
|
||||
// ak135 http://rses.anu.edu.au/seismology/ak135/ak135t.html
|
||||
// depth(km), P velocity(km/s)
|
||||
inline const std::vector<std::vector<CUSTOMREAL>> model_1d_ak135 \
|
||||
{{ 0.000, 5.8000},
|
||||
{ 20.000, 5.8000},
|
||||
{ 20.000, 6.5000},
|
||||
{ 35.000, 6.5000},
|
||||
{ 35.000, 8.0400},
|
||||
{ 77.500, 8.0450},
|
||||
{ 120.000, 8.0500},
|
||||
{ 165.000, 8.1750},
|
||||
{ 210.000, 8.3000},
|
||||
{ 210.000, 8.3000},
|
||||
{ 260.000, 8.4825},
|
||||
{ 310.000, 8.6650},
|
||||
{ 360.000, 8.8475},
|
||||
{ 410.000, 9.0300},
|
||||
{ 410.000, 9.3600},
|
||||
{ 460.000, 9.5280},
|
||||
{ 510.000, 9.6960},
|
||||
{ 560.000, 9.8640},
|
||||
{ 610.000, 10.0320},
|
||||
{ 660.000, 10.2000},
|
||||
{ 660.000, 10.7900},
|
||||
{ 710.000, 10.9229},
|
||||
{ 760.000, 11.0558},
|
||||
{ 809.500, 11.1353},
|
||||
{ 859.000, 11.2221},
|
||||
{ 908.500, 11.3068},
|
||||
{ 958.000, 11.3896},
|
||||
{ 1007.500, 11.4705},
|
||||
{ 1057.000, 11.5495},
|
||||
{ 1106.500, 11.6269},
|
||||
{ 1156.000, 11.7026},
|
||||
{ 1205.500, 11.7766},
|
||||
{ 1255.000, 11.8491},
|
||||
{ 1304.500, 11.9200},
|
||||
{ 1354.000, 11.9895},
|
||||
{ 1403.500, 12.0577},
|
||||
{ 1453.000, 12.1245},
|
||||
{ 1502.500, 12.1912},
|
||||
{ 1552.000, 12.2550},
|
||||
{ 1601.500, 12.3185},
|
||||
{ 1651.000, 12.3819},
|
||||
{ 1700.500, 12.4426},
|
||||
{ 1750.000, 12.5031},
|
||||
{ 1799.500, 12.5631},
|
||||
{ 1849.000, 12.6221},
|
||||
{ 1898.500, 12.6804},
|
||||
{ 1948.000, 12.7382},
|
||||
{ 1997.500, 12.7956},
|
||||
{ 2047.000, 12.8526},
|
||||
{ 2096.500, 12.9096},
|
||||
{ 2146.000, 12.9668},
|
||||
{ 2195.500, 13.0222},
|
||||
{ 2245.000, 13.0783},
|
||||
{ 2294.500, 13.1336},
|
||||
{ 2344.000, 13.1894},
|
||||
{ 2393.500, 13.2465},
|
||||
{ 2443.000, 13.3018},
|
||||
{ 2492.500, 13.3585},
|
||||
{ 2542.000, 13.4156},
|
||||
{ 2591.500, 13.4741},
|
||||
{ 2640.000, 13.5312},
|
||||
{ 2690.000, 13.5900},
|
||||
{ 2740.000, 13.6494},
|
||||
{ 2740.000, 13.6494},
|
||||
{ 2789.670, 13.6530},
|
||||
{ 2839.330, 13.6566},
|
||||
{ 2891.500, 13.6602},
|
||||
{ 2891.500, 8.0000},
|
||||
{ 2939.330, 8.0382},
|
||||
{ 2989.660, 8.1283},
|
||||
{ 3039.990, 8.2213},
|
||||
{ 3090.320, 8.3122},
|
||||
{ 3140.660, 8.4001},
|
||||
{ 3190.990, 8.4861},
|
||||
{ 3241.320, 8.5692},
|
||||
{ 3291.650, 8.6496},
|
||||
{ 3341.980, 8.7283},
|
||||
{ 3392.310, 8.8036},
|
||||
{ 3442.640, 8.8761},
|
||||
{ 3492.970, 8.9461},
|
||||
{ 3543.300, 9.0138},
|
||||
{ 3593.640, 9.0792},
|
||||
{ 3643.970, 9.1426},
|
||||
{ 3694.300, 9.2042},
|
||||
{ 3744.630, 9.2634},
|
||||
{ 3794.960, 9.3205},
|
||||
{ 3845.290, 9.3760},
|
||||
{ 3895.620, 9.4297},
|
||||
{ 3945.950, 9.4814},
|
||||
{ 3996.280, 9.5306},
|
||||
{ 4046.620, 9.5777},
|
||||
{ 4096.950, 9.6232},
|
||||
{ 4147.280, 9.6673},
|
||||
{ 4197.610, 9.7100},
|
||||
{ 4247.940, 9.7513},
|
||||
{ 4298.270, 9.7914},
|
||||
{ 4348.600, 9.8304},
|
||||
{ 4398.930, 9.8682},
|
||||
{ 4449.260, 9.9051},
|
||||
{ 4499.600, 9.9410},
|
||||
{ 4549.930, 9.9761},
|
||||
{ 4600.260, 10.0103},
|
||||
{ 4650.590, 10.0439},
|
||||
{ 4700.920, 10.0768},
|
||||
{ 4801.580, 10.1415},
|
||||
{ 4851.910, 10.1739},
|
||||
{ 4902.240, 10.2049},
|
||||
{ 4952.580, 10.2329},
|
||||
{ 5002.910, 10.2565},
|
||||
{ 5053.240, 10.2745},
|
||||
{ 5103.570, 10.2854},
|
||||
{ 5153.500, 10.2890},
|
||||
{ 5153.500, 11.0427},
|
||||
{ 5204.610, 11.0585},
|
||||
{ 5255.320, 11.0718},
|
||||
{ 5306.040, 11.0850},
|
||||
{ 5356.750, 11.0983},
|
||||
{ 5407.460, 11.1166},
|
||||
{ 5458.170, 11.1316},
|
||||
{ 5508.890, 11.1457},
|
||||
{ 5559.600, 11.1590},
|
||||
{ 5610.310, 11.1715},
|
||||
{ 5661.020, 11.1832},
|
||||
{ 5711.740, 11.1941},
|
||||
{ 5813.160, 11.2134},
|
||||
{ 5863.870, 11.2219},
|
||||
{ 5914.590, 11.2295},
|
||||
{ 5965.300, 11.2364},
|
||||
{ 6016.010, 11.2424},
|
||||
{ 6066.720, 11.2477},
|
||||
{ 6117.440, 11.2521},
|
||||
{ 6168.150, 11.2557},
|
||||
{ 6218.860, 11.2586},
|
||||
{ 6269.570, 11.2606},
|
||||
{ 6320.290, 11.2618},
|
||||
{ 6371.000, 11.2622}};
|
||||
|
||||
|
||||
// iasp91 model from http://ds.iris.edu/spud/earthmodel/9991809
|
||||
inline const std::vector<std::vector<CUSTOMREAL>> model_1d_iasp91 \
|
||||
{{0.00, 5.8000},
|
||||
{1.00, 5.8000},
|
||||
{2.00, 5.8000},
|
||||
{3.00, 5.8000},
|
||||
{4.00, 5.8000},
|
||||
{5.00, 5.8000},
|
||||
{6.00, 5.8000},
|
||||
{7.00, 5.8000},
|
||||
{8.00, 5.8000},
|
||||
{9.00, 5.8000},
|
||||
{10.00, 5.8000},
|
||||
{11.00, 5.8000},
|
||||
{12.00, 5.8000},
|
||||
{13.00, 5.8000},
|
||||
{14.00, 5.8000},
|
||||
{15.00, 5.8000},
|
||||
{16.00, 5.8000},
|
||||
{17.00, 5.8000},
|
||||
{18.00, 5.8000},
|
||||
{19.00, 5.8000},
|
||||
{20.00, 5.8000},
|
||||
{20.00, 6.5000},
|
||||
{21.00, 6.5000},
|
||||
{22.00, 6.5000},
|
||||
{23.00, 6.5000},
|
||||
{24.00, 6.5000},
|
||||
{25.00, 6.5000},
|
||||
{26.00, 6.5000},
|
||||
{27.00, 6.5000},
|
||||
{28.00, 6.5000},
|
||||
{29.00, 6.5000},
|
||||
{30.00, 6.5000},
|
||||
{31.00, 6.5000},
|
||||
{32.00, 6.5000},
|
||||
{33.00, 6.5000},
|
||||
{34.00, 6.5000},
|
||||
{35.00, 6.5000},
|
||||
{35.00, 8.0400},
|
||||
{40.00, 8.0406},
|
||||
{45.00, 8.0412},
|
||||
{50.00, 8.0418},
|
||||
{60.00, 8.0429},
|
||||
{70.00, 8.0441},
|
||||
{80.00, 8.0453},
|
||||
{90.00, 8.0465},
|
||||
{100.00, 8.0476},
|
||||
{110.00, 8.0488},
|
||||
{120.00, 8.0500},
|
||||
{120.00, 8.0500},
|
||||
{130.00, 8.0778},
|
||||
{140.00, 8.1056},
|
||||
{150.00, 8.1333},
|
||||
{160.00, 8.1611},
|
||||
{170.00, 8.1889},
|
||||
{180.00, 8.2167},
|
||||
{190.00, 8.2444},
|
||||
{200.00, 8.2722},
|
||||
{210.00, 8.3000},
|
||||
{210.00, 8.3000},
|
||||
{220.00, 8.3365},
|
||||
{230.00, 8.3730},
|
||||
{240.00, 8.4095},
|
||||
{250.00, 8.4460},
|
||||
{260.00, 8.4825},
|
||||
{270.00, 8.5190},
|
||||
{280.00, 8.5555},
|
||||
{290.00, 8.5920},
|
||||
{300.00, 8.6285},
|
||||
{310.00, 8.6650},
|
||||
{320.00, 8.7015},
|
||||
{330.00, 8.7380},
|
||||
{340.00, 8.7745},
|
||||
{350.00, 8.8110},
|
||||
{360.00, 8.8475},
|
||||
{370.00, 8.8840},
|
||||
{380.00, 8.9205},
|
||||
{390.00, 8.9570},
|
||||
{400.00, 8.9935},
|
||||
{410.00, 9.0300},
|
||||
{410.00, 9.3600},
|
||||
{420.00, 9.3936},
|
||||
{430.00, 9.4272},
|
||||
{440.00, 9.4608},
|
||||
{450.00, 9.4944},
|
||||
{460.00, 9.5280},
|
||||
{470.00, 9.5616},
|
||||
{480.00, 9.5952},
|
||||
{490.00, 9.6288},
|
||||
{500.00, 9.6624},
|
||||
{510.00, 9.6960},
|
||||
{520.00, 9.7296},
|
||||
{530.00, 9.7632},
|
||||
{540.00, 9.7968},
|
||||
{550.00, 9.8304},
|
||||
{560.00, 9.8640},
|
||||
{570.00, 9.8976},
|
||||
{580.00, 9.9312},
|
||||
{590.00, 9.9648},
|
||||
{600.00, 9.9984},
|
||||
{610.00, 10.0320},
|
||||
{620.00, 10.0656},
|
||||
{630.00, 10.0992},
|
||||
{640.00, 10.1328},
|
||||
{650.00, 10.1664},
|
||||
{660.00, 10.2000},
|
||||
{660.00, 10.7900},
|
||||
{670.00, 10.8166},
|
||||
{680.00, 10.8432},
|
||||
{690.00, 10.8697},
|
||||
{700.00, 10.8963},
|
||||
{710.00, 10.9229},
|
||||
{720.00, 10.9495},
|
||||
{730.00, 10.9761},
|
||||
{740.00, 11.0026},
|
||||
{750.00, 11.0292},
|
||||
{760.00, 11.0558},
|
||||
{760.00, 11.0558},
|
||||
{770.00, 11.0738},
|
||||
{780.00, 11.0917},
|
||||
{790.00, 11.1095},
|
||||
{800.00, 11.1272},
|
||||
{900.00, 11.2997},
|
||||
{1000.00, 11.4640},
|
||||
{1100.00, 11.6208},
|
||||
{1200.00, 11.7707},
|
||||
{1300.00, 11.9142},
|
||||
{1400.00, 12.0521},
|
||||
{1500.00, 12.1849},
|
||||
{2000.00, 12.7944},
|
||||
{2500.00, 13.3697},
|
||||
{2700.00, 13.6076},
|
||||
{2740.00, 13.6564},
|
||||
{2740.00, 13.6564},
|
||||
{2750.00, 13.6587},
|
||||
{2800.00, 13.6703},
|
||||
{2850.00, 13.6818},
|
||||
{2889.00, 13.6908},
|
||||
{2889.00, 8.0088},
|
||||
{2900.00, 8.0280},
|
||||
{3000.00, 8.1995},
|
||||
{3100.00, 8.3642},
|
||||
{3200.00, 8.5222},
|
||||
{3300.00, 8.6735},
|
||||
{3400.00, 8.8180},
|
||||
{3500.00, 8.9558},
|
||||
{4000.00, 9.5437},
|
||||
{4500.00, 9.9633},
|
||||
{5153.90, 10.2578},
|
||||
{5153.90, 11.0914},
|
||||
{5500.00, 11.1644},
|
||||
{6000.00, 11.2270},
|
||||
{6371.00, 11.2409}};
|
||||
|
||||
|
||||
// fortran test model
|
||||
inline const std::vector<std::vector<CUSTOMREAL>> model_1d_prem \
|
||||
= {{ 0.00, 5.80000},
|
||||
{ 15.00, 5.80000},
|
||||
{ 15.00, 6.80000},
|
||||
{ 24.40, 6.80000},
|
||||
{ 24.40, 8.11061},
|
||||
{ 40.00, 8.10119},
|
||||
{ 60.00, 8.08907},
|
||||
{ 80.00, 8.07688},
|
||||
{ 115.00, 8.05540},
|
||||
{ 150.00, 8.03370},
|
||||
{ 185.00, 8.01180},
|
||||
{ 220.00, 7.98970},
|
||||
{ 220.00, 8.55896},
|
||||
{ 265.00, 8.64552},
|
||||
{ 310.00, 8.73209},
|
||||
{ 355.00, 8.81867},
|
||||
{ 400.00, 8.90522},
|
||||
{ 400.00, 9.13397},
|
||||
{ 450.00, 9.38990},
|
||||
{ 500.00, 9.64588},
|
||||
{ 550.00, 9.90185},
|
||||
{ 600.00, 10.15782},
|
||||
{ 635.00, 10.21203},
|
||||
{ 670.00, 10.26622},
|
||||
{ 670.00, 10.75131},
|
||||
{ 721.00, 10.91005},
|
||||
{ 771.00, 11.06557},
|
||||
{ 871.00, 11.24490},
|
||||
{ 971.00, 11.41560},
|
||||
{ 1071.00, 11.57828},
|
||||
{ 1171.00, 11.73357},
|
||||
{ 1271.00, 11.88209},
|
||||
{ 1371.00, 12.02445},
|
||||
{ 1471.00, 12.16126},
|
||||
{ 1571.00, 12.29316},
|
||||
{ 1671.00, 12.42075},
|
||||
{ 1771.00, 12.54466},
|
||||
{ 1871.00, 12.66550},
|
||||
{ 1971.00, 12.78389},
|
||||
{ 2071.00, 12.90045},
|
||||
{ 2171.00, 13.01579},
|
||||
{ 2271.00, 13.13055},
|
||||
{ 2371.00, 13.24532},
|
||||
{ 2471.00, 13.36074},
|
||||
{ 2571.00, 13.47742},
|
||||
{ 2671.00, 13.59597},
|
||||
{ 2741.00, 13.68041},
|
||||
{ 2771.00, 13.68753},
|
||||
{ 2871.00, 13.71168},
|
||||
{ 2891.00, 13.71660},
|
||||
{ 2891.00, 8.06482},
|
||||
{ 2971.00, 8.19939},
|
||||
{ 3071.00, 8.36019},
|
||||
{ 3171.00, 8.51298},
|
||||
{ 3271.00, 8.65805},
|
||||
{ 3371.00, 8.79573},
|
||||
{ 3471.00, 8.92632},
|
||||
{ 3571.00, 9.05015},
|
||||
{ 3671.00, 9.16752},
|
||||
{ 3771.00, 9.27867},
|
||||
{ 3871.00, 9.38418},
|
||||
{ 3971.00, 9.48409},
|
||||
{ 4071.00, 9.57881},
|
||||
{ 4171.00, 9.66865},
|
||||
{ 4271.00, 9.75393},
|
||||
{ 4371.00, 9.83496},
|
||||
{ 4471.00, 9.91206},
|
||||
{ 4571.00, 9.98554},
|
||||
{ 4671.00, 10.05572},
|
||||
{ 4771.00, 10.12291},
|
||||
{ 4871.00, 10.18743},
|
||||
{ 4971.00, 10.24959},
|
||||
{ 5071.00, 10.30971},
|
||||
{ 5149.50, 10.35568},
|
||||
{ 5149.50, 11.02827},
|
||||
{ 5171.00, 11.03643},
|
||||
{ 5271.00, 11.07249},
|
||||
{ 5371.00, 11.10542},
|
||||
{ 5471.00, 11.13521},
|
||||
{ 5571.00, 11.16186},
|
||||
{ 5671.00, 11.18538},
|
||||
{ 5771.00, 11.20576},
|
||||
{ 5871.00, 11.22301},
|
||||
{ 5971.00, 11.23712},
|
||||
{ 6071.00, 11.24809},
|
||||
{ 6171.00, 11.25593},
|
||||
{ 6271.00, 11.26064},
|
||||
{ 6371.00, 11.26220}};
|
||||
|
||||
|
||||
#endif
|
||||
332
include/config.h
Normal file
332
include/config.h
Normal file
@@ -0,0 +1,332 @@
|
||||
#ifndef CONFIG_H
|
||||
#define CONFIG_H
|
||||
|
||||
#include <math.h>
|
||||
#include <mpi.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <limits>
|
||||
#include <iostream>
|
||||
#include "version.h"
|
||||
|
||||
// custom floating point accuracy
|
||||
#ifdef SINGLE_PRECISION
|
||||
#define CUSTOMREAL float
|
||||
#define MPI_CR MPI_FLOAT
|
||||
#else
|
||||
#define CUSTOMREAL double
|
||||
#define MPI_CR MPI_DOUBLE
|
||||
#endif
|
||||
|
||||
|
||||
#define MPI_DUMMY_TAG 1000
|
||||
|
||||
// version
|
||||
inline const std::string TOMOATT_VERSION = std::to_string(PROJECT_VERSION_MAJOR) +
|
||||
"." + std::to_string(PROJECT_VERSION_MINOR) +
|
||||
"." + std::to_string(PROJECT_VERSION_PATCH);
|
||||
|
||||
inline int loc_I, loc_J, loc_K;
|
||||
inline int loc_I_vis, loc_J_vis, loc_K_vis;
|
||||
inline int loc_I_excl_ghost, loc_J_excl_ghost, loc_K_excl_ghost;
|
||||
inline int n_inv_grids;
|
||||
inline int n_inv_I_loc, n_inv_J_loc, n_inv_K_loc;
|
||||
inline int n_inv_I_loc_ani, n_inv_J_loc_ani, n_inv_K_loc_ani;
|
||||
|
||||
// 3d indices to 1d index
|
||||
#define I2V(A,B,C) ((C)*loc_I*loc_J + (B)*loc_I + A)
|
||||
inline void V2I(const int& ijk, int& i, int& j, int& k) {
|
||||
k = ijk / (loc_I * loc_J);
|
||||
j = (ijk - k * loc_I * loc_J) / loc_I;
|
||||
i = ijk - k * loc_I * loc_J - j * loc_I;
|
||||
}
|
||||
|
||||
#define I2V_INV_GRIDS(A,B,C,D) ((D)*n_inv_I_loc*n_inv_J_loc*n_inv_K_loc + (C)*n_inv_I_loc*n_inv_J_loc + (B)*n_inv_I_loc + A)
|
||||
#define I2V_INV_KNL(A,B,C) ((C)*n_inv_I_loc*n_inv_J_loc + (B)*n_inv_I_loc + A)
|
||||
#define I2V_INV_GRIDS_1DK(A,B) ((B)*n_inv_K_loc + (A))
|
||||
#define I2V_INV_GRIDS_2DJ(A,B,C) ((C)*n_inv_J_loc*n_inv_K_loc + (B)*n_inv_J_loc + (A))
|
||||
#define I2V_INV_GRIDS_2DI(A,B,C) ((C)*n_inv_I_loc*n_inv_K_loc + (B)*n_inv_I_loc + (A))
|
||||
|
||||
#define I2V_INV_ANI_GRIDS(A,B,C,D) ((D)*n_inv_I_loc_ani*n_inv_J_loc_ani*n_inv_K_loc_ani + (C)*n_inv_I_loc_ani*n_inv_J_loc_ani + (B)*n_inv_I_loc_ani + A)
|
||||
#define I2V_INV_ANI_KNL(A,B,C) ((C)*n_inv_I_loc_ani*n_inv_J_loc_ani + (B)*n_inv_I_loc_ani + A)
|
||||
#define I2V_INV_ANI_GRIDS_1DK(A,B) ((B)*n_inv_K_loc_ani + (A))
|
||||
#define I2V_INV_ANI_GRIDS_1DJ(A,B,C) ((C)*n_inv_J_loc_ani*n_inv_K_loc_ani + (B)*n_inv_J_loc_ani + (A))
|
||||
#define I2V_INV_ANI_GRIDS_1DI(A,B,C) ((C)*n_inv_I_loc_ani*n_inv_K_loc_ani + (B)*n_inv_I_loc_ani + (A))
|
||||
|
||||
#define I2V_INV_GRIDS_1D_GENERIC(A,B,C_ninv) ((B)*C_ninv + (A))
|
||||
#define I2V_INV_GRIDS_2D_GENERIC(A,B,C,D_ninv,E_inv) ((C)*D_ninv*E_inv + (B)*D_ninv + (A))
|
||||
|
||||
#define I2V_EXCL_GHOST(A,B,C) ((C)* loc_I_excl_ghost * loc_J_excl_ghost + (B)* loc_I_excl_ghost + A)
|
||||
#define I2V_VIS(A,B,C) ((C)* loc_I_vis * loc_J_vis + (B)* loc_I_vis + A)
|
||||
#define I2V_3D(A,B,C) ((C)* loc_I_excl_ghost*loc_J_excl_ghost + (B)* loc_I_excl_ghost + A)
|
||||
#define I2V_ELM_CONN(A,B,C) ((C)*(loc_I_vis-1)*(loc_J_vis-1) + (B)*(loc_I_vis-1) + A)
|
||||
#define I2VLBFGS(A,B,C,D) ((A)*loc_I*loc_J*loc_K + (D)*loc_I*loc_J + (C)*loc_I + B)
|
||||
|
||||
// 2d indices to 1d index
|
||||
#define IJ2V(A,B,C) (C*loc_I*loc_J + (B)*loc_I + A)
|
||||
#define JK2V(A,B,C) (C*loc_J*loc_K + (B)*loc_J + A)
|
||||
#define IK2V(A,B,C) (C*loc_I*loc_K + (B)*loc_I + A)
|
||||
|
||||
inline const CUSTOMREAL eps = 1e-12;
|
||||
inline const CUSTOMREAL epsAdj = 1e-6;
|
||||
|
||||
inline bool isZero(CUSTOMREAL x) {
|
||||
return fabs(x) < eps;
|
||||
}
|
||||
|
||||
inline bool isNegative(CUSTOMREAL x) {
|
||||
return (x) < -eps;
|
||||
}
|
||||
|
||||
inline bool isPositive(CUSTOMREAL x) {
|
||||
return (x) > eps;
|
||||
}
|
||||
|
||||
inline bool isZeroAdj(CUSTOMREAL x) {
|
||||
return fabs(x) < epsAdj;
|
||||
}
|
||||
|
||||
// constants
|
||||
inline const CUSTOMREAL PI = 3.14159265358979323846264338327950288;
|
||||
inline const CUSTOMREAL DEG2RAD = PI/180.0;
|
||||
inline const CUSTOMREAL RAD2DEG = 180.0/PI;
|
||||
inline const CUSTOMREAL M2KM = 0.001;
|
||||
inline const CUSTOMREAL _0_CR = 0.0;
|
||||
inline const CUSTOMREAL _0_5_CR = 0.5;
|
||||
inline const CUSTOMREAL _1_CR = 1.0;
|
||||
inline const CUSTOMREAL _1_5_CR = 1.5;
|
||||
inline const CUSTOMREAL _2_CR = 2.0;
|
||||
inline const CUSTOMREAL _3_CR = 3.0;
|
||||
inline const CUSTOMREAL _4_CR = 4.0;
|
||||
inline const CUSTOMREAL _8_CR = 8.0;
|
||||
inline const CUSTOMREAL TAU_INITIAL_VAL = 1.0;
|
||||
inline const CUSTOMREAL TAU_INF_VAL = 20.0;
|
||||
inline const int SWEEPING_COEFF = 1.0; // coefficient for calculationg sigr/sigt/sigp for cpu
|
||||
|
||||
// ASCII output precision
|
||||
typedef std::numeric_limits< CUSTOMREAL > dbl;
|
||||
inline const int ASCII_OUTPUT_PRECISION = dbl::max_digits10;
|
||||
|
||||
// radious of the earth in km
|
||||
//inline CUSTOMREAL R_earth = 6378.1370;
|
||||
inline CUSTOMREAL R_earth = 6371.0; // for compatibility with fortran code
|
||||
inline const CUSTOMREAL GAMMA = 0.0;
|
||||
inline const CUSTOMREAL r_kermel_mask = 40.0;
|
||||
inline CUSTOMREAL step_length_init = 0.01; // update step size limit
|
||||
inline CUSTOMREAL step_length_init_sc = 0.001; // update step size limit (for station correction)
|
||||
inline CUSTOMREAL step_length_decay = 0.9;
|
||||
inline CUSTOMREAL step_length_down = 0.5;
|
||||
inline CUSTOMREAL step_length_up = 1.2;
|
||||
inline CUSTOMREAL step_length_lbfgs;
|
||||
inline int step_method = 1; // 0:modulate according to obj, 1: modulate according to gradient direction.
|
||||
inline CUSTOMREAL step_length_gradient_angle = 120.0;
|
||||
inline const int OBJ_DEFINED = 0;
|
||||
inline const int GRADIENT_DEFINED = 1;
|
||||
|
||||
// halve steping params
|
||||
inline const CUSTOMREAL HALVE_STEP_RATIO = 0.7;
|
||||
inline const CUSTOMREAL MAX_DIFF_RATIO_VOBJ = 0.8; // maximum difference ratio between vobj_t+1 and vobj_t
|
||||
inline const CUSTOMREAL HALVE_STEP_RESTORAION_RATIO = 0.7; // no restoration if == HALVE_STEP_RATIO
|
||||
|
||||
// RUN MODE TYPE FLAG
|
||||
inline const int ONLY_FORWARD = 0;
|
||||
inline const int DO_INVERSION = 1;
|
||||
inline const int SRC_RELOCATION = 2;
|
||||
inline const int INV_RELOC = 3;
|
||||
inline const int ONED_INVERSION = 4;
|
||||
inline const int TELESEIS_PREPROCESS = -1; // hiden function
|
||||
|
||||
// SWEEPING TYPE FLAG
|
||||
inline const int SWEEP_TYPE_LEGACY = 0;
|
||||
inline const int SWEEP_TYPE_LEVEL = 1;
|
||||
inline bool hybrid_stencil_order = false; // if true, code at first run 1st order, then change to 3rd order (Dong Cui 2020)
|
||||
|
||||
// STENCIL TYPE
|
||||
inline const int NON_UPWIND = 0;
|
||||
inline const int UPWIND = 1;
|
||||
|
||||
// convert depth <-> radius
|
||||
inline CUSTOMREAL depth2radius(CUSTOMREAL depth) {
|
||||
return R_earth - depth;
|
||||
}
|
||||
inline CUSTOMREAL radius2depth(CUSTOMREAL radius) {
|
||||
return R_earth - radius;
|
||||
}
|
||||
|
||||
// parallel calculation parameters
|
||||
// here we use so called "inline variables" for defining global variables
|
||||
inline int ngrid_i = 0; // number of grid points in i direction
|
||||
inline int ngrid_j = 0; // number of grid points in j direction
|
||||
inline int ngrid_k = 0; // number of grid points in k direction
|
||||
//inline int ngrid_i_inv = 0; // number of inversion grid points in i direction # DUPLICATED with n_inv_I_loc as all the subdomains have the same number of inversion grid points
|
||||
//inline int ngrid_j_inv = 0; // number of inversion grid points in j direction # DUPLICATED with n_inv_J_loc
|
||||
//inline int ngrid_k_inv = 0; // number of inversion grid points in k direction # DUPLICATED with n_inv_K_loc
|
||||
//inline int ngrid_i_inv_ani = 0; // number of inversion grid points in i direction for anisotropy # DUPLICATED with n_inv_I_loc_ani
|
||||
//inline int ngrid_j_inv_ani = 0; // number of inversion grid points in j direction for anisotropy # DUPLICATED with n_inv_J_loc_ani
|
||||
//inline int ngrid_k_inv_ani = 0; // number of inversion grid points in k direction for anisotropy # DUPLICATED with n_inv_K_loc_ani
|
||||
|
||||
// inversion grid type flags
|
||||
#define INV_GRID_REGULAR 0
|
||||
#define INV_GRID_FLEX 1
|
||||
#define INV_GRID_TRAPE 2
|
||||
|
||||
// mpi parameters
|
||||
inline int world_nprocs; // total number of processes (all groups)
|
||||
inline int world_rank; // mpi rank of this process (all groups)
|
||||
inline int sim_nprocs; // number of processes in this simulation group
|
||||
inline int sim_rank; // mpi rank of this process in this simulation group
|
||||
inline int inter_sim_rank;
|
||||
inline int sub_nprocs; // total number of processes
|
||||
inline int sub_rank; // mpi rank of this process
|
||||
inline int inter_sub_rank; // mpi rank of this process in the inter-subdomain communicator
|
||||
inline int inter_sub_nprocs; // number of processes in the inter-subdomain communicator
|
||||
inline int nprocs; // = n subdomains
|
||||
inline int myrank; // = id subdomain
|
||||
inline MPI_Comm sim_comm, inter_sim_comm, sub_comm, inter_sub_comm; // mpi communicator for simulation, inter-simulation, subdomain, and inter subdomains
|
||||
inline int n_sims = 1; // number of mpi groups for simultaneous runs
|
||||
inline int n_procs_each_sim = 1; // number of processes in each simulation group
|
||||
inline int n_subdomains = 1; // number of subdomains
|
||||
inline int n_subprocs = 1; // number of sub processes in each subdomain
|
||||
inline int ndiv_i = 1; // number of divisions in x direction
|
||||
inline int ndiv_j = 1; // number of divisions in y direction
|
||||
inline int ndiv_k = 1; // number of divisions in z direction
|
||||
inline int id_sim = 0; // simultaneous run id (not equal to src id) (parallel level 1)
|
||||
inline int id_subdomain = 0; // subdomain id (parallel level 2)
|
||||
inline bool subdom_main = false; // true if this process is main process in subdomain (parallel level 3)
|
||||
|
||||
// flags for explaining the process's role
|
||||
inline bool proc_read_srcrec = false; // true if this process is reading source file
|
||||
inline bool proc_store_srcrec = false; // true if this process is storing srcrec file
|
||||
|
||||
// MNMN stop using these global variable for avoiding misleadings during the sources' iteration loop
|
||||
//inline int id_sim_src = 0; // id of current target source
|
||||
//inline std::string name_sim_src = "unknown"; //name of current target source
|
||||
|
||||
// rule to distribute source to simultaneous group
|
||||
inline int select_id_sim_for_src(const int& i_src, const int& n_sims){
|
||||
return i_src % n_sims;
|
||||
}
|
||||
|
||||
// read input yaml file
|
||||
inline std::string input_file="input_params.yaml";
|
||||
// output directory name
|
||||
inline std::string output_dir="./OUTPUT_FILES/";
|
||||
// output file format
|
||||
#define OUTPUT_FORMAT_HDF5 11111
|
||||
#define OUTPUT_FORMAT_ASCII 22222
|
||||
//#define OUTPUT_FORMAT_BINARY 33333 //#TODO: add
|
||||
inline int output_format = OUTPUT_FORMAT_HDF5; // 0 - ascii, 1 - hdf5, 2 - binary
|
||||
|
||||
// HDF5 io mode (independent or collective)
|
||||
#ifdef USE_HDF5_IO_COLLECTIVE
|
||||
#define HDF5_IO_MODE H5FD_MPIO_COLLECTIVE
|
||||
#else
|
||||
#define HDF5_IO_MODE H5FD_MPIO_INDEPENDENT
|
||||
#endif
|
||||
|
||||
// smooth parameters
|
||||
inline int smooth_method = 0; // 0: multi grid parametrization, 1: laplacian smoothing
|
||||
inline CUSTOMREAL smooth_lp = 1.0;
|
||||
inline CUSTOMREAL smooth_lt = 1.0;
|
||||
inline CUSTOMREAL smooth_lr = 1.0;
|
||||
inline CUSTOMREAL regul_lp = 1.0;
|
||||
inline CUSTOMREAL regul_lt = 1.0;
|
||||
inline CUSTOMREAL regul_lr = 1.0;
|
||||
inline const int GRADIENT_DESCENT = 0;
|
||||
inline const int HALVE_STEPPING_MODE = 1;
|
||||
inline const int LBFGS_MODE = 2;
|
||||
inline int optim_method = 0; // 0: gradient descent, 1: halve_stepping, 2: LBFGS
|
||||
inline const CUSTOMREAL wolfe_c1 = 1e-4;
|
||||
inline const CUSTOMREAL wolfe_c2 = 0.9;
|
||||
inline const int Mbfgs = 5; // number of gradients/models stored in memory
|
||||
inline CUSTOMREAL regularization_weight = 0.5; // regularization weight
|
||||
inline int max_sub_iterations = 20; // maximum number of sub-iterations
|
||||
inline const CUSTOMREAL LBFGS_RELATIVE_step_length = 3.0; // relative step size for the second and later iteration
|
||||
inline CUSTOMREAL Kdensity_coe = 0.0; // a preconditioner \in [0,1], kernel -> kernel / pow(kernel density, Kdensity_coe)
|
||||
// variables for test
|
||||
inline bool if_test = false;
|
||||
|
||||
// verboose mode
|
||||
inline bool if_verbose = false;
|
||||
//inline bool if_verbose = true;
|
||||
|
||||
// if use gpu
|
||||
inline bool use_gpu;
|
||||
|
||||
// total number of sources in the srcrec file
|
||||
inline int nsrc_total = 0;
|
||||
|
||||
// flag if common receiver double difference data is used
|
||||
inline bool src_pair_exists = false;
|
||||
|
||||
|
||||
// weight of different typs of data
|
||||
inline CUSTOMREAL abs_time_local_weight = 1.0; // weight of absolute traveltime data for local earthquake, default: 1.0
|
||||
inline CUSTOMREAL cr_dif_time_local_weight = 1.0; // weight of common receiver differential traveltime data for local earthquake, default: 1.0
|
||||
inline CUSTOMREAL cs_dif_time_local_weight = 1.0; // weight of common source differential traveltime data for local earthquake, default: 1.0 (not ready)
|
||||
inline CUSTOMREAL teleseismic_weight = 1.0; // weight of teleseismic data default: 1.0 (not ready)
|
||||
inline CUSTOMREAL abs_time_local_weight_reloc = 1.0; // weight of absolute traveltime data for local earthquake, default: 1.0
|
||||
inline CUSTOMREAL cs_dif_time_local_weight_reloc = 1.0; // weight of common source differential traveltime data for local earthquake, default: 1.0
|
||||
inline CUSTOMREAL cr_dif_time_local_weight_reloc = 1.0; // weight of common receiver differential traveltime data for local earthquake, default: 1.0
|
||||
inline CUSTOMREAL teleseismic_weight_reloc = 1.0; // weight of teleseismic data default: 1.0 (not ready)
|
||||
|
||||
// misfit balance
|
||||
inline bool balance_data_weight = false; // add the weight to normalize the initial objective function of different types of data. 1 for yes and 0 for no
|
||||
inline CUSTOMREAL total_abs_local_data_weight = 0.0;
|
||||
inline CUSTOMREAL total_cr_dif_local_data_weight = 0.0;
|
||||
inline CUSTOMREAL total_cs_dif_local_data_weight = 0.0;
|
||||
inline CUSTOMREAL total_teleseismic_data_weight = 0.0;
|
||||
inline bool balance_data_weight_reloc = false; // add the weight to normalize the initial objective function of different types of data for relocation. 1 for yes and 0 for no
|
||||
inline CUSTOMREAL total_abs_local_data_weight_reloc = 0.0;
|
||||
inline CUSTOMREAL total_cr_dif_local_data_weight_reloc = 0.0;
|
||||
inline CUSTOMREAL total_cs_dif_local_data_weight_reloc = 0.0;
|
||||
inline CUSTOMREAL total_teleseismic_data_weight_reloc = 0.0;
|
||||
|
||||
// 2d solver parameters for teleseismic data
|
||||
// use fixed domain size for all 2d simulations in teleseismic data
|
||||
inline CUSTOMREAL rmin_2d = 3370.5;
|
||||
inline CUSTOMREAL rmax_2d = 6471.5;
|
||||
inline CUSTOMREAL tmin_2d = -5.0/180.0*PI;
|
||||
inline CUSTOMREAL tmax_2d = 120.0/180.0*PI;
|
||||
inline CUSTOMREAL dr_2d = 2.0;
|
||||
inline CUSTOMREAL dt_2d = PI/1000.0;
|
||||
inline const CUSTOMREAL TOL_2D_SOLVER = 1e-5;
|
||||
inline const CUSTOMREAL MAX_ITER_2D_SOLVER = 4000;
|
||||
inline const CUSTOMREAL SWEEPING_COEFF_TELE = 1.05; // coefficient for calculationg sigr/sigt/sigp
|
||||
inline const int N_LAYER_SRC_BOUND = 1; // number of layers for source boundary
|
||||
inline CUSTOMREAL DIST_SRC_DDT = 2.5*DEG2RAD; // distance threshold of two stations
|
||||
inline const std::string OUTPUT_DIR_2D = "/2D_TRAVEL_TIME_FIELD/"; // output directory for 2d solver
|
||||
|
||||
// earthquake relocation
|
||||
inline CUSTOMREAL step_length_src_reloc = 0.01; // step length for source relocation
|
||||
inline CUSTOMREAL step_length_decay_src_reloc = 0.9;
|
||||
inline int N_ITER_MAX_SRC_RELOC = 501; // max iteration for source location
|
||||
inline CUSTOMREAL TOL_SRC_RELOC = 1e-3; // threshold of the norm of gradient for stopping single earthquake location
|
||||
inline const CUSTOMREAL TOL_step_length = 1e-4; // threshold of the max step size for stopping single earthquake location
|
||||
inline CUSTOMREAL rescaling_dep = 10.0;
|
||||
inline CUSTOMREAL rescaling_lat = 5.0;
|
||||
inline CUSTOMREAL rescaling_lon = 5.0;
|
||||
inline CUSTOMREAL rescaling_ortime = 0.5;
|
||||
inline CUSTOMREAL max_change_dep = 10.0; // default: max change is 10 km
|
||||
inline CUSTOMREAL max_change_lat = 5.0; // default: max change is 5 km
|
||||
inline CUSTOMREAL max_change_lon = 5.0; // default: max change is 5 km
|
||||
inline CUSTOMREAL max_change_ortime = 0.5; // default: max change is 0.5 s
|
||||
inline bool ortime_local_search = true;
|
||||
inline int min_Ndata_reloc = 4; // if an earthquake is recorded by less than <min_Ndata> times, relocation is not allowed.
|
||||
|
||||
// inversion strategy parameters
|
||||
inline int inv_mode = 1;
|
||||
inline int model_update_N_iter = 1;
|
||||
inline int relocation_N_iter = 1;
|
||||
inline int max_loop_mode0 = 10;
|
||||
inline int max_loop_mode1 = 10;
|
||||
|
||||
// INV MODE TYPE FLAG
|
||||
inline const int ITERATIVE = 0;
|
||||
inline const int SIMULTANEOUS = 1;
|
||||
|
||||
// source receiver weight calculation
|
||||
inline CUSTOMREAL ref_value = 1.0; // reference value for source receiver weight calculation
|
||||
inline std::string output_file_weight = "srcrec_weight.txt"; // output file name for source receiver weight calculation
|
||||
|
||||
#endif // CONFIG_H
|
||||
79
include/eikonal_solver_2d.h
Normal file
79
include/eikonal_solver_2d.h
Normal file
@@ -0,0 +1,79 @@
|
||||
#ifndef EIKONAL_SOLVER_2D_H
|
||||
#define EIKONAL_SOLVER_2D_H
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
#include "utils.h"
|
||||
#include "input_params.h"
|
||||
#include "grid.h"
|
||||
#include "1d_models.h"
|
||||
#include "mpi_funcs.h"
|
||||
#include "io.h"
|
||||
|
||||
|
||||
class PlainGrid {
|
||||
|
||||
public:
|
||||
PlainGrid(Source& src, InputParams& IP);
|
||||
~PlainGrid();
|
||||
|
||||
void run_iteration(InputParams& IP);
|
||||
void run_iteration_upwind(InputParams& IP);
|
||||
|
||||
//private:
|
||||
// grid parameters
|
||||
CUSTOMREAL src_r, src_t, src_p;
|
||||
CUSTOMREAL src_t_dummy;
|
||||
CUSTOMREAL rmin_3d, rmax_3d, tmin_3d, tmax_3d, pmin_3d, pmax_3d;
|
||||
CUSTOMREAL max_degree;
|
||||
int nr_2d, nt_2d;
|
||||
|
||||
// for eikonal solver
|
||||
int count_cand;
|
||||
int ii,ii_nr,ii_n2r,ii_pr,ii_p2r,ii_nt,ii_n2t,ii_pt,ii_p2t;
|
||||
CUSTOMREAL ar,br,at,bt,ar1,ar2,at1,at2,br1,br2,bt1,bt2;
|
||||
CUSTOMREAL eqn_a, eqn_b, eqn_c, eqn_Delta;
|
||||
CUSTOMREAL tmp_tau, T_r, T_t, charact_r, charact_t;
|
||||
bool is_causality;
|
||||
std::vector<CUSTOMREAL> canditate = std::vector<CUSTOMREAL>(60);
|
||||
|
||||
// arrays
|
||||
// azimuth
|
||||
CUSTOMREAL *azimuth_2d;
|
||||
// epicentral distance
|
||||
CUSTOMREAL *epicentral_distance_2d;
|
||||
// activated boundaries
|
||||
//bool *activated_boundaries;
|
||||
// grid
|
||||
CUSTOMREAL *r_2d, *t_2d; // coordinates
|
||||
CUSTOMREAL *fun_2d, *fac_a_2d, *fac_b_2d, *u_2d, *T_2d; // functions
|
||||
CUSTOMREAL *T0v_2d, *T0r_2d, *T0t_2d, *tau_2d, *tau_old_2d;
|
||||
bool *is_changed_2d;
|
||||
|
||||
// member functions
|
||||
void allocate_arrays();
|
||||
void deallocate_arrays();
|
||||
void load_1d_model(std::string);
|
||||
void select_1d_model(std::string);
|
||||
void calculate_stencil_2d(int&, int&);
|
||||
void calculate_stencil_upwind_2d(int&, int&);
|
||||
CUSTOMREAL eps_2d = 1e-12;
|
||||
|
||||
private:
|
||||
|
||||
// # TODO: make id model type selectable by input file
|
||||
// 1D model used for 2D solver
|
||||
std::vector<std::vector<CUSTOMREAL>> model_1d_ref = model_1d_prem;
|
||||
|
||||
|
||||
};
|
||||
|
||||
void prepare_teleseismic_boundary_conditions(InputParams&, Grid&, IO_utils&);
|
||||
void run_2d_solver(InputParams&, Source&, IO_utils&);
|
||||
void interp2d(PlainGrid&, CUSTOMREAL, CUSTOMREAL, CUSTOMREAL&);
|
||||
void load_2d_traveltime(InputParams&, Source&, Grid&, IO_utils&);
|
||||
std::string get_2d_tt_filename(const std::string&, Source&);
|
||||
|
||||
#endif // EIKONAL_SOLVER_2D_H
|
||||
2
include/grid.fwd.h
Normal file
2
include/grid.fwd.h
Normal file
@@ -0,0 +1,2 @@
|
||||
#pragma once
|
||||
class Grid;
|
||||
445
include/grid.h
Normal file
445
include/grid.h
Normal file
@@ -0,0 +1,445 @@
|
||||
#ifndef GRID_H
|
||||
#define GRID_H
|
||||
|
||||
// to handle circular dependency
|
||||
#pragma once
|
||||
#include "grid.fwd.h"
|
||||
#include "source.fwd.h"
|
||||
#include "io.fwd.h"
|
||||
|
||||
#include <vector>
|
||||
#include <array>
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <iomanip>
|
||||
|
||||
#include "utils.h"
|
||||
#include "config.h"
|
||||
#include "mpi_funcs.h"
|
||||
#include "input_params.h"
|
||||
#include "source.h"
|
||||
#include "io.h"
|
||||
#include "inv_grid.h"
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda.h>
|
||||
#endif
|
||||
|
||||
class Grid {
|
||||
public:
|
||||
Grid(InputParams&, IO_utils&);
|
||||
~Grid();
|
||||
//
|
||||
// setter
|
||||
//
|
||||
// set physical parameters
|
||||
void setup_grid_params(InputParams&, IO_utils&); // setup grid parameters
|
||||
// set factors
|
||||
void setup_factors(Source &);
|
||||
// calculate initial fields T0 T0r T0t T0p and initialize tau
|
||||
void initialize_fields(Source &, InputParams&);
|
||||
// calculate initial fields T0 T0r T0t T0p and initialize tau for teleseismic source
|
||||
void initialize_fields_teleseismic();
|
||||
// calculate L1 and Linf diff (sum of value change on the nodes)
|
||||
void calc_L1_and_Linf_diff(CUSTOMREAL&, CUSTOMREAL&);
|
||||
// calculate L1 and Linf diff for teleseismic source (sum of value change on the nodes)
|
||||
void calc_L1_and_Linf_diff_tele(CUSTOMREAL&, CUSTOMREAL&);
|
||||
// FOR TEST: calculate L1 and Linf error between analytic solution and current T
|
||||
void calc_L1_and_Linf_error(CUSTOMREAL&, CUSTOMREAL&);
|
||||
// calculate L1 and Linf diff for adjoint field
|
||||
void calc_L1_and_Linf_diff_adj(CUSTOMREAL&, CUSTOMREAL&);
|
||||
// calculate L1 and Linf diff for density of adjoint field
|
||||
void calc_L1_and_Linf_diff_adj_density(CUSTOMREAL&, CUSTOMREAL&);
|
||||
|
||||
// send and receive data to/from other subdomains
|
||||
void send_recev_boundary_data(CUSTOMREAL*);
|
||||
void prepare_boundary_data_to_send(CUSTOMREAL*);
|
||||
void assign_received_data_to_ghost(CUSTOMREAL*);
|
||||
void send_recev_boundary_data_av(CUSTOMREAL*);
|
||||
void assign_received_data_to_ghost_av(CUSTOMREAL*);
|
||||
// send and receive data to/from the neighbor subdomains which contact on a line or point
|
||||
void send_recev_boundary_data_kosumi(CUSTOMREAL*);
|
||||
void prepare_boundary_data_to_send_kosumi(CUSTOMREAL*);
|
||||
void assign_received_data_to_ghost_kosumi(CUSTOMREAL*);
|
||||
|
||||
// inversion methods
|
||||
InvGrid* inv_grid; // inversion grid definitions
|
||||
|
||||
// check model discontinuity
|
||||
void check_velocity_discontinuity();
|
||||
|
||||
void reinitialize_abcf(); // reinitialize factors
|
||||
void rejuvenate_abcf(); // reinitialize factors for earthquake relocation
|
||||
void initialize_kernels(); // fill 0 to kernels
|
||||
|
||||
//
|
||||
// getters
|
||||
//
|
||||
|
||||
int get_ngrid_total_excl_ghost(){return loc_nnodes;};
|
||||
int get_nelms_total_excl_ghost(){return loc_nelms;};
|
||||
int get_offset_nnodes() {return offset_nnodes;};
|
||||
int get_offset_elms() {return offset_nelms;};
|
||||
void get_offsets_3d(int* arr) {
|
||||
arr[0] = offset_k;
|
||||
arr[1] = offset_j;
|
||||
arr[2] = offset_i;
|
||||
};
|
||||
|
||||
int get_ngrid_total_vis() {return loc_nnodes_vis;};
|
||||
int get_nelms_total_vis() {return loc_nelms_vis;};
|
||||
int get_offset_nnodes_vis(){return offset_nnodes_vis;};
|
||||
int get_offset_elms_vis() {return offset_nelms_vis;};
|
||||
|
||||
// return dimensions of node coordinates arra and connectivity array
|
||||
std::array<int,2> get_dim_nodes_coords() {return {offset_nnodes,3};};
|
||||
std::array<int,2> get_dim_elms_conn() {return {offset_nelms,9};};
|
||||
|
||||
// return node coordinates array and connectivity array
|
||||
int* get_elms_conn() {return elms_conn;};
|
||||
CUSTOMREAL* get_nodes_coords_x(){return x_loc_3d;}; // lon
|
||||
CUSTOMREAL* get_nodes_coords_y(){return y_loc_3d;}; // lat
|
||||
CUSTOMREAL* get_nodes_coords_z(){return z_loc_3d;}; // r
|
||||
CUSTOMREAL* get_nodes_coords_p(){return p_loc_3d;}; // lon
|
||||
CUSTOMREAL* get_nodes_coords_t(){return t_loc_3d;}; // lat
|
||||
CUSTOMREAL* get_nodes_coords_r(){return r_loc_3d;}; // r
|
||||
int* get_proc_dump() {return my_proc_dump;}; // array storing the process ids
|
||||
|
||||
CUSTOMREAL* get_true_solution(){return get_array_for_vis(u_loc, false);}; // true solution
|
||||
CUSTOMREAL* get_fun() {return get_array_for_vis(fun_loc, false);}; //
|
||||
CUSTOMREAL* get_xi() {return get_array_for_vis(xi_loc, false);}; //
|
||||
CUSTOMREAL* get_eta() {return get_array_for_vis(eta_loc, false);}; //
|
||||
CUSTOMREAL* get_a() {return get_array_for_vis(fac_a_loc, false);}; //
|
||||
CUSTOMREAL* get_b() {return get_array_for_vis(fac_b_loc, false);}; //
|
||||
CUSTOMREAL* get_c() {return get_array_for_vis(fac_c_loc, false);}; //
|
||||
CUSTOMREAL* get_f() {return get_array_for_vis(fac_f_loc, false);}; //
|
||||
CUSTOMREAL* get_vel() {return get_array_for_vis(fun_loc, true);}; // true velocity field
|
||||
CUSTOMREAL* get_T0v() {return get_array_for_vis(T0v_loc, false);}; // initial T0
|
||||
CUSTOMREAL* get_u() {return get_array_for_vis(u_loc, false);}; // current solution
|
||||
CUSTOMREAL* get_tau() {return get_array_for_vis(tau_loc, false);}; // current tau
|
||||
CUSTOMREAL* get_T() {return get_array_for_vis(T_loc, false);};
|
||||
CUSTOMREAL* get_residual() { calc_residual();
|
||||
return get_array_for_vis(u_loc, false);}; // calculate residual (T_loc over written!!)
|
||||
CUSTOMREAL* get_Tadj() {return get_array_for_vis(Tadj_loc, false);}; // adjoint solution
|
||||
CUSTOMREAL* get_Ks() {return get_array_for_vis(Ks_loc, false);}; // Ks
|
||||
CUSTOMREAL* get_Kxi() {return get_array_for_vis(Kxi_loc, false);}; // Kxi
|
||||
CUSTOMREAL* get_Keta() {return get_array_for_vis(Keta_loc, false);}; // Keta
|
||||
CUSTOMREAL* get_Ks_density() {return get_array_for_vis(Ks_density_loc, false);}; // Ks_density
|
||||
CUSTOMREAL* get_Kxi_density() {return get_array_for_vis(Kxi_density_loc, false);}; // Kxi_density
|
||||
CUSTOMREAL* get_Keta_density() {return get_array_for_vis(Keta_density_loc, false);}; // Keta_density
|
||||
CUSTOMREAL* get_Ks_update() {return get_array_for_vis(Ks_update_loc, false);}; // Ks_update
|
||||
CUSTOMREAL* get_Kxi_update() {return get_array_for_vis(Kxi_update_loc, false);}; // Kxi_update
|
||||
CUSTOMREAL* get_Keta_update() {return get_array_for_vis(Keta_update_loc, false);}; // Keta_update
|
||||
CUSTOMREAL* get_Ks_density_update() {return get_array_for_vis(Ks_density_update_loc,false);}; // Ks_density_update
|
||||
CUSTOMREAL* get_Kxi_density_update() {return get_array_for_vis(Kxi_density_update_loc,false);}; // Kxi_density_update
|
||||
CUSTOMREAL* get_Keta_density_update() {return get_array_for_vis(Keta_density_update_loc,false);}; // Keta_density_update
|
||||
|
||||
CUSTOMREAL* get_Ks_descent_dir() {return get_array_for_vis(Ks_descent_dir_loc, false);}; // Ks_descent_dir
|
||||
CUSTOMREAL* get_Kxi_descent_dir(){return get_array_for_vis(Kxi_descent_dir_loc,false);}; // Kxi_descent_dir
|
||||
CUSTOMREAL* get_Keta_descent_dir(){return get_array_for_vis(Keta_descent_dir_loc,false);}; // Keta_descent_dir
|
||||
|
||||
// get physical parameters
|
||||
CUSTOMREAL get_r_min() {return r_min;};
|
||||
CUSTOMREAL get_r_max() {return r_max;};
|
||||
CUSTOMREAL get_lat_min() {return lat_min;};
|
||||
CUSTOMREAL get_lat_max() {return lat_max;};
|
||||
CUSTOMREAL get_lon_min() {return lon_min;};
|
||||
CUSTOMREAL get_lon_max() {return lon_max;};
|
||||
CUSTOMREAL get_delta_lon() {return dlon;};
|
||||
CUSTOMREAL get_delta_lat() {return dlat;};
|
||||
CUSTOMREAL get_delta_r() {return dr;};
|
||||
CUSTOMREAL get_r_min_loc_excl_gl() {return r_loc_1d[k_start_loc];};
|
||||
CUSTOMREAL get_r_max_loc_excl_gl() {return r_loc_1d[k_end_loc];};
|
||||
CUSTOMREAL get_lat_min_loc_excl_gl() {return t_loc_1d[j_start_loc];};
|
||||
CUSTOMREAL get_lat_max_loc_excl_gl() {return t_loc_1d[j_end_loc];};
|
||||
CUSTOMREAL get_lon_min_loc_excl_gl() {return p_loc_1d[i_start_loc];};
|
||||
CUSTOMREAL get_lon_max_loc_excl_gl() {return p_loc_1d[i_end_loc];};
|
||||
CUSTOMREAL get_r_min_loc() {return r_loc_1d[0];};
|
||||
CUSTOMREAL get_r_max_loc() {return r_loc_1d[loc_K-1];};
|
||||
CUSTOMREAL get_lat_min_loc() {return t_loc_1d[0];};
|
||||
CUSTOMREAL get_lat_max_loc() {return t_loc_1d[loc_J-1];};
|
||||
CUSTOMREAL get_lon_min_loc() {return p_loc_1d[0];};
|
||||
CUSTOMREAL get_lon_max_loc() {return p_loc_1d[loc_I-1];};
|
||||
CUSTOMREAL get_r_by_index(int i) {return r_loc_1d[i];};
|
||||
CUSTOMREAL get_lat_by_index(int j) {return t_loc_1d[j];};
|
||||
CUSTOMREAL get_lon_by_index(int i) {return p_loc_1d[i];};
|
||||
|
||||
int get_offset_i() {return offset_i-i_start_loc;}; // return offset index (global index of the first node)
|
||||
int get_offset_j() {return offset_j-j_start_loc;};
|
||||
int get_offset_k() {return offset_k-k_start_loc;};
|
||||
int get_offset_i_excl_gl() {return offset_i;}; // return offset index
|
||||
int get_offset_j_excl_gl() {return offset_j;};
|
||||
int get_offset_k_excl_gl() {return offset_k;};
|
||||
|
||||
// return index of the first node excluding ghost layer
|
||||
int get_k_start_loc() {return k_start_loc;};
|
||||
int get_k_end_loc() {return k_end_loc;};
|
||||
int get_j_start_loc() {return j_start_loc;};
|
||||
int get_j_end_loc() {return j_end_loc;};
|
||||
int get_i_start_loc() {return i_start_loc;};
|
||||
int get_i_end_loc() {return i_end_loc;};
|
||||
|
||||
// copy tau to tau old
|
||||
void tau2tau_old();
|
||||
// copy T to tau old
|
||||
void T2tau_old();
|
||||
// copy tau to Tadj
|
||||
void update_Tadj();
|
||||
// copy tau to Tadj
|
||||
void update_Tadj_density();
|
||||
// back up fun xi eta
|
||||
void back_up_fun_xi_eta_bcf();
|
||||
// restore fun xi eta
|
||||
void restore_fun_xi_eta_bcf();
|
||||
|
||||
// write out inversion grid file
|
||||
void write_inversion_grid_file();
|
||||
|
||||
private:
|
||||
|
||||
//
|
||||
// member variables
|
||||
//
|
||||
// i: longitude, phi,
|
||||
// j: latitude, theta,
|
||||
// k: radius, r
|
||||
|
||||
// starting node id for each direction expect the boundary
|
||||
int i_start_loc=0, j_start_loc=0, k_start_loc=0;
|
||||
// end node id for each direction expect the boundary
|
||||
int i_end_loc=0, j_end_loc=0, k_end_loc=0;
|
||||
|
||||
// neighbors domain_id (-1 if no neighbor)
|
||||
// order of directions: -i,+i,-j,+j,-k,+k
|
||||
std::vector<int> neighbors_id{-1,-1,-1,-1,-1,-1};
|
||||
// neighbors domain_id line and point contact
|
||||
// ij plane nn, np, pn, pp
|
||||
std::vector<int> neighbors_id_ij{-1,-1,-1,-1};
|
||||
// jk plane nn, np, pn, pp
|
||||
std::vector<int> neighbors_id_jk{-1,-1,-1,-1};
|
||||
// ik plane nn, np, pn, pp
|
||||
std::vector<int> neighbors_id_ik{-1,-1,-1,-1};
|
||||
// ijk plane nnn, nnp, npn, npp, pnn, pnp, ppn, ppp
|
||||
std::vector<int> neighbors_id_ijk{-1,-1,-1,-1,-1,-1,-1,-1};
|
||||
|
||||
// layer id of this domain
|
||||
int domain_i, domain_j, domain_k;
|
||||
|
||||
// number of points on each boundary surface
|
||||
int n_grid_bound_i, n_grid_bound_j, n_grid_bound_k;
|
||||
|
||||
// mpi parameters
|
||||
int offset_nnodes=0, offset_nelms=0;
|
||||
int offset_i=0, offset_j=0, offset_k=0; // offset of this domain in the global grid
|
||||
|
||||
// number of nodes and elements on each subdomain
|
||||
int loc_nnodes, loc_nelms;
|
||||
int *nnodes, *nelms;
|
||||
|
||||
// arrays for visualization
|
||||
//int loc_I_vis, loc_J_vis, loc_K_vis;
|
||||
int i_start_vis, j_start_vis, k_start_vis;
|
||||
int i_end_vis, j_end_vis, k_end_vis;
|
||||
int loc_nnodes_vis, loc_nelms_vis;
|
||||
int *nnodes_vis, *nelms_vis;
|
||||
int offset_nnodes_vis=0, offset_nelms_vis=0;
|
||||
|
||||
public:
|
||||
// 3d arrays
|
||||
CUSTOMREAL *xi_loc; // local xi
|
||||
CUSTOMREAL *eta_loc; // local eta
|
||||
CUSTOMREAL *zeta_loc; // local zeta
|
||||
CUSTOMREAL *fac_a_loc; // factor a
|
||||
CUSTOMREAL *fac_b_loc; // factor b
|
||||
CUSTOMREAL *fac_c_loc; // factor c
|
||||
CUSTOMREAL *fac_f_loc; // factor f
|
||||
CUSTOMREAL *fun_loc;
|
||||
CUSTOMREAL *T_loc;
|
||||
CUSTOMREAL *T0v_loc, *T0r_loc, *T0p_loc, *T0t_loc;
|
||||
CUSTOMREAL *tau_loc;
|
||||
CUSTOMREAL *tau_old_loc;
|
||||
bool *is_changed;
|
||||
// for inversion backup
|
||||
CUSTOMREAL *fun_loc_back;
|
||||
CUSTOMREAL *xi_loc_back;
|
||||
CUSTOMREAL *eta_loc_back;
|
||||
CUSTOMREAL *fac_b_loc_back;
|
||||
CUSTOMREAL *fac_c_loc_back;
|
||||
CUSTOMREAL *fac_f_loc_back;
|
||||
// for lbfgs
|
||||
CUSTOMREAL *Ks_grad_store_loc, *Keta_grad_store_loc, *Kxi_grad_store_loc;
|
||||
CUSTOMREAL *Ks_model_store_loc, *Keta_model_store_loc, *Kxi_model_store_loc;
|
||||
CUSTOMREAL *Ks_descent_dir_loc, *Keta_descent_dir_loc, *Kxi_descent_dir_loc;
|
||||
CUSTOMREAL *fun_regularization_penalty_loc, *eta_regularization_penalty_loc, *xi_regularization_penalty_loc;
|
||||
CUSTOMREAL *fun_gradient_regularization_penalty_loc, *eta_gradient_regularization_penalty_loc, *xi_gradient_regularization_penalty_loc;
|
||||
CUSTOMREAL *fun_prior_loc, *eta_prior_loc, *xi_prior_loc; // *zeta_prior_loc; TODO
|
||||
// tmp array for file IO
|
||||
CUSTOMREAL *vis_data;
|
||||
|
||||
private:
|
||||
// windows for shm arrays
|
||||
MPI_Win win_fac_a_loc, win_fac_b_loc, win_fac_c_loc, win_fac_f_loc;
|
||||
MPI_Win win_T0r_loc, win_T0p_loc, win_T0t_loc, win_T0v_loc;
|
||||
MPI_Win win_tau_loc, win_fun_loc;
|
||||
MPI_Win win_is_changed;
|
||||
MPI_Win win_T_loc, win_tau_old_loc;
|
||||
MPI_Win win_xi_loc, win_eta_loc, win_zeta_loc;
|
||||
MPI_Win win_r_loc_1d, win_t_loc_1d, win_p_loc_1d;
|
||||
|
||||
CUSTOMREAL *x_loc_3d; // local (lon) x (global position)
|
||||
CUSTOMREAL *y_loc_3d; // local (lat) y (global position)
|
||||
CUSTOMREAL *z_loc_3d; // local (r ) z (global position)
|
||||
CUSTOMREAL *p_loc_3d; // local lon (x) (global position)
|
||||
CUSTOMREAL *t_loc_3d; // local lat (y) (global position)
|
||||
CUSTOMREAL *r_loc_3d; // local r (z) (global position)
|
||||
int *elms_conn; // connectivity array
|
||||
int *my_proc_dump; // dump process id for each node DEBUG
|
||||
public:
|
||||
// 1d arrays for coordinates, storing only subdomain's local coordinates
|
||||
CUSTOMREAL *r_loc_1d; // radius z in kilo meter
|
||||
CUSTOMREAL *t_loc_1d; // theta lat y in radian
|
||||
CUSTOMREAL *p_loc_1d; // phi lon x in radian
|
||||
private:
|
||||
// arrays for inversion
|
||||
// TODO: check if those arrays can use shm
|
||||
bool inverse_flag = false;
|
||||
//int n_inv_grids; // in config.h
|
||||
//int n_inv_I_loc, n_inv_J_loc, n_inv_K_loc; // in config.h
|
||||
public:
|
||||
CUSTOMREAL *Ks_loc;
|
||||
CUSTOMREAL *Kxi_loc;
|
||||
CUSTOMREAL *Keta_loc;
|
||||
CUSTOMREAL *Ks_density_loc;
|
||||
CUSTOMREAL *Kxi_density_loc;
|
||||
CUSTOMREAL *Keta_density_loc;
|
||||
CUSTOMREAL *Tadj_loc; // timetable for adjoint source
|
||||
CUSTOMREAL *Tadj_density_loc; // timetable for density of adjoint source
|
||||
CUSTOMREAL *Ks_inv_loc;
|
||||
CUSTOMREAL *Kxi_inv_loc;
|
||||
CUSTOMREAL *Keta_inv_loc;
|
||||
CUSTOMREAL *Ks_density_inv_loc;
|
||||
CUSTOMREAL *Kxi_density_inv_loc;
|
||||
CUSTOMREAL *Keta_density_inv_loc;
|
||||
// model update para
|
||||
CUSTOMREAL *Ks_update_loc;
|
||||
CUSTOMREAL *Kxi_update_loc;
|
||||
CUSTOMREAL *Keta_update_loc;
|
||||
CUSTOMREAL *Ks_density_update_loc;
|
||||
CUSTOMREAL *Kxi_density_update_loc;
|
||||
CUSTOMREAL *Keta_density_update_loc;
|
||||
// model update para of the previous step
|
||||
CUSTOMREAL *Ks_update_loc_previous;
|
||||
CUSTOMREAL *Kxi_update_loc_previous;
|
||||
CUSTOMREAL *Keta_update_loc_previous;
|
||||
private:
|
||||
MPI_Win win_Tadj_loc;
|
||||
MPI_Win win_Tadj_density_loc;
|
||||
// boundary layer for mpi communication
|
||||
// storing not the value but the pointers for the elements of arrays
|
||||
// to send
|
||||
CUSTOMREAL *bin_s; // boundary i axis negative
|
||||
CUSTOMREAL *bip_s; // boundary i axis positive
|
||||
CUSTOMREAL *bjn_s; // boundary j axis negative
|
||||
CUSTOMREAL *bjp_s; // boundary j axis positive
|
||||
CUSTOMREAL *bkn_s; // boundary k axis negative
|
||||
CUSTOMREAL *bkp_s; // boundary k axis positive
|
||||
// to receive
|
||||
CUSTOMREAL *bin_r; // boundary i axis negative
|
||||
CUSTOMREAL *bip_r; // boundary i axis positive
|
||||
CUSTOMREAL *bjn_r; // boundary j axis negative
|
||||
CUSTOMREAL *bjp_r; // boundary j axis positive
|
||||
CUSTOMREAL *bkn_r; // boundary k axis negative
|
||||
CUSTOMREAL *bkp_r; // boundary k axis positive
|
||||
|
||||
// kosumi boundaries (touching on lines and points)
|
||||
CUSTOMREAL *bij_pp_s, *bij_pp_r;
|
||||
CUSTOMREAL *bij_pn_s, *bij_pn_r;
|
||||
CUSTOMREAL *bij_np_s, *bij_np_r;
|
||||
CUSTOMREAL *bij_nn_s, *bij_nn_r;
|
||||
CUSTOMREAL *bjk_pp_s, *bjk_pp_r;
|
||||
CUSTOMREAL *bjk_pn_s, *bjk_pn_r;
|
||||
CUSTOMREAL *bjk_np_s, *bjk_np_r;
|
||||
CUSTOMREAL *bjk_nn_s, *bjk_nn_r;
|
||||
CUSTOMREAL *bik_pp_s, *bik_pp_r;
|
||||
CUSTOMREAL *bik_pn_s, *bik_pn_r;
|
||||
CUSTOMREAL *bik_np_s, *bik_np_r;
|
||||
CUSTOMREAL *bik_nn_s, *bik_nn_r;
|
||||
CUSTOMREAL *bijk_ppp_s,*bijk_ppp_r;
|
||||
CUSTOMREAL *bijk_ppn_s,*bijk_ppn_r;
|
||||
CUSTOMREAL *bijk_pnn_s,*bijk_pnn_r;
|
||||
CUSTOMREAL *bijk_npp_s,*bijk_npp_r;
|
||||
CUSTOMREAL *bijk_npn_s,*bijk_npn_r;
|
||||
CUSTOMREAL *bijk_nnp_s,*bijk_nnp_r;
|
||||
CUSTOMREAL *bijk_nnn_s,*bijk_nnn_r;
|
||||
CUSTOMREAL *bijk_pnp_s,*bijk_pnp_r;
|
||||
|
||||
// store MPI_Request for sending and receiving
|
||||
MPI_Request *mpi_send_reqs;
|
||||
MPI_Request *mpi_recv_reqs;
|
||||
MPI_Request *mpi_send_reqs_kosumi;
|
||||
MPI_Request *mpi_recv_reqs_kosumi;
|
||||
//
|
||||
// domain definition
|
||||
//
|
||||
CUSTOMREAL r_min; // minimum radius of global domain
|
||||
CUSTOMREAL r_max; // maximum radius of global domain
|
||||
CUSTOMREAL lat_min; // minimum latitude of global domain
|
||||
CUSTOMREAL lat_max; // maximum latitude of global domain
|
||||
CUSTOMREAL lon_min; // minimum longitude of global domain
|
||||
CUSTOMREAL lon_max; // maximum longitude of global domain
|
||||
public:
|
||||
CUSTOMREAL dr ;
|
||||
CUSTOMREAL dlat,dt;
|
||||
CUSTOMREAL dlon,dp;
|
||||
private:
|
||||
|
||||
//
|
||||
// members for test
|
||||
//
|
||||
CUSTOMREAL *u_loc; // true solution # TODO: erase for no testing
|
||||
//CUSTOMREAL *velo_loc; // velocity field, # TODO: use this for storing an intial model
|
||||
// anisotropic factors
|
||||
CUSTOMREAL a0, b0, c0, f0, fun0;
|
||||
|
||||
CUSTOMREAL source_width;
|
||||
//
|
||||
// member functions
|
||||
//
|
||||
void init_decomposition(InputParams&); // initialize domain decomposition
|
||||
void memory_allocation(); // allocate memory for arrays
|
||||
void memory_deallocation(); // deallocate memory from arrays
|
||||
void shm_memory_allocation(); // allocate memory for shared memory
|
||||
void shm_memory_deallocation(); // deallocate memory from shared memory
|
||||
|
||||
CUSTOMREAL *get_array_for_vis(CUSTOMREAL *arr, bool); // get array for visualization
|
||||
public:
|
||||
void get_array_for_3d_output(const CUSTOMREAL *arr_in, CUSTOMREAL* arr_out, bool inverse_field); // get array for 3d output
|
||||
void set_array_from_vis(CUSTOMREAL *arr); // set vis array to local array
|
||||
|
||||
// finalize the Time table
|
||||
void calc_T_plus_tau();
|
||||
private:
|
||||
// check difference between true solution and computed solution
|
||||
void calc_residual();
|
||||
|
||||
//
|
||||
// utilities
|
||||
//
|
||||
public:
|
||||
bool i_first(){ return neighbors_id[0] == -1;}
|
||||
bool j_first(){ return neighbors_id[2] == -1;}
|
||||
bool k_first(){ return neighbors_id[4] == -1;}
|
||||
bool i_last(){ return neighbors_id[1] == -1;}
|
||||
bool j_last(){ return neighbors_id[3] == -1;}
|
||||
bool k_last(){ return neighbors_id[5] == -1;}
|
||||
private:
|
||||
// number of the layers for ghost nodes
|
||||
const int n_ghost_layers = 1;
|
||||
// vtk format cell type
|
||||
const int cell_type = 9;
|
||||
|
||||
|
||||
};
|
||||
|
||||
#endif // GRID_H
|
||||
574
include/input_params.h
Normal file
574
include/input_params.h
Normal file
@@ -0,0 +1,574 @@
|
||||
#ifndef INPUT_PARAMS_H
|
||||
#define INPUT_PARAMS_H
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <iomanip>
|
||||
#include <unordered_set>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "config.h"
|
||||
#include "yaml-cpp/yaml.h"
|
||||
#include "utils.h"
|
||||
#include "mpi_funcs.h"
|
||||
#include "timer.h"
|
||||
#include "src_rec.h"
|
||||
|
||||
|
||||
// InputParams class for reading/storing and outputing input parameters and src_rec information
|
||||
class InputParams {
|
||||
|
||||
public:
|
||||
InputParams(std::string&);
|
||||
~InputParams();
|
||||
|
||||
// write parameters to output file
|
||||
void write_params_to_file();
|
||||
|
||||
//
|
||||
// getter/setter
|
||||
//
|
||||
|
||||
//
|
||||
// bondary information
|
||||
//
|
||||
CUSTOMREAL get_min_dep(){return min_dep;};
|
||||
CUSTOMREAL get_max_dep(){return max_dep;};
|
||||
CUSTOMREAL get_min_lat(){return min_lat*DEG2RAD;};
|
||||
CUSTOMREAL get_max_lat(){return max_lat*DEG2RAD;};
|
||||
CUSTOMREAL get_min_lon(){return min_lon*DEG2RAD;};
|
||||
CUSTOMREAL get_max_lon(){return max_lon*DEG2RAD;};
|
||||
|
||||
//
|
||||
// source receiver data
|
||||
//
|
||||
|
||||
// source receiver information for processes which stores source receiver information
|
||||
std::string get_src_rec_file() {return src_rec_file;};
|
||||
bool get_src_rec_file_exist(){return src_rec_file_exist;};
|
||||
SrcRecInfo& get_src_point(const std::string&); // return SrcRec object
|
||||
SrcRecInfo& get_rec_point(const std::string&); // return receivers for the current source
|
||||
|
||||
// source receiver information with broadcast to all subdom_main processes
|
||||
SrcRecInfo get_src_point_bcast(const std::string&); // return SrcRec object
|
||||
SrcRecInfo get_rec_point_bcast(const std::string&); // return receivers for the current source
|
||||
CUSTOMREAL get_src_radius( const std::string&);
|
||||
CUSTOMREAL get_src_lat( const std::string&);
|
||||
CUSTOMREAL get_src_lon( const std::string&);
|
||||
SrcRecInfo get_src_point_bcast_2d(const std::string&); // return SrcRec object
|
||||
CUSTOMREAL get_src_radius_2d(const std::string&);
|
||||
CUSTOMREAL get_src_lat_2d( const std::string&);
|
||||
CUSTOMREAL get_src_lon_2d( const std::string&);
|
||||
std::string get_src_name(const int&); // return source name from in-sim_group id
|
||||
std::string get_rec_name(const int&); // return receiver name from in-sim_group id
|
||||
std::string get_src_name_comm(const int&); // return source name in common receiver list
|
||||
int get_src_id(const std::string&); // return src global id from src name
|
||||
bool get_if_src_teleseismic(const std::string&); // return true if the source is teleseismic
|
||||
|
||||
//
|
||||
// others
|
||||
//
|
||||
CUSTOMREAL get_conv_tol() {return conv_tol;};
|
||||
void set_conv_tol(CUSTOMREAL conv_tol_){conv_tol = conv_tol_;};
|
||||
CUSTOMREAL get_max_iter() {return max_iter;};
|
||||
|
||||
int get_stencil_order() {return stencil_order;};
|
||||
void set_stencil_order(int stencil_order_){stencil_order = stencil_order_;};
|
||||
int get_stencil_type() {return stencil_type;};
|
||||
int get_sweep_type() {return sweep_type;};
|
||||
|
||||
std::string get_init_model_path(){return init_model_path;};
|
||||
std::string get_model_1d_name() {return model_1d_name;};
|
||||
|
||||
int get_run_mode() {return run_mode;};
|
||||
|
||||
// invgrid for vel
|
||||
int get_n_inversion_grid(){return n_inversion_grid;};
|
||||
|
||||
int get_n_inv_r() {return n_inv_r;};
|
||||
int get_n_inv_t() {return n_inv_t;};
|
||||
int get_n_inv_p() {return n_inv_p;};
|
||||
int get_n_inv_r_ani(){return n_inv_r_ani;};
|
||||
int get_n_inv_t_ani(){return n_inv_t_ani;};
|
||||
int get_n_inv_p_ani(){return n_inv_p_ani;};
|
||||
CUSTOMREAL get_min_dep_inv(){return min_dep_inv;};
|
||||
CUSTOMREAL get_max_dep_inv(){return max_dep_inv;};
|
||||
CUSTOMREAL get_min_lat_inv(){return min_lat_inv*DEG2RAD;};
|
||||
CUSTOMREAL get_max_lat_inv(){return max_lat_inv*DEG2RAD;};
|
||||
CUSTOMREAL get_min_lon_inv(){return min_lon_inv*DEG2RAD;};
|
||||
CUSTOMREAL get_max_lon_inv(){return max_lon_inv*DEG2RAD;};
|
||||
CUSTOMREAL get_min_dep_inv_ani(){return min_dep_inv_ani;};
|
||||
CUSTOMREAL get_max_dep_inv_ani(){return max_dep_inv_ani;};
|
||||
CUSTOMREAL get_min_lat_inv_ani(){return min_lat_inv_ani*DEG2RAD;};
|
||||
CUSTOMREAL get_max_lat_inv_ani(){return max_lat_inv_ani*DEG2RAD;};
|
||||
CUSTOMREAL get_min_lon_inv_ani(){return min_lon_inv_ani*DEG2RAD;};
|
||||
CUSTOMREAL get_max_lon_inv_ani(){return max_lon_inv_ani*DEG2RAD;};
|
||||
|
||||
int get_n_inv_r_flex(){return n_inv_r_flex;};
|
||||
int get_n_inv_t_flex(){return n_inv_t_flex;};
|
||||
int get_n_inv_p_flex(){return n_inv_p_flex;};
|
||||
CUSTOMREAL* get_dep_inv() {return dep_inv;};
|
||||
CUSTOMREAL* get_lat_inv() {return lat_inv;};
|
||||
CUSTOMREAL* get_lon_inv() {return lon_inv;};
|
||||
CUSTOMREAL* get_trapezoid() {return trapezoid;};
|
||||
|
||||
bool get_ignore_velocity_discontinuity(){return ignore_velocity_discontinuity;};
|
||||
|
||||
// invgrid for ani
|
||||
bool get_invgrid_ani() {return invgrid_ani;};
|
||||
|
||||
int get_n_inv_r_flex_ani(){return n_inv_r_flex_ani;};
|
||||
int get_n_inv_t_flex_ani(){return n_inv_t_flex_ani;};
|
||||
int get_n_inv_p_flex_ani(){return n_inv_p_flex_ani;};
|
||||
CUSTOMREAL* get_dep_inv_ani() {return dep_inv_ani;};
|
||||
CUSTOMREAL* get_lat_inv_ani() {return lat_inv_ani;};
|
||||
CUSTOMREAL* get_lon_inv_ani() {return lon_inv_ani;};
|
||||
CUSTOMREAL* get_trapezoid_ani() {return trapezoid_ani;};
|
||||
|
||||
//
|
||||
void check_inv_grid();
|
||||
private:
|
||||
void check_increasing(CUSTOMREAL*, int, std::string);
|
||||
void check_lower_bound(CUSTOMREAL*&, int&, CUSTOMREAL, std::string);
|
||||
void check_upper_bound(CUSTOMREAL*&, int&, CUSTOMREAL, std::string);
|
||||
|
||||
public:
|
||||
// void check_dep_inv();
|
||||
// void check_lat_inv();
|
||||
// void check_lon_inv();
|
||||
// void check_dep_inv_ani();
|
||||
// void check_lat_inv_ani();
|
||||
// void check_lon_inv_ani();
|
||||
|
||||
// // adjust inv grid
|
||||
// void insertAtFront(CUSTOMREAL*&, int&, CUSTOMREAL);
|
||||
// void insertAtBack(CUSTOMREAL*&, int&, CUSTOMREAL);
|
||||
|
||||
// bool get_invgrid_volume_rescale(){return invgrid_volume_rescale;}
|
||||
|
||||
int get_max_iter_inv() {return max_iter_inv;};
|
||||
|
||||
int get_model_update_N_iter(){return model_update_N_iter;};
|
||||
int get_relocation_N_iter() {return relocation_N_iter;};
|
||||
int get_max_loop_mode0() {return max_loop_mode0;};
|
||||
int get_max_loop_mode1() {return max_loop_mode1;};
|
||||
|
||||
bool get_is_srcrec_swap() {return swap_src_rec;};
|
||||
|
||||
bool get_if_output_source_field() {return output_source_field;};
|
||||
bool get_if_output_kernel() {return output_kernel;};
|
||||
bool get_if_output_final_model() {return output_final_model;};
|
||||
bool get_if_output_middle_model() {return output_middle_model;};
|
||||
bool get_if_output_in_process() {return output_in_process;};
|
||||
bool get_if_output_in_process_data() {return output_in_process_data;};
|
||||
bool get_if_single_precision_output(){return single_precision_output;};
|
||||
int get_verbose_output_level() {return verbose_output_level;}; // #TODO: modify codes for verbose_output_level > 1
|
||||
|
||||
bool get_update_slowness() {return update_slowness;};
|
||||
bool get_update_azi_ani() {return update_azi_ani;};
|
||||
bool get_update_rad_ani() {return update_rad_ani;};
|
||||
CUSTOMREAL * get_depth_taper() {return depth_taper;};
|
||||
|
||||
bool get_have_tele_data() {return have_tele_data;};
|
||||
|
||||
bool get_use_abs() {return use_abs;};
|
||||
bool get_use_cs() {return use_cs;};
|
||||
bool get_use_cr() {return use_cr;};
|
||||
|
||||
bool get_use_abs_reloc() {return use_abs_reloc;};
|
||||
bool get_use_cs_reloc() {return use_cs_reloc;};
|
||||
bool get_use_cr_reloc() {return use_cr_reloc;};
|
||||
|
||||
CUSTOMREAL* get_residual_weight_abs() {return residual_weight_abs;};
|
||||
CUSTOMREAL* get_distance_weight_abs() {return distance_weight_abs;};
|
||||
CUSTOMREAL* get_residual_weight_cs() {return residual_weight_cs;};
|
||||
CUSTOMREAL* get_azimuthal_weight_cs() {return azimuthal_weight_cs;};
|
||||
CUSTOMREAL* get_residual_weight_cr() {return residual_weight_cr;};
|
||||
CUSTOMREAL* get_azimuthal_weight_cr() {return azimuthal_weight_cr;};
|
||||
|
||||
|
||||
CUSTOMREAL* get_residual_weight_abs_reloc() {return residual_weight_abs_reloc;};
|
||||
CUSTOMREAL* get_distance_weight_abs_reloc() {return distance_weight_abs_reloc;};
|
||||
CUSTOMREAL* get_residual_weight_cs_reloc() {return residual_weight_cs_reloc;};
|
||||
CUSTOMREAL* get_azimuthal_weight_cs_reloc() {return azimuthal_weight_cs_reloc;};
|
||||
CUSTOMREAL* get_residual_weight_cr_reloc() {return residual_weight_cr_reloc;};
|
||||
CUSTOMREAL* get_azimuthal_weight_cr_reloc() {return azimuthal_weight_cr_reloc;};
|
||||
|
||||
// get if the T field is written into the file
|
||||
bool get_is_T_written_into_file(const std::string&);
|
||||
|
||||
// prepare source list for this simulation group
|
||||
void prepare_src_map();
|
||||
|
||||
// (relocation) modify (swapped source) receiver's location and time
|
||||
void modify_swapped_source_location();
|
||||
|
||||
// write out src_rec_file
|
||||
void write_src_rec_file(int,int);
|
||||
|
||||
// write out station_correction_file
|
||||
void write_station_correction_file(int);
|
||||
|
||||
// station correction
|
||||
void station_correction_update( CUSTOMREAL );
|
||||
|
||||
int n_src_this_sim_group = 0; // number of sources in this simultaneous group
|
||||
int n_src_comm_rec_this_sim_group = 0; // number of sources with common receiver in this simultaneous group
|
||||
int n_src_2d_this_sim_group = 0; // number of sources assigned for 2d solver in this simultaneous group
|
||||
int n_rec_this_sim_group = 0; // number of receivers in this simultaneous group
|
||||
|
||||
std::map<std::string, SrcRecInfo> src_map_all; // map of all sources (full information is only stored by the main process)
|
||||
std::map<std::string, SrcRecInfo> src_map; // map of sources belonging to this simultaneous group
|
||||
std::map<std::string, SrcRecInfo> src_map_comm_rec; // map of sources with common receiver
|
||||
std::map<std::string, SrcRecInfo> src_map_2d; // map of sources assigned for 2d solver
|
||||
std::map<std::string, SrcRecInfo> src_map_tele; // source list for teleseismic
|
||||
|
||||
std::map<std::string, SrcRecInfo> rec_map_all; // map of all receivers (full information is only stored by the main process)
|
||||
std::map<std::string, SrcRecInfo> rec_map; // map of receivers belonging to this simultaneous group
|
||||
std::map<std::string, SrcRecInfo> rec_map_tele; // rec list for teleseismic
|
||||
|
||||
// datainfo-vector maps <src_name, rec_name>
|
||||
std::map< std::string, std::map<std::string, std::vector<DataInfo>>> data_map_all; // data list for all data (full information is only stored by the main process)
|
||||
std::map< std::string, std::map<std::string, std::vector<DataInfo>>> data_map; // data list for this simultaneous group
|
||||
std::map< std::string, std::map<std::string, std::vector<DataInfo>>> data_map_tele; // data list for teleseismic
|
||||
|
||||
std::vector<std::string> name_for_reloc; // name list of receivers (swarpped sources) for location
|
||||
|
||||
// src id <-> src name relations
|
||||
std::vector<std::string> src_id2name; // name list of sources belonging to this simultaneous group
|
||||
std::vector<std::string> rec_id2name; // name list of receivers belongig to this simultaneous group
|
||||
std::vector<std::string> src_id2name_comm_rec; // name list of sources with common receiver
|
||||
std::vector<std::string> src_id2name_2d; // name list of sources assigned for 2d solver.
|
||||
std::vector<std::string> src_id2name_all; // name list of all sources (store the order of sources in src_rec file)
|
||||
std::vector<std::string> src_id2name_back; // back up of name list of all sources (this will not be swapped)
|
||||
std::vector<std::vector<std::vector<std::string>>> rec_id2name_back; // back up of the name list of all receivers for each source (this will not be swapped)
|
||||
|
||||
// backups used when outputing the data
|
||||
std::map<std::string, SrcRecInfo> src_map_back;
|
||||
std::map<std::string, SrcRecInfo> rec_map_back;
|
||||
std::map<std::string, std::map<std::string, std::vector<DataInfo>>> data_map_back;
|
||||
|
||||
// the number of data
|
||||
int N_abs_local_data = 0;
|
||||
int N_cr_dif_local_data = 0;
|
||||
int N_cs_dif_local_data = 0;
|
||||
int N_teleseismic_data = 0;
|
||||
int N_data = 0;
|
||||
// the number of the types of data
|
||||
int N_data_type = 0;
|
||||
std::map<std::string, int> data_type; // element: "abs", "cs_dif", "cr_dif", "tele"
|
||||
|
||||
// initialize_adjoint_source
|
||||
void initialize_adjoint_source();
|
||||
// set adjoint source && adjoint source density
|
||||
void set_adjoint_source(std::string, CUSTOMREAL);
|
||||
void set_adjoint_source_density(std::string, CUSTOMREAL);
|
||||
|
||||
// gather traveltimes and calculate differences of synthetic data
|
||||
void gather_traveltimes_and_calc_syn_diff();
|
||||
|
||||
// reduce necessary data in rec_map, which has differed elements in each sim group.
|
||||
template <typename T>
|
||||
void allreduce_rec_map_var(T&);
|
||||
|
||||
void allreduce_rec_map_vobj_src_reloc();
|
||||
void allreduce_rec_map_grad_src();
|
||||
|
||||
private:
|
||||
// boundary information
|
||||
CUSTOMREAL min_dep; // minimum depth in km
|
||||
CUSTOMREAL max_dep; // maximum depth in km
|
||||
CUSTOMREAL min_lat; // minimum latitude in degree
|
||||
CUSTOMREAL max_lat; // maximum latitude in degree
|
||||
CUSTOMREAL min_lon; // minimum longitude in degree
|
||||
CUSTOMREAL max_lon; // maximum longitude in degree
|
||||
|
||||
// source information
|
||||
CUSTOMREAL src_dep; // source depth in km
|
||||
CUSTOMREAL src_lat; // source latitude in degrees
|
||||
CUSTOMREAL src_lon; // source longitude in degrees
|
||||
std::string src_rec_file; // source receiver file
|
||||
std::string src_rec_file_out; // source receiver file to be output
|
||||
std::string sta_correction_file; // station correction file to be input
|
||||
std::string station_correction_file_out; // station correction file to be output
|
||||
bool src_rec_file_exist = false; // source receiver file exist
|
||||
bool sta_correction_file_exist = false; // station correction file exist
|
||||
bool swap_src_rec = false; // whether the src/rec are swapped or not
|
||||
|
||||
// model input files
|
||||
std::string init_model_path; // model file path init
|
||||
std::string model_1d_name; // name of 1d model for teleseismic tomography
|
||||
|
||||
// inversion
|
||||
int run_mode=0; // do inversion or not (0: no, 1: yes)
|
||||
int n_inversion_grid=1; // number of inversion grid
|
||||
|
||||
// inversion grid setting
|
||||
static const int n_trapezoid = 3;
|
||||
CUSTOMREAL trapezoid[n_trapezoid] = {1.0, 0.0, 50.0};
|
||||
CUSTOMREAL trapezoid_ani[n_trapezoid] = {1.0, 0.0, 50.0};
|
||||
|
||||
|
||||
// uniform or flexible inversion grid (0: uniform, 1: flexible, 2: trapezoid)
|
||||
// int type_invgrid_dep=0;
|
||||
// int type_invgrid_lat=0;
|
||||
// int type_invgrid_lon=0;
|
||||
// uniform or flexible anisotropic inversion grid (0: uniform, 1: flexible, 2: trapezoid)
|
||||
// int type_invgrid_dep_ani=0;
|
||||
// int type_invgrid_lat_ani=0;
|
||||
// int type_invgrid_lon_ani=0;
|
||||
|
||||
//
|
||||
// variables for type = 0: uniform inversion grid
|
||||
//
|
||||
|
||||
bool uniform_inv_grid_dep = false; // automatically generate inversion grid for dep or not
|
||||
bool uniform_inv_grid_lat = false; // automatically generate inversion grid for lat or not
|
||||
bool uniform_inv_grid_lon = false; // automatically generate inversion grid for lon or not
|
||||
// number of uniform inversion grid nodes in r, t, p direction
|
||||
int n_inv_r=0;
|
||||
int n_inv_t=0;
|
||||
int n_inv_p=0;
|
||||
// number of uniform anisotropic inversion grid nodes in r, t, p direction
|
||||
int n_inv_r_ani=0;
|
||||
int n_inv_t_ani=0;
|
||||
int n_inv_p_ani=0;
|
||||
|
||||
// type = 0: uniform inversion grid
|
||||
// inversion grid
|
||||
CUSTOMREAL min_dep_inv=-99999; // minimum depth in km
|
||||
CUSTOMREAL max_dep_inv=-99999; // maximum depth in km
|
||||
CUSTOMREAL min_lat_inv=-99999; // minimum latitude
|
||||
CUSTOMREAL max_lat_inv=-99999; // maximum latitude
|
||||
CUSTOMREAL min_lon_inv=-99999; // minimum longitude
|
||||
CUSTOMREAL max_lon_inv=-99999; // maximum longitude
|
||||
|
||||
// inversion grid for anisotropic parameters
|
||||
CUSTOMREAL min_dep_inv_ani=-99999; // minimum depth in km
|
||||
CUSTOMREAL max_dep_inv_ani=-99999; // maximum depth in km
|
||||
CUSTOMREAL min_lat_inv_ani=-99999; // minimum latitude
|
||||
CUSTOMREAL max_lat_inv_ani=-99999; // maximum latitude
|
||||
CUSTOMREAL min_lon_inv_ani=-99999; // minimum longitude
|
||||
CUSTOMREAL max_lon_inv_ani=-99999; // maximum longitude
|
||||
void setup_uniform_inv_grid();
|
||||
|
||||
//
|
||||
// variables fo type = 1: flexible inversion grid
|
||||
//
|
||||
|
||||
CUSTOMREAL *dep_inv = nullptr; // array for storing inversion grid points in depth
|
||||
CUSTOMREAL *lat_inv = nullptr; // array for storing inversion grid points in latitude
|
||||
CUSTOMREAL *lon_inv = nullptr; // array for storing inversion grid points in longitude
|
||||
CUSTOMREAL *dep_inv_ani = nullptr; // array for storing inversion grid points in depth for anisotropy
|
||||
CUSTOMREAL *lat_inv_ani = nullptr; // array for storing inversion grid points in latitude for anisotropy
|
||||
CUSTOMREAL *lon_inv_ani = nullptr; // array for storing inversion grid points in longitude for anisotropy
|
||||
|
||||
// number of flexibly designed inversion grid in r, t, p direction
|
||||
int n_inv_r_flex=0;
|
||||
int n_inv_t_flex=0;
|
||||
int n_inv_p_flex=0;
|
||||
// number of flexibly designed inversion grid in r, t, p direction
|
||||
int n_inv_r_flex_ani=0;
|
||||
int n_inv_t_flex_ani=0;
|
||||
int n_inv_p_flex_ani=0;
|
||||
|
||||
// if true, use defined inversion grid for anisotropy. Otherwise, use the same inversion grid of velocity for anisotropy
|
||||
bool invgrid_ani = false;
|
||||
|
||||
// inversion grid volume rescale (kernel -> kernel / volume of inversion grid mesh)
|
||||
// bool invgrid_volume_rescale = false;
|
||||
|
||||
// date usage setting and weights
|
||||
bool use_abs = false; // use absolute travel time or not
|
||||
bool use_cs = false; // use common source double difference or not
|
||||
bool use_cr = false; // use common receiver double difference or not
|
||||
static const int n_weight = 4;
|
||||
CUSTOMREAL residual_weight_abs[n_weight] = { 1.0, 3.0, 1.0, 1.0};
|
||||
CUSTOMREAL residual_weight_cs[n_weight] = { 1.0, 3.0, 1.0, 1.0};
|
||||
CUSTOMREAL residual_weight_cr[n_weight] = { 1.0, 3.0, 1.0, 1.0};
|
||||
CUSTOMREAL distance_weight_abs[n_weight] = { 100.0, 200.0, 1.0, 1.0};
|
||||
CUSTOMREAL azimuthal_weight_cs[n_weight] = { 15.0, 30.0, 1.0, 1.0};
|
||||
CUSTOMREAL azimuthal_weight_cr[n_weight] = { 15.0, 30.0, 1.0, 1.0};
|
||||
|
||||
// for relocation
|
||||
bool use_abs_reloc = false; // use absolute travel time or not
|
||||
bool use_cs_reloc = false; // use common source double difference or not
|
||||
bool use_cr_reloc = false; // use common source double difference or not
|
||||
CUSTOMREAL residual_weight_abs_reloc[n_weight] = { 1.0, 3.0, 1.0, 1.0};
|
||||
CUSTOMREAL distance_weight_abs_reloc[n_weight] = { 1.0, 3.0, 1.0, 1.0};
|
||||
CUSTOMREAL residual_weight_cs_reloc[n_weight] = { 1.0, 3.0, 1.0, 1.0};
|
||||
CUSTOMREAL azimuthal_weight_cs_reloc[n_weight] = { 100.0, 200.0, 1.0, 1.0};
|
||||
CUSTOMREAL residual_weight_cr_reloc[n_weight] = { 15.0, 30.0, 1.0, 1.0};
|
||||
CUSTOMREAL azimuthal_weight_cr_reloc[n_weight] = { 15.0, 30.0, 1.0, 1.0};
|
||||
|
||||
// convergence setting
|
||||
CUSTOMREAL conv_tol = 1e-05; // convergence tolerance
|
||||
int max_iter = 500; // maximum number of iterations
|
||||
int max_iter_inv=1; // maximum number of iterations for inversion
|
||||
|
||||
// calculation method
|
||||
int stencil_order = 1; // stencil order
|
||||
int stencil_type = 1; // stencil type (0: non upwind; 1: upwind)
|
||||
int sweep_type = 0; // sweep type (0: legacy, 1: cuthil-mckee with shm parallelization)
|
||||
|
||||
// gather all arrival times to a main process
|
||||
void gather_all_arrival_times_to_main();
|
||||
// gather rec info to main process
|
||||
void gather_rec_info_to_main();
|
||||
|
||||
// generate a map of sources which include common receiver double difference data
|
||||
void generate_src_map_with_common_receiver(std::map<std::string, std::map<std::string, std::vector<DataInfo>>>&,
|
||||
std::map<std::string, SrcRecInfo>&,
|
||||
std::vector<std::string>&);
|
||||
|
||||
bool i_first=false, i_last=false, \
|
||||
j_first=false, j_last=false, \
|
||||
k_first=false; // store info if this subdomain has outer boundary
|
||||
|
||||
// check contradictions in input parameters
|
||||
void check_contradictions();
|
||||
|
||||
// output setting
|
||||
bool output_source_field = false; // output out_data_sim_X.h or not.
|
||||
bool output_kernel = false; // output kernel or not.
|
||||
bool output_final_model = true; // output merged final model or not.
|
||||
bool output_middle_model = false; // output merged model at each inv iteration or not.
|
||||
bool output_in_process = true; // True: output at each inv iteration, otherwise, only output step 0, Niter-1, Niter. Default: true. File: "out_data_sim_group_0". Keys: "Kdensity_update_inv_XXXX", "vel_inv_0000", "xi_inv_0000", "eta_inv_0000"
|
||||
bool output_in_process_data = true; // output src_rec_file at each inv iteration or not.
|
||||
int verbose_output_level = 0; // output verbose information or not.
|
||||
|
||||
// inversion setting
|
||||
bool update_slowness = true; // update slowness (velocity) or not.
|
||||
bool update_azi_ani = false; // update azimuthal anisotropy (xi, eta) or not.
|
||||
bool update_rad_ani = false; // update radial anisotropy (in future) or not.
|
||||
|
||||
// have have_tele_data or not?
|
||||
bool have_tele_data = false; // default is false. Error will occur if teleseismic data is include but false.
|
||||
|
||||
// ignore velocity discontinuity
|
||||
bool ignore_velocity_discontinuity = false; // default is false. Error will occur if model velocity has discontinuity ( v[i+1] > v[i] * 1.2 or v[i+1] < v[i] * 0.8).
|
||||
|
||||
CUSTOMREAL depth_taper[2] = {-9999999, -9999998}; // kernel weight: 0: -inf ~ taper[0]; 0 ~ 1 : taper[0] ~ taper[1]; 1 : taper[1] ~ inf
|
||||
bool use_sta_correction = false; // apply station correction or not.
|
||||
|
||||
// single precision (float) output mode
|
||||
bool single_precision_output = false;
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
//
|
||||
// utils
|
||||
//
|
||||
inline DataInfo& get_data_src_rec(std::vector<DataInfo>& v){
|
||||
// return the first element in the vector with is_rec_pair = true
|
||||
for (auto it = v.begin(); it != v.end(); it++){
|
||||
if (it->is_src_rec)
|
||||
return *it;
|
||||
}
|
||||
|
||||
// error if no rec pair is found
|
||||
std::cerr << "Error: no src/rec is found in get_data_src_rec" << std::endl;
|
||||
exit(1);
|
||||
|
||||
// return the first element in the vector as a dummy
|
||||
return v[0];
|
||||
}
|
||||
|
||||
|
||||
inline DataInfo& get_data_rec_pair(std::map<std::string, std::map<std::string, std::vector<DataInfo>>>& v,
|
||||
const std::string& name_src,
|
||||
const std::string& name_rec1,
|
||||
const std::string& name_rec2){
|
||||
// return the first element in the vector with is_rec_pair = true
|
||||
auto& map1 = v[name_src][name_rec1];
|
||||
auto& map2 = v[name_src][name_rec2];
|
||||
|
||||
for (auto it = map1.begin(); it != map1.end(); it++){
|
||||
if (it->is_rec_pair && it->name_rec_pair[0] == name_rec1 && it->name_rec_pair[1] == name_rec2)
|
||||
return *it;
|
||||
}
|
||||
|
||||
for (auto it = map2.begin(); it != map2.end(); it++){
|
||||
if (it->is_rec_pair && it->name_rec_pair[0] == name_rec1 && it->name_rec_pair[1] == name_rec2)
|
||||
return *it;
|
||||
}
|
||||
|
||||
// error if no rec pair is found
|
||||
std::cerr << "Error: no rec pair is found in get_data_rec_pair" << std::endl;
|
||||
exit(1);
|
||||
|
||||
// return the first element in the vector as a dummy
|
||||
return v[name_src][name_rec1][0];
|
||||
}
|
||||
|
||||
|
||||
inline DataInfo& get_data_src_pair(std::map<std::string, std::map<std::string, std::vector<DataInfo>>>& v,
|
||||
const std::string& name_src1,
|
||||
const std::string& name_src2,
|
||||
const std::string& name_rec){
|
||||
|
||||
// return the first element in the vector with is_rec_pair = true
|
||||
auto& map1 = v[name_src1][name_rec];
|
||||
auto& map2 = v[name_src2][name_rec];
|
||||
|
||||
for (auto it = map1.begin(); it != map1.end(); it++){
|
||||
if (it->is_src_pair && it->name_src_pair[0] == name_src1 && it->name_src_pair[1] == name_src2)
|
||||
return *it;
|
||||
}
|
||||
|
||||
for (auto it = map2.begin(); it != map2.end(); it++){
|
||||
if (it->is_src_pair && it->name_src_pair[0] == name_src2 && it->name_src_pair[1] == name_src1)
|
||||
return *it;
|
||||
}
|
||||
|
||||
// error if no src pair is found
|
||||
std::cerr << "Error: no src pair is found in get_data_src_pair" << std::endl;
|
||||
exit(1);
|
||||
|
||||
// return the first element in the vector as a dummy
|
||||
return v[name_src1][name_rec][0];
|
||||
}
|
||||
|
||||
|
||||
inline void set_cr_dif_to_src_pair(std::map<std::string, std::map< std::string, std::vector<DataInfo>>>& v,
|
||||
std::string& name_src1,
|
||||
std::string& name_src2,
|
||||
std::string& name_rec,
|
||||
CUSTOMREAL& cr_dif){
|
||||
// return the first element in the vector with is_rec_pair = true
|
||||
|
||||
std::vector<DataInfo>& vdata = v[name_src1][name_rec];
|
||||
|
||||
for (auto it = vdata.begin(); it != vdata.end(); it++){
|
||||
if (it->is_src_pair
|
||||
&& ( (it->name_src_pair[0] == name_src1 && it->name_src_pair[1] == name_src2)
|
||||
|| (it->name_src_pair[0] == name_src2 && it->name_src_pair[1] == name_src1) )) {
|
||||
it->cr_dif_travel_time = cr_dif;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline bool get_if_any_src_pair(std::vector<DataInfo>& v){
|
||||
// return the first element in the vector with is_rec_pair = true
|
||||
for (auto it = v.begin(); it != v.end(); it++){
|
||||
if (it->is_src_pair)
|
||||
return true;
|
||||
}
|
||||
|
||||
// or maybe this will be enough
|
||||
//return v.size() > 1;
|
||||
|
||||
// return false if no rec pair is found
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
75
include/inv_grid.h
Normal file
75
include/inv_grid.h
Normal file
@@ -0,0 +1,75 @@
|
||||
#ifndef INV_GRID_H
|
||||
#define INV_GRID_H
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "config.h"
|
||||
#include "input_params.h"
|
||||
|
||||
|
||||
// Base class for 1D inversion grid ()
|
||||
class InvGrid1dBase {
|
||||
public:
|
||||
InvGrid1dBase(){};
|
||||
virtual ~InvGrid1dBase(){
|
||||
delete[] arr;
|
||||
};
|
||||
|
||||
// copy constructor
|
||||
InvGrid1dBase(const InvGrid1dBase& other){
|
||||
n = other.n;
|
||||
dinv = other.dinv;
|
||||
dinv_l = other.dinv_l;
|
||||
arr = new CUSTOMREAL[n];
|
||||
std::copy(other.arr, other.arr+n, arr);
|
||||
}
|
||||
|
||||
// assignment operator
|
||||
InvGrid1dBase& operator=(const InvGrid1dBase& other){
|
||||
if (this != &other){
|
||||
n = other.n;
|
||||
dinv = other.dinv;
|
||||
dinv_l = other.dinv_l;
|
||||
arr = new CUSTOMREAL[n];
|
||||
std::copy(other.arr, other.arr+n, arr);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
CUSTOMREAL* arr = nullptr; // 1d or 2d array storing the grid coordinates
|
||||
int n; // number of grid points
|
||||
CUSTOMREAL dinv; // grid spacing
|
||||
CUSTOMREAL dinv_l; // amount of shift for each inversion grid
|
||||
};
|
||||
|
||||
// Derived class for 1D inversion grid
|
||||
class InvGrid1d : public InvGrid1dBase {
|
||||
public:
|
||||
InvGrid1d(InputParams&, const int, const CUSTOMREAL*); // for r grid
|
||||
InvGrid1d(InputParams&, const int, const CUSTOMREAL*, const CUSTOMREAL*, const int, const CUSTOMREAL*); // function overload for t and p grids
|
||||
~InvGrid1d() override {};
|
||||
};
|
||||
|
||||
|
||||
// Base class for 3D inversion grid
|
||||
class InvGrid {
|
||||
public:
|
||||
InvGrid(InputParams&);
|
||||
~InvGrid();
|
||||
|
||||
void write_inversion_grid_to_file();
|
||||
|
||||
InvGrid1dBase r;
|
||||
InvGrid1dBase t;
|
||||
InvGrid1dBase p;
|
||||
InvGrid1dBase r_ani;
|
||||
InvGrid1dBase t_ani;
|
||||
InvGrid1dBase p_ani;
|
||||
|
||||
private:
|
||||
void get_inv_grid_params(InputParams&);
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif // INV_GRID_H
|
||||
2
include/io.fwd.h
Normal file
2
include/io.fwd.h
Normal file
@@ -0,0 +1,2 @@
|
||||
#pragma once
|
||||
class IO_utils;
|
||||
284
include/io.h
Normal file
284
include/io.h
Normal file
@@ -0,0 +1,284 @@
|
||||
#ifndef IO_H
|
||||
#define IO_H
|
||||
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#if USE_HDF5
|
||||
#include "hdf5.h"
|
||||
#endif
|
||||
#include "tinyxml2.h"
|
||||
|
||||
// to handle circular dependency
|
||||
#pragma once
|
||||
#include "grid.fwd.h"
|
||||
//#include "source.fwd.h"
|
||||
#include "io.fwd.h"
|
||||
|
||||
#include "utils.h"
|
||||
#include "mpi_funcs.h"
|
||||
#include "grid.h"
|
||||
#include "config.h"
|
||||
#include "input_params.h"
|
||||
#include "eikonal_solver_2d.h"
|
||||
|
||||
|
||||
class IO_utils {
|
||||
|
||||
public:
|
||||
IO_utils(InputParams&);
|
||||
~IO_utils();
|
||||
|
||||
// initialize data output file
|
||||
void init_data_output_file();
|
||||
// finalize data/grid outpiut file (xdmf)
|
||||
void finalize_data_output_file();
|
||||
// change the output file name and xdmf objects
|
||||
void change_xdmf_obj(int);
|
||||
// output for updating xdmf file
|
||||
void update_xdmf_file();
|
||||
|
||||
// change group name for source
|
||||
void reset_source_info(const int&, const std::string&);
|
||||
// change group name for model
|
||||
void change_group_name_for_model();
|
||||
// prepare grid for each inversion iteration
|
||||
void prepare_grid_inv_xdmf(int);
|
||||
|
||||
// set id_sim
|
||||
void set_id_src(const int& id_src_){id_sim_src = id_src_;};
|
||||
// set source name
|
||||
void set_name_src(const std::string& name_src_){name_sim_src = name_src_;};
|
||||
|
||||
|
||||
//
|
||||
// write functions
|
||||
//
|
||||
|
||||
// xdmf write out for grid data
|
||||
void write_xdmf_file_grid();
|
||||
|
||||
// write grid data to output file
|
||||
void write_grid(Grid&);
|
||||
// general function for write out data in hdf5
|
||||
void write_data_h5(Grid&, std::string&, std::string&, CUSTOMREAL*, int, bool, bool for_tmp_db=false);
|
||||
// general function for write out data in hdf5 with merging subdomains
|
||||
void write_data_merged_h5(Grid&, std::string&, std::string&, std::string&, CUSTOMREAL*, bool, bool no_group=true);
|
||||
// general function for write out data in ascii
|
||||
void write_data_ascii(Grid&, std::string&, CUSTOMREAL*);
|
||||
// general function for write out data in ascii with merging subdomains
|
||||
void write_data_merged_ascii(Grid&, std::string&);
|
||||
// write true solution
|
||||
void write_true_solution(Grid&);
|
||||
// write velocity model
|
||||
void write_vel(Grid&, int);
|
||||
// write T0v
|
||||
void write_T0v(Grid&, int);
|
||||
// write u
|
||||
void write_u(Grid&);
|
||||
// write tau
|
||||
void write_tau(Grid&, int);
|
||||
// write temporal tau fields
|
||||
//void write_tmp_tau(Grid&, int);
|
||||
// write result timetable T
|
||||
void write_T(Grid&, int);
|
||||
// write temporal timetable T in a separated file
|
||||
void write_T_tmp(Grid&);
|
||||
// write residual (resudual = true_solution - result)
|
||||
void write_residual(Grid&);
|
||||
// write adjoint field
|
||||
void write_adjoint_field(Grid&, int);
|
||||
// write fun_loc
|
||||
void write_fun(Grid&, int);
|
||||
// write xi
|
||||
void write_xi(Grid&, int);
|
||||
// write eta
|
||||
void write_eta(Grid&, int);
|
||||
// write a
|
||||
void write_a(Grid&, int);
|
||||
// write b
|
||||
void write_b(Grid&, int);
|
||||
// write c
|
||||
void write_c(Grid&, int);
|
||||
// write f
|
||||
void write_f(Grid&, int);
|
||||
// Ks
|
||||
void write_Ks(Grid&, int);
|
||||
// Kxi
|
||||
void write_Kxi(Grid&, int);
|
||||
// Keta
|
||||
void write_Keta(Grid&, int);
|
||||
// Ks_density
|
||||
void write_Ks_density(Grid&, int);
|
||||
// Kxi_density
|
||||
void write_Kxi_density(Grid&, int);
|
||||
// Keta_density
|
||||
void write_Keta_density(Grid&, int);
|
||||
// Ks_over_Kden
|
||||
void write_Ks_over_Kden(Grid&, int);
|
||||
// Kxi_over_Kden
|
||||
void write_Kxi_over_Kden(Grid&, int);
|
||||
// Keta_over_Kden
|
||||
void write_Keta_over_Kden(Grid&, int);
|
||||
// Ks_update
|
||||
void write_Ks_update(Grid&, int);
|
||||
// Kxi_update
|
||||
void write_Kxi_update(Grid&, int);
|
||||
// Keta_update
|
||||
void write_Keta_update(Grid&, int);
|
||||
// Ks_density_update
|
||||
void write_Ks_density_update(Grid&, int);
|
||||
// Kxi_density_update
|
||||
void write_Kxi_density_update(Grid&, int);
|
||||
// Keta_density_update
|
||||
void write_Keta_density_update(Grid&, int);
|
||||
// Ks_descent_dir_loc
|
||||
void write_Ks_descent_dir(Grid&, int);
|
||||
// Kxi_descent_dir_loc
|
||||
void write_Kxi_descent_dir(Grid&, int);
|
||||
// Keta_descent_dir_loc
|
||||
void write_Keta_descent_dir(Grid&, int);
|
||||
|
||||
// write all concerning parameters
|
||||
std::vector<CUSTOMREAL> get_grid_data(CUSTOMREAL * data);
|
||||
// void write_concerning_parameters(Grid&, int, InputParams&);
|
||||
|
||||
// 2d traveltime field for teleseismic source
|
||||
void write_2d_travel_time_field(CUSTOMREAL*, CUSTOMREAL*, CUSTOMREAL*, int, int, CUSTOMREAL);
|
||||
void h5_create_and_write_dataset_2d(std::string&, int, int*, int, CUSTOMREAL*);
|
||||
void read_2d_travel_time_field(std::string&, CUSTOMREAL*, int, int);
|
||||
|
||||
// 2d field for 1dinv
|
||||
void write_1dinv_field(CUSTOMREAL*, CUSTOMREAL*, CUSTOMREAL*, int, int, std::string&);
|
||||
|
||||
// merged model
|
||||
void write_T_merged(Grid&, InputParams&, int);
|
||||
void write_merged_model(Grid&, InputParams&, std::string);
|
||||
bool node_of_this_subdomain(int*, const int&, const int&, const int&);
|
||||
|
||||
//
|
||||
// read functions
|
||||
//
|
||||
|
||||
// read model data
|
||||
void read_model(std::string&, const char*, CUSTOMREAL*, int, int, int);
|
||||
// read Travel time from file for earthquake relocation and common receiver double-difference
|
||||
void read_T(Grid&);
|
||||
// read Travel time from a temporal file for earthquake relocation and common receiver double-difference
|
||||
void read_T_tmp(Grid&);
|
||||
|
||||
void read_data_ascii(Grid&, std::string&);
|
||||
|
||||
//
|
||||
// delete functions
|
||||
//
|
||||
// Any deletion of data should be done on a created hdf5. Because the design of hdf5 is that
|
||||
// it is not possible to delete data from an existing hdf5 file. The only way to do so is to
|
||||
// create a new hdf5 file and copy the necessary data from the old one to the new one.
|
||||
// Therefore, the deletion of data is not implemented in this class.
|
||||
// Reference : https://stackoverflow.com/questions/1124994/removing-data-from-a-hdf5-file
|
||||
|
||||
private:
|
||||
// member variables
|
||||
std::string h5_output_grid_fname = "out_data_grid.h5"; // output file name
|
||||
std::string h5_output_fname = "out_data.h5"; // output file name
|
||||
std::string h5_output_fname_tmp = "out_data_tmp.h5"; // output file name
|
||||
std::string xdmf_output_fname = "out_data.xmf"; // output file name
|
||||
std::string h5_group_name_grid = "Mesh"; // group name for grid data
|
||||
std::string h5_group_name_event = "Event"; // group name for event data
|
||||
std::string h5_group_name_data = "Data"; // group name for event data
|
||||
|
||||
std::string h5_dset_name_node_coords_x = "node_coords_x"; // dataset name for node coordinates
|
||||
std::string h5_dset_name_node_coords_y = "node_coords_y"; // dataset name for node coordinates
|
||||
std::string h5_dset_name_node_coords_z = "node_coords_z"; // dataset name for node coordinates
|
||||
std::string h5_dset_name_node_coords_p = "node_coords_p"; // dataset name for node coordinates
|
||||
std::string h5_dset_name_node_coords_t = "node_coords_t"; // dataset name for node coordinates
|
||||
std::string h5_dset_name_node_coords_r = "node_coords_r"; // dataset name for node coordinates
|
||||
std::string h5_dset_name_elem_conn = "elem_conn"; // dataset name for element connectivity
|
||||
std::string h5_dset_name_procid = "procid"; // dataset name for my_procs
|
||||
|
||||
std::string h5_file_and_group = h5_output_grid_fname + ":/" + h5_group_name_grid;
|
||||
std::string h5_whole_name_conn = h5_file_and_group + "/" + h5_dset_name_elem_conn;
|
||||
std::string h5_whole_name_x = h5_file_and_group + "/" + h5_dset_name_node_coords_x;
|
||||
std::string h5_whole_name_y = h5_file_and_group + "/" + h5_dset_name_node_coords_y;
|
||||
std::string h5_whole_name_z = h5_file_and_group + "/" + h5_dset_name_node_coords_z;
|
||||
std::string h5_whole_name_proc = h5_file_and_group + "/" + h5_dset_name_procid;
|
||||
|
||||
// index of current simulation used for naming output files and datasets
|
||||
int id_sim_src;
|
||||
// name of current simulation used for naming output files and datasets
|
||||
std::string name_sim_src;
|
||||
|
||||
int nnodes_glob = 0;
|
||||
int nelms_glob = 0;
|
||||
|
||||
int array_rank_default = 2; // default array rank
|
||||
|
||||
int custom_real_flag; // custom real type
|
||||
|
||||
// custom data type for real
|
||||
std::string type_name = "Float";
|
||||
std::string type_size = "4";
|
||||
|
||||
// helper for xdmf/xml file io
|
||||
tinyxml2::XMLDocument* doc;
|
||||
tinyxml2::XMLNode* xdmf, *domain, *grid;
|
||||
tinyxml2::XMLNode* inversions, *inv_grid;
|
||||
void init_xdmf_file();
|
||||
void write_xdmf_file();
|
||||
void finalize_xdmf_file();
|
||||
void insert_data_xdmf(std::string&, std::string&, std::string&);
|
||||
|
||||
#ifdef USE_HDF5
|
||||
// h5 variables
|
||||
hid_t file_id;
|
||||
hid_t group_id;
|
||||
hid_t space_id;
|
||||
hid_t dset_id;
|
||||
hid_t plist_id, plist_id_dset;
|
||||
hid_t file_dspace_id, mem_dspace_id;
|
||||
herr_t status;
|
||||
hid_t plist_id_2d;
|
||||
hid_t file_id_2d;
|
||||
|
||||
// h5 low level utilities
|
||||
void h5_create_file_by_group_main(std::string&);
|
||||
void h5_open_file_by_group_main(std::string&);
|
||||
void h5_open_file_collective(std::string&);
|
||||
void h5_open_file_collective_input(std::string&);
|
||||
void h5_close_file_by_group_main();
|
||||
void h5_close_file_collective();
|
||||
|
||||
void h5_create_group_by_group_main(std::string& );
|
||||
void h5_open_group_by_group_main(std::string& );
|
||||
void h5_open_group_collective(std::string& );
|
||||
void h5_close_group_by_group_main();
|
||||
void h5_close_group_collective();
|
||||
|
||||
void h5_create_dataset(std::string&, int, int*, int, bool no_group = false);
|
||||
void h5_open_dataset(std::string&);
|
||||
void h5_open_dataset_no_group(std::string&);
|
||||
void h5_close_dataset();
|
||||
|
||||
template <typename T>
|
||||
void h5_write_array(std::string&, int, int*, T*, int offset_one, int offset_two=0, int offset_three=0, bool no_group=false);
|
||||
|
||||
template <typename T>
|
||||
void h5_read_array(std::string&, int, int*, T*, int*, bool);
|
||||
|
||||
template <typename T>
|
||||
void h5_read_array_simple(std::string&, T*);
|
||||
|
||||
void read_data_h5(Grid&, CUSTOMREAL*, std::string, std::string);
|
||||
#endif // HDF_HDF5
|
||||
|
||||
// utilities
|
||||
std::string create_fname_ascii(std::string&);
|
||||
std::string create_fname_ascii_model(std::string&);
|
||||
|
||||
// flag for data type
|
||||
const bool model_data = true;
|
||||
const bool src_data = false;
|
||||
|
||||
};
|
||||
|
||||
#endif // IO_H
|
||||
161
include/iterator.h
Normal file
161
include/iterator.h
Normal file
@@ -0,0 +1,161 @@
|
||||
#ifndef ITERATOR_H
|
||||
#define ITERATOR_H
|
||||
|
||||
#include <algorithm>
|
||||
#include <initializer_list>
|
||||
#include <vector>
|
||||
#include <math.h>
|
||||
#include "grid.h"
|
||||
#include "input_params.h"
|
||||
#include "utils.h"
|
||||
#include "source.h"
|
||||
#include "io.h"
|
||||
#include "timer.h"
|
||||
#include "eikonal_solver_2d.h"
|
||||
|
||||
#ifdef USE_CUDA
|
||||
#include "grid_wrapper.cuh"
|
||||
#include "iterator_wrapper.cuh"
|
||||
#endif
|
||||
|
||||
//#ifdef USE_SIMD
|
||||
#include "simd_conf.h"
|
||||
//#endif
|
||||
|
||||
class Iterator {
|
||||
public:
|
||||
Iterator(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
|
||||
virtual ~Iterator();
|
||||
// regional source
|
||||
void run_iteration_forward(InputParams&, Grid&, IO_utils&, bool&); // run forward iteration till convergence
|
||||
void run_iteration_adjoint(InputParams&, Grid&, IO_utils&, int); // run adjoint iteration till convergence
|
||||
|
||||
void initialize_arrays(InputParams&, IO_utils&, Grid&, Source&, const std::string&); // initialize factors etc.
|
||||
|
||||
protected:
|
||||
void assign_processes_for_levels(Grid&, InputParams&); // assign intra-node processes for each sweeping level
|
||||
void set_sweep_direction(int); // set sweep direction
|
||||
// regional source
|
||||
virtual void do_sweep(int, Grid&, InputParams&){}; // do sweeping with ordinal method
|
||||
void calculate_stencil_1st_order(Grid&, int&, int&, int&); // calculate stencil for 1st order
|
||||
void calculate_stencil_3rd_order(Grid&, int&, int&, int&); // calculate stencil for 3rd order
|
||||
void calculate_stencil_1st_order_upwind(Grid&, int&, int&, int&); // calculate stencil for 1st order in upwind form
|
||||
void calculate_boundary_nodes(Grid&); // calculate boundary values
|
||||
// teleseismic source
|
||||
void calculate_stencil_1st_order_tele(Grid&, int&, int&, int&); // calculate stencil for 1st order
|
||||
void calculate_stencil_3rd_order_tele(Grid&, int&, int&, int&); // calculate stencil for 3rd order
|
||||
void calculate_stencil_1st_order_upwind_tele(Grid&, int&, int&, int&); // calculate stencil for 1st order in upwind form
|
||||
void calculate_boundary_nodes_tele(Grid&, int&, int&, int&); // calculate boundary values for teleseismic source
|
||||
|
||||
void calculate_boundary_nodes_adj(Grid&, int&, int&, int&); // calculate boundary values for adjoint source (all zeros)
|
||||
|
||||
// Hamiltonian calculation
|
||||
inline CUSTOMREAL calc_LF_Hamiltonian(Grid&, CUSTOMREAL& ,CUSTOMREAL& , \
|
||||
CUSTOMREAL& ,CUSTOMREAL& , \
|
||||
CUSTOMREAL& ,CUSTOMREAL&, \
|
||||
int&, int&, int& );
|
||||
inline CUSTOMREAL calc_LF_Hamiltonian_tele(Grid&, CUSTOMREAL& ,CUSTOMREAL& , \
|
||||
CUSTOMREAL& ,CUSTOMREAL& , \
|
||||
CUSTOMREAL& ,CUSTOMREAL&, \
|
||||
int&, int&, int& );
|
||||
|
||||
// methods for adjoint field calculation
|
||||
void init_delta_and_Tadj(Grid&, InputParams&); // initialize delta and Tadj
|
||||
void init_delta_and_Tadj_density(Grid&, InputParams&); // initialize delta and Tadj_density
|
||||
void fix_boundary_Tadj(Grid&); // fix boundary values for Tadj
|
||||
virtual void do_sweep_adj(int, Grid&, InputParams&){}; // do sweeping with ordinal method for adjoint field
|
||||
void calculate_stencil_adj(Grid&, int&, int&, int&); // calculate stencil for 1st order for adjoint field
|
||||
|
||||
// grid point information
|
||||
int* _nr, *_nt, *_np; // number of grid points on the direction r, theta, phi
|
||||
CUSTOMREAL* _dr, *_dt, *_dp; // grid spacing on the direction r, theta, phi
|
||||
int nr, nt, np; // number of grid points on the direction r, theta, phi
|
||||
CUSTOMREAL dr, dt, dp; // grid spacing on the direction r, theta, phi
|
||||
int st_level; // start level for sweeping
|
||||
int ed_level; // end level for sweeping
|
||||
std::vector< std::vector<int> > ijk_for_this_subproc; // ijk=I2V(i,j,k) for this process (level, ijk)
|
||||
int max_n_nodes_plane; // maximum number of nodes on a plane
|
||||
|
||||
|
||||
#if defined USE_SIMD || defined USE_CUDA
|
||||
// stencil dumps
|
||||
// first orders
|
||||
CUSTOMREAL *dump_c__;// center of C
|
||||
// all grid data expect tau pre-load strategy (iswap, ilevel, inodes)
|
||||
#if USE_AVX512 || USE_AVX || USE_NEON || defined USE_CUDA
|
||||
std::vector<std::vector<int*>> vv_i__j__k__, vv_ip1j__k__, vv_im1j__k__, vv_i__jp1k__, vv_i__jm1k__, vv_i__j__kp1, vv_i__j__km1;
|
||||
std::vector<std::vector<int*>> vv_ip2j__k__, vv_im2j__k__, vv_i__jp2k__, vv_i__jm2k__, vv_i__j__kp2, vv_i__j__km2;
|
||||
#elif USE_ARM_SVE
|
||||
std::vector<std::vector<uint64_t*>> vv_i__j__k__, vv_ip1j__k__, vv_im1j__k__, vv_i__jp1k__, vv_i__jm1k__, vv_i__j__kp1, vv_i__j__km1;
|
||||
std::vector<std::vector<uint64_t*>> vv_ip2j__k__, vv_im2j__k__, vv_i__jp2k__, vv_i__jm2k__, vv_i__j__kp2, vv_i__j__km2;
|
||||
#endif
|
||||
std::vector<std::vector<CUSTOMREAL*>> vv_iip, vv_jjt, vv_kkr;
|
||||
|
||||
std::vector<std::vector<CUSTOMREAL*>> vv_fac_a, vv_fac_b, vv_fac_c, vv_fac_f, vv_T0v, vv_T0r, vv_T0t, vv_T0p, vv_fun;
|
||||
std::vector<std::vector<CUSTOMREAL*>>vv_change;
|
||||
std::vector<std::vector<bool*>>vv_change_bl;
|
||||
|
||||
template <typename T>
|
||||
void preload_indices(std::vector<std::vector<T*>> &vi, std::vector<std::vector<T*>> &, std::vector<std::vector<T*>> &, int, int, int);
|
||||
template <typename T>
|
||||
void preload_indices_1d(std::vector<std::vector<T*>> &, int, int, int);
|
||||
template <typename T>
|
||||
std::vector<std::vector<CUSTOMREAL*>> preload_array(T* a);
|
||||
std::vector<std::vector<bool*>> preload_array_bl(bool* a);
|
||||
template <typename T>
|
||||
void free_preloaded_array(std::vector<std::vector<T*>> &vvv){
|
||||
for (int iswap = 0; iswap < 8; iswap++){
|
||||
for (auto& vv : vvv.at(iswap)) free(vv);
|
||||
}
|
||||
}
|
||||
// flag for deallocation
|
||||
bool simd_allocated = false;
|
||||
bool simd_allocated_3rd = false;
|
||||
|
||||
#endif // USE_SIMD || USE_CUDA
|
||||
|
||||
#ifdef USE_CUDA
|
||||
Grid_on_device *gpu_grid;
|
||||
#endif
|
||||
|
||||
|
||||
const int nswp = 8; // number of sweeping directions
|
||||
int r_dirc, t_dirc, p_dirc; // sweeping directions
|
||||
CUSTOMREAL sigr, sigt, sigp; //
|
||||
CUSTOMREAL coe;
|
||||
CUSTOMREAL wp1, pp1, wp2, pp2;
|
||||
CUSTOMREAL wt1, pt1, wt2, pt2;
|
||||
CUSTOMREAL wr1, pr1, wr2, pr2;
|
||||
CUSTOMREAL Htau, tpT;
|
||||
|
||||
CUSTOMREAL ap1, bp1, ap2, bp2, ap, bp;
|
||||
CUSTOMREAL at1, bt1, at2, bt2, at, bt;
|
||||
CUSTOMREAL ar1, br1, ar2, br2, ar, br;
|
||||
|
||||
CUSTOMREAL bc_f2, eqn_a, eqn_b, eqn_c, eqn_Delta;
|
||||
CUSTOMREAL tmp_tau, tmp_T;
|
||||
CUSTOMREAL T_r, T_t, T_p, charact_r, charact_t, charact_p;
|
||||
bool is_causality;
|
||||
int count_cand;
|
||||
std::vector<CUSTOMREAL> canditate = std::vector<CUSTOMREAL>(60);
|
||||
|
||||
|
||||
// iteration control
|
||||
int iter_count = 0;
|
||||
CUSTOMREAL ini_diff_L1 = HUGE_VAL, ini_diff_Linf = HUGE_VAL;
|
||||
CUSTOMREAL ini_err_L1 = HUGE_VAL, ini_err_Linf = HUGE_VAL;
|
||||
CUSTOMREAL cur_diff_L1 = HUGE_VAL, cur_diff_Linf = HUGE_VAL;
|
||||
CUSTOMREAL cur_err_L1 = HUGE_VAL, cur_err_Linf = HUGE_VAL;
|
||||
|
||||
// teleseismic flag
|
||||
bool is_teleseismic = false;
|
||||
|
||||
// second run for hybrid order method
|
||||
bool is_second_run = false;
|
||||
|
||||
};
|
||||
|
||||
// define derived classes for each iteration scheme
|
||||
|
||||
|
||||
#endif // ITERATOR_H
|
||||
82
include/iterator_legacy.h
Normal file
82
include/iterator_legacy.h
Normal file
@@ -0,0 +1,82 @@
|
||||
#ifndef ITERATOR_legacy_H
|
||||
#define ITERATOR_legacy_H
|
||||
|
||||
#include "iterator.h"
|
||||
|
||||
|
||||
class Iterator_legacy : public Iterator {
|
||||
public:
|
||||
Iterator_legacy(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
|
||||
protected:
|
||||
void do_sweep_adj(int, Grid&, InputParams&) override ; // do sweeping for adjoint routine
|
||||
virtual void do_sweep(int, Grid&, InputParams&) {}; // do sweeping
|
||||
};
|
||||
|
||||
|
||||
class Iterator_legacy_tele : public Iterator {
|
||||
public:
|
||||
Iterator_legacy_tele(InputParams& , Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
|
||||
protected:
|
||||
void do_sweep_adj(int, Grid&, InputParams&) override ; // do sweeping for adjoint routine
|
||||
virtual void do_sweep(int, Grid&, InputParams&) {}; // do sweeping
|
||||
};
|
||||
|
||||
|
||||
class Iterator_legacy_1st_order : public Iterator_legacy {
|
||||
|
||||
public:
|
||||
Iterator_legacy_1st_order(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
|
||||
private:
|
||||
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
|
||||
|
||||
};
|
||||
|
||||
class Iterator_legacy_3rd_order : public Iterator_legacy {
|
||||
|
||||
public:
|
||||
Iterator_legacy_3rd_order(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
|
||||
//void run_iteration_forward(InputParams&, Grid&, IO_utils&, bool&);
|
||||
private:
|
||||
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
|
||||
|
||||
};
|
||||
|
||||
class Iterator_legacy_1st_order_upwind : public Iterator_legacy {
|
||||
|
||||
public:
|
||||
Iterator_legacy_1st_order_upwind(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
|
||||
private:
|
||||
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
|
||||
};
|
||||
|
||||
class Iterator_legacy_1st_order_tele : public Iterator_legacy_tele {
|
||||
|
||||
public:
|
||||
Iterator_legacy_1st_order_tele(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
|
||||
//void run_iteration_forward(InputParams&, Grid&, IO_utils&, bool&);
|
||||
private:
|
||||
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
|
||||
|
||||
};
|
||||
|
||||
class Iterator_legacy_3rd_order_tele : public Iterator_legacy_tele {
|
||||
|
||||
public:
|
||||
Iterator_legacy_3rd_order_tele(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
|
||||
//void run_iteration_forward(InputParams&, Grid&, IO_utils&, bool&);
|
||||
private:
|
||||
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
|
||||
|
||||
};
|
||||
|
||||
class Iterator_legacy_1st_order_upwind_tele : public Iterator_legacy_tele {
|
||||
|
||||
public:
|
||||
Iterator_legacy_1st_order_upwind_tele(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
|
||||
//void run_iteration_forward(InputParams&, Grid&, IO_utils&, bool&);
|
||||
private:
|
||||
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
|
||||
|
||||
};
|
||||
|
||||
#endif // ITERATOR_LEGACY_H
|
||||
65
include/iterator_level.h
Normal file
65
include/iterator_level.h
Normal file
@@ -0,0 +1,65 @@
|
||||
#ifndef ITERATOR_LEVEL_H
|
||||
#define ITERATOR_LEVEL_H
|
||||
|
||||
#include "iterator.h"
|
||||
|
||||
|
||||
class Iterator_level : public Iterator {
|
||||
public:
|
||||
Iterator_level(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
|
||||
protected:
|
||||
void do_sweep_adj(int, Grid&, InputParams&) override ; // do sweeping for adjoint routine
|
||||
virtual void do_sweep(int, Grid&, InputParams&) {}; // do sweeping
|
||||
};
|
||||
|
||||
class Iterator_level_tele : public Iterator {
|
||||
public:
|
||||
Iterator_level_tele(InputParams& , Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
|
||||
protected:
|
||||
void do_sweep_adj(int, Grid&, InputParams&) override ; // do sweeping for adjoint routine
|
||||
virtual void do_sweep(int, Grid&, InputParams&) {}; // do sweeping
|
||||
};
|
||||
|
||||
class Iterator_level_1st_order : public Iterator_level {
|
||||
public:
|
||||
Iterator_level_1st_order(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
|
||||
private:
|
||||
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
|
||||
};
|
||||
|
||||
class Iterator_level_3rd_order : public Iterator_level {
|
||||
public:
|
||||
Iterator_level_3rd_order(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
|
||||
private:
|
||||
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
|
||||
};
|
||||
|
||||
class Iterator_level_1st_order_upwind : public Iterator_level {
|
||||
public:
|
||||
Iterator_level_1st_order_upwind(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
|
||||
private:
|
||||
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
|
||||
};
|
||||
|
||||
class Iterator_level_1st_order_tele : public Iterator_level_tele {
|
||||
public:
|
||||
Iterator_level_1st_order_tele(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
|
||||
private:
|
||||
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
|
||||
};
|
||||
|
||||
class Iterator_level_3rd_order_tele : public Iterator_level_tele {
|
||||
public:
|
||||
Iterator_level_3rd_order_tele(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
|
||||
private:
|
||||
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
|
||||
};
|
||||
|
||||
class Iterator_level_1st_order_upwind_tele : public Iterator_level_tele {
|
||||
public:
|
||||
Iterator_level_1st_order_upwind_tele(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
|
||||
private:
|
||||
void do_sweep(int, Grid&, InputParams&) override ; // do sweeping
|
||||
};
|
||||
|
||||
#endif // ITERATOR_LEVEL_H
|
||||
98
include/iterator_selector.h
Normal file
98
include/iterator_selector.h
Normal file
@@ -0,0 +1,98 @@
|
||||
#ifndef ITERATOR_SELECTOR_H
|
||||
#define ITERATOR_SELECTOR_H
|
||||
|
||||
#include <memory>
|
||||
#include "iterator.h"
|
||||
#include "iterator_legacy.h"
|
||||
#include "iterator_level.h"
|
||||
|
||||
|
||||
void select_iterator(InputParams& IP, Grid& grid, Source& src, IO_utils& io, const std::string& src_name, \
|
||||
bool first_init, bool is_teleseismic, std::unique_ptr<Iterator>& It, bool is_second_run) {
|
||||
// initialize iterator object
|
||||
|
||||
if (!is_teleseismic){
|
||||
if (IP.get_sweep_type() == SWEEP_TYPE_LEGACY) {
|
||||
if (IP.get_stencil_order() == 1){
|
||||
if (IP.get_stencil_type() == UPWIND){
|
||||
It = std::make_unique<Iterator_legacy_1st_order_upwind>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
|
||||
} else {
|
||||
It = std::make_unique<Iterator_legacy_1st_order>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
|
||||
}
|
||||
} else if (IP.get_stencil_order() == 3){
|
||||
It = std::make_unique<Iterator_legacy_3rd_order>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
|
||||
if (IP.get_stencil_type() == UPWIND){
|
||||
std::cout << "WARNING: Upwind Stencil type not supported, using 3rd order non upwind scheme (LF)" << std::endl;
|
||||
}
|
||||
} else{
|
||||
std::cout << "ERROR: Stencil order not supported" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
} else if (IP.get_sweep_type() == SWEEP_TYPE_LEVEL){
|
||||
if (IP.get_stencil_order() == 1){
|
||||
if (IP.get_stencil_type() == UPWIND){
|
||||
// std::cout << "WARNING: Upwind Stencil type not supported, using non upwind scheme (LF)" << std::endl;
|
||||
It = std::make_unique<Iterator_level_1st_order_upwind>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
|
||||
} else {
|
||||
It = std::make_unique<Iterator_level_1st_order>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
|
||||
}
|
||||
} else if (IP.get_stencil_order() == 3){
|
||||
It = std::make_unique<Iterator_level_3rd_order>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
|
||||
if (IP.get_stencil_type() == UPWIND){
|
||||
std::cout << "WARNING: Upwind Stencil type not supported, using 3rd order non upwind scheme (LF)" << std::endl;
|
||||
}
|
||||
} else{
|
||||
std::cout << "ERROR: Stencil order not supported" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
} else {
|
||||
std::cout << "ERROR: Sweep type not supported" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
} else { // teleseismic event
|
||||
if (IP.get_sweep_type() == SWEEP_TYPE_LEGACY) {
|
||||
if (IP.get_stencil_order() == 1){
|
||||
if (IP.get_stencil_type() == UPWIND){
|
||||
It = std::make_unique<Iterator_legacy_1st_order_upwind_tele>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
|
||||
} else {
|
||||
It = std::make_unique<Iterator_legacy_1st_order_tele>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
|
||||
}
|
||||
} else if (IP.get_stencil_order() == 3){
|
||||
It = std::make_unique<Iterator_legacy_3rd_order_tele>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
|
||||
if (IP.get_stencil_type() == UPWIND){
|
||||
std::cout << "WARNING: Upwind Stencil type not supported, using non upwind scheme (LF)" << std::endl;
|
||||
}
|
||||
} else{
|
||||
std::cout << "ERROR: Stencil order not supported" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
} else if (IP.get_sweep_type() == SWEEP_TYPE_LEVEL){
|
||||
if (IP.get_stencil_order() == 1){
|
||||
if (IP.get_stencil_type() == UPWIND){
|
||||
It = std::make_unique<Iterator_level_1st_order_upwind_tele>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
|
||||
} else {
|
||||
It = std::make_unique<Iterator_level_1st_order_tele>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
|
||||
std::cout << "WARNING: SWEEP_TYPE_LEVEL for non-unwind solver does not work. Index wrong currently" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
} else if (IP.get_stencil_order() == 3){
|
||||
It = std::make_unique<Iterator_level_3rd_order_tele>(IP, grid, src, io, src_name, first_init, is_teleseismic, is_second_run);
|
||||
std::cout << "WARNING: SWEEP_TYPE_LEVEL for non-unwind solver does not work. Index wrong currently" << std::endl;
|
||||
exit(1);
|
||||
if (IP.get_stencil_type() == UPWIND){
|
||||
std::cout << "WARNING: Upwind Stencil type not supported, using non upwind scheme (LF)" << std::endl;
|
||||
}
|
||||
} else{
|
||||
std::cout << "ERROR: Stencil order not supported" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
} else {
|
||||
std::cout << "ERROR: Sweep type not supported" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
226
include/kernel.h
Normal file
226
include/kernel.h
Normal file
@@ -0,0 +1,226 @@
|
||||
#ifndef KERNEL_H
|
||||
#define KERNEL_H
|
||||
|
||||
#include "config.h"
|
||||
#include "grid.h"
|
||||
#include "input_params.h"
|
||||
|
||||
void calculate_sensitivity_kernel(Grid& grid, InputParams& IP, const std::string& name_sim_src){
|
||||
// calculate sensitivity kernel
|
||||
|
||||
// kernel calculation will be done only by the subdom_main
|
||||
if (subdom_main) {
|
||||
// get the necessary parameters
|
||||
int np = loc_I;
|
||||
int nt = loc_J;
|
||||
int nr = loc_K;
|
||||
CUSTOMREAL dr = grid.dr;
|
||||
CUSTOMREAL dt = grid.dt;
|
||||
CUSTOMREAL dp = grid.dp;
|
||||
CUSTOMREAL src_lon = IP.get_src_lon( name_sim_src);
|
||||
CUSTOMREAL src_lat = IP.get_src_lat( name_sim_src);
|
||||
CUSTOMREAL src_r = IP.get_src_radius(name_sim_src);
|
||||
|
||||
CUSTOMREAL weight = _1_CR;
|
||||
|
||||
// inner points
|
||||
for (int kkr = 1; kkr < nr-1; kkr++) {
|
||||
for (int jjt = 1; jjt < nt-1; jjt++) {
|
||||
for (int iip = 1; iip < np-1; iip++) {
|
||||
|
||||
// calculate the kernel
|
||||
CUSTOMREAL Tr_km = (grid.T_loc[I2V(iip,jjt,kkr+1)] - grid.T_loc[I2V(iip,jjt,kkr-1)]) / (_2_CR * dr);
|
||||
CUSTOMREAL Ttheta_km = (grid.T_loc[I2V(iip,jjt+1,kkr)] - grid.T_loc[I2V(iip,jjt-1,kkr)]) / (_2_CR * dt) / grid.r_loc_1d[kkr];
|
||||
CUSTOMREAL Tphi_km = (grid.T_loc[I2V(iip+1,jjt,kkr)] - grid.T_loc[I2V(iip-1,jjt,kkr)]) / (_2_CR * dp) / (grid.r_loc_1d[kkr]*std::cos(grid.t_loc_1d[jjt]));
|
||||
|
||||
CUSTOMREAL azi_ratio = std::sqrt((my_square(Ttheta_km) + my_square(Tphi_km))/(my_square(Tr_km) + my_square(Ttheta_km) + my_square(Tphi_km)));
|
||||
|
||||
// mask within one grid around the source
|
||||
if ((std::abs(grid.r_loc_1d[kkr] - src_r) >= dr) ||
|
||||
(std::abs(grid.t_loc_1d[jjt] - src_lat) >= dt) ||
|
||||
(std::abs(grid.p_loc_1d[iip] - src_lon) >= dp)) {
|
||||
// density of ks
|
||||
grid.Ks_density_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_density_loc[I2V(iip,jjt,kkr)];
|
||||
|
||||
// density of kxi
|
||||
// grid.Kxi_density_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_density_loc[I2V(iip,jjt,kkr)];
|
||||
grid.Kxi_density_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_density_loc[I2V(iip,jjt,kkr)] * azi_ratio;
|
||||
|
||||
// density of keta
|
||||
// grid.Keta_density_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_density_loc[I2V(iip,jjt,kkr)];
|
||||
grid.Keta_density_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_density_loc[I2V(iip,jjt,kkr)] * azi_ratio;
|
||||
|
||||
|
||||
if (IP.get_update_slowness()==1){ // we need to update slowness
|
||||
// Kernel w r t slowness s
|
||||
grid.Ks_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_loc[I2V(iip,jjt,kkr)] * my_square(grid.fun_loc[I2V(iip,jjt,kkr)]);
|
||||
} else {
|
||||
grid.Ks_loc[I2V(iip,jjt,kkr)] = _0_CR;
|
||||
}
|
||||
|
||||
|
||||
if (IP.get_update_azi_ani()){ // we need to update azimuthal anisotropy
|
||||
// Kernel w r t anisotrophy xi
|
||||
if (isZero(std::sqrt(my_square(grid.xi_loc[I2V(iip,jjt,kkr)])+my_square(grid.eta_loc[I2V(iip,jjt,kkr)])))) {
|
||||
grid.Kxi_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_loc[I2V(iip,jjt,kkr)] \
|
||||
* (my_square(Ttheta_km) - my_square(Tphi_km));
|
||||
|
||||
grid.Keta_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_loc[I2V(iip,jjt,kkr)] \
|
||||
* ( -_2_CR * Ttheta_km * Tphi_km );
|
||||
|
||||
} else {
|
||||
grid.Kxi_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_loc[I2V(iip,jjt,kkr)] \
|
||||
* ((- GAMMA * grid.xi_loc[I2V(iip,jjt,kkr)] / \
|
||||
std::sqrt(my_square(grid.xi_loc[ I2V(iip,jjt,kkr)]) + my_square(grid.eta_loc[I2V(iip,jjt,kkr)]))) * my_square(Tr_km) \
|
||||
+ my_square(Ttheta_km)
|
||||
- my_square(Tphi_km));
|
||||
|
||||
grid.Keta_loc[I2V(iip,jjt,kkr)] += weight * grid.Tadj_loc[I2V(iip,jjt,kkr)] \
|
||||
* (( - GAMMA * grid.eta_loc[I2V(iip,jjt,kkr)]/ \
|
||||
std::sqrt(my_square(grid.xi_loc[I2V(iip,jjt,kkr)]) + my_square(grid.eta_loc[I2V(iip,jjt,kkr)]))) * my_square(Tr_km) \
|
||||
- _2_CR * Ttheta_km * Tphi_km );
|
||||
}
|
||||
} else {
|
||||
grid.Kxi_loc[I2V(iip,jjt,kkr)] = _0_CR;
|
||||
grid.Keta_loc[I2V(iip,jjt,kkr)] = _0_CR;
|
||||
}
|
||||
|
||||
} else{
|
||||
grid.Ks_loc[I2V(iip,jjt,kkr)] += _0_CR;
|
||||
grid.Kxi_loc[I2V(iip,jjt,kkr)] += _0_CR;
|
||||
grid.Keta_loc[I2V(iip,jjt,kkr)] += _0_CR;
|
||||
|
||||
grid.Ks_density_loc[I2V(iip,jjt,kkr)] += _0_CR;
|
||||
grid.Kxi_density_loc[I2V(iip,jjt,kkr)] += _0_CR;
|
||||
grid.Keta_density_loc[I2V(iip,jjt,kkr)] += _0_CR;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// boundary
|
||||
for (int kkr = 0; kkr < nr; kkr++) {
|
||||
for (int jjt = 0; jjt < nt; jjt++) {
|
||||
// set Ks Kxi Keta to zero
|
||||
if (grid.i_first()){
|
||||
grid.Ks_loc[I2V(0,jjt,kkr)] = _0_CR;
|
||||
grid.Kxi_loc[I2V(0,jjt,kkr)] = _0_CR;
|
||||
grid.Keta_loc[I2V(0,jjt,kkr)] = _0_CR;
|
||||
grid.Ks_density_loc[I2V(0,jjt,kkr)] = _0_CR;
|
||||
grid.Kxi_density_loc[I2V(0,jjt,kkr)] = _0_CR;
|
||||
grid.Keta_density_loc[I2V(0,jjt,kkr)] = _0_CR;
|
||||
}
|
||||
if (grid.i_last()){
|
||||
grid.Ks_loc[I2V(np-1,jjt,kkr)] = _0_CR;
|
||||
grid.Kxi_loc[I2V(np-1,jjt,kkr)] = _0_CR;
|
||||
grid.Keta_loc[I2V(np-1,jjt,kkr)] = _0_CR;
|
||||
grid.Ks_density_loc[I2V(np-1,jjt,kkr)] = _0_CR;
|
||||
grid.Kxi_density_loc[I2V(np-1,jjt,kkr)] = _0_CR;
|
||||
grid.Keta_density_loc[I2V(np-1,jjt,kkr)]= _0_CR;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int kkr = 0; kkr < nr; kkr++) {
|
||||
for (int iip = 0; iip < np; iip++) {
|
||||
// set Ks Kxi Keta to zero
|
||||
if (grid.j_first()){
|
||||
grid.Ks_loc[I2V(iip,0,kkr)] = _0_CR;
|
||||
grid.Kxi_loc[I2V(iip,0,kkr)] = _0_CR;
|
||||
grid.Keta_loc[I2V(iip,0,kkr)] = _0_CR;
|
||||
grid.Ks_density_loc[I2V(iip,0,kkr)] = _0_CR;
|
||||
grid.Kxi_density_loc[I2V(iip,0,kkr)] = _0_CR;
|
||||
grid.Keta_density_loc[I2V(iip,0,kkr)] = _0_CR;
|
||||
}
|
||||
if (grid.j_last()){
|
||||
grid.Ks_loc[I2V(iip,nt-1,kkr)] = _0_CR;
|
||||
grid.Kxi_loc[I2V(iip,nt-1,kkr)] = _0_CR;
|
||||
grid.Keta_loc[I2V(iip,nt-1,kkr)] = _0_CR;
|
||||
grid.Ks_density_loc[I2V(iip,nt-1,kkr)] = _0_CR;
|
||||
grid.Kxi_density_loc[I2V(iip,nt-1,kkr)] = _0_CR;
|
||||
grid.Keta_density_loc[I2V(iip,nt-1,kkr)]= _0_CR;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int jjt = 0; jjt < nt; jjt++) {
|
||||
for (int iip = 0; iip < np; iip++) {
|
||||
// set Ks Kxi Keta to zero
|
||||
if (grid.k_first()){
|
||||
grid.Ks_loc[I2V(iip,jjt,0)] = _0_CR;
|
||||
grid.Kxi_loc[I2V(iip,jjt,0)] = _0_CR;
|
||||
grid.Keta_loc[I2V(iip,jjt,0)] = _0_CR;
|
||||
grid.Ks_density_loc[I2V(iip,jjt,0)] = _0_CR;
|
||||
grid.Kxi_density_loc[I2V(iip,jjt,0)] = _0_CR;
|
||||
grid.Keta_density_loc[I2V(iip,jjt,0)] = _0_CR;
|
||||
}
|
||||
if (grid.k_last()){
|
||||
grid.Ks_loc[I2V(iip,jjt,nr-1)] = _0_CR;
|
||||
grid.Kxi_loc[I2V(iip,jjt,nr-1)] = _0_CR;
|
||||
grid.Keta_loc[I2V(iip,jjt,nr-1)] = _0_CR;
|
||||
grid.Ks_density_loc[I2V(iip,jjt,nr-1)] = _0_CR;
|
||||
grid.Kxi_density_loc[I2V(iip,jjt,nr-1)] = _0_CR;
|
||||
grid.Keta_density_loc[I2V(iip,jjt,nr-1)]= _0_CR;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // end if subdom_main
|
||||
}
|
||||
|
||||
|
||||
void check_kernel_density(Grid& grid, InputParams& IP) {
|
||||
if(subdom_main){
|
||||
// check local kernel density positivity
|
||||
for (int i_loc = 0; i_loc < loc_I; i_loc++) {
|
||||
for (int j_loc = 0; j_loc < loc_J; j_loc++) {
|
||||
for (int k_loc = 0; k_loc < loc_K; k_loc++) {
|
||||
if (isNegative(grid.Ks_density_loc[I2V(i_loc,j_loc,k_loc)])){
|
||||
std::cout << "Warning, id_sim: " << id_sim << ", grid.Ks_density_loc[I2V(" << i_loc << "," << j_loc << "," << k_loc << ")] is less than 0, = "
|
||||
<< grid.Ks_density_loc[I2V(i_loc,j_loc,k_loc)]
|
||||
<< std::endl;
|
||||
// // print the source name
|
||||
// for (int i_src = 0; i_src < IP.n_src_this_sim_group; i_src++){
|
||||
// // get source info
|
||||
// const std::string name_sim_src = IP.get_src_name(i_src); // get_src_name must be run by all processes
|
||||
// std::cout << "id_sim: " << id_sim << ", "
|
||||
// << i_src + 1 << "/" << IP.n_src_this_sim_group << ", source name: " << name_sim_src << std::endl;
|
||||
// }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void sumup_kernels(Grid& grid) {
|
||||
if(subdom_main){
|
||||
int n_grids = loc_I*loc_J*loc_K;
|
||||
|
||||
allreduce_cr_sim_inplace(grid.Ks_loc, n_grids);
|
||||
allreduce_cr_sim_inplace(grid.Kxi_loc, n_grids);
|
||||
allreduce_cr_sim_inplace(grid.Keta_loc, n_grids);
|
||||
allreduce_cr_sim_inplace(grid.Ks_density_loc, n_grids);
|
||||
allreduce_cr_sim_inplace(grid.Kxi_density_loc, n_grids);
|
||||
allreduce_cr_sim_inplace(grid.Keta_density_loc, n_grids);
|
||||
|
||||
// share the values on boundary
|
||||
grid.send_recev_boundary_data(grid.Ks_loc);
|
||||
grid.send_recev_boundary_data(grid.Kxi_loc);
|
||||
grid.send_recev_boundary_data(grid.Keta_loc);
|
||||
grid.send_recev_boundary_data(grid.Ks_density_loc);
|
||||
grid.send_recev_boundary_data(grid.Kxi_density_loc);
|
||||
grid.send_recev_boundary_data(grid.Keta_density_loc);
|
||||
|
||||
grid.send_recev_boundary_data_kosumi(grid.Ks_loc);
|
||||
grid.send_recev_boundary_data_kosumi(grid.Kxi_loc);
|
||||
grid.send_recev_boundary_data_kosumi(grid.Keta_loc);
|
||||
grid.send_recev_boundary_data_kosumi(grid.Ks_density_loc);
|
||||
grid.send_recev_boundary_data_kosumi(grid.Kxi_density_loc);
|
||||
grid.send_recev_boundary_data_kosumi(grid.Keta_density_loc);
|
||||
}
|
||||
|
||||
synchronize_all_world();
|
||||
}
|
||||
|
||||
#endif
|
||||
577
include/lbfgs.h
Normal file
577
include/lbfgs.h
Normal file
@@ -0,0 +1,577 @@
|
||||
#ifndef LBFGS_H
|
||||
#define LBFGS_H
|
||||
|
||||
#include "config.h"
|
||||
#include "utils.h"
|
||||
#include "grid.h"
|
||||
#include "mpi_funcs.h"
|
||||
|
||||
inline CUSTOMREAL volume_domain = _0_CR; // volume of the domain
|
||||
inline CUSTOMREAL weight_Tikonov = _0_CR; // weight of the regularization term
|
||||
inline CUSTOMREAL weight_Tikonov_ani = _0_CR; // weight of the regularization term for anisotropic regularization
|
||||
inline int N_params = 3; // number of parameters to invert
|
||||
inline CUSTOMREAL q_0 = _0_CR; // initial cost function
|
||||
inline CUSTOMREAL qp_0 = _0_CR; // store initial p_k * grad(f_k) (descent_direction * gradient)
|
||||
inline int damp_weight_fun = _1_CR; // damp weights for fun
|
||||
inline int damp_weight_eta = _1_CR; // damp weights for eta
|
||||
inline int damp_weight_xi = _1_CR; // damp weights for xi
|
||||
|
||||
|
||||
void store_model_and_gradient(Grid& grid, int i_inv) {
|
||||
|
||||
if (subdom_main && id_sim==0) {
|
||||
|
||||
if (i_inv < Mbfgs){
|
||||
for (int k = 0; k < loc_K; k++) {
|
||||
for (int j = 0; j < loc_J; j++) {
|
||||
for (int i = 0; i < loc_I; i++) {
|
||||
grid.Ks_model_store_loc[ I2VLBFGS(i_inv,i,j,k)] = grid.fun_loc[I2V(i,j,k)];
|
||||
grid.Keta_model_store_loc[I2VLBFGS(i_inv,i,j,k)] = grid.eta_loc[I2V(i,j,k)];
|
||||
grid.Kxi_model_store_loc[ I2VLBFGS(i_inv,i,j,k)] = grid.xi_loc[ I2V(i,j,k)];
|
||||
grid.Ks_grad_store_loc[ I2VLBFGS(i_inv,i,j,k)] = grid.Ks_update_loc[ I2V(i,j,k)];
|
||||
grid.Keta_grad_store_loc[ I2VLBFGS(i_inv,i,j,k)] = grid.Keta_update_loc[I2V(i,j,k)];
|
||||
grid.Kxi_grad_store_loc[ I2VLBFGS(i_inv,i,j,k)] = grid.Kxi_update_loc[ I2V(i,j,k)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
// replace the current stored models and gradients
|
||||
for (int istore = 0; istore < Mbfgs-1; istore++){
|
||||
for (int k = 0; k < loc_K; k++) {
|
||||
for (int j = 0; j < loc_J; j++) {
|
||||
for (int i = 0; i < loc_I; i++) {
|
||||
grid.Ks_model_store_loc[ I2VLBFGS(istore,i,j,k)] = grid.Ks_model_store_loc[ I2VLBFGS(istore+1,i,j,k)];
|
||||
grid.Keta_model_store_loc[I2VLBFGS(istore,i,j,k)] = grid.Keta_model_store_loc[I2VLBFGS(istore+1,i,j,k)];
|
||||
grid.Kxi_model_store_loc[ I2VLBFGS(istore,i,j,k)] = grid.Kxi_model_store_loc[ I2VLBFGS(istore+1,i,j,k)];
|
||||
grid.Ks_grad_store_loc[ I2VLBFGS(istore,i,j,k)] = grid.Ks_grad_store_loc[ I2VLBFGS(istore+1,i,j,k)];
|
||||
grid.Keta_grad_store_loc[ I2VLBFGS(istore,i,j,k)] = grid.Keta_grad_store_loc[ I2VLBFGS(istore+1,i,j,k)];
|
||||
grid.Kxi_grad_store_loc[ I2VLBFGS(istore,i,j,k)] = grid.Kxi_grad_store_loc[ I2VLBFGS(istore+1,i,j,k)];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// store new model and gradient
|
||||
for (int k = 0; k < loc_K; k++) {
|
||||
for (int j = 0; j < loc_J; j++) {
|
||||
for (int i = 0; i < loc_I; i++) {
|
||||
grid.Ks_model_store_loc[ I2VLBFGS(Mbfgs-1,i,j,k)] = grid.fun_loc[I2V(i,j,k)];
|
||||
grid.Keta_model_store_loc[I2VLBFGS(Mbfgs-1,i,j,k)] = grid.eta_loc[I2V(i,j,k)];
|
||||
grid.Kxi_model_store_loc[ I2VLBFGS(Mbfgs-1,i,j,k)] = grid.xi_loc[ I2V(i,j,k)];
|
||||
grid.Ks_grad_store_loc[ I2VLBFGS(Mbfgs-1,i,j,k)] = grid.Ks_update_loc[ I2V(i,j,k)];
|
||||
grid.Keta_grad_store_loc[ I2VLBFGS(Mbfgs-1,i,j,k)] = grid.Keta_update_loc[I2V(i,j,k)];
|
||||
grid.Kxi_grad_store_loc[ I2VLBFGS(Mbfgs-1,i,j,k)] = grid.Kxi_update_loc[ I2V(i,j,k)];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // end subdom main and id_sim==0
|
||||
|
||||
}
|
||||
|
||||
|
||||
void calculate_descent_direction_lbfgs(Grid& grid, int i_inv) {
|
||||
|
||||
if (subdom_main && id_sim==0) {
|
||||
int imin = 0;
|
||||
int imax = 0;
|
||||
if (i_inv >= Mbfgs)
|
||||
imax = Mbfgs-2;
|
||||
else
|
||||
imax = i_inv-1;
|
||||
|
||||
CUSTOMREAL* ak_store = allocateMemory<CUSTOMREAL>(imax-imin+1, 2000);
|
||||
CUSTOMREAL* pk_store = allocateMemory<CUSTOMREAL>(imax-imin+1, 2001);
|
||||
|
||||
CUSTOMREAL* desc_wks_Ks = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 2002);
|
||||
CUSTOMREAL* desc_wks_Keta = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 2003);
|
||||
CUSTOMREAL* desc_wks_Kxi = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 2004);
|
||||
|
||||
CUSTOMREAL* wks_1_Ks = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 2005);
|
||||
CUSTOMREAL* wks_1_Keta = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 2006);
|
||||
CUSTOMREAL* wks_1_Kxi = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 2007);
|
||||
CUSTOMREAL* wks_2_Ks = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 2008);
|
||||
CUSTOMREAL* wks_2_Keta = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 2009);
|
||||
CUSTOMREAL* wks_2_Kxi = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 2010);
|
||||
|
||||
std::cout << "imax, imin: " << imax << ", " << imin << std::endl;
|
||||
|
||||
// initialize
|
||||
for (int i = 0; i < imax-imin+1; i++) {
|
||||
ak_store[i] = 0.0; // alpha
|
||||
pk_store[i] = 0.0; //rho
|
||||
}
|
||||
|
||||
for (int k = 0; k < loc_K; k++) {
|
||||
for (int j = 0; j < loc_J; j++) {
|
||||
for (int i = 0; i < loc_I; i++) {
|
||||
desc_wks_Ks[I2V(i,j,k)] = grid.Ks_grad_store_loc[ I2VLBFGS(imax+1,i,j,k)]; // d
|
||||
desc_wks_Keta[I2V(i,j,k)] = grid.Keta_grad_store_loc[I2VLBFGS(imax+1,i,j,k)]; // d
|
||||
desc_wks_Kxi[I2V(i,j,k)] = grid.Kxi_grad_store_loc[ I2VLBFGS(imax+1,i,j,k)]; // d
|
||||
wks_1_Ks[I2V(i,j,k)] = grid.Ks_grad_store_loc[ I2VLBFGS(imax+1,i,j,k)] - grid.Ks_grad_store_loc[ I2VLBFGS(imax,i,j,k)];
|
||||
wks_1_Keta[I2V(i,j,k)] = grid.Keta_grad_store_loc[I2VLBFGS(imax+1,i,j,k)] - grid.Keta_grad_store_loc[I2VLBFGS(imax,i,j,k)];
|
||||
wks_1_Kxi[I2V(i,j,k)] = grid.Kxi_grad_store_loc[ I2VLBFGS(imax+1,i,j,k)] - grid.Kxi_grad_store_loc[ I2VLBFGS(imax,i,j,k)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//calculate l2 norms
|
||||
CUSTOMREAL norm_yiter = 0.0;
|
||||
|
||||
norm_yiter += calc_l2norm(wks_1_Ks,loc_I*loc_J*loc_K);
|
||||
norm_yiter += calc_l2norm(wks_1_Keta,loc_I*loc_J*loc_K);
|
||||
norm_yiter += calc_l2norm(wks_1_Kxi,loc_I*loc_J*loc_K);
|
||||
|
||||
CUSTOMREAL tmp = norm_yiter;
|
||||
allreduce_cr_single(tmp, norm_yiter);
|
||||
|
||||
for (int iinv = imax; iinv >= imin; iinv--){
|
||||
|
||||
for (int k = 0; k < loc_K; k++) {
|
||||
for (int j = 0; j < loc_J; j++) {
|
||||
for (int i = 0; i < loc_I; i++) {
|
||||
wks_1_Ks[I2V(i,j,k)] = grid.Ks_grad_store_loc[ I2VLBFGS(iinv+1,i,j,k)] - grid.Ks_grad_store_loc[I2VLBFGS(iinv,i,j,k)];
|
||||
wks_1_Keta[I2V(i,j,k)] = grid.Keta_grad_store_loc[I2VLBFGS(iinv+1,i,j,k)] - grid.Keta_grad_store_loc[I2VLBFGS(iinv,i,j,k)];
|
||||
wks_1_Kxi[I2V(i,j,k)] = grid.Kxi_grad_store_loc[ I2VLBFGS(iinv+1,i,j,k)] - grid.Kxi_grad_store_loc[I2VLBFGS(iinv,i,j,k)];
|
||||
|
||||
wks_2_Ks[I2V(i,j,k)] = grid.Ks_model_store_loc[ I2VLBFGS(iinv+1,i,j,k)] - grid.Ks_model_store_loc[I2VLBFGS(iinv,i,j,k)];
|
||||
wks_2_Keta[I2V(i,j,k)] = grid.Keta_model_store_loc[I2VLBFGS(iinv+1,i,j,k)] - grid.Keta_model_store_loc[I2VLBFGS(iinv,i,j,k)];
|
||||
wks_2_Kxi[I2V(i,j,k)] = grid.Kxi_model_store_loc[ I2VLBFGS(iinv+1,i,j,k)] - grid.Kxi_model_store_loc[I2VLBFGS(iinv,i,j,k)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CUSTOMREAL pk = _0_CR;
|
||||
pk += dot_product(wks_1_Ks,wks_2_Ks,loc_I*loc_J*loc_K);
|
||||
pk += dot_product(wks_1_Keta,wks_2_Keta,loc_I*loc_J*loc_K);
|
||||
pk += dot_product(wks_1_Kxi,wks_2_Kxi,loc_I*loc_J*loc_K);
|
||||
CUSTOMREAL tmp = pk;
|
||||
allreduce_cr_single(tmp, pk);
|
||||
pk_store[iinv] = _1_CR / pk;
|
||||
|
||||
CUSTOMREAL ak = _0_CR;
|
||||
ak += dot_product(wks_2_Ks, desc_wks_Ks,loc_I*loc_J*loc_K);
|
||||
ak += dot_product(wks_2_Keta, desc_wks_Keta,loc_I*loc_J*loc_K);
|
||||
ak += dot_product(wks_2_Kxi, desc_wks_Kxi,loc_I*loc_J*loc_K);
|
||||
// print ak
|
||||
if(myrank== 0) std::cout << "ak: " << ak << std::endl;
|
||||
tmp = ak;
|
||||
allreduce_cr_single(tmp, ak);
|
||||
// print ak
|
||||
if(myrank== 0) std::cout << "ak gathered: " << ak << std::endl;
|
||||
ak_store[iinv] = pk_store[iinv] * ak;
|
||||
|
||||
for (int k = 0; k < loc_K; k++) {
|
||||
for (int j = 0; j < loc_J; j++) {
|
||||
for (int i = 0; i < loc_I; i++) {
|
||||
desc_wks_Ks[I2V(i,j,k)] -= ak_store[iinv] * wks_1_Ks[I2V(i,j,k)];
|
||||
desc_wks_Keta[I2V(i,j,k)] -= ak_store[iinv] * wks_1_Keta[I2V(i,j,k)];
|
||||
desc_wks_Kxi[I2V(i,j,k)] -= ak_store[iinv] * wks_1_Kxi[I2V(i,j,k)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // end loop iinv
|
||||
|
||||
// Nocedal's default preconditionning
|
||||
CUSTOMREAL pk = _1_CR / (pk_store[imax] * norm_yiter);
|
||||
for (int k = 0; k < loc_K; k++) {
|
||||
for (int j = 0; j < loc_J; j++) {
|
||||
for (int i = 0; i < loc_I; i++) {
|
||||
desc_wks_Ks[I2V(i,j,k)] *= pk;
|
||||
desc_wks_Keta[I2V(i,j,k)] *= pk;
|
||||
desc_wks_Kxi[I2V(i,j,k)] *= pk;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// print ak_store and pk_store
|
||||
if (myrank == 0) {
|
||||
std::cout << "ak_store: ";
|
||||
for (int i = 0; i < imax+1; i++) {
|
||||
std::cout << ak_store[i] << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
std::cout << "pk_store: ";
|
||||
for (int i = 0; i < imax+1; i++) {
|
||||
std::cout << pk_store[i] << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
|
||||
// custom preconditionning
|
||||
// diagonal preconditionner
|
||||
|
||||
for (int iinv = imin; iinv <= imax; iinv++){
|
||||
|
||||
for (int k = 0; k < loc_K; k++) {
|
||||
for (int j = 0; j < loc_J; j++) {
|
||||
for (int i = 0; i < loc_I; i++) {
|
||||
wks_1_Ks[I2V(i,j,k)] = grid.Ks_grad_store_loc[ I2VLBFGS(iinv+1,i,j,k)] - grid.Ks_grad_store_loc[I2VLBFGS(iinv,i,j,k)];
|
||||
wks_1_Keta[I2V(i,j,k)] = grid.Keta_grad_store_loc[I2VLBFGS(iinv+1,i,j,k)] - grid.Keta_grad_store_loc[I2VLBFGS(iinv,i,j,k)];
|
||||
wks_1_Kxi[I2V(i,j,k)] = grid.Kxi_grad_store_loc[ I2VLBFGS(iinv+1,i,j,k)] - grid.Kxi_grad_store_loc[I2VLBFGS(iinv,i,j,k)];
|
||||
|
||||
wks_2_Ks[I2V(i,j,k)] = grid.Ks_model_store_loc[ I2VLBFGS(iinv+1,i,j,k)] - grid.Ks_model_store_loc[I2VLBFGS(iinv,i,j,k)];
|
||||
wks_2_Keta[I2V(i,j,k)] = grid.Keta_model_store_loc[I2VLBFGS(iinv+1,i,j,k)] - grid.Keta_model_store_loc[I2VLBFGS(iinv,i,j,k)];
|
||||
wks_2_Kxi[I2V(i,j,k)] = grid.Kxi_model_store_loc[ I2VLBFGS(iinv+1,i,j,k)] - grid.Kxi_model_store_loc[I2VLBFGS(iinv,i,j,k)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CUSTOMREAL beta = _0_CR;
|
||||
beta += dot_product(wks_1_Ks,desc_wks_Ks,loc_I*loc_J*loc_K);
|
||||
beta += dot_product(wks_1_Keta,desc_wks_Keta,loc_I*loc_J*loc_K);
|
||||
beta += dot_product(wks_1_Kxi,desc_wks_Kxi,loc_I*loc_J*loc_K);
|
||||
CUSTOMREAL tmp = beta;
|
||||
allreduce_cr_single(tmp, beta);
|
||||
beta = pk_store[iinv] * beta;
|
||||
|
||||
for (int k = 0; k < loc_K; k++) {
|
||||
for (int j = 0; j < loc_J; j++) {
|
||||
for (int i = 0; i < loc_I; i++) {
|
||||
desc_wks_Ks[I2V(i,j,k)] += (ak_store[iinv]-beta) * wks_2_Ks[I2V(i,j,k)];
|
||||
desc_wks_Keta[I2V(i,j,k)] += (ak_store[iinv]-beta) * wks_2_Keta[I2V(i,j,k)];
|
||||
desc_wks_Kxi[I2V(i,j,k)] += (ak_store[iinv]-beta) * wks_2_Kxi[I2V(i,j,k)];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// descent directions
|
||||
for (int k = 0; k < loc_K; k++) {
|
||||
for (int j = 0; j < loc_J; j++) {
|
||||
for (int i = 0; i < loc_I; i++) {
|
||||
grid.Ks_descent_dir_loc[I2V(i,j,k)] = - _1_CR * desc_wks_Ks[I2V(i,j,k)];
|
||||
grid.Keta_descent_dir_loc[I2V(i,j,k)] = - _1_CR * desc_wks_Keta[I2V(i,j,k)];
|
||||
grid.Kxi_descent_dir_loc[I2V(i,j,k)] = - _1_CR * desc_wks_Kxi[I2V(i,j,k)];
|
||||
//grid.Ks_descent_dir_loc[I2V(i,j,k)] = desc_wks_Ks[I2V(i,j,k)];
|
||||
//grid.Keta_descent_dir_loc[I2V(i,j,k)] = desc_wks_Keta[I2V(i,j,k)];
|
||||
//grid.Kxi_descent_dir_loc[I2V(i,j,k)] = desc_wks_Kxi[I2V(i,j,k)];
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
delete [] ak_store;
|
||||
delete [] pk_store;
|
||||
delete [] desc_wks_Ks;
|
||||
delete [] desc_wks_Keta;
|
||||
delete [] desc_wks_Kxi;
|
||||
delete [] wks_1_Ks;
|
||||
delete [] wks_1_Keta;
|
||||
delete [] wks_1_Kxi;
|
||||
delete [] wks_2_Ks;
|
||||
delete [] wks_2_Keta;
|
||||
delete [] wks_2_Kxi;
|
||||
|
||||
|
||||
} // end of subdom_main
|
||||
|
||||
// share with all simultaneous run
|
||||
if (subdom_main){
|
||||
broadcast_cr_inter_sim(grid.Ks_descent_dir_loc, loc_I*loc_J*loc_K, 0);
|
||||
broadcast_cr_inter_sim(grid.Keta_descent_dir_loc, loc_I*loc_J*loc_K, 0);
|
||||
broadcast_cr_inter_sim(grid.Kxi_descent_dir_loc, loc_I*loc_J*loc_K, 0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
// laplacian approximation in cartesian coordinates
|
||||
inline void calc_laplacian_field(Grid& grid, CUSTOMREAL* arr_in, CUSTOMREAL* arr_res) {
|
||||
if (subdom_main) {
|
||||
|
||||
//CUSTOMREAL lr = 1.0;
|
||||
//CUSTOMREAL lt = 1.0;
|
||||
//CUSTOMREAL lp = 1.0;
|
||||
|
||||
CUSTOMREAL lr = regul_lp;
|
||||
CUSTOMREAL lt = regul_lt;
|
||||
CUSTOMREAL lp = regul_lp;
|
||||
|
||||
// calculate L(m)
|
||||
for (int k = 1; k < loc_K-1; k++) {
|
||||
for (int j = 1; j < loc_J-1; j++) {
|
||||
for (int i = 1; i < loc_I-1; i++) {
|
||||
arr_res[I2V(i,j,k)] = \
|
||||
- _2_CR * (lr+lt+lp) * arr_in[I2V(i,j,k)] \
|
||||
+ lr * (arr_in[I2V(i,j,k+1)] + arr_in[I2V(i,j,k-1)]) \
|
||||
+ lt * (arr_in[I2V(i,j+1,k)] + arr_in[I2V(i,j-1,k)]) \
|
||||
+ lp * (arr_in[I2V(i+1,j,k)] + arr_in[I2V(i-1,j,k)]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// communicate with adjacent subdomains
|
||||
grid.send_recev_boundary_data(arr_res);
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// laplacian approximation in spherical coordinates
|
||||
inline void calc_laplacian_field(Grid& grid, CUSTOMREAL* d, CUSTOMREAL* arr_out){
|
||||
|
||||
if (subdom_main) {
|
||||
|
||||
CUSTOMREAL lr = regul_lp;
|
||||
CUSTOMREAL lt = regul_lt;
|
||||
CUSTOMREAL lp = regul_lp;
|
||||
CUSTOMREAL dr = grid.dr;
|
||||
CUSTOMREAL dt = grid.dt;
|
||||
CUSTOMREAL dp = grid.dp;
|
||||
|
||||
CUSTOMREAL r,t;
|
||||
|
||||
// calculate L(m) in sphercal coordinates
|
||||
for (int k = 1; k < loc_K-1; k++) {
|
||||
for (int j = 1; j < loc_J-1; j++) {
|
||||
for (int i = 1; i < loc_I-1; i++) {
|
||||
r = grid.r_loc_1d[k];
|
||||
t = grid.t_loc_1d[j];
|
||||
|
||||
// finite difference approximation of laplacian spherical coordinates
|
||||
arr_out[I2V(i,j,k)] = \
|
||||
lr*lr*((d[I2V(i,j,k+1)] - _2_CR*d[I2V(i,j,k)] + d[I2V(i,j,k-1)])/(dr*dr) \
|
||||
+(_2_CR/r)*(d[I2V(i,j,k+1)]-d[I2V(i,j,k-1)])/dr) \
|
||||
+ lt*lt*((_1_CR/(r*r*std::sin(t)))*(std::cos(t)*(d[I2V(i,j+1,k)]-d[I2V(i,j-1,k)])/dt \
|
||||
- (std::sin(t)*(d[I2V(i,j+1,k)]-_2_CR*d[I2V(i,j,k)]+d[I2V(i,j-1,k)])/(dt*dt)))) \
|
||||
+ lp*lp*((_1_CR/(r*r*std::sin(t)*std::sin(t)))*(d[I2V(i+1,j,k)]-_2_CR*d[I2V(i,j,k)]+d[I2V(i-1,j,k)])/(dp*dp));
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// communicate with adjacent subdomains
|
||||
grid.send_recev_boundary_data(arr_out);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// add 1/2 * L(m)^2 to the objective function
|
||||
inline CUSTOMREAL calculate_regularization_obj(Grid& grid) {
|
||||
|
||||
CUSTOMREAL regularization_term = _0_CR;
|
||||
|
||||
if (subdom_main && id_sim==0) {
|
||||
int ngrid = loc_I*loc_J*loc_K;
|
||||
|
||||
regularization_term += weight_Tikonov * calc_l2norm(grid.fun_regularization_penalty_loc, ngrid);
|
||||
regularization_term += weight_Tikonov_ani * calc_l2norm(grid.eta_regularization_penalty_loc, ngrid);
|
||||
regularization_term += weight_Tikonov_ani * calc_l2norm(grid.xi_regularization_penalty_loc, ngrid);
|
||||
|
||||
// gather from all subdomain
|
||||
CUSTOMREAL tmp = regularization_term;
|
||||
allreduce_cr_single(tmp, regularization_term);
|
||||
}
|
||||
|
||||
// share retularization_term with all simultaneous run (may be unnecessary)
|
||||
broadcast_cr_single_inter_sim(regularization_term, 0);
|
||||
|
||||
return _0_5_CR * regularization_term;
|
||||
|
||||
}
|
||||
|
||||
|
||||
inline void init_regularization_penalty(Grid& grid) {
|
||||
if (subdom_main && id_sim==0) {
|
||||
int ngrid = loc_I*loc_J*loc_K;
|
||||
|
||||
// initialize regularization penalties
|
||||
std::fill(grid.fun_regularization_penalty_loc, grid.fun_regularization_penalty_loc + ngrid, _0_CR);
|
||||
std::fill(grid.eta_regularization_penalty_loc, grid.eta_regularization_penalty_loc + ngrid, _0_CR);
|
||||
std::fill(grid.xi_regularization_penalty_loc, grid.xi_regularization_penalty_loc + ngrid, _0_CR);
|
||||
std::fill(grid.fun_gradient_regularization_penalty_loc, grid.fun_gradient_regularization_penalty_loc + ngrid, _0_CR);
|
||||
std::fill(grid.eta_gradient_regularization_penalty_loc, grid.eta_gradient_regularization_penalty_loc + ngrid, _0_CR);
|
||||
std::fill(grid.xi_gradient_regularization_penalty_loc, grid.xi_gradient_regularization_penalty_loc + ngrid, _0_CR);
|
||||
}
|
||||
}
|
||||
|
||||
inline void init_regulaization_penalty_with_one(Grid& grid) {
|
||||
if (subdom_main && id_sim==0) {
|
||||
int ngrid = loc_I*loc_J*loc_K;
|
||||
|
||||
// initialize regularization penalties
|
||||
std::fill(grid.fun_regularization_penalty_loc, grid.fun_regularization_penalty_loc + ngrid, _1_CR);
|
||||
std::fill(grid.eta_regularization_penalty_loc, grid.eta_regularization_penalty_loc + ngrid, _1_CR);
|
||||
std::fill(grid.xi_regularization_penalty_loc, grid.xi_regularization_penalty_loc + ngrid, _1_CR);
|
||||
std::fill(grid.fun_gradient_regularization_penalty_loc, grid.fun_gradient_regularization_penalty_loc + ngrid, _1_CR);
|
||||
std::fill(grid.eta_gradient_regularization_penalty_loc, grid.eta_gradient_regularization_penalty_loc + ngrid, _1_CR);
|
||||
std::fill(grid.xi_gradient_regularization_penalty_loc, grid.xi_gradient_regularization_penalty_loc + ngrid, _1_CR);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline void calculate_regularization_penalty(Grid& grid) {
|
||||
if (subdom_main && id_sim==0) {
|
||||
int n_grid = loc_I*loc_J*loc_K;
|
||||
|
||||
CUSTOMREAL* tmp_fun = allocateMemory<CUSTOMREAL>(n_grid, 2011);
|
||||
CUSTOMREAL* tmp_eta = allocateMemory<CUSTOMREAL>(n_grid, 2012);
|
||||
CUSTOMREAL* tmp_xi = allocateMemory<CUSTOMREAL>(n_grid, 2013);
|
||||
|
||||
for (int i = 0; i < n_grid; i++) {
|
||||
tmp_fun[i] = grid.fun_loc[i] - grid.fun_prior_loc[i];
|
||||
tmp_eta[i] = grid.eta_loc[i] - grid.eta_prior_loc[i];
|
||||
tmp_xi[i] = grid.xi_loc[i] - grid.xi_prior_loc[i];
|
||||
}
|
||||
|
||||
init_regularization_penalty(grid);
|
||||
|
||||
// calculate LL(m) on fun (Ks)
|
||||
calc_laplacian_field(grid, tmp_fun, grid.fun_regularization_penalty_loc);
|
||||
calc_laplacian_field(grid, grid.fun_regularization_penalty_loc, grid.fun_gradient_regularization_penalty_loc);
|
||||
|
||||
// calculate LL(m) on eta (Keta)
|
||||
calc_laplacian_field(grid, tmp_eta, grid.eta_regularization_penalty_loc);
|
||||
calc_laplacian_field(grid, grid.eta_regularization_penalty_loc, grid.eta_gradient_regularization_penalty_loc);
|
||||
|
||||
// calculate LL(m) on xi (Kxi)
|
||||
calc_laplacian_field(grid, tmp_xi, grid.xi_regularization_penalty_loc);
|
||||
calc_laplacian_field(grid, grid.xi_regularization_penalty_loc, grid.xi_gradient_regularization_penalty_loc);
|
||||
|
||||
for (int i = 0; i < n_grid; i++){
|
||||
// calculate gradient_regularization_penalty first for avoiding using overwrited value in regularization_penalty
|
||||
grid.fun_gradient_regularization_penalty_loc[i] = damp_weight_fun*damp_weight_fun*(tmp_fun[i] - _2_CR * grid.fun_regularization_penalty_loc[i] + grid.fun_gradient_regularization_penalty_loc[i]);
|
||||
grid.eta_gradient_regularization_penalty_loc[i] = damp_weight_eta*damp_weight_eta*(tmp_eta[i] - _2_CR * grid.eta_regularization_penalty_loc[i] + grid.eta_gradient_regularization_penalty_loc[i]);
|
||||
grid.xi_gradient_regularization_penalty_loc[i] = damp_weight_xi *damp_weight_xi *(tmp_xi[i] - _2_CR * grid.xi_regularization_penalty_loc[i] + grid.xi_gradient_regularization_penalty_loc[i]);
|
||||
}
|
||||
|
||||
grid.send_recev_boundary_data(grid.fun_gradient_regularization_penalty_loc);
|
||||
grid.send_recev_boundary_data(grid.eta_gradient_regularization_penalty_loc);
|
||||
grid.send_recev_boundary_data(grid.xi_gradient_regularization_penalty_loc);
|
||||
|
||||
for (int i = 0; i < n_grid; i++){
|
||||
// calculate gradient_regularization_penalty first for avoiding using overwrited value in regularization_penalty
|
||||
|
||||
grid.fun_regularization_penalty_loc[i] = damp_weight_fun*(tmp_fun[i] - grid.fun_regularization_penalty_loc[i]);
|
||||
grid.eta_regularization_penalty_loc[i] = damp_weight_eta*(tmp_eta[i] - grid.eta_regularization_penalty_loc[i]);
|
||||
grid.xi_regularization_penalty_loc[i] = damp_weight_xi *(tmp_xi[i] - grid.xi_regularization_penalty_loc[i]);
|
||||
}
|
||||
|
||||
grid.send_recev_boundary_data(grid.fun_regularization_penalty_loc);
|
||||
grid.send_recev_boundary_data(grid.eta_regularization_penalty_loc);
|
||||
grid.send_recev_boundary_data(grid.xi_regularization_penalty_loc);
|
||||
|
||||
delete [] tmp_fun;
|
||||
delete [] tmp_eta;
|
||||
delete [] tmp_xi;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// add grad(L(m)) to gradient
|
||||
inline void add_regularization_grad(Grid& grid) {
|
||||
if (subdom_main){
|
||||
if(id_sim==0){
|
||||
// add LL(m) to gradient
|
||||
for (int k = 0; k < loc_K; k++) {
|
||||
for (int j = 0; j < loc_J; j++) {
|
||||
for (int i = 0; i < loc_I; i++) {
|
||||
|
||||
//grid.fun_gradient_regularization_penalty_loc[I2V(i,j,k)] /= Linf_Ks;
|
||||
//grid.eta_gradient_regularization_penalty_loc[I2V(i,j,k)] /= Linf_Keta;
|
||||
//grid.xi_gradient_regularization_penalty_loc[I2V(i,j,k)] /= Linf_Kxi;
|
||||
|
||||
grid.Ks_update_loc[I2V(i,j,k)] += weight_Tikonov * grid.fun_gradient_regularization_penalty_loc[I2V(i,j,k)];
|
||||
grid.Keta_update_loc[I2V(i,j,k)] += weight_Tikonov_ani * grid.eta_gradient_regularization_penalty_loc[I2V(i,j,k)];
|
||||
grid.Kxi_update_loc[I2V(i,j,k)] += weight_Tikonov_ani * grid.xi_gradient_regularization_penalty_loc[I2V(i,j,k)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
grid.send_recev_boundary_data(grid.Ks_update_loc);
|
||||
grid.send_recev_boundary_data(grid.Keta_update_loc);
|
||||
grid.send_recev_boundary_data(grid.Kxi_update_loc);
|
||||
|
||||
}// end id_sim==0
|
||||
|
||||
|
||||
// share with all simultaneous run (may be unnecessary)
|
||||
broadcast_cr_inter_sim(grid.Ks_update_loc, loc_I*loc_J*loc_K, 0);
|
||||
broadcast_cr_inter_sim(grid.Keta_update_loc, loc_I*loc_J*loc_K, 0);
|
||||
broadcast_cr_inter_sim(grid.Kxi_update_loc, loc_I*loc_J*loc_K, 0);
|
||||
|
||||
}// end subdom_main
|
||||
}
|
||||
|
||||
|
||||
// compute initial guess for step length to try for line search
|
||||
void initial_guess_step(Grid& grid, CUSTOMREAL& step_length, CUSTOMREAL SC_VAL) {
|
||||
// find the max value in descent_direction
|
||||
CUSTOMREAL max_val = _0_CR, max_val_all = _0_CR;
|
||||
const CUSTOMREAL max_relative_pert = 0.02;
|
||||
|
||||
if(subdom_main && id_sim==0) {
|
||||
for (int k = 0; k < loc_K; k++) {
|
||||
for (int j = 0; j < loc_J; j++) {
|
||||
for (int i = 0; i < loc_I; i++) {
|
||||
if (std::abs(grid.Ks_descent_dir_loc[I2V(i,j,k)] ) > max_val) max_val = std::abs(grid.Ks_descent_dir_loc[I2V(i,j,k)]);
|
||||
if (std::abs(grid.Keta_descent_dir_loc[I2V(i,j,k)]) > max_val) max_val = std::abs(grid.Keta_descent_dir_loc[I2V(i,j,k)]);
|
||||
if (std::abs(grid.Kxi_descent_dir_loc[I2V(i,j,k)] ) > max_val) max_val = std::abs(grid.Kxi_descent_dir_loc[I2V(i,j,k)]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// reduce
|
||||
allreduce_cr_single_max(max_val, max_val_all);
|
||||
|
||||
// debug print max_val
|
||||
std::cout << "max_val: " << max_val_all << std::endl;
|
||||
|
||||
// step size
|
||||
step_length = max_relative_pert / SC_VAL / max_val_all;
|
||||
}
|
||||
|
||||
// broadcast to all simultaneous run (may be unnecessary)
|
||||
if (subdom_main)
|
||||
broadcast_cr_single_inter_sim(step_length, 0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
inline CUSTOMREAL compute_volume_domain(Grid& grid) {
|
||||
CUSTOMREAL volume = _0_CR;
|
||||
|
||||
if (subdom_main && id_sim==0) {
|
||||
volume += calc_l2norm(grid.fun_regularization_penalty_loc, loc_I*loc_J*loc_K);
|
||||
volume += calc_l2norm(grid.eta_regularization_penalty_loc, loc_I*loc_J*loc_K);
|
||||
volume += calc_l2norm(grid.xi_regularization_penalty_loc, loc_I*loc_J*loc_K);
|
||||
|
||||
// reduce
|
||||
allreduce_cr_single(volume, volume);
|
||||
}
|
||||
|
||||
// broadcast to all simultaneous run (may be unnecessary)
|
||||
broadcast_cr_single_inter_sim(volume, 0);
|
||||
|
||||
return volume;
|
||||
}
|
||||
|
||||
|
||||
inline void compute_damp_weights(const Grid& grid) {
|
||||
// (size/sum)**2
|
||||
|
||||
if(subdom_main){
|
||||
CUSTOMREAL sum_fun = std::accumulate(grid.fun_prior_loc, grid.fun_prior_loc + loc_I*loc_J*loc_K, _0_CR);
|
||||
//CUSTOMREAL sum_eta = std::accumulate(grid.eta_prior_loc, grid.eta_prior_loc + loc_I*loc_J*loc_K, _0_CR); // do not use
|
||||
//CUSTOMREAL sum_xi = std::accumulate(grid.xi_prior_loc, grid.xi_prior_loc + loc_I*loc_J*loc_K, _0_CR);
|
||||
|
||||
allreduce_cr_single(sum_fun, sum_fun);
|
||||
|
||||
damp_weight_fun = (volume_domain / sum_fun)*(volume_domain / sum_fun);
|
||||
//damp_weight_eta = (volume / sum_eta)*(volume / sum_eta);
|
||||
//damp_weight_xi = (volume / sum_xi)*(volume / sum_xi);
|
||||
damp_weight_eta = _1_CR;
|
||||
damp_weight_xi = _1_CR;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
923
include/main_routines_calling.h
Normal file
923
include/main_routines_calling.h
Normal file
@@ -0,0 +1,923 @@
|
||||
#ifndef MAIN_ROUTINES_CALLING_H
|
||||
#define MAIN_ROUTINES_CALLING_H
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include "mpi_funcs.h"
|
||||
#include "config.h"
|
||||
#include "utils.h"
|
||||
#include "input_params.h"
|
||||
#include "grid.h"
|
||||
#include "io.h"
|
||||
#include "main_routines_inversion_mode.h"
|
||||
#include "model_optimization_routines.h"
|
||||
#include "main_routines_earthquake_relocation.h"
|
||||
#include "iterator_selector.h"
|
||||
#include "iterator.h"
|
||||
#include "iterator_legacy.h"
|
||||
#include "iterator_level.h"
|
||||
#include "source.h"
|
||||
#include "receiver.h"
|
||||
#include "kernel.h"
|
||||
#include "model_update.h"
|
||||
#include "lbfgs.h"
|
||||
#include "objective_function_utils.h"
|
||||
#include "timer.h"
|
||||
#include "oneD_inversion.h"
|
||||
|
||||
// run forward-only or inversion mode
|
||||
inline void run_forward_only_or_inversion(InputParams &IP, Grid &grid, IO_utils &io) {
|
||||
|
||||
// for check if the current source is the first source
|
||||
bool first_src = true;
|
||||
|
||||
if(myrank == 0)
|
||||
std::cout << "id_sim: " << id_sim << ", size of src_map: " << IP.src_map.size() << std::endl;
|
||||
|
||||
// estimate running time
|
||||
Timer timer("Forward_or_inversion", true);
|
||||
|
||||
// prepare objective_function file
|
||||
std::ofstream out_main; // close() is not mandatory
|
||||
prepare_header_line(IP, out_main);
|
||||
|
||||
if (id_sim==0) {
|
||||
io.prepare_grid_inv_xdmf(0);
|
||||
|
||||
//io.change_xdmf_obj(0); // change xmf file for next src
|
||||
io.change_group_name_for_model();
|
||||
|
||||
// write out model info
|
||||
io.write_vel(grid, 0);
|
||||
io.write_xi( grid, 0);
|
||||
io.write_eta(grid, 0);
|
||||
//io.write_zeta(grid, i_inv); // TODO
|
||||
|
||||
if (IP.get_verbose_output_level()){
|
||||
io.write_a(grid, 0);
|
||||
io.write_b(grid, 0);
|
||||
io.write_c(grid, 0);
|
||||
io.write_f(grid, 0);
|
||||
io.write_fun(grid, 0);
|
||||
}
|
||||
|
||||
// // output model_parameters_inv_0000.dat
|
||||
// if (IP.get_if_output_model_dat())
|
||||
// io.write_concerning_parameters(grid, 0, IP);
|
||||
}
|
||||
|
||||
|
||||
// output station correction file (only for teleseismic differential data)
|
||||
IP.write_station_correction_file(0);
|
||||
|
||||
synchronize_all_world();
|
||||
|
||||
/////////////////////
|
||||
// loop for inversion
|
||||
/////////////////////
|
||||
|
||||
bool line_search_mode = false; // if true, run_simulation_one_step skips adjoint simulation and only calculates objective function value
|
||||
|
||||
// objective function for all src
|
||||
CUSTOMREAL v_obj = 0.0, old_v_obj = 0.0;
|
||||
std::vector<CUSTOMREAL> v_obj_misfit(20, 0.0);
|
||||
|
||||
for (int i_inv = 0; i_inv < IP.get_max_iter_inv(); i_inv++) {
|
||||
|
||||
if(myrank == 0 && id_sim ==0){
|
||||
std::cout << "iteration " << i_inv << " starting ... " << std::endl;
|
||||
}
|
||||
|
||||
old_v_obj = v_obj;
|
||||
|
||||
// prepare inverstion iteration group in xdmf file
|
||||
io.prepare_grid_inv_xdmf(i_inv);
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// run (forward and adjoint) simulation for each source
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// run forward and adjoint simulation and calculate current objective function value and sensitivity kernel for all sources
|
||||
line_search_mode = false;
|
||||
// skip for the mode with sub-iteration
|
||||
if (i_inv > 0 && optim_method != GRADIENT_DESCENT) {
|
||||
} else {
|
||||
bool is_save_T = false;
|
||||
v_obj_misfit = run_simulation_one_step(IP, grid, io, i_inv, first_src, line_search_mode, is_save_T);
|
||||
v_obj = v_obj_misfit[0];
|
||||
}
|
||||
|
||||
// wait for all processes to finish
|
||||
synchronize_all_world();
|
||||
|
||||
// check if v_obj is nan
|
||||
if (std::isnan(v_obj)) {
|
||||
if (myrank == 0)
|
||||
std::cout << "v_obj is nan, stop inversion" << std::endl;
|
||||
// stop inversion
|
||||
break;
|
||||
}
|
||||
|
||||
// output src rec file with the result arrival times
|
||||
if (IP.get_if_output_in_process_data() || i_inv == IP.get_max_iter_inv()-1 || i_inv==0) {
|
||||
IP.write_src_rec_file(i_inv,0);
|
||||
}
|
||||
|
||||
///////////////
|
||||
// model update
|
||||
///////////////
|
||||
if(myrank == 0 && id_sim ==0)
|
||||
std::cout << "model update starting ... " << std::endl;
|
||||
|
||||
if (IP.get_run_mode() == DO_INVERSION) {
|
||||
if (optim_method == GRADIENT_DESCENT)
|
||||
model_optimize(IP, grid, io, i_inv, v_obj, old_v_obj, first_src, out_main);
|
||||
else if (optim_method == HALVE_STEPPING_MODE)
|
||||
v_obj_misfit = model_optimize_halve_stepping(IP, grid, io, i_inv, v_obj, first_src, out_main);
|
||||
else if (optim_method == LBFGS_MODE) {
|
||||
bool found_next_step = model_optimize_lbfgs(IP, grid, io, i_inv, v_obj, first_src, out_main);
|
||||
if (!found_next_step)
|
||||
goto end_of_inversion;
|
||||
}
|
||||
}
|
||||
|
||||
// output station correction file (only for teleseismic differential data)
|
||||
IP.write_station_correction_file(i_inv + 1);
|
||||
|
||||
// output objective function
|
||||
write_objective_function(IP, i_inv, v_obj_misfit, out_main, "model update");
|
||||
|
||||
// since model is update. The written traveltime field should be discraded.
|
||||
// initialize is_T_written_into_file
|
||||
for (int i_src = 0; i_src < IP.n_src_this_sim_group; i_src++){
|
||||
const std::string name_sim_src = IP.get_src_name(i_src);
|
||||
|
||||
if (proc_store_srcrec) // only proc_store_srcrec has the src_map object
|
||||
IP.src_map[name_sim_src].is_T_written_into_file = false;
|
||||
}
|
||||
|
||||
// output updated model
|
||||
if (id_sim==0) {
|
||||
//io.change_xdmf_obj(0); // change xmf file for next src
|
||||
io.change_group_name_for_model();
|
||||
|
||||
// write out model info
|
||||
if (IP.get_if_output_in_process() || i_inv >= IP.get_max_iter_inv() - 2){
|
||||
io.write_vel(grid, i_inv+1);
|
||||
io.write_xi( grid, i_inv+1);
|
||||
io.write_eta(grid, i_inv+1);
|
||||
}
|
||||
//io.write_zeta(grid, i_inv); // TODO
|
||||
|
||||
if (IP.get_verbose_output_level()){
|
||||
io.write_a(grid, i_inv+1);
|
||||
io.write_b(grid, i_inv+1);
|
||||
io.write_c(grid, i_inv+1);
|
||||
io.write_f(grid, i_inv+1);
|
||||
io.write_fun(grid, i_inv+1);
|
||||
}
|
||||
|
||||
// // output model_parameters_inv_0000.dat
|
||||
// if (IP.get_if_output_model_dat()
|
||||
// && (IP.get_if_output_in_process() || i_inv >= IP.get_max_iter_inv() - 2))
|
||||
// io.write_concerning_parameters(grid, i_inv + 1, IP);
|
||||
|
||||
}
|
||||
|
||||
// writeout temporary xdmf file
|
||||
io.update_xdmf_file();
|
||||
|
||||
// wait for all processes to finish
|
||||
synchronize_all_world();
|
||||
|
||||
|
||||
// estimate running time
|
||||
CUSTOMREAL time_elapsed = timer.get_t();
|
||||
if (id_sim == 0 && myrank == 0 && i_inv < IP.get_max_iter_inv()-1) {
|
||||
const time_t end_time_estimated = time_elapsed / (i_inv + 1) * (IP.get_max_iter_inv() - i_inv - 1) + timer.get_start();
|
||||
auto will_run_time = (int)(time_elapsed/(i_inv + 1) * (IP.get_max_iter_inv() - i_inv - 1));
|
||||
|
||||
std::cout << std::endl;
|
||||
std::cout << "The program begins at " << timer.get_start_t() << std::endl;
|
||||
std::cout << "Iteration (" << i_inv + 1 << "/" << IP.get_max_iter_inv() << ") finished at " << time_elapsed << " seconds" << std::endl;
|
||||
std::cout << i_inv + 1 << " iterations run " << timer.get_t() << " seconds, the rest of " << IP.get_max_iter_inv() - i_inv - 1 << " iterations require " << will_run_time << " seconds." << std::endl;
|
||||
std::cout << "The program is estimated to stop at " << timer.get_utc_from_time_t(end_time_estimated) << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
// output current state of the model
|
||||
if (IP.get_if_output_final_model()) {
|
||||
// io.write_final_model(grid, IP);
|
||||
io.write_merged_model(grid, IP, "final_model.h5");
|
||||
}
|
||||
if (IP.get_if_output_middle_model()) {
|
||||
std::string tmp_fname = "middle_model_step_" + int2string_zero_fill(i_inv + 1) + ".h5";
|
||||
io.write_merged_model(grid, IP, tmp_fname);
|
||||
}
|
||||
|
||||
|
||||
} // end loop inverse
|
||||
|
||||
end_of_inversion:
|
||||
|
||||
// close xdmf file
|
||||
io.finalize_data_output_file();
|
||||
|
||||
timer.stop_timer();
|
||||
if (id_sim == 0 && myrank == 0) {
|
||||
std::cout << std::endl;
|
||||
std::cout << "The program begin at " << timer.get_start_t() << std::endl;
|
||||
std::cout << "Forward_or_inversion end at " << timer.get_end_t() << std::endl;
|
||||
std::cout << "It has run " << timer.get_elapsed_t() << " seconds in total." << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// run earthquake relocation mode
|
||||
inline void run_earthquake_relocation(InputParams& IP, Grid& grid, IO_utils& io) {
|
||||
|
||||
Receiver recs;
|
||||
|
||||
// calculate traveltime for each receiver (swapped from source) and write in output file
|
||||
calculate_traveltime_for_all_src_rec(IP, grid, io);
|
||||
|
||||
// prepare output for iteration status
|
||||
std::ofstream out_main; // close() is not mandatory
|
||||
prepare_header_line(IP, out_main);
|
||||
|
||||
// objective function and its gradient
|
||||
CUSTOMREAL v_obj = 999999999.0;
|
||||
|
||||
int i_iter = 0;
|
||||
|
||||
std::vector<CUSTOMREAL> v_obj_misfit;
|
||||
|
||||
// iterate
|
||||
while (true) {
|
||||
|
||||
v_obj = 0.0;
|
||||
|
||||
// calculate gradient of objective function at sources
|
||||
v_obj_misfit = calculate_gradient_objective_function(IP, grid, io, i_iter);
|
||||
v_obj = v_obj_misfit[0];
|
||||
|
||||
// update source location
|
||||
recs.update_source_location(IP, grid);
|
||||
|
||||
synchronize_all_world();
|
||||
|
||||
// check convergence
|
||||
bool finished = false;
|
||||
|
||||
if (subdom_main && id_subdomain==0) {
|
||||
if (i_iter >= N_ITER_MAX_SRC_RELOC){
|
||||
std::cout << "Finished relocation because iteration number exceeds the maximum " << N_ITER_MAX_SRC_RELOC << std::endl;
|
||||
finished = true;
|
||||
}
|
||||
allreduce_bool_single_inplace_sim(finished); //LAND
|
||||
}
|
||||
|
||||
synchronize_all_world();
|
||||
|
||||
// check if all processes have finished
|
||||
broadcast_bool_inter_and_intra_sim(finished, 0);
|
||||
|
||||
synchronize_all_world();
|
||||
|
||||
// output location information
|
||||
if(id_sim == 0 && myrank == 0){
|
||||
// write objective function
|
||||
std::cout << "iteration: " << i_iter << ", objective function: " << v_obj << std::endl;
|
||||
}
|
||||
|
||||
// write objective functions
|
||||
write_objective_function(IP, i_iter, v_obj_misfit, out_main, "relocation");
|
||||
|
||||
// write out new src_rec_file
|
||||
if (IP.get_if_output_in_process_data() || i_iter == N_ITER_MAX_SRC_RELOC-1 || i_iter==0){
|
||||
IP.write_src_rec_file(0,i_iter);
|
||||
}
|
||||
|
||||
// modify the receiver's location for output
|
||||
IP.modify_swapped_source_location();
|
||||
|
||||
|
||||
if (finished)
|
||||
break;
|
||||
|
||||
// new iteration
|
||||
i_iter++;
|
||||
}
|
||||
|
||||
// modify the receiver's location
|
||||
IP.modify_swapped_source_location();
|
||||
// write out new src_rec_file
|
||||
IP.write_src_rec_file(0,i_iter);
|
||||
// close xdmf file
|
||||
io.finalize_data_output_file();
|
||||
|
||||
}
|
||||
|
||||
|
||||
// run earthquake relocation mode
|
||||
inline void run_inversion_and_relocation(InputParams& IP, Grid& grid, IO_utils& io) {
|
||||
|
||||
Timer timer("Inv_and_reloc", true);
|
||||
|
||||
/////////////////////
|
||||
// preparation of model update
|
||||
/////////////////////
|
||||
|
||||
// for check if the current source is the first source
|
||||
bool first_src = true;
|
||||
|
||||
if(myrank == 0)
|
||||
std::cout << "id_sim: " << id_sim << ", size of src_map: " << IP.src_map.size() << std::endl;
|
||||
|
||||
// prepare objective_function file
|
||||
std::ofstream out_main; // close() is not mandatory
|
||||
prepare_header_line(IP, out_main);
|
||||
|
||||
if (id_sim==0) {
|
||||
// prepare inverstion iteration group in xdmf file
|
||||
io.prepare_grid_inv_xdmf(0);
|
||||
|
||||
//io.change_xdmf_obj(0); // change xmf file for next src
|
||||
io.change_group_name_for_model();
|
||||
|
||||
// write out model info
|
||||
io.write_vel(grid, 0);
|
||||
io.write_xi( grid, 0);
|
||||
io.write_eta(grid, 0);
|
||||
//io.write_zeta(grid, i_inv); // TODO
|
||||
|
||||
if (IP.get_verbose_output_level()){
|
||||
io.write_a(grid, 0);
|
||||
io.write_b(grid, 0);
|
||||
io.write_c(grid, 0);
|
||||
io.write_f(grid, 0);
|
||||
io.write_fun(grid, 0);
|
||||
}
|
||||
|
||||
// // output model_parameters_inv_0000.dat
|
||||
// if (IP.get_if_output_model_dat())
|
||||
// io.write_concerning_parameters(grid, 0, IP);
|
||||
}
|
||||
|
||||
// output station correction file (only for teleseismic differential data)
|
||||
IP.write_station_correction_file(0);
|
||||
|
||||
synchronize_all_world();
|
||||
|
||||
bool line_search_mode = false; // if true, run_simulation_one_step skips adjoint simulation and only calculates objective function value
|
||||
|
||||
/////////////////////
|
||||
// preparation of relocation
|
||||
/////////////////////
|
||||
|
||||
Receiver recs;
|
||||
|
||||
/////////////////////
|
||||
// initializae objective function
|
||||
/////////////////////
|
||||
|
||||
CUSTOMREAL v_obj = 0.0, old_v_obj = 10000000000.0;
|
||||
std::vector<CUSTOMREAL> v_obj_misfit(20, 0.0);
|
||||
|
||||
if (inv_mode == ITERATIVE){
|
||||
|
||||
/////////////////////
|
||||
// main loop for model update and relocation
|
||||
/////////////////////
|
||||
|
||||
int model_update_step = 0;
|
||||
int relocation_step = 0;
|
||||
|
||||
for (int i_loop = 0; i_loop < IP.get_max_loop_mode0(); i_loop++){
|
||||
/////////////////////
|
||||
// stage 1, update model parameters
|
||||
/////////////////////
|
||||
|
||||
for (int one_loop_i_inv = 0; one_loop_i_inv < IP.get_model_update_N_iter(); one_loop_i_inv++){
|
||||
// the actural step index of model update
|
||||
int i_inv = i_loop * IP.get_model_update_N_iter() + one_loop_i_inv;
|
||||
|
||||
if(myrank == 0 && id_sim ==0){
|
||||
std::cout << std::endl;
|
||||
std::cout << "loop " << i_loop+1 << ", model update iteration " << one_loop_i_inv+1
|
||||
<< " ( the " << i_inv+1 << "-th model update) starting ... " << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
|
||||
// prepare inverstion iteration group in xdmf file
|
||||
io.prepare_grid_inv_xdmf(i_inv);
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// run (forward and adjoint) simulation for each source
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// run forward and adjoint simulation and calculate current objective function value and sensitivity kernel for all sources
|
||||
line_search_mode = false;
|
||||
// skip for the mode with sub-iteration
|
||||
if (i_inv > 0 && optim_method != GRADIENT_DESCENT) {
|
||||
} else {
|
||||
bool is_save_T = false;
|
||||
v_obj_misfit = run_simulation_one_step(IP, grid, io, i_inv, first_src, line_search_mode, is_save_T);
|
||||
v_obj = v_obj_misfit[0];
|
||||
}
|
||||
|
||||
// wait for all processes to finish
|
||||
synchronize_all_world();
|
||||
|
||||
// check if v_obj is nan
|
||||
if (std::isnan(v_obj)) {
|
||||
if (myrank == 0)
|
||||
std::cout << "v_obj is nan, stop inversion" << std::endl;
|
||||
// stop inversion
|
||||
break;
|
||||
}
|
||||
|
||||
///////////////
|
||||
// model update
|
||||
///////////////
|
||||
|
||||
if (optim_method == GRADIENT_DESCENT)
|
||||
model_optimize(IP, grid, io, i_inv, v_obj, old_v_obj, first_src, out_main);
|
||||
else if (optim_method == HALVE_STEPPING_MODE)
|
||||
v_obj_misfit = model_optimize_halve_stepping(IP, grid, io, i_inv, v_obj, first_src, out_main);
|
||||
else if (optim_method == LBFGS_MODE) {
|
||||
bool found_next_step = model_optimize_lbfgs(IP, grid, io, i_inv, v_obj, first_src, out_main);
|
||||
|
||||
if (!found_next_step)
|
||||
break;
|
||||
}
|
||||
|
||||
// define old_v_obj
|
||||
old_v_obj = v_obj;
|
||||
|
||||
// output objective function
|
||||
write_objective_function(IP, i_inv, v_obj_misfit, out_main, "model update");
|
||||
|
||||
// since model is update. The written traveltime field should be discraded.
|
||||
// initialize is_T_written_into_file
|
||||
for (int i_src = 0; i_src < IP.n_src_this_sim_group; i_src++){
|
||||
const std::string name_sim_src = IP.get_src_name(i_src);
|
||||
|
||||
if (proc_store_srcrec) // only proc_store_srcrec has the src_map object
|
||||
IP.src_map[name_sim_src].is_T_written_into_file = false;
|
||||
}
|
||||
|
||||
// output updated model
|
||||
if (id_sim==0) {
|
||||
//io.change_xdmf_obj(0); // change xmf file for next src
|
||||
io.change_group_name_for_model();
|
||||
|
||||
// write out model info
|
||||
if (IP.get_if_output_in_process() || i_inv >= IP.get_max_loop_mode0()*IP.get_model_update_N_iter() - 2){
|
||||
io.write_vel(grid, i_inv+1);
|
||||
io.write_xi( grid, i_inv+1);
|
||||
io.write_eta(grid, i_inv+1);
|
||||
}
|
||||
//io.write_zeta(grid, i_inv); // TODO
|
||||
|
||||
if (IP.get_verbose_output_level()){
|
||||
io.write_a(grid, i_inv+1);
|
||||
io.write_b(grid, i_inv+1);
|
||||
io.write_c(grid, i_inv+1);
|
||||
io.write_f(grid, i_inv+1);
|
||||
io.write_fun(grid, i_inv+1);
|
||||
}
|
||||
|
||||
// // output model_parameters_inv_0000.dat
|
||||
// if (IP.get_if_output_model_dat()
|
||||
// && (IP.get_if_output_in_process() || i_inv >= IP.get_max_loop_mode0()*IP.get_model_update_N_iter() - 2))
|
||||
// io.write_concerning_parameters(grid, i_inv + 1, IP);
|
||||
|
||||
} // end output updated model
|
||||
|
||||
// writeout temporary xdmf file
|
||||
io.update_xdmf_file();
|
||||
|
||||
// write src rec files
|
||||
if (IP.get_if_output_in_process_data()){
|
||||
IP.write_src_rec_file(model_update_step,relocation_step);
|
||||
} else if (i_inv == IP.get_max_loop_mode0() * IP.get_model_update_N_iter()-1 || i_inv==0) {
|
||||
IP.write_src_rec_file(model_update_step,relocation_step);
|
||||
}
|
||||
model_update_step += 1;
|
||||
|
||||
// wait for all processes to finish
|
||||
synchronize_all_world();
|
||||
|
||||
// output current state of the model
|
||||
if (IP.get_if_output_final_model()) {
|
||||
// io.write_final_model(grid, IP);
|
||||
io.write_merged_model(grid, IP, "final_model.h5");
|
||||
}
|
||||
if (IP.get_if_output_middle_model()) {
|
||||
std::string tmp_fname = "middle_model_step_" + int2string_zero_fill(i_inv + 1) + ".h5";
|
||||
io.write_merged_model(grid, IP, tmp_fname);
|
||||
}
|
||||
|
||||
} // end model update in one loop
|
||||
|
||||
|
||||
/////////////////////
|
||||
// stage 2, update earthquake locations
|
||||
/////////////////////
|
||||
|
||||
if(myrank == 0 && id_sim ==0){
|
||||
std::cout << std::endl;
|
||||
std::cout << "loop " << i_loop+1 << ", computing traveltime field for relocation" << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
// calculate traveltime for each receiver (swapped from source) and write in output file
|
||||
if (IP.get_relocation_N_iter() > 0) // we really do relocation
|
||||
calculate_traveltime_for_all_src_rec(IP, grid, io);
|
||||
|
||||
|
||||
// initilize all earthquakes
|
||||
if (proc_store_srcrec){
|
||||
for(auto iter = IP.rec_map.begin(); iter != IP.rec_map.end(); iter++){
|
||||
iter->second.is_stop = false;
|
||||
}
|
||||
}
|
||||
|
||||
// iterate
|
||||
for (int one_loop_i_iter = 0; one_loop_i_iter < IP.get_relocation_N_iter(); one_loop_i_iter++){
|
||||
int i_iter = i_loop * IP.get_relocation_N_iter() + one_loop_i_iter;
|
||||
|
||||
if(myrank == 0 && id_sim ==0){
|
||||
std::cout << std::endl;
|
||||
std::cout << "loop " << i_loop+1 << ", relocation iteration " << one_loop_i_iter+1
|
||||
<< " ( the " << i_iter+1 << "-th relocation) starting ... " << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
|
||||
// calculate gradient of objective function at sources
|
||||
v_obj_misfit = calculate_gradient_objective_function(IP, grid, io, i_iter);
|
||||
v_obj = v_obj_misfit[0];
|
||||
|
||||
// update source location
|
||||
recs.update_source_location(IP, grid);
|
||||
|
||||
synchronize_all_world();
|
||||
|
||||
|
||||
// output location information
|
||||
if(id_sim == 0 && myrank == 0){
|
||||
std::cout << "objective function: " << v_obj << std::endl;
|
||||
}
|
||||
|
||||
// write objective functions
|
||||
write_objective_function(IP, i_iter, v_obj_misfit, out_main, "relocation");
|
||||
|
||||
// write out new src_rec_file
|
||||
if (IP.get_if_output_in_process_data()){
|
||||
IP.write_src_rec_file(model_update_step,relocation_step);
|
||||
} else if (i_iter == IP.get_max_loop_mode0() * IP.get_relocation_N_iter()-1 || i_iter==0) {
|
||||
IP.write_src_rec_file(model_update_step,relocation_step);
|
||||
}
|
||||
|
||||
// modify the receiver's location for output
|
||||
IP.modify_swapped_source_location();
|
||||
|
||||
relocation_step += 1;
|
||||
} // end relocation loop
|
||||
|
||||
grid.rejuvenate_abcf(); // (a,b/r^2,c/(r^2*cos^2),f/(r^2*cos)) -> (a,b,c,f)
|
||||
|
||||
// estimate running time
|
||||
CUSTOMREAL time_elapsed = timer.get_t();
|
||||
if (id_sim == 0 && myrank == 0 && i_loop < IP.get_max_loop_mode0()-1) {
|
||||
const time_t end_time_estimated = time_elapsed / (i_loop + 1) * (IP.get_max_loop_mode0() - i_loop - 1) + timer.get_start();
|
||||
auto will_run_time = (int)(time_elapsed/(i_loop + 2) * (IP.get_max_loop_mode0() - i_loop - 1));
|
||||
|
||||
std::cout << std::endl;
|
||||
std::cout << "The program begins at " << timer.get_start_t() << std::endl;
|
||||
std::cout << "Loop (" << i_loop + 1 << "/" << IP.get_max_loop_mode0() << ") finished at " << time_elapsed << " seconds" << std::endl;
|
||||
std::cout << i_loop + 1 << " loop run " << timer.get_t() << " seconds, the rest of " << IP.get_max_loop_mode0() - i_loop - 1 << " iterations require " << will_run_time << " seconds." << std::endl;
|
||||
std::cout << "The program is estimated to stop at " << timer.get_utc_from_time_t(end_time_estimated) << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
} // end loop for model update and relocation
|
||||
} else if (inv_mode == SIMULTANEOUS) {
|
||||
|
||||
for (int i_loop = 0; i_loop < IP.get_max_loop_mode1(); i_loop++){
|
||||
|
||||
if(myrank == 0 && id_sim ==0){
|
||||
std::cout << std::endl;
|
||||
std::cout << "loop " << i_loop+1 << ", model update and relocation starting ... " << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
/////////////////////
|
||||
// stage 1, update model parameters
|
||||
/////////////////////
|
||||
|
||||
|
||||
// prepare inversion iteration group in xdmf file
|
||||
io.prepare_grid_inv_xdmf(i_loop);
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// run (forward and adjoint) simulation for each source
|
||||
///////////////////////////////////////////////////////
|
||||
|
||||
// run forward and adjoint simulation and calculate current objective function value and sensitivity kernel for all sources
|
||||
line_search_mode = false;
|
||||
// skip for the mode with sub-iteration
|
||||
if (i_loop > 0 && optim_method != GRADIENT_DESCENT) {
|
||||
} else {
|
||||
bool is_save_T = true;
|
||||
v_obj_misfit = run_simulation_one_step(IP, grid, io, i_loop, first_src, line_search_mode, is_save_T);
|
||||
v_obj = v_obj_misfit[0];
|
||||
}
|
||||
|
||||
// wait for all processes to finish
|
||||
synchronize_all_world();
|
||||
|
||||
// check if v_obj is nan
|
||||
if (std::isnan(v_obj)) {
|
||||
if (myrank == 0)
|
||||
std::cout << "v_obj is nan, stop inversion" << std::endl;
|
||||
|
||||
// stop inversion
|
||||
break;
|
||||
}
|
||||
|
||||
///////////////
|
||||
// model update
|
||||
///////////////
|
||||
|
||||
if (optim_method == GRADIENT_DESCENT)
|
||||
model_optimize(IP, grid, io, i_loop, v_obj, old_v_obj, first_src, out_main);
|
||||
else if (optim_method == HALVE_STEPPING_MODE)
|
||||
v_obj_misfit = model_optimize_halve_stepping(IP, grid, io, i_loop, v_obj, first_src, out_main);
|
||||
else if (optim_method == LBFGS_MODE) {
|
||||
bool found_next_step = model_optimize_lbfgs(IP, grid, io, i_loop, v_obj, first_src, out_main);
|
||||
|
||||
if (!found_next_step)
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
// output objective function (model update part)
|
||||
write_objective_function(IP, i_loop, v_obj_misfit, out_main, "model update");
|
||||
|
||||
/////////////////////
|
||||
// stage 2, update earthquake locations
|
||||
/////////////////////
|
||||
|
||||
|
||||
// initilize all earthquakes
|
||||
if (proc_store_srcrec){
|
||||
for(auto iter = IP.rec_map.begin(); iter != IP.rec_map.end(); iter++){
|
||||
iter->second.is_stop = false;
|
||||
}
|
||||
}
|
||||
|
||||
// calculate gradient of objective function at sources
|
||||
v_obj_misfit = calculate_gradient_objective_function(IP, grid, io, i_loop);
|
||||
|
||||
// update source location
|
||||
recs.update_source_location(IP, grid);
|
||||
|
||||
synchronize_all_world();
|
||||
|
||||
// output objective function (relocation part)
|
||||
write_objective_function(IP, i_loop, v_obj_misfit, out_main, "relocation");
|
||||
|
||||
|
||||
// define old_v_obj
|
||||
old_v_obj = v_obj;
|
||||
|
||||
|
||||
// since model is update. The written traveltime field should be discraded.
|
||||
// initialize is_T_written_into_file
|
||||
for (int i_src = 0; i_src < IP.n_src_this_sim_group; i_src++){
|
||||
const std::string name_sim_src = IP.get_src_name(i_src);
|
||||
|
||||
if (proc_store_srcrec) // only proc_store_srcrec has the src_map object
|
||||
IP.src_map[name_sim_src].is_T_written_into_file = false;
|
||||
}
|
||||
|
||||
// output updated model
|
||||
if (id_sim==0) {
|
||||
//io.change_xdmf_obj(0); // change xmf file for next src
|
||||
io.change_group_name_for_model();
|
||||
|
||||
// write out model info
|
||||
if (IP.get_if_output_in_process() || i_loop >= IP.get_max_loop_mode1() - 2){
|
||||
io.write_vel(grid, i_loop+1);
|
||||
io.write_xi( grid, i_loop+1);
|
||||
io.write_eta(grid, i_loop+1);
|
||||
}
|
||||
//io.write_zeta(grid, i_inv); // TODO
|
||||
|
||||
if (IP.get_verbose_output_level()){
|
||||
io.write_a(grid, i_loop+1);
|
||||
io.write_b(grid, i_loop+1);
|
||||
io.write_c(grid, i_loop+1);
|
||||
io.write_f(grid, i_loop+1);
|
||||
io.write_fun(grid, i_loop+1);
|
||||
}
|
||||
|
||||
// // output model_parameters_inv_0000.dat
|
||||
// if (IP.get_if_output_model_dat()
|
||||
// && (IP.get_if_output_in_process() || i_loop >= IP.get_max_loop_mode1() - 2))
|
||||
// io.write_concerning_parameters(grid, i_loop + 1, IP);
|
||||
|
||||
} // end output updated model
|
||||
|
||||
// writeout temporary xdmf file
|
||||
io.update_xdmf_file();
|
||||
|
||||
// write src rec files
|
||||
if (IP.get_if_output_in_process_data()){
|
||||
IP.write_src_rec_file(i_loop,i_loop);
|
||||
} else if (i_loop == IP.get_max_loop_mode1() -1 || i_loop==0) {
|
||||
IP.write_src_rec_file(i_loop,i_loop);
|
||||
}
|
||||
|
||||
// modify the receiver's location for output
|
||||
IP.modify_swapped_source_location();
|
||||
|
||||
// wait for all processes to finish
|
||||
synchronize_all_world();
|
||||
|
||||
grid.rejuvenate_abcf(); // (a,b/r^2,c/(r^2*cos^2),f/(r^2*cos)) -> (a,b,c,f)
|
||||
|
||||
// estimate running time
|
||||
CUSTOMREAL time_elapsed = timer.get_t();
|
||||
if (id_sim == 0 && myrank == 0 && i_loop < IP.get_max_loop_mode1()-1) {
|
||||
const time_t end_time_estimated = time_elapsed / (i_loop + 1) * (IP.get_max_loop_mode1() - i_loop - 1) + timer.get_start();
|
||||
auto will_run_time = (int)(time_elapsed/(i_loop + 2) * (IP.get_max_loop_mode1() - i_loop - 1));
|
||||
|
||||
std::cout << std::endl;
|
||||
std::cout << "The program begins at " << timer.get_start_t() << std::endl;
|
||||
std::cout << "Loop (" << i_loop + 1 << "/" << IP.get_max_loop_mode1() << ") finished at " << time_elapsed << " seconds" << std::endl;
|
||||
std::cout << i_loop + 1 << " loop run " << timer.get_t() << " seconds, the rest of " << IP.get_max_loop_mode1() - i_loop - 1 << " iterations require " << will_run_time << " seconds." << std::endl;
|
||||
std::cout << "The program is estimated to stop at " << timer.get_utc_from_time_t(end_time_estimated) << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
// output current state of the model
|
||||
if (IP.get_if_output_final_model()) {
|
||||
// io.write_final_model(grid, IP);
|
||||
io.write_merged_model(grid, IP, "final_model.h5");
|
||||
}
|
||||
if (IP.get_if_output_middle_model()) {
|
||||
std::string tmp_fname = "middle_model_step_" + int2string_zero_fill(i_loop + 1) + ".h5";
|
||||
io.write_merged_model(grid, IP, tmp_fname);
|
||||
}
|
||||
} // end loop for model update and relocation
|
||||
|
||||
} else {
|
||||
std::cout << "Error: inv_mode is not defined" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
// close xdmf file
|
||||
io.finalize_data_output_file();
|
||||
|
||||
timer.stop_timer();
|
||||
if (id_sim == 0 && myrank == 0) {
|
||||
std::cout << std::endl;
|
||||
std::cout << "The program begin at " << timer.get_start_t();
|
||||
std::cout << "Inv_and_reloc end at " << timer.get_end_t();
|
||||
std::cout << "It has run " << timer.get_elapsed_t() << " seconds in total." << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
// run 1D inversion mode
|
||||
inline void run_1d_inversion(InputParams& IP, Grid& grid, IO_utils& io) {
|
||||
OneDInversion oneDInv(IP, grid);
|
||||
|
||||
if(myrank == 0)
|
||||
std::cout << "id_sim: " << id_sim << ", size of src_map: " << IP.src_map.size() << std::endl;
|
||||
|
||||
// estimate running time
|
||||
Timer timer("1D_inversion", true);
|
||||
|
||||
// prepare objective_function file
|
||||
std::ofstream out_main; // close() is not mandatory
|
||||
prepare_header_line(IP, out_main);
|
||||
|
||||
synchronize_all_world();
|
||||
|
||||
/////////////////////
|
||||
// loop for inversion
|
||||
/////////////////////
|
||||
|
||||
// objective function for all src
|
||||
std::vector<CUSTOMREAL> v_obj_misfit(20, 0.0);
|
||||
|
||||
for (int i_inv = 0; i_inv < IP.get_max_iter_inv(); i_inv++) {
|
||||
if(myrank == 0 && id_sim ==0){
|
||||
std::cout << "iteration " << i_inv << " starting ... " << std::endl;
|
||||
}
|
||||
|
||||
// prepare inverstion iteration group in xdmf file
|
||||
io.prepare_grid_inv_xdmf(i_inv);
|
||||
|
||||
///////////////////////////////////////////////////////
|
||||
// run (forward and adjoint) simulation for each source
|
||||
///////////////////////////////////////////////////////
|
||||
v_obj_misfit = oneDInv.run_simulation_one_step_1dinv(IP, io, i_inv);
|
||||
|
||||
// wait for all processes to finish
|
||||
synchronize_all_world();
|
||||
|
||||
// check if v_obj is nan
|
||||
if (std::isnan(v_obj_misfit[0])) {
|
||||
if (myrank == 0)
|
||||
std::cout << "v_obj is nan, stop inversion" << std::endl;
|
||||
// stop inversion
|
||||
break;
|
||||
}
|
||||
|
||||
// output src rec file with the result arrival times
|
||||
if (IP.get_if_output_in_process_data()){
|
||||
IP.write_src_rec_file(i_inv,0);
|
||||
} else if (i_inv == IP.get_max_iter_inv()-1 || i_inv==0) {
|
||||
IP.write_src_rec_file(i_inv,0);
|
||||
}
|
||||
|
||||
///////////////
|
||||
// model update
|
||||
///////////////
|
||||
if(myrank == 0 && id_sim ==0)
|
||||
std::cout << "model update starting ... " << std::endl;
|
||||
oneDInv.model_optimize_1dinv(IP, grid, io, i_inv);
|
||||
|
||||
// output objective function
|
||||
write_objective_function(IP, i_inv, v_obj_misfit, out_main, "1d inversion");
|
||||
|
||||
// output updated model
|
||||
if (id_sim==0) {
|
||||
//io.change_xdmf_obj(0); // change xmf file for next src
|
||||
io.change_group_name_for_model();
|
||||
|
||||
// write out model info
|
||||
if (IP.get_if_output_in_process() || i_inv >= IP.get_max_iter_inv() - 2){
|
||||
io.write_vel(grid, i_inv+1);
|
||||
io.write_xi( grid, i_inv+1);
|
||||
io.write_eta(grid, i_inv+1);
|
||||
}
|
||||
}
|
||||
|
||||
// writeout temporary xdmf file
|
||||
io.update_xdmf_file();
|
||||
|
||||
// wait for all processes to finish
|
||||
synchronize_all_world();
|
||||
|
||||
// estimate running time
|
||||
CUSTOMREAL time_elapsed = timer.get_t();
|
||||
if (id_sim == 0 && myrank == 0 && i_inv < IP.get_max_iter_inv()-1) {
|
||||
const time_t end_time_estimated = time_elapsed / (i_inv + 1) * (IP.get_max_iter_inv() - i_inv - 1) + timer.get_start();
|
||||
auto will_run_time = (int)(time_elapsed/(i_inv + 1) * (IP.get_max_iter_inv() - i_inv - 1));
|
||||
|
||||
std::cout << std::endl;
|
||||
std::cout << "The program begins at " << timer.get_start_t() << std::endl;
|
||||
std::cout << "Iteration (" << i_inv + 1 << "/" << IP.get_max_iter_inv() << ") finished at " << time_elapsed << " seconds" << std::endl;
|
||||
std::cout << i_inv + 1 << " iterations run " << timer.get_t() << " seconds, the rest of " << IP.get_max_iter_inv() - i_inv - 1 << " iterations require " << will_run_time << " seconds." << std::endl;
|
||||
std::cout << "The program is estimated to stop at " << timer.get_utc_from_time_t(end_time_estimated) << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
// output current state of the model
|
||||
if (IP.get_if_output_final_model()) {
|
||||
// io.write_final_model(grid, IP);
|
||||
io.write_merged_model(grid, IP, "final_model.h5");
|
||||
}
|
||||
if (IP.get_if_output_middle_model()) {
|
||||
std::string tmp_fname = "middle_model_step_" + int2string_zero_fill(i_inv + 1) + ".h5";
|
||||
io.write_merged_model(grid, IP, tmp_fname);
|
||||
}
|
||||
|
||||
// wait for all processes to finish
|
||||
synchronize_all_world();
|
||||
} // end loop inverse
|
||||
|
||||
// close xdmf file
|
||||
io.finalize_data_output_file();
|
||||
|
||||
timer.stop_timer();
|
||||
if (id_sim == 0 && myrank == 0) {
|
||||
std::cout << std::endl;
|
||||
std::cout << "The program begin at " << timer.get_start_t() << std::endl;
|
||||
std::cout << "1d model inversion ended at " << timer.get_end_t() << std::endl;
|
||||
std::cout << "It has run " << timer.get_elapsed_t() << " seconds in total." << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
#endif // MAIN_ROUTINES_CALLING_H
|
||||
149
include/main_routines_earthquake_relocation.h
Normal file
149
include/main_routines_earthquake_relocation.h
Normal file
@@ -0,0 +1,149 @@
|
||||
#ifndef MAIN_ROUTINES_EARTHQUAKE_RELOCATION_H
|
||||
#define MAIN_ROUTINES_EARTHQUAKE_RELOCATION_H
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include "mpi_funcs.h"
|
||||
#include "config.h"
|
||||
#include "utils.h"
|
||||
#include "input_params.h"
|
||||
#include "grid.h"
|
||||
#include "io.h"
|
||||
#include "iterator_selector.h"
|
||||
#include "iterator.h"
|
||||
#include "iterator_legacy.h"
|
||||
#include "iterator_level.h"
|
||||
#include "source.h"
|
||||
#include "receiver.h"
|
||||
#include "main_routines_inversion_mode.h"
|
||||
|
||||
|
||||
// calculate traveltime field for all sources-receivers and write to file
|
||||
void calculate_traveltime_for_all_src_rec(InputParams& IP, Grid& grid, IO_utils& io){
|
||||
|
||||
// check if this run is in source receiver swap mode
|
||||
// if not, stop the program
|
||||
if (IP.get_is_srcrec_swap() == false){
|
||||
std::cout << "Error: Source relocation mode must run with swap_src_rec = true" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// reinitialize factors
|
||||
grid.reinitialize_abcf();
|
||||
|
||||
// prepare inverstion iteration group in xdmf file
|
||||
io.prepare_grid_inv_xdmf(0);
|
||||
|
||||
///////////////////////
|
||||
// loop for each source
|
||||
///////////////////////
|
||||
|
||||
Source src;
|
||||
|
||||
// iterate over sources
|
||||
for (int i_src = 0; i_src < IP.n_src_this_sim_group; i_src++){
|
||||
|
||||
const std::string name_sim_src = IP.get_src_name(i_src);
|
||||
const int id_sim_src = IP.get_src_id(name_sim_src); // global source id
|
||||
|
||||
// check if the source is teleseismic or not
|
||||
// because teleseismic source is not supported in this mode
|
||||
bool is_teleseismic = IP.get_if_src_teleseismic(name_sim_src);
|
||||
|
||||
if (is_teleseismic){
|
||||
std::cout << "Error: Teleseismic source is not supported in source relocation mode." << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// set simu group id and source name for output files/dataset names
|
||||
io.reset_source_info(id_sim_src, name_sim_src);
|
||||
|
||||
// set source position
|
||||
src.set_source_position(IP, grid, is_teleseismic, name_sim_src);
|
||||
|
||||
// initialize iterator object
|
||||
bool first_init = (i_src==0);
|
||||
|
||||
// initialize iterator object
|
||||
std::unique_ptr<Iterator> It;
|
||||
select_iterator(IP, grid, src, io, name_sim_src, first_init, is_teleseismic, It, false);
|
||||
|
||||
/////////////////////////
|
||||
// run forward simulation
|
||||
/////////////////////////
|
||||
|
||||
if (proc_store_srcrec){
|
||||
auto srcmap_this = IP.get_src_point(name_sim_src);
|
||||
|
||||
std::cout << "calculating source (" << i_src+1 << "/" << IP.n_src_this_sim_group << "), name: "
|
||||
<< name_sim_src << ", lat: " << srcmap_this.lat
|
||||
<< ", lon: " << srcmap_this.lon << ", dep: " << srcmap_this.dep
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
bool write_tmp_T = true;
|
||||
calculate_or_read_traveltime_field(IP, grid, io, i_src, IP.n_src_this_sim_group, first_init, It, name_sim_src, write_tmp_T);
|
||||
|
||||
}
|
||||
|
||||
// wait for all processes to finish traveltime calculation
|
||||
synchronize_all_world();
|
||||
}
|
||||
|
||||
|
||||
std::vector<CUSTOMREAL> calculate_gradient_objective_function(InputParams& IP, Grid& grid, IO_utils& io, int i_iter){
|
||||
|
||||
Receiver recs; // here the source is swapped to receiver
|
||||
|
||||
// initialize source parameters (obj, kernel, )
|
||||
recs.init_vars_src_reloc(IP);
|
||||
|
||||
// iterate over sources (to obtain gradient of traveltime field T and absolute traveltime, common source differential traveltime, and common receiver differential traveltime)
|
||||
for (int i_src = 0; i_src < IP.n_src_this_sim_group; i_src++){
|
||||
|
||||
const std::string name_sim_src = IP.get_src_name(i_src);
|
||||
const int id_sim_src = IP.get_src_id(name_sim_src); // global source id
|
||||
|
||||
// set simu group id and source name for output files/dataset names
|
||||
io.reset_source_info(id_sim_src, name_sim_src);
|
||||
|
||||
// load travel time field on grid.T_loc
|
||||
io.read_T_tmp(grid);
|
||||
|
||||
// calculate travel time at the actual source location (absolute traveltime, common source differential traveltime)
|
||||
recs.interpolate_and_store_arrival_times_at_rec_position(IP, grid, name_sim_src);
|
||||
|
||||
// calculate gradient at the actual source location
|
||||
recs.calculate_T_gradient(IP, grid, name_sim_src);
|
||||
}
|
||||
|
||||
// wait for all processes to finish
|
||||
synchronize_all_world();
|
||||
|
||||
// gather all the traveltime to the main process and distribute to all processes
|
||||
// for calculating the synthetic (common receiver differential traveltime)
|
||||
IP.gather_traveltimes_and_calc_syn_diff();
|
||||
|
||||
|
||||
// iterate over sources for calculating gradient of objective function
|
||||
for (int i_src = 0; i_src < IP.n_src_this_sim_group; i_src++){
|
||||
const std::string name_sim_src = IP.get_src_name(i_src);
|
||||
|
||||
// calculate gradient of objective function with respect to location and ortime
|
||||
recs.calculate_grad_obj_src_reloc(IP, name_sim_src);
|
||||
}
|
||||
|
||||
// compute the objective function
|
||||
std::vector<CUSTOMREAL> obj_residual = recs.calculate_obj_reloc(IP, i_iter);
|
||||
|
||||
// sum grad_tau and grad_chi_k of all simulation groups
|
||||
IP.allreduce_rec_map_grad_src();
|
||||
|
||||
//synchronize_all_world(); // not necessary here because allreduce is already synchronizing communication
|
||||
|
||||
return obj_residual;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif // MAIN_ROUTINES_EARTHQUAKE_RELOCATION_H
|
||||
293
include/main_routines_inversion_mode.h
Normal file
293
include/main_routines_inversion_mode.h
Normal file
@@ -0,0 +1,293 @@
|
||||
#ifndef MAIN_ROUTINES_INVERSION_MODE_H
|
||||
#define MAIN_ROUTINES_INVERSION_MODE_H
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include "mpi_funcs.h"
|
||||
#include "config.h"
|
||||
#include "utils.h"
|
||||
#include "input_params.h"
|
||||
#include "grid.h"
|
||||
#include "io.h"
|
||||
#include "iterator_selector.h"
|
||||
#include "iterator.h"
|
||||
#include "iterator_legacy.h"
|
||||
#include "iterator_level.h"
|
||||
#include "source.h"
|
||||
#include "receiver.h"
|
||||
#include "kernel.h"
|
||||
#include "model_update.h"
|
||||
#include "lbfgs.h"
|
||||
|
||||
|
||||
inline void calculate_or_read_traveltime_field(InputParams& IP, Grid& grid, IO_utils& io, const int i_src, const int N_src, bool first_init,
|
||||
std::unique_ptr<Iterator>& It, const std::string& name_sim_src, const bool& prerun=false){
|
||||
|
||||
if (IP.get_is_T_written_into_file(name_sim_src)){
|
||||
// load travel time field on grid.T_loc
|
||||
if (myrank == 0){
|
||||
std::cout << "id_sim: " << id_sim << ", reading source (" << i_src+1 << "/" << N_src
|
||||
<< "), name: "
|
||||
<< name_sim_src << ", lat: " << IP.src_map[name_sim_src].lat
|
||||
<< ", lon: " << IP.src_map[name_sim_src].lon << ", dep: " << IP.src_map[name_sim_src].dep
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
io.read_T_tmp(grid);
|
||||
|
||||
} else {
|
||||
// We need to solve eikonal equation
|
||||
if (myrank == 0){
|
||||
std::cout << "id_sim: " << id_sim << ", calculating source (" << i_src+1 << "/" << N_src
|
||||
<< "), name: "
|
||||
<< name_sim_src << ", lat: " << IP.src_map[name_sim_src].lat
|
||||
<< ", lon: " << IP.src_map[name_sim_src].lon << ", dep: " << IP.src_map[name_sim_src].dep
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
// solve travel time field on grid.T_loc
|
||||
It->run_iteration_forward(IP, grid, io, first_init);
|
||||
|
||||
// writeout travel time field
|
||||
if (prerun) {
|
||||
// write the temporary traveltime field into file for later use
|
||||
io.write_T_tmp(grid);
|
||||
|
||||
if (proc_store_srcrec) // only proc_store_srcrec has the src_map object
|
||||
IP.src_map[name_sim_src].is_T_written_into_file = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
inline void pre_run_forward_only(InputParams& IP, Grid& grid, IO_utils& io, int i_inv){
|
||||
if(world_rank == 0)
|
||||
std::cout << "preparing traveltimes of common receiver data ..." << std::endl;
|
||||
|
||||
Source src;
|
||||
Receiver recs;
|
||||
|
||||
// noted that src_map_comm_rec is the subset of src_map
|
||||
for (int i_src = 0; i_src < IP.n_src_comm_rec_this_sim_group; i_src++){
|
||||
|
||||
// check if this is the first iteration of entire inversion process
|
||||
bool first_init = (i_inv == 0 && i_src==0);
|
||||
|
||||
// get source info
|
||||
std::string name_sim_src = IP.get_src_name_comm(i_src);
|
||||
int id_sim_src = IP.get_src_id(name_sim_src); // global source id
|
||||
bool is_teleseismic = IP.get_if_src_teleseismic(name_sim_src); // get is_teleseismic flag
|
||||
|
||||
// set simu group id and source name for output files/dataset names
|
||||
io.reset_source_info(id_sim_src, name_sim_src);
|
||||
|
||||
// set source position
|
||||
src.set_source_position(IP, grid, is_teleseismic, name_sim_src);
|
||||
|
||||
// initialize iterator object
|
||||
std::unique_ptr<Iterator> It;
|
||||
|
||||
select_iterator(IP, grid, src, io, name_sim_src, first_init, is_teleseismic, It, false);
|
||||
|
||||
// calculate or read traveltime field
|
||||
bool prerun_mode = true;
|
||||
calculate_or_read_traveltime_field(IP, grid, io, i_src, IP.n_src_comm_rec_this_sim_group, first_init, It, name_sim_src, prerun_mode);
|
||||
|
||||
// interpolate and store traveltime, cs_dif. For cr_dif, only store the traveltime.
|
||||
recs.interpolate_and_store_arrival_times_at_rec_position(IP, grid, name_sim_src);
|
||||
// CHS: At this point, all the synthesised arrival times for all the co-located stations are recorded in syn_time_map_sr. When you need to use it later, you can just look it up.
|
||||
}
|
||||
|
||||
|
||||
// wait for all processes to finish
|
||||
synchronize_all_world();
|
||||
|
||||
if(world_rank == 0)
|
||||
std::cout << "synthetic traveltimes of common receiver data have been prepared." << std::endl;
|
||||
|
||||
// gather all the traveltime to the main process and distribute to all processes
|
||||
// for calculating cr_dif
|
||||
IP.gather_traveltimes_and_calc_syn_diff();
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
// run forward and adjoint simulation and calculate current objective function value and sensitivity kernel if requested
|
||||
inline std::vector<CUSTOMREAL> run_simulation_one_step(InputParams& IP, Grid& grid, IO_utils& io, int i_inv, bool& first_src, bool line_search_mode, bool is_save_T){
|
||||
|
||||
// initialize kernel arrays
|
||||
if (IP.get_run_mode() == DO_INVERSION || IP.get_run_mode() == INV_RELOC)
|
||||
grid.initialize_kernels();
|
||||
|
||||
// reinitialize factors
|
||||
grid.reinitialize_abcf();
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
// compute the synthetic common receiver differential traveltime first
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
// prepare synthetic traveltime for all earthquakes, if
|
||||
// 1. common receiver data exists;
|
||||
// 2. we use common receiver data to update model; (cr + not swap) or (cs + swap)
|
||||
// 3. we do inversion
|
||||
if ( src_pair_exists &&
|
||||
((IP.get_use_cr() && !IP.get_is_srcrec_swap())
|
||||
|| (IP.get_use_cs() && IP.get_is_srcrec_swap()) )
|
||||
&& (IP.get_run_mode() == DO_INVERSION || IP.get_run_mode() == INV_RELOC)){
|
||||
pre_run_forward_only(IP, grid, io, i_inv);
|
||||
}
|
||||
//
|
||||
// loop over all sources
|
||||
//
|
||||
|
||||
Source src;
|
||||
Receiver recs;
|
||||
|
||||
if(world_rank == 0)
|
||||
std::cout << "computing traveltime field, adjoint field and kernel ..." << std::endl;
|
||||
|
||||
// iterate over sources
|
||||
for (int i_src = 0; i_src < IP.n_src_this_sim_group; i_src++){
|
||||
|
||||
// check if this is the first iteration of entire inversion process
|
||||
bool first_init = (i_inv == 0 && i_src==0);
|
||||
|
||||
// get source info
|
||||
const std::string name_sim_src = IP.get_src_name(i_src); // source name
|
||||
const int id_sim_src = IP.get_src_id(name_sim_src); // global source id
|
||||
bool is_teleseismic = IP.get_if_src_teleseismic(name_sim_src); // get is_teleseismic flag
|
||||
|
||||
// set simu group id and source name for output files/dataset names
|
||||
io.reset_source_info(id_sim_src, name_sim_src);
|
||||
|
||||
// output initial field
|
||||
if(first_src && IP.get_if_output_source_field()) {
|
||||
// write true solution
|
||||
if (if_test){
|
||||
io.write_true_solution(grid);
|
||||
}
|
||||
|
||||
first_src = false;
|
||||
}
|
||||
|
||||
/////////////////////////
|
||||
// run forward simulation
|
||||
/////////////////////////
|
||||
|
||||
// (re) initialize source object and set to grid
|
||||
src.set_source_position(IP, grid, is_teleseismic, name_sim_src);
|
||||
|
||||
// initialize iterator object
|
||||
std::unique_ptr<Iterator> It;
|
||||
|
||||
if (!hybrid_stencil_order){
|
||||
select_iterator(IP, grid, src, io, name_sim_src, first_init, is_teleseismic, It, false);
|
||||
|
||||
// if traveltime field has been wriiten into the file, we choose to read the traveltime data.
|
||||
calculate_or_read_traveltime_field(IP, grid, io, i_src, IP.n_src_this_sim_group, first_init, It, name_sim_src, is_save_T);
|
||||
|
||||
} else {
|
||||
// hybrid stencil mode
|
||||
std::cout << "\nrunnning in hybrid stencil mode\n" << std::endl;
|
||||
|
||||
// run 1st order forward simulation
|
||||
std::unique_ptr<Iterator> It_pre;
|
||||
IP.set_stencil_order(1);
|
||||
IP.set_conv_tol(IP.get_conv_tol()*100.0);
|
||||
select_iterator(IP, grid, src, io, name_sim_src, first_init, is_teleseismic, It_pre, false);
|
||||
calculate_or_read_traveltime_field(IP, grid, io, i_src, IP.n_src_this_sim_group, first_init, It_pre, name_sim_src, is_save_T);
|
||||
|
||||
// run 3rd order forward simulation
|
||||
IP.set_stencil_order(3);
|
||||
IP.set_conv_tol(IP.get_conv_tol()/100.0);
|
||||
select_iterator(IP, grid, src, io, name_sim_src, first_init, is_teleseismic, It, true);
|
||||
calculate_or_read_traveltime_field(IP, grid, io, i_src, IP.n_src_this_sim_group, first_init, It, name_sim_src, is_save_T);
|
||||
}
|
||||
|
||||
// output the result of forward simulation
|
||||
// ignored for inversion mode.
|
||||
if (!line_search_mode && IP.get_if_output_source_field()) {
|
||||
|
||||
// output T (result timetable)
|
||||
io.write_T(grid, i_inv);
|
||||
|
||||
// output T0v
|
||||
//io.write_T0v(grid,i_inv); // initial Timetable
|
||||
// output u (true solution)
|
||||
//if (if_test)
|
||||
// io.write_u(grid); // true Timetable
|
||||
// output tau
|
||||
//io.write_tau(grid, i_inv); // calculated deviation
|
||||
// output residual (residual = true_solution - result)
|
||||
//if (if_test)
|
||||
// io.write_residual(grid); // this will over write the u_loc, so we need to call write_u_h5 first
|
||||
}
|
||||
// calculate the arrival times at each receivers
|
||||
recs.interpolate_and_store_arrival_times_at_rec_position(IP, grid, name_sim_src);
|
||||
|
||||
/////////////////////////
|
||||
// run adjoint simulation
|
||||
/////////////////////////
|
||||
|
||||
// if (myrank == 0){
|
||||
// std::cout << "calculating adjoint field, source (" << i_src+1 << "/" << (int)IP.src_id2name.size() << "), name: "
|
||||
// << name_sim_src << ", lat: " << IP.src_map[name_sim_src].lat
|
||||
// << ", lon: " << IP.src_map[name_sim_src].lon << ", dep: " << IP.src_map[name_sim_src].dep
|
||||
// << std::endl;
|
||||
// }
|
||||
|
||||
if (IP.get_run_mode()==DO_INVERSION || IP.get_run_mode()==INV_RELOC){
|
||||
// calculate adjoint source
|
||||
recs.calculate_adjoint_source(IP, name_sim_src);
|
||||
// run iteration for adjoint field calculation
|
||||
int adj_type = 0; // compute adjoint field
|
||||
It->run_iteration_adjoint(IP, grid, io, adj_type);
|
||||
// run iteration for density of the adjoint field
|
||||
adj_type = 1; // compute adjoint field
|
||||
It->run_iteration_adjoint(IP, grid, io, adj_type);
|
||||
// calculate sensitivity kernel
|
||||
calculate_sensitivity_kernel(grid, IP, name_sim_src);
|
||||
if (subdom_main && !line_search_mode && IP.get_if_output_source_field()) {
|
||||
// adjoint field will be output only at the end of subiteration
|
||||
// output the result of adjoint simulation
|
||||
io.write_adjoint_field(grid,i_inv);
|
||||
}
|
||||
|
||||
// io.write_adjoint_field(grid,i_inv);
|
||||
|
||||
// check adjoint source
|
||||
// if (proc_store_srcrec){
|
||||
// for (auto iter = IP.rec_map.begin(); iter != IP.rec_map.end(); iter++){
|
||||
// std::cout << "rec id: " << iter->second.id << ", rec name: " << iter->second.name << ", adjoint source: " << iter->second.adjoint_source << std::endl;
|
||||
// }
|
||||
// }
|
||||
|
||||
} // end if run_mode == DO_INVERSION
|
||||
|
||||
// wait for all processes to finish
|
||||
// this should not be called here, for the case that the simultaneous run group has different number of sources
|
||||
//synchronize_all_world();
|
||||
|
||||
} // end for i_src
|
||||
// synchronize all processes
|
||||
synchronize_all_world();
|
||||
|
||||
// gather all the traveltime to the main process and distribute to all processes
|
||||
// for calculating the synthetic common receiver differential traveltime
|
||||
if ( IP.get_run_mode()==ONLY_FORWARD || // case 1. if we are doing forward modeling, traveltime is not prepared for computing cr_dif data. Now we need to compute it
|
||||
(!IP.get_use_cr() && !IP.get_is_srcrec_swap()) || // case 2-1, we do inversion, but we do not use cr data (cr + no swap)
|
||||
(!IP.get_use_cs() && IP.get_is_srcrec_swap())){ // case 2-2, we do inversion, but we do not use cr data (cs + swap)
|
||||
IP.gather_traveltimes_and_calc_syn_diff();
|
||||
}
|
||||
// compute all residual and obj
|
||||
std::vector<CUSTOMREAL> obj_residual = recs.calculate_obj_and_residual(IP);
|
||||
|
||||
|
||||
// return current objective function value
|
||||
return obj_residual;
|
||||
}
|
||||
|
||||
|
||||
#endif // MAIN_ROUTINES_INVERSION_MODE_H
|
||||
496
include/model_optimization_routines.h
Normal file
496
include/model_optimization_routines.h
Normal file
@@ -0,0 +1,496 @@
|
||||
#ifndef MODEL_OPTIMIZATION_ROUTINES_H
|
||||
#define MODEL_OPTIMIZATION_ROUTINES_H
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include "mpi_funcs.h"
|
||||
#include "config.h"
|
||||
#include "utils.h"
|
||||
#include "input_params.h"
|
||||
#include "grid.h"
|
||||
#include "io.h"
|
||||
#include "main_routines_calling.h"
|
||||
#include "iterator_selector.h"
|
||||
#include "iterator.h"
|
||||
#include "iterator_legacy.h"
|
||||
#include "iterator_level.h"
|
||||
#include "source.h"
|
||||
#include "receiver.h"
|
||||
#include "kernel.h"
|
||||
#include "model_update.h"
|
||||
#include "lbfgs.h"
|
||||
#include "objective_function_utils.h"
|
||||
|
||||
|
||||
// do model update by gradient descent
|
||||
inline void model_optimize(InputParams& IP, Grid& grid, IO_utils& io, int i_inv, \
|
||||
CUSTOMREAL& v_obj_inout, CUSTOMREAL& old_v_obj, bool& first_src, std::ofstream& out_main) {
|
||||
|
||||
// check kernel density
|
||||
check_kernel_density(grid, IP);
|
||||
|
||||
// sum kernels among all simultaneous runs
|
||||
sumup_kernels(grid);
|
||||
|
||||
// write out original kernels
|
||||
if (id_sim==0 && subdom_main && IP.get_if_output_kernel() && (IP.get_if_output_in_process() || i_inv >= IP.get_max_iter_inv() - 2 || i_inv == 0)) {
|
||||
// store kernel only in the first src datafile
|
||||
io.change_group_name_for_model();
|
||||
|
||||
// write kernel
|
||||
io.write_Ks(grid, i_inv);
|
||||
io.write_Keta(grid, i_inv);
|
||||
io.write_Kxi(grid, i_inv);
|
||||
|
||||
// write kernel density
|
||||
io.write_Ks_density(grid, i_inv);
|
||||
io.write_Kxi_density(grid, i_inv);
|
||||
io.write_Keta_density(grid, i_inv);
|
||||
}
|
||||
|
||||
// smooth kernels (multigrid, and kdensity normalization)
|
||||
smooth_kernels(grid, IP);
|
||||
|
||||
// write out modified kernels
|
||||
if (id_sim==0 && subdom_main && IP.get_if_output_kernel() && (IP.get_if_output_in_process() || i_inv >= IP.get_max_iter_inv() - 2 || i_inv == 0)) {
|
||||
// store kernel only in the first src datafile
|
||||
io.change_group_name_for_model();
|
||||
|
||||
// // kernel over density
|
||||
// io.write_Ks_over_Kden(grid, i_inv);
|
||||
// io.write_Keta_over_Kden(grid, i_inv);
|
||||
// io.write_Kxi_over_Kden(grid, i_inv);
|
||||
|
||||
// kernel over density with smoothing
|
||||
io.write_Ks_update(grid, i_inv);
|
||||
io.write_Keta_update(grid, i_inv);
|
||||
io.write_Kxi_update(grid, i_inv);
|
||||
|
||||
// density with smoothing
|
||||
io.write_Ks_density_update(grid, i_inv);
|
||||
io.write_Kxi_density_update(grid, i_inv);
|
||||
io.write_Keta_density_update(grid, i_inv);
|
||||
}
|
||||
|
||||
// change stepsize
|
||||
// Option 1: the step length is modulated when obj changes.
|
||||
if (step_method == OBJ_DEFINED){
|
||||
if(i_inv != 0){
|
||||
if (v_obj_inout < old_v_obj) {
|
||||
step_length_init = std::min((CUSTOMREAL)0.02, step_length_init);
|
||||
if(myrank == 0 && id_sim == 0){
|
||||
std::cout << std::endl;
|
||||
std::cout << "The obj keeps decreasing, from " << old_v_obj << " to " << v_obj_inout
|
||||
<< ", the step length is " << step_length_init << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
} else if (v_obj_inout >= old_v_obj) {
|
||||
step_length_init = std::max((CUSTOMREAL)0.0001, step_length_init*step_length_decay);
|
||||
if(myrank == 0 && id_sim == 0){
|
||||
std::cout << std::endl;
|
||||
std::cout << "The obj keep increases, from " << old_v_obj << " to " << v_obj_inout
|
||||
<< ", the step length decreases from " << step_length_init/step_length_decay
|
||||
<< " to " << step_length_init << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if(myrank == 0 && id_sim == 0){
|
||||
std::cout << std::endl;
|
||||
std::cout << "At the first iteration, the step length is " << step_length_init << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
} else if (step_method == GRADIENT_DEFINED){
|
||||
// Option 2: we modulate the step length according to the angle between the previous and current gradient directions.
|
||||
// If the angle is less than XX degree, which means the model update direction is successive, we should enlarge the step size
|
||||
// Otherwise, the step length should decrease
|
||||
CUSTOMREAL angle = direction_change_of_model_update(grid);
|
||||
if(i_inv != 0){
|
||||
if (angle > step_length_gradient_angle){
|
||||
CUSTOMREAL old_step_length = step_length_init;
|
||||
step_length_init = std::max((CUSTOMREAL)0.0001, step_length_init * step_length_down);
|
||||
if(myrank == 0 && id_sim == 0){
|
||||
std::cout << std::endl;
|
||||
std::cout << "The angle between two update darections is " << angle
|
||||
<< ". Because the angle is greater than " << step_length_gradient_angle << " degree, the step length decreases from "
|
||||
<< old_step_length << " to " << step_length_init << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
} else if (angle <= step_length_gradient_angle) {
|
||||
CUSTOMREAL old_step_length = step_length_init;
|
||||
step_length_init = std::min((CUSTOMREAL)0.02, step_length_init * step_length_up);
|
||||
if(myrank == 0 && id_sim == 0){
|
||||
std::cout << std::endl;
|
||||
std::cout << "The angle between two update darections is " << angle
|
||||
<< ". Because the angle is less than " << step_length_gradient_angle << " degree, the step length increases from "
|
||||
<< old_step_length << " to " << step_length_init << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if(myrank == 0 && id_sim == 0){
|
||||
std::cout << std::endl;
|
||||
std::cout << "At the first iteration, the step length is " << step_length_init << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
std::cout << std::endl;
|
||||
std::cout << "No supported method for step size change, step keep the same: " << step_length_init << std::endl;
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
// broadcast the step_length
|
||||
broadcast_cr_single(step_length_init,0);
|
||||
|
||||
// update the model with the initial step size
|
||||
set_new_model(grid, step_length_init);
|
||||
|
||||
// make station correction
|
||||
IP.station_correction_update(step_length_init_sc);
|
||||
|
||||
// writeout temporary xdmf file
|
||||
if (IP.get_verbose_output_level())
|
||||
io.update_xdmf_file();
|
||||
|
||||
synchronize_all_world();
|
||||
}
|
||||
|
||||
|
||||
inline std::vector<CUSTOMREAL> model_optimize_halve_stepping(InputParams& IP, Grid& grid, IO_utils& io, int i_inv, CUSTOMREAL& v_obj_inout, bool& first_src, std::ofstream& out_main) {
|
||||
|
||||
CUSTOMREAL step_length = step_length_init; // step size init is global variable
|
||||
CUSTOMREAL diff_obj = - 9999999999;
|
||||
CUSTOMREAL v_obj_old = v_obj_inout;
|
||||
CUSTOMREAL v_obj_new = 0.0;
|
||||
std::vector<CUSTOMREAL> v_obj_misfit_new = std::vector<CUSTOMREAL>(10, 0.0);
|
||||
int sub_iter_count = 0;
|
||||
|
||||
// sum kernels among all simultaneous runs
|
||||
sumup_kernels(grid);
|
||||
|
||||
// smooth kernels
|
||||
smooth_kernels(grid, IP);
|
||||
|
||||
// backup the initial model
|
||||
grid.back_up_fun_xi_eta_bcf();
|
||||
|
||||
// update the model with the initial step size
|
||||
set_new_model(grid, step_length);
|
||||
|
||||
|
||||
if (IP.get_verbose_output_level()) {
|
||||
// store kernel only in the first src datafile
|
||||
io.change_group_name_for_model();
|
||||
|
||||
// output updated velocity models
|
||||
io.write_Ks(grid, i_inv);
|
||||
io.write_Keta(grid, i_inv);
|
||||
io.write_Kxi(grid, i_inv);
|
||||
|
||||
// output descent direction
|
||||
io.write_Ks_update(grid, i_inv);
|
||||
io.write_Keta_update(grid, i_inv);
|
||||
io.write_Kxi_update(grid, i_inv);
|
||||
}
|
||||
|
||||
// writeout temporary xdmf file
|
||||
if (IP.get_verbose_output_level())
|
||||
io.update_xdmf_file();
|
||||
|
||||
synchronize_all_world();
|
||||
|
||||
// loop till
|
||||
while (sub_iter_count < max_sub_iterations) {
|
||||
// check the new objective function value
|
||||
// v_obj_new = run_simulation_one_step(IP, grid, io, i_inv, first_src, true); // run simulations with line search mode
|
||||
v_obj_misfit_new = run_simulation_one_step(IP, grid, io, i_inv, first_src, true, false); // run simulations with line search mode
|
||||
v_obj_new = v_obj_misfit_new[0];
|
||||
// if the new objective function value is larger than the old one, make the step width to be half of the previous one
|
||||
diff_obj = v_obj_new - v_obj_old;
|
||||
|
||||
if ( diff_obj > _0_CR // if the objective function value is larger than the old one
|
||||
|| std::abs(diff_obj/v_obj_old) > MAX_DIFF_RATIO_VOBJ) { // if the objective function reduced too much ) {
|
||||
|
||||
// set step length just for writing out
|
||||
step_length_init = step_length;
|
||||
// print status
|
||||
write_objective_function(IP, i_inv, v_obj_misfit_new, out_main, "sub iter");
|
||||
|
||||
grid.restore_fun_xi_eta_bcf();
|
||||
step_length *= HALVE_STEP_RATIO;
|
||||
set_new_model(grid, step_length);
|
||||
|
||||
sub_iter_count++;
|
||||
} else {
|
||||
// if the new objective function value is smaller than the old one, make the step width to be twice of the previous one
|
||||
goto end_of_sub_iteration;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
end_of_sub_iteration:
|
||||
|
||||
v_obj_inout = v_obj_new;
|
||||
step_length_init = step_length/(HALVE_STEP_RATIO)*HALVE_STEP_RESTORAION_RATIO; // use this step size for the next inversion
|
||||
|
||||
// write adjoint field
|
||||
int next_i_inv = i_inv + 1;
|
||||
if (subdom_main && IP.get_if_output_source_field())
|
||||
io.write_adjoint_field(grid,next_i_inv);
|
||||
|
||||
return v_obj_misfit_new;
|
||||
|
||||
}
|
||||
|
||||
|
||||
// do model update
|
||||
inline bool model_optimize_lbfgs(InputParams& IP, Grid& grid, IO_utils& io, int i_inv, CUSTOMREAL& v_obj_inout, bool& first_src, std::ofstream& out_main) {
|
||||
|
||||
int subiter_count = 0; // subiteration count
|
||||
//CUSTOMREAL qp_0 = _0_CR; // store initial p_k * grad(f_k) (descent_direction * gradient)
|
||||
CUSTOMREAL qp_t = _0_CR; // store p_k * grad(f_k)
|
||||
CUSTOMREAL td = _0_CR; // wolfes right step
|
||||
CUSTOMREAL tg = _0_CR; // wolfes left step
|
||||
CUSTOMREAL step_length = step_length_init; // step size init is global variable
|
||||
CUSTOMREAL v_obj_reg = _0_CR; // regularization term == q_t
|
||||
//CUSTOMREAL q_0 = _0_CR; // initial cost function
|
||||
CUSTOMREAL q_t = _0_CR; // current cost function
|
||||
std::vector<CUSTOMREAL> v_obj_misfit_new = std::vector<CUSTOMREAL>(10, 0.0);
|
||||
|
||||
//
|
||||
// initialize
|
||||
//
|
||||
if (i_inv == 0) {
|
||||
// initialize regularization
|
||||
// regularization_penalty[:] = 1.0
|
||||
init_regulaization_penalty_with_one(grid);
|
||||
|
||||
// calculate volume_domain = SquaredL2Norm(regularization_penalty)
|
||||
volume_domain = compute_volume_domain(grid);
|
||||
|
||||
// volume_domain /= N_params
|
||||
volume_domain /= N_params;
|
||||
|
||||
// weight_Tikonov = penalty_weight / volume_domain
|
||||
weight_Tikonov = regularization_weight / volume_domain;
|
||||
weight_Tikonov_ani = regularization_weight;
|
||||
std::cout << "DEBUG: weight_Tikonov: " << weight_Tikonov << std::endl;
|
||||
|
||||
// calculate damp_weights
|
||||
compute_damp_weights(grid);
|
||||
|
||||
// calculate gradient (run onestep forward+adjoint) not necessary to do here. already done
|
||||
//v_obj_misfit_new = run_simulation_one_step(IP, grid, io, i_inv, first_src, true);
|
||||
|
||||
// Q0 = initial obj
|
||||
q_0 = v_obj_inout;
|
||||
|
||||
// sum kernels among all simultaneous runs
|
||||
sumup_kernels(grid);
|
||||
|
||||
// smooth gradient
|
||||
smooth_kernels(grid, IP);
|
||||
|
||||
// calc regularization penalties
|
||||
calculate_regularization_penalty(grid);
|
||||
|
||||
//// smooth calculated grad regularization penalty
|
||||
//smooth_gradient_regularization(grid);
|
||||
|
||||
// calc (update) obj_regl = squaredL2Norm(regularization_penalty)
|
||||
v_obj_reg = calculate_regularization_obj(grid);
|
||||
|
||||
// Q0 += 0.5*weight_Tiknov*obj_regl
|
||||
q_0 += v_obj_reg;
|
||||
|
||||
// init_grad += weight_Tikonov * gadient_regularization_penalty
|
||||
add_regularization_grad(grid);
|
||||
|
||||
// store model and gradient
|
||||
store_model_and_gradient(grid, i_inv);
|
||||
|
||||
// log out
|
||||
if(myrank == 0 && id_sim ==0) {
|
||||
out_main \
|
||||
<< std::setw(5) << i_inv \
|
||||
<< "," << std::setw(5) << subiter_count \
|
||||
<< "," << std::setw(15) << step_length \
|
||||
<< "," << std::setw(15) << q_0 \
|
||||
<< "," << std::setw(15) << q_t \
|
||||
<< "," << std::setw(15) << v_obj_reg \
|
||||
<< "," << std::setw(15) << qp_0 \
|
||||
<< "," << std::setw(15) << qp_t \
|
||||
<< "," << std::setw(15) << td \
|
||||
<< "," << std::setw(15) << tg \
|
||||
<< "," << std::setw(15) << wolfe_c1*qp_0 \
|
||||
<< "," << std::setw(15) << wolfe_c2*qp_0 \
|
||||
<< "," << std::setw(15) << "init" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// backup the initial model
|
||||
grid.back_up_fun_xi_eta_bcf();
|
||||
|
||||
// update the model with the initial step size
|
||||
if (IP.get_verbose_output_level() && id_sim==0) {
|
||||
// store kernel only in the first src datafile
|
||||
io.change_group_name_for_model();
|
||||
|
||||
// output updated velocity models
|
||||
io.write_Ks(grid, i_inv);
|
||||
io.write_Keta(grid, i_inv);
|
||||
io.write_Kxi(grid, i_inv);
|
||||
|
||||
// output descent direction
|
||||
io.write_Ks_descent_dir(grid, i_inv);
|
||||
io.write_Keta_descent_dir(grid, i_inv);
|
||||
io.write_Kxi_descent_dir(grid, i_inv);
|
||||
}
|
||||
|
||||
// writeout temporary xdmf file
|
||||
if (IP.get_verbose_output_level()&& id_sim==0)
|
||||
io.update_xdmf_file();
|
||||
|
||||
//
|
||||
// iteration
|
||||
//
|
||||
if (i_inv > 0) {
|
||||
// sum kernels among all simultaneous runs
|
||||
sumup_kernels(grid);
|
||||
|
||||
// smooth kernels
|
||||
smooth_kernels(grid, IP);
|
||||
}
|
||||
|
||||
// compute descent direction
|
||||
calc_descent_direction(grid, i_inv, IP);
|
||||
|
||||
// smooth descent direction
|
||||
smooth_descent_direction(grid);
|
||||
|
||||
// calc qp_0 = inital grad * descent direction
|
||||
qp_0 = compute_q_k(grid);
|
||||
|
||||
// loop wolfes subiterations
|
||||
bool wolfe_cond_ok = false;
|
||||
while (wolfe_cond_ok != true) {
|
||||
|
||||
// initial guess for isub = 0
|
||||
if (i_inv == 0 && subiter_count == 0)
|
||||
initial_guess_step(grid, step_length, 1.0);
|
||||
else if (i_inv == 1 && subiter_count == 0)
|
||||
initial_guess_step(grid, step_length, LBFGS_RELATIVE_step_length);
|
||||
|
||||
//// Update model
|
||||
set_new_model(grid, step_length);
|
||||
|
||||
//// calculate gradient (run onestep forward+adjoint)
|
||||
v_obj_misfit_new = run_simulation_one_step(IP, grid, io, i_inv, first_src, true, false);
|
||||
|
||||
//// Qt update (=current obj)
|
||||
q_t = v_obj_misfit_new[0];
|
||||
|
||||
// sum kernels among all simultaneous runs
|
||||
sumup_kernels(grid);
|
||||
|
||||
//// smooth gradient
|
||||
smooth_kernels(grid, IP);
|
||||
|
||||
//// calculate regularization to grad
|
||||
calculate_regularization_penalty(grid);
|
||||
|
||||
//// smooth calculated grad regularization penalty
|
||||
//smooth_gradient_regularization(grid);
|
||||
|
||||
//// add regularization term to Qt (Qt += 0.5 * obj_regl )
|
||||
v_obj_reg = calculate_regularization_obj(grid);
|
||||
q_t += v_obj_reg;
|
||||
|
||||
//// current grad += weight_Tikonov * gadient_regularization_penalty
|
||||
add_regularization_grad(grid);
|
||||
|
||||
//// Qpt = Inner product(current_grad * descent_direction)
|
||||
qp_t = compute_q_k(grid);
|
||||
|
||||
//// check wolfe conditions
|
||||
wolfe_cond_ok = check_wolfe_cond(grid, q_0, q_t, qp_0, qp_t, td, tg, step_length);
|
||||
|
||||
// update the model with the initial step size
|
||||
if (IP.get_verbose_output_level() && id_sim==0) {
|
||||
// store kernel only in the first src datafile
|
||||
io.change_group_name_for_model();
|
||||
|
||||
// gradient
|
||||
io.write_Ks_update(grid, i_inv*100+subiter_count);
|
||||
io.write_Keta_update(grid, i_inv*100+subiter_count);
|
||||
io.write_Kxi_update(grid, i_inv*100+subiter_count);
|
||||
|
||||
// output descent direction
|
||||
io.write_Ks_descent_dir(grid, i_inv*100+subiter_count);
|
||||
io.write_Keta_descent_dir(grid, i_inv*100+subiter_count);
|
||||
io.write_Kxi_descent_dir(grid, i_inv*100+subiter_count);
|
||||
|
||||
}
|
||||
|
||||
// writeout temporary xdmf file
|
||||
if (IP.get_verbose_output_level()&& id_sim==0)
|
||||
io.update_xdmf_file();
|
||||
|
||||
// log out
|
||||
if(myrank == 0 && id_sim ==0)
|
||||
out_main \
|
||||
<< std::setw(5) << i_inv \
|
||||
<< "," << std::setw(5) << subiter_count \
|
||||
<< "," << std::setw(15) << step_length \
|
||||
<< "," << std::setw(15) << q_0 \
|
||||
<< "," << std::setw(15) << q_t \
|
||||
<< "," << std::setw(15) << v_obj_reg \
|
||||
<< "," << std::setw(15) << qp_0 \
|
||||
<< "," << std::setw(15) << qp_t \
|
||||
<< "," << std::setw(15) << td \
|
||||
<< "," << std::setw(15) << tg \
|
||||
<< "," << std::setw(15) << wolfe_c1*qp_0 \
|
||||
<< "," << std::setw(15) << wolfe_c2*qp_0 \
|
||||
<< "," << std::setw(15) << wolfe_cond_ok << std::endl;
|
||||
|
||||
if (wolfe_cond_ok) {
|
||||
goto end_of_subiteration;
|
||||
} else if (subiter_count > max_sub_iterations){
|
||||
// reached max subiter
|
||||
// exit
|
||||
std::cout << "reached max subiterations" << std::endl;
|
||||
|
||||
return false;
|
||||
|
||||
} else {
|
||||
// wolfe conditions not satisfied
|
||||
subiter_count++;
|
||||
|
||||
grid.restore_fun_xi_eta_bcf();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
end_of_subiteration:
|
||||
//// store model and gradient,
|
||||
store_model_and_gradient(grid, i_inv+1);
|
||||
|
||||
//// Q0=Qt
|
||||
q_0 = q_t;
|
||||
qp_0 = qp_t;
|
||||
|
||||
step_length_init = step_length; // use current step size for the next iteration
|
||||
|
||||
|
||||
if (myrank == 0)
|
||||
std::cout << "Wolfe conditions satisfied at iteration " << subiter_count << std::endl;
|
||||
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif // MODEL_OPTIMIZATION_ROUTINES_H
|
||||
485
include/model_update.h
Normal file
485
include/model_update.h
Normal file
@@ -0,0 +1,485 @@
|
||||
#ifndef MODEL_UPDATE_H
|
||||
#define MODEL_UPDATE_H
|
||||
|
||||
#include <cmath>
|
||||
#include "config.h"
|
||||
#include "grid.h"
|
||||
#include "smooth.h"
|
||||
#include "smooth_descent_dir.h"
|
||||
#include "smooth_grad_regul.h"
|
||||
#include "lbfgs.h"
|
||||
|
||||
|
||||
// generate smoothed kernels (K*_update_loc) from the kernels (K*_loc)
|
||||
// before doing this, K*_loc should be summed up among all simultaneous runs (by calling sumup_kernels)
|
||||
// before doing this, K*_update_loc has no meaning (unavailable)
|
||||
void smooth_kernels(Grid& grid, InputParams& IP) {
|
||||
|
||||
if (subdom_main){ // parallel level 3
|
||||
|
||||
if (id_sim==0){ // parallel level 1
|
||||
// initiaize update params
|
||||
for (int k = 0; k < loc_K; k++) {
|
||||
for (int j = 0; j < loc_J; j++) {
|
||||
for (int i = 0; i < loc_I; i++) {
|
||||
grid.Ks_update_loc_previous[I2V(i,j,k)] = grid.Ks_update_loc[I2V(i,j,k)];
|
||||
grid.Keta_update_loc_previous[I2V(i,j,k)] = grid.Keta_update_loc[I2V(i,j,k)];
|
||||
grid.Kxi_update_loc_previous[I2V(i,j,k)] = grid.Kxi_update_loc[I2V(i,j,k)];
|
||||
grid.Ks_update_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Keta_update_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Kxi_update_loc[I2V(i,j,k)] = _0_CR;
|
||||
// grid.Ks_density_update_loc[I2V(i,j,k)] = _0_CR;
|
||||
// grid.Kxi_density_update_loc[I2V(i,j,k)] = _0_CR;
|
||||
// grid.Keta_density_update_loc[I2V(i,j,k)] = _0_CR;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// check kernel
|
||||
CUSTOMREAL max_kernel = _0_CR;
|
||||
for (int k = 0; k < loc_K; k++) {
|
||||
for (int j = 0; j < loc_J; j++) {
|
||||
for (int i = 0; i < loc_I; i++) {
|
||||
max_kernel = std::max(max_kernel, std::abs(grid.Ks_loc[I2V(i,j,k)]));
|
||||
}
|
||||
}
|
||||
}
|
||||
CUSTOMREAL tmp = 0;
|
||||
allreduce_cr_single_max(max_kernel, tmp);
|
||||
max_kernel = tmp;
|
||||
|
||||
if (max_kernel <= eps) {
|
||||
std::cout << "Error: max_kernel is near zero (less than 10^-12), check data residual and whether no data is used" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (smooth_method == 0) {
|
||||
// Ks_loc, Keta_loc, Kxi_loc to be 0 for ghost layers to eliminate the effect of overlapping boundary
|
||||
for (int k = 0; k < loc_K; k++) {
|
||||
for (int j = 0; j < loc_J; j++) {
|
||||
for (int i = 0; i < loc_I; i++) {
|
||||
|
||||
if (i == 0 && !grid.i_first()) {
|
||||
grid.Ks_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Keta_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Kxi_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Ks_density_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Kxi_density_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Keta_density_loc[I2V(i,j,k)] = _0_CR;
|
||||
}
|
||||
if (i == loc_I-1 && !grid.i_last()) {
|
||||
grid.Ks_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Keta_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Kxi_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Ks_density_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Kxi_density_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Keta_density_loc[I2V(i,j,k)] = _0_CR;
|
||||
}
|
||||
if (j == 0 && !grid.j_first()) {
|
||||
grid.Ks_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Keta_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Kxi_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Ks_density_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Kxi_density_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Keta_density_loc[I2V(i,j,k)] = _0_CR;
|
||||
}
|
||||
if (j == loc_J-1 && !grid.j_last()) {
|
||||
grid.Ks_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Keta_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Kxi_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Ks_density_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Kxi_density_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Keta_density_loc[I2V(i,j,k)] = _0_CR;
|
||||
}
|
||||
if (k == 0 && !grid.k_first()) {
|
||||
grid.Ks_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Keta_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Kxi_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Ks_density_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Kxi_density_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Keta_density_loc[I2V(i,j,k)] = _0_CR;
|
||||
}
|
||||
if (k == loc_K-1 && !grid.k_last()) {
|
||||
grid.Ks_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Keta_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Kxi_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Ks_density_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Kxi_density_loc[I2V(i,j,k)] = _0_CR;
|
||||
grid.Keta_density_loc[I2V(i,j,k)] = _0_CR;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// grid based smoothing
|
||||
smooth_inv_kernels_orig(grid, IP);
|
||||
} else if (smooth_method == 1) {
|
||||
// CG smoothing
|
||||
smooth_inv_kernels_CG(grid, smooth_lr, smooth_lt, smooth_lp);
|
||||
}
|
||||
|
||||
// shared values on the boundary
|
||||
grid.send_recev_boundary_data(grid.Ks_update_loc);
|
||||
grid.send_recev_boundary_data(grid.Keta_update_loc);
|
||||
grid.send_recev_boundary_data(grid.Kxi_update_loc);
|
||||
grid.send_recev_boundary_data(grid.Ks_density_update_loc);
|
||||
grid.send_recev_boundary_data(grid.Kxi_density_update_loc);
|
||||
grid.send_recev_boundary_data(grid.Keta_density_update_loc);
|
||||
|
||||
grid.send_recev_boundary_data_kosumi(grid.Ks_update_loc);
|
||||
grid.send_recev_boundary_data_kosumi(grid.Keta_update_loc);
|
||||
grid.send_recev_boundary_data_kosumi(grid.Kxi_update_loc);
|
||||
grid.send_recev_boundary_data_kosumi(grid.Ks_density_update_loc);
|
||||
grid.send_recev_boundary_data_kosumi(grid.Kxi_density_update_loc);
|
||||
grid.send_recev_boundary_data_kosumi(grid.Keta_density_update_loc);
|
||||
|
||||
} // end if id_sim == 0
|
||||
|
||||
// send the updated model to all the simultaneous run
|
||||
broadcast_cr_inter_sim(grid.Ks_update_loc, loc_I*loc_J*loc_K, 0);
|
||||
broadcast_cr_inter_sim(grid.Kxi_update_loc, loc_I*loc_J*loc_K, 0);
|
||||
broadcast_cr_inter_sim(grid.Keta_update_loc, loc_I*loc_J*loc_K, 0);
|
||||
broadcast_cr_inter_sim(grid.Ks_density_update_loc, loc_I*loc_J*loc_K, 0);
|
||||
broadcast_cr_inter_sim(grid.Kxi_density_update_loc, loc_I*loc_J*loc_K, 0);
|
||||
broadcast_cr_inter_sim(grid.Keta_density_update_loc, loc_I*loc_J*loc_K, 0);
|
||||
|
||||
// send the previous updated model to all the simultaneous run
|
||||
broadcast_cr_inter_sim(grid.Ks_update_loc_previous, loc_I*loc_J*loc_K, 0);
|
||||
broadcast_cr_inter_sim(grid.Kxi_update_loc_previous, loc_I*loc_J*loc_K, 0);
|
||||
broadcast_cr_inter_sim(grid.Keta_update_loc_previous, loc_I*loc_J*loc_K, 0);
|
||||
|
||||
} // end if subdom_main
|
||||
|
||||
synchronize_all_world();
|
||||
}
|
||||
|
||||
|
||||
// smooth gradient regularization term
|
||||
void smooth_gradient_regularization(Grid& grid) {
|
||||
|
||||
if (subdom_main && id_sim==0){ // only id_sim==0 has values for these arrays
|
||||
if (smooth_method == 0) {
|
||||
// grid based smoothing
|
||||
smooth_gradient_regularization_orig(grid);
|
||||
} else if (smooth_method == 1) {
|
||||
// CG smoothing
|
||||
smooth_gradient_regularization_CG(grid, smooth_lr, smooth_lt, smooth_lp);
|
||||
}
|
||||
|
||||
grid.send_recev_boundary_data(grid.fun_gradient_regularization_penalty_loc);
|
||||
grid.send_recev_boundary_data(grid.eta_gradient_regularization_penalty_loc);
|
||||
grid.send_recev_boundary_data(grid.xi_gradient_regularization_penalty_loc);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void smooth_descent_direction(Grid& grid){
|
||||
if (subdom_main){
|
||||
if (id_sim == 0) { // calculation of the update model is only done in the main simultaneous run
|
||||
if (smooth_method == 0) {
|
||||
// grid based smoothing
|
||||
smooth_descent_dir(grid);
|
||||
} else if (smooth_method == 1) {
|
||||
// CG smoothing
|
||||
smooth_descent_dir_CG(grid, smooth_lr, smooth_lt, smooth_lp);
|
||||
}
|
||||
|
||||
// shared values on the boundary
|
||||
grid.send_recev_boundary_data( grid.Ks_descent_dir_loc);
|
||||
grid.send_recev_boundary_data(grid.Keta_descent_dir_loc);
|
||||
grid.send_recev_boundary_data( grid.Kxi_descent_dir_loc);
|
||||
|
||||
} // end if id_sim == 0
|
||||
|
||||
// send the updated model to all the simultaneous run
|
||||
broadcast_cr_inter_sim( grid.Ks_descent_dir_loc, loc_I*loc_J*loc_K, 0);
|
||||
broadcast_cr_inter_sim( grid.Kxi_descent_dir_loc, loc_I*loc_J*loc_K, 0);
|
||||
broadcast_cr_inter_sim(grid.Keta_descent_dir_loc, loc_I*loc_J*loc_K, 0);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void calc_descent_direction(Grid& grid, int i_inv, InputParams& IP) {
|
||||
|
||||
if (subdom_main) {
|
||||
|
||||
// routines for LBFGS
|
||||
if (optim_method == LBFGS_MODE) {
|
||||
|
||||
// calculate the descent direction
|
||||
if (i_inv > 0) {
|
||||
calculate_descent_direction_lbfgs(grid, i_inv);
|
||||
// use gradient for the first iteration
|
||||
} else {
|
||||
int n_grid = loc_I*loc_J*loc_K;
|
||||
|
||||
// first time, descent direction = - precond * gradient
|
||||
// inverse the gradient to fit the update scheme for LBFGS
|
||||
for (int i = 0; i < n_grid; i++){
|
||||
grid.Ks_descent_dir_loc[i] = - _1_CR* grid.Ks_update_loc[i];
|
||||
grid.Keta_descent_dir_loc[i] = - _1_CR* grid.Keta_update_loc[i];
|
||||
grid.Kxi_descent_dir_loc[i] = - _1_CR* grid.Kxi_update_loc[i];
|
||||
//grid.Ks_descent_dir_loc[i] = - _1_CR* grid.Ks_loc[i];
|
||||
//grid.Keta_descent_dir_loc[i] = - _1_CR* grid.Keta_loc[i];
|
||||
//grid.Kxi_descent_dir_loc[i] = - _1_CR* grid.Kxi_loc[i];
|
||||
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// return error
|
||||
std::cout << "Error: optim_method is not set to LBFGS_MODE (=2)" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
} // end if subdom_main
|
||||
|
||||
synchronize_all_world();
|
||||
|
||||
}
|
||||
|
||||
CUSTOMREAL direction_change_of_model_update(Grid& grid){
|
||||
CUSTOMREAL norm_grad = _0_CR;
|
||||
CUSTOMREAL norm_grad_previous = _0_CR;
|
||||
CUSTOMREAL inner_product = _0_CR;
|
||||
CUSTOMREAL cos_angle = _0_CR;
|
||||
CUSTOMREAL angle = _0_CR;
|
||||
if (subdom_main) {
|
||||
// initiaize update params
|
||||
inner_product = dot_product(grid.Ks_update_loc_previous, grid.Ks_update_loc, loc_I*loc_J*loc_K);
|
||||
norm_grad = dot_product(grid.Ks_update_loc, grid.Ks_update_loc, loc_I*loc_J*loc_K);
|
||||
norm_grad_previous = dot_product(grid.Ks_update_loc_previous, grid.Ks_update_loc_previous, loc_I*loc_J*loc_K);
|
||||
|
||||
CUSTOMREAL tmp;
|
||||
allreduce_cr_single(inner_product,tmp);
|
||||
inner_product = tmp;
|
||||
|
||||
allreduce_cr_single(norm_grad,tmp);
|
||||
norm_grad = tmp;
|
||||
|
||||
allreduce_cr_single(norm_grad_previous,tmp);
|
||||
norm_grad_previous = tmp;
|
||||
|
||||
cos_angle = inner_product / (std::sqrt(norm_grad) * std::sqrt(norm_grad_previous));
|
||||
angle = acos(cos_angle) * RAD2DEG;
|
||||
}
|
||||
|
||||
return angle;
|
||||
}
|
||||
|
||||
|
||||
void set_new_model(Grid& grid, CUSTOMREAL step_length_new, bool init_bfgs=false) {
|
||||
|
||||
if (subdom_main) {
|
||||
|
||||
// for LBFGS mode. K*_update_loc is not directly the descent direction but smoothed gradient
|
||||
// so for non LBFGS_mode, nextstep will be calculated with K*_update_loc
|
||||
if (optim_method != LBFGS_MODE) {
|
||||
// // get the scaling factor
|
||||
// CUSTOMREAL Linf_Ks = _0_CR, Linf_Keta = _0_CR, Linf_Kxi = _0_CR;
|
||||
// for (int k = 1; k < loc_K-1; k++) {
|
||||
// for (int j = 1; j < loc_J-1; j++) {
|
||||
// for (int i = 1; i < loc_I-1; i++) {
|
||||
// Linf_Ks = std::max(Linf_Ks, std::abs(grid.Ks_update_loc[I2V(i,j,k)]));
|
||||
// Linf_Keta = std::max(Linf_Keta, std::abs(grid.Keta_update_loc[I2V(i,j,k)]));
|
||||
// Linf_Kxi = std::max(Linf_Kxi, std::abs(grid.Kxi_update_loc[I2V(i,j,k)]));
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// // get the maximum scaling factor among subdomains
|
||||
// CUSTOMREAL Linf_tmp;
|
||||
// allreduce_cr_single_max(Linf_Ks, Linf_tmp); Linf_Ks = Linf_tmp;
|
||||
// allreduce_cr_single_max(Linf_Keta, Linf_tmp); Linf_Keta = Linf_tmp;
|
||||
// allreduce_cr_single_max(Linf_Kxi, Linf_tmp); Linf_Kxi = Linf_tmp;
|
||||
|
||||
// CUSTOMREAL Linf_all = _0_CR;
|
||||
// Linf_all = std::max(Linf_Ks, std::max(Linf_Keta, Linf_Kxi));
|
||||
|
||||
// // if (myrank == 0 && id_sim == 0)
|
||||
// // std::cout << "Scaling factor for all kernels: " << Linf_all << std::endl;
|
||||
// // std::cout << "Scaling factor for model update for Ks, Keta, Kx, stepsize: " << Linf_Ks << ", " << Linf_Keta << ", " << Linf_Kxi << ", " << step_length_new << std::endl;
|
||||
|
||||
|
||||
// Linf_Ks = Linf_all;
|
||||
// Linf_Keta = Linf_all;
|
||||
// Linf_Kxi = Linf_all;
|
||||
|
||||
// kernel update has been rescaled in "smooth_inv_kernels_orig"
|
||||
|
||||
// update the model
|
||||
for (int k = 0; k < loc_K; k++) {
|
||||
for (int j = 0; j < loc_J; j++) {
|
||||
for (int i = 0; i < loc_I; i++) {
|
||||
|
||||
// // update
|
||||
// grid.fun_loc[I2V(i,j,k)] *= (_1_CR - grid.Ks_update_loc[I2V(i,j,k) ] / (Linf_Ks /step_length_new) );
|
||||
// grid.xi_loc[I2V(i,j,k)] -= grid.Kxi_update_loc[I2V(i,j,k) ] / (Linf_Kxi /step_length_new) ;
|
||||
// grid.eta_loc[I2V(i,j,k)] -= grid.Keta_update_loc[I2V(i,j,k)] / (Linf_Keta /step_length_new) ;
|
||||
|
||||
// update
|
||||
grid.fun_loc[I2V(i,j,k)] *= (_1_CR - grid.Ks_update_loc[I2V(i,j,k) ] * step_length_new);
|
||||
grid.xi_loc[I2V(i,j,k)] -= grid.Kxi_update_loc[I2V(i,j,k) ] * step_length_new;
|
||||
grid.eta_loc[I2V(i,j,k)] -= grid.Keta_update_loc[I2V(i,j,k)] * step_length_new;
|
||||
|
||||
|
||||
// grid.fac_b_loc[I2V(i,j,k)] = _1_CR - _2_CR * grid.xi_loc[I2V(i,j,k)];
|
||||
// grid.fac_c_loc[I2V(i,j,k)] = _1_CR + _2_CR * grid.xi_loc[I2V(i,j,k)];
|
||||
// grid.fac_f_loc[I2V(i,j,k)] = - _2_CR * grid.eta_loc[I2V(i,j,k)];
|
||||
|
||||
// grid.fun_loc[I2V(i,j,k)] *= (_1_CR);
|
||||
// grid.xi_loc[I2V(i,j,k)] -= (_0_CR);
|
||||
// grid.eta_loc[I2V(i,j,k)] -= (_0_CR);
|
||||
|
||||
// grid.fac_b_loc[I2V(i,j,k)] = _1_CR - _2_CR * grid.xi_loc[I2V(i,j,k)];
|
||||
// grid.fac_c_loc[I2V(i,j,k)] = _1_CR + _2_CR * grid.xi_loc[I2V(i,j,k)];
|
||||
// grid.fac_f_loc[I2V(i,j,k)] = - _2_CR * grid.eta_loc[I2V(i,j,k)];
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
grid.rejuvenate_abcf();
|
||||
|
||||
|
||||
} else { // for LBFGS routine
|
||||
|
||||
// here all the simultaneous runs have the same values used in this routine.
|
||||
// thus we don't need to if(id_sim==0)
|
||||
|
||||
CUSTOMREAL step_length = step_length_new;
|
||||
const CUSTOMREAL factor = - _1_CR;
|
||||
|
||||
// update the model
|
||||
for (int k = 0; k < loc_K; k++) {
|
||||
for (int j = 0; j < loc_J; j++) {
|
||||
for (int i = 0; i < loc_I; i++) {
|
||||
// update
|
||||
grid.fun_loc[I2V(i,j,k)] *= (_1_CR - factor * grid.Ks_descent_dir_loc[I2V(i,j,k)] * step_length);
|
||||
grid.xi_loc[I2V(i,j,k)] -= factor * grid.Kxi_descent_dir_loc[I2V(i,j,k) ] * step_length;
|
||||
grid.eta_loc[I2V(i,j,k)] -= factor * grid.Keta_descent_dir_loc[I2V(i,j,k)] * step_length;
|
||||
//grid.fun_loc[I2V(i,j,k)] += grid.Ks_descent_dir_loc[I2V(i,j,k)] *step_length_new;
|
||||
//grid.xi_loc[I2V(i,j,k)] += grid.Kxi_descent_dir_loc[I2V(i,j,k)] *step_length_new;
|
||||
//grid.eta_loc[I2V(i,j,k)] += grid.Keta_descent_dir_loc[I2V(i,j,k)]*step_length_new;
|
||||
|
||||
grid.fac_b_loc[I2V(i,j,k)] = _1_CR - _2_CR * grid.xi_loc[I2V(i,j,k)];
|
||||
grid.fac_c_loc[I2V(i,j,k)] = _1_CR + _2_CR * grid.xi_loc[I2V(i,j,k)];
|
||||
grid.fac_f_loc[I2V(i,j,k)] = - _2_CR * grid.eta_loc[I2V(i,j,k)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // end LBFGS model update
|
||||
|
||||
// shared values on the boundary
|
||||
grid.send_recev_boundary_data(grid.fun_loc);
|
||||
grid.send_recev_boundary_data(grid.xi_loc);
|
||||
grid.send_recev_boundary_data(grid.eta_loc);
|
||||
grid.send_recev_boundary_data(grid.fac_b_loc);
|
||||
grid.send_recev_boundary_data(grid.fac_c_loc);
|
||||
grid.send_recev_boundary_data(grid.fac_f_loc);
|
||||
|
||||
} // end if subdom_main
|
||||
}
|
||||
|
||||
|
||||
|
||||
// compute p_k * grad(f_k)
|
||||
CUSTOMREAL compute_q_k(Grid& grid) {
|
||||
|
||||
CUSTOMREAL tmp_qk = _0_CR;
|
||||
|
||||
if (subdom_main && id_sim == 0) {
|
||||
|
||||
// grad * descent_direction
|
||||
for (int k = 0; k < loc_K; k++) {
|
||||
for (int j = 0; j < loc_J; j++) {
|
||||
for (int i = 0; i < loc_I; i++) {
|
||||
// we can add here a precondition (here use *_update_loc for storing descent direction)
|
||||
tmp_qk += grid.Ks_update_loc[I2V(i,j,k)] * grid.Ks_descent_dir_loc[I2V(i,j,k)]
|
||||
+ grid.Keta_update_loc[I2V(i,j,k)] * grid.Keta_descent_dir_loc[I2V(i,j,k)]
|
||||
+ grid.Kxi_update_loc[I2V(i,j,k)] * grid.Kxi_descent_dir_loc[I2V(i,j,k)];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// add tmp_qk among all subdomain
|
||||
allreduce_cr_single(tmp_qk, tmp_qk);
|
||||
}
|
||||
|
||||
// share tmp_qk among all simultaneous runs
|
||||
if (subdom_main)
|
||||
broadcast_cr_single_inter_sim(tmp_qk,0);
|
||||
|
||||
return tmp_qk;
|
||||
|
||||
}
|
||||
|
||||
|
||||
// check if the wolfe conditions are satisfied
|
||||
bool check_wolfe_cond(Grid& grid, \
|
||||
CUSTOMREAL q_0, CUSTOMREAL q_t, \
|
||||
CUSTOMREAL qp_0, CUSTOMREAL qp_t, \
|
||||
CUSTOMREAL& td, CUSTOMREAL& tg, CUSTOMREAL& step_length_sub) {
|
||||
/*
|
||||
q_0 : initial cost function
|
||||
q_t : current cost function
|
||||
q_p0 : initial grad * descent_dir
|
||||
q_pt : current grad * descent_dir
|
||||
td : right step size
|
||||
tg : left step size
|
||||
step_length_sub : current step size
|
||||
*/
|
||||
|
||||
bool step_accepted = false;
|
||||
|
||||
// check if the wolfe conditions are satisfied and update the step_length_sub
|
||||
CUSTOMREAL qpt = (q_t - q_0) / step_length_sub;
|
||||
|
||||
if (subdom_main) {
|
||||
// good step size
|
||||
if (qpt <= wolfe_c1*qp_0 \
|
||||
&& wolfe_c2*qp_0 <= qp_t \
|
||||
){
|
||||
if (myrank==0)
|
||||
std::cout << "Wolfe rules: step accepted" << std::endl;
|
||||
step_accepted = true;
|
||||
} else {
|
||||
// modify the stepsize
|
||||
if (wolfe_c1*qp_0 < qpt) {
|
||||
td = step_length_sub;
|
||||
if (myrank==0)
|
||||
std::cout << "Wolfe rules: right step size updated." << std::endl;
|
||||
}
|
||||
if (qpt <= wolfe_c1*qp_0 && qp_t < wolfe_c2*qp_0) {
|
||||
tg = step_length_sub;
|
||||
if (myrank==0)
|
||||
std::cout << "Wolfe rules: left step size updated." << std::endl;
|
||||
}
|
||||
//if (isZero(td)) {
|
||||
if (td == _0_CR) {
|
||||
step_length_sub = _2_CR * step_length_sub;
|
||||
if (myrank==0)
|
||||
std::cout << "Wolfe rules: step size too small. Increased to " << step_length_sub << std::endl;
|
||||
} else {
|
||||
step_length_sub = _0_5_CR * (td+tg);
|
||||
if (myrank==0)
|
||||
std::cout << "Wolfe rules: step size too large. Decreased to " << step_length_sub << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// share step_accepted among all subdomains
|
||||
broadcast_bool_single(step_accepted,0);
|
||||
broadcast_cr_single(step_length_sub,0);
|
||||
} // end if subdom_main
|
||||
|
||||
broadcast_bool_single_sub(step_accepted,0);
|
||||
broadcast_cr_single_sub(step_length_sub,0);
|
||||
|
||||
return step_accepted;
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif // MODEL_UPDATE_H
|
||||
1062
include/mpi_funcs.h
Normal file
1062
include/mpi_funcs.h
Normal file
File diff suppressed because it is too large
Load Diff
181
include/objective_function_utils.h
Normal file
181
include/objective_function_utils.h
Normal file
@@ -0,0 +1,181 @@
|
||||
#ifndef OBJECTIVE_FUNCTION_UTILS_H
|
||||
#define OBJECTIVE_FUNCTION_UTILS_H
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include "config.h"
|
||||
#include "input_params.h"
|
||||
|
||||
// prepare header line of objective_funciton.txt
|
||||
inline void prepare_header_line(InputParams &IP, std::ofstream &out_main) {
|
||||
// prepare output for iteration status
|
||||
if(myrank == 0 && id_sim ==0){
|
||||
out_main.open(output_dir + "/objective_function.txt");
|
||||
if (optim_method == GRADIENT_DESCENT || optim_method == HALVE_STEPPING_MODE){
|
||||
|
||||
out_main << std::setw(8) << std::right << "# iter,";
|
||||
out_main << std::setw(13) << std::right << " type,";
|
||||
|
||||
// if (optim_method == HALVE_STEPPING_MODE)
|
||||
// out_main << std::setw(8) << std::right << "subiter,"; (TODO in the future)
|
||||
std::string tmp = "obj(";
|
||||
if(IP.get_run_mode() == ONED_INVERSION){
|
||||
tmp.append(std::to_string(IP.N_abs_local_data)); // for 1d inversion, only absolute time residual is considered in obj
|
||||
} else {
|
||||
tmp.append(std::to_string(IP.N_data));
|
||||
}
|
||||
tmp.append("),");
|
||||
out_main << std::setw(20) << tmp;
|
||||
|
||||
tmp = "obj_abs(";
|
||||
tmp.append(std::to_string(IP.N_abs_local_data));
|
||||
tmp.append("),");
|
||||
out_main << std::setw(20) << tmp;
|
||||
|
||||
tmp = "obj_cs_dif(";
|
||||
if (IP.get_is_srcrec_swap())
|
||||
tmp.append(std::to_string(IP.N_cr_dif_local_data));
|
||||
else
|
||||
tmp.append(std::to_string(IP.N_cs_dif_local_data));
|
||||
tmp.append("),");
|
||||
out_main << std::setw(20) << tmp;
|
||||
|
||||
tmp = "obj_cr_dif(";
|
||||
if (IP.get_is_srcrec_swap())
|
||||
tmp.append(std::to_string(IP.N_cs_dif_local_data));
|
||||
else
|
||||
tmp.append(std::to_string(IP.N_cr_dif_local_data));
|
||||
tmp.append("),");
|
||||
out_main << std::setw(20) << tmp;
|
||||
|
||||
tmp = "obj_tele(";
|
||||
tmp.append(std::to_string(IP.N_teleseismic_data));
|
||||
tmp.append("),");
|
||||
out_main << std::setw(20) << tmp;
|
||||
|
||||
out_main << std::setw(25) << "res(mean/std),";
|
||||
|
||||
out_main << std::setw(25) << "res_abs(mean/std),";
|
||||
|
||||
out_main << std::setw(25) << "res_cs_dif(mean/std),";
|
||||
|
||||
out_main << std::setw(25) << "res_cr_dif(mean/std),";
|
||||
|
||||
out_main << std::setw(25) << "res_tele(mean/std),";
|
||||
|
||||
out_main << std::setw(20) << "step_length," << std::endl;
|
||||
|
||||
} else if (optim_method == LBFGS_MODE)
|
||||
out_main << std::setw(6) << "it," << std::setw(6) << "subit," << std::setw(16) << "step_length," << std::setw(16) << "q_0," << std::setw(16) << "q_t," \
|
||||
<< std::setw(16) << "v_obj_reg," << std::setw(16) << "qp_0," << std::setw(16) << "qp_t," << std::setw(16) << "td," << std::setw(16) << "tg," \
|
||||
<< std::setw(16) << "c1*qp_0," << std::setw(16) << "c2*qp_0," << std::setw(6) << "step ok," << std::endl;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline void write_objective_function(InputParams& IP, int i_inv, std::vector<CUSTOMREAL>& v_misfit_inout, std::ofstream& out_main, std::string type) {
|
||||
// output objective function
|
||||
if (myrank==0 && id_sim==0) {
|
||||
out_main << std::setw(8) << std::to_string(i_inv)+",";
|
||||
out_main << std::setw(13) << type;
|
||||
out_main << "," << std::setw(19) << v_misfit_inout[0];
|
||||
out_main << "," << std::setw(19) << v_misfit_inout[1];
|
||||
if (IP.get_is_srcrec_swap())
|
||||
out_main << "," << std::setw(19) << v_misfit_inout[3];
|
||||
else
|
||||
out_main << "," << std::setw(19) << v_misfit_inout[2];
|
||||
if (IP.get_is_srcrec_swap())
|
||||
out_main << "," << std::setw(19) << v_misfit_inout[2];
|
||||
else
|
||||
out_main << "," << std::setw(19) << v_misfit_inout[3];
|
||||
out_main << "," << std::setw(19) << v_misfit_inout[4];
|
||||
// res
|
||||
CUSTOMREAL mean;
|
||||
CUSTOMREAL std;
|
||||
std::string tmp;
|
||||
if (IP.N_data > 0) {
|
||||
mean = v_misfit_inout[5]/IP.N_data;
|
||||
std = sqrt(v_misfit_inout[6]/IP.N_data - my_square(mean));
|
||||
tmp = std::to_string(mean);
|
||||
tmp.append("/");
|
||||
tmp.append(std::to_string(std));
|
||||
out_main << "," << std::setw(24) << tmp;
|
||||
} else {
|
||||
out_main << "," << std::setw(24) << "0.0/0.0";
|
||||
}
|
||||
// res_abs
|
||||
if (IP.N_abs_local_data > 0) {
|
||||
mean = v_misfit_inout[7]/IP.N_abs_local_data;
|
||||
std = sqrt(v_misfit_inout[8]/IP.N_abs_local_data - my_square(mean));
|
||||
tmp = std::to_string(mean);
|
||||
tmp.append("/");
|
||||
tmp.append(std::to_string(std));
|
||||
out_main << "," << std::setw(24) << tmp;
|
||||
} else {
|
||||
out_main << "," << std::setw(24) << "0.0/0.0";
|
||||
}
|
||||
if (IP.get_is_srcrec_swap()){
|
||||
if (IP.N_cr_dif_local_data > 0) {
|
||||
mean = v_misfit_inout[11]/IP.N_cr_dif_local_data;
|
||||
std = sqrt(v_misfit_inout[12]/IP.N_cr_dif_local_data - my_square(mean));
|
||||
tmp = std::to_string(mean);
|
||||
tmp.append("/");
|
||||
tmp.append(std::to_string(std));
|
||||
out_main << "," << std::setw(24) << tmp;
|
||||
} else {
|
||||
out_main << "," << std::setw(24) << "0.0/0.0";
|
||||
}
|
||||
if (IP.N_cs_dif_local_data > 0) {
|
||||
mean = v_misfit_inout[9]/IP.N_cs_dif_local_data;
|
||||
std = sqrt(v_misfit_inout[10]/IP.N_cs_dif_local_data - my_square(mean));
|
||||
tmp = std::to_string(mean);
|
||||
tmp.append("/");
|
||||
tmp.append(std::to_string(std));
|
||||
out_main << "," << std::setw(24) << tmp;
|
||||
} else {
|
||||
out_main << "," << std::setw(24) << "0.0/0.0";
|
||||
}
|
||||
} else {
|
||||
if (IP.N_cs_dif_local_data > 0) {
|
||||
mean = v_misfit_inout[9]/IP.N_cs_dif_local_data;
|
||||
std = sqrt(v_misfit_inout[10]/IP.N_cs_dif_local_data - my_square(mean));
|
||||
tmp = std::to_string(mean);
|
||||
tmp.append("/");
|
||||
tmp.append(std::to_string(std));
|
||||
out_main << "," << std::setw(24) << tmp;
|
||||
} else {
|
||||
out_main << "," << std::setw(24) << "0.0/0.0";
|
||||
}
|
||||
if (IP.N_cr_dif_local_data > 0) {
|
||||
mean = v_misfit_inout[11]/IP.N_cr_dif_local_data;
|
||||
std = sqrt(v_misfit_inout[12]/IP.N_cr_dif_local_data - my_square(mean));
|
||||
tmp = std::to_string(mean);
|
||||
tmp.append("/");
|
||||
tmp.append(std::to_string(std));
|
||||
out_main << "," << std::setw(24) << tmp;
|
||||
} else {
|
||||
out_main << "," << std::setw(24) << "0.0/0.0";
|
||||
}
|
||||
}
|
||||
|
||||
if (IP.N_teleseismic_data > 0) {
|
||||
mean = v_misfit_inout[13]/IP.N_teleseismic_data;
|
||||
std = sqrt(v_misfit_inout[14]/IP.N_teleseismic_data - my_square(mean));
|
||||
tmp = std::to_string(mean);
|
||||
tmp.append("/");
|
||||
tmp.append(std::to_string(std));
|
||||
out_main << "," << std::setw(24) << tmp;
|
||||
} else {
|
||||
out_main << "," << std::setw(24) << "0.0/0.0";
|
||||
}
|
||||
if(type == "model update" || type == "1d inversion")
|
||||
out_main << "," << std::setw(19) << step_length_init << ",";
|
||||
|
||||
out_main << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif // OBJECTIVE_FUNCTION_UTILS_H
|
||||
109
include/oneD_inversion.h
Normal file
109
include/oneD_inversion.h
Normal file
@@ -0,0 +1,109 @@
|
||||
#ifndef ONED_INVERSION_H
|
||||
#define ONED_INVERSION_H
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
#include "input_params.h"
|
||||
#include "grid.h"
|
||||
#include "utils.h"
|
||||
#include "config.h"
|
||||
#include "receiver.h"
|
||||
|
||||
class OneDInversion {
|
||||
|
||||
public:
|
||||
OneDInversion(InputParams&, Grid&);
|
||||
~OneDInversion();
|
||||
|
||||
// grid parameters
|
||||
int nr_1dinv, nt_1dinv; // number of grid points in r and epicenter distance
|
||||
CUSTOMREAL *r_1dinv, *t_1dinv; // coordinates
|
||||
CUSTOMREAL dr_1dinv, dt_1dinv; // grid spacing
|
||||
|
||||
// parameters on grid nodes (for model)
|
||||
CUSTOMREAL *slowness_1dinv; // slowness
|
||||
CUSTOMREAL *fac_a_1dinv; // fac_a
|
||||
CUSTOMREAL *fac_b_1dinv; // fac_b
|
||||
|
||||
// parameters on grid nodes (for eikonal solver and adjoint solver)
|
||||
CUSTOMREAL *tau_1dinv; // tau
|
||||
CUSTOMREAL *tau_old_1dinv; // tau_old
|
||||
CUSTOMREAL *T_1dinv; // T
|
||||
CUSTOMREAL *Tadj_1dinv; // Tadj
|
||||
CUSTOMREAL *Tadj_density_1dinv; // Tadj
|
||||
CUSTOMREAL *T0v_1dinv; // T0v
|
||||
CUSTOMREAL *T0r_1dinv; // T0r
|
||||
CUSTOMREAL *T0t_1dinv; // T0t
|
||||
bool *is_changed_1dinv; // is_changed
|
||||
CUSTOMREAL *delta_1dinv; // delta
|
||||
|
||||
// parameters on grid nodes (for inversion)
|
||||
CUSTOMREAL *Ks_1dinv; // Ks
|
||||
CUSTOMREAL *Ks_density_1dinv; // Ks_density
|
||||
CUSTOMREAL *Ks_over_Kden_1dinv; // Ks_over_Kdensity
|
||||
CUSTOMREAL *Ks_multigrid_1dinv; // (Ks_over_Kdensity) parameterized by multigrid
|
||||
CUSTOMREAL *Ks_multigrid_previous_1dinv; // Ks_multigrid at previous iteration
|
||||
|
||||
// parameters for optimization
|
||||
CUSTOMREAL v_obj;
|
||||
CUSTOMREAL old_v_obj;
|
||||
|
||||
// functions
|
||||
std::vector<CUSTOMREAL> run_simulation_one_step_1dinv(InputParams&, IO_utils&, const int&);
|
||||
void model_optimize_1dinv(InputParams&, Grid&, IO_utils&, const int&);
|
||||
|
||||
// h5 file output
|
||||
void write_T_1dinv(IO_utils&, const std::string&, const int&);
|
||||
void write_Tadj_1dinv(IO_utils&, const std::string&, const int&);
|
||||
void write_vel_1dinv(IO_utils&, const int&);
|
||||
|
||||
private:
|
||||
|
||||
// for eikonal solver
|
||||
int count_cand;
|
||||
int ii,ii_nr,ii_n2r,ii_pr,ii_p2r,ii_nt,ii_n2t,ii_pt,ii_p2t;
|
||||
CUSTOMREAL ar,br,at,bt,ar1,ar2,at1,at2,br1,br2,bt1,bt2;
|
||||
CUSTOMREAL eqn_a, eqn_b, eqn_c, eqn_Delta;
|
||||
CUSTOMREAL tmp_tau, T_r, T_t, charact_r, charact_t;
|
||||
bool is_causality;
|
||||
std::vector<CUSTOMREAL> canditate = std::vector<CUSTOMREAL>(60);
|
||||
|
||||
|
||||
// member functions
|
||||
// class functions
|
||||
void generate_2d_mesh(InputParams&);
|
||||
void deallocate_arrays();
|
||||
void allocate_arrays();
|
||||
void load_1d_model(Grid&);
|
||||
int I2V_1DINV(const int&,const int&);
|
||||
|
||||
// "run_simulation_one_step_1dinv" subfunctions:
|
||||
void eikonal_solver_2d(InputParams&, int& );
|
||||
void initialize_eikonal_array(CUSTOMREAL);
|
||||
void FSM_2d();
|
||||
void calculate_stencil(const int&, const int&);
|
||||
|
||||
void calculate_synthetic_traveltime_and_adjoint_source(InputParams&, int& );
|
||||
CUSTOMREAL interpolate_2d_traveltime(const CUSTOMREAL&, const CUSTOMREAL&);
|
||||
|
||||
void adjoint_solver_2d(InputParams&, const int&, const int&);
|
||||
void initialize_adjoint_array(InputParams&, const int&, const int&);
|
||||
void FSM_2d_adjoint(const int&);
|
||||
void calculate_stencil_adj(const int&, const int&);
|
||||
|
||||
void initialize_kernel_1d();
|
||||
void calculate_kernel_1d();
|
||||
|
||||
// "model_optimize_1dinv" subfunctions:
|
||||
void kernel_processing_1dinv(Grid&);
|
||||
void density_normalization_1dinv();
|
||||
void multi_grid_parameterization_1dinv(Grid&);
|
||||
void model_update_1dinv(const int&);
|
||||
void determine_step_size_1dinv(const int&);
|
||||
CUSTOMREAL norm_1dinv(const CUSTOMREAL*, const int&);
|
||||
void generate_3d_model(Grid&);
|
||||
};
|
||||
|
||||
#endif // ONED_INVERSION_H
|
||||
36
include/receiver.h
Normal file
36
include/receiver.h
Normal file
@@ -0,0 +1,36 @@
|
||||
#ifndef RECEIVER_H
|
||||
#define RECEIVER_H
|
||||
|
||||
#include <vector>
|
||||
#include "input_params.h"
|
||||
#include "grid.h"
|
||||
|
||||
class Receiver {
|
||||
public:
|
||||
Receiver();
|
||||
~Receiver();
|
||||
|
||||
void interpolate_and_store_arrival_times_at_rec_position(InputParams&, Grid&, const std::string&); // only for common receiver differential traveltime
|
||||
|
||||
// adjoint source
|
||||
void calculate_adjoint_source(InputParams&, const std::string&);
|
||||
// objective function and residual
|
||||
std::vector<CUSTOMREAL> calculate_obj_and_residual(InputParams&);
|
||||
// Gradient of traveltime
|
||||
void calculate_T_gradient(InputParams&, Grid&, const std::string&);
|
||||
// initialize variables for source relocation
|
||||
void init_vars_src_reloc(InputParams&);
|
||||
// Gradient of objective function
|
||||
void calculate_grad_obj_src_reloc(InputParams&, const std::string&);
|
||||
// objective function
|
||||
std::vector<CUSTOMREAL> calculate_obj_reloc(InputParams&, int);
|
||||
// update source location
|
||||
void update_source_location(InputParams&, Grid&);
|
||||
|
||||
private:
|
||||
CUSTOMREAL interpolate_travel_time(Grid&, InputParams&, std::string, std::string);
|
||||
void calculate_T_gradient_one_rec(Grid&, InputParams&, std::string, CUSTOMREAL*);
|
||||
bool check_if_receiver_is_in_this_subdomain(Grid&, const CUSTOMREAL&, const CUSTOMREAL&, const CUSTOMREAL&);
|
||||
};
|
||||
|
||||
#endif // RECEIVER_H
|
||||
194
include/simd_conf.h
Normal file
194
include/simd_conf.h
Normal file
@@ -0,0 +1,194 @@
|
||||
#ifndef SIMD_CONF_H
|
||||
#define SIMD_CONF_H
|
||||
|
||||
#ifdef USE_SIMD
|
||||
|
||||
// flags for self controling the SIMD usage
|
||||
#define USE_AVX __AVX__
|
||||
#define USE_AVX512 __AVX512F__
|
||||
#define USE_ARM_SVE __ARM_FEATURE_SVE
|
||||
#define USE_NEON __ARM_NEON__
|
||||
|
||||
// forcely make USE_AVX and USE_AVX512 to be false if USE_ARM_SVE is true
|
||||
#if USE_ARM_SVE
|
||||
#undef USE_AVX
|
||||
#define USE_AVX 0
|
||||
#undef USE_AVX512
|
||||
#define USE_AVX512 0
|
||||
#endif
|
||||
|
||||
#if USE_AVX || USE_AVX512
|
||||
#include <immintrin.h>
|
||||
#elif USE_ARM_SVE
|
||||
#include <arm_sve.h>
|
||||
#elif USE_NEON
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef SINGLE_PRECISION
|
||||
|
||||
#if USE_AVX && !USE_AVX512
|
||||
const int ALIGN = 32;
|
||||
const int NSIMD = 8;
|
||||
#define __mT __m256
|
||||
#define _mmT_set1_pT _mm256_set1_ps
|
||||
#define _mmT_loadu_pT _mm256_loadu_ps
|
||||
#define _mmT_mul_pT _mm256_mul_ps
|
||||
#define _mmT_sub_pT _mm256_sub_ps
|
||||
#define _mmT_add_pT _mm256_add_ps
|
||||
#define _mmT_div_pT _mm256_div_ps
|
||||
#define _mmT_min_pT _mm256_min_ps
|
||||
#define _mmT_sqrt_pT _mm256_sqrt_ps
|
||||
#define _mmT_store_pT _mm256_store_ps
|
||||
#define _mmT_fmadd_pT _mm256_fmadd_ps
|
||||
|
||||
#define _mm256_cmp_pT _mm256_cmp_ps
|
||||
|
||||
#elif USE_AVX512
|
||||
const int ALIGN = 64;
|
||||
const int NSIMD = 16;
|
||||
#define __mT __m512
|
||||
#define _mmT_set1_pT _mm512_set1_ps
|
||||
#define _mmT_loadu_pT _mm512_loadu_ps
|
||||
#define _mmT_mul_pT _mm512_mul_ps
|
||||
#define _mmT_sub_pT _mm512_sub_ps
|
||||
#define _mmT_add_pT _mm512_add_ps
|
||||
#define _mmT_div_pT _mm512_div_ps
|
||||
#define _mmT_min_pT _mm512_min_ps
|
||||
#define _mmT_max_pT _mm512_max_ps
|
||||
#define _mmT_sqrt_pT _mm512_sqrt_ps
|
||||
#define _mmT_store_pT _mm512_store_ps
|
||||
#define _mmT_fmadd_pT _mm512_fmadd_ps
|
||||
|
||||
#define __mmaskT __mmask16
|
||||
#define _mm512_cmp_pT_mask _mm512_cmp_ps_mask
|
||||
#define _mm512_mask_blend_pT _mm512_mask_blend_ps
|
||||
#define _kand_maskT _kand_mask16
|
||||
|
||||
|
||||
#elif USE_ARM_SVE // NOT TESTED YET
|
||||
const int ALIGN = 8*svcntd(); // use svcntw() for float
|
||||
const int NSIMD = svcntd(); // Vector Length
|
||||
#define __mT svfloat32_t
|
||||
//#define _mmT_set1_pT svdup_f64
|
||||
//#define _mmT_loadu_pT svld1_f64_f64_f64
|
||||
//#define _mmT_mul_pT svmul_f64_m
|
||||
//#define _mmT_sub_pT svsub_f64_m
|
||||
//#define _mmT_add_pT svadd_f64_m
|
||||
//#define _mmT_div_pT svdiv_f64_m
|
||||
//#define _mmT_min_pT svmin_f64_m
|
||||
//#define _mmT_sqrt_pT svsqrt_f64_m
|
||||
//#define _mmT_store_pT svst1_f64
|
||||
|
||||
#elif USE_NEON
|
||||
const int ALIGN = 16;
|
||||
const int NSIMD = 4;
|
||||
#define __mT float32x4_t
|
||||
#define _mmT_set1_pT vdupq_n_f32
|
||||
#define _mmT_loadu_pT vld1q_f32
|
||||
#define _mmT_mul_pT vmulq_f32
|
||||
#define _mmT_sub_pT vsubq_f32
|
||||
#define _mmT_add_pT vaddq_f32
|
||||
#define _mmT_div_pT vdivq_f32
|
||||
#define _mmT_min_pT vminq_f32
|
||||
#define _mmT_sqrt_pT vsqrtq_f32
|
||||
#define _mmT_store_pT vst1q_f32
|
||||
|
||||
#endif // __ARM_FEATURE_SVE
|
||||
|
||||
|
||||
#else // DOUBLE_PRECISION
|
||||
|
||||
#if USE_AVX && !USE_AVX512
|
||||
const int ALIGN = 32;
|
||||
const int NSIMD = 4;
|
||||
#define __mT __m256d
|
||||
#define _mmT_set1_pT _mm256_set1_pd
|
||||
#define _mmT_loadu_pT _mm256_loadu_pd
|
||||
#define _mmT_mul_pT _mm256_mul_pd
|
||||
#define _mmT_sub_pT _mm256_sub_pd
|
||||
#define _mmT_add_pT _mm256_add_pd
|
||||
#define _mmT_div_pT _mm256_div_pd
|
||||
#define _mmT_min_pT _mm256_min_pd
|
||||
#define _mmT_sqrt_pT _mm256_sqrt_pd
|
||||
#define _mmT_store_pT _mm256_store_pd
|
||||
#define _mmT_fmadd_pT _mm256_fmadd_pd
|
||||
|
||||
#define _mm256_cmp_pT _mm256_cmp_pd
|
||||
|
||||
#elif USE_AVX512
|
||||
const int ALIGN = 64;
|
||||
const int NSIMD = 8;
|
||||
#define __mT __m512d
|
||||
#define _mmT_set1_pT _mm512_set1_pd
|
||||
#define _mmT_loadu_pT _mm512_loadu_pd
|
||||
#define _mmT_mul_pT _mm512_mul_pd
|
||||
#define _mmT_sub_pT _mm512_sub_pd
|
||||
#define _mmT_add_pT _mm512_add_pd
|
||||
#define _mmT_div_pT _mm512_div_pd
|
||||
#define _mmT_min_pT _mm512_min_pd
|
||||
#define _mmT_max_pT _mm512_max_pd
|
||||
#define _mmT_sqrt_pT _mm512_sqrt_pd
|
||||
#define _mmT_store_pT _mm512_store_pd
|
||||
#define _mmT_fmadd_pT _mm512_fmadd_pd
|
||||
|
||||
#define __mmaskT __mmask8
|
||||
#define _mm512_cmp_pT_mask _mm512_cmp_pd_mask
|
||||
#define _mm512_mask_blend_pT _mm512_mask_blend_pd
|
||||
#define _kand_maskT _kand_mask8
|
||||
|
||||
|
||||
#elif USE_ARM_SVE
|
||||
const int ALIGN = 8*svcntd(); // use svcntw() for float
|
||||
const int NSIMD = svcntd(); // Vector Length
|
||||
#define __mT svfloat64_t
|
||||
//#define _mmT_set1_pT svdup_f64
|
||||
//#define _mmT_loadu_pT svld1_f64_f64_f64
|
||||
//#define _mmT_mul_pT svmul_f64_m
|
||||
//#define _mmT_sub_pT svsub_f64_m
|
||||
//#define _mmT_add_pT svadd_f64_m
|
||||
//#define _mmT_div_pT svdiv_f64_m
|
||||
//#define _mmT_min_pT svmin_f64_m
|
||||
//#define _mmT_sqrt_pT svsqrt_f64_m
|
||||
//#define _mmT_store_pT svst1_f64
|
||||
|
||||
#elif USE_NEON
|
||||
const int ALIGN = 16;
|
||||
const int NSIMD = 2;
|
||||
#define __mT float64x2_t
|
||||
#define _mmT_set1_pT vdupq_n_f64
|
||||
#define _mmT_loadu_pT vld1q_f64
|
||||
#define _mmT_mul_pT vmulq_f64
|
||||
#define _mmT_sub_pT vsubq_f64
|
||||
#define _mmT_add_pT vaddq_f64
|
||||
#define _mmT_div_pT vdivq_f64
|
||||
#define _mmT_min_pT vminq_f64
|
||||
#define _mmT_sqrt_pT vsqrtq_f64
|
||||
#define _mmT_store_pT vst1q_f64
|
||||
|
||||
#endif // __ARM_FEATURE_SVE
|
||||
|
||||
#endif // DOUBLE_PRECISION
|
||||
|
||||
inline void print_simd_type() {
|
||||
std::cout << "SIMD type: ";
|
||||
#if USE_AVX && !USE_AVX512
|
||||
std::cout << "AVX" << std::endl;
|
||||
#elif USE_AVX512
|
||||
std::cout << "AVX512" << std::endl;
|
||||
#elif USE_ARM_SVE
|
||||
std::cout << "ARM SVE" << std::endl;
|
||||
#elif USE_NEON
|
||||
std::cout << "NEON" << std::endl;
|
||||
#endif // __ARM_FEATURE_SVE
|
||||
}
|
||||
|
||||
#else // USE_CUDA but not AVX dummy
|
||||
const int ALIGN = 32;
|
||||
const int NSIMD = 4;
|
||||
#endif // USE_SIMD
|
||||
|
||||
|
||||
|
||||
#endif // SIMD_CONF_H
|
||||
773
include/smooth.h
Normal file
773
include/smooth.h
Normal file
@@ -0,0 +1,773 @@
|
||||
#ifndef SMOOTH_H
|
||||
#define SMOOTH_H
|
||||
|
||||
#include <iostream>
|
||||
#include "grid.h"
|
||||
#include "config.h"
|
||||
#include "utils.h"
|
||||
|
||||
void calc_inversed_laplacian(CUSTOMREAL* d, CUSTOMREAL* Ap,
|
||||
const int i, const int j, const int k,
|
||||
const CUSTOMREAL lr, const CUSTOMREAL lt, const CUSTOMREAL lp,
|
||||
const CUSTOMREAL dr, const CUSTOMREAL dt, const CUSTOMREAL dp) {
|
||||
// calculate inversed laplacian operator
|
||||
CUSTOMREAL termx = _0_CR, termy = _0_CR, termz = _0_CR;
|
||||
|
||||
if (i==0) {
|
||||
termx = dp*lp/3.0 * (1/(lp*dp)*(d[I2V(i,j,k)]) - lp/(dp*dp*dp)*(-2.0*d[I2V(i,j,k)]+2.0*d[I2V(i+1,j,k)]));
|
||||
} else if (i==loc_I-1) {
|
||||
termx = dp*lp/3.0 * (1/(lp*dp)*(d[I2V(i,j,k)]) - lp/(dp*dp*dp)*(-2.0*d[I2V(i,j,k)]+2.0*d[I2V(i-1,j,k)]));
|
||||
} else {
|
||||
termx = dp*lp/3.0 * (1/(lp*dp)*(d[I2V(i,j,k)]) - lp/(dp*dp*dp)*(-2.0*d[I2V(i,j,k)]+d[I2V(i-1,j,k)]+d[I2V(i+1,j,k)]));
|
||||
}
|
||||
|
||||
if (j==0) {
|
||||
termy = dt*lt/3.0 * (1/(lt*dt)*(d[I2V(i,j,k)]) - lt/(dt*dt*dt)*(-2.0*d[I2V(i,j,k)]+2.0*d[I2V(i,j+1,k)]));
|
||||
} else if (j==loc_J-1) {
|
||||
termy = dt*lt/3.0 * (1/(lt*dt)*(d[I2V(i,j,k)]) - lt/(dt*dt*dt)*(-2.0*d[I2V(i,j,k)]+2.0*d[I2V(i,j-1,k)]));
|
||||
} else {
|
||||
termy = dt*lt/3.0 * (1/(lt*dt)*(d[I2V(i,j,k)]) - lt/(dt*dt*dt)*(-2.0*d[I2V(i,j,k)]+d[I2V(i,j-1,k)]+d[I2V(i,j+1,k)]));
|
||||
}
|
||||
|
||||
if (k==0) {
|
||||
termz = dr*lr/3.0 * (1/(lr*dr)*(d[I2V(i,j,k)]) - lr/(dr*dr*dr)*(-2.0*d[I2V(i,j,k)]+2.0*d[I2V(i,j,k+1)]));
|
||||
} else if (k==loc_K-1) {
|
||||
termz = dr*lr/3.0 * (1/(lr*dr)*(d[I2V(i,j,k)]) - lr/(dr*dr*dr)*(-2.0*d[I2V(i,j,k)]+2.0*d[I2V(i,j,k-1)]));
|
||||
} else {
|
||||
termz = dr*lr/3.0 * (1/(lr*dr)*(d[I2V(i,j,k)]) - lr/(dr*dr*dr)*(-2.0*d[I2V(i,j,k)]+d[I2V(i,j,k-1)]+d[I2V(i,j,k+1)]));
|
||||
}
|
||||
|
||||
Ap[I2V(i,j,k)] = termx+termy+termz;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void CG_smooth(Grid& grid, CUSTOMREAL* arr_in, CUSTOMREAL* arr_out, CUSTOMREAL lr, CUSTOMREAL lt, CUSTOMREAL lp) {
|
||||
// arr: array to be smoothed
|
||||
// lr: smooth length on r
|
||||
// lt: smooth length on theta
|
||||
// lp: smooth length on phi
|
||||
|
||||
// arrays :
|
||||
// x_array model
|
||||
// g_array gradient
|
||||
// d_array descent direction
|
||||
// Ap stiffness scalar (laplacian)
|
||||
// pAp = d_array*Ap
|
||||
// rr = g_array * g_array (dot product)
|
||||
|
||||
const int max_iter_cg = 1000;
|
||||
const CUSTOMREAL xtol = 0.001;
|
||||
const bool use_scaling = true;
|
||||
|
||||
CUSTOMREAL dr=grid.dr, dt=grid.dt, dp=grid.dp; // in km, rad, rad
|
||||
//CUSTOMREAL dr=_1_CR,dt=_1_CR,dp=_1_CR;
|
||||
//debug
|
||||
std::cout << "dr, dt, dp, lr, lt, lp = " << dr << " " << dt << " " << dp << " " << lr << " " << lt << " " << lp << std::endl;
|
||||
|
||||
// allocate memory
|
||||
CUSTOMREAL* x_array = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 3000);
|
||||
CUSTOMREAL* r_array = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 3001);
|
||||
CUSTOMREAL* p_array = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 3002);
|
||||
CUSTOMREAL* Ap = allocateMemory<CUSTOMREAL>(loc_I*loc_J*loc_K, 3003);
|
||||
CUSTOMREAL pAp=_0_CR, rr_0=_0_CR, rr=_0_CR, rr_new=_0_CR, aa=_0_CR, bb=_0_CR, tmp=_0_CR;
|
||||
|
||||
CUSTOMREAL scaling_A=_1_CR, scaling_coeff = _1_CR;
|
||||
|
||||
if (use_scaling) {
|
||||
// calculate scaling factor
|
||||
//scaling_A = std::sqrt(_1_CR / (_8_CR * PI * lr * lt * lp));
|
||||
// scaling coefficient for gradient
|
||||
scaling_coeff = find_absmax(arr_in, loc_I*loc_J*loc_K);
|
||||
tmp = scaling_coeff;
|
||||
allreduce_cr_single_max(tmp, scaling_coeff);
|
||||
//if (scaling_coeff == _0_CR)
|
||||
if (isZero(scaling_coeff))
|
||||
scaling_coeff = _1_CR;
|
||||
}
|
||||
// std out scaling factors
|
||||
if (myrank == 0) {
|
||||
std::cout << "scaling_A = " << scaling_A << std::endl;
|
||||
std::cout << "scaling_coeff = " << scaling_coeff << std::endl;
|
||||
}
|
||||
|
||||
|
||||
// array initialization
|
||||
for (int i=0; i<loc_I*loc_J*loc_K; i++) {
|
||||
x_array[i] = _0_CR; // x
|
||||
r_array[i] = arr_in[i]/scaling_coeff; // r = b - Ax
|
||||
p_array[i] = r_array[i]; // p
|
||||
Ap[i] = _0_CR;
|
||||
}
|
||||
|
||||
// initial rr
|
||||
rr = dot_product(r_array, r_array, loc_I*loc_J*loc_K);
|
||||
tmp = rr;
|
||||
// sum all rr among all processors
|
||||
allreduce_cr_single(tmp, rr);
|
||||
rr_0 = rr; // record initial rr
|
||||
|
||||
int k_start=0, k_end=loc_K, j_start=0, j_end=loc_J, i_start=0, i_end=loc_I;
|
||||
|
||||
// CG loop
|
||||
for (int iter=0; iter<max_iter_cg; iter++) {
|
||||
|
||||
// calculate laplacian
|
||||
for (int k = k_start; k < k_end; k++) {
|
||||
for (int j = j_start; j < j_end; j++) {
|
||||
for (int i = i_start; i < i_end; i++) {
|
||||
//scaling_coeff = std::max(scaling_coeff, std::abs(arr[I2V(i,j,k)]));
|
||||
|
||||
// calculate inversed laplacian operator
|
||||
calc_inversed_laplacian(p_array,Ap,i,j,k,lr,lt,lp,dr,dt,dp);
|
||||
|
||||
// scaling
|
||||
Ap[I2V(i,j,k)] = Ap[I2V(i,j,k)]*scaling_A;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// get the values on the boundaries
|
||||
grid.send_recev_boundary_data(Ap);
|
||||
|
||||
// calculate pAp
|
||||
pAp = dot_product(p_array, Ap, loc_I*loc_J*loc_K);
|
||||
tmp = pAp;
|
||||
// sum all pAp among all processors
|
||||
allreduce_cr_single(tmp, pAp);
|
||||
|
||||
// compute step length
|
||||
aa = rr / pAp;
|
||||
|
||||
// update x_array (model)
|
||||
for (int i=0; i<loc_I*loc_J*loc_K; i++) {
|
||||
x_array[i] += aa * p_array[i];
|
||||
}
|
||||
|
||||
// update r_array (gradient)
|
||||
for (int i=0; i<loc_I*loc_J*loc_K; i++) {
|
||||
r_array[i] -= aa * Ap[i];
|
||||
}
|
||||
|
||||
// update rr
|
||||
rr_new = dot_product(r_array, r_array, loc_I*loc_J*loc_K);
|
||||
tmp = rr_new;
|
||||
// sum all rr among all processors
|
||||
allreduce_cr_single(tmp, rr_new);
|
||||
|
||||
// update d_array (descent direction)
|
||||
bb = rr_new / rr;
|
||||
for (int i=0; i<loc_I*loc_J*loc_K; i++) {
|
||||
p_array[i] = r_array[i] + bb * p_array[i];
|
||||
}
|
||||
|
||||
if (myrank == 0 && iter%100==0){//} && if_verbose) {
|
||||
std::cout << "iter: " << iter << " rr: " << rr << " rr_new: " << rr_new << " rr/rr_0: " << rr/rr_0 << " pAp: " << pAp << " aa: " << aa << " bb: " << bb << std::endl;
|
||||
}
|
||||
|
||||
// update rr
|
||||
rr = rr_new;
|
||||
|
||||
// check convergence
|
||||
if (rr / rr_0 < xtol) {
|
||||
std::cout << "CG converged in " << iter << " iterations." << std::endl;
|
||||
break;
|
||||
}
|
||||
|
||||
} // end of CG loop
|
||||
|
||||
|
||||
// copy x_array to arr_out
|
||||
for (int i=0; i<loc_I*loc_J*loc_K; i++) {
|
||||
arr_out[i] = x_array[i]*scaling_coeff;
|
||||
}
|
||||
|
||||
// deallocate
|
||||
delete[] x_array;
|
||||
delete[] r_array;
|
||||
delete[] p_array;
|
||||
delete[] Ap;
|
||||
//delete[] Ap_tmp;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
void smooth_inv_kernels_CG(Grid& grid, CUSTOMREAL lr, CUSTOMREAL lt, CUSTOMREAL lp) {
|
||||
// smoothing kernels with Conjugate Gradient method
|
||||
// lr,lt,lp: smoothing length in r, theta, phi direction
|
||||
|
||||
// smooth Ks
|
||||
CG_smooth(grid, grid.Ks_loc, grid.Ks_update_loc, lr, lt, lp);
|
||||
// smooth Keta
|
||||
CG_smooth(grid, grid.Keta_loc, grid.Keta_update_loc, lr, lt, lp);
|
||||
// smooth Kxi
|
||||
CG_smooth(grid, grid.Kxi_loc, grid.Kxi_update_loc, lr, lt, lp);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// original method for smoothing kernels
|
||||
inline void smooth_inv_kernels_orig(Grid& grid, InputParams& IP) {
|
||||
// necessary params
|
||||
CUSTOMREAL r_r, r_t, r_p;
|
||||
CUSTOMREAL r_r_ani, r_t_ani, r_p_ani;
|
||||
|
||||
int kdr = 0, jdt = 0, idp = 0;
|
||||
int kdr_ani = 0, jdt_ani = 0, idp_ani = 0;
|
||||
|
||||
int k_start = 0;
|
||||
int j_start = 0;
|
||||
int i_start = 0;
|
||||
int k_end = ngrid_k;
|
||||
int j_end = ngrid_j;
|
||||
int i_end = ngrid_i;
|
||||
|
||||
CUSTOMREAL weight = _1_CR;
|
||||
CUSTOMREAL * taper = IP.get_depth_taper();
|
||||
|
||||
// check final kernel density
|
||||
for (int i_loc = 0; i_loc < loc_I; i_loc++) {
|
||||
for (int j_loc = 0; j_loc < loc_J; j_loc++) {
|
||||
for (int k_loc = 0; k_loc < loc_K; k_loc++) {
|
||||
if (isNegative(grid.Ks_density_loc[I2V(i_loc,j_loc,k_loc)])){
|
||||
std::cout << "Warning: grid.Ks_density_loc[I2V(" << i_loc << "," << j_loc << "," << k_loc << ")] is less than 0, = " << grid.Ks_density_loc[I2V(i_loc,j_loc,k_loc)] << std::endl;
|
||||
}
|
||||
if (isNegative(grid.Kxi_density_loc[I2V(i_loc,j_loc,k_loc)])){
|
||||
std::cout << "Warning: grid.Kxi_density_loc[I2V(" << i_loc << "," << j_loc << "," << k_loc << ")] is less than 0, = " << grid.Kxi_density_loc[I2V(i_loc,j_loc,k_loc)] << std::endl;
|
||||
}
|
||||
if (isNegative(grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)])){
|
||||
std::cout << "Warning: grid.Keta_density_loc[I2V(" << i_loc << "," << j_loc << "," << k_loc << ")] is less than 0, = " << grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// remark: kernel density normalization is shifted from kernel to kernel_inv.
|
||||
// Because high kernel density may concentrated below station at single grid point. This part should be strenghthened.
|
||||
// However, low kernel density may be distributed widely at middle depth. This part should be weakened.
|
||||
// kernel_inv considers the integration, whereas kernel only considers one grid point.
|
||||
// Therefore, kernel density normalization should be applied to kernel_inv, not kernel.
|
||||
|
||||
// // kernel density normalization
|
||||
// for (int i_loc = 0; i_loc < loc_I; i_loc++) {
|
||||
// for (int j_loc = 0; j_loc < loc_J; j_loc++) {
|
||||
// for (int k_loc = 0; k_loc < loc_K; k_loc++) {
|
||||
// if(isZero(grid.Ks_density_loc[I2V(i_loc,j_loc,k_loc)])){
|
||||
// // do nothing
|
||||
// } else {
|
||||
// if (grid.Ks_density_loc[I2V(i_loc,j_loc,k_loc)] < 0){
|
||||
// std::cout << "Warning: grid.Ks_density_loc[I2V(" << i_loc << "," << j_loc << "," << k_loc << ")] is less than 0, = "
|
||||
// << grid.Ks_density_loc[I2V(i_loc,j_loc,k_loc)]
|
||||
// << std::endl;
|
||||
// }
|
||||
// grid.Ks_loc[I2V(i_loc,j_loc,k_loc)] /= std::pow(std::abs(grid.Ks_density_loc[I2V(i_loc,j_loc,k_loc)]),Kdensity_coe);
|
||||
// }
|
||||
|
||||
// if(isZero(grid.Kxi_density_loc[I2V(i_loc,j_loc,k_loc)])){
|
||||
// // do nothing
|
||||
// } else {
|
||||
// if (grid.Kxi_density_loc[I2V(i_loc,j_loc,k_loc)] < 0){
|
||||
// std::cout << "Warning: grid.Kxi_density_loc[I2V(" << i_loc << "," << j_loc << "," << k_loc << ")] is less than 0, = " << grid.Kxi_density_loc[I2V(i_loc,j_loc,k_loc)] << std::endl;
|
||||
// }
|
||||
// grid.Kxi_loc[ I2V(i_loc,j_loc,k_loc)] /= std::pow(std::abs(grid.Kxi_density_loc[I2V(i_loc,j_loc,k_loc)]),Kdensity_coe);
|
||||
// }
|
||||
|
||||
// if(isZero(grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)])){
|
||||
// // do nothing
|
||||
// } else {
|
||||
// if (grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] < 0){
|
||||
// std::cout << "Warning: grid.Keta_density_loc[I2V(" << i_loc << "," << j_loc << "," << k_loc << ")] is less than 0, = " << grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] << std::endl;
|
||||
// }
|
||||
// grid.Keta_loc[I2V(i_loc,j_loc,k_loc)] /= std::pow(std::abs(grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)]),Kdensity_coe);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
//
|
||||
// sum the kernel values on the inversion grid
|
||||
//
|
||||
|
||||
for (int i_grid = 0; i_grid < n_inv_grids; i_grid++) {
|
||||
|
||||
// init coarser kernel arrays
|
||||
// velocity
|
||||
for (int k = 0; k < n_inv_K_loc; k++) {
|
||||
for (int j = 0; j < n_inv_J_loc; j++) {
|
||||
for (int i = 0; i < n_inv_I_loc; i++) {
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL(i,j,k)] = _0_CR;
|
||||
grid.Ks_density_inv_loc[I2V_INV_KNL(i,j,k)] = _0_CR;
|
||||
}
|
||||
}
|
||||
}
|
||||
// anisotropy
|
||||
for (int k = 0; k < n_inv_K_loc_ani; k++) {
|
||||
for (int j = 0; j < n_inv_J_loc_ani; j++) {
|
||||
for (int i = 0; i < n_inv_I_loc_ani; i++) {
|
||||
grid.Keta_inv_loc[I2V_INV_ANI_KNL(i,j,k)] = _0_CR;
|
||||
grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)] = _0_CR;
|
||||
|
||||
grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(i,j,k)] = _0_CR;
|
||||
grid.Kxi_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)] = _0_CR;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (int k = k_start; k < k_end; k++) {
|
||||
CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
|
||||
r_r = -_1_CR;
|
||||
for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
|
||||
// increasing or decreasing order
|
||||
if (in_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)])) {
|
||||
kdr = ii_invr;
|
||||
r_r = calc_ratio_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
r_r_ani = -_1_CR;
|
||||
for (int ii_invr = 0; ii_invr < n_inv_K_loc_ani-1; ii_invr++){
|
||||
// increasing or decreasing order
|
||||
if (in_between(r_glob, grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr+1,i_grid)])) {
|
||||
kdr_ani = ii_invr;
|
||||
r_r_ani = calc_ratio_between(r_glob, grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr+1,i_grid)]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// continue if r is out of the inversion grid
|
||||
if (r_r < _0_CR || r_r_ani < _0_CR) continue;
|
||||
|
||||
for (int j = j_start; j < j_end; j++) {
|
||||
CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat(); // global coordinate of t (latitude)
|
||||
r_t = -_1_CR;
|
||||
for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
|
||||
CUSTOMREAL left = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr+1,i_grid)]*(r_r);
|
||||
CUSTOMREAL right = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr+1,i_grid)]*(r_r);
|
||||
if (in_between(t_glob, left, right)) {
|
||||
jdt = ii_invt;
|
||||
r_t = calc_ratio_between(t_glob, left, right);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
r_t_ani = -_1_CR;
|
||||
for (int ii_invt = 0; ii_invt < n_inv_J_loc_ani-1; ii_invt++){
|
||||
//if ((t_glob - grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt,i_grid)]) * (t_glob - grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt+1,i_grid)]) <= _0_CR) {
|
||||
CUSTOMREAL left = grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt, kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt, kdr_ani+1,i_grid)]*(r_r_ani);
|
||||
CUSTOMREAL right = grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt+1,kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt+1,kdr_ani+1,i_grid)]*(r_r_ani);
|
||||
if (in_between(t_glob, left, right)) {
|
||||
jdt_ani = ii_invt;
|
||||
r_t_ani = calc_ratio_between(t_glob, left, right);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// continue if t is out of the inversion grid
|
||||
if (r_t < _0_CR || r_t_ani < _0_CR) continue;
|
||||
|
||||
for (int i = i_start; i < i_end; i++) {
|
||||
CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon(); // global coordinate of p (longitude)
|
||||
r_p = -_1_CR;
|
||||
for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
|
||||
CUSTOMREAL left = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr+1,i_grid)]*(r_r);
|
||||
CUSTOMREAL right = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr+1,i_grid)]*(r_r);
|
||||
if (in_between(p_glob, left, right)) {
|
||||
idp = ii_invp;
|
||||
r_p = calc_ratio_between(p_glob, left, right);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
r_p_ani = -_1_CR;
|
||||
for (int ii_invp = 0; ii_invp < n_inv_I_loc_ani-1; ii_invp++){
|
||||
CUSTOMREAL left = grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp, kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp, kdr_ani+1,i_grid)]*(r_r_ani);
|
||||
CUSTOMREAL right = grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp+1,kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp+1,kdr_ani+1,i_grid)]*(r_r_ani);
|
||||
if (in_between(p_glob, left, right)) {
|
||||
idp_ani = ii_invp;
|
||||
r_p_ani = calc_ratio_between(p_glob, left, right);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// continue if p is out of the inversion grid
|
||||
if (r_p < _0_CR || r_p_ani < _0_CR) continue;
|
||||
|
||||
// global grid id to local id
|
||||
int k_loc = k - grid.get_offset_k();
|
||||
int j_loc = j - grid.get_offset_j();
|
||||
int i_loc = i - grid.get_offset_i();
|
||||
|
||||
// check if *_loc are inside the local subdomain
|
||||
if (k_loc < 0 || k_loc > loc_K-1) continue;
|
||||
if (j_loc < 0 || j_loc > loc_J-1) continue;
|
||||
if (i_loc < 0 || i_loc > loc_I-1) continue;
|
||||
|
||||
// check if Ks_density_loc is 0 (no contributary kernel here)
|
||||
// if(isZero(grid.Ks_density_loc[I2V(i_loc,j_loc,k_loc)])) continue;
|
||||
|
||||
CUSTOMREAL dxdydz = (grid.dr) * (r_glob*grid.dt) * (r_glob*cos(t_glob)*grid.dp); // dx*dy*dz = r^2*cos(t)dr*dt*dp
|
||||
|
||||
// update Ks_inv Keta_inv Kxi_inv
|
||||
// -p, -t, -r
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL( idp, jdt, kdr)] += (_1_CR-r_r) *(_1_CR-r_t) *(_1_CR-r_p) *grid.Ks_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)] += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)] += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
|
||||
grid.Ks_density_inv_loc[ I2V_INV_KNL( idp, jdt, kdr)] += (_1_CR-r_r) *(_1_CR-r_t) *(_1_CR-r_p) *grid.Ks_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)] += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)] += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
|
||||
// -p, -t, +r
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL( idp, jdt, kdr +1)] += r_r *(_1_CR-r_t) *(_1_CR-r_p) *grid.Ks_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)] += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)] += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
|
||||
grid.Ks_density_inv_loc[ I2V_INV_KNL( idp, jdt, kdr +1)] += r_r *(_1_CR-r_t) *(_1_CR-r_p) *grid.Ks_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)] += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)] += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
|
||||
// -p, +t, -r
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL( idp, jdt +1,kdr)] += (_1_CR-r_r) *r_t *(_1_CR-r_p) *grid.Ks_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)] += (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)] += (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
|
||||
grid.Ks_density_inv_loc[ I2V_INV_KNL( idp, jdt +1,kdr)] += (_1_CR-r_r) *r_t *(_1_CR-r_p) *grid.Ks_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)] += (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)] += (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
|
||||
// +p, -t, -r
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL( idp +1,jdt, kdr)] += (_1_CR-r_r) *(_1_CR-r_t) *r_p *grid.Ks_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)] += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)] += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
|
||||
grid.Ks_density_inv_loc[ I2V_INV_KNL( idp +1,jdt, kdr)] += (_1_CR-r_r) *(_1_CR-r_t) *r_p *grid.Ks_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)] += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)] += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
|
||||
// -p, +t, +r
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL( idp, jdt +1,kdr +1)] += r_r *r_t *(_1_CR-r_p) *grid.Ks_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)] += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)] += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
|
||||
grid.Ks_density_inv_loc[ I2V_INV_KNL( idp, jdt +1,kdr +1)] += r_r *r_t *(_1_CR-r_p) *grid.Ks_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)] += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)] += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
|
||||
// +p, -t, +r
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL( idp +1,jdt, kdr +1)] += r_r *(_1_CR-r_t) *r_p *grid.Ks_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)] += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)] += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
|
||||
grid.Ks_density_inv_loc[ I2V_INV_KNL( idp +1,jdt, kdr +1)] += r_r *(_1_CR-r_t) *r_p *grid.Ks_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)] += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)] += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
|
||||
// +p, +t, -r
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL( idp +1,jdt +1,kdr)] += (_1_CR-r_r) *r_t *r_p *grid.Ks_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)] += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Keta_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)] += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Kxi_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
|
||||
grid.Ks_density_inv_loc[ I2V_INV_KNL( idp +1,jdt +1,kdr)] += (_1_CR-r_r) *r_t *r_p *grid.Ks_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)] += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)] += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Kxi_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
|
||||
// +p, +t, +r
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL( idp +1,jdt +1,kdr +1)] += r_r *r_t *r_p *grid.Ks_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)] += r_r_ani*r_t_ani*r_p_ani*grid.Keta_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)] += r_r_ani*r_t_ani*r_p_ani*grid.Kxi_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
|
||||
grid.Ks_density_inv_loc[ I2V_INV_KNL( idp +1,jdt +1,kdr +1)] += r_r *r_t *r_p *grid.Ks_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)] += r_r_ani*r_t_ani*r_p_ani*grid.Keta_density_loc[I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)] += r_r_ani*r_t_ani*r_p_ani*grid.Kxi_density_loc[ I2V(i_loc,j_loc,k_loc)] * dxdydz;
|
||||
|
||||
} // end for i
|
||||
} // end for j
|
||||
} // end for k
|
||||
|
||||
|
||||
// sum over all sub-domains
|
||||
allreduce_cr_inplace(grid.Ks_inv_loc, n_inv_I_loc*n_inv_J_loc*n_inv_K_loc);
|
||||
allreduce_cr_inplace(grid.Keta_inv_loc, n_inv_I_loc_ani*n_inv_J_loc_ani*n_inv_K_loc_ani);
|
||||
allreduce_cr_inplace(grid.Kxi_inv_loc, n_inv_I_loc_ani*n_inv_J_loc_ani*n_inv_K_loc_ani);
|
||||
|
||||
allreduce_cr_inplace(grid.Ks_density_inv_loc, n_inv_I_loc*n_inv_J_loc*n_inv_K_loc);
|
||||
allreduce_cr_inplace(grid.Keta_density_inv_loc, n_inv_I_loc_ani*n_inv_J_loc_ani*n_inv_K_loc_ani);
|
||||
allreduce_cr_inplace(grid.Kxi_density_inv_loc, n_inv_I_loc_ani*n_inv_J_loc_ani*n_inv_K_loc_ani);
|
||||
|
||||
// kernel density normalization
|
||||
// velocity
|
||||
for (int k = 0; k < n_inv_K_loc; k++) {
|
||||
for (int j = 0; j < n_inv_J_loc; j++) {
|
||||
for (int i = 0; i < n_inv_I_loc; i++) {
|
||||
if(isZero(grid.Ks_density_inv_loc[I2V_INV_KNL(i,j,k)])){
|
||||
// do nothing
|
||||
} else {
|
||||
if (isNegative(grid.Ks_density_inv_loc[I2V_INV_KNL(i,j,k)])){
|
||||
std::cout << "Warning: grid.Ks_density_inv_loc[I2V_INV_KNL(" << i << "," << j << "," << k << ")] is less than 0, = "
|
||||
<< grid.Ks_density_inv_loc[I2V_INV_KNL(i,j,k)]
|
||||
<< ", using absolute value instead."
|
||||
<< std::endl;
|
||||
}
|
||||
grid.Ks_inv_loc[I2V_INV_KNL(i,j,k)] /= std::pow(std::abs(grid.Ks_density_inv_loc[I2V_INV_KNL(i,j,k)]),Kdensity_coe);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// anisotropy
|
||||
for (int k = 0; k < n_inv_K_loc_ani; k++) {
|
||||
for (int j = 0; j < n_inv_J_loc_ani; j++) {
|
||||
for (int i = 0; i < n_inv_I_loc_ani; i++) {
|
||||
if(isZero(grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)])){
|
||||
// do nothing
|
||||
} else {
|
||||
if (isNegative(grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)])){
|
||||
std::cout << "Warning: grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(" << i << "," << j << "," << k << ")] is less than 0, = "
|
||||
<< grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)]
|
||||
<< ", using absolute value instead."
|
||||
<< std::endl;
|
||||
}
|
||||
grid.Keta_inv_loc[I2V_INV_ANI_KNL(i,j,k)] /= std::pow(std::abs(grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)]),Kdensity_coe);
|
||||
}
|
||||
|
||||
if(isZero(grid.Kxi_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)])){
|
||||
// do nothing
|
||||
} else {
|
||||
if (isNegative(grid.Kxi_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)])){
|
||||
std::cout << "Warning: grid.Kxi_density_inv_loc[I2V_INV_ANI_KNL(" << i << "," << j << "," << k << ")] is less than 0, = "
|
||||
<< grid.Kxi_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)]
|
||||
<< ", using absolute value instead."
|
||||
<< std::endl;
|
||||
}
|
||||
grid.Kxi_inv_loc[I2V_INV_ANI_KNL(i,j,k)] /= std::pow(std::abs(grid.Kxi_density_inv_loc[I2V_INV_ANI_KNL(i,j,k)]),Kdensity_coe);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// update factors
|
||||
//
|
||||
for (int k = k_start; k < k_end; k++) {
|
||||
CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
|
||||
r_r = -_1_CR;
|
||||
for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
|
||||
if (in_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)])) {
|
||||
kdr = ii_invr;
|
||||
r_r = calc_ratio_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
r_r_ani = -_1_CR;
|
||||
for (int ii_invr = 0; ii_invr < n_inv_K_loc_ani-1; ii_invr++){
|
||||
// increasing or decreasing order
|
||||
if (in_between(r_glob, grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr+1,i_grid)])) {
|
||||
kdr_ani = ii_invr;
|
||||
r_r_ani = calc_ratio_between(r_glob, grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr+1,i_grid)]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
// depth model update taper
|
||||
CUSTOMREAL depth = radius2depth(r_glob);
|
||||
if (depth < taper[0]) { // weight = 0;
|
||||
weight = _0_CR;
|
||||
} else if (depth < taper[1]) {
|
||||
weight = (_1_CR - std::cos(PI*(depth - taper[0])/(taper[1] - taper[0]))) / _2_CR;
|
||||
} else {
|
||||
weight = _1_CR;
|
||||
}
|
||||
|
||||
// continue if r is out of the inversion grid
|
||||
if (r_r < _0_CR || r_r_ani < _0_CR) continue;
|
||||
|
||||
for (int j = j_start; j < j_end; j++) {
|
||||
CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat();
|
||||
r_t = -_1_CR;
|
||||
for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
|
||||
CUSTOMREAL left = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr+1,i_grid)]*(r_r);
|
||||
CUSTOMREAL right = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr+1,i_grid)]*(r_r);
|
||||
if (in_between(t_glob, left, right)) {
|
||||
jdt = ii_invt;
|
||||
r_t = calc_ratio_between(t_glob, left, right);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
r_t_ani = -_1_CR;
|
||||
for (int ii_invt = 0; ii_invt < n_inv_J_loc_ani-1; ii_invt++){
|
||||
CUSTOMREAL left = grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt, kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt, kdr_ani+1,i_grid)]*(r_r_ani);
|
||||
CUSTOMREAL right = grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt+1,kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt+1,kdr_ani+1,i_grid)]*(r_r_ani);
|
||||
if (in_between(t_glob, left, right)) {
|
||||
jdt_ani = ii_invt;
|
||||
r_t_ani = calc_ratio_between(t_glob, left, right);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// continue if t is out of the inversion grid
|
||||
if (r_t < _0_CR || r_t_ani < _0_CR) continue;
|
||||
|
||||
for (int i = i_start; i < i_end; i++) {
|
||||
CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon();
|
||||
r_p = -_1_CR;
|
||||
for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
|
||||
CUSTOMREAL left = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr+1,i_grid)]*(r_r);
|
||||
CUSTOMREAL right = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr+1,i_grid)]*(r_r);
|
||||
if (in_between(p_glob, left, right)) {
|
||||
idp = ii_invp;
|
||||
r_p = calc_ratio_between(p_glob, left, right);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
r_p_ani = -_1_CR;
|
||||
for (int ii_invp = 0; ii_invp < n_inv_I_loc_ani-1; ii_invp++){
|
||||
CUSTOMREAL left = grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp, kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp, kdr_ani+1,i_grid)]*(r_r_ani);
|
||||
CUSTOMREAL right = grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp+1,kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp+1,kdr_ani+1,i_grid)]*(r_r_ani);
|
||||
if (in_between(p_glob, left, right)) {
|
||||
idp_ani = ii_invp;
|
||||
r_p_ani = calc_ratio_between(p_glob, left, right);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// continue if p is out of the inversion grid
|
||||
if (r_p < _0_CR || r_p_ani < _0_CR) continue;
|
||||
|
||||
// global grid id to local id
|
||||
int k_loc = k - grid.get_offset_k();
|
||||
int j_loc = j - grid.get_offset_j();
|
||||
int i_loc = i - grid.get_offset_i();
|
||||
|
||||
// check if *_loc are inside the local subdomain
|
||||
if (k_loc < 0 || k_loc > loc_K-1) continue;
|
||||
if (j_loc < 0 || j_loc > loc_J-1) continue;
|
||||
if (i_loc < 0 || i_loc > loc_I-1) continue;
|
||||
|
||||
CUSTOMREAL pert_Ks = 0.0;
|
||||
CUSTOMREAL pert_Keta = 0.0;
|
||||
CUSTOMREAL pert_Kxi = 0.0;
|
||||
CUSTOMREAL pert_Ks_density = 0.0;
|
||||
CUSTOMREAL pert_Keta_density = 0.0;
|
||||
CUSTOMREAL pert_Kxi_density = 0.0;
|
||||
|
||||
pert_Ks += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)];
|
||||
pert_Keta += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)];
|
||||
pert_Kxi += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)];
|
||||
pert_Ks_density += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)];
|
||||
pert_Keta_density += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)];
|
||||
pert_Kxi_density += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)];
|
||||
|
||||
pert_Ks += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)];
|
||||
pert_Keta += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)];
|
||||
pert_Kxi += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)];
|
||||
pert_Ks_density += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)];
|
||||
pert_Keta_density += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)];
|
||||
pert_Kxi_density += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)];
|
||||
|
||||
pert_Ks += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)];
|
||||
pert_Keta += (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)];
|
||||
pert_Kxi += (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)];
|
||||
pert_Ks_density += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)];
|
||||
pert_Keta_density = (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)];
|
||||
pert_Kxi_density += (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)];
|
||||
|
||||
pert_Ks += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)];
|
||||
pert_Keta += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)];
|
||||
pert_Kxi += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)];
|
||||
pert_Ks_density += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)];
|
||||
pert_Keta_density += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)];
|
||||
pert_Kxi_density += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)];
|
||||
|
||||
pert_Ks += r_r*r_t*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)];
|
||||
pert_Keta += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)];
|
||||
pert_Kxi += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)];
|
||||
pert_Ks_density += r_r*r_t*(_1_CR-r_p)*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)];
|
||||
pert_Keta_density += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)];
|
||||
pert_Kxi_density += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)];
|
||||
|
||||
pert_Ks += r_r*(_1_CR-r_t)*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)];
|
||||
pert_Keta += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)];
|
||||
pert_Kxi += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)];
|
||||
pert_Ks_density += r_r*(_1_CR-r_t)*r_p*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)];
|
||||
pert_Keta_density += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)];
|
||||
pert_Kxi_density += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)];
|
||||
|
||||
pert_Ks += (_1_CR-r_r)*r_t*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)];
|
||||
pert_Keta += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)];
|
||||
pert_Kxi += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)];
|
||||
pert_Ks_density += (_1_CR-r_r)*r_t*r_p*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)];
|
||||
pert_Keta_density += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)];
|
||||
pert_Kxi_density += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)];
|
||||
|
||||
pert_Ks += r_r*r_t*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)];
|
||||
pert_Keta += r_r_ani*r_t_ani*r_p_ani*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)];
|
||||
pert_Kxi += r_r_ani*r_t_ani*r_p_ani*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)];
|
||||
pert_Ks_density += r_r*r_t*r_p*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)];
|
||||
pert_Keta_density += r_r_ani*r_t_ani*r_p_ani*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)];
|
||||
pert_Kxi_density += r_r_ani*r_t_ani*r_p_ani*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)];
|
||||
|
||||
// update para
|
||||
grid.Ks_update_loc[ I2V(i_loc,j_loc,k_loc)] += weight * pert_Ks;
|
||||
grid.Keta_update_loc[ I2V(i_loc,j_loc,k_loc)] += weight * pert_Keta;
|
||||
grid.Kxi_update_loc[ I2V(i_loc,j_loc,k_loc)] += weight * pert_Kxi;
|
||||
grid.Ks_density_update_loc[ I2V(i_loc,j_loc,k_loc)] += weight * pert_Ks_density;
|
||||
grid.Keta_density_update_loc[ I2V(i_loc,j_loc,k_loc)] += weight * pert_Keta_density;
|
||||
grid.Kxi_density_update_loc[ I2V(i_loc,j_loc,k_loc)] += weight * pert_Kxi_density;
|
||||
|
||||
} // end for i
|
||||
} // end for j
|
||||
} // end for k
|
||||
|
||||
} // end i_grid
|
||||
|
||||
|
||||
//
|
||||
// rescale kernel update to -1 ~ 1
|
||||
//
|
||||
// get the scaling factor
|
||||
CUSTOMREAL Linf_Ks = _0_CR, Linf_Keta = _0_CR, Linf_Kxi = _0_CR;
|
||||
// CUSTOMREAL Linf_Kden = _0_CR;
|
||||
for (int k = 1; k < loc_K-1; k++) {
|
||||
for (int j = 1; j < loc_J-1; j++) {
|
||||
for (int i = 1; i < loc_I-1; i++) {
|
||||
Linf_Ks = std::max(Linf_Ks, std::abs(grid.Ks_update_loc[I2V(i,j,k)]));
|
||||
Linf_Keta = std::max(Linf_Keta, std::abs(grid.Keta_update_loc[I2V(i,j,k)]));
|
||||
Linf_Kxi = std::max(Linf_Kxi, std::abs(grid.Kxi_update_loc[I2V(i,j,k)]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// get the maximum scaling factor among subdomains
|
||||
CUSTOMREAL Linf_tmp;
|
||||
allreduce_cr_single_max(Linf_Ks, Linf_tmp); Linf_Ks = Linf_tmp;
|
||||
allreduce_cr_single_max(Linf_Keta, Linf_tmp); Linf_Keta = Linf_tmp;
|
||||
allreduce_cr_single_max(Linf_Kxi, Linf_tmp); Linf_Kxi = Linf_tmp;
|
||||
|
||||
CUSTOMREAL Linf_all = _0_CR;
|
||||
Linf_all = std::max(Linf_Ks, std::max(Linf_Keta, Linf_Kxi));
|
||||
|
||||
Linf_Ks = Linf_all;
|
||||
Linf_Keta = Linf_all;
|
||||
Linf_Kxi = Linf_all;
|
||||
|
||||
// rescale the kernel update
|
||||
for (int k = 0; k < loc_K; k++) {
|
||||
for (int j = 0; j < loc_J; j++) {
|
||||
for (int i = 0; i < loc_I; i++) {
|
||||
grid.Ks_update_loc[I2V(i,j,k)] /= Linf_Ks;
|
||||
grid.Keta_update_loc[I2V(i,j,k)] /= Linf_Keta;
|
||||
grid.Kxi_update_loc[I2V(i,j,k)] /= Linf_Kxi;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
319
include/smooth_descent_dir.h
Normal file
319
include/smooth_descent_dir.h
Normal file
@@ -0,0 +1,319 @@
|
||||
#ifndef SMOOTH_DESCENT_DIR_H
|
||||
#define SMOOTH_DESCENT_DIR_H
|
||||
|
||||
#include <iostream>
|
||||
#include "grid.h"
|
||||
#include "config.h"
|
||||
#include "smooth.h"
|
||||
|
||||
|
||||
inline void smooth_descent_dir_CG(Grid& grid, const CUSTOMREAL lr, const CUSTOMREAL lt, const CUSTOMREAL lp) {
|
||||
// smoothing kernels with Conjugate Gradient method
|
||||
// lr,lt,lp: smoothing length in r, theta, phi direction
|
||||
|
||||
// smooth Ks
|
||||
CG_smooth(grid, grid.Ks_descent_dir_loc, grid.Ks_descent_dir_loc, lr, lt, lp);
|
||||
// smooth Keta
|
||||
CG_smooth(grid, grid.Keta_descent_dir_loc, grid.Keta_descent_dir_loc, lr, lt, lp);
|
||||
// smooth Kxi
|
||||
CG_smooth(grid, grid.Kxi_descent_dir_loc, grid.Kxi_descent_dir_loc, lr, lt, lp);
|
||||
|
||||
}
|
||||
|
||||
|
||||
// original method for smoothing kernels
|
||||
inline void smooth_descent_dir(Grid& grid){
|
||||
// necessary params
|
||||
CUSTOMREAL r_r, r_t, r_p;
|
||||
int kdr, jdt, idp;
|
||||
|
||||
int k_start = 0;
|
||||
int j_start = 0;
|
||||
int i_start = 0;
|
||||
int k_end = ngrid_k;
|
||||
int j_end = ngrid_j;
|
||||
int i_end = ngrid_i;
|
||||
|
||||
//
|
||||
// sum the kernel values on the inversion grid
|
||||
//
|
||||
|
||||
for (int i_grid = 0; i_grid < n_inv_grids; i_grid++) {
|
||||
|
||||
// init coarser kernel arrays
|
||||
for (int k = 0; k < n_inv_K_loc; k++) {
|
||||
for (int j = 0; j < n_inv_J_loc; j++) {
|
||||
for (int i = 0; i < n_inv_I_loc; i++) {
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL(i,j,k)] = _0_CR;
|
||||
grid.Keta_inv_loc[I2V_INV_KNL(i,j,k)] = _0_CR;
|
||||
grid.Kxi_inv_loc[ I2V_INV_KNL(i,j,k)] = _0_CR;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int k = k_start; k < k_end; k++) {
|
||||
// CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
|
||||
// r_r = -_1_CR;
|
||||
// for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
|
||||
// if (r_glob > grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)] && r_glob <= grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]) {
|
||||
// kdr = ii_invr;
|
||||
// r_r = (r_glob - grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)]) / (grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)] - grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)]);
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
|
||||
r_r = -_1_CR;
|
||||
for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
|
||||
// increasing or decreasing order
|
||||
if (in_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)])) {
|
||||
kdr = ii_invr;
|
||||
r_r = calc_ratio_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
// continue if r is out of the inversion grid
|
||||
if (r_r < 0) continue;
|
||||
|
||||
for (int j = j_start; j < j_end; j++) {
|
||||
// CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat();
|
||||
// r_t = -_1_CR;
|
||||
// for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
|
||||
// if (t_glob > grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)] && t_glob <= grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,i_grid)]) {
|
||||
// jdt = ii_invt;
|
||||
// r_t = (t_glob - grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)]) / (grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,i_grid)] - grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)]);
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat(); // global coordinate of t (latitude)
|
||||
r_t = -_1_CR;
|
||||
for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
|
||||
CUSTOMREAL left = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr+1,i_grid)]*(r_r);
|
||||
CUSTOMREAL right = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr+1,i_grid)]*(r_r);
|
||||
if (in_between(t_glob, left, right)) {
|
||||
jdt = ii_invt;
|
||||
r_t = calc_ratio_between(t_glob, left, right);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// continue if t is out of the inversion grid
|
||||
if (r_t < 0) continue;
|
||||
|
||||
for (int i = i_start; i < i_end; i++) {
|
||||
// CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon();
|
||||
// r_p = -_1_CR;
|
||||
// for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
|
||||
// if (p_glob > grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)] && p_glob <= grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,i_grid)]) {
|
||||
// idp = ii_invp;
|
||||
// r_p = (p_glob - grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)]) / (grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,i_grid)] - grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)]);
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon(); // global coordinate of p (longitude)
|
||||
r_p = -_1_CR;
|
||||
for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
|
||||
CUSTOMREAL left = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(r_r);
|
||||
CUSTOMREAL right = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(r_r);
|
||||
if (in_between(p_glob, left, right)) {
|
||||
idp = ii_invp;
|
||||
r_p = calc_ratio_between(p_glob, left, right);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// continue if p is out of the inversion grid
|
||||
if (r_p < 0) continue;
|
||||
|
||||
// global grid id to local id
|
||||
int k_loc = k - grid.get_offset_k();
|
||||
int j_loc = j - grid.get_offset_j();
|
||||
int i_loc = i - grid.get_offset_i();
|
||||
|
||||
// check if *_loc are inside the local subdomain
|
||||
if (k_loc < 0 || k_loc > loc_K-1) continue;
|
||||
if (j_loc < 0 || j_loc > loc_J-1) continue;
|
||||
if (i_loc < 0 || i_loc > loc_I-1) continue;
|
||||
|
||||
// update Ks_inv Keta_inv Kxi_inv
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Keta_descent_dir_loc[I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Kxi_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)] += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt,kdr+1)] += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Keta_descent_dir_loc[I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)] += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Kxi_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)] += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Ks_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt+1,kdr)] += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Keta_descent_dir_loc[I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)] += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Kxi_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Ks_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Keta_descent_dir_loc[I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Kxi_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)] += r_r*r_t*(_1_CR-r_p)*grid.Ks_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt+1,kdr+1)] += r_r*r_t*(_1_CR-r_p)*grid.Keta_descent_dir_loc[I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)] += r_r*r_t*(_1_CR-r_p)*grid.Kxi_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)] += r_r*(_1_CR-r_t)*r_p*grid.Ks_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt,kdr+1)] += r_r*(_1_CR-r_t)*r_p*grid.Keta_descent_dir_loc[I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)] += r_r*(_1_CR-r_t)*r_p*grid.Kxi_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)] += (_1_CR-r_r)*r_t*r_p*grid.Ks_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt+1,kdr)] += (_1_CR-r_r)*r_t*r_p*grid.Keta_descent_dir_loc[I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)] += (_1_CR-r_r)*r_t*r_p*grid.Kxi_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)] += r_r*r_t*r_p*grid.Ks_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt+1,kdr+1)] += r_r*r_t*r_p*grid.Keta_descent_dir_loc[I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)] += r_r*r_t*r_p*grid.Kxi_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
|
||||
} // end for i
|
||||
} // end for j
|
||||
} // end for k
|
||||
|
||||
// sum over all sub-domains
|
||||
allreduce_cr_inplace(grid.Ks_inv_loc, n_inv_I_loc*n_inv_J_loc*n_inv_K_loc);
|
||||
allreduce_cr_inplace(grid.Keta_inv_loc, n_inv_I_loc*n_inv_J_loc*n_inv_K_loc);
|
||||
allreduce_cr_inplace(grid.Kxi_inv_loc, n_inv_I_loc*n_inv_J_loc*n_inv_K_loc);
|
||||
|
||||
//
|
||||
// update factors
|
||||
//
|
||||
for (int k = k_start; k < k_end; k++) {
|
||||
// CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
|
||||
// r_r = -_1_CR;
|
||||
// for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
|
||||
// if (r_glob > grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)] && r_glob <= grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]) {
|
||||
// kdr = ii_invr;
|
||||
// r_r = (r_glob - grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)]) / (grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)] - grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)]);
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
|
||||
r_r = -_1_CR;
|
||||
for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
|
||||
if (in_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)])) {
|
||||
kdr = ii_invr;
|
||||
r_r = calc_ratio_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// continue if r is out of the inversion grid
|
||||
if (r_r < 0) continue;
|
||||
|
||||
for (int j = j_start; j < j_end; j++) {
|
||||
// CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat();
|
||||
// r_t = -_1_CR;
|
||||
// for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
|
||||
// if (t_glob > grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)] && t_glob <= grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,i_grid)]) {
|
||||
// jdt = ii_invt;
|
||||
// r_t = (t_glob - grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)]) / (grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,i_grid)] - grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)]);
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat();
|
||||
r_t = -_1_CR;
|
||||
for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
|
||||
CUSTOMREAL left = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr+1,i_grid)]*(r_r);
|
||||
CUSTOMREAL right = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr+1,i_grid)]*(r_r);
|
||||
if (in_between(t_glob, left, right)) {
|
||||
jdt = ii_invt;
|
||||
r_t = calc_ratio_between(t_glob, left, right);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// continue if t is out of the inversion grid
|
||||
if (r_t < 0) continue;
|
||||
|
||||
for (int i = i_start; i < i_end; i++) {
|
||||
// CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon();
|
||||
// r_p = -_1_CR;
|
||||
// for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
|
||||
// if (p_glob > grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)] && p_glob <= grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,i_grid)]) {
|
||||
// idp = ii_invp;
|
||||
// r_p = (p_glob - grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)]) / (grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,i_grid)] - grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)]);
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon();
|
||||
r_p = -_1_CR;
|
||||
for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
|
||||
CUSTOMREAL left = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(r_r);
|
||||
CUSTOMREAL right = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(r_r);
|
||||
if (in_between(p_glob, left, right)) {
|
||||
idp = ii_invp;
|
||||
r_p = calc_ratio_between(p_glob, left, right);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// continue if p is out of the inversion grid
|
||||
if (r_p < 0) continue;
|
||||
|
||||
// global grid id to local id
|
||||
int k_loc = k - grid.get_offset_k();
|
||||
int j_loc = j - grid.get_offset_j();
|
||||
int i_loc = i - grid.get_offset_i();
|
||||
|
||||
// check if *_loc are inside the local subdomain
|
||||
if (k_loc < 0 || k_loc > loc_K-1) continue;
|
||||
if (j_loc < 0 || j_loc > loc_J-1) continue;
|
||||
if (i_loc < 0 || i_loc > loc_I-1) continue;
|
||||
|
||||
CUSTOMREAL pert_Ks = 0.0;
|
||||
CUSTOMREAL pert_Keta = 0.0;
|
||||
CUSTOMREAL pert_Kxi = 0.0;
|
||||
|
||||
pert_Ks += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)];
|
||||
pert_Keta += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt,kdr)];
|
||||
pert_Kxi += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)];
|
||||
|
||||
pert_Ks += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)];
|
||||
pert_Keta += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt,kdr+1)];
|
||||
pert_Kxi += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)];
|
||||
|
||||
pert_Ks += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)];
|
||||
pert_Keta += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt+1,kdr)];
|
||||
pert_Kxi += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)];
|
||||
|
||||
pert_Ks += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)];
|
||||
pert_Keta += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt,kdr)];
|
||||
pert_Kxi += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)];
|
||||
|
||||
pert_Ks += r_r*r_t*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)];
|
||||
pert_Keta += r_r*r_t*(_1_CR-r_p)*grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt+1,kdr+1)];
|
||||
pert_Kxi += r_r*r_t*(_1_CR-r_p)*grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)];
|
||||
|
||||
pert_Ks += r_r*(_1_CR-r_t)*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)];
|
||||
pert_Keta += r_r*(_1_CR-r_t)*r_p*grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt,kdr+1)];
|
||||
pert_Kxi += r_r*(_1_CR-r_t)*r_p*grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)];
|
||||
|
||||
pert_Ks += (_1_CR-r_r)*r_t*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)];
|
||||
pert_Keta += (_1_CR-r_r)*r_t*r_p*grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt+1,kdr)];
|
||||
pert_Kxi += (_1_CR-r_r)*r_t*r_p*grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)];
|
||||
|
||||
pert_Ks += r_r*r_t*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)];
|
||||
pert_Keta += r_r*r_t*r_p*grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt+1,kdr+1)];
|
||||
pert_Kxi += r_r*r_t*r_p*grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)];
|
||||
|
||||
|
||||
// update para
|
||||
grid.Ks_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)] += pert_Ks;
|
||||
grid.Keta_descent_dir_loc[I2V(i_loc,j_loc,k_loc)] += pert_Keta;
|
||||
grid.Kxi_descent_dir_loc[ I2V(i_loc,j_loc,k_loc)] += pert_Kxi;
|
||||
|
||||
|
||||
} // end for i
|
||||
} // end for j
|
||||
} // end for k
|
||||
|
||||
} // end i_grid
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
321
include/smooth_grad_regul.h
Normal file
321
include/smooth_grad_regul.h
Normal file
@@ -0,0 +1,321 @@
|
||||
#ifndef SMOOTH_GRAD_REGUL_H
|
||||
#define SMOOTH_GRAD_REGUL_H
|
||||
|
||||
#include <iostream>
|
||||
#include "grid.h"
|
||||
#include "config.h"
|
||||
#include "smooth.h"
|
||||
|
||||
inline void smooth_gradient_regularization_CG(Grid& grid, const CUSTOMREAL lr, const CUSTOMREAL lt, const CUSTOMREAL lp) {
|
||||
// smoothing kernels with Conjugate Gradient method
|
||||
// lr,lt,lp: smoothing length in r, theta, phi direction
|
||||
|
||||
// smooth Ks
|
||||
CG_smooth(grid, grid.fun_gradient_regularization_penalty_loc, \
|
||||
grid.fun_gradient_regularization_penalty_loc, lr, lt, lp);
|
||||
// smooth Keta
|
||||
CG_smooth(grid, grid.eta_gradient_regularization_penalty_loc, \
|
||||
grid.eta_gradient_regularization_penalty_loc, lr, lt, lp);
|
||||
// smooth Kxi
|
||||
CG_smooth(grid, grid.xi_gradient_regularization_penalty_loc, \
|
||||
grid.xi_gradient_regularization_penalty_loc, lr, lt, lp);
|
||||
|
||||
}
|
||||
|
||||
// original method for smoothing kernels
|
||||
inline void smooth_gradient_regularization_orig(Grid& grid){
|
||||
// necessary params
|
||||
CUSTOMREAL r_r, r_t, r_p;
|
||||
int kdr, jdt, idp;
|
||||
|
||||
int k_start = 0;
|
||||
int j_start = 0;
|
||||
int i_start = 0;
|
||||
int k_end = ngrid_k;
|
||||
int j_end = ngrid_j;
|
||||
int i_end = ngrid_i;
|
||||
|
||||
//
|
||||
// sum the kernel values on the inversion grid
|
||||
//
|
||||
|
||||
for (int i_grid = 0; i_grid < n_inv_grids; i_grid++) {
|
||||
|
||||
// init coarser kernel arrays
|
||||
for (int k = 0; k < n_inv_K_loc; k++) {
|
||||
for (int j = 0; j < n_inv_J_loc; j++) {
|
||||
for (int i = 0; i < n_inv_I_loc; i++) {
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL(i,j,k)] = _0_CR;
|
||||
grid.Keta_inv_loc[I2V_INV_KNL(i,j,k)] = _0_CR;
|
||||
grid.Kxi_inv_loc[ I2V_INV_KNL(i,j,k)] = _0_CR;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (int k = k_start; k < k_end; k++) {
|
||||
// CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
|
||||
// r_r = -_1_CR;
|
||||
// for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
|
||||
// if (r_glob > grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)] && r_glob <= grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]) {
|
||||
// kdr = ii_invr;
|
||||
// r_r = (r_glob - grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)]) / (grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)] - grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)]);
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
|
||||
r_r = -_1_CR;
|
||||
for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
|
||||
// increasing or decreasing order
|
||||
if (in_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)])) {
|
||||
kdr = ii_invr;
|
||||
r_r = calc_ratio_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// continue if r is out of the inversion grid
|
||||
if (r_r < 0) continue;
|
||||
|
||||
for (int j = j_start; j < j_end; j++) {
|
||||
// CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat();
|
||||
// r_t = -_1_CR;
|
||||
// for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
|
||||
// if (t_glob > grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)] && t_glob <= grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,i_grid)]) {
|
||||
// jdt = ii_invt;
|
||||
// r_t = (t_glob - grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)]) / (grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,i_grid)] - grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)]);
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat(); // global coordinate of t (latitude)
|
||||
r_t = -_1_CR;
|
||||
for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
|
||||
CUSTOMREAL left = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr+1,i_grid)]*(r_r);
|
||||
CUSTOMREAL right = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr+1,i_grid)]*(r_r);
|
||||
if (in_between(t_glob, left, right)) {
|
||||
jdt = ii_invt;
|
||||
r_t = calc_ratio_between(t_glob, left, right);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// continue if t is out of the inversion grid
|
||||
if (r_t < 0) continue;
|
||||
|
||||
for (int i = i_start; i < i_end; i++) {
|
||||
// CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon();
|
||||
// r_p = -_1_CR;
|
||||
// for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
|
||||
// if (p_glob > grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)] && p_glob <= grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,i_grid)]) {
|
||||
// idp = ii_invp;
|
||||
// r_p = (p_glob - grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)]) / (grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,i_grid)] - grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)]);
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon(); // global coordinate of p (longitude)
|
||||
r_p = -_1_CR;
|
||||
for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
|
||||
CUSTOMREAL left = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(r_r);
|
||||
CUSTOMREAL right = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(r_r);
|
||||
if (in_between(p_glob, left, right)) {
|
||||
idp = ii_invp;
|
||||
r_p = calc_ratio_between(p_glob, left, right);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// continue if p is out of the inversion grid
|
||||
if (r_p < 0) continue;
|
||||
|
||||
// global grid id to local id
|
||||
int k_loc = k - grid.get_offset_k();
|
||||
int j_loc = j - grid.get_offset_j();
|
||||
int i_loc = i - grid.get_offset_i();
|
||||
|
||||
// check if *_loc are inside the local subdomain
|
||||
if (k_loc < 0 || k_loc > loc_K-1) continue;
|
||||
if (j_loc < 0 || j_loc > loc_J-1) continue;
|
||||
if (i_loc < 0 || i_loc > loc_I-1) continue;
|
||||
|
||||
// update Ks_inv Keta_inv Kxi_inv
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.fun_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.eta_gradient_regularization_penalty_loc[I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.xi_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)] += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.fun_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt,kdr+1)] += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.eta_gradient_regularization_penalty_loc[I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)] += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.xi_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)] += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.fun_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt+1,kdr)] += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.eta_gradient_regularization_penalty_loc[I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)] += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.xi_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.fun_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.eta_gradient_regularization_penalty_loc[I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)] += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.xi_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)] += r_r*r_t*(_1_CR-r_p)*grid.fun_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt+1,kdr+1)] += r_r*r_t*(_1_CR-r_p)*grid.eta_gradient_regularization_penalty_loc[I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)] += r_r*r_t*(_1_CR-r_p)*grid.xi_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)] += r_r*(_1_CR-r_t)*r_p*grid.fun_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt,kdr+1)] += r_r*(_1_CR-r_t)*r_p*grid.eta_gradient_regularization_penalty_loc[I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)] += r_r*(_1_CR-r_t)*r_p*grid.xi_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)] += (_1_CR-r_r)*r_t*r_p*grid.fun_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt+1,kdr)] += (_1_CR-r_r)*r_t*r_p*grid.eta_gradient_regularization_penalty_loc[I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)] += (_1_CR-r_r)*r_t*r_p*grid.xi_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
|
||||
grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)] += r_r*r_t*r_p*grid.fun_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt+1,kdr+1)] += r_r*r_t*r_p*grid.eta_gradient_regularization_penalty_loc[I2V(i_loc,j_loc,k_loc)];
|
||||
grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)] += r_r*r_t*r_p*grid.xi_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)];
|
||||
|
||||
} // end for i
|
||||
} // end for j
|
||||
} // end for k
|
||||
|
||||
// sum over all sub-domains
|
||||
allreduce_cr_inplace(grid.Ks_inv_loc, n_inv_I_loc*n_inv_J_loc*n_inv_K_loc);
|
||||
allreduce_cr_inplace(grid.Keta_inv_loc, n_inv_I_loc*n_inv_J_loc*n_inv_K_loc);
|
||||
allreduce_cr_inplace(grid.Kxi_inv_loc, n_inv_I_loc*n_inv_J_loc*n_inv_K_loc);
|
||||
|
||||
//
|
||||
// update factors
|
||||
//
|
||||
for (int k = k_start; k < k_end; k++) {
|
||||
// CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
|
||||
// r_r = -_1_CR;
|
||||
// for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
|
||||
// if (r_glob > grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)] && r_glob <= grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]) {
|
||||
// kdr = ii_invr;
|
||||
// r_r = (r_glob - grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)]) / (grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)] - grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)]);
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
CUSTOMREAL r_glob = grid.get_r_min() + k*grid.get_delta_r(); // global coordinate of r
|
||||
r_r = -_1_CR;
|
||||
for (int ii_invr = 0; ii_invr < n_inv_K_loc-1; ii_invr++){
|
||||
// increasing or decreasing order
|
||||
if (in_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)])) {
|
||||
kdr = ii_invr;
|
||||
r_r = calc_ratio_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// continue if r is out of the inversion grid
|
||||
if (r_r < 0) continue;
|
||||
|
||||
for (int j = j_start; j < j_end; j++) {
|
||||
// CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat();
|
||||
// r_t = -_1_CR;
|
||||
// for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
|
||||
// if (t_glob > grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)] && t_glob <= grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,i_grid)]) {
|
||||
// jdt = ii_invt;
|
||||
// r_t = (t_glob - grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)]) / (grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,i_grid)] - grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt,i_grid)]);
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat(); // global coordinate of t (latitude)
|
||||
r_t = -_1_CR;
|
||||
for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){
|
||||
CUSTOMREAL left = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr+1,i_grid)]*(r_r);
|
||||
CUSTOMREAL right = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr+1,i_grid)]*(r_r);
|
||||
if (in_between(t_glob, left, right)) {
|
||||
jdt = ii_invt;
|
||||
r_t = calc_ratio_between(t_glob, left, right);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// continue if t is out of the inversion grid
|
||||
if (r_t < 0) continue;
|
||||
|
||||
for (int i = i_start; i < i_end; i++) {
|
||||
// CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon();
|
||||
// r_p = -_1_CR;
|
||||
// for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
|
||||
// if (p_glob > grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)] && p_glob <= grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,i_grid)]) {
|
||||
// idp = ii_invp;
|
||||
// r_p = (p_glob - grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)]) / (grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,i_grid)] - grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp,i_grid)]);
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon(); // global coordinate of p (longitude)
|
||||
r_p = -_1_CR;
|
||||
for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){
|
||||
CUSTOMREAL left = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(r_r);
|
||||
CUSTOMREAL right = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(r_r);
|
||||
if (in_between(p_glob, left, right)) {
|
||||
idp = ii_invp;
|
||||
r_p = calc_ratio_between(p_glob, left, right);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// continue if p is out of the inversion grid
|
||||
if (r_p < 0) continue;
|
||||
|
||||
// global grid id to local id
|
||||
int k_loc = k - grid.get_offset_k();
|
||||
int j_loc = j - grid.get_offset_j();
|
||||
int i_loc = i - grid.get_offset_i();
|
||||
|
||||
// check if *_loc are inside the local subdomain
|
||||
if (k_loc < 0 || k_loc > loc_K-1) continue;
|
||||
if (j_loc < 0 || j_loc > loc_J-1) continue;
|
||||
if (i_loc < 0 || i_loc > loc_I-1) continue;
|
||||
|
||||
CUSTOMREAL pert_Ks = 0.0;
|
||||
CUSTOMREAL pert_Keta = 0.0;
|
||||
CUSTOMREAL pert_Kxi = 0.0;
|
||||
|
||||
pert_Ks += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)];
|
||||
pert_Keta += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt,kdr)];
|
||||
pert_Kxi += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)];
|
||||
|
||||
pert_Ks += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)];
|
||||
pert_Keta += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt,kdr+1)];
|
||||
pert_Kxi += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)];
|
||||
|
||||
pert_Ks += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)];
|
||||
pert_Keta += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt+1,kdr)];
|
||||
pert_Kxi += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)];
|
||||
|
||||
pert_Ks += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)];
|
||||
pert_Keta += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt,kdr)];
|
||||
pert_Kxi += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)];
|
||||
|
||||
pert_Ks += r_r*r_t*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)];
|
||||
pert_Keta += r_r*r_t*(_1_CR-r_p)*grid.Keta_inv_loc[I2V_INV_KNL(idp,jdt+1,kdr+1)];
|
||||
pert_Kxi += r_r*r_t*(_1_CR-r_p)*grid.Kxi_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)];
|
||||
|
||||
pert_Ks += r_r*(_1_CR-r_t)*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)];
|
||||
pert_Keta += r_r*(_1_CR-r_t)*r_p*grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt,kdr+1)];
|
||||
pert_Kxi += r_r*(_1_CR-r_t)*r_p*grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)];
|
||||
|
||||
pert_Ks += (_1_CR-r_r)*r_t*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)];
|
||||
pert_Keta += (_1_CR-r_r)*r_t*r_p*grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt+1,kdr)];
|
||||
pert_Kxi += (_1_CR-r_r)*r_t*r_p*grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)];
|
||||
|
||||
pert_Ks += r_r*r_t*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)];
|
||||
pert_Keta += r_r*r_t*r_p*grid.Keta_inv_loc[I2V_INV_KNL(idp+1,jdt+1,kdr+1)];
|
||||
pert_Kxi += r_r*r_t*r_p*grid.Kxi_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)];
|
||||
|
||||
|
||||
// update para
|
||||
grid.fun_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)] += pert_Ks;
|
||||
grid.eta_gradient_regularization_penalty_loc[I2V(i_loc,j_loc,k_loc)] += pert_Keta;
|
||||
grid.xi_gradient_regularization_penalty_loc[ I2V(i_loc,j_loc,k_loc)] += pert_Kxi;
|
||||
|
||||
|
||||
} // end for i
|
||||
} // end for j
|
||||
} // end for k
|
||||
|
||||
} // end i_grid
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
2
include/source.fwd.h
Normal file
2
include/source.fwd.h
Normal file
@@ -0,0 +1,2 @@
|
||||
#pragma once
|
||||
class Source;
|
||||
71
include/source.h
Normal file
71
include/source.h
Normal file
@@ -0,0 +1,71 @@
|
||||
#ifndef SOURCE_H
|
||||
#define SOURCE_H
|
||||
|
||||
// to handle circular dependency
|
||||
#pragma once
|
||||
#include "grid.fwd.h"
|
||||
#include "source.fwd.h"
|
||||
|
||||
#include "input_params.h"
|
||||
#include "grid.h"
|
||||
#include "config.h"
|
||||
#include "mpi_funcs.h"
|
||||
|
||||
class Source {
|
||||
public:
|
||||
Source(){};
|
||||
~Source();
|
||||
|
||||
// set source information
|
||||
void set_source_position(InputParams &, Grid &, bool&, const std::string&, bool for_2d_solver=false);
|
||||
//
|
||||
// getters
|
||||
//
|
||||
CUSTOMREAL get_ds_lat(){return dis_src_lat;};
|
||||
CUSTOMREAL get_ds_lon(){return dis_src_lon;};
|
||||
CUSTOMREAL get_ds_r (){return dis_src_r;};
|
||||
CUSTOMREAL get_src_r(){return src_r;}; // radius
|
||||
CUSTOMREAL get_src_t(){return src_lat;}; // radian
|
||||
CUSTOMREAL get_src_p(){return src_lon;}; // radian
|
||||
CUSTOMREAL get_src_dep(){return radius2depth(src_r);}; // km
|
||||
|
||||
//
|
||||
// parallel getters
|
||||
//
|
||||
CUSTOMREAL get_fac_at_source(CUSTOMREAL*, bool check=false);
|
||||
CUSTOMREAL get_fac_at_point(CUSTOMREAL*, int, int, int);
|
||||
|
||||
private:
|
||||
// positions
|
||||
CUSTOMREAL src_lat;
|
||||
CUSTOMREAL src_lon;
|
||||
CUSTOMREAL src_r;
|
||||
|
||||
// discretize source position ids (LOCAL)
|
||||
int i_src_loc;
|
||||
int j_src_loc;
|
||||
int k_src_loc;
|
||||
|
||||
// discretized source position
|
||||
CUSTOMREAL dis_src_lat;
|
||||
CUSTOMREAL dis_src_lon;
|
||||
CUSTOMREAL dis_src_r;
|
||||
|
||||
CUSTOMREAL dis_src_err_r; // r1
|
||||
CUSTOMREAL dis_src_err_lat; // r2
|
||||
CUSTOMREAL dis_src_err_lon; // r3
|
||||
|
||||
// if source is in this subdomain
|
||||
bool is_in_subdomain = false;
|
||||
int src_rank;
|
||||
bool *src_flags;
|
||||
int n_dom_src = 0;
|
||||
|
||||
// position error by discretization for each direction
|
||||
CUSTOMREAL error_lon, error_lat, error_r;
|
||||
|
||||
// grid gaps
|
||||
CUSTOMREAL delta_lon, delta_lat, delta_r;
|
||||
};
|
||||
|
||||
#endif // SOURCE_H
|
||||
211
include/src_rec.h
Normal file
211
include/src_rec.h
Normal file
@@ -0,0 +1,211 @@
|
||||
#ifndef SRC_REC_H
|
||||
#define SRC_REC_H
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <algorithm>
|
||||
|
||||
#include "config.h"
|
||||
#include "utils.h"
|
||||
#include "timer.h"
|
||||
#include "mpi_funcs.h"
|
||||
|
||||
|
||||
// information of source or receiver
|
||||
class SrcRecInfo {
|
||||
public:
|
||||
int id = -1;
|
||||
std::string name = "unknown";
|
||||
CUSTOMREAL dep; // stored as depth (km), so need to depth2radious function when using it in other part of the code
|
||||
CUSTOMREAL lat; // stored in degree, but convarted to radian when passed through get_src_* function
|
||||
CUSTOMREAL lon; // stored in degree, but convarted to radian when passed through get_src_* function
|
||||
|
||||
int year = -1;
|
||||
int month = -1;
|
||||
int day = -1;
|
||||
int hour = -1;
|
||||
int min = -1;
|
||||
CUSTOMREAL sec = -1.0;
|
||||
CUSTOMREAL mag = -1.0;
|
||||
|
||||
CUSTOMREAL adjoint_source = 0.0;
|
||||
CUSTOMREAL adjoint_source_density = 0.0;
|
||||
|
||||
int n_data = 0;
|
||||
|
||||
// arrays for storing arrival times on boundary surfaces, calculated by 2D Eikonal solver
|
||||
bool is_out_of_region = false; // is the source or receiver in the region; false: in the refion; true: teleseismic
|
||||
//CUSTOMREAL* arr_times_bound_N = nullptr; // arrival time of the receiver at the north boundary of the subdomain
|
||||
//CUSTOMREAL* arr_times_bound_E = nullptr; // arrival time of the receiver at the east boundary of the subdomain
|
||||
//CUSTOMREAL* arr_times_bound_W = nullptr; // arrival time of the receiver at the west boundary of the subdomain
|
||||
//CUSTOMREAL* arr_times_bound_S = nullptr; // arrival time of the receiver at the south boundary of the subdomain
|
||||
//CUSTOMREAL* arr_times_bound_Bot = nullptr; // arrival time of the receiver at the bottom boundary of the subdomain
|
||||
//bool* is_bound_src; // true if the source is on the boundary surface
|
||||
|
||||
// kernel
|
||||
CUSTOMREAL sta_correct = 0.0;
|
||||
CUSTOMREAL sta_correct_kernel = 0.0;
|
||||
|
||||
// relocation
|
||||
bool is_stop = false;
|
||||
CUSTOMREAL tau_opt = 0.0;
|
||||
CUSTOMREAL grad_chi_i = 0.0;
|
||||
CUSTOMREAL grad_chi_j = 0.0;
|
||||
CUSTOMREAL grad_chi_k = 0.0;
|
||||
CUSTOMREAL grad_tau = 0.0;
|
||||
CUSTOMREAL sum_weight = 0.0;
|
||||
CUSTOMREAL vobj_src_reloc_old = 99999999.9;
|
||||
CUSTOMREAL vobj_src_reloc = 0.0;
|
||||
CUSTOMREAL vobj_grad_norm_src_reloc = 0.0;
|
||||
|
||||
CUSTOMREAL step_length_max = step_length_src_reloc; // 2 km default, step length for relocation
|
||||
CUSTOMREAL change_dep = 0.0;
|
||||
CUSTOMREAL change_lat = 0.0;
|
||||
CUSTOMREAL change_lon = 0.0;
|
||||
CUSTOMREAL change_tau = 0.0;
|
||||
int Ndata = 0.0;
|
||||
|
||||
bool is_T_written_into_file = false;
|
||||
};
|
||||
|
||||
class DataInfo {
|
||||
public:
|
||||
|
||||
CUSTOMREAL data_weight = 1.0; // the weight in the src_rec file
|
||||
CUSTOMREAL weight = 1.0; // the actual weight in the inversion, equal data_weight * weight about the data type;
|
||||
CUSTOMREAL weight_reloc = 1.0; // the actual weight for relocation, equal data_weight * weight about the data type;
|
||||
|
||||
std::string phase = "unknown";
|
||||
|
||||
bool dual_data = false; // if true, this data is a dual data, used for generating kernel, but not for obj estimation (if true, data type = 2 or 3)
|
||||
|
||||
bool is_src_rec = false; // absolute traveltime, single source - receiver
|
||||
|
||||
// source information
|
||||
int id_src = -1;
|
||||
std::string name_src = "unknown";
|
||||
|
||||
// receiver information
|
||||
int id_rec = -1;
|
||||
std::string name_rec = "unknown";
|
||||
|
||||
// traveltime
|
||||
CUSTOMREAL travel_time = -999.0;
|
||||
CUSTOMREAL travel_time_obs = -999.0;
|
||||
|
||||
// receiver pair infomation
|
||||
bool is_rec_pair = false; // common source differential traveltime
|
||||
std::vector<int> id_rec_pair = {-1,-1};
|
||||
std::vector<std::string> name_rec_pair = {"unknown","unknown"};
|
||||
|
||||
// common source differential travel time
|
||||
CUSTOMREAL cs_dif_travel_time = -999.0;
|
||||
CUSTOMREAL cs_dif_travel_time_obs = -999.0;
|
||||
|
||||
// source pair infomation
|
||||
bool is_src_pair = false; // common receiver differential traveltime (for future)
|
||||
std::vector<int> id_src_pair = {-1,-1};
|
||||
std::vector<std::string> name_src_pair = {"unknown","unknown"};
|
||||
|
||||
// common receiver differential travel time
|
||||
CUSTOMREAL cr_dif_travel_time = -999.0;
|
||||
CUSTOMREAL cr_dif_travel_time_obs = -999.0;
|
||||
|
||||
// source relocation
|
||||
CUSTOMREAL DTi = 0.0;
|
||||
CUSTOMREAL DTj = 0.0;
|
||||
CUSTOMREAL DTk = 0.0;
|
||||
|
||||
std::vector<CUSTOMREAL> DTi_pair = {0.0, 0.0};
|
||||
std::vector<CUSTOMREAL> DTj_pair = {0.0, 0.0};
|
||||
std::vector<CUSTOMREAL> DTk_pair = {0.0, 0.0};
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
// methods for managing SrcRec objects/lists
|
||||
|
||||
// parse src_rec_file
|
||||
void parse_src_rec_file(std::string& , \
|
||||
std::map<std::string, SrcRecInfo>&, \
|
||||
std::map<std::string, SrcRecInfo>&, \
|
||||
std::map<std::string, std::map<std::string,std::vector<DataInfo>>>&, \
|
||||
std::vector<std::string>&, \
|
||||
std::vector<std::vector<std::vector<std::string>>>&);
|
||||
|
||||
// parse sta_correctoion_file
|
||||
void parse_sta_correction_file(std::string&, \
|
||||
std::map<std::string, SrcRecInfo>&);
|
||||
|
||||
// swap the sources and receivers
|
||||
void do_swap_src_rec(std::map<std::string, SrcRecInfo> &, \
|
||||
std::map<std::string, SrcRecInfo> &, \
|
||||
std::map<std::string, std::map<std::string, std::vector<DataInfo>>> &, \
|
||||
std::vector<std::string>&);
|
||||
|
||||
// do not swap the sources and receivers
|
||||
void do_not_swap_src_rec(std::map<std::string, SrcRecInfo> &, \
|
||||
std::map<std::string, SrcRecInfo> &, \
|
||||
std::map<std::string, std::map<std::string, std::vector<DataInfo>>> &, \
|
||||
std::vector<std::string>&);
|
||||
|
||||
// tele seismic source management
|
||||
void separate_region_and_tele_src_rec_data(std::map<std::string, SrcRecInfo> &,
|
||||
std::map<std::string, SrcRecInfo> &,
|
||||
std::map<std::string, std::map<std::string, std::vector<DataInfo>>>&,
|
||||
std::map<std::string, SrcRecInfo> &,
|
||||
std::map<std::string, SrcRecInfo> &,
|
||||
std::map<std::string, std::map<std::string, std::vector<DataInfo>>>&,
|
||||
std::map<std::string, SrcRecInfo> &,
|
||||
std::map<std::string, SrcRecInfo> &,
|
||||
std::map<std::string, std::map<std::string, std::vector<DataInfo>>>&,
|
||||
std::map<std::string, int> &,
|
||||
int &,
|
||||
int &,
|
||||
int &,
|
||||
int &,
|
||||
int &,
|
||||
const CUSTOMREAL, const CUSTOMREAL,
|
||||
const CUSTOMREAL, const CUSTOMREAL,
|
||||
const CUSTOMREAL, const CUSTOMREAL,
|
||||
bool);
|
||||
|
||||
void merge_region_and_tele_src(std::map<std::string, SrcRecInfo> &,
|
||||
std::map<std::string, SrcRecInfo> &,
|
||||
std::map<std::string, std::map<std::string,std::vector<DataInfo>>>&,
|
||||
std::vector<std::string>&,
|
||||
std::map<std::string, SrcRecInfo> &,
|
||||
std::map<std::string, SrcRecInfo> &,
|
||||
std::map<std::string, std::map<std::string,std::vector<DataInfo>>>&);
|
||||
|
||||
// distribute srcrec data to all simulation groups
|
||||
void distribute_src_rec_data(std::map<std::string, SrcRecInfo>&, \
|
||||
std::map<std::string, SrcRecInfo>&, \
|
||||
std::map<std::string, std::map<std::string,std::vector<DataInfo>>>&, \
|
||||
std::vector<std::string>&, \
|
||||
std::map<std::string, SrcRecInfo>&, \
|
||||
std::map<std::string, SrcRecInfo>&, \
|
||||
std::map<std::string, std::map<std::string,std::vector<DataInfo>>>&, \
|
||||
std::vector<std::string>&, \
|
||||
std::vector<std::string>&);
|
||||
|
||||
void prepare_src_map_for_2d_solver(std::map<std::string, SrcRecInfo>&, \
|
||||
std::map<std::string, SrcRecInfo>&, \
|
||||
std::vector<std::string>&, \
|
||||
std::map<std::string, SrcRecInfo>&);
|
||||
|
||||
void send_src_info_inter_sim(SrcRecInfo&, int);
|
||||
void recv_src_info_inter_sim(SrcRecInfo&, int);
|
||||
void broadcast_src_info_intra_sim(SrcRecInfo&, int);
|
||||
void send_rec_info_inter_sim(SrcRecInfo&, int);
|
||||
void recv_rec_info_inter_sim(SrcRecInfo&, int);
|
||||
void broadcast_rec_info_intra_sim(SrcRecInfo&, int);
|
||||
void send_data_info_inter_sim(DataInfo&, int);
|
||||
void recv_data_info_inter_sim(DataInfo&, int);
|
||||
|
||||
#endif //SRC_REC_H
|
||||
98
include/timer.h
Normal file
98
include/timer.h
Normal file
@@ -0,0 +1,98 @@
|
||||
#ifndef TIMER_H
|
||||
#define TIMER_H
|
||||
|
||||
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include <iomanip>
|
||||
#include <ctime>
|
||||
#include <string>
|
||||
#include "config.h"
|
||||
|
||||
class Timer {
|
||||
public:
|
||||
Timer(std::string name, bool show_start_time = false) : name(name), show_start_time(show_start_time) {
|
||||
start_timer();
|
||||
}
|
||||
|
||||
~Timer() {}
|
||||
|
||||
std::string name;
|
||||
std::chrono::high_resolution_clock::time_point start;
|
||||
std::chrono::high_resolution_clock::time_point end;
|
||||
std::chrono::duration<double> elapsed_time;
|
||||
bool show_start_time = false;
|
||||
|
||||
void start_timer() {
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
time_back = start;
|
||||
if (show_start_time && proc_read_srcrec) {
|
||||
std::cout << this->name << " started at " << get_current_utc_time() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void stop_timer() {
|
||||
end = std::chrono::high_resolution_clock::now();
|
||||
elapsed_time = end - start;
|
||||
print_elapsed_time();
|
||||
}
|
||||
|
||||
double get_start() {
|
||||
return std::chrono::duration<double>(start.time_since_epoch()).count();
|
||||
}
|
||||
|
||||
double get_t_delta() {
|
||||
auto current_time = std::chrono::high_resolution_clock::now();
|
||||
auto delta = current_time - time_back;
|
||||
time_back = current_time;
|
||||
return std::chrono::duration<double>(delta).count();
|
||||
}
|
||||
|
||||
double get_t() {
|
||||
return std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - start).count();
|
||||
}
|
||||
|
||||
std::string get_start_t() {
|
||||
return get_current_utc_time(start);
|
||||
}
|
||||
|
||||
std::string get_end_t() {
|
||||
return get_current_utc_time(end);
|
||||
}
|
||||
|
||||
std::string get_elapsed_t() {
|
||||
return std::to_string(elapsed_time.count()) + " sec";
|
||||
}
|
||||
|
||||
std::string get_utc_from_time_t(std::time_t time_t) {
|
||||
std::tm utc_tm = *std::gmtime(&time_t);
|
||||
|
||||
std::ostringstream oss;
|
||||
oss << std::put_time(&utc_tm, "%F %T") << " UTC";
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
private:
|
||||
std::chrono::high_resolution_clock::time_point time_back;
|
||||
|
||||
void print_elapsed_time() {
|
||||
if (if_verbose)
|
||||
std::cout << "Time for " << name << ": " << elapsed_time.count() << " sec" << std::endl;
|
||||
}
|
||||
|
||||
std::string get_current_utc_time(std::chrono::high_resolution_clock::time_point time = std::chrono::high_resolution_clock::now()) {
|
||||
std::chrono::milliseconds ms = std::chrono::duration_cast<std::chrono::milliseconds>(time.time_since_epoch());
|
||||
std::time_t time_t = ms.count() / 1000;
|
||||
|
||||
std::tm utc_tm = *std::gmtime(&time_t);
|
||||
|
||||
std::ostringstream oss;
|
||||
oss << std::put_time(&utc_tm, "%F %T") << " UTC";
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif // TIMER_H
|
||||
401
include/utils.h
Normal file
401
include/utils.h
Normal file
@@ -0,0 +1,401 @@
|
||||
#ifndef UTILS_H
|
||||
#define UTILS_H
|
||||
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <fstream>
|
||||
#include <sys/stat.h>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
#include <cmath>
|
||||
#include <new> // for std::bad_alloc
|
||||
#include <cstdlib> // for std::exit
|
||||
|
||||
|
||||
// chec if compiler is gcc 7.5 or older
|
||||
//#if __GNUC__ == 7 && __GNUC_MINOR__ <= 5
|
||||
//#include <experimental/filesystem>
|
||||
//#define GNUC_7_5
|
||||
//#elif __cplusplus > 201402L // compilers supporting c++17
|
||||
//#include <filesystem>
|
||||
//#endif
|
||||
|
||||
#include "config.h"
|
||||
|
||||
|
||||
inline int mkpath(std::string s,mode_t mode) {
|
||||
size_t pos=0;
|
||||
std::string dir;
|
||||
int mdret = 0;
|
||||
|
||||
if(s[s.size()-1]!='/'){
|
||||
// force trailing / so we can handle everything in loop
|
||||
s+='/';
|
||||
}
|
||||
|
||||
while((pos=s.find_first_of('/',pos))!=std::string::npos){
|
||||
dir=s.substr(0,pos++);
|
||||
if(dir.size()==0) continue; // if leading / first time is 0 length
|
||||
if((mdret=mkdir(dir.c_str(),mode)) && errno!=EEXIST){
|
||||
return mdret;
|
||||
}
|
||||
}
|
||||
return mdret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
inline void create_output_dir(std::string dir_path){
|
||||
|
||||
// create output directory if not exists (directories tree)
|
||||
if (world_rank == 0) {
|
||||
|
||||
//#if __cplusplus > 201402L && !defined(GNUC_7_5)
|
||||
// // this function requires c++17
|
||||
// if (!std::filesystem::exists(dir_path)){
|
||||
// std::filesystem::create_directories(dir_path);
|
||||
// } else {
|
||||
// std::cout << "Output directory already exists. Overwriting..." << std::endl;
|
||||
// }
|
||||
//#else // compilers not supporting std++17
|
||||
// check if directory exists
|
||||
struct stat info;
|
||||
if (stat(dir_path.c_str(), &info) != 0){
|
||||
// directory does not exist
|
||||
// create directory
|
||||
int status = mkpath(dir_path, 0755);
|
||||
if (status != 0){
|
||||
std::cout << "Error: cannot create output directory" << std::endl;
|
||||
std::cout << "Exiting..." << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
} else {
|
||||
// directory exists
|
||||
std::cout << "Output directory already exists. Overwriting..." << std::endl;
|
||||
}
|
||||
//#endif
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
inline bool is_file_exist(const char* fileName)
|
||||
{
|
||||
return static_cast<bool>(std::ifstream(fileName));
|
||||
}
|
||||
|
||||
|
||||
inline void stdout_by_main(char const* str){
|
||||
if (sim_rank == 0 && inter_sub_rank == 0 && sub_rank == 0 && id_sim == 0)
|
||||
std::cout << str << std::endl;
|
||||
}
|
||||
|
||||
|
||||
inline void stdout_by_rank_zero(char const* str){
|
||||
if(world_rank == 0)
|
||||
std::cout << str << std::endl;
|
||||
}
|
||||
|
||||
|
||||
inline void parse_options(int argc, char* argv[]){
|
||||
bool input_file_found = false;
|
||||
|
||||
for (int i = 1; i < argc; i++){
|
||||
if(strcmp(argv[i], "-v") == 0)
|
||||
if_verbose = true;
|
||||
else if (strcmp(argv[i],"-i") == 0){
|
||||
input_file = argv[i+1];
|
||||
input_file_found = true;
|
||||
}
|
||||
else if (strcmp(argv[i], "-h") == 0){
|
||||
stdout_by_main("usage: mpirun -np 4 TOMOATT -i input_params.yaml");
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
// error if input_file is not found
|
||||
if(!input_file_found){
|
||||
stdout_by_main("usage: mpirun -np 4 TOMOATT -i input_params.yaml");
|
||||
std::cout << "Error: input parameter file not found" << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline void parse_options_srcrec_weight(int argc, char* argv[]){
|
||||
bool input_file_found = false;
|
||||
|
||||
for (int i = 1; i < argc; i++){
|
||||
if(strcmp(argv[i], "-v") == 0)
|
||||
if_verbose = true;
|
||||
else if (strcmp(argv[i],"-i") == 0){
|
||||
input_file = argv[i+1];
|
||||
input_file_found = true;
|
||||
} else if (strcmp(argv[i],"-r") == 0){
|
||||
// reference value
|
||||
ref_value = atof(argv[i+1]);
|
||||
} else if (strcmp(argv[i], "-o") == 0){
|
||||
// get output filename
|
||||
output_file_weight = argv[i+1];
|
||||
}
|
||||
}
|
||||
|
||||
// error if input_file is not found
|
||||
if(!input_file_found){
|
||||
stdout_by_main("usage: ./SrcRecWeight -i srcrec_file.txt -r 10.0 -o srcrec_weight.txt");
|
||||
std::cout << argc << std::endl;
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
inline int check_data_type(T const& data){
|
||||
if (std::is_same<T,bool>::value){
|
||||
return 0;
|
||||
} else if (std::is_same<T, int>::value){
|
||||
return 1;
|
||||
} else if (std::is_same<T, float>::value){
|
||||
return 2;
|
||||
} else if (std::is_same<T, double>::value){
|
||||
return 3;
|
||||
} else {
|
||||
std::cout << "Error: custom real type is not float or double" << std::endl;
|
||||
std::cout << "Please check the definition of CUSTOMREAL in io.h" << std::endl;
|
||||
std::cout << "Exiting..." << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
inline T my_square(T const& a){
|
||||
return a * a;
|
||||
}
|
||||
|
||||
|
||||
// defined function is more than 2 times slower than inline function
|
||||
//#define my_square(a) (a * a)
|
||||
|
||||
|
||||
template <typename T>
|
||||
T* allocateMemory(size_t size, int allocationID) {
|
||||
try {
|
||||
T* ptr = new T[size];
|
||||
return ptr;
|
||||
} catch (const std::bad_alloc& e) {
|
||||
// Handle memory allocation failure
|
||||
std::cerr << "Memory allocation failed for ID " << allocationID << ": " << e.what() << std::endl;
|
||||
std::exit(EXIT_FAILURE); // Exit the program on allocation failure
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline void RLonLat2xyz(CUSTOMREAL lon, CUSTOMREAL lat, CUSTOMREAL R, CUSTOMREAL& x, CUSTOMREAL& y, CUSTOMREAL& z){
|
||||
/*
|
||||
lon : longitude in radian
|
||||
lat : latitude in radian
|
||||
R : radius
|
||||
|
||||
x : x coordinate in m
|
||||
y : y coordinate in m
|
||||
z : z coordinate in m
|
||||
*/
|
||||
//x = R*cos(lat*DEG2RAD)*cos(lon*DEG2RAD);
|
||||
//y = R*cos(lat*DEG2RAD)*sin(lon*DEG2RAD);
|
||||
//z = R*sin(lat*DEG2RAD);
|
||||
x = R*cos(lat)*cos(lon);
|
||||
y = R*cos(lat)*sin(lon);
|
||||
z = R*sin(lat);
|
||||
|
||||
}
|
||||
|
||||
|
||||
// calculate epicentral distance in radian from lon lat in radian
|
||||
inline void Epicentral_distance_sphere(const CUSTOMREAL lat1, const CUSTOMREAL lon1, \
|
||||
const CUSTOMREAL lat2, const CUSTOMREAL lon2, \
|
||||
CUSTOMREAL& dist) {
|
||||
|
||||
// calculate epicentral distance in radian
|
||||
//dist = std::abs(acos(sin(lat1)*sin(lat2)+cos(lat1)*cos(lat2)*cos(lon2-lon1)));
|
||||
CUSTOMREAL lon_dif = lon2 - lon1;
|
||||
|
||||
dist = std::atan2(std::sqrt((my_square(std::cos(lat2) * std::sin(lon_dif)) + \
|
||||
my_square(std::cos(lat1) * std::sin(lat2) - std::sin(lat1) * std::cos(lat2) * std::cos(lon_dif)))), \
|
||||
std::sin(lat1) * std::sin(lat2) + std::cos(lat1) * std::cos(lat2) * std::cos(lon_dif));
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
//calculate azimuth
|
||||
inline void Azimuth_sphere(CUSTOMREAL lat1, CUSTOMREAL lon1, \
|
||||
CUSTOMREAL lat2, CUSTOMREAL lon2, \
|
||||
CUSTOMREAL& azi) {
|
||||
|
||||
if (isZero(my_square((lat1-lat2)) \
|
||||
&& + my_square((lon1-lon2)))){
|
||||
azi = _0_CR;
|
||||
} else {
|
||||
azi = atan2(sin(lon2-lon1)*cos(lat2), \
|
||||
cos(lat1)*sin(lat2)-sin(lat1)*cos(lat2)*cos(lon2-lon1));
|
||||
if (azi < _0_CR)
|
||||
azi += _2_CR * PI;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline void WGS84ToCartesian(CUSTOMREAL& lon, CUSTOMREAL& lat, CUSTOMREAL R, \
|
||||
CUSTOMREAL& x, CUSTOMREAL& y, CUSTOMREAL& z, \
|
||||
CUSTOMREAL& lon_center, CUSTOMREAL& lat_center){
|
||||
|
||||
// equatorial radius WGS84 major axis
|
||||
const static CUSTOMREAL equRadius = R_earth; // in m in this function
|
||||
const static CUSTOMREAL flattening = 1.0 / 298.257222101;
|
||||
|
||||
const static CUSTOMREAL sqrEccentricity = flattening * (2.0 - flattening);
|
||||
|
||||
const CUSTOMREAL lat_rad = lat - lat_center; // input should be already in radian
|
||||
const CUSTOMREAL lon_rad = lon - lon_center;
|
||||
const CUSTOMREAL alt = R - equRadius; // altitude (height above sea level)
|
||||
|
||||
const CUSTOMREAL sinLat = sin(lat_rad);
|
||||
const CUSTOMREAL cosLat = cos(lat_rad);
|
||||
const CUSTOMREAL sinLon = sin(lon_rad);
|
||||
const CUSTOMREAL cosLon = cos(lon_rad);
|
||||
|
||||
// Normalized radius
|
||||
const CUSTOMREAL normRadius = equRadius / sqrt(1.0 - sqrEccentricity * sinLat * sinLat);
|
||||
|
||||
x = (normRadius + alt) * cosLat * cosLon;
|
||||
y = (normRadius + alt) * cosLat * sinLon;
|
||||
z = (normRadius * (1.0 - sqrEccentricity) + alt) * sinLat;
|
||||
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
inline T dot_product(T const* const a, T const* const b, int const& n){
|
||||
CUSTOMREAL result = _0_CR;
|
||||
for (int i = 0; i < n; i++){
|
||||
result += a[i] * b[i];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T find_max(T const* const a, int const& n){
|
||||
T max = a[0];
|
||||
for (int i = 1; i < n; i++){
|
||||
if (a[i] > max){
|
||||
max = a[i];
|
||||
}
|
||||
}
|
||||
return max;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T find_absmax(T const* const a, int const& n){
|
||||
T max = fabs(a[0]);
|
||||
for (int i = 1; i < n; i++){
|
||||
if (fabs(a[i]) > max){
|
||||
max = fabs(a[i]);
|
||||
}
|
||||
}
|
||||
return max;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T calc_l2norm(T const* const a, int const& n){
|
||||
T result = _0_CR;
|
||||
for (int i = 0; i < n; i++){
|
||||
result += a[i] * a[i];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
inline std::string int2string_zero_fill(int i) {
|
||||
std::stringstream ss;
|
||||
ss << std::setfill('0') << std::setw(4) << i;
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
|
||||
inline bool in_between(CUSTOMREAL const& a, CUSTOMREAL const& b, CUSTOMREAL const& c){
|
||||
// check if a is between b and c
|
||||
if ((a-b)*(a-c) <= _0_CR){
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline CUSTOMREAL calc_ratio_between(CUSTOMREAL const& a, CUSTOMREAL const& b, CUSTOMREAL const& c){
|
||||
// calculate ratio of a between b and c
|
||||
//if (b < c)
|
||||
return (a - b) / (c - b);
|
||||
//else
|
||||
// return (a - b) / (b - c);
|
||||
}
|
||||
|
||||
|
||||
inline void linear_interpolation_1d_sorted(const CUSTOMREAL* x1, const CUSTOMREAL* y1, const int& n1, const CUSTOMREAL* x2, CUSTOMREAL* y2, const int& n2){
|
||||
// linear interpolation for sorted 1d array (monotonely increasing)
|
||||
// x1 : positions of the first array
|
||||
// y1 : values of the first array
|
||||
// n1 : size of the first array
|
||||
// x2 : positions of the second array
|
||||
// y2 : values of the second array
|
||||
// n2 : size of the second array
|
||||
int start = 0;
|
||||
for(int pt2=0; pt2<n2; pt2++){ // x2[pt2] <= x1[0]
|
||||
if (x2[pt2] <= x1[0]){
|
||||
y2[pt2] = y1[0];
|
||||
} else if (x2[pt2] >= x1[n1-1]){ // x2[pt2] >= x1[n1-1]
|
||||
y2[pt2] = y1[n1-1];
|
||||
} else { // x1[0] < x2[pt2] < x1[n1-1]
|
||||
for(int pt1=start; pt1<n1-1; pt1++){
|
||||
if (x2[pt2] >= x1[pt1] && x2[pt2] <= x1[pt1+1]){
|
||||
y2[pt2] = y1[pt1] + (y1[pt1+1] - y1[pt1]) * (x2[pt2] - x1[pt1]) / (x1[pt1+1] - x1[pt1]);
|
||||
start = pt1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
std::vector<CUSTOMREAL> linspace(T start_in, T end_in, int num_in)
|
||||
{
|
||||
|
||||
std::vector<CUSTOMREAL> linspaced;
|
||||
|
||||
double start = static_cast<CUSTOMREAL>(start_in);
|
||||
double end = static_cast<CUSTOMREAL>(end_in);
|
||||
double num = static_cast<CUSTOMREAL>(num_in);
|
||||
|
||||
if (num == 0) { return linspaced; }
|
||||
if (num == 1)
|
||||
{
|
||||
linspaced.push_back(start);
|
||||
return linspaced;
|
||||
}
|
||||
|
||||
double delta = (end - start) / (num - 1);
|
||||
|
||||
for(int i=0; i < num-1; ++i)
|
||||
{
|
||||
linspaced.push_back(start + delta * i);
|
||||
}
|
||||
linspaced.push_back(end); // I want to ensure that start and end
|
||||
// are exactly the same as the input
|
||||
return linspaced;
|
||||
}
|
||||
|
||||
#endif // UTILS_H
|
||||
825
include/vectorized_sweep.h
Normal file
825
include/vectorized_sweep.h
Normal file
@@ -0,0 +1,825 @@
|
||||
#ifndef VECTORIZED_SWEEP_H
|
||||
#define VECTORIZED_SWEEP_H
|
||||
|
||||
|
||||
#include<vector>
|
||||
#include "config.h"
|
||||
|
||||
#ifdef USE_SIMD // closed at the end of this file
|
||||
#include "simd_conf.h"
|
||||
|
||||
#if USE_AVX || USE_AVX512
|
||||
__mT COEF = _mmT_set1_pT(1.0);
|
||||
__mT v_1 = _mmT_set1_pT(1.0);
|
||||
__mT v_0 = _mmT_set1_pT(0.0);
|
||||
__mT v_half = _mmT_set1_pT(0.5);
|
||||
__mT v_2 = _mmT_set1_pT(2.0);
|
||||
__mT v_m2 = _mmT_set1_pT(-2.0);
|
||||
__mT v_4 = _mmT_set1_pT(4.0);
|
||||
__mT v_m3 = _mmT_set1_pT(-3.0);
|
||||
__mT coe_max = _mmT_set1_pT(1e19);
|
||||
__mT v_eps = _mmT_set1_pT(1e-12);
|
||||
__mT COEF_TELE = _mmT_set1_pT(1.05);
|
||||
|
||||
|
||||
// square of __mT
|
||||
inline __mT my_square_v(__mT const& a){
|
||||
return _mmT_mul_pT(a, a);
|
||||
}
|
||||
|
||||
// pp = (a - b) * Dinv
|
||||
inline __mT calc_1d_stencil(__mT const& a, __mT const& b, __mT const& Dinv){
|
||||
return _mmT_mul_pT(_mmT_sub_pT(a,b),Dinv);
|
||||
}
|
||||
|
||||
/*
|
||||
ww = _1_CR/(_1_CR+_2_CR*my_square_v((eps + my_square_v( a \
|
||||
-_2_CR*b \
|
||||
+ c) ) \
|
||||
|
||||
/ (eps + my_square_v( d \
|
||||
-_2_CR*a \
|
||||
+ b) )) );
|
||||
|
||||
pp = sign * (_1_CR - ww) * ( b \ <-- sign = +1 or -1
|
||||
- d) * 0.5 * Dinv \
|
||||
+ ww * ( -3.0 * a \
|
||||
+ 4.0 * b \
|
||||
- 1.0 * c ) * 0.5 * Dinv );
|
||||
*/
|
||||
inline __mT calc_3d_stencil(__mT const& a, __mT const& b, __mT const&c, __mT const& d, __mT const& Dinv_half, int const& sign){
|
||||
|
||||
#ifdef __FMA__
|
||||
// v_eps + square(a - 2.0*b + c)
|
||||
__mT tmp1 = _mmT_add_pT(v_eps,my_square_v(_mmT_add_pT(a,_mmT_fmadd_pT(v_m2,b,c))));
|
||||
// v_eps + square(d - 2.0*a + b)
|
||||
__mT tmp2 = _mmT_add_pT(v_eps,my_square_v(_mmT_add_pT(d,_mmT_fmadd_pT(v_m2,a,b))));
|
||||
// ww = 1.0/(1.0 + 2.0 * square(tmp1/tmp2))
|
||||
__mT ww = _mmT_div_pT(v_1,_mmT_fmadd_pT(v_2,my_square_v(_mmT_div_pT(tmp1,tmp2)),v_1));
|
||||
// pp = sign* ((1.0 - ww) * (b - d) / 2.0 / D
|
||||
// + ww * (-3.0* a + 4.0 * b - c) * Dinv_half)
|
||||
/*
|
||||
*/
|
||||
return _mmT_mul_pT(_mmT_set1_pT(sign), \
|
||||
_mmT_add_pT(\
|
||||
_mmT_mul_pT(_mmT_sub_pT(v_1,ww),_mmT_mul_pT(_mmT_sub_pT(b,d),Dinv_half)),\
|
||||
_mmT_mul_pT(ww,_mmT_mul_pT(_mmT_sub_pT(_mmT_fmadd_pT(v_4,b,_mmT_mul_pT(v_m3,a)),c),Dinv_half))\
|
||||
)\
|
||||
);
|
||||
#else
|
||||
__mT tmp1 = _mmT_add_pT(v_eps,my_square_v(_mmT_add_pT(a,_mmT_add_pT(_mmT_mul_pT(v_m2,b),c))));
|
||||
__mT tmp2 = _mmT_add_pT(v_eps,my_square_v(_mmT_add_pT(d,_mmT_add_pT(_mmT_mul_pT(v_m2,a),b))));
|
||||
__mT ww = _mmT_div_pT(v_1,_mmT_add_pT(v_1,_mmT_mul_pT(v_2,my_square_v(_mmT_div_pT(tmp1,tmp2)))));
|
||||
return _mmT_mul_pT(_mmT_set1_pT(sign), \
|
||||
_mmT_add_pT(\
|
||||
_mmT_mul_pT(_mmT_sub_pT(v_1,ww),_mmT_mul_pT(_mmT_sub_pT(b,d),Dinv_half)),\
|
||||
_mmT_mul_pT(ww,_mmT_mul_pT(_mmT_sub_pT(_mmT_add_pT(_mmT_mul_pT(v_4,b),_mmT_mul_pT(v_m3,a)),c),Dinv_half))\
|
||||
)\
|
||||
);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#elif USE_ARM_SVE
|
||||
|
||||
inline __mT my_square_v(svbool_t const& pg, __mT const& a){
|
||||
return svmul_f64_z(pg, a, a);
|
||||
}
|
||||
|
||||
inline __mT calc_1d_stencil(svbool_t const& pg, __mT const& a, __mT const& b, __mT const& Dinv){
|
||||
return svmul_f64_z(pg, svsub_f64_z(pg, a, b), Dinv);
|
||||
}
|
||||
|
||||
inline __mT calc_3d_stencil(svbool_t const& pg, __mT const& a, __mT const& b, __mT const&c, __mT const& d, __mT const& Dinv_half, int const& sign){
|
||||
|
||||
__mT v_1 = svdup_f64(1.0);
|
||||
__mT v_2 = svdup_f64(2.0);
|
||||
__mT v_m2 = svdup_f64(-2.0);
|
||||
__mT v_4 = svdup_f64(4.0);
|
||||
__mT v_m3 = svdup_f64(-3.0);
|
||||
__mT v_eps = svdup_f64(1e-12);
|
||||
|
||||
// v_eps + square(a - 2.0*b + c)
|
||||
//__mT tmp1 = svadd_f64_z(pg, v_eps, my_square_v(pg, svadd_f64_z(pg, a, svadd_f64_z(pg, svmul_f64_z(pg, v_m2, b), c))));
|
||||
__mT tmp1 = svadd_f64_z(pg, v_eps, my_square_v(pg, svadd_f64_z(pg, a, svmad_f64_z(pg, v_m2, b, c))));
|
||||
// v_eps + square(d - 2.0*a + b)
|
||||
//__mT tmp2 = svadd_f64_z(pg, v_eps, my_square_v(pg, svadd_f64_z(pg, d, svadd_f64_z(pg, svmul_f64_z(pg, v_m2, a), b))));
|
||||
__mT tmp2 = svadd_f64_z(pg, v_eps, my_square_v(pg, svadd_f64_z(pg, d, svmad_f64_z(pg, v_m2, a, b))));
|
||||
// ww = 1.0/(1.0 + 2.0 * square(tmp1/tmp2))
|
||||
//__mT ww = svdiv_f64_z(pg, v_1, svadd_f64_z(pg, v_1, svmul_f64_z(pg, v_2, my_square_v(pg, svdiv_f64_z(pg, tmp1, tmp2)))));
|
||||
__mT ww = svdiv_f64_z(pg, v_1, svmad_f64_z(pg, v_2, my_square_v(pg, svdiv_f64_z(pg, tmp1, tmp2)), v_1));
|
||||
// pp = sign* ((1.0 - ww) * (b - d) / 2.0 / D
|
||||
// + ww * (-3.0* a + 4.0 * b - c) * Dinv_half)
|
||||
/*
|
||||
return svmul_f64_m(pg, svdup_f64(sign), \
|
||||
svadd_f64_z(pg, \
|
||||
svmul_f64_z(pg, svsub_f64_z(pg, v_1, ww), svmul_f64_z(pg, svsub_f64_z(pg, b, d), Dinv_half)),\
|
||||
svmul_f64_z(pg, ww, svmul_f64_z(pg, svsub_f64_z(pg, svadd_f64_z(pg, svmul_f64_z(pg, v_4, b), svmul_f64_z(pg, v_m3, a)), c), Dinv_half))\
|
||||
)\
|
||||
);
|
||||
*/
|
||||
return svmul_f64_m(pg, svdup_f64(sign), \
|
||||
svadd_f64_z(pg, \
|
||||
svmul_f64_z(pg, svsub_f64_z(pg, v_1, ww), svmul_f64_z(pg, svsub_f64_z(pg, b, d), Dinv_half)),\
|
||||
svmul_f64_z(pg, ww, svmul_f64_z(pg, svsub_f64_z(pg, svmad_f64_z(pg, v_4, b, svmul_f64_z(pg, v_m3, a)), c), Dinv_half))\
|
||||
)\
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
inline void vect_stencil_1st_pre_simd(
|
||||
#if USE_ARM_SVE
|
||||
svbool_t const& pg,
|
||||
#endif
|
||||
__mT const& v_iip, __mT const& v_jjt, __mT const& v_kkr,
|
||||
__mT const& v_c__,
|
||||
__mT const& v_p__, __mT const& v_m__, __mT const& v__p_, __mT const& v__m_, __mT const& v___p, __mT const& v___m,
|
||||
__mT& v_pp1, __mT& v_pp2, __mT& v_pt1, __mT& v_pt2, __mT& v_pr1, __mT& v_pr2,
|
||||
__mT const& v_DP_inv, __mT const& v_DT_inv, __mT const& v_DR_inv,
|
||||
__mT const& v_DP_inv_half, __mT const& v_DT_inv_half, __mT const& v_DR_inv_half,
|
||||
int const& NP, int const& NT, int const& NR){
|
||||
|
||||
#if USE_AVX512 || USE_AVX
|
||||
|
||||
v_pp1 = calc_1d_stencil(v_c__, v_m__, v_DP_inv);
|
||||
v_pp2 = calc_1d_stencil(v_p__, v_c__, v_DP_inv);
|
||||
v_pt1 = calc_1d_stencil(v_c__, v__m_, v_DT_inv);
|
||||
v_pt2 = calc_1d_stencil(v__p_, v_c__, v_DT_inv);
|
||||
v_pr1 = calc_1d_stencil(v_c__, v___m, v_DR_inv);
|
||||
v_pr2 = calc_1d_stencil(v___p, v_c__, v_DR_inv);
|
||||
|
||||
#elif USE_ARM_SVE
|
||||
|
||||
v_pp1 = calc_1d_stencil(pg, v_c__, v_m__, v_DP_inv);
|
||||
v_pp2 = calc_1d_stencil(pg, v_p__, v_c__, v_DP_inv);
|
||||
v_pt1 = calc_1d_stencil(pg, v_c__, v__m_, v_DT_inv);
|
||||
v_pt2 = calc_1d_stencil(pg, v__p_, v_c__, v_DT_inv);
|
||||
v_pr1 = calc_1d_stencil(pg, v_c__, v___m, v_DR_inv);
|
||||
v_pr2 = calc_1d_stencil(pg, v___p, v_c__, v_DR_inv);
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
inline void vect_stencil_3rd_pre_simd(
|
||||
#if USE_ARM_SVE
|
||||
svbool_t const& pg,
|
||||
#endif
|
||||
__mT const& v_iip, __mT const& v_jjt, __mT const& v_kkr,
|
||||
__mT const& v_c__,
|
||||
__mT const& v_p__, __mT const& v_m__, __mT const& v__p_, __mT const& v__m_, __mT const& v___p, __mT const& v___m,
|
||||
__mT const& v_pp____, __mT const& v_mm____, __mT const& v___pp__, __mT const& v___mm__, __mT const& v_____pp, __mT const& v_____mm,
|
||||
__mT& v_pp1, __mT& v_pp2, __mT& v_pt1, __mT& v_pt2, __mT& v_pr1, __mT& v_pr2,
|
||||
__mT const& v_DP_inv, __mT const& v_DT_inv, __mT const& v_DR_inv,
|
||||
__mT const& v_DP_inv_half, __mT const& v_DT_inv_half, __mT const& v_DR_inv_half,
|
||||
int const& NP, int const& NT, int const& NR){
|
||||
|
||||
const int PLUS = 1;
|
||||
const int MINUS = -1;
|
||||
|
||||
#if USE_AVX512
|
||||
|
||||
__mmaskT mask_i_eq_1 = _mm512_cmp_pT_mask(v_iip, v_1,_CMP_EQ_OQ); // if iip == 1
|
||||
__mmaskT mask_j_eq_1 = _mm512_cmp_pT_mask(v_jjt, v_1,_CMP_EQ_OQ); // if jjt == 1
|
||||
__mmaskT mask_k_eq_1 = _mm512_cmp_pT_mask(v_kkr, v_1,_CMP_EQ_OQ); // if kkr == 1
|
||||
__mmaskT mask_i_eq_N_minus_2 = _mm512_cmp_pT_mask(v_iip, _mmT_set1_pT(NP-2),_CMP_EQ_OQ); // if iip == N-2
|
||||
__mmaskT mask_j_eq_N_minus_2 = _mm512_cmp_pT_mask(v_jjt, _mmT_set1_pT(NT-2),_CMP_EQ_OQ); // if jjt == N-2
|
||||
__mmaskT mask_k_eq_N_minus_2 = _mm512_cmp_pT_mask(v_kkr, _mmT_set1_pT(NR-2),_CMP_EQ_OQ); // if kkr == N-2
|
||||
|
||||
// 1 < iip < N-2
|
||||
__mmaskT mask_i_else = _kand_maskT(
|
||||
_mm512_cmp_pT_mask(v_iip, v_1 , _CMP_GT_OQ),
|
||||
_mm512_cmp_pT_mask(_mmT_set1_pT(NP-2), v_iip, _CMP_GT_OQ)
|
||||
);
|
||||
// 1 < jjt < N-2
|
||||
__mmaskT mask_j_else = _kand_maskT(
|
||||
_mm512_cmp_pT_mask(v_jjt, v_1 , _CMP_GT_OQ),
|
||||
_mm512_cmp_pT_mask(_mmT_set1_pT(NT-2), v_jjt, _CMP_GT_OQ)
|
||||
);
|
||||
// 1 < kkr < N-2
|
||||
__mmaskT mask_k_else = _kand_maskT(
|
||||
_mm512_cmp_pT_mask(v_kkr, v_1 , _CMP_GT_OQ),
|
||||
_mm512_cmp_pT_mask(_mmT_set1_pT(NR-2), v_kkr, _CMP_GT_OQ)
|
||||
);
|
||||
|
||||
// if _i_eq_1 == true
|
||||
v_pp1 = _mm512_mask_blend_pT(mask_i_eq_1, calc_1d_stencil(v_c__, v_m__, v_DP_inv) , v_pp1 );
|
||||
v_pp2 = _mm512_mask_blend_pT(mask_i_eq_1, calc_3d_stencil(v_c__, v_p__, v_pp____, v_m__, v_DP_inv_half, PLUS), v_pp2 );
|
||||
// if_i_eq_N_minus_2 == true
|
||||
v_pp1 = _mm512_mask_blend_pT(mask_i_eq_N_minus_2, calc_3d_stencil(v_c__, v_m__, v_mm____, v_p__, v_DP_inv_half, MINUS), v_pp1);
|
||||
v_pp2 = _mm512_mask_blend_pT(mask_i_eq_N_minus_2, calc_1d_stencil(v_p__, v_c__, v_DP_inv) , v_pp2);
|
||||
// else
|
||||
v_pp1 = _mm512_mask_blend_pT(mask_i_else, calc_3d_stencil(v_c__, v_m__, v_mm____, v_p__, v_DP_inv_half, MINUS), v_pp1);
|
||||
v_pp2 = _mm512_mask_blend_pT(mask_i_else, calc_3d_stencil(v_c__, v_p__, v_pp____, v_m__, v_DP_inv_half, PLUS) , v_pp2);
|
||||
|
||||
// if _j_eq_1 == true
|
||||
v_pt1 = _mm512_mask_blend_pT(mask_j_eq_1, calc_1d_stencil(v_c__, v__m_, v_DT_inv) , v_pt1);
|
||||
v_pt2 = _mm512_mask_blend_pT(mask_j_eq_1, calc_3d_stencil(v_c__, v__p_, v___pp__, v__m_, v_DT_inv_half, PLUS), v_pt2);
|
||||
// if _j_eq_N_minus_2 == true
|
||||
v_pt1 = _mm512_mask_blend_pT(mask_j_eq_N_minus_2, calc_3d_stencil(v_c__, v__m_, v___mm__, v__p_, v_DT_inv_half, MINUS), v_pt1);
|
||||
v_pt2 = _mm512_mask_blend_pT(mask_j_eq_N_minus_2, calc_1d_stencil(v__p_, v_c__, v_DT_inv) , v_pt2);
|
||||
// else
|
||||
v_pt1 = _mm512_mask_blend_pT(mask_j_else, calc_3d_stencil(v_c__, v__m_, v___mm__, v__p_, v_DT_inv_half, MINUS), v_pt1);
|
||||
v_pt2 = _mm512_mask_blend_pT(mask_j_else, calc_3d_stencil(v_c__, v__p_, v___pp__, v__m_, v_DT_inv_half, PLUS) , v_pt2);
|
||||
|
||||
// if _k_eq_1 == true
|
||||
v_pr1 = _mm512_mask_blend_pT(mask_k_eq_1, calc_1d_stencil(v_c__, v___m, v_DR_inv) , v_pr1 );
|
||||
v_pr2 = _mm512_mask_blend_pT(mask_k_eq_1, calc_3d_stencil(v_c__, v___p, v_____pp, v___m, v_DR_inv_half, PLUS), v_pr2 );
|
||||
// if _k_eq_N_minus_2 == true
|
||||
v_pr1 = _mm512_mask_blend_pT(mask_k_eq_N_minus_2, calc_3d_stencil(v_c__, v___m, v_____mm, v___p, v_DR_inv_half, MINUS), v_pr1);
|
||||
v_pr2 = _mm512_mask_blend_pT(mask_k_eq_N_minus_2, calc_1d_stencil(v___p, v_c__, v_DR_inv) , v_pr2);
|
||||
// else
|
||||
v_pr1 = _mm512_mask_blend_pT(mask_k_else, calc_3d_stencil(v_c__, v___m, v_____mm, v___p, v_DR_inv_half, MINUS), v_pr1);
|
||||
v_pr2 = _mm512_mask_blend_pT(mask_k_else, calc_3d_stencil(v_c__, v___p, v_____pp, v___m, v_DR_inv_half, PLUS) , v_pr2);
|
||||
|
||||
#elif USE_AVX
|
||||
|
||||
__mT mask_i_eq_1 = _mm256_cmp_pT(v_iip, v_1,_CMP_EQ_OQ); // if iip == 1
|
||||
__mT mask_j_eq_1 = _mm256_cmp_pT(v_jjt, v_1,_CMP_EQ_OQ); // if jjt == 1
|
||||
__mT mask_k_eq_1 = _mm256_cmp_pT(v_kkr, v_1,_CMP_EQ_OQ); // if kkr == 1
|
||||
__mT mask_i_eq_N_minus_2 = _mm256_cmp_pT(v_iip, _mmT_set1_pT(NP-2),_CMP_EQ_OQ); // if iip == N-2
|
||||
__mT mask_j_eq_N_minus_2 = _mm256_cmp_pT(v_jjt, _mmT_set1_pT(NT-2),_CMP_EQ_OQ); // if jjt == N-2
|
||||
__mT mask_k_eq_N_minus_2 = _mm256_cmp_pT(v_kkr, _mmT_set1_pT(NR-2),_CMP_EQ_OQ); // if kkr == N-2
|
||||
|
||||
// 1 < iip < N-2
|
||||
__m256d mask_i_else = _mm256_and_pd(
|
||||
_mm256_cmp_pT(v_iip, v_1 , _CMP_GT_OQ),
|
||||
_mm256_cmp_pT(_mmT_set1_pT(NP-2),v_iip, _CMP_GT_OQ)
|
||||
);
|
||||
// 1 < jjt < N-2
|
||||
__m256d mask_j_else = _mm256_and_pd(
|
||||
_mm256_cmp_pT(v_jjt, v_1 , _CMP_GT_OQ),
|
||||
_mm256_cmp_pT(_mmT_set1_pT(NT-2),v_jjt, _CMP_GT_OQ)
|
||||
);
|
||||
// 1 < kkr < N-2
|
||||
__m256d mask_k_else = _mm256_and_pd(
|
||||
_mm256_cmp_pT(v_kkr, v_1 , _CMP_GT_OQ),
|
||||
_mm256_cmp_pT(_mmT_set1_pT(NR-2),v_kkr, _CMP_GT_OQ)
|
||||
);
|
||||
|
||||
// if _i_eq_1 == true
|
||||
v_pp1 = _mm256_blendv_pd(v_pp1, calc_1d_stencil(v_c__, v_m__, v_DP_inv), mask_i_eq_1);
|
||||
v_pp2 = _mm256_blendv_pd(v_pp2, calc_3d_stencil(v_c__, v_p__, v_pp____, v_m__, v_DP_inv_half, PLUS), mask_i_eq_1);
|
||||
// if_i_eq_N_minus_2 == true
|
||||
v_pp1 = _mm256_blendv_pd(v_pp1, calc_3d_stencil(v_c__, v_m__, v_mm____, v_p__, v_DP_inv_half, MINUS), mask_i_eq_N_minus_2);
|
||||
v_pp2 = _mm256_blendv_pd(v_pp2, calc_1d_stencil(v_p__, v_c__, v_DP_inv), mask_i_eq_N_minus_2);
|
||||
// else
|
||||
v_pp1 = _mm256_blendv_pd(v_pp1, calc_3d_stencil(v_c__, v_m__, v_mm____, v_p__, v_DP_inv_half, MINUS), mask_i_else);
|
||||
v_pp2 = _mm256_blendv_pd(v_pp2, calc_3d_stencil(v_c__, v_p__, v_pp____, v_m__, v_DP_inv_half, PLUS), mask_i_else);
|
||||
|
||||
// if _j_eq_1 == true
|
||||
v_pt1 = _mm256_blendv_pd(v_pt1, calc_1d_stencil(v_c__, v__m_, v_DT_inv), mask_j_eq_1);
|
||||
v_pt2 = _mm256_blendv_pd(v_pt2, calc_3d_stencil(v_c__, v__p_, v___pp__, v__m_, v_DT_inv_half, PLUS), mask_j_eq_1);
|
||||
// if _j_eq_N_minus_2 == true
|
||||
v_pt1 = _mm256_blendv_pd(v_pt1, calc_3d_stencil(v_c__, v__m_, v___mm__, v__p_, v_DT_inv_half, MINUS), mask_j_eq_N_minus_2);
|
||||
v_pt2 = _mm256_blendv_pd(v_pt2, calc_1d_stencil(v__p_, v_c__, v_DT_inv), mask_j_eq_N_minus_2);
|
||||
// else
|
||||
v_pt1 = _mm256_blendv_pd(v_pt1, calc_3d_stencil(v_c__, v__m_, v___mm__, v__p_, v_DT_inv_half, MINUS), mask_j_else);
|
||||
v_pt2 = _mm256_blendv_pd(v_pt2, calc_3d_stencil(v_c__, v__p_, v___pp__, v__m_, v_DT_inv_half, PLUS), mask_j_else);
|
||||
|
||||
// if _k_eq_1 == true
|
||||
v_pr1 = _mm256_blendv_pd(v_pr1, calc_1d_stencil(v_c__, v___m, v_DR_inv), mask_k_eq_1);
|
||||
v_pr2 = _mm256_blendv_pd(v_pr2, calc_3d_stencil(v_c__, v___p, v_____pp, v___m, v_DR_inv_half, PLUS), mask_k_eq_1);
|
||||
// if _k_eq_N_minus_2 == true
|
||||
v_pr1 = _mm256_blendv_pd(v_pr1, calc_3d_stencil(v_c__, v___m, v_____mm, v___p, v_DR_inv_half, MINUS), mask_k_eq_N_minus_2);
|
||||
v_pr2 = _mm256_blendv_pd(v_pr2, calc_1d_stencil(v___p, v_c__, v_DR_inv), mask_k_eq_N_minus_2);
|
||||
// else
|
||||
v_pr1 = _mm256_blendv_pd(v_pr1, calc_3d_stencil(v_c__, v___m, v_____mm, v___p, v_DR_inv_half, MINUS), mask_k_else);
|
||||
v_pr2 = _mm256_blendv_pd(v_pr2, calc_3d_stencil(v_c__, v___p, v_____pp, v___m, v_DR_inv_half, PLUS), mask_k_else);
|
||||
|
||||
#elif USE_ARM_SVE
|
||||
|
||||
svfloat64_t v_1 = svdup_f64(1.0);
|
||||
|
||||
svfloat64_t v_NP_minus_2 = svdup_f64(NP-2);
|
||||
svfloat64_t v_NT_minus_2 = svdup_f64(NT-2);
|
||||
svfloat64_t v_NR_minus_2 = svdup_f64(NR-2);
|
||||
|
||||
svbool_t mask_i_eq_1 = svcmpeq_f64(pg, v_iip, v_1); // if iip == 1
|
||||
svbool_t mask_j_eq_1 = svcmpeq_f64(pg, v_jjt, v_1); // if jjt == 1
|
||||
svbool_t mask_k_eq_1 = svcmpeq_f64(pg, v_kkr, v_1); // if kkr == 1
|
||||
svbool_t mask_i_eq_N_minus_2 = svcmpeq_f64(pg, v_iip, v_NP_minus_2); // if iip == N-2
|
||||
svbool_t mask_j_eq_N_minus_2 = svcmpeq_f64(pg, v_jjt, v_NT_minus_2); // if jjt == N-2
|
||||
svbool_t mask_k_eq_N_minus_2 = svcmpeq_f64(pg, v_kkr, v_NR_minus_2); // if kkr == N-2
|
||||
|
||||
// 1 < iip < N-2
|
||||
svbool_t mask_i_else = svand_b_z(pg,
|
||||
svcmpgt_f64(pg, v_iip, v_1),
|
||||
svcmplt_f64(pg, v_iip, v_NP_minus_2)
|
||||
);
|
||||
// 1 < jjt < N-2
|
||||
svbool_t mask_j_else = svand_b_z(pg,
|
||||
svcmpgt_f64(pg, v_jjt, v_1),
|
||||
svcmplt_f64(pg, v_jjt, v_NT_minus_2)
|
||||
);
|
||||
// 1 < kkr < N-2
|
||||
svbool_t mask_k_else = svand_b_z(pg,
|
||||
svcmpgt_f64(pg, v_kkr, v_1),
|
||||
svcmplt_f64(pg, v_kkr, v_NR_minus_2)
|
||||
);
|
||||
|
||||
// if _i_eq_1 == true
|
||||
v_pp1 = svsel_f64(mask_i_eq_1, calc_1d_stencil(mask_i_eq_1, v_c__, v_m__, v_DP_inv) , v_pp1);
|
||||
v_pp2 = svsel_f64(mask_i_eq_1, calc_3d_stencil(mask_i_eq_1, v_c__, v_p__, v_pp____, v_m__, v_DP_inv_half, PLUS), v_pp2);
|
||||
// if_i_eq_N_minus_2 == true
|
||||
v_pp1 = svsel_f64(mask_i_eq_N_minus_2, calc_3d_stencil(mask_i_eq_N_minus_2, v_c__, v_m__, v_mm____, v_p__, v_DP_inv_half, MINUS), v_pp1);
|
||||
v_pp2 = svsel_f64(mask_i_eq_N_minus_2, calc_1d_stencil(mask_i_eq_N_minus_2, v_p__, v_c__, v_DP_inv) , v_pp2);
|
||||
// else
|
||||
v_pp1 = svsel_f64(mask_i_else, calc_3d_stencil(mask_i_else, v_c__, v_m__, v_mm____, v_p__, v_DP_inv_half, MINUS), v_pp1);
|
||||
v_pp2 = svsel_f64(mask_i_else, calc_3d_stencil(mask_i_else, v_c__, v_p__, v_pp____, v_m__, v_DP_inv_half, PLUS) , v_pp2);
|
||||
|
||||
// if _j_eq_1 == true
|
||||
v_pt1 = svsel_f64(mask_j_eq_1, calc_1d_stencil(mask_j_eq_1, v_c__, v__m_, v_DT_inv) , v_pt1);
|
||||
v_pt2 = svsel_f64(mask_j_eq_1, calc_3d_stencil(mask_j_eq_1, v_c__, v__p_, v___pp__, v__m_, v_DT_inv_half, PLUS), v_pt2);
|
||||
// if _j_eq_N_minus_2 == true
|
||||
v_pt1 = svsel_f64(mask_j_eq_N_minus_2, calc_3d_stencil(mask_j_eq_N_minus_2, v_c__, v__m_, v___mm__, v__p_, v_DT_inv_half, MINUS), v_pt1);
|
||||
v_pt2 = svsel_f64(mask_j_eq_N_minus_2, calc_1d_stencil(mask_j_eq_N_minus_2, v__p_, v_c__, v_DT_inv) , v_pt2);
|
||||
// else
|
||||
v_pt1 = svsel_f64(mask_j_else, calc_3d_stencil(mask_j_else, v_c__, v__m_, v___mm__, v__p_, v_DT_inv_half, MINUS), v_pt1);
|
||||
v_pt2 = svsel_f64(mask_j_else, calc_3d_stencil(mask_j_else, v_c__, v__p_, v___pp__, v__m_, v_DT_inv_half, PLUS) , v_pt2);
|
||||
|
||||
// if _k_eq_1 == true
|
||||
v_pr1 = svsel_f64(mask_k_eq_1, calc_1d_stencil(mask_k_eq_1, v_c__, v___m, v_DR_inv) , v_pr1);
|
||||
v_pr2 = svsel_f64(mask_k_eq_1, calc_3d_stencil(mask_k_eq_1, v_c__, v___p, v_____pp, v___m, v_DR_inv_half, PLUS), v_pr2);
|
||||
// if _k_eq_N_minus_2 == true
|
||||
v_pr1 = svsel_f64(mask_k_eq_N_minus_2, calc_3d_stencil(mask_k_eq_N_minus_2, v_c__, v___m, v_____mm, v___p, v_DR_inv_half, MINUS), v_pr1);
|
||||
v_pr2 = svsel_f64(mask_k_eq_N_minus_2, calc_1d_stencil(mask_k_eq_N_minus_2, v___p, v_c__, v_DR_inv) , v_pr2);
|
||||
// else
|
||||
v_pr1 = svsel_f64(mask_k_else, calc_3d_stencil(mask_k_else, v_c__, v___m, v_____mm, v___p, v_DR_inv_half, MINUS), v_pr1);
|
||||
v_pr2 = svsel_f64(mask_k_else, calc_3d_stencil(mask_k_else, v_c__, v___p, v_____pp, v___m, v_DR_inv_half, PLUS) , v_pr2);
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
inline void vect_stencil_1st_3rd_apre_simd(
|
||||
#if USE_ARM_SVE
|
||||
svbool_t const& pg,
|
||||
#endif
|
||||
__mT& v_tau, __mT const& v_fac_a,__mT const& v_fac_b, __mT const& v_fac_c, __mT const& v_fac_f,
|
||||
__mT const& v_T0v, __mT const& v_T0p, __mT const& v_T0t, __mT const& v_T0r, __mT const& v_fun, __mT const& v_change,
|
||||
__mT const& v_pp1, __mT const& v_pp2, __mT const& v_pt1, __mT const& v_pt2, __mT const& v_pr1, __mT const& v_pr2,
|
||||
__mT const& DP_inv, __mT const& DT_inv, __mT const& DR_inv){
|
||||
|
||||
#if USE_AVX512 || USE_AVX
|
||||
|
||||
// sigr = COEF * sqrt(v_fac_a)*v_T0v;
|
||||
__mT sigr = _mmT_mul_pT(_mmT_mul_pT(COEF,_mmT_sqrt_pT(v_fac_a)),v_T0v);
|
||||
// sigt = COEF * sqrt(v_fac_b)*v_T0v;
|
||||
__mT sigt = _mmT_mul_pT(_mmT_mul_pT(COEF,_mmT_sqrt_pT(v_fac_b)),v_T0v);
|
||||
// sigp = COEF * sqrt(v_fac_c)*v_T0v;
|
||||
__mT sigp = _mmT_mul_pT(_mmT_mul_pT(COEF,_mmT_sqrt_pT(v_fac_c)),v_T0v);
|
||||
|
||||
// coe = 1.0 / (sigr/D + sigt/D + sigz/D);
|
||||
__mT coe = _mmT_div_pT(v_1,_mmT_add_pT(_mmT_add_pT(_mmT_mul_pT(sigr,DR_inv),_mmT_mul_pT(sigt,DT_inv)),_mmT_mul_pT(sigp,DP_inv)));
|
||||
// coe becomes inf as sig* goes to 0
|
||||
// if coe > 1e19, set coe = 1e19
|
||||
coe = _mmT_min_pT(coe,coe_max);
|
||||
|
||||
// Htau = v_fac_a * square(v_T0r * v_tau + v_T0v * (v_pr1 + v_pr2)*0.5);
|
||||
__mT Htau = _mmT_mul_pT(v_fac_a,my_square_v(_mmT_add_pT(_mmT_mul_pT(v_T0r,v_tau),_mmT_mul_pT(v_T0v,_mmT_mul_pT(_mmT_add_pT(v_pr1,v_pr2),v_half)))));
|
||||
|
||||
// Htau += v_fac_b * square(v_T0t * v_tau + v_T0v * (v_pt1 + v_pt2)*0.5))
|
||||
Htau = _mmT_add_pT(Htau,_mmT_mul_pT(v_fac_b,my_square_v(_mmT_add_pT(_mmT_mul_pT(v_T0t,v_tau),_mmT_mul_pT(v_T0v,_mmT_mul_pT(_mmT_add_pT(v_pt1,v_pt2), v_half))))));
|
||||
// Htau += v_fac_c * square(v_T0p * v_tau + v_T0v * (v_pp1 + v_pp2)*0.5))
|
||||
Htau = _mmT_add_pT(Htau,_mmT_mul_pT(v_fac_c,my_square_v(_mmT_add_pT(_mmT_mul_pT(v_T0p,v_tau),_mmT_mul_pT(v_T0v,_mmT_mul_pT(_mmT_add_pT(v_pp1,v_pp2), v_half))))));
|
||||
|
||||
// tmp1 = ( v_T0t * v_tau + v_T0v * (v_pt1 + v_pt2)*0.5)
|
||||
__mT tmp1 = _mmT_add_pT(_mmT_mul_pT(v_T0t,v_tau),_mmT_mul_pT(v_T0v,_mmT_mul_pT(_mmT_add_pT(v_pt1,v_pt2), v_half)));
|
||||
// tmp2 = ( v_T0p * v_tau + v_T0v * (v_pp1 + v_pp2)*0.5))
|
||||
__mT tmp2 = _mmT_add_pT(_mmT_mul_pT(v_T0p,v_tau),_mmT_mul_pT(v_T0v,_mmT_mul_pT(_mmT_add_pT(v_pp1,v_pp2), v_half)));
|
||||
// tmp3 = -2.0 * v_fac_f * tmp1 * tmp2;
|
||||
__mT tmp3 = _mmT_mul_pT(v_m2,_mmT_mul_pT(v_fac_f,_mmT_mul_pT(tmp1,tmp2)));
|
||||
|
||||
// Htau = sqrt(Htau + tmp3); // # becamse nan as Htau - tmp3 goes to 0
|
||||
Htau = _mmT_sqrt_pT(_mmT_add_pT(Htau,tmp3));
|
||||
|
||||
// tmp = (sigr*(v_pr2 - v_pr1) + sigt*(v_pt2 - v_pt1) + sigz*(v_pp2 - v_pp1))*0.5
|
||||
__mT tmp = _mmT_mul_pT(v_half,_mmT_add_pT(_mmT_add_pT(_mmT_mul_pT(sigr,_mmT_sub_pT(v_pr2,v_pr1)),_mmT_mul_pT(sigt,_mmT_sub_pT(v_pt2,v_pt1))),_mmT_mul_pT(sigp,_mmT_sub_pT(v_pp2,v_pp1))));
|
||||
|
||||
// v_tau += coe * ((v_fun - Htau) + tmp) if mask is true
|
||||
v_tau = _mmT_add_pT(v_tau,_mmT_mul_pT(coe,_mmT_add_pT(_mmT_sub_pT(v_fun,Htau),tmp)));
|
||||
|
||||
#if USE_AVX512
|
||||
// mask if v_change != 1.0
|
||||
__mmaskT mask = _mm512_cmp_pT_mask(v_change,v_1,_CMP_NEQ_OQ);
|
||||
// set 1 if mask is true
|
||||
v_tau = _mm512_mask_blend_pT(mask,v_tau,v_1);
|
||||
#elif USE_AVX
|
||||
__m256d mask = _mm256_cmp_pT(v_change, v_1,_CMP_NEQ_OQ);
|
||||
v_tau = _mm256_blendv_pd(v_tau, v_1,mask);
|
||||
#endif
|
||||
|
||||
#elif USE_ARM_SVE
|
||||
|
||||
__mT COEF = svdup_f64(1.0);
|
||||
__mT v_1 = svdup_f64(1.0);
|
||||
__mT v_half = svdup_f64(0.5);
|
||||
__mT v_m2 = svdup_f64(-2.0);
|
||||
__mT coe_max = svdup_f64(1e19);
|
||||
|
||||
// sigr = COEF * sqrt(v_fac_a)*v_T0v;
|
||||
__mT sigr = svmul_f64_z(pg, svmul_f64_z(pg,COEF,svsqrt_f64_z(pg,v_fac_a)), v_T0v);
|
||||
// sigt = COEF * sqrt(v_fac_b)*v_T0v;
|
||||
__mT sigt = svmul_f64_z(pg, svmul_f64_z(pg,COEF,svsqrt_f64_z(pg,v_fac_b)), v_T0v);
|
||||
// sigp = COEF * sqrt(v_fac_c)*v_T0v;
|
||||
__mT sigp = svmul_f64_z(pg, svmul_f64_z(pg,COEF,svsqrt_f64_z(pg,v_fac_c)), v_T0v);
|
||||
|
||||
// coe = 1.0 / (sigr/D + sigt/D + sigz/D);
|
||||
__mT coe = svdiv_f64_z(pg, v_1, svadd_f64_z(pg, svadd_f64_z(pg, svmul_f64_z(pg, sigr, DR_inv), svmul_f64_z(pg, sigt, DT_inv)), svmul_f64_z(pg, sigp, DP_inv)));
|
||||
// coe becomes inf as sig* goes to 0
|
||||
// if coe > 1e19, set coe = 1e19
|
||||
coe = svmin_f64_z(pg, coe, coe_max);
|
||||
|
||||
// Htau = v_fac_a * square(v_T0r * v_tau + v_T0v * (v_pr1 + v_pr2)*0.5);
|
||||
__mT Htau = svmul_f64_z(pg, v_fac_a, my_square_v(pg, svadd_f64_z(pg, \
|
||||
svmul_f64_z(pg, v_T0r, v_tau), \
|
||||
svmul_f64_z(pg, v_T0v, svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pr1, v_pr2))))\
|
||||
)\
|
||||
);
|
||||
|
||||
// Htau += v_fac_b * square(v_T0t * v_tau + v_T0v * (v_pt1 + v_pt2)*0.5))
|
||||
Htau = svadd_f64_z(pg, Htau, svmul_f64_z(pg, v_fac_b, my_square_v(pg, svadd_f64_z(pg, \
|
||||
svmul_f64_z(pg, v_T0t, v_tau), \
|
||||
svmul_f64_z(pg, v_T0v, svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pt1, v_pt2))))\
|
||||
)\
|
||||
)\
|
||||
);
|
||||
// Htau += v_fac_c * square(v_T0p * v_tau + v_T0v * (v_pp1 + v_pp2)*0.5))
|
||||
Htau = svadd_f64_z(pg, Htau, svmul_f64_z(pg, v_fac_c, my_square_v(pg, svadd_f64_z(pg, \
|
||||
svmul_f64_z(pg, v_T0p, v_tau), \
|
||||
svmul_f64_z(pg, v_T0v, svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pp1, v_pp2))))\
|
||||
)\
|
||||
)\
|
||||
);
|
||||
|
||||
// tmp1 = ( v_T0t * v_tau + v_T0v * (v_pt1 + v_pt2)*0.5)
|
||||
__mT tmp1 = svadd_f64_z(pg, \
|
||||
svmul_f64_z(pg, v_T0t, v_tau), \
|
||||
svmul_f64_z(pg, v_T0v, svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pt1, v_pt2)))\
|
||||
);
|
||||
// tmp2 = ( v_T0p * v_tau + v_T0v * (v_pp1 + v_pp2)*0.5))
|
||||
__mT tmp2 = svadd_f64_z(pg, \
|
||||
svmul_f64_z(pg, v_T0p, v_tau), \
|
||||
svmul_f64_z(pg, v_T0v, svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pp1, v_pp2)))\
|
||||
);
|
||||
// tmp3 = -2.0 * v_fac_f * tmp1 * tmp2;
|
||||
__mT tmp3 = svmul_f64_z(pg, v_m2, svmul_f64_z(pg, v_fac_f, svmul_f64_z(pg, tmp1, tmp2)));
|
||||
|
||||
// Htau = sqrt(Htau + tmp3); // # becamse nan as Htau - tmp3 goes to 0
|
||||
Htau = svsqrt_f64_z(pg, svadd_f64_z(pg, Htau, tmp3));
|
||||
|
||||
// tmp = (sigr*(v_pr2 - v_pr1) + sigt*(v_pt2 - v_pt1) + sigz*(v_pp2 - v_pp1))*0.5
|
||||
__mT tmp = svmul_f64_z(pg, v_half, svadd_f64_z(pg, \
|
||||
svadd_f64_z(pg, \
|
||||
svmul_f64_z(pg, sigr, svsub_f64_z(pg, v_pr2, v_pr1)), \
|
||||
svmul_f64_z(pg, sigt, svsub_f64_z(pg, v_pt2, v_pt1))\
|
||||
), \
|
||||
svmul_f64_z(pg, sigp, svsub_f64_z(pg, v_pp2, v_pp1))\
|
||||
)\
|
||||
);
|
||||
|
||||
// v_tau += coe * ((v_fun - Htau) + tmp) if mask is true
|
||||
v_tau = svadd_f64_z(pg, v_tau, svmul_f64_z(pg, coe, svadd_f64_z(pg, svsub_f64_z(pg, v_fun, Htau), tmp)));
|
||||
// mask = v_change != 1.0
|
||||
svbool_t mask = svcmpne_f64(pg, v_change, v_1);
|
||||
// v_tau = v_1 if mask is true (!= 1.0)
|
||||
v_tau = svsel_f64(mask, v_1, v_tau);
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
inline void vect_stencil_1st_3rd_apre_simd_tele(
|
||||
#if USE_ARM_SVE
|
||||
svbool_t const& pg,
|
||||
#endif
|
||||
__mT& v_tau, __mT const& v_fac_a,__mT const& v_fac_b, __mT const& v_fac_c, __mT const& v_fac_f,
|
||||
__mT const& v_fun, __mT const& v_change,
|
||||
__mT const& v_pp1, __mT const& v_pp2, __mT const& v_pt1, __mT const& v_pt2, __mT const& v_pr1, __mT const& v_pr2,
|
||||
__mT const& DP_inv, __mT const& DT_inv, __mT const& DR_inv){
|
||||
|
||||
#if USE_AVX512 || USE_AVX
|
||||
|
||||
// sigr = COEF * sqrt(v_fac_a);
|
||||
__mT sigr = _mmT_mul_pT(COEF_TELE,_mmT_sqrt_pT(v_fac_a));
|
||||
// sigt = COEF * sqrt(v_fac_b);
|
||||
__mT sigt = _mmT_mul_pT(COEF_TELE,_mmT_sqrt_pT(v_fac_b));
|
||||
// sigp = COEF * sqrt(v_fac_c);
|
||||
__mT sigp = _mmT_mul_pT(COEF_TELE,_mmT_sqrt_pT(v_fac_c));
|
||||
|
||||
// coe = 1.0 / (sigr/D + sigt/D + sigz/D);
|
||||
__mT coe = _mmT_div_pT(v_1,_mmT_add_pT(_mmT_add_pT(_mmT_mul_pT(sigr,DR_inv),_mmT_mul_pT(sigt,DT_inv)),_mmT_mul_pT(sigp,DP_inv)));
|
||||
// coe becomes inf as sig* goes to 0
|
||||
// if coe > 1e19, set coe = 1e19
|
||||
coe = _mmT_min_pT(coe,coe_max);
|
||||
|
||||
// Htau = v_fac_a * square((v_pr1 + v_pr2)*0.5);
|
||||
__mT Htau = _mmT_mul_pT(v_fac_a,my_square_v(_mmT_mul_pT(_mmT_add_pT(v_pr1,v_pr2),v_half)));
|
||||
|
||||
// Htau += v_fac_b * square((v_pt1 + v_pt2)*0.5))
|
||||
Htau = _mmT_add_pT(Htau,_mmT_mul_pT(v_fac_b,my_square_v(_mmT_mul_pT(_mmT_add_pT(v_pt1,v_pt2), v_half))));
|
||||
// Htau += v_fac_c * square((v_pp1 + v_pp2)*0.5))
|
||||
Htau = _mmT_add_pT(Htau,_mmT_mul_pT(v_fac_c,my_square_v(_mmT_mul_pT(_mmT_add_pT(v_pp1,v_pp2), v_half))));
|
||||
|
||||
// tmp1 = (v_pt1 + v_pt2)*0.5
|
||||
__mT tmp1 = _mmT_mul_pT(_mmT_add_pT(v_pt1,v_pt2), v_half);
|
||||
// tmp2 = (v_pp1 + v_pp2)*0.5
|
||||
__mT tmp2 = _mmT_mul_pT(_mmT_add_pT(v_pp1,v_pp2), v_half);
|
||||
// tmp3 = -2.0 * v_fac_f * tmp1 * tmp2;
|
||||
__mT tmp3 = _mmT_mul_pT(v_m2,_mmT_mul_pT(v_fac_f,_mmT_mul_pT(tmp1,tmp2)));
|
||||
|
||||
// Htau = sqrt(Htau + tmp3); // # becamse nan as Htau - tmp3 goes to 0
|
||||
Htau = _mmT_sqrt_pT(_mmT_add_pT(Htau,tmp3));
|
||||
|
||||
// tmp = (sigr*(v_pr2 - v_pr1) + sigt*(v_pt2 - v_pt1) + sigz*(v_pp2 - v_pp1))*0.5
|
||||
__mT tmp = _mmT_mul_pT(v_half,_mmT_add_pT(_mmT_add_pT(_mmT_mul_pT(sigr,_mmT_sub_pT(v_pr2,v_pr1)),_mmT_mul_pT(sigt,_mmT_sub_pT(v_pt2,v_pt1))),_mmT_mul_pT(sigp,_mmT_sub_pT(v_pp2,v_pp1))));
|
||||
|
||||
// v_tau += coe * ((v_fun - Htau) + tmp) if mask is true
|
||||
v_tau = _mmT_add_pT(v_tau,_mmT_mul_pT(coe,_mmT_add_pT(_mmT_sub_pT(v_fun,Htau),tmp)));
|
||||
|
||||
#if USE_AVX512
|
||||
// mask if v_change != 1.0
|
||||
__mmaskT mask = _mm512_cmp_pT_mask(v_change,v_1,_CMP_NEQ_OQ);
|
||||
// set 1 if mask is true
|
||||
v_tau = _mm512_mask_blend_pT(mask,v_tau,v_1);
|
||||
#elif USE_AVX
|
||||
__m256d mask = _mm256_cmp_pT(v_change, v_1,_CMP_NEQ_OQ);
|
||||
v_tau = _mm256_blendv_pd(v_tau, v_1,mask);
|
||||
#endif
|
||||
|
||||
#elif USE_ARM_SVE
|
||||
|
||||
__mT COEF_TELE = svdup_f64(1.05);
|
||||
__mT v_1 = svdup_f64(1.0);
|
||||
__mT v_half = svdup_f64(0.5);
|
||||
__mT v_m2 = svdup_f64(-2.0);
|
||||
__mT coe_max = svdup_f64(1e19);
|
||||
|
||||
// sigr = COEF * sqrt(v_fac_a);
|
||||
__mT sigr = svmul_f64_z(pg,COEF_TELE,svsqrt_f64_z(pg,v_fac_a));
|
||||
// sigt = COEF * sqrt(v_fac_b);
|
||||
__mT sigt = svmul_f64_z(pg,COEF_TELE,svsqrt_f64_z(pg,v_fac_b));
|
||||
// sigp = COEF * sqrt(v_fac_c);
|
||||
__mT sigp = svmul_f64_z(pg,COEF_TELE,svsqrt_f64_z(pg,v_fac_c));
|
||||
|
||||
// coe = 1.0 / (sigr/D + sigt/D + sigz/D);
|
||||
__mT coe = svdiv_f64_z(pg, v_1, svadd_f64_z(pg, svadd_f64_z(pg, svmul_f64_z(pg, sigr, DR_inv), svmul_f64_z(pg, sigt, DT_inv)), svmul_f64_z(pg, sigp, DP_inv)));
|
||||
// coe becomes inf as sig* goes to 0
|
||||
// if coe > 1e19, set coe = 1e19
|
||||
coe = svmin_f64_z(pg, coe, coe_max);
|
||||
|
||||
// Htau = v_fac_a * square((v_pr1 + v_pr2)*0.5);
|
||||
__mT Htau = svmul_f64_z(pg, v_fac_a, my_square_v(pg, svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pr1, v_pr2))));
|
||||
// Htau += v_fac_b * square((v_pt1 + v_pt2)*0.5))
|
||||
Htau = svadd_f64_z(pg, Htau, svmul_f64_z(pg, v_fac_b, my_square_v(pg, svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pt1, v_pt2)))));
|
||||
// Htau += v_fac_c * square((v_pp1 + v_pp2)*0.5))
|
||||
Htau = svadd_f64_z(pg, Htau, svmul_f64_z(pg, v_fac_c, my_square_v(pg, svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pp1, v_pp2)))));
|
||||
|
||||
// tmp1 = (v_pt1 + v_pt2)*0.5
|
||||
__mT tmp1 = svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pt1, v_pt2));
|
||||
// tmp2 = (v_pp1 + v_pp2)*0.5
|
||||
__mT tmp2 = svmul_f64_z(pg, v_half, svadd_f64_z(pg, v_pp1, v_pp2));
|
||||
// tmp3 = -2.0 * v_fac_f * tmp1 * tmp2;
|
||||
__mT tmp3 = svmul_f64_z(pg, v_m2, svmul_f64_z(pg, v_fac_f, svmul_f64_z(pg, tmp1, tmp2)));
|
||||
|
||||
// Htau = sqrt(Htau + tmp3); // # becamse nan as Htau - tmp3 goes to 0
|
||||
Htau = svsqrt_f64_z(pg, svadd_f64_z(pg, Htau, tmp3));
|
||||
|
||||
// tmp = (sigr*(v_pr2 - v_pr1) + sigt*(v_pt2 - v_pt1) + sigz*(v_pp2 - v_pp1))*0.5
|
||||
__mT tmp = svmul_f64_z(pg, v_half, svadd_f64_z(pg, \
|
||||
svadd_f64_z(pg, \
|
||||
svmul_f64_z(pg, sigr, svsub_f64_z(pg, v_pr2, v_pr1)), \
|
||||
svmul_f64_z(pg, sigt, svsub_f64_z(pg, v_pt2, v_pt1))\
|
||||
), \
|
||||
svmul_f64_z(pg, sigp, svsub_f64_z(pg, v_pp2, v_pp1))\
|
||||
)\
|
||||
);
|
||||
|
||||
// v_tau += coe * ((v_fun - Htau) + tmp) if mask is true
|
||||
v_tau = svadd_f64_z(pg, v_tau, svmul_f64_z(pg, coe, svadd_f64_z(pg, svsub_f64_z(pg, v_fun, Htau), tmp)));
|
||||
// mask = v_change != 1.0
|
||||
svbool_t mask = svcmpne_f64(pg, v_change, v_1);
|
||||
// v_tau = v_1 if mask is true (!= 1.0)
|
||||
v_tau = svsel_f64(mask, v_1, v_tau);
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
// function for calculating the values on boundaries
|
||||
inline void calculate_boundary_nodes_tele_simd(
|
||||
#if USE_ARM_SVE
|
||||
svbool_t const& pg,
|
||||
#endif
|
||||
__mT const& v_iip, __mT const& v_jjt, __mT const& v_kkr,
|
||||
__mT & v_c__,
|
||||
__mT const& v_p__, __mT const& v_m__, __mT const& v__p_, __mT const& v__m_, __mT const& v___p, __mT const& v___m,
|
||||
__mT const& v_pp____, __mT const& v_mm____, __mT const& v___pp__, __mT const& v___mm__, __mT const& v_____pp, __mT const& v_____mm,
|
||||
__mT const& v_change,
|
||||
int const& loc_I, int const& loc_J, int const& loc_K
|
||||
){
|
||||
|
||||
#if USE_AVX512
|
||||
|
||||
// mask for bottom boundary (v_kkr == 0 && v_change == 1.0)
|
||||
__mmaskT mask_bot = _kand_maskT(\
|
||||
_mm512_cmp_pT_mask(v_kkr, v_0, _CMP_EQ_OQ), \
|
||||
_mm512_cmp_pT_mask(v_change, v_1, _CMP_EQ_OQ));
|
||||
// mask for top boundary (v_kkr == loc_K-1 && v_change == 1.0)
|
||||
__mmaskT mask_top = _kand_maskT(\
|
||||
_mm512_cmp_pT_mask(v_kkr, _mmT_set1_pT(loc_K-1), _CMP_EQ_OQ), \
|
||||
_mm512_cmp_pT_mask(v_change, v_1, _CMP_EQ_OQ));
|
||||
// mask for south boundary (v_jjt == 0 && v_change == 1.0)
|
||||
__mmaskT mask_south = _kand_maskT(\
|
||||
_mm512_cmp_pT_mask(v_jjt, v_0, _CMP_EQ_OQ), \
|
||||
_mm512_cmp_pT_mask(v_change, v_1, _CMP_EQ_OQ));
|
||||
// mask for north boundary (v_jjt == loc_J-1 && v_change == 1.0)
|
||||
__mmaskT mask_north = _kand_maskT(\
|
||||
_mm512_cmp_pT_mask(v_jjt, _mmT_set1_pT(loc_J-1), _CMP_EQ_OQ), \
|
||||
_mm512_cmp_pT_mask(v_change, v_1, _CMP_EQ_OQ));
|
||||
// mask for west boundary (v_iip == 0 && v_change == 1.0)
|
||||
__mmaskT mask_west = _kand_maskT(\
|
||||
_mm512_cmp_pT_mask(v_iip, v_0, _CMP_EQ_OQ), \
|
||||
_mm512_cmp_pT_mask(v_change, v_1, _CMP_EQ_OQ));
|
||||
// mask for east boundary (v_iip == loc_I-1 && v_change == 1.0)
|
||||
__mmaskT mask_east = _kand_maskT(\
|
||||
_mm512_cmp_pT_mask(v_iip, _mmT_set1_pT(loc_I-1), _CMP_EQ_OQ), \
|
||||
_mm512_cmp_pT_mask(v_change, v_1, _CMP_EQ_OQ));
|
||||
|
||||
// if mask_bot, v_c__ = max(2*v___p - v_____pp, v_____pp)
|
||||
v_c__ = _mm512_mask_blend_pT(mask_bot, v_c__, _mmT_max_pT(_mmT_sub_pT(_mmT_mul_pT(v_2, v___p), v_____pp), v_____pp));
|
||||
// if mask_top, v_c__ = max(2*v___m - v_____mm, v_____mm)
|
||||
v_c__ = _mm512_mask_blend_pT(mask_top, v_c__, _mmT_max_pT(_mmT_sub_pT(_mmT_mul_pT(v_2, v___m), v_____mm), v_____mm));
|
||||
// if mask_south, v_c__ = max(2*v__p_ - v___pp__, v___pp__)
|
||||
v_c__ = _mm512_mask_blend_pT(mask_south, v_c__, _mmT_max_pT(_mmT_sub_pT(_mmT_mul_pT(v_2, v__p_), v___pp__), v___pp__));
|
||||
// if mask_north, v_c__ = max(2*v__m_ - v___mm__, v___mm__)
|
||||
v_c__ = _mm512_mask_blend_pT(mask_north, v_c__, _mmT_max_pT(_mmT_sub_pT(_mmT_mul_pT(v_2, v__m_), v___mm__), v___mm__));
|
||||
// if mask_west, v_c__ = max(2*v_p__ - v_pp____, v_pp____)
|
||||
v_c__ = _mm512_mask_blend_pT(mask_west, v_c__, _mmT_max_pT(_mmT_sub_pT(_mmT_mul_pT(v_2, v_p__), v_pp____), v_pp____));
|
||||
// if mask_east, v_c__ = max(2*v_m__ - v_mm____, v_mm____)
|
||||
v_c__ = _mm512_mask_blend_pT(mask_east, v_c__, _mmT_max_pT(_mmT_sub_pT(_mmT_mul_pT(v_2, v_m__), v_mm____), v_mm____));
|
||||
#elif USE_AVX
|
||||
|
||||
// mask for bottom boundary (v_kkr == 0 && v_change == 1.0)
|
||||
__m256d mask_bot = _mm256_and_pd(\
|
||||
_mm256_cmp_pT(v_kkr, v_0, _CMP_EQ_OQ), \
|
||||
_mm256_cmp_pT(v_change, v_1, _CMP_EQ_OQ));
|
||||
// mask for top boundary (v_kkr == loc_K-1 && v_change == 1.0)
|
||||
__m256d mask_top = _mm256_and_pd(\
|
||||
_mm256_cmp_pT(v_kkr, _mmT_set1_pT(loc_K-1), _CMP_EQ_OQ), \
|
||||
_mm256_cmp_pT(v_change, v_1, _CMP_EQ_OQ));
|
||||
// mask for south boundary (v_jjt == 0 && v_change == 1.0)
|
||||
__m256d mask_south = _mm256_and_pd(\
|
||||
_mm256_cmp_pT(v_jjt, v_0, _CMP_EQ_OQ), \
|
||||
_mm256_cmp_pT(v_change, v_1, _CMP_EQ_OQ));
|
||||
// mask for north boundary (v_jjt == loc_J-1 && v_change == 1.0)
|
||||
__m256d mask_north = _mm256_and_pd(\
|
||||
_mm256_cmp_pT(v_jjt, _mmT_set1_pT(loc_J-1), _CMP_EQ_OQ), \
|
||||
_mm256_cmp_pT(v_change, v_1, _CMP_EQ_OQ));
|
||||
// mask for west boundary (v_iip == 0 && v_change == 1.0)
|
||||
__m256d mask_west = _mm256_and_pd(\
|
||||
_mm256_cmp_pT(v_iip, v_0, _CMP_EQ_OQ), \
|
||||
_mm256_cmp_pT(v_change, v_1, _CMP_EQ_OQ));
|
||||
// mask for east boundary (v_iip == loc_I-1 && v_change == 1.0)
|
||||
__m256d mask_east = _mm256_and_pd(\
|
||||
_mm256_cmp_pT(v_iip, _mmT_set1_pT(loc_I-1), _CMP_EQ_OQ), \
|
||||
_mm256_cmp_pT(v_change, v_1, _CMP_EQ_OQ));
|
||||
// if mask_bot, v_c__ = max(2*v___p - v_____pp, v_____pp)
|
||||
v_c__ = _mm256_blendv_pd(v_c__, _mm256_max_pd(_mm256_sub_pd(_mm256_mul_pd(v_2, v___p), v_____pp), v_____pp), mask_bot);
|
||||
// if mask_top, v_c__ = max(2*v___m - v_____mm, v_____mm)
|
||||
v_c__ = _mm256_blendv_pd(v_c__, _mm256_max_pd(_mm256_sub_pd(_mm256_mul_pd(v_2, v___m), v_____mm), v_____mm), mask_top);
|
||||
// if mask_south, v_c__ = max(2*v__p_ - v___pp__, v___pp__)
|
||||
v_c__ = _mm256_blendv_pd(v_c__, _mm256_max_pd(_mm256_sub_pd(_mm256_mul_pd(v_2, v__p_), v___pp__), v___pp__), mask_south);
|
||||
// if mask_north, v_c__ = max(2*v__m_ - v___mm__, v___mm__)
|
||||
v_c__ = _mm256_blendv_pd(v_c__, _mm256_max_pd(_mm256_sub_pd(_mm256_mul_pd(v_2, v__m_), v___mm__), v___mm__), mask_north);
|
||||
// if mask_west, v_c__ = max(2*v_p__ - v_pp____, v_pp____)
|
||||
v_c__ = _mm256_blendv_pd(v_c__, _mm256_max_pd(_mm256_sub_pd(_mm256_mul_pd(v_2, v_p__), v_pp____), v_pp____), mask_west);
|
||||
// if mask_east, v_c__ = max(2*v_m__ - v_mm____, v_mm____)
|
||||
v_c__ = _mm256_blendv_pd(v_c__, _mm256_max_pd(_mm256_sub_pd(_mm256_mul_pd(v_2, v_m__), v_mm____), v_mm____), mask_east);
|
||||
|
||||
#elif USE_ARM_SVE
|
||||
|
||||
svfloat64_t v_0 = svdup_f64(0.0);
|
||||
svfloat64_t v_1 = svdup_f64(1.0);
|
||||
svfloat64_t v_2 = svdup_f64(2.0);
|
||||
svfloat64_t v_loc_I_minus_1 = svdup_f64(loc_I-1);
|
||||
svfloat64_t v_loc_J_minus_1 = svdup_f64(loc_J-1);
|
||||
svfloat64_t v_loc_K_minus_1 = svdup_f64(loc_K-1);
|
||||
|
||||
|
||||
// mask for bottom boundary
|
||||
svbool_t mask_bot = svand_b_z(pg, \
|
||||
svcmpeq_f64(pg, v_kkr, v_0), \
|
||||
svcmpeq_f64(pg, v_change, v_1));
|
||||
// mask for top boundary
|
||||
svbool_t mask_top = svand_b_z(pg, \
|
||||
svcmpeq_f64(pg, v_kkr, v_loc_K_minus_1), \
|
||||
svcmpeq_f64(pg, v_change, v_1));
|
||||
// mask for south boundary
|
||||
svbool_t mask_south = svand_b_z(pg, \
|
||||
svcmpeq_f64(pg, v_jjt, v_0), \
|
||||
svcmpeq_f64(pg, v_change, v_1));
|
||||
// mask for north boundary
|
||||
svbool_t mask_north = svand_b_z(pg, \
|
||||
svcmpeq_f64(pg, v_jjt, v_loc_J_minus_1), \
|
||||
svcmpeq_f64(pg, v_change, v_1));
|
||||
// mask for west boundary
|
||||
svbool_t mask_west = svand_b_z(pg, \
|
||||
svcmpeq_f64(pg, v_iip, v_0), \
|
||||
svcmpeq_f64(pg, v_change, v_1));
|
||||
// mask for east boundary
|
||||
svbool_t mask_east = svand_b_z(pg, \
|
||||
svcmpeq_f64(pg, v_iip, v_loc_I_minus_1), \
|
||||
svcmpeq_f64(pg, v_change, v_1));
|
||||
|
||||
// if mask_bot, v_c__ = max(2*v___p - v_____pp, v_____pp)
|
||||
v_c__ = svsel_f64(mask_bot, \
|
||||
svmax_f64_z(pg, svsub_f64_z(pg, svmul_f64_z(pg, v_2, v___p), v_____pp), v_____pp), \
|
||||
v_c__);
|
||||
// if mask_top, v_c__ = max(2*v___m - v_____mm, v_____mm)
|
||||
v_c__ = svsel_f64(mask_top, \
|
||||
svmax_f64_z(pg, svsub_f64_z(pg, svmul_f64_z(pg, v_2, v___m), v_____mm), v_____mm), \
|
||||
v_c__);
|
||||
// if mask_south, v_c__ = max(2*v__p_ - v___pp__, v___pp__)
|
||||
v_c__ = svsel_f64(mask_south, \
|
||||
svmax_f64_z(pg, svsub_f64_z(pg, svmul_f64_z(pg, v_2, v__p_), v___pp__), v___pp__), \
|
||||
v_c__);
|
||||
// if mask_north, v_c__ = max(2*v__m_ - v___mm__, v___mm__)
|
||||
v_c__ = svsel_f64(mask_north, \
|
||||
svmax_f64_z(pg, svsub_f64_z(pg, svmul_f64_z(pg, v_2, v__m_), v___mm__), v___mm__), \
|
||||
v_c__);
|
||||
// if mask_west, v_c__ = max(2*v_p__ - v_pp____, v_pp____)
|
||||
v_c__ = svsel_f64(mask_west, \
|
||||
svmax_f64_z(pg, svsub_f64_z(pg, svmul_f64_z(pg, v_2, v_p__), v_pp____), v_pp____), \
|
||||
v_c__);
|
||||
// if mask_east, v_c__ = max(2*v_m__ - v_mm____, v_mm____)
|
||||
v_c__ = svsel_f64(mask_east, \
|
||||
svmax_f64_z(pg, svsub_f64_z(pg, svmul_f64_z(pg, v_2, v_m__), v_mm____), v_mm____), \
|
||||
v_c__);
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#if USE_AVX512 || USE_AVX
|
||||
|
||||
inline __mT load_mem_gen_to_mTd(CUSTOMREAL* a, int* ijk){
|
||||
|
||||
CUSTOMREAL dump_[NSIMD];
|
||||
for (int i=0; i<NSIMD; i++){
|
||||
dump_[i] = a[ijk[i]];
|
||||
}
|
||||
|
||||
return _mmT_loadu_pT(dump_);
|
||||
}
|
||||
|
||||
inline __mT load_mem_bool_to_mTd(bool* a, int* ijk){
|
||||
|
||||
CUSTOMREAL dump_[NSIMD];
|
||||
for (int i=0; i<NSIMD; i++){
|
||||
dump_[i] = (CUSTOMREAL) a[ijk[i]];
|
||||
}
|
||||
|
||||
return _mmT_loadu_pT(dump_);
|
||||
}
|
||||
|
||||
#elif USE_ARM_SVE
|
||||
|
||||
inline __mT load_mem_gen_to_mTd(svbool_t const& pg, CUSTOMREAL* a, uint64_t* ijk){
|
||||
svuint64_t v_ijk = svld1_u64(pg, ijk);
|
||||
return svld1_gather_u64index_f64(pg, a, v_ijk);
|
||||
}
|
||||
|
||||
inline __mT load_mem_bool_to_mTd(svbool_t const& pg, bool* a, int* ijk){
|
||||
CUSTOMREAL dump_[NSIMD];
|
||||
|
||||
for (int i=0; i<NSIMD; i++){
|
||||
dump_[i] = (CUSTOMREAL) a[ijk[i]];
|
||||
}
|
||||
|
||||
return svld1_f64(pg, dump_); // change this to gather load
|
||||
}
|
||||
|
||||
#endif // __ARM_FEATURE_SVE
|
||||
|
||||
#endif // USE_SIMD
|
||||
|
||||
|
||||
|
||||
#endif // VECTORIZATED_SWEEP_H
|
||||
10
include/version.h.in
Normal file
10
include/version.h.in
Normal file
@@ -0,0 +1,10 @@
|
||||
#ifndef VERSION_H
|
||||
#define VERSION_H
|
||||
|
||||
#define PROJECT_VERSION_MAJOR @PROJECT_VERSION_MAJOR@
|
||||
#define PROJECT_VERSION_MINOR @PROJECT_VERSION_MINOR@
|
||||
#define PROJECT_VERSION_PATCH @PROJECT_VERSION_PATCH@
|
||||
|
||||
#define GIT_COMMIT "@GIT_COMMIT_HASH@"
|
||||
|
||||
#endif // VERSION_H
|
||||
Reference in New Issue
Block a user