#ifndef SMOOTH_H #define SMOOTH_H #include #include "grid.h" #include "config.h" #include "utils.h" void calc_inversed_laplacian(CUSTOMREAL* d, CUSTOMREAL* Ap, const int i, const int j, const int k, const CUSTOMREAL lr, const CUSTOMREAL lt, const CUSTOMREAL lp, const CUSTOMREAL dr, const CUSTOMREAL dt, const CUSTOMREAL dp) { // calculate inversed laplacian operator CUSTOMREAL termx = _0_CR, termy = _0_CR, termz = _0_CR; if (i==0) { termx = dp*lp/3.0 * (1/(lp*dp)*(d[I2V(i,j,k)]) - lp/(dp*dp*dp)*(-2.0*d[I2V(i,j,k)]+2.0*d[I2V(i+1,j,k)])); } else if (i==loc_I-1) { termx = dp*lp/3.0 * (1/(lp*dp)*(d[I2V(i,j,k)]) - lp/(dp*dp*dp)*(-2.0*d[I2V(i,j,k)]+2.0*d[I2V(i-1,j,k)])); } else { termx = dp*lp/3.0 * (1/(lp*dp)*(d[I2V(i,j,k)]) - lp/(dp*dp*dp)*(-2.0*d[I2V(i,j,k)]+d[I2V(i-1,j,k)]+d[I2V(i+1,j,k)])); } if (j==0) { termy = dt*lt/3.0 * (1/(lt*dt)*(d[I2V(i,j,k)]) - lt/(dt*dt*dt)*(-2.0*d[I2V(i,j,k)]+2.0*d[I2V(i,j+1,k)])); } else if (j==loc_J-1) { termy = dt*lt/3.0 * (1/(lt*dt)*(d[I2V(i,j,k)]) - lt/(dt*dt*dt)*(-2.0*d[I2V(i,j,k)]+2.0*d[I2V(i,j-1,k)])); } else { termy = dt*lt/3.0 * (1/(lt*dt)*(d[I2V(i,j,k)]) - lt/(dt*dt*dt)*(-2.0*d[I2V(i,j,k)]+d[I2V(i,j-1,k)]+d[I2V(i,j+1,k)])); } if (k==0) { termz = dr*lr/3.0 * (1/(lr*dr)*(d[I2V(i,j,k)]) - lr/(dr*dr*dr)*(-2.0*d[I2V(i,j,k)]+2.0*d[I2V(i,j,k+1)])); } else if (k==loc_K-1) { termz = dr*lr/3.0 * (1/(lr*dr)*(d[I2V(i,j,k)]) - lr/(dr*dr*dr)*(-2.0*d[I2V(i,j,k)]+2.0*d[I2V(i,j,k-1)])); } else { termz = dr*lr/3.0 * (1/(lr*dr)*(d[I2V(i,j,k)]) - lr/(dr*dr*dr)*(-2.0*d[I2V(i,j,k)]+d[I2V(i,j,k-1)]+d[I2V(i,j,k+1)])); } Ap[I2V(i,j,k)] = termx+termy+termz; } void CG_smooth(Grid& grid, CUSTOMREAL* arr_in, CUSTOMREAL* arr_out, CUSTOMREAL lr, CUSTOMREAL lt, CUSTOMREAL lp) { // arr: array to be smoothed // lr: smooth length on r // lt: smooth length on theta // lp: smooth length on phi // arrays : // x_array model // g_array gradient // d_array descent direction // Ap stiffness scalar (laplacian) // pAp = d_array*Ap // rr = g_array * g_array (dot product) const int max_iter_cg = 1000; const CUSTOMREAL xtol = 0.001; const bool use_scaling = true; CUSTOMREAL dr=grid.dr, dt=grid.dt, dp=grid.dp; // in km, rad, rad //CUSTOMREAL dr=_1_CR,dt=_1_CR,dp=_1_CR; //debug std::cout << "dr, dt, dp, lr, lt, lp = " << dr << " " << dt << " " << dp << " " << lr << " " << lt << " " << lp << std::endl; // allocate memory CUSTOMREAL* x_array = allocateMemory(loc_I*loc_J*loc_K, 3000); CUSTOMREAL* r_array = allocateMemory(loc_I*loc_J*loc_K, 3001); CUSTOMREAL* p_array = allocateMemory(loc_I*loc_J*loc_K, 3002); CUSTOMREAL* Ap = allocateMemory(loc_I*loc_J*loc_K, 3003); CUSTOMREAL pAp=_0_CR, rr_0=_0_CR, rr=_0_CR, rr_new=_0_CR, aa=_0_CR, bb=_0_CR, tmp=_0_CR; CUSTOMREAL scaling_A=_1_CR, scaling_coeff = _1_CR; if (use_scaling) { // calculate scaling factor //scaling_A = std::sqrt(_1_CR / (_8_CR * PI * lr * lt * lp)); // scaling coefficient for gradient scaling_coeff = find_absmax(arr_in, loc_I*loc_J*loc_K); tmp = scaling_coeff; allreduce_cr_single_max(tmp, scaling_coeff); //if (scaling_coeff == _0_CR) if (isZero(scaling_coeff)) scaling_coeff = _1_CR; } // std out scaling factors if (myrank == 0) { std::cout << "scaling_A = " << scaling_A << std::endl; std::cout << "scaling_coeff = " << scaling_coeff << std::endl; } // array initialization for (int i=0; ir.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)])) { kdr = ii_invr; r_r = calc_ratio_between(r_glob, grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r.arr[I2V_INV_GRIDS_1DK(ii_invr+1,i_grid)]); break; } } r_r_ani = -_1_CR; for (int ii_invr = 0; ii_invr < n_inv_K_loc_ani-1; ii_invr++){ // increasing or decreasing order if (in_between(r_glob, grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr+1,i_grid)])) { kdr_ani = ii_invr; r_r_ani = calc_ratio_between(r_glob, grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr,i_grid)], grid.inv_grid->r_ani.arr[I2V_INV_ANI_GRIDS_1DK(ii_invr+1,i_grid)]); break; } } // depth model update taper CUSTOMREAL depth = radius2depth(r_glob); if (depth < taper[0]) { // weight = 0; weight = _0_CR; } else if (depth < taper[1]) { weight = (_1_CR - std::cos(PI*(depth - taper[0])/(taper[1] - taper[0]))) / _2_CR; } else { weight = _1_CR; } // continue if r is out of the inversion grid if (r_r < _0_CR || r_r_ani < _0_CR) continue; for (int j = j_start; j < j_end; j++) { CUSTOMREAL t_glob = grid.get_lat_min() + j*grid.get_delta_lat(); r_t = -_1_CR; for (int ii_invt = 0; ii_invt < n_inv_J_loc-1; ii_invt++){ CUSTOMREAL left = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt, kdr+1,i_grid)]*(r_r); CUSTOMREAL right = grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->t.arr[I2V_INV_GRIDS_2DJ(ii_invt+1,kdr+1,i_grid)]*(r_r); if (in_between(t_glob, left, right)) { jdt = ii_invt; r_t = calc_ratio_between(t_glob, left, right); break; } } r_t_ani = -_1_CR; for (int ii_invt = 0; ii_invt < n_inv_J_loc_ani-1; ii_invt++){ CUSTOMREAL left = grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt, kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt, kdr_ani+1,i_grid)]*(r_r_ani); CUSTOMREAL right = grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt+1,kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->t_ani.arr[I2V_INV_ANI_GRIDS_1DJ(ii_invt+1,kdr_ani+1,i_grid)]*(r_r_ani); if (in_between(t_glob, left, right)) { jdt_ani = ii_invt; r_t_ani = calc_ratio_between(t_glob, left, right); break; } } // continue if t is out of the inversion grid if (r_t < _0_CR || r_t_ani < _0_CR) continue; for (int i = i_start; i < i_end; i++) { CUSTOMREAL p_glob = grid.get_lon_min() + i*grid.get_delta_lon(); r_p = -_1_CR; for (int ii_invp = 0; ii_invp < n_inv_I_loc-1; ii_invp++){ CUSTOMREAL left = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp, kdr+1,i_grid)]*(r_r); CUSTOMREAL right = grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr,i_grid)]*(1-r_r) + grid.inv_grid->p.arr[I2V_INV_GRIDS_2DI(ii_invp+1,kdr+1,i_grid)]*(r_r); if (in_between(p_glob, left, right)) { idp = ii_invp; r_p = calc_ratio_between(p_glob, left, right); break; } } r_p_ani = -_1_CR; for (int ii_invp = 0; ii_invp < n_inv_I_loc_ani-1; ii_invp++){ CUSTOMREAL left = grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp, kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp, kdr_ani+1,i_grid)]*(r_r_ani); CUSTOMREAL right = grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp+1,kdr_ani,i_grid)]*(1-r_r_ani) + grid.inv_grid->p_ani.arr[I2V_INV_ANI_GRIDS_1DI(ii_invp+1,kdr_ani+1,i_grid)]*(r_r_ani); if (in_between(p_glob, left, right)) { idp_ani = ii_invp; r_p_ani = calc_ratio_between(p_glob, left, right); break; } } // continue if p is out of the inversion grid if (r_p < _0_CR || r_p_ani < _0_CR) continue; // global grid id to local id int k_loc = k - grid.get_offset_k(); int j_loc = j - grid.get_offset_j(); int i_loc = i - grid.get_offset_i(); // check if *_loc are inside the local subdomain if (k_loc < 0 || k_loc > loc_K-1) continue; if (j_loc < 0 || j_loc > loc_J-1) continue; if (i_loc < 0 || i_loc > loc_I-1) continue; CUSTOMREAL pert_Ks = 0.0; CUSTOMREAL pert_Keta = 0.0; CUSTOMREAL pert_Kxi = 0.0; CUSTOMREAL pert_Ks_density = 0.0; CUSTOMREAL pert_Keta_density = 0.0; CUSTOMREAL pert_Kxi_density = 0.0; pert_Ks += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)]; pert_Keta += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)]; pert_Kxi += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)]; pert_Ks_density += (_1_CR-r_r)*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp,jdt,kdr)]; pert_Keta_density += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)]; pert_Kxi_density += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani)]; pert_Ks += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)]; pert_Keta += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)]; pert_Kxi += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)]; pert_Ks_density += r_r*(_1_CR-r_t)*(_1_CR-r_p)*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp,jdt,kdr+1)]; pert_Keta_density += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)]; pert_Kxi_density += r_r_ani*(_1_CR-r_t_ani)*(_1_CR-r_p_ani)*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani,kdr_ani+1)]; pert_Ks += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)]; pert_Keta += (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)]; pert_Kxi += (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)]; pert_Ks_density += (_1_CR-r_r)*r_t*(_1_CR-r_p)*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr)]; pert_Keta_density = (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)]; pert_Kxi_density += (_1_CR-r_r_ani)*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani)]; pert_Ks += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)]; pert_Keta += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)]; pert_Kxi += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)]; pert_Ks_density += (_1_CR-r_r)*(_1_CR-r_t)*r_p*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr)]; pert_Keta_density += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)]; pert_Kxi_density += (_1_CR-r_r_ani)*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani)]; pert_Ks += r_r*r_t*(_1_CR-r_p)*grid.Ks_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)]; pert_Keta += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)]; pert_Kxi += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)]; pert_Ks_density += r_r*r_t*(_1_CR-r_p)*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp,jdt+1,kdr+1)]; pert_Keta_density += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)]; pert_Kxi_density += r_r_ani*r_t_ani*(_1_CR-r_p_ani)*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani,jdt_ani+1,kdr_ani+1)]; pert_Ks += r_r*(_1_CR-r_t)*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)]; pert_Keta += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)]; pert_Kxi += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)]; pert_Ks_density += r_r*(_1_CR-r_t)*r_p*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp+1,jdt,kdr+1)]; pert_Keta_density += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)]; pert_Kxi_density += r_r_ani*(_1_CR-r_t_ani)*r_p_ani*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani,kdr_ani+1)]; pert_Ks += (_1_CR-r_r)*r_t*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)]; pert_Keta += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)]; pert_Kxi += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)]; pert_Ks_density += (_1_CR-r_r)*r_t*r_p*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr)]; pert_Keta_density += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)]; pert_Kxi_density += (_1_CR-r_r_ani)*r_t_ani*r_p_ani*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani)]; pert_Ks += r_r*r_t*r_p*grid.Ks_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)]; pert_Keta += r_r_ani*r_t_ani*r_p_ani*grid.Keta_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)]; pert_Kxi += r_r_ani*r_t_ani*r_p_ani*grid.Kxi_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)]; pert_Ks_density += r_r*r_t*r_p*grid.Ks_density_inv_loc[ I2V_INV_KNL(idp+1,jdt+1,kdr+1)]; pert_Keta_density += r_r_ani*r_t_ani*r_p_ani*grid.Keta_density_inv_loc[I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)]; pert_Kxi_density += r_r_ani*r_t_ani*r_p_ani*grid.Kxi_density_inv_loc[ I2V_INV_ANI_KNL(idp_ani+1,jdt_ani+1,kdr_ani+1)]; // update para grid.Ks_update_loc[ I2V(i_loc,j_loc,k_loc)] += weight * pert_Ks; grid.Keta_update_loc[ I2V(i_loc,j_loc,k_loc)] += weight * pert_Keta; grid.Kxi_update_loc[ I2V(i_loc,j_loc,k_loc)] += weight * pert_Kxi; grid.Ks_density_update_loc[ I2V(i_loc,j_loc,k_loc)] += weight * pert_Ks_density; grid.Keta_density_update_loc[ I2V(i_loc,j_loc,k_loc)] += weight * pert_Keta_density; grid.Kxi_density_update_loc[ I2V(i_loc,j_loc,k_loc)] += weight * pert_Kxi_density; } // end for i } // end for j } // end for k } // end i_grid // // rescale kernel update to -1 ~ 1 // // get the scaling factor CUSTOMREAL Linf_Ks = _0_CR, Linf_Keta = _0_CR, Linf_Kxi = _0_CR; // CUSTOMREAL Linf_Kden = _0_CR; for (int k = 1; k < loc_K-1; k++) { for (int j = 1; j < loc_J-1; j++) { for (int i = 1; i < loc_I-1; i++) { Linf_Ks = std::max(Linf_Ks, std::abs(grid.Ks_update_loc[I2V(i,j,k)])); Linf_Keta = std::max(Linf_Keta, std::abs(grid.Keta_update_loc[I2V(i,j,k)])); Linf_Kxi = std::max(Linf_Kxi, std::abs(grid.Kxi_update_loc[I2V(i,j,k)])); } } } // get the maximum scaling factor among subdomains CUSTOMREAL Linf_tmp; allreduce_cr_single_max(Linf_Ks, Linf_tmp); Linf_Ks = Linf_tmp; allreduce_cr_single_max(Linf_Keta, Linf_tmp); Linf_Keta = Linf_tmp; allreduce_cr_single_max(Linf_Kxi, Linf_tmp); Linf_Kxi = Linf_tmp; CUSTOMREAL Linf_all = _0_CR; Linf_all = std::max(Linf_Ks, std::max(Linf_Keta, Linf_Kxi)); Linf_Ks = Linf_all; Linf_Keta = Linf_all; Linf_Kxi = Linf_all; // rescale the kernel update for (int k = 0; k < loc_K; k++) { for (int j = 0; j < loc_J; j++) { for (int i = 0; i < loc_I; i++) { grid.Ks_update_loc[I2V(i,j,k)] /= Linf_Ks; grid.Keta_update_loc[I2V(i,j,k)] /= Linf_Keta; grid.Kxi_update_loc[I2V(i,j,k)] /= Linf_Kxi; } } } } #endif