TomoATT/include/model_update.h

#ifndef MODEL_UPDATE_H
#define MODEL_UPDATE_H

#include <cmath>
#include "config.h"
#include "grid.h"
#include "smooth.h"
#include "smooth_descent_dir.h"
#include "smooth_grad_regul.h"
#include "lbfgs.h"


// generate smoothed kernels (K*_update_loc) from the kernels (K*_loc)
// before doing this, K*_loc should be summed up among all simultaneous runs (by calling sumup_kernels)
// before doing this, K*_update_loc has no meaning (unavailable)
void smooth_kernels(Grid& grid, InputParams& IP) {

    if (subdom_main){ // parallel level 3

        if (id_sim==0){ // parallel level 1
            // initiaize update params
            for (int k = 0; k < loc_K; k++) {
                for (int j = 0; j < loc_J; j++) {
                    for (int i = 0; i < loc_I; i++) {
                        grid.Ks_update_loc_previous[I2V(i,j,k)]   = grid.Ks_update_loc[I2V(i,j,k)];
                        grid.Keta_update_loc_previous[I2V(i,j,k)] = grid.Keta_update_loc[I2V(i,j,k)];
                        grid.Kxi_update_loc_previous[I2V(i,j,k)]  = grid.Kxi_update_loc[I2V(i,j,k)];
                        grid.Ks_update_loc[I2V(i,j,k)]   = _0_CR;
                        grid.Keta_update_loc[I2V(i,j,k)] = _0_CR;
                        grid.Kxi_update_loc[I2V(i,j,k)]  = _0_CR;
                        // grid.Ks_density_update_loc[I2V(i,j,k)]  = _0_CR;
                        // grid.Kxi_density_update_loc[I2V(i,j,k)] = _0_CR;
                        // grid.Keta_density_update_loc[I2V(i,j,k)] = _0_CR;
                    }
                }
            }

            // check kernel
            CUSTOMREAL max_kernel = _0_CR;
            for (int k = 0; k < loc_K; k++) {
                for (int j = 0; j < loc_J; j++) {
                    for (int i = 0; i < loc_I; i++) {
                        max_kernel = std::max(max_kernel, std::abs(grid.Ks_loc[I2V(i,j,k)]));
                    }
                }
            }
            CUSTOMREAL tmp = 0;
            allreduce_cr_single_max(max_kernel, tmp);
            max_kernel = tmp;

            if (max_kernel <= eps) {
                std::cout << "Error: max_kernel is near zero (less than 10^-12), check data residual and whether no data is used" << std::endl;
                exit(1);
            }

            if (smooth_method == 0) {
                // Ks_loc, Keta_loc, Kxi_loc to be 0 for ghost layers to eliminate the effect of overlapping boundary
                for (int k = 0; k < loc_K; k++) {
                    for (int j = 0; j < loc_J; j++) {
                        for (int i = 0; i < loc_I; i++) {

                            if (i == 0 && !grid.i_first()) {
                                grid.Ks_loc[I2V(i,j,k)]         = _0_CR;
                                grid.Keta_loc[I2V(i,j,k)]       = _0_CR;
                                grid.Kxi_loc[I2V(i,j,k)]        = _0_CR;
                                grid.Ks_density_loc[I2V(i,j,k)]     = _0_CR;
                                grid.Kxi_density_loc[I2V(i,j,k)]    = _0_CR;
                                grid.Keta_density_loc[I2V(i,j,k)]   = _0_CR;
                            }
                            if (i == loc_I-1 && !grid.i_last()) {
                                grid.Ks_loc[I2V(i,j,k)]         = _0_CR;
                                grid.Keta_loc[I2V(i,j,k)]       = _0_CR;
                                grid.Kxi_loc[I2V(i,j,k)]        = _0_CR;
                                grid.Ks_density_loc[I2V(i,j,k)]     = _0_CR;
                                grid.Kxi_density_loc[I2V(i,j,k)]    = _0_CR;
                                grid.Keta_density_loc[I2V(i,j,k)]   = _0_CR;
                            }
                            if (j == 0 && !grid.j_first()) {
                                grid.Ks_loc[I2V(i,j,k)]         = _0_CR;
                                grid.Keta_loc[I2V(i,j,k)]       = _0_CR;
                                grid.Kxi_loc[I2V(i,j,k)]        = _0_CR;
                                grid.Ks_density_loc[I2V(i,j,k)]     = _0_CR;
                                grid.Kxi_density_loc[I2V(i,j,k)]    = _0_CR;
                                grid.Keta_density_loc[I2V(i,j,k)]   = _0_CR;
                            }
                            if (j == loc_J-1 && !grid.j_last()) {
                                grid.Ks_loc[I2V(i,j,k)]         = _0_CR;
                                grid.Keta_loc[I2V(i,j,k)]       = _0_CR;
                                grid.Kxi_loc[I2V(i,j,k)]        = _0_CR;
                                grid.Ks_density_loc[I2V(i,j,k)]     = _0_CR;
                                grid.Kxi_density_loc[I2V(i,j,k)]    = _0_CR;
                                grid.Keta_density_loc[I2V(i,j,k)]   = _0_CR;
                            }
                            if (k == 0 && !grid.k_first()) {
                                grid.Ks_loc[I2V(i,j,k)]         = _0_CR;
                                grid.Keta_loc[I2V(i,j,k)]       = _0_CR;
                                grid.Kxi_loc[I2V(i,j,k)]        = _0_CR;
                                grid.Ks_density_loc[I2V(i,j,k)]     = _0_CR;
                                grid.Kxi_density_loc[I2V(i,j,k)]    = _0_CR;
                                grid.Keta_density_loc[I2V(i,j,k)]   = _0_CR;
                            }
                            if (k == loc_K-1 && !grid.k_last()) {
                                grid.Ks_loc[I2V(i,j,k)]         = _0_CR;
                                grid.Keta_loc[I2V(i,j,k)]       = _0_CR;
                                grid.Kxi_loc[I2V(i,j,k)]        = _0_CR;
                                grid.Ks_density_loc[I2V(i,j,k)]     = _0_CR;
                                grid.Kxi_density_loc[I2V(i,j,k)]    = _0_CR;
                                grid.Keta_density_loc[I2V(i,j,k)]   = _0_CR;
                            }
                        }
                    }
                }

                // grid based smoothing
                smooth_inv_kernels_orig(grid, IP);
            } else if (smooth_method == 1) {
                // CG smoothing
                smooth_inv_kernels_CG(grid, smooth_lr, smooth_lt, smooth_lp);
            }

            // shared values on the boundary
            grid.send_recev_boundary_data(grid.Ks_update_loc);
            grid.send_recev_boundary_data(grid.Keta_update_loc);
            grid.send_recev_boundary_data(grid.Kxi_update_loc);
            grid.send_recev_boundary_data(grid.Ks_density_update_loc);
            grid.send_recev_boundary_data(grid.Kxi_density_update_loc);
            grid.send_recev_boundary_data(grid.Keta_density_update_loc);

            grid.send_recev_boundary_data_kosumi(grid.Ks_update_loc);
            grid.send_recev_boundary_data_kosumi(grid.Keta_update_loc);
            grid.send_recev_boundary_data_kosumi(grid.Kxi_update_loc);
            grid.send_recev_boundary_data_kosumi(grid.Ks_density_update_loc);
            grid.send_recev_boundary_data_kosumi(grid.Kxi_density_update_loc);
            grid.send_recev_boundary_data_kosumi(grid.Keta_density_update_loc);

        } // end if id_sim == 0

        // send the updated model to all the simultaneous run
        broadcast_cr_inter_sim(grid.Ks_update_loc, loc_I*loc_J*loc_K, 0);
        broadcast_cr_inter_sim(grid.Kxi_update_loc, loc_I*loc_J*loc_K, 0);
        broadcast_cr_inter_sim(grid.Keta_update_loc, loc_I*loc_J*loc_K, 0);
        broadcast_cr_inter_sim(grid.Ks_density_update_loc, loc_I*loc_J*loc_K, 0);
        broadcast_cr_inter_sim(grid.Kxi_density_update_loc, loc_I*loc_J*loc_K, 0);
        broadcast_cr_inter_sim(grid.Keta_density_update_loc, loc_I*loc_J*loc_K, 0);

        // send the previous updated model to all the simultaneous run
        broadcast_cr_inter_sim(grid.Ks_update_loc_previous, loc_I*loc_J*loc_K, 0);
        broadcast_cr_inter_sim(grid.Kxi_update_loc_previous, loc_I*loc_J*loc_K, 0);
        broadcast_cr_inter_sim(grid.Keta_update_loc_previous, loc_I*loc_J*loc_K, 0);

    } // end if subdom_main

    synchronize_all_world();
}


// smooth gradient regularization term
void smooth_gradient_regularization(Grid& grid) {

    if (subdom_main && id_sim==0){ // only id_sim==0 has values for these arrays
        if (smooth_method == 0) {
            // grid based smoothing
            smooth_gradient_regularization_orig(grid);
        } else if (smooth_method == 1) {
            // CG smoothing
            smooth_gradient_regularization_CG(grid, smooth_lr, smooth_lt, smooth_lp);
        }

        grid.send_recev_boundary_data(grid.fun_gradient_regularization_penalty_loc);
        grid.send_recev_boundary_data(grid.eta_gradient_regularization_penalty_loc);
        grid.send_recev_boundary_data(grid.xi_gradient_regularization_penalty_loc);
    }
}


void smooth_descent_direction(Grid& grid){
    if (subdom_main){
       if (id_sim == 0) { // calculation of the update model is only done in the main simultaneous run
            if (smooth_method == 0) {
                // grid based smoothing
                smooth_descent_dir(grid);
            } else if (smooth_method == 1) {
                // CG smoothing
                smooth_descent_dir_CG(grid, smooth_lr, smooth_lt, smooth_lp);
            }

            // shared values on the boundary
            grid.send_recev_boundary_data(  grid.Ks_descent_dir_loc);
            grid.send_recev_boundary_data(grid.Keta_descent_dir_loc);
            grid.send_recev_boundary_data( grid.Kxi_descent_dir_loc);

        } // end if id_sim == 0

        // send the updated model to all the simultaneous run
        broadcast_cr_inter_sim(  grid.Ks_descent_dir_loc, loc_I*loc_J*loc_K, 0);
        broadcast_cr_inter_sim( grid.Kxi_descent_dir_loc, loc_I*loc_J*loc_K, 0);
        broadcast_cr_inter_sim(grid.Keta_descent_dir_loc, loc_I*loc_J*loc_K, 0);

    }
}


void calc_descent_direction(Grid& grid, int i_inv, InputParams& IP) {

    if (subdom_main) {

        // routines for LBFGS
        if (optim_method == LBFGS_MODE) {

            // calculate the descent direction
            if (i_inv > 0) {
                calculate_descent_direction_lbfgs(grid, i_inv);
            // use gradient for the first iteration
            } else {
                int n_grid = loc_I*loc_J*loc_K;

                // first time, descent direction = - precond * gradient
                // inverse the gradient to fit the update scheme for LBFGS
                for (int i = 0; i < n_grid; i++){
                    grid.Ks_descent_dir_loc[i]   = - _1_CR* grid.Ks_update_loc[i];
                    grid.Keta_descent_dir_loc[i] = - _1_CR* grid.Keta_update_loc[i];
                    grid.Kxi_descent_dir_loc[i]  = - _1_CR* grid.Kxi_update_loc[i];
                    //grid.Ks_descent_dir_loc[i]   = - _1_CR* grid.Ks_loc[i];
                    //grid.Keta_descent_dir_loc[i] = - _1_CR* grid.Keta_loc[i];
                    //grid.Kxi_descent_dir_loc[i]  = - _1_CR* grid.Kxi_loc[i];

                }
            }
        } else {
            // return error
            std::cout << "Error: optim_method is not set to LBFGS_MODE (=2)" << std::endl;
            exit(1);
        }


    } // end if subdom_main

    synchronize_all_world();

}

CUSTOMREAL direction_change_of_model_update(Grid& grid){
    CUSTOMREAL norm_grad = _0_CR;
    CUSTOMREAL norm_grad_previous = _0_CR;
    CUSTOMREAL inner_product = _0_CR;
    CUSTOMREAL cos_angle = _0_CR;
    CUSTOMREAL angle = _0_CR;
    if (subdom_main) {
        // initiaize update params
        inner_product      = dot_product(grid.Ks_update_loc_previous, grid.Ks_update_loc, loc_I*loc_J*loc_K);
        norm_grad          = dot_product(grid.Ks_update_loc, grid.Ks_update_loc, loc_I*loc_J*loc_K);
        norm_grad_previous = dot_product(grid.Ks_update_loc_previous, grid.Ks_update_loc_previous, loc_I*loc_J*loc_K);

        CUSTOMREAL tmp;
        allreduce_cr_single(inner_product,tmp);
        inner_product = tmp;

        allreduce_cr_single(norm_grad,tmp);
        norm_grad = tmp;

        allreduce_cr_single(norm_grad_previous,tmp);
        norm_grad_previous = tmp;

        cos_angle = inner_product / (std::sqrt(norm_grad) * std::sqrt(norm_grad_previous));
        angle     = acos(cos_angle) * RAD2DEG;
    }

    return angle;
}


void set_new_model(Grid& grid, CUSTOMREAL step_length_new, bool init_bfgs=false) {

    if (subdom_main) {

        // for LBFGS mode. K*_update_loc is not directly the descent direction but smoothed gradient
        // so for non LBFGS_mode, nextstep will be calculated with K*_update_loc
        if (optim_method != LBFGS_MODE) {
            // // get the scaling factor
            // CUSTOMREAL Linf_Ks = _0_CR, Linf_Keta = _0_CR, Linf_Kxi = _0_CR;
            // for (int k = 1; k < loc_K-1; k++) {
            //     for (int j = 1; j < loc_J-1; j++) {
            //         for (int i = 1; i < loc_I-1; i++) {
            //             Linf_Ks   = std::max(Linf_Ks,   std::abs(grid.Ks_update_loc[I2V(i,j,k)]));
            //             Linf_Keta = std::max(Linf_Keta, std::abs(grid.Keta_update_loc[I2V(i,j,k)]));
            //             Linf_Kxi  = std::max(Linf_Kxi,  std::abs(grid.Kxi_update_loc[I2V(i,j,k)]));
            //         }
            //     }
            // }

            // // get the maximum scaling factor among subdomains
            // CUSTOMREAL Linf_tmp;
            // allreduce_cr_single_max(Linf_Ks, Linf_tmp);   Linf_Ks = Linf_tmp;
            // allreduce_cr_single_max(Linf_Keta, Linf_tmp); Linf_Keta = Linf_tmp;
            // allreduce_cr_single_max(Linf_Kxi, Linf_tmp);  Linf_Kxi = Linf_tmp;

            // CUSTOMREAL Linf_all = _0_CR;
            // Linf_all = std::max(Linf_Ks, std::max(Linf_Keta, Linf_Kxi));

            // // if (myrank == 0 && id_sim == 0)
            // //    std::cout << "Scaling factor for all kernels: " << Linf_all << std::endl;
            // //    std::cout << "Scaling factor for model update for Ks, Keta, Kx, stepsize: " << Linf_Ks << ", " << Linf_Keta << ", " << Linf_Kxi << ", " << step_length_new << std::endl;


            // Linf_Ks = Linf_all;
            // Linf_Keta = Linf_all;
            // Linf_Kxi = Linf_all;

            // kernel update has been rescaled in "smooth_inv_kernels_orig"

            // update the model
            for (int k = 0; k < loc_K; k++) {
                for (int j = 0; j < loc_J; j++) {
                    for (int i = 0; i < loc_I; i++) {

                        // // update
                        // grid.fun_loc[I2V(i,j,k)] *= (_1_CR - grid.Ks_update_loc[I2V(i,j,k)  ] / (Linf_Ks   /step_length_new) );
                        // grid.xi_loc[I2V(i,j,k)]  -=          grid.Kxi_update_loc[I2V(i,j,k) ] / (Linf_Kxi  /step_length_new)  ;
                        // grid.eta_loc[I2V(i,j,k)] -=          grid.Keta_update_loc[I2V(i,j,k)] / (Linf_Keta /step_length_new)  ;

                        // update
                        grid.fun_loc[I2V(i,j,k)] *= (_1_CR - grid.Ks_update_loc[I2V(i,j,k)  ] * step_length_new);
                        grid.xi_loc[I2V(i,j,k)]  -=          grid.Kxi_update_loc[I2V(i,j,k) ] * step_length_new;
                        grid.eta_loc[I2V(i,j,k)] -=          grid.Keta_update_loc[I2V(i,j,k)] * step_length_new;


                        // grid.fac_b_loc[I2V(i,j,k)] = _1_CR - _2_CR * grid.xi_loc[I2V(i,j,k)];
                        // grid.fac_c_loc[I2V(i,j,k)] = _1_CR + _2_CR * grid.xi_loc[I2V(i,j,k)];
                        // grid.fac_f_loc[I2V(i,j,k)] =       - _2_CR * grid.eta_loc[I2V(i,j,k)];

                        // grid.fun_loc[I2V(i,j,k)] *= (_1_CR);
                        // grid.xi_loc[I2V(i,j,k)]  -= (_0_CR);
                        // grid.eta_loc[I2V(i,j,k)] -= (_0_CR);

                        // grid.fac_b_loc[I2V(i,j,k)] = _1_CR - _2_CR * grid.xi_loc[I2V(i,j,k)];
                        // grid.fac_c_loc[I2V(i,j,k)] = _1_CR + _2_CR * grid.xi_loc[I2V(i,j,k)];
                        // grid.fac_f_loc[I2V(i,j,k)] =       - _2_CR * grid.eta_loc[I2V(i,j,k)];

                    }
                }
            }

            grid.rejuvenate_abcf();


        } else { // for LBFGS routine

            // here all the simultaneous runs have the same values used in this routine.
            // thus we don't need to if(id_sim==0)

            CUSTOMREAL step_length = step_length_new;
            const CUSTOMREAL factor = - _1_CR;

            // update the model
            for (int k = 0; k < loc_K; k++) {
                for (int j = 0; j < loc_J; j++) {
                    for (int i = 0; i < loc_I; i++) {
                        // update
                        grid.fun_loc[I2V(i,j,k)] *= (_1_CR - factor * grid.Ks_descent_dir_loc[I2V(i,j,k)]   * step_length);
                        grid.xi_loc[I2V(i,j,k)]  -=          factor * grid.Kxi_descent_dir_loc[I2V(i,j,k) ] * step_length;
                        grid.eta_loc[I2V(i,j,k)] -=          factor * grid.Keta_descent_dir_loc[I2V(i,j,k)] * step_length;
                        //grid.fun_loc[I2V(i,j,k)] += grid.Ks_descent_dir_loc[I2V(i,j,k)]  *step_length_new;
                        //grid.xi_loc[I2V(i,j,k)]  += grid.Kxi_descent_dir_loc[I2V(i,j,k)] *step_length_new;
                        //grid.eta_loc[I2V(i,j,k)] += grid.Keta_descent_dir_loc[I2V(i,j,k)]*step_length_new;

                        grid.fac_b_loc[I2V(i,j,k)] = _1_CR - _2_CR * grid.xi_loc[I2V(i,j,k)];
                        grid.fac_c_loc[I2V(i,j,k)] = _1_CR + _2_CR * grid.xi_loc[I2V(i,j,k)];
                        grid.fac_f_loc[I2V(i,j,k)] =       - _2_CR * grid.eta_loc[I2V(i,j,k)];
                    }
                }
            }

        } // end LBFGS model update

        // shared values on the boundary
        grid.send_recev_boundary_data(grid.fun_loc);
        grid.send_recev_boundary_data(grid.xi_loc);
        grid.send_recev_boundary_data(grid.eta_loc);
        grid.send_recev_boundary_data(grid.fac_b_loc);
        grid.send_recev_boundary_data(grid.fac_c_loc);
        grid.send_recev_boundary_data(grid.fac_f_loc);

    } // end if subdom_main
}


// compute p_k * grad(f_k)
CUSTOMREAL compute_q_k(Grid& grid) {

    CUSTOMREAL tmp_qk = _0_CR;

    if (subdom_main && id_sim == 0) {

        // grad * descent_direction
        for (int k = 0; k < loc_K; k++) {
            for (int j = 0; j < loc_J; j++) {
                for (int i = 0; i < loc_I; i++) {
                    // we can add here a precondition (here use *_update_loc for storing descent direction)
                    tmp_qk += grid.Ks_update_loc[I2V(i,j,k)]   * grid.Ks_descent_dir_loc[I2V(i,j,k)]
                            + grid.Keta_update_loc[I2V(i,j,k)] * grid.Keta_descent_dir_loc[I2V(i,j,k)]
                            + grid.Kxi_update_loc[I2V(i,j,k)]  * grid.Kxi_descent_dir_loc[I2V(i,j,k)];
                }
            }
        }

        // add tmp_qk among all subdomain
        allreduce_cr_single(tmp_qk, tmp_qk);
    }

    // share tmp_qk among all simultaneous runs
    if (subdom_main)
        broadcast_cr_single_inter_sim(tmp_qk,0);

    return tmp_qk;

}


// check if the wolfe conditions are satisfied
bool check_wolfe_cond(Grid& grid, \
                    CUSTOMREAL q_0, CUSTOMREAL q_t, \
                    CUSTOMREAL qp_0, CUSTOMREAL qp_t, \
                    CUSTOMREAL& td, CUSTOMREAL& tg, CUSTOMREAL& step_length_sub) {
    /*
    q_0 : initial cost function
    q_t : current cost function
    q_p0 : initial grad * descent_dir
    q_pt : current grad * descent_dir
    td : right step size
    tg : left step size
    step_length_sub : current step size
    */

    bool step_accepted = false;

    // check  if the wolfe conditions are satisfied and update the step_length_sub
    CUSTOMREAL qpt = (q_t - q_0) / step_length_sub;

    if (subdom_main) {
        // good step size
        if (qpt   <= wolfe_c1*qp_0 \
         && wolfe_c2*qp_0 <= qp_t \
         ){
            if (myrank==0)
                std::cout << "Wolfe rules:  step accepted" << std::endl;
            step_accepted = true;
        } else {
            // modify the stepsize
            if (wolfe_c1*qp_0 < qpt) {
                td = step_length_sub;
                if (myrank==0)
                    std::cout << "Wolfe rules:  right step size updated." << std::endl;
            }
            if (qpt <= wolfe_c1*qp_0 && qp_t < wolfe_c2*qp_0) {
                tg = step_length_sub;
                if (myrank==0)
                    std::cout << "Wolfe rules:  left step size updated." << std::endl;
            }
            //if (isZero(td)) {
            if (td == _0_CR) {
                step_length_sub = _2_CR * step_length_sub;
                if (myrank==0)
                    std::cout << "Wolfe rules:  step size too small. Increased to " << step_length_sub << std::endl;
            } else {
                step_length_sub = _0_5_CR * (td+tg);
                if (myrank==0)
                    std::cout << "Wolfe rules:  step size too large. Decreased to " << step_length_sub << std::endl;
            }
        }

        // share step_accepted among all subdomains
        broadcast_bool_single(step_accepted,0);
        broadcast_cr_single(step_length_sub,0);
    } // end if subdom_main

    broadcast_bool_single_sub(step_accepted,0);
    broadcast_cr_single_sub(step_length_sub,0);

    return step_accepted;
}


#endif // MODEL_UPDATE_H