include/iterator.h

#ifndef ITERATOR_H
#define ITERATOR_H

#include <algorithm>
#include <initializer_list>
#include <vector>
#include <math.h>
#include "grid.h"
#include "input_params.h"
#include "utils.h"
#include "source.h"
#include "io.h"
#include "timer.h"
#include "eikonal_solver_2d.h"

#ifdef USE_CUDA
#include "grid_wrapper.cuh"
#include "iterator_wrapper.cuh"
#endif

//#ifdef USE_SIMD
#include "simd_conf.h"
//#endif

class Iterator {
public:
    Iterator(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);
    virtual ~Iterator();
    // regional source
    void run_iteration_forward(InputParams&, Grid&, IO_utils&, bool&); // run forward iteration till convergence
    void run_iteration_adjoint(InputParams&, Grid&, IO_utils&, int);        // run adjoint iteration till convergence

    void initialize_arrays(InputParams&, IO_utils&, Grid&, Source&, const std::string&); // initialize factors etc.

protected:
    void assign_processes_for_levels(Grid&, InputParams&); // assign intra-node processes for each sweeping level
    void set_sweep_direction(int);                         // set sweep direction
    // regional source
    virtual void do_sweep(int, Grid&, InputParams&){};                // do sweeping with ordinal method
    void calculate_stencil_1st_order(Grid&, int&, int&, int&);        // calculate stencil for 1st order
    void calculate_stencil_3rd_order(Grid&, int&, int&, int&);        // calculate stencil for 3rd order
    void calculate_stencil_1st_order_upwind(Grid&, int&, int&, int&); // calculate stencil for 1st order in upwind form
    void calculate_boundary_nodes(Grid&);                             // calculate boundary values
    // teleseismic source
    void calculate_stencil_1st_order_tele(Grid&, int&, int&, int&);        // calculate stencil for 1st order
    void calculate_stencil_3rd_order_tele(Grid&, int&, int&, int&);        // calculate stencil for 3rd order
    void calculate_stencil_1st_order_upwind_tele(Grid&, int&, int&, int&); // calculate stencil for 1st order in upwind form
    void calculate_boundary_nodes_tele(Grid&, int&, int&, int&);           // calculate boundary values for teleseismic source

    void calculate_boundary_nodes_adj(Grid&, int&, int&, int&);       // calculate boundary values for adjoint source (all zeros)

    // Hamiltonian calculation
    inline CUSTOMREAL calc_LF_Hamiltonian(Grid&, CUSTOMREAL& ,CUSTOMREAL& , \
                                                 CUSTOMREAL& ,CUSTOMREAL& , \
                                                 CUSTOMREAL& ,CUSTOMREAL&, \
                                                 int&, int&, int& );
    inline CUSTOMREAL calc_LF_Hamiltonian_tele(Grid&, CUSTOMREAL& ,CUSTOMREAL& , \
                                                 CUSTOMREAL& ,CUSTOMREAL& , \
                                                 CUSTOMREAL& ,CUSTOMREAL&, \
                                                 int&, int&, int& );

    // methods for adjoint field calculation
    void init_delta_and_Tadj(Grid&, InputParams&);                     // initialize delta and Tadj
    void init_delta_and_Tadj_density(Grid&, InputParams&);             // initialize delta and Tadj_density
    void fix_boundary_Tadj(Grid&);                                     // fix boundary values for Tadj
    virtual void do_sweep_adj(int, Grid&, InputParams&){};             // do sweeping with ordinal method for adjoint field
    void calculate_stencil_adj(Grid&, int&, int&, int&);               // calculate stencil for 1st order for adjoint field

    // grid point information
    int* _nr, *_nt, *_np;           // number of grid points on the direction r, theta, phi
    CUSTOMREAL* _dr, *_dt, *_dp;    // grid spacing on the direction r, theta, phi
    int nr, nt, np;                 // number of grid points on the direction r, theta, phi
    CUSTOMREAL dr, dt, dp;          // grid spacing on the direction r, theta, phi
    int st_level;                   // start level for sweeping
    int ed_level;                   // end level for sweeping
    std::vector< std::vector<int> > ijk_for_this_subproc; // ijk=I2V(i,j,k) for this process (level, ijk)
    int max_n_nodes_plane;                                // maximum number of nodes on a plane


#if defined USE_SIMD || defined USE_CUDA
    // stencil dumps
    // first orders
    CUSTOMREAL *dump_c__;// center of C
    // all grid data expect tau pre-load strategy (iswap, ilevel, inodes)
#if USE_AVX512 || USE_AVX || USE_NEON || defined USE_CUDA
    std::vector<std::vector<int*>> vv_i__j__k__, vv_ip1j__k__, vv_im1j__k__, vv_i__jp1k__, vv_i__jm1k__, vv_i__j__kp1, vv_i__j__km1;
    std::vector<std::vector<int*>>               vv_ip2j__k__, vv_im2j__k__, vv_i__jp2k__, vv_i__jm2k__, vv_i__j__kp2, vv_i__j__km2;
#elif USE_ARM_SVE
    std::vector<std::vector<uint64_t*>> vv_i__j__k__, vv_ip1j__k__, vv_im1j__k__, vv_i__jp1k__, vv_i__jm1k__, vv_i__j__kp1, vv_i__j__km1;
    std::vector<std::vector<uint64_t*>>               vv_ip2j__k__, vv_im2j__k__, vv_i__jp2k__, vv_i__jm2k__, vv_i__j__kp2, vv_i__j__km2;
#endif
    std::vector<std::vector<CUSTOMREAL*>> vv_iip, vv_jjt, vv_kkr;

    std::vector<std::vector<CUSTOMREAL*>> vv_fac_a, vv_fac_b, vv_fac_c, vv_fac_f, vv_T0v, vv_T0r, vv_T0t, vv_T0p, vv_fun;
    std::vector<std::vector<CUSTOMREAL*>>vv_change;
    std::vector<std::vector<bool*>>vv_change_bl;

    template <typename T>
    void preload_indices(std::vector<std::vector<T*>> &vi, std::vector<std::vector<T*>> &, std::vector<std::vector<T*>> &, int, int, int);
    template <typename T>
    void preload_indices_1d(std::vector<std::vector<T*>> &, int, int, int);
    template <typename T>
    std::vector<std::vector<CUSTOMREAL*>> preload_array(T* a);
    std::vector<std::vector<bool*>> preload_array_bl(bool* a);
    template <typename T>
    void free_preloaded_array(std::vector<std::vector<T*>> &vvv){
        for (int iswap = 0; iswap < 8; iswap++){
            for (auto& vv : vvv.at(iswap)) free(vv);
        }
    }
    // flag for deallocation
    bool simd_allocated     = false;
    bool simd_allocated_3rd = false;

#endif // USE_SIMD || USE_CUDA

#ifdef USE_CUDA
    Grid_on_device *gpu_grid;
#endif


    const int nswp = 8;          // number of sweeping directions
    int r_dirc, t_dirc, p_dirc;  // sweeping directions
    CUSTOMREAL sigr, sigt, sigp; //
    CUSTOMREAL coe;
    CUSTOMREAL wp1, pp1, wp2, pp2;
    CUSTOMREAL wt1, pt1, wt2, pt2;
    CUSTOMREAL wr1, pr1, wr2, pr2;
    CUSTOMREAL Htau, tpT;

    CUSTOMREAL ap1, bp1, ap2, bp2, ap, bp;
    CUSTOMREAL at1, bt1, at2, bt2, at, bt;
    CUSTOMREAL ar1, br1, ar2, br2, ar, br;

    CUSTOMREAL bc_f2, eqn_a, eqn_b, eqn_c, eqn_Delta;
    CUSTOMREAL tmp_tau, tmp_T;
    CUSTOMREAL T_r, T_t, T_p, charact_r, charact_t, charact_p;
    bool is_causality;
    int count_cand;
    std::vector<CUSTOMREAL> canditate = std::vector<CUSTOMREAL>(60);


    // iteration control
    int iter_count = 0;
    CUSTOMREAL ini_diff_L1 = HUGE_VAL, ini_diff_Linf = HUGE_VAL;
    CUSTOMREAL ini_err_L1  = HUGE_VAL, ini_err_Linf  = HUGE_VAL;
    CUSTOMREAL cur_diff_L1 = HUGE_VAL, cur_diff_Linf = HUGE_VAL;
    CUSTOMREAL cur_err_L1  = HUGE_VAL, cur_err_Linf  = HUGE_VAL;

    // teleseismic flag
    bool is_teleseismic = false;

    // second run for hybrid order method
    bool is_second_run = false;

};

// define derived classes for each iteration scheme


#endif // ITERATOR_H
initial upload 2025-12-17 10:53:43 +08:00			`#ifndef ITERATOR_H`
			`#define ITERATOR_H`

			`#include <algorithm>`
			`#include <initializer_list>`
			`#include <vector>`
			`#include <math.h>`
			`#include "grid.h"`
			`#include "input_params.h"`
			`#include "utils.h"`
			`#include "source.h"`
			`#include "io.h"`
			`#include "timer.h"`
			`#include "eikonal_solver_2d.h"`

			`#ifdef USE_CUDA`
			`#include "grid_wrapper.cuh"`
			`#include "iterator_wrapper.cuh"`
			`#endif`

			`//#ifdef USE_SIMD`
			`#include "simd_conf.h"`
			`//#endif`

			`class Iterator {`
			`public:`
			`Iterator(InputParams&, Grid&, Source&, IO_utils&, const std::string&, bool, bool, bool);`
			`virtual ~Iterator();`
			`// regional source`
			`void run_iteration_forward(InputParams&, Grid&, IO_utils&, bool&); // run forward iteration till convergence`
			`void run_iteration_adjoint(InputParams&, Grid&, IO_utils&, int); // run adjoint iteration till convergence`

			`void initialize_arrays(InputParams&, IO_utils&, Grid&, Source&, const std::string&); // initialize factors etc.`

			`protected:`
			`void assign_processes_for_levels(Grid&, InputParams&); // assign intra-node processes for each sweeping level`
			`void set_sweep_direction(int); // set sweep direction`
			`// regional source`
			`virtual void do_sweep(int, Grid&, InputParams&){}; // do sweeping with ordinal method`
			`void calculate_stencil_1st_order(Grid&, int&, int&, int&); // calculate stencil for 1st order`
			`void calculate_stencil_3rd_order(Grid&, int&, int&, int&); // calculate stencil for 3rd order`
			`void calculate_stencil_1st_order_upwind(Grid&, int&, int&, int&); // calculate stencil for 1st order in upwind form`
			`void calculate_boundary_nodes(Grid&); // calculate boundary values`
			`// teleseismic source`
			`void calculate_stencil_1st_order_tele(Grid&, int&, int&, int&); // calculate stencil for 1st order`
			`void calculate_stencil_3rd_order_tele(Grid&, int&, int&, int&); // calculate stencil for 3rd order`
			`void calculate_stencil_1st_order_upwind_tele(Grid&, int&, int&, int&); // calculate stencil for 1st order in upwind form`
			`void calculate_boundary_nodes_tele(Grid&, int&, int&, int&); // calculate boundary values for teleseismic source`

			`void calculate_boundary_nodes_adj(Grid&, int&, int&, int&); // calculate boundary values for adjoint source (all zeros)`

			`// Hamiltonian calculation`
			`inline CUSTOMREAL calc_LF_Hamiltonian(Grid&, CUSTOMREAL& ,CUSTOMREAL& , \`
			`CUSTOMREAL& ,CUSTOMREAL& , \`
			`CUSTOMREAL& ,CUSTOMREAL&, \`
			`int&, int&, int& );`
			`inline CUSTOMREAL calc_LF_Hamiltonian_tele(Grid&, CUSTOMREAL& ,CUSTOMREAL& , \`
			`CUSTOMREAL& ,CUSTOMREAL& , \`
			`CUSTOMREAL& ,CUSTOMREAL&, \`
			`int&, int&, int& );`

			`// methods for adjoint field calculation`
			`void init_delta_and_Tadj(Grid&, InputParams&); // initialize delta and Tadj`
			`void init_delta_and_Tadj_density(Grid&, InputParams&); // initialize delta and Tadj_density`
			`void fix_boundary_Tadj(Grid&); // fix boundary values for Tadj`
			`virtual void do_sweep_adj(int, Grid&, InputParams&){}; // do sweeping with ordinal method for adjoint field`
			`void calculate_stencil_adj(Grid&, int&, int&, int&); // calculate stencil for 1st order for adjoint field`

			`// grid point information`
			`int* _nr, _nt, _np; // number of grid points on the direction r, theta, phi`
			`CUSTOMREAL* _dr, _dt, _dp; // grid spacing on the direction r, theta, phi`
			`int nr, nt, np; // number of grid points on the direction r, theta, phi`
			`CUSTOMREAL dr, dt, dp; // grid spacing on the direction r, theta, phi`
			`int st_level; // start level for sweeping`
			`int ed_level; // end level for sweeping`
			`std::vector< std::vector<int> > ijk_for_this_subproc; // ijk=I2V(i,j,k) for this process (level, ijk)`
			`int max_n_nodes_plane; // maximum number of nodes on a plane`


			`#if defined USE_SIMD \|\| defined USE_CUDA`
			`// stencil dumps`
			`// first orders`
			`CUSTOMREAL *dump_c__;// center of C`
			`// all grid data expect tau pre-load strategy (iswap, ilevel, inodes)`
			`#if USE_AVX512 \|\| USE_AVX \|\| USE_NEON \|\| defined USE_CUDA`
			`std::vector<std::vector<int*>> vv_i__j__k__, vv_ip1j__k__, vv_im1j__k__, vv_i__jp1k__, vv_i__jm1k__, vv_i__j__kp1, vv_i__j__km1;`
			`std::vector<std::vector<int*>> vv_ip2j__k__, vv_im2j__k__, vv_i__jp2k__, vv_i__jm2k__, vv_i__j__kp2, vv_i__j__km2;`
			`#elif USE_ARM_SVE`
			`std::vector<std::vector<uint64_t*>> vv_i__j__k__, vv_ip1j__k__, vv_im1j__k__, vv_i__jp1k__, vv_i__jm1k__, vv_i__j__kp1, vv_i__j__km1;`
			`std::vector<std::vector<uint64_t*>> vv_ip2j__k__, vv_im2j__k__, vv_i__jp2k__, vv_i__jm2k__, vv_i__j__kp2, vv_i__j__km2;`
			`#endif`
			`std::vector<std::vector<CUSTOMREAL*>> vv_iip, vv_jjt, vv_kkr;`

			`std::vector<std::vector<CUSTOMREAL*>> vv_fac_a, vv_fac_b, vv_fac_c, vv_fac_f, vv_T0v, vv_T0r, vv_T0t, vv_T0p, vv_fun;`
			`std::vector<std::vector<CUSTOMREAL*>>vv_change;`
			`std::vector<std::vector<bool*>>vv_change_bl;`

			`template <typename T>`
			`void preload_indices(std::vector<std::vector<T>> &vi, std::vector<std::vector<T>> &, std::vector<std::vector<T*>> &, int, int, int);`
			`template <typename T>`
			`void preload_indices_1d(std::vector<std::vector<T*>> &, int, int, int);`
			`template <typename T>`
			`std::vector<std::vector<CUSTOMREAL>> preload_array(T a);`
			`std::vector<std::vector<bool>> preload_array_bl(bool a);`
			`template <typename T>`
			`void free_preloaded_array(std::vector<std::vector<T*>> &vvv){`
			`for (int iswap = 0; iswap < 8; iswap++){`
			`for (auto& vv : vvv.at(iswap)) free(vv);`
			`}`
			`}`
			`// flag for deallocation`
			`bool simd_allocated = false;`
			`bool simd_allocated_3rd = false;`

			`#endif // USE_SIMD \|\| USE_CUDA`

			`#ifdef USE_CUDA`
			`Grid_on_device *gpu_grid;`
			`#endif`


			`const int nswp = 8; // number of sweeping directions`
			`int r_dirc, t_dirc, p_dirc; // sweeping directions`
			`CUSTOMREAL sigr, sigt, sigp; //`
			`CUSTOMREAL coe;`
			`CUSTOMREAL wp1, pp1, wp2, pp2;`
			`CUSTOMREAL wt1, pt1, wt2, pt2;`
			`CUSTOMREAL wr1, pr1, wr2, pr2;`
			`CUSTOMREAL Htau, tpT;`

			`CUSTOMREAL ap1, bp1, ap2, bp2, ap, bp;`
			`CUSTOMREAL at1, bt1, at2, bt2, at, bt;`
			`CUSTOMREAL ar1, br1, ar2, br2, ar, br;`

			`CUSTOMREAL bc_f2, eqn_a, eqn_b, eqn_c, eqn_Delta;`
			`CUSTOMREAL tmp_tau, tmp_T;`
			`CUSTOMREAL T_r, T_t, T_p, charact_r, charact_t, charact_p;`
			`bool is_causality;`
			`int count_cand;`
			`std::vector<CUSTOMREAL> canditate = std::vector<CUSTOMREAL>(60);`


			`// iteration control`
			`int iter_count = 0;`
			`CUSTOMREAL ini_diff_L1 = HUGE_VAL, ini_diff_Linf = HUGE_VAL;`
			`CUSTOMREAL ini_err_L1 = HUGE_VAL, ini_err_Linf = HUGE_VAL;`
			`CUSTOMREAL cur_diff_L1 = HUGE_VAL, cur_diff_Linf = HUGE_VAL;`
			`CUSTOMREAL cur_err_L1 = HUGE_VAL, cur_err_Linf = HUGE_VAL;`

			`// teleseismic flag`
			`bool is_teleseismic = false;`

			`// second run for hybrid order method`
			`bool is_second_run = false;`

			`};`

			`// define derived classes for each iteration scheme`


			`#endif // ITERATOR_H`