initial upload

2025-12-17 10:53:43 +08:00
commit f3f1778f77
308 changed files with 129940 additions and 0 deletions

0
utils/__init__.py Normal file

67
utils/compare_models.py Normal file

@@ -0,0 +1,67 @@
#!/usr/bin/env python3
import h5py
import numpy as np
import argparse


def read_model_h5(model_path):
    with h5py.File(model_path, 'r') as f:
        # read the xi, eta and vel datasets
        xi = f['xi'][:]
        eta = f['eta'][:]
        vel = f['vel'][:]
    return xi, eta, vel


# main function
if __name__ == '__main__':
    """
    Compare two models.
    Parse args:
        -t : true model file path
        -r : result model file path
    """
    parser = argparse.ArgumentParser(description='Compare two models')
    parser.add_argument('-t', '--true', type=str, help='true model file path')
    parser.add_argument('-r', '--result', type=str, help='result model file path')
    args = parser.parse_args()

    # read the true model
    xi_true, eta_true, vel_true = read_model_h5(args.true)
    # read the result model
    xi_result, eta_result, vel_result = read_model_h5(args.result)

    # check shapes
    if xi_true.shape != xi_result.shape:
        print('xi shapes do not match')
        exit(1)
    if eta_true.shape != eta_result.shape:
        print('eta shapes do not match')
        exit(1)
    if vel_true.shape != vel_result.shape:
        print('vel shapes do not match')
        exit(1)

    # print model info
    print('model info:')
    print('vel shape: ', vel_true.shape)

    # compare maximum absolute errors
    print('vel max error: ', np.max(np.abs(vel_true - vel_result)))
    print('xi max error: ', np.max(np.abs(xi_true - xi_result)))
    print('eta max error: ', np.max(np.abs(eta_true - eta_result)))

    # L2 norms of the differences
    print('vel L2 norm: ', np.linalg.norm(vel_true - vel_result))
    print('xi L2 norm: ', np.linalg.norm(xi_true - xi_result))
    print('eta L2 norm: ', np.linalg.norm(eta_true - eta_result))

    # exit
    exit(0)
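
For a quick smoke test of this script, a minimal sketch that fabricates two model files with the dataset layout read_model_h5 expects; the grid size, values and file names below are invented for illustration, only the dataset names (xi, eta, vel) come from the script above:

import h5py
import numpy as np

def write_dummy_model(path, perturb=0.0):
    # write xi/eta/vel datasets in the layout read_model_h5 expects
    rng = np.random.default_rng(0)
    with h5py.File(path, 'w') as f:
        f['xi'] = np.zeros((4, 4, 4))
        f['eta'] = np.zeros((4, 4, 4))
        f['vel'] = 6.0 + perturb * rng.standard_normal((4, 4, 4))

write_dummy_model('model_true.h5')
write_dummy_model('model_result.h5', perturb=0.01)
# then: python3 utils/compare_models.py -t model_true.h5 -r model_result.h5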


@@ -0,0 +1,184 @@
import os
import argparse
from ruamel.yaml import YAML
from contextlib import suppress


def map_value_to_bool(params_in, target_key, orig_key=None):
    """
    Map an integer value to a boolean and update the target key in params_in.

    Parameters:
        params_in (dict): The input dictionary.
        target_key (str): The key whose value needs to be mapped to a boolean.
        orig_key (str, optional): The original key name. If it differs from
            target_key, the value is moved to target_key and the old key is removed.

    Returns:
        None
    """
    if orig_key is None:
        orig_key = target_key
    elif orig_key != target_key:
        print('key name {} is changed to {}'.format(orig_key, target_key))

    value = params_in.get(orig_key, None)
    if value is not None:
        params_in[target_key] = bool(value)
        print('value of {} is changed from int to bool'.format(target_key))

    # remove the old key
    if orig_key != target_key:
        params_in.pop(orig_key, None)


if __name__ == '__main__':
    # parse the argument for the input file
    parser = argparse.ArgumentParser(description='Convert a parameter file from version 2 to version 3')
    parser.add_argument('-i', '--input', help='Input file name', required=True)
    args = parser.parse_args()
    infile = args.input

    # get the path to this file
    path_this = os.path.dirname(os.path.realpath(__file__))
    # path to the v3 model file
    v3_model_file = path_this + '/params_model_v3.yaml'

    yaml = YAML()

    # read the input file
    try:
        with open(infile, 'r') as f:
            str_in = f.read()
            params_in = yaml.load(str_in)
    except IOError:
        raise ValueError('Cannot read the input file')

    # read the v3 model file
    try:
        with open(v3_model_file, 'r') as f:
            str_v3 = f.read()
            params_v3 = yaml.load(str_v3)
    except IOError:
        raise ValueError('Cannot read the v3 model file')

    # check the version of the input file
    if params_in['version'] != 2:
        raise ValueError('The input file is not version 2')

    # change the version to 3
    params_v3['version'] = 3

    # copy the values from the input file to the output file

    #
    # domain section
    #
    params_v3['domain'] = params_in['domain']

    #
    # source section
    #
    params_v3['source'] = params_in['source']
    map_value_to_bool(params_v3['source'], 'swap_src_rec')

    #
    # model section
    #
    params_v3['model'] = params_in['model']

    #
    # parallel section
    #
    params_v3['parallel'] = params_in['parallel']
    # change parallel->use_gpu from 0/1 to false/true
    map_value_to_bool(params_v3['parallel'], 'use_gpu')

    #
    # output_setting section
    #
    with suppress(KeyError): params_v3['output_setting']['output_dir'] = params_in['inversion']['output_dir']
    with suppress(KeyError): params_v3['output_setting']['output_source_field'] = params_in['output_setting']['is_output_source_field']
    with suppress(KeyError): params_v3['output_setting']['output_model_dat'] = params_in['output_setting']['is_output_model_dat']
    with suppress(KeyError): params_v3['output_setting']['output_final_model'] = params_in['output_setting']['is_output_final_model']
    with suppress(KeyError): params_v3['output_setting']['output_in_process'] = params_in['output_setting']['is_output_in_process']
    with suppress(KeyError): params_v3['output_setting']['single_precision_output'] = params_in['output_setting']['is_single_precision_output']

    map_value_to_bool(params_v3['output_setting'], 'output_source_field')
    map_value_to_bool(params_v3['output_setting'], 'output_model_dat')
    map_value_to_bool(params_v3['output_setting'], 'output_final_model')
    map_value_to_bool(params_v3['output_setting'], 'output_in_process')
    map_value_to_bool(params_v3['output_setting'], 'single_precision_output')

    # remove the old key 'output_setting'->'is_verbose_output'
    params_v3['output_setting'].pop('is_verbose_output', None)

    with suppress(KeyError): params_v3['output_setting']['output_file_format'] = params_in['calculation']['output_file_format']

    #
    # run_mode section
    #
    params_v3['run_mode'] = params_in['inversion']['run_mode']

    #
    # model_update section
    #
    with suppress(KeyError): params_v3['model_update']['max_iterations'] = params_in['inversion']['max_iterations_inv']
    with suppress(KeyError): params_v3['model_update']['optim_method'] = params_in['inversion']['optim_method']
    with suppress(KeyError): params_v3['model_update']['step_length'] = params_in['inversion']['step_size']
    with suppress(KeyError): params_v3['model_update']['optim_method_0']['step_length_decay'] = params_in['inversion']['step_size_decay']
    with suppress(KeyError): params_v3['model_update']['optim_method_0']['step_length_sc'] = params_in['inversion']['step_size_sc']
    with suppress(KeyError): params_v3['model_update']['optim_method_1_2']['max_sub_iterations'] = params_in['inversion']['max_sub_iterations']
    with suppress(KeyError): params_v3['model_update']['optim_method_1_2']['regularization_weight'] = params_in['inversion']['regularization_weight']
    with suppress(KeyError): params_v3['model_update']['smoothing']['smooth_method'] = params_in['inversion']['smooth_method']
    with suppress(KeyError): params_v3['model_update']['smoothing']['l_smooth_rtp'] = params_in['inversion']['l_smooth_rtp']
    with suppress(KeyError): params_v3['model_update']['n_inversion_grid'] = params_in['inversion']['n_inversion_grid']
    with suppress(KeyError): params_v3['model_update']['type_invgrid_dep'] = params_in['inversion']['type_dep_inv']
    with suppress(KeyError): params_v3['model_update']['type_invgrid_lat'] = params_in['inversion']['type_lat_inv']
    with suppress(KeyError): params_v3['model_update']['type_invgrid_lon'] = params_in['inversion']['type_lon_inv']
    with suppress(KeyError): params_v3['model_update']['n_inv_dep_lat_lon'] = params_in['inversion']['n_inv_dep_lat_lon']
    with suppress(KeyError): params_v3['model_update']['min_max_dep_inv'] = params_in['inversion']['min_max_dep_inv']
    with suppress(KeyError): params_v3['model_update']['min_max_lat_inv'] = params_in['inversion']['min_max_lat_inv']
    with suppress(KeyError): params_v3['model_update']['min_max_lon_inv'] = params_in['inversion']['min_max_lon_inv']
    with suppress(KeyError): params_v3['model_update']['dep_inv'] = params_in['inversion']['dep_inv']
    with suppress(KeyError): params_v3['model_update']['lat_inv'] = params_in['inversion']['lat_inv']
    with suppress(KeyError): params_v3['model_update']['lon_inv'] = params_in['inversion']['lon_inv']
    with suppress(KeyError): params_v3['model_update']['sta_correction_file'] = params_in['inversion']['sta_correction_file']
    with suppress(KeyError): params_v3['model_update']['update_slowness'] = params_in['inv_strategy']['is_inv_slowness']
    with suppress(KeyError): params_v3['model_update']['update_azi_ani'] = params_in['inv_strategy']['is_inv_azi_ani']
    with suppress(KeyError): params_v3['model_update']['update_rad_ani'] = params_in['inv_strategy']['is_inv_rad_ani']

    map_value_to_bool(params_v3['model_update'], 'update_slowness')
    map_value_to_bool(params_v3['model_update'], 'update_azi_ani')
    map_value_to_bool(params_v3['model_update'], 'update_rad_ani')

    with suppress(KeyError): params_v3['model_update']['depth_taper'] = params_in['inv_strategy']['kernel_taper']
    with suppress(KeyError): params_v3['model_update']['use_sta_correction'] = params_in['inv_strategy']['is_sta_correction']
    map_value_to_bool(params_v3['model_update'], 'use_sta_correction')

    #
    # relocation section
    #
    # the relocation section is completely new in v3, so no values need to be moved

    #
    # inversion_strategy section
    #
    # the inversion_strategy section is completely new in v3, so no values need to be moved

    #
    # calculation section
    #
    params_v3['calculation'] = params_in['calculation']
    # erase the old key 'calculation'->'output_file_format' if it exists
    params_v3['calculation'].pop('output_file_format', None)

    # write the output file, appending .v3.yaml to the input file name
    outfile = infile + '.v3.yaml'
    with open(outfile, 'w') as f:
        yaml.dump(params_v3, f)
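
The key-renaming path of map_value_to_bool is the only subtle part; a quick illustrative check on a plain dict, run in the same module (the v2/v3 key pair is taken from the model_update mapping above):

# illustrative check of map_value_to_bool
params = {'is_inv_slowness': 1}
map_value_to_bool(params, 'update_slowness', orig_key='is_inv_slowness')
print(params)  # -> {'update_slowness': True}; the old key is removed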


@@ -0,0 +1,28 @@
import os
import re


def find_kanji_chars_in_file(file_path):
    kanji_pattern = re.compile(r'[\u4E00-\u9FFF]')
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
        content = file.read()
        matches = kanji_pattern.findall(content)
        if matches:
            print(f"Found Kanji characters in {file_path}: {''.join(matches)}")


def scan_directory(directory):
    # extensions of text/source files to scan; everything else is skipped
    target_files = ['.cpp', '.h', '.hpp', '.c', '.cc', '.hh', '.cxx', '.hxx', '.py', '.txt', '.md', '.rst', '.ipynb', '.sh']
    for root, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            # skip non-text files
            if not file_path.endswith(tuple(target_files)):
                continue
            find_kanji_chars_in_file(file_path)


if __name__ == "__main__":
    #directory_to_scan = '.' # Change this to the directory you want to scan
    list_target_dir = ['./src', './test', './include', './examples']
    for directory_to_scan in list_target_dir:
        scan_directory(directory_to_scan)
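
A quick way to sanity-check the pattern is to run find_kanji_chars_in_file on a throwaway file; the file content below is invented for illustration:

# illustrative self-test; the temporary file and its content are made up
import os
import tempfile

with tempfile.NamedTemporaryFile('w', suffix='.txt', delete=False, encoding='utf-8') as tmp:
    tmp.write('velocity model  # 速度モデル\n')
find_kanji_chars_in_file(tmp.name)  # matches only 速 and 度; katakana is outside \u4E00-\u9FFF
os.unlink(tmp.name)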

230
utils/params_model_v3.yaml Normal file

@@ -0,0 +1,230 @@
version: 3

#################################################
#            computational domain               #
#################################################
domain:
  min_max_dep: [-10, 10] # depth in km
  min_max_lat: [37.7, 42.3] # latitude in degrees
  min_max_lon: [22.7, 27.3] # longitude in degrees
  n_rtp: [10, 50, 50] # number of nodes in the depth, latitude and longitude directions

#################################################
#          traveltime data file path            #
#################################################
source:
  src_rec_file: OUTPUT_FILES/src_rec_file_forward.dat # source receiver file path
  swap_src_rec: true # swap source and receiver

#################################################
#           initial model file path             #
#################################################
model:
  init_model_path: ./test_model_init.h5 # path to initial model file

#################################################
#         parallel computation settings         #
#################################################
parallel: # parameters for parallel computation
  n_sims: 1 # number of simultaneous runs
  ndiv_rtp: [1, 2, 2] # number of subdivisions in each direction
  nproc_sub: 2 # number of processors for sweep parallelization
  use_gpu: false # true to use GPU (EXPERIMENTAL)

############################################
#           output file setting            #
############################################
output_setting:
  output_dir: ./OUTPUT_FILES/ # path to output directory (default is ./OUTPUT_FILES/)
  output_source_field: false # output the calculated field of all sources or not
  output_model_dat: false # output model_parameters_inv_0000.dat or not
  output_final_model: true # output the merged final model or not
  output_in_process: true # output the model at each inversion iteration or not
  output_in_process_data: false # output src_rec_file at each inversion iteration or not
  single_precision_output: false # output results in single precision or not
  verbose_output_level: 0 # if 0, only model parameters are output; the higher the level, the more internal parameters are output. default: 0
  output_file_format: 0 # in/output file format, 0: HDF5, 1: ASCII

#################################################
#        inversion or forward modeling          #
#################################################
# run mode
# 0 for forward simulation only,
# 1 for inversion
# 2 for earthquake relocation
# 3 for inversion + earthquake relocation
run_mode: 1

###################################################
#         model update parameters setting         #
###################################################
model_update: # update model parameters (when run_mode : 1 or 3)
  max_iterations: 3 # maximum number of inversion iterations
  optim_method: 1 # optimization method. 0: grad_descent, 1: halve-stepping, 2: lbfgs (EXPERIMENTAL)

  # common parameter for all optim methods
  step_length: 0.01 # step length of model perturbation at each iteration. 0.01 means a maximum 1% perturbation per iteration.

  # parameters for optim_method 0 (grad_descent)
  optim_method_0:
    step_length_decay: 0.9 # if the objective function increases, step length -> step length * step_length_decay. default: 0.9

  # parameters for optim_method 1 (halve-stepping) or 2 (lbfgs)
  optim_method_1_2:
    max_sub_iterations: 10 # maximum number of sub-iterations
    regularization_weight: 0.01 # weight value for regularization (lbfgs mode only)
    coefs_regulalization_rtp: [1.0, 1.0, 1.0] # coefficients for regularization in each direction (lbfgs mode only)

  # smoothing
  smoothing:
    smooth_method: 0 # 0: multiparametrization, 1: laplacian smoothing (EXPERIMENTAL)
    l_smooth_rtp: [100, 100, 100] # smoothing coefficients for laplacian smoothing

  # parameters for smooth_method 0 (multigrid model parametrization)
  n_inversion_grid: 5 # number of inversion grid sets

  # inversion grid type
  type_invgrid_dep: 0 # 0: uniform inversion grid, 1: flexible grid
  type_invgrid_lat: 0 # 0: uniform inversion grid, 1: flexible grid
  type_invgrid_lon: 0 # 0: uniform inversion grid, 1: flexible grid

  # settings for the uniform inversion grid (if type_*_inv : 0)
  n_inv_dep_lat_lon: [5, 10, 10] # number of base inversion grid points (ignored if type_*_inv : 1)
  min_max_dep_inv: [-10, 10] # depth in km (the radius of the earth is defined in config.h/R_earth) (ignored if type_dep_inv : 1)
  min_max_lat_inv: [37.7, 42.3] # latitude in degrees
  min_max_lon_inv: [22.7, 27.3] # longitude in degrees

  # settings for the flexible inversion grid (if type_*_inv : 1)
  dep_inv: [-10.0, -7.5, -5.0, -2.5, 0.0, 2.5, 5.0, 7.5, 10.0] # depth in km (the radius of the earth is defined in config.h/R_earth)
  lat_inv: [0.0, 1.0] # latitude in degrees (ignored if type_lat_inv : 0)
  lon_inv: [0.0, 1.0] # longitude in degrees (ignored if type_lon_inv : 0)

  # to use a separate inversion grid for inverting anisotropy, set invgrid_ani: true (default: false)
  invgrid_ani: false

  # settings for the flexible inversion grid for anisotropy (only flexible grid input is provided)
  # dep_inv_ani: [1, 1, 1]
  # lat_inv_ani: [1, 1, 1]
  # lon_inv_ani: [1, 1, 1]

  # inversion grid volume rescaling (kernel -> kernel / volume of inversion grid mesh);
  # this preconditioning should be applied carefully if the sizes of the inversion grids are unbalanced
  invgrid_volume_rescale: false

  # station correction (under development)
  use_sta_correction: false
  # sta_correction_file: dummy_sta_correction_file # station correction file path
  # step_length_sc: 0.001 # step length for the update of the station correction terms

  # In the following data subsections, XXX_weight assigns a weight to each datum, influencing the objective function and gradient.
  # XXX_weight : [d1, d2, w1, w2] means:
  #   if XXX < d1,        weight = w1
  #   if d1 <= XXX < d2,  weight = w1 + (XXX-d1)/(d2-d1)*(w2-w1)  (linear interpolation)
  #   if d2 <= XXX,       weight = w2
  # You can simply set w1 = w2 = 1.0 to apply a uniform weight with respect to XXX.
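  # worked example (illustrative, not part of the original template): with
  # residual_weight: [1, 3, 1.0, 0.5], a residual of 0.5 s gets weight 1.0,
  # 2.0 s gets 0.75, and 5.0 s gets 0.5.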
  # -------------- using absolute traveltime data --------------
  abs_time:
    use_abs_time: true # 'true' to use absolute traveltime data to update model parameters; 'false' otherwise (then the parameters in this section need not be set)
    residual_weight: [0, 9999, 1.0, 1.0] # XXX is the absolute traveltime residual (in seconds) = abs(t^{obs}_{n,i} - t^{syn}_{n,j})
    distance_weight: [0, 9999, 1.0, 1.0] # XXX is the epicentral distance (km) between the source and receiver of the datum

  # -------------- using common-source differential traveltime data --------------
  cs_dif_time:
    use_cs_time: false # 'true' to use common-source differential traveltime data to update model parameters; 'false' otherwise (then the parameters in this section need not be set)
    residual_weight: [0, 0, 0, 0] # XXX is the common-source differential traveltime residual (in seconds) = abs(t^{obs}_{n,i} - t^{obs}_{n,j} - t^{syn}_{n,i} + t^{syn}_{n,j})
    azimuthal_weight: [0, 0, 0, 0] # XXX is the azimuth difference between the two stations sharing the common source.

  # -------------- using common-receiver differential traveltime data --------------
  cr_dif_time:
    use_cr_time: false # 'true' to use common-receiver differential traveltime data to update model parameters; 'false' otherwise (then the parameters in this section need not be set)
    residual_weight: [0, 0, 0, 0] # XXX is the common-receiver differential traveltime residual (in seconds) = abs(t^{obs}_{n,i} - t^{obs}_{m,i} - t^{syn}_{n,i} + t^{syn}_{m,i})
    azimuthal_weight: [0, 0, 0, 0] # XXX is the azimuth difference between the two sources sharing the common receiver.

  # -------------- global weights of the different data types (to balance the different data) --------------
  global_weight:
    balance_data_weight: false # true: normalize by the total weight of each data type; false: use the original weights (then the per-type weights below need to be set)
    abs_time_weight: 1 # weight of absolute traveltime data after balancing, default: 1.0
    cs_dif_time_local_weight: 1 # weight of common-source differential traveltime data after balancing, default: 1.0
    cr_dif_time_local_weight: 1 # weight of common-receiver differential traveltime data after balancing, default: 1.0
    teleseismic_weight: 1 # weight of teleseismic data after balancing, default: 1.0 (excluded in this version)

  # -------------- inversion parameters --------------
  update_slowness: true # update slowness (velocity) or not. default: true
  update_azi_ani: false # update azimuthal anisotropy (xi, eta) or not. default: false
  # update_rad_ani: false # update radial anisotropy (future work) or not. default: false

  # -------------- for teleseismic inversion (under development) --------------
  # depth_taper : [d1, d2] means:
  #   if XXX < d1,        kernel <- kernel * 0.0
  #   if d1 <= XXX < d2,  kernel <- kernel * (XXX-d1)/(d2-d1)  (linear interpolation)
  #   if d2 <= XXX,       kernel <- kernel * 1.0
  # You can simply set d1 = -200, d2 = -100 to disable this taper.
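  # worked example (illustrative): with depth_taper: [0, 50], the kernel is
  # zeroed for XXX < 0 km, scaled linearly from 0 to 1 between 0 and 50 km,
  # and left unchanged for XXX >= 50 km; the default below disables the taper.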
  depth_taper: [-1e+07, -1e+07]

#################################################
#         relocation parameters setting         #
#################################################
relocation: # update earthquake hypocenters and origin times (when run_mode : 2 or 3)
  min_Ndata: 4 # an earthquake with fewer than <min_Ndata> data will not be relocated. default: 4

  # relocation strategy
  step_length: 0.01 # step length of the relocation perturbation at each iteration. 0.01 means a maximum 1% perturbation per iteration.
  step_length_decay: 0.9 # if the objective function increases, step length -> step length * step_length_decay. default: 0.9
  rescaling_dep_lat_lon_ortime: [10, 1, 1, 0.5] # the perturbation is rescaled by <rescaling_dep_lat_lon_ortime>. units: km, km, km, second
  max_change_dep_lat_lon_ortime: [10, 1, 1, 0.5] # the changes of dep, lat, lon and ortime do not exceed these maxima. units: km, km, km, second
  max_iterations: 501 # maximum number of relocation iterations
  tol_gradient: 0.001 # the relocation iteration terminates when the norm of the gradient falls below this tolerance

  # -------------- using absolute traveltime data --------------
  abs_time:
    use_abs_time: false # 'true' to use absolute traveltime data for relocation; 'false' otherwise (then the parameters in this section need not be set)
    residual_weight: [0, 0, 0, 0] # XXX is the absolute traveltime residual (in seconds) = abs(t^{obs}_{n,i} - t^{syn}_{n,j})
    distance_weight: [0, 0, 0, 0] # XXX is the epicentral distance (km) between the source and receiver of the datum

  # -------------- using common-receiver differential traveltime data --------------
  cr_dif_time:
    use_cr_time: false # 'true' to use common-receiver differential traveltime data for relocation; 'false' otherwise (then the parameters in this section need not be set)
    residual_weight: [0, 0, 0, 0] # XXX is the common-receiver differential traveltime residual (in seconds) = abs(t^{obs}_{n,i} - t^{obs}_{m,i} - t^{syn}_{n,i} + t^{syn}_{m,i})
    azimuthal_weight: [0, 0, 0, 0] # XXX is the azimuth difference between the two sources sharing the common receiver.

  # -------------- global weights of the different data types (to balance the different data) --------------
  global_weight:
    balance_data_weight: false # true: normalize by the total weight of each data type; false: use the original weights (then the per-type weights below need to be set)
    abs_time_local_weight: 1 # weight of absolute traveltime data for relocation after balancing, default: 1.0
    cr_dif_time_local_weight: 1 # weight of common-receiver differential traveltime data for relocation after balancing, default: 1.0

####################################################################
#         inversion strategy for tomography and relocation         #
####################################################################
inversion_strategy: # update model parameters and earthquake hypocenters iteratively (when run_mode : 3)
  inv_mode: 0 # 0: update model parameters and relocate iteratively (other options are reserved for future work)

  # for inv_mode : 0, the parameters below are required
  inv_mode_0: # update the model for <model_update_N_iter> steps, then update locations for <relocation_N_iter> steps, and repeat for <max_loop> loops
    model_update_N_iter: 1
    relocation_N_iter: 1
    max_loop: 10

# --- parameters for the core solver ------------------------------------------------------
# --- please do not change the following parameters unless you know what you are doing ---

########################################################################
#          Scheme of the Eikonal solver (fast sweeping method)         #
########################################################################
calculation:
  convergence_tolerance: 0.0001 # threshold value for checking the convergence of each forward/adjoint run
  max_iterations: 500 # maximum number of iterations for each forward/adjoint run
  stencil_order: 3 # order of the stencil, 1 or 3
  stencil_type: 0 # 0: , 1: first-order upwind scheme (only sweep_type 0 is supported)
  sweep_type: 1 # 0: legacy, 1: cuthill-mckee with shm parallelization
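
The converter above loads this template with ruamel.yaml, so the file can be sanity-checked the same way; a minimal sketch, assuming the template sits next to the scripts in utils/:

from ruamel.yaml import YAML

yaml = YAML()
with open('utils/params_model_v3.yaml', 'r') as f:
    params = yaml.load(f)

# the sections the converter writes into must exist in the template
assert params['version'] == 3
for section in ('domain', 'source', 'model', 'parallel',
                'output_setting', 'model_update', 'calculation'):
    assert section in params, 'missing section: ' + section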


@@ -0,0 +1,357 @@
# class for storing one event or one receiver record
class AttSrcRec:
    _id_src = None
    _id_rec = None
    _year = None
    _month = None
    _day = None
    _hour = None
    _min = None
    _sec = None
    _lat = None
    _lon = None
    _dep = None
    _mag = None
    _nrec = None
    _id_event = None
    _data_source = None
    _phase = None
    _epi_dist = None
    _arr_time = None
    _name_rec = None

    def __init__(self,
                 id_src=None,
                 id_rec=None,
                 year=None,
                 month=None,
                 day=None,
                 hour=None,
                 _min=None,
                 sec=None,
                 lat=None,
                 lon=None,
                 dep=None,
                 mag=None,
                 nrec=None,
                 id_event=None,
                 data_source=None,
                 phase=None,
                 epi_dist=None,
                 arr_time=None,
                 name_rec=None):
        self._id_src = id_src
        self._id_rec = id_rec
        self._year = year
        self._month = month
        self._day = day
        self._hour = hour
        self._min = _min
        self._sec = sec
        self._lat = lat
        self._lon = lon
        self._dep = dep
        self._mag = mag
        self._nrec = nrec
        self._id_event = id_event
        self._data_source = data_source
        self._phase = phase
        self._epi_dist = epi_dist
        self._arr_time = arr_time
        self._name_rec = name_rec
def convert_to_pandas_df(event_list):
    # convert event_list to a pandas dataframe
    import pandas as pd
    import datetime

    df_ev = pd.DataFrame()

    list_id_src = []
    list_id_rec = []
    list_year = []
    list_month = []
    list_day = []
    list_hour = []
    list__min = []
    list_sec = []
    list_lat = []
    list_lon = []
    list_dep = []
    list_mag = []
    list_nrec = []
    list_id_event = []
    list_data_source = []
    list_phase = []
    list_epi_dist = []
    list_arr_time = []
    list_name_rec = []
    list_datetime = []

    for ev in event_list:
        list_id_src.append(ev._id_src)
        list_id_rec.append(ev._id_rec)
        list_year.append(ev._year)
        list_month.append(ev._month)
        list_day.append(ev._day)
        list_hour.append(ev._hour)
        list__min.append(ev._min)
        list_sec.append(ev._sec)
        list_lat.append(ev._lat)
        list_lon.append(ev._lon)
        list_dep.append(ev._dep)
        list_mag.append(ev._mag)
        list_nrec.append(ev._nrec)
        list_id_event.append(ev._id_event)
        list_data_source.append(ev._data_source)
        list_phase.append(ev._phase)
        list_epi_dist.append(ev._epi_dist)
        list_arr_time.append(ev._arr_time)
        list_name_rec.append(ev._name_rec)
        try:
            date_this = datetime.datetime(ev._year, ev._month, ev._day, ev._hour, ev._min, int(ev._sec))
        except (TypeError, ValueError):
            # receiver records carry no date fields, so this fails for them
            date_this = None
        list_datetime.append(date_this)

    # convert all the lists to pandas series
    df_ev['id_src'] = pd.Series(list_id_src)
    df_ev['id_rec'] = pd.Series(list_id_rec)
    df_ev['year'] = pd.Series(list_year)
    df_ev['month'] = pd.Series(list_month)
    df_ev['day'] = pd.Series(list_day)
    df_ev['hour'] = pd.Series(list_hour)
    df_ev['min'] = pd.Series(list__min)
    df_ev['sec'] = pd.Series(list_sec)
    df_ev['lat'] = pd.Series(list_lat)
    df_ev['lon'] = pd.Series(list_lon)
    df_ev['dep'] = pd.Series(list_dep)
    df_ev['mag'] = pd.Series(list_mag)
    df_ev['nrec'] = pd.Series(list_nrec)
    df_ev['id_event'] = pd.Series(list_id_event)
    df_ev['data_source'] = pd.Series(list_data_source)
    df_ev['phase'] = pd.Series(list_phase)
    df_ev['epi_dist'] = pd.Series(list_epi_dist)
    df_ev['arr_time'] = pd.Series(list_arr_time)
    df_ev['name_rec'] = pd.Series(list_name_rec)
    df_ev['datetime'] = pd.Series(list_datetime)

    return df_ev
# read a src_rec file into event and receiver dataframes
def read_src_rec_file(fpath, two_station_names=False, data_source_flag=0, id_src_offset=0, no_epi_dist=False):
    print("read file: ", fpath)

    event_list = []
    rec_list = []

    with open(fpath, "r") as f:
        lines = f.readlines()

    cc = 0
    nc = 0
    i_src = 0

    # parse
    for iline, line in enumerate(lines):
        if line.startswith("#"):
            continue
        if cc == 0:
            try:
                # a source line is read first
                ll = line.split()
                #src_id = int(ll[0])
                src_id = i_src + id_src_offset
                src_year = int(ll[1])
                src_month = int(ll[2])
                src_day = int(ll[3])
                src_hour = int(ll[4])
                src_min = int(ll[5])
                src_sec = float(ll[6])
                src_lat = float(ll[7])
                src_lon = float(ll[8])
                src_dep = float(ll[9])
                src_mag = float(ll[10])
                src_nrec = int(ll[11])
                src_id_event = ll[12]
                nrec_tmp = src_nrec
                # store the source, skipping events without receivers
                if nrec_tmp != 0:
                    src = AttSrcRec(src_id, None, src_year, src_month, src_day, src_hour, src_min, src_sec, src_lat, src_lon, src_dep, src_mag, src_nrec, src_id_event, data_source_flag, None, None, None, None)
                    event_list.append(src)
                    cc += 1
            except (ValueError, IndexError):
                # skip lines that cannot be parsed as a source line
                pass
        else:
            try:
                # read a receiver line
                ll = line.split()
                if not two_station_names:
                    #src_id = int(ll[0])
                    src_id = i_src + id_src_offset
                    rec_id = int(ll[1])
                    rec_name = ll[2]
                    rec_lat = float(ll[3])
                    rec_lon = float(ll[4])
                    rec_elev = float(ll[5])
                    rec_phase = ll[6]
                    if no_epi_dist:
                        rec_epi_dist = None
                        rec_arr_time = float(ll[7])
                    else:
                        rec_epi_dist = float(ll[7])
                        rec_arr_time = float(ll[8])
                else:
                    #src_id = int(ll[0])
                    src_id = i_src + id_src_offset
                    rec_id = int(ll[1])
                    rec_name = ll[2] + "_" + ll[3]
                    rec_lat = float(ll[4])
                    rec_lon = float(ll[5])
                    rec_elev = float(ll[6])
                    rec_phase = ll[7]
                    if no_epi_dist:
                        rec_epi_dist = None
                        rec_arr_time = float(ll[8])
                    else:
                        rec_epi_dist = float(ll[8])
                        rec_arr_time = float(ll[9])

                # store the receiver (the elevation is stored in the dep field)
                rec = AttSrcRec(src_id, rec_id, None, None, None, None, None, None, rec_lat, rec_lon, rec_elev, None, None, None, data_source_flag, rec_phase, rec_epi_dist, rec_arr_time, rec_name)
                rec_list.append(rec)
                nc += 1
            except (ValueError, IndexError):
                print("error in line: ", iline)
                print("error in line: " + line)

            cc += 1
            if cc > nrec_tmp:
                cc = 0
                i_src += 1
                if nc == 0:
                    print("error: no receiver found for the last event")
                    # erase the last event
                    event_list.pop()
                    i_src -= 1
                nc = 0

    # report the number of parsed events and receivers
    print("number of events: ", len(event_list))
    print("number of recs: ", len(rec_list))

    df_ev = convert_to_pandas_df(event_list)
    df_rec = convert_to_pandas_df(rec_list)

    return df_ev, df_rec
def write_src_rec_file(df_events, df_recs, fpath, no_epi_dist=True):
    import tqdm

    print("write file: ", fpath)
    f = open(fpath, 'w')

    for i in tqdm.tqdm(range(len(df_events))):
        # receivers of this event
        recs_this_ev = df_recs[df_recs['id_src'] == df_events['id_src'].iloc[i]]
        nrecs = len(recs_this_ev)

        # write the source line
        f.write("{} {} {} {} {} {} {} {} {} {} {} {} {}\n".format(i,
                df_events['year'].iloc[i],
                df_events['month'].iloc[i],
                df_events['day'].iloc[i],
                df_events['hour'].iloc[i],
                df_events['min'].iloc[i],
                df_events['sec'].iloc[i],
                df_events['lat'].iloc[i],
                df_events['lon'].iloc[i],
                df_events['dep'].iloc[i],
                df_events['mag'].iloc[i],
                nrecs,
                df_events['id_event'].iloc[i]))

        # write the receiver lines
        for j in range(len(recs_this_ev)):
            if no_epi_dist:
                f.write("   {} {} {} {} {} {} {} {}\n".format(i,
                        recs_this_ev['id_rec'].iloc[j],
                        recs_this_ev['name_rec'].iloc[j],
                        recs_this_ev['lat'].iloc[j],
                        recs_this_ev['lon'].iloc[j],
                        recs_this_ev['dep'].iloc[j],
                        recs_this_ev['phase'].iloc[j],
                        recs_this_ev['arr_time'].iloc[j]))
            else:
                f.write("   {} {} {} {} {} {} {} {} {}\n".format(i,
                        recs_this_ev['id_rec'].iloc[j],
                        recs_this_ev['name_rec'].iloc[j],
                        recs_this_ev['lat'].iloc[j],
                        recs_this_ev['lon'].iloc[j],
                        recs_this_ev['dep'].iloc[j],
                        recs_this_ev['phase'].iloc[j],
                        recs_this_ev['epi_dist'].iloc[j],
                        recs_this_ev['arr_time'].iloc[j]))

    f.close()
if __name__ == "__main__":
    # read_src_rec_file returns two pandas dataframes (events and receivers)
    df_ev, df_rec = read_src_rec_file("./src_rec_test_out.dat")
    # inspect the receivers of the first event as a quick check
    recs_first = df_rec[df_rec['id_src'] == df_ev['id_src'].iloc[0]]
    print(recs_first[['name_rec', 'epi_dist', 'arr_time', 'id_rec', 'id_src',
                      'lat', 'lon', 'dep', 'phase']].head(3))
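
For reference, the line layout that read_src_rec_file expects, inferred from the parser above (the station name and all numbers in the example are made up):

# source line:   <id_src> <year> <month> <day> <hour> <min> <sec> <lat> <lon> <dep> <mag> <nrec> <id_event>
# receiver line: <id_src> <id_rec> <name_rec> <lat> <lon> <elev> <phase> [<epi_dist>] <arr_time>
# example (two_station_names=False, epi_dist present):
#   0 2020 1 1 0 0 0.0 38.5 23.5 5.0 2.1 1 ev0001
#      0 0 STA01 38.7 23.9 150.0 P 40.2 7.85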


@@ -0,0 +1,228 @@
# functions for post-processing output data in h5 files
import numpy
import h5py


def get_data_from_h5(fpath, fpath_grid, dataset_name, nr_glob, nt_glob, np_glob, ndiv_r, ndiv_t, ndiv_p, verbose=False):
    """
    Output arrays have one overlapping layer between adjacent subdomains.
    This function merges those layers and reconstructs the entire grid dataset.

    fpath: path to field data file
    fpath_grid: path to grid data file
    dataset_name: name of the dataset in the h5 file
    nr_glob: number of grid points in r direction
    nt_glob: number of grid points in t direction
    np_glob: number of grid points in p direction
    ndiv_r: number of subdomains in r direction
    ndiv_t: number of subdomains in t direction
    ndiv_p: number of subdomains in p direction
    verbose: print out information
    """
    # total number of subdomains
    n_sub = ndiv_r*ndiv_t*ndiv_p

    # total number of points in each direction, including the overlaps
    nr_total_glob = nr_glob + ndiv_r - 1
    nt_total_glob = nt_glob + ndiv_t - 1
    np_total_glob = np_glob + ndiv_p - 1

    # prepare 3D arrays to store the data
    data_glob = numpy.zeros((nr_glob, nt_glob, np_glob), dtype=numpy.float64)
    grid_glob_r = numpy.zeros((nr_glob, nt_glob, np_glob), dtype=numpy.float64)
    grid_glob_t = numpy.zeros((nr_glob, nt_glob, np_glob), dtype=numpy.float64)
    grid_glob_p = numpy.zeros((nr_glob, nt_glob, np_glob), dtype=numpy.float64)

    # open the grid data file
    fgrid = h5py.File(fpath_grid, 'r')
    # open the field data file
    fdata = h5py.File(fpath, 'r')

    # load the data subdomain by subdomain
    offset = 0
    for ir_sub in range(ndiv_r):
        for it_sub in range(ndiv_t):
            for ip_sub in range(ndiv_p):
                # base number of points of this subdomain in each direction
                nr_sub = nr_glob//ndiv_r
                nt_sub = nt_glob//ndiv_t
                np_sub = np_glob//ndiv_p

                # offset in each direction
                offset_r = ir_sub*nr_sub
                offset_t = it_sub*nt_sub
                offset_p = ip_sub*np_sub

                # add the remainder to the last subdomain
                if ir_sub == ndiv_r-1:
                    nr_sub += nr_glob % ndiv_r
                if it_sub == ndiv_t-1:
                    nt_sub += nt_glob % ndiv_t
                if ip_sub == ndiv_p-1:
                    np_sub += np_glob % ndiv_p

                # add an overlap layer if this subdomain is not the last one in a direction
                if ir_sub != ndiv_r-1:
                    nr_sub += 1
                if it_sub != ndiv_t-1:
                    nt_sub += 1
                if ip_sub != ndiv_p-1:
                    np_sub += 1

                # total number of data points of this subdomain
                n_points_total_sub = nr_sub*nt_sub*np_sub

                # load the data
                data_sub = fdata[dataset_name][offset:offset+n_points_total_sub]
                data_sub_p = fgrid["/Mesh/node_coords_p"][offset:offset+n_points_total_sub]
                data_sub_t = fgrid["/Mesh/node_coords_t"][offset:offset+n_points_total_sub]
                data_sub_r = fgrid["/Mesh/node_coords_r"][offset:offset+n_points_total_sub]

                # reshape the data
                data_sub = data_sub.reshape(nr_sub, nt_sub, np_sub)
                data_sub_p = data_sub_p.reshape(nr_sub, nt_sub, np_sub)
                data_sub_t = data_sub_t.reshape(nr_sub, nt_sub, np_sub)
                data_sub_r = data_sub_r.reshape(nr_sub, nt_sub, np_sub)

                # place the data in the global 3D arrays (overlap layers are overwritten in place)
                data_glob[offset_r:offset_r+nr_sub, offset_t:offset_t+nt_sub, offset_p:offset_p+np_sub] = data_sub
                grid_glob_p[offset_r:offset_r+nr_sub, offset_t:offset_t+nt_sub, offset_p:offset_p+np_sub] = data_sub_p
                grid_glob_t[offset_r:offset_r+nr_sub, offset_t:offset_t+nt_sub, offset_p:offset_p+np_sub] = data_sub_t
                grid_glob_r[offset_r:offset_r+nr_sub, offset_t:offset_t+nt_sub, offset_p:offset_p+np_sub] = data_sub_r

                # update the offset
                offset += n_points_total_sub

    fdata.close()
    fgrid.close()

    return data_glob, grid_glob_r, grid_glob_t, grid_glob_p
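
# 1-D illustration of the subdomain bookkeeping above (invented numbers):
# nr_glob = 10, ndiv_r = 2  ->  base nr_sub = 10 // 2 = 5
#   subdomain 0 is not the last one, so it carries one overlap layer:
#       6 points, written to data_glob[0:6]
#   subdomain 1 is the last one (remainder 10 % 2 = 0 added): 5 points,
#       written to data_glob[5:10]
# the shared layer at index 5 is written twice with identical values,
# so the reconstruction is seamless.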
# the output arrays have one overlapping layer between adjacent subdomains;
# this function merges those layers and reconstructs the entire grid dataset
def get_data_from_ascii(fpath, fpath_grid, nr_glob, nt_glob, np_glob, ndiv_r, ndiv_t, ndiv_p, verbose=False):
    """
    fpath: path to ascii data file
    fpath_grid: path to ascii grid data file
    nr_glob: number of grid points in r direction
    nt_glob: number of grid points in t direction
    np_glob: number of grid points in p direction
    ndiv_r: number of subdomains in r direction
    ndiv_t: number of subdomains in t direction
    ndiv_p: number of subdomains in p direction
    verbose: print out information
    """
    # total number of subdomains
    n_sub = ndiv_r*ndiv_t*ndiv_p

    # total number of points in each direction, including the overlaps
    nr_total_glob = nr_glob + ndiv_r - 1
    nt_total_glob = nt_glob + ndiv_t - 1
    np_total_glob = np_glob + ndiv_p - 1

    # prepare 3D arrays to store the data
    data_glob = numpy.zeros((nr_glob, nt_glob, np_glob), dtype=numpy.float64)
    grid_glob_r = numpy.zeros((nr_glob, nt_glob, np_glob), dtype=numpy.float64)
    grid_glob_t = numpy.zeros((nr_glob, nt_glob, np_glob), dtype=numpy.float64)
    grid_glob_p = numpy.zeros((nr_glob, nt_glob, np_glob), dtype=numpy.float64)

    # read the data
    data_tmp = numpy.loadtxt(fpath)
    grid_tmp = numpy.loadtxt(fpath_grid)

    # load the data subdomain by subdomain
    offset = 0
    for ir_sub in range(ndiv_r):
        for it_sub in range(ndiv_t):
            for ip_sub in range(ndiv_p):
                # base number of points of this subdomain in each direction
                nr_sub = nr_glob//ndiv_r
                nt_sub = nt_glob//ndiv_t
                np_sub = np_glob//ndiv_p

                # offset in each direction
                offset_r = ir_sub*nr_sub
                offset_t = it_sub*nt_sub
                offset_p = ip_sub*np_sub

                # add the remainder to the last subdomain
                if ir_sub == ndiv_r-1:
                    nr_sub += nr_glob % ndiv_r
                if it_sub == ndiv_t-1:
                    nt_sub += nt_glob % ndiv_t
                if ip_sub == ndiv_p-1:
                    np_sub += np_glob % ndiv_p

                # add an overlap layer if this subdomain is not the last one in a direction
                if ir_sub != ndiv_r-1:
                    nr_sub += 1
                if it_sub != ndiv_t-1:
                    nt_sub += 1
                if ip_sub != ndiv_p-1:
                    np_sub += 1

                # total number of data points of this subdomain
                n_points_total_sub = nr_sub*nt_sub*np_sub

                # slice out the data of this subdomain
                data_sub = data_tmp[offset:offset+n_points_total_sub]
                grid_sub_p = grid_tmp[offset:offset+n_points_total_sub, 0]
                grid_sub_t = grid_tmp[offset:offset+n_points_total_sub, 1]
                grid_sub_r = grid_tmp[offset:offset+n_points_total_sub, 2]

                # reshape the data
                data_sub = data_sub.reshape(nr_sub, nt_sub, np_sub)
                grid_sub_p = grid_sub_p.reshape(nr_sub, nt_sub, np_sub)
                grid_sub_t = grid_sub_t.reshape(nr_sub, nt_sub, np_sub)
                grid_sub_r = grid_sub_r.reshape(nr_sub, nt_sub, np_sub)

                # place the data in the global 3D arrays (overlap layers are overwritten in place)
                data_glob[offset_r:offset_r+nr_sub, offset_t:offset_t+nt_sub, offset_p:offset_p+np_sub] = data_sub
                grid_glob_p[offset_r:offset_r+nr_sub, offset_t:offset_t+nt_sub, offset_p:offset_p+np_sub] = grid_sub_p
                grid_glob_t[offset_r:offset_r+nr_sub, offset_t:offset_t+nt_sub, offset_p:offset_p+np_sub] = grid_sub_t
                grid_glob_r[offset_r:offset_r+nr_sub, offset_t:offset_t+nt_sub, offset_p:offset_p+np_sub] = grid_sub_r

                # update the offset
                offset += n_points_total_sub

    return data_glob, grid_glob_r, grid_glob_t, grid_glob_p
if __name__ == '__main__':
    # example usage
    fpath = './OUTPUT_FILES/out_data_sim_0.h5'
    fpath_grid = './OUTPUT_FILES/out_data_grid.h5'
    nr = 10
    nt = 10
    np = 10
    ndiv_r = 2
    ndiv_t = 2
    ndiv_p = 2
    Ks_tomoatt, grid_r, grid_t, grid_p = get_data_from_h5(fpath, fpath_grid, "Data/Ks_inv_0000", nr, nt, np, ndiv_r, ndiv_t, ndiv_p, verbose=True)
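
The ASCII reader takes the same grid and subdivision arguments; a minimal sketch mirroring the example above (the .dat file names are assumptions, not paths produced by this commit):

    # reconstruct from ASCII output instead of HDF5 (illustrative file names)
    fpath_dat = './OUTPUT_FILES/out_data_sim_0.dat'
    fpath_grid_dat = './OUTPUT_FILES/out_data_grid.dat'
    data, grid_r, grid_t, grid_p = get_data_from_ascii(
        fpath_dat, fpath_grid_dat, nr, nt, np, ndiv_r, ndiv_t, ndiv_p, verbose=True)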