gctl/lib/io/text_io.cpp
2024-09-16 14:28:08 +08:00

488 lines
13 KiB
C++

/********************************************************
* ██████╗ ██████╗████████╗██╗
* ██╔════╝ ██╔════╝╚══██╔══╝██║
* ██║ ███╗██║ ██║ ██║
* ██║ ██║██║ ██║ ██║
* ╚██████╔╝╚██████╗ ██║ ███████╗
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2023 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "text_io.h"
#include "cmath"
/**
 * @brief Default constructor.
 *
 * Initializes the descriptor by calling set() without arguments, i.e. with
 * the default arguments that set() declares.
 */
gctl::text_descriptor::text_descriptor()
{
    this->set();
}
/**
 * @brief Construct a descriptor from explicit settings.
 *
 * All arguments are handed to set() unchanged and in the same order.
 *
 * @param filename   File name.
 * @param file_exten File extension.
 * @param col_order  Column order string.
 * @param deli       Column delimiter.
 * @param att_sym    Symbol that marks an annotation line.
 * @param tag_sym    Symbol that marks a tag line.
 * @param h_num      Number of head lines.
 * @param io_psn     Floating-point precision setting.
 */
gctl::text_descriptor::text_descriptor(std::string filename, std::string file_exten,
    std::string col_order, char deli, char att_sym, char tag_sym, int h_num, int io_psn)
{
    this->set(filename, file_exten, col_order, deli, att_sym, tag_sym, h_num, io_psn);
}
/**
 * @brief Destructor. Releases the stored head, annotation and tag lines via clear().
 */
gctl::text_descriptor::~text_descriptor()
{
    this->clear();
}
void gctl::text_descriptor::set(std::string filename, std::string file_exten,
std::string col_order, char deli, char att_sym, char tag_sym, int h_num, int io_psn)
{
file_name_ = filename;
file_ext_ = file_exten;
col_str_ = col_order;
att_sym_ = att_sym;
tag_sym_ = tag_sym;
delimiter_ = deli;
head_num_ = h_num;
float_psn_ = io_psn;
return;
}
void gctl::text_descriptor::clear()
{
head_strs_.clear();
annotates_.clear();
tags_.clear();
return;
}
void gctl::text_descriptor::set_float_precision(int psn)
{
float_psn_ = psn;
return;
}
void gctl::text_descriptor::set_head_number(int n)
{
head_num_ = n;
return;
}
void gctl::text_descriptor::set_annotation_symbol(char sym)
{
att_sym_ = sym;
return;
}
void gctl::text_descriptor::set_tag_symbol(char sym)
{
tag_sym_ = sym;
return;
}
void gctl::text_descriptor::set_column_delimeter(char sym)
{
delimiter_ = sym;
return;
}
void gctl::text_descriptor::set_file_extension(std::string ext_s)
{
file_ext_ = ext_s;
return;
}
void gctl::text_descriptor::set_column_order(std::string col_s)
{
col_str_ = col_s;
return;
}
/**
 * @brief Read lines of a text file and sort them into head, annotation, tag and data lines.
 *
 * The file is opened through open_infile() using file_desc.file_name_ and
 * file_desc.file_ext_. The first file_desc.head_num_ lines of the file are
 * appended to file_desc.head_strs_. Of the lines that are read afterwards,
 * those starting with file_desc.att_sym_ are appended to file_desc.annotates_
 * and those starting with file_desc.tag_sym_ are appended to file_desc.tags_
 * (in both cases after removing the leading symbols and the blanks that follow
 * them); every other line is appended to out_vec unchanged.
 *
 * @note The containers of file_desc are appended to, not cleared, so repeated
 * calls on the same descriptor accumulate entries.
 *
 * @param[in,out] file_desc  Descriptor supplying the file name, extension, head
 *                           number and marker symbols; receives the head,
 *                           annotation and tag lines.
 * @param[out]    out_vec    Receives the data lines. Previous content is discarded.
 * @param[in]     start_line Zero-based index, counted after the head lines, of
 *                           the first line to read.
 * @param[in]     end_line   Index of the last line to read (inclusive). If both
 *                           start_line and end_line are 0 the whole file is read.
 *                           If the file ends before end_line, reading simply
 *                           stops at the end of the file.
 *
 * @throws invalid_argument if start_line is negative or greater than end_line.
 */
void gctl::read_text_lines(text_descriptor &file_desc, std::vector<std::string> &out_vec, int start_line, int end_line)
{
    if (start_line < 0 || start_line > end_line)
    {
        throw invalid_argument("Invalid line numbers. From gctl::readlines(...)");
    }

    std::ifstream infile;
    open_infile(infile, file_desc.file_name_, file_desc.file_ext_);

    out_vec.clear();
    std::string tmp_line;

    // A negative head number is treated as "no head lines". Looping with a
    // signed counter avoids the near-endless loop an unsigned comparison
    // against a negative value would cause.
    const int head_num = file_desc.head_num_ > 0 ? static_cast<int>(file_desc.head_num_) : 0;

    // Removes the leading marker symbol(s) and the blanks that follow them.
    auto strip_marker = [](std::string &line, char sym)
    {
        line.erase(0, line.find_first_not_of(sym));
        line.erase(0, line.find_first_not_of(" \t"));
    };

    // Files one non-head line into the annotation, tag or data container.
    auto store_line = [&](std::string &line)
    {
        if (!line.empty() && line[0] == file_desc.att_sym_)
        {
            strip_marker(line, file_desc.att_sym_);
            file_desc.annotates_.push_back(line);
        }
        else if (!line.empty() && line[0] == file_desc.tag_sym_)
        {
            strip_marker(line, file_desc.tag_sym_);
            file_desc.tags_.push_back(line);
        }
        else out_vec.push_back(line);
    };

    if (start_line == 0 && end_line == 0)
    {
        // First pass: count the lines of each kind so that the containers
        // can be sized once before they are filled.
        int skipped = 0;
        while (skipped < head_num && std::getline(infile, tmp_line)) skipped++;

        size_t data_num = 0, att_num = 0, tag_num = 0;
        while (std::getline(infile, tmp_line))
        {
            if (!tmp_line.empty() && tmp_line[0] == file_desc.att_sym_) att_num++;
            else if (!tmp_line.empty() && tmp_line[0] == file_desc.tag_sym_) tag_num++;
            else data_num++;
        }

        out_vec.reserve(data_num);
        file_desc.annotates_.reserve(file_desc.annotates_.size() + att_num);
        file_desc.tags_.reserve(file_desc.tags_.size() + tag_num);
        file_desc.head_strs_.reserve(file_desc.head_strs_.size() + head_num);

        // Rewind: the first pass left the stream in the end-of-file state.
        infile.clear(std::ios::goodbit);
        infile.seekg(0, std::ios::beg);

        // Second pass: store the lines. A file shorter than the head number
        // ends the loop instead of storing lines that were never read.
        for (int i = 0; i < head_num && std::getline(infile, tmp_line); i++)
        {
            file_desc.head_strs_.push_back(tmp_line);
        }

        while (std::getline(infile, tmp_line))
        {
            store_line(tmp_line);
        }

        infile.close();
        return;
    }

    // Number of lines requested. Both bounds are non-negative and ordered
    // here, so the unsigned arithmetic cannot wrap.
    const size_t line_num = static_cast<size_t>(end_line) - static_cast<size_t>(start_line) + 1;

    // Reserving is only an optimization; cap it so that a very large end_line
    // (e.g. used to read up to the end of the file) does not allocate a huge buffer.
    const size_t reserve_cap = 65536;
    out_vec.reserve(line_num < reserve_cap ? line_num : reserve_cap);
    file_desc.head_strs_.reserve(file_desc.head_strs_.size() + head_num);

    // Only the first head_num lines of the file are head lines.
    for (int i = 0; i < head_num && std::getline(infile, tmp_line); i++)
    {
        file_desc.head_strs_.push_back(tmp_line);
    }

    // Lines in front of start_line are skipped and not stored anywhere.
    for (int i = 0; i < start_line && std::getline(infile, tmp_line); i++) {}

    // Stop at the end of the file: a failed getline leaves tmp_line unchanged,
    // so continuing would store stale copies of the last line.
    for (size_t i = 0; i < line_num && std::getline(infile, tmp_line); i++)
    {
        store_line(tmp_line);
    }

    infile.close();
    return;
}
/**
 * @brief Collect the annotation lines of a text file.
 *
 * Every non-empty line whose first character equals the annotation symbol is
 * stored in out_vec, with the symbol removed and leading/trailing spaces and
 * tabs trimmed. A warning is reported through GCTL_ShowWhatError() when no
 * annotation line is found.
 *
 * @param[in]  filename File name, passed to open_infile().
 * @param[out] out_vec  Receives the annotation texts. Previous content is discarded.
 * @param[in]  file_ext File extension, passed to open_infile().
 * @param[in]  annotate Symbol that marks an annotation line.
 */
void gctl::read_annotations(std::string filename, std::vector<std::string> &out_vec,
    std::string file_ext, char annotate)
{
    std::ifstream infile;
    open_infile(infile, filename, file_ext);

    out_vec.clear();

    const char *blanks = " \t";
    std::string line;
    while (std::getline(infile, line))
    {
        // Skip everything that is not an annotation line.
        if (line.empty() || line[0] != annotate) continue;

        // Remove the annotation symbol.
        line.erase(0, 1);
        // Remove blanks that may surround the annotation text.
        line.erase(0, line.find_first_not_of(blanks));
        line.erase(line.find_last_not_of(blanks) + 1);
        // Keep the cleaned text.
        out_vec.push_back(line);
    }

    if (out_vec.empty())
    {
        GCTL_ShowWhatError("No annotations found, From read_annotations(...)",
            GCTL_WARNING_ERROR, 0, 0, 0);
    }

    infile.close();
}
void gctl::get_xyz_points(const _2d_vector &data_table, array<point3dc> &out_ps, std::string order)
{
//默认的读入的数据列为前三列
int orders[3] = {0, 1, 2};
std::stringstream tmp_ss;
if (order != "0,1,2")
{
str2ss(order, tmp_ss, ",");
tmp_ss >> orders[0] >> orders[1] >> orders[2];
if (tmp_ss.fail())
{
throw runtime_error("Fail to read column orders. From gctl::get_xyz_points(...)");
}
}
int max_col = GCTL_MAX(GCTL_MAX(orders[0], orders[1]), orders[2]);
for (int i = 0; i < data_table.size(); i++)
{
if (data_table[i].size() < max_col+1)
{
throw runtime_error("Invalid column size. From gctl::get_xyz_points(...)");
}
}
// convert to point3dc
out_ps.resize(data_table.size());
for (int i = 0; i < out_ps.size(); i++)
{
out_ps.at(i).x = data_table.at(i).at(orders[0]);
out_ps.at(i).y = data_table.at(i).at(orders[1]);
out_ps.at(i).z = data_table.at(i).at(orders[2]);
}
return;
}
void gctl::get_xyz_points(const _2d_vector &data_table, array<point3ds> &out_ps, std::string order)
{
//默认的读入的数据列为前三列
int orders[3] = {0, 1, 2};
std::stringstream tmp_ss;
if (order != "0,1,2")
{
str2ss(order, tmp_ss, ",");
tmp_ss >> orders[0] >> orders[1] >> orders[2];
if (tmp_ss.fail())
{
throw runtime_error("Fail to read column orders. From gctl::get_xyz_points(...)");
}
}
int max_col = GCTL_MAX(GCTL_MAX(orders[0], orders[1]), orders[2]);
for (int i = 0; i < data_table.size(); i++)
{
if (data_table[i].size() < max_col+1)
{
throw runtime_error("Invalid column size. From gctl::get_xyz_points(...)");
}
}
// convert to point3dc
out_ps.resize(data_table.size());
for (int i = 0; i < out_ps.size(); i++)
{
out_ps.at(i).lon = data_table.at(i).at(orders[0]);
out_ps.at(i).lat = data_table.at(i).at(orders[1]);
out_ps.at(i).rad = data_table.at(i).at(orders[2]);
}
return;
}
void gctl::get_xyz_points(std::string filename, array<point3dc> &out_ps, text_descriptor &desc,
std::string order, std::string file_ext)
{
// read file
_2d_vector points_vec;
read_text2vector2d(desc, points_vec);
get_xyz_points(points_vec, out_ps, order);
destroy_vector(points_vec);
return;
}
void gctl::get_xyz_points(std::string filename, array<point3ds> &out_ps, text_descriptor &desc,
std::string order, std::string file_ext)
{
// read file
_2d_vector points_vec;
read_text2vector2d(desc, points_vec);
get_xyz_points(points_vec, out_ps, order);
destroy_vector(points_vec);
return;
}
/**
* @brief 读取数据列
*
* @param[in] data_table 二维数据向量
* @param[in] dat_val 输入的数据数组列表
* @param[in] dat_col 输入的数据列(数据列不足时会按从左到右顺序读入)
*/
void gctl::get_data_column(const _2d_vector &data_table, std::initializer_list<array<double>*> dat_val,
std::initializer_list<int> dat_col)
{
// 初始化数据列
array<int> orders(dat_val.size());
for (int i = 0; i < dat_val.size(); i++)
{
orders[i] = i;
}
int c_count = 0;
std::initializer_list<int>::iterator ii;
for (ii = dat_col.begin(); ii != dat_col.end(); ++ii)
{
orders[c_count] = *ii;
c_count++;
}
c_count = 0;
std::initializer_list<array<double>*>::iterator id;
array<double> *dat_ptr;
for (id = dat_val.begin(); id != dat_val.end(); ++id)
{
dat_ptr = *id;
dat_ptr->resize(data_table.size());
for (int i = 0; i < data_table.size(); i++)
{
dat_ptr->at(i) = data_table[i][orders[c_count]];
}
c_count++;
}
return;
}
void gctl::get_data_column(std::string filename, std::initializer_list<array<double>*> dat_val,
std::initializer_list<int> dat_col, text_descriptor &desc, std::string file_ext)
{
// read file
_2d_vector data_vec;
read_text2vector2d(desc, data_vec);
get_data_column(data_vec, dat_val, dat_col);
destroy_vector(data_vec);
return;
}
void gctl::save_data_column(std::ofstream &outfile, std::initializer_list<array<double>* > dat_val,
std::initializer_list<std::string> dat_name, char delimiter, int precision)
{
std::initializer_list<std::string>::iterator is;
outfile << "#";
for (is = dat_name.begin(); is != dat_name.end(); ++is)
{
outfile << delimiter << *is;
}
outfile << std::endl;
size_t d_num = dat_val.size(), d_size = 0;
std::initializer_list<array<double>*>::iterator id;
array<double> *dat_ptr;
for (id = dat_val.begin(); id != dat_val.end(); ++id)
{
dat_ptr = *id;
d_size = GCTL_MAX(d_size, dat_ptr->size());
}
_2d_matrix out_data(d_num, d_size, NAN);
size_t d_count = 0;
for (id = dat_val.begin(); id != dat_val.end(); ++id)
{
dat_ptr = *id;
for (size_t j = 0; j < dat_ptr->size(); j++)
{
out_data[d_count][j] = dat_ptr->at(j);
}
d_count++;
}
for (size_t i = 0; i < d_size; i++)
{
outfile << i + 1;
for (size_t j = 0; j < d_num; j++)
{
outfile << std::setprecision(precision) << delimiter << out_data[j][i];
}
outfile << std::endl;
}
return;
}
/**
 * @brief Write several vectors side by side as columns of a text table.
 *
 * The first line is '#' followed by every column name, each preceded by the
 * delimiter. Each following line starts with the one-based row number and
 * continues with one value per vector, each preceded by the delimiter. The
 * number of rows is the size of the longest vector; the values are gathered in
 * a table constructed with NAN as its third argument, so positions beyond the
 * end of a shorter vector presumably print as NaN.
 *
 * @param[in,out] outfile   Output stream to write to. Its precision setting is
 *                          changed by this function whenever a value is written.
 * @param[in]     dat_val   List of pointers to the vectors to write.
 * @param[in]     dat_name  List of column names for the header line.
 * @param[in]     delimiter Character written in front of every name and value.
 * @param[in]     precision Precision applied to the stream for the values.
 *
 * @throws runtime_error if the longest vector is empty (the header line has
 *                       already been written at that point).
 */
void gctl::save_data_column(std::ofstream &outfile, std::initializer_list<std::vector<double>* > dat_val,
    std::initializer_list<std::string> dat_name, char delimiter, int precision)
{
    // Header line.
    outfile << "#";
    for (const std::string &col_name : dat_name)
    {
        outfile << delimiter << col_name;
    }
    outfile << std::endl;

    // The longest vector sets the number of rows.
    size_t d_num = dat_val.size();
    size_t d_size = 0;
    for (std::vector<double> *col : dat_val)
    {
        // size() returns an unsigned type; store it in a size_t first instead
        // of comparing it directly with a signed integer.
        size_t col_size = col->size();
        d_size = GCTL_MAX(d_size, col_size);
    }

    if (d_size <= 0)
    {
        throw runtime_error("Invalid maximal data size (" + std::to_string(d_size) + "), From gctl::save_data_column(...)");
    }

    // Gather the values in a table with one row per vector.
    _2d_matrix out_data(d_num, d_size, NAN);
    size_t d_count = 0;
    for (std::vector<double> *col : dat_val)
    {
        for (size_t j = 0; j < col->size(); j++)
        {
            out_data[d_count][j] = col->at(j);
        }
        d_count++;
    }

    // Write the table transposed: one output line per data index.
    for (size_t i = 0; i < d_size; i++)
    {
        outfile << i + 1;
        for (size_t j = 0; j < d_num; j++)
        {
            outfile << std::setprecision(precision) << delimiter << out_data[j][i];
        }
        outfile << std::endl;
    }
}