gctl/lib/io/dsv_io.cpp
2024-12-28 21:02:10 +08:00

515 lines
13 KiB
C++

/********************************************************
* ██████╗ ██████╗████████╗██╗
* ██╔════╝ ██╔════╝╚══██╔══╝██║
* ██║ ███╗██║ ██║ ██║
* ██║ ██║██║ ██║ ██║
* ╚██████╔╝╚██████╗ ██║ ███████╗
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2023 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "dsv_io.h"
/**
 * @brief Default constructor.
 *
 * Leaves the object with no associated file name, zero head lines and a
 * 0 x 0 table. The annotation symbol is '#', the tag symbol is '!', the
 * delimiter is a single space and the head type is ColumnHead.
 */
gctl::dsv_io::dsv_io()
{
    // Table geometry and head layout.
    row_num_ = 0;
    col_num_ = 0;
    head_num_ = 0;
    thead_ = ColumnHead;

    // Symbols used to recognise annotations, tags and column breaks.
    deli_sym_ = ' ';
    tag_sym_ = '!';
    att_sym_ = '#';

    // No file has been read yet.
    file_ = "";
}
/**
 * @brief Destructor. Delegates all clean-up to clear().
 */
gctl::dsv_io::~dsv_io()
{
    this->clear();
}
/**
 * @brief Construct the object and immediately load a file.
 *
 * The members are first set to the same defaults as the default constructor.
 * A file extension of exactly ".csv" is routed to load_csv(); any other
 * extension is routed to load_text().
 *
 * @param filename    file name handed to the loader
 * @param file_exten  file extension; selects the loader
 * @param t           head type handed to the loader
 */
gctl::dsv_io::dsv_io(std::string filename, std::string file_exten, table_headtype_e t)
{
    // Defaults, identical to the default constructor.
    row_num_ = 0;
    col_num_ = 0;
    head_num_ = 0;
    thead_ = ColumnHead;
    deli_sym_ = ' ';
    tag_sym_ = '!';
    att_sym_ = '#';
    file_ = "";

    const bool is_csv = (file_exten == ".csv");
    if (is_csv)
    {
        load_csv(filename, t);
    }
    else
    {
        load_text(filename, file_exten, t);
    }
}
/**
 * @brief Reset the object to its default-constructed state.
 *
 * Every container member is handed to destroy_vector() and every scalar
 * member is set back to its default value.
 */
void gctl::dsv_io::clear()
{
    // Release the stored records and both tables.
    destroy_vector(heads_);
    destroy_vector(annotates_);
    destroy_vector(tags_);
    destroy_vector(table_);
    destroy_vector(bool_table_);

    // Restore the scalar defaults.
    row_num_ = 0;
    col_num_ = 0;
    head_num_ = 0;
    thead_ = ColumnHead;
    deli_sym_ = ' ';
    tag_sym_ = '!';
    att_sym_ = '#';
    file_ = "";
}
/**
 * @brief Load a delimiter-separated text file into the table.
 *
 * Every non-empty line is classified in this order:
 *  - a line starting with att_sym_ directly followed by tag_sym_ is trimmed
 *    of surrounding spaces/tabs and appended to tags_;
 *  - any other line starting with att_sym_ is trimmed the same way and
 *    appended to annotates_;
 *  - the first head_num_ remaining lines are appended verbatim to heads_;
 *  - all other lines become table rows, split on deli_sym_.
 *
 * Rows shorter than the widest row are padded with empty cells, and every
 * cell is marked for output in bool_table_.
 *
 * @param filename    file name, passed to open_infile() with file_exten
 * @param file_exten  file extension
 * @param t           head type stored in thead_
 */
void gctl::dsv_io::load_text(std::string filename, std::string file_exten, table_headtype_e t)
{
    std::ifstream infile;
    open_infile(infile, filename, file_exten);

    // The column count must describe this file only. Without the reset a
    // previous load (or init_table) with more columns would leak into the
    // std::max() below and pad the new table with phantom columns.
    col_num_ = 0;

    int h = 0;
    std::string tmp_line;
    std::vector<std::string> lines;
    while (std::getline(infile, tmp_line))
    {
        // Files with CRLF line endings leave a trailing '\r' after getline.
        // Remove it so it neither ends up in the last cell nor makes a blank
        // line look non-empty.
        if (!tmp_line.empty() && tmp_line.back() == '\r') tmp_line.pop_back();

        // Blank lines are skipped and do not count towards the head lines.
        if (tmp_line.empty()) continue;

        if (tmp_line[0] == att_sym_) // annotation line or tag line
        {
            const bool is_tag = (tmp_line.size() > 1 && tmp_line[1] == tag_sym_);

            // Drop the leading marker: two characters for a tag, one for an annotation.
            tmp_line = tmp_line.substr(is_tag ? 2 : 1);
            // Trim surrounding spaces and tabs.
            tmp_line.erase(0, tmp_line.find_first_not_of(" \t"));
            tmp_line.erase(tmp_line.find_last_not_of(" \t") + 1);

            if (is_tag) tags_.push_back(tmp_line);
            else annotates_.push_back(tmp_line);
        }
        else if (h < head_num_) // still collecting head lines
        {
            heads_.push_back(tmp_line);
            h++;
        }
        else lines.push_back(tmp_line);
    }
    infile.close();

    // The number of rows is known up front; the number of columns is
    // discovered while parsing.
    row_num_ = lines.size();
    table_.resize(row_num_);

    int cn;
    std::vector<std::string> tmp_cols;
    for (size_t i = 0; i < row_num_; i++)
    {
        tmp_cols.clear();
        parse_string_to_vector(lines[i], deli_sym_, tmp_cols);

        // Track the widest row seen so far.
        cn = tmp_cols.size();
        col_num_ = std::max(cn, col_num_);

        table_[i].resize(tmp_cols.size());
        for (size_t j = 0; j < tmp_cols.size(); j++)
        {
            table_[i][j].str_ = tmp_cols[j];
        }
    }

    // Pad short rows with empty cells so that the table is rectangular.
    table_cell empty_cell;
    empty_cell.str_ = "";
    for (size_t i = 0; i < row_num_; i++)
    {
        cn = table_[i].size();
        for (size_t j = cn; j < col_num_; j++)
        {
            table_[i].push_back(empty_cell);
        }
    }

    // Every cell is enabled for output by default.
    bool_table_.resize(row_num_);
    for (size_t i = 0; i < row_num_; i++)
    {
        bool_table_[i].resize(col_num_);
        for (size_t j = 0; j < col_num_; j++)
        {
            bool_table_[i][j] = true;
        }
    }

    thead_ = t;
    file_ = filename + file_exten;
    destroy_vector(lines);
    return;
}
/**
 * @brief Load a comma-separated file.
 *
 * Switches the delimiter to ',' through set_delimeter() and then defers to
 * load_text() with the extension ".csv".
 *
 * @param filename  file name, without the ".csv" extension
 * @param t         head type forwarded to load_text()
 */
void gctl::dsv_io::load_csv(std::string filename, table_headtype_e t)
{
    const char comma = ',';
    set_delimeter(comma);

    load_text(filename, ".csv", t);
}
/**
 * @brief Write the table to a delimiter-separated text file.
 *
 * The output consists of, in order: the head records, the tags (each
 * prefixed with "#! "), the annotations (each prefixed with "# ") and one
 * line per table row. Within a row only the cells enabled in bool_table_
 * are written, joined by deli_sym_. A row with no enabled cell produces an
 * empty line.
 *
 * @param filename    file name, passed to open_outfile() with file_exten
 * @param file_exten  file extension
 */
void gctl::dsv_io::save_text(std::string filename, std::string file_exten)
{
    std::ofstream outfile;
    open_outfile(outfile, filename, file_exten);

    // '\n' instead of std::endl: the content is the same, but the stream is
    // not flushed after every single line. close() flushes once at the end.
    for (size_t i = 0; i < heads_.size(); i++)
    {
        outfile << heads_[i] << '\n';
    }

    for (size_t i = 0; i < tags_.size(); i++)
    {
        outfile << "#! " << tags_[i] << '\n';
    }

    for (size_t i = 0; i < annotates_.size(); i++)
    {
        outfile << "# " << annotates_[i] << '\n';
    }

    for (int i = 0; i < row_num_; i++)
    {
        // Becomes true once the first enabled cell of this row is written;
        // from then on every enabled cell is preceded by the delimiter.
        bool line_st = false;

        // Starting at column 0 inside the loop, rather than special-casing
        // it, keeps a table with zero columns from being indexed at all.
        for (int j = 0; j < col_num_; j++)
        {
            if (!bool_table_[i][j]) continue;

            if (line_st) outfile << deli_sym_;
            outfile << table_[i][j].str_;
            line_st = true;
        }
        outfile << '\n';
    }

    outfile.close();
    return;
}
/**
 * @brief Write the table as a comma-separated file.
 *
 * Switches the delimiter to ',' through set_delimeter() and then defers to
 * save_text() with the extension ".csv". The delimiter stays ',' afterwards.
 *
 * @param filename  file name, without the ".csv" extension
 */
void gctl::dsv_io::save_csv(std::string filename)
{
    const char comma = ',';
    set_delimeter(comma);

    save_text(filename, ".csv");
}
void gctl::dsv_io::init_table(int row, int col, table_headtype_e t)
{
row_num_ = row;
col_num_ = col;
thead_ = t;
table_.resize(row_num_);
for (size_t i = 0; i < row_num_; i++)
{
table_[i].resize(col_num_);
for (size_t j = 0; j < col_num_; j++)
{
table_[i][j].str_ = "";
}
}
if (t == RowHead || t == BothHead)
{
for (size_t i = 0; i < row_num_; i++)
{
table_[i][0].str_ = "row-" + std::to_string(i);
}
}
if (t == ColumnHead || t == BothHead)
{
for (size_t i = 0; i < col_num_; i++)
{
table_[0][i].str_ = "col-" + std::to_string(i);
}
}
if (t == BothHead)
{
table_[0][0].str_ = "row-idx";
}
return;
}
/**
 * @brief Replace the stored head records.
 *
 * head_num_ is updated to the number of records supplied.
 *
 * @param heads  new head records, copied into heads_
 */
void gctl::dsv_io::set_head_records(const std::vector<std::string> &heads)
{
    heads_ = heads;

    // Keep the head-line counter in step with the stored records.
    head_num_ = heads_.size();
}
/**
 * @brief Replace the stored annotations.
 *
 * @param att  new annotations, copied into annotates_
 */
void gctl::dsv_io::set_annotoations(const std::vector<std::string> &att)
{
    annotates_ = att;
}
/**
 * @brief Replace the stored tags.
 *
 * @param tags  new tags, copied into tags_
 */
void gctl::dsv_io::set_tags(const std::vector<std::string> &tags)
{
    tags_ = tags;
}
/**
 * @brief Print a short summary of the table to std::clog.
 *
 * The summary lists the file name, the number of head lines, annotations
 * and tags, and then one line per column. With a column head the line reads
 * "<name>: <first value> -> <last value>"; otherwise it reads
 * "<first value> -> <last value>". A column whose first cell is disabled in
 * bool_table_ is marked with " (No output)".
 *
 * A table without rows prints no column lines. A table that holds only the
 * column-head row prints the column names without a value range.
 */
void gctl::dsv_io::info()
{
    std::clog << "File: " << file_ << "\n------------\n";
    std::clog << "Head(s): " << head_num_ << "\n";
    std::clog << "Annotation(s): " << annotates_.size() << "\n";
    std::clog << "Tag(s): " << tags_.size() << "\n";
    std::clog << "------------\nColumns:\n";

    const bool has_col_head = (thead_ == ColumnHead || thead_ == BothHead);

    // Row 0 is read for every column, so an empty table has nothing to list.
    const int shown_cols = (row_num_ > 0) ? col_num_ : 0;

    for (int i = 0; i < shown_cols; i++)
    {
        if (has_col_head)
        {
            std::clog << table_[0][i].str_ << ": ";
            // Row 1 holds the first data value; it exists only when there is
            // at least one row below the head row.
            if (row_num_ > 1)
            {
                std::clog << table_[1][i].str_ << " -> " << table_[row_num_ - 1][i].str_;
            }
        }
        else
        {
            std::clog << table_[0][i].str_ << " -> " << table_[row_num_ - 1][i].str_;
        }

        if (!bool_table_[0][i]) std::clog << " (No output)";
        std::clog << std::endl;
    }

    std::clog << "============\n";
    return;
}
/**
 * @brief Find the index of a row or column by its name.
 *
 * When iter_row is true the first cell of every row is compared with name
 * and the matching row index is returned. When iter_row is false the first
 * cell of every column is compared and the matching column index is returned.
 *
 * @param name      name to look for (exact string match)
 * @param iter_row  true to search row names, false to search column names
 * @return index of the first match, or -1 if there is none
 */
int gctl::dsv_io::name_index(std::string name, bool iter_row)
{
    if (!iter_row)
    {
        // Column names are stored in row 0.
        for (size_t c = 0; c < col_num_; c++)
        {
            if (table_[0][c].str_ == name) return c;
        }
        return -1;
    }

    // Row names are stored in column 0.
    for (size_t r = 0; r < row_num_; r++)
    {
        if (table_[r][0].str_ == name) return r;
    }
    return -1;
}
void gctl::dsv_io::column_output(int idx, switch_type_e s)
{
if (idx >= col_num_)
{
throw std::runtime_error("[gctl::dsv_io] Invalid column index.");
}
for (size_t i = 0; i < row_num_; i++)
{
if (s == Enable) bool_table_[i][idx] = true;
else bool_table_[i][idx] = false;
}
return;
}
/**
 * @brief Enable or disable the output of one column, selected by name.
 *
 * @param name  column name, looked up in the first row of the table
 * @param s     Enable to switch the column on, anything else to switch it off
 */
void gctl::dsv_io::column_output(std::string name, switch_type_e s)
{
    // Ask explicitly for a column lookup (iter_row = false). Relying on the
    // default argument is unsafe because the row overload relies on the same
    // default, and one default cannot be right for both.
    column_output(name_index(name, false), s);
    return;
}
void gctl::dsv_io::row_output(int idx, switch_type_e s)
{
if (idx >= row_num_)
{
throw std::runtime_error("[gctl::dsv_io] Invalid row index.");
}
for (size_t i = 0; i < col_num_; i++)
{
if (s == Enable) bool_table_[idx][i] = true;
else bool_table_[idx][i] = false;
}
return;
}
/**
 * @brief Enable or disable the output of one row, selected by name.
 *
 * @param name  row name, looked up in the first column of the table
 * @param s     Enable to switch the row on, anything else to switch it off
 */
void gctl::dsv_io::row_output(std::string name, switch_type_e s)
{
    // Ask explicitly for a row lookup (iter_row = true). Relying on the
    // default argument is unsafe because the column overload relies on the
    // same default, and one default cannot be right for both.
    row_output(name_index(name, true), s);
    return;
}
/**
 * @brief Default constructor. Does no work of its own.
 */
gctl::geodsv_io::geodsv_io()
{
}
/**
 * @brief Destructor. Does no work of its own.
 */
gctl::geodsv_io::~geodsv_io()
{
}
/**
 * @brief Construct the object and immediately load a file.
 *
 * The members are first set to their default values. A file extension of
 * exactly ".csv" is routed to load_csv(); any other extension is routed to
 * load_text().
 *
 * @param filename    file name handed to the loader
 * @param file_exten  file extension; selects the loader
 * @param t           head type handed to the loader
 */
gctl::geodsv_io::geodsv_io(std::string filename, std::string file_exten, table_headtype_e t)
{
    // Default state before anything is read.
    row_num_ = 0;
    col_num_ = 0;
    head_num_ = 0;
    thead_ = ColumnHead;
    deli_sym_ = ' ';
    tag_sym_ = '!';
    att_sym_ = '#';
    file_ = "";

    const bool is_csv = (file_exten == ".csv");
    if (is_csv)
    {
        load_csv(filename, t);
    }
    else
    {
        load_text(filename, file_exten, t);
    }
}
/**
 * @brief Write the x and y components of 2-D points into two columns.
 *
 * Filling starts at the first data row (row 1 when the table has a column
 * head, row 0 otherwise) and stops at the end of the table or of data,
 * whichever comes first.
 *
 * @param xid   column receiving the x components, valid range [0, col_num_)
 * @param yid   column receiving the y components, must differ from xid
 * @param data  points to write
 * @param p     not used by this implementation
 *              NOTE(review): presumably an output precision - confirm against the header
 * @throws std::runtime_error if an index is out of range or the two indices are equal
 */
void gctl::geodsv_io::fill_column_point2dc(int xid, int yid, const array<point2dc> &data, int p)
{
    // Negative values must be rejected too: name_index() reports "not found"
    // as -1 and the name-based overload passes that value straight in.
    if (xid < 0 || yid < 0 || xid >= col_num_ || yid >= col_num_ || xid == yid)
    {
        throw std::runtime_error("[gctl::geodsv_io] Invalid column index.");
    }

    int st = 0;
    if (thead_ == ColumnHead || thead_ == BothHead) st = 1;

    // Keep the count signed: row_num_ - st is negative for a table without
    // rows, and comparing that against an unsigned index would turn it into
    // a huge positive bound.
    const int count = std::min(row_num_ - st, static_cast<int>(data.size()));

    std::stringstream ss;
    std::string s;
    for (int i = 0; i < count; i++)
    {
        // clear() resets the end-of-file state left by the previous read so
        // that the stream accepts the next value.
        ss.clear();
        ss << data[i].x;
        ss >> s;
        table_[i + st][xid].str_ = s;

        ss.clear();
        ss << data[i].y;
        ss >> s;
        table_[i + st][yid].str_ = s;
    }
    return;
}
/**
 * @brief Write 2-D points into two columns selected by name.
 *
 * Both names are resolved with name_index() as column names and the work is
 * forwarded to the index-based overload.
 *
 * @param xname  name of the column receiving the x components
 * @param yname  name of the column receiving the y components
 * @param data   points to write
 * @param p      forwarded unchanged to the index-based overload
 */
void gctl::geodsv_io::fill_column_point2dc(std::string xname, std::string yname, const array<point2dc> &data, int p)
{
    const int x_col = name_index(xname, false);
    const int y_col = name_index(yname, false);

    fill_column_point2dc(x_col, y_col, data, p);
}
/**
 * @brief Write the x, y and z components of 3-D points into three columns.
 *
 * Filling starts at the first data row (row 1 when the table has a column
 * head, row 0 otherwise) and stops at the end of the table or of data,
 * whichever comes first.
 *
 * @param xid   column receiving the x components, valid range [0, col_num_)
 * @param yid   column receiving the y components
 * @param zid   column receiving the z components
 * @param data  points to write
 * @param p     not used by this implementation
 *              NOTE(review): presumably an output precision - confirm against the header
 * @throws std::runtime_error if an index is out of range or two indices are equal
 */
void gctl::geodsv_io::fill_column_point3dc(int xid, int yid, int zid, const array<point3dc> &data, int p)
{
    // Negative values must be rejected too: name_index() reports "not found"
    // as -1 and the name-based overload passes that value straight in.
    if (xid < 0 || yid < 0 || zid < 0 ||
        xid >= col_num_ || yid >= col_num_ || zid >= col_num_ ||
        xid == yid || yid == zid || xid == zid)
    {
        throw std::runtime_error("[gctl::geodsv_io] Invalid column index.");
    }

    int st = 0;
    if (thead_ == ColumnHead || thead_ == BothHead) st = 1;

    // Keep the count signed: row_num_ - st is negative for a table without
    // rows, and comparing that against an unsigned index would turn it into
    // a huge positive bound.
    const int count = std::min(row_num_ - st, static_cast<int>(data.size()));

    std::stringstream ss;
    std::string s;
    for (int i = 0; i < count; i++)
    {
        // clear() resets the end-of-file state left by the previous read so
        // that the stream accepts the next value.
        ss.clear();
        ss << data[i].x;
        ss >> s;
        table_[i + st][xid].str_ = s;

        ss.clear();
        ss << data[i].y;
        ss >> s;
        table_[i + st][yid].str_ = s;

        ss.clear();
        ss << data[i].z;
        ss >> s;
        table_[i + st][zid].str_ = s;
    }
    return;
}
/**
 * @brief Write 3-D points into three columns selected by name.
 *
 * All names are resolved with name_index() as column names and the work is
 * forwarded to the index-based overload.
 *
 * @param xname  name of the column receiving the x components
 * @param yname  name of the column receiving the y components
 * @param zname  name of the column receiving the z components
 * @param data   points to write
 * @param p      forwarded unchanged to the index-based overload
 */
void gctl::geodsv_io::fill_column_point3dc(std::string xname, std::string yname, std::string zname, const array<point3dc> &data, int p)
{
    const int x_col = name_index(xname, false);
    const int y_col = name_index(yname, false);
    const int z_col = name_index(zname, false);

    fill_column_point3dc(x_col, y_col, z_col, data, p);
}
/**
 * @brief Read two columns as the x and y components of 2-D points.
 *
 * data is resized to the number of data rows (all rows, minus the head row
 * when the table has a column head) and filled from the cell values
 * converted with value<double>().
 *
 * @param xid   column holding the x components, valid range [0, col_num_)
 * @param yid   column holding the y components, must differ from xid
 * @param data  receives the points
 * @throws std::runtime_error if an index is out of range or the two indices are equal
 */
void gctl::geodsv_io::get_column_point2dc(int xid, int yid, array<point2dc> &data)
{
    // Negative values must be rejected too: name_index() reports "not found"
    // as -1 and the name-based overload passes that value straight in.
    if (xid < 0 || yid < 0 || xid >= col_num_ || yid >= col_num_ || xid == yid)
    {
        throw std::runtime_error("[gctl::geodsv_io] Invalid column index.");
    }

    int st = 0;
    if (thead_ == ColumnHead || thead_ == BothHead) st = 1;

    // A table without rows would give row_num_ - st == -1; clamp to zero so
    // that a negative value is never passed on as a size.
    const int count = std::max(row_num_ - st, 0);
    data.resize(count);

    for (int i = 0; i < count; i++)
    {
        data[i].x = table_[i + st][xid].value<double>();
        data[i].y = table_[i + st][yid].value<double>();
    }
    return;
}
/**
 * @brief Read 2-D points from two columns selected by name.
 *
 * Both names are resolved with name_index() as column names and the work is
 * forwarded to the index-based overload.
 *
 * @param xname  name of the column holding the x components
 * @param yname  name of the column holding the y components
 * @param data   receives the points
 */
void gctl::geodsv_io::get_column_point2dc(std::string xname, std::string yname, array<point2dc> &data)
{
    const int x_col = name_index(xname, false);
    const int y_col = name_index(yname, false);

    get_column_point2dc(x_col, y_col, data);
}
/**
 * @brief Read three columns as the x, y and z components of 3-D points.
 *
 * data is resized to the number of data rows (all rows, minus the head row
 * when the table has a column head) and filled from the cell values
 * converted with value<double>().
 *
 * @param xid   column holding the x components, valid range [0, col_num_)
 * @param yid   column holding the y components
 * @param zid   column holding the z components
 * @param data  receives the points
 * @throws std::runtime_error if an index is out of range or two indices are equal
 */
void gctl::geodsv_io::get_column_point3dc(int xid, int yid, int zid, array<point3dc> &data)
{
    // Negative values must be rejected too: name_index() reports "not found"
    // as -1 and the name-based overload passes that value straight in.
    if (xid < 0 || yid < 0 || zid < 0 ||
        xid >= col_num_ || yid >= col_num_ || zid >= col_num_ ||
        xid == yid || yid == zid || xid == zid)
    {
        throw std::runtime_error("[gctl::geodsv_io] Invalid column index.");
    }

    int st = 0;
    if (thead_ == ColumnHead || thead_ == BothHead) st = 1;

    // A table without rows would give row_num_ - st == -1; clamp to zero so
    // that a negative value is never passed on as a size.
    const int count = std::max(row_num_ - st, 0);
    data.resize(count);

    for (int i = 0; i < count; i++)
    {
        data[i].x = table_[i + st][xid].value<double>();
        data[i].y = table_[i + st][yid].value<double>();
        data[i].z = table_[i + st][zid].value<double>();
    }
    return;
}
/**
 * @brief Read 3-D points from three columns selected by name.
 *
 * All names are resolved with name_index() as column names and the work is
 * forwarded to the index-based overload.
 *
 * @param xname  name of the column holding the x components
 * @param yname  name of the column holding the y components
 * @param zname  name of the column holding the z components
 * @param data   receives the points
 */
void gctl::geodsv_io::get_column_point3dc(std::string xname, std::string yname, std::string zname, array<point3dc> &data)
{
    const int x_col = name_index(xname, false);
    const int y_col = name_index(yname, false);
    const int z_col = name_index(zname, false);

    get_column_point3dc(x_col, y_col, z_col, data);
}