gctl/lib/io/text_io2.cpp

305 lines
8.2 KiB
C++
Raw Normal View History

2024-09-10 15:45:07 +08:00
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2023 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "text_io2.h"
/**
 * @brief Construct an empty text_content with the default settings.
 *
 * No file is read. The counters start at zero and the marker characters
 * take their defaults: '#' for annotation lines, '!' (following '#') for
 * tag lines and a single space as the column delimiter.
 */
gctl::text_content::text_content()
{
    // Nothing has been loaded yet, so all counts are zero.
    head_num_ = 0;
    row_num_ = 0;
    col_num_ = 0;

    // Default marker and delimiter characters.
    att_sym_ = '#';
    tag_sym_ = '!';
    deli_sym_ = ' ';
}
/**
 * @brief Destructor; releases all stored content through clear().
 */
gctl::text_content::~text_content()
{
    this->clear();
}
2024-12-12 23:23:55 +08:00
/**
 * @brief Construct a text_content and immediately load a file into it.
 *
 * The object is first given the same default settings as the default
 * constructor ('#', '!', space delimiter, zero counts) and is then filled
 * by load_text().
 *
 * @param filename    File name passed on to load_text().
 * @param file_exten  File extension passed on to load_text().
 * @param t           Header type passed on to load_text().
 */
gctl::text_content::text_content(std::string filename, std::string file_exten, text_head_type_e t)
    : text_content() // apply the default settings first
{
    // Read the file content.
    load_text(filename, file_exten, t);
}
void gctl::text_content::clear()
{
2024-11-04 16:03:39 +08:00
destroy_vector(heads_);
destroy_vector(annotates_);
destroy_vector(tags_);
destroy_vector(lines_);
2024-12-12 23:23:55 +08:00
destroy_vector(col_names_);
2024-11-04 16:03:39 +08:00
destroy_vector(table_);
2024-09-10 15:45:07 +08:00
return;
}
2024-12-12 23:23:55 +08:00
void gctl::text_content::load_text(std::string filename, std::string file_exten, text_head_type_e t)
2024-09-10 15:45:07 +08:00
{
std::ifstream infile;
open_infile(infile, filename, file_exten);
clear();
std::string tmp_line;
std::stringstream tmp_ss;
2024-11-21 12:43:10 +08:00
for (int i = 0; i < head_num_; i++) // 跳过前n行 包括空行 但不会保存空行
2024-09-10 15:45:07 +08:00
{
std::getline(infile, tmp_line);
2024-11-04 16:03:39 +08:00
if (!tmp_line.empty()) heads_.push_back(tmp_line);
2024-09-10 15:45:07 +08:00
}
2024-12-13 10:11:47 +08:00
// 校正头信息行数
head_num_ = heads_.size();
2024-09-10 15:45:07 +08:00
while (std::getline(infile, tmp_line))
{
2024-11-21 12:43:10 +08:00
if (tmp_line.empty()) continue; // 跳过空行
else if (tmp_line[0] == att_sym_) // 注释行或者标记行 # #!
2024-09-10 15:45:07 +08:00
{
2024-11-21 12:43:10 +08:00
if (tmp_line[1] == tag_sym_) // #!
{
tmp_line = tmp_line.substr(2); // 去掉前两个字符
tmp_line.erase(0, tmp_line.find_first_not_of(" \t"));
tmp_line.erase(tmp_line.find_last_not_of(" \t") + 1);
tags_.push_back(tmp_line);
continue;
}
// #
tmp_line = tmp_line.substr(1); // 去掉第一个字符
2024-09-10 15:45:07 +08:00
tmp_line.erase(0, tmp_line.find_first_not_of(" \t"));
tmp_line.erase(tmp_line.find_last_not_of(" \t") + 1);
annotates_.push_back(tmp_line);
}
2024-11-21 12:43:10 +08:00
else lines_.push_back(tmp_line);
2024-09-10 15:45:07 +08:00
}
infile.close();
2024-11-04 16:03:39 +08:00
2024-12-12 23:23:55 +08:00
if (t == HasColumnName)
{
parse_string_to_vector(lines_[0], deli_sym_, col_names_);
lines_.erase(lines_.begin());
}
2024-12-12 16:55:06 +08:00
row_num_ = lines_.size();
table_.resize(row_num_);
2024-11-04 16:03:39 +08:00
2024-12-12 16:55:06 +08:00
int cn;
std::vector<std::string> tmp_cols;
for (size_t i = 0; i < row_num_; i++)
{
tmp_cols.clear();
parse_string_to_vector(lines_[i], deli_sym_, tmp_cols);
cn = tmp_cols.size();
col_num_ = std::max(cn, col_num_);
table_[i].resize(tmp_cols.size());
for (size_t j = 0; j < tmp_cols.size(); j++)
2024-11-04 16:03:39 +08:00
{
2024-12-12 16:55:06 +08:00
table_[i][j].str_ = tmp_cols[j];
}
}
2024-11-04 16:03:39 +08:00
2024-12-12 16:55:06 +08:00
cell_content empty_cell;
empty_cell.str_ = "";
for (size_t i = 0; i < row_num_; i++)
{
cn = table_[i].size();
for (size_t j = cn; j < col_num_; j++)
{
table_[i].push_back(empty_cell);
2024-11-04 16:03:39 +08:00
}
}
2024-12-13 10:11:47 +08:00
for (size_t i = col_names_.size(); i < col_num_; i++)
{
col_names_.push_back("col-" + std::to_string(i + 1));
}
file_ = filename + file_exten;
return;
}
void gctl::text_content::load_csv(std::string filename, text_head_type_e t)
{
set_delimeter(',');
load_text(filename, ".csv", t);
2024-09-10 15:45:07 +08:00
return;
}
2024-12-12 16:55:06 +08:00
void gctl::text_content::save_text(std::string filename, std::string file_exten)
2024-09-10 15:45:07 +08:00
{
std::ofstream outfile;
open_outfile(outfile, filename, file_exten);
for (int i = 0; i < heads_.size(); i++)
{
outfile << heads_[i] << std::endl;
}
for (int i = 0; i < tags_.size(); i++)
{
2024-11-21 12:43:10 +08:00
outfile << "#! " << tags_[i] << std::endl;
2024-09-10 15:45:07 +08:00
}
for (int i = 0; i < annotates_.size(); i++)
{
outfile << "# " << annotates_[i] << std::endl;
}
2024-12-13 10:11:47 +08:00
outfile << col_names_[0];
for (size_t j = 1; j < col_names_.size(); j++)
2024-12-12 23:23:55 +08:00
{
2024-12-13 10:11:47 +08:00
outfile << deli_sym_ << col_names_[j];
2024-12-12 23:23:55 +08:00
}
2024-12-13 10:11:47 +08:00
outfile << std::endl;
2024-12-12 23:23:55 +08:00
2024-12-12 16:55:06 +08:00
for (int i = 0; i < row_num_; i++)
2024-09-10 15:45:07 +08:00
{
2024-12-12 16:55:06 +08:00
outfile << table_[i][0].str_;
for (size_t j = 1; j < col_num_; j++)
2024-11-04 16:03:39 +08:00
{
2024-12-12 16:55:06 +08:00
outfile << deli_sym_ << table_[i][j].str_;
2024-11-04 16:03:39 +08:00
}
2024-12-12 16:55:06 +08:00
outfile << std::endl;
2024-11-04 16:03:39 +08:00
}
2024-12-12 16:55:06 +08:00
outfile.close();
return;
}
2024-12-13 10:11:47 +08:00
void gctl::text_content::save_csv(std::string filename)
{
set_delimeter(',');
save_text(filename, ".csv");
return;
}
2024-12-12 16:55:06 +08:00
/**
 * @brief Size the table to row x col cells and reset every cell string.
 *
 * Each cell's str_ becomes the empty string, and the column names are set
 * to the defaults "col-1", "col-2", ... "col-<col>".
 *
 * @param row  Number of table rows.
 * @param col  Number of table columns.
 */
void gctl::text_content::init_table(int row, int col)
{
    row_num_ = row;
    col_num_ = col;

    // After the outer resize the table holds exactly row_num_ rows, so
    // walking the container visits the same cells as an index loop would.
    table_.resize(row_num_);
    for (auto &table_row : table_)
    {
        table_row.resize(col_num_);
        for (auto &table_cell : table_row)
        {
            table_cell.str_ = "";
        }
    }

    // Default column names, numbered from one.
    col_names_.resize(col_num_);
    for (size_t k = 0; k < col_names_.size(); k++)
    {
        col_names_[k] = "col-" + std::to_string(k + 1);
    }
}
void gctl::text_content::init_table(int row, int col, const std::vector<std::string> &names)
{
init_table(row, col);
set_column_names(names);
return;
}
/**
 * @brief Replace the stored head records.
 *
 * head_num_ is updated to the number of records supplied.
 *
 * @param heads  New head records (copied).
 */
void gctl::text_content::set_head_records(const std::vector<std::string> &heads)
{
    heads_ = heads;
    // Keep the head count in step with the stored records.
    head_num_ = heads_.size();
}
void gctl::text_content::set_annotoations(const std::vector<std::string> &att)
{
annotates_ = att;
return;
}
void gctl::text_content::set_tags(const std::vector<std::string> &tags)
{
tags_ = tags;
2024-09-10 15:45:07 +08:00
return;
2024-12-12 23:23:55 +08:00
}
2024-12-13 10:11:47 +08:00
void gctl::text_content::set_column_names(const std::vector<std::string> &names)
2024-12-12 23:23:55 +08:00
{
2024-12-13 10:11:47 +08:00
if (col_num_ != col_names_.size())
{
throw std::runtime_error("[gctl::text_content::set_column_names] Invalid name size.");
}
2024-12-12 23:23:55 +08:00
col_names_ = names;
return;
2024-12-13 10:11:47 +08:00
}
/**
 * @brief Store a floating-point value in a table cell as text.
 *
 * The value is formatted through a string stream with precision p and the
 * resulting token is written to the cell's str_.
 *
 * @param r  Row index of the cell (not range-checked here).
 * @param c  Column index of the cell (not range-checked here).
 * @param d  Value to store.
 * @param p  Stream precision used for the conversion.
 */
void gctl::text_content::cell(int r, int c, double d, int p)
{
    std::stringstream converter;
    // A fresh stream already has precision 6, so setting it
    // unconditionally gives the same result for p == 6.
    converter.precision(p);
    converter << d;

    std::string &target = table_[r][c].str_;
    converter >> target;
}
/**
 * @brief Print a short summary of the stored content to std::clog.
 *
 * The summary lists the file name, the number of head records,
 * annotations and tags, and then one line per column showing the cell of
 * the first row and the cell of the last row ("first -> last"), prefixed
 * with the column name when one is available.
 *
 * When the table has no rows the per-column lines are omitted, because
 * there is no first or last row to read from.
 */
void gctl::text_content::info()
{
    std::clog << "File: " << file_ << "\n------------\n";
    std::clog << "Head(s): " << head_num_ << "\n";
    std::clog << "Annotation(s): " << annotates_.size() << "\n";
    std::clog << "Tag(s): " << tags_.size() << "\n";
    std::clog << "------------\nColumns:\n============\n";

    // An empty table has no first/last row to show.
    if (table_.empty()) return;

    const auto &first_row = table_.front();
    const auto &last_row = table_.back();

    for (size_t i = 0; i < col_num_; i++)
    {
        // Never read past the cells that are actually stored.
        if (i >= first_row.size() || i >= last_row.size()) break;

        // Prefix with the column name only when this column has one.
        if (i < col_names_.size()) std::clog << col_names_[i] << ": ";

        std::clog << first_row[i].str_ << " -> " << last_row[i].str_ << "\n";
    }
}