update dsv_io

This commit is contained in:
张壹 2025-02-12 13:19:22 +08:00
parent fc34ed3e2c
commit 2802086a76
5 changed files with 182 additions and 328 deletions

View File

@ -32,27 +32,19 @@ using namespace gctl;
int main(int argc, char const *argv[]) try int main(int argc, char const *argv[]) try
{ {
dsv_io tc, tout; dsv_io tc;
tc.delimeter('|'); tc.delimeter('|');
tc.head_number(1); tc.head_number(1);
tc.load_text("tmp/world_data", ".txt", ColHead|RowHead); tc.load_text("tmp/world_data", ".txt", ColHead|RowHead);
tc.info(AttInfo|HeadInfo|TagInfo|ColInfo|RowInfo); tc.info(AttInfo|HeadInfo|TagInfo|ColInfo);
//tc.set_column_type(Int, "IndepYear_n"); tc.filter("America", "Continent_s", ColHead);
//tc.filt_column("IndepYear_n < 0", {"IndepYear_n"}, {"Name_s", "Population_n", "GNP_n"}, tout); tc.save_text("out");
tc.filt_column("America", "Continent_s", {"Name_s", "Population_n", "GNP_n"}, tout); dsv_io tc2 = tc.export_table();
//tc.match_column("America", "Continent_s", {}, tout); tc2.head_records(tc.head_records());
tc2.delimeter('|');
//tout.add_column("GNP_n2", "Population_n"); tc2.save_text("out2");
//array<int> GNP_n2(tout.row_number(), 1000.0);
//tout.fill_column(GNP_n2, "GNP_n2");
int lr_id = tout.add_row();
tout.fill_row(array<std::string>{"Asia", "China", "14000000", "1949"}, lr_id);
tout.delimeter('|');
tout.save_text("out");
/* /*
geodsv_io tc; geodsv_io tc;

View File

@ -342,26 +342,21 @@ void gctl::dsv_io::save_text(std::string filename, std::string file_exten)
outfile << "# " << annotates_[i] << std::endl; outfile << "# " << annotates_[i] << std::endl;
} }
// 探测是否有行头
bool col_st = 1;
for (int i = 0; i <= row_num_; i++) for (int i = 0; i <= row_num_; i++)
{ {
if (table_[i][0].out_ok_ && table_[i][0].str_ != "") for (size_t j = 0; j <= col_num_; j++)
{ {
col_st = 0; if (table_[i][j].out_ok_ && table_[i][j].str_!= "")
{
outfile << table_[i][j].str_;
for (size_t k = j + 1; k <= col_num_; k++)
{
if (table_[i][k].out_ok_) outfile << deli_sym_ << table_[i][k].str_;
}
outfile << std::endl;
break; break;
} }
} }
for (int i = 0; i <= row_num_; i++)
{
// 单独处理第一列 即行头
outfile << table_[i][col_st].str_;
for (int j = col_st + 1; j <= col_num_; j++)
{
if (table_[i][j].out_ok_) outfile << deli_sym_ << table_[i][j].str_;
}
outfile << std::endl;
} }
outfile.close(); outfile.close();
@ -391,6 +386,47 @@ void gctl::dsv_io::init_table(int row, int col)
return; return;
} }
/**
 * @brief Copy the cell strings of this table into a new dsv_io object.
 *
 * Row 0 supplies the column names, column 0 supplies the row names and
 * cell (0, 0) supplies the corner name handed to row_names(). Only the
 * string content (str_) of each cell is transferred by this function.
 *
 * @param ignore_disabled When true, cells whose out_ok_ flag is false are
 * left out: a whole row is dropped when its row-name cell is disabled, and
 * a column name or data cell is dropped when that cell itself is disabled.
 * When false, every row and column is exported regardless of the flags.
 * @return The newly built table.
 */
gctl::dsv_io gctl::dsv_io::export_table(bool ignore_disabled)
{
    // When disabled cells are not ignored, every cell passes the output test.
    const bool keep_all = !ignore_disabled;

    std::vector<std::string> str_line, row_names, col_names;
    std::vector<std::vector<std::string> > str_table;
    std::string cor_name = table_[0][0].str_;

    // Column names come from the header row (row 0); column 0 is skipped.
    for (size_t c = 1; c <= col_num_; c++)
    {
        if (keep_all || table_[0][c].out_ok_)
        {
            col_names.push_back(table_[0][c].str_);
        }
    }

    // Data rows: the flag of the row-name cell decides whether the row is exported.
    for (size_t r = 1; r <= row_num_; r++)
    {
        if (!keep_all && !table_[r][0].out_ok_) continue;

        str_line.clear();
        for (size_t c = 1; c <= col_num_; c++)
        {
            if (keep_all || table_[r][c].out_ok_)
            {
                str_line.push_back(table_[r][c].str_);
            }
        }

        str_table.push_back(str_line);
        row_names.push_back(table_[r][0].str_);
    }

    // Assemble the new table from the collected strings.
    dsv_io out_table;
    out_table.init_table(str_table);
    out_table.row_names(row_names, {}, cor_name);
    out_table.column_names(col_names);

    // Release the temporary buffers before returning.
    destroy_vector(row_names);
    destroy_vector(col_names);
    destroy_vector(str_line);
    destroy_vector(str_table);
    return out_table;
}
void gctl::dsv_io::info(int t) void gctl::dsv_io::info(int t)
{ {
if (t & HeadInfo) if (t & HeadInfo)
@ -521,6 +557,19 @@ int gctl::dsv_io::name_index(std::string name, bool iter_row)
} }
} }
/**
 * @brief Set the output flag of every cell in the table at once.
 *
 * Covers rows 0..row_num_ and columns 0..col_num_, so the header row and
 * the row-name column are included.
 *
 * @param s Enable sets out_ok_ to true for every cell; any other value
 * sets it to false.
 */
void gctl::dsv_io::table_output(switch_type_e s)
{
    // The flag value is the same for every cell, so resolve it once.
    const bool flag = (s == Enable);

    for (size_t r = 0; r <= row_num_; r++)
    {
        for (size_t c = 0; c <= col_num_; c++)
        {
            table_[r][c].out_ok_ = flag;
        }
    }
    return;
}
void gctl::dsv_io::column_output(int idx, switch_type_e s) void gctl::dsv_io::column_output(int idx, switch_type_e s)
{ {
if (idx > col_num_ || idx <= 0) if (idx > col_num_ || idx <= 0)
@ -627,162 +676,63 @@ int gctl::dsv_io::add_row(std::string name, std::string id_name)
return add_row(name, name_index(id_name, true)); return add_row(name, name_index(id_name, true));
} }
void gctl::dsv_io::filt_column(std::string cnd_str, std::string cnd_col, void gctl::dsv_io::filter(std::string cnd_str, std::string cnd_tar, table_headtype_e thead)
const std::vector<std::string> &out_col, dsv_io &out_table)
{ {
int idx = name_index(cnd_col); int idx;
if (idx < 0) throw std::runtime_error("[gctl::dsv_io::] Invalid column index or name."); if (thead == RowHead) idx = name_index(cnd_tar, true);
else if (thead == ColHead) idx = name_index(cnd_tar);
else throw std::runtime_error("[gctl::dsv_io::filter] Invalid table head type.");
array<int> odx; if (idx < 0) throw std::runtime_error("[gctl::dsv_io::filter] Invalid row/column index or name.");
bool out_row = false;
if (out_col.empty()) out_row = true;
else
{
odx.resize(out_col.size());
for (size_t i = 0; i < out_col.size(); i++)
{
odx[i] = name_index(out_col[i]);
if (odx[i] < 0) throw std::runtime_error("[gctl::dsv_io::] Invalid column index or name.");
}
}
std::smatch ret; std::smatch ret;
std::regex pat(cnd_str); std::regex pat(cnd_str);
std::vector<std::string> str_line, row_names; if (thead == RowHead) // cnd_tar是行头 此时为按列过滤
std::vector<std::vector<std::string> > str_table; {
for (size_t i = 1; i <= col_num_; i++)
{
if (!regex_search(table_[idx][i].str_, ret, pat))
{
column_output(i, Disable);
}
}
}
else // cnd_tar是列头 此时为按行过滤
{
for (size_t i = 1; i <= row_num_; i++) for (size_t i = 1; i <= row_num_; i++)
{ {
if (regex_search(table_[i][idx].str_, ret, pat)) if (!regex_search(table_[i][idx].str_, ret, pat))
{ {
if (out_row) row_output(i, Disable);
{
str_line.clear();
for (size_t j = 1; j <= col_num_; j++)
{
str_line.push_back(table_[i][j].str_);
}
str_table.push_back(str_line);
}
else
{
str_line.clear();
str_line.push_back(table_[i][idx].str_);
for (size_t j = 0; j < odx.size(); j++)
{
str_line.push_back(table_[i][odx[j]].str_);
}
str_table.push_back(str_line);
}
row_names.push_back(table_[i][0].str_);
} }
} }
out_table.init_table(str_table);
std::vector<std::string> io_col;
if (out_row)
{
column_names(io_col);
out_table.cell(table_[0][0].str_, 0, 0);
} }
else
{
io_col.push_back(cnd_col);
for (size_t j = 0; j < odx.size(); j++)
{
io_col.push_back(out_col[j]);
}
}
out_table.column_names(io_col);
out_table.row_names(row_names, {}, table_[0][0].str_);
destroy_vector(row_names);
destroy_vector(io_col);
destroy_vector(str_line);
destroy_vector(str_table);
return; return;
} }
void gctl::dsv_io::filt_column(rowbool_func_t func, const std::vector<std::string> &out_col, dsv_io &out_table) void gctl::dsv_io::filter(linebool_func_t func, table_headtype_e thead)
{ {
array<int> odx; if (thead == RowHead)
bool out_row = false;
if (out_col.empty()) out_row = true;
else
{ {
odx.resize(out_col.size());
for (size_t i = 0; i < out_col.size(); i++)
{
odx[i] = name_index(out_col[i]);
if (odx[i] < 0) throw std::runtime_error("[gctl::dsv_io::] Invalid column index or name.");
}
}
std::vector<std::string> str_line, row_names;
std::vector<std::vector<std::string> > str_table;
for (size_t i = 1; i <= row_num_; i++) for (size_t i = 1; i <= row_num_; i++)
{ {
if (func(table_[i])) if (!func(table_[i])) row_output(i, Disable);
}
}
else if (thead == ColHead)
{ {
if (out_row) std::vector<table_cell> col_cell(row_num_);
for (size_t i = 1; i <= col_num_; i++)
{ {
str_line.clear(); for (size_t j = 1; j < row_num_; j++)
for (size_t j = 1; j <= col_num_; j++)
{ {
str_line.push_back(table_[i][j].str_); col_cell[j] = table_[j][i];
} }
str_table.push_back(str_line); if (!func(col_cell)) column_output(i, Disable);
}
else
{
str_line.clear();
for (size_t j = 0; j < odx.size(); j++)
{
str_line.push_back(table_[i][odx[j]].str_);
}
str_table.push_back(str_line);
}
row_names.push_back(table_[i][0].str_);
} }
} }
else throw std::runtime_error("[gctl::dsv_io::filter] Invalid table head type.");
out_table.init_table(str_table);
std::vector<std::string> io_col;
if (out_row)
{
column_names(io_col);
out_table.cell(table_[0][0].str_, 0, 0);
}
else
{
for (size_t j = 0; j < odx.size(); j++)
{
io_col.push_back(out_col[j]);
}
}
out_table.column_names(io_col);
out_table.row_names(row_names, {}, table_[0][0].str_);
destroy_vector(row_names);
destroy_vector(io_col);
destroy_vector(str_line);
destroy_vector(str_table);
return; return;
} }
@ -834,41 +784,45 @@ void gctl::dsv_io::cal_column(std::string expr_str, const std::vector<std::strin
return; return;
} }
void gctl::dsv_io::filt_column(std::string cnd_str, const std::vector<std::string> &cnd_col, void gctl::dsv_io::filter(std::string cnd_str, const std::vector<std::string> &cnd_tars, table_headtype_e thead)
const std::vector<std::string> &out_col, dsv_io& out_table)
{ {
array<int> idx(cnd_col.size()); array<int> idx(cnd_tars.size());
for (size_t i = 0; i < cnd_col.size(); i++) if (thead == RowHead)
{ {
idx[i] = name_index(cnd_col[i]); for (size_t i = 0; i < cnd_tars.size(); i++)
{
idx[i] = name_index(cnd_tars[i], true);
if (idx[i] < 0) throw std::runtime_error("[gctl::dsv_io::] Invalid column index or name."); if (idx[i] <= 0 || idx[i] > row_num_) throw std::runtime_error("[gctl::dsv_io::filter] Invalid row index or name.");
if (table_[idx[i]][0].type_ != Int && table_[idx[i]][0].type_ != Float)
{
throw std::runtime_error("[gctl::dsv_io::filter] Invalid row type for numerical calculating.");
}
}
}
else if (thead == ColHead)
{
for (size_t i = 0; i < cnd_tars.size(); i++)
{
idx[i] = name_index(cnd_tars[i]);
if (idx[i] <= 0 || idx[i] > col_num_) throw std::runtime_error("[gctl::dsv_io::filter] Invalid column index or name.");
if (table_[0][idx[i]].type_ != Int && table_[0][idx[i]].type_ != Float) if (table_[0][idx[i]].type_ != Int && table_[0][idx[i]].type_ != Float)
{ {
throw std::runtime_error("[gctl::dsv_io] Invalid column type for numerical calculating."); throw std::runtime_error("[gctl::dsv_io::filter] Invalid column type for numerical calculating.");
} }
} }
array<int> odx;
bool out_row = false;
if (out_col.empty()) out_row = true;
else
{
odx.resize(out_col.size());
for (size_t i = 0; i < out_col.size(); i++)
{
odx[i] = name_index(out_col[i]);
if (odx[i] < 0) throw std::runtime_error("[gctl::dsv_io::] Invalid column index or name.");
}
} }
else throw std::runtime_error("[gctl::dsv_io::filter] Invalid table head type.");
exprtk::symbol_table<double> symbol_table; exprtk::symbol_table<double> symbol_table;
array<double> var(cnd_col.size()); array<double> var(cnd_tars.size());
for (size_t i = 0; i < var.size(); i++) for (size_t i = 0; i < var.size(); i++)
{ {
symbol_table.add_variable(cnd_col[i], var[i]); symbol_table.add_variable(cnd_tars[i], var[i]);
} }
exprtk::expression<double> expression; exprtk::expression<double> expression;
@ -880,9 +834,21 @@ void gctl::dsv_io::filt_column(std::string cnd_str, const std::vector<std::strin
throw std::runtime_error("[gctl::dsv_io] Fail to compile the math expression."); throw std::runtime_error("[gctl::dsv_io] Fail to compile the math expression.");
} }
std::vector<std::string> str_line, row_names; if (thead == RowHead) // cnd_tars是行头 此时为按列过滤
std::vector<std::vector<std::string> > str_table; {
for (size_t i = 1; i <= col_num_; i++)
{
for (size_t j = 0; j < var.size(); j++)
{
var[j] = table_[idx[j]][i].value<double>();
}
// return 1 if matched or 0 if dismatched
if (expression.value() < 0.5) column_output(i, Disable);
}
}
else // cnd_tars是列头 此时为按行过滤
{
for (size_t i = 1; i <= row_num_; i++) for (size_t i = 1; i <= row_num_; i++)
{ {
for (size_t j = 0; j < var.size(); j++) for (size_t j = 0; j < var.size(); j++)
@ -890,67 +856,10 @@ void gctl::dsv_io::filt_column(std::string cnd_str, const std::vector<std::strin
var[j] = table_[i][idx[j]].value<double>(); var[j] = table_[i][idx[j]].value<double>();
} }
if (expression.value() > 0.5) // return 1 if matched or 0 if dismatched // return 1 if matched or 0 if dismatched
{ if (expression.value() < 0.5) row_output(i, Disable);
if (out_row)
{
str_line.clear();
for (size_t j = 1; j <= col_num_; j++)
{
str_line.push_back(table_[i][j].str_);
}
str_table.push_back(str_line);
}
else
{
str_line.clear();
for (size_t j = 0; j < idx.size(); j++)
{
str_line.push_back(table_[i][idx[j]].str_);
}
for (size_t j = 0; j < odx.size(); j++)
{
str_line.push_back(table_[i][odx[j]].str_);
}
str_table.push_back(str_line);
}
row_names.push_back(table_[i][0].str_);
} }
} }
out_table.init_table(str_table);
std::vector<std::string> io_col;
if (out_row)
{
column_names(io_col);
out_table.cell(table_[0][0].str_, 0, 0);
}
else
{
for (size_t j = 0; j < idx.size(); j++)
{
io_col.push_back(cnd_col[j]);
}
for (size_t j = 0; j < odx.size(); j++)
{
io_col.push_back(out_col[j]);
}
}
out_table.column_names(io_col);
out_table.row_names(row_names, {}, table_[0][0].str_);
destroy_vector(row_names);
destroy_vector(io_col);
destroy_vector(str_line);
destroy_vector(str_table);
return; return;
} }

View File

@ -251,14 +251,14 @@ namespace gctl
* *
* @param att * @param att
*/ */
void annotoations(const std::vector<std::string> &att){annotates_ = att;} void annotations(const std::vector<std::string> &att){annotates_ = att;}
/** /**
* @brief * @brief
* *
* @return * @return
*/ */
const std::vector<std::string> &annotoations(){return annotates_;} const std::vector<std::string> &annotations(){return annotates_;}
/** /**
* @brief * @brief
@ -422,6 +422,14 @@ namespace gctl
*/ */
void init_table(int row, int col); void init_table(int row, int col);
/**
* @brief
*
* @param ignore_disabled
* @return
*/
dsv_io export_table(bool ignore_disabled = true);
/** /**
* @brief * @brief
* *
@ -438,6 +446,13 @@ namespace gctl
*/ */
int name_index(std::string name, bool iter_row = false); int name_index(std::string name, bool iter_row = false);
/**
* @brief
*
* @param s
*/
void table_output(switch_type_e s);
/** /**
* @brief 使 * @brief 使
* *
@ -511,32 +526,27 @@ namespace gctl
int add_row(std::string name, std::string id_name); int add_row(std::string name, std::string id_name);
/** /**
* @brief * @brief
*
* @note
* *
* @param cnd_str * @param cnd_str
* @param cnd_col * @param cnd_tar
* @param out_col * @param thead RowHead时表示按列过滤ColHead时表示按行过滤
* @param out_table
*/ */
void filt_column(std::string cnd_str, std::string cnd_col, void filter(std::string cnd_str, std::string cnd_tar, table_headtype_e thead = RowHead);
const std::vector<std::string> &out_col, dsv_io &out_table);
/** /**
* @brief row operate function pointer * @brief table line operate function pointer
* *
*/ */
typedef bool (*rowbool_func_t)(const std::vector<table_cell> &table_row); typedef bool (*linebool_func_t)(const std::vector<table_cell> &table_line);
/** /**
* @brief * @brief
* *
* @param func * @param func
* @param out_col * @param thead RowHead时表示按行过滤ColHead时表示按列过滤
* @param out_table
*/ */
void filt_column(rowbool_func_t func, const std::vector<std::string> &out_col, dsv_io &out_table); void filter(linebool_func_t func, table_headtype_e thead = RowHead);
#ifdef GCTL_EXPRTK #ifdef GCTL_EXPRTK
@ -553,18 +563,16 @@ namespace gctl
void cal_column(std::string expr_str, const std::vector<std::string> &col_list, int p = 6); void cal_column(std::string expr_str, const std::vector<std::string> &col_list, int p = 6);
/** /**
* @brief * @brief
* *
* @note float和Int类型的列数据才能用于计算exprtk库完成 * @note float和Int类型的列数据才能用于计算exprtk库完成
* 使strtk库的相关内容使 * 使strtk库的相关内容使
* *
* @param cnd_str * @param cnd_str
* @param cnd_col * @param cnd_tars
* @param out_col * @param thead RowHead时表示按列过滤ColHead时表示按行过滤
* @param out_table
*/ */
void filt_column(std::string cnd_str, const std::vector<std::string> &cnd_col, void filter(std::string cnd_str, const std::vector<std::string> &cnd_tars, table_headtype_e thead = RowHead);
const std::vector<std::string> &out_col, dsv_io &out_table);
#endif // GCTL_EXPRTK #endif // GCTL_EXPRTK

53
out.txt
View File

@ -1,53 +0,0 @@
Code_s|Continent_s|Name_s|Population_n|GNP_n
ANT|North America|Netherlands Antilles|217000|1941
AIA|North America|Anguilla|8000|63.2
ATG|North America|Antigua and Barbuda|68000|612
ARG|South America|Argentina|37032000|340238
ABW|North America|Aruba|103000|828
BHS|North America|Bahamas|307000|3527
BRB|North America|Barbados|270000|2223
BLZ|North America|Belize|241000|630
BMU|North America|Bermuda|65000|2328
BOL|South America|Bolivia|8329000|8571
BRA|South America|Brazil|170115000|776739
VGB|North America|Virgin Islands, British|21000|612
CYM|North America|Cayman Islands|38000|1263
CHL|South America|Chile|15211000|72949
CRI|North America|Costa Rica|4023000|10226
DMA|North America|Dominica|71000|256
DOM|North America|Dominican Republic|8495000|15846
ECU|South America|Ecuador|12646000|19770
SLV|North America|El Salvador|6276000|11863
FLK|South America|Falkland Islands|2000|0
GRD|North America|Grenada|94000|318
GRL|North America|Greenland|56000|0
GLP|North America|Guadeloupe|456000|3501
GTM|North America|Guatemala|11385000|19008
GUY|South America|Guyana|861000|722
HTI|North America|Haiti|8222000|3459
HND|North America|Honduras|6485000|5333
JAM|North America|Jamaica|2583000|6871
CAN|North America|Canada|31147000|598862
COL|South America|Colombia|42321000|102896
CUB|North America|Cuba|11201000|17843
MTQ|North America|Martinique|395000|2731
MEX|North America|Mexico|98881000|414972
MSR|North America|Montserrat|11000|109
NIC|North America|Nicaragua|5074000|1988
PAN|North America|Panama|2856000|9131
PRY|South America|Paraguay|5496000|8444
PER|South America|Peru|25662000|64140
PRI|North America|Puerto Rico|3869000|34100
GUF|South America|French Guiana|181000|681
KNA|North America|Saint Kitts and Nevis|38000|299
LCA|North America|Saint Lucia|154000|571
VCT|North America|Saint Vincent and the Grenadines|114000|285
SPM|North America|Saint Pierre and Miquelon|7000|0
SUR|South America|Suriname|417000|870
TTO|North America|Trinidad and Tobago|1295000|6232
TCA|North America|Turks and Caicos Islands|17000|96
URY|South America|Uruguay|3337000|20831
VEN|South America|Venezuela|24170000|95023
USA|North America|United States|278357000|8510700
VIR|North America|Virgin Islands, U.S.|93000|0
|Asia|China|14000000|1949

View File

@ -460,20 +460,18 @@ void rand_data(const std::vector<std::string> &cmd_units)
void filt_data(const std::vector<std::string> &cmd_units) void filt_data(const std::vector<std::string> &cmd_units)
{ {
// filter <out-table> <expression> <cdn1>,<cdn2>,<cdn3>... <out1>,<out2>,<out3>... // filter row|col <expression> <cdn1>,<cdn2>,<cdn3>...
if (cmd_units.size() < 5) throw std::runtime_error("filter: insufficient parameters."); if (cmd_units.size() < 4) throw std::runtime_error("filter: insufficient parameters.");
std::vector<std::string> tar_names, out_names; std::vector<std::string> tar_names;
parse_string_to_vector(cmd_units[3], ',', tar_names); parse_string_to_vector(cmd_units[3], ',', tar_names);
parse_string_to_vector(cmd_units[4], ',', out_names);
dsv_io out_table; table_headtype_e thead;
if (tar_names.size() == 1) tc.filt_column(cmd_units[2], tar_names[0], out_names, out_table); if (cmd_units[1] == "row") thead = RowHead;
else tc.filt_column(cmd_units[2], tar_names, out_names, out_table); else if (cmd_units[1] == "col") thead = ColHead;
else throw std::runtime_error("filter: invalid parameters.");
std::string naked_name, exten_name; if (tar_names.size() == 1) tc.filter(cmd_units[2], tar_names[0], thead);
parse_filename(cmd_units[1], naked_name, exten_name); else tc.filter(cmd_units[2], tar_names, thead);
if (exten_name == ".csv") out_table.save_csv(cmd_units[1]);
else out_table.save_text(naked_name, exten_name);
return; return;
} }