update dsv_io

This commit is contained in:
张壹 2025-02-12 13:19:22 +08:00
parent fc34ed3e2c
commit 2802086a76
5 changed files with 182 additions and 328 deletions

View File

@ -32,27 +32,19 @@ using namespace gctl;
int main(int argc, char const *argv[]) try
{
dsv_io tc, tout;
dsv_io tc;
tc.delimeter('|');
tc.head_number(1);
tc.load_text("tmp/world_data", ".txt", ColHead|RowHead);
tc.info(AttInfo|HeadInfo|TagInfo|ColInfo|RowInfo);
tc.info(AttInfo|HeadInfo|TagInfo|ColInfo);
//tc.set_column_type(Int, "IndepYear_n");
//tc.filt_column("IndepYear_n < 0", {"IndepYear_n"}, {"Name_s", "Population_n", "GNP_n"}, tout);
tc.filter("America", "Continent_s", ColHead);
tc.save_text("out");
tc.filt_column("America", "Continent_s", {"Name_s", "Population_n", "GNP_n"}, tout);
//tc.match_column("America", "Continent_s", {}, tout);
//tout.add_column("GNP_n2", "Population_n");
//array<int> GNP_n2(tout.row_number(), 1000.0);
//tout.fill_column(GNP_n2, "GNP_n2");
int lr_id = tout.add_row();
tout.fill_row(array<std::string>{"Asia", "China", "14000000", "1949"}, lr_id);
tout.delimeter('|');
tout.save_text("out");
dsv_io tc2 = tc.export_table();
tc2.head_records(tc.head_records());
tc2.delimeter('|');
tc2.save_text("out2");
/*
geodsv_io tc;

View File

@ -342,28 +342,23 @@ void gctl::dsv_io::save_text(std::string filename, std::string file_exten)
outfile << "# " << annotates_[i] << std::endl;
}
// 探测是否有行头
bool col_st = 1;
for (int i = 0; i <= row_num_; i++)
{
if (table_[i][0].out_ok_ && table_[i][0].str_ != "")
for (size_t j = 0; j <= col_num_; j++)
{
col_st = 0;
break;
if (table_[i][j].out_ok_ && table_[i][j].str_!= "")
{
outfile << table_[i][j].str_;
for (size_t k = j + 1; k <= col_num_; k++)
{
if (table_[i][k].out_ok_) outfile << deli_sym_ << table_[i][k].str_;
}
outfile << std::endl;
break;
}
}
}
for (int i = 0; i <= row_num_; i++)
{
// 单独处理第一列 即行头
outfile << table_[i][col_st].str_;
for (int j = col_st + 1; j <= col_num_; j++)
{
if (table_[i][j].out_ok_) outfile << deli_sym_ << table_[i][j].str_;
}
outfile << std::endl;
}
outfile.close();
return;
}
@ -391,6 +386,47 @@ void gctl::dsv_io::init_table(int row, int col)
return;
}
/**
 * @brief Copy the current table into a new dsv_io object.
 *
 * The corner cell text (row 0, column 0), the column names held in row 0,
 * the row names held in column 0 and the body cells are gathered as strings
 * and handed to a freshly constructed table.
 *
 * @param ignore_disabled If true, a column name, row or body cell is copied
 * only when its own out_ok_ flag is set; if false, everything is copied
 * regardless of the flag.
 * @return The newly built table.
 */
gctl::dsv_io gctl::dsv_io::export_table(bool ignore_disabled)
{
    // When disabled cells are not being ignored, every cell passes the checks below.
    const bool keep_all = !ignore_disabled;

    std::vector<std::string> col_heads;
    std::vector<std::string> row_heads;
    std::vector<std::string> line_buf; // reused for every exported row
    std::vector<std::vector<std::string> > body;
    std::string corner = table_[0][0].str_;

    // Column names live in row 0, starting from column 1.
    for (size_t c = 1; c <= col_num_; c++)
    {
        if (keep_all || table_[0][c].out_ok_) col_heads.push_back(table_[0][c].str_);
    }

    for (size_t r = 1; r <= row_num_; r++)
    {
        // A row is selected by the flag on its name cell (column 0).
        if (!(keep_all || table_[r][0].out_ok_)) continue;

        line_buf.clear();
        for (size_t c = 1; c <= col_num_; c++)
        {
            if (keep_all || table_[r][c].out_ok_) line_buf.push_back(table_[r][c].str_);
        }

        body.push_back(line_buf);
        row_heads.push_back(table_[r][0].str_);
    }

    // Assemble the result: body first, then row names (with the corner text), then column names.
    dsv_io out_table;
    out_table.init_table(body);
    out_table.row_names(row_heads, {}, corner);
    out_table.column_names(col_heads);

    destroy_vector(row_heads);
    destroy_vector(col_heads);
    destroy_vector(line_buf);
    destroy_vector(body);
    return out_table;
}
void gctl::dsv_io::info(int t)
{
if (t & HeadInfo)
@ -521,6 +557,19 @@ int gctl::dsv_io::name_index(std::string name, bool iter_row)
}
}
/**
 * @brief Set the output flag of every cell in the table at once.
 *
 * The loops start at index 0, so the cells in row 0 and column 0 are
 * switched together with the body cells.
 *
 * @param s Enable sets out_ok_ to true on all cells; any other value sets it to false.
 */
void gctl::dsv_io::table_output(switch_type_e s)
{
    // Resolve the switch once instead of re-testing it for every cell.
    const bool enabled = (s == Enable);

    for (size_t r = 0; r <= row_num_; r++)
    {
        for (size_t c = 0; c <= col_num_; c++)
        {
            table_[r][c].out_ok_ = enabled;
        }
    }
}
void gctl::dsv_io::column_output(int idx, switch_type_e s)
{
if (idx > col_num_ || idx <= 0)
@ -627,162 +676,63 @@ int gctl::dsv_io::add_row(std::string name, std::string id_name)
return add_row(name, name_index(id_name, true));
}
void gctl::dsv_io::filt_column(std::string cnd_str, std::string cnd_col,
const std::vector<std::string> &out_col, dsv_io &out_table)
void gctl::dsv_io::filter(std::string cnd_str, std::string cnd_tar, table_headtype_e thead)
{
int idx = name_index(cnd_col);
if (idx < 0) throw std::runtime_error("[gctl::dsv_io::] Invalid column index or name.");
int idx;
if (thead == RowHead) idx = name_index(cnd_tar, true);
else if (thead == ColHead) idx = name_index(cnd_tar);
else throw std::runtime_error("[gctl::dsv_io::filter] Invalid table head type.");
array<int> odx;
bool out_row = false;
if (out_col.empty()) out_row = true;
else
{
odx.resize(out_col.size());
for (size_t i = 0; i < out_col.size(); i++)
{
odx[i] = name_index(out_col[i]);
if (odx[i] < 0) throw std::runtime_error("[gctl::dsv_io::] Invalid column index or name.");
}
}
if (idx < 0) throw std::runtime_error("[gctl::dsv_io::filter] Invalid row/column index or name.");
std::smatch ret;
std::regex pat(cnd_str);
std::vector<std::string> str_line, row_names;
std::vector<std::vector<std::string> > str_table;
for (size_t i = 1; i <= row_num_; i++)
if (thead == RowHead) // cnd_tar是行头 此时为按列过滤
{
if (regex_search(table_[i][idx].str_, ret, pat))
for (size_t i = 1; i <= col_num_; i++)
{
if (out_row)
if (!regex_search(table_[idx][i].str_, ret, pat))
{
str_line.clear();
for (size_t j = 1; j <= col_num_; j++)
{
str_line.push_back(table_[i][j].str_);
}
str_table.push_back(str_line);
column_output(i, Disable);
}
else
{
str_line.clear();
str_line.push_back(table_[i][idx].str_);
for (size_t j = 0; j < odx.size(); j++)
{
str_line.push_back(table_[i][odx[j]].str_);
}
str_table.push_back(str_line);
}
row_names.push_back(table_[i][0].str_);
}
}
out_table.init_table(str_table);
std::vector<std::string> io_col;
if (out_row)
else // cnd_tar是列头 此时为按行过滤
{
column_names(io_col);
out_table.cell(table_[0][0].str_, 0, 0);
}
else
{
io_col.push_back(cnd_col);
for (size_t j = 0; j < odx.size(); j++)
for (size_t i = 1; i <= row_num_; i++)
{
io_col.push_back(out_col[j]);
if (!regex_search(table_[i][idx].str_, ret, pat))
{
row_output(i, Disable);
}
}
}
out_table.column_names(io_col);
out_table.row_names(row_names, {}, table_[0][0].str_);
destroy_vector(row_names);
destroy_vector(io_col);
destroy_vector(str_line);
destroy_vector(str_table);
return;
}
void gctl::dsv_io::filt_column(rowbool_func_t func, const std::vector<std::string> &out_col, dsv_io &out_table)
void gctl::dsv_io::filter(linebool_func_t func, table_headtype_e thead)
{
array<int> odx;
bool out_row = false;
if (out_col.empty()) out_row = true;
else
if (thead == RowHead)
{
odx.resize(out_col.size());
for (size_t i = 0; i < out_col.size(); i++)
for (size_t i = 1; i <= row_num_; i++)
{
odx[i] = name_index(out_col[i]);
if (odx[i] < 0) throw std::runtime_error("[gctl::dsv_io::] Invalid column index or name.");
if (!func(table_[i])) row_output(i, Disable);
}
}
std::vector<std::string> str_line, row_names;
std::vector<std::vector<std::string> > str_table;
for (size_t i = 1; i <= row_num_; i++)
else if (thead == ColHead)
{
if (func(table_[i]))
std::vector<table_cell> col_cell(row_num_);
for (size_t i = 1; i <= col_num_; i++)
{
if (out_row)
for (size_t j = 1; j < row_num_; j++)
{
str_line.clear();
for (size_t j = 1; j <= col_num_; j++)
{
str_line.push_back(table_[i][j].str_);
}
str_table.push_back(str_line);
col_cell[j] = table_[j][i];
}
else
{
str_line.clear();
for (size_t j = 0; j < odx.size(); j++)
{
str_line.push_back(table_[i][odx[j]].str_);
}
str_table.push_back(str_line);
}
row_names.push_back(table_[i][0].str_);
if (!func(col_cell)) column_output(i, Disable);
}
}
out_table.init_table(str_table);
std::vector<std::string> io_col;
if (out_row)
{
column_names(io_col);
out_table.cell(table_[0][0].str_, 0, 0);
}
else
{
for (size_t j = 0; j < odx.size(); j++)
{
io_col.push_back(out_col[j]);
}
}
out_table.column_names(io_col);
out_table.row_names(row_names, {}, table_[0][0].str_);
destroy_vector(row_names);
destroy_vector(io_col);
destroy_vector(str_line);
destroy_vector(str_table);
else throw std::runtime_error("[gctl::dsv_io::filter] Invalid table head type.");
return;
}
@ -834,41 +784,45 @@ void gctl::dsv_io::cal_column(std::string expr_str, const std::vector<std::strin
return;
}
void gctl::dsv_io::filt_column(std::string cnd_str, const std::vector<std::string> &cnd_col,
const std::vector<std::string> &out_col, dsv_io& out_table)
void gctl::dsv_io::filter(std::string cnd_str, const std::vector<std::string> &cnd_tars, table_headtype_e thead)
{
array<int> idx(cnd_col.size());
for (size_t i = 0; i < cnd_col.size(); i++)
array<int> idx(cnd_tars.size());
if (thead == RowHead)
{
idx[i] = name_index(cnd_col[i]);
if (idx[i] < 0) throw std::runtime_error("[gctl::dsv_io::] Invalid column index or name.");
if (table_[0][idx[i]].type_ != Int && table_[0][idx[i]].type_ != Float)
for (size_t i = 0; i < cnd_tars.size(); i++)
{
throw std::runtime_error("[gctl::dsv_io] Invalid column type for numerical calculating.");
idx[i] = name_index(cnd_tars[i], true);
if (idx[i] <= 0 || idx[i] > row_num_) throw std::runtime_error("[gctl::dsv_io::filter] Invalid row index or name.");
if (table_[idx[i]][0].type_ != Int && table_[idx[i]][0].type_ != Float)
{
throw std::runtime_error("[gctl::dsv_io::filter] Invalid row type for numerical calculating.");
}
}
}
array<int> odx;
bool out_row = false;
if (out_col.empty()) out_row = true;
else
else if (thead == ColHead)
{
odx.resize(out_col.size());
for (size_t i = 0; i < out_col.size(); i++)
for (size_t i = 0; i < cnd_tars.size(); i++)
{
odx[i] = name_index(out_col[i]);
if (odx[i] < 0) throw std::runtime_error("[gctl::dsv_io::] Invalid column index or name.");
idx[i] = name_index(cnd_tars[i]);
if (idx[i] <= 0 || idx[i] > col_num_) throw std::runtime_error("[gctl::dsv_io::filter] Invalid column index or name.");
if (table_[0][idx[i]].type_ != Int && table_[0][idx[i]].type_ != Float)
{
throw std::runtime_error("[gctl::dsv_io::filter] Invalid column type for numerical calculating.");
}
}
}
else throw std::runtime_error("[gctl::dsv_io::filter] Invalid table head type.");
exprtk::symbol_table<double> symbol_table;
array<double> var(cnd_col.size());
array<double> var(cnd_tars.size());
for (size_t i = 0; i < var.size(); i++)
{
symbol_table.add_variable(cnd_col[i], var[i]);
symbol_table.add_variable(cnd_tars[i], var[i]);
}
exprtk::expression<double> expression;
@ -878,79 +832,34 @@ void gctl::dsv_io::filt_column(std::string cnd_str, const std::vector<std::strin
if (!parser.compile(cnd_str, expression))
{
throw std::runtime_error("[gctl::dsv_io] Fail to compile the math expression.");
}
}
std::vector<std::string> str_line, row_names;
std::vector<std::vector<std::string> > str_table;
for (size_t i = 1; i <= row_num_; i++)
if (thead == RowHead) // cnd_tars是行头 此时为按列过滤
{
for (size_t j = 0; j < var.size(); j++)
for (size_t i = 1; i <= col_num_; i++)
{
var[j] = table_[i][idx[j]].value<double>();
}
if (expression.value() > 0.5) // return 1 if matched or 0 if dismatched
{
if (out_row)
for (size_t j = 0; j < var.size(); j++)
{
str_line.clear();
for (size_t j = 1; j <= col_num_; j++)
{
str_line.push_back(table_[i][j].str_);
}
str_table.push_back(str_line);
}
else
{
str_line.clear();
for (size_t j = 0; j < idx.size(); j++)
{
str_line.push_back(table_[i][idx[j]].str_);
}
for (size_t j = 0; j < odx.size(); j++)
{
str_line.push_back(table_[i][odx[j]].str_);
}
str_table.push_back(str_line);
var[j] = table_[idx[j]][i].value<double>();
}
row_names.push_back(table_[i][0].str_);
// return 1 if matched or 0 if dismatched
if (expression.value() < 0.5) column_output(i, Disable);
}
}
out_table.init_table(str_table);
std::vector<std::string> io_col;
if (out_row)
else // cnd_tars是列头 此时为按行过滤
{
column_names(io_col);
out_table.cell(table_[0][0].str_, 0, 0);
}
else
{
for (size_t j = 0; j < idx.size(); j++)
for (size_t i = 1; i <= row_num_; i++)
{
io_col.push_back(cnd_col[j]);
}
for (size_t j = 0; j < var.size(); j++)
{
var[j] = table_[i][idx[j]].value<double>();
}
for (size_t j = 0; j < odx.size(); j++)
{
io_col.push_back(out_col[j]);
// return 1 if matched or 0 if dismatched
if (expression.value() < 0.5) row_output(i, Disable);
}
}
out_table.column_names(io_col);
out_table.row_names(row_names, {}, table_[0][0].str_);
destroy_vector(row_names);
destroy_vector(io_col);
destroy_vector(str_line);
destroy_vector(str_table);
return;
}

View File

@ -251,14 +251,14 @@ namespace gctl
*
* @param att
*/
void annotoations(const std::vector<std::string> &att){annotates_ = att;}
void annotations(const std::vector<std::string> &att){annotates_ = att;}
/**
* @brief
*
* @return
*/
const std::vector<std::string> &annotoations(){return annotates_;}
const std::vector<std::string> &annotations(){return annotates_;}
/**
* @brief
@ -421,6 +421,14 @@ namespace gctl
* @param col
*/
void init_table(int row, int col);
/**
* @brief
*
* @param ignore_disabled
* @return
*/
dsv_io export_table(bool ignore_disabled = true);
/**
* @brief
@ -438,6 +446,13 @@ namespace gctl
*/
int name_index(std::string name, bool iter_row = false);
/**
* @brief
*
* @param s
*/
void table_output(switch_type_e s);
/**
* @brief 使
*
@ -511,32 +526,27 @@ namespace gctl
int add_row(std::string name, std::string id_name);
/**
* @brief
*
* @note
* @brief
*
* @param cnd_str
* @param cnd_col
* @param out_col
* @param out_table
* @param cnd_tar
* @param thead RowHead时表示按列过滤ColHead时表示按行过滤
*/
void filt_column(std::string cnd_str, std::string cnd_col,
const std::vector<std::string> &out_col, dsv_io &out_table);
void filter(std::string cnd_str, std::string cnd_tar, table_headtype_e thead = RowHead);
/**
* @brief row operate function pointer
* @brief table line operate function pointer
*
*/
typedef bool (*rowbool_func_t)(const std::vector<table_cell> &table_row);
typedef bool (*linebool_func_t)(const std::vector<table_cell> &table_line);
/**
* @brief
* @brief
*
* @param func
* @param out_col
* @param out_table
* @param func
* @param thead RowHead时表示按行过滤ColHead时表示按列过滤
*/
void filt_column(rowbool_func_t func, const std::vector<std::string> &out_col, dsv_io &out_table);
void filter(linebool_func_t func, table_headtype_e thead = RowHead);
#ifdef GCTL_EXPRTK
@ -553,18 +563,16 @@ namespace gctl
void cal_column(std::string expr_str, const std::vector<std::string> &col_list, int p = 6);
/**
* @brief
* @brief
*
* @note float和Int类型的列数据才能用于计算exprtk库完成
* 使strtk库的相关内容使
*
* @param cnd_str
* @param cnd_col
* @param out_col
* @param out_table
* @param cnd_tars
* @param thead RowHead时表示按列过滤ColHead时表示按行过滤
*/
void filt_column(std::string cnd_str, const std::vector<std::string> &cnd_col,
const std::vector<std::string> &out_col, dsv_io &out_table);
void filter(std::string cnd_str, const std::vector<std::string> &cnd_tars, table_headtype_e thead = RowHead);
#endif // GCTL_EXPRTK

53
out.txt
View File

@ -1,53 +0,0 @@
Code_s|Continent_s|Name_s|Population_n|GNP_n
ANT|North America|Netherlands Antilles|217000|1941
AIA|North America|Anguilla|8000|63.2
ATG|North America|Antigua and Barbuda|68000|612
ARG|South America|Argentina|37032000|340238
ABW|North America|Aruba|103000|828
BHS|North America|Bahamas|307000|3527
BRB|North America|Barbados|270000|2223
BLZ|North America|Belize|241000|630
BMU|North America|Bermuda|65000|2328
BOL|South America|Bolivia|8329000|8571
BRA|South America|Brazil|170115000|776739
VGB|North America|Virgin Islands, British|21000|612
CYM|North America|Cayman Islands|38000|1263
CHL|South America|Chile|15211000|72949
CRI|North America|Costa Rica|4023000|10226
DMA|North America|Dominica|71000|256
DOM|North America|Dominican Republic|8495000|15846
ECU|South America|Ecuador|12646000|19770
SLV|North America|El Salvador|6276000|11863
FLK|South America|Falkland Islands|2000|0
GRD|North America|Grenada|94000|318
GRL|North America|Greenland|56000|0
GLP|North America|Guadeloupe|456000|3501
GTM|North America|Guatemala|11385000|19008
GUY|South America|Guyana|861000|722
HTI|North America|Haiti|8222000|3459
HND|North America|Honduras|6485000|5333
JAM|North America|Jamaica|2583000|6871
CAN|North America|Canada|31147000|598862
COL|South America|Colombia|42321000|102896
CUB|North America|Cuba|11201000|17843
MTQ|North America|Martinique|395000|2731
MEX|North America|Mexico|98881000|414972
MSR|North America|Montserrat|11000|109
NIC|North America|Nicaragua|5074000|1988
PAN|North America|Panama|2856000|9131
PRY|South America|Paraguay|5496000|8444
PER|South America|Peru|25662000|64140
PRI|North America|Puerto Rico|3869000|34100
GUF|South America|French Guiana|181000|681
KNA|North America|Saint Kitts and Nevis|38000|299
LCA|North America|Saint Lucia|154000|571
VCT|North America|Saint Vincent and the Grenadines|114000|285
SPM|North America|Saint Pierre and Miquelon|7000|0
SUR|South America|Suriname|417000|870
TTO|North America|Trinidad and Tobago|1295000|6232
TCA|North America|Turks and Caicos Islands|17000|96
URY|South America|Uruguay|3337000|20831
VEN|South America|Venezuela|24170000|95023
USA|North America|United States|278357000|8510700
VIR|North America|Virgin Islands, U.S.|93000|0
|Asia|China|14000000|1949

View File

@ -460,20 +460,18 @@ void rand_data(const std::vector<std::string> &cmd_units)
void filt_data(const std::vector<std::string> &cmd_units)
{
// filter <out-table> <expression> <cdn1>,<cdn2>,<cdn3>... <out1>,<out2>,<out3>...
if (cmd_units.size() < 5) throw std::runtime_error("filter: insufficient parameters.");
std::vector<std::string> tar_names, out_names;
// filter row|col <expression> <cdn1>,<cdn2>,<cdn3>...
if (cmd_units.size() < 4) throw std::runtime_error("filter: insufficient parameters.");
std::vector<std::string> tar_names;
parse_string_to_vector(cmd_units[3], ',', tar_names);
parse_string_to_vector(cmd_units[4], ',', out_names);
dsv_io out_table;
if (tar_names.size() == 1) tc.filt_column(cmd_units[2], tar_names[0], out_names, out_table);
else tc.filt_column(cmd_units[2], tar_names, out_names, out_table);
table_headtype_e thead;
if (cmd_units[1] == "row") thead = RowHead;
else if (cmd_units[1] == "col") thead = ColHead;
else throw std::runtime_error("filter: invalid parameters.");
std::string naked_name, exten_name;
parse_filename(cmd_units[1], naked_name, exten_name);
if (exten_name == ".csv") out_table.save_csv(cmd_units[1]);
else out_table.save_text(naked_name, exten_name);
if (tar_names.size() == 1) tc.filter(cmd_units[2], tar_names[0], thead);
else tc.filter(cmd_units[2], tar_names, thead);
return;
}