diff --git a/example/text_io_ex.cpp b/example/text_io_ex.cpp index 4559ec1..f864ca0 100644 --- a/example/text_io_ex.cpp +++ b/example/text_io_ex.cpp @@ -32,25 +32,22 @@ using namespace gctl; int main(int argc, char const *argv[]) try { -/* - dsv_io tc; + + dsv_io tc, tout; tc.set_delimeter('|'); tc.load_text("tmp/world_data", ".txt", BothHead); - tc.info(BothHead); + //tc.info(); - //_1s_vector name = tc.get_row_names(); - //display_vector(name); + //tc.set_column_type(Int, "IndepYear_n"); + //tc.filt_column("IndepYear_n < 0", {"IndepYear_n"}, {"Name_s", "Population_n", "GNP_n"}, tout); - //_1s_array name; - //tc.get_column("Name_s", name); - //name.show(std::cout, '|'); + tc.filt_column("America", "Continent_s", {"Name_s", "Population_n", "GNP_n"}, tout); + //tc.match_column("America", "Continent_s", {}, tout); - //tc.get_row("AUS", name); - //name.show(std::cout, ','); - - tc.save_csv("out"); -*/ + tout.set_delimeter('|'); + tout.save_text("out"); +/* geodsv_io tc; tc.load_text("tmp/topo", ".txt", ColumnHead); tc.set_column_names({"x (m)", "y (m)", "elev (m)"}); @@ -96,6 +93,7 @@ int main(int argc, char const *argv[]) try std::clog << std::setprecision(12) << tc.cell(2, 1) << "\n"; tc.info(); +*/ return 0; } catch(std::exception &e) diff --git a/lib/io/dsv_io.cpp b/lib/io/dsv_io.cpp index 38a6fe5..1115f62 100644 --- a/lib/io/dsv_io.cpp +++ b/lib/io/dsv_io.cpp @@ -77,7 +77,7 @@ void gctl::dsv_io::clear() void gctl::dsv_io::get_row_names(std::vector &names) { names.resize(row_num_); - for (size_t i = 1; i < row_num_; i++) + for (size_t i = 1; i <= row_num_; i++) { names[i - 1] = table_[i][0].str_; } @@ -87,21 +87,21 @@ void gctl::dsv_io::get_row_names(std::vector &names) void gctl::dsv_io::get_column_names(std::vector &names) { names.resize(col_num_); - for (size_t i = 1; i < col_num_; i++) + for (size_t i = 1; i <= col_num_; i++) { names[i - 1] = table_[0][i].str_; } return; } -void gctl::dsv_io::set_row_names(const std::vector &names) +void gctl::dsv_io::set_row_names(const std::vector &names, std::string corner_name) { for (size_t i = 1; i <= std::min(row_num_, (int) names.size()); i++) { table_[i][0].str_ = names[i - 1]; } - table_[0][0].str_ = "row_name"; + table_[0][0].str_ = corner_name; return; } @@ -438,6 +438,11 @@ int gctl::dsv_io::name_index(std::string name, bool iter_row) if (iter_row) { + for (size_t i = 1; i <= row_num_; i++) + { + if (table_[i][0].str_ == name) return i; + } + if (regex_search(name, ret, patr)) { int r = atoi(std::string(ret[1]).c_str()); @@ -445,15 +450,15 @@ int gctl::dsv_io::name_index(std::string name, bool iter_row) else return -1; } - for (size_t i = 1; i <= row_num_; i++) - { - if (table_[i][0].str_ == name) return i; - } - return -1; } else { + for (size_t i = 1; i <= col_num_; i++) + { + if (table_[0][i].str_ == name) return i; + } + if (regex_search(name, ret, patc)) { int c = atoi(std::string(ret[1]).c_str()); @@ -461,11 +466,6 @@ int gctl::dsv_io::name_index(std::string name, bool iter_row) else return -1; } - for (size_t i = 1; i <= col_num_; i++) - { - if (table_[0][i].str_ == name) return i; - } - return -1; } } @@ -593,6 +593,165 @@ void gctl::dsv_io::add_row(std::string id_name, std::string name) return; } +void gctl::dsv_io::filt_column(std::string cnd_str, std::string cnd_col, + const std::vector &out_col, dsv_io &out_table) +{ + int idx = name_index(cnd_col); + if (idx < 0) throw std::runtime_error("[gctl::dsv_io::] Invalid column index or name."); + + array odx; + bool out_row = false; + if (out_col.empty()) out_row = true; + else + { + odx.resize(out_col.size()); + for (size_t i = 0; i < out_col.size(); i++) + { + odx[i] = name_index(out_col[i]); + if (odx[i] < 0) throw std::runtime_error("[gctl::dsv_io::] Invalid column index or name."); + } + } + + std::smatch ret; + std::regex pat(cnd_str); + std::vector str_line, row_names; + std::vector > str_table; + + for (size_t i = 1; i <= row_num_; i++) + { + if (regex_search(table_[i][idx].str_, ret, pat)) + { + if (out_row) + { + str_line.clear(); + + for (size_t j = 1; j <= col_num_; j++) + { + str_line.push_back(table_[i][j].str_); + } + + str_table.push_back(str_line); + } + else + { + str_line.clear(); + str_line.push_back(table_[i][idx].str_); + + for (size_t j = 0; j < odx.size(); j++) + { + str_line.push_back(table_[i][odx[j]].str_); + } + + str_table.push_back(str_line); + } + + row_names.push_back(table_[i][0].str_); + } + } + + out_table.init_table(str_table); + + std::vector io_col; + if (out_row) + { + get_column_names(io_col); + out_table.cell(table_[0][0].str_, 0, 0); + } + else + { + io_col.push_back(cnd_col); + + for (size_t j = 0; j < odx.size(); j++) + { + io_col.push_back(out_col[j]); + } + } + + out_table.set_column_names(io_col); + out_table.set_row_names(row_names, table_[0][0].str_); + + destroy_vector(row_names); + destroy_vector(io_col); + destroy_vector(str_line); + destroy_vector(str_table); + return; +} + +void gctl::dsv_io::filt_column(rowbool_func_t func, const std::vector &out_col, dsv_io &out_table) +{ + array odx; + bool out_row = false; + if (out_col.empty()) out_row = true; + else + { + odx.resize(out_col.size()); + for (size_t i = 0; i < out_col.size(); i++) + { + odx[i] = name_index(out_col[i]); + if (odx[i] < 0) throw std::runtime_error("[gctl::dsv_io::] Invalid column index or name."); + } + } + + std::vector str_line, row_names; + std::vector > str_table; + + for (size_t i = 1; i <= row_num_; i++) + { + if (func(table_[i])) + { + if (out_row) + { + str_line.clear(); + + for (size_t j = 1; j <= col_num_; j++) + { + str_line.push_back(table_[i][j].str_); + } + + str_table.push_back(str_line); + } + else + { + str_line.clear(); + + for (size_t j = 0; j < odx.size(); j++) + { + str_line.push_back(table_[i][odx[j]].str_); + } + + str_table.push_back(str_line); + } + + row_names.push_back(table_[i][0].str_); + } + } + + out_table.init_table(str_table); + + std::vector io_col; + if (out_row) + { + get_column_names(io_col); + out_table.cell(table_[0][0].str_, 0, 0); + } + else + { + for (size_t j = 0; j < odx.size(); j++) + { + io_col.push_back(out_col[j]); + } + } + + out_table.set_column_names(io_col); + out_table.set_row_names(row_names, table_[0][0].str_); + + destroy_vector(row_names); + destroy_vector(io_col); + destroy_vector(str_line); + destroy_vector(str_table); + return; +} + #ifdef GCTL_EXPRTK void gctl::dsv_io::cal_column(std::string expr_str, const std::vector &col_list, int p) @@ -602,6 +761,8 @@ void gctl::dsv_io::cal_column(std::string expr_str, const std::vector odx(out_col.size()); - for (size_t i = 0; i < out_col.size(); i++) + array odx; + bool out_row = false; + if (out_col.empty()) out_row = true; + else { - odx[i] = name_index(out_col[i]); + odx.resize(out_col.size()); + for (size_t i = 0; i < out_col.size(); i++) + { + odx[i] = name_index(out_col[i]); + if (odx[i] < 0) throw std::runtime_error("[gctl::dsv_io::] Invalid column index or name."); + } } exprtk::symbol_table symbol_table; @@ -664,7 +834,7 @@ void gctl::dsv_io::filt_column(std::string cnd_str, const std::vector expression; @@ -676,7 +846,7 @@ void gctl::dsv_io::filt_column(std::string cnd_str, const std::vector str_line; + std::vector str_line, row_names; std::vector > str_table; for (size_t i = 1; i <= row_num_; i++) @@ -688,18 +858,65 @@ void gctl::dsv_io::filt_column(std::string cnd_str, const std::vector 0.5) // return 1 if matched or 0 if dismatched { - str_line.clear(); - for (size_t j = 0; j < odx.size(); j++) + if (out_row) { - str_line.push_back(table_[i][odx[j]].str_); + str_line.clear(); + + for (size_t j = 1; j <= col_num_; j++) + { + str_line.push_back(table_[i][j].str_); + } + + str_table.push_back(str_line); } - - str_table.push_back(str_line); + else + { + str_line.clear(); + for (size_t j = 0; j < idx.size(); j++) + { + str_line.push_back(table_[i][idx[j]].str_); + } + + for (size_t j = 0; j < odx.size(); j++) + { + str_line.push_back(table_[i][odx[j]].str_); + } + + str_table.push_back(str_line); + } + + row_names.push_back(table_[i][0].str_); } } out_table.init_table(str_table); - out_table.set_column_names(out_col); + + std::vector io_col; + if (out_row) + { + get_column_names(io_col); + out_table.cell(table_[0][0].str_, 0, 0); + } + else + { + for (size_t j = 0; j < idx.size(); j++) + { + io_col.push_back(cnd_col[j]); + } + + for (size_t j = 0; j < odx.size(); j++) + { + io_col.push_back(out_col[j]); + } + } + + out_table.set_column_names(io_col); + out_table.set_row_names(row_names, table_[0][0].str_); + + destroy_vector(row_names); + destroy_vector(io_col); + destroy_vector(str_line); + destroy_vector(str_table); return; } diff --git a/lib/io/dsv_io.h b/lib/io/dsv_io.h index 92ca29e..c6f2fba 100644 --- a/lib/io/dsv_io.h +++ b/lib/io/dsv_io.h @@ -275,7 +275,7 @@ namespace gctl * * @param names 名称 */ - void set_row_names(const std::vector &names); + void set_row_names(const std::vector &names, std::string corner_name = "row-idx"); /** * @brief 设置列名称 @@ -449,6 +449,32 @@ namespace gctl */ void add_row(std::string id_name, std::string name = ""); + /** + * @brief 按行过滤并返回符合条件的列数据 + * + * @param cnd_str 正则表达式 + * @param cnd_col 用于匹配正则表达式的列名称 + * @param out_col 输出的列索引列表(列表为空时则会输出所有列),正则表达式为真时即筛选这些行与列上对应的数据 + * @param out_table 输出的表格 + */ + void filt_column(std::string cnd_str, std::string cnd_col, + const std::vector &out_col, dsv_io &out_table); + + /** + * @brief row operate function pointer + * + */ + typedef bool (*rowbool_func_t)(const std::vector &table_row); + + /** + * @brief 按行过滤并返回符合条件的列数据 + * + * @param func 处理行类容的布尔函数 + * @param out_col 输出的列索引列表(列表为空时则会输出所有列),正则表达式为真时即筛选这些行与列上对应的数据 + * @param out_table 输出的表格 + */ + void filt_column(rowbool_func_t func, const std::vector &out_col, dsv_io &out_table); + #ifdef GCTL_EXPRTK /** @@ -467,10 +493,11 @@ namespace gctl * @brief 按行过滤并返回符合条件的列数据 * * @note 只有单元格类型为float和Int类型的列数据才能用于计算。计算由exprtk库完成,支持的表达式见其说明文档。 + * 因为没有使用strtk库的相关内容,所以并不支持对字符串与数字类型的混合条件判断。基于字符串的内容提取请使用其他函数。 * * @param cnd_str 条件表达式 * @param cnd_col 用于条件表达式的列索引列表 - * @param out_col 输出的列索引列表,即条件判断为真是即筛选这些行与列上对应的数据 + * @param out_col 输出的列索引列表(列表为空时则会输出所有列),即条件判断为真时即筛选这些行与列上对应的数据 * @param out_table 输出的表格 */ void filt_column(std::string cnd_str, const std::vector &cnd_col,