/******************************************************** * ██████╗ ██████╗████████╗██╗ * ██╔════╝ ██╔════╝╚══██╔══╝██║ * ██║ ███╗██║ ██║ ██║ * ██║ ██║██║ ██║ ██║ * ╚██████╔╝╚██████╗ ██║ ███████╗ * ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝ * Geophysical Computational Tools & Library (GCTL) * * Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn) * * GCTL is distributed under a dual licensing scheme. You can redistribute * it and/or modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation, either version 2 * of the License, or (at your option) any later version. You should have * received a copy of the GNU Lesser General Public License along with this * program. If not, see . * * If the terms and conditions of the LGPL v.2. would prevent you from using * the GCTL, please consider the option to obtain a commercial license for a * fee. These licenses are offered by the GCTL's original author. As a rule, * licenses are provided "as-is", unlimited in time for a one time fee. Please * send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget * to include some description of your company and the realm of its activities. * Also add information on how to contact you by electronic and paper mail. ******************************************************/ #include "clcg.h" /** * Default parameter for conjugate gradient methods */ static const gctl::clcg_para clcg_defparam = {0, 1e-8, 0, 1e-8}; gctl::clcg_solver::clcg_solver() { clcg_param_ = clcg_defparam; clcg_inter_ = 1; clcg_msg_ = CLCG_ITERATION; } gctl::clcg_solver::~clcg_solver(){} int gctl::clcg_solver::CLCG_Progress(const array > &m, const double converge, const clcg_para ¶m, size_t t, std::ostream &ss) { std::string ss_str = typeid(ss).name(); if (ss_str.find("ofstream") != std::string::npos) { if (clcg_msg_ > CLCG_ERROR && converge <= param.epsilon) // clcg_msg_ == CLCG_SOLUTION { ss << "Iterations: " << std::setw(6) << t << ", Convergence: " << converge; } } else if (clcg_msg_ > CLCG_ERROR) { if (converge <= param.epsilon) // clcg_msg_ == CLCG_SOLUTION { ss << GCTL_CLEARLINE << "\rIterations: " << std::setw(6) << t << ", Convergence: " << converge << std::endl; return 0; } if (clcg_msg_ == CLCG_ITERATION && clcg_inter_ > 0 && t%clcg_inter_ == 0) { ss << GCTL_CLEARLINE << "\rIterations: " << std::setw(6) << t << ", Convergence: " << converge; } } return 0; } void gctl::clcg_solver::set_clcg_message(clcg_message_type msg) { clcg_msg_ = msg; return; } void gctl::clcg_solver::set_clcg_report_interval(size_t inter) { clcg_inter_ = inter; return; } void gctl::clcg_solver::set_clcg_para(const clcg_para &in_param) { clcg_param_ = in_param; return; } gctl::clcg_para gctl::clcg_solver::default_clcg_para() { clcg_para dp = clcg_defparam; return dp; } #ifdef GCTL_OPTIMIZATION_TOML void gctl::clcg_solver::set_clcg_para(std::string filename) { toml::value toml_data; toml_data = toml::parse(filename); set_clcg_para(toml_data); return; } void gctl::clcg_solver::set_clcg_para(const toml::value &toml_data) { clcg_param_ = clcg_defparam; std::string CLCG = "clcg"; if (toml_data.contains(CLCG)) { if (toml_data.at(CLCG).contains("max_iterations")) clcg_param_.max_iterations = toml::find(toml_data, CLCG, "max_iterations"); if (toml_data.at(CLCG).contains("epsilon")) clcg_param_.epsilon = toml::find(toml_data, CLCG, "epsilon"); if (toml_data.at(CLCG).contains("abs_diff")) clcg_param_.abs_diff = toml::find(toml_data, CLCG, "abs_diff"); } return; } #endif // GCTL_OPTIMIZATION_TOML void gctl::clcg_solver::clcg_error_str(clcg_return_code err_code, std::ostream &ss) { std::string ss_str = typeid(ss).name(); #if defined _WINDOWS || __WIN32__ if (clcg_msg_ > CLCG_ERROR) { if (ss_str.find("ofstream") != std::string::npos) { if (err_code >= 0) ss << "CLCG Success! "; else ss << "CLCG Fail! "; } else { if (err_code >= 0) { SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_GREEN); ss << "CLCG Success! "; } else { SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_RED); ss << "CLCG Fail! "; } } } #else if (clcg_msg_ > CLCG_ERROR) { if (ss_str.find("ofstream") != std::string::npos) { if (err_code >= 0) ss << "CLCG Success! "; else ss << "CLCG Fail! "; } else { if (err_code >= 0) ss << "\033[1m\033[32mCLCG Success! "; else ss << "\033[1m\033[31mCLCG Fail! "; } } #endif std::string err_str; switch (err_code) { case CLCG_SUCCESS: err_str = "Iteration reached convergence."; break; case CLCG_STOP: err_str = "Iteration is stopped by the progress evaluation function."; break; case CLCG_ALREADY_OPTIMIZIED: err_str = "The variables are already optimized."; break; case CLCG_UNKNOWN_ERROR: err_str = "Unknown error."; break; case CLCG_INVILAD_VARIABLE_SIZE: err_str = "The size of the variables is negative."; break; case CLCG_INVILAD_MAX_ITERATIONS: err_str = "The maximal iteration times is negative."; break; case CLCG_INVILAD_EPSILON: err_str = "The epsilon is not in the range (0, 1)."; break; case CLCG_REACHED_MAX_ITERATIONS: err_str = "The maximal iteration has been reached."; break; case CLCG_NAN_VALUE: err_str = "The model values are NaN."; break; case CLCG_INVALID_POINTER: err_str = "Invalid pointer."; break; case CLCG_SIZE_NOT_MATCH: err_str = "The sizes of the solution and target do not match."; break; case CLCG_UNKNOWN_SOLVER: err_str = "Unknown solver."; break; default: err_str = "Unknown error."; break; } if (clcg_msg_ == CLCG_THROW && err_code < 0) throw std::runtime_error(err_str.c_str()); else if (clcg_msg_ == CLCG_ERROR && err_code < 0) ss << err_str; else if (clcg_msg_ > CLCG_ERROR) ss << err_str; #if defined _WINDOWS || __WIN32__ if (clcg_msg_ > CLCG_ERROR) { if (ss_str.find("ofstream") != std::string::npos) { ss << " "; } else { SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7); ss << std::endl; } } #else if (clcg_msg_ > CLCG_ERROR) { if (ss_str.find("ofstream") != std::string::npos) ss << " "; else ss << "\033[0m" << std::endl; } #endif return; } void gctl::clcg_solver::CLCG_Minimize(array > &m, const array > &B, clcg_solver_type solver_id, std::ostream &ss, bool verbose, bool er_throw) { #ifdef GCTL_OPENMP double start = omp_get_wtime(); if (solver_id == CLCG_BICG) clbicg(m, B, ss); else if (solver_id == CLCG_BICG_SYM) clbicg_symmetric(m, B, ss); else if (solver_id == CLCG_CGS) clcgs(m, B, ss); else if (solver_id == CLCG_BICGSTAB) clbicgstab(m, B, ss); else if (solver_id == CLCG_TFQMR) cltfqmr(m, B, ss); else throw std::invalid_argument("Invalid solver type."); double end = omp_get_wtime(); double costime = 1000*(end-start); #else clock_t start = clock(); if (solver_id == CLCG_BICG) clbicg(m, B, ss); else if (solver_id == CLCG_BICG_SYM) clbicg_symmetric(m, B, ss); else if (solver_id == CLCG_CGS) clcgs(m, B, ss); else if (solver_id == CLCG_BICGSTAB) clbicgstab(m, B, ss); else if (solver_id == CLCG_TFQMR) cltfqmr(m, B, ss); else throw std::invalid_argument("Invalid solver type."); clock_t end = clock(); double costime = 1000*(end-start)/(double)CLOCKS_PER_SEC; #endif if (clcg_msg_ > CLCG_ERROR) { switch (solver_id) { case CLCG_BICG: ss << "Solver: " << std::setw(9) << "BiCG, Times cost: " << costime << " ms" << std::endl; break; case CLCG_BICG_SYM: ss << "Solver: " << std::setw(9) << "BiCG-Sym, Times cost: " << costime << " ms" << std::endl; break; case CLCG_CGS: ss << "Solver: " << std::setw(9) << "CGS, Times cost: " << costime << " ms" << std::endl; break; case CLCG_BICGSTAB: ss << "Solver: " << std::setw(9) << "BiCG-Stab, Times cost: " << costime << " ms" << std::endl; break; case CLCG_TFQMR: ss << "Solver: " << std::setw(9) << "TFQMR, Times cost: " << costime << " ms" << std::endl; break; default: ss << "Solver: " << std::setw(9) << "Unknown, Times cost: " << costime << " ms" << std::endl; break; } } return; } void gctl::clcg_solver::clbicg(array > &m, const array > &B, std::ostream &ss) { size_t n_size = B.size(); //check parameters if (n_size <= 0) return clcg_error_str(CLCG_INVILAD_VARIABLE_SIZE, ss); if (clcg_param_.max_iterations < 0) return clcg_error_str(CLCG_INVILAD_MAX_ITERATIONS, ss); if (clcg_param_.epsilon <= 0.0 || clcg_param_.epsilon >= 1.0) return clcg_error_str(CLCG_INVILAD_EPSILON, ss); r1k.resize(n_size); r2k.resize(n_size); d1k.resize(n_size); d2k.resize(n_size); Ax.resize(n_size); std::complex ak, Ad1d2, r1r2_next, betak; CLCG_Ax(m, Ax, gctl::NoTrans, gctl::NoConj); std::complex one_z(1.0, 0.0); vecdiff(r1k, B, Ax, one_z, one_z); veccpy(d1k, r1k, one_z); vecconj(r2k, r1k); veccpy(d2k, r2k, one_z); std::complex r1r2 = vecinner(r2k, r1k); double r0_square, rk_square; std::complex r0_mod; std::complex rk_mod = vecinner(r1k, r1k); r0_square = rk_square = std::norm(rk_mod); if (r0_square < 1.0) r0_square = 1.0; if (clcg_param_.abs_diff && sqrt(rk_square)/n_size <= clcg_param_.epsilon) { CLCG_Progress(m, sqrt(rk_square)/n_size, clcg_param_, 0, ss); return clcg_error_str(CLCG_ALREADY_OPTIMIZIED, ss); } else if (rk_square/r0_square <= clcg_param_.epsilon) { CLCG_Progress(m, rk_square/r0_square, clcg_param_, 0, ss); return clcg_error_str(CLCG_ALREADY_OPTIMIZIED, ss); } double residual; size_t t = 0; while(1) { if (clcg_param_.abs_diff) residual = sqrt(rk_square)/n_size; else residual = rk_square/r0_square; if (CLCG_Progress(m, residual, clcg_param_, t, ss)) { return clcg_error_str(CLCG_STOP, ss); } if (residual <= clcg_param_.epsilon) { return clcg_error_str(CLCG_CONVERGENCE, ss); } if (clcg_param_.max_iterations > 0 && t+1 > clcg_param_.max_iterations) { return clcg_error_str(CLCG_REACHED_MAX_ITERATIONS, ss); } t++; CLCG_Ax(d1k, Ax, gctl::NoTrans, gctl::NoConj); Ad1d2 = vecinner(d2k, Ax); ak = r1r2/Ad1d2; vecapp(m, d1k, ak); vecsub(r1k, Ax, ak); if (!vecvalid(m)) { return clcg_error_str(CLCG_NAN_VALUE, ss); } rk_mod = vecinner(r1k, r1k); rk_square = std::norm(rk_mod); CLCG_Ax(d2k, Ax, gctl::Trans, gctl::Conj); vecsub(r2k, Ax, std::conj(ak)); r1r2_next = vecinner(r2k, r1k); betak = r1r2_next/r1r2; r1r2 = r1r2_next; vecadd(d1k, d1k, r1k, betak, one_z); vecadd(d2k, d2k, r2k, std::conj(betak), one_z); } return clcg_error_str(CLCG_UNKNOWN_ERROR, ss); } void gctl::clcg_solver::clbicg_symmetric(array > &m, const array > &B, std::ostream &ss) { size_t n_size = B.size(); //check parameters if (n_size <= 0) return clcg_error_str(CLCG_INVILAD_VARIABLE_SIZE, ss); if (clcg_param_.max_iterations < 0) return clcg_error_str(CLCG_INVILAD_MAX_ITERATIONS, ss); if (clcg_param_.epsilon <= 0.0 || clcg_param_.epsilon >= 1.0) return clcg_error_str(CLCG_INVILAD_EPSILON, ss); r1k.resize(n_size); d1k.resize(n_size); Ax.resize(n_size); CLCG_Ax(m, Ax, gctl::NoTrans, gctl::NoConj); std::complex one_z(1.0, 0.0); vecdiff(r1k, B, Ax, one_z, one_z); veccpy(d1k, r1k, one_z); std::complex rkrk = vecdot(r1k, r1k); double r0_square, rk_square; std::complex r0_mod, rk_mod; rk_mod = vecinner(r1k, r1k); r0_square = rk_square = std::norm(rk_mod); if (r0_square < 1.0) r0_square = 1.0; if (clcg_param_.abs_diff && sqrt(rk_square)/n_size <= clcg_param_.epsilon) { CLCG_Progress(m, sqrt(rk_square)/n_size, clcg_param_, 0, ss); return clcg_error_str(CLCG_ALREADY_OPTIMIZIED, ss); } else if (rk_square/r0_square <= clcg_param_.epsilon) { CLCG_Progress(m, rk_square/r0_square, clcg_param_, 0, ss); return clcg_error_str(CLCG_ALREADY_OPTIMIZIED, ss); } double residual; std::complex ak, rkrk2, betak, dkAx; size_t t = 0; while(1) { if (clcg_param_.abs_diff) residual = sqrt(rk_square)/n_size; else residual = rk_square/r0_square; if (CLCG_Progress(m, residual, clcg_param_, t, ss)) { return clcg_error_str(CLCG_STOP, ss); } if (residual <= clcg_param_.epsilon) { return clcg_error_str(CLCG_CONVERGENCE, ss); } if (clcg_param_.max_iterations > 0 && t+1 > clcg_param_.max_iterations) { return clcg_error_str(CLCG_REACHED_MAX_ITERATIONS, ss); } t++; CLCG_Ax(d1k, Ax, gctl::NoTrans, gctl::NoConj); dkAx = vecdot(d1k, Ax); ak = rkrk/dkAx; vecapp(m, d1k, ak); vecsub(r1k, Ax, ak); rk_mod = vecdot(r1k, r1k); rk_square = std::norm(rk_mod); if (!vecvalid(m)) { return clcg_error_str(CLCG_NAN_VALUE, ss); } rkrk2 = vecdot(r1k, r1k); betak = rkrk2/rkrk; rkrk = rkrk2; vecadd(d1k, d1k, r1k, betak, one_z); } return clcg_error_str(CLCG_UNKNOWN_ERROR, ss); } void gctl::clcg_solver::clcgs(array > &m, const array > &B, std::ostream &ss) { size_t n_size = B.size(); //check parameters if (n_size <= 0) return clcg_error_str(CLCG_INVILAD_VARIABLE_SIZE, ss); if (clcg_param_.max_iterations < 0) return clcg_error_str(CLCG_INVILAD_MAX_ITERATIONS, ss); if (clcg_param_.epsilon <= 0.0 || clcg_param_.epsilon >= 1.0) return clcg_error_str(CLCG_INVILAD_EPSILON, ss); r1k.resize(n_size); r2k.resize(n_size); d1k.resize(n_size); uk.resize(n_size); qk.resize(n_size); wk.resize(n_size); Ax.resize(n_size); CLCG_Ax(m, Ax, gctl::NoTrans, gctl::NoConj); std::complex one_z(1.0, 0.0); std::complex one_one(1.0, 1.0); vecdiff(r1k, B, Ax, one_z, one_z); veccpy(uk, r1k, one_z); veccpy(d1k, r1k, one_z); srand(time(0)); std::complex rhok; do { for (size_t i = 0; i < n_size; i++) { r2k[i] = random(one_z, 2.0*one_one); } rhok = vecinner(r2k, r1k); } while (std::norm(rhok) < clcg_param_.lamda); double r0_square, rk_square; std::complex r0_mod, rk_mod; rk_mod = vecinner(r1k, r1k); r0_square = rk_square = std::norm(rk_mod); if (r0_square < 1.0) r0_square = 1.0; if (clcg_param_.abs_diff && sqrt(rk_square)/n_size <= clcg_param_.epsilon) { CLCG_Progress(m, sqrt(rk_square)/n_size, clcg_param_, 0, ss); return clcg_error_str(CLCG_ALREADY_OPTIMIZIED, ss); } else if (rk_square/r0_square <= clcg_param_.epsilon) { CLCG_Progress(m, rk_square/r0_square, clcg_param_, 0, ss); return clcg_error_str(CLCG_ALREADY_OPTIMIZIED, ss); } double residual; std::complex ak, rhok2, sigma, betak; size_t t = 0; while(1) { if (clcg_param_.abs_diff) residual = sqrt(rk_square)/n_size; else residual = rk_square/r0_square; if (CLCG_Progress(m, residual, clcg_param_, t, ss)) { return clcg_error_str(CLCG_STOP, ss); } if (residual <= clcg_param_.epsilon) { return clcg_error_str(CLCG_CONVERGENCE, ss); } if (clcg_param_.max_iterations > 0 && t+1 > clcg_param_.max_iterations) { return clcg_error_str(CLCG_REACHED_MAX_ITERATIONS, ss); } t++; CLCG_Ax(d1k, Ax, gctl::NoTrans, gctl::NoConj); // vk = Apk sigma = vecinner(r2k, Ax); ak = rhok/sigma; vecdiff(qk, uk, Ax, one_z, ak); vecadd(wk, uk, qk, one_z, one_z); CLCG_Ax(wk, Ax, gctl::NoTrans, gctl::NoConj); vecapp(m, wk, ak); vecsub(r1k, Ax, ak); rk_mod = vecinner(r1k, r1k); rk_square = std::norm(rk_mod); if (!vecvalid(m)) { return clcg_error_str(CLCG_NAN_VALUE, ss); } rhok2 = vecinner(r2k, r1k); betak = rhok2/rhok; rhok = rhok2; vecadd(uk, r1k, qk, one_z, betak); vecadd(d1k, qk, d1k, one_z, betak); vecadd(d1k, uk, d1k, one_z, betak); } return clcg_error_str(CLCG_UNKNOWN_ERROR, ss); } void gctl::clcg_solver::clbicgstab(array > &m, const array > &B, std::ostream &ss) { size_t n_size = B.size(); //check parameters if (n_size <= 0) return clcg_error_str(CLCG_INVILAD_VARIABLE_SIZE, ss); if (clcg_param_.max_iterations < 0) return clcg_error_str(CLCG_INVILAD_MAX_ITERATIONS, ss); if (clcg_param_.epsilon <= 0.0 || clcg_param_.epsilon >= 1.0) return clcg_error_str(CLCG_INVILAD_EPSILON, ss); r1k.resize(n_size); r2k.resize(n_size); d1k.resize(n_size); d2k.resize(n_size); Ax.resize(n_size); Ad.resize(n_size); CLCG_Ax(m, Ax, gctl::NoTrans, gctl::NoConj); std::complex one_z(1.0, 0.0); std::complex one_one(1.0, 1.0); vecdiff(r1k, B, Ax, one_z, one_z); veccpy(d1k, r1k, one_z); srand(time(0)); std::complex rhok; do { for (size_t i = 0; i < n_size; i++) { r2k[i] = random(one_z, 2.0*one_one); } rhok = vecinner(r2k, r1k); } while (std::norm(rhok) < clcg_param_.lamda); double r0_square, rk_square; std::complex r0_mod, rk_mod; rk_mod = vecinner(r1k, r1k); r0_square = rk_square = std::norm(rk_mod); if (r0_square < 1.0) r0_square = 1.0; if (clcg_param_.abs_diff && sqrt(rk_square)/n_size <= clcg_param_.epsilon) { CLCG_Progress(m, sqrt(rk_square)/n_size, clcg_param_, 0, ss); return clcg_error_str(CLCG_ALREADY_OPTIMIZIED, ss); } else if (rk_square/r0_square <= clcg_param_.epsilon) { CLCG_Progress(m, rk_square/r0_square, clcg_param_, 0, ss); return clcg_error_str(CLCG_ALREADY_OPTIMIZIED, ss); } double residual; std::complex ak, rhok2, sigma, omega, betak, Ass, AsAs; size_t t = 0; while(1) { if (clcg_param_.abs_diff) residual = sqrt(rk_square)/n_size; else residual = rk_square/r0_square; if (CLCG_Progress(m, residual, clcg_param_, t, ss)) { return clcg_error_str(CLCG_STOP, ss); } if (residual <= clcg_param_.epsilon) { return clcg_error_str(CLCG_CONVERGENCE, ss); } if (clcg_param_.max_iterations > 0 && t+1 > clcg_param_.max_iterations) { return clcg_error_str(CLCG_REACHED_MAX_ITERATIONS, ss); } t++; CLCG_Ax(d1k, Ax, gctl::NoTrans, gctl::NoConj); sigma = vecinner(r2k, Ax); ak = rhok/sigma; vecdiff(d2k, r1k, Ax, one_z, ak); CLCG_Ax(d2k, Ad, gctl::NoTrans, gctl::NoConj); Ass = vecinner(Ad, d2k); AsAs = vecinner(Ad, Ad); omega = Ass/AsAs; vecdiff(r1k, d2k, Ad, one_z, omega); vecadd(d2k, d1k, d2k, ak, omega); vecapp(m, d2k, one_z); rk_mod = vecinner(r1k, r1k); rk_square = std::norm(rk_mod); if (!vecvalid(m)) { return clcg_error_str(CLCG_NAN_VALUE, ss); } rhok2 = vecinner(r2k, r1k); betak = rhok2*ak/(rhok*omega); rhok = rhok2; vecdiff(d1k, d1k, Ax, one_z, omega); vecadd(d1k, r1k, d1k, one_z, betak); } return clcg_error_str(CLCG_UNKNOWN_ERROR, ss); } void gctl::clcg_solver::cltfqmr(array > &m, const array > &B, std::ostream &ss) { return; }