/********************************************************
* ██████╗ ██████╗████████╗██╗
* ██╔════╝ ██╔════╝╚══██╔══╝██║
* ██║ ███╗██║ ██║ ██║
* ██║ ██║██║ ██║ ██║
* ╚██████╔╝╚██████╗ ██║ ███████╗
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see .
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "lgd.h"
/**
* Default parameter for the Lévy-Gradient Descent (L-GD) method.
*/
static const gctl::lgd_para lgd_defparam = {1000, 0, 1e-5, 1.0, 1.5, 0.01, 1e-8, -1.0};
gctl::lgd_solver::lgd_solver()
{
lgd_param_ = lgd_defparam;
lgd_inter_ = 1; lgd_ques_num_ = 0; lgd_trace_times_ = 0;
lgd_silent_ = lgd_has_range_ = lgd_has_alpha_ = lgd_save_trace_ = false;
}
gctl::lgd_solver::~lgd_solver(){}
int gctl::lgd_solver::LGD_Progress(const int curr_t, const double curr_fx, const double mean_fx, const double best_fx, const lgd_para ¶m)
{
if (lgd_silent_) return 0;
if (param.epsilon > 0.0 && mean_fx <= param.epsilon)
{
std::clog << GCTL_CLEARLINE << "\rF(x) = " << curr_fx << ", Mean F(x) = " << mean_fx << ", Best F(x) = " << best_fx << ", Times = " << curr_t;
return 0;
}
if (lgd_inter_ > 0 && curr_t%lgd_inter_ == 0)
{
std::clog << GCTL_CLEARLINE << "\rF(x) = " << curr_fx << ", Mean F(x) = " << mean_fx << ", Best F(x) = " << best_fx << ", Times = " << curr_t;
}
return 0;
}
void gctl::lgd_solver::lgd_silent()
{
lgd_silent_ = true;
return;
}
void gctl::lgd_solver::set_lgd_report_interval(int inter)
{
lgd_inter_ = inter;
return;
}
void gctl::lgd_solver::set_lgd_para(const lgd_para &in_param)
{
lgd_param_ = in_param;
return;
}
#ifdef GCTL_OPTIMIZATION_TOML
void gctl::lgd_solver::set_lgd_para(std::string filename)
{
toml::value toml_data;
toml_data = toml::parse(filename);
set_lgd_para(toml_data);
return;
}
void gctl::lgd_solver::set_lgd_para(const toml::value &toml_data)
{
lgd_param_ = lgd_defparam;
std::string LGD = "lgd";
if (toml_data.contains(LGD))
{
if (toml_data.at(LGD).contains("flight_times")) lgd_param_.flight_times = toml::find(toml_data, LGD, "flight_times");
if (toml_data.at(LGD).contains("batch")) lgd_param_.batch = toml::find(toml_data, LGD, "batch");
if (toml_data.at(LGD).contains("epsilon")) lgd_param_.epsilon = toml::find(toml_data, LGD, "epsilon");
if (toml_data.at(LGD).contains("stddev_v")) lgd_param_.stddev_v = toml::find(toml_data, LGD, "stddev_v");
if (toml_data.at(LGD).contains("beta")) lgd_param_.beta = toml::find(toml_data, LGD, "beta");
if (toml_data.at(LGD).contains("alpha")) lgd_param_.alpha = toml::find(toml_data, LGD, "alpha");
if (toml_data.at(LGD).contains("sigma")) lgd_param_.sigma = toml::find(toml_data, LGD, "sigma");
if (toml_data.at(LGD).contains("lambda")) lgd_param_.lambda = toml::find(toml_data, LGD, "lambda");
}
return;
}
#endif // GCTL_OPTIMIZATION_TOML
void gctl::lgd_solver::set_lgd_record_trace()
{
lgd_save_trace_ = true;
return;
}
void gctl::lgd_solver::show_solver()
{
std::clog << "Solver's Setup Panel\n";
std::clog << "-----------------------------\n";
std::clog << "Solver: LGD\n";
std::clog << "Flights = " << lgd_param_.flight_times << ", Batch = " << lgd_param_.batch << ", Epsilon = " << lgd_param_.epsilon << ", Lambda = " << lgd_param_.lambda << "\n";
std::clog << "STD(v) = " << lgd_param_.stddev_v << ", Beta = " << lgd_param_.beta << ", Alpha = " << lgd_param_.alpha << ", Sigma = " << lgd_param_.sigma << "\n";
std::clog << "=============================\n";
return;
}
void gctl::lgd_solver::save_lgd_trace(std::string trace_file)
{
if (lgd_trace_times_ == 0)
{
GCTL_ShowWhatError("[gctl::lgd_solver] No trace is recorded.", GCTL_WARNING_ERROR, 0, 0, 0);
return;
}
std::ofstream ofile;
open_outfile(ofile, trace_file, ".txt");
int m_size = lgd_trace_.size()/lgd_trace_times_;
ofile << "# L-GD flight traces.\n";
ofile << "# Each row represents an accepted solution.\n";
ofile << "# Model size: " << m_size << "\n";
ofile << "# Accepted solutions: " << lgd_trace_times_ << "\n";
for (size_t i = 0; i < lgd_trace_times_; i++)
{
for (size_t j = 0; j < m_size; j++)
{
ofile << lgd_trace_[i*m_size+j] << " ";
}
ofile << "\n";
}
ofile.close();
return;
}
void gctl::lgd_solver::lgd_error_str(lgd_return_code err_code, std::ostream &ss, bool err_throw)
{
#if defined _WINDOWS || __WIN32__
if (!err_throw)
{
if (err_code >= 0)
{
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_GREEN);
ss << "Success! ";
}
else
{
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_RED);
ss << "Fail! ";
}
}
#else
if (!err_throw)
{
if (err_code >= 0)
ss << "\033[1m\033[32mLGD Success! ";
else
ss << "\033[1m\033[31mLGD Fail! ";
}
#endif
std::string err_str;
switch (err_code)
{
case LGD_CONVERGENCE:
err_str = "The iteration has reached convergence."; break;
case LGD_STOP:
err_str = "The iteration is stopped by the progress monitoring function."; break;
case LGD_REACHED_MAX_ITERATIONS:
err_str = "The maximal flight times has been reached."; break;
case LGD_INVALID_SOLUTION_SIZE:
err_str = "Invalid solution size."; break;
case LGD_INVALID_MAX_ITERATIONS:
err_str = "Invalid flight times."; break;
case LGD_INVALID_EPSILON:
err_str = "Invalid epsilon value."; break;
case LGD_INVALID_STDV:
err_str = "Invalid STD value for generating the levy distribution."; break;
case LGD_INVALID_BETA:
err_str = "Invalid beta value."; break;
case LGD_INVALID_ALPHA:
err_str = "Invalid alpha value."; break;
case LGD_INVALID_SIGMA:
err_str = "Invalid sigma value."; break;
case LGD_NAN_VALUE:
err_str = "NaN values found."; break;
default:
err_str = "Unknown error."; break;
}
if (err_throw && err_code < 0) throw err_str;
else ss << err_str;
#if defined _WINDOWS || __WIN32__
if (!err_throw)
{
if (err_code >= 0)
{
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7);
ss << std::endl;
}
else
{
SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7);
ss << std::endl;
}
}
#else
if (!err_throw)
{
if (err_code >= 0)
ss << "\033[0m" << std::endl;
else
ss << "\033[0m" << std::endl;
}
#endif
return;
}
gctl::lgd_para gctl::lgd_solver::default_lgd_para()
{
lgd_para dp = lgd_defparam;
return dp;
}
void gctl::lgd_solver::LGD_Minimize(array &best_m, array &mean_m, array &std_m,
std::ostream &ss, bool verbose, bool err_throw)
{
if (lgd_silent_)
{
lgd_return_code ret = lgd(best_m, mean_m, std_m);
if (ret < 0) lgd_error_str(ret, ss, true);
return;
}
// 使用lcg求解 注意当我们使用函数指针来调用求解函数时默认参数不可以省略
#ifdef GCTL_OPENMP
double start = omp_get_wtime();
lgd_return_code ret = lgd(best_m, mean_m, std_m);
double end = omp_get_wtime();
double costime = 1000*(end-start);
#else
clock_t start = clock();
lgd_return_code ret = lgd(best_m, mean_m, std_m);
clock_t end = clock();
double costime = 1000*(end-start)/(double)CLOCKS_PER_SEC;
#endif
if (!err_throw) std::clog << std::endl << "Solver: LGD. Time cost: " << costime << " ms" << std::endl;
if (verbose) lgd_error_str(ret, ss, err_throw);
else if (ret < 0) lgd_error_str(ret, ss, err_throw);
return;
}
void gctl::lgd_solver::LGD_Minimize(array &best_m, array &mean_m, array &std_m,
const array &alphas, std::ostream &ss, bool verbose, bool err_throw)
{
lgd_ques_num_ = best_m.size();
if (lgd_ques_num_ != alphas.size())
{
throw std::runtime_error("[gctl::lgd_solver] arraies' size do not match.");
}
lgd_alpha_.resize(lgd_ques_num_);
for (int i = 0; i < lgd_ques_num_; i++)
{
if (alphas[i] <= 0.0)
{
throw std::runtime_error("[gctl::lgd_solver] Invalid scaling value.");
}
lgd_alpha_[i] = alphas[i];
}
lgd_has_alpha_ = true;
LGD_Minimize(best_m, mean_m, std_m, ss, verbose, err_throw);
return;
}
void gctl::lgd_solver::LGD_Minimize(array &best_m, array &mean_m, array &std_m,
const array &lows, const array &higs, std::ostream &ss, bool verbose, bool err_throw)
{
lgd_ques_num_ = best_m.size();
if (lgd_ques_num_ != lows.size() || lgd_ques_num_ != higs.size())
{
throw std::runtime_error("[gctl::lgd_solver] arraies' size do not match.");
}
lgd_low_.resize(lgd_ques_num_);
lgd_hig_.resize(lgd_ques_num_);
for (int i = 0; i < lgd_ques_num_; i++)
{
if (lows[i] >= higs[i])
{
throw std::runtime_error("[gctl::lgd_solver] Invalid bound value.");
}
lgd_low_[i] = lows[i];
lgd_hig_[i] = higs[i];
}
lgd_has_range_ = true;
LGD_Minimize(best_m, mean_m, std_m, ss, verbose, err_throw);
return;
}
void gctl::lgd_solver::LGD_Minimize(array &best_m, array &mean_m, array &std_m,
double low, double hig, std::ostream &ss, bool verbose, bool err_throw)
{
if (low >= hig)
{
throw std::runtime_error("[gctl::lgd_solver] Invalid bound value.");
}
lgd_ques_num_ = best_m.size();
lgd_low_.resize(lgd_ques_num_, low);
lgd_hig_.resize(lgd_ques_num_, hig);
lgd_has_range_ = true;
LGD_Minimize(best_m, mean_m, std_m, ss, verbose, err_throw);
return;
}
gctl::lgd_return_code gctl::lgd_solver::lgd(array &best_m, array &mean_m, array &std_m)
{
lgd_ques_num_ = best_m.size();
// check parameters
if (lgd_ques_num_ <= 0) return LGD_INVALID_SOLUTION_SIZE;
if (lgd_param_.flight_times <= 0) return LGD_INVALID_MAX_ITERATIONS;
if (lgd_param_.epsilon <= 0) return LGD_INVALID_EPSILON;
if (lgd_param_.stddev_v <= 0) return LGD_INVALID_STDV;
if (lgd_param_.beta <= 1.0 || lgd_param_.beta >= 2.0) return LGD_INVALID_BETA;
if (lgd_param_.alpha <= 0) return LGD_INVALID_ALPHA;
if (lgd_param_.sigma <= 0) return LGD_INVALID_SIGMA;
// initiate solutions
mean_m.resize(lgd_ques_num_, 0.0); std_m.resize(lgd_ques_num_, 0.0);
double gamma1 = tgamma(lgd_param_.beta + 1.0);
double gamma2 = tgamma(0.5*(lgd_param_.beta + 1.0));
double stddev_u = pow((gamma1*sin(0.5*GCTL_Pi*lgd_param_.beta)) / (gamma2*lgd_param_.beta*pow(2, 0.5*(lgd_param_.beta-1.0))), 1.0/lgd_param_.beta);
unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
std::default_random_engine generator(seed);
std::normal_distribution dist_u(0, stddev_u);
std::normal_distribution dist_v(0, lgd_param_.stddev_v);
std::uniform_real_distribution dist_s(1.0, 2.0);
array g, g_mem, g_orth, new_mean, b_m, alphas;
g.resize(lgd_ques_num_); g_mem.resize(2*lgd_ques_num_); g_orth.resize(2*lgd_ques_num_);
new_mean.resize(lgd_ques_num_); b_m.resize(lgd_ques_num_); alphas.resize(lgd_ques_num_);
// 初始化参数变化范围为lgd_param_.alpha
vecset(alphas, lgd_param_.alpha);
if (lgd_has_range_)
{
vecdiff(alphas, lgd_hig_, lgd_low_);
vecscale(alphas, lgd_param_.alpha);
}
if (lgd_has_alpha_)
{
veccpy(alphas, lgd_alpha_, lgd_param_.alpha);
}
double fx_best, fx_tmp, direct_mod, levy_length;
double fx_mean = NAN;
// 开始飞行
int rcd_times = 0;
lgd_trace_times_ = 0;
for (int ft = 0; ft <= lgd_param_.flight_times; ft++)
{
// 计算尝试解
fx_tmp = LGD_Evaluate(best_m, g);
if (ft == 0 || fx_tmp < fx_best)
{
fx_best = fx_tmp;
veccpy(b_m, best_m);
}
// 记录飞行轨迹
if (lgd_param_.lambda <= 0.0 || (lgd_param_.lambda > 0.0 && fx_tmp <= lgd_param_.lambda))
{
for (int i = 0; i < lgd_ques_num_; i++)
{
std_m[i] = dynamic_stddev(std_m[i], rcd_times, mean_m[i], best_m[i], new_mean[i]);
mean_m[i] = new_mean[i];
}
rcd_times++;
if (lgd_save_trace_)
{
lgd_trace_.append_array(best_m);
lgd_trace_times_++;
}
}
if (LGD_Progress(ft, fx_tmp, fx_mean, fx_best, lgd_param_))
{
// 将迭代结果返还给m
veccpy(best_m, b_m);
return LGD_STOP;
}
if (lgd_param_.batch > 0 && (rcd_times+1)%lgd_param_.batch == 0)
{
fx_mean = LGD_Evaluate(mean_m, g);
if (fx_mean < lgd_param_.epsilon)
{
LGD_Progress(ft, fx_tmp, fx_mean, fx_best, lgd_param_);
// 将迭代结果返还给m
veccpy(best_m, b_m);
return LGD_CONVERGENCE;
}
}
// 驻点检测
direct_mod = sqrt(vecdot(g, g));
if (direct_mod < lgd_param_.sigma)
{
if (ft == 0) // 初次飞行时无记录
{
do // 如果梯度消失 则采用一个随机方向
{
for (int i = 0; i < lgd_ques_num_; i++)
{
g[i] = dist_s(generator);
}
direct_mod = sqrt(vecdot(g, g));
}
while (direct_mod < lgd_param_.sigma);
}
else // 如果梯度消失 则朝着上一次迭代方向的正交方向走一步
{
for (int i = 0; i < lgd_ques_num_; i++)
{
g_mem[i+lgd_ques_num_] = dist_s(generator);
}
schmidt_orthogonal(g_mem, g_orth, 2);
for (int i = 0; i < lgd_ques_num_; i++)
{
g[i] = g_orth[i+lgd_ques_num_];
}
direct_mod = 1.0; // 此时的模量为单位模量
}
}
// 莱维飞行的步长 注意原公式中无最外层绝对值符号
// 这是我们需要步长的绝对值 因此取绝对值
levy_length = fabs(dist_u(generator)/pow(fabs(dist_v(generator)), 1.0/lgd_param_.beta));
for (int i = 0; i < lgd_ques_num_; i++)
{
best_m[i] -= levy_length*alphas[i]*g[i]/direct_mod;
}
if (!vecvalid(best_m))
{
return LGD_NAN_VALUE;
}
// 记录梯度方向
for (int i = 0; i < lgd_ques_num_; i++)
{
g_mem[i] = g[i];
}
// 这里可以添加取值范围的约束
if (lgd_has_range_)
{
vecbtm(best_m, lgd_low_);
vectop(best_m, lgd_hig_);
}
}
// 将迭代结果返还给m
veccpy(best_m, b_m);
return LGD_REACHED_MAX_ITERATIONS;
}