/******************************************************** * ██████╗ ██████╗████████╗██╗ * ██╔════╝ ██╔════╝╚══██╔══╝██║ * ██║ ███╗██║ ██║ ██║ * ██║ ██║██║ ██║ ██║ * ╚██████╔╝╚██████╗ ██║ ███████╗ * ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝ * Geophysical Computational Tools & Library (GCTL) * * Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn) * * GCTL is distributed under a dual licensing scheme. You can redistribute * it and/or modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation, either version 2 * of the License, or (at your option) any later version. You should have * received a copy of the GNU Lesser General Public License along with this * program. If not, see . * * If the terms and conditions of the LGPL v.2. would prevent you from using * the GCTL, please consider the option to obtain a commercial license for a * fee. These licenses are offered by the GCTL's original author. As a rule, * licenses are provided "as-is", unlimited in time for a one time fee. Please * send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget * to include some description of your company and the realm of its activities. * Also add information on how to contact you by electronic and paper mail. ******************************************************/ #include "lgd.h" /** * Default parameter for the Lévy-Gradient Descent (L-GD) method. */ static const gctl::lgd_para lgd_defparam = {1000, 0, 0, 1e-5, 1.0, 1.5, 0.01, 1e-8, -1.0, 0.5, 0.05}; gctl::lgd_solver::lgd_solver() { lgd_param_ = lgd_defparam; lgd_inter_ = 1; lgd_ques_num_ = 0; lgd_trace_times_ = 0; lgd_silent_ = lgd_has_range_ = lgd_has_alpha_ = lgd_save_trace_ = false; } gctl::lgd_solver::~lgd_solver(){} int gctl::lgd_solver::LGD_Progress(const int curr_t, const double curr_fx, const double mean_fx, const double best_fx, const lgd_para ¶m) { if (lgd_silent_) return 0; if (param.epsilon > 0.0 && mean_fx <= param.epsilon) { std::clog << GCTL_CLEARLINE << "\rF(x) = " << curr_fx << ", Mean F(x) = " << mean_fx << ", Best F(x) = " << best_fx << ", Times = " << curr_t; return 0; } if (lgd_inter_ > 0 && curr_t%lgd_inter_ == 0) { std::clog << GCTL_CLEARLINE << "\rF(x) = " << curr_fx << ", Mean F(x) = " << mean_fx << ", Best F(x) = " << best_fx << ", Times = " << curr_t; } return 0; } void gctl::lgd_solver::lgd_silent() { lgd_silent_ = true; return; } void gctl::lgd_solver::set_lgd_report_interval(int inter) { lgd_inter_ = inter; return; } void gctl::lgd_solver::set_lgd_para(const lgd_para &in_param) { lgd_param_ = in_param; return; } #ifdef GCTL_OPTIMIZATION_TOML void gctl::lgd_solver::set_lgd_para(std::string filename) { toml::value toml_data; toml_data = toml::parse(filename); set_lgd_para(toml_data); return; } void gctl::lgd_solver::set_lgd_para(const toml::value &toml_data) { lgd_param_ = lgd_defparam; std::string LGD = "lgd"; if (toml_data.contains(LGD)) { if (toml_data.at(LGD).contains("flight_times")) lgd_param_.flight_times = toml::find(toml_data, LGD, "flight_times"); if (toml_data.at(LGD).contains("batch")) lgd_param_.batch = toml::find(toml_data, LGD, "batch"); if (toml_data.at(LGD).contains("epsilon")) lgd_param_.epsilon = toml::find(toml_data, LGD, "epsilon"); if (toml_data.at(LGD).contains("stddev_v")) lgd_param_.stddev_v = toml::find(toml_data, LGD, "stddev_v"); if (toml_data.at(LGD).contains("beta")) lgd_param_.beta = toml::find(toml_data, LGD, "beta"); if (toml_data.at(LGD).contains("alpha")) lgd_param_.alpha = toml::find(toml_data, LGD, "alpha"); if (toml_data.at(LGD).contains("sigma")) lgd_param_.sigma = toml::find(toml_data, LGD, "sigma"); if (toml_data.at(LGD).contains("lambda")) lgd_param_.lambda = toml::find(toml_data, LGD, "lambda"); } return; } #endif // GCTL_OPTIMIZATION_TOML void gctl::lgd_solver::set_lgd_record_trace() { lgd_save_trace_ = true; return; } void gctl::lgd_solver::show_solver() { std::clog << "Solver's Setup Panel\n"; std::clog << "-----------------------------\n"; std::clog << "Solver: LGD\n"; std::clog << "Flights = " << lgd_param_.flight_times << ", Batch = " << lgd_param_.batch << ", Epsilon = " << lgd_param_.epsilon << ", Lambda = " << lgd_param_.lambda << "\n"; std::clog << "STD(v) = " << lgd_param_.stddev_v << ", Beta = " << lgd_param_.beta << ", Alpha = " << lgd_param_.alpha << ", Sigma = " << lgd_param_.sigma << "\n"; std::clog << "=============================\n"; return; } void gctl::lgd_solver::save_lgd_trace(std::string trace_file) { if (lgd_trace_times_ == 0) { GCTL_ShowWhatError("[gctl::lgd_solver] No trace is recorded.", GCTL_WARNING_ERROR, 0, 0, 0); return; } std::ofstream ofile; open_outfile(ofile, trace_file, ".txt"); int m_size = lgd_trace_.size()/lgd_trace_times_; ofile << "# L-GD flight traces.\n"; ofile << "# Each row represents an accepted solution.\n"; ofile << "# Model size: " << m_size << "\n"; ofile << "# Accepted solutions: " << lgd_trace_times_ << "\n"; for (size_t i = 0; i < lgd_trace_times_; i++) { for (size_t j = 0; j < m_size; j++) { ofile << lgd_trace_[i*m_size+j] << " "; } ofile << "\n"; } ofile.close(); return; } gctl::lgd_return_code gctl::lgd_solver::get_levy_distribution(array &dist) { if (lgd_param_.flight_times <= 0) return LGD_INVALID_MAX_ITERATIONS; if (lgd_param_.beta <= 1.0 || lgd_param_.beta >= 2.0) return LGD_INVALID_BETA; double gamma1 = tgamma(lgd_param_.beta + 1.0); double gamma2 = tgamma(0.5*(lgd_param_.beta + 1.0)); double stddev_u = pow((gamma1*sin(0.5*GCTL_Pi*lgd_param_.beta))/ (gamma2*lgd_param_.beta*pow(2, 0.5*(lgd_param_.beta-1.0))), 1.0/lgd_param_.beta); unsigned int sd; if (lgd_param_.seed == 0) sd = std::chrono::system_clock::now().time_since_epoch().count(); else sd = lgd_param_.seed; std::default_random_engine generator(sd); std::normal_distribution dist_u(0, stddev_u); std::normal_distribution dist_v(0, lgd_param_.stddev_v); std::uniform_real_distribution dist_s(1.0, 2.0); dist.resize(lgd_param_.flight_times); for (int i = 0; i <= lgd_param_.flight_times; i++) { dist[i] = fabs(dist_u(generator)/pow(fabs(dist_v(generator)), 1.0/lgd_param_.beta)); } return LGD_REACHED_MAX_ITERATIONS; } void gctl::lgd_solver::lgd_error_str(lgd_return_code err_code, std::ostream &ss, bool err_throw) { #if defined _WINDOWS || __WIN32__ if (!err_throw) { if (err_code >= 0) { SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_GREEN); ss << "Success! "; } else { SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_RED); ss << "Fail! "; } } #else if (!err_throw) { if (err_code >= 0) ss << "\033[1m\033[32mLGD Success! "; else ss << "\033[1m\033[31mLGD Fail! "; } #endif std::string err_str; switch (err_code) { case LGD_CONVERGENCE: err_str = "The iteration has reached convergence."; break; case LGD_STOP: err_str = "The iteration is stopped by the progress monitoring function."; break; case LGD_REACHED_MAX_ITERATIONS: err_str = "The maximal flight times has been reached."; break; case LGD_INVALID_SOLUTION_SIZE: err_str = "Invalid solution size."; break; case LGD_INVALID_MAX_ITERATIONS: err_str = "Invalid flight times."; break; case LGD_INVALID_EPSILON: err_str = "Invalid epsilon value."; break; case LGD_INVALID_STDV: err_str = "Invalid STD value for generating the levy distribution."; break; case LGD_INVALID_BETA: err_str = "Invalid beta value."; break; case LGD_INVALID_ALPHA: err_str = "Invalid alpha value."; break; case LGD_INVALID_SIGMA: err_str = "Invalid sigma value."; break; case LGD_NAN_VALUE: err_str = "NaN values found."; break; default: err_str = "Unknown error."; break; } if (err_throw && err_code < 0) throw err_str; else ss << err_str; #if defined _WINDOWS || __WIN32__ if (!err_throw) { if (err_code >= 0) { SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7); ss << std::endl; } else { SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7); ss << std::endl; } } #else if (!err_throw) { if (err_code >= 0) ss << "\033[0m" << std::endl; else ss << "\033[0m" << std::endl; } #endif return; } gctl::lgd_para gctl::lgd_solver::default_lgd_para() { lgd_para dp = lgd_defparam; return dp; } void gctl::lgd_solver::LGD_Minimize(array &best_m, array &mean_m, array &std_m, std::ostream &ss, bool verbose, bool err_throw) { if (lgd_silent_) { lgd_return_code ret = lgd(best_m, mean_m, std_m); if (ret < 0) lgd_error_str(ret, ss, true); return; } // 使用lcg求解 注意当我们使用函数指针来调用求解函数时默认参数不可以省略 #ifdef GCTL_OPENMP double start = omp_get_wtime(); lgd_return_code ret = lgd(best_m, mean_m, std_m); double end = omp_get_wtime(); double costime = 1000*(end-start); #else clock_t start = clock(); lgd_return_code ret = lgd(best_m, mean_m, std_m); clock_t end = clock(); double costime = 1000*(end-start)/(double)CLOCKS_PER_SEC; #endif if (!err_throw) std::clog << std::endl << "Solver: LGD. Time cost: " << costime << " ms" << std::endl; if (verbose) lgd_error_str(ret, ss, err_throw); else if (ret < 0) lgd_error_str(ret, ss, err_throw); return; } void gctl::lgd_solver::LGD_Minimize(array &best_m, array &mean_m, array &std_m, const array &alphas, std::ostream &ss, bool verbose, bool err_throw) { lgd_ques_num_ = best_m.size(); if (lgd_ques_num_ != alphas.size()) { throw std::runtime_error("[gctl::lgd_solver] arraies' size do not match."); } lgd_alpha_.resize(lgd_ques_num_); for (int i = 0; i < lgd_ques_num_; i++) { if (alphas[i] <= 0.0) { throw std::runtime_error("[gctl::lgd_solver] Invalid scaling value."); } lgd_alpha_[i] = alphas[i]; } lgd_has_alpha_ = true; LGD_Minimize(best_m, mean_m, std_m, ss, verbose, err_throw); return; } void gctl::lgd_solver::LGD_Minimize(array &best_m, array &mean_m, array &std_m, const array &lows, const array &higs, std::ostream &ss, bool verbose, bool err_throw) { lgd_ques_num_ = best_m.size(); if (lgd_ques_num_ != lows.size() || lgd_ques_num_ != higs.size()) { throw std::runtime_error("[gctl::lgd_solver] arraies' size do not match."); } lgd_low_.resize(lgd_ques_num_); lgd_hig_.resize(lgd_ques_num_); for (int i = 0; i < lgd_ques_num_; i++) { if (lows[i] >= higs[i]) { throw std::runtime_error("[gctl::lgd_solver] Invalid bound value."); } lgd_low_[i] = lows[i]; lgd_hig_[i] = higs[i]; } lgd_has_range_ = true; LGD_Minimize(best_m, mean_m, std_m, ss, verbose, err_throw); return; } void gctl::lgd_solver::LGD_Minimize(array &best_m, array &mean_m, array &std_m, double low, double hig, std::ostream &ss, bool verbose, bool err_throw) { if (low >= hig) { throw std::runtime_error("[gctl::lgd_solver] Invalid bound value."); } lgd_ques_num_ = best_m.size(); lgd_low_.resize(lgd_ques_num_, low); lgd_hig_.resize(lgd_ques_num_, hig); lgd_has_range_ = true; LGD_Minimize(best_m, mean_m, std_m, ss, verbose, err_throw); return; } void gctl::lgd_solver::LGD_Minimize_Momentum(array &best_m, array &mean_m, array &std_m, std::ostream &ss, bool verbose, bool err_throw) { if (lgd_silent_) { lgd_return_code ret = lgd_momentum(best_m, mean_m, std_m); if (ret < 0) lgd_error_str(ret, ss, true); return; } // 使用lcg求解 注意当我们使用函数指针来调用求解函数时默认参数不可以省略 #ifdef GCTL_OPENMP double start = omp_get_wtime(); lgd_return_code ret = lgd_momentum(best_m, mean_m, std_m); double end = omp_get_wtime(); double costime = 1000*(end-start); #else clock_t start = clock(); lgd_return_code ret = lgd_momentum(best_m, mean_m, std_m); clock_t end = clock(); double costime = 1000*(end-start)/(double)CLOCKS_PER_SEC; #endif if (!err_throw) std::clog << std::endl << "Solver: LGDM. Time cost: " << costime << " ms" << std::endl; if (verbose) lgd_error_str(ret, ss, err_throw); else if (ret < 0) lgd_error_str(ret, ss, err_throw); return; } void gctl::lgd_solver::LGD_Minimize_Momentum(array &best_m, array &mean_m, array &std_m, const array &lows, const array &higs, std::ostream &ss, bool verbose, bool err_throw) { lgd_ques_num_ = best_m.size(); if (lgd_ques_num_ != lows.size() || lgd_ques_num_ != higs.size()) { throw std::runtime_error("[gctl::lgd_solver] arraies' size do not match."); } lgd_low_.resize(lgd_ques_num_); lgd_hig_.resize(lgd_ques_num_); for (int i = 0; i < lgd_ques_num_; i++) { if (lows[i] >= higs[i]) { throw std::runtime_error("[gctl::lgd_solver] Invalid bound value."); } lgd_low_[i] = lows[i]; lgd_hig_[i] = higs[i]; } lgd_has_range_ = true; LGD_Minimize_Momentum(best_m, mean_m, std_m, ss, verbose, err_throw); return; } gctl::lgd_return_code gctl::lgd_solver::lgd(array &best_m, array &mean_m, array &std_m) { lgd_ques_num_ = best_m.size(); // check parameters if (lgd_ques_num_ <= 0) return LGD_INVALID_SOLUTION_SIZE; if (lgd_param_.flight_times <= 0) return LGD_INVALID_MAX_ITERATIONS; if (lgd_param_.epsilon <= 0) return LGD_INVALID_EPSILON; if (lgd_param_.stddev_v <= 0) return LGD_INVALID_STDV; if (lgd_param_.beta <= 1.0 || lgd_param_.beta >= 2.0) return LGD_INVALID_BETA; if (lgd_param_.alpha <= 0) return LGD_INVALID_ALPHA; if (lgd_param_.sigma <= 0) return LGD_INVALID_SIGMA; // initiate solutions mean_m.resize(lgd_ques_num_, 0.0); std_m.resize(lgd_ques_num_, 0.0); double gamma1 = tgamma(lgd_param_.beta + 1.0); double gamma2 = tgamma(0.5*(lgd_param_.beta + 1.0)); double stddev_u = pow((gamma1*sin(0.5*GCTL_Pi*lgd_param_.beta)) / (gamma2*lgd_param_.beta*pow(2, 0.5*(lgd_param_.beta-1.0))), 1.0/lgd_param_.beta); unsigned int sd; if (lgd_param_.seed == 0) sd = std::chrono::system_clock::now().time_since_epoch().count(); else sd = lgd_param_.seed; std::default_random_engine generator(sd); std::normal_distribution dist_u(0, stddev_u); std::normal_distribution dist_v(0, lgd_param_.stddev_v); std::uniform_real_distribution dist_s(1.0, 2.0); array g, g_mem, g_orth, new_mean, b_m, alphas; g.resize(lgd_ques_num_); g_mem.resize(2*lgd_ques_num_); g_orth.resize(2*lgd_ques_num_); new_mean.resize(lgd_ques_num_); b_m.resize(lgd_ques_num_); alphas.resize(lgd_ques_num_); // 初始化参数变化范围为lgd_param_.alpha vecset(alphas, lgd_param_.alpha); if (lgd_has_range_) { vecdiff(alphas, lgd_hig_, lgd_low_); vecscale(alphas, lgd_param_.alpha); } if (lgd_has_alpha_) { veccpy(alphas, lgd_alpha_, lgd_param_.alpha); } double fx_best, fx_tmp, direct_mod, levy_length; double fx_mean = NAN; // 开始飞行 int rcd_times = 0; lgd_trace_times_ = 0; for (int ft = 0; ft <= lgd_param_.flight_times; ft++) { // 计算尝试解 fx_tmp = LGD_Evaluate(best_m, g); if (ft == 0 || fx_tmp < fx_best) { fx_best = fx_tmp; veccpy(b_m, best_m); } // 记录飞行轨迹 if (lgd_param_.lambda <= 0.0 || (lgd_param_.lambda > 0.0 && fx_tmp <= lgd_param_.lambda)) { for (int i = 0; i < lgd_ques_num_; i++) { std_m[i] = dynamic_stddev(std_m[i], rcd_times, mean_m[i], best_m[i], new_mean[i]); mean_m[i] = new_mean[i]; } rcd_times++; if (lgd_save_trace_) { lgd_trace_.append_array(best_m); lgd_trace_times_++; } } if (LGD_Progress(ft, fx_tmp, fx_mean, fx_best, lgd_param_)) { // 将迭代结果返还给m veccpy(best_m, b_m); return LGD_STOP; } if (lgd_param_.batch > 0 && (rcd_times+1)%lgd_param_.batch == 0) { fx_mean = LGD_Evaluate(mean_m, g); if (fx_mean < lgd_param_.epsilon) { LGD_Progress(ft, fx_tmp, fx_mean, fx_best, lgd_param_); // 将迭代结果返还给m veccpy(best_m, b_m); return LGD_CONVERGENCE; } } // 驻点检测 direct_mod = sqrt(vecdot(g, g)); if (direct_mod < lgd_param_.sigma) { if (ft == 0) // 初次飞行时无记录 { do // 如果梯度消失 则采用一个随机方向 { for (int i = 0; i < lgd_ques_num_; i++) { g[i] = dist_s(generator); } direct_mod = sqrt(vecdot(g, g)); } while (direct_mod < lgd_param_.sigma); } else // 如果梯度消失 则朝着上一次迭代方向的正交方向走一步 { for (int i = 0; i < lgd_ques_num_; i++) { g_mem[i+lgd_ques_num_] = dist_s(generator); } schmidt_orthogonal(g_mem, g_orth, 2); for (int i = 0; i < lgd_ques_num_; i++) { g[i] = g_orth[i+lgd_ques_num_]; } direct_mod = 1.0; // 此时的模量为单位模量 } } // 莱维飞行的步长 注意原公式中无最外层绝对值符号 // 这是我们需要步长的绝对值 因此取绝对值 levy_length = fabs(dist_u(generator)/pow(fabs(dist_v(generator)), 1.0/lgd_param_.beta)); for (int i = 0; i < lgd_ques_num_; i++) { best_m[i] -= levy_length*alphas[i]*g[i]/direct_mod; //best_m[i] -= levy_length*g[i]/direct_mod; } if (!vecvalid(best_m)) { return LGD_NAN_VALUE; } // 记录梯度方向 for (int i = 0; i < lgd_ques_num_; i++) { g_mem[i] = g[i]; } // 这里可以添加取值范围的约束 if (lgd_has_range_) { vecbtm(best_m, lgd_low_); vectop(best_m, lgd_hig_); } } // 将迭代结果返还给m veccpy(best_m, b_m); return LGD_REACHED_MAX_ITERATIONS; } gctl::lgd_return_code gctl::lgd_solver::lgd_momentum(array &best_m, array &mean_m, array &std_m) { lgd_ques_num_ = best_m.size(); // check parameters if (lgd_ques_num_ <= 0) return LGD_INVALID_SOLUTION_SIZE; if (lgd_param_.flight_times <= 0) return LGD_INVALID_MAX_ITERATIONS; if (lgd_param_.epsilon <= 0) return LGD_INVALID_EPSILON; if (lgd_param_.stddev_v <= 0) return LGD_INVALID_STDV; if (lgd_param_.beta <= 1.0 || lgd_param_.beta >= 2.0) return LGD_INVALID_BETA; if (lgd_param_.alpha <= 0) return LGD_INVALID_ALPHA; if (lgd_param_.sigma <= 0) return LGD_INVALID_SIGMA; // initiate solutions mean_m.resize(lgd_ques_num_, 0.0); std_m.resize(lgd_ques_num_, 0.0); double gamma1 = tgamma(lgd_param_.beta + 1.0); double gamma2 = tgamma(0.5*(lgd_param_.beta + 1.0)); double stddev_u = pow((gamma1*sin(0.5*GCTL_Pi*lgd_param_.beta)) / (gamma2*lgd_param_.beta*pow(2, 0.5*(lgd_param_.beta-1.0))), 1.0/lgd_param_.beta); unsigned int sd; if (lgd_param_.seed == 0) sd = std::chrono::system_clock::now().time_since_epoch().count(); else sd = lgd_param_.seed; std::default_random_engine generator(sd); std::normal_distribution dist_u(0, stddev_u); std::normal_distribution dist_v(0, lgd_param_.stddev_v); std::uniform_real_distribution dist_s(1.0, 2.0); array g, g_mem, g_orth, new_mean, b_m, alphas, fst_mt, sec_mt; g.resize(lgd_ques_num_); g_mem.resize(2*lgd_ques_num_); g_orth.resize(2*lgd_ques_num_); new_mean.resize(lgd_ques_num_); b_m.resize(lgd_ques_num_); alphas.resize(lgd_ques_num_); double mt = 1.0, vt = 1.0; double fhat, shat; fst_mt.resize(lgd_ques_num_, 0.0); sec_mt.resize(lgd_ques_num_, 0.0); // 初始化参数变化范围为lgd_param_.alpha vecset(alphas, lgd_param_.alpha); if (lgd_has_range_) { vecdiff(alphas, lgd_hig_, lgd_low_); vecscale(alphas, lgd_param_.alpha); } if (lgd_has_alpha_) { veccpy(alphas, lgd_alpha_, lgd_param_.alpha); } double fx_best, fx_tmp, direct_mod, levy_length; double fx_mean = NAN; // 开始飞行 int rcd_times = 0; lgd_trace_times_ = 0; for (int ft = 0; ft <= lgd_param_.flight_times; ft++) { // 计算尝试解 fx_tmp = LGD_Evaluate(best_m, g); if (ft == 0 || fx_tmp < fx_best) { fx_best = fx_tmp; veccpy(b_m, best_m); } // 记录飞行轨迹 if (lgd_param_.lambda <= 0.0 || (lgd_param_.lambda > 0.0 && fx_tmp <= lgd_param_.lambda)) { for (int i = 0; i < lgd_ques_num_; i++) { std_m[i] = dynamic_stddev(std_m[i], rcd_times, mean_m[i], best_m[i], new_mean[i]); mean_m[i] = new_mean[i]; } rcd_times++; if (lgd_save_trace_) { lgd_trace_.append_array(best_m); lgd_trace_times_++; } } if (LGD_Progress(ft, fx_tmp, fx_mean, fx_best, lgd_param_)) { // 将迭代结果返还给m veccpy(best_m, b_m); return LGD_STOP; } if (lgd_param_.batch > 0 && (rcd_times+1)%lgd_param_.batch == 0) { fx_mean = LGD_Evaluate(mean_m, g); if (fx_mean < lgd_param_.epsilon) { LGD_Progress(ft, fx_tmp, fx_mean, fx_best, lgd_param_); // 将迭代结果返还给m veccpy(best_m, b_m); return LGD_CONVERGENCE; } } // 驻点检测 direct_mod = sqrt(vecdot(g, g)); if (direct_mod < lgd_param_.sigma) { if (ft == 0) // 初次飞行时无记录 { do // 如果梯度消失 则采用一个随机方向 { for (int i = 0; i < lgd_ques_num_; i++) { g[i] = dist_s(generator); } direct_mod = sqrt(vecdot(g, g)); } while (direct_mod < lgd_param_.sigma); } else // 如果梯度消失 则朝着上一次迭代方向的正交方向走一步 { for (int i = 0; i < lgd_ques_num_; i++) { g_mem[i+lgd_ques_num_] = dist_s(generator); } schmidt_orthogonal(g_mem, g_orth, 2); for (int i = 0; i < lgd_ques_num_; i++) { g[i] = g_orth[i+lgd_ques_num_]; } direct_mod = 1.0; // 此时的模量为单位模量 } } // 莱维飞行的步长 注意原公式中无最外层绝对值符号 // 这是我们需要步长的绝对值 因此取绝对值 levy_length = fabs(dist_u(generator)/pow(fabs(dist_v(generator)), 1.0/lgd_param_.beta)); mt *= lgd_param_.fmt; vt *= lgd_param_.smt; for (int i = 0; i < lgd_ques_num_; i++) { g[i] = levy_length*alphas[i]*g[i]/direct_mod; //g[i] = levy_length*g[i]/direct_mod; fst_mt[i] = lgd_param_.fmt*fst_mt[i] + (1.0 - lgd_param_.fmt)*g[i]; sec_mt[i] = lgd_param_.smt*sec_mt[i] + (1.0 - lgd_param_.smt)*g[i]*g[i]; fhat = fst_mt[i]/(1.0 - mt); shat = sec_mt[i]/(1.0 - vt); best_m[i] -= fhat/(sqrt(shat) + lgd_param_.sigma); } if (!vecvalid(best_m)) { return LGD_NAN_VALUE; } // 记录梯度方向 for (int i = 0; i < lgd_ques_num_; i++) { g_mem[i] = g[i]; } // 这里可以添加取值范围的约束 if (lgd_has_range_) { vecbtm(best_m, lgd_low_); vectop(best_m, lgd_hig_); } } // 将迭代结果返还给m veccpy(best_m, b_m); return LGD_REACHED_MAX_ITERATIONS; }