/******************************************************** * ██████╗ ██████╗████████╗██╗ * ██╔════╝ ██╔════╝╚══██╔══╝██║ * ██║ ███╗██║ ██║ ██║ * ██║ ██║██║ ██║ ██║ * ╚██████╔╝╚██████╗ ██║ ███████╗ * ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝ * Geophysical Computational Tools & Library (GCTL) * * Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn) * * GCTL is distributed under a dual licensing scheme. You can redistribute * it and/or modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation, either version 2 * of the License, or (at your option) any later version. You should have * received a copy of the GNU Lesser General Public License along with this * program. If not, see . * * If the terms and conditions of the LGPL v.2. would prevent you from using * the GCTL, please consider the option to obtain a commercial license for a * fee. These licenses are offered by the GCTL's original author. As a rule, * licenses are provided "as-is", unlimited in time for a one time fee. Please * send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget * to include some description of your company and the realm of its activities. * Also add information on how to contact you by electronic and paper mail. ******************************************************/ #include "lgd.h" /** * Default parameter for the Lévy-Gradient Descent (L-GD) method. */ static const gctl::lgd_para lgd_defparam = {1000, 0, 0, 1e-5, 1.0, 1.5, 0.01, 1e-8, -1.0, 0.5, 0.05}; gctl::lgd_solver::lgd_solver() { lgd_param_ = lgd_defparam; lgd_inter_ = 1; lgd_ques_num_ = 0; lgd_trace_times_ = 0; lgd_silent_ = lgd_has_range_ = lgd_has_alpha_ = lgd_save_trace_ = false; } gctl::lgd_solver::~lgd_solver(){} int gctl::lgd_solver::LGD_Progress(const int curr_t, const double curr_fx, const double mean_fx, const double best_fx, const lgd_para ¶m) { if (lgd_silent_) return 0; if (param.epsilon > 0.0 && mean_fx <= param.epsilon) { std::clog << GCTL_CLEARLINE << "\rF(x) = " << curr_fx << ", Mean F(x) = " << mean_fx << ", Best F(x) = " << best_fx << ", Times = " << curr_t; return 0; } if (lgd_inter_ > 0 && curr_t%lgd_inter_ == 0) { std::clog << GCTL_CLEARLINE << "\rF(x) = " << curr_fx << ", Mean F(x) = " << mean_fx << ", Best F(x) = " << best_fx << ", Times = " << curr_t; } return 0; } void gctl::lgd_solver::lgd_silent() { lgd_silent_ = true; return; } void gctl::lgd_solver::set_lgd_report_interval(int inter) { lgd_inter_ = inter; return; } void gctl::lgd_solver::set_lgd_para(const lgd_para &in_param) { lgd_param_ = in_param; return; } #ifdef GCTL_OPTIMIZATION_TOML void gctl::lgd_solver::set_lgd_para(std::string filename) { toml::value toml_data; toml_data = toml::parse(filename); set_lgd_para(toml_data); return; } void gctl::lgd_solver::set_lgd_para(const toml::value &toml_data) { lgd_param_ = lgd_defparam; std::string LGD = "lgd"; if (toml_data.contains(LGD)) { if (toml_data.at(LGD).contains("flight_times")) lgd_param_.flight_times = toml::find(toml_data, LGD, "flight_times"); if (toml_data.at(LGD).contains("batch")) lgd_param_.batch = toml::find(toml_data, LGD, "batch"); if (toml_data.at(LGD).contains("epsilon")) lgd_param_.epsilon = toml::find(toml_data, LGD, "epsilon"); if (toml_data.at(LGD).contains("stddev_v")) lgd_param_.stddev_v = toml::find(toml_data, LGD, "stddev_v"); if (toml_data.at(LGD).contains("beta")) lgd_param_.beta = toml::find(toml_data, LGD, "beta"); if (toml_data.at(LGD).contains("alpha")) lgd_param_.alpha = toml::find(toml_data, LGD, "alpha"); if (toml_data.at(LGD).contains("sigma")) lgd_param_.sigma = toml::find(toml_data, LGD, "sigma"); if (toml_data.at(LGD).contains("lambda")) lgd_param_.lambda = toml::find(toml_data, LGD, "lambda"); } return; } #endif // GCTL_OPTIMIZATION_TOML void gctl::lgd_solver::set_lgd_record_trace() { lgd_save_trace_ = true; return; } void gctl::lgd_solver::show_solver() { std::clog << "Solver's Setup Panel\n"; std::clog << "-----------------------------\n"; std::clog << "Solver: LGD\n"; std::clog << "Flights = " << lgd_param_.flight_times << ", Batch = " << lgd_param_.batch << ", Epsilon = " << lgd_param_.epsilon << ", Lambda = " << lgd_param_.lambda << "\n"; std::clog << "STD(v) = " << lgd_param_.stddev_v << ", Beta = " << lgd_param_.beta << ", Alpha = " << lgd_param_.alpha << ", Sigma = " << lgd_param_.sigma << "\n"; std::clog << "=============================\n"; return; } void gctl::lgd_solver::save_lgd_trace(std::string trace_file) { if (lgd_trace_times_ == 0) { GCTL_ShowWhatError("[gctl::lgd_solver] No trace is recorded.", GCTL_WARNING_ERROR, 0, 0, 0); return; } std::ofstream ofile; open_outfile(ofile, trace_file, ".txt"); int m_size = lgd_trace_.size()/lgd_trace_times_; ofile << "# L-GD flight traces.\n"; ofile << "# Each row represents an accepted solution.\n"; ofile << "# Model size: " << m_size << "\n"; ofile << "# Accepted solutions: " << lgd_trace_times_ << "\n"; for (size_t i = 0; i < lgd_trace_times_; i++) { for (size_t j = 0; j < m_size; j++) { ofile << lgd_trace_[i*m_size+j] << " "; } ofile << "\n"; } ofile.close(); return; } gctl::lgd_return_code gctl::lgd_solver::get_levy_distribution(array &dist) { if (lgd_param_.flight_times <= 0) return LGD_INVALID_MAX_ITERATIONS; if (lgd_param_.beta <= 1.0 || lgd_param_.beta >= 2.0) return LGD_INVALID_BETA; double gamma1 = tgamma(lgd_param_.beta + 1.0); double gamma2 = tgamma(0.5*(lgd_param_.beta + 1.0)); double stddev_u = pow((gamma1*sin(0.5*GCTL_Pi*lgd_param_.beta))/ (gamma2*lgd_param_.beta*pow(2, 0.5*(lgd_param_.beta-1.0))), 1.0/lgd_param_.beta); unsigned int sd; if (lgd_param_.seed == 0) sd = std::chrono::system_clock::now().time_since_epoch().count(); else sd = lgd_param_.seed; std::default_random_engine generator(sd); std::normal_distribution dist_u(0, stddev_u); std::normal_distribution dist_v(0, lgd_param_.stddev_v); std::uniform_real_distribution dist_s(1.0, 2.0); dist.resize(lgd_param_.flight_times); for (int i = 0; i <= lgd_param_.flight_times; i++) { dist[i] = fabs(dist_u(generator)/pow(fabs(dist_v(generator)), 1.0/lgd_param_.beta)); } return LGD_REACHED_MAX_ITERATIONS; } void gctl::lgd_solver::lgd_error_str(lgd_return_code err_code, std::ostream &ss, bool err_throw) { #if defined _WINDOWS || __WIN32__ if (!err_throw) { if (err_code >= 0) { SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_GREEN); ss << "Success! "; } else { SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_RED); ss << "Fail! "; } } #else if (!err_throw) { if (err_code >= 0) ss << "\033[1m\033[32mLGD Success! "; else ss << "\033[1m\033[31mLGD Fail! "; } #endif std::string err_str; switch (err_code) { case LGD_CONVERGENCE: err_str = "The iteration has reached convergence."; break; case LGD_STOP: err_str = "The iteration is stopped by the progress monitoring function."; break; case LGD_REACHED_MAX_ITERATIONS: err_str = "The maximal flight times has been reached."; break; case LGD_INVALID_SOLUTION_SIZE: err_str = "Invalid solution size."; break; case LGD_INVALID_MAX_ITERATIONS: err_str = "Invalid flight times."; break; case LGD_INVALID_EPSILON: err_str = "Invalid epsilon value."; break; case LGD_INVALID_STDV: err_str = "Invalid STD value for generating the levy distribution."; break; case LGD_INVALID_BETA: err_str = "Invalid beta value."; break; case LGD_INVALID_ALPHA: err_str = "Invalid alpha value."; break; case LGD_INVALID_SIGMA: err_str = "Invalid sigma value."; break; case LGD_NAN_VALUE: err_str = "NaN values found."; break; default: err_str = "Unknown error."; break; } if (err_throw && err_code < 0) throw err_str; else ss << err_str; #if defined _WINDOWS || __WIN32__ if (!err_throw) { if (err_code >= 0) { SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7); ss << std::endl; } else { SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7); ss << std::endl; } } #else if (!err_throw) { if (err_code >= 0) ss << "\033[0m" << std::endl; else ss << "\033[0m" << std::endl; } #endif return; } gctl::lgd_para gctl::lgd_solver::default_lgd_para() { lgd_para dp = lgd_defparam; return dp; } void gctl::lgd_solver::LGD_Minimize(array &best_m, array &mean_m, array &std_m, std::ostream &ss, bool verbose, bool err_throw) { if (lgd_silent_) { lgd_return_code ret = lgd(best_m, mean_m, std_m); if (ret < 0) lgd_error_str(ret, ss, true); return; } // 使用lcg求解注意当我们使用函数指针来调用求解函数时默认参数不可以省略 #ifdef GCTL_OPENMP double start = omp_get_wtime(); lgd_return_code ret = lgd(best_m, mean_m, std_m); double end = omp_get_wtime(); double costime = 1000*(end-start); #else clock_t start = clock(); lgd_return_code ret = lgd(best_m, mean_m, std_m); clock_t end = clock(); double costime = 1000*(end-start)/(double)CLOCKS_PER_SEC; #endif if (!err_throw) std::clog << std::endl << "Solver: LGD. Time cost: " << costime << " ms" << std::endl; if (verbose) lgd_error_str(ret, ss, err_throw); else if (ret < 0) lgd_error_str(ret, ss, err_throw); return; } void gctl::lgd_solver::LGD_Minimize(array &best_m, array &mean_m, array &std_m, const array &alphas, std::ostream &ss, bool verbose, bool err_throw) { lgd_ques_num_ = best_m.size(); if (lgd_ques_num_ != alphas.size()) { throw std::runtime_error("[gctl::lgd_solver] arraies' size do not match."); } lgd_alpha_.resize(lgd_ques_num_); for (int i = 0; i < lgd_ques_num_; i++) { if (alphas[i] <= 0.0) { throw std::runtime_error("[gctl::lgd_solver] Invalid scaling value."); } lgd_alpha_[i] = alphas[i]; } lgd_has_alpha_ = true; LGD_Minimize(best_m, mean_m, std_m, ss, verbose, err_throw); return; } void gctl::lgd_solver::LGD_Minimize(array &best_m, array &mean_m, array &std_m, const array &lows, const array &higs, std::ostream &ss, bool verbose, bool err_throw) { lgd_ques_num_ = best_m.size(); if (lgd_ques_num_ != lows.size() || lgd_ques_num_ != higs.size()) { throw std::runtime_error("[gctl::lgd_solver] arraies' size do not match."); } lgd_low_.resize(lgd_ques_num_); lgd_hig_.resize(lgd_ques_num_); for (int i = 0; i < lgd_ques_num_; i++) { if (lows[i] >= higs[i]) { throw std::runtime_error("[gctl::lgd_solver] Invalid bound value."); } lgd_low_[i] = lows[i]; lgd_hig_[i] = higs[i]; } lgd_has_range_ = true; LGD_Minimize(best_m, mean_m, std_m, ss, verbose, err_throw); return; } void gctl::lgd_solver::LGD_Minimize(array &best_m, array &mean_m, array &std_m, double low, double hig, std::ostream &ss, bool verbose, bool err_throw) { if (low >= hig) { throw std::runtime_error("[gctl::lgd_solver] Invalid bound value."); } lgd_ques_num_ = best_m.size(); lgd_low_.resize(lgd_ques_num_, low); lgd_hig_.resize(lgd_ques_num_, hig); lgd_has_range_ = true; LGD_Minimize(best_m, mean_m, std_m, ss, verbose, err_throw); return; } void gctl::lgd_solver::LGD_Minimize_Momentum(array &best_m, array &mean_m, array &std_m, std::ostream &ss, bool verbose, bool err_throw) { if (lgd_silent_) { lgd_return_code ret = lgd_momentum(best_m, mean_m, std_m); if (ret < 0) lgd_error_str(ret, ss, true); return; } // 使用lcg求解注意当我们使用函数指针来调用求解函数时默认参数不可以省略 #ifdef GCTL_OPENMP double start = omp_get_wtime(); lgd_return_code ret = lgd_momentum(best_m, mean_m, std_m); double end = omp_get_wtime(); double costime = 1000*(end-start); #else clock_t start = clock(); lgd_return_code ret = lgd_momentum(best_m, mean_m, std_m); clock_t end = clock(); double costime = 1000*(end-start)/(double)CLOCKS_PER_SEC; #endif if (!err_throw) std::clog << std::endl << "Solver: LGDM. Time cost: " << costime << " ms" << std::endl; if (verbose) lgd_error_str(ret, ss, err_throw); else if (ret < 0) lgd_error_str(ret, ss, err_throw); return; } void gctl::lgd_solver::LGD_Minimize_Momentum(array &best_m, array &mean_m, array &std_m, const array &lows, const array &higs, std::ostream &ss, bool verbose, bool err_throw) { lgd_ques_num_ = best_m.size(); if (lgd_ques_num_ != lows.size() || lgd_ques_num_ != higs.size()) { throw std::runtime_error("[gctl::lgd_solver] arraies' size do not match."); } lgd_low_.resize(lgd_ques_num_); lgd_hig_.resize(lgd_ques_num_); for (int i = 0; i < lgd_ques_num_; i++) { if (lows[i] >= higs[i]) { throw std::runtime_error("[gctl::lgd_solver] Invalid bound value."); } lgd_low_[i] = lows[i]; lgd_hig_[i] = higs[i]; } lgd_has_range_ = true; LGD_Minimize_Momentum(best_m, mean_m, std_m, ss, verbose, err_throw); return; } gctl::lgd_return_code gctl::lgd_solver::lgd(array &best_m, array &mean_m, array &std_m) { lgd_ques_num_ = best_m.size(); // check parameters if (lgd_ques_num_ <= 0) return LGD_INVALID_SOLUTION_SIZE; if (lgd_param_.flight_times <= 0) return LGD_INVALID_MAX_ITERATIONS; if (lgd_param_.epsilon <= 0) return LGD_INVALID_EPSILON; if (lgd_param_.stddev_v <= 0) return LGD_INVALID_STDV; if (lgd_param_.beta <= 1.0 || lgd_param_.beta >= 2.0) return LGD_INVALID_BETA; if (lgd_param_.alpha <= 0) return LGD_INVALID_ALPHA; if (lgd_param_.sigma <= 0) return LGD_INVALID_SIGMA; // initiate solutions mean_m.resize(lgd_ques_num_, 0.0); std_m.resize(lgd_ques_num_, 0.0); double gamma1 = tgamma(lgd_param_.beta + 1.0); double gamma2 = tgamma(0.5*(lgd_param_.beta + 1.0)); double stddev_u = pow((gamma1*sin(0.5*GCTL_Pi*lgd_param_.beta)) / (gamma2*lgd_param_.beta*pow(2, 0.5*(lgd_param_.beta-1.0))), 1.0/lgd_param_.beta); unsigned int sd; if (lgd_param_.seed == 0) sd = std::chrono::system_clock::now().time_since_epoch().count(); else sd = lgd_param_.seed; std::default_random_engine generator(sd); std::normal_distribution dist_u(0, stddev_u); std::normal_distribution dist_v(0, lgd_param_.stddev_v); std::uniform_real_distribution dist_s(1.0, 2.0); array g, g_mem, g_orth, new_mean, b_m, alphas; g.resize(lgd_ques_num_); g_mem.resize(2*lgd_ques_num_); g_orth.resize(2*lgd_ques_num_); new_mean.resize(lgd_ques_num_); b_m.resize(lgd_ques_num_); alphas.resize(lgd_ques_num_); // 初始化参数变化范围为lgd_param_.alpha vecset(alphas, lgd_param_.alpha); if (lgd_has_range_) { vecdiff(alphas, lgd_hig_, lgd_low_); vecscale(alphas, lgd_param_.alpha); } if (lgd_has_alpha_) { veccpy(alphas, lgd_alpha_, lgd_param_.alpha); } double fx_best, fx_tmp, direct_mod, levy_length; double fx_mean = NAN; // 开始飞行 int rcd_times = 0; lgd_trace_times_ = 0; for (int ft = 0; ft <= lgd_param_.flight_times; ft++) { // 计算尝试解 fx_tmp = LGD_Evaluate(best_m, g); if (ft == 0 || fx_tmp < fx_best) { fx_best = fx_tmp; veccpy(b_m, best_m); } // 记录飞行轨迹 if (lgd_param_.lambda <= 0.0 || (lgd_param_.lambda > 0.0 && fx_tmp <= lgd_param_.lambda)) { for (int i = 0; i < lgd_ques_num_; i++) { std_m[i] = dynamic_stddev(std_m[i], rcd_times, mean_m[i], best_m[i], new_mean[i]); mean_m[i] = new_mean[i]; } rcd_times++; if (lgd_save_trace_) { lgd_trace_.append_array(best_m); lgd_trace_times_++; } } if (LGD_Progress(ft, fx_tmp, fx_mean, fx_best, lgd_param_)) { // 将迭代结果返还给m veccpy(best_m, b_m); return LGD_STOP; } if (lgd_param_.batch > 0 && (rcd_times+1)%lgd_param_.batch == 0) { fx_mean = LGD_Evaluate(mean_m, g); if (fx_mean < lgd_param_.epsilon) { LGD_Progress(ft, fx_tmp, fx_mean, fx_best, lgd_param_); // 将迭代结果返还给m veccpy(best_m, b_m); return LGD_CONVERGENCE; } } // 驻点检测 direct_mod = sqrt(vecdot(g, g)); if (direct_mod < lgd_param_.sigma) { if (ft == 0) // 初次飞行时无记录 { do // 如果梯度消失则采用一个随机方向 { for (int i = 0; i < lgd_ques_num_; i++) { g[i] = dist_s(generator); } direct_mod = sqrt(vecdot(g, g)); } while (direct_mod < lgd_param_.sigma); } else // 如果梯度消失则朝着上一次迭代方向的正交方向走一步 { for (int i = 0; i < lgd_ques_num_; i++) { g_mem[i+lgd_ques_num_] = dist_s(generator); } schmidt_orthogonal(g_mem, g_orth, 2); for (int i = 0; i < lgd_ques_num_; i++) { g[i] = g_orth[i+lgd_ques_num_]; } direct_mod = 1.0; // 此时的模量为单位模量 } } // 莱维飞行的步长注意原公式中无最外层绝对值符号 // 这是我们需要步长的绝对值因此取绝对值 levy_length = fabs(dist_u(generator)/pow(fabs(dist_v(generator)), 1.0/lgd_param_.beta)); for (int i = 0; i < lgd_ques_num_; i++) { best_m[i] -= levy_length*alphas[i]*g[i]/direct_mod; //best_m[i] -= levy_length*g[i]/direct_mod; } if (!vecvalid(best_m)) { return LGD_NAN_VALUE; } // 记录梯度方向 for (int i = 0; i < lgd_ques_num_; i++) { g_mem[i] = g[i]; } // 这里可以添加取值范围的约束 if (lgd_has_range_) { vecbtm(best_m, lgd_low_); vectop(best_m, lgd_hig_); } } // 将迭代结果返还给m veccpy(best_m, b_m); return LGD_REACHED_MAX_ITERATIONS; } gctl::lgd_return_code gctl::lgd_solver::lgd_momentum(array &best_m, array &mean_m, array &std_m) { lgd_ques_num_ = best_m.size(); // check parameters if (lgd_ques_num_ <= 0) return LGD_INVALID_SOLUTION_SIZE; if (lgd_param_.flight_times <= 0) return LGD_INVALID_MAX_ITERATIONS; if (lgd_param_.epsilon <= 0) return LGD_INVALID_EPSILON; if (lgd_param_.stddev_v <= 0) return LGD_INVALID_STDV; if (lgd_param_.beta <= 1.0 || lgd_param_.beta >= 2.0) return LGD_INVALID_BETA; if (lgd_param_.alpha <= 0) return LGD_INVALID_ALPHA; if (lgd_param_.sigma <= 0) return LGD_INVALID_SIGMA; // initiate solutions mean_m.resize(lgd_ques_num_, 0.0); std_m.resize(lgd_ques_num_, 0.0); double gamma1 = tgamma(lgd_param_.beta + 1.0); double gamma2 = tgamma(0.5*(lgd_param_.beta + 1.0)); double stddev_u = pow((gamma1*sin(0.5*GCTL_Pi*lgd_param_.beta)) / (gamma2*lgd_param_.beta*pow(2, 0.5*(lgd_param_.beta-1.0))), 1.0/lgd_param_.beta); unsigned int sd; if (lgd_param_.seed == 0) sd = std::chrono::system_clock::now().time_since_epoch().count(); else sd = lgd_param_.seed; std::default_random_engine generator(sd); std::normal_distribution dist_u(0, stddev_u); std::normal_distribution dist_v(0, lgd_param_.stddev_v); std::uniform_real_distribution dist_s(1.0, 2.0); array g, g_mem, g_orth, new_mean, b_m, alphas, fst_mt, sec_mt; g.resize(lgd_ques_num_); g_mem.resize(2*lgd_ques_num_); g_orth.resize(2*lgd_ques_num_); new_mean.resize(lgd_ques_num_); b_m.resize(lgd_ques_num_); alphas.resize(lgd_ques_num_); double mt = 1.0, vt = 1.0; double fhat, shat; fst_mt.resize(lgd_ques_num_, 0.0); sec_mt.resize(lgd_ques_num_, 0.0); // 初始化参数变化范围为lgd_param_.alpha vecset(alphas, lgd_param_.alpha); if (lgd_has_range_) { vecdiff(alphas, lgd_hig_, lgd_low_); vecscale(alphas, lgd_param_.alpha); } if (lgd_has_alpha_) { veccpy(alphas, lgd_alpha_, lgd_param_.alpha); } double fx_best, fx_tmp, direct_mod, levy_length; double fx_mean = NAN; // 开始飞行 int rcd_times = 0; lgd_trace_times_ = 0; for (int ft = 0; ft <= lgd_param_.flight_times; ft++) { // 计算尝试解 fx_tmp = LGD_Evaluate(best_m, g); if (ft == 0 || fx_tmp < fx_best) { fx_best = fx_tmp; veccpy(b_m, best_m); } // 记录飞行轨迹 if (lgd_param_.lambda <= 0.0 || (lgd_param_.lambda > 0.0 && fx_tmp <= lgd_param_.lambda)) { for (int i = 0; i < lgd_ques_num_; i++) { std_m[i] = dynamic_stddev(std_m[i], rcd_times, mean_m[i], best_m[i], new_mean[i]); mean_m[i] = new_mean[i]; } rcd_times++; if (lgd_save_trace_) { lgd_trace_.append_array(best_m); lgd_trace_times_++; } } if (LGD_Progress(ft, fx_tmp, fx_mean, fx_best, lgd_param_)) { // 将迭代结果返还给m veccpy(best_m, b_m); return LGD_STOP; } if (lgd_param_.batch > 0 && (rcd_times+1)%lgd_param_.batch == 0) { fx_mean = LGD_Evaluate(mean_m, g); if (fx_mean < lgd_param_.epsilon) { LGD_Progress(ft, fx_tmp, fx_mean, fx_best, lgd_param_); // 将迭代结果返还给m veccpy(best_m, b_m); return LGD_CONVERGENCE; } } // 驻点检测 direct_mod = sqrt(vecdot(g, g)); if (direct_mod < lgd_param_.sigma) { if (ft == 0) // 初次飞行时无记录 { do // 如果梯度消失则采用一个随机方向 { for (int i = 0; i < lgd_ques_num_; i++) { g[i] = dist_s(generator); } direct_mod = sqrt(vecdot(g, g)); } while (direct_mod < lgd_param_.sigma); } else // 如果梯度消失则朝着上一次迭代方向的正交方向走一步 { for (int i = 0; i < lgd_ques_num_; i++) { g_mem[i+lgd_ques_num_] = dist_s(generator); } schmidt_orthogonal(g_mem, g_orth, 2); for (int i = 0; i < lgd_ques_num_; i++) { g[i] = g_orth[i+lgd_ques_num_]; } direct_mod = 1.0; // 此时的模量为单位模量 } } // 莱维飞行的步长注意原公式中无最外层绝对值符号 // 这是我们需要步长的绝对值因此取绝对值 levy_length = fabs(dist_u(generator)/pow(fabs(dist_v(generator)), 1.0/lgd_param_.beta)); mt *= lgd_param_.fmt; vt *= lgd_param_.smt; for (int i = 0; i < lgd_ques_num_; i++) { g[i] = levy_length*alphas[i]*g[i]/direct_mod; //g[i] = levy_length*g[i]/direct_mod; fst_mt[i] = lgd_param_.fmt*fst_mt[i] + (1.0 - lgd_param_.fmt)*g[i]; sec_mt[i] = lgd_param_.smt*sec_mt[i] + (1.0 - lgd_param_.smt)*g[i]*g[i]; fhat = fst_mt[i]/(1.0 - mt); shat = sec_mt[i]/(1.0 - vt); best_m[i] -= fhat/(sqrt(shat) + lgd_param_.sigma); } if (!vecvalid(best_m)) { return LGD_NAN_VALUE; } // 记录梯度方向 for (int i = 0; i < lgd_ques_num_; i++) { g_mem[i] = g[i]; } // 这里可以添加取值范围的约束 if (lgd_has_range_) { vecbtm(best_m, lgd_low_); vectop(best_m, lgd_hig_); } } // 将迭代结果返还给m veccpy(best_m, b_m); return LGD_REACHED_MAX_ITERATIONS; }