From 4a3ee5c81507146394041150136d6d3f33f48f53 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 24 Dec 2024 09:57:24 +0800 Subject: [PATCH] tmp --- example/ex2.cpp | 20 ++-- lib/optimization/lgd.cpp | 232 ++++++++++++++++++++++++++++++++++++++- lib/optimization/lgd.h | 58 +++++++++- 3 files changed, 297 insertions(+), 13 deletions(-) diff --git a/example/ex2.cpp b/example/ex2.cpp index 6b9a637..3a90eed 100644 --- a/example/ex2.cpp +++ b/example/ex2.cpp @@ -27,8 +27,8 @@ #include "../lib/optimization.h" -#define M 100 -#define N 90 +#define M 90 +#define N 100 // get random floating points double random_double(double l, double t) @@ -134,28 +134,30 @@ int main(int argc, char const *argv[]) // 生成一组正演解 包含一些大值和一些小值 gctl::array fm(N); int N2 = (int) N/2; + + srand(time(0)); for (int i = 0; i < N2; i++) { - //fm[i] = random_double(5, 10); - fm[i] = 10.0; + fm[i] = random_double(5, 10); + //fm[i] = 10.0; } for (int i = N2; i < N; i++) { - //fm[i] = random_double(1, 2); - fm[i] = 1.0; + fm[i] = random_double(1, 2); + //fm[i] = 1.0; } for (int i = 0; i < N2; i++) { - low[i] = 9.0; // 对解的范围进行约束 + low[i] = 4.0; // 对解的范围进行约束 hig[i] = 11.0; } for (int i = N2; i < N; i++) { low[i] = 0.0; - hig[i] = 2.0; + hig[i] = 3.0; } ex2 e; @@ -164,6 +166,8 @@ int main(int argc, char const *argv[]) gctl::lgd_para my_para = e.default_lgd_para(); my_para.flight_times = 20000; my_para.batch = 100; + my_para.fmt = 0.5; + my_para.smt = 0.05; e.set_lgd_para(my_para); e.LGD_Minimize(m, mean_m, stddev_m, low, hig); diff --git a/lib/optimization/lgd.cpp b/lib/optimization/lgd.cpp index ee46e48..0c24842 100644 --- a/lib/optimization/lgd.cpp +++ b/lib/optimization/lgd.cpp @@ -30,7 +30,7 @@ /** * Default parameter for the Lévy-Gradient Descent (L-GD) method. */ -static const gctl::lgd_para lgd_defparam = {1000, 0, 0, 1e-5, 1.0, 1.5, 0.01, 1e-8, -1.0}; +static const gctl::lgd_para lgd_defparam = {1000, 0, 0, 1e-5, 1.0, 1.5, 0.01, 1e-8, -1.0, 0.5, 0.05}; gctl::lgd_solver::lgd_solver() { @@ -370,6 +370,62 @@ void gctl::lgd_solver::LGD_Minimize(array &best_m, array &mean_m return; } +void gctl::lgd_solver::LGD_Minimize_Momentum(array &best_m, array &mean_m, array &std_m, + std::ostream &ss, bool verbose, bool err_throw) +{ + if (lgd_silent_) + { + lgd_return_code ret = lgd_momentum(best_m, mean_m, std_m); + if (ret < 0) lgd_error_str(ret, ss, true); + return; + } + + // 使用lcg求解 注意当我们使用函数指针来调用求解函数时默认参数不可以省略 +#ifdef GCTL_OPENMP + double start = omp_get_wtime(); + lgd_return_code ret = lgd_momentum(best_m, mean_m, std_m); + double end = omp_get_wtime(); + double costime = 1000*(end-start); +#else + clock_t start = clock(); + lgd_return_code ret = lgd_momentum(best_m, mean_m, std_m); + clock_t end = clock(); + double costime = 1000*(end-start)/(double)CLOCKS_PER_SEC; +#endif + + if (!err_throw) std::clog << std::endl << "Solver: LGDM. Time cost: " << costime << " ms" << std::endl; + if (verbose) lgd_error_str(ret, ss, err_throw); + else if (ret < 0) lgd_error_str(ret, ss, err_throw); + return; +} + +void gctl::lgd_solver::LGD_Minimize_Momentum(array &best_m, array &mean_m, array &std_m, + const array &lows, const array &higs, std::ostream &ss, bool verbose, bool err_throw) +{ + lgd_ques_num_ = best_m.size(); + if (lgd_ques_num_ != lows.size() || lgd_ques_num_ != higs.size()) + { + throw std::runtime_error("[gctl::lgd_solver] arraies' size do not match."); + } + + lgd_low_.resize(lgd_ques_num_); + lgd_hig_.resize(lgd_ques_num_); + for (int i = 0; i < lgd_ques_num_; i++) + { + if (lows[i] >= higs[i]) + { + throw std::runtime_error("[gctl::lgd_solver] Invalid bound value."); + } + + lgd_low_[i] = lows[i]; + lgd_hig_[i] = higs[i]; + } + lgd_has_range_ = true; + + LGD_Minimize_Momentum(best_m, mean_m, std_m, ss, verbose, err_throw); + return; +} + gctl::lgd_return_code gctl::lgd_solver::lgd(array &best_m, array &mean_m, array &std_m) { lgd_ques_num_ = best_m.size(); @@ -530,6 +586,180 @@ gctl::lgd_return_code gctl::lgd_solver::lgd(array &best_m, array } } + // 将迭代结果返还给m + veccpy(best_m, b_m); + return LGD_REACHED_MAX_ITERATIONS; +} + +gctl::lgd_return_code gctl::lgd_solver::lgd_momentum(array &best_m, array &mean_m, array &std_m) +{ + lgd_ques_num_ = best_m.size(); + // check parameters + if (lgd_ques_num_ <= 0) return LGD_INVALID_SOLUTION_SIZE; + if (lgd_param_.flight_times <= 0) return LGD_INVALID_MAX_ITERATIONS; + if (lgd_param_.epsilon <= 0) return LGD_INVALID_EPSILON; + if (lgd_param_.stddev_v <= 0) return LGD_INVALID_STDV; + if (lgd_param_.beta <= 1.0 || lgd_param_.beta >= 2.0) return LGD_INVALID_BETA; + if (lgd_param_.alpha <= 0) return LGD_INVALID_ALPHA; + if (lgd_param_.sigma <= 0) return LGD_INVALID_SIGMA; + + // initiate solutions + mean_m.resize(lgd_ques_num_, 0.0); std_m.resize(lgd_ques_num_, 0.0); + + double gamma1 = tgamma(lgd_param_.beta + 1.0); + double gamma2 = tgamma(0.5*(lgd_param_.beta + 1.0)); + double stddev_u = pow((gamma1*sin(0.5*GCTL_Pi*lgd_param_.beta)) / (gamma2*lgd_param_.beta*pow(2, 0.5*(lgd_param_.beta-1.0))), 1.0/lgd_param_.beta); + + unsigned int sd; + if (lgd_param_.seed == 0) sd = std::chrono::system_clock::now().time_since_epoch().count(); + else sd = lgd_param_.seed; + + std::default_random_engine generator(sd); + std::normal_distribution dist_u(0, stddev_u); + std::normal_distribution dist_v(0, lgd_param_.stddev_v); + std::uniform_real_distribution dist_s(1.0, 2.0); + + array g, g_mem, g_orth, new_mean, b_m, alphas, fst_mt, sec_mt; + g.resize(lgd_ques_num_); g_mem.resize(2*lgd_ques_num_); g_orth.resize(2*lgd_ques_num_); + new_mean.resize(lgd_ques_num_); b_m.resize(lgd_ques_num_); alphas.resize(lgd_ques_num_); + + double mt = 1.0, vt = 1.0; + fst_mt.resize(lgd_ques_num_, 0.0); + sec_mt.resize(lgd_ques_num_, 0.0); + + // 初始化参数变化范围为lgd_param_.alpha + vecset(alphas, lgd_param_.alpha); + + if (lgd_has_range_) + { + vecdiff(alphas, lgd_hig_, lgd_low_); + vecscale(alphas, lgd_param_.alpha); + } + + if (lgd_has_alpha_) + { + veccpy(alphas, lgd_alpha_, lgd_param_.alpha); + } + + double fx_best, fx_tmp, direct_mod, levy_length; + double fx_mean = NAN; + + // 开始飞行 + int rcd_times = 0; + lgd_trace_times_ = 0; + for (int ft = 0; ft <= lgd_param_.flight_times; ft++) + { + // 计算尝试解 + fx_tmp = LGD_Evaluate(best_m, g); + if (ft == 0 || fx_tmp < fx_best) + { + fx_best = fx_tmp; + veccpy(b_m, best_m); + } + + // 记录飞行轨迹 + if (lgd_param_.lambda <= 0.0 || (lgd_param_.lambda > 0.0 && fx_tmp <= lgd_param_.lambda)) + { + for (int i = 0; i < lgd_ques_num_; i++) + { + std_m[i] = dynamic_stddev(std_m[i], rcd_times, mean_m[i], best_m[i], new_mean[i]); + mean_m[i] = new_mean[i]; + } + rcd_times++; + + if (lgd_save_trace_) + { + lgd_trace_.append_array(best_m); + lgd_trace_times_++; + } + } + + if (LGD_Progress(ft, fx_tmp, fx_mean, fx_best, lgd_param_)) + { + // 将迭代结果返还给m + veccpy(best_m, b_m); + return LGD_STOP; + } + + if (lgd_param_.batch > 0 && (rcd_times+1)%lgd_param_.batch == 0) + { + fx_mean = LGD_Evaluate(mean_m, g); + if (fx_mean < lgd_param_.epsilon) + { + LGD_Progress(ft, fx_tmp, fx_mean, fx_best, lgd_param_); + // 将迭代结果返还给m + veccpy(best_m, b_m); + return LGD_CONVERGENCE; + } + } + + // 驻点检测 + direct_mod = sqrt(vecdot(g, g)); + if (direct_mod < lgd_param_.sigma) + { + if (ft == 0) // 初次飞行时无记录 + { + do // 如果梯度消失 则采用一个随机方向 + { + for (int i = 0; i < lgd_ques_num_; i++) + { + g[i] = dist_s(generator); + } + + direct_mod = sqrt(vecdot(g, g)); + } + while (direct_mod < lgd_param_.sigma); + } + else // 如果梯度消失 则朝着上一次迭代方向的正交方向走一步 + { + for (int i = 0; i < lgd_ques_num_; i++) + { + g_mem[i+lgd_ques_num_] = dist_s(generator); + } + + schmidt_orthogonal(g_mem, g_orth, 2); + + for (int i = 0; i < lgd_ques_num_; i++) + { + g[i] = g_orth[i+lgd_ques_num_]; + } + direct_mod = 1.0; // 此时的模量为单位模量 + } + } + + // 莱维飞行的步长 注意原公式中无最外层绝对值符号 + // 这是我们需要步长的绝对值 因此取绝对值 + levy_length = fabs(dist_u(generator)/pow(fabs(dist_v(generator)), 1.0/lgd_param_.beta)); + + mt *= lgd_param_.fmt; + vt *= lgd_param_.smt; + for (int i = 0; i < lgd_ques_num_; i++) + { + g[i] = levy_length*alphas[i]*g[i]/direct_mod; + fst_mt[i] = (lgd_param_.fmt*fst_mt[i] + (1.0 - lgd_param_.fmt)*g[i])/(1.0 - mt); + sec_mt[i] = (lgd_param_.smt*sec_mt[i] + (1.0 - lgd_param_.smt)*g[i]*g[i])/(1.0 - vt); + best_m[i] -= fst_mt[i]/(sqrt(sec_mt[i]) + 1e-10); + } + + if (!vecvalid(best_m)) + { + return LGD_NAN_VALUE; + } + + // 记录梯度方向 + for (int i = 0; i < lgd_ques_num_; i++) + { + g_mem[i] = g[i]; + } + + // 这里可以添加取值范围的约束 + if (lgd_has_range_) + { + vecbtm(best_m, lgd_low_); + vectop(best_m, lgd_hig_); + } + } + // 将迭代结果返还给m veccpy(best_m, b_m); return LGD_REACHED_MAX_ITERATIONS; diff --git a/lib/optimization/lgd.h b/lib/optimization/lgd.h index 24fbfe6..c8d2b38 100644 --- a/lib/optimization/lgd.h +++ b/lib/optimization/lgd.h @@ -138,8 +138,24 @@ namespace gctl * is set. The default is -1.0. */ double lambda; + + /** + * @brief First order moment. + * + */ + double fmt; + + /** + * @brief Second order moment. + * + */ + double smt; }; + /** + * @brief The lévy gradient descent solver (LGD). + * + */ class lgd_solver { public: @@ -169,13 +185,39 @@ namespace gctl virtual int LGD_Progress(const int curr_t, const double curr_fx, const double mean_fx, const double best_fx, const lgd_para ¶m); + /** + * @brief Susppend all output info. + * + */ void lgd_silent(); + + /** + * @brief Set interval to run the LGD_Progress function. + * + * @param inter + */ void set_lgd_report_interval(int inter); + + /** + * @brief Show solver setup. + * + */ void show_solver(); - - void set_lgd_record_trace(); ///< Turn on the recording of fight traces. - // Save fight traces to file. Not that only qualified solutions will be - // saved if the recording threshold is set. + + /** + * @brief Turn on the recording of fight traces. Use this with caution as it may use a lot of momeries. + * + */ + void set_lgd_record_trace(); + + /** + * @brief Save fight traces to file. + * + * @note Must turn on the trace recording for this function to work properly. + * Only qualified solutions will be saved if the recording threshold is set. + * + * @param trace_file output file. + */ void save_lgd_trace(std::string trace_file); /** @@ -227,9 +269,17 @@ namespace gctl double low, double hig, std::ostream &ss = std::clog, bool verbose = true, bool err_throw = false); + void LGD_Minimize_Momentum(array &best_m, array &mean_m, array &std_m, + std::ostream &ss = std::clog, bool verbose = true, bool err_throw = false); + + void LGD_Minimize_Momentum(array &best_m, array &mean_m, array &std_m, + const array &lows, const array &higs, std::ostream &ss = std::clog, + bool verbose = true, bool err_throw = false); + private: void lgd_error_str(lgd_return_code err_code, std::ostream &ss = std::clog, bool err_throw = false); lgd_return_code lgd(array &best_m, array &mean_m, array &std_m); + lgd_return_code lgd_momentum(array &best_m, array &mean_m, array &std_m); private: lgd_para lgd_param_;