/******************************************************** * ██████╗ ██████╗████████╗██╗ * ██╔════╝ ██╔════╝╚══██╔══╝██║ * ██║ ███╗██║ ██║ ██║ * ██║ ██║██║ ██║ ██║ * ╚██████╔╝╚██████╗ ██║ ███████╗ * ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝ * Geophysical Computational Tools & Library (GCTL) * * Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn) * * GCTL is distributed under a dual licensing scheme. You can redistribute * it and/or modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation, either version 2 * of the License, or (at your option) any later version. You should have * received a copy of the GNU Lesser General Public License along with this * program. If not, see . * * If the terms and conditions of the LGPL v.2. would prevent you from using * the GCTL, please consider the option to obtain a commercial license for a * fee. These licenses are offered by the GCTL's original author. As a rule, * licenses are provided "as-is", unlimited in time for a one time fee. Please * send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget * to include some description of your company and the realm of its activities. * Also add information on how to contact you by electronic and paper mail. ******************************************************/ #ifndef _GCTL_LBFGS_H #define _GCTL_LBFGS_H #include "gctl/core.h" #include "gctl/maths.h" #include "gctl/algorithm.h" #include "gctl_optimization_config.h" #ifdef GCTL_OPTIMIZATION_TOML #include "toml.hpp" #endif // GCTL_OPTIMIZATION_TOML #if defined _WINDOWS || __WIN32__ #include "windows.h" #endif // _WINDOWS || __WIN32__ namespace gctl { /** * @brief Return value of the lbfgs() function. Roughly speaking, a negative value indicates an error. */ enum lbfgs_return_code { /** L-BFGS reaches convergence. */ LBFGS_EPS_CONVERGENCE = 0, LBFGS_DELTA_CONVERGENCE, LBFGS_RESI_CONVERGENCE, LBFGS_STOP, //1 /** The initial variables already minimize the objective function. */ LBFGS_ALREADY_MINIMIZED, //2 /** Unknown error. */ LBFGSERR_UNKNOWNERROR = -1024, /** Logic error. */ LBFGSERR_LOGICERROR, //-1023 /** Insufficient memory. */ LBFGSERR_OUTOFMEMORY, //-1022 /** The minimization process has been canceled. */ LBFGSERR_CANCELED, /** Invalid number of variables specified. */ LBFGSERR_INVALID_N, /** Invalid number of variables (for SSE) specified. */ LBFGSERR_INVALID_N_SSE, /** The array x must be aligned to 16 (for SSE). */ LBFGSERR_INVALID_X_SSE, /** Invalid parameter lbfgs_para::epsilon specified. */ LBFGSERR_INVALID_EPSILON, /** Invalid parameter lbfgs_para::past specified. */ LBFGSERR_INVALID_TESTPERIOD, /** Invalid parameter lbfgs_para::delta specified. */ LBFGSERR_INVALID_DELTA, /** Invalid parameter lbfgs_para::linesearch specified. */ LBFGSERR_INVALID_LINESEARCH, /** Invalid parameter lbfgs_para::max_step specified. */ LBFGSERR_INVALID_MINSTEP, /** Invalid parameter lbfgs_para::max_step specified. */ LBFGSERR_INVALID_MAXSTEP, /** Invalid parameter lbfgs_para::ftol specified. */ LBFGSERR_INVALID_FTOL, /** Invalid parameter lbfgs_para::wolfe specified. */ LBFGSERR_INVALID_WOLFE, /** Invalid parameter lbfgs_para::gtol specified. */ LBFGSERR_INVALID_GTOL, /** Invalid parameter lbfgs_para::xtol specified. */ LBFGSERR_INVALID_XTOL, /** Invalid parameter lbfgs_para::max_linesearch specified. */ LBFGSERR_INVALID_MAXLINESEARCH, /** Invalid parameter lbfgs_para::orthantwise_c specified. */ LBFGSERR_INVALID_ORTHANTWISE, /** Invalid parameter lbfgs_para::orthantwise_start specified. */ LBFGSERR_INVALID_ORTHANTWISE_START, /** Invalid parameter lbfgs_para::orthantwise_end specified. */ LBFGSERR_INVALID_ORTHANTWISE_END, /** The line-search step went out of the interval of uncertainty. */ LBFGSERR_OUTOFINTERVAL, /** A logic error occurred; alternatively, the interval of uncertainty became too small. */ LBFGSERR_INCORRECT_TMINMAX, /** A rounding error occurred; alternatively, no line-search step satisfies the sufficient decrease and curvature conditions. */ LBFGSERR_ROUNDING_ERROR, /** The line-search step became smaller than lbfgs_para::min_step. */ LBFGSERR_MINIMUMSTEP, /** The line-search step became larger than lbfgs_para::max_step. */ LBFGSERR_MAXIMUMSTEP, /** The line-search routine reaches the maximum number of evaluations. */ LBFGSERR_MAXIMUMLINESEARCH, /** The algorithm routine reaches the maximum number of iterations. */ LBFGSERR_MAXIMUMITERATION, /** Relative width of the interval of uncertainty is at most lbfgs_para::xtol. */ LBFGSERR_WIDTHTOOSMALL, /** A logic error (negative line-search step) occurred. */ LBFGSERR_INVALIDPARAMETERS, /** The current search direction increases the objective function value. */ LBFGSERR_INCREASEGRADIENT, }; // 枚举类型 线性搜索方法 // 0 MoreThuente方法 // 1 Armijo条件方法 // 2 标准Wolfe条件方法 // 3 增强Wolfe条件方法 /** * @brief Line search algorithms. */ enum line_search_type { /** The default algorithm (MoreThuente method). */ LBFGS_LINESEARCH_DEFAULT = 0, /** MoreThuente method proposd by More and Thuente. */ LBFGS_LINESEARCH_MORETHUENTE = 0, /** * Backtracking method with the Armijo condition. * The backtracking method finds the step length such that it satisfies * the sufficient decrease (Armijo) condition, * - f(x + a * d) <= f(x) + lbfgs_para::ftol * a * g(x)^T d, * * where x is the current point, d is the current search direction, and * a is the step length. */ LBFGS_LINESEARCH_BACKTRACKING_ARMIJO = 1, /** The backtracking method with the defualt (regular Wolfe) condition. */ LBFGS_LINESEARCH_BACKTRACKING = 2, /** * Backtracking method with regular Wolfe condition. * The backtracking method finds the step length such that it satisfies * both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) * and the curvature condition, * - g(x + a * d)^T d >= lbfgs_para::wolfe * g(x)^T d, * * where x is the current point, d is the current search direction, and * a is the step length. */ LBFGS_LINESEARCH_BACKTRACKING_WOLFE = 2, /** * Backtracking method with strong Wolfe condition. * The backtracking method finds the step length such that it satisfies * both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) * and the following condition, * - |g(x + a * d)^T d| <= lbfgs_para::wolfe * |g(x)^T d|, * * where x is the current point, d is the current search direction, and * a is the step length. */ LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 3, LBFGS_LINESEARCH_BACKTRACKING_ARMIJO_QUAD = 4, //LBFGS_LINESEARCH_BACKTRACKING_QUAD = 5, //LBFGS_LINESEARCH_BACKTRACKING_WOLFE_QUAD = 5, //LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE_QUAD = 6, }; // L-BFGS参数类型。参数很多,简要说明如下: // m L-BFGS算法中储存的前序sk与yk向量个数,这个值控制了算法使用的内存多少,默认值为6(不建议小于3的值),值多大近似精度越高,计算量也越大。 // epsilon 迭代的终止精度,默认值为1e-5 // past 以delta(不同迭代次数的目标函数值)为基础的迭代终止条件数,past代表了以多少迭代次数之前的目标函数值作为delta计算的间隔,默认值为0, // 即不以delta为迭代终止条件。 // delta (f' - f) / f 不同迭代次数时目标函数之差与当前目标函数值之比,但past不为0时会计算。 // max_iterations 最大迭代次数,为0时表示一直迭代到终止条件被满足或出现其他错误。 // linesearch 线性搜索方式,由此文件前述枚举类型定义。 // max_linesearch 每次迭代中线性搜索的最大次数,默认值为40 // min_step 线性搜索中的最小步长,默认值为1e-20 // max_step 线性搜索中的最大步长,默认值为1e+20 // ftol 线性搜索的精度值,默认值为1e-4,取值范围(0-0.5)。 // wolfe Wolfe线性搜索中的控制参数,默认值为0.9,大于ftol小于1.0 // gtol 线性搜索中的控制参数,默认值为0.9,大于ftol小于1.0 // xtol 浮点数精度,默认值为1e-16 // orthantwise_c 模型参数x的L1模的乘积参数,默认值为0.0,此时算法即为L2模形式,当此参数大于0时,算法即为OWL-QN // orthantwise_start 开始计算模型参数x的L1模的迭代序号 // orthantwise_end 终止计算模型参数x的L1模的迭代序号 /** * L-BFGS optimization parameters. * Call lbfgs_parameter_init() function to initialize parameters to the * default values. */ struct lbfgs_para { /** * The number of corrections to approximate the inverse hessian matrix. * The L-BFGS routine stores the computation results of previous \ref m * iterations to approximate the inverse hessian matrix of the current * iteration. This parameter controls the size of the limited memories * (corrections). The default value is \c 6. Values less than \c 3 are * not recommended. Large values will result in excessive computing time. */ int m; /** * Epsilon for convergence test. * This parameter determines the accuracy with which the solution is to * be found. A minimization terminates when * ||g|| < \ref epsilon * max(1, ||x||), * where ||.|| denotes the Euclidean (L2) norm. The default value is * \c 1e-5. */ double epsilon; /** * Distance for delta-based convergence test. * This parameter determines the distance, in iterations, to compute * the rate of decrease of the objective function. If the value of this * parameter is zero, the library does not perform the delta-based * convergence test. The default value is \c 0. */ int past; /** * Delta for convergence test. * This parameter determines the minimum rate of decrease of the * objective function. The library stops iterations when the * following condition is met: * (f' - f) / f < \ref delta, * where f' is the objective value of \ref past iterations ago, and f is * the objective value of the current iteration. * The default value is \c 1e-5. */ double delta; /** * Residual for convergence test. * This parameter determines the accuracy with which the solution is to * be found. A minimization terminates when * f(x) <= residual, * The default value is \c 1e-8. * */ double residual; /** * The maximum number of iterations. * The lbfgs() function terminates an optimization process with * ::LBFGSERR_MAXIMUMITERATION status code when the iteration count * exceedes this parameter. Setting this parameter to zero continues an * optimization process until a convergence or error. The default value * is \c 0. */ int max_iterations; /** * The line search algorithm. * This parameter specifies a line search algorithm to be used by the * L-BFGS routine. */ int linesearch; /** * The maximum number of trials for the line search. * This parameter controls the number of function and gradients evaluations * per iteration for the line search routine. The default value is \c 40. */ int max_linesearch; /** * The minimum step of the line search routine. * The default value is \c 1e-20. This value need not be modified unless * the exponents are too large for the machine being used, or unless the * problem is extremely badly scaled (in which case the exponents should * be increased). */ double min_step; /** * The maximum step of the line search. * The default value is \c 1e+20. This value need not be modified unless * the exponents are too large for the machine being used, or unless the * problem is extremely badly scaled (in which case the exponents should * be increased). */ double max_step; /** * A parameter to control the accuracy of the line search routine. * The default value is \c 1e-4. This parameter should be greater * than zero and smaller than \c 0.5. */ double ftol; /** * A coefficient for the Wolfe condition. * This parameter is valid only when the backtracking line-search * algorithm is used with the Wolfe condition, * ::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE or * ::LBFGS_LINESEARCH_BACKTRACKING_WOLFE . * The default value is \c 0.9. This parameter should be greater * the \ref ftol parameter and smaller than \c 1.0. */ double wolfe; /** * A parameter to control the accuracy of the line search routine. * The default value is \c 0.9. If the function and gradient * evaluations are inexpensive with respect to the cost of the * iteration (which is sometimes the case when solving very large * problems) it may be advantageous to set this parameter to a small * value. A typical small value is \c 0.1. This parameter shuold be * greater than the \ref ftol parameter (\c 1e-4) and smaller than * \c 1.0. */ double gtol; /** * The machine precision for floating-point values. * This parameter must be a positive value set by a client program to * estimate the machine precision. The line search routine will terminate * with the status code (::LBFGSERR_ROUNDING_ERROR) if the relative width * of the interval of uncertainty is less than this parameter. */ double xtol; /** * Coeefficient for the L1 norm of variables. * This parameter should be set to zero for standard minimization * problems. Setting this parameter to a positive value activates * Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) method, which * minimizes the objective function F(x) combined with the L1 norm |x| * of the variables, {F(x) + C |x|}. This parameter is the coeefficient * for the |x|, i.e., C. As the L1 norm |x| is not differentiable at * zero, the library modifies function and gradient evaluations from * a client program suitably; a client program thus have only to return * the function value F(x) and gradients G(x) as usual. The default value * is zero. */ double orthantwise_c; /** * Start index for computing L1 norm of the variables. * This parameter is valid only for OWL-QN method * (i.e., \ref orthantwise_c != 0). This parameter b (0 <= b < N) * specifies the index number from which the library computes the * L1 norm of the variables x, * |x| := |x_{b}| + |x_{b+1}| + ... + |x_{N}| . * In other words, variables x_1, ..., x_{b-1} are not used for * computing the L1 norm. Setting b (0 < b < N), one can protect * variables, x_1, ..., x_{b-1} (e.g., a bias term of logistic * regression) from being regularized. The default value is zero. */ int orthantwise_start; /** * End index for computing L1 norm of the variables. * This parameter is valid only for OWL-QN method * (i.e., \ref orthantwise_c != 0). This parameter e (0 < e <= N) * specifies the index number at which the library stops computing the * L1 norm of the variables x, */ int orthantwise_end; }; class lbfgs_solver { private: lbfgs_para lbfgs_param_; ///< lbfgs 算法参数 bool lbfgs_silent_; ///< 显示运行信息 // 算法函数是私有的,不能直接使用,通过Minimize函数调用 // 下面是L-BFGS的主函数,各个参数的说明简要翻译如下: // n 数组的长度,也就是待求的模型参数的数量 // x 模型参数数组的指针,函数通过指针直接操作模型数组,所以不需要返回计算结果。一开始赋给函数的数组即为 // 初始模型,函数结束后即为最优化结果 // ptr_fx 目标函数的值的指针,设计成指针可以方便在函数外部监控迭代过程的收敛情况 // retval 返回值。无错即为0,非0值代表此文件上部枚举类型中的对应错误。此文件下部定义的错误信息显示即利用此返回值与 // 预定义的枚举类型输出相应的错误信息。 /** * Start a L-BFGS optimization. * * @param x The array of variables. A client program can set * default values for the optimization and receive the * optimization result through this array. This array * must be allocated by ::lbfgs_malloc function * for libLBFGS built with SSE/SSE2 optimization routine * enabled. The library built without SSE/SSE2 * optimization does not have such a requirement. * @param ptr_fx The pointer to the variable that receives the final * value of the objective function for the variables. * This argument can be set to \c NULL if the final * value of the objective function is unnecessary. * @retval The status code. This function returns zero if the * minimization process terminates without an error. A * non-zero value indicates an error. */ lbfgs_return_code lbfgs(array &x, double &ptr_fx, std::ostream &ss); lbfgs_return_code lbfgs_preconditioned(array &x, double &ptr_fx, std::ostream &ss); // 线性搜索方法 内部私有函数 不能直接使用 lbfgs_return_code line_search_backtracking(int n, gctl::array &x, double *f, gctl::array &g, gctl::array &s, double *stp, const gctl::array &xp, const gctl::array &gp, gctl::array &wp, int &ls); lbfgs_return_code line_search_backtracking_quad(int n, gctl::array &x, double *f, gctl::array &g, gctl::array &s, double *stp, const gctl::array &xp, const gctl::array &gp, gctl::array &wp, int &ls); lbfgs_return_code line_search_backtracking_owlqn(int n, gctl::array &x, double *f, gctl::array &g, gctl::array &s, double *stp, const gctl::array &xp, const gctl::array &gp, gctl::array &wp, int &ls); lbfgs_return_code line_search_morethuente(int n, gctl::array &x, double *f, gctl::array &g, gctl::array &s, double *stp, const gctl::array &xp, const gctl::array &gp, gctl::array &wa, int &ls); // 显示lbfgs函数返回值信息 主要是错误信息 void lbfgs_error_str(lbfgs_return_code err_code, std::ostream &ss = std::clog, bool err_throw = false); public: lbfgs_solver(); virtual ~lbfgs_solver(); /** * @brief 不显示运行信息,仅抛出运行错误。 * */ void lbfgs_silent(); /** * @brief 设置算法参数。 * * @param in_param 参数对象 */ void set_lbfgs_para(const lbfgs_para &in_param); #ifdef GCTL_OPTIMIZATION_TOML /** * @brief 设置算法参数。 * * @param toml_data toml数据对象 */ void set_lbfgs_para(const toml::value &toml_data); #endif // GCTL_OPTIMIZATION_TOML /** * @brief 返回一个全为默认值的参数对象。 * * @return lbfgs_para */ lbfgs_para default_lbfgs_para(); /** * @brief 显示当前运行参数 * * @param ss 标准输出流 */ void show_lbfgs_para(std::ostream &ss = std::clog); // 目标函数与其梯度值计算函数的接口,参数简要说明如下: // x 当前的模型参数值的指针 // g 当前模型参数值对应的梯度指针 // step 当前线性搜索所使用的步长 // retval 当前模型参数的目标函数值 /** * Callback interface to provide objective function and gradient evaluations. * * The lbfgs() function call this function to obtain the values of objective * function and its gradients when needed. A client program must implement * this function to evaluate the values of the objective function and its * gradients, given current values of variables. * * @param x The current values of variables. * @param g The gradient vector. The callback function must compute * the gradient values for the current variables. * @param step The current step of the line search routine. * @retval double The value of the objective function for the current * variables. */ virtual double LBFGS_Evaluate(const array &x, array &g) = 0; /** * Callback interface to implement the preconditioning process. * * The lbfgs() function call this function for each iteration. Implementing * this function, a client program can preform the preconditioning process. * * @param x The current values of variables. * @param g The current gradient values of variables. * @param d The current values of search directions. * @param d_pre The values of search directions being preconditioned. * The callback function must compute these values. */ virtual void LBFGS_Precondition(const array &x, const array &g, const array &d, array &d_pre); // 进程函数的接口,参数简要说明如下: // x 当前的模型参数值的指针 // g 当前模型参数值对应的梯度指针 // fx 目标函数的值 // xnorm 模型参数数组的L2模长 // gnorm 模型梯度数组的L2模长 // step 当前线性搜索所使用的步长 // k 迭代的次数 // ls 此次迭代所使用的线性搜索次数 // retval 返回0则lbfgs()函数继续,否则终止 /** * Callback interface to receive the progress of the optimization process. * * The lbfgs() function call this function for each iteration. Implementing * this function, a client program can store or display the current progress * of the optimization process. * * @param x The current values of variables. * @param g The current gradient values of variables. * @param fx The current value of the objective function. * @param converge Current value of the convergence test. * @param rate Current value of the delta-based convergence test. * @param param 这是我们添加了一个指针以使用参数类型来监控迭代流程 * @param k The iteration count. * @param ls The number of evaluations called for this iteration. * @param ss Output stream object. * @retval int Zero to continue the optimization process. Returning a * non-zero value will cancel the optimization process. */ virtual int LBFGS_Progress(const array &x, const array &g, const double fx, const double converge, const double rate, const lbfgs_para param, int k, int ls, std::ostream &ss); /** * @brief 调用算法执行最小化流程 * * @param m 初始模型,最优化结果也保存在此数组内 * @param ss 信息的输出流 * @param err_throw 仅抛出错误 * @return double 最终的目标函数值 */ double LBFGS_Minimize(array &m, std::ostream &ss = std::clog, bool err_throw = false); /** * @brief 调用算法执行最小化流程 * * @param m 初始模型,最优化结果也保存在此数组内 * @param ss 信息的输出流 * @param verbose 使用输出详细信息 * @param err_throw 仅抛出错误 * @return double 最终的目标函数值 */ double LBFGS_MinimizePreconditioned(array &m, std::ostream &ss = std::clog, bool err_throw = false); }; } #endif // _GCTL_LBFGS_H