gctl_optimization/lib/optimization/lbfgs.h
2025-02-22 18:10:15 +08:00

560 lines
26 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/********************************************************
* ██████╗ ██████╗████████╗██╗
* ██╔════╝ ██╔════╝╚══██╔══╝██║
* ██║ ███╗██║ ██║ ██║
* ██║ ██║██║ ██║ ██║
* ╚██████╔╝╚██████╗ ██║ ███████╗
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#ifndef _GCTL_LBFGS_H
#define _GCTL_LBFGS_H
#include "gctl/utility.h"
#include "gctl/core.h"
#include "gctl/maths.h"
#include "gctl/algorithms.h"
#include "gctl_optimization_config.h"
#ifdef GCTL_OPTIMIZATION_TOML
#include "toml.hpp"
#endif // GCTL_OPTIMIZATION_TOML
#if defined _WINDOWS || __WIN32__
#include "windows.h"
#endif // _WINDOWS || __WIN32__
namespace gctl
{
/**
* @brief Return value of the lbfgs() function. Roughly speaking, a negative value indicates an error.
*/
enum lbfgs_return_code
{
/** L-BFGS reaches convergence. */
LBFGS_EPS_CONVERGENCE = 0,
LBFGS_DELTA_CONVERGENCE,
LBFGS_RESI_CONVERGENCE,
LBFGS_STOP, //1
/** The initial variables already minimize the objective function. */
LBFGS_ALREADY_MINIMIZED, //2
/** Unknown error. */
LBFGSERR_UNKNOWNERROR = -1024,
/** Logic error. */
LBFGSERR_LOGICERROR, //-1023
/** Insufficient memory. */
LBFGSERR_OUTOFMEMORY, //-1022
/** The minimization process has been canceled. */
LBFGSERR_CANCELED,
/** Invalid number of variables specified. */
LBFGSERR_INVALID_N,
/** Invalid number of variables (for SSE) specified. */
LBFGSERR_INVALID_N_SSE,
/** The array x must be aligned to 16 (for SSE). */
LBFGSERR_INVALID_X_SSE,
/** Invalid parameter lbfgs_para::epsilon specified. */
LBFGSERR_INVALID_EPSILON,
/** Invalid parameter lbfgs_para::past specified. */
LBFGSERR_INVALID_TESTPERIOD,
/** Invalid parameter lbfgs_para::delta specified. */
LBFGSERR_INVALID_DELTA,
/** Invalid parameter lbfgs_para::linesearch specified. */
LBFGSERR_INVALID_LINESEARCH,
/** Invalid parameter lbfgs_para::max_step specified. */
LBFGSERR_INVALID_MINSTEP,
/** Invalid parameter lbfgs_para::max_step specified. */
LBFGSERR_INVALID_MAXSTEP,
/** Invalid parameter lbfgs_para::ftol specified. */
LBFGSERR_INVALID_FTOL,
/** Invalid parameter lbfgs_para::wolfe specified. */
LBFGSERR_INVALID_WOLFE,
/** Invalid parameter lbfgs_para::gtol specified. */
LBFGSERR_INVALID_GTOL,
/** Invalid parameter lbfgs_para::xtol specified. */
LBFGSERR_INVALID_XTOL,
/** Invalid parameter lbfgs_para::max_linesearch specified. */
LBFGSERR_INVALID_MAXLINESEARCH,
/** Invalid parameter lbfgs_para::orthantwise_c specified. */
LBFGSERR_INVALID_ORTHANTWISE,
/** Invalid parameter lbfgs_para::orthantwise_start specified. */
LBFGSERR_INVALID_ORTHANTWISE_START,
/** Invalid parameter lbfgs_para::orthantwise_end specified. */
LBFGSERR_INVALID_ORTHANTWISE_END,
/** The line-search step went out of the interval of uncertainty. */
LBFGSERR_OUTOFINTERVAL,
/** A logic error occurred; alternatively, the interval of uncertainty
became too small. */
LBFGSERR_INCORRECT_TMINMAX,
/** A rounding error occurred; alternatively, no line-search step
satisfies the sufficient decrease and curvature conditions. */
LBFGSERR_ROUNDING_ERROR,
/** The line-search step became smaller than lbfgs_para::min_step. */
LBFGSERR_MINIMUMSTEP,
/** The line-search step became larger than lbfgs_para::max_step. */
LBFGSERR_MAXIMUMSTEP,
/** The line-search routine reaches the maximum number of evaluations. */
LBFGSERR_MAXIMUMLINESEARCH,
/** The algorithm routine reaches the maximum number of iterations. */
LBFGSERR_MAXIMUMITERATION,
/** Relative width of the interval of uncertainty is at most
lbfgs_para::xtol. */
LBFGSERR_WIDTHTOOSMALL,
/** A logic error (negative line-search step) occurred. */
LBFGSERR_INVALIDPARAMETERS,
/** The current search direction increases the objective function value. */
LBFGSERR_INCREASEGRADIENT,
};
// 枚举类型 线性搜索方法
// 0 MoreThuente方法
// 1 Armijo条件方法
// 2 标准Wolfe条件方法
// 3 增强Wolfe条件方法
/**
* @brief Line search algorithms.
*/
enum line_search_type
{
/** The default algorithm (MoreThuente method). */
LBFGS_LINESEARCH_DEFAULT = 0,
/** MoreThuente method proposd by More and Thuente. */
LBFGS_LINESEARCH_MORETHUENTE = 0,
/**
* Backtracking method with the Armijo condition.
* The backtracking method finds the step length such that it satisfies
* the sufficient decrease (Armijo) condition,
* - f(x + a * d) <= f(x) + lbfgs_para::ftol * a * g(x)^T d,
*
* where x is the current point, d is the current search direction, and
* a is the step length.
*/
LBFGS_LINESEARCH_BACKTRACKING_ARMIJO = 1,
/** The backtracking method with the defualt (regular Wolfe) condition. */
LBFGS_LINESEARCH_BACKTRACKING = 2,
/**
* Backtracking method with regular Wolfe condition.
* The backtracking method finds the step length such that it satisfies
* both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO)
* and the curvature condition,
* - g(x + a * d)^T d >= lbfgs_para::wolfe * g(x)^T d,
*
* where x is the current point, d is the current search direction, and
* a is the step length.
*/
LBFGS_LINESEARCH_BACKTRACKING_WOLFE = 2,
/**
* Backtracking method with strong Wolfe condition.
* The backtracking method finds the step length such that it satisfies
* both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO)
* and the following condition,
* - |g(x + a * d)^T d| <= lbfgs_para::wolfe * |g(x)^T d|,
*
* where x is the current point, d is the current search direction, and
* a is the step length.
*/
LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 3,
LBFGS_LINESEARCH_BACKTRACKING_ARMIJO_QUAD = 4,
//LBFGS_LINESEARCH_BACKTRACKING_QUAD = 5,
//LBFGS_LINESEARCH_BACKTRACKING_WOLFE_QUAD = 5,
//LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE_QUAD = 6,
};
// L-BFGS参数类型。参数很多简要说明如下
// m L-BFGS算法中储存的前序sk与yk向量个数这个值控制了算法使用的内存多少默认值为6不建议小于3的值值多大近似精度越高计算量也越大。
// epsilon 迭代的终止精度默认值为1e-5
// past 以delta(不同迭代次数的目标函数值)为基础的迭代终止条件数past代表了以多少迭代次数之前的目标函数值作为delta计算的间隔默认值为0
// 即不以delta为迭代终止条件。
// delta (f' - f) / f 不同迭代次数时目标函数之差与当前目标函数值之比但past不为0时会计算。
// max_iterations 最大迭代次数为0时表示一直迭代到终止条件被满足或出现其他错误。
// linesearch 线性搜索方式,由此文件前述枚举类型定义。
// max_linesearch 每次迭代中线性搜索的最大次数默认值为40
// min_step 线性搜索中的最小步长默认值为1e-20
// max_step 线性搜索中的最大步长默认值为1e+20
// ftol 线性搜索的精度值默认值为1e-4取值范围0-0.5)。
// wolfe Wolfe线性搜索中的控制参数默认值为0.9大于ftol小于1.0
// gtol 线性搜索中的控制参数默认值为0.9大于ftol小于1.0
// xtol 浮点数精度默认值为1e-16
// orthantwise_c 模型参数x的L1模的乘积参数默认值为0.0此时算法即为L2模形式当此参数大于0时算法即为OWL-QN
// orthantwise_start 开始计算模型参数x的L1模的迭代序号
// orthantwise_end 终止计算模型参数x的L1模的迭代序号
/**
* L-BFGS optimization parameters.
* Call lbfgs_parameter_init() function to initialize parameters to the
* default values.
*/
struct lbfgs_para
{
/**
* The number of corrections to approximate the inverse hessian matrix.
* The L-BFGS routine stores the computation results of previous \ref m
* iterations to approximate the inverse hessian matrix of the current
* iteration. This parameter controls the size of the limited memories
* (corrections). The default value is \c 6. Values less than \c 3 are
* not recommended. Large values will result in excessive computing time.
*/
int m;
/**
* Epsilon for convergence test.
* This parameter determines the accuracy with which the solution is to
* be found. A minimization terminates when
* ||g|| < \ref epsilon * max(1, ||x||),
* where ||.|| denotes the Euclidean (L2) norm. The default value is
* \c 1e-5.
*/
double epsilon;
/**
* Distance for delta-based convergence test.
* This parameter determines the distance, in iterations, to compute
* the rate of decrease of the objective function. If the value of this
* parameter is zero, the library does not perform the delta-based
* convergence test. The default value is \c 0.
*/
int past;
/**
* Delta for convergence test.
* This parameter determines the minimum rate of decrease of the
* objective function. The library stops iterations when the
* following condition is met:
* (f' - f) / f < \ref delta,
* where f' is the objective value of \ref past iterations ago, and f is
* the objective value of the current iteration.
* The default value is \c 1e-5.
*/
double delta;
/**
* Residual for convergence test.
* This parameter determines the accuracy with which the solution is to
* be found. A minimization terminates when
* f(x) <= residual,
* The default value is \c 1e-8.
*
*/
double residual;
/**
* The maximum number of iterations.
* The lbfgs() function terminates an optimization process with
* ::LBFGSERR_MAXIMUMITERATION status code when the iteration count
* exceedes this parameter. Setting this parameter to zero continues an
* optimization process until a convergence or error. The default value
* is \c 0.
*/
int max_iterations;
/**
* The line search algorithm.
* This parameter specifies a line search algorithm to be used by the
* L-BFGS routine.
*/
int linesearch;
/**
* The maximum number of trials for the line search.
* This parameter controls the number of function and gradients evaluations
* per iteration for the line search routine. The default value is \c 40.
*/
int max_linesearch;
/**
* The minimum step of the line search routine.
* The default value is \c 1e-20. This value need not be modified unless
* the exponents are too large for the machine being used, or unless the
* problem is extremely badly scaled (in which case the exponents should
* be increased).
*/
double min_step;
/**
* The maximum step of the line search.
* The default value is \c 1e+20. This value need not be modified unless
* the exponents are too large for the machine being used, or unless the
* problem is extremely badly scaled (in which case the exponents should
* be increased).
*/
double max_step;
/**
* A parameter to control the accuracy of the line search routine.
* The default value is \c 1e-4. This parameter should be greater
* than zero and smaller than \c 0.5.
*/
double ftol;
/**
* A coefficient for the Wolfe condition.
* This parameter is valid only when the backtracking line-search
* algorithm is used with the Wolfe condition,
* ::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE or
* ::LBFGS_LINESEARCH_BACKTRACKING_WOLFE .
* The default value is \c 0.9. This parameter should be greater
* the \ref ftol parameter and smaller than \c 1.0.
*/
double wolfe;
/**
* A parameter to control the accuracy of the line search routine.
* The default value is \c 0.9. If the function and gradient
* evaluations are inexpensive with respect to the cost of the
* iteration (which is sometimes the case when solving very large
* problems) it may be advantageous to set this parameter to a small
* value. A typical small value is \c 0.1. This parameter shuold be
* greater than the \ref ftol parameter (\c 1e-4) and smaller than
* \c 1.0.
*/
double gtol;
/**
* The machine precision for floating-point values.
* This parameter must be a positive value set by a client program to
* estimate the machine precision. The line search routine will terminate
* with the status code (::LBFGSERR_ROUNDING_ERROR) if the relative width
* of the interval of uncertainty is less than this parameter.
*/
double xtol;
/**
* Coeefficient for the L1 norm of variables.
* This parameter should be set to zero for standard minimization
* problems. Setting this parameter to a positive value activates
* Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) method, which
* minimizes the objective function F(x) combined with the L1 norm |x|
* of the variables, {F(x) + C |x|}. This parameter is the coeefficient
* for the |x|, i.e., C. As the L1 norm |x| is not differentiable at
* zero, the library modifies function and gradient evaluations from
* a client program suitably; a client program thus have only to return
* the function value F(x) and gradients G(x) as usual. The default value
* is zero.
*/
double orthantwise_c;
/**
* Start index for computing L1 norm of the variables.
* This parameter is valid only for OWL-QN method
* (i.e., \ref orthantwise_c != 0). This parameter b (0 <= b < N)
* specifies the index number from which the library computes the
* L1 norm of the variables x,
* |x| := |x_{b}| + |x_{b+1}| + ... + |x_{N}| .
* In other words, variables x_1, ..., x_{b-1} are not used for
* computing the L1 norm. Setting b (0 < b < N), one can protect
* variables, x_1, ..., x_{b-1} (e.g., a bias term of logistic
* regression) from being regularized. The default value is zero.
*/
int orthantwise_start;
/**
* End index for computing L1 norm of the variables.
* This parameter is valid only for OWL-QN method
* (i.e., \ref orthantwise_c != 0). This parameter e (0 < e <= N)
* specifies the index number at which the library stops computing the
* L1 norm of the variables x,
*/
int orthantwise_end;
};
class lbfgs_solver
{
private:
lbfgs_para lbfgs_param_; ///< lbfgs 算法参数
bool lbfgs_silent_; ///< 显示运行信息
// 算法函数是私有的不能直接使用通过Minimize函数调用
// 下面是L-BFGS的主函数各个参数的说明简要翻译如下
// n 数组的长度,也就是待求的模型参数的数量
// x 模型参数数组的指针,函数通过指针直接操作模型数组,所以不需要返回计算结果。一开始赋给函数的数组即为
// 初始模型,函数结束后即为最优化结果
// ptr_fx 目标函数的值的指针,设计成指针可以方便在函数外部监控迭代过程的收敛情况
// retval 返回值。无错即为0非0值代表此文件上部枚举类型中的对应错误。此文件下部定义的错误信息显示即利用此返回值与
// 预定义的枚举类型输出相应的错误信息。
/**
* Start a L-BFGS optimization.
*
* @param x The array of variables. A client program can set
* default values for the optimization and receive the
* optimization result through this array. This array
* must be allocated by ::lbfgs_malloc function
* for libLBFGS built with SSE/SSE2 optimization routine
* enabled. The library built without SSE/SSE2
* optimization does not have such a requirement.
* @param ptr_fx The pointer to the variable that receives the final
* value of the objective function for the variables.
* This argument can be set to \c NULL if the final
* value of the objective function is unnecessary.
* @retval The status code. This function returns zero if the
* minimization process terminates without an error. A
* non-zero value indicates an error.
*/
lbfgs_return_code lbfgs(array<double> &x, double &ptr_fx, std::ostream &ss);
lbfgs_return_code lbfgs_preconditioned(array<double> &x, double &ptr_fx, std::ostream &ss);
// 线性搜索方法 内部私有函数 不能直接使用
lbfgs_return_code line_search_backtracking(int n, gctl::array<double> &x, double *f, gctl::array<double> &g, gctl::array<double> &s,
double *stp, const gctl::array<double> &xp, const gctl::array<double> &gp, gctl::array<double> &wp, int &ls);
lbfgs_return_code line_search_backtracking_quad(int n, gctl::array<double> &x, double *f, gctl::array<double> &g, gctl::array<double> &s,
double *stp, const gctl::array<double> &xp, const gctl::array<double> &gp, gctl::array<double> &wp, int &ls);
lbfgs_return_code line_search_backtracking_owlqn(int n, gctl::array<double> &x, double *f, gctl::array<double> &g, gctl::array<double> &s,
double *stp, const gctl::array<double> &xp, const gctl::array<double> &gp, gctl::array<double> &wp, int &ls);
lbfgs_return_code line_search_morethuente(int n, gctl::array<double> &x, double *f, gctl::array<double> &g, gctl::array<double> &s,
double *stp, const gctl::array<double> &xp, const gctl::array<double> &gp, gctl::array<double> &wa, int &ls);
// 显示lbfgs函数返回值信息 主要是错误信息
void lbfgs_error_str(lbfgs_return_code err_code, std::ostream &ss = std::clog, bool err_throw = false);
public:
lbfgs_solver();
virtual ~lbfgs_solver();
/**
* @brief 不显示运行信息,仅抛出运行错误。
*
*/
void lbfgs_silent();
/**
* @brief 设置算法参数。
*
* @param in_param 参数对象
*/
void set_lbfgs_para(const lbfgs_para &in_param);
#ifdef GCTL_OPTIMIZATION_TOML
/**
* @brief 设置算法参数。
*
* @param toml_data toml数据对象
*/
void set_lbfgs_para(const toml::value &toml_data);
#endif // GCTL_OPTIMIZATION_TOML
/**
* @brief 返回一个全为默认值的参数对象。
*
* @return lbfgs_para
*/
lbfgs_para default_lbfgs_para();
/**
* @brief 显示当前运行参数
*
* @param ss 标准输出流
*/
void show_lbfgs_para(std::ostream &ss = std::clog);
// 目标函数与其梯度值计算函数的接口,参数简要说明如下:
// x 当前的模型参数值的指针
// g 当前模型参数值对应的梯度指针
// step 当前线性搜索所使用的步长
// retval 当前模型参数的目标函数值
/**
* Callback interface to provide objective function and gradient evaluations.
*
* The lbfgs() function call this function to obtain the values of objective
* function and its gradients when needed. A client program must implement
* this function to evaluate the values of the objective function and its
* gradients, given current values of variables.
*
* @param x The current values of variables.
* @param g The gradient vector. The callback function must compute
* the gradient values for the current variables.
* @param step The current step of the line search routine.
* @retval double The value of the objective function for the current
* variables.
*/
virtual double LBFGS_Evaluate(const array<double> &x, array<double> &g) = 0;
/**
* Callback interface to implement the preconditioning process.
*
* The lbfgs() function call this function for each iteration. Implementing
* this function, a client program can preform the preconditioning process.
*
* @param x The current values of variables.
* @param g The current gradient values of variables.
* @param d The current values of search directions.
* @param d_pre The values of search directions being preconditioned.
* The callback function must compute these values.
*/
virtual void LBFGS_Precondition(const array<double> &x, const array<double> &g, const array<double> &d, array<double> &d_pre);
// 进程函数的接口,参数简要说明如下:
// x 当前的模型参数值的指针
// g 当前模型参数值对应的梯度指针
// fx 目标函数的值
// xnorm 模型参数数组的L2模长
// gnorm 模型梯度数组的L2模长
// step 当前线性搜索所使用的步长
// k 迭代的次数
// ls 此次迭代所使用的线性搜索次数
// retval 返回0则lbfgs()函数继续,否则终止
/**
* Callback interface to receive the progress of the optimization process.
*
* The lbfgs() function call this function for each iteration. Implementing
* this function, a client program can store or display the current progress
* of the optimization process.
*
* @param x The current values of variables.
* @param g The current gradient values of variables.
* @param fx The current value of the objective function.
* @param converge Current value of the convergence test.
* @param rate Current value of the delta-based convergence test.
* @param param 这是我们添加了一个指针以使用参数类型来监控迭代流程
* @param k The iteration count.
* @param ls The number of evaluations called for this iteration.
* @param ss Output stream object.
* @retval int Zero to continue the optimization process. Returning a
* non-zero value will cancel the optimization process.
*/
virtual int LBFGS_Progress(const array<double> &x, const array<double> &g, const double fx,
const double converge, const double rate, const lbfgs_para param, int k, int ls, std::ostream &ss);
/**
* @brief 调用算法执行最小化流程
*
* @param m 初始模型,最优化结果也保存在此数组内
* @param ss 信息的输出流
* @param err_throw 仅抛出错误
* @return double 最终的目标函数值
*/
double LBFGS_Minimize(array<double> &m, std::ostream &ss = std::clog, bool err_throw = false);
/**
* @brief 调用算法执行最小化流程
*
* @param m 初始模型,最优化结果也保存在此数组内
* @param ss 信息的输出流
* @param verbose 使用输出详细信息
* @param err_throw 仅抛出错误
* @return double 最终的目标函数值
*/
double LBFGS_MinimizePreconditioned(array<double> &m, std::ostream &ss = std::clog, bool err_throw = false);
};
};
#endif // _GCTL_LBFGS_H