添加注释

This commit is contained in:
张壹 2019-10-21 22:09:44 +08:00
parent d3ebea8582
commit 462d3121ee
2 changed files with 54 additions and 37 deletions

View File

@ -473,6 +473,7 @@ progress notification is not necessary.
// 1. 变量的数量必须是16的倍数 // 1. 变量的数量必须是16的倍数
// 2. 变量的储存以16对齐 // 2. 变量的储存以16对齐
// 还不太明白为什么要这么要求。这里需要以后再注解。 // 还不太明白为什么要这么要求。这里需要以后再注解。
// 注解貌似只有使用SSE
In addition, a user must preserve two requirements: In addition, a user must preserve two requirements:
- The number of variables must be multiples of 16 (this is not 4). - The number of variables must be multiples of 16 (this is not 4).
- The memory block of variable array ::x must be aligned to 16. - The memory block of variable array ::x must be aligned to 16.
@ -550,7 +551,7 @@ int lbfgs(
); );
// 将一个参数类型内的全部变量值重置为默认值 // 将一个参数类型内的全部变量值重置为默认值
// 如果害怕参数被自己调乱了可以用这个函数将参数充值 // 如果害怕参数被自己调乱了可以用这个函数将参数重置
/** /**
* Initialize L-BFGS parameters to the default values. * Initialize L-BFGS parameters to the default values.
* *

View File

@ -89,16 +89,16 @@ licence.
#include "arithmetic_ansi.h" #include "arithmetic_ansi.h"
#endif #endif
//宏函数 比较几个数的大小
#define min2(a, b) ((a) <= (b) ? (a) : (b)) #define min2(a, b) ((a) <= (b) ? (a) : (b))
#define max2(a, b) ((a) >= (b) ? (a) : (b)) #define max2(a, b) ((a) >= (b) ? (a) : (b))
#define max3(a, b, c) max2(max2((a), (b)), (c)); #define max3(a, b, c) max2(max2((a), (b)), (c));
//回调函数数据类型
struct tag_callback_data { struct tag_callback_data {
int n; int n; // 变量的大小
void *instance; void *instance; // 用户给出的运行实例
lbfgs_evaluate_t proc_evaluate; lbfgs_evaluate_t proc_evaluate; // 目标函数与模型梯度计算函数指针
lbfgs_progress_t proc_progress; lbfgs_progress_t proc_progress; // 迭代过程监控函数指针
}; };
typedef struct tag_callback_data callback_data_t; typedef struct tag_callback_data callback_data_t;
@ -106,10 +106,10 @@ struct tag_iteration_data {
lbfgsfloatval_t alpha; lbfgsfloatval_t alpha;
lbfgsfloatval_t *s; /* [n] */ lbfgsfloatval_t *s; /* [n] */
lbfgsfloatval_t *y; /* [n] */ lbfgsfloatval_t *y; /* [n] */
lbfgsfloatval_t ys; /* vecdot(y, s) */ lbfgsfloatval_t ys; /* vecdot(y, s) y与s的点积 */
}; };
typedef struct tag_iteration_data iteration_data_t; typedef struct tag_iteration_data iteration_data_t;
// 默认的迭代参数
static const lbfgs_parameter_t _defparam = { static const lbfgs_parameter_t _defparam = {
6, 1e-5, 0, 1e-5, 6, 1e-5, 0, 1e-5,
0, LBFGS_LINESEARCH_DEFAULT, 40, 0, LBFGS_LINESEARCH_DEFAULT, 40,
@ -118,7 +118,7 @@ static const lbfgs_parameter_t _defparam = {
}; };
/* Forward function declarations. */ /* Forward function declarations. */
// 这里定义了线性搜索的函数类型模版 以下是几个具体的线性搜索函数的声明
typedef int (*line_search_proc)( typedef int (*line_search_proc)(
int n, int n,
lbfgsfloatval_t *x, lbfgsfloatval_t *x,
@ -174,7 +174,8 @@ static int line_search_morethuente(
callback_data_t *cd, callback_data_t *cd,
const lbfgs_parameter_t *param const lbfgs_parameter_t *param
); );
// 以上是线性搜索函数的声明
// 更新试探步长的搜索区间
static int update_trial_interval( static int update_trial_interval(
lbfgsfloatval_t *x, lbfgsfloatval_t *x,
lbfgsfloatval_t *fx, lbfgsfloatval_t *fx,
@ -189,13 +190,13 @@ static int update_trial_interval(
const lbfgsfloatval_t tmax, const lbfgsfloatval_t tmax,
int *brackt int *brackt
); );
// 计算x的L1模长
static lbfgsfloatval_t owlqn_x1norm( static lbfgsfloatval_t owlqn_x1norm(
const lbfgsfloatval_t* x, const lbfgsfloatval_t* x,
const int start, const int start,
const int n const int n
); );
// 计算伪梯度（似梯度）
static void owlqn_pseudo_gradient( static void owlqn_pseudo_gradient(
lbfgsfloatval_t* pg, lbfgsfloatval_t* pg,
const lbfgsfloatval_t* x, const lbfgsfloatval_t* x,
@ -223,7 +224,7 @@ static int round_out_variables(int n)
return n; return n;
} }
#endif/*defined(USE_SSE)*/ #endif/*defined(USE_SSE)*/
// 开辟内存空间
lbfgsfloatval_t* lbfgs_malloc(int n) lbfgsfloatval_t* lbfgs_malloc(int n)
{ {
#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) #if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
@ -231,12 +232,12 @@ lbfgsfloatval_t* lbfgs_malloc(int n)
#endif/*defined(USE_SSE)*/ #endif/*defined(USE_SSE)*/
return (lbfgsfloatval_t*)vecalloc(sizeof(lbfgsfloatval_t) * n); return (lbfgsfloatval_t*)vecalloc(sizeof(lbfgsfloatval_t) * n);
} }
// 释放内存空间
void lbfgs_free(lbfgsfloatval_t *x) void lbfgs_free(lbfgsfloatval_t *x)
{ {
vecfree(x); vecfree(x);
} }
// 重置参数至默认参数
void lbfgs_parameter_init(lbfgs_parameter_t *param) void lbfgs_parameter_init(lbfgs_parameter_t *param)
{ {
memcpy(param, &_defparam, sizeof(*param)); memcpy(param, &_defparam, sizeof(*param));
@ -257,7 +258,9 @@ int lbfgs(
lbfgsfloatval_t step; lbfgsfloatval_t step;
/* Constant parameters and their default values. */ /* Constant parameters and their default values. */
// 若无输入参数则使用默认参数
lbfgs_parameter_t param = (_param != NULL) ? (*_param) : _defparam; lbfgs_parameter_t param = (_param != NULL) ? (*_param) : _defparam;
// m是计算海森矩阵时储存的前序向量大小
const int m = param.m; const int m = param.m;
lbfgsfloatval_t *xp = NULL; lbfgsfloatval_t *xp = NULL;
@ -268,6 +271,7 @@ int lbfgs(
lbfgsfloatval_t xnorm, gnorm, beta; lbfgsfloatval_t xnorm, gnorm, beta;
lbfgsfloatval_t fx = 0.; lbfgsfloatval_t fx = 0.;
lbfgsfloatval_t rate = 0.; lbfgsfloatval_t rate = 0.;
// 设置线性搜索函数为morethuente此处line_search_morethuente为函数名
line_search_proc linesearch = line_search_morethuente; line_search_proc linesearch = line_search_morethuente;
/* Construct a callback data. */ /* Construct a callback data. */
@ -334,11 +338,13 @@ int lbfgs(
return LBFGSERR_INVALID_ORTHANTWISE_START; return LBFGSERR_INVALID_ORTHANTWISE_START;
} }
if (param.orthantwise_end < 0) { if (param.orthantwise_end < 0) {
// 默认设置在每个迭代都计算L1模
param.orthantwise_end = n; param.orthantwise_end = n;
} }
if (n < param.orthantwise_end) { if (n < param.orthantwise_end) {
return LBFGSERR_INVALID_ORTHANTWISE_END; return LBFGSERR_INVALID_ORTHANTWISE_END;
} }
// 若|x|的参数不是0则检查线性搜索方法
if (param.orthantwise_c != 0.) { if (param.orthantwise_c != 0.) {
switch (param.linesearch) { switch (param.linesearch) {
case LBFGS_LINESEARCH_BACKTRACKING: case LBFGS_LINESEARCH_BACKTRACKING:
@ -362,7 +368,7 @@ int lbfgs(
return LBFGSERR_INVALID_LINESEARCH; return LBFGSERR_INVALID_LINESEARCH;
} }
} }
// 初始化数组
/* Allocate working space. */ /* Allocate working space. */
xp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); xp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
g = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); g = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
@ -373,7 +379,7 @@ int lbfgs(
ret = LBFGSERR_OUTOFMEMORY; ret = LBFGSERR_OUTOFMEMORY;
goto lbfgs_exit; goto lbfgs_exit;
} }
// 初始化计算L1模的数组
if (param.orthantwise_c != 0.) { if (param.orthantwise_c != 0.) {
/* Allocate working space for OW-LQN. */ /* Allocate working space for OW-LQN. */
pg = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); pg = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
@ -382,7 +388,7 @@ int lbfgs(
goto lbfgs_exit; goto lbfgs_exit;
} }
} }
// 初始化有限内存方法需要的空间
/* Allocate limited memory storage. */ /* Allocate limited memory storage. */
lm = (iteration_data_t*)vecalloc(m * sizeof(iteration_data_t)); lm = (iteration_data_t*)vecalloc(m * sizeof(iteration_data_t));
if (lm == NULL) { if (lm == NULL) {
@ -392,7 +398,7 @@ int lbfgs(
/* Initialize the limited memory. */ /* Initialize the limited memory. */
for (i = 0;i < m;++i) { for (i = 0;i < m;++i) {
it = &lm[i]; it = &lm[i]; // 取it的地址为lm数组中的一个
it->alpha = 0; it->alpha = 0;
it->ys = 0; it->ys = 0;
it->s = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); it->s = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
@ -407,20 +413,22 @@ int lbfgs(
if (0 < param.past) { if (0 < param.past) {
pf = (lbfgsfloatval_t*)vecalloc(param.past * sizeof(lbfgsfloatval_t)); pf = (lbfgsfloatval_t*)vecalloc(param.past * sizeof(lbfgsfloatval_t));
} }
// 到此所有初始化工作完成 下面开始迭代前的初始计算
/* Evaluate the function value and its gradient. */ /* Evaluate the function value and its gradient. */
fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0); fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0); // 步长为0现在用不了
// 若|x|参数不为0 则需要计算x的L1模与似梯度
if (0. != param.orthantwise_c) { if (0. != param.orthantwise_c) {
/* Compute the L1 norm of the variable and add it to the object value. */ /* Compute the L1 norm of the variable and add it to the object value. */
xnorm = owlqn_x1norm(x, param.orthantwise_start, param.orthantwise_end); xnorm = owlqn_x1norm(x, param.orthantwise_start, param.orthantwise_end);
fx += xnorm * param.orthantwise_c; fx += xnorm * param.orthantwise_c; // 此时fx为这两部分的和
owlqn_pseudo_gradient( owlqn_pseudo_gradient(
pg, x, g, n, pg, x, g, n,
param.orthantwise_c, param.orthantwise_start, param.orthantwise_end param.orthantwise_c, param.orthantwise_start, param.orthantwise_end
); ); // 计算似梯度
} }
/* Store the initial value of the objective function. */ /* Store the initial value of the objective function. */
// 如果param.past不为0则pf不为NULL
if (pf != NULL) { if (pf != NULL) {
pf[0] = fx; pf[0] = fx;
} }
@ -429,22 +437,26 @@ int lbfgs(
Compute the direction; Compute the direction;
we assume the initial hessian matrix H_0 as the identity matrix. we assume the initial hessian matrix H_0 as the identity matrix.
*/ */
// 初始下降方向为梯度的反方向
if (param.orthantwise_c == 0.) { if (param.orthantwise_c == 0.) {
vecncpy(d, g, n); vecncpy(d, g, n); //拷贝数组 并反号（乘-1）
} else { } else {
vecncpy(d, pg, n); vecncpy(d, pg, n); //此时需拷贝似梯度 并反号（乘-1）
} }
/* /*
Make sure that the initial variables are not a minimizer. Make sure that the initial variables are not a minimizer.
*/ */
vec2norm(&xnorm, x, n); vec2norm(&xnorm, x, n); // vec2norm计算数组的L2模长
// 此段又要区别对待是否含有L1模的部分
if (param.orthantwise_c == 0.) { if (param.orthantwise_c == 0.) {
vec2norm(&gnorm, g, n); vec2norm(&gnorm, g, n);
} else { } else {
vec2norm(&gnorm, pg, n); vec2norm(&gnorm, pg, n);
} }
// 为啥要保证xnorm大于等于1不明白
if (xnorm < 1.0) xnorm = 1.0; if (xnorm < 1.0) xnorm = 1.0;
// 如果输入x即为最优化的解 则退出
if (gnorm / xnorm <= param.epsilon) { if (gnorm / xnorm <= param.epsilon) {
ret = LBFGS_ALREADY_MINIMIZED; ret = LBFGS_ALREADY_MINIMIZED;
goto lbfgs_exit; goto lbfgs_exit;
@ -453,7 +465,8 @@ int lbfgs(
/* Compute the initial step: /* Compute the initial step:
step = 1.0 / sqrt(vecdot(d, d, n)) step = 1.0 / sqrt(vecdot(d, d, n))
*/ */
vec2norminv(&step, d, n); // 计算估算的初始步长
vec2norminv(&step, d, n); // 计算数组L2模的倒数与注释的内容等效
k = 1; k = 1;
end = 0; end = 0;
@ -472,6 +485,7 @@ int lbfgs(
param.orthantwise_c, param.orthantwise_start, param.orthantwise_end param.orthantwise_c, param.orthantwise_start, param.orthantwise_end
); );
} }
// 线性搜索错误 此时则退回到上一次迭代的位置并退出
if (ls < 0) { if (ls < 0) {
/* Revert to the previous point. */ /* Revert to the previous point. */
veccpy(x, xp, n); veccpy(x, xp, n);
@ -490,6 +504,7 @@ int lbfgs(
/* Report the progress. */ /* Report the progress. */
if (cd.proc_progress) { if (cd.proc_progress) {
// 如果监控函数返回值不为0 则退出迭代过程
if ((ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls))) { if ((ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls))) {
goto lbfgs_exit; goto lbfgs_exit;
} }
@ -535,13 +550,14 @@ int lbfgs(
break; break;
} }
// 以下是L-BFGS算法的核心部分
/* /*
Update vectors s and y: Update vectors s and y:
s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}. s_{k+1} = x_{k+1} - x_{k} = \step * d_{k}.
y_{k+1} = g_{k+1} - g_{k}. y_{k+1} = g_{k+1} - g_{k}.
*/ */
it = &lm[end]; it = &lm[end];
vecdiff(it->s, x, xp, n); vecdiff(it->s, x, xp, n); // 计算两个数组的差 it->s = x - xp
vecdiff(it->y, g, gp, n); vecdiff(it->y, g, gp, n);
/* /*
@ -550,7 +566,7 @@ int lbfgs(
yy = y^t \cdot y. yy = y^t \cdot y.
Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor). Notice that yy is used for scaling the hessian matrix H_0 (Cholesky factor).
*/ */
vecdot(&ys, it->y, it->s, n); vecdot(&ys, it->y, it->s, n); // 计算两个数组的点积
vecdot(&yy, it->y, it->y, n); vecdot(&yy, it->y, it->y, n);
it->ys = ys; it->ys = ys;
@ -569,11 +585,11 @@ int lbfgs(
/* Compute the steepest direction. */ /* Compute the steepest direction. */
if (param.orthantwise_c == 0.) { if (param.orthantwise_c == 0.) {
/* Compute the negative of gradients. */ /* Compute the negative of gradients. */
vecncpy(d, g, n); vecncpy(d, g, n); // 注意这里有符号的翻转
} else { } else {
vecncpy(d, pg, n); vecncpy(d, pg, n);
} }
// 此处开始迭代
j = end; j = end;
for (i = 0;i < bound;++i) { for (i = 0;i < bound;++i) {
j = (j + m - 1) % m; /* if (--j == -1) j = m-1; */ j = (j + m - 1) % m; /* if (--j == -1) j = m-1; */
@ -585,7 +601,7 @@ int lbfgs(
vecadd(d, it->y, -it->alpha, n); vecadd(d, it->y, -it->alpha, n);
} }
vecscale(d, ys / yy, n); vecscale(d, ys / yy, n); // 适当缩放d的大小
for (i = 0;i < bound;++i) { for (i = 0;i < bound;++i) {
it = &lm[j]; it = &lm[j];
@ -808,7 +824,7 @@ static int line_search_backtracking(
++count; ++count;
if (*f > finit + *stp * dgtest) { if (*f > finit + *stp * dgtest) {
width = dec; width = dec; //减小步长
} else { } else {
/* The sufficient decrease condition (Armijo condition). */ /* The sufficient decrease condition (Armijo condition). */
if (param->linesearch == LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) { if (param->linesearch == LBFGS_LINESEARCH_BACKTRACKING_ARMIJO) {
@ -819,7 +835,7 @@ static int line_search_backtracking(
/* Check the Wolfe condition. */ /* Check the Wolfe condition. */
vecdot(&dg, g, s, n); vecdot(&dg, g, s, n);
if (dg < param->wolfe * dginit) { if (dg < param->wolfe * dginit) {
width = inc; width = inc; //增大步长
} else { } else {
if(param->linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE) { if(param->linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE) {
/* Exit with the regular Wolfe condition. */ /* Exit with the regular Wolfe condition. */
@ -828,7 +844,7 @@ static int line_search_backtracking(
/* Check the strong Wolfe condition. */ /* Check the strong Wolfe condition. */
if(dg > -param->wolfe * dginit) { if(dg > -param->wolfe * dginit) {
width = dec; width = dec; //减小步长
} else { } else {
/* Exit with the strong Wolfe condition. */ /* Exit with the strong Wolfe condition. */
return count; return count;
@ -1414,7 +1430,7 @@ static int update_trial_interval(
// 计算x的L1模 计算从start到n的绝对值的和
static lbfgsfloatval_t owlqn_x1norm( static lbfgsfloatval_t owlqn_x1norm(
const lbfgsfloatval_t* x, const lbfgsfloatval_t* x,
const int start, const int start,