diff --git a/src/lib/lbfgs.c b/src/lib/lbfgs.c index e037e8b..93cc64e 100644 --- a/src/lib/lbfgs.c +++ b/src/lib/lbfgs.c @@ -268,9 +268,11 @@ int lbfgs( lbfgsfloatval_t *xp = NULL; lbfgsfloatval_t *g = NULL, *gp = NULL, *pg = NULL; // gp (p for previous) pg (p for pesudo) lbfgsfloatval_t *d = NULL, *w = NULL, *pf = NULL; - lbfgsfloatval_t *dp = NULL; // p for preconditioned + lbfgsfloatval_t *dp = NULL; // p for preconditioned (Add by Yi Zhang on 02-16-2022) + lbfgsfloatval_t *y2 = NULL; // Add by Yi Zhang on 02-16-2022 iteration_data_t *lm = NULL, *it = NULL; lbfgsfloatval_t ys, yy; + lbfgsfloatval_t Yk; // Add by Yi Zhang on 02-16-2022 lbfgsfloatval_t xnorm, gnorm, beta; lbfgsfloatval_t fx = 0.; lbfgsfloatval_t rate = 0.; @@ -384,6 +386,13 @@ int lbfgs( goto lbfgs_exit; } + // Add by Yi Zhang on 02-16-2022 + y2 = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); + if (y2 == NULL) { + ret = LBFGSERR_OUTOFMEMORY; + goto lbfgs_exit; + } + if (cd.proc_precondition) { dp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); if (dp == NULL) { @@ -391,6 +400,7 @@ int lbfgs( goto lbfgs_exit; } } + // End // 初始化计算L1模的数组 if (param.orthantwise_c != 0.) { @@ -625,9 +635,13 @@ int lbfgs( vecadd(d, it->y, -it->alpha, n); } - vecscale(d, ys / yy, n); // 适当缩放d的大小 + // Annotated by Yi Zhang on 02-16-2022 + //vecscale(d, ys / yy, n); // 适当缩放d的大小 - // 我们在这里提供一个预优函数的接口 返回 d = H0^-1 * d + // Add by Yi Zhang on 02-16-2022 + // Wah June Leong & Chuei Yee Chen (2013) A class of diagonal preconditioners for limited memory BFGS method, + // Optimization Methods and Software, 28:2, 379-392, DOI: 10.1080/10556788.2011.653356 + // 我们在这里提供一个预优函数的接口 需还原则删除下面的代码段 同时取消上面一行注释 if (cd.proc_precondition) { if (param.orthantwise_c == 0.) { cd.proc_precondition(cd.instance, x, g, d, dp, n); @@ -636,7 +650,15 @@ int lbfgs( } veccpy(d, dp, n); + } else if (ys < yy) { + vecscale(d, ys / yy, n); // 适当缩放d的大小 + } else { + veccpy(y2, lm[end].y, n); + vecmul(y2, lm[end].y, n); + vecdot(&Yk, y2, y2, n); + vecadd(d, y2, (ys-yy)/Yk, n); } + // End for (i = 0;i < bound;++i) { it = &lm[j]; @@ -688,6 +710,14 @@ lbfgs_exit: vecfree(g); vecfree(xp); + // Add by Yi Zhang on 02-16-2022 + vecfree(y2); + + if (cd.proc_precondition) { + vecfree(dp); + } + // End + return ret; } diff --git a/src/sample/sample4.cpp b/src/sample/sample4.cpp index 32dcb6b..ad31406 100644 --- a/src/sample/sample4.cpp +++ b/src/sample/sample4.cpp @@ -8,6 +8,7 @@ using std::clog; using std::endl; +#define M 120 #define N 100 inline lbfgsfloatval_t scalernd(lbfgsfloatval_t l, lbfgsfloatval_t h) @@ -79,32 +80,29 @@ private: TEST_FUNC::TEST_FUNC() { - kernel = new lbfgsfloatval_t* [N]; - for (size_t i = 0; i < N; i++) + kernel = new lbfgsfloatval_t* [M]; + for (size_t i = 0; i < M; i++) { kernel[i] = new lbfgsfloatval_t [N]; } x = new lbfgsfloatval_t [N]; m = new lbfgsfloatval_t [N]; - mid = new lbfgsfloatval_t [N]; - d = new lbfgsfloatval_t [N]; + mid = new lbfgsfloatval_t [M]; + d = new lbfgsfloatval_t [M]; srand(time(nullptr)); - for (size_t i = 0; i < N-1; i++) + for (size_t i = 0; i < M; i++) { - for (size_t j = i+1; j < N; j++) + for (size_t j = 0; j < N; j++) { - kernel[i][j] = scalernd(0.0, 2.0); - kernel[j][i] = kernel[i][j]; + kernel[i][j] = scalernd(-2.0, 2.0); } - - kernel[i][i] = scalernd(10.0, 20.0); } vecrnd(m, 1.0, 2.0, N); - for (size_t i = 0; i < N; i++) + for (size_t i = 0; i < M; i++) { d[i] = 0.0; for (size_t j = 0; j < N; j++) @@ -131,7 +129,7 @@ TEST_FUNC::~TEST_FUNC() lbfgsfloatval_t TEST_FUNC::Func(const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step) { - for (size_t i = 0; i < N; i++) + for (size_t i = 0; i < M; i++) { mid[i] = 0.0; for (size_t j = 0; j < N; j++) @@ -150,7 +148,7 @@ lbfgsfloatval_t TEST_FUNC::Func(const lbfgsfloatval_t *x, lbfgsfloatval_t *g, for (size_t j = 0; j < N; j++) { g[j] = 0.0; - for (size_t i = 0; i < N; i++) + for (size_t i = 0; i < M; i++) { g[j] += kernel[i][j]*mid[i]; } @@ -171,7 +169,12 @@ void TEST_FUNC::Precondition(const lbfgsfloatval_t *x, const lbfgsfloatval_t *g, { for (size_t j = 0; j < N; j++) { - d_pre[j] = d[j]/kernel[j][j]; + d_pre[j] = 0.0; + for (size_t i = 0; i < M; i++) + { + d_pre[j] += kernel[i][j]*kernel[i][j]; + } + d_pre[j] = d[j]/d_pre[j]; } return; } @@ -183,9 +186,8 @@ int TEST_FUNC::Routine() lbfgs_parameter_t self_para; lbfgs_parameter_init(&self_para); self_para.epsilon = 1e-7; - self_para.max_linesearch = 1000; - int ret = lbfgs(N, x, &fx, _Func, _Progress, this, &self_para, _Precondition); + int ret = lbfgs(N, x, &fx, _Func, _Progress, this, &self_para, NULL); clog << endl << "L-BFGS optimization terminated with status: " << lbfgs_strerror(ret) << endl; clog << "Maximal difference = " << max_diff(x, m, N) << endl;