- Added a function owlqn_direction_line.

- Source code simplifications and clean-ups.

git-svn-id: file:///home/svnrepos/software/liblbfgs/trunk@20 ecf4c44f-38d1-4fa4-9757-a0b4dd0349fc
This commit is contained in:
naoaki 2008-07-10 07:01:53 +00:00
parent 36b9ae4a62
commit a8edbfe8b9
4 changed files with 81 additions and 107 deletions

View File

@ -120,14 +120,14 @@ inline static void vecdot(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const lb
} }
} }
inline static void vecnorm(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n) inline static void vec2norm(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n)
{ {
vecdot(s, x, x, n); vecdot(s, x, x, n);
*s = (lbfgsfloatval_t)sqrt(*s); *s = (lbfgsfloatval_t)sqrt(*s);
} }
inline static void vecrnorm(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n) inline static void vec2norminv(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n)
{ {
vecnorm(s, x, n); vec2norm(s, x, n);
*s = (lbfgsfloatval_t)(1.0 / *s); *s = (lbfgsfloatval_t)(1.0 / *s);
} }

View File

@ -237,7 +237,7 @@ inline static void vecfree(void *memblock)
_mm_store_sd((s), XMM0); \ _mm_store_sd((s), XMM0); \
} }
#define vecnorm(s, x, n) \ #define vec2norm(s, x, n) \
{ \ { \
int i; \ int i; \
__m128d XMM0 = _mm_setzero_pd(); \ __m128d XMM0 = _mm_setzero_pd(); \
@ -261,7 +261,7 @@ inline static void vecfree(void *memblock)
} }
#define vecrnorm(s, x, n) \ #define vec2norminv(s, x, n) \
{ \ { \
int i; \ int i; \
__m128d XMM0 = _mm_setzero_pd(); \ __m128d XMM0 = _mm_setzero_pd(); \

View File

@ -229,7 +229,7 @@ inline static void vecfree(void *memblock)
_mm_store_ss((s), XMM0); \ _mm_store_ss((s), XMM0); \
} }
#define vecnorm(s, x, n) \ #define vec2norm(s, x, n) \
{ \ { \
int i; \ int i; \
__m128 XMM0 = _mm_setzero_ps(); \ __m128 XMM0 = _mm_setzero_ps(); \
@ -258,7 +258,7 @@ inline static void vecfree(void *memblock)
_mm_store_ss((s), XMM3); \ _mm_store_ss((s), XMM3); \
} }
#define vecrnorm(s, x, n) \ #define vec2norminv(s, x, n) \
{ \ { \
int i; \ int i; \
__m128 XMM0 = _mm_setzero_ps(); \ __m128 XMM0 = _mm_setzero_ps(); \

View File

@ -169,13 +169,13 @@ static int update_trial_interval(
int *brackt int *brackt
); );
static lbfgsfloatval_t owlqn_xnorm1( static lbfgsfloatval_t owlqn_x1norm(
const lbfgsfloatval_t* x, const lbfgsfloatval_t* x,
const int start, const int start,
const int n const int n
); );
static lbfgsfloatval_t owlqn_gnorm( static lbfgsfloatval_t owlqn_g2norm(
const lbfgsfloatval_t* x, const lbfgsfloatval_t* x,
const lbfgsfloatval_t* g, const lbfgsfloatval_t* g,
const lbfgsfloatval_t c, const lbfgsfloatval_t c,
@ -192,6 +192,15 @@ static void owlqn_direction(
const int n const int n
); );
/*
 * Returns the value the line-search routines use as the gradient in the
 * search direction when orthant-wise regularization is enabled.
 *
 *  x      current variables
 *  g      gradient at x
 *  s      search direction
 *  c      coefficient added to or subtracted from g[i] for indices >= start
 *  start  first index that receives the +/- c adjustment; indices below it
 *         contribute plain s[i] * g[i]
 *  n      number of variables
 */
static lbfgsfloatval_t owlqn_direction_line(
const lbfgsfloatval_t* x,
const lbfgsfloatval_t* g,
const lbfgsfloatval_t* s,
const lbfgsfloatval_t c,
const int start,
const int n
);
static void owlqn_project( static void owlqn_project(
lbfgsfloatval_t* d, lbfgsfloatval_t* d,
const lbfgsfloatval_t* sign, const lbfgsfloatval_t* sign,
@ -249,7 +258,7 @@ int lbfgs(
lbfgsfloatval_t *xp = NULL, *g = NULL, *gp = NULL, *d = NULL, *w = NULL; lbfgsfloatval_t *xp = NULL, *g = NULL, *gp = NULL, *d = NULL, *w = NULL;
iteration_data_t *lm = NULL, *it = NULL; iteration_data_t *lm = NULL, *it = NULL;
lbfgsfloatval_t ys, yy; lbfgsfloatval_t ys, yy;
lbfgsfloatval_t norm, xnorm, gnorm, beta; lbfgsfloatval_t xnorm, gnorm, beta;
lbfgsfloatval_t fx = 0.; lbfgsfloatval_t fx = 0.;
line_search_proc linesearch = line_search_morethuente; line_search_proc linesearch = line_search_morethuente;
@ -345,10 +354,10 @@ int lbfgs(
/* Evaluate the function value and its gradient. */ /* Evaluate the function value and its gradient. */
fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0); fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0);
if (0. < param->orthantwise_c) { if (0. != param->orthantwise_c) {
/* Compute the L1 norm of the variable and add it to the object value. */ /* Compute the L1 norm of the variable and add it to the object value. */
norm = owlqn_xnorm1(x, param->orthantwise_start, n); xnorm = owlqn_x1norm(x, param->orthantwise_start, n);
fx += norm * param->orthantwise_c; fx += xnorm * param->orthantwise_c;
} }
/* /*
@ -364,11 +373,11 @@ int lbfgs(
/* /*
Make sure that the initial variables are not a minimizer. Make sure that the initial variables are not a minimizer.
*/ */
vecnorm(&xnorm, x, n); vec2norm(&xnorm, x, n);
if (param->orthantwise_c == 0.) { if (param->orthantwise_c == 0.) {
vecnorm(&gnorm, g, n); vec2norm(&gnorm, g, n);
} else { } else {
gnorm = owlqn_gnorm(x, g, param->orthantwise_c, param->orthantwise_start, n); gnorm = owlqn_g2norm(x, g, param->orthantwise_c, param->orthantwise_start, n);
} }
if (xnorm < 1.0) xnorm = 1.0; if (xnorm < 1.0) xnorm = 1.0;
if (gnorm / xnorm <= param->epsilon) { if (gnorm / xnorm <= param->epsilon) {
@ -379,7 +388,7 @@ int lbfgs(
/* Compute the initial step: /* Compute the initial step:
step = 1.0 / sqrt(vecdot(d, d, n)) step = 1.0 / sqrt(vecdot(d, d, n))
*/ */
vecrnorm(&step, d, n); vec2norminv(&step, d, n);
k = 1; k = 1;
end = 0; end = 0;
@ -396,11 +405,11 @@ int lbfgs(
} }
/* Compute x and g norms. */ /* Compute x and g norms. */
vecnorm(&xnorm, x, n); vec2norm(&xnorm, x, n);
if (param->orthantwise_c != 0.) { if (param->orthantwise_c == 0.) {
gnorm = owlqn_gnorm(x, g, param->orthantwise_c, param->orthantwise_start, n); vec2norm(&gnorm, g, n);
} else { } else {
vecnorm(&gnorm, g, n); gnorm = owlqn_g2norm(x, g, param->orthantwise_c, param->orthantwise_start, n);
} }
/* Report the progress. */ /* Report the progress. */
@ -557,34 +566,7 @@ static int line_search_backtracking(
/* Compute the initial gradient in the search direction. */ /* Compute the initial gradient in the search direction. */
if (param->orthantwise_c != 0.) { if (param->orthantwise_c != 0.) {
/* Compute the negative of gradients. */ dginit = owlqn_direction_line(x, g, s, param->orthantwise_c, param->orthantwise_start, n);
for (i = 0;i < param->orthantwise_start;++i) {
dginit += s[i] * g[i];
}
/* Use psuedo-gradients for orthant-wise updates. */
for (i = param->orthantwise_start;i < n;++i) {
/* Notice that:
(-s[i] < 0) <==> (g[i] < -param->orthantwise_c)
(-s[i] > 0) <==> (param->orthantwise_c < g[i])
as the result of the lbfgs() function for orthant-wise updates.
*/
if (s[i] != 0.) {
if (x[i] < 0.) {
/* Differentiable. */
dginit += s[i] * (g[i] - param->orthantwise_c);
} else if (0. < x[i]) {
/* Differentiable. */
dginit += s[i] * (g[i] + param->orthantwise_c);
} else if (s[i] < 0.) {
/* Take the left partial derivative. */
dginit += s[i] * (g[i] - param->orthantwise_c);
} else if (0. < s[i]) {
/* Take the right partial derivative. */
dginit += s[i] * (g[i] + param->orthantwise_c);
}
}
}
} else { } else {
vecdot(&dginit, g, s, n); vecdot(&dginit, g, s, n);
} }
@ -614,7 +596,7 @@ static int line_search_backtracking(
*f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp); *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
if (0. < param->orthantwise_c) { if (0. < param->orthantwise_c) {
/* Compute the L1 norm of the variables and add it to the object value. */ /* Compute the L1 norm of the variables and add it to the object value. */
norm = owlqn_xnorm1(x, param->orthantwise_start, n); norm = owlqn_x1norm(x, param->orthantwise_start, n);
*f += norm * param->orthantwise_c; *f += norm * param->orthantwise_c;
} }
@ -674,35 +656,7 @@ static int line_search_morethuente(
/* Compute the initial gradient in the search direction. */ /* Compute the initial gradient in the search direction. */
if (param->orthantwise_c != 0.) { if (param->orthantwise_c != 0.) {
dginit = 0.; dginit = owlqn_direction_line(x, g, s, param->orthantwise_c, param->orthantwise_start, n);
for (i = 0;i < param->orthantwise_start;++i) {
dginit += s[i] * g[i];
}
/* Use psuedo-gradients for orthant-wise updates. */
for (i = param->orthantwise_start;i < n;++i) {
/* Notice that:
(-s[i] < 0) <==> (g[i] < -param->orthantwise_c)
(-s[i] > 0) <==> (param->orthantwise_c < g[i])
as the result of the lbfgs() function for orthant-wise updates.
*/
if (s[i] != 0.) {
if (x[i] < 0.) {
/* Differentiable. */
dginit += s[i] * (g[i] - param->orthantwise_c);
} else if (0. < x[i]) {
/* Differentiable. */
dginit += s[i] * (g[i] + param->orthantwise_c);
} else if (s[i] < 0.) {
/* Take the left partial derivative. */
dginit += s[i] * (g[i] - param->orthantwise_c);
} else if (0. < s[i]) {
/* Take the right partial derivative. */
dginit += s[i] * (g[i] + param->orthantwise_c);
}
}
}
} else { } else {
vecdot(&dginit, g, s, n); vecdot(&dginit, g, s, n);
} }
@ -777,34 +731,10 @@ static int line_search_morethuente(
*f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp); *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
if (0. < param->orthantwise_c) { if (0. < param->orthantwise_c) {
/* Compute the L1 norm of the variables and add it to the object value. */ /* Compute the L1 norm of the variables and add it to the object value. */
norm = owlqn_xnorm1(x, param->orthantwise_start, n); norm = owlqn_x1norm(x, param->orthantwise_start, n);
*f += norm * param->orthantwise_c; *f += norm * param->orthantwise_c;
dg = 0.; dg = owlqn_direction_line(x, g, s, param->orthantwise_c, param->orthantwise_start, n);
for (i = 0;i < param->orthantwise_start;++i) {
dg += s[i] * g[i];
}
/* Use psuedo-gradients for orthant-wise updates. */
for (i = param->orthantwise_start;i < n;++i) {
if (x[i] < 0.) {
/* Differentiable. */
dg += s[i] * (g[i] - param->orthantwise_c);
} else if (0. < x[i]) {
/* Differentiable. */
dg += s[i] * (g[i] + param->orthantwise_c);
} else {
if (g[i] < -param->orthantwise_c) {
/* Take the right partial derivative. */
dg += s[i] * (g[i] + param->orthantwise_c);
} else if (param->orthantwise_c < g[i]) {
/* Take the left partial derivative. */
dg += s[i] * (g[i] - param->orthantwise_c);
} else {
/* dg += 0.; */
}
}
}
} else { } else {
vecdot(&dg, g, s, n); vecdot(&dg, g, s, n);
} }
@ -1197,7 +1127,7 @@ static int update_trial_interval(
} }
static lbfgsfloatval_t owlqn_xnorm1( static lbfgsfloatval_t owlqn_x1norm(
const lbfgsfloatval_t* x, const lbfgsfloatval_t* x,
const int start, const int start,
const int n const int n
@ -1213,7 +1143,7 @@ static lbfgsfloatval_t owlqn_xnorm1(
return norm; return norm;
} }
static lbfgsfloatval_t owlqn_gnorm( static lbfgsfloatval_t owlqn_g2norm(
const lbfgsfloatval_t* x, const lbfgsfloatval_t* x,
const lbfgsfloatval_t* g, const lbfgsfloatval_t* g,
const lbfgsfloatval_t c, const lbfgsfloatval_t c,
@ -1280,6 +1210,50 @@ static void owlqn_direction(
} }
} }
/*
 * Accumulate the product of the search direction s with the (pseudo-)gradient
 * and return the sum.  The line-search routines store the result as the
 * gradient in the search direction.
 *
 *  - For 0 <= i < start the term is s[i] * g[i].
 *  - For start <= i < n, entries with s[i] == 0 are skipped; otherwise the
 *    term is s[i] * (g[i] - c) or s[i] * (g[i] + c), with the sign of c
 *    chosen from x[i], or from s[i] when x[i] is zero.
 *
 *  x      current variables
 *  g      gradient at x
 *  s      search direction
 *  c      coefficient applied to indices >= start
 *  start  first index that receives the +/- c adjustment
 *  n      number of variables
 */
static lbfgsfloatval_t owlqn_direction_line(
    const lbfgsfloatval_t* x,
    const lbfgsfloatval_t* g,
    const lbfgsfloatval_t* s,
    const lbfgsfloatval_t c,
    const int start,
    const int n
    )
{
    int k = 0;
    lbfgsfloatval_t sum = 0.;

    /* Leading indices: plain dot product of direction and gradient. */
    while (k < start) {
        sum += s[k] * g[k];
        ++k;
    }

    /* Remaining indices: use pseudo-gradients for orthant-wise updates. */
    for (k = start; k < n; ++k) {
        const lbfgsfloatval_t sk = s[k];
        const lbfgsfloatval_t xk = x[k];

        /* A zero direction component adds nothing to the sum. */
        if (sk == 0.) {
            continue;
        }

        /*
         * NOTE(review): the previous comment here stated that, as a result
         * of lbfgs() for orthant-wise updates,
         *     (-s[i] < 0) <==> (g[i] < -c)
         *     (-s[i] > 0) <==> (c < g[i])
         * This function does not check that; confirm against the caller.
         */
        if (xk < 0.) {
            /* Differentiable: x[k] is strictly negative. */
            sum += sk * (g[k] - c);
        } else if (0. < xk) {
            /* Differentiable: x[k] is strictly positive. */
            sum += sk * (g[k] + c);
        } else if (sk < 0.) {
            /* x[k] is zero, moving negative: left partial derivative. */
            sum += sk * (g[k] - c);
        } else if (0. < sk) {
            /* x[k] is zero, moving positive: right partial derivative. */
            sum += sk * (g[k] + c);
        }
    }

    return sum;
}
static void owlqn_project( static void owlqn_project(
lbfgsfloatval_t* d, lbfgsfloatval_t* d,
const lbfgsfloatval_t* sign, const lbfgsfloatval_t* sign,