diff --git a/README b/README index 01f9afa..be602b0 100644 --- a/README +++ b/README @@ -2,7 +2,7 @@ libLBFGS: C library of limited-memory BFGS (L-BFGS) Copyright (c) 1990, Jorge Nocedal - Copyright (c) 2007, Naoaki Okazaki + Copyright (c) 2007,2008, Naoaki Okazaki ========================================================================= 1. Introduction @@ -22,19 +22,48 @@ http://www.chokkan.org/software/liblbfgs/ ========================================================================= -2. How to build the sample program +2. How to build ========================================================================= [Microsoft Visual Studio 2005] -Open the solution file "test/lbfgs.sln" and build it. +Open the solution file "lbfgs.sln" and build it. [GCC] -$ cd test -$ ./build.sh +$ ./configure +$ make +$ make install # To install libLBFGS library and header. ========================================================================= -3. License +3. Note on SSE/SSE2 optimization +========================================================================= +This library has SSE/SSE2 optimization routines for vector arithmetic +operations on Intel/AMD processors. The SSE2 routine is for 64 bit double +values whereas the SSE routine is for 32 bit float values. Since the +default parameters for L-BFGS are tuned for double precision values, it +may be more straightforward to use SSE2 optimization than to use SSE. + +To use the SSE2 optimization routine, specify --enable-sse2 option to the +configure script. + +$ ./configure --enable-sse2 + +To build libLBFGS with SSE optimization on Microsoft Visual Studio 2005, +define USE_SSE and __SSE2__ symbols. + +Make sure to run libLBFGS on processors where SSE2 instructions are +available. + +To package maintainers, + +Please do not enable SSE/SSE2 optimization routine. The library built +with SSE/SSE2 optimization will crash without any notice when necessary +SSE/SSE2 instructions are unavailable. 
+ + + +========================================================================= +4. License ========================================================================= libLBFGS is distributed under the term of the MIT license. Please refer to COPYING file in the distribution. diff --git a/configure.in b/configure.in index 5ce4a10..0608a41 100644 --- a/configure.in +++ b/configure.in @@ -1,4 +1,4 @@ -dnl $Id:$ +dnl $Id$ dnl dnl dnl Exported and configured variables: @@ -44,7 +44,7 @@ dnl ------------------------------------------------------------------ dnl Checks for header files. dnl ------------------------------------------------------------------ AC_HEADER_STDC -AC_CHECK_HEADERS(fcntl.h limits.h malloc.h strings.h unistd.h stdint.h) +AC_CHECK_HEADERS(xmmintrin.h emmintrin.h) dnl ------------------------------------------------------------------ @@ -74,6 +74,19 @@ if test "x$enable_profile" = "xyes"; then fi +dnl ------------------------------------------------------------------ +dnl Checks for SSE build +dnl ------------------------------------------------------------------ +AC_ARG_ENABLE( + sse, + [AS_HELP_STRING([--enable-sse],[Enable SSE optimized routines])] +) + +if test "x$enable_sse" = "xyes"; then + CFLAGS="-msse2 -DUSE_SSE ${CFLAGS}" +fi + + dnl ------------------------------------------------------------------ dnl Checks for library functions. dnl ------------------------------------------------------------------ diff --git a/doc/doxyfile b/doc/doxyfile index 348cac6..bcd11f1 100644 --- a/doc/doxyfile +++ b/doc/doxyfile @@ -500,7 +500,7 @@ EXCLUDE_PATTERNS = # directories that contain example code fragments that are included (see # the \include command). 
-EXAMPLE_PATH = ../test +EXAMPLE_PATH = ../sample # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp diff --git a/include/lbfgs.h b/include/lbfgs.h index d686700..cddcb7c 100644 --- a/include/lbfgs.h +++ b/include/lbfgs.h @@ -33,6 +33,9 @@ extern "C" { #endif/*__cplusplus*/ +#define USE_SSE 1 +#define __SSE2__ 1 + /* * The default precision of floating point values is 64bit (double). */ @@ -87,6 +90,8 @@ enum { LBFGSERR_INVALID_N, /** Invalid number of variables (for SSE) specified. */ LBFGSERR_INVALID_N_SSE, + /** The array x must be aligned to 16 (for SSE). */ + LBFGSERR_INVALID_X_SSE, /** Invalid parameter lbfgs_parameter_t::linesearch specified. */ LBFGSERR_INVALID_LINESEARCH, /** Invalid parameter lbfgs_parameter_t::max_step specified. */ @@ -336,7 +341,9 @@ In this formula, ||.|| denotes the Euclidean norm. * @param n The number of variables. * @param x The array of variables. A client program can set * default values for the optimization and receive the - * optimization result through this array. + * optimization result through this array. The memory + * block of this array must be aligned to 16 for liblbfgs + * built with SSE/SSE2 optimization routine enabled. * @param ptr_fx The pointer to the variable that receives the final * value of the objective function for the variables. * This argument can be set to \c NULL if the final @@ -364,7 +371,7 @@ In this formula, ||.|| denotes the Euclidean norm. * non-zero value indicates an error. */ int lbfgs( - const int n, + int n, lbfgsfloatval_t *x, lbfgsfloatval_t *ptr_fx, lbfgs_evaluate_t proc_evaluate, @@ -383,6 +390,26 @@ int lbfgs( */ void lbfgs_parameter_init(lbfgs_parameter_t *param); +/** + * Allocate an array for variables. + * + * Use this function to allocate a variable array for liblbfgs built with + * or without SSE/SSE2 optimization routine enabled. 
When SSE/SSE2 routine in + * liblbfgs is disabled, it is unnecessary to use this function; liblbfgs + * accepts a variable array allocated by any other means. + * + * @param n The number of variables. + */ +lbfgsfloatval_t* lbfgs_malloc(int n); + +/** + * Free an array of variables. + * + * @param x The array of variables allocated by ::lbfgs_malloc + * function. + */ +void lbfgs_free(lbfgsfloatval_t *x); + /** @} */ #ifdef __cplusplus @@ -411,18 +438,20 @@ only if the objective function F(x) and its gradient G(x) are computable. The well-known Newton's method requires computation of the inverse of the hessian matrix of the objective function. However, the computational cost for the inverse hessian matrix is expensive especially when the objective function -takes a large number of variables. The L-BFGS method iteratively find a +takes a large number of variables. The L-BFGS method iteratively finds a minimizer by approximating the inverse hessian matrix by information from last m iterations. This innovation saves the memory storage and computational time drastically for large-scaled problems. Among the various ports of L-BFGS, this library provides several features: -- Optimization with L1-norm (orthant-wise L-BFGS): +- Optimization with L1-norm (Orthant-Wise Limited-memory Quasi-Newton + (OW-LQN) method): In addition to standard minimization problems, the library can minimize a function F(x) combined with L1-norm |x| of the variables, {F(x) + C |x|}, where C is a constant scalar parameter. This feature is - useful for estimating parameters of log-linear models (e.g., logistic - regression and maximum entropy) with L1-regularization. + useful for estimating parameters of sparse log-linear models (e.g., + logistic regression and maximum entropy) with L1-regularization (or + Laplacian prior). 
- Clean C code: Unlike C codes generated automatically by f2c (Fortran 77 into C converter), this port includes changes based on my interpretations, improvements, @@ -446,8 +475,7 @@ Among the various ports of L-BFGS, this library provides several features: This library includes SSE/SSE2 optimization (written in compiler intrinsics) for vector arithmetic operations on Intel/AMD processors. The library uses SSE for float values and SSE2 for double values. The SSE/SSE2 optimization - routine is disabled by default; compile the library with __SSE__ symbol - defined to activate the optimization routine. + routine is disabled by default. This library is used by: - CRFsuite: A fast implementation of Conditional Random Fields (CRFs) @@ -470,6 +498,7 @@ libLBFGS is distributed under the term of the psuedo-gradients properly in the line search routine. This bug might quit an iteration process too early when the orthant-wise L-BFGS routine was activated (0 < ::lbfgs_parameter_t::orthantwise_c). + - Added configure script. - Version 1.3 (2007-12-16): - An API change. An argument was added to lbfgs() function to receive the final value of the objective function. 
This argument can be set to diff --git a/lbfgs.sln b/lbfgs.sln new file mode 100644 index 0000000..5868e5c --- /dev/null +++ b/lbfgs.sln @@ -0,0 +1,29 @@ + +Microsoft Visual Studio Solution File, Format Version 9.00 +# Visual Studio 2005 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lib", "lib\lib.vcproj", "{85EA93B7-A41A-45F2-9159-55BBEE530704}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "sample", "sample\sample.vcproj", "{230FE517-BDAB-4CDD-A1C4-EF41974F82CE}" + ProjectSection(ProjectDependencies) = postProject + {85EA93B7-A41A-45F2-9159-55BBEE530704} = {85EA93B7-A41A-45F2-9159-55BBEE530704} + EndProjectSection +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release|Win32 = Release|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {85EA93B7-A41A-45F2-9159-55BBEE530704}.Debug|Win32.ActiveCfg = Debug|Win32 + {85EA93B7-A41A-45F2-9159-55BBEE530704}.Debug|Win32.Build.0 = Debug|Win32 + {85EA93B7-A41A-45F2-9159-55BBEE530704}.Release|Win32.ActiveCfg = Release|Win32 + {85EA93B7-A41A-45F2-9159-55BBEE530704}.Release|Win32.Build.0 = Release|Win32 + {230FE517-BDAB-4CDD-A1C4-EF41974F82CE}.Debug|Win32.ActiveCfg = Debug|Win32 + {230FE517-BDAB-4CDD-A1C4-EF41974F82CE}.Debug|Win32.Build.0 = Debug|Win32 + {230FE517-BDAB-4CDD-A1C4-EF41974F82CE}.Release|Win32.ActiveCfg = Release|Win32 + {230FE517-BDAB-4CDD-A1C4-EF41974F82CE}.Release|Win32.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/lib/arithmetic_sse_double.h b/lib/arithmetic_sse_double.h index d243ae8..92b3958 100644 --- a/lib/arithmetic_sse_double.h +++ b/lib/arithmetic_sse_double.h @@ -31,11 +31,19 @@ #if 1400 <= _MSC_VER #include -#endif +#endif/*1400 <= _MSC_VER*/ + +#if HAVE_EMMINTRIN_H +#include +#endif/*HAVE_EMMINTRIN_H*/ inline static void* vecalloc(size_t size) { +#ifdef 
_MSC_VER void *memblock = _aligned_malloc(size, 16); +#else + void *memblock = memalign(16, size); +#endif if (memblock != NULL) { memset(memblock, 0, size); } @@ -44,7 +52,11 @@ inline static void* vecalloc(size_t size) inline static void vecfree(void *memblock) { +#ifdef _MSC_VER _aligned_free(memblock); +#else + free(memblock); +#endif } #define fsigndiff(x, y) \ diff --git a/lib/arithmetic_sse_float.h b/lib/arithmetic_sse_float.h index 4a356bb..5b22e16 100644 --- a/lib/arithmetic_sse_float.h +++ b/lib/arithmetic_sse_float.h @@ -31,7 +31,11 @@ #if 1400 <= _MSC_VER #include -#endif +#endif/*_MSC_VER*/ + +#if HAVE_XMMINTRIN_H +#include +#endif/*HAVE_XMMINTRIN_H*/ #if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT #define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U) diff --git a/lib/lbfgs.c b/lib/lbfgs.c index ca95380..aa13a03 100644 --- a/lib/lbfgs.c +++ b/lib/lbfgs.c @@ -76,14 +76,14 @@ licence. typedef unsigned int uint32_t; #endif/*_MSC_VER*/ -#if defined(USE_SSE) && defined(__SSE__) && LBFGS_FLOAT == 32 -/* Use SSE optimization for 32bit float precision. */ -#include "arithmetic_sse_float.h" - -#elif defined(USE_SSE) && defined(__SSE__) && LBFGS_FLOAT == 64 +#if defined(USE_SSE) && defined(__SSE2__) && LBFGS_FLOAT == 64 /* Use SSE2 optimization for 64bit double precision. */ #include "arithmetic_sse_double.h" +#elif defined(USE_SSE) && defined(__SSE__) && LBFGS_FLOAT == 32 +/* Use SSE optimization for 32bit float precision. */ +#include "arithmetic_sse_float.h" + #else /* No CPU specific optimization. */ #include "arithmetic_ansi.h" @@ -94,6 +94,14 @@ typedef unsigned int uint32_t; #define max2(a, b) ((a) >= (b) ? 
(a) : (b)) #define max3(a, b, c) max2(max2((a), (b)), (c)); +struct tag_callback_data { + int n; + void *instance; + lbfgs_evaluate_t proc_evaluate; + lbfgs_progress_t proc_progress; +}; +typedef struct tag_callback_data callback_data_t; + struct tag_iteration_data { lbfgsfloatval_t alpha; lbfgsfloatval_t *s; /* [n] */ @@ -118,8 +126,7 @@ typedef int (*line_search_proc)( lbfgsfloatval_t *s, lbfgsfloatval_t *stp, lbfgsfloatval_t *wa, - lbfgs_evaluate_t proc_evaluate, - void *instance, + callback_data_t *cd, const lbfgs_parameter_t *param ); @@ -131,8 +138,7 @@ static int line_search_backtracking( lbfgsfloatval_t *s, lbfgsfloatval_t *stp, lbfgsfloatval_t *wa, - lbfgs_evaluate_t proc_evaluate, - void *instance, + callback_data_t *cd, const lbfgs_parameter_t *param ); @@ -144,8 +150,7 @@ static int line_search_morethuente( lbfgsfloatval_t *s, lbfgsfloatval_t *stp, lbfgsfloatval_t *wa, - lbfgs_evaluate_t proc_evaluate, - void *instance, + callback_data_t *cd, const lbfgs_parameter_t *param ); @@ -164,8 +169,26 @@ static int update_trial_interval( int *brackt ); +static int round_out_variables(int n) +{ + n += 7; + n /= 8; + n *= 8; + return n; +} +lbfgsfloatval_t* lbfgs_malloc(int n) +{ +#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) + n = round_out_variables(n); +#endif/*defined(USE_SSE)*/ + return vecalloc(sizeof(lbfgsfloatval_t) * n); +} +void lbfgs_free(lbfgsfloatval_t *x) +{ + vecfree(x); +} void lbfgs_parameter_init(lbfgs_parameter_t *param) { @@ -173,7 +196,7 @@ void lbfgs_parameter_init(lbfgs_parameter_t *param) } int lbfgs( - const int n, + int n, lbfgsfloatval_t *x, lbfgsfloatval_t *ptr_fx, lbfgs_evaluate_t proc_evaluate, @@ -197,15 +220,30 @@ int lbfgs( lbfgsfloatval_t fx = 0.; line_search_proc linesearch = line_search_morethuente; + /* Construct a callback data. 
*/ + callback_data_t cd; + cd.n = n; + cd.instance = instance; + cd.proc_evaluate = proc_evaluate; + cd.proc_progress = proc_progress; + +#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) + /* Round out the number of variables. */ + n = round_out_variables(n); +#endif/*defined(USE_SSE)*/ + /* Check the input parameters for errors. */ if (n <= 0) { return LBFGSERR_INVALID_N; } -#if defined(USE_SSE) && defined(__SSE__) +#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) if (n % 8 != 0) { return LBFGSERR_INVALID_N_SSE; } -#endif/*defined(__SSE__)*/ + if (((unsigned short)x & 0x000F) != 0) { + return LBFGSERR_INVALID_X_SSE; + } +#endif/*defined(USE_SSE)*/ if (param->min_step < 0.) { return LBFGSERR_INVALID_MINSTEP; } @@ -270,7 +308,7 @@ int lbfgs( } /* Evaluate the function value and its gradient. */ - fx = proc_evaluate(instance, x, g, n, 0); + fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0); if (0. < param->orthantwise_c) { /* Compute L1-regularization factor and add it to the object value. */ norm = 0.; @@ -319,8 +357,7 @@ int lbfgs( veccpy(gp, g, n); /* Search for an optimal step. */ - ls = linesearch( - n, x, &fx, g, d, &step, w, proc_evaluate, instance, param); + ls = linesearch(n, x, &fx, g, d, &step, w, &cd, param); if (ls < 0) { ret = ls; goto lbfgs_exit; @@ -331,8 +368,8 @@ int lbfgs( vecnorm(&xnorm, x, n); /* Report the progress. */ - if (proc_progress) { - if (ret = proc_progress(instance, x, g, fx, xnorm, gnorm, step, n, k, ls)) { + if (cd.proc_progress) { + if (ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls)) { goto lbfgs_exit; } } @@ -487,8 +524,7 @@ static int line_search_backtracking( lbfgsfloatval_t *s, lbfgsfloatval_t *stp, lbfgsfloatval_t *xp, - lbfgs_evaluate_t proc_evaluate, - void *instance, + callback_data_t *cd, const lbfgs_parameter_t *param ) { @@ -556,7 +592,7 @@ static int line_search_backtracking( } /* Evaluate the function and gradient values. 
*/ - *f = proc_evaluate(instance, x, g, n, *stp); + *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp); if (0. < param->orthantwise_c) { /* Compute L1-regularization factor and add it to the object value. */ norm = 0.; @@ -601,8 +637,7 @@ static int line_search_morethuente( lbfgsfloatval_t *s, lbfgsfloatval_t *stp, lbfgsfloatval_t *wa, - lbfgs_evaluate_t proc_evaluate, - void *instance, + callback_data_t *cd, const lbfgs_parameter_t *param ) { @@ -722,7 +757,7 @@ static int line_search_morethuente( } /* Evaluate the function and gradient values. */ - *f = proc_evaluate(instance, x, g, n, *stp); + *f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp); if (0. < param->orthantwise_c) { /* Compute L1-regularization factor and add it to the object value. */ norm = 0.; diff --git a/lib/lib.vcproj b/lib/lib.vcproj new file mode 100644 index 0000000..bf24952 --- /dev/null +++ b/lib/lib.vcproj @@ -0,0 +1,187 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/sample/main.c b/sample/sample.c similarity index 87% rename from sample/main.c rename to sample/sample.c index 22068ec..5ea5ecc 100644 --- a/sample/main.c +++ b/sample/sample.c @@ -42,12 +42,18 @@ static int progress( return 0; } -#define N 8 +#define N 100 int main(int argc, char *argv) { int i, ret = 0; - lbfgsfloatval_t x[N], fx; + lbfgsfloatval_t fx; + lbfgsfloatval_t *x = lbfgs_malloc(N); + + if (x == NULL) { + printf("ERROR: Failed to allocate a memory block for variables.\n"); + return 1; + } /* Initialize the variables. 
*/ for (i = 0;i < N;i += 2) { @@ -65,5 +71,6 @@ int main(int argc, char *argv) printf("L-BFGS optimization terminated with status code = %d\n", ret); printf(" fx = %f, x[0] = %f, x[1] = %f\n", fx, x[0], x[1]); + lbfgs_free(x); return 0; } diff --git a/sample/sample.vcproj b/sample/sample.vcproj new file mode 100644 index 0000000..a9831e2 --- /dev/null +++ b/sample/sample.vcproj @@ -0,0 +1,199 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +