- Added some note on SSE/SEE2 optimization.
- Supported SSE/SSE2 optimization with GCC. git-svn-id: file:///home/svnrepos/software/liblbfgs/trunk@3 ecf4c44f-38d1-4fa4-9757-a0b4dd0349fc
This commit is contained in:
@@ -31,11 +31,19 @@
|
||||
|
||||
#if 1400 <= _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#endif/*1400 <= _MSC_VER*/
|
||||
|
||||
#if HAVE_EMMINTRIN_H
|
||||
#include <emmintrin.h>
|
||||
#endif/*HAVE_EMMINTRIN_H*/
|
||||
|
||||
inline static void* vecalloc(size_t size)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
void *memblock = _aligned_malloc(size, 16);
|
||||
#else
|
||||
void *memblock = memalign(16, size);
|
||||
#endif
|
||||
if (memblock != NULL) {
|
||||
memset(memblock, 0, size);
|
||||
}
|
||||
@@ -44,7 +52,11 @@ inline static void* vecalloc(size_t size)
|
||||
|
||||
inline static void vecfree(void *memblock)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
_aligned_free(memblock);
|
||||
#else
|
||||
free(memblock);
|
||||
#endif
|
||||
}
|
||||
|
||||
#define fsigndiff(x, y) \
|
||||
|
@@ -31,7 +31,11 @@
|
||||
|
||||
#if 1400 <= _MSC_VER
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
#endif/*_MSC_VER*/
|
||||
|
||||
#if HAVE_XMMINTRIN_H
|
||||
#include <xmmintrin.h>
|
||||
#endif/*HAVE_XMMINTRIN_H*/
|
||||
|
||||
#if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT
|
||||
#define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U)
|
||||
|
85
lib/lbfgs.c
85
lib/lbfgs.c
@@ -76,14 +76,14 @@ licence.
|
||||
typedef unsigned int uint32_t;
|
||||
#endif/*_MSC_VER*/
|
||||
|
||||
#if defined(USE_SSE) && defined(__SSE__) && LBFGS_FLOAT == 32
|
||||
/* Use SSE optimization for 32bit float precision. */
|
||||
#include "arithmetic_sse_float.h"
|
||||
|
||||
#elif defined(USE_SSE) && defined(__SSE__) && LBFGS_FLOAT == 64
|
||||
#if defined(USE_SSE) && defined(__SSE2__) && LBFGS_FLOAT == 64
|
||||
/* Use SSE2 optimization for 64bit double precision. */
|
||||
#include "arithmetic_sse_double.h"
|
||||
|
||||
#elif defined(USE_SSE) && defined(__SSE__) && LBFGS_FLOAT == 32
|
||||
/* Use SSE optimization for 32bit float precision. */
|
||||
#include "arithmetic_sse_float.h"
|
||||
|
||||
#else
|
||||
/* No CPU specific optimization. */
|
||||
#include "arithmetic_ansi.h"
|
||||
@@ -94,6 +94,14 @@ typedef unsigned int uint32_t;
|
||||
#define max2(a, b) ((a) >= (b) ? (a) : (b))
|
||||
#define max3(a, b, c) max2(max2((a), (b)), (c));
|
||||
|
||||
struct tag_callback_data {
|
||||
int n;
|
||||
void *instance;
|
||||
lbfgs_evaluate_t proc_evaluate;
|
||||
lbfgs_progress_t proc_progress;
|
||||
};
|
||||
typedef struct tag_callback_data callback_data_t;
|
||||
|
||||
struct tag_iteration_data {
|
||||
lbfgsfloatval_t alpha;
|
||||
lbfgsfloatval_t *s; /* [n] */
|
||||
@@ -118,8 +126,7 @@ typedef int (*line_search_proc)(
|
||||
lbfgsfloatval_t *s,
|
||||
lbfgsfloatval_t *stp,
|
||||
lbfgsfloatval_t *wa,
|
||||
lbfgs_evaluate_t proc_evaluate,
|
||||
void *instance,
|
||||
callback_data_t *cd,
|
||||
const lbfgs_parameter_t *param
|
||||
);
|
||||
|
||||
@@ -131,8 +138,7 @@ static int line_search_backtracking(
|
||||
lbfgsfloatval_t *s,
|
||||
lbfgsfloatval_t *stp,
|
||||
lbfgsfloatval_t *wa,
|
||||
lbfgs_evaluate_t proc_evaluate,
|
||||
void *instance,
|
||||
callback_data_t *cd,
|
||||
const lbfgs_parameter_t *param
|
||||
);
|
||||
|
||||
@@ -144,8 +150,7 @@ static int line_search_morethuente(
|
||||
lbfgsfloatval_t *s,
|
||||
lbfgsfloatval_t *stp,
|
||||
lbfgsfloatval_t *wa,
|
||||
lbfgs_evaluate_t proc_evaluate,
|
||||
void *instance,
|
||||
callback_data_t *cd,
|
||||
const lbfgs_parameter_t *param
|
||||
);
|
||||
|
||||
@@ -164,8 +169,26 @@ static int update_trial_interval(
|
||||
int *brackt
|
||||
);
|
||||
|
||||
static int round_out_variables(int n)
|
||||
{
|
||||
n += 7;
|
||||
n /= 8;
|
||||
n *= 8;
|
||||
return n;
|
||||
}
|
||||
|
||||
lbfgsfloatval_t* lbfgs_malloc(int n)
|
||||
{
|
||||
#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
|
||||
n = round_out_variables(n);
|
||||
#endif/*defined(USE_SSE)*/
|
||||
return vecalloc(sizeof(lbfgsfloatval_t) * n);
|
||||
}
|
||||
|
||||
void lbfgs_free(lbfgsfloatval_t *x)
|
||||
{
|
||||
vecfree(x);
|
||||
}
|
||||
|
||||
void lbfgs_parameter_init(lbfgs_parameter_t *param)
|
||||
{
|
||||
@@ -173,7 +196,7 @@ void lbfgs_parameter_init(lbfgs_parameter_t *param)
|
||||
}
|
||||
|
||||
int lbfgs(
|
||||
const int n,
|
||||
int n,
|
||||
lbfgsfloatval_t *x,
|
||||
lbfgsfloatval_t *ptr_fx,
|
||||
lbfgs_evaluate_t proc_evaluate,
|
||||
@@ -197,15 +220,30 @@ int lbfgs(
|
||||
lbfgsfloatval_t fx = 0.;
|
||||
line_search_proc linesearch = line_search_morethuente;
|
||||
|
||||
/* Construct a callback data. */
|
||||
callback_data_t cd;
|
||||
cd.n = n;
|
||||
cd.instance = instance;
|
||||
cd.proc_evaluate = proc_evaluate;
|
||||
cd.proc_progress = proc_progress;
|
||||
|
||||
#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
|
||||
/* Round out the number of variables. */
|
||||
n = round_out_variables(n);
|
||||
#endif/*defined(USE_SSE)*/
|
||||
|
||||
/* Check the input parameters for errors. */
|
||||
if (n <= 0) {
|
||||
return LBFGSERR_INVALID_N;
|
||||
}
|
||||
#if defined(USE_SSE) && defined(__SSE__)
|
||||
#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__))
|
||||
if (n % 8 != 0) {
|
||||
return LBFGSERR_INVALID_N_SSE;
|
||||
}
|
||||
#endif/*defined(__SSE__)*/
|
||||
if (((unsigned short)x & 0x000F) != 0) {
|
||||
return LBFGSERR_INVALID_X_SSE;
|
||||
}
|
||||
#endif/*defined(USE_SSE)*/
|
||||
if (param->min_step < 0.) {
|
||||
return LBFGSERR_INVALID_MINSTEP;
|
||||
}
|
||||
@@ -270,7 +308,7 @@ int lbfgs(
|
||||
}
|
||||
|
||||
/* Evaluate the function value and its gradient. */
|
||||
fx = proc_evaluate(instance, x, g, n, 0);
|
||||
fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0);
|
||||
if (0. < param->orthantwise_c) {
|
||||
/* Compute L1-regularization factor and add it to the object value. */
|
||||
norm = 0.;
|
||||
@@ -319,8 +357,7 @@ int lbfgs(
|
||||
veccpy(gp, g, n);
|
||||
|
||||
/* Search for an optimal step. */
|
||||
ls = linesearch(
|
||||
n, x, &fx, g, d, &step, w, proc_evaluate, instance, param);
|
||||
ls = linesearch(n, x, &fx, g, d, &step, w, &cd, param);
|
||||
if (ls < 0) {
|
||||
ret = ls;
|
||||
goto lbfgs_exit;
|
||||
@@ -331,8 +368,8 @@ int lbfgs(
|
||||
vecnorm(&xnorm, x, n);
|
||||
|
||||
/* Report the progress. */
|
||||
if (proc_progress) {
|
||||
if (ret = proc_progress(instance, x, g, fx, xnorm, gnorm, step, n, k, ls)) {
|
||||
if (cd.proc_progress) {
|
||||
if (ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls)) {
|
||||
goto lbfgs_exit;
|
||||
}
|
||||
}
|
||||
@@ -487,8 +524,7 @@ static int line_search_backtracking(
|
||||
lbfgsfloatval_t *s,
|
||||
lbfgsfloatval_t *stp,
|
||||
lbfgsfloatval_t *xp,
|
||||
lbfgs_evaluate_t proc_evaluate,
|
||||
void *instance,
|
||||
callback_data_t *cd,
|
||||
const lbfgs_parameter_t *param
|
||||
)
|
||||
{
|
||||
@@ -556,7 +592,7 @@ static int line_search_backtracking(
|
||||
}
|
||||
|
||||
/* Evaluate the function and gradient values. */
|
||||
*f = proc_evaluate(instance, x, g, n, *stp);
|
||||
*f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
|
||||
if (0. < param->orthantwise_c) {
|
||||
/* Compute L1-regularization factor and add it to the object value. */
|
||||
norm = 0.;
|
||||
@@ -601,8 +637,7 @@ static int line_search_morethuente(
|
||||
lbfgsfloatval_t *s,
|
||||
lbfgsfloatval_t *stp,
|
||||
lbfgsfloatval_t *wa,
|
||||
lbfgs_evaluate_t proc_evaluate,
|
||||
void *instance,
|
||||
callback_data_t *cd,
|
||||
const lbfgs_parameter_t *param
|
||||
)
|
||||
{
|
||||
@@ -722,7 +757,7 @@ static int line_search_morethuente(
|
||||
}
|
||||
|
||||
/* Evaluate the function and gradient values. */
|
||||
*f = proc_evaluate(instance, x, g, n, *stp);
|
||||
*f = cd->proc_evaluate(cd->instance, x, g, cd->n, *stp);
|
||||
if (0. < param->orthantwise_c) {
|
||||
/* Compute L1-regularization factor and add it to the object value. */
|
||||
norm = 0.;
|
||||
|
187
lib/lib.vcproj
Normal file
187
lib/lib.vcproj
Normal file
@@ -0,0 +1,187 @@
|
||||
<?xml version="1.0" encoding="shift_jis"?>
|
||||
<VisualStudioProject
|
||||
ProjectType="Visual C++"
|
||||
Version="8.00"
|
||||
Name="lib"
|
||||
ProjectGUID="{85EA93B7-A41A-45F2-9159-55BBEE530704}"
|
||||
RootNamespace="lib"
|
||||
Keyword="Win32Proj"
|
||||
>
|
||||
<Platforms>
|
||||
<Platform
|
||||
Name="Win32"
|
||||
/>
|
||||
</Platforms>
|
||||
<ToolFiles>
|
||||
</ToolFiles>
|
||||
<Configurations>
|
||||
<Configuration
|
||||
Name="Debug|Win32"
|
||||
OutputDirectory="$(SolutionDir)$(ConfigurationName)"
|
||||
IntermediateDirectory="$(ConfigurationName)"
|
||||
ConfigurationType="4"
|
||||
CharacterSet="1"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
AdditionalIncludeDirectories="$(SolutionDir)include"
|
||||
PreprocessorDefinitions="WIN32;_DEBUG;_LIB"
|
||||
MinimalRebuild="true"
|
||||
BasicRuntimeChecks="3"
|
||||
RuntimeLibrary="3"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
Detect64BitPortabilityProblems="true"
|
||||
DebugInformationFormat="4"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLibrarianTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Release|Win32"
|
||||
OutputDirectory="$(SolutionDir)$(ConfigurationName)"
|
||||
IntermediateDirectory="$(ConfigurationName)"
|
||||
ConfigurationType="4"
|
||||
CharacterSet="1"
|
||||
WholeProgramOptimization="1"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
AdditionalIncludeDirectories="$(SolutionDir)include"
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_LIB"
|
||||
RuntimeLibrary="2"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
Detect64BitPortabilityProblems="true"
|
||||
DebugInformationFormat="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLibrarianTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
</Configurations>
|
||||
<References>
|
||||
</References>
|
||||
<Files>
|
||||
<Filter
|
||||
Name="<22>\<5C>[<5B>X <20>t<EFBFBD>@<40>C<EFBFBD><43>"
|
||||
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
|
||||
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
|
||||
>
|
||||
<File
|
||||
RelativePath=".\lbfgs.c"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="<22>w<EFBFBD>b<EFBFBD>_<EFBFBD>[ <20>t<EFBFBD>@<40>C<EFBFBD><43>"
|
||||
Filter="h;hpp;hxx;hm;inl;inc;xsd"
|
||||
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
|
||||
>
|
||||
<File
|
||||
RelativePath=".\arithmetic_ansi.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\arithmetic_sse_double.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\arithmetic_sse_float.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\include\lbfgs.h"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="<22><><EFBFBD>\<5C>[<5B>X <20>t<EFBFBD>@<40>C<EFBFBD><43>"
|
||||
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
|
||||
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
|
||||
>
|
||||
</Filter>
|
||||
</Files>
|
||||
<Globals>
|
||||
</Globals>
|
||||
</VisualStudioProject>
|
Reference in New Issue
Block a user