initial upload
This commit is contained in:
parent
3cafe21fc3
commit
859a6357c8
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
||||
build/
|
||||
.DS_Store
|
26
CMakeLists.txt
Normal file
26
CMakeLists.txt
Normal file
@ -0,0 +1,26 @@
|
||||
cmake_minimum_required(VERSION 3.15.2)
|
||||
|
||||
# 设置工程名称和语言
|
||||
project(LIBSGD)
|
||||
|
||||
# 设置库版本号
|
||||
set(VERSION_MAJOR 0)
|
||||
set(VERSION_MINOR 1)
|
||||
|
||||
# 添加源文件地址
|
||||
add_subdirectory(src/)
|
||||
|
||||
# 设置安装地址(通过homebrew安装时需要注释掉)
|
||||
set(CMAKE_INSTALL_PREFIX /usr/local)
|
||||
|
||||
# 构建一个 CPack 安装包
|
||||
include (InstallRequiredSystemLibraries)
|
||||
set(CPACK_OUTPUT_FILE_PREFIX "${PROJECT_SOURCE_DIR}/package")
|
||||
#set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
|
||||
set(CPACK_PACKAGE_VERSION_MAJOR "${VERSION_MAJOR}")
|
||||
set(CPACK_PACKAGE_VERSION_MINOR "${VERSION_MINOR}")
|
||||
set(CPACK_PACKAGE_VERSION_PATCH "${VERSION_PATCH}")
|
||||
set(PROJECT_VERSION_FULL ${VERSION_MAJOR}.${VERSION_MINOR})
|
||||
set(CPACK_SOURCE_GENERATOR "TGZ")
|
||||
set(CPACK_SOURCE_PACKAGE_FILE_NAME libadam-${PROJECT_VERSION_FULL})
|
||||
include (CPack)
|
39
src/CMakeLists.txt
Normal file
39
src/CMakeLists.txt
Normal file
@ -0,0 +1,39 @@
|
||||
# 设定源文件文件夹
|
||||
aux_source_directory(lib/ SGDLIB_SRC)
|
||||
|
||||
# 以下部分为例子程序的编译
|
||||
# 设置可执行文件的输出地址
|
||||
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
|
||||
|
||||
# 以下部分为库的编译
|
||||
# 注意目标名必须唯一 所以不能直接生成相同名称的动态库与静态库
|
||||
# 注意此处不必为目标名称添加lib前缀和相应后缀,cmake会自行添加
|
||||
add_library(sgd SHARED ${SGDLIB_SRC})
|
||||
# 首先添加静态库的生成命令
|
||||
add_library(sgd_static STATIC ${SGDLIB_SRC})
|
||||
# 设置静态库的输出名称从而获得与动态库名称相同的静态库
|
||||
set_target_properties(sgd_static PROPERTIES OUTPUT_NAME "sgd")
|
||||
# 设置输出目标属性以同时输出动态库与静态库
|
||||
set_target_properties(sgd PROPERTIES CLEAN_DIRECT_OUTPUT 1)
|
||||
set_target_properties(sgd_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
|
||||
# 设置动态库的版本号
|
||||
set_target_properties(sgd PROPERTIES VERSION 0.1 SOVERSION 0.1)
|
||||
# 设置库文件的输出地址
|
||||
set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)
|
||||
|
||||
# 设置编译选项
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++11 -O2")
|
||||
|
||||
# 库的安装命令
|
||||
install(TARGETS sgd sgd_static
|
||||
LIBRARY DESTINATION lib
|
||||
ARCHIVE DESTINATION lib)
|
||||
# 头文件安装命令
|
||||
install(FILES lib/sgd.h DESTINATION include)
|
||||
|
||||
# 添加可执行文件 命令行
|
||||
add_executable(sgd_sample sample/sample.cpp)
|
||||
# 为安装文件添加动态库的搜索地址
|
||||
set_target_properties(sgd_sample PROPERTIES INSTALL_RPATH "/usr/local/lib")
|
||||
# 链接动态库
|
||||
target_link_libraries(sgd_sample PUBLIC sgd)
|
328
src/lib/sgd.cpp
Normal file
328
src/lib/sgd.cpp
Normal file
@ -0,0 +1,328 @@
|
||||
#include "sgd.h"
|
||||
#include "cmath"
|
||||
|
||||
/**
|
||||
* @brief return absolute value
|
||||
*
|
||||
* @param x input value
|
||||
*/
|
||||
#define sgd_fabs(x) ((x < 0) ? -1*x : x)
|
||||
|
||||
/**
|
||||
* @brief return the bigger value
|
||||
*
|
||||
* @param a input value
|
||||
* @param b another input value
|
||||
*
|
||||
* @return the bigger value
|
||||
*/
|
||||
#define sgd_max(a, b) (a>b?a:b)
|
||||
|
||||
/**
|
||||
* @brief return value of the sgd_solver() function.
|
||||
*/
|
||||
enum sgd_return_e
|
||||
{
|
||||
SGD_SUCCESS = 0,
|
||||
SGD_CONVERGENCE = 1,
|
||||
SGD_STOP, //1
|
||||
SGD_UNKNOWN_ERROR = -1024,
|
||||
// The variable size is negative
|
||||
SGD_INVILAD_VARIABLE_SIZE, //-1023
|
||||
// The maximal iteration times is negative.
|
||||
SGD_INVILAD_MAX_ITERATIONS, //-1022
|
||||
// The epsilon is negative.
|
||||
SGD_INVILAD_EPSILON, //-1021
|
||||
// Iteration reached max limit
|
||||
SGD_REACHED_MAX_ITERATIONS,
|
||||
// Invalid value for alpha
|
||||
SGD_INVALID_ALPHA,
|
||||
// Invalid value for beta
|
||||
SGD_INVALID_BETA,
|
||||
// Invalid value for sigma
|
||||
SGD_INVALID_SIGMA,
|
||||
// Nan value
|
||||
SGD_NAN_VALUE,
|
||||
};
|
||||
|
||||
/**
|
||||
* Default parameter for the SGD methods.
|
||||
*/
|
||||
static const sgd_para defparam = {100, 1e-6, 0.001, 0.9, 0.999, 1e-8};
|
||||
|
||||
sgd_float *sgd_malloc(const int n_size)
|
||||
{
|
||||
sgd_float *x = new sgd_float [n_size];
|
||||
return x;
|
||||
}
|
||||
|
||||
void sgd_free(sgd_float* x)
|
||||
{
|
||||
if (x != nullptr) delete[] x;
|
||||
x = nullptr;
|
||||
return;
|
||||
}
|
||||
|
||||
sgd_para sgd_default_parameters()
|
||||
{
|
||||
sgd_para param = defparam;
|
||||
return param;
|
||||
}
|
||||
|
||||
const char* sgd_error_str(int er_index)
|
||||
{
|
||||
switch (er_index)
|
||||
{
|
||||
case SGD_SUCCESS:
|
||||
return "Success.";
|
||||
case SGD_CONVERGENCE:
|
||||
return "The iteration reached convergence.";
|
||||
case SGD_STOP:
|
||||
return "The iteration stopped by the progress evaluation function.";
|
||||
case SGD_UNKNOWN_ERROR:
|
||||
return "Unknown error.";
|
||||
case SGD_INVILAD_VARIABLE_SIZE:
|
||||
return "Invalid array size.";
|
||||
case SGD_INVILAD_MAX_ITERATIONS:
|
||||
return "Invalid maximal iteration times.";
|
||||
case SGD_REACHED_MAX_ITERATIONS:
|
||||
return "The maximal iteration is reached.";
|
||||
case SGD_INVILAD_EPSILON:
|
||||
return "Invalid value for epsilon.";
|
||||
case SGD_INVALID_BETA:
|
||||
return "Invalid value for beta.";
|
||||
case SGD_INVALID_ALPHA:
|
||||
return "Invalid value for alpha.";
|
||||
case SGD_INVALID_SIGMA:
|
||||
return "Invalid value for sigma.";
|
||||
case SGD_NAN_VALUE:
|
||||
return "NaN values found.";
|
||||
default:
|
||||
return "Unknown error.";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief An SGD solver function.
|
||||
*
|
||||
* @note The size of all arrays must be equal to n_size.
|
||||
*
|
||||
* @param[in] Evafp Callback function for calculating the objective function and its gradient.
|
||||
* @param[in] Profp Callback function for monitoring the optimization process.
|
||||
* @param fx Returned best value of the objective function by now.
|
||||
* @param m Pointer of the solution array.
|
||||
* @param[in] n_size Length of the solution array.
|
||||
* @param[in] m_size Length of the observation.
|
||||
* @param[in] param Parameters of optimization process.
|
||||
* @param instance The user data sent for the function by the client.
|
||||
*
|
||||
* @return Status of the function.
|
||||
*/
|
||||
typedef int (*sgd_solver_ptr)(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_float *m,
|
||||
const int n_size, const int m_size, const sgd_para *param, void *instance);
|
||||
|
||||
int adam(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_float *x,
|
||||
const int n_size, const int m_size, const sgd_para *param, void *instance);
|
||||
int adamax(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_float *x,
|
||||
const int n_size, const int m_size, const sgd_para *param, void *instance);
|
||||
int adabelief(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_float *x,
|
||||
const int n_size, const int m_size, const sgd_para *param, void *instance);
|
||||
|
||||
|
||||
int sgd_solver(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_float *m,
|
||||
const int n_size, const int m_size, const sgd_para *param, void *instance, sgd_solver_enum solver_id)
|
||||
{
|
||||
sgd_solver_ptr solver;
|
||||
switch (solver_id)
|
||||
{
|
||||
case SGD_ADAM:
|
||||
solver = adam;
|
||||
break;
|
||||
case SGD_ADAMAX:
|
||||
solver = adamax;
|
||||
break;
|
||||
case SGD_ADABELIEF:
|
||||
solver = adabelief;
|
||||
break;
|
||||
default:
|
||||
solver = adam;
|
||||
break;
|
||||
}
|
||||
|
||||
return solver(Evafp, Profp, fx, m, n_size, m_size, param, instance);
|
||||
}
|
||||
|
||||
|
||||
int adam(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_float *x,
|
||||
const int n_size, const int m_size, const sgd_para *param, void *instance)
|
||||
{
|
||||
// set the Adam's parameters
|
||||
sgd_para para = (param != nullptr) ? (*param) : defparam;
|
||||
|
||||
//check parameters
|
||||
if (n_size <= 0) return SGD_INVILAD_VARIABLE_SIZE;
|
||||
if (para.iteration <= 0) return SGD_INVILAD_MAX_ITERATIONS;
|
||||
if (para.epsilon < 0) return SGD_INVILAD_EPSILON;
|
||||
if (para.alpha < 0) return SGD_INVALID_ALPHA;
|
||||
if (para.beta_1 < 0.0 || para.beta_1 >= 1.0) return SGD_INVALID_BETA;
|
||||
if (para.beta_2 < 0.0 || para.beta_2 >= 1.0) return SGD_INVALID_BETA;
|
||||
if (para.sigma < 0.0) return SGD_INVALID_SIGMA;
|
||||
|
||||
sgd_float *mk = sgd_malloc(n_size);
|
||||
sgd_float *vk = sgd_malloc(n_size);
|
||||
sgd_float *g = sgd_malloc(n_size);
|
||||
|
||||
for (int i = 0; i < n_size; i++)
|
||||
{
|
||||
mk[i] = vk[i] = 0.0;
|
||||
}
|
||||
|
||||
sgd_float beta_1t = 1.0, beta_2t = 1.0;
|
||||
sgd_float alpha_k;
|
||||
|
||||
int overall_iteration = para.iteration * m_size;
|
||||
for (int t = 0; t < para.iteration; t++)
|
||||
{
|
||||
beta_1t *= para.beta_1;
|
||||
beta_2t *= para.beta_2;
|
||||
|
||||
alpha_k = para.alpha * sqrt(1.0 - beta_2t)/(1.0 - beta_1t);
|
||||
|
||||
for (int m = 0; m < m_size; m++)
|
||||
{
|
||||
*fx = Evafp(instance, x, g, n_size, m);
|
||||
|
||||
for (int i = 0; i < n_size; i++)
|
||||
{
|
||||
mk[i] = para.beta_1*mk[i] + (1.0 - para.beta_1)*g[i];
|
||||
vk[i] = para.beta_2*vk[i] + (1.0 - para.beta_2)*g[i]*g[i];
|
||||
|
||||
x[i] = x[i] - alpha_k * mk[i]/(sqrt(vk[i]) + para.sigma);
|
||||
if (x[i] != x[i]) return SGD_NAN_VALUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (Profp(instance, *fx, x, g, param, n_size, t)) return SGD_STOP;
|
||||
if (*fx < para.epsilon) return SGD_CONVERGENCE;
|
||||
}
|
||||
|
||||
sgd_free(mk);
|
||||
sgd_free(vk);
|
||||
sgd_free(g);
|
||||
return SGD_REACHED_MAX_ITERATIONS;
|
||||
}
|
||||
|
||||
int adamax(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_float *x,
|
||||
const int n_size, const int m_size, const sgd_para *param, void *instance)
|
||||
{
|
||||
// set the Adam's parameters
|
||||
sgd_para para = (param != nullptr) ? (*param) : defparam;
|
||||
if (param == nullptr) para.alpha = 0.002;
|
||||
|
||||
//check parameters
|
||||
if (n_size <= 0) return SGD_INVILAD_VARIABLE_SIZE;
|
||||
if (para.iteration <= 0) return SGD_INVILAD_MAX_ITERATIONS;
|
||||
if (para.epsilon < 0) return SGD_INVILAD_EPSILON;
|
||||
if (para.alpha < 0) return SGD_INVALID_ALPHA;
|
||||
if (para.beta_1 < 0.0 || para.beta_1 >= 1.0) return SGD_INVALID_BETA;
|
||||
if (para.beta_2 < 0.0 || para.beta_2 >= 1.0) return SGD_INVALID_BETA;
|
||||
if (para.sigma < 0.0) return SGD_INVALID_SIGMA;
|
||||
|
||||
sgd_float *mk = sgd_malloc(n_size);
|
||||
sgd_float *vk = sgd_malloc(n_size);
|
||||
sgd_float *g = sgd_malloc(n_size);
|
||||
|
||||
for (int i = 0; i < n_size; i++)
|
||||
{
|
||||
mk[i] = vk[i] = 0.0;
|
||||
}
|
||||
|
||||
sgd_float beta_1t = 1.0;
|
||||
|
||||
int overall_iteration = para.iteration * m_size;
|
||||
for (int t = 0; t < para.iteration; t++)
|
||||
{
|
||||
beta_1t *= para.beta_1;
|
||||
|
||||
for (int m = 0; m < m_size; m++)
|
||||
{
|
||||
*fx = Evafp(instance, x, g, n_size, m);
|
||||
|
||||
for (int i = 0; i < n_size; i++)
|
||||
{
|
||||
mk[i] = para.beta_1*mk[i] + (1.0 - para.beta_1)*g[i];
|
||||
vk[i] = sgd_max(para.beta_2*vk[i], sgd_fabs(g[i]));
|
||||
|
||||
x[i] = x[i] - para.alpha * mk[i]/((1.0 - beta_1t)*vk[i]);
|
||||
if (x[i] != x[i]) return SGD_NAN_VALUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (Profp(instance, *fx, x, g, param, n_size, t)) return SGD_STOP;
|
||||
if (*fx < para.epsilon) return SGD_CONVERGENCE;
|
||||
}
|
||||
|
||||
sgd_free(mk);
|
||||
sgd_free(vk);
|
||||
sgd_free(g);
|
||||
return SGD_REACHED_MAX_ITERATIONS;
|
||||
}
|
||||
|
||||
int adabelief(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_float *x,
|
||||
const int n_size, const int m_size, const sgd_para *param, void *instance)
|
||||
{
|
||||
// set the Adam's parameters
|
||||
sgd_para para = (param != nullptr) ? (*param) : defparam;
|
||||
|
||||
//check parameters
|
||||
if (n_size <= 0) return SGD_INVILAD_VARIABLE_SIZE;
|
||||
if (para.iteration <= 0) return SGD_INVILAD_MAX_ITERATIONS;
|
||||
if (para.epsilon < 0) return SGD_INVILAD_EPSILON;
|
||||
if (para.alpha < 0) return SGD_INVALID_ALPHA;
|
||||
if (para.beta_1 < 0.0 || para.beta_1 >= 1.0) return SGD_INVALID_BETA;
|
||||
if (para.beta_2 < 0.0 || para.beta_2 >= 1.0) return SGD_INVALID_BETA;
|
||||
if (para.sigma < 0.0) return SGD_INVALID_SIGMA;
|
||||
|
||||
sgd_float *mk = sgd_malloc(n_size);
|
||||
sgd_float *vk = sgd_malloc(n_size);
|
||||
sgd_float *g = sgd_malloc(n_size);
|
||||
|
||||
for (int i = 0; i < n_size; i++)
|
||||
{
|
||||
mk[i] = vk[i] = 0.0;
|
||||
}
|
||||
|
||||
sgd_float beta_1t = 1.0, beta_2t = 1.0;
|
||||
sgd_float alpha_k;
|
||||
|
||||
int overall_iteration = para.iteration * m_size;
|
||||
for (int t = 0; t < para.iteration; t++)
|
||||
{
|
||||
beta_1t *= para.beta_1;
|
||||
beta_2t *= para.beta_2;
|
||||
|
||||
alpha_k = para.alpha * sqrt(1.0 - beta_2t)/(1.0 - beta_1t);
|
||||
|
||||
for (int m = 0; m < m_size; m++)
|
||||
{
|
||||
*fx = Evafp(instance, x, g, n_size, m);
|
||||
|
||||
for (int i = 0; i < n_size; i++)
|
||||
{
|
||||
mk[i] = para.beta_1*mk[i] + (1.0 - para.beta_1)*g[i];
|
||||
vk[i] = para.beta_2*vk[i] + (1.0 - para.beta_2)*(g[i] - mk[i])*(g[i] - mk[i]);
|
||||
|
||||
x[i] = x[i] - alpha_k * mk[i]/(sqrt(vk[i]) + para.sigma);
|
||||
if (x[i] != x[i]) return SGD_NAN_VALUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (Profp(instance, *fx, x, g, param, n_size, t)) return SGD_STOP;
|
||||
if (*fx < para.epsilon) return SGD_CONVERGENCE;
|
||||
}
|
||||
|
||||
sgd_free(mk);
|
||||
sgd_free(vk);
|
||||
sgd_free(g);
|
||||
return SGD_REACHED_MAX_ITERATIONS;
|
||||
}
|
194
src/lib/sgd.h
Normal file
194
src/lib/sgd.h
Normal file
@ -0,0 +1,194 @@
|
||||
/******************************************************//**
|
||||
* C++ library of the Stochastic Gradient Descent (SGD) methods.
|
||||
*
|
||||
* Copyright (c) 2020-2031 Yi Zhang (zhangyiss@icloud.com)
|
||||
* All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
*********************************************************/
|
||||
|
||||
#ifndef _SGD_H
|
||||
#define _SGD_H
|
||||
|
||||
#ifndef _cplusplus
|
||||
extern "C"
|
||||
{
|
||||
|
||||
#include "stddef.h"
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @brief A simple definition of the float type we use here.
|
||||
* Easy to change in the future. For now it is just an alias of the double type.
|
||||
*/
|
||||
typedef double sgd_float;
|
||||
|
||||
/**
|
||||
* @brief Types of method that could be recognized by the sgd_solver() function.
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
/**
|
||||
* Adam method.
|
||||
*/
|
||||
SGD_ADAM,
|
||||
|
||||
/**
|
||||
* AdaMax method.
|
||||
*/
|
||||
SGD_ADAMAX,
|
||||
|
||||
/**
|
||||
* AdaBelief method.
|
||||
*/
|
||||
SGD_ADABELIEF,
|
||||
} sgd_solver_enum;
|
||||
|
||||
/**
|
||||
* @brief Parameters of the Adam method.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
/**
|
||||
* Iteration times for the entire observation set. The default is 100.
|
||||
*/
|
||||
int iteration;
|
||||
|
||||
/**
|
||||
* Epsilon for convergence test. This parameter determines the accuracy
|
||||
* with which the solution is to be found. Must be bigger than zero and
|
||||
* the default is 1e-6.
|
||||
*/
|
||||
sgd_float epsilon;
|
||||
|
||||
/**
|
||||
* Step size of the iteration. The default value is 0.001 for Adam and 0.002
|
||||
* for AdaMax.
|
||||
*/
|
||||
sgd_float alpha;
|
||||
|
||||
/**
|
||||
* Exponential decay rates for the first order moment estimates. The range of this
|
||||
* parameter is [0, 1) and the default value is 0.9.
|
||||
*/
|
||||
sgd_float beta_1;
|
||||
|
||||
/**
|
||||
* Exponential decay rates for the second order moment estimates. The range of this
|
||||
* parameter is [0, 1) and the default value is 0.999.
|
||||
*/
|
||||
sgd_float beta_2;
|
||||
|
||||
/**
|
||||
* A small positive number validates the algorithm. The default value is 1e-8.
|
||||
*/
|
||||
sgd_float sigma;
|
||||
} sgd_para;
|
||||
|
||||
/**
|
||||
* @brief Callback interface for calculating the value of objective function
|
||||
* and the corresponding model gradients.
|
||||
*
|
||||
* @param instance The user data sent for the sgd_solver() functions by the client.
|
||||
* @param x Pointer of the solution.
|
||||
* @param g Pointer of the model gradient.
|
||||
* @param n_size Length of the solution.
|
||||
* @param m Length of the observation.
|
||||
*
|
||||
* @return Value of objective function.
|
||||
*/
|
||||
typedef sgd_float (*sgd_evaulate_ptr)(void *instance, const sgd_float *x, sgd_float *g,
|
||||
const int n_size, const int m);
|
||||
|
||||
/**
|
||||
* @brief Callback interface for monitoring the progress and terminate the iteration
|
||||
* if necessary.
|
||||
*
|
||||
* @param instance The user data sent for the sgd_solver() functions by the client.
|
||||
* @param fx Current value of the objective function.
|
||||
* @param x Current solution.
|
||||
* @param g Current model gradients.
|
||||
* @param param User defined iteration parameters.
|
||||
* @param n_size Length of the solution array.
|
||||
* @param k Times of the iteration.
|
||||
*
|
||||
* @return int Zero to continue the optimization process. Otherwise, the optimization
|
||||
* process will be terminated.
|
||||
*/
|
||||
typedef int (*sgd_progress_ptr)(void *instance, sgd_float fx, const sgd_float *x, const sgd_float *g,
|
||||
const sgd_para *param, const int n_size, const int k);
|
||||
|
||||
/**
|
||||
* @brief Locate memory for a sgd_float pointer type.
|
||||
*
|
||||
* @param[in] n_size Size of the sgd_float array.
|
||||
*
|
||||
* @return Pointer of the data
|
||||
*/
|
||||
sgd_float *sgd_malloc(const int n_size);
|
||||
|
||||
/**
|
||||
* @brief Destroy memory used by the sgd_float type array.
|
||||
*
|
||||
* @param x Pointer of the array.
|
||||
*/
|
||||
void sgd_free(sgd_float *x);
|
||||
|
||||
/**
|
||||
* @brief Return a sgd_para type instance with default values.
|
||||
*
|
||||
* @return A sgd_para type instance.
|
||||
*/
|
||||
sgd_para sgd_default_parameters();
|
||||
|
||||
/**
|
||||
* @brief Return a string explanation for the sgd_solver() function's return values.
|
||||
*
|
||||
* @param[in] er_index The error index returned by the sgd_solver() function.
|
||||
*
|
||||
* @return A string explanation of the error.
|
||||
*/
|
||||
const char* sgd_error_str(int er_index);
|
||||
|
||||
/**
|
||||
* @brief An Adam solver function.
|
||||
*
|
||||
* @note The size of all arrays must be equal to n_size.
|
||||
*
|
||||
* @param[in] Evafp Callback function for calculating the objective function and its gradient.
|
||||
* @param[in] Profp Callback function for monitoring the optimization process.
|
||||
* @param fx Returned best value of the objective function by now.
|
||||
* @param m Pointer of the solution array.
|
||||
* @param[in] n_size Length of the solution array.
|
||||
* @param[in] m_size Length of the observation.
|
||||
* @param[in] param Parameters of optimization process.
|
||||
* @param instance The user data sent for the function by the client.
|
||||
* @param solver_id Solver type used to solve the objective. The default value is SGD_ADAM.
|
||||
*
|
||||
* @return Status of the function.
|
||||
*/
|
||||
int sgd_solver(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_float *m,
|
||||
const int n_size, const int m_size, const sgd_para *param, void *instance,
|
||||
sgd_solver_enum solver_id = SGD_ADAM);
|
||||
|
||||
#ifndef _cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // _SGD_H
|
118
src/sample/sample.cpp
Normal file
118
src/sample/sample.cpp
Normal file
@ -0,0 +1,118 @@
|
||||
#include "../lib/sgd.h"
|
||||
|
||||
#include "iostream"
|
||||
#include "cmath"
|
||||
#include "vector"
|
||||
#include "string"
|
||||
|
||||
#define pi (atan(1.0)*4.0)
|
||||
|
||||
//方向梯度类型
|
||||
enum gradient_type_e
|
||||
{
|
||||
Dnull,
|
||||
Dx,
|
||||
Dy
|
||||
};
|
||||
|
||||
//正态函数参数组
|
||||
struct gaussian_para
|
||||
{
|
||||
double mu_x, mu_y, sigma_x, sigma_y, rho;
|
||||
};
|
||||
|
||||
double gaussian_distribution(double x, double y, const gaussian_para &p,
|
||||
gradient_type_e mode = Dnull)
|
||||
{
|
||||
double part = -0.5*(pow((x-p.mu_x)/p.sigma_x,2) -2*p.rho*(x-p.mu_x)*(y-p.mu_y)/(p.sigma_x*p.sigma_y)
|
||||
+ pow((y-p.mu_y)/p.sigma_y,2)) / (1.0-p.rho*p.rho);
|
||||
|
||||
double part2;
|
||||
if (mode == Dnull)
|
||||
{
|
||||
return exp(part)/(2*pi*p.sigma_x*p.sigma_y*sqrt(1-p.rho*p.rho));
|
||||
}
|
||||
else if (mode == Dx)
|
||||
{
|
||||
part2 = -1.0*((x-p.mu_x)/(p.sigma_x*p.sigma_x) - p.rho*(y-p.mu_y)/(p.sigma_x*p.sigma_y))/(1.0-p.rho*p.rho);
|
||||
return part2*exp(part)/(2*pi*p.sigma_x*p.sigma_y*sqrt(1-p.rho*p.rho));
|
||||
}
|
||||
else if (mode == Dy)
|
||||
{
|
||||
part2 = -1.0*((y-p.mu_y)/(p.sigma_y*p.sigma_y) - p.rho*(x-p.mu_x)/(p.sigma_x*p.sigma_y))/(1.0-p.rho*p.rho);
|
||||
return part2*exp(part)/(2*pi*p.sigma_x*p.sigma_y*sqrt(1-p.rho*p.rho));
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string err_str = "Invalid calculation mode.";
|
||||
throw err_str;
|
||||
}
|
||||
}
|
||||
|
||||
// 正演目标函数的参数
|
||||
// 声明我们所使用的二维高斯分布的参数
|
||||
static std::vector<gaussian_para> para;
|
||||
|
||||
// 计算当前解的目标函数值以及目标函数在不同方向的梯度
|
||||
double evaluate(void *instance, const double *x, double *g, const int n, const int m)
|
||||
{
|
||||
double fx = 0.0;
|
||||
g[0] = g[1] = 0.0;
|
||||
|
||||
for (int i = 0; i < para.size(); i++)
|
||||
{
|
||||
fx += -1.0*gaussian_distribution(x[0], x[1], para[i]);
|
||||
g[0] += -1.0*gaussian_distribution(x[0], x[1], para[i], Dx);
|
||||
g[1] += -1.0*gaussian_distribution(x[0], x[1], para[i], Dy);
|
||||
}
|
||||
|
||||
fx += 12.82906044;
|
||||
|
||||
g[0] *= 2.0*fx;
|
||||
g[1] *= 2.0*fx;
|
||||
return fx*fx;
|
||||
}
|
||||
|
||||
int progress(void *instance, sgd_float fx, const sgd_float *x, const sgd_float *g,
|
||||
const sgd_para *param, const int n_size, const int k)
|
||||
{
|
||||
std::clog << "iteration time: " << k << ", fx: " << fx << "\r";
|
||||
if (fx < param->epsilon) std::clog << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
gaussian_para tmp_p;
|
||||
|
||||
tmp_p.mu_x = 0.50; tmp_p.mu_y = 0.40; tmp_p.sigma_x = 0.15; tmp_p.sigma_y = 0.10; tmp_p.rho = 0.2;
|
||||
para.push_back(tmp_p);
|
||||
|
||||
tmp_p.mu_x = 0.60; tmp_p.mu_y = 0.70; tmp_p.sigma_x = 0.10; tmp_p.sigma_y = 0.20; tmp_p.rho = 0.0;
|
||||
para.push_back(tmp_p);
|
||||
|
||||
sgd_float fx;
|
||||
sgd_float x[2] = {0.25, 0.20};
|
||||
|
||||
sgd_para my_para = sgd_default_parameters();
|
||||
my_para.iteration = 10000;
|
||||
my_para.alpha = 0.01;
|
||||
|
||||
int ret = sgd_solver(evaluate, progress, &fx, &x[0], 2, 1, &my_para, nullptr);
|
||||
std::clog << "Adam return: " << sgd_error_str(ret) << std::endl;
|
||||
std::clog << "fx = " << fx << std::endl;
|
||||
std::clog << "model: " << x[0] << " " << x[1] << std::endl;
|
||||
|
||||
x[0] = 0.25; x[1] = 0.20;
|
||||
ret = sgd_solver(evaluate, progress, &fx, &x[0], 2, 1, &my_para, nullptr, SGD_ADAMAX);
|
||||
std::clog << "AdaMax return: " << sgd_error_str(ret) << std::endl;
|
||||
std::clog << "fx = " << fx << std::endl;
|
||||
std::clog << "model: " << x[0] << " " << x[1] << std::endl;
|
||||
|
||||
x[0] = 0.25; x[1] = 0.20;
|
||||
ret = sgd_solver(evaluate, progress, &fx, &x[0], 2, 1, &my_para, nullptr, SGD_ADABELIEF);
|
||||
std::clog << "AdaBelief return: " << sgd_error_str(ret) << std::endl;
|
||||
std::clog << "fx = " << fx << std::endl;
|
||||
std::clog << "model: " << x[0] << " " << x[1] << std::endl;
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user