/******************************************************//**
 *    C++ library of the Stochastic Gradient Descent (SGD) methods.
 *
 * Copyright (c) 2020-2031 Yi Zhang (zhangyiss@icloud.com)
 * All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *********************************************************/

#ifndef _SGD_H
#define _SGD_H

/* Standard headers are included at file scope, outside any linkage block. */
#include <stddef.h>

/*
 * Give the declarations below C linkage when this header is compiled as C++.
 * The guard must test the standard predefined macro __cplusplus so that a C
 * compiler never sees the extern "C" syntax.
 */
#ifdef __cplusplus
extern "C"
{
#endif

/**
 * @brief      Floating-point type used throughout the library.
 *
 * Kept as a typedef so the precision can be changed in one place.
 * For now it is just an alias of the double type.
 */
typedef double sgd_float;

/**
 * @brief      Types of method that could be recognized by the sgd_solver() function.
 */
typedef enum
{
    /**
     * Classic momentum.
     */
    SGD_MOMENTUM,
    /**
     * Nesterov’s accelerated gradient (NAG)
     */
    SGD_NAG,
    /**
     * AdaGrad method.
     */
    SGD_ADAGRAD,
    /**
     * RMSProp method.
     */
    SGD_RMSPROP,
    /**
     * Adam method.
     */
    SGD_ADAM,
    /**
     * Nadam method.
     */
    SGD_NADAM,
    /**
     * AdaMax method.
     */
    SGD_ADAMAX,
    /**
     * AdaBelief method.
     */
    SGD_ADABELIEF,
} sgd_solver_enum;

/**
 * @brief      Parameters that control the optimization process of sgd_solver().
 *
 * Not every field is meaningful for every solver type; see the notes on the
 * individual fields.
 */
typedef struct
{
    /**
     * Iteration times for the entire observation set. The default is 100.
     */
    int iteration;

    /**
     * Epsilon for convergence test. This parameter determines the accuracy
     * with which the solution is to be found. Must be bigger than zero and
     * the default is 1e-6.
     */
    sgd_float epsilon;

    /**
     * Damping rate of the classic momentum method and the NAG method, which
     * is typically given between 0 and 1. The default is 0.01.
     */
    sgd_float mu;

    /**
     * Step size of the iteration. The default value is 0.001 for Adam and 0.002
     * for AdaMax.
     */
    sgd_float alpha;

    /**
     * Exponential decay rate for the first order moment estimates. The range of
     * this parameter is [0, 1) and the default value is 0.9.
     */
    sgd_float beta_1;

    /**
     * Exponential decay rate for the second order moment estimates. The range of
     * this parameter is [0, 1) and the default value is 0.999.
     */
    sgd_float beta_2;

    /**
     * A small positive number that validates the algorithm. The default value
     * is 1e-8.
     */
    sgd_float sigma;
} sgd_para;

/**
 * @brief      Callback interface for calculating the value of the objective
 *             function and the corresponding model gradients.
 *
 * @note       The spelling of the type name is part of the public interface
 *             and is kept as is.
 *
 * @param      instance  The user data sent to the sgd_solver() function by the client.
 * @param      x         Pointer of the solution.
 * @param      g         Pointer of the model gradient.
 * @param      n_size    Length of the solution.
 * @param      m         Index of the observation.
 *
 * @return     Value of the objective function.
 */
typedef sgd_float (*sgd_evaulate_ptr)(void *instance, const sgd_float *x, sgd_float *g,
    const int n_size, const int m);

/**
 * @brief      Callback interface for monitoring the progress and terminating
 *             the iteration if necessary.
 *
 * @param      instance  The user data sent to the sgd_solver() function by the client.
 * @param      fx        Current value of the objective function.
 * @param      x         Current solution.
 * @param      g         Current model gradients.
 * @param      param     User defined iteration parameters.
 * @param      n_size    Length of the solution array.
 * @param      k         Times of the iteration.
 *
 * @return     Zero to continue the optimization process. Otherwise, the
 *             optimization process will be terminated.
 */
typedef int (*sgd_progress_ptr)(void *instance, sgd_float fx, const sgd_float *x,
    const sgd_float *g, const sgd_para *param, const int n_size, const int k);

/**
 * @brief      Allocate memory for an array of sgd_float.
 *
 * @param[in]  n_size  Size of the sgd_float array.
 *
 * @return     Pointer of the data.
 */
sgd_float *sgd_malloc(const int n_size);

/**
 * @brief      Destroy memory used by the sgd_float type array.
 *
 * @param      x     Pointer of the array (presumably one obtained from
 *                   sgd_malloc(); confirm against the implementation).
 */
void sgd_free(sgd_float *x);

/**
 * @brief      Return a sgd_para type instance with default values.
 *
 * @return     A sgd_para type instance.
 */
sgd_para sgd_default_parameters(void);

/**
 * @brief      Return a string explanation for the sgd_solver() function's return values.
 *
 * @param[in]  er_index  The error index returned by the sgd_solver() function.
 *
 * @return     A string explanation of the error.
 */
const char *sgd_error_str(int er_index);

/**
 * @brief      Solve an optimization problem with the SGD method selected by solver_id.
 *
 * @note       The size of all arrays must be equal to n_size.
 *
 * @param[in]  Evafp      Callback function for calculating the objective function and its gradient.
 * @param[in]  Profp      Callback function for monitoring the optimization process.
 * @param      fx         Returned best value of the objective function by now.
 * @param      m          Pointer of the solution array.
 * @param[in]  n_size     Length of the solution array.
 * @param[in]  m_size     Length of the observation.
 * @param[in]  param      Parameters of optimization process.
 * @param      instance   The user data sent for the function by the client.
 * @param      solver_id  Solver type used to solve the objective. In C++ the default
 *                        value is SGD_ADAM; C has no default arguments, so C callers
 *                        must always pass it explicitly.
 *
 * @return     Status of the function. Use sgd_error_str() to get an explanation.
 */
#ifdef __cplusplus
int sgd_solver(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_float *m,
    const int n_size, const int m_size, const sgd_para *param, void *instance,
    sgd_solver_enum solver_id = SGD_ADAM);
#else
int sgd_solver(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_float *m,
    const int n_size, const int m_size, const sgd_para *param, void *instance,
    sgd_solver_enum solver_id);
#endif

#ifdef __cplusplus
}
#endif

#endif // _SGD_H