gctl_optimization/lib/optimization/lgd.h

/********************************************************
 *  ██████╗  ██████╗████████╗██╗
 * ██╔════╝ ██╔════╝╚══██╔══╝██║
 * ██║  ███╗██║        ██║   ██║
 * ██║   ██║██║        ██║   ██║
 * ╚██████╔╝╚██████╗   ██║   ███████╗
 *  ╚═════╝  ╚═════╝   ╚═╝   ╚══════╝
 * Geophysical Computational Tools & Library (GCTL)
 *
 * Copyright (c) 2022  Yi Zhang (yizhang-geo@zju.edu.cn)
 *
 * GCTL is distributed under a dual licensing scheme. You can redistribute 
 * it and/or modify it under the terms of the GNU Lesser General Public 
 * License as published by the Free Software Foundation, either version 2 
 * of the License, or (at your option) any later version. You should have 
 * received a copy of the GNU Lesser General Public License along with this 
 * program. If not, see <http://www.gnu.org/licenses/>.
 * 
 * If the terms and conditions of the LGPL v.2. would prevent you from using 
 * the GCTL, please consider the option to obtain a commercial license for a 
 * fee. These licenses are offered by the GCTL's original author. As a rule, 
 * licenses are provided "as-is", unlimited in time for a one time fee. Please 
 * send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget 
 * to include some description of your company and the realm of its activities. 
 * Also add information on how to contact you by electronic and paper mail.
 ******************************************************/

#ifndef _GCTL_LGD_H
#define _GCTL_LGD_H

#include "gctl/core.h"
#include "gctl/io.h"
#include "gctl/maths.h"
#include "gctl/algorithm.h"

#include "gctl_optimization_config.h"
#ifdef GCTL_OPTIMIZATION_TOML
#include "toml.hpp"
#endif // GCTL_OPTIMIZATION_TOML

#if defined _WINDOWS || __WIN32__
#include "windows.h"
#endif // _WINDOWS || __WIN32__

#ifdef GSTL_OPENMP
#include "omp.h"
#endif // GSTL_OPENMP

namespace gctl
{
	/**
	 * @brief      return value of the lgd_solver class.
	 */
	enum lgd_return_code
	{
		LGD_CONVERGENCE = 1, ///< The iteration reached convergence.
		LGD_STOP, ///< The iteration stopped by the progress monitoring function.
		LGD_REACHED_MAX_ITERATIONS, ///< Iteration reached max limit.
        LGD_INVALID_SOLUTION_SIZE = -1024, ///< Invalid solution size.
		LGD_INVALID_MAX_ITERATIONS, ///< The maximal iteration times is negative.
        LGD_INVALID_EPSILON, ///< The epsilon is negative.
		LGD_INVALID_STDV,
        LGD_INVALID_BETA, ///< Invalid value for beta.
        LGD_INVALID_ALPHA,
        LGD_INVALID_SIGMA,
		LGD_NAN_VALUE, ///< Nan value.
	};

    /**
     * @brief    Parameters of the L-GD method.
     */
    struct lgd_para
    {
        /**
         * Maximal times of the lévy flight. The iteration process will stop till the maximal 
         * flight times is reached unless the mean convergence test is set and satisfied. To 
         * active the test, set the 'batch' parameter which is shown as below. The default value 
         * is 1000.
         */
        int flight_times;

        /**
         * Batch size for the mean convergence test. This parameter determines the batch size, 
         * in recorded solutions, to compute the value of the objective function. Note that 
         * only qualified solutions will be recorded for analyzing if the 'lambda' parameter is 
         * set. The library does not perform the mean convergence test if the value of this 
         * parameter is zero. The default is 0.
         */
        int batch;

        /**
         * Random seed for generating the Lévy distribution. Input zero for setting the seed according
         * to the current time value. The default is 0.
         */
        int seed;

        /**
         * Epsilon for the mean convergence test. This parameter determines the accuracy 
         * with which the mean solution is to be found. The default is 1e-5.
         */
        double epsilon;

        /**
         * Standard deviation of v that is used to calculate the distance of each
         * lévy flight in length = stddev_u/|stddev_v|^{1/beta}. This parameter is 
         * typically given as 1.0.
         */
        double stddev_v;

        /**
         * Scale parameter for calculating stddev_u and the flying length. Must be at 
         * (1.0, 2.0). The default value is 1.5. The bigger beta is the smaller of the 
         * range of flying length gets.
         */
        double beta;

        /**
         * Scale parameter multiplied by the flying length. The default value is 0.01. 
         * The parameter should be set according to the expected convergence speed. Normally, 
         * The bigger alpha is, the faster the L-GD convergences. However, the L-GD may 
         * miss the optimized solutions if alpha was too big.
         */
        double alpha;

        /**
         * Sigma for the stagnation point test. The algorithm will take one search 
         * orthogonal with the last iteration if the module of the gradients is smaller 
         * than sigma. This mechanism helps the algorithm escaping from stagnation 
         * points such as local minimal or saddle points.The default is 1e-8.
         */
        double sigma;

        /**
         * Threshold for recording the search paths. If the value is bigger then zero, then
         * only values of the objective function that are smaller to equal to the threshold be 
         * used for statistic analyzing. Otherwise, all records will be used. The recorded paths 
         * could be save to file using the save_lgd_trace(string) function if set_lgd_record_trace() 
         * is set. The default is -1.0.
         */
        double lambda;

        /**
         * @brief First order moment. The default value is 0.5.
         * 
         */
        double fmt;

        /**
         * @brief Second order moment. The default value is 0.05.
         * 
         */
        double smt;
    };

    /**
     * @brief The lévy gradient descent solver (LGD).
     * 
     */
    class lgd_solver
    {
    public:
        lgd_solver(); ///< Default constructor.
        virtual ~lgd_solver(); ///< Default de-constructor.

        /**
         * @brief Interface for the evaluation of the objective function. Concrete 
         * contents of this function is determined according to the optimizing problem. 
         * 
         * @param x Inputs of the current solution.
         * @param g Outputs of the model gradient calculated using the input solution.
         * @return Current objective value.
         */
        virtual double LGD_Evaluate(const array<double> &x, array<double> &g) = 0;

        /**
         * @brief Default monitoring function of the optimizing process. 
         * 
         * @param best_fx Objective value of the best solution. 
         * @param curr_fx Objective value of the current solution.
         * @param mean_fx Objective value of the mean solution.
         * @param param L-GD's parameters used for the optimzing process.
         * @param curr_t Current flight times.
         * @return The optimizing process will be stopped if a non-zero value is returned.
         */
        virtual int LGD_Progress(const int curr_t, const double curr_fx, const double mean_fx, 
            const double best_fx, const lgd_para &param);

        /**
         * @brief Susppend all output info.
         * 
         */
        void lgd_silent();

        /**
         * @brief Set interval to run the LGD_Progress function.
         * 
         * @param inter 
         */
        void set_lgd_report_interval(int inter);

        /**
         * @brief Show solver setup.
         * 
         */
        void show_solver();

        /**
         * @brief Turn on the recording of fight traces. Use this with caution as it may use a lot of momeries.
         * 
         */
        void set_lgd_record_trace();

        /**
         * @brief Save fight traces to file.
         * 
         * @note Must turn on the trace recording for this function to work properly. 
         * Only qualified solutions will be saved if the recording threshold is set.
         * 
         * @param trace_file output file.
         */
        void save_lgd_trace(std::string trace_file);
        
        /**
         * @brief 按照levy分布采样一组数据，参数可通过set_lgd_para函数设置
         * 
         * @param dist 返回的levy分布
         * 
         * @return 状态代码
         */
        lgd_return_code get_levy_distribution(array<double> &dist);
        
        lgd_para default_lgd_para();
        void set_lgd_para(const lgd_para &param);

#ifdef GCTL_OPTIMIZATION_TOML

        /**
		 * @brief Set parameters of the levy-gradient descent algorithm using a toml file. 
		 * All parameter options must be listed under a top-level table 'lgd'. Available options 
		 * under the 'lgd' table are as declared in the lgd_para structure.
		 * 
		 * @param filename Input configuration file
		 */
        void set_lgd_para(std::string filename);

        /**
		 * @brief Set parameters of the levy-gradient descent algorithm using a toml file. 
		 * All parameter options must be listed under a top-level table 'lgd'. Available options 
		 * under the 'lgd' table are as declared in the lgd_para structure.
		 * 
		 * @param toml_data Input toml data
		 */
        void set_lgd_para(const toml::value &toml_data);

#endif // GCTL_OPTIMIZATION_TOML

        void LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m, 
            std::ostream &ss = std::clog, bool verbose = true, bool err_throw = false);
        
        void LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m, 
            const array<double> &alphas, std::ostream &ss = std::clog, 
            bool verbose = true, bool err_throw = false);

        void LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m, 
            const array<double> &lows, const array<double> &higs, std::ostream &ss = std::clog, 
            bool verbose = true, bool err_throw = false);

        void LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m, 
            double low, double hig, std::ostream &ss = std::clog, bool verbose = true, 
            bool err_throw = false);

        void LGD_Minimize_Momentum(array<double> &best_m, array<double> &mean_m, array<double> &std_m, 
            std::ostream &ss = std::clog, bool verbose = true, bool err_throw = false);

        void LGD_Minimize_Momentum(array<double> &best_m, array<double> &mean_m, array<double> &std_m, 
            const array<double> &lows, const array<double> &higs, std::ostream &ss = std::clog, 
            bool verbose = true, bool err_throw = false);

    private:
        void lgd_error_str(lgd_return_code err_code, std::ostream &ss = std::clog, bool err_throw = false);
        lgd_return_code lgd(array<double> &best_m, array<double> &mean_m, array<double> &std_m);
        lgd_return_code lgd_momentum(array<double> &best_m, array<double> &mean_m, array<double> &std_m);

    private:
        lgd_para lgd_param_;
        int lgd_inter_, lgd_ques_num_, lgd_trace_times_;
        bool lgd_silent_, lgd_has_range_, lgd_has_alpha_, lgd_save_trace_;
        array<double> lgd_low_, lgd_hig_, lgd_alpha_, lgd_trace_;
    };
}

#endif // _GCTL_LGD_H