/********************************************************
 *  ██████╗  ██████╗████████╗██╗
 * ██╔════╝ ██╔════╝╚══██╔══╝██║
 * ██║  ███╗██║        ██║   ██║
 * ██║   ██║██║        ██║   ██║
 * ╚██████╔╝╚██████╗   ██║   ███████╗
 *  ╚═════╝  ╚═════╝   ╚═╝   ╚══════╝
 *
 * Geophysical Computational Tools & Library (GCTL)
 *
 * Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
 *
 * GCTL is distributed under a dual licensing scheme. You can redistribute
 * it and/or modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, either version 2
 * of the License, or (at your option) any later version. You should have
 * received a copy of the GNU Lesser General Public License along with this
 * program. If not, see <http://www.gnu.org/licenses/>.
 *
 * If the terms and conditions of the LGPL v.2 would prevent you from using
 * the GCTL, please consider the option to obtain a commercial license for a
 * fee. These licenses are offered by the GCTL's original author. As a rule,
 * licenses are provided "as-is", unlimited in time for a one-time fee. Please
 * send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
 * to include some description of your company and the realm of its activities.
 * Also add information on how to contact you by electronic and paper mail.
 ******************************************************/

#ifndef _GCTL_GRADNORM_H
#define _GCTL_GRADNORM_H

#include "gctl/core.h"
#include "gctl/io.h"

namespace gctl
{
    /**
     * @brief Gradient-normalized (balanced) multitask loss evaluation.
     *
     * @note Reference: Zhao Chen et al., 2018. GradNorm: Gradient normalization
     * for adaptive loss balancing in deep multitask networks.
     */
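    // For orientation, the balancing loss from the reference above (paraphrased
    // from the paper, not extracted from this implementation) is
    //
    //   L_grad = sum_i | G_i - mean(G) * r_i^alpha |,
    //
    // where G_i is the gradient norm of the i-th weighted task loss, r_i is the
    // relative inverse training rate of task i (its loss ratio normalized over
    // all tasks), and alpha is the control factor (see set_control_weight()).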
    class grad_norm
    {
    private:
        bool initialized_;                // set by InitGradNorm()
        size_t fx_n_, fx_c_;              // number of loss functions; count of losses added so far
        double resi_T_, T_;               // T_ is the target sum of the weights (see set_normal_sum())
        double lamda_, alpha_, multi_fx_; // weight learning rate, control factor alpha, merged objective
        array<bool> fst_iter_;            // per-task first-iteration flags
        array<double> wgts_;              // task weights
        array<double> fx0_;               // initial loss values
        array<double> Gw_, Gdw_, Lx_;     // per-task gradient norms and loss ratios
        array<double> grad_;              // merged model gradients
        array<double> rcd_fxs_;           // recorded loss values
        array<double> fixed_wgts_;        // fixed task weights (see set_fixed_weight())
        std::vector<double> rcd_wgts_;    // recorded weights (see get_records())

    public:
        grad_norm();
        virtual ~grad_norm();

        /**
         * @brief Initialize the number of loss functions and the size of the model gradients.
         *
         * @note This function must be called first.
         *
         * @param num      Total number of loss functions
         * @param grad_num Size of the model gradients
         */
        void InitGradNorm(size_t num, size_t grad_num);

        /**
         * @brief Add the value of a single loss function and the current model gradients.
         *
         * @param fx Objective value
         * @param g  Model gradients
         *
         * @return Weighted value of the current loss function
         */
        double AddSingleLoss(double fx, const array<double> &g);

        /**
         * @brief Get the merged objective value and the model gradients.
         *
         * @note All single loss functions must be added before calling this function. The
         * merged objective value and the model gradients are reset after this call.
         *
         * @param g Model gradients
         *
         * @return Objective value
         */
        double GradNormLoss(array<double> &g);

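        // A minimal per-iteration sketch (illustrative only: loss1, loss2, grad1,
        // grad2, the model vector m and the optimizer step are hypothetical and
        // not part of this class):
        //
        //   gctl::grad_norm gn;
        //   gn.InitGradNorm(2, m.size());          // two tasks
        //   for (size_t t = 0; t < max_iter; t++)
        //   {
        //       gn.AddSingleLoss(loss1(m), grad1); // weighted loss of task 1
        //       gn.AddSingleLoss(loss2(m), grad2); // weighted loss of task 2
        //       double fx = gn.GradNormLoss(g);    // merged objective and gradients
        //       // ... take an optimization step on m using fx and g ...
        //       gn.UpdateWeights();                // adapt the task weights
        //   }
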
        /**
         * @brief Update the weights of the single loss functions using the GradNorm algorithm.
         */
        void UpdateWeights();

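        // Sketch of the step implied by the reference (the exact bookkeeping in
        // this implementation may differ): each weight takes one gradient step
        // on L_grad with the learning rate l set by set_weight_step(),
        //
        //   w_i <- w_i - l * dL_grad/dw_i,
        //
        // after which the weights are rescaled so their sum equals the target
        // sum set by set_normal_sum().
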
        /**
         * @brief Show statistics of the tasks' weights and loss values.
         *
         * @param ss       Output stream (defaults to std::clog)
         * @param one_line Print the statistics on a single line
         */
        void ShowStatistics(std::ostream &ss = std::clog, bool one_line = false);

        /**
         * @brief Set the control factor alpha. The default is 1.0.
         *
         * @param a Input alpha
         */
        void set_control_weight(double a);

        /**
         * @brief Set the target sum of the weights. The default equals the number of loss functions.
         *
         * @param t Input sum
         */
        void set_normal_sum(double t);

        /**
         * @brief Set the learning rate of the weights. The default is 0.001.
         *
         * @param l Input learning rate
         */
        void set_weight_step(double l);

        /**
         * @brief Set a fixed weight for a single loss function.
         *
         * @param id  Index of the loss function
         * @param wgt Weight of the loss function
         */
        void set_fixed_weight(int id, double wgt);

        /**
         * @brief Set the initial weights.
         *
         * @param w Input weights
         */
        void set_initial_weights(const array<double> &w);

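        // Typical configuration before the iterations start (the values below
        // are illustrative, not recommendations from this header):
        //
        //   gn.set_control_weight(1.5);  // control factor alpha
        //   gn.set_weight_step(0.025);   // learning rate of the weights
        //   gn.set_fixed_weight(0, 1.0); // pin the weight of the first task
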
        /**
         * @brief Get the recorded weights. The size of the log equals the number of
         * loss functions times the number of iterations.
         *
         * @param logs Output log
         */
        void get_records(array<double> &logs);

        /**
         * @brief Save the recorded weights to a file.
         *
         * @param file File name
         */
        void save_records(std::string file);

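        // Retrieving the weight history (sketch; the flattened layout of one
        // weight set per iteration is an assumption based on the documentation
        // of get_records() above):
        //
        //   gctl::array<double> logs;
        //   gn.get_records(logs);        // size = num_losses * num_iterations
        //   gn.save_records("gradnorm_weights.txt");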
};
}

#endif // _GCTL_GRADNORM_H