// gctl_optimization/lib/optimization/dwa.h

/********************************************************
 *  ██████╗  ██████╗████████╗██╗
 * ██╔════╝ ██╔════╝╚══██╔══╝██║
 * ██║  ███╗██║        ██║   ██║
 * ██║   ██║██║        ██║   ██║
 * ╚██████╔╝╚██████╗   ██║   ███████╗
 *  ╚═════╝  ╚═════╝   ╚═╝   ╚══════╝
 * Geophysical Computational Tools & Library (GCTL)
 *
 * Copyright (c) 2022  Yi Zhang (yizhang-geo@zju.edu.cn)
 *
 * GCTL is distributed under a dual licensing scheme. You can redistribute
 * it and/or modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, either version 2
 * of the License, or (at your option) any later version. You should have
 * received a copy of the GNU Lesser General Public License along with this
 * program. If not, see <http://www.gnu.org/licenses/>.
 *
 * If the terms and conditions of the LGPL v.2. would prevent you from using
 * the GCTL, please consider the option to obtain a commercial license for a
 * fee. These licenses are offered by the GCTL's original author. As a rule,
 * licenses are provided "as-is", unlimited in time for a one time fee. Please
 * send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
 * to include some description of your company and the realm of its activities.
 * Also add information on how to contact you by electronic and paper mail.
 ******************************************************/

#ifndef _GCTL_DWA_H
#define _GCTL_DWA_H

#include "gctl/core.h"

namespace gctl
{
    /**
     * @brief Loss-balanced multitask evaluation: merges several single loss
     * functions (objective values and model gradients) into one weighted loss
     * whose weights are updated with the DWA algorithm.
     *
     * @note Reference: 2019. End-to-end multitask learning with attention.
     *
     * @note Expected call order, as stated by the member documentation:
     * InitDWA() first, then AddSingleLoss() for every loss function, then
     * DWALoss() to retrieve the merged result.
     */
    class dwa
    {
    public:
        dwa();
        virtual ~dwa();

        /**
         * @brief Set up the number of loss functions and the size of the model gradients.
         *
         * @note This function must be called before anything else.
         *
         * @param num      Number of the loss functions
         * @param grad_num Size of the model gradients
         */
        void InitDWA(size_t num, size_t grad_num);

        /**
         * @brief Register the value of one loss function together with the
         * current model gradients.
         *
         * @param fx Objective value of the single loss function
         * @param g  Model gradients
         */
        void AddSingleLoss(double fx, const array<double> &g);

        /**
         * @brief Retrieve the merged objective value and the model gradients.
         *
         * @note Every single loss function must have been added before this
         * call. The merged objective value and the model gradients are reset
         * once the call is done.
         *
         * @param g Model gradients
         *
         * @return The merged objective value
         */
        double DWALoss(array<double> &g);

        /**
         * @brief Update the weights of the single loss functions using the DWA algorithm.
         */
        void UpdateWeights();

        /**
         * @brief Set the cooling temperature. The bigger the value is, the
         * closer the weights will be to one. The default is 1.0.
         *
         * @param t Input temperature
         */
        void set_control_temperature(double t);

        /**
         * @brief Set the normal sum of the weights. The default equals the
         * number of loss functions.
         *
         * @param k Input sum
         */
        void set_normal_sum(double k);

        /**
         * @brief Get the recorded weights. The size of the log equals the
         * number of loss functions times the number of iterations.
         *
         * @param logs Output log
         */
        void get_records(array<double> &logs);

    private:
        // NOTE(review): the roles suggested below are inferred from the member
        // names and the public documentation only; the implementation is not
        // visible from this header -- confirm before relying on them.
        bool l_ready_;
        size_t fx_c_;                           // presumably a counter of the losses added so far
        size_t fx_n_;                           // presumably the number of loss functions
        double K_;                              // presumably the normal sum of the weights
        double T_;                              // presumably the control temperature
        double multi_fx_;                       // presumably the merged objective value
        array<double> wgts_;                    // presumably the per-loss weights
        array<double> L_p1_;
        array<double> L_p2_;
        array<double> grad_;                    // presumably the merged model gradients
        std::vector<array<double>> rcd_wgts_;   // presumably the weight history returned by get_records()
    };
}

#endif // _GCTL_DWA_H