gctl_optimization/lib/optimization/gradnorm.h

159 lines
5.3 KiB
C
Raw Permalink Normal View History

2024-09-10 20:04:47 +08:00
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#ifndef _GCTL_GRADNORM_H
#define _GCTL_GRADNORM_H
#include "gctl/core.h"
#include "gctl/io.h"
namespace gctl
{
/**
 * @brief Gradient-normalized (balanced) multi-task loss evaluation.
 *
 * Maintains one adaptive weight per loss function; single losses are added
 * one by one (AddSingleLoss) and merged into a weighted objective with
 * combined model gradients (GradNormLoss), after which the per-task weights
 * can be rebalanced (UpdateWeights).
 *
 * @note Reference: Zhao Chen et al., 2018. GradNorm: Gradient normalization
 * for adaptive loss balancing in deep multitask networks.
 *
 */
class grad_norm
{
private:
bool initialized_; // set by InitGradNorm(); presumably guards against use before initialization — confirm in .cpp
size_t fx_n_, fx_c_; // fx_n_: total number of loss functions; fx_c_: presumably a running count of losses added so far — confirm in .cpp
double resi_T_, T_; // T_: normal sum of the weights (see set_normal_sum); resi_T_: presumably the residual of T_ after subtracting fixed weights — confirm in .cpp
double lamda_, alpha_, multi_fx_; // lamda_: weight learning rate (see set_weight_step); alpha_: control factor (see set_control_weight); multi_fx_: presumably the accumulated merged objective value
array<bool> fst_iter_; // per-task first-iteration flags (presumably used to latch initial loss values fx0_)
array<double> wgts_; // current per-task weights
array<double> fx0_; // presumably the initial per-task loss values used for loss-ratio normalization (GradNorm paper) — confirm in .cpp
array<double> Gw_, Gdw_, Lx_; // presumably per-task gradient norms, weight gradients and relative loss ratios from the GradNorm update — confirm in .cpp
array<double> grad_; // merged model gradients returned by GradNormLoss()
array<double> rcd_fxs_; // recorded per-task loss values (presumably shown by ShowStatistics)
array<double> fixed_wgts_; // user-fixed weights, excluded from adaptive updates (see set_fixed_weight)
std::vector<double> rcd_wgts_; // weight history: function size entries appended per iteration (see get_records/save_records)
public:
grad_norm();
virtual ~grad_norm();
/**
 * @brief Initialize the number of loss functions and the size of the model gradients.
 *
 * @note This function must be called first, before any other member function.
 *
 * @param num Number of the total loss functions
 * @param grad_num Size of the model gradients
 */
void InitGradNorm(size_t num, size_t grad_num);
/**
 * @brief Add the value of a single loss function and the current model gradients.
 *
 * @param fx objective value
 * @param g model gradients
 *
 * @return weighted value of the current loss function
 */
double AddSingleLoss(double fx, const array<double> &g);
/**
 * @brief Get the merged objective value and the model gradients.
 *
 * @note All single loss functions must be added before calling this function. The
 * merged objective value and the model gradients will be reset after the calling.
 *
 * @param g model gradients
 *
 * @return objective value
 */
double GradNormLoss(array<double> &g);
/**
 * @brief Update weights for single loss functions using the GradNorm algorithm.
 *
 */
void UpdateWeights();
/**
 * @brief Show statistics of the tasks' weight and loss function's value.
 *
 * @param ss Output stream to write to (defaults to std::clog)
 * @param one_line Write the statistics on a single line instead of a table
 */
void ShowStatistics(std::ostream &ss = std::clog, bool one_line = false);
/**
 * @brief Set the control factor alpha. The default is 1.0
 *
 * @param a Input alpha
 */
void set_control_weight(double a);
/**
 * @brief Set the normal sum of the weights. The default equals the function size.
 *
 * @param t Input sum
 */
void set_normal_sum(double t);
/**
 * @brief Set a learning rate of the weights. The default is 0.001
 *
 * @param l Input learning rate
 */
void set_weight_step(double l);
/**
 * @brief Set a fixed weight for one loss function (excluded from adaptive updates).
 *
 * @param id Index of the loss function
 * @param wgt weight of the loss function
 */
void set_fixed_weight(int id, double wgt);
/**
 * @brief Set the initial weights
 *
 * @param w Input weights
 */
void set_initial_weights(const array<double> &w);
/**
 * @brief Get the recorded weights. Size of the log equals the function size times iteration times.
 *
 * @param logs Output log
 */
void get_records(array<double> &logs);
/**
 * @brief Save recorded weights to file.
 *
 * @param file File name
 */
void save_records(std::string file);
};
}
#endif // _GCTL_GRADNORM_H