2024-09-10 20:04:47 +08:00
|
|
|
/********************************************************
|
|
|
|
* ██████╗ ██████╗████████╗██╗
|
|
|
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
|
|
|
* ██║ ███╗██║ ██║ ██║
|
|
|
|
* ██║ ██║██║ ██║ ██║
|
|
|
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
|
|
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
|
|
|
* Geophysical Computational Tools & Library (GCTL)
|
|
|
|
*
|
|
|
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
|
|
|
*
|
|
|
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
|
|
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation, either version 2
|
|
|
|
* of the License, or (at your option) any later version. You should have
|
|
|
|
* received a copy of the GNU Lesser General Public License along with this
|
|
|
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*
|
|
|
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
|
|
|
* the GCTL, please consider the option to obtain a commercial license for a
|
|
|
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
|
|
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
|
|
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
|
|
|
* to include some description of your company and the realm of its activities.
|
|
|
|
* Also add information on how to contact you by electronic and paper mail.
|
|
|
|
******************************************************/
|
|
|
|
|
|
|
|
#include "gradnorm.h"
|
|
|
|
|
|
|
|
/**
 * @brief Default constructor. Sets safe defaults; InitGradNorm() must be
 * called before the object is usable (initialized_ guards against that).
 */
gctl::grad_norm::grad_norm()
{
    fx_c_ = 0;         // index of the next loss to be added in this iteration
    multi_fx_ = 0.0;   // fix: previously uninitialized — AddSingleLoss() accumulates
                       // into it before GradNormLoss() ever resets it
    alpha_ = 1.0;      // GradNorm asymmetry strength
    lamda_ = 0.001;    // weight-update step size
    initialized_ = false;
}
|
|
|
|
|
|
|
|
// Nothing to release manually — all members clean themselves up.
gctl::grad_norm::~grad_norm() {}
|
|
|
|
|
|
|
|
void gctl::grad_norm::InitGradNorm(size_t num, size_t grad_num)
|
|
|
|
{
|
|
|
|
fx_n_ = num;
|
|
|
|
T_ = 1.0;
|
|
|
|
resi_T_ = 0.0;
|
|
|
|
|
|
|
|
fst_iter_.resize(num, true);
|
|
|
|
wgts_.resize(num, 1.0/num);
|
|
|
|
fx0_.resize(num, 0.0);
|
|
|
|
Gw_.resize(num, 0.0);
|
|
|
|
Gdw_.resize(num, 0.0);
|
|
|
|
Lx_.resize(num, 0.0);
|
|
|
|
grad_.resize(grad_num, 0.0);
|
|
|
|
rcd_fxs_.resize(num, 0.0);
|
|
|
|
fixed_wgts_.resize(num, -1.0);
|
|
|
|
|
|
|
|
rcd_wgts_.reserve(100000);
|
|
|
|
for (size_t i = 0; i < fx_n_; i++)
|
|
|
|
{
|
|
|
|
rcd_wgts_.push_back(wgts_[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
initialized_ = true;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
double gctl::grad_norm::AddSingleLoss(double fx, const array<double> &g)
|
|
|
|
{
|
|
|
|
if (fst_iter_[fx_c_])
|
|
|
|
{
|
|
|
|
fx0_[fx_c_] = fx;
|
|
|
|
fst_iter_[fx_c_] = false;
|
|
|
|
}
|
|
|
|
Lx_[fx_c_] = fx/fx0_[fx_c_];
|
|
|
|
|
|
|
|
double curr_fx = wgts_[fx_c_]*fx;
|
|
|
|
multi_fx_ += curr_fx;
|
|
|
|
rcd_fxs_[fx_c_] = fx;
|
|
|
|
|
|
|
|
double sum = 0.0;
|
|
|
|
for (size_t i = 0; i < g.size(); i++)
|
|
|
|
{
|
|
|
|
sum += g[i]*g[i];
|
|
|
|
grad_[i] += wgts_[fx_c_]*g[i];
|
|
|
|
}
|
|
|
|
Gw_[fx_c_] = sqrt(wgts_[fx_c_]*wgts_[fx_c_]*sum);
|
|
|
|
Gdw_[fx_c_] = sqrt(sum); // wgts_[fx_c_]*sum/Gw_[fx_c_]
|
|
|
|
|
|
|
|
fx_c_++;
|
|
|
|
return curr_fx;
|
|
|
|
}
|
|
|
|
|
|
|
|
void gctl::grad_norm::UpdateWeights()
|
|
|
|
{
|
|
|
|
double ac = 0;
|
|
|
|
double avg_Lx = 0.0, avg_Gw = 0.0;
|
|
|
|
resi_T_ = T_;
|
|
|
|
for (size_t i = 0; i < fx_n_; i++)
|
|
|
|
{
|
|
|
|
if (fixed_wgts_[i] < 0.0)
|
|
|
|
{
|
|
|
|
avg_Lx += Lx_[i];
|
|
|
|
avg_Gw += Gw_[i];
|
|
|
|
ac += 1.0;
|
|
|
|
}
|
|
|
|
else resi_T_ -= fixed_wgts_[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
avg_Lx /= ac;
|
|
|
|
avg_Gw /= ac;
|
|
|
|
|
|
|
|
double r_i, sum = 0.0;
|
|
|
|
|
|
|
|
// L1 norm approach
|
|
|
|
for (size_t i = 0; i < fx_n_; i++)
|
|
|
|
{
|
|
|
|
if (fixed_wgts_[i] < 0.0)
|
|
|
|
{
|
|
|
|
r_i = Lx_[i]/avg_Lx;
|
|
|
|
|
|
|
|
if (Gw_[i] >= avg_Gw*pow(r_i, alpha_))
|
|
|
|
{
|
|
|
|
wgts_[i] -= lamda_*Gdw_[i];
|
|
|
|
}
|
|
|
|
else wgts_[i] += lamda_*Gdw_[i];
|
|
|
|
|
|
|
|
// make sure the weights are positive
|
|
|
|
wgts_[i] = std::max(wgts_[i], 1e-16);
|
|
|
|
|
|
|
|
sum += wgts_[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (size_t i = 0; i < fx_n_; i++)
|
|
|
|
{
|
|
|
|
if (fixed_wgts_[i] < 0.0) wgts_[i] *= resi_T_/sum;
|
|
|
|
rcd_wgts_.push_back(wgts_[i]);
|
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
void gctl::grad_norm::ShowStatistics(std::ostream &ss, bool one_line)
|
|
|
|
{
|
|
|
|
double s, t = 0.0;
|
|
|
|
|
|
|
|
if (one_line)
|
|
|
|
{
|
|
|
|
ss << "Wgts:";
|
|
|
|
for (size_t i = 0; i < fx_n_; i++)
|
|
|
|
{
|
|
|
|
ss << " " << wgts_[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
ss << ", Loss:";
|
|
|
|
for (size_t i = 0; i < fx_n_; i++)
|
|
|
|
{
|
|
|
|
ss << " " << rcd_fxs_[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
ss << ", WgtLoss:";
|
|
|
|
for (size_t i = 0; i < fx_n_; i++)
|
|
|
|
{
|
|
|
|
s = wgts_[i]*rcd_fxs_[i];
|
|
|
|
ss << " " << s;
|
|
|
|
t += s;
|
|
|
|
}
|
|
|
|
ss << ", Total: " << t << "\n";
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
ss << "----------------------------\n";
|
|
|
|
ss << "GradNorm's Progress\n";
|
|
|
|
ss << "Tasks' weight: ";
|
|
|
|
for (size_t i = 0; i < fx_n_; i++)
|
|
|
|
{
|
|
|
|
ss << wgts_[i] << " | ";
|
|
|
|
}
|
|
|
|
ss << "\n";
|
|
|
|
|
|
|
|
ss << "Tasks' loss: ";
|
|
|
|
for (size_t i = 0; i < fx_n_; i++)
|
|
|
|
{
|
|
|
|
ss << rcd_fxs_[i] << " | ";
|
|
|
|
}
|
|
|
|
ss << "\n";
|
|
|
|
|
|
|
|
ss << "Weighted losses: ";
|
|
|
|
for (size_t i = 0; i < fx_n_; i++)
|
|
|
|
{
|
|
|
|
s = wgts_[i]*rcd_fxs_[i];
|
|
|
|
ss << s << " | ";
|
|
|
|
t += s;
|
|
|
|
}
|
|
|
|
ss << t << " (total) |\n";
|
|
|
|
ss << "----------------------------\n";
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * @brief Return the combined weighted loss and gradient for this iteration,
 * then reset the per-iteration accumulators.
 *
 * @param g  Output: the accumulated weighted gradient.
 * @return   The accumulated weighted loss.
 * @throws std::runtime_error if not all tasks reported, or not initialized.
 */
double gctl::grad_norm::GradNormLoss(array<double> &g)
{
    // Every task must have reported its loss via AddSingleLoss() first.
    if (fx_c_ != fx_n_)
    {
        throw std::runtime_error("Not all loss functions evaluated. From gctl::grad_norm::GradNormLoss()");
    }

    if (!initialized_)
    {
        throw std::runtime_error("GradNorm is not initialized. From gctl::grad_norm::GradNormLoss()");
    }

    const double total_fx = multi_fx_;
    g = grad_;

    // Clear the per-iteration accumulators for the next round.
    fx_c_ = 0;
    multi_fx_ = 0.0;
    grad_.assign(0.0);

    return total_fx;
}
|
|
|
|
|
|
|
|
void gctl::grad_norm::set_control_weight(double a)
|
|
|
|
{
|
|
|
|
alpha_ = a;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
void gctl::grad_norm::set_normal_sum(double t)
|
|
|
|
{
|
|
|
|
T_ = t;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
void gctl::grad_norm::set_weight_step(double l)
|
|
|
|
{
|
|
|
|
lamda_ = l;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
void gctl::grad_norm::set_fixed_weight(int id, double wgt)
|
|
|
|
{
|
|
|
|
if (id < 0 || id >= fx_n_)
|
|
|
|
{
|
|
|
|
throw std::runtime_error("Invalid loss function's index. From gctl::grad_norm::set_fixed_weight(...)");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (wgt <= 0.0 || wgt >= T_)
|
|
|
|
{
|
|
|
|
throw std::runtime_error("Invalid fixed weight value. From gctl::grad_norm::set_fixed_weight(...)");
|
|
|
|
}
|
|
|
|
|
|
|
|
fixed_wgts_[id] = wgt;
|
|
|
|
wgts_[id] = wgt;
|
|
|
|
|
|
|
|
resi_T_ = T_;
|
|
|
|
double ac = 0.0;
|
|
|
|
for (size_t i = 0; i < fx_n_; i++)
|
|
|
|
{
|
|
|
|
if (fixed_wgts_[i] > 0.0) resi_T_ -= fixed_wgts_[i];
|
|
|
|
else ac += 1.0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (resi_T_ <= 0.0)
|
|
|
|
{
|
|
|
|
throw std::runtime_error("Invalid tasks' weight detected. From gctl::grad_norm::UpdateWeights()");
|
|
|
|
}
|
|
|
|
|
|
|
|
for (size_t i = 0; i < fx_n_; i++)
|
|
|
|
{
|
|
|
|
if (fixed_wgts_[i] < 0.0) wgts_[i] = resi_T_/ac;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (size_t i = 0; i < fx_n_; i++)
|
|
|
|
{
|
|
|
|
rcd_wgts_[i] = wgts_[i];
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
void gctl::grad_norm::set_initial_weights(const array<double> &w)
|
|
|
|
{
|
|
|
|
if (w.size() != fx_n_)
|
|
|
|
{
|
|
|
|
throw std::runtime_error("Invalid input array size. From gctl::grad_norm::set_initial_weights(...)");
|
|
|
|
}
|
|
|
|
|
|
|
|
double sum = 0.0;
|
|
|
|
for (size_t i = 0; i < fx_n_; i++)
|
|
|
|
{
|
|
|
|
wgts_[i] = w[i];
|
|
|
|
sum += wgts_[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
for (size_t i = 0; i < fx_n_; i++)
|
|
|
|
{
|
|
|
|
wgts_[i] *= T_/sum;
|
|
|
|
rcd_wgts_[i] = wgts_[i];
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copy the full weight history into `logs`.
void gctl::grad_norm::get_records(array<double> &logs)
{
    const size_t cnt = rcd_wgts_.size();
    logs.resize(cnt);
    for (size_t i = 0; i < cnt; i++) logs[i] = rcd_wgts_[i];
    return;
}
|
|
|
|
|
|
|
|
void gctl::grad_norm::save_records(std::string file)
|
|
|
|
{
|
|
|
|
std::ofstream ofile;
|
|
|
|
open_outfile(ofile, file, ".txt");
|
|
|
|
|
|
|
|
ofile << "# 'tw' for 'task weight'\n# ";
|
|
|
|
for (size_t j = 0; j < fx_n_; j++)
|
|
|
|
{
|
|
|
|
ofile << "tw" << std::to_string(j) << " ";
|
|
|
|
}
|
|
|
|
ofile << "\n";
|
|
|
|
|
|
|
|
for (int i = 0; i < rcd_wgts_.size(); i++)
|
|
|
|
{
|
|
|
|
ofile << rcd_wgts_[i] << " ";
|
|
|
|
if ((i+1)%fx_n_ == 0) ofile << "\n";
|
|
|
|
}
|
|
|
|
ofile.close();
|
|
|
|
}
|