// gctl/lib/algorithm/sinkhorn.h

/********************************************************
 *  ██████╗  ██████╗████████╗██╗
 * ██╔════╝ ██╔════╝╚══██╔══╝██║
 * ██║  ███╗██║        ██║   ██║
 * ██║   ██║██║        ██║   ██║
 * ╚██████╔╝╚██████╗   ██║   ███████╗
 *  ╚═════╝  ╚═════╝   ╚═╝   ╚══════╝
 * Geophysical Computational Tools & Library (GCTL)
 *
 * Copyright (c) 2023  Yi Zhang (yizhang-geo@zju.edu.cn)
 *
 * GCTL is distributed under a dual licensing scheme. You can redistribute 
 * it and/or modify it under the terms of the GNU Lesser General Public 
 * License as published by the Free Software Foundation, either version 2 
 * of the License, or (at your option) any later version. You should have 
 * received a copy of the GNU Lesser General Public License along with this 
 * program. If not, see <http://www.gnu.org/licenses/>.
 * 
 * If the terms and conditions of the LGPL v.2. would prevent you from using 
 * the GCTL, please consider the option to obtain a commercial license for a 
 * fee. These licenses are offered by the GCTL's original author. As a rule, 
 * licenses are provided "as-is", unlimited in time for a one time fee. Please 
 * send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget 
 * to include some description of your company and the realm of its activities. 
 * Also add information on how to contact you by electronic and paper mail.
 ******************************************************/

#ifndef _GCTL_SINKHORN_H
#define _GCTL_SINKHORN_H

#include "../core/array.h"
#include "algorithm_func.h"
#include "interpolate.h"

namespace gctl
{
    /**
     * @brief Optimal transport plan between two one-dimensional distributions,
     * computed with the Sinkhorn algorithm.
     *
     * This header only declares the interface; no member definitions are
     * visible here.
     */
    class sinkhorn1d
    {
    public:
        sinkhorn1d();

        /**
         * @brief Construct from the arguments also accepted by init().
         *
         * NOTE(review): presumably equivalent to default construction followed
         * by init() -- confirm against the implementation.
         */
        sinkhorn1d(const array<double> &tar,
            double tmin,
            double tmax,
            double eta = 10,
            double eps = 1e-10,
            norm_type_e nt = L2);

        virtual ~sinkhorn1d();

        /**
         * @brief Set up the solver.
         *
         * NOTE(review): the roles below are inferred from the member comments
         * and parameter names -- confirm against the implementation.
         *
         * @param tar  presumably the target probability distribution
         * @param tmin presumably the lower bound of the target axis
         * @param tmax presumably the upper bound of the target axis
         * @param eta  presumably the Sinkhorn regularization parameter (default 10)
         * @param eps  presumably the iteration termination precision (default 1e-10)
         * @param nt   presumably the norm used for the transport cost (default L2)
         */
        void init(const array<double> &tar,
            double tmin,
            double tmax,
            double eta = 10,
            double eps = 1e-10,
            norm_type_e nt = L2);

        /**
         * @brief Build the transport plan from an input distribution.
         *
         * NOTE(review): inp/imin/imax presumably describe the distribution to
         * be transformed and its axis range; verbose presumably enables
         * progress output -- confirm.
         */
        void make_plan_from(const array<double> &inp,
            double imin,
            double imax,
            bool verbose = false);

        /** @brief Return a distance value as a double. */
        double get_distance();

        /**
         * @brief Overload of get_distance() taking an extra array argument.
         *
         * NOTE(review): grad is presumably filled with a gradient of the
         * distance -- confirm.
         */
        double get_distance(array<double> &grad);

        /**
         * @brief Operates on in_out through a non-const reference.
         *
         * NOTE(review): presumably maps the given samples onto the target
         * distribution in place -- confirm.
         */
        void sampling_to_target(array<double> &in_out);

        /** @brief Return a mutable reference to a matrix (presumably P_ -- confirm). */
        matrix<double> &get_plan();

        /**
         * @brief Save the plan; takes a file name by value.
         *
         * NOTE(review): <string> is not included by this header directly; it is
         * presumably provided transitively by the project headers -- confirm.
         */
        void save_plan(std::string filename);

    private:
        // ---- transport-cost helpers ----
        double L1_distance(double x, double y);
        double L2_distance(double x, double y);

        // ---- state ----
        norm_type_e nt_;         // norm (metric) used for the transport cost
        double eta_;             // regularization parameter of the Sinkhorn algorithm
        double eps_;             // iteration termination precision (measured with the RMS error)
        int xnum_;               // size of the x distribution
        int ynum_;               // size of the y distribution
        double xmin_;            // parameters of the x distribution ...
        double dx_;
        double xmax_;
        double ymin_;            // ... and of the y distribution
        double dy_;
        double ymax_;
        array<double> px_;       // probability distribution to be transformed
        array<double> px_grad_;  // derivative of px with respect to x
        array<double> px_maxi_;  // maximum of each column of P_
        array<double> py_;       // target probability distribution
        array<double> u_;        // iteration vector
        array<double> v_;        // iteration vector
        matrix<double> K_;       // transformation kernel matrix
        matrix<double> P_;       // transformation (plan) matrix
    };

    /**
     * @brief Optimal transport plan between two two-dimensional distributions,
     * computed with the Sinkhorn algorithm.
     *
     * This header only declares the interface; no member definitions are
     * visible here.
     */
    class sinkhorn2d
    {
    public:
        sinkhorn2d();

        /**
         * @brief Construct from the arguments also accepted by init().
         *
         * NOTE(review): presumably equivalent to default construction followed
         * by init() -- confirm against the implementation.
         */
        sinkhorn2d(const matrix<double> &tar,
            double xmin,
            double xmax,
            double ymin,
            double ymax,
            double eta = 10,
            double eps = 1e-10,
            norm_type_e nt = L2);

        virtual ~sinkhorn2d();

        /**
         * @brief Set up the solver.
         *
         * NOTE(review): the roles below are inferred from the member comments
         * and parameter names -- confirm against the implementation.
         *
         * @param tar  presumably the target probability distribution on a 2-D grid
         * @param xmin presumably the lower x bound of the target grid
         * @param xmax presumably the upper x bound of the target grid
         * @param ymin presumably the lower y bound of the target grid
         * @param ymax presumably the upper y bound of the target grid
         * @param eta  presumably the Sinkhorn regularization parameter (default 10)
         * @param eps  presumably the iteration termination precision (default 1e-10)
         * @param nt   presumably the norm used for the transport cost (default L2)
         */
        void init(const matrix<double> &tar,
            double xmin,
            double xmax,
            double ymin,
            double ymax,
            double eta = 10,
            double eps = 1e-10,
            norm_type_e nt = L2);

        /**
         * @brief Build the transport plan from an input distribution.
         *
         * NOTE(review): inp and the four bounds presumably describe the
         * distribution to be transformed and its grid extent; verbose
         * presumably enables progress output -- confirm.
         */
        void make_plan_from(const matrix<double> &inp,
            double xmin,
            double xmax,
            double ymin,
            double ymax,
            bool verbose = false);

        /** @brief Return a distance value as a double. */
        double get_distance();

        /**
         * @brief Operates on inx and iny through non-const references.
         *
         * NOTE(review): presumably maps the given sample coordinates onto the
         * target distribution in place -- confirm.
         */
        void sampling_to_target(array<double> &inx, array<double> &iny);

        /** @brief Return a mutable reference to a matrix (presumably the plan -- confirm). */
        matrix<double> &get_plan();

        /**
         * @brief Save the plan; takes a file name by value and two optional indices.
         *
         * NOTE(review): idx/idy default to -1, presumably meaning "no specific
         * cell selected" -- confirm. <string> is not included by this header
         * directly; it is presumably provided transitively -- confirm.
         */
        void save_plan(std::string filename, int idx = -1, int idy = -1);

    private:
        // ---- transport-cost helpers ----
        double L1_distance(double x, double y, double x2, double y2);
        double L2_distance(double x, double y, double x2, double y2);

        // ---- state ----
        norm_type_e nt_;          // norm (metric) used for the transport cost
        double eta_;              // regularization parameter of the Sinkhorn algorithm
        double eps_;              // iteration termination precision (measured with the RMS error)

        // Sizes of the x and y distributions.
        // NOTE(review): the t_/i_ prefixes presumably stand for target/input -- confirm.
        int t_xnum_;
        int t_ynum_;
        int i_xnum_;
        int i_ynum_;
        int px_num_;
        int py_num_;

        // Parameters of the x and y distributions.
        double t_xmin_;
        double t_dx_;
        double t_xmax_;
        double t_ymin_;
        double t_dy_;
        double t_ymax_;
        double i_xmin_;
        double i_dx_;
        double i_xmax_;
        double i_ymin_;
        double i_dy_;
        double i_ymax_;

        array<double> px_;        // probability distribution to be transformed
        array<double> py_;        // target probability distribution
        array<double> u_;         // iteration vector
        array<double> v_;         // iteration vector
        matrix<double> K_;        // transformation kernel matrix
        matrix<double> P_;        // transformation (plan) matrix
        matrix<double> RP_;       // rearranged transformation matrix
        matrix<double> rp_maxi_;  // maximum of each block of RP_
    };
}

#endif // _GCTL_SINKHORN_H
initial upload 2024-09-10 15:45:07 +08:00			`/********************************************************`
			`* ██████╗ ██████╗████████╗██╗`
			`* ██╔════╝ ██╔════╝╚══██╔══╝██║`
			`* ██║ ███╗██║ ██║ ██║`
			`* ██║ ██║██║ ██║ ██║`
			`* ╚██████╔╝╚██████╗ ██║ ███████╗`
			`* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝`
			`* Geophysical Computational Tools & Library (GCTL)`
			`*`
			`* Copyright (c) 2023 Yi Zhang (yizhang-geo@zju.edu.cn)`
			`*`
			`* GCTL is distributed under a dual licensing scheme. You can redistribute`
			`* it and/or modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation, either version 2`
			`* of the License, or (at your option) any later version. You should have`
			`* received a copy of the GNU Lesser General Public License along with this`
			`* program. If not, see <http://www.gnu.org/licenses/>.`
			`*`
			`* If the terms and conditions of the LGPL v.2. would prevent you from using`
			`* the GCTL, please consider the option to obtain a commercial license for a`
			`* fee. These licenses are offered by the GCTL's original author. As a rule,`
			`* licenses are provided "as-is", unlimited in time for a one time fee. Please`
			`* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget`
			`* to include some description of your company and the realm of its activities.`
			`* Also add information on how to contact you by electronic and paper mail.`
			`******************************************************/`

			`#ifndef _GCTL_SINKHORN_H`
			`#define _GCTL_SINKHORN_H`

			`#include "../core/array.h"`
			`#include "algorithm_func.h"`
			`#include "interpolate.h"`

			`namespace gctl`
			`{`
			`/**`
			`* @brief Sinkhorn 算法计算两个一维分布之间的最优传输计划`
			`*`
			`*/`
			`class sinkhorn1d`
			`{`
			`public:`
			`sinkhorn1d();`
			`sinkhorn1d(const array<double> &tar, double tmin, double tmax, double eta = 10, double eps = 1e-10, norm_type_e nt = L2);`
			`virtual ~sinkhorn1d();`

			`void init(const array<double> &tar, double tmin, double tmax, double eta = 10, double eps = 1e-10, norm_type_e nt = L2);`
			`void make_plan_from(const array<double> &inp, double imin, double imax, bool verbose = false);`

			`double get_distance();`
			`double get_distance(array<double> &grad);`
			`void sampling_to_target(array<double> &in_out);`

			`matrix<double> &get_plan();`
			`void save_plan(std::string filename);`

			`private:`
			`double L1_distance(double x, double y);`
			`double L2_distance(double x, double y);`

			`private:`
			`norm_type_e nt_; // 传输代价的测度标准`
			`double eta_, eps_; // Sinkhorn算法的正则化参数 Sinkhorn算法的迭代终止精度这里我们使用均方根误差计算迭代精度`
			`int xnum_, ynum_; // x与y分布的数量`
			`double xmin_, dx_, xmax_, ymin_, dy_, ymax_; // x和y分布的参数`
			`array<double> px_; // 待转换概率分布`
			`array<double> px_grad_; // px分布相对于x的导数`
			`array<double> px_maxi_; // P_中每一列的最大值`
			`array<double> py_; // 目标概率分布`
			`array<double> u_, v_; // 迭代向量`
			`matrix<double> K_; // 转化核矩阵`
			`matrix<double> P_; // 转换矩阵`
			`};`

			`/**`
			`* @brief Sinkhorn 算法计算两个二维分布之间的最优传输计划`
			`*`
			`*/`
			`class sinkhorn2d`
			`{`
			`public:`
			`sinkhorn2d();`
			`sinkhorn2d(const matrix<double> &tar, double xmin, double xmax, double ymin, double ymax,`
			`double eta = 10, double eps = 1e-10, norm_type_e nt = L2);`
			`virtual ~sinkhorn2d();`

			`void init(const matrix<double> &tar, double xmin, double xmax, double ymin, double ymax,`
			`double eta = 10, double eps = 1e-10, norm_type_e nt = L2);`
			`void make_plan_from(const matrix<double> &inp, double xmin, double xmax, double ymin, double ymax,`
			`bool verbose = false);`

			`double get_distance();`
			`void sampling_to_target(array<double> &inx, array<double> &iny);`

			`matrix<double> &get_plan();`
			`void save_plan(std::string filename, int idx = -1, int idy = -1);`

			`private:`
			`double L1_distance(double x, double y, double x2, double y2);`
			`double L2_distance(double x, double y, double x2, double y2);`

			`private:`
			`norm_type_e nt_; // 传输代价的测度标准`
			`double eta_, eps_; // Sinkhorn算法的正则化参数 Sinkhorn算法的迭代终止精度这里我们使用均方根误差计算迭代精度`
			`int t_xnum_, t_ynum_, i_xnum_, i_ynum_, px_num_, py_num_; // x与y分布的数量`
			`double t_xmin_, t_dx_, t_xmax_, t_ymin_, t_dy_, t_ymax_; // x和y分布的参数`
			`double i_xmin_, i_dx_, i_xmax_, i_ymin_, i_dy_, i_ymax_;`
			`array<double> px_; // 待转换概率分布`
			`array<double> py_; // 目标概率分布`
			`array<double> u_, v_; // 迭代向量`
			`matrix<double> K_; // 转化核矩阵`
			`matrix<double> P_; // 转换矩阵`
			`matrix<double> RP_; // 整理后的转换矩阵`
			`matrix<double> rp_maxi_; // RP_中每一快的最大值`
			`};`
			`}`

			`#endif // _GCTL_SINKHORN_H`