/********************************************************
 *  ██████╗  ██████╗████████╗██╗
 * ██╔════╝ ██╔════╝╚══██╔══╝██║
 * ██║  ███╗██║        ██║   ██║
 * ██║   ██║██║        ██║   ██║
 * ╚██████╔╝╚██████╗   ██║   ███████╗
 *  ╚═════╝  ╚═════╝   ╚═╝   ╚══════╝
 * Geophysical Computational Tools & Library (GCTL)
 *
 * Copyright (c) 2023 Yi Zhang (yizhang-geo@zju.edu.cn)
 *
 * GCTL is distributed under a dual licensing scheme. You can redistribute
 * it and/or modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, either version 2
 * of the License, or (at your option) any later version. You should have
 * received a copy of the GNU Lesser General Public License along with this
 * program. If not, see <http://www.gnu.org/licenses/>.
 *
 * If the terms and conditions of the LGPL v.2. would prevent you from using
 * the GCTL, please consider the option to obtain a commercial license for a
 * fee. These licenses are offered by the GCTL's original author. As a rule,
 * licenses are provided "as-is", unlimited in time for a one time fee. Please
 * send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
 * to include some description of your company and the realm of its activities.
 * Also add information on how to contact you by electronic and paper mail.
 ******************************************************/
|
|
|
|
#include "kde.h"
|
|
|
|
/**
 * @brief Default constructor.
 *
 * Performs no set-up of its own; the averaging width and the samples are
 * supplied afterwards through init().
 */
gctl::kde::kde()
{
}
|
|
|
|
/**
 * @brief Destructor. No explicit clean-up is performed in its body.
 */
gctl::kde::~kde()
{
}
|
|
|
|
/**
 * @brief Construct an estimator and initialise it in one step.
 *
 * Both arguments are forwarded unchanged to init(), so the validation
 * performed there applies here as well.
 *
 * @param h Averaging width of the kernels.
 * @param x Sample locations.
 */
gctl::kde::kde(double h, const array<double> &x)
{
    this->init(h, x);
}
|
|
|
|
void gctl::kde::init(double h, const array<double> &x)
|
|
{
|
|
if (h <= 0) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid averaging width.");
|
|
if (x.size() < 2) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid sample size.");
|
|
|
|
h_ = h;
|
|
x_ = x;
|
|
return;
|
|
}
|
|
|
|
double gctl::kde::get_density_at(double x, kde_kernel_e k_type)
|
|
{
|
|
double out = 0;
|
|
if (k_type == KDE_Gaussian)
|
|
{
|
|
for (size_t i = 0; i < x_.size(); i++)
|
|
{
|
|
out += gaussian_kernel((x - x_[i])/h_);
|
|
}
|
|
}
|
|
else if (k_type == KDE_Epanechnikov)
|
|
{
|
|
for (size_t i = 0; i < x_.size(); i++)
|
|
{
|
|
out += epanechnikov_kernel((x - x_[i])/h_);
|
|
}
|
|
}
|
|
else if (k_type == KDE_Rectangular)
|
|
{
|
|
for (size_t i = 0; i < x_.size(); i++)
|
|
{
|
|
out += rectangular_kernel((x - x_[i])/h_);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (size_t i = 0; i < x_.size(); i++)
|
|
{
|
|
out += triangular_kernel((x - x_[i])/h_);
|
|
}
|
|
}
|
|
return out/(h_*x_.size());
|
|
}
|
|
|
|
double gctl::kde::get_kernel_density_at(size_t k_id, double x, kde_kernel_e k_type)
|
|
{
|
|
if (k_id >= x_.size()) throw std::runtime_error("[gctl::kde::get_kernel_density_at(...)] Invalid kernel index.");
|
|
|
|
double out;
|
|
if (k_type == KDE_Gaussian) out = gaussian_kernel((x - x_[k_id])/h_);
|
|
else if (k_type == KDE_Epanechnikov) out = epanechnikov_kernel((x - x_[k_id])/h_);
|
|
else if (k_type == KDE_Rectangular) out = rectangular_kernel((x - x_[k_id])/h_);
|
|
else out = triangular_kernel((x - x_[k_id])/h_);
|
|
|
|
return out/h_;
|
|
}
|
|
|
|
double gctl::kde::get_gradient_at(double x, kde_kernel_e k_type)
|
|
{
|
|
double out = 0;
|
|
if (k_type == KDE_Gaussian)
|
|
{
|
|
for (size_t i = 0; i < x_.size(); i++)
|
|
{
|
|
out += ((x - x_[i])/h_)*gaussian_kernel((x - x_[i])/h_);
|
|
}
|
|
}
|
|
else if (k_type == KDE_Epanechnikov)
|
|
{
|
|
for (size_t i = 0; i < x_.size(); i++)
|
|
{
|
|
out += epanechnikov_kernel((x - x_[i])/h_, true);
|
|
}
|
|
}
|
|
else if (k_type == KDE_Rectangular)
|
|
{
|
|
for (size_t i = 0; i < x_.size(); i++)
|
|
{
|
|
out += rectangular_kernel((x - x_[i])/h_, true);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (size_t i = 0; i < x_.size(); i++)
|
|
{
|
|
out += triangular_kernel((x - x_[i])/h_, true);
|
|
}
|
|
}
|
|
return -1.0*out/(h_*h_*x_.size());
|
|
}
|
|
|
|
double gctl::kde::get_kernel_gradient_at(size_t k_id, double x, kde_kernel_e k_type)
|
|
{
|
|
if (k_id >= x_.size()) throw std::runtime_error("[gctl::kde::get_kernel_gradient_at(...)] Invalid kernel index.");
|
|
|
|
double out;
|
|
if (k_type == KDE_Gaussian) out = ((x - x_[k_id])/h_)*gaussian_kernel((x - x_[k_id])/h_);
|
|
else if (k_type == KDE_Epanechnikov) out = epanechnikov_kernel((x - x_[k_id])/h_);
|
|
else if (k_type == KDE_Rectangular) out = rectangular_kernel((x - x_[k_id])/h_);
|
|
else out = triangular_kernel((x - x_[k_id])/h_);
|
|
|
|
return -1.0*out/(h_*h_);
|
|
}
|
|
|
|
void gctl::kde::get_distribution(const array<double> x, array<double> &d,
|
|
array<double> &dx, kde_kernel_e k_type, kde_norm_e n_type, double norm)
|
|
{
|
|
if (norm < 0.0) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid normalization value.");
|
|
|
|
size_t xnum = x.size();
|
|
d.resize(xnum);
|
|
dx.resize(xnum);
|
|
|
|
double s = 0.0;
|
|
if (n_type == KDE_MAX2ONE)
|
|
{
|
|
for (size_t i = 0; i < xnum; i++)
|
|
{
|
|
d[i] = get_density_at(x[i], k_type);
|
|
dx[i]= get_gradient_at(x[i], k_type);
|
|
|
|
s = std::max(s, d[i]);
|
|
}
|
|
}
|
|
else if (n_type == KDE_SUM2ONE)
|
|
{
|
|
for (size_t i = 0; i < xnum; i++)
|
|
{
|
|
d[i] = get_density_at(x[i], k_type);
|
|
dx[i]= get_gradient_at(x[i], k_type);
|
|
|
|
s += d[i];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (size_t i = 0; i < xnum; i++)
|
|
{
|
|
d[i] = get_density_at(x[i], k_type);
|
|
dx[i]= get_gradient_at(x[i], k_type);
|
|
}
|
|
|
|
s = norm;
|
|
}
|
|
|
|
for (size_t i = 0; i < xnum; i++)
|
|
{
|
|
d[i] /= s;
|
|
dx[i]/= s;
|
|
}
|
|
return;
|
|
}
|
|
|
|
/**
 * @brief Standard Gaussian kernel exp(-x^2/2)/sqrt(2*pi).
 *
 * @param x Offset from the kernel centre; callers in this file pass it
 *          already divided by the averaging width.
 * @return Kernel value at x.
 */
double gctl::kde::gaussian_kernel(double x)
{
    const double scale = sqrt(2*M_PI);
    return exp(-0.5*x*x)/scale;
}
|
|
|
|
/**
 * @brief Epanechnikov kernel 0.75*(1 - x^2) on |x| < 1, zero elsewhere.
 *
 * @param x        Offset from the kernel centre; callers in this file pass
 *                 it already divided by the averaging width.
 * @param gradient When true, return 1.5*x inside the support instead of
 *                 the kernel value. This is the negated derivative; the
 *                 gradient routines of this class apply the sign.
 * @return Kernel value or gradient term; 0 for |x| >= 1 in both modes.
 */
double gctl::kde::epanechnikov_kernel(double x, bool gradient)
{
    // Outside the support both the kernel and its slope vanish.
    if (fabs(x) >= 1) return 0;

    return gradient ? 1.5*x : 0.75*(1 - x*x);
}
|
|
|
|
/**
 * @brief Rectangular (box) kernel: 0.5 on |x| < 1, zero elsewhere.
 *
 * @param x        Offset from the kernel centre; callers in this file pass
 *                 it already divided by the averaging width.
 * @param gradient When true the function always returns 0.
 * @return Kernel value, or 0 when the gradient is requested.
 */
double gctl::kde::rectangular_kernel(double x, bool gradient)
{
    // Zero slope everywhere, and zero value outside the support.
    if (gradient || fabs(x) >= 1) return 0;

    return 0.5;
}
|
|
|
|
/**
 * @brief Triangular kernel 1 - |x| on |x| < 1, zero elsewhere.
 *
 * @param x        Offset from the kernel centre; callers in this file pass
 *                 it already divided by the averaging width.
 * @param gradient When true, return 1.0 for x >= 0 and -1.0 for x < 0
 *                 inside the support instead of the kernel value. This is
 *                 the negated slope; the gradient routines of this class
 *                 apply the sign.
 * @return Kernel value or gradient term; 0 for |x| >= 1 in both modes.
 */
double gctl::kde::triangular_kernel(double x, bool gradient)
{
    const double dist = fabs(x);

    // Outside the support both the kernel and its slope vanish.
    if (dist >= 1) return 0;

    if (!gradient) return (1 - dist);

    return (x >= 0) ? 1.0 : -1.0;
}
|
|
|
|
/**
 * @brief Default constructor.
 *
 * Performs no set-up of its own; the averaging width and the samples are
 * supplied afterwards through one of the init() overloads.
 */
gctl::kde2d::kde2d()
{
}
|
|
|
|
/**
 * @brief Destructor. No explicit clean-up is performed in its body.
 */
gctl::kde2d::~kde2d()
{
}
|
|
|
|
/**
 * @brief Construct a 2-D estimator from gctl arrays and initialise it.
 *
 * All arguments are forwarded unchanged to init(), so the validation
 * performed there applies here as well.
 *
 * @param h Averaging width of the kernels.
 * @param x Sample x coordinates.
 * @param y Sample y coordinates.
 */
gctl::kde2d::kde2d(double h, const array<double> &x, const array<double> &y)
{
    this->init(h, x, y);
}
|
|
|
|
/**
 * @brief Construct a 2-D estimator from std::vector samples and initialise it.
 *
 * All arguments are forwarded unchanged to init(), so the validation
 * performed there applies here as well.
 *
 * @param h Averaging width of the kernels.
 * @param x Sample x coordinates.
 * @param y Sample y coordinates.
 */
gctl::kde2d::kde2d(double h, const std::vector<double> &x, const std::vector<double> &y)
{
    this->init(h, x, y);
}
|
|
|
|
void gctl::kde2d::init(double h, const array<double> &x, const array<double> &y)
|
|
{
|
|
if (h <= 0) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid averaging width.");
|
|
if (x.size() < 2 || y.size() < 2 || x.size() != y.size()) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid sample size.");
|
|
|
|
h_ = h;
|
|
x_ = x;
|
|
y_ = y;
|
|
return;
|
|
}
|
|
|
|
void gctl::kde2d::init(double h, const std::vector<double> &x, const std::vector<double> &y)
|
|
{
|
|
if (h <= 0) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid averaging width.");
|
|
if (x.size() < 2 || y.size() < 2 || x.size() != y.size()) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid sample size.");
|
|
|
|
h_ = h;
|
|
x_.import_vector(x);
|
|
y_.import_vector(y);
|
|
return;
|
|
}
|
|
|
|
double gctl::kde2d::get_density_at(double x, double y, kde_kernel_e k_type)
|
|
{
|
|
double out = 0;
|
|
if (k_type == KDE_Gaussian)
|
|
{
|
|
for (size_t i = 0; i < x_.size(); i++)
|
|
{
|
|
out += gaussian_kernel((x - x_[i])/h_, (y - y_[i])/h_);
|
|
}
|
|
}
|
|
else throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid kernel type.");
|
|
return out/(h_*h_*x_.size());
|
|
}
|
|
|
|
double gctl::kde2d::get_kernel_density_at(size_t k_id, double x, double y, kde_kernel_e k_type)
|
|
{
|
|
if (k_id >= x_.size()) throw std::runtime_error("[gctl::kde2d::get_kernel_density_at(...)] Invalid kernel index.");
|
|
|
|
double out;
|
|
if (k_type == KDE_Gaussian) out = gaussian_kernel((x - x_[k_id])/h_, (y - y_[k_id])/h_);
|
|
else throw std::runtime_error("[gctl::kde2d::get_kernel_density_at(...)] Invalid kernel type.");
|
|
|
|
return out/(h_*h_);
|
|
}
|
|
|
|
void gctl::kde2d::get_gradient_at(double x, double y, double &gx, double &gy, kde_kernel_e k_type)
|
|
{
|
|
double out_x = 0.0, out_y = 0.0;
|
|
if (k_type == KDE_Gaussian)
|
|
{
|
|
for (size_t i = 0; i < x_.size(); i++)
|
|
{
|
|
out_x += ((x - x_[i])/h_)*gaussian_kernel((x - x_[i])/h_, (y - y_[i])/h_);
|
|
out_y += ((y - y_[i])/h_)*gaussian_kernel((x - x_[i])/h_, (y - y_[i])/h_);
|
|
}
|
|
}
|
|
else throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid kernel type.");
|
|
|
|
gx = -1.0*out_x/(h_*h_*h_*x_.size());
|
|
gy = -1.0*out_y/(h_*h_*h_*x_.size());
|
|
return;
|
|
}
|
|
|
|
void gctl::kde2d::get_kernel_gradient_at(size_t k_id, double x, double y, double &gx, double &gy, kde_kernel_e k_type)
|
|
{
|
|
if (k_id >= x_.size()) throw std::runtime_error("[gctl::kde2d::get_kernel_gradient_at(...)] Invalid kernel index.");
|
|
|
|
double out_x, out_y;
|
|
if (k_type == KDE_Gaussian)
|
|
{
|
|
out_x = ((x - x_[k_id])/h_)*gaussian_kernel((x - x_[k_id])/h_, (y - y_[k_id])/h_);
|
|
out_y = ((y - y_[k_id])/h_)*gaussian_kernel((x - x_[k_id])/h_, (y - y_[k_id])/h_);
|
|
}
|
|
else throw std::runtime_error("[gctl::kde2d::get_kernel_gradient_at(...)] Invalid kernel type.");
|
|
|
|
gx = -1.0*out_x/(h_*h_*h_);
|
|
gy = -1.0*out_y/(h_*h_*h_);
|
|
return;
|
|
}
|
|
|
|
void gctl::kde2d::get_distribution(const array<double> x, const array<double> y, array<double> &d,
|
|
array<double> &dx, array<double> &dy, kde_kernel_e k_type, kde_norm_e n_type, double norm)
|
|
{
|
|
if (x.size() != y.size()) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid distribution size.");
|
|
if (norm < 0.0) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid normalization value.");
|
|
|
|
size_t xnum = x.size();
|
|
d.resize(xnum);
|
|
dx.resize(xnum);
|
|
dy.resize(xnum);
|
|
|
|
double s = 0.0;
|
|
if (n_type == KDE_MAX2ONE)
|
|
{
|
|
for (size_t i = 0; i < xnum; i++)
|
|
{
|
|
d[i] = get_density_at(x[i], y[i], k_type);
|
|
get_gradient_at(x[i], y[i], dx[i], dy[i], k_type);
|
|
|
|
s = std::max(s, d[i]);
|
|
}
|
|
}
|
|
else if (n_type == KDE_SUM2ONE)
|
|
{
|
|
for (size_t i = 0; i < xnum; i++)
|
|
{
|
|
d[i] = get_density_at(x[i], y[i], k_type);
|
|
get_gradient_at(x[i], y[i], dx[i], dy[i], k_type);
|
|
|
|
s += d[i];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (size_t i = 0; i < xnum; i++)
|
|
{
|
|
d[i] = get_density_at(x[i], y[i], k_type);
|
|
get_gradient_at(x[i], y[i], dx[i], dy[i], k_type);
|
|
}
|
|
|
|
s = norm;
|
|
}
|
|
|
|
for (size_t i = 0; i < xnum; i++)
|
|
{
|
|
d[i] /= s;
|
|
dx[i]/= s;
|
|
dy[i]/= s;
|
|
}
|
|
return;
|
|
}
|
|
|
|
void gctl::kde2d::save(double xmin, double xmax, double ymin, double ymax, int xnum, int ynum, std::string file)
|
|
{
|
|
std::string suffix_str = file.substr(file.find_last_of('.') + 1);
|
|
if (suffix_str != "nc")
|
|
{
|
|
throw std::runtime_error("[gctl::kde2d::save(...)] Invalid file extension type.");
|
|
}
|
|
|
|
array<double> dist(xnum*ynum, 0.0);
|
|
double dx = (xmax - xmin)/(xnum - 1);
|
|
double dy = (ymax - ymin)/(ynum - 1);
|
|
|
|
for (size_t i = 0; i < ynum; i++)
|
|
{
|
|
for (size_t j = 0; j < xnum; j++)
|
|
{
|
|
dist[j + i*xnum] = get_density_at(xmin + dx*j, ymin + dy*i);
|
|
}
|
|
}
|
|
|
|
if (suffix_str == "nc") save_netcdf_grid(file, dist, xnum, ynum, xmin, dx, ymin, dy, "x", "y", "probability density");
|
|
|
|
return;
|
|
}
|
|
|
|
/**
 * @brief Isotropic 2-D Gaussian kernel exp(-(x^2 + y^2)/2)/(2*pi).
 *
 * @param x Offset from the kernel centre along x; callers in this file
 *          pass it already divided by the averaging width.
 * @param y Offset from the kernel centre along y, scaled the same way.
 * @return Kernel value at (x, y).
 */
double gctl::kde2d::gaussian_kernel(double x, double y)
{
    const double r2 = x*x + y*y;
    return exp(-0.5*r2)/(2*M_PI);
}