/********************************************************
* ██████╗ ██████╗████████╗██╗
* ██╔════╝ ██╔════╝╚══██╔══╝██║
* ██║ ███╗██║ ██║ ██║
* ██║ ██║██║ ██║ ██║
* ╚██████╔╝╚██████╗ ██║ ███████╗
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2023 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "kde.h"
/**
 * @brief Default constructor with an empty body.
 *
 * The averaging width and the samples are assigned by init().
 */
gctl::kde::kde()
{
}
/**
 * @brief Destructor with an empty body.
 */
gctl::kde::~kde()
{
}
/**
 * @brief Construct the estimator and set it up in one step.
 *
 * @param h Averaging width, forwarded to init().
 * @param x Sample values, forwarded to init().
 *
 * Any exception raised by init() propagates to the caller.
 */
gctl::kde::kde(double h, const array &x)
{
    this->init(h, x);
}
/**
 * @brief Store the averaging width and the sample set.
 *
 * @param h Averaging width; must be greater than zero.
 * @param x Sample values; at least two are required.
 *
 * @throws std::runtime_error if h <= 0 or x holds fewer than two values.
 */
void gctl::kde::init(double h, const array &x)
{
    const bool bad_width = (h <= 0);
    if (bad_width)
    {
        throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid averaging width.");
    }

    const bool too_few_samples = (x.size() < 2);
    if (too_few_samples)
    {
        throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid sample size.");
    }

    h_ = h;
    x_ = x;
}
/**
 * @brief Evaluate the kernel density estimate at a location.
 *
 * Sums kernel((x - x_[i])/h_) over every stored sample and divides the
 * total by h_ times the number of samples.
 *
 * @param x      Location at which the density is evaluated.
 * @param k_type Kernel selector. Any value other than KDE_Gaussian,
 *               KDE_Epanechnikov or KDE_Rectangular selects the triangular kernel.
 * @return The density estimate at x.
 */
double gctl::kde::get_density_at(double x, kde_kernel_e k_type)
{
    double total = 0;
    for (size_t k = 0; k < x_.size(); k++)
    {
        // Offset from the k-th sample, scaled by the averaging width.
        const double u = (x - x_[k])/h_;
        switch (k_type)
        {
            case KDE_Gaussian:
                total += gaussian_kernel(u);
                break;
            case KDE_Epanechnikov:
                total += epanechnikov_kernel(u);
                break;
            case KDE_Rectangular:
                total += rectangular_kernel(u);
                break;
            default:
                total += triangular_kernel(u);
                break;
        }
    }
    return total/(h_*x_.size());
}
/**
 * @brief Evaluate the contribution of one sample's kernel at a location.
 *
 * Returns kernel((x - x_[k_id])/h_) divided by h_. The result is not divided
 * by the number of samples.
 *
 * @param k_id   Index of the sample whose kernel is evaluated.
 * @param x      Location at which the kernel is evaluated.
 * @param k_type Kernel selector. Any value other than KDE_Gaussian,
 *               KDE_Epanechnikov or KDE_Rectangular selects the triangular kernel.
 * @return The scaled kernel value.
 *
 * @throws std::runtime_error if k_id is not smaller than the number of samples.
 */
double gctl::kde::get_kernel_density_at(size_t k_id, double x, kde_kernel_e k_type)
{
    if (k_id >= x_.size())
    {
        throw std::runtime_error("[gctl::kde::get_kernel_density_at(...)] Invalid kernel index.");
    }

    const double u = (x - x_[k_id])/h_;
    double value;
    switch (k_type)
    {
        case KDE_Gaussian:
            value = gaussian_kernel(u);
            break;
        case KDE_Epanechnikov:
            value = epanechnikov_kernel(u);
            break;
        case KDE_Rectangular:
            value = rectangular_kernel(u);
            break;
        default:
            value = triangular_kernel(u);
            break;
    }
    return value/h_;
}
/**
 * @brief Evaluate the derivative of the density estimate with respect to x.
 *
 * For each sample the negated kernel derivative at u = (x - x_[i])/h_ is
 * accumulated: u*gaussian_kernel(u) for the Gaussian kernel, and the kernel
 * function called with gradient = true for the others. The sum is multiplied
 * by -1 and divided by h_*h_ times the number of samples.
 *
 * @param x      Location at which the gradient is evaluated.
 * @param k_type Kernel selector. Any value other than KDE_Gaussian,
 *               KDE_Epanechnikov or KDE_Rectangular selects the triangular kernel.
 * @return The gradient of the density estimate at x.
 */
double gctl::kde::get_gradient_at(double x, kde_kernel_e k_type)
{
    double total = 0;
    for (size_t k = 0; k < x_.size(); k++)
    {
        const double u = (x - x_[k])/h_;
        switch (k_type)
        {
            case KDE_Gaussian:
                total += u*gaussian_kernel(u);
                break;
            case KDE_Epanechnikov:
                total += epanechnikov_kernel(u, true);
                break;
            case KDE_Rectangular:
                total += rectangular_kernel(u, true);
                break;
            default:
                total += triangular_kernel(u, true);
                break;
        }
    }
    return -1.0*total/(h_*h_*x_.size());
}
/**
 * @brief Evaluate the derivative, with respect to x, of one sample's kernel.
 *
 * Mirrors get_gradient_at() for a single sample: the negated kernel
 * derivative at u = (x - x_[k_id])/h_ is multiplied by -1 and divided by
 * h_*h_. The result is not divided by the number of samples.
 *
 * @param k_id   Index of the sample whose kernel is differentiated.
 * @param x      Location at which the gradient is evaluated.
 * @param k_type Kernel selector. Any value other than KDE_Gaussian,
 *               KDE_Epanechnikov or KDE_Rectangular selects the triangular kernel.
 * @return The gradient of the selected kernel at x.
 *
 * @throws std::runtime_error if k_id is not smaller than the number of samples.
 */
double gctl::kde::get_kernel_gradient_at(size_t k_id, double x, kde_kernel_e k_type)
{
    if (k_id >= x_.size()) throw std::runtime_error("[gctl::kde::get_kernel_gradient_at(...)] Invalid kernel index.");

    const double u = (x - x_[k_id])/h_;
    double out;
    if (k_type == KDE_Gaussian) out = u*gaussian_kernel(u);
    // The non-Gaussian kernels must be queried in gradient mode. Without the
    // flag they return the kernel value itself rather than its derivative.
    else if (k_type == KDE_Epanechnikov) out = epanechnikov_kernel(u, true);
    else if (k_type == KDE_Rectangular) out = rectangular_kernel(u, true);
    else out = triangular_kernel(u, true);
    return -1.0*out/(h_*h_);
}
/**
 * @brief Evaluate density and gradient at a set of locations and normalize them.
 *
 * d[i] and dx[i] receive get_density_at(x[i]) and get_gradient_at(x[i]),
 * both divided by a normalizer s chosen by n_type:
 *  - KDE_MAX2ONE: s is the largest density value (starting from 0);
 *  - KDE_SUM2ONE: s is the sum of the density values;
 *  - otherwise:   s is the caller-supplied norm.
 *
 * @param x      Locations to evaluate.
 * @param d      Output densities; resized to x.size().
 * @param dx     Output gradients; resized to x.size().
 * @param k_type Kernel selector passed to get_density_at()/get_gradient_at().
 * @param n_type Normalization mode.
 * @param norm   Normalizer used when n_type is neither KDE_MAX2ONE nor KDE_SUM2ONE.
 *
 * @throws std::runtime_error if norm is negative, or if norm is zero in the
 *         mode that divides by it.
 */
void gctl::kde::get_distribution(const array x, array &d,
array &dx, kde_kernel_e k_type, kde_norm_e n_type, double norm)
{
    if (norm < 0.0) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid normalization value.");

    const bool data_driven = (n_type == KDE_MAX2ONE || n_type == KDE_SUM2ONE);
    // A zero user-supplied normalizer would turn every output into inf/NaN.
    if (!data_driven && norm == 0.0) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid normalization value.");

    const size_t xnum = x.size();
    d.resize(xnum);
    dx.resize(xnum);

    // The evaluation is the same for every normalization mode.
    for (size_t i = 0; i < xnum; i++)
    {
        d[i] = get_density_at(x[i], k_type);
        dx[i] = get_gradient_at(x[i], k_type);
    }

    double s = 0.0;
    if (n_type == KDE_MAX2ONE)
    {
        for (size_t i = 0; i < xnum; i++) s = std::max(s, d[i]);
    }
    else if (n_type == KDE_SUM2ONE)
    {
        for (size_t i = 0; i < xnum; i++) s += d[i];
    }
    else
    {
        s = norm;
    }

    // All densities vanished (e.g. every location lies outside the support of
    // a compact kernel): leave the values untouched instead of producing 0/0.
    if (s == 0.0) return;

    for (size_t i = 0; i < xnum; i++)
    {
        d[i] /= s;
        dx[i] /= s;
    }
    return;
}
/**
 * @brief Standard Gaussian kernel: exp(-x*x/2) / sqrt(2*pi).
 *
 * @param x Kernel argument.
 * @return Kernel value at x.
 */
double gctl::kde::gaussian_kernel(double x)
{
    const double exponent = -0.5*x*x;
    const double normalizer = sqrt(2*M_PI);
    return exp(exponent)/normalizer;
}
/**
 * @brief Epanechnikov kernel, or its negated derivative.
 *
 * @param x        Kernel argument.
 * @param gradient When false, returns 0.75*(1 - x*x); when true, returns 1.5*x,
 *                 which is the derivative with its sign flipped.
 * @return 0 whenever |x| >= 1, otherwise the value described above.
 */
double gctl::kde::epanechnikov_kernel(double x, bool gradient)
{
    // Outside the unit interval both the kernel and its derivative are zero.
    if (fabs(x) >= 1)
    {
        return 0.0;
    }
    return gradient ? 1.5*x : 0.75*(1 - x*x);
}
/**
 * @brief Rectangular (uniform) kernel, or its derivative.
 *
 * @param x        Kernel argument.
 * @param gradient When true the function always returns 0.
 * @return 0 in gradient mode or when |x| >= 1, otherwise 0.5.
 */
double gctl::kde::rectangular_kernel(double x, bool gradient)
{
    const bool is_zero = gradient || fabs(x) >= 1;
    return is_zero ? 0.0 : 0.5;
}
/**
 * @brief Triangular kernel, or its negated derivative.
 *
 * @param x        Kernel argument.
 * @param gradient When false, returns 1 - |x|; when true, returns 1.0 for
 *                 x >= 0 and -1.0 otherwise, which is the derivative with
 *                 its sign flipped.
 * @return 0 whenever |x| >= 1, otherwise the value described above.
 */
double gctl::kde::triangular_kernel(double x, bool gradient)
{
    // Outside the unit interval both the kernel and its derivative are zero.
    if (fabs(x) >= 1)
    {
        return 0.0;
    }

    if (!gradient)
    {
        return 1 - fabs(x);
    }
    return (x >= 0) ? 1.0 : -1.0;
}
/**
 * @brief Default constructor with an empty body.
 *
 * The averaging width and the sample coordinates are assigned by init().
 */
gctl::kde2d::kde2d()
{
}
/**
 * @brief Destructor with an empty body.
 */
gctl::kde2d::~kde2d()
{
}
/**
 * @brief Construct the 2D estimator from array samples and set it up in one step.
 *
 * @param h Averaging width, forwarded to init().
 * @param x Sample x coordinates, forwarded to init().
 * @param y Sample y coordinates, forwarded to init().
 *
 * Any exception raised by init() propagates to the caller.
 */
gctl::kde2d::kde2d(double h, const array &x, const array &y)
{
    this->init(h, x, y);
}
/**
 * @brief Construct the 2D estimator from std::vector samples and set it up in one step.
 *
 * @param h Averaging width, forwarded to init().
 * @param x Sample x coordinates, forwarded to init().
 * @param y Sample y coordinates, forwarded to init().
 *
 * Any exception raised by init() propagates to the caller.
 */
gctl::kde2d::kde2d(double h, const std::vector &x, const std::vector &y)
{
    this->init(h, x, y);
}
/**
 * @brief Store the averaging width and the sample coordinates (array input).
 *
 * @param h Averaging width; must be greater than zero.
 * @param x Sample x coordinates.
 * @param y Sample y coordinates; must have the same length as x.
 *
 * @throws std::runtime_error if h <= 0, if either input holds fewer than two
 *         values, or if the two inputs differ in length.
 */
void gctl::kde2d::init(double h, const array &x, const array &y)
{
    if (h <= 0)
    {
        throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid averaging width.");
    }

    const bool too_few = (x.size() < 2 || y.size() < 2);
    const bool mismatched = (x.size() != y.size());
    if (too_few || mismatched)
    {
        throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid sample size.");
    }

    h_ = h;
    x_ = x;
    y_ = y;
}
/**
 * @brief Store the averaging width and the sample coordinates (std::vector input).
 *
 * The vectors are copied into the internal arrays through import_vector().
 *
 * @param h Averaging width; must be greater than zero.
 * @param x Sample x coordinates.
 * @param y Sample y coordinates; must have the same length as x.
 *
 * @throws std::runtime_error if h <= 0, if either input holds fewer than two
 *         values, or if the two inputs differ in length.
 */
void gctl::kde2d::init(double h, const std::vector &x, const std::vector &y)
{
    if (h <= 0)
    {
        throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid averaging width.");
    }

    const bool too_few = (x.size() < 2 || y.size() < 2);
    const bool mismatched = (x.size() != y.size());
    if (too_few || mismatched)
    {
        throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid sample size.");
    }

    h_ = h;
    x_.import_vector(x);
    y_.import_vector(y);
}
/**
 * @brief Evaluate the 2D kernel density estimate at a location.
 *
 * Sums gaussian_kernel((x - x_[i])/h_, (y - y_[i])/h_) over every stored
 * sample and divides the total by h_*h_ times the number of samples.
 *
 * @param x      x coordinate of the evaluation location.
 * @param y      y coordinate of the evaluation location.
 * @param k_type Kernel selector; only KDE_Gaussian is accepted.
 * @return The density estimate at (x, y).
 *
 * @throws std::runtime_error if k_type is not KDE_Gaussian.
 */
double gctl::kde2d::get_density_at(double x, double y, kde_kernel_e k_type)
{
    if (k_type != KDE_Gaussian)
    {
        throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid kernel type.");
    }

    double total = 0;
    for (size_t k = 0; k < x_.size(); k++)
    {
        const double u = (x - x_[k])/h_;
        const double v = (y - y_[k])/h_;
        total += gaussian_kernel(u, v);
    }
    return total/(h_*h_*x_.size());
}
/**
 * @brief Evaluate the contribution of one sample's 2D kernel at a location.
 *
 * Returns gaussian_kernel((x - x_[k_id])/h_, (y - y_[k_id])/h_) divided by
 * h_*h_. The result is not divided by the number of samples.
 *
 * @param k_id   Index of the sample whose kernel is evaluated.
 * @param x      x coordinate of the evaluation location.
 * @param y      y coordinate of the evaluation location.
 * @param k_type Kernel selector; only KDE_Gaussian is accepted.
 * @return The scaled kernel value.
 *
 * @throws std::runtime_error if k_id is not smaller than the number of
 *         samples, or if k_type is not KDE_Gaussian (checked in that order).
 */
double gctl::kde2d::get_kernel_density_at(size_t k_id, double x, double y, kde_kernel_e k_type)
{
    if (k_id >= x_.size())
    {
        throw std::runtime_error("[gctl::kde2d::get_kernel_density_at(...)] Invalid kernel index.");
    }
    if (k_type != KDE_Gaussian)
    {
        throw std::runtime_error("[gctl::kde2d::get_kernel_density_at(...)] Invalid kernel type.");
    }

    const double u = (x - x_[k_id])/h_;
    const double v = (y - y_[k_id])/h_;
    const double value = gaussian_kernel(u, v);
    return value/(h_*h_);
}
/**
 * @brief Evaluate the gradient of the 2D density estimate at a location.
 *
 * For each sample, with u = (x - x_[i])/h_ and v = (y - y_[i])/h_, the
 * products u*gaussian_kernel(u, v) and v*gaussian_kernel(u, v) are summed.
 * Each sum is multiplied by -1 and divided by h_*h_*h_ times the number of
 * samples.
 *
 * @param x      x coordinate of the evaluation location.
 * @param y      y coordinate of the evaluation location.
 * @param gx     Output: gradient component along x.
 * @param gy     Output: gradient component along y.
 * @param k_type Kernel selector; only KDE_Gaussian is accepted.
 *
 * @throws std::runtime_error if k_type is not KDE_Gaussian; gx and gy are
 *         left unmodified in that case.
 */
void gctl::kde2d::get_gradient_at(double x, double y, double &gx, double &gy, kde_kernel_e k_type)
{
    if (k_type != KDE_Gaussian)
    {
        throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid kernel type.");
    }

    double sum_x = 0.0;
    double sum_y = 0.0;
    for (size_t k = 0; k < x_.size(); k++)
    {
        const double u = (x - x_[k])/h_;
        const double v = (y - y_[k])/h_;
        // Both components share the same kernel value.
        const double w = gaussian_kernel(u, v);
        sum_x += u*w;
        sum_y += v*w;
    }

    const double scale = h_*h_*h_*x_.size();
    gx = -1.0*sum_x/scale;
    gy = -1.0*sum_y/scale;
}
/**
 * @brief Evaluate the gradient of one sample's 2D kernel at a location.
 *
 * With u = (x - x_[k_id])/h_ and v = (y - y_[k_id])/h_, the outputs are
 * -u*gaussian_kernel(u, v) and -v*gaussian_kernel(u, v), each divided by
 * h_*h_*h_. The result is not divided by the number of samples.
 *
 * @param k_id   Index of the sample whose kernel is differentiated.
 * @param x      x coordinate of the evaluation location.
 * @param y      y coordinate of the evaluation location.
 * @param gx     Output: gradient component along x.
 * @param gy     Output: gradient component along y.
 * @param k_type Kernel selector; only KDE_Gaussian is accepted.
 *
 * @throws std::runtime_error if k_id is not smaller than the number of
 *         samples, or if k_type is not KDE_Gaussian (checked in that order);
 *         gx and gy are left unmodified in either case.
 */
void gctl::kde2d::get_kernel_gradient_at(size_t k_id, double x, double y, double &gx, double &gy, kde_kernel_e k_type)
{
    if (k_id >= x_.size())
    {
        throw std::runtime_error("[gctl::kde2d::get_kernel_gradient_at(...)] Invalid kernel index.");
    }
    if (k_type != KDE_Gaussian)
    {
        throw std::runtime_error("[gctl::kde2d::get_kernel_gradient_at(...)] Invalid kernel type.");
    }

    const double u = (x - x_[k_id])/h_;
    const double v = (y - y_[k_id])/h_;
    const double w = gaussian_kernel(u, v);
    const double scale = h_*h_*h_;

    gx = -1.0*(u*w)/scale;
    gy = -1.0*(v*w)/scale;
}
/**
 * @brief Evaluate 2D density and gradient at a set of locations and normalize them.
 *
 * d[i] receives get_density_at(x[i], y[i]); dx[i] and dy[i] receive the
 * components from get_gradient_at(x[i], y[i]). All three are divided by a
 * normalizer s chosen by n_type:
 *  - KDE_MAX2ONE: s is the largest density value (starting from 0);
 *  - KDE_SUM2ONE: s is the sum of the density values;
 *  - otherwise:   s is the caller-supplied norm.
 *
 * @param x      x coordinates of the locations.
 * @param y      y coordinates of the locations; same length as x.
 * @param d      Output densities; resized to x.size().
 * @param dx     Output x gradients; resized to x.size().
 * @param dy     Output y gradients; resized to x.size().
 * @param k_type Kernel selector passed to get_density_at()/get_gradient_at().
 * @param n_type Normalization mode.
 * @param norm   Normalizer used when n_type is neither KDE_MAX2ONE nor KDE_SUM2ONE.
 *
 * @throws std::runtime_error if x and y differ in length, if norm is negative,
 *         or if norm is zero in the mode that divides by it. Exceptions from
 *         get_density_at()/get_gradient_at() propagate.
 */
void gctl::kde2d::get_distribution(const array x, const array y, array &d,
array &dx, array &dy, kde_kernel_e k_type, kde_norm_e n_type, double norm)
{
    if (x.size() != y.size()) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid distribution size.");
    if (norm < 0.0) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid normalization value.");

    const bool data_driven = (n_type == KDE_MAX2ONE || n_type == KDE_SUM2ONE);
    // A zero user-supplied normalizer would turn every output into inf/NaN.
    if (!data_driven && norm == 0.0) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid normalization value.");

    const size_t xnum = x.size();
    d.resize(xnum);
    dx.resize(xnum);
    dy.resize(xnum);

    // The evaluation is the same for every normalization mode.
    for (size_t i = 0; i < xnum; i++)
    {
        d[i] = get_density_at(x[i], y[i], k_type);
        get_gradient_at(x[i], y[i], dx[i], dy[i], k_type);
    }

    double s = 0.0;
    if (n_type == KDE_MAX2ONE)
    {
        for (size_t i = 0; i < xnum; i++) s = std::max(s, d[i]);
    }
    else if (n_type == KDE_SUM2ONE)
    {
        for (size_t i = 0; i < xnum; i++) s += d[i];
    }
    else
    {
        s = norm;
    }

    // All densities vanished (e.g. the kernel underflowed at every location):
    // leave the values untouched instead of producing 0/0.
    if (s == 0.0) return;

    for (size_t i = 0; i < xnum; i++)
    {
        d[i] /= s;
        dx[i] /= s;
        dy[i] /= s;
    }
    return;
}
/**
 * @brief Sample the density on a regular grid and write it through save_netcdf_grid().
 *
 * The grid has xnum by ynum nodes spanning [xmin, xmax] and [ymin, ymax]
 * inclusive, stored row by row (index = column + row*xnum). Each node holds
 * get_density_at() evaluated with its default kernel argument.
 *
 * @param xmin First x coordinate of the grid.
 * @param xmax Last x coordinate of the grid.
 * @param ymin First y coordinate of the grid.
 * @param ymax Last y coordinate of the grid.
 * @param xnum Number of nodes along x; must be at least 2.
 * @param ynum Number of nodes along y; must be at least 2.
 * @param file Output file name; its extension must be "nc".
 *
 * @throws std::runtime_error if the file name has no "nc" extension or if
 *         xnum or ynum is smaller than 2.
 */
void gctl::kde2d::save(double xmin, double xmax, double ymin, double ymax, int xnum, int ynum, std::string file)
{
    // A name without any '.' has no extension. find_last_of() then returns
    // npos, and npos + 1 would wrap to 0 and compare the whole name to "nc".
    const std::string::size_type dot_pos = file.find_last_of('.');
    if (dot_pos == std::string::npos || file.substr(dot_pos + 1) != "nc")
    {
        throw std::runtime_error("[gctl::kde2d::save(...)] Invalid file extension type.");
    }

    // Fewer than two nodes per axis makes the spacing below divide by zero,
    // and a negative count would become a huge unsigned size.
    if (xnum < 2 || ynum < 2)
    {
        throw std::runtime_error("[gctl::kde2d::save(...)] Invalid grid size.");
    }

    const size_t nx = static_cast<size_t>(xnum);
    const size_t ny = static_cast<size_t>(ynum);
    const double dx = (xmax - xmin)/(xnum - 1);
    const double dy = (ymax - ymin)/(ynum - 1);

    array dist(nx*ny, 0.0);
    for (size_t i = 0; i < ny; i++)
    {
        for (size_t j = 0; j < nx; j++)
        {
            dist[j + i*nx] = get_density_at(xmin + dx*j, ymin + dy*i);
        }
    }

    save_netcdf_grid(file, dist, xnum, ynum, xmin, dx, ymin, dy, "x", "y", "probability density");
    return;
}
/**
 * @brief Standard 2D Gaussian kernel: exp(-(x*x + y*y)/2) / (2*pi).
 *
 * @param x First kernel argument.
 * @param y Second kernel argument.
 * @return Kernel value at (x, y).
 */
double gctl::kde2d::gaussian_kernel(double x, double y)
{
    const double exponent = -0.5*(x*x + y*y);
    const double normalizer = 2*M_PI;
    return exp(exponent)/normalizer;
}