gctl/lib/algorithm/kde.cpp

363 lines
10 KiB
C++
Raw Normal View History

2024-09-10 15:45:07 +08:00
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2023 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "kde.h"
/**
 * @brief Default constructor; creates an estimator with nothing attached.
 *
 * The kernel width and the cached position count are zeroed so that calling
 * get_distribution() or get_gradient_at() before init() loops over nothing
 * instead of using an indeterminate count.
 * NOTE(review): assumes kde.h does not already give these members in-class
 * initialisers; if it does, the assignments are merely redundant.
 */
gctl::kde::kde()
{
    h_ = 0.0;
    xs_ = 0;
}
/**
 * @brief Destructor. Performs no explicit work.
 */
gctl::kde::~kde()
{
}
/**
 * @brief Construct an estimator and configure it in one step.
 *
 * Both arguments are handed straight to init(); any exception raised there
 * leaves this constructor as well.
 *
 * @param h kernel width forwarded to init()
 * @param x array forwarded to init()
 */
gctl::kde::kde(double h, const array<double> &x)
{
    this->init(h, x);
}
void gctl::kde::init(double h, const array<double> &x)
{
if (h <= 0) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid averaging width.");
if (x.size() < 2) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid sample size.");
h_ = h;
x_ = x;
2024-09-30 14:15:16 +08:00
xs_ = x.size();
2024-09-10 15:45:07 +08:00
return;
}
2024-09-30 14:15:16 +08:00
void gctl::kde::get_distribution(const array<double> &m, array<double> &d,
kde_kernel_e k_type)
2024-09-10 15:45:07 +08:00
{
2024-09-30 14:15:16 +08:00
double out;
int ms = m.size();
d.resize(xs_);
2024-09-10 15:45:07 +08:00
if (k_type == KDE_Gaussian)
{
2024-09-30 14:15:16 +08:00
for (size_t i = 0; i < xs_; i++)
2024-09-10 15:45:07 +08:00
{
2024-09-30 14:15:16 +08:00
out = 0;
for (size_t j = 0; j < ms; j++)
{
out += gaussian_kernel((x_[i] - m[j])/h_);
}
d[i] = out/(h_*ms);
2024-09-10 15:45:07 +08:00
}
}
else if (k_type == KDE_Epanechnikov)
{
2024-09-30 14:15:16 +08:00
for (size_t i = 0; i < xs_; i++)
2024-09-10 15:45:07 +08:00
{
2024-09-30 14:15:16 +08:00
out = 0;
for (size_t j = 0; j < ms; j++)
{
out += epanechnikov_kernel((x_[i] - m[j])/h_);
}
d[i] = out/(h_*ms);
2024-09-10 15:45:07 +08:00
}
}
else if (k_type == KDE_Rectangular)
{
2024-09-30 14:15:16 +08:00
for (size_t i = 0; i < xs_; i++)
2024-09-10 15:45:07 +08:00
{
2024-09-30 14:15:16 +08:00
out = 0;
for (size_t j = 0; j < ms; j++)
{
out += rectangular_kernel((x_[i] - m[j])/h_);
}
d[i] = out/(h_*ms);
2024-09-10 15:45:07 +08:00
}
}
else
{
2024-09-30 14:15:16 +08:00
for (size_t i = 0; i < xs_; i++)
2024-09-10 15:45:07 +08:00
{
2024-09-30 14:15:16 +08:00
out = 0;
for (size_t j = 0; j < ms; j++)
{
out += triangular_kernel((x_[i] - m[j])/h_);
}
d[i] = out/(h_*ms);
2024-09-10 15:45:07 +08:00
}
}
2024-09-30 14:15:16 +08:00
return;
2024-09-10 15:45:07 +08:00
}
2024-09-30 14:15:16 +08:00
void gctl::kde::get_gradient_at(size_t m_id, const array<double> &m,
array<double> &dm, kde_kernel_e k_type)
2024-09-10 15:45:07 +08:00
{
2024-09-30 14:15:16 +08:00
dm.resize(xs_);
int ms = m.size();
2024-09-10 15:45:07 +08:00
if (k_type == KDE_Gaussian)
{
2024-09-30 14:15:16 +08:00
for (size_t i = 0; i < xs_; i++)
2024-09-10 15:45:07 +08:00
{
2024-09-30 14:15:16 +08:00
dm[i] = ((x_[i] - m[m_id])/h_)*gaussian_kernel((x_[i] - m[m_id])/h_)/(h_*h_*ms);
2024-09-10 15:45:07 +08:00
}
}
else if (k_type == KDE_Epanechnikov)
{
2024-09-30 14:15:16 +08:00
for (size_t i = 0; i < xs_; i++)
2024-09-10 15:45:07 +08:00
{
2024-09-30 14:15:16 +08:00
dm[i] = -1.0*epanechnikov_kernel((x_[i] - m[m_id])/h_, true)/(h_*h_*ms);
2024-09-10 15:45:07 +08:00
}
}
else if (k_type == KDE_Rectangular)
{
2024-09-30 14:15:16 +08:00
for (size_t i = 0; i < xs_; i++)
2024-09-10 15:45:07 +08:00
{
2024-09-30 14:15:16 +08:00
dm[i] = -1.0*rectangular_kernel((x_[i] - m[m_id])/h_, true)/(h_*h_*ms);
2024-09-10 15:45:07 +08:00
}
}
else
{
2024-09-30 14:15:16 +08:00
for (size_t i = 0; i < xs_; i++)
2024-09-10 15:45:07 +08:00
{
2024-09-30 14:15:16 +08:00
dm[i] = -1.0*triangular_kernel((x_[i] - m[m_id])/h_, true)/(h_*h_*ms);
2024-09-10 15:45:07 +08:00
}
}
return;
}
/**
 * @brief Gaussian kernel: exp(-x^2/2) divided by sqrt(2*pi).
 *
 * @param x kernel argument
 * @return kernel value at x
 */
double gctl::kde::gaussian_kernel(double x)
{
    const double normaliser = sqrt(2*M_PI);
    const double exponent = -0.5*x*x;
    return exp(exponent)/normaliser;
}
/**
 * @brief Epanechnikov kernel 0.75*(1 - x^2) on |x| < 1, or its derivative.
 *
 * @param x        kernel argument
 * @param gradient when true, return the derivative -1.5*x instead of the
 *                 kernel value
 * @return the requested quantity; 0 whenever |x| >= 1
 */
double gctl::kde::epanechnikov_kernel(double x, bool gradient)
{
    // Kernel and derivative are both zero outside the open interval (-1, 1),
    // so the support test is done once for both modes.
    if (fabs(x) >= 1) return 0;

    if (gradient) return -1.5*x;
    return 0.75*(1 - x*x);
}
/**
 * @brief Rectangular (box) kernel: 0.5 on |x| < 1, otherwise 0.
 *
 * @param x        kernel argument
 * @param gradient when true, the derivative is requested; it is reported
 *                 as 0 for every x
 * @return the requested quantity
 */
double gctl::kde::rectangular_kernel(double x, bool gradient)
{
    return (gradient || fabs(x) >= 1) ? 0.0 : 0.5;
}
/**
 * @brief Triangular kernel 1 - |x| on |x| < 1, or its derivative.
 *
 * @param x        kernel argument
 * @param gradient when true, return the slope of the kernel instead of
 *                 its value
 * @return the requested quantity; 0 whenever |x| >= 1
 */
double gctl::kde::triangular_kernel(double x, bool gradient)
{
    // Value and slope are both zero outside the open interval (-1, 1).
    if (fabs(x) >= 1) return 0;

    if (!gradient) return (1 - fabs(x));

    // Slope of 1 - |x|; the kink at x == 0 is given the right-hand slope.
    return (x >= 0) ? -1.0 : 1.0;
}
/**
 * @brief Default constructor. Does nothing; call init() before use.
 */
gctl::kde2d::kde2d()
{
}
/**
 * @brief Destructor. Performs no explicit work.
 */
gctl::kde2d::~kde2d()
{
}
/**
 * @brief Construct a 2D estimator from gctl arrays and configure it at once.
 *
 * All arguments are forwarded to the array overload of init(); exceptions
 * raised there propagate out of this constructor.
 *
 * @param h kernel width
 * @param x x coordinates of the kernel centres
 * @param y y coordinates of the kernel centres
 */
gctl::kde2d::kde2d(double h, const array<double> &x, const array<double> &y)
{
    this->init(h, x, y);
}
/**
 * @brief Construct a 2D estimator from std::vector inputs and configure it at once.
 *
 * All arguments are forwarded to the std::vector overload of init();
 * exceptions raised there propagate out of this constructor.
 *
 * @param h kernel width
 * @param x x coordinates of the kernel centres
 * @param y y coordinates of the kernel centres
 */
gctl::kde2d::kde2d(double h, const std::vector<double> &x, const std::vector<double> &y)
{
    this->init(h, x, y);
}
/**
 * @brief Store the kernel width and the kernel centre coordinates.
 *
 * @param h kernel (averaging) width; must be strictly positive
 * @param x x coordinates of the kernel centres
 * @param y y coordinates of the kernel centres; same length as x
 *
 * @throws std::runtime_error if h is not a positive number (NaN included),
 *         or if x and y differ in length or hold fewer than two entries
 */
void gctl::kde2d::init(double h, const array<double> &x, const array<double> &y)
{
    // !(h > 0) instead of (h <= 0): a NaN width fails every ordered
    // comparison and would otherwise slip through.
    if (!(h > 0)) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid averaging width.");

    // Once the sizes are known to match, checking one of them against the
    // minimum covers both.
    if (x.size() != y.size() || x.size() < 2) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid sample size.");

    h_ = h;
    x_ = x;
    y_ = y;
    return;
}
/**
 * @brief Store the kernel width and the kernel centre coordinates (std::vector inputs).
 *
 * The coordinates are copied into the internal arrays through import_vector().
 *
 * @param h kernel (averaging) width; must be strictly positive
 * @param x x coordinates of the kernel centres
 * @param y y coordinates of the kernel centres; same length as x
 *
 * @throws std::runtime_error if h is not a positive number (NaN included),
 *         or if x and y differ in length or hold fewer than two entries
 */
void gctl::kde2d::init(double h, const std::vector<double> &x, const std::vector<double> &y)
{
    // !(h > 0) instead of (h <= 0): a NaN width fails every ordered
    // comparison and would otherwise slip through.
    if (!(h > 0)) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid averaging width.");

    // Once the sizes are known to match, checking one of them against the
    // minimum covers both.
    if (x.size() != y.size() || x.size() < 2) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid sample size.");

    h_ = h;
    x_.import_vector(x);
    y_.import_vector(y);
    return;
}
/**
 * @brief Density estimate at a single point.
 *
 * Sums the kernel contribution of every stored centre and divides the
 * total by h_^2 times the number of centres.
 *
 * @param x      x coordinate of the query point
 * @param y      y coordinate of the query point
 * @param k_type kernel selector; only KDE_Gaussian is accepted
 * @return density estimate at (x, y)
 *
 * @throws std::runtime_error for any kernel type other than KDE_Gaussian
 */
double gctl::kde2d::get_density_at(double x, double y, kde_kernel_e k_type)
{
    if (k_type != KDE_Gaussian)
    {
        throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid kernel type.");
    }

    double total = 0;
    for (size_t k = 0; k < x_.size(); k++)
    {
        const double u = (x - x_[k])/h_;
        const double v = (y - y_[k])/h_;
        total += gaussian_kernel(u, v);
    }
    return total/(h_*h_*x_.size());
}
/**
 * @brief Contribution of one stored kernel at a single point.
 *
 * Evaluates the kernel centred on (x_[k_id], y_[k_id]) and divides it by
 * h_^2. No division by the number of centres is applied here.
 *
 * @param k_id   index of the kernel centre
 * @param x      x coordinate of the query point
 * @param y      y coordinate of the query point
 * @param k_type kernel selector; only KDE_Gaussian is accepted
 * @return scaled kernel value at (x, y)
 *
 * @throws std::runtime_error if k_id is out of range, or for any kernel
 *         type other than KDE_Gaussian (the index is checked first)
 */
double gctl::kde2d::get_kernel_density_at(size_t k_id, double x, double y, kde_kernel_e k_type)
{
    if (k_id >= x_.size())
    {
        throw std::runtime_error("[gctl::kde2d::get_kernel_density_at(...)] Invalid kernel index.");
    }
    if (k_type != KDE_Gaussian)
    {
        throw std::runtime_error("[gctl::kde2d::get_kernel_density_at(...)] Invalid kernel type.");
    }

    const double u = (x - x_[k_id])/h_;
    const double v = (y - y_[k_id])/h_;
    const double value = gaussian_kernel(u, v);
    return value/(h_*h_);
}
/**
 * @brief Spatial gradient of the density estimate at a single point.
 *
 * With u = (x - x_[i])/h_ and v = (y - y_[i])/h_ the outputs are
 *     gx = -sum_i u*K(u, v) / (h_^3 * n),   gy = -sum_i v*K(u, v) / (h_^3 * n)
 * where n is the number of stored centres.
 *
 * @param x      x coordinate of the query point
 * @param y      y coordinate of the query point
 * @param gx     output, x component of the gradient
 * @param gy     output, y component of the gradient
 * @param k_type kernel selector; only KDE_Gaussian is accepted
 *
 * @throws std::runtime_error for any kernel type other than KDE_Gaussian;
 *         gx and gy are left untouched in that case
 */
void gctl::kde2d::get_gradient_at(double x, double y, double &gx, double &gy, kde_kernel_e k_type)
{
    if (k_type != KDE_Gaussian) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid kernel type.");

    double out_x = 0.0, out_y = 0.0;
    for (size_t i = 0; i < x_.size(); i++)
    {
        const double u = (x - x_[i])/h_;
        const double v = (y - y_[i])/h_;

        // Both components use the same kernel value; evaluate exp() once.
        const double k = gaussian_kernel(u, v);
        out_x += u*k;
        out_y += v*k;
    }

    const double scale = h_*h_*h_*x_.size();
    gx = -1.0*out_x/scale;
    gy = -1.0*out_y/scale;
    return;
}
/**
 * @brief Spatial gradient of one stored kernel's contribution at a single point.
 *
 * With u = (x - x_[k_id])/h_ and v = (y - y_[k_id])/h_ the outputs are
 *     gx = -u*K(u, v) / h_^3,   gy = -v*K(u, v) / h_^3.
 * No division by the number of centres is applied here.
 *
 * @param k_id   index of the kernel centre
 * @param x      x coordinate of the query point
 * @param y      y coordinate of the query point
 * @param gx     output, x component of the gradient
 * @param gy     output, y component of the gradient
 * @param k_type kernel selector; only KDE_Gaussian is accepted
 *
 * @throws std::runtime_error if k_id is out of range, or for any kernel
 *         type other than KDE_Gaussian (the index is checked first);
 *         gx and gy are left untouched when an exception is thrown
 */
void gctl::kde2d::get_kernel_gradient_at(size_t k_id, double x, double y, double &gx, double &gy, kde_kernel_e k_type)
{
    if (k_id >= x_.size()) throw std::runtime_error("[gctl::kde2d::get_kernel_gradient_at(...)] Invalid kernel index.");
    if (k_type != KDE_Gaussian) throw std::runtime_error("[gctl::kde2d::get_kernel_gradient_at(...)] Invalid kernel type.");

    const double u = (x - x_[k_id])/h_;
    const double v = (y - y_[k_id])/h_;

    // Both components use the same kernel value; evaluate exp() once.
    const double k = gaussian_kernel(u, v);

    const double scale = h_*h_*h_;
    gx = -1.0*(u*k)/scale;
    gy = -1.0*(v*k)/scale;
    return;
}
/**
 * @brief Evaluate the normalised density and its gradient at a list of points.
 *
 * Density and gradient are computed at every (x[i], y[i]) with
 * get_density_at() and get_gradient_at(), then all three outputs are
 * divided by a common normaliser s:
 *   - KDE_MAX2ONE: s is the largest density value (so the maximum becomes 1)
 *   - KDE_SUM2ONE: s is the sum of the density values (so they sum to 1)
 *   - any other n_type: s is the caller-supplied norm
 *
 * @param x      x coordinates of the query points
 * @param y      y coordinates of the query points; same length as x
 * @param d      output densities, resized to the number of points
 * @param dx     output x gradient components, resized likewise
 * @param dy     output y gradient components, resized likewise
 * @param k_type kernel selector, forwarded to the per-point evaluators
 * @param n_type normalisation mode, see above
 * @param norm   normaliser used when n_type is neither KDE_MAX2ONE nor
 *               KDE_SUM2ONE; a negative value is rejected in every mode
 *
 * @throws std::runtime_error if x and y differ in length, if norm is
 *         negative, if the normaliser is not a positive number while
 *         there are points to normalise, or whatever the per-point
 *         evaluators throw for an unsupported kernel type
 */
void gctl::kde2d::get_distribution(const array<double> x, const array<double> y, array<double> &d,
    array<double> &dx, array<double> &dy, kde_kernel_e k_type, kde_norm_e n_type, double norm)
{
    if (x.size() != y.size()) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid distribution size.");
    if (norm < 0.0) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid normalization value.");

    const size_t xnum = x.size();
    d.resize(xnum);
    dx.resize(xnum);
    dy.resize(xnum);

    // The raw evaluation is the same for every normalisation mode.
    for (size_t i = 0; i < xnum; i++)
    {
        d[i] = get_density_at(x[i], y[i], k_type);
        get_gradient_at(x[i], y[i], dx[i], dy[i], k_type);
    }

    double s = 0.0;
    if (n_type == KDE_MAX2ONE)
    {
        for (size_t i = 0; i < xnum; i++) s = std::max(s, d[i]);
    }
    else if (n_type == KDE_SUM2ONE)
    {
        for (size_t i = 0; i < xnum; i++) s += d[i];
    }
    else s = norm;

    // A zero (or NaN) normaliser would silently fill the outputs with
    // inf/NaN. With no points there is nothing to divide, so that case
    // stays a no-op.
    if (xnum > 0 && !(s > 0.0)) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid normalization value.");

    for (size_t i = 0; i < xnum; i++)
    {
        d[i] /= s;
        dx[i] /= s;
        dy[i] /= s;
    }
    return;
}
/**
 * @brief Sample the density on a regular grid and write it to a netCDF file.
 *
 * The grid has xnum by ynum nodes spanning [xmin, xmax] and [ymin, ymax]
 * inclusive. Values are stored row by row (x varies fastest) and passed to
 * save_netcdf_grid() under the variable name "probability density".
 *
 * @param xmin x coordinate of the first grid column
 * @param xmax x coordinate of the last grid column
 * @param ymin y coordinate of the first grid row
 * @param ymax y coordinate of the last grid row
 * @param xnum number of grid columns; at least 2
 * @param ynum number of grid rows; at least 2
 * @param file output file name; the text after the last '.' must be "nc"
 *
 * @throws std::runtime_error if the extension is not "nc" or if either
 *         grid dimension is smaller than 2
 */
void gctl::kde2d::save(double xmin, double xmax, double ymin, double ymax, int xnum, int ynum, std::string file)
{
    std::string suffix_str = file.substr(file.find_last_of('.') + 1);
    if (suffix_str != "nc")
    {
        throw std::runtime_error("[gctl::kde2d::save(...)] Invalid file extension type.");
    }

    // Fewer than two nodes along an axis makes the spacing below a division
    // by zero, and a negative count would wrap around in the unsigned loops.
    if (xnum < 2 || ynum < 2)
    {
        throw std::runtime_error("[gctl::kde2d::save(...)] Invalid grid size.");
    }

    array<double> dist(xnum*ynum, 0.0);
    const double dx = (xmax - xmin)/(xnum - 1);
    const double dy = (ymax - ymin)/(ynum - 1);

    // Both counts are known to be positive here, so the conversion is exact.
    const size_t cols = xnum;
    const size_t rows = ynum;
    for (size_t i = 0; i < rows; i++)
    {
        for (size_t j = 0; j < cols; j++)
        {
            dist[j + i*cols] = get_density_at(xmin + dx*j, ymin + dy*i);
        }
    }

    // The extension was validated above, so netCDF is the only format reached here.
    save_netcdf_grid(file, dist, xnum, ynum, xmin, dx, ymin, dy, "x", "y", "probability density");
    return;
}
/**
 * @brief Two-argument Gaussian kernel: exp(-(x^2 + y^2)/2) divided by 2*pi.
 *
 * @param x first kernel argument
 * @param y second kernel argument
 * @return kernel value at (x, y)
 */
double gctl::kde2d::gaussian_kernel(double x, double y)
{
    const double radius_sq = x*x + y*y;
    const double normaliser = 2*M_PI;
    return exp(-0.5*radius_sq)/normaliser;
}