/********************************************************
 * ██████╗ ██████╗████████╗██╗
 * ██╔════╝ ██╔════╝╚══██╔══╝██║
 * ██║ ███╗██║ ██║ ██║
 * ██║ ██║██║ ██║ ██║
 * ╚██████╔╝╚██████╗ ██║ ███████╗
 * ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
 * Geophysical Computational Tools & Library (GCTL)
 *
 * Copyright (c) 2023 Yi Zhang (yizhang-geo@zju.edu.cn)
 *
 * GCTL is distributed under a dual licensing scheme. You can redistribute
 * it and/or modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, either version 2
 * of the License, or (at your option) any later version. You should have
 * received a copy of the GNU Lesser General Public License along with this
 * program. If not, see <http://www.gnu.org/licenses/>.
 *
 * If the terms and conditions of the LGPL v.2. would prevent you from using
 * the GCTL, please consider the option to obtain a commercial license for a
 * fee. These licenses are offered by the GCTL's original author. As a rule,
 * licenses are provided "as-is", unlimited in time for a one time fee. Please
 * send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
 * to include some description of your company and the realm of its activities.
 * Also add information on how to contact you by electronic and paper mail.
******************************************************/ #include "kde.h" gctl::kde::kde(){} gctl::kde::~kde(){} gctl::kde::kde(double h, const array &x) { init(h, x); } void gctl::kde::init(double h, const array &x) { if (h <= 0) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid averaging width."); if (x.size() < 2) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid sample size."); h_ = h; x_ = x; return; } double gctl::kde::get_density_at(double x, kde_kernel_e k_type) { double out = 0; if (k_type == KDE_Gaussian) { for (size_t i = 0; i < x_.size(); i++) { out += gaussian_kernel((x - x_[i])/h_); } } else if (k_type == KDE_Epanechnikov) { for (size_t i = 0; i < x_.size(); i++) { out += epanechnikov_kernel((x - x_[i])/h_); } } else if (k_type == KDE_Rectangular) { for (size_t i = 0; i < x_.size(); i++) { out += rectangular_kernel((x - x_[i])/h_); } } else { for (size_t i = 0; i < x_.size(); i++) { out += triangular_kernel((x - x_[i])/h_); } } return out/(h_*x_.size()); } double gctl::kde::get_kernel_density_at(size_t k_id, double x, kde_kernel_e k_type) { if (k_id >= x_.size()) throw std::runtime_error("[gctl::kde::get_kernel_density_at(...)] Invalid kernel index."); double out; if (k_type == KDE_Gaussian) out = gaussian_kernel((x - x_[k_id])/h_); else if (k_type == KDE_Epanechnikov) out = epanechnikov_kernel((x - x_[k_id])/h_); else if (k_type == KDE_Rectangular) out = rectangular_kernel((x - x_[k_id])/h_); else out = triangular_kernel((x - x_[k_id])/h_); return out/h_; } double gctl::kde::get_gradient_at(double x, kde_kernel_e k_type) { double out = 0; if (k_type == KDE_Gaussian) { for (size_t i = 0; i < x_.size(); i++) { out += ((x - x_[i])/h_)*gaussian_kernel((x - x_[i])/h_); } } else if (k_type == KDE_Epanechnikov) { for (size_t i = 0; i < x_.size(); i++) { out += epanechnikov_kernel((x - x_[i])/h_, true); } } else if (k_type == KDE_Rectangular) { for (size_t i = 0; i < x_.size(); i++) { out += rectangular_kernel((x - x_[i])/h_, 
true); } } else { for (size_t i = 0; i < x_.size(); i++) { out += triangular_kernel((x - x_[i])/h_, true); } } return -1.0*out/(h_*h_*x_.size()); } double gctl::kde::get_kernel_gradient_at(size_t k_id, double x, kde_kernel_e k_type) { if (k_id >= x_.size()) throw std::runtime_error("[gctl::kde::get_kernel_gradient_at(...)] Invalid kernel index."); double out; if (k_type == KDE_Gaussian) out = ((x - x_[k_id])/h_)*gaussian_kernel((x - x_[k_id])/h_); else if (k_type == KDE_Epanechnikov) out = epanechnikov_kernel((x - x_[k_id])/h_); else if (k_type == KDE_Rectangular) out = rectangular_kernel((x - x_[k_id])/h_); else out = triangular_kernel((x - x_[k_id])/h_); return -1.0*out/(h_*h_); } void gctl::kde::get_distribution(const array x, array &d, array &dx, kde_kernel_e k_type, kde_norm_e n_type, double norm) { if (norm < 0.0) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid normalization value."); size_t xnum = x.size(); d.resize(xnum); dx.resize(xnum); double s = 0.0; if (n_type == KDE_MAX2ONE) { for (size_t i = 0; i < xnum; i++) { d[i] = get_density_at(x[i], k_type); dx[i]= get_gradient_at(x[i], k_type); s = std::max(s, d[i]); } } else if (n_type == KDE_SUM2ONE) { for (size_t i = 0; i < xnum; i++) { d[i] = get_density_at(x[i], k_type); dx[i]= get_gradient_at(x[i], k_type); s += d[i]; } } else { for (size_t i = 0; i < xnum; i++) { d[i] = get_density_at(x[i], k_type); dx[i]= get_gradient_at(x[i], k_type); } s = norm; } for (size_t i = 0; i < xnum; i++) { d[i] /= s; dx[i]/= s; } return; } double gctl::kde::gaussian_kernel(double x) { return exp(-0.5*x*x)/sqrt(2*M_PI); } double gctl::kde::epanechnikov_kernel(double x, bool gradient) { if (gradient) { if (fabs(x) >= 1) return 0; else return 1.5*x; } if (fabs(x) >= 1) return 0; else return 0.75*(1 - x*x); } double gctl::kde::rectangular_kernel(double x, bool gradient) { if (gradient) return 0; if (fabs(x) >= 1) return 0; else return 0.5; } double gctl::kde::triangular_kernel(double x, bool gradient) { if 
(gradient) { if (fabs(x) >= 1) return 0; else if (x >= 0) return 1.0; else return -1.0; } if (fabs(x) >= 1) return 0; else return (1 - fabs(x)); } gctl::kde2d::kde2d(){} gctl::kde2d::~kde2d(){} gctl::kde2d::kde2d(double h, const array &x, const array &y) { init(h, x, y); } gctl::kde2d::kde2d(double h, const std::vector &x, const std::vector &y) { init(h, x, y); } void gctl::kde2d::init(double h, const array &x, const array &y) { if (h <= 0) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid averaging width."); if (x.size() < 2 || y.size() < 2 || x.size() != y.size()) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid sample size."); h_ = h; x_ = x; y_ = y; return; } void gctl::kde2d::init(double h, const std::vector &x, const std::vector &y) { if (h <= 0) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid averaging width."); if (x.size() < 2 || y.size() < 2 || x.size() != y.size()) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid sample size."); h_ = h; x_.import_vector(x); y_.import_vector(y); return; } double gctl::kde2d::get_density_at(double x, double y, kde_kernel_e k_type) { double out = 0; if (k_type == KDE_Gaussian) { for (size_t i = 0; i < x_.size(); i++) { out += gaussian_kernel((x - x_[i])/h_, (y - y_[i])/h_); } } else throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid kernel type."); return out/(h_*h_*x_.size()); } double gctl::kde2d::get_kernel_density_at(size_t k_id, double x, double y, kde_kernel_e k_type) { if (k_id >= x_.size()) throw std::runtime_error("[gctl::kde2d::get_kernel_density_at(...)] Invalid kernel index."); double out; if (k_type == KDE_Gaussian) out = gaussian_kernel((x - x_[k_id])/h_, (y - y_[k_id])/h_); else throw std::runtime_error("[gctl::kde2d::get_kernel_density_at(...)] Invalid kernel type."); return out/(h_*h_); } void gctl::kde2d::get_gradient_at(double x, double y, double &gx, double &gy, kde_kernel_e k_type) { double out_x = 0.0, out_y = 0.0; 
if (k_type == KDE_Gaussian) { for (size_t i = 0; i < x_.size(); i++) { out_x += ((x - x_[i])/h_)*gaussian_kernel((x - x_[i])/h_, (y - y_[i])/h_); out_y += ((y - y_[i])/h_)*gaussian_kernel((x - x_[i])/h_, (y - y_[i])/h_); } } else throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid kernel type."); gx = -1.0*out_x/(h_*h_*h_*x_.size()); gy = -1.0*out_y/(h_*h_*h_*x_.size()); return; } void gctl::kde2d::get_kernel_gradient_at(size_t k_id, double x, double y, double &gx, double &gy, kde_kernel_e k_type) { if (k_id >= x_.size()) throw std::runtime_error("[gctl::kde2d::get_kernel_gradient_at(...)] Invalid kernel index."); double out_x, out_y; if (k_type == KDE_Gaussian) { out_x = ((x - x_[k_id])/h_)*gaussian_kernel((x - x_[k_id])/h_, (y - y_[k_id])/h_); out_y = ((y - y_[k_id])/h_)*gaussian_kernel((x - x_[k_id])/h_, (y - y_[k_id])/h_); } else throw std::runtime_error("[gctl::kde2d::get_kernel_gradient_at(...)] Invalid kernel type."); gx = -1.0*out_x/(h_*h_*h_); gy = -1.0*out_y/(h_*h_*h_); return; } void gctl::kde2d::get_distribution(const array x, const array y, array &d, array &dx, array &dy, kde_kernel_e k_type, kde_norm_e n_type, double norm) { if (x.size() != y.size()) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid distribution size."); if (norm < 0.0) throw std::runtime_error("[GCTL Kernel Density Estimation] Invalid normalization value."); size_t xnum = x.size(); d.resize(xnum); dx.resize(xnum); dy.resize(xnum); double s = 0.0; if (n_type == KDE_MAX2ONE) { for (size_t i = 0; i < xnum; i++) { d[i] = get_density_at(x[i], y[i], k_type); get_gradient_at(x[i], y[i], dx[i], dy[i], k_type); s = std::max(s, d[i]); } } else if (n_type == KDE_SUM2ONE) { for (size_t i = 0; i < xnum; i++) { d[i] = get_density_at(x[i], y[i], k_type); get_gradient_at(x[i], y[i], dx[i], dy[i], k_type); s += d[i]; } } else { for (size_t i = 0; i < xnum; i++) { d[i] = get_density_at(x[i], y[i], k_type); get_gradient_at(x[i], y[i], dx[i], dy[i], k_type); } s = norm; } 
for (size_t i = 0; i < xnum; i++) { d[i] /= s; dx[i]/= s; dy[i]/= s; } return; } void gctl::kde2d::save(double xmin, double xmax, double ymin, double ymax, int xnum, int ynum, std::string file) { std::string suffix_str = file.substr(file.find_last_of('.') + 1); if (suffix_str != "nc") { throw std::runtime_error("[gctl::kde2d::save(...)] Invalid file extension type."); } array dist(xnum*ynum, 0.0); double dx = (xmax - xmin)/(xnum - 1); double dy = (ymax - ymin)/(ynum - 1); for (size_t i = 0; i < ynum; i++) { for (size_t j = 0; j < xnum; j++) { dist[j + i*xnum] = get_density_at(xmin + dx*j, ymin + dy*i); } } if (suffix_str == "nc") save_netcdf_grid(file, dist, xnum, ynum, xmin, dx, ymin, dy, "x", "y", "probability density"); return; } double gctl::kde2d::gaussian_kernel(double x, double y) { return exp(-0.5*(x*x + y*y))/(2*M_PI); }