/********************************************************
 *  ██████╗  ██████╗████████╗██╗
 * ██╔════╝ ██╔════╝╚══██╔══╝██║
 * ██║  ███╗██║        ██║   ██║
 * ██║   ██║██║        ██║   ██║
 * ╚██████╔╝╚██████╗   ██║   ███████╗
 *  ╚═════╝  ╚═════╝   ╚═╝   ╚══════╝
 * Geophysical Computational Tools & Library (GCTL)
 *
 * Copyright (c) 2023 Yi Zhang (yizhang-geo@zju.edu.cn)
 *
 * GCTL is distributed under a dual licensing scheme. You can redistribute
 * it and/or modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, either version 2
 * of the License, or (at your option) any later version. You should have
 * received a copy of the GNU Lesser General Public License along with this
 * program. If not, see <http://www.gnu.org/licenses/>.
 *
 * If the terms and conditions of the LGPL v.2. would prevent you from using
 * the GCTL, please consider the option to obtain a commercial license for a
 * fee. These licenses are offered by the GCTL's original author. As a rule,
 * licenses are provided "as-is", unlimited in time for a one time fee. Please
 * send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
 * to include some description of your company and the realm of its activities.
 * Also add information on how to contact you by electronic and paper mail.
 ******************************************************/

#include "linear_algebra.h"

void gctl::vecset(_1d_array &x, double c)
{
    size_t i;
#pragma omp parallel for private (i) shared (c) schedule(guided)
    for (i = 0; i < x.size(); i++) { x[i] = c; }
    return;
}

void gctl::vecset(_1cd_array &x, std::complex<double> c)
{
    size_t i;
#pragma omp parallel for private (i) shared (c) schedule(guided)
    for (i = 0; i < x.size(); i++) { x[i] = c; }
    return;
}

void gctl::vecconj(_1cd_array &x, const _1cd_array &y)
{
    x.resize(y.size());

    size_t i;
#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < x.size(); i++)
    {
        // x = conj(y)
        x[i] = y[i];
        x[i].imag(-1*x[i].imag());
    }
    return;
}

void gctl::veccpy(_1d_array &x, const _1d_array &y, double c)
{
#ifdef GCTL_OPENBLAS
    // x = y, then x = c*x
    cblas_dcopy(y.size(), y.get(), 1, x.get(), 1);
    cblas_dscal(x.size(), c, x.get(), 1);
    return;
#else
#ifdef GCTL_CHECK_SIZE
    if (x.size() != y.size()) { throw std::runtime_error("Unequal array sizes. algebra::veccpy(...)"); }
#endif // GCTL_CHECK_SIZE

    size_t i;
#pragma omp parallel for private (i) shared(c) schedule(guided)
    for (i = 0; i < x.size(); i++) { x[i] = c*y[i]; }
    return;
#endif // GCTL_OPENBLAS
}

void gctl::veccpy(_1cd_array &x, const _1cd_array &y, std::complex<double> c)
{
#ifdef GCTL_CHECK_SIZE
    if (x.size() != y.size()) { throw std::runtime_error("Unequal array sizes. algebra::veccpy(...)"); }
#endif // GCTL_CHECK_SIZE

    size_t i;
#pragma omp parallel for private (i) shared(c) schedule(guided)
    for (i = 0; i < x.size(); i++)
    {
        x[i] = c * y[i];
        //x[i].real(c.real()*y[i].real() - c.imag()*y[i].imag());
        //x[i].imag(c.real()*y[i].imag() + c.imag()*y[i].real());
    }
    return;
}

void gctl::vecadd(_1d_array &a, const _1d_array &b, const _1d_array &c, double t, double p)
{
#ifdef GCTL_OPENBLAS
    // a = t*b, then a += p*c
    cblas_dcopy(b.size(), b.get(), 1, a.get(), 1);
    cblas_dscal(a.size(), t, a.get(), 1);
    cblas_daxpy(c.size(), p, c.get(), 1, a.get(), 1);
    return;
#else
#ifdef GCTL_CHECK_SIZE
    if (a.size() != b.size() || b.size() != c.size()) { throw std::runtime_error("Unequal array sizes. algebra::vecadd(...)"); }
#endif // GCTL_CHECK_SIZE

    size_t i;
#pragma omp parallel for private (i) shared (t, p) schedule(guided)
    for (i = 0; i < a.size(); i++) { a[i] = t*b[i] + p*c[i]; }
    return;
#endif // GCTL_OPENBLAS
}
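// Usage note (illustrative sketch, not part of the library API): the routines
// above follow the convention a = t*b + p*c, with all arrays sized by the
// caller beforehand. Assuming _1d_array exposes the resize()/operator[]
// interface used throughout this file, a minimal example would look like:
//
//   gctl::_1d_array b, c, a;
//   b.resize(100); c.resize(100); a.resize(100);
//   gctl::vecset(b, 1.0);            // b_i = 1
//   gctl::vecset(c, 2.0);            // c_i = 2
//   gctl::vecadd(a, b, c, 3.0, 0.5); // a_i = 3*b_i + 0.5*c_i = 4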
void gctl::vecadd(_1cd_array &a, const _1cd_array &b, const _1cd_array &c, std::complex<double> t, std::complex<double> p)
{
#ifdef GCTL_CHECK_SIZE
    if (a.size() != b.size() || b.size() != c.size()) { throw std::runtime_error("Unequal array sizes. algebra::vecadd(...)"); }
#endif // GCTL_CHECK_SIZE

    size_t i;
#pragma omp parallel for private (i) shared (t, p) schedule(guided)
    for (i = 0; i < a.size(); i++) { a[i] = t*b[i] + p*c[i]; }
    return;
}

void gctl::vecdiff(_1d_array &a, const _1d_array &b, const _1d_array &c, double t, double p)
{
#ifdef GCTL_OPENBLAS
    // a = t*b, then a -= p*c
    cblas_dcopy(b.size(), b.get(), 1, a.get(), 1);
    cblas_dscal(a.size(), t, a.get(), 1);
    cblas_daxpy(c.size(), -1.0*p, c.get(), 1, a.get(), 1);
    return;
#else
#ifdef GCTL_CHECK_SIZE
    if (a.size() != b.size() || b.size() != c.size()) { throw std::runtime_error("Unequal array sizes. algebra::vecdiff(...)"); }
#endif // GCTL_CHECK_SIZE

    size_t i;
#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < a.size(); i++) { a[i] = t*b[i] - p*c[i]; }
    return;
#endif // GCTL_OPENBLAS
}

void gctl::vecdiff(_1cd_array &a, const _1cd_array &b, const _1cd_array &c, std::complex<double> t, std::complex<double> p)
{
#ifdef GCTL_CHECK_SIZE
    if (a.size() != b.size() || b.size() != c.size()) { throw std::runtime_error("Unequal array sizes. algebra::vecdiff(...)"); }
#endif // GCTL_CHECK_SIZE

    size_t i;
#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < a.size(); i++) { a[i] = t*b[i] - p*c[i]; }
    return;
}

void gctl::vecmul(_1d_array &a, const _1d_array &b, const _1d_array &c, double t)
{
#ifdef GCTL_OPENBLAS
    // Element-wise product via dgbmv: b is treated as a zero-bandwidth (diagonal)
    // band matrix, so a = t*diag(b)*c.
    cblas_dgbmv(CblasRowMajor, CblasNoTrans, b.size(), b.size(), 0, 0, t, b.get(), 1, c.get(), 1, 0.0, a.get(), 1);
    return;
#else
#ifdef GCTL_CHECK_SIZE
    if (a.size() != b.size() || b.size() != c.size()) { throw std::runtime_error("Unequal array sizes. algebra::vecmul(...)"); }
#endif // GCTL_CHECK_SIZE

    size_t i;
#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < a.size(); i++) { a[i] = t*b[i]*c[i]; }
    return;
#endif // GCTL_OPENBLAS
}

void gctl::vecdiv(_1d_array &a, const _1d_array &b, const _1d_array &c, double t)
{
#ifdef GCTL_OPENBLAS
    // c is read-only, so invert it into a temporary before the diagonal dgbmv
    // product a = t*diag(b)*(1/c).
    _1d_array inv_c;
    inv_c.resize(c.size());

    size_t i;
#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < c.size(); i++) { inv_c[i] = 1.0/c[i]; }

    cblas_dgbmv(CblasRowMajor, CblasNoTrans, b.size(), b.size(), 0, 0, t, b.get(), 1, inv_c.get(), 1, 0.0, a.get(), 1);
    return;
#else
#ifdef GCTL_CHECK_SIZE
    if (a.size() != b.size() || b.size() != c.size()) { throw std::runtime_error("Unequal array sizes. algebra::vecdiv(...)"); }
#endif // GCTL_CHECK_SIZE

    size_t i;
#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < a.size(); i++) { a[i] = t*b[i]/c[i]; }
    return;
#endif // GCTL_OPENBLAS
}

void gctl::vecscale(_1d_array &x, double c)
{
#ifdef GCTL_OPENBLAS
    cblas_dscal(x.size(), c, x.get(), 1);
    return;
#else
    size_t i;
#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < x.size(); i++) { x[i] = c*x[i]; }
    return;
#endif // GCTL_OPENBLAS
}

void gctl::vecapp(_1d_array &a, const _1d_array &b, double c)
{
#ifdef GCTL_OPENBLAS
    // a += c*b
    cblas_daxpy(b.size(), c, b.get(), 1, a.get(), 1);
    return;
#else
#ifdef GCTL_CHECK_SIZE
    if (a.size() != b.size()) { throw std::runtime_error("Unequal array sizes. algebra::vecapp(...)"); }
#endif // GCTL_CHECK_SIZE

    size_t i;
#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < a.size(); i++) { a[i] += c*b[i]; }
    return;
#endif // GCTL_OPENBLAS
}
void gctl::vecapp(_1cd_array &a, const _1cd_array &b, std::complex<double> c)
{
#ifdef GCTL_CHECK_SIZE
    if (a.size() != b.size()) { throw std::runtime_error("Unequal array sizes. algebra::vecapp(...)"); }
#endif // GCTL_CHECK_SIZE

    size_t i;
#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < a.size(); i++) { a[i] += c*b[i]; }
    return;
}

void gctl::vecsub(_1d_array &a, const _1d_array &b, double c)
{
#ifdef GCTL_OPENBLAS
    // a -= c*b
    cblas_daxpy(b.size(), -1.0*c, b.get(), 1, a.get(), 1);
    return;
#else
#ifdef GCTL_CHECK_SIZE
    if (a.size() != b.size()) { throw std::runtime_error("Unequal array sizes. algebra::vecsub(...)"); }
#endif // GCTL_CHECK_SIZE

    size_t i;
#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < a.size(); i++) { a[i] -= c*b[i]; }
    return;
#endif // GCTL_OPENBLAS
}

void gctl::vecsub(_1cd_array &a, const _1cd_array &b, std::complex<double> c)
{
#ifdef GCTL_CHECK_SIZE
    if (a.size() != b.size()) { throw std::runtime_error("Unequal array sizes. algebra::vecsub(...)"); }
#endif // GCTL_CHECK_SIZE

    size_t i;
#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < a.size(); i++) { a[i] -= c*b[i]; }
    return;
}

double gctl::vecdot(const _1d_array &b, const _1d_array &c)
{
#ifdef GCTL_OPENBLAS
    return cblas_ddot(b.size(), b.get(), 1, c.get(), 1);
#else
#ifdef GCTL_CHECK_SIZE
    if (c.size() != b.size()) { throw std::runtime_error("Unequal array sizes. algebra::vecdot(...)"); }
#endif // GCTL_CHECK_SIZE

    double s = 0.0;
    for (size_t i = 0; i < b.size(); i++) { s += b[i]*c[i]; }
    return s;
#endif // GCTL_OPENBLAS
}

std::complex<double> gctl::vecdot(const _1cd_array &a, const _1cd_array &b)
{
#ifdef GCTL_CHECK_SIZE
    if (a.size() != b.size()) { throw std::runtime_error("Unequal array sizes. algebra::vecdot(...)"); }
#endif // GCTL_CHECK_SIZE

    double re = 0.0, im = 0.0;
    // = \sum{a_i \cdot b_i} (unconjugated)
    for (size_t i = 0; i < a.size(); i++)
    {
        re += (a[i].real()*b[i].real() - a[i].imag()*b[i].imag());
        im += (a[i].real()*b[i].imag() + a[i].imag()*b[i].real());
    }

    std::complex<double> ret;
    ret.real(re); ret.imag(im);
    return ret;
}

std::complex<double> gctl::vecinner(const _1cd_array &a, const _1cd_array &b)
{
#ifdef GCTL_CHECK_SIZE
    if (a.size() != b.size()) { throw std::runtime_error("Unequal array sizes. algebra::vecinner(...)"); }
#endif // GCTL_CHECK_SIZE

    double re = 0.0, im = 0.0;
    // = \sum{\bar{a}_i \cdot b_i} (conjugated)
    for (size_t i = 0; i < a.size(); i++)
    {
        re += (a[i].real()*b[i].real() + a[i].imag()*b[i].imag());
        im += (a[i].real()*b[i].imag() - a[i].imag()*b[i].real());
    }

    std::complex<double> ret;
    ret.real(re); ret.imag(im);
    return ret;
}
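// Note on the two complex products above: vecdot() accumulates the unconjugated
// bilinear sum \sum_i a_i*b_i, while vecinner() accumulates the Hermitian inner
// product \sum_i conj(a_i)*b_i. A one-element sketch (values are hypothetical):
//
//   // a = {1+2i}, b = {3+4i}
//   // vecdot(a, b)   -> (1*3 - 2*4) + (1*4 + 2*3)i = -5 + 10i
//   // vecinner(a, b) -> (1*3 + 2*4) + (1*4 - 2*3)i = 11 - 2i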
bool gctl::veccheckbox(const _1d_array &h, const _1d_array &l)
{
#ifdef GCTL_CHECK_SIZE
    if (h.size() != l.size()) { throw std::runtime_error("Unequal array sizes. algebra::veccheckbox(...)"); }
#endif // GCTL_CHECK_SIZE

    for (size_t i = 0; i < h.size(); i++)
    {
        if (h[i] < l[i]) return false;
    }
    return true;
}

bool gctl::veccheckpos(const _1d_array &a)
{
    for (size_t i = 0; i < a.size(); i++)
    {
        if (a[i] <= 0.0) return false;
    }
    return true;
}

void gctl::vecbtm(_1d_array &x, const _1d_array &l)
{
#ifdef GCTL_CHECK_SIZE
    if (x.size() != l.size()) { throw std::runtime_error("Unequal array sizes. algebra::vecbtm(...)"); }
#endif // GCTL_CHECK_SIZE

    size_t i;
#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < x.size(); i++)
    {
        if (x[i] < l[i]) x[i] = l[i];
    }
    return;
}

void gctl::vectop(_1d_array &x, const _1d_array &h)
{
#ifdef GCTL_CHECK_SIZE
    if (x.size() != h.size()) { throw std::runtime_error("Unequal array sizes. algebra::vectop(...)"); }
#endif // GCTL_CHECK_SIZE

    size_t i;
#pragma omp parallel for private (i) schedule(guided)
    for (i = 0; i < x.size(); i++)
    {
        if (x[i] > h[i]) x[i] = h[i];
    }
    return;
}

void gctl::matvec(_1d_array &r, const _2d_matrix &m, const _1d_array &v, matrix_layout_e trans)
{
    // Use OpenBLAS backends
#ifdef GCTL_OPENBLAS
    if (trans == NoTrans)
    {
        cblas_dgemv(CblasRowMajor, CblasNoTrans, m.row_size(), m.col_size(), 1.0, m.get(), m.col_size(), v.get(), 1, 0.0, r.get(), 1);
    }
    else
    {
        cblas_dgemv(CblasRowMajor, CblasTrans, m.row_size(), m.col_size(), 1.0, m.get(), m.col_size(), v.get(), 1, 0.0, r.get(), 1);
    }
    return;
    // Use native backends
#else
    if (trans == NoTrans)
    {
#ifdef GCTL_CHECK_SIZE
        if (v.size() != m.col_size()) { throw std::runtime_error("Unequal matrix-array sizes. algebra::matvec(...)"); }
#endif // GCTL_CHECK_SIZE

        size_t i, j;
#pragma omp parallel for private (i, j) schedule(guided)
        for (i = 0; i < m.row_size(); i++)
        {
            r[i] = 0.0;
            for (j = 0; j < m.col_size(); j++) { r[i] += m[i][j]*v[j]; }
        }
        return;
    }

#ifdef GCTL_CHECK_SIZE
    if (v.size() != m.row_size()) { throw std::runtime_error("Unequal matrix-array sizes. algebra::matvec(...)"); }
#endif // GCTL_CHECK_SIZE

    size_t i, j;
#pragma omp parallel for private (i, j) schedule(guided)
    for (j = 0; j < m.col_size(); j++)
    {
        r[j] = 0.0;
        for (i = 0; i < m.row_size(); i++) { r[j] += m[i][j]*v[i]; }
    }
    return;
#endif // GCTL_OPENBLAS
}

void gctl::matmat(_2d_matrix &r, const _2d_matrix &m, const _2d_matrix &v, matrix_layout_e m_trans, matrix_layout_e v_trans)
{
    if (m.empty() || v.empty()) { throw std::runtime_error("Empty matrix(s). From gctl::matmat(...)"); }

    size_t i, j, k;
    if (m_trans == NoTrans && v_trans == NoTrans)
    {
        if (m.col_size() != v.row_size()) { throw std::runtime_error("Incompatible matrix sizes. From gctl::matmat(...)"); }

        // r = m * v
        r.resize(m.row_size(), v.col_size());
#pragma omp parallel for private (i, j, k) schedule(guided)
        for (i = 0; i < m.row_size(); i++)
        {
            for (j = 0; j < v.col_size(); j++)
            {
                r[i][j] = 0.0;
                for (k = 0; k < m.col_size(); k++) { r[i][j] += m[i][k]*v[k][j]; }
            }
        }
    }
    else if (m_trans == Trans && v_trans == NoTrans)
    {
        if (m.row_size() != v.row_size()) { throw std::runtime_error("Incompatible matrix sizes. From gctl::matmat(...)"); }

        // r = m^T * v
        r.resize(m.col_size(), v.col_size());
#pragma omp parallel for private (i, j, k) schedule(guided)
        for (i = 0; i < m.col_size(); i++)
        {
            for (j = 0; j < v.col_size(); j++)
            {
                r[i][j] = 0.0;
                for (k = 0; k < m.row_size(); k++) { r[i][j] += m[k][i]*v[k][j]; }
            }
        }
    }
    else if (m_trans == NoTrans && v_trans == Trans)
    {
        if (m.col_size() != v.col_size()) { throw std::runtime_error("Incompatible matrix sizes. From gctl::matmat(...)"); }

        // r = m * v^T
        r.resize(m.row_size(), v.row_size());
#pragma omp parallel for private (i, j, k) schedule(guided)
        for (i = 0; i < m.row_size(); i++)
        {
            for (j = 0; j < v.row_size(); j++)
            {
                r[i][j] = 0.0;
                for (k = 0; k < m.col_size(); k++) { r[i][j] += m[i][k]*v[j][k]; }
            }
        }
    }
    else
    {
        if (m.row_size() != v.col_size()) { throw std::runtime_error("Incompatible matrix sizes. From gctl::matmat(...)"); }

        // r = m^T * v^T
        r.resize(m.col_size(), v.row_size());
#pragma omp parallel for private (i, j, k) schedule(guided)
        for (i = 0; i < m.col_size(); i++)
        {
            for (j = 0; j < v.row_size(); j++)
            {
                r[i][j] = 0.0;
                for (k = 0; k < m.row_size(); k++) { r[i][j] += m[k][i]*v[j][k]; }
            }
        }
    }
    return;
}
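// Usage note (illustrative sketch): matmat() resizes its output itself, while
// matvec() expects the caller to size r to match the requested operation.
// Assuming the matrix_layout_e enumerators are visible as gctl::NoTrans and
// gctl::Trans, and that _2d_matrix offers resize(rows, cols) as used above:
//
//   gctl::_2d_matrix A, B, C;
//   A.resize(3, 4); B.resize(3, 5);                     // ... fill A and B
//   gctl::matmat(C, A, B, gctl::Trans, gctl::NoTrans);  // C = A^T * B, sized 4 x 5
//
//   gctl::_1d_array x, y;
//   x.resize(4); y.resize(3);                           // ... fill x
//   gctl::matvec(y, A, x, gctl::NoTrans);               // y = A * x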
void gctl::matapp(_2d_matrix &r, const _1d_array &v, matrix_order_e odr)
{
    size_t i, j;
    if (odr == RowMajor)
    {
        if (r.col_size() != v.size()) { throw std::runtime_error("Incompatible matrix and vector sizes. From gctl::matapp(...)"); }

        // Add v to every row of r.
#pragma omp parallel for private (i, j) schedule(guided)
        for (i = 0; i < r.row_size(); i++)
        {
            for (j = 0; j < r.col_size(); j++) { r[i][j] += v[j]; }
        }
    }
    else
    {
        if (r.row_size() != v.size()) { throw std::runtime_error("Incompatible matrix and vector sizes. From gctl::matapp(...)"); }

        // Add v to every column of r.
#pragma omp parallel for private (i, j) schedule(guided)
        for (j = 0; j < r.col_size(); j++)
        {
            for (i = 0; i < r.row_size(); i++) { r[i][j] += v[i]; }
        }
    }
    return;
}

void gctl::matdiff(_2d_matrix &r, const _2d_matrix &m, const _2d_matrix &v)
{
    if (m.row_size() != v.row_size() || m.col_size() != v.col_size())
    {
        throw std::invalid_argument("Incompatible matrix sizes. From gctl::matdiff(...)");
    }

    r.resize(m.row_size(), m.col_size());

    size_t i, j;
#pragma omp parallel for private (i, j) schedule(guided)
    for (i = 0; i < r.row_size(); i++)
    {
        for (j = 0; j < r.col_size(); j++) { r[i][j] = m[i][j] - v[i][j]; }
    }
    return;
}

void gctl::matvec(_1cd_array &r, const _2cd_matrix &m, const _1cd_array &v, matrix_layout_e trans, conjugate_type_e conj)
{
    size_t i, j;
    size_t m_size = m.row_size();
    size_t n_size = m.col_size();
    double re, im;

    if (conj == Conj)
    {
        if (trans == NoTrans)
        {
#pragma omp parallel for private (i, j, re, im) schedule(guided)
            for (i = 0; i < m_size; i++)
            {
                re = 0.0; im = 0.0;
                for (j = 0; j < n_size; j++)
                {
                    re += (m[i][j].real()*v[j].real() + m[i][j].imag()*v[j].imag());
                    im += (m[i][j].real()*v[j].imag() - m[i][j].imag()*v[j].real());
                }
                r[i].real(re); r[i].imag(im);
            }
            return;
        }

#pragma omp parallel for private (i, j, re, im) schedule(guided)
        for (j = 0; j < n_size; j++)
        {
            re = 0.0; im = 0.0;
            for (i = 0; i < m_size; i++)
            {
                re += (m[i][j].real()*v[i].real() + m[i][j].imag()*v[i].imag());
                im += (m[i][j].real()*v[i].imag() - m[i][j].imag()*v[i].real());
            }
            r[j].real(re); r[j].imag(im);
        }
        return;
    }

    if (trans == Trans)
    {
#pragma omp parallel for private (i, j, re, im) schedule(guided)
        for (i = 0; i < m_size; i++)
        {
            re = 0.0; im = 0.0;
            for (j = 0; j < n_size; j++)
            {
                re += (m[i][j].real()*v[j].real() - m[i][j].imag()*v[j].imag());
                im += (m[i][j].real()*v[j].imag() + m[i][j].imag()*v[j].real());
            }
            r[i].real(re); r[i].imag(im);
        }
        return;
    }

#pragma omp parallel for private (i, j, re, im) schedule(guided)
    for (j = 0; j < n_size; j++)
    {
        re = 0.0; im = 0.0;
        for (i = 0; i < m_size; i++)
        {
            re += (m[i][j].real()*v[i].real() - m[i][j].imag()*v[i].imag());
            im += (m[i][j].real()*v[i].imag() + m[i][j].imag()*v[i].real());
        }
        r[j].real(re); r[j].imag(im);
    }
    return;
}

double gctl::dynamic_average(double old_mean, size_t old_count, double new_input)
{
    // Incremental mean update when one more sample is appended.
    return (old_mean*old_count + new_input)/(old_count + 1.0);
}

double gctl::dynamic_stddev(double old_stddev, size_t old_count, double old_mean, double new_input, double &new_mean)
{
    // Incremental (population) standard deviation update; also reports the new mean.
    new_mean = dynamic_average(old_mean, old_count, new_input);
    return sqrt(old_stddev*old_stddev*old_count/(old_count + 1.0)
        + old_count*pow(old_mean - new_input, 2.0)/pow(old_count + 1.0, 3.0)
        + pow(new_input - new_mean, 2.0)/(old_count + 1.0));
}

void gctl::schmidt_orthogonal(const _1d_array &a, _1d_array &e, size_t a_s)
{
    size_t t_s = a.size();
    if (t_s != e.size()) { throw std::runtime_error("Unequal array sizes. algebra::schmidt_orthogonal(...)"); }
    if (t_s%a_s != 0 || t_s <= 3) { throw std::runtime_error("Incompatible function setup. algebra::schmidt_orthogonal(...)"); }

    size_t len = t_s/a_s;
    if (len < a_s) { throw std::runtime_error("The linear system is over-determined. algebra::schmidt_orthogonal(...)"); }

    double ae, ee;
    for (size_t i = 0; i < a_s; i++)
    {
        // Start from the original vector, then subtract its projections onto the
        // previously orthogonalized vectors.
        for (size_t l = 0; l < len; l++) { e[l + i*len] = a[l + i*len]; }

        for (size_t m = 0; m < i; m++)
        {
            ae = ee = 0.0;
            for (size_t n = 0; n < len; n++)
            {
                ae += a[n + i*len] * e[n + m*len];
                ee += e[n + m*len] * e[n + m*len];
            }

            for (size_t n = 0; n < len; n++) { e[n + i*len] -= e[n + m*len] * ae/ee; }
        }
    }

    // Normalize every orthogonalized vector to unit length.
    for (size_t i = 0; i < a_s; i++)
    {
        ee = 0.0;
        for (size_t l = 0; l < len; l++) { ee += e[l + i*len] * e[l + i*len]; }

        ee = sqrt(ee);
        for (size_t l = 0; l < len; l++) { e[l + i*len] /= ee; }
    }
    return;
}
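// Usage note (illustrative sketch): schmidt_orthogonal() treats its flat input
// as a_s vectors of length a.size()/a_s stored back to back and writes the
// orthonormalized set to e in the same layout. With two hypothetical
// 3-component vectors:
//
//   gctl::_1d_array a, e;
//   a.resize(6); e.resize(6);
//   // a = {1,0,0,  1,1,0}
//   gctl::schmidt_orthogonal(a, e, 2);
//   // e = {1,0,0,  0,1,0}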
bool gctl::vecvalid(const _1d_array &a)
{
    bool valid = true;

    size_t i;
#pragma omp parallel for private (i) reduction(&&: valid) schedule(guided)
    for (i = 0; i < a.size(); i++)
    {
        // A NaN element never compares equal to itself.
        if (a[i] != a[i]) valid = false;
    }
    return valid;
}

bool gctl::vecvalid(const _1cd_array &a)
{
    bool valid = true;

    size_t i;
#pragma omp parallel for private (i) reduction(&&: valid) schedule(guided)
    for (i = 0; i < a.size(); i++)
    {
        // A NaN component makes the complex value compare unequal to itself.
        if (a[i] != a[i]) valid = false;
    }
    return valid;
}