gctl/lib/maths/linear_algebra.cpp
2024-09-21 11:08:56 +08:00

871 lines
22 KiB
C++

/********************************************************
* ██████╗ ██████╗████████╗██╗
* ██╔════╝ ██╔════╝╚══██╔══╝██║
* ██║ ███╗██║ ██║ ██║
* ██║ ██║██║ ██║ ██║
* ╚██████╔╝╚██████╗ ██║ ███████╗
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2023 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "linear_algebra.h"
void gctl::vecset(_1d_array &x, double c)
{
size_t i;
#pragma omp parallel for private (i) shared (c) schedule(guided)
for (i = 0; i < x.size(); i++)
{
x[i] = c;
}
return;
}
void gctl::vecset(_1cd_array &x, std::complex<double> c)
{
size_t i;
#pragma omp parallel for private (i) shared (c) schedule(guided)
for (i = 0; i < x.size(); i++)
{
x[i] = c;
}
return;
}
void gctl::vecconj(_1cd_array &x, const _1cd_array &y)
{
x.resize(y.size());
size_t i;
#pragma omp parallel for private (i) schedule(guided)
for (i = 0; i < x.size(); i++)
{
x[i] = y[i];
x[i].imag(-1*x[i].imag());
}
return;
}
void gctl::veccpy(_1d_array &x, const _1d_array &y, double c)
{
#ifdef GCTL_OPENBLAS
cblas_dcopy(y.size(), y.get(), 1, x.get(), 1);
cblas_dscal(x.size(), c, x.get(), 1);
return;
#else
#ifdef GCTL_CHECK_SIZE
if (x.size() != y.size())
{
throw std::runtime_error("Unequal array sizes. algebra<T>::veccpy(...)");
}
#endif // GCTL_CHECK_SIZE
size_t i;
#pragma omp parallel for private (i) shared(c) schedule(guided)
for (i = 0; i < x.size(); i++)
{
x[i] = c*y[i];
}
return;
#endif // GCTL_OPENBLAS
}
void gctl::veccpy(_1cd_array &x, const _1cd_array &y, std::complex<double> c)
{
#ifdef GCTL_CHECK_SIZE
if (x.size() != y.size())
{
throw std::runtime_error("Unequal array sizes. algebra<T>::veccpy(...)");
}
#endif // GCTL_CHECK_SIZE
size_t i;
#pragma omp parallel for private (i) shared(c) schedule(guided)
for (i = 0; i < x.size(); i++)
{
x[i] = c * y[i];
//x[i].real(c.real()*y[i].real() - c.imag()*y[i].imag());
//x[i].imag(c.real()*y[i].imag() + c.imag()*y[i].real());
}
return;
}
void gctl::vecadd(_1d_array &a, const _1d_array &b, const _1d_array &c, double t, double p)
{
#ifdef GCTL_OPENBLAS
cblas_dcopy(b.size(), b.get(), 1, a.get(), 1);
cblas_dscal(a.size(), t, a.get(), 1);
cblas_daxpy(c.size(), p, c.get(), 1, a.get(), 1);
return;
#else
#ifdef GCTL_CHECK_SIZE
if (a.size() != b.size() || b.size() != c.size())
{
throw std::runtime_error("Unequal array sizes. algebra<T>::vecadd(...)");
}
#endif // GCTL_CHECK_SIZE
size_t i;
#pragma omp parallel for private (i) shared (t, p) schedule(guided)
for (i = 0; i < a.size(); i++)
{
a[i] = t*b[i] + p*c[i];
}
return;
#endif // GCTL_OPENBLAS
}
void gctl::vecadd(_1cd_array &a, const _1cd_array &b, const _1cd_array &c, std::complex<double> t, std::complex<double> p)
{
#ifdef GCTL_CHECK_SIZE
if (a.size() != b.size() || b.size() != c.size())
{
throw std::runtime_error("Unequal array sizes. algebra<T>::vecadd(...)");
}
#endif // GCTL_CHECK_SIZE
size_t i;
#pragma omp parallel for private (i) shared (t, p) schedule(guided)
for (i = 0; i < a.size(); i++)
{
a[i] = t*b[i] + p*c[i];
}
return;
}
void gctl::vecdiff(_1d_array &a, const _1d_array &b, const _1d_array &c, double t, double p)
{
#ifdef GCTL_OPENBLAS
cblas_dcopy(b.size(), b.get(), 1, a.get(), 1);
cblas_dscal(a.size(), t, a.get(), 1);
cblas_daxpy(c.size(), -1.0*p, c.get(), 1, a.get(), 1);
return;
#else
#ifdef GCTL_CHECK_SIZE
if (a.size() != b.size() || b.size() != c.size())
{
throw std::runtime_error("Unequal array sizes. algebra<T>::vecdiff(...)");
}
#endif // GCTL_CHECK_SIZE
size_t i;
#pragma omp parallel for private (i) schedule(guided)
for (i = 0; i < a.size(); i++)
{
a[i] = t*b[i] - p*c[i];
}
return;
#endif // GCTL_OPENBLAS
}
void gctl::vecdiff(_1cd_array &a, const _1cd_array &b, const _1cd_array &c, std::complex<double> t, std::complex<double> p)
{
#ifdef GCTL_CHECK_SIZE
if (a.size() != b.size() || b.size() != c.size())
{
throw std::runtime_error("Unequal array sizes. algebra<T>::vecdiff(...)");
}
#endif // GCTL_CHECK_SIZE
size_t i;
#pragma omp parallel for private (i) schedule(guided)
for (i = 0; i < a.size(); i++)
{
a[i] = t*b[i] - p*c[i];
}
return;
}
void gctl::vecmul(_1d_array &a, const _1d_array &b, const _1d_array &c, double t)
{
#ifdef GCTL_OPENBLAS
cblas_dgbmv(CblasRowMajor, CblasNoTrans, b.size(), b.size(),
0, 0, t, b.get(), 1, c.get(), 1, 0.0, a.get(), 1);
return;
#else
#ifdef GCTL_CHECK_SIZE
if (a.size() != b.size() || b.size() != c.size())
{
throw std::runtime_error("Unequal array sizes. algebra<T>::vecmul(...)");
}
#endif // GCTL_CHECK_SIZE
size_t i;
#pragma omp parallel for private (i) schedule(guided)
for (i = 0; i < a.size(); i++)
{
a[i] = t*b[i]*c[i];
}
return;
#endif // GCTL_OPENBLAS
}
void gctl::vecdiv(_1d_array &a, const _1d_array &b, const _1d_array &c, double t)
{
#ifdef GCTL_OPENBLAS
size_t i;
#pragma omp parallel for private (i) schedule(guided)
for (i = 0; i < c.size(); i++)
{
c[i] = 1.0/c[i];
}
cblas_dgbmv(CblasRowMajor, CblasNoTrans, b.size(), b.size(),
0, 0, t, b.get(), 1, c.get(), 1, 0.0, a.get(), 1);
return;
#else
#ifdef GCTL_CHECK_SIZE
if (a.size() != b.size() || b.size() != c.size())
{
throw std::runtime_error("Unequal array sizes. algebra<T>::vecdiv(...)");
}
#endif // GCTL_CHECK_SIZE
size_t i;
#pragma omp parallel for private (i) schedule(guided)
for (i = 0; i < a.size(); i++)
{
a[i] = t*b[i]/c[i];
}
return;
#endif // GCTL_OPENBLAS
}
void gctl::vecscale(_1d_array &x, double c)
{
#ifdef GCTL_OPENBLAS
cblas_dscal(x.size(), c, x.get(), 1);
return;
#else
size_t i;
#pragma omp parallel for private (i) schedule(guided)
for (i = 0; i < x.size(); i++)
{
x[i] = c*x[i];
}
return;
#endif // GCTL_OPENBLAS
}
void gctl::vecapp(_1d_array &a, const _1d_array &b, double c)
{
#ifdef GCTL_OPENBLAS
cblas_daxpy(b.size(), c, b.get(), 1, a.get(), 1);
return;
#else
#ifdef GCTL_CHECK_SIZE
if (a.size() != b.size())
{
throw std::runtime_error("Unequal array sizes. algebra<T>::vecapp(...)");
}
#endif // GCTL_CHECK_SIZE
size_t i;
#pragma omp parallel for private (i) schedule(guided)
for (i = 0; i < a.size(); i++)
{
a[i] += c*b[i];
}
return;
#endif // GCTL_OPENBLAS
}
void gctl::vecapp(_1cd_array &a, const _1cd_array &b, std::complex<double> c)
{
#ifdef GCTL_CHECK_SIZE
if (a.size() != b.size())
{
throw std::runtime_error("Unequal array sizes. algebra<T>::vecapp(...)");
}
#endif // GCTL_CHECK_SIZE
size_t i;
#pragma omp parallel for private (i) schedule(guided)
for (i = 0; i < a.size(); i++)
{
a[i] += c*b[i];
}
return;
}
void gctl::vecsub(_1d_array &a, const _1d_array &b, double c)
{
#ifdef GCTL_OPENBLAS
cblas_daxpy(b.size(), -1.0*c, b.get(), 1, a.get(), 1);
return;
#else
#ifdef GCTL_CHECK_SIZE
if (a.size() != b.size())
{
throw std::runtime_error("Unequal array sizes. algebra<T>::vecsub(...)");
}
#endif // GCTL_CHECK_SIZE
size_t i;
#pragma omp parallel for private (i) schedule(guided)
for (i = 0; i < a.size(); i++)
{
a[i] -= c*b[i];
}
return;
#endif // GCTL_OPENBLAS
}
void gctl::vecsub(_1cd_array &a, const _1cd_array &b, std::complex<double> c)
{
#ifdef GCTL_CHECK_SIZE
if (a.size() != b.size())
{
throw std::runtime_error("Unequal array sizes. algebra<T>::vecsub(...)");
}
#endif // GCTL_CHECK_SIZE
size_t i;
#pragma omp parallel for private (i) schedule(guided)
for (i = 0; i < a.size(); i++)
{
a[i] -= c*b[i];
}
return;
}
double gctl::vecdot(const _1d_array &b, const _1d_array &c)
{
#ifdef GCTL_OPENBLAS
return cblas_ddot(b.size(), b.get(), 1, c.get(), 1);
#else
#ifdef GCTL_CHECK_SIZE
if (c.size() != b.size())
{
throw std::runtime_error("Unequal array sizes. algebra<T>::vecdot(...)");
}
#endif // GCTL_CHECK_SIZE
double s = 0.0;
for (size_t i = 0; i < b.size(); i++)
{
s += b[i]*c[i];
}
return s;
#endif // GCTL_OPENBLAS
}
std::complex<double> gctl::vecdot(const _1cd_array &a, const _1cd_array &b)
{
#ifdef GCTL_CHECK_SIZE
if (a.size() != b.size())
{
throw std::runtime_error("Unequal array sizes. algebra<T>::vecdot(...)");
}
#endif // GCTL_CHECK_SIZE
double re = 0.0, im = 0.0;
// <a,b> = \sum{a_i \cdot b_i}
for (size_t i = 0; i < a.size(); i++)
{
re += (a[i].real()*b[i].real() - a[i].imag()*b[i].imag());
im += (a[i].real()*b[i].imag() + a[i].imag()*b[i].real());
}
std::complex<double> ret;
ret.real(re); ret.imag(im);
return ret;
}
std::complex<double> gctl::vecinner(const _1cd_array &a, const _1cd_array &b)
{
#ifdef GCTL_CHECK_SIZE
if (a.size() != b.size())
{
throw std::runtime_error("Unequal array sizes. algebra<T>::vecdot(...)");
}
#endif // GCTL_CHECK_SIZE
double re = 0.0, im = 0.0;
// <a,b> = \sum{a_i \cdot b_i}
for (size_t i = 0; i < a.size(); i++)
{
re += (a[i].real()*b[i].real() + a[i].imag()*b[i].imag());
im += (a[i].real()*b[i].imag() - a[i].imag()*b[i].real());
}
std::complex<double> ret;
ret.real(re); ret.imag(im);
return ret;
}
bool gctl::veccheckbox(const _1d_array &h, const _1d_array &l)
{
#ifdef GCTL_CHECK_SIZE
if (h.size() != l.size())
{
throw std::runtime_error("Unequal array sizes. algebra<T>::veccheckbox(...)");
}
#endif // GCTL_CHECK_SIZE
for (size_t i = 0; i < h.size(); i++)
{
if (h[i] < l[i]) return false;
}
return true;
}
bool gctl::veccheckpos(const _1d_array &a)
{
for (size_t i = 0; i < a.size(); i++)
{
if (a[i] <= 0.0) return false;
}
return true;
}
void gctl::vecbtm(_1d_array &x, const _1d_array &l)
{
#ifdef GCTL_CHECK_SIZE
if (x.size() != l.size())
{
throw std::runtime_error("Unequal array sizes. algebra<T>::vecbtm(...)");
}
#endif // GCTL_CHECK_SIZE
size_t i;
#pragma omp parallel for private (i) schedule(guided)
for (i = 0; i < x.size(); i++)
{
if (x[i] < l[i]) x[i] = l[i];
}
return;
}
void gctl::vectop(_1d_array &x, const _1d_array &h)
{
#ifdef GCTL_CHECK_SIZE
if (x.size() != h.size())
{
throw std::runtime_error("Unequal array sizes. algebra<T>::vectop(...)");
}
#endif // GCTL_CHECK_SIZE
size_t i;
#pragma omp parallel for private (i) schedule(guided)
for (i = 0; i < x.size(); i++)
{
if (x[i] > h[i]) x[i] = h[i];
}
return;
}
void gctl::matvec(_1d_array &r, const _2d_matrix &m, const _1d_array &v, matrix_layout_e trans)
{
// Use OpenBLAS backends
#ifdef GCTL_OPENBLAS
if (trans == NoTrans)
{
cblas_dgemv(CblasRowMajor, CblasNoTrans, m.row_size(), m.col_size(), 1.0, m.get(), m.col_size(), v.get(), 1, 0.0, r.get(), 1);
}
else
{
cblas_dgemv(CblasRowMajor, CblasTrans, m.row_size(), m.col_size(), 1.0, m.get(), m.col_size(), v.get(), 1, 0.0, r.get(), 1);
}
return;
// Use native backends
#else
if (trans == NoTrans)
{
#ifdef GCTL_CHECK_SIZE
if (v.size() != m.col_size())
{
throw std::runtime_error("Unequal matrix-array sizes. algebra<T>::matvec(...)");
}
#endif // GCTL_CHECK_SIZE
size_t i, j;
#pragma omp parallel for private (i, j) schedule(guided)
for (i = 0; i < m.row_size(); i++)
{
r[i] = 0.0;
for (j = 0; j < m.col_size(); j++)
{
r[i] += m[i][j]*v[j];
}
}
return;
}
#ifdef GCTL_CHECK_SIZE
if (v.size() != m.row_size())
{
throw std::runtime_error("Unequal matrix-array sizes. algebra<T>::matvec(...)");
}
#endif // GCTL_CHECK_SIZE
size_t i, j;
#pragma omp parallel for private (i, j) schedule(guided)
for (j = 0; j < m.col_size(); j++)
{
r[j] = 0.0;
for (i = 0; i < m.row_size(); i++)
{
r[j] += m[i][j]*v[i];
}
}
return;
#endif // GCTL_OPENBLAS
}
void gctl::matmat(_2d_matrix &r, const _2d_matrix &m, const _2d_matrix &v, matrix_layout_e m_trans, matrix_layout_e v_trans)
{
if (m.empty() || v.empty())
{
throw runtime_error("Empty matrix(s). From gctl::matmat(...)");
}
size_t i, j, k;
if (m_trans == NoTrans && v_trans == NoTrans)
{
if (m.col_size() != v.row_size())
{
throw runtime_error("Incompatible matrix sizes. From gctl::matmat(...)");
}
r.resize(m.row_size(), v.col_size());
#pragma omp parallel for private (i, j, k) schedule(guided)
for (i = 0; i < m.row_size(); i++)
{
for (j = 0; j < v.col_size(); j++)
{
r[i][j] = 0.0;
for (k = 0; k < m.col_size(); k++)
{
r[i][j] += m[i][k]*v[k][j];
}
}
}
}
else if (m_trans == Trans && v_trans == NoTrans)
{
if (m.row_size() != v.row_size())
{
throw runtime_error("Incompatible matrix sizes. From gctl::matmat(...)");
}
r.resize(m.col_size(), v.col_size());
#pragma omp parallel for private (i, j, k) schedule(guided)
for (i = 0; i < m.col_size(); i++)
{
for (j = 0; j < v.col_size(); j++)
{
r[i][j] = 0.0;
for (k = 0; k < m.row_size(); k++)
{
r[i][j] += m[k][i]*v[k][j];
}
}
}
}
else if (m_trans == NoTrans && v_trans == Trans)
{
if (m.col_size() != v.col_size())
{
throw runtime_error("Incompatible matrix sizes. From gctl::matmat(...)");
}
r.resize(m.row_size(), v.row_size());
#pragma omp parallel for private (i, j, k) schedule(guided)
for (i = 0; i < m.row_size(); i++)
{
for (j = 0; j < v.row_size(); j++)
{
r[i][j] = 0.0;
for (k = 0; k < m.col_size(); k++)
{
r[i][j] += m[i][k]*v[j][k];
}
}
}
}
else
{
if (m.row_size() != v.col_size())
{
throw runtime_error("Incompatible matrix sizes. From gctl::matmat(...)");
}
r.resize(m.col_size(), v.row_size());
#pragma omp parallel for private (i, j, k) schedule(guided)
for (i = 0; i < m.col_size(); i++)
{
for (j = 0; j < v.row_size(); j++)
{
r[i][j] = 0.0;
for (k = 0; k < m.row_size(); k++)
{
r[i][j] += m[k][i]*v[j][k];
}
}
}
}
return;
}
void gctl::matapp(_2d_matrix &r, const _1d_array &v, matrix_order_e odr)
{
size_t i, j;
if (odr == RowMajor)
{
if (r.col_size() != v.size())
{
throw runtime_error("Incompatible matrix and vector sizes. From gctl::matapp(...)");
}
#pragma omp parallel for private (i, j) schedule(guided)
for (i = 0; i < r.row_size(); i++)
{
for (j = 0; j < r.col_size(); j++)
{
r[i][j] += v[j];
}
}
}
else
{
if (r.row_size() != v.size())
{
throw runtime_error("Incompatible matrix and vector sizes. From gctl::matapp(...)");
}
#pragma omp parallel for private (i, j) schedule(guided)
for (j = 0; j < r.col_size(); j++)
{
for (i = 0; i < r.row_size(); i++)
{
r[i][j] += v[i];
}
}
}
return;
}
void gctl::matdiff(_2d_matrix &r, const _2d_matrix &m, const _2d_matrix &v)
{
if (m.row_size() != v.row_size() || m.col_size() != v.col_size())
{
throw std::invalid_argument("Incompatible matrix sizes. From gctl::matdiff(...)");
}
r.resize(m.row_size(), m.col_size());
size_t i, j;
#pragma omp parallel for private (i, j) schedule(guided)
for (i = 0; i < r.row_size(); i++)
{
for (j = 0; j < r.col_size(); j++)
{
r[i][j] = m[i][j] - v[i][j];
}
}
return;
}
void gctl::matvec(_1cd_array &r, const _2cd_matrix &m, const _1cd_array &v, matrix_layout_e trans, conjugate_type_e conj)
{
size_t i, j;
size_t m_size = m.row_size();
size_t n_size = m.col_size();
double re, im;
if (conj == Conj)
{
if (trans == NoTrans)
{
#pragma omp parallel for private (i, j, re, im) schedule(guided)
for (i = 0; i < m_size; i++)
{
re = 0.0; im = 0.0;
for (j = 0; j < n_size; j++)
{
re += (m[i][j].real()*v[j].real() + m[i][j].imag()*v[j].imag());
im += (m[i][j].real()*v[j].imag() - m[i][j].imag()*v[j].real());
}
r[i].real(re); r[i].imag(im);
}
return;
}
#pragma omp parallel for private (i, j, re, im) schedule(guided)
for (j = 0; j < n_size; j++)
{
re = 0.0; im = 0.0;
for (i = 0; i < m_size; i++)
{
re += (m[i][j].real()*v[i].real() + m[i][j].imag()*v[i].imag());
im += (m[i][j].real()*v[i].imag() - m[i][j].imag()*v[i].real());
}
r[j].real(re); r[j].imag(im);
}
return;
}
if (trans == Trans)
{
#pragma omp parallel for private (i, j, re, im) schedule(guided)
for (i = 0; i < m_size; i++)
{
re = 0.0; im = 0.0;
for (j = 0; j < n_size; j++)
{
re += (m[i][j].real()*v[j].real() - m[i][j].imag()*v[j].imag());
im += (m[i][j].real()*v[j].imag() + m[i][j].imag()*v[j].real());
}
r[i].real(re); r[i].imag(im);
}
return;
}
#pragma omp parallel for private (i, j, re, im) schedule(guided)
for (j = 0; j < n_size; j++)
{
re = 0.0; im = 0.0;
for (i = 0; i < m_size; i++)
{
re += (m[i][j].real()*v[i].real() - m[i][j].imag()*v[i].imag());
im += (m[i][j].real()*v[i].imag() + m[i][j].imag()*v[i].real());
}
r[j].real(re); r[j].imag(im);
}
return;
}
double gctl::dynamic_average(double old_mean, size_t old_count, double new_input)
{
return (old_mean*old_count + new_input)/(old_count + 1.0);
}
double gctl::dynamic_stddev(double old_stddev, size_t old_count, double old_mean, double new_input, double &new_mean)
{
new_mean = dynamic_average(old_mean, old_count, new_input);
return sqrt(old_stddev*old_stddev*old_count/(old_count + 1.0)
+ old_count*pow(old_mean - new_input, 2.0)/pow(old_count + 1.0, 3.0)
+ pow(new_input - new_mean, 2.0)/(old_count + 1.0));
}
void gctl::schmidt_orthogonal(const _1d_array &a, _1d_array &e, size_t a_s)
{
size_t t_s = a.size();
if (t_s != e.size())
{
throw std::runtime_error("Unequal array sizes. algebra<T>::schmidt_orthogonal(...)");
}
if (t_s%a_s != 0 || t_s <= 3)
{
throw std::runtime_error("Incompatible function setup. algebra<T>::schmidt_orthogonal(...)");
}
size_t len = t_s/a_s;
if (len < a_s)
{
throw std::runtime_error("The linear system is over-determined. algebra<T>::schmidt_orthogonal(...)");
}
double ae, ee;
for (size_t i = 0; i < a_s; i++)
{
for (size_t l = 0; l < len; l++)
{
e[l + i*len] = a[l + i*len];
}
for (size_t m = 0; m < i; m++)
{
ae = ee = 0.0;
for (size_t n = 0; n < len; n++)
{
ae += a[n + i*len] * e[n + m*len];
ee += e[n + m*len] * e[n + m*len];
}
for (size_t n = 0; n < len; n++)
{
e[n + i*len] -= e[n + m*len] * ae/ee;
}
}
}
for (size_t i = 0; i < a_s; i++)
{
ee = 0.0;
for (size_t l = 0; l < len; l++)
{
ee += e[l + i*len] * e[l + i*len];
}
ee = sqrt(ee);
for (size_t l = 0; l < len; l++)
{
e[l + i*len] /= ee;
}
}
return;
}
bool gctl::vecvalid(const _1d_array &a)
{
bool valid = true;
size_t i;
#pragma omp parallel for private (i) schedule(guided)
for (i = 0; i < a.size(); i++)
{
if (a[i] != a[i]) valid = false;
}
return valid;
}
bool gctl::vecvalid(const _1cd_array &a)
{
bool valid = true;
size_t i;
#pragma omp parallel for private (i) schedule(guided)
for (i = 0; i < a.size(); i++)
{
if (a[i] != a[i]) valid = false;
}
return valid;
}