initial upload

This commit is contained in:
张壹 2024-09-10 20:04:47 +08:00
parent 12b778cd3e
commit b4b4275576
40 changed files with 8751 additions and 35 deletions

37
.gitignore vendored
View File

@ -1,34 +1,3 @@
# ---> C++
# Prerequisites
*.d
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Precompiled Headers
*.gch
*.pch
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
# Executables
*.exe
*.out
*.app
.DS_Store
build/
.vscode/

28
CMakeLists.txt Normal file
View File

@ -0,0 +1,28 @@
# Top-level build script: configures the project, locates GCTL, generates the
# configuration header, then descends into the library and example directories.
cmake_minimum_required(VERSION 3.15.2)
# Project name and version (available below as PROJECT_NAME / PROJECT_VERSION)
project(GCTL_OPTIMIZATION VERSION 1.0)
# Helpers that provide configure_package_config_file() and
# write_basic_package_version_file(), both called from lib/CMakeLists.txt
include(CMakePackageConfigHelpers)
message(STATUS "Platform: " ${CMAKE_HOST_SYSTEM_NAME})
message(STATUS "Install prefix: " ${CMAKE_INSTALL_PREFIX})
message(STATUS "Processor: " ${CMAKE_HOST_SYSTEM_PROCESSOR})
# User-facing switch; its value is written into the generated config header
option(GCTL_OPTIMIZATION_TOML "Use the TOML library" ON)
message(STATUS "[GCTL_OPTIMIZATION] Use the TOML library: " ${GCTL_OPTIMIZATION_TOML})
# GCTL is a hard dependency; its include directory is added for every target
find_package(GCTL REQUIRED)
include_directories(${GCTL_INC_DIR})
# Generate the configuration header from config.h.in.
# NOTE(review): the output is written into the source tree (lib/optimization/),
# not the build tree, so the generated header sits next to the library sources.
configure_file(
"${PROJECT_SOURCE_DIR}/config.h.in"
"${PROJECT_SOURCE_DIR}/lib/optimization/gctl_optimization_config.h"
)
# Build the library
add_subdirectory(lib)
# Build the example programs
add_subdirectory(example)

View File

@ -0,0 +1,25 @@
@PACKAGE_INIT@

# find_dependency() forwards the QUIET/REQUIRED flags of the consumer's
# find_package() call, so an optional lookup of this package no longer aborts
# the consumer's configure step when GCTL is missing.
include(CMakeFindDependencyMacro)

# Version and install locations. Both spellings of each variable
# (INC_DIR/INCLUDE_DIR, LIB_DIR/LIBRARY_DIR, LIB/LIBRARY) are kept for
# consumers that rely on either name.
set(@PROJECT_NAME@_VERSION "@PROJECT_VERSION@")
set_and_check(@PROJECT_NAME@_INSTALL_PREFIX "${PACKAGE_PREFIX_DIR}")
set_and_check(@PROJECT_NAME@_INC_DIR "${PACKAGE_PREFIX_DIR}/include")
set_and_check(@PROJECT_NAME@_INCLUDE_DIR "${PACKAGE_PREFIX_DIR}/include")
set_and_check(@PROJECT_NAME@_LIB_DIR "${PACKAGE_PREFIX_DIR}/lib")
set_and_check(@PROJECT_NAME@_LIBRARY_DIR "${PACKAGE_PREFIX_DIR}/lib")
set(@PROJECT_NAME@_LIB gctl_optimization)
set(@PROJECT_NAME@_LIBRARY gctl_optimization)

# Build-time value of the TOML option, exposed to consumers
set(@PROJECT_NAME@_TOML @GCTL_OPTIMIZATION_TOML@)
message(STATUS "[GCTL_OPTIMIZATION] Use the TOML library: " @GCTL_OPTIMIZATION_TOML@)

# Pull in GCTL unless the consumer has already found it
if(NOT GCTL_FOUND)
    find_dependency(GCTL)
    include_directories(${GCTL_INC_DIR})
endif()

# include target information
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
check_required_components(@PROJECT_NAME@)

View File

@ -1,2 +1,17 @@
# gctl_optimization
## lcg_solver 共轭梯度求解器
### 求解器参数设置
#### 1. 从toml文件读取参数
用户可以从 toml 文件中读取并设置求解器参数。所有的参数都定义在名为 lcg 的顶级表格下,可设置的参数及类型如下所示:
```toml
[lcg]
max_iterations=<int>
epsilon=<float>
abs_diff=0|1
restart_epsilon=<float>
step=<float>
sigma=<float>
beta=<float>
maxi_m=<int>
```

1
config.h.in Normal file
View File

@ -0,0 +1 @@
/* Template line processed by CMake's configure_file(): becomes
 * "#define GCTL_OPTIMIZATION_TOML" when the GCTL_OPTIMIZATION_TOML option is ON,
 * and a commented-out #undef when it is OFF. */
#cmakedefine GCTL_OPTIMIZATION_TOML

20
example/CMakeLists.txt Normal file
View File

@ -0,0 +1,20 @@
# Compile every example with full optimisation.
string(APPEND CMAKE_CXX_FLAGS " -O3")

# Example binaries are collected under <build>/bin/examples.
set(EXECUTABLE_OUTPUT_PATH "${PROJECT_BINARY_DIR}/bin/examples")

# add_example(<name> <switch>)
#   Builds <name>.cpp into an executable called <name> when <switch> is true,
#   as C++17, linked against GCTL and the gctl_optimization library.
macro(add_example name switch)
    if(${switch})
        add_executable(${name} ${name}.cpp)
        set_target_properties(${name} PROPERTIES
            CXX_STANDARD 17
            CXX_STANDARD_REQUIRED ON)
        target_link_libraries(${name} PRIVATE ${GCTL_LIB} gctl_optimization)
    endif()
endmacro()

# Register ex1 ... ex8, all enabled.
foreach(example_id RANGE 1 8)
    add_example(ex${example_id} ON)
endforeach()

164
example/ex1.cpp Normal file
View File

@ -0,0 +1,164 @@
/********************************************************
*
*
*
*
*
*
* Generic Scientific Template Library
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* The GSTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License (LGPL) along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GSTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GSTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "../lib/optimization.h"
#define M 1000
#define N 800
/**
 * @brief Largest element-wise absolute difference between two arrays.
 *
 * Only the first a.size() elements are compared, so b must hold at least as
 * many elements as a.
 *
 * @param a First array; its size determines how many elements are compared.
 * @param b Second array.
 * @return max_i |a[i] - b[i]|, or -1.0 when a is empty.
 */
double max_diff(const gctl::array<double> &a, const gctl::array<double> &b)
{
    double max = -1.0;
    for (size_t i = 0; i < a.size(); i++)
    {
        // fabs() instead of sqrt(d*d): same value for ordinary inputs, but it
        // cannot overflow or underflow when d is very large or very small.
        max = std::max(fabs(a[i] - b[i]), max);
    }
    return max;
}
// Example problem for gctl::lcg_solver. The operator applied by LCG_Ax() is
// built from the M x N member `kernel` (two matvec calls, the second with
// gctl::Trans), and LCG_Mx() supplies a preconditioner stored in `p`.
class ex1 : public gctl::lcg_solver
{
public:
ex1();
virtual ~ex1();
// Compute the B term (right-hand side) of the conjugate-gradient system from x
void cal_partb(const gctl::array<double> &x, gctl::array<double> &B);
// Defines how the product Ax is evaluated for the conjugate-gradient solver
virtual void LCG_Ax(const gctl::array<double> &x, gctl::array<double> &ax);
// Defines how the preconditioner is applied: combines p and x via vecmul()
virtual void LCG_Mx(const gctl::array<double> &x, gctl::array<double> &mx);
private:
gctl::matrix<double> kernel; // kernel matrix stored as a plain 2-D array
gctl::array<double> tmp_arr; // buffer for intermediate results
gctl::array<double> p; // preconditioner
};
// Builds a random M x N kernel with entries drawn from [-1, 1] and the
// preconditioner p, where p[i] is the reciprocal of the squared norm of
// kernel column i.
ex1::ex1()
{
    kernel.resize(M, N);
    kernel.random(-1.0, 1.0, gctl::RdUniform);

    tmp_arr.resize(M);
    p.resize(N);

    for (size_t i = 0; i < N; i++)
    {
        // Squared norm of column i of the kernel.
        double diag = 0.0;
        for (size_t j = 0; j < M; j++)
        {
            diag += kernel[j][i]*kernel[j][i];
        }
        // An all-zero column would give 1/0; fall back to 1.0 (no scaling)
        // for that entry instead of storing inf in the preconditioner.
        p[i] = diag > 0.0 ? 1.0/diag : 1.0;
    }
}

ex1::~ex1(){}
// Fills B by applying the system operator (LCG_Ax) to the model x.
void ex1::cal_partb(const gctl::array<double> &x, gctl::array<double> &B)
{
    this->LCG_Ax(x, B);
}
// Evaluates ax from x in two steps: tmp_arr = matvec(kernel, x), then
// ax = matvec(kernel, tmp_arr) with the gctl::Trans flag.
void ex1::LCG_Ax(const gctl::array<double> &x, gctl::array<double> &ax)
{
    matvec(tmp_arr, kernel, x);                // first product, stored in the scratch buffer
    matvec(ax, kernel, tmp_arr, gctl::Trans);  // second product, using the Trans layout
}
// Applies the preconditioner: mx is produced from p and x by vecmul()
// (presumably an element-wise product -- confirm against gctl).
void ex1::LCG_Mx(const gctl::array<double> &x, gctl::array<double> &mx)
{
    vecmul(mx, p, x);
}
// Recovers a known random model with every solver type in turn and reports the
// largest deviation from that model. The CG, PCG and CGS runs log to log.txt;
// the remaining runs use the solver's default output stream and std::clog.
int main(int argc, char const *argv[])
{
    // Generate the reference model (uniform in [1, 2])
    gctl::array<double> fm(N);
    gctl::random(fm, 1.0, 2.0, gctl::RdUniform);

    ex1 test;

    // Compute the B term of the conjugate-gradient system
    gctl::array<double> B(N);
    test.cal_partb(fm, B);

    // Solution vector, started from zero before every run
    gctl::array<double> m(N, 0.0);

    test.set_lcg_message(gctl::LCG_SOLUTION);

    std::ofstream ofile("log.txt");
    if (!ofile)
    {
        // Without this check the solvers would silently log to a dead stream.
        std::cerr << "Failed to open log.txt for writing" << std::endl;
        return 1;
    }

    test.LCG_Minimize(m, B, gctl::LCG_CG, ofile);
    ofile << "maximal difference: " << max_diff(fm, m) << std::endl;

    m.assign_all(0.0);
    test.LCG_Minimize(m, B, gctl::LCG_PCG, ofile);
    ofile << "maximal difference: " << max_diff(fm, m) << std::endl;

    m.assign_all(0.0);
    test.LCG_Minimize(m, B, gctl::LCG_CGS, ofile);
    ofile << "maximal difference: " << max_diff(fm, m) << std::endl;
    ofile.close();

    test.set_lcg_message(gctl::LCG_SOLUTION);

    m.assign_all(0.0);
    test.LCG_Minimize(m, B, gctl::LCG_BICGSTAB);
    std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;

    m.assign_all(0.0);
    test.LCG_Minimize(m, B, gctl::LCG_BICGSTAB2);
    std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;

    // Box constraints matching the range the reference model was drawn from
    gctl::array<double> low(N, 1.0);
    gctl::array<double> hig(N, 2.0);

    m.assign_all(0.0);
    test.LCG_MinimizeConstrained(m, B, low, hig, gctl::LCG_PG);
    std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;

    m.assign_all(0.0);
    test.LCG_MinimizeConstrained(m, B, low, hig, gctl::LCG_SPG);
    std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
    return 0;
}

175
example/ex2.cpp Normal file
View File

@ -0,0 +1,175 @@
/********************************************************
*
*
*
*
*
*
* Generic Scientific Template Library
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* The GSTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License (LGPL) along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GSTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GSTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "../lib/optimization.h"
#define M 100
#define N 90
// Draws a pseudo-random double between l and t (both ends reachable) by
// scaling one rand() sample; seeding is left to the caller.
double random_double(double l, double t)
{
    const double span = t - l;
    const double scaled = span * rand();
    return scaled / RAND_MAX + l;
}
// Draws a pseudo-random integer from the half-open range [small, big) using
// rand(). An empty or inverted range (big <= small) yields `small` instead of
// evaluating rand() % 0, which is undefined behaviour.
int random_int(int small, int big)
{
    if (big <= small)
    {
        return small;
    }
    return (rand() % (big - small)) + small;
}
// Example problem for gctl::lgd_solver: find x such that kernel * x matches
// the observations stored in obs.
class ex2 : public gctl::lgd_solver
{
protected:
gctl::matrix<double> kernel; // M x N forward operator, filled in the constructor
gctl::array<double> obs, tmp; // obs: observations set by CalObs(); tmp: residual buffer used by LGD_Evaluate()
protected:
// Returns the mean squared residual for x and writes its gradient into g
double LGD_Evaluate(const gctl::array<double> &x, gctl::array<double> &g);
public:
ex2();
virtual ~ex2(){}
// Fills obs with kernel * x plus a small random perturbation
void CalObs(const gctl::array<double> &x);
};
// Sizes the work arrays and scatters random entries into the kernel: every row
// receives between 25 and 34 draws, each written to a random column with a
// value from [-10, 10]. Repeated columns simply overwrite earlier draws.
// NOTE(review): entries that are never drawn keep whatever resize() leaves
// behind -- confirm that gctl::matrix::resize zero-initialises.
ex2::ex2()
{
    kernel.resize(M, N);
    tmp.resize(M);
    obs.resize(M);

    srand(time(0));

    // Add some large values
    for (int row = 0; row < M; row++)
    {
        const int draw_count = random_int(25, 35);
        for (int draw = 0; draw < draw_count; draw++)
        {
            // Column first, value second: keeps the rand() call order.
            const int col = random_int(0, N);
            const double value = random_double(-10, 10);
            kernel[row][col] = value;
        }
    }
}
// Objective for the solver: the mean squared residual of kernel * x against
// obs. The gradient (2/M) * kernel^T * residual is written into g.
double ex2::LGD_Evaluate(const gctl::array<double> &x, gctl::array<double> &g)
{
    // Residual: tmp = kernel * x - obs
    for (int row = 0; row < M; row++)
    {
        tmp[row] = 0.0;
        for (int col = 0; col < N; col++)
        {
            tmp[row] += kernel[row][col] * x[col];
        }
        tmp[row] -= obs[row];
    }

    // Gradient: g = (2/M) * kernel^T * tmp
    const double grad_scale = 2.0/M;
    for (int col = 0; col < N; col++)
    {
        g[col] = 0.0;
        for (int row = 0; row < M; row++)
        {
            g[col] += kernel[row][col]*tmp[row];
        }
        g[col] *= grad_scale;
    }

    // Objective value: mean of the squared residuals
    double squared_sum = 0.0;
    for (int row = 0; row < M; row++)
    {
        squared_sum += tmp[row]*tmp[row];
    }
    return squared_sum/M;
}
// Computes the forward response obs = kernel * x, then perturbs every
// observation with uniform noise drawn from [-1e-3, 1e-3].
void ex2::CalObs(const gctl::array<double> &x)
{
    for (int row = 0; row < M; row++)
    {
        // Forward value for this row
        obs[row] = 0.0;
        for (int col = 0; col < N; col++)
        {
            obs[row] += kernel[row][col]*x[col];
        }

        // Add noise
        obs[row] += random_double(-1e-3, 1e-3);
    }
}
int main(int argc, char const *argv[])
{
gctl::array<double> m(N, 0.0), mean_m(N, 0.0), stddev_m(N, 0.0), low(N), hig(N);
// 生成一组正演解 包含一些大值和一些小值
gctl::array<double> fm(N);
int N2 = (int) N/2;
for (int i = 0; i < N2; i++)
{
//fm[i] = random_double(5, 10);
fm[i] = 10.0;
}
for (int i = N2; i < N; i++)
{
//fm[i] = random_double(1, 2);
fm[i] = 1.0;
}
for (int i = 0; i < N2; i++)
{
low[i] = 9.0; // 对解的范围进行约束
hig[i] = 11.0;
}
for (int i = N2; i < N; i++)
{
low[i] = 0.0;
hig[i] = 2.0;
}
ex2 e;
e.CalObs(fm);
gctl::lgd_para my_para = e.default_lgd_para();
my_para.flight_times = 20000;
my_para.batch = 100;
e.set_lgd_para(my_para);
e.LGD_Minimize(m, mean_m, stddev_m, low, hig);
for (int i = 0; i < N; i++)
{
std::cout << fm[i] << " " << m[i] << " " << mean_m[i] << " " << stddev_m[i] << " " << fabs(mean_m[i] - fm[i]) << std::endl;
}
return 0;
}

69
example/ex3.cpp Normal file
View File

@ -0,0 +1,69 @@
#include "../lib/optimization.h"
// Example problem for gctl::lbfgs_solver: minimises the residual norm of a
// small system of three nonlinear equations in three unknowns.
class TEST_FUNC : public gctl::lbfgs_solver
{
public:
TEST_FUNC();
~TEST_FUNC();
// Objective callback: returns the residual norm at x and writes its gradient into g
virtual double LBFGS_Evaluate(const gctl::array<double> &x, gctl::array<double> &g);
// Configures the solver, runs the minimisation (logging to log.txt) and prints the result
void Routine();
private:
gctl::array<double> m_x; // the three unknowns, resized to (3, 0.0) in the constructor
};
// Starts the three unknowns at zero.
TEST_FUNC::TEST_FUNC()
{
    m_x.resize(3, 0.0);
}

// Nothing to release.
TEST_FUNC::~TEST_FUNC() = default;
// Test system (x1, x2, x3 correspond to x[0], x[1], x[2]):
//     3 =  3*x1 + x2 + 2*x3*x3
//     1 = -3*x1 + 5*x2*x2 + 2*x1*x3
//   -12 = 25*x1*x2 + 20*x3
// The right-hand sides used below are slightly perturbed to act as noise.
// Returns the Euclidean norm of the three residuals and writes its gradient
// with respect to x into g.
// NOTE(review): the gradient divides by the norm, so it is undefined at an
// exact root (norm == 0).
double TEST_FUNC::LBFGS_Evaluate(const gctl::array<double> &x, gctl::array<double> &g)
{
    // Residuals of the three equations (a little noise is added here)
    const double r0 = 3*x[0] + x[1] + 2*x[2]*x[2] - 3.012;
    const double r1 = -3*x[0] + 5*x[1]*x[1] + 2*x[0]*x[2] - 1.04252;
    const double r2 = 25*x[0]*x[1] + 20*x[2] + 12.12479;

    const double norm = sqrt(r0*r0+r1*r1+r2*r2);

    // d(norm)/dx_k = 0.5 * d(r0^2 + r1^2 + r2^2)/dx_k / norm
    g[0] = 0.5*(6*r0+2*r1*(2*x[2]-3)+50*r2*x[1])/norm;
    g[1] = 0.5*(2*r0+20*r1*x[1]+50*r2*x[0])/norm;
    g[2] = 0.5*(8*r0*x[2]+4*r1*x[0]+40*r2)/norm;
    return norm;
}
void TEST_FUNC::Routine()
{
gctl::lbfgs_para self_para = default_lbfgs_para();
self_para.m = 10;
self_para.past = 5;
self_para.residual = 1e-10;
//self_para.min_step = 1e-30;
//self_para.max_linesearch = 40;
//self_para.linesearch = gctl::LBFGS_LINESEARCH_BACKTRACKING_WOLFE;
set_lbfgs_para(self_para);
std::ofstream ofile("log.txt");
show_lbfgs_para(ofile);
double fx = LBFGS_Minimize(m_x, ofile);
ofile.close();
m_x.show();
return;
}
int main(int argc, char const *argv[])
{
TEST_FUNC test;
test.Routine();
return 0;
}

91
example/ex4.cpp Normal file
View File

@ -0,0 +1,91 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "gctl/core.h"
#include "gctl/algorithm.h"
#include "../lib/optimization.h"
#include "iostream"
#include "iomanip"
using std::cout;
using std::endl;
using std::setw;
int main(int argc, char const *argv[])
{
gctl::matrix<double> A(4, 3);
for (int i = 0; i < A.row_size(); i++)
{
for (int j = 0; j < A.col_size(); j++)
{
A[i][j] = 3*(i+1) + j - 2;
}
}
A[3][1] = 1;
cout<<"A(" << A.row_size() << ", " << A.col_size() << ") = " <<endl;
A.show();
gctl::svd svdd;
svdd.decompose(A);
cout<<"U(" << svdd.U.row_size() << ", " << svdd.U.col_size() << ") = " <<endl;
svdd.U.show();
cout<<"S(" << svdd.S.size() << ") = " << endl;
svdd.S.show();
cout<<"V(" << svdd.V.row_size() << ", " << svdd.V.col_size() << ") = " <<endl;
svdd.V.show();
int sig_num = svdd.get_singular_number();
double tmp_d;
gctl::array<double> tmp(sig_num);
cout<<"U^T * S * V(" << A.row_size() << ", " << A.col_size() << ") = " <<endl;
for(int i=0;i<A.row_size();i++)
{
for (int k = 0; k < sig_num; k++)
{
tmp[k] = svdd.U[k][i] * svdd.S[k];
}
for(int j=0;j<A.col_size();j++)
{
tmp_d = 0.0;
for (int k = 0; k < sig_num; k++)
{
tmp_d += tmp[k] * svdd.V[k][j];
}
cout<<setw(12)<<tmp_d<<' ';
}
cout<<endl;
}
cout<<endl;
return 0;
}

78
example/ex5.cpp Normal file
View File

@ -0,0 +1,78 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "gctl/core.h"
#include "gctl/algorithm.h"
#include "../lib/optimization.h"
int main(int argc, char const *argv[])
{
gctl::matrix<double> A(5, 5);
for (int i = 0; i < 5; i++)
{
for (int j = 0; j < 5; j++)
{
A[i][j] = 3*(i+1) + j - 2;
}
}
// 注意A要满秩
A[1][2] = 3.4;
A[4][1] = 2.1;
A[3][4] = 9.7;
A[2][3] = 2.7;
std::cout<<"A(5, 5) = " <<std::endl;
for(int i=0;i<5;i++){
for(int j=0;j<5;j++){
std::cout<<A[i][j]<<' ';
}
std::cout<<std::endl;
}
std::cout<<std::endl;
gctl::array<double> m(5, 0.5), x(5, 0.0);
gctl::array<double> B(5);
for (int i = 0; i < 5; i++)
{
B[i] = 0.0;
for (int j = 0; j < 5; j++)
{
B[i] += A[i][j] * m[j];
}
}
gctl::lu glu(A);
glu.decompose();
glu.solve(B, x);
for (size_t i = 0; i < 5; i++)
{
std::cout << m[i] << " " << x[i] << std::endl;
}
return 0;
}

90
example/ex6.cpp Normal file
View File

@ -0,0 +1,90 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "gctl/core.h"
#include "gctl/algorithm.h"
#include "../lib/optimization.h"
// Draws a pseudo-random double between l and t (both ends reachable) by
// scaling one rand() sample; seeding is left to the caller.
double random_double(double l, double t)
{
    const double span = t - l;
    const double scaled = span * rand();
    return scaled / RAND_MAX + l;
}
// Cholesky demo: builds a random symmetric positive-definite 5x5 matrix, forms
// B = A * m for a known m, solves A x = B with gctl::cholesky and prints m
// next to the recovered x.
int main(int argc, char const *argv[])
{
    srand(time(0));

    gctl::matrix<double> A(5, 5);
    for (int i = 0; i < 5; i++)
    {
        for (int j = i; j < 5; j++)
        {
            // Off-diagonal entries lie in [0.1, 1.0], so the four of them in a
            // row sum to at most 4.0. Drawing the diagonal from [5.0, 7.0]
            // makes A strictly diagonally dominant with a positive diagonal,
            // hence symmetric positive definite as Cholesky requires. The
            // previous diagonal range [1.0, 3.0] could yield an indefinite matrix.
            if (i == j) A[i][j] = random_double(5.0, 7.0);
            else A[i][j] = random_double(0.1, 1.0);

            // Mirror into the lower triangle to keep A symmetric.
            A[j][i] = A[i][j];
        }
    }

    std::cout<<"A(5, 5) = " <<std::endl;
    for(int i=0;i<5;i++){
        for(int j=0;j<5;j++){
            std::cout<<A[i][j]<<' ';
        }
        std::cout<<std::endl;
    }
    std::cout<<std::endl;

    gctl::array<double> m(5, 0.5), x(5, 0.0);

    // Right-hand side B = A * m
    gctl::array<double> B(5);
    for (int i = 0; i < 5; i++)
    {
        B[i] = 0.0;
        for (int j = 0; j < 5; j++)
        {
            B[i] += A[i][j] * m[j];
        }
    }

    gctl::cholesky gck(A);
    gck.decompose();
    gck.solve(B, x);

    // Expected and recovered solution, side by side
    for (size_t i = 0; i < 5; i++)
    {
        std::cout << m[i] << " " << x[i] << std::endl;
    }
    return 0;
}

124
example/ex7.cpp Normal file
View File

@ -0,0 +1,124 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "gctl/core.h"
#include "gctl/algorithm.h"
#include "../lib/optimization.h"
typedef gctl::array<std::complex<double>> cd_array;
#define N 1000
/**
 * @brief Largest element-wise difference (complex magnitude) between two arrays.
 *
 * Only the first a.size() elements are compared, so b must hold at least as
 * many elements as a.
 *
 * @param a First array; its size determines how many elements are compared.
 * @param b Second array.
 * @return max_i |a[i] - b[i]|, or -1 when a is empty.
 */
double max_diff(const cd_array &a, const cd_array &b)
{
    double max = -1;
    std::complex<double> t;
    for (size_t i = 0; i < a.size(); i++)
    {
        t = a[i] - b[i];
        // std::abs gives the magnitude. std::norm, used previously, returns the
        // squared magnitude, so the reported "difference" was squared.
        max = std::max(std::abs(t), max);
    }
    return max;
}
// Example problem for gctl::clcg_solver: a complex-valued linear system whose
// operator is the N x N member `kernel`.
class ex7 : public gctl::clcg_solver
{
public:
ex7();
virtual ~ex7();
// Solver callback: evaluates ax from x via gctl::matvec with the requested layout and conjugation
virtual void CLCG_Ax(const cd_array &x, cd_array &ax, gctl::matrix_layout_e layout, gctl::conjugate_type_e conj);
// Compute the B term (right-hand side) of the conjugate-gradient system from x
void cal_partb(const cd_array &x, cd_array &B);
private:
gctl::matrix<std::complex<double>> kernel; // kernel matrix stored as a plain 2-D array
};
// Fills the N x N kernel symmetrically: N*(N+1)/2 random values from [1, 2]
// are generated once, written to the upper triangle row by row and mirrored
// into the lower triangle. Only real values are assigned to the entries.
ex7::ex7()
{
    gctl::array<double> packed(round(0.5*(N+1)*N));
    gctl::random(packed, 1.0, 2.0, gctl::RdUniform);

    kernel.resize(N, N);

    size_t next = 0;  // index of the next unused random value
    for (int row = 0; row < N; row++)
    {
        for (int col = row; col < N; col++)
        {
            kernel[row][col] = packed[next];
            kernel[col][row] = kernel[row][col];
            next++;
        }
    }
}

// Nothing to release.
ex7::~ex7() = default;
// Fills B with the product of the kernel and x (gctl::matvec with its default
// layout and conjugation arguments).
void ex7::cal_partb(const cd_array &x, cd_array &B)
{
    gctl::matvec(B, kernel, x);
}
// Solver callback: evaluates ax from x with the kernel, forwarding the layout
// and conjugation requested by the solver to gctl::matvec.
void ex7::CLCG_Ax(const cd_array &x, cd_array &ax, gctl::matrix_layout_e layout, gctl::conjugate_type_e conj)
{
    gctl::matvec(ax, kernel, x, layout, conj);
}
int main(int argc, char const *argv[])
{
// 生成一组正演解
gctl::array<double> tmp(2*N);
gctl::random(tmp, 1.0, 2.0, gctl::RdUniform);
cd_array fm(N);
for (size_t i = 0; i < N; i++)
{
fm[i].real(tmp[2*i]);
fm[i].imag(tmp[2*i + 1]);
}
ex7 test;
// 计算共轭梯度B项
cd_array B(N);
test.cal_partb(fm, B);
// 声明一组解
cd_array m(N, std::complex<double>(0.0, 0.0));
gctl::clcg_para my_para = test.default_clcg_para();
my_para.abs_diff = 1;
test.set_clcg_para(my_para);
test.CLCG_Minimize(m, B, gctl::CLCG_BICG_SYM);
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
return 0;
}

233
example/ex8.cpp Normal file
View File

@ -0,0 +1,233 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "gctl/core.h"
#include "gctl/algorithm.h"
#include "../lib/optimization.h"
#define M 1000
#define N 900
// Draws a pseudo-random double between l and t (both ends reachable) by
// scaling one rand() sample; seeding is left to the caller.
double random_double(double l, double t)
{
    const double span = t - l;
    const double scaled = span * rand();
    return scaled / RAND_MAX + l;
}
// Draws a pseudo-random integer from the half-open range [small, big) using
// rand(). An empty or inverted range (big <= small) yields `small` instead of
// evaluating rand() % 0, which is undefined behaviour.
int random_int(int small, int big)
{
    if (big <= small)
    {
        return small;
    }
    return (rand() % (big - small)) + small;
}
// Largest element-wise absolute difference between a and b over the first
// a.size() elements; returns -1.0 when a is empty.
double max_diff(const gctl::_1d_array &a, const gctl::_1d_array &b)
{
    double largest = -1.0;
    for (size_t idx = 0; idx < a.size(); ++idx)
    {
        const double gap = fabs(a[idx] - b[idx]);
        largest = std::max(gap, largest);
    }
    return largest;
}
// Example that fits one model against three data sets at once. Each data set
// has its own kernel (k1..k3) and observations (obs1..obs3); the three losses
// are combined through the gctl::grad_norm interface and minimised with L-BFGS.
class ex8 : public gctl::lbfgs_solver, public gctl::grad_norm
{
public:
ex8();
virtual ~ex8();
// Objective callback: registers the three single losses and returns the combined loss, gradient in g
virtual double LBFGS_Evaluate(const gctl::_1d_array &x, gctl::_1d_array &g);
// Progress callback: calls UpdateWeights() and then defers to the base-class implementation
virtual int LBFGS_Progress(const gctl::_1d_array &x, const gctl::_1d_array &g, const double fx,
const double converge, const double rate, const gctl::lbfgs_para param, int k, int ls, std::ostream &ss);
// Computes the three observation vectors from the model x and perturbs them
void CalTarget(const gctl::_1d_array &x);
private:
gctl::_1d_array obs1, obs2, obs3, tmp, grad; // observations per data set; tmp: residual buffer (size M); grad: single-loss gradient buffer (size N)
gctl::_2d_matrix k1, k2, k3; // M x N kernels, filled with different value ranges in the constructor
};
// Sizes the work buffers and builds three random kernels whose entries differ
// by orders of magnitude ([-1, 1], [-200, 200] and [-0.01, 0.01]), so that the
// three losses have very different scales.
// NOTE(review): kernel entries that are never drawn keep whatever resize()
// leaves behind -- confirm that gctl::_2d_matrix::resize zero-initialises.
ex8::ex8()
{
    srand(time(0));

    tmp.resize(M);
    grad.resize(N);

    // Sizes one kernel/observation pair and scatters random entries into the
    // kernel: every row receives between 25 and 34 draws, each written to a
    // random column with a value from [lo, hi].
    const auto init_kernel = [](gctl::_2d_matrix &kern, gctl::_1d_array &obs, double lo, double hi)
    {
        kern.resize(M, N);
        obs.resize(M);

        for (int row = 0; row < M; row++)
        {
            const int draw_count = random_int(25, 35);
            for (int draw = 0; draw < draw_count; draw++)
            {
                // Column first, value second: keeps the rand() call order.
                const int col = random_int(0, N);
                const double value = random_double(lo, hi);
                kern[row][col] = value;
            }
        }
    };

    init_kernel(k1, obs1, -1.0, 1.0);
    init_kernel(k2, obs2, -200.0, 200.0);
    init_kernel(k3, obs3, -0.01, 0.01);
}

// Nothing to release.
ex8::~ex8() = default;
// Objective callback. For each of the three data sets it forms the residual
// (kernel * x - obs), registers its mean squared value and the matching
// gradient (2/M) * kernel^T * residual via AddSingleLoss(), then returns the
// combined loss from GradNormLoss(), which also fills g.
double ex8::LBFGS_Evaluate(const gctl::_1d_array &x, gctl::_1d_array &g)
{
    gctl::_2d_matrix *kernels[3] = {&k1, &k2, &k3};
    gctl::_1d_array *targets[3] = {&obs1, &obs2, &obs3};

    for (int term = 0; term < 3; term++)
    {
        // Residual of this data set
        gctl::matvec(tmp, *kernels[term], x);
        tmp -= *targets[term];

        // Gradient of the mean squared residual
        gctl::matvec(grad, *kernels[term], tmp, gctl::Trans);
        gctl::scale(grad, 2.0/M);

        AddSingleLoss(gctl::power2(gctl::module(tmp, gctl::L2))/M, grad);
    }

    return GradNormLoss(g);
}
// Progress callback: updates the loss weights first, then lets the base-class
// implementation handle the progress report and returns its result.
int ex8::LBFGS_Progress(const gctl::_1d_array &x, const gctl::_1d_array &g, const double fx,
    const double converge, const double rate, const gctl::lbfgs_para param, int k, int ls, std::ostream &ss)
{
    UpdateWeights();
    const int status = gctl::lbfgs_solver::LBFGS_Progress(x, g, fx, converge, rate, param, k, ls, ss);
    return status;
}
// Computes the forward response of each kernel for the model x and perturbs
// every observation with uniform noise drawn from [-1e-3, 1e-3].
void ex8::CalTarget(const gctl::_1d_array &x)
{
    gctl::_2d_matrix *kernels[3] = {&k1, &k2, &k3};
    gctl::_1d_array *targets[3] = {&obs1, &obs2, &obs3};

    for (int term = 0; term < 3; term++)
    {
        // Forward values
        gctl::_1d_array &obs = *targets[term];
        gctl::matvec(obs, *kernels[term], x);

        // Add noise
        for (int row = 0; row < M; row++)
        {
            obs[row] += random_double(-1e-3, 1e-3);
        }
    }
}
int main(int argc, char const *argv[])
{
// 生成一组正演解
gctl::_1d_array fm(N);
random(fm, 1.0, 2.0, gctl::RdUniform);
ex8 test;
// 计算拟合目标项
test.CalTarget(fm);
// 声明一组解
gctl::_1d_array m(N, 0.0);
gctl::lbfgs_para self_para = test.default_lbfgs_para();
self_para.linesearch = gctl::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE;
self_para.epsilon = 1e-6;
test.set_lbfgs_para(self_para);
test.show_lbfgs_para();
test.InitGradNorm(3, N);
test.set_control_weight(1.0);
test.set_weight_step(0.00001);
double fx = test.LBFGS_Minimize(m);
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
gctl::_1d_array records;
test.get_records(records);
for (size_t i = 0; i < records.size(); i++)
{
if ((i+1)%3 == 0)
{
std::cout << records[i] << "\n";
}
else std::cout << records[i] << " ";
}
return 0;
}

54
installer Executable file
View File

@ -0,0 +1,54 @@
#!/bin/bash
# Thin wrapper around CMake, make and GNU stow.
# Usage: ./installer <sub-command> [<option>=<value> ...]
# Every argument after the sub-command is forwarded to CMake as -D<argument>.

if [[ $# == 0 || ${1} == "help" ]]; then
	echo "Compiles executables/libraries and maintains installed files. Two tools 'Cmake' and 'stow' are empolyed here. For more information, see https://cmake.org and https://www.gnu.org/software/stow/."
	echo ""
	echo "School of Earth Sciences, Zhejiang University"
	echo "Yi Zhang (yizhang-geo@zju.edu.cn)"
	echo ""
	echo "Usage: ./installer [option] [Cmake options]"
	echo ""
	echo "Options:"
	echo "(1) configure: Configure Cmake project(s). This option could take extra Cmake options as in <option>=<value>."
	echo "(2) build: Build executables/libraries."
	echo "(3) install: Install executables/libraries to the directory of CMAKE_INSTALL_PREFIX and sym-links them to the target address. This offers a quick and clean remove of the installed files."
	echo "(4) clean: Clean build/ folder(s)."
	echo "(5) uninstall: Delete the installed files and sym-links."
	echo "(6) info: Print out current setups."
	echo "(7) help: Show help information."
	exit 0
fi

package=gctl_optimization
address=/opt/stow
taress=/usr/local
option="-DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=${address}/${package}"

# Forward everything after the sub-command to CMake. Starting at the second
# argument keeps the sub-command itself (e.g. "info") out of the option list.
for opt in "${@:2}"; do
	option="${option} -D${opt}"
done

# ${option} is expanded unquoted on purpose: it holds several space-separated
# CMake arguments that must be split into separate words.
if [[ ${1} == "configure" && ! -d "build/" ]]; then
	mkdir build && cd build && cmake .. ${option}
elif [[ ${1} == "configure" ]]; then
	# Reuse the existing build directory, but start from an empty tree.
	cd build && rm -rf ./* && cmake .. ${option}
elif [[ ${1} == "build" ]]; then
	cd build && make
elif [[ ${1} == "install" ]]; then
	# Create the sym-links only when the installation itself succeeded.
	cd build && sudo make install && \
	sudo stow --dir=${address} --target=${taress} -S ${package}
elif [[ ${1} == "clean" ]]; then
	rm -rf build/
elif [[ ${1} == "uninstall" ]]; then
	# Remove the sym-links first; keep the package directory if that fails,
	# so the links never end up pointing at deleted files.
	sudo stow --dir=${address} --target=${taress} -D ${package} && \
	sudo rm -rf ${address}/${package}
elif [[ ${1} == "info" ]]; then
	echo "package name:" ${package}
	echo "stow address:" ${address}
	echo "target address:" ${taress}
	echo "Cmake options:" ${option}
else
	echo "Unknown option: ${1}. Run './installer help' for usage." >&2
	exit 1
fi

64
lib/CMakeLists.txt Normal file
View File

@ -0,0 +1,64 @@
# Builds the gctl_optimization library (shared and static), generates the
# CMake package files and defines the install rules.
# Compile with full optimisation
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
# Libraries are written to <build>/lib
set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)
# Collect every source file found in optimization/
aux_source_directory(optimization/ GCTL_OPTIMAL_SRC)
# Library targets
#
# Shared library
add_library(gctl_optimization SHARED ${GCTL_OPTIMAL_SRC})
# Static library built from the same sources
add_library(gctl_optimization_static STATIC ${GCTL_OPTIMAL_SRC})
# Give the static library the same base file name as the shared one
set_target_properties(gctl_optimization_static PROPERTIES OUTPUT_NAME "gctl_optimization")
# NOTE(review): CLEAN_DIRECT_OUTPUT looks like a legacy property meant to let two
# targets share one output name -- confirm it still has an effect with CMake >= 3.15.
set_target_properties(gctl_optimization PROPERTIES CLEAN_DIRECT_OUTPUT 1)
set_target_properties(gctl_optimization_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
# Version and SOVERSION of the shared library
set_target_properties(gctl_optimization PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR})
# Install RPATH (hard-coded to /usr/local/lib) and language standard.
# NOTE(review): an RPATH on the static target presumably has no effect -- confirm.
set_target_properties(gctl_optimization PROPERTIES INSTALL_RPATH /usr/local/lib)
set_target_properties(gctl_optimization_static PROPERTIES INSTALL_RPATH /usr/local/lib)
set_target_properties(gctl_optimization PROPERTIES CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON)
set_target_properties(gctl_optimization_static PROPERTIES CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON)
# Link against GCTL and generate the package config / version files in the build tree
target_link_libraries(gctl_optimization PUBLIC ${GCTL_LIB})
target_link_libraries(gctl_optimization_static ${GCTL_LIB})
set(CONFIG_FILE_PATH lib/cmake/${PROJECT_NAME})
configure_package_config_file(${PROJECT_SOURCE_DIR}/${PROJECT_NAME}Config.cmake.in
${CMAKE_BINARY_DIR}/${PROJECT_NAME}Config.cmake
INSTALL_DESTINATION ${CONFIG_FILE_PATH})
write_basic_package_version_file(${CMAKE_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
VERSION ${PROJECT_VERSION}
COMPATIBILITY SameMajorVersion)
# Install rules for the libraries and the package files.
# NOTE(review): the WIN32 branch installs neither the exported targets file nor
# the package config files, so find_package() for this project would
# presumably not work from a Windows install -- confirm whether that is intended.
if(WIN32)
install(TARGETS gctl_optimization DESTINATION lib)
install(TARGETS gctl_optimization_static DESTINATION lib)
else()
install(TARGETS gctl_optimization gctl_optimization_static
EXPORT ${PROJECT_NAME}Targets
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib)
install(EXPORT ${PROJECT_NAME}Targets
DESTINATION ${CONFIG_FILE_PATH})
install(FILES
${CMAKE_BINARY_DIR}/${PROJECT_NAME}Config.cmake
${CMAKE_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
DESTINATION ${CONFIG_FILE_PATH})
endif()
# Install the headers: top-level ones to include/gctl, the rest to include/gctl/optimization
file(GLOB GCTL_HEAD *.h)
file(GLOB GCTL_OPTIMAL_HEAD optimization/*.h)
install(FILES ${GCTL_HEAD} DESTINATION include/gctl)
install(FILES ${GCTL_OPTIMAL_HEAD} DESTINATION include/gctl/optimization)

43
lib/optimization.h Normal file
View File

@ -0,0 +1,43 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#ifndef _GCTL_OPTIMIZATION_H
#define _GCTL_OPTIMIZATION_H
#include "optimization/loss_func.h"
#include "optimization/lu.h"
#include "optimization/cholesky.h"
#include "optimization/svd.h"
#include "optimization/lcg.h"
#include "optimization/clcg.h"
#include "optimization/lgd.h"
#include "optimization/lbfgs.h"
#include "optimization/sgd.h"
#include "optimization/gradnorm.h"
#include "optimization/dwa.h"
#endif // _GCTL_OPTIMIZATION_H

View File

@ -0,0 +1,127 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "cholesky.h"
/**
 * @brief Bind the decomposition object to an input matrix.
 *
 * Only a reference to the matrix is kept, so the matrix must outlive this object.
 *
 * @param sourceMatrix Matrix to be decomposed; it must be non-empty and square.
 *
 * @throws domain_error if the matrix is empty or not square.
 */
gctl::cholesky::cholesky(matrix<double> &sourceMatrix) : decomposedMatrix(sourceMatrix)
{
    const bool usable = !sourceMatrix.empty() &&
        sourceMatrix.row_size() == sourceMatrix.col_size();

    if (!usable)
    {
        throw domain_error("Invalid input matrix. From cholesky::cholesky(...)");
    }
}
/**
 * @brief Decompose the referenced matrix in place into its Cholesky factor.
 *
 * The matrix is processed column by column. Only the lower triangle of the
 * input is read. On return the lower triangle (diagonal included) holds the
 * factor L and every computed off-diagonal entry is mirrored into the upper
 * triangle, so the upper triangle holds the transpose of L.
 *
 * @throws runtime_error if a pivot is not strictly positive (or is NaN), i.e.
 * the matrix is not positive definite. The matrix is left partially
 * overwritten in that case.
 */
void gctl::cholesky::decompose()
{
    for (int j = 0; j < decomposedMatrix.col_size(); j++)
    {
        // Diagonal entry: L[j][j] = sqrt(A[j][j] - sum_k L[j][k]^2)
        double diag_sum = 0.0;
        for (int k = 0; k < j; k++)
        {
            diag_sum += decomposedMatrix[j][k] * decomposedMatrix[j][k];
        }

        const double pivot = decomposedMatrix[j][j] - diag_sum;
        // Written as !(pivot > 0) so that a NaN pivot is rejected as well.
        if (!(pivot > 0.0))
        {
            throw runtime_error("The input matrix is not positively defined. From gctl::cholesky::decompose()");
        }
        decomposedMatrix[j][j] = std::sqrt(pivot);

        // Entries below the diagonal of column j
        for (int i = j + 1; i < decomposedMatrix.row_size(); i++)
        {
            double sum = 0.0;
            for (int k = 0; k < j; k++)
            {
                sum += (decomposedMatrix[i][k] * decomposedMatrix[j][k]);
            }

            decomposedMatrix[i][j] = (1 / decomposedMatrix[j][j]) * (decomposedMatrix[i][j] - sum);
            // Mirror into the upper triangle; solve() reads it during back substitution.
            decomposedMatrix[j][i] = decomposedMatrix[i][j];
        }
    }
}
/**
 * @brief Solve A*x = b, where A is the matrix originally passed to the constructor.
 *
 * The referenced matrix is used as the triangular factors, so decompose()
 * must have been called successfully beforehand.
 *
 * @param b Right-hand side; must be non-empty and match the matrix dimension.
 * @param x Solution; resized to the size of b.
 *
 * @throws domain_error if b is empty or its size differs from the matrix dimension.
 */
void gctl::cholesky::solve(const array<double>& b, array<double> &x)
{
    if (b.empty())
    {
        throw domain_error("Invalid target vector. From gctl::cholesky::solve(...)");
    }

    const int n = static_cast<int>(b.size());
    // Guard the indexing below: every row/column index runs up to n-1.
    if (n != static_cast<int>(decomposedMatrix.row_size()))
    {
        throw domain_error("Size of the target vector does not match the matrix. From gctl::cholesky::solve(...)");
    }

    x.resize(b.size());

    // Forward substitution: lower triangular * y = b (y is stored in x)
    for (int i = 0; i < n; i++)
    {
        double sum = 0.0;
        for (int j = 0; j < i; j++)
        {
            sum += (decomposedMatrix[i][j] * x[j]);
        }
        x[i] = (b[i] - sum) / decomposedMatrix[i][i];
    }

    // Back substitution: upper triangular (transpose of the lower factor) * x = y.
    // Done in place: x[i] still holds y[i] when it is read, and x[j] for j > i
    // already holds the final solution.
    for (int i = n - 1; i >= 0; i--)
    {
        double sum = 0.0;
        for (int j = n - 1; j > i; j--)
        {
            sum += (decomposedMatrix[i][j] * x[j]);
        }
        x[i] = (x[i] - sum) / decomposedMatrix[i][i];
    }
}

View File

@ -0,0 +1,55 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#ifndef _CHOLESKY_H
#define _CHOLESKY_H
#include "gctl/core.h"
namespace gctl
{
/**
* @brief Cholesky decomposition of a square matrix into a lower triangular
* factor and its transpose, with a solver for A*x = b built on the factors.
*
* @note Restricted to positive-definite matrices. decompose() throws if a
* non-positive pivot is met.
*
* @note The object only stores a reference to the input matrix and overwrites
* it during decompose(); the matrix must outlive the object. Copying is disabled.
*/
class cholesky
{
public:
/**
* @brief Bind the object to a matrix. Throws if the matrix is empty or not square.
*
* @param sourceMatrix Matrix to decompose; it is overwritten in place by decompose().
*/
cholesky(matrix<double> &sourceMatrix); // Matrix is decomposed in-place
virtual ~cholesky(){}
void decompose(); ///< Decomposition into triangular matrices (overwrites the referenced matrix).
void solve(const array<double>& b, array<double> &x); ///< Solve for x in form Ax = b. A is the original input matrix. Call decompose() first.
protected:
// Copying is disabled: the only data member is a reference to the caller's matrix.
cholesky(const gctl::cholesky&) = delete;
void operator=(const gctl::cholesky&) = delete;
matrix<double> &decomposedMatrix; ///< Reference to the caller's matrix; holds the factors after decompose().
};
}
#endif // _CHOLESKY_H

373
lib/optimization/clcg.cpp Normal file
View File

@ -0,0 +1,373 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "clcg.h"
/**
* Default parameters of the complex conjugate gradient solvers, in the member
* order of gctl::clcg_para: max_iterations = 0 (no iteration limit),
* epsilon = 1e-8, abs_diff = 0 (relative residual test).
*/
static const gctl::clcg_para clcg_defparam = {0, 1e-8, 0};
int gctl::clcg_solver::CLCG_Progress(const array<std::complex<double> > &m, const double converge, const clcg_para &param, size_t t)
{
if (converge <= param.epsilon)
{
std::clog << GCTL_CLEARLINE << "\rIteration-times: " << t << "\tconvergence: " << converge;
return 0;
}
if (clcg_inter_ > 0 && t%clcg_inter_ == 0)
{
std::clog << GCTL_CLEARLINE << "\rIteration-times: " << t << "\tconvergence: " << converge;
}
return 0;
}
/**
 * @brief Create a solver with the default parameters, a report interval of
 * one iteration and reporting enabled.
 */
gctl::clcg_solver::clcg_solver()
    : clcg_param_(clcg_defparam), clcg_inter_(1), clcg_silent_(false)
{
}
/// Destructor; the body is intentionally empty.
gctl::clcg_solver::~clcg_solver()
{
}
void gctl::clcg_solver::clcg_silent()
{
clcg_silent_ = true;
return;
}
void gctl::clcg_solver::set_clcg_report_interval(size_t inter)
{
clcg_inter_ = inter;
return;
}
void gctl::clcg_solver::set_clcg_para(const clcg_para &in_param)
{
clcg_param_ = in_param;
return;
}
// The header only declares this overload (and only includes toml.hpp) when
// GCTL_OPTIMIZATION_TOML is defined, so the definition must be guarded too.
#ifdef GCTL_OPTIMIZATION_TOML
/**
 * @brief Load the solver parameters from the "clcg" table of a TOML document.
 *
 * The parameters are first reset to their defaults; each of the keys
 * "max_iterations" (int), "epsilon" (double) and "abs_diff" (int) overrides
 * the corresponding default when present. A document without a "clcg" table
 * leaves the defaults in place.
 *
 * @param toml_data Parsed TOML document.
 */
void gctl::clcg_solver::set_clcg_para(const toml::value &toml_data)
{
    clcg_param_ = clcg_defparam;

    const std::string CLCG = "clcg";
    if (!toml_data.contains(CLCG)) return;

    const toml::value &section = toml_data.at(CLCG);
    if (section.contains("max_iterations")) clcg_param_.max_iterations = toml::find<int>(toml_data, CLCG, "max_iterations");
    if (section.contains("epsilon")) clcg_param_.epsilon = toml::find<double>(toml_data, CLCG, "epsilon");
    if (section.contains("abs_diff")) clcg_param_.abs_diff = toml::find<int>(toml_data, CLCG, "abs_diff");
}
#endif // GCTL_OPTIMIZATION_TOML
/**
 * @brief Report (or throw) the meaning of a solver return code.
 *
 * With err_throw == false a coloured "Success"/"Fail" prefix, the message and
 * a newline are written to ss. With err_throw == true a negative code is
 * thrown as std::runtime_error carrying the message, and a non-negative code
 * only writes the bare message to ss.
 *
 * @param err_code  Return code of a solver.
 * @param ss        Output stream for the message.
 * @param err_throw Throw on negative codes instead of printing them.
 *
 * @throws std::runtime_error if err_throw is set and err_code is negative.
 */
void gctl::clcg_solver::clcg_error_str(clcg_return_code err_code, std::ostream &ss, bool err_throw)
{
    const bool succeeded = err_code >= 0;

#if defined _WINDOWS || __WIN32__
    // The console colour is changed on the standard error handle.
    if (!err_throw)
    {
        if (succeeded)
        {
            SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_GREEN);
            ss << "Success! ";
        }
        else
        {
            SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_RED);
            ss << "Fail! ";
        }
    }
#else
    if (!err_throw)
    {
        if (succeeded)
            ss << "\033[1m\033[32mCLCG Success! ";
        else
            ss << "\033[1m\033[31mCLCG Fail! ";
    }
#endif

    std::string err_str;
    switch (err_code)
    {
        case CLCG_SUCCESS: // CLCG_CONVERGENCE shares this value
            err_str = "Iteration reached convergence."; break;
        case CLCG_STOP:
            err_str = "Iteration is stopped by the progress evaluation function."; break;
        case CLCG_ALREADY_OPTIMIZIED:
            err_str = "The variables are already optimized."; break;
        case CLCG_UNKNOWN_ERROR:
            err_str = "Unknown error."; break;
        case CLCG_INVILAD_VARIABLE_SIZE:
            err_str = "The size of the variables is negative."; break;
        case CLCG_INVILAD_MAX_ITERATIONS:
            err_str = "The maximal iteration times is negative."; break;
        case CLCG_INVILAD_EPSILON:
            err_str = "The epsilon is not in the range (0, 1)."; break;
        case CLCG_REACHED_MAX_ITERATIONS:
            err_str = "The maximal iteration has been reached."; break;
        case CLCG_NAN_VALUE:
            err_str = "The model values are NaN."; break;
        case CLCG_INVALID_POINTER:
            err_str = "Invalid pointer."; break;
        case CLCG_SIZE_NOT_MATCH:
            err_str = "The sizes of the solution and target do not match."; break;
        case CLCG_UNKNOWN_SOLVER:
            err_str = "Unknown solver."; break;
        default:
            err_str = "Unknown error."; break;
    }

    if (err_throw && !succeeded) throw std::runtime_error(err_str);
    else ss << err_str;

    // Restore the default colour and finish the line (printing mode only).
#if defined _WINDOWS || __WIN32__
    if (!err_throw)
    {
        SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7);
        ss << std::endl;
    }
#else
    if (!err_throw)
    {
        ss << "\033[0m" << std::endl;
    }
#endif
}
/**
 * @brief Get a copy of the default solver parameters.
 *
 * @return The default parameter set.
 */
gctl::clcg_para gctl::clcg_solver::default_clcg_para()
{
    return clcg_defparam;
}
/**
 * @brief Run the selected solver on A*m = B and report the outcome.
 *
 * In silent mode nothing is printed and a negative return code is thrown as
 * std::runtime_error (regardless of er_throw). Otherwise the run is timed and,
 * unless er_throw is set, a summary line with the solver name and the elapsed
 * time is printed; the return code is then passed to clcg_error_str() when
 * verbose is set or when the code is negative.
 *
 * @param m         Initial solution on input, final solution on output.
 * @param B         Right-hand side.
 * @param solver_id Solver to run.
 * @param ss        Stream passed to clcg_error_str().
 * @param verbose   Also report non-negative return codes.
 * @param er_throw  Throw on negative return codes instead of printing them.
 *
 * @throws std::invalid_argument if solver_id is not a known solver.
 * @throws std::runtime_error on a negative return code in silent mode or when er_throw is set.
 */
void gctl::clcg_solver::CLCG_Minimize(array<std::complex<double> > &m, const array<std::complex<double> > &B,
    clcg_solver_type solver_id, std::ostream &ss, bool verbose, bool er_throw)
{
    // Single dispatch point shared by the silent and the timed code paths.
    auto run_solver = [&]() -> clcg_return_code
    {
        switch (solver_id)
        {
            case CLCG_BICG: return clbicg(m, B);
            case CLCG_BICG_SYM: return clbicg_symmetric(m, B);
            case CLCG_CGS: return clcgs(m, B);
            case CLCG_BICGSTAB: return clbicgstab(m, B);
            case CLCG_TFQMR: return cltfqmr(m, B);
            default: throw std::invalid_argument("Invalid solver type. From gctl::clcg_solver::CLCG_Minimize(...)");
        }
    };

    if (clcg_silent_)
    {
        const clcg_return_code silent_ret = run_solver();
        if (silent_ret < 0) clcg_error_str(silent_ret, ss, true);
        return;
    }

#ifdef GCTL_OPENMP
    const double start = omp_get_wtime();
    const clcg_return_code ret = run_solver();
    const double end = omp_get_wtime();
    const double costime = 1000*(end-start);
#else
    const clock_t start = clock();
    const clcg_return_code ret = run_solver();
    const clock_t end = clock();
    const double costime = 1000*(end-start)/(double)CLOCKS_PER_SEC;
#endif

    if (!er_throw)
    {
        const char *solver_name;
        switch (solver_id)
        {
            case CLCG_BICG: solver_name = "Bi-CG"; break;
            case CLCG_BICG_SYM: solver_name = "Bi-CG (symmetrically accelerated)"; break;
            case CLCG_CGS: solver_name = "CGS"; break;
            case CLCG_BICGSTAB: solver_name = "Bi-CGSTAB"; break;
            case CLCG_TFQMR: solver_name = "TFQMR"; break;
            default: solver_name = "Unknown"; break;
        }

        ss << std::endl;
        // NOTE(review): the summary goes to std::clog rather than ss, as in the
        // original code - confirm whether it should follow ss instead.
        std::clog << "Solver: " << solver_name << ". Times cost: " << costime << " ms" << std::endl;
    }

    if (verbose || ret < 0) clcg_error_str(ret, ss, er_throw);
}
/**
 * @brief Bi-conjugate gradient solver (placeholder).
 *
 * No iteration is implemented here. A defined negative code is returned so
 * that callers see a failure instead of an indeterminate value.
 *
 * @param m Solution vector (untouched).
 * @param B Right-hand side (unused).
 *
 * @return CLCG_UNKNOWN_SOLVER.
 */
gctl::clcg_return_code gctl::clcg_solver::clbicg(array<std::complex<double> > &m, const array<std::complex<double> > &B)
{
    (void) m;
    (void) B;
    return CLCG_UNKNOWN_SOLVER;
}
/**
 * @brief Bi-conjugate gradient iteration for a complex symmetric operator.
 *
 * The operator is supplied through CLCG_Ax(). The iteration stops when the
 * residual measure drops to epsilon or below, when CLCG_Progress() returns a
 * non-zero value, when the solution stops being valid (vecvalid), or when
 * max_iterations (if positive) is exceeded.
 *
 * Residual measure: sqrt(rk_square)/N when abs_diff is non-zero, otherwise
 * rk_square/max(r0_square, 1.0).
 *
 * @param m Initial solution on input, updated in place; must have the size of B.
 * @param B Right-hand side.
 *
 * @return CLCG_CONVERGENCE, CLCG_ALREADY_OPTIMIZIED, CLCG_STOP, or a negative error code.
 */
gctl::clcg_return_code gctl::clcg_solver::clbicg_symmetric(array<std::complex<double> > &m, const array<std::complex<double> > &B)
{
    size_t n_size = B.size();
    //check parameters
    if (n_size <= 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (m.size() != n_size) return CLCG_SIZE_NOT_MATCH;
    if (clcg_param_.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (clcg_param_.epsilon <= 0.0 || clcg_param_.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    r1k.resize(n_size);
    d1k.resize(n_size);
    Ax.resize(n_size);

    // Initial residual r = B - A*m and first search direction d = r
    CLCG_Ax(m, Ax, gctl::NoTrans, gctl::NoConj);

    std::complex<double> one_z(1.0, 0.0);
    vecdiff(r1k, B, Ax, one_z, one_z);
    veccpy(d1k, r1k, one_z);

    std::complex<double> rkrk = vecdot(r1k, r1k);

    std::complex<double> rk_mod = vecinner(r1k, r1k);
    double rk_square = std::norm(rk_mod);
    double r0_square = rk_square;
    if (r0_square < 1.0) r0_square = 1.0;

    // Evaluate the criterion selected by abs_diff only; the two measures must
    // not be mixed when deciding that the start model is already optimal.
    double residual;
    if (clcg_param_.abs_diff) residual = sqrt(rk_square)/n_size;
    else residual = rk_square/r0_square;

    if (residual <= clcg_param_.epsilon)
    {
        CLCG_Progress(m, residual, clcg_param_, 0);
        return CLCG_ALREADY_OPTIMIZIED;
    }

    std::complex<double> ak, rkrk2, betak, dkAx;

    size_t t = 0;
    while(1)
    {
        if (clcg_param_.abs_diff) residual = sqrt(rk_square)/n_size;
        else residual = rk_square/r0_square;

        if (CLCG_Progress(m, residual, clcg_param_, t))
        {
            return CLCG_STOP;
        }

        if (residual <= clcg_param_.epsilon)
        {
            return CLCG_CONVERGENCE;
        }

        if (clcg_param_.max_iterations > 0 && t+1 > static_cast<size_t>(clcg_param_.max_iterations))
        {
            return CLCG_REACHED_MAX_ITERATIONS;
        }

        t++;

        CLCG_Ax(d1k, Ax, gctl::NoTrans, gctl::NoConj);
        dkAx = vecdot(d1k, Ax);
        ak = rkrk/dkAx;

        vecapp(m, d1k, ak);
        vecsub(r1k, Ax, ak);

        // NOTE(review): the initial residual measure is built from vecinner()
        // while this one uses vecdot() (the same product as rkrk2 below) -
        // confirm which product is intended for the convergence measure.
        rk_mod = vecdot(r1k, r1k);
        rk_square = std::norm(rk_mod);

        if (!vecvalid(m))
        {
            return CLCG_NAN_VALUE;
        }

        rkrk2 = vecdot(r1k, r1k);
        betak = rkrk2/rkrk;
        rkrk = rkrk2;

        vecadd(d1k, d1k, r1k, betak, one_z);
    }

    return CLCG_UNKNOWN_ERROR; // not reached; every exit of the loop returns
}
/**
 * @brief Conjugate gradient squared solver (placeholder).
 *
 * No iteration is implemented here. A defined negative code is returned so
 * that callers see a failure instead of an indeterminate value.
 *
 * @param m Solution vector (untouched).
 * @param B Right-hand side (unused).
 *
 * @return CLCG_UNKNOWN_SOLVER.
 */
gctl::clcg_return_code gctl::clcg_solver::clcgs(array<std::complex<double> > &m, const array<std::complex<double> > &B)
{
    (void) m;
    (void) B;
    return CLCG_UNKNOWN_SOLVER;
}
/**
 * @brief Bi-CGSTAB solver (placeholder).
 *
 * No iteration is implemented here. A defined negative code is returned so
 * that callers see a failure instead of an indeterminate value.
 *
 * @param m Solution vector (untouched).
 * @param B Right-hand side (unused).
 *
 * @return CLCG_UNKNOWN_SOLVER.
 */
gctl::clcg_return_code gctl::clcg_solver::clbicgstab(array<std::complex<double> > &m, const array<std::complex<double> > &B)
{
    (void) m;
    (void) B;
    return CLCG_UNKNOWN_SOLVER;
}
/**
 * @brief Transpose-free quasi-minimal residual solver (placeholder).
 *
 * No iteration is implemented here. A defined negative code is returned so
 * that callers see a failure instead of an indeterminate value.
 *
 * @param m Solution vector (untouched).
 * @param B Right-hand side (unused).
 *
 * @return CLCG_UNKNOWN_SOLVER.
 */
gctl::clcg_return_code gctl::clcg_solver::cltfqmr(array<std::complex<double> > &m, const array<std::complex<double> > &B)
{
    (void) m;
    (void) B;
    return CLCG_UNKNOWN_SOLVER;
}

166
lib/optimization/clcg.h Normal file
View File

@ -0,0 +1,166 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#ifndef _GCTL_CLCG_H
#define _GCTL_CLCG_H
#include "gctl/core.h"
#include "gctl/maths.h"
#include "gctl/algorithm.h"
#include "gctl_optimization_config.h"
#ifdef GCTL_OPTIMIZATION_TOML
#include "toml.hpp"
#endif // GCTL_OPTIMIZATION_TOML
#if defined _WINDOWS || __WIN32__
#include "windows.h"
#endif // _WINDOWS || __WIN32__
namespace gctl
{
/**
* @brief Solver algorithms that can be selected through clcg_solver::CLCG_Minimize().
*/
enum clcg_solver_type
{
/**
* Jacob's Bi-Conjugate Gradient Method
*/
CLCG_BICG,
/**
* Bi-Conjugate Gradient Method accelerated for complex symmetric A
*/
CLCG_BICG_SYM,
/**
* Conjugate Gradient Squared Method with real coefficients.
*/
CLCG_CGS,
/**
* Bi-Conjugate Gradient Stabilized Method.
*/
CLCG_BICGSTAB,
/**
* Transpose Free Quasi-Minimal Residual Method
*/
CLCG_TFQMR,
};
/**
* @brief Return values of the solver functions of clcg_solver.
*
* Non-negative values are successful outcomes; negative values are errors.
*/
enum clcg_return_code
{
CLCG_SUCCESS = 0, ///< The solver function terminated successfully.
CLCG_CONVERGENCE = 0, ///< The iteration reached convergence (same value as CLCG_SUCCESS).
CLCG_STOP, ///< The iteration is stopped by the monitoring function.
CLCG_ALREADY_OPTIMIZIED, ///< The initial solution is already optimized.
// A negative number means an error; the values count upwards from -1024.
CLCG_UNKNOWN_ERROR = -1024, ///< Unknown error.
CLCG_INVILAD_VARIABLE_SIZE, ///< The variable size is invalid (zero-sized target).
CLCG_INVILAD_MAX_ITERATIONS, ///< The maximal iteration times is negative.
CLCG_INVILAD_EPSILON, ///< The epsilon is not in the range (0, 1).
CLCG_REACHED_MAX_ITERATIONS, ///< Iteration reached maximal limit.
CLCG_NAN_VALUE, ///< Nan value.
CLCG_INVALID_POINTER, ///< Invalid pointer.
CLCG_SIZE_NOT_MATCH, ///< Sizes of m and B do not match
CLCG_UNKNOWN_SOLVER, ///< Unknown solver
};
/**
* @brief Parameters of the complex conjugate gradient methods.
*/
struct clcg_para
{
/**
* Maximal iteration times. The process will continue till the convergence is met
* if this option is set to zero (default). Negative values are rejected by the solver.
*/
int max_iterations;
/**
* Epsilon for convergence test.
* This parameter determines the accuracy with which the solution is to be found.
* The iteration terminates when the residual measure is <= epsilon. The measure is
* rk_square/max(r0_square, 1.0) by default, or sqrt(rk_square)/N when abs_diff is set,
* where rk_square is computed from the current residual vector, r0_square from the
* initial one and N is the size of the target vector.
* The value must lie in the open range (0, 1). The default value of epsilon is 1e-8.
*/
double epsilon;
/**
* Whether to use the size-averaged residual measure (sqrt(rk_square)/N) to evaluate the
* process. The default value is 0 (false), which selects the relative measure.
* Any non-zero value selects the averaged measure.
*/
int abs_diff;
};
/**
* @brief Base class of the complex-valued conjugate gradient solvers for A*m = B.
*
* A derived class supplies the operator A by implementing CLCG_Ax() and may
* override CLCG_Progress() to monitor or stop the iteration.
*/
class clcg_solver
{
private:
clcg_para clcg_param_; ///< Active solver parameters.
size_t clcg_inter_; ///< Report interval of the default progress monitor (0 disables the periodic report).
bool clcg_silent_; ///< Silent mode flag used by CLCG_Minimize().
array<std::complex<double> > r1k, r2k, d1k, d2k; ///< Work vectors of the solvers.
array<std::complex<double> > Ax; ///< Work vector holding products returned by CLCG_Ax().
public:
clcg_solver(); ///< Default parameters, report interval 1, not silent.
virtual ~clcg_solver();
/**
* @brief Operator callback to be implemented by the derived class.
*
* @param x Input vector
* @param ax Output vector receiving the product
* @param layout Requested matrix layout (the solver in clcg.cpp passes gctl::NoTrans)
* @param conj Requested conjugation (the solver in clcg.cpp passes gctl::NoConj)
*/
virtual void CLCG_Ax(const array<std::complex<double> > &x, array<std::complex<double> > &ax,
matrix_layout_e layout, conjugate_type_e conj) = 0;
/**
* @brief Progress monitor called by the solver with the current convergence value.
* The default implementation prints to std::clog and returns 0. A non-zero
* return value makes the solver stop with CLCG_STOP.
*/
virtual int CLCG_Progress(const array<std::complex<double> > &m, const double converge, const clcg_para &param, size_t t);
void clcg_silent(); ///< Turn on the silent mode of CLCG_Minimize().
void set_clcg_report_interval(size_t inter); ///< Set the report interval of the default progress monitor.
void set_clcg_para(const clcg_para &param); ///< Replace the solver parameters.
/**
* @brief Write the message of a return code to ss, or throw std::runtime_error
* for a negative code when err_throw is true.
*/
void clcg_error_str(clcg_return_code err_code, std::ostream &ss = std::clog, bool err_throw = false);
clcg_para default_clcg_para(); ///< Return a copy of the default parameters.
#ifdef GCTL_OPTIMIZATION_TOML
void set_clcg_para(const toml::value &toml_data); ///< Read the parameters from the "clcg" table; missing keys keep their defaults.
#endif // GCTL_OPTIMIZATION_TOML
// Individual solvers. m is the initial/final solution and B the target vector.
// NOTE(review): in clcg.cpp only clbicg_symmetric() contains an iteration;
// the other four bodies are placeholders.
clcg_return_code clbicg(array<std::complex<double> > &m, const array<std::complex<double> > &B);
clcg_return_code clbicg_symmetric(array<std::complex<double> > &m, const array<std::complex<double> > &B);
clcg_return_code clcgs(array<std::complex<double> > &m, const array<std::complex<double> > &B);
clcg_return_code clbicgstab(array<std::complex<double> > &m, const array<std::complex<double> > &B);
clcg_return_code cltfqmr(array<std::complex<double> > &m, const array<std::complex<double> > &B);
/**
* @brief Run the solver selected by solver_id and report the result.
*
* @note In silent mode a negative return code is always thrown as std::runtime_error.
*
* @param m Initial solution on input, final solution on output
* @param B Target vector
* @param solver_id Solver to use
* @param ss Stream used for the result message
* @param verbose Also report successful return codes
* @param er_throw Throw on errors instead of printing them
*/
void CLCG_Minimize(array<std::complex<double> > &m, const array<std::complex<double> > &B,
clcg_solver_type solver_id = CLCG_CGS, std::ostream &ss = std::clog,
bool verbose = true, bool er_throw = false);
};
}
#endif // _GCTL_CLCG_H

129
lib/optimization/dwa.cpp Normal file
View File

@ -0,0 +1,129 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "dwa.h"
/**
 * @brief Create an empty DWA object; InitDWA() must be called before use.
 *
 * Every scalar member gets a defined value. In particular multi_fx_ starts at
 * zero because AddSingleLoss() accumulates into it.
 */
gctl::dwa::dwa()
    : l_ready_(false), fx_c_(0), fx_n_(0), K_(0.0), T_(1.0), multi_fx_(0.0)
{
}
/// Destructor; the body is intentionally empty.
gctl::dwa::~dwa()
{
}
void gctl::dwa::InitDWA(size_t num, size_t grad_num)
{
fx_n_ = num;
K_ = 1.0*num;
T_ = 1.0;
wgts_.resize(num, 1.0);
L_p1_.resize(num, 1.0);
L_p2_.resize(num, 1.0);
grad_.resize(grad_num, 0.0);
rcd_wgts_.push_back(wgts_);
return;
}
void gctl::dwa::AddSingleLoss(double fx, const array<double> &g)
{
multi_fx_ += wgts_[fx_c_]*fx;
L_p2_[fx_c_] = L_p1_[fx_c_];
L_p1_[fx_c_] = fx;
for (size_t i = 0; i < g.size(); i++)
{
grad_[i] += wgts_[fx_c_]*g[i];
}
fx_c_++;
return;
}
/**
 * @brief Recompute the weights of the single loss functions.
 *
 * On the first call every raw weight is 1.0; afterwards the raw weight is
 * exp(L_p1/(L_p2*T)) with the two most recent loss values. The raw weights
 * are then scaled so that they sum up to K, and the result is appended to
 * the records.
 */
void gctl::dwa::UpdateWeights()
{
    double total = 0.0;
    for (size_t k = 0; k < fx_n_; k++)
    {
        wgts_[k] = l_ready_ ? exp(L_p1_[k]/(L_p2_[k]*T_)) : 1.0;
        total += wgts_[k];
    }

    for (size_t k = 0; k < fx_n_; k++)
    {
        wgts_[k] = wgts_[k]*(K_/total);
    }

    l_ready_ = true;
    rcd_wgts_.push_back(wgts_);
}
/**
 * @brief Get the merged objective value and the merged model gradients.
 *
 * The internal accumulators are reset afterwards, ready for the next round
 * of AddSingleLoss() calls.
 *
 * @param g Output: the weighted sum of the added gradients
 *
 * @return The weighted sum of the added loss values
 *
 * @throws std::runtime_error if not all loss functions have been added.
 */
double gctl::dwa::DWALoss(array<double> &g)
{
    if (fx_c_ != fx_n_)
    {
        throw std::runtime_error("Not enough loss functions evaluated. From gctl::dwa::DWALoss(...)");
    }

    const double fx = multi_fx_;
    g = grad_;

    // Reset for the next evaluation round
    fx_c_ = 0;
    multi_fx_ = 0.0;
    grad_.assign_all(0.0);
    return fx;
}
void gctl::dwa::set_control_temperature(double t)
{
T_ = t;
return;
}
void gctl::dwa::set_normal_sum(double k)
{
K_ = k;
return;
}
/**
 * @brief Copy the recorded weights into a flat array.
 *
 * The records are stored one after another, each with one weight per loss
 * function, so entry j of record i ends up at index i*fx_n_ + j.
 *
 * @param logs Output log; resized to the number of functions times the number of records
 */
void gctl::dwa::get_records(array<double> &logs)
{
    const size_t n_rec = rcd_wgts_.size();
    logs.resize(fx_n_*n_rec);

    size_t pos = 0;
    for (size_t r = 0; r < n_rec; r++)
    {
        for (size_t c = 0; c < fx_n_; c++)
        {
            logs[pos] = rcd_wgts_[r][c];
            pos++;
        }
    }
}

114
lib/optimization/dwa.h Normal file
View File

@ -0,0 +1,114 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#ifndef _GCTL_DWA_H
#define _GCTL_DWA_H
#include "gctl/core.h"
namespace gctl
{
/**
* @brief Loss-balanced multitask evaluation using dynamic weight averaging (DWA).
*
* Typical use per iteration: call AddSingleLoss() once for every loss function,
* then DWALoss() to get the merged loss and gradients, then UpdateWeights().
*
* @note Reference: 2019. End-to-end multitask learning with attention.
*
*/
class dwa
{
private:
bool l_ready_; ///< Set by the first UpdateWeights() call; until then all raw weights are 1.0.
size_t fx_c_, fx_n_; ///< Number of losses added in the current round / number of loss functions.
double K_, T_, multi_fx_; ///< Normal sum of the weights, temperature, accumulated weighted loss.
array<double> wgts_; ///< Current weights of the loss functions.
array<double> L_p1_, L_p2_; ///< The latest and the previous loss value of every function.
array<double> grad_; ///< Accumulated weighted gradients.
std::vector<array<double>> rcd_wgts_; ///< History of the weights (one entry per update).
public:
dwa();
virtual ~dwa();
/**
* @brief Initiate the number of loss functions and size of the model gradients.
*
* @note This function must be called at first. It sets the normal sum to num and
* the temperature to 1.0, so set_normal_sum() and set_control_temperature() only
* take effect when called afterwards.
*
* @param num Number of the loss functions
* @param grad_num Size of the model gradients
*/
void InitDWA(size_t num, size_t grad_num);
/**
* @brief Add the value of a single loss function and the current model gradients.
* Both are scaled by the current weight of the function before being accumulated.
*
* @param fx objective value
* @param g model gradients
*/
void AddSingleLoss(double fx, const array<double> &g);
/**
* @brief Get the merged objective value and the model gradients.
*
* @note All single loss functions must be added before calling this function (an exception is thrown otherwise). The merged objective value and the model gradients will be reset after the calling.
*
* @param g model gradients
*
* @return objective value
*/
double DWALoss(array<double> &g);
/**
* @brief Update weights for single loss functions using the DWA algorithm.
* The new weights are appended to the records.
*
*/
void UpdateWeights();
/**
* @brief Set the cooling temperature. The bigger value is, the closer the weights will be to one. The default is 1.0.
*
* @param t Input temperature
*/
void set_control_temperature(double t);
/**
* @brief Set the normal sum of the weights. The default equals the number of loss functions.
*
* @param k Input sum
*/
void set_normal_sum(double k);
/**
* @brief Get the recorded weights. Size of the log equals the function size times the number of records.
*
* @param logs Output log
*/
void get_records(array<double> &logs);
};
}
#endif // _GCTL_DWA_H

View File

@ -0,0 +1 @@
// Enables the TOML-based parameter interfaces (e.g. the toml.hpp include and
// set_clcg_para(const toml::value&) in clcg.h).
// NOTE(review): CMakeLists.txt writes this header from config.h.in via
// configure_file(), so manual edits are presumably overwritten on configure.
#define GCTL_OPTIMIZATION_TOML

View File

@ -0,0 +1,328 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "gradnorm.h"
/**
 * @brief Create an uninitialised GradNorm object; InitGradNorm() must be
 * called before use.
 *
 * Every scalar member gets a defined value. In particular multi_fx_ starts at
 * zero because AddSingleLoss() accumulates into it, and fx_n_ starts at zero
 * so that size checks made before the initialisation are well defined.
 */
gctl::grad_norm::grad_norm()
{
    fx_c_ = 0;
    fx_n_ = 0;
    alpha_ = 1.0;
    lamda_ = 0.001;
    T_ = 1.0;
    resi_T_ = 0.0;
    multi_fx_ = 0.0;
    initialized_ = false;
}
/// Destructor; the body is intentionally empty.
gctl::grad_norm::~grad_norm()
{
}
void gctl::grad_norm::InitGradNorm(size_t num, size_t grad_num)
{
fx_n_ = num;
T_ = 1.0;
resi_T_ = 0.0;
fst_iter_.resize(num, true);
wgts_.resize(num, 1.0/num);
fx0_.resize(num, 0.0);
Gw_.resize(num, 0.0);
Gdw_.resize(num, 0.0);
Lx_.resize(num, 0.0);
grad_.resize(grad_num, 0.0);
rcd_fxs_.resize(num, 0.0);
fixed_wgts_.resize(num, -1.0);
rcd_wgts_.reserve(100000);
for (size_t i = 0; i < fx_n_; i++)
{
rcd_wgts_.push_back(wgts_[i]);
}
initialized_ = true;
return;
}
double gctl::grad_norm::AddSingleLoss(double fx, const array<double> &g)
{
if (fst_iter_[fx_c_])
{
fx0_[fx_c_] = fx;
fst_iter_[fx_c_] = false;
}
Lx_[fx_c_] = fx/fx0_[fx_c_];
double curr_fx = wgts_[fx_c_]*fx;
multi_fx_ += curr_fx;
rcd_fxs_[fx_c_] = fx;
double sum = 0.0;
for (size_t i = 0; i < g.size(); i++)
{
sum += g[i]*g[i];
grad_[i] += wgts_[fx_c_]*g[i];
}
Gw_[fx_c_] = sqrt(wgts_[fx_c_]*wgts_[fx_c_]*sum);
Gdw_[fx_c_] = sqrt(sum); // wgts_[fx_c_]*sum/Gw_[fx_c_]
fx_c_++;
return curr_fx;
}
void gctl::grad_norm::UpdateWeights()
{
double ac = 0;
double avg_Lx = 0.0, avg_Gw = 0.0;
resi_T_ = T_;
for (size_t i = 0; i < fx_n_; i++)
{
if (fixed_wgts_[i] < 0.0)
{
avg_Lx += Lx_[i];
avg_Gw += Gw_[i];
ac += 1.0;
}
else resi_T_ -= fixed_wgts_[i];
}
avg_Lx /= ac;
avg_Gw /= ac;
double r_i, sum = 0.0;
// L1 norm approach
for (size_t i = 0; i < fx_n_; i++)
{
if (fixed_wgts_[i] < 0.0)
{
r_i = Lx_[i]/avg_Lx;
if (Gw_[i] >= avg_Gw*pow(r_i, alpha_))
{
wgts_[i] -= lamda_*Gdw_[i];
}
else wgts_[i] += lamda_*Gdw_[i];
// make sure the weights are positive
wgts_[i] = std::max(wgts_[i], 1e-16);
sum += wgts_[i];
}
}
for (size_t i = 0; i < fx_n_; i++)
{
if (fixed_wgts_[i] < 0.0) wgts_[i] *= resi_T_/sum;
rcd_wgts_.push_back(wgts_[i]);
}
return;
}
/**
 * @brief Print the current weights, the latest loss values and the weighted losses.
 *
 * @param ss       Output stream
 * @param one_line Print everything on a single line instead of the framed block
 */
void gctl::grad_norm::ShowStatistics(std::ostream &ss, bool one_line)
{
    double weighted, total = 0.0;

    if (!one_line)
    {
        ss << "----------------------------\n";
        ss << "GradNorm's Progress\n";

        ss << "Tasks' weight: ";
        for (size_t k = 0; k < fx_n_; k++)
        {
            ss << wgts_[k] << " | ";
        }
        ss << "\n" << "Tasks' loss: ";
        for (size_t k = 0; k < fx_n_; k++)
        {
            ss << rcd_fxs_[k] << " | ";
        }
        ss << "\n" << "Weighted losses: ";
        for (size_t k = 0; k < fx_n_; k++)
        {
            weighted = wgts_[k]*rcd_fxs_[k];
            ss << weighted << " | ";
            total += weighted;
        }
        ss << total << " (total) |\n";
        ss << "----------------------------\n";
        return;
    }

    ss << "Wgts:";
    for (size_t k = 0; k < fx_n_; k++)
    {
        ss << " " << wgts_[k];
    }
    ss << ", Loss:";
    for (size_t k = 0; k < fx_n_; k++)
    {
        ss << " " << rcd_fxs_[k];
    }
    ss << ", WgtLoss:";
    for (size_t k = 0; k < fx_n_; k++)
    {
        weighted = wgts_[k]*rcd_fxs_[k];
        ss << " " << weighted;
        total += weighted;
    }
    ss << ", Total: " << total << "\n";
}
/**
 * @brief Get the merged objective value and the merged model gradients.
 *
 * The internal accumulators are reset afterwards, ready for the next round
 * of AddSingleLoss() calls.
 *
 * @param g Output: the weighted sum of the added gradients
 *
 * @return The weighted sum of the added loss values
 *
 * @throws std::runtime_error if the object is not initialised or if not all
 * loss functions have been added.
 */
double gctl::grad_norm::GradNormLoss(array<double> &g)
{
    // Check the initialisation first: the counters are only meaningful afterwards.
    if (!initialized_)
    {
        throw std::runtime_error("GradNorm is not initialized. From gctl::grad_norm::GradNormLoss()");
    }

    if (fx_c_ != fx_n_)
    {
        throw std::runtime_error("Not all loss functions evaluated. From gctl::grad_norm::GradNormLoss()");
    }

    const double fx = multi_fx_;
    g = grad_;

    // Reset for the next evaluation round
    fx_c_ = 0;
    multi_fx_ = 0.0;
    grad_.assign_all(0.0);
    return fx;
}
void gctl::grad_norm::set_control_weight(double a)
{
alpha_ = a;
return;
}
void gctl::grad_norm::set_normal_sum(double t)
{
T_ = t;
return;
}
void gctl::grad_norm::set_weight_step(double l)
{
lamda_ = l;
return;
}
/**
 * @brief Fix the weight of one loss function and spread the remaining part of
 * the normal sum evenly over the loss functions that are not fixed.
 *
 * All checks are done before any member is modified, so a failed call leaves
 * the object unchanged.
 *
 * @param id  Index of the loss function, must be in [0, number of functions).
 * @param wgt Fixed weight, must satisfy 0 < wgt < normal sum.
 *
 * @throws std::runtime_error If the index is out of range, the weight is
 * outside (0, normal sum), or the fixed weights would use up the whole normal sum.
 */
void gctl::grad_norm::set_fixed_weight(int id, double wgt)
{
    if (id < 0 || static_cast<size_t>(id) >= fx_n_)
    {
        throw std::runtime_error("Invalid loss function's index. From gctl::grad_norm::set_fixed_weight(...)");
    }

    if (wgt <= 0.0 || wgt >= T_)
    {
        throw std::runtime_error("Invalid fixed weight value. From gctl::grad_norm::set_fixed_weight(...)");
    }

    const size_t fixed_id = static_cast<size_t>(id);

    // Evaluate the residual sum with the proposed weight in place, without
    // touching any member yet. A positive entry of fixed_wgts_ is a fixed
    // weight; every other entry counts as a free task.
    double residual = T_;
    double free_count = 0.0;
    for (size_t i = 0; i < fx_n_; i++)
    {
        const double fw = (i == fixed_id) ? wgt : fixed_wgts_[i];
        if (fw > 0.0) residual -= fw;
        else free_count += 1.0;
    }

    if (residual <= 0.0)
    {
        throw std::runtime_error("Invalid tasks' weight detected. From gctl::grad_norm::set_fixed_weight(...)");
    }

    // Commit the new state.
    fixed_wgts_[fixed_id] = wgt;
    wgts_[fixed_id] = wgt;
    resi_T_ = residual;

    for (size_t i = 0; i < fx_n_; i++)
    {
        // only entries marked as not fixed (negative) share the residual
        if (fixed_wgts_[i] < 0.0) wgts_[i] = resi_T_/free_count;
        // NOTE(review): assumes rcd_wgts_ already holds at least fx_n_ entries
        // - confirm against InitGradNorm()
        rcd_wgts_[i] = wgts_[i];
    }
}
/**
 * @brief Set the weights of all loss functions from a user supplied array.
 * The weights are rescaled so that they sum to the normal sum.
 *
 * The input is validated before any member is modified, so a failed call
 * leaves the object unchanged.
 *
 * @param w Input weights, one per loss function. Their sum must be positive.
 *
 * @throws std::runtime_error If the array size does not equal the number of
 * loss functions, or if the sum of the input weights is not positive (this
 * also rejects NaN), since rescaling would then give non-finite or
 * sign-flipped weights.
 */
void gctl::grad_norm::set_initial_weights(const array<double> &w)
{
    if (w.size() != fx_n_)
    {
        throw std::runtime_error("Invalid input array size. From gctl::grad_norm::set_initial_weights(...)");
    }

    double sum = 0.0;
    for (size_t i = 0; i < fx_n_; i++)
    {
        sum += w[i];
    }

    // written as !(sum > 0.0) so that a NaN sum is rejected as well
    if (!(sum > 0.0))
    {
        throw std::runtime_error("Invalid sum of the input weights. From gctl::grad_norm::set_initial_weights(...)");
    }

    const double scale = T_/sum;
    for (size_t i = 0; i < fx_n_; i++)
    {
        wgts_[i] = w[i];
        wgts_[i] *= scale;
        // NOTE(review): assumes rcd_wgts_ already holds at least fx_n_ entries
        // - confirm against InitGradNorm()
        rcd_wgts_[i] = wgts_[i];
    }
}
/**
 * @brief Copy the recorded task weights into a caller supplied array.
 *
 * @param logs Output array; resized to the number of recorded values and
 *             filled in recording order.
 */
void gctl::grad_norm::get_records(array<double> &logs)
{
    const size_t count = rcd_wgts_.size();
    logs.resize(count);

    size_t pos = 0;
    while (pos < count)
    {
        logs[pos] = rcd_wgts_[pos];
        ++pos;
    }
}
/**
 * @brief Write the recorded task weights to a text file.
 *
 * The file starts with two comment lines (a legend and one "tw<i>" column
 * label per loss function). The recorded weights follow, separated by
 * blanks, with a line break after every group of fx_n_ values.
 *
 * @param file File name handed to open_outfile() together with the ".txt"
 *             extension.
 */
void gctl::grad_norm::save_records(std::string file)
{
    std::ofstream ofile;
    open_outfile(ofile, file, ".txt");

    ofile << "# 'tw' for 'task weight'\n# ";
    for (size_t j = 0; j < fx_n_; j++)
    {
        ofile << "tw" << std::to_string(j) << " ";
    }
    ofile << "\n";

    // size_t index: matches the type of size() and avoids the signed/unsigned
    // comparison (and int overflow on very long records) of an int counter
    const size_t total = rcd_wgts_.size();
    for (size_t i = 0; i < total; i++)
    {
        ofile << rcd_wgts_[i] << " ";
        // guard the modulo: fx_n_ == 0 would be a division by zero
        if (fx_n_ != 0 && (i + 1)%fx_n_ == 0) ofile << "\n";
    }
    ofile.close();
}

159
lib/optimization/gradnorm.h Normal file
View File

@ -0,0 +1,159 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#ifndef _GCTL_GRADNORM_H
#define _GCTL_GRADNORM_H
#include "gctl/core.h"
#include "gctl/io.h"
namespace gctl
{
/**
* @brief Gradient normalized (balanced) multitask evaluation.
*
* @note Reference: Zhao Chen et. al., 2018. GradNorm: Gradient normalization
* for adaptive loss balancing in deep multitask networks.
*
*/
class grad_norm
{
private:
bool initialized_; ///< Checked by GradNormLoss(); presumably set by InitGradNorm() - TODO confirm
size_t fx_n_, fx_c_; ///< Number of loss functions, and count of losses evaluated since the last GradNormLoss() call
double resi_T_, T_; ///< T_ is the normal sum of the weights; resi_T_ is T_ minus all fixed weights
double lamda_, alpha_, multi_fx_; ///< Weight step, control exponent alpha, and the merged objective value
array<bool> fst_iter_; ///< NOTE(review): presumably flags the first evaluation of each loss function - confirm in gradnorm.cpp
array<double> wgts_; ///< Current weight of every loss function
array<double> fx0_; ///< NOTE(review): presumably the initial value of every loss function - confirm in gradnorm.cpp
array<double> Gw_, Gdw_, Lx_; ///< Per-task quantities used when the weights are updated
array<double> grad_; ///< Merged model gradient returned by GradNormLoss()
array<double> rcd_fxs_; ///< Loss values reported by ShowStatistics()
array<double> fixed_wgts_; ///< User fixed weights; a negative entry marks a task whose weight is not fixed
std::vector<double> rcd_wgts_; ///< Recorded weights returned by get_records() and written by save_records()
public:
grad_norm();
virtual ~grad_norm();
/**
* @brief Initiate the number of loss functions and size of the model gradients.
*
* @note This function must be called at first.
*
* @param num Number of the total loss functions
* @param grad_num Size of the model gradients
*/
void InitGradNorm(size_t num, size_t grad_num);
/**
* @brief Add the value of a single loss function and the current model gradients.
*
* @param fx objective value
* @param g model gradients
*
* @return weighted value of the current loss function
*/
double AddSingleLoss(double fx, const array<double> &g);
/**
* @brief Get the merged objective value and the model gradients.
*
* @note All single loss functions must be added before calling this function. The
* merged objective value and the model gradients will be reset after the call.
*
* @param g Output model gradients
*
* @return Merged objective value
*
* @throws std::runtime_error If not all loss functions have been evaluated or
* the object is not initialized.
*/
double GradNormLoss(array<double> &g);
/**
* @brief Update weights for single loss functions using the GradNorm algorithm.
*
*/
void UpdateWeights();
/**
* @brief Show statistics of the tasks' weights and the loss functions' values.
*
* @param ss Output stream. The default is std::clog
* @param one_line Write the statistics on a single line if true. Otherwise a
* multi-line table is written. The default is false
*/
void ShowStatistics(std::ostream &ss = std::clog, bool one_line = false);
/**
* @brief Set the control factor alpha. The default is 1.0
*
* @param a Input alpha
*/
void set_control_weight(double a);
/**
* @brief Set the normal sum of the weights. The default equals the number of loss functions.
*
* @param t Input sum
*/
void set_normal_sum(double t);
/**
* @brief Set a learning rate of the weights. The default is 0.001
*
* @param l Input learning rate
*/
void set_weight_step(double l);
/**
* @brief Set a fixed weight for one loss function. The remaining part of the
* normal sum is shared equally by the loss functions that are not fixed.
*
* @param id Index of the loss function
* @param wgt Weight of the loss function. Must be larger than zero and smaller
* than the normal sum
*
* @throws std::runtime_error If the index or the weight is invalid.
*/
void set_fixed_weight(int id, double wgt);
/**
* @brief Set the initial weights. The input weights are rescaled so that they
* sum to the normal sum.
*
* @param w Input weights. The size must equal the number of loss functions
*
* @throws std::runtime_error If the size of the input array is invalid.
*/
void set_initial_weights(const array<double> &w);
/**
* @brief Get the recorded weights. Size of the log equals the number of loss functions times the number of iterations.
*
* @param logs Output log
*/
void get_records(array<double> &logs);
/**
* @brief Save recorded weights to a text file.
*
* @param file File name
*/
void save_records(std::string file);
};
}
#endif // _GCTL_GRADNORM_H

1897
lib/optimization/lbfgs.cpp Normal file

File diff suppressed because it is too large Load Diff

559
lib/optimization/lbfgs.h Normal file
View File

@ -0,0 +1,559 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#ifndef _GCTL_LBFGS_H
#define _GCTL_LBFGS_H
#include "gctl/core.h"
#include "gctl/maths.h"
#include "gctl/algorithm.h"
#include "gctl_optimization_config.h"
#ifdef GCTL_OPTIMIZATION_TOML
#include "toml.hpp"
#endif // GCTL_OPTIMIZATION_TOML
#if defined _WINDOWS || __WIN32__
#include "windows.h"
#endif // _WINDOWS || __WIN32__
namespace gctl
{
/**
* @brief Return value of the lbfgs() function. Roughly speaking, a negative value indicates an error.
*/
enum lbfgs_return_code
{
/** L-BFGS reaches convergence (epsilon based test). */
LBFGS_EPS_CONVERGENCE = 0,
/** NOTE(review): presumably the delta based convergence test (lbfgs_para::delta) is met - confirm in lbfgs.cpp. */
LBFGS_DELTA_CONVERGENCE, // 1
/** NOTE(review): presumably the residual based convergence test (lbfgs_para::residual) is met - confirm in lbfgs.cpp. */
LBFGS_RESI_CONVERGENCE, // 2
/** NOTE(review): presumably the minimization was stopped on request - confirm in lbfgs.cpp. */
LBFGS_STOP, // 3
/** The initial variables already minimize the objective function. */
LBFGS_ALREADY_MINIMIZED, // 4
/** Unknown error. */
LBFGSERR_UNKNOWNERROR = -1024,
/** Logic error. */
LBFGSERR_LOGICERROR, // -1023
/** Insufficient memory. */
LBFGSERR_OUTOFMEMORY, // -1022
/** The minimization process has been canceled. */
LBFGSERR_CANCELED,
/** Invalid number of variables specified. */
LBFGSERR_INVALID_N,
/** Invalid number of variables (for SSE) specified. */
LBFGSERR_INVALID_N_SSE,
/** The array x must be aligned to 16 (for SSE). */
LBFGSERR_INVALID_X_SSE,
/** Invalid parameter lbfgs_para::epsilon specified. */
LBFGSERR_INVALID_EPSILON,
/** Invalid parameter lbfgs_para::past specified. */
LBFGSERR_INVALID_TESTPERIOD,
/** Invalid parameter lbfgs_para::delta specified. */
LBFGSERR_INVALID_DELTA,
/** Invalid parameter lbfgs_para::linesearch specified. */
LBFGSERR_INVALID_LINESEARCH,
/** Invalid parameter lbfgs_para::min_step specified. */
LBFGSERR_INVALID_MINSTEP,
/** Invalid parameter lbfgs_para::max_step specified. */
LBFGSERR_INVALID_MAXSTEP,
/** Invalid parameter lbfgs_para::ftol specified. */
LBFGSERR_INVALID_FTOL,
/** Invalid parameter lbfgs_para::wolfe specified. */
LBFGSERR_INVALID_WOLFE,
/** Invalid parameter lbfgs_para::gtol specified. */
LBFGSERR_INVALID_GTOL,
/** Invalid parameter lbfgs_para::xtol specified. */
LBFGSERR_INVALID_XTOL,
/** Invalid parameter lbfgs_para::max_linesearch specified. */
LBFGSERR_INVALID_MAXLINESEARCH,
/** Invalid parameter lbfgs_para::orthantwise_c specified. */
LBFGSERR_INVALID_ORTHANTWISE,
/** Invalid parameter lbfgs_para::orthantwise_start specified. */
LBFGSERR_INVALID_ORTHANTWISE_START,
/** Invalid parameter lbfgs_para::orthantwise_end specified. */
LBFGSERR_INVALID_ORTHANTWISE_END,
/** The line-search step went out of the interval of uncertainty. */
LBFGSERR_OUTOFINTERVAL,
/** A logic error occurred; alternatively, the interval of uncertainty
became too small. */
LBFGSERR_INCORRECT_TMINMAX,
/** A rounding error occurred; alternatively, no line-search step
satisfies the sufficient decrease and curvature conditions. */
LBFGSERR_ROUNDING_ERROR,
/** The line-search step became smaller than lbfgs_para::min_step. */
LBFGSERR_MINIMUMSTEP,
/** The line-search step became larger than lbfgs_para::max_step. */
LBFGSERR_MAXIMUMSTEP,
/** The line-search routine reaches the maximum number of evaluations. */
LBFGSERR_MAXIMUMLINESEARCH,
/** The algorithm routine reaches the maximum number of iterations. */
LBFGSERR_MAXIMUMITERATION,
/** Relative width of the interval of uncertainty is at most
lbfgs_para::xtol. */
LBFGSERR_WIDTHTOOSMALL,
/** A logic error (negative line-search step) occurred. */
LBFGSERR_INVALIDPARAMETERS,
/** The current search direction increases the objective function value. */
LBFGSERR_INCREASEGRADIENT,
};
// Enumeration of the line search methods
// 0 MoreThuente method
// 1 Backtracking with the Armijo condition
// 2 Backtracking with the regular Wolfe condition
// 3 Backtracking with the strong Wolfe condition
/**
* @brief Line search algorithms.
*/
enum line_search_type
{
/** The default algorithm (MoreThuente method). */
LBFGS_LINESEARCH_DEFAULT = 0,
/** MoreThuente method proposed by More and Thuente. */
LBFGS_LINESEARCH_MORETHUENTE = 0,
/**
* Backtracking method with the Armijo condition.
* The backtracking method finds the step length such that it satisfies
* the sufficient decrease (Armijo) condition,
* - f(x + a * d) <= f(x) + lbfgs_para::ftol * a * g(x)^T d,
*
* where x is the current point, d is the current search direction, and
* a is the step length.
*/
LBFGS_LINESEARCH_BACKTRACKING_ARMIJO = 1,
/** The backtracking method with the default (regular Wolfe) condition. */
LBFGS_LINESEARCH_BACKTRACKING = 2,
/**
* Backtracking method with regular Wolfe condition.
* The backtracking method finds the step length such that it satisfies
* both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO)
* and the curvature condition,
* - g(x + a * d)^T d >= lbfgs_para::wolfe * g(x)^T d,
*
* where x is the current point, d is the current search direction, and
* a is the step length.
*/
LBFGS_LINESEARCH_BACKTRACKING_WOLFE = 2,
/**
* Backtracking method with strong Wolfe condition.
* The backtracking method finds the step length such that it satisfies
* both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO)
* and the following condition,
* - |g(x + a * d)^T d| <= lbfgs_para::wolfe * |g(x)^T d|,
*
* where x is the current point, d is the current search direction, and
* a is the step length.
*/
LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 3,
/**
* NOTE(review): undocumented in the original. Presumably the Armijo
* backtracking method handled by line_search_backtracking_quad() -
* confirm in lbfgs.cpp.
*/
LBFGS_LINESEARCH_BACKTRACKING_ARMIJO_QUAD = 4,
// The variants below are disabled.
//LBFGS_LINESEARCH_BACKTRACKING_QUAD = 5,
//LBFGS_LINESEARCH_BACKTRACKING_WOLFE_QUAD = 5,
//LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE_QUAD = 6,
};
// L-BFGS parameter type. There are many parameters; brief notes follow:
// m Number of previous sk and yk vectors stored by the L-BFGS algorithm. This value controls the memory used by the algorithm.
// The default is 6 and values below 3 are not recommended. Larger values give a better approximation at a higher computational cost.
// epsilon Termination precision of the iteration. The default is 1e-5.
// past Distance, in iterations, used by the delta based termination test (differences of the objective value between iterations).
// The objective value of 'past' iterations ago is compared with the current one. The default is 0, i.e. the delta test is not used.
// delta (f' - f) / f, the difference of the objective values of two iterations divided by the current objective value. Only computed when past is not 0.
// max_iterations Maximum number of iterations. Zero means to iterate until a termination condition is met or an error occurs.
// linesearch Line search method, as defined by the enumeration above in this file.
// max_linesearch Maximum number of line search trials per iteration. The default is 40.
// min_step Minimum step length of the line search. The default is 1e-20.
// max_step Maximum step length of the line search. The default is 1e+20.
// ftol Accuracy of the line search. The default is 1e-4 (valid range 0 - 0.5).
// wolfe Control parameter of the Wolfe line search. The default is 0.9 (larger than ftol and smaller than 1.0).
// gtol Control parameter of the line search. The default is 0.9 (larger than ftol and smaller than 1.0).
// xtol Floating-point precision. The default is 1e-16.
// orthantwise_c Coefficient of the L1 norm of the model parameters x. The default is 0.0, which gives the plain L2 form; a value larger than 0 turns the algorithm into OWL-QN.
// orthantwise_start Index at which the computation of the L1 norm of x starts.
// orthantwise_end Index at which the computation of the L1 norm of x ends.
/**
* L-BFGS optimization parameters.
* A lbfgs_para object can be obtained from lbfgs_solver::default_lbfgs_para()
* (presumably filled with the default values - confirm in lbfgs.cpp) and is
* passed to the solver with lbfgs_solver::set_lbfgs_para().
*/
struct lbfgs_para
{
/**
* The number of corrections to approximate the inverse hessian matrix.
* The L-BFGS routine stores the computation results of previous \ref m
* iterations to approximate the inverse hessian matrix of the current
* iteration. This parameter controls the size of the limited memories
* (corrections). The default value is \c 6. Values less than \c 3 are
* not recommended. Large values will result in excessive computing time.
*/
int m;
/**
* Epsilon for convergence test.
* This parameter determines the accuracy with which the solution is to
* be found. A minimization terminates when
* ||g|| < \ref epsilon * max(1, ||x||),
* where ||.|| denotes the Euclidean (L2) norm. The default value is
* \c 1e-5.
*/
double epsilon;
/**
* Distance for delta-based convergence test.
* This parameter determines the distance, in iterations, to compute
* the rate of decrease of the objective function. If the value of this
* parameter is zero, the library does not perform the delta-based
* convergence test. The default value is \c 0.
*/
int past;
/**
* Delta for convergence test.
* This parameter determines the minimum rate of decrease of the
* objective function. The library stops iterations when the
* following condition is met:
* (f' - f) / f < \ref delta,
* where f' is the objective value of \ref past iterations ago, and f is
* the objective value of the current iteration.
* The default value is \c 1e-5.
*/
double delta;
/**
* Residual for convergence test.
* This parameter determines the accuracy with which the solution is to
* be found. A minimization terminates when
* f(x) <= residual.
* The default value is \c 1e-8.
*
*/
double residual;
/**
* The maximum number of iterations.
* The lbfgs() function terminates an optimization process with
* ::LBFGSERR_MAXIMUMITERATION status code when the iteration count
* exceeds this parameter. Setting this parameter to zero continues an
* optimization process until a convergence or error. The default value
* is \c 0.
*/
int max_iterations;
/**
* The line search algorithm.
* This parameter specifies a line search algorithm to be used by the
* L-BFGS routine. Valid values are listed in ::line_search_type.
*/
int linesearch;
/**
* The maximum number of trials for the line search.
* This parameter controls the number of function and gradients evaluations
* per iteration for the line search routine. The default value is \c 40.
*/
int max_linesearch;
/**
* The minimum step of the line search routine.
* The default value is \c 1e-20. This value need not be modified unless
* the exponents are too large for the machine being used, or unless the
* problem is extremely badly scaled (in which case the exponents should
* be increased).
*/
double min_step;
/**
* The maximum step of the line search.
* The default value is \c 1e+20. This value need not be modified unless
* the exponents are too large for the machine being used, or unless the
* problem is extremely badly scaled (in which case the exponents should
* be increased).
*/
double max_step;
/**
* A parameter to control the accuracy of the line search routine.
* The default value is \c 1e-4. This parameter should be greater
* than zero and smaller than \c 0.5.
*/
double ftol;
/**
* A coefficient for the Wolfe condition.
* This parameter is valid only when the backtracking line-search
* algorithm is used with the Wolfe condition,
* ::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE or
* ::LBFGS_LINESEARCH_BACKTRACKING_WOLFE .
* The default value is \c 0.9. This parameter should be greater than
* the \ref ftol parameter and smaller than \c 1.0.
*/
double wolfe;
/**
* A parameter to control the accuracy of the line search routine.
* The default value is \c 0.9. If the function and gradient
* evaluations are inexpensive with respect to the cost of the
* iteration (which is sometimes the case when solving very large
* problems) it may be advantageous to set this parameter to a small
* value. A typical small value is \c 0.1. This parameter should be
* greater than the \ref ftol parameter (\c 1e-4) and smaller than
* \c 1.0.
*/
double gtol;
/**
* The machine precision for floating-point values.
* This parameter must be a positive value set by a client program to
* estimate the machine precision. The line search routine will terminate
* with the status code (::LBFGSERR_ROUNDING_ERROR) if the relative width
* of the interval of uncertainty is less than this parameter.
* NOTE(review): ::LBFGSERR_WIDTHTOOSMALL is documented with a very similar
* condition - confirm in lbfgs.cpp which code is actually returned.
*/
double xtol;
/**
* Coefficient for the L1 norm of variables.
* This parameter should be set to zero for standard minimization
* problems. Setting this parameter to a positive value activates
* Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) method, which
* minimizes the objective function F(x) combined with the L1 norm |x|
* of the variables, {F(x) + C |x|}. This parameter is the coefficient
* for the |x|, i.e., C. As the L1 norm |x| is not differentiable at
* zero, the library modifies function and gradient evaluations from
* a client program suitably; a client program thus only has to return
* the function value F(x) and gradients G(x) as usual. The default value
* is zero.
*/
double orthantwise_c;
/**
* Start index for computing L1 norm of the variables.
* This parameter is valid only for OWL-QN method
* (i.e., \ref orthantwise_c != 0). This parameter b (0 <= b < N)
* specifies the index number from which the library computes the
* L1 norm of the variables x,
* |x| := |x_{b}| + |x_{b+1}| + ... + |x_{N}| .
* In other words, variables x_1, ..., x_{b-1} are not used for
* computing the L1 norm. Setting b (0 < b < N), one can protect
* variables, x_1, ..., x_{b-1} (e.g., a bias term of logistic
* regression) from being regularized. The default value is zero.
*/
int orthantwise_start;
/**
* End index for computing L1 norm of the variables.
* This parameter is valid only for OWL-QN method
* (i.e., \ref orthantwise_c != 0). This parameter e (0 < e <= N)
* specifies the index number at which the library stops computing the
* L1 norm of the variables x.
* NOTE(review): the default value is not stated here - see
* lbfgs_solver::default_lbfgs_para().
*/
int orthantwise_end;
};
class lbfgs_solver
{
private:
lbfgs_para lbfgs_param_; ///< L-BFGS algorithm parameters
bool lbfgs_silent_; ///< Controls the display of runtime information (NOTE(review): polarity of the flag is not visible here - confirm in lbfgs.cpp)
// The algorithm functions are private and cannot be used directly. They are reached through the LBFGS_Minimize functions.
// Below is the main routine of L-BFGS. Brief notes on its arguments:
// x Array of the model parameters. The function works on the array directly, so no result is returned separately.
// The array given to the function is the initial model; after the function ends it holds the optimized result.
// ptr_fx Receives the value of the objective function, which makes it easy to monitor the convergence from outside.
// retval Return value. Zero means no error; a non-zero value corresponds to an entry of the enumeration at the top of this file.
// lbfgs_error_str() uses this return value to output the corresponding message.
/**
* Start a L-BFGS optimization.
*
* @param x The array of variables. A client program can set
* initial values for the optimization and receive the
* optimization result through this array.
* @param ptr_fx Receives the final value of the objective function
* for the variables.
* @param ss Output stream object.
* @retval The status code. This function returns zero if the
* minimization process terminates without an error. A
* non-zero value indicates an error or another
* termination condition (see ::lbfgs_return_code).
*/
lbfgs_return_code lbfgs(array<double> &x, double &ptr_fx, std::ostream &ss);
/**
* NOTE(review): presumably the same as lbfgs() but with the search direction
* passed through LBFGS_Precondition() - confirm in lbfgs.cpp.
*/
lbfgs_return_code lbfgs_preconditioned(array<double> &x, double &ptr_fx, std::ostream &ss);
// Line search routines. Internal private functions, not to be used directly.
// NOTE(review): the meaning of the individual arguments is not documented here - see lbfgs.cpp.
lbfgs_return_code line_search_backtracking(int n, gctl::array<double> &x, double *f, gctl::array<double> &g, gctl::array<double> &s,
double *stp, const gctl::array<double> &xp, const gctl::array<double> &gp, gctl::array<double> &wp, int &ls);
lbfgs_return_code line_search_backtracking_quad(int n, gctl::array<double> &x, double *f, gctl::array<double> &g, gctl::array<double> &s,
double *stp, const gctl::array<double> &xp, const gctl::array<double> &gp, gctl::array<double> &wp, int &ls);
lbfgs_return_code line_search_backtracking_owlqn(int n, gctl::array<double> &x, double *f, gctl::array<double> &g, gctl::array<double> &s,
double *stp, const gctl::array<double> &xp, const gctl::array<double> &gp, gctl::array<double> &wp, int &ls);
lbfgs_return_code line_search_morethuente(int n, gctl::array<double> &x, double *f, gctl::array<double> &g, gctl::array<double> &s,
double *stp, const gctl::array<double> &xp, const gctl::array<double> &gp, gctl::array<double> &wa, int &ls);
// Display the message of a return value of the lbfgs function (mainly error messages).
void lbfgs_error_str(lbfgs_return_code err_code, std::ostream &ss = std::clog, bool err_throw = false);
public:
lbfgs_solver();
virtual ~lbfgs_solver();
/**
* @brief NOTE(review): presumably switches off the display of runtime
* information - confirm in lbfgs.cpp.
*
*/
void lbfgs_silent();
/**
* @brief Set the L-BFGS parameters.
*
* @param in_param Input parameters
*/
void set_lbfgs_para(const lbfgs_para &in_param);
#ifdef GCTL_OPTIMIZATION_TOML
/**
* @brief Set the L-BFGS parameters from a toml data object.
*
* @param toml_data Input toml data object
*/
void set_lbfgs_para(const toml::value &toml_data);
#endif // GCTL_OPTIMIZATION_TOML
/**
* @brief Return a lbfgs_para object (presumably filled with the default
* values - confirm in lbfgs.cpp).
*
* @return lbfgs_para
*/
lbfgs_para default_lbfgs_para();
/**
* @brief Show the L-BFGS parameters.
*
* @param ss Output stream object. The default is std::clog
*/
void show_lbfgs_para(std::ostream &ss = std::clog);
// Interface for computing the objective function and its gradient. Brief notes on the arguments:
// x Current values of the model parameters
// g Gradient that corresponds to the current model parameters
// retval Value of the objective function at the current model parameters
/**
* Callback interface to provide objective function and gradient evaluations.
*
* The lbfgs() function calls this function to obtain the values of objective
* function and its gradients when needed. A client program must implement
* this function to evaluate the values of the objective function and its
* gradients, given current values of variables.
*
* @param x The current values of variables.
* @param g The gradient vector. The callback function must compute
* the gradient values for the current variables.
* @retval double The value of the objective function for the current
* variables.
*/
virtual double LBFGS_Evaluate(const array<double> &x, array<double> &g) = 0;
/**
* Callback interface to implement the preconditioning process.
*
* The lbfgs() function calls this function for each iteration. Implementing
* this function, a client program can perform the preconditioning process.
*
* @param x The current values of variables.
* @param g The current gradient values of variables.
* @param d The current values of search directions.
* @param d_pre The values of search directions being preconditioned.
* The callback function must compute these values.
*/
virtual void LBFGS_Precondition(const array<double> &x, const array<double> &g, const array<double> &d, array<double> &d_pre);
// Interface of the progress function. Brief notes on the arguments:
// x Current values of the model parameters
// g Gradient that corresponds to the current model parameters
// fx Value of the objective function
// k Iteration count
// ls Number of line search evaluations used by this iteration
// retval Returning 0 lets lbfgs() continue; any other value terminates it
/**
* Callback interface to receive the progress of the optimization process.
*
* The lbfgs() function calls this function for each iteration. Implementing
* this function, a client program can store or display the current progress
* of the optimization process.
*
* @param x The current values of variables.
* @param g The current gradient values of variables.
* @param fx The current value of the objective function.
* @param converge Current value of the convergence test.
* @param rate Current value of the delta-based convergence test.
* @param param The L-BFGS parameters (passed by value).
* @param k The iteration count.
* @param ls The number of evaluations called for this iteration.
* @param ss Output stream object.
* @retval int Zero to continue the optimization process. Returning a
* non-zero value will cancel the optimization process.
*/
virtual int LBFGS_Progress(const array<double> &x, const array<double> &g, const double fx,
const double converge, const double rate, const lbfgs_para param, int k, int ls, std::ostream &ss);
/**
* @brief Minimize the objective function with the L-BFGS algorithm.
*
* @param m Array of the model parameters (presumably the initial model on
* input and the optimized model on output - confirm in lbfgs.cpp)
* @param ss Output stream object. The default is std::clog
* @param err_throw NOTE(review): presumably throws on error instead of only
* reporting it - confirm in lbfgs.cpp. The default is false
* @return double NOTE(review): presumably the final value of the objective function
*/
double LBFGS_Minimize(array<double> &m, std::ostream &ss = std::clog, bool err_throw = false);
/**
* @brief Minimize the objective function with the preconditioned L-BFGS algorithm.
*
* @param m Array of the model parameters (presumably the initial model on
* input and the optimized model on output - confirm in lbfgs.cpp)
* @param ss Output stream object. The default is std::clog
* @param err_throw NOTE(review): presumably throws on error instead of only
* reporting it - confirm in lbfgs.cpp. The default is false
* @return double NOTE(review): presumably the final value of the objective function
*/
double LBFGS_MinimizePreconditioned(array<double> &m, std::ostream &ss = std::clog, bool err_throw = false);
};
}
#endif // _GCTL_LBFGS_H

1002
lib/optimization/lcg.cpp Normal file

File diff suppressed because it is too large Load Diff

387
lib/optimization/lcg.h Normal file
View File

@ -0,0 +1,387 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#ifndef _GCTL_LCG_H
#define _GCTL_LCG_H
#include "gctl/core.h"
#include "gctl/maths.h"
#include "gctl/algorithm.h"
#include "gctl_optimization_config.h"
#ifdef GCTL_OPTIMIZATION_TOML
#include "toml.hpp"
#endif // GCTL_OPTIMIZATION_TOML
#if defined _WINDOWS || __WIN32__
#include "windows.h"
#endif // _WINDOWS || __WIN32__
namespace gctl
{
/**
* @brief Types of method that could be recognized by the lcg_solver() function.
*/
enum lcg_solver_type
{
/**
* Conjugate gradient method.
*/
LCG_CG,
/**
* Preconditioned conjugate gradient method.
*/
LCG_PCG,
/**
* Conjugate gradient squared method.
*/
LCG_CGS,
/**
* Biconjugate gradient stabilized method.
*/
LCG_BICGSTAB,
/**
* Biconjugate gradient stabilized method with restart.
*/
LCG_BICGSTAB2,
/**
* Conjugate gradient method with projected gradient for inequality constraints.
* This algorithm comes without non-monotonic line search for the step length.
*/
LCG_PG,
/**
* Conjugate gradient method with spectral projected gradient for inequality constraints.
* This algorithm comes with non-monotonic line search for the step length.
*/
LCG_SPG,
};
/**
* @brief return value of the lcg_solver() function
*/
enum lcg_return_code
{
LCG_SUCCESS = 0, ///< The solver function terminated successfully.
LCG_CONVERGENCE = 0, ///< The iteration reached convergence (same value as LCG_SUCCESS).
LCG_STOP, ///< The iteration is stopped by the monitoring function.
LCG_ALREADY_OPTIMIZIED, ///< The initial solution is already optimized.
// A negative number means an error
LCG_UNKNOWN_ERROR = -1024, ///< Unknown error.
LCG_INVILAD_VARIABLE_SIZE, ///< The variable size is negative.
LCG_INVILAD_MAX_ITERATIONS, ///< The maximal iteration times is negative.
LCG_INVILAD_EPSILON, ///< The epsilon is negative.
LCG_INVILAD_RESTART_EPSILON, ///< The restart epsilon is negative.
LCG_REACHED_MAX_ITERATIONS, ///< Iteration reached maximal limit.
LCG_NULL_PRECONDITION_MATRIX, ///< Null precondition matrix.
LCG_NAN_VALUE, ///< Nan value.
LCG_INVALID_POINTER, ///< Invalid pointer.
LCG_INVALID_LAMBDA, ///< Invalid range for lambda.
LCG_INVALID_SIGMA, ///< Invalid range for sigma.
LCG_INVALID_BETA, ///< Invalid range for beta.
LCG_INVALID_MAXIM, ///< Invalid range for maxi_m.
LCG_SIZE_NOT_MATCH, ///< Sizes of m and B do not match.
};
/**
* @brief Message type of the LCG algorithms.
*
*/
enum lcg_message_type
{
LCG_THROW, ///< throw error only
LCG_ERROR, ///< display error only
LCG_SOLUTION, ///< display info for every solution
LCG_ITERATION, ///< display info for every iteration
};
/**
* @brief Parameters of the conjugate gradient methods.
*/
struct lcg_para
{
/**
* Maximal iteration times. The process will continue till the convergence is met
* if this option is set to zero (default).
*/
int max_iterations;
/**
* Epsilon for convergence test.
* This parameter determines the accuracy with which the solution is to be
* found. A minimization terminates when ||g||/max(||g0||, 1.0) <= epsilon or
* sqrt(||g||)/N <= epsilon for the lcg_solver() function, where ||.|| denotes
* the Euclidean (L2) norm. The default value of epsilon is 1e-8.
*/
double epsilon;
/**
* Whether to use absolute mean differences (AMD) between |Ax - B| to evaluate the process.
* The default value is false which means the gradient based evaluating method is used.
* The AMD based method will be used if this variable is set to true. This parameter is only
* applied to the non-constrained methods.
* NOTE(review): the member is declared as int; presumably zero means false and any
* non-zero value means true - confirm in lcg.cpp.
*/
int abs_diff;
/**
* Restart epsilon for the LCG_BICGSTAB2 algorithm. The default value is 1e-6
*/
double restart_epsilon;
/**
* Initial step length for the projected gradient method. The default is 1.0
*/
double step;
/**
* Multiplier for updating solutions with the spectral projected gradient method. The range of
* this variable is (0, 1). The default is given as 0.95
*/
double sigma;
/**
* Descending ratio for conducting the non-monotonic line search. The range of
* this variable is (0, 1). The default is given as 0.9
*/
double beta;
/**
* The maximal record times of the objective values for the SPG method. The method uses the
* objective values from the most recent maxi_m times to perform the non-monotonic line search.
* The default value is 10.
*/
int maxi_m;
};
/**
* @brief This abstract class implements conjugate gradient algorithms for solving
* a linear system like Ax = B where A is an N*N matrix. As the only quantity that is
* actually needed is the product 'Ax', the kernel matrix 'A' is not declared within
* the class definition. Instead, a pure virtual function is declared as the
* callback interface for calculating the product 'Ax': 'void LCG_Ax(const
* array<double> &x, array<double> &ax)'. The virtual function "int LCG_Progress(const
* array<double> &m, const double converge, const lcg_para &param, size_t t,
* std::ostream &ss)" can be overridden for customized convergence tests.
*/
class lcg_solver
{
private:
lcg_para lcg_param_;
size_t lcg_inter_;
lcg_message_type lcg_msg_;
// The work arrays are kept as class members, which suits repeated calls of the solvers.
array<double> zk, gk, dk, Adk;
array<double> rk, r0_T, pk, vk;
array<double> Apx, uk, qk, qk_m, wk;
array<double> m_new, gk_new;
array<double> sk, yk;
/**
* @brief Display info of a given return code. This is a private function
* and can only be called by other class functions.
*
* @param err_code Input return code
* @param ss Output stream of runtime info.
*/
void lcg_error_str(lcg_return_code err_code, std::ostream &ss);
public:
lcg_solver(); ///< default constructor
virtual ~lcg_solver(); ///< default destructor
/**
* @brief Callback interface for calculating the product of 'A' multiplied by an arbitrary vector 'x'.
*
* @param x Multiplier
* @param ax Product of Ax
*/
virtual void LCG_Ax(const array<double> &x, array<double> &ax) = 0;
/**
* @brief Callback interface for calculating the product of 'M' multiplied by an arbitrary vector 'x',
* in which 'M' is the inverse of the pre-conditioning matrix. This function must be overridden for the
* LCG_PCG algorithm.
*
* @param x Multiplier
* @param mx Product of Mx
*/
virtual void LCG_Mx(const array<double> &x, array<double> &mx);
/**
* @brief Utility function for monitoring the solving process.
*
* @param m Current solution
* @param converge Current convergence
* @param param Employed parameters
* @param t Current iteration count
* @param ss Output stream of runtime info.
* @return The solver quits if a non-zero value is returned.
*/
virtual int LCG_Progress(const array<double> &m, const double converge, const lcg_para &param, size_t t, std::ostream &ss);
/**
* @brief Set the lcg message object
*
* @param msg Input message type.
*/
void set_lcg_message(lcg_message_type msg);
/**
* @brief Set the lcg report interval
*
* @param inter Input report interval.
*/
void set_lcg_report_interval(size_t inter);
/**
* @brief Set the lcg para object
*
* @param param Input lcg parameters.
*/
void set_lcg_para(const lcg_para &param);
/**
* @brief Return a lcg_para object with default values
*
* @return lcg_para
*/
lcg_para default_lcg_para();
#ifdef GCTL_OPTIMIZATION_TOML
/**
* @brief Set parameters of the conjugate gradient algorithms using a toml file.
* All parameter options must be listed under a top-level table 'lcg'. Available options
* under the 'lcg' table are as declared in the lcg_para structure.
*
* @param filename Name of the input toml file
*/
void set_lcg_para(std::string filename);
/**
* @brief Set parameters of the conjugate gradient algorithms using a toml::value object.
* All parameter options must be listed under a top-level table 'lcg'. Available options
* under the 'lcg' table are as declared in the lcg_para structure.
*
* @param toml_data Input toml data
*/
void set_lcg_para(const toml::value &toml_data);
#endif // GCTL_OPTIMIZATION_TOML
/**
* @brief The interface for all CG algorithms. Set the message type to LCG_THROW to suppress all info outputs.
*
* @param m Initial/Input solution.
* @param b Right-hand-side term of the linear system.
* @param solver_id Selected solver type.
* @param ss Output stream of runtime info.
*/
void LCG_Minimize(array<double> &m, const array<double> &b, lcg_solver_type solver_id = LCG_CG, std::ostream &ss = std::clog);
/**
* @brief The interface for all constrained CG algorithms. Set the message type to LCG_THROW to suppress all info outputs.
*
* @param m Initial/Input solution.
* @param b Right-hand-side term of the linear system.
* @param low Lower bound of acceptable solutions.
* @param hig Higher bound of acceptable solutions.
* @param solver_id Selected solver type.
* @param ss Output stream of runtime info.
*/
void LCG_MinimizeConstrained(array<double> &m, const array<double> &b, const array<double> &low, const array<double> &hig, lcg_solver_type solver_id = LCG_PG, std::ostream &ss = std::clog);
/**
* @brief The standalone interface for the CG algorithm. Set the message type to LCG_THROW to suppress all info outputs.
*
* @param m Initial/Input solution.
* @param B Right-hand-side term of the linear system.
* @param ss Output stream of runtime info.
*/
void lcg(array<double> &m, const array<double> &B, std::ostream &ss = std::clog);
/**
* @brief The standalone interface for the PCG algorithm. Set the message type to LCG_THROW to suppress all info outputs.
*
* @param m Initial/Input solution.
* @param B Right-hand-side term of the linear system.
* @param ss Output stream of runtime info.
*/
void lpcg(array<double> &m, const array<double> &B, std::ostream &ss = std::clog);
/**
* @brief The standalone interface for the CGS algorithm. Set the message type to LCG_THROW to suppress all info outputs.
*
* @param m Initial/Input solution.
* @param B Right-hand-side term of the linear system.
* @param ss Output stream of runtime info.
*/
void lcgs(array<double> &m, const array<double> &B, std::ostream &ss = std::clog);
/**
* @brief The standalone interface for the BICGSTAB algorithm. Set the message type to LCG_THROW to suppress all info outputs.
*
* @param m Initial/Input solution.
* @param B Right-hand-side term of the linear system.
* @param ss Output stream of runtime info.
*/
void lbicgstab(array<double> &m, const array<double> &B, std::ostream &ss = std::clog);
/**
* @brief The standalone interface for the BICGSTAB2 algorithm. Set the message type to LCG_THROW to suppress all info outputs.
*
* @param m Initial/Input solution.
* @param B Right-hand-side term of the linear system.
* @param ss Output stream of runtime info.
*/
void lbicgstab2(array<double> &m, const array<double> &B, std::ostream &ss = std::clog);
/**
* @brief The standalone interface for the PG algorithm. Set the message type to LCG_THROW to suppress all info outputs.
*
* @param m Initial/Input solution.
* @param B Right-hand-side term of the linear system.
* @param low Lower bound of acceptable solutions.
* @param hig Higher bound of acceptable solutions.
* @param ss Output stream of runtime info.
*/
void lpg(array<double> &m, const array<double> &B, const array<double> &low, const array<double> &hig, std::ostream &ss = std::clog);
/**
* @brief The standalone interface for the SPG algorithm. Set the message type to LCG_THROW to suppress all info outputs.
*
* @param m Initial/Input solution.
* @param B Right-hand-side term of the linear system.
* @param low Lower bound of acceptable solutions.
* @param hig Higher bound of acceptable solutions.
* @param ss Output stream of runtime info.
*/
void lspg(array<double> &m, const array<double> &B, const array<double> &low, const array<double> &hig, std::ostream &ss = std::clog);
};
}
#endif // _GCTL_LCG_H

505
lib/optimization/lgd.cpp Normal file
View File

@ -0,0 +1,505 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "lgd.h"
/**
* Default parameters for the Lévy-Gradient Descent (L-GD) method.
* The values are listed in the declaration order of gctl::lgd_para:
* flight_times = 1000, batch = 0, epsilon = 1e-5, stddev_v = 1.0,
* beta = 1.5, alpha = 0.01, sigma = 1e-8, lambda = -1.0.
*/
static const gctl::lgd_para lgd_defparam = {1000, 0, 1e-5, 1.0, 1.5, 0.01, 1e-8, -1.0};
gctl::lgd_solver::lgd_solver()
{
    // Start from the library defaults.
    lgd_param_ = lgd_defparam;

    // Report on every flight; no problem size or trace is known yet.
    lgd_inter_ = 1;
    lgd_ques_num_ = 0;
    lgd_trace_times_ = 0;

    // Every optional behaviour is switched off until requested.
    lgd_silent_ = false;
    lgd_has_range_ = false;
    lgd_has_alpha_ = false;
    lgd_save_trace_ = false;
}
// Nothing to release explicitly; the members clean up after themselves.
gctl::lgd_solver::~lgd_solver()
{
}
/**
 * Default progress monitor. Prints one status line to std::clog when the mean
 * objective value has dropped to param.epsilon or when the current flight
 * number is a multiple of the report interval. Always returns 0, i.e. it
 * never asks the solver to stop.
 */
int gctl::lgd_solver::LGD_Progress(const int curr_t, const double curr_fx, const double mean_fx, const double best_fx, const lgd_para &param)
{
    if (lgd_silent_) return 0;

    // A NaN mean_fx (no mean test performed yet) compares false here.
    const bool mean_converged = param.epsilon > 0.0 && mean_fx <= param.epsilon;
    // The modulo is only evaluated for a positive interval.
    const bool report_due = lgd_inter_ > 0 && curr_t%lgd_inter_ == 0;

    if (mean_converged || report_due)
    {
        std::clog << GCTL_CLEARLINE << "\rF(x) = " << curr_fx << ", Mean F(x) = " << mean_fx << ", Best F(x) = " << best_fx << ", Times = " << curr_t;
    }
    return 0;
}
// Switch the solver to silent mode (no progress lines, no timing report).
void gctl::lgd_solver::lgd_silent()
{
    lgd_silent_ = true;
}
// Set how many flights pass between two progress reports.
// A non-positive interval disables the periodic reports of LGD_Progress().
void gctl::lgd_solver::set_lgd_report_interval(int inter)
{
    lgd_inter_ = inter;
}
// Replace the solver's parameters. The values are validated later, when lgd() runs.
void gctl::lgd_solver::set_lgd_para(const lgd_para &in_param)
{
    lgd_param_ = in_param;
}
#ifdef GCTL_OPTIMIZATION_TOML
void gctl::lgd_solver::set_lgd_para(std::string filename)
{
toml::value toml_data;
toml_data = toml::parse(filename);
set_lgd_para(toml_data);
return;
}
// Read the solver's parameters from parsed toml data. The parameters are first
// reset to the defaults; every option found under the top-level table 'lgd'
// then overrides the corresponding field of lgd_para. Missing options (or a
// missing 'lgd' table) leave the defaults in place.
void gctl::lgd_solver::set_lgd_para(const toml::value &toml_data)
{
    lgd_param_ = lgd_defparam;

    const std::string table_key = "lgd";
    if (!toml_data.contains(table_key)) return;

    const auto &lgd_table = toml_data.at(table_key);

    // Integer options.
    if (lgd_table.contains("flight_times")) lgd_param_.flight_times = toml::find<int>(toml_data, table_key, "flight_times");
    if (lgd_table.contains("batch")) lgd_param_.batch = toml::find<int>(toml_data, table_key, "batch");

    // Floating-point options.
    if (lgd_table.contains("epsilon")) lgd_param_.epsilon = toml::find<double>(toml_data, table_key, "epsilon");
    if (lgd_table.contains("stddev_v")) lgd_param_.stddev_v = toml::find<double>(toml_data, table_key, "stddev_v");
    if (lgd_table.contains("beta")) lgd_param_.beta = toml::find<double>(toml_data, table_key, "beta");
    if (lgd_table.contains("alpha")) lgd_param_.alpha = toml::find<double>(toml_data, table_key, "alpha");
    if (lgd_table.contains("sigma")) lgd_param_.sigma = toml::find<double>(toml_data, table_key, "sigma");
    if (lgd_table.contains("lambda")) lgd_param_.lambda = toml::find<double>(toml_data, table_key, "lambda");
}
#endif // GCTL_OPTIMIZATION_TOML
// Turn on the recording of the flight traces (see save_lgd_trace()).
void gctl::lgd_solver::set_lgd_record_trace()
{
    lgd_save_trace_ = true;
}
void gctl::lgd_solver::show_solver()
{
std::clog << "Solver's Setup Panel\n";
std::clog << "-----------------------------\n";
std::clog << "Solver: LGD\n";
std::clog << "Flights = " << lgd_param_.flight_times << ", Batch = " << lgd_param_.batch << ", Epsilon = " << lgd_param_.epsilon << ", Lambda = " << lgd_param_.lambda << "\n";
std::clog << "STD(v) = " << lgd_param_.stddev_v << ", Beta = " << lgd_param_.beta << ", Alpha = " << lgd_param_.alpha << ", Sigma = " << lgd_param_.sigma << "\n";
std::clog << "=============================\n";
return;
}
// Write the recorded flight traces to a text file, one recorded solution per
// row. Only a warning is shown if no trace has been recorded.
// NOTE(review): lgd() resets lgd_trace_times_ but appends to lgd_trace_, so the
// model size computed below looks wrong after a second run - confirm.
void gctl::lgd_solver::save_lgd_trace(std::string trace_file)
{
    if (lgd_trace_times_ == 0)
    {
        GCTL_ShowWhatError("[gctl::lgd_solver] No trace is recorded.", GCTL_WARNING_ERROR, 0, 0, 0);
        return;
    }

    std::ofstream ofile;
    open_outfile(ofile, trace_file, ".txt");

    // The trace is stored flat: lgd_trace_times_ solutions of m_size values each.
    int m_size = lgd_trace_.size()/lgd_trace_times_;

    ofile << "# L-GD flight traces.\n"
          << "# Each row represents an accepted solution.\n"
          << "# Model size: " << m_size << "\n"
          << "# Accepted solutions: " << lgd_trace_times_ << "\n";

    for (size_t row = 0; row < lgd_trace_times_; row++)
    {
        const size_t row_start = row*m_size;
        for (size_t col = 0; col < m_size; col++)
        {
            ofile << lgd_trace_[row_start + col] << " ";
        }
        ofile << "\n";
    }

    ofile.close();
}
/**
 * Report the meaning of a return code.
 *
 * If err_throw is true and the code is negative, the message is thrown as a
 * std::string. Otherwise the message is written to ss; when err_throw is false
 * it is wrapped in a colored "Success"/"Fail" banner and terminated by a
 * newline.
 */
void gctl::lgd_solver::lgd_error_str(lgd_return_code err_code, std::ostream &ss, bool err_throw)
{
    const bool succeeded = (err_code >= 0);

    // Opening banner.
#if defined _WINDOWS || __WIN32__
    if (!err_throw)
    {
        SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE),
            succeeded ? (FOREGROUND_INTENSITY | FOREGROUND_GREEN) : (FOREGROUND_INTENSITY | FOREGROUND_RED));
        ss << (succeeded ? "Success! " : "Fail! ");
    }
#else
    if (!err_throw)
    {
        ss << (succeeded ? "\033[1m\033[32mLGD Success! " : "\033[1m\033[31mLGD Fail! ");
    }
#endif

    // Message for the code.
    std::string err_str;
    switch (err_code)
    {
        case LGD_CONVERGENCE:            err_str = "The iteration has reached convergence."; break;
        case LGD_STOP:                   err_str = "The iteration is stopped by the progress monitoring function."; break;
        case LGD_REACHED_MAX_ITERATIONS: err_str = "The maximal flight times has been reached."; break;
        case LGD_INVALID_SOLUTION_SIZE:  err_str = "Invalid solution size."; break;
        case LGD_INVALID_MAX_ITERATIONS: err_str = "Invalid flight times."; break;
        case LGD_INVALID_EPSILON:        err_str = "Invalid epsilon value."; break;
        case LGD_INVALID_STDV:           err_str = "Invalid STD value for generating the levy distribution."; break;
        case LGD_INVALID_BETA:           err_str = "Invalid beta value."; break;
        case LGD_INVALID_ALPHA:          err_str = "Invalid alpha value."; break;
        case LGD_INVALID_SIGMA:          err_str = "Invalid sigma value."; break;
        case LGD_NAN_VALUE:              err_str = "NaN values found."; break;
        default:                         err_str = "Unknown error."; break;
    }

    if (err_throw && !succeeded) throw err_str;
    ss << err_str;

    // Closing part: restore the console color and end the line.
#if defined _WINDOWS || __WIN32__
    if (!err_throw)
    {
        SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7);
        ss << std::endl;
    }
#else
    if (!err_throw)
    {
        ss << "\033[0m" << std::endl;
    }
#endif
}
// Return a copy of the library's default L-GD parameters.
gctl::lgd_para gctl::lgd_solver::default_lgd_para()
{
    return lgd_defparam;
}
/**
 * Run the L-GD solver and report the outcome.
 *
 * In silent mode nothing is printed; a negative return code is always thrown
 * (as a std::string), regardless of err_throw. Otherwise the run is timed, the
 * time cost is printed to std::clog unless err_throw is set, and the return
 * code is reported through lgd_error_str() when verbose is set or the code is
 * negative.
 */
void gctl::lgd_solver::LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m,
    std::ostream &ss, bool verbose, bool err_throw)
{
    if (lgd_silent_)
    {
        const lgd_return_code status = lgd(best_m, mean_m, std_m);
        if (status < 0) lgd_error_str(status, ss, true);
        return;
    }

    // Run the solver and measure the elapsed time in milliseconds.
    // NOTE(review): lgd.h guards the omp.h include with GSTL_OPENMP while this
    // code tests GCTL_OPENMP - confirm the two macros are meant to differ.
#ifdef GCTL_OPENMP
    const double t_begin = omp_get_wtime();
    const lgd_return_code ret = lgd(best_m, mean_m, std_m);
    const double t_end = omp_get_wtime();
    const double costime = 1000*(t_end-t_begin);
#else
    const clock_t t_begin = clock();
    const lgd_return_code ret = lgd(best_m, mean_m, std_m);
    const clock_t t_end = clock();
    const double costime = 1000*(t_end-t_begin)/(double)CLOCKS_PER_SEC;
#endif

    if (!err_throw)
    {
        std::clog << std::endl << "Solver: LGD. Time cost: " << costime << " ms" << std::endl;
    }

    // Successful codes are only reported in verbose mode; errors always are.
    if (verbose || ret < 0)
    {
        lgd_error_str(ret, ss, err_throw);
    }
}
/**
 * Run the L-GD solver with one positive scaling value per model parameter.
 * Throws std::runtime_error if the sizes of best_m and alphas differ or if a
 * scaling value is not positive. The scaling values stay active for later runs.
 */
void gctl::lgd_solver::LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m,
    const array<double> &alphas, std::ostream &ss, bool verbose, bool err_throw)
{
    lgd_ques_num_ = best_m.size();
    if (lgd_ques_num_ != alphas.size())
    {
        throw std::runtime_error("[gctl::lgd_solver] arraies' size do not match.");
    }

    // Validate and store the scaling values one by one.
    lgd_alpha_.resize(lgd_ques_num_);
    for (int k = 0; k < lgd_ques_num_; k++)
    {
        const double scale = alphas[k];
        if (scale <= 0.0)
        {
            throw std::runtime_error("[gctl::lgd_solver] Invalid scaling value.");
        }
        lgd_alpha_[k] = scale;
    }
    lgd_has_alpha_ = true;

    LGD_Minimize(best_m, mean_m, std_m, ss, verbose, err_throw);
}
/**
 * Run the L-GD solver with per-parameter lower and upper bounds.
 * Throws std::runtime_error if the array sizes differ from the size of best_m
 * or if a lower bound is not smaller than its upper bound. The bounds stay
 * active for later runs.
 */
void gctl::lgd_solver::LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m,
    const array<double> &lows, const array<double> &higs, std::ostream &ss, bool verbose, bool err_throw)
{
    lgd_ques_num_ = best_m.size();
    if (lgd_ques_num_ != lows.size() || lgd_ques_num_ != higs.size())
    {
        throw std::runtime_error("[gctl::lgd_solver] arraies' size do not match.");
    }

    // Validate and store the bounds one pair at a time.
    lgd_low_.resize(lgd_ques_num_);
    lgd_hig_.resize(lgd_ques_num_);
    for (int k = 0; k < lgd_ques_num_; k++)
    {
        const double lower = lows[k];
        const double upper = higs[k];
        if (lower >= upper)
        {
            throw std::runtime_error("[gctl::lgd_solver] Invalid bound value.");
        }
        lgd_low_[k] = lower;
        lgd_hig_[k] = upper;
    }
    lgd_has_range_ = true;

    LGD_Minimize(best_m, mean_m, std_m, ss, verbose, err_throw);
}
/**
 * Run the L-GD solver with the same lower and upper bound for every model
 * parameter. Throws std::runtime_error if low is not smaller than hig. The
 * bounds stay active for later runs.
 */
void gctl::lgd_solver::LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m,
    double low, double hig, std::ostream &ss, bool verbose, bool err_throw)
{
    const bool bounds_ok = low < hig;
    if (!bounds_ok)
    {
        throw std::runtime_error("[gctl::lgd_solver] Invalid bound value.");
    }

    // Fill both bound arrays with the scalar limits.
    lgd_ques_num_ = best_m.size();
    lgd_low_.resize(lgd_ques_num_, low);
    lgd_hig_.resize(lgd_ques_num_, hig);
    lgd_has_range_ = true;

    LGD_Minimize(best_m, mean_m, std_m, ss, verbose, err_throw);
}
/**
 * Core of the Lévy-Gradient Descent (L-GD) method.
 *
 * Starting from best_m, the solver repeatedly evaluates the objective function
 * and its gradient through LGD_Evaluate() and moves the solution against the
 * normalized gradient by a random, Lévy-distributed step length.
 *
 * @param best_m In: the initial solution. Out: the solution with the smallest
 *               objective value met during the flights (left at the invalid
 *               trial solution if LGD_NAN_VALUE is returned).
 * @param mean_m Out: running mean of the recorded solutions.
 * @param std_m  Out: running standard deviation of the recorded solutions, as
 *               updated by dynamic_stddev().
 * @return LGD_CONVERGENCE, LGD_STOP or LGD_REACHED_MAX_ITERATIONS on a normal
 *         termination; a negative lgd_return_code for invalid parameters or
 *         when the solution turns invalid.
 */
gctl::lgd_return_code gctl::lgd_solver::lgd(array<double> &best_m, array<double> &mean_m, array<double> &std_m)
{
    lgd_ques_num_ = best_m.size();

    // check parameters
    if (lgd_ques_num_ <= 0) return LGD_INVALID_SOLUTION_SIZE;
    if (lgd_param_.flight_times <= 0) return LGD_INVALID_MAX_ITERATIONS;
    if (lgd_param_.epsilon <= 0) return LGD_INVALID_EPSILON;
    if (lgd_param_.stddev_v <= 0) return LGD_INVALID_STDV;
    if (lgd_param_.beta <= 1.0 || lgd_param_.beta >= 2.0) return LGD_INVALID_BETA;
    if (lgd_param_.alpha <= 0) return LGD_INVALID_ALPHA;
    if (lgd_param_.sigma <= 0) return LGD_INVALID_SIGMA;

    // initiate solutions
    mean_m.resize(lgd_ques_num_, 0.0); std_m.resize(lgd_ques_num_, 0.0);

    // Standard deviation of the normal variable u used for drawing the step
    // lengths; it only depends on beta.
    double gamma1 = tgamma(lgd_param_.beta + 1.0);
    double gamma2 = tgamma(0.5*(lgd_param_.beta + 1.0));
    double stddev_u = pow((gamma1*sin(0.5*GCTL_Pi*lgd_param_.beta)) / (gamma2*lgd_param_.beta*pow(2, 0.5*(lgd_param_.beta-1.0))), 1.0/lgd_param_.beta);

    // The random engine is seeded by the system clock, so runs are not reproducible.
    unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
    std::default_random_engine generator(seed);
    std::normal_distribution<double> dist_u(0, stddev_u);
    std::normal_distribution<double> dist_v(0, lgd_param_.stddev_v);
    std::uniform_real_distribution<double> dist_s(1.0, 2.0);

    // g: gradient at the current solution. g_mean: scratch gradient for the mean
    // convergence test, kept separate so that the test cannot overwrite g.
    // g_mem / g_orth: two stacked vectors for the Schmidt orthogonalization.
    array<double> g, g_mean, g_mem, g_orth, new_mean, b_m, alphas;
    g.resize(lgd_ques_num_); g_mean.resize(lgd_ques_num_);
    g_mem.resize(2*lgd_ques_num_); g_orth.resize(2*lgd_ques_num_);
    new_mean.resize(lgd_ques_num_); b_m.resize(lgd_ques_num_); alphas.resize(lgd_ques_num_);

    // Initialize the step scale of every parameter to lgd_param_.alpha
    vecset(alphas, lgd_param_.alpha);
    if (lgd_has_range_)
    {
        // Scale by the width of the allowed range of each parameter.
        vecdiff(alphas, lgd_hig_, lgd_low_);
        vecscale(alphas, lgd_param_.alpha);
    }

    if (lgd_has_alpha_)
    {
        // User-supplied scaling values take precedence.
        veccpy(alphas, lgd_alpha_, lgd_param_.alpha);
    }

    double fx_best, fx_tmp, direct_mod, levy_length;
    double fx_mean = NAN;

    // Start the flights
    int rcd_times = 0;
    lgd_trace_times_ = 0;
    for (int ft = 0; ft <= lgd_param_.flight_times; ft++)
    {
        // Evaluate the trial solution
        fx_tmp = LGD_Evaluate(best_m, g);
        if (ft == 0 || fx_tmp < fx_best)
        {
            fx_best = fx_tmp;
            veccpy(b_m, best_m);
        }

        // Record the flight trace. Every solution is recorded if lambda is not
        // positive; otherwise only solutions with F(x) <= lambda are.
        if (lgd_param_.lambda <= 0.0 || (lgd_param_.lambda > 0.0 && fx_tmp <= lgd_param_.lambda))
        {
            for (int i = 0; i < lgd_ques_num_; i++)
            {
                std_m[i] = dynamic_stddev(std_m[i], rcd_times, mean_m[i], best_m[i], new_mean[i]);
                mean_m[i] = new_mean[i];
            }
            rcd_times++;

            if (lgd_save_trace_)
            {
                lgd_trace_.append_array(best_m);
                lgd_trace_times_++;
            }
        }

        if (LGD_Progress(ft, fx_tmp, fx_mean, fx_best, lgd_param_))
        {
            // Hand the best solution back through best_m
            veccpy(best_m, b_m);
            return LGD_STOP;
        }

        if (lgd_param_.batch > 0 && (rcd_times+1)%lgd_param_.batch == 0)
        {
            // Mean convergence test. The gradient at the mean solution goes into
            // g_mean: writing it into g would make the flight below use the
            // gradient of the mean solution instead of the current one.
            fx_mean = LGD_Evaluate(mean_m, g_mean);
            if (fx_mean < lgd_param_.epsilon)
            {
                LGD_Progress(ft, fx_tmp, fx_mean, fx_best, lgd_param_);
                // Hand the best solution back through best_m
                veccpy(best_m, b_m);
                return LGD_CONVERGENCE;
            }
        }

        // Stagnation point test
        direct_mod = sqrt(vecdot(g, g));
        if (direct_mod < lgd_param_.sigma)
        {
            if (ft == 0) // No previous direction is recorded at the first flight
            {
                do // The gradient vanishes, so use a random direction
                {
                    for (int i = 0; i < lgd_ques_num_; i++)
                    {
                        g[i] = dist_s(generator);
                    }

                    direct_mod = sqrt(vecdot(g, g));
                }
                while (direct_mod < lgd_param_.sigma);
            }
            else // The gradient vanishes, so take one step orthogonal to the previous direction
            {
                for (int i = 0; i < lgd_ques_num_; i++)
                {
                    g_mem[i+lgd_ques_num_] = dist_s(generator);
                }

                schmidt_orthogonal(g_mem, g_orth, 2);

                for (int i = 0; i < lgd_ques_num_; i++)
                {
                    g[i] = g_orth[i+lgd_ques_num_];
                }

                // The orthogonalized direction is taken to have unit length
                // (presumably normalized by schmidt_orthogonal - TODO confirm)
                direct_mod = 1.0;
            }
        }

        // Step length of the Lévy flight. The original formula has no outer
        // absolute value; it is taken here because the length itself is needed.
        levy_length = fabs(dist_u(generator)/pow(fabs(dist_v(generator)), 1.0/lgd_param_.beta));
        for (int i = 0; i < lgd_ques_num_; i++)
        {
            best_m[i] -= levy_length*alphas[i]*g[i]/direct_mod;
        }

        if (!vecvalid(best_m))
        {
            return LGD_NAN_VALUE;
        }

        // Remember the direction of this flight
        for (int i = 0; i < lgd_ques_num_; i++)
        {
            g_mem[i] = g[i];
        }

        // Apply the range constraints, if any
        if (lgd_has_range_)
        {
            vecbtm(best_m, lgd_low_);
            vectop(best_m, lgd_hig_);
        }
    }

    // Hand the best solution back through best_m
    veccpy(best_m, b_m);
    return LGD_REACHED_MAX_ITERATIONS;
}

212
lib/optimization/lgd.h Normal file
View File

@ -0,0 +1,212 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#ifndef _GCTL_LGD_H
#define _GCTL_LGD_H
#include "gctl/core.h"
#include "gctl/io.h"
#include "gctl/maths.h"
#include "gctl/algorithm.h"
#include "gctl_optimization_config.h"
#ifdef GCTL_OPTIMIZATION_TOML
#include "toml.hpp"
#endif // GCTL_OPTIMIZATION_TOML
#if defined _WINDOWS || __WIN32__
#include "windows.h"
#endif // _WINDOWS || __WIN32__
#ifdef GSTL_OPENMP
#include "omp.h"
#endif // GSTL_OPENMP
namespace gctl
{
/**
* @brief Return values of the lgd_solver class. Non-negative values are reported
* as a success and negative values as a failure.
*/
enum lgd_return_code
{
LGD_CONVERGENCE = 1, ///< The iteration reached convergence.
LGD_STOP, ///< The iteration was stopped by the progress monitoring function.
LGD_REACHED_MAX_ITERATIONS, ///< The maximal flight times has been reached.
LGD_INVALID_SOLUTION_SIZE = -1024, ///< Invalid solution size.
LGD_INVALID_MAX_ITERATIONS, ///< The maximal flight times is not positive.
LGD_INVALID_EPSILON, ///< The epsilon is not positive.
LGD_INVALID_STDV, ///< The standard deviation stddev_v is not positive.
LGD_INVALID_BETA, ///< Invalid value for beta (must be within (1.0, 2.0)).
LGD_INVALID_ALPHA, ///< The alpha is not positive.
LGD_INVALID_SIGMA, ///< The sigma is not positive.
LGD_NAN_VALUE, ///< Invalid (e.g. NaN) values found in the solution.
};
/**
* @brief Parameters of the L-GD method. The member order matters: the default
* parameters are given as an aggregate initializer in lgd.cpp.
*/
struct lgd_para
{
/**
* Maximal times of the lévy flight. The iteration process runs until the maximal
* flight times is reached unless the mean convergence test is set and satisfied. To
* activate the test, set the 'batch' parameter shown below. Must be positive. The
* default value is 1000.
*/
int flight_times;
/**
* Batch size for the mean convergence test. This parameter determines the number of
* recorded solutions after which the objective function of the mean solution is
* evaluated. Note that only qualified solutions are recorded if the 'lambda'
* parameter is set. The library does not perform the mean convergence test if the
* value of this parameter is zero. The default is 0.
*/
int batch;
/**
* Epsilon for the mean convergence test. This parameter determines the accuracy
* with which the mean solution is to be found. Must be positive. The default is 1e-5.
*/
double epsilon;
/**
* Standard deviation of the normal variable v that is used to calculate the length
* of each lévy flight as length = |u|/|v|^{1/beta}, in which u is a normal variable
* whose standard deviation is computed from beta. Must be positive. This parameter
* is typically given as 1.0.
*/
double stddev_v;
/**
* Scale parameter for calculating the standard deviation of u and the flying
* length. Must lie within (1.0, 2.0). The default value is 1.5. The bigger beta
* is, the smaller the range of the flying length gets.
*/
double beta;
/**
* Scale parameter multiplied by the flying length. Must be positive. The default
* value is 0.01. The parameter should be set according to the expected convergence
* speed. Normally, the bigger alpha is, the faster the L-GD converges. However, the
* L-GD may miss the optimized solutions if alpha is too big.
*/
double alpha;
/**
* Sigma for the stagnation point test. The algorithm takes one search step
* orthogonal to the last iteration if the modulus of the gradient is smaller
* than sigma. This mechanism helps the algorithm escape from stagnation
* points such as local minima or saddle points. Must be positive. The default is 1e-8.
*/
double sigma;
/**
* Threshold for recording the search paths. If the value is bigger than zero, then
* only solutions whose objective values are smaller than or equal to the threshold
* are used for the statistical analysis. Otherwise, all solutions are used. The recorded
* paths can be saved to a file using the save_lgd_trace(string) function if
* set_lgd_record_trace() is called. The default is -1.0.
*/
double lambda;
};
/**
* @brief Abstract solver of the Lévy-Gradient Descent (L-GD) method. A derived class
* supplies the objective function and its gradient by implementing LGD_Evaluate().
*/
class lgd_solver
{
public:
lgd_solver(); ///< Default constructor.
virtual ~lgd_solver(); ///< Default destructor.
/**
* @brief Interface for the evaluation of the objective function. Concrete
* contents of this function is determined according to the optimizing problem.
*
* @param x Inputs of the current solution.
* @param g Outputs of the model gradient calculated using the input solution.
* @return Current objective value.
*/
virtual double LGD_Evaluate(const array<double> &x, array<double> &g) = 0;
/**
* @brief Default monitoring function of the optimizing process.
*
* @param curr_t Current flight times.
* @param curr_fx Objective value of the current solution.
* @param mean_fx Objective value of the mean solution (NaN until the first mean convergence test).
* @param best_fx Objective value of the best solution.
* @param param L-GD's parameters used for the optimizing process.
* @return The optimizing process will be stopped if a non-zero value is returned.
*/
virtual int LGD_Progress(const int curr_t, const double curr_fx, const double mean_fx,
const double best_fx, const lgd_para &param);
void lgd_silent(); ///< Turn on the silent mode: no progress or timing info is printed.
void set_lgd_report_interval(int inter); ///< Set the number of flights between two progress reports.
void show_solver(); ///< Print the solver's parameters to std::clog.
void set_lgd_record_trace(); ///< Turn on the recording of flight traces.
// Save flight traces to file. Note that only qualified solutions will be
// saved if the recording threshold (lgd_para::lambda) is set.
void save_lgd_trace(std::string trace_file);
lgd_para default_lgd_para(); ///< Return a lgd_para object with default values.
void set_lgd_para(const lgd_para &param); ///< Set the solver's parameters.
#ifdef GCTL_OPTIMIZATION_TOML
// Set the solver's parameters using a toml file or parsed toml data. All options
// must be listed under a top-level table 'lgd' and are named as the members of
// lgd_para. Options that are not listed take the default values.
void set_lgd_para(std::string filename);
void set_lgd_para(const toml::value &toml_data);
#endif // GCTL_OPTIMIZATION_TOML
/**
* @brief Run the L-GD solver.
*
* @param best_m Inputs the initial solution and outputs the best solution found.
* @param mean_m Outputs the mean of the recorded solutions.
* @param std_m Outputs the standard deviation of the recorded solutions.
* @param ss Output stream of the solver's status message.
* @param verbose Report the return status even if the solver succeeded.
* @param err_throw Throw the error message instead of writing it to ss.
*/
void LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m,
std::ostream &ss = std::clog, bool verbose = true, bool err_throw = false);
// Same as above, with one positive scaling value of the flying length per model
// parameter (alphas). A std::runtime_error is thrown for mismatched array sizes
// or non-positive scaling values.
void LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m,
const array<double> &alphas, std::ostream &ss = std::clog,
bool verbose = true, bool err_throw = false);
// Same as above, with lower and upper bounds of each model parameter. A
// std::runtime_error is thrown for mismatched array sizes or if a lower bound
// is not smaller than its upper bound.
void LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m,
const array<double> &lows, const array<double> &higs, std::ostream &ss = std::clog,
bool verbose = true, bool err_throw = false);
// Same as above, with one lower and one upper bound shared by all model
// parameters. A std::runtime_error is thrown if low is not smaller than hig.
void LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m,
double low, double hig, std::ostream &ss = std::clog, bool verbose = true,
bool err_throw = false);
private:
// Report the meaning of a return code, either written to ss or thrown.
void lgd_error_str(lgd_return_code err_code, std::ostream &ss = std::clog, bool err_throw = false);
// Core routine of the L-GD method; returns the termination status.
lgd_return_code lgd(array<double> &best_m, array<double> &mean_m, array<double> &std_m);
private:
lgd_para lgd_param_; // Parameters in use
int lgd_inter_, lgd_ques_num_, lgd_trace_times_; // Report interval, model size and number of recorded traces
bool lgd_silent_, lgd_has_range_, lgd_has_alpha_, lgd_save_trace_; // Option switches
array<double> lgd_low_, lgd_hig_, lgd_alpha_, lgd_trace_; // Bounds, scaling values and the flat trace record
};
}
#endif // _GCTL_LGD_H

View File

@ -0,0 +1,100 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2023 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "loss_func.h"
/**
 * Default constructor. No targets are set; call init() before evaluating.
 *
 * All scalar members get a defined starting value. In particular the loss
 * accumulator must start at zero, because evaluate() adds to it and
 * get_loss() reads it.
 */
gctl::loss_func::loss_func()
{
    uncer_type_ = 0; // no uncertainty is set
    tar_num_ = 0;
    uncer_ = 1.0;
    loss_ = 0.0;
}
/**
 * Construct the loss function from its targets and norm type.
 *
 * The loss accumulator is set to zero here, because evaluate() adds to it
 * and get_loss() reads it.
 *
 * @param tar    Target values.
 * @param n_type Norm type of the loss (L1 or L2).
 */
gctl::loss_func::loss_func(const array<double> &tar, norm_type_e n_type)
{
    uncer_type_ = 0; // no uncertainty is set
    uncer_ = 1.0;
    loss_ = 0.0;
    init(tar, n_type);
}
// Nothing to release explicitly; the members clean up after themselves.
gctl::loss_func::~loss_func()
{
}
// Set the target values and the norm type of the loss function.
void gctl::loss_func::init(const array<double> &tar, norm_type_e n_type)
{
    norm_type_ = n_type;
    tars_ = tar;
    tar_num_ = tar.size();
}
// Use one uncertainty value for all targets.
void gctl::loss_func::set_uncertainty(double uncer)
{
    uncer_ = uncer;
    uncer_type_ = 1; // scalar uncertainty
}
// Use one uncertainty value per target. The array is indexed with the same id
// as the targets; its size is not checked here.
void gctl::loss_func::set_uncertainty(const array<double> &uncer)
{
    uncers_ = uncer;
    uncer_type_ = 2; // per-target uncertainties
}
// Return the loss accumulated by evaluate() and reset the accumulator to zero.
double gctl::loss_func::get_loss()
{
    const double accumulated = loss_;
    loss_ = 0.0;
    return accumulated;
}
/**
 * Evaluate the loss term of one datum.
 *
 * The misfit (inp - target) is divided by the uncertainty, if one is set, and
 * then turned into its absolute value (L1) or its square (L2). The term is
 * added to the internal accumulator as is, while the returned value is the
 * term divided by the number of targets.
 *
 * @param inp Input value.
 * @param id  Index of the corresponding target.
 */
double gctl::loss_func::evaluate(double inp, int id)
{
    double term = inp - tars_[id];

    // Weight the misfit by the uncertainty.
    if (uncer_type_ == 1)
    {
        term /= uncer_;
    }
    else if (uncer_type_ == 2)
    {
        term /= uncers_[id];
    }

    // Apply the norm.
    if (norm_type_ == L1)
    {
        term = fabs(term);
    }
    if (norm_type_ == L2)
    {
        term = term*term;
    }

    loss_ += term;
    return term/tar_num_;
}
/**
 * Gradient of the loss term of one datum with respect to the input value,
 * divided by the number of targets.
 *
 * L1: sign(inp - target)/c. L2: 2*(inp - target)/(c*c). Here c is the
 * uncertainty; no division takes place if no uncertainty is set.
 *
 * @param inp Input value.
 * @param id  Index of the corresponding target.
 */
double gctl::loss_func::gradient(double inp, int id)
{
    const double misfit = inp - tars_[id];
    double grad = misfit;

    if (norm_type_ == L1)
    {
        // A NaN misfit matches neither branch and is passed on unchanged.
        if (misfit >= 0) grad = 1.0;
        else if (misfit < 0) grad = -1.0;
    }
    if (norm_type_ == L2)
    {
        grad = 2.0*grad;
    }

    if (uncer_type_ != 0)
    {
        // Pick the scalar or the per-target uncertainty.
        double c = 1.0;
        if (uncer_type_ == 1) c = uncer_;
        else if (uncer_type_ == 2) c = uncers_[id];

        if (norm_type_ == L1) grad /= c;
        else if (norm_type_ == L2) grad /= (c*c);
    }

    return grad/tar_num_;
}

View File

@ -0,0 +1,61 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2023 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#ifndef _GCTL_LOSS_FUNC_H
#define _GCTL_LOSS_FUNC_H
// library's head files
#include "gctl/core.h"
namespace gctl
{
/**
* @brief A loss function measuring the L1 or L2 misfit between input values and
* a set of targets, optionally weighted by uncertainties. The function is
* evaluated one datum at a time.
*/
class loss_func
{
public:
loss_func(); ///< Default constructor. Call init() before use.
loss_func(const array<double> &tar, norm_type_e n_type); ///< Construct from the targets and the norm type.
virtual ~loss_func(); ///< Destructor.
void init(const array<double> &tar, norm_type_e n_type); ///< Set the targets and the norm type.
void set_uncertainty(double uncer); ///< Use one uncertainty for all targets.
void set_uncertainty(const array<double> &uncer); ///< Use one uncertainty per target.
double get_loss(); ///< Return the loss accumulated by evaluate() and reset it to zero.
double evaluate(double inp, int id); ///< Accumulate the loss term of datum 'id'; returns the term divided by the number of targets.
double gradient(double inp, int id); ///< Gradient of the loss term of datum 'id', divided by the number of targets.
private:
//unsigned int counter_;
unsigned int tar_num_; // Number of targets
int uncer_type_; // 0: no uncertainty, 1: scalar uncertainty, 2: per-target uncertainties
double uncer_, loss_; // Scalar uncertainty and the accumulated loss
norm_type_e norm_type_; // Norm type of the loss
array<double> tars_; // Targets
array<double> uncers_; // Per-target uncertainties
};
}
#endif // _GCTL_LOSS_FUNC_H

152
lib/optimization/lu.cpp Normal file
View File

@ -0,0 +1,152 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "lu.h"
/**
 * @brief Bind the solver to the matrix that is going to be factorized.
 *
 * The matrix is bound by reference (decomposedMatrix), so no copy is made here.
 *
 * @param sourceMatrix Square, non-empty matrix.
 * @throws domain_error if the matrix is empty or not square.
 */
gctl::lu::lu(matrix<double> &sourceMatrix) : decomposedMatrix(sourceMatrix)
{
    // Guard clauses: reject an empty matrix first, then a non-square one.
    if (sourceMatrix.empty())
    {
        throw domain_error("Invalid input matrix. From lu::lu(...)");
    }

    if (sourceMatrix.row_size() != sourceMatrix.col_size())
    {
        throw domain_error("Invalid input matrix. From lu::lu(...)");
    }
}
/**
 * @brief Factorize the bound matrix in place into triangular factors L and U,
 *        using partial (row) pivoting.
 *
 * Rows are never physically moved. rowPermutation records the pivot order instead:
 * logical row k of the factorization lives in physical row rowPermutation[k].
 * On return each (permuted) row holds the multipliers of L left of the diagonal
 * (the unit diagonal of L is implied) and the entries of U on and right of it.
 *
 * Singularity is detected on the pivots themselves rather than on a running
 * determinant product, which could underflow to zero or overflow for a perfectly
 * regular matrix.
 *
 * @throws runtime_error if a pivot is exactly zero, i.e. the matrix is singular.
 */
void gctl::lu::decompose()
{
    const int n = decomposedMatrix.row_size();

    // Start from the identity permutation.
    rowPermutation.resize(n);
    for (int i = 0; i < n; i++)
    {
        rowPermutation[i] = i;
    }

    for (int p = 0; p < n - 1; p++)
    {
        // Partial pivoting: move the row with the largest |entry| in column p to position p.
        for (int i = p + 1; i < n; i++)
        {
            if (std::fabs(decomposedMatrix[rowPermutation[i]][p]) > std::fabs(decomposedMatrix[rowPermutation[p]][p]))
            {
                // Swap row indices through an integer temporary.
                const int swapped = rowPermutation[p];
                rowPermutation[p] = rowPermutation[i];
                rowPermutation[i] = swapped;
            }
        }

        const int pivot_row = rowPermutation[p];
        const double pivot = decomposedMatrix[pivot_row][p];
        if (pivot == 0.0)
        {
            // The whole remaining column is zero: the matrix is singular.
            throw runtime_error("The input matrix is singular. From gctl::lu::decompose()");
        }

        for (int i = p + 1; i < n; i++)
        {
            const int row = rowPermutation[i];

            // Store the multiplier in the slot of L ...
            decomposedMatrix[row][p] /= pivot;
            const double factor = decomposedMatrix[row][p];

            // ... and eliminate column p from the rest of this row.
            for (int j = p + 1; j < n; j++)
            {
                decomposedMatrix[row][j] -= factor * decomposedMatrix[pivot_row][j];
            }
        }
    }

    // The last diagonal entry of U is the only pivot the loop above does not test.
    if (decomposedMatrix[rowPermutation[n - 1]][n - 1] == 0.0)
    {
        throw runtime_error("Determinant of the input matrix is zero. From gctl::lu::decompose()");
    }
    return;
}
/**
 * @brief Solve A x = b, where A is the matrix that was factorized by decompose().
 *
 * The decomposed matrix stores both triangular factors: the multipliers of L below
 * the diagonal (unit diagonal implied) and U on and above it. Its rows are addressed
 * through rowPermutation, which records the pivot order chosen by decompose().
 *
 * @param b Right-hand side. It is only read; b and x must be distinct objects
 *          because x is written while permuted entries of b are still being read.
 * @param x Solution vector; resized to b.size().
 * @throws domain_error if b is empty, or if its length differs from the length of
 *         rowPermutation (which is only sized by decompose()).
 */
void gctl::lu::solve(const array<double>& b, array<double> &x)
{
    if (b.empty())
    {
        throw domain_error("Invalid target vector. From gctl::lu::solve(...)");
    }

    // Without this check a mismatching b would index past rowPermutation and the matrix.
    if (b.size() != rowPermutation.size())
    {
        throw domain_error("Target vector size does not match the decomposed matrix. From gctl::lu::solve(...)");
    }

    const int n = b.size();
    x.resize(n);

    // Forward substitution: solve L y = P b, storing y in x.
    for (int i = 0; i < n; i++)
    {
        const int currentRow = rowPermutation[i];
        double sum = 0.0;
        for (int j = 0; j < i; j++)
        {
            sum += (decomposedMatrix[currentRow][j] * x[j]);
        }
        x[i] = (b[currentRow] - sum);
    }

    // Back substitution: solve U x = y in place, from the last unknown upwards.
    for (int i = n - 1; i >= 0; i--)
    {
        const int currentRow = rowPermutation[i];
        double sum = 0.0;
        for (int j = n - 1; j > i; j--)
        {
            sum += (decomposedMatrix[currentRow][j] * x[j]);
        }
        x[i] = (x[i] - sum) / decomposedMatrix[currentRow][i];
    }
    return;
}

56
lib/optimization/lu.h Normal file
View File

@ -0,0 +1,56 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#ifndef _GCTL_LU_H
#define _GCTL_LU_H
#include "gctl/core.h"
namespace gctl
{
/**
* @brief Lower/upper (LU) decomposition of a square matrix into a lower triangular
* matrix and an upper triangular matrix, and solution of A x = b with the factors.
*
* The object keeps a reference to the caller's matrix and overwrites it with the
* factors, so that matrix must outlive the object. Copying is disabled.
*/
class lu
{
public:
lu(matrix<double> &sourceMatrix); // Binds to sourceMatrix; the matrix is decomposed in-place by decompose()
virtual ~lu(){}
void decompose(); ///< Decomposition into triangular matrices. Throws runtime_error if the matrix is singular.
void solve(const array<double>& b, array<double> &x); ///< solve for x in form Ax = b. A is the original input matrix.
protected:
lu(const lu&) = delete;
void operator=(const lu&) = delete;
matrix<double> &decomposedMatrix; // Reference to the input matrix; holds the factors after decomposition
array<int> rowPermutation; // Permutation of rows during pivoting
};
}
#endif // _GCTL_LU_H

634
lib/optimization/sgd.cpp Normal file
View File

@ -0,0 +1,634 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "sgd.h"
/**
 * Default parameters for the SGD methods, listed in the member order of
 * gctl::sgd_para.
 */
static const gctl::sgd_para sgd_defparam = {
    0,      // iteration
    1e-6,   // epsilon
    0.01,   // mu
    0.01,   // alpha
    0.9,    // beta_1
    0.999,  // beta_2
    1e-8    // sigma
};
/**
 * @brief Create a solver with the default parameters, a report interval of one
 *        iteration, progress reporting enabled and no solver selected yet.
 */
gctl::sgd_solver::sgd_solver()
    : sgd_param_(sgd_defparam),
      sgd_inter_(1),
      sgd_silent_(false),
      solver_name_("Undefined")
{
}

/// Nothing to release.
gctl::sgd_solver::~sgd_solver() = default;
/**
 * @brief Default progress monitor: rewrites a one-line status on std::clog.
 *
 * A status line is printed when the objective has dropped to param.epsilon or
 * below (only if epsilon is positive), or when k is a multiple of the report
 * interval. Nothing is printed in silent mode.
 *
 * @param fx    Current objective value.
 * @param x     Current model (unused by this implementation).
 * @param param Parameters of the running solver.
 * @param k     Number of iterations finished so far.
 * @return Always 0. The solvers stop with SGD_STOP when an override returns non-zero.
 */
int gctl::sgd_solver::SGD_Progress(double fx, const array<double> &x, const sgd_para &param, const int k)
{
    if (!sgd_silent_)
    {
        const bool reached_target = param.epsilon > 0.0 && fx <= param.epsilon;
        const bool report_due = sgd_inter_ > 0 && k%sgd_inter_ == 0;

        if (reached_target || report_due)
        {
            std::clog << GCTL_CLEARLINE << "\rF(x) = " << fx << ", Train-Times = " << k;
        }
    }
    return 0;
}
/// Switch the solver to silent mode (sets sgd_silent_; there is no way back in this class).
void gctl::sgd_solver::sgd_silent()
{
    sgd_silent_ = true;
}
/**
 * @brief Set how often SGD_Progress() prints a status line.
 * @param inter Report every inter iterations; values <= 0 disable the periodic report.
 */
void gctl::sgd_solver::set_sgd_report_interval(int inter)
{
    sgd_inter_ = inter;
}
/**
 * @brief Replace the whole parameter set of the solver.
 * @param in_param New parameters. They are stored as given and only validated
 *                 when one of the solver methods is started.
 */
void gctl::sgd_solver::set_sgd_para(const sgd_para &in_param)
{
    sgd_param_ = in_param;
}
// The declaration in sgd.h (and the toml.hpp include) only exist when the TOML
// option is enabled, so the definition has to be guarded the same way.
#ifdef GCTL_OPTIMIZATION_TOML
/**
 * @brief Load the solver parameters from the [sgd] table of a TOML document.
 *
 * All parameters are first reset to their defaults; every key present in the
 * table (iteration, epsilon, mu, alpha, beta_1, beta_2, sigma) then overrides
 * the corresponding default. A document without an "sgd" entry leaves the
 * defaults in place.
 *
 * @param toml_data Parsed TOML document.
 */
void gctl::sgd_solver::set_sgd_para(const toml::value &toml_data)
{
    sgd_param_ = sgd_defparam;

    const std::string SGD = "sgd";
    if (!toml_data.contains(SGD)) return;

    // Look the table up once instead of once per key.
    const toml::value &section = toml_data.at(SGD);
    if (section.contains("iteration")) sgd_param_.iteration = toml::find<int>(section, "iteration");
    if (section.contains("epsilon")) sgd_param_.epsilon = toml::find<double>(section, "epsilon");
    if (section.contains("mu")) sgd_param_.mu = toml::find<double>(section, "mu");
    if (section.contains("alpha")) sgd_param_.alpha = toml::find<double>(section, "alpha");
    if (section.contains("beta_1")) sgd_param_.beta_1 = toml::find<double>(section, "beta_1");
    if (section.contains("beta_2")) sgd_param_.beta_2 = toml::find<double>(section, "beta_2");
    if (section.contains("sigma")) sgd_param_.sigma = toml::find<double>(section, "sigma");
    return;
}
#endif // GCTL_OPTIMIZATION_TOML
void gctl::sgd_solver::show_solver()
{
std::clog << "Solver's Setup Panel\n";
std::clog << "-----------------------------\n";
std::clog << "Solver: " << solver_name_ << "\n";
std::clog << "Iteration = " << sgd_param_.iteration << ", Epsilon = " << sgd_param_.epsilon << ", Mu = " << sgd_param_.mu << "\n";
std::clog << "Alpha = " << sgd_param_.alpha << ", Beta1 = " << sgd_param_.beta_1 << ", Beta2 = " << sgd_param_.beta_2 << ", Sigma = " << sgd_param_.sigma << "\n";
std::clog << "=============================\n";
return;
}
/**
 * @brief Report a solver return code, either as a coloured message or as an exception.
 *
 * With err_throw == false the message is written to ss, preceded by a coloured
 * "Success"/"Fail" tag and followed by a colour reset and a line break.
 * With err_throw == true no decoration is written: negative codes are thrown and
 * non-negative codes are written to ss as plain text without a line break.
 *
 * @param err_code  Return code of a solver; negative values are failures.
 * @param ss        Output stream for the message.
 * @param err_throw Throw failures instead of printing them.
 * @throws std::string The message text, when err_throw is set and err_code is negative.
 */
void gctl::sgd_solver::sgd_error_str(sgd_return_code err_code, std::ostream &ss, bool err_throw)
{
    const bool succeeded = (err_code >= 0);
    const bool decorate = !err_throw;

    // Opening tag: green for success, red for failure.
    if (decorate)
    {
#if defined _WINDOWS || __WIN32__
        SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE),
            FOREGROUND_INTENSITY | (succeeded ? FOREGROUND_GREEN : FOREGROUND_RED));
        ss << (succeeded ? "Success! " : "Fail! ");
#else
        ss << (succeeded ? "\033[1m\033[32mSGD Success! " : "\033[1m\033[31mSGD Fail! ");
#endif
    }

    // Translate the code into its message.
    std::string err_str;
    switch (err_code)
    {
        case SGD_SUCCESS:                err_str = "Success."; break;
        case SGD_CONVERGENCE:            err_str = "The iteration reached convergence."; break;
        case SGD_STOP:                   err_str = "The iteration stopped by the progress evaluation function."; break;
        case SGD_INVALID_VARIABLE_SIZE:  err_str = "Invalid array size."; break;
        case SGD_REACHED_MAX_ITERATIONS: err_str = "The maximal iteration is reached."; break;
        case SGD_INVALID_EPSILON:        err_str = "Invalid value for epsilon."; break;
        case SGD_INVALID_BETA:           err_str = "Invalid value for beta."; break;
        case SGD_INVALID_MU:             err_str = "Invalid value for mu."; break;
        case SGD_INVALID_ALPHA:          err_str = "Invalid value for alpha."; break;
        case SGD_INVALID_SIGMA:          err_str = "Invalid value for sigma."; break;
        case SGD_NAN_VALUE:              err_str = "NaN values found."; break;
        case SGD_UNKNOWN_ERROR:
        default:                         err_str = "Unknown error."; break;
    }

    if (err_throw && !succeeded) throw err_str;
    ss << err_str;

    // Closing part: restore the default colour and end the line.
    if (decorate)
    {
#if defined _WINDOWS || __WIN32__
        SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7);
        ss << std::endl;
#else
        ss << "\033[0m" << std::endl;
#endif
    }
    return;
}
/// Return a copy of the library's default SGD parameters (the solver's own settings are not touched).
gctl::sgd_para gctl::sgd_solver::default_sgd_para()
{
    return sgd_defparam;
}
/**
 * @brief Run one of the SGD methods on the model m and report the outcome.
 *
 * In silent mode the solver is run without timing or printing, and any negative
 * return code is thrown regardless of err_throw. Otherwise the run is timed
 * (omp_get_wtime() with GCTL_OPENMP, clock() without), the time is printed to
 * std::clog unless err_throw is set, and the return code is passed to
 * sgd_error_str() when verbose is set or the code is negative.
 *
 * Note that SGD_REACHED_MAX_ITERATIONS is a negative code and is therefore
 * handled as a failure here.
 *
 * @param m         In: initial model. Out: model after the last iteration.
 * @param solver_id Method to use.
 * @param ss        Stream that receives the final status message.
 * @param verbose   Also report non-negative return codes.
 * @param err_throw Throw failures instead of printing them.
 * @throws std::invalid_argument if solver_id is not a known method.
 * @throws std::string the failure message, see sgd_error_str().
 */
void gctl::sgd_solver::SGD_Minimize(array<double> &m, sgd_solver_type solver_id, std::ostream &ss, bool verbose, bool err_throw)
{
    // Single dispatch point: records the solver's name and runs it.
    auto run_solver = [&]() -> sgd_return_code
    {
        switch (solver_id)
        {
            case MOMENTUM:  solver_name_ = "MOMENTUM";  return momentum(m);
            case NAG:       solver_name_ = "NAG";       return nag(m);
            case ADAGRAD:   solver_name_ = "ADAGRAD";   return adagrad(m);
            case RMSPROP:   solver_name_ = "RMSPROP";   return rmsprop(m);
            case ADAM:      solver_name_ = "ADAM";      return adam(m);
            case NADAM:     solver_name_ = "NADAM";     return nadam(m);
            case ADAMAX:    solver_name_ = "ADAMAX";    return adamax(m);
            case ADABELIEF: solver_name_ = "ADABELIEF"; return adabelief(m);
            default: break;
        }
        throw std::invalid_argument("Invalid solver type. gctl::sgd_solver::SGD_Minimize(...)");
    };

    if (sgd_silent_)
    {
        const sgd_return_code ret = run_solver();
        if (ret < 0) sgd_error_str(ret, ss, true);
        return;
    }

    // Time the run in milliseconds.
#ifdef GCTL_OPENMP
    const double start = omp_get_wtime();
    const sgd_return_code ret = run_solver();
    const double end = omp_get_wtime();
    const double costime = 1000*(end-start);
#else
    const clock_t start = clock();
    const sgd_return_code ret = run_solver();
    const clock_t end = clock();
    const double costime = 1000*(end-start)/(double)CLOCKS_PER_SEC;
#endif

    if (!err_throw)
    {
        // Finish the progress line before printing the summary.
        std::clog << std::endl;
        std::clog << "Solver: " << solver_name_ << ". Time cost: " << costime << " ms" << std::endl;
    }

    if (verbose || ret < 0) sgd_error_str(ret, ss, err_throw);
    return;
}
/**
 * @brief Gradient descent with classic momentum.
 *
 * Each iteration updates v = mu*v + g and m = m - alpha*v, where g is the
 * gradient returned by SGD_Evaluate() at m.
 *
 * @param m In: initial model. Out: model after the last iteration.
 * @return SGD_CONVERGENCE when the objective falls below epsilon, SGD_STOP when
 *         SGD_Progress() returns non-zero, SGD_REACHED_MAX_ITERATIONS when the
 *         iteration limit (if positive) is hit, SGD_NAN_VALUE when a model value
 *         becomes NaN, or an SGD_INVALID_* code for a bad size, epsilon or mu.
 */
gctl::sgd_return_code gctl::sgd_solver::momentum(array<double> &m)
{
    const int dim = m.size();

    // Validate the problem size and the parameters used by this method.
    if (dim <= 0) return SGD_INVALID_VARIABLE_SIZE;
    if (sgd_param_.epsilon < 0) return SGD_INVALID_EPSILON;
    if (sgd_param_.mu < 0 || sgd_param_.mu >= 1.0) return SGD_INVALID_MU;

    array<double> velocity(dim, 0.0);
    array<double> grad(dim);

    int step = 0;
    do
    {
        const double fx = SGD_Evaluate(m, grad);
        if (SGD_Progress(fx, m, sgd_param_, step)) return SGD_STOP;
        if (fx < sgd_param_.epsilon) return SGD_CONVERGENCE;

        for (int i = 0; i < dim; i++)
        {
            velocity[i] = sgd_param_.mu*velocity[i] + grad[i];
            m[i] -= sgd_param_.alpha * velocity[i];
            if (m[i] != m[i]) return SGD_NAN_VALUE; // only NaN differs from itself
        }

        step++;
    }
    while (sgd_param_.iteration <= 0 || step < sgd_param_.iteration); // a non-positive limit means "no limit"

    return SGD_REACHED_MAX_ITERATIONS;
}
/**
 * @brief Nesterov's accelerated gradient (NAG).
 *
 * The gradient is evaluated at the look-ahead point m - mu*alpha*v instead of at
 * m; the velocity and the model are then updated as in the momentum method. The
 * progress callback receives the objective of the look-ahead point together with
 * the current model m.
 *
 * @param m In: initial model. Out: model after the last iteration.
 * @return SGD_CONVERGENCE, SGD_STOP, SGD_REACHED_MAX_ITERATIONS, SGD_NAN_VALUE,
 *         or an SGD_INVALID_* code for a bad size, epsilon or mu.
 */
gctl::sgd_return_code gctl::sgd_solver::nag(array<double> &m)
{
    const int dim = m.size();

    // Validate the problem size and the parameters used by this method.
    if (dim <= 0) return SGD_INVALID_VARIABLE_SIZE;
    if (sgd_param_.epsilon < 0) return SGD_INVALID_EPSILON;
    if (sgd_param_.mu < 0 || sgd_param_.mu >= 1.0) return SGD_INVALID_MU;

    array<double> velocity(dim, 0.0);
    array<double> lookahead(dim);
    array<double> grad(dim);

    int step = 0;
    do
    {
        // Look-ahead position at which the gradient is taken.
        for (int i = 0; i < dim; i++)
        {
            lookahead[i] = m[i] - sgd_param_.mu*sgd_param_.alpha*velocity[i];
        }

        const double fx = SGD_Evaluate(lookahead, grad);
        if (SGD_Progress(fx, m, sgd_param_, step)) return SGD_STOP;
        if (fx < sgd_param_.epsilon) return SGD_CONVERGENCE;

        for (int i = 0; i < dim; i++)
        {
            velocity[i] = sgd_param_.mu*velocity[i] + grad[i];
            m[i] -= sgd_param_.alpha * velocity[i];
            if (m[i] != m[i]) return SGD_NAN_VALUE; // only NaN differs from itself
        }

        step++;
    }
    while (sgd_param_.iteration <= 0 || step < sgd_param_.iteration); // a non-positive limit means "no limit"

    return SGD_REACHED_MAX_ITERATIONS;
}
/**
 * @brief AdaGrad: per-element step scaled by the accumulated squared gradients.
 *
 * Each iteration updates G = G + g*g and m = m - alpha*g/(sqrt(G) + sigma).
 *
 * @param m In: initial model. Out: model after the last iteration.
 * @return SGD_CONVERGENCE, SGD_STOP, SGD_REACHED_MAX_ITERATIONS, SGD_NAN_VALUE,
 *         or an SGD_INVALID_* code for a bad size, epsilon or sigma.
 */
gctl::sgd_return_code gctl::sgd_solver::adagrad(array<double> &m)
{
    const int dim = m.size();

    // Validate the problem size and the parameters used by this method.
    if (dim <= 0) return SGD_INVALID_VARIABLE_SIZE;
    if (sgd_param_.epsilon < 0.0) return SGD_INVALID_EPSILON;
    if (sgd_param_.sigma < 0.0) return SGD_INVALID_SIGMA;

    array<double> sq_sum(dim, 0.0);
    array<double> grad(dim);

    int step = 0;
    do
    {
        const double fx = SGD_Evaluate(m, grad);
        if (SGD_Progress(fx, m, sgd_param_, step)) return SGD_STOP;
        if (fx < sgd_param_.epsilon) return SGD_CONVERGENCE;

        for (int i = 0; i < dim; i++)
        {
            sq_sum[i] = sq_sum[i] + grad[i]*grad[i];
            m[i] -= sgd_param_.alpha * grad[i]/(sqrt(sq_sum[i]) + sgd_param_.sigma);
            if (m[i] != m[i]) return SGD_NAN_VALUE; // only NaN differs from itself
        }

        step++;
    }
    while (sgd_param_.iteration <= 0 || step < sgd_param_.iteration); // a non-positive limit means "no limit"

    return SGD_REACHED_MAX_ITERATIONS;
}
/**
 * @brief RMSProp: per-element step scaled by a running average of squared gradients.
 *
 * Each iteration updates v = beta_2*v + (1 - beta_2)*g*g and
 * m = m - alpha*g/(sqrt(v) + sigma).
 *
 * @param m In: initial model. Out: model after the last iteration.
 * @return SGD_CONVERGENCE when the objective falls below epsilon, SGD_STOP when
 *         SGD_Progress() returns non-zero, SGD_REACHED_MAX_ITERATIONS when the
 *         iteration limit (if positive) is hit, SGD_NAN_VALUE when a model value
 *         becomes NaN, or an SGD_INVALID_* code for a bad size, epsilon, beta_2
 *         or sigma.
 */
gctl::sgd_return_code gctl::sgd_solver::rmsprop(array<double> &m)
{
    const int n_size = m.size();

    //check parameters
    if (n_size <= 0) return SGD_INVALID_VARIABLE_SIZE;
    if (sgd_param_.epsilon < 0.0) return SGD_INVALID_EPSILON;
    // beta_2 drives the running average below; use the same range as the Adam-type solvers.
    if (sgd_param_.beta_2 < 0.0 || sgd_param_.beta_2 >= 1.0) return SGD_INVALID_BETA;
    if (sgd_param_.sigma < 0.0) return SGD_INVALID_SIGMA;

    array<double> vk(n_size, 0.0);
    array<double> g (n_size);

    int t = 0;
    while (1)
    {
        const double fx = SGD_Evaluate(m, g);
        if (SGD_Progress(fx, m, sgd_param_, t)) return SGD_STOP;
        if (fx < sgd_param_.epsilon) return SGD_CONVERGENCE;

        for (int i = 0; i < n_size; i++)
        {
            vk[i] = sgd_param_.beta_2 * vk[i] + (1.0 - sgd_param_.beta_2)*g[i]*g[i];
            m[i] = m[i] - sgd_param_.alpha * g[i]/(sqrt(vk[i]) + sgd_param_.sigma);
            if (m[i] != m[i]) return SGD_NAN_VALUE; // only NaN differs from itself
        }

        t++;
        // A non-positive iteration limit means "no limit".
        if (sgd_param_.iteration > 0 && t >= sgd_param_.iteration) break;
    }
    return SGD_REACHED_MAX_ITERATIONS;
}
/**
 * @brief Adam: first- and second-moment estimates with bias correction.
 *
 * Each iteration updates m1 = beta_1*m1 + (1 - beta_1)*g and
 * m2 = beta_2*m2 + (1 - beta_2)*g*g, then moves the model by
 * alpha_k*m1/(sqrt(m2) + sigma) with the bias-corrected step
 * alpha_k = alpha*sqrt(1 - beta_2^t)/(1 - beta_1^t).
 *
 * The element loop may run in parallel under OpenMP, so NaN values are counted
 * with a reduction and reported after the loop instead of returning from inside it.
 *
 * @param m In: initial model. Out: model after the last iteration.
 * @return SGD_CONVERGENCE when the objective falls below epsilon, SGD_STOP when
 *         SGD_Progress() returns non-zero, SGD_REACHED_MAX_ITERATIONS when the
 *         iteration limit (if positive) is hit, SGD_NAN_VALUE when a model value
 *         becomes NaN, or an SGD_INVALID_* code for a bad size, epsilon, alpha,
 *         beta or sigma.
 */
gctl::sgd_return_code gctl::sgd_solver::adam(array<double> &m)
{
    const int n_size = m.size();

    //check parameters
    if (n_size <= 0) return SGD_INVALID_VARIABLE_SIZE;
    if (sgd_param_.epsilon < 0) return SGD_INVALID_EPSILON;
    if (sgd_param_.alpha < 0) return SGD_INVALID_ALPHA;
    if (sgd_param_.beta_1 < 0.0 || sgd_param_.beta_1 >= 1.0) return SGD_INVALID_BETA;
    if (sgd_param_.beta_2 < 0.0 || sgd_param_.beta_2 >= 1.0) return SGD_INVALID_BETA;
    if (sgd_param_.sigma < 0.0) return SGD_INVALID_SIGMA;

    array<double> mk(n_size, 0.0);
    array<double> vk(n_size, 0.0);
    array<double> g (n_size);

    double beta_1t = 1.0, beta_2t = 1.0; // running powers beta_1^t and beta_2^t
    int t = 0;
    while (1)
    {
        const double fx = SGD_Evaluate(m, g);
        if (SGD_Progress(fx, m, sgd_param_, t)) return SGD_STOP;
        if (fx < sgd_param_.epsilon) return SGD_CONVERGENCE;

        beta_1t *= sgd_param_.beta_1;
        beta_2t *= sgd_param_.beta_2;
        const double alpha_k = sgd_param_.alpha * sqrt(1.0 - beta_2t)/(1.0 - beta_1t);

        // A return statement is not allowed inside an OpenMP loop, hence the counter.
        int nan_count = 0;
        #pragma omp parallel for reduction(+:nan_count) schedule(guided)
        for (int i = 0; i < n_size; i++)
        {
            mk[i] = sgd_param_.beta_1*mk[i] + (1.0 - sgd_param_.beta_1)*g[i];
            vk[i] = sgd_param_.beta_2*vk[i] + (1.0 - sgd_param_.beta_2)*g[i]*g[i];
            m[i] = m[i] - alpha_k * mk[i]/(sqrt(vk[i]) + sgd_param_.sigma);
            if (m[i] != m[i]) nan_count++; // only NaN differs from itself
        }
        if (nan_count > 0) return SGD_NAN_VALUE;

        t++;
        // A non-positive iteration limit means "no limit".
        if (sgd_param_.iteration > 0 && t >= sgd_param_.iteration) break;
    }
    return SGD_REACHED_MAX_ITERATIONS;
}
/**
 * @brief Nadam: Adam-type moment estimates combined with a Nesterov-style step.
 *
 * Each iteration keeps running averages of g and g*g, bias-corrects them, and
 * moves the model along a weighted sum of the corrected gradient and the
 * corrected first moment, divided by sqrt(second moment) + sigma.
 *
 * NOTE(review): the two terms of the step are weighted by (1 - beta_1^t) and
 * beta_1^(t+1). The first weight cancels the bias correction of the gradient,
 * and the second one decays towards zero. The usual Nadam formulation uses the
 * constant weights (1 - beta_1) and beta_1 - confirm this is intended.
 *
 * @param m In: initial model. Out: model after the last iteration.
 * @return SGD_CONVERGENCE, SGD_STOP, SGD_REACHED_MAX_ITERATIONS, SGD_NAN_VALUE,
 *         or an SGD_INVALID_* code for a bad size, epsilon, alpha, beta or sigma.
 */
gctl::sgd_return_code gctl::sgd_solver::nadam(array<double> &m)
{
    const int dim = m.size();

    // Validate the problem size and the parameters used by this method.
    if (dim <= 0) return SGD_INVALID_VARIABLE_SIZE;
    if (sgd_param_.epsilon < 0) return SGD_INVALID_EPSILON;
    if (sgd_param_.alpha < 0) return SGD_INVALID_ALPHA;
    if (sgd_param_.beta_1 < 0.0 || sgd_param_.beta_1 >= 1.0) return SGD_INVALID_BETA;
    if (sgd_param_.beta_2 < 0.0 || sgd_param_.beta_2 >= 1.0) return SGD_INVALID_BETA;
    if (sgd_param_.sigma < 0.0) return SGD_INVALID_SIGMA;

    array<double> mom1(dim, 0.0);
    array<double> mom1_hat(dim);
    array<double> mom2(dim, 0.0);
    array<double> mom2_hat(dim);
    array<double> grad(dim);
    array<double> grad_hat(dim);

    // Running powers: beta_1^t, beta_1^(t+1) and beta_2^t.
    double b1_pow = 1.0;
    double b1_pow_next = sgd_param_.beta_1;
    double b2_pow = 1.0;

    int step = 0;
    do
    {
        const double fx = SGD_Evaluate(m, grad);
        if (SGD_Progress(fx, m, sgd_param_, step)) return SGD_STOP;
        if (fx < sgd_param_.epsilon) return SGD_CONVERGENCE;

        b1_pow *= sgd_param_.beta_1;
        b1_pow_next *= sgd_param_.beta_1;
        b2_pow *= sgd_param_.beta_2;

        for (int i = 0; i < dim; i++)
        {
            grad_hat[i] = grad[i]/(1.0 - b1_pow);
            mom1[i] = sgd_param_.beta_1*mom1[i] + (1.0 - sgd_param_.beta_1)*grad[i];
            mom2[i] = sgd_param_.beta_2*mom2[i] + (1.0 - sgd_param_.beta_2)*grad[i]*grad[i];
            mom1_hat[i] = mom1[i]/(1.0 - b1_pow_next);
            mom2_hat[i] = mom2[i]/(1.0 - b2_pow);

            m[i] = m[i] - sgd_param_.alpha * ((1.0 - b1_pow)*grad_hat[i]
                + b1_pow_next*mom1_hat[i])/(sqrt(mom2_hat[i]) + sgd_param_.sigma);
            if (m[i] != m[i]) return SGD_NAN_VALUE; // only NaN differs from itself
        }

        step++;
    }
    while (sgd_param_.iteration <= 0 || step < sgd_param_.iteration); // a non-positive limit means "no limit"

    return SGD_REACHED_MAX_ITERATIONS;
}
/**
 * @brief AdaMax: Adam variant that scales the step by an infinity-norm estimate.
 *
 * Each iteration updates m1 = beta_1*m1 + (1 - beta_1)*g and
 * u = max(beta_2*u, |g|), then moves the model by
 * alpha*m1/((1 - beta_1^t)*(u + sigma)).
 *
 * sigma keeps the denominator away from zero: without it a gradient component
 * that is exactly zero on the first iteration gives u = 0 and a 0/0 update.
 *
 * @param m In: initial model. Out: model after the last iteration.
 * @return SGD_CONVERGENCE when the objective falls below epsilon, SGD_STOP when
 *         SGD_Progress() returns non-zero, SGD_REACHED_MAX_ITERATIONS when the
 *         iteration limit (if positive) is hit, SGD_NAN_VALUE when a model value
 *         becomes NaN, or an SGD_INVALID_* code for a bad size, epsilon, alpha,
 *         beta or sigma.
 */
gctl::sgd_return_code gctl::sgd_solver::adamax(array<double> &m)
{
    const int n_size = m.size();

    //check parameters
    if (n_size <= 0) return SGD_INVALID_VARIABLE_SIZE;
    if (sgd_param_.epsilon < 0) return SGD_INVALID_EPSILON;
    if (sgd_param_.alpha < 0) return SGD_INVALID_ALPHA;
    if (sgd_param_.beta_1 < 0.0 || sgd_param_.beta_1 >= 1.0) return SGD_INVALID_BETA;
    if (sgd_param_.beta_2 < 0.0 || sgd_param_.beta_2 >= 1.0) return SGD_INVALID_BETA;
    if (sgd_param_.sigma < 0.0) return SGD_INVALID_SIGMA;

    array<double> mk(n_size, 0.0);
    array<double> vk(n_size, 0.0);
    array<double> g (n_size);

    double beta_1t = 1.0; // running power beta_1^t
    int t = 0;
    while (1)
    {
        const double fx = SGD_Evaluate(m, g);
        if (SGD_Progress(fx, m, sgd_param_, t)) return SGD_STOP;
        if (fx < sgd_param_.epsilon) return SGD_CONVERGENCE;

        beta_1t *= sgd_param_.beta_1;

        for (int i = 0; i < n_size; i++)
        {
            mk[i] = sgd_param_.beta_1*mk[i] + (1.0 - sgd_param_.beta_1)*g[i];
            vk[i] = std::max(sgd_param_.beta_2*vk[i], std::fabs(g[i]));
            m[i] = m[i] - sgd_param_.alpha * mk[i]/((1.0 - beta_1t)*(vk[i] + sgd_param_.sigma));
            if (m[i] != m[i]) return SGD_NAN_VALUE; // only NaN differs from itself
        }

        t++;
        // A non-positive iteration limit means "no limit".
        if (sgd_param_.iteration > 0 && t >= sgd_param_.iteration) break;
    }
    return SGD_REACHED_MAX_ITERATIONS;
}
/**
 * @brief AdaBelief: Adam-type update whose second moment tracks the squared
 *        deviation of the gradient from its running mean.
 *
 * Each iteration updates m1 = beta_1*m1 + (1 - beta_1)*g and
 * s = beta_2*s + (1 - beta_2)*(g - m1)^2, then moves the model by
 * alpha_k*m1/(sqrt(s) + sigma) with
 * alpha_k = alpha*sqrt(1 - beta_2^t)/(1 - beta_1^t).
 *
 * @param m In: initial model. Out: model after the last iteration.
 * @return SGD_CONVERGENCE, SGD_STOP, SGD_REACHED_MAX_ITERATIONS, SGD_NAN_VALUE,
 *         or an SGD_INVALID_* code for a bad size, epsilon, alpha, beta or sigma.
 */
gctl::sgd_return_code gctl::sgd_solver::adabelief(array<double> &m)
{
    const int dim = m.size();

    // Validate the problem size and the parameters used by this method.
    if (dim <= 0) return SGD_INVALID_VARIABLE_SIZE;
    if (sgd_param_.epsilon < 0) return SGD_INVALID_EPSILON;
    if (sgd_param_.alpha < 0) return SGD_INVALID_ALPHA;
    if (sgd_param_.beta_1 < 0.0 || sgd_param_.beta_1 >= 1.0) return SGD_INVALID_BETA;
    if (sgd_param_.beta_2 < 0.0 || sgd_param_.beta_2 >= 1.0) return SGD_INVALID_BETA;
    if (sgd_param_.sigma < 0.0) return SGD_INVALID_SIGMA;

    array<double> mean(dim, 0.0);
    array<double> belief(dim, 0.0);
    array<double> grad(dim);

    // Running powers beta_1^t and beta_2^t.
    double b1_pow = 1.0;
    double b2_pow = 1.0;

    int step = 0;
    do
    {
        const double fx = SGD_Evaluate(m, grad);
        if (SGD_Progress(fx, m, sgd_param_, step)) return SGD_STOP;
        if (fx < sgd_param_.epsilon) return SGD_CONVERGENCE;

        b1_pow *= sgd_param_.beta_1;
        b2_pow *= sgd_param_.beta_2;
        const double alpha_k = sgd_param_.alpha * sqrt(1.0 - b2_pow)/(1.0 - b1_pow);

        for (int i = 0; i < dim; i++)
        {
            mean[i] = sgd_param_.beta_1*mean[i] + (1.0 - sgd_param_.beta_1)*grad[i];
            belief[i] = sgd_param_.beta_2*belief[i] + (1.0 - sgd_param_.beta_2)*(grad[i] - mean[i])*(grad[i] - mean[i]);
            m[i] = m[i] - alpha_k * mean[i]/(sqrt(belief[i]) + sgd_param_.sigma);
            if (m[i] != m[i]) return SGD_NAN_VALUE; // only NaN differs from itself
        }

        step++;
    }
    while (sgd_param_.iteration <= 0 || step < sgd_param_.iteration); // a non-positive limit means "no limit"

    return SGD_REACHED_MAX_ITERATIONS;
}

201
lib/optimization/sgd.h Normal file
View File

@ -0,0 +1,201 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#ifndef _GCTL_SGD_H
#define _GCTL_SGD_H
#include "gctl/core.h"
#include "gctl/algorithm.h"
#include "gctl_optimization_config.h"
#ifdef GCTL_OPTIMIZATION_TOML
#include "toml.hpp"
#endif // GCTL_OPTIMIZATION_TOML
#if defined _WINDOWS || __WIN32__
#include "windows.h"
#endif // _WINDOWS || __WIN32__
// Must use the same macro as the GCTL_OPENMP guard around omp_get_wtime() in sgd.cpp.
#ifdef GCTL_OPENMP
#include "omp.h"
#endif // GCTL_OPENMP
namespace gctl
{
/**
 * @brief Methods that can be selected through sgd_solver::SGD_Minimize().
 */
enum sgd_solver_type
{
    MOMENTUM,  ///< Classic momentum.
    NAG,       ///< Nesterov's accelerated gradient (NAG).
    ADAGRAD,   ///< AdaGrad method.
    RMSPROP,   ///< RMSProp method.
    ADAM,      ///< Adam method.
    NADAM,     ///< Nadam method.
    ADAMAX,    ///< AdaMax method.
    ADABELIEF, ///< AdaBelief method.
};
/**
 * @brief Return codes of the SGD solver methods.
 *
 * Non-negative codes are treated as success and negative codes as failure.
 * The numeric values are spelled out so that the sign of each code is visible.
 */
enum sgd_return_code
{
    SGD_SUCCESS                = 0,     ///< The optimization terminated successfully.
    SGD_CONVERGENCE            = 1,     ///< The optimization reached convergence.
    SGD_STOP                   = 2,     ///< The process was stopped by the monitoring function.
    SGD_UNKNOWN_ERROR          = -1024, ///< Unknown error.
    SGD_INVALID_VARIABLE_SIZE  = -1023, ///< The variable size is zero or negative.
    SGD_INVALID_EPSILON        = -1022, ///< The epsilon is negative.
    SGD_REACHED_MAX_ITERATIONS = -1021, ///< Iteration reached max limit.
    SGD_INVALID_MU             = -1020, ///< Invalid value for mu.
    SGD_INVALID_ALPHA          = -1019, ///< Invalid value for alpha.
    SGD_INVALID_BETA           = -1018, ///< Invalid value for beta.
    SGD_INVALID_SIGMA          = -1017, ///< Invalid value for sigma.
    SGD_NAN_VALUE              = -1016, ///< NaN value.
};
/**
 * @brief Parameters of the SGD methods.
 *
 * The struct is a plain aggregate; the member order matters because the
 * defaults are given as a brace-enclosed list.
 */
struct sgd_para
{
    /// Maximal number of iterations. With a value of zero or less the iteration
    /// only stops on convergence (or on another return code). The default is 0.
    int iteration;

    /// Threshold of the convergence test: the iteration ends once the objective
    /// value drops below it. Must not be negative. The default is 1e-6.
    double epsilon;

    /// Damping rate of the classic momentum method and the NAG method.
    /// The valid range is [0, 1) and the default is 0.01.
    double mu;

    /// Step size of the iteration. The default is 0.01.
    double alpha;

    /// Exponential decay rate of the first order moment estimates.
    /// The valid range is [0, 1) and the default is 0.9.
    double beta_1;

    /// Exponential decay rate of the second order moment estimates.
    /// The valid range is [0, 1) and the default is 0.999.
    double beta_2;

    /// Small non-negative number added to denominators to keep the update
    /// well defined. The default is 1e-8.
    double sigma;
};
/**
 * @brief Abstract base class of the stochastic-gradient-descent type solvers.
 *
 * A user derives from this class and implements SGD_Evaluate(); the model is then
 * optimized by SGD_Minimize() or by calling one of the solver methods directly.
 */
class sgd_solver
{
private:
sgd_para sgd_param_; // Parameters used by the solver methods
int sgd_inter_; // Report interval (in iterations) of SGD_Progress()
bool sgd_silent_; // True when progress reporting is switched off
std::string solver_name_; // Name of the solver selected by the last SGD_Minimize() call
public:
sgd_solver();
virtual ~sgd_solver();
// User-supplied objective. The return value is used by the solvers as the objective
// value at x (it is compared against epsilon), and g is used as the gradient at x.
virtual double SGD_Evaluate(const array<double> &x, array<double> &g) = 0;
// Progress monitor called once per iteration with the objective value fx, the model x,
// the parameters in use and the iteration count k. A non-zero return value makes the
// running solver return SGD_STOP. The default implementation prints a status line.
virtual int SGD_Progress(double fx, const array<double> &x, const sgd_para &param, const int k);
// Switches progress reporting off.
void sgd_silent();
// Sets the report interval of the default SGD_Progress().
void set_sgd_report_interval(int inter);
// Replaces the solver parameters.
void set_sgd_para(const sgd_para &param);
// Prints the solver name and the parameters to std::clog.
void show_solver();
// Writes the message of err_code to ss, or throws it when err_throw is set and the code is negative.
void sgd_error_str(sgd_return_code err_code, std::ostream &ss = std::clog, bool err_throw = false);
// Returns the library's default parameters.
sgd_para default_sgd_para();
#ifdef GCTL_OPTIMIZATION_TOML
// Reads the solver parameters from the "sgd" table of a TOML document.
void set_sgd_para(const toml::value &toml_data);
#endif // GCTL_OPTIMIZATION_TOML
// Individual solver methods. Each one updates m in place and returns a sgd_return_code.
sgd_return_code momentum(array<double> &m);
sgd_return_code nag(array<double> &m);
sgd_return_code adagrad(array<double> &m);
sgd_return_code rmsprop(array<double> &m);
sgd_return_code adam(array<double> &m);
sgd_return_code nadam(array<double> &m);
sgd_return_code adamax(array<double> &m);
sgd_return_code adabelief(array<double> &m);
// Runs the solver chosen by solver_id on m, then reports the return code to ss.
void SGD_Minimize(array<double> &m, sgd_solver_type solver_id = ADAM, std::ostream &ss = std::clog, bool verbose = true, bool err_throw = false);
};
}
#endif // _GCTL_SGD_H

184
lib/optimization/svd.cpp Normal file
View File

@ -0,0 +1,184 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#include "svd.h"
/// Create an empty decomposition object with the default settings (see reset()).
gctl::svd::svd()
{
    reset();
}

/**
 * @brief Create the object with the default settings and decompose src_mat right away.
 * @param src_mat Matrix to decompose.
 */
gctl::svd::svd(const matrix<double> &src_mat)
    : svd()
{
    decompose(src_mat);
}
/**
 * @brief Drop any previous result and restore the default settings:
 *        1000 iterations at most, epsilon = 1e-8 and K = 0.
 *        With K = 0 the next decompose() call uses the column count as K.
 */
void gctl::svd::reset()
{
    // Results of an earlier decomposition.
    U.clear();
    V.clear();
    S.clear();

    // Settings.
    K = 0;
    maxi_iteration = 1000;
    epsilon = 1e-8;
}
/**
 * @brief Set the number of singular values computed by decompose().
 * @param k Requested number; must be positive.
 * @throws invalid_argument if k is zero or negative.
 */
void gctl::svd::set_singular_number(int k)
{
    if (k > 0)
    {
        K = k;
        return;
    }

    throw invalid_argument("Invalid singular number. From gctl::svd::set_singular_number(...)");
}
/**
 * @brief Set the maximal number of iterations spent on each singular value.
 * @param t Iteration limit; must be positive.
 * @throws invalid_argument if t is zero or negative.
 */
void gctl::svd::set_iteration(int t)
{
    if (t <= 0)
    {
        throw invalid_argument("Invalid iteration number. From gctl::svd::set_iteration(...)");
    }

    maxi_iteration = t;
    return;
}
/**
 * @brief Set the tolerance used by decompose() for its convergence and zero-norm tests.
 * @param e Tolerance; must be positive.
 * @throws invalid_argument if e is zero, negative or NaN.
 */
void gctl::svd::set_epsilon(double e)
{
    // Written as !(e > 0) so that NaN is rejected as well.
    if (!(e > 0.0))
    {
        throw invalid_argument("Invalid epsilon. From gctl::svd::set_epsilon(...)");
    }

    epsilon = e;
    return;
}
/**
 * @brief Compute up to K singular values of src_mat together with the matching
 *        left (rows of U) and right (rows of V) singular vectors.
 *
 * For every singular value a random left vector is refined by repeatedly
 * multiplying with the transposed matrix and the matrix, normalizing after each
 * product and removing the components along the vectors found so far. The
 * refinement stops when the left vector changes by less than epsilon (squared
 * difference) or after maxi_iteration rounds.
 *
 * NOTE(review): normalize() and orth() are helpers declared outside this file.
 * Judging from their use here, normalize(v, eps) scales v and returns its former
 * norm, and orth(a, b) removes from b its component along a - confirm.
 * NOTE(review): K keeps its value between calls unless reset() is called, and
 * srand(time(0)) reseeds the global C random generator on every call.
 *
 * @param src_mat Matrix to decompose (M rows, N columns).
 */
void gctl::svd::decompose(const matrix<double> &src_mat)
{
int M = src_mat.row_size();
int N = src_mat.col_size();
// K == 0 means "not set": fall back to the column count.
if (K == 0) K = N;
// Results are allocated for K triplets; presumably the last argument is the fill value - TODO confirm.
S.resize(K, 0.0);
U.resize(K, M, 0.0);
V.resize(K, N, 0.0);
srand(time(0));
array<double> left_vector(M), next_left_vector(M);
array<double> right_vector(N), next_right_vector(N);
array<double> U_tmp(M), V_tmp(N);
double diff, r, d;
// One pass per singular value.
for(int col=0;col<K;col++)
{
diff = 1;
r = -1;
// Draw a random start vector; repeat until its norm exceeds epsilon.
while(1)
{
for(int i=0;i<M;i++)
left_vector[i]= (double) rand() / RAND_MAX;
if(normalize(left_vector, epsilon) > epsilon)
break;
}
for(int iter=0; diff >= epsilon && iter < maxi_iteration; iter++)
{
next_left_vector.assign_all(0.0);
next_right_vector.assign_all(0.0);
// Right vector: transposed matrix times the current left vector.
for(int i=0;i<M;i++)
for(int j=0;j<N;j++)
next_right_vector[j]+=left_vector[i]*src_mat[i][j];
r=normalize(next_right_vector, epsilon);
// A vanishing product means there is nothing left to extract.
if(r<epsilon) break;
// Remove the components along the right vectors found so far.
for(int i=0;i<col;i++)
{
for (int j = 0; j < N; j++)
{
V_tmp[j] = V[i][j];
}
orth(V_tmp, next_right_vector);
}
normalize(next_right_vector, epsilon);
// Left vector: matrix times the new right vector; r becomes the singular value estimate.
for(int i=0;i<M;i++)
for(int j=0;j<N;j++)
next_left_vector[i]+=next_right_vector[j]*src_mat[i][j];
r=normalize(next_left_vector, epsilon);
if(r<epsilon) break;
// Remove the components along the left vectors found so far.
for(int i=0;i<col;i++)
{
for (int j = 0; j < M; j++)
{
U_tmp[j] = U[i][j];
}
orth(U_tmp, next_left_vector);
}
normalize(next_left_vector, epsilon);
// Squared change of the left vector, used as the convergence measure.
diff=0;
for(int i=0;i<M;i++)
{
d=next_left_vector[i]-left_vector[i];
diff+=d*d;
}
// Accept the new pair as the current estimate.
for (int i = 0; i < M; i++)
{
left_vector[i] = next_left_vector[i];
}
for (int i = 0; i < N; i++)
{
right_vector[i] = next_right_vector[i];
}
}
// Store the triplet, or stop the whole decomposition if the last norm fell below epsilon.
if(r>=epsilon)
{
S[col]=r;
for (int i = 0; i < M; i++)
{
U[col][i] = left_vector[i];
}
for (int i = 0; i < N; i++)
{
V[col][i] = right_vector[i];
}
}
else break;
}
return;
}

71
lib/optimization/svd.h Normal file
View File

@ -0,0 +1,71 @@
/********************************************************
*
*
*
*
*
*
* Geophysical Computational Tools & Library (GCTL)
*
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
*
* GCTL is distributed under a dual licensing scheme. You can redistribute
* it and/or modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation, either version 2
* of the License, or (at your option) any later version. You should have
* received a copy of the GNU Lesser General Public License along with this
* program. If not, see <http://www.gnu.org/licenses/>.
*
* If the terms and conditions of the LGPL v.2. would prevent you from using
* the GCTL, please consider the option to obtain a commercial license for a
* fee. These licenses are offered by the GCTL's original author. As a rule,
* licenses are provided "as-is", unlimited in time for a one time fee. Please
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
* to include some description of your company and the realm of its activities.
* Also add information on how to contact you by electronic and paper mail.
******************************************************/
#ifndef _GCTL_SVD_H
#define _GCTL_SVD_H
#include "gctl/core.h"
#include "gctl/algorithm.h"
namespace gctl
{
/**
* @brief Singular value decomposition (SVD).
*
* A = U^T \cdot S \cdot V
* A  the input two-dimensional array of size M*N
* K  the number of singular values to compute
* U  the U matrix of size K*M
* S  the K singular values, stored as an array of size 1*K
* V  the V matrix of size K*N
*
*/
class svd
{
public:
matrix<double> U, V; // Left (K*M) and right (K*N) singular vectors, one per row
array<double> S; // Singular values
svd(); // Creates an empty object with the default settings
svd(const matrix<double> &src_mat); // Creates the object and decomposes src_mat
virtual ~svd(){}
void reset(); // Clears U, V and S and restores the default settings
void set_singular_number(int k); // Sets K; k must be positive
void set_iteration(int t); // Sets the iteration limit; t must be positive
void set_epsilon(double e); // Sets the tolerance; e must be positive
int get_singular_number(){return K;} // Returns K, the requested number of singular values
void decompose(const matrix<double> &src_mat); // Computes U, S and V for src_mat
protected:
int maxi_iteration, K; // Iteration limit and number of singular values
double epsilon; // Tolerance of the convergence and zero-norm tests
};
}
#endif // _GCTL_SVD_H