initial upload
This commit is contained in:
parent
12b778cd3e
commit
b4b4275576
37
.gitignore
vendored
37
.gitignore
vendored
@ -1,34 +1,3 @@
|
|||||||
# ---> C++
|
.DS_Store
|
||||||
# Prerequisites
|
build/
|
||||||
*.d
|
.vscode/
|
||||||
|
|
||||||
# Compiled Object files
|
|
||||||
*.slo
|
|
||||||
*.lo
|
|
||||||
*.o
|
|
||||||
*.obj
|
|
||||||
|
|
||||||
# Precompiled Headers
|
|
||||||
*.gch
|
|
||||||
*.pch
|
|
||||||
|
|
||||||
# Compiled Dynamic libraries
|
|
||||||
*.so
|
|
||||||
*.dylib
|
|
||||||
*.dll
|
|
||||||
|
|
||||||
# Fortran module files
|
|
||||||
*.mod
|
|
||||||
*.smod
|
|
||||||
|
|
||||||
# Compiled Static libraries
|
|
||||||
*.lai
|
|
||||||
*.la
|
|
||||||
*.a
|
|
||||||
*.lib
|
|
||||||
|
|
||||||
# Executables
|
|
||||||
*.exe
|
|
||||||
*.out
|
|
||||||
*.app
|
|
||||||
|
|
28
CMakeLists.txt
Normal file
28
CMakeLists.txt
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
cmake_minimum_required(VERSION 3.15.2)
# Project name and version.
project(GCTL_OPTIMIZATION VERSION 1.0)
# Helper functions used when writing the package configuration files.
include(CMakePackageConfigHelpers)

# Each message is a single quoted string so an empty or list-valued variable
# cannot change the number of arguments passed to message().
message(STATUS "Platform: ${CMAKE_HOST_SYSTEM_NAME}")
message(STATUS "Install prefix: ${CMAKE_INSTALL_PREFIX}")
message(STATUS "Processor: ${CMAKE_HOST_SYSTEM_PROCESSOR}")

# GCTL_OPTIMIZATION_TOML: forwarded to the generated configuration header.
option(GCTL_OPTIMIZATION_TOML "Use the TOML library" ON)
# GCTL_OPTIMIZATION_BUILD_EXAMPLES: set to OFF to skip the example/ directory.
# Defaults to ON, which matches the previous unconditional behaviour.
option(GCTL_OPTIMIZATION_BUILD_EXAMPLES "Build the example programs" ON)

message(STATUS "[GCTL_OPTIMIZATION] Use the TOML library: ${GCTL_OPTIMIZATION_TOML}")

find_package(GCTL REQUIRED)
# Directory-scoped on purpose: the lib/ and example/ subdirectories are added
# below and inherit this include path.
include_directories(${GCTL_INC_DIR})

# Generate the configuration header from config.h.in.
# NOTE(review): the header is written into the source tree; moving it to the
# binary directory would also require updating the include paths used by lib/.
configure_file(
  "${PROJECT_SOURCE_DIR}/config.h.in"
  "${PROJECT_SOURCE_DIR}/lib/optimization/gctl_optimization_config.h"
  @ONLY
)

# Library sources.
add_subdirectory(lib)

# Example programs (optional).
if(GCTL_OPTIMIZATION_BUILD_EXAMPLES)
  add_subdirectory(example)
endif()
|
25
GCTL_OPTIMIZATIONConfig.cmake.in
Normal file
25
GCTL_OPTIMIZATIONConfig.cmake.in
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
@PACKAGE_INIT@

# Package configuration template. Exposes the version, install locations,
# library name and TOML switch of the installed package, resolves the GCTL
# dependency and imports the exported targets.
include(CMakeFindDependencyMacro)

set(@PROJECT_NAME@_VERSION "@PROJECT_VERSION@")
set_and_check(@PROJECT_NAME@_INSTALL_PREFIX "${PACKAGE_PREFIX_DIR}")
set_and_check(@PROJECT_NAME@_INC_DIR "${PACKAGE_PREFIX_DIR}/include")
set_and_check(@PROJECT_NAME@_INCLUDE_DIR "${PACKAGE_PREFIX_DIR}/include")
set_and_check(@PROJECT_NAME@_LIB_DIR "${PACKAGE_PREFIX_DIR}/lib")
set_and_check(@PROJECT_NAME@_LIBRARY_DIR "${PACKAGE_PREFIX_DIR}/lib")

set(@PROJECT_NAME@_LIB gctl_optimization)
set(@PROJECT_NAME@_LIBRARY gctl_optimization)

set(@PROJECT_NAME@_TOML @GCTL_OPTIMIZATION_TOML@)

# Stay silent when the consumer asked for a QUIET find_package().
if(NOT @PROJECT_NAME@_FIND_QUIETLY)
  message(STATUS "[GCTL_OPTIMIZATION] Use the TOML library: @GCTL_OPTIMIZATION_TOML@")
endif()

if(NOT GCTL_FOUND)
  # find_dependency() forwards the QUIET/REQUIRED settings of the calling
  # find_package() and marks this package as not found when GCTL is missing,
  # instead of unconditionally aborting the consumer's configure step.
  find_dependency(GCTL)
  # Kept for backward compatibility with consumers that rely on the GCTL
  # include path being added at directory scope.
  include_directories(${GCTL_INC_DIR})
endif()

# include target information
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")

check_required_components(@PROJECT_NAME@)
|
17
README.md
17
README.md
@ -1,2 +1,17 @@
|
|||||||
# gctl_optimization
|
## lcg_solver 共轭梯度求解器
|
||||||
|
### 求解器参数设置
|
||||||
|
#### 1. 从toml文件读取参数
|
||||||
|
|
||||||
|
用户可以从toml文件中读取并设置求解器参数。所有的参数都定义在名为lcg的顶级表格下,可设置的参数及类型如下所示:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[lcg]
|
||||||
|
max_iterations=<int>
|
||||||
|
epsilon=<float>
|
||||||
|
abs_diff=0|1
|
||||||
|
restart_epsilon=<float>
|
||||||
|
step=<float>
|
||||||
|
sigma=<float>
|
||||||
|
beta=<float>
|
||||||
|
maxi_m=<int>
|
||||||
|
```
|
1
config.h.in
Normal file
1
config.h.in
Normal file
@ -0,0 +1 @@
|
|||||||
|
/* Defined when the GCTL_OPTIMIZATION_TOML CMake option is enabled. */
#cmakedefine GCTL_OPTIMIZATION_TOML
|
20
example/CMakeLists.txt
Normal file
20
example/CMakeLists.txt
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
# add_example(<name> <switch>)
#
# Builds the executable <name> from <name>.cpp when <switch> is true.
# The target is compiled as C++17, optimised with -O3 on GCC/Clang, placed in
# <build>/bin/examples and linked against GCTL and gctl_optimization.
#
# Example: add_example(ex1 ON)
function(add_example name switch)
  if(NOT switch)
    return()
  endif()

  add_executable(${name} ${name}.cpp)
  set_target_properties(${name} PROPERTIES
    CXX_STANDARD 17
    CXX_STANDARD_REQUIRED ON
    RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin/examples"
  )
  # Target-scoped optimisation flag instead of appending to CMAKE_CXX_FLAGS,
  # and only for compilers that understand -O3.
  target_compile_options(${name} PRIVATE
    "$<$<CXX_COMPILER_ID:GNU,Clang,AppleClang>:-O3>"
  )
  target_link_libraries(${name} PRIVATE ${GCTL_LIB} gctl_optimization)
endfunction()

add_example(ex1 ON)
add_example(ex2 ON)
add_example(ex3 ON)
add_example(ex4 ON)
add_example(ex5 ON)
add_example(ex6 ON)
add_example(ex7 ON)
add_example(ex8 ON)
|
164
example/ex1.cpp
Normal file
164
example/ex1.cpp
Normal file
@ -0,0 +1,164 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ███████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗███████╗ ██║ ██║
|
||||||
|
* ██║ ██║╚════██║ ██║ ██║
|
||||||
|
* ╚██████╔╝███████║ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚══════╝ ╚═╝ ╚══════╝
|
||||||
|
* Generic Scientific Template Library
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* The GSTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License (LGPL) along with
|
||||||
|
* this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GSTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GSTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "../lib/optimization.h"
|
||||||
|
|
||||||
|
#define M 1000
|
||||||
|
#define N 800
|
||||||
|
|
||||||
|
// Largest absolute element-wise difference between two arrays.
//
// Visits a.size() elements and reads b at the same indices, so b must be at
// least as long as a. Returns -1.0 when a is empty.
double max_diff(const gctl::array<double> &a, const gctl::array<double> &b)
{
    double max = -1.0;
    for (size_t i = 0; i < a.size(); i++)
    {
        // Take |d| directly instead of sqrt(d*d): squaring loses precision and
        // can overflow or underflow for very large or very small differences.
        const double d = a[i] - b[i];
        max = std::max(d < 0.0 ? -d : d, max);
    }
    return max;
}
|
||||||
|
|
||||||
|
class ex1 : public gctl::lcg_solver
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
ex1();
|
||||||
|
virtual ~ex1();
|
||||||
|
|
||||||
|
// 计算共轭梯度的B项
|
||||||
|
void cal_partb(const gctl::array<double> &x, gctl::array<double> &B);
|
||||||
|
|
||||||
|
//定义共轭梯度中Ax的算法
|
||||||
|
virtual void LCG_Ax(const gctl::array<double> &x, gctl::array<double> &ax);
|
||||||
|
virtual void LCG_Mx(const gctl::array<double> &x, gctl::array<double> &mx);
|
||||||
|
|
||||||
|
private:
|
||||||
|
gctl::matrix<double> kernel; // 普通二维数组做核矩阵
|
||||||
|
gctl::array<double> tmp_arr; // 中间结果数组
|
||||||
|
gctl::array<double> p; // 预优矩阵
|
||||||
|
};
|
||||||
|
|
||||||
|
// Builds a random M x N kernel with entries drawn uniformly from [-1, 1] and
// a diagonal preconditioner: p[i] is the reciprocal of the squared norm of
// kernel column i.
ex1::ex1()
{
    kernel.resize(M, N);
    kernel.random(-1.0, 1.0, gctl::RdUniform);

    tmp_arr.resize(M);
    p.resize(N);

    // Every p[col] is assigned exactly once here, so the former fill with 1.0
    // (immediately overwritten) has been removed.
    for (size_t col = 0; col < N; col++)
    {
        double diag = 0.0;
        for (size_t row = 0; row < M; row++)
        {
            diag += kernel[row][col]*kernel[row][col];
        }
        // An all-zero column would give 1/0; fall back to 1.0 (no scaling).
        p[col] = (diag > 0.0) ? 1.0/diag : 1.0;
    }
}
|
||||||
|
|
||||||
|
// Nothing to release explicitly; members clean up after themselves.
ex1::~ex1() = default;
|
||||||
|
|
||||||
|
// Fills B with the system operator applied to x by delegating to LCG_Ax.
void ex1::cal_partb(const gctl::array<double> &x, gctl::array<double> &B)
{
    LCG_Ax(x, B);
}
|
||||||
|
|
||||||
|
// Applies the operator in two steps through the scratch array:
// tmp_arr = kernel * x, then ax = product of kernel and tmp_arr with the
// gctl::Trans flag.
void ex1::LCG_Ax(const gctl::array<double> &x, gctl::array<double> &ax)
{
    matvec(tmp_arr, kernel, x);
    matvec(ax, kernel, tmp_arr, gctl::Trans);
}
|
||||||
|
|
||||||
|
// Applies the preconditioner: mx = vecmul of p and x
// (presumably an element-wise product; confirm against gctl::vecmul).
void ex1::LCG_Mx(const gctl::array<double> &x, gctl::array<double> &mx)
{
    vecmul(mx, p, x);
}
|
||||||
|
|
||||||
|
int main(int argc, char const *argv[])
|
||||||
|
{
|
||||||
|
// 生成一组正演解
|
||||||
|
gctl::array<double> fm(N);
|
||||||
|
gctl::random(fm, 1.0, 2.0, gctl::RdUniform);
|
||||||
|
|
||||||
|
ex1 test;
|
||||||
|
|
||||||
|
// 计算共轭梯度B项
|
||||||
|
gctl::array<double> B(N);
|
||||||
|
test.cal_partb(fm, B);
|
||||||
|
|
||||||
|
// 声明一组解
|
||||||
|
gctl::array<double> m(N, 0.0);
|
||||||
|
|
||||||
|
test.set_lcg_message(gctl::LCG_SOLUTION);
|
||||||
|
|
||||||
|
std::ofstream ofile("log.txt");
|
||||||
|
test.LCG_Minimize(m, B, gctl::LCG_CG, ofile);
|
||||||
|
ofile << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||||
|
|
||||||
|
m.assign_all(0.0);
|
||||||
|
|
||||||
|
test.LCG_Minimize(m, B, gctl::LCG_PCG, ofile);
|
||||||
|
ofile << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||||
|
|
||||||
|
m.assign_all(0.0);
|
||||||
|
|
||||||
|
test.LCG_Minimize(m, B, gctl::LCG_CGS, ofile);
|
||||||
|
ofile << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||||
|
ofile.close();
|
||||||
|
|
||||||
|
test.set_lcg_message(gctl::LCG_SOLUTION);
|
||||||
|
|
||||||
|
m.assign_all(0.0);
|
||||||
|
|
||||||
|
test.LCG_Minimize(m, B, gctl::LCG_BICGSTAB);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||||
|
|
||||||
|
m.assign_all(0.0);
|
||||||
|
|
||||||
|
test.LCG_Minimize(m, B, gctl::LCG_BICGSTAB2);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||||
|
|
||||||
|
gctl::array<double> low(N, 1.0);
|
||||||
|
gctl::array<double> hig(N, 2.0);
|
||||||
|
|
||||||
|
m.assign_all(0.0);
|
||||||
|
|
||||||
|
test.LCG_MinimizeConstrained(m, B, low, hig, gctl::LCG_PG);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||||
|
|
||||||
|
m.assign_all(0.0);
|
||||||
|
|
||||||
|
test.LCG_MinimizeConstrained(m, B, low, hig, gctl::LCG_SPG);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||||
|
return 0;
|
||||||
|
}
|
175
example/ex2.cpp
Normal file
175
example/ex2.cpp
Normal file
@ -0,0 +1,175 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ███████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗███████╗ ██║ ██║
|
||||||
|
* ██║ ██║╚════██║ ██║ ██║
|
||||||
|
* ╚██████╔╝███████║ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚══════╝ ╚═╝ ╚══════╝
|
||||||
|
* Generic Scientific Template Library
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* The GSTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License (LGPL) along with
|
||||||
|
* this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GSTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GSTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "../lib/optimization.h"
|
||||||
|
|
||||||
|
#define M 100
|
||||||
|
#define N 90
|
||||||
|
|
||||||
|
// get random floating points
|
||||||
|
// Returns a pseudo-random double between l and t, derived from one rand() call.
double random_double(double l, double t)
{
    const double scaled = (t - l) * rand();
    return scaled / RAND_MAX + l;
}
|
||||||
|
|
||||||
|
// get random integral numbers
|
||||||
|
// Returns a pseudo-random integer in the half-open range [small, big).
// An empty range (big == small) yields small instead of dividing by zero.
int random_int(int small, int big)
{
    const int span = big - small;
    if (span == 0)
    {
        return small;
    }
    return (rand() % span) + small;
}
|
||||||
|
|
||||||
|
class ex2 : public gctl::lgd_solver
|
||||||
|
{
|
||||||
|
protected:
|
||||||
|
gctl::matrix<double> kernel;
|
||||||
|
gctl::array<double> obs, tmp;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
double LGD_Evaluate(const gctl::array<double> &x, gctl::array<double> &g);
|
||||||
|
|
||||||
|
public:
|
||||||
|
ex2();
|
||||||
|
virtual ~ex2(){}
|
||||||
|
|
||||||
|
void CalObs(const gctl::array<double> &x);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Sizes the work arrays, seeds rand() from the clock and writes 25-34 random
// values from [-10, 10] into randomly chosen columns of every kernel row.
// NOTE(review): entries that are never drawn keep whatever resize() left in
// them - confirm that gctl::matrix::resize zero-fills.
ex2::ex2()
{
    kernel.resize(M, N);
    tmp.resize(M);
    obs.resize(M);

    srand(time(0));

    for (int row = 0; row < M; row++)
    {
        const int entries = random_int(25, 35);
        for (int k = 0; k < entries; k++)
        {
            // Draw the column first, then the value, to keep the rand() order.
            const int col = random_int(0, N);
            const double value = random_double(-10, 10);
            kernel[row][col] = value;
        }
    }
}
|
||||||
|
|
||||||
|
// Mean-squared-misfit objective.
//   tmp    = kernel * x - obs
//   g[j]   = (2/M) * sum_i kernel[i][j] * tmp[i]
//   return = (1/M) * sum_i tmp[i]^2
double ex2::LGD_Evaluate(const gctl::array<double> &x, gctl::array<double> &g)
{
    // Residuals and their squared sum in a single pass over the rows.
    double sq_sum = 0.0;
    for (int row = 0; row < M; row++)
    {
        double predicted = 0.0;
        for (int col = 0; col < N; col++)
        {
            predicted += kernel[row][col] * x[col];
        }
        tmp[row] = predicted - obs[row];
        sq_sum += tmp[row]*tmp[row];
    }

    // Gradient with respect to every model parameter.
    for (int col = 0; col < N; col++)
    {
        double grad = 0.0;
        for (int row = 0; row < M; row++)
        {
            grad += kernel[row][col]*tmp[row];
        }
        g[col] = grad * (2.0/M);
    }

    return sq_sum/M;
}
|
||||||
|
|
||||||
|
// Stores kernel * x in obs, perturbing every entry with noise drawn from
// [-1e-3, 1e-3].
void ex2::CalObs(const gctl::array<double> &x)
{
    for (int row = 0; row < M; row++)
    {
        // Forward response of this row.
        double response = 0.0;
        for (int col = 0; col < N; col++)
        {
            response += kernel[row][col]*x[col];
        }
        // Add the noise.
        obs[row] = response + random_double(-1e-3, 1e-3);
    }
}
|
||||||
|
|
||||||
|
int main(int argc, char const *argv[])
|
||||||
|
{
|
||||||
|
gctl::array<double> m(N, 0.0), mean_m(N, 0.0), stddev_m(N, 0.0), low(N), hig(N);
|
||||||
|
|
||||||
|
// 生成一组正演解 包含一些大值和一些小值
|
||||||
|
gctl::array<double> fm(N);
|
||||||
|
int N2 = (int) N/2;
|
||||||
|
for (int i = 0; i < N2; i++)
|
||||||
|
{
|
||||||
|
//fm[i] = random_double(5, 10);
|
||||||
|
fm[i] = 10.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = N2; i < N; i++)
|
||||||
|
{
|
||||||
|
//fm[i] = random_double(1, 2);
|
||||||
|
fm[i] = 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < N2; i++)
|
||||||
|
{
|
||||||
|
low[i] = 9.0; // 对解的范围进行约束
|
||||||
|
hig[i] = 11.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = N2; i < N; i++)
|
||||||
|
{
|
||||||
|
low[i] = 0.0;
|
||||||
|
hig[i] = 2.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ex2 e;
|
||||||
|
e.CalObs(fm);
|
||||||
|
|
||||||
|
gctl::lgd_para my_para = e.default_lgd_para();
|
||||||
|
my_para.flight_times = 20000;
|
||||||
|
my_para.batch = 100;
|
||||||
|
e.set_lgd_para(my_para);
|
||||||
|
e.LGD_Minimize(m, mean_m, stddev_m, low, hig);
|
||||||
|
|
||||||
|
for (int i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
std::cout << fm[i] << " " << m[i] << " " << mean_m[i] << " " << stddev_m[i] << " " << fabs(mean_m[i] - fm[i]) << std::endl;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
69
example/ex3.cpp
Normal file
69
example/ex3.cpp
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
#include "../lib/optimization.h"
|
||||||
|
|
||||||
|
class TEST_FUNC : public gctl::lbfgs_solver
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
TEST_FUNC();
|
||||||
|
~TEST_FUNC();
|
||||||
|
|
||||||
|
virtual double LBFGS_Evaluate(const gctl::array<double> &x, gctl::array<double> &g);
|
||||||
|
|
||||||
|
void Routine();
|
||||||
|
|
||||||
|
private:
|
||||||
|
gctl::array<double> m_x;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Starts the three unknowns at zero.
TEST_FUNC::TEST_FUNC()
{
    m_x.resize(3, 0.0);
}
|
||||||
|
|
||||||
|
// Nothing to release explicitly.
TEST_FUNC::~TEST_FUNC() = default;
|
||||||
|
|
||||||
|
// test functions
|
||||||
|
// 3 = 3*x1 + x2 + 2*x3*x3
|
||||||
|
// 1 = -3*x1 + 5*x2*x2 + 2*x1*x3
|
||||||
|
// -12 = 25*x1*x2 + 20*x3
|
||||||
|
// Returns the Euclidean norm of the three equation residuals at x and stores
// the gradient of that norm in g. The constants are the right-hand sides with
// a little noise added.
double TEST_FUNC::LBFGS_Evaluate(const gctl::array<double> &x, gctl::array<double> &g)
{
    const double r0 = 3*x[0] + x[1] + 2*x[2]*x[2] - 3.012;
    const double r1 = -3*x[0] + 5*x[1]*x[1] + 2*x[0]*x[2] - 1.04252;
    const double r2 = 25*x[0]*x[1] + 20*x[2] + 12.12479;
    const double misfit = sqrt(r0*r0+r1*r1+r2*r2);

    // d(misfit)/dx_k = (sum_i r_i * d r_i/dx_k) / misfit
    g[0] = 0.5*(6*r0+2*r1*(2*x[2]-3)+50*r2*x[1])/misfit;
    g[1] = 0.5*(2*r0+20*r1*x[1]+50*r2*x[0])/misfit;
    g[2] = 0.5*(8*r0*x[2]+4*r1*x[0]+40*r2)/misfit;
    return misfit;
}
|
||||||
|
|
||||||
|
void TEST_FUNC::Routine()
|
||||||
|
{
|
||||||
|
gctl::lbfgs_para self_para = default_lbfgs_para();
|
||||||
|
self_para.m = 10;
|
||||||
|
self_para.past = 5;
|
||||||
|
self_para.residual = 1e-10;
|
||||||
|
//self_para.min_step = 1e-30;
|
||||||
|
//self_para.max_linesearch = 40;
|
||||||
|
//self_para.linesearch = gctl::LBFGS_LINESEARCH_BACKTRACKING_WOLFE;
|
||||||
|
|
||||||
|
set_lbfgs_para(self_para);
|
||||||
|
|
||||||
|
std::ofstream ofile("log.txt");
|
||||||
|
show_lbfgs_para(ofile);
|
||||||
|
|
||||||
|
double fx = LBFGS_Minimize(m_x, ofile);
|
||||||
|
ofile.close();
|
||||||
|
|
||||||
|
m_x.show();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char const *argv[])
|
||||||
|
{
|
||||||
|
TEST_FUNC test;
|
||||||
|
test.Routine();
|
||||||
|
return 0;
|
||||||
|
}
|
91
example/ex4.cpp
Normal file
91
example/ex4.cpp
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "gctl/core.h"
|
||||||
|
#include "gctl/algorithm.h"
|
||||||
|
#include "../lib/optimization.h"
|
||||||
|
#include "iostream"
|
||||||
|
#include "iomanip"
|
||||||
|
|
||||||
|
using std::cout;
|
||||||
|
using std::endl;
|
||||||
|
using std::setw;
|
||||||
|
|
||||||
|
int main(int argc, char const *argv[])
|
||||||
|
{
|
||||||
|
gctl::matrix<double> A(4, 3);
|
||||||
|
for (int i = 0; i < A.row_size(); i++)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < A.col_size(); j++)
|
||||||
|
{
|
||||||
|
A[i][j] = 3*(i+1) + j - 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
A[3][1] = 1;
|
||||||
|
|
||||||
|
cout<<"A(" << A.row_size() << ", " << A.col_size() << ") = " <<endl;
|
||||||
|
A.show();
|
||||||
|
|
||||||
|
gctl::svd svdd;
|
||||||
|
svdd.decompose(A);
|
||||||
|
|
||||||
|
cout<<"U(" << svdd.U.row_size() << ", " << svdd.U.col_size() << ") = " <<endl;
|
||||||
|
svdd.U.show();
|
||||||
|
|
||||||
|
cout<<"S(" << svdd.S.size() << ") = " << endl;
|
||||||
|
svdd.S.show();
|
||||||
|
|
||||||
|
cout<<"V(" << svdd.V.row_size() << ", " << svdd.V.col_size() << ") = " <<endl;
|
||||||
|
svdd.V.show();
|
||||||
|
|
||||||
|
int sig_num = svdd.get_singular_number();
|
||||||
|
double tmp_d;
|
||||||
|
gctl::array<double> tmp(sig_num);
|
||||||
|
|
||||||
|
cout<<"U^T * S * V(" << A.row_size() << ", " << A.col_size() << ") = " <<endl;
|
||||||
|
for(int i=0;i<A.row_size();i++)
|
||||||
|
{
|
||||||
|
for (int k = 0; k < sig_num; k++)
|
||||||
|
{
|
||||||
|
tmp[k] = svdd.U[k][i] * svdd.S[k];
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int j=0;j<A.col_size();j++)
|
||||||
|
{
|
||||||
|
tmp_d = 0.0;
|
||||||
|
for (int k = 0; k < sig_num; k++)
|
||||||
|
{
|
||||||
|
tmp_d += tmp[k] * svdd.V[k][j];
|
||||||
|
}
|
||||||
|
cout<<setw(12)<<tmp_d<<' ';
|
||||||
|
}
|
||||||
|
cout<<endl;
|
||||||
|
}
|
||||||
|
cout<<endl;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
78
example/ex5.cpp
Normal file
78
example/ex5.cpp
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "gctl/core.h"
|
||||||
|
#include "gctl/algorithm.h"
|
||||||
|
#include "../lib/optimization.h"
|
||||||
|
|
||||||
|
int main(int argc, char const *argv[])
|
||||||
|
{
|
||||||
|
gctl::matrix<double> A(5, 5);
|
||||||
|
for (int i = 0; i < 5; i++)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < 5; j++)
|
||||||
|
{
|
||||||
|
A[i][j] = 3*(i+1) + j - 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 注意A要满秩
|
||||||
|
A[1][2] = 3.4;
|
||||||
|
A[4][1] = 2.1;
|
||||||
|
A[3][4] = 9.7;
|
||||||
|
A[2][3] = 2.7;
|
||||||
|
|
||||||
|
std::cout<<"A(5, 5) = " <<std::endl;
|
||||||
|
for(int i=0;i<5;i++){
|
||||||
|
for(int j=0;j<5;j++){
|
||||||
|
std::cout<<A[i][j]<<' ';
|
||||||
|
}
|
||||||
|
std::cout<<std::endl;
|
||||||
|
}
|
||||||
|
std::cout<<std::endl;
|
||||||
|
|
||||||
|
gctl::array<double> m(5, 0.5), x(5, 0.0);
|
||||||
|
gctl::array<double> B(5);
|
||||||
|
for (int i = 0; i < 5; i++)
|
||||||
|
{
|
||||||
|
B[i] = 0.0;
|
||||||
|
for (int j = 0; j < 5; j++)
|
||||||
|
{
|
||||||
|
B[i] += A[i][j] * m[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
gctl::lu glu(A);
|
||||||
|
glu.decompose();
|
||||||
|
glu.solve(B, x);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 5; i++)
|
||||||
|
{
|
||||||
|
std::cout << m[i] << " " << x[i] << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
90
example/ex6.cpp
Normal file
90
example/ex6.cpp
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "gctl/core.h"
|
||||||
|
#include "gctl/algorithm.h"
|
||||||
|
#include "../lib/optimization.h"
|
||||||
|
|
||||||
|
// get random floating points
|
||||||
|
// Returns a pseudo-random double between l and t, derived from one rand() call.
double random_double(double l, double t)
{
    const double scaled = (t - l) * rand();
    return scaled / RAND_MAX + l;
}
|
||||||
|
|
||||||
|
int main(int argc, char const *argv[])
|
||||||
|
{
|
||||||
|
srand(time(0));
|
||||||
|
|
||||||
|
gctl::matrix<double> A(5, 5);
|
||||||
|
for (int i = 0; i < 5; i++)
|
||||||
|
{
|
||||||
|
for (int j = i; j < 5; j++)
|
||||||
|
{
|
||||||
|
if (i == j) A[i][j] = random_double(1.0, 3.0);
|
||||||
|
else A[i][j] = random_double(0.1, 1.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < 5; i++)
|
||||||
|
{
|
||||||
|
for (int j = i; j < 5; j++)
|
||||||
|
{
|
||||||
|
A[j][i] = A[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout<<"A(5, 5) = " <<std::endl;
|
||||||
|
for(int i=0;i<5;i++){
|
||||||
|
for(int j=0;j<5;j++){
|
||||||
|
std::cout<<A[i][j]<<' ';
|
||||||
|
}
|
||||||
|
std::cout<<std::endl;
|
||||||
|
}
|
||||||
|
std::cout<<std::endl;
|
||||||
|
|
||||||
|
gctl::array<double> m(5, 0.5), x(5, 0.0);
|
||||||
|
gctl::array<double> B(5);
|
||||||
|
for (int i = 0; i < 5; i++)
|
||||||
|
{
|
||||||
|
B[i] = 0.0;
|
||||||
|
for (int j = 0; j < 5; j++)
|
||||||
|
{
|
||||||
|
B[i] += A[i][j] * m[j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
gctl::cholesky gck(A);
|
||||||
|
gck.decompose();
|
||||||
|
gck.solve(B, x);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < 5; i++)
|
||||||
|
{
|
||||||
|
std::cout << m[i] << " " << x[i] << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
124
example/ex7.cpp
Normal file
124
example/ex7.cpp
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "gctl/core.h"
|
||||||
|
#include "gctl/algorithm.h"
|
||||||
|
#include "../lib/optimization.h"
|
||||||
|
|
||||||
|
typedef gctl::array<std::complex<double>> cd_array;
|
||||||
|
|
||||||
|
#define N 1000
|
||||||
|
|
||||||
|
double max_diff(const cd_array &a, const cd_array &b)
|
||||||
|
{
|
||||||
|
double max = -1;
|
||||||
|
std::complex<double> t;
|
||||||
|
for (size_t i = 0; i < a.size(); i++)
|
||||||
|
{
|
||||||
|
t = a[i] - b[i];
|
||||||
|
max = std::max(std::norm(t), max);
|
||||||
|
}
|
||||||
|
return max;
|
||||||
|
}
|
||||||
|
|
||||||
|
class ex7 : public gctl::clcg_solver
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
ex7();
|
||||||
|
virtual ~ex7();
|
||||||
|
virtual void CLCG_Ax(const cd_array &x, cd_array &ax, gctl::matrix_layout_e layout, gctl::conjugate_type_e conj);
|
||||||
|
|
||||||
|
// 计算共轭梯度的B项
|
||||||
|
void cal_partb(const cd_array &x, cd_array &B);
|
||||||
|
|
||||||
|
private:
|
||||||
|
gctl::matrix<std::complex<double>> kernel; // 普通二维数组做核矩阵
|
||||||
|
};
|
||||||
|
|
||||||
|
// Fills the N x N kernel symmetrically from N*(N+1)/2 random values drawn
// from [1, 2]; every entry is real (zero imaginary part).
ex7::ex7()
{
    gctl::array<double> tmp(round(0.5*(N+1)*N));
    gctl::random(tmp, 1.0, 2.0, gctl::RdUniform);

    kernel.resize(N, N);

    size_t next = 0;  // index of the next unused random value
    for (int row = 0; row < N; row++)
    {
        for (int col = row; col < N; col++)
        {
            const std::complex<double> entry(tmp[next++]);
            kernel[row][col] = entry;
            kernel[col][row] = entry;
        }
    }
}
|
||||||
|
|
||||||
|
// Nothing to release explicitly; members clean up after themselves.
ex7::~ex7() = default;
|
||||||
|
|
||||||
|
void ex7::cal_partb(const cd_array &x, cd_array &B)
|
||||||
|
{
|
||||||
|
gctl::matvec(B, kernel, x);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ex7::CLCG_Ax(const cd_array &x, cd_array &ax, gctl::matrix_layout_e layout, gctl::conjugate_type_e conj)
|
||||||
|
{
|
||||||
|
gctl::matvec(ax, kernel, x, layout, conj);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char const *argv[])
|
||||||
|
{
|
||||||
|
// 生成一组正演解
|
||||||
|
gctl::array<double> tmp(2*N);
|
||||||
|
gctl::random(tmp, 1.0, 2.0, gctl::RdUniform);
|
||||||
|
|
||||||
|
cd_array fm(N);
|
||||||
|
for (size_t i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
fm[i].real(tmp[2*i]);
|
||||||
|
fm[i].imag(tmp[2*i + 1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
ex7 test;
|
||||||
|
|
||||||
|
// 计算共轭梯度B项
|
||||||
|
cd_array B(N);
|
||||||
|
test.cal_partb(fm, B);
|
||||||
|
|
||||||
|
// 声明一组解
|
||||||
|
cd_array m(N, std::complex<double>(0.0, 0.0));
|
||||||
|
|
||||||
|
gctl::clcg_para my_para = test.default_clcg_para();
|
||||||
|
my_para.abs_diff = 1;
|
||||||
|
|
||||||
|
test.set_clcg_para(my_para);
|
||||||
|
|
||||||
|
test.CLCG_Minimize(m, B, gctl::CLCG_BICG_SYM);
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||||
|
return 0;
|
||||||
|
}
|
233
example/ex8.cpp
Normal file
233
example/ex8.cpp
Normal file
@ -0,0 +1,233 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "gctl/core.h"
|
||||||
|
#include "gctl/algorithm.h"
|
||||||
|
#include "../lib/optimization.h"
|
||||||
|
|
||||||
|
#define M 1000
|
||||||
|
#define N 900
|
||||||
|
|
||||||
|
/**
 * @brief Draw a pseudo-random floating-point number with rand().
 *
 * @param l lower bound of the interval
 * @param t upper bound of the interval
 * @return (t-l)*rand()/RAND_MAX + l, i.e. a value between l and t inclusive
 */
double random_double(double l, double t)
{
    // Keep the original left-to-right evaluation so results are bit-identical
    const double scaled = (t-l)*rand()*1.0/RAND_MAX;
    return scaled + l;
}
|
||||||
|
|
||||||
|
/**
 * @brief Draw a pseudo-random integer from the half-open range [small, big) with rand().
 *
 * @param small inclusive lower bound
 * @param big   exclusive upper bound
 * @return a value v with small <= v < big; when the range is empty
 *         (big <= small) the lower bound is returned
 */
int random_int(int small, int big)
{
    const int span = big - small;
    // An empty range would make the modulo below divide by zero (undefined behaviour)
    if (span <= 0)
    {
        return small;
    }
    return (rand() % span) + small;
}
|
||||||
|
|
||||||
|
/**
 * @brief Largest absolute element-wise difference between two arrays.
 *
 * Entries of b are read at the indices of a.
 *
 * @param a first array
 * @param b second array
 * @return maximum of fabs(a[i] - b[i]) over all i, or -1.0 when a is empty
 */
double max_diff(const gctl::_1d_array &a, const gctl::_1d_array &b)
{
    double worst = -1.0;
    for (size_t idx = 0; idx < a.size(); ++idx)
    {
        worst = std::max(fabs(a[idx] - b[idx]), worst);
    }
    return worst;
}
|
||||||
|
|
||||||
|
/**
 * @brief Example problem combining the L-BFGS solver with gradient-norm loss balancing.
 *
 * Holds three kernel matrices (k1, k2, k3) with their observation vectors
 * (obs1, obs2, obs3) plus two scratch vectors (tmp, grad).
 */
class ex8 : public gctl::lbfgs_solver, public gctl::grad_norm
{
public:
    ex8();
    virtual ~ex8();

    // Objective callback: registers one loss per kernel and returns GradNormLoss(g).
    virtual double LBFGS_Evaluate(const gctl::_1d_array &x, gctl::_1d_array &g);

    // Progress callback: calls UpdateWeights() and then the base-class implementation.
    virtual int LBFGS_Progress(const gctl::_1d_array &x, const gctl::_1d_array &g, const double fx,
        const double converge, const double rate, const gctl::lbfgs_para param, int k, int ls, std::ostream &ss);

    // Fill obs1..obs3 from x through the kernels and add random noise.
    void CalTarget(const gctl::_1d_array &x);

private:
    gctl::_1d_array obs1, obs2, obs3, tmp, grad;
    gctl::_2d_matrix k1, k2, k3;
};
|
||||||
|
|
||||||
|
/**
 * Seeds rand() with the current time, sizes the scratch and observation
 * vectors and fills the three M x N kernels with randomly placed entries whose
 * magnitudes differ by kernel: [-1, 1], [-200, 200] and [-0.01, 0.01].
 */
ex8::ex8()
{
    srand(time(0));

    tmp.resize(M);
    grad.resize(N);

    // For every row, assign between 25 and 34 entries at random columns
    // (columns may repeat) with values drawn from [low, high].
    auto scatter = [](gctl::_2d_matrix &kern, double low, double high)
    {
        for (int row = 0; row < M; row++)
        {
            const int count = random_int(25, 35);
            for (int n = 0; n < count; n++)
            {
                const int col = random_int(0, N);
                kern[row][col] = random_double(low, high);
            }
        }
    };

    k1.resize(M, N);
    obs1.resize(M);
    scatter(k1, -1.0, 1.0);

    // Kernel with large entries
    k2.resize(M, N);
    obs2.resize(M);
    scatter(k2, -200.0, 200.0);

    // Kernel with small entries
    k3.resize(M, N);
    obs3.resize(M);
    scatter(k3, -0.01, 0.01);
}
|
||||||
|
|
||||||
|
// Nothing to release explicitly; members clean up after themselves.
ex8::~ex8() = default;
|
||||||
|
|
||||||
|
/**
 * @brief Objective callback of the L-BFGS solver.
 *
 * For each kernel/observation pair the residual is formed in tmp, its
 * gradient in grad, and both are registered through AddSingleLoss(). The
 * combined value and gradient come from GradNormLoss().
 *
 * @param x current model
 * @param g gradient, filled by GradNormLoss()
 * @return value returned by GradNormLoss(g)
 */
double ex8::LBFGS_Evaluate(const gctl::_1d_array &x, gctl::_1d_array &g)
{
    // Registers one loss term; tmp and grad are member scratch buffers reused by each call.
    auto add_term = [this, &x](gctl::_2d_matrix &kern, gctl::_1d_array &obs)
    {
        gctl::matvec(tmp, kern, x);
        tmp -= obs; // residual against the observations

        gctl::matvec(grad, kern, tmp, gctl::Trans);
        gctl::scale(grad, 2.0/M);

        AddSingleLoss(gctl::power2(gctl::module(tmp, gctl::L2))/M, grad);
    };

    add_term(k1, obs1);
    add_term(k2, obs2);
    add_term(k3, obs3);

    return GradNormLoss(g);
}
|
||||||
|
|
||||||
|
/**
 * @brief Progress callback: calls UpdateWeights() and then defers to the
 *        base-class LBFGS_Progress() with all arguments unchanged.
 *
 * @return whatever gctl::lbfgs_solver::LBFGS_Progress() returns
 */
int ex8::LBFGS_Progress(const gctl::_1d_array &x, const gctl::_1d_array &g, const double fx,
    const double converge, const double rate, const gctl::lbfgs_para param, int k, int ls, std::ostream &ss)
{
    UpdateWeights();

    const int status = gctl::lbfgs_solver::LBFGS_Progress(x, g, fx, converge, rate, param, k, ls, ss);
    return status;
}
|
||||||
|
|
||||||
|
/**
 * @brief Generate the three observation vectors from a model.
 *
 * Each observation vector is filled by gctl::matvec from its kernel and x,
 * then every entry receives noise drawn by random_double(-1e-3, 1e-3).
 *
 * @param x model used to compute the forward responses
 */
void ex8::CalTarget(const gctl::_1d_array &x)
{
    auto synthesize = [&x](gctl::_2d_matrix &kern, gctl::_1d_array &obs)
    {
        // forward response
        gctl::matvec(obs, kern, x);
        // add noise
        for (int row = 0; row < M; row++)
        {
            obs[row] += random_double(-1e-3, 1e-3);
        }
    };

    synthesize(k1, obs1);
    synthesize(k2, obs2);
    synthesize(k3, obs3);
}
|
||||||
|
|
||||||
|
int main(int argc, char const *argv[])
|
||||||
|
{
|
||||||
|
// 生成一组正演解
|
||||||
|
gctl::_1d_array fm(N);
|
||||||
|
random(fm, 1.0, 2.0, gctl::RdUniform);
|
||||||
|
|
||||||
|
ex8 test;
|
||||||
|
|
||||||
|
// 计算拟合目标项
|
||||||
|
test.CalTarget(fm);
|
||||||
|
|
||||||
|
// 声明一组解
|
||||||
|
gctl::_1d_array m(N, 0.0);
|
||||||
|
|
||||||
|
gctl::lbfgs_para self_para = test.default_lbfgs_para();
|
||||||
|
self_para.linesearch = gctl::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE;
|
||||||
|
self_para.epsilon = 1e-6;
|
||||||
|
|
||||||
|
test.set_lbfgs_para(self_para);
|
||||||
|
test.show_lbfgs_para();
|
||||||
|
|
||||||
|
test.InitGradNorm(3, N);
|
||||||
|
test.set_control_weight(1.0);
|
||||||
|
test.set_weight_step(0.00001);
|
||||||
|
|
||||||
|
double fx = test.LBFGS_Minimize(m);
|
||||||
|
|
||||||
|
std::clog << "maximal difference: " << max_diff(fm, m) << std::endl;
|
||||||
|
|
||||||
|
gctl::_1d_array records;
|
||||||
|
test.get_records(records);
|
||||||
|
for (size_t i = 0; i < records.size(); i++)
|
||||||
|
{
|
||||||
|
if ((i+1)%3 == 0)
|
||||||
|
{
|
||||||
|
std::cout << records[i] << "\n";
|
||||||
|
}
|
||||||
|
else std::cout << records[i] << " ";
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
54
installer
Executable file
54
installer
Executable file
@ -0,0 +1,54 @@
|
|||||||
|
#!/bin/bash
# Configure, build, install and remove this package with CMake and GNU stow.
#
# Usage: ./installer <option> [<cmake-option>=<value> ...]
# Files are installed under ${address}/${package} and then sym-linked into
# ${taress} by stow, so an uninstall removes them cleanly.

if [[ $# == 0 || ${1} == "help" ]]; then
	echo "Compiles executables/libraries and maintains installed files. Two tools 'Cmake' and 'stow' are employed here. For more information, see https://cmake.org and https://www.gnu.org/software/stow/."
	echo ""
	echo "School of Earth Sciences, Zhejiang University"
	echo "Yi Zhang (yizhang-geo@zju.edu.cn)"
	echo ""
	echo "Usage: ./installer [option] [Cmake options]"
	echo ""
	echo "Options:"
	echo "(1) configure: Configure Cmake project(s). This option could take extra Cmake options as in <option>=<value>."
	echo "(2) build: Build executables/libraries."
	echo "(3) install: Install executables/libraries to the directory of CMAKE_INSTALL_PREFIX and sym-links them to the target address. This offers a quick and clean remove of the installed files."
	echo "(4) clean: Clean build/ folder(s)."
	echo "(5) uninstall: Delete the installed files and sym-links."
	echo "(6) info: Print out current setups."
	echo "(7) help: Show help information."
	exit 0
fi

package=gctl_optimization
address=/opt/stow
taress=/usr/local
# CMake arguments live in an array so values containing spaces survive intact
options=("-DCMAKE_BUILD_TYPE=Release" "-DCMAKE_INSTALL_PREFIX=${address}/${package}")

action=${1}
shift
# Everything after the action word is forwarded to CMake as -D<option>=<value>
for opt in "$@"; do
	options+=("-D${opt}")
done

case "${action}" in
	configure)
		# Always configure from an empty build directory
		mkdir -p build && cd build && rm -rf -- * && cmake .. "${options[@]}"
		;;
	build)
		cd build && make
		;;
	install)
		# Only create the sym-links when the installation itself succeeded
		cd build && sudo make install && sudo stow --dir="${address}" --target="${taress}" -S "${package}"
		;;
	clean)
		rm -rf build/
		;;
	uninstall)
		sudo stow --dir="${address}" --target="${taress}" -D "${package}"
		sudo rm -rf "${address}/${package}"
		;;
	info)
		echo "package name:" "${package}"
		echo "stow address:" "${address}"
		echo "target address:" "${taress}"
		echo "Cmake options:" "${options[*]}"
		;;
	*)
		echo "Unknown option '${action}'. Run './installer help' for usage." >&2
		exit 1
		;;
esac
|
64
lib/CMakeLists.txt
Normal file
64
lib/CMakeLists.txt
Normal file
@ -0,0 +1,64 @@
|
|||||||
|
# Compiler options: optimise with -O3. These variables are directory-scoped,
# so only the targets defined in this directory are affected.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
# Output directory of the built libraries
set(LIBRARY_OUTPUT_PATH ${PROJECT_BINARY_DIR}/lib)
# Collect the library source files
aux_source_directory(optimization/ GCTL_OPTIMAL_SRC)

# Library targets.
# Target names must be unique, so the shared and the static library cannot
# share one target name. No "lib" prefix or file suffix is needed here; CMake
# adds them itself.
add_library(gctl_optimization SHARED ${GCTL_OPTIMAL_SRC})
add_library(gctl_optimization_static STATIC ${GCTL_OPTIMAL_SRC})
# Give the static library the same output name as the shared one
set_target_properties(gctl_optimization_static PROPERTIES OUTPUT_NAME "gctl_optimization")
# Version numbers of the shared library
set_target_properties(gctl_optimization PROPERTIES
  VERSION ${PROJECT_VERSION}
  SOVERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR})
# Run-time search path and language standard, common to both targets
set_target_properties(gctl_optimization gctl_optimization_static PROPERTIES
  INSTALL_RPATH /usr/local/lib
  CXX_STANDARD 17
  CXX_STANDARD_REQUIRED ON)

# Link against GCTL; PUBLIC so that consumers of either library inherit it
target_link_libraries(gctl_optimization PUBLIC ${GCTL_LIB})
target_link_libraries(gctl_optimization_static PUBLIC ${GCTL_LIB})

set(CONFIG_FILE_PATH lib/cmake/${PROJECT_NAME})

# Package configuration files are generated in this project's binary
# directory so the project also works when added as a subproject
configure_package_config_file(${PROJECT_SOURCE_DIR}/${PROJECT_NAME}Config.cmake.in
  ${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake
  INSTALL_DESTINATION ${CONFIG_FILE_PATH})

write_basic_package_version_file(${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
  VERSION ${PROJECT_VERSION}
  COMPATIBILITY SameMajorVersion)

# Library installation
if(WIN32)
  install(TARGETS gctl_optimization DESTINATION lib)
  install(TARGETS gctl_optimization_static DESTINATION lib)
else()
  install(TARGETS gctl_optimization gctl_optimization_static
    EXPORT ${PROJECT_NAME}Targets
    LIBRARY DESTINATION lib
    ARCHIVE DESTINATION lib)
  install(EXPORT ${PROJECT_NAME}Targets
    DESTINATION ${CONFIG_FILE_PATH})
  install(FILES
    ${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake
    ${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake
    DESTINATION ${CONFIG_FILE_PATH})
endif()

# Header installation
file(GLOB GCTL_HEAD *.h)
file(GLOB GCTL_OPTIMAL_HEAD optimization/*.h)

install(FILES ${GCTL_HEAD} DESTINATION include/gctl)
install(FILES ${GCTL_OPTIMAL_HEAD} DESTINATION include/gctl/optimization)
|
43
lib/optimization.h
Normal file
43
lib/optimization.h
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _GCTL_OPTIMIZATION_H
|
||||||
|
#define _GCTL_OPTIMIZATION_H
|
||||||
|
|
||||||
|
#include "optimization/loss_func.h"
|
||||||
|
#include "optimization/lu.h"
|
||||||
|
#include "optimization/cholesky.h"
|
||||||
|
#include "optimization/svd.h"
|
||||||
|
#include "optimization/lcg.h"
|
||||||
|
#include "optimization/clcg.h"
|
||||||
|
#include "optimization/lgd.h"
|
||||||
|
#include "optimization/lbfgs.h"
|
||||||
|
#include "optimization/sgd.h"
|
||||||
|
#include "optimization/gradnorm.h"
|
||||||
|
#include "optimization/dwa.h"
|
||||||
|
|
||||||
|
#endif // _GCTL_OPTIMIZATION_H
|
127
lib/optimization/cholesky.cpp
Normal file
127
lib/optimization/cholesky.cpp
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "cholesky.h"
|
||||||
|
|
||||||
|
// Constructor
|
||||||
|
|
||||||
|
gctl::cholesky::cholesky(matrix<double> &sourceMatrix) : decomposedMatrix(sourceMatrix)
|
||||||
|
{
|
||||||
|
if (sourceMatrix.empty() || sourceMatrix.row_size() != sourceMatrix.col_size())
|
||||||
|
{
|
||||||
|
throw domain_error("Invalid input matrix. From cholesky::cholesky(...)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decomposition into triangular matrices
|
||||||
|
|
||||||
|
void gctl::cholesky::decompose()
|
||||||
|
{
|
||||||
|
// Enumerate matrix columnwise
|
||||||
|
for (int j = 0; j < decomposedMatrix.col_size(); j++)
|
||||||
|
{
|
||||||
|
for (int i = j; i < decomposedMatrix.row_size(); i++)
|
||||||
|
{
|
||||||
|
if (i == j)
|
||||||
|
{
|
||||||
|
double sum = 0.0;
|
||||||
|
|
||||||
|
for (int k = 0; k < i; k++)
|
||||||
|
{
|
||||||
|
sum += std::pow(decomposedMatrix[i][k], 2.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (decomposedMatrix[i][j] - sum <= 0.0)
|
||||||
|
{
|
||||||
|
// Not positive definite matrix
|
||||||
|
throw runtime_error("The input matrix is not positively defined. From gctl::cholesky::decompose()");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
decomposedMatrix[i][j] = std::sqrt(decomposedMatrix[i][j] - sum);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
double sum = 0.0;
|
||||||
|
|
||||||
|
for (int k = 0; k < j; k++)
|
||||||
|
{
|
||||||
|
sum += (decomposedMatrix[i][k] * decomposedMatrix[j][k]);
|
||||||
|
}
|
||||||
|
|
||||||
|
decomposedMatrix[i][j] = (1 / decomposedMatrix[j][j]) * (decomposedMatrix[i][j] - sum);
|
||||||
|
decomposedMatrix[j][i] = decomposedMatrix[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Solve for x in form Ax = b. A is the original input matrix.
|
||||||
|
|
||||||
|
void gctl::cholesky::solve(const array<double>& b, array<double> &x)
|
||||||
|
{
|
||||||
|
if (b.empty())
|
||||||
|
{
|
||||||
|
throw domain_error("Invalid target vector. From lu<double>::solve(...)");
|
||||||
|
}
|
||||||
|
|
||||||
|
x.resize(b.size());
|
||||||
|
|
||||||
|
// First solve lower triangular * x = b with forward substitution
|
||||||
|
|
||||||
|
for (int i = 0; i < b.size(); i++)
|
||||||
|
{
|
||||||
|
double sum = 0.0;
|
||||||
|
|
||||||
|
for (int j = 0; j < i; j++)
|
||||||
|
{
|
||||||
|
sum += (decomposedMatrix[i][j] * x[j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
x[i] = (b[i] - sum) / decomposedMatrix[i][i];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now solve upper triangular (transpose of lower triangular) * x = x with back substitution.
|
||||||
|
// Note that x can be solved in place using the existing x vector. No need to allocate
|
||||||
|
// another vector.
|
||||||
|
|
||||||
|
for (int i = static_cast<int>(b.size()) - 1; i >= 0; i--)
|
||||||
|
{
|
||||||
|
double sum = 0.0;
|
||||||
|
|
||||||
|
for (int j = static_cast<int>(b.size()) - 1; j > i; j--)
|
||||||
|
{
|
||||||
|
sum += (decomposedMatrix[i][j] * x[j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
x[i] = (x[i] - sum) / decomposedMatrix[i][i];
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
55
lib/optimization/cholesky.h
Normal file
55
lib/optimization/cholesky.h
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _CHOLESKY_H
|
||||||
|
#define _CHOLESKY_H
|
||||||
|
|
||||||
|
#include "gctl/core.h"
|
||||||
|
|
||||||
|
namespace gctl
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* @brief Cholesky matrix decomposition to lower triangular matrix and its conjugate transpose
|
||||||
|
*
|
||||||
|
* @note Restricted to positive-definite matrices
|
||||||
|
*/
|
||||||
|
class cholesky
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
cholesky(matrix<double> &sourceMatrix); // Matrix is decomposed in-place
|
||||||
|
virtual ~cholesky(){}
|
||||||
|
void decompose(); ///< Decomposition into triangular matrices.
|
||||||
|
void solve(const array<double>& b, array<double> &x); ///< Solve for x in form Ax = b. A is the original input matrix.
|
||||||
|
|
||||||
|
protected:
|
||||||
|
cholesky(const gctl::cholesky&) = delete;
|
||||||
|
void operator=(const gctl::cholesky&) = delete;
|
||||||
|
matrix<double> &decomposedMatrix;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // _CHOLESKY_H
|
373
lib/optimization/clcg.cpp
Normal file
373
lib/optimization/clcg.cpp
Normal file
@ -0,0 +1,373 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "clcg.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default parameter for conjugate gradient methods
|
||||||
|
*/
|
||||||
|
// File-local defaults copied into every solver on construction and returned by default_clcg_para().
// NOTE(review): the three initialisers presumably map to max_iterations, epsilon and abs_diff,
// in that order — confirm against the declaration of clcg_para.
static const gctl::clcg_para clcg_defparam = {0, 1e-8, 0};
|
||||||
|
|
||||||
|
int gctl::clcg_solver::CLCG_Progress(const array<std::complex<double> > &m, const double converge, const clcg_para ¶m, size_t t)
|
||||||
|
{
|
||||||
|
if (converge <= param.epsilon)
|
||||||
|
{
|
||||||
|
std::clog << GCTL_CLEARLINE << "\rIteration-times: " << t << "\tconvergence: " << converge;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (clcg_inter_ > 0 && t%clcg_inter_ == 0)
|
||||||
|
{
|
||||||
|
std::clog << GCTL_CLEARLINE << "\rIteration-times: " << t << "\tconvergence: " << converge;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start with the default parameters, a report interval of 1 and silent mode off.
gctl::clcg_solver::clcg_solver()
{
    clcg_silent_ = false;
    clcg_inter_ = 1;
    clcg_param_ = clcg_defparam;
}
|
||||||
|
|
||||||
|
// Nothing to release explicitly.
gctl::clcg_solver::~clcg_solver() = default;
|
||||||
|
|
||||||
|
void gctl::clcg_solver::clcg_silent()
|
||||||
|
{
|
||||||
|
clcg_silent_ = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::clcg_solver::set_clcg_report_interval(size_t inter)
|
||||||
|
{
|
||||||
|
clcg_inter_ = inter;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::clcg_solver::set_clcg_para(const clcg_para &in_param)
|
||||||
|
{
|
||||||
|
clcg_param_ = in_param;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Load the solver parameters from the "clcg" table of a TOML document.
 *
 * The parameters are first reset to the defaults; every key present in the
 * table ("max_iterations", "epsilon", "abs_diff") then overrides its default.
 * A document without a "clcg" table leaves the defaults in place.
 *
 * @param toml_data parsed TOML document
 */
void gctl::clcg_solver::set_clcg_para(const toml::value &toml_data)
{
    clcg_param_ = clcg_defparam;

    std::string CLCG = "clcg";
    if (!toml_data.contains(CLCG))
    {
        return;
    }

    const auto &table = toml_data.at(CLCG);
    if (table.contains("max_iterations"))
    {
        clcg_param_.max_iterations = toml::find<int>(toml_data, CLCG, "max_iterations");
    }
    if (table.contains("epsilon"))
    {
        clcg_param_.epsilon = toml::find<double>(toml_data, CLCG, "epsilon");
    }
    if (table.contains("abs_diff"))
    {
        clcg_param_.abs_diff = toml::find<int>(toml_data, CLCG, "abs_diff");
    }
}
|
||||||
|
|
||||||
|
/**
 * @brief Report the meaning of a solver return code.
 *
 * With err_throw false, a coloured "Success"/"Fail" prefix, the message and a
 * line end are written to ss. With err_throw true, a negative code raises
 * std::runtime_error carrying the message, and a non-negative code writes
 * only the bare message to ss.
 *
 * @param err_code  return code of a solver
 * @param ss        stream that receives the text
 * @param err_throw throw on negative codes instead of printing
 * @throws std::runtime_error when err_throw is true and err_code is negative
 */
void gctl::clcg_solver::clcg_error_str(clcg_return_code err_code, std::ostream &ss, bool err_throw)
{
    // Coloured prefix: green for success, red for failure
#if defined _WINDOWS || __WIN32__
    if (!err_throw)
    {
        if (err_code >= 0)
        {
            SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_GREEN);
            ss << "Success! ";
        }
        else
        {
            SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_RED);
            ss << "Fail! ";
        }
    }
#else
    if (!err_throw)
    {
        if (err_code >= 0)
            ss << "\033[1m\033[32mCLCG Success! ";
        else
            ss << "\033[1m\033[31mCLCG Fail! ";
    }
#endif

    std::string err_str;
    switch (err_code)
    {
        case CLCG_SUCCESS:
            err_str = "Iteration reached convergence."; break;
        case CLCG_STOP:
            err_str = "Iteration is stopped by the progress evaluation function."; break;
        case CLCG_ALREADY_OPTIMIZIED:
            err_str = "The variables are already optimized."; break;
        case CLCG_UNKNOWN_ERROR:
            err_str = "Unknown error."; break;
        case CLCG_INVILAD_VARIABLE_SIZE:
            err_str = "The size of the variables is negative."; break;
        case CLCG_INVILAD_MAX_ITERATIONS:
            err_str = "The maximal iteration times is negative."; break;
        case CLCG_INVILAD_EPSILON:
            err_str = "The epsilon is not in the range (0, 1)."; break;
        case CLCG_REACHED_MAX_ITERATIONS:
            err_str = "The maximal iteration has been reached."; break;
        case CLCG_NAN_VALUE:
            err_str = "The model values are NaN."; break;
        case CLCG_INVALID_POINTER:
            err_str = "Invalid pointer."; break;
        case CLCG_SIZE_NOT_MATCH:
            err_str = "The sizes of the solution and target do not match."; break;
        case CLCG_UNKNOWN_SOLVER:
            err_str = "Unknown solver."; break;
        default:
            err_str = "Unknown error."; break;
    }

    if (err_throw && err_code < 0) throw std::runtime_error(err_str.c_str());
    else ss << err_str;

    // Restore the default console colour and end the line
#if defined _WINDOWS || __WIN32__
    if (!err_throw)
    {
        SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7);
        ss << std::endl;
    }
#else
    if (!err_throw)
    {
        ss << "\033[0m" << std::endl;
    }
#endif
}
|
||||||
|
|
||||||
|
/**
 * @brief Get a copy of the library's default solver parameters.
 *
 * @return A clcg_para value copied from clcg_defparam.
 */
gctl::clcg_para gctl::clcg_solver::default_clcg_para()
{
    // Returned by value, so the caller can modify the copy freely.
    return clcg_defparam;
}
|
||||||
|
|
||||||
|
/**
 * @brief Solve A*m = B with the selected solver and report the outcome.
 *
 * @param m         Initial solution on input; updated in place by the solver.
 * @param B         Right-hand side (target) vector.
 * @param solver_id Which solver routine to run.
 * @param ss        Stream that receives the timing summary and the status message.
 * @param verbose   If true the status message is always reported; otherwise it is
 *                  reported only for negative (error) return codes.
 * @param er_throw  Forwarded to clcg_error_str(); when true the timing summary is
 *                  skipped and a negative return code raises std::runtime_error.
 *
 * @throws std::invalid_argument if solver_id is not a known solver.
 *
 * @note In silent mode (clcg_silent_ is true) nothing is printed and any negative
 *       return code is always raised as an exception.
 */
void gctl::clcg_solver::CLCG_Minimize(array<std::complex<double> > &m, const array<std::complex<double> > &B,
    clcg_solver_type solver_id, std::ostream &ss, bool verbose, bool er_throw)
{
#ifdef GCTL_OPENMP
    double start = omp_get_wtime();
#else
    clock_t start = clock();
#endif

    // Single dispatch point shared by the silent and the reporting paths.
    clcg_return_code ret;
    switch (solver_id)
    {
        case CLCG_BICG:     ret = clbicg(m, B); break;
        case CLCG_BICG_SYM: ret = clbicg_symmetric(m, B); break;
        case CLCG_CGS:      ret = clcgs(m, B); break;
        case CLCG_BICGSTAB: ret = clbicgstab(m, B); break;
        case CLCG_TFQMR:    ret = cltfqmr(m, B); break;
        default:
            throw std::invalid_argument("Invalid solver type. gctl::clcg_solver<T>::Minimize(...)");
    }

    // Elapsed time in milliseconds.
#ifdef GCTL_OPENMP
    double costime = 1000*(omp_get_wtime() - start);
#else
    double costime = 1000*(clock() - start)/(double)CLOCKS_PER_SEC;
#endif

    if (clcg_silent_)
    {
        // Silent mode: no output, but errors must not pass unnoticed.
        if (ret < 0) clcg_error_str(ret, ss, true);
        return;
    }

    if (!er_throw)
    {
        const char *solver_name = "Unknown";
        switch (solver_id)
        {
            case CLCG_BICG:     solver_name = "Bi-CG"; break;
            case CLCG_BICG_SYM: solver_name = "Bi-CG (symmetrically accelerated)"; break;
            case CLCG_CGS:      solver_name = "CGS"; break;
            case CLCG_BICGSTAB: solver_name = "Bi-CGSTAB"; break;
            case CLCG_TFQMR:    solver_name = "TFQMR"; break;
            default: break;
        }

        // The summary goes to the caller-supplied stream, like the status message below.
        ss << std::endl;
        ss << "Solver: " << solver_name << ". Times cost: " << costime << " ms" << std::endl;
    }

    if (verbose || ret < 0) clcg_error_str(ret, ss, er_throw);
    return;
}
|
||||||
|
|
||||||
|
/**
 * @brief Bi-Conjugate Gradient solver (placeholder, not implemented here).
 *
 * @param m Solution vector (unused).
 * @param B Right-hand side vector (unused).
 *
 * @return Always CLCG_UNKNOWN_SOLVER, a negative code, so callers see a
 *         well-defined error instead of an uninitialized value.
 */
gctl::clcg_return_code gctl::clcg_solver::clbicg(array<std::complex<double> > &m, const array<std::complex<double> > &B)
{
    (void)m;
    (void)B;
    return CLCG_UNKNOWN_SOLVER;
}
|
||||||
|
|
||||||
|
/**
 * @brief Bi-Conjugate Gradient solver accelerated for a complex symmetric A.
 *
 * Iterates on m until the residual measure drops to clcg_param_.epsilon, the
 * progress callback asks to stop, or the iteration limit is hit.
 *
 * The residual measure is sqrt(rk_square)/N when clcg_param_.abs_diff is set,
 * and rk_square/max(r0_square, 1.0) otherwise, where rk_square is
 * std::norm(vecinner(r, r)) of the current residual vector r.
 *
 * @param m Initial solution on input; updated in place.
 * @param B Right-hand side vector; its size defines the problem size N.
 *
 * @return CLCG_ALREADY_OPTIMIZIED, CLCG_CONVERGENCE or CLCG_STOP on normal
 *         termination, otherwise a negative clcg_return_code.
 */
gctl::clcg_return_code gctl::clcg_solver::clbicg_symmetric(array<std::complex<double> > &m, const array<std::complex<double> > &B)
{
    size_t n_size = B.size();

    // check parameters
    if (n_size == 0) return CLCG_INVILAD_VARIABLE_SIZE;
    if (m.size() != n_size) return CLCG_SIZE_NOT_MATCH;
    if (clcg_param_.max_iterations < 0) return CLCG_INVILAD_MAX_ITERATIONS;
    if (clcg_param_.epsilon <= 0.0 || clcg_param_.epsilon >= 1.0) return CLCG_INVILAD_EPSILON;

    r1k.resize(n_size);
    d1k.resize(n_size);
    Ax.resize(n_size);

    // Initial residual and search direction.
    // NOTE(review): presumably r = B - A*m and d = r; confirm against vecdiff()/veccpy().
    CLCG_Ax(m, Ax, gctl::NoTrans, gctl::NoConj);

    std::complex<double> one_z(1.0, 0.0);
    vecdiff(r1k, B, Ax, one_z, one_z);
    veccpy(d1k, r1k, one_z);

    // rkrk drives the step-length recurrences.
    std::complex<double> rkrk = vecdot(r1k, r1k);

    // rk_square drives the convergence test only.
    std::complex<double> rk_mod = vecinner(r1k, r1k);
    double rk_square = std::norm(rk_mod);
    double r0_square = rk_square;
    if (r0_square < 1.0) r0_square = 1.0;

    // Use the same measure before and inside the loop so that the two
    // termination tests cannot disagree about the selected mode.
    double residual;
    if (clcg_param_.abs_diff) residual = sqrt(rk_square)/n_size;
    else residual = rk_square/r0_square;

    if (residual <= clcg_param_.epsilon)
    {
        CLCG_Progress(m, residual, clcg_param_, 0);
        return CLCG_ALREADY_OPTIMIZIED;
    }

    std::complex<double> ak, rkrk2, betak, dkAx;

    size_t t = 0;
    while(1)
    {
        if (clcg_param_.abs_diff) residual = sqrt(rk_square)/n_size;
        else residual = rk_square/r0_square;

        // A non-zero return from the progress callback aborts the iteration.
        if (CLCG_Progress(m, residual, clcg_param_, t)) return CLCG_STOP;

        if (residual <= clcg_param_.epsilon) return CLCG_CONVERGENCE;

        // max_iterations == 0 means "no limit".
        if (clcg_param_.max_iterations > 0 && t+1 > static_cast<size_t>(clcg_param_.max_iterations))
        {
            break;
        }

        t++;

        CLCG_Ax(d1k, Ax, gctl::NoTrans, gctl::NoConj);
        dkAx = vecdot(d1k, Ax);
        ak = rkrk/dkAx;

        vecapp(m, d1k, ak);
        vecsub(r1k, Ax, ak);

        // Same product as for the initial residual (vecinner), so that
        // rk_square and r0_square are comparable quantities.
        rk_mod = vecinner(r1k, r1k);
        rk_square = std::norm(rk_mod);

        if (!vecvalid(m)) return CLCG_NAN_VALUE;

        rkrk2 = vecdot(r1k, r1k);
        betak = rkrk2/rkrk;
        rkrk = rkrk2;

        vecadd(d1k, d1k, r1k, betak, one_z);
    }

    return CLCG_REACHED_MAX_ITERATIONS;
}
|
||||||
|
|
||||||
|
/**
 * @brief Conjugate Gradient Squared solver (placeholder, not implemented here).
 *
 * @param m Solution vector (unused).
 * @param B Right-hand side vector (unused).
 *
 * @return Always CLCG_UNKNOWN_SOLVER, a negative code, so callers see a
 *         well-defined error instead of an uninitialized value.
 */
gctl::clcg_return_code gctl::clcg_solver::clcgs(array<std::complex<double> > &m, const array<std::complex<double> > &B)
{
    (void)m;
    (void)B;
    return CLCG_UNKNOWN_SOLVER;
}
|
||||||
|
|
||||||
|
/**
 * @brief Bi-CGSTAB solver (placeholder, not implemented here).
 *
 * @param m Solution vector (unused).
 * @param B Right-hand side vector (unused).
 *
 * @return Always CLCG_UNKNOWN_SOLVER, a negative code, so callers see a
 *         well-defined error instead of an uninitialized value.
 */
gctl::clcg_return_code gctl::clcg_solver::clbicgstab(array<std::complex<double> > &m, const array<std::complex<double> > &B)
{
    (void)m;
    (void)B;
    return CLCG_UNKNOWN_SOLVER;
}
|
||||||
|
|
||||||
|
/**
 * @brief Transpose Free Quasi-Minimal Residual solver (placeholder, not implemented here).
 *
 * @param m Solution vector (unused).
 * @param B Right-hand side vector (unused).
 *
 * @return Always CLCG_UNKNOWN_SOLVER, a negative code, so callers see a
 *         well-defined error instead of an uninitialized value.
 */
gctl::clcg_return_code gctl::clcg_solver::cltfqmr(array<std::complex<double> > &m, const array<std::complex<double> > &B)
{
    (void)m;
    (void)B;
    return CLCG_UNKNOWN_SOLVER;
}
|
166
lib/optimization/clcg.h
Normal file
166
lib/optimization/clcg.h
Normal file
@ -0,0 +1,166 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _GCTL_CLCG_H
|
||||||
|
#define _GCTL_CLCG_H
|
||||||
|
|
||||||
|
#include "gctl/core.h"
|
||||||
|
#include "gctl/maths.h"
|
||||||
|
#include "gctl/algorithm.h"
|
||||||
|
|
||||||
|
#include "gctl_optimization_config.h"
|
||||||
|
#ifdef GCTL_OPTIMIZATION_TOML
|
||||||
|
#include "toml.hpp"
|
||||||
|
#endif // GCTL_OPTIMIZATION_TOML
|
||||||
|
|
||||||
|
#if defined _WINDOWS || __WIN32__
|
||||||
|
#include "windows.h"
|
||||||
|
#endif // _WINDOWS || __WIN32__
|
||||||
|
|
||||||
|
namespace gctl
|
||||||
|
{
|
||||||
|
/**
 * @brief Identifiers of the solver routines selectable in clcg_solver::CLCG_Minimize().
 */
enum clcg_solver_type
{
    CLCG_BICG     = 0, ///< Bi-Conjugate Gradient method.
    CLCG_BICG_SYM = 1, ///< Bi-Conjugate Gradient method accelerated for complex symmetric A.
    CLCG_CGS      = 2, ///< Conjugate Gradient Squared method.
    CLCG_BICGSTAB = 3, ///< Bi-Conjugate Gradient Stabilized method.
    CLCG_TFQMR    = 4  ///< Transpose Free Quasi-Minimal Residual method.
};
|
||||||
|
|
||||||
|
/**
 * @brief Status codes returned by the clcg solver routines.
 *
 * Non-negative values are normal terminations; negative values are errors.
 */
enum clcg_return_code
{
    CLCG_SUCCESS                = 0,     ///< The solver terminated successfully.
    CLCG_CONVERGENCE            = 0,     ///< The iteration reached convergence (same value as CLCG_SUCCESS).
    CLCG_STOP                   = 1,     ///< The iteration was stopped by the progress function.
    CLCG_ALREADY_OPTIMIZIED     = 2,     ///< The initial solution already satisfies the convergence test.
    // Error codes start here; every one of them is negative.
    CLCG_UNKNOWN_ERROR          = -1024, ///< Unknown error.
    CLCG_INVILAD_VARIABLE_SIZE  = -1023, ///< The size of the variables is invalid.
    CLCG_INVILAD_MAX_ITERATIONS = -1022, ///< The maximal iteration times is negative.
    CLCG_INVILAD_EPSILON        = -1021, ///< The epsilon is not in the range (0, 1).
    CLCG_REACHED_MAX_ITERATIONS = -1020, ///< The iteration reached the maximal limit.
    CLCG_NAN_VALUE              = -1019, ///< The model values are NaN.
    CLCG_INVALID_POINTER        = -1018, ///< Invalid pointer.
    CLCG_SIZE_NOT_MATCH         = -1017, ///< The sizes of m and B do not match.
    CLCG_UNKNOWN_SOLVER         = -1016  ///< Unknown solver.
};
|
||||||
|
|
||||||
|
/**
 * @brief Parameters controlling the clcg solver routines.
 */
struct clcg_para
{
    /**
     * Upper limit on the number of iterations. Zero disables the limit, so the
     * iteration runs until the convergence test passes. Negative values are
     * rejected by the solver with CLCG_INVILAD_MAX_ITERATIONS.
     */
    int max_iterations;

    /**
     * Threshold of the convergence test; the solver stops once its residual
     * measure is less than or equal to this value. The solver only accepts
     * values inside the open interval (0, 1) and returns CLCG_INVILAD_EPSILON
     * otherwise. Which residual measure is compared is selected by abs_diff.
     */
    double epsilon;

    /**
     * Selects the residual measure used in the convergence test. Zero selects
     * the relative measure (current value divided by the larger of the initial
     * value and 1.0); any non-zero value selects the absolute measure averaged
     * over the number of variables.
     */
    int abs_diff;
};
|
||||||
|
|
||||||
|
class clcg_solver
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
clcg_para clcg_param_;
|
||||||
|
size_t clcg_inter_;
|
||||||
|
bool clcg_silent_;
|
||||||
|
|
||||||
|
array<std::complex<double> > r1k, r2k, d1k, d2k;
|
||||||
|
array<std::complex<double> > Ax;
|
||||||
|
|
||||||
|
public:
|
||||||
|
clcg_solver();
|
||||||
|
virtual ~clcg_solver();
|
||||||
|
|
||||||
|
virtual void CLCG_Ax(const array<std::complex<double> > &x, array<std::complex<double> > &ax,
|
||||||
|
matrix_layout_e layout, conjugate_type_e conj) = 0;
|
||||||
|
virtual int CLCG_Progress(const array<std::complex<double> > &m, const double converge, const clcg_para ¶m, size_t t);
|
||||||
|
|
||||||
|
void clcg_silent();
|
||||||
|
void set_clcg_report_interval(size_t inter);
|
||||||
|
void set_clcg_para(const clcg_para ¶m);
|
||||||
|
void clcg_error_str(clcg_return_code err_code, std::ostream &ss = std::clog, bool err_throw = false);
|
||||||
|
clcg_para default_clcg_para();
|
||||||
|
|
||||||
|
#ifdef GCTL_OPTIMIZATION_TOML
|
||||||
|
void set_clcg_para(const toml::value &toml_data);
|
||||||
|
#endif // GCTL_OPTIMIZATION_TOML
|
||||||
|
|
||||||
|
clcg_return_code clbicg(array<std::complex<double> > &m, const array<std::complex<double> > &B);
|
||||||
|
clcg_return_code clbicg_symmetric(array<std::complex<double> > &m, const array<std::complex<double> > &B);
|
||||||
|
clcg_return_code clcgs(array<std::complex<double> > &m, const array<std::complex<double> > &B);
|
||||||
|
clcg_return_code clbicgstab(array<std::complex<double> > &m, const array<std::complex<double> > &B);
|
||||||
|
clcg_return_code cltfqmr(array<std::complex<double> > &m, const array<std::complex<double> > &B);
|
||||||
|
|
||||||
|
|
||||||
|
void CLCG_Minimize(array<std::complex<double> > &m, const array<std::complex<double> > &B,
|
||||||
|
clcg_solver_type solver_id = CLCG_CGS, std::ostream &ss = std::clog,
|
||||||
|
bool verbose = true, bool er_throw = false);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // _GCTL_CLCG_H
|
129
lib/optimization/dwa.cpp
Normal file
129
lib/optimization/dwa.cpp
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "dwa.h"
|
||||||
|
|
||||||
|
/**
 * @brief Construct an empty DWA evaluator.
 *
 * Every scalar member gets a defined value, so the accumulators can be used
 * safely once InitDWA() has sized the arrays.
 */
gctl::dwa::dwa()
    : l_ready_(false), fx_c_(0), fx_n_(0), K_(0.0), T_(1.0), multi_fx_(0.0)
{
}

/// Nothing to release explicitly; members clean up themselves.
gctl::dwa::~dwa(){}
|
||||||
|
|
||||||
|
void gctl::dwa::InitDWA(size_t num, size_t grad_num)
|
||||||
|
{
|
||||||
|
fx_n_ = num;
|
||||||
|
K_ = 1.0*num;
|
||||||
|
T_ = 1.0;
|
||||||
|
|
||||||
|
wgts_.resize(num, 1.0);
|
||||||
|
L_p1_.resize(num, 1.0);
|
||||||
|
L_p2_.resize(num, 1.0);
|
||||||
|
grad_.resize(grad_num, 0.0);
|
||||||
|
|
||||||
|
rcd_wgts_.push_back(wgts_);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::dwa::AddSingleLoss(double fx, const array<double> &g)
|
||||||
|
{
|
||||||
|
multi_fx_ += wgts_[fx_c_]*fx;
|
||||||
|
|
||||||
|
L_p2_[fx_c_] = L_p1_[fx_c_];
|
||||||
|
L_p1_[fx_c_] = fx;
|
||||||
|
|
||||||
|
for (size_t i = 0; i < g.size(); i++)
|
||||||
|
{
|
||||||
|
grad_[i] += wgts_[fx_c_]*g[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
fx_c_++;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::dwa::UpdateWeights()
|
||||||
|
{
|
||||||
|
double sum = 0.0;
|
||||||
|
for (size_t i = 0; i < fx_n_; i++)
|
||||||
|
{
|
||||||
|
if (l_ready_) wgts_[i] = exp(L_p1_[i]/(L_p2_[i]*T_));
|
||||||
|
else wgts_[i] = 1.0;
|
||||||
|
|
||||||
|
sum += wgts_[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < fx_n_; i++)
|
||||||
|
{
|
||||||
|
wgts_[i] *= K_/sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
l_ready_ = true;
|
||||||
|
rcd_wgts_.push_back(wgts_);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Return the merged objective value and gradient, then reset the sums.
 *
 * @param g Receives the accumulated (weighted) gradient.
 *
 * @return The accumulated (weighted) objective value.
 *
 * @throws std::runtime_error if the number of losses added since the last
 *         call differs from the number declared in InitDWA().
 */
double gctl::dwa::DWALoss(array<double> &g)
{
    if (fx_c_ != fx_n_)
    {
        throw std::runtime_error("Not enough loss functions evaluated. From gctl::dwa::DWALoss(...)");
    }

    double fx = multi_fx_;
    g = grad_;

    // Start the next round from clean accumulators.
    fx_c_ = 0;
    multi_fx_ = 0.0;
    grad_.assign_all(0.0);
    return fx;
}
|
||||||
|
|
||||||
|
void gctl::dwa::set_control_temperature(double t)
|
||||||
|
{
|
||||||
|
T_ = t;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::dwa::set_normal_sum(double k)
|
||||||
|
{
|
||||||
|
K_ = k;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Copy all recorded weight sets into one flat array.
 *
 * Record r occupies elements [r*fx_n_, (r+1)*fx_n_) of the output.
 *
 * @param logs Output array; resized to fx_n_ times the number of records.
 */
void gctl::dwa::get_records(array<double> &logs)
{
    const size_t rounds = rcd_wgts_.size();
    logs.resize(fx_n_*rounds);

    size_t pos = 0; // running flat index, equals r*fx_n_ + c
    for (size_t r = 0; r < rounds; ++r)
    {
        for (size_t c = 0; c < fx_n_; ++c)
        {
            logs[pos++] = rcd_wgts_[r][c];
        }
    }
}
|
114
lib/optimization/dwa.h
Normal file
114
lib/optimization/dwa.h
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _GCTL_DWA_H
|
||||||
|
#define _GCTL_DWA_H
|
||||||
|
|
||||||
|
#include "gctl/core.h"
|
||||||
|
|
||||||
|
namespace gctl
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* @brief Lost balanced multitask evaluation.
|
||||||
|
*
|
||||||
|
* @note Reference: 2019. End-to-end multitask learning with attention.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
class dwa
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
bool l_ready_;
|
||||||
|
size_t fx_c_, fx_n_;
|
||||||
|
double K_, T_, multi_fx_;
|
||||||
|
array<double> wgts_;
|
||||||
|
array<double> L_p1_, L_p2_;
|
||||||
|
array<double> grad_;
|
||||||
|
std::vector<array<double>> rcd_wgts_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
dwa();
|
||||||
|
virtual ~dwa();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Initiate the number of loss functions and size of the model gradients.
|
||||||
|
*
|
||||||
|
* @note This function must be called at first.
|
||||||
|
*
|
||||||
|
* @param num Number of the loss functions
|
||||||
|
* @param grad_num Size of the model gradients
|
||||||
|
*/
|
||||||
|
void InitDWA(size_t num, size_t grad_num);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Add the value of a single loss function and the current model gradients.
|
||||||
|
*
|
||||||
|
* @param fx objective value
|
||||||
|
* @param g model gradients
|
||||||
|
*/
|
||||||
|
void AddSingleLoss(double fx, const array<double> &g);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the merged objective value and the model gradients.
|
||||||
|
*
|
||||||
|
* @note All single loss functions must be added before calling this function. The merged objective value and the model gradients will be reset after the calling.
|
||||||
|
*
|
||||||
|
* @param g model gradients
|
||||||
|
*
|
||||||
|
* @return objective value
|
||||||
|
*/
|
||||||
|
double DWALoss(array<double> &g);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Update weights for single loss functions using the DWA algorithm.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void UpdateWeights();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the cooling temperature. The bigger value is, the closer the weights will be to one. The default is 1.0.
|
||||||
|
*
|
||||||
|
* @param t Input temperature
|
||||||
|
*/
|
||||||
|
void set_control_temperature(double t);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the normal sum of the weights. Ths default equals to function size.
|
||||||
|
*
|
||||||
|
* @param k Input sum
|
||||||
|
*/
|
||||||
|
void set_normal_sum(double k);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the recorded weights. Size of the log equals the function size times iteration times.
|
||||||
|
*
|
||||||
|
* @param logs Output log
|
||||||
|
*/
|
||||||
|
void get_records(array<double> &logs);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // _GCTL_DWA_H
|
1
lib/optimization/gctl_optimization_config.h
Normal file
1
lib/optimization/gctl_optimization_config.h
Normal file
@ -0,0 +1 @@
|
|||||||
|
#define GCTL_OPTIMIZATION_TOML
|
328
lib/optimization/gradnorm.cpp
Normal file
328
lib/optimization/gradnorm.cpp
Normal file
@ -0,0 +1,328 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "gradnorm.h"
|
||||||
|
|
||||||
|
/**
 * @brief Construct an uninitialized GradNorm evaluator.
 *
 * Every scalar member gets a defined value; the arrays are sized later by
 * InitGradNorm().
 */
gctl::grad_norm::grad_norm()
{
    fx_c_ = 0;
    fx_n_ = 0;
    alpha_ = 1.0;
    lamda_ = 0.001;
    T_ = 1.0;
    resi_T_ = 0.0;
    multi_fx_ = 0.0; // accumulated with += before it is ever reset
    initialized_ = false;
}

/// Nothing to release explicitly; members clean up themselves.
gctl::grad_norm::~grad_norm(){}
|
||||||
|
|
||||||
|
void gctl::grad_norm::InitGradNorm(size_t num, size_t grad_num)
|
||||||
|
{
|
||||||
|
fx_n_ = num;
|
||||||
|
T_ = 1.0;
|
||||||
|
resi_T_ = 0.0;
|
||||||
|
|
||||||
|
fst_iter_.resize(num, true);
|
||||||
|
wgts_.resize(num, 1.0/num);
|
||||||
|
fx0_.resize(num, 0.0);
|
||||||
|
Gw_.resize(num, 0.0);
|
||||||
|
Gdw_.resize(num, 0.0);
|
||||||
|
Lx_.resize(num, 0.0);
|
||||||
|
grad_.resize(grad_num, 0.0);
|
||||||
|
rcd_fxs_.resize(num, 0.0);
|
||||||
|
fixed_wgts_.resize(num, -1.0);
|
||||||
|
|
||||||
|
rcd_wgts_.reserve(100000);
|
||||||
|
for (size_t i = 0; i < fx_n_; i++)
|
||||||
|
{
|
||||||
|
rcd_wgts_.push_back(wgts_[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
initialized_ = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
double gctl::grad_norm::AddSingleLoss(double fx, const array<double> &g)
|
||||||
|
{
|
||||||
|
if (fst_iter_[fx_c_])
|
||||||
|
{
|
||||||
|
fx0_[fx_c_] = fx;
|
||||||
|
fst_iter_[fx_c_] = false;
|
||||||
|
}
|
||||||
|
Lx_[fx_c_] = fx/fx0_[fx_c_];
|
||||||
|
|
||||||
|
double curr_fx = wgts_[fx_c_]*fx;
|
||||||
|
multi_fx_ += curr_fx;
|
||||||
|
rcd_fxs_[fx_c_] = fx;
|
||||||
|
|
||||||
|
double sum = 0.0;
|
||||||
|
for (size_t i = 0; i < g.size(); i++)
|
||||||
|
{
|
||||||
|
sum += g[i]*g[i];
|
||||||
|
grad_[i] += wgts_[fx_c_]*g[i];
|
||||||
|
}
|
||||||
|
Gw_[fx_c_] = sqrt(wgts_[fx_c_]*wgts_[fx_c_]*sum);
|
||||||
|
Gdw_[fx_c_] = sqrt(sum); // wgts_[fx_c_]*sum/Gw_[fx_c_]
|
||||||
|
|
||||||
|
fx_c_++;
|
||||||
|
return curr_fx;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::grad_norm::UpdateWeights()
|
||||||
|
{
|
||||||
|
double ac = 0;
|
||||||
|
double avg_Lx = 0.0, avg_Gw = 0.0;
|
||||||
|
resi_T_ = T_;
|
||||||
|
for (size_t i = 0; i < fx_n_; i++)
|
||||||
|
{
|
||||||
|
if (fixed_wgts_[i] < 0.0)
|
||||||
|
{
|
||||||
|
avg_Lx += Lx_[i];
|
||||||
|
avg_Gw += Gw_[i];
|
||||||
|
ac += 1.0;
|
||||||
|
}
|
||||||
|
else resi_T_ -= fixed_wgts_[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
avg_Lx /= ac;
|
||||||
|
avg_Gw /= ac;
|
||||||
|
|
||||||
|
double r_i, sum = 0.0;
|
||||||
|
|
||||||
|
// L1 norm approach
|
||||||
|
for (size_t i = 0; i < fx_n_; i++)
|
||||||
|
{
|
||||||
|
if (fixed_wgts_[i] < 0.0)
|
||||||
|
{
|
||||||
|
r_i = Lx_[i]/avg_Lx;
|
||||||
|
|
||||||
|
if (Gw_[i] >= avg_Gw*pow(r_i, alpha_))
|
||||||
|
{
|
||||||
|
wgts_[i] -= lamda_*Gdw_[i];
|
||||||
|
}
|
||||||
|
else wgts_[i] += lamda_*Gdw_[i];
|
||||||
|
|
||||||
|
// make sure the weights are positive
|
||||||
|
wgts_[i] = std::max(wgts_[i], 1e-16);
|
||||||
|
|
||||||
|
sum += wgts_[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < fx_n_; i++)
|
||||||
|
{
|
||||||
|
if (fixed_wgts_[i] < 0.0) wgts_[i] *= resi_T_/sum;
|
||||||
|
rcd_wgts_.push_back(wgts_[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::grad_norm::ShowStatistics(std::ostream &ss, bool one_line)
|
||||||
|
{
|
||||||
|
double s, t = 0.0;
|
||||||
|
|
||||||
|
if (one_line)
|
||||||
|
{
|
||||||
|
ss << "Wgts:";
|
||||||
|
for (size_t i = 0; i < fx_n_; i++)
|
||||||
|
{
|
||||||
|
ss << " " << wgts_[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
ss << ", Loss:";
|
||||||
|
for (size_t i = 0; i < fx_n_; i++)
|
||||||
|
{
|
||||||
|
ss << " " << rcd_fxs_[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
ss << ", WgtLoss:";
|
||||||
|
for (size_t i = 0; i < fx_n_; i++)
|
||||||
|
{
|
||||||
|
s = wgts_[i]*rcd_fxs_[i];
|
||||||
|
ss << " " << s;
|
||||||
|
t += s;
|
||||||
|
}
|
||||||
|
ss << ", Total: " << t << "\n";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
ss << "----------------------------\n";
|
||||||
|
ss << "GradNorm's Progress\n";
|
||||||
|
ss << "Tasks' weight: ";
|
||||||
|
for (size_t i = 0; i < fx_n_; i++)
|
||||||
|
{
|
||||||
|
ss << wgts_[i] << " | ";
|
||||||
|
}
|
||||||
|
ss << "\n";
|
||||||
|
|
||||||
|
ss << "Tasks' loss: ";
|
||||||
|
for (size_t i = 0; i < fx_n_; i++)
|
||||||
|
{
|
||||||
|
ss << rcd_fxs_[i] << " | ";
|
||||||
|
}
|
||||||
|
ss << "\n";
|
||||||
|
|
||||||
|
ss << "Weighted losses: ";
|
||||||
|
for (size_t i = 0; i < fx_n_; i++)
|
||||||
|
{
|
||||||
|
s = wgts_[i]*rcd_fxs_[i];
|
||||||
|
ss << s << " | ";
|
||||||
|
t += s;
|
||||||
|
}
|
||||||
|
ss << t << " (total) |\n";
|
||||||
|
ss << "----------------------------\n";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @brief Return the merged objective value and gradient, then reset the sums.
 *
 * @param g Receives the accumulated (weighted) gradient.
 *
 * @return The accumulated (weighted) objective value.
 *
 * @throws std::runtime_error if the object is not initialized, or if the
 *         number of losses added since the last call differs from the number
 *         declared in InitGradNorm().
 */
double gctl::grad_norm::GradNormLoss(array<double> &g)
{
    // Checked first: the counters are only meaningful after initialization.
    if (!initialized_)
    {
        throw std::runtime_error("GradNorm is not initialized. From gctl::grad_norm::GradNormLoss()");
    }

    if (fx_c_ != fx_n_)
    {
        throw std::runtime_error("Not all loss functions evaluated. From gctl::grad_norm::GradNormLoss()");
    }

    double fx = multi_fx_;
    g = grad_;

    // Start the next round from clean accumulators.
    fx_c_ = 0;
    multi_fx_ = 0.0;
    grad_.assign_all(0.0);
    return fx;
}
|
||||||
|
|
||||||
|
void gctl::grad_norm::set_control_weight(double a)
|
||||||
|
{
|
||||||
|
alpha_ = a;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::grad_norm::set_normal_sum(double t)
|
||||||
|
{
|
||||||
|
T_ = t;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::grad_norm::set_weight_step(double l)
|
||||||
|
{
|
||||||
|
lamda_ = l;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::grad_norm::set_fixed_weight(int id, double wgt)
|
||||||
|
{
|
||||||
|
if (id < 0 || id >= fx_n_)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Invalid loss function's index. From gctl::grad_norm::set_fixed_weight(...)");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (wgt <= 0.0 || wgt >= T_)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Invalid fixed weight value. From gctl::grad_norm::set_fixed_weight(...)");
|
||||||
|
}
|
||||||
|
|
||||||
|
fixed_wgts_[id] = wgt;
|
||||||
|
wgts_[id] = wgt;
|
||||||
|
|
||||||
|
resi_T_ = T_;
|
||||||
|
double ac = 0.0;
|
||||||
|
for (size_t i = 0; i < fx_n_; i++)
|
||||||
|
{
|
||||||
|
if (fixed_wgts_[i] > 0.0) resi_T_ -= fixed_wgts_[i];
|
||||||
|
else ac += 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (resi_T_ <= 0.0)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Invalid tasks' weight detected. From gctl::grad_norm::UpdateWeights()");
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < fx_n_; i++)
|
||||||
|
{
|
||||||
|
if (fixed_wgts_[i] < 0.0) wgts_[i] = resi_T_/ac;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < fx_n_; i++)
|
||||||
|
{
|
||||||
|
rcd_wgts_[i] = wgts_[i];
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::grad_norm::set_initial_weights(const array<double> &w)
|
||||||
|
{
|
||||||
|
if (w.size() != fx_n_)
|
||||||
|
{
|
||||||
|
throw std::runtime_error("Invalid input array size. From gctl::grad_norm::set_initial_weights(...)");
|
||||||
|
}
|
||||||
|
|
||||||
|
double sum = 0.0;
|
||||||
|
for (size_t i = 0; i < fx_n_; i++)
|
||||||
|
{
|
||||||
|
wgts_[i] = w[i];
|
||||||
|
sum += wgts_[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < fx_n_; i++)
|
||||||
|
{
|
||||||
|
wgts_[i] *= T_/sum;
|
||||||
|
rcd_wgts_[i] = wgts_[i];
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Copy the recorded weights into logs.
 *
 * @param logs Output array; resized to rcd_wgts_.size() and filled with the
 *             recorded values in their stored order.
 */
void gctl::grad_norm::get_records(array<double> &logs)
{
    const size_t total = rcd_wgts_.size();

    logs.resize(total);
    for (size_t k = 0; k < total; ++k)
    {
        logs[k] = rcd_wgts_[k];
    }
}
|
||||||
|
|
||||||
|
void gctl::grad_norm::save_records(std::string file)
|
||||||
|
{
|
||||||
|
std::ofstream ofile;
|
||||||
|
open_outfile(ofile, file, ".txt");
|
||||||
|
|
||||||
|
ofile << "# 'tw' for 'task weight'\n# ";
|
||||||
|
for (size_t j = 0; j < fx_n_; j++)
|
||||||
|
{
|
||||||
|
ofile << "tw" << std::to_string(j) << " ";
|
||||||
|
}
|
||||||
|
ofile << "\n";
|
||||||
|
|
||||||
|
for (int i = 0; i < rcd_wgts_.size(); i++)
|
||||||
|
{
|
||||||
|
ofile << rcd_wgts_[i] << " ";
|
||||||
|
if ((i+1)%fx_n_ == 0) ofile << "\n";
|
||||||
|
}
|
||||||
|
ofile.close();
|
||||||
|
}
|
159
lib/optimization/gradnorm.h
Normal file
159
lib/optimization/gradnorm.h
Normal file
@ -0,0 +1,159 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _GCTL_GRADNORM_H
|
||||||
|
#define _GCTL_GRADNORM_H
|
||||||
|
|
||||||
|
#include "gctl/core.h"
|
||||||
|
#include "gctl/io.h"
|
||||||
|
|
||||||
|
namespace gctl
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* @brief Gradient normalized (balanced) multitask evaluation.
|
||||||
|
*
|
||||||
|
* @note Reference: Zhao Chen et. al., 2018. GradNorm: Gradient normalization
|
||||||
|
* for adaptive loss balancing in deep multitask networks.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
class grad_norm
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
bool initialized_;
|
||||||
|
size_t fx_n_, fx_c_;
|
||||||
|
double resi_T_, T_;
|
||||||
|
double lamda_, alpha_, multi_fx_;
|
||||||
|
array<bool> fst_iter_;
|
||||||
|
array<double> wgts_;
|
||||||
|
array<double> fx0_;
|
||||||
|
array<double> Gw_, Gdw_, Lx_;
|
||||||
|
array<double> grad_;
|
||||||
|
array<double> rcd_fxs_;
|
||||||
|
array<double> fixed_wgts_;
|
||||||
|
std::vector<double> rcd_wgts_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
grad_norm();
|
||||||
|
virtual ~grad_norm();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Initiate the number of loss functions and size of the model gradients.
|
||||||
|
*
|
||||||
|
* @note This function must be called at first.
|
||||||
|
*
|
||||||
|
* @param num Number of the total loss functions
|
||||||
|
* @param grad_num Size of the model gradients
|
||||||
|
*/
|
||||||
|
void InitGradNorm(size_t num, size_t grad_num);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Add the value of a single loss function and the current model gradients.
|
||||||
|
*
|
||||||
|
* @param fx objective value
|
||||||
|
* @param g model gradients
|
||||||
|
*
|
||||||
|
* @return weighted value of the current loss function
|
||||||
|
*/
|
||||||
|
double AddSingleLoss(double fx, const array<double> &g);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the merged objective value and the model gradients.
|
||||||
|
*
|
||||||
|
* @note All single loss functions must be added before calling this function. The
|
||||||
|
* merged objective value and the model gradients will be reset after the calling.
|
||||||
|
*
|
||||||
|
* @param g model gradients
|
||||||
|
*
|
||||||
|
* @return objective value
|
||||||
|
*/
|
||||||
|
double GradNormLoss(array<double> &g);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Update weights for single loss functions using the GradNorm algorithm.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void UpdateWeights();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Show statistics of the tasks' weight and loss function's value.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void ShowStatistics(std::ostream &ss = std::clog, bool one_line = false);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the control factor alpha. The default is 1.0
|
||||||
|
*
|
||||||
|
* @param a Input alpha
|
||||||
|
*/
|
||||||
|
void set_control_weight(double a);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the normal sum of the weights. Ths default equals to function size.
|
||||||
|
*
|
||||||
|
* @param t Input sum
|
||||||
|
*/
|
||||||
|
void set_normal_sum(double t);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set a learning rate of the weights. The default is 0.001
|
||||||
|
*
|
||||||
|
* @param l Input learning rate
|
||||||
|
*/
|
||||||
|
void set_weight_step(double l);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the fixed weight.
|
||||||
|
*
|
||||||
|
* @param id Index of the loss function
|
||||||
|
* @param wgt weight of the loss function
|
||||||
|
*/
|
||||||
|
void set_fixed_weight(int id, double wgt);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the initial weights
|
||||||
|
*
|
||||||
|
* @param w Input weights
|
||||||
|
*/
|
||||||
|
void set_initial_weights(const array<double> &w);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the recorded weights. Size of the log equals the function size times iteration times.
|
||||||
|
*
|
||||||
|
* @param logs Output log
|
||||||
|
*/
|
||||||
|
void get_records(array<double> &logs);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Save recored weights to file.
|
||||||
|
*
|
||||||
|
* @param file File name
|
||||||
|
*/
|
||||||
|
void save_records(std::string file);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // _GCTL_GRADNORM_H
|
1897
lib/optimization/lbfgs.cpp
Normal file
1897
lib/optimization/lbfgs.cpp
Normal file
File diff suppressed because it is too large
Load Diff
559
lib/optimization/lbfgs.h
Normal file
559
lib/optimization/lbfgs.h
Normal file
@ -0,0 +1,559 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _GCTL_LBFGS_H
|
||||||
|
#define _GCTL_LBFGS_H
|
||||||
|
|
||||||
|
#include "gctl/core.h"
|
||||||
|
#include "gctl/maths.h"
|
||||||
|
#include "gctl/algorithm.h"
|
||||||
|
|
||||||
|
#include "gctl_optimization_config.h"
|
||||||
|
#ifdef GCTL_OPTIMIZATION_TOML
|
||||||
|
#include "toml.hpp"
|
||||||
|
#endif // GCTL_OPTIMIZATION_TOML
|
||||||
|
|
||||||
|
#if defined _WINDOWS || __WIN32__
|
||||||
|
#include "windows.h"
|
||||||
|
#endif // _WINDOWS || __WIN32__
|
||||||
|
|
||||||
|
namespace gctl
|
||||||
|
{
|
||||||
|
/**
 * @brief Return value of the lbfgs() function. Roughly speaking, a negative value indicates an error.
 *
 * Every enumerator carries its numeric value explicitly so that a code seen
 * in a log can be looked up directly.
 */
enum lbfgs_return_code
{
    /** L-BFGS reaches convergence. NOTE(review): presumably the lbfgs_para::epsilon test - confirm in lbfgs.cpp. */
    LBFGS_EPS_CONVERGENCE = 0,
    /** NOTE(review): presumably convergence by the lbfgs_para::delta test - confirm in lbfgs.cpp. */
    LBFGS_DELTA_CONVERGENCE = 1,
    /** NOTE(review): presumably convergence by the lbfgs_para::residual test - confirm in lbfgs.cpp. */
    LBFGS_RESI_CONVERGENCE = 2,
    /** NOTE(review): presumably the optimization was stopped - confirm in lbfgs.cpp. */
    LBFGS_STOP = 3,
    /** The initial variables already minimize the objective function. */
    LBFGS_ALREADY_MINIMIZED = 4,

    /** Unknown error. */
    LBFGSERR_UNKNOWNERROR = -1024,
    /** Logic error. */
    LBFGSERR_LOGICERROR = -1023,
    /** Insufficient memory. */
    LBFGSERR_OUTOFMEMORY = -1022,
    /** The minimization process has been canceled. */
    LBFGSERR_CANCELED = -1021,
    /** Invalid number of variables specified. */
    LBFGSERR_INVALID_N = -1020,
    /** Invalid number of variables (for SSE) specified. */
    LBFGSERR_INVALID_N_SSE = -1019,
    /** The array x must be aligned to 16 (for SSE). */
    LBFGSERR_INVALID_X_SSE = -1018,
    /** Invalid parameter lbfgs_para::epsilon specified. */
    LBFGSERR_INVALID_EPSILON = -1017,
    /** Invalid parameter lbfgs_para::past specified. */
    LBFGSERR_INVALID_TESTPERIOD = -1016,
    /** Invalid parameter lbfgs_para::delta specified. */
    LBFGSERR_INVALID_DELTA = -1015,
    /** Invalid parameter lbfgs_para::linesearch specified. */
    LBFGSERR_INVALID_LINESEARCH = -1014,
    /** Invalid parameter lbfgs_para::min_step specified. */
    LBFGSERR_INVALID_MINSTEP = -1013,
    /** Invalid parameter lbfgs_para::max_step specified. */
    LBFGSERR_INVALID_MAXSTEP = -1012,
    /** Invalid parameter lbfgs_para::ftol specified. */
    LBFGSERR_INVALID_FTOL = -1011,
    /** Invalid parameter lbfgs_para::wolfe specified. */
    LBFGSERR_INVALID_WOLFE = -1010,
    /** Invalid parameter lbfgs_para::gtol specified. */
    LBFGSERR_INVALID_GTOL = -1009,
    /** Invalid parameter lbfgs_para::xtol specified. */
    LBFGSERR_INVALID_XTOL = -1008,
    /** Invalid parameter lbfgs_para::max_linesearch specified. */
    LBFGSERR_INVALID_MAXLINESEARCH = -1007,
    /** Invalid parameter lbfgs_para::orthantwise_c specified. */
    LBFGSERR_INVALID_ORTHANTWISE = -1006,
    /** Invalid parameter lbfgs_para::orthantwise_start specified. */
    LBFGSERR_INVALID_ORTHANTWISE_START = -1005,
    /** Invalid parameter lbfgs_para::orthantwise_end specified. */
    LBFGSERR_INVALID_ORTHANTWISE_END = -1004,
    /** The line-search step went out of the interval of uncertainty. */
    LBFGSERR_OUTOFINTERVAL = -1003,
    /** A logic error occurred; alternatively, the interval of uncertainty
        became too small. */
    LBFGSERR_INCORRECT_TMINMAX = -1002,
    /** A rounding error occurred; alternatively, no line-search step
        satisfies the sufficient decrease and curvature conditions. */
    LBFGSERR_ROUNDING_ERROR = -1001,
    /** The line-search step became smaller than lbfgs_para::min_step. */
    LBFGSERR_MINIMUMSTEP = -1000,
    /** The line-search step became larger than lbfgs_para::max_step. */
    LBFGSERR_MAXIMUMSTEP = -999,
    /** The line-search routine reaches the maximum number of evaluations. */
    LBFGSERR_MAXIMUMLINESEARCH = -998,
    /** The algorithm routine reaches the maximum number of iterations. */
    LBFGSERR_MAXIMUMITERATION = -997,
    /** Relative width of the interval of uncertainty is at most
        lbfgs_para::xtol. */
    LBFGSERR_WIDTHTOOSMALL = -996,
    /** A logic error (negative line-search step) occurred. */
    LBFGSERR_INVALIDPARAMETERS = -995,
    /** The current search direction increases the objective function value. */
    LBFGSERR_INCREASEGRADIENT = -994,
};
|
||||||
|
|
||||||
|
// Enumeration of the line search methods:
//   0  MoreThuente method
//   1  Armijo condition method
//   2  regular Wolfe condition method
//   3  strong Wolfe condition method
/**
 * @brief Line search algorithms.
 *
 * Two names are aliases: LBFGS_LINESEARCH_DEFAULT equals
 * LBFGS_LINESEARCH_MORETHUENTE and LBFGS_LINESEARCH_BACKTRACKING equals
 * LBFGS_LINESEARCH_BACKTRACKING_WOLFE.
 */
enum line_search_type
{
    /** MoreThuente method proposed by More and Thuente. */
    LBFGS_LINESEARCH_MORETHUENTE = 0,
    /** The default algorithm (MoreThuente method). */
    LBFGS_LINESEARCH_DEFAULT = LBFGS_LINESEARCH_MORETHUENTE,
    /**
     * Backtracking method with the Armijo condition.
     * The backtracking method finds the step length such that it satisfies
     * the sufficient decrease (Armijo) condition,
     *   - f(x + a * d) <= f(x) + lbfgs_para::ftol * a * g(x)^T d,
     *
     * where x is the current point, d is the current search direction, and
     * a is the step length.
     */
    LBFGS_LINESEARCH_BACKTRACKING_ARMIJO = 1,
    /**
     * Backtracking method with regular Wolfe condition.
     * The backtracking method finds the step length such that it satisfies
     * both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO)
     * and the curvature condition,
     *   - g(x + a * d)^T d >= lbfgs_para::wolfe * g(x)^T d,
     *
     * where x is the current point, d is the current search direction, and
     * a is the step length.
     */
    LBFGS_LINESEARCH_BACKTRACKING_WOLFE = 2,
    /** The backtracking method with the default (regular Wolfe) condition. */
    LBFGS_LINESEARCH_BACKTRACKING = LBFGS_LINESEARCH_BACKTRACKING_WOLFE,
    /**
     * Backtracking method with strong Wolfe condition.
     * The backtracking method finds the step length such that it satisfies
     * both the Armijo condition (LBFGS_LINESEARCH_BACKTRACKING_ARMIJO)
     * and the following condition,
     *   - |g(x + a * d)^T d| <= lbfgs_para::wolfe * |g(x)^T d|,
     *
     * where x is the current point, d is the current search direction, and
     * a is the step length.
     */
    LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 3,
    /**
     * NOTE(review): undocumented in the original; presumably the Armijo
     * backtracking search with quadratic interpolation - confirm in lbfgs.cpp.
     */
    LBFGS_LINESEARCH_BACKTRACKING_ARMIJO_QUAD = 4,

    // Disabled in the original source (kept for reference):
    //LBFGS_LINESEARCH_BACKTRACKING_QUAD = 5,
    //LBFGS_LINESEARCH_BACKTRACKING_WOLFE_QUAD = 5,
    //LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE_QUAD = 6,
};
|
||||||
|
|
||||||
|
// L-BFGS parameter type. Brief summary of the fields:
// m                  Number of previous sk and yk vectors stored by the algorithm. It controls the memory
//                    used; the default is 6 (values below 3 are not recommended). Larger values give a
//                    better approximation at a higher computational cost.
// epsilon            Termination accuracy of the iteration, default 1e-5.
// past               Distance (in iterations) used by the delta-based termination test. The default is 0,
//                    i.e. the delta-based test is disabled.
// delta              (f' - f) / f, the decrease of the objective function between iterations relative to
//                    the current objective value. Only evaluated when past is not 0.
// max_iterations     Maximum number of iterations. 0 means iterating until a termination condition is met
//                    or an error occurs.
// linesearch         Line search method, one of the line_search_type values declared earlier in this file.
// max_linesearch     Maximum number of line search trials per iteration, default 40.
// min_step           Minimum step of the line search, default 1e-20.
// max_step           Maximum step of the line search, default 1e+20.
// ftol               Accuracy of the line search, default 1e-4, valid range (0, 0.5).
// wolfe              Control parameter of the Wolfe line search, default 0.9, greater than ftol and less than 1.0.
// gtol               Control parameter of the line search, default 0.9, greater than ftol and less than 1.0.
// xtol               Floating-point precision, default 1e-16.
// orthantwise_c      Coefficient of the L1 norm of the model parameters x. The default is 0.0 (no L1 term);
//                    a value greater than 0 turns the algorithm into OWL-QN.
// orthantwise_start  Index at which the L1 norm of the model parameters x starts to be computed.
// orthantwise_end    Index at which the L1 norm of the model parameters x stops being computed.
/**
 * L-BFGS optimization parameters.
 * lbfgs_solver::default_lbfgs_para() returns an object filled with the
 * default values.
 */
struct lbfgs_para
{
    /**
     * The number of corrections to approximate the inverse hessian matrix.
     * The L-BFGS routine stores the computation results of previous \ref m
     * iterations to approximate the inverse hessian matrix of the current
     * iteration. This parameter controls the size of the limited memories
     * (corrections). The default value is \c 6. Values less than \c 3 are
     * not recommended. Large values will result in excessive computing time.
     */
    int m;

    /**
     * Epsilon for convergence test.
     * This parameter determines the accuracy with which the solution is to
     * be found. A minimization terminates when
     *     ||g|| < \ref epsilon * max(1, ||x||),
     * where ||.|| denotes the Euclidean (L2) norm. The default value is
     * \c 1e-5.
     */
    double epsilon;

    /**
     * Distance for delta-based convergence test.
     * This parameter determines the distance, in iterations, to compute
     * the rate of decrease of the objective function. If the value of this
     * parameter is zero, the library does not perform the delta-based
     * convergence test. The default value is \c 0.
     */
    int past;

    /**
     * Delta for convergence test.
     * This parameter determines the minimum rate of decrease of the
     * objective function. The library stops iterations when the
     * following condition is met:
     *     (f' - f) / f < \ref delta,
     * where f' is the objective value of \ref past iterations ago, and f is
     * the objective value of the current iteration.
     * The default value is \c 1e-5.
     */
    double delta;

    /**
     * Residual for convergence test.
     * This parameter determines the accuracy with which the solution is to
     * be found. A minimization terminates when
     *     f(x) <= residual.
     * The default value is \c 1e-8.
     */
    double residual;

    /**
     * The maximum number of iterations.
     * The lbfgs() function terminates an optimization process with
     * ::LBFGSERR_MAXIMUMITERATION status code when the iteration count
     * exceeds this parameter. Setting this parameter to zero continues an
     * optimization process until a convergence or error. The default value
     * is \c 0.
     */
    int max_iterations;

    /**
     * The line search algorithm.
     * This parameter specifies a line search algorithm to be used by the
     * L-BFGS routine (one of the line_search_type values).
     */
    int linesearch;

    /**
     * The maximum number of trials for the line search.
     * This parameter controls the number of function and gradients evaluations
     * per iteration for the line search routine. The default value is \c 40.
     */
    int max_linesearch;

    /**
     * The minimum step of the line search routine.
     * The default value is \c 1e-20. This value need not be modified unless
     * the exponents are too large for the machine being used, or unless the
     * problem is extremely badly scaled (in which case the exponents should
     * be increased).
     */
    double min_step;

    /**
     * The maximum step of the line search.
     * The default value is \c 1e+20. This value need not be modified unless
     * the exponents are too large for the machine being used, or unless the
     * problem is extremely badly scaled (in which case the exponents should
     * be increased).
     */
    double max_step;

    /**
     * A parameter to control the accuracy of the line search routine.
     * The default value is \c 1e-4. This parameter should be greater
     * than zero and smaller than \c 0.5.
     */
    double ftol;

    /**
     * A coefficient for the Wolfe condition.
     * This parameter is valid only when the backtracking line-search
     * algorithm is used with the Wolfe condition,
     * ::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE or
     * ::LBFGS_LINESEARCH_BACKTRACKING_WOLFE .
     * The default value is \c 0.9. This parameter should be greater
     * than the \ref ftol parameter and smaller than \c 1.0.
     */
    double wolfe;

    /**
     * A parameter to control the accuracy of the line search routine.
     * The default value is \c 0.9. If the function and gradient
     * evaluations are inexpensive with respect to the cost of the
     * iteration (which is sometimes the case when solving very large
     * problems) it may be advantageous to set this parameter to a small
     * value. A typical small value is \c 0.1. This parameter should be
     * greater than the \ref ftol parameter (\c 1e-4) and smaller than
     * \c 1.0.
     */
    double gtol;

    /**
     * The machine precision for floating-point values.
     * This parameter must be a positive value set by a client program to
     * estimate the machine precision. The line search routine will terminate
     * with the status code (::LBFGSERR_ROUNDING_ERROR) if the relative width
     * of the interval of uncertainty is less than this parameter.
     */
    double xtol;

    /**
     * Coefficient for the L1 norm of variables.
     * This parameter should be set to zero for standard minimization
     * problems. Setting this parameter to a positive value activates
     * Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) method, which
     * minimizes the objective function F(x) combined with the L1 norm |x|
     * of the variables, {F(x) + C |x|}. This parameter is the coefficient
     * for the |x|, i.e., C. As the L1 norm |x| is not differentiable at
     * zero, the library modifies function and gradient evaluations from
     * a client program suitably; a client program thus has only to return
     * the function value F(x) and gradients G(x) as usual. The default value
     * is zero.
     */
    double orthantwise_c;

    /**
     * Start index for computing L1 norm of the variables.
     * This parameter is valid only for OWL-QN method
     * (i.e., \ref orthantwise_c != 0). This parameter b (0 <= b < N)
     * specifies the index number from which the library computes the
     * L1 norm of the variables x,
     *     |x| := |x_{b}| + |x_{b+1}| + ... + |x_{N}| .
     * In other words, variables x_1, ..., x_{b-1} are not used for
     * computing the L1 norm. Setting b (0 < b < N), one can protect
     * variables, x_1, ..., x_{b-1} (e.g., a bias term of logistic
     * regression) from being regularized. The default value is zero.
     */
    int orthantwise_start;

    /**
     * End index for computing L1 norm of the variables.
     * This parameter is valid only for OWL-QN method
     * (i.e., \ref orthantwise_c != 0). This parameter e (0 < e <= N)
     * specifies the index number at which the library stops computing the
     * L1 norm of the variables x.
     */
    int orthantwise_end;
};
|
||||||
|
|
||||||
|
class lbfgs_solver
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
lbfgs_para lbfgs_param_; ///< lbfgs 算法参数
|
||||||
|
bool lbfgs_silent_; ///< 显示运行信息
|
||||||
|
|
||||||
|
// 算法函数是私有的,不能直接使用,通过Minimize函数调用
|
||||||
|
// 下面是L-BFGS的主函数,各个参数的说明简要翻译如下:
|
||||||
|
// n 数组的长度,也就是待求的模型参数的数量
|
||||||
|
// x 模型参数数组的指针,函数通过指针直接操作模型数组,所以不需要返回计算结果。一开始赋给函数的数组即为
|
||||||
|
// 初始模型,函数结束后即为最优化结果
|
||||||
|
// ptr_fx 目标函数的值的指针,设计成指针可以方便在函数外部监控迭代过程的收敛情况
|
||||||
|
// retval 返回值。无错即为0,非0值代表此文件上部枚举类型中的对应错误。此文件下部定义的错误信息显示即利用此返回值与
|
||||||
|
// 预定义的枚举类型输出相应的错误信息。
|
||||||
|
/**
|
||||||
|
* Start a L-BFGS optimization.
|
||||||
|
*
|
||||||
|
* @param x The array of variables. A client program can set
|
||||||
|
* default values for the optimization and receive the
|
||||||
|
* optimization result through this array. This array
|
||||||
|
* must be allocated by ::lbfgs_malloc function
|
||||||
|
* for libLBFGS built with SSE/SSE2 optimization routine
|
||||||
|
* enabled. The library built without SSE/SSE2
|
||||||
|
* optimization does not have such a requirement.
|
||||||
|
* @param ptr_fx The pointer to the variable that receives the final
|
||||||
|
* value of the objective function for the variables.
|
||||||
|
* This argument can be set to \c NULL if the final
|
||||||
|
* value of the objective function is unnecessary.
|
||||||
|
* @retval The status code. This function returns zero if the
|
||||||
|
* minimization process terminates without an error. A
|
||||||
|
* non-zero value indicates an error.
|
||||||
|
*/
|
||||||
|
lbfgs_return_code lbfgs(array<double> &x, double &ptr_fx, std::ostream &ss);
|
||||||
|
lbfgs_return_code lbfgs_preconditioned(array<double> &x, double &ptr_fx, std::ostream &ss);
|
||||||
|
|
||||||
|
// 线性搜索方法 内部私有函数 不能直接使用
|
||||||
|
lbfgs_return_code line_search_backtracking(int n, gctl::array<double> &x, double *f, gctl::array<double> &g, gctl::array<double> &s,
|
||||||
|
double *stp, const gctl::array<double> &xp, const gctl::array<double> &gp, gctl::array<double> &wp, int &ls);
|
||||||
|
lbfgs_return_code line_search_backtracking_quad(int n, gctl::array<double> &x, double *f, gctl::array<double> &g, gctl::array<double> &s,
|
||||||
|
double *stp, const gctl::array<double> &xp, const gctl::array<double> &gp, gctl::array<double> &wp, int &ls);
|
||||||
|
lbfgs_return_code line_search_backtracking_owlqn(int n, gctl::array<double> &x, double *f, gctl::array<double> &g, gctl::array<double> &s,
|
||||||
|
double *stp, const gctl::array<double> &xp, const gctl::array<double> &gp, gctl::array<double> &wp, int &ls);
|
||||||
|
lbfgs_return_code line_search_morethuente(int n, gctl::array<double> &x, double *f, gctl::array<double> &g, gctl::array<double> &s,
|
||||||
|
double *stp, const gctl::array<double> &xp, const gctl::array<double> &gp, gctl::array<double> &wa, int &ls);
|
||||||
|
|
||||||
|
// 显示lbfgs函数返回值信息 主要是错误信息
|
||||||
|
void lbfgs_error_str(lbfgs_return_code err_code, std::ostream &ss = std::clog, bool err_throw = false);
|
||||||
|
|
||||||
|
public:
|
||||||
|
lbfgs_solver();
|
||||||
|
virtual ~lbfgs_solver();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief 不显示运行信息,仅抛出运行错误。
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void lbfgs_silent();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief 设置算法参数。
|
||||||
|
*
|
||||||
|
* @param in_param 参数对象
|
||||||
|
*/
|
||||||
|
void set_lbfgs_para(const lbfgs_para &in_param);
|
||||||
|
|
||||||
|
#ifdef GCTL_OPTIMIZATION_TOML
|
||||||
|
/**
|
||||||
|
* @brief 设置算法参数。
|
||||||
|
*
|
||||||
|
* @param toml_data toml数据对象
|
||||||
|
*/
|
||||||
|
void set_lbfgs_para(const toml::value &toml_data);
|
||||||
|
#endif // GCTL_OPTIMIZATION_TOML
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief 返回一个全为默认值的参数对象。
|
||||||
|
*
|
||||||
|
* @return lbfgs_para
|
||||||
|
*/
|
||||||
|
lbfgs_para default_lbfgs_para();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief 显示当前运行参数
|
||||||
|
*
|
||||||
|
* @param ss 标准输出流
|
||||||
|
*/
|
||||||
|
void show_lbfgs_para(std::ostream &ss = std::clog);
|
||||||
|
|
||||||
|
// 目标函数与其梯度值计算函数的接口,参数简要说明如下:
|
||||||
|
// x 当前的模型参数值的指针
|
||||||
|
// g 当前模型参数值对应的梯度指针
|
||||||
|
// step 当前线性搜索所使用的步长
|
||||||
|
// retval 当前模型参数的目标函数值
|
||||||
|
/**
|
||||||
|
* Callback interface to provide objective function and gradient evaluations.
|
||||||
|
*
|
||||||
|
* The lbfgs() function call this function to obtain the values of objective
|
||||||
|
* function and its gradients when needed. A client program must implement
|
||||||
|
* this function to evaluate the values of the objective function and its
|
||||||
|
* gradients, given current values of variables.
|
||||||
|
*
|
||||||
|
* @param x The current values of variables.
|
||||||
|
* @param g The gradient vector. The callback function must compute
|
||||||
|
* the gradient values for the current variables.
|
||||||
|
* @param step The current step of the line search routine.
|
||||||
|
* @retval double The value of the objective function for the current
|
||||||
|
* variables.
|
||||||
|
*/
|
||||||
|
virtual double LBFGS_Evaluate(const array<double> &x, array<double> &g) = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Callback interface to implement the preconditioning process.
|
||||||
|
*
|
||||||
|
* The lbfgs() function call this function for each iteration. Implementing
|
||||||
|
* this function, a client program can preform the preconditioning process.
|
||||||
|
*
|
||||||
|
* @param x The current values of variables.
|
||||||
|
* @param g The current gradient values of variables.
|
||||||
|
* @param d The current values of search directions.
|
||||||
|
* @param d_pre The values of search directions being preconditioned.
|
||||||
|
* The callback function must compute these values.
|
||||||
|
*/
|
||||||
|
virtual void LBFGS_Precondition(const array<double> &x, const array<double> &g, const array<double> &d, array<double> &d_pre);
|
||||||
|
|
||||||
|
// 进程函数的接口,参数简要说明如下:
|
||||||
|
// x 当前的模型参数值的指针
|
||||||
|
// g 当前模型参数值对应的梯度指针
|
||||||
|
// fx 目标函数的值
|
||||||
|
// xnorm 模型参数数组的L2模长
|
||||||
|
// gnorm 模型梯度数组的L2模长
|
||||||
|
// step 当前线性搜索所使用的步长
|
||||||
|
// k 迭代的次数
|
||||||
|
// ls 此次迭代所使用的线性搜索次数
|
||||||
|
// retval 返回0则lbfgs()函数继续,否则终止
|
||||||
|
/**
|
||||||
|
* Callback interface to receive the progress of the optimization process.
|
||||||
|
*
|
||||||
|
* The lbfgs() function calls this function for each iteration. By implementing
|
||||||
|
* this function, a client program can store or display the current progress
|
||||||
|
* of the optimization process.
|
||||||
|
*
|
||||||
|
* @param x The current values of variables.
|
||||||
|
* @param g The current gradient values of variables.
|
||||||
|
* @param fx The current value of the objective function.
|
||||||
|
* @param converge Current value of the convergence test.
|
||||||
|
* @param rate Current value of the delta-based convergence test.
|
||||||
|
* @param param 这是我们添加了一个指针以使用参数类型来监控迭代流程
|
||||||
|
* @param k The iteration count.
|
||||||
|
* @param ls The number of evaluations called for this iteration.
|
||||||
|
* @param ss Output stream object.
|
||||||
|
* @retval int Zero to continue the optimization process. Returning a
|
||||||
|
* non-zero value will cancel the optimization process.
|
||||||
|
*/
|
||||||
|
virtual int LBFGS_Progress(const array<double> &x, const array<double> &g, const double fx,
|
||||||
|
const double converge, const double rate, const lbfgs_para param, int k, int ls, std::ostream &ss);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief 调用算法执行最小化流程
|
||||||
|
*
|
||||||
|
* @param m 初始模型,最优化结果也保存在此数组内
|
||||||
|
* @param ss 信息的输出流
|
||||||
|
* @param err_throw 仅抛出错误
|
||||||
|
* @return double 最终的目标函数值
|
||||||
|
*/
|
||||||
|
double LBFGS_Minimize(array<double> &m, std::ostream &ss = std::clog, bool err_throw = false);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief 调用算法执行最小化流程
|
||||||
|
*
|
||||||
|
* @param m 初始模型,最优化结果也保存在此数组内
|
||||||
|
* @param ss 信息的输出流
|
||||||
|
* @note The signature below has no 'verbose' parameter (only m, ss and err_throw).
|
||||||
|
* @param err_throw 仅抛出错误
|
||||||
|
* @return double 最终的目标函数值
|
||||||
|
*/
|
||||||
|
double LBFGS_MinimizePreconditioned(array<double> &m, std::ostream &ss = std::clog, bool err_throw = false);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // _GCTL_LBFGS_H
|
1002
lib/optimization/lcg.cpp
Normal file
1002
lib/optimization/lcg.cpp
Normal file
File diff suppressed because it is too large
Load Diff
387
lib/optimization/lcg.h
Normal file
387
lib/optimization/lcg.h
Normal file
@ -0,0 +1,387 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _GCTL_LCG_H
|
||||||
|
#define _GCTL_LCG_H
|
||||||
|
|
||||||
|
#include "gctl/core.h"
|
||||||
|
#include "gctl/maths.h"
|
||||||
|
#include "gctl/algorithm.h"
|
||||||
|
|
||||||
|
#include "gctl_optimization_config.h"
|
||||||
|
#ifdef GCTL_OPTIMIZATION_TOML
|
||||||
|
#include "toml.hpp"
|
||||||
|
#endif // GCTL_OPTIMIZATION_TOML
|
||||||
|
|
||||||
|
#if defined _WINDOWS || __WIN32__
|
||||||
|
#include "windows.h"
|
||||||
|
#endif // _WINDOWS || __WIN32__
|
||||||
|
|
||||||
|
namespace gctl
|
||||||
|
{
|
||||||
|
/**
 * @brief Identifiers of the algorithms that can be selected when calling
 * the solver's minimize functions.
 */
enum lcg_solver_type
{
    LCG_CG,        ///< Conjugate gradient method.
    LCG_PCG,       ///< Preconditioned conjugate gradient method.
    LCG_CGS,       ///< Conjugate gradient squared method.
    LCG_BICGSTAB,  ///< Biconjugate gradient method.
    LCG_BICGSTAB2, ///< Biconjugate gradient method with restart.
    /**
     * Conjugate gradient method with projected gradient for inequality
     * constraints. No non-monotonic line search is used for the step length.
     */
    LCG_PG,
    /**
     * Conjugate gradient method with spectral projected gradient for
     * inequality constraints. A non-monotonic line search is used for the
     * step length.
     */
    LCG_SPG,
};
|
||||||
|
|
||||||
|
/**
 * @brief Status codes reported by the LCG solver functions.
 *
 * Non-negative values describe a normal termination; negative values
 * describe an error.
 */
enum lcg_return_code
{
    /// The solver function terminated successfully.
    LCG_SUCCESS = 0,
    /// The iteration reached convergence (same value as LCG_SUCCESS).
    LCG_CONVERGENCE = 0,
    /// The iteration was stopped by the monitoring function.
    LCG_STOP,
    /// The initial solution is already optimized.
    LCG_ALREADY_OPTIMIZIED,

    // Error codes: numbering restarts at -1024 and increases by one per entry.

    /// Unknown error.
    LCG_UNKNOWN_ERROR = -1024,
    /// The variable size is negative.
    LCG_INVILAD_VARIABLE_SIZE,
    /// The maximal number of iterations is negative.
    LCG_INVILAD_MAX_ITERATIONS,
    /// The epsilon is negative.
    LCG_INVILAD_EPSILON,
    /// The restart epsilon is negative.
    LCG_INVILAD_RESTART_EPSILON,
    /// The iteration count reached its maximal limit.
    LCG_REACHED_MAX_ITERATIONS,
    /// Null precondition matrix.
    LCG_NULL_PRECONDITION_MATRIX,
    /// NaN value.
    LCG_NAN_VALUE,
    /// Invalid pointer.
    LCG_INVALID_POINTER,
    /// Invalid range for lambda.
    LCG_INVALID_LAMBDA,
    /// Invalid range for sigma.
    LCG_INVALID_SIGMA,
    /// Invalid range for beta.
    LCG_INVALID_BETA,
    /// Invalid range for maxi_m.
    LCG_INVALID_MAXIM,
    /// Sizes of m and B do not match.
    LCG_SIZE_NOT_MATCH,
};
|
||||||
|
|
||||||
|
/**
 * @brief Verbosity levels of the LCG algorithms.
 */
enum lcg_message_type
{
    /// Throw errors only.
    LCG_THROW,
    /// Display errors only.
    LCG_ERROR,
    /// Display info for every solution.
    LCG_SOLUTION,
    /// Display info for every iteration.
    LCG_ITERATION,
};
|
||||||
|
|
||||||
|
/**
 * @brief Tunable parameters shared by the conjugate gradient methods.
 */
struct lcg_para
{
    /// Maximal number of iterations. When set to zero (the default) the
    /// process continues until the convergence test is met.
    int max_iterations;

    /// Epsilon for the convergence test. It determines the accuracy with
    /// which the solution is to be found. A minimization terminates when
    /// ||g||/max(||g0||, 1.0) <= epsilon or sqrt(||g||)/N <= epsilon, where
    /// ||.|| denotes the Euclidean (L2) norm. The default value is 1e-8.
    double epsilon;

    /// Whether to evaluate the process with the absolute mean difference
    /// (AMD) of |Ax - B|. The default is false, meaning the gradient based
    /// evaluation is used; a true value selects the AMD based evaluation.
    /// Only applied to the non-constrained methods.
    int abs_diff;

    /// Restart epsilon for the LCG_BICGSTAB2 algorithm. The default value
    /// is 1e-6.
    double restart_epsilon;

    /// Initial step length for the projected gradient method. The default
    /// is 1.0.
    double step;

    /// Multiplier for updating solutions with the spectral projected
    /// gradient method. Valid range is (0, 1). The default is 0.95.
    double sigma;

    /// Descending ratio used by the non-monotonic line search. Valid range
    /// is (0, 1). The default is 0.9.
    double beta;

    /// Maximal number of recorded objective values for the SPG method. The
    /// method uses the objective values of the most recent maxi_m iterations
    /// to perform the non-monotonic line search. The default value is 10.
    int maxi_m;
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief This abstract class implements conjugate gradient algorithms for solving
|
||||||
|
* a linear system like Ax = B where A is a N*N matrix. As the actual variable
|
||||||
|
* we need is the product of 'Ax', the kernel matrix 'A' is not declared within
|
||||||
|
* the class definition. Instead, a pure virtual function is declared as the
|
||||||
|
* callback interface for calculating the product of 'Ax' as 'void LCG_Ax(const
|
||||||
|
* array<double> &x, array<double> &ax)'. A virtual function "int LCG_Progress(const
|
||||||
|
* array<double> &m, const double converge, const lcg_para ¶m, size_t t,
|
||||||
|
* std::ostream &ss)" could be reloaded for customed convergence tests.
|
||||||
|
*/
|
||||||
|
class lcg_solver
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
lcg_para lcg_param_;
|
||||||
|
size_t lcg_inter_;
|
||||||
|
lcg_message_type lcg_msg_;
|
||||||
|
|
||||||
|
// make them class variables are more suitable for repetitively usages
|
||||||
|
array<double> zk, gk, dk, Adk;
|
||||||
|
array<double> rk, r0_T, pk, vk;
|
||||||
|
array<double> Apx, uk, qk, qk_m, wk;
|
||||||
|
array<double> m_new, gk_new;
|
||||||
|
array<double> sk, yk;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Display info of a given return code. This is a private function
|
||||||
|
* and can only be called by other class functions.
|
||||||
|
*
|
||||||
|
* @param err_code Input retrun code
|
||||||
|
* @param ss Output stream of runtime info.
|
||||||
|
*/
|
||||||
|
void lcg_error_str(lcg_return_code err_code, std::ostream &ss);
|
||||||
|
|
||||||
|
public:
|
||||||
|
lcg_solver(); ///< default constructor
|
||||||
|
virtual ~lcg_solver(); ///< default de-constructor
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Callback interface for calculating the product of 'A' multipled by an arbitrary vector 'x'.
|
||||||
|
*
|
||||||
|
* @param x Multipler
|
||||||
|
* @param ax Product of Ax
|
||||||
|
*/
|
||||||
|
virtual void LCG_Ax(const array<double> &x, array<double> &ax) = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Callback interface for calculating the product of 'M' multipled by an arbitrary vector 'x'.
|
||||||
|
* In which 'M' is the inverse of the pre-conditioning matrix. This function must be reloaded for the
|
||||||
|
* LCG_PCG algorithm.
|
||||||
|
*
|
||||||
|
* @param x Multipler
|
||||||
|
* @param mx Product of Ax
|
||||||
|
*/
|
||||||
|
virtual void LCG_Mx(const array<double> &x, array<double> &mx);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Utility function for monitoring the solving process.
|
||||||
|
*
|
||||||
|
* @param m Current solution
|
||||||
|
* @param converge Current convergence
|
||||||
|
* @param param Employed parameters
|
||||||
|
* @param t Current iterative times
|
||||||
|
* @param ss Output stream of runtime info.
|
||||||
|
* @return Quit the solver if returned a non-zero value.
|
||||||
|
*/
|
||||||
|
virtual int LCG_Progress(const array<double> &m, const double converge, const lcg_para ¶m, size_t t, std::ostream &ss);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the lcg message object
|
||||||
|
*
|
||||||
|
* @param msg Input message type.
|
||||||
|
*/
|
||||||
|
void set_lcg_message(lcg_message_type msg);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the lcg report intervals
|
||||||
|
*
|
||||||
|
* @param inter Input reprot intervals.
|
||||||
|
*/
|
||||||
|
void set_lcg_report_interval(size_t inter);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the lcg para object
|
||||||
|
*
|
||||||
|
* @param param Input lcg parameters.
|
||||||
|
*/
|
||||||
|
void set_lcg_para(const lcg_para ¶m);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Return a lcg_para object with default values
|
||||||
|
*
|
||||||
|
* @return lcg_para
|
||||||
|
*/
|
||||||
|
lcg_para default_lcg_para();
|
||||||
|
|
||||||
|
#ifdef GCTL_OPTIMIZATION_TOML
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set parameters of the conjugate gradient algorithms using a toml file.
|
||||||
|
* All parameter options must be listed under a top-level table 'lcg'. Available options
|
||||||
|
* under the 'lcg' table are as declared in the lcg_para structure.
|
||||||
|
*
|
||||||
|
* @param toml_data Input toml data
|
||||||
|
*/
|
||||||
|
void set_lcg_para(std::string filename);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set parameters of the conjugate gradient algorithms using a toml::value object.
|
||||||
|
* All parameter options must be listed under a top-level table 'lcg'. Available options
|
||||||
|
* under the 'lcg' table are as declared in the lcg_para structure.
|
||||||
|
*
|
||||||
|
* @param toml_data Input toml data
|
||||||
|
*/
|
||||||
|
void set_lcg_para(const toml::value &toml_data);
|
||||||
|
|
||||||
|
#endif // GCTL_OPTIMIZATION_TOML
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief The allback interface for all CG algorithms. Set message type to LCG_THROW to suppresses all info outputs.
|
||||||
|
*
|
||||||
|
* @param m Initial/Input solution.
|
||||||
|
* @param B Right hand term of the system system.
|
||||||
|
* @param solver_id Selected solver type.
|
||||||
|
* @param ss Output stream of runtime info.
|
||||||
|
*/
|
||||||
|
void LCG_Minimize(array<double> &m, const array<double> &b, lcg_solver_type solver_id = LCG_CG, std::ostream &ss = std::clog);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief The allback interface for all CG algorithms. Set message type to LCG_THROW to suppresses all info outputs.
|
||||||
|
*
|
||||||
|
* @param m Initial/Input solution.
|
||||||
|
* @param B Right hand term of the system system.
|
||||||
|
* @param solver_id Selected solver type.
|
||||||
|
* @param low Lower bound of acceptable solutions.
|
||||||
|
* @param hig Higher bound of acceptable solutions.
|
||||||
|
* @param ss Output stream of runtime info.
|
||||||
|
*/
|
||||||
|
void LCG_MinimizeConstrained(array<double> &m, const array<double> &b, const array<double> &low, const array<double> &hig, lcg_solver_type solver_id = LCG_PG, std::ostream &ss = std::clog);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief The standalone callback interface for the CG algorithm. Set message type to LCG_THROW to suppresses all info outputs.
|
||||||
|
*
|
||||||
|
* @param m Initial/Input solution.
|
||||||
|
* @param B Right hand term of the system system.
|
||||||
|
* @param ss Output stream of runtime info.
|
||||||
|
*/
|
||||||
|
void lcg(array<double> &m, const array<double> &B, std::ostream &ss = std::clog);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief The standalone callback interface for the PCG algorithm. Set message type to LCG_THROW to suppresses all info outputs.
|
||||||
|
*
|
||||||
|
* @param m Initial/Input solution.
|
||||||
|
* @param B Right hand term of the system system.
|
||||||
|
* @param ss Output stream of runtime info.
|
||||||
|
*/
|
||||||
|
void lpcg(array<double> &m, const array<double> &B, std::ostream &ss = std::clog);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief The standalone callback interface for the CGS algorithm. Set message type to LCG_THROW to suppresses all info outputs.
|
||||||
|
*
|
||||||
|
* @param m Initial/Input solution.
|
||||||
|
* @param B Right hand term of the system system.
|
||||||
|
* @param ss Output stream of runtime info.
|
||||||
|
*/
|
||||||
|
void lcgs(array<double> &m, const array<double> &B, std::ostream &ss = std::clog);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief The standalone callback interface for the BICGSTAB algorithm. Set message type to LCG_THROW to suppresses all info outputs.
|
||||||
|
*
|
||||||
|
* @param m Initial/Input solution.
|
||||||
|
* @param B Right hand term of the system system.
|
||||||
|
* @param ss Output stream of runtime info.
|
||||||
|
*/
|
||||||
|
void lbicgstab(array<double> &m, const array<double> &B, std::ostream &ss = std::clog);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief The standalone callback interface for the BICGSTAB2 algorithm. Set message type to LCG_THROW to suppresses all info outputs.
|
||||||
|
*
|
||||||
|
* @param m Initial/Input solution.
|
||||||
|
* @param B Right hand term of the system system.
|
||||||
|
* @param ss Output stream of runtime info.
|
||||||
|
*/
|
||||||
|
void lbicgstab2(array<double> &m, const array<double> &B, std::ostream &ss = std::clog);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief The standalone callback interface for the PG algorithm. Set message type to LCG_THROW to suppresses all info outputs.
|
||||||
|
*
|
||||||
|
* @param m Initial/Input solution.
|
||||||
|
* @param B Right hand term of the system system.
|
||||||
|
* @param low Lower bound of acceptable solutions.
|
||||||
|
* @param hig Higher bound of acceptable solutions.
|
||||||
|
* @param ss Output stream of runtime info.
|
||||||
|
*/
|
||||||
|
void lpg(array<double> &m, const array<double> &B, const array<double> &low, const array<double> &hig, std::ostream &ss = std::clog);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief The standalone callback interface for the SPG algorithm. Set message type to LCG_THROW to suppresses all info outputs.
|
||||||
|
*
|
||||||
|
* @param m Initial/Input solution.
|
||||||
|
* @param B Right hand term of the system system.
|
||||||
|
* @param low Lower bound of acceptable solutions.
|
||||||
|
* @param hig Higher bound of acceptable solutions.
|
||||||
|
* @param ss Output stream of runtime info.
|
||||||
|
*/
|
||||||
|
void lspg(array<double> &m, const array<double> &B, const array<double> &low, const array<double> &hig, std::ostream &ss = std::clog);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // _GCTL_LCG_H
|
505
lib/optimization/lgd.cpp
Normal file
505
lib/optimization/lgd.cpp
Normal file
@ -0,0 +1,505 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "lgd.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default parameter for the Lévy-Gradient Descent (L-GD) method.
|
||||||
|
*/
|
||||||
|
// Positional initializer: the eight values are assigned to lgd_para's members in declaration order.
// NOTE(review): lgd_para is declared in lgd.h (not visible here) — confirm the order is
// flight_times, batch, epsilon, stddev_v, beta, alpha, sigma, lambda.
static const gctl::lgd_para lgd_defparam = {1000, 0, 1e-5, 1.0, 1.5, 0.01, 1e-8, -1.0};
|
||||||
|
|
||||||
|
/**
 * Construct a solver that uses the default parameter set, reports on every
 * iteration and has all optional features switched off.
 */
gctl::lgd_solver::lgd_solver()
{
    // Start from the library defaults.
    lgd_param_ = lgd_defparam;

    // Report interval and counters.
    lgd_inter_ = 1;
    lgd_ques_num_ = 0;
    lgd_trace_times_ = 0;

    // Every optional behaviour is disabled until explicitly requested.
    lgd_save_trace_ = false;
    lgd_has_alpha_ = false;
    lgd_has_range_ = false;
    lgd_silent_ = false;
}
|
||||||
|
|
||||||
|
// Destructor: the body is empty, no explicit clean-up is performed here.
gctl::lgd_solver::~lgd_solver(){}
|
||||||
|
|
||||||
|
int gctl::lgd_solver::LGD_Progress(const int curr_t, const double curr_fx, const double mean_fx, const double best_fx, const lgd_para ¶m)
|
||||||
|
{
|
||||||
|
if (lgd_silent_) return 0;
|
||||||
|
|
||||||
|
if (param.epsilon > 0.0 && mean_fx <= param.epsilon)
|
||||||
|
{
|
||||||
|
std::clog << GCTL_CLEARLINE << "\rF(x) = " << curr_fx << ", Mean F(x) = " << mean_fx << ", Best F(x) = " << best_fx << ", Times = " << curr_t;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lgd_inter_ > 0 && curr_t%lgd_inter_ == 0)
|
||||||
|
{
|
||||||
|
std::clog << GCTL_CLEARLINE << "\rF(x) = " << curr_fx << ", Mean F(x) = " << mean_fx << ", Best F(x) = " << best_fx << ", Times = " << curr_t;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::lgd_solver::lgd_silent()
|
||||||
|
{
|
||||||
|
lgd_silent_ = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::lgd_solver::set_lgd_report_interval(int inter)
|
||||||
|
{
|
||||||
|
lgd_inter_ = inter;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::lgd_solver::set_lgd_para(const lgd_para &in_param)
|
||||||
|
{
|
||||||
|
lgd_param_ = in_param;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef GCTL_OPTIMIZATION_TOML
|
||||||
|
|
||||||
|
void gctl::lgd_solver::set_lgd_para(std::string filename)
|
||||||
|
{
|
||||||
|
toml::value toml_data;
|
||||||
|
toml_data = toml::parse(filename);
|
||||||
|
|
||||||
|
set_lgd_para(toml_data);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Read the solver parameters from parsed toml data.
 *
 * The parameters are first reset to the defaults. If the data holds a
 * top-level table named "lgd", each recognised key found in that table
 * overrides the matching member of the parameter set; keys that are absent
 * leave the default untouched.
 *
 * @param toml_data Parsed toml data.
 */
void gctl::lgd_solver::set_lgd_para(const toml::value &toml_data)
{
    lgd_param_ = lgd_defparam;

    const std::string table_key = "lgd";
    if (!toml_data.contains(table_key)) return;

    // Look the table up once and query every option against it.
    const toml::value &table = toml_data.at(table_key);

    if (table.contains("flight_times")) lgd_param_.flight_times = toml::find<int>(table, "flight_times");
    if (table.contains("batch"))        lgd_param_.batch = toml::find<int>(table, "batch");
    if (table.contains("epsilon"))      lgd_param_.epsilon = toml::find<double>(table, "epsilon");
    if (table.contains("stddev_v"))     lgd_param_.stddev_v = toml::find<double>(table, "stddev_v");
    if (table.contains("beta"))         lgd_param_.beta = toml::find<double>(table, "beta");
    if (table.contains("alpha"))        lgd_param_.alpha = toml::find<double>(table, "alpha");
    if (table.contains("sigma"))        lgd_param_.sigma = toml::find<double>(table, "sigma");
    if (table.contains("lambda"))       lgd_param_.lambda = toml::find<double>(table, "lambda");
}
|
||||||
|
|
||||||
|
#endif // GCTL_OPTIMIZATION_TOML
|
||||||
|
|
||||||
|
void gctl::lgd_solver::set_lgd_record_trace()
|
||||||
|
{
|
||||||
|
lgd_save_trace_ = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::lgd_solver::show_solver()
|
||||||
|
{
|
||||||
|
std::clog << "Solver's Setup Panel\n";
|
||||||
|
std::clog << "-----------------------------\n";
|
||||||
|
std::clog << "Solver: LGD\n";
|
||||||
|
std::clog << "Flights = " << lgd_param_.flight_times << ", Batch = " << lgd_param_.batch << ", Epsilon = " << lgd_param_.epsilon << ", Lambda = " << lgd_param_.lambda << "\n";
|
||||||
|
std::clog << "STD(v) = " << lgd_param_.stddev_v << ", Beta = " << lgd_param_.beta << ", Alpha = " << lgd_param_.alpha << ", Sigma = " << lgd_param_.sigma << "\n";
|
||||||
|
std::clog << "=============================\n";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::lgd_solver::save_lgd_trace(std::string trace_file)
|
||||||
|
{
|
||||||
|
if (lgd_trace_times_ == 0)
|
||||||
|
{
|
||||||
|
GCTL_ShowWhatError("[gctl::lgd_solver] No trace is recorded.", GCTL_WARNING_ERROR, 0, 0, 0);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::ofstream ofile;
|
||||||
|
open_outfile(ofile, trace_file, ".txt");
|
||||||
|
|
||||||
|
int m_size = lgd_trace_.size()/lgd_trace_times_;
|
||||||
|
ofile << "# L-GD flight traces.\n";
|
||||||
|
ofile << "# Each row represents an accepted solution.\n";
|
||||||
|
ofile << "# Model size: " << m_size << "\n";
|
||||||
|
ofile << "# Accepted solutions: " << lgd_trace_times_ << "\n";
|
||||||
|
|
||||||
|
for (size_t i = 0; i < lgd_trace_times_; i++)
|
||||||
|
{
|
||||||
|
for (size_t j = 0; j < m_size; j++)
|
||||||
|
{
|
||||||
|
ofile << lgd_trace_[i*m_size+j] << " ";
|
||||||
|
}
|
||||||
|
ofile << "\n";
|
||||||
|
}
|
||||||
|
ofile.close();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Report the message that belongs to a return code.
 *
 * With err_throw set and a negative code, the message is thrown as a
 * std::string and nothing is written. Otherwise the message is written to
 * ss; when err_throw is not set it is additionally wrapped in a coloured
 * "Success"/"Fail" prefix and terminated with a line break.
 *
 * @param err_code  Return code to describe; non-negative codes count as success.
 * @param ss        Output stream receiving the message.
 * @param err_throw Throw on errors instead of printing them.
 */
void gctl::lgd_solver::lgd_error_str(lgd_return_code err_code, std::ostream &ss, bool err_throw)
{
    const bool succeeded = err_code >= 0;

    // Coloured prefix (console attribute on Windows, ANSI escapes elsewhere).
    if (!err_throw)
    {
#if defined _WINDOWS || __WIN32__
        if (succeeded)
        {
            SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_GREEN);
            ss << "Success! ";
        }
        else
        {
            SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), FOREGROUND_INTENSITY | FOREGROUND_RED);
            ss << "Fail! ";
        }
#else
        if (succeeded) ss << "\033[1m\033[32mLGD Success! ";
        else ss << "\033[1m\033[31mLGD Fail! ";
#endif
    }

    // Translate the code into its message.
    std::string err_str;
    switch (err_code)
    {
        case LGD_CONVERGENCE:
            err_str = "The iteration has reached convergence.";
            break;
        case LGD_STOP:
            err_str = "The iteration is stopped by the progress monitoring function.";
            break;
        case LGD_REACHED_MAX_ITERATIONS:
            err_str = "The maximal flight times has been reached.";
            break;
        case LGD_INVALID_SOLUTION_SIZE:
            err_str = "Invalid solution size.";
            break;
        case LGD_INVALID_MAX_ITERATIONS:
            err_str = "Invalid flight times.";
            break;
        case LGD_INVALID_EPSILON:
            err_str = "Invalid epsilon value.";
            break;
        case LGD_INVALID_STDV:
            err_str = "Invalid STD value for generating the levy distribution.";
            break;
        case LGD_INVALID_BETA:
            err_str = "Invalid beta value.";
            break;
        case LGD_INVALID_ALPHA:
            err_str = "Invalid alpha value.";
            break;
        case LGD_INVALID_SIGMA:
            err_str = "Invalid sigma value.";
            break;
        case LGD_NAN_VALUE:
            err_str = "NaN values found.";
            break;
        default:
            err_str = "Unknown error.";
            break;
    }

    // Errors are thrown on request; everything else goes to the stream.
    if (err_throw && !succeeded) throw err_str;
    ss << err_str;

    // Restore the console colour and finish the line; identical for success and failure.
    if (!err_throw)
    {
#if defined _WINDOWS || __WIN32__
        SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7);
        ss << std::endl;
#else
        ss << "\033[0m" << std::endl;
#endif
    }
}
|
||||||
|
|
||||||
|
/**
 * Return a copy of the default parameter set.
 *
 * @return lgd_para initialised from lgd_defparam.
 */
gctl::lgd_para gctl::lgd_solver::default_lgd_para()
{
    return lgd_defparam;
}
|
||||||
|
|
||||||
|
/**
 * Run the L-GD solver and report its outcome.
 *
 * In silent mode the solver is run without timing and only a negative
 * return code is reported, always by throwing. Otherwise the run is timed
 * (omp_get_wtime when GCTL_OPENMP is defined, clock() if not), the elapsed
 * time is printed unless err_throw is set, and the return code is reported
 * when verbose is set or the code is negative.
 *
 * @param best_m    Passed through to lgd().
 * @param mean_m    Passed through to lgd().
 * @param std_m     Passed through to lgd().
 * @param ss        Output stream handed to lgd_error_str.
 * @param verbose   Report the return code even when it is not an error.
 * @param err_throw Throw errors instead of printing them.
 */
void gctl::lgd_solver::LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m, 
    std::ostream &ss, bool verbose, bool err_throw)
{
    if (lgd_silent_)
    {
        const lgd_return_code status = lgd(best_m, mean_m, std_m);
        if (status < 0) lgd_error_str(status, ss, true);
        return;
    }

    // Time the solver call; the result is expressed in milliseconds.
#ifdef GCTL_OPENMP
    const double t_begin = omp_get_wtime();
    const lgd_return_code status = lgd(best_m, mean_m, std_m);
    const double t_finish = omp_get_wtime();
    const double elapsed_ms = 1000*(t_finish - t_begin);
#else
    const clock_t t_begin = clock();
    const lgd_return_code status = lgd(best_m, mean_m, std_m);
    const clock_t t_finish = clock();
    const double elapsed_ms = 1000*(t_finish - t_begin)/(double)CLOCKS_PER_SEC;
#endif

    // NOTE(review): the timing line goes to std::clog rather than ss — confirm this is intended.
    if (!err_throw)
    {
        std::clog << std::endl << "Solver: LGD. Time cost: " << elapsed_ms << " ms" << std::endl;
    }

    if (verbose || status < 0) lgd_error_str(status, ss, err_throw);
}
|
||||||
|
|
||||||
|
/**
 * Run the L-GD solver with a per-variable scaling value.
 *
 * The values in alphas are copied into lgd_alpha_, the lgd_has_alpha_ flag is
 * raised, and the call is forwarded to the basic LGD_Minimize overload.
 *
 * @param best_m    Passed through; its size defines the problem size.
 * @param mean_m    Passed through.
 * @param std_m     Passed through.
 * @param alphas    Scaling values, one per variable; each must be positive.
 * @param ss        Output stream, passed through.
 * @param verbose   Passed through.
 * @param err_throw Passed through.
 * @throws std::runtime_error if alphas differs in size from best_m or holds a
 *         value that is not greater than zero.
 */
void gctl::lgd_solver::LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m, 
    const array<double> &alphas, std::ostream &ss, bool verbose, bool err_throw)
{
    lgd_ques_num_ = best_m.size();
    if (!(lgd_ques_num_ == alphas.size()))
    {
        throw std::runtime_error("[gctl::lgd_solver] arraies' size do not match.");
    }

    lgd_alpha_.resize(lgd_ques_num_);

    // Validate and copy in one pass; the first invalid entry aborts the copy.
    int idx = 0;
    while (idx < lgd_ques_num_)
    {
        const double scale = alphas[idx];
        if (scale <= 0.0)
        {
            throw std::runtime_error("[gctl::lgd_solver] Invalid scaling value.");
        }

        lgd_alpha_[idx] = scale;
        ++idx;
    }
    lgd_has_alpha_ = true;

    LGD_Minimize(best_m, mean_m, std_m, ss, verbose, err_throw);
}
|
||||||
|
|
||||||
|
/**
 * Run the L-GD solver with per-variable lower and upper bounds.
 *
 * The bounds are copied into lgd_low_ and lgd_hig_, the lgd_has_range_ flag
 * is raised, and the call is forwarded to the basic LGD_Minimize overload.
 *
 * @param best_m    Passed through; its size defines the problem size.
 * @param mean_m    Passed through.
 * @param std_m     Passed through.
 * @param lows      Lower bounds, one per variable.
 * @param higs      Upper bounds, one per variable; each must exceed its lower bound.
 * @param ss        Output stream, passed through.
 * @param verbose   Passed through.
 * @param err_throw Passed through.
 * @throws std::runtime_error if a bound array differs in size from best_m or
 *         a lower bound is not smaller than its upper bound.
 */
void gctl::lgd_solver::LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m, 
    const array<double> &lows, const array<double> &higs, std::ostream &ss, bool verbose, bool err_throw)
{
    lgd_ques_num_ = best_m.size();
    if (!(lgd_ques_num_ == lows.size() && lgd_ques_num_ == higs.size()))
    {
        throw std::runtime_error("[gctl::lgd_solver] arraies' size do not match.");
    }

    lgd_low_.resize(lgd_ques_num_);
    lgd_hig_.resize(lgd_ques_num_);

    // Validate and copy in one pass; the first invalid pair aborts the copy.
    int idx = 0;
    while (idx < lgd_ques_num_)
    {
        if (higs[idx] <= lows[idx])
        {
            throw std::runtime_error("[gctl::lgd_solver] Invalid bound value.");
        }

        lgd_low_[idx] = lows[idx];
        lgd_hig_[idx] = higs[idx];
        ++idx;
    }
    lgd_has_range_ = true;

    LGD_Minimize(best_m, mean_m, std_m, ss, verbose, err_throw);
}
|
||||||
|
|
||||||
|
/**
 * @brief Run the L-GD solver with one common [low, hig] range for all model parameters.
 *
 * lgd_low_ and lgd_hig_ are resized to the problem size with low and hig passed as the
 * second resize argument, lgd_has_range_ is raised, and the work is then delegated to
 * the basic LGD_Minimize() overload.
 *
 * @param best_m     Model vector; its size defines the size of the problem.
 * @param mean_m     Forwarded to the basic overload.
 * @param std_m      Forwarded to the basic overload.
 * @param low        Common lower bound.
 * @param hig        Common upper bound; must be strictly greater than low.
 * @param ss         Stream forwarded to the basic overload for reporting.
 * @param verbose    Forwarded to the basic overload.
 * @param err_throw  Forwarded to the basic overload.
 *
 * @throws std::runtime_error If low is not strictly smaller than hig.
 */
void gctl::lgd_solver::LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m, 
	double low, double hig, std::ostream &ss, bool verbose, bool err_throw)
{
	// The range is rejected before any member of the solver is touched.
	if (low >= hig)
		throw std::runtime_error("[gctl::lgd_solver] Invalid bound value.");

	lgd_ques_num_ = best_m.size();
	lgd_has_range_ = true;
	lgd_low_.resize(lgd_ques_num_, low);
	lgd_hig_.resize(lgd_ques_num_, hig);

	LGD_Minimize(best_m, mean_m, std_m, ss, verbose, err_throw);
}
|
||||||
|
|
||||||
|
/**
 * @brief Core loop of the Levy-flight gradient descent (L-GD) solver.
 *
 * Starting from best_m, the model is repeatedly moved against the normalized gradient
 * returned by LGD_Evaluate(). The length of every move is a random Levy step
 * |u/|v|^(1/beta)| with u ~ N(0, stddev_u) and v ~ N(0, lgd_param_.stddev_v), multiplied
 * by a per-parameter scale. Visited models that pass the lambda threshold are fed into
 * running statistics (mean_m, std_m) and, if enabled, appended to the trace.
 *
 * @param best_m  In: starting model. Out: the model with the smallest objective value
 *                met during the flights (left untouched if a parameter check fails).
 * @param mean_m  Out: mean of the recorded models. Resized to the problem size.
 * @param std_m   Out: spread of the recorded models as computed by dynamic_stddev().
 *                Resized to the problem size.
 *
 * @return LGD_CONVERGENCE if the objective of the mean model dropped below epsilon in
 *         a batch test, LGD_STOP if LGD_Progress() returned non-zero,
 *         LGD_REACHED_MAX_ITERATIONS after the last flight, LGD_NAN_VALUE if a move
 *         produced an invalid model, or one of the LGD_INVALID_* codes if a parameter
 *         check fails.
 */
gctl::lgd_return_code gctl::lgd_solver::lgd(array<double> &best_m, array<double> &mean_m, array<double> &std_m)
{
	lgd_ques_num_ = best_m.size();

	// Reject an empty model and every parameter outside its valid range.
	if (lgd_ques_num_ <= 0) return LGD_INVALID_SOLUTION_SIZE;
	if (lgd_param_.flight_times <= 0) return LGD_INVALID_MAX_ITERATIONS;
	if (lgd_param_.epsilon <= 0) return LGD_INVALID_EPSILON;
	if (lgd_param_.stddev_v <= 0) return LGD_INVALID_STDV;
	if (lgd_param_.beta <= 1.0 || lgd_param_.beta >= 2.0) return LGD_INVALID_BETA;
	if (lgd_param_.alpha <= 0) return LGD_INVALID_ALPHA;
	if (lgd_param_.sigma <= 0) return LGD_INVALID_SIGMA;

	// The statistics of the recorded models start from zero.
	mean_m.resize(lgd_ques_num_, 0.0); std_m.resize(lgd_ques_num_, 0.0);

	// Standard deviation of u in the Levy step:
	// stddev_u = { G(1+b)*sin(pi*b/2) / [ G((1+b)/2) * b * 2^((b-1)/2) ] }^(1/b)
	double gamma1 = tgamma(lgd_param_.beta + 1.0);
	double gamma2 = tgamma(0.5*(lgd_param_.beta + 1.0));
	double stddev_u = pow((gamma1*sin(0.5*GCTL_Pi*lgd_param_.beta)) / (gamma2*lgd_param_.beta*pow(2, 0.5*(lgd_param_.beta-1.0))), 1.0/lgd_param_.beta);

	// The generator is seeded from the wall clock, so two runs differ.
	unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
	std::default_random_engine generator(seed);
	std::normal_distribution<double> dist_u(0, stddev_u);
	std::normal_distribution<double> dist_v(0, lgd_param_.stddev_v);
	std::uniform_real_distribution<double> dist_s(1.0, 2.0);

	// g       gradient at the current model
	// g_mean  scratch gradient of the mean model (batch test only)
	// g_mem   [0, n): direction of the previous flight, [n, 2n): random vector
	// g_orth  output of schmidt_orthogonal() for the two vectors held in g_mem
	// b_m     best model found so far
	array<double> g, g_mean, g_mem, g_orth, new_mean, b_m, alphas;
	g.resize(lgd_ques_num_); g_mean.resize(lgd_ques_num_);
	g_mem.resize(2*lgd_ques_num_); g_orth.resize(2*lgd_ques_num_);
	new_mean.resize(lgd_ques_num_); b_m.resize(lgd_ques_num_); alphas.resize(lgd_ques_num_);

	// By default every parameter is scaled by lgd_param_.alpha.
	vecset(alphas, lgd_param_.alpha);

	if (lgd_has_range_)
	{
		// With bounds, the scale becomes alpha times the width of the allowed range.
		vecdiff(alphas, lgd_hig_, lgd_low_);
		vecscale(alphas, lgd_param_.alpha);
	}

	if (lgd_has_alpha_)
	{
		// User supplied scales take precedence over the two choices above.
		// NOTE(review): veccpy with a third argument presumably copies lgd_alpha_
		// multiplied by lgd_param_.alpha - confirm against the gctl headers.
		veccpy(alphas, lgd_alpha_, lgd_param_.alpha);
	}

	double fx_best, fx_tmp, direct_mod, levy_length;
	double fx_mean = NAN; // stays NaN until the first batch test has been run

	// Start the flights. Note the inclusive bound: the model produced by the
	// last flight is evaluated as well.
	int rcd_times = 0;
	lgd_trace_times_ = 0;
	for (int ft = 0; ft <= lgd_param_.flight_times; ft++)
	{
		// Evaluate the trial model and keep it if it is the best one so far.
		fx_tmp = LGD_Evaluate(best_m, g);
		if (ft == 0 || fx_tmp < fx_best)
		{
			fx_best = fx_tmp;
			veccpy(b_m, best_m);
		}

		// Record the flight. Without a positive lambda every model is recorded,
		// otherwise only models whose objective value does not exceed lambda.
		if (lgd_param_.lambda <= 0.0 || (lgd_param_.lambda > 0.0 && fx_tmp <= lgd_param_.lambda))
		{
			for (int i = 0; i < lgd_ques_num_; i++)
			{
				std_m[i] = dynamic_stddev(std_m[i], rcd_times, mean_m[i], best_m[i], new_mean[i]);
				mean_m[i] = new_mean[i];
			}
			rcd_times++;

			if (lgd_save_trace_)
			{
				lgd_trace_.append_array(best_m);
				lgd_trace_times_++;
			}
		}

		if (LGD_Progress(ft, fx_tmp, fx_mean, fx_best, lgd_param_))
		{
			// Hand the best model back to the caller.
			veccpy(best_m, b_m);
			return LGD_STOP;
		}

		if (lgd_param_.batch > 0 && (rcd_times+1)%lgd_param_.batch == 0)
		{
			// The gradient of the mean model goes to a scratch buffer so that g,
			// the gradient of the current model, stays intact for the stagnation
			// test and the flight below.
			fx_mean = LGD_Evaluate(mean_m, g_mean);
			if (fx_mean < lgd_param_.epsilon)
			{
				LGD_Progress(ft, fx_tmp, fx_mean, fx_best, lgd_param_);
				// Hand the best model back to the caller.
				veccpy(best_m, b_m);
				return LGD_CONVERGENCE;
			}
		}

		// Stagnation test on the module of the gradient.
		direct_mod = sqrt(vecdot(g, g));
		if (direct_mod < lgd_param_.sigma)
		{
			if (ft == 0) // no previous direction is available on the first flight
			{
				do // the gradient vanished: take a random direction instead
				{
					for (int i = 0; i < lgd_ques_num_; i++)
					{
						g[i] = dist_s(generator);
					}

					direct_mod = sqrt(vecdot(g, g));
				}
				while (direct_mod < lgd_param_.sigma);
			}
			else // the gradient vanished: step orthogonally to the previous direction
			{
				for (int i = 0; i < lgd_ques_num_; i++)
				{
					g_mem[i+lgd_ques_num_] = dist_s(generator);
				}

				schmidt_orthogonal(g_mem, g_orth, 2);

				for (int i = 0; i < lgd_ques_num_; i++)
				{
					g[i] = g_orth[i+lgd_ques_num_];
				}
				direct_mod = 1.0; // the orthogonalized vector is taken as a unit vector
			}
		}

		// Length of the Levy flight. The original formula has no outer absolute
		// value; it is taken here because only the length of the step is needed.
		levy_length = fabs(dist_u(generator)/pow(fabs(dist_v(generator)), 1.0/lgd_param_.beta));

		for (int i = 0; i < lgd_ques_num_; i++)
		{
			best_m[i] -= levy_length*alphas[i]*g[i]/direct_mod;
		}

		if (!vecvalid(best_m))
		{
			// Like every other exit, return the best valid model instead of
			// leaving the invalid trial model in the caller's array.
			veccpy(best_m, b_m);
			return LGD_NAN_VALUE;
		}

		// Remember the direction of this flight for the orthogonal escape step.
		for (int i = 0; i < lgd_ques_num_; i++)
		{
			g_mem[i] = g[i];
		}

		// Apply the range constraints, if any.
		if (lgd_has_range_)
		{
			vecbtm(best_m, lgd_low_);
			vectop(best_m, lgd_hig_);
		}
	}

	// Hand the best model back to the caller.
	veccpy(best_m, b_m);
	return LGD_REACHED_MAX_ITERATIONS;
}
|
212
lib/optimization/lgd.h
Normal file
212
lib/optimization/lgd.h
Normal file
@ -0,0 +1,212 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _GCTL_LGD_H
|
||||||
|
#define _GCTL_LGD_H
|
||||||
|
|
||||||
|
#include "gctl/core.h"
|
||||||
|
#include "gctl/io.h"
|
||||||
|
#include "gctl/maths.h"
|
||||||
|
#include "gctl/algorithm.h"
|
||||||
|
|
||||||
|
#include "gctl_optimization_config.h"
|
||||||
|
#ifdef GCTL_OPTIMIZATION_TOML
|
||||||
|
#include "toml.hpp"
|
||||||
|
#endif // GCTL_OPTIMIZATION_TOML
|
||||||
|
|
||||||
|
#if defined _WINDOWS || __WIN32__
|
||||||
|
#include "windows.h"
|
||||||
|
#endif // _WINDOWS || __WIN32__
|
||||||
|
|
||||||
|
// The OpenMP header is needed whenever GCTL_OPENMP is defined: the solver's
// implementation calls omp_get_wtime() under that macro.
#ifdef GCTL_OPENMP
#include "omp.h"
#endif // GCTL_OPENMP
|
||||||
|
|
||||||
|
namespace gctl
|
||||||
|
{
|
||||||
|
/**
 * @brief Return codes of the lgd_solver class.
 *
 * Positive values report how the iteration ended, negative values report an error.
 */
enum lgd_return_code
{
	LGD_CONVERGENCE            = 1,     ///< The mean convergence test was satisfied.
	LGD_STOP                   = 2,     ///< The progress monitoring function stopped the iteration.
	LGD_REACHED_MAX_ITERATIONS = 3,     ///< The maximal number of flights was reached.
	LGD_INVALID_SOLUTION_SIZE  = -1024, ///< The solution array is empty.
	LGD_INVALID_MAX_ITERATIONS = -1023, ///< The maximal number of flights is not positive.
	LGD_INVALID_EPSILON        = -1022, ///< The epsilon is not positive.
	LGD_INVALID_STDV           = -1021, ///< The standard deviation of v is not positive.
	LGD_INVALID_BETA           = -1020, ///< The beta is outside of (1.0, 2.0).
	LGD_INVALID_ALPHA          = -1019, ///< The alpha is not positive.
	LGD_INVALID_SIGMA          = -1018, ///< The sigma is not positive.
	LGD_NAN_VALUE              = -1017, ///< The solution contains an invalid (NaN) value.
};
|
||||||
|
|
||||||
|
/**
 * @brief Parameters of the L-GD (Levy-flight gradient descent) method.
 *
 * The default values quoted below are the ones documented for default_lgd_para().
 */
struct lgd_para
{
	/**
	 * Maximal number of Levy flights. The iteration runs until this number is
	 * reached unless the mean convergence test is enabled (see 'batch') and
	 * satisfied first. Must be positive. The default is 1000.
	 */
	int flight_times;

	/**
	 * Batch size of the mean convergence test, counted in recorded solutions.
	 * Whenever a batch is complete, the objective function is evaluated at the
	 * mean solution and compared with 'epsilon'. Note that only qualified
	 * solutions are recorded if 'lambda' is set. A value of zero switches the
	 * test off. The default is 0.
	 */
	int batch;

	/**
	 * Threshold of the mean convergence test. The iteration is reported as
	 * converged once the objective value of the mean solution falls below this
	 * value. Must be positive. The default is 1e-5.
	 */
	double epsilon;

	/**
	 * Standard deviation of the normal variable v in the flight length
	 * |u/|v|^{1/beta}|, where u is a normal variable whose standard deviation
	 * is derived from 'beta'. Must be positive. Typically given as 1.0.
	 */
	double stddev_v;

	/**
	 * Shape parameter of the flight length distribution. Must lie in the open
	 * interval (1.0, 2.0). The bigger beta is, the narrower the range of
	 * flight lengths gets. The default is 1.5.
	 */
	double beta;

	/**
	 * Scale factor applied to the flight length. Choose it according to the
	 * expected convergence speed: normally a bigger alpha lets the L-GD
	 * converge faster, but the method may miss the optimal solution if alpha
	 * is too big. Must be positive. The default is 0.01.
	 */
	double alpha;

	/**
	 * Threshold of the stagnation point test. If the module of the gradient is
	 * smaller than sigma, the next flight is taken orthogonally to the previous
	 * one. This helps the algorithm to escape from stagnation points such as
	 * local minima or saddle points. Must be positive. The default is 1e-8.
	 */
	double sigma;

	/**
	 * Threshold for recording the search path. If the value is bigger than
	 * zero, only solutions whose objective value is smaller than or equal to
	 * the threshold are used for the statistical analysis. Otherwise all
	 * solutions are used. The recorded path can be saved with
	 * save_lgd_trace(string) if set_lgd_record_trace() was called.
	 * The default is -1.0.
	 */
	double lambda;
};
|
||||||
|
|
||||||
|
class lgd_solver
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
lgd_solver(); ///< Default constructor.
|
||||||
|
virtual ~lgd_solver(); ///< Default de-constructor.
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Interface for the evaluation of the objective function. Concrete
|
||||||
|
* contents of this function is determined according to the optimizing problem.
|
||||||
|
*
|
||||||
|
* @param x Inputs of the current solution.
|
||||||
|
* @param g Outputs of the model gradient calculated using the input solution.
|
||||||
|
* @return Current objective value.
|
||||||
|
*/
|
||||||
|
virtual double LGD_Evaluate(const array<double> &x, array<double> &g) = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Default monitoring function of the optimizing process.
|
||||||
|
*
|
||||||
|
* @param best_fx Objective value of the best solution.
|
||||||
|
* @param curr_fx Objective value of the current solution.
|
||||||
|
* @param mean_fx Objective value of the mean solution.
|
||||||
|
* @param param L-GD's parameters used for the optimzing process.
|
||||||
|
* @param curr_t Current flight times.
|
||||||
|
* @return The optimizing process will be stopped if a non-zero value is returned.
|
||||||
|
*/
|
||||||
|
virtual int LGD_Progress(const int curr_t, const double curr_fx, const double mean_fx,
|
||||||
|
const double best_fx, const lgd_para ¶m);
|
||||||
|
|
||||||
|
void lgd_silent();
|
||||||
|
void set_lgd_report_interval(int inter);
|
||||||
|
void show_solver();
|
||||||
|
|
||||||
|
void set_lgd_record_trace(); ///< Turn on the recording of fight traces.
|
||||||
|
// Save fight traces to file. Not that only qualified solutions will be
|
||||||
|
// saved if the recording threshold is set.
|
||||||
|
void save_lgd_trace(std::string trace_file);
|
||||||
|
|
||||||
|
lgd_para default_lgd_para();
|
||||||
|
void set_lgd_para(const lgd_para ¶m);
|
||||||
|
|
||||||
|
#ifdef GCTL_OPTIMIZATION_TOML
|
||||||
|
|
||||||
|
void set_lgd_para(std::string filename);
|
||||||
|
void set_lgd_para(const toml::value &toml_data);
|
||||||
|
|
||||||
|
#endif // GCTL_OPTIMIZATION_TOML
|
||||||
|
|
||||||
|
void LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m,
|
||||||
|
std::ostream &ss = std::clog, bool verbose = true, bool err_throw = false);
|
||||||
|
|
||||||
|
void LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m,
|
||||||
|
const array<double> &alphas, std::ostream &ss = std::clog,
|
||||||
|
bool verbose = true, bool err_throw = false);
|
||||||
|
|
||||||
|
void LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m,
|
||||||
|
const array<double> &lows, const array<double> &higs, std::ostream &ss = std::clog,
|
||||||
|
bool verbose = true, bool err_throw = false);
|
||||||
|
|
||||||
|
void LGD_Minimize(array<double> &best_m, array<double> &mean_m, array<double> &std_m,
|
||||||
|
double low, double hig, std::ostream &ss = std::clog, bool verbose = true,
|
||||||
|
bool err_throw = false);
|
||||||
|
|
||||||
|
private:
|
||||||
|
void lgd_error_str(lgd_return_code err_code, std::ostream &ss = std::clog, bool err_throw = false);
|
||||||
|
lgd_return_code lgd(array<double> &best_m, array<double> &mean_m, array<double> &std_m);
|
||||||
|
|
||||||
|
private:
|
||||||
|
lgd_para lgd_param_;
|
||||||
|
int lgd_inter_, lgd_ques_num_, lgd_trace_times_;
|
||||||
|
bool lgd_silent_, lgd_has_range_, lgd_has_alpha_, lgd_save_trace_;
|
||||||
|
array<double> lgd_low_, lgd_hig_, lgd_alpha_, lgd_trace_;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // _GCTL_LGD_H
|
100
lib/optimization/loss_func.cpp
Normal file
100
lib/optimization/loss_func.cpp
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2023 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "loss_func.h"
|
||||||
|
|
||||||
|
/**
 * @brief Default constructor. Creates an object without targets; init() must be
 * called before evaluate() or gradient() are used.
 */
gctl::loss_func::loss_func()
{
	uncer_type_ = 0; // no uncertainty is applied until set_uncertainty() is called
	tar_num_ = 0;
	uncer_ = 1.0;
	// evaluate() accumulates into loss_ and get_loss() reads it, so it must
	// start from a defined value.
	loss_ = 0.0;
	norm_type_ = L2;
}
|
||||||
|
|
||||||
|
/**
 * @brief Construct the loss function and initialize it with the targets.
 *
 * @param tar    Target values.
 * @param n_type Norm type of the loss (L1 and L2 are handled by evaluate()).
 */
gctl::loss_func::loss_func(const array<double> &tar, norm_type_e n_type)
{
	uncer_type_ = 0; // no uncertainty is applied until set_uncertainty() is called
	uncer_ = 1.0;
	// evaluate() accumulates into loss_ and get_loss() reads it, so it must
	// start from a defined value.
	loss_ = 0.0;
	init(tar, n_type);
}
|
||||||
|
|
||||||
|
/// Destructor. Nothing is released explicitly here.
gctl::loss_func::~loss_func()
{
}
|
||||||
|
|
||||||
|
void gctl::loss_func::init(const array<double> &tar, norm_type_e n_type)
|
||||||
|
{
|
||||||
|
tar_num_ = tar.size();
|
||||||
|
tars_ = tar;
|
||||||
|
norm_type_ = n_type;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::loss_func::set_uncertainty(double uncer)
|
||||||
|
{
|
||||||
|
uncer_type_ = 1;
|
||||||
|
uncer_ = uncer;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::loss_func::set_uncertainty(const array<double> &uncer)
|
||||||
|
{
|
||||||
|
uncer_type_ = 2;
|
||||||
|
uncers_ = uncer;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
double gctl::loss_func::get_loss()
|
||||||
|
{
|
||||||
|
double l = loss_;
|
||||||
|
loss_ = 0.0;
|
||||||
|
return l;
|
||||||
|
}
|
||||||
|
|
||||||
|
double gctl::loss_func::evaluate(double inp, int id)
|
||||||
|
{
|
||||||
|
double val = (inp - tars_[id]);
|
||||||
|
if (uncer_type_ == 1) val /= uncer_;
|
||||||
|
else if (uncer_type_ == 2) val /= uncers_[id];
|
||||||
|
|
||||||
|
if (norm_type_ == L1) val = fabs(val);
|
||||||
|
if (norm_type_ == L2) val = val*val;
|
||||||
|
|
||||||
|
loss_ += val;
|
||||||
|
return val/tar_num_;
|
||||||
|
}
|
||||||
|
|
||||||
|
double gctl::loss_func::gradient(double inp, int id)
|
||||||
|
{
|
||||||
|
double c;
|
||||||
|
if (uncer_type_ == 1) c = uncer_;
|
||||||
|
else if (uncer_type_ == 2) c = uncers_[id];
|
||||||
|
|
||||||
|
double val = (inp - tars_[id]);
|
||||||
|
if (norm_type_ == L1 && val >= 0) val = 1.0;
|
||||||
|
if (norm_type_ == L1 && val < 0) val = -1.0;
|
||||||
|
if (norm_type_ == L2) val = 2.0*val;
|
||||||
|
|
||||||
|
if (norm_type_ == L1 && uncer_type_ != 0) val /= c;
|
||||||
|
else if (norm_type_ == L2 && uncer_type_ != 0) val /= (c*c);
|
||||||
|
|
||||||
|
return val/tar_num_;
|
||||||
|
}
|
61
lib/optimization/loss_func.h
Normal file
61
lib/optimization/loss_func.h
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2023 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _GCTL_LOSS_FUNC_H
|
||||||
|
#define _GCTL_LOSS_FUNC_H
|
||||||
|
|
||||||
|
// library's head files
|
||||||
|
#include "gctl/core.h"
|
||||||
|
|
||||||
|
namespace gctl
|
||||||
|
{
|
||||||
|
class loss_func
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
loss_func();
|
||||||
|
loss_func(const array<double> &tar, norm_type_e n_type);
|
||||||
|
virtual ~loss_func();
|
||||||
|
|
||||||
|
void init(const array<double> &tar, norm_type_e n_type);
|
||||||
|
void set_uncertainty(double uncer);
|
||||||
|
void set_uncertainty(const array<double> &uncer);
|
||||||
|
double get_loss();
|
||||||
|
double evaluate(double inp, int id);
|
||||||
|
double gradient(double inp, int id);
|
||||||
|
|
||||||
|
private:
|
||||||
|
//unsigned int counter_;
|
||||||
|
unsigned int tar_num_;
|
||||||
|
int uncer_type_;
|
||||||
|
double uncer_, loss_;
|
||||||
|
norm_type_e norm_type_;
|
||||||
|
array<double> tars_;
|
||||||
|
array<double> uncers_;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // _GCTL_LOSS_FUNC_H
|
152
lib/optimization/lu.cpp
Normal file
152
lib/optimization/lu.cpp
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "lu.h"
|
||||||
|
|
||||||
|
/**
 * @brief Bind the decomposition to a square, non-empty matrix.
 *
 * Only a reference is kept: decompose() later overwrites the caller's matrix.
 *
 * @param sourceMatrix The matrix to decompose.
 * @throws domain_error If the matrix is empty or not square.
 */
gctl::lu::lu(matrix<double> &sourceMatrix) : decomposedMatrix(sourceMatrix)
{
	if (sourceMatrix.empty())
		throw domain_error("Invalid input matrix. From lu::lu(...)");

	if (sourceMatrix.row_size() != sourceMatrix.col_size())
		throw domain_error("Invalid input matrix. From lu::lu(...)");
}
|
||||||
|
|
||||||
|
// Decomposition into triangular matrices
|
||||||
|
void gctl::lu::decompose()
|
||||||
|
{
|
||||||
|
// Initialize the permutation vector
|
||||||
|
int n = decomposedMatrix.row_size();
|
||||||
|
rowPermutation.resize(n);
|
||||||
|
for (int i = 0; i < n; i++)
|
||||||
|
{
|
||||||
|
rowPermutation[i] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
// LU factorization
|
||||||
|
double tmp, det = 1.0;
|
||||||
|
for (int p = 1; p <= n - 1; p++)
|
||||||
|
{
|
||||||
|
// Find pivot element.
|
||||||
|
for (int i = p + 1; i <= n; i++)
|
||||||
|
{
|
||||||
|
if (std::fabs(decomposedMatrix[rowPermutation[i - 1]][p - 1]) > std::fabs(decomposedMatrix[rowPermutation[p - 1]][p - 1]))
|
||||||
|
{
|
||||||
|
// Switch the index for the p-1 pivot row if necessary.
|
||||||
|
tmp = rowPermutation[p - 1]; rowPermutation[p - 1] = rowPermutation[i - 1]; rowPermutation[i - 1] = tmp;
|
||||||
|
det = -det;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (decomposedMatrix[rowPermutation[p - 1]][p - 1] == 0.0)
|
||||||
|
{
|
||||||
|
// The matrix is singular, at least to precision of algorithm
|
||||||
|
throw runtime_error("The input matrix is singular. From gctl::lu::decompose()");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Multiply the diagonal elements.
|
||||||
|
det = det * decomposedMatrix[rowPermutation[p - 1]][p - 1];
|
||||||
|
|
||||||
|
// Form multiplier.
|
||||||
|
for (int i = p + 1; i <= n; i++)
|
||||||
|
{
|
||||||
|
decomposedMatrix[rowPermutation[i - 1]][p - 1] /= decomposedMatrix[rowPermutation[p - 1]][p - 1];
|
||||||
|
|
||||||
|
// Eliminate [p-1].
|
||||||
|
for (int j = p + 1; j <= n; j++)
|
||||||
|
{
|
||||||
|
decomposedMatrix[rowPermutation[i - 1]][j - 1] -= decomposedMatrix[rowPermutation[i - 1]][p - 1] * decomposedMatrix[rowPermutation[p - 1]][j - 1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
det = det * decomposedMatrix[rowPermutation[n - 1]][n - 1];
|
||||||
|
if (det == 0.0)
|
||||||
|
{
|
||||||
|
throw runtime_error("Determinant of the input matrix is zero. From gctl::lu::decompose()");
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// solve for x in form Ax = b. A is the original input matrix.
|
||||||
|
// Note: b is modified in-place for row permutations
|
||||||
|
void gctl::lu::solve(const array<double>& b, array<double> &x)
|
||||||
|
{
|
||||||
|
// Our decomposed matrix is comprised of both the lower and upper diagonal matrices.
|
||||||
|
|
||||||
|
// The rows of this matrix have been permutated during the decomposition process. The
|
||||||
|
// rowPermutation indicates the proper row order.
|
||||||
|
|
||||||
|
// The lower diagonal matrix only include elements below the diagonal with diagonal
|
||||||
|
// elements set to 1.
|
||||||
|
|
||||||
|
// The upper diagonal matrix is fully specified.
|
||||||
|
|
||||||
|
// First solve Ly = Pb for x using forward substitution. P is a permutated identity matrix.
|
||||||
|
|
||||||
|
if (b.empty())
|
||||||
|
{
|
||||||
|
throw domain_error("Invalid target vector. From gctl::lu::solve(...)");
|
||||||
|
}
|
||||||
|
|
||||||
|
x.resize(b.size());
|
||||||
|
|
||||||
|
for (int i = 0; i < x.size(); i++)
|
||||||
|
{
|
||||||
|
int currentRow = rowPermutation[i];
|
||||||
|
|
||||||
|
double sum = 0.0;
|
||||||
|
|
||||||
|
for (int j = 0; j < i; j++)
|
||||||
|
{
|
||||||
|
sum += (decomposedMatrix[currentRow][j] * x[j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
x[i] = (b[currentRow] - sum);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now solve Uy = x for y using back substitution. Note that
|
||||||
|
// y can be solved in place using the existing y vector. No need
|
||||||
|
// to allocate another vector.
|
||||||
|
|
||||||
|
for (int i = b.size()-1; i >= 0; i--)
|
||||||
|
{
|
||||||
|
int currentRow = rowPermutation[i];
|
||||||
|
|
||||||
|
double sum = 0.0;
|
||||||
|
|
||||||
|
for (int j = b.size()-1; j > i; j--)
|
||||||
|
{
|
||||||
|
sum += (decomposedMatrix[currentRow][j] * x[j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
x[i] = (x[i] - sum) / decomposedMatrix[currentRow][i];
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
56
lib/optimization/lu.h
Normal file
56
lib/optimization/lu.h
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _GCTL_LU_H
|
||||||
|
#define _GCTL_LU_H
|
||||||
|
|
||||||
|
#include "gctl/core.h"
|
||||||
|
|
||||||
|
namespace gctl
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* @brief Lower/upper decomposition of matrix into a lower triangular matrix and a upper triangular matrix.
|
||||||
|
*
|
||||||
|
* @tparam T template type
|
||||||
|
*/
|
||||||
|
class lu
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
lu(matrix<double> &sourceMatrix); // Matrix is decomposed in-place
|
||||||
|
virtual ~lu(){}
|
||||||
|
void decompose(); ///< Decomposition into triangular matrices. Return false if failed
|
||||||
|
void solve(const array<double>& b, array<double> &x); ///< solve for x in form Ax = b. A is the original input matrix.
|
||||||
|
|
||||||
|
protected:
|
||||||
|
lu(const lu&) = delete;
|
||||||
|
void operator=(const lu&) = delete;
|
||||||
|
matrix<double> &decomposedMatrix; // Output matrix after decomposition
|
||||||
|
array<int> rowPermutation; // Permutation of rows during pivoting
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // _GCTL_LU_H
|
634
lib/optimization/sgd.cpp
Normal file
634
lib/optimization/sgd.cpp
Normal file
@ -0,0 +1,634 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "sgd.h"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Default parameter for the SGD methods.
|
||||||
|
*/
|
||||||
|
static const gctl::sgd_para sgd_defparam = {0, 1e-6, 0.01, 0.01, 0.9, 0.999, 1e-8};
|
||||||
|
|
||||||
|
/**
 * Construct a solver with the default parameters, a report interval of one
 * iteration, progress output enabled and no solver selected yet.
 */
gctl::sgd_solver::sgd_solver()
    : sgd_param_(sgd_defparam),
      sgd_inter_(1),
      sgd_silent_(false),
      solver_name_("Undefined")
{
}

/// Nothing to release; the destructor exists because the class is polymorphic.
gctl::sgd_solver::~sgd_solver()
{
}
|
||||||
|
|
||||||
|
int gctl::sgd_solver::SGD_Progress(double fx, const array<double> &x, const sgd_para ¶m, const int k)
|
||||||
|
{
|
||||||
|
if (sgd_silent_) return 0;
|
||||||
|
|
||||||
|
if (param.epsilon > 0.0 && fx <= param.epsilon)
|
||||||
|
{
|
||||||
|
std::clog << GCTL_CLEARLINE << "\rF(x) = " << fx << ", Train-Times = " << k;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sgd_inter_ > 0 && k%sgd_inter_ == 0)
|
||||||
|
{
|
||||||
|
std::clog << GCTL_CLEARLINE << "\rF(x) = " << fx << ", Train-Times = " << k;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::sgd_solver::sgd_silent()
|
||||||
|
{
|
||||||
|
sgd_silent_ = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::sgd_solver::set_sgd_report_interval(int inter)
|
||||||
|
{
|
||||||
|
sgd_inter_ = inter;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::sgd_solver::set_sgd_para(const sgd_para &in_param)
|
||||||
|
{
|
||||||
|
sgd_param_ = in_param;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The declaration in sgd.h only exists when GCTL_OPTIMIZATION_TOML is defined,
// so the definition must be guarded the same way or the build fails when the
// TOML option is switched off.
#ifdef GCTL_OPTIMIZATION_TOML
/**
 * Read the solver parameters from the [sgd] table of a TOML document.
 *
 * All parameters are first reset to their defaults; every key that is present
 * in the table then overrides the matching field. Recognised keys: iteration,
 * epsilon, mu, alpha, beta_1, beta_2 and sigma. A document without an [sgd]
 * table leaves the defaults in place.
 *
 * @param toml_data parsed TOML document
 */
void gctl::sgd_solver::set_sgd_para(const toml::value &toml_data)
{
    sgd_param_ = sgd_defparam;

    const std::string SGD = "sgd";
    if (!toml_data.contains(SGD)) return;

    // Look the table up once instead of once per key.
    const toml::value &section = toml_data.at(SGD);

    if (section.contains("iteration")) sgd_param_.iteration = toml::find<int>(section, "iteration");
    if (section.contains("epsilon")) sgd_param_.epsilon = toml::find<double>(section, "epsilon");
    if (section.contains("mu")) sgd_param_.mu = toml::find<double>(section, "mu");
    if (section.contains("alpha")) sgd_param_.alpha = toml::find<double>(section, "alpha");
    if (section.contains("beta_1")) sgd_param_.beta_1 = toml::find<double>(section, "beta_1");
    if (section.contains("beta_2")) sgd_param_.beta_2 = toml::find<double>(section, "beta_2");
    if (section.contains("sigma")) sgd_param_.sigma = toml::find<double>(section, "sigma");
}
#endif // GCTL_OPTIMIZATION_TOML
|
||||||
|
|
||||||
|
void gctl::sgd_solver::show_solver()
|
||||||
|
{
|
||||||
|
std::clog << "Solver's Setup Panel\n";
|
||||||
|
std::clog << "-----------------------------\n";
|
||||||
|
std::clog << "Solver: " << solver_name_ << "\n";
|
||||||
|
std::clog << "Iteration = " << sgd_param_.iteration << ", Epsilon = " << sgd_param_.epsilon << ", Mu = " << sgd_param_.mu << "\n";
|
||||||
|
std::clog << "Alpha = " << sgd_param_.alpha << ", Beta1 = " << sgd_param_.beta_1 << ", Beta2 = " << sgd_param_.beta_2 << ", Sigma = " << sgd_param_.sigma << "\n";
|
||||||
|
std::clog << "=============================\n";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Report the meaning of a solver return code.
 *
 * With err_throw == false the message is written to ss, wrapped in a coloured
 * "Success!" / "Fail!" prefix and terminated by a line break. With
 * err_throw == true a negative code throws the message as a std::string,
 * while a non-negative code writes the bare message to ss.
 *
 * @param err_code  return code of a solver
 * @param ss        output stream for the message
 * @param err_throw throw the message instead of printing it for error codes
 */
void gctl::sgd_solver::sgd_error_str(sgd_return_code err_code, std::ostream &ss, bool err_throw)
{
    // Non-negative codes are successes, negative codes are failures.
    const bool succeeded = (err_code >= 0);

    if (!err_throw)
    {
#if defined _WINDOWS || __WIN32__
        SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE),
            FOREGROUND_INTENSITY | (succeeded ? FOREGROUND_GREEN : FOREGROUND_RED));
        ss << (succeeded ? "Success! " : "Fail! ");
#else
        ss << (succeeded ? "\033[1m\033[32mSGD Success! " : "\033[1m\033[31mSGD Fail! ");
#endif
    }

    const char *text;
    switch (err_code)
    {
        case SGD_SUCCESS:                text = "Success."; break;
        case SGD_CONVERGENCE:            text = "The iteration reached convergence."; break;
        case SGD_STOP:                   text = "The iteration stopped by the progress evaluation function."; break;
        case SGD_UNKNOWN_ERROR:          text = "Unknown error."; break;
        case SGD_INVALID_VARIABLE_SIZE:  text = "Invalid array size."; break;
        case SGD_REACHED_MAX_ITERATIONS: text = "The maximal iteration is reached."; break;
        case SGD_INVALID_EPSILON:        text = "Invalid value for epsilon."; break;
        case SGD_INVALID_BETA:           text = "Invalid value for beta."; break;
        case SGD_INVALID_MU:             text = "Invalid value for mu."; break;
        case SGD_INVALID_ALPHA:          text = "Invalid value for alpha."; break;
        case SGD_INVALID_SIGMA:          text = "Invalid value for sigma."; break;
        case SGD_NAN_VALUE:              text = "NaN values found."; break;
        default:                         text = "Unknown error."; break;
    }
    std::string err_str = text;

    // The exception type stays std::string, as callers may catch it.
    if (err_throw && !succeeded) throw err_str;
    ss << err_str;

    if (!err_throw)
    {
        // Restore the default console colour and finish the line.
#if defined _WINDOWS || __WIN32__
        SetConsoleTextAttribute(GetStdHandle(STD_ERROR_HANDLE), 7);
        ss << std::endl;
#else
        ss << "\033[0m" << std::endl;
#endif
    }
}
|
||||||
|
|
||||||
|
/// Return a copy of the default parameter set.
gctl::sgd_para gctl::sgd_solver::default_sgd_para()
{
    return sgd_defparam;
}
|
||||||
|
|
||||||
|
/**
 * Run the selected SGD solver on m and report the outcome.
 *
 * In silent mode nothing is printed and any negative return code is thrown
 * as a std::string (see sgd_error_str). Otherwise the solver is timed, the
 * time cost is printed to std::clog when err_throw is false, and the return
 * code is reported through sgd_error_str().
 *
 * @param m         start model on input, updated in place by the solver
 * @param solver_id solver to use
 * @param ss        output stream for the status message
 * @param verbose   report the status even when the solver succeeded
 * @param err_throw throw error messages instead of printing them
 * @throws std::invalid_argument if solver_id is not a known solver type
 */
void gctl::sgd_solver::SGD_Minimize(array<double> &m, sgd_solver_type solver_id, std::ostream &ss, bool verbose, bool err_throw)
{
    // Single dispatch point shared by the silent and the timed code paths.
    // It also records the solver name used by show_solver() and the report.
    auto run_solver = [&]() -> sgd_return_code
    {
        switch (solver_id)
        {
            case MOMENTUM:  solver_name_ = "MOMENTUM";  return momentum(m);
            case NAG:       solver_name_ = "NAG";       return nag(m);
            case ADAGRAD:   solver_name_ = "ADAGRAD";   return adagrad(m);
            case RMSPROP:   solver_name_ = "RMSPROP";   return rmsprop(m);
            case ADAM:      solver_name_ = "ADAM";      return adam(m);
            case NADAM:     solver_name_ = "NADAM";     return nadam(m);
            case ADAMAX:    solver_name_ = "ADAMAX";    return adamax(m);
            case ADABELIEF: solver_name_ = "ADABELIEF"; return adabelief(m);
            default:
                throw std::invalid_argument("Invalid solver type. gctl::sgd_solver::SGD_Minimize(...)");
        }
    };

    if (sgd_silent_)
    {
        const sgd_return_code silent_ret = run_solver();
        // Silent mode always throws on failure, whatever err_throw says.
        if (silent_ret < 0) sgd_error_str(silent_ret, ss, true);
        return;
    }

    // Time the solver: wall-clock time with OpenMP, processor time otherwise.
#ifdef GCTL_OPENMP
    const double start = omp_get_wtime();
    const sgd_return_code ret = run_solver();
    const double end = omp_get_wtime();

    const double costime = 1000*(end-start);
#else
    const clock_t start = clock();
    const sgd_return_code ret = run_solver();
    const clock_t end = clock();

    const double costime = 1000*(end-start)/(double)CLOCKS_PER_SEC;
#endif

    if (!err_throw)
    {
        // run_solver() throws for unknown ids, so solver_name_ is always set.
        std::clog << std::endl;
        std::clog << "Solver: " << solver_name_ << ". Time cost: " << costime << " ms" << std::endl;
    }

    if (verbose || ret < 0) sgd_error_str(ret, ss, err_throw);
}
|
||||||
|
|
||||||
|
/**
 * Classic momentum method.
 *
 * Every iteration evaluates the objective and its gradient at m, accumulates
 * the gradient into a velocity (mk = mu*mk + g) and steps m against it
 * with step size alpha.
 *
 * @param m start model on input, updated in place
 * @return SGD_CONVERGENCE when fx drops below epsilon, SGD_STOP when
 *         SGD_Progress() returns non-zero, SGD_REACHED_MAX_ITERATIONS when
 *         the iteration limit is hit, SGD_NAN_VALUE when an updated model
 *         value is NaN, or an SGD_INVALID_* code for a bad parameter.
 */
gctl::sgd_return_code gctl::sgd_solver::momentum(array<double> &m)
{
    const int n_size = m.size();

    // Check parameters.
    if (n_size <= 0) return SGD_INVALID_VARIABLE_SIZE;
    if (sgd_param_.epsilon < 0) return SGD_INVALID_EPSILON;
    if (sgd_param_.mu < 0 || sgd_param_.mu >= 1.0) return SGD_INVALID_MU;
    // Same step size check as adam(), nadam(), adamax() and adabelief().
    if (sgd_param_.alpha < 0) return SGD_INVALID_ALPHA;

    array<double> mk(n_size, 0.0); // velocity
    array<double> g(n_size);       // gradient

    int t = 0;
    double fx;
    while (1)
    {
        fx = SGD_Evaluate(m, g);

        if (SGD_Progress(fx, m, sgd_param_, t)) return SGD_STOP;
        if (fx < sgd_param_.epsilon) return SGD_CONVERGENCE;

        for (int i = 0; i < n_size; i++)
        {
            mk[i] = sgd_param_.mu*mk[i] + g[i];

            m[i] = m[i] - sgd_param_.alpha * mk[i];
            if (m[i] != m[i]) return SGD_NAN_VALUE; // NaN never equals itself
        }

        t++;
        // An iteration limit of zero or less means "run until convergence".
        if (sgd_param_.iteration > 0 && t >= sgd_param_.iteration) break;
    }

    return SGD_REACHED_MAX_ITERATIONS;
}
|
||||||
|
|
||||||
|
/**
 * Nesterov's accelerated gradient (NAG) method.
 *
 * The gradient is evaluated at the look-ahead point
 * xk = m - mu*alpha*mk; the velocity and the model are then updated as in
 * the classic momentum method. Note that SGD_Progress() receives m, not the
 * look-ahead point at which fx was evaluated.
 *
 * @param m start model on input, updated in place
 * @return SGD_CONVERGENCE when fx drops below epsilon, SGD_STOP when
 *         SGD_Progress() returns non-zero, SGD_REACHED_MAX_ITERATIONS when
 *         the iteration limit is hit, SGD_NAN_VALUE when an updated model
 *         value is NaN, or an SGD_INVALID_* code for a bad parameter.
 */
gctl::sgd_return_code gctl::sgd_solver::nag(array<double> &m)
{
    const int n_size = m.size();

    // Check parameters.
    if (n_size <= 0) return SGD_INVALID_VARIABLE_SIZE;
    if (sgd_param_.epsilon < 0) return SGD_INVALID_EPSILON;
    if (sgd_param_.mu < 0 || sgd_param_.mu >= 1.0) return SGD_INVALID_MU;
    // Same step size check as adam(), nadam(), adamax() and adabelief().
    if (sgd_param_.alpha < 0) return SGD_INVALID_ALPHA;

    array<double> mk(n_size, 0.0); // velocity
    array<double> xk(n_size);      // look-ahead point
    array<double> g (n_size);      // gradient at the look-ahead point

    int t = 0;
    double fx;
    while (1)
    {
        for (int i = 0; i < n_size; i++)
        {
            xk[i] = m[i] - sgd_param_.mu*sgd_param_.alpha*mk[i];
        }

        fx = SGD_Evaluate(xk, g);

        if (SGD_Progress(fx, m, sgd_param_, t)) return SGD_STOP;
        if (fx < sgd_param_.epsilon) return SGD_CONVERGENCE;

        for (int i = 0; i < n_size; i++)
        {
            mk[i] = sgd_param_.mu*mk[i] + g[i];

            m[i] = m[i] - sgd_param_.alpha * mk[i];
            if (m[i] != m[i]) return SGD_NAN_VALUE; // NaN never equals itself
        }

        t++;
        // An iteration limit of zero or less means "run until convergence".
        if (sgd_param_.iteration > 0 && t >= sgd_param_.iteration) break;
    }

    return SGD_REACHED_MAX_ITERATIONS;
}
|
||||||
|
|
||||||
|
/**
 * AdaGrad method.
 *
 * The squared gradients are accumulated per variable and every step is
 * scaled by 1/(sqrt(accumulated sum) + sigma).
 *
 * @param m start model on input, updated in place
 * @return SGD_CONVERGENCE when fx drops below epsilon, SGD_STOP when
 *         SGD_Progress() returns non-zero, SGD_REACHED_MAX_ITERATIONS when
 *         the iteration limit is hit, SGD_NAN_VALUE when an updated model
 *         value is NaN, or an SGD_INVALID_* code for a bad parameter.
 */
gctl::sgd_return_code gctl::sgd_solver::adagrad(array<double> &m)
{
    const int n_size = m.size();

    // Check parameters.
    if (n_size <= 0) return SGD_INVALID_VARIABLE_SIZE;
    if (sgd_param_.epsilon < 0.0) return SGD_INVALID_EPSILON;
    // Same step size check as adam(), nadam(), adamax() and adabelief().
    if (sgd_param_.alpha < 0.0) return SGD_INVALID_ALPHA;
    if (sgd_param_.sigma < 0.0) return SGD_INVALID_SIGMA;

    array<double> mk(n_size, 0.0); // running sum of squared gradients
    array<double> g (n_size);      // gradient

    int t = 0;
    double fx;
    while (1)
    {
        fx = SGD_Evaluate(m, g);

        if (SGD_Progress(fx, m, sgd_param_, t)) return SGD_STOP;
        if (fx < sgd_param_.epsilon) return SGD_CONVERGENCE;

        for (int i = 0; i < n_size; i++)
        {
            mk[i] = mk[i] + g[i]*g[i];

            m[i] = m[i] - sgd_param_.alpha * g[i]/(sqrt(mk[i]) + sgd_param_.sigma);
            if (m[i] != m[i]) return SGD_NAN_VALUE; // NaN never equals itself
        }

        t++;
        // An iteration limit of zero or less means "run until convergence".
        if (sgd_param_.iteration > 0 && t >= sgd_param_.iteration) break;
    }

    return SGD_REACHED_MAX_ITERATIONS;
}
|
||||||
|
|
||||||
|
/**
 * RMSProp method.
 *
 * An exponentially decaying average of the squared gradients (decay rate
 * beta_2) scales every step by 1/(sqrt(average) + sigma).
 *
 * @param m start model on input, updated in place
 * @return SGD_CONVERGENCE when fx drops below epsilon, SGD_STOP when
 *         SGD_Progress() returns non-zero, SGD_REACHED_MAX_ITERATIONS when
 *         the iteration limit is hit, SGD_NAN_VALUE when an updated model
 *         value is NaN, or an SGD_INVALID_* code for a bad parameter.
 */
gctl::sgd_return_code gctl::sgd_solver::rmsprop(array<double> &m)
{
    const int n_size = m.size();

    // Check parameters.
    if (n_size <= 0) return SGD_INVALID_VARIABLE_SIZE;
    if (sgd_param_.epsilon < 0.0) return SGD_INVALID_EPSILON;
    // Same step size check as adam(), nadam(), adamax() and adabelief().
    if (sgd_param_.alpha < 0.0) return SGD_INVALID_ALPHA;
    // beta_2 is the decay rate used below; outside [0, 1) the running
    // average can turn negative and sqrt() would return NaN.
    if (sgd_param_.beta_2 < 0.0 || sgd_param_.beta_2 >= 1.0) return SGD_INVALID_BETA;
    if (sgd_param_.sigma < 0.0) return SGD_INVALID_SIGMA;

    array<double> vk(n_size, 0.0); // decaying average of squared gradients
    array<double> g (n_size);      // gradient

    int t = 0;
    double fx;
    while (1)
    {
        fx = SGD_Evaluate(m, g);

        if (SGD_Progress(fx, m, sgd_param_, t)) return SGD_STOP;
        if (fx < sgd_param_.epsilon) return SGD_CONVERGENCE;

        for (int i = 0; i < n_size; i++)
        {
            vk[i] = sgd_param_.beta_2 * vk[i] + (1.0 - sgd_param_.beta_2)*g[i]*g[i];

            m[i] = m[i] - sgd_param_.alpha * g[i]/(sqrt(vk[i]) + sgd_param_.sigma);
            if (m[i] != m[i]) return SGD_NAN_VALUE; // NaN never equals itself
        }

        t++;
        // An iteration limit of zero or less means "run until convergence".
        if (sgd_param_.iteration > 0 && t >= sgd_param_.iteration) break;
    }

    return SGD_REACHED_MAX_ITERATIONS;
}
|
||||||
|
|
||||||
|
/**
 * Adam method.
 *
 * Decaying averages of the gradient (mk, rate beta_1) and of the squared
 * gradient (vk, rate beta_2) are kept; the bias correction is folded into
 * the per-iteration step size alpha_k. The update loop is parallelised with
 * OpenMP when it is enabled.
 *
 * @param m start model on input, updated in place
 * @return SGD_CONVERGENCE when fx drops below epsilon, SGD_STOP when
 *         SGD_Progress() returns non-zero, SGD_REACHED_MAX_ITERATIONS when
 *         the iteration limit is hit, SGD_NAN_VALUE when an updated model
 *         value is NaN, or an SGD_INVALID_* code for a bad parameter.
 */
gctl::sgd_return_code gctl::sgd_solver::adam(array<double> &m)
{
    const int n_size = m.size();

    // Check parameters.
    if (n_size <= 0) return SGD_INVALID_VARIABLE_SIZE;
    if (sgd_param_.epsilon < 0) return SGD_INVALID_EPSILON;
    if (sgd_param_.alpha < 0) return SGD_INVALID_ALPHA;
    if (sgd_param_.beta_1 < 0.0 || sgd_param_.beta_1 >= 1.0) return SGD_INVALID_BETA;
    if (sgd_param_.beta_2 < 0.0 || sgd_param_.beta_2 >= 1.0) return SGD_INVALID_BETA;
    if (sgd_param_.sigma < 0.0) return SGD_INVALID_SIGMA;

    array<double> mk(n_size, 0.0); // first moment estimate
    array<double> vk(n_size, 0.0); // second moment estimate
    array<double> g (n_size);      // gradient

    double beta_1t = 1.0, beta_2t = 1.0; // running powers beta_1^t and beta_2^t
    double alpha_k;

    int t = 0;
    double fx;
    while (1)
    {
        fx = SGD_Evaluate(m, g);

        if (SGD_Progress(fx, m, sgd_param_, t)) return SGD_STOP;
        if (fx < sgd_param_.epsilon) return SGD_CONVERGENCE;

        beta_1t *= sgd_param_.beta_1;
        beta_2t *= sgd_param_.beta_2;
        alpha_k = sgd_param_.alpha * sqrt(1.0 - beta_2t)/(1.0 - beta_1t);

        // A return statement is not allowed inside an OpenMP loop, so NaN
        // values are counted with a reduction and reported after the loop.
        int nan_count = 0;
        int i;
#pragma omp parallel for private (i) schedule(guided) reduction(+:nan_count)
        for (i = 0; i < n_size; i++)
        {
            mk[i] = sgd_param_.beta_1*mk[i] + (1.0 - sgd_param_.beta_1)*g[i];
            vk[i] = sgd_param_.beta_2*vk[i] + (1.0 - sgd_param_.beta_2)*g[i]*g[i];

            m[i] = m[i] - alpha_k * mk[i]/(sqrt(vk[i]) + sgd_param_.sigma);
            if (m[i] != m[i]) nan_count++; // NaN never equals itself
        }

        if (nan_count > 0) return SGD_NAN_VALUE;

        t++;
        // An iteration limit of zero or less means "run until convergence".
        if (sgd_param_.iteration > 0 && t >= sgd_param_.iteration) break;
    }

    return SGD_REACHED_MAX_ITERATIONS;
}
|
||||||
|
|
||||||
|
/**
 * Nadam method (Adam with Nesterov momentum) using a constant momentum
 * rate beta_1.
 *
 * The step direction combines the bias-corrected gradient and the
 * bias-corrected first moment as
 * (1 - beta_1)*g_hat + beta_1*mk_hat, scaled by
 * 1/(sqrt(nk_hat) + sigma).
 *
 * @param m start model on input, updated in place
 * @return SGD_CONVERGENCE when fx drops below epsilon, SGD_STOP when
 *         SGD_Progress() returns non-zero, SGD_REACHED_MAX_ITERATIONS when
 *         the iteration limit is hit, SGD_NAN_VALUE when an updated model
 *         value is NaN, or an SGD_INVALID_* code for a bad parameter.
 */
gctl::sgd_return_code gctl::sgd_solver::nadam(array<double> &m)
{
    const int n_size = m.size();

    // Check parameters.
    if (n_size <= 0) return SGD_INVALID_VARIABLE_SIZE;
    if (sgd_param_.epsilon < 0) return SGD_INVALID_EPSILON;
    if (sgd_param_.alpha < 0) return SGD_INVALID_ALPHA;
    if (sgd_param_.beta_1 < 0.0 || sgd_param_.beta_1 >= 1.0) return SGD_INVALID_BETA;
    if (sgd_param_.beta_2 < 0.0 || sgd_param_.beta_2 >= 1.0) return SGD_INVALID_BETA;
    if (sgd_param_.sigma < 0.0) return SGD_INVALID_SIGMA;

    array<double> mk(n_size, 0.0); // first moment estimate
    array<double> nk(n_size, 0.0); // second moment estimate
    array<double> g (n_size);      // gradient

    // Running powers beta_1^t, beta_1^(t+1) and beta_2^t. They are used for
    // the bias corrections only.
    double beta_1t = 1.0, beta_1t1 = sgd_param_.beta_1, beta_2t = 1.0;

    int t = 0;
    double fx;
    while (1)
    {
        fx = SGD_Evaluate(m, g);

        if (SGD_Progress(fx, m, sgd_param_, t)) return SGD_STOP;
        if (fx < sgd_param_.epsilon) return SGD_CONVERGENCE;

        beta_1t *= sgd_param_.beta_1;
        beta_1t1 *= sgd_param_.beta_1;
        beta_2t *= sgd_param_.beta_2;

        for (int i = 0; i < n_size; i++)
        {
            mk[i] = sgd_param_.beta_1*mk[i] + (1.0 - sgd_param_.beta_1)*g[i];
            nk[i] = sgd_param_.beta_2*nk[i] + (1.0 - sgd_param_.beta_2)*g[i]*g[i];

            // Bias-corrected gradient and moment estimates.
            const double g_hat  = g[i]/(1.0 - beta_1t);
            const double mk_hat = mk[i]/(1.0 - beta_1t1);
            const double nk_hat = nk[i]/(1.0 - beta_2t);

            // The two terms are weighted with the momentum rate itself
            // ((1 - beta_1) and beta_1). Weighting them with the powers
            // (1 - beta_1^t) and beta_1^(t+1) would make the momentum term
            // vanish as t grows.
            m[i] = m[i] - sgd_param_.alpha * ((1.0 - sgd_param_.beta_1)*g_hat
                + sgd_param_.beta_1*mk_hat)/(sqrt(nk_hat) + sgd_param_.sigma);
            if (m[i] != m[i]) return SGD_NAN_VALUE; // NaN never equals itself
        }

        t++;
        // An iteration limit of zero or less means "run until convergence".
        if (sgd_param_.iteration > 0 && t >= sgd_param_.iteration) break;
    }

    return SGD_REACHED_MAX_ITERATIONS;
}
|
||||||
|
|
||||||
|
/**
 * AdaMax method.
 *
 * Keeps a decaying average of the gradient (mk, rate beta_1) and a decaying
 * maximum of the absolute gradient (vk, rate beta_2). The step is
 * alpha*mk/((1 - beta_1^t)*(vk + sigma)).
 *
 * @param m start model on input, updated in place
 * @return SGD_CONVERGENCE when fx drops below epsilon, SGD_STOP when
 *         SGD_Progress() returns non-zero, SGD_REACHED_MAX_ITERATIONS when
 *         the iteration limit is hit, SGD_NAN_VALUE when an updated model
 *         value is NaN, or an SGD_INVALID_* code for a bad parameter.
 */
gctl::sgd_return_code gctl::sgd_solver::adamax(array<double> &m)
{
    const int n_size = m.size();

    // Check parameters.
    if (n_size <= 0) return SGD_INVALID_VARIABLE_SIZE;
    if (sgd_param_.epsilon < 0) return SGD_INVALID_EPSILON;
    if (sgd_param_.alpha < 0) return SGD_INVALID_ALPHA;
    if (sgd_param_.beta_1 < 0.0 || sgd_param_.beta_1 >= 1.0) return SGD_INVALID_BETA;
    if (sgd_param_.beta_2 < 0.0 || sgd_param_.beta_2 >= 1.0) return SGD_INVALID_BETA;
    if (sgd_param_.sigma < 0.0) return SGD_INVALID_SIGMA;

    array<double> mk(n_size, 0.0); // first moment estimate
    array<double> vk(n_size, 0.0); // decaying maximum of |g|
    array<double> g (n_size);      // gradient

    double beta_1t = 1.0; // running power beta_1^t

    int t = 0;
    double fx;
    while (1)
    {
        fx = SGD_Evaluate(m, g);

        if (SGD_Progress(fx, m, sgd_param_, t)) return SGD_STOP;
        if (fx < sgd_param_.epsilon) return SGD_CONVERGENCE;

        beta_1t *= sgd_param_.beta_1;

        for (int i = 0; i < n_size; i++)
        {
            mk[i] = sgd_param_.beta_1*mk[i] + (1.0 - sgd_param_.beta_1)*g[i];
            vk[i] = std::max(sgd_param_.beta_2*vk[i], std::fabs(g[i]));

            // sigma keeps the denominator away from zero: a variable whose
            // gradient has been zero so far has mk = vk = 0, and 0/0 would
            // abort the run with SGD_NAN_VALUE. With sigma = 0 the update is
            // the plain AdaMax formula.
            m[i] = m[i] - sgd_param_.alpha * mk[i]/((1.0 - beta_1t)*(vk[i] + sgd_param_.sigma));
            if (m[i] != m[i]) return SGD_NAN_VALUE; // NaN never equals itself
        }

        t++;
        // An iteration limit of zero or less means "run until convergence".
        if (sgd_param_.iteration > 0 && t >= sgd_param_.iteration) break;
    }

    return SGD_REACHED_MAX_ITERATIONS;
}
|
||||||
|
|
||||||
|
/**
 * AdaBelief method.
 *
 * Works like adam(), except that the second moment tracks the squared
 * deviation of the gradient from its decaying average, (g - mk)^2, instead
 * of the squared gradient.
 *
 * @param m start model on input, updated in place
 * @return SGD_CONVERGENCE when fx drops below epsilon, SGD_STOP when
 *         SGD_Progress() returns non-zero, SGD_REACHED_MAX_ITERATIONS when
 *         the iteration limit is hit, SGD_NAN_VALUE when an updated model
 *         value is NaN, or an SGD_INVALID_* code for a bad parameter.
 */
gctl::sgd_return_code gctl::sgd_solver::adabelief(array<double> &m)
{
    int n_size = m.size();

    // Check parameters.
    if (n_size <= 0) return SGD_INVALID_VARIABLE_SIZE;
    if (sgd_param_.epsilon < 0) return SGD_INVALID_EPSILON;
    if (sgd_param_.alpha < 0) return SGD_INVALID_ALPHA;
    if (sgd_param_.beta_1 < 0.0 || sgd_param_.beta_1 >= 1.0) return SGD_INVALID_BETA;
    if (sgd_param_.beta_2 < 0.0 || sgd_param_.beta_2 >= 1.0) return SGD_INVALID_BETA;
    if (sgd_param_.sigma < 0.0) return SGD_INVALID_SIGMA;

    array<double> mk(n_size, 0.0); // first moment estimate
    array<double> vk(n_size, 0.0); // decaying average of (g - mk)^2
    array<double> g (n_size);      // gradient

    double pow_beta_1 = 1.0; // running power beta_1^t
    double pow_beta_2 = 1.0; // running power beta_2^t

    for (int t = 0; ; )
    {
        double fx = SGD_Evaluate(m, g);

        if (SGD_Progress(fx, m, sgd_param_, t)) return SGD_STOP;
        if (fx < sgd_param_.epsilon) return SGD_CONVERGENCE;

        pow_beta_1 *= sgd_param_.beta_1;
        pow_beta_2 *= sgd_param_.beta_2;

        // Step size with the bias corrections of both moments folded in.
        double alpha_k = sgd_param_.alpha * sqrt(1.0 - pow_beta_2)/(1.0 - pow_beta_1);

        for (int j = 0; j < n_size; j++)
        {
            mk[j] = sgd_param_.beta_1*mk[j] + (1.0 - sgd_param_.beta_1)*g[j];

            double dev = g[j] - mk[j];
            vk[j] = sgd_param_.beta_2*vk[j] + (1.0 - sgd_param_.beta_2)*dev*dev;

            m[j] = m[j] - alpha_k * mk[j]/(sqrt(vk[j]) + sgd_param_.sigma);
            if (m[j] != m[j]) return SGD_NAN_VALUE; // NaN never equals itself
        }

        t++;
        // An iteration limit of zero or less means "run until convergence".
        if (sgd_param_.iteration > 0 && t >= sgd_param_.iteration) break;
    }

    return SGD_REACHED_MAX_ITERATIONS;
}
|
201
lib/optimization/sgd.h
Normal file
201
lib/optimization/sgd.h
Normal file
@ -0,0 +1,201 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _GCTL_SGD_H
|
||||||
|
#define _GCTL_SGD_H
|
||||||
|
|
||||||
|
#include "gctl/core.h"
|
||||||
|
#include "gctl/algorithm.h"
|
||||||
|
|
||||||
|
#include "gctl_optimization_config.h"
|
||||||
|
#ifdef GCTL_OPTIMIZATION_TOML
|
||||||
|
#include "toml.hpp"
|
||||||
|
#endif // GCTL_OPTIMIZATION_TOML
|
||||||
|
|
||||||
|
#if defined _WINDOWS || __WIN32__
|
||||||
|
#include "windows.h"
|
||||||
|
#endif // _WINDOWS || __WIN32__
|
||||||
|
|
||||||
|
// sgd.cpp calls omp_get_wtime() under GCTL_OPENMP, so omp.h must be included
// under that macro. The GSTL_OPENMP spelling is still accepted so that
// existing builds that define it keep working.
#if defined GCTL_OPENMP || defined GSTL_OPENMP
#include "omp.h"
#endif // GCTL_OPENMP || GSTL_OPENMP
|
||||||
|
|
||||||
|
namespace gctl
|
||||||
|
{
|
||||||
|
/**
 * @brief Types of method that can be selected in sgd_solver::SGD_Minimize().
 */
enum sgd_solver_type
{
    MOMENTUM,  ///< Classic momentum.
    NAG,       ///< Nesterov's accelerated gradient (NAG).
    ADAGRAD,   ///< AdaGrad method.
    RMSPROP,   ///< RMSProp method.
    ADAM,      ///< Adam method.
    NADAM,     ///< Nadam method.
    ADAMAX,    ///< AdaMax method.
    ADABELIEF, ///< AdaBelief method.
};
|
||||||
|
|
||||||
|
/**
 * @brief Return values of the SGD solvers. Non-negative values are reported
 * as successes and negative values as failures by sgd_solver::sgd_error_str().
 */
enum sgd_return_code
{
    SGD_SUCCESS = 0,                    ///< The optimization terminated successfully.
    SGD_CONVERGENCE = 1,                ///< The optimization reached convergence.
    SGD_STOP = 2,                       ///< The process was stopped by the monitoring function.
    SGD_UNKNOWN_ERROR = -1024,          ///< Unknown error.
    SGD_INVALID_VARIABLE_SIZE = -1023,  ///< The variable size is not positive.
    SGD_INVALID_EPSILON = -1022,        ///< The epsilon is negative.
    SGD_REACHED_MAX_ITERATIONS = -1021, ///< Iteration reached the maximal limit.
    SGD_INVALID_MU = -1020,             ///< Invalid value for mu.
    SGD_INVALID_ALPHA = -1019,          ///< Invalid value for alpha.
    SGD_INVALID_BETA = -1018,           ///< Invalid value for beta.
    SGD_INVALID_SIGMA = -1017,          ///< Invalid value for sigma.
    SGD_NAN_VALUE = -1016,              ///< NaN value.
};
|
||||||
|
|
||||||
|
/**
 * @brief Parameters of the SGD methods. The member order matters: the
 * defaults in sgd.cpp are given as a positional initializer list.
 */
struct sgd_para
{
    /// Maximal number of iterations. When this is zero or negative the
    /// iteration only stops on convergence. The default is 0.
    int iteration;

    /// Epsilon for the convergence test: a solver stops once the objective
    /// value drops below it. Must not be negative. The default is 1e-6.
    double epsilon;

    /// Damping rate of the classic momentum and the NAG methods. The range
    /// of this parameter is [0, 1) and the default is 0.01.
    double mu;

    /// Step size of the iteration. Must not be negative in the Adam-type
    /// methods. The default is 0.01.
    double alpha;

    /// Exponential decay rate of the first order moment estimates. The range
    /// of this parameter is [0, 1) and the default is 0.9.
    double beta_1;

    /// Exponential decay rate of the second order moment estimates. The range
    /// of this parameter is [0, 1) and the default is 0.999.
    double beta_2;

    /// A small non-negative number added to the denominators of the adaptive
    /// methods. The default is 1e-8.
    double sigma;
};
|
||||||
|
|
||||||
|
class sgd_solver
|
||||||
|
{
|
||||||
|
private:
|
||||||
|
sgd_para sgd_param_;
|
||||||
|
int sgd_inter_;
|
||||||
|
bool sgd_silent_;
|
||||||
|
std::string solver_name_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
sgd_solver();
|
||||||
|
virtual ~sgd_solver();
|
||||||
|
|
||||||
|
virtual double SGD_Evaluate(const array<double> &x, array<double> &g) = 0;
|
||||||
|
virtual int SGD_Progress(double fx, const array<double> &x, const sgd_para ¶m, const int k);
|
||||||
|
|
||||||
|
void sgd_silent();
|
||||||
|
void set_sgd_report_interval(int inter);
|
||||||
|
void set_sgd_para(const sgd_para ¶m);
|
||||||
|
void show_solver();
|
||||||
|
void sgd_error_str(sgd_return_code err_code, std::ostream &ss = std::clog, bool err_throw = false);
|
||||||
|
sgd_para default_sgd_para();
|
||||||
|
|
||||||
|
#ifdef GCTL_OPTIMIZATION_TOML
|
||||||
|
void set_sgd_para(const toml::value &toml_data);
|
||||||
|
#endif // GCTL_OPTIMIZATION_TOML
|
||||||
|
|
||||||
|
sgd_return_code momentum(array<double> &m);
|
||||||
|
sgd_return_code nag(array<double> &m);
|
||||||
|
sgd_return_code adagrad(array<double> &m);
|
||||||
|
sgd_return_code rmsprop(array<double> &m);
|
||||||
|
sgd_return_code adam(array<double> &m);
|
||||||
|
sgd_return_code nadam(array<double> &m);
|
||||||
|
sgd_return_code adamax(array<double> &m);
|
||||||
|
sgd_return_code adabelief(array<double> &m);
|
||||||
|
|
||||||
|
void SGD_Minimize(array<double> &m, sgd_solver_type solver_id = ADAM, std::ostream &ss = std::clog, bool verbose = true, bool err_throw = false);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // _GCTL_SGD_H
|
184
lib/optimization/svd.cpp
Normal file
184
lib/optimization/svd.cpp
Normal file
@ -0,0 +1,184 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#include "svd.h"
|
||||||
|
|
||||||
|
/**
 * @brief Default constructor. Puts the object into its default state by
 * calling reset().
 */
gctl::svd::svd()
{
    reset();
}
|
||||||
|
|
||||||
|
/**
 * @brief Construct the object and immediately decompose a matrix.
 *
 * Delegates to the default constructor first, then calls decompose().
 *
 * @param src_mat  Matrix to decompose.
 */
gctl::svd::svd(const matrix<double> &src_mat) : svd()
{
    decompose(src_mat);
}
|
||||||
|
|
||||||
|
void gctl::svd::reset()
|
||||||
|
{
|
||||||
|
maxi_iteration = 1000;
|
||||||
|
K = 0;
|
||||||
|
epsilon = 1e-8;
|
||||||
|
U.clear();
|
||||||
|
V.clear();
|
||||||
|
S.clear();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::svd::set_singular_number(int k)
|
||||||
|
{
|
||||||
|
if (k <= 0)
|
||||||
|
{
|
||||||
|
throw invalid_argument("Invalid singular number. From gctl::svd::set_singular_number(...)");
|
||||||
|
}
|
||||||
|
|
||||||
|
K = k;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::svd::set_iteration(int t)
|
||||||
|
{
|
||||||
|
if (t <= 0)
|
||||||
|
{
|
||||||
|
throw invalid_argument("Invalid singular number. From gctl::svd::set_iteration(...)");
|
||||||
|
}
|
||||||
|
|
||||||
|
maxi_iteration = t;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::svd::set_epsilon(double e)
|
||||||
|
{
|
||||||
|
if (e <= 0)
|
||||||
|
{
|
||||||
|
throw invalid_argument("Invalid singular number. From gctl::svd::set_epsilon(...)");
|
||||||
|
}
|
||||||
|
|
||||||
|
epsilon = e;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
void gctl::svd::decompose(const matrix<double> &src_mat)
|
||||||
|
{
|
||||||
|
int M = src_mat.row_size();
|
||||||
|
int N = src_mat.col_size();
|
||||||
|
if (K == 0) K = N;
|
||||||
|
|
||||||
|
S.resize(K, 0.0);
|
||||||
|
U.resize(K, M, 0.0);
|
||||||
|
V.resize(K, N, 0.0);
|
||||||
|
|
||||||
|
srand(time(0));
|
||||||
|
array<double> left_vector(M), next_left_vector(M);
|
||||||
|
array<double> right_vector(N), next_right_vector(N);
|
||||||
|
array<double> U_tmp(M), V_tmp(N);
|
||||||
|
|
||||||
|
double diff, r, d;
|
||||||
|
for(int col=0;col<K;col++)
|
||||||
|
{
|
||||||
|
diff = 1;
|
||||||
|
r = -1;
|
||||||
|
while(1)
|
||||||
|
{
|
||||||
|
for(int i=0;i<M;i++)
|
||||||
|
left_vector[i]= (double) rand() / RAND_MAX;
|
||||||
|
if(normalize(left_vector, epsilon) > epsilon)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int iter=0; diff >= epsilon && iter < maxi_iteration; iter++)
|
||||||
|
{
|
||||||
|
next_left_vector.assign_all(0.0);
|
||||||
|
next_right_vector.assign_all(0.0);
|
||||||
|
|
||||||
|
for(int i=0;i<M;i++)
|
||||||
|
for(int j=0;j<N;j++)
|
||||||
|
next_right_vector[j]+=left_vector[i]*src_mat[i][j];
|
||||||
|
|
||||||
|
r=normalize(next_right_vector, epsilon);
|
||||||
|
if(r<epsilon) break;
|
||||||
|
|
||||||
|
for(int i=0;i<col;i++)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < N; j++)
|
||||||
|
{
|
||||||
|
V_tmp[j] = V[i][j];
|
||||||
|
}
|
||||||
|
orth(V_tmp, next_right_vector);
|
||||||
|
}
|
||||||
|
normalize(next_right_vector, epsilon);
|
||||||
|
|
||||||
|
for(int i=0;i<M;i++)
|
||||||
|
for(int j=0;j<N;j++)
|
||||||
|
next_left_vector[i]+=next_right_vector[j]*src_mat[i][j];
|
||||||
|
r=normalize(next_left_vector, epsilon);
|
||||||
|
if(r<epsilon) break;
|
||||||
|
|
||||||
|
for(int i=0;i<col;i++)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < M; j++)
|
||||||
|
{
|
||||||
|
U_tmp[j] = U[i][j];
|
||||||
|
}
|
||||||
|
orth(U_tmp, next_left_vector);
|
||||||
|
}
|
||||||
|
normalize(next_left_vector, epsilon);
|
||||||
|
|
||||||
|
diff=0;
|
||||||
|
for(int i=0;i<M;i++)
|
||||||
|
{
|
||||||
|
d=next_left_vector[i]-left_vector[i];
|
||||||
|
diff+=d*d;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < M; i++)
|
||||||
|
{
|
||||||
|
left_vector[i] = next_left_vector[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
right_vector[i] = next_right_vector[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(r>=epsilon)
|
||||||
|
{
|
||||||
|
S[col]=r;
|
||||||
|
for (int i = 0; i < M; i++)
|
||||||
|
{
|
||||||
|
U[col][i] = left_vector[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < N; i++)
|
||||||
|
{
|
||||||
|
V[col][i] = right_vector[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else break;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
71
lib/optimization/svd.h
Normal file
71
lib/optimization/svd.h
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
/********************************************************
|
||||||
|
* ██████╗ ██████╗████████╗██╗
|
||||||
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
||||||
|
* ██║ ███╗██║ ██║ ██║
|
||||||
|
* ██║ ██║██║ ██║ ██║
|
||||||
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
||||||
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
||||||
|
* Geophysical Computational Tools & Library (GCTL)
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
||||||
|
*
|
||||||
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
||||||
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation, either version 2
|
||||||
|
* of the License, or (at your option) any later version. You should have
|
||||||
|
* received a copy of the GNU Lesser General Public License along with this
|
||||||
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*
|
||||||
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
||||||
|
* the GCTL, please consider the option to obtain a commercial license for a
|
||||||
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
||||||
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
||||||
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
||||||
|
* to include some description of your company and the realm of its activities.
|
||||||
|
* Also add information on how to contact you by electronic and paper mail.
|
||||||
|
******************************************************/
|
||||||
|
|
||||||
|
#ifndef _GCTL_SVD_H
|
||||||
|
#define _GCTL_SVD_H
|
||||||
|
|
||||||
|
#include "gctl/core.h"
|
||||||
|
#include "gctl/algorithm.h"
|
||||||
|
|
||||||
|
namespace gctl
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* @brief SVD奇异值分解
|
||||||
|
*
|
||||||
|
* A = U^T \cdot S \cdot V
|
||||||
|
* A 大小为M*N输入的二维数组
|
||||||
|
* K 待计算的奇异值与奇异向量的数量
|
||||||
|
* U 分解后的U矩阵,大小为K*M
|
||||||
|
* S 分解后的对角S矩阵,以向量的形式表示,即前k个特征值,大小1*K
|
||||||
|
* V 分解后的V矩阵,大小为K*N
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
class svd
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
matrix<double> U, V;
|
||||||
|
array<double> S;
|
||||||
|
|
||||||
|
svd();
|
||||||
|
svd(const matrix<double> &src_mat);
|
||||||
|
virtual ~svd(){}
|
||||||
|
|
||||||
|
void reset();
|
||||||
|
void set_singular_number(int k);
|
||||||
|
void set_iteration(int t);
|
||||||
|
void set_epsilon(double e);
|
||||||
|
int get_singular_number(){return K;}
|
||||||
|
|
||||||
|
void decompose(const matrix<double> &src_mat);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
int maxi_iteration, K;
|
||||||
|
double epsilon;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif // _GCTL_SVD_H
|
Loading…
Reference in New Issue
Block a user