/********************************************************
 *  ██████╗  ██████╗████████╗██╗
 * ██╔════╝ ██╔════╝╚══██╔══╝██║
 * ██║  ███╗██║        ██║   ██║
 * ██║   ██║██║        ██║   ██║
 * ╚██████╔╝╚██████╗   ██║   ███████╗
 *  ╚═════╝  ╚═════╝   ╚═╝   ╚══════╝
 *
 * Geophysical Computational Tools & Library (GCTL)
 *
 * Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
 *
 * GCTL is distributed under a dual licensing scheme. You can redistribute
 * it and/or modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, either version 2
 * of the License, or (at your option) any later version. You should have
 * received a copy of the GNU Lesser General Public License along with this
 * program. If not, see <http://www.gnu.org/licenses/>.
 *
 * If the terms and conditions of the LGPL v.2. would prevent you from using
 * the GCTL, please consider the option to obtain a commercial license for a
 * fee. These licenses are offered by the GCTL's original author. As a rule,
 * licenses are provided "as-is", unlimited in time for a one time fee. Please
 * send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
 * to include some description of your company and the realm of its activities.
 * Also add information on how to contact you by electronic and paper mail.
 ******************************************************/

#include "dnn.h"

gctl::dnn::dnn(std::string info)
{
    hlayer_size_ = 0;
    hind_layers_.reserve(100);
    output_layer_ = nullptr;
    wd_st_ = 0; wd_size_ = 0;
    batch_iter_ = 0;
    h_ss = o_ss = t_ss = 0;
    status_ = NoneSet;
    info_ = info;
    has_stats_ = false;
}

gctl::dnn::~dnn()
{
    for (size_t i = 0; i < hind_layers_.size(); i++)
    {
        if (hind_layers_[i] != nullptr)
        {
            delete hind_layers_[i];
        }
    }

    if (output_layer_ != nullptr) delete output_layer_;
}

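/*
 * On-disk layout of a ".gctl.dnn" file, as written by save_network() and
 * read back below (reconstructed from the code; counts are raw ints):
 *
 *   int               number of hidden layers
 *   int               statistics flag (non-zero if weight mean/std arrays follow)
 *   hlayer_type_e[n]  one type tag per hidden layer
 *   (per-layer setup blocks, via save_layer_setup()/load_layer_setup())
 *   olayer_type_e     output layer type
 *   int               weight count (wd_size_)
 *   double[wd_size_]  weights
 *   double[wd_size_]  weight means    (only if the statistics flag is set)
 *   double[wd_size_]  weight stddevs  (only if the statistics flag is set)
 */
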
void gctl::dnn::load_network(std::string filename)
{
    std::ifstream ifile;
    open_infile(ifile, filename, ".gctl.dnn", std::ios::in|std::ios::binary);

    int h_size, stats_int = 0;
    hlayer_type_e l_type;
    dnn_hlayer *hlayer_ptr = nullptr;

    ifile.read((char*)&h_size, sizeof(int));
    ifile.read((char*)&stats_int, sizeof(int));
    if (stats_int) has_stats_ = true;

    for (int i = 0; i < h_size; i++)
    {
        ifile.read((char*)&l_type, sizeof(hlayer_type_e));
        if (l_type == FullyConnected) hlayer_ptr = new fully_connected;
        else if (l_type == MaxPooling) hlayer_ptr = new maxpooling;
        else if (l_type == AvgPooling) hlayer_ptr = new avgpooling;
        else if (l_type == Convolution) hlayer_ptr = new convolution;
        // guard against pushing a stale pointer when an unknown tag is read
        else throw std::invalid_argument("[gctl::dnn] Unknown hidden layer type.");

        hind_layers_.push_back(hlayer_ptr);
        hlayer_size_++;
    }

    for (int i = 0; i < h_size; i++)
    {
        hind_layers_[i]->load_layer_setup(ifile);
    }

    olayer_type_e o_type;
    ifile.read((char*)&o_type, sizeof(olayer_type_e));
    add_output_layer(o_type);

    ifile.read((char*)&wd_size_, sizeof(int));
    weights_.resize(wd_size_);
    ders_.resize(wd_size_);

    ifile.read((char*)weights_.get(), wd_size_*sizeof(double));
    if (has_stats_)
    {
        weights_mean_.resize(wd_size_);
        weights_std_.resize(wd_size_);
        ifile.read((char*)weights_mean_.get(), wd_size_*sizeof(double));
        ifile.read((char*)weights_std_.get(), wd_size_*sizeof(double));
    }
    ifile.close();

    status_ = Trained;
    return;
}

void gctl::dnn::save_network(std::string filename) const
{
    if (status_ != Trained)
    {
        throw std::invalid_argument("[gctl::dnn] The DNN is not trained.");
    }

    std::ofstream ofile;
    open_outfile(ofile, filename, ".gctl.dnn", std::ios::out|std::ios::binary);

    // save the hind layers' setup
    int stats_int = 0;
    if (has_stats_) stats_int = 1;
    ofile.write((char*)&hlayer_size_, sizeof(int));
    ofile.write((char*)&stats_int, sizeof(int));

    hlayer_type_e l_type;
    for (int i = 0; i < hlayer_size_; i++)
    {
        l_type = hind_layers_[i]->get_layer_type();
        ofile.write((char*)&l_type, sizeof(hlayer_type_e));
    }

    for (int i = 0; i < hlayer_size_; i++)
    {
        hind_layers_[i]->save_layer_setup(ofile);
    }

    // save the output layer's setup
    olayer_type_e o_type = output_layer_->get_output_type();
    ofile.write((char*)&o_type, sizeof(olayer_type_e));

    // save the hind layers' parameters
    ofile.write((char*)&wd_size_, sizeof(int));
    ofile.write((char*)weights_.get(), wd_size_*sizeof(double));

    if (has_stats_)
    {
        ofile.write((char*)weights_mean_.get(), wd_size_*sizeof(double));
        ofile.write((char*)weights_std_.get(), wd_size_*sizeof(double));
    }
    ofile.close();
    return;
}

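/*
 * Minimal save/restore sketch ("model" is a hypothetical file stem; the
 * ".gctl.dnn" extension is appended by open_outfile()/open_infile()):
 *
 *   gctl::dnn net("demo");
 *   // ... set up and train the network ...
 *   net.save_network("model");      // writes model.gctl.dnn
 *
 *   gctl::dnn restored("demo");
 *   restored.load_network("model"); // rebuilds the layers and reads the weights
 */
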
void gctl::dnn::save_layer2text(int l_idx, std::string filename) const
{
    if (hind_layers_.empty())
    {
        throw std::invalid_argument("[gctl::dnn] The DNN is not set up properly.");
    }

    if (status_ != Trained)
    {
        throw std::invalid_argument("[gctl::dnn] The DNN is not trained.");
    }

    if (l_idx < 0 || l_idx >= hlayer_size_)
    {
        throw std::invalid_argument("[gctl::dnn] Invalid index of the hidden layers.");
    }

    std::ofstream ofile;
    open_outfile(ofile, filename + "_weight", ".txt", std::ios::out);
    hind_layers_[l_idx]->save_weights2text(weights_, ofile);
    ofile.close();

    if (has_stats_)
    {
        open_outfile(ofile, filename + "_weight_mean", ".txt", std::ios::out);
        hind_layers_[l_idx]->save_weights2text(weights_mean_, ofile);
        ofile.close();

        open_outfile(ofile, filename + "_weight_std", ".txt", std::ios::out);
        hind_layers_[l_idx]->save_weights2text(weights_std_, ofile);
        ofile.close();
    }
    return;
}

void gctl::dnn::show_network()
{
    if (output_layer_ == nullptr || hind_layers_.empty())
    {
        throw std::invalid_argument("[gctl::dnn] The DNN is not set up properly.");
    }

    std::clog << "=============================\n";
    std::clog << "GCTL's DNN Setup Panel\n";
    std::clog << "-----------------------------\n";
    std::clog << "Network Info: " << info_ << "\n";
    std::clog << "Observation Number: " << obs_num_;
    if (batch_size_ > 0) std::clog << ", Batch Size: " << batch_size_ << "\n";
    else std::clog << ", Batch Size: Not set\n";
    std::clog << "Output Layer: " << output_layer_->get_output_name() << "\n";
    std::clog << "Number of Hind Layers: " << hlayer_size_ << "\n";

    for (int i = 0; i < hlayer_size_; i++)
    {
        std::clog << "Layer-" << i+1 << ": " << hind_layers_[i]->layer_info() << "\n";
    }
    return;
}

void gctl::dnn::add_hind_layer(int in_s, int out_s, hlayer_type_e h_type, activation_type_e a_type)
{
    dnn_hlayer *hlayer_ptr = nullptr;
    if (h_type == FullyConnected)
    {
        // initialize the new hind layer as a fully connected layer
        hlayer_ptr = new fully_connected(wd_st_, in_s, out_s, a_type);
        hind_layers_.push_back(hlayer_ptr);

        // a fully connected layer holds out_s*(in_s+1) parameters (weights plus biases)
        wd_size_ += out_s*(in_s+1);
        wd_st_ = wd_size_;
        hlayer_size_++;
    }
    else throw std::invalid_argument("[gctl::dnn] Invalid initialization parameters for the current layer type.");

    h_ss = 1;
    if ((h_ss + o_ss + t_ss) == 3) status_ = AllSet;
    else status_ = PartSet;
    return;
}

void gctl::dnn::add_hind_layer(int in_rows, int in_cols, int pool_rows, int pool_cols, int stride_rows,
    int stride_cols, hlayer_type_e h_type, pad_type_e p_type, activation_type_e acti_type)
{
    dnn_hlayer *hlayer_ptr = nullptr;
    if (h_type == MaxPooling)
    {
        hlayer_ptr = new maxpooling(in_rows, in_cols, pool_rows, pool_cols, stride_rows, stride_cols, p_type, acti_type);
        hind_layers_.push_back(hlayer_ptr);

        // pooling layers have no trainable parameters, so wd_size_ is unchanged
        hlayer_size_++;
    }
    else if (h_type == AvgPooling)
    {
        hlayer_ptr = new avgpooling(in_rows, in_cols, pool_rows, pool_cols, stride_rows, stride_cols, p_type, acti_type);
        hind_layers_.push_back(hlayer_ptr);

        hlayer_size_++;
    }
    else throw std::invalid_argument("[gctl::dnn] Invalid initialization parameters for the current layer type.");

    h_ss = 1;
    if ((h_ss + o_ss + t_ss) == 3) status_ = AllSet;
    else status_ = PartSet;
    return;
}

void gctl::dnn::add_hind_layer(int channels, int in_rows, int in_cols, int filter_rows, int filter_cols,
    int stride_rows, int stride_cols, hlayer_type_e h_type, pad_type_e p_type, activation_type_e acti_type)
{
    dnn_hlayer *hlayer_ptr = nullptr;
    if (h_type == Convolution)
    {
        hlayer_ptr = new convolution(wd_st_, channels, in_rows, in_cols, filter_rows, filter_cols, stride_rows, stride_cols, p_type, acti_type);
        hind_layers_.push_back(hlayer_ptr);

        // one filter: filter_rows*filter_cols weights per channel, plus a single bias
        wd_size_ += filter_rows*filter_cols*channels + 1;
        wd_st_ = wd_size_;
        hlayer_size_++;
    }
    else throw std::invalid_argument("[gctl::dnn] Invalid initialization parameters for the current layer type.");

    h_ss = 1;
    if ((h_ss + o_ss + t_ss) == 3) status_ = AllSet;
    else status_ = PartSet;
    return;
}

void gctl::dnn::add_output_layer(olayer_type_e o_type)
{
    if (o_type == RegressionMSE) output_layer_ = new rmse;
    else if (o_type == MultiClassEntropy) output_layer_ = new multi_entropy;
    else if (o_type == BinaryClassEntropy) output_layer_ = new binary_entropy;
    else throw std::invalid_argument("[gctl::dnn] Invalid output layer type.");

    o_ss = 1;
    if ((h_ss + o_ss + t_ss) == 3) status_ = AllSet;
    else status_ = PartSet;
    return;
}

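/*
 * Note on the setup flags: h_ss, o_ss and t_ss record whether the hidden
 * layers, the output layer and the training set have been supplied. Each
 * add_* method re-checks their sum, so status_ reaches AllSet (a
 * precondition of init_network()) once all three pieces are present, in
 * whatever order they were added.
 */
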
void gctl::dnn::add_train_set(const matrix<double> &train_obs, const matrix<double> &train_tar, int batch_size)
{
    if (batch_size < 0)
    {
        throw std::invalid_argument("[gctl::dnn] Invalid batch size.");
    }
    batch_size_ = batch_size;

    obs_num_ = train_obs.col_size();
    if (obs_num_ != train_tar.col_size())
    {
        throw std::invalid_argument("[gctl::dnn] Observation sizes do not match.");
    }

    if (output_layer_ == nullptr)
    {
        throw std::invalid_argument("[gctl::dnn] The DNN is not set up properly.");
    }
    else output_layer_->check_target_data(train_tar);

    // a batch size of zero (or one not smaller than the observation number)
    // keeps the whole training set as a single batch
    if (batch_size_ == 0 || batch_size_ >= obs_num_)
    {
        obs_.resize(1);
        targets_.resize(1);
        obs_[0] = train_obs;
        targets_[0] = train_tar;

        batch_num_ = 1;

        t_ss = 1;
        if ((h_ss + o_ss + t_ss) == 3) status_ = AllSet;
        else status_ = PartSet;
        return;
    }

    batch_num_ = ceil(1.0*obs_num_/batch_size_);

    obs_.resize(batch_num_);
    targets_.resize(batch_num_);

    // copy the full batches column by column
    for (size_t i = 0; i < batch_num_-1; i++)
    {
        obs_[i].resize(train_obs.row_size(), batch_size);
        targets_[i].resize(train_tar.row_size(), batch_size);

        for (size_t c = 0; c < batch_size; c++)
        {
            for (size_t r = 0; r < train_obs.row_size(); r++)
            {
                obs_[i][r][c] = train_obs[r][i*batch_size + c];
            }

            for (size_t r = 0; r < train_tar.row_size(); r++)
            {
                targets_[i][r][c] = train_tar[r][i*batch_size + c];
            }
        }
    }

    // the last batch holds the remaining columns
    obs_[batch_num_-1].resize(train_obs.row_size(), obs_num_ - (batch_num_-1)*batch_size);
    targets_[batch_num_-1].resize(train_tar.row_size(), obs_num_ - (batch_num_-1)*batch_size);

    for (size_t c = 0; c < obs_num_ - (batch_num_-1)*batch_size; c++)
    {
        for (size_t r = 0; r < train_obs.row_size(); r++)
        {
            obs_[batch_num_-1][r][c] = train_obs[r][(batch_num_-1)*batch_size + c];
        }

        for (size_t r = 0; r < train_tar.row_size(); r++)
        {
            targets_[batch_num_-1][r][c] = train_tar[r][(batch_num_-1)*batch_size + c];
        }
    }

    t_ss = 1;
    if ((h_ss + o_ss + t_ss) == 3) status_ = AllSet;
    else status_ = PartSet;
    return;
}

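/*
 * Batch partition example: with obs_num_ = 100 observation columns and
 * batch_size = 32, batch_num_ = ceil(100/32) = 4; the first three batches
 * hold 32 columns each and the last one holds the remaining 100 - 3*32 = 4.
 */
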
void gctl::dnn::init_network(double mu, double sigma, unsigned int seed)
{
    if (status_ != AllSet)
    {
        throw std::invalid_argument("[gctl::dnn] The DNN is not set up properly.");
    }

    if (hind_layers_.size() > 1)
    {
        for (size_t i = 1; i < hind_layers_.size(); i++)
        {
            if (hind_layers_[i]->in_size() != hind_layers_[i-1]->out_size())
            {
                throw std::invalid_argument("[gctl::dnn] Layer sizes do not match.");
            }
        }
    }

    weights_.resize(wd_size_);
    ders_.resize(wd_size_);

    // draw the initial weights from the normal distribution N(mu, sigma)
    weights_.random_float(mu, sigma, RdNormal, seed);
    ders_.random_float(mu, sigma, RdNormal, seed);

    status_ = Initialized;
    return;
}

void gctl::dnn::train_network(const sgd_para &pa, sgd_solver_type solver)
{
    if (status_ != Initialized)
    {
        throw std::invalid_argument("[gctl::dnn] The DNN is not initialized.");
    }

    set_sgd_para(pa);
    SGD_Minimize(weights_, solver);

    status_ = Trained;
    return;
}

void gctl::dnn::train_network(const lgd_para &pa)
{
    if (status_ != Initialized)
    {
        throw std::invalid_argument("[gctl::dnn] The DNN is not initialized.");
    }

    // the LGD solver also returns mean and standard deviation arrays for the weights
    has_stats_ = true;

    set_lgd_para(pa);
    LGD_Minimize(weights_, weights_mean_, weights_std_);

    status_ = Trained;
    return;
}

void gctl::dnn::predict(const matrix<double> &pre_obs, matrix<double> &predicts)
{
    if (status_ != Trained)
    {
        throw std::invalid_argument("[gctl::dnn] The DNN is not trained.");
    }

    forward_propagation(pre_obs);
    predicts = hind_layers_.back()->forward_propagation_data();
    return;
}

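/*
 * End-to-end usage sketch (the layer sizes, the activation tag a_type, and
 * the solver arguments below are illustrative placeholders, not verified
 * values from the headers):
 *
 *   gctl::dnn net("regression demo");
 *   net.add_hind_layer(16, 32, FullyConnected, a_type);
 *   net.add_hind_layer(32, 1, FullyConnected, a_type);
 *   net.add_output_layer(RegressionMSE);
 *   net.add_train_set(train_obs, train_tar, 0); // 0 -> one full batch
 *   net.init_network(0.0, 0.1, 0);              // N(0.0, 0.1) weights, seed 0
 *   net.train_network(my_sgd_para, my_solver);  // or the lgd_para overload
 *   net.predict(test_obs, predicts);
 */
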
void gctl::dnn::forward_propagation(const matrix<double> &input)
{
    // the first layer consumes the input data directly
    if (input.row_size() != hind_layers_[0]->in_size())
    {
        throw std::invalid_argument("[gctl::dnn] Input data have incorrect dimensions.");
    }

    hind_layers_[0]->forward_propagation(weights_, input);
    // every following layer consumes the previous layer's output
    for (int i = 1; i < hlayer_size_; i++)
    {
        hind_layers_[i]->forward_propagation(weights_, hind_layers_[i-1]->forward_propagation_data());
    }
    return;
}

void gctl::dnn::backward_propagation(const matrix<double> &input, const matrix<double> &target)
{
    dnn_hlayer *first_layer = hind_layers_.front();
    dnn_hlayer *last_layer = hind_layers_.back();

    // let the output layer compute the back-propagation data
    output_layer_->evaluation(last_layer->forward_propagation_data(), target);

    // if there is only one hidden layer, "prev_layer_data" is the input data
    if (hlayer_size_ == 1)
    {
        first_layer->backward_propagation(weights_, ders_, input, output_layer_->backward_propagation_data());
        return;
    }

    // compute gradients for the last hidden layer
    last_layer->backward_propagation(
        weights_, ders_,
        hind_layers_[hlayer_size_-2]->forward_propagation_data(),
        output_layer_->backward_propagation_data());

    // compute gradients for all hidden layers except the first and the last
    for (int i = hlayer_size_-2; i > 0; i--)
    {
        hind_layers_[i]->backward_propagation(
            weights_, ders_,
            hind_layers_[i-1]->forward_propagation_data(),
            hind_layers_[i+1]->backward_propagation_data());
    }

    // compute gradients for the first layer
    first_layer->backward_propagation(weights_, ders_, input, hind_layers_[1]->backward_propagation_data());
    return;
}

double gctl::dnn::SGD_Evaluate(const array<double> &x, array<double> &g)
{
    // run the forward and backward processes once on the current batch
    forward_propagation(obs_[batch_iter_%batch_num_]);
    backward_propagation(obs_[batch_iter_%batch_num_], targets_[batch_iter_%batch_num_]);
    batch_iter_++;
    // collect the network's derivatives
    g = ders_;
    return output_layer_->loss_value();
}

int gctl::dnn::SGD_Progress(double fx, const array<double> &x, const sgd_para &param, const int k)
{
    // clear the terminal and move the cursor back to the top
    winsize win;
    ioctl(0, TIOCGWINSZ, &win);
    std::clog << "\033[2J" << "\033[" << win.ws_row << "A";

    show_network();
    sgd_solver::show_solver();
    std::clog << GCTL_CLEARLINE << "\rF(x) = " << fx << ", Train-Times = " << k << "\n";
    return 0;
}

double gctl::dnn::LGD_Evaluate(const array<double> &x, array<double> &g)
{
    // run the forward and backward processes once on the current batch
    forward_propagation(obs_[batch_iter_%batch_num_]);
    backward_propagation(obs_[batch_iter_%batch_num_], targets_[batch_iter_%batch_num_]);
    batch_iter_++;
    // collect the network's derivatives
    g = ders_;
    return output_layer_->loss_value();
}

int gctl::dnn::LGD_Progress(const int curr_t, const double curr_fx, const double mean_fx, const double best_fx, const lgd_para &param)
{
    // clear the terminal and move the cursor back to the top
    winsize win;
    ioctl(0, TIOCGWINSZ, &win);
    std::clog << "\033[2J" << "\033[" << win.ws_row << "A";

    show_network();
    lgd_solver::show_solver();
    std::clog << "F(x) = " << curr_fx << ", Mean F(x) = " << mean_fx << ", Best F(x) = " << best_fx << ", Times = " << curr_t << "\n";
    return 0;
}