/******************************************************** * ██████╗ ██████╗████████╗██╗ * ██╔════╝ ██╔════╝╚══██╔══╝██║ * ██║ ███╗██║ ██║ ██║ * ██║ ██║██║ ██║ ██║ * ╚██████╔╝╚██████╗ ██║ ███████╗ * ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝ * Geophysical Computational Tools & Library (GCTL) * * Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn) * * GCTL is distributed under a dual licensing scheme. You can redistribute * it and/or modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation, either version 2 * of the License, or (at your option) any later version. You should have * received a copy of the GNU Lesser General Public License along with this * program. If not, see . * * If the terms and conditions of the LGPL v.2. would prevent you from using * the GCTL, please consider the option to obtain a commercial license for a * fee. These licenses are offered by the GCTL's original author. As a rule, * licenses are provided "as-is", unlimited in time for a one time fee. Please * send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget * to include some description of your company and the realm of its activities. * Also add information on how to contact you by electronic and paper mail. ******************************************************/ #include "dnn.h" gctl::dnn::dnn(std::string info) { hlayer_size_ = 0; hind_layers_.reserve(100); output_layer_ = nullptr; wd_st_ = 0; wd_size_ = 0; batch_iter_ = 0; h_ss = o_ss = t_ss = 0; status_ = NoneSet; info_ = info; has_stats_ = false; } gctl::dnn::~dnn() { for (int i = 0; i < hind_layers_.size(); i++) { if (hind_layers_[i] != nullptr) { delete hind_layers_[i]; } } if (output_layer_ != nullptr) delete output_layer_; } void gctl::dnn::load_network(std::string filename) { std::ifstream ifile; open_infile(ifile, filename, ".gctl.dnn", std::ios::in|std::ios::binary); int h_size, stats_int = 0; hlayer_type_e l_type; dnn_hlayer *hlayer_ptr = nullptr; ifile.read((char*)&h_size, sizeof(int)); ifile.read((char*)&stats_int, sizeof(int)); if (stats_int) has_stats_ = true; for (int i = 0; i < h_size; i++) { ifile.read((char*)&l_type, sizeof(hlayer_type_e)); if (l_type == FullyConnected) hlayer_ptr = new fully_connected; if (l_type == MaxPooling) hlayer_ptr = new maxpooling; if (l_type == AvgPooling) hlayer_ptr = new avgpooling; if (l_type == Convolution) hlayer_ptr = new convolution; hind_layers_.push_back(hlayer_ptr); hlayer_size_++; } for (int i = 0; i < h_size; i++) { hind_layers_[i]->load_layer_setup(ifile); } olayer_type_e o_type; ifile.read((char*)&o_type, sizeof(olayer_type_e)); add_output_layer(o_type); ifile.read((char*)&wd_size_, sizeof(int)); weights_.resize(wd_size_); ders_.resize(wd_size_); ifile.read((char*)weights_.get(), wd_size_*sizeof(double)); if (has_stats_) { weights_mean_.resize(wd_size_); weights_std_.resize(wd_size_); ifile.read((char*)weights_mean_.get(), wd_size_*sizeof(double)); ifile.read((char*)weights_std_.get(), wd_size_*sizeof(double)); } ifile.close(); status_ = Trained; return; } void gctl::dnn::save_network(std::string filename) const { if (status_ != Trained) { throw std::invalid_argument("[gctl::dnn] The DNN is not trained."); } std::ofstream ofile; open_outfile(ofile, filename, ".gctl.dnn", std::ios::out|std::ios::binary); // save hind layers' setup int stats_int = 0; if (has_stats_) stats_int = 1; ofile.write((char*)&hlayer_size_, sizeof(int)); ofile.write((char*)&stats_int, sizeof(int)); hlayer_type_e l_type; for (int i = 0; i < hlayer_size_; i++) { l_type = hind_layers_[i]->get_layer_type(); ofile.write((char*)&l_type, sizeof(hlayer_type_e)); } for (int i = 0; i < hlayer_size_; i++) { hind_layers_[i]->save_layer_setup(ofile); } // save output layer setup olayer_type_e o_type = output_layer_->get_output_type(); ofile.write((char*)&o_type, sizeof(olayer_type_e)); // save hind layers' parameters ofile.write((char*)&wd_size_, sizeof(int)); ofile.write((char*)weights_.get(), wd_size_*sizeof(double)); if (has_stats_) { ofile.write((char*)weights_mean_.get(), wd_size_*sizeof(double)); ofile.write((char*)weights_std_.get(), wd_size_*sizeof(double)); } ofile.close(); return; } void gctl::dnn::save_layer2text(int l_idx, std::string filename) const { if (hind_layers_.empty()) { throw std::invalid_argument("[gctl::dnn] The DNN is not setup properly."); } if (status_ != Trained) { throw std::invalid_argument("[gctl::dnn] The DNN is not trained."); } if (l_idx < 0 || l_idx >= hlayer_size_) { throw std::invalid_argument("[gctl::dnn] Invalid index of the hinden layers."); } std::ofstream ofile; open_outfile(ofile, filename + "_weight", ".txt", std::ios::out); hind_layers_[l_idx]->save_weights2text(weights_, ofile); ofile.close(); if (has_stats_) { open_outfile(ofile, filename + "_weight_mean", ".txt", std::ios::out); hind_layers_[l_idx]->save_weights2text(weights_mean_, ofile); ofile.close(); open_outfile(ofile, filename + "_weight_std", ".txt", std::ios::out); hind_layers_[l_idx]->save_weights2text(weights_std_, ofile); ofile.close(); } return; } void gctl::dnn::show_network() { if (output_layer_ == nullptr || hind_layers_.empty()) { throw std::invalid_argument("[gctl::dnn] The DNN is not setup properly."); } std::clog << "=============================\n"; std::clog << "GCTL's DNN Setup Panel\n"; std::clog << "-----------------------------\n"; std::clog << "Network Info: " << info_ << "\n"; std::clog << "Observation Number: " << obs_num_; if (batch_size_ > 0) std::clog << ", Batch Size: " << batch_size_ << "\n"; else std::clog << ", Batch Size: Not set\n"; std::clog << "Output Layer: " << output_layer_->get_output_name() << "\n"; std::clog << "Hind Layer's Number: " << hlayer_size_ << "\n"; for (int i = 0; i < hlayer_size_; i++) { std::clog << "Layer-" << i+1 << ": " << hind_layers_[i]->layer_info() << "\n"; } return; } void gctl::dnn::add_hind_layer(int in_s, int out_s, hlayer_type_e h_type, activation_type_e a_type) { dnn_hlayer *hlayer_ptr = nullptr; if (h_type == FullyConnected) { // 初始化新的隐藏层为全链接层 hlayer_ptr = new fully_connected(wd_st_, in_s, out_s, a_type); hind_layers_.push_back(hlayer_ptr); wd_size_ += out_s*(in_s+1); wd_st_ = wd_size_; hlayer_size_++; } else throw std::invalid_argument("[gctl::dnn] Invalid initiate parameters for current layer type."); h_ss = 1; if ((h_ss + o_ss + t_ss) == 3) status_ = AllSet; else status_ = PartSet; return; } void gctl::dnn::add_hind_layer(int in_rows, int in_cols, int pool_rows, int pool_cols, int stride_rows, int stride_cols, hlayer_type_e h_type, pad_type_e p_type, activation_type_e acti_type) { dnn_hlayer *hlayer_ptr = nullptr; if (h_type == MaxPooling) { hlayer_ptr = new maxpooling(in_rows, in_cols, pool_rows, pool_cols, stride_rows, stride_cols, p_type, acti_type); hind_layers_.push_back(hlayer_ptr); hlayer_size_++; } else if (h_type == AvgPooling) { hlayer_ptr = new avgpooling(in_rows, in_cols, pool_rows, pool_cols, stride_rows, stride_cols, p_type, acti_type); hind_layers_.push_back(hlayer_ptr); hlayer_size_++; } else throw std::invalid_argument("[gctl::dnn] Invalid initiate parameters for current layer type."); h_ss = 1; if ((h_ss + o_ss + t_ss) == 3) status_ = AllSet; else status_ = PartSet; return; } void gctl::dnn::add_hind_layer(int channels, int in_rows, int in_cols, int filter_rows, int filter_cols, int stride_rows, int stride_cols, hlayer_type_e h_type, pad_type_e p_type, activation_type_e acti_type) { dnn_hlayer *hlayer_ptr = nullptr; if (h_type == Convolution) { hlayer_ptr = new convolution(wd_st_, channels, in_rows, in_cols, filter_rows, filter_cols, stride_rows, stride_cols, p_type, acti_type); hind_layers_.push_back(hlayer_ptr); wd_size_ += filter_rows*filter_cols*channels + 1; wd_st_ = wd_size_; hlayer_size_++; } else throw std::invalid_argument("[gctl::dnn] Invalid initiate parameters for current layer type."); h_ss = 1; if ((h_ss + o_ss + t_ss) == 3) status_ = AllSet; else status_ = PartSet; return; } void gctl::dnn::add_output_layer(olayer_type_e o_type) { if (o_type == RegressionMSE) output_layer_ = new rmse; else if (o_type == MultiClassEntropy) output_layer_ = new multi_entropy; else if (o_type == BinaryClassEntropy) output_layer_ = new binary_entropy; else throw std::invalid_argument("[gctl::dnn] Invalid output layer type."); o_ss = 1; if ((h_ss + o_ss + t_ss) == 3) status_ = AllSet; else status_ = PartSet; return; } void gctl::dnn::add_train_set(const matrix &train_obs, const matrix &train_tar, int batch_size) { if (batch_size < 0) { throw std::invalid_argument("[gctl::dnn] Invalid batch size."); } batch_size_ = batch_size; obs_num_ = train_obs.col_size(); if(obs_num_ != train_tar.col_size()) { throw std::invalid_argument("[gctl::dnn] Observation sizes do not match."); } if (output_layer_ == nullptr) { throw std::invalid_argument("[gctl::dnn] The DNN is not setup properly."); } else output_layer_->check_target_data(train_tar); if (batch_size_ == 0 || batch_size_ >= obs_num_) { obs_.resize(1); targets_.resize(1); obs_[0] = train_obs; targets_[0] = train_tar; batch_num_ = 1; t_ss = 1; if ((h_ss + o_ss + t_ss) == 3) status_ = AllSet; else status_ = PartSet; return; } batch_num_ = ceil(1.0*obs_num_/batch_size_); obs_.resize(batch_num_); targets_.resize(batch_num_); for (size_t i = 0; i < batch_num_-1; i++) { obs_[i].resize(train_obs.row_size(), batch_size); targets_[i].resize(train_tar.row_size(), batch_size); for (size_t c = 0; c < batch_size; c++) { for (size_t r = 0; r < train_obs.row_size(); r++) { obs_[i][r][c] = train_obs[r][i*batch_size + c]; } for (size_t r = 0; r < train_tar.row_size(); r++) { targets_[i][r][c] = train_tar[r][i*batch_size + c]; } } } obs_[batch_num_-1].resize(train_obs.row_size(), obs_num_ - (batch_num_-1)*batch_size); targets_[batch_num_-1].resize(train_tar.row_size(), obs_num_ - (batch_num_-1)*batch_size); for (size_t c = 0; c < obs_num_ - (batch_num_-1)*batch_size; c++) { for (size_t r = 0; r < train_obs.row_size(); r++) { obs_[batch_num_-1][r][c] = train_obs[r][(batch_num_-1)*batch_size + c]; } for (size_t r = 0; r < train_tar.row_size(); r++) { targets_[batch_num_-1][r][c] = train_tar[r][(batch_num_-1)*batch_size + c]; } } t_ss = 1; if ((h_ss + o_ss + t_ss) == 3) status_ = AllSet; else status_ = PartSet; return; } void gctl::dnn::init_network(double mu, double sigma, unsigned int seed) { if (status_ != AllSet) { throw std::invalid_argument("[gctl::dnn] The DNN is not setup properly."); } if (hind_layers_.size() > 1) { for (int i = 1; i < hind_layers_.size(); i++) { if (hind_layers_[i]->in_size() != hind_layers_[i-1]->out_size()) { throw std::invalid_argument("[gctl::dnn] Layer sizes do not match."); } } } weights_.resize(wd_size_); ders_.resize(wd_size_); weights_.random_float(mu, sigma, RdNormal, seed); ders_.random_float(mu, sigma, RdNormal, seed); status_ = Initialized; return; } void gctl::dnn::train_network(const sgd_para &pa, sgd_solver_type solver) { if (status_ != Initialized) { throw std::invalid_argument("[gctl::dnn] The DNN is not initialized."); } set_sgd_para(pa); SGD_Minimize(weights_, solver); status_ = Trained; return; } void gctl::dnn::train_network(const lgd_para &pa) { if (status_ != Initialized) { throw std::invalid_argument("[gctl::dnn] The DNN is not initialized."); } has_stats_ = true; set_lgd_para(pa); LGD_Minimize(weights_, weights_mean_, weights_std_); status_ = Trained; return; } void gctl::dnn::predict(const matrix &pre_obs, matrix &predicts) { if (status_ != Trained) { throw std::invalid_argument("[gctl::dnn] The DNN is not trained."); } forward_propagation(pre_obs); predicts = hind_layers_.back()->forward_propagation_data(); return; } void gctl::dnn::forward_propagation(const matrix &input) { // First layer if (input.row_size() != hind_layers_[0]->in_size()) { throw std::invalid_argument("[gctl::dnn] Input data have incorrect dimension."); } hind_layers_[0]->forward_propagation(weights_, input); // The following layers for (int i = 1; i < hlayer_size_; i++) { hind_layers_[i]->forward_propagation(weights_, hind_layers_[i-1]->forward_propagation_data()); } return; } void gctl::dnn::backward_propagation(const matrix &input, const matrix &target) { dnn_hlayer *first_layer = hind_layers_.front(); dnn_hlayer *last_layer = hind_layers_.back(); // Let output layer compute back-propagation data output_layer_->evaluation(last_layer->forward_propagation_data(), target); // If there is only one hidden layer, "prev_layer_data" will be the input data if (hlayer_size_ == 1) { first_layer->backward_propagation(weights_, ders_, input, output_layer_->backward_propagation_data()); return; } // Compute gradients for the last hidden layer last_layer->backward_propagation( weights_, ders_, hind_layers_[hlayer_size_-2]->forward_propagation_data(), output_layer_->backward_propagation_data()); // Compute gradients for all the hidden layers except for the first one and the last one for (int i = hlayer_size_-2; i > 0; i--) { hind_layers_[i]->backward_propagation( weights_, ders_, hind_layers_[i-1]->forward_propagation_data(), hind_layers_[i+1]->backward_propagation_data()); } // Compute gradients for the first layer first_layer->backward_propagation(weights_, ders_, input, hind_layers_[1]->backward_propagation_data()); return; } double gctl::dnn::SGD_Evaluate(const array &x, array &g) { // run the forward and backward process once forward_propagation(obs_[batch_iter_%batch_num_]); backward_propagation(obs_[batch_iter_%batch_num_], targets_[batch_iter_%batch_num_]); batch_iter_++; // get the network's derivatives g = ders_; return output_layer_->loss_value(); } int gctl::dnn::SGD_Progress(double fx, const array &x, const sgd_para ¶m, const int k) { // 清屏 winsize win; ioctl(0, TIOCGWINSZ, &win); std::clog << "\033[2J" << "\033[" << win.ws_row << "A"; show_network(); sgd_solver::show_solver(); std::clog << GCTL_CLEARLINE << "\rF(x) = " << fx << ", Train-Times = " << k << "\n"; return 0; } double gctl::dnn::LGD_Evaluate(const array &x, array &g) { // run the forward and backward process once forward_propagation(obs_[batch_iter_%batch_num_]); backward_propagation(obs_[batch_iter_%batch_num_], targets_[batch_iter_%batch_num_]); batch_iter_++; // get the network's derivatives g = ders_; return output_layer_->loss_value(); } int gctl::dnn::LGD_Progress(const int curr_t, const double curr_fx, const double mean_fx, const double best_fx, const lgd_para ¶m) { // 清屏 winsize win; ioctl(0, TIOCGWINSZ, &win); std::clog << "\033[2J" << "\033[" << win.ws_row << "A"; show_network(); lgd_solver::show_solver(); std::clog << "F(x) = " << curr_fx << ", Mean F(x) = " << mean_fx << ", Best F(x) = " << best_fx << ", Times = " << curr_t << "\n"; return 0; }