216 lines
7.3 KiB
C++
216 lines
7.3 KiB
C++
/********************************************************
|
|
* ██████╗ ██████╗████████╗██╗
|
|
* ██╔════╝ ██╔════╝╚══██╔══╝██║
|
|
* ██║ ███╗██║ ██║ ██║
|
|
* ██║ ██║██║ ██║ ██║
|
|
* ╚██████╔╝╚██████╗ ██║ ███████╗
|
|
* ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝
|
|
* Geophysical Computational Tools & Library (GCTL)
|
|
*
|
|
* Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
|
|
*
|
|
* GCTL is distributed under a dual licensing scheme. You can redistribute
|
|
* it and/or modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation, either version 2
|
|
* of the License, or (at your option) any later version. You should have
|
|
* received a copy of the GNU Lesser General Public License along with this
|
|
* program. If not, see <http://www.gnu.org/licenses/>.
|
|
*
|
|
* If the terms and conditions of the LGPL v.2. would prevent you from using
|
|
* the GCTL, please consider the option to obtain a commercial license for a
|
|
* fee. These licenses are offered by the GCTL's original author. As a rule,
|
|
* licenses are provided "as-is", unlimited in time for a one time fee. Please
|
|
* send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
|
|
* to include some description of your company and the realm of its activities.
|
|
* Also add information on how to contact you by electronic and paper mail.
|
|
******************************************************/
|
|
|
|
#include "hlayer_fully_connected.h"

#include <stdexcept>
|
|
|
|
// Default constructor: creates an unconfigured layer; no members are set here.
gctl::fully_connected::fully_connected() = default;
|
|
|
|
/**
 * @brief Construct a configured fully connected layer.
 *
 * @param p_st      Offset of this layer's first weight inside the shared weight array.
 * @param p_ins     Number of inputs of the layer.
 * @param p_outs    Number of outputs of the layer.
 * @param acti_type Activation function applied to the linear term.
 *
 * @throws std::invalid_argument if acti_type is not a supported activation.
 */
gctl::fully_connected::fully_connected(int p_st, int p_ins, int p_outs, activation_type_e acti_type)
{
    // All the actual setup lives in init_fully_connected()
    this->init_fully_connected(p_st, p_ins, p_outs, acti_type);
}
|
|
|
|
// Destructor: nothing is released at this level.
gctl::fully_connected::~fully_connected() = default;
|
|
|
|
void gctl::fully_connected::init_fully_connected(int p_st, int p_ins, int p_outs, activation_type_e acti_type)
|
|
{
|
|
w_st_ = p_st; w_is_ = p_ins; w_outs_ = p_outs;
|
|
b_st_ = p_st + p_ins*p_outs;
|
|
|
|
if (acti_type == Identity) activator_ = new identity;
|
|
else if (acti_type == Mish) activator_ = new mish;
|
|
else if (acti_type == ReLU) activator_ = new relu;
|
|
else if (acti_type == PReLU) activator_ = new prelu;
|
|
else if (acti_type == Sigmoid) activator_ = new sigmoid;
|
|
else if (acti_type == SoftMax) activator_ = new softmax;
|
|
else if (acti_type == Tanh) activator_ = new tanh;
|
|
else throw std::invalid_argument("[gctl::fully_connected] Invalid activation type.");
|
|
return;
|
|
}
|
|
|
|
void gctl::fully_connected::forward_propagation(const array<double> &all_weights, const matrix<double> &prev_layer_data)
|
|
{
|
|
// z_: out_size x nobs
|
|
// a_: out_size x nobs
|
|
o_is_ = prev_layer_data.col_size();
|
|
// Forward linear terms
|
|
z_.resize(w_outs_, o_is_);
|
|
a_.resize(w_outs_, o_is_);
|
|
|
|
// Linear term z = W^T * in + b
|
|
int i, j, k;
|
|
#pragma omp parallel for private (i, j, k) schedule(guided)
|
|
for (i = 0; i < w_outs_; i++)
|
|
{
|
|
for (j = 0; j < o_is_; j++)
|
|
{
|
|
z_[i][j] = 0.0;
|
|
for (k = 0; k < w_is_; k++)
|
|
{
|
|
z_[i][j] += all_weights[w_st_ + i + k*w_outs_]*prev_layer_data[k][j];
|
|
}
|
|
}
|
|
}
|
|
|
|
//#pragma omp parallel for private (i, j) schedule(guided)
|
|
for (j = 0; j < o_is_; j++)
|
|
{
|
|
for (i = 0; i < w_outs_; i++)
|
|
{
|
|
z_[i][j] += all_weights[b_st_ + i];
|
|
}
|
|
}
|
|
|
|
// Apply activation function
|
|
activator_->activate(z_, a_);
|
|
/*
|
|
for (j = 0; j < o_is_; j++)
|
|
{
|
|
for (i = 0; i < w_outs_; i++)
|
|
{
|
|
std::cout << a_[i][j] << "\n";
|
|
}
|
|
}
|
|
|
|
std::cout << "done\n";
|
|
*/
|
|
return;
|
|
}
|
|
|
|
void gctl::fully_connected::backward_propagation(const array<double> &all_weights, const array<double> &all_ders,
|
|
const matrix<double> &prev_layer_data, const matrix<double> &next_layer_data)
|
|
{
|
|
der_z_.resize(w_outs_, o_is_);
|
|
der_in_.resize(w_is_, o_is_);
|
|
// prev_layer_data: in_size x nobs
|
|
// next_layer_data: out_size x nobs
|
|
// After forward stage, m_z contains z = W' * in + b
|
|
// Now we need to calculate d(L) / d(z) = [d(a) / d(z)] * [d(L) / d(a)]
|
|
// d(L) / d(a) is computed in the next layer, contained in next_layer_data
|
|
// The Jacobian matrix J = d(a) / d(z) is determined by the activation function
|
|
// der_z_: out_size x nobs
|
|
activator_->apply_jacobian(z_, a_, next_layer_data, der_z_);
|
|
// Now dLz contains d(L) / d(z)
|
|
// Derivative for weights, d(L) / d(W) = [d(L) / d(z)] * in'
|
|
int i, j, k;
|
|
#pragma omp parallel for private (i, j, k) schedule(guided)
|
|
for (i = 0; i < w_is_; i++)
|
|
{
|
|
for (j = 0; j < w_outs_; j++)
|
|
{
|
|
all_ders[w_st_ + i*w_outs_ + j] = 0;
|
|
for (k = 0; k < o_is_; k++)
|
|
{
|
|
all_ders[w_st_ + i*w_outs_ + j] += prev_layer_data[i][k]*der_z_[j][k];
|
|
}
|
|
all_ders[w_st_ + i*w_outs_ + j] /= o_is_;
|
|
}
|
|
}
|
|
// Derivative for bias, d(L) / d(b) = d(L) / d(z)
|
|
#pragma omp parallel for private (i, j) schedule(guided)
|
|
for (i = 0; i < w_outs_; i++)
|
|
{
|
|
all_ders[b_st_ + i] = 0.0;
|
|
for (j = 0; j < o_is_; j++)
|
|
{
|
|
all_ders[b_st_ + i] += der_z_[i][j];
|
|
}
|
|
all_ders[b_st_ + i] /= o_is_;
|
|
}
|
|
// Compute d(L) / d_in = W * [d(L) / d(z)]
|
|
// der_in_: in_size x nobs
|
|
#pragma omp parallel for private (i, j, k) schedule(guided)
|
|
for (i = 0; i < w_is_; i++)
|
|
{
|
|
for (j = 0; j < o_is_; j++)
|
|
{
|
|
der_in_[i][j] = 0.0;
|
|
for (k = 0; k < w_outs_; k++)
|
|
{
|
|
der_in_[i][j] += all_weights[w_st_ + i*w_outs_ + k]*der_z_[k][j];
|
|
}
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
/**
 * @brief Type tag of this layer.
 *
 * @return Always FullyConnected.
 */
gctl::hlayer_type_e gctl::fully_connected::get_layer_type() const
{
    const hlayer_type_e kind = FullyConnected;
    return kind;
}
|
|
|
|
std::string gctl::fully_connected::get_layer_name() const
|
|
{
|
|
return "FullyConnected";
|
|
}
|
|
|
|
std::string gctl::fully_connected::layer_info() const
|
|
{
|
|
std::string info = std::to_string(w_is_) + "x" + std::to_string(w_outs_) + ", FullyConnected, " + activator_->activation_name();
|
|
return info;
|
|
}
|
|
|
|
void gctl::fully_connected::save_layer_setup(std::ofstream &os) const
|
|
{
|
|
hlayer_type_e l_type = get_layer_type();
|
|
activation_type_e a_type = get_activation_type();
|
|
|
|
os.write((char*)&w_st_, sizeof(int));
|
|
os.write((char*)&w_is_, sizeof(int));
|
|
os.write((char*)&w_outs_, sizeof(int));
|
|
os.write((char*)&l_type, sizeof(hlayer_type_e));
|
|
os.write((char*)&a_type, sizeof(activation_type_e));
|
|
return;
|
|
}
|
|
|
|
void gctl::fully_connected::load_layer_setup(std::ifstream &is)
|
|
{
|
|
int st, iss, outs;
|
|
hlayer_type_e l_type;
|
|
activation_type_e a_type;
|
|
|
|
is.read((char*)&st, sizeof(int));
|
|
is.read((char*)&iss, sizeof(int));
|
|
is.read((char*)&outs, sizeof(int));
|
|
is.read((char*)&l_type, sizeof(hlayer_type_e));
|
|
is.read((char*)&a_type, sizeof(activation_type_e));
|
|
|
|
init_fully_connected(st, iss, outs, a_type);
|
|
return;
|
|
}
|
|
|
|
void gctl::fully_connected::save_weights2text(const array<double> &all_weights, std::ofstream &os) const
|
|
{
|
|
for (int i = 0; i < w_outs_; i++)
|
|
{
|
|
for (int k = 0; k < w_is_; k++)
|
|
{
|
|
os << k << " " << i << " " << all_weights[w_st_ + i + k*w_outs_] << "\n";
|
|
}
|
|
}
|
|
return;
|
|
} |