/********************************************************
 *  ██████╗  ██████╗████████╗██╗
 * ██╔════╝ ██╔════╝╚══██╔══╝██║
 * ██║  ███╗██║        ██║   ██║
 * ██║   ██║██║        ██║   ██║
 * ╚██████╔╝╚██████╗   ██║   ███████╗
 *  ╚═════╝  ╚═════╝   ╚═╝   ╚══════╝
 *
 * Geophysical Computational Tools & Library (GCTL)
 *
 * Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn)
 *
 * GCTL is distributed under a dual licensing scheme. You can redistribute
 * it and/or modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, either version 2
 * of the License, or (at your option) any later version. You should have
 * received a copy of the GNU Lesser General Public License along with this
 * program. If not, see <http://www.gnu.org/licenses/>.
 *
 * If the terms and conditions of the LGPL v.2. would prevent you from using
 * the GCTL, please consider the option to obtain a commercial license for a
 * fee. These licenses are offered by the GCTL's original author. As a rule,
 * licenses are provided "as-is", unlimited in time for a one time fee. Please
 * send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
 * to include some description of your company and the realm of its activities.
 * Also add information on how to contact you by electronic and paper mail.
 ******************************************************/
|
||
|
#include "hlayer_convolution.h"
|
||
|
|
||
|
/**
 * Default constructor.
 *
 * Nulls the activation-function pointer so that an object that is destroyed
 * or queried without init_convolution() ever being called does not carry an
 * indeterminate raw pointer (init_convolution() assigns it with `new`).
 */
gctl::convolution::convolution() { activator_ = nullptr; }
|
||
|
|
||
|
/**
 * Construct and immediately initialize a convolution layer.
 *
 * Every argument is forwarded verbatim to init_convolution(); see that
 * function for the meaning of each parameter and the exceptions it throws.
 */
gctl::convolution::convolution(int p_st, int channels, int in_rows, int in_cols,
    int filter_rows, int filter_cols, int stride_rows, int stride_cols,
    pad_type_e pl_type, activation_type_e acti_type)
{
    init_convolution(p_st, channels, in_rows, in_cols,
                     filter_rows, filter_cols, stride_rows, stride_cols,
                     pl_type, acti_type);
}
|
||
|
|
||
|
// Destructor.
// NOTE(review): activator_ is allocated with `new` in init_convolution() but
// is never deleted here, so the activation object leaks. Adding
// `delete activator_;` is only safe once the pointer is guaranteed to be
// null-initialized on default construction — confirm the member declaration
// in the header before fixing.
gctl::convolution::~convolution(){}
|
||
|
|
||
|
/**
 * Initialize the layer's geometry, activation object and weight layout.
 *
 * @param p_st         Start index of this layer's parameters inside the
 *                     network-wide weight vector.
 * @param channels     Number of input channels.
 * @param in_rows      Input image rows.
 * @param in_cols      Input image columns.
 * @param filter_rows  Filter (kernel) rows.
 * @param filter_cols  Filter (kernel) columns.
 * @param stride_rows  Vertical stride.
 * @param stride_cols  Horizontal stride.
 * @param pl_type      Padding scheme (Valid or Same).
 * @param acti_type    Activation function type.
 * @throws std::invalid_argument if acti_type is not a recognized activation.
 *
 * NOTE(review): activator_ is assigned with `new` and never freed in this
 * file; calling init_convolution() a second time (e.g. construction followed
 * by load_layer_setup()) leaks the previous activation object — verify
 * ownership against the class declaration.
 */
void gctl::convolution::init_convolution(int p_st, int channels, int in_rows, int in_cols, int filter_rows, int filter_cols,
    int stride_rows, int stride_cols, pad_type_e pl_type, activation_type_e acti_type)
{
    chls_ = channels;
    i_rows_ = in_rows; i_cols_ = in_cols;
    f_rows_ = filter_rows; f_cols_ = filter_cols;
    s_rows_ = stride_rows; s_cols_ = stride_cols;
    p_type_ = pl_type;

    // Select the activation object from the requested type.
    if (acti_type == Identity) activator_ = new identity;
    else if (acti_type == Mish) activator_ = new mish;
    else if (acti_type == ReLU) activator_ = new relu;
    else if (acti_type == PReLU) activator_ = new prelu;
    else if (acti_type == Sigmoid) activator_ = new sigmoid;
    else if (acti_type == SoftMax) activator_ = new softmax;
    else if (acti_type == Tanh) activator_ = new tanh;
    else throw std::invalid_argument("[gctl::convolution] Invalid activation type.");

    // Compute the output size (valid-convolution count; Same padding adds one
    // extra row/column below when the stride does not divide evenly).
    // NOTE(review): this grows the Same-padded output by at most one row/col —
    // confirm it matches what cal_same_padding_idx() expects.
    o_rows_ = (in_rows + s_rows_ - f_rows_)/s_rows_;
    o_cols_ = (in_cols + s_cols_ - f_cols_)/s_cols_;
    if (pl_type == Same && (in_rows + s_rows_ - f_rows_)%s_rows_ != 0)
    {
        o_rows_++;
        // Upper padding: half of the leftover remainder rows.
        u_pad_ = ((in_rows + s_rows_ - f_rows_)%s_rows_)/2;
    }
    else u_pad_ = 0;

    if (pl_type == Same && (in_cols + s_cols_ - f_cols_)%s_cols_ != 0)
    {
        o_cols_++;
        // Left padding: half of the leftover remainder columns.
        l_pad_ = ((in_cols + s_cols_ - f_cols_)%s_cols_)/2;
    }
    else l_pad_ = 0;

    w_is_ = i_rows_*i_cols_;   // pixels per input channel image
    w_outs_ = o_rows_*o_cols_; // pixels per output image
    w_st_ = p_st;              // first filter weight in the global vector
    b_st_ = p_st + chls_*f_rows_*f_cols_; // Size of filter's parameters is chls_*f_rows_*f_cols_
    p_idx_.resize(f_rows_*f_cols_); // scratch tap-to-pixel index map, reused per output position
    return;
}
|
||
|
|
||
|
/**
 * Forward pass: z = conv(input, w) + b, then a = activation(z).
 *
 * @param all_weights      Full network weight vector; this layer reads its
 *                         filter weights starting at w_st_ and its single
 *                         bias at b_st_.
 * @param prev_layer_data  Previous layer's output; rows index input pixels
 *                         and the column index interleaves observation and
 *                         channel as chls_*obs + channel (see the inner loop).
 * @throws std::invalid_argument if the column count is not a multiple of chls_.
 */
void gctl::convolution::forward_propagation(const array<double> &all_weights, const matrix<double> &prev_layer_data)
{
    if (prev_layer_data.col_size()%chls_ != 0)
    {
        throw std::invalid_argument("[gctl::convolution] Incompatible observation size to the channel size.");
    }

    // Number of observations in this batch.
    o_is_ = prev_layer_data.col_size()/chls_;
    // Forward linear terms
    // z_: out_size x nobs
    // a_: out_size x nobs
    z_.resize(w_outs_, o_is_);
    a_.resize(w_outs_, o_is_);

    double p_val;
    for (size_t i = 0; i < o_rows_; i++)
    {
        for (size_t j = 0; j < o_cols_; j++)
        {
            // Map the filter taps at output position (i, j) to input pixel
            // indices; taps landing in the padded region are marked -1.
            if (p_type_ == Valid) cal_valid_padding_idx(p_idx_, i, j, i_rows_, i_cols_, f_rows_, f_cols_, s_rows_, s_cols_);
            if (p_type_ == Same) cal_same_padding_idx(p_idx_, i, j, i_rows_, i_cols_, f_rows_, f_cols_, s_rows_, s_cols_, u_pad_, l_pad_);

            for (size_t o = 0; o < o_is_; o++)
            {
                p_val = 0.0;
                // Accumulate the filter response over all channels, skipping
                // padded (-1) taps.
                for (size_t c = 0; c < chls_; c++)
                {
                    for (size_t p = 0; p < f_rows_*f_cols_; p++)
                    {
                        if (p_idx_[p] != -1) p_val += all_weights[w_st_ + p + c*f_rows_*f_cols_] * prev_layer_data[p_idx_[p]][chls_*o + c];
                    }
                }

                z_[i*o_cols_ + j][o] = p_val + all_weights[b_st_]; // one filter has one bias value
            }
        }
    }

    // Apply activation function
    activator_->activate(z_, a_);
    return;
}
|
||
|
|
||
|
void gctl::convolution::backward_propagation(const array<double> &all_weights, const array<double> &all_ders,
|
||
|
const matrix<double> &prev_layer_data, const matrix<double> &next_layer_data)
|
||
|
{
|
||
|
der_z_.resize(w_outs_, o_is_);
|
||
|
der_in_.resize(w_is_, o_is_*chls_);
|
||
|
der_in_.assign_all(0.0);
|
||
|
|
||
|
all_ders_count_.resize(chls_*f_rows_*f_cols_);
|
||
|
all_ders_count_.assign_all(0);
|
||
|
|
||
|
der_in_count_.resize(w_is_, o_is_*chls_);
|
||
|
der_in_count_.assign_all(0);
|
||
|
|
||
|
|
||
|
// After forward stage, m_z contains z = conv(in, w) + b
|
||
|
// Now we need to calculate d(L) / d(z) = [d(a) / d(z)] * [d(L) / d(a)]
|
||
|
// d(L) / d(a) is computed in the next layer, contained in next_layer_data
|
||
|
// The Jacobian matrix J = d(a) / d(z) is determined by the activation function
|
||
|
activator_->apply_jacobian(z_, a_, next_layer_data, der_z_);
|
||
|
|
||
|
// z_j = sum_i(conv(in_i, w_ij)) + b_j
|
||
|
//
|
||
|
// d(z_k) / d(w_ij) = 0, if k != j
|
||
|
// d(L) / d(w_ij) = [d(z_j) / d(w_ij)] * [d(L) / d(z_j)] = sum_i{ [d(z_j) / d(w_ij)] * [d(L) / d(z_j)] }
|
||
|
// = sum_i(conv(in_i, d(L) / d(z_j)))
|
||
|
//
|
||
|
// z_j is an image (matrix), b_j is a scalar
|
||
|
// d(z_j) / d(b_j) = a matrix of the same size of d(z_j) filled with 1
|
||
|
// d(L) / d(b_j) = (d(L) / d(z_j)).sum()
|
||
|
//
|
||
|
// d(z_j) / d(in_i) = conv_full_op(w_ij_rotate)
|
||
|
// d(L) / d(in_i) = sum_j((d(z_j) / d(in_i)) * (d(L) / d(z_j))) = sum_j(conv_full(d(L) / d(z_j), w_ij_rotate))
|
||
|
// Derivative for weights
|
||
|
|
||
|
for (size_t h = 0; h < chls_; h++)
|
||
|
{
|
||
|
for (size_t p = 0; p < f_rows_*f_cols_; p++)
|
||
|
{
|
||
|
all_ders[w_st_ + h*f_rows_*f_cols_ + p] = 0.0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for (size_t j = 0; j < o_is_; j++)
|
||
|
{
|
||
|
for (size_t h = 0; h < chls_; h++)
|
||
|
{
|
||
|
for (size_t r = 0; r < o_rows_; r++)
|
||
|
{
|
||
|
for (size_t c = 0; c < o_cols_; c++)
|
||
|
{
|
||
|
if (p_type_ == Valid) cal_valid_padding_idx(p_idx_, r, c, i_rows_, i_cols_, f_rows_, f_cols_, s_rows_, s_cols_);
|
||
|
if (p_type_ == Same) cal_same_padding_idx(p_idx_, r, c, i_rows_, i_cols_, f_rows_, f_cols_, s_rows_, s_cols_, u_pad_, l_pad_);
|
||
|
|
||
|
for (size_t p = 0; p < f_rows_*f_cols_; p++)
|
||
|
{
|
||
|
if (p_idx_[p] != -1)
|
||
|
{
|
||
|
all_ders[w_st_ + h*f_rows_*f_cols_ + p] += prev_layer_data[p_idx_[p]][j*chls_ + h]*der_z_[r*o_cols_ + c][j];
|
||
|
all_ders_count_[h*f_rows_*f_cols_ + p]++;
|
||
|
|
||
|
der_in_[p_idx_[p]][j*chls_ + h] += all_weights[w_st_ + h*f_rows_*f_cols_ + p]*der_z_[r*o_cols_ + c][j];
|
||
|
der_in_count_[p_idx_[p]][j*chls_ + h]++;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for (size_t h = 0; h < chls_; h++)
|
||
|
{
|
||
|
for (size_t p = 0; p < f_rows_*f_cols_; p++)
|
||
|
{
|
||
|
all_ders[w_st_ + h*f_rows_*f_cols_ + p] /= all_ders_count_[h*f_rows_*f_cols_ + p];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for (size_t j = 0; j < o_is_; j++)
|
||
|
{
|
||
|
for (size_t h = 0; h < chls_; h++)
|
||
|
{
|
||
|
for (size_t i = 0; i < w_outs_; i++)
|
||
|
{
|
||
|
if (der_in_count_[i][j*chls_ + h] != 0) der_in_[i][j*chls_ + h] /= der_in_count_[i][j*chls_ + h];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Derivative for bias, d(L) / d(b) = d(L) / d(z)
|
||
|
all_ders[b_st_] = 0.0;
|
||
|
for (size_t i = 0; i < w_outs_; i++)
|
||
|
{
|
||
|
for (size_t j = 0; j < o_is_; j++)
|
||
|
{
|
||
|
all_ders[b_st_] += der_z_[i][j];
|
||
|
}
|
||
|
}
|
||
|
all_ders[b_st_] /= (o_is_*w_outs_);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
/**
 * @brief Layer-type tag for this class.
 * @return Always Convolution; load_layer_setup() compares the stored tag
 *         against this value to validate serialized layer data.
 */
gctl::hlayer_type_e gctl::convolution::get_layer_type() const
{
    return Convolution;
}
|
||
|
|
||
|
std::string gctl::convolution::get_layer_name() const
|
||
|
{
|
||
|
return "Convolution";
|
||
|
}
|
||
|
|
||
|
std::string gctl::convolution::layer_info() const
|
||
|
{
|
||
|
std::string info = std::to_string(w_is_) + "x" + std::to_string(w_outs_)
|
||
|
+ " (" + std::to_string(i_rows_) + "," + std::to_string(i_cols_) + ")x"
|
||
|
+ "(" + std::to_string(o_rows_) + "," + std::to_string(o_cols_) + "), "
|
||
|
+ std::to_string(f_rows_) + "x" + std::to_string(f_cols_)
|
||
|
+ ", " + std::to_string(s_rows_) + "x" + std::to_string(s_cols_)
|
||
|
+ ", " + std::to_string(chls_) + " channel(s), Convolution ";
|
||
|
|
||
|
if (p_type_ == Same) info += "(Same), ";
|
||
|
if (p_type_ == Valid) info += "(Valid), ";
|
||
|
|
||
|
info += activator_->activation_name();
|
||
|
return info;
|
||
|
}
|
||
|
|
||
|
/**
 * Serialize this layer's setup to a binary stream.
 *
 * Writes, in order: w_st_, chls_, i_rows_, i_cols_, f_rows_, f_cols_,
 * s_rows_, s_cols_, the layer-type tag, the padding type and the activation
 * type — exactly the order load_layer_setup() reads them back. The data is
 * raw native-endian binary, so files are not portable across architectures
 * with different endianness or enum sizes.
 *
 * @param os  Output stream; assumed to be opened in binary mode — TODO
 *            confirm against the callers.
 */
void gctl::convolution::save_layer_setup(std::ofstream &os) const
{
    hlayer_type_e h_type = get_layer_type();
    activation_type_e a_type = get_activation_type();

    // Field order is the on-disk contract; keep in sync with load_layer_setup().
    os.write((char*)&w_st_, sizeof(int));
    os.write((char*)&chls_, sizeof(int));
    os.write((char*)&i_rows_, sizeof(int));
    os.write((char*)&i_cols_, sizeof(int));
    os.write((char*)&f_rows_, sizeof(int));
    os.write((char*)&f_cols_, sizeof(int));
    os.write((char*)&s_rows_, sizeof(int));
    os.write((char*)&s_cols_, sizeof(int));
    os.write((char*)&h_type, sizeof(hlayer_type_e));
    os.write((char*)&p_type_, sizeof(pad_type_e));
    os.write((char*)&a_type, sizeof(activation_type_e));
    return;
}
|
||
|
|
||
|
/**
 * Deserialize a convolution layer setup from a binary stream.
 *
 * Reads the fields in the same order save_layer_setup() wrote them, checks
 * the stored layer-type tag, and re-runs init_convolution() with the
 * restored values.
 *
 * @param is  Input stream positioned at this layer's setup record.
 * @throws std::invalid_argument if the stored layer type is not Convolution,
 *         or (from init_convolution) if the stored activation type is invalid.
 */
void gctl::convolution::load_layer_setup(std::ifstream &is)
{
    int st, chls, in_rows, in_cols, filter_rows, filter_cols, stride_rows, stride_cols;
    hlayer_type_e h_type;
    pad_type_e p_type;
    activation_type_e a_type;

    // Field order mirrors save_layer_setup().
    is.read((char*)&st, sizeof(int));
    is.read((char*)&chls, sizeof(int));
    is.read((char*)&in_rows, sizeof(int));
    is.read((char*)&in_cols, sizeof(int));
    is.read((char*)&filter_rows, sizeof(int));
    is.read((char*)&filter_cols, sizeof(int));
    is.read((char*)&stride_rows, sizeof(int));
    is.read((char*)&stride_cols, sizeof(int));
    is.read((char*)&h_type, sizeof(hlayer_type_e));
    is.read((char*)&p_type, sizeof(pad_type_e));
    is.read((char*)&a_type, sizeof(activation_type_e));

    // Reject records written by a different layer class.
    if (h_type != Convolution)
    {
        throw std::invalid_argument("[gctl::convolution] Invalid hind layer type.");
    }

    init_convolution(st, chls, in_rows, in_cols, filter_rows, filter_cols, stride_rows, stride_cols, p_type, a_type);
    return;
}
|
||
|
|
||
|
/**
 * Export this layer's weights as text.
 *
 * NOTE(review): currently an empty stub — it writes nothing to `os` and
 * ignores `all_weights`. TODO: confirm whether text export is intended to be
 * unsupported for convolution layers or is simply unimplemented.
 */
void gctl::convolution::save_weights2text(const array<double> &all_weights, std::ofstream &os) const
{
    return;
}
|