// gctl_ai/lib/dnn/olayer.h

/********************************************************
 *  ██████╗  ██████╗████████╗██╗
 * ██╔════╝ ██╔════╝╚══██╔══╝██║
 * ██║  ███╗██║        ██║   ██║
 * ██║   ██║██║        ██║   ██║
 * ╚██████╔╝╚██████╗   ██║   ███████╗
 *  ╚═════╝  ╚═════╝   ╚═╝   ╚══════╝
 * Geophysical Computational Tools & Library (GCTL)
 *
 * Copyright (c) 2022  Yi Zhang (yizhang-geo@zju.edu.cn)
 *
 * GCTL is distributed under a dual licensing scheme. You can redistribute
 * it and/or modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, either version 2
 * of the License, or (at your option) any later version. You should have
 * received a copy of the GNU Lesser General Public License along with this
 * program. If not, see <http://www.gnu.org/licenses/>.
 *
 * If the terms and conditions of the LGPL v.2. would prevent you from using
 * the GCTL, please consider the option to obtain a commercial license for a
 * fee. These licenses are offered by the GCTL's original author. As a rule,
 * licenses are provided "as-is", unlimited in time for a one time fee. Please
 * send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget
 * to include some description of your company and the realm of its activities.
 * Also add information on how to contact you by electronic and paper mail.
 ******************************************************/

#ifndef _GCTL_DNN_OLAYER_H
#define _GCTL_DNN_OLAYER_H

#include "gctl/core.h"
#include "gctl/algorithms.h"

namespace gctl
{
    // Tag identifying the kind of an output layer. It is the return type of
    // dnn_olayer::get_output_type(), which is used to export the NN model.
    //
    // The numeric values are written out explicitly and are the same ones the
    // compiler would assign implicitly (0, 1, 2). Keep them stable if they end
    // up in exported models -- TODO confirm how the exporter serializes them.
    //
    // NOTE(review): the enumerator names suggest a mean-squared-error
    // regression loss and multi-class / binary cross-entropy losses; confirm
    // against the concrete output layer implementations.
    enum olayer_type_e
    {
        RegressionMSE      = 0,
        MultiClassEntropy  = 1,
        BinaryClassEntropy = 2
    };

    // Abstract interface of a network output layer.
    //
    // The class cannot be instantiated directly: evaluation() (matrix target),
    // loss_value(), get_output_name() and get_output_type() are pure virtual
    // and must be provided by a concrete output layer. All member definitions
    // live outside this header.
    //
    // NOTE(review): check_target_data() and evaluation() are each declared as
    // a pair of overloaded virtual functions. By the C++ name-hiding rules, a
    // derived class that overrides only one overload hides the other unless it
    // adds a using-declaration (e.g. `using dnn_olayer::evaluation;`).
    class dnn_olayer
    {
    public:
        dnn_olayer();
        // Virtual so that concrete layers can be destroyed through a pointer
        // to this base class.
        virtual ~dnn_olayer();

        // Returns the derivative of the loss with respect to the input of this
        // layer, which is also the derivative with respect to the output of the
        // previous layer. The result is expected to be filled by evaluation().
        // NOTE(review): presumably returns a reference to der_in_ -- confirm
        // against the definition, which is not visible in this header.
        const matrix<double> &backward_propagation_data();

        // Checks the format of the target data, e.g. in classification problems
        // the target data should be binary (either 0 or 1).
        //   target: target values given as a matrix.
        virtual void check_target_data(const matrix<double> &target);

        // Overload for target data in which each element is a class label.
        // This form may not be sensible for regression tasks, so the default
        // implementation is documented to raise an exception (the definition
        // is outside this header).
        //   target: class labels, one per element.
        virtual void check_target_data(const array<int> &target);

        // Combines the forward stage and the back-propagation stage for the
        // output layer. Implementations should store the computed derivative
        // of the input in this layer so that it can be retrieved through
        // backward_propagation_data().
        //   prev_layer_data: output of the previous layer (input of this layer).
        //   target:          target values given as a matrix.
        virtual void evaluation(const matrix<double> &prev_layer_data, const matrix<double> &target) = 0;

        // Overload for target data in which each element is a class label.
        // This form may not be sensible for regression tasks, so the default
        // implementation is documented to raise an exception (the definition
        // is outside this header).
        //   prev_layer_data: output of the previous layer (input of this layer).
        //   target:          class labels, one per element.
        virtual void evaluation(const matrix<double> &prev_layer_data, const array<int> &target);

        // Returns the loss function value after the evaluation.
        // This function can be assumed to be called after evaluation(), so an
        // implementation may reuse intermediate results to save computation.
        virtual double loss_value() const = 0;

        // Returns the output layer name. It is used to export the NN model.
        virtual std::string get_output_name() const = 0;

        // Returns the output layer type. It is used to export the NN model.
        virtual olayer_type_e get_output_type() const = 0;

    protected:
        // Derivative of the input of this layer. Note that the input of this
        // layer is also the output of the previous layer.
        matrix<double> der_in_;
    };
}

#endif // _GCTL_DNN_OLAYER_H