/******************************************************** * ██████╗ ██████╗████████╗██╗ * ██╔════╝ ██╔════╝╚══██╔══╝██║ * ██║ ███╗██║ ██║ ██║ * ██║ ██║██║ ██║ ██║ * ╚██████╔╝╚██████╗ ██║ ███████╗ * ╚═════╝ ╚═════╝ ╚═╝ ╚══════╝ * Geophysical Computational Tools & Library (GCTL) * * Copyright (c) 2022 Yi Zhang (yizhang-geo@zju.edu.cn) * * GCTL is distributed under a dual licensing scheme. You can redistribute * it and/or modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation, either version 2 * of the License, or (at your option) any later version. You should have * received a copy of the GNU Lesser General Public License along with this * program. If not, see http://www.gnu.org/licenses/. * * If the terms and conditions of the LGPL v.2. would prevent you from using * the GCTL, please consider the option to obtain a commercial license for a * fee. These licenses are offered by the GCTL's original author. As a rule, * licenses are provided "as-is", unlimited in time for a one time fee. Please * send corresponding requests to: yizhang-geo@zju.edu.cn. Please do not forget * to include some description of your company and the realm of its activities. * Also add information on how to contact you by electronic and paper mail. 
******************************************************/ #include "activation_mish.h" gctl::mish::mish() {} gctl::mish::~mish() {} void gctl::mish::activate(const matrix &z, matrix &a) { // Mish(x) = x * tanh(softplus(x)) // softplus(x) = log(1 + exp(x)) // a = activation(z) = Mish(z) // Z = [z1, ..., zn], A = [a1, ..., an], n observations // h(x) = tanh(softplus(x)) = (1 + exp(x))^2 - 1 // ------------------ // (1 + exp(x))^2 + 1 // Let s = exp(-abs(x)), t = 1 + s // If x >= 0, then h(x) = (t^2 - s^2) / (t^2 + s^2) // If x <= 0, then h(x) = (t^2 - 1) / (t^2 + 1) a.resize(z.row_size(), z.col_size()); int i, j; #pragma omp parallel for private (i, j) schedule(guided) for (i = 0; i < a.row_size(); i++) { for (j = 0; j < a.col_size(); j++) { a[i][j] = z[i][j]*std::tanh(log(1.0 + exp(z[i][j]))); } } return; } void gctl::mish::apply_jacobian(const matrix &z, const matrix &a, const matrix &f, matrix &g) { // Apply the Jacobian matrix J to a vector f // J = d_a / d_z = diag(Mish'(z)) // g = J * f = Mish'(z) .* f // Z = [z1, ..., zn], G = [g1, ..., gn], F = [f1, ..., fn] // Note: When entering this function, Z and G may point to the same matrix // Let h(x) = tanh(softplus(x)) // Mish'(x) = h(x) + x * h'(x) // h'(x) = tanh'(softplus(x)) * softplus'(x) // = [1 - h(x)^2] * exp(x) / (1 + exp(x)) // = [1 - h(x)^2] / (1 + exp(-x)) // Mish'(x) = h(x) + [x - Mish(x) * h(x)] / (1 + exp(-x)) // A = Mish(Z) = Z .* h(Z) => h(Z) = A ./ Z, h(0) = 0.6 g.resize(a.row_size(), a.col_size()); int i, j; #pragma omp parallel for private (i, j) schedule(guided) for (i = 0; i < g.row_size(); i++) { for (j = 0; j < g.col_size(); j++) { g[i][j] = std::tanh(log(1.0 + exp(z[i][j]))); } for (j = 0; j < g.col_size(); j++) { g[i][j] = f[i][j]*(g[i][j] + (z[i][j] - a[i][j]*g[i][j])/(1.0 + exp(-1.0*z[i][j]))); } } return; } std::string gctl::mish::activation_name() const { return "Mish"; } gctl::activation_type_e gctl::mish::activation_type() const { return Mish; }