diff --git a/src/lib/sgd.cpp b/src/lib/sgd.cpp index 5ccc7e3..a0f04bd 100644 --- a/src/lib/sgd.cpp +++ b/src/lib/sgd.cpp @@ -203,11 +203,14 @@ int momentum(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_ mk[i] = 0.0; } + sgd_float tmp_fx; for (int t = 0; t < para.iteration; t++) { + *fx = 0.0; for (int m = 0; m < m_size; m++) { - *fx = Evafp(instance, x, g, n_size, m); + tmp_fx = Evafp(instance, x, g, n_size, m); + *fx += tmp_fx; for (int i = 0; i < n_size; i++) { @@ -217,6 +220,7 @@ int momentum(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_ if (x[i] != x[i]) return SGD_NAN_VALUE; } } + *fx /= m_size; if (Profp(instance, *fx, x, g, param, n_size, t)) return SGD_STOP; if (*fx < para.epsilon) return SGD_CONVERGENCE; @@ -248,8 +252,10 @@ int nag(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_float mk[i] = 0.0; } + sgd_float tmp_fx; for (int t = 0; t < para.iteration; t++) { + *fx = 0.0; for (int m = 0; m < m_size; m++) { for (int i = 0; i < n_size; i++) @@ -257,7 +263,8 @@ int nag(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_float xk[i] = x[i] - para.mu*para.alpha*mk[i]; } - *fx = Evafp(instance, xk, g, n_size, m); + tmp_fx = Evafp(instance, xk, g, n_size, m); + *fx += tmp_fx; for (int i = 0; i < n_size; i++) { @@ -267,6 +274,7 @@ int nag(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_float if (x[i] != x[i]) return SGD_NAN_VALUE; } } + *fx /= m_size; if (Profp(instance, *fx, x, g, param, n_size, t)) return SGD_STOP; if (*fx < para.epsilon) return SGD_CONVERGENCE; @@ -298,11 +306,14 @@ int adagrad(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_f mk[i] = 0.0; } + sgd_float tmp_fx; for (int t = 0; t < para.iteration; t++) { + *fx = 0.0; for (int m = 0; m < m_size; m++) { - *fx = Evafp(instance, x, g, n_size, m); + tmp_fx = Evafp(instance, x, g, n_size, m); + *fx += tmp_fx; for (int i = 0; i < n_size; i++) { @@ -312,6 +323,7 @@ int adagrad(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_f if (x[i] != x[i]) return SGD_NAN_VALUE; } } + *fx /= m_size; if (Profp(instance, *fx, x, g, param, n_size, t)) return SGD_STOP; if (*fx < para.epsilon) return SGD_CONVERGENCE; @@ -342,11 +354,14 @@ int rmsprop(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_f vk[i] = 0.0; } + sgd_float tmp_fx; for (int t = 0; t < para.iteration; t++) { + *fx = 0.0; for (int m = 0; m < m_size; m++) { - *fx = Evafp(instance, x, g, n_size, m); + tmp_fx = Evafp(instance, x, g, n_size, m); + *fx += tmp_fx; for (int i = 0; i < n_size; i++) { @@ -356,6 +371,7 @@ int rmsprop(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_f if (x[i] != x[i]) return SGD_NAN_VALUE; } } + *fx /= m_size; if (Profp(instance, *fx, x, g, param, n_size, t)) return SGD_STOP; if (*fx < para.epsilon) return SGD_CONVERGENCE; @@ -393,6 +409,7 @@ int adam(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_floa sgd_float beta_1t = 1.0, beta_2t = 1.0; sgd_float alpha_k; + sgd_float tmp_fx; for (int t = 0; t < para.iteration; t++) { beta_1t *= para.beta_1; @@ -400,9 +417,11 @@ int adam(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_floa alpha_k = para.alpha * sqrt(1.0 - beta_2t)/(1.0 - beta_1t); + *fx = 0.0; for (int m = 0; m < m_size; m++) { - *fx = Evafp(instance, x, g, n_size, m); + tmp_fx = Evafp(instance, x, g, n_size, m); + *fx += tmp_fx; for (int i = 0; i < n_size; i++) { @@ -413,6 +432,7 @@ int adam(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_floa if (x[i] != x[i]) return SGD_NAN_VALUE; } } + *fx /= m_size; if (Profp(instance, *fx, x, g, param, n_size, t)) return SGD_STOP; if (*fx < para.epsilon) return SGD_CONVERGENCE; @@ -453,15 +473,18 @@ int nadam(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_flo sgd_float beta_1t = 1.0, beta_1t1 = para.beta_1, beta_2t = 1.0; + sgd_float tmp_fx; for (int t = 0; t < para.iteration; t++) { beta_1t *= para.beta_1; beta_1t1 *= para.beta_1; beta_2t *= para.beta_2; + *fx = 0.0; for (int m = 0; m < m_size; m++) { - *fx = Evafp(instance, x, g, n_size, m); + tmp_fx = Evafp(instance, x, g, n_size, m); + *fx += tmp_fx; for (int i = 0; i < n_size; i++) { @@ -477,6 +500,7 @@ int nadam(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_flo if (x[i] != x[i]) return SGD_NAN_VALUE; } } + *fx /= m_size; if (Profp(instance, *fx, x, g, param, n_size, t)) return SGD_STOP; if (*fx < para.epsilon) return SGD_CONVERGENCE; @@ -518,13 +542,16 @@ int adamax(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_fl sgd_float beta_1t = 1.0; + sgd_float tmp_fx; for (int t = 0; t < para.iteration; t++) { beta_1t *= para.beta_1; + *fx = 0.0; for (int m = 0; m < m_size; m++) { - *fx = Evafp(instance, x, g, n_size, m); + tmp_fx = Evafp(instance, x, g, n_size, m); + *fx += tmp_fx; for (int i = 0; i < n_size; i++) { @@ -535,6 +562,7 @@ int adamax(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_fl if (x[i] != x[i]) return SGD_NAN_VALUE; } } + *fx /= m_size; if (Profp(instance, *fx, x, g, param, n_size, t)) return SGD_STOP; if (*fx < para.epsilon) return SGD_CONVERGENCE; @@ -573,6 +601,7 @@ int adabelief(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd sgd_float beta_1t = 1.0, beta_2t = 1.0; sgd_float alpha_k; + sgd_float tmp_fx; for (int t = 0; t < para.iteration; t++) { beta_1t *= para.beta_1; @@ -580,9 +609,11 @@ int adabelief(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd alpha_k = para.alpha * sqrt(1.0 - beta_2t)/(1.0 - beta_1t); + *fx = 0.0; for (int m = 0; m < m_size; m++) { - *fx = Evafp(instance, x, g, n_size, m); + tmp_fx = Evafp(instance, x, g, n_size, m); + *fx += tmp_fx; for (int i = 0; i < n_size; i++) { @@ -593,6 +624,7 @@ int adabelief(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd if (x[i] != x[i]) return SGD_NAN_VALUE; } } + *fx /= m_size; if (Profp(instance, *fx, x, g, param, n_size, t)) return SGD_STOP; if (*fx < para.epsilon) return SGD_CONVERGENCE;