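update on algorithm: in every solver, report *fx as the mean of the Evafp values over all m_size evaluations of an iteration, instead of the value from the last evaluation only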

2020-10-21 18:16:06 +08:00
parent 056f396ad4
commit 86e539756b
1 changed files with 40 additions and 8 deletions
--- a/src/lib/sgd.cpp
+++ b/src/lib/sgd.cpp
@@ -203,11 +203,14 @@ int momentum(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_
 		mk[i] = 0.0;
 	}

+	sgd_float tmp_fx;
 	for (int t = 0; t < para.iteration; t++)
 	{
+		*fx = 0.0;
 		for (int m = 0; m < m_size; m++)
 		{
-			*fx = Evafp(instance, x, g, n_size, m);
+			tmp_fx = Evafp(instance, x, g, n_size, m);
+			*fx += tmp_fx;

 			for (int i = 0; i < n_size; i++)
 			{
@@ -217,6 +220,7 @@ int momentum(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_
 				if (x[i] != x[i]) return SGD_NAN_VALUE;
 			}
 		}
+		*fx /= m_size;

 		if (Profp(instance, *fx, x, g, param, n_size, t)) return SGD_STOP;
 		if (*fx < para.epsilon) return SGD_CONVERGENCE;
@@ -248,8 +252,10 @@ int nag(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_float
 		mk[i] = 0.0;
 	}

+	sgd_float tmp_fx;
 	for (int t = 0; t < para.iteration; t++)
 	{
+		*fx = 0.0;
 		for (int m = 0; m < m_size; m++)
 		{
 			for (int i = 0; i < n_size; i++)
@@ -257,7 +263,8 @@ int nag(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_float
 				xk[i] = x[i] - para.mu*para.alpha*mk[i];
 			}

-			*fx = Evafp(instance, xk, g, n_size, m);
+			tmp_fx = Evafp(instance, xk, g, n_size, m);
+			*fx += tmp_fx;

 			for (int i = 0; i < n_size; i++)
 			{
@@ -267,6 +274,7 @@ int nag(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_float
 				if (x[i] != x[i]) return SGD_NAN_VALUE;
 			}
 		}
+		*fx /= m_size;

 		if (Profp(instance, *fx, x, g, param, n_size, t)) return SGD_STOP;
 		if (*fx < para.epsilon) return SGD_CONVERGENCE;
@@ -298,11 +306,14 @@ int adagrad(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_f
 		mk[i] = 0.0;
 	}

+	sgd_float tmp_fx;
 	for (int t = 0; t < para.iteration; t++)
 	{
+		*fx = 0.0;
 		for (int m = 0; m < m_size; m++)
 		{
-			*fx = Evafp(instance, x, g, n_size, m);
+			tmp_fx = Evafp(instance, x, g, n_size, m);
+			*fx += tmp_fx;

 			for (int i = 0; i < n_size; i++)
 			{
@@ -312,6 +323,7 @@ int adagrad(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_f
 				if (x[i] != x[i]) return SGD_NAN_VALUE;
 			}
 		}
+		*fx /= m_size;

 		if (Profp(instance, *fx, x, g, param, n_size, t)) return SGD_STOP;
 		if (*fx < para.epsilon) return SGD_CONVERGENCE;
@@ -342,11 +354,14 @@ int rmsprop(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_f
 		vk[i] = 0.0;
 	}

+	sgd_float tmp_fx;
 	for (int t = 0; t < para.iteration; t++)
 	{
+		*fx = 0.0;
 		for (int m = 0; m < m_size; m++)
 		{
-			*fx = Evafp(instance, x, g, n_size, m);
+			tmp_fx = Evafp(instance, x, g, n_size, m);
+			*fx += tmp_fx;

 			for (int i = 0; i < n_size; i++)
 			{
@@ -356,6 +371,7 @@ int rmsprop(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_f
 				if (x[i] != x[i]) return SGD_NAN_VALUE;
 			}
 		}
+		*fx /= m_size;

 		if (Profp(instance, *fx, x, g, param, n_size, t)) return SGD_STOP;
 		if (*fx < para.epsilon) return SGD_CONVERGENCE;
@@ -393,6 +409,7 @@ int adam(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_floa
 	sgd_float beta_1t = 1.0, beta_2t = 1.0;
 	sgd_float alpha_k;

+	sgd_float tmp_fx;
 	for (int t = 0; t < para.iteration; t++)
 	{
 		beta_1t *= para.beta_1;
@@ -400,9 +417,11 @@ int adam(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_floa

 		alpha_k = para.alpha * sqrt(1.0 - beta_2t)/(1.0 - beta_1t);

+		*fx = 0.0;
 		for (int m = 0; m < m_size; m++)
 		{
-			*fx = Evafp(instance, x, g, n_size, m);
+			tmp_fx = Evafp(instance, x, g, n_size, m);
+			*fx += tmp_fx;

 			for (int i = 0; i < n_size; i++)
 			{
@@ -413,6 +432,7 @@ int adam(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_floa
 				if (x[i] != x[i]) return SGD_NAN_VALUE;
 			}
 		}
+		*fx /= m_size;

 		if (Profp(instance, *fx, x, g, param, n_size, t)) return SGD_STOP;
 		if (*fx < para.epsilon) return SGD_CONVERGENCE;
@@ -453,15 +473,18 @@ int nadam(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_flo

 	sgd_float beta_1t = 1.0, beta_1t1 = para.beta_1, beta_2t = 1.0;

+	sgd_float tmp_fx;
 	for (int t = 0; t < para.iteration; t++)
 	{
 		beta_1t  *= para.beta_1;
 		beta_1t1 *= para.beta_1;
 		beta_2t  *= para.beta_2;

+		*fx = 0.0;
 		for (int m = 0; m < m_size; m++)
 		{
-			*fx = Evafp(instance, x, g, n_size, m);
+			tmp_fx = Evafp(instance, x, g, n_size, m);
+			*fx += tmp_fx;

 			for (int i = 0; i < n_size; i++)
 			{
@@ -477,6 +500,7 @@ int nadam(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_flo
 				if (x[i] != x[i]) return SGD_NAN_VALUE;
 			}
 		}
+		*fx /= m_size;

 		if (Profp(instance, *fx, x, g, param, n_size, t)) return SGD_STOP;
 		if (*fx < para.epsilon) return SGD_CONVERGENCE;
@@ -518,13 +542,16 @@ int adamax(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_fl

 	sgd_float beta_1t = 1.0;

+	sgd_float tmp_fx;
 	for (int t = 0; t < para.iteration; t++)
 	{
 		beta_1t *= para.beta_1;

+		*fx = 0.0;
 		for (int m = 0; m < m_size; m++)
 		{
-			*fx = Evafp(instance, x, g, n_size, m);
+			tmp_fx = Evafp(instance, x, g, n_size, m);
+			*fx += tmp_fx;

 			for (int i = 0; i < n_size; i++)
 			{
@@ -535,6 +562,7 @@ int adamax(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd_fl
 				if (x[i] != x[i]) return SGD_NAN_VALUE;
 			}
 		}
+		*fx /= m_size;

 		if (Profp(instance, *fx, x, g, param, n_size, t)) return SGD_STOP;
 		if (*fx < para.epsilon) return SGD_CONVERGENCE;
@@ -573,6 +601,7 @@ int adabelief(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd
 	sgd_float beta_1t = 1.0, beta_2t = 1.0;
 	sgd_float alpha_k;

+	sgd_float tmp_fx;
 	for (int t = 0; t < para.iteration; t++)
 	{
 		beta_1t *= para.beta_1;
@@ -580,9 +609,11 @@ int adabelief(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd

 		alpha_k = para.alpha * sqrt(1.0 - beta_2t)/(1.0 - beta_1t);

+		*fx = 0.0;
 		for (int m = 0; m < m_size; m++)
 		{
-			*fx = Evafp(instance, x, g, n_size, m);
+			tmp_fx = Evafp(instance, x, g, n_size, m);
+			*fx += tmp_fx;

 			for (int i = 0; i < n_size; i++)
 			{
@@ -593,6 +624,7 @@ int adabelief(sgd_evaulate_ptr Evafp, sgd_progress_ptr Profp, sgd_float *fx, sgd
 				if (x[i] != x[i]) return SGD_NAN_VALUE;
 			}
 		}
+		*fx /= m_size;

 		if (Profp(instance, *fx, x, g, param, n_size, t)) return SGD_STOP;
 		if (*fx < para.epsilon) return SGD_CONVERGENCE;