| OLD | NEW |
| 1 /* Copyright (c) 2008-2011 Octasic Inc. | 1 /* Copyright (c) 2008-2011 Octasic Inc. |
| 2 Written by Jean-Marc Valin */ | 2 Written by Jean-Marc Valin */ |
| 3 /* | 3 /* |
| 4 Redistribution and use in source and binary forms, with or without | 4 Redistribution and use in source and binary forms, with or without |
| 5 modification, are permitted provided that the following conditions | 5 modification, are permitted provided that the following conditions |
| 6 are met: | 6 are met: |
| 7 | 7 |
| 8 - Redistributions of source code must retain the above copyright | 8 - Redistributions of source code must retain the above copyright |
| 9 notice, this list of conditions and the following disclaimer. | 9 notice, this list of conditions and the following disclaimer. |
| 10 | 10 |
| (...skipping 21 matching lines...) |
| 32 #include <string.h> | 32 #include <string.h> |
| 33 #include <semaphore.h> | 33 #include <semaphore.h> |
| 34 #include <pthread.h> | 34 #include <pthread.h> |
| 35 #include <time.h> | 35 #include <time.h> |
| 36 #include <signal.h> | 36 #include <signal.h> |
| 37 | 37 |
| 38 int stopped = 0; | 38 int stopped = 0; |
| 39 | 39 |
| 40 void handler(int sig) | 40 void handler(int sig) |
| 41 { | 41 { |
| 42 » stopped = 1; | 42 stopped = 1; |
| 43 » signal(sig, handler); | 43 signal(sig, handler); |
| 44 } | 44 } |
| 45 | 45 |
| 46 MLPTrain * mlp_init(int *topo, int nbLayers, float *inputs, float *outputs, int nbSamples) | 46 MLPTrain * mlp_init(int *topo, int nbLayers, float *inputs, float *outputs, int nbSamples) |
| 47 { | 47 { |
| 48 » int i, j, k; | 48 int i, j, k; |
| 49 » MLPTrain *net; | 49 MLPTrain *net; |
| 50 » int inDim, outDim; | 50 int inDim, outDim; |
| 51 » net = malloc(sizeof(*net)); | 51 net = malloc(sizeof(*net)); |
| 52 » net->topo = malloc(nbLayers*sizeof(net->topo[0])); | 52 net->topo = malloc(nbLayers*sizeof(net->topo[0])); |
| 53 » for (i=0;i<nbLayers;i++) | 53 for (i=0;i<nbLayers;i++) |
| 54 » » net->topo[i] = topo[i]; | 54 net->topo[i] = topo[i]; |
| 55 » inDim = topo[0]; | 55 inDim = topo[0]; |
| 56 » outDim = topo[nbLayers-1]; | 56 outDim = topo[nbLayers-1]; |
| 57 » net->in_rate = malloc((inDim+1)*sizeof(net->in_rate[0])); | 57 net->in_rate = malloc((inDim+1)*sizeof(net->in_rate[0])); |
| 58 » net->weights = malloc((nbLayers-1)*sizeof(net->weights)); | 58 net->weights = malloc((nbLayers-1)*sizeof(net->weights)); |
| 59 » net->best_weights = malloc((nbLayers-1)*sizeof(net->weights)); | 59 net->best_weights = malloc((nbLayers-1)*sizeof(net->weights)); |
| 60 » for (i=0;i<nbLayers-1;i++) | 60 for (i=0;i<nbLayers-1;i++) |
| 61 » { | 61 { |
| 62 » » net->weights[i] = malloc((topo[i]+1)*topo[i+1]*sizeof(net->weights[0][0])); | 62 net->weights[i] = malloc((topo[i]+1)*topo[i+1]*sizeof(net->weights[0][0])); |
| 63 » » net->best_weights[i] = malloc((topo[i]+1)*topo[i+1]*sizeof(net->weights[0][0])); | 63 net->best_weights[i] = malloc((topo[i]+1)*topo[i+1]*sizeof(net->weights[0][0])); |
| 64 » } | 64 } |
| 65 » double inMean[inDim]; | 65 double inMean[inDim]; |
| 66 » for (j=0;j<inDim;j++) | 66 for (j=0;j<inDim;j++) |
| 67 » { | 67 { |
| 68 » » double std=0; | 68 double std=0; |
| 69 » » inMean[j] = 0; | 69 inMean[j] = 0; |
| 70 » » for (i=0;i<nbSamples;i++) | 70 for (i=0;i<nbSamples;i++) |
| 71 » » { | 71 { |
| 72 » » » inMean[j] += inputs[i*inDim+j]; | 72 inMean[j] += inputs[i*inDim+j]; |
| 73 » » » std += inputs[i*inDim+j]*inputs[i*inDim+j]; | 73 std += inputs[i*inDim+j]*inputs[i*inDim+j]; |
| 74 » » } | 74 } |
| 75 » » inMean[j] /= nbSamples; | 75 inMean[j] /= nbSamples; |
| 76 » » std /= nbSamples; | 76 std /= nbSamples; |
| 77 » » net->in_rate[1+j] = .5/(.0001+std); | 77 net->in_rate[1+j] = .5/(.0001+std); |
| 78 » » std = std-inMean[j]*inMean[j]; | 78 std = std-inMean[j]*inMean[j]; |
| 79 » » if (std<.001) | 79 if (std<.001) |
| 80 » » » std = .001; | 80 std = .001; |
| 81 » » std = 1/sqrt(inDim*std); | 81 std = 1/sqrt(inDim*std); |
| 82 » » for (k=0;k<topo[1];k++) | 82 for (k=0;k<topo[1];k++) |
| 83 » » » net->weights[0][k*(topo[0]+1)+j+1] = randn(std); | 83 net->weights[0][k*(topo[0]+1)+j+1] = randn(std); |
| 84 » } | 84 } |
| 85 » net->in_rate[0] = 1; | 85 net->in_rate[0] = 1; |
| 86 » for (j=0;j<topo[1];j++) | 86 for (j=0;j<topo[1];j++) |
| 87 » { | 87 { |
| 88 » » double sum = 0; | 88 double sum = 0; |
| 89 » » for (k=0;k<inDim;k++) | 89 for (k=0;k<inDim;k++) |
| 90 » » » sum += inMean[k]*net->weights[0][j*(topo[0]+1)+k+1]; | 90 sum += inMean[k]*net->weights[0][j*(topo[0]+1)+k+1]; |
| 91 » » net->weights[0][j*(topo[0]+1)] = -sum; | 91 net->weights[0][j*(topo[0]+1)] = -sum; |
| 92 » } | 92 } |
| 93 » for (j=0;j<outDim;j++) | 93 for (j=0;j<outDim;j++) |
| 94 » { | 94 { |
| 95 » » double mean = 0; | 95 double mean = 0; |
| 96 » » double std; | 96 double std; |
| 97 » » for (i=0;i<nbSamples;i++) | 97 for (i=0;i<nbSamples;i++) |
| 98 » » » mean += outputs[i*outDim+j]; | 98 mean += outputs[i*outDim+j]; |
| 99 » » mean /= nbSamples; | 99 mean /= nbSamples; |
| 100 » » std = 1/sqrt(topo[nbLayers-2]); | 100 std = 1/sqrt(topo[nbLayers-2]); |
| 101 » » net->weights[nbLayers-2][j*(topo[nbLayers-2]+1)] = mean; | 101 net->weights[nbLayers-2][j*(topo[nbLayers-2]+1)] = mean; |
| 102 » » for (k=0;k<topo[nbLayers-2];k++) | 102 for (k=0;k<topo[nbLayers-2];k++) |
| 103 » » » net->weights[nbLayers-2][j*(topo[nbLayers-2]+1)+k+1] = randn(std); | 103 net->weights[nbLayers-2][j*(topo[nbLayers-2]+1)+k+1] = randn(std); |
| 104 » } | 104 } |
| 105 » return net; | 105 return net; |
| 106 } | 106 } |
| 107 | 107 |
| 108 #define MAX_NEURONS 100 | 108 #define MAX_NEURONS 100 |
| 109 #define MAX_OUT 10 | 109 #define MAX_OUT 10 |
| 110 | 110 |
| 111 double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamples, double *W0_grad, double *W1_grad, double *error_rate) | 111 double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamples, double *W0_grad, double *W1_grad, double *error_rate) |
| 112 { | 112 { |
| 113 » int i,j; | 113 int i,j; |
| 114 » int s; | 114 int s; |
| 115 » int inDim, outDim, hiddenDim; | 115 int inDim, outDim, hiddenDim; |
| 116 » int *topo; | 116 int *topo; |
| 117 » double *W0, *W1; | 117 double *W0, *W1; |
| 118 » double rms=0; | 118 double rms=0; |
| 119 » int W0_size, W1_size; | 119 int W0_size, W1_size; |
| 120 » double hidden[MAX_NEURONS]; | 120 double hidden[MAX_NEURONS]; |
| 121 » double netOut[MAX_NEURONS]; | 121 double netOut[MAX_NEURONS]; |
| 122 » double error[MAX_NEURONS]; | 122 double error[MAX_NEURONS]; |
| 123 | 123 |
| 124 » for (i=0;i<outDim;i++) | 124 topo = net->topo; |
| 125 » error_rate[i] = 0; | 125 inDim = net->topo[0]; |
| 126 » topo = net->topo; | 126 hiddenDim = net->topo[1]; |
| 127 » inDim = net->topo[0]; | 127 outDim = net->topo[2]; |
| 128 » hiddenDim = net->topo[1]; | 128 W0_size = (topo[0]+1)*topo[1]; |
| 129 » outDim = net->topo[2]; | 129 W1_size = (topo[1]+1)*topo[2]; |
| 130 » W0_size = (topo[0]+1)*topo[1]; | 130 W0 = net->weights[0]; |
| 131 » W1_size = (topo[1]+1)*topo[2]; | 131 W1 = net->weights[1]; |
| 132 » W0 = net->weights[0]; | 132 memset(W0_grad, 0, W0_size*sizeof(double)); |
| 133 » W1 = net->weights[1]; | 133 memset(W1_grad, 0, W1_size*sizeof(double)); |
| 134 » memset(W0_grad, 0, W0_size*sizeof(double)); | 134 for (i=0;i<outDim;i++) |
| 135 » memset(W1_grad, 0, W1_size*sizeof(double)); | 135 netOut[i] = outputs[i]; |
| 136 » for (i=0;i<outDim;i++) | 136 for (i=0;i<outDim;i++) |
| 137 » » netOut[i] = outputs[i]; | 137 error_rate[i] = 0; |
| 138 » for (s=0;s<nbSamples;s++) | 138 for (s=0;s<nbSamples;s++) |
| 139 » { | 139 { |
| 140 » » float *in, *out; | 140 float *in, *out; |
| 141 » » in = inputs+s*inDim; | 141 in = inputs+s*inDim; |
| 142 » » out = outputs + s*outDim; | 142 out = outputs + s*outDim; |
| 143 » » for (i=0;i<hiddenDim;i++) | 143 for (i=0;i<hiddenDim;i++) |
| 144 » » { | 144 { |
| 145 » » » double sum = W0[i*(inDim+1)]; | 145 double sum = W0[i*(inDim+1)]; |
| 146 » » » for (j=0;j<inDim;j++) | 146 for (j=0;j<inDim;j++) |
| 147 » » » » sum += W0[i*(inDim+1)+j+1]*in[j]; | 147 sum += W0[i*(inDim+1)+j+1]*in[j]; |
| 148 » » » hidden[i] = tansig_approx(sum); | 148 hidden[i] = tansig_approx(sum); |
| 149 » » } | 149 } |
| 150 » » for (i=0;i<outDim;i++) | 150 for (i=0;i<outDim;i++) |
| 151 » » { | 151 { |
| 152 » » » double sum = W1[i*(hiddenDim+1)]; | 152 double sum = W1[i*(hiddenDim+1)]; |
| 153 » » » for (j=0;j<hiddenDim;j++) | 153 for (j=0;j<hiddenDim;j++) |
| 154 » » » » sum += W1[i*(hiddenDim+1)+j+1]*hidden[j]; | 154 sum += W1[i*(hiddenDim+1)+j+1]*hidden[j]; |
| 155 » » » netOut[i] = tansig_approx(sum); | 155 netOut[i] = tansig_approx(sum); |
| 156 » » » error[i] = out[i] - netOut[i]; | 156 error[i] = out[i] - netOut[i]; |
| 157 » » » rms += error[i]*error[i]; | 157 rms += error[i]*error[i]; |
| 158 » » » error_rate[i] += fabs(error[i])>1; | 158 error_rate[i] += fabs(error[i])>1; |
| 159 » » » /*error[i] = error[i]/(1+fabs(error[i]));*/ | 159 /*error[i] = error[i]/(1+fabs(error[i]));*/ |
| 160 » » } | 160 } |
| 161 » » /* Back-propagate error */ | 161 /* Back-propagate error */ |
| 162 » » for (i=0;i<outDim;i++) | 162 for (i=0;i<outDim;i++) |
| 163 » » { | 163 { |
| 164 float grad = 1-netOut[i]*netOut[i]; | 164 float grad = 1-netOut[i]*netOut[i]; |
| 165 » » » W1_grad[i*(hiddenDim+1)] += error[i]*grad; | 165 W1_grad[i*(hiddenDim+1)] += error[i]*grad; |
| 166 » » » for (j=0;j<hiddenDim;j++) | 166 for (j=0;j<hiddenDim;j++) |
| 167 » » » » W1_grad[i*(hiddenDim+1)+j+1] += grad*error[i]*hidden[j]; | 167 W1_grad[i*(hiddenDim+1)+j+1] += grad*error[i]*hidden[j]; |
| 168 » » } | 168 } |
| 169 » » for (i=0;i<hiddenDim;i++) | 169 for (i=0;i<hiddenDim;i++) |
| 170 » » { | 170 { |
| 171 » » » double grad; | 171 double grad; |
| 172 » » » grad = 0; | 172 grad = 0; |
| 173 » » » for (j=0;j<outDim;j++) | 173 for (j=0;j<outDim;j++) |
| 174 » » » » grad += error[j]*W1[j*(hiddenDim+1)+i+1]; | 174 grad += error[j]*W1[j*(hiddenDim+1)+i+1]; |
| 175 » » » grad *= 1-hidden[i]*hidden[i]; | 175 grad *= 1-hidden[i]*hidden[i]; |
| 176 » » » W0_grad[i*(inDim+1)] += grad; | 176 W0_grad[i*(inDim+1)] += grad; |
| 177 » » » for (j=0;j<inDim;j++) | 177 for (j=0;j<inDim;j++) |
| 178 » » » » W0_grad[i*(inDim+1)+j+1] += grad*in[j]; | 178 W0_grad[i*(inDim+1)+j+1] += grad*in[j]; |
| 179 » » } | 179 } |
| 180 » } | 180 } |
| 181 » return rms; | 181 return rms; |
| 182 } | 182 } |
| 183 | 183 |
| 184 #define NB_THREADS 8 | 184 #define NB_THREADS 8 |
| 185 | 185 |
| 186 sem_t sem_begin[NB_THREADS]; | 186 sem_t sem_begin[NB_THREADS]; |
| 187 sem_t sem_end[NB_THREADS]; | 187 sem_t sem_end[NB_THREADS]; |
| 188 | 188 |
| 189 struct GradientArg { | 189 struct GradientArg { |
| 190 » int id; | 190 int id; |
| 191 » int done; | 191 int done; |
| 192 » MLPTrain *net; | 192 MLPTrain *net; |
| 193 » float *inputs; | 193 float *inputs; |
| 194 » float *outputs; | 194 float *outputs; |
| 195 » int nbSamples; | 195 int nbSamples; |
| 196 » double *W0_grad; | 196 double *W0_grad; |
| 197 » double *W1_grad; | 197 double *W1_grad; |
| 198 » double rms; | 198 double rms; |
| 199 » double error_rate[MAX_OUT]; | 199 double error_rate[MAX_OUT]; |
| 200 }; | 200 }; |
| 201 | 201 |
| 202 void *gradient_thread_process(void *_arg) | 202 void *gradient_thread_process(void *_arg) |
| 203 { | 203 { |
| 204 » int W0_size, W1_size; | 204 int W0_size, W1_size; |
| 205 » struct GradientArg *arg = _arg; | 205 struct GradientArg *arg = _arg; |
| 206 » int *topo = arg->net->topo; | 206 int *topo = arg->net->topo; |
| 207 » W0_size = (topo[0]+1)*topo[1]; | 207 W0_size = (topo[0]+1)*topo[1]; |
| 208 » W1_size = (topo[1]+1)*topo[2]; | 208 W1_size = (topo[1]+1)*topo[2]; |
| 209 » double W0_grad[W0_size]; | 209 double W0_grad[W0_size]; |
| 210 » double W1_grad[W1_size]; | 210 double W1_grad[W1_size]; |
| 211 » arg->W0_grad = W0_grad; | 211 arg->W0_grad = W0_grad; |
| 212 » arg->W1_grad = W1_grad; | 212 arg->W1_grad = W1_grad; |
| 213 » while (1) | 213 while (1) |
| 214 » { | 214 { |
| 215 » » sem_wait(&sem_begin[arg->id]); | 215 sem_wait(&sem_begin[arg->id]); |
| 216 » » if (arg->done) | 216 if (arg->done) |
| 217 » » » break; | 217 break; |
| 218 » » arg->rms = compute_gradient(arg->net, arg->inputs, arg->outputs, arg->nbSamples, arg->W0_grad, arg->W1_grad, arg->error_rate); | 218 arg->rms = compute_gradient(arg->net, arg->inputs, arg->outputs, arg->nbSamples, arg->W0_grad, arg->W1_grad, arg->error_rate); |
| 219 » » sem_post(&sem_end[arg->id]); | 219 sem_post(&sem_end[arg->id]); |
| 220 » } | 220 } |
| 221 » fprintf(stderr, "done\n"); | 221 fprintf(stderr, "done\n"); |
| 222 » return NULL; | 222 return NULL; |
| 223 } | 223 } |
| 224 | 224 |
| 225 float mlp_train_backprop(MLPTrain *net, float *inputs, float *outputs, int nbSamples, int nbEpoch, float rate) | 225 float mlp_train_backprop(MLPTrain *net, float *inputs, float *outputs, int nbSamples, int nbEpoch, float rate) |
| 226 { | 226 { |
| 227 » int i, j; | 227 int i, j; |
| 228 » int e; | 228 int e; |
| 229 » float best_rms = 1e10; | 229 float best_rms = 1e10; |
| 230 » int inDim, outDim, hiddenDim; | 230 int inDim, outDim, hiddenDim; |
| 231 » int *topo; | 231 int *topo; |
| 232 » double *W0, *W1, *best_W0, *best_W1; | 232 double *W0, *W1, *best_W0, *best_W1; |
| 233 » double *W0_old, *W1_old; | 233 double *W0_old, *W1_old; |
| 234 » double *W0_old2, *W1_old2; | 234 double *W0_old2, *W1_old2; |
| 235 » double *W0_grad, *W1_grad; | 235 double *W0_grad, *W1_grad; |
| 236 » double *W0_oldgrad, *W1_oldgrad; | 236 double *W0_oldgrad, *W1_oldgrad; |
| 237 » double *W0_rate, *W1_rate; | 237 double *W0_rate, *W1_rate; |
| 238 » double *best_W0_rate, *best_W1_rate; | 238 double *best_W0_rate, *best_W1_rate; |
| 239 » int W0_size, W1_size; | 239 int W0_size, W1_size; |
| 240 » topo = net->topo; | 240 topo = net->topo; |
| 241 » W0_size = (topo[0]+1)*topo[1]; | 241 W0_size = (topo[0]+1)*topo[1]; |
| 242 » W1_size = (topo[1]+1)*topo[2]; | 242 W1_size = (topo[1]+1)*topo[2]; |
| 243 » struct GradientArg args[NB_THREADS]; | 243 struct GradientArg args[NB_THREADS]; |
| 244 » pthread_t thread[NB_THREADS]; | 244 pthread_t thread[NB_THREADS]; |
| 245 » int samplePerPart = nbSamples/NB_THREADS; | 245 int samplePerPart = nbSamples/NB_THREADS; |
| 246 » int count_worse=0; | 246 int count_worse=0; |
| 247 » int count_retries=0; | 247 int count_retries=0; |
| 248 | 248 |
| 249 » topo = net->topo; | 249 topo = net->topo; |
| 250 » inDim = net->topo[0]; | 250 inDim = net->topo[0]; |
| 251 » hiddenDim = net->topo[1]; | 251 hiddenDim = net->topo[1]; |
| 252 » outDim = net->topo[2]; | 252 outDim = net->topo[2]; |
| 253 » W0 = net->weights[0]; | 253 W0 = net->weights[0]; |
| 254 » W1 = net->weights[1]; | 254 W1 = net->weights[1]; |
| 255 » best_W0 = net->best_weights[0]; | 255 best_W0 = net->best_weights[0]; |
| 256 » best_W1 = net->best_weights[1]; | 256 best_W1 = net->best_weights[1]; |
| 257 » W0_old = malloc(W0_size*sizeof(double)); | 257 W0_old = malloc(W0_size*sizeof(double)); |
| 258 » W1_old = malloc(W1_size*sizeof(double)); | 258 W1_old = malloc(W1_size*sizeof(double)); |
| 259 » W0_old2 = malloc(W0_size*sizeof(double)); | 259 W0_old2 = malloc(W0_size*sizeof(double)); |
| 260 » W1_old2 = malloc(W1_size*sizeof(double)); | 260 W1_old2 = malloc(W1_size*sizeof(double)); |
| 261 » W0_grad = malloc(W0_size*sizeof(double)); | 261 W0_grad = malloc(W0_size*sizeof(double)); |
| 262 » W1_grad = malloc(W1_size*sizeof(double)); | 262 W1_grad = malloc(W1_size*sizeof(double)); |
| 263 » W0_oldgrad = malloc(W0_size*sizeof(double)); | 263 W0_oldgrad = malloc(W0_size*sizeof(double)); |
| 264 » W1_oldgrad = malloc(W1_size*sizeof(double)); | 264 W1_oldgrad = malloc(W1_size*sizeof(double)); |
| 265 » W0_rate = malloc(W0_size*sizeof(double)); | 265 W0_rate = malloc(W0_size*sizeof(double)); |
| 266 » W1_rate = malloc(W1_size*sizeof(double)); | 266 W1_rate = malloc(W1_size*sizeof(double)); |
| 267 » best_W0_rate = malloc(W0_size*sizeof(double)); | 267 best_W0_rate = malloc(W0_size*sizeof(double)); |
| 268 » best_W1_rate = malloc(W1_size*sizeof(double)); | 268 best_W1_rate = malloc(W1_size*sizeof(double)); |
| 269 » memcpy(W0_old, W0, W0_size*sizeof(double)); | 269 memcpy(W0_old, W0, W0_size*sizeof(double)); |
| 270 » memcpy(W0_old2, W0, W0_size*sizeof(double)); | 270 memcpy(W0_old2, W0, W0_size*sizeof(double)); |
| 271 » memset(W0_grad, 0, W0_size*sizeof(double)); | 271 memset(W0_grad, 0, W0_size*sizeof(double)); |
| 272 » memset(W0_oldgrad, 0, W0_size*sizeof(double)); | 272 memset(W0_oldgrad, 0, W0_size*sizeof(double)); |
| 273 » memcpy(W1_old, W1, W1_size*sizeof(double)); | 273 memcpy(W1_old, W1, W1_size*sizeof(double)); |
| 274 » memcpy(W1_old2, W1, W1_size*sizeof(double)); | 274 memcpy(W1_old2, W1, W1_size*sizeof(double)); |
| 275 » memset(W1_grad, 0, W1_size*sizeof(double)); | 275 memset(W1_grad, 0, W1_size*sizeof(double)); |
| 276 » memset(W1_oldgrad, 0, W1_size*sizeof(double)); | 276 memset(W1_oldgrad, 0, W1_size*sizeof(double)); |
| 277 » | 277 |
| 278 » rate /= nbSamples; | 278 rate /= nbSamples; |
| 279 » for (i=0;i<hiddenDim;i++) | 279 for (i=0;i<hiddenDim;i++) |
| 280 » » for (j=0;j<inDim+1;j++) | 280 for (j=0;j<inDim+1;j++) |
| 281 » » » W0_rate[i*(inDim+1)+j] = rate*net->in_rate[j]; | 281 W0_rate[i*(inDim+1)+j] = rate*net->in_rate[j]; |
| 282 » for (i=0;i<W1_size;i++) | 282 for (i=0;i<W1_size;i++) |
| 283 » » W1_rate[i] = rate; | 283 W1_rate[i] = rate; |
| 284 » | 284 |
| 285 » for (i=0;i<NB_THREADS;i++) | 285 for (i=0;i<NB_THREADS;i++) |
| 286 » { | 286 { |
| 287 » » args[i].net = net; | 287 args[i].net = net; |
| 288 » » args[i].inputs = inputs+i*samplePerPart*inDim; | 288 args[i].inputs = inputs+i*samplePerPart*inDim; |
| 289 » » args[i].outputs = outputs+i*samplePerPart*outDim; | 289 args[i].outputs = outputs+i*samplePerPart*outDim; |
| 290 » » args[i].nbSamples = samplePerPart; | 290 args[i].nbSamples = samplePerPart; |
| 291 » » args[i].id = i; | 291 args[i].id = i; |
| 292 » » args[i].done = 0; | 292 args[i].done = 0; |
| 293 » » sem_init(&sem_begin[i], 0, 0); | 293 sem_init(&sem_begin[i], 0, 0); |
| 294 » » sem_init(&sem_end[i], 0, 0); | 294 sem_init(&sem_end[i], 0, 0); |
| 295 » » pthread_create(&thread[i], NULL, gradient_thread_process, &args[i]); | 295 pthread_create(&thread[i], NULL, gradient_thread_process, &args[i]); |
| 296 » } | 296 } |
| 297 » for (e=0;e<nbEpoch;e++) | 297 for (e=0;e<nbEpoch;e++) |
| 298 » { | 298 { |
| 299 » » double rms=0; | 299 double rms=0; |
| 300 » » double error_rate[2] = {0,0}; | 300 double error_rate[2] = {0,0}; |
| 301 » » for (i=0;i<NB_THREADS;i++) | 301 for (i=0;i<NB_THREADS;i++) |
| 302 » » { | 302 { |
| 303 » » » sem_post(&sem_begin[i]); | 303 sem_post(&sem_begin[i]); |
| 304 » » } | 304 } |
| 305 » » memset(W0_grad, 0, W0_size*sizeof(double)); | 305 memset(W0_grad, 0, W0_size*sizeof(double)); |
| 306 » » memset(W1_grad, 0, W1_size*sizeof(double)); | 306 memset(W1_grad, 0, W1_size*sizeof(double)); |
| 307 » » for (i=0;i<NB_THREADS;i++) | 307 for (i=0;i<NB_THREADS;i++) |
| 308 » » { | 308 { |
| 309 » » » sem_wait(&sem_end[i]); | 309 sem_wait(&sem_end[i]); |
| 310 » » » rms += args[i].rms; | 310 rms += args[i].rms; |
| 311 » » » error_rate[0] += args[i].error_rate[0]; | 311 error_rate[0] += args[i].error_rate[0]; |
| 312 error_rate[1] += args[i].error_rate[1]; | 312 error_rate[1] += args[i].error_rate[1]; |
| 313 » » » for (j=0;j<W0_size;j++) | 313 for (j=0;j<W0_size;j++) |
| 314 » » » » W0_grad[j] += args[i].W0_grad[j]; | 314 W0_grad[j] += args[i].W0_grad[j]; |
| 315 » » » for (j=0;j<W1_size;j++) | 315 for (j=0;j<W1_size;j++) |
| 316 » » » » W1_grad[j] += args[i].W1_grad[j]; | 316 W1_grad[j] += args[i].W1_grad[j]; |
| 317 » » } | 317 } |
| 318 | 318 |
| 319 » » float mean_rate = 0, min_rate = 1e10; | 319 float mean_rate = 0, min_rate = 1e10; |
| 320 » » rms = (rms/(outDim*nbSamples)); | 320 rms = (rms/(outDim*nbSamples)); |
| 321 » » error_rate[0] = (error_rate[0]/(nbSamples)); | 321 error_rate[0] = (error_rate[0]/(nbSamples)); |
| 322 error_rate[1] = (error_rate[1]/(nbSamples)); | 322 error_rate[1] = (error_rate[1]/(nbSamples)); |
| 323 » » fprintf (stderr, "%f %f (%f %f) ", error_rate[0], error_rate[1], rms, best_rms); | 323 fprintf (stderr, "%f %f (%f %f) ", error_rate[0], error_rate[1], rms, best_rms); |
| 324 » » if (rms < best_rms) | 324 if (rms < best_rms) |
| 325 » » { | 325 { |
| 326 » » » best_rms = rms; | 326 best_rms = rms; |
| 327 » » » for (i=0;i<W0_size;i++) | 327 for (i=0;i<W0_size;i++) |
| 328 » » » { | 328 { |
| 329 » » » » best_W0[i] = W0[i]; | 329 best_W0[i] = W0[i]; |
| 330 » » » » best_W0_rate[i] = W0_rate[i]; | 330 best_W0_rate[i] = W0_rate[i]; |
| 331 » » » } | 331 } |
| 332 » » » for (i=0;i<W1_size;i++) | 332 for (i=0;i<W1_size;i++) |
| 333 » » » { | 333 { |
| 334 » » » » best_W1[i] = W1[i]; | 334 best_W1[i] = W1[i]; |
| 335 » » » » best_W1_rate[i] = W1_rate[i]; | 335 best_W1_rate[i] = W1_rate[i]; |
| 336 » » » } | 336 } |
| 337 » » » count_worse=0; | 337 count_worse=0; |
| 338 » » » count_retries=0; | 338 count_retries=0; |
| 339 » » } else { | 339 } else { |
| 340 » » » count_worse++; | 340 count_worse++; |
| 341 » » » if (count_worse>30) | 341 if (count_worse>30) |
| 342 » » » { | 342 { |
| 343 » » » count_retries++; | 343 count_retries++; |
| 344 » » » » count_worse=0; | 344 count_worse=0; |
| 345 » » » » for (i=0;i<W0_size;i++) | 345 for (i=0;i<W0_size;i++) |
| 346 » » » » { | 346 { |
| 347 » » » » » W0[i] = best_W0[i]; | 347 W0[i] = best_W0[i]; |
| 348 » » » » » best_W0_rate[i] *= .7; | 348 best_W0_rate[i] *= .7; |
| 349 » » » » » if (best_W0_rate[i]<1e-15) best_W0_rate[i]=1e-15; | 349 if (best_W0_rate[i]<1e-15) best_W0_rate[i]=1e-15; |
| 350 » » » » » W0_rate[i] = best_W0_rate[i]; | 350 W0_rate[i] = best_W0_rate[i]; |
| 351 » » » » » W0_grad[i] = 0; | 351 W0_grad[i] = 0; |
| 352 » » » » } | 352 } |
| 353 » » » » for (i=0;i<W1_size;i++) | 353 for (i=0;i<W1_size;i++) |
| 354 » » » » { | 354 { |
| 355 » » » » » W1[i] = best_W1[i]; | 355 W1[i] = best_W1[i]; |
| 356 » » » » » best_W1_rate[i] *= .8; | 356 best_W1_rate[i] *= .8; |
| 357 » » » » » if (best_W1_rate[i]<1e-15) best_W1_rate[i]=1e-15; | 357 if (best_W1_rate[i]<1e-15) best_W1_rate[i]=1e-15; |
| 358 » » » » » W1_rate[i] = best_W1_rate[i]; | 358 W1_rate[i] = best_W1_rate[i]; |
| 359 » » » » » W1_grad[i] = 0; | 359 W1_grad[i] = 0; |
| 360 » » » » } | 360 } |
| 361 » » » } | 361 } |
| 362 » » } | 362 } |
| 363 » » if (count_retries>10) | 363 if (count_retries>10) |
| 364 » » break; | 364 break; |
| 365 » » for (i=0;i<W0_size;i++) | 365 for (i=0;i<W0_size;i++) |
| 366 » » { | 366 { |
| 367 » » » if (W0_oldgrad[i]*W0_grad[i] > 0) | 367 if (W0_oldgrad[i]*W0_grad[i] > 0) |
| 368 » » » » W0_rate[i] *= 1.01; | 368 W0_rate[i] *= 1.01; |
| 369 » » » else if (W0_oldgrad[i]*W0_grad[i] < 0) | 369 else if (W0_oldgrad[i]*W0_grad[i] < 0) |
| 370 » » » » W0_rate[i] *= .9; | 370 W0_rate[i] *= .9; |
| 371 » » » mean_rate += W0_rate[i]; | 371 mean_rate += W0_rate[i]; |
| 372 » » » if (W0_rate[i] < min_rate) | 372 if (W0_rate[i] < min_rate) |
| 373 » » » » min_rate = W0_rate[i]; | 373 min_rate = W0_rate[i]; |
| 374 » » » if (W0_rate[i] < 1e-15) | 374 if (W0_rate[i] < 1e-15) |
| 375 » » » » W0_rate[i] = 1e-15; | 375 W0_rate[i] = 1e-15; |
| 376 » » » /*if (W0_rate[i] > .01) | 376 /*if (W0_rate[i] > .01) |
| 377 » » » » W0_rate[i] = .01;*/ | 377 W0_rate[i] = .01;*/ |
| 378 » » » W0_oldgrad[i] = W0_grad[i]; | 378 W0_oldgrad[i] = W0_grad[i]; |
| 379 » » » W0_old2[i] = W0_old[i]; | 379 W0_old2[i] = W0_old[i]; |
| 380 » » » W0_old[i] = W0[i]; | 380 W0_old[i] = W0[i]; |
| 381 » » » W0[i] += W0_grad[i]*W0_rate[i]; | 381 W0[i] += W0_grad[i]*W0_rate[i]; |
| 382 » » } | 382 } |
| 383 » » for (i=0;i<W1_size;i++) | 383 for (i=0;i<W1_size;i++) |
| 384 » » { | 384 { |
| 385 » » » if (W1_oldgrad[i]*W1_grad[i] > 0) | 385 if (W1_oldgrad[i]*W1_grad[i] > 0) |
| 386 » » » » W1_rate[i] *= 1.01; | 386 W1_rate[i] *= 1.01; |
| 387 » » » else if (W1_oldgrad[i]*W1_grad[i] < 0) | 387 else if (W1_oldgrad[i]*W1_grad[i] < 0) |
| 388 » » » » W1_rate[i] *= .9; | 388 W1_rate[i] *= .9; |
| 389 » » » mean_rate += W1_rate[i]; | 389 mean_rate += W1_rate[i]; |
| 390 » » » if (W1_rate[i] < min_rate) | 390 if (W1_rate[i] < min_rate) |
| 391 » » » » min_rate = W1_rate[i]; | 391 min_rate = W1_rate[i]; |
| 392 » » » if (W1_rate[i] < 1e-15) | 392 if (W1_rate[i] < 1e-15) |
| 393 » » » » W1_rate[i] = 1e-15; | 393 W1_rate[i] = 1e-15; |
| 394 » » » W1_oldgrad[i] = W1_grad[i]; | 394 W1_oldgrad[i] = W1_grad[i]; |
| 395 » » » W1_old2[i] = W1_old[i]; | 395 W1_old2[i] = W1_old[i]; |
| 396 » » » W1_old[i] = W1[i]; | 396 W1_old[i] = W1[i]; |
| 397 » » » W1[i] += W1_grad[i]*W1_rate[i]; | 397 W1[i] += W1_grad[i]*W1_rate[i]; |
| 398 » » } | 398 } |
| 399 » » mean_rate /= (topo[0]+1)*topo[1] + (topo[1]+1)*topo[2]; | 399 mean_rate /= (topo[0]+1)*topo[1] + (topo[1]+1)*topo[2]; |
| 400 » » fprintf (stderr, "%g %d", mean_rate, e); | 400 fprintf (stderr, "%g %d", mean_rate, e); |
| 401 » » if (count_retries) | 401 if (count_retries) |
| 402 » » fprintf(stderr, " %d", count_retries); | 402 fprintf(stderr, " %d", count_retries); |
| 403 » » fprintf(stderr, "\n"); | 403 fprintf(stderr, "\n"); |
| 404 » » if (stopped) | 404 if (stopped) |
| 405 » » » break; | 405 break; |
| 406 » } | 406 } |
| 407 » for (i=0;i<NB_THREADS;i++) | 407 for (i=0;i<NB_THREADS;i++) |
| 408 » { | 408 { |
| 409 » » args[i].done = 1; | 409 args[i].done = 1; |
| 410 » » sem_post(&sem_begin[i]); | 410 sem_post(&sem_begin[i]); |
| 411 » » pthread_join(thread[i], NULL); | 411 pthread_join(thread[i], NULL); |
| 412 » » fprintf (stderr, "joined %d\n", i); | 412 fprintf (stderr, "joined %d\n", i); |
| 413 » } | 413 } |
| 414 » free(W0_old); | 414 free(W0_old); |
| 415 » free(W1_old); | 415 free(W1_old); |
| 416 » free(W0_grad); | 416 free(W0_grad); |
| 417 » free(W1_grad); | 417 free(W1_grad); |
| 418 » free(W0_rate); | 418 free(W0_rate); |
| 419 » free(W1_rate); | 419 free(W1_rate); |
| 420 » return best_rms; | 420 return best_rms; |
| 421 } | 421 } |
| 422 | 422 |
| 423 int main(int argc, char **argv) | 423 int main(int argc, char **argv) |
| 424 { | 424 { |
| 425 » int i, j; | 425 int i, j; |
| 426 » int nbInputs; | 426 int nbInputs; |
| 427 » int nbOutputs; | 427 int nbOutputs; |
| 428 » int nbHidden; | 428 int nbHidden; |
| 429 » int nbSamples; | 429 int nbSamples; |
| 430 » int nbEpoch; | 430 int nbEpoch; |
| 431 » int nbRealInputs; | 431 int nbRealInputs; |
| 432 » unsigned int seed; | 432 unsigned int seed; |
| 433 » int ret; | 433 int ret; |
| 434 » float rms; | 434 float rms; |
| 435 » float *inputs; | 435 float *inputs; |
| 436 » float *outputs; | 436 float *outputs; |
| 437 » if (argc!=6) | 437 if (argc!=6) |
| 438 » { | 438 { |
| 439 » » fprintf (stderr, "usage: mlp_train <inputs> <hidden> <outputs> <nb samples> <nb epoch>\n"); | 439 fprintf (stderr, "usage: mlp_train <inputs> <hidden> <outputs> <nb samples> <nb epoch>\n"); |
| 440 » » return 1; | 440 return 1; |
| 441 » } | 441 } |
| 442 » nbInputs = atoi(argv[1]); | 442 nbInputs = atoi(argv[1]); |
| 443 » nbHidden = atoi(argv[2]); | 443 nbHidden = atoi(argv[2]); |
| 444 » nbOutputs = atoi(argv[3]); | 444 nbOutputs = atoi(argv[3]); |
| 445 » nbSamples = atoi(argv[4]); | 445 nbSamples = atoi(argv[4]); |
| 446 » nbEpoch = atoi(argv[5]); | 446 nbEpoch = atoi(argv[5]); |
| 447 » nbRealInputs = nbInputs; | 447 nbRealInputs = nbInputs; |
| 448 » inputs = malloc(nbInputs*nbSamples*sizeof(*inputs)); | 448 inputs = malloc(nbInputs*nbSamples*sizeof(*inputs)); |
| 449 » outputs = malloc(nbOutputs*nbSamples*sizeof(*outputs)); | 449 outputs = malloc(nbOutputs*nbSamples*sizeof(*outputs)); |
| 450 » | 450 |
| 451 » seed = time(NULL); | 451 seed = time(NULL); |
| 452 /*seed = 1361480659;*/ | 452 /*seed = 1361480659;*/ |
| 453 » fprintf (stderr, "Seed is %u\n", seed); | 453 fprintf (stderr, "Seed is %u\n", seed); |
| 454 » srand(seed); | 454 srand(seed); |
| 455 » build_tansig_table(); | 455 build_tansig_table(); |
| 456 » signal(SIGTERM, handler); | 456 signal(SIGTERM, handler); |
| 457 » signal(SIGINT, handler); | 457 signal(SIGINT, handler); |
| 458 » signal(SIGHUP, handler); | 458 signal(SIGHUP, handler); |
| 459 » for (i=0;i<nbSamples;i++) | 459 for (i=0;i<nbSamples;i++) |
| 460 » { | 460 { |
| 461 » » for (j=0;j<nbRealInputs;j++) | 461 for (j=0;j<nbRealInputs;j++) |
| 462 » » » ret = scanf(" %f", &inputs[i*nbInputs+j]); | 462 ret = scanf(" %f", &inputs[i*nbInputs+j]); |
| 463 » » for (j=0;j<nbOutputs;j++) | 463 for (j=0;j<nbOutputs;j++) |
| 464 » » » ret = scanf(" %f", &outputs[i*nbOutputs+j]); | 464 ret = scanf(" %f", &outputs[i*nbOutputs+j]); |
| 465 » » if (feof(stdin)) | 465 if (feof(stdin)) |
| 466 » » { | 466 { |
| 467 » » » nbSamples = i; | 467 nbSamples = i; |
| 468 » » » break; | 468 break; |
| 469 » » } | 469 } |
| 470 » } | 470 } |
| 471 » int topo[3] = {nbInputs, nbHidden, nbOutputs}; | 471 int topo[3] = {nbInputs, nbHidden, nbOutputs}; |
| 472 » MLPTrain *net; | 472 MLPTrain *net; |
| 473 | 473 |
| 474 » fprintf (stderr, "Got %d samples\n", nbSamples); | 474 fprintf (stderr, "Got %d samples\n", nbSamples); |
| 475 » net = mlp_init(topo, 3, inputs, outputs, nbSamples); | 475 net = mlp_init(topo, 3, inputs, outputs, nbSamples); |
| 476 » rms = mlp_train_backprop(net, inputs, outputs, nbSamples, nbEpoch, 1); | 476 rms = mlp_train_backprop(net, inputs, outputs, nbSamples, nbEpoch, 1); |
| 477 » printf ("#include \"mlp.h\"\n\n"); | 477 printf ("#include \"mlp.h\"\n\n"); |
| 478 » printf ("/* RMS error was %f, seed was %u */\n\n", rms, seed); | 478 printf ("/* RMS error was %f, seed was %u */\n\n", rms, seed); |
| 479 » printf ("static const float weights[%d] = {\n", (topo[0]+1)*topo[1] + (topo[1]+1)*topo[2]); | 479 printf ("static const float weights[%d] = {\n", (topo[0]+1)*topo[1] + (topo[1]+1)*topo[2]); |
| 480 » printf ("\n/* hidden layer */\n"); | 480 printf ("\n/* hidden layer */\n"); |
| 481 » for (i=0;i<(topo[0]+1)*topo[1];i++) | 481 for (i=0;i<(topo[0]+1)*topo[1];i++) |
| 482 » { | 482 { |
| 483 » » printf ("%gf, ", net->weights[0][i]); | 483 printf ("%gf, ", net->weights[0][i]); |
| 484 » » if (i%5==4) | 484 if (i%5==4) |
| 485 » » » printf("\n"); | 485 printf("\n"); |
| 486 » } | 486 } |
| 487 » printf ("\n/* output layer */\n"); | 487 printf ("\n/* output layer */\n"); |
| 488 » for (i=0;i<(topo[1]+1)*topo[2];i++) | 488 for (i=0;i<(topo[1]+1)*topo[2];i++) |
| 489 » { | 489 { |
| 490 » » printf ("%g, ", net->weights[1][i]); | 490 printf ("%g, ", net->weights[1][i]); |
| 491 » » if (i%5==4) | 491 if (i%5==4) |
| 492 » » » printf("\n"); | 492 printf("\n"); |
| 493 » } | 493 } |
| 494 » printf ("};\n\n"); | 494 printf ("};\n\n"); |
| 495 » printf ("static const int topo[3] = {%d, %d, %d};\n\n", topo[0], topo[1], topo[2]); | 495 printf ("static const int topo[3] = {%d, %d, %d};\n\n", topo[0], topo[1], topo[2]); |
| 496 » printf ("const MLP net = {\n"); | 496 printf ("const MLP net = {\n"); |
| 497 » printf ("\t3,\n"); | 497 printf ("\t3,\n"); |
| 498 » printf ("\ttopo,\n"); | 498 printf ("\ttopo,\n"); |
| 499 » printf ("\tweights\n};\n"); | 499 printf ("\tweights\n};\n"); |
| 500 » return 0; | 500 return 0; |
| 501 } | 501 } |