Chromium Code Reviews

File: src/mlp_train.c

Issue 882843002: Update to opus-HEAD-66611f1. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/opus.git@master
Patch Set: Add the contents of Makefile.mips back. Created 5 years, 10 months ago
/* Copyright (c) 2008-2011 Octasic Inc.
   Written by Jean-Marc Valin */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

(... 21 matching lines skipped ...)

#include <string.h>
#include <semaphore.h>
#include <pthread.h>
#include <time.h>
#include <signal.h>

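/* Global flag set by the signal handler below so the training loop in
   mlp_train_backprop() can exit cleanly at the end of an epoch. */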
int stopped = 0;

void handler(int sig)
{
    stopped = 1;
    signal(sig, handler);
}

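/* Allocate a network and initialize it from the training data. Weight
   layout, inferred from the indexing below: for each layer, row i stores
   a bias at W[i*(fanIn+1)] followed by fanIn weights. Hidden weights are
   drawn with randn() scaled by 1/sqrt(inDim*var(input)); hidden biases
   cancel the input means; output biases start at the mean of the targets.
   in_rate[] holds per-input learning-rate scales of roughly .5/E[x^2],
   used later by mlp_train_backprop(). */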
MLPTrain * mlp_init(int *topo, int nbLayers, float *inputs, float *outputs, int nbSamples)
{
    int i, j, k;
    MLPTrain *net;
    int inDim, outDim;
    net = malloc(sizeof(*net));
    net->topo = malloc(nbLayers*sizeof(net->topo[0]));
    for (i=0;i<nbLayers;i++)
        net->topo[i] = topo[i];
    inDim = topo[0];
    outDim = topo[nbLayers-1];
    net->in_rate = malloc((inDim+1)*sizeof(net->in_rate[0]));
    net->weights = malloc((nbLayers-1)*sizeof(net->weights[0]));
    net->best_weights = malloc((nbLayers-1)*sizeof(net->weights[0]));
    for (i=0;i<nbLayers-1;i++)
    {
        net->weights[i] = malloc((topo[i]+1)*topo[i+1]*sizeof(net->weights[0][0]));
        net->best_weights[i] = malloc((topo[i]+1)*topo[i+1]*sizeof(net->weights[0][0]));
    }
    double inMean[inDim];
    for (j=0;j<inDim;j++)
    {
        double std=0;
        inMean[j] = 0;
        for (i=0;i<nbSamples;i++)
        {
            inMean[j] += inputs[i*inDim+j];
            std += inputs[i*inDim+j]*inputs[i*inDim+j];
        }
        inMean[j] /= nbSamples;
        std /= nbSamples;
        net->in_rate[1+j] = .5/(.0001+std);
        std = std-inMean[j]*inMean[j];
        if (std<.001)
            std = .001;
        std = 1/sqrt(inDim*std);
        for (k=0;k<topo[1];k++)
            net->weights[0][k*(topo[0]+1)+j+1] = randn(std);
    }
    net->in_rate[0] = 1;
    for (j=0;j<topo[1];j++)
    {
        double sum = 0;
        for (k=0;k<inDim;k++)
            sum += inMean[k]*net->weights[0][j*(topo[0]+1)+k+1];
        net->weights[0][j*(topo[0]+1)] = -sum;
    }
    for (j=0;j<outDim;j++)
    {
        double mean = 0;
        double std;
        for (i=0;i<nbSamples;i++)
            mean += outputs[i*outDim+j];
        mean /= nbSamples;
        std = 1/sqrt(topo[nbLayers-2]);
        net->weights[nbLayers-2][j*(topo[nbLayers-2]+1)] = mean;
        for (k=0;k<topo[nbLayers-2];k++)
            net->weights[nbLayers-2][j*(topo[nbLayers-2]+1)+k+1] = randn(std);
    }
    return net;
}

#define MAX_NEURONS 100
#define MAX_OUT 10

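/* One full-batch forward/backward pass for a 3-layer (input, hidden,
   output) tanh network. Accumulates gradients into W0_grad/W1_grad,
   counts samples with |error| > 1 per output in error_rate[], and
   returns the summed squared error over all samples. */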
double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamples, double *W0_grad, double *W1_grad, double *error_rate)
{
    int i,j;
    int s;
    int inDim, outDim, hiddenDim;
    int *topo;
    double *W0, *W1;
    double rms=0;
    int W0_size, W1_size;
    double hidden[MAX_NEURONS];
    double netOut[MAX_NEURONS];
    double error[MAX_NEURONS];

    topo = net->topo;
    inDim = net->topo[0];
    hiddenDim = net->topo[1];
    outDim = net->topo[2];
    W0_size = (topo[0]+1)*topo[1];
    W1_size = (topo[1]+1)*topo[2];
    W0 = net->weights[0];
    W1 = net->weights[1];
    memset(W0_grad, 0, W0_size*sizeof(double));
    memset(W1_grad, 0, W1_size*sizeof(double));
    for (i=0;i<outDim;i++)
        netOut[i] = outputs[i];
    for (i=0;i<outDim;i++)
        error_rate[i] = 0;
    for (s=0;s<nbSamples;s++)
    {
        float *in, *out;
        in = inputs+s*inDim;
        out = outputs + s*outDim;
        for (i=0;i<hiddenDim;i++)
        {
            double sum = W0[i*(inDim+1)];
            for (j=0;j<inDim;j++)
                sum += W0[i*(inDim+1)+j+1]*in[j];
            hidden[i] = tansig_approx(sum);
        }
        for (i=0;i<outDim;i++)
        {
            double sum = W1[i*(hiddenDim+1)];
            for (j=0;j<hiddenDim;j++)
                sum += W1[i*(hiddenDim+1)+j+1]*hidden[j];
            netOut[i] = tansig_approx(sum);
            error[i] = out[i] - netOut[i];
            rms += error[i]*error[i];
            error_rate[i] += fabs(error[i])>1;
            /*error[i] = error[i]/(1+fabs(error[i]));*/
        }
        /* Back-propagate error */
        for (i=0;i<outDim;i++)
        {
            float grad = 1-netOut[i]*netOut[i];
            W1_grad[i*(hiddenDim+1)] += error[i]*grad;
            for (j=0;j<hiddenDim;j++)
                W1_grad[i*(hiddenDim+1)+j+1] += grad*error[i]*hidden[j];
        }
        for (i=0;i<hiddenDim;i++)
        {
            double grad;
            grad = 0;
            for (j=0;j<outDim;j++)
                grad += error[j]*W1[j*(hiddenDim+1)+i+1];
            grad *= 1-hidden[i]*hidden[i];
            W0_grad[i*(inDim+1)] += grad;
            for (j=0;j<inDim;j++)
                W0_grad[i*(inDim+1)+j+1] += grad*in[j];
        }
    }
    return rms;
}

#define NB_THREADS 8

sem_t sem_begin[NB_THREADS];
sem_t sem_end[NB_THREADS];

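/* Job slot for one worker thread. The main thread posts sem_begin[id]
   to start a gradient pass over this worker's slice of the data and
   waits on sem_end[id] for the result. */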
struct GradientArg {
    int id;
    int done;
    MLPTrain *net;
    float *inputs;
    float *outputs;
    int nbSamples;
    double *W0_grad;
    double *W1_grad;
    double rms;
    double error_rate[MAX_OUT];
};

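/* Worker loop. Note that W0_grad/W1_grad are VLAs on this thread's
   stack, so the pointers published through arg are only valid while
   the thread is alive. */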
void *gradient_thread_process(void *_arg)
{
    int W0_size, W1_size;
    struct GradientArg *arg = _arg;
    int *topo = arg->net->topo;
    W0_size = (topo[0]+1)*topo[1];
    W1_size = (topo[1]+1)*topo[2];
    double W0_grad[W0_size];
    double W1_grad[W1_size];
    arg->W0_grad = W0_grad;
    arg->W1_grad = W1_grad;
    while (1)
    {
        sem_wait(&sem_begin[arg->id]);
        if (arg->done)
            break;
        arg->rms = compute_gradient(arg->net, arg->inputs, arg->outputs, arg->nbSamples, arg->W0_grad, arg->W1_grad, arg->error_rate);
        sem_post(&sem_end[arg->id]);
    }
    fprintf(stderr, "done\n");
    return NULL;
}

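/* Threaded batch training. Runs up to nbEpoch epochs, keeping the best
   weights seen; after more than 30 consecutive non-improving epochs it
   rolls back to the best weights with reduced rates, and gives up after
   more than 10 such retries (or when a signal sets stopped). Returns
   the lowest error score seen (summed squared error divided by
   outDim*nbSamples). */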
float mlp_train_backprop(MLPTrain *net, float *inputs, float *outputs, int nbSamples, int nbEpoch, float rate)
{
    int i, j;
    int e;
    float best_rms = 1e10;
    int inDim, outDim, hiddenDim;
    int *topo;
    double *W0, *W1, *best_W0, *best_W1;
    double *W0_old, *W1_old;
    double *W0_old2, *W1_old2;
    double *W0_grad, *W1_grad;
    double *W0_oldgrad, *W1_oldgrad;
    double *W0_rate, *W1_rate;
    double *best_W0_rate, *best_W1_rate;
    int W0_size, W1_size;
    topo = net->topo;
    W0_size = (topo[0]+1)*topo[1];
    W1_size = (topo[1]+1)*topo[2];
    struct GradientArg args[NB_THREADS];
    pthread_t thread[NB_THREADS];
    int samplePerPart = nbSamples/NB_THREADS;
    int count_worse=0;
    int count_retries=0;

    inDim = net->topo[0];
    hiddenDim = net->topo[1];
    outDim = net->topo[2];
    W0 = net->weights[0];
    W1 = net->weights[1];
    best_W0 = net->best_weights[0];
    best_W1 = net->best_weights[1];
    W0_old = malloc(W0_size*sizeof(double));
    W1_old = malloc(W1_size*sizeof(double));
    W0_old2 = malloc(W0_size*sizeof(double));
    W1_old2 = malloc(W1_size*sizeof(double));
    W0_grad = malloc(W0_size*sizeof(double));
    W1_grad = malloc(W1_size*sizeof(double));
    W0_oldgrad = malloc(W0_size*sizeof(double));
    W1_oldgrad = malloc(W1_size*sizeof(double));
    W0_rate = malloc(W0_size*sizeof(double));
    W1_rate = malloc(W1_size*sizeof(double));
    best_W0_rate = malloc(W0_size*sizeof(double));
    best_W1_rate = malloc(W1_size*sizeof(double));
    memcpy(W0_old, W0, W0_size*sizeof(double));
    memcpy(W0_old2, W0, W0_size*sizeof(double));
    memset(W0_grad, 0, W0_size*sizeof(double));
    memset(W0_oldgrad, 0, W0_size*sizeof(double));
    memcpy(W1_old, W1, W1_size*sizeof(double));
    memcpy(W1_old2, W1, W1_size*sizeof(double));
    memset(W1_grad, 0, W1_size*sizeof(double));
    memset(W1_oldgrad, 0, W1_size*sizeof(double));

    rate /= nbSamples;
    for (i=0;i<hiddenDim;i++)
        for (j=0;j<inDim+1;j++)
            W0_rate[i*(inDim+1)+j] = rate*net->in_rate[j];
    for (i=0;i<W1_size;i++)
        W1_rate[i] = rate;

    for (i=0;i<NB_THREADS;i++)
    {
        args[i].net = net;
        args[i].inputs = inputs+i*samplePerPart*inDim;
        args[i].outputs = outputs+i*samplePerPart*outDim;
        args[i].nbSamples = samplePerPart;
        args[i].id = i;
        args[i].done = 0;
        sem_init(&sem_begin[i], 0, 0);
        sem_init(&sem_end[i], 0, 0);
        pthread_create(&thread[i], NULL, gradient_thread_process, &args[i]);
    }
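    /* Each worker owns a contiguous slice of samplePerPart samples; any
       remainder (nbSamples % NB_THREADS) is never used. The loop below
       fans out one gradient pass per epoch and reduces the results. */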
    for (e=0;e<nbEpoch;e++)
    {
        double rms=0;
        double error_rate[2] = {0,0};
        for (i=0;i<NB_THREADS;i++)
        {
            sem_post(&sem_begin[i]);
        }
        memset(W0_grad, 0, W0_size*sizeof(double));
        memset(W1_grad, 0, W1_size*sizeof(double));
        for (i=0;i<NB_THREADS;i++)
        {
            sem_wait(&sem_end[i]);
            rms += args[i].rms;
            error_rate[0] += args[i].error_rate[0];
            error_rate[1] += args[i].error_rate[1];
            for (j=0;j<W0_size;j++)
                W0_grad[j] += args[i].W0_grad[j];
            for (j=0;j<W1_size;j++)
                W1_grad[j] += args[i].W1_grad[j];
        }

        float mean_rate = 0, min_rate = 1e10;
        rms = (rms/(outDim*nbSamples));
        error_rate[0] = (error_rate[0]/(nbSamples));
        error_rate[1] = (error_rate[1]/(nbSamples));
        fprintf (stderr, "%f %f (%f %f) ", error_rate[0], error_rate[1], rms, best_rms);
        if (rms < best_rms)
        {
            best_rms = rms;
            for (i=0;i<W0_size;i++)
            {
                best_W0[i] = W0[i];
                best_W0_rate[i] = W0_rate[i];
            }
            for (i=0;i<W1_size;i++)
            {
                best_W1[i] = W1[i];
                best_W1_rate[i] = W1_rate[i];
            }
            count_worse=0;
            count_retries=0;
        } else {
            count_worse++;
            if (count_worse>30)
            {
                count_retries++;
                count_worse=0;
                for (i=0;i<W0_size;i++)
                {
                    W0[i] = best_W0[i];
                    best_W0_rate[i] *= .7;
                    if (best_W0_rate[i]<1e-15) best_W0_rate[i]=1e-15;
                    W0_rate[i] = best_W0_rate[i];
                    W0_grad[i] = 0;
                }
                for (i=0;i<W1_size;i++)
                {
                    W1[i] = best_W1[i];
                    best_W1_rate[i] *= .8;
                    if (best_W1_rate[i]<1e-15) best_W1_rate[i]=1e-15;
                    W1_rate[i] = best_W1_rate[i];
                    W1_grad[i] = 0;
                }
            }
        }
        if (count_retries>10)
            break;
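        /* Per-weight adaptive rates (rprop-style): grow a rate by 1%
           while the gradient keeps its sign between epochs, cut it by
           10% on a sign flip (floored at 1e-15), then step each weight
           along its accumulated gradient. */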
        for (i=0;i<W0_size;i++)
        {
            if (W0_oldgrad[i]*W0_grad[i] > 0)
                W0_rate[i] *= 1.01;
            else if (W0_oldgrad[i]*W0_grad[i] < 0)
                W0_rate[i] *= .9;
            mean_rate += W0_rate[i];
            if (W0_rate[i] < min_rate)
                min_rate = W0_rate[i];
            if (W0_rate[i] < 1e-15)
                W0_rate[i] = 1e-15;
            /*if (W0_rate[i] > .01)
                W0_rate[i] = .01;*/
            W0_oldgrad[i] = W0_grad[i];
            W0_old2[i] = W0_old[i];
            W0_old[i] = W0[i];
            W0[i] += W0_grad[i]*W0_rate[i];
        }
        for (i=0;i<W1_size;i++)
        {
            if (W1_oldgrad[i]*W1_grad[i] > 0)
                W1_rate[i] *= 1.01;
            else if (W1_oldgrad[i]*W1_grad[i] < 0)
                W1_rate[i] *= .9;
            mean_rate += W1_rate[i];
            if (W1_rate[i] < min_rate)
                min_rate = W1_rate[i];
            if (W1_rate[i] < 1e-15)
                W1_rate[i] = 1e-15;
            W1_oldgrad[i] = W1_grad[i];
            W1_old2[i] = W1_old[i];
            W1_old[i] = W1[i];
            W1[i] += W1_grad[i]*W1_rate[i];
        }
        mean_rate /= (topo[0]+1)*topo[1] + (topo[1]+1)*topo[2];
        fprintf (stderr, "%g %d", mean_rate, e);
        if (count_retries)
            fprintf(stderr, " %d", count_retries);
        fprintf(stderr, "\n");
        if (stopped)
            break;
    }
    for (i=0;i<NB_THREADS;i++)
    {
        args[i].done = 1;
        sem_post(&sem_begin[i]);
        pthread_join(thread[i], NULL);
        fprintf (stderr, "joined %d\n", i);
    }
    free(W0_old);
    free(W1_old);
    free(W0_old2);
    free(W1_old2);
    free(W0_grad);
    free(W1_grad);
    free(W0_oldgrad);
    free(W1_oldgrad);
    free(W0_rate);
    free(W1_rate);
    free(best_W0_rate);
    free(best_W1_rate);
    return best_rms;
}

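/* Driver: reads whitespace-separated sample rows (nbInputs features then
   nbOutputs targets per sample) from stdin, trains, and writes the
   resulting network to stdout as C source for use with mlp.h. */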
int main(int argc, char **argv)
{
    int i, j;
    int nbInputs;
    int nbOutputs;
    int nbHidden;
    int nbSamples;
    int nbEpoch;
    int nbRealInputs;
    unsigned int seed;
    int ret;
    float rms;
    float *inputs;
    float *outputs;
    if (argc!=6)
    {
        fprintf (stderr, "usage: mlp_train <inputs> <hidden> <outputs> <nb samples> <nb epoch>\n");
        return 1;
    }
    nbInputs = atoi(argv[1]);
    nbHidden = atoi(argv[2]);
    nbOutputs = atoi(argv[3]);
    nbSamples = atoi(argv[4]);
    nbEpoch = atoi(argv[5]);
    nbRealInputs = nbInputs;
    inputs = malloc(nbInputs*nbSamples*sizeof(*inputs));
    outputs = malloc(nbOutputs*nbSamples*sizeof(*outputs));

    seed = time(NULL);
    /*seed = 1361480659;*/
    fprintf (stderr, "Seed is %u\n", seed);
    srand(seed);
    build_tansig_table();
    signal(SIGTERM, handler);
    signal(SIGINT, handler);
    signal(SIGHUP, handler);
    for (i=0;i<nbSamples;i++)
    {
        for (j=0;j<nbRealInputs;j++)
            ret = scanf(" %f", &inputs[i*nbInputs+j]);
        for (j=0;j<nbOutputs;j++)
            ret = scanf(" %f", &outputs[i*nbOutputs+j]);
        if (feof(stdin))
        {
            nbSamples = i;
            break;
        }
    }
    int topo[3] = {nbInputs, nbHidden, nbOutputs};
    MLPTrain *net;

    fprintf (stderr, "Got %d samples\n", nbSamples);
    net = mlp_init(topo, 3, inputs, outputs, nbSamples);
    rms = mlp_train_backprop(net, inputs, outputs, nbSamples, nbEpoch, 1);
    printf ("#include \"mlp.h\"\n\n");
    printf ("/* RMS error was %f, seed was %u */\n\n", rms, seed);
    printf ("static const float weights[%d] = {\n", (topo[0]+1)*topo[1] + (topo[1]+1)*topo[2]);
    printf ("\n/* hidden layer */\n");
    for (i=0;i<(topo[0]+1)*topo[1];i++)
    {
        printf ("%gf, ", net->weights[0][i]);
        if (i%5==4)
            printf("\n");
    }
    printf ("\n/* output layer */\n");
    for (i=0;i<(topo[1]+1)*topo[2];i++)
    {
        printf ("%g, ", net->weights[1][i]);
        if (i%5==4)
            printf("\n");
    }
    printf ("};\n\n");
    printf ("static const int topo[3] = {%d, %d, %d};\n\n", topo[0], topo[1], topo[2]);
    printf ("const MLP net = {\n");
    printf ("\t3,\n");
    printf ("\ttopo,\n");
    printf ("\tweights\n};\n");
    return 0;
}
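
A sketch of how this tool is meant to be invoked, based on the usage string in main() (the dimensions and file names below are made up for illustration; samples come from stdin, generated C source goes to stdout):

  ./mlp_train 25 16 2 100000 2000 < train_data.txt > mlp_data.c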