OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ |
| 10 |
| 11 #include <math.h> |
| 12 #include <stdio.h> |
| 13 #include <stdlib.h> |
| 14 #include <sys/resource.h> |
| 15 #include <sys/time.h> |
| 16 #include <unistd.h> |
| 17 |
| 18 #include "dl/sp/api/armSP.h" |
| 19 #include "dl/sp/api/omxSP.h" |
| 20 #include "dl/sp/src/test/aligned_ptr.h" |
| 21 #include "dl/sp/src/test/gensig.h" |
| 22 |
| 23 #define MAX_FFT_ORDER TWIDDLE_TABLE_ORDER |
| 24 #define MAX_FFT_ORDER_FIXED_POINT 12 |
| 25 |
/*
 * Forward declarations for the timing routines defined later in this file.
 * Each TimeOneXxx function times a single FFT order (fft_log_size); the
 * matching TimeXxx function calls it for a range of orders.
 */
void TimeOneFloatFFT(int count, int fft_log_size, float signal_value,
                     int signal_type);
void TimeFloatFFT(int count, float signal_value, int signal_type);
void TimeOneFloatRFFT(int count, int fft_log_size, float signal_value,
                      int signal_type);
void TimeFloatRFFT(int count, float signal_value, int signal_type);
void TimeOneSC32FFT(int count, int fft_log_size, float signal_value,
                    int signal_type);
void TimeSC32FFT(int count, float signal_value, int signal_type);
void TimeOneRFFT16(int count, int fft_log_size, float signal_value,
                   int signal_type);
void TimeRFFT16(int count, float signal_value, int signal_type);
void TimeOneRFFT32(int count, int fft_log_size, float signal_value,
                   int signal_type);
void TimeRFFT32(int count, float signal_value, int signal_type);

/* Output verbosity (-v); 0 selects the terse tab-separated result lines. */
static int verbose = 1;
/* Set by -C: fixed-point tests also time float<->fixed conversion loops. */
static int include_conversion = 0;
/* Cleared by -A: when set, ComputeCount() scales the count by FFT order. */
static int adapt_count = 1;
/* Cleared by -F: when set, the forward transforms are timed. */
static int do_forward_test = 1;
/* Cleared by -I: when set, the inverse transforms are timed. */
static int do_inverse_test = 1;
/* Smallest FFT order covered by the TimeXxx sweeps (-m). */
static int min_fft_order = 2;
/* Largest FFT order covered by the TimeXxx sweeps (-M). */
static int max_fft_order = MAX_FFT_ORDER;
| 49 |
/*
 * Print the command-line help text to stderr and terminate the program.
 *
 * prog: program path (normally argv[0]); it is passed through
 *       ProgramName() before being printed.
 *
 * Does not return: the function always ends with exit(0).
 */
void TimeFFTUsage(const char* prog) {
  /* Synopsis; -v is listed here because the option parser accepts it. */
  fprintf(stderr,
          "%s: [-hTFICA] [-f fft] [-c count] [-n logsize] [-s scale]\n"
          " [-g signal-type] [-S signal value]\n"
          " [-m minFFTsize] [-M maxFFTsize] [-v level]\n",
          ProgramName(prog));
  fprintf(stderr,
          "Simple FFT timing tests\n"
          " -h This help\n"
          " -v level Verbose output level (default = 1)\n"
          " -F Skip forward FFT tests\n"
          " -I Skip inverse FFT tests\n"
          " -C Include float-to-fixed and fixed-to-float cost for"
          " real\n"
          " 16-bit FFT (forward and inverse)\n"
          " -c count Number of FFTs to compute for timing. This is a\n"
          " lower limit; shorter FFTs will do more FFTs such"
          " that the\n"
          " elapsed time is very roughly constant, if -A is"
          " not given.\n"
          " -A Don't adapt the count given by -c; use specified"
          " value\n"
          " -m min Minimum FFT order to test\n"
          " -M max Maximum FFT order to test\n"
          " -T Run just one FFT timing test\n"
          " -f FFT type:\n"
          " 0 - Complex Float\n"
          " 1 - Real Float\n"
          " 2 - Complex 32-bit\n"
          " 3 - Real 16-bit\n"
          " 4 - Real 32-bit\n"
          " -n logsize Log2 of FFT size\n"
          " -s scale Scale factor for forward FFT (default = 0)\n"
          " -S signal Base value for the test signal (default = 1024)\n"
          " -g type Input signal type:\n"
          " 0 - Constant signal S + i*S. (Default value.)\n"
          " 1 - Real ramp starting at S/N, N = FFT size\n"
          " 2 - Sine wave of amplitude S\n"
          " 3 - Complex signal whose transform is a sine wave.\n"
          "\n"
          "Use -v 0 in combination with -F or -I to get output that can\n"
          "be pasted into a spreadsheet.\n"
          "\n"
          "Most of the options listed after -T above are only applicable\n"
          "when -T is given to test just one FFT size and FFT type.\n"
          "\n");
  exit(0);
}
| 99 |
| 100 void main(int argc, char* argv[]) { |
| 101 int fft_log_size = 4; |
| 102 float signal_value = 1024; |
| 103 int signal_type = 0; |
| 104 int test_mode = 1; |
| 105 int count = 100; |
| 106 int fft_type = 0; |
| 107 int fft_type_given = 0; |
| 108 |
| 109 int opt; |
| 110 |
| 111 while ((opt = getopt(argc, argv, "hTFICAc:n:s:S:g:v:f:m:M:")) != -1) { |
| 112 switch (opt) { |
| 113 case 'h': |
| 114 TimeFFTUsage(argv[0]); |
| 115 break; |
| 116 case 'T': |
| 117 test_mode = 0; |
| 118 break; |
| 119 case 'C': |
| 120 include_conversion = 1; |
| 121 break; |
| 122 case 'F': |
| 123 do_forward_test = 0; |
| 124 break; |
| 125 case 'I': |
| 126 do_inverse_test = 0; |
| 127 break; |
| 128 case 'A': |
| 129 adapt_count = 0; |
| 130 break; |
| 131 case 'c': |
| 132 count = atoi(optarg); |
| 133 break; |
| 134 case 'n': |
| 135 fft_log_size = atoi(optarg); |
| 136 break; |
| 137 case 'S': |
| 138 signal_value = atof(optarg); |
| 139 break; |
| 140 case 'g': |
| 141 signal_type = atoi(optarg); |
| 142 break; |
| 143 case 'v': |
| 144 verbose = atoi(optarg); |
| 145 break; |
| 146 case 'f': |
| 147 fft_type = atoi(optarg); |
| 148 fft_type_given = 1; |
| 149 break; |
| 150 case 'm': |
| 151 min_fft_order = atoi(optarg); |
| 152 if (min_fft_order <= 2) { |
| 153 fprintf(stderr, "Setting min FFT order to 2 (from %d)\n", |
| 154 min_fft_order); |
| 155 min_fft_order = 2; |
| 156 } |
| 157 break; |
| 158 case 'M': |
| 159 max_fft_order = atoi(optarg); |
| 160 if (max_fft_order > MAX_FFT_ORDER) { |
| 161 fprintf(stderr, "Setting max FFT order to %d (from %d)\n", |
| 162 MAX_FFT_ORDER, max_fft_order); |
| 163 max_fft_order = MAX_FFT_ORDER; |
| 164 } |
| 165 break; |
| 166 default: |
| 167 TimeFFTUsage(argv[0]); |
| 168 break; |
| 169 } |
| 170 } |
| 171 |
| 172 if (test_mode && fft_type_given) |
| 173 printf("Warning: -f ignored when -T not specified\n"); |
| 174 |
| 175 if (test_mode) { |
| 176 TimeFloatFFT(count, signal_value, signal_type); |
| 177 TimeFloatRFFT(count, signal_value, signal_type); |
| 178 TimeSC32FFT(count, signal_value, signal_type); |
| 179 TimeRFFT16(count, signal_value, signal_type); |
| 180 TimeRFFT32(count, signal_value, signal_type); |
| 181 } else { |
| 182 switch (fft_type) { |
| 183 case 0: |
| 184 TimeOneFloatFFT(count, fft_log_size, signal_value, signal_type); |
| 185 break; |
| 186 case 1: |
| 187 TimeOneFloatRFFT(count, fft_log_size, signal_value, signal_type); |
| 188 break; |
| 189 case 2: |
| 190 TimeOneSC32FFT(count, fft_log_size, signal_value, signal_type); |
| 191 break; |
| 192 case 3: |
| 193 TimeOneRFFT16(count, fft_log_size, signal_value, signal_type); |
| 194 break; |
| 195 case 4: |
| 196 TimeOneRFFT32(count, fft_log_size, signal_value, signal_type); |
| 197 break; |
| 198 default: |
| 199 fprintf(stderr, "Unknown FFT type: %d\n", fft_type); |
| 200 break; |
| 201 } |
| 202 } |
| 203 } |
| 204 |
/*
 * Store the user-mode CPU time consumed so far by this process in *time.
 *
 * time: output; receives the ru_utime field reported by
 *       getrusage(RUSAGE_SELF).  If getrusage() fails, *time is set to zero
 *       so callers never read an indeterminate value.
 */
void GetUserTime(struct timeval* time) {
  struct rusage usage;

  if (getrusage(RUSAGE_SELF, &usage) != 0) {
    time->tv_sec = 0;
    time->tv_usec = 0;
    return;
  }

  /* Plain struct assignment: no memcpy, so no <string.h> dependency. */
  *time = usage.ru_utime;
}
| 210 |
/*
 * Return the interval from *start to *end in seconds.
 *
 * Each timestamp is first converted to a double (seconds plus microseconds
 * scaled by 1e-6); the result is the end value minus the start value.
 */
double TimeDifference(const struct timeval * start,
                      const struct timeval * end) {
  const double begin_seconds = start->tv_sec + start->tv_usec * 1e-6;
  const double finish_seconds = end->tv_sec + end->tv_usec * 1e-6;

  return finish_seconds - begin_seconds;
}
| 220 |
| 221 void PrintResult(const char* prefix, int fft_log_size, double elapsed_time, |
| 222 int count) { |
| 223 if (verbose == 0) { |
| 224 printf("%2d\t%8.4f\t%8d\t%.4e\n", |
| 225 fft_log_size, elapsed_time, count, 1000 * elapsed_time / count); |
| 226 } else { |
| 227 printf("%-18s: order %2d: %8.4f sec for %8d FFTs: %.4e msec/FFT\n", |
| 228 prefix, fft_log_size, elapsed_time, count, |
| 229 1000 * elapsed_time / count); |
| 230 } |
| 231 } |
| 232 |
| 233 int ComputeCount(int nominal_count, int fft_log_size) { |
| 234 /* |
| 235 * Try to figure out how many repetitions to do for a given FFT |
| 236 * order (fft_log_size) given that we want a repetition of |
| 237 * nominal_count for order 15 FFTs to be the approsimate amount of |
| 238 * time we want to for all tests. |
| 239 */ |
| 240 |
| 241 int count; |
| 242 if (adapt_count) { |
| 243 double maxTime = ((double) nominal_count) * (1 << MAX_FFT_ORDER) |
| 244 * MAX_FFT_ORDER; |
| 245 double c = maxTime / ((1 << fft_log_size) * fft_log_size); |
| 246 const int max_count = 10000000; |
| 247 |
| 248 count = (c > max_count) ? max_count : c; |
| 249 } else { |
| 250 count = nominal_count; |
| 251 } |
| 252 |
| 253 return count; |
| 254 } |
| 255 |
| 256 void TimeOneFloatFFT(int count, int fft_log_size, float signal_value, |
| 257 int signal_type) { |
| 258 struct AlignedPtr* x_aligned; |
| 259 struct AlignedPtr* y_aligned; |
| 260 struct AlignedPtr* z_aligned; |
| 261 |
| 262 struct ComplexFloat* x; |
| 263 struct ComplexFloat* y; |
| 264 OMX_FC32* z; |
| 265 |
| 266 struct ComplexFloat* y_true; |
| 267 |
| 268 OMX_INT n, fft_spec_buffer_size; |
| 269 OMXFFTSpec_C_FC32 * fft_fwd_spec = NULL; |
| 270 OMXFFTSpec_C_FC32 * fft_inv_spec = NULL; |
| 271 int fft_size; |
| 272 struct timeval start_time; |
| 273 struct timeval end_time; |
| 274 double elapsed_time; |
| 275 |
| 276 fft_size = 1 << fft_log_size; |
| 277 |
| 278 x_aligned = AllocAlignedPointer(32, sizeof(*x) * fft_size); |
| 279 y_aligned = AllocAlignedPointer(32, sizeof(*y) * (fft_size + 2)); |
| 280 z_aligned = AllocAlignedPointer(32, sizeof(*z) * fft_size); |
| 281 |
| 282 y_true = (struct ComplexFloat*) malloc(sizeof(*y_true) * fft_size); |
| 283 |
| 284 x = x_aligned->aligned_pointer_; |
| 285 y = y_aligned->aligned_pointer_; |
| 286 z = z_aligned->aligned_pointer_; |
| 287 |
| 288 GenerateTestSignalAndFFT(x, y_true, fft_size, signal_type, signal_value, 0); |
| 289 |
| 290 omxSP_FFTGetBufSize_C_FC32(fft_log_size, &fft_spec_buffer_size); |
| 291 |
| 292 fft_fwd_spec = (OMXFFTSpec_C_FC32*) malloc(fft_spec_buffer_size); |
| 293 fft_inv_spec = (OMXFFTSpec_C_FC32*) malloc(fft_spec_buffer_size); |
| 294 omxSP_FFTInit_C_FC32(fft_fwd_spec, fft_log_size); |
| 295 omxSP_FFTInit_C_FC32(fft_inv_spec, fft_log_size); |
| 296 |
| 297 if (do_forward_test) { |
| 298 GetUserTime(&start_time); |
| 299 for (n = 0; n < count; ++n) { |
| 300 omxSP_FFTFwd_CToC_FC32_Sfs(x, y, fft_fwd_spec); |
| 301 } |
| 302 GetUserTime(&end_time); |
| 303 |
| 304 elapsed_time = TimeDifference(&start_time, &end_time); |
| 305 |
| 306 PrintResult("Forward Float FFT", fft_log_size, elapsed_time, count); |
| 307 } |
| 308 |
| 309 if (do_inverse_test) { |
| 310 GetUserTime(&start_time); |
| 311 for (n = 0; n < count; ++n) { |
| 312 omxSP_FFTInv_CToC_FC32_Sfs(y, z, fft_inv_spec); |
| 313 } |
| 314 GetUserTime(&end_time); |
| 315 |
| 316 elapsed_time = TimeDifference(&start_time, &end_time); |
| 317 |
| 318 PrintResult("Inverse Float FFT", fft_log_size, elapsed_time, count); |
| 319 } |
| 320 |
| 321 FreeAlignedPointer(x_aligned); |
| 322 FreeAlignedPointer(y_aligned); |
| 323 FreeAlignedPointer(z_aligned); |
| 324 free(y_true); |
| 325 free(fft_fwd_spec); |
| 326 free(fft_inv_spec); |
| 327 } |
| 328 |
| 329 void TimeFloatFFT(int count, float signal_value, int signal_type) { |
| 330 int k; |
| 331 |
| 332 if (verbose == 0) |
| 333 printf("Float FFT\n"); |
| 334 |
| 335 for (k = min_fft_order; k <= max_fft_order; ++k) { |
| 336 int testCount = ComputeCount(count, k); |
| 337 TimeOneFloatFFT(testCount, k, signal_value, signal_type); |
| 338 } |
| 339 } |
| 340 |
| 341 void GenerateRealFloatSignal(OMX_F32* x, OMX_FC32* fft, int size, |
| 342 int signal_type, float signal_value) |
| 343 { |
| 344 int k; |
| 345 struct ComplexFloat *test_signal; |
| 346 struct ComplexFloat *true_fft; |
| 347 |
| 348 test_signal = (struct ComplexFloat*) malloc(sizeof(*test_signal) * size); |
| 349 true_fft = (struct ComplexFloat*) malloc(sizeof(*true_fft) * size); |
| 350 GenerateTestSignalAndFFT(test_signal, true_fft, size, signal_type, |
| 351 signal_value, 1); |
| 352 |
| 353 /* |
| 354 * Convert the complex result to what we want |
| 355 */ |
| 356 |
| 357 for (k = 0; k < size; ++k) { |
| 358 x[k] = test_signal[k].Re; |
| 359 } |
| 360 |
| 361 for (k = 0; k < size / 2 + 1; ++k) { |
| 362 fft[k].Re = true_fft[k].Re; |
| 363 fft[k].Im = true_fft[k].Im; |
| 364 } |
| 365 |
| 366 free(test_signal); |
| 367 free(true_fft); |
| 368 } |
| 369 |
| 370 void TimeOneFloatRFFT(int count, int fft_log_size, float signal_value, |
| 371 int signal_type) { |
| 372 OMX_F32* x; /* Source */ |
| 373 OMX_F32* y; /* Transform */ |
| 374 OMX_F32* z; /* Inverse transform */ |
| 375 |
| 376 OMX_F32* y_true; /* True FFT */ |
| 377 |
| 378 struct AlignedPtr* x_aligned; |
| 379 struct AlignedPtr* y_aligned; |
| 380 struct AlignedPtr* z_aligned; |
| 381 |
| 382 |
| 383 OMX_INT n, fft_spec_buffer_size; |
| 384 OMXResult status; |
| 385 OMXFFTSpec_R_F32 * fft_fwd_spec = NULL; |
| 386 OMXFFTSpec_R_F32 * fft_inv_spec = NULL; |
| 387 int fft_size; |
| 388 struct timeval start_time; |
| 389 struct timeval end_time; |
| 390 double elapsed_time; |
| 391 |
| 392 fft_size = 1 << fft_log_size; |
| 393 |
| 394 x_aligned = AllocAlignedPointer(32, sizeof(*x) * fft_size); |
| 395 /* The transformed value is in CCS format and is has fft_size + 2 values */ |
| 396 y_aligned = AllocAlignedPointer(32, sizeof(*y) * (fft_size + 2)); |
| 397 z_aligned = AllocAlignedPointer(32, sizeof(*z) * fft_size); |
| 398 |
| 399 x = x_aligned->aligned_pointer_; |
| 400 y = y_aligned->aligned_pointer_; |
| 401 z = z_aligned->aligned_pointer_; |
| 402 |
| 403 y_true = (OMX_F32*) malloc(sizeof(*y_true) * (fft_size + 2)); |
| 404 |
| 405 GenerateRealFloatSignal(x, (OMX_FC32*) y_true, fft_size, signal_type, |
| 406 signal_value); |
| 407 |
| 408 status = omxSP_FFTGetBufSize_R_F32(fft_log_size, &fft_spec_buffer_size); |
| 409 |
| 410 fft_fwd_spec = (OMXFFTSpec_R_F32*) malloc(fft_spec_buffer_size); |
| 411 fft_inv_spec = (OMXFFTSpec_R_F32*) malloc(fft_spec_buffer_size); |
| 412 status = omxSP_FFTInit_R_F32(fft_fwd_spec, fft_log_size); |
| 413 |
| 414 status = omxSP_FFTInit_R_F32(fft_inv_spec, fft_log_size); |
| 415 |
| 416 if (do_forward_test) { |
| 417 GetUserTime(&start_time); |
| 418 for (n = 0; n < count; ++n) { |
| 419 omxSP_FFTFwd_RToCCS_F32_Sfs(x, y, fft_fwd_spec); |
| 420 } |
| 421 GetUserTime(&end_time); |
| 422 |
| 423 elapsed_time = TimeDifference(&start_time, &end_time); |
| 424 |
| 425 PrintResult("Forward Float RFFT", fft_log_size, elapsed_time, count); |
| 426 } |
| 427 |
| 428 if (do_inverse_test) { |
| 429 GetUserTime(&start_time); |
| 430 for (n = 0; n < count; ++n) { |
| 431 omxSP_FFTInv_CCSToR_F32_Sfs(y, z, fft_inv_spec); |
| 432 } |
| 433 GetUserTime(&end_time); |
| 434 |
| 435 elapsed_time = TimeDifference(&start_time, &end_time); |
| 436 |
| 437 PrintResult("Inverse Float RFFT", fft_log_size, elapsed_time, count); |
| 438 } |
| 439 |
| 440 FreeAlignedPointer(x_aligned); |
| 441 FreeAlignedPointer(y_aligned); |
| 442 FreeAlignedPointer(z_aligned); |
| 443 free(fft_fwd_spec); |
| 444 free(fft_inv_spec); |
| 445 } |
| 446 |
| 447 void TimeFloatRFFT(int count, float signal_value, int signal_type) { |
| 448 int k; |
| 449 |
| 450 if (verbose == 0) |
| 451 printf("Float RFFT\n"); |
| 452 |
| 453 for (k = min_fft_order; k <= max_fft_order; ++k) { |
| 454 int testCount = ComputeCount(count, k); |
| 455 TimeOneFloatRFFT(testCount, k, signal_value, signal_type); |
| 456 } |
| 457 } |
| 458 |
| 459 void generateSC32Signal(OMX_SC32* x, OMX_SC32* fft, int size, int signal_type, |
| 460 float signal_value) { |
| 461 int k; |
| 462 struct ComplexFloat *test_signal; |
| 463 struct ComplexFloat *true_fft; |
| 464 |
| 465 test_signal = (struct ComplexFloat*) malloc(sizeof(*test_signal) * size); |
| 466 true_fft = (struct ComplexFloat*) malloc(sizeof(*true_fft) * size); |
| 467 GenerateTestSignalAndFFT(test_signal, true_fft, size, signal_type, |
| 468 signal_value, 0); |
| 469 |
| 470 /* |
| 471 * Convert the complex result to what we want |
| 472 */ |
| 473 |
| 474 for (k = 0; k < size; ++k) { |
| 475 x[k].Re = 0.5 + test_signal[k].Re; |
| 476 x[k].Im = 0.5 + test_signal[k].Im; |
| 477 fft[k].Re = 0.5 + true_fft[k].Re; |
| 478 fft[k].Im = 0.5 + true_fft[k].Im; |
| 479 } |
| 480 |
| 481 free(test_signal); |
| 482 free(true_fft); |
| 483 } |
| 484 |
| 485 void TimeOneSC32FFT(int count, int fft_log_size, float signal_value, |
| 486 int signal_type) { |
| 487 OMX_SC32* x; |
| 488 OMX_SC32* y; |
| 489 OMX_SC32* z; |
| 490 |
| 491 struct AlignedPtr* x_aligned; |
| 492 struct AlignedPtr* y_aligned; |
| 493 struct AlignedPtr* z_aligned; |
| 494 |
| 495 OMX_SC32* y_true; |
| 496 OMX_SC32* temp32a; |
| 497 OMX_SC32* temp32b; |
| 498 |
| 499 OMX_INT n, fft_spec_buffer_size; |
| 500 OMXResult status; |
| 501 OMXFFTSpec_C_SC32 * fft_fwd_spec = NULL; |
| 502 OMXFFTSpec_C_SC32 * fft_inv_spec = NULL; |
| 503 int fft_size; |
| 504 struct timeval start_time; |
| 505 struct timeval end_time; |
| 506 double elapsed_time; |
| 507 |
| 508 fft_size = 1 << fft_log_size; |
| 509 |
| 510 x_aligned = AllocAlignedPointer(32, sizeof(*x) * fft_size); |
| 511 y_aligned = AllocAlignedPointer(32, sizeof(*y) * fft_size); |
| 512 z_aligned = AllocAlignedPointer(32, sizeof(*z) * fft_size); |
| 513 y_true = (OMX_SC32*) malloc(sizeof(*y_true) * fft_size); |
| 514 temp32a = (OMX_SC32*) malloc(sizeof(*temp32a) * fft_size); |
| 515 temp32b = (OMX_SC32*) malloc(sizeof(*temp32b) * fft_size); |
| 516 |
| 517 x = x_aligned->aligned_pointer_; |
| 518 y = y_aligned->aligned_pointer_; |
| 519 z = z_aligned->aligned_pointer_; |
| 520 |
| 521 generateSC32Signal(x, y_true, fft_size, signal_type, signal_value); |
| 522 |
| 523 status = omxSP_FFTGetBufSize_C_SC32(fft_log_size, &fft_spec_buffer_size); |
| 524 |
| 525 fft_fwd_spec = (OMXFFTSpec_C_SC32*) malloc(fft_spec_buffer_size); |
| 526 fft_inv_spec = (OMXFFTSpec_C_SC32*) malloc(fft_spec_buffer_size); |
| 527 status = omxSP_FFTInit_C_SC32(fft_fwd_spec, fft_log_size); |
| 528 |
| 529 status = omxSP_FFTInit_C_SC32(fft_inv_spec, fft_log_size); |
| 530 |
| 531 if (do_forward_test) { |
| 532 if (include_conversion) { |
| 533 int k; |
| 534 float factor = -1; |
| 535 |
| 536 GetUserTime(&start_time); |
| 537 for (k = 0; k < count; ++k) { |
| 538 for (n = 0; n < fft_size; ++n) { |
| 539 if (fabs(x[n].Re) > factor) { |
| 540 factor = fabs(x[n].Re); |
| 541 } |
| 542 if (fabs(x[n].Im) > factor) { |
| 543 factor = fabs(x[n].Im); |
| 544 } |
| 545 } |
| 546 |
| 547 factor = ((1 << 18) - 1) / factor; |
| 548 for (n = 0; n < fft_size; ++n) { |
| 549 temp32a[n].Re = factor * x[n].Re; |
| 550 temp32a[n].Im = factor * x[n].Im; |
| 551 } |
| 552 |
| 553 omxSP_FFTFwd_CToC_SC32_Sfs(x, y, fft_fwd_spec, 0); |
| 554 |
| 555 factor = 1 / factor; |
| 556 for (n = 0; n < fft_size; ++n) { |
| 557 temp32b[n].Re = y[n].Re * factor; |
| 558 temp32b[n].Im = y[n].Im * factor; |
| 559 } |
| 560 } |
| 561 GetUserTime(&end_time); |
| 562 } else { |
| 563 GetUserTime(&start_time); |
| 564 for (n = 0; n < count; ++n) { |
| 565 omxSP_FFTFwd_CToC_SC32_Sfs(x, y, fft_fwd_spec, 0); |
| 566 } |
| 567 GetUserTime(&end_time); |
| 568 } |
| 569 |
| 570 elapsed_time = TimeDifference(&start_time, &end_time); |
| 571 |
| 572 PrintResult("Forward SC32 FFT", fft_log_size, elapsed_time, count); |
| 573 } |
| 574 |
| 575 if (do_inverse_test) { |
| 576 if (include_conversion) { |
| 577 int k; |
| 578 float factor = -1; |
| 579 |
| 580 GetUserTime(&start_time); |
| 581 for (k = 0; k < count; ++k) { |
| 582 for (n = 0; n < fft_size; ++n) { |
| 583 if (fabs(x[n].Re) > factor) { |
| 584 factor = fabs(x[n].Re); |
| 585 } |
| 586 if (fabs(x[n].Im) > factor) { |
| 587 factor = fabs(x[n].Im); |
| 588 } |
| 589 } |
| 590 factor = ((1 << 18) - 1) / factor; |
| 591 for (n = 0; n < fft_size; ++n) { |
| 592 temp32a[n].Re = factor * x[n].Re; |
| 593 temp32a[n].Im = factor * x[n].Im; |
| 594 } |
| 595 |
| 596 status = omxSP_FFTInv_CToC_SC32_Sfs(y, z, fft_inv_spec, 0); |
| 597 |
| 598 factor = 1 / factor; |
| 599 for (n = 0; n < fft_size; ++n) { |
| 600 temp32b[n].Re = y[n].Re * factor; |
| 601 temp32b[n].Im = y[n].Im * factor; |
| 602 } |
| 603 } |
| 604 GetUserTime(&end_time); |
| 605 } else { |
| 606 GetUserTime(&start_time); |
| 607 for (n = 0; n < count; ++n) { |
| 608 status = omxSP_FFTInv_CToC_SC32_Sfs(y, z, fft_inv_spec, 0); |
| 609 } |
| 610 GetUserTime(&end_time); |
| 611 } |
| 612 |
| 613 elapsed_time = TimeDifference(&start_time, &end_time); |
| 614 |
| 615 PrintResult("Inverse SC32 FFT", fft_log_size, elapsed_time, count); |
| 616 } |
| 617 |
| 618 FreeAlignedPointer(x_aligned); |
| 619 FreeAlignedPointer(y_aligned); |
| 620 FreeAlignedPointer(z_aligned); |
| 621 free(temp32a); |
| 622 free(temp32b); |
| 623 free(fft_fwd_spec); |
| 624 free(fft_inv_spec); |
| 625 } |
| 626 |
| 627 void TimeSC32FFT(int count, float signal_value, int signal_type) { |
| 628 int k; |
| 629 int max_order = (max_fft_order > MAX_FFT_ORDER_FIXED_POINT) |
| 630 ? MAX_FFT_ORDER_FIXED_POINT : max_fft_order; |
| 631 |
| 632 if (verbose == 0) |
| 633 printf("SC32 FFT\n"); |
| 634 |
| 635 for (k = min_fft_order; k <= max_order; ++k) { |
| 636 int testCount = ComputeCount(count, k); |
| 637 TimeOneSC32FFT(testCount, k, signal_value, signal_type); |
| 638 } |
| 639 } |
| 640 |
| 641 void GenerateRFFT16Signal(OMX_S16* x, OMX_SC32* fft, int size, int signal_type, |
| 642 float signal_value) { |
| 643 int k; |
| 644 struct ComplexFloat *test_signal; |
| 645 struct ComplexFloat *true_fft; |
| 646 |
| 647 test_signal = (struct ComplexFloat*) malloc(sizeof(*test_signal) * size); |
| 648 true_fft = (struct ComplexFloat*) malloc(sizeof(*true_fft) * size); |
| 649 GenerateTestSignalAndFFT(test_signal, true_fft, size, signal_type, |
| 650 signal_value, 1); |
| 651 |
| 652 /* |
| 653 * Convert the complex result to what we want |
| 654 */ |
| 655 |
| 656 for (k = 0; k < size; ++k) { |
| 657 x[k] = test_signal[k].Re; |
| 658 } |
| 659 |
| 660 for (k = 0; k < size / 2 + 1; ++k) { |
| 661 fft[k].Re = true_fft[k].Re; |
| 662 fft[k].Im = true_fft[k].Im; |
| 663 } |
| 664 |
| 665 free(test_signal); |
| 666 free(true_fft); |
| 667 } |
| 668 |
| 669 void TimeOneRFFT16(int count, int fft_log_size, float signal_value, |
| 670 int signal_type) { |
| 671 OMX_S16* x; |
| 672 OMX_S32* y; |
| 673 OMX_S16* z; |
| 674 OMX_S32* y_true; |
| 675 OMX_F32* xr; |
| 676 OMX_F32* yrTrue; |
| 677 |
| 678 struct AlignedPtr* x_aligned; |
| 679 struct AlignedPtr* y_aligned; |
| 680 struct AlignedPtr* z_aligned; |
| 681 struct AlignedPtr* y_trueAligned; |
| 682 struct AlignedPtr* xr_aligned; |
| 683 struct AlignedPtr* yr_true_aligned; |
| 684 |
| 685 |
| 686 OMX_S16* temp16; |
| 687 OMX_S32* temp32; |
| 688 |
| 689 |
| 690 OMX_INT n, fft_spec_buffer_size; |
| 691 OMXResult status; |
| 692 OMXFFTSpec_R_S16S32 * fft_fwd_spec = NULL; |
| 693 OMXFFTSpec_R_S16S32 * fft_inv_spec = NULL; |
| 694 int fft_size; |
| 695 struct timeval start_time; |
| 696 struct timeval end_time; |
| 697 double elapsed_time; |
| 698 int scaleFactor; |
| 699 |
| 700 fft_size = 1 << fft_log_size; |
| 701 scaleFactor = fft_log_size; |
| 702 |
| 703 x_aligned = AllocAlignedPointer(32, sizeof(*x) * fft_size); |
| 704 y_aligned = AllocAlignedPointer(32, sizeof(*y) * (fft_size + 2)); |
| 705 z_aligned = AllocAlignedPointer(32, sizeof(*z) * fft_size); |
| 706 |
| 707 y_trueAligned = AllocAlignedPointer(32, sizeof(*y_true) * (fft_size + 2)); |
| 708 |
| 709 xr_aligned = AllocAlignedPointer(32, sizeof(*xr) * fft_size); |
| 710 yr_true_aligned = AllocAlignedPointer(32, sizeof(*yrTrue) * (fft_size + 2)); |
| 711 |
| 712 x = x_aligned->aligned_pointer_; |
| 713 y = y_aligned->aligned_pointer_; |
| 714 z = z_aligned->aligned_pointer_; |
| 715 y_true = y_trueAligned->aligned_pointer_; |
| 716 xr = xr_aligned->aligned_pointer_; |
| 717 yrTrue = yr_true_aligned->aligned_pointer_; |
| 718 |
| 719 temp16 = (OMX_S16*) malloc(sizeof(*temp16) * fft_size); |
| 720 temp32 = (OMX_S32*) malloc(sizeof(*temp32) * fft_size); |
| 721 |
| 722 |
| 723 GenerateRFFT16Signal(x, (OMX_SC32*) y_true, fft_size, signal_type, |
| 724 signal_value); |
| 725 /* |
| 726 * Generate a real version so we can measure scaling costs |
| 727 */ |
| 728 GenerateRealFloatSignal(xr, (OMX_FC32*) yrTrue, fft_size, signal_type, |
| 729 signal_value); |
| 730 |
| 731 status = omxSP_FFTGetBufSize_R_S16S32(fft_log_size, &fft_spec_buffer_size); |
| 732 |
| 733 fft_fwd_spec = (OMXFFTSpec_R_S16S32*) malloc(fft_spec_buffer_size); |
| 734 fft_inv_spec = (OMXFFTSpec_R_S16S32*) malloc(fft_spec_buffer_size); |
| 735 status = omxSP_FFTInit_R_S16S32(fft_fwd_spec, fft_log_size); |
| 736 |
| 737 status = omxSP_FFTInit_R_S16S32(fft_inv_spec, fft_log_size); |
| 738 |
| 739 if (do_forward_test) { |
| 740 if (include_conversion) { |
| 741 int k; |
| 742 float factor = -1; |
| 743 |
| 744 GetUserTime(&start_time); |
| 745 for (k = 0; k < count; ++k) { |
| 746 /* |
| 747 * Spend some time computing the max of the signal, and then scaling it. |
| 748 */ |
| 749 for (n = 0; n < fft_size; ++n) { |
| 750 if (fabs(xr[n]) > factor) { |
| 751 factor = fabs(xr[n]); |
| 752 } |
| 753 } |
| 754 |
| 755 factor = 32767 / factor; |
| 756 for (n = 0; n < fft_size; ++n) { |
| 757 temp16[n] = factor * xr[n]; |
| 758 } |
| 759 |
| 760 status = omxSP_FFTFwd_RToCCS_S16S32_Sfs(x, y, fft_fwd_spec, |
| 761 (OMX_INT) scaleFactor); |
| 762 |
| 763 /* |
| 764 * Now spend some time converting the fixed-point FFT back to float. |
| 765 */ |
| 766 factor = 1 / factor; |
| 767 for (n = 0; n < fft_size + 2; ++n) { |
| 768 xr[n] = y[n] * factor; |
| 769 } |
| 770 } |
| 771 GetUserTime(&end_time); |
| 772 } else { |
| 773 float factor = -1; |
| 774 |
| 775 GetUserTime(&start_time); |
| 776 for (n = 0; n < count; ++n) { |
| 777 status = omxSP_FFTFwd_RToCCS_S16S32_Sfs(x, y, fft_fwd_spec, |
| 778 (OMX_INT) scaleFactor); |
| 779 } |
| 780 GetUserTime(&end_time); |
| 781 } |
| 782 |
| 783 elapsed_time = TimeDifference(&start_time, &end_time); |
| 784 |
| 785 PrintResult("Forward RFFT16", fft_log_size, elapsed_time, count); |
| 786 } |
| 787 |
| 788 if (do_inverse_test) { |
| 789 if (include_conversion) { |
| 790 int k; |
| 791 float factor = -1; |
| 792 |
| 793 GetUserTime(&start_time); |
| 794 for (k = 0; k < count; ++k) { |
| 795 /* |
| 796 * Spend some time scaling the FFT signal to fixed point. |
| 797 */ |
| 798 for (n = 0; n < fft_size; ++n) { |
| 799 if (fabs(yrTrue[n]) > factor) { |
| 800 factor = fabs(yrTrue[n]); |
| 801 } |
| 802 } |
| 803 for (n = 0; n < fft_size; ++n) { |
| 804 temp32[n] = factor * yrTrue[n]; |
| 805 } |
| 806 |
| 807 status = omxSP_FFTFwd_RToCCS_S16S32_Sfs(x, y, fft_fwd_spec, |
| 808 (OMX_INT) scaleFactor); |
| 809 |
| 810 /* |
| 811 * Spend some time converting the result back to float |
| 812 */ |
| 813 factor = 1 / factor; |
| 814 for (n = 0; n < fft_size; ++n) { |
| 815 xr[n] = factor * z[n]; |
| 816 } |
| 817 } |
| 818 GetUserTime(&end_time); |
| 819 } else { |
| 820 GetUserTime(&start_time); |
| 821 for (n = 0; n < count; ++n) { |
| 822 status = omxSP_FFTInv_CCSToR_S32S16_Sfs(y, z, fft_inv_spec, 0); |
| 823 } |
| 824 GetUserTime(&end_time); |
| 825 } |
| 826 |
| 827 elapsed_time = TimeDifference(&start_time, &end_time); |
| 828 |
| 829 PrintResult("Inverse RFFT16", fft_log_size, elapsed_time, count); |
| 830 } |
| 831 |
| 832 FreeAlignedPointer(x_aligned); |
| 833 FreeAlignedPointer(y_aligned); |
| 834 FreeAlignedPointer(z_aligned); |
| 835 FreeAlignedPointer(y_trueAligned); |
| 836 FreeAlignedPointer(xr_aligned); |
| 837 FreeAlignedPointer(yr_true_aligned); |
| 838 free(fft_fwd_spec); |
| 839 free(fft_inv_spec); |
| 840 } |
| 841 |
| 842 void TimeRFFT16(int count, float signal_value, int signal_type) { |
| 843 int k; |
| 844 int max_order = (max_fft_order > MAX_FFT_ORDER_FIXED_POINT) |
| 845 ? MAX_FFT_ORDER_FIXED_POINT : max_fft_order; |
| 846 |
| 847 if (verbose == 0) |
| 848 printf("RFFT16\n"); |
| 849 |
| 850 for (k = min_fft_order; k <= max_order; ++k) { |
| 851 int testCount = ComputeCount(count, k); |
| 852 TimeOneRFFT16(testCount, k, signal_value, signal_type); |
| 853 } |
| 854 } |
| 855 |
| 856 void GenerateRFFT32Signal(OMX_S32* x, OMX_SC32* fft, int size, int signal_type, |
| 857 float signal_value) { |
| 858 int k; |
| 859 struct ComplexFloat *test_signal; |
| 860 struct ComplexFloat *true_fft; |
| 861 |
| 862 test_signal = (struct ComplexFloat*) malloc(sizeof(*test_signal) * size); |
| 863 true_fft = (struct ComplexFloat*) malloc(sizeof(*true_fft) * size); |
| 864 GenerateTestSignalAndFFT(test_signal, true_fft, size, signal_type, |
| 865 signal_value, 1); |
| 866 |
| 867 /* |
| 868 * Convert the complex result to what we want |
| 869 */ |
| 870 |
| 871 for (k = 0; k < size; ++k) { |
| 872 x[k] = test_signal[k].Re; |
| 873 } |
| 874 |
| 875 for (k = 0; k < size / 2 + 1; ++k) { |
| 876 fft[k].Re = true_fft[k].Re; |
| 877 fft[k].Im = true_fft[k].Im; |
| 878 } |
| 879 |
| 880 free(test_signal); |
| 881 free(true_fft); |
| 882 } |
| 883 |
| 884 void TimeOneRFFT32(int count, int fft_log_size, float signal_value, |
| 885 int signal_type) { |
| 886 OMX_S32* x; |
| 887 OMX_S32* y; |
| 888 OMX_S32* z; |
| 889 OMX_S32* y_true; |
| 890 OMX_F32* xr; |
| 891 OMX_F32* yrTrue; |
| 892 |
| 893 struct AlignedPtr* x_aligned; |
| 894 struct AlignedPtr* y_aligned; |
| 895 struct AlignedPtr* z_aligned; |
| 896 struct AlignedPtr* y_true_aligned; |
| 897 |
| 898 OMX_S32* temp1; |
| 899 OMX_S32* temp2; |
| 900 |
| 901 OMX_INT n, fft_spec_buffer_size; |
| 902 OMXResult status; |
| 903 OMXFFTSpec_R_S16S32 * fft_fwd_spec = NULL; |
| 904 OMXFFTSpec_R_S16S32 * fft_inv_spec = NULL; |
| 905 int fft_size; |
| 906 struct timeval start_time; |
| 907 struct timeval end_time; |
| 908 double elapsed_time; |
| 909 int scaleFactor; |
| 910 |
| 911 fft_size = 1 << fft_log_size; |
| 912 |
| 913 x_aligned = AllocAlignedPointer(32, sizeof(*x) * fft_size); |
| 914 y_aligned = AllocAlignedPointer(32, sizeof(*y) * (fft_size + 2)); |
| 915 z_aligned = AllocAlignedPointer(32, sizeof(*z) * fft_size); |
| 916 |
| 917 y_true_aligned = AllocAlignedPointer(32, sizeof(*y_true) * (fft_size + 2)); |
| 918 |
| 919 x = x_aligned->aligned_pointer_; |
| 920 y = y_aligned->aligned_pointer_; |
| 921 z = z_aligned->aligned_pointer_; |
| 922 y_true = y_true_aligned->aligned_pointer_; |
| 923 |
| 924 if (verbose > 3) { |
| 925 printf("x = %p\n", (void*)x); |
| 926 printf("y = %p\n", (void*)y); |
| 927 printf("z = %p\n", (void*)z); |
| 928 } |
| 929 |
| 930 xr = (OMX_F32*) malloc(sizeof(*x) * fft_size); |
| 931 yrTrue = (OMX_F32*) malloc(sizeof(*y) * (fft_size + 2)); |
| 932 temp1 = (OMX_S32*) malloc(sizeof(*temp1) * fft_size); |
| 933 temp2 = (OMX_S32*) malloc(sizeof(*temp2) * (fft_size + 2)); |
| 934 |
| 935 GenerateRFFT32Signal(x, (OMX_SC32*) y_true, fft_size, signal_type, |
| 936 signal_value); |
| 937 |
| 938 if (verbose > 63) { |
| 939 printf("Signal\n"); |
| 940 printf("n\tx[n]\n"); |
| 941 for (n = 0; n < fft_size; ++n) { |
| 942 printf("%4d\t%d\n", n, x[n]); |
| 943 } |
| 944 } |
| 945 |
| 946 status = omxSP_FFTGetBufSize_R_S32(fft_log_size, &fft_spec_buffer_size); |
| 947 if (verbose > 3) { |
| 948 printf("fft_spec_buffer_size = %d\n", fft_spec_buffer_size); |
| 949 } |
| 950 |
| 951 fft_fwd_spec = (OMXFFTSpec_R_S32*) malloc(fft_spec_buffer_size); |
| 952 fft_inv_spec = (OMXFFTSpec_R_S32*) malloc(fft_spec_buffer_size); |
| 953 status = omxSP_FFTInit_R_S32(fft_fwd_spec, fft_log_size); |
| 954 if (status) { |
| 955 printf("Failed to init forward FFT: status = %d\n", status); |
| 956 } |
| 957 |
| 958 status = omxSP_FFTInit_R_S32(fft_inv_spec, fft_log_size); |
| 959 if (status) { |
| 960 printf("Failed to init backward FFT: status = %d\n", status); |
| 961 } |
| 962 |
| 963 if (do_forward_test) { |
| 964 if (include_conversion) { |
| 965 int k; |
| 966 float factor = -1; |
| 967 |
| 968 GetUserTime(&start_time); |
| 969 for (k = 0; k < count; ++k) { |
| 970 /* |
| 971 * Spend some time computing the max of the signal, and then scaling it. |
| 972 */ |
| 973 for (n = 0; n < fft_size; ++n) { |
| 974 if (fabs(xr[n]) > factor) { |
| 975 factor = fabs(xr[n]); |
| 976 } |
| 977 } |
| 978 |
| 979 factor = (1 << 20) / factor; |
| 980 for (n = 0; n < fft_size; ++n) { |
| 981 temp1[n] = factor * xr[n]; |
| 982 } |
| 983 |
| 984 status = omxSP_FFTFwd_RToCCS_S32_Sfs(x, y, fft_fwd_spec, |
| 985 (OMX_INT) scaleFactor); |
| 986 |
| 987 /* |
| 988 * Now spend some time converting the fixed-point FFT back to float. |
| 989 */ |
| 990 factor = 1 / factor; |
| 991 for (n = 0; n < fft_size + 2; ++n) { |
| 992 xr[n] = y[n] * factor; |
| 993 } |
| 994 } |
| 995 GetUserTime(&end_time); |
| 996 } else { |
| 997 float factor = -1; |
| 998 |
| 999 GetUserTime(&start_time); |
| 1000 for (n = 0; n < count; ++n) { |
| 1001 status = omxSP_FFTFwd_RToCCS_S32_Sfs(x, y, fft_fwd_spec, |
| 1002 (OMX_INT) scaleFactor); |
| 1003 } |
| 1004 GetUserTime(&end_time); |
| 1005 } |
| 1006 |
| 1007 elapsed_time = TimeDifference(&start_time, &end_time); |
| 1008 |
| 1009 PrintResult("Forward RFFT32", fft_log_size, elapsed_time, count); |
| 1010 } |
| 1011 |
| 1012 if (do_inverse_test) { |
| 1013 if (include_conversion) { |
| 1014 int k; |
| 1015 float factor = -1; |
| 1016 |
| 1017 GetUserTime(&start_time); |
| 1018 for (k = 0; k < count; ++k) { |
| 1019 /* |
| 1020 * Spend some time scaling the FFT signal to fixed point. |
| 1021 */ |
| 1022 for (n = 0; n < fft_size + 2; ++n) { |
| 1023 if (fabs(yrTrue[n]) > factor) { |
| 1024 factor = fabs(yrTrue[n]); |
| 1025 } |
| 1026 } |
| 1027 for (n = 0; n < fft_size + 2; ++n) { |
| 1028 temp2[n] = factor * yrTrue[n]; |
| 1029 } |
| 1030 |
| 1031 status = omxSP_FFTInv_CCSToR_S32_Sfs(y, z, fft_inv_spec, 0); |
| 1032 |
| 1033 /* |
| 1034 * Spend some time converting the result back to float |
| 1035 */ |
| 1036 factor = 1 / factor; |
| 1037 for (n = 0; n < fft_size; ++n) { |
| 1038 xr[n] = factor * z[n]; |
| 1039 } |
| 1040 } |
| 1041 GetUserTime(&end_time); |
| 1042 } else { |
| 1043 GetUserTime(&start_time); |
| 1044 for (n = 0; n < count; ++n) { |
| 1045 status = omxSP_FFTInv_CCSToR_S32_Sfs(y, z, fft_inv_spec, 0); |
| 1046 } |
| 1047 GetUserTime(&end_time); |
| 1048 } |
| 1049 |
| 1050 elapsed_time = TimeDifference(&start_time, &end_time); |
| 1051 |
| 1052 PrintResult("Inverse RFFT32", fft_log_size, elapsed_time, count); |
| 1053 } |
| 1054 |
| 1055 FreeAlignedPointer(x_aligned); |
| 1056 FreeAlignedPointer(y_aligned); |
| 1057 FreeAlignedPointer(z_aligned); |
| 1058 FreeAlignedPointer(y_true_aligned); |
| 1059 free(fft_fwd_spec); |
| 1060 free(fft_inv_spec); |
| 1061 } |
| 1062 |
| 1063 void TimeRFFT32(int count, float signal_value, int signal_type) { |
| 1064 int k; |
| 1065 int max_order = (max_fft_order > MAX_FFT_ORDER_FIXED_POINT) |
| 1066 ? MAX_FFT_ORDER_FIXED_POINT : max_fft_order; |
| 1067 |
| 1068 if (verbose == 0) |
| 1069 printf("RFFT32\n"); |
| 1070 |
| 1071 for (k = min_fft_order; k <= max_order; ++k) { |
| 1072 int testCount = ComputeCount(count, k); |
| 1073 TimeOneRFFT32(testCount, k, signal_value, signal_type); |
| 1074 } |
| 1075 } |
OLD | NEW |