| Index: third_party/openmax_dl/dl/sp/src/test/test_fft_time.c
|
| diff --git a/third_party/openmax_dl/dl/sp/src/test/test_fft_time.c b/third_party/openmax_dl/dl/sp/src/test/test_fft_time.c
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..a401594b5137a3e19a96a895dc637e83af2fd10d
|
| --- /dev/null
|
| +++ b/third_party/openmax_dl/dl/sp/src/test/test_fft_time.c
|
| @@ -0,0 +1,1075 @@
|
| +/*
|
| + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
| + *
|
| + * Use of this source code is governed by a BSD-style license
|
| + * that can be found in the LICENSE file in the root of the source
|
| + * tree. An additional intellectual property rights grant can be found
|
| + * in the file PATENTS. All contributing project authors may
|
| + * be found in the AUTHORS file in the root of the source tree.
|
| + */
|
| +
|
| +#include <math.h>
|
| +#include <stdio.h>
|
| +#include <stdlib.h>
|
| +#include <sys/resource.h>
|
| +#include <sys/time.h>
|
| +#include <unistd.h>
|
| +
|
| +#include "dl/sp/api/armSP.h"
|
| +#include "dl/sp/api/omxSP.h"
|
| +#include "dl/sp/src/test/aligned_ptr.h"
|
| +#include "dl/sp/src/test/gensig.h"
|
| +
|
| +#define MAX_FFT_ORDER TWIDDLE_TABLE_ORDER
|
| +#define MAX_FFT_ORDER_FIXED_POINT 12
|
| +
|
/*
 * Forward declarations for the timing routines defined further down.
 * Each TimeOne* routine times a single FFT order (fft_log_size); the
 * matching Time* routine walks a range of orders and calls it.
 */

/* Complex float FFT */
void TimeOneFloatFFT(int count, int fft_log_size,
                     float signal_value, int signal_type);
void TimeFloatFFT(int count, float signal_value, int signal_type);

/* Real float FFT */
void TimeOneFloatRFFT(int count, int fft_log_size,
                      float signal_value, int signal_type);
void TimeFloatRFFT(int count, float signal_value, int signal_type);

/* Complex 32-bit fixed-point FFT */
void TimeOneSC32FFT(int count, int fft_log_size,
                    float signal_value, int signal_type);
void TimeSC32FFT(int count, float signal_value, int signal_type);

/* Real 16-bit fixed-point FFT */
void TimeOneRFFT16(int count, int fft_log_size,
                   float signal_value, int signal_type);
void TimeRFFT16(int count, float signal_value, int signal_type);

/* Real 32-bit fixed-point FFT */
void TimeOneRFFT32(int count, int fft_log_size,
                   float signal_value, int signal_type);
void TimeRFFT32(int count, float signal_value, int signal_type);
|
| +
|
| +static int verbose = 1;
|
| +static int include_conversion = 0;
|
| +static int adapt_count = 1;
|
| +static int do_forward_test = 1;
|
| +static int do_inverse_test = 1;
|
| +static int min_fft_order = 2;
|
| +static int max_fft_order = MAX_FFT_ORDER;
|
| +
|
/*
 * Print the command-line help to stderr and terminate the process.
 *
 * prog is the program path (normally argv[0]); it is passed through
 * ProgramName() before being printed.  This function never returns: it
 * always finishes with exit(0).
 */
void TimeFFTUsage(const char* prog) {
  fprintf(stderr,
          "%s: [-hTFICA] [-v level] [-f fft] [-c count] [-n logsize]\n"
          "    [-s scale] [-g signal-type] [-S signal value]\n"
          "    [-m minFFTsize] [-M maxFFTsize]\n",
          ProgramName(prog));
  fprintf(stderr,
      "Simple FFT timing tests\n"
      "  -h          This help\n"
      "  -v level    Verbose output level (default = 1)\n"
      "  -F          Skip forward FFT tests\n"
      "  -I          Skip inverse FFT tests\n"
      "  -C          Include float-to-fixed and fixed-to-float cost for the\n"
      "              fixed-point FFTs (forward and inverse)\n"
      "  -c count    Number of FFTs to compute for timing.  This is a\n"
      "              lower limit; shorter FFTs will do more FFTs such that\n"
      "              the elapsed time is very roughly constant, if -A is\n"
      "              not given.\n"
      "  -A          Don't adapt the count given by -c; use specified value\n"
      "  -m min      Minimum FFT order to test\n"
      "  -M max      Maximum FFT order to test\n"
      "  -T          Run just one FFT timing test\n"
      "  -f          FFT type:\n"
      "                0 - Complex Float\n"
      "                1 - Real Float\n"
      "                2 - Complex 32-bit\n"
      "                3 - Real 16-bit\n"
      "                4 - Real 32-bit\n"
      "  -n logsize  Log2 of FFT size\n"
      "  -s scale    Scale factor for forward FFT (default = 0)\n"
      "  -S signal   Base value for the test signal (default = 1024)\n"
      "  -g type     Input signal type:\n"
      "                0 - Constant signal S + i*S. (Default value.)\n"
      "                1 - Real ramp starting at S/N, N = FFT size\n"
      "                2 - Sine wave of amplitude S\n"
      "                3 - Complex signal whose transform is a sine wave.\n"
      "\n"
      "Use -v 0 in combination with -F or -I to get output that can\n"
      "be pasted into a spreadsheet.\n"
      "\n"
      "Most of the options listed after -T above are only applicable\n"
      "when -T is given to test just one FFT size and FFT type.\n"
      "\n");
  exit(0);
}
|
| +
|
| +void main(int argc, char* argv[]) {
|
| + int fft_log_size = 4;
|
| + float signal_value = 1024;
|
| + int signal_type = 0;
|
| + int test_mode = 1;
|
| + int count = 100;
|
| + int fft_type = 0;
|
| + int fft_type_given = 0;
|
| +
|
| + int opt;
|
| +
|
| + while ((opt = getopt(argc, argv, "hTFICAc:n:s:S:g:v:f:m:M:")) != -1) {
|
| + switch (opt) {
|
| + case 'h':
|
| + TimeFFTUsage(argv[0]);
|
| + break;
|
| + case 'T':
|
| + test_mode = 0;
|
| + break;
|
| + case 'C':
|
| + include_conversion = 1;
|
| + break;
|
| + case 'F':
|
| + do_forward_test = 0;
|
| + break;
|
| + case 'I':
|
| + do_inverse_test = 0;
|
| + break;
|
| + case 'A':
|
| + adapt_count = 0;
|
| + break;
|
| + case 'c':
|
| + count = atoi(optarg);
|
| + break;
|
| + case 'n':
|
| + fft_log_size = atoi(optarg);
|
| + break;
|
| + case 'S':
|
| + signal_value = atof(optarg);
|
| + break;
|
| + case 'g':
|
| + signal_type = atoi(optarg);
|
| + break;
|
| + case 'v':
|
| + verbose = atoi(optarg);
|
| + break;
|
| + case 'f':
|
| + fft_type = atoi(optarg);
|
| + fft_type_given = 1;
|
| + break;
|
| + case 'm':
|
| + min_fft_order = atoi(optarg);
|
| + if (min_fft_order <= 2) {
|
| + fprintf(stderr, "Setting min FFT order to 2 (from %d)\n",
|
| + min_fft_order);
|
| + min_fft_order = 2;
|
| + }
|
| + break;
|
| + case 'M':
|
| + max_fft_order = atoi(optarg);
|
| + if (max_fft_order > MAX_FFT_ORDER) {
|
| + fprintf(stderr, "Setting max FFT order to %d (from %d)\n",
|
| + MAX_FFT_ORDER, max_fft_order);
|
| + max_fft_order = MAX_FFT_ORDER;
|
| + }
|
| + break;
|
| + default:
|
| + TimeFFTUsage(argv[0]);
|
| + break;
|
| + }
|
| + }
|
| +
|
| + if (test_mode && fft_type_given)
|
| + printf("Warning: -f ignored when -T not specified\n");
|
| +
|
| + if (test_mode) {
|
| + TimeFloatFFT(count, signal_value, signal_type);
|
| + TimeFloatRFFT(count, signal_value, signal_type);
|
| + TimeSC32FFT(count, signal_value, signal_type);
|
| + TimeRFFT16(count, signal_value, signal_type);
|
| + TimeRFFT32(count, signal_value, signal_type);
|
| + } else {
|
| + switch (fft_type) {
|
| + case 0:
|
| + TimeOneFloatFFT(count, fft_log_size, signal_value, signal_type);
|
| + break;
|
| + case 1:
|
| + TimeOneFloatRFFT(count, fft_log_size, signal_value, signal_type);
|
| + break;
|
| + case 2:
|
| + TimeOneSC32FFT(count, fft_log_size, signal_value, signal_type);
|
| + break;
|
| + case 3:
|
| + TimeOneRFFT16(count, fft_log_size, signal_value, signal_type);
|
| + break;
|
| + case 4:
|
| + TimeOneRFFT32(count, fft_log_size, signal_value, signal_type);
|
| + break;
|
| + default:
|
| + fprintf(stderr, "Unknown FFT type: %d\n", fft_type);
|
| + break;
|
| + }
|
| + }
|
| +}
|
| +
|
/*
 * Store the user-mode CPU time consumed so far by this process in *time,
 * as reported by getrusage(RUSAGE_SELF).
 *
 * If getrusage() fails, *time is set to zero so that callers never read
 * an indeterminate value.
 */
void GetUserTime(struct timeval* time) {
  struct rusage usage;

  if (getrusage(RUSAGE_SELF, &usage) != 0) {
    time->tv_sec = 0;
    time->tv_usec = 0;
    return;
  }

  /* Plain struct assignment; no memcpy(), so <string.h> is not needed. */
  *time = usage.ru_utime;
}
|
| +
|
/*
 * Return the interval from *start to *end in seconds, as a double.
 * Each timeval is first converted to seconds (tv_sec plus tv_usec
 * microseconds) and the two values are then subtracted.
 */
double TimeDifference(const struct timeval * start,
                      const struct timeval * end) {
  const double begin_seconds = start->tv_sec + start->tv_usec * 1e-6;
  const double finish_seconds = end->tv_sec + end->tv_usec * 1e-6;

  return finish_seconds - begin_seconds;
}
|
| +
|
| +void PrintResult(const char* prefix, int fft_log_size, double elapsed_time,
|
| + int count) {
|
| + if (verbose == 0) {
|
| + printf("%2d\t%8.4f\t%8d\t%.4e\n",
|
| + fft_log_size, elapsed_time, count, 1000 * elapsed_time / count);
|
| + } else {
|
| + printf("%-18s: order %2d: %8.4f sec for %8d FFTs: %.4e msec/FFT\n",
|
| + prefix, fft_log_size, elapsed_time, count,
|
| + 1000 * elapsed_time / count);
|
| + }
|
| +}
|
| +
|
| +int ComputeCount(int nominal_count, int fft_log_size) {
|
| + /*
|
| + * Try to figure out how many repetitions to do for a given FFT
|
| + * order (fft_log_size) given that we want a repetition of
|
| + * nominal_count for order 15 FFTs to be the approsimate amount of
|
| + * time we want to for all tests.
|
| + */
|
| +
|
| + int count;
|
| + if (adapt_count) {
|
| + double maxTime = ((double) nominal_count) * (1 << MAX_FFT_ORDER)
|
| + * MAX_FFT_ORDER;
|
| + double c = maxTime / ((1 << fft_log_size) * fft_log_size);
|
| + const int max_count = 10000000;
|
| +
|
| + count = (c > max_count) ? max_count : c;
|
| + } else {
|
| + count = nominal_count;
|
| + }
|
| +
|
| + return count;
|
| +}
|
| +
|
| +void TimeOneFloatFFT(int count, int fft_log_size, float signal_value,
|
| + int signal_type) {
|
| + struct AlignedPtr* x_aligned;
|
| + struct AlignedPtr* y_aligned;
|
| + struct AlignedPtr* z_aligned;
|
| +
|
| + struct ComplexFloat* x;
|
| + struct ComplexFloat* y;
|
| + OMX_FC32* z;
|
| +
|
| + struct ComplexFloat* y_true;
|
| +
|
| + OMX_INT n, fft_spec_buffer_size;
|
| + OMXFFTSpec_C_FC32 * fft_fwd_spec = NULL;
|
| + OMXFFTSpec_C_FC32 * fft_inv_spec = NULL;
|
| + int fft_size;
|
| + struct timeval start_time;
|
| + struct timeval end_time;
|
| + double elapsed_time;
|
| +
|
| + fft_size = 1 << fft_log_size;
|
| +
|
| + x_aligned = AllocAlignedPointer(32, sizeof(*x) * fft_size);
|
| + y_aligned = AllocAlignedPointer(32, sizeof(*y) * (fft_size + 2));
|
| + z_aligned = AllocAlignedPointer(32, sizeof(*z) * fft_size);
|
| +
|
| + y_true = (struct ComplexFloat*) malloc(sizeof(*y_true) * fft_size);
|
| +
|
| + x = x_aligned->aligned_pointer_;
|
| + y = y_aligned->aligned_pointer_;
|
| + z = z_aligned->aligned_pointer_;
|
| +
|
| + GenerateTestSignalAndFFT(x, y_true, fft_size, signal_type, signal_value, 0);
|
| +
|
| + omxSP_FFTGetBufSize_C_FC32(fft_log_size, &fft_spec_buffer_size);
|
| +
|
| + fft_fwd_spec = (OMXFFTSpec_C_FC32*) malloc(fft_spec_buffer_size);
|
| + fft_inv_spec = (OMXFFTSpec_C_FC32*) malloc(fft_spec_buffer_size);
|
| + omxSP_FFTInit_C_FC32(fft_fwd_spec, fft_log_size);
|
| + omxSP_FFTInit_C_FC32(fft_inv_spec, fft_log_size);
|
| +
|
| + if (do_forward_test) {
|
| + GetUserTime(&start_time);
|
| + for (n = 0; n < count; ++n) {
|
| + omxSP_FFTFwd_CToC_FC32_Sfs(x, y, fft_fwd_spec);
|
| + }
|
| + GetUserTime(&end_time);
|
| +
|
| + elapsed_time = TimeDifference(&start_time, &end_time);
|
| +
|
| + PrintResult("Forward Float FFT", fft_log_size, elapsed_time, count);
|
| + }
|
| +
|
| + if (do_inverse_test) {
|
| + GetUserTime(&start_time);
|
| + for (n = 0; n < count; ++n) {
|
| + omxSP_FFTInv_CToC_FC32_Sfs(y, z, fft_inv_spec);
|
| + }
|
| + GetUserTime(&end_time);
|
| +
|
| + elapsed_time = TimeDifference(&start_time, &end_time);
|
| +
|
| + PrintResult("Inverse Float FFT", fft_log_size, elapsed_time, count);
|
| + }
|
| +
|
| + FreeAlignedPointer(x_aligned);
|
| + FreeAlignedPointer(y_aligned);
|
| + FreeAlignedPointer(z_aligned);
|
| + free(y_true);
|
| + free(fft_fwd_spec);
|
| + free(fft_inv_spec);
|
| +}
|
| +
|
| +void TimeFloatFFT(int count, float signal_value, int signal_type) {
|
| + int k;
|
| +
|
| + if (verbose == 0)
|
| + printf("Float FFT\n");
|
| +
|
| + for (k = min_fft_order; k <= max_fft_order; ++k) {
|
| + int testCount = ComputeCount(count, k);
|
| + TimeOneFloatFFT(testCount, k, signal_value, signal_type);
|
| + }
|
| +}
|
| +
|
| +void GenerateRealFloatSignal(OMX_F32* x, OMX_FC32* fft, int size,
|
| + int signal_type, float signal_value)
|
| +{
|
| + int k;
|
| + struct ComplexFloat *test_signal;
|
| + struct ComplexFloat *true_fft;
|
| +
|
| + test_signal = (struct ComplexFloat*) malloc(sizeof(*test_signal) * size);
|
| + true_fft = (struct ComplexFloat*) malloc(sizeof(*true_fft) * size);
|
| + GenerateTestSignalAndFFT(test_signal, true_fft, size, signal_type,
|
| + signal_value, 1);
|
| +
|
| + /*
|
| + * Convert the complex result to what we want
|
| + */
|
| +
|
| + for (k = 0; k < size; ++k) {
|
| + x[k] = test_signal[k].Re;
|
| + }
|
| +
|
| + for (k = 0; k < size / 2 + 1; ++k) {
|
| + fft[k].Re = true_fft[k].Re;
|
| + fft[k].Im = true_fft[k].Im;
|
| + }
|
| +
|
| + free(test_signal);
|
| + free(true_fft);
|
| +}
|
| +
|
| +void TimeOneFloatRFFT(int count, int fft_log_size, float signal_value,
|
| + int signal_type) {
|
| + OMX_F32* x; /* Source */
|
| + OMX_F32* y; /* Transform */
|
| + OMX_F32* z; /* Inverse transform */
|
| +
|
| + OMX_F32* y_true; /* True FFT */
|
| +
|
| + struct AlignedPtr* x_aligned;
|
| + struct AlignedPtr* y_aligned;
|
| + struct AlignedPtr* z_aligned;
|
| +
|
| +
|
| + OMX_INT n, fft_spec_buffer_size;
|
| + OMXResult status;
|
| + OMXFFTSpec_R_F32 * fft_fwd_spec = NULL;
|
| + OMXFFTSpec_R_F32 * fft_inv_spec = NULL;
|
| + int fft_size;
|
| + struct timeval start_time;
|
| + struct timeval end_time;
|
| + double elapsed_time;
|
| +
|
| + fft_size = 1 << fft_log_size;
|
| +
|
| + x_aligned = AllocAlignedPointer(32, sizeof(*x) * fft_size);
|
| + /* The transformed value is in CCS format and is has fft_size + 2 values */
|
| + y_aligned = AllocAlignedPointer(32, sizeof(*y) * (fft_size + 2));
|
| + z_aligned = AllocAlignedPointer(32, sizeof(*z) * fft_size);
|
| +
|
| + x = x_aligned->aligned_pointer_;
|
| + y = y_aligned->aligned_pointer_;
|
| + z = z_aligned->aligned_pointer_;
|
| +
|
| + y_true = (OMX_F32*) malloc(sizeof(*y_true) * (fft_size + 2));
|
| +
|
| + GenerateRealFloatSignal(x, (OMX_FC32*) y_true, fft_size, signal_type,
|
| + signal_value);
|
| +
|
| + status = omxSP_FFTGetBufSize_R_F32(fft_log_size, &fft_spec_buffer_size);
|
| +
|
| + fft_fwd_spec = (OMXFFTSpec_R_F32*) malloc(fft_spec_buffer_size);
|
| + fft_inv_spec = (OMXFFTSpec_R_F32*) malloc(fft_spec_buffer_size);
|
| + status = omxSP_FFTInit_R_F32(fft_fwd_spec, fft_log_size);
|
| +
|
| + status = omxSP_FFTInit_R_F32(fft_inv_spec, fft_log_size);
|
| +
|
| + if (do_forward_test) {
|
| + GetUserTime(&start_time);
|
| + for (n = 0; n < count; ++n) {
|
| + omxSP_FFTFwd_RToCCS_F32_Sfs(x, y, fft_fwd_spec);
|
| + }
|
| + GetUserTime(&end_time);
|
| +
|
| + elapsed_time = TimeDifference(&start_time, &end_time);
|
| +
|
| + PrintResult("Forward Float RFFT", fft_log_size, elapsed_time, count);
|
| + }
|
| +
|
| + if (do_inverse_test) {
|
| + GetUserTime(&start_time);
|
| + for (n = 0; n < count; ++n) {
|
| + omxSP_FFTInv_CCSToR_F32_Sfs(y, z, fft_inv_spec);
|
| + }
|
| + GetUserTime(&end_time);
|
| +
|
| + elapsed_time = TimeDifference(&start_time, &end_time);
|
| +
|
| + PrintResult("Inverse Float RFFT", fft_log_size, elapsed_time, count);
|
| + }
|
| +
|
| + FreeAlignedPointer(x_aligned);
|
| + FreeAlignedPointer(y_aligned);
|
| + FreeAlignedPointer(z_aligned);
|
| + free(fft_fwd_spec);
|
| + free(fft_inv_spec);
|
| +}
|
| +
|
| +void TimeFloatRFFT(int count, float signal_value, int signal_type) {
|
| + int k;
|
| +
|
| + if (verbose == 0)
|
| + printf("Float RFFT\n");
|
| +
|
| + for (k = min_fft_order; k <= max_fft_order; ++k) {
|
| + int testCount = ComputeCount(count, k);
|
| + TimeOneFloatRFFT(testCount, k, signal_value, signal_type);
|
| + }
|
| +}
|
| +
|
| +void generateSC32Signal(OMX_SC32* x, OMX_SC32* fft, int size, int signal_type,
|
| + float signal_value) {
|
| + int k;
|
| + struct ComplexFloat *test_signal;
|
| + struct ComplexFloat *true_fft;
|
| +
|
| + test_signal = (struct ComplexFloat*) malloc(sizeof(*test_signal) * size);
|
| + true_fft = (struct ComplexFloat*) malloc(sizeof(*true_fft) * size);
|
| + GenerateTestSignalAndFFT(test_signal, true_fft, size, signal_type,
|
| + signal_value, 0);
|
| +
|
| + /*
|
| + * Convert the complex result to what we want
|
| + */
|
| +
|
| + for (k = 0; k < size; ++k) {
|
| + x[k].Re = 0.5 + test_signal[k].Re;
|
| + x[k].Im = 0.5 + test_signal[k].Im;
|
| + fft[k].Re = 0.5 + true_fft[k].Re;
|
| + fft[k].Im = 0.5 + true_fft[k].Im;
|
| + }
|
| +
|
| + free(test_signal);
|
| + free(true_fft);
|
| +}
|
| +
|
| +void TimeOneSC32FFT(int count, int fft_log_size, float signal_value,
|
| + int signal_type) {
|
| + OMX_SC32* x;
|
| + OMX_SC32* y;
|
| + OMX_SC32* z;
|
| +
|
| + struct AlignedPtr* x_aligned;
|
| + struct AlignedPtr* y_aligned;
|
| + struct AlignedPtr* z_aligned;
|
| +
|
| + OMX_SC32* y_true;
|
| + OMX_SC32* temp32a;
|
| + OMX_SC32* temp32b;
|
| +
|
| + OMX_INT n, fft_spec_buffer_size;
|
| + OMXResult status;
|
| + OMXFFTSpec_C_SC32 * fft_fwd_spec = NULL;
|
| + OMXFFTSpec_C_SC32 * fft_inv_spec = NULL;
|
| + int fft_size;
|
| + struct timeval start_time;
|
| + struct timeval end_time;
|
| + double elapsed_time;
|
| +
|
| + fft_size = 1 << fft_log_size;
|
| +
|
| + x_aligned = AllocAlignedPointer(32, sizeof(*x) * fft_size);
|
| + y_aligned = AllocAlignedPointer(32, sizeof(*y) * fft_size);
|
| + z_aligned = AllocAlignedPointer(32, sizeof(*z) * fft_size);
|
| + y_true = (OMX_SC32*) malloc(sizeof(*y_true) * fft_size);
|
| + temp32a = (OMX_SC32*) malloc(sizeof(*temp32a) * fft_size);
|
| + temp32b = (OMX_SC32*) malloc(sizeof(*temp32b) * fft_size);
|
| +
|
| + x = x_aligned->aligned_pointer_;
|
| + y = y_aligned->aligned_pointer_;
|
| + z = z_aligned->aligned_pointer_;
|
| +
|
| + generateSC32Signal(x, y_true, fft_size, signal_type, signal_value);
|
| +
|
| + status = omxSP_FFTGetBufSize_C_SC32(fft_log_size, &fft_spec_buffer_size);
|
| +
|
| + fft_fwd_spec = (OMXFFTSpec_C_SC32*) malloc(fft_spec_buffer_size);
|
| + fft_inv_spec = (OMXFFTSpec_C_SC32*) malloc(fft_spec_buffer_size);
|
| + status = omxSP_FFTInit_C_SC32(fft_fwd_spec, fft_log_size);
|
| +
|
| + status = omxSP_FFTInit_C_SC32(fft_inv_spec, fft_log_size);
|
| +
|
| + if (do_forward_test) {
|
| + if (include_conversion) {
|
| + int k;
|
| + float factor = -1;
|
| +
|
| + GetUserTime(&start_time);
|
| + for (k = 0; k < count; ++k) {
|
| + for (n = 0; n < fft_size; ++n) {
|
| + if (fabs(x[n].Re) > factor) {
|
| + factor = fabs(x[n].Re);
|
| + }
|
| + if (fabs(x[n].Im) > factor) {
|
| + factor = fabs(x[n].Im);
|
| + }
|
| + }
|
| +
|
| + factor = ((1 << 18) - 1) / factor;
|
| + for (n = 0; n < fft_size; ++n) {
|
| + temp32a[n].Re = factor * x[n].Re;
|
| + temp32a[n].Im = factor * x[n].Im;
|
| + }
|
| +
|
| + omxSP_FFTFwd_CToC_SC32_Sfs(x, y, fft_fwd_spec, 0);
|
| +
|
| + factor = 1 / factor;
|
| + for (n = 0; n < fft_size; ++n) {
|
| + temp32b[n].Re = y[n].Re * factor;
|
| + temp32b[n].Im = y[n].Im * factor;
|
| + }
|
| + }
|
| + GetUserTime(&end_time);
|
| + } else {
|
| + GetUserTime(&start_time);
|
| + for (n = 0; n < count; ++n) {
|
| + omxSP_FFTFwd_CToC_SC32_Sfs(x, y, fft_fwd_spec, 0);
|
| + }
|
| + GetUserTime(&end_time);
|
| + }
|
| +
|
| + elapsed_time = TimeDifference(&start_time, &end_time);
|
| +
|
| + PrintResult("Forward SC32 FFT", fft_log_size, elapsed_time, count);
|
| + }
|
| +
|
| + if (do_inverse_test) {
|
| + if (include_conversion) {
|
| + int k;
|
| + float factor = -1;
|
| +
|
| + GetUserTime(&start_time);
|
| + for (k = 0; k < count; ++k) {
|
| + for (n = 0; n < fft_size; ++n) {
|
| + if (fabs(x[n].Re) > factor) {
|
| + factor = fabs(x[n].Re);
|
| + }
|
| + if (fabs(x[n].Im) > factor) {
|
| + factor = fabs(x[n].Im);
|
| + }
|
| + }
|
| + factor = ((1 << 18) - 1) / factor;
|
| + for (n = 0; n < fft_size; ++n) {
|
| + temp32a[n].Re = factor * x[n].Re;
|
| + temp32a[n].Im = factor * x[n].Im;
|
| + }
|
| +
|
| + status = omxSP_FFTInv_CToC_SC32_Sfs(y, z, fft_inv_spec, 0);
|
| +
|
| + factor = 1 / factor;
|
| + for (n = 0; n < fft_size; ++n) {
|
| + temp32b[n].Re = y[n].Re * factor;
|
| + temp32b[n].Im = y[n].Im * factor;
|
| + }
|
| + }
|
| + GetUserTime(&end_time);
|
| + } else {
|
| + GetUserTime(&start_time);
|
| + for (n = 0; n < count; ++n) {
|
| + status = omxSP_FFTInv_CToC_SC32_Sfs(y, z, fft_inv_spec, 0);
|
| + }
|
| + GetUserTime(&end_time);
|
| + }
|
| +
|
| + elapsed_time = TimeDifference(&start_time, &end_time);
|
| +
|
| + PrintResult("Inverse SC32 FFT", fft_log_size, elapsed_time, count);
|
| + }
|
| +
|
| + FreeAlignedPointer(x_aligned);
|
| + FreeAlignedPointer(y_aligned);
|
| + FreeAlignedPointer(z_aligned);
|
| + free(temp32a);
|
| + free(temp32b);
|
| + free(fft_fwd_spec);
|
| + free(fft_inv_spec);
|
| +}
|
| +
|
| +void TimeSC32FFT(int count, float signal_value, int signal_type) {
|
| + int k;
|
| + int max_order = (max_fft_order > MAX_FFT_ORDER_FIXED_POINT)
|
| + ? MAX_FFT_ORDER_FIXED_POINT : max_fft_order;
|
| +
|
| + if (verbose == 0)
|
| + printf("SC32 FFT\n");
|
| +
|
| + for (k = min_fft_order; k <= max_order; ++k) {
|
| + int testCount = ComputeCount(count, k);
|
| + TimeOneSC32FFT(testCount, k, signal_value, signal_type);
|
| + }
|
| +}
|
| +
|
| +void GenerateRFFT16Signal(OMX_S16* x, OMX_SC32* fft, int size, int signal_type,
|
| + float signal_value) {
|
| + int k;
|
| + struct ComplexFloat *test_signal;
|
| + struct ComplexFloat *true_fft;
|
| +
|
| + test_signal = (struct ComplexFloat*) malloc(sizeof(*test_signal) * size);
|
| + true_fft = (struct ComplexFloat*) malloc(sizeof(*true_fft) * size);
|
| + GenerateTestSignalAndFFT(test_signal, true_fft, size, signal_type,
|
| + signal_value, 1);
|
| +
|
| + /*
|
| + * Convert the complex result to what we want
|
| + */
|
| +
|
| + for (k = 0; k < size; ++k) {
|
| + x[k] = test_signal[k].Re;
|
| + }
|
| +
|
| + for (k = 0; k < size / 2 + 1; ++k) {
|
| + fft[k].Re = true_fft[k].Re;
|
| + fft[k].Im = true_fft[k].Im;
|
| + }
|
| +
|
| + free(test_signal);
|
| + free(true_fft);
|
| +}
|
| +
|
| +void TimeOneRFFT16(int count, int fft_log_size, float signal_value,
|
| + int signal_type) {
|
| + OMX_S16* x;
|
| + OMX_S32* y;
|
| + OMX_S16* z;
|
| + OMX_S32* y_true;
|
| + OMX_F32* xr;
|
| + OMX_F32* yrTrue;
|
| +
|
| + struct AlignedPtr* x_aligned;
|
| + struct AlignedPtr* y_aligned;
|
| + struct AlignedPtr* z_aligned;
|
| + struct AlignedPtr* y_trueAligned;
|
| + struct AlignedPtr* xr_aligned;
|
| + struct AlignedPtr* yr_true_aligned;
|
| +
|
| +
|
| + OMX_S16* temp16;
|
| + OMX_S32* temp32;
|
| +
|
| +
|
| + OMX_INT n, fft_spec_buffer_size;
|
| + OMXResult status;
|
| + OMXFFTSpec_R_S16S32 * fft_fwd_spec = NULL;
|
| + OMXFFTSpec_R_S16S32 * fft_inv_spec = NULL;
|
| + int fft_size;
|
| + struct timeval start_time;
|
| + struct timeval end_time;
|
| + double elapsed_time;
|
| + int scaleFactor;
|
| +
|
| + fft_size = 1 << fft_log_size;
|
| + scaleFactor = fft_log_size;
|
| +
|
| + x_aligned = AllocAlignedPointer(32, sizeof(*x) * fft_size);
|
| + y_aligned = AllocAlignedPointer(32, sizeof(*y) * (fft_size + 2));
|
| + z_aligned = AllocAlignedPointer(32, sizeof(*z) * fft_size);
|
| +
|
| + y_trueAligned = AllocAlignedPointer(32, sizeof(*y_true) * (fft_size + 2));
|
| +
|
| + xr_aligned = AllocAlignedPointer(32, sizeof(*xr) * fft_size);
|
| + yr_true_aligned = AllocAlignedPointer(32, sizeof(*yrTrue) * (fft_size + 2));
|
| +
|
| + x = x_aligned->aligned_pointer_;
|
| + y = y_aligned->aligned_pointer_;
|
| + z = z_aligned->aligned_pointer_;
|
| + y_true = y_trueAligned->aligned_pointer_;
|
| + xr = xr_aligned->aligned_pointer_;
|
| + yrTrue = yr_true_aligned->aligned_pointer_;
|
| +
|
| + temp16 = (OMX_S16*) malloc(sizeof(*temp16) * fft_size);
|
| + temp32 = (OMX_S32*) malloc(sizeof(*temp32) * fft_size);
|
| +
|
| +
|
| + GenerateRFFT16Signal(x, (OMX_SC32*) y_true, fft_size, signal_type,
|
| + signal_value);
|
| + /*
|
| + * Generate a real version so we can measure scaling costs
|
| + */
|
| + GenerateRealFloatSignal(xr, (OMX_FC32*) yrTrue, fft_size, signal_type,
|
| + signal_value);
|
| +
|
| + status = omxSP_FFTGetBufSize_R_S16S32(fft_log_size, &fft_spec_buffer_size);
|
| +
|
| + fft_fwd_spec = (OMXFFTSpec_R_S16S32*) malloc(fft_spec_buffer_size);
|
| + fft_inv_spec = (OMXFFTSpec_R_S16S32*) malloc(fft_spec_buffer_size);
|
| + status = omxSP_FFTInit_R_S16S32(fft_fwd_spec, fft_log_size);
|
| +
|
| + status = omxSP_FFTInit_R_S16S32(fft_inv_spec, fft_log_size);
|
| +
|
| + if (do_forward_test) {
|
| + if (include_conversion) {
|
| + int k;
|
| + float factor = -1;
|
| +
|
| + GetUserTime(&start_time);
|
| + for (k = 0; k < count; ++k) {
|
| + /*
|
| + * Spend some time computing the max of the signal, and then scaling it.
|
| + */
|
| + for (n = 0; n < fft_size; ++n) {
|
| + if (fabs(xr[n]) > factor) {
|
| + factor = fabs(xr[n]);
|
| + }
|
| + }
|
| +
|
| + factor = 32767 / factor;
|
| + for (n = 0; n < fft_size; ++n) {
|
| + temp16[n] = factor * xr[n];
|
| + }
|
| +
|
| + status = omxSP_FFTFwd_RToCCS_S16S32_Sfs(x, y, fft_fwd_spec,
|
| + (OMX_INT) scaleFactor);
|
| +
|
| + /*
|
| + * Now spend some time converting the fixed-point FFT back to float.
|
| + */
|
| + factor = 1 / factor;
|
| + for (n = 0; n < fft_size + 2; ++n) {
|
| + xr[n] = y[n] * factor;
|
| + }
|
| + }
|
| + GetUserTime(&end_time);
|
| + } else {
|
| + float factor = -1;
|
| +
|
| + GetUserTime(&start_time);
|
| + for (n = 0; n < count; ++n) {
|
| + status = omxSP_FFTFwd_RToCCS_S16S32_Sfs(x, y, fft_fwd_spec,
|
| + (OMX_INT) scaleFactor);
|
| + }
|
| + GetUserTime(&end_time);
|
| + }
|
| +
|
| + elapsed_time = TimeDifference(&start_time, &end_time);
|
| +
|
| + PrintResult("Forward RFFT16", fft_log_size, elapsed_time, count);
|
| + }
|
| +
|
| + if (do_inverse_test) {
|
| + if (include_conversion) {
|
| + int k;
|
| + float factor = -1;
|
| +
|
| + GetUserTime(&start_time);
|
| + for (k = 0; k < count; ++k) {
|
| + /*
|
| + * Spend some time scaling the FFT signal to fixed point.
|
| + */
|
| + for (n = 0; n < fft_size; ++n) {
|
| + if (fabs(yrTrue[n]) > factor) {
|
| + factor = fabs(yrTrue[n]);
|
| + }
|
| + }
|
| + for (n = 0; n < fft_size; ++n) {
|
| + temp32[n] = factor * yrTrue[n];
|
| + }
|
| +
|
| + status = omxSP_FFTFwd_RToCCS_S16S32_Sfs(x, y, fft_fwd_spec,
|
| + (OMX_INT) scaleFactor);
|
| +
|
| + /*
|
| + * Spend some time converting the result back to float
|
| + */
|
| + factor = 1 / factor;
|
| + for (n = 0; n < fft_size; ++n) {
|
| + xr[n] = factor * z[n];
|
| + }
|
| + }
|
| + GetUserTime(&end_time);
|
| + } else {
|
| + GetUserTime(&start_time);
|
| + for (n = 0; n < count; ++n) {
|
| + status = omxSP_FFTInv_CCSToR_S32S16_Sfs(y, z, fft_inv_spec, 0);
|
| + }
|
| + GetUserTime(&end_time);
|
| + }
|
| +
|
| + elapsed_time = TimeDifference(&start_time, &end_time);
|
| +
|
| + PrintResult("Inverse RFFT16", fft_log_size, elapsed_time, count);
|
| + }
|
| +
|
| + FreeAlignedPointer(x_aligned);
|
| + FreeAlignedPointer(y_aligned);
|
| + FreeAlignedPointer(z_aligned);
|
| + FreeAlignedPointer(y_trueAligned);
|
| + FreeAlignedPointer(xr_aligned);
|
| + FreeAlignedPointer(yr_true_aligned);
|
| + free(fft_fwd_spec);
|
| + free(fft_inv_spec);
|
| +}
|
| +
|
| +void TimeRFFT16(int count, float signal_value, int signal_type) {
|
| + int k;
|
| + int max_order = (max_fft_order > MAX_FFT_ORDER_FIXED_POINT)
|
| + ? MAX_FFT_ORDER_FIXED_POINT : max_fft_order;
|
| +
|
| + if (verbose == 0)
|
| + printf("RFFT16\n");
|
| +
|
| + for (k = min_fft_order; k <= max_order; ++k) {
|
| + int testCount = ComputeCount(count, k);
|
| + TimeOneRFFT16(testCount, k, signal_value, signal_type);
|
| + }
|
| +}
|
| +
|
| +void GenerateRFFT32Signal(OMX_S32* x, OMX_SC32* fft, int size, int signal_type,
|
| + float signal_value) {
|
| + int k;
|
| + struct ComplexFloat *test_signal;
|
| + struct ComplexFloat *true_fft;
|
| +
|
| + test_signal = (struct ComplexFloat*) malloc(sizeof(*test_signal) * size);
|
| + true_fft = (struct ComplexFloat*) malloc(sizeof(*true_fft) * size);
|
| + GenerateTestSignalAndFFT(test_signal, true_fft, size, signal_type,
|
| + signal_value, 1);
|
| +
|
| + /*
|
| + * Convert the complex result to what we want
|
| + */
|
| +
|
| + for (k = 0; k < size; ++k) {
|
| + x[k] = test_signal[k].Re;
|
| + }
|
| +
|
| + for (k = 0; k < size / 2 + 1; ++k) {
|
| + fft[k].Re = true_fft[k].Re;
|
| + fft[k].Im = true_fft[k].Im;
|
| + }
|
| +
|
| + free(test_signal);
|
| + free(true_fft);
|
| +}
|
| +
|
| +void TimeOneRFFT32(int count, int fft_log_size, float signal_value,
|
| + int signal_type) {
|
| + OMX_S32* x;
|
| + OMX_S32* y;
|
| + OMX_S32* z;
|
| + OMX_S32* y_true;
|
| + OMX_F32* xr;
|
| + OMX_F32* yrTrue;
|
| +
|
| + struct AlignedPtr* x_aligned;
|
| + struct AlignedPtr* y_aligned;
|
| + struct AlignedPtr* z_aligned;
|
| + struct AlignedPtr* y_true_aligned;
|
| +
|
| + OMX_S32* temp1;
|
| + OMX_S32* temp2;
|
| +
|
| + OMX_INT n, fft_spec_buffer_size;
|
| + OMXResult status;
|
| + OMXFFTSpec_R_S16S32 * fft_fwd_spec = NULL;
|
| + OMXFFTSpec_R_S16S32 * fft_inv_spec = NULL;
|
| + int fft_size;
|
| + struct timeval start_time;
|
| + struct timeval end_time;
|
| + double elapsed_time;
|
| + int scaleFactor;
|
| +
|
| + fft_size = 1 << fft_log_size;
|
| +
|
| + x_aligned = AllocAlignedPointer(32, sizeof(*x) * fft_size);
|
| + y_aligned = AllocAlignedPointer(32, sizeof(*y) * (fft_size + 2));
|
| + z_aligned = AllocAlignedPointer(32, sizeof(*z) * fft_size);
|
| +
|
| + y_true_aligned = AllocAlignedPointer(32, sizeof(*y_true) * (fft_size + 2));
|
| +
|
| + x = x_aligned->aligned_pointer_;
|
| + y = y_aligned->aligned_pointer_;
|
| + z = z_aligned->aligned_pointer_;
|
| + y_true = y_true_aligned->aligned_pointer_;
|
| +
|
| + if (verbose > 3) {
|
| + printf("x = %p\n", (void*)x);
|
| + printf("y = %p\n", (void*)y);
|
| + printf("z = %p\n", (void*)z);
|
| + }
|
| +
|
| + xr = (OMX_F32*) malloc(sizeof(*x) * fft_size);
|
| + yrTrue = (OMX_F32*) malloc(sizeof(*y) * (fft_size + 2));
|
| + temp1 = (OMX_S32*) malloc(sizeof(*temp1) * fft_size);
|
| + temp2 = (OMX_S32*) malloc(sizeof(*temp2) * (fft_size + 2));
|
| +
|
| + GenerateRFFT32Signal(x, (OMX_SC32*) y_true, fft_size, signal_type,
|
| + signal_value);
|
| +
|
| + if (verbose > 63) {
|
| + printf("Signal\n");
|
| + printf("n\tx[n]\n");
|
| + for (n = 0; n < fft_size; ++n) {
|
| + printf("%4d\t%d\n", n, x[n]);
|
| + }
|
| + }
|
| +
|
| + status = omxSP_FFTGetBufSize_R_S32(fft_log_size, &fft_spec_buffer_size);
|
| + if (verbose > 3) {
|
| + printf("fft_spec_buffer_size = %d\n", fft_spec_buffer_size);
|
| + }
|
| +
|
| + fft_fwd_spec = (OMXFFTSpec_R_S32*) malloc(fft_spec_buffer_size);
|
| + fft_inv_spec = (OMXFFTSpec_R_S32*) malloc(fft_spec_buffer_size);
|
| + status = omxSP_FFTInit_R_S32(fft_fwd_spec, fft_log_size);
|
| + if (status) {
|
| + printf("Failed to init forward FFT: status = %d\n", status);
|
| + }
|
| +
|
| + status = omxSP_FFTInit_R_S32(fft_inv_spec, fft_log_size);
|
| + if (status) {
|
| + printf("Failed to init backward FFT: status = %d\n", status);
|
| + }
|
| +
|
| + if (do_forward_test) {
|
| + if (include_conversion) {
|
| + int k;
|
| + float factor = -1;
|
| +
|
| + GetUserTime(&start_time);
|
| + for (k = 0; k < count; ++k) {
|
| + /*
|
| + * Spend some time computing the max of the signal, and then scaling it.
|
| + */
|
| + for (n = 0; n < fft_size; ++n) {
|
| + if (fabs(xr[n]) > factor) {
|
| + factor = fabs(xr[n]);
|
| + }
|
| + }
|
| +
|
| + factor = (1 << 20) / factor;
|
| + for (n = 0; n < fft_size; ++n) {
|
| + temp1[n] = factor * xr[n];
|
| + }
|
| +
|
| + status = omxSP_FFTFwd_RToCCS_S32_Sfs(x, y, fft_fwd_spec,
|
| + (OMX_INT) scaleFactor);
|
| +
|
| + /*
|
| + * Now spend some time converting the fixed-point FFT back to float.
|
| + */
|
| + factor = 1 / factor;
|
| + for (n = 0; n < fft_size + 2; ++n) {
|
| + xr[n] = y[n] * factor;
|
| + }
|
| + }
|
| + GetUserTime(&end_time);
|
| + } else {
|
| + float factor = -1;
|
| +
|
| + GetUserTime(&start_time);
|
| + for (n = 0; n < count; ++n) {
|
| + status = omxSP_FFTFwd_RToCCS_S32_Sfs(x, y, fft_fwd_spec,
|
| + (OMX_INT) scaleFactor);
|
| + }
|
| + GetUserTime(&end_time);
|
| + }
|
| +
|
| + elapsed_time = TimeDifference(&start_time, &end_time);
|
| +
|
| + PrintResult("Forward RFFT32", fft_log_size, elapsed_time, count);
|
| + }
|
| +
|
| + if (do_inverse_test) {
|
| + if (include_conversion) {
|
| + int k;
|
| + float factor = -1;
|
| +
|
| + GetUserTime(&start_time);
|
| + for (k = 0; k < count; ++k) {
|
| + /*
|
| + * Spend some time scaling the FFT signal to fixed point.
|
| + */
|
| + for (n = 0; n < fft_size + 2; ++n) {
|
| + if (fabs(yrTrue[n]) > factor) {
|
| + factor = fabs(yrTrue[n]);
|
| + }
|
| + }
|
| + for (n = 0; n < fft_size + 2; ++n) {
|
| + temp2[n] = factor * yrTrue[n];
|
| + }
|
| +
|
| + status = omxSP_FFTInv_CCSToR_S32_Sfs(y, z, fft_inv_spec, 0);
|
| +
|
| + /*
|
| + * Spend some time converting the result back to float
|
| + */
|
| + factor = 1 / factor;
|
| + for (n = 0; n < fft_size; ++n) {
|
| + xr[n] = factor * z[n];
|
| + }
|
| + }
|
| + GetUserTime(&end_time);
|
| + } else {
|
| + GetUserTime(&start_time);
|
| + for (n = 0; n < count; ++n) {
|
| + status = omxSP_FFTInv_CCSToR_S32_Sfs(y, z, fft_inv_spec, 0);
|
| + }
|
| + GetUserTime(&end_time);
|
| + }
|
| +
|
| + elapsed_time = TimeDifference(&start_time, &end_time);
|
| +
|
| + PrintResult("Inverse RFFT32", fft_log_size, elapsed_time, count);
|
| + }
|
| +
|
| + FreeAlignedPointer(x_aligned);
|
| + FreeAlignedPointer(y_aligned);
|
| + FreeAlignedPointer(z_aligned);
|
| + FreeAlignedPointer(y_true_aligned);
|
| + free(fft_fwd_spec);
|
| + free(fft_inv_spec);
|
| +}
|
| +
|
| +void TimeRFFT32(int count, float signal_value, int signal_type) {
|
| + int k;
|
| + int max_order = (max_fft_order > MAX_FFT_ORDER_FIXED_POINT)
|
| + ? MAX_FFT_ORDER_FIXED_POINT : max_fft_order;
|
| +
|
| + if (verbose == 0)
|
| + printf("RFFT32\n");
|
| +
|
| + for (k = min_fft_order; k <= max_order; ++k) {
|
| + int testCount = ComputeCount(count, k);
|
| + TimeOneRFFT32(testCount, k, signal_value, signal_type);
|
| + }
|
| +}
|
|
|