Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(220)

Side by Side Diff: media/base/sinc_resampler_unittest.cc

Issue 10960023: Add ARM NEON intrinsic optimizations for SincResampler. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Comments. Created 8 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « media/base/sinc_resampler.cc ('k') | media/media.gyp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // MSVC++ requires this to be set before any other includes to get M_PI. 5 // MSVC++ requires this to be set before any other includes to get M_PI.
6 #define _USE_MATH_DEFINES 6 #define _USE_MATH_DEFINES
7 7
8 #include <cmath> 8 #include <cmath>
9 9
10 #include "base/bind.h" 10 #include "base/bind.h"
11 #include "base/bind_helpers.h" 11 #include "base/bind_helpers.h"
12 #include "base/command_line.h" 12 #include "base/command_line.h"
13 #include "base/logging.h" 13 #include "base/logging.h"
14 #include "base/string_number_conversions.h" 14 #include "base/string_number_conversions.h"
15 #include "base/stringize_macros.h"
15 #include "base/time.h" 16 #include "base/time.h"
17 #include "build/build_config.h"
16 #include "media/base/sinc_resampler.h" 18 #include "media/base/sinc_resampler.h"
17 #include "testing/gmock/include/gmock/gmock.h" 19 #include "testing/gmock/include/gmock/gmock.h"
18 #include "testing/gtest/include/gtest/gtest.h" 20 #include "testing/gtest/include/gtest/gtest.h"
19 21
20 using testing::_; 22 using testing::_;
21 23
22 namespace media { 24 namespace media {
23 25
24 static const double kSampleRateRatio = 192000.0 / 44100.0; 26 static const double kSampleRateRatio = 192000.0 / 44100.0;
25 static const double kKernelInterpolationFactor = 0.5; 27 static const double kKernelInterpolationFactor = 0.5;
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
82 resampler.Resample(resampled_destination.get(), resampler.ChunkSize() / 2); 84 resampler.Resample(resampled_destination.get(), resampler.ChunkSize() / 2);
83 ASSERT_NE(resampled_destination[0], 0); 85 ASSERT_NE(resampled_destination[0], 0);
84 86
85 // Flush and request more data, which should all be zeros now. 87 // Flush and request more data, which should all be zeros now.
86 resampler.Flush(); 88 resampler.Flush();
87 testing::Mock::VerifyAndClear(&mock_source); 89 testing::Mock::VerifyAndClear(&mock_source);
88 EXPECT_CALL(mock_source, ProvideInput(_, _)) 90 EXPECT_CALL(mock_source, ProvideInput(_, _))
89 .Times(1).WillOnce(ClearBuffer()); 91 .Times(1).WillOnce(ClearBuffer());
90 resampler.Resample(resampled_destination.get(), resampler.ChunkSize() / 2); 92 resampler.Resample(resampled_destination.get(), resampler.ChunkSize() / 2);
91 for (int i = 0; i < resampler.ChunkSize() / 2; ++i) 93 for (int i = 0; i < resampler.ChunkSize() / 2; ++i)
92 ASSERT_EQ(resampled_destination[i], 0); 94 ASSERT_FLOAT_EQ(resampled_destination[i], 0);
93 } 95 }
94 96
95 // Ensure various optimized Convolve() methods return the same value. Only run 97 // Ensure various optimized Convolve() methods return the same value. Only run
96 // this test if other optimized methods exist, otherwise the default Convolve() 98 // this test if other optimized methods exist, otherwise the default Convolve()
97 // will be tested by the parameterized SincResampler tests below. 99 // will be tested by the parameterized SincResampler tests below.
98 #if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__) 100 #if (defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)) || \
101 (defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON))
99 TEST(SincResamplerTest, Convolve) { 102 TEST(SincResamplerTest, Convolve) {
100 // Initialize a dummy resampler. 103 // Initialize a dummy resampler.
101 MockSource mock_source; 104 MockSource mock_source;
102 SincResampler resampler( 105 SincResampler resampler(
103 kSampleRateRatio, 106 kSampleRateRatio,
104 base::Bind(&MockSource::ProvideInput, base::Unretained(&mock_source))); 107 base::Bind(&MockSource::ProvideInput, base::Unretained(&mock_source)));
105 108
106 // Convolve_SSE() is slightly more precise than Convolve_C(), so comparison 109 // Convolve_SSE() is slightly more precise than Convolve_C(), so comparison
107 // must be done using an epsilon. 110 // must be done using an epsilon.
108 static const double kEpsilon = 0.00000005; 111 static const double kEpsilon = 0.00000005;
109 112
110 // Use a kernel from SincResampler as input and kernel data, this has the 113 // Use a kernel from SincResampler as input and kernel data, this has the
111 // benefit of already being properly sized and aligned for Convolve_SSE(). 114 // benefit of already being properly sized and aligned for Convolve_SSE().
112 double result = resampler.Convolve_C( 115 double result = resampler.Convolve_C(
113 resampler.kernel_storage_.get(), resampler.kernel_storage_.get(), 116 resampler.kernel_storage_.get(), resampler.kernel_storage_.get(),
114 resampler.kernel_storage_.get(), kKernelInterpolationFactor); 117 resampler.kernel_storage_.get(), kKernelInterpolationFactor);
118 #if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
115 double result2 = resampler.Convolve_SSE( 119 double result2 = resampler.Convolve_SSE(
116 resampler.kernel_storage_.get(), resampler.kernel_storage_.get(), 120 resampler.kernel_storage_.get(), resampler.kernel_storage_.get(),
117 resampler.kernel_storage_.get(), kKernelInterpolationFactor); 121 resampler.kernel_storage_.get(), kKernelInterpolationFactor);
122 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
123 double result2 = resampler.Convolve_NEON(
124 resampler.kernel_storage_.get(), resampler.kernel_storage_.get(),
125 resampler.kernel_storage_.get(), kKernelInterpolationFactor);
126 #else
127 #error This test should only be compiled when SSE or NEON is available.
Ami GONE FROM CHROMIUM 2012/09/24 20:04:16 This'd be a lot clearer if the #if test was revers
Ami GONE FROM CHROMIUM 2012/09/24 20:04:16 I wonder if all our bots have one or the other. I
DaleCurtis 2012/09/24 20:13:50 I don't follow. Can you elaborate? You mean #if !n
DaleCurtis 2012/09/24 20:13:50 The #if || check @100 prevents any issues here :)
128 #endif
118 EXPECT_NEAR(result2, result, kEpsilon); 129 EXPECT_NEAR(result2, result, kEpsilon);
119 130
120 // Test Convolve_SSE() w/ unaligned input pointer. 131 // Test Convolve() w/ unaligned input pointer.
121 result = resampler.Convolve_C( 132 result = resampler.Convolve_C(
122 resampler.kernel_storage_.get() + 1, resampler.kernel_storage_.get(), 133 resampler.kernel_storage_.get() + 1, resampler.kernel_storage_.get(),
123 resampler.kernel_storage_.get(), kKernelInterpolationFactor); 134 resampler.kernel_storage_.get(), kKernelInterpolationFactor);
135 #if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
Ami GONE FROM CHROMIUM 2012/09/24 20:04:16 you could avoid repeating these by defining an OPT
DaleCurtis 2012/09/24 20:22:27 Split out into a common section for clarity.
124 result2 = resampler.Convolve_SSE( 136 result2 = resampler.Convolve_SSE(
125 resampler.kernel_storage_.get() + 1, resampler.kernel_storage_.get(), 137 resampler.kernel_storage_.get() + 1, resampler.kernel_storage_.get(),
126 resampler.kernel_storage_.get(), kKernelInterpolationFactor); 138 resampler.kernel_storage_.get(), kKernelInterpolationFactor);
139 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(__ARM_NEON__) && defined(USE_NEON)
Ami GONE FROM CHROMIUM 2012/09/24 20:04:16 drop arm_neon
DaleCurtis 2012/09/24 20:22:27 Done.
140 result2 = resampler.Convolve_NEON(
141 resampler.kernel_storage_.get() + 1, resampler.kernel_storage_.get(),
142 resampler.kernel_storage_.get(), kKernelInterpolationFactor);
143 #endif
127 EXPECT_NEAR(result2, result, kEpsilon); 144 EXPECT_NEAR(result2, result, kEpsilon);
128 } 145 }
129 #endif 146 #endif
130 147
131 // Benchmark for the various Convolve() methods. Make sure to build with 148 // Benchmark for the various Convolve() methods. Make sure to build with
132 // branding=Chrome so that DCHECKs are compiled out when benchmarking. Original 149 // branding=Chrome so that DCHECKs are compiled out when benchmarking. Original
133 // benchmarks were run with --convolve-iterations=50000000. 150 // benchmarks were run with --convolve-iterations=50000000.
134 TEST(SincResamplerTest, ConvolveBenchmark) { 151 TEST(SincResamplerTest, ConvolveBenchmark) {
135 // Initialize a dummy resampler. 152 // Initialize a dummy resampler.
136 MockSource mock_source; 153 MockSource mock_source;
(...skipping 14 matching lines...) Expand all
151 base::TimeTicks start = base::TimeTicks::HighResNow(); 168 base::TimeTicks start = base::TimeTicks::HighResNow();
152 for (int i = 0; i < convolve_iterations; ++i) { 169 for (int i = 0; i < convolve_iterations; ++i) {
153 resampler.Convolve_C( 170 resampler.Convolve_C(
154 resampler.kernel_storage_.get(), resampler.kernel_storage_.get(), 171 resampler.kernel_storage_.get(), resampler.kernel_storage_.get(),
155 resampler.kernel_storage_.get(), kKernelInterpolationFactor); 172 resampler.kernel_storage_.get(), kKernelInterpolationFactor);
156 } 173 }
157 double total_time_c_ms = 174 double total_time_c_ms =
158 (base::TimeTicks::HighResNow() - start).InMillisecondsF(); 175 (base::TimeTicks::HighResNow() - start).InMillisecondsF();
159 printf("Convolve_C took %.2fms.\n", total_time_c_ms); 176 printf("Convolve_C took %.2fms.\n", total_time_c_ms);
160 177
178 #if (defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)) || \
179 (defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON))
161 #if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__) 180 #if defined(ARCH_CPU_X86_FAMILY) && defined(__SSE__)
162 // Benchmark Convolve_SSE() with unaligned input pointer. 181 #define CONVOLVE_FUNC Convolve_SSE
182 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
183 #define CONVOLVE_FUNC Convolve_NEON
Ami GONE FROM CHROMIUM 2012/09/24 20:04:16 Could as well go in the first #if, to save repetit
DaleCurtis 2012/09/24 20:13:50 I don't see how, the first if is a || check. We do
184 #else
185 #error This benchmark should only be compiled when SSE or NEON is available.
186 #endif
187 // Benchmark with unaligned input pointer.
163 start = base::TimeTicks::HighResNow(); 188 start = base::TimeTicks::HighResNow();
164 for (int j = 0; j < convolve_iterations; ++j) { 189 for (int j = 0; j < convolve_iterations; ++j) {
165 resampler.Convolve_SSE( 190 resampler.CONVOLVE_FUNC(
166 resampler.kernel_storage_.get() + 1, resampler.kernel_storage_.get(), 191 resampler.kernel_storage_.get() + 1, resampler.kernel_storage_.get(),
167 resampler.kernel_storage_.get(), kKernelInterpolationFactor); 192 resampler.kernel_storage_.get(), kKernelInterpolationFactor);
168 } 193 }
169 double total_time_sse_unaligned_ms = 194 double total_time_optimized_unaligned_ms =
170 (base::TimeTicks::HighResNow() - start).InMillisecondsF(); 195 (base::TimeTicks::HighResNow() - start).InMillisecondsF();
171 printf("Convolve_SSE (unaligned) took %.2fms; which is %.2fx faster than" 196 printf(STRINGIZE(CONVOLVE_FUNC) " (unaligned) took %.2fms; which is %.2fx "
172 " Convolve_C.\n", total_time_sse_unaligned_ms, 197 "faster than Convolve_C.\n", total_time_optimized_unaligned_ms,
173 total_time_c_ms / total_time_sse_unaligned_ms); 198 total_time_c_ms / total_time_optimized_unaligned_ms);
174 199
175 // Benchmark Convolve_SSE() with aligned input pointer. 200 // Benchmark with aligned input pointer.
176 start = base::TimeTicks::HighResNow(); 201 start = base::TimeTicks::HighResNow();
177 for (int j = 0; j < convolve_iterations; ++j) { 202 for (int j = 0; j < convolve_iterations; ++j) {
178 resampler.Convolve_SSE( 203 resampler.CONVOLVE_FUNC(
179 resampler.kernel_storage_.get(), resampler.kernel_storage_.get(), 204 resampler.kernel_storage_.get(), resampler.kernel_storage_.get(),
180 resampler.kernel_storage_.get(), kKernelInterpolationFactor); 205 resampler.kernel_storage_.get(), kKernelInterpolationFactor);
181 } 206 }
182 double total_time_sse_aligned_ms = 207 double total_time_optimized_aligned_ms =
183 (base::TimeTicks::HighResNow() - start).InMillisecondsF(); 208 (base::TimeTicks::HighResNow() - start).InMillisecondsF();
184 printf("Convolve_SSE (aligned) took %.2fms; which is %.2fx faster than" 209 printf(STRINGIZE(CONVOLVE_FUNC) " (aligned) took %.2fms; which is %.2fx "
185 " Convolve_C and %.2fx faster than Convolve_SSE (unaligned).\n", 210 "faster than Convolve_C and %.2fx faster than "
186 total_time_sse_aligned_ms, total_time_c_ms / total_time_sse_aligned_ms, 211 STRINGIZE(CONVOLVE_FUNC) " (unaligned).\n",
187 total_time_sse_unaligned_ms / total_time_sse_aligned_ms); 212 total_time_optimized_aligned_ms,
213 total_time_c_ms / total_time_optimized_aligned_ms,
214 total_time_optimized_unaligned_ms / total_time_optimized_aligned_ms);
215 #undef CONVOLVE_FUNC
188 #endif 216 #endif
189 } 217 }
190 218
191 // Fake audio source for testing the resampler. Generates a sinusoidal linear 219 // Fake audio source for testing the resampler. Generates a sinusoidal linear
192 // chirp (http://en.wikipedia.org/wiki/Chirp) which can be tuned to stress the 220 // chirp (http://en.wikipedia.org/wiki/Chirp) which can be tuned to stress the
193 // resampler for the specific sample rate conversion being used. 221 // resampler for the specific sample rate conversion being used.
194 class SinusoidalLinearChirpSource { 222 class SinusoidalLinearChirpSource {
195 public: 223 public:
196 SinusoidalLinearChirpSource(int sample_rate, int samples, 224 SinusoidalLinearChirpSource(int sample_rate, int samples,
197 double max_frequency) 225 double max_frequency)
(...skipping 182 matching lines...) Expand 10 before | Expand all | Expand 10 after
380 std::tr1::make_tuple(11025, 192000, kResamplingRMSError, -62.61), 408 std::tr1::make_tuple(11025, 192000, kResamplingRMSError, -62.61),
381 std::tr1::make_tuple(16000, 192000, kResamplingRMSError, -63.14), 409 std::tr1::make_tuple(16000, 192000, kResamplingRMSError, -63.14),
382 std::tr1::make_tuple(22050, 192000, kResamplingRMSError, -62.42), 410 std::tr1::make_tuple(22050, 192000, kResamplingRMSError, -62.42),
383 std::tr1::make_tuple(32000, 192000, kResamplingRMSError, -63.38), 411 std::tr1::make_tuple(32000, 192000, kResamplingRMSError, -63.38),
384 std::tr1::make_tuple(44100, 192000, kResamplingRMSError, -62.63), 412 std::tr1::make_tuple(44100, 192000, kResamplingRMSError, -62.63),
385 std::tr1::make_tuple(48000, 192000, kResamplingRMSError, -73.44), 413 std::tr1::make_tuple(48000, 192000, kResamplingRMSError, -73.44),
386 std::tr1::make_tuple(96000, 192000, kResamplingRMSError, -73.52), 414 std::tr1::make_tuple(96000, 192000, kResamplingRMSError, -73.52),
387 std::tr1::make_tuple(192000, 192000, kResamplingRMSError, -73.52))); 415 std::tr1::make_tuple(192000, 192000, kResamplingRMSError, -73.52)));
388 416
389 } // namespace media 417 } // namespace media
OLDNEW
« no previous file with comments | « media/base/sinc_resampler.cc ('k') | media/media.gyp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698