media/base/vector_math_perftest.cc - Issue 2556993002: Experiment with AVX optimizations for FMAC, FMUL operations.

Side by Side Diff: media/base/vector_math_perftest.cc

Issue 2556993002: Experiment with AVX optimizations for FMAC, FMUL operations.

Patch Set: Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <memory>	5 #include <memory>

6	6

	7 #include "base/bind.h"

	8 #include "base/cpu.h"

7 #include "base/macros.h"	9 #include "base/macros.h"

8 #include "base/memory/aligned_memory.h"	10 #include "base/memory/aligned_memory.h"

9 #include "base/time/time.h"	11 #include "base/time/time.h"

10 #include "build/build_config.h"	12 #include "build/build_config.h"

	13 #include "media/base/sinc_resampler.h"

11 #include "media/base/vector_math.h"	14 #include "media/base/vector_math.h"

12 #include "media/base/vector_math_testing.h"	15 #include "media/base/vector_math_testing.h"

13 #include "testing/gtest/include/gtest/gtest.h"	16 #include "testing/gtest/include/gtest/gtest.h"

14 #include "testing/perf/perf_test.h"	17 #include "testing/perf/perf_test.h"

15	18

16 using base::TimeTicks;	19 using base::TimeTicks;

17 using std::fill;	20 using std::fill;

18	21

19 namespace media {	22 namespace media {

20	23

21 static const int kBenchmarkIterations = 200000;	24 static const int kBenchmarkIterations = 200000;

22 static const int kEWMABenchmarkIterations = 50000;	25 static const int kEWMABenchmarkIterations = 50000;

23 static const float kScale = 0.5;	26 static const float kScale = 0.5;

24 static const int kVectorSize = 8192;	27 static const int kVectorSize = 8192;

25	28

	29 static const int kSincResamplerBenchmarkIterations = 50000000;

	30 static const double kSampleRateRatio = 192000.0 / 44100.0;

	31 static const double kKernelInterpolationFactor = 0.5;

	32

	33 // Helper function to provide no input to SincResampler's Convolve benchmark.

	34 static void DoNothing(int frames, float* destination) {}

	35

26 class VectorMathPerfTest : public testing::Test {	36 class VectorMathPerfTest : public testing::Test {

27 public:	37 public:

28 VectorMathPerfTest() {	38 VectorMathPerfTest() {

29 // Initialize input and output vectors.	39 // Initialize input and output vectors.

30 input_vector_.reset(static_cast<float*>(base::AlignedAlloc(	40 input_vector_.reset(static_cast<float*>(base::AlignedAlloc(

31 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment)));	41 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment)));

32 output_vector_.reset(static_cast<float*>(base::AlignedAlloc(	42 output_vector_.reset(static_cast<float*>(base::AlignedAlloc(

33 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment)));	43 sizeof(float) * kVectorSize, vector_math::kRequiredAlignment)));

34 fill(input_vector_.get(), input_vector_.get() + kVectorSize, 1.0f);	44 fill(input_vector_.get(), input_vector_.get() + kVectorSize, 1.0f);

35 fill(output_vector_.get(), output_vector_.get() + kVectorSize, 0.0f);	45 fill(output_vector_.get(), output_vector_.get() + kVectorSize, 0.0f);

36 }	46 }

37	47

	48 void RunBenchmark(float (*fn)(const float[], const float[], int),

	49 bool aligned,

	50 const std::string& test_name,

	51 const std::string& trace_name) {

	52 TimeTicks start = TimeTicks::Now();

	53 for (int i = 0; i < kBenchmarkIterations; ++i) {

	54 fn(input_vector_.get(), output_vector_.get(),

	55 kVectorSize - (aligned ? 0 : 1));

	56 }

	57 double total_time_milliseconds =

	58 (TimeTicks::Now() - start).InMillisecondsF();

	59 perf_test::PrintResult(test_name, "", trace_name,

	60 kBenchmarkIterations / total_time_milliseconds,

	61 "runs/ms", true);

	62 }

	63

38 void RunBenchmark(void (*fn)(const float[], float, int, float[]),	64 void RunBenchmark(void (*fn)(const float[], float, int, float[]),

39 bool aligned,	65 bool aligned,

40 const std::string& test_name,	66 const std::string& test_name,

41 const std::string& trace_name) {	67 const std::string& trace_name) {

42 TimeTicks start = TimeTicks::Now();	68 TimeTicks start = TimeTicks::Now();

43 for (int i = 0; i < kBenchmarkIterations; ++i) {	69 for (int i = 0; i < kBenchmarkIterations; ++i) {

44 fn(input_vector_.get(),	70 fn(input_vector_.get(), kScale, kVectorSize - (aligned ? 0 : 1),

45 kScale,

46 kVectorSize - (aligned ? 0 : 1),

47 output_vector_.get());	71 output_vector_.get());

48 }	72 }

49 double total_time_milliseconds =	73 double total_time_milliseconds =

50 (TimeTicks::Now() - start).InMillisecondsF();	74 (TimeTicks::Now() - start).InMillisecondsF();

51 perf_test::PrintResult(test_name,	75 perf_test::PrintResult(test_name, "", trace_name,

52 "",

53 trace_name,

54 kBenchmarkIterations / total_time_milliseconds,	76 kBenchmarkIterations / total_time_milliseconds,

55 "runs/ms",	77 "runs/ms", true);

56 true);

57 }	78 }

58	79

59 void RunBenchmark(	80 void RunBenchmark(

60 std::pair<float, float> (*fn)(float, const float[], int, float),	81 std::pair<float, float> (*fn)(float, const float[], int, float),

61 int len,	82 int len,

62 const std::string& test_name,	83 const std::string& test_name,

63 const std::string& trace_name) {	84 const std::string& trace_name) {

64 TimeTicks start = TimeTicks::Now();	85 TimeTicks start = TimeTicks::Now();

65 for (int i = 0; i < kEWMABenchmarkIterations; ++i) {	86 for (int i = 0; i < kEWMABenchmarkIterations; ++i) {

66 fn(0.5f, input_vector_.get(), len, 0.1f);	87 fn(0.5f, input_vector_.get(), len, 0.1f);

67 }	88 }

68 double total_time_milliseconds =	89 double total_time_milliseconds =

69 (TimeTicks::Now() - start).InMillisecondsF();	90 (TimeTicks::Now() - start).InMillisecondsF();

70 perf_test::PrintResult(test_name,	91 perf_test::PrintResult(test_name, "", trace_name,

71 "",

72 trace_name,

73 kEWMABenchmarkIterations / total_time_milliseconds,	92 kEWMABenchmarkIterations / total_time_milliseconds,

74 "runs/ms",	93 "runs/ms", true);

75 true);	94 }

	95

	96 void RunBenchmark(

	97 SincResampler* resampler,

	98 float (*convolve_fn)(const float*, const float*, const float*, double),

	99 bool aligned,

	100 const std::string& trace_name) {

	101 base::TimeTicks start = base::TimeTicks::Now();

	102 for (int i = 0; i < kSincResamplerBenchmarkIterations; ++i) {

	103 convolve_fn(resampler->get_kernel_for_testing() + (aligned ? 0 : 1),

	104 resampler->get_kernel_for_testing(),

	105 resampler->get_kernel_for_testing(),

	106 kKernelInterpolationFactor);

	107 }

	108 double total_time_milliseconds =

	109 (base::TimeTicks::Now() - start).InMillisecondsF();

	110 perf_test::PrintResult(

	111 "sinc_resampler_convolve", "", trace_name,

	112 kSincResamplerBenchmarkIterations / total_time_milliseconds, "runs/ms",

	113 true);

76 }	114 }

77	115

78 protected:	116 protected:

79 std::unique_ptr<float, base::AlignedFreeDeleter> input_vector_;	117 std::unique_ptr<float, base::AlignedFreeDeleter> input_vector_;

80 std::unique_ptr<float, base::AlignedFreeDeleter> output_vector_;	118 std::unique_ptr<float, base::AlignedFreeDeleter> output_vector_;

81	119

82 DISALLOW_COPY_AND_ASSIGN(VectorMathPerfTest);	120 DISALLOW_COPY_AND_ASSIGN(VectorMathPerfTest);

83 };	121 };

84	122

85 // Define platform dependent function names for SIMD optimized methods.	123 // Define platform dependent function names for SIMD optimized methods.

86 #if defined(ARCH_CPU_X86_FAMILY)	124 #if defined(ARCH_CPU_X86_FAMILY)

	125 #define CONVOLVE_FUNC Convolve_SSE

	126 #define DOTPRODUCT_FUNC DotProduct_SSE

	127 #define DOTPRODUCT_FUNC2 DotProduct_AVX

87 #define FMAC_FUNC FMAC_SSE	128 #define FMAC_FUNC FMAC_SSE

88 #define FMUL_FUNC FMUL_SSE	129 #define FMUL_FUNC FMUL_SSE

	130 #define FMAC_FUNC2 FMAC_AVX

89 #define EWMAAndMaxPower_FUNC EWMAAndMaxPower_SSE	131 #define EWMAAndMaxPower_FUNC EWMAAndMaxPower_SSE

90 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)	132 #elif defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)

	133 #define CONVOLVE_FUNC Convolve_NEON

	134 #define DOTPRODUCT_FUNC DotProduct_NEON

91 #define FMAC_FUNC FMAC_NEON	135 #define FMAC_FUNC FMAC_NEON

92 #define FMUL_FUNC FMUL_NEON	136 #define FMUL_FUNC FMUL_NEON

93 #define EWMAAndMaxPower_FUNC EWMAAndMaxPower_NEON	137 #define EWMAAndMaxPower_FUNC EWMAAndMaxPower_NEON

94 #endif	138 #endif

95	139

	140 // Benchmark for the various Convolve() methods. Make sure to build with

	141 // branding=Chrome so that DCHECKs are compiled out when benchmarking.

	142 TEST_F(VectorMathPerfTest, Convolve) {

	143 SincResampler resampler(kSampleRateRatio, SincResampler::kDefaultRequestSize,

	144 base::Bind(&DoNothing));

	145

	146 RunBenchmark(&resampler, vector_math::Convolve_C, true,

	147 "unoptimized_aligned");

	148

	149 #if defined(CONVOLVE_FUNC)

	150 RunBenchmark(&resampler, vector_math::CONVOLVE_FUNC, true,

	151 "optimized_aligned");

	152 RunBenchmark(&resampler, vector_math::CONVOLVE_FUNC, false,

	153 "optimized_unaligned");

	154 #endif

	155 }

	156

	157 // Benchmark for each optimized vector_math::DotProduct() method.

	158 TEST_F(VectorMathPerfTest, DotProduct) {

	159 // Benchmark DotProduct_C().

	160 RunBenchmark(vector_math::DotProduct_C, true, "vector_math_dotproduct",

	161 "unoptimized");

	162 #if defined(DOTPRODUCT_FUNC)

	163 // Benchmark DOTPRODUCT_FUNC() with unaligned size.

	164 ASSERT_NE(

	165 (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)),

	166 0U);

	167 RunBenchmark(vector_math::DOTPRODUCT_FUNC, false, "vector_math_dotproduct",

	168 "optimized_unaligned");

	169 // Benchmark DOTPRODUCT_FUNC() with aligned size.

	170 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),

	171 0U);

	172 RunBenchmark(vector_math::DOTPRODUCT_FUNC, true, "vector_math_dotproduct",

	173 "optimized_aligned");

	174 #if defined(FMAC_FUNC2)

	175 // Benchmark DOTPRODUCT_FUNC2() with unaligned size.

	176 if (!base::CPU().has_avx())

	177 return;

	178 ASSERT_NE(

	179 (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)),

	180 0U);

	181 RunBenchmark(vector_math::DOTPRODUCT_FUNC2, false, "vector_math_dotproduct",

	182 "optimized2_unaligned");

	183 // Benchmark DOTPRODUCT_FUNC2() with aligned size.

	184 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),

	185 0U);

	186 RunBenchmark(vector_math::DOTPRODUCT_FUNC2, true, "vector_math_dotproduct",

	187 "optimized2_aligned");

	188 #endif

	189 #endif

	190 }

	191

96 // Benchmark for each optimized vector_math::FMAC() method.	192 // Benchmark for each optimized vector_math::FMAC() method.

97 TEST_F(VectorMathPerfTest, FMAC) {	193 TEST_F(VectorMathPerfTest, FMAC) {

98 // Benchmark FMAC_C().	194 // Benchmark FMAC_C().

99 RunBenchmark(	195 RunBenchmark(vector_math::FMAC_C, true, "vector_math_fmac", "unoptimized");

100 vector_math::FMAC_C, true, "vector_math_fmac", "unoptimized");

101 #if defined(FMAC_FUNC)	196 #if defined(FMAC_FUNC)

102 // Benchmark FMAC_FUNC() with unaligned size.	197 // Benchmark FMAC_FUNC() with unaligned size.

103 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment /	198 ASSERT_NE(

104 sizeof(float)), 0U);	199 (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)),

105 RunBenchmark(	200 0U);

106 vector_math::FMAC_FUNC, false, "vector_math_fmac", "optimized_unaligned");	201 RunBenchmark(vector_math::FMAC_FUNC, false, "vector_math_fmac",

	202 "optimized_unaligned");

107 // Benchmark FMAC_FUNC() with aligned size.	203 // Benchmark FMAC_FUNC() with aligned size.

108 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),	204 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),

109 0U);	205 0U);

110 RunBenchmark(	206 RunBenchmark(vector_math::FMAC_FUNC, true, "vector_math_fmac",

111 vector_math::FMAC_FUNC, true, "vector_math_fmac", "optimized_aligned");	207 "optimized_aligned");

	208 #if defined(FMAC_FUNC2)

	209 // Benchmark FMAC_FUNC2() with unaligned size.

	210 if (!base::CPU().has_avx())

	211 return;

	212 ASSERT_NE(

	213 (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)),

	214 0U);

	215 RunBenchmark(vector_math::FMAC_FUNC2, false, "vector_math_fmac",

	216 "optimized2_unaligned");

	217 // Benchmark FMAC_FUNC2() with aligned size.

	218 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),

	219 0U);

	220 RunBenchmark(vector_math::FMAC_FUNC2, true, "vector_math_fmac",

	221 "optimized2_aligned");

	222 #endif

112 #endif	223 #endif

113 }	224 }

114	225

115 // Benchmark for each optimized vector_math::FMUL() method.	226 // Benchmark for each optimized vector_math::FMUL() method.

116 TEST_F(VectorMathPerfTest, FMUL) {	227 TEST_F(VectorMathPerfTest, FMUL) {

117 // Benchmark FMUL_C().	228 // Benchmark FMUL_C().

118 RunBenchmark(	229 RunBenchmark(vector_math::FMUL_C, true, "vector_math_fmul", "unoptimized");

119 vector_math::FMUL_C, true, "vector_math_fmul", "unoptimized");

120 #if defined(FMUL_FUNC)	230 #if defined(FMUL_FUNC)

121 // Benchmark FMUL_FUNC() with unaligned size.	231 // Benchmark FMUL_FUNC() with unaligned size.

122 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment /	232 ASSERT_NE(

123 sizeof(float)), 0U);	233 (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)),

124 RunBenchmark(	234 0U);

125 vector_math::FMUL_FUNC, false, "vector_math_fmul", "optimized_unaligned");	235 RunBenchmark(vector_math::FMUL_FUNC, false, "vector_math_fmul",

	236 "optimized_unaligned");

126 // Benchmark FMUL_FUNC() with aligned size.	237 // Benchmark FMUL_FUNC() with aligned size.

127 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),	238 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),

128 0U);	239 0U);

129 RunBenchmark(	240 RunBenchmark(vector_math::FMUL_FUNC, true, "vector_math_fmul",

130 vector_math::FMUL_FUNC, true, "vector_math_fmul", "optimized_aligned");	241 "optimized_aligned");

131 #endif	242 #endif

132 }	243 }

133	244

134 // Benchmark for each optimized vector_math::EWMAAndMaxPower() method.	245 // Benchmark for each optimized vector_math::EWMAAndMaxPower() method.

135 TEST_F(VectorMathPerfTest, EWMAAndMaxPower) {	246 TEST_F(VectorMathPerfTest, EWMAAndMaxPower) {

136 // Benchmark EWMAAndMaxPower_C().	247 // Benchmark EWMAAndMaxPower_C().

137 RunBenchmark(vector_math::EWMAAndMaxPower_C,	248 RunBenchmark(vector_math::EWMAAndMaxPower_C, kVectorSize,

138 kVectorSize,	249 "vector_math_ewma_and_max_power", "unoptimized");

139 "vector_math_ewma_and_max_power",

140 "unoptimized");

141 #if defined(EWMAAndMaxPower_FUNC)	250 #if defined(EWMAAndMaxPower_FUNC)

142 // Benchmark EWMAAndMaxPower_FUNC() with unaligned size.	251 // Benchmark EWMAAndMaxPower_FUNC() with unaligned size.

143 ASSERT_NE((kVectorSize - 1) % (vector_math::kRequiredAlignment /	252 ASSERT_NE(

144 sizeof(float)), 0U);	253 (kVectorSize - 1) % (vector_math::kRequiredAlignment / sizeof(float)),

145 RunBenchmark(vector_math::EWMAAndMaxPower_FUNC,	254 0U);

146 kVectorSize - 1,	255 RunBenchmark(vector_math::EWMAAndMaxPower_FUNC, kVectorSize - 1,

147 "vector_math_ewma_and_max_power",	256 "vector_math_ewma_and_max_power", "optimized_unaligned");

148 "optimized_unaligned");

149 // Benchmark EWMAAndMaxPower_FUNC() with aligned size.	257 // Benchmark EWMAAndMaxPower_FUNC() with aligned size.

150 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),	258 ASSERT_EQ(kVectorSize % (vector_math::kRequiredAlignment / sizeof(float)),

151 0U);	259 0U);

152 RunBenchmark(vector_math::EWMAAndMaxPower_FUNC,	260 RunBenchmark(vector_math::EWMAAndMaxPower_FUNC, kVectorSize,

153 kVectorSize,	261 "vector_math_ewma_and_max_power", "optimized_aligned");

154 "vector_math_ewma_and_max_power",

155 "optimized_aligned");

156 #endif	262 #endif

157 }	263 }

158	264

159 } // namespace media	265 } // namespace media

OLD	NEW

« no previous file with comments | « media/base/vector_math_avx.cc ('k') | media/base/vector_math_testing.h » ('j') | no next file with comments »