skia/ext/convolver_unittest.cc - Issue 6334070: SIMD implementation of Convolver for Lanczos filter etc.

Side by Side Diff: skia/ext/convolver_unittest.cc

Issue 6334070: SIMD implementation of Convolver for Lanczos filter etc. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Resolve 32-bit POSIX systems having -msse2 disabled when building Chrome; merge two versions. Created 9 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include <string.h>	5 #include <string.h>

6 #include <time.h>	6 #include <time.h>

7 #include <vector>	7 #include <vector>

8	8

9 #include "base/basictypes.h"	9 #include "base/basictypes.h"

	10 #include "base/logging.h"

	11 #include "base/time.h"

10 #include "skia/ext/convolver.h"	12 #include "skia/ext/convolver.h"

11 #include "testing/gtest/include/gtest/gtest.h"	13 #include "testing/gtest/include/gtest/gtest.h"

	14 #include "third_party/skia/include/core/SkBitmap.h"

	15 #include "third_party/skia/include/core/SkColorPriv.h"

	16 #include "third_party/skia/include/core/SkRect.h"

	17 #include "third_party/skia/include/core/SkTypes.h"

12	18

13 namespace skia {	19 namespace skia {

14	20

15 namespace {	21 namespace {

16	22

17 // Fills the given filter with impulse functions for the range 0->num_entries.	23 // Fills the given filter with impulse functions for the range 0->num_entries.

18 void FillImpulseFilter(int num_entries, ConvolutionFilter1D* filter) {	24 void FillImpulseFilter(int num_entries, ConvolutionFilter1D* filter) {

19 float one = 1.0f;	25 float one = 1.0f;

20 for (int i = 0; i < num_entries; i++)	26 for (int i = 0; i < num_entries; i++)

21 filter->AddFilter(i, &one, 1);	27 filter->AddFilter(i, &one, 1);

22 }	28 }

23	29

24 // Filters the given input with the impulse function, and verifies that it	30 // Filters the given input with the impulse function, and verifies that it

25 // does not change.	31 // does not change.

26 void TestImpulseConvolution(const unsigned char* data, int width, int height) {	32 void TestImpulseConvolution(const unsigned char* data, int width, int height) {

27 int byte_count = width * height * 4;	33 int byte_count = width * height * 4;

28	34

29 ConvolutionFilter1D filter_x;	35 ConvolutionFilter1D filter_x;

30 FillImpulseFilter(width, &filter_x);	36 FillImpulseFilter(width, &filter_x);

31	37

32 ConvolutionFilter1D filter_y;	38 ConvolutionFilter1D filter_y;

33 FillImpulseFilter(height, &filter_y);	39 FillImpulseFilter(height, &filter_y);

34	40

35 std::vector<unsigned char> output;	41 std::vector<unsigned char> output;

36 output.resize(byte_count);	42 output.resize(byte_count);

37 BGRAConvolve2D(data, width * 4, true, filter_x, filter_y,	43 BGRAConvolve2D(data, width * 4, true, filter_x, filter_y,

38 filter_x.num_values() * 4, &output[0]);	44 filter_x.num_values() * 4, &output[0], false);

39	45

40 // Output should exactly match input.	46 // Output should exactly match input.

41 EXPECT_EQ(0, memcmp(data, &output[0], byte_count));	47 EXPECT_EQ(0, memcmp(data, &output[0], byte_count));

42 }	48 }

43	49

44 // Fills the destination filter with a box filter averaging every two pixels	50 // Fills the destination filter with a box filter averaging every two pixels

45 // to produce the output.	51 // to produce the output.

46 void FillBoxFilter(int size, ConvolutionFilter1D* filter) {	52 void FillBoxFilter(int size, ConvolutionFilter1D* filter) {

47 const float box[2] = { 0.5, 0.5 };	53 const float box[2] = { 0.5, 0.5 };

48 for (int i = 0; i < size; i++)	54 for (int i = 0; i < size; i++)

(...skipping 50 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
99 for (int i = 0; i < src_byte_count; i++)	105 for (int i = 0; i < src_byte_count; i++)

100 input[i] = rand() * 255 / RAND_MAX;	106 input[i] = rand() * 255 / RAND_MAX;

101	107

102 // Compute the filters.	108 // Compute the filters.

103 ConvolutionFilter1D filter_x, filter_y;	109 ConvolutionFilter1D filter_x, filter_y;

104 FillBoxFilter(dest_width, &filter_x);	110 FillBoxFilter(dest_width, &filter_x);

105 FillBoxFilter(dest_height, &filter_y);	111 FillBoxFilter(dest_height, &filter_y);

106	112

107 // Do the convolution.	113 // Do the convolution.

108 BGRAConvolve2D(&input[0], src_width, true, filter_x, filter_y,	114 BGRAConvolve2D(&input[0], src_width, true, filter_x, filter_y,

109 filter_x.num_values() * 4, &output[0]);	115 filter_x.num_values() * 4, &output[0], false);

110	116

111 // Compute the expected results and check, allowing for a small difference	117 // Compute the expected results and check, allowing for a small difference

112 // to account for rounding errors.	118 // to account for rounding errors.

113 for (int y = 0; y < dest_height; y++) {	119 for (int y = 0; y < dest_height; y++) {

114 for (int x = 0; x < dest_width; x++) {	120 for (int x = 0; x < dest_width; x++) {

115 for (int channel = 0; channel < 4; channel++) {	121 for (int channel = 0; channel < 4; channel++) {

116 int src_offset = (y * 2 * src_row_stride + x * 2 * 4) + channel;	122 int src_offset = (y * 2 * src_row_stride + x * 2 * 4) + channel;

117 int value = input[src_offset] + // Top left source pixel.	123 int value = input[src_offset] + // Top left source pixel.

118 input[src_offset + 4] + // Top right source pixel.	124 input[src_offset + 4] + // Top right source pixel.

119 input[src_offset + src_row_stride] + // Lower left.	125 input[src_offset + src_row_stride] + // Lower left.

(...skipping 77 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
197 filter.AddFilter(66, factors6, arraysize(factors6));	203 filter.AddFilter(66, factors6, arraysize(factors6));

198 ASSERT_EQ(6, filter.max_filter());	204 ASSERT_EQ(6, filter.max_filter());

199 ASSERT_EQ(6, filter.num_values());	205 ASSERT_EQ(6, filter.num_values());

200	206

201 values = filter.FilterForValue(5, &filter_offset, &filter_length);	207 values = filter.FilterForValue(5, &filter_offset, &filter_length);

202 ASSERT_TRUE(values == NULL); // filter_length == 0 => values is NULL	208 ASSERT_TRUE(values == NULL); // filter_length == 0 => values is NULL

203 ASSERT_EQ(66, filter_offset); // value passed in	209 ASSERT_EQ(66, filter_offset); // value passed in

204 ASSERT_EQ(0, filter_length);	210 ASSERT_EQ(0, filter_length);

205 }	211 }

206	212

	213 TEST(Convolver, SIMDVerification) {

	214 #if defined(ARCH_CPU_X86_FAMILY)

	215 #if defined(OS_WIN) \|\| defined(__SSE2__)

	216 base::CPU cpu;

	217 if (!cpu.has_sse2()) return;

	218

	219 int source_sizes[][2] = { {1920, 1080}, {720, 480}, {1377, 523}, {325, 241} };

	220 int dest_sizes[][2] = { {1280, 1024}, {480, 270}, {177, 123} };

	221 float filter[] = { 0.05f, -0.15f, 0.6f, 0.6f, -0.15f, 0.05f };

	222

	223 srand(static_cast<unsigned int>(time(0)));

	224

	225 // Loop over some specific source and destination dimensions.

	226 for (unsigned int i = 0; i < arraysize(source_sizes); ++i) {

	227 unsigned int source_width = source_sizes[i][0];

	228 unsigned int source_height = source_sizes[i][1];

	229 for (unsigned int j = 0; j < arraysize(dest_sizes); ++j) {

	230 unsigned int dest_width = source_sizes[j][0];

	231 unsigned int dest_height = source_sizes[j][1];

	232

	233 // Preparing convolve coefficients.

	234 ConvolutionFilter1D x_filter, y_filter;

	235 for (unsigned int p = 0; p < dest_width; ++p) {

	236 unsigned int offset = source_width * p / dest_width;

	237 if (offset > source_width - arraysize(filter))

	238 offset = source_width - arraysize(filter);

	239 x_filter.AddFilter(offset, filter, arraysize(filter));

	240 }

	241 for (unsigned int p = 0; p < dest_height; ++p) {

	242 unsigned int offset = source_height * p / dest_height;

	243 if (offset > source_height - arraysize(filter))

	244 offset = source_height - arraysize(filter);

	245 y_filter.AddFilter(offset, filter, arraysize(filter));

	246 }

	247

	248 // Allocate input and output skia bitmap.

	249 SkBitmap source, result_c, result_sse;

	250 source.setConfig(SkBitmap::kARGB_8888_Config,

	251 source_width, source_height);

	252 source.allocPixels();

	253 result_c.setConfig(SkBitmap::kARGB_8888_Config,

	254 dest_width, dest_height);

	255 result_c.allocPixels();

	256 result_sse.setConfig(SkBitmap::kARGB_8888_Config,

	257 dest_width, dest_height);

	258 result_sse.allocPixels();

	259

	260 // Randomize source bitmap for testing.

	261 unsigned char* src_ptr = static_cast<unsigned char*>(source.getPixels());

	262 for (int y = 0; y < source.height(); y++) {

	263 for (int x = 0; x < source.rowBytes(); x++)

	264 src_ptr[x] = rand() % 255;

	265 src_ptr += source.rowBytes();

	266 }

	267

	268 // Test both cases with different has_alpha.

	269 for (int alpha = 0; alpha < 2; alpha++) {

	270 // Convolve using C code.

	271 base::TimeTicks resize_start;

	272 base::TimeDelta delta_c, delta_sse;

	273 unsigned char* r1 = static_cast<unsigned char*>(result_c.getPixels());

	274 unsigned char* r2 = static_cast<unsigned char*>(result_sse.getPixels());

	275

	276 resize_start = base::TimeTicks::Now();

	277 BGRAConvolve2D(static_cast<const uint8*>(source.getPixels()),

	278 static_cast<int>(source.rowBytes()),

	279 alpha ? true : false, x_filter, y_filter,

	280 static_cast<int>(result_c.rowBytes()), r1, false);

	281 delta_c = base::TimeTicks::Now() - resize_start;

	282

	283 resize_start = base::TimeTicks::Now();

	284 // Convolve using SSE2 code

	285 BGRAConvolve2D(static_cast<const uint8*>(source.getPixels()),

	286 static_cast<int>(source.rowBytes()),

	287 alpha ? true : false, x_filter, y_filter,

	288 static_cast<int>(result_sse.rowBytes()), r2, true);

	289 delta_sse = base::TimeTicks::Now() - resize_start;

	290

	291 // Unfortunately I could not enable the performance check now.

	292 // Most bots use debug version, and there are great difference between

	293 // the code generation for intrinsic, etc. In release version speed

	294 // difference was 150%-200% depend on alpha channel presence;

	295 // while in debug version speed difference was 96%-120%.

	296 // TODO(jiesun): optimize further until we could enable this for

	297 // debug version too.

	298 // EXPECT_LE(delta_sse, delta_c);

	299

	300 int64 c_us = delta_c.InMicroseconds();

	301 int64 sse_us = delta_sse.InMicroseconds();

	302 LOG(INFO) << "from:" << source_width << "x" << source_height

	303 << " to:" << dest_width << "x" << dest_height

	304 << (alpha ? " with alpha" : " w/o alpha");

	305 LOG(INFO) << "c:" << c_us << " sse:" << sse_us;

	306 LOG(INFO) << "ratio:" << static_cast<float>(c_us) / sse_us;

	307

	308 // Comparing result.

	309 for (unsigned int i = 0; i < dest_height; i++) {

	310 for (unsigned int x = 0; x < dest_width * 4; x++) { // RGBA always.

	311 EXPECT_EQ(r1[x], r2[x]);

	312 }

	313 r1 += result_c.rowBytes();

	314 r2 += result_sse.rowBytes();

	315 }

	316 }

	317 }

	318 }

	319 #endif

	320 #endif

	321 }

	322

207 } // namespace skia	323 } // namespace skia

OLD	NEW

« skia/ext/convolver.cc ('K') | « skia/ext/convolver.cc ('k') | skia/ext/image_operations.cc » ('j') | no next file with comments »