OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <string.h> | 5 #include <string.h> |
6 #include <time.h> | 6 #include <time.h> |
7 #include <vector> | 7 #include <vector> |
8 | 8 |
9 #include "base/basictypes.h" | 9 #include "base/basictypes.h" |
| 10 #include "base/logging.h" |
| 11 #include "base/time.h" |
10 #include "skia/ext/convolver.h" | 12 #include "skia/ext/convolver.h" |
11 #include "testing/gtest/include/gtest/gtest.h" | 13 #include "testing/gtest/include/gtest/gtest.h" |
| 14 #include "third_party/skia/include/core/SkBitmap.h" |
| 15 #include "third_party/skia/include/core/SkColorPriv.h" |
| 16 #include "third_party/skia/include/core/SkRect.h" |
| 17 #include "third_party/skia/include/core/SkTypes.h" |
12 | 18 |
13 namespace skia { | 19 namespace skia { |
14 | 20 |
15 namespace { | 21 namespace { |
16 | 22 |
17 // Fills the given filter with impulse functions for the range 0->num_entries. | 23 // Fills the given filter with impulse functions for the range 0->num_entries. |
18 void FillImpulseFilter(int num_entries, ConvolutionFilter1D* filter) { | 24 void FillImpulseFilter(int num_entries, ConvolutionFilter1D* filter) { |
19 float one = 1.0f; | 25 float one = 1.0f; |
20 for (int i = 0; i < num_entries; i++) | 26 for (int i = 0; i < num_entries; i++) |
21 filter->AddFilter(i, &one, 1); | 27 filter->AddFilter(i, &one, 1); |
22 } | 28 } |
23 | 29 |
24 // Filters the given input with the impulse function, and verifies that it | 30 // Filters the given input with the impulse function, and verifies that it |
25 // does not change. | 31 // does not change. |
26 void TestImpulseConvolution(const unsigned char* data, int width, int height) { | 32 void TestImpulseConvolution(const unsigned char* data, int width, int height) { |
27 int byte_count = width * height * 4; | 33 int byte_count = width * height * 4; |
28 | 34 |
29 ConvolutionFilter1D filter_x; | 35 ConvolutionFilter1D filter_x; |
30 FillImpulseFilter(width, &filter_x); | 36 FillImpulseFilter(width, &filter_x); |
31 | 37 |
32 ConvolutionFilter1D filter_y; | 38 ConvolutionFilter1D filter_y; |
33 FillImpulseFilter(height, &filter_y); | 39 FillImpulseFilter(height, &filter_y); |
34 | 40 |
35 std::vector<unsigned char> output; | 41 std::vector<unsigned char> output; |
36 output.resize(byte_count); | 42 output.resize(byte_count); |
37 BGRAConvolve2D(data, width * 4, true, filter_x, filter_y, | 43 BGRAConvolve2D(data, width * 4, true, filter_x, filter_y, |
38 filter_x.num_values() * 4, &output[0]); | 44 filter_x.num_values() * 4, &output[0], false); |
39 | 45 |
40 // Output should exactly match input. | 46 // Output should exactly match input. |
41 EXPECT_EQ(0, memcmp(data, &output[0], byte_count)); | 47 EXPECT_EQ(0, memcmp(data, &output[0], byte_count)); |
42 } | 48 } |
43 | 49 |
44 // Fills the destination filter with a box filter averaging every two pixels | 50 // Fills the destination filter with a box filter averaging every two pixels |
45 // to produce the output. | 51 // to produce the output. |
46 void FillBoxFilter(int size, ConvolutionFilter1D* filter) { | 52 void FillBoxFilter(int size, ConvolutionFilter1D* filter) { |
47 const float box[2] = { 0.5, 0.5 }; | 53 const float box[2] = { 0.5, 0.5 }; |
48 for (int i = 0; i < size; i++) | 54 for (int i = 0; i < size; i++) |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
99 for (int i = 0; i < src_byte_count; i++) | 105 for (int i = 0; i < src_byte_count; i++) |
100 input[i] = rand() * 255 / RAND_MAX; | 106 input[i] = rand() * 255 / RAND_MAX; |
101 | 107 |
102 // Compute the filters. | 108 // Compute the filters. |
103 ConvolutionFilter1D filter_x, filter_y; | 109 ConvolutionFilter1D filter_x, filter_y; |
104 FillBoxFilter(dest_width, &filter_x); | 110 FillBoxFilter(dest_width, &filter_x); |
105 FillBoxFilter(dest_height, &filter_y); | 111 FillBoxFilter(dest_height, &filter_y); |
106 | 112 |
107 // Do the convolution. | 113 // Do the convolution. |
108 BGRAConvolve2D(&input[0], src_width, true, filter_x, filter_y, | 114 BGRAConvolve2D(&input[0], src_width, true, filter_x, filter_y, |
109 filter_x.num_values() * 4, &output[0]); | 115 filter_x.num_values() * 4, &output[0], false); |
110 | 116 |
111 // Compute the expected results and check, allowing for a small difference | 117 // Compute the expected results and check, allowing for a small difference |
112 // to account for rounding errors. | 118 // to account for rounding errors. |
113 for (int y = 0; y < dest_height; y++) { | 119 for (int y = 0; y < dest_height; y++) { |
114 for (int x = 0; x < dest_width; x++) { | 120 for (int x = 0; x < dest_width; x++) { |
115 for (int channel = 0; channel < 4; channel++) { | 121 for (int channel = 0; channel < 4; channel++) { |
116 int src_offset = (y * 2 * src_row_stride + x * 2 * 4) + channel; | 122 int src_offset = (y * 2 * src_row_stride + x * 2 * 4) + channel; |
117 int value = input[src_offset] + // Top left source pixel. | 123 int value = input[src_offset] + // Top left source pixel. |
118 input[src_offset + 4] + // Top right source pixel. | 124 input[src_offset + 4] + // Top right source pixel. |
119 input[src_offset + src_row_stride] + // Lower left. | 125 input[src_offset + src_row_stride] + // Lower left. |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
197 filter.AddFilter(66, factors6, arraysize(factors6)); | 203 filter.AddFilter(66, factors6, arraysize(factors6)); |
198 ASSERT_EQ(6, filter.max_filter()); | 204 ASSERT_EQ(6, filter.max_filter()); |
199 ASSERT_EQ(6, filter.num_values()); | 205 ASSERT_EQ(6, filter.num_values()); |
200 | 206 |
201 values = filter.FilterForValue(5, &filter_offset, &filter_length); | 207 values = filter.FilterForValue(5, &filter_offset, &filter_length); |
202 ASSERT_TRUE(values == NULL); // filter_length == 0 => values is NULL | 208 ASSERT_TRUE(values == NULL); // filter_length == 0 => values is NULL |
203 ASSERT_EQ(66, filter_offset); // value passed in | 209 ASSERT_EQ(66, filter_offset); // value passed in |
204 ASSERT_EQ(0, filter_length); | 210 ASSERT_EQ(0, filter_length); |
205 } | 211 } |
206 | 212 |
| 213 TEST(Convolver, SIMDVerification) { |
| 214 #if defined(SIMD_SSE2) |
| 215 base::CPU cpu; |
| 216 if (!cpu.has_sse2()) return; |
| 217 |
| 218 int source_sizes[][2] = { {1920, 1080}, {720, 480}, {1377, 523}, {325, 241} }; |
| 219 int dest_sizes[][2] = { {1280, 1024}, {480, 270}, {177, 123} }; |
| 220 float filter[] = { 0.05f, -0.15f, 0.6f, 0.6f, -0.15f, 0.05f }; |
| 221 |
| 222 srand(static_cast<unsigned int>(time(0))); |
| 223 |
| 224 // Loop over some specific source and destination dimensions. |
| 225 for (unsigned int i = 0; i < arraysize(source_sizes); ++i) { |
| 226 unsigned int source_width = source_sizes[i][0]; |
| 227 unsigned int source_height = source_sizes[i][1]; |
| 228 for (unsigned int j = 0; j < arraysize(dest_sizes); ++j) { |
| 229 unsigned int dest_width = source_sizes[j][0]; |
| 230 unsigned int dest_height = source_sizes[j][1]; |
| 231 |
| 232 // Preparing convolve coefficients. |
| 233 ConvolutionFilter1D x_filter, y_filter; |
| 234 for (unsigned int p = 0; p < dest_width; ++p) { |
| 235 unsigned int offset = source_width * p / dest_width; |
| 236 if (offset > source_width - arraysize(filter)) |
| 237 offset = source_width - arraysize(filter); |
| 238 x_filter.AddFilter(offset, filter, arraysize(filter)); |
| 239 } |
| 240 for (unsigned int p = 0; p < dest_height; ++p) { |
| 241 unsigned int offset = source_height * p / dest_height; |
| 242 if (offset > source_height - arraysize(filter)) |
| 243 offset = source_height - arraysize(filter); |
| 244 y_filter.AddFilter(offset, filter, arraysize(filter)); |
| 245 } |
| 246 |
| 247 // Allocate input and output skia bitmap. |
| 248 SkBitmap source, result_c, result_sse; |
| 249 source.setConfig(SkBitmap::kARGB_8888_Config, |
| 250 source_width, source_height); |
| 251 source.allocPixels(); |
| 252 result_c.setConfig(SkBitmap::kARGB_8888_Config, |
| 253 dest_width, dest_height); |
| 254 result_c.allocPixels(); |
| 255 result_sse.setConfig(SkBitmap::kARGB_8888_Config, |
| 256 dest_width, dest_height); |
| 257 result_sse.allocPixels(); |
| 258 |
| 259 // Randomize source bitmap for testing. |
| 260 unsigned char* src_ptr = static_cast<unsigned char*>(source.getPixels()); |
| 261 for (int y = 0; y < source.height(); y++) { |
| 262 for (int x = 0; x < source.rowBytes(); x++) |
| 263 src_ptr[x] = rand() % 255; |
| 264 src_ptr += source.rowBytes(); |
| 265 } |
| 266 |
| 267 // Test both cases with different has_alpha. |
| 268 for (int alpha = 0; alpha < 2; alpha++) { |
| 269 // Convolve using C code. |
| 270 base::TimeTicks resize_start; |
| 271 base::TimeDelta delta_c, delta_sse; |
| 272 unsigned char* r1 = static_cast<unsigned char*>(result_c.getPixels()); |
| 273 unsigned char* r2 = static_cast<unsigned char*>(result_sse.getPixels()); |
| 274 |
| 275 resize_start = base::TimeTicks::Now(); |
| 276 BGRAConvolve2D(static_cast<const uint8*>(source.getPixels()), |
| 277 static_cast<int>(source.rowBytes()), |
| 278 alpha ? true : false, x_filter, y_filter, |
| 279 static_cast<int>(result_c.rowBytes()), r1, false); |
| 280 delta_c = base::TimeTicks::Now() - resize_start; |
| 281 |
| 282 resize_start = base::TimeTicks::Now(); |
| 283 // Convolve using SSE2 code |
| 284 BGRAConvolve2D(static_cast<const uint8*>(source.getPixels()), |
| 285 static_cast<int>(source.rowBytes()), |
| 286 alpha ? true : false, x_filter, y_filter, |
| 287 static_cast<int>(result_sse.rowBytes()), r2, true); |
| 288 delta_sse = base::TimeTicks::Now() - resize_start; |
| 289 |
| 290 // Unfortunately I could not enable the performance check now. |
| 291 // Most bots use debug version, and there are great difference between |
| 292 // the code generation for intrinsic, etc. In release version speed |
| 293 // difference was 150%-200% depend on alpha channel presence; |
| 294 // while in debug version speed difference was 96%-120%. |
| 295 // TODO(jiesun): optimize further until we could enable this for |
| 296 // debug version too. |
| 297 // EXPECT_LE(delta_sse, delta_c); |
| 298 |
| 299 int64 c_us = delta_c.InMicroseconds(); |
| 300 int64 sse_us = delta_sse.InMicroseconds(); |
| 301 LOG(INFO) << "from:" << source_width << "x" << source_height |
| 302 << " to:" << dest_width << "x" << dest_height |
| 303 << (alpha ? " with alpha" : " w/o alpha"); |
| 304 LOG(INFO) << "c:" << c_us << " sse:" << sse_us; |
| 305 LOG(INFO) << "ratio:" << static_cast<float>(c_us) / sse_us; |
| 306 |
| 307 // Comparing result. |
| 308 for (unsigned int i = 0; i < dest_height; i++) { |
| 309 for (unsigned int x = 0; x < dest_width * 4; x++) { // RGBA always. |
| 310 EXPECT_EQ(r1[x], r2[x]); |
| 311 } |
| 312 r1 += result_c.rowBytes(); |
| 313 r2 += result_sse.rowBytes(); |
| 314 } |
| 315 } |
| 316 } |
| 317 } |
| 318 #endif |
| 319 } |
| 320 |
207 } // namespace skia | 321 } // namespace skia |
OLD | NEW |