OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <string.h> | 5 #include <string.h> |
6 #include <time.h> | 6 #include <time.h> |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <numeric> | 8 #include <numeric> |
9 #include <vector> | 9 #include <vector> |
10 | 10 |
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
205 filter.AddFilter(66, factors6, arraysize(factors6)); | 205 filter.AddFilter(66, factors6, arraysize(factors6)); |
206 ASSERT_EQ(6, filter.max_filter()); | 206 ASSERT_EQ(6, filter.max_filter()); |
207 ASSERT_EQ(6, filter.num_values()); | 207 ASSERT_EQ(6, filter.num_values()); |
208 | 208 |
209 values = filter.FilterForValue(5, &filter_offset, &filter_length); | 209 values = filter.FilterForValue(5, &filter_offset, &filter_length); |
210 ASSERT_TRUE(values == NULL); // filter_length == 0 => values is NULL | 210 ASSERT_TRUE(values == NULL); // filter_length == 0 => values is NULL |
211 ASSERT_EQ(66, filter_offset); // value passed in | 211 ASSERT_EQ(66, filter_offset); // value passed in |
212 ASSERT_EQ(0, filter_length); | 212 ASSERT_EQ(0, filter_length); |
213 } | 213 } |
214 | 214 |
215 #if defined(THREAD_SANITIZER) | 215 void VerifySIMD(unsigned int source_width, |
216 // Times out under ThreadSanitizer, http://crbug.com/134400. | 216 unsigned int source_height, |
217 #define MAYBE_SIMDVerification DISABLED_SIMDVerification | 217 unsigned int dest_width, |
218 #else | 218 unsigned int dest_height) { |
219 #define MAYBE_SIMDVerification SIMDVerification | |
220 #endif | |
221 TEST(Convolver, MAYBE_SIMDVerification) { | |
222 int source_sizes[][2] = { | |
223 {1,1}, {1,2}, {1,3}, {1,4}, {1,5}, | |
224 {2,1}, {2,2}, {2,3}, {2,4}, {2,5}, | |
225 {3,1}, {3,2}, {3,3}, {3,4}, {3,5}, | |
226 {4,1}, {4,2}, {4,3}, {4,4}, {4,5}, | |
227 #ifdef NDEBUG | |
228 {1920, 1080}, | |
229 {720, 480}, | |
230 {1377, 523}, | |
231 #endif | |
232 {325, 241} | |
233 }; | |
234 #ifdef NDEBUG | |
235 int dest_sizes[][2] = { {1280, 1024}, {480, 270}, {177, 123} }; | |
236 #else | |
237 int dest_sizes[][2] = { {128, 102}, {48, 27}, {17, 13} }; | |
238 #endif | |
239 float filter[] = { 0.05f, -0.15f, 0.6f, 0.6f, -0.15f, 0.05f }; | 219 float filter[] = { 0.05f, -0.15f, 0.6f, 0.6f, -0.15f, 0.05f }; |
220 // Preparing convolve coefficients. | |
221 ConvolutionFilter1D x_filter, y_filter; | |
222 for (unsigned int p = 0; p < dest_width; ++p) { | |
223 unsigned int offset = source_width * p / dest_width; | |
224 EXPECT_LT(offset, source_width); | |
225 x_filter.AddFilter(offset, filter, | |
226 std::min<int>(arraysize(filter), | |
227 source_width - offset)); | |
228 } | |
229 x_filter.PaddingForSIMD(); | |
230 for (unsigned int p = 0; p < dest_height; ++p) { | |
231 unsigned int offset = source_height * p / dest_height; | |
232 y_filter.AddFilter(offset, filter, | |
233 std::min<int>(arraysize(filter), | |
234 source_height - offset)); | |
235 } | |
236 y_filter.PaddingForSIMD(); | |
237 | |
238 // Allocate input and output skia bitmap. | |
239 SkBitmap source, result_c, result_sse; | |
240 source.setConfig(SkBitmap::kARGB_8888_Config, | |
241 source_width, source_height); | |
242 source.allocPixels(); | |
243 result_c.setConfig(SkBitmap::kARGB_8888_Config, | |
244 dest_width, dest_height); | |
245 result_c.allocPixels(); | |
246 result_sse.setConfig(SkBitmap::kARGB_8888_Config, | |
247 dest_width, dest_height); | |
248 result_sse.allocPixels(); | |
249 | |
250 // Randomize source bitmap for testing. | |
251 unsigned char* src_ptr = static_cast<unsigned char*>(source.getPixels()); | |
252 for (int y = 0; y < source.height(); y++) { | |
253 for (unsigned int x = 0; x < source.rowBytes(); x++) | |
254 src_ptr[x] = rand() % 255; | |
255 src_ptr += source.rowBytes(); | |
256 } | |
257 | |
258 // Test both cases with different has_alpha. | |
259 for (int alpha = 0; alpha < 2; alpha++) { | |
260 // Convolve using C code. | |
261 base::TimeTicks resize_start; | |
262 base::TimeDelta delta_c, delta_sse; | |
263 unsigned char* r1 = static_cast<unsigned char*>(result_c.getPixels()); | |
264 unsigned char* r2 = static_cast<unsigned char*>(result_sse.getPixels()); | |
265 | |
266 resize_start = base::TimeTicks::Now(); | |
267 BGRAConvolve2D(static_cast<const uint8*>(source.getPixels()), | |
268 static_cast<int>(source.rowBytes()), | |
269 (alpha != 0), x_filter, y_filter, | |
270 static_cast<int>(result_c.rowBytes()), r1, false); | |
271 delta_c = base::TimeTicks::Now() - resize_start; | |
272 | |
273 resize_start = base::TimeTicks::Now(); | |
274 // Convolve using SSE2 code | |
275 BGRAConvolve2D(static_cast<const uint8*>(source.getPixels()), | |
276 static_cast<int>(source.rowBytes()), | |
277 (alpha != 0), x_filter, y_filter, | |
278 static_cast<int>(result_sse.rowBytes()), r2, true); | |
279 delta_sse = base::TimeTicks::Now() - resize_start; | |
280 | |
281 // Unfortunately I could not enable the performance check now. | |
282 // Most bots use debug version, and there are great difference between | |
283 // the code generation for intrinsic, etc. In release version speed | |
284 // difference was 150%-200% depend on alpha channel presence; | |
285 // while in debug version speed difference was 96%-120%. | |
286 // TODO(jiesun): optimize further until we could enable this for | |
287 // debug version too. | |
288 // EXPECT_LE(delta_sse, delta_c); | |
289 | |
290 int64 c_us = delta_c.InMicroseconds(); | |
291 int64 sse_us = delta_sse.InMicroseconds(); | |
292 VLOG(1) << "from:" << source_width << "x" << source_height | |
293 << " to:" << dest_width << "x" << dest_height | |
294 << (alpha ? " with alpha" : " w/o alpha"); | |
295 VLOG(1) << "c:" << c_us << " sse:" << sse_us; | |
296 VLOG(1) << "ratio:" << static_cast<float>(c_us) / sse_us; | |
297 | |
298 // Comparing result. | |
299 for (unsigned int i = 0; i < dest_height; i++) { | |
300 EXPECT_FALSE(memcmp(r1, r2, dest_width * 4)); // RGBA always | |
301 r1 += result_c.rowBytes(); | |
302 r2 += result_sse.rowBytes(); | |
303 } | |
304 } | |
305 } | |
306 | |
307 TEST(Convolver, VerifySIMDEdgeCases) { | |
308 srand(static_cast<unsigned int>(time(0))); | |
309 // Loop over all possible (small) image sizes | |
310 for (unsigned int width = 1; width < 20; width++) { | |
311 for (unsigned int height = 1; height < 20; height++) { | |
312 VerifySIMD(width, height, 8, 8); | |
313 VerifySIMD(8, 8, width, height); | |
314 } | |
315 } | |
316 } | |
317 | |
318 // Verify that large upscales/downscales produce the same result | |
319 // with and without SIMD. | |
320 TEST(Convolver, VerifySIMDPrecision) { | |
321 int source_sizes[][2] = { {1920, 1080}, {1377, 523}, {325, 241} }; | |
322 int dest_sizes[][2] = { {1280, 1024}, {177, 123} }; | |
Stephen White
2014/02/25 21:13:59
I'm surprised these run in reasonable time in DEBU
hubbe
2014/02/25 21:59:18
A good chunk of time was spent comparing the image
| |
240 | 323 |
241 srand(static_cast<unsigned int>(time(0))); | 324 srand(static_cast<unsigned int>(time(0))); |
242 | 325 |
243 // Loop over some specific source and destination dimensions. | 326 // Loop over some specific source and destination dimensions. |
244 for (unsigned int i = 0; i < arraysize(source_sizes); ++i) { | 327 for (unsigned int i = 0; i < arraysize(source_sizes); ++i) { |
245 unsigned int source_width = source_sizes[i][0]; | 328 unsigned int source_width = source_sizes[i][0]; |
246 unsigned int source_height = source_sizes[i][1]; | 329 unsigned int source_height = source_sizes[i][1]; |
247 for (unsigned int j = 0; j < arraysize(dest_sizes); ++j) { | 330 for (unsigned int j = 0; j < arraysize(dest_sizes); ++j) { |
248 unsigned int dest_width = dest_sizes[j][0]; | 331 unsigned int dest_width = dest_sizes[j][0]; |
249 unsigned int dest_height = dest_sizes[j][1]; | 332 unsigned int dest_height = dest_sizes[j][1]; |
250 | 333 VerifySIMD(source_width, source_height, dest_width, dest_height); |
251 // Preparing convolve coefficients. | |
252 ConvolutionFilter1D x_filter, y_filter; | |
253 for (unsigned int p = 0; p < dest_width; ++p) { | |
254 unsigned int offset = source_width * p / dest_width; | |
255 EXPECT_LT(offset, source_width); | |
256 x_filter.AddFilter(offset, filter, | |
257 std::min<int>(arraysize(filter), | |
258 source_width - offset)); | |
259 } | |
260 x_filter.PaddingForSIMD(); | |
261 for (unsigned int p = 0; p < dest_height; ++p) { | |
262 unsigned int offset = source_height * p / dest_height; | |
263 y_filter.AddFilter(offset, filter, | |
264 std::min<int>(arraysize(filter), | |
265 source_height - offset)); | |
266 } | |
267 y_filter.PaddingForSIMD(); | |
268 | |
269 // Allocate input and output skia bitmap. | |
270 SkBitmap source, result_c, result_sse; | |
271 source.setConfig(SkBitmap::kARGB_8888_Config, | |
272 source_width, source_height); | |
273 source.allocPixels(); | |
274 result_c.setConfig(SkBitmap::kARGB_8888_Config, | |
275 dest_width, dest_height); | |
276 result_c.allocPixels(); | |
277 result_sse.setConfig(SkBitmap::kARGB_8888_Config, | |
278 dest_width, dest_height); | |
279 result_sse.allocPixels(); | |
280 | |
281 // Randomize source bitmap for testing. | |
282 unsigned char* src_ptr = static_cast<unsigned char*>(source.getPixels()); | |
283 for (int y = 0; y < source.height(); y++) { | |
284 for (unsigned int x = 0; x < source.rowBytes(); x++) | |
285 src_ptr[x] = rand() % 255; | |
286 src_ptr += source.rowBytes(); | |
287 } | |
288 | |
289 // Test both cases with different has_alpha. | |
290 for (int alpha = 0; alpha < 2; alpha++) { | |
291 // Convolve using C code. | |
292 base::TimeTicks resize_start; | |
293 base::TimeDelta delta_c, delta_sse; | |
294 unsigned char* r1 = static_cast<unsigned char*>(result_c.getPixels()); | |
295 unsigned char* r2 = static_cast<unsigned char*>(result_sse.getPixels()); | |
296 | |
297 resize_start = base::TimeTicks::Now(); | |
298 BGRAConvolve2D(static_cast<const uint8*>(source.getPixels()), | |
299 static_cast<int>(source.rowBytes()), | |
300 (alpha != 0), x_filter, y_filter, | |
301 static_cast<int>(result_c.rowBytes()), r1, false); | |
302 delta_c = base::TimeTicks::Now() - resize_start; | |
303 | |
304 resize_start = base::TimeTicks::Now(); | |
305 // Convolve using SSE2 code | |
306 BGRAConvolve2D(static_cast<const uint8*>(source.getPixels()), | |
307 static_cast<int>(source.rowBytes()), | |
308 (alpha != 0), x_filter, y_filter, | |
309 static_cast<int>(result_sse.rowBytes()), r2, true); | |
310 delta_sse = base::TimeTicks::Now() - resize_start; | |
311 | |
312 // Unfortunately I could not enable the performance check now. | |
313 // Most bots use debug version, and there are great difference between | |
314 // the code generation for intrinsic, etc. In release version speed | |
315 // difference was 150%-200% depend on alpha channel presence; | |
316 // while in debug version speed difference was 96%-120%. | |
317 // TODO(jiesun): optimize further until we could enable this for | |
318 // debug version too. | |
319 // EXPECT_LE(delta_sse, delta_c); | |
320 | |
321 int64 c_us = delta_c.InMicroseconds(); | |
322 int64 sse_us = delta_sse.InMicroseconds(); | |
323 VLOG(1) << "from:" << source_width << "x" << source_height | |
324 << " to:" << dest_width << "x" << dest_height | |
325 << (alpha ? " with alpha" : " w/o alpha"); | |
326 VLOG(1) << "c:" << c_us << " sse:" << sse_us; | |
327 VLOG(1) << "ratio:" << static_cast<float>(c_us) / sse_us; | |
328 | |
329 // Comparing result. | |
330 for (unsigned int i = 0; i < dest_height; i++) { | |
331 for (unsigned int x = 0; x < dest_width * 4; x++) { // RGBA always. | |
332 EXPECT_EQ(r1[x], r2[x]); | |
333 } | |
334 r1 += result_c.rowBytes(); | |
335 r2 += result_sse.rowBytes(); | |
336 } | |
337 } | |
338 } | 334 } |
339 } | 335 } |
340 } | 336 } |
341 | 337 |
342 TEST(Convolver, SeparableSingleConvolution) { | 338 TEST(Convolver, SeparableSingleConvolution) { |
343 static const int kImgWidth = 1024; | 339 static const int kImgWidth = 1024; |
344 static const int kImgHeight = 1024; | 340 static const int kImgHeight = 1024; |
345 static const int kChannelCount = 3; | 341 static const int kChannelCount = 3; |
346 static const int kStrideSlack = 22; | 342 static const int kStrideSlack = 22; |
347 ConvolutionFilter1D filter; | 343 ConvolutionFilter1D filter; |
(...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
533 fp_gradient_kernel.end()), -1.5f); | 529 fp_gradient_kernel.end()), -1.5f); |
534 EXPECT_LT(*std::min_element(fp_gradient_kernel.begin(), | 530 EXPECT_LT(*std::min_element(fp_gradient_kernel.begin(), |
535 fp_gradient_kernel.end()), 0.0f); | 531 fp_gradient_kernel.end()), 0.0f); |
536 EXPECT_LT(*std::max_element(fp_gradient_kernel.begin(), | 532 EXPECT_LT(*std::max_element(fp_gradient_kernel.begin(), |
537 fp_gradient_kernel.end()), 1.5f); | 533 fp_gradient_kernel.end()), 1.5f); |
538 EXPECT_GT(*std::max_element(fp_gradient_kernel.begin(), | 534 EXPECT_GT(*std::max_element(fp_gradient_kernel.begin(), |
539 fp_gradient_kernel.end()), 0.0f); | 535 fp_gradient_kernel.end()), 0.0f); |
540 } | 536 } |
541 | 537 |
542 } // namespace skia | 538 } // namespace skia |
OLD | NEW |