Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include <string.h> | 5 #include <string.h> |
| 6 #include <time.h> | 6 #include <time.h> |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <numeric> | 8 #include <numeric> |
| 9 #include <vector> | 9 #include <vector> |
| 10 | 10 |
| (...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 205 filter.AddFilter(66, factors6, arraysize(factors6)); | 205 filter.AddFilter(66, factors6, arraysize(factors6)); |
| 206 ASSERT_EQ(6, filter.max_filter()); | 206 ASSERT_EQ(6, filter.max_filter()); |
| 207 ASSERT_EQ(6, filter.num_values()); | 207 ASSERT_EQ(6, filter.num_values()); |
| 208 | 208 |
| 209 values = filter.FilterForValue(5, &filter_offset, &filter_length); | 209 values = filter.FilterForValue(5, &filter_offset, &filter_length); |
| 210 ASSERT_TRUE(values == NULL); // filter_length == 0 => values is NULL | 210 ASSERT_TRUE(values == NULL); // filter_length == 0 => values is NULL |
| 211 ASSERT_EQ(66, filter_offset); // value passed in | 211 ASSERT_EQ(66, filter_offset); // value passed in |
| 212 ASSERT_EQ(0, filter_length); | 212 ASSERT_EQ(0, filter_length); |
| 213 } | 213 } |
| 214 | 214 |
| 215 #if defined(THREAD_SANITIZER) | 215 void VerifySIMD(unsigned int source_width, |
| 216 // Times out under ThreadSanitizer, http://crbug.com/134400. | 216 unsigned int source_height, |
| 217 #define MAYBE_SIMDVerification DISABLED_SIMDVerification | 217 unsigned int dest_width, |
| 218 #else | 218 unsigned int dest_height) { |
| 219 #define MAYBE_SIMDVerification SIMDVerification | |
| 220 #endif | |
| 221 TEST(Convolver, MAYBE_SIMDVerification) { | |
| 222 int source_sizes[][2] = { | |
| 223 {1,1}, {1,2}, {1,3}, {1,4}, {1,5}, | |
| 224 {2,1}, {2,2}, {2,3}, {2,4}, {2,5}, | |
| 225 {3,1}, {3,2}, {3,3}, {3,4}, {3,5}, | |
| 226 {4,1}, {4,2}, {4,3}, {4,4}, {4,5}, | |
| 227 #ifdef NDEBUG | |
| 228 {1920, 1080}, | |
| 229 {720, 480}, | |
| 230 {1377, 523}, | |
| 231 #endif | |
| 232 {325, 241} | |
| 233 }; | |
| 234 #ifdef NDEBUG | |
| 235 int dest_sizes[][2] = { {1280, 1024}, {480, 270}, {177, 123} }; | |
| 236 #else | |
| 237 int dest_sizes[][2] = { {128, 102}, {48, 27}, {17, 13} }; | |
| 238 #endif | |
| 239 float filter[] = { 0.05f, -0.15f, 0.6f, 0.6f, -0.15f, 0.05f }; | 219 float filter[] = { 0.05f, -0.15f, 0.6f, 0.6f, -0.15f, 0.05f }; |
| 220 // Preparing convolve coefficients. | |
| 221 ConvolutionFilter1D x_filter, y_filter; | |
| 222 for (unsigned int p = 0; p < dest_width; ++p) { | |
| 223 unsigned int offset = source_width * p / dest_width; | |
| 224 EXPECT_LT(offset, source_width); | |
| 225 x_filter.AddFilter(offset, filter, | |
| 226 std::min<int>(arraysize(filter), | |
| 227 source_width - offset)); | |
| 228 } | |
| 229 x_filter.PaddingForSIMD(); | |
| 230 for (unsigned int p = 0; p < dest_height; ++p) { | |
| 231 unsigned int offset = source_height * p / dest_height; | |
| 232 y_filter.AddFilter(offset, filter, | |
| 233 std::min<int>(arraysize(filter), | |
| 234 source_height - offset)); | |
| 235 } | |
| 236 y_filter.PaddingForSIMD(); | |
| 237 | |
| 238 // Allocate input and output skia bitmap. | |
| 239 SkBitmap source, result_c, result_sse; | |
| 240 source.setConfig(SkBitmap::kARGB_8888_Config, | |
| 241 source_width, source_height); | |
| 242 source.allocPixels(); | |
| 243 result_c.setConfig(SkBitmap::kARGB_8888_Config, | |
| 244 dest_width, dest_height); | |
| 245 result_c.allocPixels(); | |
| 246 result_sse.setConfig(SkBitmap::kARGB_8888_Config, | |
| 247 dest_width, dest_height); | |
| 248 result_sse.allocPixels(); | |
| 249 | |
| 250 // Randomize source bitmap for testing. | |
| 251 unsigned char* src_ptr = static_cast<unsigned char*>(source.getPixels()); | |
| 252 for (int y = 0; y < source.height(); y++) { | |
| 253 for (unsigned int x = 0; x < source.rowBytes(); x++) | |
| 254 src_ptr[x] = rand() % 255; | |
| 255 src_ptr += source.rowBytes(); | |
| 256 } | |
| 257 | |
| 258 // Test both cases with different has_alpha. | |
| 259 for (int alpha = 0; alpha < 2; alpha++) { | |
| 260 // Convolve using C code. | |
| 261 base::TimeTicks resize_start; | |
| 262 base::TimeDelta delta_c, delta_sse; | |
| 263 unsigned char* r1 = static_cast<unsigned char*>(result_c.getPixels()); | |
| 264 unsigned char* r2 = static_cast<unsigned char*>(result_sse.getPixels()); | |
| 265 | |
| 266 resize_start = base::TimeTicks::Now(); | |
| 267 BGRAConvolve2D(static_cast<const uint8*>(source.getPixels()), | |
| 268 static_cast<int>(source.rowBytes()), | |
| 269 (alpha != 0), x_filter, y_filter, | |
| 270 static_cast<int>(result_c.rowBytes()), r1, false); | |
| 271 delta_c = base::TimeTicks::Now() - resize_start; | |
| 272 | |
| 273 resize_start = base::TimeTicks::Now(); | |
| 274 // Convolve using SSE2 code | |
| 275 BGRAConvolve2D(static_cast<const uint8*>(source.getPixels()), | |
| 276 static_cast<int>(source.rowBytes()), | |
| 277 (alpha != 0), x_filter, y_filter, | |
| 278 static_cast<int>(result_sse.rowBytes()), r2, true); | |
| 279 delta_sse = base::TimeTicks::Now() - resize_start; | |
| 280 | |
| 281 // Unfortunately I could not enable the performance check now. | |
| 282 // Most bots use debug version, and there are great difference between | |
| 283 // the code generation for intrinsic, etc. In release version speed | |
| 284 // difference was 150%-200% depend on alpha channel presence; | |
| 285 // while in debug version speed difference was 96%-120%. | |
| 286 // TODO(jiesun): optimize further until we could enable this for | |
| 287 // debug version too. | |
| 288 // EXPECT_LE(delta_sse, delta_c); | |
| 289 | |
| 290 int64 c_us = delta_c.InMicroseconds(); | |
| 291 int64 sse_us = delta_sse.InMicroseconds(); | |
| 292 VLOG(1) << "from:" << source_width << "x" << source_height | |
| 293 << " to:" << dest_width << "x" << dest_height | |
| 294 << (alpha ? " with alpha" : " w/o alpha"); | |
| 295 VLOG(1) << "c:" << c_us << " sse:" << sse_us; | |
| 296 VLOG(1) << "ratio:" << static_cast<float>(c_us) / sse_us; | |
| 297 | |
| 298 // Comparing result. | |
| 299 for (unsigned int i = 0; i < dest_height; i++) { | |
| 300 EXPECT_FALSE(memcmp(r1, r2, dest_width * 4)); // RGBA always | |
| 301 r1 += result_c.rowBytes(); | |
| 302 r2 += result_sse.rowBytes(); | |
| 303 } | |
| 304 } | |
| 305 } | |
| 306 | |
| 307 TEST(Convolver, VerifySIMDEdgeCases) { | |
| 308 srand(static_cast<unsigned int>(time(0))); | |
| 309 // Loop over all possible (small) image sizes | |
| 310 for (unsigned int width = 1; width < 20; width++) { | |
| 311 for (unsigned int height = 1; height < 20; height++) { | |
| 312 VerifySIMD(width, height, 8, 8); | |
| 313 VerifySIMD(8, 8, width, height); | |
| 314 } | |
| 315 } | |
| 316 } | |
| 317 | |
| 318 // Verify that large upscales/downscales produce the same result | |
| 319 // with and without SIMD. | |
| 320 TEST(Convolver, VerifySIMDPrecision) { | |
| 321 int source_sizes[][2] = { {1920, 1080}, {1377, 523}, {325, 241} }; | |
| 322 int dest_sizes[][2] = { {1280, 1024}, {177, 123} }; | |
|
Stephen White
2014/02/25 21:13:59
I'm surprised these run in reasonable time in DEBU
hubbe
2014/02/25 21:59:18
A good chunk of time was spent comparing the image
| |
| 240 | 323 |
| 241 srand(static_cast<unsigned int>(time(0))); | 324 srand(static_cast<unsigned int>(time(0))); |
| 242 | 325 |
| 243 // Loop over some specific source and destination dimensions. | 326 // Loop over some specific source and destination dimensions. |
| 244 for (unsigned int i = 0; i < arraysize(source_sizes); ++i) { | 327 for (unsigned int i = 0; i < arraysize(source_sizes); ++i) { |
| 245 unsigned int source_width = source_sizes[i][0]; | 328 unsigned int source_width = source_sizes[i][0]; |
| 246 unsigned int source_height = source_sizes[i][1]; | 329 unsigned int source_height = source_sizes[i][1]; |
| 247 for (unsigned int j = 0; j < arraysize(dest_sizes); ++j) { | 330 for (unsigned int j = 0; j < arraysize(dest_sizes); ++j) { |
| 248 unsigned int dest_width = dest_sizes[j][0]; | 331 unsigned int dest_width = dest_sizes[j][0]; |
| 249 unsigned int dest_height = dest_sizes[j][1]; | 332 unsigned int dest_height = dest_sizes[j][1]; |
| 250 | 333 VerifySIMD(source_width, source_height, dest_width, dest_height); |
| 251 // Preparing convolve coefficients. | |
| 252 ConvolutionFilter1D x_filter, y_filter; | |
| 253 for (unsigned int p = 0; p < dest_width; ++p) { | |
| 254 unsigned int offset = source_width * p / dest_width; | |
| 255 EXPECT_LT(offset, source_width); | |
| 256 x_filter.AddFilter(offset, filter, | |
| 257 std::min<int>(arraysize(filter), | |
| 258 source_width - offset)); | |
| 259 } | |
| 260 x_filter.PaddingForSIMD(); | |
| 261 for (unsigned int p = 0; p < dest_height; ++p) { | |
| 262 unsigned int offset = source_height * p / dest_height; | |
| 263 y_filter.AddFilter(offset, filter, | |
| 264 std::min<int>(arraysize(filter), | |
| 265 source_height - offset)); | |
| 266 } | |
| 267 y_filter.PaddingForSIMD(); | |
| 268 | |
| 269 // Allocate input and output skia bitmap. | |
| 270 SkBitmap source, result_c, result_sse; | |
| 271 source.setConfig(SkBitmap::kARGB_8888_Config, | |
| 272 source_width, source_height); | |
| 273 source.allocPixels(); | |
| 274 result_c.setConfig(SkBitmap::kARGB_8888_Config, | |
| 275 dest_width, dest_height); | |
| 276 result_c.allocPixels(); | |
| 277 result_sse.setConfig(SkBitmap::kARGB_8888_Config, | |
| 278 dest_width, dest_height); | |
| 279 result_sse.allocPixels(); | |
| 280 | |
| 281 // Randomize source bitmap for testing. | |
| 282 unsigned char* src_ptr = static_cast<unsigned char*>(source.getPixels()); | |
| 283 for (int y = 0; y < source.height(); y++) { | |
| 284 for (unsigned int x = 0; x < source.rowBytes(); x++) | |
| 285 src_ptr[x] = rand() % 255; | |
| 286 src_ptr += source.rowBytes(); | |
| 287 } | |
| 288 | |
| 289 // Test both cases with different has_alpha. | |
| 290 for (int alpha = 0; alpha < 2; alpha++) { | |
| 291 // Convolve using C code. | |
| 292 base::TimeTicks resize_start; | |
| 293 base::TimeDelta delta_c, delta_sse; | |
| 294 unsigned char* r1 = static_cast<unsigned char*>(result_c.getPixels()); | |
| 295 unsigned char* r2 = static_cast<unsigned char*>(result_sse.getPixels()); | |
| 296 | |
| 297 resize_start = base::TimeTicks::Now(); | |
| 298 BGRAConvolve2D(static_cast<const uint8*>(source.getPixels()), | |
| 299 static_cast<int>(source.rowBytes()), | |
| 300 (alpha != 0), x_filter, y_filter, | |
| 301 static_cast<int>(result_c.rowBytes()), r1, false); | |
| 302 delta_c = base::TimeTicks::Now() - resize_start; | |
| 303 | |
| 304 resize_start = base::TimeTicks::Now(); | |
| 305 // Convolve using SSE2 code | |
| 306 BGRAConvolve2D(static_cast<const uint8*>(source.getPixels()), | |
| 307 static_cast<int>(source.rowBytes()), | |
| 308 (alpha != 0), x_filter, y_filter, | |
| 309 static_cast<int>(result_sse.rowBytes()), r2, true); | |
| 310 delta_sse = base::TimeTicks::Now() - resize_start; | |
| 311 | |
| 312 // Unfortunately I could not enable the performance check now. | |
| 313 // Most bots use debug version, and there are great difference between | |
| 314 // the code generation for intrinsic, etc. In release version speed | |
| 315 // difference was 150%-200% depend on alpha channel presence; | |
| 316 // while in debug version speed difference was 96%-120%. | |
| 317 // TODO(jiesun): optimize further until we could enable this for | |
| 318 // debug version too. | |
| 319 // EXPECT_LE(delta_sse, delta_c); | |
| 320 | |
| 321 int64 c_us = delta_c.InMicroseconds(); | |
| 322 int64 sse_us = delta_sse.InMicroseconds(); | |
| 323 VLOG(1) << "from:" << source_width << "x" << source_height | |
| 324 << " to:" << dest_width << "x" << dest_height | |
| 325 << (alpha ? " with alpha" : " w/o alpha"); | |
| 326 VLOG(1) << "c:" << c_us << " sse:" << sse_us; | |
| 327 VLOG(1) << "ratio:" << static_cast<float>(c_us) / sse_us; | |
| 328 | |
| 329 // Comparing result. | |
| 330 for (unsigned int i = 0; i < dest_height; i++) { | |
| 331 for (unsigned int x = 0; x < dest_width * 4; x++) { // RGBA always. | |
| 332 EXPECT_EQ(r1[x], r2[x]); | |
| 333 } | |
| 334 r1 += result_c.rowBytes(); | |
| 335 r2 += result_sse.rowBytes(); | |
| 336 } | |
| 337 } | |
| 338 } | 334 } |
| 339 } | 335 } |
| 340 } | 336 } |
| 341 | 337 |
| 342 TEST(Convolver, SeparableSingleConvolution) { | 338 TEST(Convolver, SeparableSingleConvolution) { |
| 343 static const int kImgWidth = 1024; | 339 static const int kImgWidth = 1024; |
| 344 static const int kImgHeight = 1024; | 340 static const int kImgHeight = 1024; |
| 345 static const int kChannelCount = 3; | 341 static const int kChannelCount = 3; |
| 346 static const int kStrideSlack = 22; | 342 static const int kStrideSlack = 22; |
| 347 ConvolutionFilter1D filter; | 343 ConvolutionFilter1D filter; |
| (...skipping 185 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 533 fp_gradient_kernel.end()), -1.5f); | 529 fp_gradient_kernel.end()), -1.5f); |
| 534 EXPECT_LT(*std::min_element(fp_gradient_kernel.begin(), | 530 EXPECT_LT(*std::min_element(fp_gradient_kernel.begin(), |
| 535 fp_gradient_kernel.end()), 0.0f); | 531 fp_gradient_kernel.end()), 0.0f); |
| 536 EXPECT_LT(*std::max_element(fp_gradient_kernel.begin(), | 532 EXPECT_LT(*std::max_element(fp_gradient_kernel.begin(), |
| 537 fp_gradient_kernel.end()), 1.5f); | 533 fp_gradient_kernel.end()), 1.5f); |
| 538 EXPECT_GT(*std::max_element(fp_gradient_kernel.begin(), | 534 EXPECT_GT(*std::max_element(fp_gradient_kernel.begin(), |
| 539 fp_gradient_kernel.end()), 0.0f); | 535 fp_gradient_kernel.end()), 0.0f); |
| 540 } | 536 } |
| 541 | 537 |
| 542 } // namespace skia | 538 } // namespace skia |
| OLD | NEW |