| Index: skia/ext/convolver_unittest.cc
|
| diff --git a/skia/ext/convolver_unittest.cc b/skia/ext/convolver_unittest.cc
|
| index 5520b2c151e841f76695b76d854725494dbbe2ab..9ac40c9197a1d64a19c819b3a59736ec9d14fedc 100644
|
| --- a/skia/ext/convolver_unittest.cc
|
| +++ b/skia/ext/convolver_unittest.cc
|
| @@ -7,8 +7,14 @@
|
| #include <vector>
|
|
|
| #include "base/basictypes.h"
|
| +#include "base/logging.h"
|
| +#include "base/time.h"
|
| #include "skia/ext/convolver.h"
|
| #include "testing/gtest/include/gtest/gtest.h"
|
| +#include "third_party/skia/include/core/SkBitmap.h"
|
| +#include "third_party/skia/include/core/SkColorPriv.h"
|
| +#include "third_party/skia/include/core/SkRect.h"
|
| +#include "third_party/skia/include/core/SkTypes.h"
|
|
|
| namespace skia {
|
|
|
| @@ -35,7 +41,7 @@ void TestImpulseConvolution(const unsigned char* data, int width, int height) {
|
| std::vector<unsigned char> output;
|
| output.resize(byte_count);
|
| BGRAConvolve2D(data, width * 4, true, filter_x, filter_y,
|
| - filter_x.num_values() * 4, &output[0]);
|
| + filter_x.num_values() * 4, &output[0], false);
|
|
|
| // Output should exactly match input.
|
| EXPECT_EQ(0, memcmp(data, &output[0], byte_count));
|
| @@ -106,7 +112,7 @@ TEST(Convolver, Halve) {
|
|
|
| // Do the convolution.
|
| BGRAConvolve2D(&input[0], src_width, true, filter_x, filter_y,
|
| - filter_x.num_values() * 4, &output[0]);
|
| + filter_x.num_values() * 4, &output[0], false);
|
|
|
| // Compute the expected results and check, allowing for a small difference
|
| // to account for rounding errors.
|
| @@ -204,4 +210,112 @@ TEST(Convolver, AddFilter) {
|
| ASSERT_EQ(0, filter_length);
|
| }
|
|
|
| +TEST(Convolver, SIMDVerification) {
|
| +#if defined(SIMD_SSE2)
|
| + base::CPU cpu;
|
| + if (!cpu.has_sse2()) return;
|
| +
|
| + int source_sizes[][2] = { {1920, 1080}, {720, 480}, {1377, 523}, {325, 241} };
|
| + int dest_sizes[][2] = { {1280, 1024}, {480, 270}, {177, 123} };
|
| + float filter[] = { 0.05f, -0.15f, 0.6f, 0.6f, -0.15f, 0.05f };
|
| +
|
| + srand(static_cast<unsigned int>(time(0)));
|
| +
|
| + // Loop over some specific source and destination dimensions.
|
| + for (unsigned int i = 0; i < arraysize(source_sizes); ++i) {
|
| + unsigned int source_width = source_sizes[i][0];
|
| + unsigned int source_height = source_sizes[i][1];
|
| + for (unsigned int j = 0; j < arraysize(dest_sizes); ++j) {
|
| + unsigned int dest_width = source_sizes[j][0];
|
| + unsigned int dest_height = source_sizes[j][1];
|
| +
|
| + // Preparing convolve coefficients.
|
| + ConvolutionFilter1D x_filter, y_filter;
|
| + for (unsigned int p = 0; p < dest_width; ++p) {
|
| + unsigned int offset = source_width * p / dest_width;
|
| + if (offset > source_width - arraysize(filter))
|
| + offset = source_width - arraysize(filter);
|
| + x_filter.AddFilter(offset, filter, arraysize(filter));
|
| + }
|
| + for (unsigned int p = 0; p < dest_height; ++p) {
|
| + unsigned int offset = source_height * p / dest_height;
|
| + if (offset > source_height - arraysize(filter))
|
| + offset = source_height - arraysize(filter);
|
| + y_filter.AddFilter(offset, filter, arraysize(filter));
|
| + }
|
| +
|
| + // Allocate input and output skia bitmap.
|
| + SkBitmap source, result_c, result_sse;
|
| + source.setConfig(SkBitmap::kARGB_8888_Config,
|
| + source_width, source_height);
|
| + source.allocPixels();
|
| + result_c.setConfig(SkBitmap::kARGB_8888_Config,
|
| + dest_width, dest_height);
|
| + result_c.allocPixels();
|
| + result_sse.setConfig(SkBitmap::kARGB_8888_Config,
|
| + dest_width, dest_height);
|
| + result_sse.allocPixels();
|
| +
|
| + // Randomize source bitmap for testing.
|
| + unsigned char* src_ptr = static_cast<unsigned char*>(source.getPixels());
|
| + for (int y = 0; y < source.height(); y++) {
|
| + for (int x = 0; x < source.rowBytes(); x++)
|
| + src_ptr[x] = rand() % 255;
|
| + src_ptr += source.rowBytes();
|
| + }
|
| +
|
| + // Test both cases with different has_alpha.
|
| + for (int alpha = 0; alpha < 2; alpha++) {
|
| + // Convolve using C code.
|
| + base::TimeTicks resize_start;
|
| + base::TimeDelta delta_c, delta_sse;
|
| + unsigned char* r1 = static_cast<unsigned char*>(result_c.getPixels());
|
| + unsigned char* r2 = static_cast<unsigned char*>(result_sse.getPixels());
|
| +
|
| + resize_start = base::TimeTicks::Now();
|
| + BGRAConvolve2D(static_cast<const uint8*>(source.getPixels()),
|
| + static_cast<int>(source.rowBytes()),
|
| + alpha ? true : false, x_filter, y_filter,
|
| + static_cast<int>(result_c.rowBytes()), r1, false);
|
| + delta_c = base::TimeTicks::Now() - resize_start;
|
| +
|
| + resize_start = base::TimeTicks::Now();
|
| + // Convolve using SSE2 code
|
| + BGRAConvolve2D(static_cast<const uint8*>(source.getPixels()),
|
| + static_cast<int>(source.rowBytes()),
|
| + alpha ? true : false, x_filter, y_filter,
|
| + static_cast<int>(result_sse.rowBytes()), r2, true);
|
| + delta_sse = base::TimeTicks::Now() - resize_start;
|
| +
|
| + // Unfortunately I could not enable the performance check now.
|
| + // Most bots use debug version, and there are great difference between
|
| + // the code generation for intrinsic, etc. In release version speed
|
| + // difference was 150%-200% depend on alpha channel presence;
|
| + // while in debug version speed difference was 96%-120%.
|
| + // TODO(jiesun): optimize further until we could enable this for
|
| + // debug version too.
|
| + // EXPECT_LE(delta_sse, delta_c);
|
| +
|
| + int64 c_us = delta_c.InMicroseconds();
|
| + int64 sse_us = delta_sse.InMicroseconds();
|
| + LOG(INFO) << "from:" << source_width << "x" << source_height
|
| + << " to:" << dest_width << "x" << dest_height
|
| + << (alpha ? " with alpha" : " w/o alpha");
|
| + LOG(INFO) << "c:" << c_us << " sse:" << sse_us;
|
| + LOG(INFO) << "ratio:" << static_cast<float>(c_us) / sse_us;
|
| +
|
| + // Comparing result.
|
| + for (unsigned int i = 0; i < dest_height; i++) {
|
| + for (unsigned int x = 0; x < dest_width * 4; x++) { // RGBA always.
|
| + EXPECT_EQ(r1[x], r2[x]);
|
| + }
|
| + r1 += result_c.rowBytes();
|
| + r2 += result_sse.rowBytes();
|
| + }
|
| + }
|
| + }
|
| + }
|
| +#endif
|
| +}
|
| +
|
| } // namespace skia
|
|
|