| Index: bench/SkBlend_optsBench.cpp
|
| diff --git a/bench/SkBlend_optsBench.cpp b/bench/SkBlend_optsBench.cpp
|
| index 29f3ed8331f4d835d46a6143f99e7481fe5f9fa5..4dfaaef85864f0a8d9c4ba425459ab9c6700661d 100644
|
| --- a/bench/SkBlend_optsBench.cpp
|
| +++ b/bench/SkBlend_optsBench.cpp
|
| @@ -14,49 +14,96 @@
|
| #include "SkImage_Base.h"
|
| #include "SkNx.h"
|
| #include "SkOpts.h"
|
| +#include "SkPM4fPriv.h"
|
| #include "SkString.h"
|
|
|
| #define INNER_LOOPS 10
|
|
|
| -namespace sk_default {
|
| -extern void brute_force_srcover_srgb_srgb(
|
| - uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc);
|
| +static void brute_force_srcover_srgb_srgb(  // Reference path: every pixel goes through the same blend call.
|
| +        uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
|
| +    while (ndst > 0 && nsrc > 0) {  // a non-positive nsrc could never bring ndst down to 0
|
| +        int n = SkTMin(ndst, nsrc);
|
| +        // src is re-read from index 0 on every pass, so a short src run repeats across dst.
|
| +        for (int i = 0; i < n; i++) {
|
| +            srcover_blend_srgb8888_srgb_1(dst++, srgb_to_linear(to_4f(src[i])));
|
| +        }
|
| +        ndst -= n;
|
| +    }
|
| +}
|
| +
|
| +static void best_non_simd_srcover_srgb_srgb(  // Scalar path that walks src two pixels (one uint64_t) at a time.
|
| +        uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
|
| +    // Blends the two adjacent pixels at src into the two adjacent pixels at dst.
|
| +    auto srcover_srgb_srgb_2 = [](uint32_t* dst, const uint32_t* src) {
|
| +        srcover_srgb8888_srgb_1(dst++, *src++);
|
| +        srcover_srgb8888_srgb_1(dst, *src);
|
| +    };
|
| +    while (ndst > 0 && nsrc > 0) {  // a non-positive nsrc could never bring ndst down to 0
|
| +        int count = SkTMin(ndst, nsrc);
|
| +        ndst -= count;
|
| +        uint64_t* ddst = reinterpret_cast<uint64_t*>(dst);  // NOTE(review): relies on 64-bit access to uint32_t data being OK here - confirm
|
| +        const uint64_t* dsrc = reinterpret_cast<const uint64_t*>(src);
|
| +        const uint64_t* end = dsrc + (count >> 1);  // one past the last complete pair
|
| +        while (dsrc < end) {  // checked up front so a 1-pixel run never touches a whole pair
|
| +            if ((~*dsrc & 0xFF000000FF000000) == 0) {  // top byte of both pixels is 0xFF: copy the pair over dst
|
| +                do {
|
| +                    *ddst++ = *dsrc++;
|
| +                } while (dsrc < end && (~*dsrc & 0xFF000000FF000000) == 0);
|
| +            } else if ((*dsrc & 0xFF000000FF000000) == 0) {  // top byte of both pixels is 0: leave dst untouched
|
| +                do {
|
| +                    dsrc++;
|
| +                    ddst++;
|
| +                } while (dsrc < end && (*dsrc & 0xFF000000FF000000) == 0);
|
| +            } else {
|
| +                srcover_srgb_srgb_2(reinterpret_cast<uint32_t*>(ddst++),
|
| +                                    reinterpret_cast<const uint32_t*>(dsrc++));
|
| +            }
|
| +        }
|
| +        // Odd count: a single pixel remains after the pairs.
|
| +        if ((count & 1) != 0) {
|
| +            srcover_srgb8888_srgb_1(reinterpret_cast<uint32_t*>(ddst),
|
| +                                    *reinterpret_cast<const uint32_t*>(dsrc));
|
| +        }
|
| +        dst += count;  // the next pass starts after everything written, including an odd tail pixel
|
| +    }
|
| +}
|
| +
|
| +static void trivial_srcover_srgb_srgb(  // Simplest loop: one srcover_srgb8888_srgb_1 call per pixel.
|
| +        uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
|
| +    while (ndst > 0 && nsrc > 0) {  // a non-positive nsrc could never bring ndst down to 0
|
| +        int n = SkTMin(ndst, nsrc);
|
| +        // src is re-read from index 0 on every pass, so a short src run repeats across dst.
|
| +        for (int i = 0; i < n; i++) {
|
| +            srcover_srgb8888_srgb_1(dst++, src[i]);
|
| +        }
|
| +        ndst -= n;
|
| +    }
|
| }
|
|
|
| class SrcOverVSkOptsBruteForce {
|
| public:
|
| static SkString Name() { return SkString{"VSkOptsBruteForce"}; }
|
| static bool WorksOnCpu() { return true; }
|
| - static void BlendN(uint32_t* dst, int count, const uint32_t* src) {
|
| - sk_default::brute_force_srcover_srgb_srgb(dst, src, count, count);
|
| + static void BlendN(uint32_t* dst, const uint32_t* src, int count) {  // (dst, src, count) order matches the Blender::BlendN(fDst.get(), src, width) call site
|
| + brute_force_srcover_srgb_srgb(dst, src, count, count);  // file-local helper; count is passed as both ndst and nsrc
|
| }
|
| };
|
|
|
| -namespace sk_default {
|
| -extern void trivial_srcover_srgb_srgb(
|
| - uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc);
|
| -}
|
| -
|
| class SrcOverVSkOptsTrivial {
|
| public:
|
| static SkString Name() { return SkString{"VSkOptsTrivial"}; }
|
| static bool WorksOnCpu() { return true; }
|
| - static void BlendN(uint32_t* dst, int count, const uint32_t* src) {
|
| - sk_default::trivial_srcover_srgb_srgb(dst, src, count, count);
|
| + static void BlendN(uint32_t* dst, const uint32_t* src, int count) {  // (dst, src, count) order matches the Blender::BlendN(fDst.get(), src, width) call site
|
| + trivial_srcover_srgb_srgb(dst, src, count, count);  // file-local helper; count is passed as both ndst and nsrc
|
| }
|
| };
|
|
|
| -namespace sk_default {
|
| -extern void best_non_simd_srcover_srgb_srgb(
|
| - uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc);
|
| -}
|
| -
|
| class SrcOverVSkOptsNonSimdCore {
|
| public:
|
| static SkString Name() { return SkString{"VSkOptsNonSimdCore"}; }
|
| static bool WorksOnCpu() { return true; }
|
| - static void BlendN(uint32_t* dst, int count, const uint32_t* src) {
|
| - sk_default::best_non_simd_srcover_srgb_srgb(dst, src, count, count);
|
| + static void BlendN(uint32_t* dst, const uint32_t* src, int count) {  // (dst, src, count) order matches the Blender::BlendN(fDst.get(), src, width) call site
|
| + best_non_simd_srcover_srgb_srgb(dst, src, count, count);  // file-local helper; count is passed as both ndst and nsrc
|
| }
|
| };
|
|
|
| @@ -69,7 +116,7 @@ class SrcOverVSkOptsDefault {
|
| public:
|
| static SkString Name() { return SkString{"VSkOptsDefault"}; }
|
| static bool WorksOnCpu() { return true; }
|
| - static void BlendN(uint32_t* dst, int count, const uint32_t* src) {
|
| + static void BlendN(uint32_t* dst, const uint32_t* src, int count) {  // (dst, src, count) order matches the Blender::BlendN call site; count is forwarded below as the last two arguments
|
| sk_default::srcover_srgb_srgb(dst, src, count, count);
|
| }
|
| };
|
| @@ -83,7 +130,7 @@ class SrcOverVSkOptsSSE41 {
|
| public:
|
| static SkString Name() { return SkString{"VSkOptsSSE41"}; }
|
| static bool WorksOnCpu() { return SkCpu::Supports(SkCpu::SSE41); }
|
| - static void BlendN(uint32_t* dst, int count, const uint32_t* src) {
|
| + static void BlendN(uint32_t* dst, const uint32_t* src, int count) {  // (dst, src, count) order matches the Blender::BlendN call site; count is forwarded below as the last two arguments
|
| sk_sse41::srcover_srgb_srgb(dst, src, count, count);
|
| }
|
| };
|
| @@ -94,7 +141,7 @@ template <typename Blender>
|
| class LinearSrcOverBench : public Benchmark {
|
| public:
|
| LinearSrcOverBench(const char* fileName) : fFileName(fileName) {
|
| - fName = "LinearSrcOver";
|
| + fName = "LinearSrcOver_";  // name prefix; fileName and then Blender::Name() are appended right after it
|
| fName.append(fileName);
|
| fName.append(Blender::Name());
|
| }
|
| @@ -115,7 +162,7 @@ protected:
|
| bm.peekPixels(&fPixmap);
|
| fCount = fPixmap.rowBytesAsPixels();
|
| fDst.reset(fCount);
|
| - memset(fDst.get(), 0, fPixmap.rowBytes());
|
| + sk_bzero(fDst.get(), fPixmap.rowBytes());
|
| }
|
| }
|
|
|
| @@ -127,7 +174,7 @@ protected:
|
| for (int i = 0; i < loops * INNER_LOOPS; ++i) {
|
| const uint32_t* src = fPixmap.addr32();
|
| for (int y = 0; y < fPixmap.height(); y++) {
|
| - Blender::BlendN(fDst.get(), width, src);
|
| + Blender::BlendN(fDst.get(), src, width);
|
| src += width;
|
| }
|
| }
|
|
|