Index: bench/SkBlend_optsBench.cpp |
diff --git a/bench/SkBlend_optsBench.cpp b/bench/SkBlend_optsBench.cpp |
index 29f3ed8331f4d835d46a6143f99e7481fe5f9fa5..4dfaaef85864f0a8d9c4ba425459ab9c6700661d 100644 |
--- a/bench/SkBlend_optsBench.cpp |
+++ b/bench/SkBlend_optsBench.cpp |
@@ -14,49 +14,96 @@ |
#include "SkImage_Base.h" |
#include "SkNx.h" |
#include "SkOpts.h" |
+#include "SkPM4fPriv.h" |
#include "SkString.h" |
#define INNER_LOOPS 10 |
-namespace sk_default { |
-extern void brute_force_srcover_srgb_srgb( |
- uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); |
+// Brute-force sRGB src-over: every source pixel is converted with |
+// srgb_to_linear(to_4f(...)) and blended into dst through |
+// srcover_blend_srgb8888_srgb_1; there are no fast paths. |
+// |
+// dst  - destination pixels; ndst of them are blended in order. |
+// src  - source pixels; reading restarts at src[0] after each pass. |
+// ndst - number of destination pixels to blend. |
+// nsrc - number of source pixels consumed per pass. |
+static void brute_force_srcover_srgb_srgb( |
+    uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { |
+    // Without any source pixels a pass consumes nothing, so the loop below |
+    // would never make progress; there is nothing to blend in that case. |
+    if (nsrc <= 0) { |
+        return; |
+    } |
+ |
+    while (ndst > 0) { |
+        const int n = SkTMin(ndst, nsrc); |
+ |
+        for (int i = 0; i < n; i++) { |
+            srcover_blend_srgb8888_srgb_1(dst++, srgb_to_linear(to_4f(src[i]))); |
+        } |
+        ndst -= n; |
+    } |
+} |
+ |
+// Non-SIMD sRGB src-over that inspects source pixels two at a time through |
+// 64-bit loads. The top byte of each 32-bit pixel is tested with a mask |
+// (presumably the alpha channel -- confirm against the pixel layout): |
+//   * both top bytes 0xFF -> the pair is copied straight to dst, |
+//   * both top bytes 0x00 -> the pair is skipped, dst is left untouched, |
+//   * anything else       -> both pixels go through srcover_srgb8888_srgb_1. |
+// |
+// dst  - destination pixels; ndst of them are processed in order. |
+// src  - source pixels; reading restarts at src[0] after each pass. |
+// ndst - number of destination pixels to process. |
+// nsrc - number of source pixels consumed per pass. |
+// |
+// NOTE(review): the uint64_t reinterpret_casts assume the platform tolerates |
+// 64-bit access through uint32_t pointers (alignment / aliasing) -- confirm. |
+static void best_non_simd_srcover_srgb_srgb( |
+    uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { |
+    // Without any source pixels a pass consumes nothing and the outer loop |
+    // would never make progress. |
+    if (nsrc <= 0) { |
+        return; |
+    } |
+ |
+    // Selects the top byte of each of the two pixels held in one 64-bit word. |
+    const uint64_t kTopBytes = 0xFF000000FF000000; |
+ |
+    auto srcover_srgb_srgb_2 = [](uint32_t* d, const uint32_t* s) { |
+        srcover_srgb8888_srgb_1(d++, *s++); |
+        srcover_srgb8888_srgb_1(d, *s); |
+    }; |
+ |
+    while (ndst > 0) { |
+        const int count = SkTMin(ndst, nsrc); |
+        ndst -= count; |
+ |
+        // Re-derive the 64-bit cursor from dst on every pass so that an odd |
+        // trailing pixel from the previous pass is accounted for. |
+        uint64_t* ddst = reinterpret_cast<uint64_t*>(dst); |
+        const uint64_t* dsrc = reinterpret_cast<const uint64_t*>(src); |
+        const uint64_t* const end = dsrc + (count >> 1); |
+ |
+        // Pre-tested loop: when count < 2 there is no complete pair, and |
+        // touching one would read and write past the single valid pixel. |
+        while (dsrc < end) { |
+            if ((~*dsrc & kTopBytes) == 0) { |
+                do { |
+                    *ddst++ = *dsrc++; |
+                } while (dsrc < end && (~*dsrc & kTopBytes) == 0); |
+            } else if ((*dsrc & kTopBytes) == 0) { |
+                do { |
+                    dsrc++; |
+                    ddst++; |
+                } while (dsrc < end && (*dsrc & kTopBytes) == 0); |
+            } else { |
+                srcover_srgb_srgb_2(reinterpret_cast<uint32_t*>(ddst++), |
+                                    reinterpret_cast<const uint32_t*>(dsrc++)); |
+            } |
+        } |
+ |
+        // Odd count: one pixel is left over after the pairs. |
+        if ((count & 1) != 0) { |
+            srcover_srgb8888_srgb_1(reinterpret_cast<uint32_t*>(ddst), |
+                                    *reinterpret_cast<const uint32_t*>(dsrc)); |
+        } |
+ |
+        dst += count; |
+    } |
+} |
+ |
+// Simplest sRGB src-over loop: each source pixel is handed one at a time to |
+// srcover_srgb8888_srgb_1 together with the next destination pixel. |
+// |
+// dst  - destination pixels; ndst of them are processed in order. |
+// src  - source pixels; reading restarts at src[0] after each pass. |
+// ndst - number of destination pixels to process. |
+// nsrc - number of source pixels consumed per pass. |
+static void trivial_srcover_srgb_srgb( |
+    uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { |
+    // Without any source pixels a pass consumes nothing, so the loop below |
+    // would never make progress; there is nothing to blend in that case. |
+    if (nsrc <= 0) { |
+        return; |
+    } |
+ |
+    while (ndst > 0) { |
+        const int n = SkTMin(ndst, nsrc); |
+ |
+        for (int i = 0; i < n; i++) { |
+            srcover_srgb8888_srgb_1(dst++, src[i]); |
+        } |
+        ndst -= n; |
+    } |
} |
+// Blender policy reported as "VSkOptsBruteForce". BlendN forwards to |
+// brute_force_srcover_srgb_srgb, passing count as both ndst and nsrc. |
class SrcOverVSkOptsBruteForce { |
public: |
static SkString Name() { return SkString{"VSkOptsBruteForce"}; } |
static bool WorksOnCpu() { return true; } |
- static void BlendN(uint32_t* dst, int count, const uint32_t* src) { |
- sk_default::brute_force_srcover_srgb_srgb(dst, src, count, count); |
+ static void BlendN(uint32_t* dst, const uint32_t* src, int count) { |
+ brute_force_srcover_srgb_srgb(dst, src, count, count); |
} |
}; |
-namespace sk_default { |
-extern void trivial_srcover_srgb_srgb( |
- uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); |
-} |
- |
+// Blender policy reported as "VSkOptsTrivial". BlendN forwards to |
+// trivial_srcover_srgb_srgb, passing count as both ndst and nsrc. |
class SrcOverVSkOptsTrivial { |
public: |
static SkString Name() { return SkString{"VSkOptsTrivial"}; } |
static bool WorksOnCpu() { return true; } |
- static void BlendN(uint32_t* dst, int count, const uint32_t* src) { |
- sk_default::trivial_srcover_srgb_srgb(dst, src, count, count); |
+ static void BlendN(uint32_t* dst, const uint32_t* src, int count) { |
+ trivial_srcover_srgb_srgb(dst, src, count, count); |
} |
}; |
-namespace sk_default { |
-extern void best_non_simd_srcover_srgb_srgb( |
- uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc); |
-} |
- |
+// Blender policy reported as "VSkOptsNonSimdCore". BlendN forwards to |
+// best_non_simd_srcover_srgb_srgb, passing count as both ndst and nsrc. |
class SrcOverVSkOptsNonSimdCore { |
public: |
static SkString Name() { return SkString{"VSkOptsNonSimdCore"}; } |
static bool WorksOnCpu() { return true; } |
- static void BlendN(uint32_t* dst, int count, const uint32_t* src) { |
- sk_default::best_non_simd_srcover_srgb_srgb(dst, src, count, count); |
+ static void BlendN(uint32_t* dst, const uint32_t* src, int count) { |
+ best_non_simd_srcover_srgb_srgb(dst, src, count, count); |
} |
}; |
@@ -69,7 +116,7 @@ class SrcOverVSkOptsDefault { |
public: |
static SkString Name() { return SkString{"VSkOptsDefault"}; } |
static bool WorksOnCpu() { return true; } |
- static void BlendN(uint32_t* dst, int count, const uint32_t* src) { |
+ static void BlendN(uint32_t* dst, const uint32_t* src, int count) { |
sk_default::srcover_srgb_srgb(dst, src, count, count); |
} |
}; |
@@ -83,7 +130,7 @@ class SrcOverVSkOptsSSE41 { |
public: |
static SkString Name() { return SkString{"VSkOptsSSE41"}; } |
static bool WorksOnCpu() { return SkCpu::Supports(SkCpu::SSE41); } |
- static void BlendN(uint32_t* dst, int count, const uint32_t* src) { |
+ static void BlendN(uint32_t* dst, const uint32_t* src, int count) { |
sk_sse41::srcover_srgb_srgb(dst, src, count, count); |
} |
}; |
@@ -94,7 +141,7 @@ template <typename Blender> |
class LinearSrcOverBench : public Benchmark { |
public: |
LinearSrcOverBench(const char* fileName) : fFileName(fileName) { |
- fName = "LinearSrcOver"; |
+ fName = "LinearSrcOver_"; |
fName.append(fileName); |
fName.append(Blender::Name()); |
} |
@@ -115,7 +162,7 @@ protected: |
bm.peekPixels(&fPixmap); |
fCount = fPixmap.rowBytesAsPixels(); |
fDst.reset(fCount); |
- memset(fDst.get(), 0, fPixmap.rowBytes()); |
+ sk_bzero(fDst.get(), fPixmap.rowBytes()); |
} |
} |
@@ -127,7 +174,7 @@ protected: |
for (int i = 0; i < loops * INNER_LOOPS; ++i) { |
const uint32_t* src = fPixmap.addr32(); |
for (int y = 0; y < fPixmap.height(); y++) { |
- Blender::BlendN(fDst.get(), width, src); |
+ Blender::BlendN(fDst.get(), src, width); |
src += width; |
} |
} |