| Index: bench/SkBlend_optsBench.cpp
 | 
| diff --git a/bench/SkBlend_optsBench.cpp b/bench/SkBlend_optsBench.cpp
 | 
| index 29f3ed8331f4d835d46a6143f99e7481fe5f9fa5..4dfaaef85864f0a8d9c4ba425459ab9c6700661d 100644
 | 
| --- a/bench/SkBlend_optsBench.cpp
 | 
| +++ b/bench/SkBlend_optsBench.cpp
 | 
| @@ -14,49 +14,96 @@
 | 
|  #include "SkImage_Base.h"
 | 
|  #include "SkNx.h"
 | 
|  #include "SkOpts.h"
 | 
| +#include "SkPM4fPriv.h"
 | 
|  #include "SkString.h"
 | 
|  
 | 
|  #define INNER_LOOPS 10
 | 
|  
 | 
| -namespace sk_default {
 | 
| -extern void brute_force_srcover_srgb_srgb(
 | 
| -    uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc);
 | 
| +static void brute_force_srcover_srgb_srgb(
 | 
| +    uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
 | 
| +    while (ndst > 0) {
 | 
| +        int n = SkTMin(ndst, nsrc);
 | 
| +
 | 
| +        for (int i = 0; i < n; i++) {
 | 
| +            srcover_blend_srgb8888_srgb_1(dst++, srgb_to_linear(to_4f(src[i])));
 | 
| +        }
 | 
| +        ndst -= n;
 | 
| +    }
 | 
| +}
 | 
| +
 | 
| +static void best_non_simd_srcover_srgb_srgb(
 | 
| +    uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
 | 
| +    uint64_t* ddst = reinterpret_cast<uint64_t*>(dst);
 | 
| +
 | 
| +    auto srcover_srgb_srgb_2 = [](uint32_t* dst, const uint32_t* src) {
 | 
| +        srcover_srgb8888_srgb_1(dst++, *src++);
 | 
| +        srcover_srgb8888_srgb_1(dst, *src);
 | 
| +    };
 | 
| +
 | 
| +    while (ndst >0) {
 | 
| +        int count = SkTMin(ndst, nsrc);
 | 
| +        ndst -= count;
 | 
| +        const uint64_t* dsrc = reinterpret_cast<const uint64_t*>(src);
 | 
| +        const uint64_t* end = dsrc + (count >> 1);
 | 
| +        do {
 | 
| +            if ((~*dsrc & 0xFF000000FF000000) == 0) {
 | 
| +                do {
 | 
| +                    *ddst++ = *dsrc++;
 | 
| +                } while (dsrc < end && (~*dsrc & 0xFF000000FF000000) == 0);
 | 
| +            } else if ((*dsrc & 0xFF000000FF000000) == 0) {
 | 
| +                do {
 | 
| +                    dsrc++;
 | 
| +                    ddst++;
 | 
| +                } while (dsrc < end && (*dsrc & 0xFF000000FF000000) == 0);
 | 
| +            } else {
 | 
| +                srcover_srgb_srgb_2(reinterpret_cast<uint32_t*>(ddst++),
 | 
| +                                    reinterpret_cast<const uint32_t*>(dsrc++));
 | 
| +            }
 | 
| +        } while (dsrc < end);
 | 
| +
 | 
| +        if ((count & 1) != 0) {
 | 
| +            srcover_srgb8888_srgb_1(reinterpret_cast<uint32_t*>(ddst),
 | 
| +                                    *reinterpret_cast<const uint32_t*>(dsrc));
 | 
| +        }
 | 
| +    }
 | 
| +}
 | 
| +
 | 
| +static void trivial_srcover_srgb_srgb(
 | 
| +    uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
 | 
| +    while (ndst > 0) {
 | 
| +        int n = SkTMin(ndst, nsrc);
 | 
| +
 | 
| +        for (int i = 0; i < n; i++) {
 | 
| +            srcover_srgb8888_srgb_1(dst++, src[i]);
 | 
| +        }
 | 
| +        ndst -= n;
 | 
| +    }
 | 
|  }
 | 
|  
 | 
|  class SrcOverVSkOptsBruteForce {
 | 
|  public:
 | 
|      static SkString Name() { return SkString{"VSkOptsBruteForce"}; }
 | 
|      static bool WorksOnCpu() { return true; }
 | 
| -    static void BlendN(uint32_t* dst, int count, const uint32_t* src) {
 | 
| -        sk_default::brute_force_srcover_srgb_srgb(dst, src, count, count);
 | 
| +    static void BlendN(uint32_t* dst, const uint32_t* src, int count) {
 | 
| +        brute_force_srcover_srgb_srgb(dst, src, count, count);
 | 
|      }
 | 
|  };
 | 
|  
 | 
| -namespace sk_default {
 | 
| -extern void trivial_srcover_srgb_srgb(
 | 
| -    uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc);
 | 
| -}
 | 
| -
 | 
|  class SrcOverVSkOptsTrivial {
 | 
|  public:
 | 
|      static SkString Name() { return SkString{"VSkOptsTrivial"}; }
 | 
|      static bool WorksOnCpu() { return true; }
 | 
| -    static void BlendN(uint32_t* dst, int count, const uint32_t* src) {
 | 
| -        sk_default::trivial_srcover_srgb_srgb(dst, src, count, count);
 | 
| +    static void BlendN(uint32_t* dst, const uint32_t* src, int count) {
 | 
| +        trivial_srcover_srgb_srgb(dst, src, count, count);
 | 
|      }
 | 
|  };
 | 
|  
 | 
| -namespace sk_default {
 | 
| -extern void best_non_simd_srcover_srgb_srgb(
 | 
| -    uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc);
 | 
| -}
 | 
| -
 | 
|  class SrcOverVSkOptsNonSimdCore {
 | 
|  public:
 | 
|      static SkString Name() { return SkString{"VSkOptsNonSimdCore"}; }
 | 
|      static bool WorksOnCpu() { return true; }
 | 
| -    static void BlendN(uint32_t* dst, int count, const uint32_t* src) {
 | 
| -        sk_default::best_non_simd_srcover_srgb_srgb(dst, src, count, count);
 | 
| +    static void BlendN(uint32_t* dst, const uint32_t* src, int count) {
 | 
| +        best_non_simd_srcover_srgb_srgb(dst, src, count, count);
 | 
|      }
 | 
|  };
 | 
|  
 | 
| @@ -69,7 +116,7 @@ class SrcOverVSkOptsDefault {
 | 
|  public:
 | 
|      static SkString Name() { return SkString{"VSkOptsDefault"}; }
 | 
|      static bool WorksOnCpu() { return true; }
 | 
| -    static void BlendN(uint32_t* dst, int count, const uint32_t* src) {
 | 
| +    static void BlendN(uint32_t* dst, const uint32_t* src, int count) {
 | 
|          sk_default::srcover_srgb_srgb(dst, src, count, count);
 | 
|      }
 | 
|  };
 | 
| @@ -83,7 +130,7 @@ class SrcOverVSkOptsSSE41 {
 | 
|  public:
 | 
|      static SkString Name() { return SkString{"VSkOptsSSE41"}; }
 | 
|      static bool WorksOnCpu() { return SkCpu::Supports(SkCpu::SSE41); }
 | 
| -    static void BlendN(uint32_t* dst, int count, const uint32_t* src) {
 | 
| +    static void BlendN(uint32_t* dst, const uint32_t* src, int count) {
 | 
|          sk_sse41::srcover_srgb_srgb(dst, src, count, count);
 | 
|      }
 | 
|  };
 | 
| @@ -94,7 +141,7 @@ template <typename Blender>
 | 
|  class LinearSrcOverBench : public Benchmark {
 | 
|  public:
 | 
|      LinearSrcOverBench(const char* fileName) : fFileName(fileName) {
 | 
| -        fName = "LinearSrcOver";
 | 
| +        fName = "LinearSrcOver_";
 | 
|          fName.append(fileName);
 | 
|          fName.append(Blender::Name());
 | 
|      }
 | 
| @@ -115,7 +162,7 @@ protected:
 | 
|              bm.peekPixels(&fPixmap);
 | 
|              fCount = fPixmap.rowBytesAsPixels();
 | 
|              fDst.reset(fCount);
 | 
| -            memset(fDst.get(), 0, fPixmap.rowBytes());
 | 
| +            sk_bzero(fDst.get(), fPixmap.rowBytes());
 | 
|          }
 | 
|      }
 | 
|  
 | 
| @@ -127,7 +174,7 @@ protected:
 | 
|          for (int i = 0; i < loops * INNER_LOOPS; ++i) {
 | 
|              const uint32_t* src = fPixmap.addr32();
 | 
|              for (int y = 0; y < fPixmap.height(); y++) {
 | 
| -                Blender::BlendN(fDst.get(), width, src);
 | 
| +                Blender::BlendN(fDst.get(), src, width);
 | 
|                  src += width;
 | 
|              }
 | 
|          }
 | 
| 
 |