| Index: bench/SkRasterPipelineBench.cpp
|
| diff --git a/bench/SkRasterPipelineBench.cpp b/bench/SkRasterPipelineBench.cpp
|
| index 0243940a106e13b94d4225c283502b41239b90a9..a5263d770f7b06dc3dca23ef7328245a8b17ea37 100644
|
| --- a/bench/SkRasterPipelineBench.cpp
|
| +++ b/bench/SkRasterPipelineBench.cpp
|
| @@ -23,10 +23,8 @@ static uint8_t mask[N];
|
| // - store src back as srgb
|
| // Every stage except for srcover interacts with memory, and so will need _tail variants.
|
|
|
| -static void SK_VECTORCALL load_s_srgb(SkRasterPipeline::Stage* st, size_t x,
|
| - Sk4f r, Sk4f g, Sk4f b, Sk4f a,
|
| - Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
|
| - auto ptr = st->ctx<const uint32_t*>() + x;
|
| +SK_RASTER_STAGE(load_s_srgb) {
|
| + auto ptr = (const uint32_t*)ctx + x;
|
|
|
| r = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 0) & 0xff],
|
| sk_linear_from_srgb[(ptr[1] >> 0) & 0xff],
|
| @@ -44,27 +42,19 @@ static void SK_VECTORCALL load_s_srgb(SkRasterPipeline::Stage* st, size_t x,
|
| sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] };
|
|
|
| a = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f);
|
| -
|
| - st->next(x, r,g,b,a, dr,dg,db,da);
|
| }
|
|
|
| -static void SK_VECTORCALL load_s_srgb_tail(SkRasterPipeline::Stage* st, size_t x,
|
| - Sk4f r, Sk4f g, Sk4f b, Sk4f a,
|
| - Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
|
| - auto ptr = st->ctx<const uint32_t*>() + x;
|
| +SK_RASTER_STAGE(load_s_srgb_tail) {
|
| + auto ptr = (const uint32_t*)ctx + x;
|
|
|
| r = Sk4f{ sk_linear_from_srgb[(*ptr >> 0) & 0xff], 0,0,0 };
|
| g = Sk4f{ sk_linear_from_srgb[(*ptr >> 8) & 0xff], 0,0,0 };
|
| b = Sk4f{ sk_linear_from_srgb[(*ptr >> 16) & 0xff], 0,0,0 };
|
| a = Sk4f{ (*ptr >> 24) * (1/255.0f), 0,0,0 };
|
| -
|
| - st->next(x, r,g,b,a, dr,dg,db,da);
|
| }
|
|
|
| -static void SK_VECTORCALL load_d_srgb(SkRasterPipeline::Stage* st, size_t x,
|
| - Sk4f r, Sk4f g, Sk4f b, Sk4f a,
|
| - Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
|
| - auto ptr = st->ctx<const uint32_t*>() + x;
|
| +SK_RASTER_STAGE(load_d_srgb) {
|
| + auto ptr = (const uint32_t*)ctx + x;
|
|
|
| dr = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 0) & 0xff],
|
| sk_linear_from_srgb[(ptr[1] >> 0) & 0xff],
|
| @@ -82,67 +72,47 @@ static void SK_VECTORCALL load_d_srgb(SkRasterPipeline::Stage* st, size_t x,
|
| sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] };
|
|
|
| da = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f);
|
| -
|
| - st->next(x, r,g,b,a, dr,dg,db,da);
|
| }
|
|
|
| -static void SK_VECTORCALL load_d_srgb_tail(SkRasterPipeline::Stage* st, size_t x,
|
| - Sk4f r, Sk4f g, Sk4f b, Sk4f a,
|
| - Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
|
| - auto ptr = st->ctx<const uint32_t*>() + x;
|
| +SK_RASTER_STAGE(load_d_srgb_tail) {
|
| + auto ptr = (const uint32_t*)ctx + x;
|
|
|
| dr = Sk4f{ sk_linear_from_srgb[(*ptr >> 0) & 0xff], 0,0,0 };
|
| dg = Sk4f{ sk_linear_from_srgb[(*ptr >> 8) & 0xff], 0,0,0 };
|
| db = Sk4f{ sk_linear_from_srgb[(*ptr >> 16) & 0xff], 0,0,0 };
|
| da = Sk4f{ (*ptr >> 24) * (1/255.0f), 0,0,0 };
|
| -
|
| - st->next(x, r,g,b,a, dr,dg,db,da);
|
| }
|
|
|
| -static void SK_VECTORCALL scale_u8(SkRasterPipeline::Stage* st, size_t x,
|
| - Sk4f r, Sk4f g, Sk4f b, Sk4f a,
|
| - Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
|
| - auto ptr = st->ctx<const uint8_t*>() + x;
|
| +SK_RASTER_STAGE(scale_u8) {
|
| + auto ptr = (const uint8_t*)ctx + x;
|
|
|
| auto c = SkNx_cast<float>(Sk4b::Load(ptr)) * (1/255.0f);
|
| r *= c;
|
| g *= c;
|
| b *= c;
|
| a *= c;
|
| -
|
| - st->next(x, r,g,b,a, dr,dg,db,da);
|
| }
|
|
|
| -static void SK_VECTORCALL scale_u8_tail(SkRasterPipeline::Stage* st, size_t x,
|
| - Sk4f r, Sk4f g, Sk4f b, Sk4f a,
|
| - Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
|
| - auto ptr = st->ctx<const uint8_t*>() + x;
|
| +SK_RASTER_STAGE(scale_u8_tail) {
|
| + auto ptr = (const uint8_t*)ctx + x;
|
|
|
| auto c = *ptr * (1/255.0f);
|
| r *= c;
|
| g *= c;
|
| b *= c;
|
| a *= c;
|
| -
|
| - st->next(x, r,g,b,a, dr,dg,db,da);
|
| }
|
|
|
| -static void SK_VECTORCALL srcover(SkRasterPipeline::Stage* st, size_t x,
|
| - Sk4f r, Sk4f g, Sk4f b, Sk4f a,
|
| - Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
|
| +SK_RASTER_STAGE(srcover) {
|
| auto A = 1.0f - a;
|
| r += dr * A;
|
| g += dg * A;
|
| b += db * A;
|
| a += da * A;
|
| -
|
| - st->next(x, r,g,b,a, dr,dg,db,da);
|
| }
|
|
|
| -static void SK_VECTORCALL store_srgb(SkRasterPipeline::Stage* st, size_t x,
|
| - Sk4f r, Sk4f g, Sk4f b, Sk4f a,
|
| - Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
|
| - auto ptr = st->ctx<uint32_t*>() + x;
|
| +SK_RASTER_STAGE(store_srgb) {
|
| + auto ptr = (uint32_t*)ctx + x;
|
|
|
| ( sk_linear_to_srgb(r)
|
| | sk_linear_to_srgb(g) << 8
|
| @@ -150,10 +120,8 @@ static void SK_VECTORCALL store_srgb(SkRasterPipeline::Stage* st, size_t x,
|
| | Sk4f_round(255.0f*a) << 24).store(ptr);
|
| }
|
|
|
| -static void SK_VECTORCALL store_srgb_tail(SkRasterPipeline::Stage* st, size_t x,
|
| - Sk4f r, Sk4f g, Sk4f b, Sk4f a,
|
| - Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) {
|
| - auto ptr = st->ctx<uint32_t*>() + x;
|
| +SK_RASTER_STAGE(store_srgb_tail) {
|
| + auto ptr = (uint32_t*)ctx + x;
|
|
|
| Sk4i rgba = sk_linear_to_srgb({r[0], g[0], b[0], 0});
|
| rgba = {rgba[0], rgba[1], rgba[2], (int)(255.0f * a[0] + 0.5f)};
|
| @@ -163,23 +131,56 @@ static void SK_VECTORCALL store_srgb_tail(SkRasterPipeline::Stage* st, size_t x,
|
|
|
| class SkRasterPipelineBench : public Benchmark {
|
| public:
|
| - SkRasterPipelineBench() {}
|
| + SkRasterPipelineBench(bool fused) : fFused(fused) {}
|
|
|
| bool isSuitableFor(Backend backend) override { return backend == kNonRendering_Backend; }
|
| - const char* onGetName() override { return "SkRasterPipelineBench"; }
|
| + const char* onGetName() override { return fFused ? "SkRasterPipelineBench_fused"
|
| + : "SkRasterPipelineBench_pipeline"; }
|
|
|
| void onDraw(int loops, SkCanvas*) override {
|
| - SkRasterPipeline p;
|
| - p.append(load_s_srgb, load_s_srgb_tail, src);
|
| - p.append( scale_u8, scale_u8_tail, mask);
|
| - p.append(load_d_srgb, load_d_srgb_tail, dst);
|
| - p.append(srcover);
|
| - p.append( store_srgb, store_srgb_tail, dst);
|
| -
|
| while (loops --> 0) {
|
| - p.run(N);
|
| + fFused ? this->runFused() : this->runPipeline();
|
| }
|
| }
|
| +
|
| + void runFused() {
|
| + Sk4f r,g,b,a, dr,dg,db,da;
|
| + size_t x = 0, n = N;
|
| + while (n >= 4) {
|
| + load_s_srgb(src , x, r,g,b,a, dr,dg,db,da);
|
| + scale_u8 (mask , x, r,g,b,a, dr,dg,db,da);
|
| + load_d_srgb(dst , x, r,g,b,a, dr,dg,db,da);
|
| + srcover (nullptr, x, r,g,b,a, dr,dg,db,da);
|
| + store_srgb (dst , x, r,g,b,a, dr,dg,db,da);
|
| +
|
| + x += 4;
|
| + n -= 4;
|
| + }
|
| + while (n > 0) {
|
| + load_s_srgb_tail(src , x, r,g,b,a, dr,dg,db,da);
|
| + scale_u8_tail (mask , x, r,g,b,a, dr,dg,db,da);
|
| + load_d_srgb_tail(dst , x, r,g,b,a, dr,dg,db,da);
|
| + srcover (nullptr, x, r,g,b,a, dr,dg,db,da);
|
| + store_srgb_tail (dst , x, r,g,b,a, dr,dg,db,da);
|
| +
|
| + x += 1;
|
| + n -= 1;
|
| + }
|
| + }
|
| +
|
| + void runPipeline() {
|
| + SkRasterPipeline p;
|
| + p.append<load_s_srgb, load_s_srgb_tail>( src);
|
| + p.append< scale_u8, scale_u8_tail>(mask);
|
| + p.append<load_d_srgb, load_d_srgb_tail>( dst);
|
| + p.append<srcover>();
|
| + p.append< store_srgb, store_srgb_tail>( dst);
|
| +
|
| + p.run(N);
|
| + }
|
| +
|
| + bool fFused;
|
| };
|
|
|
| -DEF_BENCH( return new SkRasterPipelineBench; )
|
| +DEF_BENCH( return new SkRasterPipelineBench(true); )
|
| +DEF_BENCH( return new SkRasterPipelineBench(false); )
|
|
|
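|
| SK_RASTER_STAGE and the templated append<>() used above are defined in SkRasterPipeline.h rather than in this file. Below is a minimal sketch of the shape this benchmark assumes, inferred from how runFused() calls the stages directly (a void* context, the current x, and the eight Sk4f lanes by reference); the real macro may differ in detail:
|
|     // Assumed expansion: a plain function the fused loop can call directly,
|     // and which the pipeline can wrap into a Stage via append<fn, tail_fn>(ctx).
|     #define SK_RASTER_STAGE(name)                               \
|         static void name(void* ctx, size_t x,                   \
|                          Sk4f& r, Sk4f& g, Sk4f& b, Sk4f& a,    \
|                          Sk4f& dr, Sk4f& dg, Sk4f& db, Sk4f& da)
|
|     // Usage, as in this benchmark:
|     //     SK_RASTER_STAGE(srcover) { ... }   // define the stage body once,
|     //     p.append<srcover>();               // then reuse it in the pipeline,
|     //     srcover(nullptr, x, r,g,b,a, dr,dg,db,da);   // or call it fused.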