| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include "Benchmark.h" | 8 #include "Benchmark.h" |
| 9 #include "SkRasterPipeline.h" | 9 #include "SkRasterPipeline.h" |
| 10 #include "SkSRGB.h" | 10 #include "SkSRGB.h" |
| 11 | 11 |
| 12 static const int N = 1023; | 12 static const int N = 1023; |
| 13 | 13 |
| 14 static uint32_t dst[N], | 14 static uint32_t dst[N], |
| 15 src[N]; | 15 src[N]; |
| 16 static uint8_t mask[N]; | 16 static uint8_t mask[N]; |
| 17 | 17 |
| 18 // We'll build up a somewhat realistic useful pipeline: | 18 // We'll build up a somewhat realistic useful pipeline: |
| 19 // - load srgb src | 19 // - load srgb src |
| 20 // - scale src by 8-bit mask | 20 // - scale src by 8-bit mask |
| 21 // - load srgb dst | 21 // - load srgb dst |
| 22 // - src = srcover(dst, src) | 22 // - src = srcover(dst, src) |
| 23 // - store src back as srgb | 23 // - store src back as srgb |
| 24 // Every stage except for srcover interacts with memory, and so will need _tail
variants. | 24 // Every stage except for srcover interacts with memory, and so will need _tail
variants. |
| 25 | 25 |
| 26 static void SK_VECTORCALL load_s_srgb(SkRasterPipeline::Stage* st, size_t x, | 26 SK_RASTER_STAGE(load_s_srgb) { |
| 27 Sk4f r, Sk4f g, Sk4f b, Sk4f a, | 27 auto ptr = (const uint32_t*)ctx + x; |
| 28 Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { | |
| 29 auto ptr = st->ctx<const uint32_t*>() + x; | |
| 30 | 28 |
| 31 r = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 0) & 0xff], | 29 r = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 0) & 0xff], |
| 32 sk_linear_from_srgb[(ptr[1] >> 0) & 0xff], | 30 sk_linear_from_srgb[(ptr[1] >> 0) & 0xff], |
| 33 sk_linear_from_srgb[(ptr[2] >> 0) & 0xff], | 31 sk_linear_from_srgb[(ptr[2] >> 0) & 0xff], |
| 34 sk_linear_from_srgb[(ptr[3] >> 0) & 0xff] }; | 32 sk_linear_from_srgb[(ptr[3] >> 0) & 0xff] }; |
| 35 | 33 |
| 36 g = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 8) & 0xff], | 34 g = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 8) & 0xff], |
| 37 sk_linear_from_srgb[(ptr[1] >> 8) & 0xff], | 35 sk_linear_from_srgb[(ptr[1] >> 8) & 0xff], |
| 38 sk_linear_from_srgb[(ptr[2] >> 8) & 0xff], | 36 sk_linear_from_srgb[(ptr[2] >> 8) & 0xff], |
| 39 sk_linear_from_srgb[(ptr[3] >> 8) & 0xff] }; | 37 sk_linear_from_srgb[(ptr[3] >> 8) & 0xff] }; |
| 40 | 38 |
| 41 b = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 16) & 0xff], | 39 b = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 16) & 0xff], |
| 42 sk_linear_from_srgb[(ptr[1] >> 16) & 0xff], | 40 sk_linear_from_srgb[(ptr[1] >> 16) & 0xff], |
| 43 sk_linear_from_srgb[(ptr[2] >> 16) & 0xff], | 41 sk_linear_from_srgb[(ptr[2] >> 16) & 0xff], |
| 44 sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] }; | 42 sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] }; |
| 45 | 43 |
| 46 a = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f); | 44 a = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f); |
| 47 | |
| 48 st->next(x, r,g,b,a, dr,dg,db,da); | |
| 49 } | 45 } |
| 50 | 46 |
| 51 static void SK_VECTORCALL load_s_srgb_tail(SkRasterPipeline::Stage* st, size_t x
, | 47 SK_RASTER_STAGE(load_s_srgb_tail) { |
| 52 Sk4f r, Sk4f g, Sk4f b, Sk4f a, | 48 auto ptr = (const uint32_t*)ctx + x; |
| 53 Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { | |
| 54 auto ptr = st->ctx<const uint32_t*>() + x; | |
| 55 | 49 |
| 56 r = Sk4f{ sk_linear_from_srgb[(*ptr >> 0) & 0xff], 0,0,0 }; | 50 r = Sk4f{ sk_linear_from_srgb[(*ptr >> 0) & 0xff], 0,0,0 }; |
| 57 g = Sk4f{ sk_linear_from_srgb[(*ptr >> 8) & 0xff], 0,0,0 }; | 51 g = Sk4f{ sk_linear_from_srgb[(*ptr >> 8) & 0xff], 0,0,0 }; |
| 58 b = Sk4f{ sk_linear_from_srgb[(*ptr >> 16) & 0xff], 0,0,0 }; | 52 b = Sk4f{ sk_linear_from_srgb[(*ptr >> 16) & 0xff], 0,0,0 }; |
| 59 a = Sk4f{ (*ptr >> 24) * (1/255.0f), 0,0,0 }; | 53 a = Sk4f{ (*ptr >> 24) * (1/255.0f), 0,0,0 }; |
| 60 | |
| 61 st->next(x, r,g,b,a, dr,dg,db,da); | |
| 62 } | 54 } |
| 63 | 55 |
| 64 static void SK_VECTORCALL load_d_srgb(SkRasterPipeline::Stage* st, size_t x, | 56 SK_RASTER_STAGE(load_d_srgb) { |
| 65 Sk4f r, Sk4f g, Sk4f b, Sk4f a, | 57 auto ptr = (const uint32_t*)ctx + x; |
| 66 Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { | |
| 67 auto ptr = st->ctx<const uint32_t*>() + x; | |
| 68 | 58 |
| 69 dr = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 0) & 0xff], | 59 dr = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 0) & 0xff], |
| 70 sk_linear_from_srgb[(ptr[1] >> 0) & 0xff], | 60 sk_linear_from_srgb[(ptr[1] >> 0) & 0xff], |
| 71 sk_linear_from_srgb[(ptr[2] >> 0) & 0xff], | 61 sk_linear_from_srgb[(ptr[2] >> 0) & 0xff], |
| 72 sk_linear_from_srgb[(ptr[3] >> 0) & 0xff] }; | 62 sk_linear_from_srgb[(ptr[3] >> 0) & 0xff] }; |
| 73 | 63 |
| 74 dg = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 8) & 0xff], | 64 dg = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 8) & 0xff], |
| 75 sk_linear_from_srgb[(ptr[1] >> 8) & 0xff], | 65 sk_linear_from_srgb[(ptr[1] >> 8) & 0xff], |
| 76 sk_linear_from_srgb[(ptr[2] >> 8) & 0xff], | 66 sk_linear_from_srgb[(ptr[2] >> 8) & 0xff], |
| 77 sk_linear_from_srgb[(ptr[3] >> 8) & 0xff] }; | 67 sk_linear_from_srgb[(ptr[3] >> 8) & 0xff] }; |
| 78 | 68 |
| 79 db = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 16) & 0xff], | 69 db = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 16) & 0xff], |
| 80 sk_linear_from_srgb[(ptr[1] >> 16) & 0xff], | 70 sk_linear_from_srgb[(ptr[1] >> 16) & 0xff], |
| 81 sk_linear_from_srgb[(ptr[2] >> 16) & 0xff], | 71 sk_linear_from_srgb[(ptr[2] >> 16) & 0xff], |
| 82 sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] }; | 72 sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] }; |
| 83 | 73 |
| 84 da = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f); | 74 da = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f); |
| 85 | |
| 86 st->next(x, r,g,b,a, dr,dg,db,da); | |
| 87 } | 75 } |
| 88 | 76 |
| 89 static void SK_VECTORCALL load_d_srgb_tail(SkRasterPipeline::Stage* st, size_t x
, | 77 SK_RASTER_STAGE(load_d_srgb_tail) { |
| 90 Sk4f r, Sk4f g, Sk4f b, Sk4f a, | 78 auto ptr = (const uint32_t*)ctx + x; |
| 91 Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { | |
| 92 auto ptr = st->ctx<const uint32_t*>() + x; | |
| 93 | 79 |
| 94 dr = Sk4f{ sk_linear_from_srgb[(*ptr >> 0) & 0xff], 0,0,0 }; | 80 dr = Sk4f{ sk_linear_from_srgb[(*ptr >> 0) & 0xff], 0,0,0 }; |
| 95 dg = Sk4f{ sk_linear_from_srgb[(*ptr >> 8) & 0xff], 0,0,0 }; | 81 dg = Sk4f{ sk_linear_from_srgb[(*ptr >> 8) & 0xff], 0,0,0 }; |
| 96 db = Sk4f{ sk_linear_from_srgb[(*ptr >> 16) & 0xff], 0,0,0 }; | 82 db = Sk4f{ sk_linear_from_srgb[(*ptr >> 16) & 0xff], 0,0,0 }; |
| 97 da = Sk4f{ (*ptr >> 24) * (1/255.0f), 0,0,0 }; | 83 da = Sk4f{ (*ptr >> 24) * (1/255.0f), 0,0,0 }; |
| 98 | |
| 99 st->next(x, r,g,b,a, dr,dg,db,da); | |
| 100 } | 84 } |
| 101 | 85 |
| 102 static void SK_VECTORCALL scale_u8(SkRasterPipeline::Stage* st, size_t x, | 86 SK_RASTER_STAGE(scale_u8) { |
| 103 Sk4f r, Sk4f g, Sk4f b, Sk4f a, | 87 auto ptr = (const uint8_t*)ctx + x; |
| 104 Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { | |
| 105 auto ptr = st->ctx<const uint8_t*>() + x; | |
| 106 | 88 |
| 107 auto c = SkNx_cast<float>(Sk4b::Load(ptr)) * (1/255.0f); | 89 auto c = SkNx_cast<float>(Sk4b::Load(ptr)) * (1/255.0f); |
| 108 r *= c; | 90 r *= c; |
| 109 g *= c; | 91 g *= c; |
| 110 b *= c; | 92 b *= c; |
| 111 a *= c; | 93 a *= c; |
| 112 | |
| 113 st->next(x, r,g,b,a, dr,dg,db,da); | |
| 114 } | 94 } |
| 115 | 95 |
| 116 static void SK_VECTORCALL scale_u8_tail(SkRasterPipeline::Stage* st, size_t x, | 96 SK_RASTER_STAGE(scale_u8_tail) { |
| 117 Sk4f r, Sk4f g, Sk4f b, Sk4f a, | 97 auto ptr = (const uint8_t*)ctx + x; |
| 118 Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { | |
| 119 auto ptr = st->ctx<const uint8_t*>() + x; | |
| 120 | 98 |
| 121 auto c = *ptr * (1/255.0f); | 99 auto c = *ptr * (1/255.0f); |
| 122 r *= c; | 100 r *= c; |
| 123 g *= c; | 101 g *= c; |
| 124 b *= c; | 102 b *= c; |
| 125 a *= c; | 103 a *= c; |
| 126 | |
| 127 st->next(x, r,g,b,a, dr,dg,db,da); | |
| 128 } | 104 } |
| 129 | 105 |
| 130 static void SK_VECTORCALL srcover(SkRasterPipeline::Stage* st, size_t x, | 106 SK_RASTER_STAGE(srcover) { |
| 131 Sk4f r, Sk4f g, Sk4f b, Sk4f a, | |
| 132 Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { | |
| 133 auto A = 1.0f - a; | 107 auto A = 1.0f - a; |
| 134 r += dr * A; | 108 r += dr * A; |
| 135 g += dg * A; | 109 g += dg * A; |
| 136 b += db * A; | 110 b += db * A; |
| 137 a += da * A; | 111 a += da * A; |
| 138 | |
| 139 st->next(x, r,g,b,a, dr,dg,db,da); | |
| 140 } | 112 } |
| 141 | 113 |
| 142 static void SK_VECTORCALL store_srgb(SkRasterPipeline::Stage* st, size_t x, | 114 SK_RASTER_STAGE(store_srgb) { |
| 143 Sk4f r, Sk4f g, Sk4f b, Sk4f a, | 115 auto ptr = (uint32_t*)ctx + x; |
| 144 Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { | |
| 145 auto ptr = st->ctx<uint32_t*>() + x; | |
| 146 | 116 |
| 147 ( sk_linear_to_srgb(r) | 117 ( sk_linear_to_srgb(r) |
| 148 | sk_linear_to_srgb(g) << 8 | 118 | sk_linear_to_srgb(g) << 8 |
| 149 | sk_linear_to_srgb(b) << 16 | 119 | sk_linear_to_srgb(b) << 16 |
| 150 | Sk4f_round(255.0f*a) << 24).store(ptr); | 120 | Sk4f_round(255.0f*a) << 24).store(ptr); |
| 151 } | 121 } |
| 152 | 122 |
| 153 static void SK_VECTORCALL store_srgb_tail(SkRasterPipeline::Stage* st, size_t x, | 123 SK_RASTER_STAGE(store_srgb_tail) { |
| 154 Sk4f r, Sk4f g, Sk4f b, Sk4f a, | 124 auto ptr = (uint32_t*)ctx + x; |
| 155 Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { | |
| 156 auto ptr = st->ctx<uint32_t*>() + x; | |
| 157 | 125 |
| 158 Sk4i rgba = sk_linear_to_srgb({r[0], g[0], b[0], 0}); | 126 Sk4i rgba = sk_linear_to_srgb({r[0], g[0], b[0], 0}); |
| 159 rgba = {rgba[0], rgba[1], rgba[2], (int)(255.0f * a[0] + 0.5f)}; | 127 rgba = {rgba[0], rgba[1], rgba[2], (int)(255.0f * a[0] + 0.5f)}; |
| 160 | 128 |
| 161 SkNx_cast<uint8_t>(rgba).store(ptr); | 129 SkNx_cast<uint8_t>(rgba).store(ptr); |
| 162 } | 130 } |
| 163 | 131 |
| 164 class SkRasterPipelineBench : public Benchmark { | 132 class SkRasterPipelineBench : public Benchmark { |
| 165 public: | 133 public: |
| 166 SkRasterPipelineBench() {} | 134 SkRasterPipelineBench(bool fused) : fFused(fused) {} |
| 167 | 135 |
| 168 bool isSuitableFor(Backend backend) override { return backend == kNonRenderi
ng_Backend; } | 136 bool isSuitableFor(Backend backend) override { return backend == kNonRenderi
ng_Backend; } |
| 169 const char* onGetName() override { return "SkRasterPipelineBench"; } | 137 const char* onGetName() override { return fFused ? "SkRasterPipelineBench_fu
sed" |
| 138 : "SkRasterPipelineBench_pi
peline"; } |
| 170 | 139 |
| 171 void onDraw(int loops, SkCanvas*) override { | 140 void onDraw(int loops, SkCanvas*) override { |
| 172 SkRasterPipeline p; | |
| 173 p.append(load_s_srgb, load_s_srgb_tail, src); | |
| 174 p.append( scale_u8, scale_u8_tail, mask); | |
| 175 p.append(load_d_srgb, load_d_srgb_tail, dst); | |
| 176 p.append(srcover); | |
| 177 p.append( store_srgb, store_srgb_tail, dst); | |
| 178 | |
| 179 while (loops --> 0) { | 141 while (loops --> 0) { |
| 180 p.run(N); | 142 fFused ? this->runFused() : this->runPipeline(); |
| 181 } | 143 } |
| 182 } | 144 } |
| 145 |
| | 146 void runFused() { /* Calls the stage functions directly, in pipeline order, over all N pixels (no SkRasterPipeline object). */ |
| | 147 Sk4f r,g,b,a, dr,dg,db,da; |
| | 148 size_t x = 0, n = N; |
| | 149 while (n >= 4) { /* Main body: four pixels per iteration using the non-_tail stages. */ |
| | 150 load_s_srgb(src , x, r,g,b,a, dr,dg,db,da); |
| | 151 scale_u8 (mask , x, r,g,b,a, dr,dg,db,da); /* db goes in its own slot (not da twice), matching the load_s_srgb call above. */ |
| | 152 load_d_srgb(dst , x, r,g,b,a, dr,dg,db,da); |
| | 153 srcover (nullptr, x, r,g,b,a, dr,dg,db,da); /* srcover reads db for the blue channel, so it must be the value load_d_srgb set. */ |
| | 154 store_srgb (dst , x, r,g,b,a, dr,dg,db,da); |
| | 155 |
| | 156 x += 4; |
| | 157 n -= 4; |
| | 158 } |
| | 159 while (n > 0) { /* Remainder: one pixel per iteration using the _tail stages. */ |
| | 160 load_s_srgb_tail(src , x, r,g,b,a, dr,dg,db,da); |
| | 161 scale_u8_tail (mask , x, r,g,b,a, dr,dg,db,da); |
| | 162 load_d_srgb_tail(dst , x, r,g,b,a, dr,dg,db,da); |
| | 163 srcover (nullptr, x, r,g,b,a, dr,dg,db,da); /* srcover touches no memory, so the same function serves the tail. */ |
| | 164 store_srgb_tail (dst , x, r,g,b,a, dr,dg,db,da); |
| | 165 |
| | 166 x += 1; |
| | 167 n -= 1; |
| | 168 } |
| | 169 } |
| 170 |
| | 171 void runPipeline() { /* Builds a fresh SkRasterPipeline from the five stages and runs it once over N pixels; onDraw calls this once per loop iteration, so the pipeline is rebuilt each time. */ |
| | 172 SkRasterPipeline p; |
| | 173 p.append<load_s_srgb, load_s_srgb_tail>( src); |
| | 174 p.append< scale_u8, scale_u8_tail>(mask); |
| | 175 p.append<load_d_srgb, load_d_srgb_tail>( dst); |
| | 176 p.append<srcover>(); /* Appended alone with no context argument: this file defines no _tail variant for srcover. */ |
| | 177 p.append< store_srgb, store_srgb_tail>( dst); |
| | 178 |
| | 179 p.run(N); |
| | 180 } |
| 181 |
| 182 bool fFused; |
| 183 }; | 183 }; |
| 184 | 184 |
| 185 DEF_BENCH( return new SkRasterPipelineBench; ) | 185 DEF_BENCH( return new SkRasterPipelineBench(true); ) |
| 186 DEF_BENCH( return new SkRasterPipelineBench(false); ) |
| OLD | NEW |