OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "Benchmark.h" | 8 #include "Benchmark.h" |
9 #include "SkRasterPipeline.h" | 9 #include "SkRasterPipeline.h" |
10 #include "SkSRGB.h" | 10 #include "SkSRGB.h" |
11 | 11 |
12 static const int N = 1023; | 12 static const int N = 1023; |
13 | 13 |
14 static uint32_t dst[N], | 14 static uint32_t dst[N], |
15 src[N]; | 15 src[N]; |
16 static uint8_t mask[N]; | 16 static uint8_t mask[N]; |
17 | 17 |
18 // We'll build up a somewhat realistic useful pipeline: | 18 // We'll build up a somewhat realistic useful pipeline: |
19 // - load srgb src | 19 // - load srgb src |
20 // - scale src by 8-bit mask | 20 // - scale src by 8-bit mask |
21 // - load srgb dst | 21 // - load srgb dst |
22 // - src = srcover(dst, src) | 22 // - src = srcover(dst, src) |
23 // - store src back as srgb | 23 // - store src back as srgb |
24 // Every stage except for srcover interacts with memory, and so will need _tail
variants. | 24 // Every stage except for srcover interacts with memory, and so will need _tail
variants. |
25 | 25 |
26 static void SK_VECTORCALL load_s_srgb(SkRasterPipeline::Stage* st, size_t x, | 26 SK_RASTER_STAGE(load_s_srgb) { |
27 Sk4f r, Sk4f g, Sk4f b, Sk4f a, | 27 auto ptr = (const uint32_t*)ctx + x; |
28 Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { | |
29 auto ptr = st->ctx<const uint32_t*>() + x; | |
30 | 28 |
31 r = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 0) & 0xff], | 29 r = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 0) & 0xff], |
32 sk_linear_from_srgb[(ptr[1] >> 0) & 0xff], | 30 sk_linear_from_srgb[(ptr[1] >> 0) & 0xff], |
33 sk_linear_from_srgb[(ptr[2] >> 0) & 0xff], | 31 sk_linear_from_srgb[(ptr[2] >> 0) & 0xff], |
34 sk_linear_from_srgb[(ptr[3] >> 0) & 0xff] }; | 32 sk_linear_from_srgb[(ptr[3] >> 0) & 0xff] }; |
35 | 33 |
36 g = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 8) & 0xff], | 34 g = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 8) & 0xff], |
37 sk_linear_from_srgb[(ptr[1] >> 8) & 0xff], | 35 sk_linear_from_srgb[(ptr[1] >> 8) & 0xff], |
38 sk_linear_from_srgb[(ptr[2] >> 8) & 0xff], | 36 sk_linear_from_srgb[(ptr[2] >> 8) & 0xff], |
39 sk_linear_from_srgb[(ptr[3] >> 8) & 0xff] }; | 37 sk_linear_from_srgb[(ptr[3] >> 8) & 0xff] }; |
40 | 38 |
41 b = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 16) & 0xff], | 39 b = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 16) & 0xff], |
42 sk_linear_from_srgb[(ptr[1] >> 16) & 0xff], | 40 sk_linear_from_srgb[(ptr[1] >> 16) & 0xff], |
43 sk_linear_from_srgb[(ptr[2] >> 16) & 0xff], | 41 sk_linear_from_srgb[(ptr[2] >> 16) & 0xff], |
44 sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] }; | 42 sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] }; |
45 | 43 |
46 a = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f); | 44 a = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f); |
47 | |
48 st->next(x, r,g,b,a, dr,dg,db,da); | |
49 } | 45 } |
50 | 46 |
51 static void SK_VECTORCALL load_s_srgb_tail(SkRasterPipeline::Stage* st, size_t x
, | 47 SK_RASTER_STAGE(load_s_srgb_tail) { |
52 Sk4f r, Sk4f g, Sk4f b, Sk4f a, | 48 auto ptr = (const uint32_t*)ctx + x; |
53 Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { | |
54 auto ptr = st->ctx<const uint32_t*>() + x; | |
55 | 49 |
56 r = Sk4f{ sk_linear_from_srgb[(*ptr >> 0) & 0xff], 0,0,0 }; | 50 r = Sk4f{ sk_linear_from_srgb[(*ptr >> 0) & 0xff], 0,0,0 }; |
57 g = Sk4f{ sk_linear_from_srgb[(*ptr >> 8) & 0xff], 0,0,0 }; | 51 g = Sk4f{ sk_linear_from_srgb[(*ptr >> 8) & 0xff], 0,0,0 }; |
58 b = Sk4f{ sk_linear_from_srgb[(*ptr >> 16) & 0xff], 0,0,0 }; | 52 b = Sk4f{ sk_linear_from_srgb[(*ptr >> 16) & 0xff], 0,0,0 }; |
59 a = Sk4f{ (*ptr >> 24) * (1/255.0f), 0,0,0 }; | 53 a = Sk4f{ (*ptr >> 24) * (1/255.0f), 0,0,0 }; |
60 | |
61 st->next(x, r,g,b,a, dr,dg,db,da); | |
62 } | 54 } |
63 | 55 |
64 static void SK_VECTORCALL load_d_srgb(SkRasterPipeline::Stage* st, size_t x, | 56 SK_RASTER_STAGE(load_d_srgb) { |
65 Sk4f r, Sk4f g, Sk4f b, Sk4f a, | 57 auto ptr = (const uint32_t*)ctx + x; |
66 Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { | |
67 auto ptr = st->ctx<const uint32_t*>() + x; | |
68 | 58 |
69 dr = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 0) & 0xff], | 59 dr = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 0) & 0xff], |
70 sk_linear_from_srgb[(ptr[1] >> 0) & 0xff], | 60 sk_linear_from_srgb[(ptr[1] >> 0) & 0xff], |
71 sk_linear_from_srgb[(ptr[2] >> 0) & 0xff], | 61 sk_linear_from_srgb[(ptr[2] >> 0) & 0xff], |
72 sk_linear_from_srgb[(ptr[3] >> 0) & 0xff] }; | 62 sk_linear_from_srgb[(ptr[3] >> 0) & 0xff] }; |
73 | 63 |
74 dg = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 8) & 0xff], | 64 dg = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 8) & 0xff], |
75 sk_linear_from_srgb[(ptr[1] >> 8) & 0xff], | 65 sk_linear_from_srgb[(ptr[1] >> 8) & 0xff], |
76 sk_linear_from_srgb[(ptr[2] >> 8) & 0xff], | 66 sk_linear_from_srgb[(ptr[2] >> 8) & 0xff], |
77 sk_linear_from_srgb[(ptr[3] >> 8) & 0xff] }; | 67 sk_linear_from_srgb[(ptr[3] >> 8) & 0xff] }; |
78 | 68 |
79 db = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 16) & 0xff], | 69 db = Sk4f{ sk_linear_from_srgb[(ptr[0] >> 16) & 0xff], |
80 sk_linear_from_srgb[(ptr[1] >> 16) & 0xff], | 70 sk_linear_from_srgb[(ptr[1] >> 16) & 0xff], |
81 sk_linear_from_srgb[(ptr[2] >> 16) & 0xff], | 71 sk_linear_from_srgb[(ptr[2] >> 16) & 0xff], |
82 sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] }; | 72 sk_linear_from_srgb[(ptr[3] >> 16) & 0xff] }; |
83 | 73 |
84 da = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f); | 74 da = SkNx_cast<float>((Sk4i::Load(ptr) >> 24) & 0xff) * (1/255.0f); |
85 | |
86 st->next(x, r,g,b,a, dr,dg,db,da); | |
87 } | 75 } |
88 | 76 |
89 static void SK_VECTORCALL load_d_srgb_tail(SkRasterPipeline::Stage* st, size_t x
, | 77 SK_RASTER_STAGE(load_d_srgb_tail) { |
90 Sk4f r, Sk4f g, Sk4f b, Sk4f a, | 78 auto ptr = (const uint32_t*)ctx + x; |
91 Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { | |
92 auto ptr = st->ctx<const uint32_t*>() + x; | |
93 | 79 |
94 dr = Sk4f{ sk_linear_from_srgb[(*ptr >> 0) & 0xff], 0,0,0 }; | 80 dr = Sk4f{ sk_linear_from_srgb[(*ptr >> 0) & 0xff], 0,0,0 }; |
95 dg = Sk4f{ sk_linear_from_srgb[(*ptr >> 8) & 0xff], 0,0,0 }; | 81 dg = Sk4f{ sk_linear_from_srgb[(*ptr >> 8) & 0xff], 0,0,0 }; |
96 db = Sk4f{ sk_linear_from_srgb[(*ptr >> 16) & 0xff], 0,0,0 }; | 82 db = Sk4f{ sk_linear_from_srgb[(*ptr >> 16) & 0xff], 0,0,0 }; |
97 da = Sk4f{ (*ptr >> 24) * (1/255.0f), 0,0,0 }; | 83 da = Sk4f{ (*ptr >> 24) * (1/255.0f), 0,0,0 }; |
98 | |
99 st->next(x, r,g,b,a, dr,dg,db,da); | |
100 } | 84 } |
101 | 85 |
102 static void SK_VECTORCALL scale_u8(SkRasterPipeline::Stage* st, size_t x, | 86 SK_RASTER_STAGE(scale_u8) { |
103 Sk4f r, Sk4f g, Sk4f b, Sk4f a, | 87 auto ptr = (const uint8_t*)ctx + x; |
104 Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { | |
105 auto ptr = st->ctx<const uint8_t*>() + x; | |
106 | 88 |
107 auto c = SkNx_cast<float>(Sk4b::Load(ptr)) * (1/255.0f); | 89 auto c = SkNx_cast<float>(Sk4b::Load(ptr)) * (1/255.0f); |
108 r *= c; | 90 r *= c; |
109 g *= c; | 91 g *= c; |
110 b *= c; | 92 b *= c; |
111 a *= c; | 93 a *= c; |
112 | |
113 st->next(x, r,g,b,a, dr,dg,db,da); | |
114 } | 94 } |
115 | 95 |
116 static void SK_VECTORCALL scale_u8_tail(SkRasterPipeline::Stage* st, size_t x, | 96 SK_RASTER_STAGE(scale_u8_tail) { |
117 Sk4f r, Sk4f g, Sk4f b, Sk4f a, | 97 auto ptr = (const uint8_t*)ctx + x; |
118 Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { | |
119 auto ptr = st->ctx<const uint8_t*>() + x; | |
120 | 98 |
121 auto c = *ptr * (1/255.0f); | 99 auto c = *ptr * (1/255.0f); |
122 r *= c; | 100 r *= c; |
123 g *= c; | 101 g *= c; |
124 b *= c; | 102 b *= c; |
125 a *= c; | 103 a *= c; |
126 | |
127 st->next(x, r,g,b,a, dr,dg,db,da); | |
128 } | 104 } |
129 | 105 |
130 static void SK_VECTORCALL srcover(SkRasterPipeline::Stage* st, size_t x, | 106 SK_RASTER_STAGE(srcover) { |
131 Sk4f r, Sk4f g, Sk4f b, Sk4f a, | |
132 Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { | |
133 auto A = 1.0f - a; | 107 auto A = 1.0f - a; |
134 r += dr * A; | 108 r += dr * A; |
135 g += dg * A; | 109 g += dg * A; |
136 b += db * A; | 110 b += db * A; |
137 a += da * A; | 111 a += da * A; |
138 | |
139 st->next(x, r,g,b,a, dr,dg,db,da); | |
140 } | 112 } |
141 | 113 |
142 static void SK_VECTORCALL store_srgb(SkRasterPipeline::Stage* st, size_t x, | 114 SK_RASTER_STAGE(store_srgb) { |
143 Sk4f r, Sk4f g, Sk4f b, Sk4f a, | 115 auto ptr = (uint32_t*)ctx + x; |
144 Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { | |
145 auto ptr = st->ctx<uint32_t*>() + x; | |
146 | 116 |
147 ( sk_linear_to_srgb(r) | 117 ( sk_linear_to_srgb(r) |
148 | sk_linear_to_srgb(g) << 8 | 118 | sk_linear_to_srgb(g) << 8 |
149 | sk_linear_to_srgb(b) << 16 | 119 | sk_linear_to_srgb(b) << 16 |
150 | Sk4f_round(255.0f*a) << 24).store(ptr); | 120 | Sk4f_round(255.0f*a) << 24).store(ptr); |
151 } | 121 } |
152 | 122 |
153 static void SK_VECTORCALL store_srgb_tail(SkRasterPipeline::Stage* st, size_t x, | 123 SK_RASTER_STAGE(store_srgb_tail) { |
154 Sk4f r, Sk4f g, Sk4f b, Sk4f a, | 124 auto ptr = (uint32_t*)ctx + x; |
155 Sk4f dr, Sk4f dg, Sk4f db, Sk4f da) { | |
156 auto ptr = st->ctx<uint32_t*>() + x; | |
157 | 125 |
158 Sk4i rgba = sk_linear_to_srgb({r[0], g[0], b[0], 0}); | 126 Sk4i rgba = sk_linear_to_srgb({r[0], g[0], b[0], 0}); |
159 rgba = {rgba[0], rgba[1], rgba[2], (int)(255.0f * a[0] + 0.5f)}; | 127 rgba = {rgba[0], rgba[1], rgba[2], (int)(255.0f * a[0] + 0.5f)}; |
160 | 128 |
161 SkNx_cast<uint8_t>(rgba).store(ptr); | 129 SkNx_cast<uint8_t>(rgba).store(ptr); |
162 } | 130 } |
163 | 131 |
164 class SkRasterPipelineBench : public Benchmark { | 132 class SkRasterPipelineBench : public Benchmark { |
165 public: | 133 public: |
166 SkRasterPipelineBench() {} | 134 SkRasterPipelineBench(bool fused) : fFused(fused) {} |
167 | 135 |
168 bool isSuitableFor(Backend backend) override { return backend == kNonRenderi
ng_Backend; } | 136 bool isSuitableFor(Backend backend) override { return backend == kNonRenderi
ng_Backend; } |
169 const char* onGetName() override { return "SkRasterPipelineBench"; } | 137 const char* onGetName() override { return fFused ? "SkRasterPipelineBench_fu
sed" |
| 138 : "SkRasterPipelineBench_pi
peline"; } |
170 | 139 |
171 void onDraw(int loops, SkCanvas*) override { | 140 void onDraw(int loops, SkCanvas*) override { |
172 SkRasterPipeline p; | |
173 p.append(load_s_srgb, load_s_srgb_tail, src); | |
174 p.append( scale_u8, scale_u8_tail, mask); | |
175 p.append(load_d_srgb, load_d_srgb_tail, dst); | |
176 p.append(srcover); | |
177 p.append( store_srgb, store_srgb_tail, dst); | |
178 | |
179 while (loops --> 0) { | 141 while (loops --> 0) { |
180 p.run(N); | 142 fFused ? this->runFused() : this->runPipeline(); |
181 } | 143 } |
182 } | 144 } |
| 145 |
| 146 void runFused() { // Calls the five stages directly, in order, instead of going through SkRasterPipeline. |
| 147 Sk4f r,g,b,a, dr,dg,db,da; |
| 148 size_t x = 0, n = N; |
| 149 while (n >= 4) { // Body: four pixels per iteration. |
| 150 load_s_srgb(src , x, r,g,b,a, dr,dg,db,da); |
| 151 scale_u8 (mask , x, r,g,b,a, dr,dg,db,da); // db and da must stay distinct arguments; passing da twice leaves db unused. |
| 152 load_d_srgb(dst , x, r,g,b,a, dr,dg,db,da); |
| 153 srcover (nullptr, x, r,g,b,a, dr,dg,db,da); |
| 154 store_srgb (dst , x, r,g,b,a, dr,dg,db,da); |
| 155 |
| 156 x += 4; |
| 157 n -= 4; |
| 158 } |
| 159 while (n > 0) { // Tail: one pixel per iteration via the _tail stage variants. |
| 160 load_s_srgb_tail(src , x, r,g,b,a, dr,dg,db,da); |
| 161 scale_u8_tail (mask , x, r,g,b,a, dr,dg,db,da); |
| 162 load_d_srgb_tail(dst , x, r,g,b,a, dr,dg,db,da); |
| 163 srcover (nullptr, x, r,g,b,a, dr,dg,db,da); |
| 164 store_srgb_tail (dst , x, r,g,b,a, dr,dg,db,da); |
| 165 |
| 166 x += 1; |
| 167 n -= 1; |
| 168 } |
| 169 } |
| 170 |
| 171 void runPipeline() { |
| 172 SkRasterPipeline p; |
| 173 p.append<load_s_srgb, load_s_srgb_tail>( src); |
| 174 p.append< scale_u8, scale_u8_tail>(mask); |
| 175 p.append<load_d_srgb, load_d_srgb_tail>( dst); |
| 176 p.append<srcover>(); |
| 177 p.append< store_srgb, store_srgb_tail>( dst); |
| 178 |
| 179 p.run(N); |
| 180 } |
| 181 |
| 182 bool fFused; |
183 }; | 183 }; |
184 | 184 |
185 DEF_BENCH( return new SkRasterPipelineBench; ) | 185 DEF_BENCH( return new SkRasterPipelineBench(true); ) |
| 186 DEF_BENCH( return new SkRasterPipelineBench(false); ) |
OLD | NEW |