Chromium Code Reviews

Side by Side Diff: src/opts/SkBlend_opts.h

Issue 1939513002: Add specialized sRGB blitter for SkOpts (Closed)
Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Fix bad assert. Created 4 years, 7 months ago

New version (right-hand column):

/*
 * Copyright 2016 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

/*
    ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; and ./out/Release/nanobench --samples 300 --nompd --match LinearSrcOver -q
*/

#ifndef SkBlend_opts_DEFINED
#define SkBlend_opts_DEFINED

#include "SkNx.h"
#include "SkPM4fPriv.h"

namespace SK_OPTS_NS {

// An implementation of SrcOver from bytes to bytes in linear space that takes advantage of the
// observation that the 255's cancel.
// invA = 1 - (As / 255);
//
// R = 255 * sqrt((Rs/255)^2 + (Rd/255)^2 * invA)
// => R = 255 * sqrt((Rs^2 + Rd^2 * invA)/255^2)
// => R = sqrt(Rs^2 + Rd^2 * invA)
static inline void blend_srgb_srgb_1(uint32_t* dst, const uint32_t pixel) {
    Sk4f s = srgb_to_linear(to_4f(pixel));
    Sk4f d = srgb_to_linear(to_4f(*dst));
    Sk4f invAlpha = 1.0f - Sk4f{s[SkPM4f::A]} * (1.0f / 255.0f);
    Sk4f r = linear_to_srgb(s + d * invAlpha) + 0.5f;
    *dst = to_4b(r);
}

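// Illustrative sketch, not part of this patch: a scalar, single-channel check of the
// derivation above, using the gamma-2.0 model from the comment. Both helpers compute the
// same value, which is the "255's cancel" observation. Names are hypothetical and the
// sketch assumes <cmath> for sqrtf().
static inline float example_srcover_channel_normalized(float Rs, float Rd, float As) {
    float invA = 1.0f - As * (1.0f / 255.0f);
    float rs = Rs * (1.0f / 255.0f),
          rd = Rd * (1.0f / 255.0f);
    return 255.0f * sqrtf(rs*rs + rd*rd * invA);
}

static inline float example_srcover_channel_scaled(float Rs, float Rd, float As) {
    float invA = 1.0f - As * (1.0f / 255.0f);
    return sqrtf(Rs*Rs + Rd*Rd * invA);  // the 255's have cancelled
}
// For example, Rs = 100, Rd = 200, As = 128 gives ~173.0 from both helpers.
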
static inline void srcover_srgb_srgb_1(uint32_t* dst, const uint32_t pixel) {
    if ((~pixel & 0xFF000000) == 0) {
        *dst = pixel;
    } else if ((pixel & 0xFF000000) != 0) {
        blend_srgb_srgb_1(dst, pixel);
    }
}
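
// Illustrative note, not part of this patch: the two tests above read the source alpha byte
// in place. (~pixel & 0xFF000000) == 0 means the alpha byte is 0xFF, so an opaque source
// simply overwrites the destination; (pixel & 0xFF000000) != 0 means alpha is non-zero, so
// the pixel is blended; otherwise the source is fully transparent and dst is left untouched.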

static inline void srcover_srgb_srgb_2(uint32_t* dst, const uint32_t* src) {
    srcover_srgb_srgb_1(dst++, *src++);
    srcover_srgb_srgb_1(dst, *src);
}

static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
    srcover_srgb_srgb_1(dst++, *src++);
    srcover_srgb_srgb_1(dst++, *src++);
    srcover_srgb_srgb_1(dst++, *src++);
    srcover_srgb_srgb_1(dst, *src);
}

void best_non_simd_srcover_srgb_srgb(
        uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
    uint64_t* ddst = reinterpret_cast<uint64_t*>(dst);

    while (ndst > 0) {
        int count = SkTMin(ndst, nsrc);
        ndst -= count;
        const uint64_t* dsrc = reinterpret_cast<const uint64_t*>(src);
        const uint64_t* end = dsrc + (count >> 1);
        do {
            if ((~*dsrc & 0xFF000000FF000000) == 0) {
                do {
                    *ddst++ = *dsrc++;
                } while (dsrc < end && (~*dsrc & 0xFF000000FF000000) == 0);
            } else if ((*dsrc & 0xFF000000FF000000) == 0) {
                do {
                    dsrc++;
                    ddst++;
                } while (dsrc < end && (*dsrc & 0xFF000000FF000000) == 0);
            } else {
                srcover_srgb_srgb_2(reinterpret_cast<uint32_t*>(ddst++),
                                    reinterpret_cast<const uint32_t*>(dsrc++));
            }
        } while (dsrc < end);

        if ((count & 1) != 0) {
            srcover_srgb_srgb_1(reinterpret_cast<uint32_t*>(ddst),
                                *reinterpret_cast<const uint32_t*>(dsrc));
        }
    }
}

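// Illustrative sketch, not part of this patch: the (dst, src, ndst, nsrc) convention shared by
// the blitters in this file. src holds nsrc pixels and is re-applied from its start until all
// ndst destination pixels are covered, so nsrc may be smaller than ndst. The buffer contents
// and sizes below are hypothetical.
static inline void example_call_best_non_simd() {
    uint32_t dst[16];
    for (uint32_t& d : dst) {
        d = 0xFF000000;  // opaque black destination
    }
    const uint32_t src[4] = { 0xFF112233, 0x80104060, 0x00000000, 0xFF445566 };
    best_non_simd_srcover_srgb_srgb(dst, src, 16, 4);  // four passes over the 4-pixel src
}
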
void brute_force_srcover_srgb_srgb(
        uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
    while (ndst > 0) {
        int n = SkTMin(ndst, nsrc);

        for (int i = 0; i < n; i++) {
            blend_srgb_srgb_1(dst++, src[i]);
        }
        ndst -= n;
    }
}

void trivial_srcover_srgb_srgb(
        uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
    while (ndst > 0) {
        int n = SkTMin(ndst, nsrc);

        for (int i = 0; i < n; i++) {
            srcover_srgb_srgb_1(dst++, src[i]);
        }
        ndst -= n;
    }
}

#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2

static inline __m128i load(const uint32_t* p) {
    return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
}

static inline void store(uint32_t* p, __m128i v) {
    _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v);
}

#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41

void srcover_srgb_srgb(
        uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) {
    const __m128i alphaMask = _mm_set1_epi32(0xFF000000);
    while (ndst > 0) {
        int count = SkTMin(ndst, nsrc);
        ndst -= count;
        const uint32_t* src = srcStart;
        const uint32_t* end = src + (count & ~3);

        while (src < end) {
            __m128i pixels = load(src);
            if (_mm_testc_si128(pixels, alphaMask)) {
                do {
                    store(dst, pixels);
                    dst += 4;
                    src += 4;
                } while (src < end && _mm_testc_si128(pixels = load(src), alphaMask));
            } else if (_mm_testz_si128(pixels, alphaMask)) {
                do {
                    dst += 4;
                    src += 4;
                } while (src < end && _mm_testz_si128(pixels = load(src), alphaMask));
            } else {
                do {
                    srcover_srgb_srgb_4(dst, src);
                    dst += 4;
                    src += 4;
                } while (src < end && _mm_testnzc_si128(pixels = load(src), alphaMask));
            }
        }

        count = count & 3;
        while (count-- > 0) {
            srcover_srgb_srgb_1(dst++, *src++);
        }
    }
}
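
// Illustrative note, not part of this patch: with alphaMask holding 0xFF000000 in each lane,
//   _mm_testc_si128(pixels, alphaMask)   is non-zero iff (~pixels & alphaMask) == 0,
//                                        i.e. all four alpha bytes are 0xFF (fully opaque);
//   _mm_testz_si128(pixels, alphaMask)   is non-zero iff (pixels & alphaMask) == 0,
//                                        i.e. all four alpha bytes are 0x00 (fully transparent);
//   _mm_testnzc_si128(pixels, alphaMask) is non-zero only in the remaining mixed case.
// Each 4-pixel group is therefore classified with a single SSE4.1 test instruction.
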
#else
// SSE2 versions
static inline bool check_opaque_alphas(__m128i pixels) {
    int mask =
        _mm_movemask_epi8(
            _mm_cmpeq_epi32(
                _mm_andnot_si128(pixels, _mm_set1_epi32(0xFF000000)),
                _mm_setzero_si128()));
    return mask == 0xFFFF;
}

static inline bool check_transparent_alphas(__m128i pixels) {
    int mask =
        _mm_movemask_epi8(
            _mm_cmpeq_epi32(
                _mm_and_si128(pixels, _mm_set1_epi32(0xFF000000)),
                _mm_setzero_si128()));
    return mask == 0xFFFF;
}

static inline bool check_partial_alphas(__m128i pixels) {
    __m128i alphas = _mm_and_si128(pixels, _mm_set1_epi32(0xFF000000));
    int mask =
        _mm_movemask_epi8(
            _mm_cmpeq_epi8(
                _mm_srai_epi32(alphas, 8),
                alphas));
    return mask == 0xFFFF;
}

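// Illustrative sketch, not part of this patch: scalar equivalents of check_opaque_alphas()
// and check_transparent_alphas(), for reference. A group of four pixels is "opaque" when
// every alpha byte is 0xFF and "transparent" when every alpha byte is 0x00. Helper names
// are hypothetical.
static inline bool example_all_opaque(const uint32_t p[4]) {
    return ((p[0] & p[1] & p[2] & p[3]) & 0xFF000000) == 0xFF000000;
}

static inline bool example_all_transparent(const uint32_t p[4]) {
    return ((p[0] | p[1] | p[2] | p[3]) & 0xFF000000) == 0;
}
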
void srcover_srgb_srgb(
        uint32_t* dst, const uint32_t* const srcStart, int ndst, const int nsrc) {
    while (ndst > 0) {
        int count = SkTMin(ndst, nsrc);
        ndst -= count;
        const uint32_t* src = srcStart;
        const uint32_t* end = src + (count & ~3);

        __m128i pixels = load(src);
        do {
            if (check_opaque_alphas(pixels)) {
                do {
                    store(dst, pixels);
                    dst += 4;
                    src += 4;
                } while (src < end && check_opaque_alphas(pixels = load(src)));
            } else if (check_transparent_alphas(pixels)) {
                const uint32_t* start = src;
                do {
                    src += 4;
                } while (src < end && check_transparent_alphas(pixels = load(src)));
                dst += src - start;
            } else {
                do {
                    srcover_srgb_srgb_4(dst, src);
                    dst += 4;
                    src += 4;
                } while (src < end && check_partial_alphas(pixels = load(src)));
            }
        } while (src < end);

        count = count & 3;
        while (count-- > 0) {
            srcover_srgb_srgb_1(dst++, *src++);
        }
    }
}
#endif
#else

void srcover_srgb_srgb(
        uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
    trivial_srcover_srgb_srgb(dst, src, ndst, nsrc);
}

#endif

}  // namespace SK_OPTS_NS

#endif//SkBlend_opts_DEFINED

Previous implementation (left-hand column), replaced by this patch:

#if 0

#else

static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) {
    switch (src >> 24) {
        case 0x00: return;
        case 0xff: *dst = src; return;
    }

    Sk4f d = SkNx_cast<float>(Sk4b::Load( dst)),
         s = SkNx_cast<float>(Sk4b::Load(&src));

    // Approximate sRGB gamma as 2.0.
    Sk4f d_sq = d*d,
         s_sq = s*s;
    d = Sk4f{d_sq[0], d_sq[1], d_sq[2], d[3]};
    s = Sk4f{s_sq[0], s_sq[1], s_sq[2], s[3]};

    // SrcOver.
    Sk4f invA = 1.0f - s[3]*(1/255.0f);
    d = s + d * invA;

    // Re-apply approximate sRGB gamma.
    Sk4f d_sqrt = d.sqrt();
    d = Sk4f{d_sqrt[0], d_sqrt[1], d_sqrt[2], d[3]};

    SkNx_cast<uint8_t>(d).store(dst);
}

static inline void srcover_srgb_srgb(uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
    while (ndst > 0) {
        int n = SkTMin(ndst, nsrc);

        for (int i = 0; i < n; i++) {
            srcover_srgb_srgb_1(dst++, src[i]);
        }
        ndst -= n;
    }
}

#endif
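
The removed scalar path above approximates the sRGB transfer curve with a gamma of 2.0 (square on decode, square root on encode), while the new blend_srgb_srgb_1() round-trips through srgb_to_linear()/linear_to_srgb(), pulled in via the new includes. A minimal standalone sketch, not part of this patch, of how the two decodes differ for a single channel; srgb_to_linear_ref() is a hypothetical reference helper, not Skia's implementation:

#include <cmath>
#include <cstdio>

// Reference sRGB decode for a value in [0, 1] (IEC 61966-2-1).
static float srgb_to_linear_ref(float s) {
    return s <= 0.04045f ? s / 12.92f
                         : std::pow((s + 0.055f) / 1.055f, 2.4f);
}

int main() {
    const float samples[] = {0.25f, 0.50f, 0.75f};
    for (float s : samples) {
        std::printf("s = %.2f   gamma-2.0 approx = %.4f   sRGB = %.4f\n",
                    s, s * s, srgb_to_linear_ref(s));
    }
    return 0;
}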