OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 /* | 8 /* |
9 ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; an
d ./out/Release/nanobench --samples 300 --nompd --match LinearSrcOver -q | 9 ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; an
d ./out/Release/nanobench --samples 300 --nompd --match LinearSrcOver -q |
10 */ | 10 */ |
11 | 11 |
12 #ifndef SkBlend_opts_DEFINED | 12 #ifndef SkBlend_opts_DEFINED |
13 #define SkBlend_opts_DEFINED | 13 #define SkBlend_opts_DEFINED |
14 | 14 |
15 #include "SkNx.h" | 15 #include "SkNx.h" |
16 #include "SkPM4fPriv.h" | 16 #include "SkPM4fPriv.h" |
17 | 17 |
18 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | 18 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
19 #include <immintrin.h> | 19 #include <immintrin.h> |
20 #endif | 20 #endif |
21 | 21 |
22 namespace SK_OPTS_NS { | 22 namespace SK_OPTS_NS { |
23 | 23 |
24 // An implementation of SrcOver from bytes to bytes in linear space that takes a
dvantage of the | 24 static inline void srcover_srgb8888_srgb_1(uint32_t* dst, const uint32_t pixel)
{ |
25 // observation that the 255's cancel. | |
26 // invA = 1 - (As / 255); | |
27 // | |
28 // R = 255 * sqrt((Rs/255)^2 + (Rd/255)^2 * invA) | |
29 // => R = 255 * sqrt((Rs^2 + Rd^2 * invA)/255^2) | |
30 // => R = sqrt(Rs^2 + Rd^2 * invA) | |
31 static inline void blend_srgb_srgb_1(uint32_t* dst, const uint32_t pixel) { | |
32 Sk4f s = srgb_to_linear(to_4f(pixel)); | |
33 Sk4f d = srgb_to_linear(to_4f(*dst)); | |
34 Sk4f invAlpha = 1.0f - Sk4f{s[SkPM4f::A]} * (1.0f / 255.0f); | |
35 Sk4f r = linear_to_srgb(s + d * invAlpha) + 0.5f; | |
36 *dst = to_4b(r); | |
37 } | |
38 | |
39 static inline void srcover_srgb_srgb_1(uint32_t* dst, const uint32_t pixel) { | |
40 if ((~pixel & 0xFF000000) == 0) { | 25 if ((~pixel & 0xFF000000) == 0) { |
41 *dst = pixel; | 26 *dst = pixel; |
42 } else if ((pixel & 0xFF000000) != 0) { | 27 } else if ((pixel & 0xFF000000) != 0) { |
43 blend_srgb_srgb_1(dst, pixel); | 28 srcover_blend_srgb8888_srgb_1(dst, srgb_to_linear(to_4f(pixel))); |
44 } | 29 } |
45 } | 30 } |
46 | 31 |
47 static inline void srcover_srgb_srgb_2(uint32_t* dst, const uint32_t* src) { | |
48 srcover_srgb_srgb_1(dst++, *src++); | |
49 srcover_srgb_srgb_1(dst, *src); | |
50 } | |
51 | |
52 static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { | 32 static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { |
53 srcover_srgb_srgb_1(dst++, *src++); | 33 srcover_srgb8888_srgb_1(dst++, *src++); |
54 srcover_srgb_srgb_1(dst++, *src++); | 34 srcover_srgb8888_srgb_1(dst++, *src++); |
55 srcover_srgb_srgb_1(dst++, *src++); | 35 srcover_srgb8888_srgb_1(dst++, *src++); |
56 srcover_srgb_srgb_1(dst, *src); | 36 srcover_srgb8888_srgb_1(dst, *src); |
57 } | |
58 | |
59 void best_non_simd_srcover_srgb_srgb( | |
60 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { | |
61 uint64_t* ddst = reinterpret_cast<uint64_t*>(dst); | |
62 | |
63 while (ndst >0) { | |
64 int count = SkTMin(ndst, nsrc); | |
65 ndst -= count; | |
66 const uint64_t* dsrc = reinterpret_cast<const uint64_t*>(src); | |
67 const uint64_t* end = dsrc + (count >> 1); | |
68 do { | |
69 if ((~*dsrc & 0xFF000000FF000000) == 0) { | |
70 do { | |
71 *ddst++ = *dsrc++; | |
72 } while (dsrc < end && (~*dsrc & 0xFF000000FF000000) == 0); | |
73 } else if ((*dsrc & 0xFF000000FF000000) == 0) { | |
74 do { | |
75 dsrc++; | |
76 ddst++; | |
77 } while (dsrc < end && (*dsrc & 0xFF000000FF000000) == 0); | |
78 } else { | |
79 srcover_srgb_srgb_2(reinterpret_cast<uint32_t*>(ddst++), | |
80 reinterpret_cast<const uint32_t*>(dsrc++)); | |
81 } | |
82 } while (dsrc < end); | |
83 | |
84 if ((count & 1) != 0) { | |
85 srcover_srgb_srgb_1(reinterpret_cast<uint32_t*>(ddst), | |
86 *reinterpret_cast<const uint32_t*>(dsrc)); | |
87 } | |
88 } | |
89 } | |
90 | |
91 void brute_force_srcover_srgb_srgb( | |
92 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { | |
93 while (ndst > 0) { | |
94 int n = SkTMin(ndst, nsrc); | |
95 | |
96 for (int i = 0; i < n; i++) { | |
97 blend_srgb_srgb_1(dst++, src[i]); | |
98 } | |
99 ndst -= n; | |
100 } | |
101 } | |
102 | |
103 void trivial_srcover_srgb_srgb( | |
104 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { | |
105 while (ndst > 0) { | |
106 int n = SkTMin(ndst, nsrc); | |
107 | |
108 for (int i = 0; i < n; i++) { | |
109 srcover_srgb_srgb_1(dst++, src[i]); | |
110 } | |
111 ndst -= n; | |
112 } | |
113 } | 37 } |
114 | 38 |
115 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | 39 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
116 | 40 |
117 static inline __m128i load(const uint32_t* p) { | 41 static inline __m128i load(const uint32_t* p) { |
118 return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)); | 42 return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)); |
119 } | 43 } |
120 | 44 |
121 static inline void store(uint32_t* p, __m128i v) { | 45 static inline void store(uint32_t* p, __m128i v) { |
122 _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v); | 46 _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v); |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
156 srcover_srgb_srgb_4(dst, dst + delta); | 80 srcover_srgb_srgb_4(dst, dst + delta); |
157 dst += 4; | 81 dst += 4; |
158 } while (dst < end | 82 } while (dst < end |
159 && _mm_testnzc_si128(pixels = load(dst + delta)
, alphaMask)); | 83 && _mm_testnzc_si128(pixels = load(dst + delta)
, alphaMask)); |
160 src += dst - start; | 84 src += dst - start; |
161 } | 85 } |
162 } | 86 } |
163 | 87 |
164 count = count & 3; | 88 count = count & 3; |
165 while (count-- > 0) { | 89 while (count-- > 0) { |
166 srcover_srgb_srgb_1(dst++, *src++); | 90 srcover_srgb8888_srgb_1(dst++, *src++); |
167 } | 91 } |
168 } | 92 } |
169 } | 93 } |
170 #else | 94 #else |
171 // SSE2 versions | 95 // SSE2 versions |
172 | 96 |
173 // Note: In the next three comparisons a group of 4 pixels is converted
to a group of | 97 // Note: In the next three comparisons a group of 4 pixels is converted
to a group of |
174 // "signed" pixels because the sse2 does not have an unsigned comparison
. | 98 // "signed" pixels because the sse2 does not have an unsigned comparison
. |
175 // Make it so that we can use the signed comparison operators by biasing | 99 // Make it so that we can use the signed comparison operators by biasing |
176 // 0x00xxxxxx to 0x80xxxxxxx which is the smallest values and biasing 0x
ffxxxxxx to | 100 // 0x00xxxxxx to 0x80xxxxxxx which is the smallest values and biasing 0x
ffxxxxxx to |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
228 do { | 152 do { |
229 srcover_srgb_srgb_4(dst, dst + delta); | 153 srcover_srgb_srgb_4(dst, dst + delta); |
230 dst += 4; | 154 dst += 4; |
231 } while (dst < end && check_partial_alphas(pixels = load
(dst + delta))); | 155 } while (dst < end && check_partial_alphas(pixels = load
(dst + delta))); |
232 src += dst - start; | 156 src += dst - start; |
233 } | 157 } |
234 } while (dst < end); | 158 } while (dst < end); |
235 | 159 |
236 count = count & 3; | 160 count = count & 3; |
237 while (count-- > 0) { | 161 while (count-- > 0) { |
238 srcover_srgb_srgb_1(dst++, *src++); | 162 srcover_srgb8888_srgb_1(dst++, *src++); |
239 } | 163 } |
240 } | 164 } |
241 } | 165 } |
242 #endif | 166 #endif |
243 #else | 167 #else |
244 | 168 |
245 void srcover_srgb_srgb( | 169 void srcover_srgb_srgb( |
246 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { | 170 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { |
247 trivial_srcover_srgb_srgb(dst, src, ndst, nsrc); | 171 while (ndst > 0) { |
| 172 int n = SkTMin(ndst, nsrc); |
| 173 |
| 174 for (int i = 0; i < n; i++) { |
| 175 srcover_srgb8888_srgb_1(dst++, src[i]); |
| 176 } |
| 177 ndst -= n; |
| 178 } |
248 } | 179 } |
249 | 180 |
250 #endif | 181 #endif |
251 | 182 |
252 } // namespace SK_OPTS_NS | 183 } // namespace SK_OPTS_NS |
253 | 184 |
254 #endif//SkBlend_opts_DEFINED | 185 #endif//SkBlend_opts_DEFINED |
OLD | NEW |