OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2016 Google Inc. | 2 * Copyright 2016 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 /* | 8 /* |
9 ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; an
d ./out/Release/nanobench --samples 300 --nompd --match LinearSrcOver -q | 9 ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; an
d ./out/Release/nanobench --samples 300 --nompd --match LinearSrcOver -q |
10 */ | 10 */ |
11 | 11 |
12 #ifndef SkBlend_opts_DEFINED | 12 #ifndef SkBlend_opts_DEFINED |
13 #define SkBlend_opts_DEFINED | 13 #define SkBlend_opts_DEFINED |
14 | 14 |
15 #include "SkNx.h" | 15 #include "SkNx.h" |
16 #include "SkPM4fPriv.h" | 16 #include "SkPM4fPriv.h" |
17 | 17 |
18 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | 18 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
19 #include <immintrin.h> | 19 #include <immintrin.h> |
20 #endif | 20 #endif |
21 | 21 |
22 namespace SK_OPTS_NS { | 22 namespace SK_OPTS_NS { |
23 | 23 |
// Blends a single 32-bit source pixel over *dst. This diff renames the helper from
// srcover_srgb8888_srgb_1 (OLD column) to srcover_srgb_srgb_1 (NEW column) and rewrites its body.
//
// OLD column: three cases keyed on the top byte of `pixel`:
//   - top byte == 0xFF: the pixel is stored to *dst unchanged;
//   - top byte != 0:    srcover_blend_srgb8888_srgb_1(dst, srgb_to_linear(to_4f(pixel))) is called;
//   - top byte == 0:    nothing is done and *dst is left untouched.
// NEW column: two cases:
//   - src >= 0xFF000000 (top byte == 0xFF): *dst = src, then return;
//   - otherwise: d = Sk4f_fromS32(*dst), s = Sk4f_fromS32(src), and
//     *dst = Sk4f_toS32(s + d * (1.0f - s[3])).
// The top byte is presumably alpha (the SIMD paths in this file test it via `alphaMask`) and the
// s + d * (1 - s[3]) form suggests premultiplied source-over -- TODO confirm against SkPM4fPriv.h.
// NOTE(review): the NEW column has no early-out for a zero top byte, so such pixels now go through
// the Sk4f_fromS32 / Sk4f_toS32 round trip instead of leaving *dst untouched -- confirm that round
// trip reproduces *dst exactly.
24 static inline void srcover_srgb8888_srgb_1(uint32_t* dst, const uint32_t pixel)
{ | 24 static inline void srcover_srgb_srgb_1(uint32_t* dst, uint32_t src) { |
25 if ((~pixel & 0xFF000000) == 0) { | 25 if (src >= 0xFF000000) { |
26 *dst = pixel; | 26 *dst = src; |
27 } else if ((pixel & 0xFF000000) != 0) { | 27 return; |
28 srcover_blend_srgb8888_srgb_1(dst, srgb_to_linear(to_4f(pixel))); | |
29 } | 28 } |
| 29 auto d = Sk4f_fromS32(*dst), |
| 30 s = Sk4f_fromS32( src); |
| 31 *dst = Sk4f_toS32(s + d * (1.0f - s[3])); |
30 } | 32 } |
31 | 33 |
// Applies the single-pixel blend to four consecutive pixels, in order: dst[0..3] are each blended
// with the matching src[0..3]. Both pointers are local copies, so the caller's pointers are not
// advanced. The only change between the OLD and NEW columns is the name of the single-pixel helper
// being called (plus alignment whitespace on the last call).
32 static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { | 34 static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { |
33 srcover_srgb8888_srgb_1(dst++, *src++); | 35 srcover_srgb_srgb_1(dst++, *src++); |
34 srcover_srgb8888_srgb_1(dst++, *src++); | 36 srcover_srgb_srgb_1(dst++, *src++); |
35 srcover_srgb8888_srgb_1(dst++, *src++); | 37 srcover_srgb_srgb_1(dst++, *src++); |
36 srcover_srgb8888_srgb_1(dst, *src); | 38 srcover_srgb_srgb_1(dst , *src ); |
37 } | 39 } |
38 | 40 |
39 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | 41 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
40 | 42 |
// Reads 128 bits (four uint32_t values) starting at p into an SSE register. It uses
// _mm_loadu_si128, the unaligned-load intrinsic, so p does not need 16-byte alignment.
// Identical in the OLD and NEW columns.
41 static inline __m128i load(const uint32_t* p) { | 43 static inline __m128i load(const uint32_t* p) { |
42 return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)); | 44 return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)); |
43 } | 45 } |
44 | 46 |
45 static inline void store(uint32_t* p, __m128i v) { | 47 static inline void store(uint32_t* p, __m128i v) { |
46 _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v); | 48 _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v); |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
80 srcover_srgb_srgb_4(dst, dst + delta); | 82 srcover_srgb_srgb_4(dst, dst + delta); |
81 dst += 4; | 83 dst += 4; |
82 } while (dst < end | 84 } while (dst < end |
83 && _mm_testnzc_si128(pixels = load(dst + delta)
, alphaMask)); | 85 && _mm_testnzc_si128(pixels = load(dst + delta)
, alphaMask)); |
84 src += dst - start; | 86 src += dst - start; |
85 } | 87 } |
86 } | 88 } |
87 | 89 |
88 count = count & 3; | 90 count = count & 3; |
89 while (count-- > 0) { | 91 while (count-- > 0) { |
90 srcover_srgb8888_srgb_1(dst++, *src++); | 92 srcover_srgb_srgb_1(dst++, *src++); |
91 } | 93 } |
92 } | 94 } |
93 } | 95 } |
94 #else | 96 #else |
95 // SSE2 versions | 97 // SSE2 versions |
96 | 98 |
97 // Note: In the next three comparisons a group of 4 pixels is converted
to a group of | 99 // Note: In the next three comparisons a group of 4 pixels is converted
to a group of |
98 // "signed" pixels because the sse2 does not have an unsigned comparison
. | 100 // "signed" pixels because the sse2 does not have an unsigned comparison
. |
99 // Make it so that we can use the signed comparison operators by biasing | 101 // Make it so that we can use the signed comparison operators by biasing |
100 // 0x00xxxxxx to 0x80xxxxxx which are the smallest values and biasing 0x
ffxxxxxx to | 102 // 0x00xxxxxx to 0x80xxxxxx which are the smallest values and biasing 0x
ffxxxxxx to |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
152 do { | 154 do { |
153 srcover_srgb_srgb_4(dst, dst + delta); | 155 srcover_srgb_srgb_4(dst, dst + delta); |
154 dst += 4; | 156 dst += 4; |
155 } while (dst < end && check_partial_alphas(pixels = load
(dst + delta))); | 157 } while (dst < end && check_partial_alphas(pixels = load
(dst + delta))); |
156 src += dst - start; | 158 src += dst - start; |
157 } | 159 } |
158 } while (dst < end); | 160 } while (dst < end); |
159 | 161 |
160 count = count & 3; | 162 count = count & 3; |
161 while (count-- > 0) { | 163 while (count-- > 0) { |
162 srcover_srgb8888_srgb_1(dst++, *src++); | 164 srcover_srgb_srgb_1(dst++, *src++); |
163 } | 165 } |
164 } | 166 } |
165 } | 167 } |
166 #endif | 168 #endif |
167 #else | 169 #else |
168 | 170 |
// Scalar srcover_srgb_srgb. It sits in an `#else` branch and appears to be the fallback used when
// the SSE2 check near the top of the file fails -- confirm the #if nesting against the full file.
//
// Blends `src` over `dst` for `ndst` destination pixels. Each pass of the outer loop handles
// n = min(ndst, nsrc) pixels, reading src[0..n) and advancing dst by n. `src` itself is never
// advanced, so when ndst > nsrc the same nsrc source pixels are reused for each successive chunk
// of dst.
// NOTE(review): if nsrc <= 0 then n <= 0, `ndst -= n` never decreases ndst, and the outer loop
// does not terminate -- confirm every caller passes nsrc > 0.
// The only change between the OLD and NEW columns is the name of the single-pixel helper.
169 static void srcover_srgb_srgb( | 171 static void srcover_srgb_srgb( |
170 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { | 172 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { |
171 while (ndst > 0) { | 173 while (ndst > 0) { |
172 int n = SkTMin(ndst, nsrc); | 174 int n = SkTMin(ndst, nsrc); |
173 | 175 |
174 for (int i = 0; i < n; i++) { | 176 for (int i = 0; i < n; i++) { |
175 srcover_srgb8888_srgb_1(dst++, src[i]); | 177 srcover_srgb_srgb_1(dst++, src[i]); |
176 } | 178 } |
177 ndst -= n; | 179 ndst -= n; |
178 } | 180 } |
179 } | 181 } |
180 | 182 |
181 #endif | 183 #endif |
182 | 184 |
183 } // namespace SK_OPTS_NS | 185 } // namespace SK_OPTS_NS |
184 | 186 |
185 #endif//SkBlend_opts_DEFINED | 187 #endif//SkBlend_opts_DEFINED |
OLD | NEW |