Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(145)

Side by Side Diff: src/opts/SkBlend_opts.h

Issue 2130183003: Remove bloat from SkBlend_opts. (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/core/SkPM4fPriv.h ('k') | tests/SkBlend_optsTest.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2016 Google Inc. 2 * Copyright 2016 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 /* 8 /*
9 ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; and ./out/Release/nanobench --samples 300 --nompd --match LinearSrcOver -q 9 ninja -C out/Release dm nanobench ; and ./out/Release/dm --match Blend_opts ; and ./out/Release/nanobench --samples 300 --nompd --match LinearSrcOver -q
10 */ 10 */
11 11
12 #ifndef SkBlend_opts_DEFINED 12 #ifndef SkBlend_opts_DEFINED
13 #define SkBlend_opts_DEFINED 13 #define SkBlend_opts_DEFINED
14 14
15 #include "SkNx.h" 15 #include "SkNx.h"
16 #include "SkPM4fPriv.h" 16 #include "SkPM4fPriv.h"
17 17
18 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 18 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
19 #include <immintrin.h> 19 #include <immintrin.h>
20 #endif 20 #endif
21 21
22 namespace SK_OPTS_NS { 22 namespace SK_OPTS_NS {
23 23
24 // An implementation of SrcOver from bytes to bytes in linear space that takes advantage of the 24 static inline void srcover_srgb8888_srgb_1(uint32_t* dst, const uint32_t pixel) {
25 // observation that the 255's cancel.
26 // invA = 1 - (As / 255);
27 //
28 // R = 255 * sqrt((Rs/255)^2 + (Rd/255)^2 * invA)
29 // => R = 255 * sqrt((Rs^2 + Rd^2 * invA)/255^2)
30 // => R = sqrt(Rs^2 + Rd^2 * invA)
31 static inline void blend_srgb_srgb_1(uint32_t* dst, const uint32_t pixel) {
32 Sk4f s = srgb_to_linear(to_4f(pixel));
33 Sk4f d = srgb_to_linear(to_4f(*dst));
34 Sk4f invAlpha = 1.0f - Sk4f{s[SkPM4f::A]} * (1.0f / 255.0f);
35 Sk4f r = linear_to_srgb(s + d * invAlpha) + 0.5f;
36 *dst = to_4b(r);
37 }
38
39 static inline void srcover_srgb_srgb_1(uint32_t* dst, const uint32_t pixel) {
40 if ((~pixel & 0xFF000000) == 0) { 25 if ((~pixel & 0xFF000000) == 0) {
41 *dst = pixel; 26 *dst = pixel;
42 } else if ((pixel & 0xFF000000) != 0) { 27 } else if ((pixel & 0xFF000000) != 0) {
43 blend_srgb_srgb_1(dst, pixel); 28 srcover_blend_srgb8888_srgb_1(dst, srgb_to_linear(to_4f(pixel)));
44 } 29 }
45 } 30 }
46 31
47 static inline void srcover_srgb_srgb_2(uint32_t* dst, const uint32_t* src) {
48 srcover_srgb_srgb_1(dst++, *src++);
49 srcover_srgb_srgb_1(dst, *src);
50 }
51
52 static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) { 32 static inline void srcover_srgb_srgb_4(uint32_t* dst, const uint32_t* src) {
53 srcover_srgb_srgb_1(dst++, *src++); 33 srcover_srgb8888_srgb_1(dst++, *src++);
54 srcover_srgb_srgb_1(dst++, *src++); 34 srcover_srgb8888_srgb_1(dst++, *src++);
55 srcover_srgb_srgb_1(dst++, *src++); 35 srcover_srgb8888_srgb_1(dst++, *src++);
56 srcover_srgb_srgb_1(dst, *src); 36 srcover_srgb8888_srgb_1(dst, *src);
57 }
58
59 void best_non_simd_srcover_srgb_srgb(
60 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
61 uint64_t* ddst = reinterpret_cast<uint64_t*>(dst);
62
63 while (ndst >0) {
64 int count = SkTMin(ndst, nsrc);
65 ndst -= count;
66 const uint64_t* dsrc = reinterpret_cast<const uint64_t*>(src);
67 const uint64_t* end = dsrc + (count >> 1);
68 do {
69 if ((~*dsrc & 0xFF000000FF000000) == 0) {
70 do {
71 *ddst++ = *dsrc++;
72 } while (dsrc < end && (~*dsrc & 0xFF000000FF000000) == 0);
73 } else if ((*dsrc & 0xFF000000FF000000) == 0) {
74 do {
75 dsrc++;
76 ddst++;
77 } while (dsrc < end && (*dsrc & 0xFF000000FF000000) == 0);
78 } else {
79 srcover_srgb_srgb_2(reinterpret_cast<uint32_t*>(ddst++),
80 reinterpret_cast<const uint32_t*>(dsrc++));
81 }
82 } while (dsrc < end);
83
84 if ((count & 1) != 0) {
85 srcover_srgb_srgb_1(reinterpret_cast<uint32_t*>(ddst),
86 *reinterpret_cast<const uint32_t*>(dsrc));
87 }
88 }
89 }
90
91 void brute_force_srcover_srgb_srgb(
92 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
93 while (ndst > 0) {
94 int n = SkTMin(ndst, nsrc);
95
96 for (int i = 0; i < n; i++) {
97 blend_srgb_srgb_1(dst++, src[i]);
98 }
99 ndst -= n;
100 }
101 }
102
103 void trivial_srcover_srgb_srgb(
104 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
105 while (ndst > 0) {
106 int n = SkTMin(ndst, nsrc);
107
108 for (int i = 0; i < n; i++) {
109 srcover_srgb_srgb_1(dst++, src[i]);
110 }
111 ndst -= n;
112 }
113 } 37 }
114 38
115 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 39 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
116 40
117 static inline __m128i load(const uint32_t* p) { 41 static inline __m128i load(const uint32_t* p) {
118 return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)); 42 return _mm_loadu_si128(reinterpret_cast<const __m128i*>(p));
119 } 43 }
120 44
121 static inline void store(uint32_t* p, __m128i v) { 45 static inline void store(uint32_t* p, __m128i v) {
122 _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v); 46 _mm_storeu_si128(reinterpret_cast<__m128i*>(p), v);
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
156 srcover_srgb_srgb_4(dst, dst + delta); 80 srcover_srgb_srgb_4(dst, dst + delta);
157 dst += 4; 81 dst += 4;
158 } while (dst < end 82 } while (dst < end
159 && _mm_testnzc_si128(pixels = load(dst + delta) , alphaMask)); 83 && _mm_testnzc_si128(pixels = load(dst + delta) , alphaMask));
160 src += dst - start; 84 src += dst - start;
161 } 85 }
162 } 86 }
163 87
164 count = count & 3; 88 count = count & 3;
165 while (count-- > 0) { 89 while (count-- > 0) {
166 srcover_srgb_srgb_1(dst++, *src++); 90 srcover_srgb8888_srgb_1(dst++, *src++);
167 } 91 }
168 } 92 }
169 } 93 }
170 #else 94 #else
171 // SSE2 versions 95 // SSE2 versions
172 96
173 // Note: In the next three comparisons a group of 4 pixels is converted to a group of 97 // Note: In the next three comparisons a group of 4 pixels is converted to a group of
174 // "signed" pixels because the sse2 does not have an unsigned comparison. 98 // "signed" pixels because the sse2 does not have an unsigned comparison.
175 // Make it so that we can use the signed comparison operators by biasing 99 // Make it so that we can use the signed comparison operators by biasing
176 // 0x00xxxxxx to 0x80xxxxxx which is the smallest values and biasing 0xffxxxxxx to 100 // 0x00xxxxxx to 0x80xxxxxx which is the smallest values and biasing 0xffxxxxxx to
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
228 do { 152 do {
229 srcover_srgb_srgb_4(dst, dst + delta); 153 srcover_srgb_srgb_4(dst, dst + delta);
230 dst += 4; 154 dst += 4;
231 } while (dst < end && check_partial_alphas(pixels = load (dst + delta))); 155 } while (dst < end && check_partial_alphas(pixels = load (dst + delta)));
232 src += dst - start; 156 src += dst - start;
233 } 157 }
234 } while (dst < end); 158 } while (dst < end);
235 159
236 count = count & 3; 160 count = count & 3;
237 while (count-- > 0) { 161 while (count-- > 0) {
238 srcover_srgb_srgb_1(dst++, *src++); 162 srcover_srgb8888_srgb_1(dst++, *src++);
239 } 163 }
240 } 164 }
241 } 165 }
242 #endif 166 #endif
243 #else 167 #else
244 168
245 void srcover_srgb_srgb( 169 void srcover_srgb_srgb(
246 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) { 170 uint32_t* dst, const uint32_t* const src, int ndst, const int nsrc) {
247 trivial_srcover_srgb_srgb(dst, src, ndst, nsrc); 171 while (ndst > 0) {
172 int n = SkTMin(ndst, nsrc);
173
174 for (int i = 0; i < n; i++) {
175 srcover_srgb8888_srgb_1(dst++, src[i]);
176 }
177 ndst -= n;
178 }
248 } 179 }
249 180
250 #endif 181 #endif
251 182
252 } // namespace SK_OPTS_NS 183 } // namespace SK_OPTS_NS
253 184
254 #endif//SkBlend_opts_DEFINED 185 #endif//SkBlend_opts_DEFINED
OLDNEW
« no previous file with comments | « src/core/SkPM4fPriv.h ('k') | tests/SkBlend_optsTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698