Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(44)

Side by Side Diff: src/core/Sk4px.h

Issue 1245673002: 565 support for SIMD xfermodes (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: fix typo Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | src/core/Sk4pxXfermode.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef Sk4px_DEFINED 8 #ifndef Sk4px_DEFINED
9 #define Sk4px_DEFINED 9 #define Sk4px_DEFINED
10 10
11 #include "SkNx.h" 11 #include "SkNx.h"
12 #include "SkColor.h" 12 #include "SkColor.h"
13 #include "SkColorPriv.h"
13 14
14 // This file may be included multiple times by .cpp files with different flags, leading 15 // This file may be included multiple times by .cpp files with different flags, leading
15 // to different definitions. Usually that doesn't matter because it's all inlined, but 16 // to different definitions. Usually that doesn't matter because it's all inlined, but
16 // in Debug modes the compilers may not inline everything. So wrap everything in an 17 // in Debug modes the compilers may not inline everything. So wrap everything in an
17 // anonymous namespace to give each includer their own silo of this code (or the linker 18 // anonymous namespace to give each includer their own silo of this code (or the linker
18 // will probably pick one randomly for us, which is rarely correct). 19 // will probably pick one randomly for us, which is rarely correct).
19 namespace { 20 namespace {
20 21
21 // 1, 2 or 4 SkPMColors, generally vectorized. 22 // 1, 2 or 4 SkPMColors, generally vectorized.
22 class Sk4px : public Sk16b { 23 class Sk4px : public Sk16b {
(...skipping 17 matching lines...) Expand all
40 static Sk4px Load1(const SkPMColor[1]); // PMColor[1] -> ARGB ???? ???? ???? 41 static Sk4px Load1(const SkPMColor[1]); // PMColor[1] -> ARGB ???? ???? ????
41 42
42 // Ditto for Alphas... Load2Alphas fills the low two lanes of Sk4px. 43 // Ditto for Alphas... Load2Alphas fills the low two lanes of Sk4px.
43 static Sk4px Load4Alphas(const SkAlpha[4]); // AaXx -> AAAA aaaa XXXX xxxx 44 static Sk4px Load4Alphas(const SkAlpha[4]); // AaXx -> AAAA aaaa XXXX xxxx
44 static Sk4px Load2Alphas(const SkAlpha[2]); // Aa -> AAAA aaaa ???? ???? 45 static Sk4px Load2Alphas(const SkAlpha[2]); // Aa -> AAAA aaaa ???? ????
45 46
46 void store4(SkPMColor[4]) const; 47 void store4(SkPMColor[4]) const;
47 void store2(SkPMColor[2]) const; 48 void store2(SkPMColor[2]) const;
48 void store1(SkPMColor[1]) const; 49 void store1(SkPMColor[1]) const;
49 50
51 // Same as above for 565.
52 static Sk4px Load4(const SkPMColor16 src[4]);
53 static Sk4px Load2(const SkPMColor16 src[2]);
54 static Sk4px Load1(const SkPMColor16 src[1]);
55 void store4(SkPMColor16 dst[4]) const;
56 void store2(SkPMColor16 dst[2]) const;
57 void store1(SkPMColor16 dst[1]) const;
58
50 // 1, 2, or 4 SkPMColors with 16-bit components. 59 // 1, 2, or 4 SkPMColors with 16-bit components.
51 // This is most useful as the result of a multiply, e.g. from mulWiden(). 60 // This is most useful as the result of a multiply, e.g. from mulWiden().
52 class Wide : public Sk16h { 61 class Wide : public Sk16h {
53 public: 62 public:
54 Wide(const Sk16h& v) : Sk16h(v) {} 63 Wide(const Sk16h& v) : Sk16h(v) {}
55 64
56 // Pack the top byte of each component back down into 4 SkPMColors. 65 // Pack the top byte of each component back down into 4 SkPMColors.
57 Sk4px addNarrowHi(const Sk16h&) const; 66 Sk4px addNarrowHi(const Sk16h&) const;
58 67
59 // Rounds, i.e. (x+127) / 255. 68 // Rounds, i.e. (x+127) / 255.
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
92 101
93 // Generally faster than (*this * o).div255(). 102 // Generally faster than (*this * o).div255().
94 // May be incorrect by +-1, but is always exactly correct when *this or o is 0 or 255. 103 // May be incorrect by +-1, but is always exactly correct when *this or o is 0 or 255.
95 Sk4px approxMulDiv255(const Sk16b& o) const { 104 Sk4px approxMulDiv255(const Sk16b& o) const {
96 // (x*y + x) / 256 meets these criteria. (As of course does (x*y + y) / 256 by symmetry.) 105 // (x*y + x) / 256 meets these criteria. (As of course does (x*y + y) / 256 by symmetry.)
97 return this->widenLo().addNarrowHi(*this * o); 106 return this->widenLo().addNarrowHi(*this * o);
98 } 107 }
99 108
100 // A generic driver that maps fn over a src array into a dst array. 109 // A generic driver that maps fn over a src array into a dst array.
101 // fn should take an Sk4px (4 src pixels) and return an Sk4px (4 dst pixels). 110 // fn should take an Sk4px (4 src pixels) and return an Sk4px (4 dst pixels).
102 template <typename Fn> 111 template <typename Fn, typename Dst>
103 static void MapSrc(int n, SkPMColor* dst, const SkPMColor* src, const Fn& fn) { 112 static void MapSrc(int n, Dst* dst, const SkPMColor* src, const Fn& fn) {
104 // This looks a bit odd, but it helps loop-invariant hoisting across different calls to fn. 113 // This looks a bit odd, but it helps loop-invariant hoisting across different calls to fn.
105 // Basically, we need to make sure we keep things inside a single loop. 114 // Basically, we need to make sure we keep things inside a single loop.
106 while (n > 0) { 115 while (n > 0) {
107 if (n >= 8) { 116 if (n >= 8) {
108 Sk4px dst0 = fn(Load4(src+0)), 117 Sk4px dst0 = fn(Load4(src+0)),
109 dst4 = fn(Load4(src+4)); 118 dst4 = fn(Load4(src+4));
110 dst0.store4(dst+0); 119 dst0.store4(dst+0);
111 dst4.store4(dst+4); 120 dst4.store4(dst+4);
112 dst += 8; src += 8; n -= 8; 121 dst += 8; src += 8; n -= 8;
113 continue; // Keep our stride at 8 pixels as long as possible. 122 continue; // Keep our stride at 8 pixels as long as possible.
114 } 123 }
115 SkASSERT(n <= 7); 124 SkASSERT(n <= 7);
116 if (n >= 4) { 125 if (n >= 4) {
117 fn(Load4(src)).store4(dst); 126 fn(Load4(src)).store4(dst);
118 dst += 4; src += 4; n -= 4; 127 dst += 4; src += 4; n -= 4;
119 } 128 }
120 if (n >= 2) { 129 if (n >= 2) {
121 fn(Load2(src)).store2(dst); 130 fn(Load2(src)).store2(dst);
122 dst += 2; src += 2; n -= 2; 131 dst += 2; src += 2; n -= 2;
123 } 132 }
124 if (n >= 1) { 133 if (n >= 1) {
125 fn(Load1(src)).store1(dst); 134 fn(Load1(src)).store1(dst);
126 } 135 }
127 break; 136 break;
128 } 137 }
129 } 138 }
130 139
131 // As above, but with dst4' = fn(dst4, src4). 140 // As above, but with dst4' = fn(dst4, src4).
132 template <typename Fn> 141 template <typename Fn, typename Dst>
133 static void MapDstSrc(int n, SkPMColor* dst, const SkPMColor* src, const Fn& fn) { 142 static void MapDstSrc(int n, Dst* dst, const SkPMColor* src, const Fn& fn) {
134 while (n > 0) { 143 while (n > 0) {
135 if (n >= 8) { 144 if (n >= 8) {
136 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0)), 145 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0)),
137 dst4 = fn(Load4(dst+4), Load4(src+4)); 146 dst4 = fn(Load4(dst+4), Load4(src+4));
138 dst0.store4(dst+0); 147 dst0.store4(dst+0);
139 dst4.store4(dst+4); 148 dst4.store4(dst+4);
140 dst += 8; src += 8; n -= 8; 149 dst += 8; src += 8; n -= 8;
141 continue; // Keep our stride at 8 pixels as long as possible. 150 continue; // Keep our stride at 8 pixels as long as possible.
142 } 151 }
143 SkASSERT(n <= 7); 152 SkASSERT(n <= 7);
144 if (n >= 4) { 153 if (n >= 4) {
145 fn(Load4(dst), Load4(src)).store4(dst); 154 fn(Load4(dst), Load4(src)).store4(dst);
146 dst += 4; src += 4; n -= 4; 155 dst += 4; src += 4; n -= 4;
147 } 156 }
148 if (n >= 2) { 157 if (n >= 2) {
149 fn(Load2(dst), Load2(src)).store2(dst); 158 fn(Load2(dst), Load2(src)).store2(dst);
150 dst += 2; src += 2; n -= 2; 159 dst += 2; src += 2; n -= 2;
151 } 160 }
152 if (n >= 1) { 161 if (n >= 1) {
153 fn(Load1(dst), Load1(src)).store1(dst); 162 fn(Load1(dst), Load1(src)).store1(dst);
154 } 163 }
155 break; 164 break;
156 } 165 }
157 } 166 }
158 167
159 // As above, but with dst4' = fn(dst4, src4, alpha4). 168 // As above, but with dst4' = fn(dst4, src4, alpha4).
160 template <typename Fn> 169 template <typename Fn, typename Dst>
161 static void MapDstSrcAlpha(int n, SkPMColor* dst, const SkPMColor* src, const SkAlpha* a, 170 static void MapDstSrcAlpha(int n, Dst* dst, const SkPMColor* src, const SkAlpha* a,
162 const Fn& fn) { 171 const Fn& fn) {
163 while (n > 0) { 172 while (n > 0) {
164 if (n >= 8) { 173 if (n >= 8) {
165 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0), Load4Alphas(a+0)), 174 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0), Load4Alphas(a+0)),
166 dst4 = fn(Load4(dst+4), Load4(src+4), Load4Alphas(a+4)); 175 dst4 = fn(Load4(dst+4), Load4(src+4), Load4Alphas(a+4));
167 dst0.store4(dst+0); 176 dst0.store4(dst+0);
168 dst4.store4(dst+4); 177 dst4.store4(dst+4);
169 dst += 8; src += 8; a += 8; n -= 8; 178 dst += 8; src += 8; a += 8; n -= 8;
170 continue; // Keep our stride at 8 pixels as long as possible. 179 continue; // Keep our stride at 8 pixels as long as possible.
171 } 180 }
(...skipping 25 matching lines...) Expand all
197 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 206 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
198 #include "../opts/Sk4px_SSE2.h" 207 #include "../opts/Sk4px_SSE2.h"
199 #elif defined(SK_ARM_HAS_NEON) 208 #elif defined(SK_ARM_HAS_NEON)
200 #include "../opts/Sk4px_NEON.h" 209 #include "../opts/Sk4px_NEON.h"
201 #else 210 #else
202 #include "../opts/Sk4px_none.h" 211 #include "../opts/Sk4px_none.h"
203 #endif 212 #endif
204 #endif 213 #endif
205 214
206 #endif//Sk4px_DEFINED 215 #endif//Sk4px_DEFINED
OLDNEW
« no previous file with comments | « no previous file | src/core/Sk4pxXfermode.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698