OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef Sk4px_DEFINED | 8 #ifndef Sk4px_DEFINED |
9 #define Sk4px_DEFINED | 9 #define Sk4px_DEFINED |
10 | 10 |
(...skipping 30 matching lines...) Expand all Loading... |
41 static Sk4px Load1(const SkPMColor[1]); // PMColor[1] -> ARGB ???? ???? ???
? | 41 static Sk4px Load1(const SkPMColor[1]); // PMColor[1] -> ARGB ???? ???? ???
? |
42 | 42 |
43 // Ditto for Alphas... Load2Alphas fills the low two lanes of Sk4px. | 43 // Ditto for Alphas... Load2Alphas fills the low two lanes of Sk4px. |
44 static Sk4px Load4Alphas(const SkAlpha[4]); // AaXx -> AAAA aaaa XXXX xxxx | 44 static Sk4px Load4Alphas(const SkAlpha[4]); // AaXx -> AAAA aaaa XXXX xxxx |
45 static Sk4px Load2Alphas(const SkAlpha[2]); // Aa -> AAAA aaaa ???? ???? | 45 static Sk4px Load2Alphas(const SkAlpha[2]); // Aa -> AAAA aaaa ???? ???? |
46 | 46 |
47 void store4(SkPMColor[4]) const; | 47 void store4(SkPMColor[4]) const; |
48 void store2(SkPMColor[2]) const; | 48 void store2(SkPMColor[2]) const; |
49 void store1(SkPMColor[1]) const; | 49 void store1(SkPMColor[1]) const; |
50 | 50 |
51 // Same as above for 565. | |
52 static Sk4px Load4(const SkPMColor16 src[4]); | |
53 static Sk4px Load2(const SkPMColor16 src[2]); | |
54 static Sk4px Load1(const SkPMColor16 src[1]); | |
55 void store4(SkPMColor16 dst[4]) const; | |
56 void store2(SkPMColor16 dst[2]) const; | |
57 void store1(SkPMColor16 dst[1]) const; | |
58 | |
59 // 1, 2, or 4 SkPMColors with 16-bit components. | 51 // 1, 2, or 4 SkPMColors with 16-bit components. |
60 // This is most useful as the result of a multiply, e.g. from mulWiden(). | 52 // This is most useful as the result of a multiply, e.g. from mulWiden(). |
61 class Wide : public Sk16h { | 53 class Wide : public Sk16h { |
62 public: | 54 public: |
63 Wide(const Sk16h& v) : Sk16h(v) {} | 55 Wide(const Sk16h& v) : Sk16h(v) {} |
64 | 56 |
65 // Pack the top byte of each component back down into 4 SkPMColors. | 57 // Pack the top byte of each component back down into 4 SkPMColors. |
66 Sk4px addNarrowHi(const Sk16h&) const; | 58 Sk4px addNarrowHi(const Sk16h&) const; |
67 | 59 |
68 // Rounds, i.e. (x+127) / 255. | 60 // Rounds, i.e. (x+127) / 255. |
(...skipping 29 matching lines...) Expand all Loading... |
98 // Generally faster than (*this * o).div255(). | 90 // Generally faster than (*this * o).div255(). |
99 // May be incorrect by +-1, but is always exactly correct when *this or o is
0 or 255. | 91 // May be incorrect by +-1, but is always exactly correct when *this or o is
0 or 255. |
100 Sk4px approxMulDiv255(const Sk16b& o) const { | 92 Sk4px approxMulDiv255(const Sk16b& o) const { |
101 // (x*y + x) / 256 meets these criteria. (As of course does (x*y + y) /
256 by symmetry.) | 93 // (x*y + x) / 256 meets these criteria. (As of course does (x*y + y) /
256 by symmetry.) |
102 // FYI: (x*y + 255) / 256 also meets these criteria. In my brief testin
g, it was slower. | 94 // FYI: (x*y + 255) / 256 also meets these criteria. In my brief testin
g, it was slower. |
103 return this->widenLo().addNarrowHi(*this * o); | 95 return this->widenLo().addNarrowHi(*this * o); |
104 } | 96 } |
105 | 97 |
106 // A generic driver that maps fn over a src array into a dst array. | 98 // A generic driver that maps fn over a src array into a dst array. |
107 // fn should take an Sk4px (4 src pixels) and return an Sk4px (4 dst pixels)
. | 99 // fn should take an Sk4px (4 src pixels) and return an Sk4px (4 dst pixels)
. |
108 template <typename Fn, typename Dst> | 100 template <typename Fn> |
109 static void MapSrc(int n, Dst* dst, const SkPMColor* src, const Fn& fn) { | 101 static void MapSrc(int n, SkPMColor* dst, const SkPMColor* src, const Fn& fn
) { |
110 SkASSERT(dst); | 102 SkASSERT(dst); |
111 SkASSERT(src); | 103 SkASSERT(src); |
112 // This looks a bit odd, but it helps loop-invariant hoisting across dif
ferent calls to fn. | 104 // This looks a bit odd, but it helps loop-invariant hoisting across dif
ferent calls to fn. |
113 // Basically, we need to make sure we keep things inside a single loop. | 105 // Basically, we need to make sure we keep things inside a single loop. |
114 while (n > 0) { | 106 while (n > 0) { |
115 if (n >= 8) { | 107 if (n >= 8) { |
116 Sk4px dst0 = fn(Load4(src+0)), | 108 Sk4px dst0 = fn(Load4(src+0)), |
117 dst4 = fn(Load4(src+4)); | 109 dst4 = fn(Load4(src+4)); |
118 dst0.store4(dst+0); | 110 dst0.store4(dst+0); |
119 dst4.store4(dst+4); | 111 dst4.store4(dst+4); |
(...skipping 10 matching lines...) Expand all Loading... |
130 dst += 2; src += 2; n -= 2; | 122 dst += 2; src += 2; n -= 2; |
131 } | 123 } |
132 if (n >= 1) { | 124 if (n >= 1) { |
133 fn(Load1(src)).store1(dst); | 125 fn(Load1(src)).store1(dst); |
134 } | 126 } |
135 break; | 127 break; |
136 } | 128 } |
137 } | 129 } |
138 | 130 |
139 // As above, but with dst4' = fn(dst4, src4). | 131 // As above, but with dst4' = fn(dst4, src4). |
140 template <typename Fn, typename Dst> | 132 template <typename Fn> |
141 static void MapDstSrc(int n, Dst* dst, const SkPMColor* src, const Fn& fn) { | 133 static void MapDstSrc(int n, SkPMColor* dst, const SkPMColor* src, const Fn&
fn) { |
142 SkASSERT(dst); | 134 SkASSERT(dst); |
143 SkASSERT(src); | 135 SkASSERT(src); |
144 while (n > 0) { | 136 while (n > 0) { |
145 if (n >= 8) { | 137 if (n >= 8) { |
146 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0)), | 138 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0)), |
147 dst4 = fn(Load4(dst+4), Load4(src+4)); | 139 dst4 = fn(Load4(dst+4), Load4(src+4)); |
148 dst0.store4(dst+0); | 140 dst0.store4(dst+0); |
149 dst4.store4(dst+4); | 141 dst4.store4(dst+4); |
150 dst += 8; src += 8; n -= 8; | 142 dst += 8; src += 8; n -= 8; |
151 continue; // Keep our stride at 8 pixels as long as possible. | 143 continue; // Keep our stride at 8 pixels as long as possible. |
152 } | 144 } |
153 SkASSERT(n <= 7); | 145 SkASSERT(n <= 7); |
154 if (n >= 4) { | 146 if (n >= 4) { |
155 fn(Load4(dst), Load4(src)).store4(dst); | 147 fn(Load4(dst), Load4(src)).store4(dst); |
156 dst += 4; src += 4; n -= 4; | 148 dst += 4; src += 4; n -= 4; |
157 } | 149 } |
158 if (n >= 2) { | 150 if (n >= 2) { |
159 fn(Load2(dst), Load2(src)).store2(dst); | 151 fn(Load2(dst), Load2(src)).store2(dst); |
160 dst += 2; src += 2; n -= 2; | 152 dst += 2; src += 2; n -= 2; |
161 } | 153 } |
162 if (n >= 1) { | 154 if (n >= 1) { |
163 fn(Load1(dst), Load1(src)).store1(dst); | 155 fn(Load1(dst), Load1(src)).store1(dst); |
164 } | 156 } |
165 break; | 157 break; |
166 } | 158 } |
167 } | 159 } |
168 | 160 |
169 // As above, but with dst4' = fn(dst4, alpha4). | 161 // As above, but with dst4' = fn(dst4, alpha4). |
170 template <typename Fn, typename Dst> | 162 template <typename Fn> |
171 static void MapDstAlpha(int n, Dst* dst, const SkAlpha* a, const Fn& fn) { | 163 static void MapDstAlpha(int n, SkPMColor* dst, const SkAlpha* a, const Fn& f
n) { |
172 SkASSERT(dst); | 164 SkASSERT(dst); |
173 SkASSERT(a); | 165 SkASSERT(a); |
174 while (n > 0) { | 166 while (n > 0) { |
175 if (n >= 8) { | 167 if (n >= 8) { |
176 Sk4px dst0 = fn(Load4(dst+0), Load4Alphas(a+0)), | 168 Sk4px dst0 = fn(Load4(dst+0), Load4Alphas(a+0)), |
177 dst4 = fn(Load4(dst+4), Load4Alphas(a+4)); | 169 dst4 = fn(Load4(dst+4), Load4Alphas(a+4)); |
178 dst0.store4(dst+0); | 170 dst0.store4(dst+0); |
179 dst4.store4(dst+4); | 171 dst4.store4(dst+4); |
180 dst += 8; a += 8; n -= 8; | 172 dst += 8; a += 8; n -= 8; |
181 continue; // Keep our stride at 8 pixels as long as possible. | 173 continue; // Keep our stride at 8 pixels as long as possible. |
182 } | 174 } |
183 SkASSERT(n <= 7); | 175 SkASSERT(n <= 7); |
184 if (n >= 4) { | 176 if (n >= 4) { |
185 fn(Load4(dst), Load4Alphas(a)).store4(dst); | 177 fn(Load4(dst), Load4Alphas(a)).store4(dst); |
186 dst += 4; a += 4; n -= 4; | 178 dst += 4; a += 4; n -= 4; |
187 } | 179 } |
188 if (n >= 2) { | 180 if (n >= 2) { |
189 fn(Load2(dst), Load2Alphas(a)).store2(dst); | 181 fn(Load2(dst), Load2Alphas(a)).store2(dst); |
190 dst += 2; a += 2; n -= 2; | 182 dst += 2; a += 2; n -= 2; |
191 } | 183 } |
192 if (n >= 1) { | 184 if (n >= 1) { |
193 fn(Load1(dst), DupAlpha(*a)).store1(dst); | 185 fn(Load1(dst), DupAlpha(*a)).store1(dst); |
194 } | 186 } |
195 break; | 187 break; |
196 } | 188 } |
197 } | 189 } |
198 | 190 |
199 // As above, but with dst4' = fn(dst4, src4, alpha4). | 191 // As above, but with dst4' = fn(dst4, src4, alpha4). |
200 template <typename Fn, typename Dst> | 192 template <typename Fn> |
201 static void MapDstSrcAlpha(int n, Dst* dst, const SkPMColor* src, const SkAl
pha* a, | 193 static void MapDstSrcAlpha(int n, SkPMColor* dst, const SkPMColor* src, cons
t SkAlpha* a, |
202 const Fn& fn) { | 194 const Fn& fn) { |
203 SkASSERT(dst); | 195 SkASSERT(dst); |
204 SkASSERT(src); | 196 SkASSERT(src); |
205 SkASSERT(a); | 197 SkASSERT(a); |
206 while (n > 0) { | 198 while (n > 0) { |
207 if (n >= 8) { | 199 if (n >= 8) { |
208 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0), Load4Alphas(a+0)), | 200 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0), Load4Alphas(a+0)), |
209 dst4 = fn(Load4(dst+4), Load4(src+4), Load4Alphas(a+4)); | 201 dst4 = fn(Load4(dst+4), Load4(src+4), Load4Alphas(a+4)); |
210 dst0.store4(dst+0); | 202 dst0.store4(dst+0); |
211 dst4.store4(dst+4); | 203 dst4.store4(dst+4); |
(...skipping 28 matching lines...) Expand all Loading... |
240 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | 232 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
241 #include "../opts/Sk4px_SSE2.h" | 233 #include "../opts/Sk4px_SSE2.h" |
242 #elif defined(SK_ARM_HAS_NEON) | 234 #elif defined(SK_ARM_HAS_NEON) |
243 #include "../opts/Sk4px_NEON.h" | 235 #include "../opts/Sk4px_NEON.h" |
244 #else | 236 #else |
245 #include "../opts/Sk4px_none.h" | 237 #include "../opts/Sk4px_none.h" |
246 #endif | 238 #endif |
247 #endif | 239 #endif |
248 | 240 |
249 #endif//Sk4px_DEFINED | 241 #endif//Sk4px_DEFINED |
OLD | NEW |