OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef Sk4px_DEFINED | 8 #ifndef Sk4px_DEFINED |
9 #define Sk4px_DEFINED | 9 #define Sk4px_DEFINED |
10 | 10 |
11 #include "SkNx.h" | 11 #include "SkNx.h" |
12 #include "SkColor.h" | 12 #include "SkColor.h" |
13 | 13 |
14 // This file may be included multiple times by .cpp files with different flags, leading | 14 // This file may be included multiple times by .cpp files with different flags, leading |
15 // to different definitions. Usually that doesn't matter because it's all inlined, but | 15 // to different definitions. Usually that doesn't matter because it's all inlined, but |
16 // in Debug modes the compilers may not inline everything. So wrap everything in an | 16 // in Debug modes the compilers may not inline everything. So wrap everything in an |
17 // anonymous namespace to give each includer their own silo of this code (or the linker | 17 // anonymous namespace to give each includer their own silo of this code (or the linker |
18 // will probably pick one randomly for us, which is rarely correct). | 18 // will probably pick one randomly for us, which is rarely correct). |
19 namespace { | 19 namespace { |
20 | 20 |
21 // 1, 2 or 4 SkPMColors, generally vectorized. | 21 // 1, 2 or 4 SkPMColors, generally vectorized. |
22 class Sk4px : public Sk16b { | 22 class Sk4px : public Sk16b { |
23 public: | 23 public: |
24 Sk4px(SkAlpha a) : INHERITED(a) {} // Duplicate 16x: a -> aaaa aaaa aaaa
aaaa | 24 static Sk4px DupAlpha(SkAlpha a) { return Sk16b(a); } // a -> aaaa aaaa
aaaa aaaa |
25 Sk4px(SkPMColor); // Duplicate 4x: argb -> argb argb argb
argb | 25 static Sk4px DupPMColor(SkPMColor c); // argb -> argb argb
argb argb |
| 26 |
26 Sk4px(const Sk16b& v) : INHERITED(v) {} | 27 Sk4px(const Sk16b& v) : INHERITED(v) {} |
27 | 28 |
28 Sk4px alphas() const; // ARGB argb XYZW xyzw -> AAAA aaaa XXXX xxxx | 29 Sk4px alphas() const; // ARGB argb XYZW xyzw -> AAAA aaaa XXXX xxxx |
29 | 30 |
30 // Mask away color or alpha lanes. | 31 // Mask away color or alpha lanes. |
31 Sk4px zeroColors() const; // ARGB argb XYZW xyzw -> A000 a000 X000 x000 | 32 Sk4px zeroColors() const; // ARGB argb XYZW xyzw -> A000 a000 X000 x000 |
32 Sk4px zeroAlphas() const; // ARGB argb XYZW xyzw -> 0RGB 0rgb 0YZW 0yzw | 33 Sk4px zeroAlphas() const; // ARGB argb XYZW xyzw -> 0RGB 0rgb 0YZW 0yzw |
33 | 34 |
34 Sk4px inv() const { return Sk16b(255) - *this; } | 35 Sk4px inv() const { return Sk16b(255) - *this; } |
35 | 36 |
(...skipping 12 matching lines...) Expand all Loading... |
48 | 49 |
49 // 1, 2, or 4 SkPMColors with 16-bit components. | 50 // 1, 2, or 4 SkPMColors with 16-bit components. |
50 // This is most useful as the result of a multiply, e.g. from mulWiden(). | 51 // This is most useful as the result of a multiply, e.g. from mulWiden(). |
51 class Wide : public Sk16h { | 52 class Wide : public Sk16h { |
52 public: | 53 public: |
53 Wide(const Sk16h& v) : Sk16h(v) {} | 54 Wide(const Sk16h& v) : Sk16h(v) {} |
54 | 55 |
55 // Pack the top byte of each component back down into 4 SkPMColors. | 56 // Pack the top byte of each component back down into 4 SkPMColors. |
56 Sk4px addNarrowHi(const Sk16h&) const; | 57 Sk4px addNarrowHi(const Sk16h&) const; |
57 | 58 |
58 Sk4px div255TruncNarrow() const { return this->addNarrowHi(*this >> 8);
} | 59 // Rounds, i.e. (x+127) / 255. |
59 Sk4px div255RoundNarrow() const { | 60 Sk4px div255() const { |
60 return Sk4px::Wide(*this + Sk16h(128)).div255TruncNarrow(); | 61 // Calculated as ((x+128) + ((x+128)>>8)) >> 8. |
| 62 auto v = *this + Sk16h(128); |
| 63 return v.addNarrowHi(v >> 8); |
61 } | 64 } |
62 | 65 |
| 66 // These just keep the types as Wide so the user doesn't have to keep ca
sting. |
| 67 Wide operator * (const Wide& o) const { return INHERITED::operator*(o);
} |
| 68 Wide operator + (const Wide& o) const { return INHERITED::operator+(o);
} |
| 69 Wide operator - (const Wide& o) const { return INHERITED::operator-(o);
} |
| 70 Wide operator >> (int bits) const { return INHERITED::operator>>(bits);
} |
| 71 Wide operator << (int bits) const { return INHERITED::operator<<(bits);
} |
| 72 static Wide Min(const Wide& a, const Wide& b) { return INHERITED::Min(a,
b); } |
| 73 |
63 private: | 74 private: |
64 typedef Sk16h INHERITED; | 75 typedef Sk16h INHERITED; |
65 }; | 76 }; |
66 | 77 |
67 Wide widenLo() const; // ARGB -> 0A 0R 0G 0B | 78 Wide widenLo() const; // ARGB -> 0A 0R 0G 0B |
68 Wide widenHi() const; // ARGB -> A0 R0 G0 B0 | 79 Wide widenHi() const; // ARGB -> A0 R0 G0 B0 |
69 Wide mulWiden(const Sk16b&) const; // 8-bit x 8-bit -> 16-bit components. | 80 Wide mulWiden(const Sk16b&) const; // 8-bit x 8-bit -> 16-bit components. |
70 Wide mul255Widen() const { | |
71 // TODO: x*255 = x*256-x, so something like this->widenHi() - this->wide
nLo()? | |
72 return this->mulWiden(Sk16b(255)); | |
73 } | |
74 | 81 |
75 // Generally faster than this->mulWiden(other).div255RoundNarrow(). | 82 // The only 8-bit multiply we use is 8-bit x 8-bit -> 16-bit. Might as well
make it pithy. |
76 // May be incorrect by +-1, but is always exactly correct when *this or othe
r is 0 or 255. | 83 Wide operator * (const Sk4px& o) const { return this->mulWiden(o); } |
77 Sk4px fastMulDiv255Round(const Sk16b& other) const { | 84 |
| 85 // These just keep the types as Sk4px so the user doesn't have to keep casti
ng. |
| 86 Sk4px operator + (const Sk4px& o) const { return INHERITED::operator+(o); } |
| 87 Sk4px operator - (const Sk4px& o) const { return INHERITED::operator-(o); } |
| 88 |
| 89 // Generally faster than (*this * o).div255(). |
| 90 // May be incorrect by +-1, but is always exactly correct when *this or o is
0 or 255. |
| 91 Sk4px approxMulDiv255(const Sk16b& o) const { |
78 // (x*y + x) / 256 meets these criteria. (As of course does (x*y + y) /
256 by symmetry.) | 92 // (x*y + x) / 256 meets these criteria. (As of course does (x*y + y) /
256 by symmetry.) |
79 Sk4px::Wide x = this->widenLo(), | 93 return this->widenLo().addNarrowHi(*this * o); |
80 xy = this->mulWiden(other); | |
81 return x.addNarrowHi(xy); | |
82 } | 94 } |
83 | 95 |
84 // A generic driver that maps fn over a src array into a dst array. | 96 // A generic driver that maps fn over a src array into a dst array. |
85 // fn should take an Sk4px (4 src pixels) and return an Sk4px (4 dst pixels). | 97 // fn should take an Sk4px (4 src pixels) and return an Sk4px (4 dst pixels). |
86 template <typename Fn> | 98 template <typename Fn> |
87 static void MapSrc(int count, SkPMColor* dst, const SkPMColor* src, Fn fn) { | 99 static void MapSrc(int n, SkPMColor* dst, const SkPMColor* src, const Fn& fn
) { |
88 // This looks a bit odd, but it helps loop-invariant hoisting across dif
ferent calls to fn. | 100 // This looks a bit odd, but it helps loop-invariant hoisting across dif
ferent calls to fn. |
89 // Basically, we need to make sure we keep things inside a single loop. | 101 // Basically, we need to make sure we keep things inside a single loop. |
90 while (count > 0) { | 102 while (n > 0) { |
91 if (count >= 8) { | 103 if (n >= 8) { |
92 Sk4px dst0 = fn(Load4(src+0)), | 104 Sk4px dst0 = fn(Load4(src+0)), |
93 dst4 = fn(Load4(src+4)); | 105 dst4 = fn(Load4(src+4)); |
94 dst0.store4(dst+0); | 106 dst0.store4(dst+0); |
95 dst4.store4(dst+4); | 107 dst4.store4(dst+4); |
96 dst += 8; src += 8; count -= 8; | 108 dst += 8; src += 8; n -= 8; |
97 continue; // Keep our stride at 8 pixels as long as possible. | 109 continue; // Keep our stride at 8 pixels as long as possible. |
98 } | 110 } |
99 SkASSERT(count <= 7); | 111 SkASSERT(n <= 7); |
100 if (count >= 4) { | 112 if (n >= 4) { |
101 fn(Load4(src)).store4(dst); | 113 fn(Load4(src)).store4(dst); |
102 dst += 4; src += 4; count -= 4; | 114 dst += 4; src += 4; n -= 4; |
103 } | 115 } |
104 if (count >= 2) { | 116 if (n >= 2) { |
105 fn(Load2(src)).store2(dst); | 117 fn(Load2(src)).store2(dst); |
106 dst += 2; src += 2; count -= 2; | 118 dst += 2; src += 2; n -= 2; |
107 } | 119 } |
108 if (count >= 1) { | 120 if (n >= 1) { |
109 fn(Load1(src)).store1(dst); | 121 fn(Load1(src)).store1(dst); |
110 } | 122 } |
111 break; | 123 break; |
112 } | 124 } |
113 } | 125 } |
114 | 126 |
115 // As above, but with dst4' = fn(dst4, src4). | 127 // As above, but with dst4' = fn(dst4, src4). |
116 template <typename Fn> | 128 template <typename Fn> |
117 static void MapDstSrc(int count, SkPMColor* dst, const SkPMColor* src, Fn fn
) { | 129 static void MapDstSrc(int n, SkPMColor* dst, const SkPMColor* src, const Fn&
fn) { |
118 while (count > 0) { | 130 while (n > 0) { |
119 if (count >= 8) { | 131 if (n >= 8) { |
120 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0)), | 132 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0)), |
121 dst4 = fn(Load4(dst+4), Load4(src+4)); | 133 dst4 = fn(Load4(dst+4), Load4(src+4)); |
122 dst0.store4(dst+0); | 134 dst0.store4(dst+0); |
123 dst4.store4(dst+4); | 135 dst4.store4(dst+4); |
124 dst += 8; src += 8; count -= 8; | 136 dst += 8; src += 8; n -= 8; |
125 continue; // Keep our stride at 8 pixels as long as possible. | 137 continue; // Keep our stride at 8 pixels as long as possible. |
126 } | 138 } |
127 SkASSERT(count <= 7); | 139 SkASSERT(n <= 7); |
128 if (count >= 4) { | 140 if (n >= 4) { |
129 fn(Load4(dst), Load4(src)).store4(dst); | 141 fn(Load4(dst), Load4(src)).store4(dst); |
130 dst += 4; src += 4; count -= 4; | 142 dst += 4; src += 4; n -= 4; |
131 } | 143 } |
132 if (count >= 2) { | 144 if (n >= 2) { |
133 fn(Load2(dst), Load2(src)).store2(dst); | 145 fn(Load2(dst), Load2(src)).store2(dst); |
134 dst += 2; src += 2; count -= 2; | 146 dst += 2; src += 2; n -= 2; |
135 } | 147 } |
136 if (count >= 1) { | 148 if (n >= 1) { |
137 fn(Load1(dst), Load1(src)).store1(dst); | 149 fn(Load1(dst), Load1(src)).store1(dst); |
138 } | 150 } |
139 break; | 151 break; |
140 } | 152 } |
141 } | 153 } |
142 | 154 |
143 // As above, but with dst4' = fn(dst4, src4, alpha4). | 155 // As above, but with dst4' = fn(dst4, src4, alpha4). |
144 template <typename Fn> | 156 template <typename Fn> |
145 static void MapDstSrcAlpha( | 157 static void MapDstSrcAlpha(int n, SkPMColor* dst, const SkPMColor* src, cons
t SkAlpha* a, |
146 int count, SkPMColor* dst, const SkPMColor* src, const SkAlpha* a, F
n fn) { | 158 const Fn& fn) { |
147 while (count > 0) { | 159 while (n > 0) { |
148 if (count >= 8) { | 160 if (n >= 8) { |
149 Sk4px alpha0 = Load4Alphas(a+0), | 161 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0), Load4Alphas(a+0)), |
150 alpha4 = Load4Alphas(a+4); | 162 dst4 = fn(Load4(dst+4), Load4(src+4), Load4Alphas(a+4)); |
151 Sk4px dst0 = fn(Load4(dst+0), Load4(src+0), alpha0), | |
152 dst4 = fn(Load4(dst+4), Load4(src+4), alpha4); | |
153 dst0.store4(dst+0); | 163 dst0.store4(dst+0); |
154 dst4.store4(dst+4); | 164 dst4.store4(dst+4); |
155 dst += 8; src += 8; a += 8; count -= 8; | 165 dst += 8; src += 8; a += 8; n -= 8; |
156 continue; // Keep our stride at 8 pixels as long as possible. | 166 continue; // Keep our stride at 8 pixels as long as possible. |
157 } | 167 } |
158 SkASSERT(count <= 7); | 168 SkASSERT(n <= 7); |
159 if (count >= 4) { | 169 if (n >= 4) { |
160 Sk4px alpha = Load4Alphas(a); | 170 fn(Load4(dst), Load4(src), Load4Alphas(a)).store4(dst); |
161 fn(Load4(dst), Load4(src), alpha).store4(dst); | 171 dst += 4; src += 4; a += 4; n -= 4; |
162 dst += 4; src += 4; a += 4; count -= 4; | |
163 } | 172 } |
164 if (count >= 2) { | 173 if (n >= 2) { |
165 Sk4px alpha = Load2Alphas(a); | 174 fn(Load2(dst), Load2(src), Load2Alphas(a)).store2(dst); |
166 fn(Load2(dst), Load2(src), alpha).store2(dst); | 175 dst += 2; src += 2; a += 2; n -= 2; |
167 dst += 2; src += 2; a += 2; count -= 2; | |
168 } | 176 } |
169 if (count >= 1) { | 177 if (n >= 1) { |
170 Sk4px alpha(*a); | 178 fn(Load1(dst), Load1(src), DupAlpha(*a)).store1(dst); |
171 fn(Load1(dst), Load1(src), alpha).store1(dst); | |
172 } | 179 } |
173 break; | 180 break; |
174 } | 181 } |
175 } | 182 } |
176 | 183 |
177 private: | 184 private: |
178 typedef Sk16b INHERITED; | 185 typedef Sk16b INHERITED; |
179 }; | 186 }; |
180 | 187 |
181 } // namespace | 188 } // namespace |
182 | 189 |
183 #ifdef SKNX_NO_SIMD | 190 #ifdef SKNX_NO_SIMD |
184 #include "../opts/Sk4px_none.h" | 191 #include "../opts/Sk4px_none.h" |
185 #else | 192 #else |
186 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 | 193 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
187 #include "../opts/Sk4px_SSE2.h" | 194 #include "../opts/Sk4px_SSE2.h" |
188 #elif defined(SK_ARM_HAS_NEON) | 195 #elif defined(SK_ARM_HAS_NEON) |
189 #include "../opts/Sk4px_NEON.h" | 196 #include "../opts/Sk4px_NEON.h" |
190 #else | 197 #else |
191 #include "../opts/Sk4px_none.h" | 198 #include "../opts/Sk4px_none.h" |
192 #endif | 199 #endif |
193 #endif | 200 #endif |
194 | 201 |
195 #endif//Sk4px_DEFINED | 202 #endif//Sk4px_DEFINED |
OLD | NEW |