OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (C) 2012 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Sz
eged | |
3 * | |
4 * Redistribution and use in source and binary forms, with or without | |
5 * modification, are permitted provided that the following conditions | |
6 * are met: | |
7 * 1. Redistributions of source code must retain the above copyright | |
8 * notice, this list of conditions and the following disclaimer. | |
9 * 2. Redistributions in binary form must reproduce the above copyright | |
10 * notice, this list of conditions and the following disclaimer in the | |
11 * documentation and/or other materials provided with the distribution. | |
12 * | |
13 * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY | |
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR | |
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
24 */ | |
25 | |
26 #ifndef WebGLImageConversionNEON_h | |
27 #define WebGLImageConversionNEON_h | |
28 | |
29 #if HAVE(ARM_NEON_INTRINSICS) | |
30 | |
31 #include <arm_neon.h> | |
32 | |
33 namespace blink { | |
34 | |
35 namespace SIMD { | |
36 | |
37 ALWAYS_INLINE void unpackOneRowOfRGBA16LittleToRGBA8(const uint16_t*& source, ui
nt8_t*& destination, unsigned& pixelsPerRow) | |
38 { | |
39 unsigned componentsPerRow = pixelsPerRow * 4; | |
40 unsigned tailComponents = componentsPerRow % 16; | |
41 unsigned componentsSize = componentsPerRow - tailComponents; | |
42 const uint8_t* src = reinterpret_cast<const uint8_t*>(source); | |
43 | |
44 for (unsigned i = 0; i < componentsSize; i += 16) { | |
45 uint8x16x2_t components = vld2q_u8(src + i * 2); | |
46 vst1q_u8(destination + i, components.val[1]); | |
47 } | |
48 | |
49 source += componentsSize; | |
50 destination += componentsSize; | |
51 pixelsPerRow = tailComponents / 4; | |
52 } | |
53 | |
54 ALWAYS_INLINE void unpackOneRowOfRGB16LittleToRGBA8(const uint16_t*& source, uin
t8_t*& destination, unsigned& pixelsPerRow) | |
55 { | |
56 unsigned componentsPerRow = pixelsPerRow * 3; | |
57 unsigned tailComponents = componentsPerRow % 24; | |
58 unsigned componentsSize = componentsPerRow - tailComponents; | |
59 | |
60 uint8x8_t componentA = vdup_n_u8(0xFF); | |
61 for (unsigned i = 0; i < componentsSize; i += 24) { | |
62 uint16x8x3_t RGB16 = vld3q_u16(source + i); | |
63 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(RGB16.val[0], 8)); | |
64 uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(RGB16.val[1], 8)); | |
65 uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(RGB16.val[2], 8)); | |
66 uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}}; | |
67 vst4_u8(destination, RGBA8); | |
68 destination += 32; | |
69 } | |
70 | |
71 source += componentsSize; | |
72 pixelsPerRow = tailComponents / 3; | |
73 } | |
74 | |
75 ALWAYS_INLINE void unpackOneRowOfARGB16LittleToRGBA8(const uint16_t*& source, ui
nt8_t*& destination, unsigned& pixelsPerRow) | |
76 { | |
77 unsigned componentsPerRow = pixelsPerRow * 4; | |
78 unsigned tailComponents = componentsPerRow % 32; | |
79 unsigned componentsSize = componentsPerRow - tailComponents; | |
80 | |
81 for (unsigned i = 0; i < componentsSize; i += 32) { | |
82 uint16x8x4_t ARGB16 = vld4q_u16(source + i); | |
83 uint8x8_t componentA = vqmovn_u16(vshrq_n_u16(ARGB16.val[0], 8)); | |
84 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(ARGB16.val[1], 8)); | |
85 uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(ARGB16.val[2], 8)); | |
86 uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(ARGB16.val[3], 8)); | |
87 uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}}; | |
88 vst4_u8(destination + i, RGBA8); | |
89 } | |
90 | |
91 source += componentsSize; | |
92 destination += componentsSize; | |
93 pixelsPerRow = tailComponents / 4; | |
94 } | |
95 | |
96 ALWAYS_INLINE void unpackOneRowOfBGRA16LittleToRGBA8(const uint16_t*& source, ui
nt8_t*& destination, unsigned& pixelsPerRow) | |
97 { | |
98 unsigned componentsPerRow = pixelsPerRow * 4; | |
99 unsigned tailComponents = componentsPerRow % 32; | |
100 unsigned componentsSize = componentsPerRow - tailComponents; | |
101 | |
102 for (unsigned i = 0; i < componentsSize; i += 32) { | |
103 uint16x8x4_t ARGB16 = vld4q_u16(source + i); | |
104 uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(ARGB16.val[0], 8)); | |
105 uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(ARGB16.val[1], 8)); | |
106 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(ARGB16.val[2], 8)); | |
107 uint8x8_t componentA = vqmovn_u16(vshrq_n_u16(ARGB16.val[3], 8)); | |
108 uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}}; | |
109 vst4_u8(destination + i, RGBA8); | |
110 } | |
111 | |
112 source += componentsSize; | |
113 destination += componentsSize; | |
114 pixelsPerRow = tailComponents / 4; | |
115 } | |
116 | |
117 ALWAYS_INLINE void unpackOneRowOfRGBA4444ToRGBA8(const uint16_t*& source, uint8_
t*& destination, unsigned& pixelsPerRow) | |
118 { | |
119 unsigned tailPixels = pixelsPerRow % 8; | |
120 unsigned pixelSize = pixelsPerRow - tailPixels; | |
121 | |
122 uint16x8_t immediate0x0f = vdupq_n_u16(0x0F); | |
123 for (unsigned i = 0; i < pixelSize; i += 8) { | |
124 uint16x8_t eightPixels = vld1q_u16(source + i); | |
125 | |
126 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(eightPixels, 12)); | |
127 uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 8),
immediate0x0f)); | |
128 uint8x8_t componentB = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 4),
immediate0x0f)); | |
129 uint8x8_t componentA = vqmovn_u16(vandq_u16(eightPixels, immediate0x0f))
; | |
130 | |
131 componentR = vorr_u8(vshl_n_u8(componentR, 4), componentR); | |
132 componentG = vorr_u8(vshl_n_u8(componentG, 4), componentG); | |
133 componentB = vorr_u8(vshl_n_u8(componentB, 4), componentB); | |
134 componentA = vorr_u8(vshl_n_u8(componentA, 4), componentA); | |
135 | |
136 uint8x8x4_t destComponents = {{componentR, componentG, componentB, compo
nentA}}; | |
137 vst4_u8(destination, destComponents); | |
138 destination += 32; | |
139 } | |
140 | |
141 source += pixelSize; | |
142 pixelsPerRow = tailPixels; | |
143 } | |
144 | |
145 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort4444(const uint8_t*& source,
uint16_t*& destination, unsigned& pixelsPerRow) | |
146 { | |
147 unsigned componentsPerRow = pixelsPerRow * 4; | |
148 unsigned tailComponents = componentsPerRow % 32; | |
149 unsigned componentsSize = componentsPerRow - tailComponents; | |
150 | |
151 uint8_t* dst = reinterpret_cast<uint8_t*>(destination); | |
152 uint8x8_t immediate0xf0 = vdup_n_u8(0xF0); | |
153 for (unsigned i = 0; i < componentsSize; i += 32) { | |
154 uint8x8x4_t RGBA8 = vld4_u8(source + i); | |
155 | |
156 uint8x8_t componentR = vand_u8(RGBA8.val[0], immediate0xf0); | |
157 uint8x8_t componentG = vshr_n_u8(vand_u8(RGBA8.val[1], immediate0xf0), 4
); | |
158 uint8x8_t componentB = vand_u8(RGBA8.val[2], immediate0xf0); | |
159 uint8x8_t componentA = vshr_n_u8(vand_u8(RGBA8.val[3], immediate0xf0), 4
); | |
160 | |
161 uint8x8x2_t RGBA4; | |
162 RGBA4.val[0] = vorr_u8(componentB, componentA); | |
163 RGBA4.val[1] = vorr_u8(componentR, componentG); | |
164 vst2_u8(dst, RGBA4); | |
165 dst += 16; | |
166 } | |
167 | |
168 source += componentsSize; | |
169 destination += componentsSize / 4; | |
170 pixelsPerRow = tailComponents / 4; | |
171 } | |
172 | |
173 ALWAYS_INLINE void unpackOneRowOfRGBA5551ToRGBA8(const uint16_t*& source, uint8_
t*& destination, unsigned& pixelsPerRow) | |
174 { | |
175 unsigned tailPixels = pixelsPerRow % 8; | |
176 unsigned pixelSize = pixelsPerRow - tailPixels; | |
177 | |
178 uint8x8_t immediate0x7 = vdup_n_u8(0x7); | |
179 uint8x8_t immediate0xff = vdup_n_u8(0xFF); | |
180 uint16x8_t immediate0x1f = vdupq_n_u16(0x1F); | |
181 uint16x8_t immediate0x1 = vdupq_n_u16(0x1); | |
182 | |
183 for (unsigned i = 0; i < pixelSize; i += 8) { | |
184 uint16x8_t eightPixels = vld1q_u16(source + i); | |
185 | |
186 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(eightPixels, 11)); | |
187 uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 6),
immediate0x1f)); | |
188 uint8x8_t componentB = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 1),
immediate0x1f)); | |
189 uint8x8_t componentA = vqmovn_u16(vandq_u16(eightPixels, immediate0x1)); | |
190 | |
191 componentR = vorr_u8(vshl_n_u8(componentR, 3), vand_u8(componentR, immed
iate0x7)); | |
192 componentG = vorr_u8(vshl_n_u8(componentG, 3), vand_u8(componentG, immed
iate0x7)); | |
193 componentB = vorr_u8(vshl_n_u8(componentB, 3), vand_u8(componentB, immed
iate0x7)); | |
194 componentA = vmul_u8(componentA, immediate0xff); | |
195 | |
196 uint8x8x4_t destComponents = {{componentR, componentG, componentB, compo
nentA}}; | |
197 vst4_u8(destination, destComponents); | |
198 destination += 32; | |
199 } | |
200 | |
201 source += pixelSize; | |
202 pixelsPerRow = tailPixels; | |
203 } | |
204 | |
205 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort5551(const uint8_t*& source,
uint16_t*& destination, unsigned& pixelsPerRow) | |
206 { | |
207 unsigned componentsPerRow = pixelsPerRow * 4; | |
208 unsigned tailComponents = componentsPerRow % 32; | |
209 unsigned componentsSize = componentsPerRow - tailComponents; | |
210 | |
211 uint8_t* dst = reinterpret_cast<uint8_t*>(destination); | |
212 | |
213 uint8x8_t immediate0xf8 = vdup_n_u8(0xF8); | |
214 uint8x8_t immediate0x18 = vdup_n_u8(0x18); | |
215 for (unsigned i = 0; i < componentsSize; i += 32) { | |
216 uint8x8x4_t RGBA8 = vld4_u8(source + i); | |
217 | |
218 uint8x8_t componentR = vand_u8(RGBA8.val[0], immediate0xf8); | |
219 uint8x8_t componentG3bit = vshr_n_u8(RGBA8.val[1], 5); | |
220 | |
221 uint8x8_t componentG2bit = vshl_n_u8(vand_u8(RGBA8.val[1], immediate0x18
), 3); | |
222 uint8x8_t componentB = vshr_n_u8(vand_u8(RGBA8.val[2], immediate0xf8), 2
); | |
223 uint8x8_t componentA = vshr_n_u8(RGBA8.val[3], 7); | |
224 | |
225 uint8x8x2_t RGBA5551; | |
226 RGBA5551.val[0] = vorr_u8(vorr_u8(componentG2bit, componentB), component
A); | |
227 RGBA5551.val[1] = vorr_u8(componentR, componentG3bit); | |
228 vst2_u8(dst, RGBA5551); | |
229 dst += 16; | |
230 } | |
231 | |
232 source += componentsSize; | |
233 destination += componentsSize / 4; | |
234 pixelsPerRow = tailComponents / 4; | |
235 } | |
236 | |
237 ALWAYS_INLINE void unpackOneRowOfRGB565ToRGBA8(const uint16_t*& source, uint8_t*
& destination, unsigned& pixelsPerRow) | |
238 { | |
239 unsigned tailPixels = pixelsPerRow % 8; | |
240 unsigned pixelSize = pixelsPerRow - tailPixels; | |
241 | |
242 uint16x8_t immediate0x3f = vdupq_n_u16(0x3F); | |
243 uint16x8_t immediate0x1f = vdupq_n_u16(0x1F); | |
244 uint8x8_t immediate0x3 = vdup_n_u8(0x3); | |
245 uint8x8_t immediate0x7 = vdup_n_u8(0x7); | |
246 | |
247 uint8x8_t componentA = vdup_n_u8(0xFF); | |
248 | |
249 for (unsigned i = 0; i < pixelSize; i += 8) { | |
250 uint16x8_t eightPixels = vld1q_u16(source + i); | |
251 | |
252 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(eightPixels, 11)); | |
253 uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 5),
immediate0x3f)); | |
254 uint8x8_t componentB = vqmovn_u16(vandq_u16(eightPixels, immediate0x1f))
; | |
255 | |
256 componentR = vorr_u8(vshl_n_u8(componentR, 3), vand_u8(componentR, immed
iate0x7)); | |
257 componentG = vorr_u8(vshl_n_u8(componentG, 2), vand_u8(componentG, immed
iate0x3)); | |
258 componentB = vorr_u8(vshl_n_u8(componentB, 3), vand_u8(componentB, immed
iate0x7)); | |
259 | |
260 uint8x8x4_t destComponents = {{componentR, componentG, componentB, compo
nentA}}; | |
261 vst4_u8(destination, destComponents); | |
262 destination += 32; | |
263 } | |
264 | |
265 source += pixelSize; | |
266 pixelsPerRow = tailPixels; | |
267 } | |
268 | |
269 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort565(const uint8_t*& source, u
int16_t*& destination, unsigned& pixelsPerRow) | |
270 { | |
271 unsigned componentsPerRow = pixelsPerRow * 4; | |
272 unsigned tailComponents = componentsPerRow % 32; | |
273 unsigned componentsSize = componentsPerRow - tailComponents; | |
274 uint8_t* dst = reinterpret_cast<uint8_t*>(destination); | |
275 | |
276 uint8x8_t immediate0xf8 = vdup_n_u8(0xF8); | |
277 uint8x8_t immediate0x1c = vdup_n_u8(0x1C); | |
278 for (unsigned i = 0; i < componentsSize; i += 32) { | |
279 uint8x8x4_t RGBA8 = vld4_u8(source + i); | |
280 | |
281 uint8x8_t componentR = vand_u8(RGBA8.val[0], immediate0xf8); | |
282 uint8x8_t componentGLeft = vshr_n_u8(RGBA8.val[1], 5); | |
283 uint8x8_t componentGRight = vshl_n_u8(vand_u8(RGBA8.val[1], immediate0x1
c), 3); | |
284 uint8x8_t componentB = vshr_n_u8(vand_u8(RGBA8.val[2], immediate0xf8), 3
); | |
285 | |
286 uint8x8x2_t RGB565; | |
287 RGB565.val[0] = vorr_u8(componentGRight, componentB); | |
288 RGB565.val[1] = vorr_u8(componentR, componentGLeft); | |
289 vst2_u8(dst, RGB565); | |
290 dst += 16; | |
291 } | |
292 | |
293 source += componentsSize; | |
294 destination += componentsSize / 4; | |
295 pixelsPerRow = tailComponents / 4; | |
296 } | |
297 | |
298 } // namespace SIMD | |
299 | |
300 } // namespace blink | |
301 | |
302 #endif // HAVE(ARM_NEON_INTRINSICS) | |
303 | |
304 #endif // WebGLImageConversionNEON_h | |
OLD | NEW |