OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2012 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Sz
eged | 2 * Copyright (C) 2012 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Sz
eged |
3 * | 3 * |
4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
5 * modification, are permitted provided that the following conditions | 5 * modification, are permitted provided that the following conditions |
6 * are met: | 6 * are met: |
7 * 1. Redistributions of source code must retain the above copyright | 7 * 1. Redistributions of source code must retain the above copyright |
8 * notice, this list of conditions and the following disclaimer. | 8 * notice, this list of conditions and the following disclaimer. |
9 * 2. Redistributions in binary form must reproduce the above copyright | 9 * 2. Redistributions in binary form must reproduce the above copyright |
10 * notice, this list of conditions and the following disclaimer in the | 10 * notice, this list of conditions and the following disclaimer in the |
(...skipping 16 matching lines...) Expand all Loading... |
27 #define WebGLImageConversionNEON_h | 27 #define WebGLImageConversionNEON_h |
28 | 28 |
29 #if HAVE(ARM_NEON_INTRINSICS) | 29 #if HAVE(ARM_NEON_INTRINSICS) |
30 | 30 |
31 #include <arm_neon.h> | 31 #include <arm_neon.h> |
32 | 32 |
33 namespace blink { | 33 namespace blink { |
34 | 34 |
35 namespace SIMD { | 35 namespace SIMD { |
36 | 36 |
// Converts a row of little-endian RGBA16 pixels to RGBA8 by keeping the most
// significant byte of every 16-bit component, 16 components per iteration.
// On return, source/destination are advanced past the processed data and
// pixelsPerRow holds the count of trailing pixels left for the scalar path.
ALWAYS_INLINE void unpackOneRowOfRGBA16LittleToRGBA8NEON(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned totalComponents = pixelsPerRow * 4;
    const unsigned remainder = totalComponents % 16;
    const unsigned vectorComponents = totalComponents - remainder;
    const uint8_t* bytes = reinterpret_cast<const uint8_t*>(source);

    unsigned offset = 0;
    while (offset < vectorComponents) {
        // De-interleave the low/high bytes of 16 consecutive uint16 values;
        // on little-endian, val[1] receives the high (most significant) bytes.
        uint8x16x2_t deinterleaved = vld2q_u8(bytes + offset * 2);
        vst1q_u8(destination + offset, deinterleaved.val[1]);
        offset += 16;
    }

    source += vectorComponents;
    destination += vectorComponents;
    pixelsPerRow = remainder / 4;
}
53 | 53 |
// Converts a row of little-endian RGB16 pixels to RGBA8, 8 pixels
// (24 components) per iteration, synthesizing an opaque alpha of 0xFF.
// Advances source past the processed components, advances destination by
// 32 bytes per 8 pixels written, and leaves the tail pixel count in
// pixelsPerRow for the scalar fallback.
ALWAYS_INLINE void unpackOneRowOfRGB16LittleToRGBA8NEON(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned totalComponents = pixelsPerRow * 3;
    const unsigned remainder = totalComponents % 24;
    const unsigned vectorComponents = totalComponents - remainder;

    const uint8x8_t opaqueAlpha = vdup_n_u8(0xFF);
    for (unsigned offset = 0; offset < vectorComponents; offset += 24) {
        // Load 8 RGB pixels de-interleaved into one vector per channel.
        uint16x8x3_t rgb = vld3q_u16(source + offset);
        // Narrow each 16-bit channel to 8 bits by taking its high byte.
        uint8x8_t red = vqmovn_u16(vshrq_n_u16(rgb.val[0], 8));
        uint8x8_t green = vqmovn_u16(vshrq_n_u16(rgb.val[1], 8));
        uint8x8_t blue = vqmovn_u16(vshrq_n_u16(rgb.val[2], 8));
        const uint8x8x4_t rgba = {{red, green, blue, opaqueAlpha}};
        vst4_u8(destination, rgba);
        destination += 32;
    }

    source += vectorComponents;
    pixelsPerRow = remainder / 3;
}
74 | 74 |
// Converts a row of little-endian ARGB16 pixels to RGBA8, 8 pixels
// (32 components) per iteration. Channels are narrowed to their high byte
// and re-interleaved in RGBA order. Pointers are advanced past the processed
// data; pixelsPerRow is reduced to the tail left for the scalar fallback.
ALWAYS_INLINE void unpackOneRowOfARGB16LittleToRGBA8NEON(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned totalComponents = pixelsPerRow * 4;
    const unsigned remainder = totalComponents % 32;
    const unsigned vectorComponents = totalComponents - remainder;

    for (unsigned offset = 0; offset < vectorComponents; offset += 32) {
        // De-interleave 8 ARGB pixels into one vector per channel.
        uint16x8x4_t argb = vld4q_u16(source + offset);
        uint8x8_t alpha = vqmovn_u16(vshrq_n_u16(argb.val[0], 8));
        uint8x8_t red = vqmovn_u16(vshrq_n_u16(argb.val[1], 8));
        uint8x8_t green = vqmovn_u16(vshrq_n_u16(argb.val[2], 8));
        uint8x8_t blue = vqmovn_u16(vshrq_n_u16(argb.val[3], 8));
        // Store back interleaved in RGBA channel order.
        const uint8x8x4_t rgba = {{red, green, blue, alpha}};
        vst4_u8(destination + offset, rgba);
    }

    source += vectorComponents;
    destination += vectorComponents;
    pixelsPerRow = remainder / 4;
}
95 | 95 |
// Converts a row of little-endian BGRA16 pixels to RGBA8, 8 pixels
// (32 components) per iteration. Each 16-bit channel keeps its high byte and
// the B/R channels are swapped into RGBA order on store. Pointers advance
// past the processed data; pixelsPerRow holds the scalar-path tail.
ALWAYS_INLINE void unpackOneRowOfBGRA16LittleToRGBA8NEON(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned totalComponents = pixelsPerRow * 4;
    const unsigned remainder = totalComponents % 32;
    const unsigned vectorComponents = totalComponents - remainder;

    for (unsigned offset = 0; offset < vectorComponents; offset += 32) {
        // De-interleave 8 BGRA pixels into one vector per channel.
        uint16x8x4_t bgra = vld4q_u16(source + offset);
        uint8x8_t blue = vqmovn_u16(vshrq_n_u16(bgra.val[0], 8));
        uint8x8_t green = vqmovn_u16(vshrq_n_u16(bgra.val[1], 8));
        uint8x8_t red = vqmovn_u16(vshrq_n_u16(bgra.val[2], 8));
        uint8x8_t alpha = vqmovn_u16(vshrq_n_u16(bgra.val[3], 8));
        // Store back interleaved in RGBA channel order.
        const uint8x8x4_t rgba = {{red, green, blue, alpha}};
        vst4_u8(destination + offset, rgba);
    }

    source += vectorComponents;
    destination += vectorComponents;
    pixelsPerRow = remainder / 4;
}
116 | 116 |
// Converts a row of packed RGBA4444 pixels to RGBA8, 8 pixels per iteration.
// Each 4-bit channel is extracted and expanded to 8 bits by replicating the
// nibble into both halves of the byte (0xF -> 0xFF, 0x0 -> 0x00). On return
// source is advanced, destination has 32 bytes written per 8 pixels, and
// pixelsPerRow holds the tail for the scalar fallback.
ALWAYS_INLINE void unpackOneRowOfRGBA4444ToRGBA8NEON(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned remainderPixels = pixelsPerRow % 8;
    const unsigned vectorPixels = pixelsPerRow - remainderPixels;

    const uint16x8_t nibbleMask = vdupq_n_u16(0x0F);
    for (unsigned i = 0; i < vectorPixels; i += 8) {
        const uint16x8_t packed = vld1q_u16(source + i);

        // Isolate the four 4-bit channels of each pixel (R in the top nibble).
        uint8x8_t red = vqmovn_u16(vshrq_n_u16(packed, 12));
        uint8x8_t green = vqmovn_u16(vandq_u16(vshrq_n_u16(packed, 8), nibbleMask));
        uint8x8_t blue = vqmovn_u16(vandq_u16(vshrq_n_u16(packed, 4), nibbleMask));
        uint8x8_t alpha = vqmovn_u16(vandq_u16(packed, nibbleMask));

        // Widen each nibble to a full byte: value | (value << 4).
        red = vorr_u8(vshl_n_u8(red, 4), red);
        green = vorr_u8(vshl_n_u8(green, 4), green);
        blue = vorr_u8(vshl_n_u8(blue, 4), blue);
        alpha = vorr_u8(vshl_n_u8(alpha, 4), alpha);

        const uint8x8x4_t rgba = {{red, green, blue, alpha}};
        vst4_u8(destination, rgba);
        destination += 32;
    }

    source += vectorPixels;
    pixelsPerRow = remainderPixels;
}
144 | 144 |
// Packs a row of RGBA8 pixels into RGBA4444, 8 pixels (32 source components)
// per iteration: the high nibble of each 8-bit channel is kept and the four
// nibbles are assembled as two bytes per pixel, stored little-endian via a
// two-plane interleaved store. Pointers advance past the processed data;
// pixelsPerRow holds the tail left for the scalar fallback.
ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort4444NEON(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow)
{
    const unsigned totalComponents = pixelsPerRow * 4;
    const unsigned remainder = totalComponents % 32;
    const unsigned vectorComponents = totalComponents - remainder;

    uint8_t* outBytes = reinterpret_cast<uint8_t*>(destination);
    const uint8x8_t highNibbleMask = vdup_n_u8(0xF0);
    for (unsigned offset = 0; offset < vectorComponents; offset += 32) {
        // De-interleave 8 RGBA8 pixels into one vector per channel.
        const uint8x8x4_t rgba = vld4_u8(source + offset);

        // Keep the top nibble of each channel; G and A are shifted down so
        // they can be OR-ed into the low nibble of their output byte.
        const uint8x8_t red = vand_u8(rgba.val[0], highNibbleMask);
        const uint8x8_t green = vshr_n_u8(vand_u8(rgba.val[1], highNibbleMask), 4);
        const uint8x8_t blue = vand_u8(rgba.val[2], highNibbleMask);
        const uint8x8_t alpha = vshr_n_u8(vand_u8(rgba.val[3], highNibbleMask), 4);

        // Little-endian uint16 layout: low byte is B|A, high byte is R|G.
        uint8x8x2_t packed;
        packed.val[0] = vorr_u8(blue, alpha);
        packed.val[1] = vorr_u8(red, green);
        vst2_u8(outBytes, packed);
        outBytes += 16;
    }

    source += vectorComponents;
    destination += vectorComponents / 4;
    pixelsPerRow = remainder / 4;
}
172 | 172 |
173 ALWAYS_INLINE void unpackOneRowOfRGBA5551ToRGBA8(const uint16_t*& source, uint8_
t*& destination, unsigned& pixelsPerRow) | 173 ALWAYS_INLINE void unpackOneRowOfRGBA5551ToRGBA8NEON(const uint16_t*& source, ui
nt8_t*& destination, unsigned& pixelsPerRow) |
174 { | 174 { |
175 unsigned tailPixels = pixelsPerRow % 8; | 175 unsigned tailPixels = pixelsPerRow % 8; |
176 unsigned pixelSize = pixelsPerRow - tailPixels; | 176 unsigned pixelSize = pixelsPerRow - tailPixels; |
177 | 177 |
178 uint8x8_t immediate0x7 = vdup_n_u8(0x7); | 178 uint8x8_t immediate0x7 = vdup_n_u8(0x7); |
179 uint8x8_t immediate0xff = vdup_n_u8(0xFF); | 179 uint8x8_t immediate0xff = vdup_n_u8(0xFF); |
180 uint16x8_t immediate0x1f = vdupq_n_u16(0x1F); | 180 uint16x8_t immediate0x1f = vdupq_n_u16(0x1F); |
181 uint16x8_t immediate0x1 = vdupq_n_u16(0x1); | 181 uint16x8_t immediate0x1 = vdupq_n_u16(0x1); |
182 | 182 |
183 for (unsigned i = 0; i < pixelSize; i += 8) { | 183 for (unsigned i = 0; i < pixelSize; i += 8) { |
(...skipping 11 matching lines...) Expand all Loading... |
195 | 195 |
196 uint8x8x4_t destComponents = {{componentR, componentG, componentB, compo
nentA}}; | 196 uint8x8x4_t destComponents = {{componentR, componentG, componentB, compo
nentA}}; |
197 vst4_u8(destination, destComponents); | 197 vst4_u8(destination, destComponents); |
198 destination += 32; | 198 destination += 32; |
199 } | 199 } |
200 | 200 |
201 source += pixelSize; | 201 source += pixelSize; |
202 pixelsPerRow = tailPixels; | 202 pixelsPerRow = tailPixels; |
203 } | 203 } |
204 | 204 |
205 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort5551(const uint8_t*& source,
uint16_t*& destination, unsigned& pixelsPerRow) | 205 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort5551NEON(const uint8_t*& sour
ce, uint16_t*& destination, unsigned& pixelsPerRow) |
206 { | 206 { |
207 unsigned componentsPerRow = pixelsPerRow * 4; | 207 unsigned componentsPerRow = pixelsPerRow * 4; |
208 unsigned tailComponents = componentsPerRow % 32; | 208 unsigned tailComponents = componentsPerRow % 32; |
209 unsigned componentsSize = componentsPerRow - tailComponents; | 209 unsigned componentsSize = componentsPerRow - tailComponents; |
210 | 210 |
211 uint8_t* dst = reinterpret_cast<uint8_t*>(destination); | 211 uint8_t* dst = reinterpret_cast<uint8_t*>(destination); |
212 | 212 |
213 uint8x8_t immediate0xf8 = vdup_n_u8(0xF8); | 213 uint8x8_t immediate0xf8 = vdup_n_u8(0xF8); |
214 uint8x8_t immediate0x18 = vdup_n_u8(0x18); | 214 uint8x8_t immediate0x18 = vdup_n_u8(0x18); |
215 for (unsigned i = 0; i < componentsSize; i += 32) { | 215 for (unsigned i = 0; i < componentsSize; i += 32) { |
(...skipping 11 matching lines...) Expand all Loading... |
227 RGBA5551.val[1] = vorr_u8(componentR, componentG3bit); | 227 RGBA5551.val[1] = vorr_u8(componentR, componentG3bit); |
228 vst2_u8(dst, RGBA5551); | 228 vst2_u8(dst, RGBA5551); |
229 dst += 16; | 229 dst += 16; |
230 } | 230 } |
231 | 231 |
232 source += componentsSize; | 232 source += componentsSize; |
233 destination += componentsSize / 4; | 233 destination += componentsSize / 4; |
234 pixelsPerRow = tailComponents / 4; | 234 pixelsPerRow = tailComponents / 4; |
235 } | 235 } |
236 | 236 |
237 ALWAYS_INLINE void unpackOneRowOfRGB565ToRGBA8(const uint16_t*& source, uint8_t*
& destination, unsigned& pixelsPerRow) | 237 ALWAYS_INLINE void unpackOneRowOfRGB565ToRGBA8NEON(const uint16_t*& source, uint
8_t*& destination, unsigned& pixelsPerRow) |
238 { | 238 { |
239 unsigned tailPixels = pixelsPerRow % 8; | 239 unsigned tailPixels = pixelsPerRow % 8; |
240 unsigned pixelSize = pixelsPerRow - tailPixels; | 240 unsigned pixelSize = pixelsPerRow - tailPixels; |
241 | 241 |
242 uint16x8_t immediate0x3f = vdupq_n_u16(0x3F); | 242 uint16x8_t immediate0x3f = vdupq_n_u16(0x3F); |
243 uint16x8_t immediate0x1f = vdupq_n_u16(0x1F); | 243 uint16x8_t immediate0x1f = vdupq_n_u16(0x1F); |
244 uint8x8_t immediate0x3 = vdup_n_u8(0x3); | 244 uint8x8_t immediate0x3 = vdup_n_u8(0x3); |
245 uint8x8_t immediate0x7 = vdup_n_u8(0x7); | 245 uint8x8_t immediate0x7 = vdup_n_u8(0x7); |
246 | 246 |
247 uint8x8_t componentA = vdup_n_u8(0xFF); | 247 uint8x8_t componentA = vdup_n_u8(0xFF); |
(...skipping 11 matching lines...) Expand all Loading... |
259 | 259 |
260 uint8x8x4_t destComponents = {{componentR, componentG, componentB, compo
nentA}}; | 260 uint8x8x4_t destComponents = {{componentR, componentG, componentB, compo
nentA}}; |
261 vst4_u8(destination, destComponents); | 261 vst4_u8(destination, destComponents); |
262 destination += 32; | 262 destination += 32; |
263 } | 263 } |
264 | 264 |
265 source += pixelSize; | 265 source += pixelSize; |
266 pixelsPerRow = tailPixels; | 266 pixelsPerRow = tailPixels; |
267 } | 267 } |
268 | 268 |
269 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort565(const uint8_t*& source, u
int16_t*& destination, unsigned& pixelsPerRow) | 269 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort565NEON(const uint8_t*& sourc
e, uint16_t*& destination, unsigned& pixelsPerRow) |
270 { | 270 { |
271 unsigned componentsPerRow = pixelsPerRow * 4; | 271 unsigned componentsPerRow = pixelsPerRow * 4; |
272 unsigned tailComponents = componentsPerRow % 32; | 272 unsigned tailComponents = componentsPerRow % 32; |
273 unsigned componentsSize = componentsPerRow - tailComponents; | 273 unsigned componentsSize = componentsPerRow - tailComponents; |
274 uint8_t* dst = reinterpret_cast<uint8_t*>(destination); | 274 uint8_t* dst = reinterpret_cast<uint8_t*>(destination); |
275 | 275 |
276 uint8x8_t immediate0xf8 = vdup_n_u8(0xF8); | 276 uint8x8_t immediate0xf8 = vdup_n_u8(0xF8); |
277 uint8x8_t immediate0x1c = vdup_n_u8(0x1C); | 277 uint8x8_t immediate0x1c = vdup_n_u8(0x1C); |
278 for (unsigned i = 0; i < componentsSize; i += 32) { | 278 for (unsigned i = 0; i < componentsSize; i += 32) { |
279 uint8x8x4_t RGBA8 = vld4_u8(source + i); | 279 uint8x8x4_t RGBA8 = vld4_u8(source + i); |
(...skipping 15 matching lines...) Expand all Loading... |
295 pixelsPerRow = tailComponents / 4; | 295 pixelsPerRow = tailComponents / 4; |
296 } | 296 } |
297 | 297 |
298 } // namespace SIMD | 298 } // namespace SIMD |
299 | 299 |
300 } // namespace blink | 300 } // namespace blink |
301 | 301 |
302 #endif // HAVE(ARM_NEON_INTRINSICS) | 302 #endif // HAVE(ARM_NEON_INTRINSICS) |
303 | 303 |
304 #endif // WebGLImageConversionNEON_h | 304 #endif // WebGLImageConversionNEON_h |
OLD | NEW |