| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (C) 2012 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Sz
eged | |
| 3 * | |
| 4 * Redistribution and use in source and binary forms, with or without | |
| 5 * modification, are permitted provided that the following conditions | |
| 6 * are met: | |
| 7 * 1. Redistributions of source code must retain the above copyright | |
| 8 * notice, this list of conditions and the following disclaimer. | |
| 9 * 2. Redistributions in binary form must reproduce the above copyright | |
| 10 * notice, this list of conditions and the following disclaimer in the | |
| 11 * documentation and/or other materials provided with the distribution. | |
| 12 * | |
| 13 * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY | |
| 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR | |
| 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 24 */ | |
| 25 | |
| 26 #ifndef WebGLImageConversionNEON_h | |
| 27 #define WebGLImageConversionNEON_h | |
| 28 | |
| 29 #if HAVE(ARM_NEON_INTRINSICS) | |
| 30 | |
| 31 #include <arm_neon.h> | |
| 32 | |
| 33 namespace blink { | |
| 34 | |
| 35 namespace SIMD { | |
| 36 | |
ALWAYS_INLINE void unpackOneRowOfRGBA16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    // Narrows little-endian 16-bit RGBA components to 8 bits by keeping the
    // high byte of each component. vld2q_u8 deinterleaves byte pairs:
    // val[0] holds the low bytes, val[1] the high bytes of 16 components.
    // Leftover components that do not fill a full vector are handed back to
    // the caller through the updated pixelsPerRow.
    unsigned totalComponents = pixelsPerRow * 4;
    unsigned remainderComponents = totalComponents % 16;
    unsigned simdComponents = totalComponents - remainderComponents;
    const uint8_t* sourceBytes = reinterpret_cast<const uint8_t*>(source);

    for (unsigned componentIndex = 0; componentIndex < simdComponents; componentIndex += 16) {
        // Each output component consumes two source bytes.
        uint8x16x2_t lowAndHighBytes = vld2q_u8(sourceBytes + componentIndex * 2);
        vst1q_u8(destination + componentIndex, lowAndHighBytes.val[1]);
    }

    source += simdComponents;
    destination += simdComponents;
    pixelsPerRow = remainderComponents / 4;
}
| 53 | |
ALWAYS_INLINE void unpackOneRowOfRGB16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    // Converts 8 RGB pixels (16 bits per component) to RGBA8 per iteration:
    // keep the high byte of each component and append an opaque alpha.
    // Remaining pixels are returned to the caller via pixelsPerRow.
    unsigned totalComponents = pixelsPerRow * 3;
    unsigned remainderComponents = totalComponents % 24;
    unsigned simdComponents = totalComponents - remainderComponents;

    uint8x8_t opaqueAlpha = vdup_n_u8(0xFF);
    for (unsigned componentIndex = 0; componentIndex < simdComponents; componentIndex += 24) {
        uint16x8x3_t rgb16 = vld3q_u16(source + componentIndex);
        uint8x8_t red = vqmovn_u16(vshrq_n_u16(rgb16.val[0], 8));
        uint8x8_t green = vqmovn_u16(vshrq_n_u16(rgb16.val[1], 8));
        uint8x8_t blue = vqmovn_u16(vshrq_n_u16(rgb16.val[2], 8));
        uint8x8x4_t rgba8 = {{red, green, blue, opaqueAlpha}};
        vst4_u8(destination, rgba8);
        destination += 32; // 8 pixels * 4 output bytes per iteration.
    }

    source += simdComponents;
    pixelsPerRow = remainderComponents / 3;
}
| 74 | |
ALWAYS_INLINE void unpackOneRowOfARGB16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    // Reorders 8 ARGB pixels (16 bits per component) into RGBA8 per
    // iteration, narrowing each component to its high byte. The tail that
    // does not fill a vector is handed back through pixelsPerRow.
    unsigned totalComponents = pixelsPerRow * 4;
    unsigned remainderComponents = totalComponents % 32;
    unsigned simdComponents = totalComponents - remainderComponents;

    for (unsigned componentIndex = 0; componentIndex < simdComponents; componentIndex += 32) {
        uint16x8x4_t argb16 = vld4q_u16(source + componentIndex);
        uint8x8_t alpha = vqmovn_u16(vshrq_n_u16(argb16.val[0], 8));
        uint8x8_t red = vqmovn_u16(vshrq_n_u16(argb16.val[1], 8));
        uint8x8_t green = vqmovn_u16(vshrq_n_u16(argb16.val[2], 8));
        uint8x8_t blue = vqmovn_u16(vshrq_n_u16(argb16.val[3], 8));
        uint8x8x4_t rgba8 = {{red, green, blue, alpha}};
        vst4_u8(destination + componentIndex, rgba8);
    }

    source += simdComponents;
    destination += simdComponents;
    pixelsPerRow = remainderComponents / 4;
}
| 95 | |
ALWAYS_INLINE void unpackOneRowOfBGRA16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    // Reorders 8 BGRA pixels (16 bits per component) into RGBA8 per
    // iteration, keeping the high byte of each little-endian component.
    // Fix: the deinterleaved vector was named "ARGB16" (copy-pasted from the
    // ARGB variant) even though its lanes hold B, G, R, A for this format —
    // renamed to BGRA16 so the lane indexing below reads correctly. No
    // behavior change.
    unsigned componentsPerRow = pixelsPerRow * 4;
    unsigned tailComponents = componentsPerRow % 32;
    unsigned componentsSize = componentsPerRow - tailComponents;

    for (unsigned i = 0; i < componentsSize; i += 32) {
        uint16x8x4_t BGRA16 = vld4q_u16(source + i);
        uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(BGRA16.val[0], 8));
        uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(BGRA16.val[1], 8));
        uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(BGRA16.val[2], 8));
        uint8x8_t componentA = vqmovn_u16(vshrq_n_u16(BGRA16.val[3], 8));
        uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}};
        vst4_u8(destination + i, RGBA8);
    }

    // Hand the unprocessed tail pixels back to the scalar fallback.
    source += componentsSize;
    destination += componentsSize;
    pixelsPerRow = tailComponents / 4;
}
| 116 | |
ALWAYS_INLINE void unpackOneRowOfRGBA4444ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    // Expands 8 RGBA4444 pixels to RGBA8 per iteration. Each 4-bit value n
    // is widened to 8 bits as (n << 4) | n, mapping 0x0 -> 0x00 and
    // 0xF -> 0xFF exactly. Tail pixels are returned through pixelsPerRow.
    unsigned remainderPixels = pixelsPerRow % 8;
    unsigned simdPixels = pixelsPerRow - remainderPixels;

    uint16x8_t nibbleMask = vdupq_n_u16(0x0F);
    for (unsigned pixelIndex = 0; pixelIndex < simdPixels; pixelIndex += 8) {
        uint16x8_t packedPixels = vld1q_u16(source + pixelIndex);

        // Layout of each 16-bit pixel: RRRR GGGG BBBB AAAA (bit 15 -> 0).
        uint8x8_t red = vqmovn_u16(vshrq_n_u16(packedPixels, 12));
        uint8x8_t green = vqmovn_u16(vandq_u16(vshrq_n_u16(packedPixels, 8), nibbleMask));
        uint8x8_t blue = vqmovn_u16(vandq_u16(vshrq_n_u16(packedPixels, 4), nibbleMask));
        uint8x8_t alpha = vqmovn_u16(vandq_u16(packedPixels, nibbleMask));

        // Duplicate each nibble into both halves of the output byte.
        red = vorr_u8(vshl_n_u8(red, 4), red);
        green = vorr_u8(vshl_n_u8(green, 4), green);
        blue = vorr_u8(vshl_n_u8(blue, 4), blue);
        alpha = vorr_u8(vshl_n_u8(alpha, 4), alpha);

        uint8x8x4_t rgba8 = {{red, green, blue, alpha}};
        vst4_u8(destination, rgba8);
        destination += 32;
    }

    source += simdPixels;
    pixelsPerRow = remainderPixels;
}
| 144 | |
ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort4444(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow)
{
    // Packs 8 RGBA8 pixels into RGBA4444 per iteration by keeping the high
    // nibble of each component. vst2_u8 interleaves the two lanes so each
    // 16-bit pixel is stored little-endian as bytes (B|A, R|G).
    unsigned totalComponents = pixelsPerRow * 4;
    unsigned remainderComponents = totalComponents % 32;
    unsigned simdComponents = totalComponents - remainderComponents;

    uint8_t* destinationBytes = reinterpret_cast<uint8_t*>(destination);
    uint8x8_t highNibbleMask = vdup_n_u8(0xF0);
    for (unsigned componentIndex = 0; componentIndex < simdComponents; componentIndex += 32) {
        uint8x8x4_t rgba8 = vld4_u8(source + componentIndex);

        uint8x8_t red = vand_u8(rgba8.val[0], highNibbleMask);
        uint8x8_t green = vshr_n_u8(vand_u8(rgba8.val[1], highNibbleMask), 4);
        uint8x8_t blue = vand_u8(rgba8.val[2], highNibbleMask);
        uint8x8_t alpha = vshr_n_u8(vand_u8(rgba8.val[3], highNibbleMask), 4);

        uint8x8x2_t packedBytes;
        packedBytes.val[0] = vorr_u8(blue, alpha);  // low byte:  BBBB AAAA
        packedBytes.val[1] = vorr_u8(red, green);   // high byte: RRRR GGGG
        vst2_u8(destinationBytes, packedBytes);
        destinationBytes += 16;
    }

    source += simdComponents;
    destination += simdComponents / 4;
    pixelsPerRow = remainderComponents / 4;
}
| 172 | |
ALWAYS_INLINE void unpackOneRowOfRGBA5551ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    // Expands 8 RGBA5551 pixels to RGBA8 per iteration; leftover pixels are
    // handed back to the caller (scalar tail loop) via pixelsPerRow.
    //
    // Fix: a 5-bit component c must be widened with top-bit replication,
    // (c << 3) | (c >> 2), to match the scalar unpack path that converts the
    // row tail. The previous code OR-ed in the LOW three bits,
    // (c << 3) | (c & 7), so SIMD-converted pixels could differ from the
    // scalar-converted tail of the same row.
    unsigned tailPixels = pixelsPerRow % 8;
    unsigned pixelSize = pixelsPerRow - tailPixels;

    uint8x8_t immediate0xff = vdup_n_u8(0xFF);
    uint16x8_t immediate0x1f = vdupq_n_u16(0x1F);
    uint16x8_t immediate0x1 = vdupq_n_u16(0x1);

    for (unsigned i = 0; i < pixelSize; i += 8) {
        uint16x8_t eightPixels = vld1q_u16(source + i);

        // Pixel layout: RRRRRGGG GGBBBBBA (bit 15 down to bit 0).
        uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(eightPixels, 11));
        uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 6), immediate0x1f));
        uint8x8_t componentB = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 1), immediate0x1f));
        uint8x8_t componentA = vqmovn_u16(vandq_u16(eightPixels, immediate0x1));

        // 5-bit -> 8-bit: replicate the top 3 bits into the low bits.
        componentR = vorr_u8(vshl_n_u8(componentR, 3), vshr_n_u8(componentR, 2));
        componentG = vorr_u8(vshl_n_u8(componentG, 3), vshr_n_u8(componentG, 2));
        componentB = vorr_u8(vshl_n_u8(componentB, 3), vshr_n_u8(componentB, 2));
        // Alpha bit 0/1 -> 0x00/0xFF.
        componentA = vmul_u8(componentA, immediate0xff);

        uint8x8x4_t destComponents = {{componentR, componentG, componentB, componentA}};
        vst4_u8(destination, destComponents);
        destination += 32;
    }

    source += pixelSize;
    pixelsPerRow = tailPixels;
}
| 204 | |
ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort5551(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow)
{
    // Packs 8 RGBA8 pixels into RGBA5551 per iteration. Each 16-bit result
    // is assembled byte-wise and stored little-endian via vst2_u8:
    //   high byte: RRRRRGGG (top 5 bits of R, top 3 bits of G)
    //   low byte:  GGBBBBBA (next 2 bits of G, top 5 bits of B, top bit of A)
    unsigned totalComponents = pixelsPerRow * 4;
    unsigned remainderComponents = totalComponents % 32;
    unsigned simdComponents = totalComponents - remainderComponents;

    uint8_t* destinationBytes = reinterpret_cast<uint8_t*>(destination);

    uint8x8_t topFiveBitsMask = vdup_n_u8(0xF8);
    uint8x8_t greenMidBitsMask = vdup_n_u8(0x18); // bits g4..g3 of the 5-bit green field
    for (unsigned componentIndex = 0; componentIndex < simdComponents; componentIndex += 32) {
        uint8x8x4_t rgba8 = vld4_u8(source + componentIndex);

        uint8x8_t red = vand_u8(rgba8.val[0], topFiveBitsMask);
        uint8x8_t greenHigh = vshr_n_u8(rgba8.val[1], 5);                        // -> high byte bits 2..0
        uint8x8_t greenLow = vshl_n_u8(vand_u8(rgba8.val[1], greenMidBitsMask), 3); // -> low byte bits 7..6
        uint8x8_t blue = vshr_n_u8(vand_u8(rgba8.val[2], topFiveBitsMask), 2);   // -> low byte bits 5..1
        uint8x8_t alpha = vshr_n_u8(rgba8.val[3], 7);                            // -> low byte bit 0

        uint8x8x2_t packedBytes;
        packedBytes.val[0] = vorr_u8(vorr_u8(greenLow, blue), alpha);
        packedBytes.val[1] = vorr_u8(red, greenHigh);
        vst2_u8(destinationBytes, packedBytes);
        destinationBytes += 16;
    }

    source += simdComponents;
    destination += simdComponents / 4;
    pixelsPerRow = remainderComponents / 4;
}
| 236 | |
ALWAYS_INLINE void unpackOneRowOfRGB565ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    // Expands 8 RGB565 pixels to opaque RGBA8 per iteration; leftover pixels
    // are handed back to the caller (scalar tail loop) via pixelsPerRow.
    //
    // Fix: components must be widened with top-bit replication to match the
    // scalar unpack path used for the row tail:
    //   5-bit c -> (c << 3) | (c >> 2), 6-bit c -> (c << 2) | (c >> 4).
    // The previous code OR-ed in the LOW bits ((c << 3) | (c & 7) and
    // (c << 2) | (c & 3)), so SIMD-converted pixels could differ from the
    // scalar-converted tail of the same row.
    unsigned tailPixels = pixelsPerRow % 8;
    unsigned pixelSize = pixelsPerRow - tailPixels;

    uint16x8_t immediate0x3f = vdupq_n_u16(0x3F);
    uint16x8_t immediate0x1f = vdupq_n_u16(0x1F);

    uint8x8_t componentA = vdup_n_u8(0xFF); // RGB565 has no alpha; emit opaque.

    for (unsigned i = 0; i < pixelSize; i += 8) {
        uint16x8_t eightPixels = vld1q_u16(source + i);

        // Pixel layout: RRRRRGGG GGGBBBBB (bit 15 down to bit 0).
        uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(eightPixels, 11));
        uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 5), immediate0x3f));
        uint8x8_t componentB = vqmovn_u16(vandq_u16(eightPixels, immediate0x1f));

        // Replicate the top bits into the low bits of the widened byte.
        componentR = vorr_u8(vshl_n_u8(componentR, 3), vshr_n_u8(componentR, 2));
        componentG = vorr_u8(vshl_n_u8(componentG, 2), vshr_n_u8(componentG, 4));
        componentB = vorr_u8(vshl_n_u8(componentB, 3), vshr_n_u8(componentB, 2));

        uint8x8x4_t destComponents = {{componentR, componentG, componentB, componentA}};
        vst4_u8(destination, destComponents);
        destination += 32;
    }

    source += pixelSize;
    pixelsPerRow = tailPixels;
}
| 268 | |
ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort565(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow)
{
    // Packs 8 RGBA8 pixels into RGB565 per iteration (alpha is discarded).
    // Each 16-bit result is built byte-wise and stored little-endian:
    //   high byte: RRRRRGGG, low byte: GGGBBBBB.
    unsigned totalComponents = pixelsPerRow * 4;
    unsigned remainderComponents = totalComponents % 32;
    unsigned simdComponents = totalComponents - remainderComponents;
    uint8_t* destinationBytes = reinterpret_cast<uint8_t*>(destination);

    uint8x8_t topFiveBitsMask = vdup_n_u8(0xF8);
    uint8x8_t greenMidBitsMask = vdup_n_u8(0x1C); // bits g4..g2 of the 6-bit green field
    for (unsigned componentIndex = 0; componentIndex < simdComponents; componentIndex += 32) {
        uint8x8x4_t rgba8 = vld4_u8(source + componentIndex);

        uint8x8_t red = vand_u8(rgba8.val[0], topFiveBitsMask);                      // -> high byte bits 7..3
        uint8x8_t greenHigh = vshr_n_u8(rgba8.val[1], 5);                            // -> high byte bits 2..0
        uint8x8_t greenLow = vshl_n_u8(vand_u8(rgba8.val[1], greenMidBitsMask), 3);  // -> low byte bits 7..5
        uint8x8_t blue = vshr_n_u8(vand_u8(rgba8.val[2], topFiveBitsMask), 3);       // -> low byte bits 4..0

        uint8x8x2_t packedBytes;
        packedBytes.val[0] = vorr_u8(greenLow, blue);
        packedBytes.val[1] = vorr_u8(red, greenHigh);
        vst2_u8(destinationBytes, packedBytes);
        destinationBytes += 16;
    }

    source += simdComponents;
    destination += simdComponents / 4;
    pixelsPerRow = remainderComponents / 4;
}
| 297 | |
| 298 } // namespace SIMD | |
| 299 | |
| 300 } // namespace blink | |
| 301 | |
| 302 #endif // HAVE(ARM_NEON_INTRINSICS) | |
| 303 | |
| 304 #endif // WebGLImageConversionNEON_h | |
| OLD | NEW |