| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2012 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Szeged | 2 * Copyright (C) 2012 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Szeged |
| 3 * | 3 * |
| 4 * Redistribution and use in source and binary forms, with or without | 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions | 5 * modification, are permitted provided that the following conditions |
| 6 * are met: | 6 * are met: |
| 7 * 1. Redistributions of source code must retain the above copyright | 7 * 1. Redistributions of source code must retain the above copyright |
| 8 * notice, this list of conditions and the following disclaimer. | 8 * notice, this list of conditions and the following disclaimer. |
| 9 * 2. Redistributions in binary form must reproduce the above copyright | 9 * 2. Redistributions in binary form must reproduce the above copyright |
| 10 * notice, this list of conditions and the following disclaimer in the | 10 * notice, this list of conditions and the following disclaimer in the |
| (...skipping 16 matching lines...) |
| 27 #define WebGLImageConversionNEON_h | 27 #define WebGLImageConversionNEON_h |
| 28 | 28 |
| 29 #if HAVE(ARM_NEON_INTRINSICS) | 29 #if HAVE(ARM_NEON_INTRINSICS) |
| 30 | 30 |
| 31 #include <arm_neon.h> | 31 #include <arm_neon.h> |
| 32 | 32 |
| 33 namespace blink { | 33 namespace blink { |
| 34 | 34 |
| 35 namespace SIMD { | 35 namespace SIMD { |
| 36 | 36 |
| 37 ALWAYS_INLINE void unpackOneRowOfRGBA16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) | 37 ALWAYS_INLINE void unpackOneRowOfRGBA16LittleToRGBA8NEON(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) |
| 38 { | 38 { |
| 39 unsigned componentsPerRow = pixelsPerRow * 4; | 39 unsigned componentsPerRow = pixelsPerRow * 4; |
| 40 unsigned tailComponents = componentsPerRow % 16; | 40 unsigned tailComponents = componentsPerRow % 16; |
| 41 unsigned componentsSize = componentsPerRow - tailComponents; | 41 unsigned componentsSize = componentsPerRow - tailComponents; |
| 42 const uint8_t* src = reinterpret_cast<const uint8_t*>(source); | 42 const uint8_t* src = reinterpret_cast<const uint8_t*>(source); |
| 43 | 43 |
| 44 for (unsigned i = 0; i < componentsSize; i += 16) { | 44 for (unsigned i = 0; i < componentsSize; i += 16) { |
| 45 uint8x16x2_t components = vld2q_u8(src + i * 2); | 45 uint8x16x2_t components = vld2q_u8(src + i * 2); |
| 46 vst1q_u8(destination + i, components.val[1]); | 46 vst1q_u8(destination + i, components.val[1]); |
| 47 } | 47 } |
| 48 | 48 |
| 49 source += componentsSize; | 49 source += componentsSize; |
| 50 destination += componentsSize; | 50 destination += componentsSize; |
| 51 pixelsPerRow = tailComponents / 4; | 51 pixelsPerRow = tailComponents / 4; |
| 52 } | 52 } |
| 53 | 53 |
| 54 ALWAYS_INLINE void unpackOneRowOfRGB16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) | 54 ALWAYS_INLINE void unpackOneRowOfRGB16LittleToRGBA8NEON(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) |
| 55 { | 55 { |
| 56 unsigned componentsPerRow = pixelsPerRow * 3; | 56 unsigned componentsPerRow = pixelsPerRow * 3; |
| 57 unsigned tailComponents = componentsPerRow % 24; | 57 unsigned tailComponents = componentsPerRow % 24; |
| 58 unsigned componentsSize = componentsPerRow - tailComponents; | 58 unsigned componentsSize = componentsPerRow - tailComponents; |
| 59 | 59 |
| 60 uint8x8_t componentA = vdup_n_u8(0xFF); | 60 uint8x8_t componentA = vdup_n_u8(0xFF); |
| 61 for (unsigned i = 0; i < componentsSize; i += 24) { | 61 for (unsigned i = 0; i < componentsSize; i += 24) { |
| 62 uint16x8x3_t RGB16 = vld3q_u16(source + i); | 62 uint16x8x3_t RGB16 = vld3q_u16(source + i); |
| 63 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(RGB16.val[0], 8)); | 63 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(RGB16.val[0], 8)); |
| 64 uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(RGB16.val[1], 8)); | 64 uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(RGB16.val[1], 8)); |
| 65 uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(RGB16.val[2], 8)); | 65 uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(RGB16.val[2], 8)); |
| 66 uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}}; | 66 uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}}; |
| 67 vst4_u8(destination, RGBA8); | 67 vst4_u8(destination, RGBA8); |
| 68 destination += 32; | 68 destination += 32; |
| 69 } | 69 } |
| 70 | 70 |
| 71 source += componentsSize; | 71 source += componentsSize; |
| 72 pixelsPerRow = tailComponents / 3; | 72 pixelsPerRow = tailComponents / 3; |
| 73 } | 73 } |
| 74 | 74 |
| 75 ALWAYS_INLINE void unpackOneRowOfARGB16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) | 75 ALWAYS_INLINE void unpackOneRowOfARGB16LittleToRGBA8NEON(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) |
| 76 { | 76 { |
| 77 unsigned componentsPerRow = pixelsPerRow * 4; | 77 unsigned componentsPerRow = pixelsPerRow * 4; |
| 78 unsigned tailComponents = componentsPerRow % 32; | 78 unsigned tailComponents = componentsPerRow % 32; |
| 79 unsigned componentsSize = componentsPerRow - tailComponents; | 79 unsigned componentsSize = componentsPerRow - tailComponents; |
| 80 | 80 |
| 81 for (unsigned i = 0; i < componentsSize; i += 32) { | 81 for (unsigned i = 0; i < componentsSize; i += 32) { |
| 82 uint16x8x4_t ARGB16 = vld4q_u16(source + i); | 82 uint16x8x4_t ARGB16 = vld4q_u16(source + i); |
| 83 uint8x8_t componentA = vqmovn_u16(vshrq_n_u16(ARGB16.val[0], 8)); | 83 uint8x8_t componentA = vqmovn_u16(vshrq_n_u16(ARGB16.val[0], 8)); |
| 84 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(ARGB16.val[1], 8)); | 84 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(ARGB16.val[1], 8)); |
| 85 uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(ARGB16.val[2], 8)); | 85 uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(ARGB16.val[2], 8)); |
| 86 uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(ARGB16.val[3], 8)); | 86 uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(ARGB16.val[3], 8)); |
| 87 uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}}; | 87 uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}}; |
| 88 vst4_u8(destination + i, RGBA8); | 88 vst4_u8(destination + i, RGBA8); |
| 89 } | 89 } |
| 90 | 90 |
| 91 source += componentsSize; | 91 source += componentsSize; |
| 92 destination += componentsSize; | 92 destination += componentsSize; |
| 93 pixelsPerRow = tailComponents / 4; | 93 pixelsPerRow = tailComponents / 4; |
| 94 } | 94 } |
| 95 | 95 |
| 96 ALWAYS_INLINE void unpackOneRowOfBGRA16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) | 96 ALWAYS_INLINE void unpackOneRowOfBGRA16LittleToRGBA8NEON(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) |
| 97 { | 97 { |
| 98 unsigned componentsPerRow = pixelsPerRow * 4; | 98 unsigned componentsPerRow = pixelsPerRow * 4; |
| 99 unsigned tailComponents = componentsPerRow % 32; | 99 unsigned tailComponents = componentsPerRow % 32; |
| 100 unsigned componentsSize = componentsPerRow - tailComponents; | 100 unsigned componentsSize = componentsPerRow - tailComponents; |
| 101 | 101 |
| 102 for (unsigned i = 0; i < componentsSize; i += 32) { | 102 for (unsigned i = 0; i < componentsSize; i += 32) { |
| 103 uint16x8x4_t ARGB16 = vld4q_u16(source + i); | 103 uint16x8x4_t ARGB16 = vld4q_u16(source + i); |
| 104 uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(ARGB16.val[0], 8)); | 104 uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(ARGB16.val[0], 8)); |
| 105 uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(ARGB16.val[1], 8)); | 105 uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(ARGB16.val[1], 8)); |
| 106 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(ARGB16.val[2], 8)); | 106 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(ARGB16.val[2], 8)); |
| 107 uint8x8_t componentA = vqmovn_u16(vshrq_n_u16(ARGB16.val[3], 8)); | 107 uint8x8_t componentA = vqmovn_u16(vshrq_n_u16(ARGB16.val[3], 8)); |
| 108 uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}}; | 108 uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}}; |
| 109 vst4_u8(destination + i, RGBA8); | 109 vst4_u8(destination + i, RGBA8); |
| 110 } | 110 } |
| 111 | 111 |
| 112 source += componentsSize; | 112 source += componentsSize; |
| 113 destination += componentsSize; | 113 destination += componentsSize; |
| 114 pixelsPerRow = tailComponents / 4; | 114 pixelsPerRow = tailComponents / 4; |
| 115 } | 115 } |
| 116 | 116 |
| 117 ALWAYS_INLINE void unpackOneRowOfRGBA4444ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) | 117 ALWAYS_INLINE void unpackOneRowOfRGBA4444ToRGBA8NEON(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) |
| 118 { | 118 { |
| 119 unsigned tailPixels = pixelsPerRow % 8; | 119 unsigned tailPixels = pixelsPerRow % 8; |
| 120 unsigned pixelSize = pixelsPerRow - tailPixels; | 120 unsigned pixelSize = pixelsPerRow - tailPixels; |
| 121 | 121 |
| 122 uint16x8_t immediate0x0f = vdupq_n_u16(0x0F); | 122 uint16x8_t immediate0x0f = vdupq_n_u16(0x0F); |
| 123 for (unsigned i = 0; i < pixelSize; i += 8) { | 123 for (unsigned i = 0; i < pixelSize; i += 8) { |
| 124 uint16x8_t eightPixels = vld1q_u16(source + i); | 124 uint16x8_t eightPixels = vld1q_u16(source + i); |
| 125 | 125 |
| 126 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(eightPixels, 12)); | 126 uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(eightPixels, 12)); |
| 127 uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 8), immediate0x0f)); | 127 uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 8), immediate0x0f)); |
| 128 uint8x8_t componentB = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 4), immediate0x0f)); | 128 uint8x8_t componentB = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 4), immediate0x0f)); |
| 129 uint8x8_t componentA = vqmovn_u16(vandq_u16(eightPixels, immediate0x0f)); | 129 uint8x8_t componentA = vqmovn_u16(vandq_u16(eightPixels, immediate0x0f)); |
| 130 | 130 |
| 131 componentR = vorr_u8(vshl_n_u8(componentR, 4), componentR); | 131 componentR = vorr_u8(vshl_n_u8(componentR, 4), componentR); |
| 132 componentG = vorr_u8(vshl_n_u8(componentG, 4), componentG); | 132 componentG = vorr_u8(vshl_n_u8(componentG, 4), componentG); |
| 133 componentB = vorr_u8(vshl_n_u8(componentB, 4), componentB); | 133 componentB = vorr_u8(vshl_n_u8(componentB, 4), componentB); |
| 134 componentA = vorr_u8(vshl_n_u8(componentA, 4), componentA); | 134 componentA = vorr_u8(vshl_n_u8(componentA, 4), componentA); |
| 135 | 135 |
| 136 uint8x8x4_t destComponents = {{componentR, componentG, componentB, componentA}}; | 136 uint8x8x4_t destComponents = {{componentR, componentG, componentB, componentA}}; |
| 137 vst4_u8(destination, destComponents); | 137 vst4_u8(destination, destComponents); |
| 138 destination += 32; | 138 destination += 32; |
| 139 } | 139 } |
| 140 | 140 |
| 141 source += pixelSize; | 141 source += pixelSize; |
| 142 pixelsPerRow = tailPixels; | 142 pixelsPerRow = tailPixels; |
| 143 } | 143 } |
| 144 | 144 |
| 145 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort4444(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow) | 145 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort4444NEON(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow) |
| 146 { | 146 { |
| 147 unsigned componentsPerRow = pixelsPerRow * 4; | 147 unsigned componentsPerRow = pixelsPerRow * 4; |
| 148 unsigned tailComponents = componentsPerRow % 32; | 148 unsigned tailComponents = componentsPerRow % 32; |
| 149 unsigned componentsSize = componentsPerRow - tailComponents; | 149 unsigned componentsSize = componentsPerRow - tailComponents; |
| 150 | 150 |
| 151 uint8_t* dst = reinterpret_cast<uint8_t*>(destination); | 151 uint8_t* dst = reinterpret_cast<uint8_t*>(destination); |
| 152 uint8x8_t immediate0xf0 = vdup_n_u8(0xF0); | 152 uint8x8_t immediate0xf0 = vdup_n_u8(0xF0); |
| 153 for (unsigned i = 0; i < componentsSize; i += 32) { | 153 for (unsigned i = 0; i < componentsSize; i += 32) { |
| 154 uint8x8x4_t RGBA8 = vld4_u8(source + i); | 154 uint8x8x4_t RGBA8 = vld4_u8(source + i); |
| 155 | 155 |
| 156 uint8x8_t componentR = vand_u8(RGBA8.val[0], immediate0xf0); | 156 uint8x8_t componentR = vand_u8(RGBA8.val[0], immediate0xf0); |
| 157 uint8x8_t componentG = vshr_n_u8(vand_u8(RGBA8.val[1], immediate0xf0), 4); | 157 uint8x8_t componentG = vshr_n_u8(vand_u8(RGBA8.val[1], immediate0xf0), 4); |
| 158 uint8x8_t componentB = vand_u8(RGBA8.val[2], immediate0xf0); | 158 uint8x8_t componentB = vand_u8(RGBA8.val[2], immediate0xf0); |
| 159 uint8x8_t componentA = vshr_n_u8(vand_u8(RGBA8.val[3], immediate0xf0), 4); | 159 uint8x8_t componentA = vshr_n_u8(vand_u8(RGBA8.val[3], immediate0xf0), 4); |
| 160 | 160 |
| 161 uint8x8x2_t RGBA4; | 161 uint8x8x2_t RGBA4; |
| 162 RGBA4.val[0] = vorr_u8(componentB, componentA); | 162 RGBA4.val[0] = vorr_u8(componentB, componentA); |
| 163 RGBA4.val[1] = vorr_u8(componentR, componentG); | 163 RGBA4.val[1] = vorr_u8(componentR, componentG); |
| 164 vst2_u8(dst, RGBA4); | 164 vst2_u8(dst, RGBA4); |
| 165 dst += 16; | 165 dst += 16; |
| 166 } | 166 } |
| 167 | 167 |
| 168 source += componentsSize; | 168 source += componentsSize; |
| 169 destination += componentsSize / 4; | 169 destination += componentsSize / 4; |
| 170 pixelsPerRow = tailComponents / 4; | 170 pixelsPerRow = tailComponents / 4; |
| 171 } | 171 } |
| 172 | 172 |
| 173 ALWAYS_INLINE void unpackOneRowOfRGBA5551ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) | 173 ALWAYS_INLINE void unpackOneRowOfRGBA5551ToRGBA8NEON(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) |
| 174 { | 174 { |
| 175 unsigned tailPixels = pixelsPerRow % 8; | 175 unsigned tailPixels = pixelsPerRow % 8; |
| 176 unsigned pixelSize = pixelsPerRow - tailPixels; | 176 unsigned pixelSize = pixelsPerRow - tailPixels; |
| 177 | 177 |
| 178 uint8x8_t immediate0x7 = vdup_n_u8(0x7); | 178 uint8x8_t immediate0x7 = vdup_n_u8(0x7); |
| 179 uint8x8_t immediate0xff = vdup_n_u8(0xFF); | 179 uint8x8_t immediate0xff = vdup_n_u8(0xFF); |
| 180 uint16x8_t immediate0x1f = vdupq_n_u16(0x1F); | 180 uint16x8_t immediate0x1f = vdupq_n_u16(0x1F); |
| 181 uint16x8_t immediate0x1 = vdupq_n_u16(0x1); | 181 uint16x8_t immediate0x1 = vdupq_n_u16(0x1); |
| 182 | 182 |
| 183 for (unsigned i = 0; i < pixelSize; i += 8) { | 183 for (unsigned i = 0; i < pixelSize; i += 8) { |
| (...skipping 11 matching lines...) |
| 195 | 195 |
| 196 uint8x8x4_t destComponents = {{componentR, componentG, componentB, componentA}}; | 196 uint8x8x4_t destComponents = {{componentR, componentG, componentB, componentA}}; |
| 197 vst4_u8(destination, destComponents); | 197 vst4_u8(destination, destComponents); |
| 198 destination += 32; | 198 destination += 32; |
| 199 } | 199 } |
| 200 | 200 |
| 201 source += pixelSize; | 201 source += pixelSize; |
| 202 pixelsPerRow = tailPixels; | 202 pixelsPerRow = tailPixels; |
| 203 } | 203 } |
| 204 | 204 |
| 205 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort5551(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow) | 205 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort5551NEON(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow) |
| 206 { | 206 { |
| 207 unsigned componentsPerRow = pixelsPerRow * 4; | 207 unsigned componentsPerRow = pixelsPerRow * 4; |
| 208 unsigned tailComponents = componentsPerRow % 32; | 208 unsigned tailComponents = componentsPerRow % 32; |
| 209 unsigned componentsSize = componentsPerRow - tailComponents; | 209 unsigned componentsSize = componentsPerRow - tailComponents; |
| 210 | 210 |
| 211 uint8_t* dst = reinterpret_cast<uint8_t*>(destination); | 211 uint8_t* dst = reinterpret_cast<uint8_t*>(destination); |
| 212 | 212 |
| 213 uint8x8_t immediate0xf8 = vdup_n_u8(0xF8); | 213 uint8x8_t immediate0xf8 = vdup_n_u8(0xF8); |
| 214 uint8x8_t immediate0x18 = vdup_n_u8(0x18); | 214 uint8x8_t immediate0x18 = vdup_n_u8(0x18); |
| 215 for (unsigned i = 0; i < componentsSize; i += 32) { | 215 for (unsigned i = 0; i < componentsSize; i += 32) { |
| (...skipping 11 matching lines...) |
| 227 RGBA5551.val[1] = vorr_u8(componentR, componentG3bit); | 227 RGBA5551.val[1] = vorr_u8(componentR, componentG3bit); |
| 228 vst2_u8(dst, RGBA5551); | 228 vst2_u8(dst, RGBA5551); |
| 229 dst += 16; | 229 dst += 16; |
| 230 } | 230 } |
| 231 | 231 |
| 232 source += componentsSize; | 232 source += componentsSize; |
| 233 destination += componentsSize / 4; | 233 destination += componentsSize / 4; |
| 234 pixelsPerRow = tailComponents / 4; | 234 pixelsPerRow = tailComponents / 4; |
| 235 } | 235 } |
| 236 | 236 |
| 237 ALWAYS_INLINE void unpackOneRowOfRGB565ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) | 237 ALWAYS_INLINE void unpackOneRowOfRGB565ToRGBA8NEON(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow) |
| 238 { | 238 { |
| 239 unsigned tailPixels = pixelsPerRow % 8; | 239 unsigned tailPixels = pixelsPerRow % 8; |
| 240 unsigned pixelSize = pixelsPerRow - tailPixels; | 240 unsigned pixelSize = pixelsPerRow - tailPixels; |
| 241 | 241 |
| 242 uint16x8_t immediate0x3f = vdupq_n_u16(0x3F); | 242 uint16x8_t immediate0x3f = vdupq_n_u16(0x3F); |
| 243 uint16x8_t immediate0x1f = vdupq_n_u16(0x1F); | 243 uint16x8_t immediate0x1f = vdupq_n_u16(0x1F); |
| 244 uint8x8_t immediate0x3 = vdup_n_u8(0x3); | 244 uint8x8_t immediate0x3 = vdup_n_u8(0x3); |
| 245 uint8x8_t immediate0x7 = vdup_n_u8(0x7); | 245 uint8x8_t immediate0x7 = vdup_n_u8(0x7); |
| 246 | 246 |
| 247 uint8x8_t componentA = vdup_n_u8(0xFF); | 247 uint8x8_t componentA = vdup_n_u8(0xFF); |
| (...skipping 11 matching lines...) |
| 259 | 259 |
| 260 uint8x8x4_t destComponents = {{componentR, componentG, componentB, componentA}}; | 260 uint8x8x4_t destComponents = {{componentR, componentG, componentB, componentA}}; |
| 261 vst4_u8(destination, destComponents); | 261 vst4_u8(destination, destComponents); |
| 262 destination += 32; | 262 destination += 32; |
| 263 } | 263 } |
| 264 | 264 |
| 265 source += pixelSize; | 265 source += pixelSize; |
| 266 pixelsPerRow = tailPixels; | 266 pixelsPerRow = tailPixels; |
| 267 } | 267 } |
| 268 | 268 |
| 269 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort565(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow) | 269 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort565NEON(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow) |
| 270 { | 270 { |
| 271 unsigned componentsPerRow = pixelsPerRow * 4; | 271 unsigned componentsPerRow = pixelsPerRow * 4; |
| 272 unsigned tailComponents = componentsPerRow % 32; | 272 unsigned tailComponents = componentsPerRow % 32; |
| 273 unsigned componentsSize = componentsPerRow - tailComponents; | 273 unsigned componentsSize = componentsPerRow - tailComponents; |
| 274 uint8_t* dst = reinterpret_cast<uint8_t*>(destination); | 274 uint8_t* dst = reinterpret_cast<uint8_t*>(destination); |
| 275 | 275 |
| 276 uint8x8_t immediate0xf8 = vdup_n_u8(0xF8); | 276 uint8x8_t immediate0xf8 = vdup_n_u8(0xF8); |
| 277 uint8x8_t immediate0x1c = vdup_n_u8(0x1C); | 277 uint8x8_t immediate0x1c = vdup_n_u8(0x1C); |
| 278 for (unsigned i = 0; i < componentsSize; i += 32) { | 278 for (unsigned i = 0; i < componentsSize; i += 32) { |
| 279 uint8x8x4_t RGBA8 = vld4_u8(source + i); | 279 uint8x8x4_t RGBA8 = vld4_u8(source + i); |
| (...skipping 15 matching lines...) |
| 295 pixelsPerRow = tailComponents / 4; | 295 pixelsPerRow = tailComponents / 4; |
| 296 } | 296 } |
| 297 | 297 |
| 298 } // namespace SIMD | 298 } // namespace SIMD |
| 299 | 299 |
| 300 } // namespace blink | 300 } // namespace blink |
| 301 | 301 |
| 302 #endif // HAVE(ARM_NEON_INTRINSICS) | 302 #endif // HAVE(ARM_NEON_INTRINSICS) |
| 303 | 303 |
| 304 #endif // WebGLImageConversionNEON_h | 304 #endif // WebGLImageConversionNEON_h |
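All of the helpers in this header follow the same contract: the NEON body consumes as many full SIMD blocks as fit in the row, advances `source` and `destination` past the processed data, and rewrites `pixelsPerRow` to the number of leftover pixels so the caller's scalar path can finish the row. Below is a minimal usage sketch of that pattern for unpackOneRowOfRGBA4444ToRGBA8NEON; the wrapper function, its name, and the scalar tail loop are illustrative assumptions, not the actual Blink call site (the generic unpack/pack templates that drive these helpers are not part of this diff).

// Minimal sketch (assumed caller, not Blink's real code path). Assumes this
// header is already included and HAVE(ARM_NEON_INTRINSICS) is enabled.
#include <stdint.h>

void unpackRowRGBA4444(const uint16_t* src, uint8_t* dst, unsigned pixelsPerRow)
{
    const uint16_t* source = src;
    uint8_t* destination = dst;

    // Vector pass: handles pixelsPerRow rounded down to a multiple of 8,
    // advances both pointers, and leaves the tail pixel count in pixelsPerRow.
    blink::SIMD::unpackOneRowOfRGBA4444ToRGBA8NEON(source, destination, pixelsPerRow);

    // Scalar tail: at most 7 pixels remain. Expand each 4-bit channel to
    // 8 bits by replicating the nibble, matching the vector shift-and-or step.
    for (unsigned i = 0; i < pixelsPerRow; ++i) {
        uint16_t packed = source[i];
        uint8_t r = (packed >> 12) & 0xF;
        uint8_t g = (packed >> 8) & 0xF;
        uint8_t b = (packed >> 4) & 0xF;
        uint8_t a = packed & 0xF;
        destination[4 * i + 0] = (r << 4) | r;
        destination[4 * i + 1] = (g << 4) | g;
        destination[4 * i + 2] = (b << 4) | b;
        destination[4 * i + 3] = (a << 4) | a;
    }
}

Passing the pointers and the pixel count by reference is what lets the scalar fallback pick up exactly where the vector code stopped, which is why every signature in this header takes `const uint16_t*&`/`uint8_t*&` and `unsigned&` rather than values.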