| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (C) 2012 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Sz
eged | |
| 3 * | |
| 4 * Redistribution and use in source and binary forms, with or without | |
| 5 * modification, are permitted provided that the following conditions | |
| 6 * are met: | |
| 7 * 1. Redistributions of source code must retain the above copyright | |
| 8 * notice, this list of conditions and the following disclaimer. | |
| 9 * 2. Redistributions in binary form must reproduce the above copyright | |
| 10 * notice, this list of conditions and the following disclaimer in the | |
| 11 * documentation and/or other materials provided with the distribution. | |
| 12 * | |
| 13 * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY | |
| 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR | |
| 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 24 */ | |
| 25 | |
| 26 #ifndef WebGLImageConversionNEON_h | |
| 27 #define WebGLImageConversionNEON_h | |
| 28 | |
| 29 #if HAVE(ARM_NEON_INTRINSICS) | |
| 30 | |
| 31 #include <arm_neon.h> | |
| 32 | |
| 33 namespace blink { | |
| 34 | |
| 35 namespace SIMD { | |
| 36 | |
ALWAYS_INLINE void unpackOneRowOfRGBA16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    // Narrows little-endian 16-bit RGBA components to 8 bits by keeping the
    // high byte of each component. vld2q_u8 deinterleaves byte pairs:
    // val[0] holds the low bytes, val[1] the high bytes of 16 components.
    // Leftover components that do not fill a full vector are handed back to
    // the caller through the updated pixelsPerRow.
    unsigned totalComponents = pixelsPerRow * 4;
    unsigned remainderComponents = totalComponents % 16;
    unsigned simdComponents = totalComponents - remainderComponents;
    const uint8_t* sourceBytes = reinterpret_cast<const uint8_t*>(source);

    for (unsigned componentIndex = 0; componentIndex < simdComponents; componentIndex += 16) {
        // Each output component consumes two source bytes.
        uint8x16x2_t lowAndHighBytes = vld2q_u8(sourceBytes + componentIndex * 2);
        vst1q_u8(destination + componentIndex, lowAndHighBytes.val[1]);
    }

    source += simdComponents;
    destination += simdComponents;
    pixelsPerRow = remainderComponents / 4;
}
| 53 | |
ALWAYS_INLINE void unpackOneRowOfRGB16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    // Converts 8 RGB pixels (16 bits per component) to RGBA8 per iteration:
    // keep the high byte of each component and append an opaque alpha.
    // Remaining pixels are returned to the caller via pixelsPerRow.
    unsigned totalComponents = pixelsPerRow * 3;
    unsigned remainderComponents = totalComponents % 24;
    unsigned simdComponents = totalComponents - remainderComponents;

    uint8x8_t opaqueAlpha = vdup_n_u8(0xFF);
    for (unsigned componentIndex = 0; componentIndex < simdComponents; componentIndex += 24) {
        uint16x8x3_t rgb16 = vld3q_u16(source + componentIndex);
        uint8x8_t red = vqmovn_u16(vshrq_n_u16(rgb16.val[0], 8));
        uint8x8_t green = vqmovn_u16(vshrq_n_u16(rgb16.val[1], 8));
        uint8x8_t blue = vqmovn_u16(vshrq_n_u16(rgb16.val[2], 8));
        uint8x8x4_t rgba8 = {{red, green, blue, opaqueAlpha}};
        vst4_u8(destination, rgba8);
        destination += 32; // 8 pixels * 4 output bytes per iteration.
    }

    source += simdComponents;
    pixelsPerRow = remainderComponents / 3;
}
| 74 | |
ALWAYS_INLINE void unpackOneRowOfARGB16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    // Reorders 8 ARGB pixels (16 bits per component) into RGBA8 per
    // iteration, narrowing each component to its high byte. The tail that
    // does not fill a vector is handed back through pixelsPerRow.
    unsigned totalComponents = pixelsPerRow * 4;
    unsigned remainderComponents = totalComponents % 32;
    unsigned simdComponents = totalComponents - remainderComponents;

    for (unsigned componentIndex = 0; componentIndex < simdComponents; componentIndex += 32) {
        uint16x8x4_t argb16 = vld4q_u16(source + componentIndex);
        uint8x8_t alpha = vqmovn_u16(vshrq_n_u16(argb16.val[0], 8));
        uint8x8_t red = vqmovn_u16(vshrq_n_u16(argb16.val[1], 8));
        uint8x8_t green = vqmovn_u16(vshrq_n_u16(argb16.val[2], 8));
        uint8x8_t blue = vqmovn_u16(vshrq_n_u16(argb16.val[3], 8));
        uint8x8x4_t rgba8 = {{red, green, blue, alpha}};
        vst4_u8(destination + componentIndex, rgba8);
    }

    source += simdComponents;
    destination += simdComponents;
    pixelsPerRow = remainderComponents / 4;
}
| 95 | |
ALWAYS_INLINE void unpackOneRowOfBGRA16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    // Reorders 8 BGRA pixels (16 bits per component) into RGBA8 per
    // iteration, keeping the high byte of each little-endian component.
    // Fix: the deinterleaved vector was named "ARGB16" (copy-pasted from the
    // ARGB variant) even though its lanes hold B, G, R, A for this format —
    // renamed to BGRA16 so the lane indexing below reads correctly. No
    // behavior change.
    unsigned componentsPerRow = pixelsPerRow * 4;
    unsigned tailComponents = componentsPerRow % 32;
    unsigned componentsSize = componentsPerRow - tailComponents;

    for (unsigned i = 0; i < componentsSize; i += 32) {
        uint16x8x4_t BGRA16 = vld4q_u16(source + i);
        uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(BGRA16.val[0], 8));
        uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(BGRA16.val[1], 8));
        uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(BGRA16.val[2], 8));
        uint8x8_t componentA = vqmovn_u16(vshrq_n_u16(BGRA16.val[3], 8));
        uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}};
        vst4_u8(destination + i, RGBA8);
    }

    // Hand the unprocessed tail pixels back to the scalar fallback.
    source += componentsSize;
    destination += componentsSize;
    pixelsPerRow = tailComponents / 4;
}
| 116 | |
ALWAYS_INLINE void unpackOneRowOfRGBA4444ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    // Expands 8 RGBA4444 pixels to RGBA8 per iteration. Each 4-bit value n
    // is widened to 8 bits as (n << 4) | n, mapping 0x0 -> 0x00 and
    // 0xF -> 0xFF exactly. Tail pixels are returned through pixelsPerRow.
    unsigned remainderPixels = pixelsPerRow % 8;
    unsigned simdPixels = pixelsPerRow - remainderPixels;

    uint16x8_t nibbleMask = vdupq_n_u16(0x0F);
    for (unsigned pixelIndex = 0; pixelIndex < simdPixels; pixelIndex += 8) {
        uint16x8_t packedPixels = vld1q_u16(source + pixelIndex);

        // Layout of each 16-bit pixel: RRRR GGGG BBBB AAAA (bit 15 -> 0).
        uint8x8_t red = vqmovn_u16(vshrq_n_u16(packedPixels, 12));
        uint8x8_t green = vqmovn_u16(vandq_u16(vshrq_n_u16(packedPixels, 8), nibbleMask));
        uint8x8_t blue = vqmovn_u16(vandq_u16(vshrq_n_u16(packedPixels, 4), nibbleMask));
        uint8x8_t alpha = vqmovn_u16(vandq_u16(packedPixels, nibbleMask));

        // Duplicate each nibble into both halves of the output byte.
        red = vorr_u8(vshl_n_u8(red, 4), red);
        green = vorr_u8(vshl_n_u8(green, 4), green);
        blue = vorr_u8(vshl_n_u8(blue, 4), blue);
        alpha = vorr_u8(vshl_n_u8(alpha, 4), alpha);

        uint8x8x4_t rgba8 = {{red, green, blue, alpha}};
        vst4_u8(destination, rgba8);
        destination += 32;
    }

    source += simdPixels;
    pixelsPerRow = remainderPixels;
}
| 144 | |
ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort4444(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow)
{
    // Packs 8 RGBA8 pixels into RGBA4444 per iteration by keeping the high
    // nibble of each component. vst2_u8 interleaves the two lanes so each
    // 16-bit pixel is stored little-endian as bytes (B|A, R|G).
    unsigned totalComponents = pixelsPerRow * 4;
    unsigned remainderComponents = totalComponents % 32;
    unsigned simdComponents = totalComponents - remainderComponents;

    uint8_t* destinationBytes = reinterpret_cast<uint8_t*>(destination);
    uint8x8_t highNibbleMask = vdup_n_u8(0xF0);
    for (unsigned componentIndex = 0; componentIndex < simdComponents; componentIndex += 32) {
        uint8x8x4_t rgba8 = vld4_u8(source + componentIndex);

        uint8x8_t red = vand_u8(rgba8.val[0], highNibbleMask);
        uint8x8_t green = vshr_n_u8(vand_u8(rgba8.val[1], highNibbleMask), 4);
        uint8x8_t blue = vand_u8(rgba8.val[2], highNibbleMask);
        uint8x8_t alpha = vshr_n_u8(vand_u8(rgba8.val[3], highNibbleMask), 4);

        uint8x8x2_t packedBytes;
        packedBytes.val[0] = vorr_u8(blue, alpha);  // low byte:  BBBB AAAA
        packedBytes.val[1] = vorr_u8(red, green);   // high byte: RRRR GGGG
        vst2_u8(destinationBytes, packedBytes);
        destinationBytes += 16;
    }

    source += simdComponents;
    destination += simdComponents / 4;
    pixelsPerRow = remainderComponents / 4;
}
| 172 | |
ALWAYS_INLINE void unpackOneRowOfRGBA5551ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    // Expands 8 RGBA5551 pixels to RGBA8 per iteration; leftover pixels are
    // handed back to the caller (scalar tail loop) via pixelsPerRow.
    //
    // Fix: a 5-bit component c must be widened with top-bit replication,
    // (c << 3) | (c >> 2), to match the scalar unpack path that converts the
    // row tail. The previous code OR-ed in the LOW three bits,
    // (c << 3) | (c & 7), so SIMD-converted pixels could differ from the
    // scalar-converted tail of the same row.
    unsigned tailPixels = pixelsPerRow % 8;
    unsigned pixelSize = pixelsPerRow - tailPixels;

    uint8x8_t immediate0xff = vdup_n_u8(0xFF);
    uint16x8_t immediate0x1f = vdupq_n_u16(0x1F);
    uint16x8_t immediate0x1 = vdupq_n_u16(0x1);

    for (unsigned i = 0; i < pixelSize; i += 8) {
        uint16x8_t eightPixels = vld1q_u16(source + i);

        // Pixel layout: RRRRRGGG GGBBBBBA (bit 15 down to bit 0).
        uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(eightPixels, 11));
        uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 6), immediate0x1f));
        uint8x8_t componentB = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 1), immediate0x1f));
        uint8x8_t componentA = vqmovn_u16(vandq_u16(eightPixels, immediate0x1));

        // 5-bit -> 8-bit: replicate the top 3 bits into the low bits.
        componentR = vorr_u8(vshl_n_u8(componentR, 3), vshr_n_u8(componentR, 2));
        componentG = vorr_u8(vshl_n_u8(componentG, 3), vshr_n_u8(componentG, 2));
        componentB = vorr_u8(vshl_n_u8(componentB, 3), vshr_n_u8(componentB, 2));
        // Alpha bit 0/1 -> 0x00/0xFF.
        componentA = vmul_u8(componentA, immediate0xff);

        uint8x8x4_t destComponents = {{componentR, componentG, componentB, componentA}};
        vst4_u8(destination, destComponents);
        destination += 32;
    }

    source += pixelSize;
    pixelsPerRow = tailPixels;
}
| 204 | |
ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort5551(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow)
{
    // Packs 8 RGBA8 pixels into RGBA5551 per iteration. Each 16-bit result
    // is assembled byte-wise and stored little-endian via vst2_u8:
    //   high byte: RRRRRGGG (top 5 bits of R, top 3 bits of G)
    //   low byte:  GGBBBBBA (next 2 bits of G, top 5 bits of B, top bit of A)
    unsigned totalComponents = pixelsPerRow * 4;
    unsigned remainderComponents = totalComponents % 32;
    unsigned simdComponents = totalComponents - remainderComponents;

    uint8_t* destinationBytes = reinterpret_cast<uint8_t*>(destination);

    uint8x8_t topFiveBitsMask = vdup_n_u8(0xF8);
    uint8x8_t greenMidBitsMask = vdup_n_u8(0x18); // bits g4..g3 of the 5-bit green field
    for (unsigned componentIndex = 0; componentIndex < simdComponents; componentIndex += 32) {
        uint8x8x4_t rgba8 = vld4_u8(source + componentIndex);

        uint8x8_t red = vand_u8(rgba8.val[0], topFiveBitsMask);
        uint8x8_t greenHigh = vshr_n_u8(rgba8.val[1], 5);                        // -> high byte bits 2..0
        uint8x8_t greenLow = vshl_n_u8(vand_u8(rgba8.val[1], greenMidBitsMask), 3); // -> low byte bits 7..6
        uint8x8_t blue = vshr_n_u8(vand_u8(rgba8.val[2], topFiveBitsMask), 2);   // -> low byte bits 5..1
        uint8x8_t alpha = vshr_n_u8(rgba8.val[3], 7);                            // -> low byte bit 0

        uint8x8x2_t packedBytes;
        packedBytes.val[0] = vorr_u8(vorr_u8(greenLow, blue), alpha);
        packedBytes.val[1] = vorr_u8(red, greenHigh);
        vst2_u8(destinationBytes, packedBytes);
        destinationBytes += 16;
    }

    source += simdComponents;
    destination += simdComponents / 4;
    pixelsPerRow = remainderComponents / 4;
}
| 236 | |
ALWAYS_INLINE void unpackOneRowOfRGB565ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
{
    // Expands 8 RGB565 pixels to opaque RGBA8 per iteration; leftover pixels
    // are handed back to the caller (scalar tail loop) via pixelsPerRow.
    //
    // Fix: components must be widened with top-bit replication to match the
    // scalar unpack path used for the row tail:
    //   5-bit c -> (c << 3) | (c >> 2), 6-bit c -> (c << 2) | (c >> 4).
    // The previous code OR-ed in the LOW bits ((c << 3) | (c & 7) and
    // (c << 2) | (c & 3)), so SIMD-converted pixels could differ from the
    // scalar-converted tail of the same row.
    unsigned tailPixels = pixelsPerRow % 8;
    unsigned pixelSize = pixelsPerRow - tailPixels;

    uint16x8_t immediate0x3f = vdupq_n_u16(0x3F);
    uint16x8_t immediate0x1f = vdupq_n_u16(0x1F);

    uint8x8_t componentA = vdup_n_u8(0xFF); // RGB565 has no alpha; emit opaque.

    for (unsigned i = 0; i < pixelSize; i += 8) {
        uint16x8_t eightPixels = vld1q_u16(source + i);

        // Pixel layout: RRRRRGGG GGGBBBBB (bit 15 down to bit 0).
        uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(eightPixels, 11));
        uint8x8_t componentG = vqmovn_u16(vandq_u16(vshrq_n_u16(eightPixels, 5), immediate0x3f));
        uint8x8_t componentB = vqmovn_u16(vandq_u16(eightPixels, immediate0x1f));

        // Replicate the top bits into the low bits of the widened byte.
        componentR = vorr_u8(vshl_n_u8(componentR, 3), vshr_n_u8(componentR, 2));
        componentG = vorr_u8(vshl_n_u8(componentG, 2), vshr_n_u8(componentG, 4));
        componentB = vorr_u8(vshl_n_u8(componentB, 3), vshr_n_u8(componentB, 2));

        uint8x8x4_t destComponents = {{componentR, componentG, componentB, componentA}};
        vst4_u8(destination, destComponents);
        destination += 32;
    }

    source += pixelSize;
    pixelsPerRow = tailPixels;
}
| 268 | |
ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort565(const uint8_t*& source, uint16_t*& destination, unsigned& pixelsPerRow)
{
    // Packs 8 RGBA8 pixels into RGB565 per iteration (alpha is discarded).
    // Each 16-bit result is built byte-wise and stored little-endian:
    //   high byte: RRRRRGGG, low byte: GGGBBBBB.
    unsigned totalComponents = pixelsPerRow * 4;
    unsigned remainderComponents = totalComponents % 32;
    unsigned simdComponents = totalComponents - remainderComponents;
    uint8_t* destinationBytes = reinterpret_cast<uint8_t*>(destination);

    uint8x8_t topFiveBitsMask = vdup_n_u8(0xF8);
    uint8x8_t greenMidBitsMask = vdup_n_u8(0x1C); // bits g4..g2 of the 6-bit green field
    for (unsigned componentIndex = 0; componentIndex < simdComponents; componentIndex += 32) {
        uint8x8x4_t rgba8 = vld4_u8(source + componentIndex);

        uint8x8_t red = vand_u8(rgba8.val[0], topFiveBitsMask);                      // -> high byte bits 7..3
        uint8x8_t greenHigh = vshr_n_u8(rgba8.val[1], 5);                            // -> high byte bits 2..0
        uint8x8_t greenLow = vshl_n_u8(vand_u8(rgba8.val[1], greenMidBitsMask), 3);  // -> low byte bits 7..5
        uint8x8_t blue = vshr_n_u8(vand_u8(rgba8.val[2], topFiveBitsMask), 3);       // -> low byte bits 4..0

        uint8x8x2_t packedBytes;
        packedBytes.val[0] = vorr_u8(greenLow, blue);
        packedBytes.val[1] = vorr_u8(red, greenHigh);
        vst2_u8(destinationBytes, packedBytes);
        destinationBytes += 16;
    }

    source += simdComponents;
    destination += simdComponents / 4;
    pixelsPerRow = remainderComponents / 4;
}
| 297 | |
| 298 } // namespace SIMD | |
| 299 | |
| 300 } // namespace blink | |
| 301 | |
| 302 #endif // HAVE(ARM_NEON_INTRINSICS) | |
| 303 | |
| 304 #endif // WebGLImageConversionNEON_h | |
| OLD | NEW |