| OLD | NEW |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CommonMacrosMSA_h | 5 #ifndef CommonMacrosMSA_h |
| 6 #define CommonMacrosMSA_h | 6 #define CommonMacrosMSA_h |
| 7 | 7 |
| 8 #include <msa.h> | 8 #include <msa.h> |
| 9 #include <stdint.h> | 9 #include <stdint.h> |
| 10 | 10 |
| 11 #if defined(__clang__) | 11 #if defined(__clang__) |
| 12 #define CLANG_BUILD | 12 #define CLANG_BUILD |
| 13 #endif | 13 #endif |
| 14 | 14 |
| 15 #ifdef CLANG_BUILD | 15 #ifdef CLANG_BUILD |
| 16 #define SRLI_B(a, b) __msa_srli_b((v16i8)a, b) |
| 16 #define SRLI_H(a, b) __msa_srli_h((v8i16)a, b) | 17 #define SRLI_H(a, b) __msa_srli_h((v8i16)a, b) |
| 18 #define SLLI_B(a, b) __msa_slli_b((v16i8)a, b) |
| 17 #define SLLI_H(a, b) __msa_slli_h((v8i16)a, b) | 19 #define SLLI_H(a, b) __msa_slli_h((v8i16)a, b) |
| 20 #define CEQI_B(a, b) __msa_ceqi_b((v16i8)a, b) |
| 18 #define CEQI_H(a, b) __msa_ceqi_h((v8i16)a, b) | 21 #define CEQI_H(a, b) __msa_ceqi_h((v8i16)a, b) |
| 22 #define ANDI_B(a, b) __msa_andi_b((v16u8)a, b) |
| 19 #else | 23 #else |
| 24 #define SRLI_B(a, b) ((v16u8)a >> b) |
| 20 #define SRLI_H(a, b) ((v8u16)a >> b) | 25 #define SRLI_H(a, b) ((v8u16)a >> b) |
| 26 #define SLLI_B(a, b) ((v16i8)a << b) |
| 21 #define SLLI_H(a, b) ((v8i16)a << b) | 27 #define SLLI_H(a, b) ((v8i16)a << b) |
| 28 #define CEQI_B(a, b) (a == b) |
| 22 #define CEQI_H(a, b) (a == b) | 29 #define CEQI_H(a, b) (a == b) |
| 30 #define ANDI_B(a, b) ((v16u8)a & b) |
| 23 #endif | 31 #endif |
| 24 | 32 |
| 25 #define LD_V(RTYPE, psrc) *((RTYPE*)(psrc)) | 33 #define LD_V(RTYPE, psrc) *((RTYPE*)(psrc)) |
| 26 #define LD_UB(...) LD_V(v16u8, __VA_ARGS__) | 34 #define LD_UB(...) LD_V(v16u8, __VA_ARGS__) |
| 27 #define LD_UH(...) LD_V(v8u16, __VA_ARGS__) | 35 #define LD_UH(...) LD_V(v8u16, __VA_ARGS__) |
| 28 #define LD_SP(...) LD_V(v4f32, __VA_ARGS__) | 36 #define LD_SP(...) LD_V(v4f32, __VA_ARGS__) |
| 29 #define LD_DP(...) LD_V(v2f64, __VA_ARGS__) | 37 #define LD_DP(...) LD_V(v2f64, __VA_ARGS__) |
| 30 | 38 |
| 31 #define ST_V(RTYPE, in, pdst) *((RTYPE*)(pdst)) = in | 39 #define ST_V(RTYPE, in, pdst) *((RTYPE*)(pdst)) = in |
| 32 #define ST_UB(...) ST_V(v16u8, __VA_ARGS__) | 40 #define ST_UB(...) ST_V(v16u8, __VA_ARGS__) |
| (...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 127 | 135 |
| 128 #define LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3) \ | 136 #define LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3) \ |
| 129 { \ | 137 { \ |
| 130 LD_V2(RTYPE, psrc, stride, out0, out1); \ | 138 LD_V2(RTYPE, psrc, stride, out0, out1); \ |
| 131 LD_V2(RTYPE, psrc, stride, out2, out3); \ | 139 LD_V2(RTYPE, psrc, stride, out2, out3); \ |
| 132 } | 140 } |
| 133 #define LD_UB4(...) LD_V4(v16u8, __VA_ARGS__) | 141 #define LD_UB4(...) LD_V4(v16u8, __VA_ARGS__) |
| 134 #define LD_UH4(...) LD_V4(v8u16, __VA_ARGS__) | 142 #define LD_UH4(...) LD_V4(v8u16, __VA_ARGS__) |
| 135 #define LD_SP4(...) LD_V4(v4f32, __VA_ARGS__) | 143 #define LD_SP4(...) LD_V4(v4f32, __VA_ARGS__) |
| 136 | 144 |
| 145 #define LD_V6(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5) \ |
| 146 { \ |
| 147 LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3); \ |
| 148 LD_V2(RTYPE, psrc, stride, out4, out5); \ |
| 149 } |
| 150 #define LD_UB6(...) LD_V6(v16u8, __VA_ARGS__) |
| 151 #define LD_UH6(...) LD_V6(v8u16, __VA_ARGS__) |
| 152 #define LD_SP6(...) LD_V6(v4f32, __VA_ARGS__) |
| 153 |
| 154 #define LD_V8(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5, out6, out
7) \ |
| 155 {
\ |
| 156 LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3);
\ |
| 157 LD_V4(RTYPE, psrc, stride, out4, out5, out6, out7);
\ |
| 158 } |
| 159 #define LD_UB8(...) LD_V8(v16u8, __VA_ARGS__) |
| 160 #define LD_UH8(...) LD_V8(v8u16, __VA_ARGS__) |
| 161 #define LD_SP8(...) LD_V8(v4f32, __VA_ARGS__) |
| 162 #define LD_DP8(...) LD_V8(v2f64, __VA_ARGS__) |
| 163 |
| 137 /* Description : Store vectors of elements with stride | 164 /* Description : Store vectors of elements with stride |
| 138 * Arguments : Inputs - in0, in1, pdst, stride | 165 * Arguments : Inputs - in0, in1, pdst, stride |
| 139 * Details : Store elements from 'in0' to (pdst) | 166 * Details : Store elements from 'in0' to (pdst) |
| 140 * Store elements from 'in1' to (pdst + stride) | 167 * Store elements from 'in1' to (pdst + stride) |
| 141 */ | 168 */ |
| 142 #define ST_V2(RTYPE, in0, in1, pdst, stride) \ | 169 #define ST_V2(RTYPE, in0, in1, pdst, stride) \ |
| 143 { \ | 170 { \ |
| 144 ST_V(RTYPE, in0, pdst); \ | 171 ST_V(RTYPE, in0, pdst); \ |
| 145 pdst += stride; \ | 172 pdst += stride; \ |
| 146 ST_V(RTYPE, in1, pdst); \ | 173 ST_V(RTYPE, in1, pdst); \ |
| (...skipping 13 matching lines...) Expand all Loading... |
| 160 #define ST_UH3(...) ST_V3(v8u16, __VA_ARGS__) | 187 #define ST_UH3(...) ST_V3(v8u16, __VA_ARGS__) |
| 161 | 188 |
| 162 #define ST_V4(RTYPE, in0, in1, in2, in3, pdst, stride) \ | 189 #define ST_V4(RTYPE, in0, in1, in2, in3, pdst, stride) \ |
| 163 { \ | 190 { \ |
| 164 ST_V2(RTYPE, in0, in1, pdst, stride); \ | 191 ST_V2(RTYPE, in0, in1, pdst, stride); \ |
| 165 ST_V2(RTYPE, in2, in3, pdst, stride); \ | 192 ST_V2(RTYPE, in2, in3, pdst, stride); \ |
| 166 } | 193 } |
| 167 #define ST_UB4(...) ST_V4(v16u8, __VA_ARGS__) | 194 #define ST_UB4(...) ST_V4(v16u8, __VA_ARGS__) |
| 168 #define ST_UH4(...) ST_V4(v8u16, __VA_ARGS__) | 195 #define ST_UH4(...) ST_V4(v8u16, __VA_ARGS__) |
| 169 #define ST_SP4(...) ST_V4(v4f32, __VA_ARGS__) | 196 #define ST_SP4(...) ST_V4(v4f32, __VA_ARGS__) |
| 197 |
| 170 #define ST_V6(RTYPE, in0, in1, in2, in3, in4, in5, pdst, stride) \ | 198 #define ST_V6(RTYPE, in0, in1, in2, in3, in4, in5, pdst, stride) \ |
| 171 { \ | 199 { \ |
| 172 ST_V3(RTYPE, in0, in1, in2, pdst, stride); \ | 200 ST_V3(RTYPE, in0, in1, in2, pdst, stride); \ |
| 173 ST_V3(RTYPE, in3, in4, in5, pdst, stride); \ | 201 ST_V3(RTYPE, in3, in4, in5, pdst, stride); \ |
| 174 } | 202 } |
| 175 #define ST_UB6(...) ST_V6(v16u8, __VA_ARGS__) | 203 #define ST_UB6(...) ST_V6(v16u8, __VA_ARGS__) |
| 176 #define ST_SP6(...) ST_V6(v4f32, __VA_ARGS__) | 204 #define ST_SP6(...) ST_V6(v4f32, __VA_ARGS__) |
| 177 | 205 |
| 178 #define ST_V8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \ | 206 #define ST_V8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \ |
| 179 { \ | 207 { \ |
| 180 ST_V4(RTYPE, in0, in1, in2, in3, pdst, stride); \ | 208 ST_V4(RTYPE, in0, in1, in2, in3, pdst, stride); \ |
| 181 ST_V4(RTYPE, in4, in5, in6, in7, pdst, stride); \ | 209 ST_V4(RTYPE, in4, in5, in6, in7, pdst, stride); \ |
| 182 } | 210 } |
| 183 #define ST_UB8(...) ST_V8(v16u8, __VA_ARGS__) | 211 #define ST_UB8(...) ST_V8(v16u8, __VA_ARGS__) |
| 184 #define ST_SP8(...) ST_V8(v4f32, __VA_ARGS__) | 212 #define ST_SP8(...) ST_V8(v4f32, __VA_ARGS__) |
| 185 | 213 |
| 214 /* Description : Logical AND of input vectors with a mask vector |
| 215 Arguments : Inputs - in0, in1, mask |
| 216 Outputs - out0, out1 |
| 217 Return Type - as per RTYPE |
| 218 Details : Each element from 'in0' and 'in1' is bitwise AND'ed with the |
| 219 corresponding element of 'mask' and the results are written |
| 220 to 'out0' and 'out1' respectively |
| 221 */ |
| 222 #define AND_V2(RTYPE, in0, in1, mask, out0, out1) \ |
| 223 { \ |
| 224 out0 = (RTYPE)__msa_and_v((v16u8)in0, (v16u8)mask); \ |
| 225 out1 = (RTYPE)__msa_and_v((v16u8)in1, (v16u8)mask); \ |
| 226 } |
| 227 #define AND_V2_UB(...) AND_V2(v16u8, __VA_ARGS__) |
| 228 |
| 229 #define AND_V4(RTYPE, in0, in1, in2, in3, mask, out0, out1, out2, out3) \ |
| 230 { \ |
| 231 AND_V2(RTYPE, in0, in1, mask, out0, out1); \ |
| 232 AND_V2(RTYPE, in2, in3, mask, out2, out3); \ |
| 233 } |
| 234 #define AND_V4_UB(...) AND_V4(v16u8, __VA_ARGS__) |
| 235 |
| 236 /* Description : Compare elements of input vectors with immediate value |
| 237 Arguments : Inputs - in0, in1, val |
| 238 Outputs - out0, out1 |
| 239 Return Type - as per RTYPE |
| 240 Details : Each byte element from input vector 'in0' & 'in1' is compared |
| 241 for equality with the immediate 'val'; equal elements are set |
| 242 to all ones and unequal elements to zero in 'out0' & 'out1'. |
| 243 */ |
| 244 #define CEQI_B2(RTYPE, in0, in1, val, out0, out1) \ |
| 245 { \ |
| 246 out0 = CEQI_B(in0, val); \ |
| 247 out1 = CEQI_B(in1, val); \ |
| 248 } |
| 249 #define CEQI_B2_UB(...) CEQI_B2(v16u8, __VA_ARGS__) |
| 250 |
| 251 #define CEQI_B4(RTYPE, in0, in1, in2, in3, val, out0, out1, out2, out3) \ |
| 252 { \ |
| 253 CEQI_B2(RTYPE, in0, in1, val, out0, out1); \ |
| 254 CEQI_B2(RTYPE, in2, in3, val, out2, out3); \ |
| 255 } |
| 256 #define CEQI_B4_UB(...) CEQI_B4(v16u8, __VA_ARGS__) |
| 257 |
| 258 /* Description : Immediate number of elements to slide |
| 259 * Arguments : Inputs - in0, in1, slide_val |
| 260 * Outputs - out |
| 261 * Return Type - as per RTYPE |
| 262 * Details : Byte elements from 'in1' vector are slid into 'in0' by |
| 263 * value specified in the 'slide_val' |
| 264 */ |
| 265 #define SLDI_B(RTYPE, in0, in1, slide_val) \ |
| 266 (RTYPE)__msa_sldi_b((v16i8)in0, (v16i8)in1, slide_val) |
| 267 #define SLDI_UB(...) SLDI_B(v16u8, __VA_ARGS__) |
| 268 #define SLDI_D(...) SLDI_B(v2f64, __VA_ARGS__) |
| 269 |
| 270 /* Description : Immediate number of elements to slide |
| 271 Arguments : Inputs - in0_0, in0_1, in1_0, in1_1, slide_val |
| 272 Outputs - out0, out1 |
| 273 Return Type - as per RTYPE |
| 274 Details : Byte elements from 'in0_0' vector are slid into 'in1_0' by |
| 275 value specified in the 'slide_val' |
| 276 */ |
| 277 #define SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) \ |
| 278 { \ |
| 279 out0 = SLDI_B(RTYPE, in0_0, in1_0, slide_val); \ |
| 280 out1 = SLDI_B(RTYPE, in0_1, in1_1, slide_val); \ |
| 281 } |
| 282 #define SLDI_B2_UB(...) SLDI_B2(v16u8, __VA_ARGS__) |
| 283 |
| 284 /* Description : Shuffle byte vector elements as per shuffle control |
| 285 Arguments : Inputs - in0, in1, shf_val |
| 286 Outputs - in place operation |
| 287 Return Type - as per RTYPE |
| 288 Details : Byte elements from 'in0' & 'in1' are rearranged as per the |
| 289 control value 'shf_val' and written back in-place. |
| 290 */ |
| 291 #define SHF_B2(RTYPE, in0, in1, shf_val) \ |
| 292 { \ |
| 293 in0 = (RTYPE)__msa_shf_b((v16i8)in0, shf_val); \ |
| 294 in1 = (RTYPE)__msa_shf_b((v16i8)in1, shf_val); \ |
| 295 } |
| 296 #define SHF_B2_UB(...) SHF_B2(v16u8, __VA_ARGS__) |
| 297 #define SHF_B2_UH(...) SHF_B2(v8u16, __VA_ARGS__) |
| 298 |
| 299 #define SHF_B4(RTYPE, in0, in1, in2, in3, shf_val) \ |
| 300 { \ |
| 301 SHF_B2(RTYPE, in0, in1, shf_val); \ |
| 302 SHF_B2(RTYPE, in2, in3, shf_val); \ |
| 303 } |
| 304 #define SHF_B4_UB(...) SHF_B4(v16u8, __VA_ARGS__) |
| 305 #define SHF_B4_UH(...) SHF_B4(v8u16, __VA_ARGS__) |
| 306 |
| 307 /* Description : Interleave even byte elements from vectors |
| 308 Arguments : Inputs - in0, in1, in2, in3 |
| 309 Outputs - out0, out1 |
| 310 Return Type - as per RTYPE |
| 311 Details : Even byte elements of 'in0' and 'in1' are interleaved |
| 312 and written to 'out0' |
| 313 */ |
| 314 #define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ |
| 315 { \ |
| 316 out0 = (RTYPE)__msa_ilvev_b((v16i8)in1, (v16i8)in0); \ |
| 317 out1 = (RTYPE)__msa_ilvev_b((v16i8)in3, (v16i8)in2); \ |
| 318 } |
| 319 #define ILVEV_B2_UB(...) ILVEV_B2(v16u8, __VA_ARGS__) |
| 320 #define ILVEV_B2_UH(...) ILVEV_B2(v8u16, __VA_ARGS__) |
| 321 |
| 322 #define ILVEV_B3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \ |
| 323 { \ |
| 324 ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ |
| 325 out2 = (RTYPE)__msa_ilvev_b((v16i8)in5, (v16i8)in4); \ |
| 326 } |
| 327 #define ILVEV_B3_UH(...) ILVEV_B3(v8u16, __VA_ARGS__) |
| 328 |
| 186 /* Description : Interleave even halfword elements from vectors | 329 /* Description : Interleave even halfword elements from vectors |
| 187 Arguments : Inputs - in0, in1, in2, in3 | 330 Arguments : Inputs - in0, in1, in2, in3 |
| 188 Outputs - out0, out1 | 331 Outputs - out0, out1 |
| 189 Return Type - as per RTYPE | 332 Return Type - as per RTYPE |
| 190 Details : Even halfword elements of 'in0' and 'in1' are interleaved | 333 Details : Even halfword elements of 'in0' and 'in1' are interleaved |
| 191 and written to 'out0' | 334 and written to 'out0' |
| 192 */ | 335 */ |
| 193 #define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ | 336 #define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ |
| 194 { \ | 337 { \ |
| 195 out0 = (RTYPE)__msa_ilvev_h((v8i16)in1, (v8i16)in0); \ | 338 out0 = (RTYPE)__msa_ilvev_h((v8i16)in1, (v8i16)in0); \ |
| 196 out1 = (RTYPE)__msa_ilvev_h((v8i16)in3, (v8i16)in2); \ | 339 out1 = (RTYPE)__msa_ilvev_h((v8i16)in3, (v8i16)in2); \ |
| 197 } | 340 } |
| 198 #define ILVEV_H2_UB(...) ILVEV_H2(v16u8, __VA_ARGS__) | 341 #define ILVEV_H2_UB(...) ILVEV_H2(v16u8, __VA_ARGS__) |
| 199 | 342 |
| 200 /* Description : Interleave both left and right half of input vectors | 343 /* Description : Interleave both left and right half of input vectors |
| 201 Arguments : Inputs - in0, in1 | 344 Arguments : Inputs - in0, in1 |
| 202 Outputs - out0, out1 | 345 Outputs - out0, out1 |
| 203 Return Type - as per RTYPE | 346 Return Type - as per RTYPE |
| 204 Details : Right half of byte elements from 'in0' and 'in1' are | 347 Details : Right half of byte elements from 'in0' and 'in1' are |
| 205 interleaved and written to 'out0' | 348 interleaved and written to 'out0' |
| 206 */ | 349 */ |
| 207 #define ILVRL_B2(RTYPE, in0, in1, out0, out1) \ | 350 #define ILVRL_B2(RTYPE, in0, in1, out0, out1) \ |
| 208 { \ | 351 { \ |
| 209 out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ | 352 out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ |
| 210 out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \ | 353 out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \ |
| 211 } | 354 } |
| 212 #define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) | 355 #define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) |
| 213 | 356 |
| 357 #define ILVRL_H2(RTYPE, in0, in1, out0, out1) \ |
| 358 { \ |
| 359 out0 = (RTYPE)__msa_ilvr_h((v8i16)in0, (v8i16)in1); \ |
| 360 out1 = (RTYPE)__msa_ilvl_h((v8i16)in0, (v8i16)in1); \ |
| 361 } |
| 362 #define ILVRL_H2_UB(...) ILVRL_H2(v16u8, __VA_ARGS__) |
| 363 |
| 364 /* Description : Interleave odd and even byte elements of input vectors |
| 365 Arguments : Inputs - in0, in1 |
| 366 Outputs - out0, out1 |
| 367 Return Type - as per RTYPE |
| 368 Details : Odd byte elements from 'in0' and 'in1' are interleaved and |
| 369 written to 'out0'; even byte elements are written to 'out1' |
| 370 */ |
| 371 #define ILVODEV_B2(RTYPE, in0, in1, out0, out1) \ |
| 372 { \ |
| 373 out0 = (RTYPE)__msa_ilvod_b((v16i8)in0, (v16i8)in1); \ |
| 374 out1 = (RTYPE)__msa_ilvev_b((v16i8)in0, (v16i8)in1); \ |
| 375 } |
| 376 #define ILVODEV_B2_UB(...) ILVODEV_B2(v16u8, __VA_ARGS__) |
| 377 |
| 378 /* Description : Pack even halfword elements of vector pairs |
| 379 Arguments : Inputs - in0, in1, in2, in3 |
| 380 Outputs - out0, out1 |
| 381 Return Type - as per RTYPE |
| 382 Details : Even halfword elements of 'in0' are copied to the left half of |
| 383 'out0' & even halfword elements of 'in1' are copied to the |
| 384 right half of 'out0'. |
| 385 */ |
| 386 #define PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ |
| 387 { \ |
| 388 out0 = (RTYPE)__msa_pckev_h((v8i16)in0, (v8i16)in1); \ |
| 389 out1 = (RTYPE)__msa_pckev_h((v8i16)in2, (v8i16)in3); \ |
| 390 } |
| 391 #define PCKEV_H2_UB(...) PCKEV_H2(v16u8, __VA_ARGS__) |
| 392 |
| 393 #define PCKEV_H3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \ |
| 394 { \ |
| 395 PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1); \ |
| 396 out2 = (RTYPE)__msa_pckev_h((v8i16)in4, (v8i16)in5); \ |
| 397 } |
| 398 #define PCKEV_H3_UB(...) PCKEV_H3(v16u8, __VA_ARGS__) |
| 399 |
| 400 #define PCKEV_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2
, out3) \ |
| 401 {
\ |
| 402 PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1);
\ |
| 403 PCKEV_H2(RTYPE, in4, in5, in6, in7, out2, out3);
\ |
| 404 } |
| 405 #define PCKEV_H4_UB(...) PCKEV_H4(v16u8, __VA_ARGS__) |
| 406 |
| 407 /* Description : Pack odd halfword elements of vector pairs |
| 408 Arguments : Inputs - in0, in1, in2, in3 |
| 409 Outputs - out0, out1 |
| 410 Return Type - as per RTYPE |
| 411 Details : Odd halfword elements of 'in0' are copied to the left half of |
| 412 'out0' & odd halfword elements of 'in1' are copied to the |
| 413 right half of 'out0'. |
| 414 */ |
| 415 #define PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ |
| 416 { \ |
| 417 out0 = (RTYPE)__msa_pckod_h((v8i16)in0, (v8i16)in1); \ |
| 418 out1 = (RTYPE)__msa_pckod_h((v8i16)in2, (v8i16)in3); \ |
| 419 } |
| 420 #define PCKOD_H2_UB(...) PCKOD_H2(v16u8, __VA_ARGS__) |
| 421 |
| 422 #define PCKOD_H3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \ |
| 423 { \ |
| 424 PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1); \ |
| 425 out2 = (RTYPE)__msa_pckod_h((v8i16)in4, (v8i16)in5); \ |
| 426 } |
| 427 #define PCKOD_H3_UB(...) PCKOD_H3(v16u8, __VA_ARGS__) |
| 428 |
| 429 #define PCKOD_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2
, out3) \ |
| 430 {
\ |
| 431 PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1);
\ |
| 432 PCKOD_H2(RTYPE, in4, in5, in6, in7, out2, out3);
\ |
| 433 } |
| 434 #define PCKOD_H4_UB(...) PCKOD_H4(v16u8, __VA_ARGS__) |
| 435 |
| 436 /* Description : Logical shift right all elements of byte vector |
| 437 Arguments : Inputs - in0, in1, shift_val |
| 438 Outputs - in place operation |
| 439 Return Type - as per input vector RTYPE |
| 440 Details : Each byte element of the input vectors is right shifted by |
| 441 the immediate 'shift_val' and written back in-place. |
| 442 */ |
| 443 #define SRLI_B2(RTYPE, in0, in1, shift_val) \ |
| 444 { \ |
| 445 in0 = (RTYPE)SRLI_B(in0, shift_val); \ |
| 446 in1 = (RTYPE)SRLI_B(in1, shift_val); \ |
| 447 } |
| 448 #define SRLI_B2_UB(...) SRLI_B2(v16u8, __VA_ARGS__) |
| 449 |
| 450 #define SRLI_B3(RTYPE, in0, in1, in2, shift_val) \ |
| 451 { \ |
| 452 SRLI_B2(RTYPE, in0, in1, shift_val); \ |
| 453 in2 = (RTYPE)SRLI_B(in2, shift_val); \ |
| 454 } |
| 455 #define SRLI_B3_UB(...) SRLI_B3(v16u8, __VA_ARGS__) |
| 456 |
| 457 #define SRLI_B4(RTYPE, in0, in1, in2, in3, shift_val) \ |
| 458 { \ |
| 459 SRLI_B2(RTYPE, in0, in1, shift_val); \ |
| 460 SRLI_B2(RTYPE, in2, in3, shift_val); \ |
| 461 } |
| 462 #define SRLI_B4_UB(...) SRLI_B4(v16u8, __VA_ARGS__) |
| 463 |
| 464 /* Description : Immediate Bit Insert Right (immediate) |
| 465 Arguments : Inputs - in0, in1, in2, in3, shift |
| 466 Outputs - out0, out1 |
| 467 Return Type - as per RTYPE |
| 468 Details : Copy least significant (right) bits in each element of vector |
| 469 'in1' to elements in vector in0 while preserving the most |
| 470 significant (left) bits. The number of bits to copy is given |
| 471 by the immediate 'shift + 1'. |
| 472 */ |
| 473 #define BINSRI_B2(RTYPE, in0, in1, in2, in3, out0, out1, shift) \ |
| 474 { \ |
| 475 out0 = (RTYPE)__msa_binsri_b((v16u8)in0, (v16u8)in1, shift); \ |
| 476 out1 = (RTYPE)__msa_binsri_b((v16u8)in2, (v16u8)in3, shift); \ |
| 477 } |
| 478 #define BINSRI_B2_UB(...) BINSRI_B2(v16u8, __VA_ARGS__) |
| 479 |
| 480 #define BINSRI_B3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2, shift)
\ |
| 481 {
\ |
| 482 BINSRI_B2(RTYPE, in0, in1, in2, in3, out0, out1, shift);
\ |
| 483 out2 = (RTYPE)__msa_binsri_b((v16u8)in4, (v16u8)in5, shift);
\ |
| 484 } |
| 485 #define BINSRI_B3_UB(...) BINSRI_B3(v16u8, __VA_ARGS__) |
| 486 |
| 487 /* Description : Multiplication of pairs of vectors |
| 488 Arguments : Inputs - in0, in1, in2, in3 |
| 489 Outputs - out0, out1 |
| 490 Details : Each element from 'in0' is multiplied with elements from 'in1' |
| 491 and the result is written to 'out0' |
| 492 */ |
| 493 #define MUL2(in0, in1, in2, in3, out0, out1) \ |
| 494 { \ |
| 495 out0 = in0 * in1; \ |
| 496 out1 = in2 * in3; \ |
| 497 } |
| 498 #define MUL4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \ |
| 499 { \ |
| 500 MUL2(in0, in1, in2, in3, out0, out1); \ |
| 501 MUL2(in4, in5, in6, in7, out2, out3); \ |
| 502 } |
| 503 |
| 504 /* Description : Division of pairs of vectors |
| 505 Arguments : Inputs - in0, in1, in2, in3 |
| 506 Outputs - out0, out1 |
| 507 Details : Each element from 'in0' is divided by elements from 'in1' |
| 508 and the result is written to 'out0' |
| 509 */ |
| 510 #define DIV2(in0, in1, in2, in3, out0, out1) \ |
| 511 { \ |
| 512 out0 = in0 / in1; \ |
| 513 out1 = in2 / in3; \ |
| 514 } |
| 515 #define DIV4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \ |
| 516 { \ |
| 517 DIV2(in0, in1, in2, in3, out0, out1); \ |
| 518 DIV2(in4, in5, in6, in7, out2, out3); \ |
| 519 } |
| 520 |
| 521 /* Description : Vector Floating-Point Convert from Unsigned Integer |
| 522 Arguments : Inputs - in0, in1 |
| 523 Outputs - out0, out1 |
| 524 Details : Each unsigned word element is converted to floating-point |
| 525 */ |
| 526 #define FFINTU_W2(RTYPE, in0, in1, out0, out1) \ |
| 527 { \ |
| 528 out0 = (RTYPE)__msa_ffint_u_w((v4u32)in0); \ |
| 529 out1 = (RTYPE)__msa_ffint_u_w((v4u32)in1); \ |
| 530 } |
| 531 #define FFINTU_W2_SP(...) FFINTU_W2(v4f32, __VA_ARGS__) |
| 532 |
| 533 #define FFINTU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \ |
| 534 { \ |
| 535 FFINTU_W2(RTYPE, in0, in1, out0, out1); \ |
| 536 FFINTU_W2(RTYPE, in2, in3, out2, out3); \ |
| 537 } |
| 538 #define FFINTU_W4_SP(...) FFINTU_W4(v4f32, __VA_ARGS__) |
| 539 |
| 540 /* Description : Vector Floating-Point Truncate and Convert to Unsigned Integer |
| 541 Arguments : Inputs - in0, in1 |
| 542 Outputs - out0, out1 |
| 543 Details : Each float element is truncated and converted to unsigned int |
| 544 */ |
| 545 #define FTRUNCU_W2(RTYPE, in0, in1, out0, out1) \ |
| 546 { \ |
| 547 out0 = (RTYPE)__msa_ftrunc_u_w((v4f32)in0); \ |
| 548 out1 = (RTYPE)__msa_ftrunc_u_w((v4f32)in1); \ |
| 549 } |
| 550 #define FTRUNCU_W2_UB(...) FTRUNCU_W2(v16u8, __VA_ARGS__) |
| 551 |
| 552 #define FTRUNCU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \ |
| 553 { \ |
| 554 FTRUNCU_W2(RTYPE, in0, in1, out0, out1); \ |
| 555 FTRUNCU_W2(RTYPE, in2, in3, out2, out3); \ |
| 556 } |
| 557 #define FTRUNCU_W4_UB(...) FTRUNCU_W4(v16u8, __VA_ARGS__) |
| 558 |
| 214 #endif // CommonMacrosMSA_h | 559 #endif // CommonMacrosMSA_h |
| OLD | NEW |