| Index: third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h
|
| diff --git a/third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h b/third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h
|
| index 2363c658e3f74b958924e028c4a07863540575fe..0aa5165a20655595299defa133260ddbdeeba31c 100644
|
| --- a/third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h
|
| +++ b/third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h
|
| @@ -13,13 +13,21 @@
|
| #endif
|
|
|
| #ifdef CLANG_BUILD
|
| +#define SRLI_B(a, b) __msa_srli_b((v16i8)a, b)
|
| #define SRLI_H(a, b) __msa_srli_h((v8i16)a, b)
|
| +#define SLLI_B(a, b) __msa_slli_b((v16i8)a, b)
|
| #define SLLI_H(a, b) __msa_slli_h((v8i16)a, b)
|
| +#define CEQI_B(a, b) __msa_ceqi_b((v16i8)a, b)
|
| #define CEQI_H(a, b) __msa_ceqi_h((v8i16)a, b)
|
| +#define ANDI_B(a, b) __msa_andi_b((v16u8)a, b)
|
| #else
|
| +#define SRLI_B(a, b) ((v16u8)a >> b)
|
| #define SRLI_H(a, b) ((v8u16)a >> b)
|
| +#define SLLI_B(a, b) ((v16i8)a << b)
|
| #define SLLI_H(a, b) ((v8i16)a << b)
|
| +#define CEQI_B(a, b) (a == b)
|
| #define CEQI_H(a, b) (a == b)
|
| +#define ANDI_B(a, b) ((v16u8)a & b)
|
| #endif
|
|
|
| #define LD_V(RTYPE, psrc) *((RTYPE*)(psrc))
|
| @@ -134,6 +142,25 @@ MSA_STORE_FUNC(uint32_t, usw, msa_usw);
|
| #define LD_UH4(...) LD_V4(v8u16, __VA_ARGS__)
|
| #define LD_SP4(...) LD_V4(v4f32, __VA_ARGS__)
|
|
|
| +#define LD_V6(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5) \
|
| +{ \
|
| + LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3); \
|
| + LD_V2(RTYPE, psrc, stride, out4, out5); \
|
| +}
|
| +#define LD_UB6(...) LD_V6(v16u8, __VA_ARGS__)
|
| +#define LD_UH6(...) LD_V6(v8u16, __VA_ARGS__)
|
| +#define LD_SP6(...) LD_V6(v4f32, __VA_ARGS__)
|
| +
|
| +#define LD_V8(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5, out6, out7) \
|
| +{ \
|
| + LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3); \
|
| + LD_V4(RTYPE, psrc, stride, out4, out5, out6, out7); \
|
| +}
|
| +#define LD_UB8(...) LD_V8(v16u8, __VA_ARGS__)
|
| +#define LD_UH8(...) LD_V8(v8u16, __VA_ARGS__)
|
| +#define LD_SP8(...) LD_V8(v4f32, __VA_ARGS__)
|
| +#define LD_DP8(...) LD_V8(v2f64, __VA_ARGS__)
|
| +
|
| /* Description : Store vectors of elements with stride
|
| * Arguments : Inputs - in0, in1, pdst, stride
|
| * Details : Store elements from 'in0' to (pdst)
|
| @@ -167,6 +194,7 @@ MSA_STORE_FUNC(uint32_t, usw, msa_usw);
|
| #define ST_UB4(...) ST_V4(v16u8, __VA_ARGS__)
|
| #define ST_UH4(...) ST_V4(v8u16, __VA_ARGS__)
|
| #define ST_SP4(...) ST_V4(v4f32, __VA_ARGS__)
|
| +
|
| #define ST_V6(RTYPE, in0, in1, in2, in3, in4, in5, pdst, stride) \
|
| { \
|
| ST_V3(RTYPE, in0, in1, in2, pdst, stride); \
|
| @@ -183,6 +211,121 @@ MSA_STORE_FUNC(uint32_t, usw, msa_usw);
|
| #define ST_UB8(...) ST_V8(v16u8, __VA_ARGS__)
|
| #define ST_SP8(...) ST_V8(v4f32, __VA_ARGS__)
|
|
|
| +/* Description : Logical and of input vectors with a mask vector.
|
| + Arguments : Inputs - in0, in1, mask
|
| + Outputs - out0, out1
|
| + Return Type - as per RTYPE
|
| + Details : Each element of vector 'in0' is logically and'ed with the
|
| + corresponding element of vector 'mask' and the result is
|
| + written to 'out0'; likewise 'in1' with 'mask' into 'out1'.
|
| +*/
|
| +#define AND_V2(RTYPE, in0, in1, mask, out0, out1) \
|
| +{ \
|
| + out0 = (RTYPE)__msa_and_v((v16u8)in0, (v16u8)mask); \
|
| + out1 = (RTYPE)__msa_and_v((v16u8)in1, (v16u8)mask); \
|
| +}
|
| +#define AND_V2_UB(...) AND_V2(v16u8, __VA_ARGS__)
|
| +
|
| +#define AND_V4(RTYPE, in0, in1, in2, in3, mask, out0, out1, out2, out3) \
|
| +{ \
|
| + AND_V2(RTYPE, in0, in1, mask, out0, out1); \
|
| + AND_V2(RTYPE, in2, in3, mask, out2, out3); \
|
| +}
|
| +#define AND_V4_UB(...) AND_V4(v16u8, __VA_ARGS__)
|
| +
|
| +/* Description : Compare for equality of input vectors with immediate value
|
| + Arguments : Inputs - in0, in1, val
|
| + Outputs - out0, out1
|
| + Return Type - as per RTYPE
|
| + Details : Each byte element of input vector 'in0' is compared for
|
| + equality with the immediate 'val'; the per-element result
|
| + (all ones on match, zero otherwise) is written to 'out0'.
|
| +*/
|
| +#define CEQI_B2(RTYPE, in0, in1, val, out0, out1) \
|
| +{ \
|
| + out0 = CEQI_B(in0, val); \
|
| + out1 = CEQI_B(in1, val); \
|
| +}
|
| +#define CEQI_B2_UB(...) CEQI_B2(v16u8, __VA_ARGS__)
|
| +
|
| +#define CEQI_B4(RTYPE, in0, in1, in2, in3, val, out0, out1, out2, out3) \
|
| +{ \
|
| + CEQI_B2(RTYPE, in0, in1, val, out0, out1); \
|
| + CEQI_B2(RTYPE, in2, in3, val, out2, out3); \
|
| +}
|
| +#define CEQI_B4_UB(...) CEQI_B4(v16u8, __VA_ARGS__)
|
| +
|
| +/* Description : Immediate number of elements to slide
|
| + * Arguments : Inputs - in0, in1, slide_val
|
| + * Outputs - out
|
| + * Return Type - as per RTYPE
|
| + * Details : Byte elements from 'in1' vector are slid into 'in0' by
|
| + * value specified in the 'slide_val'
|
| + */
|
| +#define SLDI_B(RTYPE, in0, in1, slide_val) \
|
| + (RTYPE)__msa_sldi_b((v16i8)in0, (v16i8)in1, slide_val)
|
| +#define SLDI_UB(...) SLDI_B(v16u8, __VA_ARGS__)
|
| +#define SLDI_D(...) SLDI_B(v2f64, __VA_ARGS__)
|
| +
|
| +/* Description : Immediate number of elements to slide
|
| + Arguments : Inputs - in0_0, in0_1, in1_0, in1_1, slide_val
|
| + Outputs - out0, out1
|
| + Return Type - as per RTYPE
|
| + Details : Byte elements from 'in1_0' vector are slid into 'in0_0' by
|
| + the value specified in 'slide_val'; likewise for the second pair.
|
| +*/
|
| +#define SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) \
|
| +{ \
|
| + out0 = SLDI_B(RTYPE, in0_0, in1_0, slide_val); \
|
| + out1 = SLDI_B(RTYPE, in0_1, in1_1, slide_val); \
|
| +}
|
| +#define SLDI_B2_UB(...) SLDI_B2(v16u8, __VA_ARGS__)
|
| +
|
| +/* Description : Shuffle byte vector elements as per variable
|
| + Arguments : Inputs - in0, in1, shf_val
|
| + Outputs - out0, out1
|
| + Return Type - as per RTYPE
|
| + Details : Byte elements from 'in0' & 'in1' are copied selectively to
|
| + 'out0' as per control variable 'shf_val'.
|
| +*/
|
| +#define SHF_B2(RTYPE, in0, in1, shf_val) \
|
| +{ \
|
| + in0 = (RTYPE)__msa_shf_b((v16i8)in0, shf_val); \
|
| + in1 = (RTYPE)__msa_shf_b((v16i8)in1, shf_val); \
|
| +}
|
| +#define SHF_B2_UB(...) SHF_B2(v16u8, __VA_ARGS__)
|
| +#define SHF_B2_UH(...) SHF_B2(v8u16, __VA_ARGS__)
|
| +
|
| +#define SHF_B4(RTYPE, in0, in1, in2, in3, shf_val) \
|
| +{ \
|
| + SHF_B2(RTYPE, in0, in1, shf_val); \
|
| + SHF_B2(RTYPE, in2, in3, shf_val); \
|
| +}
|
| +#define SHF_B4_UB(...) SHF_B4(v16u8, __VA_ARGS__)
|
| +#define SHF_B4_UH(...) SHF_B4(v8u16, __VA_ARGS__)
|
| +
|
| +/* Description : Interleave even byte elements from vectors
|
| + Arguments : Inputs - in0, in1, in2, in3
|
| + Outputs - out0, out1
|
| + Return Type - as per RTYPE
|
| + Details : Even byte elements of 'in0' and 'in1' are interleaved
|
| + and written to 'out0'
|
| +*/
|
| +#define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
|
| +{ \
|
| + out0 = (RTYPE)__msa_ilvev_b((v16i8)in1, (v16i8)in0); \
|
| + out1 = (RTYPE)__msa_ilvev_b((v16i8)in3, (v16i8)in2); \
|
| +}
|
| +#define ILVEV_B2_UB(...) ILVEV_B2(v16u8, __VA_ARGS__)
|
| +#define ILVEV_B2_UH(...) ILVEV_B2(v8u16, __VA_ARGS__)
|
| +
|
| +#define ILVEV_B3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
|
| +{ \
|
| + ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
|
| + out2 = (RTYPE)__msa_ilvev_b((v16i8)in5, (v16i8)in4); \
|
| +}
|
| +#define ILVEV_B3_UH(...) ILVEV_B3(v8u16, __VA_ARGS__)
|
| +
|
| /* Description : Interleave even halfword elements from vectors
|
| Arguments : Inputs - in0, in1, in2, in3
|
| Outputs - out0, out1
|
| @@ -211,4 +354,206 @@ MSA_STORE_FUNC(uint32_t, usw, msa_usw);
|
| }
|
| #define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
|
|
|
| +#define ILVRL_H2(RTYPE, in0, in1, out0, out1) \
|
| +{ \
|
| + out0 = (RTYPE)__msa_ilvr_h((v8i16)in0, (v8i16)in1); \
|
| + out1 = (RTYPE)__msa_ilvl_h((v8i16)in0, (v8i16)in1); \
|
| +}
|
| +#define ILVRL_H2_UB(...) ILVRL_H2(v16u8, __VA_ARGS__)
|
| +
|
| +/* Description : Interleave both odd and even half of input vectors
|
| + Arguments : Inputs - in0, in1
|
| + Outputs - out0, out1
|
| + Return Type - as per RTYPE
|
| + Details : Odd byte elements of 'in0' and 'in1' are interleaved into
|
| + 'out0'; even byte elements are interleaved into 'out1'.
|
| +*/
|
| +#define ILVODEV_B2(RTYPE, in0, in1, out0, out1) \
|
| +{ \
|
| + out0 = (RTYPE)__msa_ilvod_b((v16i8)in0, (v16i8)in1); \
|
| + out1 = (RTYPE)__msa_ilvev_b((v16i8)in0, (v16i8)in1); \
|
| +}
|
| +#define ILVODEV_B2_UB(...) ILVODEV_B2(v16u8, __VA_ARGS__)
|
| +
|
| +/* Description : Pack even halfword elements of vector pairs
|
| + Arguments : Inputs - in0, in1, in2, in3
|
| + Outputs - out0, out1
|
| + Return Type - as per RTYPE
|
| + Details : Even halfword elements of 'in0' are copied to the left half of
|
| + 'out0' & even halfword elements of 'in1' are copied to the
|
| + right half of 'out0'.
|
| +*/
|
| +#define PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
|
| +{ \
|
| + out0 = (RTYPE)__msa_pckev_h((v8i16)in0, (v8i16)in1); \
|
| + out1 = (RTYPE)__msa_pckev_h((v8i16)in2, (v8i16)in3); \
|
| +}
|
| +#define PCKEV_H2_UB(...) PCKEV_H2(v16u8, __VA_ARGS__)
|
| +
|
| +#define PCKEV_H3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
|
| +{ \
|
| + PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
|
| + out2 = (RTYPE)__msa_pckev_h((v8i16)in4, (v8i16)in5); \
|
| +}
|
| +#define PCKEV_H3_UB(...) PCKEV_H3(v16u8, __VA_ARGS__)
|
| +
|
| +#define PCKEV_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
|
| +{ \
|
| + PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
|
| + PCKEV_H2(RTYPE, in4, in5, in6, in7, out2, out3); \
|
| +}
|
| +#define PCKEV_H4_UB(...) PCKEV_H4(v16u8, __VA_ARGS__)
|
| +
|
| +/* Description : Pack odd halfword elements of vector pairs
|
| + Arguments : Inputs - in0, in1, in2, in3
|
| + Outputs - out0, out1
|
| + Return Type - as per RTYPE
|
| + Details : Odd halfword elements of 'in0' are copied to the left half of
|
| + 'out0' & odd halfword elements of 'in1' are copied to the
|
| + right half of 'out0'.
|
| +*/
|
| +#define PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
|
| +{ \
|
| + out0 = (RTYPE)__msa_pckod_h((v8i16)in0, (v8i16)in1); \
|
| + out1 = (RTYPE)__msa_pckod_h((v8i16)in2, (v8i16)in3); \
|
| +}
|
| +#define PCKOD_H2_UB(...) PCKOD_H2(v16u8, __VA_ARGS__)
|
| +
|
| +#define PCKOD_H3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
|
| +{ \
|
| + PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
|
| + out2 = (RTYPE)__msa_pckod_h((v8i16)in4, (v8i16)in5); \
|
| +}
|
| +#define PCKOD_H3_UB(...) PCKOD_H3(v16u8, __VA_ARGS__)
|
| +
|
| +#define PCKOD_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
|
| +{ \
|
| + PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
|
| + PCKOD_H2(RTYPE, in4, in5, in6, in7, out2, out3); \
|
| +}
|
| +#define PCKOD_H4_UB(...) PCKOD_H4(v16u8, __VA_ARGS__)
|
| +
|
| +/* Description : Logical shift right all elements of byte vectors
|
| + Arguments : Inputs - in0, in1, shift_val
|
| + Outputs - in place operation
|
| + Return Type - as per input vector RTYPE
|
| + Details : Each byte element of vector 'in0' is right shifted by the
|
| + immediate 'shift_val' and the result is written in-place.
|
| +*/
|
| +#define SRLI_B2(RTYPE, in0, in1, shift_val) \
|
| +{ \
|
| + in0 = (RTYPE)SRLI_B(in0, shift_val); \
|
| + in1 = (RTYPE)SRLI_B(in1, shift_val); \
|
| +}
|
| +#define SRLI_B2_UB(...) SRLI_B2(v16u8, __VA_ARGS__)
|
| +
|
| +#define SRLI_B3(RTYPE, in0, in1, in2, shift_val) \
|
| +{ \
|
| + SRLI_B2(RTYPE, in0, in1, shift_val); \
|
| + in2 = (RTYPE)SRLI_B(in2, shift_val); \
|
| +}
|
| +#define SRLI_B3_UB(...) SRLI_B3(v16u8, __VA_ARGS__)
|
| +
|
| +#define SRLI_B4(RTYPE, in0, in1, in2, in3, shift_val) \
|
| +{ \
|
| + SRLI_B2(RTYPE, in0, in1, shift_val); \
|
| + SRLI_B2(RTYPE, in2, in3, shift_val); \
|
| +}
|
| +#define SRLI_B4_UB(...) SRLI_B4(v16u8, __VA_ARGS__)
|
| +
|
| +/* Description : Immediate Bit Insert Right (immediate)
|
| + Arguments : Inputs - in0, in1, in2, in3, shift
|
| + Outputs - out0, out1
|
| + Return Type - as per RTYPE
|
| + Details : Copy least significant (right) bits in each element of vector
|
| + 'in1' to elements in vector in0 while preserving the most
|
| + significant (left) bits. The number of bits to copy is given
|
| + by the immediate 'shift + 1'.
|
| +*/
|
| +#define BINSRI_B2(RTYPE, in0, in1, in2, in3, out0, out1, shift) \
|
| +{ \
|
| + out0 = (RTYPE)__msa_binsri_b((v16u8)in0, (v16u8)in1, shift); \
|
| + out1 = (RTYPE)__msa_binsri_b((v16u8)in2, (v16u8)in3, shift); \
|
| +}
|
| +#define BINSRI_B2_UB(...) BINSRI_B2(v16u8, __VA_ARGS__)
|
| +
|
| +#define BINSRI_B3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2, shift) \
|
| +{ \
|
| + BINSRI_B2(RTYPE, in0, in1, in2, in3, out0, out1, shift); \
|
| + out2 = (RTYPE)__msa_binsri_b((v16u8)in4, (v16u8)in5, shift); \
|
| +}
|
| +#define BINSRI_B3_UB(...) BINSRI_B3(v16u8, __VA_ARGS__)
|
| +
|
| +/* Description : Multiplication of pairs of vectors
|
| + Arguments : Inputs - in0, in1, in2, in3
|
| + Outputs - out0, out1
|
| + Details : Each element from 'in0' is multiplied with elements from 'in1'
|
| + and the result is written to 'out0'
|
| +*/
|
| +#define MUL2(in0, in1, in2, in3, out0, out1) \
|
| +{ \
|
| + out0 = in0 * in1; \
|
| + out1 = in2 * in3; \
|
| +}
|
| +#define MUL4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
|
| +{ \
|
| + MUL2(in0, in1, in2, in3, out0, out1); \
|
| + MUL2(in4, in5, in6, in7, out2, out3); \
|
| +}
|
| +
|
| +/* Description : Division of pairs of vectors
|
| + Arguments : Inputs - in0, in1, in2, in3
|
| + Outputs - out0, out1
|
| + Details : Each element from 'in0' is divided by elements from 'in1'
|
| + and the result is written to 'out0'
|
| +*/
|
| +#define DIV2(in0, in1, in2, in3, out0, out1) \
|
| +{ \
|
| + out0 = in0 / in1; \
|
| + out1 = in2 / in3; \
|
| +}
|
| +#define DIV4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
|
| +{ \
|
| + DIV2(in0, in1, in2, in3, out0, out1); \
|
| + DIV2(in4, in5, in6, in7, out2, out3); \
|
| +}
|
| +
|
| +/* Description : Vector Floating-Point Convert from Unsigned Integer
|
| + Arguments : Inputs - in0, in1
|
| + Outputs - out0, out1
|
| + Details : Unsigned word elements of 'in0'/'in1' are converted to floating-point and written to 'out0'/'out1'.
|
| +*/
|
| +#define FFINTU_W2(RTYPE, in0, in1, out0, out1) \
|
| +{ \
|
| + out0 = (RTYPE)__msa_ffint_u_w((v4u32)in0); \
|
| + out1 = (RTYPE)__msa_ffint_u_w((v4u32)in1); \
|
| +}
|
| +#define FFINTU_W2_SP(...) FFINTU_W2(v4f32, __VA_ARGS__)
|
| +
|
| +#define FFINTU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \
|
| +{ \
|
| + FFINTU_W2(RTYPE, in0, in1, out0, out1); \
|
| + FFINTU_W2(RTYPE, in2, in3, out2, out3); \
|
| +}
|
| +#define FFINTU_W4_SP(...) FFINTU_W4(v4f32, __VA_ARGS__)
|
| +
|
| +/* Description : Vector Floating-Point Truncate and Convert to Unsigned Integer
|
| + Arguments : Inputs - in0, in1
|
| + Outputs - out0, out1
|
| + Details : Floating-point elements of 'in0'/'in1' are truncated toward zero, converted to unsigned word and written to 'out0'/'out1'.
|
| +*/
|
| +#define FTRUNCU_W2(RTYPE, in0, in1, out0, out1) \
|
| +{ \
|
| + out0 = (RTYPE)__msa_ftrunc_u_w((v4f32)in0); \
|
| + out1 = (RTYPE)__msa_ftrunc_u_w((v4f32)in1); \
|
| +}
|
| +#define FTRUNCU_W2_UB(...) FTRUNCU_W2(v16u8, __VA_ARGS__)
|
| +
|
| +#define FTRUNCU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \
|
| +{ \
|
| + FTRUNCU_W2(RTYPE, in0, in1, out0, out1); \
|
| + FTRUNCU_W2(RTYPE, in2, in3, out2, out3); \
|
| +}
|
| +#define FTRUNCU_W4_UB(...) FTRUNCU_W4(v16u8, __VA_ARGS__)
|
| +
|
| #endif // CommonMacrosMSA_h
|
|
|