Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1218)

Unified Diff: third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h

Issue 2304183002: Add MSA (MIPS SIMD Arch) optimized WebGL image conversion functions (Closed)
Patch Set: Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | third_party/WebKit/Source/platform/graphics/cpu/mips/WebGLImageConversionMSA.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h
diff --git a/third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h b/third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h
index 2363c658e3f74b958924e028c4a07863540575fe..0aa5165a20655595299defa133260ddbdeeba31c 100644
--- a/third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h
+++ b/third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h
@@ -13,13 +13,21 @@
#endif
#ifdef CLANG_BUILD
+/* Byte-element (B) variants of the pre-existing halfword (H) helpers: clang
+   uses the MSA builtins directly, other compilers use GCC's generic vector
+   operators.  NOTE(review): macro arguments 'a'/'b' are unparenthesized,
+   matching the existing H macros — callers must pass simple expressions. */
+#define SRLI_B(a, b) __msa_srli_b((v16i8)a, b)
#define SRLI_H(a, b) __msa_srli_h((v8i16)a, b)
+#define SLLI_B(a, b) __msa_slli_b((v16i8)a, b)
#define SLLI_H(a, b) __msa_slli_h((v8i16)a, b)
+#define CEQI_B(a, b) __msa_ceqi_b((v16i8)a, b)
#define CEQI_H(a, b) __msa_ceqi_h((v8i16)a, b)
+#define ANDI_B(a, b) __msa_andi_b((v16u8)a, b)
#else
+#define SRLI_B(a, b) ((v16u8)a >> b)
#define SRLI_H(a, b) ((v8u16)a >> b)
+#define SLLI_B(a, b) ((v16i8)a << b)
#define SLLI_H(a, b) ((v8i16)a << b)
+#define CEQI_B(a, b) (a == b)
#define CEQI_H(a, b) (a == b)
+#define ANDI_B(a, b) ((v16u8)a & b)
#endif
#define LD_V(RTYPE, psrc) *((RTYPE*)(psrc))
@@ -134,6 +142,25 @@ MSA_STORE_FUNC(uint32_t, usw, msa_usw);
#define LD_UH4(...) LD_V4(v8u16, __VA_ARGS__)
#define LD_SP4(...) LD_V4(v4f32, __VA_ARGS__)
+/* Description : Load 6 (resp. 8) vectors of elements with stride
+   Arguments   : Inputs - psrc, stride
+                 Outputs - out0..out5 (resp. out0..out7)
+   Details     : Loads consecutive vectors from memory starting at 'psrc'.
+                 NOTE(review): correctness assumes LD_V2/LD_V4 advance 'psrc'
+                 internally — confirm against their definitions earlier in
+                 this file; otherwise out4..out7 would re-read the first rows.
+*/
+#define LD_V6(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5) \
+{ \
+ LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3); \
+ LD_V2(RTYPE, psrc, stride, out4, out5); \
+}
+#define LD_UB6(...) LD_V6(v16u8, __VA_ARGS__)
+#define LD_UH6(...) LD_V6(v8u16, __VA_ARGS__)
+#define LD_SP6(...) LD_V6(v4f32, __VA_ARGS__)
+
+#define LD_V8(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5, out6, out7) \
+{ \
+ LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3); \
+ LD_V4(RTYPE, psrc, stride, out4, out5, out6, out7); \
+}
+#define LD_UB8(...) LD_V8(v16u8, __VA_ARGS__)
+#define LD_UH8(...) LD_V8(v8u16, __VA_ARGS__)
+#define LD_SP8(...) LD_V8(v4f32, __VA_ARGS__)
+#define LD_DP8(...) LD_V8(v2f64, __VA_ARGS__)
+
/* Description : Store vectors of elements with stride
* Arguments : Inputs - in0, in1, pdst, stride
* Details : Store elements from 'in0' to (pdst)
@@ -167,6 +194,7 @@ MSA_STORE_FUNC(uint32_t, usw, msa_usw);
#define ST_UB4(...) ST_V4(v16u8, __VA_ARGS__)
#define ST_UH4(...) ST_V4(v8u16, __VA_ARGS__)
#define ST_SP4(...) ST_V4(v4f32, __VA_ARGS__)
+
#define ST_V6(RTYPE, in0, in1, in2, in3, in4, in5, pdst, stride) \
{ \
ST_V3(RTYPE, in0, in1, in2, pdst, stride); \
@@ -183,6 +211,121 @@ MSA_STORE_FUNC(uint32_t, usw, msa_usw);
#define ST_UB8(...) ST_V8(v16u8, __VA_ARGS__)
#define ST_SP8(...) ST_V8(v4f32, __VA_ARGS__)
+/* Description : Logical AND of input vectors with a mask vector
+   Arguments   : Inputs - in0, in1 (AND_V4: in0..in3), mask
+                 Outputs - out0, out1 (AND_V4: out0..out3)
+                 Return Type - as per RTYPE
+   Details     : Each element of the input vectors is bitwise AND'ed with the
+                 corresponding element of 'mask' and the result is written to
+                 the matching output vector.
+*/
+#define AND_V2(RTYPE, in0, in1, mask, out0, out1) \
+{ \
+ out0 = (RTYPE)__msa_and_v((v16u8)in0, (v16u8)mask); \
+ out1 = (RTYPE)__msa_and_v((v16u8)in1, (v16u8)mask); \
+}
+#define AND_V2_UB(...) AND_V2(v16u8, __VA_ARGS__)
+
+#define AND_V4(RTYPE, in0, in1, in2, in3, mask, out0, out1, out2, out3) \
+{ \
+ AND_V2(RTYPE, in0, in1, mask, out0, out1); \
+ AND_V2(RTYPE, in2, in3, mask, out2, out3); \
+}
+#define AND_V4_UB(...) AND_V4(v16u8, __VA_ARGS__)
+
+/* Description : Compare-equal of input vectors with an immediate value
+   Arguments   : Inputs - in0, in1 (CEQI_B4: in0..in3), val
+                 Outputs - out0, out1 (CEQI_B4: out0..out3)
+                 Return Type - as per RTYPE
+   Details     : Each byte element of the input vectors is compared for
+                 equality with the immediate 'val'; equal elements produce
+                 all-ones, others all-zeros, written to the output vectors.
+                 The result is cast to RTYPE, matching the convention of the
+                 sibling macros (AND_V2, SRLI_B2, ...).
+*/
+#define CEQI_B2(RTYPE, in0, in1, val, out0, out1) \
+{ \
+ out0 = (RTYPE)CEQI_B(in0, val); \
+ out1 = (RTYPE)CEQI_B(in1, val); \
+}
+#define CEQI_B2_UB(...) CEQI_B2(v16u8, __VA_ARGS__)
+
+#define CEQI_B4(RTYPE, in0, in1, in2, in3, val, out0, out1, out2, out3) \
+{ \
+ CEQI_B2(RTYPE, in0, in1, val, out0, out1); \
+ CEQI_B2(RTYPE, in2, in3, val, out2, out3); \
+}
+#define CEQI_B4_UB(...) CEQI_B4(v16u8, __VA_ARGS__)
+
+/* Description : Immediate number of elements to slide
+ * Arguments   : Inputs - in0, in1, slide_val
+ *               Outputs - out
+ *               Return Type - as per RTYPE
+ * Details     : Byte elements from 'in1' vector are slid into 'in0' by
+ *               value specified in the 'slide_val'
+ */
+#define SLDI_B(RTYPE, in0, in1, slide_val) \
+ (RTYPE)__msa_sldi_b((v16i8)in0, (v16i8)in1, slide_val)
+#define SLDI_UB(...) SLDI_B(v16u8, __VA_ARGS__)
+#define SLDI_D(...) SLDI_B(v2f64, __VA_ARGS__)
+
+/* Description : Immediate number of elements to slide (vector pairs)
+   Arguments   : Inputs - in0_0, in0_1, in1_0, in1_1, slide_val
+                 Outputs - out0, out1
+                 Return Type - as per RTYPE
+   Details     : Byte elements from 'in1_0' vector are slid into 'in0_0' by
+                 the value specified in 'slide_val' (likewise 'in1_1'/'in0_1')
+*/
+#define SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) \
+{ \
+ out0 = SLDI_B(RTYPE, in0_0, in1_0, slide_val); \
+ out1 = SLDI_B(RTYPE, in0_1, in1_1, slide_val); \
+}
+#define SLDI_B2_UB(...) SLDI_B2(v16u8, __VA_ARGS__)
+
+/* Description : Shuffle byte vector elements as per immediate control value
+   Arguments   : Inputs - in0, in1, shf_val
+                 Outputs - in place operation
+                 Return Type - as per RTYPE
+   Details     : Byte elements of 'in0' & 'in1' are rearranged as per the
+                 immediate control value 'shf_val' and written back in place.
+*/
+#define SHF_B2(RTYPE, in0, in1, shf_val) \
+{ \
+ in0 = (RTYPE)__msa_shf_b((v16i8)in0, shf_val); \
+ in1 = (RTYPE)__msa_shf_b((v16i8)in1, shf_val); \
+}
+#define SHF_B2_UB(...) SHF_B2(v16u8, __VA_ARGS__)
+#define SHF_B2_UH(...) SHF_B2(v8u16, __VA_ARGS__)
+
+#define SHF_B4(RTYPE, in0, in1, in2, in3, shf_val) \
+{ \
+ SHF_B2(RTYPE, in0, in1, shf_val); \
+ SHF_B2(RTYPE, in2, in3, shf_val); \
+}
+#define SHF_B4_UB(...) SHF_B4(v16u8, __VA_ARGS__)
+#define SHF_B4_UH(...) SHF_B4(v8u16, __VA_ARGS__)
+
+/* Description : Interleave even byte elements from vectors
+   Arguments   : Inputs - in0, in1, in2, in3 (ILVEV_B3: also in4, in5)
+                 Outputs - out0, out1 (ILVEV_B3: also out2)
+                 Return Type - as per RTYPE
+   Details     : Even byte elements of 'in0' and 'in1' are interleaved
+                 and written to 'out0'; likewise 'in2'/'in3' to 'out1'.
+*/
+#define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ out0 = (RTYPE)__msa_ilvev_b((v16i8)in1, (v16i8)in0); \
+ out1 = (RTYPE)__msa_ilvev_b((v16i8)in3, (v16i8)in2); \
+}
+#define ILVEV_B2_UB(...) ILVEV_B2(v16u8, __VA_ARGS__)
+#define ILVEV_B2_UH(...) ILVEV_B2(v8u16, __VA_ARGS__)
+
+#define ILVEV_B3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
+{ \
+ ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ out2 = (RTYPE)__msa_ilvev_b((v16i8)in5, (v16i8)in4); \
+}
+#define ILVEV_B3_UH(...) ILVEV_B3(v8u16, __VA_ARGS__)
+
/* Description : Interleave even halfword elements from vectors
Arguments : Inputs - in0, in1, in2, in3
Outputs - out0, out1
@@ -211,4 +354,206 @@ MSA_STORE_FUNC(uint32_t, usw, msa_usw);
}
#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
+/* Description : Interleave right and left halves of halfword vectors
+   Arguments   : Inputs - in0, in1
+                 Outputs - out0, out1
+                 Return Type - as per RTYPE
+   Details     : Halfword elements from the right (low) halves of 'in0' and
+                 'in1' are interleaved into 'out0'; left halves into 'out1'.
+                 NOTE(review): the _UB alias reinterprets the halfword result
+                 as v16u8 — confirm callers expect raw reinterpretation.
+*/
+#define ILVRL_H2(RTYPE, in0, in1, out0, out1) \
+{ \
+ out0 = (RTYPE)__msa_ilvr_h((v8i16)in0, (v8i16)in1); \
+ out1 = (RTYPE)__msa_ilvl_h((v8i16)in0, (v8i16)in1); \
+}
+#define ILVRL_H2_UB(...) ILVRL_H2(v16u8, __VA_ARGS__)
+
+/* Description : Interleave both odd- and even-indexed bytes of input vectors
+   Arguments   : Inputs - in0, in1
+                 Outputs - out0, out1
+                 Return Type - as per RTYPE
+   Details     : Odd-indexed byte elements of 'in0' and 'in1' are interleaved
+                 and written to 'out0'; even-indexed elements go to 'out1'.
+*/
+#define ILVODEV_B2(RTYPE, in0, in1, out0, out1) \
+{ \
+ out0 = (RTYPE)__msa_ilvod_b((v16i8)in0, (v16i8)in1); \
+ out1 = (RTYPE)__msa_ilvev_b((v16i8)in0, (v16i8)in1); \
+}
+#define ILVODEV_B2_UB(...) ILVODEV_B2(v16u8, __VA_ARGS__)
+
+/* Description : Pack even halfword elements of vector pairs
+   Arguments   : Inputs - in0, in1, in2, in3
+                 Outputs - out0, out1
+                 Return Type - as per RTYPE
+   Details     : Even halfword elements of 'in0' go to the left half of
+                 'out0' and those of 'in1' to its right half; 'out1' is
+                 formed the same way from 'in2' and 'in3'.
+*/
+#define PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ out0 = (RTYPE)__msa_pckev_h((v8i16)in0, (v8i16)in1); \
+ out1 = (RTYPE)__msa_pckev_h((v8i16)in2, (v8i16)in3); \
+}
+#define PCKEV_H2_UB(...) PCKEV_H2(v16u8, __VA_ARGS__)
+
+#define PCKEV_H3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
+{ \
+ PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ out2 = (RTYPE)__msa_pckev_h((v8i16)in4, (v8i16)in5); \
+}
+#define PCKEV_H3_UB(...) PCKEV_H3(v16u8, __VA_ARGS__)
+
+#define PCKEV_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
+{ \
+ PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ PCKEV_H2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+#define PCKEV_H4_UB(...) PCKEV_H4(v16u8, __VA_ARGS__)
+
+/* Description : Pack odd halfword elements of vector pairs
+   Arguments   : Inputs - in0, in1, in2, in3
+                 Outputs - out0, out1
+                 Return Type - as per RTYPE
+   Details     : Odd halfword elements of 'in0' go to the left half of
+                 'out0' and those of 'in1' to its right half; 'out1' is
+                 formed the same way from 'in2' and 'in3'.
+*/
+#define PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1) \
+{ \
+ out0 = (RTYPE)__msa_pckod_h((v8i16)in0, (v8i16)in1); \
+ out1 = (RTYPE)__msa_pckod_h((v8i16)in2, (v8i16)in3); \
+}
+#define PCKOD_H2_UB(...) PCKOD_H2(v16u8, __VA_ARGS__)
+
+#define PCKOD_H3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \
+{ \
+ PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ out2 = (RTYPE)__msa_pckod_h((v8i16)in4, (v8i16)in5); \
+}
+#define PCKOD_H3_UB(...) PCKOD_H3(v16u8, __VA_ARGS__)
+
+#define PCKOD_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
+{ \
+ PCKOD_H2(RTYPE, in0, in1, in2, in3, out0, out1); \
+ PCKOD_H2(RTYPE, in4, in5, in6, in7, out2, out3); \
+}
+#define PCKOD_H4_UB(...) PCKOD_H4(v16u8, __VA_ARGS__)
+
+/* Description : Logical shift right all elements of byte vectors (immediate)
+   Arguments   : Inputs - in0, in1, shift_val
+                 Outputs - in place operation
+                 Return Type - as per input vector RTYPE
+   Details     : Each byte element of the input vectors is right shifted by
+                 'shift_val' and written back in place. 'shift_val' must be
+                 an immediate (the clang build maps to __msa_srli_b).
+*/
+#define SRLI_B2(RTYPE, in0, in1, shift_val) \
+{ \
+ in0 = (RTYPE)SRLI_B(in0, shift_val); \
+ in1 = (RTYPE)SRLI_B(in1, shift_val); \
+}
+#define SRLI_B2_UB(...) SRLI_B2(v16u8, __VA_ARGS__)
+
+#define SRLI_B3(RTYPE, in0, in1, in2, shift_val) \
+{ \
+ SRLI_B2(RTYPE, in0, in1, shift_val); \
+ in2 = (RTYPE)SRLI_B(in2, shift_val); \
+}
+#define SRLI_B3_UB(...) SRLI_B3(v16u8, __VA_ARGS__)
+
+#define SRLI_B4(RTYPE, in0, in1, in2, in3, shift_val) \
+{ \
+ SRLI_B2(RTYPE, in0, in1, shift_val); \
+ SRLI_B2(RTYPE, in2, in3, shift_val); \
+}
+#define SRLI_B4_UB(...) SRLI_B4(v16u8, __VA_ARGS__)
+
+/* Description : Immediate Bit Insert Right (immediate)
+   Arguments   : Inputs - in0, in1, in2, in3, shift
+                 Outputs - out0, out1
+                 Return Type - as per RTYPE
+   Details     : Copy least significant (right) bits in each element of vector
+                 'in1' to elements in vector in0 while preserving the most
+                 significant (left) bits. The number of bits to copy is given
+                 by the immediate 'shift + 1' (0 <= shift <= 7 for bytes).
+*/
+#define BINSRI_B2(RTYPE, in0, in1, in2, in3, out0, out1, shift) \
+{ \
+ out0 = (RTYPE)__msa_binsri_b((v16u8)in0, (v16u8)in1, shift); \
+ out1 = (RTYPE)__msa_binsri_b((v16u8)in2, (v16u8)in3, shift); \
+}
+#define BINSRI_B2_UB(...) BINSRI_B2(v16u8, __VA_ARGS__)
+
+#define BINSRI_B3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2, shift) \
+{ \
+ BINSRI_B2(RTYPE, in0, in1, in2, in3, out0, out1, shift); \
+ out2 = (RTYPE)__msa_binsri_b((v16u8)in4, (v16u8)in5, shift); \
+}
+#define BINSRI_B3_UB(...) BINSRI_B3(v16u8, __VA_ARGS__)
+
+/* Description : Multiplication of pairs of vectors
+   Arguments   : Inputs - in0, in1, in2, in3
+                 Outputs - out0, out1
+   Details     : Element-wise product of 'in0' and 'in1' is written to 'out0'
+                 and that of 'in2' and 'in3' to 'out1' (GCC vector operators).
+*/
+#define MUL2(in0, in1, in2, in3, out0, out1) \
+{ \
+ out0 = in0 * in1; \
+ out1 = in2 * in3; \
+}
+#define MUL4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
+{ \
+ MUL2(in0, in1, in2, in3, out0, out1); \
+ MUL2(in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Division of pairs of vectors
+   Arguments   : Inputs - in0, in1, in2, in3
+                 Outputs - out0, out1
+   Details     : Element-wise quotient 'in0' / 'in1' is written to 'out0' and
+                 'in2' / 'in3' to 'out1'. Divisor elements must be nonzero.
+*/
+#define DIV2(in0, in1, in2, in3, out0, out1) \
+{ \
+ out0 = in0 / in1; \
+ out1 = in2 / in3; \
+}
+#define DIV4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
+{ \
+ DIV2(in0, in1, in2, in3, out0, out1); \
+ DIV2(in4, in5, in6, in7, out2, out3); \
+}
+
+/* Description : Vector Floating-Point Convert from Unsigned Integer
+   Arguments   : Inputs - in0, in1
+                 Outputs - out0, out1
+   Details     : Each unsigned 32-bit word element of the inputs is converted
+                 to single-precision floating point (ffint_u.w).
+*/
+#define FFINTU_W2(RTYPE, in0, in1, out0, out1) \
+{ \
+ out0 = (RTYPE)__msa_ffint_u_w((v4u32)in0); \
+ out1 = (RTYPE)__msa_ffint_u_w((v4u32)in1); \
+}
+#define FFINTU_W2_SP(...) FFINTU_W2(v4f32, __VA_ARGS__)
+
+#define FFINTU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \
+{ \
+ FFINTU_W2(RTYPE, in0, in1, out0, out1); \
+ FFINTU_W2(RTYPE, in2, in3, out2, out3); \
+}
+#define FFINTU_W4_SP(...) FFINTU_W4(v4f32, __VA_ARGS__)
+
+/* Description : Vector Floating-Point Truncate and Convert to Unsigned Integer
+   Arguments   : Inputs - in0, in1
+                 Outputs - out0, out1
+   Details     : Each single-precision element is truncated toward zero and
+                 converted to an unsigned 32-bit integer (ftrunc_u.w).
+                 NOTE(review): the _UB alias reinterprets the v4u32 result as
+                 v16u8 — confirm callers expect raw reinterpretation.
+*/
+#define FTRUNCU_W2(RTYPE, in0, in1, out0, out1) \
+{ \
+ out0 = (RTYPE)__msa_ftrunc_u_w((v4f32)in0); \
+ out1 = (RTYPE)__msa_ftrunc_u_w((v4f32)in1); \
+}
+#define FTRUNCU_W2_UB(...) FTRUNCU_W2(v16u8, __VA_ARGS__)
+
+#define FTRUNCU_W4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \
+{ \
+ FTRUNCU_W2(RTYPE, in0, in1, out0, out1); \
+ FTRUNCU_W2(RTYPE, in2, in3, out2, out3); \
+}
+#define FTRUNCU_W4_UB(...) FTRUNCU_W4(v16u8, __VA_ARGS__)
+
#endif // CommonMacrosMSA_h
« no previous file with comments | « no previous file | third_party/WebKit/Source/platform/graphics/cpu/mips/WebGLImageConversionMSA.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698