Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(961)

Unified Diff: third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h

Issue 2340583003: Add MSA (MIPS SIMD Arch) optimized VectorMath functions (Closed)
Patch Set: Removing zvmul and vsvesq Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h
diff --git a/third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h b/third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h
index 0aa5165a20655595299defa133260ddbdeeba31c..55d1235df9897a9f7d0cf08eac356fafd66a9a40 100644
--- a/third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h
+++ b/third_party/WebKit/Source/platform/cpu/mips/CommonMacrosMSA.h
@@ -518,6 +518,23 @@ MSA_STORE_FUNC(uint32_t, usw, msa_usw);
DIV2(in4, in5, in6, in7, out2, out3); \
}
+/* Description : Addition of 2 pairs of vectors
+ Arguments : Inputs - in0, in1, in2, in3
+ Outputs - out0, out1
+ Details : Each element in 'in0' is added to 'in1' and the result is written
+ to 'out0'; each element in 'in2' is added to 'in3' and the result is
+ written to 'out1'.
+*/
Raymond Toy 2016/10/03 16:47:06 Comment style here seems inconsistent with the nea
Prashant.Patil 2016/10/04 11:47:27 Done.
+#define ADD2(in0, in1, in2, in3, out0, out1) \
+{ \
+ /* Arguments are parenthesized so expression operands keep their grouping. */ \
+ (out0) = (in0) + (in1); \
+ (out1) = (in2) + (in3); \
+}
+/* Description : Addition of 4 pairs of vectors
+ Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7
+ Outputs - out0, out1, out2, out3
+ Details : Expands to two ADD2 invocations: out0 = in0 + in1,
+ out1 = in2 + in3, out2 = in4 + in5, out3 = in6 + in7.
+ Arguments are parenthesized before being forwarded so that
+ expression operands keep their grouping.
+*/
+#define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \
Raymond Toy 2016/10/03 16:47:06 I know the naming here is consistent with the rest
Prashant.Patil 2016/10/04 11:47:28 I will add macro description for better understand
+{ \
+ ADD2((in0), (in1), (in2), (in3), (out0), (out1)); \
+ ADD2((in4), (in5), (in6), (in7), (out2), (out3)); \
+}
+
/* Description : Vector Floating-Point Convert from Unsigned Integer
Arguments : Inputs - in0, in1
Outputs - out0, out1
@@ -556,4 +573,72 @@ MSA_STORE_FUNC(uint32_t, usw, msa_usw);
}
#define FTRUNCU_W4_UB(...) FTRUNCU_W4(v16u8, __VA_ARGS__)
+/* Description : Vector Floating-Point multiply with scale and accumulate
+ Arguments : Inputs - in0, in1, out0, out1, scale
Raymond Toy 2016/10/03 16:47:06 Based on the code, out0, and out1 are also inputs.
Prashant.Patil 2016/10/04 11:47:27 Done.
+ Outputs - out0, out1
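+ Details : 'in0' and 'in1' are each multiplied by 'scale' and the products
+ are added to the existing values of 'out0' and 'out1' respectively.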
+*/
+#define VSMA2(in0, in1, out0, out1, scale) \
+{ \
+ /* Arguments are parenthesized so expression operands keep their grouping. */ \
+ (out0) += (in0) * (scale); \
+ (out1) += (in1) * (scale); \
+}
+
+/* Description : Multiply with scale and accumulate for 4 vectors
+ Arguments : Inputs - in0, in1, in2, in3, out0, out1, out2, out3, scale
+ Outputs - out0, out1, out2, out3
+ Details : Expands to two VSMA2 invocations, so each 'inN' is multiplied
+ by 'scale' and added to the matching 'outN'. Arguments are
+ parenthesized before being forwarded so that expression
+ operands keep their grouping.
+*/
+#define VSMA4(in0, in1, in2, in3, out0, out1, out2, out3, scale) \
+{ \
+ VSMA2((in0), (in1), (out0), (out1), (scale)); \
+ VSMA2((in2), (in3), (out2), (out3), (scale)); \
+}
+
+/* Description : Vector Floating-Point multiply with scale
+ Arguments : Inputs - in0, in1, scale
+ Outputs - out0, out1
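+ Details : 'in0' and 'in1' are each multiplied by 'scale' and the products
+ are written to 'out0' and 'out1' respectively.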
+*/
+#define VSMUL2(in0, in1, out0, out1, scale) \
+{ \
+ /* Arguments are parenthesized so expression operands keep their grouping. */ \
+ (out0) = (in0) * (scale); \
+ (out1) = (in1) * (scale); \
+}
+
+/* Description : Multiply with scale for 4 vectors
+ Arguments : Inputs - in0, in1, in2, in3, scale
+ Outputs - out0, out1, out2, out3
+ Details : Expands to two VSMUL2 invocations, so each 'outN' receives
+ 'inN' multiplied by 'scale'. Arguments are parenthesized
+ before being forwarded so that expression operands keep
+ their grouping.
+*/
+#define VSMUL4(in0, in1, in2, in3, out0, out1, out2, out3, scale) \
+{ \
+ VSMUL2((in0), (in1), (out0), (out1), (scale)); \
+ VSMUL2((in2), (in3), (out2), (out3), (scale)); \
+}
+
+/* Description : Vector Floating-Point max value with absolute
+ Arguments : Inputs - in0, in1, mask, max
+ Outputs - max
Raymond Toy 2016/10/03 16:47:06 There are not outputs out0, out1, just the one out
Prashant.Patil 2016/10/04 11:47:28 I will update the macro usage for better code read
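+ Details : 'in0' and 'in1' are each bitwise ANDed with 'mask' and 'max' is
+ updated via __msa_fmax_w with each masked value in turn.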
+*/
+#define VMAXMGV2(in0, in1, mask, max) \
+{ \
+ /* in0/in1/mask are parenthesized so the v16i8 cast and the '&' apply to the whole argument. */ \
+ (max) = __msa_fmax_w((max), (v4f32)((v16i8)(in0) & (mask))); \
+ (max) = __msa_fmax_w((max), (v4f32)((v16i8)(in1) & (mask))); \
+}
+
+/* Description : Masked floating-point maximum over 4 vectors
+ Arguments : Inputs - in0, in1, in2, in3, mask, max
+ Outputs - max
+ Details : Expands to two VMAXMGV2 invocations, so 'max' is updated with
+ the masked value of each of the four inputs in order.
+ Arguments are parenthesized before being forwarded so that
+ expression operands keep their grouping.
+*/
+#define VMAXMGV4(in0, in1, in2, in3, mask, max) \
+{ \
+ VMAXMGV2((in0), (in1), (mask), (max)); \
+ VMAXMGV2((in2), (in3), (mask), (max)); \
+}
+
+/* Description : Vector Floating-Point clip to min max
+ Arguments : Inputs - in0, in1, min, max
Raymond Toy 2016/10/03 16:47:06 You forgot min, max as inputs.
Prashant.Patil 2016/10/04 11:47:28 Done.
+ Outputs - out0, out1
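+ Details : 'in0' and 'in1' are first limited from above with
+ __msa_fmin_w(..., max) and then from below with
+ __msa_fmax_w(..., min); results are written to 'out0' and 'out1'.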
+*/
+#define VCLIP2(in0, in1, min, max, out0, out1) \
+{ \
+ /* Upper bound is applied first (fmin with 'max'), then the lower bound (fmax with 'min'). */ \
+ (out0) = __msa_fmax_w(__msa_fmin_w((in0), (max)), (min)); \
+ (out1) = __msa_fmax_w(__msa_fmin_w((in1), (max)), (min)); \
+}
+
+/* Description : Clip 4 vectors to min max
+ Arguments : Inputs - in0, in1, in2, in3, min, max
+ Outputs - out0, out1, out2, out3
+ Details : Expands to two VCLIP2 invocations, so each 'outN' receives
+ 'inN' clipped with 'max' and 'min'.
+*/
+#define VCLIP4(in0, in1, in2, in3, min, max, out0, out1, out2, out3) \
+{ \
+ VCLIP2((in0), (in1), (min), (max), (out0), (out1)); \
+ VCLIP2((in2), (in3), (min), (max), (out2), (out3)); \
+}
+
#endif // CommonMacrosMSA_h

Powered by Google App Engine
This is Rietveld 408576698