Index: include/libyuv/macros_msa.h |
diff --git a/include/libyuv/macros_msa.h b/include/libyuv/macros_msa.h |
index 8a81e8213afba38737e1d8def52ac7c47b25139f..016200f97340d400799ac44a462711b12fb6018e 100644 |
--- a/include/libyuv/macros_msa.h |
+++ b/include/libyuv/macros_msa.h |
@@ -40,6 +40,13 @@ |
} |
#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__) |
+#define LD_B8(RTYPE, psrc, stride, \ |
+ out0, out1, out2, out3, out4, out5, out6, out7) { \ |
+ LD_B4(RTYPE, (psrc), (stride), out0, out1, out2, out3); /* vectors 0..3 */ \ |
+ LD_B4(RTYPE, (psrc) + 4 * (stride), (stride), out4, out5, out6, out7); \ |
+}
+#define LD_UB8(...) LD_B8(v16u8, __VA_ARGS__) |
+ |
/* Description : Store two vectors with stride each having 16 'byte' sized |
elements |
Arguments : Inputs - in0, in1, pdst, stride |
@@ -84,6 +91,62 @@ |
} |
#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) |
+/* Description : Pack even byte elements of vector pairs |
+ Arguments : Inputs - in0, in1, in2, in3 |
+ Outputs - out0, out1 |
+ Return Type - as per RTYPE |
+ Details : Even byte elements of 'in0' are copied to the left half of |
+ 'out0' & even byte elements of 'in1' are copied to the right |
+ half of 'out0'. 'out1' is packed likewise from 'in2', 'in3'. |
+*/ |
+#define PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) { \ |
+ out0 = (RTYPE) __msa_pckev_b((v16i8) (in0), (v16i8) (in1)); \ |
+ out1 = (RTYPE) __msa_pckev_b((v16i8) (in2), (v16i8) (in3)); \ |
+}
+#define PCKEV_B2_UB(...) PCKEV_B2(v16u8, __VA_ARGS__) |
+ |
+#define PCKEV_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ |
+ out0, out1, out2, out3) { \ |
+ PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); /* first two pairs */ \ |
+ PCKEV_B2(RTYPE, in4, in5, in6, in7, out2, out3); /* last two pairs */ \ |
+}
+#define PCKEV_B4_UB(...) PCKEV_B4(v16u8, __VA_ARGS__) |
+ |
+/* Description : Pack odd byte elements of vector pairs |
+ Arguments : Inputs - in0, in1, in2, in3 |
+ Outputs - out0, out1 |
+ Return Type - as per RTYPE |
+ Details : Odd byte elements of 'in0' are copied to the left half of |
+ 'out0' & odd byte elements of 'in1' are copied to the right |
+ half of 'out0'. 'out1' is packed likewise from 'in2', 'in3'. |
+*/ |
+#define PCKOD_B2(RTYPE, in0, in1, in2, in3, out0, out1) { \ |
+ out0 = (RTYPE) __msa_pckod_b((v16i8) (in0), (v16i8) (in1)); \ |
+ out1 = (RTYPE) __msa_pckod_b((v16i8) (in2), (v16i8) (in3)); \ |
+}
+#define PCKOD_B2_UB(...) PCKOD_B2(v16u8, __VA_ARGS__) |
+ |
+#define PCKOD_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, \ |
+ out0, out1, out2, out3) { \ |
+ PCKOD_B2(RTYPE, in0, in1, in2, in3, out0, out1); /* first two pairs */ \ |
+ PCKOD_B2(RTYPE, in4, in5, in6, in7, out2, out3); /* last two pairs */ \ |
+}
+#define PCKOD_B4_UB(...) PCKOD_B4(v16u8, __VA_ARGS__) |
+ |
+/* Description : Average with rounding (in0 + in1 + 1) / 2. |
+ Arguments : Inputs - in0, in1, in2, in3 |
+ Outputs - out0, out1 |
+ Return Type - as per RTYPE |
+ Details : Each unsigned byte element of 'in0' is averaged with rounding |
+ against the corresponding element of 'in1' and written to |
+ 'out0'; 'out1' is computed likewise from 'in2' and 'in3'. |
+*/ |
+#define AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) { \ |
+ out0 = (RTYPE) __msa_aver_u_b((v16u8) (in0), (v16u8) (in1)); \ |
+ out1 = (RTYPE) __msa_aver_u_b((v16u8) (in2), (v16u8) (in3)); \ |
+}
+#define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__) |
+ |
#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */ |
#endif // INCLUDE_LIBYUV_MACROS_MSA_H_ |