| Index: include/libyuv/macros_msa.h
|
| diff --git a/include/libyuv/macros_msa.h b/include/libyuv/macros_msa.h
|
| index 48bea5d0eb62a105ab8d2e234e2448496e79aa04..a41f287091282332692a4448c4c10181e94b9c0e 100644
|
| --- a/include/libyuv/macros_msa.h
|
| +++ b/include/libyuv/macros_msa.h
|
| @@ -16,76 +16,75 @@
|
| #include <msa.h>
|
|
|
| #if (__mips_isa_rev >= 6)
|
| - #define LW(psrc) ({ \
|
| - uint8* psrc_lw_m = (uint8*) (psrc); \ /* NOLINT */
|
| - uint32 val_m; \
|
| - asm volatile ( \
|
| - "lw %[val_m], %[psrc_lw_m] \n\t" \
|
| - : [val_m] "=r" (val_m) \
|
| - : [psrc_lw_m] "m" (*psrc_lw_m) \
|
| - ); \
|
| - \
|
| - val_m; \
|
| + #define LW(psrc) ({ \
|
| + uint8* psrc_lw_m = (uint8*) (psrc); /* NOLINT */ \
|
| + uint32 val_m; \
|
| + asm volatile ( \
|
| + "lw %[val_m], %[psrc_lw_m] \n\t" \
|
| + : [val_m] "=r" (val_m) \
|
| + : [psrc_lw_m] "m" (*psrc_lw_m) \
|
| + ); \
|
| + val_m; \
|
| })
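|
| What this hunk fixes: a '#define' continuation backslash only splices
| lines when it is the last character before the newline. In the old
| lines the trailing '/* NOLINT */' comment followed the backslash, so
| the splice never happened: the macro body was cut off at that line
| with a stray '\' (which most compilers reject), and the remaining
| lines fell outside the macro. Moving the comment in front of the
| backslash restores the continuation. A minimal sketch of the failure
| mode (hypothetical macro, not from this header):
|
|   /* Broken: the backslash is not last on the line, so the body   */
|   /* ends here and the second line is ordinary code, not macro.   */
|   #define TWICE_BAD(x) \ /* NOLINT */
|       ((x) * 2)
|
|   /* Fixed: comment first, backslash last; both lines form the body. */
|   #define TWICE_OK(x) /* NOLINT */ \
|       ((x) * 2)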
|
|
|
| #if (__mips == 64)
|
| - #define LD(psrc) ({ \
|
| - uint8* psrc_ld_m = (uint8*) (psrc); \ /* NOLINT */
|
| - uint64 val_m = 0; \
|
| - asm volatile ( \
|
| - "ld %[val_m], %[psrc_ld_m] \n\t" \
|
| - : [val_m] "=r" (val_m) \
|
| - : [psrc_ld_m] "m" (*psrc_ld_m) \
|
| - ); \
|
| - val_m; \
|
| + #define LD(psrc) ({ \
|
| + uint8* psrc_ld_m = (uint8*) (psrc); /* NOLINT */ \
|
| + uint64 val_m = 0; \
|
| + asm volatile ( \
|
| + "ld %[val_m], %[psrc_ld_m] \n\t" \
|
| + : [val_m] "=r" (val_m) \
|
| + : [psrc_ld_m] "m" (*psrc_ld_m) \
|
| + ); \
|
| + val_m; \
|
| })
|
| #else // !(__mips == 64)
|
| - #define LD(psrc) ({ \
|
| - uint8* psrc_ld_m = (uint8*) (psrc); \ /* NOLINT */
|
| - uint32 val0_m, val1_m; \
|
| - uint64 val_m = 0; \
|
| - val0_m = LW(psrc_ld_m); \
|
| - val1_m = LW(psrc_ld_m + 4); \
|
| - val_m = (uint64) (val1_m); \ /* NOLINT */
|
| - val_m = (uint64) ((val_m << 32) & 0xFFFFFFFF00000000); \ /* NOLINT */
|
| - val_m = (uint64) (val_m | (uint64) val0_m); \ /* NOLINT */
|
| - val_m; \
|
| + #define LD(psrc) ({ \
|
| + uint8* psrc_ld_m = (uint8*) (psrc); /* NOLINT */ \
|
| + uint32 val0_m, val1_m; \
|
| + uint64 val_m = 0; \
|
| + val0_m = LW(psrc_ld_m); \
|
| + val1_m = LW(psrc_ld_m + 4); \
|
| + val_m = (uint64) (val1_m); /* NOLINT */ \
|
| + val_m = (uint64) ((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
|
| + val_m = (uint64) (val_m | (uint64) val0_m); /* NOLINT */ \
|
| + val_m; \
|
| })
|
| #endif // (__mips == 64)
|
| #else // !(__mips_isa_rev >= 6)
|
| - #define LW(psrc) ({ \
|
| - uint8* psrc_lw_m = (uint8*) (psrc); \ /* NOLINT */
|
| - uint32 val_m; \
|
| - asm volatile ( \
|
| - "ulw %[val_m], %[psrc_lw_m] \n\t" \
|
| - : [val_m] "=r" (val_m) \
|
| - : [psrc_lw_m] "m" (*psrc_lw_m) \
|
| - ); \
|
| - val_m; \
|
| + #define LW(psrc) ({ \
|
| + uint8* psrc_lw_m = (uint8*) (psrc); /* NOLINT */ \
|
| + uint32 val_m; \
|
| + asm volatile ( \
|
| + "ulw %[val_m], %[psrc_lw_m] \n\t" \
|
| + : [val_m] "=r" (val_m) \
|
| + : [psrc_lw_m] "m" (*psrc_lw_m) \
|
| + ); \
|
| + val_m; \
|
| })
|
|
|
| #if (__mips == 64)
|
| - #define LD(psrc) ({ \
|
| - uint8* psrc_ld_m = (uint8*) (psrc); \ /* NOLINT */
|
| - uint64 val_m = 0; \
|
| - asm volatile ( \
|
| - "uld %[val_m], %[psrc_ld_m] \n\t" \
|
| - : [val_m] "=r" (val_m) \
|
| - : [psrc_ld_m] "m" (*psrc_ld_m) \
|
| - ); \
|
| - val_m; \
|
| + #define LD(psrc) ({ \
|
| + uint8* psrc_ld_m = (uint8*) (psrc); /* NOLINT */ \
|
| + uint64 val_m = 0; \
|
| + asm volatile ( \
|
| + "uld %[val_m], %[psrc_ld_m] \n\t" \
|
| + : [val_m] "=r" (val_m) \
|
| + : [psrc_ld_m] "m" (*psrc_ld_m) \
|
| + ); \
|
| + val_m; \
|
| })
|
| #else // !(__mips == 64)
|
| - #define LD(psrc) ({ \
|
| - uint8* psrc_ld_m = (uint8*) (psrc); \ /* NOLINT */
|
| - uint32 val0_m, val1_m; \
|
| - uint64 val_m = 0; \
|
| - val0_m = LW(psrc_ld_m); \
|
| - val1_m = LW(psrc_ld_m + 4); \
|
| - val_m = (uint64) (val1_m); \ /* NOLINT */
|
| - val_m = (uint64) ((val_m << 32) & 0xFFFFFFFF00000000); \ /* NOLINT */
|
| - val_m = (uint64) (val_m | (uint64) val0_m); \ /* NOLINT */
|
| - val_m; \
|
| + #define LD(psrc) ({ \
|
| + uint8* psrc_ld_m = (uint8*) (psrc); /* NOLINT */ \
|
| + uint32 val0_m, val1_m; \
|
| + uint64 val_m = 0; \
|
| + val0_m = LW(psrc_ld_m); \
|
| + val1_m = LW(psrc_ld_m + 4); \
|
| + val_m = (uint64) (val1_m); /* NOLINT */ \
|
| + val_m = (uint64) ((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
|
| + val_m = (uint64) (val_m | (uint64) val0_m); /* NOLINT */ \
|
| + val_m; \
|
| })
|
| #endif // (__mips == 64)
|
| #endif // (__mips_isa_rev >= 6)
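|
| The 32-bit LD fallback above synthesizes a 64-bit load from two LW
| loads: the word at (psrc) becomes the low half and the word at
| (psrc + 4) the high half, matching a little-endian 64-bit load. Note
| the '& 0xFFFFFFFF00000000' mask is a no-op after the 32-bit left
| shift of a uint64; it only documents the intent. A roughly equivalent
| portable sketch (illustrative only, not part of this header):
|
|   #include <stdint.h>
|   #include <string.h>
|
|   static inline uint64_t ld_sketch(const uint8_t* psrc) {
|     uint32_t lo, hi;
|     memcpy(&lo, psrc, 4);     /* LW(psrc); alignment-safe like ulw */
|     memcpy(&hi, psrc + 4, 4); /* LW(psrc + 4) */
|     return ((uint64_t)hi << 32) | (uint64_t)lo;
|   }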
|
| @@ -104,15 +103,15 @@
|
| Details : Load 16 byte elements in 'out0' from (psrc)
|
| Load 16 byte elements in 'out1' from (psrc + stride)
|
| */
|
| -#define LD_B2(RTYPE, psrc, stride, out0, out1) { \
|
| - out0 = LD_B(RTYPE, (psrc)); \
|
| - out1 = LD_B(RTYPE, (psrc) + stride); \
|
| +#define LD_B2(RTYPE, psrc, stride, out0, out1) { \
|
| + out0 = LD_B(RTYPE, (psrc)); \
|
| + out1 = LD_B(RTYPE, (psrc) + stride); \
|
| }
|
| #define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
|
|
|
| -#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) { \
|
| - LD_B2(RTYPE, (psrc), stride, out0, out1); \
|
| - LD_B2(RTYPE, (psrc) + 2 * stride , stride, out2, out3); \
|
| +#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) { \
|
| + LD_B2(RTYPE, (psrc), stride, out0, out1); \
|
| + LD_B2(RTYPE, (psrc) + 2 * stride , stride, out2, out3); \
|
| }
|
| #define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
|
|
|
| @@ -122,15 +121,15 @@
|
| Details : Store 16 byte elements from 'in0' to (pdst)
|
| Store 16 byte elements from 'in1' to (pdst + stride)
|
| */
|
| -#define ST_B2(RTYPE, in0, in1, pdst, stride) { \
|
| - ST_B(RTYPE, in0, (pdst)); \
|
| - ST_B(RTYPE, in1, (pdst) + stride); \
|
| +#define ST_B2(RTYPE, in0, in1, pdst, stride) { \
|
| + ST_B(RTYPE, in0, (pdst)); \
|
| + ST_B(RTYPE, in1, (pdst) + stride); \
|
| }
|
| #define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
|
|
|
| -#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) { \
|
| - ST_B2(RTYPE, in0, in1, (pdst), stride); \
|
| - ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
|
| +#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) { \
|
| + ST_B2(RTYPE, in0, in1, (pdst), stride); \
|
| + ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
|
| }
|
| #define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
|
|
|
| @@ -142,9 +141,9 @@
|
| Details : Byte elements from 'in0' & 'in1' are copied selectively to
|
| 'out0' as per control vector 'mask0'
|
| */
|
| -#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) { \
|
| - out0 = (RTYPE) __msa_vshf_b((v16i8) mask0, (v16i8) in1, (v16i8) in0); \
|
| - out1 = (RTYPE) __msa_vshf_b((v16i8) mask1, (v16i8) in3, (v16i8) in2); \
|
| +#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) { \
|
| + out0 = (RTYPE) __msa_vshf_b((v16i8) mask0, (v16i8) in1, (v16i8) in0); \
|
| + out1 = (RTYPE) __msa_vshf_b((v16i8) mask1, (v16i8) in3, (v16i8) in2); \
|
| }
|
| #define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
|
|
|
| @@ -155,9 +154,9 @@
|
| Details : Right half of byte elements from 'in0' and 'in1' are
|
| interleaved and written to 'out0'
|
| */
|
| -#define ILVRL_B2(RTYPE, in0, in1, out0, out1) { \
|
| - out0 = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1); \
|
| - out1 = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1); \
|
| +#define ILVRL_B2(RTYPE, in0, in1, out0, out1) { \
|
| + out0 = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1); \
|
| + out1 = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1); \
|
| }
|
| #define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
|
|