| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #ifndef INCLUDE_LIBYUV_MACROS_MSA_H_ | 11 #ifndef INCLUDE_LIBYUV_MACROS_MSA_H_ |
| 12 #define INCLUDE_LIBYUV_MACROS_MSA_H_ | 12 #define INCLUDE_LIBYUV_MACROS_MSA_H_ |
| 13 | 13 |
| 14 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) | 14 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |
| 15 #include <msa.h> | 15 #include <msa.h> |
| 16 #include <stdint.h> | 16 #include <stdint.h> |
| 17 | 17 |
| 18 #if (__mips_isa_rev >= 6) | 18 #if (__mips_isa_rev >= 6) |
| 19 #define LW(psrc) \ | 19 #define LW(psrc) \ |
| 20 ({ \ | 20 ({ \ |
| 21 uint8* psrc_lw_m = (uint8*)(psrc); /* NOLINT */ \ | 21 uint8* psrc_lw_m = (uint8*)(psrc); /* NOLINT */ \ |
| 22 uint32 val_m; \ | 22 uint32 val_m; \ |
| 23 asm volatile("lw %[val_m], %[psrc_lw_m] \n\t" \ | 23 asm volatile("lw %[val_m], %[psrc_lw_m] \n" \ |
| 24 : [val_m] "=r"(val_m) \ | 24 : [val_m] "=r"(val_m) \ |
| 25 : [psrc_lw_m] "m"(*psrc_lw_m)); \ | 25 : [psrc_lw_m] "m"(*psrc_lw_m)); \ |
| 26 val_m; \ | 26 val_m; \ |
| 27 }) | 27 }) |
| 28 | 28 |
| 29 #if (__mips == 64) | 29 #if (__mips == 64) |
| 30 #define LD(psrc) \ | 30 #define LD(psrc) \ |
| 31 ({ \ | 31 ({ \ |
| 32 uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \ | 32 uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \ |
| 33 uint64 val_m = 0; \ | 33 uint64 val_m = 0; \ |
| 34 asm volatile("ld %[val_m], %[psrc_ld_m] \n\t" \ | 34 asm volatile("ld %[val_m], %[psrc_ld_m] \n" \ |
| 35 : [val_m] "=r"(val_m) \ | 35 : [val_m] "=r"(val_m) \ |
| 36 : [psrc_ld_m] "m"(*psrc_ld_m)); \ | 36 : [psrc_ld_m] "m"(*psrc_ld_m)); \ |
| 37 val_m; \ | 37 val_m; \ |
| 38 }) | 38 }) |
| 39 #else // !(__mips == 64) | 39 #else // !(__mips == 64) |
| 40 #define LD(psrc) \ | 40 #define LD(psrc) \ |
| 41 ({ \ | 41 ({ \ |
| 42 uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \ | 42 uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \ |
| 43 uint32 val0_m, val1_m; \ | 43 uint32 val0_m, val1_m; \ |
| 44 uint64 val_m = 0; \ | 44 uint64 val_m = 0; \ |
| 45 val0_m = LW(psrc_ld_m); \ | 45 val0_m = LW(psrc_ld_m); \ |
| 46 val1_m = LW(psrc_ld_m + 4); \ | 46 val1_m = LW(psrc_ld_m + 4); \ |
| 47 val_m = (uint64)(val1_m); /* NOLINT */ \ | 47 val_m = (uint64)(val1_m); /* NOLINT */ \ |
| 48 val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ | 48 val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ |
| 49 val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \ | 49 val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \ |
| 50 val_m; \ | 50 val_m; \ |
| 51 }) | 51 }) |
| 52 #endif // (__mips == 64) | 52 #endif // (__mips == 64) |
| 53 #else // !(__mips_isa_rev >= 6) | 53 |
| 54 #define LW(psrc) \ | 54 #define SW(val, pdst) \ |
| 55 ({ \ | 55 ({ \ |
| 56 uint8* psrc_lw_m = (uint8*)(psrc); /* NOLINT */ \ | 56 uint8_t* pdst_sw_m = (uint8_t*)(pdst); \ |
| 57 uint32 val_m; \ | 57 uint32_t val_m = (val); \ |
| 58 asm volatile("ulw %[val_m], %[psrc_lw_m] \n\t" \ | 58 asm volatile("sw %[val_m], %[pdst_sw_m] \n" \ |
| 59 : [val_m] "=r"(val_m) \ | 59 \ |
| 60 : [psrc_lw_m] "m"(*psrc_lw_m)); \ | 60 : [pdst_sw_m] "=m"(*pdst_sw_m) \ |
| 61 val_m; \ | 61 : [val_m] "r"(val_m)); \ |
| 62 }) | 62 }) |
| 63 | 63 |
| 64 #if (__mips == 64) | 64 #if (__mips == 64) |
| 65 #define LD(psrc) \ | 65 #define SD(val, pdst) \ |
| 66 ({ \ | 66 ({ \ |
| 67 uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \ | 67 uint8_t* pdst_sd_m = (uint8_t*)(pdst); \ |
| 68 uint64 val_m = 0; \ | 68 uint64_t val_m = (val); \ |
| 69 asm volatile("uld %[val_m], %[psrc_ld_m] \n\t" \ | 69 asm volatile("sd %[val_m], %[pdst_sd_m] \n" \ |
| 70 : [val_m] "=r"(val_m) \ | 70 \ |
| 71 : [psrc_ld_m] "m"(*psrc_ld_m)); \ | 71 : [pdst_sd_m] "=m"(*pdst_sd_m) \ |
| 72 val_m; \ | 72 : [val_m] "r"(val_m)); \ |
| 73 }) |
| 74 #else // !(__mips == 64) |
| 75 #define SD(val, pdst) \ |
| 76 ({ \ |
| 77 uint8_t* pdst_sd_m = (uint8_t*)(pdst); \ |
| 78 uint32_t val0_m, val1_m; \ |
| 79 val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ |
| 80 val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ |
| 81 SW(val0_m, pdst_sd_m); \ |
| 82 SW(val1_m, pdst_sd_m + 4); \ |
| 83 }) |
| 84 #endif // !(__mips == 64) |
| 85 #else // !(__mips_isa_rev >= 6) |
| 86 #define LW(psrc) \ |
| 87 ({ \ |
| 88 uint8* psrc_lw_m = (uint8*)(psrc); /* NOLINT */ \ |
| 89 uint32 val_m; \ |
| 90 asm volatile("ulw %[val_m], %[psrc_lw_m] \n" \ |
| 91 : [val_m] "=r"(val_m) \ |
| 92 : [psrc_lw_m] "m"(*psrc_lw_m)); \ |
| 93 val_m; \ |
| 94 }) |
| 95 |
| 96 #if (__mips == 64) |
| 97 #define LD(psrc) \ |
| 98 ({ \ |
| 99 uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \ |
| 100 uint64 val_m = 0; \ |
| 101 asm volatile("uld %[val_m], %[psrc_ld_m] \n" \ |
| 102 : [val_m] "=r"(val_m) \ |
| 103 : [psrc_ld_m] "m"(*psrc_ld_m)); \ |
| 104 val_m; \ |
| 73 }) | 105 }) |
| 74 #else // !(__mips == 64) | 106 #else // !(__mips == 64) |
| 75 #define LD(psrc) \ | 107 #define LD(psrc) \ |
| 76 ({ \ | 108 ({ \ |
| 77 uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \ | 109 uint8* psrc_ld_m = (uint8*)(psrc); /* NOLINT */ \ |
| 78 uint32 val0_m, val1_m; \ | 110 uint32 val0_m, val1_m; \ |
| 79 uint64 val_m = 0; \ | 111 uint64 val_m = 0; \ |
| 80 val0_m = LW(psrc_ld_m); \ | 112 val0_m = LW(psrc_ld_m); \ |
| 81 val1_m = LW(psrc_ld_m + 4); \ | 113 val1_m = LW(psrc_ld_m + 4); \ |
| 82 val_m = (uint64)(val1_m); /* NOLINT */ \ | 114 val_m = (uint64)(val1_m); /* NOLINT */ \ |
| 83 val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ | 115 val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ |
| 84 val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \ | 116 val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \ |
| 85 val_m; \ | 117 val_m; \ |
| 86 }) | 118 }) |
| 87 #endif // (__mips == 64) | 119 #endif // (__mips == 64) |
| 120 |
| 121 #define SW(val, pdst) \ |
| 122 ({ \ |
| 123 uint8_t* pdst_sw_m = (uint8_t*)(pdst); \ |
| 124 uint32_t val_m = (val); \ |
| 125 asm volatile("usw %[val_m], %[pdst_sw_m] \n" \ |
| 126 : [pdst_sw_m] "=m"(*pdst_sw_m) \ |
| 127 : [val_m] "r"(val_m)); \ |
| 128 }) |
| 129 |
| 130 #define SD(val, pdst) \ |
| 131 ({ \ |
| 132 uint8_t* pdst_sd_m = (uint8_t*)(pdst); \ |
| 133 uint32_t val0_m, val1_m; \ |
| 134 val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ |
| 135 val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ |
| 136 SW(val0_m, pdst_sd_m); \ |
| 137 SW(val1_m, pdst_sd_m + 4); \ |
| 138 }) |
| 88 #endif // (__mips_isa_rev >= 6) | 139 #endif // (__mips_isa_rev >= 6) |
| 89 | 140 |
| 90 // TODO(fbarchard): Consider removing __VA_ARGS__ versions. | 141 // TODO(fbarchard): Consider removing __VA_ARGS__ versions. |
| 91 #define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */ | 142 #define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */ |
| 92 #define LD_UB(...) LD_B(v16u8, __VA_ARGS__) | 143 #define LD_UB(...) LD_B(v16u8, __VA_ARGS__) |
| 93 | 144 |
| 94 #define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ | 145 #define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ |
| 95 #define ST_UB(...) ST_B(v16u8, __VA_ARGS__) | 146 #define ST_UB(...) ST_B(v16u8, __VA_ARGS__) |
| 96 | 147 |
| 97 /* Description : Load two vectors with 16 'byte' sized elements | 148 /* Description : Load two vectors with 16 'byte' sized elements |
| (... 62 matching lines skipped ...) | (... 62 matching lines skipped ...) |
| 160 #define ILVRL_B2(RTYPE, in0, in1, out0, out1) \ | 211 #define ILVRL_B2(RTYPE, in0, in1, out0, out1) \ |
| 161 { \ | 212 { \ |
| 162 out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ | 213 out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ |
| 163 out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \ | 214 out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \ |
| 164 } | 215 } |
| 165 #define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) | 216 #define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) |
| 166 | 217 |
| 167 #endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */ | 218 #endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */ |
| 168 | 219 |
| 169 #endif // INCLUDE_LIBYUV_MACROS_MSA_H_ | 220 #endif // INCLUDE_LIBYUV_MACROS_MSA_H_ |
| OLD | NEW |