Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 44 uint64 val_m = 0; \ | 44 uint64 val_m = 0; \ |
| 45 val0_m = LW(psrc_ld_m); \ | 45 val0_m = LW(psrc_ld_m); \ |
| 46 val1_m = LW(psrc_ld_m + 4); \ | 46 val1_m = LW(psrc_ld_m + 4); \ |
| 47 val_m = (uint64)(val1_m); /* NOLINT */ \ | 47 val_m = (uint64)(val1_m); /* NOLINT */ \ |
| 48 val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ | 48 val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ |
| 49 val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \ | 49 val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \ |
| 50 val_m; \ | 50 val_m; \ |
| 51 }) | 51 }) |
| 52 #endif // (__mips == 64) | 52 #endif // (__mips == 64) |
| 53 | 53 |
| 54 #define SW(val, pdst) \ | 54 #define SW(val, pdst) \ |
| 55 ({ \ | 55 ({ \ |
| 56 uint8_t* pdst_sw_m = (uint8_t*)(pdst); \ | 56 uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \ |
| 57 uint32_t val_m = (val); \ | 57 uint32_t val_m = (val); \ |
| 58 asm volatile("sw %[val_m], %[pdst_sw_m] \n" \ | 58 asm volatile("sw %[val_m], %[pdst_sw_m] \n" \ |
| 59 \ | 59 : [pdst_sw_m] "=m"(*pdst_sw_m) \ |
| 60 : [pdst_sw_m] "=m"(*pdst_sw_m) \ | 60 : [val_m] "r"(val_m)); \ |
| 61 : [val_m] "r"(val_m)); \ | |
| 62 }) | 61 }) |
| 63 | 62 |
| 64 #if (__mips == 64) | 63 #if (__mips == 64) |
| 65 #define SD(val, pdst) \ | 64 #define SD(val, pdst) \ |
| 66 ({ \ | 65 ({ \ |
| 67 uint8_t* pdst_sd_m = (uint8_t*)(pdst); \ | 66 uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \ |
| 68 uint64_t val_m = (val); \ | 67 uint64_t val_m = (val); \ |
| 69 asm volatile("sd %[val_m], %[pdst_sd_m] \n" \ | 68 asm volatile("sd %[val_m], %[pdst_sd_m] \n" \ |
| 70 \ | 69 : [pdst_sd_m] "=m"(*pdst_sd_m) \ |
| 71 : [pdst_sd_m] "=m"(*pdst_sd_m) \ | 70 : [val_m] "r"(val_m)); \ |
| 72 : [val_m] "r"(val_m)); \ | |
| 73 }) | 71 }) |
| 74 #else // !(__mips == 64) | 72 #else // !(__mips == 64) |
| 75 #define SD(val, pdst) \ | 73 #define SD(val, pdst) \ |
| 76 ({ \ | 74 ({ \ |
| 77 uint8_t* pdst_sd_m = (uint8_t*)(pdst); \ | 75 uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \ |
| 78 uint32_t val0_m, val1_m; \ | 76 uint32_t val0_m, val1_m; \ |
| 79 val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ | 77 val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ |
| 80 val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ | 78 val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ |
| 81 SW(val0_m, pdst_sd_m); \ | 79 SW(val0_m, pdst_sd_m); \ |
| 82 SW(val1_m, pdst_sd_m + 4); \ | 80 SW(val1_m, pdst_sd_m + 4); \ |
| 83 }) | 81 }) |
| 84 #endif // !(__mips == 64) | 82 #endif // !(__mips == 64) |
| 85 #else // !(__mips_isa_rev >= 6) | 83 #else // !(__mips_isa_rev >= 6) |
| 86 #define LW(psrc) \ | 84 #define LW(psrc) \ |
| 87 ({ \ | 85 ({ \ |
| (...skipping 23 matching lines...) Expand all Loading... | |
| 111 uint64 val_m = 0; \ | 109 uint64 val_m = 0; \ |
| 112 val0_m = LW(psrc_ld_m); \ | 110 val0_m = LW(psrc_ld_m); \ |
| 113 val1_m = LW(psrc_ld_m + 4); \ | 111 val1_m = LW(psrc_ld_m + 4); \ |
| 114 val_m = (uint64)(val1_m); /* NOLINT */ \ | 112 val_m = (uint64)(val1_m); /* NOLINT */ \ |
| 115 val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ | 113 val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ |
| 116 val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \ | 114 val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \ |
| 117 val_m; \ | 115 val_m; \ |
| 118 }) | 116 }) |
| 119 #endif // (__mips == 64) | 117 #endif // (__mips == 64) |
| 120 | 118 |
| 121 #define SW(val, pdst) \ | 119 #define SW(val, pdst) \ |
| 122 ({ \ | 120 ({ \ |
| 123 uint8_t* pdst_sw_m = (uint8_t*)(pdst); \ | 121 uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \ |
| 124 uint32_t val_m = (val); \ | 122 uint32_t val_m = (val); \ |
| 125 asm volatile("usw %[val_m], %[pdst_sw_m] \n" \ | 123 asm volatile("usw %[val_m], %[pdst_sw_m] \n" \ |
| 126 : [pdst_sw_m] "=m"(*pdst_sw_m) \ | 124 : [pdst_sw_m] "=m"(*pdst_sw_m) \ |
| 127 : [val_m] "r"(val_m)); \ | 125 : [val_m] "r"(val_m)); \ |
| 128 }) | 126 }) |
| 129 | 127 |
| 130 #define SD(val, pdst) \ | 128 #define SD(val, pdst) \ |
| 131 ({ \ | 129 ({ \ |
| 132 uint8_t* pdst_sd_m = (uint8_t*)(pdst); \ | 130 uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \ |
| 133 uint32_t val0_m, val1_m; \ | 131 uint32_t val0_m, val1_m; \ |
| 134 val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ | 132 val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ |
| 135 val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ | 133 val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ |
| 136 SW(val0_m, pdst_sd_m); \ | 134 SW(val0_m, pdst_sd_m); \ |
| 137 SW(val1_m, pdst_sd_m + 4); \ | 135 SW(val1_m, pdst_sd_m + 4); \ |
| 138 }) | 136 }) |
| 139 #endif // (__mips_isa_rev >= 6) | 137 #endif // (__mips_isa_rev >= 6) |
| 140 | 138 |
| 141 // TODO(fbarchard): Consider removing __VAR_ARGS versions. | 139 // TODO(fbarchard): Consider removing __VAR_ARGS versions. |
| 142 #define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */ | 140 #define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */ |
| 143 #define LD_UB(...) LD_B(v16u8, __VA_ARGS__) | 141 #define LD_UB(...) LD_B(v16u8, __VA_ARGS__) |
| 144 | 142 |
| 145 #define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ | 143 #define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ |
| 146 #define ST_UB(...) ST_B(v16u8, __VA_ARGS__) | 144 #define ST_UB(...) ST_B(v16u8, __VA_ARGS__) |
| 147 | 145 |
| 146 #define ST_H(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ | |
| 147 #define ST_UH(...) ST_H(v8u16, __VA_ARGS__) | |
| 148 | |
| 148 /* Description : Load two vectors with 16 'byte' sized elements | 149 /* Description : Load two vectors with 16 'byte' sized elements |
| 149 Arguments : Inputs - psrc, stride | 150 Arguments : Inputs - psrc, stride |
| 150 Outputs - out0, out1 | 151 Outputs - out0, out1 |
| 151 Return Type - as per RTYPE | 152 Return Type - as per RTYPE |
| 152 Details : Load 16 byte elements in 'out0' from (psrc) | 153 Details : Load 16 byte elements in 'out0' from (psrc) |
| 153 Load 16 byte elements in 'out1' from (psrc + stride) | 154 Load 16 byte elements in 'out1' from (psrc + stride) |
| 154 */ | 155 */ |
| 155 #define LD_B2(RTYPE, psrc, stride, out0, out1) \ | 156 #define LD_B2(RTYPE, psrc, stride, out0, out1) \ |
| 156 { \ | 157 { \ |
| 157 out0 = LD_B(RTYPE, (psrc)); \ | 158 out0 = LD_B(RTYPE, (psrc)); \ |
| (...skipping 21 matching lines...) Expand all Loading... | |
| 179 } | 180 } |
| 180 #define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__) | 181 #define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__) |
| 181 | 182 |
| 182 #define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \ | 183 #define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \ |
| 183 { \ | 184 { \ |
| 184 ST_B2(RTYPE, in0, in1, (pdst), stride); \ | 185 ST_B2(RTYPE, in0, in1, (pdst), stride); \ |
| 185 ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \ | 186 ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \ |
| 186 } | 187 } |
| 187 #define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__) | 188 #define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__) |
| 188 | 189 |
| 190 /* Description : Store vectors of 8 halfword elements with stride | |
| 191 Arguments : Inputs - in0, in1, pdst, stride | |
| 192 Details : Store 8 halfword elements from 'in0' to (pdst) | |
| 193 Store 8 halfword elements from 'in1' to (pdst + stride) | |
| 194 */ | |
| 195 #define ST_H2(RTYPE, in0, in1, pdst, stride) \ | |
|
fbarchard1
2016/12/16 18:54:06
Is this the correct style for macros? Try clang-format.
manojkumar.bhosale
2016/12/20 09:25:56
Already did the clang-format as above. Tried again.
| 196 { \ | |
| 197 ST_H(RTYPE, in0, (pdst)); \ | |
| 198 ST_H(RTYPE, in1, (pdst) + stride); \ | |
| 199 } | |
| 200 #define ST_UH2(...) ST_H2(v8u16, __VA_ARGS__) | |
| 201 | |
| 189 // TODO(fbarchard): Consider using __msa_vshf_b and __msa_ilvr_b directly. | 202 // TODO(fbarchard): Consider using __msa_vshf_b and __msa_ilvr_b directly. |
| 190 /* Description : Shuffle byte vector elements as per mask vector | 203 /* Description : Shuffle byte vector elements as per mask vector |
| 191 Arguments : Inputs - in0, in1, in2, in3, mask0, mask1 | 204 Arguments : Inputs - in0, in1, in2, in3, mask0, mask1 |
| 192 Outputs - out0, out1 | 205 Outputs - out0, out1 |
| 193 Return Type - as per RTYPE | 206 Return Type - as per RTYPE |
| 194 Details : Byte elements from 'in0' & 'in1' are copied selectively to | 207 Details : Byte elements from 'in0' & 'in1' are copied selectively to |
| 195 'out0' as per control vector 'mask0' | 208 'out0' as per control vector 'mask0' |
| 196 */ | 209 */ |
| 197 #define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ | 210 #define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ |
| 198 { \ | 211 { \ |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 211 #define ILVRL_B2(RTYPE, in0, in1, out0, out1) \ | 224 #define ILVRL_B2(RTYPE, in0, in1, out0, out1) \ |
| 212 { \ | 225 { \ |
| 213 out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ | 226 out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ |
| 214 out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \ | 227 out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \ |
| 215 } | 228 } |
| 216 #define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) | 229 #define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) |
| 217 | 230 |
| 218 #endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */ | 231 #endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */ |
| 219 | 232 |
| 220 #endif // INCLUDE_LIBYUV_MACROS_MSA_H_ | 233 #endif // INCLUDE_LIBYUV_MACROS_MSA_H_ |
| OLD | NEW |