OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #ifndef INCLUDE_LIBYUV_MACROS_MSA_H_ | 11 #ifndef INCLUDE_LIBYUV_MACROS_MSA_H_ |
12 #define INCLUDE_LIBYUV_MACROS_MSA_H_ | 12 #define INCLUDE_LIBYUV_MACROS_MSA_H_ |
13 | 13 |
14 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) | 14 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |
15 #include <stdint.h> | 15 #include <stdint.h> |
16 #include <msa.h> | 16 #include <msa.h> |
17 | 17 |
18 #if (__mips_isa_rev >= 6) | 18 #if (__mips_isa_rev >= 6) |
19 #define LW(psrc) ( { \ | 19 #define LW(psrc) ({ \ |
20 uint8 *psrc_lw_m = (uint8 *) (psrc); \ | 20 uint8* psrc_lw_m = (uint8*) (psrc); /* NOLINT */ \ |
21 uint32 val_m; \ | 21 uint32 val_m; \ |
22 \ | |
23 asm volatile ( \ | 22 asm volatile ( \ |
24 "lw %[val_m], %[psrc_lw_m] \n\t" \ | 23 "lw %[val_m], %[psrc_lw_m] \n\t" \ |
25 \ | |
26 : [val_m] "=r" (val_m) \ | 24 : [val_m] "=r" (val_m) \ |
27 : [psrc_lw_m] "m" (*psrc_lw_m) \ | 25 : [psrc_lw_m] "m" (*psrc_lw_m) \ |
28 ); \ | 26 ); \ |
29 \ | 27 \ |
30 val_m; \ | 28 val_m; \ |
31 } ) | 29 }) |
32 | 30 |
33 #if (__mips == 64) | 31 #if (__mips == 64) |
34 #define LD(psrc) ( { \ | 32 #define LD(psrc) ({ \ |
35 uint8 *psrc_ld_m = (uint8 *) (psrc); \ | 33 uint8* psrc_ld_m = (uint8*) (psrc); /* NOLINT */ \ |
36 uint64 val_m = 0; \ | 34 uint64 val_m = 0; \ |
37 \ | |
38 asm volatile ( \ | 35 asm volatile ( \ |
39 "ld %[val_m], %[psrc_ld_m] \n\t" \ | 36 "ld %[val_m], %[psrc_ld_m] \n\t" \ |
40 \ | |
41 : [val_m] "=r" (val_m) \ | 37 : [val_m] "=r" (val_m) \ |
42 : [psrc_ld_m] "m" (*psrc_ld_m) \ | 38 : [psrc_ld_m] "m" (*psrc_ld_m) \ |
43 ); \ | 39 ); \ |
44 \ | |
45 val_m; \ | 40 val_m; \ |
46 } ) | 41 }) |
47 #else // !(__mips == 64) | 42 #else // !(__mips == 64) |
48 #define LD(psrc) ( { \ | 43 #define LD(psrc) ({ \ |
49 uint8 *psrc_ld_m = (uint8 *) (psrc); \ | 44 uint8* psrc_ld_m = (uint8*) (psrc); /* NOLINT */ \ |
50 uint32 val0_m, val1_m; \ | 45 uint32 val0_m, val1_m; \ |
51 uint64 val_m = 0; \ | 46 uint64 val_m = 0; \ |
52 \ | |
53 val0_m = LW(psrc_ld_m); \ | 47 val0_m = LW(psrc_ld_m); \ |
54 val1_m = LW(psrc_ld_m + 4); \ | 48 val1_m = LW(psrc_ld_m + 4); \ |
55 \ | 49 val_m = (uint64) (val1_m); /* NOLINT */ \ |
56 val_m = (uint64) (val1_m); \ | 50 val_m = (uint64) ((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ |
57 val_m = (uint64) ((val_m << 32) & 0xFFFFFFFF00000000); \ | 51 val_m = (uint64) (val_m | (uint64) val0_m); /* NOLINT */ \ |
58 val_m = (uint64) (val_m | (uint64) val0_m); \ | |
59 \ | |
60 val_m; \ | 52 val_m; \ |
61 } ) | 53 }) |
62 #endif // (__mips == 64) | 54 #endif // (__mips == 64) |
63 #else // !(__mips_isa_rev >= 6) | 55 #else // !(__mips_isa_rev >= 6) |
64 #define LW(psrc) ( { \ | 56 #define LW(psrc) ({ \ |
65 uint8 *psrc_lw_m = (uint8 *) (psrc); \ | 57 uint8* psrc_lw_m = (uint8*) (psrc); /* NOLINT */ \ |
66 uint32 val_m; \ | 58 uint32 val_m; \ |
67 \ | |
68 asm volatile ( \ | 59 asm volatile ( \ |
69 "ulw %[val_m], %[psrc_lw_m] \n\t" \ | 60 "ulw %[val_m], %[psrc_lw_m] \n\t" \ |
70 \ | |
71 : [val_m] "=r" (val_m) \ | 61 : [val_m] "=r" (val_m) \ |
72 : [psrc_lw_m] "m" (*psrc_lw_m) \ | 62 : [psrc_lw_m] "m" (*psrc_lw_m) \ |
73 ); \ | 63 ); \ |
74 \ | |
75 val_m; \ | 64 val_m; \ |
76 } ) | 65 }) |
77 | 66 |
78 #if (__mips == 64) | 67 #if (__mips == 64) |
79 #define LD(psrc) ( { \ | 68 #define LD(psrc) ({ \ |
80 uint8 *psrc_ld_m = (uint8 *) (psrc); \ | 69 uint8* psrc_ld_m = (uint8*) (psrc); /* NOLINT */ \ |
81 uint64 val_m = 0; \ | 70 uint64 val_m = 0; \ |
82 \ | |
83 asm volatile ( \ | 71 asm volatile ( \ |
84 "uld %[val_m], %[psrc_ld_m] \n\t" \ | 72 "uld %[val_m], %[psrc_ld_m] \n\t" \ |
85 \ | |
86 : [val_m] "=r" (val_m) \ | 73 : [val_m] "=r" (val_m) \ |
87 : [psrc_ld_m] "m" (*psrc_ld_m) \ | 74 : [psrc_ld_m] "m" (*psrc_ld_m) \ |
88 ); \ | 75 ); \ |
89 \ | |
90 val_m; \ | 76 val_m; \ |
91 } ) | 77 }) |
92 #else // !(__mips == 64) | 78 #else // !(__mips == 64) |
93 #define LD(psrc) ( { \ | 79 #define LD(psrc) ({ \ |
94 uint8 *psrc_ld_m = (uint8 *) (psrc); \ | 80 uint8* psrc_ld_m = (uint8*) (psrc); /* NOLINT */ \ |
95 uint32 val0_m, val1_m; \ | 81 uint32 val0_m, val1_m; \ |
96 uint64 val_m = 0; \ | 82 uint64 val_m = 0; \ |
97 \ | |
98 val0_m = LW(psrc_ld_m); \ | 83 val0_m = LW(psrc_ld_m); \ |
99 val1_m = LW(psrc_ld_m + 4); \ | 84 val1_m = LW(psrc_ld_m + 4); \ |
100 \ | 85 val_m = (uint64) (val1_m); /* NOLINT */ \ |
101 val_m = (uint64) (val1_m); \ | 86 val_m = (uint64) ((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ |
102 val_m = (uint64) ((val_m << 32) & 0xFFFFFFFF00000000); \ | 87 val_m = (uint64) (val_m | (uint64) val0_m); /* NOLINT */ \ |
103 val_m = (uint64) (val_m | (uint64) val0_m); \ | |
104 \ | |
105 val_m; \ | 88 val_m; \ |
106 } ) | 89 }) |
107 #endif // (__mips == 64) | 90 #endif // (__mips == 64) |
108 #endif // (__mips_isa_rev >= 6) | 91 #endif // (__mips_isa_rev >= 6) |
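
Both LW and LD expand to GCC statement expressions, so they can be used wherever an ordinary expression is legal; the pre-R6 variants use ulw/uld and therefore tolerate unaligned pointers. A minimal usage sketch (the function and buffer names are illustrative; uint8/uint32/uint64 are the libyuv typedefs from libyuv/basic_types.h):

  #include "libyuv/basic_types.h"
  #include "libyuv/macros_msa.h"

  static uint64 Read64(void* psrc) {
    uint32 lo = LW(psrc);   /* 32-bit load; pre-R6 ulw tolerates misalignment */
    uint64 all = LD(psrc);  /* 64-bit load; built from two LW loads on 32-bit targets */
    (void) lo;
    return all;
  }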
109 | 92 |
| 93 // TODO(fbarchard): Consider removing the __VA_ARGS__ versions. |
110 #define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */ | 94 #define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */ |
111 #define LD_UB(...) LD_B(v16u8, __VA_ARGS__) | 95 #define LD_UB(...) LD_B(v16u8, __VA_ARGS__) |
112 | 96 |
113 #define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ | 97 #define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ |
114 #define ST_UB(...) ST_B(v16u8, __VA_ARGS__) | 98 #define ST_UB(...) ST_B(v16u8, __VA_ARGS__) |
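
LD_UB and ST_UB, by contrast, are whole-vector moves: a single 16-byte MSA load or store through a v16u8 pointer. Sketch (array names are illustrative):

  uint8 src[16] = {0}, dst[16];
  v16u8 v = LD_UB(src);   /* load 16 bytes into one MSA register */
  ST_UB(v, dst);          /* store all 16 bytes back out */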
115 | 99 |
116 /* Description : Load two vectors with 16 'byte' sized elements | 100 /* Description : Load two vectors with 16 'byte' sized elements |
117 Arguments : Inputs - psrc, stride | 101 Arguments : Inputs - psrc, stride |
118 Outputs - out0, out1 | 102 Outputs - out0, out1 |
119 Return Type - as per RTYPE | 103 Return Type - as per RTYPE |
(...skipping 16 matching lines...) |
136 elements | 120 elements |
137 Arguments : Inputs - in0, in1, pdst, stride | 121 Arguments : Inputs - in0, in1, pdst, stride |
138 Details : Store 16 byte elements from 'in0' to (pdst) | 122 Details : Store 16 byte elements from 'in0' to (pdst) |
139 Store 16 byte elements from 'in1' to (pdst + stride) | 123 Store 16 byte elements from 'in1' to (pdst + stride) |
140 */ | 124 */ |
141 #define ST_B2(RTYPE, in0, in1, pdst, stride) { \ | 125 #define ST_B2(RTYPE, in0, in1, pdst, stride) { \ |
142 ST_B(RTYPE, in0, (pdst)); \ | 126 ST_B(RTYPE, in0, (pdst)); \ |
143 ST_B(RTYPE, in1, (pdst) + stride); \ | 127 ST_B(RTYPE, in1, (pdst) + stride); \ |
144 } | 128 } |
145 #define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__) | 129 #define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__) |
146 # | 130 |
147 #define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) { \ | 131 #define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) { \ |
148 ST_B2(RTYPE, in0, in1, (pdst), stride); \ | 132 ST_B2(RTYPE, in0, in1, (pdst), stride); \ |
149 ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \ | 133 ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \ |
150 } | 134 } |
151 #define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__) | 135 #define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__) |
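
ST_B2/ST_B4 advance pdst by stride between vectors, so a 16-byte-wide tile can be written row by row in one call. Sketch, assuming an illustrative 32-byte row stride (__msa_fill_b is the MSA byte-splat intrinsic):

  uint8 dst[4 * 32];                       /* four rows, 32-byte stride */
  v16u8 r0 = (v16u8) __msa_fill_b(0x00);   /* splat one byte across a vector */
  v16u8 r1 = (v16u8) __msa_fill_b(0x55);
  v16u8 r2 = (v16u8) __msa_fill_b(0xAA);
  v16u8 r3 = (v16u8) __msa_fill_b(0xFF);
  ST_UB4(r0, r1, r2, r3, dst, 32);         /* rows land at dst, dst+32, dst+64, dst+96 */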
152 # | 136 |
| 137 // TODO(fbarchard): Consider using __msa_vshf_b and __msa_ilvr_b directly. |
153 /* Description : Shuffle byte vector elements as per mask vector | 138 /* Description : Shuffle byte vector elements as per mask vector |
154 Arguments : Inputs - in0, in1, in2, in3, mask0, mask1 | 139 Arguments : Inputs - in0, in1, in2, in3, mask0, mask1 |
155 Outputs - out0, out1 | 140 Outputs - out0, out1 |
156 Return Type - as per RTYPE | 141 Return Type - as per RTYPE |
157 Details : Byte elements from 'in0' & 'in1' are copied selectively to | 142 Details : Byte elements from 'in0' & 'in1' are copied selectively to |
158 'out0' as per control vector 'mask0' | 143 'out0' as per control vector 'mask0' |
159 */ | 144 */ |
160 #define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) { \ | 145 #define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) { \ |
161 out0 = (RTYPE) __msa_vshf_b((v16i8) mask0, (v16i8) in1, (v16i8) in0); \ | 146 out0 = (RTYPE) __msa_vshf_b((v16i8) mask0, (v16i8) in1, (v16i8) in0); \ |
162 out1 = (RTYPE) __msa_vshf_b((v16i8) mask1, (v16i8) in3, (v16i8) in2); \ | 147 out1 = (RTYPE) __msa_vshf_b((v16i8) mask1, (v16i8) in3, (v16i8) in2); \ |
163 } | 148 } |
164 #define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__) | 149 #define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__) |
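
Passing the same vector as both data operands turns VSHF_B2 into a single-vector permutation, which avoids reasoning about the two-source index split. A sketch that byte-reverses two vectors (the mask layout assumes vshf.b control values 0-15 index within one 16-byte source):

  uint8 src0[16] = {0}, src1[16] = {0};
  v16i8 rev = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0};
  v16u8 a = LD_UB(src0);
  v16u8 b = LD_UB(src1);
  v16u8 out0, out1;
  VSHF_B2_UB(a, a, b, b, rev, rev, out0, out1);  /* out0 = reverse(a), out1 = reverse(b) */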
165 | 150 |
166 /* Description : Interleave both left and right half of input vectors | 151 /* Description : Interleave both left and right half of input vectors |
167 Arguments : Inputs - in0, in1 | 152 Arguments : Inputs - in0, in1 |
168 Outputs - out0, out1 | 153 Outputs - out0, out1 |
169 Return Type - as per RTYPE | 154 Return Type - as per RTYPE |
170 Details : Right half of byte elements from 'in0' and 'in1' are | 155 Details : Right half of byte elements from 'in0' and 'in1' are |
171 interleaved and written to 'out0' | 156 interleaved and written to 'out0' |
172 */ | 157 */ |
173 #define ILVRL_B2(RTYPE, in0, in1, out0, out1) { \ | 158 #define ILVRL_B2(RTYPE, in0, in1, out0, out1) { \ |
174 out0 = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1); \ | 159 out0 = (RTYPE) __msa_ilvr_b((v16i8) in0, (v16i8) in1); \ |
175 out1 = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1); \ | 160 out1 = (RTYPE) __msa_ilvl_b((v16i8) in0, (v16i8) in1); \ |
176 } | 161 } |
177 #define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) | 162 #define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) |
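
Taken together, ilvr_b and ilvl_b merge two 16-byte vectors into 32 interleaved bytes, the usual step when packing two planes (e.g. U and V) into alternating pairs. Sketch; which input supplies the even output bytes follows the ilvr.b/ilvl.b operand order and is worth checking against the ISA manual:

  uint8 u[16] = {0}, v[16] = {0}, dst[32];
  v16u8 a = LD_UB(u);
  v16u8 b = LD_UB(v);
  v16u8 lo, hi;
  ILVRL_B2_UB(a, b, lo, hi);  /* lo: low halves of a/b interleaved; hi: high halves */
  ST_UB2(lo, hi, dst, 16);    /* 32 interleaved bytes written contiguously */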
178 | 163 |
179 #endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */ | 164 #endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */ |
180 | 165 |
181 #endif // INCLUDE_LIBYUV_MACROS_MSA_H_ | 166 #endif // INCLUDE_LIBYUV_MACROS_MSA_H_ |