Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(711)

Side by Side Diff: include/libyuv/macros_msa.h

Issue 2559683002: Add MSA optimized remaining scale row functions (Closed)
Patch Set: Changes as per review comments Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | include/libyuv/scale_row.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
44 uint64 val_m = 0; \ 44 uint64 val_m = 0; \
45 val0_m = LW(psrc_ld_m); \ 45 val0_m = LW(psrc_ld_m); \
46 val1_m = LW(psrc_ld_m + 4); \ 46 val1_m = LW(psrc_ld_m + 4); \
47 val_m = (uint64)(val1_m); /* NOLINT */ \ 47 val_m = (uint64)(val1_m); /* NOLINT */ \
48 val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ 48 val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
49 val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \ 49 val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \
50 val_m; \ 50 val_m; \
51 }) 51 })
52 #endif // (__mips == 64) 52 #endif // (__mips == 64)
53 53
54 #define SW(val, pdst) \ 54 #define SW(val, pdst) \
55 ({ \ 55 ({ \
56 uint8_t* pdst_sw_m = (uint8_t*)(pdst); \ 56 uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \
57 uint32_t val_m = (val); \ 57 uint32_t val_m = (val); \
58 asm volatile("sw %[val_m], %[pdst_sw_m] \n" \ 58 asm volatile("sw %[val_m], %[pdst_sw_m] \n" \
59 \ 59 : [pdst_sw_m] "=m"(*pdst_sw_m) \
60 : [pdst_sw_m] "=m"(*pdst_sw_m) \ 60 : [val_m] "r"(val_m)); \
61 : [val_m] "r"(val_m)); \
62 }) 61 })
63 62
64 #if (__mips == 64) 63 #if (__mips == 64)
65 #define SD(val, pdst) \ 64 #define SD(val, pdst) \
66 ({ \ 65 ({ \
67 uint8_t* pdst_sd_m = (uint8_t*)(pdst); \ 66 uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
68 uint64_t val_m = (val); \ 67 uint64_t val_m = (val); \
69 asm volatile("sd %[val_m], %[pdst_sd_m] \n" \ 68 asm volatile("sd %[val_m], %[pdst_sd_m] \n" \
70 \ 69 : [pdst_sd_m] "=m"(*pdst_sd_m) \
71 : [pdst_sd_m] "=m"(*pdst_sd_m) \ 70 : [val_m] "r"(val_m)); \
72 : [val_m] "r"(val_m)); \
73 }) 71 })
74 #else // !(__mips == 64) 72 #else // !(__mips == 64)
75 #define SD(val, pdst) \ 73 #define SD(val, pdst) \
76 ({ \ 74 ({ \
77 uint8_t* pdst_sd_m = (uint8_t*)(pdst); \ 75 uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
78 uint32_t val0_m, val1_m; \ 76 uint32_t val0_m, val1_m; \
79 val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ 77 val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \
80 val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ 78 val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \
81 SW(val0_m, pdst_sd_m); \ 79 SW(val0_m, pdst_sd_m); \
82 SW(val1_m, pdst_sd_m + 4); \ 80 SW(val1_m, pdst_sd_m + 4); \
83 }) 81 })
84 #endif // !(__mips == 64) 82 #endif // !(__mips == 64)
85 #else // !(__mips_isa_rev >= 6) 83 #else // !(__mips_isa_rev >= 6)
86 #define LW(psrc) \ 84 #define LW(psrc) \
87 ({ \ 85 ({ \
(...skipping 23 matching lines...) Expand all
111 uint64 val_m = 0; \ 109 uint64 val_m = 0; \
112 val0_m = LW(psrc_ld_m); \ 110 val0_m = LW(psrc_ld_m); \
113 val1_m = LW(psrc_ld_m + 4); \ 111 val1_m = LW(psrc_ld_m + 4); \
114 val_m = (uint64)(val1_m); /* NOLINT */ \ 112 val_m = (uint64)(val1_m); /* NOLINT */ \
115 val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ 113 val_m = (uint64)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
116 val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \ 114 val_m = (uint64)(val_m | (uint64)val0_m); /* NOLINT */ \
117 val_m; \ 115 val_m; \
118 }) 116 })
119 #endif // (__mips == 64) 117 #endif // (__mips == 64)
120 118
121 #define SW(val, pdst) \ 119 #define SW(val, pdst) \
122 ({ \ 120 ({ \
123 uint8_t* pdst_sw_m = (uint8_t*)(pdst); \ 121 uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \
124 uint32_t val_m = (val); \ 122 uint32_t val_m = (val); \
125 asm volatile("usw %[val_m], %[pdst_sw_m] \n" \ 123 asm volatile("usw %[val_m], %[pdst_sw_m] \n" \
126 : [pdst_sw_m] "=m"(*pdst_sw_m) \ 124 : [pdst_sw_m] "=m"(*pdst_sw_m) \
127 : [val_m] "r"(val_m)); \ 125 : [val_m] "r"(val_m)); \
128 }) 126 })
129 127
130 #define SD(val, pdst) \ 128 #define SD(val, pdst) \
131 ({ \ 129 ({ \
132 uint8_t* pdst_sd_m = (uint8_t*)(pdst); \ 130 uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
133 uint32_t val0_m, val1_m; \ 131 uint32_t val0_m, val1_m; \
134 val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ 132 val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \
135 val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ 133 val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \
136 SW(val0_m, pdst_sd_m); \ 134 SW(val0_m, pdst_sd_m); \
137 SW(val1_m, pdst_sd_m + 4); \ 135 SW(val1_m, pdst_sd_m + 4); \
138 }) 136 })
139 #endif // (__mips_isa_rev >= 6) 137 #endif // (__mips_isa_rev >= 6)
140 138
141 // TODO(fbarchard): Consider removing __VAR_ARGS versions. 139 // TODO(fbarchard): Consider removing __VAR_ARGS versions.
142 #define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */ 140 #define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */
143 #define LD_UB(...) LD_B(v16u8, __VA_ARGS__) 141 #define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
144 142
145 #define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ 143 #define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */
146 #define ST_UB(...) ST_B(v16u8, __VA_ARGS__) 144 #define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
147 145
146 #define ST_H(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */
147 #define ST_UH(...) ST_H(v8u16, __VA_ARGS__)
148
148 /* Description : Load two vectors with 16 'byte' sized elements 149 /* Description : Load two vectors with 16 'byte' sized elements
149 Arguments : Inputs - psrc, stride 150 Arguments : Inputs - psrc, stride
150 Outputs - out0, out1 151 Outputs - out0, out1
151 Return Type - as per RTYPE 152 Return Type - as per RTYPE
152 Details : Load 16 byte elements in 'out0' from (psrc) 153 Details : Load 16 byte elements in 'out0' from (psrc)
153 Load 16 byte elements in 'out1' from (psrc + stride) 154 Load 16 byte elements in 'out1' from (psrc + stride)
154 */ 155 */
155 #define LD_B2(RTYPE, psrc, stride, out0, out1) \ 156 #define LD_B2(RTYPE, psrc, stride, out0, out1) \
156 { \ 157 { \
157 out0 = LD_B(RTYPE, (psrc)); \ 158 out0 = LD_B(RTYPE, (psrc)); \
(...skipping 21 matching lines...) Expand all
179 } 180 }
180 #define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__) 181 #define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
181 182
182 #define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \ 183 #define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \
183 { \ 184 { \
184 ST_B2(RTYPE, in0, in1, (pdst), stride); \ 185 ST_B2(RTYPE, in0, in1, (pdst), stride); \
185 ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \ 186 ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
186 } 187 }
187 #define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__) 188 #define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
188 189
190 /* Description : Store vectors of 8 halfword elements with stride
191 Arguments : Inputs - in0, in1, pdst, stride
192 Details : Store 8 halfword elements from 'in0' to (pdst)
193 Store 8 halfword elements from 'in1' to (pdst + stride)
194 */
195 #define ST_H2(RTYPE, in0, in1, pdst, stride) \
fbarchard1 2016/12/16 18:54:06 is this the correct style for macros? try clang-format
manojkumar.bhosale 2016/12/20 09:25:56 Already did the clang-format as above. Tried again
196 { \
197 ST_H(RTYPE, in0, (pdst)); \
198 ST_H(RTYPE, in1, (pdst) + stride); \
199 }
200 #define ST_UH2(...) ST_H2(v8u16, __VA_ARGS__)
201
189 // TODO(fbarchard): Consider using __msa_vshf_b and __msa_ilvr_b directly. 202 // TODO(fbarchard): Consider using __msa_vshf_b and __msa_ilvr_b directly.
190 /* Description : Shuffle byte vector elements as per mask vector 203 /* Description : Shuffle byte vector elements as per mask vector
191 Arguments : Inputs - in0, in1, in2, in3, mask0, mask1 204 Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
192 Outputs - out0, out1 205 Outputs - out0, out1
193 Return Type - as per RTYPE 206 Return Type - as per RTYPE
194 Details : Byte elements from 'in0' & 'in1' are copied selectively to 207 Details : Byte elements from 'in0' & 'in1' are copied selectively to
195 'out0' as per control vector 'mask0' 208 'out0' as per control vector 'mask0'
196 */ 209 */
197 #define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ 210 #define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
198 { \ 211 { \
(...skipping 12 matching lines...) Expand all
211 #define ILVRL_B2(RTYPE, in0, in1, out0, out1) \ 224 #define ILVRL_B2(RTYPE, in0, in1, out0, out1) \
212 { \ 225 { \
213 out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ 226 out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
214 out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \ 227 out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \
215 } 228 }
216 #define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) 229 #define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
217 230
218 #endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */ 231 #endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */
219 232
220 #endif // INCLUDE_LIBYUV_MACROS_MSA_H_ 233 #endif // INCLUDE_LIBYUV_MACROS_MSA_H_
OLDNEW
« no previous file with comments | « no previous file | include/libyuv/scale_row.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698