Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(516)

Side by Side Diff: source/row_msa.cc

Issue 2397693002: Add MSA optimized YUY2ToI422, YUY2ToI420, UYVYToI422, UYVYToI420 functions (Closed)
Patch Set: Updates as per review comments Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_any.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
94 ILVRL_B2_UB(src_y0, vec_uv0, dst_uyvy0, dst_uyvy1); 94 ILVRL_B2_UB(src_y0, vec_uv0, dst_uyvy0, dst_uyvy1);
95 ILVRL_B2_UB(src_y1, vec_uv1, dst_uyvy2, dst_uyvy3); 95 ILVRL_B2_UB(src_y1, vec_uv1, dst_uyvy2, dst_uyvy3);
96 ST_UB4(dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3, dst_uyvy, 16); 96 ST_UB4(dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3, dst_uyvy, 16);
97 src_u += 16; 97 src_u += 16;
98 src_v += 16; 98 src_v += 16;
99 src_y += 32; 99 src_y += 32;
100 dst_uyvy += 64; 100 dst_uyvy += 64;
101 } 101 }
102 } 102 }
103 103
104 void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width) {  // Writes the even-indexed bytes of src_yuy2 to dst_y (the Y samples, assuming Y0 U Y1 V byte order - confirm).
105 int x;
106 v16u8 src0, src1, src2, src3, dst0, dst1;
107
108 for (x = 0; x < width; x += 32) {  // 32 output bytes per pass. NOTE(review): no tail handling; presumably callers pass width as a multiple of 32 - confirm.
109 LD_UB4(src_yuy2, 16, src0, src1, src2, src3);  // presumably four 16-byte loads at stride 16 (64 source bytes)
110 dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0);  // even-indexed bytes of src0 (low half) and src1 (high half)
111 dst1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2);  // even-indexed bytes of src2 and src3
112 ST_UB2(dst0, dst1, dst_y, 16);
113 src_yuy2 += 64;
114 dst_y += 32;
115 }
116 }
117
118 void YUY2ToUVRow_MSA(const uint8* src_yuy2, int src_stride_yuy2,  // Averages the odd-indexed bytes of two source rows and splits the result into dst_u and dst_v.
119 uint8* dst_u, uint8* dst_v, int width) {
120 const uint8* src_yuy2_next = src_yuy2 + src_stride_yuy2;  // second row, src_stride_yuy2 bytes after the first
121 int x;
122 v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
123 v16u8 vec0, vec1, dst0, dst1;
124
125 for (x = 0; x < width; x += 32) {  // 64 bytes read per row, 16 bytes written to each of dst_u/dst_v. NOTE(review): no tail handling; presumably width is a multiple of 32 - confirm.
126 LD_UB4(src_yuy2, 16, src0, src1, src2, src3);
127 LD_UB4(src_yuy2_next, 16, src4, src5, src6, src7);
128 src0 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0);  // odd-indexed bytes of the first row (chroma, assuming Y0 U Y1 V order - confirm)
129 src1 = (v16u8) __msa_pckod_b((v16i8) src3, (v16i8) src2);
130 src2 = (v16u8) __msa_pckod_b((v16i8) src5, (v16i8) src4);  // odd-indexed bytes of the second row
131 src3 = (v16u8) __msa_pckod_b((v16i8) src7, (v16i8) src6);
132 vec0 = __msa_aver_u_b(src0, src2);  // per-byte unsigned average of the two rows
133 vec1 = __msa_aver_u_b(src1, src3);
134 dst0 = (v16u8) __msa_pckev_b((v16i8) vec1, (v16i8) vec0);  // even-indexed averaged bytes -> stored to dst_u
135 dst1 = (v16u8) __msa_pckod_b((v16i8) vec1, (v16i8) vec0);  // odd-indexed averaged bytes -> stored to dst_v
136 ST_UB(dst0, dst_u);
137 ST_UB(dst1, dst_v);
138 src_yuy2 += 64;
139 src_yuy2_next += 64;
140 dst_u += 16;
141 dst_v += 16;
142 }
143 }
144
145 void YUY2ToUV422Row_MSA(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,  // Splits the odd-indexed bytes of one source row into dst_u and dst_v (no averaging with a second row).
146 int width) {
147 int x;
148 v16u8 src0, src1, src2, src3, dst0, dst1;
149
150 for (x = 0; x < width; x += 32) {  // 64 bytes read, 16 bytes written to each output per pass. NOTE(review): no tail handling; presumably width is a multiple of 32 - confirm.
151 LD_UB4(src_yuy2, 16, src0, src1, src2, src3);
152 src0 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0);  // odd-indexed bytes (chroma, assuming Y0 U Y1 V order - confirm)
153 src1 = (v16u8) __msa_pckod_b((v16i8) src3, (v16i8) src2);
154 dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0);  // even-indexed chroma bytes -> stored to dst_u
155 dst1 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0);  // odd-indexed chroma bytes -> stored to dst_v
156 ST_UB(dst0, dst_u);
157 ST_UB(dst1, dst_v);
158 src_yuy2 += 64;
159 dst_u += 16;
160 dst_v += 16;
161 }
162 }
163
164 void UYVYToYRow_MSA(const uint8* src_uyvy, uint8* dst_y, int width) {  // Writes the odd-indexed bytes of src_uyvy to dst_y (the Y samples, assuming U Y0 V Y1 byte order - confirm).
165 int x;
166 v16u8 src0, src1, src2, src3, dst0, dst1;
167
168 for (x = 0; x < width; x += 32) {  // 32 output bytes per pass. NOTE(review): no tail handling; presumably callers pass width as a multiple of 32 - confirm.
169 LD_UB4(src_uyvy, 16, src0, src1, src2, src3);  // presumably four 16-byte loads at stride 16 (64 source bytes)
170 dst0 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0);  // odd-indexed bytes of src0 (low half) and src1 (high half)
171 dst1 = (v16u8) __msa_pckod_b((v16i8) src3, (v16i8) src2);  // odd-indexed bytes of src2 and src3
172 ST_UB2(dst0, dst1, dst_y, 16);
173 src_uyvy += 64;
174 dst_y += 32;
175 }
176 }
177
178 void UYVYToUVRow_MSA(const uint8* src_uyvy, int src_stride_uyvy,  // Averages the even-indexed bytes of two source rows and splits the result into dst_u and dst_v.
179 uint8* dst_u, uint8* dst_v, int width) {
180 const uint8* src_uyvy_next = src_uyvy + src_stride_uyvy;  // second row, src_stride_uyvy bytes after the first
181 int x;
182 v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
183 v16u8 vec0, vec1, dst0, dst1;
184
185 for (x = 0; x < width; x += 32) {  // 64 bytes read per row, 16 bytes written to each of dst_u/dst_v. NOTE(review): no tail handling; presumably width is a multiple of 32 - confirm.
186 LD_UB4(src_uyvy, 16, src0, src1, src2, src3);
187 LD_UB4(src_uyvy_next, 16, src4, src5, src6, src7);
188 src0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0);  // even-indexed bytes of the first row (chroma, assuming U Y0 V Y1 order - confirm)
189 src1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2);
190 src2 = (v16u8) __msa_pckev_b((v16i8) src5, (v16i8) src4);  // even-indexed bytes of the second row
191 src3 = (v16u8) __msa_pckev_b((v16i8) src7, (v16i8) src6);
192 vec0 = __msa_aver_u_b(src0, src2);  // per-byte unsigned average of the two rows
193 vec1 = __msa_aver_u_b(src1, src3);
194 dst0 = (v16u8) __msa_pckev_b((v16i8) vec1, (v16i8) vec0);  // even-indexed averaged bytes -> stored to dst_u
195 dst1 = (v16u8) __msa_pckod_b((v16i8) vec1, (v16i8) vec0);  // odd-indexed averaged bytes -> stored to dst_v
196 ST_UB(dst0, dst_u);
197 ST_UB(dst1, dst_v);
198 src_uyvy += 64;
199 src_uyvy_next += 64;
200 dst_u += 16;
201 dst_v += 16;
202 }
203 }
204
205 void UYVYToUV422Row_MSA(const uint8* src_uyvy, uint8* dst_u, uint8* dst_v,  // Splits the even-indexed bytes of one source row into dst_u and dst_v (no averaging with a second row).
206 int width) {
207 int x;
208 v16u8 src0, src1, src2, src3, dst0, dst1;
209
210 for (x = 0; x < width; x += 32) {  // 64 bytes read, 16 bytes written to each output per pass. NOTE(review): no tail handling; presumably width is a multiple of 32 - confirm.
211 LD_UB4(src_uyvy, 16, src0, src1, src2, src3);
212 src0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0);  // even-indexed bytes (chroma, assuming U Y0 V Y1 order - confirm)
213 src1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2);
214 dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0);  // even-indexed chroma bytes -> stored to dst_u
215 dst1 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0);  // odd-indexed chroma bytes -> stored to dst_v
216 ST_UB(dst0, dst_u);
217 ST_UB(dst1, dst_v);
218 src_uyvy += 64;
219 dst_u += 16;
220 dst_v += 16;
221 }
222 }
223
104 #ifdef __cplusplus 224 #ifdef __cplusplus
105 } // extern "C" 225 } // extern "C"
106 } // namespace libyuv 226 } // namespace libyuv
107 #endif 227 #endif
108 228
109 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) 229 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
OLD | NEW
« no previous file with comments | « source/row_any.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698