Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1395)

Side by Side Diff: source/row_msa.cc

Issue 2430313005: Add MSA optimized I422ToARGBRow_MSA and I422ToRGBARow_MSA functions (Closed)
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include "libyuv/row.h" 11 #include "libyuv/row.h"
12 12
13 // This module is for GCC MSA 13 // This module is for GCC MSA
14 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) 14 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
15 #include "libyuv/macros_msa.h" 15 #include "libyuv/macros_msa.h"
16 16
17 #ifdef __cplusplus 17 #ifdef __cplusplus
18 namespace libyuv { 18 namespace libyuv {
19 extern "C" { 19 extern "C" {
20 #endif 20 #endif
21 21
// I422TORGB: converts 8 pixels of Y/U/V samples into three colour channels,
// each clamped to [0, 255] and returned as 8 halfwords.
//   in0            - vector holding the Y bytes (the 8 bytes of the right/low
//                    half are used).
//   in1, in2       - vectors holding the U and V bytes. Both are OVERWRITTEN:
//                    every byte is paired with itself so that one chroma
//                    sample lines up with two Y samples.
//   ub, vr, ug, vg - v4i32 chroma coefficients (multiplied with U or V).
//   bb, bg, br     - v4i32 biases added to the scaled luma.
//   yg             - v4i32 luma multiplier.
//   out0/out1/out2 - v8i16 results built from (bb, ub), (bg, ug, vg) and
//                    (br, vr) respectively; by the callers' constant names
//                    these are the B, G and R channels.
// Per 32-bit lane, with y16 being the Y byte paired with itself:
//   y'   = (y16 * yg) >> 16
//   out0 = clamp((y' + bb - u * ub) >> 6)
//   out1 = clamp((y' + bg - u * ug - v * vg) >> 6)
//   out2 = clamp((y' + br - v * vr) >> 6)
// The macro only touches its own arguments and its *_m locals; it does not
// depend on any variable name in the caller's scope.
22 #define I422TORGB(in0, in1, in2, ub, vr, ug, vg, \
fbarchard1 2016/10/24 17:36:32 Could use a brief comment to describe what this ma
23 bb, bg, br, yg, out0, out1, out2) { \
24 v8i16 vec0_m; \
25 v4i32 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m; \
26 v4i32 reg5_m, reg6_m, reg7_m, reg8_m, reg9_m; \
27 v4i32 max_val_m = __msa_ldi_w(255); \
28 v8i16 zero_m = { 0 }; \
29 \
30 in1 = (v16u8) __msa_ilvr_b((v16i8) in1, (v16i8) in1); \
31 in2 = (v16u8) __msa_ilvr_b((v16i8) in2, (v16i8) in2); \
32 vec0_m = (v8i16) __msa_ilvr_b((v16i8) in0, (v16i8) in0); \
33 reg0_m = (v4i32) __msa_ilvr_h(zero_m, vec0_m); \
34 reg1_m = (v4i32) __msa_ilvl_h(zero_m, vec0_m); \
35 reg0_m *= yg; \
36 reg1_m *= yg; \
37 reg0_m = __msa_srai_w(reg0_m, 16); \
38 reg1_m = __msa_srai_w(reg1_m, 16); \
39 reg4_m = reg0_m + br; \
40 reg5_m = reg1_m + br; \
41 reg2_m = reg0_m + bg; \
42 reg3_m = reg1_m + bg; \
43 reg0_m += bb; \
44 reg1_m += bb; \
45 vec0_m = (v8i16) __msa_ilvr_b((v16i8) zero_m, (v16i8) in1); \
46 reg6_m = (v4i32) __msa_ilvr_h(zero_m, (v8i16) vec0_m); \
47 reg7_m = (v4i32) __msa_ilvl_h(zero_m, (v8i16) vec0_m); \
48 vec0_m = (v8i16) __msa_ilvr_b((v16i8) zero_m, (v16i8) in2); \
49 reg8_m = (v4i32) __msa_ilvr_h(zero_m, (v8i16) vec0_m); \
50 reg9_m = (v4i32) __msa_ilvl_h(zero_m, (v8i16) vec0_m); \
51 reg0_m -= reg6_m * ub; \
52 reg1_m -= reg7_m * ub; \
53 reg2_m -= reg6_m * ug; \
54 reg3_m -= reg7_m * ug; \
55 reg4_m -= reg8_m * vr; \
56 reg5_m -= reg9_m * vr; \
57 reg2_m -= reg8_m * vg; \
58 reg3_m -= reg9_m * vg; \
59 reg0_m = __msa_srai_w(reg0_m, 6); \
60 reg1_m = __msa_srai_w(reg1_m, 6); \
61 reg2_m = __msa_srai_w(reg2_m, 6); \
62 reg3_m = __msa_srai_w(reg3_m, 6); \
63 reg4_m = __msa_srai_w(reg4_m, 6); \
64 reg5_m = __msa_srai_w(reg5_m, 6); \
65 reg0_m = __msa_maxi_s_w(reg0_m, 0); \
66 reg1_m = __msa_maxi_s_w(reg1_m, 0); \
67 reg2_m = __msa_maxi_s_w(reg2_m, 0); \
68 reg3_m = __msa_maxi_s_w(reg3_m, 0); \
69 reg4_m = __msa_maxi_s_w(reg4_m, 0); \
70 reg5_m = __msa_maxi_s_w(reg5_m, 0); \
71 reg0_m = __msa_min_s_w(reg0_m, max_val_m); \
72 reg1_m = __msa_min_s_w(reg1_m, max_val_m); \
73 reg2_m = __msa_min_s_w(reg2_m, max_val_m); \
74 reg3_m = __msa_min_s_w(reg3_m, max_val_m); \
75 reg4_m = __msa_min_s_w(reg4_m, max_val_m); \
76 reg5_m = __msa_min_s_w(reg5_m, max_val_m); \
77 out0 = __msa_pckev_h((v8i16) reg1_m, (v8i16) reg0_m); \
78 out1 = __msa_pckev_h((v8i16) reg3_m, (v8i16) reg2_m); \
79 out2 = __msa_pckev_h((v8i16) reg5_m, (v8i16) reg4_m); \
80 }
81
22 void MirrorRow_MSA(const uint8* src, uint8* dst, int width) { 82 void MirrorRow_MSA(const uint8* src, uint8* dst, int width) {
23 int x; 83 int x;
24 v16u8 src0, src1, src2, src3; 84 v16u8 src0, src1, src2, src3;
25 v16u8 dst0, dst1, dst2, dst3; 85 v16u8 dst0, dst1, dst2, dst3;
26 v16i8 shuffler = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; 86 v16i8 shuffler = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
27 src += width - 64; 87 src += width - 64;
28 88
29 for (x = 0; x < width; x += 64) { 89 for (x = 0; x < width; x += 64) {
30 LD_UB4(src, 16, src3, src2, src1, src0); 90 LD_UB4(src, 16, src3, src2, src1, src0);
31 VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2); 91 VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2);
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
94 ILVRL_B2_UB(src_y0, vec_uv0, dst_uyvy0, dst_uyvy1); 154 ILVRL_B2_UB(src_y0, vec_uv0, dst_uyvy0, dst_uyvy1);
95 ILVRL_B2_UB(src_y1, vec_uv1, dst_uyvy2, dst_uyvy3); 155 ILVRL_B2_UB(src_y1, vec_uv1, dst_uyvy2, dst_uyvy3);
96 ST_UB4(dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3, dst_uyvy, 16); 156 ST_UB4(dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3, dst_uyvy, 16);
97 src_u += 16; 157 src_u += 16;
98 src_v += 16; 158 src_v += 16;
99 src_y += 32; 159 src_y += 32;
100 dst_uyvy += 64; 160 dst_uyvy += 64;
101 } 161 }
102 } 162 }
103 163
// Converts one row of planar Y/U/V input (4 U bytes and 4 V bytes are read
// for every 8 Y bytes) into 4-byte pixels stored at rgb_buf, filling one byte
// of every pixel with 255.
//   src_y, src_u, src_v - input planes; advanced by 8, 4 and 4 bytes per
//                         loop iteration.
//   rgb_buf             - output; 32 bytes are stored per loop iteration.
//   yuvconstants        - supplies the coefficient and bias tables read below.
//   width               - pixel count; the loop steps by 8, so a width that is
//                         not a multiple of 8 still processes a whole final
//                         group of 8 pixels.
// NOTE(review): presumably a wrapper handles widths that are not a multiple
// of 8 - confirm against the callers.
164 void I422ToARGBRow_MSA(const uint8* src_y, const uint8* src_u,
165 const uint8* src_v, uint8* rgb_buf,
166 const struct YuvConstants* yuvconstants, int width) {
167 int x;
168 int32 data_u, data_v;
169 int64 data_y;
170 v16u8 src0, src1, src2, dst0, dst1;
171 v8i16 vec0, vec1, vec2;
172 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
173 v16u8 const_255 = (v16u8) __msa_ldi_b(255);
174 v4i32 zero = { 0 };
175
// Broadcast the scalar conversion constants into every 32-bit lane.
// Index 0 is used for the U coefficients and index 1 for the V coefficients
// (kUVToR[1], kUVToG[1]) - presumably matching the YuvConstants table layout;
// TODO confirm.
176 vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]);
177 vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]);
178 vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]);
179 vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]);
180 vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]);
181 vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]);
182 vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]);
183 vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]);
184
185 for (x = 0; x < width; x += 8) {
// Fetch 8 Y bytes as one 64-bit scalar and 4 U / 4 V bytes as 32-bit
// scalars, then place each in element 0 of an otherwise zero vector.
186 data_y = LD(src_y);
fbarchard1 2016/10/24 17:36:32 Consider a macro to read I422, as there are many Y
187 data_u = LW(src_u);
188 data_v = LW(src_v);
189 src0 = (v16u8) __msa_insert_d((v2i64) zero, 0, data_y);
190 src1 = (v16u8) __msa_insert_w(zero, 0, data_u);
191 src2 = (v16u8) __msa_insert_w(zero, 0, data_v);
fbarchard1 2016/10/24 17:36:32 No way to load 4 or 8 bytes directly into an MSA r
// vec0, vec1, vec2 receive the three clamped channels as 8 halfwords each
// (B, G, R going by the constant names passed in). src1/src2 are modified
// by the macro, which is fine because they are reloaded every iteration.
192 I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg,
193 vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2);
// Pair vec0 with vec1 bytes and vec2 with 255 bytes, then interleave the
// resulting halfword pairs into 8 four-byte pixels.
// NOTE(review): with the usual MSA operand order (second operand supplies
// the lower lane) the bytes in memory are vec0, vec1, vec2, 255 - confirm.
194 vec0 = (v8i16) __msa_ilvev_b((v16i8) vec1, (v16i8) vec0);
195 vec1 = (v8i16) __msa_ilvev_b((v16i8) const_255, (v16i8) vec2);
196 dst0 = (v16u8) __msa_ilvr_h((v8i16) vec1, (v8i16) vec0);
197 dst1 = (v16u8) __msa_ilvl_h((v8i16) vec1, (v8i16) vec0);
198 ST_UB2(dst0, dst1, rgb_buf, 16);
199 src_y += 8;
200 src_u += 4;
201 src_v += 4;
202 rgb_buf += 32;
203 }
204 }
205
// Converts one row of planar Y/U/V input (4 U bytes and 4 V bytes are read
// for every 8 Y bytes) into 4-byte pixels stored at rgb_buf, filling one byte
// of every pixel with 255. The computation matches I422ToARGBRow_MSA; only
// the final byte interleave (and therefore the byte order inside each pixel)
// differs.
//   src_y, src_u, src_v - input planes; advanced by 8, 4 and 4 bytes per
//                         loop iteration.
//   rgb_buf             - output; 32 bytes are stored per loop iteration.
//   yuvconstants        - supplies the coefficient and bias tables read below.
//   width               - pixel count; the loop steps by 8, so a width that is
//                         not a multiple of 8 still processes a whole final
//                         group of 8 pixels.
// NOTE(review): presumably a wrapper handles widths that are not a multiple
// of 8 - confirm against the callers.
206 void I422ToRGBARow_MSA(const uint8* src_y, const uint8* src_u,
207 const uint8* src_v, uint8* rgb_buf,
208 const struct YuvConstants* yuvconstants, int width) {
209 int x;
210 int64 data_y;
211 int32 data_u, data_v;
212 v16u8 src0, src1, src2, dst0, dst1;
213 v8i16 vec0, vec1, vec2;
214 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
215 v16u8 const_255 = (v16u8) __msa_ldi_b(255);
216 v4i32 zero = { 0 };
217
// Broadcast the scalar conversion constants into every 32-bit lane.
// Index 0 is used for the U coefficients and index 1 for the V coefficients
// (kUVToR[1], kUVToG[1]) - presumably matching the YuvConstants table layout;
// TODO confirm.
218 vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]);
219 vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]);
220 vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]);
221 vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]);
222 vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]);
223 vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]);
224 vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]);
225 vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]);
226
227 for (x = 0; x < width; x += 8) {
// Fetch 8 Y bytes as one 64-bit scalar and 4 U / 4 V bytes as 32-bit
// scalars, then place each in element 0 of an otherwise zero vector.
228 data_y = LD(src_y);
229 data_u = LW(src_u);
230 data_v = LW(src_v);
231 src0 = (v16u8) __msa_insert_d((v2i64) zero, 0, data_y);
232 src1 = (v16u8) __msa_insert_w(zero, 0, data_u);
233 src2 = (v16u8) __msa_insert_w(zero, 0, data_v);
// vec0, vec1, vec2 receive the three clamped channels as 8 halfwords each
// (B, G, R going by the constant names passed in). src1/src2 are modified
// by the macro, which is fine because they are reloaded every iteration.
234 I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg,
235 vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2);
// Pair 255 bytes with vec0 and vec1 bytes with vec2, then interleave the
// resulting halfword pairs into 8 four-byte pixels.
// NOTE(review): with the usual MSA operand order (second operand supplies
// the lower lane) the bytes in memory are 255, vec0, vec1, vec2 - confirm.
236 vec0 = (v8i16) __msa_ilvev_b((v16i8) vec0, (v16i8) const_255);
237 vec1 = (v8i16) __msa_ilvev_b((v16i8) vec2, (v16i8) vec1);
238 dst0 = (v16u8) __msa_ilvr_h(vec1, vec0);
239 dst1 = (v16u8) __msa_ilvl_h(vec1, vec0);
240 ST_UB2(dst0, dst1, rgb_buf, 16);
241 src_y += 8;
242 src_u += 4;
243 src_v += 4;
244 rgb_buf += 32;
245 }
246 }
247
104 void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width) { 248 void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width) {
105 int x; 249 int x;
106 v16u8 src0, src1, src2, src3, dst0, dst1; 250 v16u8 src0, src1, src2, src3, dst0, dst1;
107 251
108 for (x = 0; x < width; x += 32) { 252 for (x = 0; x < width; x += 32) {
109 LD_UB4(src_yuy2, 16, src0, src1, src2, src3); 253 LD_UB4(src_yuy2, 16, src0, src1, src2, src3);
110 dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); 254 dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0);
111 dst1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2); 255 dst1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2);
112 ST_UB2(dst0, dst1, dst_y, 16); 256 ST_UB2(dst0, dst1, dst_y, 16);
113 src_yuy2 += 64; 257 src_yuy2 += 64;
(...skipping 295 matching lines...) Expand 10 before | Expand all | Expand 10 after
409 dst_argb += 64; 553 dst_argb += 64;
410 } 554 }
411 } 555 }
412 556
413 #ifdef __cplusplus 557 #ifdef __cplusplus
414 } // extern "C" 558 } // extern "C"
415 } // namespace libyuv 559 } // namespace libyuv
416 #endif 560 #endif
417 561
418 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) 562 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
OLDNEW
« include/libyuv/macros_msa.h ('K') | « source/row_any.cc ('k') | source/scale_argb.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698