Chromium Code Reviews

Side by Side Diff: source/row_msa.cc

Issue 2454433003: Add MSA optimized I422AlphaToARGBRow_MSA and I422ToRGB24Row_MSA functions (Closed)
Patch Set: Created 4 years, 1 month ago
/*
 * Copyright 2016 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/row.h"

// This module is for GCC MSA
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
#include "libyuv/macros_msa.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Load YUV 422 pixel data
#define LOAD_I422(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) {   \
fbarchard1 2016/10/26 17:56:31 suggest READYUV422 as name, for consistency with r
  uint64 y_m;                                                      \
  uint32 u_m, v_m;                                                 \
  v4i32 zero_m = { 0 };                                            \
  y_m = LD(psrc_y);                                                \
  u_m = LW(psrc_u);                                                \
  v_m = LW(psrc_v);                                                \
  out_y = (v16u8) __msa_insert_d((v2i64) zero_m, 0, (int64) y_m);  \
  out_u = (v16u8) __msa_insert_w(zero_m, 0, (int32) u_m);          \
  out_v = (v16u8) __msa_insert_w(zero_m, 0, (int32) v_m);          \
}

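Following up on fbarchard1's naming suggestion above: one minimal way to line this macro up with the READYUV422 name used elsewhere in the row_* files would be a thin alias (a hypothetical sketch only, not part of the patch; the simpler option is just to rename the macro at its definition):

// Hypothetical alias illustrating the suggested READYUV422 name; the patch
// itself defines the macro as LOAD_I422.
#define READYUV422(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) \
  LOAD_I422(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v)
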
(This patch renames the I422TORGB macro parameters from in0/in1/in2 and out0/out1/out2 to in_y/in_u/in_v and out_b/out_g/out_r and re-wraps the parameter list; the macro body is otherwise unchanged.)

// Convert 8 pixels of YUV 420 to RGB.
#define I422TORGB(in_y, in_u, in_v,                              \
                  ub, vr, ug, vg, bb, bg, br, yg,                \
                  out_b, out_g, out_r) {                         \
  v8i16 vec0_m;                                                  \
  v4i32 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m;                  \
  v4i32 reg5_m, reg6_m, reg7_m, reg8_m, reg9_m;                  \
  v4i32 max_val_m = __msa_ldi_w(255);                            \
  v8i16 zero_m = { 0 };                                          \
                                                                 \
  in_u = (v16u8) __msa_ilvr_b((v16i8) in_u, (v16i8) in_u);       \
  in_v = (v16u8) __msa_ilvr_b((v16i8) in_v, (v16i8) in_v);       \
  vec0_m = (v8i16) __msa_ilvr_b((v16i8) in_y, (v16i8) in_y);     \
  reg0_m = (v4i32) __msa_ilvr_h(zero_m, vec0_m);                 \
  reg1_m = (v4i32) __msa_ilvl_h(zero_m, vec0_m);                 \
  reg0_m *= vec_yg;                                              \
  reg1_m *= vec_yg;                                              \
  reg0_m = __msa_srai_w(reg0_m, 16);                             \
  reg1_m = __msa_srai_w(reg1_m, 16);                             \
  reg4_m = reg0_m + br;                                          \
  reg5_m = reg1_m + br;                                          \
  reg2_m = reg0_m + bg;                                          \
  reg3_m = reg1_m + bg;                                          \
  reg0_m += bb;                                                  \
  reg1_m += bb;                                                  \
  vec0_m = (v8i16) __msa_ilvr_b((v16i8) zero_m, (v16i8) in_u);   \
  reg6_m = (v4i32) __msa_ilvr_h(zero_m, (v8i16) vec0_m);         \
  reg7_m = (v4i32) __msa_ilvl_h(zero_m, (v8i16) vec0_m);         \
  vec0_m = (v8i16) __msa_ilvr_b((v16i8) zero_m, (v16i8) in_v);   \
  reg8_m = (v4i32) __msa_ilvr_h(zero_m, (v8i16) vec0_m);         \
  reg9_m = (v4i32) __msa_ilvl_h(zero_m, (v8i16) vec0_m);         \
  reg0_m -= reg6_m * ub;                                         \
  reg1_m -= reg7_m * ub;                                         \
  reg2_m -= reg6_m * ug;                                         \
  reg3_m -= reg7_m * ug;                                         \
  reg4_m -= reg8_m * vr;                                         \
  reg5_m -= reg9_m * vr;                                         \
  reg2_m -= reg8_m * vg;                                         \
  reg3_m -= reg9_m * vg;                                         \
  reg0_m = __msa_srai_w(reg0_m, 6);                              \
  reg1_m = __msa_srai_w(reg1_m, 6);                              \
  reg2_m = __msa_srai_w(reg2_m, 6);                              \
  reg3_m = __msa_srai_w(reg3_m, 6);                              \
  reg4_m = __msa_srai_w(reg4_m, 6);                              \
  reg5_m = __msa_srai_w(reg5_m, 6);                              \
  reg0_m = __msa_maxi_s_w(reg0_m, 0);                            \
  reg1_m = __msa_maxi_s_w(reg1_m, 0);                            \
  reg2_m = __msa_maxi_s_w(reg2_m, 0);                            \
  reg3_m = __msa_maxi_s_w(reg3_m, 0);                            \
  reg4_m = __msa_maxi_s_w(reg4_m, 0);                            \
  reg5_m = __msa_maxi_s_w(reg5_m, 0);                            \
  reg0_m = __msa_min_s_w(reg0_m, max_val_m);                     \
  reg1_m = __msa_min_s_w(reg1_m, max_val_m);                     \
  reg2_m = __msa_min_s_w(reg2_m, max_val_m);                     \
  reg3_m = __msa_min_s_w(reg3_m, max_val_m);                     \
  reg4_m = __msa_min_s_w(reg4_m, max_val_m);                     \
  reg5_m = __msa_min_s_w(reg5_m, max_val_m);                     \
  out_b = __msa_pckev_h((v8i16) reg1_m, (v8i16) reg0_m);         \
  out_g = __msa_pckev_h((v8i16) reg3_m, (v8i16) reg2_m);         \
  out_r = __msa_pckev_h((v8i16) reg5_m, (v8i16) reg4_m);         \
}

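For reference, the fixed-point arithmetic in I422TORGB corresponds to the following per-pixel scalar computation (an illustrative sketch only, not part of the patch; ub/ug/vg/vr, bb/bg/br and yg stand in for the gain and bias values the row functions below broadcast from YuvConstants):

// Scalar sketch of one pixel of I422TORGB: Y is widened by byte duplication
// (y * 0x0101), scaled by yg in 16.16 fixed point, biased, corrected by the
// U/V contributions, then shifted down by 6 and clamped to 0..255.
static inline int Clamp255Sketch(int v) {
  return v < 0 ? 0 : (v > 255 ? 255 : v);
}

static inline void YuvPixelSketch(int y, int u, int v,
                                  int ub, int ug, int vg, int vr,
                                  int bb, int bg, int br, int yg,
                                  int* b, int* g, int* r) {
  int y1 = ((y * 0x0101) * yg) >> 16;
  *b = Clamp255Sketch((y1 + bb - u * ub) >> 6);
  *g = Clamp255Sketch((y1 + bg - u * ug - v * vg) >> 6);
  *r = Clamp255Sketch((y1 + br - v * vr) >> 6);
}
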
void MirrorRow_MSA(const uint8* src, uint8* dst, int width) {
  int x;
  v16u8 src0, src1, src2, src3;
  v16u8 dst0, dst1, dst2, dst3;
  v16i8 shuffler = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
  src += width - 64;

  for (x = 0; x < width; x += 64) {
(...skipping 68 matching lines...)
    src_v += 16;
    src_y += 32;
    dst_uyvy += 64;
  }
}

(In the previous revision, I422ToARGBRow_MSA declared int64 data_y, int32 data_u/data_v and a v4i32 zero vector and loaded the inputs with LD/LW followed by __msa_insert_d/__msa_insert_w; this patch replaces that sequence with the LOAD_I422 macro.)

void I422ToARGBRow_MSA(const uint8* src_y, const uint8* src_u,
                       const uint8* src_v, uint8* rgb_buf,
                       const struct YuvConstants* yuvconstants, int width) {
  int x;
  v16u8 src0, src1, src2, dst0, dst1;
  v8i16 vec0, vec1, vec2;
  v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
  v16u8 const_255 = (v16u8) __msa_ldi_b(255);

  vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]);
  vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]);
  vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]);
  vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]);
  vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]);
  vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]);
  vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]);
  vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]);

  for (x = 0; x < width; x += 8) {
    LOAD_I422(src_y, src_u, src_v, src0, src1, src2);
    I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg,
              vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2);
    vec0 = (v8i16) __msa_ilvev_b((v16i8) vec1, (v16i8) vec0);
    vec1 = (v8i16) __msa_ilvev_b((v16i8) const_255, (v16i8) vec2);
    dst0 = (v16u8) __msa_ilvr_h((v8i16) vec1, (v8i16) vec0);
    dst1 = (v16u8) __msa_ilvl_h((v8i16) vec1, (v8i16) vec0);
    ST_UB2(dst0, dst1, rgb_buf, 16);
    src_y += 8;
    src_u += 4;
    src_v += 4;
    rgb_buf += 32;
  }
}

(The same LD/LW + __msa_insert load sequence, along with the data_y/data_u/data_v and zero declarations, is removed from I422ToRGBARow_MSA in favor of LOAD_I422.)

void I422ToRGBARow_MSA(const uint8* src_y, const uint8* src_u,
                       const uint8* src_v, uint8* rgb_buf,
                       const struct YuvConstants* yuvconstants, int width) {
  int x;
  v16u8 src0, src1, src2, dst0, dst1;
  v8i16 vec0, vec1, vec2;
  v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
  v16u8 const_255 = (v16u8) __msa_ldi_b(255);

  vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]);
  vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]);
  vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]);
  vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]);
  vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]);
  vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]);
  vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]);
  vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]);

  for (x = 0; x < width; x += 8) {
    LOAD_I422(src_y, src_u, src_v, src0, src1, src2);
    I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg,
              vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2);
    vec0 = (v8i16) __msa_ilvev_b((v16i8) vec0, (v16i8) const_255);
    vec1 = (v8i16) __msa_ilvev_b((v16i8) vec2, (v16i8) vec1);
    dst0 = (v16u8) __msa_ilvr_h(vec1, vec0);
    dst1 = (v16u8) __msa_ilvl_h(vec1, vec0);
    ST_UB2(dst0, dst1, rgb_buf, 16);
    src_y += 8;
    src_u += 4;
    src_v += 4;
    rgb_buf += 32;
  }
}

void I422AlphaToARGBRow_MSA(const uint8* src_y, const uint8* src_u,
                            const uint8* src_v, const uint8* src_a,
                            uint8* rgb_buf,
                            const struct YuvConstants* yuvconstants,
                            int width) {
  int x;
  int64 data_a;
  v16u8 src0, src1, src2, src3, dst0, dst1;
  v8i16 vec0, vec1, vec2;
  v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
  v4i32 zero = { 0 };

  vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]);
  vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]);
  vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]);
  vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]);
  vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]);
  vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]);
  vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]);
  vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]);

  for (x = 0; x < width; x += 8) {
    data_a = LD(src_a);
    LOAD_I422(src_y, src_u, src_v, src0, src1, src2);
    src3 = (v16u8) __msa_insert_d((v2i64) zero, 0, data_a);
    I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg,
              vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2);
    src3 = (v16u8) __msa_ilvr_b((v16i8) src3, (v16i8) src3);
fbarchard1 2016/10/26 17:56:31 consider a macro for STOREARGB since it will come
    vec0 = (v8i16) __msa_ilvev_b((v16i8) vec1, (v16i8) vec0);
    vec1 = (v8i16) __msa_ilvev_b((v16i8) src3, (v16i8) vec2);
    dst0 = (v16u8) __msa_ilvr_h((v8i16) vec1, (v8i16) vec0);
    dst1 = (v16u8) __msa_ilvl_h((v8i16) vec1, (v8i16) vec0);
    ST_UB2(dst0, dst1, rgb_buf, 16);
    src_y += 8;
    src_u += 4;
    src_v += 4;
    src_a += 8;
    rgb_buf += 32;
  }
}

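Regarding fbarchard1's STOREARGB note above: the interleave-and-store tail is the same in I422ToARGBRow_MSA and I422AlphaToARGBRow_MSA, differing only in where the alpha bytes come from, so one possible shape for such a macro is sketched below (hypothetical, not part of the patch; the STOREARGB name and argument order are assumptions):

// Hypothetical STOREARGB sketch: interleaves the B/G/R halfword results from
// I422TORGB with an alpha vector and stores 8 ARGB pixels (32 bytes) at pdst.
// vec_a must carry the alpha bytes in its even byte lanes (e.g. const_255, or
// the loaded alpha plane after __msa_ilvr_b with itself).
#define STOREARGB(vec_b, vec_g, vec_r, vec_a, pdst) {                 \
  v8i16 vec_bg_m, vec_ra_m;                                           \
  v16u8 dst0_m, dst1_m;                                               \
  vec_bg_m = (v8i16) __msa_ilvev_b((v16i8) vec_g, (v16i8) vec_b);     \
  vec_ra_m = (v8i16) __msa_ilvev_b((v16i8) vec_a, (v16i8) vec_r);     \
  dst0_m = (v16u8) __msa_ilvr_h((v8i16) vec_ra_m, (v8i16) vec_bg_m);  \
  dst1_m = (v16u8) __msa_ilvl_h((v8i16) vec_ra_m, (v8i16) vec_bg_m);  \
  ST_UB2(dst0_m, dst1_m, pdst, 16);                                   \
}

With something along these lines, the ARGB and AlphaToARGB loops above could end with STOREARGB(vec0, vec1, vec2, const_255, rgb_buf) and STOREARGB(vec0, vec1, vec2, src3, rgb_buf) respectively.
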
void I422ToRGB24Row_MSA(const uint8* src_y, const uint8* src_u,
                        const uint8* src_v, uint8* rgb_buf,
                        const struct YuvConstants* yuvconstants, int32 width) {
  int x;
  int64 data_u, data_v;
  v16u8 src0, src1, src2, src3, src4, src5, dst0, dst1, dst2;
  v8i16 vec0, vec1, vec2, vec3, vec4, vec5;
  v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
  v16u8 reg0, reg1, reg2, reg3;
  v2i64 zero = { 0 };
  v16i8 shuffler0 = { 0, 1, 16, 2, 3, 17, 4, 5, 18, 6, 7, 19, 8, 9, 20, 10 };
  v16i8 shuffler1 = { 0, 21, 1, 2, 22, 3, 4, 23, 5, 6, 24, 7, 8, 25, 9, 10 };
  v16i8 shuffler2 =
  { 26, 6, 7, 27, 8, 9, 28, 10, 11, 29, 12, 13, 30, 14, 15, 31 };
fbarchard1 2016/10/26 17:56:31 indent should be 4 from line above.

  vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]);
  vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]);
  vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]);
  vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]);
  vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]);
  vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]);
  vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]);
  vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]);

  for (x = 0; x < width; x += 16) {
    src0 = (v16u8) __msa_ld_b((v16u8*) src_y, 0);
    data_u = LD(src_u);
    data_v = LD(src_v);
    src1 = (v16u8) __msa_insert_d(zero, 0, data_u);
    src2 = (v16u8) __msa_insert_d(zero, 0, data_v);
    src3 = (v16u8) __msa_sldi_b((v16i8) src0, (v16i8) src0, 8);
    src4 = (v16u8) __msa_sldi_b((v16i8) src1, (v16i8) src1, 4);
    src5 = (v16u8) __msa_sldi_b((v16i8) src2, (v16i8) src2, 4);
    I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg,
              vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2);
    I422TORGB(src3, src4, src5, vec_ub, vec_vr, vec_ug, vec_vg,
              vec_bb, vec_bg, vec_br, vec_yg, vec3, vec4, vec5);
    reg0 = (v16u8) __msa_ilvev_b((v16i8) vec1, (v16i8) vec0);
    reg2 = (v16u8) __msa_ilvev_b((v16i8) vec4, (v16i8) vec3);
    reg3 = (v16u8) __msa_pckev_b((v16i8) vec5, (v16i8) vec2);
    reg1 = (v16u8) __msa_sldi_b((v16i8) reg2, (v16i8) reg0, 11);
    dst0 = (v16u8) __msa_vshf_b(shuffler0, (v16i8) reg3, (v16i8) reg0);
    dst1 = (v16u8) __msa_vshf_b(shuffler1, (v16i8) reg3, (v16i8) reg1);
    dst2 = (v16u8) __msa_vshf_b(shuffler2, (v16i8) reg3, (v16i8) reg2);
    ST_UB2(dst0, dst1, rgb_buf, 16);
    ST_UB(dst2, (rgb_buf + 32));
    src_y += 16;
    src_u += 8;
    src_v += 8;
    rgb_buf += 48;
  }
}

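As a cross-check for the shuffle-based packing in I422ToRGB24Row_MSA, the intended per-pixel result is the same B/G/R values written as three bytes per pixel with no alpha byte (a scalar sketch only, reusing the hypothetical YuvPixelSketch helper from earlier; not part of the patch):

// Scalar reference for one row of I422 -> RGB24 (illustrative only).
// Each pair of pixels shares one U and one V sample; output byte order is
// B, G, R.
static void I422ToRGB24RowSketch(const uint8* src_y, const uint8* src_u,
                                 const uint8* src_v, uint8* dst_rgb24,
                                 int ub, int ug, int vg, int vr,
                                 int bb, int bg, int br, int yg, int width) {
  int x, b, g, r;
  for (x = 0; x < width; ++x) {
    YuvPixelSketch(src_y[x], src_u[x / 2], src_v[x / 2],
                   ub, ug, vg, vr, bb, bg, br, yg, &b, &g, &r);
    dst_rgb24[3 * x + 0] = (uint8) b;
    dst_rgb24[3 * x + 1] = (uint8) g;
    dst_rgb24[3 * x + 2] = (uint8) r;
  }
}
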
void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width) {
  int x;
  v16u8 src0, src1, src2, src3, dst0, dst1;

  for (x = 0; x < width; x += 32) {
    LD_UB4(src_yuy2, 16, src0, src1, src2, src3);
    dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0);
    dst1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2);
    ST_UB2(dst0, dst1, dst_y, 16);
    src_yuy2 += 64;
(...skipping 295 matching lines...)
    dst_argb += 64;
  }
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

#endif  // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)