Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "libyuv/row.h" | 11 #include "libyuv/row.h" |
| 12 | 12 |
| 13 // This module is for GCC MSA | 13 // This module is for GCC MSA |
| 14 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) | 14 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |
| 15 #include "libyuv/macros_msa.h" | 15 #include "libyuv/macros_msa.h" |
| 16 | 16 |
| 17 #ifdef __cplusplus | 17 #ifdef __cplusplus |
| 18 namespace libyuv { | 18 namespace libyuv { |
| 19 extern "C" { | 19 extern "C" { |
| 20 #endif | 20 #endif |
| 21 | 21 |
| // I422TORGB: converts 8 pixels of 4:2:2 YUV to separate B, G and R vectors. | |
| // in0 supplies 8 Y bytes, in1 4 U bytes and in2 4 V bytes (lowest elements); | |
| // in1 and in2 are overwritten with the byte-duplicated chroma, so each U/V | |
| // sample is shared by two neighbouring Y samples. | |
| // ub, vr, ug, vg (chroma coefficients), bb, bg, br (biases) and yg (Y scale) | |
| // are v4i32 vectors. For every pixel, with ys = (y * 0x0101 * yg) >> 16: | |
| // b = clamp(0, 255, (ys + bb - u * ub) >> 6) | |
| // g = clamp(0, 255, (ys + bg - u * ug - v * vg) >> 6) | |
| // r = clamp(0, 255, (ys + br - v * vr) >> 6) | |
| // out0, out1 and out2 receive B, G and R as eight 16-bit lanes each. | |
| 22 #define I422TORGB(in0, in1, in2, ub, vr, ug, vg, \ | |
|
fbarchard1
2016/10/24 17:36:32
Could use a brief comment to describe what this ma
| |
| 23 bb, bg, br, yg, out0, out1, out2) { \ | |
| 24 v8i16 vec0_m; \ | |
| 25 v4i32 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m; \ | |
| 26 v4i32 reg5_m, reg6_m, reg7_m, reg8_m, reg9_m; \ | |
| 27 v4i32 max_val_m = __msa_ldi_w(255); \ | |
| 28 v8i16 zero_m = { 0 }; \ | |
| 29 \ | |
| 30 in1 = (v16u8) __msa_ilvr_b((v16i8) in1, (v16i8) in1); \ | |
| 31 in2 = (v16u8) __msa_ilvr_b((v16i8) in2, (v16i8) in2); \ | |
| 32 vec0_m = (v8i16) __msa_ilvr_b((v16i8) in0, (v16i8) in0); \ | |
| 33 reg0_m = (v4i32) __msa_ilvr_h(zero_m, vec0_m); \ | |
| 34 reg1_m = (v4i32) __msa_ilvl_h(zero_m, vec0_m); \ | |
| 35 reg0_m *= yg; \ | |
| 36 reg1_m *= yg; \ | |
| 37 reg0_m = __msa_srai_w(reg0_m, 16); \ | |
| 38 reg1_m = __msa_srai_w(reg1_m, 16); \ | |
| 39 reg4_m = reg0_m + br; \ | |
| 40 reg5_m = reg1_m + br; \ | |
| 41 reg2_m = reg0_m + bg; \ | |
| 42 reg3_m = reg1_m + bg; \ | |
| 43 reg0_m += bb; \ | |
| 44 reg1_m += bb; \ | |
| 45 vec0_m = (v8i16) __msa_ilvr_b((v16i8) zero_m, (v16i8) in1); \ | |
| 46 reg6_m = (v4i32) __msa_ilvr_h(zero_m, (v8i16) vec0_m); \ | |
| 47 reg7_m = (v4i32) __msa_ilvl_h(zero_m, (v8i16) vec0_m); \ | |
| 48 vec0_m = (v8i16) __msa_ilvr_b((v16i8) zero_m, (v16i8) in2); \ | |
| 49 reg8_m = (v4i32) __msa_ilvr_h(zero_m, (v8i16) vec0_m); \ | |
| 50 reg9_m = (v4i32) __msa_ilvl_h(zero_m, (v8i16) vec0_m); \ | |
| 51 reg0_m -= reg6_m * ub; \ | |
| 52 reg1_m -= reg7_m * ub; \ | |
| 53 reg2_m -= reg6_m * ug; \ | |
| 54 reg3_m -= reg7_m * ug; \ | |
| 55 reg4_m -= reg8_m * vr; \ | |
| 56 reg5_m -= reg9_m * vr; \ | |
| 57 reg2_m -= reg8_m * vg; \ | |
| 58 reg3_m -= reg9_m * vg; \ | |
| 59 reg0_m = __msa_srai_w(reg0_m, 6); \ | |
| 60 reg1_m = __msa_srai_w(reg1_m, 6); \ | |
| 61 reg2_m = __msa_srai_w(reg2_m, 6); \ | |
| 62 reg3_m = __msa_srai_w(reg3_m, 6); \ | |
| 63 reg4_m = __msa_srai_w(reg4_m, 6); \ | |
| 64 reg5_m = __msa_srai_w(reg5_m, 6); \ | |
| 65 reg0_m = __msa_maxi_s_w(reg0_m, 0); \ | |
| 66 reg1_m = __msa_maxi_s_w(reg1_m, 0); \ | |
| 67 reg2_m = __msa_maxi_s_w(reg2_m, 0); \ | |
| 68 reg3_m = __msa_maxi_s_w(reg3_m, 0); \ | |
| 69 reg4_m = __msa_maxi_s_w(reg4_m, 0); \ | |
| 70 reg5_m = __msa_maxi_s_w(reg5_m, 0); \ | |
| 71 reg0_m = __msa_min_s_w(reg0_m, max_val_m); \ | |
| 72 reg1_m = __msa_min_s_w(reg1_m, max_val_m); \ | |
| 73 reg2_m = __msa_min_s_w(reg2_m, max_val_m); \ | |
| 74 reg3_m = __msa_min_s_w(reg3_m, max_val_m); \ | |
| 75 reg4_m = __msa_min_s_w(reg4_m, max_val_m); \ | |
| 76 reg5_m = __msa_min_s_w(reg5_m, max_val_m); \ | |
| 77 out0 = __msa_pckev_h((v8i16) reg1_m, (v8i16) reg0_m); \ | |
| 78 out1 = __msa_pckev_h((v8i16) reg3_m, (v8i16) reg2_m); \ | |
| 79 out2 = __msa_pckev_h((v8i16) reg5_m, (v8i16) reg4_m); \ | |
| 80 } | |
| 81 | |
| 22 void MirrorRow_MSA(const uint8* src, uint8* dst, int width) { | 82 void MirrorRow_MSA(const uint8* src, uint8* dst, int width) { |
| 23 int x; | 83 int x; |
| 24 v16u8 src0, src1, src2, src3; | 84 v16u8 src0, src1, src2, src3; |
| 25 v16u8 dst0, dst1, dst2, dst3; | 85 v16u8 dst0, dst1, dst2, dst3; |
| 26 v16i8 shuffler = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; | 86 v16i8 shuffler = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; |
| 27 src += width - 64; | 87 src += width - 64; |
| 28 | 88 |
| 29 for (x = 0; x < width; x += 64) { | 89 for (x = 0; x < width; x += 64) { |
| 30 LD_UB4(src, 16, src3, src2, src1, src0); | 90 LD_UB4(src, 16, src3, src2, src1, src0); |
| 31 VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2); | 91 VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2); |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 94 ILVRL_B2_UB(src_y0, vec_uv0, dst_uyvy0, dst_uyvy1); | 154 ILVRL_B2_UB(src_y0, vec_uv0, dst_uyvy0, dst_uyvy1); |
| 95 ILVRL_B2_UB(src_y1, vec_uv1, dst_uyvy2, dst_uyvy3); | 155 ILVRL_B2_UB(src_y1, vec_uv1, dst_uyvy2, dst_uyvy3); |
| 96 ST_UB4(dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3, dst_uyvy, 16); | 156 ST_UB4(dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3, dst_uyvy, 16); |
| 97 src_u += 16; | 157 src_u += 16; |
| 98 src_v += 16; | 158 src_v += 16; |
| 99 src_y += 32; | 159 src_y += 32; |
| 100 dst_uyvy += 64; | 160 dst_uyvy += 64; |
| 101 } | 161 } |
| 102 } | 162 } |
| 103 | 163 |
| 164 void I422ToARGBRow_MSA(const uint8* src_y, const uint8* src_u, | |
| 165 const uint8* src_v, uint8* rgb_buf, | |
| 166 const struct YuvConstants* yuvconstants, int width) { /* Converts a row of planar Y/U/V (one U and one V byte per two Y bytes) into 4-byte pixels written to rgb_buf as B, G, R, 0xFF. */ | |
| 167 int x; | |
| 168 int32 data_u, data_v; | |
| 169 int64 data_y; | |
| 170 v16u8 src0, src1, src2, dst0, dst1; | |
| 171 v8i16 vec0, vec1, vec2; | |
| 172 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; | |
| 173 v16u8 const_255 = (v16u8) __msa_ldi_b(255); /* every byte is 0xFF; supplies the fourth byte of each output pixel */ | |
| 174 v4i32 zero = { 0 }; | |
| 175 | |
| 176 vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]); /* this and the next seven lines replicate one coefficient or bias from yuvconstants into every lane of a v4i32 for I422TORGB */ | |
| 177 vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]); | |
| 178 vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]); | |
| 179 vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]); | |
| 180 vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]); | |
| 181 vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]); | |
| 182 vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]); | |
| 183 vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]); | |
| 184 | |
| 185 for (x = 0; x < width; x += 8) { /* 8 pixels per pass: advances src_y by 8, src_u and src_v by 4, rgb_buf by 32; there is no separate tail path, so a full group of 8 is processed even when fewer pixels remain */ | |
| 186 data_y = LD(src_y); /* LD and LW are helper macros that are not defined in this file; their results are held as int64 and int32 */ | |
|
fbarchard1
2016/10/24 17:36:32
Consider a macro to read I422, as there are many Y
| |
| 187 data_u = LW(src_u); | |
| 188 data_v = LW(src_v); | |
| 189 src0 = (v16u8) __msa_insert_d((v2i64) zero, 0, data_y); /* place each loaded scalar in element 0 of an otherwise zero vector */ | |
| 190 src1 = (v16u8) __msa_insert_w(zero, 0, data_u); | |
| 191 src2 = (v16u8) __msa_insert_w(zero, 0, data_v); | |
|
fbarchard1
2016/10/24 17:36:32
No way to load 4 or 8 bytes directly into an MSA r
| |
| 192 I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, | |
| 193 vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); /* vec0, vec1, vec2 now hold B, G, R as eight 16-bit lanes */ | |
| 194 vec0 = (v8i16) __msa_ilvev_b((v16i8) vec1, (v16i8) vec0); /* byte pairs (B, G) */ | |
| 195 vec1 = (v8i16) __msa_ilvev_b((v16i8) const_255, (v16i8) vec2); /* byte pairs (R, 0xFF) */ | |
| 196 dst0 = (v16u8) __msa_ilvr_h((v8i16) vec1, (v8i16) vec0); /* B, G, R, 0xFF for the first 4 pixels */ | |
| 197 dst1 = (v16u8) __msa_ilvl_h((v8i16) vec1, (v8i16) vec0); /* B, G, R, 0xFF for the last 4 pixels */ | |
| 198 ST_UB2(dst0, dst1, rgb_buf, 16); | |
| 199 src_y += 8; | |
| 200 src_u += 4; | |
| 201 src_v += 4; | |
| 202 rgb_buf += 32; | |
| 203 } | |
| 204 } | |
| 205 | |
| 206 void I422ToRGBARow_MSA(const uint8* src_y, const uint8* src_u, | |
| 207 const uint8* src_v, uint8* rgb_buf, | |
| 208 const struct YuvConstants* yuvconstants, int width) { /* Converts a row of planar Y/U/V (one U and one V byte per two Y bytes) into 4-byte pixels written to rgb_buf as 0xFF, B, G, R. */ | |
| 209 int x; | |
| 210 int64 data_y; | |
| 211 int32 data_u, data_v; | |
| 212 v16u8 src0, src1, src2, dst0, dst1; | |
| 213 v8i16 vec0, vec1, vec2; | |
| 214 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; | |
| 215 v16u8 const_255 = (v16u8) __msa_ldi_b(255); /* every byte is 0xFF; supplies the first byte of each output pixel */ | |
| 216 v4i32 zero = { 0 }; | |
| 217 | |
| 218 vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]); /* this and the next seven lines replicate one coefficient or bias from yuvconstants into every lane of a v4i32 for I422TORGB */ | |
| 219 vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]); | |
| 220 vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]); | |
| 221 vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]); | |
| 222 vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]); | |
| 223 vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]); | |
| 224 vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]); | |
| 225 vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]); | |
| 226 | |
| 227 for (x = 0; x < width; x += 8) { /* 8 pixels per pass: advances src_y by 8, src_u and src_v by 4, rgb_buf by 32; there is no separate tail path, so a full group of 8 is processed even when fewer pixels remain */ | |
| 228 data_y = LD(src_y); /* LD and LW are helper macros that are not defined in this file; their results are held as int64 and int32 */ | |
| 229 data_u = LW(src_u); | |
| 230 data_v = LW(src_v); | |
| 231 src0 = (v16u8) __msa_insert_d((v2i64) zero, 0, data_y); /* place each loaded scalar in element 0 of an otherwise zero vector */ | |
| 232 src1 = (v16u8) __msa_insert_w(zero, 0, data_u); | |
| 233 src2 = (v16u8) __msa_insert_w(zero, 0, data_v); | |
| 234 I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, | |
| 235 vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); /* vec0, vec1, vec2 now hold B, G, R as eight 16-bit lanes */ | |
| 236 vec0 = (v8i16) __msa_ilvev_b((v16i8) vec0, (v16i8) const_255); /* byte pairs (0xFF, B) */ | |
| 237 vec1 = (v8i16) __msa_ilvev_b((v16i8) vec2, (v16i8) vec1); /* byte pairs (G, R) */ | |
| 238 dst0 = (v16u8) __msa_ilvr_h(vec1, vec0); /* 0xFF, B, G, R for the first 4 pixels */ | |
| 239 dst1 = (v16u8) __msa_ilvl_h(vec1, vec0); /* 0xFF, B, G, R for the last 4 pixels */ | |
| 240 ST_UB2(dst0, dst1, rgb_buf, 16); | |
| 241 src_y += 8; | |
| 242 src_u += 4; | |
| 243 src_v += 4; | |
| 244 rgb_buf += 32; | |
| 245 } | |
| 246 } | |
| 247 | |
| 104 void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width) { | 248 void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width) { |
| 105 int x; | 249 int x; |
| 106 v16u8 src0, src1, src2, src3, dst0, dst1; | 250 v16u8 src0, src1, src2, src3, dst0, dst1; |
| 107 | 251 |
| 108 for (x = 0; x < width; x += 32) { | 252 for (x = 0; x < width; x += 32) { |
| 109 LD_UB4(src_yuy2, 16, src0, src1, src2, src3); | 253 LD_UB4(src_yuy2, 16, src0, src1, src2, src3); |
| 110 dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); | 254 dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); |
| 111 dst1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2); | 255 dst1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2); |
| 112 ST_UB2(dst0, dst1, dst_y, 16); | 256 ST_UB2(dst0, dst1, dst_y, 16); |
| 113 src_yuy2 += 64; | 257 src_yuy2 += 64; |
| (...skipping 295 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 409 dst_argb += 64; | 553 dst_argb += 64; |
| 410 } | 554 } |
| 411 } | 555 } |
| 412 | 556 |
| 413 #ifdef __cplusplus | 557 #ifdef __cplusplus |
| 414 } // extern "C" | 558 } // extern "C" |
| 415 } // namespace libyuv | 559 } // namespace libyuv |
| 416 #endif | 560 #endif |
| 417 | 561 |
| 418 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) | 562 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |
| OLD | NEW |