Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "libyuv/row.h" | 11 #include "libyuv/row.h" |
| 12 | 12 |
| 13 // This module is for GCC MSA | 13 // This module is for GCC MSA |
| 14 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) | 14 #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |
| 15 #include "libyuv/macros_msa.h" | 15 #include "libyuv/macros_msa.h" |
| 16 | 16 |
| 17 #ifdef __cplusplus | 17 #ifdef __cplusplus |
| 18 namespace libyuv { | 18 namespace libyuv { |
| 19 extern "C" { | 19 extern "C" { |
| 20 #endif | 20 #endif |
| 21 | 21 |
| 22 // Load YUV 422 pixel data | |
| 23 #define LOAD_I422(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) { \ | |
|
fbarchard1
2016/10/26 17:56:31
suggest READYUV422 as name, for consistency with r…
| |
| 24 uint64 y_m; \ | |
| 25 uint32 u_m, v_m; \ | |
| 26 v4i32 zero_m = { 0 }; \ | |
| 27 y_m = LD(psrc_y); \ | |
| 28 u_m = LW(psrc_u); \ | |
| 29 v_m = LW(psrc_v); \ | |
| 30 out_y = (v16u8) __msa_insert_d((v2i64) zero_m, 0, (int64) y_m); \ | |
| 31 out_u = (v16u8) __msa_insert_w(zero_m, 0, (int32) u_m); \ | |
| 32 out_v = (v16u8) __msa_insert_w(zero_m, 0, (int32) v_m); \ | |
| 33 } | |
| 34 | |
| 22 // Convert 8 pixels of YUV 420 to RGB. | 35 // Convert 8 pixels of YUV 420 to RGB. |
| 23 #define I422TORGB(in0, in1, in2, ub, vr, ug, vg, \ | 36 #define I422TORGB(in_y, in_u, in_v, \ |
| 24 bb, bg, br, yg, out0, out1, out2) { \ | 37 ub, vr, ug, vg, bb, bg, br, yg, \ |
| 25 v8i16 vec0_m; \ | 38 out_b, out_g, out_r) { \ |
| 26 v4i32 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m; \ | 39 v8i16 vec0_m; \ |
| 27 v4i32 reg5_m, reg6_m, reg7_m, reg8_m, reg9_m; \ | 40 v4i32 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m; \ |
| 28 v4i32 max_val_m = __msa_ldi_w(255); \ | 41 v4i32 reg5_m, reg6_m, reg7_m, reg8_m, reg9_m; \ |
| 29 v8i16 zero_m = { 0 }; \ | 42 v4i32 max_val_m = __msa_ldi_w(255); \ |
| 30 \ | 43 v8i16 zero_m = { 0 }; \ |
| 31 in1 = (v16u8) __msa_ilvr_b((v16i8) in1, (v16i8) in1); \ | 44 \ |
| 32 in2 = (v16u8) __msa_ilvr_b((v16i8) in2, (v16i8) in2); \ | 45 in_u = (v16u8) __msa_ilvr_b((v16i8) in_u, (v16i8) in_u); \ |
| 33 vec0_m = (v8i16) __msa_ilvr_b((v16i8) in0, (v16i8) in0); \ | 46 in_v = (v16u8) __msa_ilvr_b((v16i8) in_v, (v16i8) in_v); \ |
| 34 reg0_m = (v4i32) __msa_ilvr_h(zero_m, vec0_m); \ | 47 vec0_m = (v8i16) __msa_ilvr_b((v16i8) in_y, (v16i8) in_y); \ |
| 35 reg1_m = (v4i32) __msa_ilvl_h(zero_m, vec0_m); \ | 48 reg0_m = (v4i32) __msa_ilvr_h(zero_m, vec0_m); \ |
| 36 reg0_m *= vec_yg; \ | 49 reg1_m = (v4i32) __msa_ilvl_h(zero_m, vec0_m); \ |
| 37 reg1_m *= vec_yg; \ | 50 reg0_m *= vec_yg; \ |
| 38 reg0_m = __msa_srai_w(reg0_m, 16); \ | 51 reg1_m *= vec_yg; \ |
| 39 reg1_m = __msa_srai_w(reg1_m, 16); \ | 52 reg0_m = __msa_srai_w(reg0_m, 16); \ |
| 40 reg4_m = reg0_m + br; \ | 53 reg1_m = __msa_srai_w(reg1_m, 16); \ |
| 41 reg5_m = reg1_m + br; \ | 54 reg4_m = reg0_m + br; \ |
| 42 reg2_m = reg0_m + bg; \ | 55 reg5_m = reg1_m + br; \ |
| 43 reg3_m = reg1_m + bg; \ | 56 reg2_m = reg0_m + bg; \ |
| 44 reg0_m += bb; \ | 57 reg3_m = reg1_m + bg; \ |
| 45 reg1_m += bb; \ | 58 reg0_m += bb; \ |
| 46 vec0_m = (v8i16) __msa_ilvr_b((v16i8) zero_m, (v16i8) in1); \ | 59 reg1_m += bb; \ |
| 47 reg6_m = (v4i32) __msa_ilvr_h(zero_m, (v8i16) vec0_m); \ | 60 vec0_m = (v8i16) __msa_ilvr_b((v16i8) zero_m, (v16i8) in_u); \ |
| 48 reg7_m = (v4i32) __msa_ilvl_h(zero_m, (v8i16) vec0_m); \ | 61 reg6_m = (v4i32) __msa_ilvr_h(zero_m, (v8i16) vec0_m); \ |
| 49 vec0_m = (v8i16) __msa_ilvr_b((v16i8) zero_m, (v16i8) in2); \ | 62 reg7_m = (v4i32) __msa_ilvl_h(zero_m, (v8i16) vec0_m); \ |
| 50 reg8_m = (v4i32) __msa_ilvr_h(zero_m, (v8i16) vec0_m); \ | 63 vec0_m = (v8i16) __msa_ilvr_b((v16i8) zero_m, (v16i8) in_v); \ |
| 51 reg9_m = (v4i32) __msa_ilvl_h(zero_m, (v8i16) vec0_m); \ | 64 reg8_m = (v4i32) __msa_ilvr_h(zero_m, (v8i16) vec0_m); \ |
| 52 reg0_m -= reg6_m * ub; \ | 65 reg9_m = (v4i32) __msa_ilvl_h(zero_m, (v8i16) vec0_m); \ |
| 53 reg1_m -= reg7_m * ub; \ | 66 reg0_m -= reg6_m * ub; \ |
| 54 reg2_m -= reg6_m * ug; \ | 67 reg1_m -= reg7_m * ub; \ |
| 55 reg3_m -= reg7_m * ug; \ | 68 reg2_m -= reg6_m * ug; \ |
| 56 reg4_m -= reg8_m * vr; \ | 69 reg3_m -= reg7_m * ug; \ |
| 57 reg5_m -= reg9_m * vr; \ | 70 reg4_m -= reg8_m * vr; \ |
| 58 reg2_m -= reg8_m * vg; \ | 71 reg5_m -= reg9_m * vr; \ |
| 59 reg3_m -= reg9_m * vg; \ | 72 reg2_m -= reg8_m * vg; \ |
| 60 reg0_m = __msa_srai_w(reg0_m, 6); \ | 73 reg3_m -= reg9_m * vg; \ |
| 61 reg1_m = __msa_srai_w(reg1_m, 6); \ | 74 reg0_m = __msa_srai_w(reg0_m, 6); \ |
| 62 reg2_m = __msa_srai_w(reg2_m, 6); \ | 75 reg1_m = __msa_srai_w(reg1_m, 6); \ |
| 63 reg3_m = __msa_srai_w(reg3_m, 6); \ | 76 reg2_m = __msa_srai_w(reg2_m, 6); \ |
| 64 reg4_m = __msa_srai_w(reg4_m, 6); \ | 77 reg3_m = __msa_srai_w(reg3_m, 6); \ |
| 65 reg5_m = __msa_srai_w(reg5_m, 6); \ | 78 reg4_m = __msa_srai_w(reg4_m, 6); \ |
| 66 reg0_m = __msa_maxi_s_w(reg0_m, 0); \ | 79 reg5_m = __msa_srai_w(reg5_m, 6); \ |
| 67 reg1_m = __msa_maxi_s_w(reg1_m, 0); \ | 80 reg0_m = __msa_maxi_s_w(reg0_m, 0); \ |
| 68 reg2_m = __msa_maxi_s_w(reg2_m, 0); \ | 81 reg1_m = __msa_maxi_s_w(reg1_m, 0); \ |
| 69 reg3_m = __msa_maxi_s_w(reg3_m, 0); \ | 82 reg2_m = __msa_maxi_s_w(reg2_m, 0); \ |
| 70 reg4_m = __msa_maxi_s_w(reg4_m, 0); \ | 83 reg3_m = __msa_maxi_s_w(reg3_m, 0); \ |
| 71 reg5_m = __msa_maxi_s_w(reg5_m, 0); \ | 84 reg4_m = __msa_maxi_s_w(reg4_m, 0); \ |
| 72 reg0_m = __msa_min_s_w(reg0_m, max_val_m); \ | 85 reg5_m = __msa_maxi_s_w(reg5_m, 0); \ |
| 73 reg1_m = __msa_min_s_w(reg1_m, max_val_m); \ | 86 reg0_m = __msa_min_s_w(reg0_m, max_val_m); \ |
| 74 reg2_m = __msa_min_s_w(reg2_m, max_val_m); \ | 87 reg1_m = __msa_min_s_w(reg1_m, max_val_m); \ |
| 75 reg3_m = __msa_min_s_w(reg3_m, max_val_m); \ | 88 reg2_m = __msa_min_s_w(reg2_m, max_val_m); \ |
| 76 reg4_m = __msa_min_s_w(reg4_m, max_val_m); \ | 89 reg3_m = __msa_min_s_w(reg3_m, max_val_m); \ |
| 77 reg5_m = __msa_min_s_w(reg5_m, max_val_m); \ | 90 reg4_m = __msa_min_s_w(reg4_m, max_val_m); \ |
| 78 out0 = __msa_pckev_h((v8i16) reg1_m, (v8i16) reg0_m); \ | 91 reg5_m = __msa_min_s_w(reg5_m, max_val_m); \ |
| 79 out1 = __msa_pckev_h((v8i16) reg3_m, (v8i16) reg2_m); \ | 92 out_b = __msa_pckev_h((v8i16) reg1_m, (v8i16) reg0_m); \ |
| 80 out2 = __msa_pckev_h((v8i16) reg5_m, (v8i16) reg4_m); \ | 93 out_g = __msa_pckev_h((v8i16) reg3_m, (v8i16) reg2_m); \ |
| 94 out_r = __msa_pckev_h((v8i16) reg5_m, (v8i16) reg4_m); \ | |
| 81 } | 95 } |
| 82 | 96 |
| 83 void MirrorRow_MSA(const uint8* src, uint8* dst, int width) { | 97 void MirrorRow_MSA(const uint8* src, uint8* dst, int width) { |
| 84 int x; | 98 int x; |
| 85 v16u8 src0, src1, src2, src3; | 99 v16u8 src0, src1, src2, src3; |
| 86 v16u8 dst0, dst1, dst2, dst3; | 100 v16u8 dst0, dst1, dst2, dst3; |
| 87 v16i8 shuffler = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; | 101 v16i8 shuffler = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; |
| 88 src += width - 64; | 102 src += width - 64; |
| 89 | 103 |
| 90 for (x = 0; x < width; x += 64) { | 104 for (x = 0; x < width; x += 64) { |
| (...skipping 68 matching lines...) | |
| 159 src_v += 16; | 173 src_v += 16; |
| 160 src_y += 32; | 174 src_y += 32; |
| 161 dst_uyvy += 64; | 175 dst_uyvy += 64; |
| 162 } | 176 } |
| 163 } | 177 } |
| 164 | 178 |
| 165 void I422ToARGBRow_MSA(const uint8* src_y, const uint8* src_u, | 179 void I422ToARGBRow_MSA(const uint8* src_y, const uint8* src_u, |
| 166 const uint8* src_v, uint8* rgb_buf, | 180 const uint8* src_v, uint8* rgb_buf, |
| 167 const struct YuvConstants* yuvconstants, int width) { | 181 const struct YuvConstants* yuvconstants, int width) { |
| 168 int x; | 182 int x; |
| 169 int32 data_u, data_v; | |
| 170 int64 data_y; | |
| 171 v16u8 src0, src1, src2, dst0, dst1; | 183 v16u8 src0, src1, src2, dst0, dst1; |
| 172 v8i16 vec0, vec1, vec2; | 184 v8i16 vec0, vec1, vec2; |
| 173 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; | 185 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; |
| 174 v16u8 const_255 = (v16u8) __msa_ldi_b(255); | 186 v16u8 const_255 = (v16u8) __msa_ldi_b(255); |
| 175 v4i32 zero = { 0 }; | |
| 176 | 187 |
| 177 vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]); | 188 vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]); |
| 178 vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]); | 189 vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]); |
| 179 vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]); | 190 vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]); |
| 180 vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]); | 191 vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]); |
| 181 vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]); | 192 vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]); |
| 182 vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]); | 193 vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]); |
| 183 vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]); | 194 vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]); |
| 184 vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]); | 195 vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]); |
| 185 | 196 |
| 186 for (x = 0; x < width; x += 8) { | 197 for (x = 0; x < width; x += 8) { |
| 187 data_y = LD(src_y); | 198 LOAD_I422(src_y, src_u, src_v, src0, src1, src2); |
| 188 data_u = LW(src_u); | |
| 189 data_v = LW(src_v); | |
| 190 src0 = (v16u8) __msa_insert_d((v2i64) zero, 0, data_y); | |
| 191 src1 = (v16u8) __msa_insert_w(zero, 0, data_u); | |
| 192 src2 = (v16u8) __msa_insert_w(zero, 0, data_v); | |
| 193 I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, | 199 I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, |
| 194 vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); | 200 vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); |
| 195 vec0 = (v8i16) __msa_ilvev_b((v16i8) vec1, (v16i8) vec0); | 201 vec0 = (v8i16) __msa_ilvev_b((v16i8) vec1, (v16i8) vec0); |
| 196 vec1 = (v8i16) __msa_ilvev_b((v16i8) const_255, (v16i8) vec2); | 202 vec1 = (v8i16) __msa_ilvev_b((v16i8) const_255, (v16i8) vec2); |
| 197 dst0 = (v16u8) __msa_ilvr_h((v8i16) vec1, (v8i16) vec0); | 203 dst0 = (v16u8) __msa_ilvr_h((v8i16) vec1, (v8i16) vec0); |
| 198 dst1 = (v16u8) __msa_ilvl_h((v8i16) vec1, (v8i16) vec0); | 204 dst1 = (v16u8) __msa_ilvl_h((v8i16) vec1, (v8i16) vec0); |
| 199 ST_UB2(dst0, dst1, rgb_buf, 16); | 205 ST_UB2(dst0, dst1, rgb_buf, 16); |
| 200 src_y += 8; | 206 src_y += 8; |
| 201 src_u += 4; | 207 src_u += 4; |
| 202 src_v += 4; | 208 src_v += 4; |
| 203 rgb_buf += 32; | 209 rgb_buf += 32; |
| 204 } | 210 } |
| 205 } | 211 } |
| 206 | 212 |
| 207 void I422ToRGBARow_MSA(const uint8* src_y, const uint8* src_u, | 213 void I422ToRGBARow_MSA(const uint8* src_y, const uint8* src_u, |
| 208 const uint8* src_v, uint8* rgb_buf, | 214 const uint8* src_v, uint8* rgb_buf, |
| 209 const struct YuvConstants* yuvconstants, int width) { | 215 const struct YuvConstants* yuvconstants, int width) { |
| 210 int x; | 216 int x; |
| 211 int64 data_y; | |
| 212 int32 data_u, data_v; | |
| 213 v16u8 src0, src1, src2, dst0, dst1; | 217 v16u8 src0, src1, src2, dst0, dst1; |
| 214 v8i16 vec0, vec1, vec2; | 218 v8i16 vec0, vec1, vec2; |
| 215 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; | 219 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; |
| 216 v16u8 const_255 = (v16u8) __msa_ldi_b(255); | 220 v16u8 const_255 = (v16u8) __msa_ldi_b(255); |
| 217 v4i32 zero = { 0 }; | |
| 218 | 221 |
| 219 vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]); | 222 vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]); |
| 220 vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]); | 223 vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]); |
| 221 vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]); | 224 vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]); |
| 222 vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]); | 225 vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]); |
| 223 vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]); | 226 vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]); |
| 224 vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]); | 227 vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]); |
| 225 vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]); | 228 vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]); |
| 226 vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]); | 229 vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]); |
| 227 | 230 |
| 228 for (x = 0; x < width; x += 8) { | 231 for (x = 0; x < width; x += 8) { |
| 229 data_y = LD(src_y); | 232 LOAD_I422(src_y, src_u, src_v, src0, src1, src2); |
| 230 data_u = LW(src_u); | |
| 231 data_v = LW(src_v); | |
| 232 src0 = (v16u8) __msa_insert_d((v2i64) zero, 0, data_y); | |
| 233 src1 = (v16u8) __msa_insert_w(zero, 0, data_u); | |
| 234 src2 = (v16u8) __msa_insert_w(zero, 0, data_v); | |
| 235 I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, | 233 I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, |
| 236 vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); | 234 vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); |
| 237 vec0 = (v8i16) __msa_ilvev_b((v16i8) vec0, (v16i8) const_255); | 235 vec0 = (v8i16) __msa_ilvev_b((v16i8) vec0, (v16i8) const_255); |
| 238 vec1 = (v8i16) __msa_ilvev_b((v16i8) vec2, (v16i8) vec1); | 236 vec1 = (v8i16) __msa_ilvev_b((v16i8) vec2, (v16i8) vec1); |
| 239 dst0 = (v16u8) __msa_ilvr_h(vec1, vec0); | 237 dst0 = (v16u8) __msa_ilvr_h(vec1, vec0); |
| 240 dst1 = (v16u8) __msa_ilvl_h(vec1, vec0); | 238 dst1 = (v16u8) __msa_ilvl_h(vec1, vec0); |
| 241 ST_UB2(dst0, dst1, rgb_buf, 16); | 239 ST_UB2(dst0, dst1, rgb_buf, 16); |
| 242 src_y += 8; | 240 src_y += 8; |
| 243 src_u += 4; | 241 src_u += 4; |
| 244 src_v += 4; | 242 src_v += 4; |
| 245 rgb_buf += 32; | 243 rgb_buf += 32; |
| 246 } | 244 } |
| 247 } | 245 } |
| 248 | 246 |
| 247 void I422AlphaToARGBRow_MSA(const uint8* src_y, const uint8* src_u, | |
| 248 const uint8* src_v, const uint8* src_a, | |
| 249 uint8* rgb_buf, | |
| 250 const struct YuvConstants* yuvconstants, | |
| 251 int width) { | |
| 252 int x; | |
| 253 int64 data_a; | |
| 254 v16u8 src0, src1, src2, src3, dst0, dst1; | |
| 255 v8i16 vec0, vec1, vec2; | |
| 256 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; | |
| 257 v4i32 zero = { 0 }; | |
| 258 | |
| 259 vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]); | |
| 260 vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]); | |
| 261 vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]); | |
| 262 vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]); | |
| 263 vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]); | |
| 264 vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]); | |
| 265 vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]); | |
| 266 vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]); | |
| 267 | |
| 268 for (x = 0; x < width; x += 8) { | |
| 269 data_a = LD(src_a); | |
| 270 LOAD_I422(src_y, src_u, src_v, src0, src1, src2); | |
| 271 src3 = (v16u8) __msa_insert_d((v2i64) zero, 0, data_a); | |
| 272 I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, | |
| 273 vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); | |
| 274 src3 = (v16u8) __msa_ilvr_b((v16i8) src3, (v16i8) src3); | |
|
fbarchard1
2016/10/26 17:56:31
consider a macro for STOREARGB since it will come…
| |
| 275 vec0 = (v8i16) __msa_ilvev_b((v16i8) vec1, (v16i8) vec0); | |
| 276 vec1 = (v8i16) __msa_ilvev_b((v16i8) src3, (v16i8) vec2); | |
| 277 dst0 = (v16u8) __msa_ilvr_h((v8i16) vec1, (v8i16) vec0); | |
| 278 dst1 = (v16u8) __msa_ilvl_h((v8i16) vec1, (v8i16) vec0); | |
| 279 ST_UB2(dst0, dst1, rgb_buf, 16); | |
| 280 src_y += 8; | |
| 281 src_u += 4; | |
| 282 src_v += 4; | |
| 283 src_a += 8; | |
| 284 rgb_buf += 32; | |
| 285 } | |
| 286 } | |
| 287 | |
| 288 void I422ToRGB24Row_MSA(const uint8* src_y, const uint8* src_u, | |
| 289 const uint8* src_v, uint8* rgb_buf, | |
| 290 const struct YuvConstants* yuvconstants, int32 width) { | |
| 291 int x; | |
| 292 int64 data_u, data_v; | |
| 293 v16u8 src0, src1, src2, src3, src4, src5, dst0, dst1, dst2; | |
| 294 v8i16 vec0, vec1, vec2, vec3, vec4, vec5; | |
| 295 v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; | |
| 296 v16u8 reg0, reg1, reg2, reg3; | |
| 297 v2i64 zero = { 0 }; | |
| 298 v16i8 shuffler0 = { 0, 1, 16, 2, 3, 17, 4, 5, 18, 6, 7, 19, 8, 9, 20, 10 }; | |
| 299 v16i8 shuffler1 = { 0, 21, 1, 2, 22, 3, 4, 23, 5, 6, 24, 7, 8, 25, 9, 10 }; | |
| 300 v16i8 shuffler2 = | |
| 301 { 26, 6, 7, 27, 8, 9, 28, 10, 11, 29, 12, 13, 30, 14, 15, 31 }; | |
|
fbarchard1
2016/10/26 17:56:31
indent should be 4 from line above.
| |
| 302 | |
| 303 vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]); | |
| 304 vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]); | |
| 305 vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]); | |
| 306 vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]); | |
| 307 vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]); | |
| 308 vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]); | |
| 309 vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]); | |
| 310 vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]); | |
| 311 | |
| 312 for (x = 0; x < width; x += 16) { | |
| 313 src0 = (v16u8) __msa_ld_b((v16u8*) src_y, 0); | |
| 314 data_u = LD(src_u); | |
| 315 data_v = LD(src_v); | |
| 316 src1 = (v16u8) __msa_insert_d(zero, 0, data_u); | |
| 317 src2 = (v16u8) __msa_insert_d(zero, 0, data_v); | |
| 318 src3 = (v16u8) __msa_sldi_b((v16i8) src0, (v16i8) src0, 8); | |
| 319 src4 = (v16u8) __msa_sldi_b((v16i8) src1, (v16i8) src1, 4); | |
| 320 src5 = (v16u8) __msa_sldi_b((v16i8) src2, (v16i8) src2, 4); | |
| 321 I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, | |
| 322 vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); | |
| 323 I422TORGB(src3, src4, src5, vec_ub, vec_vr, vec_ug, vec_vg, | |
| 324 vec_bb, vec_bg, vec_br, vec_yg, vec3, vec4, vec5); | |
| 325 reg0 = (v16u8) __msa_ilvev_b((v16i8) vec1, (v16i8) vec0); | |
| 326 reg2 = (v16u8) __msa_ilvev_b((v16i8) vec4, (v16i8) vec3); | |
| 327 reg3 = (v16u8) __msa_pckev_b((v16i8) vec5, (v16i8) vec2); | |
| 328 reg1 = (v16u8) __msa_sldi_b((v16i8) reg2, (v16i8) reg0, 11); | |
| 329 dst0 = (v16u8) __msa_vshf_b(shuffler0, (v16i8) reg3, (v16i8) reg0); | |
| 330 dst1 = (v16u8) __msa_vshf_b(shuffler1, (v16i8) reg3, (v16i8) reg1); | |
| 331 dst2 = (v16u8) __msa_vshf_b(shuffler2, (v16i8) reg3, (v16i8) reg2); | |
| 332 ST_UB2(dst0, dst1, rgb_buf, 16); | |
| 333 ST_UB(dst2, (rgb_buf + 32)); | |
| 334 src_y += 16; | |
| 335 src_u += 8; | |
| 336 src_v += 8; | |
| 337 rgb_buf += 48; | |
| 338 } | |
| 339 } | |
| 340 | |
| 249 void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width) { | 341 void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width) { |
| 250 int x; | 342 int x; |
| 251 v16u8 src0, src1, src2, src3, dst0, dst1; | 343 v16u8 src0, src1, src2, src3, dst0, dst1; |
| 252 | 344 |
| 253 for (x = 0; x < width; x += 32) { | 345 for (x = 0; x < width; x += 32) { |
| 254 LD_UB4(src_yuy2, 16, src0, src1, src2, src3); | 346 LD_UB4(src_yuy2, 16, src0, src1, src2, src3); |
| 255 dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); | 347 dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); |
| 256 dst1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2); | 348 dst1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2); |
| 257 ST_UB2(dst0, dst1, dst_y, 16); | 349 ST_UB2(dst0, dst1, dst_y, 16); |
| 258 src_yuy2 += 64; | 350 src_yuy2 += 64; |
| (...skipping 295 matching lines...) | |
| 554 dst_argb += 64; | 646 dst_argb += 64; |
| 555 } | 647 } |
| 556 } | 648 } |
| 557 | 649 |
| 558 #ifdef __cplusplus | 650 #ifdef __cplusplus |
| 559 } // extern "C" | 651 } // extern "C" |
| 560 } // namespace libyuv | 652 } // namespace libyuv |
| 561 #endif | 653 #endif |
| 562 | 654 |
| 563 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) | 655 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |
| OLD | NEW |