Chromium Code Reviews| Index: source/row_msa.cc |
| diff --git a/source/row_msa.cc b/source/row_msa.cc |
| index 7b3c206788ce04f25b531761c4da74e3f2b098b7..6147ee69074b5c86b8c5125d80de15ed6ae5df0b 100644 |
| --- a/source/row_msa.cc |
| +++ b/source/row_msa.cc |
| @@ -19,6 +19,18 @@ namespace libyuv { |
| extern "C" { |
| #endif |
| +// Fill YUV -> RGB conversion constants into vectors: each output receives one scalar read from yuvconst, passed through __msa_fill_w. |
| +#define FILLYUVTORGBCONSTS(yuvconst, ub, vr, ug, vg, bb, bg, br, yg) { \ |

fbarchard1
2016/10/27 17:37:14
suggest using the same name as row_gcc: YUVTORGB_S
fbarchard1
2016/10/27 18:22:55
Done.
|
| + ub = __msa_fill_w(yuvconst->kUVToB[0]); /* presumably the U -> B coefficient (judging by the field name) */ \ |
| + vr = __msa_fill_w(yuvconst->kUVToR[1]); /* reads index 1, not 0; presumably the V -> R coefficient - TODO confirm table layout */ \ |
| + ug = __msa_fill_w(yuvconst->kUVToG[0]); /* kUVToG supplies two values: index 0 goes to ug */ \ |
| + vg = __msa_fill_w(yuvconst->kUVToG[1]); /* ... and index 1 goes to vg */ \ |
| + bb = __msa_fill_w(yuvconst->kUVBiasB[0]); /* the three bias tables are each read at index 0 */ \ |
| + bg = __msa_fill_w(yuvconst->kUVBiasG[0]); \ |
| + br = __msa_fill_w(yuvconst->kUVBiasR[0]); \ |
| + yg = __msa_fill_w(yuvconst->kYToRgb[0]); /* presumably the luma scale factor - confirm against YuvConstants */ \ |
| +} |
| + |
| // Load YUV 422 pixel data |
| #define READYUV422(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) { \ |
| uint64 y_m; \ |
| @@ -94,6 +106,17 @@ extern "C" { |
| out_r = __msa_pckev_h((v8i16) reg5_m, (v8i16) reg4_m); \ |
| } |
| +// Pack and Store 8 ARGB values: interleaves the even bytes of in0..in3 and writes two 16-byte vectors to pdst_argb. |
| +#define STOREARGB(in0, in1, in2, in3, pdst_argb) { \ |
| + v8i16 vec0_m, vec1_m; \ |
| + v16u8 dst0_m, dst1_m; \ |
| + vec0_m = (v8i16) __msa_ilvev_b((v16i8) in1, (v16i8) in0); /* pair up the even bytes of in0 and in1 */ \ |
| + vec1_m = (v8i16) __msa_ilvev_b((v16i8) in3, (v16i8) in2); /* pair up the even bytes of in2 and in3 */ \ |
| + dst0_m = (v16u8) __msa_ilvr_h(vec1_m, vec0_m); /* merge the byte pairs into 4-byte groups: right-half elements */ \ |
| + dst1_m = (v16u8) __msa_ilvl_h(vec1_m, vec0_m); /* same merge for the left-half elements */ \ |
| + ST_UB2(dst0_m, dst1_m, pdst_argb, 16); /* presumably stores dst0_m at pdst_argb and dst1_m 16 bytes later - ST_UB2 is defined elsewhere */ \ |
| +} |
| + |
| void MirrorRow_MSA(const uint8* src, uint8* dst, int width) { |
| int x; |
| v16u8 src0, src1, src2, src3; |
| @@ -180,29 +203,19 @@ void I422ToARGBRow_MSA(const uint8* src_y, const uint8* src_u, |
| const uint8* src_v, uint8* rgb_buf, |
| const struct YuvConstants* yuvconstants, int width) { |
| int x; |
| - v16u8 src0, src1, src2, dst0, dst1; |
| + v16u8 src0, src1, src2; |
| v8i16 vec0, vec1, vec2; |
| v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; |
| v16u8 const_255 = (v16u8) __msa_ldi_b(255); |
| - vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]); |
| - vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]); |
| - vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]); |
| - vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]); |
| - vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]); |
| - vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]); |
| - vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]); |
| - vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]); |
| + FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug, |
| + vec_vg, vec_bb, vec_bg, vec_br, vec_yg); |
| for (x = 0; x < width; x += 8) { |
| READYUV422(src_y, src_u, src_v, src0, src1, src2); |
| I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, |
| vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); |
| - vec0 = (v8i16) __msa_ilvev_b((v16i8) vec1, (v16i8) vec0); |
| - vec1 = (v8i16) __msa_ilvev_b((v16i8) const_255, (v16i8) vec2); |
| - dst0 = (v16u8) __msa_ilvr_h((v8i16) vec1, (v8i16) vec0); |
| - dst1 = (v16u8) __msa_ilvl_h((v8i16) vec1, (v8i16) vec0); |
| - ST_UB2(dst0, dst1, rgb_buf, 16); |
| + STOREARGB(vec0, vec1, vec2, const_255, rgb_buf); |
| src_y += 8; |
| src_u += 4; |
| src_v += 4; |
| @@ -214,29 +227,19 @@ void I422ToRGBARow_MSA(const uint8* src_y, const uint8* src_u, |
| const uint8* src_v, uint8* rgb_buf, |
| const struct YuvConstants* yuvconstants, int width) { |
| int x; |
| - v16u8 src0, src1, src2, dst0, dst1; |
| + v16u8 src0, src1, src2; |
| v8i16 vec0, vec1, vec2; |
| v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; |
| v16u8 const_255 = (v16u8) __msa_ldi_b(255); |
| - vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]); |
| - vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]); |
| - vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]); |
| - vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]); |
| - vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]); |
| - vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]); |
| - vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]); |
| - vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]); |
| + FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug, |
| + vec_vg, vec_bb, vec_bg, vec_br, vec_yg); |
| for (x = 0; x < width; x += 8) { |
| READYUV422(src_y, src_u, src_v, src0, src1, src2); |
| I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, |
| vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); |
| - vec0 = (v8i16) __msa_ilvev_b((v16i8) vec0, (v16i8) const_255); |
| - vec1 = (v8i16) __msa_ilvev_b((v16i8) vec2, (v16i8) vec1); |
| - dst0 = (v16u8) __msa_ilvr_h(vec1, vec0); |
| - dst1 = (v16u8) __msa_ilvl_h(vec1, vec0); |
| - ST_UB2(dst0, dst1, rgb_buf, 16); |
| + STOREARGB(const_255, vec0, vec1, vec2, rgb_buf); |
| src_y += 8; |
| src_u += 4; |
| src_v += 4; |
| @@ -251,19 +254,13 @@ void I422AlphaToARGBRow_MSA(const uint8* src_y, const uint8* src_u, |
| int width) { |
| int x; |
| int64 data_a; |
| - v16u8 src0, src1, src2, src3, dst0, dst1; |
| + v16u8 src0, src1, src2, src3; |
| v8i16 vec0, vec1, vec2; |
| v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; |
| v4i32 zero = { 0 }; |
| - vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]); |
| - vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]); |
| - vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]); |
| - vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]); |
| - vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]); |
| - vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]); |
| - vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]); |
| - vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]); |
| + FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug, |
| + vec_vg, vec_bb, vec_bg, vec_br, vec_yg); |
| for (x = 0; x < width; x += 8) { |
| data_a = LD(src_a); |
| @@ -272,11 +269,7 @@ void I422AlphaToARGBRow_MSA(const uint8* src_y, const uint8* src_u, |
| I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, |
| vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); |
| src3 = (v16u8) __msa_ilvr_b((v16i8) src3, (v16i8) src3); |
| - vec0 = (v8i16) __msa_ilvev_b((v16i8) vec1, (v16i8) vec0); |
| - vec1 = (v8i16) __msa_ilvev_b((v16i8) src3, (v16i8) vec2); |
| - dst0 = (v16u8) __msa_ilvr_h((v8i16) vec1, (v8i16) vec0); |
| - dst1 = (v16u8) __msa_ilvl_h((v8i16) vec1, (v8i16) vec0); |
| - ST_UB2(dst0, dst1, rgb_buf, 16); |
| + STOREARGB(vec0, vec1, vec2, src3, rgb_buf); |
| src_y += 8; |
| src_u += 4; |
| src_v += 4; |
| @@ -300,14 +293,8 @@ void I422ToRGB24Row_MSA(const uint8* src_y, const uint8* src_u, |
| v16i8 shuffler2 = |
| { 26, 6, 7, 27, 8, 9, 28, 10, 11, 29, 12, 13, 30, 14, 15, 31 }; |
| - vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]); |
| - vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]); |
| - vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]); |
| - vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]); |
| - vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]); |
| - vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]); |
| - vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]); |
| - vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]); |
| + FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug, |
| + vec_vg, vec_bb, vec_bg, vec_br, vec_yg); |
| for (x = 0; x < width; x += 16) { |
| src0 = (v16u8) __msa_ld_b((v16u8*) src_y, 0); |
| @@ -338,6 +325,102 @@ void I422ToRGB24Row_MSA(const uint8* src_y, const uint8* src_u, |
| } |
| } |
| +void I422ToRGB565Row_MSA(const uint8* src_y, const uint8* src_u, /* Converts a row of I422 (Y plus half-rate U and V) to 16-bit RGB565 using the MSA helper macros. */ |
| + const uint8* src_v, uint8* dst_rgb565, |
| + const struct YuvConstants* yuvconstants, int width) { |
| + int x; |
| + v16u8 src0, src1, src2, dst0; |
| + v8i16 vec0, vec1, vec2; |
| + v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; |
| + |
| + FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug, /* conversion constants do not change per pixel, so they are set up once before the loop */ |
| + vec_vg, vec_bb, vec_bg, vec_br, vec_yg); |
| + |
| + for (x = 0; x < width; x += 8) { /* 8 pixels per iteration; NOTE(review): a full group is always written, so callers presumably pass a width that is a multiple of 8 - confirm */ |
| + READYUV422(src_y, src_u, src_v, src0, src1, src2); |
| + I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, |

fbarchard1
2016/10/27 17:37:15
Suggest renaming I422ToRGB to YUVTORGB for consist
fbarchard1
2016/10/27 18:22:55
Done.
|
| + vec_bb, vec_bg, vec_br, vec_yg, vec0, vec2, vec1); /* outputs are passed as vec0, vec2, vec1: the last two are swapped relative to the other callers in this diff */ |
| + vec0 = __msa_srai_h(vec0, 3); /* keep the top 5 bits of an 8-bit channel (assumes each halfword holds 0..255) */ |
| + vec1 = __msa_srai_h(vec1, 3); /* keep the top 5 bits */ |
| + vec2 = __msa_srai_h(vec2, 2); /* keep the top 6 bits: this is the 6-bit middle field */ |
| + vec1 = __msa_slli_h(vec1, 11); /* move into bits 11..15 */ |
| + vec2 = __msa_slli_h(vec2, 5); /* move into bits 5..10 */ |
| + vec0 |= vec1; |
| + dst0 = (v16u8) (vec2 | vec0); /* all three fields combined into one 16-bit value per pixel */ |
| + ST_UB(dst0, dst_rgb565); /* one 16-byte vector = 8 two-byte pixels */ |
| + src_y += 8; |
| + src_u += 4; /* U and V advance at half the rate of Y */ |
| + src_v += 4; |
| + dst_rgb565 += 16; |
| + } |
| +} |
| + |
| +void I422ToARGB4444Row_MSA(const uint8* src_y, const uint8* src_u, /* Converts a row of I422 (Y plus half-rate U and V) to 16-bit ARGB4444 with the top nibble forced to 0xF. */ |
| + const uint8* src_v, uint8* dst_argb4444, |
| + const struct YuvConstants* yuvconstants, int width) { |
| + int x; |
| + v16u8 src0, src1, src2, dst0; |
| + v8i16 vec0, vec1, vec2; |
| + v8u16 reg0, reg1, reg2; |
| + v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; |
| + v8u16 const_0xF000 = (v8u16) __msa_fill_h(0xF000); /* top nibble of every halfword set; presumably the fully opaque 4-bit alpha */ |
| + |
| + FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug, /* conversion constants do not change per pixel, so they are set up once before the loop */ |
| + vec_vg, vec_bb, vec_bg, vec_br, vec_yg); |
| + |
| + for (x = 0; x < width; x += 8) { /* 8 pixels per iteration; NOTE(review): a full group is always written, so callers presumably pass a width that is a multiple of 8 - confirm */ |
| + READYUV422(src_y, src_u, src_v, src0, src1, src2); |
| + I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, |
| + vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); |
| + reg0 = (v8u16) __msa_srai_h(vec0, 4); /* keep the top 4 bits of an 8-bit channel (assumes each halfword holds 0..255); stays in bits 0..3 */ |
| + reg1 = (v8u16) __msa_srai_h(vec1, 4); |

fbarchard1
2016/10/27 17:37:15
instead of shift right and then left, consider in
|
| + reg2 = (v8u16) __msa_srai_h(vec2, 4); |
| + reg1 = (v8u16) __msa_slli_h((v8i16) reg1, 4); /* move into bits 4..7 */ |
| + reg2 = (v8u16) __msa_slli_h((v8i16) reg2, 8); /* move into bits 8..11 */ |
| + reg1 |= const_0xF000; /* set bits 12..15 */ |
| + reg0 |= reg2; |
| + dst0 = (v16u8) (reg1 | reg0); /* all four nibbles combined into one 16-bit value per pixel */ |
| + ST_UB(dst0, dst_argb4444); /* one 16-byte vector = 8 two-byte pixels */ |
| + src_y += 8; |
| + src_u += 4; /* U and V advance at half the rate of Y */ |
| + src_v += 4; |
| + dst_argb4444 += 16; |
| + } |
| +} |
| + |
| +void I422ToARGB1555Row_MSA(const uint8* src_y, const uint8* src_u, /* Converts a row of I422 (Y plus half-rate U and V) to 16-bit ARGB1555 with the top bit forced to 1. */ |
| + const uint8* src_v, uint8* dst_argb1555, |
| + const struct YuvConstants* yuvconstants, int width) { |
| + int x; |
| + v16u8 src0, src1, src2, dst0; |
| + v8i16 vec0, vec1, vec2; |
| + v8u16 reg0, reg1, reg2; |
| + v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; |
| + v8u16 const_0x8000 = (v8u16) __msa_fill_h(0x8000); /* top bit of every halfword set; presumably the 1-bit alpha */ |
| + |
| + FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug, /* conversion constants do not change per pixel, so they are set up once before the loop */ |
| + vec_vg, vec_bb, vec_bg, vec_br, vec_yg); |
| + |
| + for (x = 0; x < width; x += 8) { /* 8 pixels per iteration; NOTE(review): a full group is always written, so callers presumably pass a width that is a multiple of 8 - confirm */ |
| + READYUV422(src_y, src_u, src_v, src0, src1, src2); |
| + I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg, |
| + vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); |
| + reg0 = (v8u16) __msa_srai_h(vec0, 3); /* keep the top 5 bits of an 8-bit channel (assumes each halfword holds 0..255); stays in bits 0..4 */ |
| + reg1 = (v8u16) __msa_srai_h(vec1, 3); |
| + reg2 = (v8u16) __msa_srai_h(vec2, 3); |
| + reg1 = (v8u16) __msa_slli_h((v8i16) reg1, 5); /* move into bits 5..9 */ |
| + reg2 = (v8u16) __msa_slli_h((v8i16) reg2, 10); /* move into bits 10..14 */ |
| + reg1 |= const_0x8000; /* set bit 15 */ |
| + reg0 |= reg2; |
| + dst0 = (v16u8) (reg1 | reg0); /* all fields combined into one 16-bit value per pixel */ |
| + ST_UB(dst0, dst_argb1555); /* one 16-byte vector = 8 two-byte pixels */ |
| + src_y += 8; |
| + src_u += 4; /* U and V advance at half the rate of Y */ |
| + src_v += 4; |
| + dst_argb1555 += 16; |
| + } |
| +} |
| + |
| void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width) { |
| int x; |
| v16u8 src0, src1, src2, src3, dst0, dst1; |