Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(93)

Unified Diff: source/row_msa.cc

Issue 2445343007: Add MSA optimized I422ToRGB565Row_MSA, I422ToARGB4444Row_MSA and I422ToARGB1555Row_MSA functions (Closed)
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/row_any.cc ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/row_msa.cc
diff --git a/source/row_msa.cc b/source/row_msa.cc
index 7b3c206788ce04f25b531761c4da74e3f2b098b7..6147ee69074b5c86b8c5125d80de15ed6ae5df0b 100644
--- a/source/row_msa.cc
+++ b/source/row_msa.cc
@@ -19,6 +19,18 @@ namespace libyuv {
extern "C" {
#endif
+// Fill YUV -> RGB conversion constants into vectors: each output is one table entry read through the yuvconst pointer and replicated into a word vector with __msa_fill_w.
+#define FILLYUVTORGBCONSTS(yuvconst, ub, vr, ug, vg, bb, bg, br, yg) { \
fbarchard1 2016/10/27 17:37:14 suggest using the same name as row_gcc: YUVTORGB_S
fbarchard1 2016/10/27 18:22:55 Done.
+ ub = __msa_fill_w(yuvconst->kUVToB[0]); \
+ vr = __msa_fill_w(yuvconst->kUVToR[1]); /* index 1 of kUVToR, not index 0 */ \
+ ug = __msa_fill_w(yuvconst->kUVToG[0]); /* kUVToG supplies both ug (index 0) and vg (index 1) */ \
+ vg = __msa_fill_w(yuvconst->kUVToG[1]); \
+ bb = __msa_fill_w(yuvconst->kUVBiasB[0]); \
+ bg = __msa_fill_w(yuvconst->kUVBiasG[0]); \
+ br = __msa_fill_w(yuvconst->kUVBiasR[0]); \
+ yg = __msa_fill_w(yuvconst->kYToRgb[0]); \
+}
+
// Load YUV 422 pixel data
#define READYUV422(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) { \
uint64 y_m; \
@@ -94,6 +106,17 @@ extern "C" {
out_r = __msa_pckev_h((v8i16) reg5_m, (v8i16) reg4_m); \
}
+// Pack and Store 8 ARGB values: the even-indexed bytes of in0, in1, in2, in3 are interleaved in that byte order into two vectors, which ST_UB2 writes to pdst_argb.
+#define STOREARGB(in0, in1, in2, in3, pdst_argb) { \
+ v8i16 vec0_m, vec1_m; \
+ v16u8 dst0_m, dst1_m; \
+ vec0_m = (v8i16) __msa_ilvev_b((v16i8) in1, (v16i8) in0); /* halfwords of (in0, in1) byte pairs */ \
+ vec1_m = (v8i16) __msa_ilvev_b((v16i8) in3, (v16i8) in2); /* halfwords of (in2, in3) byte pairs */ \
+ dst0_m = (v16u8) __msa_ilvr_h(vec1_m, vec0_m); /* first four pixels: in0, in1, in2, in3 bytes each */ \
+ dst1_m = (v16u8) __msa_ilvl_h(vec1_m, vec0_m); /* last four pixels */ \
+ ST_UB2(dst0_m, dst1_m, pdst_argb, 16); /* presumably 16 is the byte stride between the two stores; ST_UB2 is defined elsewhere */ \
+}
+
void MirrorRow_MSA(const uint8* src, uint8* dst, int width) {
int x;
v16u8 src0, src1, src2, src3;
@@ -180,29 +203,19 @@ void I422ToARGBRow_MSA(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* rgb_buf,
const struct YuvConstants* yuvconstants, int width) {
int x;
- v16u8 src0, src1, src2, dst0, dst1;
+ v16u8 src0, src1, src2;
v8i16 vec0, vec1, vec2;
v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
v16u8 const_255 = (v16u8) __msa_ldi_b(255);
- vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]);
- vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]);
- vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]);
- vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]);
- vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]);
- vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]);
- vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]);
- vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]);
+ FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug,
+ vec_vg, vec_bb, vec_bg, vec_br, vec_yg);
for (x = 0; x < width; x += 8) {
READYUV422(src_y, src_u, src_v, src0, src1, src2);
I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg,
vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2);
- vec0 = (v8i16) __msa_ilvev_b((v16i8) vec1, (v16i8) vec0);
- vec1 = (v8i16) __msa_ilvev_b((v16i8) const_255, (v16i8) vec2);
- dst0 = (v16u8) __msa_ilvr_h((v8i16) vec1, (v8i16) vec0);
- dst1 = (v16u8) __msa_ilvl_h((v8i16) vec1, (v8i16) vec0);
- ST_UB2(dst0, dst1, rgb_buf, 16);
+ STOREARGB(vec0, vec1, vec2, const_255, rgb_buf);
src_y += 8;
src_u += 4;
src_v += 4;
@@ -214,29 +227,19 @@ void I422ToRGBARow_MSA(const uint8* src_y, const uint8* src_u,
const uint8* src_v, uint8* rgb_buf,
const struct YuvConstants* yuvconstants, int width) {
int x;
- v16u8 src0, src1, src2, dst0, dst1;
+ v16u8 src0, src1, src2;
v8i16 vec0, vec1, vec2;
v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
v16u8 const_255 = (v16u8) __msa_ldi_b(255);
- vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]);
- vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]);
- vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]);
- vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]);
- vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]);
- vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]);
- vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]);
- vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]);
+ FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug,
+ vec_vg, vec_bb, vec_bg, vec_br, vec_yg);
for (x = 0; x < width; x += 8) {
READYUV422(src_y, src_u, src_v, src0, src1, src2);
I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg,
vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2);
- vec0 = (v8i16) __msa_ilvev_b((v16i8) vec0, (v16i8) const_255);
- vec1 = (v8i16) __msa_ilvev_b((v16i8) vec2, (v16i8) vec1);
- dst0 = (v16u8) __msa_ilvr_h(vec1, vec0);
- dst1 = (v16u8) __msa_ilvl_h(vec1, vec0);
- ST_UB2(dst0, dst1, rgb_buf, 16);
+ STOREARGB(const_255, vec0, vec1, vec2, rgb_buf);
src_y += 8;
src_u += 4;
src_v += 4;
@@ -251,19 +254,13 @@ void I422AlphaToARGBRow_MSA(const uint8* src_y, const uint8* src_u,
int width) {
int x;
int64 data_a;
- v16u8 src0, src1, src2, src3, dst0, dst1;
+ v16u8 src0, src1, src2, src3;
v8i16 vec0, vec1, vec2;
v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
v4i32 zero = { 0 };
- vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]);
- vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]);
- vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]);
- vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]);
- vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]);
- vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]);
- vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]);
- vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]);
+ FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug,
+ vec_vg, vec_bb, vec_bg, vec_br, vec_yg);
for (x = 0; x < width; x += 8) {
data_a = LD(src_a);
@@ -272,11 +269,7 @@ void I422AlphaToARGBRow_MSA(const uint8* src_y, const uint8* src_u,
I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg,
vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2);
src3 = (v16u8) __msa_ilvr_b((v16i8) src3, (v16i8) src3);
- vec0 = (v8i16) __msa_ilvev_b((v16i8) vec1, (v16i8) vec0);
- vec1 = (v8i16) __msa_ilvev_b((v16i8) src3, (v16i8) vec2);
- dst0 = (v16u8) __msa_ilvr_h((v8i16) vec1, (v8i16) vec0);
- dst1 = (v16u8) __msa_ilvl_h((v8i16) vec1, (v8i16) vec0);
- ST_UB2(dst0, dst1, rgb_buf, 16);
+ STOREARGB(vec0, vec1, vec2, src3, rgb_buf);
src_y += 8;
src_u += 4;
src_v += 4;
@@ -300,14 +293,8 @@ void I422ToRGB24Row_MSA(const uint8* src_y, const uint8* src_u,
v16i8 shuffler2 =
{ 26, 6, 7, 27, 8, 9, 28, 10, 11, 29, 12, 13, 30, 14, 15, 31 };
- vec_ub = __msa_fill_w(yuvconstants->kUVToB[0]);
- vec_vr = __msa_fill_w(yuvconstants->kUVToR[1]);
- vec_ug = __msa_fill_w(yuvconstants->kUVToG[0]);
- vec_vg = __msa_fill_w(yuvconstants->kUVToG[1]);
- vec_bb = __msa_fill_w(yuvconstants->kUVBiasB[0]);
- vec_bg = __msa_fill_w(yuvconstants->kUVBiasG[0]);
- vec_br = __msa_fill_w(yuvconstants->kUVBiasR[0]);
- vec_yg = __msa_fill_w(yuvconstants->kYToRgb[0]);
+ FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug,
+ vec_vg, vec_bb, vec_bg, vec_br, vec_yg);
for (x = 0; x < width; x += 16) {
src0 = (v16u8) __msa_ld_b((v16u8*) src_y, 0);
@@ -338,6 +325,102 @@ void I422ToRGB24Row_MSA(const uint8* src_y, const uint8* src_u,
}
}
+void I422ToRGB565Row_MSA(const uint8* src_y, const uint8* src_u,
+ const uint8* src_v, uint8* dst_rgb565,
+ const struct YuvConstants* yuvconstants, int width) {  // Converts a row of separate Y, U and V planes into 16-bit pixels packed as 5:6:5 bit fields.
+ int x;
+ v16u8 src0, src1, src2, dst0;
+ v8i16 vec0, vec1, vec2;
+ v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+
+ FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug,
+ vec_vg, vec_bb, vec_bg, vec_br, vec_yg);  // constants are broadcast once, outside the loop
+
+ for (x = 0; x < width; x += 8) {  // 8 pixels per pass; width is rounded up to a multiple of 8 by this stepping
+ READYUV422(src_y, src_u, src_v, src0, src1, src2);
+ I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg,
fbarchard1 2016/10/27 17:37:15 Suggest renaming I422ToRGB to YUVTORGB for consist
fbarchard1 2016/10/27 18:22:55 Done.
+ vec_bb, vec_bg, vec_br, vec_yg, vec0, vec2, vec1);  // outputs land in vec0, vec2, vec1 (last two swapped vs. the other callers); presumably B, G, R - TODO confirm against I422TORGB
+ vec0 = __msa_srai_h(vec0, 3);  // drop the low 3 bits; assumes each 16-bit lane holds a 0..255 channel value
+ vec1 = __msa_srai_h(vec1, 3);  // drop the low 3 bits
+ vec2 = __msa_srai_h(vec2, 2);  // drop only 2 bits: this channel keeps 6 bits
+ vec1 = __msa_slli_h(vec1, 11);  // move to bits 15..11
+ vec2 = __msa_slli_h(vec2, 5);  // move to bits 10..5
+ vec0 |= vec1;
+ dst0 = (v16u8) (vec2 | vec0);  // merge the three bit fields into one 16-bit pixel per lane
+ ST_UB(dst0, dst_rgb565);
+ src_y += 8;
+ src_u += 4;  // U and V advance at half the rate of Y
+ src_v += 4;
+ dst_rgb565 += 16;  // 8 pixels * 2 bytes
+ }
+}
+
+void I422ToARGB4444Row_MSA(const uint8* src_y, const uint8* src_u,
+ const uint8* src_v, uint8* dst_argb4444,
+ const struct YuvConstants* yuvconstants, int width) {  // Converts a row of separate Y, U and V planes into 16-bit pixels of four 4-bit fields, with the top field forced to 0xF.
+ int x;
+ v16u8 src0, src1, src2, dst0;
+ v8i16 vec0, vec1, vec2;
+ v8u16 reg0, reg1, reg2;
+ v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+ v8u16 const_0xF000 = (v8u16) __msa_fill_h(0xF000);  // bits 15..12 set in every halfword lane
+
+ FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug,
+ vec_vg, vec_bb, vec_bg, vec_br, vec_yg);  // constants are broadcast once, outside the loop
+
+ for (x = 0; x < width; x += 8) {  // 8 pixels per pass
+ READYUV422(src_y, src_u, src_v, src0, src1, src2);
+ I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg,
+ vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2);  // presumably B, G, R in vec0, vec1, vec2 - TODO confirm against I422TORGB
+ reg0 = (v8u16) __msa_srai_h(vec0, 4);  // keep the top 4 bits; assumes each 16-bit lane holds a 0..255 channel value
+ reg1 = (v8u16) __msa_srai_h(vec1, 4);
fbarchard1 2016/10/27 17:37:15 instead of shift right and then left, consider in
+ reg2 = (v8u16) __msa_srai_h(vec2, 4);
+ reg1 = (v8u16) __msa_slli_h((v8i16) reg1, 4);  // move to bits 7..4
+ reg2 = (v8u16) __msa_slli_h((v8i16) reg2, 8);  // move to bits 11..8
+ reg1 |= const_0xF000;  // set the top nibble
+ reg0 |= reg2;
+ dst0 = (v16u8) (reg1 | reg0);  // merge all four nibbles into one 16-bit pixel per lane
+ ST_UB(dst0, dst_argb4444);
+ src_y += 8;
+ src_u += 4;  // U and V advance at half the rate of Y
+ src_v += 4;
+ dst_argb4444 += 16;  // 8 pixels * 2 bytes
+ }
+}
+
+void I422ToARGB1555Row_MSA(const uint8* src_y, const uint8* src_u,
+ const uint8* src_v, uint8* dst_argb1555,
+ const struct YuvConstants* yuvconstants, int width) {  // Converts a row of separate Y, U and V planes into 16-bit pixels of three 5-bit fields plus a top bit that is always set.
+ int x;
+ v16u8 src0, src1, src2, dst0;
+ v8i16 vec0, vec1, vec2;
+ v8u16 reg0, reg1, reg2;
+ v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg;
+ v8u16 const_0x8000 = (v8u16) __msa_fill_h(0x8000);  // bit 15 set in every halfword lane
+
+ FILLYUVTORGBCONSTS(yuvconstants, vec_ub, vec_vr, vec_ug,
+ vec_vg, vec_bb, vec_bg, vec_br, vec_yg);  // constants are broadcast once, outside the loop
+
+ for (x = 0; x < width; x += 8) {  // 8 pixels per pass
+ READYUV422(src_y, src_u, src_v, src0, src1, src2);
+ I422TORGB(src0, src1, src2, vec_ub, vec_vr, vec_ug, vec_vg,
+ vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2);  // presumably B, G, R in vec0, vec1, vec2 - TODO confirm against I422TORGB
+ reg0 = (v8u16) __msa_srai_h(vec0, 3);  // keep the top 5 bits; assumes each 16-bit lane holds a 0..255 channel value
+ reg1 = (v8u16) __msa_srai_h(vec1, 3);
+ reg2 = (v8u16) __msa_srai_h(vec2, 3);
+ reg1 = (v8u16) __msa_slli_h((v8i16) reg1, 5);  // move to bits 9..5
+ reg2 = (v8u16) __msa_slli_h((v8i16) reg2, 10);  // move to bits 14..10
+ reg1 |= const_0x8000;  // set bit 15
+ reg0 |= reg2;
+ dst0 = (v16u8) (reg1 | reg0);  // merge all fields into one 16-bit pixel per lane
+ ST_UB(dst0, dst_argb1555);
+ src_y += 8;
+ src_u += 4;  // U and V advance at half the rate of Y
+ src_v += 4;
+ dst_argb1555 += 16;  // 8 pixels * 2 bytes
+ }
+}
+
void YUY2ToYRow_MSA(const uint8* src_yuy2, uint8* dst_y, int width) {
int x;
v16u8 src0, src1, src2, src3, dst0, dst1;
« no previous file with comments | « source/row_any.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698