Chromium Code Reviews| Index: source/row_msa.cc |
| diff --git a/source/row_msa.cc b/source/row_msa.cc |
| index b86865cf315307f1c8ddf0e09444819f5849974b..52a246cdb2c6a0779e6789dd46f01bbde29077ff 100644 |
| --- a/source/row_msa.cc |
| +++ b/source/row_msa.cc |
| @@ -53,6 +53,54 @@ void ARGBMirrorRow_MSA(const uint8* src, uint8* dst, int width) { |
| } |
| } |
| +void I422ToYUY2Row_MSA(const uint8* src_y, /* Y input; pointer advances 32 bytes per loop iteration */ |
| + const uint8* src_u, /* U input; pointer advances 16 bytes per loop iteration */ |
| + const uint8* src_v, /* V input; pointer advances 16 bytes per loop iteration */ |
| + uint8* dst_yuy2, /* output; pointer advances 64 bytes per loop iteration */ |
| + int width) { /* loop bound: x counts up in steps of 32 until it reaches width */ |
| + int x; |
| + v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1; |
| + v16u8 dst_yuy2_0, dst_yuy2_1, dst_yuy2_2, dst_yuy2_3; |
| + /* Builds dst_yuy2 from src_y, src_u and src_v with vector load, interleave and store macros. Presumably packs planar I422 into YUY2 byte order; the exact order depends on ILVRL_B2_UB operand semantics, defined outside this view - confirm. NOTE(review): there is no tail handling, so a width that is not a multiple of 32 still runs one more full iteration; presumably callers guarantee a multiple of 32 - confirm. */ |
| + for (x = 0; x < width; x += 32) { |
| + src_u0 = LD_UB(src_u); /* one vector of U */ |
| + src_v0 = LD_UB(src_v); /* one vector of V */ |
| + LD_UB2(src_y, 16, src_y0, src_y1); /* two vectors of Y; the 16 is presumably the byte offset between the two loads */ |
| + ILVRL_B2_UB(src_v0, src_u0, vec_uv0, vec_uv1); /* combine U and V into vec_uv0 and vec_uv1 (presumably byte-interleaved) */ |

fbarchard1
2016/09/29 17:10:43
Did you try just 1 vector at a time instead of 2?
manojkumar.bhosale
2016/09/30 09:00:29
all the loop unrolling we have done is based on lo
|
| + ILVRL_B2_UB(vec_uv0, src_y0, dst_yuy2_0, dst_yuy2_1); /* combine the first Y vector with vec_uv0 into two output vectors */ |
| + ILVRL_B2_UB(vec_uv1, src_y1, dst_yuy2_2, dst_yuy2_3); /* combine the second Y vector with vec_uv1 into two output vectors */ |
| + ST_UB4(dst_yuy2_0, dst_yuy2_1, dst_yuy2_2, dst_yuy2_3, dst_yuy2, 16); /* write the four output vectors; the 16 is presumably the byte stride between stores */ |
| + src_u += 16; |
| + src_v += 16; |
| + src_y += 32; |
| + dst_yuy2 += 64; |
| + } |
| +} |
| + |
| +void I422ToUYVYRow_MSA(const uint8* src_y, /* Y input; pointer advances 32 bytes per loop iteration */ |
| + const uint8* src_u, /* U input; pointer advances 16 bytes per loop iteration */ |
| + const uint8* src_v, /* V input; pointer advances 16 bytes per loop iteration */ |
| + uint8* dst_uyvy, /* output; pointer advances 64 bytes per loop iteration */ |
| + int width) { /* loop bound: x counts up in steps of 32 until it reaches width */ |
| + int x; |
| + v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1; |
| + v16u8 dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3; |
| + /* Builds dst_uyvy from src_y, src_u and src_v with vector load, interleave and store macros. Presumably packs planar I422 into UYVY byte order; the exact order depends on ILVRL_B2_UB operand semantics, defined outside this view - confirm. NOTE(review): there is no tail handling, so a width that is not a multiple of 32 still runs one more full iteration; presumably callers guarantee a multiple of 32 - confirm. */ |
| + for (x = 0; x < width; x += 32) { |
| + src_u0 = LD_UB(src_u); /* one vector of U */ |
| + src_v0 = LD_UB(src_v); /* one vector of V */ |
| + LD_UB2(src_y, 16, src_y0, src_y1); /* two vectors of Y; the 16 is presumably the byte offset between the two loads */ |
| + ILVRL_B2_UB(src_v0, src_u0, vec_uv0, vec_uv1); /* combine U and V into vec_uv0 and vec_uv1 (presumably byte-interleaved) */ |
| + ILVRL_B2_UB(src_y0, vec_uv0, dst_uyvy0, dst_uyvy1); /* Y is the first operand here, the reverse of the order used in I422ToYUY2Row_MSA */ |
| + ILVRL_B2_UB(src_y1, vec_uv1, dst_uyvy2, dst_uyvy3); /* same operand order for the second Y vector and vec_uv1 */ |
| + ST_UB4(dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3, dst_uyvy, 16); /* write the four output vectors; the 16 is presumably the byte stride between stores */ |
| + src_u += 16; |
| + src_v += 16; |
| + src_y += 32; |
| + dst_uyvy += 64; |
| + } |
| +} |
| + |
| #ifdef __cplusplus |
| } // extern "C" |
| } // namespace libyuv |