source/row_msa.cc - Issue 2378753004: Add MSA optimized I422ToYUY2Row, I422ToUYVYRow functions

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: source/row_msa.cc

Issue 2378753004: Add MSA optimized I422ToYUY2Row, I422ToUYVYRow functions (Closed)

Patch Set: Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/row_msa.cc

diff --git a/source/row_msa.cc b/source/row_msa.cc

index b86865cf315307f1c8ddf0e09444819f5849974b..52a246cdb2c6a0779e6789dd46f01bbde29077ff 100644

--- a/source/row_msa.cc

+++ b/source/row_msa.cc

@@ -53,6 +53,54 @@ void ARGBMirrorRow_MSA(const uint8* src, uint8* dst, int width) {

}

+// Packs one row of planar Y, U and V bytes into dst_yuy2 using MSA vectors.

+//

+// Every loop iteration advances src_y by 32 bytes, src_u and src_v by 16

+// bytes each, and dst_yuy2 by 64 bytes. The loop steps x by 32 while

+// x < width, so a width that is not a multiple of 32 still causes a full

+// 32-pixel group to be read and written on the last iteration.

+// NOTE(review): presumably callers pass only multiples of 32 or use padded

+// buffers - confirm against the call sites.

+void I422ToYUY2Row_MSA(const uint8* src_y,

+ const uint8* src_u,

+ const uint8* src_v,

+ uint8* dst_yuy2,

+ int width) {

+ int x;

+ v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1;

+ v16u8 dst_yuy2_0, dst_yuy2_1, dst_yuy2_2, dst_yuy2_3;

+ for (x = 0; x < width; x += 32) {

+ // One vector each of U and V, two vectors of Y (offsets 0 and 16).

+ src_u0 = LD_UB(src_u);

+ src_v0 = LD_UB(src_v);

+ LD_UB2(src_y, 16, src_y0, src_y1);

+ // Combine U and V into two vectors; presumably ILVRL_B2_UB interleaves

+ // bytes of its two inputs (right half, then left half) - confirm.

+ ILVRL_B2_UB(src_v0, src_u0, vec_uv0, vec_uv1);

fbarchard1 2016/09/29 17:10:43 Did you try just 1 vector at a time instead of 2?

manojkumar.bhosale 2016/09/30 09:00:29 all the loop unrolling we have done is based on lo

+ // Combine each Y vector with the matching U/V vector. Y is passed as the

+ // second input here; the UYVY variant passes it as the first.

+ ILVRL_B2_UB(vec_uv0, src_y0, dst_yuy2_0, dst_yuy2_1);

+ ILVRL_B2_UB(vec_uv1, src_y1, dst_yuy2_2, dst_yuy2_3);

+ // Store the four result vectors at a 16-byte stride.

+ ST_UB4(dst_yuy2_0, dst_yuy2_1, dst_yuy2_2, dst_yuy2_3, dst_yuy2, 16);

+ src_u += 16;

+ src_v += 16;

+ src_y += 32;

+ dst_yuy2 += 64;

+ }

+}

+// Packs one row of planar Y, U and V bytes into dst_uyvy using MSA vectors.

+//

+// Every loop iteration advances src_y by 32 bytes, src_u and src_v by 16

+// bytes each, and dst_uyvy by 64 bytes. The loop steps x by 32 while

+// x < width, so a width that is not a multiple of 32 still causes a full

+// 32-pixel group to be read and written on the last iteration.

+// NOTE(review): presumably callers pass only multiples of 32 or use padded

+// buffers - confirm against the call sites.

+void I422ToUYVYRow_MSA(const uint8* src_y,

+ const uint8* src_u,

+ const uint8* src_v,

+ uint8* dst_uyvy,

+ int width) {

+ int x;

+ v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1;

+ v16u8 dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3;

+ for (x = 0; x < width; x += 32) {

+ // One vector each of U and V, two vectors of Y (offsets 0 and 16).

+ src_u0 = LD_UB(src_u);

+ src_v0 = LD_UB(src_v);

+ LD_UB2(src_y, 16, src_y0, src_y1);

+ // Combine U and V into two vectors; presumably ILVRL_B2_UB interleaves

+ // bytes of its two inputs (right half, then left half) - confirm.

+ ILVRL_B2_UB(src_v0, src_u0, vec_uv0, vec_uv1);

+ // Combine each Y vector with the matching U/V vector. Y is passed as the

+ // first input here; the YUY2 variant passes it as the second.

+ ILVRL_B2_UB(src_y0, vec_uv0, dst_uyvy0, dst_uyvy1);

+ ILVRL_B2_UB(src_y1, vec_uv1, dst_uyvy2, dst_uyvy3);

+ // Store the four result vectors at a 16-byte stride.

+ ST_UB4(dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3, dst_uyvy, 16);

+ src_u += 16;

+ src_v += 16;

+ src_y += 32;

+ dst_uyvy += 64;

+ }

+}

#ifdef __cplusplus

} // extern "C"

} // namespace libyuv

« source/cpu_id.cc ('K') | « source/row_any.cc ('k') | no next file » | no next file with comments »