source/row_gcc.cc - Issue 2438893002: HalfFloat avx2 unpack bug fix.

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: source/row_gcc.cc

Issue 2438893002: HalfFloat avx2 unpack bug fix. (Closed)

Patch Set: "test use random" (created 4 years, 2 months ago)

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/row_gcc.cc

diff --git a/source/row_gcc.cc b/source/row_gcc.cc

index 03f7f1bdc55a5cfa59cd54a6ac0699057315ffb8..8020108d041ac71be101d3155b017a514ff42f21 100644

--- a/source/row_gcc.cc

+++ b/source/row_gcc.cc

@@ -5350,17 +5350,17 @@ void HalfFloatRow_AVX2(const uint16* src, uint16* dst, float scale, int width) {

// 16 pixel loop.

LABELALIGN

"1: \n"

- "vmovdqu " MEMACCESS(0) ",%%ymm2 \n" // 8 shorts

+ "vmovdqu " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts

"lea " MEMLEA(0x20,0) ",%0 \n"

- "vpunpckhwd %%ymm2,%%ymm5,%%ymm3 \n"

- "vpunpcklwd %%ymm2,%%ymm5,%%ymm2 \n"

+ "vpunpckhwd %%ymm5,%%ymm2,%%ymm3 \n" // mutates

wangcheng (2016/10/20 22:38:30): Reverse the order of ymm5 and ymm2.

+ "vpunpcklwd %%ymm5,%%ymm2,%%ymm2 \n"

"vcvtdq2ps %%ymm3,%%ymm3 \n"

"vcvtdq2ps %%ymm2,%%ymm2 \n"

"vmulps %%ymm3,%%ymm4,%%ymm3 \n"

"vmulps %%ymm2,%%ymm4,%%ymm2 \n"

"vpsrld $0xd,%%ymm3,%%ymm3 \n"

"vpsrld $0xd,%%ymm2,%%ymm2 \n"

- "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // mutates

+ "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // unmutates

"vmovdqu %%ymm2," MEMACCESS(1) " \n"

"lea " MEMLEA(0x20,1) ",%1 \n"

"sub $0x10,%2 \n"

@@ -5384,8 +5384,8 @@ void HalfFloatRow_F16C(const uint16* src, uint16* dst, float scale, int width) {

// 16 pixel loop.

LABELALIGN

"1: \n"

- "vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 8 shorts -> 8 ints

- "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n" // 8 more

+ "vpmovzxwd " MEMACCESS(0) ",%%ymm2 \n" // 16 shorts -> 16 ints

+ "vpmovzxwd " MEMACCESS2(0x10,0) ",%%ymm3 \n"

"lea " MEMLEA(0x20,0) ",%0 \n"

"vcvtdq2ps %%ymm2,%%ymm2 \n"

"vcvtdq2ps %%ymm3,%%ymm3 \n"

« no previous file with comments | « include/libyuv/version.h ('k') | unit_test/planar_test.cc » ('j') | no next file with comments »