Index: source/libvpx/third_party/libyuv/source/row_win.cc
===================================================================
--- source/libvpx/third_party/libyuv/source/row_win.cc (revision 290053)
+++ source/libvpx/third_party/libyuv/source/row_win.cc (working copy)
@@ -8,16 +8,180 @@
  * be found in the AUTHORS file in the root of the source tree.
  */

-#include "third_party/libyuv/include/libyuv/row.h"
+#include "libyuv/row.h"

+#if defined(_M_X64)
+#include <emmintrin.h>
+#include <tmmintrin.h>  // For _mm_maddubs_epi16
+#endif
+
 #ifdef __cplusplus
 namespace libyuv {
 extern "C" {
 #endif

-// This module is for Visual C x86.
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
+// This module is for Visual C.
+#if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER)

+#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
+
+#define UB 127 /* min(127,(int8)(2.018 * 64)) */
+#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
+#define UR 0
+
+#define VB 0
+#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
+#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
+
+// Bias
+#define BB UB * 128 + VB * 128
+#define BG UG * 128 + VG * 128
+#define BR UR * 128 + VR * 128
+
+static const vec8 kUVToB = {
+  UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
+};
+
+static const vec8 kUVToR = {
+  UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
+};
+
+static const vec8 kUVToG = {
+  UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
+};
+
+static const vec8 kVUToB = {
+  VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB,
+};
+
+static const vec8 kVUToR = {
+  VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR,
+};
+
+static const vec8 kVUToG = {
+  VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
+};
+
+static const vec16 kYToRgb = { YG, YG, YG, YG, YG, YG, YG, YG };
+static const vec16 kYSub16 = { 16, 16, 16, 16, 16, 16, 16, 16 };
+static const vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB };
+static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG };
+static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
+
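+// For reference, the scalar math the tables above implement, in 6-bit fixed
+// point (Clip() here is shorthand for saturating to [0, 255]):
+//   B = Clip(((Y - 16) * YG + U * UB + V * VB - BB) >> 6)
+//   G = Clip(((Y - 16) * YG + U * UG + V * VG - BG) >> 6)
+//   R = Clip(((Y - 16) * YG + U * UR + V * VR - BR) >> 6)
+// Subtracting the BB/BG/BR biases is equivalent to centering U and V on 128.
+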
+// 64 bit
+#if defined(_M_X64)
+
+// Aligned destination version.
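+// As written the loop handles 8 pixels per pass, so it assumes width is a
+// multiple of 8 and, for this aligned version, a 16-byte-aligned dst_argb.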
+__declspec(align(16))
+void I422ToARGBRow_SSSE3(const uint8* y_buf,
+                         const uint8* u_buf,
+                         const uint8* v_buf,
+                         uint8* dst_argb,
+                         int width) {
+  __m128i xmm0, xmm1, xmm2, xmm3;
+  const __m128i xmm5 = _mm_set1_epi8(-1);
+  const __m128i xmm4 = _mm_setzero_si128();
+  const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
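+  // v_buf is addressed relative to u_buf, so advancing u_buf in the loop
+  // below advances both chroma planes.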
+
+  while (width > 0) {
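+    // Read 4 U and 4 V, interleave them as U0V0..U3V3, then duplicate each
+    // UV pair so 4 chroma samples cover 8 pixels (422 -> 444 upsample).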
+    xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf);
+    xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset));
+    xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
+    xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);
+    xmm1 = _mm_load_si128(&xmm0);
+    xmm2 = _mm_load_si128(&xmm0);
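+    // Per-channel weighting: maddubs forms U*coeff + V*coeff for each pixel;
+    // subtracting the bias is equivalent to centering U and V on 128.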
+    xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kUVToB);
+    xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kUVToG);
+    xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kUVToR);
+    xmm0 = _mm_sub_epi16(xmm0, *(__m128i*)kUVBiasB);
+    xmm1 = _mm_sub_epi16(xmm1, *(__m128i*)kUVBiasG);
+    xmm2 = _mm_sub_epi16(xmm2, *(__m128i*)kUVBiasR);
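+    // Widen 8 Y bytes to 16 bits, subtract 16 and scale by YG.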
+    xmm3 = _mm_loadl_epi64((__m128i*)y_buf);
+    xmm3 = _mm_unpacklo_epi8(xmm3, xmm4);
+    xmm3 = _mm_subs_epi16(xmm3, *(__m128i*)kYSub16);
+    xmm3 = _mm_mullo_epi16(xmm3, *(__m128i*)kYToRgb);
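+    // Add the Y contribution, drop the 6 fraction bits and saturate to bytes.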
+    xmm0 = _mm_adds_epi16(xmm0, xmm3);
+    xmm1 = _mm_adds_epi16(xmm1, xmm3);
+    xmm2 = _mm_adds_epi16(xmm2, xmm3);
+    xmm0 = _mm_srai_epi16(xmm0, 6);
+    xmm1 = _mm_srai_epi16(xmm1, 6);
+    xmm2 = _mm_srai_epi16(xmm2, 6);
+    xmm0 = _mm_packus_epi16(xmm0, xmm0);
+    xmm1 = _mm_packus_epi16(xmm1, xmm1);
+    xmm2 = _mm_packus_epi16(xmm2, xmm2);
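+    // Interleave B/G and R/alpha bytes, then 16-bit pairs, producing 8 BGRA
+    // (little-endian ARGB) pixels.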
+    xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
+    xmm2 = _mm_unpacklo_epi8(xmm2, xmm5);
+    xmm1 = _mm_load_si128(&xmm0);
+    xmm0 = _mm_unpacklo_epi16(xmm0, xmm2);
+    xmm1 = _mm_unpackhi_epi16(xmm1, xmm2);
+
+    _mm_store_si128((__m128i*)dst_argb, xmm0);
+    _mm_store_si128((__m128i*)(dst_argb + 16), xmm1);
+
+    y_buf += 8;
+    u_buf += 4;
+    dst_argb += 32;
+    width -= 8;
+  }
+}
+
+// Unaligned destination version.
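+// Same math as the aligned version above; only the final stores differ
+// (movdqu allows an unaligned dst_argb).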
+void I422ToARGBRow_Unaligned_SSSE3(const uint8* y_buf,
+                                   const uint8* u_buf,
+                                   const uint8* v_buf,
+                                   uint8* dst_argb,
+                                   int width) {
+  __m128i xmm0, xmm1, xmm2, xmm3;
+  const __m128i xmm5 = _mm_set1_epi8(-1);
+  const __m128i xmm4 = _mm_setzero_si128();
+  const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
+
+  while (width > 0) {
+    xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf);
+    xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset));
+    xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
+    xmm0 = _mm_unpacklo_epi16(xmm0, xmm0);
+    xmm1 = _mm_load_si128(&xmm0);
+    xmm2 = _mm_load_si128(&xmm0);
+    xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)kUVToB);
+    xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)kUVToG);
+    xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)kUVToR);
+    xmm0 = _mm_sub_epi16(xmm0, *(__m128i*)kUVBiasB);
+    xmm1 = _mm_sub_epi16(xmm1, *(__m128i*)kUVBiasG);
+    xmm2 = _mm_sub_epi16(xmm2, *(__m128i*)kUVBiasR);
+    xmm3 = _mm_loadl_epi64((__m128i*)y_buf);
+    xmm3 = _mm_unpacklo_epi8(xmm3, xmm4);
+    xmm3 = _mm_subs_epi16(xmm3, *(__m128i*)kYSub16);
+    xmm3 = _mm_mullo_epi16(xmm3, *(__m128i*)kYToRgb);
+    xmm0 = _mm_adds_epi16(xmm0, xmm3);
+    xmm1 = _mm_adds_epi16(xmm1, xmm3);
+    xmm2 = _mm_adds_epi16(xmm2, xmm3);
+    xmm0 = _mm_srai_epi16(xmm0, 6);
+    xmm1 = _mm_srai_epi16(xmm1, 6);
+    xmm2 = _mm_srai_epi16(xmm2, 6);
+    xmm0 = _mm_packus_epi16(xmm0, xmm0);
+    xmm1 = _mm_packus_epi16(xmm1, xmm1);
+    xmm2 = _mm_packus_epi16(xmm2, xmm2);
+    xmm0 = _mm_unpacklo_epi8(xmm0, xmm1);
+    xmm2 = _mm_unpacklo_epi8(xmm2, xmm5);
+    xmm1 = _mm_load_si128(&xmm0);
+    xmm0 = _mm_unpacklo_epi16(xmm0, xmm2);
+    xmm1 = _mm_unpackhi_epi16(xmm1, xmm2);
+
+    _mm_storeu_si128((__m128i*)dst_argb, xmm0);
+    _mm_storeu_si128((__m128i*)(dst_argb + 16), xmm1);
+
+    y_buf += 8;
+    u_buf += 4;
+    dst_argb += 32;
+    width -= 8;
+  }
+}
+
+// 32 bit
+#else // defined(_M_X64)
+
 #ifdef HAS_ARGBTOYROW_SSSE3

 // Constants for ARGB.
@@ -2030,21 +2194,6 @@
 }
 #endif // HAS_ARGBTOYROW_SSSE3

-#define YG 74 /* (int8)(1.164 * 64 + 0.5) */
-
-#define UB 127 /* min(63,(int8)(2.018 * 64)) */
-#define UG -25 /* (int8)(-0.391 * 64 - 0.5) */
-#define UR 0
-
-#define VB 0
-#define VG -52 /* (int8)(-0.813 * 64 - 0.5) */
-#define VR 102 /* (int8)(1.596 * 64 + 0.5) */
-
-// Bias
-#define BB UB * 128 + VB * 128
-#define BG UG * 128 + VG * 128
-#define BR UR * 128 + VR * 128
-
 #ifdef HAS_I422TOARGBROW_AVX2

 static const lvec8 kUVToB_AVX = {
@@ -2079,10 +2228,10 @@
 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
 __declspec(naked) __declspec(align(16))
 void I422ToARGBRow_AVX2(const uint8* y_buf,
-                       const uint8* u_buf,
-                       const uint8* v_buf,
-                       uint8* dst_argb,
-                       int width) {
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* dst_argb,
+                        int width) {
   __asm {
     push esi
     push edi
@@ -2150,36 +2299,6 @@

 #ifdef HAS_I422TOARGBROW_SSSE3

-static const vec8 kUVToB = {
-  UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB
-};
-
-static const vec8 kUVToR = {
-  UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR
-};
-
-static const vec8 kUVToG = {
-  UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG
-};
-
-static const vec8 kVUToB = {
-  VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB, VB, UB,
-};
-
-static const vec8 kVUToR = {
-  VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR, VR, UR,
-};
-
-static const vec8 kVUToG = {
-  VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG,
-};
-
-static const vec16 kYToRgb = { YG, YG, YG, YG, YG, YG, YG, YG };
-static const vec16 kYSub16 = { 16, 16, 16, 16, 16, 16, 16, 16 };
-static const vec16 kUVBiasB = { BB, BB, BB, BB, BB, BB, BB, BB };
-static const vec16 kUVBiasG = { BG, BG, BG, BG, BG, BG, BG, BG };
-static const vec16 kUVBiasR = { BR, BR, BR, BR, BR, BR, BR, BR };
-
 // TODO(fbarchard): Read that does half size on Y and treats 420 as 444.

 // Read 8 UV from 444.
@@ -7276,7 +7395,8 @@
 }
 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3

-#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
+#endif // defined(_M_X64)
+#endif // !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER)

 #ifdef __cplusplus
 } // extern "C"