source/libvpx/third_party/libyuv/source/scale_win.cc - Issue 1302353004: libvpx: Pull from upstream

Unified Diff: source/libvpx/third_party/libyuv/source/scale_win.cc

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/libvpx/third_party/libyuv/source/scale_win.cc

diff --git a/source/libvpx/third_party/libyuv/source/scale_win.cc b/source/libvpx/third_party/libyuv/source/scale_win.cc

index e0209cdec8c243d1b06dd4159c6f7c553b380798..c3896ebad2fd89869118c088f90bfe4c36dd9046 100644

--- a/source/libvpx/third_party/libyuv/source/scale_win.cc

+++ b/source/libvpx/third_party/libyuv/source/scale_win.cc

@@ -9,6 +9,7 @@

#include "libyuv/row.h"

+#include "libyuv/scale_row.h"

#ifdef __cplusplus

namespace libyuv {

@@ -16,7 +17,8 @@ extern "C" {

#endif

// This module is for Visual C x86.

-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)

+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \

+ defined(_MSC_VER) && !defined(__clang__)

// Offsets for source bytes 0 to 9

static uvec8 kShuf0 =

@@ -93,8 +95,7 @@ static uvec16 kScaleAb2 =

{ 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 };

// Reads 32 pixels, throws half away and writes 16 pixels.

-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,

uint8* dst_ptr, int dst_width) {

__asm {

@@ -120,8 +121,7 @@ void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,

}

// Blends 32x1 rectangle to 16x1.

-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,

uint8* dst_ptr, int dst_width) {

__asm {

@@ -157,8 +157,7 @@ void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,

}

// Blends 32x2 rectangle to 16x1.

-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 16 byte aligned.

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,

uint8* dst_ptr, int dst_width) {

__asm {

@@ -199,9 +198,116 @@ void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,

}

+#ifdef HAS_SCALEROWDOWN2_AVX2

+// Reads 64 pixels, throws half away and writes 32 pixels per loop pass (keeps the odd-indexed source bytes).

+__declspec(naked)

+void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,

+ uint8* dst_ptr, int dst_width) {

+ __asm {

+ mov eax, [esp + 4] // src_ptr

+ // src_stride ignored

+ mov edx, [esp + 12] // dst_ptr

+ mov ecx, [esp + 16] // dst_width

+ wloop:

+ vmovdqu ymm0, [eax]

+ vmovdqu ymm1, [eax + 32]

+ lea eax, [eax + 64]

+ vpsrlw ymm0, ymm0, 8 // isolate odd pixels: high byte of each word moves to the low byte.

+ vpsrlw ymm1, ymm1, 8

+ vpackuswb ymm0, ymm0, ymm1 // narrow words to bytes; packing is done per 128-bit lane

+ vpermq ymm0, ymm0, 0xd8 // reorder qwords 0,2,1,3 to undo the per-lane interleave left by vpackuswb

+ vmovdqu [edx], ymm0

+ lea edx, [edx + 32]

+ sub ecx, 32 // 32 destination pixels written this pass

+ jg wloop // NOTE(review): every pass stores a full 32 bytes, so a dst_width that is not a multiple of 32 is overrun; presumably callers handle the remainder - confirm

+ vzeroupper

+ ret

+ }

+// Blends 64x1 rectangle to 32x1: each output byte is the rounded average of two adjacent source bytes.

+__declspec(naked)

+void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,

+ uint8* dst_ptr, int dst_width) {

+ __asm {

+ mov eax, [esp + 4] // src_ptr

+ // src_stride ignored (never loaded)

+ mov edx, [esp + 12] // dst_ptr

+ mov ecx, [esp + 16] // dst_width

+ vpcmpeqb ymm4, ymm4, ymm4 // '1' constant, 8b: start from all ones,

+ vpsrlw ymm4, ymm4, 15 // shift down to 0x0001 in every word,

+ vpackuswb ymm4, ymm4, ymm4 // then pack to 0x01 in every byte

+ vpxor ymm5, ymm5, ymm5 // constant 0

+ wloop:

+ vmovdqu ymm0, [eax]

+ vmovdqu ymm1, [eax + 32]

+ lea eax, [eax + 64]

+ vpmaddubsw ymm0, ymm0, ymm4 // multiply bytes by 1 and add adjacent pairs: horizontal pair sums as words

+ vpmaddubsw ymm1, ymm1, ymm4

+ vpavgw ymm0, ymm0, ymm5 // (x + 1) / 2: averaging with 0 halves the pair sum with rounding

+ vpavgw ymm1, ymm1, ymm5

+ vpackuswb ymm0, ymm0, ymm1 // narrow words to bytes; packing is done per 128-bit lane

+ vpermq ymm0, ymm0, 0xd8 // reorder qwords 0,2,1,3 to undo the per-lane interleave left by vpackuswb

+ vmovdqu [edx], ymm0

+ lea edx, [edx + 32]

+ sub ecx, 32 // 32 destination pixels written this pass

+ jg wloop

+ vzeroupper

+ ret

+ }

+// Blends 64x2 rectangle to 32x1: averages two rows, then averages adjacent byte pairs.

+__declspec(naked)

+void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,

+ uint8* dst_ptr, int dst_width) {

+ __asm {

+ push esi // esi is used for src_stride and restored before ret

+ mov eax, [esp + 4 + 4] // src_ptr (the extra 4 skips the saved esi)

+ mov esi, [esp + 4 + 8] // src_stride

+ mov edx, [esp + 4 + 12] // dst_ptr

+ mov ecx, [esp + 4 + 16] // dst_width

+ vpcmpeqb ymm4, ymm4, ymm4 // '1' constant, 8b: start from all ones,

+ vpsrlw ymm4, ymm4, 15 // shift down to 0x0001 in every word,

+ vpackuswb ymm4, ymm4, ymm4 // then pack to 0x01 in every byte

+ vpxor ymm5, ymm5, ymm5 // constant 0

+ wloop:

+ vmovdqu ymm0, [eax] // average rows: load the row at src_ptr

+ vmovdqu ymm1, [eax + 32]

+ vpavgb ymm0, ymm0, [eax + esi] // rounded byte average with the row at src_ptr + src_stride

+ vpavgb ymm1, ymm1, [eax + esi + 32]

+ lea eax, [eax + 64]

+ vpmaddubsw ymm0, ymm0, ymm4 // multiply bytes by 1 and add adjacent pairs: horizontal pair sums as words

+ vpmaddubsw ymm1, ymm1, ymm4

+ vpavgw ymm0, ymm0, ymm5 // (x + 1) / 2: averaging with 0 halves the pair sum with rounding

+ vpavgw ymm1, ymm1, ymm5

+ vpackuswb ymm0, ymm0, ymm1 // narrow words to bytes; packing is done per 128-bit lane

+ vpermq ymm0, ymm0, 0xd8 // reorder qwords 0,2,1,3 to undo the per-lane interleave left by vpackuswb

+ vmovdqu [edx], ymm0

+ lea edx, [edx + 32]

+ sub ecx, 32 // 32 destination pixels written this pass

+ jg wloop

+ pop esi

+ vzeroupper

+ ret

+ }

+#endif // HAS_SCALEROWDOWN2_AVX2

// Point samples 32 pixels to 8 pixels.

-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,

uint8* dst_ptr, int dst_width) {

__asm {

@@ -232,8 +338,7 @@ void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,

}

// Blends 32x4 rectangle to 8x1.

-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,

uint8* dst_ptr, int dst_width) {

__asm {

@@ -248,11 +353,11 @@ void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,

psrlw xmm7, 8

wloop:

- movdqu xmm0, [eax]

+ movdqu xmm0, [eax] // average rows

movdqu xmm1, [eax + 16]

movdqu xmm2, [eax + esi]

movdqu xmm3, [eax + esi + 16]

- pavgb xmm0, xmm2 // average rows

+ pavgb xmm0, xmm2

pavgb xmm1, xmm3

movdqu xmm2, [eax + esi * 2]

movdqu xmm3, [eax + esi * 2 + 16]

@@ -291,13 +396,102 @@ void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,

}

+#ifdef HAS_SCALEROWDOWN4_AVX2

+// Point samples 64 pixels to 16 pixels per loop pass (keeps byte 2 of every 4 source bytes).

+__declspec(naked)

+void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,

+ uint8* dst_ptr, int dst_width) {

+ __asm {

+ mov eax, [esp + 4] // src_ptr

+ // src_stride ignored

+ mov edx, [esp + 12] // dst_ptr

+ mov ecx, [esp + 16] // dst_width

+ vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff0000 in every dword: start from all ones,

+ vpsrld ymm5, ymm5, 24 // keep 0x000000ff,

+ vpslld ymm5, ymm5, 16 // then move it up to byte 2

+ wloop:

+ vmovdqu ymm0, [eax]

+ vmovdqu ymm1, [eax + 32]

+ lea eax, [eax + 64]

+ vpand ymm0, ymm0, ymm5 // zero everything except byte 2 of each dword

+ vpand ymm1, ymm1, ymm5

+ vpackuswb ymm0, ymm0, ymm1 // first narrowing: words to bytes, per 128-bit lane

+ vpermq ymm0, ymm0, 0xd8 // reorder qwords 0,2,1,3 to undo the per-lane interleave left by vpackuswb

+ vpsrlw ymm0, ymm0, 8 // move the surviving byte to the low half of each word

+ vpackuswb ymm0, ymm0, ymm0 // second narrowing: words to bytes

+ vpermq ymm0, ymm0, 0xd8 // reorder qwords 0,2,1,3 again so the low 128 bits hold the result in order

+ vmovdqu [edx], xmm0 // store only the low 16 bytes

+ lea edx, [edx + 16]

+ sub ecx, 16 // 16 destination pixels written this pass

+ jg wloop

+ vzeroupper

+ ret

+ }

+// Blends 64x4 rectangle to 16x1: averages four rows, then averages columns twice (64 to 32 to 16).

+__declspec(naked)

+void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,

+ uint8* dst_ptr, int dst_width) {

+ __asm {

+ push esi // esi and edi are used below and restored before ret

+ push edi

+ mov eax, [esp + 8 + 4] // src_ptr (the extra 8 skips the saved esi and edi)

+ mov esi, [esp + 8 + 8] // src_stride

+ mov edx, [esp + 8 + 12] // dst_ptr

+ mov ecx, [esp + 8 + 16] // dst_width

+ lea edi, [esi + esi * 2] // src_stride * 3

+ vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0x00ff00ff: start from all ones,

+ vpsrlw ymm7, ymm7, 8 // then clear the high byte of every word

+ wloop:

+ vmovdqu ymm0, [eax] // average rows: row 0

+ vmovdqu ymm1, [eax + 32]

+ vpavgb ymm0, ymm0, [eax + esi] // with row 1

+ vpavgb ymm1, ymm1, [eax + esi + 32]

+ vmovdqu ymm2, [eax + esi * 2] // row 2

+ vmovdqu ymm3, [eax + esi * 2 + 32]

+ vpavgb ymm2, ymm2, [eax + edi] // with row 3

+ vpavgb ymm3, ymm3, [eax + edi + 32]

+ lea eax, [eax + 64]

+ vpavgb ymm0, ymm0, ymm2 // average of the two row-pair averages

+ vpavgb ymm1, ymm1, ymm3

+ vpand ymm2, ymm0, ymm7 // average columns (64 to 32 pixels): even bytes as words

+ vpand ymm3, ymm1, ymm7

+ vpsrlw ymm0, ymm0, 8 // odd bytes as words

+ vpsrlw ymm1, ymm1, 8

+ vpavgw ymm0, ymm0, ymm2 // rounded average of each even/odd pair

+ vpavgw ymm1, ymm1, ymm3

+ vpackuswb ymm0, ymm0, ymm1 // narrow words to bytes; packing is done per 128-bit lane

+ vpermq ymm0, ymm0, 0xd8 // reorder qwords 0,2,1,3 to undo the per-lane interleave left by vpackuswb

+ vpand ymm2, ymm0, ymm7 // average columns (32 to 16 pixels): even bytes as words

+ vpsrlw ymm0, ymm0, 8 // odd bytes as words

+ vpavgw ymm0, ymm0, ymm2 // rounded average of each even/odd pair

+ vpackuswb ymm0, ymm0, ymm0 // narrow words to bytes

+ vpermq ymm0, ymm0, 0xd8 // reorder qwords 0,2,1,3 so the low 128 bits hold the result in order

+ vmovdqu [edx], xmm0 // store only the low 16 bytes

+ lea edx, [edx + 16]

+ sub ecx, 16 // 16 destination pixels written this pass

+ jg wloop

+ pop edi

+ pop esi

+ vzeroupper

+ ret

+ }

+#endif // HAS_SCALEROWDOWN4_AVX2

// Point samples 32 pixels to 24 pixels.

// Produces three 8 byte values. For each 8 bytes, 16 bytes are read.

// Then shuffled to do the scaling.

-// Note that movdqa+palign may be better than movdqu.

-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,

uint8* dst_ptr, int dst_width) {

__asm {

@@ -344,8 +538,7 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,

// xmm7 kRound34

// Note that movdqa+palign may be better than movdqu.

-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,

ptrdiff_t src_stride,

uint8* dst_ptr, int dst_width) {

@@ -402,8 +595,7 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,

}

// Note that movdqa+palign may be better than movdqu.

-// Alignment requirement: src_ptr 16 byte aligned, dst_ptr 8 byte aligned.

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,

ptrdiff_t src_stride,

uint8* dst_ptr, int dst_width) {

@@ -465,7 +657,7 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,

// 3/8 point sampler

// Scale 32 pixels to 12

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,

uint8* dst_ptr, int dst_width) {

__asm {

@@ -496,7 +688,7 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,

}

// Scale 16x3 pixels to 6x1 with interpolation

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,

ptrdiff_t src_stride,

uint8* dst_ptr, int dst_width) {

@@ -561,7 +753,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,

}

// Scale 16x2 pixels to 6x1 with interpolation

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,

ptrdiff_t src_stride,

uint8* dst_ptr, int dst_width) {

@@ -605,76 +797,68 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,

}

-// Reads 16xN bytes and produces 16 shorts at a time.

-// TODO(fbarchard): Make this handle 4xN bytes for any width ARGB.

-__declspec(naked) __declspec(align(16))

-void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,

- uint16* dst_ptr, int src_width,

- int src_height) {

+// Reads 16 bytes and accumulates to 16 shorts at a time.

+__declspec(naked)

+void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {

__asm {

- push esi

- push edi

- push ebx

- push ebp

- mov esi, [esp + 16 + 4] // src_ptr

- mov edx, [esp + 16 + 8] // src_stride

- mov edi, [esp + 16 + 12] // dst_ptr

- mov ecx, [esp + 16 + 16] // dst_width

- mov ebx, [esp + 16 + 20] // height

- pxor xmm4, xmm4

- dec ebx

+ mov eax, [esp + 4] // src_ptr

+ mov edx, [esp + 8] // dst_ptr

+ mov ecx, [esp + 12] // src_width

+ pxor xmm5, xmm5

+ // sum rows

xloop:

- // first row

- movdqu xmm0, [esi]

- lea eax, [esi + edx]

- movdqa xmm1, xmm0

- punpcklbw xmm0, xmm4

- punpckhbw xmm1, xmm4

- lea esi, [esi + 16]

- mov ebp, ebx

- test ebp, ebp

- je ydone

- // sum remaining rows

- yloop:

- movdqu xmm2, [eax] // read 16 pixels

- lea eax, [eax + edx] // advance to next row

- movdqa xmm3, xmm2

- punpcklbw xmm2, xmm4

- punpckhbw xmm3, xmm4

+ movdqu xmm3, [eax] // read 16 bytes

+ lea eax, [eax + 16]

+ movdqu xmm0, [edx] // read 16 words from destination

+ movdqu xmm1, [edx + 16]

+ movdqa xmm2, xmm3

+ punpcklbw xmm2, xmm5

+ punpckhbw xmm3, xmm5

paddusw xmm0, xmm2 // sum 16 words

paddusw xmm1, xmm3

- sub ebp, 1

- jg yloop

- ydone:

- movdqu [edi], xmm0

- movdqu [edi + 16], xmm1

- lea edi, [edi + 32]

+ movdqu [edx], xmm0 // write 16 words to destination

+ movdqu [edx + 16], xmm1

+ lea edx, [edx + 32]

sub ecx, 16

jg xloop

+ ret

+ }

- pop ebp

- pop ebx

- pop edi

- pop esi

+#ifdef HAS_SCALEADDROW_AVX2

+// Reads 32 bytes and accumulates to 32 shorts at a time (saturating add into the values already at dst_ptr).

+__declspec(naked)

+void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {

+ __asm {

+ mov eax, [esp + 4] // src_ptr

+ mov edx, [esp + 8] // dst_ptr

+ mov ecx, [esp + 12] // src_width

+ vpxor ymm5, ymm5, ymm5 // constant 0, interleaved below to widen bytes to words

+ // sum rows

+ xloop:

+ vmovdqu ymm3, [eax] // read 32 bytes

+ lea eax, [eax + 32]

+ vpermq ymm3, ymm3, 0xd8 // reorder qwords 0,2,1,3 first so the per-lane vpunpck results come out in source order

+ vpunpcklbw ymm2, ymm3, ymm5 // bytes 0..15 zero-extended to words

+ vpunpckhbw ymm3, ymm3, ymm5 // bytes 16..31 zero-extended to words

+ vpaddusw ymm0, ymm2, [edx] // sum 32 words (two 16-word adds) with unsigned saturation

+ vpaddusw ymm1, ymm3, [edx + 32]

+ vmovdqu [edx], ymm0 // write 32 words to destination

+ vmovdqu [edx + 32], ymm1

+ lea edx, [edx + 64]

+ sub ecx, 32 // 32 source bytes consumed this pass

+ jg xloop

+ vzeroupper

ret

}

+#endif // HAS_SCALEADDROW_AVX2

// Bilinear column filtering. SSSE3 version.

-// TODO(fbarchard): Port to Neon

-// TODO(fbarchard): Switch the following:

-// xor ebx, ebx

-// mov bx, word ptr [esi + eax] // 2 source x0 pixels

-// To

-// movzx ebx, word ptr [esi + eax] // 2 source x0 pixels

-// when drmemory bug fixed.

-// https://code.google.com/p/drmemory/issues/detail?id=1396

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,

int dst_width, int x, int dx) {

__asm {

@@ -751,8 +935,7 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,

}

// Reads 16 pixels, duplicates them and writes 32 pixels.

-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,

int dst_width, int x, int dx) {

__asm {

@@ -777,8 +960,7 @@ void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,

}

// Reads 8 pixels, throws half away and writes 4 even pixels (0, 2, 4, 6)

-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleARGBRowDown2_SSE2(const uint8* src_argb,

ptrdiff_t src_stride,

uint8* dst_argb, int dst_width) {

@@ -803,8 +985,7 @@ void ScaleARGBRowDown2_SSE2(const uint8* src_argb,

}

// Blends 8x1 rectangle to 4x1.

-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,

ptrdiff_t src_stride,

uint8* dst_argb, int dst_width) {

@@ -832,8 +1013,7 @@ void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,

}

// Blends 8x2 rectangle to 4x1.

-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,

ptrdiff_t src_stride,

uint8* dst_argb, int dst_width) {

@@ -867,8 +1047,7 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,

}

// Reads 4 pixels at a time.

-// Alignment requirement: dst_argb 16 byte aligned.

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,

int src_stepx,

uint8* dst_argb, int dst_width) {

@@ -904,8 +1083,7 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,

}

// Blends four 2x2 to 4x1.

-// Alignment requirement: dst_argb 16 byte aligned.

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,

ptrdiff_t src_stride,

int src_stepx,

@@ -953,7 +1131,7 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,

}

// Column scaling unfiltered. SSE2 version.

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,

int dst_width, int x, int dx) {

__asm {

@@ -1044,7 +1222,7 @@ static uvec8 kShuffleFractions = {

0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u,

};

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,

int dst_width, int x, int dx) {

__asm {

@@ -1115,8 +1293,7 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,

}

// Reads 4 pixels, duplicates them and writes 8 pixels.

-// Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,

int dst_width, int x, int dx) {

__asm {

@@ -1141,7 +1318,7 @@ void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,

}

// Divide num by div and return as 16.16 fixed point result.

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

int FixedDiv_X86(int num, int div) {

__asm {

mov eax, [esp + 4] // num

@@ -1154,7 +1331,7 @@ int FixedDiv_X86(int num, int div) {

}

// Divide num by div and return as 16.16 fixed point result.

-__declspec(naked) __declspec(align(16))

+__declspec(naked)

int FixedDiv1_X86(int num, int div) {

__asm {

mov eax, [esp + 4] // num

@@ -1169,8 +1346,7 @@ int FixedDiv1_X86(int num, int div) {

ret

}

-#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)

+#endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)

#ifdef __cplusplus

} // extern "C"

« no previous file with comments | « source/libvpx/third_party/libyuv/source/scale_posix.cc ('k') | source/libvpx/third_party/x86inc/README.libvpx » ('j') | no next file with comments »