source/libvpx/third_party/libyuv/source/scale.cc - Issue 1302353004: libvpx: Pull from upstream

Unified Diff: source/libvpx/third_party/libyuv/source/scale.cc

Issue 1302353004: libvpx: Pull from upstream (Closed)

Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master

Patch Set: Created 5 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/libvpx/third_party/libyuv/source/scale.cc

diff --git a/source/libvpx/third_party/libyuv/source/scale.cc b/source/libvpx/third_party/libyuv/source/scale.cc

index 482c5a61e35599882c7070838b7211d24e5180e5..0a01304c41086fbd2676263071176e9c95040865 100644

--- a/source/libvpx/third_party/libyuv/source/scale.cc

+++ b/source/libvpx/third_party/libyuv/source/scale.cc

@@ -23,9 +23,6 @@ namespace libyuv {

extern "C" {

#endif

-// Remove this macro if OVERREAD is safe.

-#define AVOID_OVERREAD 1

static __inline int Abs(int v) {

return v >= 0 ? v : -v;

}

@@ -44,9 +41,8 @@ static void ScalePlaneDown2(int src_width, int src_height,

int y;

void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,

uint8* dst_ptr, int dst_width) =

- filtering == kFilterNone ? ScaleRowDown2_C :

- (filtering == kFilterLinear ? ScaleRowDown2Linear_C :

- ScaleRowDown2Box_C);

+ filtering == kFilterNone ? ScaleRowDown2_C :

+ (filtering == kFilterLinear ? ScaleRowDown2Linear_C : ScaleRowDown2Box_C);

int row_stride = src_stride << 1;

if (!filtering) {

src_ptr += src_stride; // Point to odd rows.

@@ -54,15 +50,39 @@ static void ScalePlaneDown2(int src_width, int src_height,

}

#if defined(HAS_SCALEROWDOWN2_NEON)

- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {

- ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON;

+ if (TestCpuFlag(kCpuHasNEON)) {

+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_NEON :

+ (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON :

+ ScaleRowDown2Box_Any_NEON);

+ if (IS_ALIGNED(dst_width, 16)) {

+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON :

+ (filtering == kFilterLinear ? ScaleRowDown2Linear_NEON :

+ ScaleRowDown2Box_NEON);

+ }

}

#endif

#if defined(HAS_SCALEROWDOWN2_SSE2)

- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {

- ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :

- (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :

- ScaleRowDown2Box_SSE2);

+ if (TestCpuFlag(kCpuHasSSE2)) {

+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_SSE2 :

+ (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSE2 :

+ ScaleRowDown2Box_Any_SSE2);

+ if (IS_ALIGNED(dst_width, 16)) {

+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :

+ (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :

+ ScaleRowDown2Box_SSE2);

+ }

+#endif

+#if defined(HAS_SCALEROWDOWN2_AVX2)

+ if (TestCpuFlag(kCpuHasAVX2)) {

+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_AVX2 :

+ (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2 :

+ ScaleRowDown2Box_Any_AVX2);

+ if (IS_ALIGNED(dst_width, 32)) {

+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2 :

+ (filtering == kFilterLinear ? ScaleRowDown2Linear_AVX2 :

+ ScaleRowDown2Box_AVX2);

+ }

}

#endif

#if defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)

@@ -154,13 +174,30 @@ static void ScalePlaneDown4(int src_width, int src_height,

src_stride = 0;

}

#if defined(HAS_SCALEROWDOWN4_NEON)

- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {

- ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;

+ if (TestCpuFlag(kCpuHasNEON)) {

+ ScaleRowDown4 = filtering ?

+ ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;

+ if (IS_ALIGNED(dst_width, 8)) {

+ ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;

+ }

}

#endif

#if defined(HAS_SCALEROWDOWN4_SSE2)

- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {

- ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;

+ if (TestCpuFlag(kCpuHasSSE2)) {

+ ScaleRowDown4 = filtering ?

+ ScaleRowDown4Box_Any_SSE2 : ScaleRowDown4_Any_SSE2;

+ if (IS_ALIGNED(dst_width, 8)) {

+ ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;

+ }

+#endif

+#if defined(HAS_SCALEROWDOWN4_AVX2)

+ if (TestCpuFlag(kCpuHasAVX2)) {

+ ScaleRowDown4 = filtering ?

+ ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;

+ if (IS_ALIGNED(dst_width, 16)) {

+ ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;

+ }

}

#endif

#if defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)

@@ -249,24 +286,42 @@ static void ScalePlaneDown34(int src_width, int src_height,

ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;

}

#if defined(HAS_SCALEROWDOWN34_NEON)

- if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {

+ if (TestCpuFlag(kCpuHasNEON)) {

if (!filtering) {

- ScaleRowDown34_0 = ScaleRowDown34_NEON;

- ScaleRowDown34_1 = ScaleRowDown34_NEON;

+ ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;

+ ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;

} else {

- ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;

- ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;

+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;

+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;

+ }

+ if (dst_width % 24 == 0) {

+ if (!filtering) {

+ ScaleRowDown34_0 = ScaleRowDown34_NEON;

+ ScaleRowDown34_1 = ScaleRowDown34_NEON;

+ } else {

+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;

+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;

+ }

}

#endif

#if defined(HAS_SCALEROWDOWN34_SSSE3)

- if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {

+ if (TestCpuFlag(kCpuHasSSSE3)) {

if (!filtering) {

- ScaleRowDown34_0 = ScaleRowDown34_SSSE3;

- ScaleRowDown34_1 = ScaleRowDown34_SSSE3;

+ ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;

+ ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;

} else {

- ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;

- ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;

+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;

+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;

+ }

+ if (dst_width % 24 == 0) {

+ if (!filtering) {

+ ScaleRowDown34_0 = ScaleRowDown34_SSSE3;

+ ScaleRowDown34_1 = ScaleRowDown34_SSSE3;

+ } else {

+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;

+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;

+ }

}

#endif

@@ -422,23 +477,41 @@ static void ScalePlaneDown38(int src_width, int src_height,

ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;

ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;

}

#if defined(HAS_SCALEROWDOWN38_NEON)

- if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {

+ if (TestCpuFlag(kCpuHasNEON)) {

if (!filtering) {

- ScaleRowDown38_3 = ScaleRowDown38_NEON;

- ScaleRowDown38_2 = ScaleRowDown38_NEON;

+ ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;

+ ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;

} else {

- ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;

- ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;

+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;

+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;

+ }

+ if (dst_width % 12 == 0) {

+ if (!filtering) {

+ ScaleRowDown38_3 = ScaleRowDown38_NEON;

+ ScaleRowDown38_2 = ScaleRowDown38_NEON;

+ } else {

+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;

+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;

+ }

}

#endif

#if defined(HAS_SCALEROWDOWN38_SSSE3)

- if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {

+ if (TestCpuFlag(kCpuHasSSSE3)) {

if (!filtering) {

+ ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;

+ ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;

+ } else {

+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;

+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;

+ }

+ if (dst_width % 12 == 0 && !filtering) {

ScaleRowDown38_3 = ScaleRowDown38_SSSE3;

ScaleRowDown38_2 = ScaleRowDown38_SSSE3;

- } else {

+ }

+ if (dst_width % 6 == 0 && filtering) {

ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;

ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;

}

@@ -559,65 +632,7 @@ static void ScalePlaneDown38_16(int src_width, int src_height,

}

-static __inline uint32 SumBox(int iboxwidth, int iboxheight,

- ptrdiff_t src_stride, const uint8* src_ptr) {

- uint32 sum = 0u;

- int y;

- assert(iboxwidth > 0);

- assert(iboxheight > 0);

- for (y = 0; y < iboxheight; ++y) {

- int x;

- for (x = 0; x < iboxwidth; ++x) {

- sum += src_ptr[x];

- }

- src_ptr += src_stride;

- }

- return sum;

-static __inline uint32 SumBox_16(int iboxwidth, int iboxheight,

- ptrdiff_t src_stride, const uint16* src_ptr) {

- uint32 sum = 0u;

- int y;

- assert(iboxwidth > 0);

- assert(iboxheight > 0);

- for (y = 0; y < iboxheight; ++y) {

- int x;

- for (x = 0; x < iboxwidth; ++x) {

- sum += src_ptr[x];

- }

- src_ptr += src_stride;

- }

- return sum;

-static void ScalePlaneBoxRow_C(int dst_width, int boxheight,

- int x, int dx, ptrdiff_t src_stride,

- const uint8* src_ptr, uint8* dst_ptr) {

- int i;

- int boxwidth;

- for (i = 0; i < dst_width; ++i) {

- int ix = x >> 16;

- x += dx;

- boxwidth = (x >> 16) - ix;

- *dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) /

- (boxwidth * boxheight);

- }

-static void ScalePlaneBoxRow_16_C(int dst_width, int boxheight,

- int x, int dx, ptrdiff_t src_stride,

- const uint16* src_ptr, uint16* dst_ptr) {

- int i;

- int boxwidth;

- for (i = 0; i < dst_width; ++i) {

- int ix = x >> 16;

- x += dx;

- boxwidth = (x >> 16) - ix;

- *dst_ptr++ = SumBox_16(boxwidth, boxheight, src_stride, src_ptr + ix) /

- (boxwidth * boxheight);

- }

+#define MIN1(x) ((x) < 1 ? 1 : (x))

static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {

uint32 sum = 0u;

@@ -643,15 +658,15 @@ static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,

const uint16* src_ptr, uint8* dst_ptr) {

int i;

int scaletbl[2];

- int minboxwidth = (dx >> 16);

+ int minboxwidth = dx >> 16;

int* scaleptr = scaletbl - minboxwidth;

int boxwidth;

- scaletbl[0] = 65536 / (minboxwidth * boxheight);

- scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);

+ scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);

+ scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);

for (i = 0; i < dst_width; ++i) {

int ix = x >> 16;

x += dx;

- boxwidth = (x >> 16) - ix;

+ boxwidth = MIN1((x >> 16) - ix);

*dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;

}

@@ -660,25 +675,36 @@ static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx,

const uint32* src_ptr, uint16* dst_ptr) {

int i;

int scaletbl[2];

- int minboxwidth = (dx >> 16);

+ int minboxwidth = dx >> 16;

int* scaleptr = scaletbl - minboxwidth;

int boxwidth;

- scaletbl[0] = 65536 / (minboxwidth * boxheight);

- scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);

+ scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);

+ scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);

for (i = 0; i < dst_width; ++i) {

int ix = x >> 16;

x += dx;

- boxwidth = (x >> 16) - ix;

- *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *

- scaleptr[boxwidth] >> 16;

+ boxwidth = MIN1((x >> 16) - ix);

+ *dst_ptr++ =

+ SumPixels_16(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;

+ }

+static void ScaleAddCols0_C(int dst_width, int boxheight, int x, int,

+ const uint16* src_ptr, uint8* dst_ptr) {

+ int scaleval = 65536 / boxheight;

+ int i;

+ src_ptr += (x >> 16);

+ for (i = 0; i < dst_width; ++i) {

+ *dst_ptr++ = src_ptr[i] * scaleval >> 16;

}

static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,

const uint16* src_ptr, uint8* dst_ptr) {

- int boxwidth = (dx >> 16);

+ int boxwidth = MIN1(dx >> 16);

int scaleval = 65536 / (boxwidth * boxheight);

int i;

+ x >>= 16;

for (i = 0; i < dst_width; ++i) {

*dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;

x += boxwidth;

@@ -687,7 +713,7 @@ static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,

static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx,

const uint32* src_ptr, uint16* dst_ptr) {

- int boxwidth = (dx >> 16);

+ int boxwidth = MIN1(dx >> 16);

int scaleval = 65536 / (boxwidth * boxheight);

int i;

for (i = 0; i < dst_width; ++i) {

@@ -707,7 +733,7 @@ static void ScalePlaneBox(int src_width, int src_height,

int dst_width, int dst_height,

int src_stride, int dst_stride,

const uint8* src_ptr, uint8* dst_ptr) {

- int j;

+ int j, k;

// Initial source x/y coordinate and step values as 16.16 fixed point.

int x = 0;

int y = 0;

@@ -717,42 +743,37 @@ static void ScalePlaneBox(int src_width, int src_height,

ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,

&x, &y, &dx, &dy);

src_width = Abs(src_width);

- // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.

- if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {

- uint8* dst = dst_ptr;

- int j;

- for (j = 0; j < dst_height; ++j) {

- int boxheight;

- int iy = y >> 16;

- const uint8* src = src_ptr + iy * src_stride;

- y += dy;

- if (y > max_y) {

- y = max_y;

- }

- boxheight = (y >> 16) - iy;

- ScalePlaneBoxRow_C(dst_width, boxheight,

- x, dx, src_stride,

- src, dst);

- dst += dst_stride;

- }

- return;

- }

{

// Allocate a row buffer of uint16.

align_buffer_64(row16, src_width * 2);

void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,

const uint16* src_ptr, uint8* dst_ptr) =

- (dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C;

- void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,

- uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;

-#if defined(HAS_SCALEADDROWS_SSE2)

- if (TestCpuFlag(kCpuHasSSE2)

-#ifdef AVOID_OVERREAD

- && IS_ALIGNED(src_width, 16)

+ (dx & 0xffff) ? ScaleAddCols2_C:

+ ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);

+ void (*ScaleAddRow)(const uint8* src_ptr, uint16* dst_ptr, int src_width) =

+ ScaleAddRow_C;

+#if defined(HAS_SCALEADDROW_SSE2)

+ if (TestCpuFlag(kCpuHasSSE2)) {

+ ScaleAddRow = ScaleAddRow_Any_SSE2;

+ if (IS_ALIGNED(src_width, 16)) {

+ ScaleAddRow = ScaleAddRow_SSE2;

+ }

#endif

- ) {

- ScaleAddRows = ScaleAddRows_SSE2;

+#if defined(HAS_SCALEADDROW_AVX2)

+ if (TestCpuFlag(kCpuHasAVX2)) {

+ ScaleAddRow = ScaleAddRow_Any_AVX2;

+ if (IS_ALIGNED(src_width, 32)) {

+ ScaleAddRow = ScaleAddRow_AVX2;

+ }

+#endif

+#if defined(HAS_SCALEADDROW_NEON)

+ if (TestCpuFlag(kCpuHasNEON)) {

+ ScaleAddRow = ScaleAddRow_Any_NEON;

+ if (IS_ALIGNED(src_width, 16)) {

+ ScaleAddRow = ScaleAddRow_NEON;

+ }

}

#endif

@@ -761,14 +782,16 @@ static void ScalePlaneBox(int src_width, int src_height,

int iy = y >> 16;

const uint8* src = src_ptr + iy * src_stride;

y += dy;

- if (y > (src_height << 16)) {

- y = (src_height << 16);

+ if (y > max_y) {

+ y = max_y;

+ }

+ boxheight = MIN1((y >> 16) - iy);

+ memset(row16, 0, src_width * 2);

+ for (k = 0; k < boxheight; ++k) {

+ ScaleAddRow(src, (uint16 *)(row16), src_width);

+ src += src_stride;

}

- boxheight = (y >> 16) - iy;

- ScaleAddRows(src, src_stride, (uint16*)(row16),

- src_width, boxheight);

- ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16),

- dst_ptr);

+ ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), dst_ptr);

dst_ptr += dst_stride;

}

free_aligned_buffer_64(row16);

@@ -779,7 +802,7 @@ static void ScalePlaneBox_16(int src_width, int src_height,

int dst_width, int dst_height,

int src_stride, int dst_stride,

const uint16* src_ptr, uint16* dst_ptr) {

- int j;

+ int j, k;

// Initial source x/y coordinate and step values as 16.16 fixed point.

int x = 0;

int y = 0;

@@ -789,42 +812,18 @@ static void ScalePlaneBox_16(int src_width, int src_height,

ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,

&x, &y, &dx, &dy);

src_width = Abs(src_width);

- // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.

- if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {

- uint16* dst = dst_ptr;

- int j;

- for (j = 0; j < dst_height; ++j) {

- int boxheight;

- int iy = y >> 16;

- const uint16* src = src_ptr + iy * src_stride;

- y += dy;

- if (y > max_y) {

- y = max_y;

- }

- boxheight = (y >> 16) - iy;

- ScalePlaneBoxRow_16_C(dst_width, boxheight,

- x, dx, src_stride,

- src, dst);

- dst += dst_stride;

- }

- return;

- }

{

// Allocate a row buffer of uint32.

align_buffer_64(row32, src_width * 4);

void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,

const uint32* src_ptr, uint16* dst_ptr) =

(dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C;

- void (*ScaleAddRows)(const uint16* src_ptr, ptrdiff_t src_stride,

- uint32* dst_ptr, int src_width, int src_height) = ScaleAddRows_16_C;

+ void (*ScaleAddRow)(const uint16* src_ptr, uint32* dst_ptr, int src_width) =

+ ScaleAddRow_16_C;

-#if defined(HAS_SCALEADDROWS_16_SSE2)

- if (TestCpuFlag(kCpuHasSSE2)

-#ifdef AVOID_OVERREAD

- && IS_ALIGNED(src_width, 16)

-#endif

- ) {

- ScaleAddRows = ScaleAddRows_16_SSE2;

+#if defined(HAS_SCALEADDROW_16_SSE2)

+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {

+ ScaleAddRow = ScaleAddRow_16_SSE2;

}

#endif

@@ -833,14 +832,16 @@ static void ScalePlaneBox_16(int src_width, int src_height,

int iy = y >> 16;

const uint16* src = src_ptr + iy * src_stride;

y += dy;

- if (y > (src_height << 16)) {

- y = (src_height << 16);

+ if (y > max_y) {

+ y = max_y;

}

- boxheight = (y >> 16) - iy;

- ScaleAddRows(src, src_stride, (uint32*)(row32),

- src_width, boxheight);

- ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32),

- dst_ptr);

+ boxheight = MIN1((y >> 16) - iy);

+ memset(row32, 0, src_width * 4);

+ for (k = 0; k < boxheight; ++k) {

+ ScaleAddRow(src, (uint32 *)(row32), src_width);

+ src += src_stride;

+ }

+ ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), dst_ptr);

dst_ptr += dst_stride;

}

free_aligned_buffer_64(row32);

@@ -921,6 +922,14 @@ void ScalePlaneBilinearDown(int src_width, int src_height,

ScaleFilterCols = ScaleFilterCols_SSSE3;

}

#endif

+#if defined(HAS_SCALEFILTERCOLS_NEON)

+ if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {

+ ScaleFilterCols = ScaleFilterCols_Any_NEON;

+ if (IS_ALIGNED(dst_width, 8)) {

+ ScaleFilterCols = ScaleFilterCols_NEON;

+ }

+#endif

if (y > max_y) {

y = max_y;

}

@@ -1057,8 +1066,8 @@ void ScalePlaneBilinearUp(int src_width, int src_height,

ptrdiff_t src_stride, int dst_width, int source_y_fraction) =

InterpolateRow_C;

void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,

- int dst_width, int x, int dx) =

- filtering ? ScaleFilterCols_C : ScaleCols_C;

+ int dst_width, int x, int dx) =

+ filtering ? ScaleFilterCols_C : ScaleCols_C;

ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,

&x, &y, &dx, &dy);

src_width = Abs(src_width);

@@ -1112,6 +1121,14 @@ void ScalePlaneBilinearUp(int src_width, int src_height,

ScaleFilterCols = ScaleFilterCols_SSSE3;

}

#endif

+#if defined(HAS_SCALEFILTERCOLS_NEON)

+ if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {

+ ScaleFilterCols = ScaleFilterCols_Any_NEON;

+ if (IS_ALIGNED(dst_width, 8)) {

+ ScaleFilterCols = ScaleFilterCols_NEON;

+ }

+#endif

if (!filtering && src_width * 2 == dst_width && x < 0x8000) {

ScaleFilterCols = ScaleColsUp2_C;

#if defined(HAS_SCALECOLS_SSE2)

@@ -1129,7 +1146,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height,

const uint8* src = src_ptr + yi * src_stride;

// Allocate 2 row buffers.

- const int kRowSize = (dst_width + 15) & ~15;

+ const int kRowSize = (dst_width + 31) & ~31;

align_buffer_64(row, kRowSize * 2);

uint8* rowptr = row;

@@ -1188,8 +1205,8 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,

ptrdiff_t src_stride, int dst_width, int source_y_fraction) =

InterpolateRow_16_C;

void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,

- int dst_width, int x, int dx) =

- filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;

+ int dst_width, int x, int dx) =

+ filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;

ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,

&x, &y, &dx, &dy);

src_width = Abs(src_width);

@@ -1260,7 +1277,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,

const uint16* src = src_ptr + yi * src_stride;

// Allocate 2 row buffers.

- const int kRowSize = (dst_width + 15) & ~15;

+ const int kRowSize = (dst_width + 31) & ~31;

align_buffer_64(row, kRowSize * 4);

uint16* rowptr = (uint16*)row;

@@ -1334,8 +1351,7 @@ static void ScalePlaneSimple(int src_width, int src_height,

}

for (i = 0; i < dst_height; ++i) {

- ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,

- dst_width, x, dx);

+ ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);

dst_ptr += dst_stride;

y += dy;

}

@@ -1385,8 +1401,7 @@ void ScalePlane(const uint8* src, int src_stride,

enum FilterMode filtering) {

// Simplify filtering when possible.

filtering = ScaleFilterReduce(src_width, src_height,

- dst_width, dst_height,

- filtering);

+ dst_width, dst_height, filtering);

// Negative height means invert the image.

if (src_height < 0) {

@@ -1402,9 +1417,9 @@ void ScalePlane(const uint8* src, int src_stride,

CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);

return;

}

- if (dst_width == src_width) {

+ if (dst_width == src_width && filtering != kFilterBox) {

int dy = FixedDiv(src_height, dst_height);

- // Arbitrary scale vertically, but unscaled vertically.

+ // Arbitrary scale vertically, but unscaled horizontally.

ScalePlaneVertical(src_height,

dst_width, dst_height,

src_stride, dst_stride, src, dst,

@@ -1435,7 +1450,7 @@ void ScalePlane(const uint8* src, int src_stride,

return;

}

if (4 * dst_width == src_width && 4 * dst_height == src_height &&

- filtering != kFilterBilinear) {

+ (filtering == kFilterBox || filtering == kFilterNone)) {

// optimized, 1/4

ScalePlaneDown4(src_width, src_height, dst_width, dst_height,

src_stride, dst_stride, src, dst, filtering);

@@ -1469,8 +1484,7 @@ void ScalePlane_16(const uint16* src, int src_stride,

enum FilterMode filtering) {

// Simplify filtering when possible.

filtering = ScaleFilterReduce(src_width, src_height,

- dst_width, dst_height,

- filtering);

+ dst_width, dst_height, filtering);

// Negative height means invert the image.

if (src_height < 0) {

@@ -1563,6 +1577,7 @@ int I420Scale(const uint8* src_y, int src_stride_y,

int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);

int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);

if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||

+ src_width > 32768 || src_height > 32768 ||

!dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {

return -1;

}

@@ -1594,6 +1609,7 @@ int I420Scale_16(const uint16* src_y, int src_stride_y,

int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);

int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);

if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||

+ src_width > 32768 || src_height > 32768 ||

!dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {

return -1;

}

Previous file with comments: none | Previous file ('k'): source/libvpx/third_party/libyuv/source/row_win.cc | Next file ('j'): source/libvpx/third_party/libyuv/source/scale_any.cc | Next file with comments: none