Index: source/libvpx/third_party/libyuv/source/scale.cc |
diff --git a/source/libvpx/third_party/libyuv/source/scale.cc b/source/libvpx/third_party/libyuv/source/scale.cc |
index 482c5a61e35599882c7070838b7211d24e5180e5..0a01304c41086fbd2676263071176e9c95040865 100644 |
--- a/source/libvpx/third_party/libyuv/source/scale.cc |
+++ b/source/libvpx/third_party/libyuv/source/scale.cc |
@@ -23,9 +23,6 @@ namespace libyuv { |
extern "C" { |
#endif |
-// Remove this macro if OVERREAD is safe. |
-#define AVOID_OVERREAD 1 |
- |
static __inline int Abs(int v) { |
return v >= 0 ? v : -v; |
} |
@@ -44,9 +41,8 @@ static void ScalePlaneDown2(int src_width, int src_height, |
int y; |
void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride, |
uint8* dst_ptr, int dst_width) = |
- filtering == kFilterNone ? ScaleRowDown2_C : |
- (filtering == kFilterLinear ? ScaleRowDown2Linear_C : |
- ScaleRowDown2Box_C); |
+ filtering == kFilterNone ? ScaleRowDown2_C : |
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_C : ScaleRowDown2Box_C); |
int row_stride = src_stride << 1; |
if (!filtering) { |
src_ptr += src_stride; // Point to odd rows. |
@@ -54,15 +50,39 @@ static void ScalePlaneDown2(int src_width, int src_height, |
} |
#if defined(HAS_SCALEROWDOWN2_NEON) |
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) { |
- ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON; |
+ if (TestCpuFlag(kCpuHasNEON)) { |
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_NEON : |
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON : |
+ ScaleRowDown2Box_Any_NEON); |
+ if (IS_ALIGNED(dst_width, 16)) { |
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON : |
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_NEON : |
+ ScaleRowDown2Box_NEON); |
+ } |
} |
#endif |
#if defined(HAS_SCALEROWDOWN2_SSE2) |
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) { |
- ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 : |
- (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 : |
- ScaleRowDown2Box_SSE2); |
+ if (TestCpuFlag(kCpuHasSSE2)) { |
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_SSE2 : |
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSE2 : |
+ ScaleRowDown2Box_Any_SSE2); |
+ if (IS_ALIGNED(dst_width, 16)) { |
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 : |
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 : |
+ ScaleRowDown2Box_SSE2); |
+ } |
+ } |
+#endif |
+#if defined(HAS_SCALEROWDOWN2_AVX2) |
+ if (TestCpuFlag(kCpuHasAVX2)) { |
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_AVX2 : |
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2 : |
+ ScaleRowDown2Box_Any_AVX2); |
+ if (IS_ALIGNED(dst_width, 32)) { |
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2 : |
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_AVX2 : |
+ ScaleRowDown2Box_AVX2); |
+ } |
} |
#endif |
#if defined(HAS_SCALEROWDOWN2_MIPS_DSPR2) |
@@ -154,13 +174,30 @@ static void ScalePlaneDown4(int src_width, int src_height, |
src_stride = 0; |
} |
#if defined(HAS_SCALEROWDOWN4_NEON) |
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) { |
- ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON; |
+ if (TestCpuFlag(kCpuHasNEON)) { |
+ ScaleRowDown4 = filtering ? |
+ ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON; |
+ if (IS_ALIGNED(dst_width, 8)) { |
+ ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON; |
+ } |
} |
#endif |
#if defined(HAS_SCALEROWDOWN4_SSE2) |
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { |
- ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2; |
+ if (TestCpuFlag(kCpuHasSSE2)) { |
+ ScaleRowDown4 = filtering ? |
+ ScaleRowDown4Box_Any_SSE2 : ScaleRowDown4_Any_SSE2; |
+ if (IS_ALIGNED(dst_width, 8)) { |
+ ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2; |
+ } |
+ } |
+#endif |
+#if defined(HAS_SCALEROWDOWN4_AVX2) |
+ if (TestCpuFlag(kCpuHasAVX2)) { |
+ ScaleRowDown4 = filtering ? |
+ ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2; |
+ if (IS_ALIGNED(dst_width, 16)) { |
+ ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2; |
+ } |
} |
#endif |
#if defined(HAS_SCALEROWDOWN4_MIPS_DSPR2) |
@@ -249,24 +286,42 @@ static void ScalePlaneDown34(int src_width, int src_height, |
ScaleRowDown34_1 = ScaleRowDown34_1_Box_C; |
} |
#if defined(HAS_SCALEROWDOWN34_NEON) |
- if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) { |
+ if (TestCpuFlag(kCpuHasNEON)) { |
if (!filtering) { |
- ScaleRowDown34_0 = ScaleRowDown34_NEON; |
- ScaleRowDown34_1 = ScaleRowDown34_NEON; |
+ ScaleRowDown34_0 = ScaleRowDown34_Any_NEON; |
+ ScaleRowDown34_1 = ScaleRowDown34_Any_NEON; |
} else { |
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON; |
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON; |
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON; |
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON; |
+ } |
+ if (dst_width % 24 == 0) { |
+ if (!filtering) { |
+ ScaleRowDown34_0 = ScaleRowDown34_NEON; |
+ ScaleRowDown34_1 = ScaleRowDown34_NEON; |
+ } else { |
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON; |
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON; |
+ } |
} |
} |
#endif |
#if defined(HAS_SCALEROWDOWN34_SSSE3) |
- if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) { |
+ if (TestCpuFlag(kCpuHasSSSE3)) { |
if (!filtering) { |
- ScaleRowDown34_0 = ScaleRowDown34_SSSE3; |
- ScaleRowDown34_1 = ScaleRowDown34_SSSE3; |
+ ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3; |
+ ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3; |
} else { |
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3; |
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3; |
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3; |
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3; |
+ } |
+ if (dst_width % 24 == 0) { |
+ if (!filtering) { |
+ ScaleRowDown34_0 = ScaleRowDown34_SSSE3; |
+ ScaleRowDown34_1 = ScaleRowDown34_SSSE3; |
+ } else { |
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3; |
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3; |
+ } |
} |
} |
#endif |
@@ -422,23 +477,41 @@ static void ScalePlaneDown38(int src_width, int src_height, |
ScaleRowDown38_3 = ScaleRowDown38_3_Box_C; |
ScaleRowDown38_2 = ScaleRowDown38_2_Box_C; |
} |
+ |
#if defined(HAS_SCALEROWDOWN38_NEON) |
- if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) { |
+ if (TestCpuFlag(kCpuHasNEON)) { |
if (!filtering) { |
- ScaleRowDown38_3 = ScaleRowDown38_NEON; |
- ScaleRowDown38_2 = ScaleRowDown38_NEON; |
+ ScaleRowDown38_3 = ScaleRowDown38_Any_NEON; |
+ ScaleRowDown38_2 = ScaleRowDown38_Any_NEON; |
} else { |
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON; |
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON; |
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON; |
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON; |
+ } |
+ if (dst_width % 12 == 0) { |
+ if (!filtering) { |
+ ScaleRowDown38_3 = ScaleRowDown38_NEON; |
+ ScaleRowDown38_2 = ScaleRowDown38_NEON; |
+ } else { |
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON; |
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON; |
+ } |
} |
} |
#endif |
#if defined(HAS_SCALEROWDOWN38_SSSE3) |
- if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) { |
+ if (TestCpuFlag(kCpuHasSSSE3)) { |
if (!filtering) { |
+ ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3; |
+ ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3; |
+ } else { |
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3; |
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3; |
+ } |
+ if (dst_width % 12 == 0 && !filtering) { |
ScaleRowDown38_3 = ScaleRowDown38_SSSE3; |
ScaleRowDown38_2 = ScaleRowDown38_SSSE3; |
- } else { |
+ } |
+ if (dst_width % 6 == 0 && filtering) { |
ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3; |
ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3; |
} |
@@ -559,65 +632,7 @@ static void ScalePlaneDown38_16(int src_width, int src_height, |
} |
} |
-static __inline uint32 SumBox(int iboxwidth, int iboxheight, |
- ptrdiff_t src_stride, const uint8* src_ptr) { |
- uint32 sum = 0u; |
- int y; |
- assert(iboxwidth > 0); |
- assert(iboxheight > 0); |
- for (y = 0; y < iboxheight; ++y) { |
- int x; |
- for (x = 0; x < iboxwidth; ++x) { |
- sum += src_ptr[x]; |
- } |
- src_ptr += src_stride; |
- } |
- return sum; |
-} |
- |
-static __inline uint32 SumBox_16(int iboxwidth, int iboxheight, |
- ptrdiff_t src_stride, const uint16* src_ptr) { |
- uint32 sum = 0u; |
- int y; |
- assert(iboxwidth > 0); |
- assert(iboxheight > 0); |
- for (y = 0; y < iboxheight; ++y) { |
- int x; |
- for (x = 0; x < iboxwidth; ++x) { |
- sum += src_ptr[x]; |
- } |
- src_ptr += src_stride; |
- } |
- return sum; |
-} |
- |
-static void ScalePlaneBoxRow_C(int dst_width, int boxheight, |
- int x, int dx, ptrdiff_t src_stride, |
- const uint8* src_ptr, uint8* dst_ptr) { |
- int i; |
- int boxwidth; |
- for (i = 0; i < dst_width; ++i) { |
- int ix = x >> 16; |
- x += dx; |
- boxwidth = (x >> 16) - ix; |
- *dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) / |
- (boxwidth * boxheight); |
- } |
-} |
- |
-static void ScalePlaneBoxRow_16_C(int dst_width, int boxheight, |
- int x, int dx, ptrdiff_t src_stride, |
- const uint16* src_ptr, uint16* dst_ptr) { |
- int i; |
- int boxwidth; |
- for (i = 0; i < dst_width; ++i) { |
- int ix = x >> 16; |
- x += dx; |
- boxwidth = (x >> 16) - ix; |
- *dst_ptr++ = SumBox_16(boxwidth, boxheight, src_stride, src_ptr + ix) / |
- (boxwidth * boxheight); |
- } |
-} |
+#define MIN1(x) ((x) < 1 ? 1 : (x)) /* Clamps x to a lower bound of 1 (yields 1 when x < 1, else x); the argument is evaluated twice, so pass side-effect-free expressions. */ |
static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) { |
uint32 sum = 0u; |
@@ -643,15 +658,15 @@ static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx, |
const uint16* src_ptr, uint8* dst_ptr) { |
int i; |
int scaletbl[2]; |
- int minboxwidth = (dx >> 16); |
+ int minboxwidth = dx >> 16; |
int* scaleptr = scaletbl - minboxwidth; |
int boxwidth; |
- scaletbl[0] = 65536 / (minboxwidth * boxheight); |
- scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight); |
+ scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight); |
+ scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight); |
for (i = 0; i < dst_width; ++i) { |
int ix = x >> 16; |
x += dx; |
- boxwidth = (x >> 16) - ix; |
+ boxwidth = MIN1((x >> 16) - ix); |
*dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16; |
} |
} |
@@ -660,25 +675,36 @@ static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx, |
const uint32* src_ptr, uint16* dst_ptr) { |
int i; |
int scaletbl[2]; |
- int minboxwidth = (dx >> 16); |
+ int minboxwidth = dx >> 16; |
int* scaleptr = scaletbl - minboxwidth; |
int boxwidth; |
- scaletbl[0] = 65536 / (minboxwidth * boxheight); |
- scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight); |
+ scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight); |
+ scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight); |
for (i = 0; i < dst_width; ++i) { |
int ix = x >> 16; |
x += dx; |
- boxwidth = (x >> 16) - ix; |
- *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) * |
- scaleptr[boxwidth] >> 16; |
+ boxwidth = MIN1((x >> 16) - ix); |
+ *dst_ptr++ = |
+ SumPixels_16(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16; |
+ } |
+} |
+ |
+static void ScaleAddCols0_C(int dst_width, int boxheight, int x, int, /* Writes dst_width bytes to dst_ptr, one per consecutive src_ptr entry, each scaled by 1/boxheight; the unnamed 4th argument is unused. */ |
+ const uint16* src_ptr, uint8* dst_ptr) { |
+ int scaleval = 65536 / boxheight; /* 1/boxheight in 16.16 fixed point (truncated); boxheight must be non-zero. */ |
+ int i; |
+ src_ptr += (x >> 16); /* Integer part of the 16.16 start position x selects the first source column. */ |
+ for (i = 0; i < dst_width; ++i) { |
+ *dst_ptr++ = src_ptr[i] * scaleval >> 16; /* Parsed as (src_ptr[i] * scaleval) >> 16, then narrowed to uint8 on store. */ |
+ } |
+} |
static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx, |
const uint16* src_ptr, uint8* dst_ptr) { |
- int boxwidth = (dx >> 16); |
+ int boxwidth = MIN1(dx >> 16); |
int scaleval = 65536 / (boxwidth * boxheight); |
int i; |
+ x >>= 16; |
for (i = 0; i < dst_width; ++i) { |
*dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16; |
x += boxwidth; |
@@ -687,7 +713,7 @@ static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx, |
static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx, |
const uint32* src_ptr, uint16* dst_ptr) { |
- int boxwidth = (dx >> 16); |
+ int boxwidth = MIN1(dx >> 16); |
int scaleval = 65536 / (boxwidth * boxheight); |
int i; |
for (i = 0; i < dst_width; ++i) { |
@@ -707,7 +733,7 @@ static void ScalePlaneBox(int src_width, int src_height, |
int dst_width, int dst_height, |
int src_stride, int dst_stride, |
const uint8* src_ptr, uint8* dst_ptr) { |
- int j; |
+ int j, k; |
// Initial source x/y coordinate and step values as 16.16 fixed point. |
int x = 0; |
int y = 0; |
@@ -717,42 +743,37 @@ static void ScalePlaneBox(int src_width, int src_height, |
ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, |
&x, &y, &dx, &dy); |
src_width = Abs(src_width); |
- // TODO(fbarchard): Remove this and make AddRows handle boxheight 1. |
- if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) { |
- uint8* dst = dst_ptr; |
- int j; |
- for (j = 0; j < dst_height; ++j) { |
- int boxheight; |
- int iy = y >> 16; |
- const uint8* src = src_ptr + iy * src_stride; |
- y += dy; |
- if (y > max_y) { |
- y = max_y; |
- } |
- boxheight = (y >> 16) - iy; |
- ScalePlaneBoxRow_C(dst_width, boxheight, |
- x, dx, src_stride, |
- src, dst); |
- dst += dst_stride; |
- } |
- return; |
- } |
{ |
// Allocate a row buffer of uint16. |
align_buffer_64(row16, src_width * 2); |
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, |
const uint16* src_ptr, uint8* dst_ptr) = |
- (dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C; |
- void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride, |
- uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C; |
- |
-#if defined(HAS_SCALEADDROWS_SSE2) |
- if (TestCpuFlag(kCpuHasSSE2) |
-#ifdef AVOID_OVERREAD |
- && IS_ALIGNED(src_width, 16) |
+ (dx & 0xffff) ? ScaleAddCols2_C: |
+ ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C); |
+ void (*ScaleAddRow)(const uint8* src_ptr, uint16* dst_ptr, int src_width) = |
+ ScaleAddRow_C; |
+#if defined(HAS_SCALEADDROW_SSE2) |
+ if (TestCpuFlag(kCpuHasSSE2)) { |
+ ScaleAddRow = ScaleAddRow_Any_SSE2; |
+ if (IS_ALIGNED(src_width, 16)) { |
+ ScaleAddRow = ScaleAddRow_SSE2; |
+ } |
+ } |
#endif |
- ) { |
- ScaleAddRows = ScaleAddRows_SSE2; |
+#if defined(HAS_SCALEADDROW_AVX2) |
+ if (TestCpuFlag(kCpuHasAVX2)) { |
+ ScaleAddRow = ScaleAddRow_Any_AVX2; |
+ if (IS_ALIGNED(src_width, 32)) { |
+ ScaleAddRow = ScaleAddRow_AVX2; |
+ } |
+ } |
+#endif |
+#if defined(HAS_SCALEADDROW_NEON) |
+ if (TestCpuFlag(kCpuHasNEON)) { |
+ ScaleAddRow = ScaleAddRow_Any_NEON; |
+ if (IS_ALIGNED(src_width, 16)) { |
+ ScaleAddRow = ScaleAddRow_NEON; |
+ } |
} |
#endif |
@@ -761,14 +782,16 @@ static void ScalePlaneBox(int src_width, int src_height, |
int iy = y >> 16; |
const uint8* src = src_ptr + iy * src_stride; |
y += dy; |
- if (y > (src_height << 16)) { |
- y = (src_height << 16); |
+ if (y > max_y) { |
+ y = max_y; |
+ } |
+ boxheight = MIN1((y >> 16) - iy); |
+ memset(row16, 0, src_width * 2); |
+ for (k = 0; k < boxheight; ++k) { |
+ ScaleAddRow(src, (uint16 *)(row16), src_width); |
+ src += src_stride; |
} |
- boxheight = (y >> 16) - iy; |
- ScaleAddRows(src, src_stride, (uint16*)(row16), |
- src_width, boxheight); |
- ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), |
- dst_ptr); |
+ ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), dst_ptr); |
dst_ptr += dst_stride; |
} |
free_aligned_buffer_64(row16); |
@@ -779,7 +802,7 @@ static void ScalePlaneBox_16(int src_width, int src_height, |
int dst_width, int dst_height, |
int src_stride, int dst_stride, |
const uint16* src_ptr, uint16* dst_ptr) { |
- int j; |
+ int j, k; |
// Initial source x/y coordinate and step values as 16.16 fixed point. |
int x = 0; |
int y = 0; |
@@ -789,42 +812,18 @@ static void ScalePlaneBox_16(int src_width, int src_height, |
ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, |
&x, &y, &dx, &dy); |
src_width = Abs(src_width); |
- // TODO(fbarchard): Remove this and make AddRows handle boxheight 1. |
- if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) { |
- uint16* dst = dst_ptr; |
- int j; |
- for (j = 0; j < dst_height; ++j) { |
- int boxheight; |
- int iy = y >> 16; |
- const uint16* src = src_ptr + iy * src_stride; |
- y += dy; |
- if (y > max_y) { |
- y = max_y; |
- } |
- boxheight = (y >> 16) - iy; |
- ScalePlaneBoxRow_16_C(dst_width, boxheight, |
- x, dx, src_stride, |
- src, dst); |
- dst += dst_stride; |
- } |
- return; |
- } |
{ |
// Allocate a row buffer of uint32. |
align_buffer_64(row32, src_width * 4); |
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, |
const uint32* src_ptr, uint16* dst_ptr) = |
(dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C; |
- void (*ScaleAddRows)(const uint16* src_ptr, ptrdiff_t src_stride, |
- uint32* dst_ptr, int src_width, int src_height) = ScaleAddRows_16_C; |
+ void (*ScaleAddRow)(const uint16* src_ptr, uint32* dst_ptr, int src_width) = |
+ ScaleAddRow_16_C; |
-#if defined(HAS_SCALEADDROWS_16_SSE2) |
- if (TestCpuFlag(kCpuHasSSE2) |
-#ifdef AVOID_OVERREAD |
- && IS_ALIGNED(src_width, 16) |
-#endif |
- ) { |
- ScaleAddRows = ScaleAddRows_16_SSE2; |
+#if defined(HAS_SCALEADDROW_16_SSE2) |
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) { |
+ ScaleAddRow = ScaleAddRow_16_SSE2; |
} |
#endif |
@@ -833,14 +832,16 @@ static void ScalePlaneBox_16(int src_width, int src_height, |
int iy = y >> 16; |
const uint16* src = src_ptr + iy * src_stride; |
y += dy; |
- if (y > (src_height << 16)) { |
- y = (src_height << 16); |
+ if (y > max_y) { |
+ y = max_y; |
} |
- boxheight = (y >> 16) - iy; |
- ScaleAddRows(src, src_stride, (uint32*)(row32), |
- src_width, boxheight); |
- ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), |
- dst_ptr); |
+ boxheight = MIN1((y >> 16) - iy); |
+ memset(row32, 0, src_width * 4); |
+ for (k = 0; k < boxheight; ++k) { |
+ ScaleAddRow(src, (uint32 *)(row32), src_width); |
+ src += src_stride; |
+ } |
+ ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), dst_ptr); |
dst_ptr += dst_stride; |
} |
free_aligned_buffer_64(row32); |
@@ -921,6 +922,14 @@ void ScalePlaneBilinearDown(int src_width, int src_height, |
ScaleFilterCols = ScaleFilterCols_SSSE3; |
} |
#endif |
+#if defined(HAS_SCALEFILTERCOLS_NEON) |
+ if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) { |
+ ScaleFilterCols = ScaleFilterCols_Any_NEON; |
+ if (IS_ALIGNED(dst_width, 8)) { |
+ ScaleFilterCols = ScaleFilterCols_NEON; |
+ } |
+ } |
+#endif |
if (y > max_y) { |
y = max_y; |
} |
@@ -1057,8 +1066,8 @@ void ScalePlaneBilinearUp(int src_width, int src_height, |
ptrdiff_t src_stride, int dst_width, int source_y_fraction) = |
InterpolateRow_C; |
void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr, |
- int dst_width, int x, int dx) = |
- filtering ? ScaleFilterCols_C : ScaleCols_C; |
+ int dst_width, int x, int dx) = |
+ filtering ? ScaleFilterCols_C : ScaleCols_C; |
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, |
&x, &y, &dx, &dy); |
src_width = Abs(src_width); |
@@ -1112,6 +1121,14 @@ void ScalePlaneBilinearUp(int src_width, int src_height, |
ScaleFilterCols = ScaleFilterCols_SSSE3; |
} |
#endif |
+#if defined(HAS_SCALEFILTERCOLS_NEON) |
+ if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) { |
+ ScaleFilterCols = ScaleFilterCols_Any_NEON; |
+ if (IS_ALIGNED(dst_width, 8)) { |
+ ScaleFilterCols = ScaleFilterCols_NEON; |
+ } |
+ } |
+#endif |
if (!filtering && src_width * 2 == dst_width && x < 0x8000) { |
ScaleFilterCols = ScaleColsUp2_C; |
#if defined(HAS_SCALECOLS_SSE2) |
@@ -1129,7 +1146,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height, |
const uint8* src = src_ptr + yi * src_stride; |
// Allocate 2 row buffers. |
- const int kRowSize = (dst_width + 15) & ~15; |
+ const int kRowSize = (dst_width + 31) & ~31; |
align_buffer_64(row, kRowSize * 2); |
uint8* rowptr = row; |
@@ -1188,8 +1205,8 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height, |
ptrdiff_t src_stride, int dst_width, int source_y_fraction) = |
InterpolateRow_16_C; |
void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr, |
- int dst_width, int x, int dx) = |
- filtering ? ScaleFilterCols_16_C : ScaleCols_16_C; |
+ int dst_width, int x, int dx) = |
+ filtering ? ScaleFilterCols_16_C : ScaleCols_16_C; |
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, |
&x, &y, &dx, &dy); |
src_width = Abs(src_width); |
@@ -1260,7 +1277,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height, |
const uint16* src = src_ptr + yi * src_stride; |
// Allocate 2 row buffers. |
- const int kRowSize = (dst_width + 15) & ~15; |
+ const int kRowSize = (dst_width + 31) & ~31; |
align_buffer_64(row, kRowSize * 4); |
uint16* rowptr = (uint16*)row; |
@@ -1334,8 +1351,7 @@ static void ScalePlaneSimple(int src_width, int src_height, |
} |
for (i = 0; i < dst_height; ++i) { |
- ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, |
- dst_width, x, dx); |
+ ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx); |
dst_ptr += dst_stride; |
y += dy; |
} |
@@ -1385,8 +1401,7 @@ void ScalePlane(const uint8* src, int src_stride, |
enum FilterMode filtering) { |
// Simplify filtering when possible. |
filtering = ScaleFilterReduce(src_width, src_height, |
- dst_width, dst_height, |
- filtering); |
+ dst_width, dst_height, filtering); |
// Negative height means invert the image. |
if (src_height < 0) { |
@@ -1402,9 +1417,9 @@ void ScalePlane(const uint8* src, int src_stride, |
CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height); |
return; |
} |
- if (dst_width == src_width) { |
+ if (dst_width == src_width && filtering != kFilterBox) { |
int dy = FixedDiv(src_height, dst_height); |
- // Arbitrary scale vertically, but unscaled vertically. |
+ // Arbitrary scale vertically, but unscaled horizontally. |
ScalePlaneVertical(src_height, |
dst_width, dst_height, |
src_stride, dst_stride, src, dst, |
@@ -1435,7 +1450,7 @@ void ScalePlane(const uint8* src, int src_stride, |
return; |
} |
if (4 * dst_width == src_width && 4 * dst_height == src_height && |
- filtering != kFilterBilinear) { |
+ (filtering == kFilterBox || filtering == kFilterNone)) { |
// optimized, 1/4 |
ScalePlaneDown4(src_width, src_height, dst_width, dst_height, |
src_stride, dst_stride, src, dst, filtering); |
@@ -1469,8 +1484,7 @@ void ScalePlane_16(const uint16* src, int src_stride, |
enum FilterMode filtering) { |
// Simplify filtering when possible. |
filtering = ScaleFilterReduce(src_width, src_height, |
- dst_width, dst_height, |
- filtering); |
+ dst_width, dst_height, filtering); |
// Negative height means invert the image. |
if (src_height < 0) { |
@@ -1563,6 +1577,7 @@ int I420Scale(const uint8* src_y, int src_stride_y, |
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); |
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); |
if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || |
+ src_width > 32768 || src_height > 32768 || |
!dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { |
return -1; |
} |
@@ -1594,6 +1609,7 @@ int I420Scale_16(const uint16* src_y, int src_stride_y, |
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); |
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); |
if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || |
+ src_width > 32768 || src_height > 32768 || |
!dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { |
return -1; |
} |