Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(11)

Unified Diff: source/libvpx/third_party/libyuv/source/scale.cc

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: source/libvpx/third_party/libyuv/source/scale.cc
diff --git a/source/libvpx/third_party/libyuv/source/scale.cc b/source/libvpx/third_party/libyuv/source/scale.cc
index 482c5a61e35599882c7070838b7211d24e5180e5..0a01304c41086fbd2676263071176e9c95040865 100644
--- a/source/libvpx/third_party/libyuv/source/scale.cc
+++ b/source/libvpx/third_party/libyuv/source/scale.cc
@@ -23,9 +23,6 @@ namespace libyuv {
extern "C" {
#endif
-// Remove this macro if OVERREAD is safe.
-#define AVOID_OVERREAD 1
-
static __inline int Abs(int v) {
return v >= 0 ? v : -v;
}
@@ -44,9 +41,8 @@ static void ScalePlaneDown2(int src_width, int src_height,
int y;
void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
uint8* dst_ptr, int dst_width) =
- filtering == kFilterNone ? ScaleRowDown2_C :
- (filtering == kFilterLinear ? ScaleRowDown2Linear_C :
- ScaleRowDown2Box_C);
+ filtering == kFilterNone ? ScaleRowDown2_C :
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_C : ScaleRowDown2Box_C);
int row_stride = src_stride << 1;
if (!filtering) {
src_ptr += src_stride; // Point to odd rows.
@@ -54,15 +50,39 @@ static void ScalePlaneDown2(int src_width, int src_height,
}
#if defined(HAS_SCALEROWDOWN2_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
- ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON;
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_NEON :
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON :
+ ScaleRowDown2Box_Any_NEON);
+ if (IS_ALIGNED(dst_width, 16)) {
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON :
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_NEON :
+ ScaleRowDown2Box_NEON);
+ }
}
#endif
#if defined(HAS_SCALEROWDOWN2_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
- ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
- (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
- ScaleRowDown2Box_SSE2);
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_SSE2 :
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSE2 :
+ ScaleRowDown2Box_Any_SSE2);
+ if (IS_ALIGNED(dst_width, 16)) {
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
+ ScaleRowDown2Box_SSE2);
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN2_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_AVX2 :
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2 :
+ ScaleRowDown2Box_Any_AVX2);
+ if (IS_ALIGNED(dst_width, 32)) {
+ ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2 :
+ (filtering == kFilterLinear ? ScaleRowDown2Linear_AVX2 :
+ ScaleRowDown2Box_AVX2);
+ }
}
#endif
#if defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
@@ -154,13 +174,30 @@ static void ScalePlaneDown4(int src_width, int src_height,
src_stride = 0;
}
#if defined(HAS_SCALEROWDOWN4_NEON)
- if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
- ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleRowDown4 = filtering ?
+ ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
+ }
}
#endif
#if defined(HAS_SCALEROWDOWN4_SSE2)
- if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
- ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ScaleRowDown4 = filtering ?
+ ScaleRowDown4Box_Any_SSE2 : ScaleRowDown4_Any_SSE2;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
+ }
+ }
+#endif
+#if defined(HAS_SCALEROWDOWN4_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ScaleRowDown4 = filtering ?
+ ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
+ if (IS_ALIGNED(dst_width, 16)) {
+ ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
+ }
}
#endif
#if defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
@@ -249,24 +286,42 @@ static void ScalePlaneDown34(int src_width, int src_height,
ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
}
#if defined(HAS_SCALEROWDOWN34_NEON)
- if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
+ if (TestCpuFlag(kCpuHasNEON)) {
if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_NEON;
- ScaleRowDown34_1 = ScaleRowDown34_NEON;
+ ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
+ ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
} else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
+ }
+ if (dst_width % 24 == 0) {
+ if (!filtering) {
+ ScaleRowDown34_0 = ScaleRowDown34_NEON;
+ ScaleRowDown34_1 = ScaleRowDown34_NEON;
+ } else {
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
+ }
}
}
#endif
#if defined(HAS_SCALEROWDOWN34_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
if (!filtering) {
- ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
- ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
+ ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
+ ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
} else {
- ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
- ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
+ }
+ if (dst_width % 24 == 0) {
+ if (!filtering) {
+ ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
+ ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
+ } else {
+ ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
+ ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
+ }
}
}
#endif
@@ -422,23 +477,41 @@ static void ScalePlaneDown38(int src_width, int src_height,
ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
}
+
#if defined(HAS_SCALEROWDOWN38_NEON)
- if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
+ if (TestCpuFlag(kCpuHasNEON)) {
if (!filtering) {
- ScaleRowDown38_3 = ScaleRowDown38_NEON;
- ScaleRowDown38_2 = ScaleRowDown38_NEON;
+ ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
+ ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
} else {
- ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
- ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
+ }
+ if (dst_width % 12 == 0) {
+ if (!filtering) {
+ ScaleRowDown38_3 = ScaleRowDown38_NEON;
+ ScaleRowDown38_2 = ScaleRowDown38_NEON;
+ } else {
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
+ }
}
}
#endif
#if defined(HAS_SCALEROWDOWN38_SSSE3)
- if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) {
+ if (TestCpuFlag(kCpuHasSSSE3)) {
if (!filtering) {
+ ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
+ ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
+ } else {
+ ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
+ ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
+ }
+ if (dst_width % 12 == 0 && !filtering) {
ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
- } else {
+ }
+ if (dst_width % 6 == 0 && filtering) {
ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
}
@@ -559,65 +632,7 @@ static void ScalePlaneDown38_16(int src_width, int src_height,
}
}
-static __inline uint32 SumBox(int iboxwidth, int iboxheight,
- ptrdiff_t src_stride, const uint8* src_ptr) {
- uint32 sum = 0u;
- int y;
- assert(iboxwidth > 0);
- assert(iboxheight > 0);
- for (y = 0; y < iboxheight; ++y) {
- int x;
- for (x = 0; x < iboxwidth; ++x) {
- sum += src_ptr[x];
- }
- src_ptr += src_stride;
- }
- return sum;
-}
-
-static __inline uint32 SumBox_16(int iboxwidth, int iboxheight,
- ptrdiff_t src_stride, const uint16* src_ptr) {
- uint32 sum = 0u;
- int y;
- assert(iboxwidth > 0);
- assert(iboxheight > 0);
- for (y = 0; y < iboxheight; ++y) {
- int x;
- for (x = 0; x < iboxwidth; ++x) {
- sum += src_ptr[x];
- }
- src_ptr += src_stride;
- }
- return sum;
-}
-
-static void ScalePlaneBoxRow_C(int dst_width, int boxheight,
- int x, int dx, ptrdiff_t src_stride,
- const uint8* src_ptr, uint8* dst_ptr) {
- int i;
- int boxwidth;
- for (i = 0; i < dst_width; ++i) {
- int ix = x >> 16;
- x += dx;
- boxwidth = (x >> 16) - ix;
- *dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) /
- (boxwidth * boxheight);
- }
-}
-
-static void ScalePlaneBoxRow_16_C(int dst_width, int boxheight,
- int x, int dx, ptrdiff_t src_stride,
- const uint16* src_ptr, uint16* dst_ptr) {
- int i;
- int boxwidth;
- for (i = 0; i < dst_width; ++i) {
- int ix = x >> 16;
- x += dx;
- boxwidth = (x >> 16) - ix;
- *dst_ptr++ = SumBox_16(boxwidth, boxheight, src_stride, src_ptr + ix) /
- (boxwidth * boxheight);
- }
-}
+#define MIN1(x) ((x) < 1 ? 1 : (x))
static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
uint32 sum = 0u;
@@ -643,15 +658,15 @@ static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
const uint16* src_ptr, uint8* dst_ptr) {
int i;
int scaletbl[2];
- int minboxwidth = (dx >> 16);
+ int minboxwidth = dx >> 16;
int* scaleptr = scaletbl - minboxwidth;
int boxwidth;
- scaletbl[0] = 65536 / (minboxwidth * boxheight);
- scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
+ scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
+ scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
for (i = 0; i < dst_width; ++i) {
int ix = x >> 16;
x += dx;
- boxwidth = (x >> 16) - ix;
+ boxwidth = MIN1((x >> 16) - ix);
*dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
}
}
@@ -660,25 +675,36 @@ static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx,
const uint32* src_ptr, uint16* dst_ptr) {
int i;
int scaletbl[2];
- int minboxwidth = (dx >> 16);
+ int minboxwidth = dx >> 16;
int* scaleptr = scaletbl - minboxwidth;
int boxwidth;
- scaletbl[0] = 65536 / (minboxwidth * boxheight);
- scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
+ scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
+ scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
for (i = 0; i < dst_width; ++i) {
int ix = x >> 16;
x += dx;
- boxwidth = (x >> 16) - ix;
- *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *
- scaleptr[boxwidth] >> 16;
+ boxwidth = MIN1((x >> 16) - ix);
+ *dst_ptr++ =
+ SumPixels_16(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
+ }
+}
+
+static void ScaleAddCols0_C(int dst_width, int boxheight, int x, int,
+ const uint16* src_ptr, uint8* dst_ptr) {
+ int scaleval = 65536 / boxheight;
+ int i;
+ src_ptr += (x >> 16);
+ for (i = 0; i < dst_width; ++i) {
+ *dst_ptr++ = src_ptr[i] * scaleval >> 16;
}
}
static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
const uint16* src_ptr, uint8* dst_ptr) {
- int boxwidth = (dx >> 16);
+ int boxwidth = MIN1(dx >> 16);
int scaleval = 65536 / (boxwidth * boxheight);
int i;
+ x >>= 16;
for (i = 0; i < dst_width; ++i) {
*dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
x += boxwidth;
@@ -687,7 +713,7 @@ static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx,
const uint32* src_ptr, uint16* dst_ptr) {
- int boxwidth = (dx >> 16);
+ int boxwidth = MIN1(dx >> 16);
int scaleval = 65536 / (boxwidth * boxheight);
int i;
for (i = 0; i < dst_width; ++i) {
@@ -707,7 +733,7 @@ static void ScalePlaneBox(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint8* src_ptr, uint8* dst_ptr) {
- int j;
+ int j, k;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
@@ -717,42 +743,37 @@ static void ScalePlaneBox(int src_width, int src_height,
ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
- // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
- if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
- uint8* dst = dst_ptr;
- int j;
- for (j = 0; j < dst_height; ++j) {
- int boxheight;
- int iy = y >> 16;
- const uint8* src = src_ptr + iy * src_stride;
- y += dy;
- if (y > max_y) {
- y = max_y;
- }
- boxheight = (y >> 16) - iy;
- ScalePlaneBoxRow_C(dst_width, boxheight,
- x, dx, src_stride,
- src, dst);
- dst += dst_stride;
- }
- return;
- }
{
// Allocate a row buffer of uint16.
align_buffer_64(row16, src_width * 2);
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
const uint16* src_ptr, uint8* dst_ptr) =
- (dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C;
- void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
- uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;
-
-#if defined(HAS_SCALEADDROWS_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)
-#ifdef AVOID_OVERREAD
- && IS_ALIGNED(src_width, 16)
+ (dx & 0xffff) ? ScaleAddCols2_C:
+ ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
+ void (*ScaleAddRow)(const uint8* src_ptr, uint16* dst_ptr, int src_width) =
+ ScaleAddRow_C;
+#if defined(HAS_SCALEADDROW_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2)) {
+ ScaleAddRow = ScaleAddRow_Any_SSE2;
+ if (IS_ALIGNED(src_width, 16)) {
+ ScaleAddRow = ScaleAddRow_SSE2;
+ }
+ }
#endif
- ) {
- ScaleAddRows = ScaleAddRows_SSE2;
+#if defined(HAS_SCALEADDROW_AVX2)
+ if (TestCpuFlag(kCpuHasAVX2)) {
+ ScaleAddRow = ScaleAddRow_Any_AVX2;
+ if (IS_ALIGNED(src_width, 32)) {
+ ScaleAddRow = ScaleAddRow_AVX2;
+ }
+ }
+#endif
+#if defined(HAS_SCALEADDROW_NEON)
+ if (TestCpuFlag(kCpuHasNEON)) {
+ ScaleAddRow = ScaleAddRow_Any_NEON;
+ if (IS_ALIGNED(src_width, 16)) {
+ ScaleAddRow = ScaleAddRow_NEON;
+ }
}
#endif
@@ -761,14 +782,16 @@ static void ScalePlaneBox(int src_width, int src_height,
int iy = y >> 16;
const uint8* src = src_ptr + iy * src_stride;
y += dy;
- if (y > (src_height << 16)) {
- y = (src_height << 16);
+ if (y > max_y) {
+ y = max_y;
+ }
+ boxheight = MIN1((y >> 16) - iy);
+ memset(row16, 0, src_width * 2);
+ for (k = 0; k < boxheight; ++k) {
+ ScaleAddRow(src, (uint16 *)(row16), src_width);
+ src += src_stride;
}
- boxheight = (y >> 16) - iy;
- ScaleAddRows(src, src_stride, (uint16*)(row16),
- src_width, boxheight);
- ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16),
- dst_ptr);
+ ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), dst_ptr);
dst_ptr += dst_stride;
}
free_aligned_buffer_64(row16);
@@ -779,7 +802,7 @@ static void ScalePlaneBox_16(int src_width, int src_height,
int dst_width, int dst_height,
int src_stride, int dst_stride,
const uint16* src_ptr, uint16* dst_ptr) {
- int j;
+ int j, k;
// Initial source x/y coordinate and step values as 16.16 fixed point.
int x = 0;
int y = 0;
@@ -789,42 +812,18 @@ static void ScalePlaneBox_16(int src_width, int src_height,
ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
- // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
- if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
- uint16* dst = dst_ptr;
- int j;
- for (j = 0; j < dst_height; ++j) {
- int boxheight;
- int iy = y >> 16;
- const uint16* src = src_ptr + iy * src_stride;
- y += dy;
- if (y > max_y) {
- y = max_y;
- }
- boxheight = (y >> 16) - iy;
- ScalePlaneBoxRow_16_C(dst_width, boxheight,
- x, dx, src_stride,
- src, dst);
- dst += dst_stride;
- }
- return;
- }
{
// Allocate a row buffer of uint32.
align_buffer_64(row32, src_width * 4);
void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
const uint32* src_ptr, uint16* dst_ptr) =
(dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C;
- void (*ScaleAddRows)(const uint16* src_ptr, ptrdiff_t src_stride,
- uint32* dst_ptr, int src_width, int src_height) = ScaleAddRows_16_C;
+ void (*ScaleAddRow)(const uint16* src_ptr, uint32* dst_ptr, int src_width) =
+ ScaleAddRow_16_C;
-#if defined(HAS_SCALEADDROWS_16_SSE2)
- if (TestCpuFlag(kCpuHasSSE2)
-#ifdef AVOID_OVERREAD
- && IS_ALIGNED(src_width, 16)
-#endif
- ) {
- ScaleAddRows = ScaleAddRows_16_SSE2;
+#if defined(HAS_SCALEADDROW_16_SSE2)
+ if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
+ ScaleAddRow = ScaleAddRow_16_SSE2;
}
#endif
@@ -833,14 +832,16 @@ static void ScalePlaneBox_16(int src_width, int src_height,
int iy = y >> 16;
const uint16* src = src_ptr + iy * src_stride;
y += dy;
- if (y > (src_height << 16)) {
- y = (src_height << 16);
+ if (y > max_y) {
+ y = max_y;
}
- boxheight = (y >> 16) - iy;
- ScaleAddRows(src, src_stride, (uint32*)(row32),
- src_width, boxheight);
- ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32),
- dst_ptr);
+ boxheight = MIN1((y >> 16) - iy);
+ memset(row32, 0, src_width * 4);
+ for (k = 0; k < boxheight; ++k) {
+ ScaleAddRow(src, (uint32 *)(row32), src_width);
+ src += src_stride;
+ }
+ ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), dst_ptr);
dst_ptr += dst_stride;
}
free_aligned_buffer_64(row32);
@@ -921,6 +922,14 @@ void ScalePlaneBilinearDown(int src_width, int src_height,
ScaleFilterCols = ScaleFilterCols_SSSE3;
}
#endif
+#if defined(HAS_SCALEFILTERCOLS_NEON)
+ if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
+ ScaleFilterCols = ScaleFilterCols_Any_NEON;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleFilterCols = ScaleFilterCols_NEON;
+ }
+ }
+#endif
if (y > max_y) {
y = max_y;
}
@@ -1057,8 +1066,8 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_C;
void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
- int dst_width, int x, int dx) =
- filtering ? ScaleFilterCols_C : ScaleCols_C;
+ int dst_width, int x, int dx) =
+ filtering ? ScaleFilterCols_C : ScaleCols_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
@@ -1112,6 +1121,14 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
ScaleFilterCols = ScaleFilterCols_SSSE3;
}
#endif
+#if defined(HAS_SCALEFILTERCOLS_NEON)
+ if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
+ ScaleFilterCols = ScaleFilterCols_Any_NEON;
+ if (IS_ALIGNED(dst_width, 8)) {
+ ScaleFilterCols = ScaleFilterCols_NEON;
+ }
+ }
+#endif
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
@@ -1129,7 +1146,7 @@ void ScalePlaneBilinearUp(int src_width, int src_height,
const uint8* src = src_ptr + yi * src_stride;
// Allocate 2 row buffers.
- const int kRowSize = (dst_width + 15) & ~15;
+ const int kRowSize = (dst_width + 31) & ~31;
align_buffer_64(row, kRowSize * 2);
uint8* rowptr = row;
@@ -1188,8 +1205,8 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
InterpolateRow_16_C;
void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
- int dst_width, int x, int dx) =
- filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
+ int dst_width, int x, int dx) =
+ filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
&x, &y, &dx, &dy);
src_width = Abs(src_width);
@@ -1260,7 +1277,7 @@ void ScalePlaneBilinearUp_16(int src_width, int src_height,
const uint16* src = src_ptr + yi * src_stride;
// Allocate 2 row buffers.
- const int kRowSize = (dst_width + 15) & ~15;
+ const int kRowSize = (dst_width + 31) & ~31;
align_buffer_64(row, kRowSize * 4);
uint16* rowptr = (uint16*)row;
@@ -1334,8 +1351,7 @@ static void ScalePlaneSimple(int src_width, int src_height,
}
for (i = 0; i < dst_height; ++i) {
- ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
- dst_width, x, dx);
+ ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
dst_ptr += dst_stride;
y += dy;
}
@@ -1385,8 +1401,7 @@ void ScalePlane(const uint8* src, int src_stride,
enum FilterMode filtering) {
// Simplify filtering when possible.
filtering = ScaleFilterReduce(src_width, src_height,
- dst_width, dst_height,
- filtering);
+ dst_width, dst_height, filtering);
// Negative height means invert the image.
if (src_height < 0) {
@@ -1402,9 +1417,9 @@ void ScalePlane(const uint8* src, int src_stride,
CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
return;
}
- if (dst_width == src_width) {
+ if (dst_width == src_width && filtering != kFilterBox) {
int dy = FixedDiv(src_height, dst_height);
- // Arbitrary scale vertically, but unscaled vertically.
+ // Arbitrary scale vertically, but unscaled horizontally.
ScalePlaneVertical(src_height,
dst_width, dst_height,
src_stride, dst_stride, src, dst,
@@ -1435,7 +1450,7 @@ void ScalePlane(const uint8* src, int src_stride,
return;
}
if (4 * dst_width == src_width && 4 * dst_height == src_height &&
- filtering != kFilterBilinear) {
+ (filtering == kFilterBox || filtering == kFilterNone)) {
// optimized, 1/4
ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
src_stride, dst_stride, src, dst, filtering);
@@ -1469,8 +1484,7 @@ void ScalePlane_16(const uint16* src, int src_stride,
enum FilterMode filtering) {
// Simplify filtering when possible.
filtering = ScaleFilterReduce(src_width, src_height,
- dst_width, dst_height,
- filtering);
+ dst_width, dst_height, filtering);
// Negative height means invert the image.
if (src_height < 0) {
@@ -1563,6 +1577,7 @@ int I420Scale(const uint8* src_y, int src_stride_y,
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
+ src_width > 32768 || src_height > 32768 ||
!dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
return -1;
}
@@ -1594,6 +1609,7 @@ int I420Scale_16(const uint16* src_y, int src_stride_y,
int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
+ src_width > 32768 || src_height > 32768 ||
!dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
return -1;
}
« no previous file with comments | « source/libvpx/third_party/libyuv/source/row_win.cc ('k') | source/libvpx/third_party/libyuv/source/scale_any.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698