OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ |
| 10 |
| 11 #include "third_party/libyuv/include/libyuv/scale.h" |
| 12 |
| 13 #include <assert.h> |
| 14 #include <string.h> |
| 15 |
| 16 #include "third_party/libyuv/include/libyuv/cpu_id.h" |
| 17 #include "third_party/libyuv/include/libyuv/planar_functions.h" // CopyPlane |
| 18 #include "third_party/libyuv/include/libyuv/row.h" |
| 19 #include "third_party/libyuv/include/libyuv/scale_row.h" |
| 20 |
| 21 #ifdef __cplusplus |
| 22 namespace libyuv { |
| 23 extern "C" { |
| 24 #endif |
| 25 |
| 26 // Remove this macro if OVERREAD is safe. |
| 27 #define AVOID_OVERREAD 1 |
| 28 |
// Absolute value of an int; used to normalize negative (inverted)
// plane dimensions before scaling.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
| 32 |
| 33 #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s) |
| 34 |
| 35 // Scale plane, 1/2 |
| 36 // This is an optimized version for scaling down a plane to 1/2 of |
| 37 // its original size. |
| 38 |
// Scale a plane down to exactly 1/2 of its width and height.
// filtering selects the row kernel: kFilterNone subsamples (odd source
// rows), kFilterLinear averages 2 horizontal pixels, other modes
// box-average 2x2 pixels. A CPU-specific kernel replaces the C version
// when the platform and alignment allow.
static void ScalePlaneDown2(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  int y;
  // Default to the portable C row functions.
  void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width) =
      filtering == kFilterNone ? ScaleRowDown2_C :
      (filtering == kFilterLinear ? ScaleRowDown2Linear_C :
      ScaleRowDown2Box_C);
  int row_stride = src_stride << 1;  // Advance 2 source rows per output row.
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }

#if defined(HAS_SCALEROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
    ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON;
  }
#elif defined(HAS_SCALEROWDOWN2_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
    // Unaligned SSE2 variants first; upgrade to aligned when possible.
    ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Unaligned_SSE2 :
        (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_SSE2 :
        ScaleRowDown2Box_Unaligned_SSE2);
    if (IS_ALIGNED(src_ptr, 16) &&
        IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
          (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
          ScaleRowDown2Box_SSE2);
    }
  }
#elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown2 = filtering ?
        ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;  // Linear filtering reads a single source row.
  }
  // TODO(fbarchard): Loop through source height to allow odd height.
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
| 92 |
// 16-bit pixel variant of ScalePlaneDown2: scale a uint16 plane down to
// exactly 1/2 of its width and height. See ScalePlaneDown2 for the
// filtering semantics.
static void ScalePlaneDown2_16(int src_width, int src_height,
                               int dst_width, int dst_height,
                               int src_stride, int dst_stride,
                               const uint16* src_ptr, uint16* dst_ptr,
                               enum FilterMode filtering) {
  int y;
  // Default to the portable C row functions.
  void (*ScaleRowDown2)(const uint16* src_ptr, ptrdiff_t src_stride,
                        uint16* dst_ptr, int dst_width) =
      filtering == kFilterNone ? ScaleRowDown2_16_C :
      (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C :
      ScaleRowDown2Box_16_C);
  int row_stride = src_stride << 1;  // Advance 2 source rows per output row.
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }

#if defined(HAS_SCALEROWDOWN2_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
    ScaleRowDown2 = filtering ? ScaleRowDown2Box_16_NEON :
        ScaleRowDown2_16_NEON;
  }
#elif defined(HAS_SCALEROWDOWN2_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
    // Unaligned SSE2 variants first; upgrade to aligned when possible.
    ScaleRowDown2 = filtering == kFilterNone ?
        ScaleRowDown2_Unaligned_16_SSE2 :
        (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_16_SSE2 :
        ScaleRowDown2Box_Unaligned_16_SSE2);
    if (IS_ALIGNED(src_ptr, 16) &&
        IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 :
          (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 :
          ScaleRowDown2Box_16_SSE2);
    }
  }
#elif defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown2 = filtering ?
        ScaleRowDown2Box_16_MIPS_DSPR2 : ScaleRowDown2_16_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;  // Linear filtering reads a single source row.
  }
  // TODO(fbarchard): Loop through source height to allow odd height.
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
| 148 |
| 149 // Scale plane, 1/4 |
| 150 // This is an optimized version for scaling down a plane to 1/4 of |
| 151 // its original size. |
| 152 |
// Scale a plane down to exactly 1/4 of its width and height.
// filtering == 0 subsamples (one pixel per 4x4 block, from row 2);
// any other mode box-averages 4x4 pixels.
static void ScalePlaneDown4(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  int y;
  // Default to the portable C row function.
  void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
  int row_stride = src_stride << 2;  // Advance 4 source rows per output row.
  if (!filtering) {
    src_ptr += src_stride * 2;  // Point to row 2.
    src_stride = 0;
  }
#if defined(HAS_SCALEROWDOWN4_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
  }
#elif defined(HAS_SCALEROWDOWN4_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
  }
#elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown4 = filtering ?
        ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;  // Linear filtering reads a single source row.
  }
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
| 195 |
// 16-bit pixel variant of ScalePlaneDown4: scale a uint16 plane down to
// exactly 1/4 of its width and height.
static void ScalePlaneDown4_16(int src_width, int src_height,
                               int dst_width, int dst_height,
                               int src_stride, int dst_stride,
                               const uint16* src_ptr, uint16* dst_ptr,
                               enum FilterMode filtering) {
  int y;
  // Default to the portable C row function.
  void (*ScaleRowDown4)(const uint16* src_ptr, ptrdiff_t src_stride,
                        uint16* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
  int row_stride = src_stride << 2;  // Advance 4 source rows per output row.
  if (!filtering) {
    src_ptr += src_stride * 2;  // Point to row 2.
    src_stride = 0;
  }
#if defined(HAS_SCALEROWDOWN4_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_NEON :
        ScaleRowDown4_16_NEON;
  }
#elif defined(HAS_SCALEROWDOWN4_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 :
        ScaleRowDown4_16_SSE2;
  }
#elif defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown4 = filtering ?
        ScaleRowDown4Box_16_MIPS_DSPR2 : ScaleRowDown4_16_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;  // Linear filtering reads a single source row.
  }
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
| 240 |
| 241 // Scale plane down, 3/4 |
| 242 |
// Scale a plane down to 3/4 of its size: emits 3 destination rows per
// 4 source rows. Two row kernels cover the two distinct vertical filter
// phases; the third output row of each group reuses the first kernel
// reading upward via a negative stride.
static void ScalePlaneDown34(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr,
                             enum FilterMode filtering) {
  int y;
  // Row kernels for the two vertical filter phases.
  void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  // Linear filtering ignores the second row, so pass stride 0.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown34_0 = ScaleRowDown34_C;
    ScaleRowDown34_1 = ScaleRowDown34_C;
  } else {
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
  }
#if defined(HAS_SCALEROWDOWN34_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_NEON;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_MIPS_DSPR2;
    }
  }
#endif

  // 3 destination rows per 4 source rows.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    // Reuse the _0 kernel from the row below with a negative stride,
    // mirroring the filter vertically.
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
                     dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  }
}
| 322 |
// 16-bit pixel variant of ScalePlaneDown34: scale a uint16 plane to 3/4
// of its size, 3 destination rows per 4 source rows.
static void ScalePlaneDown34_16(int src_width, int src_height,
                                int dst_width, int dst_height,
                                int src_stride, int dst_stride,
                                const uint16* src_ptr, uint16* dst_ptr,
                                enum FilterMode filtering) {
  int y;
  // Row kernels for the two vertical filter phases.
  void (*ScaleRowDown34_0)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  // Linear filtering ignores the second row, so pass stride 0.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown34_0 = ScaleRowDown34_16_C;
    ScaleRowDown34_1 = ScaleRowDown34_16_C;
  } else {
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
  }
#if defined(HAS_SCALEROWDOWN34_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_16_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_16_MIPS_DSPR2;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_MIPS_DSPR2;
    }
  }
#endif

  // 3 destination rows per 4 source rows.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    // Reuse the _0 kernel from the row below with a negative stride,
    // mirroring the filter vertically.
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
                     dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  }
}
| 402 |
| 403 |
| 404 // Scale plane, 3/8 |
| 405 // This is an optimized version for scaling down a plane to 3/8 |
| 406 // of its original size. |
| 407 // |
| 408 // Uses box filter arranges like this |
| 409 // aaabbbcc -> abc |
| 410 // aaabbbcc def |
| 411 // aaabbbcc ghi |
| 412 // dddeeeff |
| 413 // dddeeeff |
| 414 // dddeeeff |
| 415 // ggghhhii |
| 416 // ggghhhii |
| 417 // Boxes are 3x3, 2x3, 3x2 and 2x2 |
| 418 |
// Scale a plane down to 3/8 of its size: 3 destination rows per 8 source
// rows, using vertical boxes of 3, 3 and 2 rows (see diagram above).
static void ScalePlaneDown38(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr,
                             enum FilterMode filtering) {
  int y;
  // Row kernels for a 3-row box and a 2-row box respectively.
  void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  // Linear filtering ignores the extra rows, so pass stride 0.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown38_3 = ScaleRowDown38_C;
    ScaleRowDown38_2 = ScaleRowDown38_C;
  } else {
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
  }
#if defined(HAS_SCALEROWDOWN38_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_NEON;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
    }
  }
#elif defined(HAS_SCALEROWDOWN38_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
    }
  }
#elif defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_MIPS_DSPR2;
    }
  }
#endif

  // 3 destination rows per 8 source rows (3 + 3 + 2).
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  }
}
| 495 |
// 16-bit pixel variant of ScalePlaneDown38: scale a uint16 plane to 3/8
// of its size, 3 destination rows per 8 source rows.
static void ScalePlaneDown38_16(int src_width, int src_height,
                                int dst_width, int dst_height,
                                int src_stride, int dst_stride,
                                const uint16* src_ptr, uint16* dst_ptr,
                                enum FilterMode filtering) {
  int y;
  // Row kernels for a 3-row box and a 2-row box respectively.
  void (*ScaleRowDown38_3)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  // Linear filtering ignores the extra rows, so pass stride 0.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown38_3 = ScaleRowDown38_16_C;
    ScaleRowDown38_2 = ScaleRowDown38_16_C;
  } else {
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
  }
#if defined(HAS_SCALEROWDOWN38_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
    }
  }
#elif defined(HAS_SCALEROWDOWN38_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
    }
  }
#elif defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_16_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_16_MIPS_DSPR2;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_MIPS_DSPR2;
    }
  }
#endif

  // 3 destination rows per 8 source rows (3 + 3 + 2).
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  }
}
| 572 |
| 573 static __inline uint32 SumBox(int iboxwidth, int iboxheight, |
| 574 ptrdiff_t src_stride, const uint8* src_ptr) { |
| 575 uint32 sum = 0u; |
| 576 int y; |
| 577 assert(iboxwidth > 0); |
| 578 assert(iboxheight > 0); |
| 579 for (y = 0; y < iboxheight; ++y) { |
| 580 int x; |
| 581 for (x = 0; x < iboxwidth; ++x) { |
| 582 sum += src_ptr[x]; |
| 583 } |
| 584 src_ptr += src_stride; |
| 585 } |
| 586 return sum; |
| 587 } |
| 588 |
| 589 static __inline uint32 SumBox_16(int iboxwidth, int iboxheight, |
| 590 ptrdiff_t src_stride, const uint16* src_ptr) { |
| 591 uint32 sum = 0u; |
| 592 int y; |
| 593 assert(iboxwidth > 0); |
| 594 assert(iboxheight > 0); |
| 595 for (y = 0; y < iboxheight; ++y) { |
| 596 int x; |
| 597 for (x = 0; x < iboxwidth; ++x) { |
| 598 sum += src_ptr[x]; |
| 599 } |
| 600 src_ptr += src_stride; |
| 601 } |
| 602 return sum; |
| 603 } |
| 604 |
| 605 static void ScalePlaneBoxRow_C(int dst_width, int boxheight, |
| 606 int x, int dx, ptrdiff_t src_stride, |
| 607 const uint8* src_ptr, uint8* dst_ptr) { |
| 608 int i; |
| 609 int boxwidth; |
| 610 for (i = 0; i < dst_width; ++i) { |
| 611 int ix = x >> 16; |
| 612 x += dx; |
| 613 boxwidth = (x >> 16) - ix; |
| 614 *dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) / |
| 615 (boxwidth * boxheight); |
| 616 } |
| 617 } |
| 618 |
| 619 static void ScalePlaneBoxRow_16_C(int dst_width, int boxheight, |
| 620 int x, int dx, ptrdiff_t src_stride, |
| 621 const uint16* src_ptr, uint16* dst_ptr) { |
| 622 int i; |
| 623 int boxwidth; |
| 624 for (i = 0; i < dst_width; ++i) { |
| 625 int ix = x >> 16; |
| 626 x += dx; |
| 627 boxwidth = (x >> 16) - ix; |
| 628 *dst_ptr++ = SumBox_16(boxwidth, boxheight, src_stride, src_ptr + ix) / |
| 629 (boxwidth * boxheight); |
| 630 } |
| 631 } |
| 632 |
| 633 static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) { |
| 634 uint32 sum = 0u; |
| 635 int x; |
| 636 assert(iboxwidth > 0); |
| 637 for (x = 0; x < iboxwidth; ++x) { |
| 638 sum += src_ptr[x]; |
| 639 } |
| 640 return sum; |
| 641 } |
| 642 |
| 643 static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) { |
| 644 uint32 sum = 0u; |
| 645 int x; |
| 646 assert(iboxwidth > 0); |
| 647 for (x = 0; x < iboxwidth; ++x) { |
| 648 sum += src_ptr[x]; |
| 649 } |
| 650 return sum; |
| 651 } |
| 652 |
| 653 static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx, |
| 654 const uint16* src_ptr, uint8* dst_ptr) { |
| 655 int i; |
| 656 int scaletbl[2]; |
| 657 int minboxwidth = (dx >> 16); |
| 658 int* scaleptr = scaletbl - minboxwidth; |
| 659 int boxwidth; |
| 660 scaletbl[0] = 65536 / (minboxwidth * boxheight); |
| 661 scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight); |
| 662 for (i = 0; i < dst_width; ++i) { |
| 663 int ix = x >> 16; |
| 664 x += dx; |
| 665 boxwidth = (x >> 16) - ix; |
| 666 *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16; |
| 667 } |
| 668 } |
| 669 |
| 670 static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx, |
| 671 const uint32* src_ptr, uint16* dst_ptr) { |
| 672 int i; |
| 673 int scaletbl[2]; |
| 674 int minboxwidth = (dx >> 16); |
| 675 int* scaleptr = scaletbl - minboxwidth; |
| 676 int boxwidth; |
| 677 scaletbl[0] = 65536 / (minboxwidth * boxheight); |
| 678 scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight); |
| 679 for (i = 0; i < dst_width; ++i) { |
| 680 int ix = x >> 16; |
| 681 x += dx; |
| 682 boxwidth = (x >> 16) - ix; |
| 683 *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) * |
| 684 scaleptr[boxwidth] >> 16; |
| 685 } |
| 686 } |
| 687 |
| 688 static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx, |
| 689 const uint16* src_ptr, uint8* dst_ptr) { |
| 690 int boxwidth = (dx >> 16); |
| 691 int scaleval = 65536 / (boxwidth * boxheight); |
| 692 int i; |
| 693 for (i = 0; i < dst_width; ++i) { |
| 694 *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16; |
| 695 x += boxwidth; |
| 696 } |
| 697 } |
| 698 |
| 699 static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx, |
| 700 const uint32* src_ptr, uint16* dst_ptr) { |
| 701 int boxwidth = (dx >> 16); |
| 702 int scaleval = 65536 / (boxwidth * boxheight); |
| 703 int i; |
| 704 for (i = 0; i < dst_width; ++i) { |
| 705 *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16; |
| 706 x += boxwidth; |
| 707 } |
| 708 } |
| 709 |
| 710 // Scale plane down to any dimensions, with interpolation. |
| 711 // (boxfilter). |
| 712 // |
| 713 // Same method as SimpleScale, which is fixed point, outputting |
| 714 // one pixel of destination using fixed point (16.16) to step |
| 715 // through source, sampling a box of pixel with simple |
| 716 // averaging. |
// Scale a plane down to arbitrary dimensions with a box filter: step
// through the source in 16.16 fixed point, sum a box of source pixels
// per destination pixel, and average.
static void ScalePlaneBox(int src_width, int src_height,
                          int dst_width, int dst_height,
                          int src_stride, int dst_stride,
                          const uint8* src_ptr, uint8* dst_ptr) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  const int max_y = (src_height << 16);
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);
  // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
  // Slow path: width not a multiple of 16, or boxes shorter than 2 rows.
  if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
    uint8* dst = dst_ptr;
    int j;  // NOTE(review): shadows the outer j declared above.
    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint8* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;  // Clamp the box to the bottom of the plane.
      }
      boxheight = (y >> 16) - iy;
      ScalePlaneBoxRow_C(dst_width, boxheight,
                         x, dx, src_stride,
                         src, dst);
      dst += dst_stride;
    }
    return;
  }
  {
    // Fast path: sum rows into a uint16 buffer, then average columns.
    // Allocate a row buffer of uint16.
    align_buffer_64(row16, src_width * 2);
    // Fractional dx needs the two-width column summer.
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
        const uint16* src_ptr, uint8* dst_ptr) =
        (dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C;
    void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
        uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;

#if defined(HAS_SCALEADDROWS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) &&
#ifdef AVOID_OVERREAD
        IS_ALIGNED(src_width, 16) &&
#endif
        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
      ScaleAddRows = ScaleAddRows_SSE2;
    }
#endif

    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint8* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > (src_height << 16)) {  // Same clamp as max_y above.
        y = (src_height << 16);
      }
      boxheight = (y >> 16) - iy;
      ScaleAddRows(src, src_stride, (uint16*)(row16),
                   src_width, boxheight);
      ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16),
                   dst_ptr);
      dst_ptr += dst_stride;
    }
    free_aligned_buffer_64(row16);
  }
}
| 788 |
// 16-bit pixel variant of ScalePlaneBox: box-filter a uint16 plane down
// to arbitrary dimensions, accumulating row sums in a uint32 buffer.
static void ScalePlaneBox_16(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint16* src_ptr, uint16* dst_ptr) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  const int max_y = (src_height << 16);
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);
  // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
  // Slow path: width not a multiple of 16, or boxes shorter than 2 rows.
  if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
    uint16* dst = dst_ptr;
    int j;  // NOTE(review): shadows the outer j declared above.
    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint16* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;  // Clamp the box to the bottom of the plane.
      }
      boxheight = (y >> 16) - iy;
      ScalePlaneBoxRow_16_C(dst_width, boxheight,
                            x, dx, src_stride,
                            src, dst);
      dst += dst_stride;
    }
    return;
  }
  {
    // Fast path: sum rows into a uint32 buffer, then average columns.
    // Allocate a row buffer of uint32.
    align_buffer_64(row32, src_width * 4);
    // Fractional dx needs the two-width column summer.
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
        const uint32* src_ptr, uint16* dst_ptr) =
        (dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C;
    void (*ScaleAddRows)(const uint16* src_ptr, ptrdiff_t src_stride,
        uint32* dst_ptr, int src_width, int src_height) = ScaleAddRows_16_C;

#if defined(HAS_SCALEADDROWS_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) &&
#ifdef AVOID_OVERREAD
        IS_ALIGNED(src_width, 16) &&
#endif
        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
      ScaleAddRows = ScaleAddRows_16_SSE2;
    }
#endif

    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint16* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > (src_height << 16)) {  // Same clamp as max_y above.
        y = (src_height << 16);
      }
      boxheight = (y >> 16) - iy;
      ScaleAddRows(src, src_stride, (uint32*)(row32),
                   src_width, boxheight);
      ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32),
                   dst_ptr);
      dst_ptr += dst_stride;
    }
    free_aligned_buffer_64(row32);
  }
}
| 860 |
| 861 // Scale plane down with bilinear interpolation. |
void ScalePlaneBilinearDown(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer.
  // NOTE(review): sized from src_width before the Abs() below; assumes
  // callers pass a positive src_width here - confirm.
  align_buffer_64(row, src_width);

  // Last source row in 16.16 fixed point; y is clamped to this below.
  const int max_y = (src_height - 1) << 16;
  int j;
  // Horizontal scaler; 64-bit variant avoids overflow for very wide sources.
  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
  // Vertical blender of two adjacent source rows by source_y_fraction.
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  // Select the fastest InterpolateRow the CPU supports. _Any_ variants
  // handle widths that are not a multiple of the SIMD width; Unaligned
  // variants drop the pointer/stride alignment requirement.
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
        InterpolateRow = InterpolateRow_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
        InterpolateRow = InterpolateRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
    if (IS_ALIGNED(src_width, 4)) {
      InterpolateRow = InterpolateRow_MIPS_DSPR2;
    }
  }
#endif


#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
  // Clamp the starting coordinate to the last source row.
  if (y > max_y) {
    y = max_y;
  }

  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;  // integer source row
    const uint8* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      // Horizontal-only filtering: sample a single source row directly.
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      // Blend rows yi and yi+1 by the fractional part of y into the row
      // buffer, then scale the blended row horizontally.
      int yf = (y >> 8) & 255;  // top 8 bits of the 16-bit y fraction
      InterpolateRow(row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
    if (y > max_y) {
      y = max_y;
    }
  }
  free_aligned_buffer_64(row);
}
| 963 |
// 16-bit-per-channel variant of ScalePlaneBilinearDown.
void ScalePlaneBilinearDown_16(int src_width, int src_height,
                               int dst_width, int dst_height,
                               int src_stride, int dst_stride,
                               const uint16* src_ptr, uint16* dst_ptr,
                               enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer (2 bytes per 16-bit pixel).
  // NOTE(review): sized from src_width before the Abs() below; assumes
  // callers pass a positive src_width here - confirm.
  align_buffer_64(row, src_width * 2);

  // Last source row in 16.16 fixed point; y is clamped to this below.
  const int max_y = (src_height - 1) << 16;
  int j;
  // Horizontal scaler; 64-bit variant avoids overflow for very wide sources.
  void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
      int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
  // Vertical blender of two adjacent source rows by source_y_fraction.
  void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  // Select the fastest InterpolateRow the CPU supports. _Any_ variants
  // handle widths that are not a multiple of the SIMD width; Unaligned
  // variants drop the pointer/stride alignment requirement.
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_16_SSE2;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
        InterpolateRow = InterpolateRow_16_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_16_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
        InterpolateRow = InterpolateRow_16_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
    InterpolateRow = InterpolateRow_Any_16_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_16_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
    InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
    if (IS_ALIGNED(src_width, 4)) {
      InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
    }
  }
#endif


#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  }
#endif
  // Clamp the starting coordinate to the last source row.
  if (y > max_y) {
    y = max_y;
  }

  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;  // integer source row
    const uint16* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      // Horizontal-only filtering: sample a single source row directly.
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      // Blend rows yi and yi+1 by the fractional part of y into the row
      // buffer, then scale the blended row horizontally.
      int yf = (y >> 8) & 255;  // top 8 bits of the 16-bit y fraction
      InterpolateRow((uint16*)row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, (uint16*)row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
    if (y > max_y) {
      y = max_y;
    }
  }
  free_aligned_buffer_64(row);
}
| 1065 |
// Scale plane up with bilinear interpolation.
void ScalePlaneBilinearUp(int src_width, int src_height,
                          int dst_width, int dst_height,
                          int src_stride, int dst_stride,
                          const uint8* src_ptr, uint8* dst_ptr,
                          enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // Last source row in 16.16 fixed point; y is clamped to this.
  const int max_y = (src_height - 1) << 16;
  // Vertical blender of two row buffers by source_y_fraction.
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
  // Horizontal scaler: filtered columns, or point-sampled when no filter.
  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_C : ScaleCols_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  // Select the fastest InterpolateRow the CPU supports; rows are blended
  // at destination width here, so gates test dst_width/dst_ptr alignment.
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_MIPS_DSPR2;
    }
  }
#endif

  // 64-bit column scaler avoids overflow for very wide sources.
  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_C;
  }
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
  // Unfiltered exact 2x horizontal upscale has a specialized doubler.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleFilterCols = ScaleColsUp2_SSE2;
    }
#endif
  }

  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint8* src = src_ptr + yi * src_stride;

    // Allocate 2 row buffers (16-byte padded width) used as a ping-pong
    // pair holding the two horizontally-scaled source rows being blended.
    const int kRowSize = (dst_width + 15) & ~15;
    align_buffer_64(row, kRowSize * 2);

    uint8* rowptr = row;
    int rowstride = kRowSize;
    int lasty = yi;  // source row currently held in the buffers

    // Prime the pair with rows yi and yi+1 (clamped for 1-row sources).
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;

    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * src_stride;
        }
        if (yi != lasty) {
          // Advance one source row: scale it into the older buffer and
          // flip the pair by negating rowstride.
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        // Horizontal-only: emit the current row unblended.
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        int yf = (y >> 8) & 255;  // top 8 bits of the 16-bit y fraction
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
| 1205 |
// 16-bit-per-channel variant of ScalePlaneBilinearUp.
void ScalePlaneBilinearUp_16(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint16* src_ptr, uint16* dst_ptr,
                             enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // Last source row in 16.16 fixed point; y is clamped to this.
  const int max_y = (src_height - 1) << 16;
  // Vertical blender of two row buffers by source_y_fraction.
  void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_16_C;
  // Horizontal scaler: filtered columns, or point-sampled when no filter.
  void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
      int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  // Select the fastest InterpolateRow the CPU supports; rows are blended
  // at destination width here, so gates test dst_width/dst_ptr alignment.
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_16_SSE2;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_16_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_16_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_16_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
    InterpolateRow = InterpolateRow_Any_16_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_16_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
    }
  }
#endif

  // 64-bit column scaler avoids overflow for very wide sources.
  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_16_C;
  }
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  }
#endif
  // Unfiltered exact 2x horizontal upscale has a specialized doubler.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleFilterCols = ScaleColsUp2_16_SSE2;
    }
#endif
  }

  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint16* src = src_ptr + yi * src_stride;

    // Allocate 2 row buffers (16-pixel padded width, 2 bytes per pixel)
    // used as a ping-pong pair holding the two scaled rows being blended.
    const int kRowSize = (dst_width + 15) & ~15;
    align_buffer_64(row, kRowSize * 4);

    uint16* rowptr = (uint16*)row;
    int rowstride = kRowSize;
    int lasty = yi;  // source row currently held in the buffers

    // Prime the pair with rows yi and yi+1 (clamped for 1-row sources).
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;

    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * src_stride;
        }
        if (yi != lasty) {
          // Advance one source row: scale it into the older buffer and
          // flip the pair by negating rowstride.
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        // Horizontal-only: emit the current row unblended.
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        int yf = (y >> 8) & 255;  // top 8 bits of the 16-bit y fraction
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
| 1344 |
| 1345 // Scale Plane to/from any dimensions, without interpolation. |
| 1346 // Fixed point math is used for performance: The upper 16 bits |
| 1347 // of x and dx is the integer part of the source position and |
| 1348 // the lower 16 bits are the fixed decimal part. |
| 1349 |
| 1350 static void ScalePlaneSimple(int src_width, int src_height, |
| 1351 int dst_width, int dst_height, |
| 1352 int src_stride, int dst_stride, |
| 1353 const uint8* src_ptr, uint8* dst_ptr) { |
| 1354 int i; |
| 1355 void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr, |
| 1356 int dst_width, int x, int dx) = ScaleCols_C; |
| 1357 // Initial source x/y coordinate and step values as 16.16 fixed point. |
| 1358 int x = 0; |
| 1359 int y = 0; |
| 1360 int dx = 0; |
| 1361 int dy = 0; |
| 1362 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, |
| 1363 &x, &y, &dx, &dy); |
| 1364 src_width = Abs(src_width); |
| 1365 |
| 1366 if (src_width * 2 == dst_width && x < 0x8000) { |
| 1367 ScaleCols = ScaleColsUp2_C; |
| 1368 #if defined(HAS_SCALECOLS_SSE2) |
| 1369 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && |
| 1370 IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && |
| 1371 IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { |
| 1372 ScaleCols = ScaleColsUp2_SSE2; |
| 1373 } |
| 1374 #endif |
| 1375 } |
| 1376 |
| 1377 for (i = 0; i < dst_height; ++i) { |
| 1378 ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, |
| 1379 dst_width, x, dx); |
| 1380 dst_ptr += dst_stride; |
| 1381 y += dy; |
| 1382 } |
| 1383 } |
| 1384 |
| 1385 static void ScalePlaneSimple_16(int src_width, int src_height, |
| 1386 int dst_width, int dst_height, |
| 1387 int src_stride, int dst_stride, |
| 1388 const uint16* src_ptr, uint16* dst_ptr) { |
| 1389 int i; |
| 1390 void (*ScaleCols)(uint16* dst_ptr, const uint16* src_ptr, |
| 1391 int dst_width, int x, int dx) = ScaleCols_16_C; |
| 1392 // Initial source x/y coordinate and step values as 16.16 fixed point. |
| 1393 int x = 0; |
| 1394 int y = 0; |
| 1395 int dx = 0; |
| 1396 int dy = 0; |
| 1397 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, |
| 1398 &x, &y, &dx, &dy); |
| 1399 src_width = Abs(src_width); |
| 1400 |
| 1401 if (src_width * 2 == dst_width && x < 0x8000) { |
| 1402 ScaleCols = ScaleColsUp2_16_C; |
| 1403 #if defined(HAS_SCALECOLS_16_SSE2) |
| 1404 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && |
| 1405 IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && |
| 1406 IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { |
| 1407 ScaleCols = ScaleColsUp2_16_SSE2; |
| 1408 } |
| 1409 #endif |
| 1410 } |
| 1411 |
| 1412 for (i = 0; i < dst_height; ++i) { |
| 1413 ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, |
| 1414 dst_width, x, dx); |
| 1415 dst_ptr += dst_stride; |
| 1416 y += dy; |
| 1417 } |
| 1418 } |
| 1419 |
| 1420 // Scale a plane. |
| 1421 // This function dispatches to a specialized scaler based on scale factor. |
| 1422 |
LIBYUV_API
void ScalePlane(const uint8* src, int src_stride,
                int src_width, int src_height,
                uint8* dst, int dst_stride,
                int dst_width, int dst_height,
                enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height,
                                dst_width, dst_height,
                                filtering);

  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    // Start at the last row and walk upward via a negative stride.
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }

  // Use specialized scales to improve performance for common resolutions.
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
  if (dst_width == src_width && dst_height == src_height) {
    // Straight copy.
    CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
    return;
  }
  if (dst_width == src_width) {
    int dy = FixedDiv(src_height, dst_height);
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical(src_height,
                       dst_width, dst_height,
                       src_stride, dst_stride, src, dst,
                       0, 0, dy, 1, filtering);
    return;
  }
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    // Scale down.
    if (4 * dst_width == 3 * src_width &&
        4 * dst_height == 3 * src_height) {
      // optimized, 3/4
      ScalePlaneDown34(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
      // optimized, 1/2
      ScalePlaneDown2(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst, filtering);
      return;
    }
    // 3/8 rounded up for odd sized chroma height.
    if (8 * dst_width == 3 * src_width &&
        dst_height == ((src_height * 3 + 7) / 8)) {
      // optimized, 3/8
      ScalePlaneDown38(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
        filtering != kFilterBilinear) {
      // optimized, 1/4
      ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst, filtering);
      return;
    }
  }
  // General paths: box filter for >2x vertical downscale, bilinear up or
  // down when filtering, point sampling when no filtering remains.
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    ScalePlaneBox(src_width, src_height, dst_width, dst_height,
                  src_stride, dst_stride, src, dst);
    return;
  }
  if (filtering && dst_height > src_height) {
    ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
    return;
  }
  if (filtering) {
    ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
                           src_stride, dst_stride, src, dst, filtering);
    return;
  }
  ScalePlaneSimple(src_width, src_height, dst_width, dst_height,
                   src_stride, dst_stride, src, dst);
}
| 1506 |
LIBYUV_API
void ScalePlane_16(const uint16* src, int src_stride,
                   int src_width, int src_height,
                   uint16* dst, int dst_stride,
                   int dst_width, int dst_height,
                   enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height,
                                dst_width, dst_height,
                                filtering);

  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    // Start at the last row and walk upward via a negative stride.
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }

  // Use specialized scales to improve performance for common resolutions.
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
  if (dst_width == src_width && dst_height == src_height) {
    // Straight copy.
    CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
    return;
  }
  if (dst_width == src_width) {
    int dy = FixedDiv(src_height, dst_height);
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical_16(src_height,
                          dst_width, dst_height,
                          src_stride, dst_stride, src, dst,
                          0, 0, dy, 1, filtering);
    return;
  }
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    // Scale down.
    if (4 * dst_width == 3 * src_width &&
        4 * dst_height == 3 * src_height) {
      // optimized, 3/4
      ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
      // optimized, 1/2
      ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
      return;
    }
    // 3/8 rounded up for odd sized chroma height.
    if (8 * dst_width == 3 * src_width &&
        dst_height == ((src_height * 3 + 7) / 8)) {
      // optimized, 3/8
      ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
        filtering != kFilterBilinear) {
      // optimized, 1/4
      ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
      return;
    }
  }
  // General paths: box filter for >2x vertical downscale, bilinear up or
  // down when filtering, point sampling when no filtering remains.
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    ScalePlaneBox_16(src_width, src_height, dst_width, dst_height,
                     src_stride, dst_stride, src, dst);
    return;
  }
  if (filtering && dst_height > src_height) {
    ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
                            src_stride, dst_stride, src, dst, filtering);
    return;
  }
  if (filtering) {
    ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
                              src_stride, dst_stride, src, dst, filtering);
    return;
  }
  ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst);
}
| 1590 |
| 1591 // Scale an I420 image. |
| 1592 // This function in turn calls a scaling function for each plane. |
| 1593 |
| 1594 LIBYUV_API |
| 1595 int I420Scale(const uint8* src_y, int src_stride_y, |
| 1596 const uint8* src_u, int src_stride_u, |
| 1597 const uint8* src_v, int src_stride_v, |
| 1598 int src_width, int src_height, |
| 1599 uint8* dst_y, int dst_stride_y, |
| 1600 uint8* dst_u, int dst_stride_u, |
| 1601 uint8* dst_v, int dst_stride_v, |
| 1602 int dst_width, int dst_height, |
| 1603 enum FilterMode filtering) { |
| 1604 int src_halfwidth = SUBSAMPLE(src_width, 1, 1); |
| 1605 int src_halfheight = SUBSAMPLE(src_height, 1, 1); |
| 1606 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); |
| 1607 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); |
| 1608 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || |
| 1609 !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { |
| 1610 return -1; |
| 1611 } |
| 1612 |
| 1613 ScalePlane(src_y, src_stride_y, src_width, src_height, |
| 1614 dst_y, dst_stride_y, dst_width, dst_height, |
| 1615 filtering); |
| 1616 ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, |
| 1617 dst_u, dst_stride_u, dst_halfwidth, dst_halfheight, |
| 1618 filtering); |
| 1619 ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, |
| 1620 dst_v, dst_stride_v, dst_halfwidth, dst_halfheight, |
| 1621 filtering); |
| 1622 return 0; |
| 1623 } |
| 1624 |
| 1625 LIBYUV_API |
| 1626 int I420Scale_16(const uint16* src_y, int src_stride_y, |
| 1627 const uint16* src_u, int src_stride_u, |
| 1628 const uint16* src_v, int src_stride_v, |
| 1629 int src_width, int src_height, |
| 1630 uint16* dst_y, int dst_stride_y, |
| 1631 uint16* dst_u, int dst_stride_u, |
| 1632 uint16* dst_v, int dst_stride_v, |
| 1633 int dst_width, int dst_height, |
| 1634 enum FilterMode filtering) { |
| 1635 int src_halfwidth = SUBSAMPLE(src_width, 1, 1); |
| 1636 int src_halfheight = SUBSAMPLE(src_height, 1, 1); |
| 1637 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); |
| 1638 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); |
| 1639 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || |
| 1640 !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { |
| 1641 return -1; |
| 1642 } |
| 1643 |
| 1644 ScalePlane_16(src_y, src_stride_y, src_width, src_height, |
| 1645 dst_y, dst_stride_y, dst_width, dst_height, |
| 1646 filtering); |
| 1647 ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, |
| 1648 dst_u, dst_stride_u, dst_halfwidth, dst_halfheight, |
| 1649 filtering); |
| 1650 ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, |
| 1651 dst_v, dst_stride_v, dst_halfwidth, dst_halfheight, |
| 1652 filtering); |
| 1653 return 0; |
| 1654 } |
| 1655 |
| 1656 // Deprecated api |
| 1657 LIBYUV_API |
| 1658 int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v, |
| 1659 int src_stride_y, int src_stride_u, int src_stride_v, |
| 1660 int src_width, int src_height, |
| 1661 uint8* dst_y, uint8* dst_u, uint8* dst_v, |
| 1662 int dst_stride_y, int dst_stride_u, int dst_stride_v, |
| 1663 int dst_width, int dst_height, |
| 1664 LIBYUV_BOOL interpolate) { |
| 1665 return I420Scale(src_y, src_stride_y, |
| 1666 src_u, src_stride_u, |
| 1667 src_v, src_stride_v, |
| 1668 src_width, src_height, |
| 1669 dst_y, dst_stride_y, |
| 1670 dst_u, dst_stride_u, |
| 1671 dst_v, dst_stride_v, |
| 1672 dst_width, dst_height, |
| 1673 interpolate ? kFilterBox : kFilterNone); |
| 1674 } |
| 1675 |
| 1676 // Deprecated api |
| 1677 LIBYUV_API |
| 1678 int ScaleOffset(const uint8* src, int src_width, int src_height, |
| 1679 uint8* dst, int dst_width, int dst_height, int dst_yoffset, |
| 1680 LIBYUV_BOOL interpolate) { |
| 1681 // Chroma requires offset to multiple of 2. |
| 1682 int dst_yoffset_even = dst_yoffset & ~1; |
| 1683 int src_halfwidth = SUBSAMPLE(src_width, 1, 1); |
| 1684 int src_halfheight = SUBSAMPLE(src_height, 1, 1); |
| 1685 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); |
| 1686 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); |
| 1687 int aheight = dst_height - dst_yoffset_even * 2; // actual output height |
| 1688 const uint8* src_y = src; |
| 1689 const uint8* src_u = src + src_width * src_height; |
| 1690 const uint8* src_v = src + src_width * src_height + |
| 1691 src_halfwidth * src_halfheight; |
| 1692 uint8* dst_y = dst + dst_yoffset_even * dst_width; |
| 1693 uint8* dst_u = dst + dst_width * dst_height + |
| 1694 (dst_yoffset_even >> 1) * dst_halfwidth; |
| 1695 uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight + |
| 1696 (dst_yoffset_even >> 1) * dst_halfwidth; |
| 1697 if (!src || src_width <= 0 || src_height <= 0 || |
| 1698 !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset_even < 0 || |
| 1699 dst_yoffset_even >= dst_height) { |
| 1700 return -1; |
| 1701 } |
| 1702 return I420Scale(src_y, src_width, |
| 1703 src_u, src_halfwidth, |
| 1704 src_v, src_halfwidth, |
| 1705 src_width, src_height, |
| 1706 dst_y, dst_width, |
| 1707 dst_u, dst_halfwidth, |
| 1708 dst_v, dst_halfwidth, |
| 1709 dst_width, aheight, |
| 1710 interpolate ? kFilterBox : kFilterNone); |
| 1711 } |
| 1712 |
| 1713 #ifdef __cplusplus |
| 1714 } // extern "C" |
| 1715 } // namespace libyuv |
| 1716 #endif |
OLD | NEW |