Chromium Code Reviews

Side by Side Diff: source/libvpx/third_party/libyuv/source/scale.cc

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 3 months ago
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include "libyuv/scale.h" 11 #include "libyuv/scale.h"
12 12
13 #include <assert.h> 13 #include <assert.h>
14 #include <string.h> 14 #include <string.h>
15 15
16 #include "libyuv/cpu_id.h" 16 #include "libyuv/cpu_id.h"
17 #include "libyuv/planar_functions.h" // For CopyPlane 17 #include "libyuv/planar_functions.h" // For CopyPlane
18 #include "libyuv/row.h" 18 #include "libyuv/row.h"
19 #include "libyuv/scale_row.h" 19 #include "libyuv/scale_row.h"
20 20
21 #ifdef __cplusplus 21 #ifdef __cplusplus
22 namespace libyuv { 22 namespace libyuv {
23 extern "C" { 23 extern "C" {
24 #endif 24 #endif
25 25
26 // Remove this macro if OVERREAD is safe.
27 #define AVOID_OVERREAD 1
28
29 static __inline int Abs(int v) { 26 static __inline int Abs(int v) {
30 return v >= 0 ? v : -v; 27 return v >= 0 ? v : -v;
31 } 28 }
32 29
33 #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s) 30 #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
34 31
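A note on the SUBSAMPLE macro above, which I420Scale uses further down to derive the chroma plane sizes: with a == (1 << s) - 1 it divides by 2^s while rounding the magnitude of v up, so odd luma dimensions still produce a chroma plane large enough to cover them, and negative (inverted) heights keep their sign. A minimal standalone restatement, with example values, purely for illustration:

    /* Restatement of SUBSAMPLE as a plain function; not part of libyuv.     */
    static int SubsampleExample(int v, int a, int s) {
      return (v < 0) ? -((-v + a) >> s) : ((v + a) >> s);
    }
    /* SubsampleExample(639, 1, 1)  ==  320   odd width rounds up            */
    /* SubsampleExample(640, 1, 1)  ==  320   even width is exact            */
    /* SubsampleExample(-639, 1, 1) == -320   inverted height keeps its sign */
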
35 // Scale plane, 1/2 32 // Scale plane, 1/2
36 // This is an optimized version for scaling down a plane to 1/2 of 33 // This is an optimized version for scaling down a plane to 1/2 of
37 // its original size. 34 // its original size.
38 35
39 static void ScalePlaneDown2(int src_width, int src_height, 36 static void ScalePlaneDown2(int src_width, int src_height,
40 int dst_width, int dst_height, 37 int dst_width, int dst_height,
41 int src_stride, int dst_stride, 38 int src_stride, int dst_stride,
42 const uint8* src_ptr, uint8* dst_ptr, 39 const uint8* src_ptr, uint8* dst_ptr,
43 enum FilterMode filtering) { 40 enum FilterMode filtering) {
44 int y; 41 int y;
45 void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride, 42 void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
46 uint8* dst_ptr, int dst_width) = 43 uint8* dst_ptr, int dst_width) =
47 filtering == kFilterNone ? ScaleRowDown2_C : 44 filtering == kFilterNone ? ScaleRowDown2_C :
48 (filtering == kFilterLinear ? ScaleRowDown2Linear_C : 45 (filtering == kFilterLinear ? ScaleRowDown2Linear_C : ScaleRowDown2Box_C);
49 ScaleRowDown2Box_C);
50 int row_stride = src_stride << 1; 46 int row_stride = src_stride << 1;
51 if (!filtering) { 47 if (!filtering) {
52 src_ptr += src_stride; // Point to odd rows. 48 src_ptr += src_stride; // Point to odd rows.
53 src_stride = 0; 49 src_stride = 0;
54 } 50 }
55 51
56 #if defined(HAS_SCALEROWDOWN2_NEON) 52 #if defined(HAS_SCALEROWDOWN2_NEON)
57 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) { 53 if (TestCpuFlag(kCpuHasNEON)) {
58 ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON; 54 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_NEON :
55 (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON :
56 ScaleRowDown2Box_Any_NEON);
57 if (IS_ALIGNED(dst_width, 16)) {
58 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON :
59 (filtering == kFilterLinear ? ScaleRowDown2Linear_NEON :
60 ScaleRowDown2Box_NEON);
61 }
59 } 62 }
60 #endif 63 #endif
61 #if defined(HAS_SCALEROWDOWN2_SSE2) 64 #if defined(HAS_SCALEROWDOWN2_SSE2)
62 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) { 65 if (TestCpuFlag(kCpuHasSSE2)) {
63 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 : 66 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_SSE2 :
64 (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 : 67 (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSE2 :
65 ScaleRowDown2Box_SSE2); 68 ScaleRowDown2Box_Any_SSE2);
69 if (IS_ALIGNED(dst_width, 16)) {
70 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
71 (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
72 ScaleRowDown2Box_SSE2);
73 }
74 }
75 #endif
76 #if defined(HAS_SCALEROWDOWN2_AVX2)
77 if (TestCpuFlag(kCpuHasAVX2)) {
78 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_AVX2 :
79 (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2 :
80 ScaleRowDown2Box_Any_AVX2);
81 if (IS_ALIGNED(dst_width, 32)) {
82 ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2 :
83 (filtering == kFilterLinear ? ScaleRowDown2Linear_AVX2 :
84 ScaleRowDown2Box_AVX2);
85 }
66 } 86 }
67 #endif 87 #endif
68 #if defined(HAS_SCALEROWDOWN2_MIPS_DSPR2) 88 #if defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
69 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) && 89 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
70 IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) && 90 IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
71 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 91 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
72 ScaleRowDown2 = filtering ? 92 ScaleRowDown2 = filtering ?
73 ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2; 93 ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2;
74 } 94 }
75 #endif 95 #endif
(...skipping 71 matching lines...)
147 int y; 167 int y;
148 void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride, 168 void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
149 uint8* dst_ptr, int dst_width) = 169 uint8* dst_ptr, int dst_width) =
150 filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C; 170 filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
151 int row_stride = src_stride << 2; 171 int row_stride = src_stride << 2;
152 if (!filtering) { 172 if (!filtering) {
153 src_ptr += src_stride * 2; // Point to row 2. 173 src_ptr += src_stride * 2; // Point to row 2.
154 src_stride = 0; 174 src_stride = 0;
155 } 175 }
156 #if defined(HAS_SCALEROWDOWN4_NEON) 176 #if defined(HAS_SCALEROWDOWN4_NEON)
157 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) { 177 if (TestCpuFlag(kCpuHasNEON)) {
158 ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON; 178 ScaleRowDown4 = filtering ?
179 ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON;
180 if (IS_ALIGNED(dst_width, 8)) {
181 ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
182 }
159 } 183 }
160 #endif 184 #endif
161 #if defined(HAS_SCALEROWDOWN4_SSE2) 185 #if defined(HAS_SCALEROWDOWN4_SSE2)
162 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { 186 if (TestCpuFlag(kCpuHasSSE2)) {
163 ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2; 187 ScaleRowDown4 = filtering ?
188 ScaleRowDown4Box_Any_SSE2 : ScaleRowDown4_Any_SSE2;
189 if (IS_ALIGNED(dst_width, 8)) {
190 ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
191 }
192 }
193 #endif
194 #if defined(HAS_SCALEROWDOWN4_AVX2)
195 if (TestCpuFlag(kCpuHasAVX2)) {
196 ScaleRowDown4 = filtering ?
197 ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2;
198 if (IS_ALIGNED(dst_width, 16)) {
199 ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2;
200 }
164 } 201 }
165 #endif 202 #endif
166 #if defined(HAS_SCALEROWDOWN4_MIPS_DSPR2) 203 #if defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
167 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) && 204 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
168 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && 205 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
169 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 206 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
170 ScaleRowDown4 = filtering ? 207 ScaleRowDown4 = filtering ?
171 ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2; 208 ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2;
172 } 209 }
173 #endif 210 #endif
(...skipping 68 matching lines...)
242 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; 279 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
243 assert(dst_width % 3 == 0); 280 assert(dst_width % 3 == 0);
244 if (!filtering) { 281 if (!filtering) {
245 ScaleRowDown34_0 = ScaleRowDown34_C; 282 ScaleRowDown34_0 = ScaleRowDown34_C;
246 ScaleRowDown34_1 = ScaleRowDown34_C; 283 ScaleRowDown34_1 = ScaleRowDown34_C;
247 } else { 284 } else {
248 ScaleRowDown34_0 = ScaleRowDown34_0_Box_C; 285 ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
249 ScaleRowDown34_1 = ScaleRowDown34_1_Box_C; 286 ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
250 } 287 }
251 #if defined(HAS_SCALEROWDOWN34_NEON) 288 #if defined(HAS_SCALEROWDOWN34_NEON)
252 if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) { 289 if (TestCpuFlag(kCpuHasNEON)) {
253 if (!filtering) { 290 if (!filtering) {
254 ScaleRowDown34_0 = ScaleRowDown34_NEON; 291 ScaleRowDown34_0 = ScaleRowDown34_Any_NEON;
255 ScaleRowDown34_1 = ScaleRowDown34_NEON; 292 ScaleRowDown34_1 = ScaleRowDown34_Any_NEON;
256 } else { 293 } else {
257 ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON; 294 ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON;
258 ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON; 295 ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON;
296 }
297 if (dst_width % 24 == 0) {
298 if (!filtering) {
299 ScaleRowDown34_0 = ScaleRowDown34_NEON;
300 ScaleRowDown34_1 = ScaleRowDown34_NEON;
301 } else {
302 ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
303 ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
304 }
259 } 305 }
260 } 306 }
261 #endif 307 #endif
262 #if defined(HAS_SCALEROWDOWN34_SSSE3) 308 #if defined(HAS_SCALEROWDOWN34_SSSE3)
263 if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) { 309 if (TestCpuFlag(kCpuHasSSSE3)) {
264 if (!filtering) { 310 if (!filtering) {
265 ScaleRowDown34_0 = ScaleRowDown34_SSSE3; 311 ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3;
266 ScaleRowDown34_1 = ScaleRowDown34_SSSE3; 312 ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3;
267 } else { 313 } else {
268 ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3; 314 ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3;
269 ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3; 315 ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3;
316 }
317 if (dst_width % 24 == 0) {
318 if (!filtering) {
319 ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
320 ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
321 } else {
322 ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
323 ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
324 }
270 } 325 }
271 } 326 }
272 #endif 327 #endif
273 #if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2) 328 #if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
274 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) && 329 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
275 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && 330 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
276 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 331 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
277 if (!filtering) { 332 if (!filtering) {
278 ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2; 333 ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2;
279 ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2; 334 ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2;
(...skipping 135 matching lines...)
415 uint8* dst_ptr, int dst_width); 470 uint8* dst_ptr, int dst_width);
416 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride; 471 const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
417 assert(dst_width % 3 == 0); 472 assert(dst_width % 3 == 0);
418 if (!filtering) { 473 if (!filtering) {
419 ScaleRowDown38_3 = ScaleRowDown38_C; 474 ScaleRowDown38_3 = ScaleRowDown38_C;
420 ScaleRowDown38_2 = ScaleRowDown38_C; 475 ScaleRowDown38_2 = ScaleRowDown38_C;
421 } else { 476 } else {
422 ScaleRowDown38_3 = ScaleRowDown38_3_Box_C; 477 ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
423 ScaleRowDown38_2 = ScaleRowDown38_2_Box_C; 478 ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
424 } 479 }
480
425 #if defined(HAS_SCALEROWDOWN38_NEON) 481 #if defined(HAS_SCALEROWDOWN38_NEON)
426 if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) { 482 if (TestCpuFlag(kCpuHasNEON)) {
427 if (!filtering) { 483 if (!filtering) {
428 ScaleRowDown38_3 = ScaleRowDown38_NEON; 484 ScaleRowDown38_3 = ScaleRowDown38_Any_NEON;
429 ScaleRowDown38_2 = ScaleRowDown38_NEON; 485 ScaleRowDown38_2 = ScaleRowDown38_Any_NEON;
430 } else { 486 } else {
431 ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON; 487 ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON;
432 ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON; 488 ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON;
489 }
490 if (dst_width % 12 == 0) {
491 if (!filtering) {
492 ScaleRowDown38_3 = ScaleRowDown38_NEON;
493 ScaleRowDown38_2 = ScaleRowDown38_NEON;
494 } else {
495 ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
496 ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
497 }
433 } 498 }
434 } 499 }
435 #endif 500 #endif
436 #if defined(HAS_SCALEROWDOWN38_SSSE3) 501 #if defined(HAS_SCALEROWDOWN38_SSSE3)
437 if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) { 502 if (TestCpuFlag(kCpuHasSSSE3)) {
438 if (!filtering) { 503 if (!filtering) {
504 ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3;
505 ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3;
506 } else {
507 ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3;
508 ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3;
509 }
510 if (dst_width % 12 == 0 && !filtering) {
439 ScaleRowDown38_3 = ScaleRowDown38_SSSE3; 511 ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
440 ScaleRowDown38_2 = ScaleRowDown38_SSSE3; 512 ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
441 } else { 513 }
514 if (dst_width % 6 == 0 && filtering) {
442 ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3; 515 ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
443 ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3; 516 ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
444 } 517 }
445 } 518 }
446 #endif 519 #endif
447 #if defined(HAS_SCALEROWDOWN38_MIPS_DSPR2) 520 #if defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
448 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) && 521 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
449 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) && 522 IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
450 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) { 523 IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
451 if (!filtering) { 524 if (!filtering) {
(...skipping 100 matching lines...)
552 if ((dst_height % 3) == 2) { 625 if ((dst_height % 3) == 2) {
553 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); 626 ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
554 src_ptr += src_stride * 3; 627 src_ptr += src_stride * 3;
555 dst_ptr += dst_stride; 628 dst_ptr += dst_stride;
556 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); 629 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
557 } else if ((dst_height % 3) == 1) { 630 } else if ((dst_height % 3) == 1) {
558 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); 631 ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
559 } 632 }
560 } 633 }
561 634
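The change repeated across ScalePlaneDown2, ScalePlaneDown4, ScalePlaneDown34 and ScalePlaneDown38 above follows one idiom: start from the C reference row function, switch to a SIMD "_Any_" variant as soon as the CPU flag is present (those handle arbitrary widths, finishing the leftover tail with scalar code), and only take the full-vector kernel when dst_width is a multiple of the vector width. A self-contained sketch of that selection logic, using stand-in names rather than the real libyuv symbols:

    /* Sketch of the "_Any_" dispatch idiom; the row functions and feature   */
    /* test are stand-ins, not the actual libyuv symbols.                    */
    #include <stddef.h>
    typedef unsigned char uint8;
    typedef void (*RowFn)(const uint8* src, ptrdiff_t stride,
                          uint8* dst, int dst_width);

    static void RowDown_C(const uint8* s, ptrdiff_t t, uint8* d, int w) {}
    static void RowDown_Any_SIMD(const uint8* s, ptrdiff_t t, uint8* d, int w) {}
    static void RowDown_SIMD(const uint8* s, ptrdiff_t t, uint8* d, int w) {}
    static int HasSimd(void) { return 1; }      /* stand-in for TestCpuFlag() */

    static RowFn ChooseRowDown(int dst_width) {
      RowFn fn = RowDown_C;                     /* portable reference         */
      if (HasSimd()) {
        fn = RowDown_Any_SIMD;                  /* any width, scalar tail     */
        if ((dst_width & 15) == 0) {
          fn = RowDown_SIMD;                    /* full 16-pixel vectors only */
        }
      }
      return fn;
    }
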
562 static __inline uint32 SumBox(int iboxwidth, int iboxheight, 635 #define MIN1(x) ((x) < 1 ? 1 : (x))
563 ptrdiff_t src_stride, const uint8* src_ptr) {
564 uint32 sum = 0u;
565 int y;
566 assert(iboxwidth > 0);
567 assert(iboxheight > 0);
568 for (y = 0; y < iboxheight; ++y) {
569 int x;
570 for (x = 0; x < iboxwidth; ++x) {
571 sum += src_ptr[x];
572 }
573 src_ptr += src_stride;
574 }
575 return sum;
576 }
577
578 static __inline uint32 SumBox_16(int iboxwidth, int iboxheight,
579 ptrdiff_t src_stride, const uint16* src_ptr) {
580 uint32 sum = 0u;
581 int y;
582 assert(iboxwidth > 0);
583 assert(iboxheight > 0);
584 for (y = 0; y < iboxheight; ++y) {
585 int x;
586 for (x = 0; x < iboxwidth; ++x) {
587 sum += src_ptr[x];
588 }
589 src_ptr += src_stride;
590 }
591 return sum;
592 }
593
594 static void ScalePlaneBoxRow_C(int dst_width, int boxheight,
595 int x, int dx, ptrdiff_t src_stride,
596 const uint8* src_ptr, uint8* dst_ptr) {
597 int i;
598 int boxwidth;
599 for (i = 0; i < dst_width; ++i) {
600 int ix = x >> 16;
601 x += dx;
602 boxwidth = (x >> 16) - ix;
603 *dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) /
604 (boxwidth * boxheight);
605 }
606 }
607
608 static void ScalePlaneBoxRow_16_C(int dst_width, int boxheight,
609 int x, int dx, ptrdiff_t src_stride,
610 const uint16* src_ptr, uint16* dst_ptr) {
611 int i;
612 int boxwidth;
613 for (i = 0; i < dst_width; ++i) {
614 int ix = x >> 16;
615 x += dx;
616 boxwidth = (x >> 16) - ix;
617 *dst_ptr++ = SumBox_16(boxwidth, boxheight, src_stride, src_ptr + ix) /
618 (boxwidth * boxheight);
619 }
620 }
621 636
622 static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) { 637 static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
623 uint32 sum = 0u; 638 uint32 sum = 0u;
624 int x; 639 int x;
625 assert(iboxwidth > 0); 640 assert(iboxwidth > 0);
626 for (x = 0; x < iboxwidth; ++x) { 641 for (x = 0; x < iboxwidth; ++x) {
627 sum += src_ptr[x]; 642 sum += src_ptr[x];
628 } 643 }
629 return sum; 644 return sum;
630 } 645 }
631 646
632 static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) { 647 static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) {
633 uint32 sum = 0u; 648 uint32 sum = 0u;
634 int x; 649 int x;
635 assert(iboxwidth > 0); 650 assert(iboxwidth > 0);
636 for (x = 0; x < iboxwidth; ++x) { 651 for (x = 0; x < iboxwidth; ++x) {
637 sum += src_ptr[x]; 652 sum += src_ptr[x];
638 } 653 }
639 return sum; 654 return sum;
640 } 655 }
641 656
642 static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx, 657 static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
643 const uint16* src_ptr, uint8* dst_ptr) { 658 const uint16* src_ptr, uint8* dst_ptr) {
644 int i; 659 int i;
645 int scaletbl[2]; 660 int scaletbl[2];
646 int minboxwidth = (dx >> 16); 661 int minboxwidth = dx >> 16;
647 int* scaleptr = scaletbl - minboxwidth; 662 int* scaleptr = scaletbl - minboxwidth;
648 int boxwidth; 663 int boxwidth;
649 scaletbl[0] = 65536 / (minboxwidth * boxheight); 664 scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
650 scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight); 665 scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
651 for (i = 0; i < dst_width; ++i) { 666 for (i = 0; i < dst_width; ++i) {
652 int ix = x >> 16; 667 int ix = x >> 16;
653 x += dx; 668 x += dx;
654 boxwidth = (x >> 16) - ix; 669 boxwidth = MIN1((x >> 16) - ix);
655 *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16; 670 *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
656 } 671 }
657 } 672 }
658 673
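ScaleAddCols2_C above walks the source in 16.16 fixed point: ix = x >> 16 marks the left edge of the current box, x += dx advances by the source-to-destination ratio, and the next integer crossing gives boxwidth, which is always minboxwidth or minboxwidth + 1 (hence the two-entry scaletbl). The new MIN1() clamp keeps the 65536 / (boxwidth * boxheight) reciprocals from dividing by zero when a box degenerates to width or height 0. A small standalone illustration of the stepping, with made-up sizes:

    /* 16.16 fixed-point column stepping as used by ScaleAddCols2_C; the      */
    /* 10 -> 4 sizes are arbitrary example values.                            */
    #include <stdio.h>

    int main(void) {
      int src_width = 10, dst_width = 4;
      int dx = (src_width << 16) / dst_width;   /* 2.5 source pixels per dst  */
      int x = 0, i, ix, boxwidth;
      for (i = 0; i < dst_width; ++i) {
        ix = x >> 16;                           /* left edge of the box       */
        x += dx;
        boxwidth = (x >> 16) - ix;              /* alternates 2 and 3 here    */
        printf("dst %d sums src [%d, %d)\n", i, ix, ix + boxwidth);
      }
      return 0;
    }
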
659 static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx, 674 static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx,
660 const uint32* src_ptr, uint16* dst_ptr) { 675 const uint32* src_ptr, uint16* dst_ptr) {
661 int i; 676 int i;
662 int scaletbl[2]; 677 int scaletbl[2];
663 int minboxwidth = (dx >> 16); 678 int minboxwidth = dx >> 16;
664 int* scaleptr = scaletbl - minboxwidth; 679 int* scaleptr = scaletbl - minboxwidth;
665 int boxwidth; 680 int boxwidth;
666 scaletbl[0] = 65536 / (minboxwidth * boxheight); 681 scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight);
667 scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight); 682 scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight);
668 for (i = 0; i < dst_width; ++i) { 683 for (i = 0; i < dst_width; ++i) {
669 int ix = x >> 16; 684 int ix = x >> 16;
670 x += dx; 685 x += dx;
671 boxwidth = (x >> 16) - ix; 686 boxwidth = MIN1((x >> 16) - ix);
672 *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) * 687 *dst_ptr++ =
673 scaleptr[boxwidth] >> 16; 688 SumPixels_16(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
689 }
690 }
691
692 static void ScaleAddCols0_C(int dst_width, int boxheight, int x, int,
693 const uint16* src_ptr, uint8* dst_ptr) {
694 int scaleval = 65536 / boxheight;
695 int i;
696 src_ptr += (x >> 16);
697 for (i = 0; i < dst_width; ++i) {
698 *dst_ptr++ = src_ptr[i] * scaleval >> 16;
674 } 699 }
675 } 700 }
676 701
677 static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx, 702 static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
678 const uint16* src_ptr, uint8* dst_ptr) { 703 const uint16* src_ptr, uint8* dst_ptr) {
679 int boxwidth = (dx >> 16); 704 int boxwidth = MIN1(dx >> 16);
680 int scaleval = 65536 / (boxwidth * boxheight); 705 int scaleval = 65536 / (boxwidth * boxheight);
681 int i; 706 int i;
707 x >>= 16;
682 for (i = 0; i < dst_width; ++i) { 708 for (i = 0; i < dst_width; ++i) {
683 *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16; 709 *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
684 x += boxwidth; 710 x += boxwidth;
685 } 711 }
686 } 712 }
687 713
688 static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx, 714 static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx,
689 const uint32* src_ptr, uint16* dst_ptr) { 715 const uint32* src_ptr, uint16* dst_ptr) {
690 int boxwidth = (dx >> 16); 716 int boxwidth = MIN1(dx >> 16);
691 int scaleval = 65536 / (boxwidth * boxheight); 717 int scaleval = 65536 / (boxwidth * boxheight);
692 int i; 718 int i;
693 for (i = 0; i < dst_width; ++i) { 719 for (i = 0; i < dst_width; ++i) {
694 *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16; 720 *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
695 x += boxwidth; 721 x += boxwidth;
696 } 722 }
697 } 723 }
698 724
699 // Scale plane down to any dimensions, with interpolation. 725 // Scale plane down to any dimensions, with interpolation.
700 // (boxfilter). 726 // (boxfilter).
701 // 727 //
702 // Same method as SimpleScale, which is fixed point, outputting 728 // Same method as SimpleScale, which is fixed point, outputting
703 // one pixel of destination using fixed point (16.16) to step 729 // one pixel of destination using fixed point (16.16) to step
704 // through source, sampling a box of pixel with simple 730 // through source, sampling a box of pixel with simple
705 // averaging. 731 // averaging.
706 static void ScalePlaneBox(int src_width, int src_height, 732 static void ScalePlaneBox(int src_width, int src_height,
707 int dst_width, int dst_height, 733 int dst_width, int dst_height,
708 int src_stride, int dst_stride, 734 int src_stride, int dst_stride,
709 const uint8* src_ptr, uint8* dst_ptr) { 735 const uint8* src_ptr, uint8* dst_ptr) {
710 int j; 736 int j, k;
711 // Initial source x/y coordinate and step values as 16.16 fixed point. 737 // Initial source x/y coordinate and step values as 16.16 fixed point.
712 int x = 0; 738 int x = 0;
713 int y = 0; 739 int y = 0;
714 int dx = 0; 740 int dx = 0;
715 int dy = 0; 741 int dy = 0;
716 const int max_y = (src_height << 16); 742 const int max_y = (src_height << 16);
717 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, 743 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
718 &x, &y, &dx, &dy); 744 &x, &y, &dx, &dy);
719 src_width = Abs(src_width); 745 src_width = Abs(src_width);
720 // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
721 if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
722 uint8* dst = dst_ptr;
723 int j;
724 for (j = 0; j < dst_height; ++j) {
725 int boxheight;
726 int iy = y >> 16;
727 const uint8* src = src_ptr + iy * src_stride;
728 y += dy;
729 if (y > max_y) {
730 y = max_y;
731 }
732 boxheight = (y >> 16) - iy;
733 ScalePlaneBoxRow_C(dst_width, boxheight,
734 x, dx, src_stride,
735 src, dst);
736 dst += dst_stride;
737 }
738 return;
739 }
740 { 746 {
741 // Allocate a row buffer of uint16. 747 // Allocate a row buffer of uint16.
742 align_buffer_64(row16, src_width * 2); 748 align_buffer_64(row16, src_width * 2);
743 void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, 749 void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
744 const uint16* src_ptr, uint8* dst_ptr) = 750 const uint16* src_ptr, uint8* dst_ptr) =
745 (dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C; 751 (dx & 0xffff) ? ScaleAddCols2_C:
746 void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride, 752 ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C);
747 uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C; 753 void (*ScaleAddRow)(const uint8* src_ptr, uint16* dst_ptr, int src_width) =
748 754 ScaleAddRow_C;
749 #if defined(HAS_SCALEADDROWS_SSE2) 755 #if defined(HAS_SCALEADDROW_SSE2)
750 if (TestCpuFlag(kCpuHasSSE2) 756 if (TestCpuFlag(kCpuHasSSE2)) {
751 #ifdef AVOID_OVERREAD 757 ScaleAddRow = ScaleAddRow_Any_SSE2;
752 && IS_ALIGNED(src_width, 16) 758 if (IS_ALIGNED(src_width, 16)) {
759 ScaleAddRow = ScaleAddRow_SSE2;
760 }
761 }
753 #endif 762 #endif
754 ) { 763 #if defined(HAS_SCALEADDROW_AVX2)
755 ScaleAddRows = ScaleAddRows_SSE2; 764 if (TestCpuFlag(kCpuHasAVX2)) {
765 ScaleAddRow = ScaleAddRow_Any_AVX2;
766 if (IS_ALIGNED(src_width, 32)) {
767 ScaleAddRow = ScaleAddRow_AVX2;
768 }
769 }
770 #endif
771 #if defined(HAS_SCALEADDROW_NEON)
772 if (TestCpuFlag(kCpuHasNEON)) {
773 ScaleAddRow = ScaleAddRow_Any_NEON;
774 if (IS_ALIGNED(src_width, 16)) {
775 ScaleAddRow = ScaleAddRow_NEON;
776 }
756 } 777 }
757 #endif 778 #endif
758 779
759 for (j = 0; j < dst_height; ++j) { 780 for (j = 0; j < dst_height; ++j) {
760 int boxheight; 781 int boxheight;
761 int iy = y >> 16; 782 int iy = y >> 16;
762 const uint8* src = src_ptr + iy * src_stride; 783 const uint8* src = src_ptr + iy * src_stride;
763 y += dy; 784 y += dy;
764 if (y > (src_height << 16)) { 785 if (y > max_y) {
765 y = (src_height << 16); 786 y = max_y;
766 } 787 }
767 boxheight = (y >> 16) - iy; 788 boxheight = MIN1((y >> 16) - iy);
768 ScaleAddRows(src, src_stride, (uint16*)(row16), 789 memset(row16, 0, src_width * 2);
769 src_width, boxheight); 790 for (k = 0; k < boxheight; ++k) {
770 ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), 791 ScaleAddRow(src, (uint16 *)(row16), src_width);
771 dst_ptr); 792 src += src_stride;
793 }
794 ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16), dst_ptr);
772 dst_ptr += dst_stride; 795 dst_ptr += dst_stride;
773 } 796 }
774 free_aligned_buffer_64(row16); 797 free_aligned_buffer_64(row16);
775 } 798 }
776 } 799 }
777 800
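The rewritten ScalePlaneBox above drops the old ScaleAddRows/SumBox fallback path (and the AVOID_OVERREAD guard) in favour of a per-row ScaleAddRow accumulator: clear a uint16 row buffer, add each source row of the box into it, then let ScaleAddCols average the column sums. A reduced, self-contained sketch of that structure for a fixed 2x2 box, ignoring the SIMD dispatch and the fixed-point stepping:

    /* Reduced box-filter sketch: sum two source rows into a 16-bit buffer,   */
    /* then average pairs of column sums. The fixed 2x2 box and 1024-pixel    */
    /* cap are example simplifications, not the real ScalePlaneBox.           */
    #include <string.h>
    typedef unsigned char uint8;
    typedef unsigned short uint16;

    static void AddRow(const uint8* src, uint16* acc, int width) {
      int x;
      for (x = 0; x < width; ++x) {
        acc[x] += src[x];                       /* role of ScaleAddRow_C      */
      }
    }

    static void BoxScaleDown2(const uint8* src, int src_stride, int src_width,
                              uint8* dst, int dst_width) {
      uint16 row[1024];                         /* assumes src_width <= 1024  */
      int k, i;
      memset(row, 0, src_width * sizeof(uint16));
      for (k = 0; k < 2; ++k) {                 /* boxheight == 2             */
        AddRow(src, row, src_width);
        src += src_stride;
      }
      for (i = 0; i < dst_width; ++i) {         /* boxwidth == 2              */
        dst[i] = (uint8)((row[2 * i] + row[2 * i + 1] + 2) >> 2);
      }
    }
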
778 static void ScalePlaneBox_16(int src_width, int src_height, 801 static void ScalePlaneBox_16(int src_width, int src_height,
779 int dst_width, int dst_height, 802 int dst_width, int dst_height,
780 int src_stride, int dst_stride, 803 int src_stride, int dst_stride,
781 const uint16* src_ptr, uint16* dst_ptr) { 804 const uint16* src_ptr, uint16* dst_ptr) {
782 int j; 805 int j, k;
783 // Initial source x/y coordinate and step values as 16.16 fixed point. 806 // Initial source x/y coordinate and step values as 16.16 fixed point.
784 int x = 0; 807 int x = 0;
785 int y = 0; 808 int y = 0;
786 int dx = 0; 809 int dx = 0;
787 int dy = 0; 810 int dy = 0;
788 const int max_y = (src_height << 16); 811 const int max_y = (src_height << 16);
789 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, 812 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
790 &x, &y, &dx, &dy); 813 &x, &y, &dx, &dy);
791 src_width = Abs(src_width); 814 src_width = Abs(src_width);
792 // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
793 if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
794 uint16* dst = dst_ptr;
795 int j;
796 for (j = 0; j < dst_height; ++j) {
797 int boxheight;
798 int iy = y >> 16;
799 const uint16* src = src_ptr + iy * src_stride;
800 y += dy;
801 if (y > max_y) {
802 y = max_y;
803 }
804 boxheight = (y >> 16) - iy;
805 ScalePlaneBoxRow_16_C(dst_width, boxheight,
806 x, dx, src_stride,
807 src, dst);
808 dst += dst_stride;
809 }
810 return;
811 }
812 { 815 {
813 // Allocate a row buffer of uint32. 816 // Allocate a row buffer of uint32.
814 align_buffer_64(row32, src_width * 4); 817 align_buffer_64(row32, src_width * 4);
815 void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, 818 void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
816 const uint32* src_ptr, uint16* dst_ptr) = 819 const uint32* src_ptr, uint16* dst_ptr) =
817 (dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C; 820 (dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C;
818 void (*ScaleAddRows)(const uint16* src_ptr, ptrdiff_t src_stride, 821 void (*ScaleAddRow)(const uint16* src_ptr, uint32* dst_ptr, int src_width) =
819 uint32* dst_ptr, int src_width, int src_height) = ScaleAddRows_16_C; 822 ScaleAddRow_16_C;
820 823
821 #if defined(HAS_SCALEADDROWS_16_SSE2) 824 #if defined(HAS_SCALEADDROW_16_SSE2)
822 if (TestCpuFlag(kCpuHasSSE2) 825 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) {
823 #ifdef AVOID_OVERREAD 826 ScaleAddRow = ScaleAddRow_16_SSE2;
824 && IS_ALIGNED(src_width, 16)
825 #endif
826 ) {
827 ScaleAddRows = ScaleAddRows_16_SSE2;
828 } 827 }
829 #endif 828 #endif
830 829
831 for (j = 0; j < dst_height; ++j) { 830 for (j = 0; j < dst_height; ++j) {
832 int boxheight; 831 int boxheight;
833 int iy = y >> 16; 832 int iy = y >> 16;
834 const uint16* src = src_ptr + iy * src_stride; 833 const uint16* src = src_ptr + iy * src_stride;
835 y += dy; 834 y += dy;
836 if (y > (src_height << 16)) { 835 if (y > max_y) {
837 y = (src_height << 16); 836 y = max_y;
838 } 837 }
839 boxheight = (y >> 16) - iy; 838 boxheight = MIN1((y >> 16) - iy);
840 ScaleAddRows(src, src_stride, (uint32*)(row32), 839 memset(row32, 0, src_width * 4);
841 src_width, boxheight); 840 for (k = 0; k < boxheight; ++k) {
842 ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), 841 ScaleAddRow(src, (uint32 *)(row32), src_width);
843 dst_ptr); 842 src += src_stride;
843 }
844 ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32), dst_ptr);
844 dst_ptr += dst_stride; 845 dst_ptr += dst_stride;
845 } 846 }
846 free_aligned_buffer_64(row32); 847 free_aligned_buffer_64(row32);
847 } 848 }
848 } 849 }
849 850
850 // Scale plane down with bilinear interpolation. 851 // Scale plane down with bilinear interpolation.
851 void ScalePlaneBilinearDown(int src_width, int src_height, 852 void ScalePlaneBilinearDown(int src_width, int src_height,
852 int dst_width, int dst_height, 853 int dst_width, int dst_height,
853 int src_stride, int dst_stride, 854 int src_stride, int dst_stride,
(...skipping 60 matching lines...)
914 } 915 }
915 } 916 }
916 #endif 917 #endif
917 918
918 919
919 #if defined(HAS_SCALEFILTERCOLS_SSSE3) 920 #if defined(HAS_SCALEFILTERCOLS_SSSE3)
920 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { 921 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
921 ScaleFilterCols = ScaleFilterCols_SSSE3; 922 ScaleFilterCols = ScaleFilterCols_SSSE3;
922 } 923 }
923 #endif 924 #endif
925 #if defined(HAS_SCALEFILTERCOLS_NEON)
926 if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
927 ScaleFilterCols = ScaleFilterCols_Any_NEON;
928 if (IS_ALIGNED(dst_width, 8)) {
929 ScaleFilterCols = ScaleFilterCols_NEON;
930 }
931 }
932 #endif
924 if (y > max_y) { 933 if (y > max_y) {
925 y = max_y; 934 y = max_y;
926 } 935 }
927 936
928 for (j = 0; j < dst_height; ++j) { 937 for (j = 0; j < dst_height; ++j) {
929 int yi = y >> 16; 938 int yi = y >> 16;
930 const uint8* src = src_ptr + yi * src_stride; 939 const uint8* src = src_ptr + yi * src_stride;
931 if (filtering == kFilterLinear) { 940 if (filtering == kFilterLinear) {
932 ScaleFilterCols(dst_ptr, src, dst_width, x, dx); 941 ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
933 } else { 942 } else {
(...skipping 116 matching lines...)
1050 // Initial source x/y coordinate and step values as 16.16 fixed point. 1059 // Initial source x/y coordinate and step values as 16.16 fixed point.
1051 int x = 0; 1060 int x = 0;
1052 int y = 0; 1061 int y = 0;
1053 int dx = 0; 1062 int dx = 0;
1054 int dy = 0; 1063 int dy = 0;
1055 const int max_y = (src_height - 1) << 16; 1064 const int max_y = (src_height - 1) << 16;
1056 void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, 1065 void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
1057 ptrdiff_t src_stride, int dst_width, int source_y_fraction) = 1066 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
1058 InterpolateRow_C; 1067 InterpolateRow_C;
1059 void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr, 1068 void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
1060 int dst_width, int x, int dx) = 1069 int dst_width, int x, int dx) =
1061 filtering ? ScaleFilterCols_C : ScaleCols_C; 1070 filtering ? ScaleFilterCols_C : ScaleCols_C;
1062 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, 1071 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
1063 &x, &y, &dx, &dy); 1072 &x, &y, &dx, &dy);
1064 src_width = Abs(src_width); 1073 src_width = Abs(src_width);
1065 1074
1066 #if defined(HAS_INTERPOLATEROW_SSE2) 1075 #if defined(HAS_INTERPOLATEROW_SSE2)
1067 if (TestCpuFlag(kCpuHasSSE2)) { 1076 if (TestCpuFlag(kCpuHasSSE2)) {
1068 InterpolateRow = InterpolateRow_Any_SSE2; 1077 InterpolateRow = InterpolateRow_Any_SSE2;
1069 if (IS_ALIGNED(dst_width, 16)) { 1078 if (IS_ALIGNED(dst_width, 16)) {
1070 InterpolateRow = InterpolateRow_SSE2; 1079 InterpolateRow = InterpolateRow_SSE2;
1071 } 1080 }
(...skipping 33 matching lines...)
1105 #endif 1114 #endif
1106 1115
1107 if (filtering && src_width >= 32768) { 1116 if (filtering && src_width >= 32768) {
1108 ScaleFilterCols = ScaleFilterCols64_C; 1117 ScaleFilterCols = ScaleFilterCols64_C;
1109 } 1118 }
1110 #if defined(HAS_SCALEFILTERCOLS_SSSE3) 1119 #if defined(HAS_SCALEFILTERCOLS_SSSE3)
1111 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { 1120 if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
1112 ScaleFilterCols = ScaleFilterCols_SSSE3; 1121 ScaleFilterCols = ScaleFilterCols_SSSE3;
1113 } 1122 }
1114 #endif 1123 #endif
1124 #if defined(HAS_SCALEFILTERCOLS_NEON)
1125 if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) {
1126 ScaleFilterCols = ScaleFilterCols_Any_NEON;
1127 if (IS_ALIGNED(dst_width, 8)) {
1128 ScaleFilterCols = ScaleFilterCols_NEON;
1129 }
1130 }
1131 #endif
1115 if (!filtering && src_width * 2 == dst_width && x < 0x8000) { 1132 if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
1116 ScaleFilterCols = ScaleColsUp2_C; 1133 ScaleFilterCols = ScaleColsUp2_C;
1117 #if defined(HAS_SCALECOLS_SSE2) 1134 #if defined(HAS_SCALECOLS_SSE2)
1118 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { 1135 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1119 ScaleFilterCols = ScaleColsUp2_SSE2; 1136 ScaleFilterCols = ScaleColsUp2_SSE2;
1120 } 1137 }
1121 #endif 1138 #endif
1122 } 1139 }
1123 1140
1124 if (y > max_y) { 1141 if (y > max_y) {
1125 y = max_y; 1142 y = max_y;
1126 } 1143 }
1127 { 1144 {
1128 int yi = y >> 16; 1145 int yi = y >> 16;
1129 const uint8* src = src_ptr + yi * src_stride; 1146 const uint8* src = src_ptr + yi * src_stride;
1130 1147
1131 // Allocate 2 row buffers. 1148 // Allocate 2 row buffers.
1132 const int kRowSize = (dst_width + 15) & ~15; 1149 const int kRowSize = (dst_width + 31) & ~31;
1133 align_buffer_64(row, kRowSize * 2); 1150 align_buffer_64(row, kRowSize * 2);
1134 1151
1135 uint8* rowptr = row; 1152 uint8* rowptr = row;
1136 int rowstride = kRowSize; 1153 int rowstride = kRowSize;
1137 int lasty = yi; 1154 int lasty = yi;
1138 1155
1139 ScaleFilterCols(rowptr, src, dst_width, x, dx); 1156 ScaleFilterCols(rowptr, src, dst_width, x, dx);
1140 if (src_height > 1) { 1157 if (src_height > 1) {
1141 src += src_stride; 1158 src += src_stride;
1142 } 1159 }
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
1181 // Initial source x/y coordinate and step values as 16.16 fixed point. 1198 // Initial source x/y coordinate and step values as 16.16 fixed point.
1182 int x = 0; 1199 int x = 0;
1183 int y = 0; 1200 int y = 0;
1184 int dx = 0; 1201 int dx = 0;
1185 int dy = 0; 1202 int dy = 0;
1186 const int max_y = (src_height - 1) << 16; 1203 const int max_y = (src_height - 1) << 16;
1187 void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr, 1204 void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
1188 ptrdiff_t src_stride, int dst_width, int source_y_fraction) = 1205 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
1189 InterpolateRow_16_C; 1206 InterpolateRow_16_C;
1190 void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr, 1207 void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
1191 int dst_width, int x, int dx) = 1208 int dst_width, int x, int dx) =
1192 filtering ? ScaleFilterCols_16_C : ScaleCols_16_C; 1209 filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
1193 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, 1210 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
1194 &x, &y, &dx, &dy); 1211 &x, &y, &dx, &dy);
1195 src_width = Abs(src_width); 1212 src_width = Abs(src_width);
1196 1213
1197 #if defined(HAS_INTERPOLATEROW_16_SSE2) 1214 #if defined(HAS_INTERPOLATEROW_16_SSE2)
1198 if (TestCpuFlag(kCpuHasSSE2)) { 1215 if (TestCpuFlag(kCpuHasSSE2)) {
1199 InterpolateRow = InterpolateRow_Any_16_SSE2; 1216 InterpolateRow = InterpolateRow_Any_16_SSE2;
1200 if (IS_ALIGNED(dst_width, 16)) { 1217 if (IS_ALIGNED(dst_width, 16)) {
1201 InterpolateRow = InterpolateRow_16_SSE2; 1218 InterpolateRow = InterpolateRow_16_SSE2;
1202 } 1219 }
(...skipping 50 matching lines...)
1253 } 1270 }
1254 1271
1255 if (y > max_y) { 1272 if (y > max_y) {
1256 y = max_y; 1273 y = max_y;
1257 } 1274 }
1258 { 1275 {
1259 int yi = y >> 16; 1276 int yi = y >> 16;
1260 const uint16* src = src_ptr + yi * src_stride; 1277 const uint16* src = src_ptr + yi * src_stride;
1261 1278
1262 // Allocate 2 row buffers. 1279 // Allocate 2 row buffers.
1263 const int kRowSize = (dst_width + 15) & ~15; 1280 const int kRowSize = (dst_width + 31) & ~31;
1264 align_buffer_64(row, kRowSize * 4); 1281 align_buffer_64(row, kRowSize * 4);
1265 1282
1266 uint16* rowptr = (uint16*)row; 1283 uint16* rowptr = (uint16*)row;
1267 int rowstride = kRowSize; 1284 int rowstride = kRowSize;
1268 int lasty = yi; 1285 int lasty = yi;
1269 1286
1270 ScaleFilterCols(rowptr, src, dst_width, x, dx); 1287 ScaleFilterCols(rowptr, src, dst_width, x, dx);
1271 if (src_height > 1) { 1288 if (src_height > 1) {
1272 src += src_stride; 1289 src += src_stride;
1273 } 1290 }
(...skipping 53 matching lines...)
1327 if (src_width * 2 == dst_width && x < 0x8000) { 1344 if (src_width * 2 == dst_width && x < 0x8000) {
1328 ScaleCols = ScaleColsUp2_C; 1345 ScaleCols = ScaleColsUp2_C;
1329 #if defined(HAS_SCALECOLS_SSE2) 1346 #if defined(HAS_SCALECOLS_SSE2)
1330 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { 1347 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
1331 ScaleCols = ScaleColsUp2_SSE2; 1348 ScaleCols = ScaleColsUp2_SSE2;
1332 } 1349 }
1333 #endif 1350 #endif
1334 } 1351 }
1335 1352
1336 for (i = 0; i < dst_height; ++i) { 1353 for (i = 0; i < dst_height; ++i) {
1337 ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, 1354 ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx);
1338 dst_width, x, dx);
1339 dst_ptr += dst_stride; 1355 dst_ptr += dst_stride;
1340 y += dy; 1356 y += dy;
1341 } 1357 }
1342 } 1358 }
1343 1359
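ScalePlaneSimple above uses the same 16.16 x/dx stepping but without any filtering; ScaleCols_C is not shown in this chunk, but it presumably just picks the source pixel under the current position. A hedged one-function sketch of that nearest-neighbour column pass:

    /* Nearest-neighbour column sampling in 16.16 fixed point; a sketch of    */
    /* what ScaleCols_C is assumed to do, not the verbatim libyuv code.       */
    typedef unsigned char uint8;

    static void ScaleColsNearest(uint8* dst, const uint8* src,
                                 int dst_width, int x, int dx) {
      int i;
      for (i = 0; i < dst_width; ++i) {
        dst[i] = src[x >> 16];                  /* pixel under the current x  */
        x += dx;                                /* advance by src/dst ratio   */
      }
    }
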
1344 static void ScalePlaneSimple_16(int src_width, int src_height, 1360 static void ScalePlaneSimple_16(int src_width, int src_height,
1345 int dst_width, int dst_height, 1361 int dst_width, int dst_height,
1346 int src_stride, int dst_stride, 1362 int src_stride, int dst_stride,
1347 const uint16* src_ptr, uint16* dst_ptr) { 1363 const uint16* src_ptr, uint16* dst_ptr) {
1348 int i; 1364 int i;
(...skipping 29 matching lines...)
1378 // This function dispatches to a specialized scaler based on scale factor. 1394 // This function dispatches to a specialized scaler based on scale factor.
1379 1395
1380 LIBYUV_API 1396 LIBYUV_API
1381 void ScalePlane(const uint8* src, int src_stride, 1397 void ScalePlane(const uint8* src, int src_stride,
1382 int src_width, int src_height, 1398 int src_width, int src_height,
1383 uint8* dst, int dst_stride, 1399 uint8* dst, int dst_stride,
1384 int dst_width, int dst_height, 1400 int dst_width, int dst_height,
1385 enum FilterMode filtering) { 1401 enum FilterMode filtering) {
1386 // Simplify filtering when possible. 1402 // Simplify filtering when possible.
1387 filtering = ScaleFilterReduce(src_width, src_height, 1403 filtering = ScaleFilterReduce(src_width, src_height,
1388 dst_width, dst_height, 1404 dst_width, dst_height, filtering);
1389 filtering);
1390 1405
1391 // Negative height means invert the image. 1406 // Negative height means invert the image.
1392 if (src_height < 0) { 1407 if (src_height < 0) {
1393 src_height = -src_height; 1408 src_height = -src_height;
1394 src = src + (src_height - 1) * src_stride; 1409 src = src + (src_height - 1) * src_stride;
1395 src_stride = -src_stride; 1410 src_stride = -src_stride;
1396 } 1411 }
1397 1412
1398 // Use specialized scales to improve performance for common resolutions. 1413 // Use specialized scales to improve performance for common resolutions.
1399 // For example, all the 1/2 scalings will use ScalePlaneDown2() 1414 // For example, all the 1/2 scalings will use ScalePlaneDown2()
1400 if (dst_width == src_width && dst_height == src_height) { 1415 if (dst_width == src_width && dst_height == src_height) {
1401 // Straight copy. 1416 // Straight copy.
1402 CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height); 1417 CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
1403 return; 1418 return;
1404 } 1419 }
1405 if (dst_width == src_width) { 1420 if (dst_width == src_width && filtering != kFilterBox) {
1406 int dy = FixedDiv(src_height, dst_height); 1421 int dy = FixedDiv(src_height, dst_height);
1407 // Arbitrary scale vertically, but unscaled vertically. 1422 // Arbitrary scale vertically, but unscaled horizontally.
1408 ScalePlaneVertical(src_height, 1423 ScalePlaneVertical(src_height,
1409 dst_width, dst_height, 1424 dst_width, dst_height,
1410 src_stride, dst_stride, src, dst, 1425 src_stride, dst_stride, src, dst,
1411 0, 0, dy, 1, filtering); 1426 0, 0, dy, 1, filtering);
1412 return; 1427 return;
1413 } 1428 }
1414 if (dst_width <= Abs(src_width) && dst_height <= src_height) { 1429 if (dst_width <= Abs(src_width) && dst_height <= src_height) {
1415 // Scale down. 1430 // Scale down.
1416 if (4 * dst_width == 3 * src_width && 1431 if (4 * dst_width == 3 * src_width &&
1417 4 * dst_height == 3 * src_height) { 1432 4 * dst_height == 3 * src_height) {
(...skipping 10 matching lines...)
1428 } 1443 }
1429 // 3/8 rounded up for odd sized chroma height. 1444 // 3/8 rounded up for odd sized chroma height.
1430 if (8 * dst_width == 3 * src_width && 1445 if (8 * dst_width == 3 * src_width &&
1431 dst_height == ((src_height * 3 + 7) / 8)) { 1446 dst_height == ((src_height * 3 + 7) / 8)) {
1432 // optimized, 3/8 1447 // optimized, 3/8
1433 ScalePlaneDown38(src_width, src_height, dst_width, dst_height, 1448 ScalePlaneDown38(src_width, src_height, dst_width, dst_height,
1434 src_stride, dst_stride, src, dst, filtering); 1449 src_stride, dst_stride, src, dst, filtering);
1435 return; 1450 return;
1436 } 1451 }
1437 if (4 * dst_width == src_width && 4 * dst_height == src_height && 1452 if (4 * dst_width == src_width && 4 * dst_height == src_height &&
1438 filtering != kFilterBilinear) { 1453 (filtering == kFilterBox || filtering == kFilterNone)) {
1439 // optimized, 1/4 1454 // optimized, 1/4
1440 ScalePlaneDown4(src_width, src_height, dst_width, dst_height, 1455 ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
1441 src_stride, dst_stride, src, dst, filtering); 1456 src_stride, dst_stride, src, dst, filtering);
1442 return; 1457 return;
1443 } 1458 }
1444 } 1459 }
1445 if (filtering == kFilterBox && dst_height * 2 < src_height) { 1460 if (filtering == kFilterBox && dst_height * 2 < src_height) {
1446 ScalePlaneBox(src_width, src_height, dst_width, dst_height, 1461 ScalePlaneBox(src_width, src_height, dst_width, dst_height,
1447 src_stride, dst_stride, src, dst); 1462 src_stride, dst_stride, src, dst);
1448 return; 1463 return;
(...skipping 13 matching lines...)
1462 } 1477 }
1463 1478
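ScalePlane itself is only a dispatcher: it simplifies the filter mode, flips negative-height (inverted) sources, and then routes to the copy, vertical-only, 3/4, 3/8, 1/2, 1/4, box, bilinear or simple paths based on the ratio. A minimal caller is sketched below; the ScalePlane signature and kFilterBox come from this file, while the helper, buffers and sizes are made-up example scaffolding:

    /* Example call into the dispatcher. An exact 1/2 ratio takes the         */
    /* optimized ScalePlaneDown2() path per the comment above; the helper,    */
    /* buffers and sizes here are hypothetical.                               */
    #include "libyuv/scale.h"
    #include <stdlib.h>

    void HalveLumaPlane(const uint8* src_y, int src_width, int src_height) {
      int dst_width = src_width / 2;
      int dst_height = src_height / 2;
      uint8* dst_y = (uint8*)malloc((size_t)dst_width * dst_height);
      if (!dst_y) {
        return;
      }
      ScalePlane(src_y, src_width,              /* source plane and stride    */
                 src_width, src_height,
                 dst_y, dst_width,              /* destination plane, stride  */
                 dst_width, dst_height,
                 kFilterBox);                   /* box filter when shrinking  */
      /* ... consume dst_y ... */
      free(dst_y);
    }
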
1464 LIBYUV_API 1479 LIBYUV_API
1465 void ScalePlane_16(const uint16* src, int src_stride, 1480 void ScalePlane_16(const uint16* src, int src_stride,
1466 int src_width, int src_height, 1481 int src_width, int src_height,
1467 uint16* dst, int dst_stride, 1482 uint16* dst, int dst_stride,
1468 int dst_width, int dst_height, 1483 int dst_width, int dst_height,
1469 enum FilterMode filtering) { 1484 enum FilterMode filtering) {
1470 // Simplify filtering when possible. 1485 // Simplify filtering when possible.
1471 filtering = ScaleFilterReduce(src_width, src_height, 1486 filtering = ScaleFilterReduce(src_width, src_height,
1472 dst_width, dst_height, 1487 dst_width, dst_height, filtering);
1473 filtering);
1474 1488
1475 // Negative height means invert the image. 1489 // Negative height means invert the image.
1476 if (src_height < 0) { 1490 if (src_height < 0) {
1477 src_height = -src_height; 1491 src_height = -src_height;
1478 src = src + (src_height - 1) * src_stride; 1492 src = src + (src_height - 1) * src_stride;
1479 src_stride = -src_stride; 1493 src_stride = -src_stride;
1480 } 1494 }
1481 1495
1482 // Use specialized scales to improve performance for common resolutions. 1496 // Use specialized scales to improve performance for common resolutions.
1483 // For example, all the 1/2 scalings will use ScalePlaneDown2() 1497 // For example, all the 1/2 scalings will use ScalePlaneDown2()
(...skipping 72 matching lines...)
1556 uint8* dst_y, int dst_stride_y, 1570 uint8* dst_y, int dst_stride_y,
1557 uint8* dst_u, int dst_stride_u, 1571 uint8* dst_u, int dst_stride_u,
1558 uint8* dst_v, int dst_stride_v, 1572 uint8* dst_v, int dst_stride_v,
1559 int dst_width, int dst_height, 1573 int dst_width, int dst_height,
1560 enum FilterMode filtering) { 1574 enum FilterMode filtering) {
1561 int src_halfwidth = SUBSAMPLE(src_width, 1, 1); 1575 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1562 int src_halfheight = SUBSAMPLE(src_height, 1, 1); 1576 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1563 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); 1577 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1564 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); 1578 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1565 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || 1579 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1580 src_width > 32768 || src_height > 32768 ||
1566 !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { 1581 !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
1567 return -1; 1582 return -1;
1568 } 1583 }
1569 1584
1570 ScalePlane(src_y, src_stride_y, src_width, src_height, 1585 ScalePlane(src_y, src_stride_y, src_width, src_height,
1571 dst_y, dst_stride_y, dst_width, dst_height, 1586 dst_y, dst_stride_y, dst_width, dst_height,
1572 filtering); 1587 filtering);
1573 ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, 1588 ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight,
1574 dst_u, dst_stride_u, dst_halfwidth, dst_halfheight, 1589 dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
1575 filtering); 1590 filtering);
(...skipping 11 matching lines...)
1587 uint16* dst_y, int dst_stride_y, 1602 uint16* dst_y, int dst_stride_y,
1588 uint16* dst_u, int dst_stride_u, 1603 uint16* dst_u, int dst_stride_u,
1589 uint16* dst_v, int dst_stride_v, 1604 uint16* dst_v, int dst_stride_v,
1590 int dst_width, int dst_height, 1605 int dst_width, int dst_height,
1591 enum FilterMode filtering) { 1606 enum FilterMode filtering) {
1592 int src_halfwidth = SUBSAMPLE(src_width, 1, 1); 1607 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1593 int src_halfheight = SUBSAMPLE(src_height, 1, 1); 1608 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1594 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); 1609 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1595 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); 1610 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1596 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || 1611 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1612 src_width > 32768 || src_height > 32768 ||
1597 !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { 1613 !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
1598 return -1; 1614 return -1;
1599 } 1615 }
1600 1616
1601 ScalePlane_16(src_y, src_stride_y, src_width, src_height, 1617 ScalePlane_16(src_y, src_stride_y, src_width, src_height,
1602 dst_y, dst_stride_y, dst_width, dst_height, 1618 dst_y, dst_stride_y, dst_width, dst_height,
1603 filtering); 1619 filtering);
1604 ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, 1620 ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight,
1605 dst_u, dst_stride_u, dst_halfwidth, dst_halfheight, 1621 dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
1606 filtering); 1622 filtering);
(...skipping 57 matching lines...)
1664 dst_u, dst_halfwidth, 1680 dst_u, dst_halfwidth,
1665 dst_v, dst_halfwidth, 1681 dst_v, dst_halfwidth,
1666 dst_width, aheight, 1682 dst_width, aheight,
1667 interpolate ? kFilterBox : kFilterNone); 1683 interpolate ? kFilterBox : kFilterNone);
1668 } 1684 }
1669 1685
1670 #ifdef __cplusplus 1686 #ifdef __cplusplus
1671 } // extern "C" 1687 } // extern "C"
1672 } // namespace libyuv 1688 } // namespace libyuv
1673 #endif 1689 #endif