Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(341)

Side by Side Diff: source/libvpx/third_party/libyuv/source/scale.cc

Issue 341293003: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "third_party/libyuv/include/libyuv/scale.h"
12
13 #include <assert.h>
14 #include <string.h>
15
16 #include "third_party/libyuv/include/libyuv/cpu_id.h"
17 #include "third_party/libyuv/include/libyuv/planar_functions.h" // CopyPlane
18 #include "third_party/libyuv/include/libyuv/row.h"
19 #include "third_party/libyuv/include/libyuv/scale_row.h"
20
21 #ifdef __cplusplus
22 namespace libyuv {
23 extern "C" {
24 #endif
25
26 // Remove this macro if OVERREAD is safe.
27 #define AVOID_OVERREAD 1
28
// Absolute value of an int.  Note: Abs(INT_MIN) overflows (undefined
// behavior), the usual caveat for integer absolute value.
static __inline int Abs(int v) {
  return v < 0 ? -v : v;
}
32
33 #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s)
34
// Scale plane, 1/2
// This is an optimized version for scaling down a plane to 1/2 of
// its original size.
// filtering selects the row kernel: kFilterNone subsamples from the odd
// rows, kFilterLinear averages horizontally only, and any other mode uses
// a 2x2 box average.  SIMD row functions (NEON / SSE2 / MIPS DSPR2) are
// substituted when the CPU flag is set and width/alignment requirements
// hold.  src_width and src_height are not referenced here.

static void ScalePlaneDown2(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  int y;
  // Start with the portable C row function; may be replaced below.
  void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width) =
      filtering == kFilterNone ? ScaleRowDown2_C :
      (filtering == kFilterLinear ? ScaleRowDown2Linear_C :
      ScaleRowDown2Box_C);
  int row_stride = src_stride << 1;  // Consume two source rows per output row.
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }

#if defined(HAS_SCALEROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
    ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON;
  }
#elif defined(HAS_SCALEROWDOWN2_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
    // Unaligned SSE2 kernels first; upgrade to the aligned kernels when
    // all pointers and strides are 16-byte aligned.
    ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Unaligned_SSE2 :
        (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_SSE2 :
        ScaleRowDown2Box_Unaligned_SSE2);
    if (IS_ALIGNED(src_ptr, 16) &&
        IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
          (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
          ScaleRowDown2Box_SSE2);
    }
  }
#elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown2 = filtering ?
        ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    // Linear filtering is horizontal-only; the row kernel needs no
    // second source row.
    src_stride = 0;
  }
  // TODO(fbarchard): Loop through source height to allow odd height.
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
92
// 16-bit-per-channel version of ScalePlaneDown2; identical structure with
// the _16_ row kernels.  src_width and src_height are not referenced here.
static void ScalePlaneDown2_16(int src_width, int src_height,
                               int dst_width, int dst_height,
                               int src_stride, int dst_stride,
                               const uint16* src_ptr, uint16* dst_ptr,
                               enum FilterMode filtering) {
  int y;
  // Start with the portable C row function; may be replaced below.
  void (*ScaleRowDown2)(const uint16* src_ptr, ptrdiff_t src_stride,
                        uint16* dst_ptr, int dst_width) =
      filtering == kFilterNone ? ScaleRowDown2_16_C :
      (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C :
      ScaleRowDown2Box_16_C);
  int row_stride = src_stride << 1;  // Consume two source rows per output row.
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }

#if defined(HAS_SCALEROWDOWN2_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
    ScaleRowDown2 = filtering ? ScaleRowDown2Box_16_NEON :
        ScaleRowDown2_16_NEON;
  }
#elif defined(HAS_SCALEROWDOWN2_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
    // Unaligned SSE2 kernels first; upgrade to the aligned kernels when
    // all pointers and strides are 16-byte aligned.
    ScaleRowDown2 = filtering == kFilterNone ?
        ScaleRowDown2_Unaligned_16_SSE2 :
        (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_16_SSE2 :
        ScaleRowDown2Box_Unaligned_16_SSE2);
    if (IS_ALIGNED(src_ptr, 16) &&
        IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 :
          (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 :
          ScaleRowDown2Box_16_SSE2);
    }
  }
#elif defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown2 = filtering ?
        ScaleRowDown2Box_16_MIPS_DSPR2 : ScaleRowDown2_16_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    // Linear filtering is horizontal-only; no second source row needed.
    src_stride = 0;
  }
  // TODO(fbarchard): Loop through source height to allow odd height.
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
148
// Scale plane, 1/4
// This is an optimized version for scaling down a plane to 1/4 of
// its original size.
// With filtering, a 4x4 box average is used; without, a single pixel from
// row 2 of every 4-row group is sampled.  src_width and src_height are
// not referenced here.

static void ScalePlaneDown4(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  int y;
  // Portable C row kernel; may be replaced by SIMD below.
  void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
  int row_stride = src_stride << 2;  // Consume four source rows per output row.
  if (!filtering) {
    src_ptr += src_stride * 2;  // Point to row 2.
    src_stride = 0;
  }
#if defined(HAS_SCALEROWDOWN4_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
  }
#elif defined(HAS_SCALEROWDOWN4_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
  }
#elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown4 = filtering ?
        ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    // Linear filtering is horizontal-only; no extra source rows needed.
    src_stride = 0;
  }
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
195
// 16-bit-per-channel version of ScalePlaneDown4; identical structure with
// the _16_ row kernels.  src_width and src_height are not referenced here.
static void ScalePlaneDown4_16(int src_width, int src_height,
                               int dst_width, int dst_height,
                               int src_stride, int dst_stride,
                               const uint16* src_ptr, uint16* dst_ptr,
                               enum FilterMode filtering) {
  int y;
  // Portable C row kernel; may be replaced by SIMD below.
  void (*ScaleRowDown4)(const uint16* src_ptr, ptrdiff_t src_stride,
                        uint16* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
  int row_stride = src_stride << 2;  // Consume four source rows per output row.
  if (!filtering) {
    src_ptr += src_stride * 2;  // Point to row 2.
    src_stride = 0;
  }
#if defined(HAS_SCALEROWDOWN4_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_NEON :
        ScaleRowDown4_16_NEON;
  }
#elif defined(HAS_SCALEROWDOWN4_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 :
        ScaleRowDown4_16_SSE2;
  }
#elif defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown4 = filtering ?
        ScaleRowDown4Box_16_MIPS_DSPR2 : ScaleRowDown4_16_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    // Linear filtering is horizontal-only; no extra source rows needed.
    src_stride = 0;
  }
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
240
// Scale plane down, 3/4
// Produces 3 output pixels per 4 input pixels horizontally and 3 output
// rows per 4 input rows vertically.  dst_width must be a multiple of 3
// (asserted below).  src_width and src_height are not referenced here.

static void ScalePlaneDown34(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr,
                             enum FilterMode filtering) {
  int y;
  // Two row kernels: the _0 and _1 variants are applied to different rows
  // of each 4-row group in the main loop below.
  void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  // kFilterLinear is horizontal-only, so the kernels get a 0 row stride.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown34_0 = ScaleRowDown34_C;
    ScaleRowDown34_1 = ScaleRowDown34_C;
  } else {
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
  }
#if defined(HAS_SCALEROWDOWN34_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_NEON;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_MIPS_DSPR2;
    }
  }
#endif

  // Each iteration consumes 4 source rows and emits 3 output rows.  The
  // third call points at the group's last row and uses a negated stride,
  // so its second row is read upward from the row above.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
                     dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  // (0 row stride disables the vertical part of the box kernel).
  if ((dst_height % 3) == 2) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  }
}
322
// 16-bit-per-channel version of ScalePlaneDown34; identical structure
// with the _16_ row kernels.  dst_width must be a multiple of 3.
static void ScalePlaneDown34_16(int src_width, int src_height,
                                int dst_width, int dst_height,
                                int src_stride, int dst_stride,
                                const uint16* src_ptr, uint16* dst_ptr,
                                enum FilterMode filtering) {
  int y;
  // Two row kernels applied to different rows of each 4-row group.
  void (*ScaleRowDown34_0)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  // kFilterLinear is horizontal-only, so the kernels get a 0 row stride.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown34_0 = ScaleRowDown34_16_C;
    ScaleRowDown34_1 = ScaleRowDown34_16_C;
  } else {
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
  }
#if defined(HAS_SCALEROWDOWN34_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_16_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_16_MIPS_DSPR2;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_MIPS_DSPR2;
    }
  }
#endif

  // Each iteration consumes 4 source rows and emits 3 output rows; the
  // third call reads its second row upward via the negated stride.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
                     dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  }
}
402
403
// Scale plane, 3/8
// This is an optimized version for scaling down a plane to 3/8
// of its original size.
//
// Uses a box filter arranged like this:
// aaabbbcc -> abc
// aaabbbcc    def
// aaabbbcc    ghi
// dddeeeff
// dddeeeff
// dddeeeff
// ggghhhii
// ggghhhii
// Boxes are 3x3, 2x3, 3x2 and 2x2

static void ScalePlaneDown38(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr,
                             enum FilterMode filtering) {
  int y;
  // _3 sums three source rows per output row, _2 sums two (the bottom of
  // each 8-row group).  dst_width must be a multiple of 3 (asserted).
  void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  // kFilterLinear is horizontal-only, so the kernels get a 0 row stride.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown38_3 = ScaleRowDown38_C;
    ScaleRowDown38_2 = ScaleRowDown38_C;
  } else {
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
  }
#if defined(HAS_SCALEROWDOWN38_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_NEON;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
    }
  }
#elif defined(HAS_SCALEROWDOWN38_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
    }
  }
#elif defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_MIPS_DSPR2;
    }
  }
#endif

  // Each iteration consumes 8 source rows (3 + 3 + 2) and emits 3 output
  // rows.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  // (0 row stride disables the vertical part of the box kernel).
  if ((dst_height % 3) == 2) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  }
}
495
// 16-bit-per-channel version of ScalePlaneDown38; identical structure
// with the _16_ row kernels.  dst_width must be a multiple of 3.
static void ScalePlaneDown38_16(int src_width, int src_height,
                                int dst_width, int dst_height,
                                int src_stride, int dst_stride,
                                const uint16* src_ptr, uint16* dst_ptr,
                                enum FilterMode filtering) {
  int y;
  // _3 sums three source rows per output row, _2 sums two.
  void (*ScaleRowDown38_3)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  // kFilterLinear is horizontal-only, so the kernels get a 0 row stride.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown38_3 = ScaleRowDown38_16_C;
    ScaleRowDown38_2 = ScaleRowDown38_16_C;
  } else {
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
  }
#if defined(HAS_SCALEROWDOWN38_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
    }
  }
#elif defined(HAS_SCALEROWDOWN38_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
    }
  }
#elif defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_16_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_16_MIPS_DSPR2;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_MIPS_DSPR2;
    }
  }
#endif

  // Each iteration consumes 8 source rows (3 + 3 + 2) and emits 3 output
  // rows.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  }
}
572
573 static __inline uint32 SumBox(int iboxwidth, int iboxheight,
574 ptrdiff_t src_stride, const uint8* src_ptr) {
575 uint32 sum = 0u;
576 int y;
577 assert(iboxwidth > 0);
578 assert(iboxheight > 0);
579 for (y = 0; y < iboxheight; ++y) {
580 int x;
581 for (x = 0; x < iboxwidth; ++x) {
582 sum += src_ptr[x];
583 }
584 src_ptr += src_stride;
585 }
586 return sum;
587 }
588
589 static __inline uint32 SumBox_16(int iboxwidth, int iboxheight,
590 ptrdiff_t src_stride, const uint16* src_ptr) {
591 uint32 sum = 0u;
592 int y;
593 assert(iboxwidth > 0);
594 assert(iboxheight > 0);
595 for (y = 0; y < iboxheight; ++y) {
596 int x;
597 for (x = 0; x < iboxwidth; ++x) {
598 sum += src_ptr[x];
599 }
600 src_ptr += src_stride;
601 }
602 return sum;
603 }
604
605 static void ScalePlaneBoxRow_C(int dst_width, int boxheight,
606 int x, int dx, ptrdiff_t src_stride,
607 const uint8* src_ptr, uint8* dst_ptr) {
608 int i;
609 int boxwidth;
610 for (i = 0; i < dst_width; ++i) {
611 int ix = x >> 16;
612 x += dx;
613 boxwidth = (x >> 16) - ix;
614 *dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) /
615 (boxwidth * boxheight);
616 }
617 }
618
619 static void ScalePlaneBoxRow_16_C(int dst_width, int boxheight,
620 int x, int dx, ptrdiff_t src_stride,
621 const uint16* src_ptr, uint16* dst_ptr) {
622 int i;
623 int boxwidth;
624 for (i = 0; i < dst_width; ++i) {
625 int ix = x >> 16;
626 x += dx;
627 boxwidth = (x >> 16) - ix;
628 *dst_ptr++ = SumBox_16(boxwidth, boxheight, src_stride, src_ptr + ix) /
629 (boxwidth * boxheight);
630 }
631 }
632
633 static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) {
634 uint32 sum = 0u;
635 int x;
636 assert(iboxwidth > 0);
637 for (x = 0; x < iboxwidth; ++x) {
638 sum += src_ptr[x];
639 }
640 return sum;
641 }
642
643 static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) {
644 uint32 sum = 0u;
645 int x;
646 assert(iboxwidth > 0);
647 for (x = 0; x < iboxwidth; ++x) {
648 sum += src_ptr[x];
649 }
650 return sum;
651 }
652
653 static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx,
654 const uint16* src_ptr, uint8* dst_ptr) {
655 int i;
656 int scaletbl[2];
657 int minboxwidth = (dx >> 16);
658 int* scaleptr = scaletbl - minboxwidth;
659 int boxwidth;
660 scaletbl[0] = 65536 / (minboxwidth * boxheight);
661 scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
662 for (i = 0; i < dst_width; ++i) {
663 int ix = x >> 16;
664 x += dx;
665 boxwidth = (x >> 16) - ix;
666 *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16;
667 }
668 }
669
670 static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx,
671 const uint32* src_ptr, uint16* dst_ptr) {
672 int i;
673 int scaletbl[2];
674 int minboxwidth = (dx >> 16);
675 int* scaleptr = scaletbl - minboxwidth;
676 int boxwidth;
677 scaletbl[0] = 65536 / (minboxwidth * boxheight);
678 scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight);
679 for (i = 0; i < dst_width; ++i) {
680 int ix = x >> 16;
681 x += dx;
682 boxwidth = (x >> 16) - ix;
683 *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) *
684 scaleptr[boxwidth] >> 16;
685 }
686 }
687
688 static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx,
689 const uint16* src_ptr, uint8* dst_ptr) {
690 int boxwidth = (dx >> 16);
691 int scaleval = 65536 / (boxwidth * boxheight);
692 int i;
693 for (i = 0; i < dst_width; ++i) {
694 *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16;
695 x += boxwidth;
696 }
697 }
698
699 static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx,
700 const uint32* src_ptr, uint16* dst_ptr) {
701 int boxwidth = (dx >> 16);
702 int scaleval = 65536 / (boxwidth * boxheight);
703 int i;
704 for (i = 0; i < dst_width; ++i) {
705 *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16;
706 x += boxwidth;
707 }
708 }
709
// Scale plane down to any dimensions, with interpolation.
// (boxfilter).
//
// Same method as SimpleScale, which is fixed point, outputting
// one pixel of destination using fixed point (16.16) to step
// through source, sampling a box of pixel with simple
// averaging.
static void ScalePlaneBox(int src_width, int src_height,
                          int dst_width, int dst_height,
                          int src_stride, int dst_stride,
                          const uint8* src_ptr, uint8* dst_ptr) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // NOTE(review): (src_height << 16) overflows int for src_height >=
  // 32768 -- confirm callers bound the plane height.
  const int max_y = (src_height << 16);
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);  // src_width may arrive negative (mirrored).
  // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
  // Slow path: per-pixel SumBox when the width is not a multiple of 16
  // or the vertical scale factor is less than 2.
  if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
    uint8* dst = dst_ptr;
    int j;  // Shadows the outer j; harmless, the outer one is unused here.
    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint8* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;  // Clamp the box to the bottom of the plane.
      }
      boxheight = (y >> 16) - iy;
      ScalePlaneBoxRow_C(dst_width, boxheight,
                         x, dx, src_stride,
                         src, dst);
      dst += dst_stride;
    }
    return;
  }
  // Fast path: accumulate each box's rows into a uint16 row buffer with
  // ScaleAddRows, then average columns out of that buffer.
  {
    // Allocate a row buffer of uint16.
    align_buffer_64(row16, src_width * 2);
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
        const uint16* src_ptr, uint8* dst_ptr) =
        (dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C;
    void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
        uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;

#if defined(HAS_SCALEADDROWS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) &&
#ifdef AVOID_OVERREAD
        IS_ALIGNED(src_width, 16) &&
#endif
        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
      ScaleAddRows = ScaleAddRows_SSE2;
    }
#endif

    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint8* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > (src_height << 16)) {
        y = (src_height << 16);  // Clamp the box to the bottom of the plane.
      }
      boxheight = (y >> 16) - iy;
      ScaleAddRows(src, src_stride, (uint16*)(row16),
                   src_width, boxheight);
      ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16),
                   dst_ptr);
      dst_ptr += dst_stride;
    }
    free_aligned_buffer_64(row16);
  }
}
788
// 16-bit-per-channel version of ScalePlaneBox; identical structure with a
// uint32 row-sum buffer instead of uint16.
static void ScalePlaneBox_16(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint16* src_ptr, uint16* dst_ptr) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // NOTE(review): (src_height << 16) overflows int for src_height >=
  // 32768 -- confirm callers bound the plane height.
  const int max_y = (src_height << 16);
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);  // src_width may arrive negative (mirrored).
  // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
  // Slow path: per-pixel SumBox_16 when the width is not a multiple of 16
  // or the vertical scale factor is less than 2.
  if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
    uint16* dst = dst_ptr;
    int j;  // Shadows the outer j; harmless, the outer one is unused here.
    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint16* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;  // Clamp the box to the bottom of the plane.
      }
      boxheight = (y >> 16) - iy;
      ScalePlaneBoxRow_16_C(dst_width, boxheight,
                            x, dx, src_stride,
                            src, dst);
      dst += dst_stride;
    }
    return;
  }
  // Fast path: accumulate each box's rows into a uint32 row buffer, then
  // average columns out of that buffer.
  {
    // Allocate a row buffer of uint32.
    align_buffer_64(row32, src_width * 4);
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
        const uint32* src_ptr, uint16* dst_ptr) =
        (dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C;
    void (*ScaleAddRows)(const uint16* src_ptr, ptrdiff_t src_stride,
        uint32* dst_ptr, int src_width, int src_height) = ScaleAddRows_16_C;

#if defined(HAS_SCALEADDROWS_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) &&
#ifdef AVOID_OVERREAD
        IS_ALIGNED(src_width, 16) &&
#endif
        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
      ScaleAddRows = ScaleAddRows_16_SSE2;
    }
#endif

    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint16* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > (src_height << 16)) {
        y = (src_height << 16);  // Clamp the box to the bottom of the plane.
      }
      boxheight = (y >> 16) - iy;
      ScaleAddRows(src, src_stride, (uint32*)(row32),
                   src_width, boxheight);
      ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32),
                   dst_ptr);
      dst_ptr += dst_stride;
    }
    free_aligned_buffer_64(row32);
  }
}
860
861 // Scale plane down with bilinear interpolation.
862 void ScalePlaneBilinearDown(int src_width, int src_height,
863 int dst_width, int dst_height,
864 int src_stride, int dst_stride,
865 const uint8* src_ptr, uint8* dst_ptr,
866 enum FilterMode filtering) {
867 // Initial source x/y coordinate and step values as 16.16 fixed point.
868 int x = 0;
869 int y = 0;
870 int dx = 0;
871 int dy = 0;
872 // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
873 // Allocate a row buffer.
874 align_buffer_64(row, src_width);
875
876 const int max_y = (src_height - 1) << 16;
877 int j;
878 void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
879 int dst_width, int x, int dx) =
880 (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
881 void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
882 ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
883 InterpolateRow_C;
884 ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
885 &x, &y, &dx, &dy);
886 src_width = Abs(src_width);
887
888 #if defined(HAS_INTERPOLATEROW_SSE2)
889 if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
890 InterpolateRow = InterpolateRow_Any_SSE2;
891 if (IS_ALIGNED(src_width, 16)) {
892 InterpolateRow = InterpolateRow_Unaligned_SSE2;
893 if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
894 InterpolateRow = InterpolateRow_SSE2;
895 }
896 }
897 }
898 #endif
899 #if defined(HAS_INTERPOLATEROW_SSSE3)
900 if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
901 InterpolateRow = InterpolateRow_Any_SSSE3;
902 if (IS_ALIGNED(src_width, 16)) {
903 InterpolateRow = InterpolateRow_Unaligned_SSSE3;
904 if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
905 InterpolateRow = InterpolateRow_SSSE3;
906 }
907 }
908 }
909 #endif
910 #if defined(HAS_INTERPOLATEROW_AVX2)
911 if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
912 InterpolateRow = InterpolateRow_Any_AVX2;
913 if (IS_ALIGNED(src_width, 32)) {
914 InterpolateRow = InterpolateRow_AVX2;
915 }
916 }
917 #endif
918 #if defined(HAS_INTERPOLATEROW_NEON)
919 if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
920 InterpolateRow = InterpolateRow_Any_NEON;
921 if (IS_ALIGNED(src_width, 16)) {
922 InterpolateRow = InterpolateRow_NEON;
923 }
924 }
925 #endif
926 #if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
927 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
928 InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
929 if (IS_ALIGNED(src_width, 4)) {
930 InterpolateRow = InterpolateRow_MIPS_DSPR2;
931 }
932 }
933 #endif
934
935
936 #if defined(HAS_SCALEFILTERCOLS_SSSE3)
937 if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
938 ScaleFilterCols = ScaleFilterCols_SSSE3;
939 }
940 #endif
941 if (y > max_y) {
942 y = max_y;
943 }
944
945 for (j = 0; j < dst_height; ++j) {
946 int yi = y >> 16;
947 const uint8* src = src_ptr + yi * src_stride;
948 if (filtering == kFilterLinear) {
949 ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
950 } else {
951 int yf = (y >> 8) & 255;
952 InterpolateRow(row, src, src_stride, src_width, yf);
953 ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
954 }
955 dst_ptr += dst_stride;
956 y += dy;
957 if (y > max_y) {
958 y = max_y;
959 }
960 }
961 free_aligned_buffer_64(row);
962 }
963
// Scale a plane down with bilinear (or linear) interpolation, 16 bit
// samples.  Each output row is produced by vertically blending two source
// rows into a temporary buffer, then filtering that buffer horizontally.
// (Called from ScalePlane_16 when filtering and not scaling up.)
void ScalePlaneBilinearDown_16(int src_width, int src_height,
                               int dst_width, int dst_height,
                               int src_stride, int dst_stride,
                               const uint16* src_ptr, uint16* dst_ptr,
                               enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer.
  align_buffer_64(row, src_width * 2);  // src_width uint16 samples.

  // Clamp limit so the integer part of y never passes the last source row.
  const int max_y = (src_height - 1) << 16;
  int j;
  // Sources >= 32768 pixels wide need the 64 bit fixed point column scaler.
  void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
      int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
  void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  // src_width may be negative on entry; buffers use its magnitude.
  src_width = Abs(src_width);

  // CPU dispatch for the vertical interpolator.  Later (faster) families
  // override earlier ones; within a family: Any -> Unaligned -> aligned,
  // depending on width and pointer/stride alignment.  Widths here are the
  // source width, since interpolation happens before column scaling.
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_16_SSE2;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
        InterpolateRow = InterpolateRow_16_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_16_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
        InterpolateRow = InterpolateRow_16_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
    InterpolateRow = InterpolateRow_Any_16_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_16_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
    InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
    if (IS_ALIGNED(src_width, 4)) {
      InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
    }
  }
#endif

#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  }
#endif
  // Clamp the starting position.
  if (y > max_y) {
    y = max_y;
  }

  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;  // Integer source row.
    const uint16* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      // Horizontal filtering only; read the source row directly.
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      // Blend two adjacent source rows by the fractional part of y
      // (8 bit fraction), then filter the blended row horizontally.
      int yf = (y >> 8) & 255;
      InterpolateRow((uint16*)row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, (uint16*)row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
    if (y > max_y) {
      y = max_y;
    }
  }
  free_aligned_buffer_64(row);
}
1065
// Scale plane up with bilinear interpolation (used when dst_height > src_height).
void ScalePlaneBilinearUp(int src_width, int src_height,
                          int dst_width, int dst_height,
                          int src_stride, int dst_stride,
                          const uint8* src_ptr, uint8* dst_ptr,
                          enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // Clamp limit so the integer part of y never passes the last source row.
  const int max_y = (src_height - 1) << 16;
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
  // Horizontal scaler: filtered columns when filtering, point sampling
  // otherwise.
  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_C : ScaleCols_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  // CPU dispatch for the vertical interpolator.  Rows are already scaled
  // to dst_width before blending, so width/alignment checks use dst here
  // (unlike the BilinearDown path, which checks src).
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_MIPS_DSPR2;
    }
  }
#endif

  // Sources >= 32768 pixels wide need the 64 bit fixed point column scaler.
  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_C;
  }
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
  // Unfiltered exact 2x horizontal upscale starting on a whole pixel:
  // use the specialized pixel doubling path.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleFilterCols = ScaleColsUp2_SSE2;
    }
#endif
  }

  // Clamp the starting position.
  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint8* src = src_ptr + yi * src_stride;

    // Allocate 2 row buffers.
    // kRowSize rounds dst_width up to a multiple of 16 so SIMD variants
    // can overwrite to the buffer edge safely.
    const int kRowSize = (dst_width + 15) & ~15;
    align_buffer_64(row, kRowSize * 2);

    // Two-slot ring of horizontally scaled rows.  rowptr/rowptr+rowstride
    // hold two consecutive source rows; rowstride flips sign on each
    // advance so only one new row is rescaled per source-row step.
    uint8* rowptr = row;
    int rowstride = kRowSize;
    int lasty = yi;  // Source row currently held in the ring.

    // Prime the ring with the first two source rows (the same row twice
    // for a single-row source).
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;

    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        // Clamp y to the last row before re-deriving the source pointer.
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * src_stride;
        }
        if (yi != lasty) {
          // Advance the ring: rescale one new source row over the stale
          // slot and swap the roles of the two slots.
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        // Horizontal-only filtering: emit the scaled row unblended.
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        // Blend the two buffered rows by the fractional part of y.
        int yf = (y >> 8) & 255;
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
1205
// 16 bit variant of ScalePlaneBilinearUp; same two-row ring algorithm
// with uint16 samples (row buffer is 2 bytes per pixel).
void ScalePlaneBilinearUp_16(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint16* src_ptr, uint16* dst_ptr,
                             enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // Clamp limit so the integer part of y never passes the last source row.
  const int max_y = (src_height - 1) << 16;
  void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_16_C;
  // Horizontal scaler: filtered columns when filtering, point sampling
  // otherwise.
  void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
      int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  // CPU dispatch for the vertical interpolator; widths/alignment use dst
  // since rows are column-scaled before blending.
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_16_SSE2;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_16_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_16_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_16_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
    InterpolateRow = InterpolateRow_Any_16_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_16_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
    }
  }
#endif

  // Sources >= 32768 pixels wide need the 64 bit fixed point column scaler.
  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_16_C;
  }
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  }
#endif
  // Unfiltered exact 2x horizontal upscale starting on a whole pixel:
  // use the specialized pixel doubling path.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleFilterCols = ScaleColsUp2_16_SSE2;
    }
#endif
  }

  // Clamp the starting position.
  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint16* src = src_ptr + yi * src_stride;

    // Allocate 2 row buffers.
    // kRowSize is in pixels; * 4 bytes covers two uint16 rows.
    const int kRowSize = (dst_width + 15) & ~15;
    align_buffer_64(row, kRowSize * 4);

    // Two-slot ring of horizontally scaled rows (see 8 bit variant).
    uint16* rowptr = (uint16*)row;
    int rowstride = kRowSize;
    int lasty = yi;  // Source row currently held in the ring.

    // Prime the ring with the first two source rows (the same row twice
    // for a single-row source).
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;

    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        // Clamp y to the last row before re-deriving the source pointer.
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * src_stride;
        }
        if (yi != lasty) {
          // Advance the ring: rescale one new source row over the stale
          // slot and swap the roles of the two slots.
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        // Horizontal-only filtering: emit the scaled row unblended.
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        // Blend the two buffered rows by the fractional part of y.
        int yf = (y >> 8) & 255;
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
1344
// Scale Plane to/from any dimensions, without interpolation.
// Fixed point math is used for performance: The upper 16 bits
// of x and dx are the integer part of the source position and
// the lower 16 bits are the fixed decimal part.
1349
1350 static void ScalePlaneSimple(int src_width, int src_height,
1351 int dst_width, int dst_height,
1352 int src_stride, int dst_stride,
1353 const uint8* src_ptr, uint8* dst_ptr) {
1354 int i;
1355 void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr,
1356 int dst_width, int x, int dx) = ScaleCols_C;
1357 // Initial source x/y coordinate and step values as 16.16 fixed point.
1358 int x = 0;
1359 int y = 0;
1360 int dx = 0;
1361 int dy = 0;
1362 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
1363 &x, &y, &dx, &dy);
1364 src_width = Abs(src_width);
1365
1366 if (src_width * 2 == dst_width && x < 0x8000) {
1367 ScaleCols = ScaleColsUp2_C;
1368 #if defined(HAS_SCALECOLS_SSE2)
1369 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
1370 IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
1371 IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
1372 ScaleCols = ScaleColsUp2_SSE2;
1373 }
1374 #endif
1375 }
1376
1377 for (i = 0; i < dst_height; ++i) {
1378 ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
1379 dst_width, x, dx);
1380 dst_ptr += dst_stride;
1381 y += dy;
1382 }
1383 }
1384
1385 static void ScalePlaneSimple_16(int src_width, int src_height,
1386 int dst_width, int dst_height,
1387 int src_stride, int dst_stride,
1388 const uint16* src_ptr, uint16* dst_ptr) {
1389 int i;
1390 void (*ScaleCols)(uint16* dst_ptr, const uint16* src_ptr,
1391 int dst_width, int x, int dx) = ScaleCols_16_C;
1392 // Initial source x/y coordinate and step values as 16.16 fixed point.
1393 int x = 0;
1394 int y = 0;
1395 int dx = 0;
1396 int dy = 0;
1397 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone,
1398 &x, &y, &dx, &dy);
1399 src_width = Abs(src_width);
1400
1401 if (src_width * 2 == dst_width && x < 0x8000) {
1402 ScaleCols = ScaleColsUp2_16_C;
1403 #if defined(HAS_SCALECOLS_16_SSE2)
1404 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
1405 IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
1406 IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
1407 ScaleCols = ScaleColsUp2_16_SSE2;
1408 }
1409 #endif
1410 }
1411
1412 for (i = 0; i < dst_height; ++i) {
1413 ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride,
1414 dst_width, x, dx);
1415 dst_ptr += dst_stride;
1416 y += dy;
1417 }
1418 }
1419
1420 // Scale a plane.
1421 // This function dispatches to a specialized scaler based on scale factor.
1422
LIBYUV_API
void ScalePlane(const uint8* src, int src_stride,
                int src_width, int src_height,
                uint8* dst, int dst_stride,
                int dst_width, int dst_height,
                enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height,
                                dst_width, dst_height,
                                filtering);

  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }

  // Use specialized scales to improve performance for common resolutions.
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
  if (dst_width == src_width && dst_height == src_height) {
    // Straight copy.
    CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
    return;
  }
  if (dst_width == src_width) {
    int dy = FixedDiv(src_height, dst_height);
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical(src_height,
                       dst_width, dst_height,
                       src_stride, dst_stride, src, dst,
                       0, 0, dy, 1, filtering);
    return;
  }
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    // Scale down.
    if (4 * dst_width == 3 * src_width &&
        4 * dst_height == 3 * src_height) {
      // optimized, 3/4
      ScalePlaneDown34(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
      // optimized, 1/2
      ScalePlaneDown2(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst, filtering);
      return;
    }
    // 3/8 rounded up for odd sized chroma height.
    if (8 * dst_width == 3 * src_width &&
        dst_height == ((src_height * 3 + 7) / 8)) {
      // optimized, 3/8
      ScalePlaneDown38(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
        filtering != kFilterBilinear) {
      // optimized, 1/4
      ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst, filtering);
      return;
    }
  }
  // Box filter path: only taken for vertical downscale below half height.
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    ScalePlaneBox(src_width, src_height, dst_width, dst_height,
                  src_stride, dst_stride, src, dst);
    return;
  }
  // Bilinear: separate paths for vertical upscale vs. downscale.
  if (filtering && dst_height > src_height) {
    ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
    return;
  }
  if (filtering) {
    ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
                           src_stride, dst_stride, src, dst, filtering);
    return;
  }
  // No filtering: arbitrary scale with point sampling.
  ScalePlaneSimple(src_width, src_height, dst_width, dst_height,
                   src_stride, dst_stride, src, dst);
}
1506
LIBYUV_API
void ScalePlane_16(const uint16* src, int src_stride,
                   int src_width, int src_height,
                   uint16* dst, int dst_stride,
                   int dst_width, int dst_height,
                   enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height,
                                dst_width, dst_height,
                                filtering);

  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }

  // Use specialized scales to improve performance for common resolutions.
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
  if (dst_width == src_width && dst_height == src_height) {
    // Straight copy.
    CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
    return;
  }
  if (dst_width == src_width) {
    int dy = FixedDiv(src_height, dst_height);
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical_16(src_height,
                          dst_width, dst_height,
                          src_stride, dst_stride, src, dst,
                          0, 0, dy, 1, filtering);
    return;
  }
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    // Scale down.
    if (4 * dst_width == 3 * src_width &&
        4 * dst_height == 3 * src_height) {
      // optimized, 3/4
      ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
      // optimized, 1/2
      ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
      return;
    }
    // 3/8 rounded up for odd sized chroma height.
    if (8 * dst_width == 3 * src_width &&
        dst_height == ((src_height * 3 + 7) / 8)) {
      // optimized, 3/8
      ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
        filtering != kFilterBilinear) {
      // optimized, 1/4
      ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
      return;
    }
  }
  // Box filter path: only taken for vertical downscale below half height.
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    ScalePlaneBox_16(src_width, src_height, dst_width, dst_height,
                     src_stride, dst_stride, src, dst);
    return;
  }
  // Bilinear: separate paths for vertical upscale vs. downscale.
  if (filtering && dst_height > src_height) {
    ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
                            src_stride, dst_stride, src, dst, filtering);
    return;
  }
  if (filtering) {
    ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
                              src_stride, dst_stride, src, dst, filtering);
    return;
  }
  // No filtering: arbitrary scale with point sampling.
  ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst);
}
1590
1591 // Scale an I420 image.
1592 // This function in turn calls a scaling function for each plane.
1593
1594 LIBYUV_API
1595 int I420Scale(const uint8* src_y, int src_stride_y,
1596 const uint8* src_u, int src_stride_u,
1597 const uint8* src_v, int src_stride_v,
1598 int src_width, int src_height,
1599 uint8* dst_y, int dst_stride_y,
1600 uint8* dst_u, int dst_stride_u,
1601 uint8* dst_v, int dst_stride_v,
1602 int dst_width, int dst_height,
1603 enum FilterMode filtering) {
1604 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1605 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1606 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1607 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1608 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1609 !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
1610 return -1;
1611 }
1612
1613 ScalePlane(src_y, src_stride_y, src_width, src_height,
1614 dst_y, dst_stride_y, dst_width, dst_height,
1615 filtering);
1616 ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight,
1617 dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
1618 filtering);
1619 ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight,
1620 dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
1621 filtering);
1622 return 0;
1623 }
1624
1625 LIBYUV_API
1626 int I420Scale_16(const uint16* src_y, int src_stride_y,
1627 const uint16* src_u, int src_stride_u,
1628 const uint16* src_v, int src_stride_v,
1629 int src_width, int src_height,
1630 uint16* dst_y, int dst_stride_y,
1631 uint16* dst_u, int dst_stride_u,
1632 uint16* dst_v, int dst_stride_v,
1633 int dst_width, int dst_height,
1634 enum FilterMode filtering) {
1635 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1636 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1637 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1638 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1639 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 ||
1640 !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) {
1641 return -1;
1642 }
1643
1644 ScalePlane_16(src_y, src_stride_y, src_width, src_height,
1645 dst_y, dst_stride_y, dst_width, dst_height,
1646 filtering);
1647 ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight,
1648 dst_u, dst_stride_u, dst_halfwidth, dst_halfheight,
1649 filtering);
1650 ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight,
1651 dst_v, dst_stride_v, dst_halfwidth, dst_halfheight,
1652 filtering);
1653 return 0;
1654 }
1655
1656 // Deprecated api
1657 LIBYUV_API
1658 int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
1659 int src_stride_y, int src_stride_u, int src_stride_v,
1660 int src_width, int src_height,
1661 uint8* dst_y, uint8* dst_u, uint8* dst_v,
1662 int dst_stride_y, int dst_stride_u, int dst_stride_v,
1663 int dst_width, int dst_height,
1664 LIBYUV_BOOL interpolate) {
1665 return I420Scale(src_y, src_stride_y,
1666 src_u, src_stride_u,
1667 src_v, src_stride_v,
1668 src_width, src_height,
1669 dst_y, dst_stride_y,
1670 dst_u, dst_stride_u,
1671 dst_v, dst_stride_v,
1672 dst_width, dst_height,
1673 interpolate ? kFilterBox : kFilterNone);
1674 }
1675
1676 // Deprecated api
1677 LIBYUV_API
1678 int ScaleOffset(const uint8* src, int src_width, int src_height,
1679 uint8* dst, int dst_width, int dst_height, int dst_yoffset,
1680 LIBYUV_BOOL interpolate) {
1681 // Chroma requires offset to multiple of 2.
1682 int dst_yoffset_even = dst_yoffset & ~1;
1683 int src_halfwidth = SUBSAMPLE(src_width, 1, 1);
1684 int src_halfheight = SUBSAMPLE(src_height, 1, 1);
1685 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1);
1686 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1);
1687 int aheight = dst_height - dst_yoffset_even * 2; // actual output height
1688 const uint8* src_y = src;
1689 const uint8* src_u = src + src_width * src_height;
1690 const uint8* src_v = src + src_width * src_height +
1691 src_halfwidth * src_halfheight;
1692 uint8* dst_y = dst + dst_yoffset_even * dst_width;
1693 uint8* dst_u = dst + dst_width * dst_height +
1694 (dst_yoffset_even >> 1) * dst_halfwidth;
1695 uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight +
1696 (dst_yoffset_even >> 1) * dst_halfwidth;
1697 if (!src || src_width <= 0 || src_height <= 0 ||
1698 !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset_even < 0 ||
1699 dst_yoffset_even >= dst_height) {
1700 return -1;
1701 }
1702 return I420Scale(src_y, src_width,
1703 src_u, src_halfwidth,
1704 src_v, src_halfwidth,
1705 src_width, src_height,
1706 dst_y, dst_width,
1707 dst_u, dst_halfwidth,
1708 dst_v, dst_halfwidth,
1709 dst_width, aheight,
1710 interpolate ? kFilterBox : kFilterNone);
1711 }
1712
1713 #ifdef __cplusplus
1714 } // extern "C"
1715 } // namespace libyuv
1716 #endif
OLDNEW
« no previous file with comments | « source/libvpx/third_party/libyuv/source/scale.c ('k') | source/libvpx/third_party/libyuv/source/scale_common.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698