OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ |
| 10 |
| 11 #include "third_party/libyuv/include/libyuv/scale.h" |
| 12 |
| 13 #include <assert.h> |
| 14 #include <string.h> |
| 15 |
| 16 #include "third_party/libyuv/include/libyuv/cpu_id.h" |
| 17 #include "third_party/libyuv/include/libyuv/planar_functions.h" // CopyPlane |
| 18 #include "third_party/libyuv/include/libyuv/row.h" |
| 19 #include "third_party/libyuv/include/libyuv/scale_row.h" |
| 20 |
| 21 #ifdef __cplusplus |
| 22 namespace libyuv { |
| 23 extern "C" { |
| 24 #endif |
| 25 |
| 26 // Remove this macro if OVERREAD is safe. |
| 27 #define AVOID_OVERREAD 1 |
| 28 |
// Absolute value of an int; used to normalize negative (inverted)
// plane dimensions before scaling.
static __inline int Abs(int v) {
  if (v < 0) {
    return -v;
  }
  return v;
}
| 32 |
| 33 #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s) |
| 34 |
| 35 // Scale plane, 1/2 |
| 36 // This is an optimized version for scaling down a plane to 1/2 of |
| 37 // its original size. |
| 38 |
// Scale a plane down to exactly 1/2 of its width and height.
// filtering selects the row kernel: kFilterNone subsamples (odd source
// rows), kFilterLinear averages 2 horizontal pixels, other modes
// box-average 2x2 pixels. A CPU-specific kernel replaces the C version
// when the platform and alignment allow.
static void ScalePlaneDown2(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  int y;
  // Default to the portable C row functions.
  void (*ScaleRowDown2)(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width) =
      filtering == kFilterNone ? ScaleRowDown2_C :
      (filtering == kFilterLinear ? ScaleRowDown2Linear_C :
      ScaleRowDown2Box_C);
  int row_stride = src_stride << 1;  // Advance 2 source rows per output row.
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }

#if defined(HAS_SCALEROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
    ScaleRowDown2 = filtering ? ScaleRowDown2Box_NEON : ScaleRowDown2_NEON;
  }
#elif defined(HAS_SCALEROWDOWN2_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
    // Unaligned SSE2 variants first; upgrade to aligned when possible.
    ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Unaligned_SSE2 :
        (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_SSE2 :
        ScaleRowDown2Box_Unaligned_SSE2);
    if (IS_ALIGNED(src_ptr, 16) &&
        IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSE2 :
          (filtering == kFilterLinear ? ScaleRowDown2Linear_SSE2 :
          ScaleRowDown2Box_SSE2);
    }
  }
#elif defined(HAS_SCALEROWDOWN2_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown2 = filtering ?
        ScaleRowDown2Box_MIPS_DSPR2 : ScaleRowDown2_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;  // Linear filtering reads a single source row.
  }
  // TODO(fbarchard): Loop through source height to allow odd height.
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
| 92 |
// 16-bit pixel variant of ScalePlaneDown2: scale a uint16 plane down to
// exactly 1/2 of its width and height. See ScalePlaneDown2 for the
// filtering semantics.
static void ScalePlaneDown2_16(int src_width, int src_height,
                               int dst_width, int dst_height,
                               int src_stride, int dst_stride,
                               const uint16* src_ptr, uint16* dst_ptr,
                               enum FilterMode filtering) {
  int y;
  // Default to the portable C row functions.
  void (*ScaleRowDown2)(const uint16* src_ptr, ptrdiff_t src_stride,
                        uint16* dst_ptr, int dst_width) =
      filtering == kFilterNone ? ScaleRowDown2_16_C :
      (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C :
      ScaleRowDown2Box_16_C);
  int row_stride = src_stride << 1;  // Advance 2 source rows per output row.
  if (!filtering) {
    src_ptr += src_stride;  // Point to odd rows.
    src_stride = 0;
  }

#if defined(HAS_SCALEROWDOWN2_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) {
    ScaleRowDown2 = filtering ? ScaleRowDown2Box_16_NEON :
        ScaleRowDown2_16_NEON;
  }
#elif defined(HAS_SCALEROWDOWN2_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) {
    // Unaligned SSE2 variants first; upgrade to aligned when possible.
    ScaleRowDown2 = filtering == kFilterNone ?
        ScaleRowDown2_Unaligned_16_SSE2 :
        (filtering == kFilterLinear ? ScaleRowDown2Linear_Unaligned_16_SSE2 :
        ScaleRowDown2Box_Unaligned_16_SSE2);
    if (IS_ALIGNED(src_ptr, 16) &&
        IS_ALIGNED(src_stride, 16) && IS_ALIGNED(row_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 :
          (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 :
          ScaleRowDown2Box_16_SSE2);
    }
  }
#elif defined(HAS_SCALEROWDOWN2_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(src_ptr, 4) &&
      IS_ALIGNED(src_stride, 4) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown2 = filtering ?
        ScaleRowDown2Box_16_MIPS_DSPR2 : ScaleRowDown2_16_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;  // Linear filtering reads a single source row.
  }
  // TODO(fbarchard): Loop through source height to allow odd height.
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
| 148 |
| 149 // Scale plane, 1/4 |
| 150 // This is an optimized version for scaling down a plane to 1/4 of |
| 151 // its original size. |
| 152 |
// Scale a plane down to exactly 1/4 of its width and height.
// filtering == 0 subsamples (one pixel per 4x4 block, from row 2);
// any other mode box-averages 4x4 pixels.
static void ScalePlaneDown4(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  int y;
  // Default to the portable C row function.
  void (*ScaleRowDown4)(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C;
  int row_stride = src_stride << 2;  // Advance 4 source rows per output row.
  if (!filtering) {
    src_ptr += src_stride * 2;  // Point to row 2.
    src_stride = 0;
  }
#if defined(HAS_SCALEROWDOWN4_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON;
  }
#elif defined(HAS_SCALEROWDOWN4_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSE2 : ScaleRowDown4_SSE2;
  }
#elif defined(HAS_SCALEROWDOWN4_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown4 = filtering ?
        ScaleRowDown4Box_MIPS_DSPR2 : ScaleRowDown4_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;  // Linear filtering reads a single source row.
  }
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
| 195 |
// 16-bit pixel variant of ScalePlaneDown4: scale a uint16 plane down to
// exactly 1/4 of its width and height.
static void ScalePlaneDown4_16(int src_width, int src_height,
                               int dst_width, int dst_height,
                               int src_stride, int dst_stride,
                               const uint16* src_ptr, uint16* dst_ptr,
                               enum FilterMode filtering) {
  int y;
  // Default to the portable C row function.
  void (*ScaleRowDown4)(const uint16* src_ptr, ptrdiff_t src_stride,
                        uint16* dst_ptr, int dst_width) =
      filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C;
  int row_stride = src_stride << 2;  // Advance 4 source rows per output row.
  if (!filtering) {
    src_ptr += src_stride * 2;  // Point to row 2.
    src_stride = 0;
  }
#if defined(HAS_SCALEROWDOWN4_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_NEON :
        ScaleRowDown4_16_NEON;
  }
#elif defined(HAS_SCALEROWDOWN4_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) &&
      IS_ALIGNED(dst_width, 8) && IS_ALIGNED(row_stride, 16) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 :
        ScaleRowDown4_16_SSE2;
  }
#elif defined(HAS_SCALEROWDOWN4_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(row_stride, 4) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    ScaleRowDown4 = filtering ?
        ScaleRowDown4Box_16_MIPS_DSPR2 : ScaleRowDown4_16_MIPS_DSPR2;
  }
#endif

  if (filtering == kFilterLinear) {
    src_stride = 0;  // Linear filtering reads a single source row.
  }
  for (y = 0; y < dst_height; ++y) {
    ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width);
    src_ptr += row_stride;
    dst_ptr += dst_stride;
  }
}
| 240 |
| 241 // Scale plane down, 3/4 |
| 242 |
// Scale a plane down to 3/4 of its size: emits 3 destination rows per
// 4 source rows. Two row kernels cover the two distinct vertical filter
// phases; the third output row of each group reuses the first kernel
// reading upward via a negative stride.
static void ScalePlaneDown34(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr,
                             enum FilterMode filtering) {
  int y;
  // Row kernels for the two vertical filter phases.
  void (*ScaleRowDown34_0)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  // Linear filtering ignores the second row, so pass stride 0.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown34_0 = ScaleRowDown34_C;
    ScaleRowDown34_1 = ScaleRowDown34_C;
  } else {
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_C;
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_C;
  }
#if defined(HAS_SCALEROWDOWN34_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_NEON;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_SSSE3;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_MIPS_DSPR2;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_MIPS_DSPR2;
    }
  }
#endif

  // 3 destination rows per 4 source rows.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    // Reuse the _0 kernel from the row below with a negative stride,
    // mirroring the filter vertically.
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
                     dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  }
}
| 322 |
// 16-bit pixel variant of ScalePlaneDown34: scale a uint16 plane to 3/4
// of its size, 3 destination rows per 4 source rows.
static void ScalePlaneDown34_16(int src_width, int src_height,
                                int dst_width, int dst_height,
                                int src_stride, int dst_stride,
                                const uint16* src_ptr, uint16* dst_ptr,
                                enum FilterMode filtering) {
  int y;
  // Row kernels for the two vertical filter phases.
  void (*ScaleRowDown34_0)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  void (*ScaleRowDown34_1)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  // Linear filtering ignores the second row, so pass stride 0.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown34_0 = ScaleRowDown34_16_C;
    ScaleRowDown34_1 = ScaleRowDown34_16_C;
  } else {
    ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C;
    ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C;
  }
#if defined(HAS_SCALEROWDOWN34_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_16_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_16_NEON;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3;
    }
  }
#endif
#if defined(HAS_SCALEROWDOWN34_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown34_0 = ScaleRowDown34_16_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_16_MIPS_DSPR2;
    } else {
      ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_MIPS_DSPR2;
      ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_MIPS_DSPR2;
    }
  }
#endif

  // 3 destination rows per 4 source rows.
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    // Reuse the _0 kernel from the row below with a negative stride,
    // mirroring the filter vertically.
    ScaleRowDown34_0(src_ptr + src_stride, -filter_stride,
                     dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride;
    dst_ptr += dst_stride;
    ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width);
  }
}
| 402 |
| 403 |
| 404 // Scale plane, 3/8 |
| 405 // This is an optimized version for scaling down a plane to 3/8 |
| 406 // of its original size. |
| 407 // |
| 408 // Uses box filter arranges like this |
| 409 // aaabbbcc -> abc |
| 410 // aaabbbcc def |
| 411 // aaabbbcc ghi |
| 412 // dddeeeff |
| 413 // dddeeeff |
| 414 // dddeeeff |
| 415 // ggghhhii |
| 416 // ggghhhii |
| 417 // Boxes are 3x3, 2x3, 3x2 and 2x2 |
| 418 |
// Scale a plane down to 3/8 of its size: 3 destination rows per 8 source
// rows, using vertical boxes of 3, 3 and 2 rows (see diagram above).
static void ScalePlaneDown38(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint8* src_ptr, uint8* dst_ptr,
                             enum FilterMode filtering) {
  int y;
  // Row kernels for a 3-row box and a 2-row box respectively.
  void (*ScaleRowDown38_3)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
  // Linear filtering ignores the extra rows, so pass stride 0.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown38_3 = ScaleRowDown38_C;
    ScaleRowDown38_2 = ScaleRowDown38_C;
  } else {
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_C;
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_C;
  }
#if defined(HAS_SCALEROWDOWN38_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_NEON;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON;
    }
  }
#elif defined(HAS_SCALEROWDOWN38_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_SSSE3;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3;
    }
  }
#elif defined(HAS_SCALEROWDOWN38_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_MIPS_DSPR2;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_MIPS_DSPR2;
    }
  }
#endif

  // 3 destination rows per 8 source rows (3 + 3 + 2).
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  }
}
| 495 |
// 16-bit pixel variant of ScalePlaneDown38: scale a uint16 plane to 3/8
// of its size, 3 destination rows per 8 source rows.
static void ScalePlaneDown38_16(int src_width, int src_height,
                                int dst_width, int dst_height,
                                int src_stride, int dst_stride,
                                const uint16* src_ptr, uint16* dst_ptr,
                                enum FilterMode filtering) {
  int y;
  // Row kernels for a 3-row box and a 2-row box respectively.
  void (*ScaleRowDown38_3)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  void (*ScaleRowDown38_2)(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst_ptr, int dst_width);
  // Linear filtering ignores the extra rows, so pass stride 0.
  const int filter_stride = (filtering == kFilterLinear) ? 0 : src_stride;
  assert(dst_width % 3 == 0);
  if (!filtering) {
    ScaleRowDown38_3 = ScaleRowDown38_16_C;
    ScaleRowDown38_2 = ScaleRowDown38_16_C;
  } else {
    ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C;
    ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C;
  }
#if defined(HAS_SCALEROWDOWN38_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_16_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_16_NEON;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON;
    }
  }
#elif defined(HAS_SCALEROWDOWN38_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0) &&
      IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3;
    }
  }
#elif defined(HAS_SCALEROWDOWN38_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && (dst_width % 12 == 0) &&
      IS_ALIGNED(src_ptr, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_ptr, 4) && IS_ALIGNED(dst_stride, 4)) {
    if (!filtering) {
      ScaleRowDown38_3 = ScaleRowDown38_16_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_16_MIPS_DSPR2;
    } else {
      ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_MIPS_DSPR2;
      ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_MIPS_DSPR2;
    }
  }
#endif

  // 3 destination rows per 8 source rows (3 + 3 + 2).
  for (y = 0; y < dst_height - 2; y += 3) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 2;
    dst_ptr += dst_stride;
  }

  // Remainder 1 or 2 rows with last row vertically unfiltered
  if ((dst_height % 3) == 2) {
    ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width);
    src_ptr += src_stride * 3;
    dst_ptr += dst_stride;
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  } else if ((dst_height % 3) == 1) {
    ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width);
  }
}
| 572 |
| 573 static __inline uint32 SumBox(int iboxwidth, int iboxheight, |
| 574 ptrdiff_t src_stride, const uint8* src_ptr) { |
| 575 uint32 sum = 0u; |
| 576 int y; |
| 577 assert(iboxwidth > 0); |
| 578 assert(iboxheight > 0); |
| 579 for (y = 0; y < iboxheight; ++y) { |
| 580 int x; |
| 581 for (x = 0; x < iboxwidth; ++x) { |
| 582 sum += src_ptr[x]; |
| 583 } |
| 584 src_ptr += src_stride; |
| 585 } |
| 586 return sum; |
| 587 } |
| 588 |
| 589 static __inline uint32 SumBox_16(int iboxwidth, int iboxheight, |
| 590 ptrdiff_t src_stride, const uint16* src_ptr) { |
| 591 uint32 sum = 0u; |
| 592 int y; |
| 593 assert(iboxwidth > 0); |
| 594 assert(iboxheight > 0); |
| 595 for (y = 0; y < iboxheight; ++y) { |
| 596 int x; |
| 597 for (x = 0; x < iboxwidth; ++x) { |
| 598 sum += src_ptr[x]; |
| 599 } |
| 600 src_ptr += src_stride; |
| 601 } |
| 602 return sum; |
| 603 } |
| 604 |
| 605 static void ScalePlaneBoxRow_C(int dst_width, int boxheight, |
| 606 int x, int dx, ptrdiff_t src_stride, |
| 607 const uint8* src_ptr, uint8* dst_ptr) { |
| 608 int i; |
| 609 int boxwidth; |
| 610 for (i = 0; i < dst_width; ++i) { |
| 611 int ix = x >> 16; |
| 612 x += dx; |
| 613 boxwidth = (x >> 16) - ix; |
| 614 *dst_ptr++ = SumBox(boxwidth, boxheight, src_stride, src_ptr + ix) / |
| 615 (boxwidth * boxheight); |
| 616 } |
| 617 } |
| 618 |
| 619 static void ScalePlaneBoxRow_16_C(int dst_width, int boxheight, |
| 620 int x, int dx, ptrdiff_t src_stride, |
| 621 const uint16* src_ptr, uint16* dst_ptr) { |
| 622 int i; |
| 623 int boxwidth; |
| 624 for (i = 0; i < dst_width; ++i) { |
| 625 int ix = x >> 16; |
| 626 x += dx; |
| 627 boxwidth = (x >> 16) - ix; |
| 628 *dst_ptr++ = SumBox_16(boxwidth, boxheight, src_stride, src_ptr + ix) / |
| 629 (boxwidth * boxheight); |
| 630 } |
| 631 } |
| 632 |
| 633 static __inline uint32 SumPixels(int iboxwidth, const uint16* src_ptr) { |
| 634 uint32 sum = 0u; |
| 635 int x; |
| 636 assert(iboxwidth > 0); |
| 637 for (x = 0; x < iboxwidth; ++x) { |
| 638 sum += src_ptr[x]; |
| 639 } |
| 640 return sum; |
| 641 } |
| 642 |
| 643 static __inline uint32 SumPixels_16(int iboxwidth, const uint32* src_ptr) { |
| 644 uint32 sum = 0u; |
| 645 int x; |
| 646 assert(iboxwidth > 0); |
| 647 for (x = 0; x < iboxwidth; ++x) { |
| 648 sum += src_ptr[x]; |
| 649 } |
| 650 return sum; |
| 651 } |
| 652 |
| 653 static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx, |
| 654 const uint16* src_ptr, uint8* dst_ptr) { |
| 655 int i; |
| 656 int scaletbl[2]; |
| 657 int minboxwidth = (dx >> 16); |
| 658 int* scaleptr = scaletbl - minboxwidth; |
| 659 int boxwidth; |
| 660 scaletbl[0] = 65536 / (minboxwidth * boxheight); |
| 661 scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight); |
| 662 for (i = 0; i < dst_width; ++i) { |
| 663 int ix = x >> 16; |
| 664 x += dx; |
| 665 boxwidth = (x >> 16) - ix; |
| 666 *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaleptr[boxwidth] >> 16; |
| 667 } |
| 668 } |
| 669 |
| 670 static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx, |
| 671 const uint32* src_ptr, uint16* dst_ptr) { |
| 672 int i; |
| 673 int scaletbl[2]; |
| 674 int minboxwidth = (dx >> 16); |
| 675 int* scaleptr = scaletbl - minboxwidth; |
| 676 int boxwidth; |
| 677 scaletbl[0] = 65536 / (minboxwidth * boxheight); |
| 678 scaletbl[1] = 65536 / ((minboxwidth + 1) * boxheight); |
| 679 for (i = 0; i < dst_width; ++i) { |
| 680 int ix = x >> 16; |
| 681 x += dx; |
| 682 boxwidth = (x >> 16) - ix; |
| 683 *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) * |
| 684 scaleptr[boxwidth] >> 16; |
| 685 } |
| 686 } |
| 687 |
| 688 static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx, |
| 689 const uint16* src_ptr, uint8* dst_ptr) { |
| 690 int boxwidth = (dx >> 16); |
| 691 int scaleval = 65536 / (boxwidth * boxheight); |
| 692 int i; |
| 693 for (i = 0; i < dst_width; ++i) { |
| 694 *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16; |
| 695 x += boxwidth; |
| 696 } |
| 697 } |
| 698 |
| 699 static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx, |
| 700 const uint32* src_ptr, uint16* dst_ptr) { |
| 701 int boxwidth = (dx >> 16); |
| 702 int scaleval = 65536 / (boxwidth * boxheight); |
| 703 int i; |
| 704 for (i = 0; i < dst_width; ++i) { |
| 705 *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16; |
| 706 x += boxwidth; |
| 707 } |
| 708 } |
| 709 |
| 710 // Scale plane down to any dimensions, with interpolation. |
| 711 // (boxfilter). |
| 712 // |
| 713 // Same method as SimpleScale, which is fixed point, outputting |
| 714 // one pixel of destination using fixed point (16.16) to step |
| 715 // through source, sampling a box of pixel with simple |
| 716 // averaging. |
// Scale a plane down to arbitrary dimensions with a box filter: step
// through the source in 16.16 fixed point, sum a box of source pixels
// per destination pixel, and average.
static void ScalePlaneBox(int src_width, int src_height,
                          int dst_width, int dst_height,
                          int src_stride, int dst_stride,
                          const uint8* src_ptr, uint8* dst_ptr) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  const int max_y = (src_height << 16);
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);
  // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
  // Slow path: width not a multiple of 16, or boxes shorter than 2 rows.
  if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
    uint8* dst = dst_ptr;
    int j;  // NOTE(review): shadows the outer j declared above.
    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint8* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;  // Clamp the box to the bottom of the plane.
      }
      boxheight = (y >> 16) - iy;
      ScalePlaneBoxRow_C(dst_width, boxheight,
                         x, dx, src_stride,
                         src, dst);
      dst += dst_stride;
    }
    return;
  }
  {
    // Fast path: sum rows into a uint16 buffer, then average columns.
    // Allocate a row buffer of uint16.
    align_buffer_64(row16, src_width * 2);
    // Fractional dx needs the two-width column summer.
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
        const uint16* src_ptr, uint8* dst_ptr) =
        (dx & 0xffff) ? ScaleAddCols2_C: ScaleAddCols1_C;
    void (*ScaleAddRows)(const uint8* src_ptr, ptrdiff_t src_stride,
        uint16* dst_ptr, int src_width, int src_height) = ScaleAddRows_C;

#if defined(HAS_SCALEADDROWS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) &&
#ifdef AVOID_OVERREAD
        IS_ALIGNED(src_width, 16) &&
#endif
        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
      ScaleAddRows = ScaleAddRows_SSE2;
    }
#endif

    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint8* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > (src_height << 16)) {  // Same clamp as max_y above.
        y = (src_height << 16);
      }
      boxheight = (y >> 16) - iy;
      ScaleAddRows(src, src_stride, (uint16*)(row16),
                   src_width, boxheight);
      ScaleAddCols(dst_width, boxheight, x, dx, (uint16*)(row16),
                   dst_ptr);
      dst_ptr += dst_stride;
    }
    free_aligned_buffer_64(row16);
  }
}
| 788 |
// 16-bit pixel variant of ScalePlaneBox: box-filter a uint16 plane down
// to arbitrary dimensions, accumulating row sums in a uint32 buffer.
static void ScalePlaneBox_16(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint16* src_ptr, uint16* dst_ptr) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  const int max_y = (src_height << 16);
  ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);
  // TODO(fbarchard): Remove this and make AddRows handle boxheight 1.
  // Slow path: width not a multiple of 16, or boxes shorter than 2 rows.
  if (!IS_ALIGNED(src_width, 16) || dst_height * 2 > src_height) {
    uint16* dst = dst_ptr;
    int j;  // NOTE(review): shadows the outer j declared above.
    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint16* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > max_y) {
        y = max_y;  // Clamp the box to the bottom of the plane.
      }
      boxheight = (y >> 16) - iy;
      ScalePlaneBoxRow_16_C(dst_width, boxheight,
                            x, dx, src_stride,
                            src, dst);
      dst += dst_stride;
    }
    return;
  }
  {
    // Fast path: sum rows into a uint32 buffer, then average columns.
    // Allocate a row buffer of uint32.
    align_buffer_64(row32, src_width * 4);
    // Fractional dx needs the two-width column summer.
    void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx,
        const uint32* src_ptr, uint16* dst_ptr) =
        (dx & 0xffff) ? ScaleAddCols2_16_C: ScaleAddCols1_16_C;
    void (*ScaleAddRows)(const uint16* src_ptr, ptrdiff_t src_stride,
        uint32* dst_ptr, int src_width, int src_height) = ScaleAddRows_16_C;

#if defined(HAS_SCALEADDROWS_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) &&
#ifdef AVOID_OVERREAD
        IS_ALIGNED(src_width, 16) &&
#endif
        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
      ScaleAddRows = ScaleAddRows_16_SSE2;
    }
#endif

    for (j = 0; j < dst_height; ++j) {
      int boxheight;
      int iy = y >> 16;
      const uint16* src = src_ptr + iy * src_stride;
      y += dy;
      if (y > (src_height << 16)) {  // Same clamp as max_y above.
        y = (src_height << 16);
      }
      boxheight = (y >> 16) - iy;
      ScaleAddRows(src, src_stride, (uint32*)(row32),
                   src_width, boxheight);
      ScaleAddCols(dst_width, boxheight, x, dx, (uint32*)(row32),
                   dst_ptr);
      dst_ptr += dst_stride;
    }
    free_aligned_buffer_64(row32);
  }
}
| 860 |
| 861 // Scale plane down with bilinear interpolation. |
void ScalePlaneBilinearDown(int src_width, int src_height,
                            int dst_width, int dst_height,
                            int src_stride, int dst_stride,
                            const uint8* src_ptr, uint8* dst_ptr,
                            enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer.
  // NOTE(review): sized from src_width before the Abs() below; assumes
  // callers pass a positive src_width here - confirm.
  align_buffer_64(row, src_width);

  // Last source row in 16.16 fixed point; y is clamped to this below.
  const int max_y = (src_height - 1) << 16;
  int j;
  // Horizontal scaler; 64-bit variant avoids overflow for very wide sources.
  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C;
  // Vertical blender of two adjacent source rows by source_y_fraction.
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  // Select the fastest InterpolateRow the CPU supports. _Any_ variants
  // handle widths that are not a multiple of the SIMD width; Unaligned
  // variants drop the pointer/stride alignment requirement.
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
        InterpolateRow = InterpolateRow_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
        InterpolateRow = InterpolateRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
    if (IS_ALIGNED(src_width, 4)) {
      InterpolateRow = InterpolateRow_MIPS_DSPR2;
    }
  }
#endif


#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
  // Clamp the starting coordinate to the last source row.
  if (y > max_y) {
    y = max_y;
  }

  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;  // integer source row
    const uint8* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      // Horizontal-only filtering: sample a single source row directly.
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      // Blend rows yi and yi+1 by the fractional part of y into the row
      // buffer, then scale the blended row horizontally.
      int yf = (y >> 8) & 255;  // top 8 bits of the 16-bit y fraction
      InterpolateRow(row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
    if (y > max_y) {
      y = max_y;
    }
  }
  free_aligned_buffer_64(row);
}
| 963 |
// 16-bit-per-channel variant of ScalePlaneBilinearDown.
void ScalePlaneBilinearDown_16(int src_width, int src_height,
                               int dst_width, int dst_height,
                               int src_stride, int dst_stride,
                               const uint16* src_ptr, uint16* dst_ptr,
                               enum FilterMode filtering) {
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
  // Allocate a row buffer (2 bytes per 16-bit pixel).
  // NOTE(review): sized from src_width before the Abs() below; assumes
  // callers pass a positive src_width here - confirm.
  align_buffer_64(row, src_width * 2);

  // Last source row in 16.16 fixed point; y is clamped to this below.
  const int max_y = (src_height - 1) << 16;
  int j;
  // Horizontal scaler; 64-bit variant avoids overflow for very wide sources.
  void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
      int dst_width, int x, int dx) =
      (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C;
  // Vertical blender of two adjacent source rows by source_y_fraction.
  void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  // Select the fastest InterpolateRow the CPU supports. _Any_ variants
  // handle widths that are not a multiple of the SIMD width; Unaligned
  // variants drop the pointer/stride alignment requirement.
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_16_SSE2;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
        InterpolateRow = InterpolateRow_16_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_16_SSSE3;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
      if (IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16)) {
        InterpolateRow = InterpolateRow_16_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && src_width >= 32) {
    InterpolateRow = InterpolateRow_Any_16_AVX2;
    if (IS_ALIGNED(src_width, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && src_width >= 16) {
    InterpolateRow = InterpolateRow_Any_16_NEON;
    if (IS_ALIGNED(src_width, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && src_width >= 4) {
    InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
    if (IS_ALIGNED(src_width, 4)) {
      InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
    }
  }
#endif


#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  }
#endif
  // Clamp the starting coordinate to the last source row.
  if (y > max_y) {
    y = max_y;
  }

  for (j = 0; j < dst_height; ++j) {
    int yi = y >> 16;  // integer source row
    const uint16* src = src_ptr + yi * src_stride;
    if (filtering == kFilterLinear) {
      // Horizontal-only filtering: sample a single source row directly.
      ScaleFilterCols(dst_ptr, src, dst_width, x, dx);
    } else {
      // Blend rows yi and yi+1 by the fractional part of y into the row
      // buffer, then scale the blended row horizontally.
      int yf = (y >> 8) & 255;  // top 8 bits of the 16-bit y fraction
      InterpolateRow((uint16*)row, src, src_stride, src_width, yf);
      ScaleFilterCols(dst_ptr, (uint16*)row, dst_width, x, dx);
    }
    dst_ptr += dst_stride;
    y += dy;
    if (y > max_y) {
      y = max_y;
    }
  }
  free_aligned_buffer_64(row);
}
| 1065 |
// Scale plane up with bilinear interpolation.
void ScalePlaneBilinearUp(int src_width, int src_height,
                          int dst_width, int dst_height,
                          int src_stride, int dst_stride,
                          const uint8* src_ptr, uint8* dst_ptr,
                          enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // Last source row in 16.16 fixed point; y is clamped to this.
  const int max_y = (src_height - 1) << 16;
  // Vertical blender of two row buffers by source_y_fraction.
  void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
  // Horizontal scaler: filtered columns, or point-sampled when no filter.
  void (*ScaleFilterCols)(uint8* dst_ptr, const uint8* src_ptr,
      int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_C : ScaleCols_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  // Select the fastest InterpolateRow the CPU supports; rows are blended
  // at destination width here, so gates test dst_width/dst_ptr alignment.
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_MIPS_DSPR2;
    }
  }
#endif

  // 64-bit column scaler avoids overflow for very wide sources.
  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_C;
  }
#if defined(HAS_SCALEFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_SSSE3;
  }
#endif
  // Unfiltered exact 2x horizontal upscale has a specialized doubler.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_C;
#if defined(HAS_SCALECOLS_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleFilterCols = ScaleColsUp2_SSE2;
    }
#endif
  }

  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint8* src = src_ptr + yi * src_stride;

    // Allocate 2 row buffers (16-byte padded width) used as a ping-pong
    // pair holding the two horizontally-scaled source rows being blended.
    const int kRowSize = (dst_width + 15) & ~15;
    align_buffer_64(row, kRowSize * 2);

    uint8* rowptr = row;
    int rowstride = kRowSize;
    int lasty = yi;  // source row currently held in the buffers

    // Prime the pair with rows yi and yi+1 (clamped for 1-row sources).
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;

    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * src_stride;
        }
        if (yi != lasty) {
          // Advance one source row: scale it into the older buffer and
          // flip the pair by negating rowstride.
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        // Horizontal-only: emit the current row unblended.
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        int yf = (y >> 8) & 255;  // top 8 bits of the 16-bit y fraction
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
| 1205 |
// 16-bit-per-channel variant of ScalePlaneBilinearUp.
void ScalePlaneBilinearUp_16(int src_width, int src_height,
                             int dst_width, int dst_height,
                             int src_stride, int dst_stride,
                             const uint16* src_ptr, uint16* dst_ptr,
                             enum FilterMode filtering) {
  int j;
  // Initial source x/y coordinate and step values as 16.16 fixed point.
  int x = 0;
  int y = 0;
  int dx = 0;
  int dy = 0;
  // Last source row in 16.16 fixed point; y is clamped to this.
  const int max_y = (src_height - 1) << 16;
  // Vertical blender of two row buffers by source_y_fraction.
  void (*InterpolateRow)(uint16* dst_ptr, const uint16* src_ptr,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_16_C;
  // Horizontal scaler: filtered columns, or point-sampled when no filter.
  void (*ScaleFilterCols)(uint16* dst_ptr, const uint16* src_ptr,
      int dst_width, int x, int dx) =
      filtering ? ScaleFilterCols_16_C : ScaleCols_16_C;
  ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
             &x, &y, &dx, &dy);
  src_width = Abs(src_width);

  // Select the fastest InterpolateRow the CPU supports; rows are blended
  // at destination width here, so gates test dst_width/dst_ptr alignment.
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_16_SSE2;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_16_SSE2;
      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_16_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_16_SSSE3;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_16_SSSE3;
      if (IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_16_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && dst_width >= 32) {
    InterpolateRow = InterpolateRow_Any_16_AVX2;
    if (IS_ALIGNED(dst_width, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON) && dst_width >= 16) {
    InterpolateRow = InterpolateRow_Any_16_NEON;
    if (IS_ALIGNED(dst_width, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width >= 4) {
    InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2;
    if (IS_ALIGNED(dst_width, 4)) {
      InterpolateRow = InterpolateRow_16_MIPS_DSPR2;
    }
  }
#endif

  // 64-bit column scaler avoids overflow for very wide sources.
  if (filtering && src_width >= 32768) {
    ScaleFilterCols = ScaleFilterCols64_16_C;
  }
#if defined(HAS_SCALEFILTERCOLS_16_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleFilterCols = ScaleFilterCols_16_SSSE3;
  }
#endif
  // Unfiltered exact 2x horizontal upscale has a specialized doubler.
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleFilterCols = ScaleColsUp2_16_C;
#if defined(HAS_SCALECOLS_16_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) &&
        IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) &&
        IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) {
      ScaleFilterCols = ScaleColsUp2_16_SSE2;
    }
#endif
  }

  if (y > max_y) {
    y = max_y;
  }
  {
    int yi = y >> 16;
    const uint16* src = src_ptr + yi * src_stride;

    // Allocate 2 row buffers (16-pixel padded width, 2 bytes per pixel)
    // used as a ping-pong pair holding the two scaled rows being blended.
    const int kRowSize = (dst_width + 15) & ~15;
    align_buffer_64(row, kRowSize * 4);

    uint16* rowptr = (uint16*)row;
    int rowstride = kRowSize;
    int lasty = yi;  // source row currently held in the buffers

    // Prime the pair with rows yi and yi+1 (clamped for 1-row sources).
    ScaleFilterCols(rowptr, src, dst_width, x, dx);
    if (src_height > 1) {
      src += src_stride;
    }
    ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx);
    src += src_stride;

    for (j = 0; j < dst_height; ++j) {
      yi = y >> 16;
      if (yi != lasty) {
        if (y > max_y) {
          y = max_y;
          yi = y >> 16;
          src = src_ptr + yi * src_stride;
        }
        if (yi != lasty) {
          // Advance one source row: scale it into the older buffer and
          // flip the pair by negating rowstride.
          ScaleFilterCols(rowptr, src, dst_width, x, dx);
          rowptr += rowstride;
          rowstride = -rowstride;
          lasty = yi;
          src += src_stride;
        }
      }
      if (filtering == kFilterLinear) {
        // Horizontal-only: emit the current row unblended.
        InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0);
      } else {
        int yf = (y >> 8) & 255;  // top 8 bits of the 16-bit y fraction
        InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf);
      }
      dst_ptr += dst_stride;
      y += dy;
    }
    free_aligned_buffer_64(row);
  }
}
| 1344 |
| 1345 // Scale Plane to/from any dimensions, without interpolation. |
| 1346 // Fixed point math is used for performance: The upper 16 bits |
| 1347 // of x and dx is the integer part of the source position and |
| 1348 // the lower 16 bits are the fixed decimal part. |
| 1349 |
| 1350 static void ScalePlaneSimple(int src_width, int src_height, |
| 1351 int dst_width, int dst_height, |
| 1352 int src_stride, int dst_stride, |
| 1353 const uint8* src_ptr, uint8* dst_ptr) { |
| 1354 int i; |
| 1355 void (*ScaleCols)(uint8* dst_ptr, const uint8* src_ptr, |
| 1356 int dst_width, int x, int dx) = ScaleCols_C; |
| 1357 // Initial source x/y coordinate and step values as 16.16 fixed point. |
| 1358 int x = 0; |
| 1359 int y = 0; |
| 1360 int dx = 0; |
| 1361 int dy = 0; |
| 1362 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, |
| 1363 &x, &y, &dx, &dy); |
| 1364 src_width = Abs(src_width); |
| 1365 |
| 1366 if (src_width * 2 == dst_width && x < 0x8000) { |
| 1367 ScaleCols = ScaleColsUp2_C; |
| 1368 #if defined(HAS_SCALECOLS_SSE2) |
| 1369 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && |
| 1370 IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && |
| 1371 IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { |
| 1372 ScaleCols = ScaleColsUp2_SSE2; |
| 1373 } |
| 1374 #endif |
| 1375 } |
| 1376 |
| 1377 for (i = 0; i < dst_height; ++i) { |
| 1378 ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, |
| 1379 dst_width, x, dx); |
| 1380 dst_ptr += dst_stride; |
| 1381 y += dy; |
| 1382 } |
| 1383 } |
| 1384 |
| 1385 static void ScalePlaneSimple_16(int src_width, int src_height, |
| 1386 int dst_width, int dst_height, |
| 1387 int src_stride, int dst_stride, |
| 1388 const uint16* src_ptr, uint16* dst_ptr) { |
| 1389 int i; |
| 1390 void (*ScaleCols)(uint16* dst_ptr, const uint16* src_ptr, |
| 1391 int dst_width, int x, int dx) = ScaleCols_16_C; |
| 1392 // Initial source x/y coordinate and step values as 16.16 fixed point. |
| 1393 int x = 0; |
| 1394 int y = 0; |
| 1395 int dx = 0; |
| 1396 int dy = 0; |
| 1397 ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, |
| 1398 &x, &y, &dx, &dy); |
| 1399 src_width = Abs(src_width); |
| 1400 |
| 1401 if (src_width * 2 == dst_width && x < 0x8000) { |
| 1402 ScaleCols = ScaleColsUp2_16_C; |
| 1403 #if defined(HAS_SCALECOLS_16_SSE2) |
| 1404 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8) && |
| 1405 IS_ALIGNED(src_ptr, 16) && IS_ALIGNED(src_stride, 16) && |
| 1406 IS_ALIGNED(dst_ptr, 16) && IS_ALIGNED(dst_stride, 16)) { |
| 1407 ScaleCols = ScaleColsUp2_16_SSE2; |
| 1408 } |
| 1409 #endif |
| 1410 } |
| 1411 |
| 1412 for (i = 0; i < dst_height; ++i) { |
| 1413 ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, |
| 1414 dst_width, x, dx); |
| 1415 dst_ptr += dst_stride; |
| 1416 y += dy; |
| 1417 } |
| 1418 } |
| 1419 |
| 1420 // Scale a plane. |
| 1421 // This function dispatches to a specialized scaler based on scale factor. |
| 1422 |
LIBYUV_API
void ScalePlane(const uint8* src, int src_stride,
                int src_width, int src_height,
                uint8* dst, int dst_stride,
                int dst_width, int dst_height,
                enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height,
                                dst_width, dst_height,
                                filtering);

  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    // Start at the last row and walk upward via a negative stride.
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }

  // Use specialized scales to improve performance for common resolutions.
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
  if (dst_width == src_width && dst_height == src_height) {
    // Straight copy.
    CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height);
    return;
  }
  if (dst_width == src_width) {
    int dy = FixedDiv(src_height, dst_height);
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical(src_height,
                       dst_width, dst_height,
                       src_stride, dst_stride, src, dst,
                       0, 0, dy, 1, filtering);
    return;
  }
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    // Scale down.
    if (4 * dst_width == 3 * src_width &&
        4 * dst_height == 3 * src_height) {
      // optimized, 3/4
      ScalePlaneDown34(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
      // optimized, 1/2
      ScalePlaneDown2(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst, filtering);
      return;
    }
    // 3/8 rounded up for odd sized chroma height.
    if (8 * dst_width == 3 * src_width &&
        dst_height == ((src_height * 3 + 7) / 8)) {
      // optimized, 3/8
      ScalePlaneDown38(src_width, src_height, dst_width, dst_height,
                       src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
        filtering != kFilterBilinear) {
      // optimized, 1/4
      ScalePlaneDown4(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst, filtering);
      return;
    }
  }
  // General paths: box filter for >2x vertical downscale, bilinear up or
  // down when filtering, point sampling when no filtering remains.
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    ScalePlaneBox(src_width, src_height, dst_width, dst_height,
                  src_stride, dst_stride, src, dst);
    return;
  }
  if (filtering && dst_height > src_height) {
    ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
    return;
  }
  if (filtering) {
    ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height,
                           src_stride, dst_stride, src, dst, filtering);
    return;
  }
  ScalePlaneSimple(src_width, src_height, dst_width, dst_height,
                   src_stride, dst_stride, src, dst);
}
| 1506 |
LIBYUV_API
void ScalePlane_16(const uint16* src, int src_stride,
                   int src_width, int src_height,
                   uint16* dst, int dst_stride,
                   int dst_width, int dst_height,
                   enum FilterMode filtering) {
  // Simplify filtering when possible.
  filtering = ScaleFilterReduce(src_width, src_height,
                                dst_width, dst_height,
                                filtering);

  // Negative height means invert the image.
  if (src_height < 0) {
    src_height = -src_height;
    // Start at the last row and walk upward via a negative stride.
    src = src + (src_height - 1) * src_stride;
    src_stride = -src_stride;
  }

  // Use specialized scales to improve performance for common resolutions.
  // For example, all the 1/2 scalings will use ScalePlaneDown2()
  if (dst_width == src_width && dst_height == src_height) {
    // Straight copy.
    CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height);
    return;
  }
  if (dst_width == src_width) {
    int dy = FixedDiv(src_height, dst_height);
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical_16(src_height,
                          dst_width, dst_height,
                          src_stride, dst_stride, src, dst,
                          0, 0, dy, 1, filtering);
    return;
  }
  if (dst_width <= Abs(src_width) && dst_height <= src_height) {
    // Scale down.
    if (4 * dst_width == 3 * src_width &&
        4 * dst_height == 3 * src_height) {
      // optimized, 3/4
      ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (2 * dst_width == src_width && 2 * dst_height == src_height) {
      // optimized, 1/2
      ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
      return;
    }
    // 3/8 rounded up for odd sized chroma height.
    if (8 * dst_width == 3 * src_width &&
        dst_height == ((src_height * 3 + 7) / 8)) {
      // optimized, 3/8
      ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height,
                          src_stride, dst_stride, src, dst, filtering);
      return;
    }
    if (4 * dst_width == src_width && 4 * dst_height == src_height &&
        filtering != kFilterBilinear) {
      // optimized, 1/4
      ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height,
                         src_stride, dst_stride, src, dst, filtering);
      return;
    }
  }
  // General paths: box filter for >2x vertical downscale, bilinear up or
  // down when filtering, point sampling when no filtering remains.
  if (filtering == kFilterBox && dst_height * 2 < src_height) {
    ScalePlaneBox_16(src_width, src_height, dst_width, dst_height,
                     src_stride, dst_stride, src, dst);
    return;
  }
  if (filtering && dst_height > src_height) {
    ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height,
                            src_stride, dst_stride, src, dst, filtering);
    return;
  }
  if (filtering) {
    ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height,
                              src_stride, dst_stride, src, dst, filtering);
    return;
  }
  ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height,
                      src_stride, dst_stride, src, dst);
}
| 1590 |
| 1591 // Scale an I420 image. |
| 1592 // This function in turn calls a scaling function for each plane. |
| 1593 |
| 1594 LIBYUV_API |
| 1595 int I420Scale(const uint8* src_y, int src_stride_y, |
| 1596 const uint8* src_u, int src_stride_u, |
| 1597 const uint8* src_v, int src_stride_v, |
| 1598 int src_width, int src_height, |
| 1599 uint8* dst_y, int dst_stride_y, |
| 1600 uint8* dst_u, int dst_stride_u, |
| 1601 uint8* dst_v, int dst_stride_v, |
| 1602 int dst_width, int dst_height, |
| 1603 enum FilterMode filtering) { |
| 1604 int src_halfwidth = SUBSAMPLE(src_width, 1, 1); |
| 1605 int src_halfheight = SUBSAMPLE(src_height, 1, 1); |
| 1606 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); |
| 1607 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); |
| 1608 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || |
| 1609 !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { |
| 1610 return -1; |
| 1611 } |
| 1612 |
| 1613 ScalePlane(src_y, src_stride_y, src_width, src_height, |
| 1614 dst_y, dst_stride_y, dst_width, dst_height, |
| 1615 filtering); |
| 1616 ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, |
| 1617 dst_u, dst_stride_u, dst_halfwidth, dst_halfheight, |
| 1618 filtering); |
| 1619 ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, |
| 1620 dst_v, dst_stride_v, dst_halfwidth, dst_halfheight, |
| 1621 filtering); |
| 1622 return 0; |
| 1623 } |
| 1624 |
| 1625 LIBYUV_API |
| 1626 int I420Scale_16(const uint16* src_y, int src_stride_y, |
| 1627 const uint16* src_u, int src_stride_u, |
| 1628 const uint16* src_v, int src_stride_v, |
| 1629 int src_width, int src_height, |
| 1630 uint16* dst_y, int dst_stride_y, |
| 1631 uint16* dst_u, int dst_stride_u, |
| 1632 uint16* dst_v, int dst_stride_v, |
| 1633 int dst_width, int dst_height, |
| 1634 enum FilterMode filtering) { |
| 1635 int src_halfwidth = SUBSAMPLE(src_width, 1, 1); |
| 1636 int src_halfheight = SUBSAMPLE(src_height, 1, 1); |
| 1637 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); |
| 1638 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); |
| 1639 if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || |
| 1640 !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { |
| 1641 return -1; |
| 1642 } |
| 1643 |
| 1644 ScalePlane_16(src_y, src_stride_y, src_width, src_height, |
| 1645 dst_y, dst_stride_y, dst_width, dst_height, |
| 1646 filtering); |
| 1647 ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, |
| 1648 dst_u, dst_stride_u, dst_halfwidth, dst_halfheight, |
| 1649 filtering); |
| 1650 ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, |
| 1651 dst_v, dst_stride_v, dst_halfwidth, dst_halfheight, |
| 1652 filtering); |
| 1653 return 0; |
| 1654 } |
| 1655 |
| 1656 // Deprecated api |
| 1657 LIBYUV_API |
| 1658 int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v, |
| 1659 int src_stride_y, int src_stride_u, int src_stride_v, |
| 1660 int src_width, int src_height, |
| 1661 uint8* dst_y, uint8* dst_u, uint8* dst_v, |
| 1662 int dst_stride_y, int dst_stride_u, int dst_stride_v, |
| 1663 int dst_width, int dst_height, |
| 1664 LIBYUV_BOOL interpolate) { |
| 1665 return I420Scale(src_y, src_stride_y, |
| 1666 src_u, src_stride_u, |
| 1667 src_v, src_stride_v, |
| 1668 src_width, src_height, |
| 1669 dst_y, dst_stride_y, |
| 1670 dst_u, dst_stride_u, |
| 1671 dst_v, dst_stride_v, |
| 1672 dst_width, dst_height, |
| 1673 interpolate ? kFilterBox : kFilterNone); |
| 1674 } |
| 1675 |
| 1676 // Deprecated api |
| 1677 LIBYUV_API |
| 1678 int ScaleOffset(const uint8* src, int src_width, int src_height, |
| 1679 uint8* dst, int dst_width, int dst_height, int dst_yoffset, |
| 1680 LIBYUV_BOOL interpolate) { |
| 1681 // Chroma requires offset to multiple of 2. |
| 1682 int dst_yoffset_even = dst_yoffset & ~1; |
| 1683 int src_halfwidth = SUBSAMPLE(src_width, 1, 1); |
| 1684 int src_halfheight = SUBSAMPLE(src_height, 1, 1); |
| 1685 int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); |
| 1686 int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); |
| 1687 int aheight = dst_height - dst_yoffset_even * 2; // actual output height |
| 1688 const uint8* src_y = src; |
| 1689 const uint8* src_u = src + src_width * src_height; |
| 1690 const uint8* src_v = src + src_width * src_height + |
| 1691 src_halfwidth * src_halfheight; |
| 1692 uint8* dst_y = dst + dst_yoffset_even * dst_width; |
| 1693 uint8* dst_u = dst + dst_width * dst_height + |
| 1694 (dst_yoffset_even >> 1) * dst_halfwidth; |
| 1695 uint8* dst_v = dst + dst_width * dst_height + dst_halfwidth * dst_halfheight + |
| 1696 (dst_yoffset_even >> 1) * dst_halfwidth; |
| 1697 if (!src || src_width <= 0 || src_height <= 0 || |
| 1698 !dst || dst_width <= 0 || dst_height <= 0 || dst_yoffset_even < 0 || |
| 1699 dst_yoffset_even >= dst_height) { |
| 1700 return -1; |
| 1701 } |
| 1702 return I420Scale(src_y, src_width, |
| 1703 src_u, src_halfwidth, |
| 1704 src_v, src_halfwidth, |
| 1705 src_width, src_height, |
| 1706 dst_y, dst_width, |
| 1707 dst_u, dst_halfwidth, |
| 1708 dst_v, dst_halfwidth, |
| 1709 dst_width, aheight, |
| 1710 interpolate ? kFilterBox : kFilterNone); |
| 1711 } |
| 1712 |
| 1713 #ifdef __cplusplus |
| 1714 } // extern "C" |
| 1715 } // namespace libyuv |
| 1716 #endif |
OLD | NEW |