OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ |
| 10 |
| 11 #include "third_party/libyuv/include/libyuv/scale.h" |
| 12 |
| 13 #include <assert.h> |
| 14 #include <string.h> |
| 15 |
| 16 #include "third_party/libyuv/include/libyuv/cpu_id.h" |
| 17 #include "third_party/libyuv/include/libyuv/planar_functions.h" // CopyARGB |
| 18 #include "third_party/libyuv/include/libyuv/row.h" |
| 19 #include "third_party/libyuv/include/libyuv/scale_row.h" |
| 20 |
| 21 #ifdef __cplusplus |
| 22 namespace libyuv { |
| 23 extern "C" { |
| 24 #endif |
| 25 |
// Returns |v|. Note: like the original, -INT_MIN overflows; callers pass
// small fixed-point deltas so this is not reached in practice.
static __inline int Abs(int v) {
  return (v < 0) ? -v : v;
}
| 29 |
| 30 // CPU agnostic row functions |
| 31 void ScaleRowDown2_C(const uint8* src_ptr, ptrdiff_t src_stride, |
| 32 uint8* dst, int dst_width) { |
| 33 int x; |
| 34 for (x = 0; x < dst_width - 1; x += 2) { |
| 35 dst[0] = src_ptr[1]; |
| 36 dst[1] = src_ptr[3]; |
| 37 dst += 2; |
| 38 src_ptr += 4; |
| 39 } |
| 40 if (dst_width & 1) { |
| 41 dst[0] = src_ptr[1]; |
| 42 } |
| 43 } |
| 44 |
| 45 void ScaleRowDown2_16_C(const uint16* src_ptr, ptrdiff_t src_stride, |
| 46 uint16* dst, int dst_width) { |
| 47 int x; |
| 48 for (x = 0; x < dst_width - 1; x += 2) { |
| 49 dst[0] = src_ptr[1]; |
| 50 dst[1] = src_ptr[3]; |
| 51 dst += 2; |
| 52 src_ptr += 4; |
| 53 } |
| 54 if (dst_width & 1) { |
| 55 dst[0] = src_ptr[1]; |
| 56 } |
| 57 } |
| 58 |
// Down-samples a row 2:1 horizontally: each output pixel is the
// round-to-nearest average of two adjacent source pixels.
// src_stride is unused (single-row filter).
void ScaleRowDown2Linear_C(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst, int dst_width) {
  const uint8* s = src_ptr;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + 1) >> 1;  // +1 rounds before the halving shift.
    dst[1] = (s[2] + s[3] + 1) >> 1;
    dst += 2;
    s += 4;
  }
  // Handle an odd trailing output pixel.
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + 1) >> 1;
  }
}
| 73 |
// 16-bit variant of ScaleRowDown2Linear_C: rounded average of adjacent
// source pixel pairs. src_stride is unused.
void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                              uint16* dst, int dst_width) {
  const uint16* s = src_ptr;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + 1) >> 1;  // +1 rounds before the halving shift.
    dst[1] = (s[2] + s[3] + 1) >> 1;
    dst += 2;
    s += 4;
  }
  // Handle an odd trailing output pixel.
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + 1) >> 1;
  }
}
| 88 |
// 2x2 box filter: each output pixel is the rounded average of a 2x2 block
// spanning this row (s) and the next row (t = s + src_stride).
void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst, int dst_width) {
  const uint8* s = src_ptr;
  const uint8* t = src_ptr + src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;  // +2 rounds the /4.
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
    dst += 2;
    s += 4;
    t += 4;
  }
  // Handle an odd trailing output pixel.
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  }
}
| 105 |
// 16-bit variant of ScaleRowDown2Box_C: rounded 2x2 box average across two
// source rows src_stride apart.
void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst, int dst_width) {
  const uint16* s = src_ptr;
  const uint16* t = src_ptr + src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;  // +2 rounds the /4.
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
    dst += 2;
    s += 4;
    t += 4;
  }
  // Handle an odd trailing output pixel.
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  }
}
| 122 |
| 123 void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride, |
| 124 uint8* dst, int dst_width) { |
| 125 int x; |
| 126 for (x = 0; x < dst_width - 1; x += 2) { |
| 127 dst[0] = src_ptr[2]; |
| 128 dst[1] = src_ptr[6]; |
| 129 dst += 2; |
| 130 src_ptr += 8; |
| 131 } |
| 132 if (dst_width & 1) { |
| 133 dst[0] = src_ptr[2]; |
| 134 } |
| 135 } |
| 136 |
| 137 void ScaleRowDown4_16_C(const uint16* src_ptr, ptrdiff_t src_stride, |
| 138 uint16* dst, int dst_width) { |
| 139 int x; |
| 140 for (x = 0; x < dst_width - 1; x += 2) { |
| 141 dst[0] = src_ptr[2]; |
| 142 dst[1] = src_ptr[6]; |
| 143 dst += 2; |
| 144 src_ptr += 8; |
| 145 } |
| 146 if (dst_width & 1) { |
| 147 dst[0] = src_ptr[2]; |
| 148 } |
| 149 } |
| 150 |
// 4x4 box filter: each output pixel is the rounded average of a 4x4 block
// of source pixels spanning four rows (src_stride apart). The +8 rounds
// before the >>4 divide-by-16.
void ScaleRowDown4Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst, int dst_width) {
  intptr_t stride = src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
             src_ptr[stride + 0] + src_ptr[stride + 1] +
             src_ptr[stride + 2] + src_ptr[stride + 3] +
             src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
             src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
             src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
             src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
             8) >> 4;
    dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
             src_ptr[stride + 4] + src_ptr[stride + 5] +
             src_ptr[stride + 6] + src_ptr[stride + 7] +
             src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
             src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
             src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
             src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
             8) >> 4;
    dst += 2;
    src_ptr += 8;
  }
  // Handle an odd trailing output pixel.
  if (dst_width & 1) {
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
             src_ptr[stride + 0] + src_ptr[stride + 1] +
             src_ptr[stride + 2] + src_ptr[stride + 3] +
             src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
             src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
             src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
             src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
             8) >> 4;
  }
}
| 186 |
// 16-bit variant of ScaleRowDown4Box_C: rounded 4x4 box average across four
// source rows.
void ScaleRowDown4Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst, int dst_width) {
  intptr_t stride = src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
             src_ptr[stride + 0] + src_ptr[stride + 1] +
             src_ptr[stride + 2] + src_ptr[stride + 3] +
             src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
             src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
             src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
             src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
             8) >> 4;
    dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] +
             src_ptr[stride + 4] + src_ptr[stride + 5] +
             src_ptr[stride + 6] + src_ptr[stride + 7] +
             src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] +
             src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] +
             src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] +
             src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] +
             8) >> 4;
    dst += 2;
    src_ptr += 8;
  }
  // Handle an odd trailing output pixel.
  if (dst_width & 1) {
    dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] +
             src_ptr[stride + 0] + src_ptr[stride + 1] +
             src_ptr[stride + 2] + src_ptr[stride + 3] +
             src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] +
             src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] +
             src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] +
             src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] +
             8) >> 4;
  }
}
| 222 |
| 223 void ScaleRowDown34_C(const uint8* src_ptr, ptrdiff_t src_stride, |
| 224 uint8* dst, int dst_width) { |
| 225 int x; |
| 226 assert((dst_width % 3 == 0) && (dst_width > 0)); |
| 227 for (x = 0; x < dst_width; x += 3) { |
| 228 dst[0] = src_ptr[0]; |
| 229 dst[1] = src_ptr[1]; |
| 230 dst[2] = src_ptr[3]; |
| 231 dst += 3; |
| 232 src_ptr += 4; |
| 233 } |
| 234 } |
| 235 |
| 236 void ScaleRowDown34_16_C(const uint16* src_ptr, ptrdiff_t src_stride, |
| 237 uint16* dst, int dst_width) { |
| 238 int x; |
| 239 assert((dst_width % 3 == 0) && (dst_width > 0)); |
| 240 for (x = 0; x < dst_width; x += 3) { |
| 241 dst[0] = src_ptr[0]; |
| 242 dst[1] = src_ptr[1]; |
| 243 dst[2] = src_ptr[3]; |
| 244 dst += 3; |
| 245 src_ptr += 4; |
| 246 } |
| 247 } |
| 248 |
// Filter rows 0 and 1 together, 3 : 1.
// 4:3 horizontal box scale: each group of 4 source pixels yields 3 outputs
// with horizontal taps weighted 3/1, 1/1, 1/3 (round-to-nearest), then the
// two source rows are blended with row 0 weighted 3x.
void ScaleRowDown34_0_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* d, int dst_width) {
  const uint8* s = src_ptr;
  const uint8* t = src_ptr + src_stride;
  int x;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 3) {
    uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    // Vertical blend: 3 parts row 0, 1 part row 1, rounded.
    d[0] = (a0 * 3 + b0 + 2) >> 2;
    d[1] = (a1 * 3 + b1 + 2) >> 2;
    d[2] = (a2 * 3 + b2 + 2) >> 2;
    d += 3;
    s += 4;
    t += 4;
  }
}
| 271 |
// 16-bit variant of ScaleRowDown34_0_Box_C (rows blended 3:1, row 0
// dominant).
void ScaleRowDown34_0_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                               uint16* d, int dst_width) {
  const uint16* s = src_ptr;
  const uint16* t = src_ptr + src_stride;
  int x;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 3) {
    uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    // Vertical blend: 3 parts row 0, 1 part row 1, rounded.
    d[0] = (a0 * 3 + b0 + 2) >> 2;
    d[1] = (a1 * 3 + b1 + 2) >> 2;
    d[2] = (a2 * 3 + b2 + 2) >> 2;
    d += 3;
    s += 4;
    t += 4;
  }
}
| 293 |
// Filter rows 1 and 2 together, 1 : 1.
// Same horizontal taps as ScaleRowDown34_0_Box_C, but the two source rows
// are averaged with equal weight.
void ScaleRowDown34_1_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* d, int dst_width) {
  const uint8* s = src_ptr;
  const uint8* t = src_ptr + src_stride;
  int x;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 3) {
    uint8 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    uint8 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    uint8 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    uint8 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    uint8 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    uint8 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    // Vertical blend: 1:1 rounded average of the two rows.
    d[0] = (a0 + b0 + 1) >> 1;
    d[1] = (a1 + b1 + 1) >> 1;
    d[2] = (a2 + b2 + 1) >> 1;
    d += 3;
    s += 4;
    t += 4;
  }
}
| 316 |
// 16-bit variant of ScaleRowDown34_1_Box_C (rows averaged 1:1).
void ScaleRowDown34_1_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                               uint16* d, int dst_width) {
  const uint16* s = src_ptr;
  const uint16* t = src_ptr + src_stride;
  int x;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (x = 0; x < dst_width; x += 3) {
    uint16 a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2;
    uint16 a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1;
    uint16 a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2;
    uint16 b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2;
    uint16 b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1;
    uint16 b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2;
    // Vertical blend: 1:1 rounded average of the two rows.
    d[0] = (a0 + b0 + 1) >> 1;
    d[1] = (a1 + b1 + 1) >> 1;
    d[2] = (a2 + b2 + 1) >> 1;
    d += 3;
    s += 4;
    t += 4;
  }
}
| 338 |
| 339 // Scales a single row of pixels using point sampling. |
| 340 void ScaleCols_C(uint8* dst_ptr, const uint8* src_ptr, |
| 341 int dst_width, int x, int dx) { |
| 342 int j; |
| 343 for (j = 0; j < dst_width - 1; j += 2) { |
| 344 dst_ptr[0] = src_ptr[x >> 16]; |
| 345 x += dx; |
| 346 dst_ptr[1] = src_ptr[x >> 16]; |
| 347 x += dx; |
| 348 dst_ptr += 2; |
| 349 } |
| 350 if (dst_width & 1) { |
| 351 dst_ptr[0] = src_ptr[x >> 16]; |
| 352 } |
| 353 } |
| 354 |
| 355 void ScaleCols_16_C(uint16* dst_ptr, const uint16* src_ptr, |
| 356 int dst_width, int x, int dx) { |
| 357 int j; |
| 358 for (j = 0; j < dst_width - 1; j += 2) { |
| 359 dst_ptr[0] = src_ptr[x >> 16]; |
| 360 x += dx; |
| 361 dst_ptr[1] = src_ptr[x >> 16]; |
| 362 x += dx; |
| 363 dst_ptr += 2; |
| 364 } |
| 365 if (dst_width & 1) { |
| 366 dst_ptr[0] = src_ptr[x >> 16]; |
| 367 } |
| 368 } |
| 369 |
| 370 // Scales a single row of pixels up by 2x using point sampling. |
| 371 void ScaleColsUp2_C(uint8* dst_ptr, const uint8* src_ptr, |
| 372 int dst_width, int x, int dx) { |
| 373 int j; |
| 374 for (j = 0; j < dst_width - 1; j += 2) { |
| 375 dst_ptr[1] = dst_ptr[0] = src_ptr[0]; |
| 376 src_ptr += 1; |
| 377 dst_ptr += 2; |
| 378 } |
| 379 if (dst_width & 1) { |
| 380 dst_ptr[0] = src_ptr[0]; |
| 381 } |
| 382 } |
| 383 |
| 384 void ScaleColsUp2_16_C(uint16* dst_ptr, const uint16* src_ptr, |
| 385 int dst_width, int x, int dx) { |
| 386 int j; |
| 387 for (j = 0; j < dst_width - 1; j += 2) { |
| 388 dst_ptr[1] = dst_ptr[0] = src_ptr[0]; |
| 389 src_ptr += 1; |
| 390 dst_ptr += 2; |
| 391 } |
| 392 if (dst_width & 1) { |
| 393 dst_ptr[0] = src_ptr[0]; |
| 394 } |
| 395 } |
| 396 |
| 397 // (1-f)a + fb can be replaced with a + f(b-a) |
| 398 #define BLENDER(a, b, f) (uint8)((int)(a) + \ |
| 399 ((int)(f) * ((int)(b) - (int)(a)) >> 16)) |
| 400 |
// Bilinear horizontal scale. x/dx are 16.16 fixed-point; each output blends
// the two source pixels straddling x using the 16-bit fractional part via
// the file-local BLENDER macro.
void ScaleFilterCols_C(uint8* dst_ptr, const uint8* src_ptr,
                       int dst_width, int x, int dx) {
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int xi = x >> 16;           // Integer source position.
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);  // Blend by fractional part.
    x += dx;
    xi = x >> 16;
    a = src_ptr[xi];
    b = src_ptr[xi + 1];
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  // Handle an odd trailing output pixel.
  if (dst_width & 1) {
    int xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  }
}
| 424 |
// Bilinear horizontal scale with a 64-bit position accumulator, so the
// 16.16 position can exceed the 32-bit int range for very wide sources.
void ScaleFilterCols64_C(uint8* dst_ptr, const uint8* src_ptr,
                         int dst_width, int x32, int dx) {
  int64 x = (int64)(x32);  // Widen start position to avoid overflow.
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int64 xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    x += dx;
    xi = x >> 16;
    a = src_ptr[xi];
    b = src_ptr[xi + 1];
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  // Handle an odd trailing output pixel.
  if (dst_width & 1) {
    int64 xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  }
}
| 449 #undef BLENDER |
| 450 |
| 451 #define BLENDER(a, b, f) (uint16)((int)(a) + \ |
| 452 ((int)(f) * ((int)(b) - (int)(a)) >> 16)) |
| 453 |
// 16-bit variant of ScaleFilterCols_C: bilinear horizontal scale using the
// 16-bit BLENDER macro defined just above.
void ScaleFilterCols_16_C(uint16* dst_ptr, const uint16* src_ptr,
                          int dst_width, int x, int dx) {
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int xi = x >> 16;           // Integer source position.
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);  // Blend by fractional part.
    x += dx;
    xi = x >> 16;
    a = src_ptr[xi];
    b = src_ptr[xi + 1];
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  // Handle an odd trailing output pixel.
  if (dst_width & 1) {
    int xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  }
}
| 477 |
// 16-bit variant of ScaleFilterCols64_C: bilinear horizontal scale with a
// 64-bit position accumulator for very wide sources.
void ScaleFilterCols64_16_C(uint16* dst_ptr, const uint16* src_ptr,
                            int dst_width, int x32, int dx) {
  int64 x = (int64)(x32);  // Widen start position to avoid overflow.
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int64 xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
    x += dx;
    xi = x >> 16;
    a = src_ptr[xi];
    b = src_ptr[xi + 1];
    dst_ptr[1] = BLENDER(a, b, x & 0xffff);
    x += dx;
    dst_ptr += 2;
  }
  // Handle an odd trailing output pixel.
  if (dst_width & 1) {
    int64 xi = x >> 16;
    int a = src_ptr[xi];
    int b = src_ptr[xi + 1];
    dst_ptr[0] = BLENDER(a, b, x & 0xffff);
  }
}
| 502 #undef BLENDER |
| 503 |
| 504 void ScaleRowDown38_C(const uint8* src_ptr, ptrdiff_t src_stride, |
| 505 uint8* dst, int dst_width) { |
| 506 int x; |
| 507 assert(dst_width % 3 == 0); |
| 508 for (x = 0; x < dst_width; x += 3) { |
| 509 dst[0] = src_ptr[0]; |
| 510 dst[1] = src_ptr[3]; |
| 511 dst[2] = src_ptr[6]; |
| 512 dst += 3; |
| 513 src_ptr += 8; |
| 514 } |
| 515 } |
| 516 |
| 517 void ScaleRowDown38_16_C(const uint16* src_ptr, ptrdiff_t src_stride, |
| 518 uint16* dst, int dst_width) { |
| 519 int x; |
| 520 assert(dst_width % 3 == 0); |
| 521 for (x = 0; x < dst_width; x += 3) { |
| 522 dst[0] = src_ptr[0]; |
| 523 dst[1] = src_ptr[3]; |
| 524 dst[2] = src_ptr[6]; |
| 525 dst += 3; |
| 526 src_ptr += 8; |
| 527 } |
| 528 } |
| 529 |
// 8x3 -> 3x1 box filter. First two outputs average 3x3 blocks (9 pixels);
// the third averages the remaining 2x3 block (6 pixels). Division is done
// by fixed-point reciprocal multiply: * (65536/n) >> 16.
void ScaleRowDown38_3_Box_C(const uint8* src_ptr,
                            ptrdiff_t src_stride,
                            uint8* dst_ptr, int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
                 src_ptr[stride + 0] + src_ptr[stride + 1] +
                 src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
                 src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
                 (65536 / 9) >> 16;
    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
                 src_ptr[stride + 3] + src_ptr[stride + 4] +
                 src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
                 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
                 (65536 / 9) >> 16;
    dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
                 src_ptr[stride + 6] + src_ptr[stride + 7] +
                 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
                 (65536 / 6) >> 16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}
| 556 |
// 16-bit variant of ScaleRowDown38_3_Box_C (8x3 -> 3x1 box filter with
// reciprocal-multiply division).
void ScaleRowDown38_3_Box_16_C(const uint16* src_ptr,
                               ptrdiff_t src_stride,
                               uint16* dst_ptr, int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
                 src_ptr[stride + 0] + src_ptr[stride + 1] +
                 src_ptr[stride + 2] + src_ptr[stride * 2 + 0] +
                 src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) *
                 (65536 / 9) >> 16;
    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
                 src_ptr[stride + 3] + src_ptr[stride + 4] +
                 src_ptr[stride + 5] + src_ptr[stride * 2 + 3] +
                 src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) *
                 (65536 / 9) >> 16;
    dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
                 src_ptr[stride + 6] + src_ptr[stride + 7] +
                 src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) *
                 (65536 / 6) >> 16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}
| 582 |
// 8x2 -> 3x1 box filter. First two outputs average 3x2 blocks (6 pixels);
// the third averages the remaining 2x2 block (4 pixels), using fixed-point
// reciprocal multiply for the division.
void ScaleRowDown38_2_Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* dst_ptr, int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
                 src_ptr[stride + 0] + src_ptr[stride + 1] +
                 src_ptr[stride + 2]) * (65536 / 6) >> 16;
    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
                 src_ptr[stride + 3] + src_ptr[stride + 4] +
                 src_ptr[stride + 5]) * (65536 / 6) >> 16;
    dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
                 src_ptr[stride + 6] + src_ptr[stride + 7]) *
                 (65536 / 4) >> 16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}
| 603 |
// 16-bit variant of ScaleRowDown38_2_Box_C (8x2 -> 3x1 box filter).
void ScaleRowDown38_2_Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                               uint16* dst_ptr, int dst_width) {
  intptr_t stride = src_stride;
  int i;
  assert((dst_width % 3 == 0) && (dst_width > 0));
  for (i = 0; i < dst_width; i += 3) {
    dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] +
                 src_ptr[stride + 0] + src_ptr[stride + 1] +
                 src_ptr[stride + 2]) * (65536 / 6) >> 16;
    dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] +
                 src_ptr[stride + 3] + src_ptr[stride + 4] +
                 src_ptr[stride + 5]) * (65536 / 6) >> 16;
    dst_ptr[2] = (src_ptr[6] + src_ptr[7] +
                 src_ptr[stride + 6] + src_ptr[stride + 7]) *
                 (65536 / 4) >> 16;
    src_ptr += 8;
    dst_ptr += 3;
  }
}
| 623 |
// Sums src_height rows column-wise into a row of 16-bit totals, saturating
// each column sum at 65535.
void ScaleAddRows_C(const uint8* src_ptr, ptrdiff_t src_stride,
                    uint16* dst_ptr, int src_width, int src_height) {
  int x;
  assert(src_width > 0);
  assert(src_height > 0);
  for (x = 0; x < src_width; ++x) {
    const uint8* s = src_ptr + x;  // Walk down column x, one row at a time.
    unsigned int sum = 0u;
    int y;
    for (y = 0; y < src_height; ++y) {
      sum += s[0];
      s += src_stride;
    }
    // TODO(fbarchard): Consider limiting height to 256 to avoid overflow.
    dst_ptr[x] = sum < 65535u ? sum : 65535u;  // Saturate to uint16 range.
  }
}
| 641 |
// 16-bit variant: sums src_height rows column-wise into 32-bit totals.
// The wider accumulator/destination removes the saturation needed in the
// 8-bit version.
void ScaleAddRows_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                       uint32* dst_ptr, int src_width, int src_height) {
  int x;
  assert(src_width > 0);
  assert(src_height > 0);
  for (x = 0; x < src_width; ++x) {
    const uint16* s = src_ptr + x;  // Walk down column x.
    unsigned int sum = 0u;
    int y;
    for (y = 0; y < src_height; ++y) {
      sum += s[0];
      s += src_stride;
    }
    // No risk of overflow here now
    dst_ptr[x] = sum;
  }
}
| 659 |
| 660 void ScaleARGBRowDown2_C(const uint8* src_argb, |
| 661 ptrdiff_t src_stride, |
| 662 uint8* dst_argb, int dst_width) { |
| 663 const uint32* src = (const uint32*)(src_argb); |
| 664 uint32* dst = (uint32*)(dst_argb); |
| 665 |
| 666 int x; |
| 667 for (x = 0; x < dst_width - 1; x += 2) { |
| 668 dst[0] = src[1]; |
| 669 dst[1] = src[3]; |
| 670 src += 4; |
| 671 dst += 2; |
| 672 } |
| 673 if (dst_width & 1) { |
| 674 dst[0] = src[1]; |
| 675 } |
| 676 } |
| 677 |
// Down-samples an ARGB row 2:1 horizontally: rounded per-channel average of
// two adjacent 4-byte pixels. src_stride is unused.
void ScaleARGBRowDown2Linear_C(const uint8* src_argb,
                               ptrdiff_t src_stride,
                               uint8* dst_argb, int dst_width) {
  int x;
  for (x = 0; x < dst_width; ++x) {
    dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1;  // Channel 0.
    dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1;  // Channel 1.
    dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1;  // Channel 2.
    dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1;  // Channel 3.
    src_argb += 8;
    dst_argb += 4;
  }
}
| 691 |
// 2x2 box filter for ARGB: rounded per-channel average of a 2x2 block of
// 4-byte pixels spanning this row and the next (src_stride apart).
void ScaleARGBRowDown2Box_C(const uint8* src_argb, ptrdiff_t src_stride,
                            uint8* dst_argb, int dst_width) {
  int x;
  for (x = 0; x < dst_width; ++x) {
    dst_argb[0] = (src_argb[0] + src_argb[4] +
                  src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
    dst_argb[1] = (src_argb[1] + src_argb[5] +
                  src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
    dst_argb[2] = (src_argb[2] + src_argb[6] +
                  src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
    dst_argb[3] = (src_argb[3] + src_argb[7] +
                  src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
    src_argb += 8;
    dst_argb += 4;
  }
}
| 708 |
| 709 void ScaleARGBRowDownEven_C(const uint8* src_argb, ptrdiff_t src_stride, |
| 710 int src_stepx, |
| 711 uint8* dst_argb, int dst_width) { |
| 712 const uint32* src = (const uint32*)(src_argb); |
| 713 uint32* dst = (uint32*)(dst_argb); |
| 714 |
| 715 int x; |
| 716 for (x = 0; x < dst_width - 1; x += 2) { |
| 717 dst[0] = src[0]; |
| 718 dst[1] = src[src_stepx]; |
| 719 src += src_stepx * 2; |
| 720 dst += 2; |
| 721 } |
| 722 if (dst_width & 1) { |
| 723 dst[0] = src[0]; |
| 724 } |
| 725 } |
| 726 |
// 2x2 box filter for ARGB at stepped positions: averages a 2x2 pixel block
// per channel, then advances the source by src_stepx pixels per output.
void ScaleARGBRowDownEvenBox_C(const uint8* src_argb,
                               ptrdiff_t src_stride,
                               int src_stepx,
                               uint8* dst_argb, int dst_width) {
  int x;
  for (x = 0; x < dst_width; ++x) {
    dst_argb[0] = (src_argb[0] + src_argb[4] +
                  src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2;
    dst_argb[1] = (src_argb[1] + src_argb[5] +
                  src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2;
    dst_argb[2] = (src_argb[2] + src_argb[6] +
                  src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2;
    dst_argb[3] = (src_argb[3] + src_argb[7] +
                  src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2;
    src_argb += src_stepx * 4;  // src_stepx is in pixels; 4 bytes per pixel.
    dst_argb += 4;
  }
}
| 745 |
| 746 // Scales a single row of pixels using point sampling. |
| 747 void ScaleARGBCols_C(uint8* dst_argb, const uint8* src_argb, |
| 748 int dst_width, int x, int dx) { |
| 749 const uint32* src = (const uint32*)(src_argb); |
| 750 uint32* dst = (uint32*)(dst_argb); |
| 751 int j; |
| 752 for (j = 0; j < dst_width - 1; j += 2) { |
| 753 dst[0] = src[x >> 16]; |
| 754 x += dx; |
| 755 dst[1] = src[x >> 16]; |
| 756 x += dx; |
| 757 dst += 2; |
| 758 } |
| 759 if (dst_width & 1) { |
| 760 dst[0] = src[x >> 16]; |
| 761 } |
| 762 } |
| 763 |
| 764 void ScaleARGBCols64_C(uint8* dst_argb, const uint8* src_argb, |
| 765 int dst_width, int x32, int dx) { |
| 766 int64 x = (int64)(x32); |
| 767 const uint32* src = (const uint32*)(src_argb); |
| 768 uint32* dst = (uint32*)(dst_argb); |
| 769 int j; |
| 770 for (j = 0; j < dst_width - 1; j += 2) { |
| 771 dst[0] = src[x >> 16]; |
| 772 x += dx; |
| 773 dst[1] = src[x >> 16]; |
| 774 x += dx; |
| 775 dst += 2; |
| 776 } |
| 777 if (dst_width & 1) { |
| 778 dst[0] = src[x >> 16]; |
| 779 } |
| 780 } |
| 781 |
| 782 // Scales a single row of pixels up by 2x using point sampling. |
| 783 void ScaleARGBColsUp2_C(uint8* dst_argb, const uint8* src_argb, |
| 784 int dst_width, int x, int dx) { |
| 785 const uint32* src = (const uint32*)(src_argb); |
| 786 uint32* dst = (uint32*)(dst_argb); |
| 787 int j; |
| 788 for (j = 0; j < dst_width - 1; j += 2) { |
| 789 dst[1] = dst[0] = src[0]; |
| 790 src += 1; |
| 791 dst += 2; |
| 792 } |
| 793 if (dst_width & 1) { |
| 794 dst[0] = src[0]; |
| 795 } |
| 796 } |
| 797 |
| 798 // Mimics SSSE3 blender |
| 799 #define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b) * f) >> 7 |
| 800 #define BLENDERC(a, b, f, s) (uint32)( \ |
| 801 BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s) |
| 802 #define BLENDER(a, b, f) \ |
| 803 BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | \ |
| 804 BLENDERC(a, b, f, 8) | BLENDERC(a, b, f, 0) |
| 805 |
// Bilinear ARGB horizontal scale. Uses a 7-bit fraction ((x >> 9) & 0x7f)
// and the per-channel BLENDER macro above so results match the SSSE3 path.
void ScaleARGBFilterCols_C(uint8* dst_argb, const uint8* src_argb,
                           int dst_width, int x, int dx) {
  const uint32* src = (const uint32*)(src_argb);
  uint32* dst = (uint32*)(dst_argb);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int xi = x >> 16;           // Integer source position.
    int xf = (x >> 9) & 0x7f;   // 7-bit fraction (mimics SSSE3 precision).
    uint32 a = src[xi];
    uint32 b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
    x += dx;
    xi = x >> 16;
    xf = (x >> 9) & 0x7f;
    a = src[xi];
    b = src[xi + 1];
    dst[1] = BLENDER(a, b, xf);
    x += dx;
    dst += 2;
  }
  // Handle an odd trailing output pixel.
  if (dst_width & 1) {
    int xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint32 a = src[xi];
    uint32 b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
  }
}
| 834 |
// Bilinear ARGB horizontal scale with a 64-bit position accumulator for
// very wide sources; otherwise identical to ScaleARGBFilterCols_C.
void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
                             int dst_width, int x32, int dx) {
  int64 x = (int64)(x32);  // Widen start position to avoid overflow.
  const uint32* src = (const uint32*)(src_argb);
  uint32* dst = (uint32*)(dst_argb);
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    int64 xi = x >> 16;
    int xf = (x >> 9) & 0x7f;   // 7-bit fraction (mimics SSSE3 precision).
    uint32 a = src[xi];
    uint32 b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
    x += dx;
    xi = x >> 16;
    xf = (x >> 9) & 0x7f;
    a = src[xi];
    b = src[xi + 1];
    dst[1] = BLENDER(a, b, xf);
    x += dx;
    dst += 2;
  }
  // Handle an odd trailing output pixel.
  if (dst_width & 1) {
    int64 xi = x >> 16;
    int xf = (x >> 9) & 0x7f;
    uint32 a = src[xi];
    uint32 b = src[xi + 1];
    dst[0] = BLENDER(a, b, xf);
  }
}
| 864 #undef BLENDER1 |
| 865 #undef BLENDERC |
| 866 #undef BLENDER |
| 867 |
// Scale plane vertically with bilinear interpolation.
// y and dy are 16.16 fixed-point row positions; x selects the starting
// column (also 16.16). Each destination row blends two adjacent source rows
// via InterpolateRow, which is dispatched to the fastest available SIMD
// implementation at runtime (SSE2/SSSE3/AVX2/NEON/MIPS_DSPR2), falling back
// to the C version.
void ScalePlaneVertical(int src_height,
                        int dst_width, int dst_height,
                        int src_stride, int dst_stride,
                        const uint8* src_argb, uint8* dst_argb,
                        int x, int y, int dy,
                        int bpp, enum FilterMode filtering) {
  // TODO(fbarchard): Allow higher bpp.
  int dst_width_bytes = dst_width * bpp;
  void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
      ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
      InterpolateRow_C;
  // Largest valid 16.16 y; keeps yi + 1 within the source plane.
  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
  int j;
  assert(bpp >= 1 && bpp <= 4);
  assert(src_height != 0);
  assert(dst_width > 0);
  assert(dst_height > 0);
  src_argb += (x >> 16) * bpp;  // Advance to the starting column.
#if defined(HAS_INTERPOLATEROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) {
    InterpolateRow = InterpolateRow_Any_SSE2;
    if (IS_ALIGNED(dst_width_bytes, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSE2;
      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_SSE2;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width_bytes, 16)) {
      InterpolateRow = InterpolateRow_Unaligned_SSSE3;
      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) &&
          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) {
        InterpolateRow = InterpolateRow_SSSE3;
      }
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width_bytes, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width_bytes, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2)
  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 &&
      IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) &&
      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
    InterpolateRow = InterpolateRow_Any_MIPS_DSPR2;
    if (IS_ALIGNED(dst_width_bytes, 4)) {
      InterpolateRow = InterpolateRow_MIPS_DSPR2;
    }
  }
#endif
  for (j = 0; j < dst_height; ++j) {
    int yi;
    int yf;
    if (y > max_y) {
      y = max_y;  // Clamp so the interpolator never reads past the plane.
    }
    yi = y >> 16;  // Integer source row.
    yf = filtering ? ((y >> 8) & 255) : 0;  // 8-bit vertical fraction.
    InterpolateRow(dst_argb, src_argb + yi * src_stride,
                   src_stride, dst_width_bytes, yf);
    dst_argb += dst_stride;
    y += dy;
  }
}
| 951 void ScalePlaneVertical_16(int src_height, |
| 952 int dst_width, int dst_height, |
| 953 int src_stride, int dst_stride, |
| 954 const uint16* src_argb, uint16* dst_argb, |
| 955 int x, int y, int dy, |
| 956 int wpp, enum FilterMode filtering) { |
| 957 // TODO(fbarchard): Allow higher wpp. |
| 958 int dst_width_words = dst_width * wpp; |
| 959 void (*InterpolateRow)(uint16* dst_argb, const uint16* src_argb, |
| 960 ptrdiff_t src_stride, int dst_width, int source_y_fraction) = |
| 961 InterpolateRow_16_C; |
| 962 const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0; |
| 963 int j; |
| 964 assert(wpp >= 1 && wpp <= 2); |
| 965 assert(src_height != 0); |
| 966 assert(dst_width > 0); |
| 967 assert(dst_height > 0); |
| 968 src_argb += (x >> 16) * wpp; |
| 969 #if defined(HAS_INTERPOLATEROW_16_SSE2) |
| 970 if (TestCpuFlag(kCpuHasSSE2) && dst_width_bytes >= 16) { |
| 971 InterpolateRow = InterpolateRow_Any_16_SSE2; |
| 972 if (IS_ALIGNED(dst_width_bytes, 16)) { |
| 973 InterpolateRow = InterpolateRow_Unaligned_16_SSE2; |
| 974 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && |
| 975 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { |
| 976 InterpolateRow = InterpolateRow_16_SSE2; |
| 977 } |
| 978 } |
| 979 } |
| 980 #endif |
| 981 #if defined(HAS_INTERPOLATEROW_16_SSSE3) |
| 982 if (TestCpuFlag(kCpuHasSSSE3) && dst_width_bytes >= 16) { |
| 983 InterpolateRow = InterpolateRow_Any_16_SSSE3; |
| 984 if (IS_ALIGNED(dst_width_bytes, 16)) { |
| 985 InterpolateRow = InterpolateRow_Unaligned_16_SSSE3; |
| 986 if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride, 16) && |
| 987 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride, 16)) { |
| 988 InterpolateRow = InterpolateRow_16_SSSE3; |
| 989 } |
| 990 } |
| 991 } |
| 992 #endif |
| 993 #if defined(HAS_INTERPOLATEROW_16_AVX2) |
| 994 if (TestCpuFlag(kCpuHasAVX2) && dst_width_bytes >= 32) { |
| 995 InterpolateRow = InterpolateRow_Any_16_AVX2; |
| 996 if (IS_ALIGNED(dst_width_bytes, 32)) { |
| 997 InterpolateRow = InterpolateRow_16_AVX2; |
| 998 } |
| 999 } |
| 1000 #endif |
| 1001 #if defined(HAS_INTERPOLATEROW_16_NEON) |
| 1002 if (TestCpuFlag(kCpuHasNEON) && dst_width_bytes >= 16) { |
| 1003 InterpolateRow = InterpolateRow_Any_16_NEON; |
| 1004 if (IS_ALIGNED(dst_width_bytes, 16)) { |
| 1005 InterpolateRow = InterpolateRow_16_NEON; |
| 1006 } |
| 1007 } |
| 1008 #endif |
| 1009 #if defined(HAS_INTERPOLATEROWS_16_MIPS_DSPR2) |
| 1010 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && dst_width_bytes >= 4 && |
| 1011 IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4) && |
| 1012 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) { |
| 1013 InterpolateRow = InterpolateRow_Any_16_MIPS_DSPR2; |
| 1014 if (IS_ALIGNED(dst_width_bytes, 4)) { |
| 1015 InterpolateRow = InterpolateRow_16_MIPS_DSPR2; |
| 1016 } |
| 1017 } |
| 1018 #endif |
| 1019 for (j = 0; j < dst_height; ++j) { |
| 1020 int yi; |
| 1021 int yf; |
| 1022 if (y > max_y) { |
| 1023 y = max_y; |
| 1024 } |
| 1025 yi = y >> 16; |
| 1026 yf = filtering ? ((y >> 8) & 255) : 0; |
| 1027 InterpolateRow(dst_argb, src_argb + yi * src_stride, |
| 1028 src_stride, dst_width_words, yf); |
| 1029 dst_argb += dst_stride; |
| 1030 y += dy; |
| 1031 } |
| 1032 } |
| 1033 |
| 1034 // Simplify the filtering based on scale factors. |
| 1035 enum FilterMode ScaleFilterReduce(int src_width, int src_height, |
| 1036 int dst_width, int dst_height, |
| 1037 enum FilterMode filtering) { |
| 1038 if (src_width < 0) { |
| 1039 src_width = -src_width; |
| 1040 } |
| 1041 if (src_height < 0) { |
| 1042 src_height = -src_height; |
| 1043 } |
| 1044 if (filtering == kFilterBox) { |
| 1045 // If scaling both axis to 0.5 or larger, switch from Box to Bilinear. |
| 1046 if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) { |
| 1047 filtering = kFilterBilinear; |
| 1048 } |
| 1049 // If scaling to larger, switch from Box to Bilinear. |
| 1050 if (dst_width >= src_width || dst_height >= src_height) { |
| 1051 filtering = kFilterBilinear; |
| 1052 } |
| 1053 } |
| 1054 if (filtering == kFilterBilinear) { |
| 1055 if (src_height == 1) { |
| 1056 filtering = kFilterLinear; |
| 1057 } |
| 1058 // TODO(fbarchard): Detect any odd scale factor and reduce to Linear. |
| 1059 if (dst_height == src_height || dst_height * 3 == src_height) { |
| 1060 filtering = kFilterLinear; |
| 1061 } |
| 1062 // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to |
| 1063 // avoid reading 2 pixels horizontally that causes memory exception. |
| 1064 if (src_width == 1) { |
| 1065 filtering = kFilterNone; |
| 1066 } |
| 1067 } |
| 1068 if (filtering == kFilterLinear) { |
| 1069 if (src_width == 1) { |
| 1070 filtering = kFilterNone; |
| 1071 } |
| 1072 // TODO(fbarchard): Detect any odd scale factor and reduce to None. |
| 1073 if (dst_width == src_width || dst_width * 3 == src_width) { |
| 1074 filtering = kFilterNone; |
| 1075 } |
| 1076 } |
| 1077 return filtering; |
| 1078 } |
| 1079 |
// Divide num by div and return the quotient as a 16.16 fixed point value.
// The dividend is widened to 64 bits before shifting so num up to the full
// int range cannot overflow.
int FixedDiv_C(int num, int div) {
  long long scaled_num = (long long)(num) << 16;
  return (int)(scaled_num / div);
}
| 1084 |
// Divide (num - 1) by (div - 1) and return the quotient as a 16.16 fixed
// point value: the bias 0x00010001 subtracts 1 from both the integer part
// and the 16.16 numerator so the last source pixel maps exactly to the last
// destination pixel.
int FixedDiv1_C(int num, int div) {
  long long biased_num = ((long long)(num) << 16) - 0x00010001;
  return (int)(biased_num / (div - 1));
}
| 1090 |
// Half of |dx| plus offset |s|, rounded toward zero for negative dx; used to
// start sampling at the center of the first source pixel.
#define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s)

// Compute slope values for stepping.
// Fills *x, *y with the 16.16 fixed point start coordinates and *dx, *dy
// with the 16.16 per-pixel steps for the given filter mode. A negative
// src_width requests horizontal mirroring (the caller keeps src_width
// negative; only the returned x/dx are adjusted here).
void ScaleSlope(int src_width, int src_height,
                int dst_width, int dst_height,
                enum FilterMode filtering,
                int* x, int* y, int* dx, int* dy) {
  assert(x != NULL);
  assert(y != NULL);
  assert(dx != NULL);
  assert(dy != NULL);
  assert(src_width != 0);
  assert(src_height != 0);
  assert(dst_width > 0);
  assert(dst_height > 0);
  // Check for 1 pixel and avoid FixedDiv overflow.
  // (src/dst ratios >= 32768 would overflow the 16.16 result; scaling to the
  // source size instead yields a step of 1.0.)
  if (dst_width == 1 && src_width >= 32768) {
    dst_width = src_width;
  }
  if (dst_height == 1 && src_height >= 32768) {
    dst_height = src_height;
  }
  if (filtering == kFilterBox) {
    // Scale step for point sampling duplicates all pixels equally.
    *dx = FixedDiv(Abs(src_width), dst_width);
    *dy = FixedDiv(src_height, dst_height);
    *x = 0;
    *y = 0;
  } else if (filtering == kFilterBilinear) {
    // Scale step for bilinear sampling renders last pixel once for upsample.
    if (dst_width <= Abs(src_width)) {
      *dx = FixedDiv(Abs(src_width), dst_width);
      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
    } else if (dst_width > 1) {
      // Upsample: FixedDiv1 maps the last source pixel to the last
      // destination pixel. dst_width == 1 leaves *dx/*x unset; presumably
      // callers never hit that after the clamps above — TODO confirm.
      *dx = FixedDiv1(Abs(src_width), dst_width);
      *x = 0;
    }
    if (dst_height <= src_height) {
      *dy = FixedDiv(src_height, dst_height);
      *y = CENTERSTART(*dy, -32768);  // Subtract 0.5 (32768) to center filter.
    } else if (dst_height > 1) {
      *dy = FixedDiv1(src_height, dst_height);
      *y = 0;
    }
  } else if (filtering == kFilterLinear) {
    // Scale step for bilinear sampling renders last pixel once for upsample.
    if (dst_width <= Abs(src_width)) {
      *dx = FixedDiv(Abs(src_width), dst_width);
      *x = CENTERSTART(*dx, -32768);  // Subtract 0.5 (32768) to center filter.
    } else if (dst_width > 1) {
      *dx = FixedDiv1(Abs(src_width), dst_width);
      *x = 0;
    }
    // Linear filters vertically by point sampling: start at the half step.
    *dy = FixedDiv(src_height, dst_height);
    *y = *dy >> 1;
  } else {
    // Scale step for point sampling duplicates all pixels equally.
    *dx = FixedDiv(Abs(src_width), dst_width);
    *dy = FixedDiv(src_height, dst_height);
    *x = CENTERSTART(*dx, 0);
    *y = CENTERSTART(*dy, 0);
  }
  // Negative src_width means horizontally mirror.
  if (src_width < 0) {
    // Start at the rightmost destination sample and step backwards.
    *x += (dst_width - 1) * *dx;
    *dx = -*dx;
    // src_width = -src_width;   // Caller must do this.
  }
}
#undef CENTERSTART
| 1161 |
| 1162 #ifdef __cplusplus |
| 1163 } // extern "C" |
| 1164 } // namespace libyuv |
| 1165 #endif |
OLD | NEW |