OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ |
| 10 |
| 11 #include "third_party/libyuv/include/libyuv/planar_functions.h" |
| 12 |
| 13 #include <string.h> // for memset() |
| 14 |
| 15 #include "third_party/libyuv/include/libyuv/cpu_id.h" |
| 16 #ifdef HAVE_JPEG |
| 17 #include "third_party/libyuv/include/libyuv/mjpeg_decoder.h" |
| 18 #endif |
| 19 #include "third_party/libyuv/include/libyuv/row.h" |
| 20 |
| 21 #ifdef __cplusplus |
| 22 namespace libyuv { |
| 23 extern "C" { |
| 24 #endif |
| 25 |
| 26 // Copy a plane of data |
| 27 LIBYUV_API |
| 28 void CopyPlane(const uint8* src_y, int src_stride_y, |
| 29 uint8* dst_y, int dst_stride_y, |
| 30 int width, int height) { |
| 31 int y; |
| 32 void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C; |
| 33 // Coalesce rows. |
| 34 if (src_stride_y == width && |
| 35 dst_stride_y == width) { |
| 36 width *= height; |
| 37 height = 1; |
| 38 src_stride_y = dst_stride_y = 0; |
| 39 } |
| 40 #if defined(HAS_COPYROW_X86) |
| 41 if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { |
| 42 CopyRow = CopyRow_X86; |
| 43 } |
| 44 #endif |
| 45 #if defined(HAS_COPYROW_SSE2) |
| 46 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) && |
| 47 IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && |
| 48 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { |
| 49 CopyRow = CopyRow_SSE2; |
| 50 } |
| 51 #endif |
| 52 #if defined(HAS_COPYROW_ERMS) |
| 53 if (TestCpuFlag(kCpuHasERMS)) { |
| 54 CopyRow = CopyRow_ERMS; |
| 55 } |
| 56 #endif |
| 57 #if defined(HAS_COPYROW_NEON) |
| 58 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) { |
| 59 CopyRow = CopyRow_NEON; |
| 60 } |
| 61 #endif |
| 62 #if defined(HAS_COPYROW_MIPS) |
| 63 if (TestCpuFlag(kCpuHasMIPS)) { |
| 64 CopyRow = CopyRow_MIPS; |
| 65 } |
| 66 #endif |
| 67 |
| 68 // Copy plane |
| 69 for (y = 0; y < height; ++y) { |
| 70 CopyRow(src_y, dst_y, width); |
| 71 src_y += src_stride_y; |
| 72 dst_y += dst_stride_y; |
| 73 } |
| 74 } |
| 75 |
| 76 LIBYUV_API |
| 77 void CopyPlane_16(const uint16* src_y, int src_stride_y, |
| 78 uint16* dst_y, int dst_stride_y, |
| 79 int width, int height) { |
| 80 int y; |
| 81 void (*CopyRow)(const uint16* src, uint16* dst, int width) = CopyRow_16_C; |
| 82 // Coalesce rows. |
| 83 if (src_stride_y == width && |
| 84 dst_stride_y == width) { |
| 85 width *= height; |
| 86 height = 1; |
| 87 src_stride_y = dst_stride_y = 0; |
| 88 } |
| 89 #if defined(HAS_COPYROW_16_X86) |
| 90 if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { |
| 91 CopyRow = CopyRow_16_X86; |
| 92 } |
| 93 #endif |
| 94 #if defined(HAS_COPYROW_16_SSE2) |
| 95 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) && |
| 96 IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && |
| 97 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { |
| 98 CopyRow = CopyRow_16_SSE2; |
| 99 } |
| 100 #endif |
| 101 #if defined(HAS_COPYROW_16_ERMS) |
| 102 if (TestCpuFlag(kCpuHasERMS)) { |
| 103 CopyRow = CopyRow_16_ERMS; |
| 104 } |
| 105 #endif |
| 106 #if defined(HAS_COPYROW_16_NEON) |
| 107 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) { |
| 108 CopyRow = CopyRow_16_NEON; |
| 109 } |
| 110 #endif |
| 111 #if defined(HAS_COPYROW_16_MIPS) |
| 112 if (TestCpuFlag(kCpuHasMIPS)) { |
| 113 CopyRow = CopyRow_16_MIPS; |
| 114 } |
| 115 #endif |
| 116 |
| 117 // Copy plane |
| 118 for (y = 0; y < height; ++y) { |
| 119 CopyRow(src_y, dst_y, width); |
| 120 src_y += src_stride_y; |
| 121 dst_y += dst_stride_y; |
| 122 } |
| 123 } |
| 124 |
| 125 // Copy I422. |
| 126 LIBYUV_API |
| 127 int I422Copy(const uint8* src_y, int src_stride_y, |
| 128 const uint8* src_u, int src_stride_u, |
| 129 const uint8* src_v, int src_stride_v, |
| 130 uint8* dst_y, int dst_stride_y, |
| 131 uint8* dst_u, int dst_stride_u, |
| 132 uint8* dst_v, int dst_stride_v, |
| 133 int width, int height) { |
| 134 int halfwidth = (width + 1) >> 1; |
| 135 if (!src_y || !src_u || !src_v || |
| 136 !dst_y || !dst_u || !dst_v || |
| 137 width <= 0 || height == 0) { |
| 138 return -1; |
| 139 } |
| 140 // Negative height means invert the image. |
| 141 if (height < 0) { |
| 142 height = -height; |
| 143 src_y = src_y + (height - 1) * src_stride_y; |
| 144 src_u = src_u + (height - 1) * src_stride_u; |
| 145 src_v = src_v + (height - 1) * src_stride_v; |
| 146 src_stride_y = -src_stride_y; |
| 147 src_stride_u = -src_stride_u; |
| 148 src_stride_v = -src_stride_v; |
| 149 } |
| 150 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); |
| 151 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height); |
| 152 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height); |
| 153 return 0; |
| 154 } |
| 155 |
| 156 // Copy I444. |
| 157 LIBYUV_API |
| 158 int I444Copy(const uint8* src_y, int src_stride_y, |
| 159 const uint8* src_u, int src_stride_u, |
| 160 const uint8* src_v, int src_stride_v, |
| 161 uint8* dst_y, int dst_stride_y, |
| 162 uint8* dst_u, int dst_stride_u, |
| 163 uint8* dst_v, int dst_stride_v, |
| 164 int width, int height) { |
| 165 if (!src_y || !src_u || !src_v || |
| 166 !dst_y || !dst_u || !dst_v || |
| 167 width <= 0 || height == 0) { |
| 168 return -1; |
| 169 } |
| 170 // Negative height means invert the image. |
| 171 if (height < 0) { |
| 172 height = -height; |
| 173 src_y = src_y + (height - 1) * src_stride_y; |
| 174 src_u = src_u + (height - 1) * src_stride_u; |
| 175 src_v = src_v + (height - 1) * src_stride_v; |
| 176 src_stride_y = -src_stride_y; |
| 177 src_stride_u = -src_stride_u; |
| 178 src_stride_v = -src_stride_v; |
| 179 } |
| 180 |
| 181 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); |
| 182 CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height); |
| 183 CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height); |
| 184 return 0; |
| 185 } |
| 186 |
| 187 // Copy I400. |
| 188 LIBYUV_API |
| 189 int I400ToI400(const uint8* src_y, int src_stride_y, |
| 190 uint8* dst_y, int dst_stride_y, |
| 191 int width, int height) { |
| 192 if (!src_y || !dst_y || width <= 0 || height == 0) { |
| 193 return -1; |
| 194 } |
| 195 // Negative height means invert the image. |
| 196 if (height < 0) { |
| 197 height = -height; |
| 198 src_y = src_y + (height - 1) * src_stride_y; |
| 199 src_stride_y = -src_stride_y; |
| 200 } |
| 201 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); |
| 202 return 0; |
| 203 } |
| 204 |
| 205 // Convert I420 to I400. |
| 206 LIBYUV_API |
| 207 int I420ToI400(const uint8* src_y, int src_stride_y, |
| 208 const uint8* src_u, int src_stride_u, |
| 209 const uint8* src_v, int src_stride_v, |
| 210 uint8* dst_y, int dst_stride_y, |
| 211 int width, int height) { |
| 212 if (!src_y || !dst_y || width <= 0 || height == 0) { |
| 213 return -1; |
| 214 } |
| 215 // Negative height means invert the image. |
| 216 if (height < 0) { |
| 217 height = -height; |
| 218 src_y = src_y + (height - 1) * src_stride_y; |
| 219 src_stride_y = -src_stride_y; |
| 220 } |
| 221 CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); |
| 222 return 0; |
| 223 } |
| 224 |
| 225 // Mirror a plane of data. |
| 226 void MirrorPlane(const uint8* src_y, int src_stride_y, |
| 227 uint8* dst_y, int dst_stride_y, |
| 228 int width, int height) { |
| 229 int y; |
| 230 void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C; |
| 231 // Negative height means invert the image. |
| 232 if (height < 0) { |
| 233 height = -height; |
| 234 src_y = src_y + (height - 1) * src_stride_y; |
| 235 src_stride_y = -src_stride_y; |
| 236 } |
| 237 #if defined(HAS_MIRRORROW_NEON) |
| 238 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { |
| 239 MirrorRow = MirrorRow_NEON; |
| 240 } |
| 241 #endif |
| 242 #if defined(HAS_MIRRORROW_SSE2) |
| 243 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16)) { |
| 244 MirrorRow = MirrorRow_SSE2; |
| 245 } |
| 246 #endif |
| 247 #if defined(HAS_MIRRORROW_SSSE3) |
| 248 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) && |
| 249 IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && |
| 250 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { |
| 251 MirrorRow = MirrorRow_SSSE3; |
| 252 } |
| 253 #endif |
| 254 #if defined(HAS_MIRRORROW_AVX2) |
| 255 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) { |
| 256 MirrorRow = MirrorRow_AVX2; |
| 257 } |
| 258 #endif |
| 259 |
| 260 // Mirror plane |
| 261 for (y = 0; y < height; ++y) { |
| 262 MirrorRow(src_y, dst_y, width); |
| 263 src_y += src_stride_y; |
| 264 dst_y += dst_stride_y; |
| 265 } |
| 266 } |
| 267 |
| 268 // Convert YUY2 to I422. |
| 269 LIBYUV_API |
| 270 int YUY2ToI422(const uint8* src_yuy2, int src_stride_yuy2, |
| 271 uint8* dst_y, int dst_stride_y, |
| 272 uint8* dst_u, int dst_stride_u, |
| 273 uint8* dst_v, int dst_stride_v, |
| 274 int width, int height) { |
| 275 int y; |
| 276 void (*YUY2ToUV422Row)(const uint8* src_yuy2, |
| 277 uint8* dst_u, uint8* dst_v, int pix) = |
| 278 YUY2ToUV422Row_C; |
| 279 void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) = |
| 280 YUY2ToYRow_C; |
| 281 // Negative height means invert the image. |
| 282 if (height < 0) { |
| 283 height = -height; |
| 284 src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; |
| 285 src_stride_yuy2 = -src_stride_yuy2; |
| 286 } |
| 287 // Coalesce rows. |
| 288 if (src_stride_yuy2 == width * 2 && |
| 289 dst_stride_y == width && |
| 290 dst_stride_u * 2 == width && |
| 291 dst_stride_v * 2 == width) { |
| 292 width *= height; |
| 293 height = 1; |
| 294 src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0; |
| 295 } |
| 296 #if defined(HAS_YUY2TOYROW_SSE2) |
| 297 if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { |
| 298 YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2; |
| 299 YUY2ToYRow = YUY2ToYRow_Any_SSE2; |
| 300 if (IS_ALIGNED(width, 16)) { |
| 301 YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2; |
| 302 YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2; |
| 303 if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) { |
| 304 YUY2ToUV422Row = YUY2ToUV422Row_SSE2; |
| 305 if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { |
| 306 YUY2ToYRow = YUY2ToYRow_SSE2; |
| 307 } |
| 308 } |
| 309 } |
| 310 } |
| 311 #endif |
| 312 #if defined(HAS_YUY2TOYROW_AVX2) |
| 313 if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { |
| 314 YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2; |
| 315 YUY2ToYRow = YUY2ToYRow_Any_AVX2; |
| 316 if (IS_ALIGNED(width, 32)) { |
| 317 YUY2ToUV422Row = YUY2ToUV422Row_AVX2; |
| 318 YUY2ToYRow = YUY2ToYRow_AVX2; |
| 319 } |
| 320 } |
| 321 #endif |
| 322 #if defined(HAS_YUY2TOYROW_NEON) |
| 323 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { |
| 324 YUY2ToYRow = YUY2ToYRow_Any_NEON; |
| 325 if (width >= 16) { |
| 326 YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON; |
| 327 } |
| 328 if (IS_ALIGNED(width, 16)) { |
| 329 YUY2ToYRow = YUY2ToYRow_NEON; |
| 330 YUY2ToUV422Row = YUY2ToUV422Row_NEON; |
| 331 } |
| 332 } |
| 333 #endif |
| 334 |
| 335 for (y = 0; y < height; ++y) { |
| 336 YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width); |
| 337 YUY2ToYRow(src_yuy2, dst_y, width); |
| 338 src_yuy2 += src_stride_yuy2; |
| 339 dst_y += dst_stride_y; |
| 340 dst_u += dst_stride_u; |
| 341 dst_v += dst_stride_v; |
| 342 } |
| 343 return 0; |
| 344 } |
| 345 |
| 346 // Convert UYVY to I422. |
| 347 LIBYUV_API |
| 348 int UYVYToI422(const uint8* src_uyvy, int src_stride_uyvy, |
| 349 uint8* dst_y, int dst_stride_y, |
| 350 uint8* dst_u, int dst_stride_u, |
| 351 uint8* dst_v, int dst_stride_v, |
| 352 int width, int height) { |
| 353 int y; |
| 354 void (*UYVYToUV422Row)(const uint8* src_uyvy, |
| 355 uint8* dst_u, uint8* dst_v, int pix) = |
| 356 UYVYToUV422Row_C; |
| 357 void (*UYVYToYRow)(const uint8* src_uyvy, |
| 358 uint8* dst_y, int pix) = UYVYToYRow_C; |
| 359 // Negative height means invert the image. |
| 360 if (height < 0) { |
| 361 height = -height; |
| 362 src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy; |
| 363 src_stride_uyvy = -src_stride_uyvy; |
| 364 } |
| 365 // Coalesce rows. |
| 366 if (src_stride_uyvy == width * 2 && |
| 367 dst_stride_y == width && |
| 368 dst_stride_u * 2 == width && |
| 369 dst_stride_v * 2 == width) { |
| 370 width *= height; |
| 371 height = 1; |
| 372 src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0; |
| 373 } |
| 374 #if defined(HAS_UYVYTOYROW_SSE2) |
| 375 if (TestCpuFlag(kCpuHasSSE2) && width >= 16) { |
| 376 UYVYToUV422Row = UYVYToUV422Row_Any_SSE2; |
| 377 UYVYToYRow = UYVYToYRow_Any_SSE2; |
| 378 if (IS_ALIGNED(width, 16)) { |
| 379 UYVYToUV422Row = UYVYToUV422Row_Unaligned_SSE2; |
| 380 UYVYToYRow = UYVYToYRow_Unaligned_SSE2; |
| 381 if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) { |
| 382 UYVYToUV422Row = UYVYToUV422Row_SSE2; |
| 383 if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { |
| 384 UYVYToYRow = UYVYToYRow_SSE2; |
| 385 } |
| 386 } |
| 387 } |
| 388 } |
| 389 #endif |
| 390 #if defined(HAS_UYVYTOYROW_AVX2) |
| 391 if (TestCpuFlag(kCpuHasAVX2) && width >= 32) { |
| 392 UYVYToUV422Row = UYVYToUV422Row_Any_AVX2; |
| 393 UYVYToYRow = UYVYToYRow_Any_AVX2; |
| 394 if (IS_ALIGNED(width, 32)) { |
| 395 UYVYToUV422Row = UYVYToUV422Row_AVX2; |
| 396 UYVYToYRow = UYVYToYRow_AVX2; |
| 397 } |
| 398 } |
| 399 #endif |
| 400 #if defined(HAS_UYVYTOYROW_NEON) |
| 401 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { |
| 402 UYVYToYRow = UYVYToYRow_Any_NEON; |
| 403 if (width >= 16) { |
| 404 UYVYToUV422Row = UYVYToUV422Row_Any_NEON; |
| 405 } |
| 406 if (IS_ALIGNED(width, 16)) { |
| 407 UYVYToYRow = UYVYToYRow_NEON; |
| 408 UYVYToUV422Row = UYVYToUV422Row_NEON; |
| 409 } |
| 410 } |
| 411 #endif |
| 412 |
| 413 for (y = 0; y < height; ++y) { |
| 414 UYVYToUV422Row(src_uyvy, dst_u, dst_v, width); |
| 415 UYVYToYRow(src_uyvy, dst_y, width); |
| 416 src_uyvy += src_stride_uyvy; |
| 417 dst_y += dst_stride_y; |
| 418 dst_u += dst_stride_u; |
| 419 dst_v += dst_stride_v; |
| 420 } |
| 421 return 0; |
| 422 } |
| 423 |
| 424 // Mirror I400 with optional flipping |
| 425 LIBYUV_API |
| 426 int I400Mirror(const uint8* src_y, int src_stride_y, |
| 427 uint8* dst_y, int dst_stride_y, |
| 428 int width, int height) { |
| 429 if (!src_y || !dst_y || |
| 430 width <= 0 || height == 0) { |
| 431 return -1; |
| 432 } |
| 433 // Negative height means invert the image. |
| 434 if (height < 0) { |
| 435 height = -height; |
| 436 src_y = src_y + (height - 1) * src_stride_y; |
| 437 src_stride_y = -src_stride_y; |
| 438 } |
| 439 |
| 440 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); |
| 441 return 0; |
| 442 } |
| 443 |
| 444 // Mirror I420 with optional flipping |
| 445 LIBYUV_API |
| 446 int I420Mirror(const uint8* src_y, int src_stride_y, |
| 447 const uint8* src_u, int src_stride_u, |
| 448 const uint8* src_v, int src_stride_v, |
| 449 uint8* dst_y, int dst_stride_y, |
| 450 uint8* dst_u, int dst_stride_u, |
| 451 uint8* dst_v, int dst_stride_v, |
| 452 int width, int height) { |
| 453 int halfwidth = (width + 1) >> 1; |
| 454 int halfheight = (height + 1) >> 1; |
| 455 if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v || |
| 456 width <= 0 || height == 0) { |
| 457 return -1; |
| 458 } |
| 459 // Negative height means invert the image. |
| 460 if (height < 0) { |
| 461 height = -height; |
| 462 halfheight = (height + 1) >> 1; |
| 463 src_y = src_y + (height - 1) * src_stride_y; |
| 464 src_u = src_u + (halfheight - 1) * src_stride_u; |
| 465 src_v = src_v + (halfheight - 1) * src_stride_v; |
| 466 src_stride_y = -src_stride_y; |
| 467 src_stride_u = -src_stride_u; |
| 468 src_stride_v = -src_stride_v; |
| 469 } |
| 470 |
| 471 if (dst_y) { |
| 472 MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); |
| 473 } |
| 474 MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); |
| 475 MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); |
| 476 return 0; |
| 477 } |
| 478 |
| 479 // ARGB mirror. |
| 480 LIBYUV_API |
| 481 int ARGBMirror(const uint8* src_argb, int src_stride_argb, |
| 482 uint8* dst_argb, int dst_stride_argb, |
| 483 int width, int height) { |
| 484 int y; |
| 485 void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) = |
| 486 ARGBMirrorRow_C; |
| 487 if (!src_argb || !dst_argb || width <= 0 || height == 0) { |
| 488 return -1; |
| 489 } |
| 490 // Negative height means invert the image. |
| 491 if (height < 0) { |
| 492 height = -height; |
| 493 src_argb = src_argb + (height - 1) * src_stride_argb; |
| 494 src_stride_argb = -src_stride_argb; |
| 495 } |
| 496 |
| 497 #if defined(HAS_ARGBMIRRORROW_SSSE3) |
| 498 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4) && |
| 499 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && |
| 500 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { |
| 501 ARGBMirrorRow = ARGBMirrorRow_SSSE3; |
| 502 } |
| 503 #endif |
| 504 #if defined(HAS_ARGBMIRRORROW_AVX2) |
| 505 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 8)) { |
| 506 ARGBMirrorRow = ARGBMirrorRow_AVX2; |
| 507 } |
| 508 #endif |
| 509 #if defined(HAS_ARGBMIRRORROW_NEON) |
| 510 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 4)) { |
| 511 ARGBMirrorRow = ARGBMirrorRow_NEON; |
| 512 } |
| 513 #endif |
| 514 |
| 515 // Mirror plane |
| 516 for (y = 0; y < height; ++y) { |
| 517 ARGBMirrorRow(src_argb, dst_argb, width); |
| 518 src_argb += src_stride_argb; |
| 519 dst_argb += dst_stride_argb; |
| 520 } |
| 521 return 0; |
| 522 } |
| 523 |
| 524 // Get a blender that optimized for the CPU, alignment and pixel count. |
| 525 // As there are 6 blenders to choose from, the caller should try to use |
| 526 // the same blend function for all pixels if possible. |
| 527 LIBYUV_API |
| 528 ARGBBlendRow GetARGBBlend() { |
| 529 void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1, |
| 530 uint8* dst_argb, int width) = ARGBBlendRow_C; |
| 531 #if defined(HAS_ARGBBLENDROW_SSSE3) |
| 532 if (TestCpuFlag(kCpuHasSSSE3)) { |
| 533 ARGBBlendRow = ARGBBlendRow_SSSE3; |
| 534 return ARGBBlendRow; |
| 535 } |
| 536 #endif |
| 537 #if defined(HAS_ARGBBLENDROW_SSE2) |
| 538 if (TestCpuFlag(kCpuHasSSE2)) { |
| 539 ARGBBlendRow = ARGBBlendRow_SSE2; |
| 540 } |
| 541 #endif |
| 542 #if defined(HAS_ARGBBLENDROW_NEON) |
| 543 if (TestCpuFlag(kCpuHasNEON)) { |
| 544 ARGBBlendRow = ARGBBlendRow_NEON; |
| 545 } |
| 546 #endif |
| 547 return ARGBBlendRow; |
| 548 } |
| 549 |
| 550 // Alpha Blend 2 ARGB images and store to destination. |
| 551 LIBYUV_API |
| 552 int ARGBBlend(const uint8* src_argb0, int src_stride_argb0, |
| 553 const uint8* src_argb1, int src_stride_argb1, |
| 554 uint8* dst_argb, int dst_stride_argb, |
| 555 int width, int height) { |
| 556 int y; |
| 557 void (*ARGBBlendRow)(const uint8* src_argb, const uint8* src_argb1, |
| 558 uint8* dst_argb, int width) = GetARGBBlend(); |
| 559 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { |
| 560 return -1; |
| 561 } |
| 562 // Negative height means invert the image. |
| 563 if (height < 0) { |
| 564 height = -height; |
| 565 dst_argb = dst_argb + (height - 1) * dst_stride_argb; |
| 566 dst_stride_argb = -dst_stride_argb; |
| 567 } |
| 568 // Coalesce rows. |
| 569 if (src_stride_argb0 == width * 4 && |
| 570 src_stride_argb1 == width * 4 && |
| 571 dst_stride_argb == width * 4) { |
| 572 width *= height; |
| 573 height = 1; |
| 574 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; |
| 575 } |
| 576 |
| 577 for (y = 0; y < height; ++y) { |
| 578 ARGBBlendRow(src_argb0, src_argb1, dst_argb, width); |
| 579 src_argb0 += src_stride_argb0; |
| 580 src_argb1 += src_stride_argb1; |
| 581 dst_argb += dst_stride_argb; |
| 582 } |
| 583 return 0; |
| 584 } |
| 585 |
| 586 // Multiply 2 ARGB images and store to destination. |
| 587 LIBYUV_API |
| 588 int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0, |
| 589 const uint8* src_argb1, int src_stride_argb1, |
| 590 uint8* dst_argb, int dst_stride_argb, |
| 591 int width, int height) { |
| 592 int y; |
| 593 void (*ARGBMultiplyRow)(const uint8* src0, const uint8* src1, uint8* dst, |
| 594 int width) = ARGBMultiplyRow_C; |
| 595 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { |
| 596 return -1; |
| 597 } |
| 598 // Negative height means invert the image. |
| 599 if (height < 0) { |
| 600 height = -height; |
| 601 dst_argb = dst_argb + (height - 1) * dst_stride_argb; |
| 602 dst_stride_argb = -dst_stride_argb; |
| 603 } |
| 604 // Coalesce rows. |
| 605 if (src_stride_argb0 == width * 4 && |
| 606 src_stride_argb1 == width * 4 && |
| 607 dst_stride_argb == width * 4) { |
| 608 width *= height; |
| 609 height = 1; |
| 610 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; |
| 611 } |
| 612 #if defined(HAS_ARGBMULTIPLYROW_SSE2) |
| 613 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { |
| 614 ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2; |
| 615 if (IS_ALIGNED(width, 4)) { |
| 616 ARGBMultiplyRow = ARGBMultiplyRow_SSE2; |
| 617 } |
| 618 } |
| 619 #endif |
| 620 #if defined(HAS_ARGBMULTIPLYROW_AVX2) |
| 621 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { |
| 622 ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2; |
| 623 if (IS_ALIGNED(width, 8)) { |
| 624 ARGBMultiplyRow = ARGBMultiplyRow_AVX2; |
| 625 } |
| 626 } |
| 627 #endif |
| 628 #if defined(HAS_ARGBMULTIPLYROW_NEON) |
| 629 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { |
| 630 ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON; |
| 631 if (IS_ALIGNED(width, 8)) { |
| 632 ARGBMultiplyRow = ARGBMultiplyRow_NEON; |
| 633 } |
| 634 } |
| 635 #endif |
| 636 |
| 637 // Multiply plane |
| 638 for (y = 0; y < height; ++y) { |
| 639 ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width); |
| 640 src_argb0 += src_stride_argb0; |
| 641 src_argb1 += src_stride_argb1; |
| 642 dst_argb += dst_stride_argb; |
| 643 } |
| 644 return 0; |
| 645 } |
| 646 |
| 647 // Add 2 ARGB images and store to destination. |
| 648 LIBYUV_API |
| 649 int ARGBAdd(const uint8* src_argb0, int src_stride_argb0, |
| 650 const uint8* src_argb1, int src_stride_argb1, |
| 651 uint8* dst_argb, int dst_stride_argb, |
| 652 int width, int height) { |
| 653 int y; |
| 654 void (*ARGBAddRow)(const uint8* src0, const uint8* src1, uint8* dst, |
| 655 int width) = ARGBAddRow_C; |
| 656 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { |
| 657 return -1; |
| 658 } |
| 659 // Negative height means invert the image. |
| 660 if (height < 0) { |
| 661 height = -height; |
| 662 dst_argb = dst_argb + (height - 1) * dst_stride_argb; |
| 663 dst_stride_argb = -dst_stride_argb; |
| 664 } |
| 665 // Coalesce rows. |
| 666 if (src_stride_argb0 == width * 4 && |
| 667 src_stride_argb1 == width * 4 && |
| 668 dst_stride_argb == width * 4) { |
| 669 width *= height; |
| 670 height = 1; |
| 671 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; |
| 672 } |
| 673 #if defined(HAS_ARGBADDROW_SSE2) && defined(_MSC_VER) |
| 674 if (TestCpuFlag(kCpuHasSSE2)) { |
| 675 ARGBAddRow = ARGBAddRow_SSE2; |
| 676 } |
| 677 #endif |
| 678 #if defined(HAS_ARGBADDROW_SSE2) && !defined(_MSC_VER) |
| 679 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { |
| 680 ARGBAddRow = ARGBAddRow_Any_SSE2; |
| 681 if (IS_ALIGNED(width, 4)) { |
| 682 ARGBAddRow = ARGBAddRow_SSE2; |
| 683 } |
| 684 } |
| 685 #endif |
| 686 #if defined(HAS_ARGBADDROW_AVX2) |
| 687 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { |
| 688 ARGBAddRow = ARGBAddRow_Any_AVX2; |
| 689 if (IS_ALIGNED(width, 8)) { |
| 690 ARGBAddRow = ARGBAddRow_AVX2; |
| 691 } |
| 692 } |
| 693 #endif |
| 694 #if defined(HAS_ARGBADDROW_NEON) |
| 695 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { |
| 696 ARGBAddRow = ARGBAddRow_Any_NEON; |
| 697 if (IS_ALIGNED(width, 8)) { |
| 698 ARGBAddRow = ARGBAddRow_NEON; |
| 699 } |
| 700 } |
| 701 #endif |
| 702 |
| 703 // Add plane |
| 704 for (y = 0; y < height; ++y) { |
| 705 ARGBAddRow(src_argb0, src_argb1, dst_argb, width); |
| 706 src_argb0 += src_stride_argb0; |
| 707 src_argb1 += src_stride_argb1; |
| 708 dst_argb += dst_stride_argb; |
| 709 } |
| 710 return 0; |
| 711 } |
| 712 |
| 713 // Subtract 2 ARGB images and store to destination. |
| 714 LIBYUV_API |
| 715 int ARGBSubtract(const uint8* src_argb0, int src_stride_argb0, |
| 716 const uint8* src_argb1, int src_stride_argb1, |
| 717 uint8* dst_argb, int dst_stride_argb, |
| 718 int width, int height) { |
| 719 int y; |
| 720 void (*ARGBSubtractRow)(const uint8* src0, const uint8* src1, uint8* dst, |
| 721 int width) = ARGBSubtractRow_C; |
| 722 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { |
| 723 return -1; |
| 724 } |
| 725 // Negative height means invert the image. |
| 726 if (height < 0) { |
| 727 height = -height; |
| 728 dst_argb = dst_argb + (height - 1) * dst_stride_argb; |
| 729 dst_stride_argb = -dst_stride_argb; |
| 730 } |
| 731 // Coalesce rows. |
| 732 if (src_stride_argb0 == width * 4 && |
| 733 src_stride_argb1 == width * 4 && |
| 734 dst_stride_argb == width * 4) { |
| 735 width *= height; |
| 736 height = 1; |
| 737 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; |
| 738 } |
| 739 #if defined(HAS_ARGBSUBTRACTROW_SSE2) |
| 740 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { |
| 741 ARGBSubtractRow = ARGBSubtractRow_Any_SSE2; |
| 742 if (IS_ALIGNED(width, 4)) { |
| 743 ARGBSubtractRow = ARGBSubtractRow_SSE2; |
| 744 } |
| 745 } |
| 746 #endif |
| 747 #if defined(HAS_ARGBSUBTRACTROW_AVX2) |
| 748 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { |
| 749 ARGBSubtractRow = ARGBSubtractRow_Any_AVX2; |
| 750 if (IS_ALIGNED(width, 8)) { |
| 751 ARGBSubtractRow = ARGBSubtractRow_AVX2; |
| 752 } |
| 753 } |
| 754 #endif |
| 755 #if defined(HAS_ARGBSUBTRACTROW_NEON) |
| 756 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { |
| 757 ARGBSubtractRow = ARGBSubtractRow_Any_NEON; |
| 758 if (IS_ALIGNED(width, 8)) { |
| 759 ARGBSubtractRow = ARGBSubtractRow_NEON; |
| 760 } |
| 761 } |
| 762 #endif |
| 763 |
| 764 // Subtract plane |
| 765 for (y = 0; y < height; ++y) { |
| 766 ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width); |
| 767 src_argb0 += src_stride_argb0; |
| 768 src_argb1 += src_stride_argb1; |
| 769 dst_argb += dst_stride_argb; |
| 770 } |
| 771 return 0; |
| 772 } |
| 773 |
| 774 // Convert I422 to BGRA. |
| 775 LIBYUV_API |
| 776 int I422ToBGRA(const uint8* src_y, int src_stride_y, |
| 777 const uint8* src_u, int src_stride_u, |
| 778 const uint8* src_v, int src_stride_v, |
| 779 uint8* dst_bgra, int dst_stride_bgra, |
| 780 int width, int height) { |
| 781 int y; |
| 782 void (*I422ToBGRARow)(const uint8* y_buf, |
| 783 const uint8* u_buf, |
| 784 const uint8* v_buf, |
| 785 uint8* rgb_buf, |
| 786 int width) = I422ToBGRARow_C; |
| 787 if (!src_y || !src_u || !src_v || |
| 788 !dst_bgra || |
| 789 width <= 0 || height == 0) { |
| 790 return -1; |
| 791 } |
| 792 // Negative height means invert the image. |
| 793 if (height < 0) { |
| 794 height = -height; |
| 795 dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra; |
| 796 dst_stride_bgra = -dst_stride_bgra; |
| 797 } |
| 798 // Coalesce rows. |
| 799 if (src_stride_y == width && |
| 800 src_stride_u * 2 == width && |
| 801 src_stride_v * 2 == width && |
| 802 dst_stride_bgra == width * 4) { |
| 803 width *= height; |
| 804 height = 1; |
| 805 src_stride_y = src_stride_u = src_stride_v = dst_stride_bgra = 0; |
| 806 } |
| 807 #if defined(HAS_I422TOBGRAROW_NEON) |
| 808 if (TestCpuFlag(kCpuHasNEON)) { |
| 809 I422ToBGRARow = I422ToBGRARow_Any_NEON; |
| 810 if (IS_ALIGNED(width, 16)) { |
| 811 I422ToBGRARow = I422ToBGRARow_NEON; |
| 812 } |
| 813 } |
| 814 #elif defined(HAS_I422TOBGRAROW_SSSE3) |
| 815 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { |
| 816 I422ToBGRARow = I422ToBGRARow_Any_SSSE3; |
| 817 if (IS_ALIGNED(width, 8)) { |
| 818 I422ToBGRARow = I422ToBGRARow_Unaligned_SSSE3; |
| 819 if (IS_ALIGNED(dst_bgra, 16) && IS_ALIGNED(dst_stride_bgra, 16)) { |
| 820 I422ToBGRARow = I422ToBGRARow_SSSE3; |
| 821 } |
| 822 } |
| 823 } |
| 824 #elif defined(HAS_I422TOBGRAROW_MIPS_DSPR2) |
| 825 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) && |
| 826 IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) && |
| 827 IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) && |
| 828 IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) && |
| 829 IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) { |
| 830 I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2; |
| 831 } |
| 832 #endif |
| 833 |
| 834 for (y = 0; y < height; ++y) { |
| 835 I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width); |
| 836 dst_bgra += dst_stride_bgra; |
| 837 src_y += src_stride_y; |
| 838 src_u += src_stride_u; |
| 839 src_v += src_stride_v; |
| 840 } |
| 841 return 0; |
| 842 } |
| 843 |
| 844 // Convert I422 to ABGR. |
| 845 LIBYUV_API |
| 846 int I422ToABGR(const uint8* src_y, int src_stride_y, |
| 847 const uint8* src_u, int src_stride_u, |
| 848 const uint8* src_v, int src_stride_v, |
| 849 uint8* dst_abgr, int dst_stride_abgr, |
| 850 int width, int height) { |
| 851 int y; |
| 852 void (*I422ToABGRRow)(const uint8* y_buf, |
| 853 const uint8* u_buf, |
| 854 const uint8* v_buf, |
| 855 uint8* rgb_buf, |
| 856 int width) = I422ToABGRRow_C; |
| 857 if (!src_y || !src_u || !src_v || |
| 858 !dst_abgr || |
| 859 width <= 0 || height == 0) { |
| 860 return -1; |
| 861 } |
| 862 // Negative height means invert the image. |
| 863 if (height < 0) { |
| 864 height = -height; |
| 865 dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr; |
| 866 dst_stride_abgr = -dst_stride_abgr; |
| 867 } |
| 868 // Coalesce rows. |
| 869 if (src_stride_y == width && |
| 870 src_stride_u * 2 == width && |
| 871 src_stride_v * 2 == width && |
| 872 dst_stride_abgr == width * 4) { |
| 873 width *= height; |
| 874 height = 1; |
| 875 src_stride_y = src_stride_u = src_stride_v = dst_stride_abgr = 0; |
| 876 } |
| 877 #if defined(HAS_I422TOABGRROW_NEON) |
| 878 if (TestCpuFlag(kCpuHasNEON)) { |
| 879 I422ToABGRRow = I422ToABGRRow_Any_NEON; |
| 880 if (IS_ALIGNED(width, 16)) { |
| 881 I422ToABGRRow = I422ToABGRRow_NEON; |
| 882 } |
| 883 } |
| 884 #elif defined(HAS_I422TOABGRROW_SSSE3) |
| 885 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { |
| 886 I422ToABGRRow = I422ToABGRRow_Any_SSSE3; |
| 887 if (IS_ALIGNED(width, 8)) { |
| 888 I422ToABGRRow = I422ToABGRRow_Unaligned_SSSE3; |
| 889 if (IS_ALIGNED(dst_abgr, 16) && IS_ALIGNED(dst_stride_abgr, 16)) { |
| 890 I422ToABGRRow = I422ToABGRRow_SSSE3; |
| 891 } |
| 892 } |
| 893 } |
| 894 #endif |
| 895 |
| 896 for (y = 0; y < height; ++y) { |
| 897 I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width); |
| 898 dst_abgr += dst_stride_abgr; |
| 899 src_y += src_stride_y; |
| 900 src_u += src_stride_u; |
| 901 src_v += src_stride_v; |
| 902 } |
| 903 return 0; |
| 904 } |
| 905 |
| 906 // Convert I422 to RGBA. |
| 907 LIBYUV_API |
| 908 int I422ToRGBA(const uint8* src_y, int src_stride_y, |
| 909 const uint8* src_u, int src_stride_u, |
| 910 const uint8* src_v, int src_stride_v, |
| 911 uint8* dst_rgba, int dst_stride_rgba, |
| 912 int width, int height) { |
| 913 int y; |
| 914 void (*I422ToRGBARow)(const uint8* y_buf, |
| 915 const uint8* u_buf, |
| 916 const uint8* v_buf, |
| 917 uint8* rgb_buf, |
| 918 int width) = I422ToRGBARow_C; |
| 919 if (!src_y || !src_u || !src_v || |
| 920 !dst_rgba || |
| 921 width <= 0 || height == 0) { |
| 922 return -1; |
| 923 } |
| 924 // Negative height means invert the image. |
| 925 if (height < 0) { |
| 926 height = -height; |
| 927 dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba; |
| 928 dst_stride_rgba = -dst_stride_rgba; |
| 929 } |
| 930 // Coalesce rows. |
| 931 if (src_stride_y == width && |
| 932 src_stride_u * 2 == width && |
| 933 src_stride_v * 2 == width && |
| 934 dst_stride_rgba == width * 4) { |
| 935 width *= height; |
| 936 height = 1; |
| 937 src_stride_y = src_stride_u = src_stride_v = dst_stride_rgba = 0; |
| 938 } |
| 939 #if defined(HAS_I422TORGBAROW_NEON) |
| 940 if (TestCpuFlag(kCpuHasNEON)) { |
| 941 I422ToRGBARow = I422ToRGBARow_Any_NEON; |
| 942 if (IS_ALIGNED(width, 16)) { |
| 943 I422ToRGBARow = I422ToRGBARow_NEON; |
| 944 } |
| 945 } |
| 946 #elif defined(HAS_I422TORGBAROW_SSSE3) |
| 947 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { |
| 948 I422ToRGBARow = I422ToRGBARow_Any_SSSE3; |
| 949 if (IS_ALIGNED(width, 8)) { |
| 950 I422ToRGBARow = I422ToRGBARow_Unaligned_SSSE3; |
| 951 if (IS_ALIGNED(dst_rgba, 16) && IS_ALIGNED(dst_stride_rgba, 16)) { |
| 952 I422ToRGBARow = I422ToRGBARow_SSSE3; |
| 953 } |
| 954 } |
| 955 } |
| 956 #endif |
| 957 |
| 958 for (y = 0; y < height; ++y) { |
| 959 I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width); |
| 960 dst_rgba += dst_stride_rgba; |
| 961 src_y += src_stride_y; |
| 962 src_u += src_stride_u; |
| 963 src_v += src_stride_v; |
| 964 } |
| 965 return 0; |
| 966 } |
| 967 |
| 968 // Convert NV12 to RGB565. |
| 969 LIBYUV_API |
| 970 int NV12ToRGB565(const uint8* src_y, int src_stride_y, |
| 971 const uint8* src_uv, int src_stride_uv, |
| 972 uint8* dst_rgb565, int dst_stride_rgb565, |
| 973 int width, int height) { |
| 974 int y; |
| 975 void (*NV12ToRGB565Row)(const uint8* y_buf, |
| 976 const uint8* uv_buf, |
| 977 uint8* rgb_buf, |
| 978 int width) = NV12ToRGB565Row_C; |
| 979 if (!src_y || !src_uv || !dst_rgb565 || |
| 980 width <= 0 || height == 0) { |
| 981 return -1; |
| 982 } |
| 983 // Negative height means invert the image. |
| 984 if (height < 0) { |
| 985 height = -height; |
| 986 dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; |
| 987 dst_stride_rgb565 = -dst_stride_rgb565; |
| 988 } |
| 989 #if defined(HAS_NV12TORGB565ROW_SSSE3) |
| 990 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { |
| 991 NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3; |
| 992 if (IS_ALIGNED(width, 8)) { |
| 993 NV12ToRGB565Row = NV12ToRGB565Row_SSSE3; |
| 994 } |
| 995 } |
| 996 #elif defined(HAS_NV12TORGB565ROW_NEON) |
| 997 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { |
| 998 NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON; |
| 999 if (IS_ALIGNED(width, 8)) { |
| 1000 NV12ToRGB565Row = NV12ToRGB565Row_NEON; |
| 1001 } |
| 1002 } |
| 1003 #endif |
| 1004 |
| 1005 for (y = 0; y < height; ++y) { |
| 1006 NV12ToRGB565Row(src_y, src_uv, dst_rgb565, width); |
| 1007 dst_rgb565 += dst_stride_rgb565; |
| 1008 src_y += src_stride_y; |
| 1009 if (y & 1) { |
| 1010 src_uv += src_stride_uv; |
| 1011 } |
| 1012 } |
| 1013 return 0; |
| 1014 } |
| 1015 |
| 1016 // Convert NV21 to RGB565. |
| 1017 LIBYUV_API |
| 1018 int NV21ToRGB565(const uint8* src_y, int src_stride_y, |
| 1019 const uint8* src_vu, int src_stride_vu, |
| 1020 uint8* dst_rgb565, int dst_stride_rgb565, |
| 1021 int width, int height) { |
| 1022 int y; |
| 1023 void (*NV21ToRGB565Row)(const uint8* y_buf, |
| 1024 const uint8* src_vu, |
| 1025 uint8* rgb_buf, |
| 1026 int width) = NV21ToRGB565Row_C; |
| 1027 if (!src_y || !src_vu || !dst_rgb565 || |
| 1028 width <= 0 || height == 0) { |
| 1029 return -1; |
| 1030 } |
| 1031 // Negative height means invert the image. |
| 1032 if (height < 0) { |
| 1033 height = -height; |
| 1034 dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; |
| 1035 dst_stride_rgb565 = -dst_stride_rgb565; |
| 1036 } |
| 1037 #if defined(HAS_NV21TORGB565ROW_SSSE3) |
| 1038 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { |
| 1039 NV21ToRGB565Row = NV21ToRGB565Row_Any_SSSE3; |
| 1040 if (IS_ALIGNED(width, 8)) { |
| 1041 NV21ToRGB565Row = NV21ToRGB565Row_SSSE3; |
| 1042 } |
| 1043 } |
| 1044 #elif defined(HAS_NV21TORGB565ROW_NEON) |
| 1045 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { |
| 1046 NV21ToRGB565Row = NV21ToRGB565Row_Any_NEON; |
| 1047 if (IS_ALIGNED(width, 8)) { |
| 1048 NV21ToRGB565Row = NV21ToRGB565Row_NEON; |
| 1049 } |
| 1050 } |
| 1051 #endif |
| 1052 |
| 1053 for (y = 0; y < height; ++y) { |
| 1054 NV21ToRGB565Row(src_y, src_vu, dst_rgb565, width); |
| 1055 dst_rgb565 += dst_stride_rgb565; |
| 1056 src_y += src_stride_y; |
| 1057 if (y & 1) { |
| 1058 src_vu += src_stride_vu; |
| 1059 } |
| 1060 } |
| 1061 return 0; |
| 1062 } |
| 1063 |
| 1064 LIBYUV_API |
| 1065 void SetPlane(uint8* dst_y, int dst_stride_y, |
| 1066 int width, int height, |
| 1067 uint32 value) { |
| 1068 int y; |
| 1069 uint32 v32 = value | (value << 8) | (value << 16) | (value << 24); |
| 1070 void (*SetRow)(uint8* dst, uint32 value, int pix) = SetRow_C; |
| 1071 // Coalesce rows. |
| 1072 if (dst_stride_y == width) { |
| 1073 width *= height; |
| 1074 height = 1; |
| 1075 dst_stride_y = 0; |
| 1076 } |
| 1077 #if defined(HAS_SETROW_NEON) |
| 1078 if (TestCpuFlag(kCpuHasNEON) && |
| 1079 IS_ALIGNED(width, 16) && |
| 1080 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { |
| 1081 SetRow = SetRow_NEON; |
| 1082 } |
| 1083 #endif |
| 1084 #if defined(HAS_SETROW_X86) |
| 1085 if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) { |
| 1086 SetRow = SetRow_X86; |
| 1087 } |
| 1088 #endif |
| 1089 |
| 1090 // Set plane |
| 1091 for (y = 0; y < height; ++y) { |
| 1092 SetRow(dst_y, v32, width); |
| 1093 dst_y += dst_stride_y; |
| 1094 } |
| 1095 } |
| 1096 |
| 1097 // Draw a rectangle into I420 |
| 1098 LIBYUV_API |
| 1099 int I420Rect(uint8* dst_y, int dst_stride_y, |
| 1100 uint8* dst_u, int dst_stride_u, |
| 1101 uint8* dst_v, int dst_stride_v, |
| 1102 int x, int y, |
| 1103 int width, int height, |
| 1104 int value_y, int value_u, int value_v) { |
| 1105 int halfwidth = (width + 1) >> 1; |
| 1106 int halfheight = (height + 1) >> 1; |
| 1107 uint8* start_y = dst_y + y * dst_stride_y + x; |
| 1108 uint8* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2); |
| 1109 uint8* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2); |
| 1110 if (!dst_y || !dst_u || !dst_v || |
| 1111 width <= 0 || height <= 0 || |
| 1112 x < 0 || y < 0 || |
| 1113 value_y < 0 || value_y > 255 || |
| 1114 value_u < 0 || value_u > 255 || |
| 1115 value_v < 0 || value_v > 255) { |
| 1116 return -1; |
| 1117 } |
| 1118 |
| 1119 SetPlane(start_y, dst_stride_y, width, height, value_y); |
| 1120 SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u); |
| 1121 SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v); |
| 1122 return 0; |
| 1123 } |
| 1124 |
| 1125 // Draw a rectangle into ARGB |
| 1126 LIBYUV_API |
| 1127 int ARGBRect(uint8* dst_argb, int dst_stride_argb, |
| 1128 int dst_x, int dst_y, |
| 1129 int width, int height, |
| 1130 uint32 value) { |
| 1131 if (!dst_argb || |
| 1132 width <= 0 || height <= 0 || |
| 1133 dst_x < 0 || dst_y < 0) { |
| 1134 return -1; |
| 1135 } |
| 1136 dst_argb += dst_y * dst_stride_argb + dst_x * 4; |
| 1137 // Coalesce rows. |
| 1138 if (dst_stride_argb == width * 4) { |
| 1139 width *= height; |
| 1140 height = 1; |
| 1141 dst_stride_argb = 0; |
| 1142 } |
| 1143 #if defined(HAS_SETROW_NEON) |
| 1144 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16) && |
| 1145 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { |
| 1146 ARGBSetRows_NEON(dst_argb, value, width, dst_stride_argb, height); |
| 1147 return 0; |
| 1148 } |
| 1149 #endif |
| 1150 #if defined(HAS_SETROW_X86) |
| 1151 if (TestCpuFlag(kCpuHasX86)) { |
| 1152 ARGBSetRows_X86(dst_argb, value, width, dst_stride_argb, height); |
| 1153 return 0; |
| 1154 } |
| 1155 #endif |
| 1156 ARGBSetRows_C(dst_argb, value, width, dst_stride_argb, height); |
| 1157 return 0; |
| 1158 } |
| 1159 |
| 1160 // Convert unattentuated ARGB to preattenuated ARGB. |
| 1161 // An unattenutated ARGB alpha blend uses the formula |
| 1162 // p = a * f + (1 - a) * b |
| 1163 // where |
| 1164 // p is output pixel |
| 1165 // f is foreground pixel |
| 1166 // b is background pixel |
| 1167 // a is alpha value from foreground pixel |
| 1168 // An preattenutated ARGB alpha blend uses the formula |
| 1169 // p = f + (1 - a) * b |
| 1170 // where |
| 1171 // f is foreground pixel premultiplied by alpha |
| 1172 |
| 1173 LIBYUV_API |
| 1174 int ARGBAttenuate(const uint8* src_argb, int src_stride_argb, |
| 1175 uint8* dst_argb, int dst_stride_argb, |
| 1176 int width, int height) { |
| 1177 int y; |
| 1178 void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb, |
| 1179 int width) = ARGBAttenuateRow_C; |
| 1180 if (!src_argb || !dst_argb || width <= 0 || height == 0) { |
| 1181 return -1; |
| 1182 } |
| 1183 if (height < 0) { |
| 1184 height = -height; |
| 1185 src_argb = src_argb + (height - 1) * src_stride_argb; |
| 1186 src_stride_argb = -src_stride_argb; |
| 1187 } |
| 1188 // Coalesce rows. |
| 1189 if (src_stride_argb == width * 4 && |
| 1190 dst_stride_argb == width * 4) { |
| 1191 width *= height; |
| 1192 height = 1; |
| 1193 src_stride_argb = dst_stride_argb = 0; |
| 1194 } |
| 1195 #if defined(HAS_ARGBATTENUATEROW_SSE2) |
| 1196 if (TestCpuFlag(kCpuHasSSE2) && width >= 4 && |
| 1197 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && |
| 1198 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { |
| 1199 ARGBAttenuateRow = ARGBAttenuateRow_Any_SSE2; |
| 1200 if (IS_ALIGNED(width, 4)) { |
| 1201 ARGBAttenuateRow = ARGBAttenuateRow_SSE2; |
| 1202 } |
| 1203 } |
| 1204 #endif |
| 1205 #if defined(HAS_ARGBATTENUATEROW_SSSE3) |
| 1206 if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) { |
| 1207 ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3; |
| 1208 if (IS_ALIGNED(width, 4)) { |
| 1209 ARGBAttenuateRow = ARGBAttenuateRow_SSSE3; |
| 1210 } |
| 1211 } |
| 1212 #endif |
| 1213 #if defined(HAS_ARGBATTENUATEROW_AVX2) |
| 1214 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { |
| 1215 ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2; |
| 1216 if (IS_ALIGNED(width, 8)) { |
| 1217 ARGBAttenuateRow = ARGBAttenuateRow_AVX2; |
| 1218 } |
| 1219 } |
| 1220 #endif |
| 1221 #if defined(HAS_ARGBATTENUATEROW_NEON) |
| 1222 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { |
| 1223 ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON; |
| 1224 if (IS_ALIGNED(width, 8)) { |
| 1225 ARGBAttenuateRow = ARGBAttenuateRow_NEON; |
| 1226 } |
| 1227 } |
| 1228 #endif |
| 1229 |
| 1230 for (y = 0; y < height; ++y) { |
| 1231 ARGBAttenuateRow(src_argb, dst_argb, width); |
| 1232 src_argb += src_stride_argb; |
| 1233 dst_argb += dst_stride_argb; |
| 1234 } |
| 1235 return 0; |
| 1236 } |
| 1237 |
| 1238 // Convert preattentuated ARGB to unattenuated ARGB. |
| 1239 LIBYUV_API |
| 1240 int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb, |
| 1241 uint8* dst_argb, int dst_stride_argb, |
| 1242 int width, int height) { |
| 1243 int y; |
| 1244 void (*ARGBUnattenuateRow)(const uint8* src_argb, uint8* dst_argb, |
| 1245 int width) = ARGBUnattenuateRow_C; |
| 1246 if (!src_argb || !dst_argb || width <= 0 || height == 0) { |
| 1247 return -1; |
| 1248 } |
| 1249 if (height < 0) { |
| 1250 height = -height; |
| 1251 src_argb = src_argb + (height - 1) * src_stride_argb; |
| 1252 src_stride_argb = -src_stride_argb; |
| 1253 } |
| 1254 // Coalesce rows. |
| 1255 if (src_stride_argb == width * 4 && |
| 1256 dst_stride_argb == width * 4) { |
| 1257 width *= height; |
| 1258 height = 1; |
| 1259 src_stride_argb = dst_stride_argb = 0; |
| 1260 } |
| 1261 #if defined(HAS_ARGBUNATTENUATEROW_SSE2) |
| 1262 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { |
| 1263 ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2; |
| 1264 if (IS_ALIGNED(width, 4)) { |
| 1265 ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2; |
| 1266 } |
| 1267 } |
| 1268 #endif |
| 1269 #if defined(HAS_ARGBUNATTENUATEROW_AVX2) |
| 1270 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { |
| 1271 ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2; |
| 1272 if (IS_ALIGNED(width, 8)) { |
| 1273 ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2; |
| 1274 } |
| 1275 } |
| 1276 #endif |
| 1277 // TODO(fbarchard): Neon version. |
| 1278 |
| 1279 for (y = 0; y < height; ++y) { |
| 1280 ARGBUnattenuateRow(src_argb, dst_argb, width); |
| 1281 src_argb += src_stride_argb; |
| 1282 dst_argb += dst_stride_argb; |
| 1283 } |
| 1284 return 0; |
| 1285 } |
| 1286 |
| 1287 // Convert ARGB to Grayed ARGB. |
| 1288 LIBYUV_API |
| 1289 int ARGBGrayTo(const uint8* src_argb, int src_stride_argb, |
| 1290 uint8* dst_argb, int dst_stride_argb, |
| 1291 int width, int height) { |
| 1292 int y; |
| 1293 void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, |
| 1294 int width) = ARGBGrayRow_C; |
| 1295 if (!src_argb || !dst_argb || width <= 0 || height == 0) { |
| 1296 return -1; |
| 1297 } |
| 1298 if (height < 0) { |
| 1299 height = -height; |
| 1300 src_argb = src_argb + (height - 1) * src_stride_argb; |
| 1301 src_stride_argb = -src_stride_argb; |
| 1302 } |
| 1303 // Coalesce rows. |
| 1304 if (src_stride_argb == width * 4 && |
| 1305 dst_stride_argb == width * 4) { |
| 1306 width *= height; |
| 1307 height = 1; |
| 1308 src_stride_argb = dst_stride_argb = 0; |
| 1309 } |
| 1310 #if defined(HAS_ARGBGRAYROW_SSSE3) |
| 1311 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && |
| 1312 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && |
| 1313 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { |
| 1314 ARGBGrayRow = ARGBGrayRow_SSSE3; |
| 1315 } |
| 1316 #elif defined(HAS_ARGBGRAYROW_NEON) |
| 1317 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { |
| 1318 ARGBGrayRow = ARGBGrayRow_NEON; |
| 1319 } |
| 1320 #endif |
| 1321 |
| 1322 for (y = 0; y < height; ++y) { |
| 1323 ARGBGrayRow(src_argb, dst_argb, width); |
| 1324 src_argb += src_stride_argb; |
| 1325 dst_argb += dst_stride_argb; |
| 1326 } |
| 1327 return 0; |
| 1328 } |
| 1329 |
| 1330 // Make a rectangle of ARGB gray scale. |
| 1331 LIBYUV_API |
| 1332 int ARGBGray(uint8* dst_argb, int dst_stride_argb, |
| 1333 int dst_x, int dst_y, |
| 1334 int width, int height) { |
| 1335 int y; |
| 1336 void (*ARGBGrayRow)(const uint8* src_argb, uint8* dst_argb, |
| 1337 int width) = ARGBGrayRow_C; |
| 1338 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; |
| 1339 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { |
| 1340 return -1; |
| 1341 } |
| 1342 // Coalesce rows. |
| 1343 if (dst_stride_argb == width * 4) { |
| 1344 width *= height; |
| 1345 height = 1; |
| 1346 dst_stride_argb = 0; |
| 1347 } |
| 1348 #if defined(HAS_ARGBGRAYROW_SSSE3) |
| 1349 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && |
| 1350 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { |
| 1351 ARGBGrayRow = ARGBGrayRow_SSSE3; |
| 1352 } |
| 1353 #elif defined(HAS_ARGBGRAYROW_NEON) |
| 1354 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { |
| 1355 ARGBGrayRow = ARGBGrayRow_NEON; |
| 1356 } |
| 1357 #endif |
| 1358 for (y = 0; y < height; ++y) { |
| 1359 ARGBGrayRow(dst, dst, width); |
| 1360 dst += dst_stride_argb; |
| 1361 } |
| 1362 return 0; |
| 1363 } |
| 1364 |
| 1365 // Make a rectangle of ARGB Sepia tone. |
| 1366 LIBYUV_API |
| 1367 int ARGBSepia(uint8* dst_argb, int dst_stride_argb, |
| 1368 int dst_x, int dst_y, int width, int height) { |
| 1369 int y; |
| 1370 void (*ARGBSepiaRow)(uint8* dst_argb, int width) = ARGBSepiaRow_C; |
| 1371 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; |
| 1372 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { |
| 1373 return -1; |
| 1374 } |
| 1375 // Coalesce rows. |
| 1376 if (dst_stride_argb == width * 4) { |
| 1377 width *= height; |
| 1378 height = 1; |
| 1379 dst_stride_argb = 0; |
| 1380 } |
| 1381 #if defined(HAS_ARGBSEPIAROW_SSSE3) |
| 1382 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && |
| 1383 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { |
| 1384 ARGBSepiaRow = ARGBSepiaRow_SSSE3; |
| 1385 } |
| 1386 #elif defined(HAS_ARGBSEPIAROW_NEON) |
| 1387 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { |
| 1388 ARGBSepiaRow = ARGBSepiaRow_NEON; |
| 1389 } |
| 1390 #endif |
| 1391 for (y = 0; y < height; ++y) { |
| 1392 ARGBSepiaRow(dst, width); |
| 1393 dst += dst_stride_argb; |
| 1394 } |
| 1395 return 0; |
| 1396 } |
| 1397 |
| 1398 // Apply a 4x4 matrix to each ARGB pixel. |
| 1399 // Note: Normally for shading, but can be used to swizzle or invert. |
| 1400 LIBYUV_API |
| 1401 int ARGBColorMatrix(const uint8* src_argb, int src_stride_argb, |
| 1402 uint8* dst_argb, int dst_stride_argb, |
| 1403 const int8* matrix_argb, |
| 1404 int width, int height) { |
| 1405 int y; |
| 1406 void (*ARGBColorMatrixRow)(const uint8* src_argb, uint8* dst_argb, |
| 1407 const int8* matrix_argb, int width) = ARGBColorMatrixRow_C; |
| 1408 if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) { |
| 1409 return -1; |
| 1410 } |
| 1411 if (height < 0) { |
| 1412 height = -height; |
| 1413 src_argb = src_argb + (height - 1) * src_stride_argb; |
| 1414 src_stride_argb = -src_stride_argb; |
| 1415 } |
| 1416 // Coalesce rows. |
| 1417 if (src_stride_argb == width * 4 && |
| 1418 dst_stride_argb == width * 4) { |
| 1419 width *= height; |
| 1420 height = 1; |
| 1421 src_stride_argb = dst_stride_argb = 0; |
| 1422 } |
| 1423 #if defined(HAS_ARGBCOLORMATRIXROW_SSSE3) |
| 1424 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8) && |
| 1425 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { |
| 1426 ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3; |
| 1427 } |
| 1428 #elif defined(HAS_ARGBCOLORMATRIXROW_NEON) |
| 1429 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { |
| 1430 ARGBColorMatrixRow = ARGBColorMatrixRow_NEON; |
| 1431 } |
| 1432 #endif |
| 1433 for (y = 0; y < height; ++y) { |
| 1434 ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width); |
| 1435 src_argb += src_stride_argb; |
| 1436 dst_argb += dst_stride_argb; |
| 1437 } |
| 1438 return 0; |
| 1439 } |
| 1440 |
| 1441 // Apply a 4x3 matrix to each ARGB pixel. |
| 1442 // Deprecated. |
| 1443 LIBYUV_API |
| 1444 int RGBColorMatrix(uint8* dst_argb, int dst_stride_argb, |
| 1445 const int8* matrix_rgb, |
| 1446 int dst_x, int dst_y, int width, int height) { |
| 1447 SIMD_ALIGNED(int8 matrix_argb[16]); |
| 1448 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; |
| 1449 if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 || |
| 1450 dst_x < 0 || dst_y < 0) { |
| 1451 return -1; |
| 1452 } |
| 1453 |
| 1454 // Convert 4x3 7 bit matrix to 4x4 6 bit matrix. |
| 1455 matrix_argb[0] = matrix_rgb[0] / 2; |
| 1456 matrix_argb[1] = matrix_rgb[1] / 2; |
| 1457 matrix_argb[2] = matrix_rgb[2] / 2; |
| 1458 matrix_argb[3] = matrix_rgb[3] / 2; |
| 1459 matrix_argb[4] = matrix_rgb[4] / 2; |
| 1460 matrix_argb[5] = matrix_rgb[5] / 2; |
| 1461 matrix_argb[6] = matrix_rgb[6] / 2; |
| 1462 matrix_argb[7] = matrix_rgb[7] / 2; |
| 1463 matrix_argb[8] = matrix_rgb[8] / 2; |
| 1464 matrix_argb[9] = matrix_rgb[9] / 2; |
| 1465 matrix_argb[10] = matrix_rgb[10] / 2; |
| 1466 matrix_argb[11] = matrix_rgb[11] / 2; |
| 1467 matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0; |
| 1468 matrix_argb[15] = 64; // 1.0 |
| 1469 |
| 1470 return ARGBColorMatrix((const uint8*)(dst), dst_stride_argb, |
| 1471 dst, dst_stride_argb, |
| 1472 &matrix_argb[0], width, height); |
| 1473 } |
| 1474 |
| 1475 // Apply a color table each ARGB pixel. |
| 1476 // Table contains 256 ARGB values. |
| 1477 LIBYUV_API |
| 1478 int ARGBColorTable(uint8* dst_argb, int dst_stride_argb, |
| 1479 const uint8* table_argb, |
| 1480 int dst_x, int dst_y, int width, int height) { |
| 1481 int y; |
| 1482 void (*ARGBColorTableRow)(uint8* dst_argb, const uint8* table_argb, |
| 1483 int width) = ARGBColorTableRow_C; |
| 1484 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; |
| 1485 if (!dst_argb || !table_argb || width <= 0 || height <= 0 || |
| 1486 dst_x < 0 || dst_y < 0) { |
| 1487 return -1; |
| 1488 } |
| 1489 // Coalesce rows. |
| 1490 if (dst_stride_argb == width * 4) { |
| 1491 width *= height; |
| 1492 height = 1; |
| 1493 dst_stride_argb = 0; |
| 1494 } |
| 1495 #if defined(HAS_ARGBCOLORTABLEROW_X86) |
| 1496 if (TestCpuFlag(kCpuHasX86)) { |
| 1497 ARGBColorTableRow = ARGBColorTableRow_X86; |
| 1498 } |
| 1499 #endif |
| 1500 for (y = 0; y < height; ++y) { |
| 1501 ARGBColorTableRow(dst, table_argb, width); |
| 1502 dst += dst_stride_argb; |
| 1503 } |
| 1504 return 0; |
| 1505 } |
| 1506 |
| 1507 // Apply a color table each ARGB pixel but preserve destination alpha. |
| 1508 // Table contains 256 ARGB values. |
| 1509 LIBYUV_API |
| 1510 int RGBColorTable(uint8* dst_argb, int dst_stride_argb, |
| 1511 const uint8* table_argb, |
| 1512 int dst_x, int dst_y, int width, int height) { |
| 1513 int y; |
| 1514 void (*RGBColorTableRow)(uint8* dst_argb, const uint8* table_argb, |
| 1515 int width) = RGBColorTableRow_C; |
| 1516 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; |
| 1517 if (!dst_argb || !table_argb || width <= 0 || height <= 0 || |
| 1518 dst_x < 0 || dst_y < 0) { |
| 1519 return -1; |
| 1520 } |
| 1521 // Coalesce rows. |
| 1522 if (dst_stride_argb == width * 4) { |
| 1523 width *= height; |
| 1524 height = 1; |
| 1525 dst_stride_argb = 0; |
| 1526 } |
| 1527 #if defined(HAS_RGBCOLORTABLEROW_X86) |
| 1528 if (TestCpuFlag(kCpuHasX86)) { |
| 1529 RGBColorTableRow = RGBColorTableRow_X86; |
| 1530 } |
| 1531 #endif |
| 1532 for (y = 0; y < height; ++y) { |
| 1533 RGBColorTableRow(dst, table_argb, width); |
| 1534 dst += dst_stride_argb; |
| 1535 } |
| 1536 return 0; |
| 1537 } |
| 1538 |
| 1539 // ARGBQuantize is used to posterize art. |
| 1540 // e.g. rgb / qvalue * qvalue + qvalue / 2 |
| 1541 // But the low levels implement efficiently with 3 parameters, and could be |
| 1542 // used for other high level operations. |
| 1543 // dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset; |
| 1544 // where scale is 1 / interval_size as a fixed point value. |
| 1545 // The divide is replaces with a multiply by reciprocal fixed point multiply. |
| 1546 // Caveat - although SSE2 saturates, the C function does not and should be used |
| 1547 // with care if doing anything but quantization. |
| 1548 LIBYUV_API |
| 1549 int ARGBQuantize(uint8* dst_argb, int dst_stride_argb, |
| 1550 int scale, int interval_size, int interval_offset, |
| 1551 int dst_x, int dst_y, int width, int height) { |
| 1552 int y; |
| 1553 void (*ARGBQuantizeRow)(uint8* dst_argb, int scale, int interval_size, |
| 1554 int interval_offset, int width) = ARGBQuantizeRow_C; |
| 1555 uint8* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; |
| 1556 if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 || |
| 1557 interval_size < 1 || interval_size > 255) { |
| 1558 return -1; |
| 1559 } |
| 1560 // Coalesce rows. |
| 1561 if (dst_stride_argb == width * 4) { |
| 1562 width *= height; |
| 1563 height = 1; |
| 1564 dst_stride_argb = 0; |
| 1565 } |
| 1566 #if defined(HAS_ARGBQUANTIZEROW_SSE2) |
| 1567 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) && |
| 1568 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { |
| 1569 ARGBQuantizeRow = ARGBQuantizeRow_SSE2; |
| 1570 } |
| 1571 #elif defined(HAS_ARGBQUANTIZEROW_NEON) |
| 1572 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { |
| 1573 ARGBQuantizeRow = ARGBQuantizeRow_NEON; |
| 1574 } |
| 1575 #endif |
| 1576 for (y = 0; y < height; ++y) { |
| 1577 ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width); |
| 1578 dst += dst_stride_argb; |
| 1579 } |
| 1580 return 0; |
| 1581 } |
| 1582 |
| 1583 // Computes table of cumulative sum for image where the value is the sum |
| 1584 // of all values above and to the left of the entry. Used by ARGBBlur. |
| 1585 LIBYUV_API |
| 1586 int ARGBComputeCumulativeSum(const uint8* src_argb, int src_stride_argb, |
| 1587 int32* dst_cumsum, int dst_stride32_cumsum, |
| 1588 int width, int height) { |
| 1589 int y; |
| 1590 void (*ComputeCumulativeSumRow)(const uint8* row, int32* cumsum, |
| 1591 const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C; |
| 1592 int32* previous_cumsum = dst_cumsum; |
| 1593 if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) { |
| 1594 return -1; |
| 1595 } |
| 1596 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2) |
| 1597 if (TestCpuFlag(kCpuHasSSE2)) { |
| 1598 ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; |
| 1599 } |
| 1600 #endif |
| 1601 memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4); // 4 int per pixel. |
| 1602 for (y = 0; y < height; ++y) { |
| 1603 ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width); |
| 1604 previous_cumsum = dst_cumsum; |
| 1605 dst_cumsum += dst_stride32_cumsum; |
| 1606 src_argb += src_stride_argb; |
| 1607 } |
| 1608 return 0; |
| 1609 } |
| 1610 |
| 1611 // Blur ARGB image. |
| 1612 // Caller should allocate CumulativeSum table of width * height * 16 bytes |
| 1613 // aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory |
| 1614 // as the buffer is treated as circular. |
| 1615 LIBYUV_API |
| 1616 int ARGBBlur(const uint8* src_argb, int src_stride_argb, |
| 1617 uint8* dst_argb, int dst_stride_argb, |
| 1618 int32* dst_cumsum, int dst_stride32_cumsum, |
| 1619 int width, int height, int radius) { |
| 1620 int y; |
| 1621 void (*ComputeCumulativeSumRow)(const uint8 *row, int32 *cumsum, |
| 1622 const int32* previous_cumsum, int width) = ComputeCumulativeSumRow_C; |
| 1623 void (*CumulativeSumToAverageRow)(const int32* topleft, const int32* botleft, |
| 1624 int width, int area, uint8* dst, int count) = CumulativeSumToAverageRow_C; |
| 1625 int32* cumsum_bot_row; |
| 1626 int32* max_cumsum_bot_row; |
| 1627 int32* cumsum_top_row; |
| 1628 |
| 1629 if (!src_argb || !dst_argb || width <= 0 || height == 0) { |
| 1630 return -1; |
| 1631 } |
| 1632 if (height < 0) { |
| 1633 height = -height; |
| 1634 src_argb = src_argb + (height - 1) * src_stride_argb; |
| 1635 src_stride_argb = -src_stride_argb; |
| 1636 } |
| 1637 if (radius > height) { |
| 1638 radius = height; |
| 1639 } |
| 1640 if (radius > (width / 2 - 1)) { |
| 1641 radius = width / 2 - 1; |
| 1642 } |
| 1643 if (radius <= 0) { |
| 1644 return -1; |
| 1645 } |
| 1646 #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2) |
| 1647 if (TestCpuFlag(kCpuHasSSE2)) { |
| 1648 ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; |
| 1649 CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2; |
| 1650 } |
| 1651 #endif |
| 1652 // Compute enough CumulativeSum for first row to be blurred. After this |
| 1653 // one row of CumulativeSum is updated at a time. |
| 1654 ARGBComputeCumulativeSum(src_argb, src_stride_argb, |
| 1655 dst_cumsum, dst_stride32_cumsum, |
| 1656 width, radius); |
| 1657 |
| 1658 src_argb = src_argb + radius * src_stride_argb; |
| 1659 cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum]; |
| 1660 |
| 1661 max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum]; |
| 1662 cumsum_top_row = &dst_cumsum[0]; |
| 1663 |
| 1664 for (y = 0; y < height; ++y) { |
| 1665 int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0; |
| 1666 int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1); |
| 1667 int area = radius * (bot_y - top_y); |
| 1668 int boxwidth = radius * 4; |
| 1669 int x; |
| 1670 int n; |
| 1671 |
| 1672 // Increment cumsum_top_row pointer with circular buffer wrap around. |
| 1673 if (top_y) { |
| 1674 cumsum_top_row += dst_stride32_cumsum; |
| 1675 if (cumsum_top_row >= max_cumsum_bot_row) { |
| 1676 cumsum_top_row = dst_cumsum; |
| 1677 } |
| 1678 } |
| 1679 // Increment cumsum_bot_row pointer with circular buffer wrap around and |
| 1680 // then fill in a row of CumulativeSum. |
| 1681 if ((y + radius) < height) { |
| 1682 const int32* prev_cumsum_bot_row = cumsum_bot_row; |
| 1683 cumsum_bot_row += dst_stride32_cumsum; |
| 1684 if (cumsum_bot_row >= max_cumsum_bot_row) { |
| 1685 cumsum_bot_row = dst_cumsum; |
| 1686 } |
| 1687 ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row, |
| 1688 width); |
| 1689 src_argb += src_stride_argb; |
| 1690 } |
| 1691 |
| 1692 // Left clipped. |
| 1693 for (x = 0; x < radius + 1; ++x) { |
| 1694 CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, |
| 1695 boxwidth, area, &dst_argb[x * 4], 1); |
| 1696 area += (bot_y - top_y); |
| 1697 boxwidth += 4; |
| 1698 } |
| 1699 |
| 1700 // Middle unclipped. |
| 1701 n = (width - 1) - radius - x + 1; |
| 1702 CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, |
| 1703 boxwidth, area, &dst_argb[x * 4], n); |
| 1704 |
| 1705 // Right clipped. |
| 1706 for (x += n; x <= width - 1; ++x) { |
| 1707 area -= (bot_y - top_y); |
| 1708 boxwidth -= 4; |
| 1709 CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4, |
| 1710 cumsum_bot_row + (x - radius - 1) * 4, |
| 1711 boxwidth, area, &dst_argb[x * 4], 1); |
| 1712 } |
| 1713 dst_argb += dst_stride_argb; |
| 1714 } |
| 1715 return 0; |
| 1716 } |
| 1717 |
| 1718 // Multiply ARGB image by a specified ARGB value. |
| 1719 LIBYUV_API |
| 1720 int ARGBShade(const uint8* src_argb, int src_stride_argb, |
| 1721 uint8* dst_argb, int dst_stride_argb, |
| 1722 int width, int height, uint32 value) { |
| 1723 int y; |
| 1724 void (*ARGBShadeRow)(const uint8* src_argb, uint8* dst_argb, |
| 1725 int width, uint32 value) = ARGBShadeRow_C; |
| 1726 if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) { |
| 1727 return -1; |
| 1728 } |
| 1729 if (height < 0) { |
| 1730 height = -height; |
| 1731 src_argb = src_argb + (height - 1) * src_stride_argb; |
| 1732 src_stride_argb = -src_stride_argb; |
| 1733 } |
| 1734 // Coalesce rows. |
| 1735 if (src_stride_argb == width * 4 && |
| 1736 dst_stride_argb == width * 4) { |
| 1737 width *= height; |
| 1738 height = 1; |
| 1739 src_stride_argb = dst_stride_argb = 0; |
| 1740 } |
| 1741 #if defined(HAS_ARGBSHADEROW_SSE2) |
| 1742 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4) && |
| 1743 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && |
| 1744 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { |
| 1745 ARGBShadeRow = ARGBShadeRow_SSE2; |
| 1746 } |
| 1747 #elif defined(HAS_ARGBSHADEROW_NEON) |
| 1748 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { |
| 1749 ARGBShadeRow = ARGBShadeRow_NEON; |
| 1750 } |
| 1751 #endif |
| 1752 |
| 1753 for (y = 0; y < height; ++y) { |
| 1754 ARGBShadeRow(src_argb, dst_argb, width, value); |
| 1755 src_argb += src_stride_argb; |
| 1756 dst_argb += dst_stride_argb; |
| 1757 } |
| 1758 return 0; |
| 1759 } |
| 1760 |
| 1761 // Interpolate 2 ARGB images by specified amount (0 to 255). |
| 1762 LIBYUV_API |
| 1763 int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0, |
| 1764 const uint8* src_argb1, int src_stride_argb1, |
| 1765 uint8* dst_argb, int dst_stride_argb, |
| 1766 int width, int height, int interpolation) { |
| 1767 int y; |
| 1768 void (*InterpolateRow)(uint8* dst_ptr, const uint8* src_ptr, |
| 1769 ptrdiff_t src_stride, int dst_width, |
| 1770 int source_y_fraction) = InterpolateRow_C; |
| 1771 if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { |
| 1772 return -1; |
| 1773 } |
| 1774 // Negative height means invert the image. |
| 1775 if (height < 0) { |
| 1776 height = -height; |
| 1777 dst_argb = dst_argb + (height - 1) * dst_stride_argb; |
| 1778 dst_stride_argb = -dst_stride_argb; |
| 1779 } |
| 1780 // Coalesce rows. |
| 1781 if (src_stride_argb0 == width * 4 && |
| 1782 src_stride_argb1 == width * 4 && |
| 1783 dst_stride_argb == width * 4) { |
| 1784 width *= height; |
| 1785 height = 1; |
| 1786 src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; |
| 1787 } |
| 1788 #if defined(HAS_INTERPOLATEROW_SSE2) |
| 1789 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { |
| 1790 InterpolateRow = InterpolateRow_Any_SSE2; |
| 1791 if (IS_ALIGNED(width, 4)) { |
| 1792 InterpolateRow = InterpolateRow_Unaligned_SSE2; |
| 1793 if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) && |
| 1794 IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) && |
| 1795 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { |
| 1796 InterpolateRow = InterpolateRow_SSE2; |
| 1797 } |
| 1798 } |
| 1799 } |
| 1800 #endif |
| 1801 #if defined(HAS_INTERPOLATEROW_SSSE3) |
| 1802 if (TestCpuFlag(kCpuHasSSSE3) && width >= 4) { |
| 1803 InterpolateRow = InterpolateRow_Any_SSSE3; |
| 1804 if (IS_ALIGNED(width, 4)) { |
| 1805 InterpolateRow = InterpolateRow_Unaligned_SSSE3; |
| 1806 if (IS_ALIGNED(src_argb0, 16) && IS_ALIGNED(src_stride_argb0, 16) && |
| 1807 IS_ALIGNED(src_argb1, 16) && IS_ALIGNED(src_stride_argb1, 16) && |
| 1808 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { |
| 1809 InterpolateRow = InterpolateRow_SSSE3; |
| 1810 } |
| 1811 } |
| 1812 } |
| 1813 #endif |
| 1814 #if defined(HAS_INTERPOLATEROW_AVX2) |
| 1815 if (TestCpuFlag(kCpuHasAVX2) && width >= 8) { |
| 1816 InterpolateRow = InterpolateRow_Any_AVX2; |
| 1817 if (IS_ALIGNED(width, 8)) { |
| 1818 InterpolateRow = InterpolateRow_AVX2; |
| 1819 } |
| 1820 } |
| 1821 #endif |
| 1822 #if defined(HAS_INTERPOLATEROW_NEON) |
| 1823 if (TestCpuFlag(kCpuHasNEON) && width >= 4) { |
| 1824 InterpolateRow = InterpolateRow_Any_NEON; |
| 1825 if (IS_ALIGNED(width, 4)) { |
| 1826 InterpolateRow = InterpolateRow_NEON; |
| 1827 } |
| 1828 } |
| 1829 #endif |
| 1830 #if defined(HAS_INTERPOLATEROWS_MIPS_DSPR2) |
| 1831 if (TestCpuFlag(kCpuHasMIPS_DSPR2) && width >= 1 && |
| 1832 IS_ALIGNED(src_argb0, 4) && IS_ALIGNED(src_stride_argb0, 4) && |
| 1833 IS_ALIGNED(src_argb1, 4) && IS_ALIGNED(src_stride_argb1, 4) && |
| 1834 IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) { |
| 1835 ScaleARGBFilterRows = InterpolateRow_MIPS_DSPR2; |
| 1836 } |
| 1837 #endif |
| 1838 |
| 1839 for (y = 0; y < height; ++y) { |
| 1840 InterpolateRow(dst_argb, src_argb0, src_argb1 - src_argb0, |
| 1841 width * 4, interpolation); |
| 1842 src_argb0 += src_stride_argb0; |
| 1843 src_argb1 += src_stride_argb1; |
| 1844 dst_argb += dst_stride_argb; |
| 1845 } |
| 1846 return 0; |
| 1847 } |
| 1848 |
| 1849 // Shuffle ARGB channel order. e.g. BGRA to ARGB. |
| 1850 LIBYUV_API |
| 1851 int ARGBShuffle(const uint8* src_bgra, int src_stride_bgra, |
| 1852 uint8* dst_argb, int dst_stride_argb, |
| 1853 const uint8* shuffler, int width, int height) { |
| 1854 int y; |
| 1855 void (*ARGBShuffleRow)(const uint8* src_bgra, uint8* dst_argb, |
| 1856 const uint8* shuffler, int pix) = ARGBShuffleRow_C; |
| 1857 if (!src_bgra || !dst_argb || |
| 1858 width <= 0 || height == 0) { |
| 1859 return -1; |
| 1860 } |
| 1861 // Negative height means invert the image. |
| 1862 if (height < 0) { |
| 1863 height = -height; |
| 1864 src_bgra = src_bgra + (height - 1) * src_stride_bgra; |
| 1865 src_stride_bgra = -src_stride_bgra; |
| 1866 } |
| 1867 // Coalesce rows. |
| 1868 if (src_stride_bgra == width * 4 && |
| 1869 dst_stride_argb == width * 4) { |
| 1870 width *= height; |
| 1871 height = 1; |
| 1872 src_stride_bgra = dst_stride_argb = 0; |
| 1873 } |
| 1874 #if defined(HAS_ARGBSHUFFLEROW_SSE2) |
| 1875 if (TestCpuFlag(kCpuHasSSE2) && width >= 4) { |
| 1876 ARGBShuffleRow = ARGBShuffleRow_Any_SSE2; |
| 1877 if (IS_ALIGNED(width, 4)) { |
| 1878 ARGBShuffleRow = ARGBShuffleRow_SSE2; |
| 1879 } |
| 1880 } |
| 1881 #endif |
| 1882 #if defined(HAS_ARGBSHUFFLEROW_SSSE3) |
| 1883 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) { |
| 1884 ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3; |
| 1885 if (IS_ALIGNED(width, 8)) { |
| 1886 ARGBShuffleRow = ARGBShuffleRow_Unaligned_SSSE3; |
| 1887 if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16) && |
| 1888 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { |
| 1889 ARGBShuffleRow = ARGBShuffleRow_SSSE3; |
| 1890 } |
| 1891 } |
| 1892 } |
| 1893 #endif |
| 1894 #if defined(HAS_ARGBSHUFFLEROW_AVX2) |
| 1895 if (TestCpuFlag(kCpuHasAVX2) && width >= 16) { |
| 1896 ARGBShuffleRow = ARGBShuffleRow_Any_AVX2; |
| 1897 if (IS_ALIGNED(width, 16)) { |
| 1898 ARGBShuffleRow = ARGBShuffleRow_AVX2; |
| 1899 } |
| 1900 } |
| 1901 #endif |
| 1902 #if defined(HAS_ARGBSHUFFLEROW_NEON) |
| 1903 if (TestCpuFlag(kCpuHasNEON) && width >= 4) { |
| 1904 ARGBShuffleRow = ARGBShuffleRow_Any_NEON; |
| 1905 if (IS_ALIGNED(width, 4)) { |
| 1906 ARGBShuffleRow = ARGBShuffleRow_NEON; |
| 1907 } |
| 1908 } |
| 1909 #endif |
| 1910 |
| 1911 for (y = 0; y < height; ++y) { |
| 1912 ARGBShuffleRow(src_bgra, dst_argb, shuffler, width); |
| 1913 src_bgra += src_stride_bgra; |
| 1914 dst_argb += dst_stride_argb; |
| 1915 } |
| 1916 return 0; |
| 1917 } |
| 1918 |
| 1919 // Sobel ARGB effect. |
| 1920 static int ARGBSobelize(const uint8* src_argb, int src_stride_argb, |
| 1921 uint8* dst_argb, int dst_stride_argb, |
| 1922 int width, int height, |
| 1923 void (*SobelRow)(const uint8* src_sobelx, |
| 1924 const uint8* src_sobely, |
| 1925 uint8* dst, int width)) { |
| 1926 int y; |
| 1927 void (*ARGBToBayerRow)(const uint8* src_argb, uint8* dst_bayer, |
| 1928 uint32 selector, int pix) = ARGBToBayerGGRow_C; |
| 1929 void (*SobelYRow)(const uint8* src_y0, const uint8* src_y1, |
| 1930 uint8* dst_sobely, int width) = SobelYRow_C; |
| 1931 void (*SobelXRow)(const uint8* src_y0, const uint8* src_y1, |
| 1932 const uint8* src_y2, uint8* dst_sobely, int width) = |
| 1933 SobelXRow_C; |
| 1934 const int kEdge = 16; // Extra pixels at start of row for extrude/align. |
| 1935 if (!src_argb || !dst_argb || width <= 0 || height == 0) { |
| 1936 return -1; |
| 1937 } |
| 1938 // Negative height means invert the image. |
| 1939 if (height < 0) { |
| 1940 height = -height; |
| 1941 src_argb = src_argb + (height - 1) * src_stride_argb; |
| 1942 src_stride_argb = -src_stride_argb; |
| 1943 } |
| 1944 // ARGBToBayer used to select G channel from ARGB. |
| 1945 #if defined(HAS_ARGBTOBAYERGGROW_SSE2) |
| 1946 if (TestCpuFlag(kCpuHasSSE2) && width >= 8 && |
| 1947 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { |
| 1948 ARGBToBayerRow = ARGBToBayerGGRow_Any_SSE2; |
| 1949 if (IS_ALIGNED(width, 8)) { |
| 1950 ARGBToBayerRow = ARGBToBayerGGRow_SSE2; |
| 1951 } |
| 1952 } |
| 1953 #endif |
| 1954 #if defined(HAS_ARGBTOBAYERROW_SSSE3) |
| 1955 if (TestCpuFlag(kCpuHasSSSE3) && width >= 8 && |
| 1956 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) { |
| 1957 ARGBToBayerRow = ARGBToBayerRow_Any_SSSE3; |
| 1958 if (IS_ALIGNED(width, 8)) { |
| 1959 ARGBToBayerRow = ARGBToBayerRow_SSSE3; |
| 1960 } |
| 1961 } |
| 1962 #endif |
| 1963 #if defined(HAS_ARGBTOBAYERGGROW_NEON) |
| 1964 if (TestCpuFlag(kCpuHasNEON) && width >= 8) { |
| 1965 ARGBToBayerRow = ARGBToBayerGGRow_Any_NEON; |
| 1966 if (IS_ALIGNED(width, 8)) { |
| 1967 ARGBToBayerRow = ARGBToBayerGGRow_NEON; |
| 1968 } |
| 1969 } |
| 1970 #endif |
| 1971 #if defined(HAS_SOBELYROW_SSE2) |
| 1972 if (TestCpuFlag(kCpuHasSSE2)) { |
| 1973 SobelYRow = SobelYRow_SSE2; |
| 1974 } |
| 1975 #endif |
| 1976 #if defined(HAS_SOBELYROW_NEON) |
| 1977 if (TestCpuFlag(kCpuHasNEON)) { |
| 1978 SobelYRow = SobelYRow_NEON; |
| 1979 } |
| 1980 #endif |
| 1981 #if defined(HAS_SOBELXROW_SSE2) |
| 1982 if (TestCpuFlag(kCpuHasSSE2)) { |
| 1983 SobelXRow = SobelXRow_SSE2; |
| 1984 } |
| 1985 #endif |
| 1986 #if defined(HAS_SOBELXROW_NEON) |
| 1987 if (TestCpuFlag(kCpuHasNEON)) { |
| 1988 SobelXRow = SobelXRow_NEON; |
| 1989 } |
| 1990 #endif |
| 1991 { |
| 1992 // 3 rows with edges before/after. |
| 1993 const int kRowSize = (width + kEdge + 15) & ~15; |
| 1994 align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge)); |
| 1995 uint8* row_sobelx = rows; |
| 1996 uint8* row_sobely = rows + kRowSize; |
| 1997 uint8* row_y = rows + kRowSize * 2; |
| 1998 |
| 1999 // Convert first row. |
| 2000 uint8* row_y0 = row_y + kEdge; |
| 2001 uint8* row_y1 = row_y0 + kRowSize; |
| 2002 uint8* row_y2 = row_y1 + kRowSize; |
| 2003 ARGBToBayerRow(src_argb, row_y0, 0x0d090501, width); |
| 2004 row_y0[-1] = row_y0[0]; |
| 2005 memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind. |
| 2006 ARGBToBayerRow(src_argb, row_y1, 0x0d090501, width); |
| 2007 row_y1[-1] = row_y1[0]; |
| 2008 memset(row_y1 + width, row_y1[width - 1], 16); |
| 2009 memset(row_y2 + width, 0, 16); |
| 2010 |
| 2011 for (y = 0; y < height; ++y) { |
| 2012 // Convert next row of ARGB to Y. |
| 2013 if (y < (height - 1)) { |
| 2014 src_argb += src_stride_argb; |
| 2015 } |
| 2016 ARGBToBayerRow(src_argb, row_y2, 0x0d090501, width); |
| 2017 row_y2[-1] = row_y2[0]; |
| 2018 row_y2[width] = row_y2[width - 1]; |
| 2019 |
| 2020 SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width); |
| 2021 SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width); |
| 2022 SobelRow(row_sobelx, row_sobely, dst_argb, width); |
| 2023 |
| 2024 // Cycle thru circular queue of 3 row_y buffers. |
| 2025 { |
| 2026 uint8* row_yt = row_y0; |
| 2027 row_y0 = row_y1; |
| 2028 row_y1 = row_y2; |
| 2029 row_y2 = row_yt; |
| 2030 } |
| 2031 |
| 2032 dst_argb += dst_stride_argb; |
| 2033 } |
| 2034 free_aligned_buffer_64(rows); |
| 2035 } |
| 2036 return 0; |
| 2037 } |
| 2038 |
| 2039 // Sobel ARGB effect. |
| 2040 LIBYUV_API |
| 2041 int ARGBSobel(const uint8* src_argb, int src_stride_argb, |
| 2042 uint8* dst_argb, int dst_stride_argb, |
| 2043 int width, int height) { |
| 2044 void (*SobelRow)(const uint8* src_sobelx, const uint8* src_sobely, |
| 2045 uint8* dst_argb, int width) = SobelRow_C; |
| 2046 #if defined(HAS_SOBELROW_SSE2) |
| 2047 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && |
| 2048 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { |
| 2049 SobelRow = SobelRow_SSE2; |
| 2050 } |
| 2051 #endif |
| 2052 #if defined(HAS_SOBELROW_NEON) |
| 2053 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { |
| 2054 SobelRow = SobelRow_NEON; |
| 2055 } |
| 2056 #endif |
| 2057 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, |
| 2058 width, height, SobelRow); |
| 2059 } |
| 2060 |
| 2061 // Sobel ARGB effect with planar output. |
| 2062 LIBYUV_API |
| 2063 int ARGBSobelToPlane(const uint8* src_argb, int src_stride_argb, |
| 2064 uint8* dst_y, int dst_stride_y, |
| 2065 int width, int height) { |
| 2066 void (*SobelToPlaneRow)(const uint8* src_sobelx, const uint8* src_sobely, |
| 2067 uint8* dst_, int width) = SobelToPlaneRow_C; |
| 2068 #if defined(HAS_SOBELTOPLANEROW_SSE2) |
| 2069 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && |
| 2070 IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) { |
| 2071 SobelToPlaneRow = SobelToPlaneRow_SSE2; |
| 2072 } |
| 2073 #endif |
| 2074 #if defined(HAS_SOBELTOPLANEROW_NEON) |
| 2075 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) { |
| 2076 SobelToPlaneRow = SobelToPlaneRow_NEON; |
| 2077 } |
| 2078 #endif |
| 2079 return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, |
| 2080 width, height, SobelToPlaneRow); |
| 2081 } |
| 2082 |
| 2083 // SobelXY ARGB effect. |
| 2084 // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel. |
| 2085 LIBYUV_API |
| 2086 int ARGBSobelXY(const uint8* src_argb, int src_stride_argb, |
| 2087 uint8* dst_argb, int dst_stride_argb, |
| 2088 int width, int height) { |
| 2089 void (*SobelXYRow)(const uint8* src_sobelx, const uint8* src_sobely, |
| 2090 uint8* dst_argb, int width) = SobelXYRow_C; |
| 2091 #if defined(HAS_SOBELXYROW_SSE2) |
| 2092 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) && |
| 2093 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) { |
| 2094 SobelXYRow = SobelXYRow_SSE2; |
| 2095 } |
| 2096 #endif |
| 2097 #if defined(HAS_SOBELXYROW_NEON) |
| 2098 if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { |
| 2099 SobelXYRow = SobelXYRow_NEON; |
| 2100 } |
| 2101 #endif |
| 2102 return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, |
| 2103 width, height, SobelXYRow); |
| 2104 } |
| 2105 |
| 2106 // Apply a 4x4 polynomial to each ARGB pixel. |
| 2107 LIBYUV_API |
| 2108 int ARGBPolynomial(const uint8* src_argb, int src_stride_argb, |
| 2109 uint8* dst_argb, int dst_stride_argb, |
| 2110 const float* poly, |
| 2111 int width, int height) { |
| 2112 int y; |
| 2113 void (*ARGBPolynomialRow)(const uint8* src_argb, |
| 2114 uint8* dst_argb, const float* poly, |
| 2115 int width) = ARGBPolynomialRow_C; |
| 2116 if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) { |
| 2117 return -1; |
| 2118 } |
| 2119 // Negative height means invert the image. |
| 2120 if (height < 0) { |
| 2121 height = -height; |
| 2122 src_argb = src_argb + (height - 1) * src_stride_argb; |
| 2123 src_stride_argb = -src_stride_argb; |
| 2124 } |
| 2125 // Coalesce rows. |
| 2126 if (src_stride_argb == width * 4 && |
| 2127 dst_stride_argb == width * 4) { |
| 2128 width *= height; |
| 2129 height = 1; |
| 2130 src_stride_argb = dst_stride_argb = 0; |
| 2131 } |
| 2132 #if defined(HAS_ARGBPOLYNOMIALROW_SSE2) |
| 2133 if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) { |
| 2134 ARGBPolynomialRow = ARGBPolynomialRow_SSE2; |
| 2135 } |
| 2136 #endif |
| 2137 #if defined(HAS_ARGBPOLYNOMIALROW_AVX2) |
| 2138 if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) && |
| 2139 IS_ALIGNED(width, 2)) { |
| 2140 ARGBPolynomialRow = ARGBPolynomialRow_AVX2; |
| 2141 } |
| 2142 #endif |
| 2143 |
| 2144 for (y = 0; y < height; ++y) { |
| 2145 ARGBPolynomialRow(src_argb, dst_argb, poly, width); |
| 2146 src_argb += src_stride_argb; |
| 2147 dst_argb += dst_stride_argb; |
| 2148 } |
| 2149 return 0; |
| 2150 } |
| 2151 |
| 2152 // Apply a lumacolortable to each ARGB pixel. |
| 2153 LIBYUV_API |
| 2154 int ARGBLumaColorTable(const uint8* src_argb, int src_stride_argb, |
| 2155 uint8* dst_argb, int dst_stride_argb, |
| 2156 const uint8* luma, |
| 2157 int width, int height) { |
| 2158 int y; |
| 2159 void (*ARGBLumaColorTableRow)(const uint8* src_argb, uint8* dst_argb, |
| 2160 int width, const uint8* luma, const uint32 lumacoeff) = |
| 2161 ARGBLumaColorTableRow_C; |
| 2162 if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) { |
| 2163 return -1; |
| 2164 } |
| 2165 // Negative height means invert the image. |
| 2166 if (height < 0) { |
| 2167 height = -height; |
| 2168 src_argb = src_argb + (height - 1) * src_stride_argb; |
| 2169 src_stride_argb = -src_stride_argb; |
| 2170 } |
| 2171 // Coalesce rows. |
| 2172 if (src_stride_argb == width * 4 && |
| 2173 dst_stride_argb == width * 4) { |
| 2174 width *= height; |
| 2175 height = 1; |
| 2176 src_stride_argb = dst_stride_argb = 0; |
| 2177 } |
| 2178 #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3) |
| 2179 if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) { |
| 2180 ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3; |
| 2181 } |
| 2182 #endif |
| 2183 |
| 2184 for (y = 0; y < height; ++y) { |
| 2185 ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f); |
| 2186 src_argb += src_stride_argb; |
| 2187 dst_argb += dst_stride_argb; |
| 2188 } |
| 2189 return 0; |
| 2190 } |
| 2191 |
| 2192 // Copy Alpha from one ARGB image to another. |
| 2193 LIBYUV_API |
| 2194 int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb, |
| 2195 uint8* dst_argb, int dst_stride_argb, |
| 2196 int width, int height) { |
| 2197 int y; |
| 2198 void (*ARGBCopyAlphaRow)(const uint8* src_argb, uint8* dst_argb, int width) = |
| 2199 ARGBCopyAlphaRow_C; |
| 2200 if (!src_argb || !dst_argb || width <= 0 || height == 0) { |
| 2201 return -1; |
| 2202 } |
| 2203 // Negative height means invert the image. |
| 2204 if (height < 0) { |
| 2205 height = -height; |
| 2206 src_argb = src_argb + (height - 1) * src_stride_argb; |
| 2207 src_stride_argb = -src_stride_argb; |
| 2208 } |
| 2209 // Coalesce rows. |
| 2210 if (src_stride_argb == width * 4 && |
| 2211 dst_stride_argb == width * 4) { |
| 2212 width *= height; |
| 2213 height = 1; |
| 2214 src_stride_argb = dst_stride_argb = 0; |
| 2215 } |
| 2216 #if defined(HAS_ARGBCOPYALPHAROW_SSE2) |
| 2217 if (TestCpuFlag(kCpuHasSSE2) && |
| 2218 IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16) && |
| 2219 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) && |
| 2220 IS_ALIGNED(width, 8)) { |
| 2221 ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2; |
| 2222 } |
| 2223 #endif |
| 2224 #if defined(HAS_ARGBCOPYALPHAROW_AVX2) |
| 2225 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) { |
| 2226 ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2; |
| 2227 } |
| 2228 #endif |
| 2229 |
| 2230 for (y = 0; y < height; ++y) { |
| 2231 ARGBCopyAlphaRow(src_argb, dst_argb, width); |
| 2232 src_argb += src_stride_argb; |
| 2233 dst_argb += dst_stride_argb; |
| 2234 } |
| 2235 return 0; |
| 2236 } |
| 2237 |
| 2238 // Copy a planar Y channel to the alpha channel of a destination ARGB image. |
| 2239 LIBYUV_API |
| 2240 int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y, |
| 2241 uint8* dst_argb, int dst_stride_argb, |
| 2242 int width, int height) { |
| 2243 int y; |
| 2244 void (*ARGBCopyYToAlphaRow)(const uint8* src_y, uint8* dst_argb, int width) = |
| 2245 ARGBCopyYToAlphaRow_C; |
| 2246 if (!src_y || !dst_argb || width <= 0 || height == 0) { |
| 2247 return -1; |
| 2248 } |
| 2249 // Negative height means invert the image. |
| 2250 if (height < 0) { |
| 2251 height = -height; |
| 2252 src_y = src_y + (height - 1) * src_stride_y; |
| 2253 src_stride_y = -src_stride_y; |
| 2254 } |
| 2255 // Coalesce rows. |
| 2256 if (src_stride_y == width && |
| 2257 dst_stride_argb == width * 4) { |
| 2258 width *= height; |
| 2259 height = 1; |
| 2260 src_stride_y = dst_stride_argb = 0; |
| 2261 } |
| 2262 #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2) |
| 2263 if (TestCpuFlag(kCpuHasSSE2) && |
| 2264 IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) && |
| 2265 IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16) && |
| 2266 IS_ALIGNED(width, 8)) { |
| 2267 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2; |
| 2268 } |
| 2269 #endif |
| 2270 #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2) |
| 2271 if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 16)) { |
| 2272 ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2; |
| 2273 } |
| 2274 #endif |
| 2275 |
| 2276 for (y = 0; y < height; ++y) { |
| 2277 ARGBCopyYToAlphaRow(src_y, dst_argb, width); |
| 2278 src_y += src_stride_y; |
| 2279 dst_argb += dst_stride_argb; |
| 2280 } |
| 2281 return 0; |
| 2282 } |
| 2283 |
| 2284 #ifdef __cplusplus |
| 2285 } // extern "C" |
| 2286 } // namespace libyuv |
| 2287 #endif |
OLD | NEW |