OLD | NEW |
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 3449 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3460 "+r"(src_argb1), // %1 | 3460 "+r"(src_argb1), // %1 |
3461 "+r"(dst_argb), // %2 | 3461 "+r"(dst_argb), // %2 |
3462 "+r"(width) // %3 | 3462 "+r"(width) // %3 |
3463 : "m"(kShuffleAlpha) // %4 | 3463 : "m"(kShuffleAlpha) // %4 |
3464 : "memory", "cc" | 3464 : "memory", "cc" |
3465 , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" | 3465 , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" |
3466 ); | 3466 ); |
3467 } | 3467 } |
3468 #endif // HAS_ARGBBLENDROW_SSSE3 | 3468 #endif // HAS_ARGBBLENDROW_SSSE3 |
3469 | 3469 |
| 3470 |
| 3471 #ifdef HAS_BLENDPLANEROW_SSSE3 |
| 3472 // Blends two 8-bit planes using a per-pixel alpha plane, 8 pixels per loop iteration. |
| 3473 // Per pixel: dst = (alpha * src0 + (255 - alpha) * src1 + 255) >> 8 |
// The source bytes are biased to signed (minus 128) so pmaddubsw can multiply
// them by the unsigned weights {alpha, 255 - alpha}. Adding 32768 + 127 to the
// 16-bit sum cancels that bias (255 * 128 = 32640) and leaves the +255 term.
// Every iteration loads and stores a full 8 bytes per plane and the loop only
// exits once width has dropped to 0 or below, so at least one group of 8 is
// always processed and a width that is not a multiple of 8 is rounded up.
// NOTE(review): five "+r" operands plus the eax clobber may be tight on
// register-starved 32-bit builds -- confirm this assembles for i386 targets.
| 3474 void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1, |
| 3475 const uint8* alpha, uint8* dst, int width) { |
| 3476 asm volatile ( |
// xmm5 = 0xff00 in every 16-bit lane: XOR mask turning {a, a} into {a, 255 - a}.
| 3477 "pcmpeqb %%xmm5,%%xmm5 \n" |
| 3478 "psllw $0x8,%%xmm5 \n" |
// xmm6 = 0x80 in every byte: bias that maps unsigned source bytes to signed.
| 3479 "mov $0x80808080,%%eax \n" |
| 3480 "movd %%eax,%%xmm6 \n" |
| 3481 "pshufd $0x0,%%xmm6,%%xmm6 \n" |
// xmm7 = 0x807f (32768 + 127) in every 16-bit lane: un-bias plus rounding term.
| 3482 "mov $0x807f807f,%%eax \n" |
| 3483 "movd %%eax,%%xmm7 \n" |
| 3484 "pshufd $0x0,%%xmm7,%%xmm7 \n" |
// Rebase src0, src1 and dst as offsets from alpha so that advancing the single
// pointer %2 steps through all four planes.
| 3485 "sub %2,%0 \n" |
| 3486 "sub %2,%1 \n" |
| 3487 "sub %2,%3 \n" |
| 3488 |
| 3489 // 8 pixel loop. |
| 3490 LABELALIGN |
| 3491 "1: \n" |
// Load 8 alpha bytes and expand each to the byte pair {a, 255 - a}.
| 3492 "movq (%2),%%xmm0 \n" |
| 3493 "punpcklbw %%xmm0,%%xmm0 \n" |
| 3494 "pxor %%xmm5,%%xmm0 \n" |
// Load 8 bytes from each source and interleave them as {src0, src1} pairs.
| 3495 "movq (%0,%2,1),%%xmm1 \n" |
| 3496 "movq (%1,%2,1),%%xmm2 \n" |
| 3497 "punpcklbw %%xmm2,%%xmm1 \n" |
// Bias the sources to signed bytes, then form a*(src0-128) + (255-a)*(src1-128).
| 3498 "psubb %%xmm6,%%xmm1 \n" |
| 3499 "pmaddubsw %%xmm1,%%xmm0 \n" |
// Undo the bias with rounding, keep the high byte of each word, pack to 8 bytes.
| 3500 "paddw %%xmm7,%%xmm0 \n" |
| 3501 "psrlw $0x8,%%xmm0 \n" |
| 3502 "packuswb %%xmm0,%%xmm0 \n" |
// Store 8 result bytes to dst, advance by 8 pixels, loop while width remains > 0.
| 3503 "movq %%xmm0,(%3,%2,1) \n" |
| 3504 "lea 0x8(%2),%2 \n" |
| 3505 "sub $0x8,%4 \n" |
| 3506 "jg 1b \n" |
| 3507 : "+r"(src0), // %0: src0, rebased to an offset from alpha |
| 3508 "+r"(src1), // %1: src1, rebased to an offset from alpha |
| 3509 "+r"(alpha), // %2: alpha pointer, advanced by 8 each iteration |
| 3510 "+r"(dst), // %3: dst, rebased to an offset from alpha |
| 3511 "+r"(width) // %4: remaining pixel count, decremented by 8 |
| 3512 :: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7" |
| 3513 ); |
| 3514 } |
| 3515 #endif // HAS_BLENDPLANEROW_SSSE3 |
| 3516 |
| 3517 |
3470 #ifdef HAS_ARGBATTENUATEROW_SSSE3 | 3518 #ifdef HAS_ARGBATTENUATEROW_SSSE3 |
3471 // Shuffle table duplicating alpha | 3519 // Shuffle table duplicating alpha |
// Layout: byte index 3 in six lanes then two 128 entries, followed by byte
// index 7 in six lanes then two 128 entries.
// NOTE(review): presumably a pshufb control mask in which 3 and 7 pick the
// alpha bytes of the first two 4-byte pixels and 128 (high bit set) zeroes the
// lane -- the consumer is outside this view, confirm there.
3472 static uvec8 kShuffleAlpha0 = { | 3520 static uvec8 kShuffleAlpha0 = { |
3473 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u | 3521 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u |
3474 }; | 3522 }; |
// Layout: byte index 11 in six lanes then two 128 entries, followed by byte
// index 15 in six lanes then two 128 entries.
// NOTE(review): presumably the pshufb control mask for the third and fourth
// 4-byte pixels of a 16-byte load (alpha at bytes 11 and 15), with 128 zeroing
// the lane -- the consumer is outside this view, confirm there.
3475 static uvec8 kShuffleAlpha1 = { | 3523 static uvec8 kShuffleAlpha1 = { |
3476 11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u, | 3524 11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u, |
3477 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u | 3525 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u |
3478 }; | 3526 }; |
3479 // Attenuate 4 pixels at a time. | 3527 // Attenuate 4 pixels at a time. |
(...skipping 2010 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5490 ); | 5538 ); |
5491 } | 5539 } |
5492 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5540 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5493 | 5541 |
5494 #endif // defined(__x86_64__) || defined(__i386__) | 5542 #endif // defined(__x86_64__) || defined(__i386__) |
5495 | 5543 |
5496 #ifdef __cplusplus | 5544 #ifdef __cplusplus |
5497 } // extern "C" | 5545 } // extern "C" |
5498 } // namespace libyuv | 5546 } // namespace libyuv |
5499 #endif | 5547 #endif |
OLD | NEW |