OLD | NEW |
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 3513 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3524 "mov $0x80808080,%%eax \n" | 3524 "mov $0x80808080,%%eax \n" |
3525 "vmovd %%eax,%%xmm6 \n" | 3525 "vmovd %%eax,%%xmm6 \n" |
3526 "vbroadcastss %%xmm6,%%ymm6 \n" | 3526 "vbroadcastss %%xmm6,%%ymm6 \n" |
3527 "mov $0x807f807f,%%eax \n" | 3527 "mov $0x807f807f,%%eax \n" |
3528 "vmovd %%eax,%%xmm7 \n" | 3528 "vmovd %%eax,%%xmm7 \n" |
3529 "vbroadcastss %%xmm7,%%ymm7 \n" | 3529 "vbroadcastss %%xmm7,%%ymm7 \n" |
3530 "sub %2,%0 \n" | 3530 "sub %2,%0 \n" |
3531 "sub %2,%1 \n" | 3531 "sub %2,%1 \n" |
3532 "sub %2,%3 \n" | 3532 "sub %2,%3 \n" |
3533 | 3533 |
3534 // 16 pixel loop. | 3534 // 32 pixel loop. |
3535 LABELALIGN | 3535 LABELALIGN |
3536 "1: \n" | 3536 "1: \n" |
3537 "vmovdqu (%2),%%xmm0 \n" | 3537 "vmovdqu (%2),%%ymm0 \n" |
3538 "vpermq $0xd8,%%ymm0,%%ymm0 \n" | 3538 "vpunpckhbw %%ymm0,%%ymm0,%%ymm3 \n" |
3539 "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n" | 3539 "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n" |
| 3540 "vpxor %%ymm5,%%ymm3,%%ymm3 \n" |
3540 "vpxor %%ymm5,%%ymm0,%%ymm0 \n" | 3541 "vpxor %%ymm5,%%ymm0,%%ymm0 \n" |
3541 "vmovdqu (%0,%2,1),%%xmm1 \n" | 3542 "vmovdqu (%0,%2,1),%%ymm1 \n" |
3542 "vmovdqu (%1,%2,1),%%xmm2 \n" | 3543 "vmovdqu (%1,%2,1),%%ymm2 \n" |
3543 "vpermq $0xd8,%%ymm1,%%ymm1 \n" | 3544 "vpunpckhbw %%ymm2,%%ymm1,%%ymm4 \n" |
3544 "vpermq $0xd8,%%ymm2,%%ymm2 \n" | |
3545 "vpunpcklbw %%ymm2,%%ymm1,%%ymm1 \n" | 3545 "vpunpcklbw %%ymm2,%%ymm1,%%ymm1 \n" |
| 3546 "vpsubb %%ymm6,%%ymm4,%%ymm4 \n" |
3546 "vpsubb %%ymm6,%%ymm1,%%ymm1 \n" | 3547 "vpsubb %%ymm6,%%ymm1,%%ymm1 \n" |
| 3548 "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" |
3547 "vpmaddubsw %%ymm1,%%ymm0,%%ymm0 \n" | 3549 "vpmaddubsw %%ymm1,%%ymm0,%%ymm0 \n" |
| 3550 "vpaddw %%ymm7,%%ymm3,%%ymm3 \n" |
3548 "vpaddw %%ymm7,%%ymm0,%%ymm0 \n" | 3551 "vpaddw %%ymm7,%%ymm0,%%ymm0 \n" |
| 3552 "vpsrlw $0x8,%%ymm3,%%ymm3 \n" |
3549 "vpsrlw $0x8,%%ymm0,%%ymm0 \n" | 3553 "vpsrlw $0x8,%%ymm0,%%ymm0 \n" |
3550 "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" | 3554 "vpackuswb %%ymm3,%%ymm0,%%ymm0 \n" |
3551 "vpermq $0xd8,%%ymm0,%%ymm0 \n" | 3555 "vmovdqu %%ymm0,(%3,%2,1) \n" |
3552 "vmovdqu %%xmm0,(%3,%2,1) \n" | 3556 "lea 0x20(%2),%2 \n" |
3553 "lea 0x10(%2),%2 \n" | 3557 "sub $0x20,%4 \n" |
3554 "sub $0x10,%4 \n" | |
3555 "jg 1b \n" | 3558 "jg 1b \n" |
3556 "vzeroupper \n" | 3559 "vzeroupper \n" |
3557 : "+r"(src0), // %0 | 3560 : "+r"(src0), // %0 |
3558 "+r"(src1), // %1 | 3561 "+r"(src1), // %1 |
3559 "+r"(alpha), // %2 | 3562 "+r"(alpha), // %2 |
3560 "+r"(dst), // %3 | 3563 "+r"(dst), // %3 |
3561 "+r"(width) // %4 | 3564 "+r"(width) // %4 |
3562 :: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7" | 3565 :: "memory", "cc", "eax", |
| 3566 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" |
3563 ); | 3567 ); |
3564 } | 3568 } |
3565 #endif // HAS_BLENDPLANEROW_AVX2 | 3569 #endif // HAS_BLENDPLANEROW_AVX2 |
3566 | 3570 |
3567 #ifdef HAS_ARGBATTENUATEROW_SSSE3 | 3571 #ifdef HAS_ARGBATTENUATEROW_SSSE3 |
3568 // Shuffle table duplicating alpha | 3572 // Shuffle table duplicating alpha |
3569 static uvec8 kShuffleAlpha0 = { | 3573 static uvec8 kShuffleAlpha0 = { |
3570 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u | 3574 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u |
3571 }; | 3575 }; |
3572 static uvec8 kShuffleAlpha1 = { | 3576 static uvec8 kShuffleAlpha1 = { |
(...skipping 2014 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5587 ); | 5591 ); |
5588 } | 5592 } |
5589 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5593 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5590 | 5594 |
5591 #endif // defined(__x86_64__) || defined(__i386__) | 5595 #endif // defined(__x86_64__) || defined(__i386__) |
5592 | 5596 |
5593 #ifdef __cplusplus | 5597 #ifdef __cplusplus |
5594 } // extern "C" | 5598 } // extern "C" |
5595 } // namespace libyuv | 5599 } // namespace libyuv |
5596 #endif | 5600 #endif |
OLD | NEW |