Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(165)

Side by Side Diff: source/row_gcc.cc

Issue 1505433002: AVX2 YUV alpha blender and improved unittests (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: off by 1 fix on win Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/planar_functions.cc ('k') | source/row_win.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // VERSION 2 1 // VERSION 2
2 /* 2 /*
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
4 * 4 *
5 * Use of this source code is governed by a BSD-style license 5 * Use of this source code is governed by a BSD-style license
6 * that can be found in the LICENSE file in the root of the source 6 * that can be found in the LICENSE file in the root of the source
7 * tree. An additional intellectual property rights grant can be found 7 * tree. An additional intellectual property rights grant can be found
8 * in the file PATENTS. All contributing project authors may 8 * in the file PATENTS. All contributing project authors may
9 * be found in the AUTHORS file in the root of the source tree. 9 * be found in the AUTHORS file in the root of the source tree.
10 */ 10 */
(...skipping 3449 matching lines...) Expand 10 before | Expand all | Expand 10 after
3460 "+r"(src_argb1), // %1 3460 "+r"(src_argb1), // %1
3461 "+r"(dst_argb), // %2 3461 "+r"(dst_argb), // %2
3462 "+r"(width) // %3 3462 "+r"(width) // %3
3463 : "m"(kShuffleAlpha) // %4 3463 : "m"(kShuffleAlpha) // %4
3464 : "memory", "cc" 3464 : "memory", "cc"
3465 , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" 3465 , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
3466 ); 3466 );
3467 } 3467 }
3468 #endif // HAS_ARGBBLENDROW_SSSE3 3468 #endif // HAS_ARGBBLENDROW_SSSE3
3469 3469
3470
3471 #ifdef HAS_BLENDPLANEROW_SSSE3 3470 #ifdef HAS_BLENDPLANEROW_SSSE3
3472 // Blend 8 pixels at a time. 3471 // Blend 8 pixels at a time.
3473 // =((G2*C2)+(H2*(D2))+32768+127)/256 3472 // =((G2*C2)+(H2*(D2))+32768+127)/256
3474 void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1, 3473 void BlendPlaneRow_SSSE3(const uint8* src0, const uint8* src1,
3475 const uint8* alpha, uint8* dst, int width) { 3474 const uint8* alpha, uint8* dst, int width) {
3476 asm volatile ( 3475 asm volatile (
3477 "pcmpeqb %%xmm5,%%xmm5 \n" 3476 "pcmpeqb %%xmm5,%%xmm5 \n"
3478 "psllw $0x8,%%xmm5 \n" 3477 "psllw $0x8,%%xmm5 \n"
3479 "mov $0x80808080,%%eax \n" 3478 "mov $0x80808080,%%eax \n"
3480 "movd %%eax,%%xmm6 \n" 3479 "movd %%eax,%%xmm6 \n"
(...skipping 26 matching lines...) Expand all
3507 : "+r"(src0), // %0 3506 : "+r"(src0), // %0
3508 "+r"(src1), // %1 3507 "+r"(src1), // %1
3509 "+r"(alpha), // %2 3508 "+r"(alpha), // %2
3510 "+r"(dst), // %3 3509 "+r"(dst), // %3
3511 "+r"(width) // %4 3510 "+r"(width) // %4
3512 :: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7" 3511 :: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7"
3513 ); 3512 );
3514 } 3513 }
3515 #endif // HAS_BLENDPLANEROW_SSSE3 3514 #endif // HAS_BLENDPLANEROW_SSSE3
3516 3515
3516 #ifdef HAS_BLENDPLANEROW_AVX2
3517 // Blend 16 pixels at a time.
// Blends two byte planes (src0, src1) into dst under a per-pixel alpha plane.
// For every byte the instruction sequence below evaluates
//   dst = (alpha * src0 + (255 - alpha) * src1 + 255) >> 8
// It does so by multiplying the unsigned weights (alpha, 255 - alpha) with the
// sources biased to signed bytes (src - 128) via vpmaddubsw, then adding
// 0x807f. The bias removes 128 * 255 = 32640 from the sum and 0x807f = 32895
// puts back 32640 + 255, which is where the "+32768+127" term in the
// spreadsheet-style note below comes from.
//
// The loop is a do/while: it always runs at least once and each pass reads and
// writes a full 16 bytes with no tail handling.
// NOTE(review): width is therefore presumably a positive multiple of 16 (or
// the buffers are padded) - confirm against the callers.
3518 // =((G2*C2)+(H2*(D2))+32768+127)/256
3519 void BlendPlaneRow_AVX2(const uint8* src0, const uint8* src1,
3520 const uint8* alpha, uint8* dst, int width) {
3521 asm volatile (
// ymm5 = 0xff00 in every 16-bit word: xor mask that flips only the high byte.
3522 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
3523 "vpsllw $0x8,%%ymm5,%%ymm5 \n"
// ymm6 = 0x80 in every byte: bias that maps unsigned sources to signed bytes.
3524 "mov $0x80808080,%%eax \n"
3525 "vmovd %%eax,%%xmm6 \n"
3526 "vbroadcastss %%xmm6,%%ymm6 \n"
// ymm7 = 0x807f in every 16-bit word: bias restore plus rounding term.
3527 "mov $0x807f807f,%%eax \n"
3528 "vmovd %%eax,%%xmm7 \n"
3529 "vbroadcastss %%xmm7,%%ymm7 \n"
// Turn src0, src1 and dst into offsets relative to alpha so that the single
// advancing register %2 indexes all four planes.
3530 "sub %2,%0 \n"
3531 "sub %2,%1 \n"
3532 "sub %2,%3 \n"
3533
3534 // 16 pixel loop.
// LABELALIGN is a macro defined elsewhere in the file; presumably it aligns
// the loop label - confirm at its definition.
3535 LABELALIGN
3536 "1: \n"
// Load 16 alpha bytes into the low 128 bits of ymm0.
3537 "vmovdqu (%2),%%xmm0 \n"
// vpermq 0xd8 selects quadwords 0,2,1,3: bytes 8..15 move to the start of the
// upper lane so the in-lane unpack below sees 8 pixels per lane.
3538 "vpermq $0xd8,%%ymm0,%%ymm0 \n"
// Duplicate each alpha byte into a word (a, a) ...
3539 "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n"
// ... and invert the high byte, giving the weight pair (a, 255 - a).
3540 "vpxor %%ymm5,%%ymm0,%%ymm0 \n"
// Load 16 bytes from each source plane (addressed as offset + alpha pointer).
3541 "vmovdqu (%0,%2,1),%%xmm1 \n"
3542 "vmovdqu (%1,%2,1),%%xmm2 \n"
3543 "vpermq $0xd8,%%ymm1,%%ymm1 \n"
3544 "vpermq $0xd8,%%ymm2,%%ymm2 \n"
// Interleave to (src0, src1) byte pairs, matching the (a, 255 - a) weights.
3545 "vpunpcklbw %%ymm2,%%ymm1,%%ymm1 \n"
// Subtract 0x80 from every byte so the sources can be used as signed operands.
3546 "vpsubb %%ymm6,%%ymm1,%%ymm1 \n"
// Per word: a * (src0 - 128) + (255 - a) * (src1 - 128); ymm0 holds the
// unsigned operand, ymm1 the signed one.
3547 "vpmaddubsw %%ymm1,%%ymm0,%%ymm0 \n"
// Undo the bias and add the rounding term, then keep the high byte.
3548 "vpaddw %%ymm7,%%ymm0,%%ymm0 \n"
3549 "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
// Pack words to bytes within each lane, then gather both lanes' low
// quadwords into the low 128 bits for a single 16-byte store.
3550 "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"
3551 "vpermq $0xd8,%%ymm0,%%ymm0 \n"
3552 "vmovdqu %%xmm0,(%3,%2,1) \n"
// Advance the shared index by 16 pixels and loop while width stays positive.
3553 "lea 0x10(%2),%2 \n"
3554 "sub $0x10,%4 \n"
3555 "jg 1b \n"
// Clear the upper halves of the ymm registers before returning.
3556 "vzeroupper \n"
3557 : "+r"(src0), // %0
3558 "+r"(src1), // %1
3559 "+r"(alpha), // %2
3560 "+r"(dst), // %3
3561 "+r"(width) // %4
3562 :: "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7"
3563 );
3564 }
3565 #endif // HAS_BLENDPLANEROW_AVX2
3517 3566
3518 #ifdef HAS_ARGBATTENUATEROW_SSSE3 3567 #ifdef HAS_ARGBATTENUATEROW_SSSE3
3519 // Shuffle table duplicating alpha 3568 // Shuffle table duplicating alpha
3520 static uvec8 kShuffleAlpha0 = { 3569 static uvec8 kShuffleAlpha0 = {
3521 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u 3570 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u
3522 }; 3571 };
3523 static uvec8 kShuffleAlpha1 = { 3572 static uvec8 kShuffleAlpha1 = {
3524 11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u, 3573 11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u,
3525 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u 3574 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u
3526 }; 3575 };
(...skipping 2011 matching lines...) Expand 10 before | Expand all | Expand 10 after
5538 ); 5587 );
5539 } 5588 }
5540 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 5589 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
5541 5590
5542 #endif // defined(__x86_64__) || defined(__i386__) 5591 #endif // defined(__x86_64__) || defined(__i386__)
5543 5592
5544 #ifdef __cplusplus 5593 #ifdef __cplusplus
5545 } // extern "C" 5594 } // extern "C"
5546 } // namespace libyuv 5595 } // namespace libyuv
5547 #endif 5596 #endif
OLDNEW
« no previous file with comments | « source/planar_functions.cc ('k') | source/row_win.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698