Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(53)

Side by Side Diff: source/row_gcc.cc

Issue 1391273003: port ARGB to 565 dithering AVX2 code to GCC. (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « include/libyuv/version.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // VERSION 2 1 // VERSION 2
2 /* 2 /*
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
4 * 4 *
5 * Use of this source code is governed by a BSD-style license 5 * Use of this source code is governed by a BSD-style license
6 * that can be found in the LICENSE file in the root of the source 6 * that can be found in the LICENSE file in the root of the source
7 * tree. An additional intellectual property rights grant can be found 7 * tree. An additional intellectual property rights grant can be found
8 * in the file PATENTS. All contributing project authors may 8 * in the file PATENTS. All contributing project authors may
9 * be found in the AUTHORS file in the root of the source tree. 9 * be found in the AUTHORS file in the root of the source tree.
10 */ 10 */
(...skipping 548 matching lines...) Expand 10 before | Expand all | Expand 10 after
559 "por %%xmm1,%%xmm0 \n" 559 "por %%xmm1,%%xmm0 \n"
560 "packssdw %%xmm0,%%xmm0 \n" 560 "packssdw %%xmm0,%%xmm0 \n"
561 "lea 0x10(%0),%0 \n" 561 "lea 0x10(%0),%0 \n"
562 "movq %%xmm0,(%1) \n" 562 "movq %%xmm0,(%1) \n"
563 "lea 0x8(%1),%1 \n" 563 "lea 0x8(%1),%1 \n"
564 "sub $0x4,%2 \n" 564 "sub $0x4,%2 \n"
565 "jg 1b \n" 565 "jg 1b \n"
566 : "+r"(src), // %0 566 : "+r"(src), // %0
567 "+r"(dst), // %1 567 "+r"(dst), // %1
568 "+r"(pix) // %2 568 "+r"(pix) // %2
569 : "m"(dither4) // %3 569 : "rm"(dither4) // %3
570 : "memory", "cc", 570 : "memory", "cc",
571 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" 571 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
572 ); 572 );
573 } 573 }
574 574
#ifdef HAS_ARGBTORGB565DITHERROW_AVX2
// Converts a row of 32-bit pixels at |src| to 16-bit 5:6:5 pixels at |dst|,
// adding a per-pixel dither value (with unsigned saturation) before the low
// bits of each channel are discarded.
//
//   src      Source row; 32 bytes (8 pixels) are read per loop iteration.
//            Unaligned loads are used, so no alignment is required.
//   dst      Destination row; 16 bytes (8 pixels) are written per iteration.
//            Unaligned stores are used.
//   dither4  Four dither bytes d0..d3 (lowest byte first). Within each group
//            of 8 pixels, d[i] is added to every byte of pixels i and i + 4.
//   pix      Number of pixels to convert. The loop body always runs at least
//            once and handles 8 pixels per pass, so a count that is not a
//            positive multiple of 8 reads and writes past the requested
//            range. NOTE(review): callers presumably round up or pass
//            multiples of 8 -- confirm against the call sites.
//
// Output layout per 16-bit pixel, derived from the shifts and masks below:
//   bits  0..4   = top 5 bits of source byte 0
//   bits  5..10  = top 6 bits of source byte 1
//   bits 11..15  = top 5 bits of source byte 2
// Source byte 3 is ignored. (Presumably B, G, R, A respectively, per the
// function name.)
void ARGBToRGB565DitherRow_AVX2(const uint8* src, uint8* dst,
                                const uint32 dither4, int pix) {
  asm volatile (
    // Build the dither vector in ymm6. After these four instructions each
    // 128-bit lane holds d0 x4, d1 x4, d2 x4, d3 x4, i.e. one dither byte
    // replicated across all four bytes of each pixel.
    "vbroadcastss %3,%%xmm6                    \n"
    "vpunpcklbw %%xmm6,%%xmm6,%%xmm6           \n"
    // Move the second quadword into the upper lane so the in-lane unpack
    // below sees the same pattern in both lanes.
    "vpermq     $0xd8,%%ymm6,%%ymm6            \n"
    "vpunpcklwd %%ymm6,%%ymm6,%%ymm6           \n"
    // ymm3 = 0x0000001f per dword (mask for bits 0..4).
    "vpcmpeqb   %%ymm3,%%ymm3,%%ymm3           \n"
    "vpsrld     $0x1b,%%ymm3,%%ymm3            \n"
    // ymm4 = 0x000007e0 per dword (mask for bits 5..10).
    "vpcmpeqb   %%ymm4,%%ymm4,%%ymm4           \n"
    "vpsrld     $0x1a,%%ymm4,%%ymm4            \n"
    "vpslld     $0x5,%%ymm4,%%ymm4             \n"
    // ymm5 = 0x0000f800 per dword (mask for bits 11..15).
    "vpslld     $0xb,%%ymm3,%%ymm5             \n"

    LABELALIGN
  "1:                                          \n"
    "vmovdqu    (%0),%%ymm0                    \n"
    // Saturating add so bright channels clamp at 255 instead of wrapping.
    "vpaddusb   %%ymm6,%%ymm0,%%ymm0           \n"
    // Shift each source byte's top bits into its 5:6:5 position.
    "vpsrld     $0x5,%%ymm0,%%ymm2             \n"
    "vpsrld     $0x3,%%ymm0,%%ymm1             \n"
    "vpsrld     $0x8,%%ymm0,%%ymm0             \n"
    "vpand      %%ymm4,%%ymm2,%%ymm2           \n"
    "vpand      %%ymm3,%%ymm1,%%ymm1           \n"
    "vpand      %%ymm5,%%ymm0,%%ymm0           \n"
    "vpor       %%ymm2,%%ymm1,%%ymm1           \n"
    "vpor       %%ymm1,%%ymm0,%%ymm0           \n"
    // Narrow dwords to words. Every value is <= 0xffff after masking, so the
    // unsigned saturation never triggers. The pack works per 128-bit lane,
    // so vpermq then gathers both lanes' results into the low 128 bits.
    "vpackusdw  %%ymm0,%%ymm0,%%ymm0           \n"
    "vpermq     $0xd8,%%ymm0,%%ymm0            \n"
    "lea        0x20(%0),%0                    \n"
    "vmovdqu    %%xmm0,(%1)                    \n"
    "lea        0x10(%1),%1                    \n"
    "sub        $0x8,%2                        \n"
    "jg         1b                             \n"
    // Clear the upper ymm halves before returning to non-VEX code.
    "vzeroupper                                \n"
  : "+r"(src),    // %0
    "+r"(dst),    // %1
    "+r"(pix)     // %2
  // Must be a memory operand: vbroadcastss accepts only a memory or XMM
  // source. An "rm" constraint would let the compiler pick a general-purpose
  // register, which yields an instruction the assembler rejects.
  : "m"(dither4)  // %3
  : "memory", "cc",
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
  );
}
#endif  // HAS_ARGBTORGB565DITHERROW_AVX2
619
620
575 void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) { 621 void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) {
576 asm volatile ( 622 asm volatile (
577 "pcmpeqb %%xmm4,%%xmm4 \n" 623 "pcmpeqb %%xmm4,%%xmm4 \n"
578 "psrld $0x1b,%%xmm4 \n" 624 "psrld $0x1b,%%xmm4 \n"
579 "movdqa %%xmm4,%%xmm5 \n" 625 "movdqa %%xmm4,%%xmm5 \n"
580 "pslld $0x5,%%xmm5 \n" 626 "pslld $0x5,%%xmm5 \n"
581 "movdqa %%xmm4,%%xmm6 \n" 627 "movdqa %%xmm4,%%xmm6 \n"
582 "pslld $0xa,%%xmm6 \n" 628 "pslld $0xa,%%xmm6 \n"
583 "pcmpeqb %%xmm7,%%xmm7 \n" 629 "pcmpeqb %%xmm7,%%xmm7 \n"
584 "pslld $0xf,%%xmm7 \n" 630 "pslld $0xf,%%xmm7 \n"
(...skipping 4953 matching lines...) Expand 10 before | Expand all | Expand 10 after
5538 ); 5584 );
5539 } 5585 }
5540 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 5586 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
5541 5587
5542 #endif // defined(__x86_64__) || defined(__i386__) 5588 #endif // defined(__x86_64__) || defined(__i386__)
5543 5589
5544 #ifdef __cplusplus 5590 #ifdef __cplusplus
5545 } // extern "C" 5591 } // extern "C"
5546 } // namespace libyuv 5592 } // namespace libyuv
5547 #endif 5593 #endif
OLD | NEW
« no previous file with comments | « include/libyuv/version.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698