OLD | NEW |
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 548 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
559 "por %%xmm1,%%xmm0 \n" | 559 "por %%xmm1,%%xmm0 \n" |
560 "packssdw %%xmm0,%%xmm0 \n" | 560 "packssdw %%xmm0,%%xmm0 \n" |
561 "lea 0x10(%0),%0 \n" | 561 "lea 0x10(%0),%0 \n" |
562 "movq %%xmm0,(%1) \n" | 562 "movq %%xmm0,(%1) \n" |
563 "lea 0x8(%1),%1 \n" | 563 "lea 0x8(%1),%1 \n" |
564 "sub $0x4,%2 \n" | 564 "sub $0x4,%2 \n" |
565 "jg 1b \n" | 565 "jg 1b \n" |
566 : "+r"(src), // %0 | 566 : "+r"(src), // %0 |
567 "+r"(dst), // %1 | 567 "+r"(dst), // %1 |
568 "+r"(pix) // %2 | 568 "+r"(pix) // %2 |
569 : "m"(dither4) // %3 | 569 : "rm"(dither4) // %3 |
570 : "memory", "cc", | 570 : "memory", "cc", |
571 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" | 571 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" |
572 ); | 572 ); |
573 } | 573 } |
574 | 574 |
#ifdef HAS_ARGBTORGB565DITHERROW_AVX2
// Converts a row of 32-bit pixels at |src| into 16-bit 5:6:5 pixels at |dst|,
// adding a dither value to every byte of each pixel (with unsigned
// saturation) before the low bits are truncated.
//
//   src      input row, read 32 bytes (8 pixels) per loop iteration using
//            unaligned loads.
//   dst      output row, written 16 bytes (8 pixels) per loop iteration using
//            unaligned stores.
//   dither4  four packed dither bytes; byte (i & 3) is applied to pixel i.
//   pix      pixel count. It is decremented by 8 per iteration and the loop
//            body always runs at least once, so the caller must supply
//            buffers padded to a multiple of 8 pixels.
//
// Output bit layout per pixel: bits 0..4 come from the top 5 bits of source
// byte 0, bits 5..10 from the top 6 bits of source byte 1, and bits 11..15
// from the top 5 bits of source byte 2. Source byte 3 is discarded.
void ARGBToRGB565DitherRow_AVX2(const uint8* src, uint8* dst,
                                const uint32 dither4, int pix) {
  asm volatile (
    // Expand the 4 dither bytes so that ymm6 holds, for each of the 8 pixels
    // in a 32-byte load, one dither byte replicated across all 4 bytes of
    // that pixel.
    "vbroadcastss %3,%%xmm6                    \n"
    "vpunpcklbw %%xmm6,%%xmm6,%%xmm6           \n"
    "vpermq     $0xd8,%%ymm6,%%ymm6            \n"
    "vpunpcklwd %%ymm6,%%ymm6,%%ymm6           \n"
    // Build the per-dword field masks:
    //   ymm3 = 0x0000001f, ymm4 = 0x000007e0, ymm5 = 0x0000f800.
    "vpcmpeqb   %%ymm3,%%ymm3,%%ymm3           \n"
    "vpsrld     $0x1b,%%ymm3,%%ymm3            \n"
    "vpcmpeqb   %%ymm4,%%ymm4,%%ymm4           \n"
    "vpsrld     $0x1a,%%ymm4,%%ymm4            \n"
    "vpslld     $0x5,%%ymm4,%%ymm4             \n"
    "vpslld     $0xb,%%ymm3,%%ymm5             \n"

    LABELALIGN
  "1:                                          \n"
    "vmovdqu    (%0),%%ymm0                    \n"
    // Saturating add so dithering cannot wrap a bright byte around to dark.
    "vpaddusb   %%ymm6,%%ymm0,%%ymm0           \n"
    // Shift each source byte into its 5:6:5 position, then mask and merge.
    "vpsrld     $0x5,%%ymm0,%%ymm2             \n"
    "vpsrld     $0x3,%%ymm0,%%ymm1             \n"
    "vpsrld     $0x8,%%ymm0,%%ymm0             \n"
    "vpand      %%ymm4,%%ymm2,%%ymm2           \n"
    "vpand      %%ymm3,%%ymm1,%%ymm1           \n"
    "vpand      %%ymm5,%%ymm0,%%ymm0           \n"
    "vpor       %%ymm2,%%ymm1,%%ymm1           \n"
    "vpor       %%ymm1,%%ymm0,%%ymm0           \n"
    // Narrow dwords to words. The pack works per 128-bit lane, so vpermq
    // gathers the two useful quadwords into the low 128 bits for the store.
    "vpackusdw  %%ymm0,%%ymm0,%%ymm0           \n"
    "vpermq     $0xd8,%%ymm0,%%ymm0            \n"
    "lea        0x20(%0),%0                    \n"
    "vmovdqu    %%xmm0,(%1)                    \n"
    "lea        0x10(%1),%1                    \n"
    "sub        $0x8,%2                        \n"
    "jg         1b                             \n"
    "vzeroupper                                \n"
  : "+r"(src),    // %0
    "+r"(dst),    // %1
    "+r"(pix)     // %2
  // Must be a memory operand: vbroadcastss has no form that takes a
  // general-purpose register as its source, so an "rm" constraint could make
  // the compiler emit an instruction the assembler rejects.
  : "m"(dither4)  // %3
  : "memory", "cc",
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
  );
}
#endif  // HAS_ARGBTORGB565DITHERROW_AVX2
| 619 |
| 620 |
575 void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) { | 621 void ARGBToARGB1555Row_SSE2(const uint8* src, uint8* dst, int pix) { |
576 asm volatile ( | 622 asm volatile ( |
577 "pcmpeqb %%xmm4,%%xmm4 \n" | 623 "pcmpeqb %%xmm4,%%xmm4 \n" |
578 "psrld $0x1b,%%xmm4 \n" | 624 "psrld $0x1b,%%xmm4 \n" |
579 "movdqa %%xmm4,%%xmm5 \n" | 625 "movdqa %%xmm4,%%xmm5 \n" |
580 "pslld $0x5,%%xmm5 \n" | 626 "pslld $0x5,%%xmm5 \n" |
581 "movdqa %%xmm4,%%xmm6 \n" | 627 "movdqa %%xmm4,%%xmm6 \n" |
582 "pslld $0xa,%%xmm6 \n" | 628 "pslld $0xa,%%xmm6 \n" |
583 "pcmpeqb %%xmm7,%%xmm7 \n" | 629 "pcmpeqb %%xmm7,%%xmm7 \n" |
584 "pslld $0xf,%%xmm7 \n" | 630 "pslld $0xf,%%xmm7 \n" |
(...skipping 4953 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5538 ); | 5584 ); |
5539 } | 5585 } |
5540 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5586 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5541 | 5587 |
5542 #endif // defined(__x86_64__) || defined(__i386__) | 5588 #endif // defined(__x86_64__) || defined(__i386__) |
5543 | 5589 |
5544 #ifdef __cplusplus | 5590 #ifdef __cplusplus |
5545 } // extern "C" | 5591 } // extern "C" |
5546 } // namespace libyuv | 5592 } // namespace libyuv |
5547 #endif | 5593 #endif |
OLD | NEW |