OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 585 matching lines...)
596 "sub $0x10,%2 \n" | 596 "sub $0x10,%2 \n" |
597 "jg 1b \n" | 597 "jg 1b \n" |
598 : "+r"(src_ptr), // %0 | 598 : "+r"(src_ptr), // %0 |
599 "+r"(dst_ptr), // %1 | 599 "+r"(dst_ptr), // %1 |
600 "+r"(src_width) // %2 | 600 "+r"(src_width) // %2 |
601 : | 601 : |
602 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 602 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" |
603 ); | 603 ); |
604 } | 604 } |
605 | 605 |
| 606 |
| 607 #ifdef HAS_SCALEADDROW_AVX2 |
| 608 // Reads 32 bytes and accumulates to 32 shorts at a time. |
| 609 void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) { |
| 610 asm volatile ( |
| 611 "vpxor %%xmm5,%%xmm5 \n" |
| 612 |
| 613 LABELALIGN |
| 614 "1: \n" |
| 615 "vmovdqu " MEMACCESS(0) ",%%ymm3 \n" |
| 616 "lea " MEMLEA(0x20,0) ",%0 \n" // src_ptr += 16 |
| 617 "vpermq $0xd8,%%ymm3,%%ymm3 \n" |
| 618 "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n" |
| 619 "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n" |
| 620 "vpaddusw " MEMACCESS(1) ",%%ymm2,%%ymm0 \n" |
| 621 "vpaddusw " MEMACCESS2(0x10,1) ",%%ymm3,%%ymm1 \n" |
| 622 "vmovdqu %%ymm0," MEMACCESS(1) " \n" |
| 623 "vmovdqu %%ymm1," MEMACCESS2(0x10,1) " \n" |
| 624 "lea " MEMLEA(0x40,1) ",%1 \n" |
| 625 "sub $0x20,%2 \n" |
| 626 "jg 1b \n" |
| 627 "vzeroupper \n" |
| 628 : "+r"(src_ptr), // %0 |
| 629 "+r"(dst_ptr), // %1 |
| 630 "+r"(src_width) // %2 |
| 631 : |
| 632 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" |
| 633 ); |
| 634 } |
| 635 #endif // HAS_SCALEADDROW_AVX2 |
| 636 |
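For context, a minimal scalar sketch of what ScaleAddRow_SSE2 and the new ScaleAddRow_AVX2 compute: each source byte is widened and accumulated into a row of 16-bit sums, and vpaddusw saturates rather than wraps, modeled here with an explicit clamp. The helper name is hypothetical and the code is illustrative only, not part of this change.

    #include <stdint.h>

    // Scalar model of ScaleAddRow_*: widen each source byte and add it to
    // the 16-bit accumulator row. vpaddusw is a saturating add, so clamp
    // the sum at 65535 instead of letting it wrap. Illustrative only.
    static void ScaleAddRow_Sketch(const uint8_t* src_ptr, uint16_t* dst_ptr,
                                   int src_width) {
      for (int x = 0; x < src_width; ++x) {
        uint32_t sum = (uint32_t)dst_ptr[x] + src_ptr[x];
        dst_ptr[x] = sum > 65535u ? 65535u : (uint16_t)sum;
      }
    }

The AVX2 loop does this for 32 pixels per iteration: vpunpcklbw/vpunpckhbw against the zeroed ymm5 widen the 32 loaded bytes to shorts (the vpermq $0xd8 reorders the 64-bit lanes first so the in-lane unpacks produce in-order results), and the two vpaddusw instructions fold them into dst_ptr[0..15] and dst_ptr[16..31].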
606 // Bilinear column filtering. SSSE3 version. | 637 // Bilinear column filtering. SSSE3 version. |
607 void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, | 638 void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, |
608 int dst_width, int x, int dx) { | 639 int dst_width, int x, int dx) { |
609 intptr_t x0 = 0, x1 = 0, temp_pixel = 0; | 640 intptr_t x0 = 0, x1 = 0, temp_pixel = 0; |
610 asm volatile ( | 641 asm volatile ( |
611 "movd %6,%%xmm2 \n" | 642 "movd %6,%%xmm2 \n" |
612 "movd %7,%%xmm3 \n" | 643 "movd %7,%%xmm3 \n" |
613 "movl $0x04040000,%k2 \n" | 644 "movl $0x04040000,%k2 \n" |
614 "movd %k2,%%xmm5 \n" | 645 "movd %k2,%%xmm5 \n" |
615 "pcmpeqb %%xmm6,%%xmm6 \n" | 646 "pcmpeqb %%xmm6,%%xmm6 \n" |
(...skipping 449 matching lines...)
1065 ); | 1096 ); |
1066 return num; | 1097 return num; |
1067 } | 1098 } |
1068 | 1099 |
1069 #endif // defined(__x86_64__) || defined(__i386__) | 1100 #endif // defined(__x86_64__) || defined(__i386__) |
1070 | 1101 |
1071 #ifdef __cplusplus | 1102 #ifdef __cplusplus |
1072 } // extern "C" | 1103 } // extern "C" |
1073 } // namespace libyuv | 1104 } // namespace libyuv |
1074 #endif | 1105 #endif |
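ScaleFilterCols_SSSE3 above walks a 16.16 fixed-point source position x, stepped by dx per output pixel, and blends each pair of neighboring source pixels with a 7-bit fraction (the all-ones register built by pcmpeqb in the setup becomes the 0x7f fraction mask). A plain-C sketch of that arithmetic, with a hypothetical name, boundary handling omitted, for illustration only:

    #include <stdint.h>

    // Scalar model of bilinear column filtering: x is a 16.16 fixed-point
    // source position, dx the per-pixel step. The SSSE3 path reduces the
    // fraction to 7 bits and blends a*(127-f) + b*f, then shifts right 7
    // (it forms 127-f as f XOR 127 to feed pmaddubsw). Illustrative only.
    static void ScaleFilterCols_Sketch(uint8_t* dst_ptr, const uint8_t* src_ptr,
                                       int dst_width, int x, int dx) {
      for (int j = 0; j < dst_width; ++j) {
        int xi = x >> 16;           // integer source index
        int xf = (x >> 9) & 0x7f;   // 7-bit blend fraction
        int a = src_ptr[xi];
        int b = src_ptr[xi + 1];
        dst_ptr[j] = (uint8_t)((a * (127 - xf) + b * xf) >> 7);
        x += dx;
      }
    }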