OLD | NEW |
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 2842 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2853 : "+r"(src_argb), // %0 | 2853 : "+r"(src_argb), // %0 |
2854 "+r"(dst_a), // %1 | 2854 "+r"(dst_a), // %1 |
2855 "+rm"(width) // %2 | 2855 "+rm"(width) // %2 |
2856 : | 2856 : |
2857 : "memory", "cc" | 2857 : "memory", "cc" |
2858 , "xmm0", "xmm1" | 2858 , "xmm0", "xmm1" |
2859 ); | 2859 ); |
2860 } | 2860 } |
2861 #endif // HAS_ARGBEXTRACTALPHAROW_SSE2 | 2861 #endif // HAS_ARGBEXTRACTALPHAROW_SSE2 |
2862 | 2862 |
| 2863 #ifdef HAS_ARGBEXTRACTALPHAROW_AVX2 |
// Byte-shuffle control for vpshufb covering one 16-byte lane (four 4-byte
// pixels). In each group of four control bytes the first entry (3, 7, 11, 15)
// selects byte 3 of the corresponding pixel into the low byte of that 32-bit
// element; the 128 entries have bit 7 set, which makes vpshufb write zero to
// those destination bytes. Net effect per 32-bit element is a logical right
// shift by 24: byte 3 of the pixel, zero-extended to 32 bits. Going by the
// table and function names this is the alpha byte.
// ARGBExtractAlphaRow_AVX2 broadcasts this table to both 128-bit lanes.
| 2864 static const uvec8 kShuffleAlphaShort_AVX2 = { |
| 2865 3u, 128u, 128u, 128u, 7u, 128u, 128u, 128u, |
| 2866 11u, 128u, 128u, 128u, 15u, 128u, 128u, 128u |
| 2867 }; |
| 2868 |
// Extracts byte 3 of every 4-byte pixel in src_argb and stores the results as
// consecutive bytes in dst_a (the alpha channel, going by the names).
//
//   src_argb  source pixels, 4 bytes each; read with unaligned loads.
//   dst_a     destination, 1 byte per pixel; written with unaligned stores.
//   width     pixel count. It is decremented by 32 per pass and tested only
//             after the pass, so the loop body always runs at least once and
//             every pass reads 128 source bytes and writes 32 destination
//             bytes. A width that is not a positive multiple of 32 is
//             therefore effectively rounded up.
//             NOTE(review): presumably a caller/wrapper outside this view
//             handles remainders - confirm.
//
// NOTE(review): vmovdqa is an aligned load, so kPermdARGBToY_AVX must be
// 32-byte aligned; its definition is not visible here - confirm.
| 2869 void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, uint8* dst_a, int width) { |
| 2870 asm volatile ( |
// Loop-invariant setup: ymm4 = dword permute indices (%3), ymm5 = the 16-byte
// shuffle table (%4) replicated into both 128-bit lanes.
| 2871 "vmovdqa %3,%%ymm4 \n" |
| 2872 "vbroadcastf128 %4,%%ymm5 \n" |
| 2873 LABELALIGN |
| 2874 "1: \n" |
// First 64 source bytes (16 pixels) -> ymm0, ymm1.
| 2875 "vmovdqu " MEMACCESS(0) ", %%ymm0 \n" |
| 2876 "vmovdqu " MEMACCESS2(0x20, 0) ", %%ymm1 \n" |
// Shuffle leaves byte 3 of each pixel in the low byte of its dword and zeroes
// the rest; the original remark below notes this is equivalent to a 24-bit
// logical right shift of each dword.
| 2877 "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" // vpsrld $0x18, %%ymm0 |
| 2878 "vpshufb %%ymm5,%%ymm1,%%ymm1 \n" |
// Next 64 source bytes (16 pixels) -> ymm2, ymm3, then advance src by 128.
| 2879 "vmovdqu " MEMACCESS2(0x40, 0) ", %%ymm2 \n" |
| 2880 "vmovdqu " MEMACCESS2(0x60, 0) ", %%ymm3 \n" |
| 2881 "lea " MEMLEA(0x80, 0) ", %0 \n" |
// Narrow dwords -> words. Values are 0..255 so the signed saturation never
// triggers. The 256-bit pack works on each 128-bit lane independently, which
// interleaves the element order across lanes ("mutates").
| 2882 "vpackssdw %%ymm1, %%ymm0, %%ymm0 \n" // mutates |
| 2883 "vpshufb %%ymm5,%%ymm2,%%ymm2 \n" |
| 2884 "vpshufb %%ymm5,%%ymm3,%%ymm3 \n" |
| 2885 "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // mutates |
// Narrow words -> bytes (again per lane): 32 result bytes in ymm0, still in
// lane-interleaved order.
| 2886 "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates. |
// Reorder the eight dwords of ymm0 using the indices in ymm4. Per the original
// remark this restores linear pixel order; the index table itself is defined
// outside this view.
| 2887 "vpermd %%ymm0,%%ymm4,%%ymm0 \n" // unmutate. |
// Store 32 output bytes, advance dst by 32, consume 32 pixels of width and
// repeat while the remaining width is still greater than zero.
| 2888 "vmovdqu %%ymm0," MEMACCESS(1) " \n" |
| 2889 "lea " MEMLEA(0x20,1) ",%1 \n" |
| 2890 "sub $0x20, %2 \n" |
| 2891 "jg 1b \n" |
// Zero the upper halves of the YMM registers before leaving the AVX code.
| 2892 "vzeroupper \n" |
// Operands %0-%2 are read-write: both pointers and the counter are modified
// by the loop.
| 2893 : "+r"(src_argb), // %0 |
| 2894 "+r"(dst_a), // %1 |
| 2895 "+rm"(width) // %2 |
// Operands %3-%4 are the constant tables, referenced directly from memory.
| 2896 : "m"(kPermdARGBToY_AVX), // %3 |
| 2897 "m"(kShuffleAlphaShort_AVX2) // %4 |
// Clobbers: memory (dst_a is written through a pointer), flags (sub), and
// vector registers 0-5.
| 2898 : "memory", "cc" |
| 2899 , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 2900 ); |
| 2901 } |
| 2902 #endif // HAS_ARGBEXTRACTALPHAROW_AVX2 |
| 2903 |
2863 #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 | 2904 #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 |
2864 // width in pixels | 2905 // width in pixels |
2865 void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { | 2906 void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { |
2866 asm volatile ( | 2907 asm volatile ( |
2867 "pcmpeqb %%xmm0,%%xmm0 \n" | 2908 "pcmpeqb %%xmm0,%%xmm0 \n" |
2868 "pslld $0x18,%%xmm0 \n" | 2909 "pslld $0x18,%%xmm0 \n" |
2869 "pcmpeqb %%xmm1,%%xmm1 \n" | 2910 "pcmpeqb %%xmm1,%%xmm1 \n" |
2870 "psrld $0x8,%%xmm1 \n" | 2911 "psrld $0x8,%%xmm1 \n" |
2871 LABELALIGN | 2912 LABELALIGN |
2872 "1: \n" | 2913 "1: \n" |
(...skipping 2618 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5491 ); | 5532 ); |
5492 } | 5533 } |
5493 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5534 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5494 | 5535 |
5495 #endif // defined(__x86_64__) || defined(__i386__) | 5536 #endif // defined(__x86_64__) || defined(__i386__) |
5496 | 5537 |
5497 #ifdef __cplusplus | 5538 #ifdef __cplusplus |
5498 } // extern "C" | 5539 } // extern "C" |
5499 } // namespace libyuv | 5540 } // namespace libyuv |
5500 #endif | 5541 #endif |
OLD | NEW |