OLD | NEW |
---|---|
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 2842 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2853 : "+r"(src_argb), // %0 | 2853 : "+r"(src_argb), // %0 |
2854 "+r"(dst_a), // %1 | 2854 "+r"(dst_a), // %1 |
2855 "+rm"(width) // %2 | 2855 "+rm"(width) // %2 |
2856 : | 2856 : |
2857 : "memory", "cc" | 2857 : "memory", "cc" |
2858 , "xmm0", "xmm1" | 2858 , "xmm0", "xmm1" |
2859 ); | 2859 ); |
2860 } | 2860 } |
2861 #endif // HAS_ARGBEXTRACTALPHAROW_SSE2 | 2861 #endif // HAS_ARGBEXTRACTALPHAROW_SSE2 |
2862 | 2862 |
2863 #ifdef HAS_ARGBEXTRACTALPHAROW_AVX2 | |
2864 void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, uint8* dst_a, int width) { | |
2865 asm volatile ( | |
2866 "vmovdqa %3,%%ymm6 \n" | |
2867 LABELALIGN | |
2868 "1: \n" | |
2869 "vmovdqu " MEMACCESS(0) ", %%ymm0 \n" | |
2870 "vmovdqu " MEMACCESS2(0x20, 0) ", %%ymm1 \n" | |
2871 "lea " MEMLEA(0x40, 0) ", %0 \n" | |
2872 "vpsrld $0x18, %%ymm0, %%ymm0 \n" | |
2873 "vpsrld $0x18, %%ymm1, %%ymm1 \n" | |
2874 "vpackssdw %%ymm1, %%ymm0, %%ymm0 \n" // mutates | |
2875 "vpackuswb %%ymm0, %%ymm0, %%ymm0 \n" // mutates | |
2876 "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate. | |
2877 "vmovdqu %%xmm0," MEMACCESS(1) " \n" | |
2878 "lea " MEMLEA(0x10, 1) ", %1 \n" | |
2879 "sub $0x10, %2 \n" | |
2880 "jg 1b \n" | |
2881 "vzeroupper \n" | |
2882 : "+r"(src_argb), // %0 | |
2883 "+r"(dst_a), // %1 | |
2884 "+rm"(width) // %2 | |
wangcheng
2016/10/13 19:52:18
Do you need to check that "width" is a multiple of 16?
fbarchard1
2016/10/13 21:46:27
planar_functions.cc does that check:
#if defined(
| |
2885 : "m"(kPermdARGBToY_AVX) // %3 | |
2886 : "memory", "cc" | |
2887 , "xmm0", "xmm1", "xmm6" | |
2888 ); | |
2889 } | |
2890 #endif // HAS_ARGBEXTRACTALPHAROW_AVX2 | |
2891 | |
2892 | |
2863 #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 | 2893 #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 |
2864 // width in pixels | 2894 // width in pixels |
2865 void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { | 2895 void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { |
2866 asm volatile ( | 2896 asm volatile ( |
2867 "pcmpeqb %%xmm0,%%xmm0 \n" | 2897 "pcmpeqb %%xmm0,%%xmm0 \n" |
2868 "pslld $0x18,%%xmm0 \n" | 2898 "pslld $0x18,%%xmm0 \n" |
2869 "pcmpeqb %%xmm1,%%xmm1 \n" | 2899 "pcmpeqb %%xmm1,%%xmm1 \n" |
2870 "psrld $0x8,%%xmm1 \n" | 2900 "psrld $0x8,%%xmm1 \n" |
2871 LABELALIGN | 2901 LABELALIGN |
2872 "1: \n" | 2902 "1: \n" |
(...skipping 2618 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
5491 ); | 5521 ); |
5492 } | 5522 } |
5493 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5523 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5494 | 5524 |
5495 #endif // defined(__x86_64__) || defined(__i386__) | 5525 #endif // defined(__x86_64__) || defined(__i386__) |
5496 | 5526 |
5497 #ifdef __cplusplus | 5527 #ifdef __cplusplus |
5498 } // extern "C" | 5528 } // extern "C" |
5499 } // namespace libyuv | 5529 } // namespace libyuv |
5500 #endif | 5530 #endif |
OLD | NEW |