Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(156)

Side by Side Diff: source/row_gcc.cc

Issue 2420553002: Add ARGBExtractAlpha_AVX2 function (Closed)
Patch Set: bump version Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_any.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // VERSION 2 1 // VERSION 2
2 /* 2 /*
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
4 * 4 *
5 * Use of this source code is governed by a BSD-style license 5 * Use of this source code is governed by a BSD-style license
6 * that can be found in the LICENSE file in the root of the source 6 * that can be found in the LICENSE file in the root of the source
7 * tree. An additional intellectual property rights grant can be found 7 * tree. An additional intellectual property rights grant can be found
8 * in the file PATENTS. All contributing project authors may 8 * in the file PATENTS. All contributing project authors may
9 * be found in the AUTHORS file in the root of the source tree. 9 * be found in the AUTHORS file in the root of the source tree.
10 */ 10 */
(...skipping 2842 matching lines...) Expand 10 before | Expand all | Expand 10 after
2853 : "+r"(src_argb), // %0 2853 : "+r"(src_argb), // %0
2854 "+r"(dst_a), // %1 2854 "+r"(dst_a), // %1
2855 "+rm"(width) // %2 2855 "+rm"(width) // %2
2856 : 2856 :
2857 : "memory", "cc" 2857 : "memory", "cc"
2858 , "xmm0", "xmm1" 2858 , "xmm0", "xmm1"
2859 ); 2859 );
2860 } 2860 }
2861 #endif // HAS_ARGBEXTRACTALPHAROW_SSE2 2861 #endif // HAS_ARGBEXTRACTALPHAROW_SSE2
2862 2862
2863 #ifdef HAS_ARGBEXTRACTALPHAROW_AVX2
// Byte-shuffle control table, passed as operand %4 to ARGBExtractAlphaRow_AVX2
// and used there as the vpshufb mask. For every 4-byte group the first index
// (3, 7, 11, 15) selects the highest byte of that group into the lowest byte
// position; the remaining indices are 128, whose set top bit makes vpshufb
// write zero. The result is each 32-bit element replaced by its top byte,
// zero-extended.
2864 static const uvec8 kShuffleAlphaShort_AVX2 = {
2865 3u, 128u, 128u, 128u, 7u, 128u, 128u, 128u,
2866 11u, 128u, 128u, 128u, 15u, 128u, 128u, 128u
2867 };
2868
// Copies byte 3 of every 4-byte source element to consecutive bytes of dst_a.
// Each loop iteration reads 128 bytes from src_argb (four 32-byte loads) and
// writes 32 bytes to dst_a, then subtracts 32 from width and repeats while the
// result is greater than zero.
//
// src_argb: source bytes; presumably 4 bytes per ARGB pixel with alpha in
//           byte 3 (inferred from the name - confirm).
// dst_a:    destination, one byte per source element.
// width:    number of output bytes (pixels) to produce.
//
// NOTE(review): the loop body always runs at least once and advances in steps
// of 32, so callers presumably pass a positive multiple of 32 (remainders
// handled elsewhere, e.g. a row_any wrapper) - confirm.
// NOTE(review): vmovdqa faults on a misaligned memory operand, so this
// assumes kPermdARGBToY_AVX is 32-byte aligned - confirm at its definition.
2869 void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, uint8* dst_a, int width) {
2870 asm volatile (
2871 "vmovdqa %3,%%ymm4 \n" // ymm4 = dword permute table (kPermdARGBToY_AVX)
2872 "vbroadcastf128 %4,%%ymm5 \n" // ymm5 = 16-byte shuffle mask copied to both lanes
2873 LABELALIGN
2874 "1: \n"
2875 "vmovdqu " MEMACCESS(0) ", %%ymm0 \n"
2876 "vmovdqu " MEMACCESS2(0x20, 0) ", %%ymm1 \n"
2877 "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" // same result as vpsrld $0x18, %%ymm0: top byte of each dword, zero-extended
2878 "vpshufb %%ymm5,%%ymm1,%%ymm1 \n"
2879 "vmovdqu " MEMACCESS2(0x40, 0) ", %%ymm2 \n"
2880 "vmovdqu " MEMACCESS2(0x60, 0) ", %%ymm3 \n"
2881 "lea " MEMLEA(0x80, 0) ", %0 \n" // advance source by the 128 bytes just loaded
2882 "vpackssdw %%ymm1, %%ymm0, %%ymm0 \n" // mutates: dword->word pack works per 128-bit lane, so order is interleaved
2883 "vpshufb %%ymm5,%%ymm2,%%ymm2 \n"
2884 "vpshufb %%ymm5,%%ymm3,%%ymm3 \n"
2885 "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // mutates (same per-lane pack for the second pair)
2886 "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates: word->byte pack, again per 128-bit lane
2887 "vpermd %%ymm0,%%ymm4,%%ymm0 \n" // unmutate: reorder dwords using the table in ymm4
2888 "vmovdqu %%ymm0," MEMACCESS(1) " \n" // store 32 result bytes
2889 "lea " MEMLEA(0x20,1) ",%1 \n"
2890 "sub $0x20, %2 \n"
2891 "jg 1b \n" // loop while the remaining width is still positive
2892 "vzeroupper \n"
2893 : "+r"(src_argb), // %0
2894 "+r"(dst_a), // %1
2895 "+rm"(width) // %2
2896 : "m"(kPermdARGBToY_AVX), // %3
2897 "m"(kShuffleAlphaShort_AVX2) // %4
2898 : "memory", "cc"
2899 , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
2900 );
2901 }
2902 #endif // HAS_ARGBEXTRACTALPHAROW_AVX2
2903
2863 #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 2904 #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
2864 // width in pixels 2905 // width in pixels
2865 void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { 2906 void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
2866 asm volatile ( 2907 asm volatile (
2867 "pcmpeqb %%xmm0,%%xmm0 \n" 2908 "pcmpeqb %%xmm0,%%xmm0 \n"
2868 "pslld $0x18,%%xmm0 \n" 2909 "pslld $0x18,%%xmm0 \n"
2869 "pcmpeqb %%xmm1,%%xmm1 \n" 2910 "pcmpeqb %%xmm1,%%xmm1 \n"
2870 "psrld $0x8,%%xmm1 \n" 2911 "psrld $0x8,%%xmm1 \n"
2871 LABELALIGN 2912 LABELALIGN
2872 "1: \n" 2913 "1: \n"
(...skipping 2618 matching lines...) Expand 10 before | Expand all | Expand 10 after
5491 ); 5532 );
5492 } 5533 }
5493 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 5534 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
5494 5535
5495 #endif // defined(__x86_64__) || defined(__i386__) 5536 #endif // defined(__x86_64__) || defined(__i386__)
5496 5537
5497 #ifdef __cplusplus 5538 #ifdef __cplusplus
5498 } // extern "C" 5539 } // extern "C"
5499 } // namespace libyuv 5540 } // namespace libyuv
5500 #endif 5541 #endif
OLDNEW
« no previous file with comments | « source/row_any.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698