Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(52)

Side by Side Diff: source/scale_gcc.cc

Issue 2084533006: YUV scale filter columns improved filtering accuracy (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: bump version Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/scale_common.cc ('k') | source/scale_neon.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 803 matching lines...) Expand 10 before | Expand all | Expand 10 after
814 "vzeroupper \n" 814 "vzeroupper \n"
815 : "+r"(src_ptr), // %0 815 : "+r"(src_ptr), // %0
816 "+r"(dst_ptr), // %1 816 "+r"(dst_ptr), // %1
817 "+r"(src_width) // %2 817 "+r"(src_width) // %2
818 : 818 :
819 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 819 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
820 ); 820 );
821 } 821 }
822 #endif // HAS_SCALEADDROW_AVX2 822 #endif // HAS_SCALEADDROW_AVX2
823 823
824 // Constant for making pixels signed to avoid pmaddubsw
825 // saturation.
826 static uvec8 kFsub80 =
827 { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
828 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
829
830 // Constant for making pixels unsigned and adding .5 for rounding.
831 static uvec16 kFadd40 =
832 { 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040 };
833
824 // Bilinear column filtering. SSSE3 version. 834 // Bilinear column filtering. SSSE3 version.
825 void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, 835 void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
826 int dst_width, int x, int dx) { 836 int dst_width, int x, int dx) {
827 intptr_t x0, x1, temp_pixel; 837 intptr_t x0, x1, temp_pixel;
828 asm volatile ( 838 asm volatile (
829 "movd %6,%%xmm2 \n" 839 "movd %6,%%xmm2 \n"
830 "movd %7,%%xmm3 \n" 840 "movd %7,%%xmm3 \n"
831 "movl $0x04040000,%k2 \n" 841 "movl $0x04040000,%k2 \n"
832 "movd %k2,%%xmm5 \n" 842 "movd %k2,%%xmm5 \n"
833 "pcmpeqb %%xmm6,%%xmm6 \n" 843 "pcmpeqb %%xmm6,%%xmm6 \n"
834 "psrlw $0x9,%%xmm6 \n" 844 "psrlw $0x9,%%xmm6 \n" // 0x007f007f
845 "pcmpeqb %%xmm7,%%xmm7 \n"
846 "psrlw $15,%%xmm7 \n" // 0x00010001
847
835 "pextrw $0x1,%%xmm2,%k3 \n" 848 "pextrw $0x1,%%xmm2,%k3 \n"
836 "subl $0x2,%5 \n" 849 "subl $0x2,%5 \n"
837 "jl 29f \n" 850 "jl 29f \n"
838 "movdqa %%xmm2,%%xmm0 \n" 851 "movdqa %%xmm2,%%xmm0 \n"
839 "paddd %%xmm3,%%xmm0 \n" 852 "paddd %%xmm3,%%xmm0 \n"
840 "punpckldq %%xmm0,%%xmm2 \n" 853 "punpckldq %%xmm0,%%xmm2 \n"
841 "punpckldq %%xmm3,%%xmm3 \n" 854 "punpckldq %%xmm3,%%xmm3 \n"
842 "paddd %%xmm3,%%xmm3 \n" 855 "paddd %%xmm3,%%xmm3 \n"
843 "pextrw $0x3,%%xmm2,%k4 \n" 856 "pextrw $0x3,%%xmm2,%k4 \n"
844 857
845 LABELALIGN 858 LABELALIGN
846 "2: \n" 859 "2: \n"
847 "movdqa %%xmm2,%%xmm1 \n" 860 "movdqa %%xmm2,%%xmm1 \n"
848 "paddd %%xmm3,%%xmm2 \n" 861 "paddd %%xmm3,%%xmm2 \n"
849 MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2 862 MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2
850 "movd %k2,%%xmm0 \n" 863 "movd %k2,%%xmm0 \n"
851 "psrlw $0x9,%%xmm1 \n" 864 "psrlw $0x9,%%xmm1 \n"
852 MEMOPARG(movzwl,0x00,1,4,1,k2) // movzwl (%1,%4,1),%k2 865 MEMOPARG(movzwl,0x00,1,4,1,k2) // movzwl (%1,%4,1),%k2
853 "movd %k2,%%xmm4 \n" 866 "movd %k2,%%xmm4 \n"
854 "pshufb %%xmm5,%%xmm1 \n" 867 "pshufb %%xmm5,%%xmm1 \n"
855 "punpcklwd %%xmm4,%%xmm0 \n" 868 "punpcklwd %%xmm4,%%xmm0 \n"
856 "pxor %%xmm6,%%xmm1 \n" 869 "psubb %8,%%xmm0 \n" // make pixels signed.
857 "pmaddubsw %%xmm1,%%xmm0 \n" 870 "pxor %%xmm6,%%xmm1 \n" // 128 - f = (f ^ 127) + 1
871 "paddusb %%xmm7,%%xmm1 \n"
872 "pmaddubsw %%xmm0,%%xmm1 \n"
858 "pextrw $0x1,%%xmm2,%k3 \n" 873 "pextrw $0x1,%%xmm2,%k3 \n"
859 "pextrw $0x3,%%xmm2,%k4 \n" 874 "pextrw $0x3,%%xmm2,%k4 \n"
860 "psrlw $0x7,%%xmm0 \n" 875 "paddw %9,%%xmm1 \n" // make pixels unsigned.
861 "packuswb %%xmm0,%%xmm0 \n" 876 "psrlw $0x7,%%xmm1 \n"
862 "movd %%xmm0,%k2 \n" 877 "packuswb %%xmm1,%%xmm1 \n"
878 "movd %%xmm1,%k2 \n"
863 "mov %w2," MEMACCESS(0) " \n" 879 "mov %w2," MEMACCESS(0) " \n"
864 "lea " MEMLEA(0x2,0) ",%0 \n" 880 "lea " MEMLEA(0x2,0) ",%0 \n"
865 "sub $0x2,%5 \n" 881 "sub $0x2,%5 \n"
866 "jge 2b \n" 882 "jge 2b \n"
867 883
868 LABELALIGN 884 LABELALIGN
869 "29: \n" 885 "29: \n"
870 "addl $0x1,%5 \n" 886 "addl $0x1,%5 \n"
871 "jl 99f \n" 887 "jl 99f \n"
872 MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2 888 MEMOPARG(movzwl,0x00,1,3,1,k2) // movzwl (%1,%3,1),%k2
873 "movd %k2,%%xmm0 \n" 889 "movd %k2,%%xmm0 \n"
874 "psrlw $0x9,%%xmm2 \n" 890 "psrlw $0x9,%%xmm2 \n"
875 "pshufb %%xmm5,%%xmm2 \n" 891 "pshufb %%xmm5,%%xmm2 \n"
892 "psubb %8,%%xmm0 \n" // make pixels signed.
876 "pxor %%xmm6,%%xmm2 \n" 893 "pxor %%xmm6,%%xmm2 \n"
877 "pmaddubsw %%xmm2,%%xmm0 \n" 894 "paddusb %%xmm7,%%xmm2 \n"
878 "psrlw $0x7,%%xmm0 \n" 895 "pmaddubsw %%xmm0,%%xmm2 \n"
879 "packuswb %%xmm0,%%xmm0 \n" 896 "paddw %9,%%xmm2 \n" // make pixels unsigned.
880 "movd %%xmm0,%k2 \n" 897 "psrlw $0x7,%%xmm2 \n"
898 "packuswb %%xmm2,%%xmm2 \n"
899 "movd %%xmm2,%k2 \n"
881 "mov %b2," MEMACCESS(0) " \n" 900 "mov %b2," MEMACCESS(0) " \n"
882 "99: \n" 901 "99: \n"
883 : "+r"(dst_ptr), // %0 902 : "+r"(dst_ptr), // %0
884 "+r"(src_ptr), // %1 903 "+r"(src_ptr), // %1
885 "=&a"(temp_pixel), // %2 904 "=&a"(temp_pixel), // %2
886 "=&r"(x0), // %3 905 "=&r"(x0), // %3
887 "=&r"(x1), // %4 906 "=&r"(x1), // %4
888 "+rm"(dst_width) // %5 907 "+rm"(dst_width) // %5
889 : "rm"(x), // %6 908 : "rm"(x), // %6
890 "rm"(dx) // %7 909 "rm"(dx), // %7
910 #if defined(__x86_64__)
911 "x"(kFsub80), // %8
912 "x"(kFadd40) // %9
913 #else
914 "m"(kFsub80), // %8
915 "m"(kFadd40) // %9
916 #endif
891 : "memory", "cc", NACL_R14 917 : "memory", "cc", NACL_R14
892 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" 918 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
893 ); 919 );
894 } 920 }
895 921
896 // Reads 4 pixels, duplicates them and writes 8 pixels. 922 // Reads 4 pixels, duplicates them and writes 8 pixels.
897 // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. 923 // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
898 void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr, 924 void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
899 int dst_width, int x, int dx) { 925 int dst_width, int x, int dx) {
900 asm volatile ( 926 asm volatile (
901 LABELALIGN 927 LABELALIGN
902 "1: \n" 928 "1: \n"
(...skipping 380 matching lines...) Expand 10 before | Expand all | Expand 10 after
1283 ); 1309 );
1284 return num; 1310 return num;
1285 } 1311 }
1286 1312
1287 #endif // defined(__x86_64__) || defined(__i386__) 1313 #endif // defined(__x86_64__) || defined(__i386__)
1288 1314
1289 #ifdef __cplusplus 1315 #ifdef __cplusplus
1290 } // extern "C" 1316 } // extern "C"
1291 } // namespace libyuv 1317 } // namespace libyuv
1292 #endif 1318 #endif
OLDNEW
« no previous file with comments | « source/scale_common.cc ('k') | source/scale_neon.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698