Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(229)

Side by Side Diff: source/scale_gcc.cc

Issue 1895743008: Remove initialize to zero on output variables for inline. (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: use early write for all outputs to avoid them being reassigned to input Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_mips.cc ('k') | source/scale_neon.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 298 matching lines...) Expand 10 before | Expand all | Expand 10 after
309 "jg 1b \n" 309 "jg 1b \n"
310 : "+r"(src_ptr), // %0 310 : "+r"(src_ptr), // %0
311 "+r"(dst_ptr), // %1 311 "+r"(dst_ptr), // %1
312 "+r"(dst_width) // %2 312 "+r"(dst_width) // %2
313 :: "memory", "cc", "xmm0", "xmm1", "xmm5" 313 :: "memory", "cc", "xmm0", "xmm1", "xmm5"
314 ); 314 );
315 } 315 }
316 316
317 void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, 317 void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
318 uint8* dst_ptr, int dst_width) { 318 uint8* dst_ptr, int dst_width) {
319 intptr_t stridex3 = 0; 319 intptr_t stridex3;
320 asm volatile ( 320 asm volatile (
321 "pcmpeqb %%xmm4,%%xmm4 \n" 321 "pcmpeqb %%xmm4,%%xmm4 \n"
322 "psrlw $0xf,%%xmm4 \n" 322 "psrlw $0xf,%%xmm4 \n"
323 "movdqa %%xmm4,%%xmm5 \n" 323 "movdqa %%xmm4,%%xmm5 \n"
324 "packuswb %%xmm4,%%xmm4 \n" 324 "packuswb %%xmm4,%%xmm4 \n"
325 "psllw $0x3,%%xmm5 \n" 325 "psllw $0x3,%%xmm5 \n"
326 "lea " MEMLEA4(0x00,4,4,2) ",%3 \n" 326 "lea " MEMLEA4(0x00,4,4,2) ",%3 \n"
327 327
328 LABELALIGN 328 LABELALIGN
329 "1: \n" 329 "1: \n"
(...skipping 24 matching lines...) Expand all
354 "paddw %%xmm5,%%xmm0 \n" 354 "paddw %%xmm5,%%xmm0 \n"
355 "psrlw $0x4,%%xmm0 \n" 355 "psrlw $0x4,%%xmm0 \n"
356 "packuswb %%xmm0,%%xmm0 \n" 356 "packuswb %%xmm0,%%xmm0 \n"
357 "movq %%xmm0," MEMACCESS(1) " \n" 357 "movq %%xmm0," MEMACCESS(1) " \n"
358 "lea " MEMLEA(0x8,1) ",%1 \n" 358 "lea " MEMLEA(0x8,1) ",%1 \n"
359 "sub $0x8,%2 \n" 359 "sub $0x8,%2 \n"
360 "jg 1b \n" 360 "jg 1b \n"
361 : "+r"(src_ptr), // %0 361 : "+r"(src_ptr), // %0
362 "+r"(dst_ptr), // %1 362 "+r"(dst_ptr), // %1
363 "+r"(dst_width), // %2 363 "+r"(dst_width), // %2
364 "+r"(stridex3) // %3 364 "=&r"(stridex3) // %3
365 : "r"((intptr_t)(src_stride)) // %4 365 : "r"((intptr_t)(src_stride)) // %4
366 : "memory", "cc", NACL_R14 366 : "memory", "cc", NACL_R14
367 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 367 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
368 ); 368 );
369 } 369 }
370 370
371 371
372 #ifdef HAS_SCALEROWDOWN4_AVX2 372 #ifdef HAS_SCALEROWDOWN4_AVX2
373 void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, 373 void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
374 uint8* dst_ptr, int dst_width) { 374 uint8* dst_ptr, int dst_width) {
(...skipping 442 matching lines...) Expand 10 before | Expand all | Expand 10 after
817 "+r"(src_width) // %2 817 "+r"(src_width) // %2
818 : 818 :
819 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 819 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
820 ); 820 );
821 } 821 }
822 #endif // HAS_SCALEADDROW_AVX2 822 #endif // HAS_SCALEADDROW_AVX2
823 823
824 // Bilinear column filtering. SSSE3 version. 824 // Bilinear column filtering. SSSE3 version.
825 void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, 825 void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
826 int dst_width, int x, int dx) { 826 int dst_width, int x, int dx) {
827 intptr_t x0 = 0, x1 = 0, temp_pixel = 0; 827 intptr_t x0, x1, temp_pixel;
828 asm volatile ( 828 asm volatile (
829 "movd %6,%%xmm2 \n" 829 "movd %6,%%xmm2 \n"
830 "movd %7,%%xmm3 \n" 830 "movd %7,%%xmm3 \n"
831 "movl $0x04040000,%k2 \n" 831 "movl $0x04040000,%k2 \n"
832 "movd %k2,%%xmm5 \n" 832 "movd %k2,%%xmm5 \n"
833 "pcmpeqb %%xmm6,%%xmm6 \n" 833 "pcmpeqb %%xmm6,%%xmm6 \n"
834 "psrlw $0x9,%%xmm6 \n" 834 "psrlw $0x9,%%xmm6 \n"
835 "pextrw $0x1,%%xmm2,%k3 \n" 835 "pextrw $0x1,%%xmm2,%k3 \n"
836 "subl $0x2,%5 \n" 836 "subl $0x2,%5 \n"
837 "jl 29f \n" 837 "jl 29f \n"
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
873 "movd %k2,%%xmm0 \n" 873 "movd %k2,%%xmm0 \n"
874 "psrlw $0x9,%%xmm2 \n" 874 "psrlw $0x9,%%xmm2 \n"
875 "pshufb %%xmm5,%%xmm2 \n" 875 "pshufb %%xmm5,%%xmm2 \n"
876 "pxor %%xmm6,%%xmm2 \n" 876 "pxor %%xmm6,%%xmm2 \n"
877 "pmaddubsw %%xmm2,%%xmm0 \n" 877 "pmaddubsw %%xmm2,%%xmm0 \n"
878 "psrlw $0x7,%%xmm0 \n" 878 "psrlw $0x7,%%xmm0 \n"
879 "packuswb %%xmm0,%%xmm0 \n" 879 "packuswb %%xmm0,%%xmm0 \n"
880 "movd %%xmm0,%k2 \n" 880 "movd %%xmm0,%k2 \n"
881 "mov %b2," MEMACCESS(0) " \n" 881 "mov %b2," MEMACCESS(0) " \n"
882 "99: \n" 882 "99: \n"
883 : "+r"(dst_ptr), // %0 883 : "+r"(dst_ptr), // %0
884 "+r"(src_ptr), // %1 884 "+r"(src_ptr), // %1
885 "+a"(temp_pixel), // %2 885 "=&a"(temp_pixel), // %2
886 "+r"(x0), // %3 886 "=&r"(x0), // %3
887 "+r"(x1), // %4 887 "=&r"(x1), // %4
888 "+rm"(dst_width) // %5 888 "+rm"(dst_width) // %5
889 : "rm"(x), // %6 889 : "rm"(x), // %6
890 "rm"(dx) // %7 890 "rm"(dx) // %7
891 : "memory", "cc", NACL_R14 891 : "memory", "cc", NACL_R14
892 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" 892 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
893 ); 893 );
894 } 894 }
895 895
896 // Reads 4 pixels, duplicates them and writes 8 pixels. 896 // Reads 4 pixels, duplicates them and writes 8 pixels.
897 // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. 897 // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned.
898 void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr, 898 void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
899 int dst_width, int x, int dx) { 899 int dst_width, int x, int dx) {
900 asm volatile ( 900 asm volatile (
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after
991 : "memory", "cc", NACL_R14 991 : "memory", "cc", NACL_R14
992 "xmm0", "xmm1", "xmm2", "xmm3" 992 "xmm0", "xmm1", "xmm2", "xmm3"
993 ); 993 );
994 } 994 }
995 995
996 // Reads 4 pixels at a time. 996 // Reads 4 pixels at a time.
997 // Alignment requirement: dst_argb 16 byte aligned. 997 // Alignment requirement: dst_argb 16 byte aligned.
998 void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, 998 void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
999 int src_stepx, uint8* dst_argb, int dst_width) { 999 int src_stepx, uint8* dst_argb, int dst_width) {
1000 intptr_t src_stepx_x4 = (intptr_t)(src_stepx); 1000 intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
1001 intptr_t src_stepx_x12 = 0; 1001 intptr_t src_stepx_x12;
1002 asm volatile ( 1002 asm volatile (
1003 "lea " MEMLEA3(0x00,1,4) ",%1 \n" 1003 "lea " MEMLEA3(0x00,1,4) ",%1 \n"
1004 "lea " MEMLEA4(0x00,1,1,2) ",%4 \n" 1004 "lea " MEMLEA4(0x00,1,1,2) ",%4 \n"
1005 LABELALIGN 1005 LABELALIGN
1006 "1: \n" 1006 "1: \n"
1007 "movd " MEMACCESS(0) ",%%xmm0 \n" 1007 "movd " MEMACCESS(0) ",%%xmm0 \n"
1008 MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1 1008 MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1
1009 "punpckldq %%xmm1,%%xmm0 \n" 1009 "punpckldq %%xmm1,%%xmm0 \n"
1010 MEMOPREG(movd,0x00,0,1,2,xmm2) // movd (%0,%1,2),%%xmm2 1010 MEMOPREG(movd,0x00,0,1,2,xmm2) // movd (%0,%1,2),%%xmm2
1011 MEMOPREG(movd,0x00,0,4,1,xmm3) // movd (%0,%4,1),%%xmm3 1011 MEMOPREG(movd,0x00,0,4,1,xmm3) // movd (%0,%4,1),%%xmm3
1012 "lea " MEMLEA4(0x00,0,1,4) ",%0 \n" 1012 "lea " MEMLEA4(0x00,0,1,4) ",%0 \n"
1013 "punpckldq %%xmm3,%%xmm2 \n" 1013 "punpckldq %%xmm3,%%xmm2 \n"
1014 "punpcklqdq %%xmm2,%%xmm0 \n" 1014 "punpcklqdq %%xmm2,%%xmm0 \n"
1015 "movdqu %%xmm0," MEMACCESS(2) " \n" 1015 "movdqu %%xmm0," MEMACCESS(2) " \n"
1016 "lea " MEMLEA(0x10,2) ",%2 \n" 1016 "lea " MEMLEA(0x10,2) ",%2 \n"
1017 "sub $0x4,%3 \n" 1017 "sub $0x4,%3 \n"
1018 "jg 1b \n" 1018 "jg 1b \n"
1019 : "+r"(src_argb), // %0 1019 : "+r"(src_argb), // %0
1020 "+r"(src_stepx_x4), // %1 1020 "+r"(src_stepx_x4), // %1
1021 "+r"(dst_argb), // %2 1021 "+r"(dst_argb), // %2
1022 "+r"(dst_width), // %3 1022 "+r"(dst_width), // %3
1023 "+r"(src_stepx_x12) // %4 1023 "=&r"(src_stepx_x12) // %4
1024 :: "memory", "cc", NACL_R14 1024 :: "memory", "cc", NACL_R14
1025 "xmm0", "xmm1", "xmm2", "xmm3" 1025 "xmm0", "xmm1", "xmm2", "xmm3"
1026 ); 1026 );
1027 } 1027 }
1028 1028
1029 // Blends four 2x2 to 4x1. 1029 // Blends four 2x2 to 4x1.
1030 // Alignment requirement: dst_argb 16 byte aligned. 1030 // Alignment requirement: dst_argb 16 byte aligned.
1031 void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, 1031 void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
1032 ptrdiff_t src_stride, int src_stepx, 1032 ptrdiff_t src_stride, int src_stepx,
1033 uint8* dst_argb, int dst_width) { 1033 uint8* dst_argb, int dst_width) {
1034 intptr_t src_stepx_x4 = (intptr_t)(src_stepx); 1034 intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
1035 intptr_t src_stepx_x12 = 0; 1035 intptr_t src_stepx_x12;
1036 intptr_t row1 = (intptr_t)(src_stride); 1036 intptr_t row1 = (intptr_t)(src_stride);
1037 asm volatile ( 1037 asm volatile (
1038 "lea " MEMLEA3(0x00,1,4) ",%1 \n" 1038 "lea " MEMLEA3(0x00,1,4) ",%1 \n"
1039 "lea " MEMLEA4(0x00,1,1,2) ",%4 \n" 1039 "lea " MEMLEA4(0x00,1,1,2) ",%4 \n"
1040 "lea " MEMLEA4(0x00,0,5,1) ",%5 \n" 1040 "lea " MEMLEA4(0x00,0,5,1) ",%5 \n"
1041 1041
1042 LABELALIGN 1042 LABELALIGN
1043 "1: \n" 1043 "1: \n"
1044 "movq " MEMACCESS(0) ",%%xmm0 \n" 1044 "movq " MEMACCESS(0) ",%%xmm0 \n"
1045 MEMOPREG(movhps,0x00,0,1,1,xmm0) // movhps (%0,%1,1),%%xmm0 1045 MEMOPREG(movhps,0x00,0,1,1,xmm0) // movhps (%0,%1,1),%%xmm0
1046 MEMOPREG(movq,0x00,0,1,2,xmm1) // movq (%0,%1,2),%%xmm1 1046 MEMOPREG(movq,0x00,0,1,2,xmm1) // movq (%0,%1,2),%%xmm1
1047 MEMOPREG(movhps,0x00,0,4,1,xmm1) // movhps (%0,%4,1),%%xmm1 1047 MEMOPREG(movhps,0x00,0,4,1,xmm1) // movhps (%0,%4,1),%%xmm1
1048 "lea " MEMLEA4(0x00,0,1,4) ",%0 \n" 1048 "lea " MEMLEA4(0x00,0,1,4) ",%0 \n"
1049 "movq " MEMACCESS(5) ",%%xmm2 \n" 1049 "movq " MEMACCESS(5) ",%%xmm2 \n"
1050 MEMOPREG(movhps,0x00,5,1,1,xmm2) // movhps (%5,%1,1),%%xmm2 1050 MEMOPREG(movhps,0x00,5,1,1,xmm2) // movhps (%5,%1,1),%%xmm2
1051 MEMOPREG(movq,0x00,5,1,2,xmm3) // movq (%5,%1,2),%%xmm3 1051 MEMOPREG(movq,0x00,5,1,2,xmm3) // movq (%5,%1,2),%%xmm3
1052 MEMOPREG(movhps,0x00,5,4,1,xmm3) // movhps (%5,%4,1),%%xmm3 1052 MEMOPREG(movhps,0x00,5,4,1,xmm3) // movhps (%5,%4,1),%%xmm3
1053 "lea " MEMLEA4(0x00,5,1,4) ",%5 \n" 1053 "lea " MEMLEA4(0x00,5,1,4) ",%5 \n"
1054 "pavgb %%xmm2,%%xmm0 \n" 1054 "pavgb %%xmm2,%%xmm0 \n"
1055 "pavgb %%xmm3,%%xmm1 \n" 1055 "pavgb %%xmm3,%%xmm1 \n"
1056 "movdqa %%xmm0,%%xmm2 \n" 1056 "movdqa %%xmm0,%%xmm2 \n"
1057 "shufps $0x88,%%xmm1,%%xmm0 \n" 1057 "shufps $0x88,%%xmm1,%%xmm0 \n"
1058 "shufps $0xdd,%%xmm1,%%xmm2 \n" 1058 "shufps $0xdd,%%xmm1,%%xmm2 \n"
1059 "pavgb %%xmm2,%%xmm0 \n" 1059 "pavgb %%xmm2,%%xmm0 \n"
1060 "movdqu %%xmm0," MEMACCESS(2) " \n" 1060 "movdqu %%xmm0," MEMACCESS(2) " \n"
1061 "lea " MEMLEA(0x10,2) ",%2 \n" 1061 "lea " MEMLEA(0x10,2) ",%2 \n"
1062 "sub $0x4,%3 \n" 1062 "sub $0x4,%3 \n"
1063 "jg 1b \n" 1063 "jg 1b \n"
1064 : "+r"(src_argb), // %0 1064 : "+r"(src_argb), // %0
1065 "+r"(src_stepx_x4), // %1 1065 "+r"(src_stepx_x4), // %1
1066 "+r"(dst_argb), // %2 1066 "+r"(dst_argb), // %2
1067 "+rm"(dst_width), // %3 1067 "+rm"(dst_width), // %3
1068 "+r"(src_stepx_x12), // %4 1068 "=&r"(src_stepx_x12), // %4
1069 "+r"(row1) // %5 1069 "+r"(row1) // %5
1070 :: "memory", "cc", NACL_R14 1070 :: "memory", "cc", NACL_R14
1071 "xmm0", "xmm1", "xmm2", "xmm3" 1071 "xmm0", "xmm1", "xmm2", "xmm3"
1072 ); 1072 );
1073 } 1073 }
1074 1074
1075 void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, 1075 void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
1076 int dst_width, int x, int dx) { 1076 int dst_width, int x, int dx) {
1077 intptr_t x0 = 0, x1 = 0; 1077 intptr_t x0, x1;
1078 asm volatile ( 1078 asm volatile (
1079 "movd %5,%%xmm2 \n" 1079 "movd %5,%%xmm2 \n"
1080 "movd %6,%%xmm3 \n" 1080 "movd %6,%%xmm3 \n"
1081 "pshufd $0x0,%%xmm2,%%xmm2 \n" 1081 "pshufd $0x0,%%xmm2,%%xmm2 \n"
1082 "pshufd $0x11,%%xmm3,%%xmm0 \n" 1082 "pshufd $0x11,%%xmm3,%%xmm0 \n"
1083 "paddd %%xmm0,%%xmm2 \n" 1083 "paddd %%xmm0,%%xmm2 \n"
1084 "paddd %%xmm3,%%xmm3 \n" 1084 "paddd %%xmm3,%%xmm3 \n"
1085 "pshufd $0x5,%%xmm3,%%xmm0 \n" 1085 "pshufd $0x5,%%xmm3,%%xmm0 \n"
1086 "paddd %%xmm0,%%xmm2 \n" 1086 "paddd %%xmm0,%%xmm2 \n"
1087 "paddd %%xmm3,%%xmm3 \n" 1087 "paddd %%xmm3,%%xmm3 \n"
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
1120 "pextrw $0x5,%%xmm2,%k0 \n" 1120 "pextrw $0x5,%%xmm2,%k0 \n"
1121 "punpckldq %%xmm1,%%xmm0 \n" 1121 "punpckldq %%xmm1,%%xmm0 \n"
1122 "movq %%xmm0," MEMACCESS(2) " \n" 1122 "movq %%xmm0," MEMACCESS(2) " \n"
1123 "lea " MEMLEA(0x8,2) ",%2 \n" 1123 "lea " MEMLEA(0x8,2) ",%2 \n"
1124 "29: \n" 1124 "29: \n"
1125 "test $0x1,%4 \n" 1125 "test $0x1,%4 \n"
1126 "je 99f \n" 1126 "je 99f \n"
1127 MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0 1127 MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0
1128 "movd %%xmm0," MEMACCESS(2) " \n" 1128 "movd %%xmm0," MEMACCESS(2) " \n"
1129 "99: \n" 1129 "99: \n"
1130 : "+a"(x0), // %0 1130 : "=&a"(x0), // %0
1131 "+d"(x1), // %1 1131 "=&d"(x1), // %1
1132 "+r"(dst_argb), // %2 1132 "+r"(dst_argb), // %2
1133 "+r"(src_argb), // %3 1133 "+r"(src_argb), // %3
1134 "+r"(dst_width) // %4 1134 "+r"(dst_width) // %4
1135 : "rm"(x), // %5 1135 : "rm"(x), // %5
1136 "rm"(dx) // %6 1136 "rm"(dx) // %6
1137 : "memory", "cc", NACL_R14 1137 : "memory", "cc", NACL_R14
1138 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" 1138 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
1139 ); 1139 );
1140 } 1140 }
1141 1141
(...skipping 30 matching lines...) Expand all
1172 }; 1172 };
1173 1173
1174 // Shuffle table for duplicating 2 fractions into 8 bytes each 1174 // Shuffle table for duplicating 2 fractions into 8 bytes each
1175 static uvec8 kShuffleFractions = { 1175 static uvec8 kShuffleFractions = {
1176 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 1176 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u,
1177 }; 1177 };
1178 1178
1179 // Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version 1179 // Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version
1180 void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, 1180 void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
1181 int dst_width, int x, int dx) { 1181 int dst_width, int x, int dx) {
1182 intptr_t x0 = 0, x1 = 0; 1182 intptr_t x0, x1;
1183 asm volatile ( 1183 asm volatile (
1184 "movdqa %0,%%xmm4 \n" 1184 "movdqa %0,%%xmm4 \n"
1185 "movdqa %1,%%xmm5 \n" 1185 "movdqa %1,%%xmm5 \n"
1186 : 1186 :
1187 : "m"(kShuffleColARGB), // %0 1187 : "m"(kShuffleColARGB), // %0
1188 "m"(kShuffleFractions) // %1 1188 "m"(kShuffleFractions) // %1
1189 ); 1189 );
1190 1190
1191 asm volatile ( 1191 asm volatile (
1192 "movd %5,%%xmm2 \n" 1192 "movd %5,%%xmm2 \n"
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
1235 "pmaddubsw %%xmm2,%%xmm0 \n" 1235 "pmaddubsw %%xmm2,%%xmm0 \n"
1236 "psrlw $0x7,%%xmm0 \n" 1236 "psrlw $0x7,%%xmm0 \n"
1237 "packuswb %%xmm0,%%xmm0 \n" 1237 "packuswb %%xmm0,%%xmm0 \n"
1238 "movd %%xmm0," MEMACCESS(0) " \n" 1238 "movd %%xmm0," MEMACCESS(0) " \n"
1239 1239
1240 LABELALIGN 1240 LABELALIGN
1241 "99: \n" 1241 "99: \n"
1242 : "+r"(dst_argb), // %0 1242 : "+r"(dst_argb), // %0
1243 "+r"(src_argb), // %1 1243 "+r"(src_argb), // %1
1244 "+rm"(dst_width), // %2 1244 "+rm"(dst_width), // %2
1245 "+r"(x0), // %3 1245 "=&r"(x0), // %3
1246 "+r"(x1) // %4 1246 "=&r"(x1) // %4
1247 : "rm"(x), // %5 1247 : "rm"(x), // %5
1248 "rm"(dx) // %6 1248 "rm"(dx) // %6
1249 : "memory", "cc", NACL_R14 1249 : "memory", "cc", NACL_R14
1250 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" 1250 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
1251 ); 1251 );
1252 } 1252 }
1253 1253
1254 // Divide num by div and return as 16.16 fixed point result. 1254 // Divide num by div and return as 16.16 fixed point result.
1255 int FixedDiv_X86(int num, int div) { 1255 int FixedDiv_X86(int num, int div) {
1256 asm volatile ( 1256 asm volatile (
(...skipping 26 matching lines...) Expand all
1283 ); 1283 );
1284 return num; 1284 return num;
1285 } 1285 }
1286 1286
1287 #endif // defined(__x86_64__) || defined(__i386__) 1287 #endif // defined(__x86_64__) || defined(__i386__)
1288 1288
1289 #ifdef __cplusplus 1289 #ifdef __cplusplus
1290 } // extern "C" 1290 } // extern "C"
1291 } // namespace libyuv 1291 } // namespace libyuv
1292 #endif 1292 #endif
OLDNEW
« no previous file with comments | « source/row_mips.cc ('k') | source/scale_neon.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698