OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 298 matching lines...)
309 "jg 1b \n" | 309 "jg 1b \n" |
310 : "+r"(src_ptr), // %0 | 310 : "+r"(src_ptr), // %0 |
311 "+r"(dst_ptr), // %1 | 311 "+r"(dst_ptr), // %1 |
312 "+r"(dst_width) // %2 | 312 "+r"(dst_width) // %2 |
313 :: "memory", "cc", "xmm0", "xmm1", "xmm5" | 313 :: "memory", "cc", "xmm0", "xmm1", "xmm5" |
314 ); | 314 ); |
315 } | 315 } |
316 | 316 |
317 void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, | 317 void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, |
318 uint8* dst_ptr, int dst_width) { | 318 uint8* dst_ptr, int dst_width) { |
319 intptr_t stridex3 = 0; | 319 intptr_t stridex3; |
320 asm volatile ( | 320 asm volatile ( |
321 "pcmpeqb %%xmm4,%%xmm4 \n" | 321 "pcmpeqb %%xmm4,%%xmm4 \n" |
322 "psrlw $0xf,%%xmm4 \n" | 322 "psrlw $0xf,%%xmm4 \n" |
323 "movdqa %%xmm4,%%xmm5 \n" | 323 "movdqa %%xmm4,%%xmm5 \n" |
324 "packuswb %%xmm4,%%xmm4 \n" | 324 "packuswb %%xmm4,%%xmm4 \n" |
325 "psllw $0x3,%%xmm5 \n" | 325 "psllw $0x3,%%xmm5 \n" |
326 "lea " MEMLEA4(0x00,4,4,2) ",%3 \n" | 326 "lea " MEMLEA4(0x00,4,4,2) ",%3 \n" |
327 | 327 |
328 LABELALIGN | 328 LABELALIGN |
329 "1: \n" | 329 "1: \n" |
(...skipping 24 matching lines...)
354 "paddw %%xmm5,%%xmm0 \n" | 354 "paddw %%xmm5,%%xmm0 \n" |
355 "psrlw $0x4,%%xmm0 \n" | 355 "psrlw $0x4,%%xmm0 \n" |
356 "packuswb %%xmm0,%%xmm0 \n" | 356 "packuswb %%xmm0,%%xmm0 \n" |
357 "movq %%xmm0," MEMACCESS(1) " \n" | 357 "movq %%xmm0," MEMACCESS(1) " \n" |
358 "lea " MEMLEA(0x8,1) ",%1 \n" | 358 "lea " MEMLEA(0x8,1) ",%1 \n" |
359 "sub $0x8,%2 \n" | 359 "sub $0x8,%2 \n" |
360 "jg 1b \n" | 360 "jg 1b \n" |
361 : "+r"(src_ptr), // %0 | 361 : "+r"(src_ptr), // %0 |
362 "+r"(dst_ptr), // %1 | 362 "+r"(dst_ptr), // %1 |
363 "+r"(dst_width), // %2 | 363 "+r"(dst_width), // %2 |
364 "+r"(stridex3) // %3 | 364 "=&r"(stridex3) // %3 |
365 : "r"((intptr_t)(src_stride)) // %4 | 365 : "r"((intptr_t)(src_stride)) // %4 |
366 : "memory", "cc", NACL_R14 | 366 : "memory", "cc", NACL_R14 |
367 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | 367 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
368 ); | 368 ); |
369 } | 369 } |
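
The change above drops the dead "= 0" initializer and converts the scratch operand from a read-write "+r" constraint to a write-only, early-clobber "=&r" output. With "+r" the asm is assumed to read the operand on entry, so the variable had to be zero-initialized; with "=&r" it is a pure output, and the '&' tells the register allocator not to place any input in the same register, because the asm writes the output (the setup lea into %3) before the loop has finished reading the inputs. A minimal sketch of why the '&' matters, assuming GCC/Clang extended asm on x86 (the function and variable names here are illustrative, not from libyuv):

    #include <stdint.h>

    static inline intptr_t add_demo(intptr_t a, intptr_t b) {
      intptr_t out;                /* no dead "= 0" initializer needed */
      asm("mov %1,%0 \n\t"         /* out is written here ...          */
          "add %2,%0 \n\t"         /* ... while %2 is still read, so   */
                                   /* out must not share b's register  */
          : "=&r"(out)             /* write-only, early-clobber output */
          : "r"(a), "r"(b));
      return out;                  /* out == a + b */
    }

Without the '&', the compiler could legally assign b's register to out, and the mov would clobber b before the add reads it.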
370 | 370 |
371 | 371 |
372 #ifdef HAS_SCALEROWDOWN4_AVX2 | 372 #ifdef HAS_SCALEROWDOWN4_AVX2 |
373 void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, | 373 void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride, |
374 uint8* dst_ptr, int dst_width) { | 374 uint8* dst_ptr, int dst_width) { |
(...skipping 442 matching lines...)
817 "+r"(src_width) // %2 | 817 "+r"(src_width) // %2 |
818 : | 818 : |
819 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 819 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" |
820 ); | 820 ); |
821 } | 821 } |
822 #endif // HAS_SCALEADDROW_AVX2 | 822 #endif // HAS_SCALEADDROW_AVX2 |
823 | 823 |
824 // Bilinear column filtering. SSSE3 version. | 824 // Bilinear column filtering. SSSE3 version. |
825 void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, | 825 void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, |
826 int dst_width, int x, int dx) { | 826 int dst_width, int x, int dx) { |
827 intptr_t x0 = 0, x1 = 0, temp_pixel = 0; | 827 intptr_t x0, x1, temp_pixel; |
828 asm volatile ( | 828 asm volatile ( |
829 "movd %6,%%xmm2 \n" | 829 "movd %6,%%xmm2 \n" |
830 "movd %7,%%xmm3 \n" | 830 "movd %7,%%xmm3 \n" |
831 "movl $0x04040000,%k2 \n" | 831 "movl $0x04040000,%k2 \n" |
832 "movd %k2,%%xmm5 \n" | 832 "movd %k2,%%xmm5 \n" |
833 "pcmpeqb %%xmm6,%%xmm6 \n" | 833 "pcmpeqb %%xmm6,%%xmm6 \n" |
834 "psrlw $0x9,%%xmm6 \n" | 834 "psrlw $0x9,%%xmm6 \n" |
835 "pextrw $0x1,%%xmm2,%k3 \n" | 835 "pextrw $0x1,%%xmm2,%k3 \n" |
836 "subl $0x2,%5 \n" | 836 "subl $0x2,%5 \n" |
837 "jl 29f \n" | 837 "jl 29f \n" |
(...skipping 35 matching lines...)
873 "movd %k2,%%xmm0 \n" | 873 "movd %k2,%%xmm0 \n" |
874 "psrlw $0x9,%%xmm2 \n" | 874 "psrlw $0x9,%%xmm2 \n" |
875 "pshufb %%xmm5,%%xmm2 \n" | 875 "pshufb %%xmm5,%%xmm2 \n" |
876 "pxor %%xmm6,%%xmm2 \n" | 876 "pxor %%xmm6,%%xmm2 \n" |
877 "pmaddubsw %%xmm2,%%xmm0 \n" | 877 "pmaddubsw %%xmm2,%%xmm0 \n" |
878 "psrlw $0x7,%%xmm0 \n" | 878 "psrlw $0x7,%%xmm0 \n" |
879 "packuswb %%xmm0,%%xmm0 \n" | 879 "packuswb %%xmm0,%%xmm0 \n" |
880 "movd %%xmm0,%k2 \n" | 880 "movd %%xmm0,%k2 \n" |
881 "mov %b2," MEMACCESS(0) " \n" | 881 "mov %b2," MEMACCESS(0) " \n" |
882 "99: \n" | 882 "99: \n" |
883 : "+r"(dst_ptr), // %0 | 883 : "+r"(dst_ptr), // %0 |
884 "+r"(src_ptr), // %1 | 884 "+r"(src_ptr), // %1 |
885 "+a"(temp_pixel), // %2 | 885 "=&a"(temp_pixel), // %2 |
886 "+r"(x0), // %3 | 886 "=&r"(x0), // %3 |
887 "+r"(x1), // %4 | 887 "=&r"(x1), // %4 |
888 "+rm"(dst_width) // %5 | 888 "+rm"(dst_width) // %5 |
889 : "rm"(x), // %6 | 889 : "rm"(x), // %6 |
890 "rm"(dx) // %7 | 890 "rm"(dx) // %7 |
891 : "memory", "cc", NACL_R14 | 891 : "memory", "cc", NACL_R14 |
892 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" | 892 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" |
893 ); | 893 ); |
894 } | 894 } |
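
Here the scratch operand additionally needs a specific register: the asm stores through %b2 (the byte subregister of operand 2, e.g. %al), and on 32-bit x86 only the a/b/c/d registers have byte forms, hence "=&a" rather than "=&r". A hedged sketch of combining early clobber with a specific-register constraint (names are illustrative, not from libyuv):

    #include <stdint.h>

    static inline void store_low_byte(uint8_t* dst, int value) {
      intptr_t t;
      asm("movl %2,%k0 \n\t"       /* write the 32-bit view of t first  */
          "mov %b0,(%1) \n\t"      /* store t's low byte while %1 is    */
                                   /* still live: '&' keeps them apart  */
          : "=&a"(t)               /* write-only, early-clobber, in eax */
          : "r"(dst), "r"(value)
          : "memory");             /* the asm writes to *dst            */
    }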
895 | 895 |
896 // Reads 4 pixels, duplicates them and writes 8 pixels. | 896 // Reads 4 pixels, duplicates them and writes 8 pixels. |
897 // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. | 897 // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. |
898 void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr, | 898 void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr, |
899 int dst_width, int x, int dx) { | 899 int dst_width, int x, int dx) { |
900 asm volatile ( | 900 asm volatile ( |
(...skipping 90 matching lines...)
991 : "memory", "cc", NACL_R14 | 991 : "memory", "cc", NACL_R14 |
992 "xmm0", "xmm1", "xmm2", "xmm3" | 992 "xmm0", "xmm1", "xmm2", "xmm3" |
993 ); | 993 ); |
994 } | 994 } |
995 | 995 |
996 // Reads 4 pixels at a time. | 996 // Reads 4 pixels at a time. |
997 // Alignment requirement: dst_argb 16 byte aligned. | 997 // Alignment requirement: dst_argb 16 byte aligned. |
998 void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, | 998 void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, |
999 int src_stepx, uint8* dst_argb, int dst_width) { | 999 int src_stepx, uint8* dst_argb, int dst_width) { |
1000 intptr_t src_stepx_x4 = (intptr_t)(src_stepx); | 1000 intptr_t src_stepx_x4 = (intptr_t)(src_stepx); |
1001 intptr_t src_stepx_x12 = 0; | 1001 intptr_t src_stepx_x12; |
1002 asm volatile ( | 1002 asm volatile ( |
1003 "lea " MEMLEA3(0x00,1,4) ",%1 \n" | 1003 "lea " MEMLEA3(0x00,1,4) ",%1 \n" |
1004 "lea " MEMLEA4(0x00,1,1,2) ",%4 \n" | 1004 "lea " MEMLEA4(0x00,1,1,2) ",%4 \n" |
1005 LABELALIGN | 1005 LABELALIGN |
1006 "1: \n" | 1006 "1: \n" |
1007 "movd " MEMACCESS(0) ",%%xmm0 \n" | 1007 "movd " MEMACCESS(0) ",%%xmm0 \n" |
1008 MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1 | 1008 MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1 |
1009 "punpckldq %%xmm1,%%xmm0 \n" | 1009 "punpckldq %%xmm1,%%xmm0 \n" |
1010 MEMOPREG(movd,0x00,0,1,2,xmm2) // movd (%0,%1,2),%%xmm2 | 1010 MEMOPREG(movd,0x00,0,1,2,xmm2) // movd (%0,%1,2),%%xmm2 |
1011 MEMOPREG(movd,0x00,0,4,1,xmm3) // movd (%0,%4,1),%%xmm3 | 1011 MEMOPREG(movd,0x00,0,4,1,xmm3) // movd (%0,%4,1),%%xmm3 |
1012 "lea " MEMLEA4(0x00,0,1,4) ",%0 \n" | 1012 "lea " MEMLEA4(0x00,0,1,4) ",%0 \n" |
1013 "punpckldq %%xmm3,%%xmm2 \n" | 1013 "punpckldq %%xmm3,%%xmm2 \n" |
1014 "punpcklqdq %%xmm2,%%xmm0 \n" | 1014 "punpcklqdq %%xmm2,%%xmm0 \n" |
1015 "movdqu %%xmm0," MEMACCESS(2) " \n" | 1015 "movdqu %%xmm0," MEMACCESS(2) " \n" |
1016 "lea " MEMLEA(0x10,2) ",%2 \n" | 1016 "lea " MEMLEA(0x10,2) ",%2 \n" |
1017 "sub $0x4,%3 \n" | 1017 "sub $0x4,%3 \n" |
1018 "jg 1b \n" | 1018 "jg 1b \n" |
1019 : "+r"(src_argb), // %0 | 1019 : "+r"(src_argb), // %0 |
1020 "+r"(src_stepx_x4), // %1 | 1020 "+r"(src_stepx_x4), // %1 |
1021 "+r"(dst_argb), // %2 | 1021 "+r"(dst_argb), // %2 |
1022 "+r"(dst_width), // %3 | 1022 "+r"(dst_width), // %3 |
1023 "+r"(src_stepx_x12) // %4 | 1023 "=&r"(src_stepx_x12) // %4 |
1024 :: "memory", "cc", NACL_R14 | 1024 :: "memory", "cc", NACL_R14 |
1025 "xmm0", "xmm1", "xmm2", "xmm3" | 1025 "xmm0", "xmm1", "xmm2", "xmm3" |
1026 ); | 1026 ); |
1027 } | 1027 } |
1028 | 1028 |
1029 // Blends four 2x2 to 4x1. | 1029 // Blends four 2x2 to 4x1. |
1030 // Alignment requirement: dst_argb 16 byte aligned. | 1030 // Alignment requirement: dst_argb 16 byte aligned. |
1031 void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, | 1031 void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb, |
1032 ptrdiff_t src_stride, int src_stepx, | 1032 ptrdiff_t src_stride, int src_stepx, |
1033 uint8* dst_argb, int dst_width) { | 1033 uint8* dst_argb, int dst_width) { |
1034 intptr_t src_stepx_x4 = (intptr_t)(src_stepx); | 1034 intptr_t src_stepx_x4 = (intptr_t)(src_stepx); |
1035 intptr_t src_stepx_x12 = 0; | 1035 intptr_t src_stepx_x12; |
1036 intptr_t row1 = (intptr_t)(src_stride); | 1036 intptr_t row1 = (intptr_t)(src_stride); |
1037 asm volatile ( | 1037 asm volatile ( |
1038 "lea " MEMLEA3(0x00,1,4) ",%1 \n" | 1038 "lea " MEMLEA3(0x00,1,4) ",%1 \n" |
1039 "lea " MEMLEA4(0x00,1,1,2) ",%4 \n" | 1039 "lea " MEMLEA4(0x00,1,1,2) ",%4 \n" |
1040 "lea " MEMLEA4(0x00,0,5,1) ",%5 \n" | 1040 "lea " MEMLEA4(0x00,0,5,1) ",%5 \n" |
1041 | 1041 |
1042 LABELALIGN | 1042 LABELALIGN |
1043 "1: \n" | 1043 "1: \n" |
1044 "movq " MEMACCESS(0) ",%%xmm0 \n" | 1044 "movq " MEMACCESS(0) ",%%xmm0 \n" |
1045 MEMOPREG(movhps,0x00,0,1,1,xmm0) // movhps (%0,%1,1),%%xmm0 | 1045 MEMOPREG(movhps,0x00,0,1,1,xmm0) // movhps (%0,%1,1),%%xmm0 |
1046 MEMOPREG(movq,0x00,0,1,2,xmm1) // movq (%0,%1,2),%%xmm1 | 1046 MEMOPREG(movq,0x00,0,1,2,xmm1) // movq (%0,%1,2),%%xmm1 |
1047 MEMOPREG(movhps,0x00,0,4,1,xmm1) // movhps (%0,%4,1),%%xmm1 | 1047 MEMOPREG(movhps,0x00,0,4,1,xmm1) // movhps (%0,%4,1),%%xmm1 |
1048 "lea " MEMLEA4(0x00,0,1,4) ",%0 \n" | 1048 "lea " MEMLEA4(0x00,0,1,4) ",%0 \n" |
1049 "movq " MEMACCESS(5) ",%%xmm2 \n" | 1049 "movq " MEMACCESS(5) ",%%xmm2 \n" |
1050 MEMOPREG(movhps,0x00,5,1,1,xmm2) // movhps (%5,%1,1),%%xmm2 | 1050 MEMOPREG(movhps,0x00,5,1,1,xmm2) // movhps (%5,%1,1),%%xmm2 |
1051 MEMOPREG(movq,0x00,5,1,2,xmm3) // movq (%5,%1,2),%%xmm3 | 1051 MEMOPREG(movq,0x00,5,1,2,xmm3) // movq (%5,%1,2),%%xmm3 |
1052 MEMOPREG(movhps,0x00,5,4,1,xmm3) // movhps (%5,%4,1),%%xmm3 | 1052 MEMOPREG(movhps,0x00,5,4,1,xmm3) // movhps (%5,%4,1),%%xmm3 |
1053 "lea " MEMLEA4(0x00,5,1,4) ",%5 \n" | 1053 "lea " MEMLEA4(0x00,5,1,4) ",%5 \n" |
1054 "pavgb %%xmm2,%%xmm0 \n" | 1054 "pavgb %%xmm2,%%xmm0 \n" |
1055 "pavgb %%xmm3,%%xmm1 \n" | 1055 "pavgb %%xmm3,%%xmm1 \n" |
1056 "movdqa %%xmm0,%%xmm2 \n" | 1056 "movdqa %%xmm0,%%xmm2 \n" |
1057 "shufps $0x88,%%xmm1,%%xmm0 \n" | 1057 "shufps $0x88,%%xmm1,%%xmm0 \n" |
1058 "shufps $0xdd,%%xmm1,%%xmm2 \n" | 1058 "shufps $0xdd,%%xmm1,%%xmm2 \n" |
1059 "pavgb %%xmm2,%%xmm0 \n" | 1059 "pavgb %%xmm2,%%xmm0 \n" |
1060 "movdqu %%xmm0," MEMACCESS(2) " \n" | 1060 "movdqu %%xmm0," MEMACCESS(2) " \n" |
1061 "lea " MEMLEA(0x10,2) ",%2 \n" | 1061 "lea " MEMLEA(0x10,2) ",%2 \n" |
1062 "sub $0x4,%3 \n" | 1062 "sub $0x4,%3 \n" |
1063 "jg 1b \n" | 1063 "jg 1b \n" |
1064 : "+r"(src_argb), // %0 | 1064 : "+r"(src_argb), // %0 |
1065 "+r"(src_stepx_x4), // %1 | 1065 "+r"(src_stepx_x4), // %1 |
1066 "+r"(dst_argb), // %2 | 1066 "+r"(dst_argb), // %2 |
1067 "+rm"(dst_width), // %3 | 1067 "+rm"(dst_width), // %3 |
1068 "+r"(src_stepx_x12), // %4 | 1068 "=&r"(src_stepx_x12), // %4 |
1069 "+r"(row1) // %5 | 1069 "+r"(row1) // %5 |
1070 :: "memory", "cc", NACL_R14 | 1070 :: "memory", "cc", NACL_R14 |
1071 "xmm0", "xmm1", "xmm2", "xmm3" | 1071 "xmm0", "xmm1", "xmm2", "xmm3" |
1072 ); | 1072 ); |
1073 } | 1073 } |
1074 | 1074 |
1075 void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, | 1075 void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb, |
1076 int dst_width, int x, int dx) { | 1076 int dst_width, int x, int dx) { |
1077 intptr_t x0 = 0, x1 = 0; | 1077 intptr_t x0, x1; |
1078 asm volatile ( | 1078 asm volatile ( |
1079 "movd %5,%%xmm2 \n" | 1079 "movd %5,%%xmm2 \n" |
1080 "movd %6,%%xmm3 \n" | 1080 "movd %6,%%xmm3 \n" |
1081 "pshufd $0x0,%%xmm2,%%xmm2 \n" | 1081 "pshufd $0x0,%%xmm2,%%xmm2 \n" |
1082 "pshufd $0x11,%%xmm3,%%xmm0 \n" | 1082 "pshufd $0x11,%%xmm3,%%xmm0 \n" |
1083 "paddd %%xmm0,%%xmm2 \n" | 1083 "paddd %%xmm0,%%xmm2 \n" |
1084 "paddd %%xmm3,%%xmm3 \n" | 1084 "paddd %%xmm3,%%xmm3 \n" |
1085 "pshufd $0x5,%%xmm3,%%xmm0 \n" | 1085 "pshufd $0x5,%%xmm3,%%xmm0 \n" |
1086 "paddd %%xmm0,%%xmm2 \n" | 1086 "paddd %%xmm0,%%xmm2 \n" |
1087 "paddd %%xmm3,%%xmm3 \n" | 1087 "paddd %%xmm3,%%xmm3 \n" |
(...skipping 32 matching lines...)
1120 "pextrw $0x5,%%xmm2,%k0 \n" | 1120 "pextrw $0x5,%%xmm2,%k0 \n" |
1121 "punpckldq %%xmm1,%%xmm0 \n" | 1121 "punpckldq %%xmm1,%%xmm0 \n" |
1122 "movq %%xmm0," MEMACCESS(2) " \n" | 1122 "movq %%xmm0," MEMACCESS(2) " \n" |
1123 "lea " MEMLEA(0x8,2) ",%2 \n" | 1123 "lea " MEMLEA(0x8,2) ",%2 \n" |
1124 "29: \n" | 1124 "29: \n" |
1125 "test $0x1,%4 \n" | 1125 "test $0x1,%4 \n" |
1126 "je 99f \n" | 1126 "je 99f \n" |
1127 MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0 | 1127 MEMOPREG(movd,0x00,3,0,4,xmm0) // movd (%3,%0,4),%%xmm0 |
1128 "movd %%xmm0," MEMACCESS(2) " \n" | 1128 "movd %%xmm0," MEMACCESS(2) " \n" |
1129 "99: \n" | 1129 "99: \n" |
1130 : "+a"(x0), // %0 | 1130 : "=&a"(x0), // %0 |
1131 "+d"(x1), // %1 | 1131 "=&d"(x1), // %1 |
1132 "+r"(dst_argb), // %2 | 1132 "+r"(dst_argb), // %2 |
1133 "+r"(src_argb), // %3 | 1133 "+r"(src_argb), // %3 |
1134 "+r"(dst_width) // %4 | 1134 "+r"(dst_width) // %4 |
1135 : "rm"(x), // %5 | 1135 : "rm"(x), // %5 |
1136 "rm"(dx) // %6 | 1136 "rm"(dx) // %6 |
1137 : "memory", "cc", NACL_R14 | 1137 : "memory", "cc", NACL_R14 |
1138 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" | 1138 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" |
1139 ); | 1139 ); |
1140 } | 1140 } |
1141 | 1141 |
(...skipping 30 matching lines...)
1172 }; | 1172 }; |
1173 | 1173 |
1174 // Shuffle table for duplicating 2 fractions into 8 bytes each | 1174 // Shuffle table for duplicating 2 fractions into 8 bytes each |
1175 static uvec8 kShuffleFractions = { | 1175 static uvec8 kShuffleFractions = { |
1176 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, | 1176 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, |
1177 }; | 1177 }; |
1178 | 1178 |
1179 // Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version | 1179 // Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version |
1180 void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, | 1180 void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb, |
1181 int dst_width, int x, int dx) { | 1181 int dst_width, int x, int dx) { |
1182 intptr_t x0 = 0, x1 = 0; | 1182 intptr_t x0, x1; |
1183 asm volatile ( | 1183 asm volatile ( |
1184 "movdqa %0,%%xmm4 \n" | 1184 "movdqa %0,%%xmm4 \n" |
1185 "movdqa %1,%%xmm5 \n" | 1185 "movdqa %1,%%xmm5 \n" |
1186 : | 1186 : |
1187 : "m"(kShuffleColARGB), // %0 | 1187 : "m"(kShuffleColARGB), // %0 |
1188 "m"(kShuffleFractions) // %1 | 1188 "m"(kShuffleFractions) // %1 |
1189 ); | 1189 ); |
1190 | 1190 |
1191 asm volatile ( | 1191 asm volatile ( |
1192 "movd %5,%%xmm2 \n" | 1192 "movd %5,%%xmm2 \n" |
(...skipping 42 matching lines...)
1235 "pmaddubsw %%xmm2,%%xmm0 \n" | 1235 "pmaddubsw %%xmm2,%%xmm0 \n" |
1236 "psrlw $0x7,%%xmm0 \n" | 1236 "psrlw $0x7,%%xmm0 \n" |
1237 "packuswb %%xmm0,%%xmm0 \n" | 1237 "packuswb %%xmm0,%%xmm0 \n" |
1238 "movd %%xmm0," MEMACCESS(0) " \n" | 1238 "movd %%xmm0," MEMACCESS(0) " \n" |
1239 | 1239 |
1240 LABELALIGN | 1240 LABELALIGN |
1241 "99: \n" | 1241 "99: \n" |
1242 : "+r"(dst_argb), // %0 | 1242 : "+r"(dst_argb), // %0 |
1243 "+r"(src_argb), // %1 | 1243 "+r"(src_argb), // %1 |
1244 "+rm"(dst_width), // %2 | 1244 "+rm"(dst_width), // %2 |
1245 "+r"(x0), // %3 | 1245 "=&r"(x0), // %3 |
1246 "+r"(x1) // %4 | 1246 "=&r"(x1) // %4 |
1247 : "rm"(x), // %5 | 1247 : "rm"(x), // %5 |
1248 "rm"(dx) // %6 | 1248 "rm"(dx) // %6 |
1249 : "memory", "cc", NACL_R14 | 1249 : "memory", "cc", NACL_R14 |
1250 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" | 1250 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" |
1251 ); | 1251 ); |
1252 } | 1252 } |
1253 | 1253 |
1254 // Divide num by div and return as 16.16 fixed point result. | 1254 // Divide num by div and return as 16.16 fixed point result. |
1255 int FixedDiv_X86(int num, int div) { | 1255 int FixedDiv_X86(int num, int div) { |
1256 asm volatile ( | 1256 asm volatile ( |
(...skipping 26 matching lines...)
1283 ); | 1283 ); |
1284 return num; | 1284 return num; |
1285 } | 1285 } |
1286 | 1286 |
1287 #endif // defined(__x86_64__) || defined(__i386__) | 1287 #endif // defined(__x86_64__) || defined(__i386__) |
1288 | 1288 |
1289 #ifdef __cplusplus | 1289 #ifdef __cplusplus |
1290 } // extern "C" | 1290 } // extern "C" |
1291 } // namespace libyuv | 1291 } // namespace libyuv |
1292 #endif | 1292 #endif |