Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(452)

Side by Side Diff: source/scale_gcc.cc

Issue 1513183004: use rounding in scaledown by 2 (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: corrected version to 1554 Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/scale_any.cc ('k') | source/scale_win.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
91 { 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 }; 91 { 2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128 };
92 92
93 // Scaling values for boxes of 3x2 and 2x2 93 // Scaling values for boxes of 3x2 and 2x2
94 static uvec16 kScaleAb2 = 94 static uvec16 kScaleAb2 =
95 { 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 }; 95 { 65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0 };
96 96
97 // GCC versions of row functions are verbatim conversions from Visual C. 97 // GCC versions of row functions are verbatim conversions from Visual C.
98 // Generated using gcc disassembly on Visual C object file: 98 // Generated using gcc disassembly on Visual C object file:
99 // objdump -D yuvscaler.obj >yuvscaler.txt 99 // objdump -D yuvscaler.obj >yuvscaler.txt
100 100
101 void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, 101 void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
102 uint8* dst_ptr, int dst_width) { 102 uint8* dst_ptr, int dst_width) {
103 asm volatile ( 103 asm volatile (
104 LABELALIGN 104 LABELALIGN
105 "1: \n" 105 "1: \n"
106 "movdqu " MEMACCESS(0) ",%%xmm0 \n" 106 "movdqu " MEMACCESS(0) ",%%xmm0 \n"
107 "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" 107 "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
108 "lea " MEMLEA(0x20,0) ",%0 \n" 108 "lea " MEMLEA(0x20,0) ",%0 \n"
109 "psrlw $0x8,%%xmm0 \n" 109 "psrlw $0x8,%%xmm0 \n"
110 "psrlw $0x8,%%xmm1 \n" 110 "psrlw $0x8,%%xmm1 \n"
111 "packuswb %%xmm1,%%xmm0 \n" 111 "packuswb %%xmm1,%%xmm0 \n"
112 "movdqu %%xmm0," MEMACCESS(1) " \n" 112 "movdqu %%xmm0," MEMACCESS(1) " \n"
113 "lea " MEMLEA(0x10,1) ",%1 \n" 113 "lea " MEMLEA(0x10,1) ",%1 \n"
114 "sub $0x10,%2 \n" 114 "sub $0x10,%2 \n"
115 "jg 1b \n" 115 "jg 1b \n"
116 : "+r"(src_ptr), // %0 116 : "+r"(src_ptr), // %0
117 "+r"(dst_ptr), // %1 117 "+r"(dst_ptr), // %1
118 "+r"(dst_width) // %2 118 "+r"(dst_width) // %2
119 :: "memory", "cc", "xmm0", "xmm1" 119 :: "memory", "cc", "xmm0", "xmm1"
120 ); 120 );
121 } 121 }
122 122
123 void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, 123 void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
124 uint8* dst_ptr, int dst_width) { 124 uint8* dst_ptr, int dst_width) {
125 asm volatile ( 125 asm volatile (
126 "pcmpeqb %%xmm5,%%xmm5 \n" 126 "pcmpeqb %%xmm4,%%xmm4 \n"
127 "psrlw $0x8,%%xmm5 \n" 127 "psrlw $0xf,%%xmm4 \n"
128 "packuswb %%xmm4,%%xmm4 \n"
129 "pxor %%xmm5,%%xmm5 \n"
128 130
129 LABELALIGN 131 LABELALIGN
130 "1: \n" 132 "1: \n"
131 "movdqu " MEMACCESS(0) ",%%xmm0 \n" 133 "movdqu " MEMACCESS(0) ",%%xmm0 \n"
132 "movdqu " MEMACCESS2(0x10, 0) ",%%xmm1 \n" 134 "movdqu " MEMACCESS2(0x10, 0) ",%%xmm1 \n"
133 "lea " MEMLEA(0x20,0) ",%0 \n" 135 "lea " MEMLEA(0x20,0) ",%0 \n"
134 "movdqa %%xmm0,%%xmm2 \n" 136 "pmaddubsw %%xmm4,%%xmm0 \n"
135 "psrlw $0x8,%%xmm0 \n" 137 "pmaddubsw %%xmm4,%%xmm1 \n"
136 "movdqa %%xmm1,%%xmm3 \n" 138 "pavgw %%xmm5,%%xmm0 \n"
137 "psrlw $0x8,%%xmm1 \n" 139 "pavgw %%xmm5,%%xmm1 \n"
138 "pand %%xmm5,%%xmm2 \n" 140 "packuswb %%xmm1,%%xmm0 \n"
139 "pand %%xmm5,%%xmm3 \n"
140 "pavgw %%xmm2,%%xmm0 \n"
141 "pavgw %%xmm3,%%xmm1 \n"
142 "packuswb %%xmm1,%%xmm0 \n"
143 "movdqu %%xmm0," MEMACCESS(1) " \n" 141 "movdqu %%xmm0," MEMACCESS(1) " \n"
144 "lea " MEMLEA(0x10,1) ",%1 \n" 142 "lea " MEMLEA(0x10,1) ",%1 \n"
145 "sub $0x10,%2 \n" 143 "sub $0x10,%2 \n"
146 "jg 1b \n" 144 "jg 1b \n"
147 : "+r"(src_ptr), // %0 145 : "+r"(src_ptr), // %0
148 "+r"(dst_ptr), // %1 146 "+r"(dst_ptr), // %1
149 "+r"(dst_width) // %2 147 "+r"(dst_width) // %2
150 :: "memory", "cc", "xmm0", "xmm1", "xmm5" 148 :: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"
151 ); 149 );
152 } 150 }
153 151
154 void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, 152 void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
155 uint8* dst_ptr, int dst_width) { 153 uint8* dst_ptr, int dst_width) {
156 asm volatile ( 154 asm volatile (
157 "pcmpeqb %%xmm5,%%xmm5 \n" 155 "pcmpeqb %%xmm4,%%xmm4 \n"
158 "psrlw $0x8,%%xmm5 \n" 156 "psrlw $0xf,%%xmm4 \n"
157 "packuswb %%xmm4,%%xmm4 \n"
158 "pxor %%xmm5,%%xmm5 \n"
159 159
160 LABELALIGN 160 LABELALIGN
161 "1: \n" 161 "1: \n"
162 "movdqu " MEMACCESS(0) ",%%xmm0 \n" 162 "movdqu " MEMACCESS(0) ",%%xmm0 \n"
163 "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" 163 "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n"
164 MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2 164 MEMOPREG(movdqu,0x00,0,3,1,xmm2) // movdqu (%0,%3,1),%%xmm2
165 MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3 165 MEMOPREG(movdqu,0x10,0,3,1,xmm3) // movdqu 0x10(%0,%3,1),%%xmm3
166 "lea " MEMLEA(0x20,0) ",%0 \n" 166 "lea " MEMLEA(0x20,0) ",%0 \n"
167 "pavgb %%xmm2,%%xmm0 \n" 167 "pmaddubsw %%xmm4,%%xmm0 \n"
168 "pavgb %%xmm3,%%xmm1 \n" 168 "pmaddubsw %%xmm4,%%xmm1 \n"
169 "movdqa %%xmm0,%%xmm2 \n" 169 "pmaddubsw %%xmm4,%%xmm2 \n"
170 "psrlw $0x8,%%xmm0 \n" 170 "pmaddubsw %%xmm4,%%xmm3 \n"
171 "movdqa %%xmm1,%%xmm3 \n" 171 "paddw %%xmm2,%%xmm0 \n"
172 "psrlw $0x8,%%xmm1 \n" 172 "paddw %%xmm3,%%xmm1 \n"
173 "pand %%xmm5,%%xmm2 \n" 173 "psrlw $0x1,%%xmm0 \n"
174 "pand %%xmm5,%%xmm3 \n" 174 "psrlw $0x1,%%xmm1 \n"
175 "pavgw %%xmm2,%%xmm0 \n" 175 "pavgw %%xmm5,%%xmm0 \n"
176 "pavgw %%xmm3,%%xmm1 \n" 176 "pavgw %%xmm5,%%xmm1 \n"
177 "packuswb %%xmm1,%%xmm0 \n" 177 "packuswb %%xmm1,%%xmm0 \n"
178 "movdqu %%xmm0," MEMACCESS(1) " \n" 178 "movdqu %%xmm0," MEMACCESS(1) " \n"
179 "lea " MEMLEA(0x10,1) ",%1 \n" 179 "lea " MEMLEA(0x10,1) ",%1 \n"
180 "sub $0x10,%2 \n" 180 "sub $0x10,%2 \n"
181 "jg 1b \n" 181 "jg 1b \n"
182 : "+r"(src_ptr), // %0 182 : "+r"(src_ptr), // %0
183 "+r"(dst_ptr), // %1 183 "+r"(dst_ptr), // %1
184 "+r"(dst_width) // %2 184 "+r"(dst_width) // %2
185 : "r"((intptr_t)(src_stride)) // %3 185 : "r"((intptr_t)(src_stride)) // %3
186 : "memory", "cc", NACL_R14 186 : "memory", "cc", NACL_R14
187 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 187 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
(...skipping 909 matching lines...) Expand 10 before | Expand all | Expand 10 after
1097 ); 1097 );
1098 return num; 1098 return num;
1099 } 1099 }
1100 1100
1101 #endif // defined(__x86_64__) || defined(__i386__) 1101 #endif // defined(__x86_64__) || defined(__i386__)
1102 1102
1103 #ifdef __cplusplus 1103 #ifdef __cplusplus
1104 } // extern "C" 1104 } // extern "C"
1105 } // namespace libyuv 1105 } // namespace libyuv
1106 #endif 1106 #endif
OLDNEW
« no previous file with comments | « source/scale_any.cc ('k') | source/scale_win.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698