Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(379)

Side by Side Diff: source/scale_gcc.cc

Issue 1520423003: avx2 scale down by 2 for gcc (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: merge with head Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « include/libyuv/version.h ('k') | unit_test/scale_test.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 170 matching lines...) Expand 10 before | Expand all | Expand 10 after
181 "jg 1b \n" 181 "jg 1b \n"
182 : "+r"(src_ptr), // %0 182 : "+r"(src_ptr), // %0
183 "+r"(dst_ptr), // %1 183 "+r"(dst_ptr), // %1
184 "+r"(dst_width) // %2 184 "+r"(dst_width) // %2
185 : "r"((intptr_t)(src_stride)) // %3 185 : "r"((intptr_t)(src_stride)) // %3
186 : "memory", "cc", NACL_R14 186 : "memory", "cc", NACL_R14
187 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 187 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
188 ); 188 );
189 } 189 }
190 190
191 #ifdef HAS_SCALEROWDOWN2_AVX2
192 void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
193 uint8* dst_ptr, int dst_width) {
194 asm volatile (
195 LABELALIGN
196 "1: \n"
197 "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
198 "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
199 "lea " MEMLEA(0x40,0) ",%0 \n"
200 "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
201 "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
202 "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
203 "vpermq $0xd8,%%ymm0,%%ymm0 \n"
204 "vmovdqu %%ymm0," MEMACCESS(1) " \n"
205 "lea " MEMLEA(0x20,1) ",%1 \n"
206 "sub $0x20,%2 \n"
207 "jg 1b \n"
208 "vzeroupper \n"
209 : "+r"(src_ptr), // %0
210 "+r"(dst_ptr), // %1
211 "+r"(dst_width) // %2
212 :: "memory", "cc", "xmm0", "xmm1"
213 );
214 }
215
216 void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
217 uint8* dst_ptr, int dst_width) {
218 asm volatile (
219 "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
220 "vpsrlw $0xf,%%ymm4,%%ymm4 \n"
221 "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
222 "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
223
224 LABELALIGN
225 "1: \n"
226 "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
227 "vmovdqu " MEMACCESS2(0x20, 0) ",%%ymm1 \n"
228 "lea " MEMLEA(0x40,0) ",%0 \n"
229 "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
230 "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
231 "vpavgw %%ymm5,%%ymm0,%%ymm0 \n"
232 "vpavgw %%ymm5,%%ymm1,%%ymm1 \n"
233 "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
234 "vpermq $0xd8,%%ymm0,%%ymm0 \n"
235 "vmovdqu %%ymm0," MEMACCESS(1) " \n"
236 "lea " MEMLEA(0x20,1) ",%1 \n"
237 "sub $0x20,%2 \n"
238 "jg 1b \n"
239 "vzeroupper \n"
240 : "+r"(src_ptr), // %0
241 "+r"(dst_ptr), // %1
242 "+r"(dst_width) // %2
243 :: "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"
244 );
245 }
246
247 void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
248 uint8* dst_ptr, int dst_width) {
249 asm volatile (
250 "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"
251 "vpsrlw $0xf,%%ymm4,%%ymm4 \n"
252 "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"
253 "vpxor %%ymm5,%%ymm5,%%ymm5 \n"
254
255 LABELALIGN
256 "1: \n"
257 "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"
258 "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"
259 MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2
260 MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3
261 "lea " MEMLEA(0x40,0) ",%0 \n"
262 "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"
263 "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
264 "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
265 "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
266 "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"
267 "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
268 "vpsrlw $0x1,%%ymm0,%%ymm0 \n"
269 "vpsrlw $0x1,%%ymm1,%%ymm1 \n"
270 "vpavgw %%ymm5,%%ymm0,%%ymm0 \n"
271 "vpavgw %%ymm5,%%ymm1,%%ymm1 \n"
272 "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
273 "vpermq $0xd8,%%ymm0,%%ymm0 \n"
274 "vmovdqu %%ymm0," MEMACCESS(1) " \n"
275 "lea " MEMLEA(0x20,1) ",%1 \n"
276 "sub $0x20,%2 \n"
277 "jg 1b \n"
278 "vzeroupper \n"
279 : "+r"(src_ptr), // %0
280 "+r"(dst_ptr), // %1
281 "+r"(dst_width) // %2
282 : "r"((intptr_t)(src_stride)) // %3
283 : "memory", "cc", NACL_R14
284 "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
285 );
286 }
287 #endif // HAS_SCALEROWDOWN2_AVX2
288
191 void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, 289 void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
192 uint8* dst_ptr, int dst_width) { 290 uint8* dst_ptr, int dst_width) {
193 asm volatile ( 291 asm volatile (
194 "pcmpeqb %%xmm5,%%xmm5 \n" 292 "pcmpeqb %%xmm5,%%xmm5 \n"
195 "psrld $0x18,%%xmm5 \n" 293 "psrld $0x18,%%xmm5 \n"
196 "pslld $0x10,%%xmm5 \n" 294 "pslld $0x10,%%xmm5 \n"
197 295
198 LABELALIGN 296 LABELALIGN
199 "1: \n" 297 "1: \n"
200 "movdqu " MEMACCESS(0) ",%%xmm0 \n" 298 "movdqu " MEMACCESS(0) ",%%xmm0 \n"
(...skipping 896 matching lines...) Expand 10 before | Expand all | Expand 10 after
1097 ); 1195 );
1098 return num; 1196 return num;
1099 } 1197 }
1100 1198
1101 #endif // defined(__x86_64__) || defined(__i386__) 1199 #endif // defined(__x86_64__) || defined(__i386__)
1102 1200
1103 #ifdef __cplusplus 1201 #ifdef __cplusplus
1104 } // extern "C" 1202 } // extern "C"
1105 } // namespace libyuv 1203 } // namespace libyuv
1106 #endif 1204 #endif
OLDNEW
« no previous file with comments | « include/libyuv/version.h ('k') | unit_test/scale_test.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698