Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(965)

Side by Side Diff: source/scale_gcc.cc

Issue 1546763002: port scaledownby4_avx2 to gcc (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: bump version to r1561 Created 4 years, 12 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « include/libyuv/version.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 350 matching lines...) Expand 10 before | Expand all | Expand 10 after
361 : "+r"(src_ptr), // %0 361 : "+r"(src_ptr), // %0
362 "+r"(dst_ptr), // %1 362 "+r"(dst_ptr), // %1
363 "+r"(dst_width), // %2 363 "+r"(dst_width), // %2
364 "+r"(stridex3) // %3 364 "+r"(stridex3) // %3
365 : "r"((intptr_t)(src_stride)) // %4 365 : "r"((intptr_t)(src_stride)) // %4
366 : "memory", "cc", NACL_R14 366 : "memory", "cc", NACL_R14
367 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 367 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
368 ); 368 );
369 } 369 }
370 370
371
372 #ifdef HAS_SCALEROWDOWN4_AVX2
// Point-sampled 1/4 horizontal downscale of one row using AVX2.
// Each loop iteration reads 64 source bytes and stores 16 destination bytes,
// keeping the byte at offset 2 of every group of 4 source bytes.
// src_stride is not referenced: it is not passed to the asm as an operand.
// NOTE(review): LABELALIGN, MEMACCESS, MEMACCESS2 and MEMLEA are macros defined
// outside this view; the per-line notes assume they expand to plain
// (%N) / disp(%N) addressing -- confirm.
// NOTE(review): every iteration stores a full 16 bytes and the loop continues
// while the remaining dst_width is > 0, so callers presumably pass a dst_width
// that is a multiple of 16 -- confirm against the call sites.
373 void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
374 uint8* dst_ptr, int dst_width) {
375 asm volatile (
376 "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"  // ymm5 = all ones
377 "vpsrld $0x18,%%ymm5,%%ymm5 \n"  // each dword = 0x000000ff
378 "vpslld $0x10,%%ymm5,%%ymm5 \n"  // each dword = 0x00ff0000 (byte 2 mask)
379 LABELALIGN
380 "1: \n"
381 "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"  // source bytes 0..31
382 "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"  // source bytes 32..63
383 "lea " MEMLEA(0x40,0) ",%0 \n"  // src_ptr += 64
384 "vpand %%ymm5,%%ymm0,%%ymm0 \n"  // keep byte 2 of every 4
385 "vpand %%ymm5,%%ymm1,%%ymm1 \n"
386 "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"  // words -> bytes, per 128-bit lane
387 "vpermq $0xd8,%%ymm0,%%ymm0 \n"  // qword order 0,2,1,3: undo lane interleave
388 "vpsrlw $0x8,%%ymm0,%%ymm0 \n"  // move kept byte to the low half of each word
389 "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"  // words -> bytes again
390 "vpermq $0xd8,%%ymm0,%%ymm0 \n"  // gather the 16 result bytes in xmm0
391 "vmovdqu %%xmm0," MEMACCESS(1) " \n"  // store 16 output bytes
392 "lea " MEMLEA(0x10,1) ",%1 \n"  // dst_ptr += 16
393 "sub $0x10,%2 \n"  // dst_width -= 16
394 "jg 1b \n"  // loop while dst_width > 0
395 "vzeroupper \n"  // zero the upper 128 bits of the ymm registers
396 : "+r"(src_ptr), // %0
397 "+r"(dst_ptr), // %1
398 "+r"(dst_width) // %2
399 :: "memory", "cc", "xmm0", "xmm1", "xmm5"  // no input-only operands
400 );
401 }
402
// Box-filtered 1/4 downscale using AVX2: each output byte is the rounded
// average of a 4x4 block of source bytes, computed as (sum of 16 + 8) >> 4.
// Each loop iteration reads 64 bytes from each of four rows (src_ptr,
// src_ptr + src_stride, src_ptr + 2 * src_stride, src_ptr + 3 * src_stride)
// and stores 16 destination bytes.
// NOTE(review): LABELALIGN, MEMACCESS, MEMACCESS2, MEMLEA, MEMOPREG and
// NACL_R14 are macros defined outside this view; the per-line notes rely on
// the expansions shown in the existing trailing comments -- confirm.
// NOTE(review): every iteration stores a full 16 bytes and the loop continues
// while the remaining dst_width is > 0, so callers presumably pass a dst_width
// that is a multiple of 16 -- confirm against the call sites.
403 void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
404 uint8* dst_ptr, int dst_width) {
405 asm volatile (
406 "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n"  // ymm4 = all ones
407 "vpsrlw $0xf,%%ymm4,%%ymm4 \n"  // each word = 1
408 "vpsllw $0x3,%%ymm4,%%ymm5 \n"  // ymm5: each word = 8 (rounding term)
409 "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n"  // ymm4: each byte = 1 (multiplier)
410
411 LABELALIGN
412 "1: \n"
413 "vmovdqu " MEMACCESS(0) ",%%ymm0 \n"  // row 0, bytes 0..31
414 "vmovdqu " MEMACCESS2(0x20,0) ",%%ymm1 \n"  // row 0, bytes 32..63
415 MEMOPREG(vmovdqu,0x00,0,3,1,ymm2) // vmovdqu (%0,%3,1),%%ymm2 -- row 1
416 MEMOPREG(vmovdqu,0x20,0,3,1,ymm3) // vmovdqu 0x20(%0,%3,1),%%ymm3
417 "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n"  // sum adjacent byte pairs into words
418 "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n"
419 "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
420 "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
421 "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"  // accumulate row 1 into row 0 sums
422 "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
423 MEMOPREG(vmovdqu,0x00,0,3,2,ymm2) // vmovdqu (%0,%3,2),%%ymm2 -- row 2
424 MEMOPREG(vmovdqu,0x20,0,3,2,ymm3) // vmovdqu 0x20(%0,%3,2),%%ymm3
425 "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
426 "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
427 "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"  // accumulate row 2
428 "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
429 MEMOPREG(vmovdqu,0x00,0,4,1,ymm2) // vmovdqu (%0,%4,1),%%ymm2 -- row 3
430 MEMOPREG(vmovdqu,0x20,0,4,1,ymm3) // vmovdqu 0x20(%0,%4,1),%%ymm3
431 "lea " MEMLEA(0x40,0) ",%0 \n"  // src_ptr += 64
432 "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n"
433 "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n"
434 "vpaddw %%ymm2,%%ymm0,%%ymm0 \n"  // accumulate row 3
435 "vpaddw %%ymm3,%%ymm1,%%ymm1 \n"
436 "vphaddw %%ymm1,%%ymm0,%%ymm0 \n"  // add adjacent words: 4x4 block sums
437 "vpermq $0xd8,%%ymm0,%%ymm0 \n"  // qword order 0,2,1,3: undo lane interleave
438 "vpaddw %%ymm5,%%ymm0,%%ymm0 \n"  // + 8 for rounding
439 "vpsrlw $0x4,%%ymm0,%%ymm0 \n"  // / 16
440 "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n"  // words -> bytes, per 128-bit lane
441 "vpermq $0xd8,%%ymm0,%%ymm0 \n"  // gather the 16 result bytes in xmm0
442 "vmovdqu %%xmm0," MEMACCESS(1) " \n"  // store 16 output bytes
443 "lea " MEMLEA(0x10,1) ",%1 \n"  // dst_ptr += 16
444 "sub $0x10,%2 \n"  // dst_width -= 16
445 "jg 1b \n"  // loop while dst_width > 0
446 "vzeroupper \n"  // zero the upper 128 bits of the ymm registers
447 : "+r"(src_ptr), // %0
448 "+r"(dst_ptr), // %1
449 "+r"(dst_width) // %2
450 : "r"((intptr_t)(src_stride)), // %3
451 "r"((intptr_t)(src_stride * 3)) // %4
452 : "memory", "cc", NACL_R14
453 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
454 );
455 }
456 #endif // HAS_SCALEROWDOWN4_AVX2
457
371 void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride, 458 void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
372 uint8* dst_ptr, int dst_width) { 459 uint8* dst_ptr, int dst_width) {
373 asm volatile ( 460 asm volatile (
374 "movdqa %0,%%xmm3 \n" 461 "movdqa %0,%%xmm3 \n"
375 "movdqa %1,%%xmm4 \n" 462 "movdqa %1,%%xmm4 \n"
376 "movdqa %2,%%xmm5 \n" 463 "movdqa %2,%%xmm5 \n"
377 : 464 :
378 : "m"(kShuf0), // %0 465 : "m"(kShuf0), // %0
379 "m"(kShuf1), // %1 466 "m"(kShuf1), // %1
380 "m"(kShuf2) // %2 467 "m"(kShuf2) // %2
(...skipping 815 matching lines...) Expand 10 before | Expand all | Expand 10 after
1196 ); 1283 );
1197 return num; 1284 return num;
1198 } 1285 }
1199 1286
1200 #endif // defined(__x86_64__) || defined(__i386__) 1287 #endif // defined(__x86_64__) || defined(__i386__)
1201 1288
1202 #ifdef __cplusplus 1289 #ifdef __cplusplus
1203 } // extern "C" 1290 } // extern "C"
1204 } // namespace libyuv 1291 } // namespace libyuv
1205 #endif 1292 #endif
OLD | NEW
« no previous file with comments | « include/libyuv/version.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698