Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(299)

Side by Side Diff: source/scale_gcc.cc

Issue 1392803002: add box scaling avx2 optimization for gcc (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 585 matching lines...) Expand 10 before | Expand all | Expand 10 after
596 "sub $0x10,%2 \n" 596 "sub $0x10,%2 \n"
597 "jg 1b \n" 597 "jg 1b \n"
598 : "+r"(src_ptr), // %0 598 : "+r"(src_ptr), // %0
599 "+r"(dst_ptr), // %1 599 "+r"(dst_ptr), // %1
600 "+r"(src_width) // %2 600 "+r"(src_width) // %2
601 : 601 :
602 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" 602 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
603 ); 603 );
604 } 604 }
605 605
606
607 #ifdef HAS_SCALEADDROW_AVX2
608 // Reads 32 bytes and accumulates to 32 shorts at a time.
609 void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
610 asm volatile (
611 "vpxor %%xmm5,%%xmm5 \n"
612
613 LABELALIGN
614 "1: \n"
615 "vmovdqu " MEMACCESS(0) ",%%ymm3 \n"
616 "lea " MEMLEA(0x20,0) ",%0 \n" // src_ptr += 16
617 "vpermq $0xd8,%%ymm3,%%ymm3 \n"
618 "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n"
619 "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n"
620 "vpaddusw " MEMACCESS(1) ",%%ymm2,%%ymm0 \n"
621 "vpaddusw " MEMACCESS2(0x10,1) ",%%ymm3,%%ymm1 \n"
622 "vmovdqu %%ymm0," MEMACCESS(1) " \n"
623 "vmovdqu %%ymm1," MEMACCESS2(0x10,1) " \n"
624 "lea " MEMLEA(0x40,1) ",%1 \n"
625 "sub $0x20,%2 \n"
626 "jg 1b \n"
627 "vzeroupper \n"
628 : "+r"(src_ptr), // %0
629 "+r"(dst_ptr), // %1
630 "+r"(src_width) // %2
631 :
632 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
633 );
634 }
635 #endif // HAS_SCALEADDROW_AVX2
636
606 // Bilinear column filtering. SSSE3 version. 637 // Bilinear column filtering. SSSE3 version.
607 void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, 638 void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
608 int dst_width, int x, int dx) { 639 int dst_width, int x, int dx) {
609 intptr_t x0 = 0, x1 = 0, temp_pixel = 0; 640 intptr_t x0 = 0, x1 = 0, temp_pixel = 0;
610 asm volatile ( 641 asm volatile (
611 "movd %6,%%xmm2 \n" 642 "movd %6,%%xmm2 \n"
612 "movd %7,%%xmm3 \n" 643 "movd %7,%%xmm3 \n"
613 "movl $0x04040000,%k2 \n" 644 "movl $0x04040000,%k2 \n"
614 "movd %k2,%%xmm5 \n" 645 "movd %k2,%%xmm5 \n"
615 "pcmpeqb %%xmm6,%%xmm6 \n" 646 "pcmpeqb %%xmm6,%%xmm6 \n"
(...skipping 449 matching lines...) Expand 10 before | Expand all | Expand 10 after
1065 ); 1096 );
1066 return num; 1097 return num;
1067 } 1098 }
1068 1099
1069 #endif // defined(__x86_64__) || defined(__i386__) 1100 #endif // defined(__x86_64__) || defined(__i386__)
1070 1101
1071 #ifdef __cplusplus 1102 #ifdef __cplusplus
1072 } // extern "C" 1103 } // extern "C"
1073 } // namespace libyuv 1104 } // namespace libyuv
1074 #endif 1105 #endif
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698