Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(385)

Side by Side Diff: source/scale_gcc.cc

Issue 1393653002: port scale box filter sse2 to gcc (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « include/libyuv/version.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 557 matching lines...) Expand 10 before | Expand all | Expand 10 after
568 : "+r"(src_ptr), // %0 568 : "+r"(src_ptr), // %0
569 "+r"(dst_ptr), // %1 569 "+r"(dst_ptr), // %1
570 "+r"(dst_width) // %2 570 "+r"(dst_width) // %2
571 : "r"((intptr_t)(src_stride)) // %3 571 : "r"((intptr_t)(src_stride)) // %3
572 : "memory", "cc", NACL_R14 572 : "memory", "cc", NACL_R14
573 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" 573 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
574 ); 574 );
575 } 575 }
576 576
577 // Reads 16xN bytes and produces 16 shorts at a time. 577 // Reads 16xN bytes and produces 16 shorts at a time.
578 void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, 578 void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) {
579 uint16* dst_ptr, int src_width, int src_height) {
580 int tmp_height = 0;
581 intptr_t tmp_src = 0;
582 asm volatile ( 579 asm volatile (
583 "mov %0,%3 \n" // row pointer 580 "pxor %%xmm5,%%xmm5 \n"
584 "mov %5,%2 \n" // height
585 "pxor %%xmm0,%%xmm0 \n" // clear accumulators
586 "pxor %%xmm1,%%xmm1 \n"
587 "pxor %%xmm4,%%xmm4 \n"
588 581
589 LABELALIGN 582 LABELALIGN
590 "1: \n" 583 "1: \n"
591 "movdqu " MEMACCESS(3) ",%%xmm2 \n" 584 "movdqu " MEMACCESS(0) ",%%xmm3 \n"
592 "add %6,%3 \n" 585 "lea " MEMLEA(0x10,0) ",%0 \n" // src_ptr += 16
593 "movdqa %%xmm2,%%xmm3 \n" 586 "movdqu " MEMACCESS(1) ",%%xmm0 \n"
594 "punpcklbw %%xmm4,%%xmm2 \n" 587 "movdqu " MEMACCESS2(0x10,1) ",%%xmm1 \n"
595 "punpckhbw %%xmm4,%%xmm3 \n" 588 "movdqa %%xmm3,%%xmm2 \n"
589 "punpcklbw %%xmm5,%%xmm2 \n"
590 "punpckhbw %%xmm5,%%xmm3 \n"
596 "paddusw %%xmm2,%%xmm0 \n" 591 "paddusw %%xmm2,%%xmm0 \n"
597 "paddusw %%xmm3,%%xmm1 \n" 592 "paddusw %%xmm3,%%xmm1 \n"
598 "sub $0x1,%2 \n"
599 "jg 1b \n"
600
601 "movdqu %%xmm0," MEMACCESS(1) " \n" 593 "movdqu %%xmm0," MEMACCESS(1) " \n"
602 "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" 594 "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
603 "lea " MEMLEA(0x20,1) ",%1 \n" 595 "lea " MEMLEA(0x20,1) ",%1 \n"
604 "lea " MEMLEA(0x10,0) ",%0 \n" // src_ptr += 16 596 "sub $0x10,%2 \n"
605 "mov %0,%3 \n" // row pointer
606 "mov %5,%2 \n" // height
607 "pxor %%xmm0,%%xmm0 \n" // clear accumulators
608 "pxor %%xmm1,%%xmm1 \n"
609 "sub $0x10,%4 \n"
610 "jg 1b \n" 597 "jg 1b \n"
611 : "+r"(src_ptr), // %0 598 : "+r"(src_ptr), // %0
612 "+r"(dst_ptr), // %1 599 "+r"(dst_ptr), // %1
613 "+r"(tmp_height), // %2 600 "+r"(src_width) // %2
614 "+r"(tmp_src), // %3 601 :
615 "+r"(src_width), // %4 602 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
616 "+rm"(src_height) // %5
617 : "rm"((intptr_t)(src_stride)) // %6
618 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
619 ); 603 );
620 } 604 }
621 605
622 // Bilinear column filtering. SSSE3 version. 606 // Bilinear column filtering. SSSE3 version.
623 void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, 607 void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
624 int dst_width, int x, int dx) { 608 int dst_width, int x, int dx) {
625 intptr_t x0 = 0, x1 = 0, temp_pixel = 0; 609 intptr_t x0 = 0, x1 = 0, temp_pixel = 0;
626 asm volatile ( 610 asm volatile (
627 "movd %6,%%xmm2 \n" 611 "movd %6,%%xmm2 \n"
628 "movd %7,%%xmm3 \n" 612 "movd %7,%%xmm3 \n"
(...skipping 452 matching lines...) Expand 10 before | Expand all | Expand 10 after
1081 ); 1065 );
1082 return num; 1066 return num;
1083 } 1067 }
1084 1068
1085 #endif // defined(__x86_64__) || defined(__i386__) 1069 #endif // defined(__x86_64__) || defined(__i386__)
1086 1070
1087 #ifdef __cplusplus 1071 #ifdef __cplusplus
1088 } // extern "C" 1072 } // extern "C"
1089 } // namespace libyuv 1073 } // namespace libyuv
1090 #endif 1074 #endif
OLDNEW
« no previous file with comments | « include/libyuv/version.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698