Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(102)

Side by Side Diff: source/libvpx/third_party/libyuv/source/scale_gcc.cc

Issue 1302353004: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 555 matching lines...) Expand 10 before | Expand all | Expand 10 after
566 "jg 1b \n" 566 "jg 1b \n"
567 : "+r"(src_ptr), // %0 567 : "+r"(src_ptr), // %0
568 "+r"(dst_ptr), // %1 568 "+r"(dst_ptr), // %1
569 "+r"(dst_width) // %2 569 "+r"(dst_width) // %2
570 : "r"((intptr_t)(src_stride)) // %3 570 : "r"((intptr_t)(src_stride)) // %3
571 : "memory", "cc", NACL_R14 571 : "memory", "cc", NACL_R14
572 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" 572 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
573 ); 573 );
574 } 574 }
575 575
576 // Reads 16xN bytes and produces 16 shorts at a time.
576 void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, 577 void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
577 uint16* dst_ptr, int src_width, int src_height) { 578 uint16* dst_ptr, int src_width, int src_height) {
578 int tmp_height = 0; 579 int tmp_height = 0;
579 intptr_t tmp_src = 0; 580 intptr_t tmp_src = 0;
580 asm volatile ( 581 asm volatile (
582 "mov %0,%3 \n" // row pointer
583 "mov %5,%2 \n" // height
584 "pxor %%xmm0,%%xmm0 \n" // clear accumulators
585 "pxor %%xmm1,%%xmm1 \n"
581 "pxor %%xmm4,%%xmm4 \n" 586 "pxor %%xmm4,%%xmm4 \n"
582 "sub $0x1,%5 \n"
583 587
584 LABELALIGN 588 LABELALIGN
585 "1: \n" 589 "1: \n"
586 "movdqu " MEMACCESS(0) ",%%xmm0 \n" 590 "movdqu " MEMACCESS(3) ",%%xmm2 \n"
587 "mov %0,%3 \n" 591 "add %6,%3 \n"
588 "add %6,%0 \n"
589 "movdqa %%xmm0,%%xmm1 \n"
590 "punpcklbw %%xmm4,%%xmm0 \n"
591 "punpckhbw %%xmm4,%%xmm1 \n"
592 "mov %5,%2 \n"
593 "test %2,%2 \n"
594 "je 3f \n"
595
596 LABELALIGN
597 "2: \n"
598 "movdqu " MEMACCESS(0) ",%%xmm2 \n"
599 "add %6,%0 \n"
600 "movdqa %%xmm2,%%xmm3 \n" 592 "movdqa %%xmm2,%%xmm3 \n"
601 "punpcklbw %%xmm4,%%xmm2 \n" 593 "punpcklbw %%xmm4,%%xmm2 \n"
602 "punpckhbw %%xmm4,%%xmm3 \n" 594 "punpckhbw %%xmm4,%%xmm3 \n"
603 "paddusw %%xmm2,%%xmm0 \n" 595 "paddusw %%xmm2,%%xmm0 \n"
604 "paddusw %%xmm3,%%xmm1 \n" 596 "paddusw %%xmm3,%%xmm1 \n"
605 "sub $0x1,%2 \n" 597 "sub $0x1,%2 \n"
606 "jg 2b \n" 598 "jg 1b \n"
607 599
608 LABELALIGN
609 "3: \n"
610 "movdqu %%xmm0," MEMACCESS(1) " \n" 600 "movdqu %%xmm0," MEMACCESS(1) " \n"
611 "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" 601 "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n"
612 "lea " MEMLEA(0x10,3) ",%0 \n"
613 "lea " MEMLEA(0x20,1) ",%1 \n" 602 "lea " MEMLEA(0x20,1) ",%1 \n"
603 "lea " MEMLEA(0x10,0) ",%0 \n" // src_ptr += 16
604 "mov %0,%3 \n" // row pointer
605 "mov %5,%2 \n" // height
606 "pxor %%xmm0,%%xmm0 \n" // clear accumulators
607 "pxor %%xmm1,%%xmm1 \n"
614 "sub $0x10,%4 \n" 608 "sub $0x10,%4 \n"
615 "jg 1b \n" 609 "jg 1b \n"
616 : "+r"(src_ptr), // %0 610 : "+r"(src_ptr), // %0
617 "+r"(dst_ptr), // %1 611 "+r"(dst_ptr), // %1
618 "+r"(tmp_height), // %2 612 "+r"(tmp_height), // %2
619 "+r"(tmp_src), // %3 613 "+r"(tmp_src), // %3
620 "+r"(src_width), // %4 614 "+r"(src_width), // %4
621 "+rm"(src_height) // %5 615 "+rm"(src_height) // %5
622 : "rm"((intptr_t)(src_stride)) // %6 616 : "rm"((intptr_t)(src_stride)) // %6
623 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" 617 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after
792 "+r"(dst_width) // %2 786 "+r"(dst_width) // %2
793 : "r"((intptr_t)(src_stride)) // %3 787 : "r"((intptr_t)(src_stride)) // %3
794 : "memory", "cc", NACL_R14 788 : "memory", "cc", NACL_R14
795 "xmm0", "xmm1", "xmm2", "xmm3" 789 "xmm0", "xmm1", "xmm2", "xmm3"
796 ); 790 );
797 } 791 }
798 792
799 // Reads 4 pixels at a time. 793 // Reads 4 pixels at a time.
800 // Alignment requirement: dst_argb 16 byte aligned. 794 // Alignment requirement: dst_argb 16 byte aligned.
801 void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, 795 void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
802 int src_stepx, 796 int src_stepx, uint8* dst_argb, int dst_width) {
803 uint8* dst_argb, int dst_width) {
804 intptr_t src_stepx_x4 = (intptr_t)(src_stepx); 797 intptr_t src_stepx_x4 = (intptr_t)(src_stepx);
805 intptr_t src_stepx_x12 = 0; 798 intptr_t src_stepx_x12 = 0;
806 asm volatile ( 799 asm volatile (
807 "lea " MEMLEA3(0x00,1,4) ",%1 \n" 800 "lea " MEMLEA3(0x00,1,4) ",%1 \n"
808 "lea " MEMLEA4(0x00,1,1,2) ",%4 \n" 801 "lea " MEMLEA4(0x00,1,1,2) ",%4 \n"
809 LABELALIGN 802 LABELALIGN
810 "1: \n" 803 "1: \n"
811 "movd " MEMACCESS(0) ",%%xmm0 \n" 804 "movd " MEMACCESS(0) ",%%xmm0 \n"
812 MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1 805 MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1
813 "punpckldq %%xmm1,%%xmm0 \n" 806 "punpckldq %%xmm1,%%xmm0 \n"
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after
1087 ); 1080 );
1088 return num; 1081 return num;
1089 } 1082 }
1090 1083
1091 #endif // defined(__x86_64__) || defined(__i386__) 1084 #endif // defined(__x86_64__) || defined(__i386__)
1092 1085
1093 #ifdef __cplusplus 1086 #ifdef __cplusplus
1094 } // extern "C" 1087 } // extern "C"
1095 } // namespace libyuv 1088 } // namespace libyuv
1096 #endif 1089 #endif
OLDNEW
« no previous file with comments | « source/libvpx/third_party/libyuv/source/scale_common.cc ('k') | source/libvpx/third_party/libyuv/source/scale_neon.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698