| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 555 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 566 "jg 1b \n" | 566 "jg 1b \n" |
| 567 : "+r"(src_ptr), // %0 | 567 : "+r"(src_ptr), // %0 |
| 568 "+r"(dst_ptr), // %1 | 568 "+r"(dst_ptr), // %1 |
| 569 "+r"(dst_width) // %2 | 569 "+r"(dst_width) // %2 |
| 570 : "r"((intptr_t)(src_stride)) // %3 | 570 : "r"((intptr_t)(src_stride)) // %3 |
| 571 : "memory", "cc", NACL_R14 | 571 : "memory", "cc", NACL_R14 |
| 572 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" | 572 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" |
| 573 ); | 573 ); |
| 574 } | 574 } |
| 575 | 575 |
| 576 // Reads 16xN bytes and produces 16 shorts at a time. |
| 576 void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | 577 void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, |
| 577 uint16* dst_ptr, int src_width, int src_height) { | 578 uint16* dst_ptr, int src_width, int src_height) { |
| 578 int tmp_height = 0; | 579 int tmp_height = 0; |
| 579 intptr_t tmp_src = 0; | 580 intptr_t tmp_src = 0; |
| 580 asm volatile ( | 581 asm volatile ( |
| 582 "mov %0,%3 \n" // row pointer |
| 583 "mov %5,%2 \n" // height |
| 584 "pxor %%xmm0,%%xmm0 \n" // clear accumulators |
| 585 "pxor %%xmm1,%%xmm1 \n" |
| 581 "pxor %%xmm4,%%xmm4 \n" | 586 "pxor %%xmm4,%%xmm4 \n" |
| 582 "sub $0x1,%5 \n" | |
| 583 | 587 |
| 584 LABELALIGN | 588 LABELALIGN |
| 585 "1: \n" | 589 "1: \n" |
| 586 "movdqu " MEMACCESS(0) ",%%xmm0 \n" | 590 "movdqu " MEMACCESS(3) ",%%xmm2 \n" |
| 587 "mov %0,%3 \n" | 591 "add %6,%3 \n" |
| 588 "add %6,%0 \n" | |
| 589 "movdqa %%xmm0,%%xmm1 \n" | |
| 590 "punpcklbw %%xmm4,%%xmm0 \n" | |
| 591 "punpckhbw %%xmm4,%%xmm1 \n" | |
| 592 "mov %5,%2 \n" | |
| 593 "test %2,%2 \n" | |
| 594 "je 3f \n" | |
| 595 | |
| 596 LABELALIGN | |
| 597 "2: \n" | |
| 598 "movdqu " MEMACCESS(0) ",%%xmm2 \n" | |
| 599 "add %6,%0 \n" | |
| 600 "movdqa %%xmm2,%%xmm3 \n" | 592 "movdqa %%xmm2,%%xmm3 \n" |
| 601 "punpcklbw %%xmm4,%%xmm2 \n" | 593 "punpcklbw %%xmm4,%%xmm2 \n" |
| 602 "punpckhbw %%xmm4,%%xmm3 \n" | 594 "punpckhbw %%xmm4,%%xmm3 \n" |
| 603 "paddusw %%xmm2,%%xmm0 \n" | 595 "paddusw %%xmm2,%%xmm0 \n" |
| 604 "paddusw %%xmm3,%%xmm1 \n" | 596 "paddusw %%xmm3,%%xmm1 \n" |
| 605 "sub $0x1,%2 \n" | 597 "sub $0x1,%2 \n" |
| 606 "jg 2b \n" | 598 "jg 1b \n" |
| 607 | 599 |
| 608 LABELALIGN | |
| 609 "3: \n" | |
| 610 "movdqu %%xmm0," MEMACCESS(1) " \n" | 600 "movdqu %%xmm0," MEMACCESS(1) " \n" |
| 611 "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" | 601 "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" |
| 612 "lea " MEMLEA(0x10,3) ",%0 \n" | |
| 613 "lea " MEMLEA(0x20,1) ",%1 \n" | 602 "lea " MEMLEA(0x20,1) ",%1 \n" |
| 603 "lea " MEMLEA(0x10,0) ",%0 \n" // src_ptr += 16 |
| 604 "mov %0,%3 \n" // row pointer |
| 605 "mov %5,%2 \n" // height |
| 606 "pxor %%xmm0,%%xmm0 \n" // clear accumulators |
| 607 "pxor %%xmm1,%%xmm1 \n" |
| 614 "sub $0x10,%4 \n" | 608 "sub $0x10,%4 \n" |
| 615 "jg 1b \n" | 609 "jg 1b \n" |
| 616 : "+r"(src_ptr), // %0 | 610 : "+r"(src_ptr), // %0 |
| 617 "+r"(dst_ptr), // %1 | 611 "+r"(dst_ptr), // %1 |
| 618 "+r"(tmp_height), // %2 | 612 "+r"(tmp_height), // %2 |
| 619 "+r"(tmp_src), // %3 | 613 "+r"(tmp_src), // %3 |
| 620 "+r"(src_width), // %4 | 614 "+r"(src_width), // %4 |
| 621 "+rm"(src_height) // %5 | 615 "+rm"(src_height) // %5 |
| 622 : "rm"((intptr_t)(src_stride)) // %6 | 616 : "rm"((intptr_t)(src_stride)) // %6 |
| 623 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" | 617 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" |
| (...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 792 "+r"(dst_width) // %2 | 786 "+r"(dst_width) // %2 |
| 793 : "r"((intptr_t)(src_stride)) // %3 | 787 : "r"((intptr_t)(src_stride)) // %3 |
| 794 : "memory", "cc", NACL_R14 | 788 : "memory", "cc", NACL_R14 |
| 795 "xmm0", "xmm1", "xmm2", "xmm3" | 789 "xmm0", "xmm1", "xmm2", "xmm3" |
| 796 ); | 790 ); |
| 797 } | 791 } |
| 798 | 792 |
| 799 // Reads 4 pixels at a time. | 793 // Reads 4 pixels at a time. |
| 800 // Alignment requirement: dst_argb 16 byte aligned. | 794 // Alignment requirement: dst_argb 16 byte aligned. |
| 801 void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, | 795 void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride, |
| 802 int src_stepx, | 796 int src_stepx, uint8* dst_argb, int dst_width) { |
| 803 uint8* dst_argb, int dst_width) { | |
| 804 intptr_t src_stepx_x4 = (intptr_t)(src_stepx); | 797 intptr_t src_stepx_x4 = (intptr_t)(src_stepx); |
| 805 intptr_t src_stepx_x12 = 0; | 798 intptr_t src_stepx_x12 = 0; |
| 806 asm volatile ( | 799 asm volatile ( |
| 807 "lea " MEMLEA3(0x00,1,4) ",%1 \n" | 800 "lea " MEMLEA3(0x00,1,4) ",%1 \n" |
| 808 "lea " MEMLEA4(0x00,1,1,2) ",%4 \n" | 801 "lea " MEMLEA4(0x00,1,1,2) ",%4 \n" |
| 809 LABELALIGN | 802 LABELALIGN |
| 810 "1: \n" | 803 "1: \n" |
| 811 "movd " MEMACCESS(0) ",%%xmm0 \n" | 804 "movd " MEMACCESS(0) ",%%xmm0 \n" |
| 812 MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1 | 805 MEMOPREG(movd,0x00,0,1,1,xmm1) // movd (%0,%1,1),%%xmm1 |
| 813 "punpckldq %%xmm1,%%xmm0 \n" | 806 "punpckldq %%xmm1,%%xmm0 \n" |
| (...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1087 ); | 1080 ); |
| 1088 return num; | 1081 return num; |
| 1089 } | 1082 } |
| 1090 | 1083 |
| 1091 #endif // defined(__x86_64__) || defined(__i386__) | 1084 #endif // defined(__x86_64__) || defined(__i386__) |
| 1092 | 1085 |
| 1093 #ifdef __cplusplus | 1086 #ifdef __cplusplus |
| 1094 } // extern "C" | 1087 } // extern "C" |
| 1095 } // namespace libyuv | 1088 } // namespace libyuv |
| 1096 #endif | 1089 #endif |
| OLD | NEW |