OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 557 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
568 : "+r"(src_ptr), // %0 | 568 : "+r"(src_ptr), // %0 |
569 "+r"(dst_ptr), // %1 | 569 "+r"(dst_ptr), // %1 |
570 "+r"(dst_width) // %2 | 570 "+r"(dst_width) // %2 |
571 : "r"((intptr_t)(src_stride)) // %3 | 571 : "r"((intptr_t)(src_stride)) // %3 |
572 : "memory", "cc", NACL_R14 | 572 : "memory", "cc", NACL_R14 |
573 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" | 573 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" |
574 ); | 574 ); |
575 } | 575 } |
576 | 576 |
577 // Reads 16xN bytes and produces 16 shorts at a time. | 577 // Reads 16 bytes and accumulates to 16 shorts at a time. |
578 void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride, | 578 void ScaleAddRow_SSE2(const uint8* src_ptr, uint16* dst_ptr, int src_width) { |
579 uint16* dst_ptr, int src_width, int src_height) { | |
580 int tmp_height = 0; | |
581 intptr_t tmp_src = 0; | |
582 asm volatile ( | 579 asm volatile ( |
583 "mov %0,%3 \n" // row pointer | 580 "pxor %%xmm5,%%xmm5 \n" |
584 "mov %5,%2 \n" // height | |
585 "pxor %%xmm0,%%xmm0 \n" // clear accumulators | |
586 "pxor %%xmm1,%%xmm1 \n" | |
587 "pxor %%xmm4,%%xmm4 \n" | |
588 | 581 |
589 LABELALIGN | 582 LABELALIGN |
590 "1: \n" | 583 "1: \n" |
591 "movdqu " MEMACCESS(3) ",%%xmm2 \n" | 584 "movdqu " MEMACCESS(0) ",%%xmm3 \n" |
592 "add %6,%3 \n" | 585 "lea " MEMLEA(0x10,0) ",%0 \n" // src_ptr += 16 |
593 "movdqa %%xmm2,%%xmm3 \n" | 586 "movdqu " MEMACCESS(1) ",%%xmm0 \n" |
594 "punpcklbw %%xmm4,%%xmm2 \n" | 587 "movdqu " MEMACCESS2(0x10,1) ",%%xmm1 \n" |
595 "punpckhbw %%xmm4,%%xmm3 \n" | 588 "movdqa %%xmm3,%%xmm2 \n" |
| 589 "punpcklbw %%xmm5,%%xmm2 \n" |
| 590 "punpckhbw %%xmm5,%%xmm3 \n" |
596 "paddusw %%xmm2,%%xmm0 \n" | 591 "paddusw %%xmm2,%%xmm0 \n" |
597 "paddusw %%xmm3,%%xmm1 \n" | 592 "paddusw %%xmm3,%%xmm1 \n" |
598 "sub $0x1,%2 \n" | |
599 "jg 1b \n" | |
600 | |
601 "movdqu %%xmm0," MEMACCESS(1) " \n" | 593 "movdqu %%xmm0," MEMACCESS(1) " \n" |
602 "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" | 594 "movdqu %%xmm1," MEMACCESS2(0x10,1) " \n" |
603 "lea " MEMLEA(0x20,1) ",%1 \n" | 595 "lea " MEMLEA(0x20,1) ",%1 \n" |
604 "lea " MEMLEA(0x10,0) ",%0 \n" // src_ptr += 16 | 596 "sub $0x10,%2 \n" |
605 "mov %0,%3 \n" // row pointer | |
606 "mov %5,%2 \n" // height | |
607 "pxor %%xmm0,%%xmm0 \n" // clear accumulators | |
608 "pxor %%xmm1,%%xmm1 \n" | |
609 "sub $0x10,%4 \n" | |
610 "jg 1b \n" | 597 "jg 1b \n" |
611 : "+r"(src_ptr), // %0 | 598 : "+r"(src_ptr), // %0 |
612 "+r"(dst_ptr), // %1 | 599 "+r"(dst_ptr), // %1 |
613 "+r"(tmp_height), // %2 | 600 "+r"(src_width) // %2 |
614 "+r"(tmp_src), // %3 | 601 : |
615 "+r"(src_width), // %4 | 602 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" |
616 "+rm"(src_height) // %5 | |
617 : "rm"((intptr_t)(src_stride)) // %6 | |
618 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4" | |
619 ); | 603 ); |
620 } | 604 } |
621 | 605 |
622 // Bilinear column filtering. SSSE3 version. | 606 // Bilinear column filtering. SSSE3 version. |
623 void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, | 607 void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr, |
624 int dst_width, int x, int dx) { | 608 int dst_width, int x, int dx) { |
625 intptr_t x0 = 0, x1 = 0, temp_pixel = 0; | 609 intptr_t x0 = 0, x1 = 0, temp_pixel = 0; |
626 asm volatile ( | 610 asm volatile ( |
627 "movd %6,%%xmm2 \n" | 611 "movd %6,%%xmm2 \n" |
628 "movd %7,%%xmm3 \n" | 612 "movd %7,%%xmm3 \n" |
(...skipping 452 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1081 ); | 1065 ); |
1082 return num; | 1066 return num; |
1083 } | 1067 } |
1084 | 1068 |
1085 #endif // defined(__x86_64__) || defined(__i386__) | 1069 #endif // defined(__x86_64__) || defined(__i386__) |
1086 | 1070 |
1087 #ifdef __cplusplus | 1071 #ifdef __cplusplus |
1088 } // extern "C" | 1072 } // extern "C" |
1089 } // namespace libyuv | 1073 } // namespace libyuv |
1090 #endif | 1074 #endif |
OLD | NEW |