| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2013 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| 11 #include "libyuv/row.h" | 11 #include "libyuv/row.h" |
| 12 #include "libyuv/scale_row.h" |
| 12 | 13 |
| 13 #ifdef __cplusplus | 14 #ifdef __cplusplus |
| 14 namespace libyuv { | 15 namespace libyuv { |
| 15 extern "C" { | 16 extern "C" { |
| 16 #endif | 17 #endif |
| 17 | 18 |
| 18 // This module is for GCC x86 and x64. | 19 // This module is for GCC x86 and x64. |
| 19 #if !defined(LIBYUV_DISABLE_X86) && \ | 20 #if !defined(LIBYUV_DISABLE_X86) && \ |
| 20 (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) | 21 (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) |
| 21 | 22 |
| (...skipping 579 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 601 : | 602 : |
| 602 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" | 603 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" |
| 603 ); | 604 ); |
| 604 } | 605 } |
| 605 | 606 |
| 606 | 607 |
| 607 #ifdef HAS_SCALEADDROW_AVX2 | 608 #ifdef HAS_SCALEADDROW_AVX2 |
| 608 // Reads 32 bytes and accumulates to 32 shorts at a time. | 609 // Reads 32 bytes and accumulates to 32 shorts at a time. |
| 609 void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) { | 610 void ScaleAddRow_AVX2(const uint8* src_ptr, uint16* dst_ptr, int src_width) { |
| 610 asm volatile ( | 611 asm volatile ( |
| 611 "vpxor %%xmm5,%%xmm5 \n" | 612 "vpxor %%ymm5,%%ymm5,%%ymm5 \n" |
| 612 | 613 |
| 613 LABELALIGN | 614 LABELALIGN |
| 614 "1: \n" | 615 "1: \n" |
| 615 "vmovdqu " MEMACCESS(0) ",%%ymm3 \n" | 616 "vmovdqu " MEMACCESS(0) ",%%ymm3 \n" |
| 616 "lea " MEMLEA(0x20,0) ",%0 \n" // src_ptr += 16 | 617 "lea " MEMLEA(0x20,0) ",%0 \n" // src_ptr += 32 |
| 617 "vpermq $0xd8,%%ymm3,%%ymm3 \n" | 618 "vpermq $0xd8,%%ymm3,%%ymm3 \n" |
| 618 "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n" | 619 "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n" |
| 619 "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n" | 620 "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n" |
| 620 "vpaddusw " MEMACCESS(1) ",%%ymm2,%%ymm0 \n" | 621 "vpaddusw " MEMACCESS(1) ",%%ymm2,%%ymm0 \n" |
| 621 "vpaddusw " MEMACCESS2(0x10,1) ",%%ymm3,%%ymm1 \n" | 622 "vpaddusw " MEMACCESS2(0x10,1) ",%%ymm3,%%ymm1 \n" |
| 622 "vmovdqu %%ymm0," MEMACCESS(1) " \n" | 623 "vmovdqu %%ymm0," MEMACCESS(1) " \n" |
| 623 "vmovdqu %%ymm1," MEMACCESS2(0x10,1) " \n" | 624 "vmovdqu %%ymm1," MEMACCESS2(0x10,1) " \n" |
| 624 "lea " MEMLEA(0x40,1) ",%1 \n" | 625 "lea " MEMLEA(0x40,1) ",%1 \n" |
| 625 "sub $0x20,%2 \n" | 626 "sub $0x20,%2 \n" |
| 626 "jg 1b \n" | 627 "jg 1b \n" |
| (...skipping 469 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1096 ); | 1097 ); |
| 1097 return num; | 1098 return num; |
| 1098 } | 1099 } |
| 1099 | 1100 |
| 1100 #endif // defined(__x86_64__) || defined(__i386__) | 1101 #endif // defined(__x86_64__) || defined(__i386__) |
| 1101 | 1102 |
| 1102 #ifdef __cplusplus | 1103 #ifdef __cplusplus |
| 1103 } // extern "C" | 1104 } // extern "C" |
| 1104 } // namespace libyuv | 1105 } // namespace libyuv |
| 1105 #endif | 1106 #endif |
| OLD | NEW |