OLD | NEW |
---|---|
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 2444 matching lines...) | |
2455 ); | 2455 ); |
2456 } | 2456 } |
2457 #endif // HAS_MIRRORROW_SSSE3 | 2457 #endif // HAS_MIRRORROW_SSSE3 |
2458 | 2458 |
2459 #ifdef HAS_MIRRORROW_AVX2 | 2459 #ifdef HAS_MIRRORROW_AVX2 |
2460 void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { | 2460 void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { |
2461 intptr_t temp_width = (intptr_t)(width); | 2461 intptr_t temp_width = (intptr_t)(width); |
2462 asm volatile ( | 2462 asm volatile ( |
2463 "vbroadcastf128 %3,%%ymm5 \n" | 2463 "vbroadcastf128 %3,%%ymm5 \n" |
2464 LABELALIGN | 2464 LABELALIGN |
| 2465 IACA_ASM_START |
fbarchard1
2017/01/13 02:14:43
This is an example of usage. It will be removed before… [comment truncated in source]
| |
2465 "1: \n" | 2466 "1: \n" |
2466 MEMOPREG(vmovdqu,-0x20,0,2,1,ymm0) // vmovdqu -0x20(%0,%2),%%ymm0 | 2467 MEMOPREG(vmovdqu,-0x20,0,2,1,ymm0) // vmovdqu -0x20(%0,%2),%%ymm0 |
2467 "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" | 2468 "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" |
2468 "vpermq $0x4e,%%ymm0,%%ymm0 \n" | 2469 "vpermq $0x4e,%%ymm0,%%ymm0 \n" |
2469 "vmovdqu %%ymm0," MEMACCESS(1) " \n" | 2470 "vmovdqu %%ymm0," MEMACCESS(1) " \n" |
2470 "lea " MEMLEA(0x20,1) ",%1 \n" | 2471 "lea " MEMLEA(0x20,1) ",%1 \n" |
2471 "sub $0x20,%2 \n" | 2472 "sub $0x20,%2 \n" |
2472 "jg 1b \n" | 2473 "jg 1b \n" |
| 2474 IACA_ASM_END |
2473 "vzeroupper \n" | 2475 "vzeroupper \n" |
2474 : "+r"(src), // %0 | 2476 : "+r"(src), // %0 |
2475 "+r"(dst), // %1 | 2477 "+r"(dst), // %1 |
2476 "+r"(temp_width) // %2 | 2478 "+r"(temp_width) // %2 |
2477 : "m"(kShuffleMirror) // %3 | 2479 : "m"(kShuffleMirror) // %3 |
2478 : "memory", "cc", NACL_R14 | 2480 : "memory", "cc", NACL_R14 |
2479 "xmm0", "xmm5" | 2481 "xmm0", "xmm5" |
2480 ); | 2482 ); |
2481 } | 2483 } |
2482 #endif // HAS_MIRRORROW_AVX2 | 2484 #endif // HAS_MIRRORROW_AVX2 |
(...skipping 160 matching lines...) | |
2643 #endif // HAS_SPLITUVROW_SSE2 | 2645 #endif // HAS_SPLITUVROW_SSE2 |
2644 | 2646 |
2645 #ifdef HAS_MERGEUVROW_AVX2 | 2647 #ifdef HAS_MERGEUVROW_AVX2 |
2646 void MergeUVRow_AVX2(const uint8* src_u, | 2648 void MergeUVRow_AVX2(const uint8* src_u, |
2647 const uint8* src_v, | 2649 const uint8* src_v, |
2648 uint8* dst_uv, | 2650 uint8* dst_uv, |
2649 int width) { | 2651 int width) { |
2650 asm volatile ( | 2652 asm volatile ( |
2651 "sub %0,%1 \n" | 2653 "sub %0,%1 \n" |
2652 LABELALIGN | 2654 LABELALIGN |
| 2655 IACA_ASM_START |
2653 "1: \n" | 2656 "1: \n" |
2654 "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" | 2657 "vmovdqu " MEMACCESS(0) ",%%ymm0 \n" |
2655 MEMOPREG(vmovdqu,0x00,0,1,1,ymm1) // vmovdqu (%0,%1,1),%%ymm1 | 2658 MEMOPREG(vmovdqu,0x00,0,1,1,ymm1) // vmovdqu (%0,%1,1),%%ymm1 |
2656 "lea " MEMLEA(0x20,0) ",%0 \n" | 2659 "lea " MEMLEA(0x20,0) ",%0 \n" |
2657 "vpunpcklbw %%ymm1,%%ymm0,%%ymm2 \n" | 2660 "vpunpcklbw %%ymm1,%%ymm0,%%ymm2 \n" |
2658 "vpunpckhbw %%ymm1,%%ymm0,%%ymm0 \n" | 2661 "vpunpckhbw %%ymm1,%%ymm0,%%ymm0 \n" |
2659 "vextractf128 $0x0,%%ymm2," MEMACCESS(2) " \n" | 2662 "vextractf128 $0x0,%%ymm2," MEMACCESS(2) " \n" |
2660 "vextractf128 $0x0,%%ymm0," MEMACCESS2(0x10,2) "\n" | 2663 "vextractf128 $0x0,%%ymm0," MEMACCESS2(0x10,2) "\n" |
2661 "vextractf128 $0x1,%%ymm2," MEMACCESS2(0x20,2) "\n" | 2664 "vextractf128 $0x1,%%ymm2," MEMACCESS2(0x20,2) "\n" |
2662 "vextractf128 $0x1,%%ymm0," MEMACCESS2(0x30,2) "\n" | 2665 "vextractf128 $0x1,%%ymm0," MEMACCESS2(0x30,2) "\n" |
2663 "lea " MEMLEA(0x40,2) ",%2 \n" | 2666 "lea " MEMLEA(0x40,2) ",%2 \n" |
2664 "sub $0x20,%3 \n" | 2667 "sub $0x20,%3 \n" |
2665 "jg 1b \n" | 2668 "jg 1b \n" |
| 2669 IACA_ASM_END |
2666 "vzeroupper \n" | 2670 "vzeroupper \n" |
2667 : "+r"(src_u), // %0 | 2671 : "+r"(src_u), // %0 |
2668 "+r"(src_v), // %1 | 2672 "+r"(src_v), // %1 |
2669 "+r"(dst_uv), // %2 | 2673 "+r"(dst_uv), // %2 |
2670 "+r"(width) // %3 | 2674 "+r"(width) // %3 |
2671 : | 2675 : |
2672 : "memory", "cc", NACL_R14 | 2676 : "memory", "cc", NACL_R14 |
2673 "xmm0", "xmm1", "xmm2" | 2677 "xmm0", "xmm1", "xmm2" |
2674 ); | 2678 ); |
2675 } | 2679 } |
(...skipping 3008 matching lines...) | |
5684 ); | 5688 ); |
5685 } | 5689 } |
5686 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5690 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5687 | 5691 |
5688 #endif // defined(__x86_64__) || defined(__i386__) | 5692 #endif // defined(__x86_64__) || defined(__i386__) |
5689 | 5693 |
5690 #ifdef __cplusplus | 5694 #ifdef __cplusplus |
5691 } // extern "C" | 5695 } // extern "C" |
5692 } // namespace libyuv | 5696 } // namespace libyuv |
5693 #endif | 5697 #endif |
OLD | NEW |