OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 2715 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2726 "bgt 1b \n" | 2726 "bgt 1b \n" |
2727 : "+r"(src_y0), // %0 | 2727 : "+r"(src_y0), // %0 |
2728 "+r"(src_y1), // %1 | 2728 "+r"(src_y1), // %1 |
2729 "+r"(dst_sobely), // %2 | 2729 "+r"(dst_sobely), // %2 |
2730 "+r"(width) // %3 | 2730 "+r"(width) // %3 |
2731 : "r"(1), // %4 | 2731 : "r"(1), // %4 |
2732 "r"(6) // %5 | 2732 "r"(6) // %5 |
2733 : "cc", "memory", "q0", "q1" // Clobber List | 2733 : "cc", "memory", "q0", "q1" // Clobber List |
2734 ); | 2734 ); |
2735 } | 2735 } |
2736 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) | 2736 |
| 2737 void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) { |
| 2738 asm volatile ( |
| 2739 "vdup.32 q0, %3 \n" |
| 2740 |
| 2741 "1: \n" |
| 2742 MEMACCESS(0) |
| 2743 "vld1.8 {q1}, [%0]! \n" // load 8 shorts |
| 2744 "subs %2, %2, #8 \n" // 8 pixels per loop |
| 2745 "vmovl.u8 q2, d2 \n" // 8 int's |
| 2746 "vmovl.u8 q3, d3 \n" |
| 2747 "vcvt.f32.u32 q2, q2 \n" // 8 floats |
| 2748 "vcvt.f32.u32 q3, q3 \n" |
| 2749 "vmul.f32 q2, q2, q0 \n" // adjust exponent |
| 2750 "vmul.f32 q3, q3, q0 \n" |
| 2751 "vqshrn.u32 d2, q2, #13 \n" // isolate halffloat |
| 2752 "vqshrn.u32 d3, q3, #13 \n" |
| 2753 MEMACCESS(1) |
| 2754 "vst1.8 {q1}, [%0]! \n" |
| 2755 "bgt 1b \n" |
| 2756 : "+r"(src), // %0 |
| 2757 "+r"(dst), // %1 |
| 2758 "+r"(width) // %2 |
| 2759 : "r"(1.9259299444e-34f) // %3 |
| 2760 : "cc", "memory", "q0", "q1", "q2", "q3" |
| 2761 ); |
| 2762 } |
| 2763 |
| 2764 // TODO(fbarchard): multiply by element. |
| 2765 void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width) { |
| 2766 asm volatile ( |
| 2767 "vdup.32 q0, %3 \n" |
| 2768 |
| 2769 "1: \n" |
| 2770 MEMACCESS(0) |
| 2771 "vld1.8 {q1}, [%0]! \n" // load 8 shorts |
| 2772 "subs %2, %2, #8 \n" // 8 pixels per loop |
| 2773 "vmovl.u8 q2, d2 \n" // 8 int's |
| 2774 "vmovl.u8 q3, d3 \n" |
| 2775 "vcvt.f32.u32 q2, q2 \n" // 8 floats |
| 2776 "vcvt.f32.u32 q3, q3 \n" |
| 2777 "vmul.f32 q2, q2, q0 \n" // adjust exponent |
| 2778 "vmul.f32 q3, q3, q0 \n" |
| 2779 "vqshrn.u32 d2, q2, #13 \n" // isolate halffloat |
| 2780 "vqshrn.u32 d3, q3, #13 \n" |
| 2781 MEMACCESS(1) |
| 2782 "vst1.8 {q1}, [%0]! \n" |
| 2783 "bgt 1b \n" |
| 2784 : "+r"(src), // %0 |
| 2785 "+r"(dst), // %1 |
| 2786 "+r"(width) // %2 |
| 2787 : "r"(scale * 1.9259299444e-34f) // %3 |
| 2788 : "cc", "memory", "q0", "q1", "q2", "q3" |
| 2789 ); |
| 2790 } |
| 2791 |
| 2792 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__).. |
2737 | 2793 |
2738 #ifdef __cplusplus | 2794 #ifdef __cplusplus |
2739 } // extern "C" | 2795 } // extern "C" |
2740 } // namespace libyuv | 2796 } // namespace libyuv |
2741 #endif | 2797 #endif |
OLD | NEW |