OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 2724 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2735 } | 2735 } |
2736 | 2736 |
2737 void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) { | 2737 void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) { |
2738 asm volatile ( | 2738 asm volatile ( |
2739 "vdup.32 q0, %3 \n" | 2739 "vdup.32 q0, %3 \n" |
2740 | 2740 |
2741 "1: \n" | 2741 "1: \n" |
2742 MEMACCESS(0) | 2742 MEMACCESS(0) |
2743 "vld1.8 {q1}, [%0]! \n" // load 8 shorts | 2743 "vld1.8 {q1}, [%0]! \n" // load 8 shorts |
2744 "subs %2, %2, #8 \n" // 8 pixels per loop | 2744 "subs %2, %2, #8 \n" // 8 pixels per loop |
2745 "vmovl.u8 q2, d2 \n" // 8 int's | 2745 "vmovl.u16 q2, d2 \n" // 8 int's |
2746 "vmovl.u8 q3, d3 \n" | 2746 "vmovl.u16 q3, d3 \n" |
2747 "vcvt.f32.u32 q2, q2 \n" // 8 floats | 2747 "vcvt.f32.u32 q2, q2 \n" // 8 floats |
2748 "vcvt.f32.u32 q3, q3 \n" | 2748 "vcvt.f32.u32 q3, q3 \n" |
2749 "vmul.f32 q2, q2, q0 \n" // adjust exponent | 2749 "vmul.f32 q2, q2, q0 \n" // adjust exponent |
2750 "vmul.f32 q3, q3, q0 \n" | 2750 "vmul.f32 q3, q3, q0 \n" |
2751 "vqshrn.u32 d2, q2, #13 \n" // isolate halffloat | 2751 "vqshrn.u32 d2, q2, #13 \n" // isolate halffloat |
2752 "vqshrn.u32 d3, q3, #13 \n" | 2752 "vqshrn.u32 d3, q3, #13 \n" |
2753 MEMACCESS(1) | 2753 MEMACCESS(1) |
2754 "vst1.8 {q1}, [%0]! \n" | 2754 "vst1.8 {q1}, [%1]! \n" |
2755 "bgt 1b \n" | 2755 "bgt 1b \n" |
2756 : "+r"(src), // %0 | 2756 : "+r"(src), // %0 |
2757 "+r"(dst), // %1 | 2757 "+r"(dst), // %1 |
2758 "+r"(width) // %2 | 2758 "+r"(width) // %2 |
2759 : "r"(1.9259299444e-34f) // %3 | 2759 : "r"(1.9259299444e-34f) // %3 |
2760 : "cc", "memory", "q0", "q1", "q2", "q3" | 2760 : "cc", "memory", "q0", "q1", "q2", "q3" |
2761 ); | 2761 ); |
2762 } | 2762 } |
2763 | 2763 |
2764 // TODO(fbarchard): multiply by element. | 2764 // TODO(fbarchard): multiply by element. |
2765 void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width) { | 2765 void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width) { |
2766 asm volatile ( | 2766 asm volatile ( |
2767 "vdup.32 q0, %3 \n" | 2767 "vdup.32 q0, %3 \n" |
2768 | 2768 |
2769 "1: \n" | 2769 "1: \n" |
2770 MEMACCESS(0) | 2770 MEMACCESS(0) |
2771 "vld1.8 {q1}, [%0]! \n" // load 8 shorts | 2771 "vld1.8 {q1}, [%0]! \n" // load 8 shorts |
2772 "subs %2, %2, #8 \n" // 8 pixels per loop | 2772 "subs %2, %2, #8 \n" // 8 pixels per loop |
2773 "vmovl.u8 q2, d2 \n" // 8 int's | 2773 "vmovl.u16 q2, d2 \n" // 8 int's |
2774 "vmovl.u8 q3, d3 \n" | 2774 "vmovl.u16 q3, d3 \n" |
2775 "vcvt.f32.u32 q2, q2 \n" // 8 floats | 2775 "vcvt.f32.u32 q2, q2 \n" // 8 floats |
2776 "vcvt.f32.u32 q3, q3 \n" | 2776 "vcvt.f32.u32 q3, q3 \n" |
2777 "vmul.f32 q2, q2, q0 \n" // adjust exponent | 2777 "vmul.f32 q2, q2, q0 \n" // adjust exponent |
2778 "vmul.f32 q3, q3, q0 \n" | 2778 "vmul.f32 q3, q3, q0 \n" |
2779 "vqshrn.u32 d2, q2, #13 \n" // isolate halffloat | 2779 "vqshrn.u32 d2, q2, #13 \n" // isolate halffloat |
2780 "vqshrn.u32 d3, q3, #13 \n" | 2780 "vqshrn.u32 d3, q3, #13 \n" |
2781 MEMACCESS(1) | 2781 MEMACCESS(1) |
2782 "vst1.8 {q1}, [%0]! \n" | 2782 "vst1.8 {q1}, [%1]! \n" |
2783 "bgt 1b \n" | 2783 "bgt 1b \n" |
2784 : "+r"(src), // %0 | 2784 : "+r"(src), // %0 |
2785 "+r"(dst), // %1 | 2785 "+r"(dst), // %1 |
2786 "+r"(width) // %2 | 2786 "+r"(width) // %2 |
2787 : "r"(scale * 1.9259299444e-34f) // %3 | 2787 : "r"(scale * 1.9259299444e-34f) // %3 |
2788 : "cc", "memory", "q0", "q1", "q2", "q3" | 2788 : "cc", "memory", "q0", "q1", "q2", "q3" |
2789 ); | 2789 ); |
2790 } | 2790 } |
2791 | 2791 |
2792 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__).. | 2792 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__).. |
2793 | 2793 |
2794 #ifdef __cplusplus | 2794 #ifdef __cplusplus |
2795 } // extern "C" | 2795 } // extern "C" |
2796 } // namespace libyuv | 2796 } // namespace libyuv |
2797 #endif | 2797 #endif |
OLD | NEW |