| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 2724 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2735 } | 2735 } |
| 2736 | 2736 |
| 2737 void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) { | 2737 void HalfFloat1Row_NEON(const uint16* src, uint16* dst, float, int width) { |
| 2738 asm volatile ( | 2738 asm volatile ( |
| 2739 "vdup.32 q0, %3 \n" | 2739 "vdup.32 q0, %3 \n" |
| 2740 | 2740 |
| 2741 "1: \n" | 2741 "1: \n" |
| 2742 MEMACCESS(0) | 2742 MEMACCESS(0) |
| 2743 "vld1.8 {q1}, [%0]! \n" // load 8 shorts | 2743 "vld1.8 {q1}, [%0]! \n" // load 8 shorts |
| 2744 "subs %2, %2, #8 \n" // 8 pixels per loop | 2744 "subs %2, %2, #8 \n" // 8 pixels per loop |
| 2745 "vmovl.u8 q2, d2 \n" // 8 int's | 2745 "vmovl.u16 q2, d2 \n" // 8 int's |
| 2746 "vmovl.u8 q3, d3 \n" | 2746 "vmovl.u16 q3, d3 \n" |
| 2747 "vcvt.f32.u32 q2, q2 \n" // 8 floats | 2747 "vcvt.f32.u32 q2, q2 \n" // 8 floats |
| 2748 "vcvt.f32.u32 q3, q3 \n" | 2748 "vcvt.f32.u32 q3, q3 \n" |
| 2749 "vmul.f32 q2, q2, q0 \n" // adjust exponent | 2749 "vmul.f32 q2, q2, q0 \n" // adjust exponent |
| 2750 "vmul.f32 q3, q3, q0 \n" | 2750 "vmul.f32 q3, q3, q0 \n" |
| 2751 "vqshrn.u32 d2, q2, #13 \n" // isolate halffloat | 2751 "vqshrn.u32 d2, q2, #13 \n" // isolate halffloat |
| 2752 "vqshrn.u32 d3, q3, #13 \n" | 2752 "vqshrn.u32 d3, q3, #13 \n" |
| 2753 MEMACCESS(1) | 2753 MEMACCESS(1) |
| 2754 "vst1.8 {q1}, [%0]! \n" | 2754 "vst1.8 {q1}, [%1]! \n" |
| 2755 "bgt 1b \n" | 2755 "bgt 1b \n" |
| 2756 : "+r"(src), // %0 | 2756 : "+r"(src), // %0 |
| 2757 "+r"(dst), // %1 | 2757 "+r"(dst), // %1 |
| 2758 "+r"(width) // %2 | 2758 "+r"(width) // %2 |
| 2759 : "r"(1.9259299444e-34f) // %3 | 2759 : "r"(1.9259299444e-34f) // %3 |
| 2760 : "cc", "memory", "q0", "q1", "q2", "q3" | 2760 : "cc", "memory", "q0", "q1", "q2", "q3" |
| 2761 ); | 2761 ); |
| 2762 } | 2762 } |
| 2763 | 2763 |
| 2764 // TODO(fbarchard): multiply by element. | 2764 // TODO(fbarchard): multiply by element. |
| 2765 void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width) { | 2765 void HalfFloatRow_NEON(const uint16* src, uint16* dst, float scale, int width) { |
| 2766 asm volatile ( | 2766 asm volatile ( |
| 2767 "vdup.32 q0, %3 \n" | 2767 "vdup.32 q0, %3 \n" |
| 2768 | 2768 |
| 2769 "1: \n" | 2769 "1: \n" |
| 2770 MEMACCESS(0) | 2770 MEMACCESS(0) |
| 2771 "vld1.8 {q1}, [%0]! \n" // load 8 shorts | 2771 "vld1.8 {q1}, [%0]! \n" // load 8 shorts |
| 2772 "subs %2, %2, #8 \n" // 8 pixels per loop | 2772 "subs %2, %2, #8 \n" // 8 pixels per loop |
| 2773 "vmovl.u8 q2, d2 \n" // 8 int's | 2773 "vmovl.u16 q2, d2 \n" // 8 int's |
| 2774 "vmovl.u8 q3, d3 \n" | 2774 "vmovl.u16 q3, d3 \n" |
| 2775 "vcvt.f32.u32 q2, q2 \n" // 8 floats | 2775 "vcvt.f32.u32 q2, q2 \n" // 8 floats |
| 2776 "vcvt.f32.u32 q3, q3 \n" | 2776 "vcvt.f32.u32 q3, q3 \n" |
| 2777 "vmul.f32 q2, q2, q0 \n" // adjust exponent | 2777 "vmul.f32 q2, q2, q0 \n" // adjust exponent |
| 2778 "vmul.f32 q3, q3, q0 \n" | 2778 "vmul.f32 q3, q3, q0 \n" |
| 2779 "vqshrn.u32 d2, q2, #13 \n" // isolate halffloat | 2779 "vqshrn.u32 d2, q2, #13 \n" // isolate halffloat |
| 2780 "vqshrn.u32 d3, q3, #13 \n" | 2780 "vqshrn.u32 d3, q3, #13 \n" |
| 2781 MEMACCESS(1) | 2781 MEMACCESS(1) |
| 2782 "vst1.8 {q1}, [%0]! \n" | 2782 "vst1.8 {q1}, [%1]! \n" |
| 2783 "bgt 1b \n" | 2783 "bgt 1b \n" |
| 2784 : "+r"(src), // %0 | 2784 : "+r"(src), // %0 |
| 2785 "+r"(dst), // %1 | 2785 "+r"(dst), // %1 |
| 2786 "+r"(width) // %2 | 2786 "+r"(width) // %2 |
| 2787 : "r"(scale * 1.9259299444e-34f) // %3 | 2787 : "r"(scale * 1.9259299444e-34f) // %3 |
| 2788 : "cc", "memory", "q0", "q1", "q2", "q3" | 2788 : "cc", "memory", "q0", "q1", "q2", "q3" |
| 2789 ); | 2789 ); |
| 2790 } | 2790 } |
| 2791 | 2791 |
| 2792 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__).. | 2792 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__).. |
| 2793 | 2793 |
| 2794 #ifdef __cplusplus | 2794 #ifdef __cplusplus |
| 2795 } // extern "C" | 2795 } // extern "C" |
| 2796 } // namespace libyuv | 2796 } // namespace libyuv |
| 2797 #endif | 2797 #endif |
| OLD | NEW |