OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <arm_neon.h> | 11 #include <arm_neon.h> |
12 | 12 |
| 13 #if (__GNUC__ == 4 && (__GNUC_MINOR__ == 6)) |
| 14 #warning Using GCC 4.6 is not recommended |
| 15 // Some versions of GCC 4.6 do not process this function correctly. When built |
| 16 // with any GCC 4.6 release, forward the call to the C version (vp8_short_walsh4x4_c). |
| 17 #include "./vp8_rtcd.h" |
| 18 void vp8_short_walsh4x4_neon( |
| 19 int16_t *input, |
| 20 int16_t *output, |
| 21 int pitch) { |
| 22 vp8_short_walsh4x4_c(input, output, pitch); |
| 23 } |
| 24 #else |
13 void vp8_short_walsh4x4_neon( | 25 void vp8_short_walsh4x4_neon( |
14 int16_t *input, | 26 int16_t *input, |
15 int16_t *output, | 27 int16_t *output, |
16 int pitch) { | 28 int pitch) { |
17 uint16x4_t d16u16; | 29 uint16x4_t d16u16; |
18 int16x8_t q0s16, q1s16; | 30 int16x8_t q0s16, q1s16; |
19 int16x4_t dEmptys16, d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; | 31 int16x4_t dEmptys16, d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; |
20 int32x4_t qEmptys32, q0s32, q1s32, q2s32, q3s32, q8s32; | 32 int32x4_t qEmptys32, q0s32, q1s32, q2s32, q3s32, q8s32; |
21 int32x4_t q9s32, q10s32, q11s32, q15s32; | 33 int32x4_t q9s32, q10s32, q11s32, q15s32; |
22 uint32x4_t q8u32, q9u32, q10u32, q11u32; | 34 uint32x4_t q8u32, q9u32, q10u32, q11u32; |
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
109 d2s16 = vshrn_n_s32(q10s32, 3); | 121 d2s16 = vshrn_n_s32(q10s32, 3); |
110 d3s16 = vshrn_n_s32(q11s32, 3); | 122 d3s16 = vshrn_n_s32(q11s32, 3); |
111 | 123 |
112 q0s16 = vcombine_s16(d0s16, d1s16); | 124 q0s16 = vcombine_s16(d0s16, d1s16); |
113 q1s16 = vcombine_s16(d2s16, d3s16); | 125 q1s16 = vcombine_s16(d2s16, d3s16); |
114 | 126 |
115 vst1q_s16(output, q0s16); | 127 vst1q_s16(output, q0s16); |
116 vst1q_s16(output + 8, q1s16); | 128 vst1q_s16(output + 8, q1s16); |
117 return; | 129 return; |
118 } | 130 } |
| 131 #endif // (__GNUC__ == 4 && (__GNUC_MINOR__ == 6)) |
OLD | NEW |