OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <arm_neon.h> | 11 #include <arm_neon.h> |
12 #include "./vpx_config.h" | 12 #include "./vpx_config.h" |
| 13 #include "vpx_ports/arm.h" |
13 | 14 |
14 #if (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) | 15 #ifdef VPX_INCOMPATIBLE_GCC |
15 static INLINE void write_2x8(unsigned char *dst, int pitch, | |
16 const uint8x8x2_t result, | |
17 const uint8x8x2_t result2) { | |
18 vst2_lane_u8(dst, result, 0); | |
19 dst += pitch; | |
20 vst2_lane_u8(dst, result, 1); | |
21 dst += pitch; | |
22 vst2_lane_u8(dst, result, 2); | |
23 dst += pitch; | |
24 vst2_lane_u8(dst, result, 3); | |
25 dst += pitch; | |
26 vst2_lane_u8(dst, result, 4); | |
27 dst += pitch; | |
28 vst2_lane_u8(dst, result, 5); | |
29 dst += pitch; | |
30 vst2_lane_u8(dst, result, 6); | |
31 dst += pitch; | |
32 vst2_lane_u8(dst, result, 7); | |
33 dst += pitch; | |
34 | |
35 vst2_lane_u8(dst, result2, 0); | |
36 dst += pitch; | |
37 vst2_lane_u8(dst, result2, 1); | |
38 dst += pitch; | |
39 vst2_lane_u8(dst, result2, 2); | |
40 dst += pitch; | |
41 vst2_lane_u8(dst, result2, 3); | |
42 dst += pitch; | |
43 vst2_lane_u8(dst, result2, 4); | |
44 dst += pitch; | |
45 vst2_lane_u8(dst, result2, 5); | |
46 dst += pitch; | |
47 vst2_lane_u8(dst, result2, 6); | |
48 dst += pitch; | |
49 vst2_lane_u8(dst, result2, 7); | |
50 } | |
51 #else | |
52 static INLINE void write_2x4(unsigned char *dst, int pitch, | 16 static INLINE void write_2x4(unsigned char *dst, int pitch, |
53 const uint8x8x2_t result) { | 17 const uint8x8x2_t result) { |
54 /* | 18 /* |
55 * uint8x8x2_t result | 19 * uint8x8x2_t result |
56 00 01 02 03 | 04 05 06 07 | 20 00 01 02 03 | 04 05 06 07 |
57 10 11 12 13 | 14 15 16 17 | 21 10 11 12 13 | 14 15 16 17 |
58 --- | 22 --- |
59 * after vtrn_u8 | 23 * after vtrn_u8 |
60 00 10 02 12 | 04 14 06 16 | 24 00 10 02 12 | 04 14 06 16 |
61 01 11 03 13 | 05 15 07 17 | 25 01 11 03 13 | 05 15 07 17 |
(...skipping 19 matching lines...) Expand all Loading... |
81 vst1_lane_u16((uint16_t *)dst, x_1_5, 3); | 45 vst1_lane_u16((uint16_t *)dst, x_1_5, 3); |
82 } | 46 } |
83 | 47 |
84 static INLINE void write_2x8(unsigned char *dst, int pitch, | 48 static INLINE void write_2x8(unsigned char *dst, int pitch, |
85 const uint8x8x2_t result, | 49 const uint8x8x2_t result, |
86 const uint8x8x2_t result2) { | 50 const uint8x8x2_t result2) { |
87 write_2x4(dst, pitch, result); | 51 write_2x4(dst, pitch, result); |
88 dst += pitch * 8; | 52 dst += pitch * 8; |
89 write_2x4(dst, pitch, result2); | 53 write_2x4(dst, pitch, result2); |
90 } | 54 } |
91 #endif | 55 #else |
| 56 static INLINE void write_2x8(unsigned char *dst, int pitch, |
| 57 const uint8x8x2_t result, |
| 58 const uint8x8x2_t result2) { |
| 59 vst2_lane_u8(dst, result, 0); |
| 60 dst += pitch; |
| 61 vst2_lane_u8(dst, result, 1); |
| 62 dst += pitch; |
| 63 vst2_lane_u8(dst, result, 2); |
| 64 dst += pitch; |
| 65 vst2_lane_u8(dst, result, 3); |
| 66 dst += pitch; |
| 67 vst2_lane_u8(dst, result, 4); |
| 68 dst += pitch; |
| 69 vst2_lane_u8(dst, result, 5); |
| 70 dst += pitch; |
| 71 vst2_lane_u8(dst, result, 6); |
| 72 dst += pitch; |
| 73 vst2_lane_u8(dst, result, 7); |
| 74 dst += pitch; |
| 75 |
| 76 vst2_lane_u8(dst, result2, 0); |
| 77 dst += pitch; |
| 78 vst2_lane_u8(dst, result2, 1); |
| 79 dst += pitch; |
| 80 vst2_lane_u8(dst, result2, 2); |
| 81 dst += pitch; |
| 82 vst2_lane_u8(dst, result2, 3); |
| 83 dst += pitch; |
| 84 vst2_lane_u8(dst, result2, 4); |
| 85 dst += pitch; |
| 86 vst2_lane_u8(dst, result2, 5); |
| 87 dst += pitch; |
| 88 vst2_lane_u8(dst, result2, 6); |
| 89 dst += pitch; |
| 90 vst2_lane_u8(dst, result2, 7); |
| 91 } |
| 92 #endif // VPX_INCOMPATIBLE_GCC |
92 | 93 |
93 | 94 |
94 #if (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) | 95 #ifdef VPX_INCOMPATIBLE_GCC |
95 static INLINE | |
96 uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) { | |
97 x = vld4_lane_u8(src, x, 0); | |
98 src += pitch; | |
99 x = vld4_lane_u8(src, x, 1); | |
100 src += pitch; | |
101 x = vld4_lane_u8(src, x, 2); | |
102 src += pitch; | |
103 x = vld4_lane_u8(src, x, 3); | |
104 src += pitch; | |
105 x = vld4_lane_u8(src, x, 4); | |
106 src += pitch; | |
107 x = vld4_lane_u8(src, x, 5); | |
108 src += pitch; | |
109 x = vld4_lane_u8(src, x, 6); | |
110 src += pitch; | |
111 x = vld4_lane_u8(src, x, 7); | |
112 return x; | |
113 } | |
114 #else | |
115 static INLINE | 96 static INLINE |
116 uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) { | 97 uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) { |
117 const uint8x8_t a = vld1_u8(src); | 98 const uint8x8_t a = vld1_u8(src); |
118 const uint8x8_t b = vld1_u8(src + pitch * 1); | 99 const uint8x8_t b = vld1_u8(src + pitch * 1); |
119 const uint8x8_t c = vld1_u8(src + pitch * 2); | 100 const uint8x8_t c = vld1_u8(src + pitch * 2); |
120 const uint8x8_t d = vld1_u8(src + pitch * 3); | 101 const uint8x8_t d = vld1_u8(src + pitch * 3); |
121 const uint8x8_t e = vld1_u8(src + pitch * 4); | 102 const uint8x8_t e = vld1_u8(src + pitch * 4); |
122 const uint8x8_t f = vld1_u8(src + pitch * 5); | 103 const uint8x8_t f = vld1_u8(src + pitch * 5); |
123 const uint8x8_t g = vld1_u8(src + pitch * 6); | 104 const uint8x8_t g = vld1_u8(src + pitch * 6); |
124 const uint8x8_t h = vld1_u8(src + pitch * 7); | 105 const uint8x8_t h = vld1_u8(src + pitch * 7); |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
162 02 12 22 32 | 42 52 62 72 | 143 02 12 22 32 | 42 52 62 72 |
163 03 13 23 33 | 43 53 63 73 | 144 03 13 23 33 | 43 53 63 73 |
164 */ | 145 */ |
165 x.val[0] = r01_u8.val[0]; | 146 x.val[0] = r01_u8.val[0]; |
166 x.val[1] = r01_u8.val[1]; | 147 x.val[1] = r01_u8.val[1]; |
167 x.val[2] = r23_u8.val[0]; | 148 x.val[2] = r23_u8.val[0]; |
168 x.val[3] = r23_u8.val[1]; | 149 x.val[3] = r23_u8.val[1]; |
169 | 150 |
170 return x; | 151 return x; |
171 } | 152 } |
172 #endif | 153 #else |
| 154 static INLINE |
| 155 uint8x8x4_t read_4x8(unsigned char *src, int pitch, uint8x8x4_t x) { |
| 156 x = vld4_lane_u8(src, x, 0); |
| 157 src += pitch; |
| 158 x = vld4_lane_u8(src, x, 1); |
| 159 src += pitch; |
| 160 x = vld4_lane_u8(src, x, 2); |
| 161 src += pitch; |
| 162 x = vld4_lane_u8(src, x, 3); |
| 163 src += pitch; |
| 164 x = vld4_lane_u8(src, x, 4); |
| 165 src += pitch; |
| 166 x = vld4_lane_u8(src, x, 5); |
| 167 src += pitch; |
| 168 x = vld4_lane_u8(src, x, 6); |
| 169 src += pitch; |
| 170 x = vld4_lane_u8(src, x, 7); |
| 171 return x; |
| 172 } |
| 173 #endif // VPX_INCOMPATIBLE_GCC |
173 | 174 |
174 static INLINE void vp8_loop_filter_simple_vertical_edge_neon( | 175 static INLINE void vp8_loop_filter_simple_vertical_edge_neon( |
175 unsigned char *s, | 176 unsigned char *s, |
176 int p, | 177 int p, |
177 const unsigned char *blimit) { | 178 const unsigned char *blimit) { |
178 unsigned char *src1; | 179 unsigned char *src1; |
179 uint8x16_t qblimit, q0u8; | 180 uint8x16_t qblimit, q0u8; |
180 uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8; | 181 uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8; |
181 int16x8_t q2s16, q13s16, q11s16; | 182 int16x8_t q2s16, q13s16, q11s16; |
182 int8x8_t d28s8, d29s8; | 183 int8x8_t d28s8, d29s8; |
(...skipping 87 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
270 return; | 271 return; |
271 } | 272 } |
272 | 273 |
273 void vp8_loop_filter_mbvs_neon( | 274 void vp8_loop_filter_mbvs_neon( |
274 unsigned char *y_ptr, | 275 unsigned char *y_ptr, |
275 int y_stride, | 276 int y_stride, |
276 const unsigned char *blimit) { | 277 const unsigned char *blimit) { |
277 vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); | 278 vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); |
278 return; | 279 return; |
279 } | 280 } |
OLD | NEW |