OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 233 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
244 vst1_u8(v, vget_high_u8(q5)); | 244 vst1_u8(v, vget_high_u8(q5)); |
245 v += pitch; | 245 v += pitch; |
246 vst1_u8(v, vget_high_u8(q6)); | 246 vst1_u8(v, vget_high_u8(q6)); |
247 v += pitch; | 247 v += pitch; |
248 vst1_u8(v, vget_high_u8(q7)); | 248 vst1_u8(v, vget_high_u8(q7)); |
249 v += pitch; | 249 v += pitch; |
250 vst1_u8(v, vget_high_u8(q8)); | 250 vst1_u8(v, vget_high_u8(q8)); |
251 return; | 251 return; |
252 } | 252 } |
253 | 253 |
| 254 #if (__GNUC__ == 4 && (__GNUC_MINOR__ == 6)) |
| 255 #warning Using GCC 4.6 is not recommended |
| 256 // Some versions of gcc4.6 do not correctly process vst4_lane_u8. When built |
| 257 // with any gcc4.6, use the C code. |
| 258 extern void vp8_loop_filter_vertical_edge_c(unsigned char *s, int p, |
| 259 const unsigned char *blimit, |
| 260 const unsigned char *limit, |
| 261 const unsigned char *thresh, |
| 262 int count); |
| 263 |
| 264 void vp8_loop_filter_vertical_edge_y_neon( |
| 265 unsigned char *src, |
| 266 int pitch, |
| 267 unsigned char blimit, |
| 268 unsigned char limit, |
| 269 unsigned char thresh) { |
| 270 vp8_loop_filter_vertical_edge_c(src, pitch, &blimit, &limit, &thresh, 2); |
| 271 } |
| 272 |
| 273 void vp8_loop_filter_vertical_edge_uv_neon( |
| 274 unsigned char *u, |
| 275 int pitch, |
| 276 unsigned char blimit, |
| 277 unsigned char limit, |
| 278 unsigned char thresh, |
| 279 unsigned char *v) { |
| 280 vp8_loop_filter_vertical_edge_c(u, pitch, &blimit, &limit, &thresh, 1); |
| 281 vp8_loop_filter_vertical_edge_c(v, pitch, &blimit, &limit, &thresh, 1); |
| 282 } |
| 283 #else |
254 static INLINE void write_4x8(unsigned char *dst, int pitch, | 284 static INLINE void write_4x8(unsigned char *dst, int pitch, |
255 const uint8x8x4_t result) { | 285 const uint8x8x4_t result) { |
256 #if (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) | |
257 vst4_lane_u8(dst, result, 0); | 286 vst4_lane_u8(dst, result, 0); |
258 dst += pitch; | 287 dst += pitch; |
259 vst4_lane_u8(dst, result, 1); | 288 vst4_lane_u8(dst, result, 1); |
260 dst += pitch; | 289 dst += pitch; |
261 vst4_lane_u8(dst, result, 2); | 290 vst4_lane_u8(dst, result, 2); |
262 dst += pitch; | 291 dst += pitch; |
263 vst4_lane_u8(dst, result, 3); | 292 vst4_lane_u8(dst, result, 3); |
264 dst += pitch; | 293 dst += pitch; |
265 vst4_lane_u8(dst, result, 4); | 294 vst4_lane_u8(dst, result, 4); |
266 dst += pitch; | 295 dst += pitch; |
267 vst4_lane_u8(dst, result, 5); | 296 vst4_lane_u8(dst, result, 5); |
268 dst += pitch; | 297 dst += pitch; |
269 vst4_lane_u8(dst, result, 6); | 298 vst4_lane_u8(dst, result, 6); |
270 dst += pitch; | 299 dst += pitch; |
271 vst4_lane_u8(dst, result, 7); | 300 vst4_lane_u8(dst, result, 7); |
272 #else | |
273 /* | |
274 * uint8x8x4_t result | |
275 00 01 02 03 | 04 05 06 07 | |
276 10 11 12 13 | 14 15 16 17 | |
277 20 21 22 23 | 24 25 26 27 | |
278 30 31 32 33 | 34 35 36 37 | |
279 --- | |
280 * after vtrn_u16 | |
281 00 01 20 21 | 04 05 24 25 | |
282 02 03 22 23 | 06 07 26 27 | |
283 10 11 30 31 | 14 15 34 35 | |
284 12 13 32 33 | 16 17 36 37 | |
285 --- | |
286 * after vtrn_u8 | |
287 00 10 20 30 | 04 14 24 34 | |
288 01 11 21 31 | 05 15 25 35 | |
289 02 12 22 32 | 06 16 26 36 | |
290 03 13 23 33 | 07 17 27 37 | |
291 */ | |
292 const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]), | |
293 vreinterpret_u16_u8(result.val[2])); | |
294 const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]), | |
295 vreinterpret_u16_u8(result.val[3])); | |
296 const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]), | |
297 vreinterpret_u8_u16(r13_u16.val[0])); | |
298 const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]), | |
299 vreinterpret_u8_u16(r13_u16.val[1])); | |
300 const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]); | |
301 const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]); | |
302 const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]); | |
303 const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]); | |
304 vst1_lane_u32((uint32_t *)dst, x_0_4, 0); | |
305 dst += pitch; | |
306 vst1_lane_u32((uint32_t *)dst, x_1_5, 0); | |
307 dst += pitch; | |
308 vst1_lane_u32((uint32_t *)dst, x_2_6, 0); | |
309 dst += pitch; | |
310 vst1_lane_u32((uint32_t *)dst, x_3_7, 0); | |
311 dst += pitch; | |
312 vst1_lane_u32((uint32_t *)dst, x_0_4, 1); | |
313 dst += pitch; | |
314 vst1_lane_u32((uint32_t *)dst, x_1_5, 1); | |
315 dst += pitch; | |
316 vst1_lane_u32((uint32_t *)dst, x_2_6, 1); | |
317 dst += pitch; | |
318 vst1_lane_u32((uint32_t *)dst, x_3_7, 1); | |
319 #endif | |
320 } | 301 } |
321 | 302 |
322 void vp8_loop_filter_vertical_edge_y_neon( | 303 void vp8_loop_filter_vertical_edge_y_neon( |
323 unsigned char *src, | 304 unsigned char *src, |
324 int pitch, | 305 int pitch, |
325 unsigned char blimit, | 306 unsigned char blimit, |
326 unsigned char limit, | 307 unsigned char limit, |
327 unsigned char thresh) { | 308 unsigned char thresh) { |
328 unsigned char *s, *d; | 309 unsigned char *s, *d; |
329 uint8x16_t qblimit, qlimit, qthresh, q3, q4; | 310 uint8x16_t qblimit, qlimit, qthresh, q3, q4; |
(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
540 ud = u - 2; | 521 ud = u - 2; |
541 write_4x8(ud, pitch, q4ResultL); | 522 write_4x8(ud, pitch, q4ResultL); |
542 | 523 |
543 q4ResultH.val[0] = vget_high_u8(q5); // d11 | 524 q4ResultH.val[0] = vget_high_u8(q5); // d11 |
544 q4ResultH.val[1] = vget_high_u8(q6); // d13 | 525 q4ResultH.val[1] = vget_high_u8(q6); // d13 |
545 q4ResultH.val[2] = vget_high_u8(q7); // d15 | 526 q4ResultH.val[2] = vget_high_u8(q7); // d15 |
546 q4ResultH.val[3] = vget_high_u8(q8); // d17 | 527 q4ResultH.val[3] = vget_high_u8(q8); // d17 |
547 vd = v - 2; | 528 vd = v - 2; |
548 write_4x8(vd, pitch, q4ResultH); | 529 write_4x8(vd, pitch, q4ResultH); |
549 } | 530 } |
| 531 #endif // (__GNUC__ == 4 && (__GNUC_MINOR__ == 6)) |
OLD | NEW |