Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(212)

Side by Side Diff: source/libvpx/vp8/common/arm/neon/loopfilter_neon.c

Issue 668403002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include <arm_neon.h> 11 #include <arm_neon.h>
12 #include "./vpx_config.h" 12 #include "./vpx_config.h"
13 #include "vpx_ports/arm.h"
13 14
14 static INLINE void vp8_loop_filter_neon( 15 static INLINE void vp8_loop_filter_neon(
15 uint8x16_t qblimit, // flimit 16 uint8x16_t qblimit, // flimit
16 uint8x16_t qlimit, // limit 17 uint8x16_t qlimit, // limit
17 uint8x16_t qthresh, // thresh 18 uint8x16_t qthresh, // thresh
18 uint8x16_t q3, // p3 19 uint8x16_t q3, // p3
19 uint8x16_t q4, // p2 20 uint8x16_t q4, // p2
20 uint8x16_t q5, // p1 21 uint8x16_t q5, // p1
21 uint8x16_t q6, // p0 22 uint8x16_t q6, // p0
22 uint8x16_t q7, // q0 23 uint8x16_t q7, // q0
(...skipping 221 matching lines...) Expand 10 before | Expand all | Expand 10 after
244 vst1_u8(v, vget_high_u8(q5)); 245 vst1_u8(v, vget_high_u8(q5));
245 v += pitch; 246 v += pitch;
246 vst1_u8(v, vget_high_u8(q6)); 247 vst1_u8(v, vget_high_u8(q6));
247 v += pitch; 248 v += pitch;
248 vst1_u8(v, vget_high_u8(q7)); 249 vst1_u8(v, vget_high_u8(q7));
249 v += pitch; 250 v += pitch;
250 vst1_u8(v, vget_high_u8(q8)); 251 vst1_u8(v, vget_high_u8(q8));
251 return; 252 return;
252 } 253 }
253 254
254 #if (__GNUC__ == 4 && (__GNUC_MINOR__ == 6))
255 #warning Using GCC 4.6 is not recommended
256 // Some versions of gcc4.6 do not correctly process vst4_lane_u8. When built
257 // with any gcc4.6, use the C code.
258 extern void vp8_loop_filter_vertical_edge_c(unsigned char *s, int p,
259 const unsigned char *blimit,
260 const unsigned char *limit,
261 const unsigned char *thresh,
262 int count);
263
264 void vp8_loop_filter_vertical_edge_y_neon(
265 unsigned char *src,
266 int pitch,
267 unsigned char blimit,
268 unsigned char limit,
269 unsigned char thresh) {
270 vp8_loop_filter_vertical_edge_c(src, pitch, &blimit, &limit, &thresh, 2);
271 }
272
273 void vp8_loop_filter_vertical_edge_uv_neon(
274 unsigned char *u,
275 int pitch,
276 unsigned char blimit,
277 unsigned char limit,
278 unsigned char thresh,
279 unsigned char *v) {
280 vp8_loop_filter_vertical_edge_c(u, pitch, &blimit, &limit, &thresh, 1);
281 vp8_loop_filter_vertical_edge_c(v, pitch, &blimit, &limit, &thresh, 1);
282 }
283 #else
284 static INLINE void write_4x8(unsigned char *dst, int pitch, 255 static INLINE void write_4x8(unsigned char *dst, int pitch,
285 const uint8x8x4_t result) { 256 const uint8x8x4_t result) {
257 #ifdef VPX_INCOMPATIBLE_GCC
258 /*
259 * uint8x8x4_t result
260 00 01 02 03 | 04 05 06 07
261 10 11 12 13 | 14 15 16 17
262 20 21 22 23 | 24 25 26 27
263 30 31 32 33 | 34 35 36 37
264 ---
265 * after vtrn_u16
266 00 01 20 21 | 04 05 24 25
267 02 03 22 23 | 06 07 26 27
268 10 11 30 31 | 14 15 34 35
269 12 13 32 33 | 16 17 36 37
270 ---
271 * after vtrn_u8
272 00 10 20 30 | 04 14 24 34
273 01 11 21 31 | 05 15 25 35
274 02 12 22 32 | 06 16 26 36
275 03 13 23 33 | 07 17 27 37
276 */
277 const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]),
278 vreinterpret_u16_u8(result.val[2]));
279 const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]),
280 vreinterpret_u16_u8(result.val[3]));
281 const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
282 vreinterpret_u8_u16(r13_u16.val[0]));
283 const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
284 vreinterpret_u8_u16(r13_u16.val[1]));
285 const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]);
286 const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]);
287 const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]);
288 const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]);
289 vst1_lane_u32((uint32_t *)dst, x_0_4, 0);
290 dst += pitch;
291 vst1_lane_u32((uint32_t *)dst, x_1_5, 0);
292 dst += pitch;
293 vst1_lane_u32((uint32_t *)dst, x_2_6, 0);
294 dst += pitch;
295 vst1_lane_u32((uint32_t *)dst, x_3_7, 0);
296 dst += pitch;
297 vst1_lane_u32((uint32_t *)dst, x_0_4, 1);
298 dst += pitch;
299 vst1_lane_u32((uint32_t *)dst, x_1_5, 1);
300 dst += pitch;
301 vst1_lane_u32((uint32_t *)dst, x_2_6, 1);
302 dst += pitch;
303 vst1_lane_u32((uint32_t *)dst, x_3_7, 1);
304 #else
286 vst4_lane_u8(dst, result, 0); 305 vst4_lane_u8(dst, result, 0);
287 dst += pitch; 306 dst += pitch;
288 vst4_lane_u8(dst, result, 1); 307 vst4_lane_u8(dst, result, 1);
289 dst += pitch; 308 dst += pitch;
290 vst4_lane_u8(dst, result, 2); 309 vst4_lane_u8(dst, result, 2);
291 dst += pitch; 310 dst += pitch;
292 vst4_lane_u8(dst, result, 3); 311 vst4_lane_u8(dst, result, 3);
293 dst += pitch; 312 dst += pitch;
294 vst4_lane_u8(dst, result, 4); 313 vst4_lane_u8(dst, result, 4);
295 dst += pitch; 314 dst += pitch;
296 vst4_lane_u8(dst, result, 5); 315 vst4_lane_u8(dst, result, 5);
297 dst += pitch; 316 dst += pitch;
298 vst4_lane_u8(dst, result, 6); 317 vst4_lane_u8(dst, result, 6);
299 dst += pitch; 318 dst += pitch;
300 vst4_lane_u8(dst, result, 7); 319 vst4_lane_u8(dst, result, 7);
320 #endif // VPX_INCOMPATIBLE_GCC
301 } 321 }
302 322
303 void vp8_loop_filter_vertical_edge_y_neon( 323 void vp8_loop_filter_vertical_edge_y_neon(
304 unsigned char *src, 324 unsigned char *src,
305 int pitch, 325 int pitch,
306 unsigned char blimit, 326 unsigned char blimit,
307 unsigned char limit, 327 unsigned char limit,
308 unsigned char thresh) { 328 unsigned char thresh) {
309 unsigned char *s, *d; 329 unsigned char *s, *d;
310 uint8x16_t qblimit, qlimit, qthresh, q3, q4; 330 uint8x16_t qblimit, qlimit, qthresh, q3, q4;
(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after
521 ud = u - 2; 541 ud = u - 2;
522 write_4x8(ud, pitch, q4ResultL); 542 write_4x8(ud, pitch, q4ResultL);
523 543
524 q4ResultH.val[0] = vget_high_u8(q5); // d11 544 q4ResultH.val[0] = vget_high_u8(q5); // d11
525 q4ResultH.val[1] = vget_high_u8(q6); // d13 545 q4ResultH.val[1] = vget_high_u8(q6); // d13
526 q4ResultH.val[2] = vget_high_u8(q7); // d15 546 q4ResultH.val[2] = vget_high_u8(q7); // d15
527 q4ResultH.val[3] = vget_high_u8(q8); // d17 547 q4ResultH.val[3] = vget_high_u8(q8); // d17
528 vd = v - 2; 548 vd = v - 2;
529 write_4x8(vd, pitch, q4ResultH); 549 write_4x8(vd, pitch, q4ResultH);
530 } 550 }
531 #endif // (__GNUC__ == 4 && (__GNUC_MINOR__ == 6))
OLDNEW
« no previous file with comments | « source/libvpx/tools_common.c ('k') | source/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698