Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(15)

Side by Side Diff: source/libvpx/vp8/common/arm/neon/loopfilter_neon.c

Issue 597033002: Make sure gcc4.6 is not used (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 233 matching lines...) Expand 10 before | Expand all | Expand 10 after
244 vst1_u8(v, vget_high_u8(q5)); 244 vst1_u8(v, vget_high_u8(q5));
245 v += pitch; 245 v += pitch;
246 vst1_u8(v, vget_high_u8(q6)); 246 vst1_u8(v, vget_high_u8(q6));
247 v += pitch; 247 v += pitch;
248 vst1_u8(v, vget_high_u8(q7)); 248 vst1_u8(v, vget_high_u8(q7));
249 v += pitch; 249 v += pitch;
250 vst1_u8(v, vget_high_u8(q8)); 250 vst1_u8(v, vget_high_u8(q8));
251 return; 251 return;
252 } 252 }
253 253
254 #if (__GNUC__ == 4 && (__GNUC_MINOR__ == 6))
255 #warning Using GCC 4.6 is not recommended
256 // Some versions of gcc4.6 do not correctly process vst4_lane_u8. When built
257 // with any gcc4.6, use the C code.
258 extern void vp8_loop_filter_vertical_edge_c(unsigned char *s, int p,
259 const unsigned char *blimit,
260 const unsigned char *limit,
261 const unsigned char *thresh,
262 int count);
263
264 void vp8_loop_filter_vertical_edge_y_neon(
265 unsigned char *src,
266 int pitch,
267 unsigned char blimit,
268 unsigned char limit,
269 unsigned char thresh) {
270 vp8_loop_filter_vertical_edge_c(src, pitch, &blimit, &limit, &thresh, 2);
271 }
272
273 void vp8_loop_filter_vertical_edge_uv_neon(
274 unsigned char *u,
275 int pitch,
276 unsigned char blimit,
277 unsigned char limit,
278 unsigned char thresh,
279 unsigned char *v) {
280 vp8_loop_filter_vertical_edge_c(u, pitch, &blimit, &limit, &thresh, 1);
281 vp8_loop_filter_vertical_edge_c(v, pitch, &blimit, &limit, &thresh, 1);
282 }
283 #else
254 static INLINE void write_4x8(unsigned char *dst, int pitch, 284 static INLINE void write_4x8(unsigned char *dst, int pitch,
255 const uint8x8x4_t result) { 285 const uint8x8x4_t result) {
256 #if (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
257 vst4_lane_u8(dst, result, 0); 286 vst4_lane_u8(dst, result, 0);
258 dst += pitch; 287 dst += pitch;
259 vst4_lane_u8(dst, result, 1); 288 vst4_lane_u8(dst, result, 1);
260 dst += pitch; 289 dst += pitch;
261 vst4_lane_u8(dst, result, 2); 290 vst4_lane_u8(dst, result, 2);
262 dst += pitch; 291 dst += pitch;
263 vst4_lane_u8(dst, result, 3); 292 vst4_lane_u8(dst, result, 3);
264 dst += pitch; 293 dst += pitch;
265 vst4_lane_u8(dst, result, 4); 294 vst4_lane_u8(dst, result, 4);
266 dst += pitch; 295 dst += pitch;
267 vst4_lane_u8(dst, result, 5); 296 vst4_lane_u8(dst, result, 5);
268 dst += pitch; 297 dst += pitch;
269 vst4_lane_u8(dst, result, 6); 298 vst4_lane_u8(dst, result, 6);
270 dst += pitch; 299 dst += pitch;
271 vst4_lane_u8(dst, result, 7); 300 vst4_lane_u8(dst, result, 7);
272 #else
273 /*
274 * uint8x8x4_t result
275 00 01 02 03 | 04 05 06 07
276 10 11 12 13 | 14 15 16 17
277 20 21 22 23 | 24 25 26 27
278 30 31 32 33 | 34 35 36 37
279 ---
280 * after vtrn_u16
281 00 01 20 21 | 04 05 24 25
282 02 03 22 23 | 06 07 26 27
283 10 11 30 31 | 14 15 34 35
284 12 13 32 33 | 16 17 36 37
285 ---
286 * after vtrn_u8
287 00 10 20 30 | 04 14 24 34
288 01 11 21 31 | 05 15 25 35
289 02 12 22 32 | 06 16 26 36
290 03 13 23 33 | 07 17 27 37
291 */
292 const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]),
293 vreinterpret_u16_u8(result.val[2]));
294 const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]),
295 vreinterpret_u16_u8(result.val[3]));
296 const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
297 vreinterpret_u8_u16(r13_u16.val[0]));
298 const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
299 vreinterpret_u8_u16(r13_u16.val[1]));
300 const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]);
301 const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]);
302 const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]);
303 const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]);
304 vst1_lane_u32((uint32_t *)dst, x_0_4, 0);
305 dst += pitch;
306 vst1_lane_u32((uint32_t *)dst, x_1_5, 0);
307 dst += pitch;
308 vst1_lane_u32((uint32_t *)dst, x_2_6, 0);
309 dst += pitch;
310 vst1_lane_u32((uint32_t *)dst, x_3_7, 0);
311 dst += pitch;
312 vst1_lane_u32((uint32_t *)dst, x_0_4, 1);
313 dst += pitch;
314 vst1_lane_u32((uint32_t *)dst, x_1_5, 1);
315 dst += pitch;
316 vst1_lane_u32((uint32_t *)dst, x_2_6, 1);
317 dst += pitch;
318 vst1_lane_u32((uint32_t *)dst, x_3_7, 1);
319 #endif
320 } 301 }
321 302
322 void vp8_loop_filter_vertical_edge_y_neon( 303 void vp8_loop_filter_vertical_edge_y_neon(
323 unsigned char *src, 304 unsigned char *src,
324 int pitch, 305 int pitch,
325 unsigned char blimit, 306 unsigned char blimit,
326 unsigned char limit, 307 unsigned char limit,
327 unsigned char thresh) { 308 unsigned char thresh) {
328 unsigned char *s, *d; 309 unsigned char *s, *d;
329 uint8x16_t qblimit, qlimit, qthresh, q3, q4; 310 uint8x16_t qblimit, qlimit, qthresh, q3, q4;
(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after
540 ud = u - 2; 521 ud = u - 2;
541 write_4x8(ud, pitch, q4ResultL); 522 write_4x8(ud, pitch, q4ResultL);
542 523
543 q4ResultH.val[0] = vget_high_u8(q5); // d11 524 q4ResultH.val[0] = vget_high_u8(q5); // d11
544 q4ResultH.val[1] = vget_high_u8(q6); // d13 525 q4ResultH.val[1] = vget_high_u8(q6); // d13
545 q4ResultH.val[2] = vget_high_u8(q7); // d15 526 q4ResultH.val[2] = vget_high_u8(q7); // d15
546 q4ResultH.val[3] = vget_high_u8(q8); // d17 527 q4ResultH.val[3] = vget_high_u8(q8); // d17
547 vd = v - 2; 528 vd = v - 2;
548 write_4x8(vd, pitch, q4ResultH); 529 write_4x8(vd, pitch, q4ResultH);
549 } 530 }
531 #endif // (__GNUC__ == 4 && (__GNUC_MINOR__ == 6))
OLD | NEW
« no previous file with comments | « no previous file | source/libvpx/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698