Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(16)

Unified Diff: source/libvpx/vp8/common/arm/neon/loopfilter_neon.c

Issue 668403002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/libvpx/tools_common.c ('k') | source/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/libvpx/vp8/common/arm/neon/loopfilter_neon.c
===================================================================
--- source/libvpx/vp8/common/arm/neon/loopfilter_neon.c (revision 292608)
+++ source/libvpx/vp8/common/arm/neon/loopfilter_neon.c (working copy)
@@ -10,6 +10,7 @@
#include <arm_neon.h>
#include "./vpx_config.h"
+#include "vpx_ports/arm.h"
static INLINE void vp8_loop_filter_neon(
uint8x16_t qblimit, // flimit
@@ -251,38 +252,56 @@
return;
}
-#if (__GNUC__ == 4 && (__GNUC_MINOR__ == 6))
-#warning Using GCC 4.6 is not recommended
-// Some versions of gcc4.6 do not correctly process vst4_lane_u8. When built
-// with any gcc4.6, use the C code.
-extern void vp8_loop_filter_vertical_edge_c(unsigned char *s, int p,
- const unsigned char *blimit,
- const unsigned char *limit,
- const unsigned char *thresh,
- int count);
-
-void vp8_loop_filter_vertical_edge_y_neon(
- unsigned char *src,
- int pitch,
- unsigned char blimit,
- unsigned char limit,
- unsigned char thresh) {
- vp8_loop_filter_vertical_edge_c(src, pitch, &blimit, &limit, &thresh, 2);
-}
-
-void vp8_loop_filter_vertical_edge_uv_neon(
- unsigned char *u,
- int pitch,
- unsigned char blimit,
- unsigned char limit,
- unsigned char thresh,
- unsigned char *v) {
- vp8_loop_filter_vertical_edge_c(u, pitch, &blimit, &limit, &thresh, 1);
- vp8_loop_filter_vertical_edge_c(v, pitch, &blimit, &limit, &thresh, 1);
-}
-#else
static INLINE void write_4x8(unsigned char *dst, int pitch,
const uint8x8x4_t result) {
+#ifdef VPX_INCOMPATIBLE_GCC
+ /*
+ * uint8x8x4_t result
+ 00 01 02 03 | 04 05 06 07
+ 10 11 12 13 | 14 15 16 17
+ 20 21 22 23 | 24 25 26 27
+ 30 31 32 33 | 34 35 36 37
+ ---
+ * after vtrn_u16
+ 00 01 20 21 | 04 05 24 25
+ 02 03 22 23 | 06 07 26 27
+ 10 11 30 31 | 14 15 34 35
+ 12 13 32 33 | 16 17 36 37
+ ---
+ * after vtrn_u8
+ 00 10 20 30 | 04 14 24 34
+ 01 11 21 31 | 05 15 25 35
+ 02 12 22 32 | 06 16 26 36
+ 03 13 23 33 | 07 17 27 37
+ */
+ const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]),
+ vreinterpret_u16_u8(result.val[2]));
+ const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]),
+ vreinterpret_u16_u8(result.val[3]));
+ const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]),
+ vreinterpret_u8_u16(r13_u16.val[0]));
+ const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]),
+ vreinterpret_u8_u16(r13_u16.val[1]));
+ const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]);
+ const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]);
+ const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]);
+ const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]);
+ vst1_lane_u32((uint32_t *)dst, x_0_4, 0);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_1_5, 0);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_2_6, 0);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_3_7, 0);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_0_4, 1);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_1_5, 1);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_2_6, 1);
+ dst += pitch;
+ vst1_lane_u32((uint32_t *)dst, x_3_7, 1);
+#else
vst4_lane_u8(dst, result, 0);
dst += pitch;
vst4_lane_u8(dst, result, 1);
@@ -298,6 +317,7 @@
vst4_lane_u8(dst, result, 6);
dst += pitch;
vst4_lane_u8(dst, result, 7);
+#endif // VPX_INCOMPATIBLE_GCC
}
void vp8_loop_filter_vertical_edge_y_neon(
@@ -528,4 +548,3 @@
vd = v - 2;
write_4x8(vd, pitch, q4ResultH);
}
-#endif // (__GNUC__ == 4 && (__GNUC_MINOR__ == 6))
« no previous file with comments | « source/libvpx/tools_common.c ('k') | source/libvpx/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698