Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(11)

Unified Diff: source/libvpx/vpx_dsp/arm/sad_neon.c

Issue 1124333011: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: only update to last night's LKGR. Created 5 years, 7 months ago.
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/libvpx/vpx_dsp/arm/sad_media.asm ('k') | source/libvpx/vpx_dsp/sad.c » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/libvpx/vpx_dsp/arm/sad_neon.c
diff --git a/source/libvpx/vp9/encoder/arm/neon/vp9_sad_neon.c b/source/libvpx/vpx_dsp/arm/sad_neon.c
similarity index 65%
rename from source/libvpx/vp9/encoder/arm/neon/vp9_sad_neon.c
rename to source/libvpx/vpx_dsp/arm/sad_neon.c
index c4cd856804da7ec7e3f42f8df2b562f27bc62cbf..173f08ac3c3e202764a8dd01a43a9b8877d08289 100644
--- a/source/libvpx/vp9/encoder/arm/neon/vp9_sad_neon.c
+++ b/source/libvpx/vpx_dsp/arm/sad_neon.c
@@ -9,11 +9,113 @@
*/
#include <arm_neon.h>
-#include "./vp9_rtcd.h"
+
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
+unsigned int vpx_sad8x16_neon(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride) {
+ uint8x8_t d0, d8;
+ uint16x8_t q12;
+ uint32x4_t q1;
+ uint64x2_t q3;
+ uint32x2_t d5;
+ int i;
+
+ d0 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d8 = vld1_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabdl_u8(d0, d8);
+
+ for (i = 0; i < 15; i++) {
+ d0 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d8 = vld1_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabal_u8(q12, d0, d8);
+ }
+
+ q1 = vpaddlq_u16(q12);
+ q3 = vpaddlq_u32(q1);
+ d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
+ vreinterpret_u32_u64(vget_high_u64(q3)));
+
+ return vget_lane_u32(d5, 0);
+}
+
+unsigned int vpx_sad4x4_neon(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride) {
+ uint8x8_t d0, d8;
+ uint16x8_t q12;
+ uint32x2_t d1;
+ uint64x1_t d3;
+ int i;
+
+ d0 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d8 = vld1_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabdl_u8(d0, d8);
+
+ for (i = 0; i < 3; i++) {
+ d0 = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ d8 = vld1_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabal_u8(q12, d0, d8);
+ }
+
+ d1 = vpaddl_u16(vget_low_u16(q12));
+ d3 = vpaddl_u32(d1);
+
+ return vget_lane_u32(vreinterpret_u32_u64(d3), 0);
+}
+
+unsigned int vpx_sad16x8_neon(
+ unsigned char *src_ptr,
+ int src_stride,
+ unsigned char *ref_ptr,
+ int ref_stride) {
+ uint8x16_t q0, q4;
+ uint16x8_t q12, q13;
+ uint32x4_t q1;
+ uint64x2_t q3;
+ uint32x2_t d5;
+ int i;
+
+ q0 = vld1q_u8(src_ptr);
+ src_ptr += src_stride;
+ q4 = vld1q_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabdl_u8(vget_low_u8(q0), vget_low_u8(q4));
+ q13 = vabdl_u8(vget_high_u8(q0), vget_high_u8(q4));
+
+ for (i = 0; i < 7; i++) {
+ q0 = vld1q_u8(src_ptr);
+ src_ptr += src_stride;
+ q4 = vld1q_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ q12 = vabal_u8(q12, vget_low_u8(q0), vget_low_u8(q4));
+ q13 = vabal_u8(q13, vget_high_u8(q0), vget_high_u8(q4));
+ }
+
+ q12 = vaddq_u16(q12, q13);
+ q1 = vpaddlq_u16(q12);
+ q3 = vpaddlq_u32(q1);
+ d5 = vadd_u32(vreinterpret_u32_u64(vget_low_u64(q3)),
+ vreinterpret_u32_u64(vget_high_u64(q3)));
+
+ return vget_lane_u32(d5, 0);
+}
+
static INLINE unsigned int horizontal_long_add_16x8(const uint16x8_t vec_lo,
const uint16x8_t vec_hi) {
const uint32x4_t vec_l_lo = vaddl_u16(vget_low_u16(vec_lo),
@@ -34,7 +136,7 @@ static INLINE unsigned int horizontal_add_16x8(const uint16x8_t vec_16x8) {
return vget_lane_u32(c, 0);
}
-unsigned int vp9_sad64x64_neon(const uint8_t *src, int src_stride,
+unsigned int vpx_sad64x64_neon(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride) {
int i;
uint16x8_t vec_accum_lo = vdupq_n_u16(0);
@@ -70,7 +172,7 @@ unsigned int vp9_sad64x64_neon(const uint8_t *src, int src_stride,
return horizontal_long_add_16x8(vec_accum_lo, vec_accum_hi);
}
-unsigned int vp9_sad32x32_neon(const uint8_t *src, int src_stride,
+unsigned int vpx_sad32x32_neon(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride) {
int i;
uint16x8_t vec_accum_lo = vdupq_n_u16(0);
@@ -95,7 +197,7 @@ unsigned int vp9_sad32x32_neon(const uint8_t *src, int src_stride,
return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi));
}
-unsigned int vp9_sad16x16_neon(const uint8_t *src, int src_stride,
+unsigned int vpx_sad16x16_neon(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride) {
int i;
uint16x8_t vec_accum_lo = vdupq_n_u16(0);
@@ -114,7 +216,7 @@ unsigned int vp9_sad16x16_neon(const uint8_t *src, int src_stride,
return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi));
}
-unsigned int vp9_sad8x8_neon(const uint8_t *src, int src_stride,
+unsigned int vpx_sad8x8_neon(const uint8_t *src, int src_stride,
const uint8_t *ref, int ref_stride) {
int i;
uint16x8_t vec_accum = vdupq_n_u16(0);
« no previous file with comments | « source/libvpx/vpx_dsp/arm/sad_media.asm ('k') | source/libvpx/vpx_dsp/sad.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698