Index: source/libvpx/vpx_dsp/arm/sad_neon.c |
diff --git a/source/libvpx/vp9/encoder/arm/neon/vp9_sad_neon.c b/source/libvpx/vpx_dsp/arm/sad_neon.c |
similarity index 65% |
rename from source/libvpx/vp9/encoder/arm/neon/vp9_sad_neon.c |
rename to source/libvpx/vpx_dsp/arm/sad_neon.c |
index c4cd856804da7ec7e3f42f8df2b562f27bc62cbf..173f08ac3c3e202764a8dd01a43a9b8877d08289 100644 |
--- a/source/libvpx/vp9/encoder/arm/neon/vp9_sad_neon.c |
+++ b/source/libvpx/vpx_dsp/arm/sad_neon.c |
@@ -9,11 +9,113 @@ |
*/ |
#include <arm_neon.h> |
-#include "./vp9_rtcd.h" |
+ |
#include "./vpx_config.h" |
#include "vpx/vpx_integer.h" |
/*
 * Sum of absolute differences for one 8-wide, 16-tall block (NEON).
 *
 * src_ptr / ref_ptr   : top-left pixel of each block.
 * src_stride/ref_stride: byte pitch between consecutive rows.
 * Returns sum over all 128 pixel pairs of |src - ref|.
 */
unsigned int vpx_sad8x16_neon(
    unsigned char *src_ptr,
    int src_stride,
    unsigned char *ref_ptr,
    int ref_stride) {
  uint16x8_t acc = vdupq_n_u16(0);  /* per-lane 16-bit SAD accumulator */
  int row;

  /* Widening absolute-difference accumulate, one 8-pixel row per step.
   * Worst case per lane is 16 * 255 = 4080, well within uint16_t. */
  for (row = 0; row < 16; row++) {
    const uint8x8_t s = vld1_u8(src_ptr);
    const uint8x8_t r = vld1_u8(ref_ptr);
    src_ptr += src_stride;
    ref_ptr += ref_stride;
    acc = vabal_u8(acc, s, r);
  }

  /* Horizontal reduction: fold the eight 16-bit lanes down to a scalar.
   * fold16 lanes peak at 2 * 4080, still inside uint16_t. */
  {
    const uint16x4_t fold16 = vadd_u16(vget_low_u16(acc), vget_high_u16(acc));
    const uint32x2_t fold32 = vpaddl_u16(fold16);
    const uint64x1_t fold64 = vpaddl_u32(fold32);
    return vget_lane_u32(vreinterpret_u32_u64(fold64), 0);
  }
}
+ |
/*
 * Sum of absolute differences for one 4x4 block (NEON).
 *
 * NOTE(review): each row is fetched as a full 8-byte vector; the upper
 * four bytes are computed but discarded because only the low four
 * 16-bit lanes are folded into the result.  This assumes at least 8
 * readable bytes per row — presumably the libvpx block buffers are
 * padded; confirm against callers.
 */
unsigned int vpx_sad4x4_neon(
    unsigned char *src_ptr,
    int src_stride,
    unsigned char *ref_ptr,
    int ref_stride) {
  uint16x8_t acc = vdupq_n_u16(0);  /* per-lane 16-bit SAD accumulator */
  int row;

  for (row = 0; row < 4; row++) {
    const uint8x8_t s = vld1_u8(src_ptr);
    const uint8x8_t r = vld1_u8(ref_ptr);
    src_ptr += src_stride;
    ref_ptr += ref_stride;
    acc = vabal_u8(acc, s, r);
  }

  /* Only lanes 0-3 correspond to the 4-wide block; reduce the low half. */
  {
    const uint32x2_t fold32 = vpaddl_u16(vget_low_u16(acc));
    const uint64x1_t fold64 = vpaddl_u32(fold32);
    return vget_lane_u32(vreinterpret_u32_u64(fold64), 0);
  }
}
+ |
/*
 * Sum of absolute differences for one 16-wide, 8-tall block (NEON).
 *
 * Processes a full 16-byte row per iteration, splitting it into two
 * 8-lane widening accumulators that are merged at the end.
 * Returns sum over all 128 pixel pairs of |src - ref|.
 */
unsigned int vpx_sad16x8_neon(
    unsigned char *src_ptr,
    int src_stride,
    unsigned char *ref_ptr,
    int ref_stride) {
  uint16x8_t acc_lo = vdupq_n_u16(0);  /* lanes for bytes 0-7 of each row */
  uint16x8_t acc_hi = vdupq_n_u16(0);  /* lanes for bytes 8-15 of each row */
  int row;

  for (row = 0; row < 8; row++) {
    const uint8x16_t s = vld1q_u8(src_ptr);
    const uint8x16_t r = vld1q_u8(ref_ptr);
    src_ptr += src_stride;
    ref_ptr += ref_stride;
    acc_lo = vabal_u8(acc_lo, vget_low_u8(s), vget_low_u8(r));
    acc_hi = vabal_u8(acc_hi, vget_high_u8(s), vget_high_u8(r));
  }

  /* Merge and reduce.  Per-lane worst case after the merge is
   * 2 * 8 * 255 = 4080; the pairwise fold below stays within range. */
  {
    const uint16x8_t acc = vaddq_u16(acc_lo, acc_hi);
    const uint16x4_t fold16 = vadd_u16(vget_low_u16(acc), vget_high_u16(acc));
    const uint32x2_t fold32 = vpaddl_u16(fold16);
    const uint64x1_t fold64 = vpaddl_u32(fold32);
    return vget_lane_u32(vreinterpret_u32_u64(fold64), 0);
  }
}
+ |
static INLINE unsigned int horizontal_long_add_16x8(const uint16x8_t vec_lo, |
const uint16x8_t vec_hi) { |
const uint32x4_t vec_l_lo = vaddl_u16(vget_low_u16(vec_lo), |
@@ -34,7 +136,7 @@ static INLINE unsigned int horizontal_add_16x8(const uint16x8_t vec_16x8) { |
return vget_lane_u32(c, 0); |
} |
-unsigned int vp9_sad64x64_neon(const uint8_t *src, int src_stride, |
+unsigned int vpx_sad64x64_neon(const uint8_t *src, int src_stride, |
const uint8_t *ref, int ref_stride) { |
int i; |
uint16x8_t vec_accum_lo = vdupq_n_u16(0); |
@@ -70,7 +172,7 @@ unsigned int vp9_sad64x64_neon(const uint8_t *src, int src_stride, |
return horizontal_long_add_16x8(vec_accum_lo, vec_accum_hi); |
} |
-unsigned int vp9_sad32x32_neon(const uint8_t *src, int src_stride, |
+unsigned int vpx_sad32x32_neon(const uint8_t *src, int src_stride, |
const uint8_t *ref, int ref_stride) { |
int i; |
uint16x8_t vec_accum_lo = vdupq_n_u16(0); |
@@ -95,7 +197,7 @@ unsigned int vp9_sad32x32_neon(const uint8_t *src, int src_stride, |
return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi)); |
} |
-unsigned int vp9_sad16x16_neon(const uint8_t *src, int src_stride, |
+unsigned int vpx_sad16x16_neon(const uint8_t *src, int src_stride, |
const uint8_t *ref, int ref_stride) { |
int i; |
uint16x8_t vec_accum_lo = vdupq_n_u16(0); |
@@ -114,7 +216,7 @@ unsigned int vp9_sad16x16_neon(const uint8_t *src, int src_stride, |
return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi)); |
} |
-unsigned int vp9_sad8x8_neon(const uint8_t *src, int src_stride, |
+unsigned int vpx_sad8x8_neon(const uint8_t *src, int src_stride, |
const uint8_t *ref, int ref_stride) { |
int i; |
uint16x8_t vec_accum = vdupq_n_u16(0); |