OLD | NEW |
1 /* | 1 /* |
2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebM project authors. All Rights Reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include <arm_neon.h> | 11 #include <arm_neon.h> |
12 #include "./vp9_rtcd.h" | 12 #include "./vp9_rtcd.h" |
13 #include "./vpx_config.h" | 13 #include "./vpx_config.h" |
14 | 14 |
15 #include "vpx/vpx_integer.h" | 15 #include "vpx/vpx_integer.h" |
16 | 16 |
17 static INLINE unsigned int horizontal_long_add_16x8(const uint16x8_t vec_lo, | 17 static INLINE unsigned int horizontal_long_add_16x8(const uint16x8_t vec_lo, |
18 const uint16x8_t vec_hi) { | 18 const uint16x8_t vec_hi) { |
19 const uint32x4_t vec_l_lo = vaddl_u16(vget_low_u16(vec_lo), | 19 const uint32x4_t vec_l_lo = vaddl_u16(vget_low_u16(vec_lo), |
20 vget_high_u16(vec_lo)); | 20 vget_high_u16(vec_lo)); |
21 const uint32x4_t vec_l_hi = vaddl_u16(vget_low_u16(vec_hi), | 21 const uint32x4_t vec_l_hi = vaddl_u16(vget_low_u16(vec_hi), |
22 vget_high_u16(vec_hi)); | 22 vget_high_u16(vec_hi)); |
23 const uint32x4_t a = vaddq_u32(vec_l_lo, vec_l_hi); | 23 const uint32x4_t a = vaddq_u32(vec_l_lo, vec_l_hi); |
24 const uint64x2_t b = vpaddlq_u32(a); | 24 const uint64x2_t b = vpaddlq_u32(a); |
25 const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)), | 25 const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)), |
26 vreinterpret_u32_u64(vget_high_u64(b))); | 26 vreinterpret_u32_u64(vget_high_u64(b))); |
27 return vget_lane_u32(c, 0); | 27 return vget_lane_u32(c, 0); |
28 } | 28 } |
29 static INLINE unsigned int horizontal_add_16x8(const uint16x8_t vec_lo, | 29 static INLINE unsigned int horizontal_add_16x8(const uint16x8_t vec_16x8) { |
30 const uint16x8_t vec_hi) { | 30 const uint32x4_t a = vpaddlq_u16(vec_16x8); |
31 const uint32x4_t a = vpaddlq_u16(vaddq_u16(vec_lo, vec_hi)); | |
32 const uint64x2_t b = vpaddlq_u32(a); | 31 const uint64x2_t b = vpaddlq_u32(a); |
33 const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)), | 32 const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)), |
34 vreinterpret_u32_u64(vget_high_u64(b))); | 33 vreinterpret_u32_u64(vget_high_u64(b))); |
35 return vget_lane_u32(c, 0); | 34 return vget_lane_u32(c, 0); |
36 } | 35 } |
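
Note: the refactor above narrows horizontal_add_16x8 to a single uint16x8_t argument; the cross-vector vaddq_u16 moves out to the call sites, so the new single-accumulator vp9_sad8x8_neon below can reuse the same reduction. Lane-wise, the vpaddlq/vadd chain just sums all eight 16-bit lanes. A minimal scalar sketch of that reduction (standalone name assumed, not part of the patch):

#include <stdint.h>

/* Scalar equivalent of horizontal_add_16x8: sum the eight 16-bit
 * lanes into one 32-bit result (illustration, not the intrinsics). */
static unsigned int horizontal_add_16x8_scalar(const uint16_t lanes[8]) {
  unsigned int sum = 0;
  int i;
  for (i = 0; i < 8; ++i)
    sum += lanes[i];
  return sum;
}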
37 | 36 |
38 unsigned int vp9_sad64x64_neon(const uint8_t *src, int src_stride, | 37 unsigned int vp9_sad64x64_neon(const uint8_t *src, int src_stride, |
39 const uint8_t *ref, int ref_stride) { | 38 const uint8_t *ref, int ref_stride) { |
40 int i; | 39 int i; |
41 uint16x8_t vec_accum_lo = vdupq_n_u16(0); | 40 uint16x8_t vec_accum_lo = vdupq_n_u16(0); |
(...skipping 44 matching lines...)
86 ref += ref_stride; | 85 ref += ref_stride; |
87 vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_00), | 86 vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_00), |
88 vget_low_u8(vec_ref_00)); | 87 vget_low_u8(vec_ref_00)); |
89 vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_00), | 88 vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_00), |
90 vget_high_u8(vec_ref_00)); | 89 vget_high_u8(vec_ref_00)); |
91 vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_16), | 90 vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src_16), |
92 vget_low_u8(vec_ref_16)); | 91 vget_low_u8(vec_ref_16)); |
93 vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_16), | 92 vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src_16), |
94 vget_high_u8(vec_ref_16)); | 93 vget_high_u8(vec_ref_16)); |
95 } | 94 } |
96 return horizontal_add_16x8(vec_accum_lo, vec_accum_hi); | 95 return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi)); |
97 } | 96 } |
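
Note: each vabal_u8 call adds one absolute byte difference (at most 255) into each 16-bit accumulator lane. From the visible loads at offsets 0 and 16, every lane of vec_accum_lo/vec_accum_hi takes two such additions per row; if the elided lines feed offsets 32 and 48 into the same two accumulators in the same pattern, that is four per row, bounding each lane at 64 * 4 * 255 = 65280, which still fits in 16 bits. For cross-checking, a plain-C reference in the spirit of the scalar fallback ("scalar_sad" is a hypothetical name, not the library's):

#include <stdint.h>
#include <stdlib.h>

/* Plain-C SAD over a width x height block (sketch for verification,
 * not the library's C implementation). */
static unsigned int scalar_sad(const uint8_t *src, int src_stride,
                               const uint8_t *ref, int ref_stride,
                               int width, int height) {
  int x, y;
  unsigned int sad = 0;
  for (y = 0; y < height; ++y) {
    for (x = 0; x < width; ++x)
      sad += abs(src[x] - ref[x]);
    src += src_stride;
    ref += ref_stride;
  }
  return sad;
}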
98 | 97 |
99 unsigned int vp9_sad16x16_neon(const uint8_t *src, int src_stride, | 98 unsigned int vp9_sad16x16_neon(const uint8_t *src, int src_stride, |
100 const uint8_t *ref, int ref_stride) { | 99 const uint8_t *ref, int ref_stride) { |
101 int i; | 100 int i; |
102 uint16x8_t vec_accum_lo = vdupq_n_u16(0); | 101 uint16x8_t vec_accum_lo = vdupq_n_u16(0); |
103 uint16x8_t vec_accum_hi = vdupq_n_u16(0); | 102 uint16x8_t vec_accum_hi = vdupq_n_u16(0); |
104 | 103 |
105 for (i = 0; i < 16; ++i) { | 104 for (i = 0; i < 16; ++i) { |
106 const uint8x16_t vec_src = vld1q_u8(src); | 105 const uint8x16_t vec_src = vld1q_u8(src); |
107 const uint8x16_t vec_ref = vld1q_u8(ref); | 106 const uint8x16_t vec_ref = vld1q_u8(ref); |
108 src += src_stride; | 107 src += src_stride; |
109 ref += ref_stride; | 108 ref += ref_stride; |
110 vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src), | 109 vec_accum_lo = vabal_u8(vec_accum_lo, vget_low_u8(vec_src), |
111 vget_low_u8(vec_ref)); | 110 vget_low_u8(vec_ref)); |
112 vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src), | 111 vec_accum_hi = vabal_u8(vec_accum_hi, vget_high_u8(vec_src), |
113 vget_high_u8(vec_ref)); | 112 vget_high_u8(vec_ref)); |
114 } | 113 } |
115 return horizontal_add_16x8(vec_accum_lo, vec_accum_hi); | 114 return horizontal_add_16x8(vaddq_u16(vec_accum_lo, vec_accum_hi)); |
116 } | 115 } |
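
Note: the 16x16 loop is the same pattern with one 16-byte load per row, so each 16-bit lane accumulates at most 16 * 255 = 4080, nowhere near overflow. A quick spot check against the scalar sketch above (buffers, strides, and the helper name are assumptions):

#include <assert.h>

/* Hypothetical spot check: the NEON path should match the scalar
 * reference on the same 16x16 block (src/ref assumed filled). */
static void check_sad16x16(const uint8_t *src, const uint8_t *ref) {
  assert(vp9_sad16x16_neon(src, 16, ref, 16) ==
         scalar_sad(src, 16, ref, 16, 16, 16));
}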
| 116 |
| 117 unsigned int vp9_sad8x8_neon(const uint8_t *src, int src_stride, |
| 118 const uint8_t *ref, int ref_stride) { |
| 119 int i; |
| 120 uint16x8_t vec_accum = vdupq_n_u16(0); |
| 121 |
| 122 for (i = 0; i < 8; ++i) { |
| 123 const uint8x8_t vec_src = vld1_u8(src); |
| 124 const uint8x8_t vec_ref = vld1_u8(ref); |
| 125 src += src_stride; |
| 126 ref += ref_stride; |
| 127 vec_accum = vabal_u8(vec_accum, vec_src, vec_ref); |
| 128 } |
| 129 return horizontal_add_16x8(vec_accum); |
| 130 } |
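
Note: the new 8x8 path is what motivates the one-argument helper; with only eight bytes per row, a single uint16x8_t accumulator suffices (at most 8 * 255 = 2040 per lane), and there is no second vector to fold in before the reduction. Lane-wise, vabal_u8 behaves like the following sketch (illustration only, not the intrinsic):

#include <stdint.h>

/* Lane-wise meaning of vabal_u8(acc, a, b): accumulate the absolute
 * difference of 8-bit inputs, widened into 16-bit lanes. */
static void vabal_u8_scalar(uint16_t acc[8], const uint8_t a[8],
                            const uint8_t b[8]) {
  int i;
  for (i = 0; i < 8; ++i)
    acc[i] = (uint16_t)(acc[i] + (a[i] > b[i] ? a[i] - b[i] : b[i] - a[i]));
}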