source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm - Issue 592203002: libvpx: Pull from upstream

Unified Diff: source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm

Issue 592203002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/

Patch Set: Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm

===================================================================

--- source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm (revision 292072)

+++ source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm (working copy)

@@ -1,123 +0,0 @@

-; Use of this source code is governed by a BSD-style license

-; that can be found in the LICENSE file in the root of the source

-; tree. An additional intellectual property rights grant can be found

-; in the file PATENTS. All contributing project authors may

-; be found in the AUTHORS file in the root of the source tree.

- EXPORT |vp8_mse16x16_neon|

- EXPORT |vp8_get4x4sse_cs_neon|

- ARM

- REQUIRE8

- PRESERVE8

- AREA ||.text||, CODE, READONLY, ALIGN=2

-;============================

-; r0 unsigned char *src_ptr

-; r1 int source_stride

-; r2 unsigned char *ref_ptr

-; r3 int recon_stride

-; stack unsigned int *sse

-;note: in this function, sum is never used. So, we can remove this part of calculation

-;from vp8_variance().

-|vp8_mse16x16_neon| PROC

- vpush {q7}

- vmov.i8 q7, #0 ;q7, q8, q9, q10 - sse

- vmov.i8 q8, #0

- vmov.i8 q9, #0

- vmov.i8 q10, #0

- mov r12, #8

-mse16x16_neon_loop

- vld1.8 {q0}, [r0], r1 ;Load up source and reference

- vld1.8 {q2}, [r2], r3

- vld1.8 {q1}, [r0], r1

- vld1.8 {q3}, [r2], r3

- vsubl.u8 q11, d0, d4

- vsubl.u8 q12, d1, d5

- vsubl.u8 q13, d2, d6

- vsubl.u8 q14, d3, d7

- vmlal.s16 q7, d22, d22

- vmlal.s16 q8, d23, d23

- subs r12, r12, #1

- vmlal.s16 q9, d24, d24

- vmlal.s16 q10, d25, d25

- vmlal.s16 q7, d26, d26

- vmlal.s16 q8, d27, d27

- vmlal.s16 q9, d28, d28

- vmlal.s16 q10, d29, d29

- bne mse16x16_neon_loop

- vadd.u32 q7, q7, q8

- vadd.u32 q9, q9, q10

- ldr r12, [sp, #16] ;load *sse from stack

- vadd.u32 q10, q7, q9

- vpaddl.u32 q1, q10

- vadd.u64 d0, d2, d3

- vst1.32 {d0[0]}, [r12]

- vmov.32 r0, d0[0]

- vpop {q7}

- bx lr

- ENDP

-;=============================

-; r0 unsigned char *src_ptr,

-; r1 int source_stride,

-; r2 unsigned char *ref_ptr,

-; r3 int recon_stride

-|vp8_get4x4sse_cs_neon| PROC

- vpush {q7}

- vld1.8 {d0}, [r0], r1 ;Load up source and reference

- vld1.8 {d4}, [r2], r3

- vld1.8 {d1}, [r0], r1

- vld1.8 {d5}, [r2], r3

- vld1.8 {d2}, [r0], r1

- vld1.8 {d6}, [r2], r3

- vld1.8 {d3}, [r0], r1

- vld1.8 {d7}, [r2], r3

- vsubl.u8 q11, d0, d4

- vsubl.u8 q12, d1, d5

- vsubl.u8 q13, d2, d6

- vsubl.u8 q14, d3, d7

- vmull.s16 q7, d22, d22

- vmull.s16 q8, d24, d24

- vmull.s16 q9, d26, d26

- vmull.s16 q10, d28, d28

- vadd.u32 q7, q7, q8

- vadd.u32 q9, q9, q10

- vadd.u32 q9, q7, q9

- vpaddl.u32 q1, q9

- vadd.u64 d0, d2, d3

- vmov.32 r0, d0[0]

- vpop {q7}

- bx lr

- ENDP

- END

« no previous file with comments | « source/libvpx/vp8/common/rtcd_defs.pl ('k') | source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c » ('j') | no next file with comments »