Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(231)

Side by Side Diff: source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm

Issue 592203002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11
12 EXPORT |vp8_mse16x16_neon| ; make the 16x16 routine defined below visible to the linker
13 EXPORT |vp8_get4x4sse_cs_neon| ; make the 4x4 routine defined below visible to the linker
14
15 ARM ; assemble what follows as ARM (not Thumb) instructions
16 REQUIRE8 ; this code requires 8-byte stack alignment
17 PRESERVE8 ; this code keeps the stack 8-byte aligned (its only stack use is a 16-byte vpush/vpop)
18
19 AREA ||.text||, CODE, READONLY, ALIGN=2 ; read-only code section, aligned to 2^2 = 4 bytes
20 ;============================
; vp8_mse16x16_neon
;
; Accumulates the squared differences between the bytes of a 16x16 source
; block and a 16x16 reference block. The low 32 bits of the total are
; written through the sse pointer and are also returned in r0.
;
; Arguments:
21 ; r0 unsigned char *src_ptr
22 ; r1 int source_stride
23 ; r2 unsigned char *ref_ptr
24 ; r3 int recon_stride
25 ; stack unsigned int *sse
;
; Registers written: r0, r2, r12, q0-q3, q8-q14 and the condition flags.
; q7 is also used, but it is saved on entry and restored before returning.
26 ;note: in this function, sum is never used, so the sum part of the calculation
27 ;in vp8_variance() is omitted here.
28
29 |vp8_mse16x16_neon| PROC
30 vpush {q7} ;q7 is used as an accumulator below; keep the caller's copy (16 bytes pushed)
31
32 vmov.i8 q7, #0 ;q7, q8, q9, q10 - four partial sse accumulators, cleared to zero
33 vmov.i8 q8, #0
34 vmov.i8 q9, #0
35 vmov.i8 q10, #0
36
37 mov r12, #8 ;loop counter: 8 iterations x 2 rows per iteration = 16 rows
38
39 mse16x16_neon_loop
40 vld1.8 {q0}, [r0], r1 ;Load up source and reference: q0 = source row, r0 += source_stride
41 vld1.8 {q2}, [r2], r3 ;q2 = reference row, r2 += recon_stride
42 vld1.8 {q1}, [r0], r1 ;q1 = next source row
43 vld1.8 {q3}, [r2], r3 ;q3 = next reference row
44
; Widen each byte difference (source - reference) to 16 bits. The unsigned
; subtract wraps, so the 16-bit lanes read as signed hold the true difference.
45 vsubl.u8 q11, d0, d4 ;first row, bytes 0-7
46 vsubl.u8 q12, d1, d5 ;first row, bytes 8-15
47 vsubl.u8 q13, d2, d6 ;second row, bytes 0-7
48 vsubl.u8 q14, d3, d7 ;second row, bytes 8-15
49
; Square every difference and add it into a 32-bit accumulator lane.
50 vmlal.s16 q7, d22, d22
51 vmlal.s16 q8, d23, d23
52
53 subs r12, r12, #1 ;count down; the flags set here are tested by the bne below
54
55 vmlal.s16 q9, d24, d24
56 vmlal.s16 q10, d25, d25
57 vmlal.s16 q7, d26, d26
58 vmlal.s16 q8, d27, d27
59 vmlal.s16 q9, d28, d28
60 vmlal.s16 q10, d29, d29
61
62 bne mse16x16_neon_loop ;repeat until r12 reaches zero
63
64 vadd.u32 q7, q7, q8 ;fold the four accumulators down to two...
65 vadd.u32 q9, q9, q10
66
67 ldr r12, [sp, #16] ;load the sse pointer from the stack (+16 skips the q7 saved by vpush)
68
69 vadd.u32 q10, q7, q9 ;...and then to one vector of four 32-bit partial sums
70 vpaddl.u32 q1, q10 ;add adjacent pairs, widening to two 64-bit sums (d2, d3)
71 vadd.u64 d0, d2, d3 ;d0 = total of all squared differences
72
73 vst1.32 {d0[0]}, [r12] ;store the low 32 bits of the total through the sse pointer
74 vmov.32 r0, d0[0] ;return the same 32-bit value in r0
75
76 vpop {q7} ;restore the caller's q7
77 bx lr
78
79 ENDP
80
81
82 ;=============================
; vp8_get4x4sse_cs_neon
;
; Returns in r0 the low 32 bits of the sum of squared differences between
; the first four bytes of four source rows and the first four bytes of four
; reference rows. Nothing is written to memory other than the q7 save slot.
;
; Note: each row is fetched with an 8-byte load, but only the differences of
; the first four bytes of each row (d22, d24, d26, d28) are squared and summed.
;
; Arguments:
83 ; r0 unsigned char *src_ptr,
84 ; r1 int source_stride,
85 ; r2 unsigned char *ref_ptr,
86 ; r3 int recon_stride
;
; Registers written: r0, r2, q0-q3, q8-q14.
; q7 is also used, but it is saved on entry and restored before returning.
87 |vp8_get4x4sse_cs_neon| PROC
88 vpush {q7} ;q7 holds a product below; keep the caller's copy
89
90 vld1.8 {d0}, [r0], r1 ;Load up source and reference: d0-d3 = source rows 0-3 (8 bytes each)
91 vld1.8 {d4}, [r2], r3 ;d4-d7 = reference rows 0-3 (8 bytes each)
92 vld1.8 {d1}, [r0], r1
93 vld1.8 {d5}, [r2], r3
94 vld1.8 {d2}, [r0], r1
95 vld1.8 {d6}, [r2], r3
96 vld1.8 {d3}, [r0], r1
97 vld1.8 {d7}, [r2], r3
98
; Widen each byte difference (source - reference) to 16 bits, one row per q register.
99 vsubl.u8 q11, d0, d4
100 vsubl.u8 q12, d1, d5
101 vsubl.u8 q13, d2, d6
102 vsubl.u8 q14, d3, d7
103
; Square only the low half of each difference vector, i.e. the first four
; bytes of each row; the upper four differences are ignored.
104 vmull.s16 q7, d22, d22
105 vmull.s16 q8, d24, d24
106 vmull.s16 q9, d26, d26
107 vmull.s16 q10, d28, d28
108
109 vadd.u32 q7, q7, q8 ;add the four rows of squares together
110 vadd.u32 q9, q9, q10
111 vadd.u32 q9, q7, q9 ;q9 = four 32-bit column sums
112
113 vpaddl.u32 q1, q9 ;add adjacent pairs, widening to two 64-bit sums (d2, d3)
114 vadd.u64 d0, d2, d3 ;d0 = total of all squared differences
115
116 vmov.32 r0, d0[0] ;return the low 32 bits of the total in r0
117
118 vpop {q7} ;restore the caller's q7
119 bx lr
120
121 ENDP
122
123 END
OLDNEW
« no previous file with comments | « source/libvpx/vp8/common/rtcd_defs.pl ('k') | source/libvpx/vp8/encoder/arm/neon/vp8_mse16x16_neon.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698