Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(110)

Side by Side Diff: source/libvpx/vp8/encoder/arm/neon/subtract_neon.asm

Issue 484923003: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 ;
2 ; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11 EXPORT |vp8_subtract_b_neon| ; make the three routines defined in this file visible to the linker
12 EXPORT |vp8_subtract_mby_neon|
13 EXPORT |vp8_subtract_mbuv_neon|
14
15 INCLUDE vp8_asm_enc_offsets.asm ; presumably supplies the vp8_block_* / vp8_blockd_* offsets used by vp8_subtract_b_neon (they are not defined in this file)
16
17 ARM ; assemble as ARM (not Thumb) instructions
18 REQUIRE8 ; this code requires 8-byte stack alignment from its callers
19 PRESERVE8 ; this code keeps the stack 8-byte aligned
20
21 AREA ||.text||, CODE, READONLY, ALIGN=2 ; read-only code section, aligned to 2^2 = 4 bytes
22
23 ;void vp8_subtract_b_neon(BLOCK *be, BLOCKD *bd, int pitch)
;
; Computes diff = src - pred for one 4-row block and writes the result as
; 16-bit values.
;
; In:    r0 = be    (BLOCK *), read at the vp8_block_* offsets
;        r1 = bd    (BLOCKD *), read at the vp8_blockd_predictor offset
;        r2 = pitch, used as the predictor row stride in bytes and, doubled,
;                    as the diff row stride in bytes
; Out:   four rows of four 16-bit differences stored through the src_diff
;        pointer loaded from be
; Saved: r4-r7 (pushed on entry, popped on exit)
; Clobb: r2, r3, d0-d7, q10-q13
;
; NOTE(review): every row load below fetches 8 bytes, but only the low four
; differences of each row are stored, so 4 bytes past each 4-pixel row are
; read and discarded. Confirm the source and predictor buffers always have
; that slack.
24 |vp8_subtract_b_neon| PROC
25
26 stmfd sp!, {r4-r7} ; save the callee-saved registers used as scratch below
27
28 ldr r3, [r0, #vp8_block_base_src] ; r3 = address holding the source base pointer
29 ldr r4, [r0, #vp8_block_src] ; r4 = offset of this block from the source base
30 ldr r5, [r0, #vp8_block_src_diff] ; r5 = output (diff) pointer
31 ldr r3, [r3] ; r3 = *base_src
32 ldr r6, [r0, #vp8_block_src_stride] ; r6 = source row stride in bytes
33 add r3, r3, r4 ; src = *base_src + src
34 ldr r7, [r1, #vp8_blockd_predictor] ; r7 = predictor pointer
35
36 vld1.8 {d0}, [r3], r6 ; src row 0 (8 bytes), then step r3 by the source stride
37 vld1.8 {d1}, [r7], r2 ; pred row 0 (8 bytes), then step r7 by pitch
38 vld1.8 {d2}, [r3], r6 ; src row 1
39 vld1.8 {d3}, [r7], r2 ; pred row 1
40 vld1.8 {d4}, [r3], r6 ; src row 2
41 vld1.8 {d5}, [r7], r2 ; pred row 2
42 vld1.8 {d6}, [r3], r6 ; src row 3
43 vld1.8 {d7}, [r7], r2 ; pred row 3
44
45 vsubl.u8 q10, d0, d1 ; row 0: widen u8 -> 16 bit and subtract, src - pred
46 vsubl.u8 q11, d2, d3 ; row 1
47 vsubl.u8 q12, d4, d5 ; row 2
48 vsubl.u8 q13, d6, d7 ; row 3
49
50 mov r2, r2, lsl #1 ; r2 = pitch * 2: diff row stride in bytes (2 bytes per element)
51
52 vst1.16 {d20}, [r5], r2 ; store row 0: d20 is the low half of q10, i.e. the first four differences
53 vst1.16 {d22}, [r5], r2 ; row 1 (low half of q11)
54 vst1.16 {d24}, [r5], r2 ; row 2 (low half of q12)
55 vst1.16 {d26}, [r5], r2 ; row 3 (low half of q13)
56
57 ldmfd sp!, {r4-r7} ; restore the registers saved on entry
58 bx lr
59
60 ENDP
61
62
63 ;==========================================
64 ;void vp8_subtract_mby_neon(short *diff, unsigned char *src, int src_stride,
65 ; unsigned char *pred, int pred_stride)
;
; Computes diff = src - pred for a 16x16 block of 8-bit samples and writes
; the 256 results as 16-bit values in rows of 16 (32 bytes per row).
;
; In:    r0 = diff, r1 = src, r2 = src_stride, r3 = pred,
;        pred_stride is the fifth argument and is read from the stack
; Saved: r4-r7 and d8-d15 (pushed on entry, popped on exit)
; Clobb: r0, r1, r3, r12, q0-q3, q8-q15, flags
;
; The loop runs 4 times and handles 4 rows of 16 samples per pass.
66 |vp8_subtract_mby_neon| PROC
67 push {r4-r7} ; 16 bytes; r7 is not used below - presumably pushed to keep sp 8-byte aligned (confirm)
68 vpush {d8-d15} ; 64 bytes; q4-q7 are used below and d8-d15 are callee-saved
69
70 mov r12, #4 ; loop counter: 4 passes of 4 rows
71 ldr r4, [sp, #80] ; pred_stride: first stack argument, now 16 + 64 = 80 bytes above sp
72 mov r6, #32 ; diff row stride in bytes: 16 elements x 2 bytes
73 add r5, r0, #16 ; second diff pointer: elements 8..15 of each row (8 x 2 bytes in)
74
75 subtract_mby_loop
76 vld1.8 {q0}, [r1], r2 ; src row 0 (16 bytes), then step r1 by src_stride
77 vld1.8 {q1}, [r3], r4 ; pred row 0 (16 bytes), then step r3 by pred_stride
78 vld1.8 {q2}, [r1], r2 ; src row 1
79 vld1.8 {q3}, [r3], r4 ; pred row 1
80 vld1.8 {q4}, [r1], r2 ; src row 2
81 vld1.8 {q5}, [r3], r4 ; pred row 2
82 vld1.8 {q6}, [r1], r2 ; src row 3
83 vld1.8 {q7}, [r3], r4 ; pred row 3
84
85 vsubl.u8 q8, d0, d2 ; row 0, samples 0..7: widen u8 -> 16 bit, src - pred
86 vsubl.u8 q9, d1, d3 ; row 0, samples 8..15
87 vsubl.u8 q10, d4, d6 ; row 1, samples 0..7
88 vsubl.u8 q11, d5, d7 ; row 1, samples 8..15
89 vsubl.u8 q12, d8, d10 ; row 2, samples 0..7
90 vsubl.u8 q13, d9, d11 ; row 2, samples 8..15
91 vsubl.u8 q14, d12, d14 ; row 3, samples 0..7
92 vsubl.u8 q15, d13, d15 ; row 3, samples 8..15
93
94 vst1.16 {q8}, [r0], r6 ; store row 0 left half, then step r0 to the next row
95 vst1.16 {q9}, [r5], r6 ; store row 0 right half, then step r5 to the next row
96 vst1.16 {q10}, [r0], r6 ; row 1 left half
97 vst1.16 {q11}, [r5], r6 ; row 1 right half
98 vst1.16 {q12}, [r0], r6 ; row 2 left half
99 vst1.16 {q13}, [r5], r6 ; row 2 right half
100 vst1.16 {q14}, [r0], r6 ; row 3 left half
101 vst1.16 {q15}, [r5], r6 ; row 3 right half
102
103 subs r12, r12, #1 ; one pass done; sets Z when the counter reaches 0
104 bne subtract_mby_loop
105
106 vpop {d8-d15} ; restore in reverse order of the entry pushes
107 pop {r4-r7}
108 bx lr
109 ENDP
110
111 ;=================================
112 ;void vp8_subtract_mbuv_neon(short *diff, unsigned char *usrc, unsigned char *vsrc,
113 ; int src_stride, unsigned char *upred,
114 ; unsigned char *vpred, int pred_stride)
;
; Computes diff = src - pred for two 8x8 blocks of 8-bit samples (U, then V)
; and writes the results as 16-bit values starting at diff + 256 elements.
; The 64 U results are followed immediately by the 64 V results.
;
; In:    r0 = diff, r1 = usrc, r2 = vsrc, r3 = src_stride,
;        upred, vpred and pred_stride are read from the stack
; Saved: r4-r7 and d8-d15 (pushed on entry, popped on exit)
; Clobb: r0, r1, r2, r12, d0-d7, q8-q15
115
116 |vp8_subtract_mbuv_neon| PROC
117 push {r4-r7} ; 16 bytes
118 vpush {d8-d15} ; 64 bytes; d8-d15 are used below and are callee-saved
119
120 ldr r4, [sp, #80] ; upred: first stack argument, now 16 + 64 = 80 bytes above sp
121 ldr r5, [sp, #84] ; vpred
122 ldr r6, [sp, #88] ; pred_stride
123 add r0, r0, #512 ; short *udiff = diff + 256; (256 elements x 2 bytes)
124 mov r12, #32 ; store step in bytes: two rows of 8 elements x 2 bytes
125 add r7, r0, #16 ; second diff pointer: one row (16 bytes) ahead, used for odd rows
126
127 ;u
128 vld1.8 {d0}, [r1], r3 ; usrc row 0 (8 bytes), then step r1 by src_stride
129 vld1.8 {d1}, [r4], r6 ; upred row 0 (8 bytes), then step r4 by pred_stride
130 vld1.8 {d2}, [r1], r3 ; usrc row 1
131 vld1.8 {d3}, [r4], r6 ; upred row 1
132 vld1.8 {d4}, [r1], r3 ; usrc row 2
133 vld1.8 {d5}, [r4], r6 ; upred row 2
134 vld1.8 {d6}, [r1], r3 ; usrc row 3
135 vld1.8 {d7}, [r4], r6 ; upred row 3
136 vld1.8 {d8}, [r1], r3 ; usrc row 4
137 vld1.8 {d9}, [r4], r6 ; upred row 4
138 vld1.8 {d10}, [r1], r3 ; usrc row 5
139 vld1.8 {d11}, [r4], r6 ; upred row 5
140 vld1.8 {d12}, [r1], r3 ; usrc row 6
141 vld1.8 {d13}, [r4], r6 ; upred row 6
142 vld1.8 {d14}, [r1], r3 ; usrc row 7
143 vld1.8 {d15}, [r4], r6 ; upred row 7
144
145 vsubl.u8 q8, d0, d1 ; U row 0: widen u8 -> 16 bit, src - pred
146 vsubl.u8 q9, d2, d3 ; U row 1
147 vsubl.u8 q10, d4, d5 ; U row 2
148 vsubl.u8 q11, d6, d7 ; U row 3
149 vsubl.u8 q12, d8, d9 ; U row 4
150 vsubl.u8 q13, d10, d11 ; U row 5
151 vsubl.u8 q14, d12, d13 ; U row 6
152 vsubl.u8 q15, d14, d15 ; U row 7
153
154 vst1.16 {q8}, [r0], r12 ; store U row 0; r0 writes the even rows
155 vst1.16 {q9}, [r7], r12 ; store U row 1; r7 writes the odd rows
156 vst1.16 {q10}, [r0], r12 ; U row 2
157 vst1.16 {q11}, [r7], r12 ; U row 3
158 vst1.16 {q12}, [r0], r12 ; U row 4
159 vst1.16 {q13}, [r7], r12 ; U row 5
160 vst1.16 {q14}, [r0], r12 ; U row 6
161 vst1.16 {q15}, [r7], r12 ; U row 7; r0 and r7 have each advanced 4 x 32 = 128 bytes, so they now address the V output
162
163 ;v
164 vld1.8 {d0}, [r2], r3 ; vsrc row 0 (8 bytes), then step r2 by src_stride
165 vld1.8 {d1}, [r5], r6 ; vpred row 0 (8 bytes), then step r5 by pred_stride
166 vld1.8 {d2}, [r2], r3 ; vsrc row 1
167 vld1.8 {d3}, [r5], r6 ; vpred row 1
168 vld1.8 {d4}, [r2], r3 ; vsrc row 2
169 vld1.8 {d5}, [r5], r6 ; vpred row 2
170 vld1.8 {d6}, [r2], r3 ; vsrc row 3
171 vld1.8 {d7}, [r5], r6 ; vpred row 3
172 vld1.8 {d8}, [r2], r3 ; vsrc row 4
173 vld1.8 {d9}, [r5], r6 ; vpred row 4
174 vld1.8 {d10}, [r2], r3 ; vsrc row 5
175 vld1.8 {d11}, [r5], r6 ; vpred row 5
176 vld1.8 {d12}, [r2], r3 ; vsrc row 6
177 vld1.8 {d13}, [r5], r6 ; vpred row 6
178 vld1.8 {d14}, [r2], r3 ; vsrc row 7
179 vld1.8 {d15}, [r5], r6 ; vpred row 7
180
181 vsubl.u8 q8, d0, d1 ; V row 0: widen u8 -> 16 bit, src - pred
182 vsubl.u8 q9, d2, d3 ; V row 1
183 vsubl.u8 q10, d4, d5 ; V row 2
184 vsubl.u8 q11, d6, d7 ; V row 3
185 vsubl.u8 q12, d8, d9 ; V row 4
186 vsubl.u8 q13, d10, d11 ; V row 5
187 vsubl.u8 q14, d12, d13 ; V row 6
188 vsubl.u8 q15, d14, d15 ; V row 7
189
190 vst1.16 {q8}, [r0], r12 ; store V row 0; r0 writes the even rows
191 vst1.16 {q9}, [r7], r12 ; store V row 1; r7 writes the odd rows
192 vst1.16 {q10}, [r0], r12 ; V row 2
193 vst1.16 {q11}, [r7], r12 ; V row 3
194 vst1.16 {q12}, [r0], r12 ; V row 4
195 vst1.16 {q13}, [r7], r12 ; V row 5
196 vst1.16 {q14}, [r0], r12 ; V row 6
197 vst1.16 {q15}, [r7], r12 ; V row 7
198
199 vpop {d8-d15} ; restore in reverse order of the entry pushes
200 pop {r4-r7}
201 bx lr
202
203 ENDP
204
205 END
OLDNEW
« no previous file with comments | « source/libvpx/vp8/common/rtcd_defs.pl ('k') | source/libvpx/vp8/encoder/arm/neon/subtract_neon.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698