Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(19)

Side by Side Diff: source/libvpx/vpx_dsp/arm/variance_media.asm

Issue 1162573005: libvpx: Pull from upstream (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/libvpx.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/libvpx/vpx/vpx_codec.mk ('k') | source/libvpx/vpx_dsp/arm/variance_neon.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 ;
2 ; Copyright (c) 2011 The WebM project authors. All Rights Reserved.
3 ;
4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree.
9 ;
10
11
12 EXPORT |vpx_variance16x16_media| ; make the 16x16 variance routine visible to the linker
13 EXPORT |vpx_variance8x8_media| ; make the 8x8 variance routine visible to the linker
14 EXPORT |vpx_mse16x16_media| ; make the 16x16 squared-error routine visible to the linker
15
16 ARM ; assemble the following instructions as ARM (not Thumb) code
17 REQUIRE8 ; declares that this code requires an 8-byte aligned stack
18 PRESERVE8 ; declares that this code keeps the stack 8-byte aligned
19
20 AREA ||.text||, CODE, READONLY, ALIGN=2 ; read-only code section aligned to 2^2 = 4 bytes
21
;-----------------------------------------------------------------------
; unsigned int vpx_variance16x16_media(unsigned char *src_ptr,
;                                      int source_stride,
;                                      unsigned char *ref_ptr,
;                                      int recon_stride,
;                                      unsigned int *sse)
;
; In:    r0    = src_ptr
;        r1    = source_stride (bytes between rows of src)
;        r2    = ref_ptr
;        r3    = recon_stride (bytes between rows of ref)
;        stack = sse (first stacked argument)
; Out:   r0    = sse - ((sum * sum) >> 8)
;        *sse  = sum of squared byte differences over the 16x16 block
;
; Register roles inside the loop:
;        r8  = sum  (signed sum of src - ref)
;        r11 = sse  (sum of squared differences)
;        r12 = rows remaining
;        lr  = constant zero (operand for sel / usad8)
;        r4-r7, r9, r10 = scratch
;
; Each row is handled as four 32-bit words (4 pixels each).  For one
; word, two usub8/sel pairs split the per-byte difference into its
; positive and negative parts; usad8 against zero sums each part, and
; the OR of both parts is the per-byte absolute difference that feeds
; the squared-error accumulation.
;-----------------------------------------------------------------------
|vpx_variance16x16_media| PROC

    stmfd   sp!, {r4-r12, lr}       ; 10 registers = 40 bytes, see sse load below

    pld     [r0, r1, lsl #0]        ; prefetch the next src row
    pld     [r2, r3, lsl #0]        ; prefetch the next ref row

    mov     r8, #0                  ; sum = 0
    mov     r11, #0                 ; sse = 0
    mov     r12, #16                ; rows remaining = block height
    mov     lr, #0                  ; constant zero; never written inside the loop

loop16x16
    ; ---- pixels 0..3 of the row ----
    ldr     r4, [r0, #0]            ; 4 src pixels
    ldr     r5, [r2, #0]            ; 4 ref pixels

    usub8   r6, r4, r5              ; src - ref per byte, sets GE per byte
    pld     [r0, r1, lsl #1]        ; prefetch src two rows ahead
    sel     r7, r6, lr              ; keep bytes where src >= ref, else 0
    usub8   r9, r5, r4              ; ref - src per byte, sets GE per byte
    pld     [r2, r3, lsl #1]        ; prefetch ref two rows ahead
    sel     r6, r9, lr              ; keep bytes where ref >= src, else 0

    usad8   r4, r7, lr              ; sum of the positive differences
    usad8   r5, r6, lr              ; sum of the negative differences (magnitude)
    orr     r6, r6, r7              ; |src - ref| for all 4 pixels
    add     r8, r8, r4              ; sum += positive part (flags not needed)
    sub     r8, r8, r5              ; sum -= negative part (flags not needed)

    uxtb16  r5, r6                  ; bytes 0 and 2 -> two halfwords
    uxtb16  r10, r6, ror #8         ; bytes 1 and 3 -> two halfwords
    smlad   r11, r5, r5, r11        ; sse += d0*d0 + d2*d2

    ; ---- pixels 4..7 of the row ----
    ldr     r4, [r0, #4]            ; 4 src pixels
    ldr     r5, [r2, #4]            ; 4 ref pixels
    smlad   r11, r10, r10, r11      ; sse += d1*d1 + d3*d3 (previous word)

    usub8   r6, r4, r5              ; src - ref per byte
    sel     r7, r6, lr              ; positive differences
    usub8   r9, r5, r4              ; ref - src per byte
    sel     r6, r9, lr              ; negative differences (magnitude)

    usad8   r4, r7, lr              ; sum of the positive differences
    usad8   r5, r6, lr              ; sum of the negative differences
    orr     r6, r6, r7              ; |src - ref| for all 4 pixels

    add     r8, r8, r4              ; sum += positive part
    sub     r8, r8, r5              ; sum -= negative part

    uxtb16  r5, r6                  ; bytes 0 and 2 -> two halfwords
    uxtb16  r10, r6, ror #8         ; bytes 1 and 3 -> two halfwords
    smlad   r11, r5, r5, r11        ; sse += d0*d0 + d2*d2

    ; ---- pixels 8..11 of the row ----
    ldr     r4, [r0, #8]            ; 4 src pixels
    ldr     r5, [r2, #8]            ; 4 ref pixels
    smlad   r11, r10, r10, r11      ; sse += d1*d1 + d3*d3 (previous word)

    usub8   r6, r4, r5              ; src - ref per byte
    sel     r7, r6, lr              ; positive differences
    usub8   r9, r5, r4              ; ref - src per byte
    sel     r6, r9, lr              ; negative differences (magnitude)

    usad8   r4, r7, lr              ; sum of the positive differences
    usad8   r5, r6, lr              ; sum of the negative differences
    orr     r6, r6, r7              ; |src - ref| for all 4 pixels

    add     r8, r8, r4              ; sum += positive part
    sub     r8, r8, r5              ; sum -= negative part

    uxtb16  r5, r6                  ; bytes 0 and 2 -> two halfwords
    uxtb16  r10, r6, ror #8         ; bytes 1 and 3 -> two halfwords
    smlad   r11, r5, r5, r11        ; sse += d0*d0 + d2*d2

    ; ---- pixels 12..15 of the row ----
    ldr     r4, [r0, #12]           ; 4 src pixels
    ldr     r5, [r2, #12]           ; 4 ref pixels
    smlad   r11, r10, r10, r11      ; sse += d1*d1 + d3*d3 (previous word)

    usub8   r6, r4, r5              ; src - ref per byte
    add     r0, r0, r1              ; src_ptr -> next row (row data already loaded)
    sel     r7, r6, lr              ; positive differences
    usub8   r9, r5, r4              ; ref - src per byte
    add     r2, r2, r3              ; ref_ptr -> next row
    sel     r6, r9, lr              ; negative differences (magnitude)

    usad8   r4, r7, lr              ; sum of the positive differences
    usad8   r5, r6, lr              ; sum of the negative differences
    orr     r6, r6, r7              ; |src - ref| for all 4 pixels

    add     r8, r8, r4              ; sum += positive part
    sub     r8, r8, r5              ; sum -= negative part

    uxtb16  r5, r6                  ; bytes 0 and 2 -> two halfwords
    uxtb16  r10, r6, ror #8         ; bytes 1 and 3 -> two halfwords
    smlad   r11, r5, r5, r11        ; sse += d0*d0 + d2*d2
    smlad   r11, r10, r10, r11      ; sse += d1*d1 + d3*d3

    subs    r12, r12, #1            ; one row done; Z set after the last row
    bne     loop16x16

    ; ---- result ----
    ldr     r6, [sp, #40]           ; sse pointer: first stacked arg, above the 40 saved bytes
    mul     r0, r8, r8              ; sum * sum
    str     r11, [r6]               ; *sse = sse
    sub     r0, r11, r0, lsr #8     ; return sse - ((sum * sum) >> 8); 8 = log2(16 * 16)

    ldmfd   sp!, {r4-r12, pc}       ; restore registers and return

    ENDP
154
;-----------------------------------------------------------------------
; unsigned int vpx_variance8x8_media(unsigned char *src_ptr,
;                                    int source_stride,
;                                    unsigned char *ref_ptr,
;                                    int recon_stride,
;                                    unsigned int *sse)
;
; In:    r0    = src_ptr
;        r1    = source_stride (bytes between rows of src)
;        r2    = ref_ptr
;        r3    = recon_stride (bytes between rows of ref)
;        stack = sse (first stacked argument)
; Out:   r0    = sse - ((sum * sum) >> 6)
;        *sse  = sum of squared byte differences over the 8x8 block
;
; Register roles inside the loop:
;        r9  = sum  (signed sum of src - ref)
;        r10 = sse  (sum of squared differences)
;        lr  = rows remaining
;        r12 = constant zero (operand for sel / usad8)
;        r4-r8 = scratch
;-----------------------------------------------------------------------
|vpx_variance8x8_media| PROC

    push    {r4-r10, lr}            ; 8 registers = 32 bytes, see sse load below

    pld     [r0, r1, lsl #0]        ; prefetch the next src row
    pld     [r2, r3, lsl #0]        ; prefetch the next ref row

    mov     lr, #8                  ; rows remaining = block height
    mov     r9, #0                  ; sum = 0
    mov     r10, #0                 ; sse = 0

variance8x8_row
    ; ---- left half of the row: pixels 0..3 ----
    ldr     r4, [r0, #0x0]          ; src word
    ldr     r5, [r2, #0x0]          ; ref word

    mov     r12, #0                 ; zero operand for sel / usad8

    usub8   r6, r4, r5              ; src - ref per byte, sets GE per byte
    pld     [r0, r1, lsl #1]        ; prefetch src two rows ahead
    sel     r8, r6, r12             ; bytes with src >= ref, others zeroed
    usub8   r7, r5, r4              ; ref - src per byte, sets GE per byte
    pld     [r2, r3, lsl #1]        ; prefetch ref two rows ahead
    sel     r6, r7, r12             ; bytes with ref >= src, others zeroed

    usad8   r4, r8, r12             ; total of the positive differences
    usad8   r5, r6, r12             ; total of the negative differences
    orr     r6, r6, r8              ; per-byte absolute difference
    add     r9, r9, r4              ; sum += positive total
    sub     r9, r9, r5              ; sum -= negative total

    uxtb16  r5, r6                  ; bytes 0 and 2 widened to halfwords
    uxtb16  r8, r6, ror #8          ; bytes 1 and 3 widened to halfwords
    smlad   r10, r5, r5, r10        ; sse += d0*d0 + d2*d2

    ; ---- right half of the row: pixels 4..7 ----
    ldr     r4, [r0, #0x4]          ; src word
    ldr     r5, [r2, #0x4]          ; ref word
    smlad   r10, r8, r8, r10        ; sse += d1*d1 + d3*d3 (left half)

    usub8   r6, r4, r5              ; src - ref per byte
    add     r0, r0, r1              ; advance src_ptr by one row
    sel     r8, r6, r12             ; positive differences
    usub8   r7, r5, r4              ; ref - src per byte
    add     r2, r2, r3              ; advance ref_ptr by one row
    sel     r6, r7, r12             ; negative differences

    usad8   r4, r8, r12             ; total of the positive differences
    usad8   r5, r6, r12             ; total of the negative differences
    orr     r6, r6, r8              ; per-byte absolute difference

    add     r9, r9, r4              ; sum += positive total
    sub     r9, r9, r5              ; sum -= negative total

    uxtb16  r5, r6                  ; bytes 0 and 2 widened to halfwords
    uxtb16  r8, r6, ror #8          ; bytes 1 and 3 widened to halfwords
    smlad   r10, r5, r5, r10        ; sse += d0*d0 + d2*d2
    subs    lr, lr, #1              ; row finished; flags feed the bne below
    smlad   r10, r8, r8, r10        ; sse += d1*d1 + d3*d3 (does not touch N/Z/C/V)

    bne     variance8x8_row

    ; ---- result ----
    ldr     r6, [sp, #32]           ; sse pointer sits just above the 32 saved bytes
    mul     r1, r9, r9              ; sum * sum
    str     r10, [r6]               ; *sse = sse
    sub     r0, r10, r1, ASR #6     ; return sse - ((sum * sum) >> 6); 6 = log2(8 * 8)

    pop     {r4-r10, pc}            ; restore registers and return

    ENDP
237
;-----------------------------------------------------------------------
; unsigned int vpx_mse16x16_media(unsigned char *src_ptr,
;                                 int source_stride,
;                                 unsigned char *ref_ptr,
;                                 int recon_stride,
;                                 unsigned int *sse)
;
; In:    r0    = src_ptr
;        r1    = source_stride (bytes between rows of src)
;        r2    = ref_ptr
;        r3    = recon_stride (bytes between rows of ref)
;        stack = sse (first stacked argument)
; Out:   r0    = sum of squared byte differences over the 16x16 block
;        *sse  = same value as r0
;
; Note:  Based on vpx_variance16x16_media.  The signed sum of the
;        differences is not part of the result here, so the usad8
;        partial-sum step of the variance routine is left out; only
;        the per-byte absolute difference is formed and squared.
;
; Register roles inside the loop:
;        r4  = sse accumulator
;        r12 = rows remaining
;        lr  = constant zero (second operand for sel)
;        r5-r9 = scratch
;-----------------------------------------------------------------------
|vpx_mse16x16_media| PROC

    push    {r4-r9, lr}             ; 7 registers = 28 bytes, see sse load below

    pld     [r0, r1, lsl #0]        ; prefetch the next src row
    pld     [r2, r3, lsl #0]        ; prefetch the next ref row

    mov     r12, #16                ; rows remaining = block height
    mov     r4, #0                  ; sse = 0
    mov     lr, #0                  ; constant zero; never written inside the loop

loopmse
    ; ---- pixels 0..3 of the row ----
    ldr     r5, [r0, #0x0]          ; 4 src pixels
    ldr     r6, [r2, #0x0]          ; 4 ref pixels

    usub8   r8, r5, r6              ; src - ref per byte, sets GE per byte
    pld     [r0, r1, lsl #1]        ; prefetch src two rows ahead
    sel     r7, r8, lr              ; keep bytes where src >= ref, else 0
    usub8   r9, r6, r5              ; ref - src per byte, sets GE per byte
    pld     [r2, r3, lsl #1]        ; prefetch ref two rows ahead
    sel     r8, r9, lr              ; keep bytes where ref >= src, else 0

    orr     r8, r8, r7              ; |src - ref| for all 4 pixels

    ldr     r5, [r0, #0x4]          ; next 4 src pixels, loaded early

    uxtb16  r6, r8                  ; bytes 0 and 2 -> two halfwords
    uxtb16  r7, r8, ror #8          ; bytes 1 and 3 -> two halfwords
    smlad   r4, r6, r6, r4          ; sse += d0*d0 + d2*d2

    ; ---- pixels 4..7 of the row ----
    ldr     r6, [r2, #0x4]          ; 4 ref pixels
    smlad   r4, r7, r7, r4          ; sse += d1*d1 + d3*d3 (previous word)

    usub8   r8, r5, r6              ; src - ref per byte
    sel     r7, r8, lr              ; positive differences
    usub8   r9, r6, r5              ; ref - src per byte
    sel     r8, r9, lr              ; negative differences (magnitude)

    orr     r8, r8, r7              ; |src - ref| for all 4 pixels

    ldr     r5, [r0, #0x8]          ; next 4 src pixels, loaded early

    uxtb16  r6, r8                  ; bytes 0 and 2 -> two halfwords
    uxtb16  r7, r8, ror #8          ; bytes 1 and 3 -> two halfwords
    smlad   r4, r6, r6, r4          ; sse += d0*d0 + d2*d2

    ; ---- pixels 8..11 of the row ----
    ldr     r6, [r2, #0x8]          ; 4 ref pixels
    smlad   r4, r7, r7, r4          ; sse += d1*d1 + d3*d3 (previous word)

    usub8   r8, r5, r6              ; src - ref per byte
    sel     r7, r8, lr              ; positive differences
    usub8   r9, r6, r5              ; ref - src per byte
    sel     r8, r9, lr              ; negative differences (magnitude)

    orr     r8, r8, r7              ; |src - ref| for all 4 pixels

    ldr     r5, [r0, #0xc]          ; next 4 src pixels, loaded early

    uxtb16  r6, r8                  ; bytes 0 and 2 -> two halfwords
    uxtb16  r7, r8, ror #8          ; bytes 1 and 3 -> two halfwords
    smlad   r4, r6, r6, r4          ; sse += d0*d0 + d2*d2

    ; ---- pixels 12..15 of the row ----
    ldr     r6, [r2, #0xc]          ; 4 ref pixels
    smlad   r4, r7, r7, r4          ; sse += d1*d1 + d3*d3 (previous word)

    usub8   r8, r5, r6              ; src - ref per byte
    add     r0, r0, r1              ; src_ptr -> next row (row data already loaded)
    sel     r7, r8, lr              ; positive differences
    usub8   r9, r6, r5              ; ref - src per byte
    add     r2, r2, r3              ; ref_ptr -> next row
    sel     r8, r9, lr              ; negative differences (magnitude)

    orr     r8, r8, r7              ; |src - ref| for all 4 pixels

    subs    r12, r12, #1            ; one row done; flags feed the bne below

    uxtb16  r6, r8                  ; bytes 0 and 2 -> two halfwords
    uxtb16  r7, r8, ror #8          ; bytes 1 and 3 -> two halfwords
    smlad   r4, r6, r6, r4          ; sse += d0*d0 + d2*d2 (does not touch N/Z/C/V)
    smlad   r4, r7, r7, r4          ; sse += d1*d1 + d3*d3

    bne     loopmse

    ; ---- result ----
    ldr     r1, [sp, #28]           ; sse pointer: first stacked arg, above the 28 saved bytes
    mov     r0, r4                  ; return sse
    str     r4, [r1]                ; *sse = sse

    pop     {r4-r9, pc}             ; restore registers and return

    ENDP
357
358 END
OLDNEW
« no previous file with comments | « source/libvpx/vpx/vpx_codec.mk ('k') | source/libvpx/vpx_dsp/arm/variance_neon.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698