OLD | NEW |
| (Empty) |
1 ; | |
2 ; Copyright (c) 2011 The WebM project authors. All Rights Reserved. | |
3 ; | |
4 ; Use of this source code is governed by a BSD-style license | |
5 ; that can be found in the LICENSE file in the root of the source | |
6 ; tree. An additional intellectual property rights grant can be found | |
7 ; in the file PATENTS. All contributing project authors may | |
8 ; be found in the AUTHORS file in the root of the source tree. | |
9 ; | |
10 | |
11 | |
12 EXPORT |vp8_sad16x16_armv6| | | ; make the routine's symbol visible to other object files
13 | |
14 ARM | | ; assemble what follows as ARM (not Thumb) instructions
15 REQUIRE8 | | ; declare that this code requires 8-byte stack alignment
16 PRESERVE8 | | ; declare that this code keeps the stack 8-byte aligned
17 | |
18 AREA ||.text||, CODE, READONLY, ALIGN=2 | | ; read-only code section, aligned to 2^2 = 4 bytes
19 | |
; vp8_sad16x16_armv6: sum of absolute differences (SAD) between two
; 16x16 blocks of bytes. Each loop iteration handles two rows of 16 bytes
; and the loop runs 8 times. The total is returned in r0.
; NOTE(review): rows are read with word loads (ldr) from byte pointers -
; presumably the callers guarantee alignment or the target permits
; unaligned word access; confirm.
20 ; r0 const unsigned char *src_ptr (first row of the source block) | |
21 ; r1 int src_stride (added to src_ptr to step one row) | |
22 ; r2 const unsigned char *ref_ptr (first row of the reference block) | |
23 ; r3 int ref_stride (added to ref_ptr to step one row) | |
24 ; stack max_sad (not used: never read, so there is no early exit) | |
25 |vp8_sad16x16_armv6| PROC | |
26 stmfd sp!, {r4-r12, lr} | | ; save every register used as scratch below, plus the return address
27 | |
28 pld [r0, r1, lsl #0] | | ; prefetch src row 1 (src_ptr + stride)
29 pld [r2, r3, lsl #0] | | ; prefetch ref row 1 (ref_ptr + stride)
30 pld [r0, r1, lsl #1] | | ; prefetch src row 2 (src_ptr + 2 * stride)
31 pld [r2, r3, lsl #1] | | ; prefetch ref row 2 (ref_ptr + 2 * stride)
32 | |
33 mov r4, #0 ; sad = 0 (r4 is the running total) | |
34 mov r5, #8 ; loop count: 8 iterations x 2 rows = 16 rows | |
35 | |
36 loop | |
37 ; 1st row of this iteration | |
38 ldr r6, [r0, #0x0] ; load src pixels 0-3 (1A) | |
39 ldr r8, [r2, #0x0] ; load ref pixels 0-3 (1A) | |
40 ldr r7, [r0, #0x4] ; load src pixels 4-7 (1A) | |
41 ldr r9, [r2, #0x4] ; load ref pixels 4-7 (1A) | |
42 ldr r10, [r0, #0x8] ; load src pixels 8-11 (1B) | |
43 ldr r11, [r0, #0xC] ; load src pixels 12-15 (1B) | |
44 | |
45 usada8 r4, r8, r6, r4 ; sad += SAD of pixels 0-3 | |
46 usad8 r8, r7, r9 ; r8 = SAD of pixels 4-7; r8's ref word is already consumed, so it becomes a 2nd accumulator | |
47 | |
48 ldr r12, [r2, #0x8] ; load ref pixels 8-11 (1B) | |
49 ldr lr, [r2, #0xC] ; load ref pixels 12-15 (1B); lr is free as scratch because it was saved on entry | |
50 | |
51 add r0, r0, r1 ; set src pointer to next row | |
52 add r2, r2, r3 ; set ref pointer to next row | |
53 | |
54 pld [r0, r1, lsl #1] | | ; prefetch the src row two rows past the new src pointer
55 pld [r2, r3, lsl #1] | | ; prefetch the ref row two rows past the new ref pointer
56 | |
57 usada8 r4, r10, r12, r4 ; sad += SAD of pixels 8-11 | |
58 usada8 r8, r11, lr, r8 ; r8 += SAD of pixels 12-15 | |
59 | |
60 ldr r6, [r0, #0x0] ; load src pixels 0-3 of the 2nd row early (2A) | |
61 ldr r7, [r0, #0x4] ; load src pixels 4-7 of the 2nd row early (2A) | |
62 add r4, r4, r8 ; fold the 1st row's partial sum into sad | |
63 | |
64 ; 2nd row of this iteration | |
65 ldr r8, [r2, #0x0] ; load ref pixels 0-3 (2A) | |
66 ldr r9, [r2, #0x4] ; load ref pixels 4-7 (2A) | |
67 ldr r10, [r0, #0x8] ; load src pixels 8-11 (2B) | |
68 ldr r11, [r0, #0xC] ; load src pixels 12-15 (2B) | |
69 | |
70 usada8 r4, r6, r8, r4 ; sad += SAD of pixels 0-3 | |
71 usad8 r8, r7, r9 ; r8 = SAD of pixels 4-7 (r8 reused as 2nd accumulator) | |
72 | |
73 ldr r12, [r2, #0x8] ; load ref pixels 8-11 (2B) | |
74 ldr lr, [r2, #0xC] ; load ref pixels 12-15 (2B) | |
75 | |
76 add r0, r0, r1 ; set src pointer to next row | |
77 add r2, r2, r3 ; set ref pointer to next row | |
78 | |
79 usada8 r4, r10, r12, r4 ; sad += SAD of pixels 8-11 | |
80 usada8 r8, r11, lr, r8 ; r8 += SAD of pixels 12-15 | |
81 | |
82 pld [r0, r1, lsl #1] | | ; prefetch the src row two rows past the new src pointer
83 pld [r2, r3, lsl #1] | | ; prefetch the ref row two rows past the new ref pointer
84 | |
85 subs r5, r5, #1 ; decrement loop counter and set the flags tested by bne | |
86 add r4, r4, r8 ; fold the 2nd row's partial sum into sad (no S suffix, so the flags survive) | |
87 | |
88 bne loop | | ; repeat until all 8 row pairs are done
89 | |
90 mov r0, r4 ; return sad in r0 | |
91 ldmfd sp!, {r4-r12, pc} | | ; restore the saved registers; loading pc returns to the caller
92 | |
93 ENDP | |
94 | |
95 END | | ; end of the assembly source
96 | |
OLD | NEW |