| OLD | NEW |
| 1 ; | 1 ; |
| 2 ; Copyright (c) 2011 The WebM project authors. All Rights Reserved. | 2 ; Copyright (c) 2011 The WebM project authors. All Rights Reserved. |
| 3 ; | 3 ; |
| 4 ; Use of this source code is governed by a BSD-style license | 4 ; Use of this source code is governed by a BSD-style license |
| 5 ; that can be found in the LICENSE file in the root of the source | 5 ; that can be found in the LICENSE file in the root of the source |
| 6 ; tree. An additional intellectual property rights grant can be found | 6 ; tree. An additional intellectual property rights grant can be found |
| 7 ; in the file PATENTS. All contributing project authors may | 7 ; in the file PATENTS. All contributing project authors may |
| 8 ; be found in the AUTHORS file in the root of the source tree. | 8 ; be found in the AUTHORS file in the root of the source tree. |
| 9 ; | 9 ; |
| 10 | 10 |
| 11 | 11 |
| 12 EXPORT |vp8_sad16x16_armv6| | 12 EXPORT |vp8_sad16x16_armv6| |
| 13 | 13 |
| 14 ARM | 14 ARM |
| 15 REQUIRE8 | 15 REQUIRE8 |
| 16 PRESERVE8 | 16 PRESERVE8 |
| 17 | 17 |
| 18 AREA ||.text||, CODE, READONLY, ALIGN=2 | 18 AREA ||.text||, CODE, READONLY, ALIGN=2 |
| 19 | 19 |
| 20 ; r0 const unsigned char *src_ptr | 20 ; r0 const unsigned char *src_ptr |
| 21 ; r1 int src_stride | 21 ; r1 int src_stride |
| 22 ; r2 const unsigned char *ref_ptr | 22 ; r2 const unsigned char *ref_ptr |
| 23 ; r3 int ref_stride | 23 ; r3 int ref_stride |
| 24 ; stack max_sad (not used) | 24 ; stack max_sad (not used) |
| 25 |vp8_sad16x16_armv6| PROC | 25 |vp8_sad16x16_armv6| PROC |
| 26 stmfd sp!, {r4-r12, lr} | 26 stmfd sp!, {r4-r12, lr} |
| 27 |
| 28 pld [r0, r1, lsl #0] |
| 29 pld [r2, r3, lsl #0] |
| 30 pld [r0, r1, lsl #1] |
| 31 pld [r2, r3, lsl #1] |
| 32 |
| 27 mov r4, #0 ; r4 = running sad, starts at 0 | 33 mov r4, #0 ; r4 = running sad, starts at 0 |
| 28 mov r5, #8 ; loop count: 8 passes, 2 rows per pass | 34 mov r5, #8 ; loop count: 8 passes, 2 rows per pass |
| 29 | 35 |
| 30 loop | 36 loop |
| 31 ; 1st row of this pass | 37 ; 1st row of this pass |
| 32 ldr r6, [r0, #0x0] ; load src pixels 0-3 (1A) | 38 ldr r6, [r0, #0x0] ; load src pixels 0-3 (1A) |
| 33 ldr r8, [r2, #0x0] ; load ref pixels 0-3 (1A) | 39 ldr r8, [r2, #0x0] ; load ref pixels 0-3 (1A) |
| 34 ldr r7, [r0, #0x4] ; load src pixels 4-7 (1A) | 40 ldr r7, [r0, #0x4] ; load src pixels 4-7 (1A) |
| 35 ldr r9, [r2, #0x4] ; load ref pixels 4-7 (1A) | 41 ldr r9, [r2, #0x4] ; load ref pixels 4-7 (1A) |
| 36 ldr r10, [r0, #0x8] ; load src pixels 8-11 (1B) | 42 ldr r10, [r0, #0x8] ; load src pixels 8-11 (1B) |
| 37 ldr r11, [r0, #0xC] ; load src pixels 12-15 (1B) | 43 ldr r11, [r0, #0xC] ; load src pixels 12-15 (1B) |
| 38 | 44 |
| 39 usada8 r4, r8, r6, r4 ; r4 += sad of pixels 0-3 | 45 usada8 r4, r8, r6, r4 ; r4 += sad of pixels 0-3 |
| 40 usad8 r8, r7, r9 ; r8 = sad of pixels 4-7 (second partial sum, reuses r8) | 46 usad8 r8, r7, r9 ; r8 = sad of pixels 4-7 (second partial sum, reuses r8) |
| 41 | 47 |
| 42 ldr r12, [r2, #0x8] ; load ref pixels 8-11 (1B) | 48 ldr r12, [r2, #0x8] ; load ref pixels 8-11 (1B) |
| 43 ldr lr, [r2, #0xC] ; load ref pixels 12-15 (1B) | 49 ldr lr, [r2, #0xC] ; load ref pixels 12-15 (1B) |
| 44 | 50 |
| 45 add r0, r0, r1 ; advance src pointer by src_stride to next row | 51 add r0, r0, r1 ; advance src pointer by src_stride to next row |
| 46 add r2, r2, r3 ; advance ref pointer by ref_stride to next row | 52 add r2, r2, r3 ; advance ref pointer by ref_stride to next row |
| 47 | 53 |
| 54 pld [r0, r1, lsl #1] |
| 55 pld [r2, r3, lsl #1] |
| 56 |
| 48 usada8 r4, r10, r12, r4 ; r4 += sad of pixels 8-11 (words loaded before the pointer advance) | 57 usada8 r4, r10, r12, r4 ; r4 += sad of pixels 8-11 (words loaded before the pointer advance) |
| 49 usada8 r8, r11, lr, r8 ; r8 += sad of pixels 12-15 | 58 usada8 r8, r11, lr, r8 ; r8 += sad of pixels 12-15 |
| 50 | 59 |
| 51 ldr r6, [r0, #0x0] ; early load of 2nd-row src pixels 0-3 (2A) | 60 ldr r6, [r0, #0x0] ; early load of 2nd-row src pixels 0-3 (2A) |
| 52 ldr r7, [r0, #0x4] ; early load of 2nd-row src pixels 4-7 (2A) | 61 ldr r7, [r0, #0x4] ; early load of 2nd-row src pixels 4-7 (2A) |
| 53 add r4, r4, r8 ; fold 1st-row partial sum r8 into r4 | 62 add r4, r4, r8 ; fold 1st-row partial sum r8 into r4 |
| 54 | 63 |
| 55 ; 2nd row of this pass (src words 2A were already loaded above) | 64 ; 2nd row of this pass (src words 2A were already loaded above) |
| 56 ldr r8, [r2, #0x0] ; load ref pixels 0-3 (2A) | 65 ldr r8, [r2, #0x0] ; load ref pixels 0-3 (2A) |
| 57 ldr r9, [r2, #0x4] ; load ref pixels 4-7 (2A) | 66 ldr r9, [r2, #0x4] ; load ref pixels 4-7 (2A) |
| 58 ldr r10, [r0, #0x8] ; load src pixels 8-11 (2B) | 67 ldr r10, [r0, #0x8] ; load src pixels 8-11 (2B) |
| 59 ldr r11, [r0, #0xC] ; load src pixels 12-15 (2B) | 68 ldr r11, [r0, #0xC] ; load src pixels 12-15 (2B) |
| 60 | 69 |
| 61 usada8 r4, r6, r8, r4 ; r4 += sad of pixels 0-3 | 70 usada8 r4, r6, r8, r4 ; r4 += sad of pixels 0-3 |
| 62 usad8 r8, r7, r9 ; r8 = sad of pixels 4-7 (second partial sum, reuses r8) | 71 usad8 r8, r7, r9 ; r8 = sad of pixels 4-7 (second partial sum, reuses r8) |
| 63 | 72 |
| 64 ldr r12, [r2, #0x8] ; load ref pixels 8-11 (2B) | 73 ldr r12, [r2, #0x8] ; load ref pixels 8-11 (2B) |
| 65 ldr lr, [r2, #0xC] ; load ref pixels 12-15 (2B) | 74 ldr lr, [r2, #0xC] ; load ref pixels 12-15 (2B) |
| 66 | 75 |
| 67 add r0, r0, r1 ; advance src pointer by src_stride to next row | 76 add r0, r0, r1 ; advance src pointer by src_stride to next row |
| 68 add r2, r2, r3 ; advance ref pointer by ref_stride to next row | 77 add r2, r2, r3 ; advance ref pointer by ref_stride to next row |
| 69 | 78 |
| 70 usada8 r4, r10, r12, r4 ; r4 += sad of pixels 8-11 (words loaded before the pointer advance) | 79 usada8 r4, r10, r12, r4 ; r4 += sad of pixels 8-11 (words loaded before the pointer advance) |
| 71 usada8 r8, r11, lr, r8 ; r8 += sad of pixels 12-15 | 80 usada8 r8, r11, lr, r8 ; r8 += sad of pixels 12-15 |
| 72 | 81 |
| 82 pld [r0, r1, lsl #1] |
| 83 pld [r2, r3, lsl #1] |
| 84 |
| 73 subs r5, r5, #1 ; decrement loop counter and set flags for the bne below | 85 subs r5, r5, #1 ; decrement loop counter and set flags for the bne below |
| 74 add r4, r4, r8 ; fold 2nd-row partial sum r8 into r4 (no S suffix, flags from subs are kept) | 86 add r4, r4, r8 ; fold 2nd-row partial sum r8 into r4 (no S suffix, flags from subs are kept) |
| 75 | 87 |
| 76 bne loop | 88 bne loop |
| 77 | 89 |
| 78 mov r0, r4 ; return the accumulated sad in r0 | 90 mov r0, r4 ; return the accumulated sad in r0 |
| 79 ldmfd sp!, {r4-r12, pc} | 91 ldmfd sp!, {r4-r12, pc} |
| 80 | 92 |
| 81 ENDP | 93 ENDP |
| 82 | 94 |
| 83 END | 95 END |
| 84 | 96 |
| OLD | NEW |