| OLD | NEW | 
|---|---|
| 1 ; | 1 ; | 
| 2 ;  Copyright (c) 2011 The WebM project authors. All Rights Reserved. | 2 ;  Copyright (c) 2011 The WebM project authors. All Rights Reserved. | 
| 3 ; | 3 ; | 
| 4 ;  Use of this source code is governed by a BSD-style license | 4 ;  Use of this source code is governed by a BSD-style license | 
| 5 ;  that can be found in the LICENSE file in the root of the source | 5 ;  that can be found in the LICENSE file in the root of the source | 
| 6 ;  tree. An additional intellectual property rights grant can be found | 6 ;  tree. An additional intellectual property rights grant can be found | 
| 7 ;  in the file PATENTS.  All contributing project authors may | 7 ;  in the file PATENTS.  All contributing project authors may | 
| 8 ;  be found in the AUTHORS file in the root of the source tree. | 8 ;  be found in the AUTHORS file in the root of the source tree. | 
| 9 ; | 9 ; | 
| 10 | 10 | 
| 11 | 11 | 
| 12     EXPORT  |vp8_mse16x16_armv6| | 12     EXPORT  |vp8_mse16x16_armv6| | 
| 13 | 13 | 
| 14     ARM | 14     ARM | 
| 15 | 15 | 
| 16     AREA ||.text||, CODE, READONLY, ALIGN=2 | 16     AREA ||.text||, CODE, READONLY, ALIGN=2 | 
| 17 | 17 | 
| 18 ; r0    unsigned char *src_ptr | 18 ; r0    unsigned char *src_ptr | 
| 19 ; r1    int source_stride | 19 ; r1    int source_stride | 
| 20 ; r2    unsigned char *ref_ptr | 20 ; r2    unsigned char *ref_ptr | 
| 21 ; r3    int  recon_stride | 21 ; r3    int  recon_stride | 
| 22 ; stack unsigned int *sse | 22 ; stack unsigned int *sse | 
| 23 ; | 23 ; | 
| 24 ;note: Based on vp8_variance16x16_armv6. In this function, sum is never used. | 24 ;note: Based on vp8_variance16x16_armv6. In this function, sum is never used. | 
| 25 ;      So, we can remove this part of calculation. | 25 ;      So, we can remove this part of calculation. | 
| 26 | 26 | 
| 27 |vp8_mse16x16_armv6| PROC | 27 |vp8_mse16x16_armv6| PROC | 
| 28 | 28 | 
| 29     push    {r4-r9, lr} | 29     push    {r4-r9, lr} | 
|  | 30 | 
|  | 31     pld     [r0, r1, lsl #0] | 
|  | 32     pld     [r2, r3, lsl #0] | 
|  | 33 | 
| 30     mov     r12, #16            ; set loop counter to 16 (=block height) | 34     mov     r12, #16            ; set loop counter to 16 (=block height) | 
| 31 |  | 
| 32     mov     r4, #0              ; initialize sse = 0 | 35     mov     r4, #0              ; initialize sse = 0 | 
| 33 | 36 | 
| 34 loop | 37 loop | 
| 35     ; 1st 4 pixels | 38     ; 1st 4 pixels | 
| 36     ldr     r5, [r0, #0x0]      ; load 4 src pixels | 39     ldr     r5, [r0, #0x0]      ; load 4 src pixels | 
| 37     ldr     r6, [r2, #0x0]      ; load 4 ref pixels | 40     ldr     r6, [r2, #0x0]      ; load 4 ref pixels | 
| 38 | 41 | 
| 39     mov     lr, #0              ; constant zero | 42     mov     lr, #0              ; constant zero | 
| 40 | 43 | 
| 41     usub8   r8, r5, r6          ; calculate difference | 44     usub8   r8, r5, r6          ; calculate difference | 
|  | 45     pld     [r0, r1, lsl #1] | 
| 42     sel     r7, r8, lr          ; select bytes with positive difference | 46     sel     r7, r8, lr          ; select bytes with positive difference | 
| 43     usub8   r9, r6, r5          ; calculate difference with reversed operands | 47     usub8   r9, r6, r5          ; calculate difference with reversed operands | 
|  | 48     pld     [r2, r3, lsl #1] | 
| 44     sel     r8, r9, lr          ; select bytes with negative difference | 49     sel     r8, r9, lr          ; select bytes with negative difference | 
| 45 | 50 | 
| 46     ; calculate partial sums | 51     ; calculate partial sums | 
| 47     usad8   r5, r7, lr          ; calculate sum of positive differences | 52     usad8   r5, r7, lr          ; calculate sum of positive differences | 
| 48     usad8   r6, r8, lr          ; calculate sum of negative differences | 53     usad8   r6, r8, lr          ; calculate sum of negative differences | 
| 49     orr     r8, r8, r7          ; differences of all 4 pixels | 54     orr     r8, r8, r7          ; differences of all 4 pixels | 
| 50 | 55 | 
| 51     ldr     r5, [r0, #0x4]      ; load 4 src pixels | 56     ldr     r5, [r0, #0x4]      ; load 4 src pixels | 
| 52 | 57 | 
| 53     ; calculate sse | 58     ; calculate sse | 
| (... 70 matching lines skipped: OLD 54-123 ...) | (... 70 matching lines skipped: NEW 59-128 ...) | 
| 124     ; return stuff | 129     ; return stuff | 
| 125     ldr     r1, [sp, #28]       ; get address of sse | 130     ldr     r1, [sp, #28]       ; get address of sse | 
| 126     mov     r0, r4              ; return sse | 131     mov     r0, r4              ; return sse | 
| 127     str     r4, [r1]            ; store sse | 132     str     r4, [r1]            ; store sse | 
| 128 | 133 | 
| 129     pop     {r4-r9, pc} | 134     pop     {r4-r9, pc} | 
| 130 | 135 | 
| 131     ENDP | 136     ENDP | 
| 132 | 137 | 
| 133     END | 138     END | 
| OLD | NEW | 
|---|---|