| OLD | NEW | 
|---|
| 1 ; | 1 ; | 
| 2 ;  Copyright (c) 2011 The WebM project authors. All Rights Reserved. | 2 ;  Copyright (c) 2011 The WebM project authors. All Rights Reserved. | 
| 3 ; | 3 ; | 
| 4 ;  Use of this source code is governed by a BSD-style license | 4 ;  Use of this source code is governed by a BSD-style license | 
| 5 ;  that can be found in the LICENSE file in the root of the source | 5 ;  that can be found in the LICENSE file in the root of the source | 
| 6 ;  tree. An additional intellectual property rights grant can be found | 6 ;  tree. An additional intellectual property rights grant can be found | 
| 7 ;  in the file PATENTS.  All contributing project authors may | 7 ;  in the file PATENTS.  All contributing project authors may | 
| 8 ;  be found in the AUTHORS file in the root of the source tree. | 8 ;  be found in the AUTHORS file in the root of the source tree. | 
| 9 ; | 9 ; | 
| 10 | 10 | 
| 11 | 11 | 
| 12     EXPORT  |vp8_variance_halfpixvar16x16_hv_armv6| | 12     EXPORT  |vp8_variance_halfpixvar16x16_hv_armv6| | 
| 13 | 13 | 
| 14     ARM | 14     ARM | 
| 15     REQUIRE8 | 15     REQUIRE8 | 
| 16     PRESERVE8 | 16     PRESERVE8 | 
| 17 | 17 | 
| 18     AREA ||.text||, CODE, READONLY, ALIGN=2 | 18     AREA ||.text||, CODE, READONLY, ALIGN=2 | 
| 19 | 19 | 
| 20 ; r0    unsigned char *src_ptr | 20 ; r0    unsigned char *src_ptr | 
| 21 ; r1    int source_stride | 21 ; r1    int source_stride | 
| 22 ; r2    unsigned char *ref_ptr | 22 ; r2    unsigned char *ref_ptr | 
| 23 ; r3    int  recon_stride | 23 ; r3    int  recon_stride | 
| 24 ; stack unsigned int *sse | 24 ; stack unsigned int *sse | 
| 25 |vp8_variance_halfpixvar16x16_hv_armv6| PROC | 25 |vp8_variance_halfpixvar16x16_hv_armv6| PROC | 
| 26 | 26 | 
| 27     stmfd   sp!, {r4-r12, lr} | 27     stmfd   sp!, {r4-r12, lr} | 
|  | 28 | 
|  | 29     pld     [r0, r1, lsl #0] | 
|  | 30     pld     [r2, r3, lsl #0] | 
|  | 31 | 
| 28     mov     r8, #0              ; initialize sum = 0 | 32     mov     r8, #0              ; initialize sum = 0 | 
| 29     ldr     r10, c80808080 | 33     ldr     r10, c80808080 | 
| 30     mov     r11, #0             ; initialize sse = 0 | 34     mov     r11, #0             ; initialize sse = 0 | 
| 31     mov     r12, #16            ; set loop counter to 16 (=block height) | 35     mov     r12, #16            ; set loop counter to 16 (=block height) | 
| 32     mov     lr, #0              ; constant zero | 36     mov     lr, #0              ; constant zero | 
| 33 loop | 37 loop | 
| 34     add     r9, r0, r1          ; pointer to pixels on the next row | 38     add     r9, r0, r1          ; pointer to pixels on the next row | 
| 35     ; 1st 4 pixels | 39     ; 1st 4 pixels | 
| 36     ldr     r4, [r0, #0]        ; load source pixels a, row N | 40     ldr     r4, [r0, #0]        ; load source pixels a, row N | 
| 37     ldr     r6, [r0, #1]        ; load source pixels b, row N | 41     ldr     r6, [r0, #1]        ; load source pixels b, row N | 
| 38     ldr     r5, [r9, #0]        ; load source pixels c, row N+1 | 42     ldr     r5, [r9, #0]        ; load source pixels c, row N+1 | 
| 39     ldr     r7, [r9, #1]        ; load source pixels d, row N+1 | 43     ldr     r7, [r9, #1]        ; load source pixels d, row N+1 | 
| 40 | 44 | 
| 41     ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N | 45     ; x = (a + b + 1) >> 1, interpolate pixels horizontally on row N | 
| 42     mvn     r6, r6 | 46     mvn     r6, r6 | 
| 43     uhsub8  r4, r4, r6 | 47     uhsub8  r4, r4, r6 | 
| 44     eor     r4, r4, r10 | 48     eor     r4, r4, r10 | 
| 45     ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1 | 49     ; y = (c + d + 1) >> 1, interpolate pixels horizontally on row N+1 | 
| 46     mvn     r7, r7 | 50     mvn     r7, r7 | 
| 47     uhsub8  r5, r5, r7 | 51     uhsub8  r5, r5, r7 | 
| 48     eor     r5, r5, r10 | 52     eor     r5, r5, r10 | 
| 49     ; z = (x + y + 1) >> 1, interpolate half pixel values vertically | 53     ; z = (x + y + 1) >> 1, interpolate half pixel values vertically | 
| 50     mvn     r5, r5 | 54     mvn     r5, r5 | 
| 51     uhsub8  r4, r4, r5 | 55     uhsub8  r4, r4, r5 | 
| 52     ldr     r5, [r2, #0]        ; load 4 ref pixels | 56     ldr     r5, [r2, #0]        ; load 4 ref pixels | 
| 53     eor     r4, r4, r10 | 57     eor     r4, r4, r10 | 
| 54 | 58 | 
| 55     usub8   r6, r4, r5          ; calculate difference | 59     usub8   r6, r4, r5          ; calculate difference | 
|  | 60     pld     [r0, r1, lsl #1] | 
| 56     sel     r7, r6, lr          ; select bytes with positive difference | 61     sel     r7, r6, lr          ; select bytes with positive difference | 
| 57     usub8   r6, r5, r4          ; calculate difference with reversed operands | 62     usub8   r6, r5, r4          ; calculate difference with reversed operands | 
|  | 63     pld     [r2, r3, lsl #1] | 
| 58     sel     r6, r6, lr          ; select bytes with negative difference | 64     sel     r6, r6, lr          ; select bytes with negative difference | 
| 59 | 65 | 
| 60     ; calculate partial sums | 66     ; calculate partial sums | 
| 61     usad8   r4, r7, lr          ; calculate sum of positive differences | 67     usad8   r4, r7, lr          ; calculate sum of positive differences | 
| 62     usad8   r5, r6, lr          ; calculate sum of negative differences | 68     usad8   r5, r6, lr          ; calculate sum of negative differences | 
| 63     orr     r6, r6, r7          ; differences of all 4 pixels | 69     orr     r6, r6, r7          ; differences of all 4 pixels | 
| 64     ; calculate total sum | 70     ; calculate total sum | 
| 65     adds    r8, r8, r4          ; add positive differences to sum | 71     adds    r8, r8, r4          ; add positive differences to sum | 
| 66     subs    r8, r8, r5          ; subtract negative differences from sum | 72     subs    r8, r8, r5          ; subtract negative differences from sum | 
| 67 | 73 | 
| (...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 207     sub     r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8)) | 213     sub     r0, r11, r0, asr #8 ; return (sse - ((sum * sum) >> 8)) | 
| 208 | 214 | 
| 209     ldmfd   sp!, {r4-r12, pc} | 215     ldmfd   sp!, {r4-r12, pc} | 
| 210 | 216 | 
| 211     ENDP | 217     ENDP | 
| 212 | 218 | 
| 213 c80808080 | 219 c80808080 | 
| 214     DCD     0x80808080 | 220     DCD     0x80808080 | 
| 215 | 221 | 
| 216     END | 222     END | 
| OLD | NEW | 
|---|