| Index: source/libvpx/vp8/common/arm/armv6/simpleloopfilter_v6.asm | 
| =================================================================== | 
| --- source/libvpx/vp8/common/arm/armv6/simpleloopfilter_v6.asm	(revision 96967) | 
| +++ source/libvpx/vp8/common/arm/armv6/simpleloopfilter_v6.asm	(working copy) | 
| @@ -45,35 +45,28 @@ | 
| MEND | 
|  | 
|  | 
| + | 
| src         RN  r0 | 
| pstep       RN  r1 | 
|  | 
| ;r0     unsigned char *src_ptr, | 
| ;r1     int src_pixel_step, | 
| -;r2     const char *flimit, | 
| -;r3     const char *limit, | 
| -;stack  const char *thresh, | 
| -;stack  int  count | 
| +;r2     const char *blimit | 
|  | 
| -; All 16 elements in flimit are equal. So, in the code, only one load is needed | 
| -; for flimit. Same applies to limit. thresh is not used in simple looopfilter | 
| - | 
| ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- | 
| |vp8_loop_filter_simple_horizontal_edge_armv6| PROC | 
| ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- | 
| stmdb       sp!, {r4 - r11, lr} | 
|  | 
| -    ldr         r12, [r3]                   ; limit | 
| +    ldrb        r12, [r2]                   ; blimit | 
| ldr         r3, [src, -pstep, lsl #1]   ; p1 | 
| ldr         r4, [src, -pstep]           ; p0 | 
| ldr         r5, [src]                   ; q0 | 
| ldr         r6, [src, pstep]            ; q1 | 
| -    ldr         r7, [r2]                    ; flimit | 
| +    orr         r12, r12, r12, lsl #8       ; blimit | 
| ldr         r2, c0x80808080 | 
| -    ldr         r9, [sp, #40]               ; count for 8-in-parallel | 
| -    uadd8       r7, r7, r7                  ; flimit * 2 | 
| -    mov         r9, r9, lsl #1              ; double the count. we're doing 4 at a time | 
| -    uadd8       r12, r7, r12                ; flimit * 2 + limit | 
| +    orr         r12, r12, r12, lsl #16      ; blimit | 
| +    mov         r9, #4                      ; fixed loop count of 4 (count arg removed, nothing to double); we're doing 4 at a time |
| mov         lr, #0                      ; need 0 in a couple places | 
|  | 
| |simple_hnext8| | 
| @@ -148,30 +141,32 @@ | 
| ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- | 
| stmdb       sp!, {r4 - r11, lr} | 
|  | 
| -    ldr         r12, [r2]                   ; r12: flimit | 
| +    ldrb        r12, [r2]                   ; r12: blimit | 
| ldr         r2, c0x80808080 | 
| -    ldr         r7, [r3]                    ; limit | 
| +    orr         r12, r12, r12, lsl #8 | 
|  | 
| ; load soure data to r7, r8, r9, r10 | 
| ldrh        r3, [src, #-2] | 
| +    pld         [src, #23]                  ; preload for next block | 
| ldrh        r4, [src], pstep | 
| -    uadd8       r12, r12, r12               ; flimit * 2 | 
| +    orr         r12, r12, r12, lsl #16 | 
|  | 
| ldrh        r5, [src, #-2] | 
| +    pld         [src, #23] | 
| ldrh        r6, [src], pstep | 
| -    uadd8       r12, r12, r7                ; flimit * 2 + limit | 
|  | 
| pkhbt       r7, r3, r4, lsl #16 | 
|  | 
| ldrh        r3, [src, #-2] | 
| +    pld         [src, #23] | 
| ldrh        r4, [src], pstep | 
| -    ldr         r11, [sp, #40]              ; count (r11) for 8-in-parallel | 
|  | 
| pkhbt       r8, r5, r6, lsl #16 | 
|  | 
| ldrh        r5, [src, #-2] | 
| +    pld         [src, #23] | 
| ldrh        r6, [src], pstep | 
| -    mov         r11, r11, lsl #1            ; 4-in-parallel | 
| +    mov         r11, #4                     ; fixed loop count of 4 (count arg removed, nothing to double); we're doing 4 at a time |
|  | 
| |simple_vnext8| | 
| ; vp8_simple_filter_mask() function | 
| @@ -259,19 +254,23 @@ | 
|  | 
| ; load soure data to r7, r8, r9, r10 | 
| ldrneh      r3, [src, #-2] | 
| +    pld         [src, #23]                  ; preload for next block | 
| ldrneh      r4, [src], pstep | 
|  | 
| ldrneh      r5, [src, #-2] | 
| +    pld         [src, #23] | 
| ldrneh      r6, [src], pstep | 
|  | 
| pkhbt       r7, r3, r4, lsl #16 | 
|  | 
| ldrneh      r3, [src, #-2] | 
| +    pld         [src, #23] | 
| ldrneh      r4, [src], pstep | 
|  | 
| pkhbt       r8, r5, r6, lsl #16 | 
|  | 
| ldrneh      r5, [src, #-2] | 
| +    pld         [src, #23] | 
| ldrneh      r6, [src], pstep | 
|  | 
| bne         simple_vnext8 | 
|  |