| Index: source/libvpx/vp8/common/x86/loopfilter_mmx.asm | 
| =================================================================== | 
| --- source/libvpx/vp8/common/x86/loopfilter_mmx.asm	(revision 96967) | 
| +++ source/libvpx/vp8/common/x86/loopfilter_mmx.asm	(working copy) | 
| @@ -16,7 +16,7 @@ | 
| ;( | 
| ;    unsigned char *src_ptr, | 
| ;    int src_pixel_step, | 
| -;    const char *flimit, | 
| +;    const char *blimit, | 
| ;    const char *limit, | 
| ;    const char *thresh, | 
| ;    int  count | 
| @@ -122,12 +122,10 @@ | 
| paddusb     mm5, mm5              ; abs(p0-q0)*2 | 
| paddusb     mm5, mm2              ; abs (p0 - q0) *2 + abs(p1-q1)/2 | 
|  | 
| -        mov         rdx, arg(2) ;flimit           ; get flimit | 
| -        movq        mm2, [rdx]            ; flimit mm2 | 
| -        paddb       mm2, mm2              ; flimit*2 (less than 255) | 
| -        paddb       mm7, mm2              ; flimit * 2 + limit (less than 255) | 
| +        mov         rdx, arg(2) ;blimit           ; get blimit | 
| +        movq        mm7, [rdx]            ; blimit | 
|  | 
| -        psubusb     mm5,    mm7           ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit | 
| +        psubusb     mm5,    mm7           ; abs (p0 - q0) *2 + abs(p1-q1)/2  > blimit | 
| por         mm1,    mm5 | 
| pxor        mm5,    mm5 | 
| pcmpeqb     mm1,    mm5           ; mask mm1 | 
| @@ -230,7 +228,7 @@ | 
| ;( | 
| ;    unsigned char *src_ptr, | 
| ;    int  src_pixel_step, | 
| -;    const char *flimit, | 
| +;    const char *blimit, | 
| ;    const char *limit, | 
| ;    const char *thresh, | 
| ;    int count | 
| @@ -406,9 +404,9 @@ | 
| pand        mm5,        [GLOBAL(tfe)]               ; set lsb of each byte to zero | 
| psrlw       mm5,        1                           ; abs(p1-q1)/2 | 
|  | 
| -        mov         rdx,        arg(2) ;flimit                      ; | 
| +        mov         rdx,        arg(2) ;blimit                      ; get blimit | 
|  | 
| -        movq        mm2,        [rdx]                       ;flimit  mm2 | 
| +        movq        mm4,        [rdx]                       ;blimit | 
| movq        mm1,        mm3                         ; mm1=mm3=p0 | 
|  | 
| movq        mm7,        mm6                         ; mm7=mm6=q0 | 
| @@ -419,10 +417,7 @@ | 
| paddusb     mm1,        mm1                         ; abs(q0-p0)*2 | 
| paddusb     mm1,        mm5                         ; abs (p0 - q0) *2 + abs(p1-q1)/2 | 
|  | 
| -        paddb       mm2,        mm2                         ; flimit*2 (less than 255) | 
| -        paddb       mm4,        mm2                         ; flimit * 2 + limit (less than 255) | 
| - | 
| -        psubusb     mm1,        mm4                         ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit | 
| +        psubusb     mm1,        mm4                         ; abs (p0 - q0) *2 + abs(p1-q1)/2  > blimit | 
| por         mm1,        mm0;                        ; mask | 
|  | 
| pxor        mm0,        mm0 | 
| @@ -603,7 +598,7 @@ | 
| ;( | 
| ;    unsigned char *src_ptr, | 
| ;    int  src_pixel_step, | 
| -;    const char *flimit, | 
| +;    const char *blimit, | 
| ;    const char *limit, | 
| ;    const char *thresh, | 
| ;    int count | 
| @@ -719,17 +714,15 @@ | 
| paddusb     mm5, mm5              ; abs(p0-q0)*2 | 
| paddusb     mm5, mm2              ; abs (p0 - q0) *2 + abs(p1-q1)/2 | 
|  | 
| -        mov         rdx, arg(2) ;flimit           ; get flimit | 
| -        movq        mm2, [rdx]            ; flimit mm2 | 
| -        paddb       mm2, mm2              ; flimit*2 (less than 255) | 
| -        paddb       mm7, mm2              ; flimit * 2 + limit (less than 255) | 
| +        mov         rdx, arg(2) ;blimit           ; get blimit | 
| +        movq        mm7, [rdx]            ; blimit | 
|  | 
| -        psubusb     mm5,    mm7           ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit | 
| +        psubusb     mm5,    mm7           ; abs (p0 - q0) *2 + abs(p1-q1)/2  > blimit | 
| por         mm1,    mm5 | 
| pxor        mm5,    mm5 | 
| pcmpeqb     mm1,    mm5           ; mask mm1 | 
|  | 
| -        ; mm1 = mask, mm0=q0,  mm7 = flimit, t0 = abs(q0-q1) t1 = abs(p1-p0) | 
| +        ; mm1 = mask, mm0=q0,  mm7 = blimit, t0 = abs(q0-q1) t1 = abs(p1-p0) | 
| ; mm6 = p0, | 
|  | 
| ; calculate high edge variance | 
| @@ -922,7 +915,7 @@ | 
| ;( | 
| ;    unsigned char *src_ptr, | 
| ;    int  src_pixel_step, | 
| -;    const char *flimit, | 
| +;    const char *blimit, | 
| ;    const char *limit, | 
| ;    const char *thresh, | 
| ;    int count | 
| @@ -1108,9 +1101,9 @@ | 
| pand        mm5,        [GLOBAL(tfe)]               ; set lsb of each byte to zero | 
| psrlw       mm5,        1                           ; abs(p1-q1)/2 | 
|  | 
| -        mov         rdx,        arg(2) ;flimit                      ; | 
| +        mov         rdx,        arg(2) ;blimit                      ; get blimit | 
|  | 
| -        movq        mm2,        [rdx]                       ;flimit  mm2 | 
| +        movq        mm4,        [rdx]                       ;blimit | 
| movq        mm1,        mm3                         ; mm1=mm3=p0 | 
|  | 
| movq        mm7,        mm6                         ; mm7=mm6=q0 | 
| @@ -1121,10 +1114,7 @@ | 
| paddusb     mm1,        mm1                         ; abs(q0-p0)*2 | 
| paddusb     mm1,        mm5                         ; abs (p0 - q0) *2 + abs(p1-q1)/2 | 
|  | 
| -        paddb       mm2,        mm2                         ; flimit*2 (less than 255) | 
| -        paddb       mm4,        mm2                         ; flimit * 2 + limit (less than 255) | 
| - | 
| -        psubusb     mm1,        mm4                         ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit | 
| +        psubusb     mm1,        mm4                         ; abs (p0 - q0) *2 + abs(p1-q1)/2  > blimit | 
| por         mm1,        mm0;                        ; mask | 
|  | 
| pxor        mm0,        mm0 | 
| @@ -1392,16 +1382,13 @@ | 
| ;( | 
| ;    unsigned char *src_ptr, | 
| ;    int  src_pixel_step, | 
| -;    const char *flimit, | 
| -;    const char *limit, | 
| -;    const char *thresh, | 
| -;    int count | 
| +;    const char *blimit | 
| ;) | 
| global sym(vp8_loop_filter_simple_horizontal_edge_mmx) | 
| sym(vp8_loop_filter_simple_horizontal_edge_mmx): | 
| push        rbp | 
| mov         rbp, rsp | 
| -    SHADOW_ARGS_TO_STACK 6 | 
| +    SHADOW_ARGS_TO_STACK 3 | 
| GET_GOT     rbx | 
| push        rsi | 
| push        rdi | 
| @@ -1410,14 +1397,10 @@ | 
| mov         rsi, arg(0) ;src_ptr | 
| movsxd      rax, dword ptr arg(1) ;src_pixel_step     ; destination pitch? | 
|  | 
| -        movsxd      rcx, dword ptr arg(5) ;count | 
| +        mov         rcx, 2                ; count: hard-coded, no longer a function argument | 
| nexts8_h: | 
| -        mov         rdx, arg(3) ;limit | 
| -        movq        mm7, [rdx] | 
| -        mov         rdx, arg(2) ;flimit           ; get flimit | 
| +        mov         rdx, arg(2) ;blimit           ; get blimit | 
| movq        mm3, [rdx]            ; | 
| -        paddb       mm3, mm3              ; flimit*2 (less than 255) | 
| -        paddb       mm3, mm7              ; flimit * 2 + limit (less than 255) | 
|  | 
| mov         rdi, rsi              ; rdi points to row +1 for indirect addressing | 
| add         rdi, rax | 
| @@ -1445,7 +1428,7 @@ | 
| paddusb     mm5, mm5              ; abs(p0-q0)*2 | 
| paddusb     mm5, mm1              ; abs (p0 - q0) *2 + abs(p1-q1)/2 | 
|  | 
| -        psubusb     mm5, mm3              ; abs(p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit | 
| +        psubusb     mm5, mm3              ; abs(p0 - q0) *2 + abs(p1-q1)/2  > blimit | 
| pxor        mm3, mm3 | 
| pcmpeqb     mm5, mm3 | 
|  | 
| @@ -1515,16 +1498,13 @@ | 
| ;( | 
| ;    unsigned char *src_ptr, | 
| ;    int  src_pixel_step, | 
| -;    const char *flimit, | 
| -;    const char *limit, | 
| -;    const char *thresh, | 
| -;    int count | 
| +;    const char *blimit | 
| ;) | 
| global sym(vp8_loop_filter_simple_vertical_edge_mmx) | 
| sym(vp8_loop_filter_simple_vertical_edge_mmx): | 
| push        rbp | 
| mov         rbp, rsp | 
| -    SHADOW_ARGS_TO_STACK 6 | 
| +    SHADOW_ARGS_TO_STACK 3 | 
| GET_GOT     rbx | 
| push        rsi | 
| push        rdi | 
| @@ -1539,7 +1519,7 @@ | 
| movsxd      rax, dword ptr arg(1) ;src_pixel_step     ; destination pitch? | 
|  | 
| lea         rsi, [rsi + rax*4- 2];  ; | 
| -        movsxd      rcx, dword ptr arg(5) ;count | 
| +        mov         rcx, 2                                      ; count: hard-coded, no longer a function argument | 
| nexts8_v: | 
|  | 
| lea         rdi,        [rsi + rax]; | 
| @@ -1602,14 +1582,10 @@ | 
| paddusb     mm5,        mm5                             ; abs(p0-q0)*2 | 
| paddusb     mm5,        mm6                             ; abs (p0 - q0) *2 + abs(p1-q1)/2 | 
|  | 
| -        mov         rdx,        arg(2) ;flimit                          ; get flimit | 
| +        mov         rdx,        arg(2) ;blimit                          ; get blimit | 
| movq        mm7,        [rdx] | 
| -        mov         rdx,        arg(3)                          ; get limit | 
| -        movq        mm6,        [rdx] | 
| -        paddb       mm7,        mm7                             ; flimit*2 (less than 255) | 
| -        paddb       mm7,        mm6                             ; flimit * 2 + limit (less than 255) | 
|  | 
| -        psubusb     mm5,        mm7                             ; abs(p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit | 
| +        psubusb     mm5,        mm7                             ; abs(p0 - q0) *2 + abs(p1-q1)/2  > blimit | 
| pxor        mm7,        mm7 | 
| pcmpeqb     mm5,        mm7                             ; mm5 = mask | 
|  | 
|  |