| Index: source/libvpx/vp8/common/x86/loopfilter_sse2.asm | 
| =================================================================== | 
| --- source/libvpx/vp8/common/x86/loopfilter_sse2.asm	(revision 96967) | 
| +++ source/libvpx/vp8/common/x86/loopfilter_sse2.asm	(working copy) | 
| @@ -110,7 +110,7 @@ | 
| psubusb     xmm6,                   xmm5              ; p1-=p0 | 
|  | 
| por         xmm6,                   xmm4              ; abs(p1 - p0) | 
| -        mov         rdx,                    arg(2)            ; get flimit | 
| +        mov         rdx,                    arg(2)            ; get blimit | 
|  | 
| movdqa        t1,                   xmm6              ; save to t1 | 
|  | 
| @@ -123,7 +123,7 @@ | 
| psubusb     xmm1,                   xmm7 | 
| por         xmm2,                   xmm3              ; abs(p1-q1) | 
|  | 
| -        movdqa      xmm4,                   XMMWORD PTR [rdx] ; flimit | 
| +        movdqa      xmm7,                   XMMWORD PTR [rdx] ; blimit | 
|  | 
| movdqa      xmm3,                   xmm0              ; q0 | 
| pand        xmm2,                   [GLOBAL(tfe)]     ; set lsb of each byte to zero | 
| @@ -134,13 +134,11 @@ | 
| psrlw       xmm2,                   1                 ; abs(p1-q1)/2 | 
|  | 
| psubusb     xmm5,                   xmm3              ; p0-=q0 | 
| -        paddb       xmm4,                   xmm4              ; flimit*2 (less than 255) | 
|  | 
| psubusb     xmm3,                   xmm6              ; q0-=p0 | 
| por         xmm5,                   xmm3              ; abs(p0 - q0) | 
|  | 
| paddusb     xmm5,                   xmm5              ; abs(p0-q0)*2 | 
| -        paddb       xmm7,                   xmm4              ; flimit * 2 + limit (less than 255) | 
|  | 
| movdqa      xmm4,                   t0                ; hev get abs (q1 - q0) | 
|  | 
| @@ -150,7 +148,7 @@ | 
|  | 
| movdqa      xmm2,                   XMMWORD PTR [rdx] ; hev | 
|  | 
| -        psubusb     xmm5,                   xmm7              ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit | 
| +        psubusb     xmm5,                   xmm7              ; abs (p0 - q0) *2 + abs(p1-q1)/2  > blimit | 
| psubusb     xmm4,                   xmm2              ; hev | 
|  | 
| psubusb     xmm3,                   xmm2              ; hev | 
| @@ -278,7 +276,7 @@ | 
| ;( | 
| ;    unsigned char *src_ptr, | 
| ;    int            src_pixel_step, | 
| -;    const char    *flimit, | 
| +;    const char    *blimit, | 
| ;    const char    *limit, | 
| ;    const char    *thresh, | 
| ;    int            count | 
| @@ -288,7 +286,7 @@ | 
| push        rbp | 
| mov         rbp, rsp | 
| SHADOW_ARGS_TO_STACK 6 | 
| -    SAVE_XMM | 
| +    SAVE_XMM 7 | 
| GET_GOT     rbx | 
| push        rsi | 
| push        rdi | 
| @@ -328,7 +326,7 @@ | 
| ;( | 
| ;    unsigned char *src_ptr, | 
| ;    int            src_pixel_step, | 
| -;    const char    *flimit, | 
| +;    const char    *blimit, | 
| ;    const char    *limit, | 
| ;    const char    *thresh, | 
| ;    int            count | 
| @@ -338,7 +336,7 @@ | 
| push        rbp | 
| mov         rbp, rsp | 
| SHADOW_ARGS_TO_STACK 6 | 
| -    SAVE_XMM | 
| +    SAVE_XMM 7 | 
| GET_GOT     rbx | 
| push        rsi | 
| push        rdi | 
| @@ -574,7 +572,7 @@ | 
| ;( | 
| ;    unsigned char *src_ptr, | 
| ;    int            src_pixel_step, | 
| -;    const char    *flimit, | 
| +;    const char    *blimit, | 
| ;    const char    *limit, | 
| ;    const char    *thresh, | 
| ;    int            count | 
| @@ -584,7 +582,7 @@ | 
| push        rbp | 
| mov         rbp, rsp | 
| SHADOW_ARGS_TO_STACK 6 | 
| -    SAVE_XMM | 
| +    SAVE_XMM 7 | 
| GET_GOT     rbx | 
| push        rsi | 
| push        rdi | 
| @@ -624,7 +622,7 @@ | 
| ;( | 
| ;    unsigned char *u, | 
| ;    int            src_pixel_step, | 
| -;    const char    *flimit, | 
| +;    const char    *blimit, | 
| ;    const char    *limit, | 
| ;    const char    *thresh, | 
| ;    unsigned char *v | 
| @@ -634,7 +632,7 @@ | 
| push        rbp | 
| mov         rbp, rsp | 
| SHADOW_ARGS_TO_STACK 6 | 
| -    SAVE_XMM | 
| +    SAVE_XMM 7 | 
| GET_GOT     rbx | 
| push        rsi | 
| push        rdi | 
| @@ -904,7 +902,7 @@ | 
| movdqa      xmm4,               XMMWORD PTR [rdx]; limit | 
|  | 
| pmaxub      xmm0,               xmm7 | 
| -        mov         rdx,                arg(2)          ; flimit | 
| +        mov         rdx,                arg(2)          ; blimit | 
|  | 
| psubusb     xmm0,               xmm4 | 
| movdqa      xmm5,               xmm2            ; q1 | 
| @@ -921,12 +919,11 @@ | 
| psrlw       xmm5,               1               ; abs(p1-q1)/2 | 
| psubusb     xmm6,               xmm3            ; q0-p0 | 
|  | 
| -        movdqa      xmm2,               XMMWORD PTR [rdx]; flimit | 
| +        movdqa      xmm4,               XMMWORD PTR [rdx]; blimit | 
|  | 
| mov         rdx,                arg(4)          ; get thresh | 
|  | 
| por         xmm1,               xmm6            ; abs(q0-p0) | 
| -        paddb       xmm2,               xmm2            ; flimit*2 (less than 255) | 
|  | 
| movdqa      xmm6,               t0              ; get abs (q1 - q0) | 
|  | 
| @@ -939,10 +936,9 @@ | 
| paddusb     xmm1,               xmm5            ; abs (p0 - q0) *2 + abs(p1-q1)/2 | 
| psubusb     xmm6,               xmm7            ; abs(q1 - q0) > thresh | 
|  | 
| -        paddb       xmm4,               xmm2            ; flimit * 2 + limit (less than 255) | 
| psubusb     xmm3,               xmm7            ; abs(p1 - p0)> thresh | 
|  | 
| -        psubusb     xmm1,               xmm4            ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit | 
| +        psubusb     xmm1,               xmm4            ; abs (p0 - q0) *2 + abs(p1-q1)/2  > blimit | 
| por         xmm6,               xmm3            ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh | 
|  | 
| por         xmm1,               xmm0            ; mask | 
| @@ -1014,7 +1010,7 @@ | 
| ;( | 
| ;    unsigned char *src_ptr, | 
| ;    int            src_pixel_step, | 
| -;    const char    *flimit, | 
| +;    const char    *blimit, | 
| ;    const char    *limit, | 
| ;    const char    *thresh, | 
| ;    int            count | 
| @@ -1024,7 +1020,7 @@ | 
| push        rbp | 
| mov         rbp, rsp | 
| SHADOW_ARGS_TO_STACK 6 | 
| -    SAVE_XMM | 
| +    SAVE_XMM 7 | 
| GET_GOT     rbx | 
| push        rsi | 
| push        rdi | 
| @@ -1081,7 +1077,7 @@ | 
| ;( | 
| ;    unsigned char *u, | 
| ;    int            src_pixel_step, | 
| -;    const char    *flimit, | 
| +;    const char    *blimit, | 
| ;    const char    *limit, | 
| ;    const char    *thresh, | 
| ;    unsigned char *v | 
| @@ -1091,7 +1087,7 @@ | 
| push        rbp | 
| mov         rbp, rsp | 
| SHADOW_ARGS_TO_STACK 6 | 
| -    SAVE_XMM | 
| +    SAVE_XMM 7 | 
| GET_GOT     rbx | 
| push        rsi | 
| push        rdi | 
| @@ -1239,7 +1235,7 @@ | 
| ;( | 
| ;    unsigned char *src_ptr, | 
| ;    int            src_pixel_step, | 
| -;    const char    *flimit, | 
| +;    const char    *blimit, | 
| ;    const char    *limit, | 
| ;    const char    *thresh, | 
| ;    int            count | 
| @@ -1249,7 +1245,7 @@ | 
| push        rbp | 
| mov         rbp, rsp | 
| SHADOW_ARGS_TO_STACK 6 | 
| -    SAVE_XMM | 
| +    SAVE_XMM 7 | 
| GET_GOT     rbx | 
| push        rsi | 
| push        rdi | 
| @@ -1308,7 +1304,7 @@ | 
| ;( | 
| ;    unsigned char *u, | 
| ;    int            src_pixel_step, | 
| -;    const char    *flimit, | 
| +;    const char    *blimit, | 
| ;    const char    *limit, | 
| ;    const char    *thresh, | 
| ;    unsigned char *v | 
| @@ -1318,7 +1314,7 @@ | 
| push        rbp | 
| mov         rbp, rsp | 
| SHADOW_ARGS_TO_STACK 6 | 
| -    SAVE_XMM | 
| +    SAVE_XMM 7 | 
| GET_GOT     rbx | 
| push        rsi | 
| push        rdi | 
| @@ -1376,17 +1372,14 @@ | 
| ;( | 
| ;    unsigned char *src_ptr, | 
| ;    int  src_pixel_step, | 
| -;    const char *flimit, | 
| -;    const char *limit, | 
| -;    const char *thresh, | 
| -;    int count | 
| +;    const char *blimit | 
| ;) | 
| global sym(vp8_loop_filter_simple_horizontal_edge_sse2) | 
| sym(vp8_loop_filter_simple_horizontal_edge_sse2): | 
| push        rbp | 
| mov         rbp, rsp | 
| -    SHADOW_ARGS_TO_STACK 6 | 
| -    SAVE_XMM | 
| +    SHADOW_ARGS_TO_STACK 3 | 
| +    SAVE_XMM 7 | 
| GET_GOT     rbx | 
| push        rsi | 
| push        rdi | 
| @@ -1394,14 +1387,9 @@ | 
|  | 
| mov         rsi, arg(0)             ;src_ptr | 
| movsxd      rax, dword ptr arg(1)   ;src_pixel_step     ; destination pitch? | 
| -        mov         rdx, arg(2) ;flimit     ; get flimit | 
| +        mov         rdx, arg(2)             ;blimit | 
| movdqa      xmm3, XMMWORD PTR [rdx] | 
| -        mov         rdx, arg(3) ;limit | 
| -        movdqa      xmm7, XMMWORD PTR [rdx] | 
|  | 
| -        paddb       xmm3, xmm3              ; flimit*2 (less than 255) | 
| -        paddb       xmm3, xmm7              ; flimit * 2 + limit (less than 255) | 
| - | 
| mov         rdi, rsi                ; rdi points to row +1 for indirect addressing | 
| add         rdi, rax | 
| neg         rax | 
| @@ -1428,7 +1416,7 @@ | 
| paddusb     xmm5, xmm5              ; abs(p0-q0)*2 | 
| paddusb     xmm5, xmm1              ; abs (p0 - q0) *2 + abs(p1-q1)/2 | 
|  | 
| -        psubusb     xmm5, xmm3              ; abs(p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit | 
| +        psubusb     xmm5, xmm3              ; abs(p0 - q0) *2 + abs(p1-q1)/2  > blimit | 
| pxor        xmm3, xmm3 | 
| pcmpeqb     xmm5, xmm3 | 
|  | 
| @@ -1493,17 +1481,14 @@ | 
| ;( | 
| ;    unsigned char *src_ptr, | 
| ;    int  src_pixel_step, | 
| -;    const char *flimit, | 
| -;    const char *limit, | 
| -;    const char *thresh, | 
| -;    int count | 
| +;    const char *blimit | 
| ;) | 
| global sym(vp8_loop_filter_simple_vertical_edge_sse2) | 
| sym(vp8_loop_filter_simple_vertical_edge_sse2): | 
| push        rbp         ; save old base pointer value. | 
| mov         rbp, rsp    ; set new base pointer value. | 
| -    SHADOW_ARGS_TO_STACK 6 | 
| -    SAVE_XMM | 
| +    SHADOW_ARGS_TO_STACK 3 | 
| +    SAVE_XMM 7 | 
| GET_GOT     rbx         ; save callee-saved reg | 
| push        rsi | 
| push        rdi | 
| @@ -1607,14 +1592,10 @@ | 
| paddusb     xmm5,       xmm5                            ; abs(p0-q0)*2 | 
| paddusb     xmm5,       xmm6                            ; abs (p0 - q0) *2 + abs(p1-q1)/2 | 
|  | 
| -        mov         rdx,        arg(2)                          ;flimit | 
| +        mov         rdx,        arg(2)                          ;blimit | 
| movdqa      xmm7, XMMWORD PTR [rdx] | 
| -        mov         rdx,        arg(3)                          ; get limit | 
| -        movdqa      xmm6, XMMWORD PTR [rdx] | 
| -        paddb       xmm7,        xmm7                           ; flimit*2 (less than 255) | 
| -        paddb       xmm7,        xmm6                           ; flimit * 2 + limit (less than 255) | 
|  | 
| -        psubusb     xmm5,        xmm7                           ; abs(p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit | 
| +        psubusb     xmm5,        xmm7                           ; abs(p0 - q0) *2 + abs(p1-q1)/2  > blimit | 
| pxor        xmm7,        xmm7 | 
| pcmpeqb     xmm5,        xmm7                           ; mm5 = mask | 
|  | 
|  |