| Index: source/libvpx/third_party/libyuv/source/compare_win.cc
 | 
| diff --git a/source/libvpx/third_party/libyuv/source/compare_win.cc b/source/libvpx/third_party/libyuv/source/compare_win.cc
 | 
| index 99831651f5f58d723dc044dd0f447bc13dcfe7a1..e99009a21dff24c63f341f9ef3344068feffaeb0 100644
 | 
| --- a/source/libvpx/third_party/libyuv/source/compare_win.cc
 | 
| +++ b/source/libvpx/third_party/libyuv/source/compare_win.cc
 | 
| @@ -27,13 +27,11 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
 | 
|      pxor       xmm0, xmm0
 | 
|      pxor       xmm5, xmm5
 | 
|  
 | 
| -    align      4
 | 
|    wloop:
 | 
| -    movdqa     xmm1, [eax]
 | 
| +    movdqu     xmm1, [eax]
 | 
|      lea        eax,  [eax + 16]
 | 
| -    movdqa     xmm2, [edx]
 | 
| +    movdqu     xmm2, [edx]
 | 
|      lea        edx,  [edx + 16]
 | 
| -    sub        ecx, 16
 | 
|      movdqa     xmm3, xmm1  // abs trick
 | 
|      psubusb    xmm1, xmm2
 | 
|      psubusb    xmm2, xmm3
 | 
| @@ -45,6 +43,7 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
 | 
|      pmaddwd    xmm2, xmm2
 | 
|      paddd      xmm0, xmm1
 | 
|      paddd      xmm0, xmm2
 | 
| +    sub        ecx, 16
 | 
|      jg         wloop
 | 
|  
 | 
|      pshufd     xmm1, xmm0, 0xee
 | 
| @@ -70,12 +69,10 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
 | 
|      vpxor      ymm5, ymm5, ymm5  // constant 0 for unpck
 | 
|      sub        edx, eax
 | 
|  
 | 
| -    align      4
 | 
|    wloop:
 | 
|      vmovdqu    ymm1, [eax]
 | 
|      vmovdqu    ymm2, [eax + edx]
 | 
|      lea        eax,  [eax + 32]
 | 
| -    sub        ecx, 32
 | 
|      vpsubusb   ymm3, ymm1, ymm2  // abs difference trick
 | 
|      vpsubusb   ymm2, ymm2, ymm1
 | 
|      vpor       ymm1, ymm2, ymm3
 | 
| @@ -85,6 +82,7 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
 | 
|      vpmaddwd   ymm1, ymm1, ymm1
 | 
|      vpaddd     ymm0, ymm0, ymm1
 | 
|      vpaddd     ymm0, ymm0, ymm2
 | 
| +    sub        ecx, 32
 | 
|      jg         wloop
 | 
|  
 | 
|      vpshufd    ymm1, ymm0, 0xee  // 3, 2 + 1, 0 both lanes.
 | 
| @@ -145,7 +143,6 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
 | 
|      pxor       xmm7, xmm7        // constant 0 for unpck
 | 
|      movdqa     xmm6, kHash16x33
 | 
|  
 | 
| -    align      4
 | 
|    wloop:
 | 
|      movdqu     xmm1, [eax]       // src[0-15]
 | 
|      lea        eax, [eax + 16]
 | 
| @@ -170,7 +167,6 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
 | 
|      pmulld(0xcd)                 // pmulld     xmm1, xmm5
 | 
|      paddd      xmm3, xmm4        // add 16 results
 | 
|      paddd      xmm1, xmm2
 | 
| -    sub        ecx, 16
 | 
|      paddd      xmm1, xmm3
 | 
|  
 | 
|      pshufd     xmm2, xmm1, 0x0e  // upper 2 dwords
 | 
| @@ -178,6 +174,7 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
 | 
|      pshufd     xmm2, xmm1, 0x01
 | 
|      paddd      xmm1, xmm2
 | 
|      paddd      xmm0, xmm1
 | 
| +    sub        ecx, 16
 | 
|      jg         wloop
 | 
|  
 | 
|      movd       eax, xmm0         // return hash
 | 
| @@ -195,7 +192,6 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
 | 
|      movd       xmm0, [esp + 12]  // seed
 | 
|      movdqa     xmm6, kHash16x33
 | 
|  
 | 
| -    align      4
 | 
|    wloop:
 | 
|      vpmovzxbd  xmm3, dword ptr [eax]  // src[0-3]
 | 
|      pmulld     xmm0, xmm6  // hash *= 33 ^ 16
 | 
| @@ -209,13 +205,13 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
 | 
|      pmulld     xmm1, kHashMul3
 | 
|      paddd      xmm3, xmm4        // add 16 results
 | 
|      paddd      xmm1, xmm2
 | 
| -    sub        ecx, 16
 | 
|      paddd      xmm1, xmm3
 | 
|      pshufd     xmm2, xmm1, 0x0e  // upper 2 dwords
 | 
|      paddd      xmm1, xmm2
 | 
|      pshufd     xmm2, xmm1, 0x01
 | 
|      paddd      xmm1, xmm2
 | 
|      paddd      xmm0, xmm1
 | 
| +    sub        ecx, 16
 | 
|      jg         wloop
 | 
|  
 | 
|      movd       eax, xmm0         // return hash
 | 
| 
 |