| Index: source/libvpx/third_party/libyuv/source/scale_posix.cc
 | 
| diff --git a/source/libvpx/third_party/libyuv/source/scale_posix.cc b/source/libvpx/third_party/libyuv/source/scale_posix.cc
 | 
| index 352e66782214fcc75d12da075d6d02cb0bd4d73f..bb6e57efe3228c315bd74d7cd140ec0ee56d8e33 100644
 | 
| --- a/source/libvpx/third_party/libyuv/source/scale_posix.cc
 | 
| +++ b/source/libvpx/third_party/libyuv/source/scale_posix.cc
 | 
| @@ -101,110 +101,6 @@ void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 | 
|    asm volatile (
 | 
|      LABELALIGN
 | 
|    "1:                                          \n"
 | 
| -    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| -    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
 | 
| -    "lea       " MEMLEA(0x20,0) ",%0           \n"
 | 
| -    "psrlw     $0x8,%%xmm0                     \n"
 | 
| -    "psrlw     $0x8,%%xmm1                     \n"
 | 
| -    "packuswb  %%xmm1,%%xmm0                   \n"
 | 
| -    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
 | 
| -    "lea       " MEMLEA(0x10,1) ",%1           \n"
 | 
| -    "sub       $0x10,%2                        \n"
 | 
| -    "jg        1b                              \n"
 | 
| -  : "+r"(src_ptr),    // %0
 | 
| -    "+r"(dst_ptr),    // %1
 | 
| -    "+r"(dst_width)   // %2
 | 
| -  :
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1"
 | 
| -#endif
 | 
| -  );
 | 
| -}
 | 
| -
 | 
| -void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 | 
| -                              uint8* dst_ptr, int dst_width) {
 | 
| -  asm volatile (
 | 
| -    "pcmpeqb   %%xmm5,%%xmm5                   \n"
 | 
| -    "psrlw     $0x8,%%xmm5                     \n"
 | 
| -
 | 
| -    LABELALIGN
 | 
| -  "1:                                          \n"
 | 
| -    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| -    "movdqa    " MEMACCESS2(0x10, 0) ",%%xmm1  \n"
 | 
| -    "lea       " MEMLEA(0x20,0) ",%0           \n"
 | 
| -    "movdqa    %%xmm0,%%xmm2                   \n"
 | 
| -    "psrlw     $0x8,%%xmm0                     \n"
 | 
| -    "movdqa    %%xmm1,%%xmm3                   \n"
 | 
| -    "psrlw     $0x8,%%xmm1                     \n"
 | 
| -    "pand      %%xmm5,%%xmm2                   \n"
 | 
| -    "pand      %%xmm5,%%xmm3                   \n"
 | 
| -    "pavgw     %%xmm2,%%xmm0                   \n"
 | 
| -    "pavgw     %%xmm3,%%xmm1                   \n"
 | 
| -    "packuswb  %%xmm1,%%xmm0                   \n"
 | 
| -    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
 | 
| -    "lea       " MEMLEA(0x10,1) ",%1           \n"
 | 
| -    "sub       $0x10,%2                        \n"
 | 
| -    "jg        1b                              \n"
 | 
| -  : "+r"(src_ptr),    // %0
 | 
| -    "+r"(dst_ptr),    // %1
 | 
| -    "+r"(dst_width)   // %2
 | 
| -  :
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1", "xmm5"
 | 
| -#endif
 | 
| -  );
 | 
| -}
 | 
| -
 | 
| -void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 | 
| -                           uint8* dst_ptr, int dst_width) {
 | 
| -  asm volatile (
 | 
| -    "pcmpeqb   %%xmm5,%%xmm5                   \n"
 | 
| -    "psrlw     $0x8,%%xmm5                     \n"
 | 
| -
 | 
| -    LABELALIGN
 | 
| -  "1:                                          \n"
 | 
| -    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| -    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
 | 
| -    MEMOPREG(movdqa,0x00,0,3,1,xmm2)           //  movdqa  (%0,%3,1),%%xmm2
 | 
| -    BUNDLEALIGN
 | 
| -    MEMOPREG(movdqa,0x10,0,3,1,xmm3)           //  movdqa  0x10(%0,%3,1),%%xmm3
 | 
| -    "lea       " MEMLEA(0x20,0) ",%0           \n"
 | 
| -    "pavgb     %%xmm2,%%xmm0                   \n"
 | 
| -    "pavgb     %%xmm3,%%xmm1                   \n"
 | 
| -    "movdqa    %%xmm0,%%xmm2                   \n"
 | 
| -    "psrlw     $0x8,%%xmm0                     \n"
 | 
| -    "movdqa    %%xmm1,%%xmm3                   \n"
 | 
| -    "psrlw     $0x8,%%xmm1                     \n"
 | 
| -    "pand      %%xmm5,%%xmm2                   \n"
 | 
| -    "pand      %%xmm5,%%xmm3                   \n"
 | 
| -    "pavgw     %%xmm2,%%xmm0                   \n"
 | 
| -    "pavgw     %%xmm3,%%xmm1                   \n"
 | 
| -    "packuswb  %%xmm1,%%xmm0                   \n"
 | 
| -    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
 | 
| -    "lea       " MEMLEA(0x10,1) ",%1           \n"
 | 
| -    "sub       $0x10,%2                        \n"
 | 
| -    "jg        1b                              \n"
 | 
| -  : "+r"(src_ptr),    // %0
 | 
| -    "+r"(dst_ptr),    // %1
 | 
| -    "+r"(dst_width)   // %2
 | 
| -  : "r"((intptr_t)(src_stride))   // %3
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__native_client__) && defined(__x86_64__)
 | 
| -    , "r14"
 | 
| -#endif
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
 | 
| -#endif
 | 
| -  );
 | 
| -}
 | 
| -
 | 
| -void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 | 
| -                                  uint8* dst_ptr, int dst_width) {
 | 
| -  asm volatile (
 | 
| -    LABELALIGN
 | 
| -  "1:                                          \n"
 | 
|      "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
 | 
|      "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
 | 
|      "lea       " MEMLEA(0x20,0) ",%0           \n"
 | 
| @@ -218,17 +114,12 @@ void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 | 
|    : "+r"(src_ptr),    // %0
 | 
|      "+r"(dst_ptr),    // %1
 | 
|      "+r"(dst_width)   // %2
 | 
| -  :
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1"
 | 
| -#endif
 | 
| +  :: "memory", "cc", "xmm0", "xmm1"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| -void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
 | 
| -                                        ptrdiff_t src_stride,
 | 
| -                                        uint8* dst_ptr, int dst_width) {
 | 
| +void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 | 
| +                              uint8* dst_ptr, int dst_width) {
 | 
|    asm volatile (
 | 
|      "pcmpeqb   %%xmm5,%%xmm5                   \n"
 | 
|      "psrlw     $0x8,%%xmm5                     \n"
 | 
| @@ -236,7 +127,7 @@ void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
 | 
|      LABELALIGN
 | 
|    "1:                                          \n"
 | 
|      "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| -    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
 | 
| +    "movdqu    " MEMACCESS2(0x10, 0) ",%%xmm1  \n"
 | 
|      "lea       " MEMLEA(0x20,0) ",%0           \n"
 | 
|      "movdqa    %%xmm0,%%xmm2                   \n"
 | 
|      "psrlw     $0x8,%%xmm0                     \n"
 | 
| @@ -254,17 +145,12 @@ void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
 | 
|    : "+r"(src_ptr),    // %0
 | 
|      "+r"(dst_ptr),    // %1
 | 
|      "+r"(dst_width)   // %2
 | 
| -  :
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1", "xmm5"
 | 
| -#endif
 | 
| +  :: "memory", "cc", "xmm0", "xmm1", "xmm5"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| -void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
 | 
| -                                     ptrdiff_t src_stride,
 | 
| -                                     uint8* dst_ptr, int dst_width) {
 | 
| +void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 | 
| +                           uint8* dst_ptr, int dst_width) {
 | 
|    asm volatile (
 | 
|      "pcmpeqb   %%xmm5,%%xmm5                   \n"
 | 
|      "psrlw     $0x8,%%xmm5                     \n"
 | 
| @@ -274,7 +160,6 @@ void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
 | 
|      "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
 | 
|      "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
 | 
|      MEMOPREG(movdqu,0x00,0,3,1,xmm2)           //  movdqu  (%0,%3,1),%%xmm2
 | 
| -    BUNDLEALIGN
 | 
|      MEMOPREG(movdqu,0x10,0,3,1,xmm3)           //  movdqu  0x10(%0,%3,1),%%xmm3
 | 
|      "lea       " MEMLEA(0x20,0) ",%0           \n"
 | 
|      "pavgb     %%xmm2,%%xmm0                   \n"
 | 
| @@ -296,13 +181,8 @@ void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
 | 
|      "+r"(dst_ptr),    // %1
 | 
|      "+r"(dst_width)   // %2
 | 
|    : "r"((intptr_t)(src_stride))   // %3
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__native_client__) && defined(__x86_64__)
 | 
| -    , "r14"
 | 
| -#endif
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
 | 
| -#endif
 | 
| +  : "memory", "cc", NACL_R14
 | 
| +    "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| @@ -315,8 +195,8 @@ void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 | 
|  
 | 
|      LABELALIGN
 | 
|    "1:                                          \n"
 | 
| -    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| -    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
 | 
| +    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| +    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
 | 
|      "lea       " MEMLEA(0x20,0) ",%0           \n"
 | 
|      "pand      %%xmm5,%%xmm0                   \n"
 | 
|      "pand      %%xmm5,%%xmm1                   \n"
 | 
| @@ -330,11 +210,7 @@ void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 | 
|    : "+r"(src_ptr),    // %0
 | 
|      "+r"(dst_ptr),    // %1
 | 
|      "+r"(dst_width)   // %2
 | 
| -  :
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1", "xmm5"
 | 
| -#endif
 | 
| +  :: "memory", "cc", "xmm0", "xmm1", "xmm5"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| @@ -348,18 +224,16 @@ void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 | 
|  
 | 
|      LABELALIGN
 | 
|    "1:                                          \n"
 | 
| -    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| -    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
 | 
| -    MEMOPREG(movdqa,0x00,0,4,1,xmm2)           //  movdqa  (%0,%4,1),%%xmm2
 | 
| -    BUNDLEALIGN
 | 
| -    MEMOPREG(movdqa,0x10,0,4,1,xmm3)           //  movdqa  0x10(%0,%4,1),%%xmm3
 | 
| +    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| +    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
 | 
| +    MEMOPREG(movdqu,0x00,0,4,1,xmm2)           //  movdqu  (%0,%4,1),%%xmm2
 | 
| +    MEMOPREG(movdqu,0x10,0,4,1,xmm3)           //  movdqu  0x10(%0,%4,1),%%xmm3
 | 
|      "pavgb     %%xmm2,%%xmm0                   \n"
 | 
|      "pavgb     %%xmm3,%%xmm1                   \n"
 | 
| -    MEMOPREG(movdqa,0x00,0,4,2,xmm2)           //  movdqa  (%0,%4,2),%%xmm2
 | 
| -    BUNDLEALIGN
 | 
| -    MEMOPREG(movdqa,0x10,0,4,2,xmm3)           //  movdqa  0x10(%0,%4,2),%%xmm3
 | 
| -    MEMOPREG(movdqa,0x00,0,3,1,xmm4)           //  movdqa  (%0,%3,1),%%xmm4
 | 
| -    MEMOPREG(movdqa,0x10,0,3,1,xmm5)           //  movdqa  0x10(%0,%3,1),%%xmm5
 | 
| +    MEMOPREG(movdqu,0x00,0,4,2,xmm2)           //  movdqu  (%0,%4,2),%%xmm2
 | 
| +    MEMOPREG(movdqu,0x10,0,4,2,xmm3)           //  movdqu  0x10(%0,%4,2),%%xmm3
 | 
| +    MEMOPREG(movdqu,0x00,0,3,1,xmm4)           //  movdqu  (%0,%3,1),%%xmm4
 | 
| +    MEMOPREG(movdqu,0x10,0,3,1,xmm5)           //  movdqu  0x10(%0,%3,1),%%xmm5
 | 
|      "lea       " MEMLEA(0x20,0) ",%0           \n"
 | 
|      "pavgb     %%xmm4,%%xmm2                   \n"
 | 
|      "pavgb     %%xmm2,%%xmm0                   \n"
 | 
| @@ -388,13 +262,8 @@ void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 | 
|      "+r"(dst_width),   // %2
 | 
|      "+r"(stridex3)     // %3
 | 
|    : "r"((intptr_t)(src_stride))    // %4
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__native_client__) && defined(__x86_64__)
 | 
| -    , "r14"
 | 
| -#endif
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm7"
 | 
| -#endif
 | 
| +  : "memory", "cc", NACL_R14
 | 
| +    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm7"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| @@ -412,8 +281,8 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
 | 
|    asm volatile (
 | 
|      LABELALIGN
 | 
|    "1:                                          \n"
 | 
| -    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| -    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm2   \n"
 | 
| +    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| +    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm2   \n"
 | 
|      "lea       " MEMLEA(0x20,0) ",%0           \n"
 | 
|      "movdqa    %%xmm2,%%xmm1                   \n"
 | 
|      "palignr   $0x8,%%xmm0,%%xmm1              \n"
 | 
| @@ -429,11 +298,7 @@ void ScaleRowDown34_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
 | 
|    : "+r"(src_ptr),   // %0
 | 
|      "+r"(dst_ptr),   // %1
 | 
|      "+r"(dst_width)  // %2
 | 
| -  :
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
 | 
| -#endif
 | 
| +  :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| @@ -461,8 +326,8 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
 | 
|    asm volatile (
 | 
|      LABELALIGN
 | 
|    "1:                                          \n"
 | 
| -    "movdqa    " MEMACCESS(0) ",%%xmm6         \n"
 | 
| -    MEMOPREG(movdqa,0x00,0,3,1,xmm7)           //  movdqa  (%0,%3),%%xmm7
 | 
| +    "movdqu    " MEMACCESS(0) ",%%xmm6         \n"
 | 
| +    MEMOPREG(movdqu,0x00,0,3,1,xmm7)           //  movdqu  (%0,%3),%%xmm7
 | 
|      "pavgb     %%xmm7,%%xmm6                   \n"
 | 
|      "pshufb    %%xmm2,%%xmm6                   \n"
 | 
|      "pmaddubsw %%xmm5,%%xmm6                   \n"
 | 
| @@ -479,9 +344,8 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
 | 
|      "psrlw     $0x2,%%xmm6                     \n"
 | 
|      "packuswb  %%xmm6,%%xmm6                   \n"
 | 
|      "movq      %%xmm6," MEMACCESS2(0x8,1) "    \n"
 | 
| -    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm6   \n"
 | 
| -    BUNDLEALIGN
 | 
| -    MEMOPREG(movdqa,0x10,0,3,1,xmm7)           //  movdqa  0x10(%0,%3),%%xmm7
 | 
| +    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm6   \n"
 | 
| +    MEMOPREG(movdqu,0x10,0,3,1,xmm7)           //  movdqu  0x10(%0,%3),%%xmm7
 | 
|      "lea       " MEMLEA(0x20,0) ",%0           \n"
 | 
|      "pavgb     %%xmm7,%%xmm6                   \n"
 | 
|      "pshufb    %%xmm4,%%xmm6                   \n"
 | 
| @@ -498,13 +362,8 @@ void ScaleRowDown34_1_Box_SSSE3(const uint8* src_ptr,
 | 
|      "+r"(dst_width)  // %2
 | 
|    : "r"((intptr_t)(src_stride)),  // %3
 | 
|      "m"(kMadd21)     // %4
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__native_client__) && defined(__x86_64__)
 | 
| -    , "r14"
 | 
| -#endif
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
 | 
| -#endif
 | 
| +  : "memory", "cc", NACL_R14
 | 
| +    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| @@ -533,8 +392,8 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
 | 
|    asm volatile (
 | 
|      LABELALIGN
 | 
|    "1:                                          \n"
 | 
| -    "movdqa    " MEMACCESS(0) ",%%xmm6         \n"
 | 
| -    MEMOPREG(movdqa,0x00,0,3,1,xmm7)           //  movdqa  (%0,%3,1),%%xmm7
 | 
| +    "movdqu    " MEMACCESS(0) ",%%xmm6         \n"
 | 
| +    MEMOPREG(movdqu,0x00,0,3,1,xmm7)           //  movdqu  (%0,%3,1),%%xmm7
 | 
|      "pavgb     %%xmm6,%%xmm7                   \n"
 | 
|      "pavgb     %%xmm7,%%xmm6                   \n"
 | 
|      "pshufb    %%xmm2,%%xmm6                   \n"
 | 
| @@ -553,8 +412,8 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
 | 
|      "psrlw     $0x2,%%xmm6                     \n"
 | 
|      "packuswb  %%xmm6,%%xmm6                   \n"
 | 
|      "movq      %%xmm6," MEMACCESS2(0x8,1) "    \n"
 | 
| -    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm6   \n"
 | 
| -    MEMOPREG(movdqa,0x10,0,3,1,xmm7)           //  movdqa  0x10(%0,%3,1),%%xmm7
 | 
| +    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm6   \n"
 | 
| +    MEMOPREG(movdqu,0x10,0,3,1,xmm7)           //  movdqu  0x10(%0,%3,1),%%xmm7
 | 
|      "lea       " MEMLEA(0x20,0) ",%0           \n"
 | 
|      "pavgb     %%xmm6,%%xmm7                   \n"
 | 
|      "pavgb     %%xmm7,%%xmm6                   \n"
 | 
| @@ -572,13 +431,8 @@ void ScaleRowDown34_0_Box_SSSE3(const uint8* src_ptr,
 | 
|        "+r"(dst_width)  // %2
 | 
|      : "r"((intptr_t)(src_stride)),  // %3
 | 
|        "m"(kMadd21)     // %4
 | 
| -    : "memory", "cc"
 | 
| -#if defined(__native_client__) && defined(__x86_64__)
 | 
| -    , "r14"
 | 
| -#endif
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
 | 
| -#endif
 | 
| +    : "memory", "cc", NACL_R14
 | 
| +      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| @@ -590,8 +444,8 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
 | 
|  
 | 
|      LABELALIGN
 | 
|    "1:                                          \n"
 | 
| -    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| -    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
 | 
| +    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| +    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
 | 
|      "lea       " MEMLEA(0x20,0) ",%0           \n"
 | 
|      "pshufb    %%xmm4,%%xmm0                   \n"
 | 
|      "pshufb    %%xmm5,%%xmm1                   \n"
 | 
| @@ -607,10 +461,7 @@ void ScaleRowDown38_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
 | 
|      "+r"(dst_width)  // %2
 | 
|    : "m"(kShuf38a),   // %3
 | 
|      "m"(kShuf38b)    // %4
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__SSE2__)
 | 
| -      , "xmm0", "xmm1", "xmm4", "xmm5"
 | 
| -#endif
 | 
| +  : "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| @@ -631,9 +482,10 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
 | 
|    asm volatile (
 | 
|      LABELALIGN
 | 
|    "1:                                          \n"
 | 
| -    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| -    MEMOPREG(pavgb,0x00,0,3,1,xmm0)            //  pavgb   (%0,%3,1),%%xmm0
 | 
| +    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| +    MEMOPREG(movdqu,0x00,0,3,1,xmm1)           //  movdqu  (%0,%3,1),%%xmm1
 | 
|      "lea       " MEMLEA(0x10,0) ",%0           \n"
 | 
| +    "pavgb     %%xmm1,%%xmm0                   \n"
 | 
|      "movdqa    %%xmm0,%%xmm1                   \n"
 | 
|      "pshufb    %%xmm2,%%xmm1                   \n"
 | 
|      "movdqa    %%xmm0,%%xmm6                   \n"
 | 
| @@ -643,23 +495,18 @@ void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
 | 
|      "paddusw   %%xmm0,%%xmm1                   \n"
 | 
|      "pmulhuw   %%xmm5,%%xmm1                   \n"
 | 
|      "packuswb  %%xmm1,%%xmm1                   \n"
 | 
| -    "sub       $0x6,%2                         \n"
 | 
|      "movd      %%xmm1," MEMACCESS(1) "         \n"
 | 
|      "psrlq     $0x10,%%xmm1                    \n"
 | 
|      "movd      %%xmm1," MEMACCESS2(0x2,1) "    \n"
 | 
|      "lea       " MEMLEA(0x6,1) ",%1            \n"
 | 
| +    "sub       $0x6,%2                         \n"
 | 
|      "jg        1b                              \n"
 | 
|    : "+r"(src_ptr),     // %0
 | 
|      "+r"(dst_ptr),     // %1
 | 
|      "+r"(dst_width)    // %2
 | 
|    : "r"((intptr_t)(src_stride))  // %3
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__native_client__) && defined(__x86_64__)
 | 
| -    , "r14"
 | 
| -#endif
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
 | 
| -#endif
 | 
| +  : "memory", "cc", NACL_R14
 | 
| +    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| @@ -679,8 +526,8 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
 | 
|    asm volatile (
 | 
|      LABELALIGN
 | 
|    "1:                                          \n"
 | 
| -    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| -    MEMOPREG(movdqa,0x00,0,3,1,xmm6)           //  movdqa  (%0,%3,1),%%xmm6
 | 
| +    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| +    MEMOPREG(movdqu,0x00,0,3,1,xmm6)           //  movdqu  (%0,%3,1),%%xmm6
 | 
|      "movhlps   %%xmm0,%%xmm1                   \n"
 | 
|      "movhlps   %%xmm6,%%xmm7                   \n"
 | 
|      "punpcklbw %%xmm5,%%xmm0                   \n"
 | 
| @@ -689,7 +536,7 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
 | 
|      "punpcklbw %%xmm5,%%xmm7                   \n"
 | 
|      "paddusw   %%xmm6,%%xmm0                   \n"
 | 
|      "paddusw   %%xmm7,%%xmm1                   \n"
 | 
| -    MEMOPREG(movdqa,0x00,0,3,2,xmm6)           //  movdqa  (%0,%3,2),%%xmm6
 | 
| +    MEMOPREG(movdqu,0x00,0,3,2,xmm6)           //  movdqu  (%0,%3,2),%%xmm6
 | 
|      "lea       " MEMLEA(0x10,0) ",%0           \n"
 | 
|      "movhlps   %%xmm6,%%xmm7                   \n"
 | 
|      "punpcklbw %%xmm5,%%xmm6                   \n"
 | 
| @@ -711,23 +558,18 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
 | 
|      "paddusw   %%xmm7,%%xmm6                   \n"
 | 
|      "pmulhuw   %%xmm4,%%xmm6                   \n"
 | 
|      "packuswb  %%xmm6,%%xmm6                   \n"
 | 
| -    "sub       $0x6,%2                         \n"
 | 
|      "movd      %%xmm6," MEMACCESS(1) "         \n"
 | 
|      "psrlq     $0x10,%%xmm6                    \n"
 | 
|      "movd      %%xmm6," MEMACCESS2(0x2,1) "    \n"
 | 
|      "lea       " MEMLEA(0x6,1) ",%1            \n"
 | 
| +    "sub       $0x6,%2                         \n"
 | 
|      "jg        1b                              \n"
 | 
|    : "+r"(src_ptr),    // %0
 | 
|      "+r"(dst_ptr),    // %1
 | 
|      "+r"(dst_width)   // %2
 | 
|    : "r"((intptr_t)(src_stride))   // %3
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__native_client__) && defined(__x86_64__)
 | 
| -    , "r14"
 | 
| -#endif
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
 | 
| -#endif
 | 
| +  : "memory", "cc", NACL_R14
 | 
| +    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| @@ -741,7 +583,7 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 | 
|  
 | 
|      LABELALIGN
 | 
|    "1:                                          \n"
 | 
| -    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| +    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
 | 
|      "mov       %0,%3                           \n"
 | 
|      "add       %6,%0                           \n"
 | 
|      "movdqa    %%xmm0,%%xmm1                   \n"
 | 
| @@ -753,7 +595,7 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 | 
|  
 | 
|      LABELALIGN
 | 
|    "2:                                          \n"
 | 
| -    "movdqa    " MEMACCESS(0) ",%%xmm2         \n"
 | 
| +    "movdqu    " MEMACCESS(0) ",%%xmm2         \n"
 | 
|      "add       %6,%0                           \n"
 | 
|      "movdqa    %%xmm2,%%xmm3                   \n"
 | 
|      "punpcklbw %%xmm4,%%xmm2                   \n"
 | 
| @@ -765,8 +607,8 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 | 
|  
 | 
|      LABELALIGN
 | 
|    "3:                                          \n"
 | 
| -    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
 | 
| -    "movdqa    %%xmm1," MEMACCESS2(0x10,1) "   \n"
 | 
| +    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
 | 
| +    "movdqu    %%xmm1," MEMACCESS2(0x10,1) "   \n"
 | 
|      "lea       " MEMLEA(0x10,3) ",%0           \n"
 | 
|      "lea       " MEMLEA(0x20,1) ",%1           \n"
 | 
|      "sub       $0x10,%4                        \n"
 | 
| @@ -778,10 +620,7 @@ void ScaleAddRows_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
 | 
|      "+r"(src_width),   // %4
 | 
|      "+rm"(src_height)  // %5
 | 
|    : "rm"((intptr_t)(src_stride))  // %6
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
 | 
| -#endif
 | 
| +  : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| @@ -813,7 +652,6 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
 | 
|      MEMOPARG(movzwl,0x00,1,3,1,k2)             //  movzwl  (%1,%3,1),%k2
 | 
|      "movd      %k2,%%xmm0                      \n"
 | 
|      "psrlw     $0x9,%%xmm1                     \n"
 | 
| -    BUNDLEALIGN
 | 
|      MEMOPARG(movzwl,0x00,1,4,1,k2)             //  movzwl  (%1,%4,1),%k2
 | 
|      "movd      %k2,%%xmm4                      \n"
 | 
|      "pshufb    %%xmm5,%%xmm1                   \n"
 | 
| @@ -853,13 +691,8 @@ void ScaleFilterCols_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
 | 
|      "+rm"(dst_width)   // %5
 | 
|    : "rm"(x),           // %6
 | 
|      "rm"(dx)           // %7
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__native_client__) && defined(__x86_64__)
 | 
| -    , "r14"
 | 
| -#endif
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
 | 
| -#endif
 | 
| +  : "memory", "cc", NACL_R14
 | 
| +    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| @@ -870,25 +703,21 @@ void ScaleColsUp2_SSE2(uint8* dst_ptr, const uint8* src_ptr,
 | 
|    asm volatile (
 | 
|      LABELALIGN
 | 
|    "1:                                          \n"
 | 
| -    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
 | 
| +    "movdqu    " MEMACCESS(1) ",%%xmm0         \n"
 | 
|      "lea       " MEMLEA(0x10,1) ",%1           \n"
 | 
|      "movdqa    %%xmm0,%%xmm1                   \n"
 | 
|      "punpcklbw %%xmm0,%%xmm0                   \n"
 | 
|      "punpckhbw %%xmm1,%%xmm1                   \n"
 | 
| -    "sub       $0x20,%2                         \n"
 | 
| -    "movdqa    %%xmm0," MEMACCESS(0) "         \n"
 | 
| -    "movdqa    %%xmm1," MEMACCESS2(0x10,0) "   \n"
 | 
| +    "movdqu    %%xmm0," MEMACCESS(0) "         \n"
 | 
| +    "movdqu    %%xmm1," MEMACCESS2(0x10,0) "   \n"
 | 
|      "lea       " MEMLEA(0x20,0) ",%0           \n"
 | 
| +    "sub       $0x20,%2                         \n"
 | 
|      "jg        1b                              \n"
 | 
|  
 | 
|    : "+r"(dst_ptr),     // %0
 | 
|      "+r"(src_ptr),     // %1
 | 
|      "+r"(dst_width)    // %2
 | 
| -  :
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1"
 | 
| -#endif
 | 
| +  :: "memory", "cc", "xmm0", "xmm1"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| @@ -898,22 +727,18 @@ void ScaleARGBRowDown2_SSE2(const uint8* src_argb,
 | 
|    asm volatile (
 | 
|      LABELALIGN
 | 
|    "1:                                          \n"
 | 
| -    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| -    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
 | 
| +    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| +    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
 | 
|      "lea       " MEMLEA(0x20,0) ",%0           \n"
 | 
|      "shufps    $0xdd,%%xmm1,%%xmm0             \n"
 | 
| -    "sub       $0x4,%2                         \n"
 | 
| -    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
 | 
| +    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
 | 
|      "lea       " MEMLEA(0x10,1) ",%1           \n"
 | 
| +    "sub       $0x4,%2                         \n"
 | 
|      "jg        1b                              \n"
 | 
|    : "+r"(src_argb),  // %0
 | 
|      "+r"(dst_argb),  // %1
 | 
|      "+r"(dst_width)  // %2
 | 
| -  :
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1"
 | 
| -#endif
 | 
| +  :: "memory", "cc", "xmm0", "xmm1"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| @@ -923,25 +748,21 @@ void ScaleARGBRowDown2Linear_SSE2(const uint8* src_argb,
 | 
|    asm volatile (
 | 
|      LABELALIGN
 | 
|    "1:                                          \n"
 | 
| -    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| -    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
 | 
| +    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| +    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
 | 
|      "lea       " MEMLEA(0x20,0) ",%0           \n"
 | 
|      "movdqa    %%xmm0,%%xmm2                   \n"
 | 
|      "shufps    $0x88,%%xmm1,%%xmm0             \n"
 | 
|      "shufps    $0xdd,%%xmm1,%%xmm2             \n"
 | 
|      "pavgb     %%xmm2,%%xmm0                   \n"
 | 
| -    "sub       $0x4,%2                         \n"
 | 
| -    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
 | 
| +    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
 | 
|      "lea       " MEMLEA(0x10,1) ",%1           \n"
 | 
| +    "sub       $0x4,%2                         \n"
 | 
|      "jg        1b                              \n"
 | 
|    : "+r"(src_argb),  // %0
 | 
|      "+r"(dst_argb),  // %1
 | 
|      "+r"(dst_width)  // %2
 | 
| -  :
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1"
 | 
| -#endif
 | 
| +  :: "memory", "cc", "xmm0", "xmm1"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| @@ -951,11 +772,10 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
 | 
|    asm volatile (
 | 
|      LABELALIGN
 | 
|    "1:                                          \n"
 | 
| -    "movdqa    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| -    "movdqa    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
 | 
| -    BUNDLEALIGN
 | 
| -    MEMOPREG(movdqa,0x00,0,3,1,xmm2)           //  movdqa   (%0,%3,1),%%xmm2
 | 
| -    MEMOPREG(movdqa,0x10,0,3,1,xmm3)           //  movdqa   0x10(%0,%3,1),%%xmm3
 | 
| +    "movdqu    " MEMACCESS(0) ",%%xmm0         \n"
 | 
| +    "movdqu    " MEMACCESS2(0x10,0) ",%%xmm1   \n"
 | 
| +    MEMOPREG(movdqu,0x00,0,3,1,xmm2)           //  movdqu   (%0,%3,1),%%xmm2
 | 
| +    MEMOPREG(movdqu,0x10,0,3,1,xmm3)           //  movdqu   0x10(%0,%3,1),%%xmm3
 | 
|      "lea       " MEMLEA(0x20,0) ",%0           \n"
 | 
|      "pavgb     %%xmm2,%%xmm0                   \n"
 | 
|      "pavgb     %%xmm3,%%xmm1                   \n"
 | 
| @@ -963,21 +783,16 @@ void ScaleARGBRowDown2Box_SSE2(const uint8* src_argb,
 | 
|      "shufps    $0x88,%%xmm1,%%xmm0             \n"
 | 
|      "shufps    $0xdd,%%xmm1,%%xmm2             \n"
 | 
|      "pavgb     %%xmm2,%%xmm0                   \n"
 | 
| -    "sub       $0x4,%2                         \n"
 | 
| -    "movdqa    %%xmm0," MEMACCESS(1) "         \n"
 | 
| +    "movdqu    %%xmm0," MEMACCESS(1) "         \n"
 | 
|      "lea       " MEMLEA(0x10,1) ",%1           \n"
 | 
| +    "sub       $0x4,%2                         \n"
 | 
|      "jg        1b                              \n"
 | 
|    : "+r"(src_argb),   // %0
 | 
|      "+r"(dst_argb),   // %1
 | 
|      "+r"(dst_width)   // %2
 | 
|    : "r"((intptr_t)(src_stride))   // %3
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__native_client__) && defined(__x86_64__)
 | 
| -    , "r14"
 | 
| -#endif
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1", "xmm2", "xmm3"
 | 
| -#endif
 | 
| +  : "memory", "cc", NACL_R14
 | 
| +    "xmm0", "xmm1", "xmm2", "xmm3"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| @@ -996,29 +811,22 @@ void ScaleARGBRowDownEven_SSE2(const uint8* src_argb, ptrdiff_t src_stride,
 | 
|      "movd      " MEMACCESS(0) ",%%xmm0         \n"
 | 
|      MEMOPREG(movd,0x00,0,1,1,xmm1)             //  movd      (%0,%1,1),%%xmm1
 | 
|      "punpckldq %%xmm1,%%xmm0                   \n"
 | 
| -    BUNDLEALIGN
 | 
|      MEMOPREG(movd,0x00,0,1,2,xmm2)             //  movd      (%0,%1,2),%%xmm2
 | 
|      MEMOPREG(movd,0x00,0,4,1,xmm3)             //  movd      (%0,%4,1),%%xmm3
 | 
|      "lea       " MEMLEA4(0x00,0,1,4) ",%0      \n"
 | 
|      "punpckldq %%xmm3,%%xmm2                   \n"
 | 
|      "punpcklqdq %%xmm2,%%xmm0                  \n"
 | 
| -    "sub       $0x4,%3                         \n"
 | 
| -    "movdqa    %%xmm0," MEMACCESS(2) "         \n"
 | 
| +    "movdqu    %%xmm0," MEMACCESS(2) "         \n"
 | 
|      "lea       " MEMLEA(0x10,2) ",%2           \n"
 | 
| +    "sub       $0x4,%3                         \n"
 | 
|      "jg        1b                              \n"
 | 
|    : "+r"(src_argb),      // %0
 | 
|      "+r"(src_stepx_x4),  // %1
 | 
|      "+r"(dst_argb),      // %2
 | 
|      "+r"(dst_width),     // %3
 | 
|      "+r"(src_stepx_x12)  // %4
 | 
| -  :
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__native_client__) && defined(__x86_64__)
 | 
| -    , "r14"
 | 
| -#endif
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1", "xmm2", "xmm3"
 | 
| -#endif
 | 
| +  :: "memory", "cc", NACL_R14
 | 
| +    "xmm0", "xmm1", "xmm2", "xmm3"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| @@ -1040,11 +848,9 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
 | 
|      "movq      " MEMACCESS(0) ",%%xmm0         \n"
 | 
|      MEMOPREG(movhps,0x00,0,1,1,xmm0)           //  movhps    (%0,%1,1),%%xmm0
 | 
|      MEMOPREG(movq,0x00,0,1,2,xmm1)             //  movq      (%0,%1,2),%%xmm1
 | 
| -    BUNDLEALIGN
 | 
|      MEMOPREG(movhps,0x00,0,4,1,xmm1)           //  movhps    (%0,%4,1),%%xmm1
 | 
|      "lea       " MEMLEA4(0x00,0,1,4) ",%0      \n"
 | 
|      "movq      " MEMACCESS(5) ",%%xmm2         \n"
 | 
| -    BUNDLEALIGN
 | 
|      MEMOPREG(movhps,0x00,5,1,1,xmm2)           //  movhps    (%5,%1,1),%%xmm2
 | 
|      MEMOPREG(movq,0x00,5,1,2,xmm3)             //  movq      (%5,%1,2),%%xmm3
 | 
|      MEMOPREG(movhps,0x00,5,4,1,xmm3)           //  movhps    (%5,%4,1),%%xmm3
 | 
| @@ -1055,9 +861,9 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
 | 
|      "shufps    $0x88,%%xmm1,%%xmm0             \n"
 | 
|      "shufps    $0xdd,%%xmm1,%%xmm2             \n"
 | 
|      "pavgb     %%xmm2,%%xmm0                   \n"
 | 
| -    "sub       $0x4,%3                         \n"
 | 
| -    "movdqa    %%xmm0," MEMACCESS(2) "         \n"
 | 
| +    "movdqu    %%xmm0," MEMACCESS(2) "         \n"
 | 
|      "lea       " MEMLEA(0x10,2) ",%2           \n"
 | 
| +    "sub       $0x4,%3                         \n"
 | 
|      "jg        1b                              \n"
 | 
|    : "+r"(src_argb),       // %0
 | 
|      "+r"(src_stepx_x4),   // %1
 | 
| @@ -1065,14 +871,8 @@ void ScaleARGBRowDownEvenBox_SSE2(const uint8* src_argb,
 | 
|      "+rm"(dst_width),     // %3
 | 
|      "+r"(src_stepx_x12),  // %4
 | 
|      "+r"(row1)            // %5
 | 
| -  :
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__native_client__) && defined(__x86_64__)
 | 
| -    , "r14"
 | 
| -#endif
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1", "xmm2", "xmm3"
 | 
| -#endif
 | 
| +  :: "memory", "cc", NACL_R14
 | 
| +    "xmm0", "xmm1", "xmm2", "xmm3"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| @@ -1111,15 +911,14 @@ void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
 | 
|      "pextrw    $0x3,%%xmm2,%k1                 \n"
 | 
|      "punpckldq %%xmm4,%%xmm1                   \n"
 | 
|      "punpcklqdq %%xmm1,%%xmm0                  \n"
 | 
| -    "sub       $0x4,%4                         \n"
 | 
|      "movdqu    %%xmm0," MEMACCESS(2) "         \n"
 | 
|      "lea       " MEMLEA(0x10,2) ",%2           \n"
 | 
| +    "sub       $0x4,%4                         \n"
 | 
|      "jge       40b                             \n"
 | 
|  
 | 
|    "49:                                         \n"
 | 
|      "test      $0x2,%4                         \n"
 | 
|      "je        29f                             \n"
 | 
| -    BUNDLEALIGN
 | 
|      MEMOPREG(movd,0x00,3,0,4,xmm0)             //  movd      (%3,%0,4),%%xmm0
 | 
|      MEMOPREG(movd,0x00,3,1,4,xmm1)             //  movd      (%3,%1,4),%%xmm1
 | 
|      "pextrw    $0x5,%%xmm2,%k0                 \n"
 | 
| @@ -1139,13 +938,8 @@ void ScaleARGBCols_SSE2(uint8* dst_argb, const uint8* src_argb,
 | 
|      "+r"(dst_width)    // %4
 | 
|    : "rm"(x),           // %5
 | 
|      "rm"(dx)           // %6
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__native_client__) && defined(__x86_64__)
 | 
| -    , "r14"
 | 
| -#endif
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
 | 
| -#endif
 | 
| +  : "memory", "cc", NACL_R14
 | 
| +    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| @@ -1156,28 +950,22 @@ void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
 | 
|    asm volatile (
 | 
|      LABELALIGN
 | 
|    "1:                                          \n"
 | 
| -    "movdqa    " MEMACCESS(1) ",%%xmm0         \n"
 | 
| +    "movdqu    " MEMACCESS(1) ",%%xmm0         \n"
 | 
|      "lea       " MEMLEA(0x10,1) ",%1           \n"
 | 
|      "movdqa    %%xmm0,%%xmm1                   \n"
 | 
|      "punpckldq %%xmm0,%%xmm0                   \n"
 | 
|      "punpckhdq %%xmm1,%%xmm1                   \n"
 | 
| -    "sub       $0x8,%2                         \n"
 | 
| -    "movdqa    %%xmm0," MEMACCESS(0) "         \n"
 | 
| -    "movdqa    %%xmm1," MEMACCESS2(0x10,0) "   \n"
 | 
| +    "movdqu    %%xmm0," MEMACCESS(0) "         \n"
 | 
| +    "movdqu    %%xmm1," MEMACCESS2(0x10,0) "   \n"
 | 
|      "lea       " MEMLEA(0x20,0) ",%0           \n"
 | 
| +    "sub       $0x8,%2                         \n"
 | 
|      "jg        1b                              \n"
 | 
|  
 | 
|    : "+r"(dst_argb),    // %0
 | 
|      "+r"(src_argb),    // %1
 | 
|      "+r"(dst_width)    // %2
 | 
| -  :
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__native_client__) && defined(__x86_64__)
 | 
| -    , "r14"
 | 
| -#endif
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1"
 | 
| -#endif
 | 
| +  :: "memory", "cc", NACL_R14
 | 
| +    "xmm0", "xmm1"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| @@ -1225,7 +1013,6 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
 | 
|      "paddd     %%xmm3,%%xmm2                   \n"
 | 
|      MEMOPREG(movq,0x00,1,3,4,xmm0)             //  movq      (%1,%3,4),%%xmm0
 | 
|      "psrlw     $0x9,%%xmm1                     \n"
 | 
| -    BUNDLEALIGN
 | 
|      MEMOPREG(movhps,0x00,1,4,4,xmm0)           //  movhps    (%1,%4,4),%%xmm0
 | 
|      "pshufb    %%xmm5,%%xmm1                   \n"
 | 
|      "pshufb    %%xmm4,%%xmm0                   \n"
 | 
| @@ -1245,7 +1032,6 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
 | 
|      "add       $0x1,%2                         \n"
 | 
|      "jl        99f                             \n"
 | 
|      "psrlw     $0x9,%%xmm2                     \n"
 | 
| -    BUNDLEALIGN
 | 
|      MEMOPREG(movq,0x00,1,3,4,xmm0)             //  movq      (%1,%3,4),%%xmm0
 | 
|      "pshufb    %%xmm5,%%xmm2                   \n"
 | 
|      "pshufb    %%xmm4,%%xmm0                   \n"
 | 
| @@ -1264,13 +1050,8 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
 | 
|      "+r"(x1)           // %4
 | 
|    : "rm"(x),           // %5
 | 
|      "rm"(dx)           // %6
 | 
| -  : "memory", "cc"
 | 
| -#if defined(__native_client__) && defined(__x86_64__)
 | 
| -    , "r14"
 | 
| -#endif
 | 
| -#if defined(__SSE2__)
 | 
| -    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
 | 
| -#endif
 | 
| +  : "memory", "cc", NACL_R14
 | 
| +    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
 | 
|    );
 | 
|  }
 | 
|  
 | 
| 
 |