Index: source/libvpx/third_party/libyuv/source/rotate.cc
diff --git a/source/libvpx/third_party/libyuv/source/rotate.cc b/source/libvpx/third_party/libyuv/source/rotate.cc
index 2ef3228cb80219ff6693fd944637b979d0f19e0a..5acaccfd89d3323b1c1f0770fc73e2b053b901d6 100644
--- a/source/libvpx/third_party/libyuv/source/rotate.cc
+++ b/source/libvpx/third_party/libyuv/source/rotate.cc
@@ -42,11 +42,7 @@ extern "C" {
 #endif
 
 #if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
-    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
-#define HAS_MIRRORROW_NEON
-void MirrorRow_NEON(const uint8* src, uint8* dst, int width);
-#define HAS_MIRRORROW_UV_NEON
-void MirrorUVRow_NEON(const uint8* src, uint8* dst_a, uint8* dst_b, int width);
+    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
 #define HAS_TRANSPOSE_WX8_NEON
 void TransposeWx8_NEON(const uint8* src, int src_stride,
                        uint8* dst, int dst_stride, int width);
@@ -55,7 +51,7 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
                          uint8* dst_a, int dst_stride_a,
                          uint8* dst_b, int dst_stride_b,
                          int width);
-#endif  // defined(__ARM_NEON__)
+#endif
 
 #if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
     defined(__mips__) && \
@@ -194,31 +190,31 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
  convertloop:
     // Read in the data from the source pointer.
     // First round of bit swap.
-    movdqa    xmm0, [eax]
-    movdqa    xmm1, [eax + edi]
+    movdqu    xmm0, [eax]
+    movdqu    xmm1, [eax + edi]
     lea       eax, [eax + 2 * edi]
     movdqa    xmm7, xmm0  // use xmm7 as temp register.
     punpcklbw xmm0, xmm1
     punpckhbw xmm7, xmm1
     movdqa    xmm1, xmm7
-    movdqa    xmm2, [eax]
-    movdqa    xmm3, [eax + edi]
+    movdqu    xmm2, [eax]
+    movdqu    xmm3, [eax + edi]
     lea       eax, [eax + 2 * edi]
     movdqa    xmm7, xmm2
     punpcklbw xmm2, xmm3
     punpckhbw xmm7, xmm3
     movdqa    xmm3, xmm7
-    movdqa    xmm4, [eax]
-    movdqa    xmm5, [eax + edi]
+    movdqu    xmm4, [eax]
+    movdqu    xmm5, [eax + edi]
     lea       eax, [eax + 2 * edi]
     movdqa    xmm7, xmm4
     punpcklbw xmm4, xmm5
     punpckhbw xmm7, xmm5
     movdqa    xmm5, xmm7
-    movdqa    xmm6, [eax]
-    movdqa    xmm7, [eax + edi]
+    movdqu    xmm6, [eax]
+    movdqu    xmm7, [eax + edi]
     lea       eax, [eax + 2 * edi]
-    movdqa    [esp], xmm5  // backup xmm5
+    movdqu    [esp], xmm5  // backup xmm5
     neg       edi
     movdqa    xmm5, xmm6   // use xmm5 as temp register.
     punpcklbw xmm6, xmm7
@@ -239,8 +235,8 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
     punpcklwd xmm4, xmm6
     punpckhwd xmm5, xmm6
     movdqa    xmm6, xmm5
-    movdqa    xmm5, [esp]  // restore xmm5
-    movdqa    [esp], xmm6  // backup xmm6
+    movdqu    xmm5, [esp]  // restore xmm5
+    movdqu    [esp], xmm6  // backup xmm6
     movdqa    xmm6, xmm5    // use xmm6 as temp register.
     punpcklwd xmm5, xmm7
     punpckhwd xmm6, xmm7
@@ -251,7 +247,7 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
     punpckldq xmm0, xmm4
     punpckhdq xmm6, xmm4
     movdqa    xmm4, xmm6
-    movdqa    xmm6, [esp]  // restore xmm6
+    movdqu    xmm6, [esp]  // restore xmm6
     movlpd    qword ptr [edx], xmm0
     movhpd    qword ptr [ebx], xmm0
     movlpd    qword ptr [edx + esi], xmm4
@@ -296,7 +292,8 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
     ret
   }
 }
-#elif !defined(LIBYUV_DISABLE_X86) && \
+#endif
+#if !defined(LIBYUV_DISABLE_X86) && \
     (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
 #define HAS_TRANSPOSE_WX8_SSSE3
 static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
@@ -379,10 +376,8 @@ static void TransposeWx8_SSSE3(const uint8* src, int src_stride,
       "+r"(width)   // %2
     : "r"((intptr_t)(src_stride)),  // %3
       "r"((intptr_t)(dst_stride))   // %4
-    : "memory", "cc"
-  #if defined(__SSE2__)
-      , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-  #endif
+    : "memory", "cc",
+      "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
   );
 }
 
@@ -411,31 +406,31 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
     "mov    0x2c(%ecx),%ecx                    \n"
 
 "1:                                            \n"
-    "movdqa (%eax),%xmm0                       \n"
-    "movdqa (%eax,%edi,1),%xmm1                \n"
+    "movdqu (%eax),%xmm0                       \n"
+    "movdqu (%eax,%edi,1),%xmm1                \n"
     "lea    (%eax,%edi,2),%eax                 \n"
     "movdqa %xmm0,%xmm7                        \n"
     "punpcklbw %xmm1,%xmm0                     \n"
     "punpckhbw %xmm1,%xmm7                     \n"
     "movdqa %xmm7,%xmm1                        \n"
-    "movdqa (%eax),%xmm2                       \n"
-    "movdqa (%eax,%edi,1),%xmm3                \n"
+    "movdqu (%eax),%xmm2                       \n"
+    "movdqu (%eax,%edi,1),%xmm3                \n"
    "lea    (%eax,%edi,2),%eax                 \n"
     "movdqa %xmm2,%xmm7                        \n"
     "punpcklbw %xmm3,%xmm2                     \n"
     "punpckhbw %xmm3,%xmm7                     \n"
     "movdqa %xmm7,%xmm3                        \n"
-    "movdqa (%eax),%xmm4                       \n"
-    "movdqa (%eax,%edi,1),%xmm5                \n"
+    "movdqu (%eax),%xmm4                       \n"
+    "movdqu (%eax,%edi,1),%xmm5                \n"
     "lea    (%eax,%edi,2),%eax                 \n"
     "movdqa %xmm4,%xmm7                        \n"
     "punpcklbw %xmm5,%xmm4                     \n"
     "punpckhbw %xmm5,%xmm7                     \n"
     "movdqa %xmm7,%xmm5                        \n"
-    "movdqa (%eax),%xmm6                       \n"
-    "movdqa (%eax,%edi,1),%xmm7                \n"
+    "movdqu (%eax),%xmm6                       \n"
+    "movdqu (%eax,%edi,1),%xmm7                \n"
     "lea    (%eax,%edi,2),%eax                 \n"
-    "movdqa %xmm5,(%esp)                       \n"
+    "movdqu %xmm5,(%esp)                       \n"
     "neg    %edi                               \n"
     "movdqa %xmm6,%xmm5                        \n"
     "punpcklbw %xmm7,%xmm6                     \n"
@@ -455,8 +450,8 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
     "punpcklwd %xmm6,%xmm4                     \n"
     "punpckhwd %xmm6,%xmm5                     \n"
     "movdqa %xmm5,%xmm6                        \n"
-    "movdqa (%esp),%xmm5                       \n"
-    "movdqa %xmm6,(%esp)                       \n"
+    "movdqu (%esp),%xmm5                       \n"
+    "movdqu %xmm6,(%esp)                       \n"
     "movdqa %xmm5,%xmm6                        \n"
     "punpcklwd %xmm7,%xmm5                     \n"
     "punpckhwd %xmm7,%xmm6                     \n"
@@ -465,7 +460,7 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
     "punpckldq %xmm4,%xmm0                     \n"
     "punpckhdq %xmm4,%xmm6                     \n"
     "movdqa %xmm6,%xmm4                        \n"
-    "movdqa (%esp),%xmm6                       \n"
+    "movdqu (%esp),%xmm6                       \n"
     "movlpd %xmm0,(%edx)                       \n"
     "movhpd %xmm0,(%ebx)                       \n"
     "movlpd %xmm4,(%edx,%esi,1)                \n"
@@ -514,7 +509,8 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
     "ret                                       \n"
 #endif
 );
-#elif !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
+#endif
+#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
     defined(__x86_64__)
 // 64 bit version has enough registers to do 16x8 to 8x16 at a time.
 #define HAS_TRANSPOSE_WX8_FAST_SSSE3
@@ -525,38 +521,38 @@ static void TransposeWx8_FAST_SSSE3(const uint8* src, int src_stride,
   // First round of bit swap.
   ".p2align  2                                 \n"
 "1:                                            \n"
-  "movdqa     (%0),%%xmm0                      \n"
-  "movdqa     (%0,%3),%%xmm1                   \n"
+  "movdqu     (%0),%%xmm0                      \n"
+  "movdqu     (%0,%3),%%xmm1                   \n"
   "lea        (%0,%3,2),%0                     \n"
   "movdqa     %%xmm0,%%xmm8                    \n"
   "punpcklbw  %%xmm1,%%xmm0                    \n"
   "punpckhbw  %%xmm1,%%xmm8                    \n"
-  "movdqa     (%0),%%xmm2                      \n"
+  "movdqu     (%0),%%xmm2                      \n"
   "movdqa     %%xmm0,%%xmm1                    \n"
   "movdqa     %%xmm8,%%xmm9                    \n"
   "palignr    $0x8,%%xmm1,%%xmm1               \n"
   "palignr    $0x8,%%xmm9,%%xmm9               \n"
-  "movdqa     (%0,%3),%%xmm3                   \n"
+  "movdqu     (%0,%3),%%xmm3                   \n"
   "lea        (%0,%3,2),%0                     \n"
   "movdqa     %%xmm2,%%xmm10                   \n"
   "punpcklbw  %%xmm3,%%xmm2                    \n"
   "punpckhbw  %%xmm3,%%xmm10                   \n"
   "movdqa     %%xmm2,%%xmm3                    \n"
   "movdqa     %%xmm10,%%xmm11                  \n"
-  "movdqa     (%0),%%xmm4                      \n"
+  "movdqu     (%0),%%xmm4                      \n"
   "palignr    $0x8,%%xmm3,%%xmm3               \n"
   "palignr    $0x8,%%xmm11,%%xmm11             \n"
-  "movdqa     (%0,%3),%%xmm5                   \n"
+  "movdqu     (%0,%3),%%xmm5                   \n"
   "lea        (%0,%3,2),%0                     \n"
   "movdqa     %%xmm4,%%xmm12                   \n"
   "punpcklbw  %%xmm5,%%xmm4                    \n"
   "punpckhbw  %%xmm5,%%xmm12                   \n"
   "movdqa     %%xmm4,%%xmm5                    \n"
   "movdqa     %%xmm12,%%xmm13                  \n"
-  "movdqa     (%0),%%xmm6                      \n"
+  "movdqu     (%0),%%xmm6                      \n"
   "palignr    $0x8,%%xmm5,%%xmm5               \n"
   "palignr    $0x8,%%xmm13,%%xmm13             \n"
-  "movdqa     (%0,%3),%%xmm7                   \n"
+  "movdqu     (%0,%3),%%xmm7                   \n"
   "lea        (%0,%3,2),%0                     \n"
   "movdqa     %%xmm6,%%xmm14                   \n"
   "punpcklbw  %%xmm7,%%xmm6                    \n"
@@ -666,29 +662,29 @@ static void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
   // First round of bit swap.
   ".p2align  2                                 \n"
 "1:                                            \n"
-  "movdqa     (%0),%%xmm0                      \n"
-  "movdqa     (%0,%4),%%xmm1                   \n"
+  "movdqu     (%0),%%xmm0                      \n"
+  "movdqu     (%0,%4),%%xmm1                   \n"
   "lea        (%0,%4,2),%0                     \n"
   "movdqa     %%xmm0,%%xmm8                    \n"
   "punpcklbw  %%xmm1,%%xmm0                    \n"
   "punpckhbw  %%xmm1,%%xmm8                    \n"
   "movdqa     %%xmm8,%%xmm1                    \n"
-  "movdqa     (%0),%%xmm2                      \n"
-  "movdqa     (%0,%4),%%xmm3                   \n"
+  "movdqu     (%0),%%xmm2                      \n"
+  "movdqu     (%0,%4),%%xmm3                   \n"
   "lea        (%0,%4,2),%0                     \n"
   "movdqa     %%xmm2,%%xmm8                    \n"
   "punpcklbw  %%xmm3,%%xmm2                    \n"
   "punpckhbw  %%xmm3,%%xmm8                    \n"
   "movdqa     %%xmm8,%%xmm3                    \n"
-  "movdqa     (%0),%%xmm4                      \n"
-  "movdqa     (%0,%4),%%xmm5                   \n"
+  "movdqu     (%0),%%xmm4                      \n"
+  "movdqu     (%0,%4),%%xmm5                   \n"
   "lea        (%0,%4,2),%0                     \n"
   "movdqa     %%xmm4,%%xmm8                    \n"
   "punpcklbw  %%xmm5,%%xmm4                    \n"
   "punpckhbw  %%xmm5,%%xmm8                    \n"
   "movdqa     %%xmm8,%%xmm5                    \n"
-  "movdqa     (%0),%%xmm6                      \n"
-  "movdqa     (%0,%4),%%xmm7                   \n"
+  "movdqu     (%0),%%xmm6                      \n"
+  "movdqu     (%0,%4),%%xmm7                   \n"
   "lea        (%0,%4,2),%0                     \n"
   "movdqa     %%xmm6,%%xmm8                    \n"
   "punpcklbw  %%xmm7,%%xmm6                    \n"
@@ -818,9 +814,7 @@ void TransposePlane(const uint8* src, int src_stride,
   }
 #endif
 #if defined(HAS_TRANSPOSE_WX8_FAST_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) &&
-      IS_ALIGNED(width, 16) &&
-      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
     TransposeWx8 = TransposeWx8_FAST_SSSE3;
   }
 #endif
@@ -883,29 +877,38 @@ void RotatePlane180(const uint8* src, int src_stride,
   void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
   void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
 #if defined(HAS_MIRRORROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 16)) {
-    MirrorRow = MirrorRow_NEON;
+  if (TestCpuFlag(kCpuHasNEON)) {
+    MirrorRow = MirrorRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      MirrorRow = MirrorRow_NEON;
+    }
   }
 #endif
 #if defined(HAS_MIRRORROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 16) &&
-      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
-      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
-    MirrorRow = MirrorRow_SSE2;
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    MirrorRow = MirrorRow_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      MirrorRow = MirrorRow_SSE2;
+    }
   }
 #endif
 #if defined(HAS_MIRRORROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
-      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
-      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
-    MirrorRow = MirrorRow_SSSE3;
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    MirrorRow = MirrorRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 16)) {
+      MirrorRow = MirrorRow_SSSE3;
+    }
   }
 #endif
 #if defined(HAS_MIRRORROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2) && IS_ALIGNED(width, 32)) {
-    MirrorRow = MirrorRow_AVX2;
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    MirrorRow = MirrorRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      MirrorRow = MirrorRow_AVX2;
+    }
   }
 #endif
+// TODO(fbarchard): Mirror on mips handle unaligned memory.
 #if defined(HAS_MIRRORROW_MIPS_DSPR2)
   if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
       IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) &&
@@ -913,21 +916,14 @@ void RotatePlane180(const uint8* src, int src_stride,
     MirrorRow = MirrorRow_MIPS_DSPR2;
   }
 #endif
-#if defined(HAS_COPYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
-    CopyRow = CopyRow_NEON;
-  }
-#endif
-#if defined(HAS_COPYROW_X86)
-  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
-    CopyRow = CopyRow_X86;
+#if defined(HAS_COPYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2)) {
+    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
   }
 #endif
-#if defined(HAS_COPYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
-      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16) &&
-      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
-    CopyRow = CopyRow_SSE2;
+#if defined(HAS_COPYROW_AVX)
+  if (TestCpuFlag(kCpuHasAVX)) {
+    CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
   }
 #endif
 #if defined(HAS_COPYROW_ERMS)
@@ -935,6 +931,11 @@ void RotatePlane180(const uint8* src, int src_stride,
     CopyRow = CopyRow_ERMS;
   }
 #endif
+#if defined(HAS_COPYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
+  }
+#endif
 #if defined(HAS_COPYROW_MIPS)
   if (TestCpuFlag(kCpuHasMIPS)) {
     CopyRow = CopyRow_MIPS;
@@ -1010,13 +1011,13 @@ void TransposeUV(const uint8* src, int src_stride,
   if (TestCpuFlag(kCpuHasNEON)) {
     TransposeUVWx8 = TransposeUVWx8_NEON;
   }
-#elif defined(HAS_TRANSPOSE_UVWX8_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2) &&
-      IS_ALIGNED(width, 8) &&
-      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
+#endif
+#if defined(HAS_TRANSPOSE_UVWX8_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 8)) {
     TransposeUVWx8 = TransposeUVWx8_SSE2;
   }
-#elif defined(HAS_TRANSPOSE_UVWx8_MIPS_DSPR2)
+#endif
+#if defined(HAS_TRANSPOSE_UVWx8_MIPS_DSPR2)
   if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 2) &&
       IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
     TransposeUVWx8 = TransposeUVWx8_MIPS_DSPR2;
@@ -1084,12 +1085,13 @@ void RotateUV180(const uint8* src, int src_stride,
   if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
     MirrorRowUV = MirrorUVRow_NEON;
   }
-#elif defined(HAS_MIRRORROW_UV_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16) &&
-      IS_ALIGNED(src, 16) && IS_ALIGNED(src_stride, 16)) {
+#endif
+#if defined(HAS_MIRRORROW_UV_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) {
     MirrorRowUV = MirrorUVRow_SSSE3;
   }
-#elif defined(HAS_MIRRORUVROW_MIPS_DSPR2)
+#endif
+#if defined(HAS_MIRRORUVROW_MIPS_DSPR2)
   if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
       IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
     MirrorRowUV = MirrorUVRow_MIPS_DSPR2;