| Index: src/opts/SkXfermode_opts_arm_neon.cpp
 | 
| diff --git a/src/opts/SkXfermode_opts_arm_neon.cpp b/src/opts/SkXfermode_opts_arm_neon.cpp
 | 
| index f4ff18c37e614da8dadb1ea18b6d0b58312cd3e8..b8d8ef521b2a48310223f87f3ee58c2ad1b7ef1b 100644
 | 
| --- a/src/opts/SkXfermode_opts_arm_neon.cpp
 | 
| +++ b/src/opts/SkXfermode_opts_arm_neon.cpp
 | 
| @@ -574,13 +574,14 @@ void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[],
 | 
|  
 | 
|      SkXfermodeProc proc = this->getProc();
 | 
|      SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD);
 | 
| +    SkASSERT(procSIMD != NULL);
 | 
|  
 | 
|      if (NULL == aa) {
 | 
|          // Unrolled NEON code
 | 
|          while (count >= 8) {
 | 
|              uint8x8x4_t vsrc, vdst, vres;
 | 
|  
 | 
| -#if (__GNUC__ == 4) && (__GNUC_MINOR__ > 6)
 | 
| +#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
 | 
|              asm volatile (
 | 
|                  "vld4.u8    %h[vsrc], [%[src]]!  \t\n"
 | 
|                  "vld4.u8    %h[vdst], [%[dst]]   \t\n"
 | 
| @@ -639,6 +640,74 @@ void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[],
 | 
|      }
 | 
|  }
 | 
|  
 | 
| +void SkNEONProcCoeffXfermode::xfer16(uint16_t* SK_RESTRICT dst,
 | 
| +                                     const SkPMColor* SK_RESTRICT src, int count,
 | 
| +                                     const SkAlpha* SK_RESTRICT aa) const {
 | 
| +    SkASSERT(dst && src && count >= 0);
 | 
| +    // Blend count src colors into 16-bit dst; a non-NULL aa supplies one alpha per pixel.
 | 
| +    SkXfermodeProc proc = this->getProc();  // scalar blend, one pixel per call
 | 
| +    SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD);
 | 
| +    SkASSERT(procSIMD != NULL);
 | 
| +    // Without aa, blend 8 pixels per iteration through procSIMD, then finish with proc.
 | 
| +    if (NULL == aa) {
 | 
| +        while(count >= 8) {
 | 
| +            uint16x8_t vdst, vres16;
 | 
| +            uint8x8x4_t vdst32, vsrc, vres;
 | 
| +            // Load 8 16-bit destination pixels.
 | 
| +            vdst = vld1q_u16(dst);
 | 
| +            // Load 8 src pixels with vld4.u8; the "!" writeback advances src past them.
 | 
| +#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
 | 
| +            asm volatile (  // %h presumably expands to vsrc's D-register list -- confirm
 | 
| +                "vld4.u8    %h[vsrc], [%[src]]!  \t\n"
 | 
| +                : [vsrc] "=w" (vsrc), [src] "+&r" (src)
 | 
| +                : :
 | 
| +            );
 | 
| +#else
 | 
| +            register uint8x8_t d0 asm("d0");  // pinned so {d0-d3} below hits these variables
 | 
| +            register uint8x8_t d1 asm("d1");
 | 
| +            register uint8x8_t d2 asm("d2");
 | 
| +            register uint8x8_t d3 asm("d3");
 | 
| +            // Same load as above, with the destination registers named explicitly.
 | 
| +            asm volatile (
 | 
| +                "vld4.u8    {d0-d3},[%[src]]!;"
 | 
| +                : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3),
 | 
| +                  [src] "+&r" (src)
 | 
| +                : :
 | 
| +            );
 | 
| +            vsrc.val[0] = d0;
 | 
| +            vsrc.val[1] = d1;
 | 
| +            vsrc.val[2] = d2;
 | 
| +            vsrc.val[3] = d3;
 | 
| +#endif
 | 
| +            // Widen dst to 32-bit, blend with procSIMD, then narrow the result to 16-bit.
 | 
| +            vdst32 = SkPixel16ToPixel32_neon8(vdst);
 | 
| +            vres = procSIMD(vsrc, vdst32);
 | 
| +            vres16 = SkPixel32ToPixel16_neon8(vres);
 | 
| +            // Write the 8 blended pixels back over the ones that were loaded.
 | 
| +            vst1q_u16(dst, vres16);
 | 
| +            // src was already advanced by the asm writeback; only dst and count change here.
 | 
| +            count -= 8;
 | 
| +            dst += 8;
 | 
| +        }
 | 
| +        for (int i = 0; i < count; i++) {  // scalar tail: fewer than 8 pixels remain
 | 
| +            SkPMColor dstC = SkPixel16ToPixel32(dst[i]);
 | 
| +            dst[i] = SkPixel32ToPixel16_ToU16(proc(src[i], dstC));
 | 
| +        }
 | 
| +    } else {  // aa present: scalar path only, walking from the last pixel to the first
 | 
| +        for (int i = count - 1; i >= 0; --i) {
 | 
| +            unsigned a = aa[i];
 | 
| +            if (0 != a) {  // a == 0 leaves dst[i] untouched
 | 
| +                SkPMColor dstC = SkPixel16ToPixel32(dst[i]);
 | 
| +                SkPMColor C = proc(src[i], dstC);
 | 
| +                if (0xFF != a) {  // a == 0xFF keeps the blended color as-is
 | 
| +                    C = SkFourByteInterp(C, dstC, a);  // presumably lerps dstC -> C by a; confirm
 | 
| +                }
 | 
| +                dst[i] = SkPixel32ToPixel16_ToU16(C);
 | 
| +            }
 | 
| +        }
 | 
| +    }
 | 
| +}
 | 
| +
 | 
|  #ifdef SK_DEVELOPER
 | 
|  void SkNEONProcCoeffXfermode::toString(SkString* str) const {
 | 
|      this->INHERITED::toString(str);
 | 
| 
 |