| Index: src/opts/SkXfermode_opts_arm_neon.cpp
|
| diff --git a/src/opts/SkXfermode_opts_arm_neon.cpp b/src/opts/SkXfermode_opts_arm_neon.cpp
|
| index f4ff18c37e614da8dadb1ea18b6d0b58312cd3e8..b8d8ef521b2a48310223f87f3ee58c2ad1b7ef1b 100644
|
| --- a/src/opts/SkXfermode_opts_arm_neon.cpp
|
| +++ b/src/opts/SkXfermode_opts_arm_neon.cpp
|
| @@ -574,13 +574,14 @@ void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[],
|
|
|
| SkXfermodeProc proc = this->getProc();
|
| SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD);
|
| + SkASSERT(procSIMD != NULL);
|
|
|
| if (NULL == aa) {
|
| // Unrolled NEON code
|
| while (count >= 8) {
|
| uint8x8x4_t vsrc, vdst, vres;
|
|
|
| -#if (__GNUC__ == 4) && (__GNUC_MINOR__ > 6)
|
| +#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
|
| asm volatile (
|
| "vld4.u8 %h[vsrc], [%[src]]! \t\n"
|
| "vld4.u8 %h[vdst], [%[dst]] \t\n"
|
| @@ -639,6 +640,74 @@ void SkNEONProcCoeffXfermode::xfer32(SkPMColor dst[], const SkPMColor src[],
|
| }
|
| }
|
|
|
| +void SkNEONProcCoeffXfermode::xfer16(uint16_t* SK_RESTRICT dst,
|
| + const SkPMColor* SK_RESTRICT src, int count,
|
| + const SkAlpha* SK_RESTRICT aa) const {
|
| + SkASSERT(dst && src && count >= 0);
|
| +
|
| + SkXfermodeProc proc = this->getProc();
|
| + SkXfermodeProcSIMD procSIMD = reinterpret_cast<SkXfermodeProcSIMD>(fProcSIMD);
|
| + SkASSERT(procSIMD != NULL);
|
| +
|
| + if (NULL == aa) {
|
| + while(count >= 8) {
|
| + uint16x8_t vdst, vres16;
|
| + uint8x8x4_t vdst32, vsrc, vres;
|
| +
|
| + vdst = vld1q_u16(dst);
|
| +
|
| +#if (__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 6))
|
| + asm volatile (
|
| + "vld4.u8 %h[vsrc], [%[src]]! \t\n"
|
| + : [vsrc] "=w" (vsrc), [src] "+&r" (src)
|
| + : :
|
| + );
|
| +#else
|
| + register uint8x8_t d0 asm("d0");
|
| + register uint8x8_t d1 asm("d1");
|
| + register uint8x8_t d2 asm("d2");
|
| + register uint8x8_t d3 asm("d3");
|
| +
|
| + asm volatile (
|
| + "vld4.u8 {d0-d3},[%[src]]!;"
|
| + : "=w" (d0), "=w" (d1), "=w" (d2), "=w" (d3),
|
| + [src] "+&r" (src)
|
| + : :
|
| + );
|
| + vsrc.val[0] = d0;
|
| + vsrc.val[1] = d1;
|
| + vsrc.val[2] = d2;
|
| + vsrc.val[3] = d3;
|
| +#endif
|
| +
|
| + vdst32 = SkPixel16ToPixel32_neon8(vdst);
|
| + vres = procSIMD(vsrc, vdst32);
|
| + vres16 = SkPixel32ToPixel16_neon8(vres);
|
| +
|
| + vst1q_u16(dst, vres16);
|
| +
|
| + count -= 8;
|
| + dst += 8;
|
| + }
|
| + for (int i = 0; i < count; i++) {
|
| + SkPMColor dstC = SkPixel16ToPixel32(dst[i]);
|
| + dst[i] = SkPixel32ToPixel16_ToU16(proc(src[i], dstC));
|
| + }
|
| + } else {
|
| + for (int i = count - 1; i >= 0; --i) {
|
| + unsigned a = aa[i];
|
| + if (0 != a) {
|
| + SkPMColor dstC = SkPixel16ToPixel32(dst[i]);
|
| + SkPMColor C = proc(src[i], dstC);
|
| + if (0xFF != a) {
|
| + C = SkFourByteInterp(C, dstC, a);
|
| + }
|
| + dst[i] = SkPixel32ToPixel16_ToU16(C);
|
| + }
|
| + }
|
| + }
|
| +}
|
| +
|
| #ifdef SK_DEVELOPER
|
| void SkNEONProcCoeffXfermode::toString(SkString* str) const {
|
| this->INHERITED::toString(str);
|
|
|