OLD | NEW |
(Empty) | |
| 1 |
| 2 #include "SkBlitMask.h" |
| 3 #include "SkColor_opts_neon.h" |
| 4 |
| 5 static void D32_A8_Black_neon(void* SK_RESTRICT dst, size_t dstRB, |
| 6 const void* SK_RESTRICT maskPtr, size_t maskRB, |
| 7 SkColor, int width, int height) { |
| 8 SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; |
| 9 const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; |
| 10 |
| 11 maskRB -= width; |
| 12 dstRB -= (width << 2); |
| 13 do { |
| 14 int w = width; |
| 15 while (w >= 8) { |
| 16 uint8x8_t vmask = vld1_u8(mask); |
| 17 uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask); |
| 18 uint8x8x4_t vdevice = vld4_u8((uint8_t*)device); |
| 19 |
| 20 vdevice = SkAlphaMulQ_neon8(vdevice, vscale); |
| 21 vdevice.val[NEON_A] += vmask; |
| 22 |
| 23 vst4_u8((uint8_t*)device, vdevice); |
| 24 |
| 25 mask += 8; |
| 26 device += 8; |
| 27 w -= 8; |
| 28 } |
| 29 while (w-- > 0) { |
| 30 unsigned aa = *mask++; |
| 31 *device = (aa << SK_A32_SHIFT) |
| 32 + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); |
| 33 device += 1; |
| 34 }; |
| 35 device = (uint32_t*)((char*)device + dstRB); |
| 36 mask += maskRB; |
| 37 } while (--height != 0); |
| 38 } |
| 39 |
| 40 template <bool isColor> |
| 41 static void D32_A8_Opaque_Color_neon(void* SK_RESTRICT dst, size_t dstRB, |
| 42 const void* SK_RESTRICT maskPtr, size_t mas
kRB, |
| 43 SkColor color, int width, int height) { |
| 44 SkPMColor pmc = SkPreMultiplyColor(color); |
| 45 SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; |
| 46 const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; |
| 47 uint8x8x4_t vpmc; |
| 48 |
| 49 maskRB -= width; |
| 50 dstRB -= (width << 2); |
| 51 |
| 52 if (width >= 8) { |
| 53 vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc)); |
| 54 vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc)); |
| 55 vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc)); |
| 56 vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc)); |
| 57 } |
| 58 do { |
| 59 int w = width; |
| 60 while (w >= 8) { |
| 61 uint8x8_t vmask = vld1_u8(mask); |
| 62 uint16x8_t vscale, vmask256 = SkAlpha255To256_neon8(vmask); |
| 63 if (isColor) { |
| 64 vscale = vsubw_u8(vdupq_n_u16(256), |
| 65 SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256)); |
| 66 } else { |
| 67 vscale = vsubw_u8(vdupq_n_u16(256), vmask); |
| 68 } |
| 69 uint8x8x4_t vdev = vld4_u8((uint8_t*)device); |
| 70 |
| 71 vdev.val[NEON_A] = SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256) |
| 72 + SkAlphaMul_neon8(vdev.val[NEON_A], vscale); |
| 73 vdev.val[NEON_R] = SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256) |
| 74 + SkAlphaMul_neon8(vdev.val[NEON_R], vscale); |
| 75 vdev.val[NEON_G] = SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256) |
| 76 + SkAlphaMul_neon8(vdev.val[NEON_G], vscale); |
| 77 vdev.val[NEON_B] = SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256) |
| 78 + SkAlphaMul_neon8(vdev.val[NEON_B], vscale); |
| 79 |
| 80 vst4_u8((uint8_t*)device, vdev); |
| 81 |
| 82 mask += 8; |
| 83 device += 8; |
| 84 w -= 8; |
| 85 } |
| 86 |
| 87 while (w--) { |
| 88 unsigned aa = *mask++; |
| 89 if (isColor) { |
| 90 *device = SkBlendARGB32(pmc, *device, aa); |
| 91 } else { |
| 92 *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa)) |
| 93 + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); |
| 94 } |
| 95 device += 1; |
| 96 }; |
| 97 |
| 98 device = (uint32_t*)((char*)device + dstRB); |
| 99 mask += maskRB; |
| 100 |
| 101 } while (--height != 0); |
| 102 } |
| 103 |
| 104 static void D32_A8_Opaque_neon(void* SK_RESTRICT dst, size_t dstRB, |
| 105 const void* SK_RESTRICT maskPtr, size_t maskRB, |
| 106 SkColor color, int width, int height) { |
| 107 D32_A8_Opaque_Color_neon<false>(dst, dstRB, maskPtr, maskRB, color, width, h
eight); |
| 108 } |
| 109 |
| 110 static void D32_A8_Color_neon(void* SK_RESTRICT dst, size_t dstRB, |
| 111 const void* SK_RESTRICT maskPtr, size_t maskRB, |
| 112 SkColor color, int width, int height) { |
| 113 D32_A8_Opaque_Color_neon<true>(dst, dstRB, maskPtr, maskRB, color, width, he
ight); |
| 114 } |
| 115 |
| 116 SkBlitMask::ColorProc D32_A8_Factory_neon(SkColor color) { |
| 117 if (SK_ColorBLACK == color) { |
| 118 return D32_A8_Black_neon; |
| 119 } else if (0xFF == SkColorGetA(color)) { |
| 120 return D32_A8_Opaque_neon; |
| 121 } else { |
| 122 return D32_A8_Color_neon; |
| 123 } |
| 124 } |
| 125 |
| 126 //////////////////////////////////////////////////////////////////////////////// |
| 127 |
| 128 void SkBlitLCD16OpaqueRow_neon(SkPMColor dst[], const uint16_t src[], |
| 129 SkColor color, int width, |
| 130 SkPMColor opaqueDst) { |
| 131 int colR = SkColorGetR(color); |
| 132 int colG = SkColorGetG(color); |
| 133 int colB = SkColorGetB(color); |
| 134 |
| 135 uint8x8_t vcolR, vcolG, vcolB; |
| 136 uint8x8_t vopqDstA, vopqDstR, vopqDstG, vopqDstB; |
| 137 |
| 138 if (width >= 8) { |
| 139 vcolR = vdup_n_u8(colR); |
| 140 vcolG = vdup_n_u8(colG); |
| 141 vcolB = vdup_n_u8(colB); |
| 142 vopqDstA = vdup_n_u8(SkGetPackedA32(opaqueDst)); |
| 143 vopqDstR = vdup_n_u8(SkGetPackedR32(opaqueDst)); |
| 144 vopqDstG = vdup_n_u8(SkGetPackedG32(opaqueDst)); |
| 145 vopqDstB = vdup_n_u8(SkGetPackedB32(opaqueDst)); |
| 146 } |
| 147 |
| 148 while (width >= 8) { |
| 149 uint8x8x4_t vdst; |
| 150 uint16x8_t vmask; |
| 151 uint16x8_t vmaskR, vmaskG, vmaskB; |
| 152 uint8x8_t vsel_trans, vsel_opq; |
| 153 |
| 154 vdst = vld4_u8((uint8_t*)dst); |
| 155 vmask = vld1q_u16(src); |
| 156 |
| 157 // Prepare compare masks |
| 158 vsel_trans = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0))); |
| 159 vsel_opq = vmovn_u16(vceqq_u16(vmask, vdupq_n_u16(0xFFFF))); |
| 160 |
| 161 // Get all the color masks on 5 bits |
| 162 vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT); |
| 163 vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS), |
| 164 SK_B16_BITS + SK_R16_BITS + 1); |
| 165 vmaskB = vmask & vdupq_n_u16(SK_B16_MASK); |
| 166 |
| 167 // Upscale to 0..32 |
| 168 vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4); |
| 169 vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4); |
| 170 vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4); |
| 171 |
| 172 vdst.val[NEON_A] = vbsl_u8(vsel_trans, vdst.val[NEON_A], vdup_n_u8(0xFF)
); |
| 173 vdst.val[NEON_A] = vbsl_u8(vsel_opq, vopqDstA, vdst.val[NEON_A]); |
| 174 |
| 175 vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR); |
| 176 vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG); |
| 177 vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB); |
| 178 |
| 179 vdst.val[NEON_R] = vbsl_u8(vsel_opq, vopqDstR, vdst.val[NEON_R]); |
| 180 vdst.val[NEON_G] = vbsl_u8(vsel_opq, vopqDstG, vdst.val[NEON_G]); |
| 181 vdst.val[NEON_B] = vbsl_u8(vsel_opq, vopqDstB, vdst.val[NEON_B]); |
| 182 |
| 183 vst4_u8((uint8_t*)dst, vdst); |
| 184 |
| 185 dst += 8; |
| 186 src += 8; |
| 187 width -= 8; |
| 188 } |
| 189 |
| 190 // Leftovers |
| 191 for (int i = 0; i < width; i++) { |
| 192 dst[i] = SkBlendLCD16Opaque(colR, colG, colB, dst[i], src[i], |
| 193 opaqueDst); |
| 194 } |
| 195 } |
| 196 |
| 197 void SkBlitLCD16Row_neon(SkPMColor dst[], const uint16_t src[], |
| 198 SkColor color, int width, SkPMColor) { |
| 199 int colA = SkColorGetA(color); |
| 200 int colR = SkColorGetR(color); |
| 201 int colG = SkColorGetG(color); |
| 202 int colB = SkColorGetB(color); |
| 203 |
| 204 colA = SkAlpha255To256(colA); |
| 205 |
| 206 uint8x8_t vcolR, vcolG, vcolB; |
| 207 uint16x8_t vcolA; |
| 208 |
| 209 if (width >= 8) { |
| 210 vcolA = vdupq_n_u16(colA); |
| 211 vcolR = vdup_n_u8(colR); |
| 212 vcolG = vdup_n_u8(colG); |
| 213 vcolB = vdup_n_u8(colB); |
| 214 } |
| 215 |
| 216 while (width >= 8) { |
| 217 uint8x8x4_t vdst; |
| 218 uint16x8_t vmask; |
| 219 uint16x8_t vmaskR, vmaskG, vmaskB; |
| 220 |
| 221 vdst = vld4_u8((uint8_t*)dst); |
| 222 vmask = vld1q_u16(src); |
| 223 |
| 224 // Get all the color masks on 5 bits |
| 225 vmaskR = vshrq_n_u16(vmask, SK_R16_SHIFT); |
| 226 vmaskG = vshrq_n_u16(vshlq_n_u16(vmask, SK_R16_BITS), |
| 227 SK_B16_BITS + SK_R16_BITS + 1); |
| 228 vmaskB = vmask & vdupq_n_u16(SK_B16_MASK); |
| 229 |
| 230 // Upscale to 0..32 |
| 231 vmaskR = vmaskR + vshrq_n_u16(vmaskR, 4); |
| 232 vmaskG = vmaskG + vshrq_n_u16(vmaskG, 4); |
| 233 vmaskB = vmaskB + vshrq_n_u16(vmaskB, 4); |
| 234 |
| 235 vmaskR = vshrq_n_u16(vmaskR * vcolA, 8); |
| 236 vmaskG = vshrq_n_u16(vmaskG * vcolA, 8); |
| 237 vmaskB = vshrq_n_u16(vmaskB * vcolA, 8); |
| 238 |
| 239 vdst.val[NEON_A] = vdup_n_u8(0xFF); |
| 240 vdst.val[NEON_R] = SkBlend32_neon8(vcolR, vdst.val[NEON_R], vmaskR); |
| 241 vdst.val[NEON_G] = SkBlend32_neon8(vcolG, vdst.val[NEON_G], vmaskG); |
| 242 vdst.val[NEON_B] = SkBlend32_neon8(vcolB, vdst.val[NEON_B], vmaskB); |
| 243 |
| 244 vst4_u8((uint8_t*)dst, vdst); |
| 245 |
| 246 dst += 8; |
| 247 src += 8; |
| 248 width -= 8; |
| 249 } |
| 250 |
| 251 for (int i = 0; i < width; i++) { |
| 252 dst[i] = SkBlendLCD16(colA, colR, colG, colB, dst[i], src[i]); |
| 253 } |
| 254 } |
| 255 |
OLD | NEW |