Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkBlitMask_opts_DEFINED | 8 #ifndef SkBlitMask_opts_DEFINED |
| 9 #define SkBlitMask_opts_DEFINED | 9 #define SkBlitMask_opts_DEFINED |
| 10 | 10 |
| 11 #include "Sk4px.h" | 11 #include "Sk4px.h" |
| 12 #include "SkPx.h" | |
| 12 | 13 |
| 13 namespace SK_OPTS_NS { | 14 namespace SK_OPTS_NS { |
| 14 | 15 |
| 15 #if defined(SK_ARM_HAS_NEON) | 16 template <typename Fn> |
| 16 // The Sk4px versions below will work fine with NEON, but we have had many indications | 17 static void blit_mask_d32_a8(const Fn& fn, SkPMColor* dst, size_t dstRB, |
| 17 // that it doesn't perform as well as this NEON-specific code. TODO(mtklein): why? | 18 const SkAlpha* mask, size_t maskRB, |
| 18 #include "SkColor_opts_neon.h" | 19 int w, int h) { |
| 20 while (h --> 0) { | |
| 21 int n = w, N = SkPx::N; | |
| 22 while (n >= N) { | |
| 23 fn(SkPx::LoadN(dst), SkPx::Alpha::LoadN(mask)).storeN(dst); | |
| 24 dst += N; mask += N; n -= N; | |
| 25 } | |
| 26 if (n > 0) { | |
| 27 fn(SkPx::Load(n, dst), SkPx::Alpha::Load(n, mask)).store(n, dst); | |
| 28 dst += n; mask += n; | |
| 29 } | |
| 30 dst += dstRB / sizeof(*dst) - w; | |
| 31 mask += maskRB / sizeof(*mask) - w; | |
| 32 } | |
| 33 } | |
| 19 | 34 |
| 20 template <bool isColor> | 35 static void blit_mask_d32_a8(SkPMColor* dst, size_t dstRB, |
| 21 static void D32_A8_Opaque_Color_neon(void* SK_RESTRICT dst, size_t dstRB, | 36 const SkAlpha* mask, size_t maskRB, |
| 22 const void* SK_RESTRICT maskPtr, size_t maskRB, | 37 SkColor color, int w, int h) { |
| 23 SkColor color, int width, int height) { | 38 auto s = SkPx::Dup(SkPreMultiplyColor(color)); |
| 24 SkPMColor pmc = SkPreMultiplyColor(color); | |
| 25 SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; | |
| 26 const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; | |
| 27 uint8x8x4_t vpmc; | |
| 28 | 39 |
| 29 maskRB -= width; | 40 if (color == SK_ColorBLACK) { |
|
msarett
2015/09/08 15:33:12
You use different functions for different special
mtklein
2015/09/08 15:45:26
Yep. It is clever. Sadly, not my cleverness, but
| |
| 30 dstRB -= (width << 2); | 41 auto fn = [](const SkPx& d, const SkPx::Alpha& aa) { |
| 31 | 42 // = (s + d(1-sa))aa + d(1-aa) |
| 32 if (width >= 8) { | 43 // = s*aa + d(1-sa*aa) |
| 33 vpmc.val[NEON_A] = vdup_n_u8(SkGetPackedA32(pmc)); | 44 // ~~~> |
| 34 vpmc.val[NEON_R] = vdup_n_u8(SkGetPackedR32(pmc)); | 45 // a = 1*aa + d(1-1*aa) = aa + d(1-aa) |
| 35 vpmc.val[NEON_G] = vdup_n_u8(SkGetPackedG32(pmc)); | 46 // c = 0*aa + d(1-1*aa) = d(1-aa) |
| 36 vpmc.val[NEON_B] = vdup_n_u8(SkGetPackedB32(pmc)); | 47 return d.approxMulDiv255(aa.inv()).addAlpha(aa); |
| 37 } | |
| 38 do { | |
| 39 int w = width; | |
| 40 while (w >= 8) { | |
| 41 uint8x8_t vmask = vld1_u8(mask); | |
| 42 uint16x8_t vscale, vmask256 = SkAlpha255To256_neon8(vmask); | |
| 43 if (isColor) { | |
| 44 vscale = vsubw_u8(vdupq_n_u16(256), | |
| 45 SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256)); | |
| 46 } else { | |
| 47 vscale = vsubw_u8(vdupq_n_u16(256), vmask); | |
| 48 } | |
| 49 uint8x8x4_t vdev = vld4_u8((uint8_t*)device); | |
| 50 | |
| 51 vdev.val[NEON_A] = SkAlphaMul_neon8(vpmc.val[NEON_A], vmask256 ) | |
| 52 + SkAlphaMul_neon8(vdev.val[NEON_A], vscale); | |
| 53 vdev.val[NEON_R] = SkAlphaMul_neon8(vpmc.val[NEON_R], vmask256 ) | |
| 54 + SkAlphaMul_neon8(vdev.val[NEON_R], vscale); | |
| 55 vdev.val[NEON_G] = SkAlphaMul_neon8(vpmc.val[NEON_G], vmask256 ) | |
| 56 + SkAlphaMul_neon8(vdev.val[NEON_G], vscale); | |
| 57 vdev.val[NEON_B] = SkAlphaMul_neon8(vpmc.val[NEON_B], vmask256 ) | |
| 58 + SkAlphaMul_neon8(vdev.val[NEON_B], vscale); | |
| 59 | |
| 60 vst4_u8((uint8_t*)device, vdev); | |
| 61 | |
| 62 mask += 8; | |
| 63 device += 8; | |
| 64 w -= 8; | |
| 65 } | |
| 66 | |
| 67 while (w--) { | |
| 68 unsigned aa = *mask++; | |
| 69 if (isColor) { | |
| 70 *device = SkBlendARGB32(pmc, *device, aa); | |
| 71 } else { | |
| 72 *device = SkAlphaMulQ(pmc, SkAlpha255To256(aa)) | |
| 73 + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); | |
| 74 } | |
| 75 device += 1; | |
| 76 }; | |
| 77 | |
| 78 device = (uint32_t*)((char*)device + dstRB); | |
| 79 mask += maskRB; | |
| 80 | |
| 81 } while (--height != 0); | |
| 82 } | |
| 83 | |
| 84 static void blit_mask_d32_a8_general(SkPMColor* dst, size_t dstRB, | |
| 85 const SkAlpha* mask, size_t maskRB, | |
| 86 SkColor color, int w, int h) { | |
| 87 D32_A8_Opaque_Color_neon<true>(dst, dstRB, mask, maskRB, color, w, h); | |
| 88 } | |
| 89 | |
| 90 // As above, but made slightly simpler by requiring that color is opaque. | |
| 91 static void blit_mask_d32_a8_opaque(SkPMColor* dst, size_t dstRB, | |
| 92 const SkAlpha* mask, size_t maskRB, | |
| 93 SkColor color, int w, int h) { | |
| 94 D32_A8_Opaque_Color_neon<false>(dst, dstRB, mask, maskRB, color, w, h); | |
| 95 } | |
| 96 | |
| 97 // Same as _opaque, but assumes color == SK_ColorBLACK, a very common and even simpler case. | |
| 98 static void blit_mask_d32_a8_black(SkPMColor* dst, size_t dstRB, | |
| 99 const SkAlpha* maskPtr, size_t maskRB, | |
| 100 int width, int height) { | |
| 101 SkPMColor* SK_RESTRICT device = (SkPMColor*)dst; | |
| 102 const uint8_t* SK_RESTRICT mask = (const uint8_t*)maskPtr; | |
| 103 | |
| 104 maskRB -= width; | |
| 105 dstRB -= (width << 2); | |
| 106 do { | |
| 107 int w = width; | |
| 108 while (w >= 8) { | |
| 109 uint8x8_t vmask = vld1_u8(mask); | |
| 110 uint16x8_t vscale = vsubw_u8(vdupq_n_u16(256), vmask); | |
| 111 uint8x8x4_t vdevice = vld4_u8((uint8_t*)device); | |
| 112 | |
| 113 vdevice = SkAlphaMulQ_neon8(vdevice, vscale); | |
| 114 vdevice.val[NEON_A] += vmask; | |
| 115 | |
| 116 vst4_u8((uint8_t*)device, vdevice); | |
| 117 | |
| 118 mask += 8; | |
| 119 device += 8; | |
| 120 w -= 8; | |
| 121 } | |
| 122 while (w-- > 0) { | |
| 123 unsigned aa = *mask++; | |
| 124 *device = (aa << SK_A32_SHIFT) | |
| 125 + SkAlphaMulQ(*device, SkAlpha255To256(255 - aa)); | |
| 126 device += 1; | |
| 127 }; | |
| 128 device = (uint32_t*)((char*)device + dstRB); | |
| 129 mask += maskRB; | |
| 130 } while (--height != 0); | |
| 131 } | |
| 132 | |
| 133 #else | |
| 134 static void blit_mask_d32_a8_general(SkPMColor* dst, size_t dstRB, | |
| 135 const SkAlpha* mask, size_t maskRB, | |
| 136 SkColor color, int w, int h) { | |
| 137 auto s = Sk4px::DupPMColor(SkPreMultiplyColor(color)); | |
| 138 auto fn = [&](const Sk4px& d, const Sk4px& aa) { | |
| 139 // = (s + d(1-sa))aa + d(1-aa) | |
| 140 // = s*aa + d(1-sa*aa) | |
| 141 auto left = s.approxMulDiv255(aa), | |
| 142 right = d.approxMulDiv255(left.alphas().inv()); | |
| 143 return left + right; // This does not overflow (exhaustively checked). | |
| 144 }; | 48 }; |
| 145 while (h --> 0) { | 49 blit_mask_d32_a8(fn, dst, dstRB, mask, maskRB, w, h); |
| 146 Sk4px::MapDstAlpha(w, dst, mask, fn); | 50 } else if (SkColorGetA(color) == 0xFF) { |
| 147 dst += dstRB / sizeof(*dst); | 51 auto fn = [&](const SkPx& d, const SkPx::Alpha& aa) { |
| 148 mask += maskRB / sizeof(*mask); | |
| 149 } | |
| 150 } | |
| 151 | |
| 152 // As above, but made slightly simpler by requiring that color is opaque. | |
| 153 static void blit_mask_d32_a8_opaque(SkPMColor* dst, size_t dstRB, | |
| 154 const SkAlpha* mask, size_t maskRB, | |
| 155 SkColor color, int w, int h) { | |
| 156 SkASSERT(SkColorGetA(color) == 0xFF); | |
| 157 auto s = Sk4px::DupPMColor(SkPreMultiplyColor(color)); | |
| 158 auto fn = [&](const Sk4px& d, const Sk4px& aa) { | |
| 159 // = (s + d(1-sa))aa + d(1-aa) | 52 // = (s + d(1-sa))aa + d(1-aa) |
| 160 // = s*aa + d(1-sa*aa) | 53 // = s*aa + d(1-sa*aa) |
| 161 // ~~~> | 54 // ~~~> |
| 162 // = s*aa + d(1-aa) | 55 // = s*aa + d(1-aa) |
| 163 return s.approxMulDiv255(aa) + d.approxMulDiv255(aa.inv()); | 56 return s.approxMulDiv255(aa) + d.approxMulDiv255(aa.inv()); |
| 164 }; | 57 }; |
| 165 while (h --> 0) { | 58 blit_mask_d32_a8(fn, dst, dstRB, mask, maskRB, w, h); |
| 166 Sk4px::MapDstAlpha(w, dst, mask, fn); | 59 } else { |
| 167 dst += dstRB / sizeof(*dst); | 60 auto fn = [&](const SkPx& d, const SkPx::Alpha& aa) { |
| 168 mask += maskRB / sizeof(*mask); | 61 // = (s + d(1-sa))aa + d(1-aa) |
| 169 } | 62 // = s*aa + d(1-sa*aa) |
| 170 } | 63 auto left = s.approxMulDiv255(aa), |
| 171 | 64 right = d.approxMulDiv255(left.alpha().inv()); |
| 172 // Same as _opaque, but assumes color == SK_ColorBLACK, a very common and even simpler case. | 65 return left + right; // This does not overflow (exhaustively checked). |
| 173 static void blit_mask_d32_a8_black(SkPMColor* dst, size_t dstRB, | |
| 174 const SkAlpha* mask, size_t maskRB, | |
| 175 int w, int h) { | |
| 176 auto fn = [](const Sk4px& d, const Sk4px& aa) { | |
| 177 // = (s + d(1-sa))aa + d(1-aa) | |
| 178 // = s*aa + d(1-sa*aa) | |
| 179 // ~~~> | |
| 180 // a = 1*aa + d(1-1*aa) = aa + d(1-aa) | |
| 181 // c = 0*aa + d(1-1*aa) = d(1-aa) | |
| 182 return aa.zeroColors() + d.approxMulDiv255(aa.inv()); | |
| 183 }; | 66 }; |
| 184 while (h --> 0) { | 67 blit_mask_d32_a8(fn, dst, dstRB, mask, maskRB, w, h); |
| 185 Sk4px::MapDstAlpha(w, dst, mask, fn); | |
| 186 dst += dstRB / sizeof(*dst); | |
| 187 mask += maskRB / sizeof(*mask); | |
| 188 } | |
| 189 } | |
| 190 #endif | |
| 191 | |
| 192 static void blit_mask_d32_a8(SkPMColor* dst, size_t dstRB, | |
| 193 const SkAlpha* mask, size_t maskRB, | |
| 194 SkColor color, int w, int h) { | |
| 195 if (color == SK_ColorBLACK) { | |
| 196 blit_mask_d32_a8_black(dst, dstRB, mask, maskRB, w, h); | |
| 197 } else if (SkColorGetA(color) == 0xFF) { | |
| 198 blit_mask_d32_a8_opaque(dst, dstRB, mask, maskRB, color, w, h); | |
| 199 } else { | |
| 200 blit_mask_d32_a8_general(dst, dstRB, mask, maskRB, color, w, h); | |
| 201 } | 68 } |
| 202 } | 69 } |
| 203 | 70 |
| 204 } // SK_OPTS_NS | 71 } // SK_OPTS_NS |
| 205 | 72 |
| 206 #endif//SkBlitMask_opts_DEFINED | 73 #endif//SkBlitMask_opts_DEFINED |
| OLD | NEW |