OLD | NEW |
1 #include "SkXfermode.h" | 1 #include "SkXfermode.h" |
2 #include "SkXfermode_proccoeff.h" | 2 #include "SkXfermode_proccoeff.h" |
3 #include "SkColorPriv.h" | 3 #include "SkColorPriv.h" |
4 | 4 |
5 #include <arm_neon.h> | 5 #include <arm_neon.h> |
6 #include "SkColor_opts_neon.h" | 6 #include "SkColor_opts_neon.h" |
7 #include "SkXfermode_opts_arm_neon.h" | 7 #include "SkXfermode_opts_arm_neon.h" |
8 | 8 |
9 #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) | 9 #define SkAlphaMulAlpha(a, b) SkMulDiv255Round(a, b) |
10 | 10 |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
86 | 86 |
87 // Insert where false and previous test false | 87 // Insert where false and previous test false |
88 cmp8 = cmp8 | cmp8_1; | 88 cmp8 = cmp8 | cmp8_1; |
89 ret = vbsl_u8(cmp8, ret, div); | 89 ret = vbsl_u8(cmp8, ret, div); |
90 | 90 |
91 // Return the final combination | 91 // Return the final combination |
92 return ret; | 92 return ret; |
93 } | 93 } |
94 | 94 |
95 //////////////////////////////////////////////////////////////////////////////// | 95 //////////////////////////////////////////////////////////////////////////////// |
| 96 // 1 pixel modeprocs |
| 97 //////////////////////////////////////////////////////////////////////////////// |
| 98 |
// kSrcATop_Mode: [Da, Sc * Da + (1 - Sa) * Dc]
//
// Blends a single premultiplied pixel. Every byte of src is scaled by the
// destination alpha, every byte of dst is scaled by (255 - source alpha),
// the two scaled pixels are added, and byte lane 3 of the result is then
// overwritten with the destination alpha.
// NOTE(review): this assumes the alpha channel lives in byte 3 of SkPMColor;
// confirm against the packing used by SkGetPackedA32.
SkPMColor srcatop_modeproc_neon(SkPMColor src, SkPMColor dst) {
    const unsigned srcAlpha = SkGetPackedA32(src);
    const unsigned dstAlpha = SkGetPackedA32(dst);

    // Scale factors replicated into every byte lane.
    const uint8x8_t dstAlphaVec    = vdup_n_u8(dstAlpha);
    const uint8x8_t invSrcAlphaVec = vdup_n_u8(255 - srcAlpha);

    // The pixel is duplicated into both 32-bit halves; only the low half is
    // read back at the end.
    const uint8x8_t srcBytes = vreinterpret_u8_u32(vdup_n_u32(src));
    const uint8x8_t dstBytes = vreinterpret_u8_u32(vdup_n_u32(dst));

    // Widening 8x8 -> 16 bit products.
    uint16x8_t srcTerm = vmull_u8(dstAlphaVec, srcBytes);
    uint16x8_t dstTerm = vmull_u8(invSrcAlphaVec, dstBytes);

    // Rounded division by 255: t = p + 128; t += t >> 8; result = t >> 8.
    const uint16x8_t roundBias = vdupq_n_u16(128);

    srcTerm = vaddq_u16(srcTerm, roundBias);
    srcTerm = vaddq_u16(srcTerm, vshrq_n_u16(srcTerm, 8));

    dstTerm = vaddq_u16(dstTerm, roundBias);
    dstTerm = vaddq_u16(dstTerm, vshrq_n_u16(dstTerm, 8));

    // Narrow both terms back to bytes and sum them.
    uint8x8_t blended = vadd_u8(vshrn_n_u16(srcTerm, 8),
                                vshrn_n_u16(dstTerm, 8));

    // The result alpha for this mode is Da, so replace lane 3 outright.
    blended = vset_lane_u8(dstAlpha, blended, 3);

    return vget_lane_u32(vreinterpret_u32_u8(blended), 0);
}
| 128 |
// kDstATop_Mode: [Sa, Sa * Dc + Sc * (1 - Da)]
//
// Blends a single premultiplied pixel. Every byte of src is scaled by
// (255 - destination alpha), every byte of dst is scaled by the source
// alpha, the two scaled pixels are added, and byte lane 3 of the result is
// then overwritten with the source alpha.
// NOTE(review): this assumes the alpha channel lives in byte 3 of SkPMColor;
// confirm against the packing used by SkGetPackedA32.
SkPMColor dstatop_modeproc_neon(SkPMColor src, SkPMColor dst) {
    const unsigned srcAlpha = SkGetPackedA32(src);
    const unsigned dstAlpha = SkGetPackedA32(dst);

    // Scale factors replicated into every byte lane.
    const uint8x8_t srcAlphaVec    = vdup_n_u8(srcAlpha);
    const uint8x8_t invDstAlphaVec = vdup_n_u8(255 - dstAlpha);

    // The pixel is duplicated into both 32-bit halves; only the low half is
    // read back at the end.
    const uint8x8_t srcBytes = vreinterpret_u8_u32(vdup_n_u32(src));
    const uint8x8_t dstBytes = vreinterpret_u8_u32(vdup_n_u32(dst));

    // Widening 8x8 -> 16 bit products.
    uint16x8_t srcTerm = vmull_u8(invDstAlphaVec, srcBytes);
    uint16x8_t dstTerm = vmull_u8(srcAlphaVec, dstBytes);

    // Rounded division by 255: t = p + 128; t += t >> 8; result = t >> 8.
    const uint16x8_t roundBias = vdupq_n_u16(128);

    srcTerm = vaddq_u16(srcTerm, roundBias);
    srcTerm = vaddq_u16(srcTerm, vshrq_n_u16(srcTerm, 8));

    dstTerm = vaddq_u16(dstTerm, roundBias);
    dstTerm = vaddq_u16(dstTerm, vshrq_n_u16(dstTerm, 8));

    // Narrow both terms back to bytes and sum them.
    uint8x8_t blended = vadd_u8(vshrn_n_u16(srcTerm, 8),
                                vshrn_n_u16(dstTerm, 8));

    // The result alpha for this mode is Sa, so replace lane 3 outright.
    blended = vset_lane_u8(srcAlpha, blended, 3);

    return vget_lane_u32(vreinterpret_u32_u8(blended), 0);
}
| 158 |
// kXor_Mode: [Sa + Da - 2 * Sa * Da, Sc * (1 - Da) + (1 - Sa) * Dc]
//
// Blends a single premultiplied pixel. Every byte of src is scaled by
// (255 - destination alpha), every byte of dst is scaled by
// (255 - source alpha), and the two scaled pixels are added. The result
// alpha is computed separately in scalar code and written into byte lane 3.
// NOTE(review): this assumes the alpha channel lives in byte 3 of SkPMColor;
// confirm against the packing used by SkGetPackedA32.
SkPMColor xor_modeproc_neon(SkPMColor src, SkPMColor dst) {
    const unsigned srcAlpha = SkGetPackedA32(src);
    const unsigned dstAlpha = SkGetPackedA32(dst);

    // Sa + Da - 2 * (Sa * Da), with the product taken through
    // SkAlphaMulAlpha and doubled via a left shift.
    const unsigned outAlpha =
            srcAlpha + dstAlpha - (SkAlphaMulAlpha(srcAlpha, dstAlpha) << 1);

    // Inverse alphas replicated into every byte lane.
    const uint8x8_t invSrcAlphaVec = vdup_n_u8(255 - srcAlpha);
    const uint8x8_t invDstAlphaVec = vdup_n_u8(255 - dstAlpha);

    // The pixel is duplicated into both 32-bit halves; only the low half is
    // read back at the end.
    const uint8x8_t srcBytes = vreinterpret_u8_u32(vdup_n_u32(src));
    const uint8x8_t dstBytes = vreinterpret_u8_u32(vdup_n_u32(dst));

    // Widening 8x8 -> 16 bit products.
    uint16x8_t srcTerm = vmull_u8(srcBytes, invDstAlphaVec);
    uint16x8_t dstTerm = vmull_u8(dstBytes, invSrcAlphaVec);

    // Rounded division by 255: t = p + 128; t += t >> 8; result = t >> 8.
    const uint16x8_t roundBias = vdupq_n_u16(128);

    srcTerm = vaddq_u16(srcTerm, roundBias);
    srcTerm = vaddq_u16(srcTerm, vshrq_n_u16(srcTerm, 8));

    dstTerm = vaddq_u16(dstTerm, roundBias);
    dstTerm = vaddq_u16(dstTerm, vshrq_n_u16(dstTerm, 8));

    // Narrow both terms back to bytes and sum them.
    uint8x8_t blended = vadd_u8(vshrn_n_u16(srcTerm, 8),
                                vshrn_n_u16(dstTerm, 8));

    // Replace lane 3 with the separately computed alpha.
    blended = vset_lane_u8(outAlpha, blended, 3);

    return vget_lane_u32(vreinterpret_u32_u8(blended), 0);
}
| 193 |
// kPlus_Mode
//
// Adds src and dst byte by byte with unsigned saturation (vqadd_u8), so each
// byte of the result clamps at 255 instead of wrapping. The pixel is
// duplicated into both 32-bit halves of the vector; only the low half is
// returned.
SkPMColor plus_modeproc_neon(SkPMColor src, SkPMColor dst) {
    const uint8x8_t srcBytes = vreinterpret_u8_u32(vdup_n_u32(src));
    const uint8x8_t dstBytes = vreinterpret_u8_u32(vdup_n_u32(dst));

    const uint32x2_t saturatedSum =
            vreinterpret_u32_u8(vqadd_u8(srcBytes, dstBytes));

    return vget_lane_u32(saturatedSum, 0);
}
| 203 |
// kModulate_Mode
//
// Multiplies src and dst byte by byte and divides each 16-bit product by 255
// with rounding, using t = p + 128; t += t >> 8; result = t >> 8. The pixel
// is duplicated into both 32-bit halves of the vector; only the low half is
// returned.
SkPMColor modulate_modeproc_neon(SkPMColor src, SkPMColor dst) {
    const uint8x8_t srcBytes = vreinterpret_u8_u32(vdup_n_u32(src));
    const uint8x8_t dstBytes = vreinterpret_u8_u32(vdup_n_u32(dst));

    // Widening 8x8 -> 16 bit products.
    uint16x8_t product = vmull_u8(srcBytes, dstBytes);

    // Rounding bias, then fold the high byte back in.
    product = vaddq_u16(product, vdupq_n_u16(128));
    product = vaddq_u16(product, vshrq_n_u16(product, 8));

    // Narrow back to bytes, keeping the high half of each 16-bit lane.
    const uint8x8_t modulated = vshrn_n_u16(product, 8);

    return vget_lane_u32(vreinterpret_u32_u8(modulated), 0);
}
| 221 |
| 222 //////////////////////////////////////////////////////////////////////////////// |
96 // 8 pixels modeprocs | 223 // 8 pixels modeprocs |
97 //////////////////////////////////////////////////////////////////////////////// | 224 //////////////////////////////////////////////////////////////////////////////// |
98 | 225 |
99 uint8x8x4_t dstover_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) { | 226 uint8x8x4_t dstover_modeproc_neon8(uint8x8x4_t src, uint8x8x4_t dst) { |
100 uint8x8x4_t ret; | 227 uint8x8x4_t ret; |
101 uint16x8_t src_scale; | 228 uint16x8_t src_scale; |
102 | 229 |
103 src_scale = vsubw_u8(vdupq_n_u16(256), dst.val[NEON_A]); | 230 src_scale = vsubw_u8(vdupq_n_u16(256), dst.val[NEON_A]); |
104 | 231 |
105 ret.val[NEON_A] = dst.val[NEON_A] + SkAlphaMul_neon8(src.val[NEON_A], src_sc
ale); | 232 ret.val[NEON_A] = dst.val[NEON_A] + SkAlphaMul_neon8(src.val[NEON_A], src_sc
ale); |
(...skipping 652 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
758 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec, | 885 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_neon(const ProcCoeff& rec, |
759 SkXfermode::Mode mode)
{ | 886 SkXfermode::Mode mode)
{ |
760 | 887 |
761 void* procSIMD = reinterpret_cast<void*>(gNEONXfermodeProcs[mode]); | 888 void* procSIMD = reinterpret_cast<void*>(gNEONXfermodeProcs[mode]); |
762 | 889 |
763 if (procSIMD != NULL) { | 890 if (procSIMD != NULL) { |
764 return SkNEW_ARGS(SkNEONProcCoeffXfermode, (rec, mode, procSIMD)); | 891 return SkNEW_ARGS(SkNEONProcCoeffXfermode, (rec, mode, procSIMD)); |
765 } | 892 } |
766 return NULL; | 893 return NULL; |
767 } | 894 } |
OLD | NEW |