OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkBlitMask.h" | 8 #include "SkBlitMask.h" |
9 #include "SkColor_opts_neon.h" | 9 #include "SkColor_opts_neon.h" |
10 | 10 |
(...skipping 240 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
251 | 251 |
252 dst += 8; | 252 dst += 8; |
253 src += 8; | 253 src += 8; |
254 width -= 8; | 254 width -= 8; |
255 } | 255 } |
256 | 256 |
257 for (int i = 0; i < width; i++) { | 257 for (int i = 0; i < width; i++) { |
258 dst[i] = SkBlendLCD16(colA, colR, colG, colB, dst[i], src[i]); | 258 dst[i] = SkBlendLCD16(colA, colR, colG, colB, dst[i], src[i]); |
259 } | 259 } |
260 } | 260 } |
| 261 |
| 262 #define LOAD_LANE_16(reg, n) \ |
| 263 reg = vld1q_lane_u16(device, reg, n); \ |
| 264 device = (uint16_t*)((char*)device + deviceRB); |
| 265 |
| 266 #define STORE_LANE_16(reg, n) \ |
| 267 vst1_lane_u16(dst, reg, n); \ |
| 268 dst = (uint16_t*)((char*)dst + deviceRB); |
| 269 |
| 270 void SkRGB16BlitterBlitV_neon(uint16_t* device, |
| 271 int height, |
| 272 size_t deviceRB, |
| 273 unsigned scale, |
| 274 uint32_t src32) { |
| 275 if (height >= 8) |
| 276 { |
| 277 uint16_t* dst = device; |
| 278 |
| 279 // prepare constants |
| 280 uint16x8_t vdev = vdupq_n_u16(0); |
| 281 uint16x8_t vmaskq_g16 = vdupq_n_u16(SK_G16_MASK_IN_PLACE); |
| 282 uint16x8_t vmaskq_ng16 = vdupq_n_u16(~SK_G16_MASK_IN_PLACE); |
| 283 uint32x4_t vsrc32 = vdupq_n_u32(src32); |
| 284 uint32x4_t vscale5 = vdupq_n_u32((uint32_t)scale); |
| 285 |
| 286 while (height >= 8){ |
| 287 LOAD_LANE_16(vdev, 0) |
| 288 LOAD_LANE_16(vdev, 1) |
| 289 LOAD_LANE_16(vdev, 2) |
| 290 LOAD_LANE_16(vdev, 3) |
| 291 LOAD_LANE_16(vdev, 4) |
| 292 LOAD_LANE_16(vdev, 5) |
| 293 LOAD_LANE_16(vdev, 6) |
| 294 LOAD_LANE_16(vdev, 7) |
| 295 |
| 296 // Expand_rgb_16 |
| 297 uint16x8x2_t vdst = vzipq_u16((vdev & vmaskq_ng16), (vdev & vmaskq_g
16)); |
| 298 uint32x4_t vdst32_lo = vmulq_u32(vreinterpretq_u32_u16(vdst.val[0]),
vscale5); |
| 299 uint32x4_t vdst32_hi = vmulq_u32(vreinterpretq_u32_u16(vdst.val[1]),
vscale5); |
| 300 |
| 301 // Compact_rgb_16 |
| 302 vdst32_lo = vaddq_u32(vdst32_lo, vsrc32); |
| 303 vdst32_hi = vaddq_u32(vdst32_hi, vsrc32); |
| 304 vdst32_lo = vshrq_n_u32(vdst32_lo, 5); |
| 305 vdst32_hi = vshrq_n_u32(vdst32_hi, 5); |
| 306 |
| 307 uint16x4_t vtmp_lo = vmovn_u32(vdst32_lo) & vget_low_u16(vmaskq_ng16
); |
| 308 uint16x4_t vtmp_hi = vshrn_n_u32(vdst32_lo, 16) & vget_low_u16(vmask
q_g16); |
| 309 uint16x4_t vdst16_lo = vorr_u16(vtmp_lo, vtmp_hi); |
| 310 vtmp_lo = vmovn_u32(vdst32_hi) & vget_low_u16(vmaskq_ng16); |
| 311 vtmp_hi = vshrn_n_u32(vdst32_hi, 16) & vget_low_u16(vmaskq_g16); |
| 312 uint16x4_t vdst16_hi = vorr_u16(vtmp_lo, vtmp_hi); |
| 313 |
| 314 STORE_LANE_16(vdst16_lo, 0) |
| 315 STORE_LANE_16(vdst16_lo, 1) |
| 316 STORE_LANE_16(vdst16_lo, 2) |
| 317 STORE_LANE_16(vdst16_lo, 3) |
| 318 STORE_LANE_16(vdst16_hi, 0) |
| 319 STORE_LANE_16(vdst16_hi, 1) |
| 320 STORE_LANE_16(vdst16_hi, 2) |
| 321 STORE_LANE_16(vdst16_hi, 3) |
| 322 height -= 8; |
| 323 } |
| 324 } |
| 325 while (height != 0){ |
| 326 uint32_t dst32 = SkExpand_rgb_16(*device) * scale; |
| 327 *device = SkCompact_rgb_16((src32 + dst32) >> 5); |
| 328 device = (uint16_t*)((char*)device + deviceRB); |
| 329 height--; |
| 330 } |
| 331 } |
| 332 |
| 333 #undef LOAD_LANE_16 |
| 334 #undef STORE_LANE_16 |
OLD | NEW |