Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2012 The Android Open Source Project | 2 * Copyright 2012 The Android Open Source Project |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include "SkBlitRow_opts_arm.h" | 8 #include "SkBlitRow_opts_arm.h" |
| 9 | 9 |
| 10 #include "SkBlitMask.h" | 10 #include "SkBlitMask.h" |
| (...skipping 408 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 419 | 419 |
| 420 static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7}; | 420 static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7}; |
| 421 alpha_mask = vld1_u8(alpha_mask_setup); | 421 alpha_mask = vld1_u8(alpha_mask_setup); |
| 422 | 422 |
| 423 /* do the NEON unrolled code */ | 423 /* do the NEON unrolled code */ |
| 424 #define UNROLL 4 | 424 #define UNROLL 4 |
| 425 while (count >= UNROLL) { | 425 while (count >= UNROLL) { |
| 426 uint8x8_t src_raw, dst_raw, dst_final; | 426 uint8x8_t src_raw, dst_raw, dst_final; |
| 427 uint8x8_t src_raw_2, dst_raw_2, dst_final_2; | 427 uint8x8_t src_raw_2, dst_raw_2, dst_final_2; |
| 428 | 428 |
| 429 __builtin_prefetch(src+32); | |
| 430 __builtin_prefetch(dst+32); | |
|
djsollen
2013/07/11 15:24:47
can you put the comment here that this *may* be slower
kevin.petit.not.used.account
2013/07/11 15:42:14
Done.
| |
| 431 | |
| 429 /* get the source */ | 432 /* get the source */ |
| 430 src_raw = vreinterpret_u8_u32(vld1_u32(src)); | 433 src_raw = vreinterpret_u8_u32(vld1_u32(src)); |
| 431 #if UNROLL > 2 | 434 #if UNROLL > 2 |
| 432 src_raw_2 = vreinterpret_u8_u32(vld1_u32(src+2)); | 435 src_raw_2 = vreinterpret_u8_u32(vld1_u32(src+2)); |
| 433 #endif | 436 #endif |
| 434 | 437 |
| 435 /* get and hold the dst too */ | 438 /* get and hold the dst too */ |
| 436 dst_raw = vreinterpret_u8_u32(vld1_u32(dst)); | 439 dst_raw = vreinterpret_u8_u32(vld1_u32(dst)); |
| 437 #if UNROLL > 2 | 440 #if UNROLL > 2 |
| 438 dst_raw_2 = vreinterpret_u8_u32(vld1_u32(dst+2)); | 441 dst_raw_2 = vreinterpret_u8_u32(vld1_u32(dst+2)); |
| 439 #endif | 442 #endif |
| 440 | 443 |
| 441 /* 1st and 2nd bits of the unrolling */ | 444 /* 1st and 2nd bits of the unrolling */ |
| 442 { | 445 { |
| 443 uint8x8_t dst_cooked; | 446 uint8x8_t dst_cooked; |
| 444 uint16x8_t dst_wide; | 447 uint16x8_t dst_wide; |
| 445 uint8x8_t alpha_narrow; | 448 uint8x8_t alpha_narrow; |
| 446 uint16x8_t alpha_wide; | 449 uint16x8_t alpha_wide; |
| 447 | 450 |
| 448 /* get the alphas spread out properly */ | 451 /* get the alphas spread out properly */ |
| 449 alpha_narrow = vtbl1_u8(src_raw, alpha_mask); | 452 alpha_narrow = vtbl1_u8(src_raw, alpha_mask); |
| 450 #if 1 | |
| 451 /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */ | |
| 452 /* we collapsed (255-a)+1 ... */ | |
| 453 alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow); | 453 alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow); |
| 454 #else | |
| 455 alpha_wide = vsubw_u8(vdupq_n_u16(255), alpha_narrow); | |
| 456 alpha_wide = vaddq_u16(alpha_wide, vshrq_n_u16(alpha_wide,7)); | |
| 457 #endif | |
| 458 | 454 |
| 459 /* spread the dest */ | 455 /* spread the dest */ |
| 460 dst_wide = vmovl_u8(dst_raw); | 456 dst_wide = vmovl_u8(dst_raw); |
| 461 | 457 |
| 462 /* alpha mul the dest */ | 458 /* alpha mul the dest */ |
| 463 dst_wide = vmulq_u16 (dst_wide, alpha_wide); | 459 dst_wide = vmulq_u16 (dst_wide, alpha_wide); |
| 464 dst_cooked = vshrn_n_u16(dst_wide, 8); | 460 dst_cooked = vshrn_n_u16(dst_wide, 8); |
| 465 | 461 |
| 466 /* sum -- ignoring any byte lane overflows */ | 462 /* sum -- ignoring any byte lane overflows */ |
| 467 dst_final = vadd_u8(src_raw, dst_cooked); | 463 dst_final = vadd_u8(src_raw, dst_cooked); |
| 468 } | 464 } |
| 469 | 465 |
| 470 #if UNROLL > 2 | 466 #if UNROLL > 2 |
| 471 /* the 3rd and 4th bits of our unrolling */ | 467 /* the 3rd and 4th bits of our unrolling */ |
| 472 { | 468 { |
| 473 uint8x8_t dst_cooked; | 469 uint8x8_t dst_cooked; |
| 474 uint16x8_t dst_wide; | 470 uint16x8_t dst_wide; |
| 475 uint8x8_t alpha_narrow; | 471 uint8x8_t alpha_narrow; |
| 476 uint16x8_t alpha_wide; | 472 uint16x8_t alpha_wide; |
| 477 | 473 |
| 478 alpha_narrow = vtbl1_u8(src_raw_2, alpha_mask); | 474 alpha_narrow = vtbl1_u8(src_raw_2, alpha_mask); |
| 479 #if 1 | |
| 480 /* reflect SkAlpha255To256() semantics a+1 vs a+a>>7 */ | |
| 481 /* we collapsed (255-a)+1 ... */ | |
| 482 alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow); | 475 alpha_wide = vsubw_u8(vdupq_n_u16(256), alpha_narrow); |
| 483 #else | |
| 484 alpha_wide = vsubw_u8(vdupq_n_u16(255), alpha_narrow); | |
| 485 alpha_wide = vaddq_u16(alpha_wide, vshrq_n_u16(alpha_wide,7)); | |
| 486 #endif | |
| 487 | 476 |
| 488 /* spread the dest */ | 477 /* spread the dest */ |
| 489 dst_wide = vmovl_u8(dst_raw_2); | 478 dst_wide = vmovl_u8(dst_raw_2); |
| 490 | 479 |
| 491 /* alpha mul the dest */ | 480 /* alpha mul the dest */ |
| 492 dst_wide = vmulq_u16 (dst_wide, alpha_wide); | 481 dst_wide = vmulq_u16 (dst_wide, alpha_wide); |
| 493 dst_cooked = vshrn_n_u16(dst_wide, 8); | 482 dst_cooked = vshrn_n_u16(dst_wide, 8); |
| 494 | 483 |
| 495 /* sum -- ignoring any byte lane overflows */ | 484 /* sum -- ignoring any byte lane overflows */ |
| 496 dst_final_2 = vadd_u8(src_raw_2, dst_cooked); | 485 dst_final_2 = vadd_u8(src_raw_2, dst_cooked); |
| (...skipping 792 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1289 * case where we do not inspect the src alpha. | 1278 * case where we do not inspect the src alpha. |
| 1290 */ | 1279 */ |
| 1291 #if SK_A32_SHIFT == 24 | 1280 #if SK_A32_SHIFT == 24 |
| 1292 // This proc assumes the alpha value occupies bits 24-32 of each SkPMColor | 1281 // This proc assumes the alpha value occupies bits 24-32 of each SkPMColor |
| 1293 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, | 1282 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, |
| 1294 #else | 1283 #else |
| 1295 S32A_Opaque_BlitRow32_neon, // S32A_Opaque, | 1284 S32A_Opaque_BlitRow32_neon, // S32A_Opaque, |
| 1296 #endif | 1285 #endif |
| 1297 S32A_Blend_BlitRow32_arm // S32A_Blend | 1286 S32A_Blend_BlitRow32_arm // S32A_Blend |
| 1298 }; | 1287 }; |
| OLD | NEW |