OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2012 The Android Open Source Project | 2 * Copyright 2012 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkBlitRow_opts_arm.h" | 8 #include "SkBlitRow_opts_arm.h" |
9 | 9 |
10 #include "SkBlitMask.h" | 10 #include "SkBlitMask.h" |
(...skipping 749 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
760 src += 1; | 760 src += 1; |
761 dst += 1; | 761 dst += 1; |
762 } while (--count > 0); | 762 } while (--count > 0); |
763 } | 763 } |
764 #endif | 764 #endif |
765 | 765 |
766 #undef UNROLL | 766 #undef UNROLL |
767 } | 767 } |
768 } | 768 } |
769 | 769 |
770 void S32A_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst, | |
771 const SkPMColor* SK_RESTRICT src, | |
772 int count, U8CPU alpha) { | |
773 | |
774 SkASSERT(255 >= alpha); | |
775 | |
776 if (count <= 0) { | |
777 return; | |
778 } | |
779 | |
780 unsigned alpha256 = SkAlpha255To256(alpha); | |
781 | |
782 // First deal with odd counts | |
783 if (count & 1) { | |
784 uint8x8_t vsrc, vdst, vres; | |
785 uint16x8_t vdst_wide, vsrc_wide; | |
786 unsigned dst_scale; | |
787 | |
788 // Load | |
789 vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0)); | |
790 vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0)); | |
791 | |
792 // Calc dst_scale | |
793 dst_scale = vget_lane_u8(vsrc, 3); | |
794 dst_scale *= alpha256; | |
795 dst_scale >>= 8; | |
796 dst_scale = 256 - dst_scale; | |
797 | |
798 // Process src | |
799 vsrc_wide = vmovl_u8(vsrc); | |
800 vsrc_wide = vmulq_n_u16(vsrc_wide, alpha256); | |
801 | |
802 // Process dst | |
803 vdst_wide = vmovl_u8(vdst); | |
804 vdst_wide = vmulq_n_u16(vdst_wide, dst_scale); | |
805 | |
806 // Combine | |
807 vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8); | |
808 | |
809 vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0); | |
810 dst++; | |
811 src++; | |
812 count--; | |
813 } | |
814 | |
815 if (count) { | |
816 uint8x8_t alpha_mask; | |
817 static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7}; | |
818 alpha_mask = vld1_u8(alpha_mask_setup); | |
819 | |
820 do { | |
821 | |
822 uint8x8_t vsrc, vdst, vres, vsrc_alphas; | |
823 uint16x8_t vdst_wide, vsrc_wide, vsrc_scale, vdst_scale; | |
824 | |
825 __builtin_prefetch(src+32); | |
826 __builtin_prefetch(dst+32); | |
827 | |
828 // Load | |
829 vsrc = vreinterpret_u8_u32(vld1_u32(src)); | |
830 vdst = vreinterpret_u8_u32(vld1_u32(dst)); | |
831 | |
832 // Prepare src_scale | |
833 vsrc_scale = vdupq_n_u16(alpha256); | |
834 | |
835 // Calc dst_scale | |
836 vsrc_alphas = vtbl1_u8(vsrc, alpha_mask); | |
837 vdst_scale = vmovl_u8(vsrc_alphas); | |
838 vdst_scale *= vsrc_scale; | |
839 vdst_scale = vshrq_n_u16(vdst_scale, 8); | |
840 vdst_scale = vsubq_u16(vdupq_n_u16(256), vdst_scale); | |
841 | |
842 // Process src | |
843 vsrc_wide = vmovl_u8(vsrc); | |
844 vsrc_wide *= vsrc_scale; | |
845 | |
846 // Process dst | |
847 vdst_wide = vmovl_u8(vdst); | |
848 vdst_wide *= vdst_scale; | |
849 | |
850 // Combine | |
851 vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8); | |
852 | |
853 vst1_u32(dst, vreinterpret_u32_u8(vres)); | |
854 | |
855 src += 2; | |
856 dst += 2; | |
857 count -= 2; | |
858 } while(count); | |
859 } | |
860 } | |
861 | |
770 /////////////////////////////////////////////////////////////////////////////// | 862 /////////////////////////////////////////////////////////////////////////////// |
771 | 863 |
772 #undef DEBUG_OPAQUE_DITHER | 864 #undef DEBUG_OPAQUE_DITHER |
773 | 865 |
774 #if defined(DEBUG_OPAQUE_DITHER) | 866 #if defined(DEBUG_OPAQUE_DITHER) |
775 static void showme8(char *str, void *p, int len) | 867 static void showme8(char *str, void *p, int len) |
776 { | 868 { |
777 static char buf[256]; | 869 static char buf[256]; |
778 char tbuf[32]; | 870 char tbuf[32]; |
779 int i; | 871 int i; |
(...skipping 507 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
1287 * the performance will almost certainly be worse. However, for many | 1379 * the performance will almost certainly be worse. However, for many |
1288 * common cases the performance is equivalent or better than the standard | 1380 * common cases the performance is equivalent or better than the standard |
1289 * case where we do not inspect the src alpha. | 1381 * case where we do not inspect the src alpha. |
1290 */ | 1382 */ |
1291 #if SK_A32_SHIFT == 24 | 1383 #if SK_A32_SHIFT == 24 |
1292 // This proc assumes the alpha value occupies bits 24-32 of each SkPMColor | 1384 // This proc assumes the alpha value occupies bits 24-32 of each SkPMColor |
1293 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, | 1385 S32A_Opaque_BlitRow32_neon_src_alpha, // S32A_Opaque, |
1294 #else | 1386 #else |
1295 S32A_Opaque_BlitRow32_neon, // S32A_Opaque, | 1387 S32A_Opaque_BlitRow32_neon, // S32A_Opaque, |
1296 #endif | 1388 #endif |
1297 S32A_Blend_BlitRow32_arm // S32A_Blend | 1389 S32A_Blend_BlitRow32_neon // S32A_Blend |
djsollen
2013/07/12 19:45:40
we should remove the extern definition in SkBlitRo
| |
1298 }; | 1390 }; |
OLD | NEW |