src/opts/SkBlitRow_opts_arm_neon.cpp - Issue 2097883002: revise row blits to keep intermediate precision so that color is preserved when blended against its…

Side by Side Diff: src/opts/SkBlitRow_opts_arm_neon.cpp

Issue 2097883002: revise row blits to keep intermediate precision so that color is preserved when blended against its… (Closed) Base URL: https://skia.googlesource.com/skia@master

Patch Set: Fix overflow in destination scale calculation Created 4 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * Copyright 2012 The Android Open Source Project	2 * Copyright 2012 The Android Open Source Project

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 #include "SkBlitRow_opts_arm_neon.h"	8 #include "SkBlitRow_opts_arm_neon.h"

9	9

10 #include "SkBlitMask.h"	10 #include "SkBlitMask.h"

(...skipping 891 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
902 vdst = vreinterpret_u8_u32(vld1_u32(dst));	902 vdst = vreinterpret_u8_u32(vld1_u32(dst));

903	903

904 // Process src	904 // Process src

905 vsrc_wide = vmovl_u8(vsrc);	905 vsrc_wide = vmovl_u8(vsrc);

906 vsrc_wide = vmulq_u16(vsrc_wide, vdupq_n_u16(src_scale));	906 vsrc_wide = vmulq_u16(vsrc_wide, vdupq_n_u16(src_scale));

907	907

908 // Process dst	908 // Process dst

909 vdst_wide = vmull_u8(vdst, vdup_n_u8(dst_scale));	909 vdst_wide = vmull_u8(vdst, vdup_n_u8(dst_scale));

910	910

911 // Combine	911 // Combine

912 vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8);	912 vdst_wide += vsrc_wide;

	913 vres = vshrn_n_u16(vdst_wide, 8);

913	914

914 // Store	915 // Store

915 vst1_u32(dst, vreinterpret_u32_u8(vres));	916 vst1_u32(dst, vreinterpret_u32_u8(vres));

916	917

917 src += 2;	918 src += 2;

918 dst += 2;	919 dst += 2;

919 count -= 2;	920 count -= 2;

920 }	921 }

921	922

922 if (count == 1) {	923 if (count == 1) {

923 uint8x8_t vsrc = vdup_n_u8(0), vdst = vdup_n_u8(0), vres;	924 uint8x8_t vsrc = vdup_n_u8(0), vdst = vdup_n_u8(0), vres;

924 uint16x8_t vsrc_wide, vdst_wide;	925 uint16x8_t vsrc_wide, vdst_wide;

925	926

926 // Load	927 // Load

927 vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0));	928 vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0));

928 vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0));	929 vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0));

929	930

930 // Process	931 // Process

931 vsrc_wide = vmovl_u8(vsrc);	932 vsrc_wide = vmovl_u8(vsrc);

932 vsrc_wide = vmulq_u16(vsrc_wide, vdupq_n_u16(src_scale));	933 vsrc_wide = vmulq_u16(vsrc_wide, vdupq_n_u16(src_scale));

933 vdst_wide = vmull_u8(vdst, vdup_n_u8(dst_scale));	934 vdst_wide = vmull_u8(vdst, vdup_n_u8(dst_scale));

934 vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8);	935 vdst_wide += vsrc_wide;

	936 vres = vshrn_n_u16(vdst_wide, 8);

935	937

936 // Store	938 // Store

937 vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0);	939 vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0);

938 }	940 }

939 }	941 }

940	942

941 #ifdef SK_CPU_ARM32	943 #ifdef SK_CPU_ARM32

942 void S32A_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,	944 void S32A_Blend_BlitRow32_neon(SkPMColor* SK_RESTRICT dst,

943 const SkPMColor* SK_RESTRICT src,	945 const SkPMColor* SK_RESTRICT src,

944 int count, U8CPU alpha) {	946 int count, U8CPU alpha) {

(...skipping 12 matching lines...) Expand all Loading...
957 uint16x8_t vdst_wide, vsrc_wide;	959 uint16x8_t vdst_wide, vsrc_wide;

958 unsigned dst_scale;	960 unsigned dst_scale;

959	961

960 // Load	962 // Load

961 vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0));	963 vsrc = vreinterpret_u8_u32(vld1_lane_u32(src, vreinterpret_u32_u8(vsrc), 0));

962 vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0));	964 vdst = vreinterpret_u8_u32(vld1_lane_u32(dst, vreinterpret_u32_u8(vdst), 0));

963	965

964 // Calc dst_scale	966 // Calc dst_scale

965 dst_scale = vget_lane_u8(vsrc, 3);	967 dst_scale = vget_lane_u8(vsrc, 3);

966 dst_scale *= alpha256;	968 dst_scale *= alpha256;

	969 dst_scale = (256<<8) - dst_scale;

967 dst_scale >>= 8;	970 dst_scale >>= 8;

968 dst_scale = 256 - dst_scale;

969	971

970 // Process src	972 // Process src

971 vsrc_wide = vmovl_u8(vsrc);	973 vsrc_wide = vmovl_u8(vsrc);

972 vsrc_wide = vmulq_n_u16(vsrc_wide, alpha256);	974 vsrc_wide = vmulq_n_u16(vsrc_wide, alpha256);

973	975

974 // Process dst	976 // Process dst

975 vdst_wide = vmovl_u8(vdst);	977 vdst_wide = vmovl_u8(vdst);

976 vdst_wide = vmulq_n_u16(vdst_wide, dst_scale);	978 vdst_wide = vmulq_n_u16(vdst_wide, dst_scale);

977	979

978 // Combine	980 // Combine

979 vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8);	981 vdst_wide += vsrc_wide;

	982 vres = vshrn_n_u16(vdst_wide, 8);

980	983

981 vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0);	984 vst1_lane_u32(dst, vreinterpret_u32_u8(vres), 0);

982 dst++;	985 dst++;

983 src++;	986 src++;

984 count--;	987 count--;

985 }	988 }

986	989

987 if (count) {	990 if (count) {

988 uint8x8_t alpha_mask;	991 uint8x8_t alpha_mask;

989 static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7};	992 static const uint8_t alpha_mask_setup[] = {3,3,3,3,7,7,7,7};

(...skipping 10 matching lines...) Expand all Loading...
1000 // Load	1003 // Load

1001 vsrc = vreinterpret_u8_u32(vld1_u32(src));	1004 vsrc = vreinterpret_u8_u32(vld1_u32(src));

1002 vdst = vreinterpret_u8_u32(vld1_u32(dst));	1005 vdst = vreinterpret_u8_u32(vld1_u32(dst));

1003	1006

1004 // Prepare src_scale	1007 // Prepare src_scale

1005 vsrc_scale = vdupq_n_u16(alpha256);	1008 vsrc_scale = vdupq_n_u16(alpha256);

1006	1009

1007 // Calc dst_scale	1010 // Calc dst_scale

1008 vsrc_alphas = vtbl1_u8(vsrc, alpha_mask);	1011 vsrc_alphas = vtbl1_u8(vsrc, alpha_mask);

1009 vdst_scale = vmovl_u8(vsrc_alphas);	1012 vdst_scale = vmovl_u8(vsrc_alphas);

1010 vdst_scale *= vsrc_scale;	1013 vdst_scale = vmlsq_u16(vdupq_n_u16(255<<8), vdst_scale, vsrc_scale);

1011 vdst_scale = vshrq_n_u16(vdst_scale, 8);	1014 vdst_scale = vsraq_n_u16(vdupq_n_u16(1), vdst_scale, 8);

1012 vdst_scale = vsubq_u16(vdupq_n_u16(256), vdst_scale);

1013	1015

1014 // Process src	1016 // Process src

1015 vsrc_wide = vmovl_u8(vsrc);	1017 vsrc_wide = vmovl_u8(vsrc);

1016 vsrc_wide *= vsrc_scale;	1018 vsrc_wide *= vsrc_scale;

1017	1019

1018 // Process dst	1020 // Process dst

1019 vdst_wide = vmovl_u8(vdst);	1021 vdst_wide = vmovl_u8(vdst);

1020 vdst_wide *= vdst_scale;	1022 vdst_wide *= vdst_scale;

1021	1023

1022 // Combine	1024 // Combine

1023 vres = vshrn_n_u16(vdst_wide, 8) + vshrn_n_u16(vsrc_wide, 8);	1025 vdst_wide += vsrc_wide;

	1026 vres = vshrn_n_u16(vdst_wide, 8);

1024	1027

1025 vst1_u32(dst, vreinterpret_u32_u8(vres));	1028 vst1_u32(dst, vreinterpret_u32_u8(vres));

1026	1029

1027 src += 2;	1030 src += 2;

1028 dst += 2;	1031 dst += 2;

1029 count -= 2;	1032 count -= 2;

1030 } while(count);	1033 } while(count);

1031 }	1034 }

1032 }	1035 }

1033	1036

(...skipping 251 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1285 const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = {	1288 const SkBlitRow::Proc32 sk_blitrow_platform_32_procs_arm_neon[] = {

1286 nullptr, // S32_Opaque,	1289 nullptr, // S32_Opaque,

1287 S32_Blend_BlitRow32_neon, // S32_Blend,	1290 S32_Blend_BlitRow32_neon, // S32_Blend,

1288 nullptr, // Ported to SkOpts	1291 nullptr, // Ported to SkOpts

1289 #ifdef SK_CPU_ARM32	1292 #ifdef SK_CPU_ARM32

1290 S32A_Blend_BlitRow32_neon // S32A_Blend	1293 S32A_Blend_BlitRow32_neon // S32A_Blend

1291 #else	1294 #else

1292 nullptr	1295 nullptr

1293 #endif	1296 #endif

1294 };	1297 };

OLD	NEW

« include/core/SkColorPriv.h ('K') | « src/opts/SkBlitRow_opts_SSE2.cpp ('k') | src/opts/SkBlitRow_opts_mips_dsp.cpp » ('j') | no next file with comments »