OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2012 The Android Open Source Project | 2 * Copyright 2012 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include <emmintrin.h> | 8 #include <emmintrin.h> |
9 #include "SkBitmapProcState_opts_SSE2.h" | 9 #include "SkBitmapProcState_opts_SSE2.h" |
10 #include "SkBlitRow_opts_SSE2.h" | 10 #include "SkBlitRow_opts_SSE2.h" |
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
60 } | 60 } |
61 | 61 |
62 while (count > 0) { | 62 while (count > 0) { |
63 *dst = SkAlphaMulQ(*src, src_scale) + SkAlphaMulQ(*dst, dst_scale); | 63 *dst = SkAlphaMulQ(*src, src_scale) + SkAlphaMulQ(*dst, dst_scale); |
64 src++; | 64 src++; |
65 dst++; | 65 dst++; |
66 count--; | 66 count--; |
67 } | 67 } |
68 } | 68 } |
69 | 69 |
70 void S32A_Opaque_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, | |
71 const SkPMColor* SK_RESTRICT src, | |
72 int count, U8CPU alpha) { | |
73 sk_msan_assert_initialized(src, src+count); | |
74 | |
75 SkASSERT(alpha == 255); | |
76 if (count <= 0) { | |
77 return; | |
78 } | |
79 | |
80 int count16 = count / 16; | |
81 __m128i* dst4 = (__m128i*)dst; | |
82 const __m128i* src4 = (const __m128i*)src; | |
83 | |
84 for (int i = 0; i < count16 * 4; i += 4) { | |
85 // Load 16 source pixels. | |
86 __m128i s0 = _mm_loadu_si128(src4+i+0), | |
87 s1 = _mm_loadu_si128(src4+i+1), | |
88 s2 = _mm_loadu_si128(src4+i+2), | |
89 s3 = _mm_loadu_si128(src4+i+3); | |
90 | |
91 const __m128i alphaMask = _mm_set1_epi32(0xFF << SK_A32_SHIFT); | |
92 const __m128i ORed = _mm_or_si128(s3, _mm_or_si128(s2, _mm_or_si128(s1,
s0))); | |
93 __m128i cmp = _mm_cmpeq_epi8(_mm_and_si128(ORed, alphaMask), _mm_setzero
_si128()); | |
94 if (0xffff == _mm_movemask_epi8(cmp)) { | |
95 // All 16 source pixels are fully transparent. There's nothing to do
! | |
96 continue; | |
97 } | |
98 const __m128i ANDed = _mm_and_si128(s3, _mm_and_si128(s2, _mm_and_si128(
s1, s0))); | |
99 cmp = _mm_cmpeq_epi8(_mm_and_si128(ANDed, alphaMask), alphaMask); | |
100 if (0xffff == _mm_movemask_epi8(cmp)) { | |
101 // All 16 source pixels are fully opaque. There's no need to read ds
t or blend it. | |
102 _mm_storeu_si128(dst4+i+0, s0); | |
103 _mm_storeu_si128(dst4+i+1, s1); | |
104 _mm_storeu_si128(dst4+i+2, s2); | |
105 _mm_storeu_si128(dst4+i+3, s3); | |
106 continue; | |
107 } | |
108 // The general slow case: do the blend for all 16 pixels. | |
109 _mm_storeu_si128(dst4+i+0, SkPMSrcOver_SSE2(s0, _mm_loadu_si128(dst4+i+0
))); | |
110 _mm_storeu_si128(dst4+i+1, SkPMSrcOver_SSE2(s1, _mm_loadu_si128(dst4+i+1
))); | |
111 _mm_storeu_si128(dst4+i+2, SkPMSrcOver_SSE2(s2, _mm_loadu_si128(dst4+i+2
))); | |
112 _mm_storeu_si128(dst4+i+3, SkPMSrcOver_SSE2(s3, _mm_loadu_si128(dst4+i+3
))); | |
113 } | |
114 | |
115 // Wrap up the last <= 15 pixels. | |
116 SkASSERT(count - (count16*16) <= 15); | |
117 for (int i = count16*16; i < count; i++) { | |
118 // This check is not really necessarily, but it prevents pointless autov
ectorization. | |
119 if (src[i] & 0xFF000000) { | |
120 dst[i] = SkPMSrcOver(src[i], dst[i]); | |
121 } | |
122 } | |
123 } | |
124 | |
125 void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, | 70 void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, |
126 const SkPMColor* SK_RESTRICT src, | 71 const SkPMColor* SK_RESTRICT src, |
127 int count, U8CPU alpha) { | 72 int count, U8CPU alpha) { |
128 SkASSERT(alpha <= 255); | 73 SkASSERT(alpha <= 255); |
129 if (count <= 0) { | 74 if (count <= 0) { |
130 return; | 75 return; |
131 } | 76 } |
132 | 77 |
133 if (count >= 4) { | 78 if (count >= 4) { |
134 while (((size_t)dst & 0x0F) != 0) { | 79 while (((size_t)dst & 0x0F) != 0) { |
(...skipping 906 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1041 uint32_t dst_expanded = SkExpand_rgb_16(*dst); | 986 uint32_t dst_expanded = SkExpand_rgb_16(*dst); |
1042 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); | 987 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); |
1043 // now src and dst expanded are in g:11 r:10 x:1 b:10 | 988 // now src and dst expanded are in g:11 r:10 x:1 b:10 |
1044 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); | 989 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); |
1045 } | 990 } |
1046 dst += 1; | 991 dst += 1; |
1047 DITHER_INC_X(x); | 992 DITHER_INC_X(x); |
1048 } while (--count != 0); | 993 } while (--count != 0); |
1049 } | 994 } |
1050 } | 995 } |
OLD | NEW |