OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2012 The Android Open Source Project | 2 * Copyright 2012 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include <emmintrin.h> | 8 #include <emmintrin.h> |
9 #include "SkBitmapProcState_opts_SSE2.h" | 9 #include "SkBitmapProcState_opts_SSE2.h" |
10 #include "SkBlitRow_opts_SSE2.h" | 10 #include "SkBlitRow_opts_SSE2.h" |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
70 void S32A_Opaque_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, | 70 void S32A_Opaque_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, |
71 const SkPMColor* SK_RESTRICT src, | 71 const SkPMColor* SK_RESTRICT src, |
72 int count, U8CPU alpha) { | 72 int count, U8CPU alpha) { |
73 sk_msan_assert_initialized(src, src+count); | 73 sk_msan_assert_initialized(src, src+count); |
74 | 74 |
75 SkASSERT(alpha == 255); | 75 SkASSERT(alpha == 255); |
76 if (count <= 0) { | 76 if (count <= 0) { |
77 return; | 77 return; |
78 } | 78 } |
79 | 79 |
80 #ifdef SK_USE_ACCURATE_BLENDING | |
81 if (count >= 4) { | |
82 SkASSERT(((size_t)dst & 0x03) == 0); | |
83 while (((size_t)dst & 0x0F) != 0) { | |
84 *dst = SkPMSrcOver(*src, *dst); | |
85 src++; | |
86 dst++; | |
87 count--; | |
88 } | |
89 | |
90 const __m128i *s = reinterpret_cast<const __m128i*>(src); | |
91 __m128i *d = reinterpret_cast<__m128i*>(dst); | |
92 __m128i rb_mask = _mm_set1_epi32(0x00FF00FF); | |
93 __m128i c_128 = _mm_set1_epi16(128); // 8 copies of 128 (16-bit) | |
94 __m128i c_255 = _mm_set1_epi16(255); // 8 copies of 255 (16-bit) | |
95 while (count >= 4) { | |
96 // Load 4 pixels | |
97 __m128i src_pixel = _mm_loadu_si128(s); | |
98 __m128i dst_pixel = _mm_load_si128(d); | |
99 | |
100 __m128i dst_rb = _mm_and_si128(rb_mask, dst_pixel); | |
101 __m128i dst_ag = _mm_srli_epi16(dst_pixel, 8); | |
102 // Shift alphas down to lower 8 bits of each quad. | |
103 __m128i alpha = _mm_srli_epi32(src_pixel, 24); | |
104 | |
105 // Copy alpha to upper 3rd byte of each quad | |
106 alpha = _mm_or_si128(alpha, _mm_slli_epi32(alpha, 16)); | |
107 | |
108 // Subtract alphas from 255, to get 0..255 | |
109 alpha = _mm_sub_epi16(c_255, alpha); | |
110 | |
111 // Multiply red and blue by the inverse src alpha (255 - alpha). | |
112 dst_rb = _mm_mullo_epi16(dst_rb, alpha); | |
113 // Multiply alpha and green by the inverse src alpha (255 - alpha). | |
114 dst_ag = _mm_mullo_epi16(dst_ag, alpha); | |
115 | |
116 // dst_rb_low = (dst_rb >> 8) | |
117 __m128i dst_rb_low = _mm_srli_epi16(dst_rb, 8); | |
118 __m128i dst_ag_low = _mm_srli_epi16(dst_ag, 8); | |
119 | |
120 // dst_rb = (dst_rb + dst_rb_low + 128) >> 8 | |
121 dst_rb = _mm_add_epi16(dst_rb, dst_rb_low); | |
122 dst_rb = _mm_add_epi16(dst_rb, c_128); | |
123 dst_rb = _mm_srli_epi16(dst_rb, 8); | |
124 | |
125 // dst_ag = (dst_ag + dst_ag_low + 128) & ag_mask | |
126 dst_ag = _mm_add_epi16(dst_ag, dst_ag_low); | |
127 dst_ag = _mm_add_epi16(dst_ag, c_128); | |
128 dst_ag = _mm_andnot_si128(rb_mask, dst_ag); | |
129 | |
130 // Combine back into RGBA. | |
131 dst_pixel = _mm_or_si128(dst_rb, dst_ag); | |
132 | |
133 // Add result | |
134 __m128i result = _mm_add_epi8(src_pixel, dst_pixel); | |
135 _mm_store_si128(d, result); | |
136 s++; | |
137 d++; | |
138 count -= 4; | |
139 } | |
140 src = reinterpret_cast<const SkPMColor*>(s); | |
141 dst = reinterpret_cast<SkPMColor*>(d); | |
142 } | |
143 | |
144 while (count > 0) { | |
145 *dst = SkPMSrcOver(*src, *dst); | |
146 src++; | |
147 dst++; | |
148 count--; | |
149 } | |
150 #else | |
151 int count16 = count / 16; | 80 int count16 = count / 16; |
152 __m128i* dst4 = (__m128i*)dst; | 81 __m128i* dst4 = (__m128i*)dst; |
153 const __m128i* src4 = (const __m128i*)src; | 82 const __m128i* src4 = (const __m128i*)src; |
154 | 83 |
155 for (int i = 0; i < count16 * 4; i += 4) { | 84 for (int i = 0; i < count16 * 4; i += 4) { |
156 // Load 16 source pixels. | 85 // Load 16 source pixels. |
157 __m128i s0 = _mm_loadu_si128(src4+i+0), | 86 __m128i s0 = _mm_loadu_si128(src4+i+0), |
158 s1 = _mm_loadu_si128(src4+i+1), | 87 s1 = _mm_loadu_si128(src4+i+1), |
159 s2 = _mm_loadu_si128(src4+i+2), | 88 s2 = _mm_loadu_si128(src4+i+2), |
160 s3 = _mm_loadu_si128(src4+i+3); | 89 s3 = _mm_loadu_si128(src4+i+3); |
(...skipping 23 matching lines...) Expand all Loading... |
184 } | 113 } |
185 | 114 |
186 // Wrap up the last <= 15 pixels. | 115 // Wrap up the last <= 15 pixels. |
187 SkASSERT(count - (count16*16) <= 15); | 116 SkASSERT(count - (count16*16) <= 15); |
188 for (int i = count16*16; i < count; i++) { | 117 for (int i = count16*16; i < count; i++) { |
189 // This check is not really necessary, but it prevents pointless autovectorization. | 118 // This check is not really necessary, but it prevents pointless autovectorization. |
190 if (src[i] & 0xFF000000) { | 119 if (src[i] & 0xFF000000) { |
191 dst[i] = SkPMSrcOver(src[i], dst[i]); | 120 dst[i] = SkPMSrcOver(src[i], dst[i]); |
192 } | 121 } |
193 } | 122 } |
194 #endif | |
195 } | 123 } |
196 | 124 |
197 void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, | 125 void S32A_Blend_BlitRow32_SSE2(SkPMColor* SK_RESTRICT dst, |
198 const SkPMColor* SK_RESTRICT src, | 126 const SkPMColor* SK_RESTRICT src, |
199 int count, U8CPU alpha) { | 127 int count, U8CPU alpha) { |
200 SkASSERT(alpha <= 255); | 128 SkASSERT(alpha <= 255); |
201 if (count <= 0) { | 129 if (count <= 0) { |
202 return; | 130 return; |
203 } | 131 } |
204 | 132 |
(...skipping 908 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1113 uint32_t dst_expanded = SkExpand_rgb_16(*dst); | 1041 uint32_t dst_expanded = SkExpand_rgb_16(*dst); |
1114 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); | 1042 dst_expanded = dst_expanded * (SkAlpha255To256(255 - a) >> 3); |
1115 // now src and dst expanded are in g:11 r:10 x:1 b:10 | 1043 // now src and dst expanded are in g:11 r:10 x:1 b:10 |
1116 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); | 1044 *dst = SkCompact_rgb_16((src_expanded + dst_expanded) >> 5); |
1117 } | 1045 } |
1118 dst += 1; | 1046 dst += 1; |
1119 DITHER_INC_X(x); | 1047 DITHER_INC_X(x); |
1120 } while (--count != 0); | 1048 } while (--count != 0); |
1121 } | 1049 } |
1122 } | 1050 } |
OLD | NEW |