OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2009 The Android Open Source Project | 2 * Copyright 2009 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
| 8 #include "SkBitmapFilter_opts_SSE2.h" |
8 #include "SkBitmapProcState_opts_SSE2.h" | 9 #include "SkBitmapProcState_opts_SSE2.h" |
9 #include "SkBitmapProcState_opts_SSSE3.h" | 10 #include "SkBitmapProcState_opts_SSSE3.h" |
10 #include "SkBitmapFilter_opts_SSE2.h" | |
11 #include "SkBlitMask.h" | 11 #include "SkBlitMask.h" |
| 12 #include "SkBlitRect_opts_SSE2.h" |
12 #include "SkBlitRow.h" | 13 #include "SkBlitRow.h" |
13 #include "SkBlitRect_opts_SSE2.h" | |
14 #include "SkBlitRow_opts_SSE2.h" | 14 #include "SkBlitRow_opts_SSE2.h" |
15 #include "SkBlurImage_opts_SSE2.h" | 15 #include "SkBlurImage_opts_SSE2.h" |
16 #include "SkUtils_opts_SSE2.h" | |
17 #include "SkUtils.h" | |
18 #include "SkMorphology_opts.h" | 16 #include "SkMorphology_opts.h" |
19 #include "SkMorphology_opts_SSE2.h" | 17 #include "SkMorphology_opts_SSE2.h" |
| 18 #include "SkRTConf.h" |
| 19 #include "SkUtils.h" |
| 20 #include "SkUtils_opts_SSE2.h" |
20 #include "SkXfermode.h" | 21 #include "SkXfermode.h" |
21 #include "SkXfermode_proccoeff.h" | 22 #include "SkXfermode_proccoeff.h" |
22 | 23 |
23 #include "SkRTConf.h" | |
24 | |
25 #if defined(_MSC_VER) && defined(_WIN64) | 24 #if defined(_MSC_VER) && defined(_WIN64) |
26 #include <intrin.h> | 25 #include <intrin.h> |
27 #endif | 26 #endif |
28 | 27 |
29 /* This file must *not* be compiled with -msse or -msse2, otherwise | 28 /* This file must *not* be compiled with -msse or -msse2, otherwise |
30 gcc may generate sse2 even for scalar ops (and thus give an invalid | 29 gcc may generate sse2 even for scalar ops (and thus give an invalid |
31 instruction on Pentium3 on the code below). Only files named *_SSE2.cpp | 30 instruction on Pentium3 on the code below). Only files named *_SSE2.cpp |
32 in this directory should be compiled with -msse2. */ | 31 in this directory should be compiled with -msse2. */ |
33 | 32 |
34 | 33 |
| 34 /* Function to get the CPU SSE-level in runtime, for different compilers. */ |
35 #ifdef _MSC_VER | 35 #ifdef _MSC_VER |
36 static inline void getcpuid(int info_type, int info[4]) { | 36 static inline void getcpuid(int info_type, int info[4]) { |
37 #if defined(_WIN64) | 37 #if defined(_WIN64) |
38 __cpuid(info, info_type); | 38 __cpuid(info, info_type); |
39 #else | 39 #else |
40 __asm { | 40 __asm { |
41 mov eax, [info_type] | 41 mov eax, [info_type] |
42 cpuid | 42 cpuid |
43 mov edi, [info] | 43 mov edi, [info] |
44 mov [edi], eax | 44 mov [edi], eax |
(...skipping 20 matching lines...) Expand all Loading... |
65 "cpuid \n\t" | 65 "cpuid \n\t" |
66 "movl %%ebx, %1 \n\t" | 66 "movl %%ebx, %1 \n\t" |
67 "popl %%ebx \n\t" | 67 "popl %%ebx \n\t" |
68 : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3]) | 68 : "=a"(info[0]), "=r"(info[1]), "=c"(info[2]), "=d"(info[3]) |
69 : "a"(info_type) | 69 : "a"(info_type) |
70 ); | 70 ); |
71 } | 71 } |
72 #endif | 72 #endif |
73 #endif | 73 #endif |
74 | 74 |
| 75 //////////////////////////////////////////////////////////////////////////////// |
| 76 |
75 #if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEV
EL_SSE2 | 77 #if defined(__x86_64__) || defined(_WIN64) || SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEV
EL_SSE2 |
76 /* All x86_64 machines have SSE2, or we know it's supported at compile time, so
don't even bother checking. */ | 78 /* All x86_64 machines have SSE2, or we know it's supported at compile time, so
don't even bother checking. */ |
77 static inline bool hasSSE2() { | 79 static inline bool hasSSE2() { |
78 return true; | 80 return true; |
79 } | 81 } |
80 #else | 82 #else |
81 | 83 |
82 static inline bool hasSSE2() { | 84 static inline bool hasSSE2() { |
83 int cpu_info[4] = { 0 }; | 85 int cpu_info[4] = { 0 }; |
84 getcpuid(1, cpu_info); | 86 getcpuid(1, cpu_info); |
(...skipping 28 matching lines...) Expand all Loading... |
113 static bool cachedHasSSE2() { | 115 static bool cachedHasSSE2() { |
114 static bool gHasSSE2 = hasSSE2(); | 116 static bool gHasSSE2 = hasSSE2(); |
115 return gHasSSE2; | 117 return gHasSSE2; |
116 } | 118 } |
117 | 119 |
118 static bool cachedHasSSSE3() { | 120 static bool cachedHasSSSE3() { |
119 static bool gHasSSSE3 = hasSSSE3(); | 121 static bool gHasSSSE3 = hasSSSE3(); |
120 return gHasSSSE3; | 122 return gHasSSSE3; |
121 } | 123 } |
122 | 124 |
| 125 //////////////////////////////////////////////////////////////////////////////// |
| 126 |
123 SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "U
se SSE optimized version of high quality image filters"); | 127 SK_CONF_DECLARE( bool, c_hqfilter_sse, "bitmap.filter.highQualitySSE", false, "U
se SSE optimized version of high quality image filters"); |
124 | 128 |
125 void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) { | 129 void SkBitmapProcState::platformConvolutionProcs(SkConvolutionProcs* procs) { |
126 if (cachedHasSSE2()) { | 130 if (cachedHasSSE2()) { |
127 procs->fExtraHorizontalReads = 3; | 131 procs->fExtraHorizontalReads = 3; |
128 procs->fConvolveVertically = &convolveVertically_SSE2; | 132 procs->fConvolveVertically = &convolveVertically_SSE2; |
129 procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2; | 133 procs->fConvolve4RowsHorizontally = &convolve4RowsHorizontally_SSE2; |
130 procs->fConvolveHorizontally = &convolveHorizontally_SSE2; | 134 procs->fConvolveHorizontally = &convolveHorizontally_SSE2; |
131 procs->fApplySIMDPadding = &applySIMDPadding_SSE2; | 135 procs->fApplySIMDPadding = &applySIMDPadding_SSE2; |
132 } | 136 } |
133 } | 137 } |
134 | 138 |
| 139 //////////////////////////////////////////////////////////////////////////////// |
| 140 |
135 void SkBitmapProcState::platformProcs() { | 141 void SkBitmapProcState::platformProcs() { |
136 /* Every optimization in the function requires at least SSE2 */ | 142 /* Every optimization in the function requires at least SSE2 */ |
137 if (!cachedHasSSE2()) { | 143 if (!cachedHasSSE2()) { |
138 return; | 144 return; |
139 } | 145 } |
140 | 146 |
141 /* Check fSampleProc32 */ | 147 /* Check fSampleProc32 */ |
142 if (fSampleProc32 == S32_opaque_D32_filter_DX) { | 148 if (fSampleProc32 == S32_opaque_D32_filter_DX) { |
143 if (cachedHasSSSE3()) { | 149 if (cachedHasSSSE3()) { |
144 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3; | 150 fSampleProc32 = S32_opaque_D32_filter_DX_SSSE3; |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
178 } | 184 } |
179 | 185 |
180 /* Check fShaderProc32 */ | 186 /* Check fShaderProc32 */ |
181 if (c_hqfilter_sse) { | 187 if (c_hqfilter_sse) { |
182 if (fShaderProc32 == highQualityFilter32) { | 188 if (fShaderProc32 == highQualityFilter32) { |
183 fShaderProc32 = highQualityFilter_SSE2; | 189 fShaderProc32 = highQualityFilter_SSE2; |
184 } | 190 } |
185 } | 191 } |
186 } | 192 } |
187 | 193 |
| 194 //////////////////////////////////////////////////////////////////////////////// |
| 195 |
188 static SkBlitRow::Proc platform_16_procs[] = { | 196 static SkBlitRow::Proc platform_16_procs[] = { |
189 S32_D565_Opaque_SSE2, // S32_D565_Opaque | 197 S32_D565_Opaque_SSE2, // S32_D565_Opaque |
190 NULL, // S32_D565_Blend | 198 NULL, // S32_D565_Blend |
191 S32A_D565_Opaque_SSE2, // S32A_D565_Opaque | 199 S32A_D565_Opaque_SSE2, // S32A_D565_Opaque |
192 NULL, // S32A_D565_Blend | 200 NULL, // S32A_D565_Blend |
193 S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither | 201 S32_D565_Opaque_Dither_SSE2, // S32_D565_Opaque_Dither |
194 NULL, // S32_D565_Blend_Dither | 202 NULL, // S32_D565_Blend_Dither |
195 S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither | 203 S32A_D565_Opaque_Dither_SSE2, // S32A_D565_Opaque_Dither |
196 NULL, // S32A_D565_Blend_Dither | 204 NULL, // S32A_D565_Blend_Dither |
197 }; | 205 }; |
198 | 206 |
| 207 SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) { |
| 208 if (cachedHasSSE2()) { |
| 209 return platform_16_procs[flags]; |
| 210 } else { |
| 211 return NULL; |
| 212 } |
| 213 } |
| 214 |
199 static SkBlitRow::Proc32 platform_32_procs[] = { | 215 static SkBlitRow::Proc32 platform_32_procs[] = { |
200 NULL, // S32_Opaque, | 216 NULL, // S32_Opaque, |
201 S32_Blend_BlitRow32_SSE2, // S32_Blend, | 217 S32_Blend_BlitRow32_SSE2, // S32_Blend, |
202 S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque | 218 S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque |
203 S32A_Blend_BlitRow32_SSE2, // S32A_Blend, | 219 S32A_Blend_BlitRow32_SSE2, // S32A_Blend, |
204 }; | 220 }; |
205 | 221 |
206 SkBlitRow::Proc SkBlitRow::PlatformProcs565(unsigned flags) { | 222 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { |
207 if (cachedHasSSE2()) { | 223 if (cachedHasSSE2()) { |
208 return platform_16_procs[flags]; | 224 return platform_32_procs[flags]; |
209 } else { | 225 } else { |
210 return NULL; | 226 return NULL; |
211 } | 227 } |
212 } | 228 } |
213 | 229 |
214 SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() { | 230 SkBlitRow::ColorProc SkBlitRow::PlatformColorProc() { |
215 if (cachedHasSSE2()) { | 231 if (cachedHasSSE2()) { |
216 return Color32_SSE2; | 232 return Color32_SSE2; |
217 } else { | 233 } else { |
218 return NULL; | 234 return NULL; |
219 } | 235 } |
220 } | 236 } |
221 | 237 |
222 SkBlitRow::Proc32 SkBlitRow::PlatformProcs32(unsigned flags) { | 238 SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning |
| 239 |
| 240 SkBlitRow::ColorRectProc PlatformColorRectProcFactory() { |
| 241 /* Return NULL for now, since the optimized path in ColorRect32_SSE2 is disabled
. |
223 if (cachedHasSSE2()) { | 242 if (cachedHasSSE2()) { |
224 return platform_32_procs[flags]; | 243 return ColorRect32_SSE2; |
225 } else { | 244 } else { |
226 return NULL; | 245 return NULL; |
227 } | 246 } |
| 247 */ |
| 248 return NULL; |
228 } | 249 } |
229 | 250 |
| 251 //////////////////////////////////////////////////////////////////////////////// |
230 | 252 |
231 SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig, | 253 SkBlitMask::ColorProc SkBlitMask::PlatformColorProcs(SkBitmap::Config dstConfig, |
232 SkMask::Format maskFormat, | 254 SkMask::Format maskFormat, |
233 SkColor color) { | 255 SkColor color) { |
234 if (SkMask::kA8_Format != maskFormat) { | 256 if (SkMask::kA8_Format != maskFormat) { |
235 return NULL; | 257 return NULL; |
236 } | 258 } |
237 | 259 |
238 ColorProc proc = NULL; | 260 ColorProc proc = NULL; |
239 if (cachedHasSSE2()) { | 261 if (cachedHasSSE2()) { |
(...skipping 17 matching lines...) Expand all Loading... |
257 if (isOpaque) { | 279 if (isOpaque) { |
258 return SkBlitLCD16OpaqueRow_SSE2; | 280 return SkBlitLCD16OpaqueRow_SSE2; |
259 } else { | 281 } else { |
260 return SkBlitLCD16Row_SSE2; | 282 return SkBlitLCD16Row_SSE2; |
261 } | 283 } |
262 } else { | 284 } else { |
263 return NULL; | 285 return NULL; |
264 } | 286 } |
265 | 287 |
266 } | 288 } |
| 289 |
267 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig, | 290 SkBlitMask::RowProc SkBlitMask::PlatformRowProcs(SkBitmap::Config dstConfig, |
268 SkMask::Format maskFormat, | 291 SkMask::Format maskFormat, |
269 RowFlags flags) { | 292 RowFlags flags) { |
270 return NULL; | 293 return NULL; |
271 } | 294 } |
272 | 295 |
| 296 //////////////////////////////////////////////////////////////////////////////// |
| 297 |
273 SkMemset16Proc SkMemset16GetPlatformProc() { | 298 SkMemset16Proc SkMemset16GetPlatformProc() { |
274 if (cachedHasSSE2()) { | 299 if (cachedHasSSE2()) { |
275 return sk_memset16_SSE2; | 300 return sk_memset16_SSE2; |
276 } else { | 301 } else { |
277 return NULL; | 302 return NULL; |
278 } | 303 } |
279 } | 304 } |
280 | 305 |
281 SkMemset32Proc SkMemset32GetPlatformProc() { | 306 SkMemset32Proc SkMemset32GetPlatformProc() { |
282 if (cachedHasSSE2()) { | 307 if (cachedHasSSE2()) { |
283 return sk_memset32_SSE2; | 308 return sk_memset32_SSE2; |
284 } else { | 309 } else { |
285 return NULL; | 310 return NULL; |
286 } | 311 } |
287 } | 312 } |
288 | 313 |
| 314 //////////////////////////////////////////////////////////////////////////////// |
| 315 |
289 SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType t
ype) { | 316 SkMorphologyImageFilter::Proc SkMorphologyGetPlatformProc(SkMorphologyProcType t
ype) { |
290 if (!cachedHasSSE2()) { | 317 if (!cachedHasSSE2()) { |
291 return NULL; | 318 return NULL; |
292 } | 319 } |
293 switch (type) { | 320 switch (type) { |
294 case kDilateX_SkMorphologyProcType: | 321 case kDilateX_SkMorphologyProcType: |
295 return SkDilateX_SSE2; | 322 return SkDilateX_SSE2; |
296 case kDilateY_SkMorphologyProcType: | 323 case kDilateY_SkMorphologyProcType: |
297 return SkDilateY_SSE2; | 324 return SkDilateY_SSE2; |
298 case kErodeX_SkMorphologyProcType: | 325 case kErodeX_SkMorphologyProcType: |
299 return SkErodeX_SSE2; | 326 return SkErodeX_SSE2; |
300 case kErodeY_SkMorphologyProcType: | 327 case kErodeY_SkMorphologyProcType: |
301 return SkErodeY_SSE2; | 328 return SkErodeY_SSE2; |
302 default: | 329 default: |
303 return NULL; | 330 return NULL; |
304 } | 331 } |
305 } | 332 } |
306 | 333 |
| 334 //////////////////////////////////////////////////////////////////////////////// |
| 335 |
307 bool SkBoxBlurGetPlatformProcs(SkBoxBlurProc* boxBlurX, | 336 bool SkBoxBlurGetPlatformProcs(SkBoxBlurProc* boxBlurX, |
308 SkBoxBlurProc* boxBlurY, | 337 SkBoxBlurProc* boxBlurY, |
309 SkBoxBlurProc* boxBlurXY, | 338 SkBoxBlurProc* boxBlurXY, |
310 SkBoxBlurProc* boxBlurYX) { | 339 SkBoxBlurProc* boxBlurYX) { |
311 #ifdef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION | 340 #ifdef SK_DISABLE_BLUR_DIVISION_OPTIMIZATION |
312 return false; | 341 return false; |
313 #else | 342 #else |
314 if (!cachedHasSSE2()) { | 343 if (!cachedHasSSE2()) { |
315 return false; | 344 return false; |
316 } | 345 } |
317 return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlur
YX); | 346 return SkBoxBlurGetPlatformProcs_SSE2(boxBlurX, boxBlurY, boxBlurXY, boxBlur
YX); |
318 #endif | 347 #endif |
319 } | 348 } |
320 | 349 |
321 SkBlitRow::ColorRectProc PlatformColorRectProcFactory(); // suppress warning | 350 //////////////////////////////////////////////////////////////////////////////// |
322 | |
323 SkBlitRow::ColorRectProc PlatformColorRectProcFactory() { | |
324 if (cachedHasSSE2()) { | |
325 return ColorRect32_SSE2; | |
326 } else { | |
327 return NULL; | |
328 } | |
329 } | |
330 | 351 |
331 extern SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff&
rec, | 352 extern SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl_SSE2(const ProcCoeff&
rec, |
332 SkXfermode::Mode
mode); | 353 SkXfermode::Mode
mode); |
333 | 354 |
334 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec, | 355 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec, |
335 SkXfermode::Mode mode); | 356 SkXfermode::Mode mode); |
336 | 357 |
337 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec, | 358 SkProcCoeffXfermode* SkPlatformXfermodeFactory_impl(const ProcCoeff& rec, |
338 SkXfermode::Mode mode) { | 359 SkXfermode::Mode mode) { |
339 return NULL; | 360 return NULL; |
340 } | 361 } |
341 | 362 |
342 SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec, | 363 SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec, |
343 SkXfermode::Mode mode); | 364 SkXfermode::Mode mode); |
344 | 365 |
345 SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec, | 366 SkProcCoeffXfermode* SkPlatformXfermodeFactory(const ProcCoeff& rec, |
346 SkXfermode::Mode mode) { | 367 SkXfermode::Mode mode) { |
347 if (cachedHasSSE2()) { | 368 if (cachedHasSSE2()) { |
348 return SkPlatformXfermodeFactory_impl_SSE2(rec, mode); | 369 return SkPlatformXfermodeFactory_impl_SSE2(rec, mode); |
349 } else { | 370 } else { |
350 return SkPlatformXfermodeFactory_impl(rec, mode); | 371 return SkPlatformXfermodeFactory_impl(rec, mode); |
351 } | 372 } |
352 } | 373 } |
353 | 374 |
354 SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode); | 375 SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode); |
355 | 376 |
356 SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode) { | 377 SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode) { |
357 return NULL; | 378 return NULL; |
358 } | 379 } |
OLD | NEW |