OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2009 The Android Open Source Project | 2 * Copyright 2009 The Android Open Source Project |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #include "SkBitmapFilter_opts_SSE2.h" | 8 #include "SkBitmapFilter_opts_SSE2.h" |
9 #include "SkBitmapProcState_opts_SSE2.h" | 9 #include "SkBitmapProcState_opts_SSE2.h" |
10 #include "SkBitmapProcState_opts_SSSE3.h" | 10 #include "SkBitmapProcState_opts_SSSE3.h" |
(...skipping 198 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
209 }; | 209 }; |
210 | 210 |
211 SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) { | 211 SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) { |
212 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { | 212 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { |
213 return platform_16_procs[flags]; | 213 return platform_16_procs[flags]; |
214 } else { | 214 } else { |
215 return NULL; | 215 return NULL; |
216 } | 216 } |
217 } | 217 } |
218 | 218 |
219 static const SkBlitRow::ColorProc16 platform_565_colorprocs_SSE2[] = { | |
220 Color32A_D565_SSE2, // Color32A_D565, | |
221 NULL, // Color32A_D565_Dither | |
222 }; | |
223 | |
219 static const SkBlitRow::ColorProc16 platform_565_colorprocs_SSE4[] = { | 224 static const SkBlitRow::ColorProc16 platform_565_colorprocs_SSE4[] = { |
220 Color32A_D565_SSE4, // Color32A_D565, | 225 Color32A_D565_SSE4, // Color32A_D565, |
221 NULL, // Color32A_D565_Dither | 226 NULL, // Color32A_D565_Dither |
222 }; | 227 }; |
223 | 228 |
224 SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) { | 229 SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) { |
230 /* Ironically, the 32-bit multiplication used in the SSE4 version of this | |
231 * optimization (PMULLD) is 5-10 times slower on an Atom Silvermont than on | |
232 * the Core CPUs. This makes the SSE2 version almost twice as fast on Atom. | |
233 */ | |
234 #if !defined(__slm__) | |
mtklein
2015/02/12 14:56:31
Is this __slm__ for SiLverMont? Is there a way to
henrik.smiding
2015/02/12 15:45:06
Yes, it's one of the defines set by gcc/llvm when
| |
225 if (supports_simd(SK_CPU_SSE_LEVEL_SSE41)) { | 235 if (supports_simd(SK_CPU_SSE_LEVEL_SSE41)) { |
226 return platform_565_colorprocs_SSE4[flags]; | 236 return platform_565_colorprocs_SSE4[flags]; |
237 } else | |
238 #endif | |
239 if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { | |
240 return platform_565_colorprocs_SSE2[flags]; | |
227 } else { | 241 } else { |
228 return NULL; | 242 return NULL; |
229 } | 243 } |
230 } | 244 } |
231 | 245 |
232 static const SkBlitRow::Proc32 platform_32_procs_SSE2[] = { | 246 static const SkBlitRow::Proc32 platform_32_procs_SSE2[] = { |
233 NULL, // S32_Opaque, | 247 NULL, // S32_Opaque, |
234 S32_Blend_BlitRow32_SSE2, // S32_Blend, | 248 S32_Blend_BlitRow32_SSE2, // S32_Blend, |
235 S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque | 249 S32A_Opaque_BlitRow32_SSE2, // S32A_Opaque |
236 S32A_Blend_BlitRow32_SSE2, // S32A_Blend, | 250 S32A_Blend_BlitRow32_SSE2, // S32A_Blend, |
(...skipping 169 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
406 } else { | 420 } else { |
407 return SkPlatformXfermodeFactory_impl(rec, mode); | 421 return SkPlatformXfermodeFactory_impl(rec, mode); |
408 } | 422 } |
409 } | 423 } |
410 | 424 |
411 SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode); | 425 SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode); |
412 | 426 |
413 SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode) { | 427 SkXfermodeProc SkPlatformXfermodeProcFactory(SkXfermode::Mode mode) { |
414 return NULL; | 428 return NULL; |
415 } | 429 } |
OLD | NEW |