Index: src/opts/opts_check_x86.cpp |
diff --git a/src/opts/opts_check_x86.cpp b/src/opts/opts_check_x86.cpp |
index 2d3b794283dd85cf5d3c346eb58231844543bf73..42661125525ec9fe15dca01ec9b3426681210a54 100644 |
--- a/src/opts/opts_check_x86.cpp |
+++ b/src/opts/opts_check_x86.cpp |
@@ -216,14 +216,28 @@ SkBlitRow::Proc16 SkBlitRow::PlatformFactory565(unsigned flags) { |
} |
} |
+static const SkBlitRow::ColorProc16 platform_565_colorprocs_SSE2[] = { |
+ Color32A_D565_SSE2, // index 0: Color32A_D565 |
+ NULL, // index 1: Color32A_D565_Dither (no entry; a table lookup at this index yields NULL) |
+}; |
+ |
static const SkBlitRow::ColorProc16 platform_565_colorprocs_SSE4[] = { |
Color32A_D565_SSE4, // index 0: Color32A_D565 |
NULL, // index 1: Color32A_D565_Dither (no entry; a table lookup at this index yields NULL) |
}; |
SkBlitRow::ColorProc16 SkBlitRow::PlatformColorFactory565(unsigned flags) { |
+/* Ironically, the 32-bit multiplication used in the SSE4 version of this |
+ * optimization (PMULLD) is 5-10 times slower on an Atom Silvermont than on |
+ * the Core CPUs. This makes the SSE2 version almost twice as fast on Atom. |
+*/ |
+#if !defined(__slm__) |
mtklein
2015/02/12 14:56:31
Is this __slm__ for SiLverMont? Is there a way to
henrik.smiding
2015/02/12 15:45:06
Yes, it's one of the defines set by gcc/llvm when
|
if (supports_simd(SK_CPU_SSE_LEVEL_SSE41)) { |
return platform_565_colorprocs_SSE4[flags]; |
+ } else |
+#endif |
+ if (supports_simd(SK_CPU_SSE_LEVEL_SSE2)) { |
+ return platform_565_colorprocs_SSE2[flags]; |
} else { |
return NULL; |
} |