Index: src/core/Sk4x.h |
diff --git a/src/core/Sk4x.h b/src/core/Sk4x.h |
index 5b15560ede46cdcab4792974658cf134c15f27b1..7c246dffc01b7265c44772f2328f0ad385adba1c 100644 |
--- a/src/core/Sk4x.h |
+++ b/src/core/Sk4x.h |
@@ -4,13 +4,22 @@ |
#include "SkTypes.h" |
#define SK4X_PREAMBLE 1 |
- #include "Sk4x_portable.h" |
+ #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
+ #include "Sk4x_sse.h" |
+ #else |
+ #include "Sk4x_portable.h" |
+ #endif |
#undef SK4X_PREAMBLE |
template <typename T> class Sk4x; |
typedef Sk4x<float> Sk4f; |
typedef Sk4x<int32_t> Sk4i; |
+// Some Sk4x methods are implemented only for Sk4f or only for Sk4i. |
+// The counterpart for the other type might be unavailable, really slow, or just a bad idea. |
+// Talk to mtklein if you find yourself unable to link and |
+// really need one of those methods. |
+ |
template <typename T> class Sk4x { |
public: |
Sk4x(); // Uninitialized; use Sk4x(0,0,0,0) for zero. |
@@ -34,6 +43,7 @@ public: |
Sk4x bitNot() const; |
Sk4x bitAnd(const Sk4x&) const; |
Sk4x bitOr(const Sk4x&) const; |
+ // TODO: Sk4x bitAndNot(const Sk4x&) const; is efficient in SSE. |
Sk4x add(const Sk4x&) const; |
Sk4x subtract(const Sk4x&) const; |
Sk4x multiply(const Sk4x&) const; |
@@ -56,15 +66,27 @@ public: |
static Sk4x XYAB(const Sk4x& xyzw, const Sk4x& abcd); |
static Sk4x ZWCD(const Sk4x& xyzw, const Sk4x& abcd); |
+ // TODO: these are particularly efficient in SSE. Useful? Also efficient in NEON? |
+ // static Sk4x XAYB(const Sk4x& xyzw, const Sk4x& abcd); |
+ // static Sk4x ZCWD(const Sk4x& xyzw, const Sk4x& abcd); |
+ |
private: |
// It's handy to have Sk4f and Sk4i be mutual friends. |
template <typename S> friend class Sk4x; |
#define SK4X_PRIVATE 1 |
- #include "Sk4x_portable.h" |
+ #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
+ #include "Sk4x_sse.h" |
+ #else |
+ #include "Sk4x_portable.h" |
+ #endif |
#undef SK4X_PRIVATE |
}; |
-#include "Sk4x_portable.h" |
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
+ #include "Sk4x_sse.h" |
+#else |
+ #include "Sk4x_portable.h" |
+#endif |
#endif//Sk4x_DEFINED |