OLD | NEW |
(Empty) | |
| 1 // It is important _not_ to put header guards here. |
| 2 // This file will be intentionally included three times. |
| 3 |
| 4 // Useful reading: |
| 5 // https://gcc.gnu.org/onlinedocs/gcc/Vector-Extensions.html |
| 6 |
| 7 #if defined(SK4X_PREAMBLE) |
| 8 |
| 9 #elif defined(SK4X_PRIVATE) |
| 10 typedef T Vector __attribute__((vector_size(16))); |
| 11 |
| 12 /*implicit*/ Sk4x(Vector vec) : fVec(vec) {} |
| 13 static inline Vector ShuffleImpl(Vector a, Vector b, int __attribute__((vect
or_size(16))) mask); |
| 14 template <int m, int a, int s, int k> |
| 15 static Sk4x Shuffle(const Sk4x&, const Sk4x&); |
| 16 |
| 17 Vector fVec; |
| 18 |
| 19 #else // defined(SK4X_PRIVATE) |
| 20 |
| 21 template <typename T> |
| 22 Sk4x<T>::Sk4x() { } |
| 23 |
| 24 template <typename T> |
| 25 Sk4x<T>::Sk4x(T a, T b, T c, T d) { this->set(a,b,c,d); } |
| 26 |
| 27 template <typename T> |
| 28 Sk4x<T>::Sk4x(const T vals[4]) { |
| 29 fVec = *reinterpret_cast<const Vector*>(vals); // Should compile to moveaps
or moveups. |
| 30 } |
| 31 |
| 32 template <typename T> |
| 33 Sk4x<T>::Sk4x(const Sk4x<T>& other) { *this = other; } |
| 34 |
| 35 template <typename T> |
| 36 Sk4x<T>& Sk4x<T>::operator=(const Sk4x<T>& other) { fVec = other.fVec; return *t
his; } |
| 37 |
| 38 template <typename T> |
| 39 void Sk4x<T>::set(T a, T b, T c, T d) { |
| 40 Vector v = { a, b, c, d }; |
| 41 fVec = v; |
| 42 } |
| 43 |
| 44 template <typename T> |
| 45 void Sk4x<T>::store(T vals[4]) const { |
| 46 *reinterpret_cast<Vector*>(vals) = fVec; |
| 47 } |
| 48 |
| 49 template <typename T> |
| 50 template <typename Dst> Dst Sk4x<T>::reinterpret() const { |
| 51 return Dst((typename Dst::Vector)fVec); |
| 52 } |
| 53 |
| 54 template <typename T> |
| 55 template <typename Dst> Dst Sk4x<T>::cast() const { |
| 56 return Dst(fVec[0], fVec[1], fVec[2], fVec[3]); |
| 57 } |
| 58 |
| 59 template <typename T> |
| 60 bool Sk4x<T>::allTrue() const { return fVec[0] & fVec[1] & fVec[2] & fVec[3]; } |
| 61 template <typename T> |
| 62 bool Sk4x<T>::anyTrue() const { return fVec[0] | fVec[1] | fVec[2] | fVec[3]; } |
| 63 |
| 64 template <typename T> Sk4x<T> Sk4x<T>::bitNot() const { return Sk4i(~fVec); } |
| 65 |
| 66 template <typename T> Sk4x<T> Sk4x<T>::bitAnd(const Sk4x& other) const { return
fVec & other.fVec; } |
| 67 template <typename T> Sk4x<T> Sk4x<T>::bitOr (const Sk4x& other) const { return
fVec | other.fVec; } |
| 68 |
| 69 template <typename T> |
| 70 Sk4i Sk4x<T>:: equal(const Sk4x<T>& other) const { return fVec == othe
r.fVec; } |
| 71 template <typename T> |
| 72 Sk4i Sk4x<T>:: notEqual(const Sk4x<T>& other) const { return fVec != othe
r.fVec; } |
| 73 template <typename T> |
| 74 Sk4i Sk4x<T>:: lessThan(const Sk4x<T>& other) const { return fVec < othe
r.fVec; } |
| 75 template <typename T> |
| 76 Sk4i Sk4x<T>:: greaterThan(const Sk4x<T>& other) const { return fVec > othe
r.fVec; } |
| 77 template <typename T> |
| 78 Sk4i Sk4x<T>:: lessThanEqual(const Sk4x<T>& other) const { return fVec <= othe
r.fVec; } |
| 79 template <typename T> |
| 80 Sk4i Sk4x<T>::greaterThanEqual(const Sk4x<T>& other) const { return fVec >= othe
r.fVec; } |
| 81 |
| 82 template <typename T> |
| 83 Sk4x<T> Sk4x<T>:: add(const Sk4x<T>& other) const { return fVec + other.fVec
; } |
| 84 template <typename T> |
| 85 Sk4x<T> Sk4x<T>::subtract(const Sk4x<T>& other) const { return fVec - other.fVec
; } |
| 86 template <typename T> |
| 87 Sk4x<T> Sk4x<T>::multiply(const Sk4x<T>& other) const { return fVec * other.fVec
; } |
| 88 template <typename T> |
| 89 Sk4x<T> Sk4x<T>:: divide(const Sk4x<T>& other) const { return fVec / other.fVec
; } |
| 90 |
| 91 template <typename T> |
| 92 Sk4x<T> Sk4x<T>::Min(const Sk4x<T>& a, const Sk4x<T>& b) { |
| 93 return a.fVec < b.fVec ? a.fVec : b.fVec; // This makes great SSE code (1 m
inps op)... |
| 94 } |
| 95 |
| 96 template <typename T> |
| 97 Sk4x<T> Sk4x<T>::Max(const Sk4x<T>& a, const Sk4x<T>& b) { |
| 98 return a.fVec < b.fVec ? b.fVec : a.fVec; // ...but this doesn't look so go
od (7 ops?). |
| 99 } |
| 100 |
| 101 // GCC 4.8 has a bug that leads it to segfault when presented with the obvious c
ode for Shuffle: |
| 102 // Sk4i::Vector mask = { m,a,s,k }; |
| 103 // return __builtin_shuffle(x.fVec, y.fVec, mask); |
| 104 // |
| 105 // This roundabout implementation via ShuffleImpl works around that bug, |
| 106 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57509 |
| 107 |
// Sk4i specialization of ShuffleImpl: forwards to GCC's __builtin_shuffle,
// which builds the result by picking lanes out of x and y according to the
// indices held in mask.  The code shape is deliberately left untouched: it is
// part of the workaround for GCC bug 57509.
template <>
inline Sk4i::Vector Sk4i::ShuffleImpl(Sk4i::Vector x, Sk4i::Vector y, Sk4i::Vector mask) {
    return __builtin_shuffle(x,y, mask);
}
| 112 |
// Sk4f specialization of ShuffleImpl: forwards to GCC's __builtin_shuffle,
// which builds the result by picking lanes out of x and y according to the
// indices held in mask.  The mask is an integer vector (Sk4i::Vector) even
// though the data lanes are Sk4f's.  The code shape is deliberately left
// untouched: it is part of the workaround for GCC bug 57509.
template <>
inline Sk4f::Vector Sk4f::ShuffleImpl(Sk4f::Vector x, Sk4f::Vector y, Sk4i::Vector mask) {
    return __builtin_shuffle(x,y, mask);
}
| 117 |
// Shuffles x and y using the compile-time lane indices m, a, s, k.
// The indices are packed into an integer mask vector and handed to
// ShuffleImpl rather than being passed straight to __builtin_shuffle here;
// that indirection is the workaround for GCC bug 57509, so the statements
// below are intentionally left as they are.
template <typename T>
template <int m, int a, int s, int k>
Sk4x<T> Sk4x<T>::Shuffle(const Sk4x<T>& x, const Sk4x<T>& y) {
    // Mask lanes are the template arguments, in order.
    Sk4i::Vector mask = { m,a,s,k };
    return ShuffleImpl(x.fVec, y.fVec, mask);
}
| 124 |
| 125 template <typename T> |
| 126 Sk4x<T> Sk4x<T>::zwxy() const { return Shuffle<2,3,0,1>(*this, *this); } |
| 127 |
| 128 template <typename T> |
| 129 Sk4x<T> Sk4x<T>::XYAB(const Sk4x& xyzw, const Sk4x& abcd) { return Shuffle<0,1,4
,5>(xyzw, abcd); } |
| 130 |
| 131 template <typename T> |
| 132 Sk4x<T> Sk4x<T>::ZWCD(const Sk4x& xyzw, const Sk4x& abcd) { return Shuffle<2,3,6
,7>(xyzw, abcd); } |
| 133 |
| 134 #endif // defined(SK4X_PRIVATE) |
OLD | NEW |