OLD | NEW |
1 // It is important _not_ to put header guards here. | 1 // It is important _not_ to put header guards here. |
2 // This file will be intentionally included three times. | 2 // This file will be intentionally included three times. |
3 | 3 |
4 // Useful reading: | 4 // Useful reading: |
5 // https://software.intel.com/sites/landingpage/IntrinsicsGuide/ | 5 // https://software.intel.com/sites/landingpage/IntrinsicsGuide/ |
6 | 6 |
7 #if defined(SK4X_PREAMBLE) | 7 #if defined(SK4X_PREAMBLE) |
8 // Code in this file may assume SSE and SSE2. | 8 // Code in this file may assume SSE and SSE2. |
9 #include <emmintrin.h> | 9 #include <emmintrin.h> |
10 | 10 |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
92 | 92 |
93 // We're going to try a little experiment here and skip allTrue(), anyTrue(), and bit-manipulators | 93 // We're going to try a little experiment here and skip allTrue(), anyTrue(), and bit-manipulators |
94 // for Sk4f. Code that calls them probably does so accidentally. | 94 // for Sk4f. Code that calls them probably does so accidentally. |
95 // Ask mtklein to fill these in if you really need them. | 95 // Ask mtklein to fill these in if you really need them. |
96 | 96 |
97 M(Sk4f) add (const Sk4f& o) const { return _mm_add_ps(fVec, o.fVec); } | 97 M(Sk4f) add (const Sk4f& o) const { return _mm_add_ps(fVec, o.fVec); } |
98 M(Sk4f) subtract(const Sk4f& o) const { return _mm_sub_ps(fVec, o.fVec); } | 98 M(Sk4f) subtract(const Sk4f& o) const { return _mm_sub_ps(fVec, o.fVec); } |
99 M(Sk4f) multiply(const Sk4f& o) const { return _mm_mul_ps(fVec, o.fVec); } | 99 M(Sk4f) multiply(const Sk4f& o) const { return _mm_mul_ps(fVec, o.fVec); } |
100 M(Sk4f) divide (const Sk4f& o) const { return _mm_div_ps(fVec, o.fVec); } | 100 M(Sk4f) divide (const Sk4f& o) const { return _mm_div_ps(fVec, o.fVec); } |
101 | 101 |
| 102 M(Sk4f) rsqrt() const { return _mm_rsqrt_ps(fVec); } |
| 103 M(Sk4f) sqrt() const { return _mm_sqrt_ps( fVec); } |
| 104 |
102 M(Sk4i) equal (const Sk4f& o) const { return _mm_cmpeq_ps (fVec, o.fVe
c); } | 105 M(Sk4i) equal (const Sk4f& o) const { return _mm_cmpeq_ps (fVec, o.fVe
c); } |
103 M(Sk4i) notEqual (const Sk4f& o) const { return _mm_cmpneq_ps(fVec, o.fVe
c); } | 106 M(Sk4i) notEqual (const Sk4f& o) const { return _mm_cmpneq_ps(fVec, o.fVe
c); } |
104 M(Sk4i) lessThan (const Sk4f& o) const { return _mm_cmplt_ps (fVec, o.fVe
c); } | 107 M(Sk4i) lessThan (const Sk4f& o) const { return _mm_cmplt_ps (fVec, o.fVe
c); } |
105 M(Sk4i) greaterThan (const Sk4f& o) const { return _mm_cmpgt_ps (fVec, o.fVe
c); } | 108 M(Sk4i) greaterThan (const Sk4f& o) const { return _mm_cmpgt_ps (fVec, o.fVe
c); } |
106 M(Sk4i) lessThanEqual (const Sk4f& o) const { return _mm_cmple_ps (fVec, o.fVe
c); } | 109 M(Sk4i) lessThanEqual (const Sk4f& o) const { return _mm_cmple_ps (fVec, o.fVe
c); } |
107 M(Sk4i) greaterThanEqual(const Sk4f& o) const { return _mm_cmpge_ps (fVec, o.fVe
c); } | 110 M(Sk4i) greaterThanEqual(const Sk4f& o) const { return _mm_cmpge_ps (fVec, o.fVe
c); } |
108 | 111 |
109 M(Sk4f) Min(const Sk4f& a, const Sk4f& b) { return _mm_min_ps(a.fVec, b.fVec); } | 112 M(Sk4f) Min(const Sk4f& a, const Sk4f& b) { return _mm_min_ps(a.fVec, b.fVec); } |
110 M(Sk4f) Max(const Sk4f& a, const Sk4f& b) { return _mm_max_ps(a.fVec, b.fVec); } | 113 M(Sk4f) Max(const Sk4f& a, const Sk4f& b) { return _mm_max_ps(a.fVec, b.fVec); } |
111 | 114 |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
170 } | 173 } |
171 M(Sk4i) Max(const Sk4i& a, const Sk4i& b) { | 174 M(Sk4i) Max(const Sk4i& a, const Sk4i& b) { |
172 Sk4i less = a.lessThan(b); | 175 Sk4i less = a.lessThan(b); |
173 return b.bitAnd(less).bitOr(a.andNot(less)); | 176 return b.bitAnd(less).bitOr(a.andNot(less)); |
174 } | 177 } |
175 #endif | 178 #endif |
176 | 179 |
177 #undef M | 180 #undef M |
178 | 181 |
179 #endif//Method definitions. | 182 #endif//Method definitions. |
OLD | NEW |