OLD | NEW |
1 // It is important _not_ to put header guards here. | 1 // It is important _not_ to put header guards here. |
2 // This file will be intentionally included three times. | 2 // This file will be intentionally included three times. |
3 | 3 |
4 // Useful reading: | 4 // Useful reading: |
5 // https://software.intel.com/sites/landingpage/IntrinsicsGuide/ | 5 // https://software.intel.com/sites/landingpage/IntrinsicsGuide/ |
6 | 6 |
7 #include "SkTypes.h" // Keep this before any #ifdef for skbug.com/3362 | 7 #include "SkTypes.h" // Keep this before any #ifdef for skbug.com/3362 |
8 | 8 |
9 #if defined(SK4X_PREAMBLE) | 9 #if defined(SK4X_PREAMBLE) |
10 // Code in this file may assume SSE and SSE2. | 10 // Code in this file may assume SSE and SSE2. |
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
79 | 79 |
80 M() Sk4x(float v) : fVec(_mm_set1_ps(v)) {} | 80 M() Sk4x(float v) : fVec(_mm_set1_ps(v)) {} |
81 M() Sk4x(float a, float b, float c, float d) : fVec(_mm_set_ps(d,c,b,a)) {} | 81 M() Sk4x(float a, float b, float c, float d) : fVec(_mm_set_ps(d,c,b,a)) {} |
82 | 82 |
83 M(Sk4f) Load (const float fs[4]) { return _mm_loadu_ps(fs); } | 83 M(Sk4f) Load (const float fs[4]) { return _mm_loadu_ps(fs); } |
84 M(Sk4f) LoadAligned(const float fs[4]) { return _mm_load_ps (fs); } | 84 M(Sk4f) LoadAligned(const float fs[4]) { return _mm_load_ps (fs); } |
85 | 85 |
86 M(void) store (float fs[4]) const { _mm_storeu_ps(fs, fVec); } | 86 M(void) store (float fs[4]) const { _mm_storeu_ps(fs, fVec); } |
87 M(void) storeAligned(float fs[4]) const { _mm_store_ps (fs, fVec); } | 87 M(void) storeAligned(float fs[4]) const { _mm_store_ps (fs, fVec); } |
88 | 88 |
89 template <> | 89 template <> M(Sk4i) reinterpret<Sk4i>() const { return as_4i(fVec); } |
90 M(Sk4i) reinterpret<Sk4i>() const { return as_4i(fVec); } | |
91 | 90 |
92 template <> | 91 // cvttps truncates, same as (int) when positive. |
93 M(Sk4i) cast<Sk4i>() const { return _mm_cvtps_epi32(fVec); } | 92 template <> M(Sk4i) cast<Sk4i>() const { return _mm_cvttps_epi32(fVec); } |
94 | 93 |
95 // We're going to try a little experiment here and skip allTrue(), anyTrue(), and bit-manipulators | 94 // We're going to try a little experiment here and skip allTrue(), anyTrue(), and bit-manipulators |
96 // for Sk4f. Code that calls them probably does so accidentally. | 95 // for Sk4f. Code that calls them probably does so accidentally. |
97 // Ask mtklein to fill these in if you really need them. | 96 // Ask mtklein to fill these in if you really need them. |
98 | 97 |
99 M(Sk4f) add (const Sk4f& o) const { return _mm_add_ps(fVec, o.fVec); } | 98 M(Sk4f) add (const Sk4f& o) const { return _mm_add_ps(fVec, o.fVec); } |
100 M(Sk4f) subtract(const Sk4f& o) const { return _mm_sub_ps(fVec, o.fVec); } | 99 M(Sk4f) subtract(const Sk4f& o) const { return _mm_sub_ps(fVec, o.fVec); } |
101 M(Sk4f) multiply(const Sk4f& o) const { return _mm_mul_ps(fVec, o.fVec); } | 100 M(Sk4f) multiply(const Sk4f& o) const { return _mm_mul_ps(fVec, o.fVec); } |
102 M(Sk4f) divide (const Sk4f& o) const { return _mm_div_ps(fVec, o.fVec); } | 101 M(Sk4f) divide (const Sk4f& o) const { return _mm_div_ps(fVec, o.fVec); } |
103 | 102 |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
175 } | 174 } |
176 M(Sk4i) Max(const Sk4i& a, const Sk4i& b) { | 175 M(Sk4i) Max(const Sk4i& a, const Sk4i& b) { |
177 Sk4i less = a.lessThan(b); | 176 Sk4i less = a.lessThan(b); |
178 return b.bitAnd(less).bitOr(a.andNot(less)); | 177 return b.bitAnd(less).bitOr(a.andNot(less)); |
179 } | 178 } |
180 #endif | 179 #endif |
181 | 180 |
182 #undef M | 181 #undef M |
183 | 182 |
184 #endif//Method definitions. | 183 #endif//Method definitions. |
OLD | NEW |