| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
| 9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
| 10 | 10 |
| (...skipping 124 matching lines...) |
| 135 SkASSERT(0 <= k && k < 2); | 135 SkASSERT(0 <= k && k < 2); |
| 136 union { __m128d v; double ds[2]; } pun = {fVec}; | 136 union { __m128d v; double ds[2]; } pun = {fVec}; |
| 137 return pun.ds[k&1]; | 137 return pun.ds[k&1]; |
| 138 } | 138 } |
| 139 | 139 |
| 140 private: | 140 private: |
| 141 __m128d fVec; | 141 __m128d fVec; |
| 142 }; | 142 }; |
| 143 | 143 |
| 144 template <> | 144 template <> |
| 145 class SkNi<4, int> { | |
| 146 public: | |
| 147 SkNi(const __m128i& vec) : fVec(vec) {} | |
| 148 | |
| 149 SkNi() {} | |
| 150 explicit SkNi(int val) : fVec(_mm_set1_epi32(val)) {} | |
| 151 static SkNi Load(const int vals[4]) { return _mm_loadu_si128((const __m128i*)vals); } | |
| 152 SkNi(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {} | |
| 153 | |
| 154 void store(int vals[4]) const { _mm_storeu_si128((__m128i*)vals, fVec); } | |
| 155 | |
| 156 SkNi operator + (const SkNi& o) const { return _mm_add_epi32(fVec, o.fVec); } | |
| 157 SkNi operator - (const SkNi& o) const { return _mm_sub_epi32(fVec, o.fVec); } | |
| 158 SkNi operator * (const SkNi& o) const { | |
| 159 __m128i mul20 = _mm_mul_epu32(fVec, o.fVec), | |
| 160 mul31 = _mm_mul_epu32(_mm_srli_si128(fVec, 4), _mm_srli_si128(o.fVec, 4)); | |
| 161 return _mm_unpacklo_epi32(_mm_shuffle_epi32(mul20, _MM_SHUFFLE(0,0,2,0)), | |
| 162 _mm_shuffle_epi32(mul31, _MM_SHUFFLE(0,0,2,0))); | |
| 163 } | |
| 164 | |
| 165 SkNi operator << (int bits) const { return _mm_slli_epi32(fVec, bits); } | |
| 166 SkNi operator >> (int bits) const { return _mm_srai_epi32(fVec, bits); } | |
| 167 | |
| 168 template <int k> int kth() const { | |
| 169 SkASSERT(0 <= k && k < 4); | |
| 170 switch (k) { | |
| 171 case 0: return _mm_cvtsi128_si32(fVec); | |
| 172 case 1: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 4)); | |
| 173 case 2: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 8)); | |
| 174 case 3: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 12)); | |
| 175 default: SkASSERT(false); return 0; | |
| 176 } | |
| 177 } | |
| 178 protected: | |
| 179 __m128i fVec; | |
| 180 }; | |
| 181 | |
| 182 template <> | |
| 183 class SkNf<4, float> { | 145 class SkNf<4, float> { |
| 184 typedef SkNb<4, 4> Nb; | 146 typedef SkNb<4, 4> Nb; |
| 185 public: | 147 public: |
| 186 SkNf(const __m128& vec) : fVec(vec) {} | 148 SkNf(const __m128& vec) : fVec(vec) {} |
| 187 | 149 |
| 188 SkNf() {} | 150 SkNf() {} |
| 189 explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {} | 151 explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {} |
| 190 static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); } | 152 static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); } |
| 191 SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} | 153 SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} |
| 192 | 154 |
| 193 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } | 155 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } |
| 194 | 156 |
| 195 SkNi<4, int> castTrunc() const { return _mm_cvttps_epi32(fVec); } | |
| 196 | |
| 197 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } | 157 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } |
| 198 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } | 158 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } |
| 199 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } | 159 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } |
| 200 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } | 160 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } |
| 201 | 161 |
| 202 Nb operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps(fVec, o.fVec)); } | 162 Nb operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps(fVec, o.fVec)); } |
| 203 Nb operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps(fVec, o.fVec)); } | 163 Nb operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps(fVec, o.fVec)); } |
| 204 Nb operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps(fVec, o.fVec)); } | 164 Nb operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps(fVec, o.fVec)); } |
| 205 Nb operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps(fVec, o.fVec)); } | 165 Nb operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps(fVec, o.fVec)); } |
| 206 Nb operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps(fVec, o.fVec)); } | 166 Nb operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps(fVec, o.fVec)); } |
| (...skipping 67 matching lines...) |
| 274 | 234 |
| 275 template <int k> uint16_t kth() const { | 235 template <int k> uint16_t kth() const { |
| 276 SkASSERT(0 <= k && k < 8); | 236 SkASSERT(0 <= k && k < 8); |
| 277 return _mm_extract_epi16(fVec, k); | 237 return _mm_extract_epi16(fVec, k); |
| 278 } | 238 } |
| 279 protected: | 239 protected: |
| 280 __m128i fVec; | 240 __m128i fVec; |
| 281 }; | 241 }; |
| 282 | 242 |
| 283 #endif//SkNx_sse_DEFINED | 243 #endif//SkNx_sse_DEFINED |
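Note (reviewer aside, not part of the patch): the deleted SkNi<4, int>::operator* (old lines 158-163) emulates a packed 32-bit low multiply, which SSE2 lacks; _mm_mullo_epi32 only arrives with SSE4.1. It multiplies the even and odd lanes separately with the 32x32->64-bit _mm_mul_epu32, then shuffles the low halves of the products back together. A minimal standalone sketch of the same trick follows; the helper name mullo_epi32_sse2 and the main() driver are illustrative, not from the patch.

    #include <emmintrin.h>
    #include <stdio.h>

    // SSE2 emulation of a packed 32-bit low multiply, as in the deleted
    // SkNi<4, int>::operator* above.  Helper name is hypothetical.
    static __m128i mullo_epi32_sse2(__m128i a, __m128i b) {
        // _mm_mul_epu32 multiplies lanes 0 and 2 into two 64-bit products.
        // The unsigned multiply still yields the correct low 32 bits for
        // signed inputs (two's complement).
        __m128i mul02 = _mm_mul_epu32(a, b);
        // Shift lanes 1 and 3 down so the same instruction can reach them.
        __m128i mul13 = _mm_mul_epu32(_mm_srli_si128(a, 4),
                                      _mm_srli_si128(b, 4));
        // Keep the low 32 bits of each product, then re-interleave the lanes.
        return _mm_unpacklo_epi32(_mm_shuffle_epi32(mul02, _MM_SHUFFLE(0,0,2,0)),
                                  _mm_shuffle_epi32(mul13, _MM_SHUFFLE(0,0,2,0)));
    }

    int main() {
        int out[4];
        __m128i a = _mm_setr_epi32(1, -2, 3, 4),
                b = _mm_setr_epi32(5, 6, -7, 8);
        _mm_storeu_si128((__m128i*)out, mullo_epi32_sse2(a, b));
        printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);  // 5 -12 -21 32
        return 0;
    }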
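Similarly, the union read in SkNd's kth<k>() (line 136 above) is the usual pre-SSE4.1 way to pull one lane out of an __m128d: store the vector into a union and index the scalar array. A small self-contained sketch, assuming the illustrative name kth_double:

    #include <emmintrin.h>
    #include <stdio.h>

    // Union type punning: well-defined in C, and supported as an extension
    // by the major C++ compilers, which is what the source relies on.
    static double kth_double(__m128d v, int k) {
        union { __m128d v; double ds[2]; } pun = {v};
        return pun.ds[k & 1];  // k & 1 clamps the index to 0 or 1, as in kth<k>()
    }

    int main() {
        __m128d v = _mm_setr_pd(2.5, -4.0);
        printf("%g %g\n", kth_double(v, 0), kth_double(v, 1));  // 2.5 -4
        return 0;
    }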