| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
| 9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
| 10 | 10 |
| (...skipping 34 matching lines...) |
| 45 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec); } | 45 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec); } |
| 46 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec); } | 46 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec); } |
| 47 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec); } | 47 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec); } |
| 48 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } | 48 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } |
| 49 SkNx operator <= (const SkNx& o) const { return _mm_cmple_ps (fVec, o.fVec); } | 49 SkNx operator <= (const SkNx& o) const { return _mm_cmple_ps (fVec, o.fVec); } |
| 50 SkNx operator >= (const SkNx& o) const { return _mm_cmpge_ps (fVec, o.fVec); } | 50 SkNx operator >= (const SkNx& o) const { return _mm_cmpge_ps (fVec, o.fVec); } |
| 51 | 51 |
| 52 static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.fVec); } | 52 static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.fVec); } |
| 53 static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.fVec); } | 53 static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.fVec); } |
| 54 | 54 |
| 55 SkNx sqrt () const { return _mm_sqrt_ps (fVec); } | 55 SkNx sqrt() const { return _mm_sqrt_ps (fVec); } |
| 56 SkNx rsqrt0() const { return _mm_rsqrt_ps(fVec); } | 56 SkNx rsqrt() const { return _mm_rsqrt_ps(fVec); } |
| 57 SkNx rsqrt1() const { return this->rsqrt0(); } | 57 SkNx invert() const { return _mm_rcp_ps(fVec); } |
| 58 SkNx rsqrt2() const { return this->rsqrt1(); } | |
| 59 | |
| 60 SkNx invert() const { return SkNx(1) / *this; } | |
| 61 SkNx approxInvert() const { return _mm_rcp_ps(fVec); } | |
| 62 | 58 |
| 63 float operator[](int k) const { | 59 float operator[](int k) const { |
| 64 SkASSERT(0 <= k && k < 2); | 60 SkASSERT(0 <= k && k < 2); |
| 65 union { __m128 v; float fs[4]; } pun = {fVec}; | 61 union { __m128 v; float fs[4]; } pun = {fVec}; |
| 66 return pun.fs[k&1]; | 62 return pun.fs[k&1]; |
| 67 } | 63 } |
| 68 | 64 |
| 69 bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); } | 65 bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); } |
| 70 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); } | 66 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); } |
| 71 | 67 |
| (...skipping 24 matching lines...) |
| 96 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } | 92 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } |
| 97 SkNx operator <= (const SkNx& o) const { return _mm_cmple_ps (fVec, o.fVec); } | 93 SkNx operator <= (const SkNx& o) const { return _mm_cmple_ps (fVec, o.fVec); } |
| 98 SkNx operator >= (const SkNx& o) const { return _mm_cmpge_ps (fVec, o.fVec); } | 94 SkNx operator >= (const SkNx& o) const { return _mm_cmpge_ps (fVec, o.fVec); } |
| 99 | 95 |
| 100 static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.fVec); } | 96 static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.fVec); } |
| 101 static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.fVec); } | 97 static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.fVec); } |
| 102 | 98 |
| 103 SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); } | 99 SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); } |
| 104 SkNx floor() const { return sse2_mm_floor_ps(fVec); } | 100 SkNx floor() const { return sse2_mm_floor_ps(fVec); } |
| 105 | 101 |
| 106 SkNx sqrt () const { return _mm_sqrt_ps (fVec); } | 102 SkNx sqrt() const { return _mm_sqrt_ps (fVec); } |
| 107 SkNx rsqrt0() const { return _mm_rsqrt_ps(fVec); } | 103 SkNx rsqrt() const { return _mm_rsqrt_ps(fVec); } |
| 108 SkNx rsqrt1() const { return this->rsqrt0(); } | 104 SkNx invert() const { return _mm_rcp_ps(fVec); } |
| 109 SkNx rsqrt2() const { return this->rsqrt1(); } | |
| 110 | |
| 111 SkNx invert() const { return SkNx(1) / *this; } | |
| 112 SkNx approxInvert() const { return _mm_rcp_ps(fVec); } | |
| 113 | 105 |
| 114 float operator[](int k) const { | 106 float operator[](int k) const { |
| 115 SkASSERT(0 <= k && k < 4); | 107 SkASSERT(0 <= k && k < 4); |
| 116 union { __m128 v; float fs[4]; } pun = {fVec}; | 108 union { __m128 v; float fs[4]; } pun = {fVec}; |
| 117 return pun.fs[k&3]; | 109 return pun.fs[k&3]; |
| 118 } | 110 } |
| 119 | 111 |
| 120 bool allTrue() const { return 0xffff == _mm_movemask_epi8(_mm_castps_si128(fVec)); } | 112 bool allTrue() const { return 0xffff == _mm_movemask_epi8(_mm_castps_si128(fVec)); } |
| 121 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castps_si128(fVec)); } | 113 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castps_si128(fVec)); } |
| 122 | 114 |
| (...skipping 216 matching lines...) |
| 339 _32 = _mm_unpacklo_epi16(_16, _mm_setzero_si128()); | 331 _32 = _mm_unpacklo_epi16(_16, _mm_setzero_si128()); |
| 340 #endif | 332 #endif |
| 341 return _mm_cvtepi32_ps(_32); | 333 return _mm_cvtepi32_ps(_32); |
| 342 } | 334 } |
| 343 | 335 |
| 344 template<> /*static*/ inline Sk4f SkNx_cast<float, uint16_t>(const Sk4h& src) { | 336 template<> /*static*/ inline Sk4f SkNx_cast<float, uint16_t>(const Sk4h& src) { |
| 345 auto _32 = _mm_unpacklo_epi16(src.fVec, _mm_setzero_si128()); | 337 auto _32 = _mm_unpacklo_epi16(src.fVec, _mm_setzero_si128()); |
| 346 return _mm_cvtepi32_ps(_32); | 338 return _mm_cvtepi32_ps(_32); |
| 347 } | 339 } |
| 348 | 340 |
| 349 static inline void Sk4f_ToBytes(uint8_t bytes[16], | 341 template<> /*static*/ inline Sk16b SkNx_cast<uint8_t, float>(const Sk16f& src) { |
| 350 const Sk4f& a, const Sk4f& b, const Sk4f& c, const Sk4f& d) { | 342 Sk8f ab, cd; |
| 351 _mm_storeu_si128((__m128i*)bytes, | 343 SkNx_split(src, &ab, &cd); |
| 352 _mm_packus_epi16(_mm_packus_epi16(_mm_cvttps_epi32(a.fVec), | 344 |
| 353 _mm_cvttps_epi32(b.fVec)), | 345 Sk4f a,b,c,d; |
| 354 _mm_packus_epi16(_mm_cvttps_epi32(c.fVec), | 346 SkNx_split(ab, &a, &b); |
| 355 _mm_cvttps_epi32(d.fVec))); | 347 SkNx_split(cd, &c, &d); |
| 348 |
| 349 return _mm_packus_epi16(_mm_packus_epi16(_mm_cvttps_epi32(a.fVec), |
| 350 _mm_cvttps_epi32(b.fVec)), |
| 351 _mm_packus_epi16(_mm_cvttps_epi32(c.fVec), |
| 352 _mm_cvttps_epi32(d.fVec))); |
| 356 } | 353 } |
| 357 | 354 |
| 358 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { | 355 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src) { |
| 359 return _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128()); | 356 return _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128()); |
| 360 } | 357 } |
| 361 | 358 |
| 362 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { | 359 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src) { |
| 363 return _mm_packus_epi16(src.fVec, src.fVec); | 360 return _mm_packus_epi16(src.fVec, src.fVec); |
| 364 } | 361 } |
| 365 | 362 |
| 366 #endif//SkNx_sse_DEFINED | 363 #endif//SkNx_sse_DEFINED |
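A minimal caller-side sketch of the float-to-byte change above, not code from this CL: where Sk4f_ToBytes(bytes, a, b, c, d) is removed, a caller can route the same four Sk4f through the new SkNx_cast<uint8_t, float> specialization. The helper name is hypothetical, and it assumes SkNx_join() and store() from SkNx.h, the counterparts of the SkNx_split() used in the new code.

    #include "SkNx.h"

    // Hypothetical helper: pack four Sk4f into 16 bytes via the new cast path.
    static void four_floats_to_bytes(uint8_t bytes[16],
                                     const Sk4f& a, const Sk4f& b,
                                     const Sk4f& c, const Sk4f& d) {
        Sk16f all = SkNx_join(SkNx_join(a, b), SkNx_join(c, d));  // assumes SkNx_join in SkNx.h
        Sk16b packed = SkNx_cast<uint8_t, float>(all);            // specialization added here
        packed.store(bytes);                                      // assumes SkNx::store
    }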