OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
10 | 10 |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
45 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec);
} | 45 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec);
} |
46 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec);
} | 46 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec);
} |
47 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec);
} | 47 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec);
} |
48 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec);
} | 48 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec);
} |
49 SkNx operator <= (const SkNx& o) const { return _mm_cmple_ps (fVec, o.fVec);
} | 49 SkNx operator <= (const SkNx& o) const { return _mm_cmple_ps (fVec, o.fVec);
} |
50 SkNx operator >= (const SkNx& o) const { return _mm_cmpge_ps (fVec, o.fVec);
} | 50 SkNx operator >= (const SkNx& o) const { return _mm_cmpge_ps (fVec, o.fVec);
} |
51 | 51 |
52 static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.
fVec); } | 52 static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.
fVec); } |
53 static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.
fVec); } | 53 static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.
fVec); } |
54 | 54 |
55 SkNx sqrt () const { return _mm_sqrt_ps (fVec); } | 55 SkNx sqrt() const { return _mm_sqrt_ps (fVec); } |
56 SkNx rsqrt0() const { return _mm_rsqrt_ps(fVec); } | 56 SkNx rsqrt() const { return _mm_rsqrt_ps(fVec); } |
57 SkNx rsqrt1() const { return this->rsqrt0(); } | 57 SkNx invert() const { return _mm_rcp_ps(fVec); } |
58 SkNx rsqrt2() const { return this->rsqrt1(); } | |
59 | |
60 SkNx invert() const { return SkNx(1) / *this; } | |
61 SkNx approxInvert() const { return _mm_rcp_ps(fVec); } | |
62 | 58 |
63 float operator[](int k) const { | 59 float operator[](int k) const { |
64 SkASSERT(0 <= k && k < 2); | 60 SkASSERT(0 <= k && k < 2); |
65 union { __m128 v; float fs[4]; } pun = {fVec}; | 61 union { __m128 v; float fs[4]; } pun = {fVec}; |
66 return pun.fs[k&1]; | 62 return pun.fs[k&1]; |
67 } | 63 } |
68 | 64 |
69 bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fV
ec)) & 0xff); } | 65 bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fV
ec)) & 0xff); } |
70 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fV
ec)) & 0xff); } | 66 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fV
ec)) & 0xff); } |
71 | 67 |
(...skipping 24 matching lines...) Expand all Loading... |
96 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec);
} | 92 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec);
} |
97 SkNx operator <= (const SkNx& o) const { return _mm_cmple_ps (fVec, o.fVec);
} | 93 SkNx operator <= (const SkNx& o) const { return _mm_cmple_ps (fVec, o.fVec);
} |
98 SkNx operator >= (const SkNx& o) const { return _mm_cmpge_ps (fVec, o.fVec);
} | 94 SkNx operator >= (const SkNx& o) const { return _mm_cmpge_ps (fVec, o.fVec);
} |
99 | 95 |
100 static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.
fVec); } | 96 static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.
fVec); } |
101 static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.
fVec); } | 97 static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.
fVec); } |
102 | 98 |
103 SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); } | 99 SkNx abs() const { return _mm_andnot_ps(_mm_set1_ps(-0.0f), fVec); } |
104 SkNx floor() const { return sse2_mm_floor_ps(fVec); } | 100 SkNx floor() const { return sse2_mm_floor_ps(fVec); } |
105 | 101 |
106 SkNx sqrt () const { return _mm_sqrt_ps (fVec); } | 102 SkNx sqrt() const { return _mm_sqrt_ps (fVec); } |
107 SkNx rsqrt0() const { return _mm_rsqrt_ps(fVec); } | 103 SkNx rsqrt() const { return _mm_rsqrt_ps(fVec); } |
108 SkNx rsqrt1() const { return this->rsqrt0(); } | 104 SkNx invert() const { return _mm_rcp_ps(fVec); } |
109 SkNx rsqrt2() const { return this->rsqrt1(); } | |
110 | |
111 SkNx invert() const { return SkNx(1) / *this; } | |
112 SkNx approxInvert() const { return _mm_rcp_ps(fVec); } | |
113 | 105 |
114 float operator[](int k) const { | 106 float operator[](int k) const { |
115 SkASSERT(0 <= k && k < 4); | 107 SkASSERT(0 <= k && k < 4); |
116 union { __m128 v; float fs[4]; } pun = {fVec}; | 108 union { __m128 v; float fs[4]; } pun = {fVec}; |
117 return pun.fs[k&3]; | 109 return pun.fs[k&3]; |
118 } | 110 } |
119 | 111 |
120 bool allTrue() const { return 0xffff == _mm_movemask_epi8(_mm_castps_si128(f
Vec)); } | 112 bool allTrue() const { return 0xffff == _mm_movemask_epi8(_mm_castps_si128(f
Vec)); } |
121 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castps_si128(f
Vec)); } | 113 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castps_si128(f
Vec)); } |
122 | 114 |
(...skipping 216 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
339 _32 = _mm_unpacklo_epi16(_16, _mm_setzero_si128()); | 331 _32 = _mm_unpacklo_epi16(_16, _mm_setzero_si128()); |
340 #endif | 332 #endif |
341 return _mm_cvtepi32_ps(_32); | 333 return _mm_cvtepi32_ps(_32); |
342 } | 334 } |
343 | 335 |
344 template<> /*static*/ inline Sk4f SkNx_cast<float, uint16_t>(const Sk4h& src) { | 336 template<> /*static*/ inline Sk4f SkNx_cast<float, uint16_t>(const Sk4h& src) { |
345 auto _32 = _mm_unpacklo_epi16(src.fVec, _mm_setzero_si128()); | 337 auto _32 = _mm_unpacklo_epi16(src.fVec, _mm_setzero_si128()); |
346 return _mm_cvtepi32_ps(_32); | 338 return _mm_cvtepi32_ps(_32); |
347 } | 339 } |
348 | 340 |
349 static inline void Sk4f_ToBytes(uint8_t bytes[16], | 341 template<> /*static*/ inline Sk16b SkNx_cast<uint8_t, float>(const Sk16f& src) { |
350 const Sk4f& a, const Sk4f& b, const Sk4f& c, con
st Sk4f& d) { | 342 Sk8f ab, cd; |
351 _mm_storeu_si128((__m128i*)bytes, | 343 SkNx_split(src, &ab, &cd); |
352 _mm_packus_epi16(_mm_packus_epi16(_mm_cvttps_epi32(a.fVec), | 344 |
353 _mm_cvttps_epi32(b.fVec))
, | 345 Sk4f a,b,c,d; |
354 _mm_packus_epi16(_mm_cvttps_epi32(c.fVec), | 346 SkNx_split(ab, &a, &b); |
355 _mm_cvttps_epi32(d.fVec))
)); | 347 SkNx_split(cd, &c, &d); |
| 348 |
| 349 return _mm_packus_epi16(_mm_packus_epi16(_mm_cvttps_epi32(a.fVec), |
| 350 _mm_cvttps_epi32(b.fVec)), |
| 351 _mm_packus_epi16(_mm_cvttps_epi32(c.fVec), |
| 352 _mm_cvttps_epi32(d.fVec))); |
356 } | 353 } |
357 | 354 |
358 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src)
{ | 355 template<> /*static*/ inline Sk4h SkNx_cast<uint16_t, uint8_t>(const Sk4b& src)
{ |
359 return _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128()); | 356 return _mm_unpacklo_epi8(src.fVec, _mm_setzero_si128()); |
360 } | 357 } |
361 | 358 |
362 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src)
{ | 359 template<> /*static*/ inline Sk4b SkNx_cast<uint8_t, uint16_t>(const Sk4h& src)
{ |
363 return _mm_packus_epi16(src.fVec, src.fVec); | 360 return _mm_packus_epi16(src.fVec, src.fVec); |
364 } | 361 } |
365 | 362 |
366 #endif//SkNx_sse_DEFINED | 363 #endif//SkNx_sse_DEFINED |
OLD | NEW |