OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
10 | 10 |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
58 return pun.fs[k&1]; | 58 return pun.fs[k&1]; |
59 } | 59 } |
60 | 60 |
61 bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fV
ec)) & 0xff); } | 61 bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fV
ec)) & 0xff); } |
62 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fV
ec)) & 0xff); } | 62 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fV
ec)) & 0xff); } |
63 | 63 |
64 __m128 fVec; | 64 __m128 fVec; |
65 }; | 65 }; |
66 | 66 |
67 template <> | 67 template <> |
| 68 class SkNx<2, double> { |
| 69 public: |
| 70 SkNx(const __m128d& vec) : fVec(vec) {} |
| 71 |
| 72 SkNx() {} |
| 73 SkNx(double val) : fVec(_mm_set1_pd(val)) {} |
| 74 static SkNx Load(const double vals[2]) { return _mm_loadu_pd(vals); } |
| 75 SkNx(double a, double b) : fVec(_mm_setr_pd(a,b)) {} |
| 76 |
| 77 void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); } |
| 78 |
| 79 SkNx operator + (const SkNx& o) const { return _mm_add_pd(fVec, o.fVec); } |
| 80 SkNx operator - (const SkNx& o) const { return _mm_sub_pd(fVec, o.fVec); } |
| 81 SkNx operator * (const SkNx& o) const { return _mm_mul_pd(fVec, o.fVec); } |
| 82 SkNx operator / (const SkNx& o) const { return _mm_div_pd(fVec, o.fVec); } |
| 83 |
| 84 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_pd (fVec, o.fVec);
} |
| 85 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_pd(fVec, o.fVec);
} |
| 86 SkNx operator < (const SkNx& o) const { return _mm_cmplt_pd (fVec, o.fVec);
} |
| 87 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_pd (fVec, o.fVec);
} |
| 88 SkNx operator <= (const SkNx& o) const { return _mm_cmple_pd (fVec, o.fVec);
} |
| 89 SkNx operator >= (const SkNx& o) const { return _mm_cmpge_pd (fVec, o.fVec);
} |
| 90 |
| 91 static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_pd(l.fVec, r.
fVec); } |
| 92 static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_pd(l.fVec, r.
fVec); } |
| 93 |
| 94 SkNx sqrt() const { return _mm_sqrt_pd(fVec); } |
| 95 |
| 96 template <int k> double kth() const { |
| 97 SkASSERT(0 <= k && k < 2); |
| 98 union { __m128d v; double fs[2]; } pun = {fVec}; |
| 99 return pun.fs[k&1]; |
| 100 } |
| 101 |
| 102 bool allTrue() const { return 0x3 == _mm_movemask_pd(fVec); } |
| 103 bool anyTrue() const { return 0x0 != _mm_movemask_pd(fVec); } |
| 104 |
| 105 SkNx thenElse(const SkNx& t, const SkNx& e) const { |
| 106 return _mm_or_pd(_mm_and_pd (fVec, t.fVec), |
| 107 _mm_andnot_pd(fVec, e.fVec)); |
| 108 } |
| 109 |
| 110 __m128d fVec; |
| 111 }; |
| 112 |
| 113 template <> |
68 class SkNx<4, int> { | 114 class SkNx<4, int> { |
69 public: | 115 public: |
70 SkNx(const __m128i& vec) : fVec(vec) {} | 116 SkNx(const __m128i& vec) : fVec(vec) {} |
71 | 117 |
72 SkNx() {} | 118 SkNx() {} |
73 SkNx(int val) : fVec(_mm_set1_epi32(val)) {} | 119 SkNx(int val) : fVec(_mm_set1_epi32(val)) {} |
74 static SkNx Load(const int vals[4]) { return _mm_loadu_si128((const __m128i*
)vals); } | 120 static SkNx Load(const int vals[4]) { return _mm_loadu_si128((const __m128i*
)vals); } |
75 SkNx(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {} | 121 SkNx(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {} |
76 | 122 |
77 void store(int vals[4]) const { _mm_storeu_si128((__m128i*)vals, fVec); } | 123 void store(int vals[4]) const { _mm_storeu_si128((__m128i*)vals, fVec); } |
(...skipping 250 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
328 _mm_packus_epi16(_mm_packus_epi16(_mm_cvttps_epi32(a.fVec), | 374 _mm_packus_epi16(_mm_packus_epi16(_mm_cvttps_epi32(a.fVec), |
329 _mm_cvttps_epi32(b.fVec))
, | 375 _mm_cvttps_epi32(b.fVec))
, |
330 _mm_packus_epi16(_mm_cvttps_epi32(c.fVec), | 376 _mm_packus_epi16(_mm_cvttps_epi32(c.fVec), |
331 _mm_cvttps_epi32(d.fVec))
)); | 377 _mm_cvttps_epi32(d.fVec))
)); |
332 } | 378 } |
333 | 379 |
334 | 380 |
335 } // namespace | 381 } // namespace |
336 | 382 |
337 #endif//SkNx_sse_DEFINED | 383 #endif//SkNx_sse_DEFINED |
OLD | NEW |