OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
10 | 10 |
11 // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything mo
re recent. | 11 // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything mo
re recent. |
12 | 12 |
13 namespace { // See SkNx.h | 13 namespace { // See SkNx.h |
14 | 14 |
15 | 15 |
16 template <> | 16 template <> |
17 class SkNf<2, float> { | 17 class SkNf<2> { |
18 public: | 18 public: |
19 SkNf(const __m128& vec) : fVec(vec) {} | 19 SkNf(const __m128& vec) : fVec(vec) {} |
20 | 20 |
21 SkNf() {} | 21 SkNf() {} |
22 explicit SkNf(float val) : fVec(_mm_set1_ps(val)) {} | 22 explicit SkNf(float val) : fVec(_mm_set1_ps(val)) {} |
23 static SkNf Load(const float vals[2]) { | 23 static SkNf Load(const float vals[2]) { |
24 return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals)); | 24 return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals)); |
25 } | 25 } |
26 SkNf(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {} | 26 SkNf(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {} |
27 | 27 |
(...skipping 28 matching lines...) Expand all Loading... |
56 return pun.fs[k&1]; | 56 return pun.fs[k&1]; |
57 } | 57 } |
58 | 58 |
59 bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fV
ec)) & 0xff); } | 59 bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fV
ec)) & 0xff); } |
60 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fV
ec)) & 0xff); } | 60 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fV
ec)) & 0xff); } |
61 | 61 |
62 __m128 fVec; | 62 __m128 fVec; |
63 }; | 63 }; |
64 | 64 |
65 template <> | 65 template <> |
66 class SkNf<2, double> { | |
67 public: | |
68 SkNf(const __m128d& vec) : fVec(vec) {} | |
69 | |
70 SkNf() {} | |
71 explicit SkNf(double val) : fVec( _mm_set1_pd(val) ) {} | |
72 static SkNf Load(const double vals[2]) { return _mm_loadu_pd(vals); } | |
73 SkNf(double a, double b) : fVec(_mm_setr_pd(a,b)) {} | |
74 | |
75 void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); } | |
76 | |
77 SkNf operator + (const SkNf& o) const { return _mm_add_pd(fVec, o.fVec); } | |
78 SkNf operator - (const SkNf& o) const { return _mm_sub_pd(fVec, o.fVec); } | |
79 SkNf operator * (const SkNf& o) const { return _mm_mul_pd(fVec, o.fVec); } | |
80 SkNf operator / (const SkNf& o) const { return _mm_div_pd(fVec, o.fVec); } | |
81 | |
82 SkNf operator == (const SkNf& o) const { return _mm_cmpeq_pd (fVec, o.fVec);
} | |
83 SkNf operator != (const SkNf& o) const { return _mm_cmpneq_pd(fVec, o.fVec);
} | |
84 SkNf operator < (const SkNf& o) const { return _mm_cmplt_pd (fVec, o.fVec);
} | |
85 SkNf operator > (const SkNf& o) const { return _mm_cmpgt_pd (fVec, o.fVec);
} | |
86 SkNf operator <= (const SkNf& o) const { return _mm_cmple_pd (fVec, o.fVec);
} | |
87 SkNf operator >= (const SkNf& o) const { return _mm_cmpge_pd (fVec, o.fVec);
} | |
88 | |
89 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_pd(l.fVec, r.
fVec); } | |
90 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_pd(l.fVec, r.
fVec); } | |
91 | |
92 SkNf sqrt() const { return _mm_sqrt_pd(fVec); } | |
93 SkNf rsqrt0() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec)));
} | |
94 SkNf rsqrt1() const { return this->rsqrt0(); } | |
95 SkNf rsqrt2() const { return this->rsqrt1(); } | |
96 | |
97 SkNf invert() const { return SkNf(1) / *this; } | |
98 SkNf approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec
))); } | |
99 | |
100 template <int k> double kth() const { | |
101 SkASSERT(0 <= k && k < 2); | |
102 union { __m128d v; double ds[2]; } pun = {fVec}; | |
103 return pun.ds[k&1]; | |
104 } | |
105 | |
106 bool allTrue() const { return 0xffff == _mm_movemask_epi8(_mm_castpd_si128(f
Vec)); } | |
107 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castpd_si128(f
Vec)); } | |
108 | |
109 __m128d fVec; | |
110 }; | |
111 | |
112 template <> | |
113 class SkNi<4, int> { | 66 class SkNi<4, int> { |
114 public: | 67 public: |
115 SkNi(const __m128i& vec) : fVec(vec) {} | 68 SkNi(const __m128i& vec) : fVec(vec) {} |
116 | 69 |
117 SkNi() {} | 70 SkNi() {} |
118 explicit SkNi(int val) : fVec(_mm_set1_epi32(val)) {} | 71 explicit SkNi(int val) : fVec(_mm_set1_epi32(val)) {} |
119 static SkNi Load(const int vals[4]) { return _mm_loadu_si128((const __m128i*
)vals); } | 72 static SkNi Load(const int vals[4]) { return _mm_loadu_si128((const __m128i*
)vals); } |
120 SkNi(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {} | 73 SkNi(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {} |
121 | 74 |
122 void store(int vals[4]) const { _mm_storeu_si128((__m128i*)vals, fVec); } | 75 void store(int vals[4]) const { _mm_storeu_si128((__m128i*)vals, fVec); } |
(...skipping 18 matching lines...) Expand all Loading... |
141 case 2: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 8)); | 94 case 2: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 8)); |
142 case 3: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 12)); | 95 case 3: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 12)); |
143 default: SkASSERT(false); return 0; | 96 default: SkASSERT(false); return 0; |
144 } | 97 } |
145 } | 98 } |
146 | 99 |
147 __m128i fVec; | 100 __m128i fVec; |
148 }; | 101 }; |
149 | 102 |
150 template <> | 103 template <> |
151 class SkNf<4, float> { | 104 class SkNf<4> { |
152 public: | 105 public: |
153 SkNf(const __m128& vec) : fVec(vec) {} | 106 SkNf(const __m128& vec) : fVec(vec) {} |
154 | 107 |
155 SkNf() {} | 108 SkNf() {} |
156 explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {} | 109 explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {} |
157 static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); } | 110 static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); } |
158 | 111 |
159 static SkNf FromBytes(const uint8_t bytes[4]) { | 112 static SkNf FromBytes(const uint8_t bytes[4]) { |
160 __m128i fix8 = _mm_cvtsi32_si128(*(const int*)bytes); | 113 __m128i fix8 = _mm_cvtsi32_si128(*(const int*)bytes); |
161 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 | 114 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
(...skipping 10 matching lines...) Expand all Loading... |
172 SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} | 125 SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} |
173 | 126 |
174 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } | 127 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } |
175 void toBytes(uint8_t bytes[4]) const { | 128 void toBytes(uint8_t bytes[4]) const { |
176 __m128i fix8_32 = _mm_cvttps_epi32(fVec), | 129 __m128i fix8_32 = _mm_cvttps_epi32(fVec), |
177 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), | 130 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), |
178 fix8 = _mm_packus_epi16(fix8_16, fix8_16); | 131 fix8 = _mm_packus_epi16(fix8_16, fix8_16); |
179 *(int*)bytes = _mm_cvtsi128_si32(fix8); | 132 *(int*)bytes = _mm_cvtsi128_si32(fix8); |
180 } | 133 } |
181 | 134 |
182 SkNi<4, int> castTrunc() const { return _mm_cvttps_epi32(fVec); } | |
183 | |
184 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } | 135 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } |
185 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } | 136 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } |
186 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } | 137 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } |
187 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } | 138 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } |
188 | 139 |
189 SkNf operator == (const SkNf& o) const { return _mm_cmpeq_ps (fVec, o.fVec);
} | 140 SkNf operator == (const SkNf& o) const { return _mm_cmpeq_ps (fVec, o.fVec);
} |
190 SkNf operator != (const SkNf& o) const { return _mm_cmpneq_ps(fVec, o.fVec);
} | 141 SkNf operator != (const SkNf& o) const { return _mm_cmpneq_ps(fVec, o.fVec);
} |
191 SkNf operator < (const SkNf& o) const { return _mm_cmplt_ps (fVec, o.fVec);
} | 142 SkNf operator < (const SkNf& o) const { return _mm_cmplt_ps (fVec, o.fVec);
} |
192 SkNf operator > (const SkNf& o) const { return _mm_cmpgt_ps (fVec, o.fVec);
} | 143 SkNf operator > (const SkNf& o) const { return _mm_cmpgt_ps (fVec, o.fVec);
} |
193 SkNf operator <= (const SkNf& o) const { return _mm_cmple_ps (fVec, o.fVec);
} | 144 SkNf operator <= (const SkNf& o) const { return _mm_cmple_ps (fVec, o.fVec);
} |
(...skipping 135 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
329 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec), | 280 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec), |
330 _mm_andnot_si128(fVec, e.fVec)); | 281 _mm_andnot_si128(fVec, e.fVec)); |
331 } | 282 } |
332 | 283 |
333 __m128i fVec; | 284 __m128i fVec; |
334 }; | 285 }; |
335 | 286 |
336 } // namespace | 287 } // namespace |
337 | 288 |
338 #endif//SkNx_sse_DEFINED | 289 #endif//SkNx_sse_DEFINED |
OLD | NEW |