OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
10 | 10 |
11 // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything mo
re recent. | 11 // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything mo
re recent. |
12 #include <immintrin.h> | 12 #include <immintrin.h> |
13 | 13 |
14 template <> | 14 template <> |
15 class SkNi<2, int32_t> { | 15 class SkNb<2, 4> { |
16 public: | 16 public: |
17 SkNi(const __m128i& vec) : fVec(vec) {} | 17 SkNb(const __m128i& vec) : fVec(vec) {} |
18 | 18 |
19 SkNi() {} | 19 SkNb() {} |
20 bool allTrue() const { return 0xff == (_mm_movemask_epi8(fVec) & 0xff); } | 20 bool allTrue() const { return 0xff == (_mm_movemask_epi8(fVec) & 0xff); } |
21 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(fVec) & 0xff); } | 21 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(fVec) & 0xff); } |
22 | 22 |
23 private: | 23 private: |
24 __m128i fVec; | 24 __m128i fVec; |
25 }; | 25 }; |
26 | 26 |
27 template <> | 27 template <> |
28 class SkNi<4, int32_t> { | 28 class SkNb<4, 4> { |
29 public: | 29 public: |
30 SkNi(const __m128i& vec) : fVec(vec) {} | 30 SkNb(const __m128i& vec) : fVec(vec) {} |
31 | 31 |
32 SkNi() {} | 32 SkNb() {} |
33 bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); } | 33 bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); } |
34 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); } | 34 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); } |
35 | 35 |
36 private: | 36 private: |
37 __m128i fVec; | 37 __m128i fVec; |
38 }; | 38 }; |
39 | 39 |
40 template <> | 40 template <> |
41 class SkNi<2, int64_t> { | 41 class SkNb<2, 8> { |
42 public: | 42 public: |
43 SkNi(const __m128i& vec) : fVec(vec) {} | 43 SkNb(const __m128i& vec) : fVec(vec) {} |
44 | 44 |
45 SkNi() {} | 45 SkNb() {} |
46 bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); } | 46 bool allTrue() const { return 0xffff == _mm_movemask_epi8(fVec); } |
47 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); } | 47 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(fVec); } |
48 | 48 |
49 private: | 49 private: |
50 __m128i fVec; | 50 __m128i fVec; |
51 }; | 51 }; |
52 | 52 |
53 | 53 |
54 template <> | 54 template <> |
55 class SkNf<2, float> { | 55 class SkNf<2, float> { |
56 typedef SkNi<2, int32_t> Ni; | 56 typedef SkNb<2, 4> Nb; |
57 public: | 57 public: |
58 SkNf(const __m128& vec) : fVec(vec) {} | 58 SkNf(const __m128& vec) : fVec(vec) {} |
59 | 59 |
60 SkNf() {} | 60 SkNf() {} |
61 explicit SkNf(float val) : fVec(_mm_set1_ps(val)) {} | 61 explicit SkNf(float val) : fVec(_mm_set1_ps(val)) {} |
62 static SkNf Load(const float vals[2]) { | 62 static SkNf Load(const float vals[2]) { |
63 return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals)); | 63 return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals)); |
64 } | 64 } |
65 SkNf(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {} | 65 SkNf(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {} |
66 | 66 |
67 void store(float vals[2]) const { _mm_storel_pi((__m64*)vals, fVec); } | 67 void store(float vals[2]) const { _mm_storel_pi((__m64*)vals, fVec); } |
68 | 68 |
69 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } | 69 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } |
70 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } | 70 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } |
71 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } | 71 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } |
72 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } | 72 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } |
73 | 73 |
74 Ni operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps
(fVec, o.fVec)); } | 74 Nb operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps
(fVec, o.fVec)); } |
75 Ni operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps
(fVec, o.fVec)); } | 75 Nb operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps
(fVec, o.fVec)); } |
76 Ni operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps
(fVec, o.fVec)); } | 76 Nb operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps
(fVec, o.fVec)); } |
77 Ni operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps
(fVec, o.fVec)); } | 77 Nb operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps
(fVec, o.fVec)); } |
78 Ni operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps
(fVec, o.fVec)); } | 78 Nb operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps
(fVec, o.fVec)); } |
79 Ni operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps
(fVec, o.fVec)); } | 79 Nb operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps
(fVec, o.fVec)); } |
80 | 80 |
81 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.
fVec); } | 81 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.
fVec); } |
82 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.
fVec); } | 82 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.
fVec); } |
83 | 83 |
84 SkNf sqrt() const { return _mm_sqrt_ps (fVec); } | 84 SkNf sqrt() const { return _mm_sqrt_ps (fVec); } |
85 SkNf rsqrt() const { return _mm_rsqrt_ps(fVec); } | 85 SkNf rsqrt() const { return _mm_rsqrt_ps(fVec); } |
86 | 86 |
87 SkNf invert() const { return SkNf(1) / *this; } | 87 SkNf invert() const { return SkNf(1) / *this; } |
88 SkNf approxInvert() const { return _mm_rcp_ps(fVec); } | 88 SkNf approxInvert() const { return _mm_rcp_ps(fVec); } |
89 | 89 |
90 template <int k> float kth() const { | 90 template <int k> float kth() const { |
91 SkASSERT(0 <= k && k < 2); | 91 SkASSERT(0 <= k && k < 2); |
92 union { __m128 v; float fs[4]; } pun = {fVec}; | 92 union { __m128 v; float fs[4]; } pun = {fVec}; |
93 return pun.fs[k&1]; | 93 return pun.fs[k&1]; |
94 } | 94 } |
95 | 95 |
96 private: | 96 private: |
97 __m128 fVec; | 97 __m128 fVec; |
98 }; | 98 }; |
99 | 99 |
100 template <> | 100 template <> |
101 class SkNf<2, double> { | 101 class SkNf<2, double> { |
102 typedef SkNi<2, int64_t> Ni; | 102 typedef SkNb<2, 8> Nb; |
103 public: | 103 public: |
104 SkNf(const __m128d& vec) : fVec(vec) {} | 104 SkNf(const __m128d& vec) : fVec(vec) {} |
105 | 105 |
106 SkNf() {} | 106 SkNf() {} |
107 explicit SkNf(double val) : fVec( _mm_set1_pd(val) ) {} | 107 explicit SkNf(double val) : fVec( _mm_set1_pd(val) ) {} |
108 static SkNf Load(const double vals[2]) { return _mm_loadu_pd(vals); } | 108 static SkNf Load(const double vals[2]) { return _mm_loadu_pd(vals); } |
109 SkNf(double a, double b) : fVec(_mm_setr_pd(a,b)) {} | 109 SkNf(double a, double b) : fVec(_mm_setr_pd(a,b)) {} |
110 | 110 |
111 void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); } | 111 void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); } |
112 | 112 |
113 SkNf operator + (const SkNf& o) const { return _mm_add_pd(fVec, o.fVec); } | 113 SkNf operator + (const SkNf& o) const { return _mm_add_pd(fVec, o.fVec); } |
114 SkNf operator - (const SkNf& o) const { return _mm_sub_pd(fVec, o.fVec); } | 114 SkNf operator - (const SkNf& o) const { return _mm_sub_pd(fVec, o.fVec); } |
115 SkNf operator * (const SkNf& o) const { return _mm_mul_pd(fVec, o.fVec); } | 115 SkNf operator * (const SkNf& o) const { return _mm_mul_pd(fVec, o.fVec); } |
116 SkNf operator / (const SkNf& o) const { return _mm_div_pd(fVec, o.fVec); } | 116 SkNf operator / (const SkNf& o) const { return _mm_div_pd(fVec, o.fVec); } |
117 | 117 |
118 Ni operator == (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpeq_pd
(fVec, o.fVec)); } | 118 Nb operator == (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpeq_pd
(fVec, o.fVec)); } |
119 Ni operator != (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpneq_pd
(fVec, o.fVec)); } | 119 Nb operator != (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpneq_pd
(fVec, o.fVec)); } |
120 Ni operator < (const SkNf& o) const { return _mm_castpd_si128(_mm_cmplt_pd
(fVec, o.fVec)); } | 120 Nb operator < (const SkNf& o) const { return _mm_castpd_si128(_mm_cmplt_pd
(fVec, o.fVec)); } |
121 Ni operator > (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpgt_pd
(fVec, o.fVec)); } | 121 Nb operator > (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpgt_pd
(fVec, o.fVec)); } |
122 Ni operator <= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmple_pd
(fVec, o.fVec)); } | 122 Nb operator <= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmple_pd
(fVec, o.fVec)); } |
123 Ni operator >= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpge_pd
(fVec, o.fVec)); } | 123 Nb operator >= (const SkNf& o) const { return _mm_castpd_si128(_mm_cmpge_pd
(fVec, o.fVec)); } |
124 | 124 |
125 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_pd(l.fVec, r.
fVec); } | 125 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_pd(l.fVec, r.
fVec); } |
126 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_pd(l.fVec, r.
fVec); } | 126 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_pd(l.fVec, r.
fVec); } |
127 | 127 |
128 SkNf sqrt() const { return _mm_sqrt_pd(fVec); } | 128 SkNf sqrt() const { return _mm_sqrt_pd(fVec); } |
129 SkNf rsqrt() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec)));
} | 129 SkNf rsqrt() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec)));
} |
130 | 130 |
131 SkNf invert() const { return SkNf(1) / *this; } | 131 SkNf invert() const { return SkNf(1) / *this; } |
132 SkNf approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec
))); } | 132 SkNf approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec
))); } |
133 | 133 |
134 template <int k> double kth() const { | 134 template <int k> double kth() const { |
135 SkASSERT(0 <= k && k < 2); | 135 SkASSERT(0 <= k && k < 2); |
136 union { __m128d v; double ds[2]; } pun = {fVec}; | 136 union { __m128d v; double ds[2]; } pun = {fVec}; |
137 return pun.ds[k&1]; | 137 return pun.ds[k&1]; |
138 } | 138 } |
139 | 139 |
140 private: | 140 private: |
141 __m128d fVec; | 141 __m128d fVec; |
142 }; | 142 }; |
143 | 143 |
144 template <> | 144 template <> |
145 class SkNf<4, float> { | 145 class SkNf<4, float> { |
146 typedef SkNi<4, int32_t> Ni; | 146 typedef SkNb<4, 4> Nb; |
147 public: | 147 public: |
148 SkNf(const __m128& vec) : fVec(vec) {} | 148 SkNf(const __m128& vec) : fVec(vec) {} |
149 | 149 |
150 SkNf() {} | 150 SkNf() {} |
151 explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {} | 151 explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {} |
152 static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); } | 152 static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); } |
153 SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} | 153 SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} |
154 | 154 |
155 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } | 155 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } |
156 | 156 |
157 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } | 157 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } |
158 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } | 158 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } |
159 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } | 159 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } |
160 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } | 160 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } |
161 | 161 |
162 Ni operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps
(fVec, o.fVec)); } | 162 Nb operator == (const SkNf& o) const { return _mm_castps_si128(_mm_cmpeq_ps
(fVec, o.fVec)); } |
163 Ni operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps
(fVec, o.fVec)); } | 163 Nb operator != (const SkNf& o) const { return _mm_castps_si128(_mm_cmpneq_ps
(fVec, o.fVec)); } |
164 Ni operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps
(fVec, o.fVec)); } | 164 Nb operator < (const SkNf& o) const { return _mm_castps_si128(_mm_cmplt_ps
(fVec, o.fVec)); } |
165 Ni operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps
(fVec, o.fVec)); } | 165 Nb operator > (const SkNf& o) const { return _mm_castps_si128(_mm_cmpgt_ps
(fVec, o.fVec)); } |
166 Ni operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps
(fVec, o.fVec)); } | 166 Nb operator <= (const SkNf& o) const { return _mm_castps_si128(_mm_cmple_ps
(fVec, o.fVec)); } |
167 Ni operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps
(fVec, o.fVec)); } | 167 Nb operator >= (const SkNf& o) const { return _mm_castps_si128(_mm_cmpge_ps
(fVec, o.fVec)); } |
168 | 168 |
169 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.
fVec); } | 169 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.
fVec); } |
170 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.
fVec); } | 170 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.
fVec); } |
171 | 171 |
172 SkNf sqrt() const { return _mm_sqrt_ps (fVec); } | 172 SkNf sqrt() const { return _mm_sqrt_ps (fVec); } |
173 SkNf rsqrt() const { return _mm_rsqrt_ps(fVec); } | 173 SkNf rsqrt() const { return _mm_rsqrt_ps(fVec); } |
174 | 174 |
175 SkNf invert() const { return SkNf(1) / *this; } | 175 SkNf invert() const { return SkNf(1) / *this; } |
176 SkNf approxInvert() const { return _mm_rcp_ps(fVec); } | 176 SkNf approxInvert() const { return _mm_rcp_ps(fVec); } |
177 | 177 |
178 template <int k> float kth() const { | 178 template <int k> float kth() const { |
179 SkASSERT(0 <= k && k < 4); | 179 SkASSERT(0 <= k && k < 4); |
180 union { __m128 v; float fs[4]; } pun = {fVec}; | 180 union { __m128 v; float fs[4]; } pun = {fVec}; |
181 return pun.fs[k&3]; | 181 return pun.fs[k&3]; |
182 } | 182 } |
183 | 183 |
184 protected: | 184 protected: |
185 __m128 fVec; | 185 __m128 fVec; |
186 }; | 186 }; |
187 | 187 |
188 | 188 |
189 #endif//SkNx_sse_DEFINED | 189 #endif//SkNx_sse_DEFINED |
OLD | NEW |