OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
10 | 10 |
(...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
148 }; | 148 }; |
149 | 149 |
150 template <> | 150 template <> |
151 class SkNf<4, float> { | 151 class SkNf<4, float> { |
152 public: | 152 public: |
    // Wrap a raw SSE register. Implicit on purpose so intrinsic results
    // (e.g. _mm_add_ps) convert back to SkNf without noise at call sites.
    SkNf(const __m128& vec) : fVec(vec) {}

    // Default ctor leaves fVec uninitialized, matching plain float behavior.
    SkNf() {}
    // Broadcast a single value into all 4 lanes.
    explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {}
    // Unaligned load of 4 consecutive floats.
    static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); }
| 158 |
| 159 static SkNf FromBytes(const uint8_t bytes[4]) { |
| 160 __m128i fix8 = _mm_cvtsi32_si128(*(const int*)bytes); |
| 161 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
| 162 const char _ = ~0; // Zero these bytes. |
| 163 __m128i fix8_32 = _mm_shuffle_epi8(fix8, _mm_setr_epi8(0,_,_,_, 1,_,_,_,
2,_,_,_, 3,_,_,_)); |
| 164 #else |
| 165 __m128i fix8_16 = _mm_unpacklo_epi8 (fix8, _mm_setzero_si128()), |
| 166 fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128()); |
| 167 #endif |
| 168 return SkNf(_mm_cvtepi32_ps(fix8_32)); |
| 169 // TODO: use _mm_cvtepu8_epi32 w/SSE4.1? |
| 170 } |
| 171 |
    // Build from 4 explicit lane values; a lands in lane 0, d in lane 3.
    SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {}

    // Unaligned store of all 4 lanes to consecutive floats.
    void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); }
| 175 void toBytes(uint8_t bytes[4]) const { |
| 176 __m128i fix8_32 = _mm_cvttps_epi32(fVec), |
| 177 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), |
| 178 fix8 = _mm_packus_epi16(fix8_16, fix8_16); |
| 179 *(int*)bytes = _mm_cvtsi128_si32(fix8); |
| 180 // TODO: use _mm_shuffle_epi8 w/SSSE3? |
| 181 } |
161 | 182 |
    // Lane-wise float -> int conversion, truncating toward zero (cvttps).
    SkNi<4, int> castTrunc() const { return _mm_cvttps_epi32(fVec); }

    // Lane-wise arithmetic.
    SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); }
    SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); }
    SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); }
    SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); }

    // Lane-wise comparisons; each lane becomes all-1s (true) or all-0s (false),
    // the SSE mask convention.
    SkNf operator == (const SkNf& o) const { return _mm_cmpeq_ps (fVec, o.fVec); }
    SkNf operator != (const SkNf& o) const { return _mm_cmpneq_ps(fVec, o.fVec); }
(...skipping 138 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
309 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec), | 330 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec), |
310 _mm_andnot_si128(fVec, e.fVec)); | 331 _mm_andnot_si128(fVec, e.fVec)); |
311 } | 332 } |
312 | 333 |
313 __m128i fVec; | 334 __m128i fVec; |
314 }; | 335 }; |
315 | 336 |
316 } // namespace | 337 } // namespace |
317 | 338 |
318 #endif//SkNx_sse_DEFINED | 339 #endif//SkNx_sse_DEFINED |
OLD | NEW |