| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
| 9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
| 10 | 10 |
| 11 // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent. | 11 // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent. |
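The comment above sets the ground rule for the whole header. A minimal sketch of what that guard pattern looks like in practice, assuming Skia's SK_CPU_SSE_LEVEL_* constants from its configuration headers; the select() helper is hypothetical, chosen only to show an SSE4.1 instruction behind an SSE2 fallback:

```cpp
#include <emmintrin.h>                 // SSE2: the unconditional baseline
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
    #include <smmintrin.h>             // SSE4.1: must be guarded
#endif

// Lane-wise select: mask ? t : e.
static inline __m128 select(__m128 mask, __m128 t, __m128 e) {
#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE41
    return _mm_blendv_ps(e, t, mask);  // one instruction on SSE4.1+
#else
    // SSE2 fallback: (mask & t) | (~mask & e).
    return _mm_or_ps(_mm_and_ps(mask, t), _mm_andnot_ps(mask, e));
#endif
}
```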
| 12 | 12 |
| 13 #define SKNX_IS_FAST | 13 #define SKNX_IS_FAST |
| 14 | 14 |
| 15 namespace { // See SkNx.h | 15 namespace { // See SkNx.h |
| 16 | 16 |
| 17 | 17 |
| 18 template <> | 18 template <> |
| 19 class SkNx<2, float> { | 19 class SkNx<2, float> { |
| 20 public: | 20 public: |
| 21 SkNx(const __m128& vec) : fVec(vec) {} | 21 SkNx(const __m128& vec) : fVec(vec) {} |
| 22 | 22 |
| 23 SkNx() {} | 23 SkNx() {} |
| 24 SkNx(float val) : fVec(_mm_set1_ps(val)) {} | 24 SkNx(float val) : fVec(_mm_set1_ps(val)) {} |
| 25 static SkNx Load(const float vals[2]) { | 25 static SkNx Load(const void* ptr) { |
| 26 return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals)); | 26 return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)ptr)); |
| 27 } | 27 } |
| 28 SkNx(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {} | 28 SkNx(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {} |
| 29 | 29 |
| 30 void store(float vals[2]) const { _mm_storel_pi((__m64*)vals, fVec); } | 30 void store(void* ptr) const { _mm_storel_pi((__m64*)ptr, fVec); } |
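The NEW column's switch from typed array parameters to void* is the point of this change: callers can point Load()/store() straight at raw buffers without a reinterpret_cast at every call site. A usage sketch, assuming the usual Sk2f alias for SkNx<2, float> from SkNx.h and a made-up buffer layout:

```cpp
#include <cstdint>

// Scale interleaved (x, y) float pairs stored in an untyped byte buffer.
void scale_points(uint8_t* raw_bytes, int n_points, float factor) {
    for (int i = 0; i < n_points; ++i) {
        uint8_t* p = raw_bytes + i * 2 * sizeof(float);
        Sk2f xy = Sk2f::Load(p) * factor;  // no cast needed at the call site
        xy.store(p);
    }
}
```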
| 31 | 31 |
| 32 SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); } | 32 SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); } |
| 33 SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); } | 33 SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); } |
| 34 SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); } | 34 SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); } |
| 35 SkNx operator / (const SkNx& o) const { return _mm_div_ps(fVec, o.fVec); } | 35 SkNx operator / (const SkNx& o) const { return _mm_div_ps(fVec, o.fVec); } |
| 36 | 36 |
| 37 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec); } | 37 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec); } |
| 38 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec); } | 38 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec); } |
| 39 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec); } | 39 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec); } |
| 40 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } | 40 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } |
| (...skipping 23 matching lines...) |
| 64 __m128 fVec; | 64 __m128 fVec; |
| 65 }; | 65 }; |
| 66 | 66 |
| 67 template <> | 67 template <> |
| 68 class SkNx<2, double> { | 68 class SkNx<2, double> { |
| 69 public: | 69 public: |
| 70 SkNx(const __m128d& vec) : fVec(vec) {} | 70 SkNx(const __m128d& vec) : fVec(vec) {} |
| 71 | 71 |
| 72 SkNx() {} | 72 SkNx() {} |
| 73 SkNx(double val) : fVec(_mm_set1_pd(val)) {} | 73 SkNx(double val) : fVec(_mm_set1_pd(val)) {} |
| 74 static SkNx Load(const double vals[2]) { return _mm_loadu_pd(vals); } | 74 static SkNx Load(const void* ptr) { return _mm_loadu_pd((const double*)ptr); } |
| 75 SkNx(double a, double b) : fVec(_mm_setr_pd(a,b)) {} | 75 SkNx(double a, double b) : fVec(_mm_setr_pd(a,b)) {} |
| 76 | 76 |
| 77 void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); } | 77 void store(void* ptr) const { _mm_storeu_pd((double*)ptr, fVec); } |
| 78 | 78 |
| 79 SkNx operator + (const SkNx& o) const { return _mm_add_pd(fVec, o.fVec); } | 79 SkNx operator + (const SkNx& o) const { return _mm_add_pd(fVec, o.fVec); } |
| 80 SkNx operator - (const SkNx& o) const { return _mm_sub_pd(fVec, o.fVec); } | 80 SkNx operator - (const SkNx& o) const { return _mm_sub_pd(fVec, o.fVec); } |
| 81 SkNx operator * (const SkNx& o) const { return _mm_mul_pd(fVec, o.fVec); } | 81 SkNx operator * (const SkNx& o) const { return _mm_mul_pd(fVec, o.fVec); } |
| 82 SkNx operator / (const SkNx& o) const { return _mm_div_pd(fVec, o.fVec); } | 82 SkNx operator / (const SkNx& o) const { return _mm_div_pd(fVec, o.fVec); } |
| 83 | 83 |
| 84 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_pd (fVec, o.fVec); } | 84 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_pd (fVec, o.fVec); } |
| 85 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_pd(fVec, o.fVec); } | 85 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_pd(fVec, o.fVec); } |
| 86 SkNx operator < (const SkNx& o) const { return _mm_cmplt_pd (fVec, o.fVec); } | 86 SkNx operator < (const SkNx& o) const { return _mm_cmplt_pd (fVec, o.fVec); } |
| 87 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_pd (fVec, o.fVec); } | 87 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_pd (fVec, o.fVec); } |
| (...skipping 22 matching lines...) |
| 110 __m128d fVec; | 110 __m128d fVec; |
| 111 }; | 111 }; |
| 112 | 112 |
| 113 template <> | 113 template <> |
| 114 class SkNx<4, int> { | 114 class SkNx<4, int> { |
| 115 public: | 115 public: |
| 116 SkNx(const __m128i& vec) : fVec(vec) {} | 116 SkNx(const __m128i& vec) : fVec(vec) {} |
| 117 | 117 |
| 118 SkNx() {} | 118 SkNx() {} |
| 119 SkNx(int val) : fVec(_mm_set1_epi32(val)) {} | 119 SkNx(int val) : fVec(_mm_set1_epi32(val)) {} |
| 120 static SkNx Load(const int vals[4]) { return _mm_loadu_si128((const __m128i*)vals); } | 120 static SkNx Load(const void* ptr) { return _mm_loadu_si128((const __m128i*)ptr); } |
| 121 SkNx(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {} | 121 SkNx(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {} |
| 122 | 122 |
| 123 void store(int vals[4]) const { _mm_storeu_si128((__m128i*)vals, fVec); } | 123 void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); } |
| 124 | 124 |
| 125 SkNx operator + (const SkNx& o) const { return _mm_add_epi32(fVec, o.fVec); } | 125 SkNx operator + (const SkNx& o) const { return _mm_add_epi32(fVec, o.fVec); } |
| 126 SkNx operator - (const SkNx& o) const { return _mm_sub_epi32(fVec, o.fVec); } | 126 SkNx operator - (const SkNx& o) const { return _mm_sub_epi32(fVec, o.fVec); } |
| 127 SkNx operator * (const SkNx& o) const { | 127 SkNx operator * (const SkNx& o) const { |
| 128 __m128i mul20 = _mm_mul_epu32(fVec, o.fVec), | 128 __m128i mul20 = _mm_mul_epu32(fVec, o.fVec), |
| 129 mul31 = _mm_mul_epu32(_mm_srli_si128(fVec, 4), _mm_srli_si128(o.fVec, 4)); | 129 mul31 = _mm_mul_epu32(_mm_srli_si128(fVec, 4), _mm_srli_si128(o.fVec, 4)); |
| 130 return _mm_unpacklo_epi32(_mm_shuffle_epi32(mul20, _MM_SHUFFLE(0,0,2,0)), | 130 return _mm_unpacklo_epi32(_mm_shuffle_epi32(mul20, _MM_SHUFFLE(0,0,2,0)), |
| 131 _mm_shuffle_epi32(mul31, _MM_SHUFFLE(0,0,2,0))); | 131 _mm_shuffle_epi32(mul31, _MM_SHUFFLE(0,0,2,0))); |
| 132 } | 132 } |
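The multiply above works around SSE2's missing 32-bit low multiply (_mm_mullo_epi32 arrived with SSE4.1): _mm_mul_epu32 produces full 64-bit products of lanes 0 and 2 only, so the odd lanes are shifted down into reach, and the low halves of all four products are shuffled back into place. A standalone sketch that checks this shuffle dance against scalar multiplication; the helper name and test values are made up:

```cpp
#include <emmintrin.h>
#include <cassert>
#include <cstdint>

static __m128i mullo32_sse2(__m128i a, __m128i b) {
    __m128i mul20 = _mm_mul_epu32(a, b);  // 64-bit products of lanes 0 and 2
    __m128i mul31 = _mm_mul_epu32(_mm_srli_si128(a, 4),   // lanes 1 and 3,
                                  _mm_srli_si128(b, 4));  // shifted into reach
    // Keep the low 32 bits of each product, then interleave back to lanes 0..3.
    return _mm_unpacklo_epi32(_mm_shuffle_epi32(mul20, _MM_SHUFFLE(0,0,2,0)),
                              _mm_shuffle_epi32(mul31, _MM_SHUFFLE(0,0,2,0)));
}

int main() {
    int32_t a[4] = {1, -2, 30000, 7}, b[4] = {9, 8, 70000, -6}, r[4];
    __m128i v = mullo32_sse2(_mm_loadu_si128((const __m128i*)a),
                             _mm_loadu_si128((const __m128i*)b));
    _mm_storeu_si128((__m128i*)r, v);
    for (int i = 0; i < 4; ++i) {
        // The low 32 bits of a product are the same for signed and unsigned.
        assert(r[i] == (int32_t)((uint32_t)a[i] * (uint32_t)b[i]));
    }
    return 0;
}
```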
| 133 | 133 |
| (...skipping 14 matching lines...) |
| 148 __m128i fVec; | 148 __m128i fVec; |
| 149 }; | 149 }; |
| 150 | 150 |
| 151 template <> | 151 template <> |
| 152 class SkNx<4, float> { | 152 class SkNx<4, float> { |
| 153 public: | 153 public: |
| 154 SkNx(const __m128& vec) : fVec(vec) {} | 154 SkNx(const __m128& vec) : fVec(vec) {} |
| 155 | 155 |
| 156 SkNx() {} | 156 SkNx() {} |
| 157 SkNx(float val) : fVec( _mm_set1_ps(val) ) {} | 157 SkNx(float val) : fVec( _mm_set1_ps(val) ) {} |
| 158 static SkNx Load(const float vals[4]) { return _mm_loadu_ps(vals); } | 158 static SkNx Load(const void* ptr) { return _mm_loadu_ps((const float*)ptr); } |
| 159 | 159 |
| 160 SkNx(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} | 160 SkNx(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} |
| 161 | 161 |
| 162 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } | 162 void store(void* ptr) const { _mm_storeu_ps((float*)ptr, fVec); } |
| 163 | 163 |
| 164 SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); } | 164 SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); } |
| 165 SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); } | 165 SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); } |
| 166 SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); } | 166 SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); } |
| 167 SkNx operator / (const SkNx& o) const { return _mm_div_ps(fVec, o.fVec); } | 167 SkNx operator / (const SkNx& o) const { return _mm_div_ps(fVec, o.fVec); } |
| 168 | 168 |
| 169 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec); } | 169 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec); } |
| 170 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec); } | 170 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec); } |
| 171 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec); } | 171 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec); } |
| 172 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } | 172 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } |
| (...skipping 30 matching lines...) |
| 203 __m128 fVec; | 203 __m128 fVec; |
| 204 }; | 204 }; |
| 205 | 205 |
| 206 template <> | 206 template <> |
| 207 class SkNx<4, uint16_t> { | 207 class SkNx<4, uint16_t> { |
| 208 public: | 208 public: |
| 209 SkNx(const __m128i& vec) : fVec(vec) {} | 209 SkNx(const __m128i& vec) : fVec(vec) {} |
| 210 | 210 |
| 211 SkNx() {} | 211 SkNx() {} |
| 212 SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {} | 212 SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {} |
| 213 static SkNx Load(const uint16_t vals[4]) { return _mm_loadl_epi64((const __m128i*)vals); } | 213 static SkNx Load(const void* ptr) { return _mm_loadl_epi64((const __m128i*)ptr); } |
| 214 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d) : fVec(_mm_setr_epi16(a,b,c,d,0,0,0,0)) {} | 214 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d) : fVec(_mm_setr_epi16(a,b,c,d,0,0,0,0)) {} |
| 215 | 215 |
| 216 void store(uint16_t vals[4]) const { _mm_storel_epi64((__m128i*)vals, fVec); } | 216 void store(void* ptr) const { _mm_storel_epi64((__m128i*)ptr, fVec); } |
| 217 | 217 |
| 218 SkNx operator + (const SkNx& o) const { return _mm_add_epi16(fVec, o.fVec); } | 218 SkNx operator + (const SkNx& o) const { return _mm_add_epi16(fVec, o.fVec); } |
| 219 SkNx operator - (const SkNx& o) const { return _mm_sub_epi16(fVec, o.fVec); } | 219 SkNx operator - (const SkNx& o) const { return _mm_sub_epi16(fVec, o.fVec); } |
| 220 SkNx operator * (const SkNx& o) const { return _mm_mullo_epi16(fVec, o.fVec); } | 220 SkNx operator * (const SkNx& o) const { return _mm_mullo_epi16(fVec, o.fVec); } |
| 221 | 221 |
| 222 SkNx operator << (int bits) const { return _mm_slli_epi16(fVec, bits); } | 222 SkNx operator << (int bits) const { return _mm_slli_epi16(fVec, bits); } |
| 223 SkNx operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); } | 223 SkNx operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); } |
| 224 | 224 |
| 225 template <int k> uint16_t kth() const { | 225 template <int k> uint16_t kth() const { |
| 226 SkASSERT(0 <= k && k < 4); | 226 SkASSERT(0 <= k && k < 4); |
| 227 return _mm_extract_epi16(fVec, k); | 227 return _mm_extract_epi16(fVec, k); |
| 228 } | 228 } |
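kth() takes its lane as a template parameter because _mm_extract_epi16 needs a compile-time immediate for the index. A usage sketch, assuming the usual Sk4h alias for SkNx<4, uint16_t> from SkNx.h:

```cpp
#include <cstdint>

uint16_t third_lane() {
    Sk4h v(10, 20, 30, 40);
    return v.kth<2>();  // 30; the lane number is baked into the instruction
}
```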
| 229 | 229 |
| 230 __m128i fVec; | 230 __m128i fVec; |
| 231 }; | 231 }; |
| 232 | 232 |
| 233 template <> | 233 template <> |
| 234 class SkNx<8, uint16_t> { | 234 class SkNx<8, uint16_t> { |
| 235 public: | 235 public: |
| 236 SkNx(const __m128i& vec) : fVec(vec) {} | 236 SkNx(const __m128i& vec) : fVec(vec) {} |
| 237 | 237 |
| 238 SkNx() {} | 238 SkNx() {} |
| 239 SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {} | 239 SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {} |
| 240 static SkNx Load(const uint16_t vals[8]) { return _mm_loadu_si128((const __m128i*)vals); } | 240 static SkNx Load(const void* ptr) { return _mm_loadu_si128((const __m128i*)ptr); } |
| 241 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d, | 241 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d, |
| 242 uint16_t e, uint16_t f, uint16_t g, uint16_t h) : fVec(_mm_setr_epi16(a,b,c,d,e,f,g,h)) {} | 242 uint16_t e, uint16_t f, uint16_t g, uint16_t h) : fVec(_mm_setr_epi16(a,b,c,d,e,f,g,h)) {} |
| 243 | 243 |
| 244 void store(uint16_t vals[8]) const { _mm_storeu_si128((__m128i*)vals, fVec); } | 244 void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); } |
| 245 | 245 |
| 246 SkNx operator + (const SkNx& o) const { return _mm_add_epi16(fVec, o.fVec); } | 246 SkNx operator + (const SkNx& o) const { return _mm_add_epi16(fVec, o.fVec); } |
| 247 SkNx operator - (const SkNx& o) const { return _mm_sub_epi16(fVec, o.fVec); } | 247 SkNx operator - (const SkNx& o) const { return _mm_sub_epi16(fVec, o.fVec); } |
| 248 SkNx operator * (const SkNx& o) const { return _mm_mullo_epi16(fVec, o.fVec); } | 248 SkNx operator * (const SkNx& o) const { return _mm_mullo_epi16(fVec, o.fVec); } |
| 249 | 249 |
| 250 SkNx operator << (int bits) const { return _mm_slli_epi16(fVec, bits); } | 250 SkNx operator << (int bits) const { return _mm_slli_epi16(fVec, bits); } |
| 251 SkNx operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); } | 251 SkNx operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); } |
| 252 | 252 |
| 253 static SkNx Min(const SkNx& a, const SkNx& b) { | 253 static SkNx Min(const SkNx& a, const SkNx& b) { |
| 254 // No unsigned _mm_min_epu16, so we'll shift into a space where we can use the | 254 // No unsigned _mm_min_epu16, so we'll shift into a space where we can use the |
| (...skipping 16 matching lines...) |
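The body of Min() is collapsed in this diff; the comment above describes its technique. A hedged sketch of the standard form, not necessarily the elided lines: XOR-ing with 0x8000 maps unsigned 16-bit order onto signed order, so _mm_min_epi16 can stand in for the missing _mm_min_epu16:

```cpp
#include <emmintrin.h>

static __m128i min_epu16_sse2(__m128i a, __m128i b) {
    const __m128i bias = _mm_set1_epi16((short)0x8000);  // flips the sign bit
    __m128i m = _mm_min_epi16(_mm_xor_si128(a, bias), _mm_xor_si128(b, bias));
    return _mm_xor_si128(m, bias);  // undo the bias on the result
}
```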
| 271 | 271 |
| 272 __m128i fVec; | 272 __m128i fVec; |
| 273 }; | 273 }; |
| 274 | 274 |
| 275 template <> | 275 template <> |
| 276 class SkNx<4, uint8_t> { | 276 class SkNx<4, uint8_t> { |
| 277 public: | 277 public: |
| 278 SkNx(const __m128i& vec) : fVec(vec) {} | 278 SkNx(const __m128i& vec) : fVec(vec) {} |
| 279 | 279 |
| 280 SkNx() {} | 280 SkNx() {} |
| 281 static SkNx Load(const uint8_t vals[4]) { return _mm_cvtsi32_si128(*(const int*)vals); } | 281 static SkNx Load(const void* ptr) { return _mm_cvtsi32_si128(*(const int*)ptr); } |
| 282 void store(uint8_t vals[4]) const { *(int*)vals = _mm_cvtsi128_si32(fVec); } | 282 void store(void* ptr) const { *(int*)ptr = _mm_cvtsi128_si32(fVec); } |
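Four bytes fit in one 32-bit word, so this Load()/store() pair moves them through a scalar register with _mm_cvtsi32_si128/_mm_cvtsi128_si32. The *(const int*)ptr dereference assumes the caller's pointer may be read as an int; a memcpy-based sketch of the same idea sidesteps the alignment and strict-aliasing questions (whether that matters here depends on the callers, so treat this as an assumption):

```cpp
#include <emmintrin.h>
#include <cstring>

static __m128i load4(const void* ptr) {
    int bits;
    memcpy(&bits, ptr, sizeof(bits));  // well-defined for any alignment
    return _mm_cvtsi32_si128(bits);    // word lands in the low 32-bit lane
}
```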
| 283 | 283 |
| 284 // TODO as needed | 284 // TODO as needed |
| 285 | 285 |
| 286 __m128i fVec; | 286 __m128i fVec; |
| 287 }; | 287 }; |
| 288 | 288 |
| 289 template <> | 289 template <> |
| 290 class SkNx<8, uint8_t> { | 290 class SkNx<8, uint8_t> { |
| 291 public: | 291 public: |
| 292 SkNx(const __m128i& vec) : fVec(vec) {} | 292 SkNx(const __m128i& vec) : fVec(vec) {} |
| 293 | 293 |
| 294 SkNx() {} | 294 SkNx() {} |
| 295 static SkNx Load(const uint8_t vals[8]) { return _mm_loadl_epi64((const __m128i*)vals); } | 295 static SkNx Load(const void* ptr) { return _mm_loadl_epi64((const __m128i*)ptr); } |
| 296 void store(uint8_t vals[8]) const { _mm_storel_epi64((__m128i*)vals, fVec); } | 296 void store(void* ptr) const { _mm_storel_epi64((__m128i*)ptr, fVec); } |
| 297 | 297 |
| 298 // TODO as needed | 298 // TODO as needed |
| 299 | 299 |
| 300 __m128i fVec; | 300 __m128i fVec; |
| 301 }; | 301 }; |
| 302 | 302 |
| 303 template <> | 303 template <> |
| 304 class SkNx<16, uint8_t> { | 304 class SkNx<16, uint8_t> { |
| 305 public: | 305 public: |
| 306 SkNx(const __m128i& vec) : fVec(vec) {} | 306 SkNx(const __m128i& vec) : fVec(vec) {} |
| 307 | 307 |
| 308 SkNx() {} | 308 SkNx() {} |
| 309 SkNx(uint8_t val) : fVec(_mm_set1_epi8(val)) {} | 309 SkNx(uint8_t val) : fVec(_mm_set1_epi8(val)) {} |
| 310 static SkNx Load(const uint8_t vals[16]) { return _mm_loadu_si128((const __m128i*)vals); } | 310 static SkNx Load(const void* ptr) { return _mm_loadu_si128((const __m128i*)ptr); } |
| 311 SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d, | 311 SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d, |
| 312 uint8_t e, uint8_t f, uint8_t g, uint8_t h, | 312 uint8_t e, uint8_t f, uint8_t g, uint8_t h, |
| 313 uint8_t i, uint8_t j, uint8_t k, uint8_t l, | 313 uint8_t i, uint8_t j, uint8_t k, uint8_t l, |
| 314 uint8_t m, uint8_t n, uint8_t o, uint8_t p) | 314 uint8_t m, uint8_t n, uint8_t o, uint8_t p) |
| 315 : fVec(_mm_setr_epi8(a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p)) {} | 315 : fVec(_mm_setr_epi8(a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p)) {} |
| 316 | 316 |
| 317 void store(uint8_t vals[16]) const { _mm_storeu_si128((__m128i*)vals, fVec); } | 317 void store(void* ptr) const { _mm_storeu_si128((__m128i*)ptr, fVec); } |
| 318 | 318 |
| 319 SkNx saturatedAdd(const SkNx& o) const { return _mm_adds_epu8(fVec, o.fVec); } | 319 SkNx saturatedAdd(const SkNx& o) const { return _mm_adds_epu8(fVec, o.fVec); } |
| 320 | 320 |
| 321 SkNx operator + (const SkNx& o) const { return _mm_add_epi8(fVec, o.fVec); } | 321 SkNx operator + (const SkNx& o) const { return _mm_add_epi8(fVec, o.fVec); } |
| 322 SkNx operator - (const SkNx& o) const { return _mm_sub_epi8(fVec, o.fVec); } | 322 SkNx operator - (const SkNx& o) const { return _mm_sub_epi8(fVec, o.fVec); } |
| 323 | 323 |
| 324 static SkNx Min(const SkNx& a, const SkNx& b) { return _mm_min_epu8(a.fVec, b.fVec); } | 324 static SkNx Min(const SkNx& a, const SkNx& b) { return _mm_min_epu8(a.fVec, b.fVec); } |
| 325 SkNx operator < (const SkNx& o) const { | 325 SkNx operator < (const SkNx& o) const { |
| 326 // There's no unsigned _mm_cmplt_epu8, so we flip the sign bits then use a signed compare. | 326 // There's no unsigned _mm_cmplt_epu8, so we flip the sign bits then use a signed compare. |
| 327 auto flip = _mm_set1_epi8(char(0x80)); | 327 auto flip = _mm_set1_epi8(char(0x80)); |
| (...skipping 56 matching lines...) |
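The rest of operator < is collapsed above. A sketch of how the sign-bit flip usually completes (these are not the elided lines, just the standard form of the trick): flip both operands, and the signed byte compare then yields the unsigned ordering:

```cpp
#include <emmintrin.h>

static __m128i cmplt_epu8_sse2(__m128i a, __m128i b) {
    const __m128i flip = _mm_set1_epi8((char)0x80);
    return _mm_cmplt_epi8(_mm_xor_si128(a, flip), _mm_xor_si128(b, flip));
}
```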
| 384 } | 384 } |
| 385 | 385 |
| 386 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t, 4>(const Sk4h& src) { | 386 template<> inline Sk4b SkNx_cast<uint8_t, uint16_t, 4>(const Sk4h& src) { |
| 387 return _mm_packus_epi16(src.fVec, src.fVec); | 387 return _mm_packus_epi16(src.fVec, src.fVec); |
| 388 } | 388 } |
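The cast above narrows four 16-bit lanes to bytes with _mm_packus_epi16, which saturates each signed 16-bit value into [0, 255]; passing src.fVec twice just fills both halves of the result, and only the low four bytes matter for an Sk4b. A standalone demonstration with made-up values:

```cpp
#include <emmintrin.h>
#include <cassert>
#include <cstdint>

int main() {
    __m128i h = _mm_setr_epi16(-5, 0, 200, 300, 0, 0, 0, 0);
    __m128i b = _mm_packus_epi16(h, h);  // both halves from the same source
    uint8_t out[16];
    _mm_storeu_si128((__m128i*)out, b);
    assert(out[0] == 0);    // -5 clamps up to 0
    assert(out[2] == 200);  // in range, unchanged
    assert(out[3] == 255);  // 300 clamps down to 255
    return 0;
}
```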
| 389 | 389 |
| 390 | 390 |
| 391 } // namespace | 391 } // namespace |
| 392 | 392 |
| 393 #endif//SkNx_sse_DEFINED | 393 #endif//SkNx_sse_DEFINED |