| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkNx_sse_DEFINED | 8 #ifndef SkNx_sse_DEFINED |
| 9 #define SkNx_sse_DEFINED | 9 #define SkNx_sse_DEFINED |
| 10 | 10 |
| 11 // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent. | 11 // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent. |
| 12 | 12 |
| 13 namespace { // See SkNx.h | 13 namespace { // See SkNx.h |
| 14 | 14 |
| 15 | 15 |
| 16 template <> | 16 template <> |
| 17 class SkNf<2, float> { | 17 class SkNf<2> { |
| 18 public: | 18 public: |
| 19 SkNf(const __m128& vec) : fVec(vec) {} | 19 SkNf(const __m128& vec) : fVec(vec) {} |
| 20 | 20 |
| 21 SkNf() {} | 21 SkNf() {} |
| 22 explicit SkNf(float val) : fVec(_mm_set1_ps(val)) {} | 22 explicit SkNf(float val) : fVec(_mm_set1_ps(val)) {} |
| 23 static SkNf Load(const float vals[2]) { | 23 static SkNf Load(const float vals[2]) { |
| 24 return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals)); | 24 return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals)); |
| 25 } | 25 } |
| 26 SkNf(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {} | 26 SkNf(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {} |
| 27 | 27 |
| (...skipping 28 matching lines...) |
| 56 return pun.fs[k&1]; | 56 return pun.fs[k&1]; |
| 57 } | 57 } |
| 58 | 58 |
| 59 bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); } | 59 bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); } |
| 60 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); } | 60 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); } |
| 61 | 61 |
| 62 __m128 fVec; | 62 __m128 fVec; |
| 63 }; | 63 }; |
| 64 | 64 |
| 65 template <> | 65 template <> |
| 66 class SkNf<2, double> { | |
| 67 public: | |
| 68 SkNf(const __m128d& vec) : fVec(vec) {} | |
| 69 | |
| 70 SkNf() {} | |
| 71 explicit SkNf(double val) : fVec( _mm_set1_pd(val) ) {} | |
| 72 static SkNf Load(const double vals[2]) { return _mm_loadu_pd(vals); } | |
| 73 SkNf(double a, double b) : fVec(_mm_setr_pd(a,b)) {} | |
| 74 | |
| 75 void store(double vals[2]) const { _mm_storeu_pd(vals, fVec); } | |
| 76 | |
| 77 SkNf operator + (const SkNf& o) const { return _mm_add_pd(fVec, o.fVec); } | |
| 78 SkNf operator - (const SkNf& o) const { return _mm_sub_pd(fVec, o.fVec); } | |
| 79 SkNf operator * (const SkNf& o) const { return _mm_mul_pd(fVec, o.fVec); } | |
| 80 SkNf operator / (const SkNf& o) const { return _mm_div_pd(fVec, o.fVec); } | |
| 81 | |
| 82 SkNf operator == (const SkNf& o) const { return _mm_cmpeq_pd (fVec, o.fVec); } | |
| 83 SkNf operator != (const SkNf& o) const { return _mm_cmpneq_pd(fVec, o.fVec); } | |
| 84 SkNf operator < (const SkNf& o) const { return _mm_cmplt_pd (fVec, o.fVec); } | |
| 85 SkNf operator > (const SkNf& o) const { return _mm_cmpgt_pd (fVec, o.fVec); } | |
| 86 SkNf operator <= (const SkNf& o) const { return _mm_cmple_pd (fVec, o.fVec); } | |
| 87 SkNf operator >= (const SkNf& o) const { return _mm_cmpge_pd (fVec, o.fVec); } | |
| 88 | |
| 89 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_pd(l.fVec, r.fVec); } | |
| 90 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_pd(l.fVec, r.fVec); } | |
| 91 | |
| 92 SkNf sqrt() const { return _mm_sqrt_pd(fVec); } | |
| 93 SkNf rsqrt0() const { return _mm_cvtps_pd(_mm_rsqrt_ps(_mm_cvtpd_ps(fVec))); } | |
| 94 SkNf rsqrt1() const { return this->rsqrt0(); } | |
| 95 SkNf rsqrt2() const { return this->rsqrt1(); } | |
| 96 | |
| 97 SkNf invert() const { return SkNf(1) / *this; } | |
| 98 SkNf approxInvert() const { return _mm_cvtps_pd(_mm_rcp_ps(_mm_cvtpd_ps(fVec))); } | |
| 99 | |
| 100 template <int k> double kth() const { | |
| 101 SkASSERT(0 <= k && k < 2); | |
| 102 union { __m128d v; double ds[2]; } pun = {fVec}; | |
| 103 return pun.ds[k&1]; | |
| 104 } | |
| 105 | |
| 106 bool allTrue() const { return 0xffff == _mm_movemask_epi8(_mm_castpd_si128(fVec)); } | |
| 107 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castpd_si128(fVec)); } | |
| 108 | |
| 109 __m128d fVec; | |
| 110 }; | |
| 111 | |
| 112 template <> | |
| 113 class SkNi<4, int> { | 66 class SkNi<4, int> { |
| 114 public: | 67 public: |
| 115 SkNi(const __m128i& vec) : fVec(vec) {} | 68 SkNi(const __m128i& vec) : fVec(vec) {} |
| 116 | 69 |
| 117 SkNi() {} | 70 SkNi() {} |
| 118 explicit SkNi(int val) : fVec(_mm_set1_epi32(val)) {} | 71 explicit SkNi(int val) : fVec(_mm_set1_epi32(val)) {} |
| 119 static SkNi Load(const int vals[4]) { return _mm_loadu_si128((const __m128i*)vals); } | 72 static SkNi Load(const int vals[4]) { return _mm_loadu_si128((const __m128i*)vals); } |
| 120 SkNi(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {} | 73 SkNi(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {} |
| 121 | 74 |
| 122 void store(int vals[4]) const { _mm_storeu_si128((__m128i*)vals, fVec); } | 75 void store(int vals[4]) const { _mm_storeu_si128((__m128i*)vals, fVec); } |
| (...skipping 18 matching lines...) |
| 141 case 2: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 8)); | 94 case 2: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 8)); |
| 142 case 3: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 12)); | 95 case 3: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 12)); |
| 143 default: SkASSERT(false); return 0; | 96 default: SkASSERT(false); return 0; |
| 144 } | 97 } |
| 145 } | 98 } |
| 146 | 99 |
| 147 __m128i fVec; | 100 __m128i fVec; |
| 148 }; | 101 }; |
| 149 | 102 |
| 150 template <> | 103 template <> |
| 151 class SkNf<4, float> { | 104 class SkNf<4> { |
| 152 public: | 105 public: |
| 153 SkNf(const __m128& vec) : fVec(vec) {} | 106 SkNf(const __m128& vec) : fVec(vec) {} |
| 154 | 107 |
| 155 SkNf() {} | 108 SkNf() {} |
| 156 explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {} | 109 explicit SkNf(float val) : fVec( _mm_set1_ps(val) ) {} |
| 157 static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); } | 110 static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); } |
| 158 | 111 |
| 159 static SkNf FromBytes(const uint8_t bytes[4]) { | 112 static SkNf FromBytes(const uint8_t bytes[4]) { |
| 160 __m128i fix8 = _mm_cvtsi32_si128(*(const int*)bytes); | 113 __m128i fix8 = _mm_cvtsi32_si128(*(const int*)bytes); |
| 161 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 | 114 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 |
| (...skipping 10 matching lines...) |
| 172 SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} | 125 SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} |
| 173 | 126 |
| 174 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } | 127 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } |
| 175 void toBytes(uint8_t bytes[4]) const { | 128 void toBytes(uint8_t bytes[4]) const { |
| 176 __m128i fix8_32 = _mm_cvttps_epi32(fVec), | 129 __m128i fix8_32 = _mm_cvttps_epi32(fVec), |
| 177 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), | 130 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), |
| 178 fix8 = _mm_packus_epi16(fix8_16, fix8_16); | 131 fix8 = _mm_packus_epi16(fix8_16, fix8_16); |
| 179 *(int*)bytes = _mm_cvtsi128_si32(fix8); | 132 *(int*)bytes = _mm_cvtsi128_si32(fix8); |
| 180 } | 133 } |
| 181 | 134 |
| 182 SkNi<4, int> castTrunc() const { return _mm_cvttps_epi32(fVec); } | |
| 183 | |
| 184 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } | 135 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } |
| 185 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } | 136 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } |
| 186 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } | 137 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } |
| 187 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } | 138 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } |
| 188 | 139 |
| 189 SkNf operator == (const SkNf& o) const { return _mm_cmpeq_ps (fVec, o.fVec); } | 140 SkNf operator == (const SkNf& o) const { return _mm_cmpeq_ps (fVec, o.fVec); } |
| 190 SkNf operator != (const SkNf& o) const { return _mm_cmpneq_ps(fVec, o.fVec); } | 141 SkNf operator != (const SkNf& o) const { return _mm_cmpneq_ps(fVec, o.fVec); } |
| 191 SkNf operator < (const SkNf& o) const { return _mm_cmplt_ps (fVec, o.fVec); } | 142 SkNf operator < (const SkNf& o) const { return _mm_cmplt_ps (fVec, o.fVec); } |
| 192 SkNf operator > (const SkNf& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } | 143 SkNf operator > (const SkNf& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } |
| 193 SkNf operator <= (const SkNf& o) const { return _mm_cmple_ps (fVec, o.fVec); } | 144 SkNf operator <= (const SkNf& o) const { return _mm_cmple_ps (fVec, o.fVec); } |
| (...skipping 135 matching lines...) |
| 329 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec), | 280 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec), |
| 330 _mm_andnot_si128(fVec, e.fVec)); | 281 _mm_andnot_si128(fVec, e.fVec)); |
| 331 } | 282 } |
| 332 | 283 |
| 333 __m128i fVec; | 284 __m128i fVec; |
| 334 }; | 285 }; |
| 335 | 286 |
| 336 } // namespace | 287 } // namespace |
| 337 | 288 |
| 338 #endif//SkNx_sse_DEFINED | 289 #endif//SkNx_sse_DEFINED |
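
Note on the toBytes() path above: the float-to-byte narrowing leans on two unsigned-saturating 16-bit packs rather than SSE4.1's _mm_packus_epi32, keeping the file within its stated <= SSE2 baseline. A minimal standalone sketch of the same idiom, assuming only SSE2 intrinsics and nothing from Skia (floats_to_bytes is an illustrative name, not Skia API):

#include <emmintrin.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

// Truncate four floats to int32, then narrow 32 -> 16 -> 8 bits with two
// saturating packs. Intended for inputs near [0,255]; modest overshoot and
// negative values clamp to 255 and 0 along the way.
static void floats_to_bytes(const float in[4], uint8_t out[4]) {
    __m128i fix8_32 = _mm_cvttps_epi32(_mm_loadu_ps(in)),
            fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),
            fix8    = _mm_packus_epi16(fix8_16, fix8_16);
    int packed = _mm_cvtsi128_si32(fix8);
    memcpy(out, &packed, 4);   // aliasing-safe version of *(int*)bytes = ...
}

int main(void) {
    float f[4] = { 0.0f, 127.9f, 255.0f, 300.0f };
    uint8_t b[4];
    floats_to_bytes(f, b);
    printf("%u %u %u %u\n", b[0], b[1], b[2], b[3]);   // prints: 0 127 255 255
    return 0;
}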
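Similarly, the thenElse() near the end of the diff uses the classic and/andnot/or bit-select, since the single-instruction blend (_mm_blendv_epi8) only arrives with SSE4.1. A hedged sketch under the same SSE2-only assumption (select_epi32 and clamp_max are illustrative helpers, not part of this header):

#include <emmintrin.h>

// mask comes from a compare, so each 32-bit lane is all-1s or all-0s:
// (mask & t) | (~mask & e) keeps t where the mask is true, e elsewhere.
static __m128i select_epi32(__m128i mask, __m128i t, __m128i e) {
    return _mm_or_si128(_mm_and_si128 (mask, t),
                        _mm_andnot_si128(mask, e));
}

// Usage: per-lane clamp of v to at most limit.
static __m128i clamp_max(__m128i v, __m128i limit) {
    return select_epi32(_mm_cmplt_epi32(v, limit), v, limit);
}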