Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/opts/SkNx_sse.h

Issue 1464623002: Add SkNx_cast(). (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: derp, is stands for _ints_ Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkNx_neon.h ('k') | tests/SkNxTest.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkNx_sse_DEFINED 8 #ifndef SkNx_sse_DEFINED
9 #define SkNx_sse_DEFINED 9 #define SkNx_sse_DEFINED
10 10
11 // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent. 11 // This file may assume <= SSE2, but must check SK_CPU_SSE_LEVEL for anything more recent.
12 12
13 namespace { // See SkNx.h 13 namespace { // See SkNx.h
14 14
15 15
16 template <> 16 template <>
17 class SkNf<2> { 17 class SkNx<2, float> {
18 public: 18 public:
19 SkNf(const __m128& vec) : fVec(vec) {} 19 SkNx(const __m128& vec) : fVec(vec) {}
20 20
21 SkNf() {} 21 SkNx() {}
22 SkNf(float val) : fVec(_mm_set1_ps(val)) {} 22 SkNx(float val) : fVec(_mm_set1_ps(val)) {}
23 static SkNf Load(const float vals[2]) { 23 static SkNx Load(const float vals[2]) {
24 return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals)); 24 return _mm_castsi128_ps(_mm_loadl_epi64((const __m128i*)vals));
25 } 25 }
26 SkNf(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {} 26 SkNx(float a, float b) : fVec(_mm_setr_ps(a,b,0,0)) {}
27 27
28 void store(float vals[2]) const { _mm_storel_pi((__m64*)vals, fVec); } 28 void store(float vals[2]) const { _mm_storel_pi((__m64*)vals, fVec); }
29 29
30 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } 30 SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); }
31 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } 31 SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); }
32 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } 32 SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); }
33 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } 33 SkNx operator / (const SkNx& o) const { return _mm_div_ps(fVec, o.fVec); }
34 34
35 SkNf operator == (const SkNf& o) const { return _mm_cmpeq_ps (fVec, o.fVec); } 35 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec); }
36 SkNf operator != (const SkNf& o) const { return _mm_cmpneq_ps(fVec, o.fVec); } 36 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec); }
37 SkNf operator < (const SkNf& o) const { return _mm_cmplt_ps (fVec, o.fVec); } 37 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec); }
38 SkNf operator > (const SkNf& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } 38 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec); }
39 SkNf operator <= (const SkNf& o) const { return _mm_cmple_ps (fVec, o.fVec); } 39 SkNx operator <= (const SkNx& o) const { return _mm_cmple_ps (fVec, o.fVec); }
40 SkNf operator >= (const SkNf& o) const { return _mm_cmpge_ps (fVec, o.fVec); } 40 SkNx operator >= (const SkNx& o) const { return _mm_cmpge_ps (fVec, o.fVec); }
41 41
42 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.fVec); } 42 static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.fVec); }
43 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.fVec); } 43 static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.fVec); }
44 44
45 SkNf sqrt() const { return _mm_sqrt_ps (fVec); } 45 SkNx sqrt() const { return _mm_sqrt_ps (fVec); }
46 SkNf rsqrt0() const { return _mm_rsqrt_ps(fVec); } 46 SkNx rsqrt0() const { return _mm_rsqrt_ps(fVec); }
47 SkNf rsqrt1() const { return this->rsqrt0(); } 47 SkNx rsqrt1() const { return this->rsqrt0(); }
48 SkNf rsqrt2() const { return this->rsqrt1(); } 48 SkNx rsqrt2() const { return this->rsqrt1(); }
49 49
50 SkNf invert() const { return SkNf(1) / *this; } 50 SkNx invert() const { return SkNx(1) / *this; }
51 SkNf approxInvert() const { return _mm_rcp_ps(fVec); } 51 SkNx approxInvert() const { return _mm_rcp_ps(fVec); }
52 52
53 template <int k> float kth() const { 53 template <int k> float kth() const {
54 SkASSERT(0 <= k && k < 2); 54 SkASSERT(0 <= k && k < 2);
55 union { __m128 v; float fs[4]; } pun = {fVec}; 55 union { __m128 v; float fs[4]; } pun = {fVec};
56 return pun.fs[k&1]; 56 return pun.fs[k&1];
57 } 57 }
58 58
59 bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); } 59 bool allTrue() const { return 0xff == (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); }
60 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); } 60 bool anyTrue() const { return 0x00 != (_mm_movemask_epi8(_mm_castps_si128(fVec)) & 0xff); }
61 61
62 __m128 fVec; 62 __m128 fVec;
63 }; 63 };
64 64
65 template <> 65 template <>
66 class SkNi<4, int> { 66 class SkNx<4, int> {
67 public: 67 public:
68 SkNi(const __m128i& vec) : fVec(vec) {} 68 SkNx(const __m128i& vec) : fVec(vec) {}
69 69
70 SkNi() {} 70 SkNx() {}
71 SkNi(int val) : fVec(_mm_set1_epi32(val)) {} 71 SkNx(int val) : fVec(_mm_set1_epi32(val)) {}
72 static SkNi Load(const int vals[4]) { return _mm_loadu_si128((const __m128i*)vals); } 72 static SkNx Load(const int vals[4]) { return _mm_loadu_si128((const __m128i*)vals); }
73 SkNi(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {} 73 SkNx(int a, int b, int c, int d) : fVec(_mm_setr_epi32(a,b,c,d)) {}
74 74
75 void store(int vals[4]) const { _mm_storeu_si128((__m128i*)vals, fVec); } 75 void store(int vals[4]) const { _mm_storeu_si128((__m128i*)vals, fVec); }
76 76
77 SkNi operator + (const SkNi& o) const { return _mm_add_epi32(fVec, o.fVec); } 77 SkNx operator + (const SkNx& o) const { return _mm_add_epi32(fVec, o.fVec); }
78 SkNi operator - (const SkNi& o) const { return _mm_sub_epi32(fVec, o.fVec); } 78 SkNx operator - (const SkNx& o) const { return _mm_sub_epi32(fVec, o.fVec); }
79 SkNi operator * (const SkNi& o) const { 79 SkNx operator * (const SkNx& o) const {
80 __m128i mul20 = _mm_mul_epu32(fVec, o.fVec), 80 __m128i mul20 = _mm_mul_epu32(fVec, o.fVec),
81 mul31 = _mm_mul_epu32(_mm_srli_si128(fVec, 4), _mm_srli_si128(o.fVec, 4)); 81 mul31 = _mm_mul_epu32(_mm_srli_si128(fVec, 4), _mm_srli_si128(o.fVec, 4));
82 return _mm_unpacklo_epi32(_mm_shuffle_epi32(mul20, _MM_SHUFFLE(0,0,2,0)), 82 return _mm_unpacklo_epi32(_mm_shuffle_epi32(mul20, _MM_SHUFFLE(0,0,2,0)),
83 _mm_shuffle_epi32(mul31, _MM_SHUFFLE(0,0,2,0))); 83 _mm_shuffle_epi32(mul31, _MM_SHUFFLE(0,0,2,0)));
84 } 84 }
85 85
86 SkNi operator << (int bits) const { return _mm_slli_epi32(fVec, bits); } 86 SkNx operator << (int bits) const { return _mm_slli_epi32(fVec, bits); }
87 SkNi operator >> (int bits) const { return _mm_srai_epi32(fVec, bits); } 87 SkNx operator >> (int bits) const { return _mm_srai_epi32(fVec, bits); }
88 88
89 template <int k> int kth() const { 89 template <int k> int kth() const {
90 SkASSERT(0 <= k && k < 4); 90 SkASSERT(0 <= k && k < 4);
91 switch (k) { 91 switch (k) {
92 case 0: return _mm_cvtsi128_si32(fVec); 92 case 0: return _mm_cvtsi128_si32(fVec);
93 case 1: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 4)); 93 case 1: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 4));
94 case 2: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 8)); 94 case 2: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 8));
95 case 3: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 12)); 95 case 3: return _mm_cvtsi128_si32(_mm_srli_si128(fVec, 12));
96 default: SkASSERT(false); return 0; 96 default: SkASSERT(false); return 0;
97 } 97 }
98 } 98 }
99 99
100 __m128i fVec; 100 __m128i fVec;
101 }; 101 };
102 102
103 template <> 103 template <>
104 class SkNf<4> { 104 class SkNx<4, float> {
105 public: 105 public:
106 SkNf(const __m128& vec) : fVec(vec) {} 106 SkNx(const __m128& vec) : fVec(vec) {}
107 107
108 SkNf() {} 108 SkNx() {}
109 SkNf(float val) : fVec( _mm_set1_ps(val) ) {} 109 SkNx(float val) : fVec( _mm_set1_ps(val) ) {}
110 static SkNf Load(const float vals[4]) { return _mm_loadu_ps(vals); } 110 static SkNx Load(const float vals[4]) { return _mm_loadu_ps(vals); }
111 111
112 static SkNf FromBytes(const uint8_t bytes[4]) { 112 static SkNx FromBytes(const uint8_t bytes[4]) {
113 __m128i fix8 = _mm_cvtsi32_si128(*(const int*)bytes); 113 __m128i fix8 = _mm_cvtsi32_si128(*(const int*)bytes);
114 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3 114 #if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSSE3
115 const char _ = ~0; // Zero these bytes. 115 const char _ = ~0; // Zero these bytes.
116 __m128i fix8_32 = _mm_shuffle_epi8(fix8, _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, 3,_,_,_)); 116 __m128i fix8_32 = _mm_shuffle_epi8(fix8, _mm_setr_epi8(0,_,_,_, 1,_,_,_, 2,_,_,_, 3,_,_,_));
117 #else 117 #else
118 __m128i fix8_16 = _mm_unpacklo_epi8 (fix8, _mm_setzero_si128()), 118 __m128i fix8_16 = _mm_unpacklo_epi8 (fix8, _mm_setzero_si128()),
119 fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128()); 119 fix8_32 = _mm_unpacklo_epi16(fix8_16, _mm_setzero_si128());
120 #endif 120 #endif
121 return SkNf(_mm_cvtepi32_ps(fix8_32)); 121 return SkNx(_mm_cvtepi32_ps(fix8_32));
122 // TODO: use _mm_cvtepu8_epi32 w/SSE4.1? 122 // TODO: use _mm_cvtepu8_epi32 w/SSE4.1?
123 } 123 }
124 124
125 SkNf(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {} 125 SkNx(float a, float b, float c, float d) : fVec(_mm_setr_ps(a,b,c,d)) {}
126 126
127 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); } 127 void store(float vals[4]) const { _mm_storeu_ps(vals, fVec); }
128 void toBytes(uint8_t bytes[4]) const { 128 void toBytes(uint8_t bytes[4]) const {
129 __m128i fix8_32 = _mm_cvttps_epi32(fVec), 129 __m128i fix8_32 = _mm_cvttps_epi32(fVec),
130 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32), 130 fix8_16 = _mm_packus_epi16(fix8_32, fix8_32),
131 fix8 = _mm_packus_epi16(fix8_16, fix8_16); 131 fix8 = _mm_packus_epi16(fix8_16, fix8_16);
132 *(int*)bytes = _mm_cvtsi128_si32(fix8); 132 *(int*)bytes = _mm_cvtsi128_si32(fix8);
133 } 133 }
134 134
135 SkNf operator + (const SkNf& o) const { return _mm_add_ps(fVec, o.fVec); } 135 SkNx operator + (const SkNx& o) const { return _mm_add_ps(fVec, o.fVec); }
136 SkNf operator - (const SkNf& o) const { return _mm_sub_ps(fVec, o.fVec); } 136 SkNx operator - (const SkNx& o) const { return _mm_sub_ps(fVec, o.fVec); }
137 SkNf operator * (const SkNf& o) const { return _mm_mul_ps(fVec, o.fVec); } 137 SkNx operator * (const SkNx& o) const { return _mm_mul_ps(fVec, o.fVec); }
138 SkNf operator / (const SkNf& o) const { return _mm_div_ps(fVec, o.fVec); } 138 SkNx operator / (const SkNx& o) const { return _mm_div_ps(fVec, o.fVec); }
139 139
140 SkNf operator == (const SkNf& o) const { return _mm_cmpeq_ps (fVec, o.fVec); } 140 SkNx operator == (const SkNx& o) const { return _mm_cmpeq_ps (fVec, o.fVec); }
141 SkNf operator != (const SkNf& o) const { return _mm_cmpneq_ps(fVec, o.fVec); } 141 SkNx operator != (const SkNx& o) const { return _mm_cmpneq_ps(fVec, o.fVec); }
142 SkNf operator < (const SkNf& o) const { return _mm_cmplt_ps (fVec, o.fVec); } 142 SkNx operator < (const SkNx& o) const { return _mm_cmplt_ps (fVec, o.fVec); }
143 SkNf operator > (const SkNf& o) const { return _mm_cmpgt_ps (fVec, o.fVec); } 143 SkNx operator > (const SkNx& o) const { return _mm_cmpgt_ps (fVec, o.fVec); }
144 SkNf operator <= (const SkNf& o) const { return _mm_cmple_ps (fVec, o.fVec); } 144 SkNx operator <= (const SkNx& o) const { return _mm_cmple_ps (fVec, o.fVec); }
145 SkNf operator >= (const SkNf& o) const { return _mm_cmpge_ps (fVec, o.fVec); } 145 SkNx operator >= (const SkNx& o) const { return _mm_cmpge_ps (fVec, o.fVec); }
146 146
147 static SkNf Min(const SkNf& l, const SkNf& r) { return _mm_min_ps(l.fVec, r.fVec); } 147 static SkNx Min(const SkNx& l, const SkNx& r) { return _mm_min_ps(l.fVec, r.fVec); }
148 static SkNf Max(const SkNf& l, const SkNf& r) { return _mm_max_ps(l.fVec, r.fVec); } 148 static SkNx Max(const SkNx& l, const SkNx& r) { return _mm_max_ps(l.fVec, r.fVec); }
149 149
150 SkNf sqrt() const { return _mm_sqrt_ps (fVec); } 150 SkNx sqrt() const { return _mm_sqrt_ps (fVec); }
151 SkNf rsqrt0() const { return _mm_rsqrt_ps(fVec); } 151 SkNx rsqrt0() const { return _mm_rsqrt_ps(fVec); }
152 SkNf rsqrt1() const { return this->rsqrt0(); } 152 SkNx rsqrt1() const { return this->rsqrt0(); }
153 SkNf rsqrt2() const { return this->rsqrt1(); } 153 SkNx rsqrt2() const { return this->rsqrt1(); }
154 154
155 SkNf invert() const { return SkNf(1) / *this; } 155 SkNx invert() const { return SkNx(1) / *this; }
156 SkNf approxInvert() const { return _mm_rcp_ps(fVec); } 156 SkNx approxInvert() const { return _mm_rcp_ps(fVec); }
157 157
158 template <int k> float kth() const { 158 template <int k> float kth() const {
159 SkASSERT(0 <= k && k < 4); 159 SkASSERT(0 <= k && k < 4);
160 union { __m128 v; float fs[4]; } pun = {fVec}; 160 union { __m128 v; float fs[4]; } pun = {fVec};
161 return pun.fs[k&3]; 161 return pun.fs[k&3];
162 } 162 }
163 163
164 bool allTrue() const { return 0xffff == _mm_movemask_epi8(_mm_castps_si128(fVec)); } 164 bool allTrue() const { return 0xffff == _mm_movemask_epi8(_mm_castps_si128(fVec)); }
165 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castps_si128(fVec)); } 165 bool anyTrue() const { return 0x0000 != _mm_movemask_epi8(_mm_castps_si128(fVec)); }
166 166
167 SkNf thenElse(const SkNf& t, const SkNf& e) const { 167 SkNx thenElse(const SkNx& t, const SkNx& e) const {
168 return _mm_or_ps(_mm_and_ps (fVec, t.fVec), 168 return _mm_or_ps(_mm_and_ps (fVec, t.fVec),
169 _mm_andnot_ps(fVec, e.fVec)); 169 _mm_andnot_ps(fVec, e.fVec));
170 } 170 }
171 171
172 __m128 fVec; 172 __m128 fVec;
173 }; 173 };
174 174
175 template <> 175 template <>
176 class SkNi<4, uint16_t> { 176 class SkNx<4, uint16_t> {
177 public: 177 public:
178 SkNi(const __m128i& vec) : fVec(vec) {} 178 SkNx(const __m128i& vec) : fVec(vec) {}
179 179
180 SkNi() {} 180 SkNx() {}
181 SkNi(uint16_t val) : fVec(_mm_set1_epi16(val)) {} 181 SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {}
182 static SkNi Load(const uint16_t vals[4]) { return _mm_loadl_epi64((const __m128i*)vals); } 182 static SkNx Load(const uint16_t vals[4]) { return _mm_loadl_epi64((const __m128i*)vals); }
183 SkNi(uint16_t a, uint16_t b, uint16_t c, uint16_t d) : fVec(_mm_setr_epi16(a,b,c,d,0,0,0,0)) {} 183 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d) : fVec(_mm_setr_epi16(a,b,c,d,0,0,0,0)) {}
184 184
185 void store(uint16_t vals[4]) const { _mm_storel_epi64((__m128i*)vals, fVec); } 185 void store(uint16_t vals[4]) const { _mm_storel_epi64((__m128i*)vals, fVec); }
186 186
187 SkNi operator + (const SkNi& o) const { return _mm_add_epi16(fVec, o.fVec); } 187 SkNx operator + (const SkNx& o) const { return _mm_add_epi16(fVec, o.fVec); }
188 SkNi operator - (const SkNi& o) const { return _mm_sub_epi16(fVec, o.fVec); } 188 SkNx operator - (const SkNx& o) const { return _mm_sub_epi16(fVec, o.fVec); }
189 SkNi operator * (const SkNi& o) const { return _mm_mullo_epi16(fVec, o.fVec); } 189 SkNx operator * (const SkNx& o) const { return _mm_mullo_epi16(fVec, o.fVec); }
190 190
191 SkNi operator << (int bits) const { return _mm_slli_epi16(fVec, bits); } 191 SkNx operator << (int bits) const { return _mm_slli_epi16(fVec, bits); }
192 SkNi operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); } 192 SkNx operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); }
193 193
194 template <int k> uint16_t kth() const { 194 template <int k> uint16_t kth() const {
195 SkASSERT(0 <= k && k < 4); 195 SkASSERT(0 <= k && k < 4);
196 return _mm_extract_epi16(fVec, k); 196 return _mm_extract_epi16(fVec, k);
197 } 197 }
198 198
199 __m128i fVec; 199 __m128i fVec;
200 }; 200 };
201 201
202 template <> 202 template <>
203 class SkNi<8, uint16_t> { 203 class SkNx<8, uint16_t> {
204 public: 204 public:
205 SkNi(const __m128i& vec) : fVec(vec) {} 205 SkNx(const __m128i& vec) : fVec(vec) {}
206 206
207 SkNi() {} 207 SkNx() {}
208 SkNi(uint16_t val) : fVec(_mm_set1_epi16(val)) {} 208 SkNx(uint16_t val) : fVec(_mm_set1_epi16(val)) {}
209 static SkNi Load(const uint16_t vals[8]) { return _mm_loadu_si128((const __m128i*)vals); } 209 static SkNx Load(const uint16_t vals[8]) { return _mm_loadu_si128((const __m128i*)vals); }
210 SkNi(uint16_t a, uint16_t b, uint16_t c, uint16_t d, 210 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
211 uint16_t e, uint16_t f, uint16_t g, uint16_t h) : fVec(_mm_setr_epi16(a,b,c,d,e,f,g,h)) {} 211 uint16_t e, uint16_t f, uint16_t g, uint16_t h) : fVec(_mm_setr_epi16(a,b,c,d,e,f,g,h)) {}
212 212
213 void store(uint16_t vals[8]) const { _mm_storeu_si128((__m128i*)vals, fVec); } 213 void store(uint16_t vals[8]) const { _mm_storeu_si128((__m128i*)vals, fVec); }
214 214
215 SkNi operator + (const SkNi& o) const { return _mm_add_epi16(fVec, o.fVec); } 215 SkNx operator + (const SkNx& o) const { return _mm_add_epi16(fVec, o.fVec); }
216 SkNi operator - (const SkNi& o) const { return _mm_sub_epi16(fVec, o.fVec); } 216 SkNx operator - (const SkNx& o) const { return _mm_sub_epi16(fVec, o.fVec); }
217 SkNi operator * (const SkNi& o) const { return _mm_mullo_epi16(fVec, o.fVec); } 217 SkNx operator * (const SkNx& o) const { return _mm_mullo_epi16(fVec, o.fVec); }
218 218
219 SkNi operator << (int bits) const { return _mm_slli_epi16(fVec, bits); } 219 SkNx operator << (int bits) const { return _mm_slli_epi16(fVec, bits); }
220 SkNi operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); } 220 SkNx operator >> (int bits) const { return _mm_srli_epi16(fVec, bits); }
221 221
222 static SkNi Min(const SkNi& a, const SkNi& b) { 222 static SkNx Min(const SkNx& a, const SkNx& b) {
223 // No unsigned _mm_min_epu16, so we'll shift into a space where we can use the 223 // No unsigned _mm_min_epu16, so we'll shift into a space where we can use the
224 // signed version, _mm_min_epi16, then shift back. 224 // signed version, _mm_min_epi16, then shift back.
225 const uint16_t top = 0x8000; // Keep this separate from _mm_set1_epi16 or MSVC will whine. 225 const uint16_t top = 0x8000; // Keep this separate from _mm_set1_epi16 or MSVC will whine.
226 const __m128i top_8x = _mm_set1_epi16(top); 226 const __m128i top_8x = _mm_set1_epi16(top);
227 return _mm_add_epi8(top_8x, _mm_min_epi16(_mm_sub_epi8(a.fVec, top_8x), 227 return _mm_add_epi8(top_8x, _mm_min_epi16(_mm_sub_epi8(a.fVec, top_8x),
228 _mm_sub_epi8(b.fVec, top_8x))); 228 _mm_sub_epi8(b.fVec, top_8x)));
229 } 229 }
230 230
231 SkNi thenElse(const SkNi& t, const SkNi& e) const { 231 SkNx thenElse(const SkNx& t, const SkNx& e) const {
232 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec), 232 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec),
233 _mm_andnot_si128(fVec, e.fVec)); 233 _mm_andnot_si128(fVec, e.fVec));
234 } 234 }
235 235
236 template <int k> uint16_t kth() const { 236 template <int k> uint16_t kth() const {
237 SkASSERT(0 <= k && k < 8); 237 SkASSERT(0 <= k && k < 8);
238 return _mm_extract_epi16(fVec, k); 238 return _mm_extract_epi16(fVec, k);
239 } 239 }
240 240
241 __m128i fVec; 241 __m128i fVec;
242 }; 242 };
243 243
244 template <> 244 template <>
245 class SkNi<16, uint8_t> { 245 class SkNx<16, uint8_t> {
246 public: 246 public:
247 SkNi(const __m128i& vec) : fVec(vec) {} 247 SkNx(const __m128i& vec) : fVec(vec) {}
248 248
249 SkNi() {} 249 SkNx() {}
250 SkNi(uint8_t val) : fVec(_mm_set1_epi8(val)) {} 250 SkNx(uint8_t val) : fVec(_mm_set1_epi8(val)) {}
251 static SkNi Load(const uint8_t vals[16]) { return _mm_loadu_si128((const __m128i*)vals); } 251 static SkNx Load(const uint8_t vals[16]) { return _mm_loadu_si128((const __m128i*)vals); }
252 SkNi(uint8_t a, uint8_t b, uint8_t c, uint8_t d, 252 SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
253 uint8_t e, uint8_t f, uint8_t g, uint8_t h, 253 uint8_t e, uint8_t f, uint8_t g, uint8_t h,
254 uint8_t i, uint8_t j, uint8_t k, uint8_t l, 254 uint8_t i, uint8_t j, uint8_t k, uint8_t l,
255 uint8_t m, uint8_t n, uint8_t o, uint8_t p) 255 uint8_t m, uint8_t n, uint8_t o, uint8_t p)
256 : fVec(_mm_setr_epi8(a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p)) {} 256 : fVec(_mm_setr_epi8(a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p)) {}
257 257
258 void store(uint8_t vals[16]) const { _mm_storeu_si128((__m128i*)vals, fVec); } 258 void store(uint8_t vals[16]) const { _mm_storeu_si128((__m128i*)vals, fVec); }
259 259
260 SkNi saturatedAdd(const SkNi& o) const { return _mm_adds_epu8(fVec, o.fVec); } 260 SkNx saturatedAdd(const SkNx& o) const { return _mm_adds_epu8(fVec, o.fVec); }
261 261
262 SkNi operator + (const SkNi& o) const { return _mm_add_epi8(fVec, o.fVec); } 262 SkNx operator + (const SkNx& o) const { return _mm_add_epi8(fVec, o.fVec); }
263 SkNi operator - (const SkNi& o) const { return _mm_sub_epi8(fVec, o.fVec); } 263 SkNx operator - (const SkNx& o) const { return _mm_sub_epi8(fVec, o.fVec); }
264 264
265 static SkNi Min(const SkNi& a, const SkNi& b) { return _mm_min_epu8(a.fVec, b.fVec); } 265 static SkNx Min(const SkNx& a, const SkNx& b) { return _mm_min_epu8(a.fVec, b.fVec); }
266 SkNi operator < (const SkNi& o) const { 266 SkNx operator < (const SkNx& o) const {
267 // There's no unsigned _mm_cmplt_epu8, so we flip the sign bits then use a signed compare. 267 // There's no unsigned _mm_cmplt_epu8, so we flip the sign bits then use a signed compare.
268 auto flip = _mm_set1_epi8(char(0x80)); 268 auto flip = _mm_set1_epi8(char(0x80));
269 return _mm_cmplt_epi8(_mm_xor_si128(flip, fVec), _mm_xor_si128(flip, o.fVec)); 269 return _mm_cmplt_epi8(_mm_xor_si128(flip, fVec), _mm_xor_si128(flip, o.fVec));
270 } 270 }
271 271
272 template <int k> uint8_t kth() const { 272 template <int k> uint8_t kth() const {
273 SkASSERT(0 <= k && k < 16); 273 SkASSERT(0 <= k && k < 16);
274 // SSE4.1 would just `return _mm_extract_epi8(fVec, k)`. We have to read 16-bits instead. 274 // SSE4.1 would just `return _mm_extract_epi8(fVec, k)`. We have to read 16-bits instead.
275 int pair = _mm_extract_epi16(fVec, k/2); 275 int pair = _mm_extract_epi16(fVec, k/2);
276 return k % 2 == 0 ? pair : (pair >> 8); 276 return k % 2 == 0 ? pair : (pair >> 8);
277 } 277 }
278 278
279 SkNi thenElse(const SkNi& t, const SkNi& e) const { 279 SkNx thenElse(const SkNx& t, const SkNx& e) const {
280 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec), 280 return _mm_or_si128(_mm_and_si128 (fVec, t.fVec),
281 _mm_andnot_si128(fVec, e.fVec)); 281 _mm_andnot_si128(fVec, e.fVec));
282 } 282 }
283 283
284 __m128i fVec; 284 __m128i fVec;
285 }; 285 };
286 286
287
288 template<>
289 inline SkNx<4, int> SkNx_cast<int, float, 4>(const SkNx<4, float>& src) {
290 return _mm_cvttps_epi32(src.fVec);
291 }
292
287 } // namespace 293 } // namespace
288 294
289 #endif//SkNx_sse_DEFINED 295 #endif//SkNx_sse_DEFINED
OLDNEW
« no previous file with comments | « src/opts/SkNx_neon.h ('k') | tests/SkNxTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698