Chromium Code Reviews

Side by Side Diff: src/opts/SkNx_neon.h

Issue 1464623002: Add SkNx_cast(). (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: derp, is stands for _ints_ Created 5 years, 1 month ago
OLD | NEW
1 /* 1 /*
2 * Copyright 2015 Google Inc. 2 * Copyright 2015 Google Inc.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license that can be 4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file. 5 * found in the LICENSE file.
6 */ 6 */
7 7
8 #ifndef SkNx_neon_DEFINED 8 #ifndef SkNx_neon_DEFINED
9 #define SkNx_neon_DEFINED 9 #define SkNx_neon_DEFINED
10 10
(...skipping 15 matching lines...)
26 26
27 #define SHIFT32(op, v, bits) if (bits < 16) { SHIFT16(op, v, bits); } switch(bits) { \ 27 #define SHIFT32(op, v, bits) if (bits < 16) { SHIFT16(op, v, bits); } switch(bits) { \
28 case 16: return op(v, 16); case 17: return op(v, 17); case 18: return op(v, 18); \ 28 case 16: return op(v, 16); case 17: return op(v, 17); case 18: return op(v, 18); \
29 case 19: return op(v, 19); case 20: return op(v, 20); case 21: return op(v, 21); \ 29 case 19: return op(v, 19); case 20: return op(v, 20); case 21: return op(v, 21); \
30 case 22: return op(v, 22); case 23: return op(v, 23); case 24: return op(v, 24); \ 30 case 22: return op(v, 22); case 23: return op(v, 23); case 24: return op(v, 24); \
31 case 25: return op(v, 25); case 26: return op(v, 26); case 27: return op(v, 27); \ 31 case 25: return op(v, 25); case 26: return op(v, 26); case 27: return op(v, 27); \
32 case 28: return op(v, 28); case 29: return op(v, 29); case 30: return op(v, 30); \ 32 case 28: return op(v, 28); case 29: return op(v, 29); case 30: return op(v, 30); \
33 case 31: return op(v, 31); } return fVec 33 case 31: return op(v, 31); } return fVec
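
Note: the SHIFT16/SHIFT32 macros above exist because NEON's immediate-shift intrinsics (vshlq_n_*, vshrq_n_*) only accept compile-time constant shift counts, so a runtime `bits` value has to be fanned out through a switch. A minimal sketch of the same trick outside this header (`shift_left` is a hypothetical helper, not Skia code):

    #include <arm_neon.h>

    // vshlq_n_s32 demands an immediate, so each legal count gets its own case,
    // exactly as the SHIFT16/SHIFT32 macros expand.
    static int32x4_t shift_left(int32x4_t v, int bits) {
        switch (bits) {
            case 1:  return vshlq_n_s32(v, 1);
            case 2:  return vshlq_n_s32(v, 2);
            // ...one case per legal immediate, as in SHIFT16/SHIFT32...
            default: return v;  // bits == 0 or out of range: unchanged
        }
    }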
34 34
35 template <> 35 template <>
36 class SkNf<2> { 36 class SkNx<2, float> {
37 public: 37 public:
38 SkNf(float32x2_t vec) : fVec(vec) {} 38 SkNx(float32x2_t vec) : fVec(vec) {}
39 39
40 SkNf() {} 40 SkNx() {}
41 SkNf(float val) : fVec(vdup_n_f32(val)) {} 41 SkNx(float val) : fVec(vdup_n_f32(val)) {}
42 static SkNf Load(const float vals[2]) { return vld1_f32(vals); } 42 static SkNx Load(const float vals[2]) { return vld1_f32(vals); }
43 SkNf(float a, float b) { fVec = (float32x2_t) { a, b }; } 43 SkNx(float a, float b) { fVec = (float32x2_t) { a, b }; }
44 44
45 void store(float vals[2]) const { vst1_f32(vals, fVec); } 45 void store(float vals[2]) const { vst1_f32(vals, fVec); }
46 46
47 SkNf approxInvert() const { 47 SkNx approxInvert() const {
48 float32x2_t est0 = vrecpe_f32(fVec), 48 float32x2_t est0 = vrecpe_f32(fVec),
49 est1 = vmul_f32(vrecps_f32(est0, fVec), est0); 49 est1 = vmul_f32(vrecps_f32(est0, fVec), est0);
50 return est1; 50 return est1;
51 } 51 }
52 SkNf invert() const { 52 SkNx invert() const {
53 float32x2_t est1 = this->approxInvert().fVec, 53 float32x2_t est1 = this->approxInvert().fVec,
54 est2 = vmul_f32(vrecps_f32(est1, fVec), est1); 54 est2 = vmul_f32(vrecps_f32(est1, fVec), est1);
55 return est2; 55 return est2;
56 } 56 }
57 57
58 SkNf operator + (const SkNf& o) const { return vadd_f32(fVec, o.fVec); } 58 SkNx operator + (const SkNx& o) const { return vadd_f32(fVec, o.fVec); }
59 SkNf operator - (const SkNf& o) const { return vsub_f32(fVec, o.fVec); } 59 SkNx operator - (const SkNx& o) const { return vsub_f32(fVec, o.fVec); }
60 SkNf operator * (const SkNf& o) const { return vmul_f32(fVec, o.fVec); } 60 SkNx operator * (const SkNx& o) const { return vmul_f32(fVec, o.fVec); }
61 SkNf operator / (const SkNf& o) const { 61 SkNx operator / (const SkNx& o) const {
62 #if defined(SK_CPU_ARM64) 62 #if defined(SK_CPU_ARM64)
63 return vdiv_f32(fVec, o.fVec); 63 return vdiv_f32(fVec, o.fVec);
64 #else 64 #else
65 return vmul_f32(fVec, o.invert().fVec); 65 return vmul_f32(fVec, o.invert().fVec);
66 #endif 66 #endif
67 } 67 }
68 68
69 SkNf operator == (const SkNf& o) const { return vreinterpret_f32_u32(vceq_f32(fVec, o.fVec)); } 69 SkNx operator == (const SkNx& o) const { return vreinterpret_f32_u32(vceq_f32(fVec, o.fVec)); }
70 SkNf operator < (const SkNf& o) const { return vreinterpret_f32_u32(vclt_f32(fVec, o.fVec)); } 70 SkNx operator < (const SkNx& o) const { return vreinterpret_f32_u32(vclt_f32(fVec, o.fVec)); }
71 SkNf operator > (const SkNf& o) const { return vreinterpret_f32_u32(vcgt_f32(fVec, o.fVec)); } 71 SkNx operator > (const SkNx& o) const { return vreinterpret_f32_u32(vcgt_f32(fVec, o.fVec)); }
72 SkNf operator <= (const SkNf& o) const { return vreinterpret_f32_u32(vcle_f32(fVec, o.fVec)); } 72 SkNx operator <= (const SkNx& o) const { return vreinterpret_f32_u32(vcle_f32(fVec, o.fVec)); }
73 SkNf operator >= (const SkNf& o) const { return vreinterpret_f32_u32(vcge_f32(fVec, o.fVec)); } 73 SkNx operator >= (const SkNx& o) const { return vreinterpret_f32_u32(vcge_f32(fVec, o.fVec)); }
74 SkNf operator != (const SkNf& o) const { 74 SkNx operator != (const SkNx& o) const {
75 return vreinterpret_f32_u32(vmvn_u32(vceq_f32(fVec, o.fVec))); 75 return vreinterpret_f32_u32(vmvn_u32(vceq_f32(fVec, o.fVec)));
76 } 76 }
77 77
78 static SkNf Min(const SkNf& l, const SkNf& r) { return vmin_f32(l.fVec, r.fVec); } 78 static SkNx Min(const SkNx& l, const SkNx& r) { return vmin_f32(l.fVec, r.fVec); }
79 static SkNf Max(const SkNf& l, const SkNf& r) { return vmax_f32(l.fVec, r.fVec); } 79 static SkNx Max(const SkNx& l, const SkNx& r) { return vmax_f32(l.fVec, r.fVec); }
80 80
81 SkNf rsqrt0() const { return vrsqrte_f32(fVec); } 81 SkNx rsqrt0() const { return vrsqrte_f32(fVec); }
82 SkNf rsqrt1() const { 82 SkNx rsqrt1() const {
83 float32x2_t est0 = this->rsqrt0().fVec; 83 float32x2_t est0 = this->rsqrt0().fVec;
84 return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0); 84 return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est0, est0)), est0);
85 } 85 }
86 SkNf rsqrt2() const { 86 SkNx rsqrt2() const {
87 float32x2_t est1 = this->rsqrt1().fVec; 87 float32x2_t est1 = this->rsqrt1().fVec;
88 return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1); 88 return vmul_f32(vrsqrts_f32(fVec, vmul_f32(est1, est1)), est1);
89 } 89 }
90 90
91 SkNf sqrt() const { 91 SkNx sqrt() const {
92 #if defined(SK_CPU_ARM64) 92 #if defined(SK_CPU_ARM64)
93 return vsqrt_f32(fVec); 93 return vsqrt_f32(fVec);
94 #else 94 #else
95 return *this * this->rsqrt2(); 95 return *this * this->rsqrt2();
96 #endif 96 #endif
97 } 97 }
98 98
99 template <int k> float kth() const { 99 template <int k> float kth() const {
100 SkASSERT(0 <= k && k < 2); 100 SkASSERT(0 <= k && k < 2);
101 return vget_lane_f32(fVec, k&1); 101 return vget_lane_f32(fVec, k&1);
102 } 102 }
103 103
104 bool allTrue() const { 104 bool allTrue() const {
105 auto v = vreinterpret_u32_f32(fVec); 105 auto v = vreinterpret_u32_f32(fVec);
106 return vget_lane_u32(v,0) && vget_lane_u32(v,1); 106 return vget_lane_u32(v,0) && vget_lane_u32(v,1);
107 } 107 }
108 bool anyTrue() const { 108 bool anyTrue() const {
109 auto v = vreinterpret_u32_f32(fVec); 109 auto v = vreinterpret_u32_f32(fVec);
110 return vget_lane_u32(v,0) || vget_lane_u32(v,1); 110 return vget_lane_u32(v,0) || vget_lane_u32(v,1);
111 } 111 }
112 112
113 float32x2_t fVec; 113 float32x2_t fVec;
114 }; 114 };
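
Note: approxInvert() and invert() above are Newton-Raphson refinements of NEON's reciprocal estimate. vrecpe_f32 produces a rough 1/x, and each vrecps_f32(e, x) returns (2 - x*e), so one refinement step is e' = e * (2 - x*e). A scalar model of the iteration (illustration only, not Skia code):

    #include <cstdio>

    int main() {
        float x  = 3.0f;
        float e0 = 0.3f;                  // stand-in for vrecpe_f32's rough estimate
        float e1 = e0 * (2.0f - x * e0);  // one vrecps step  -> approxInvert()
        float e2 = e1 * (2.0f - x * e1);  // second step      -> invert()
        std::printf("%f %f\n", e1, e2);   // both converge toward 1/3
        return 0;
    }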
115 115
116 template <> 116 template <>
117 class SkNi<4, int> { 117 class SkNx<4, int> {
118 public: 118 public:
119 SkNi(const int32x4_t& vec) : fVec(vec) {} 119 SkNx(const int32x4_t& vec) : fVec(vec) {}
120 120
121 SkNi() {} 121 SkNx() {}
122 SkNi(int val) : fVec(vdupq_n_s32(val)) {} 122 SkNx(int val) : fVec(vdupq_n_s32(val)) {}
123 static SkNi Load(const int vals[4]) { return vld1q_s32(vals); } 123 static SkNx Load(const int vals[4]) { return vld1q_s32(vals); }
124 SkNi(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; } 124 SkNx(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; }
125 125
126 void store(int vals[4]) const { vst1q_s32(vals, fVec); } 126 void store(int vals[4]) const { vst1q_s32(vals, fVec); }
127 127
128 SkNi operator + (const SkNi& o) const { return vaddq_s32(fVec, o.fVec); } 128 SkNx operator + (const SkNx& o) const { return vaddq_s32(fVec, o.fVec); }
129 SkNi operator - (const SkNi& o) const { return vsubq_s32(fVec, o.fVec); } 129 SkNx operator - (const SkNx& o) const { return vsubq_s32(fVec, o.fVec); }
130 SkNi operator * (const SkNi& o) const { return vmulq_s32(fVec, o.fVec); } 130 SkNx operator * (const SkNx& o) const { return vmulq_s32(fVec, o.fVec); }
131 131
132 SkNi operator << (int bits) const { SHIFT32(vshlq_n_s32, fVec, bits); } 132 SkNx operator << (int bits) const { SHIFT32(vshlq_n_s32, fVec, bits); }
133 SkNi operator >> (int bits) const { SHIFT32(vshrq_n_s32, fVec, bits); } 133 SkNx operator >> (int bits) const { SHIFT32(vshrq_n_s32, fVec, bits); }
134 134
135 template <int k> int kth() const { 135 template <int k> int kth() const {
136 SkASSERT(0 <= k && k < 4); 136 SkASSERT(0 <= k && k < 4);
137 return vgetq_lane_s32(fVec, k&3); 137 return vgetq_lane_s32(fVec, k&3);
138 } 138 }
139 139
140 int32x4_t fVec; 140 int32x4_t fVec;
141 }; 141 };
142 142
143 template <> 143 template <>
144 class SkNf<4> { 144 class SkNx<4, float> {
145 public: 145 public:
146 SkNf(float32x4_t vec) : fVec(vec) {} 146 SkNx(float32x4_t vec) : fVec(vec) {}
147 147
148 SkNf() {} 148 SkNx() {}
149 SkNf(float val) : fVec(vdupq_n_f32(val)) {} 149 SkNx(float val) : fVec(vdupq_n_f32(val)) {}
150 static SkNf Load(const float vals[4]) { return vld1q_f32(vals); } 150 static SkNx Load(const float vals[4]) { return vld1q_f32(vals); }
151 static SkNf FromBytes(const uint8_t vals[4]) { 151 static SkNx FromBytes(const uint8_t vals[4]) {
152 uint8x8_t fix8 = (uint8x8_t)vld1_dup_u32((const uint32_t*)vals); 152 uint8x8_t fix8 = (uint8x8_t)vld1_dup_u32((const uint32_t*)vals);
153 uint16x8_t fix8_16 = vmovl_u8(fix8); 153 uint16x8_t fix8_16 = vmovl_u8(fix8);
154 uint32x4_t fix8_32 = vmovl_u16(vget_low_u16(fix8_16)); 154 uint32x4_t fix8_32 = vmovl_u16(vget_low_u16(fix8_16));
155 return SkNf(vcvtq_f32_u32(fix8_32)); 155 return SkNx(vcvtq_f32_u32(fix8_32));
156 } 156 }
157 157
158 SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; } 158 SkNx(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; }
159 159
160 void store(float vals[4]) const { vst1q_f32(vals, fVec); } 160 void store(float vals[4]) const { vst1q_f32(vals, fVec); }
161 void toBytes(uint8_t bytes[4]) const { 161 void toBytes(uint8_t bytes[4]) const {
162 uint32x4_t fix8_32 = vcvtq_u32_f32(fVec); 162 uint32x4_t fix8_32 = vcvtq_u32_f32(fVec);
163 uint16x4_t fix8_16 = vqmovn_u32(fix8_32); 163 uint16x4_t fix8_16 = vqmovn_u32(fix8_32);
164 uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0))); 164 uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0)));
165 vst1_lane_u32((uint32_t*)bytes, (uint32x2_t)fix8, 0); 165 vst1_lane_u32((uint32_t*)bytes, (uint32x2_t)fix8, 0);
166 } 166 }
167 167
168 SkNf approxInvert() const { 168 SkNx approxInvert() const {
169 float32x4_t est0 = vrecpeq_f32(fVec), 169 float32x4_t est0 = vrecpeq_f32(fVec),
170 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0); 170 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0);
171 return est1; 171 return est1;
172 } 172 }
173 SkNf invert() const { 173 SkNx invert() const {
174 float32x4_t est1 = this->approxInvert().fVec, 174 float32x4_t est1 = this->approxInvert().fVec,
175 est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1); 175 est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1);
176 return est2; 176 return est2;
177 } 177 }
178 178
179 SkNf operator + (const SkNf& o) const { return vaddq_f32(fVec, o.fVec); } 179 SkNx operator + (const SkNx& o) const { return vaddq_f32(fVec, o.fVec); }
180 SkNf operator - (const SkNf& o) const { return vsubq_f32(fVec, o.fVec); } 180 SkNx operator - (const SkNx& o) const { return vsubq_f32(fVec, o.fVec); }
181 SkNf operator * (const SkNf& o) const { return vmulq_f32(fVec, o.fVec); } 181 SkNx operator * (const SkNx& o) const { return vmulq_f32(fVec, o.fVec); }
182 SkNf operator / (const SkNf& o) const { 182 SkNx operator / (const SkNx& o) const {
183 #if defined(SK_CPU_ARM64) 183 #if defined(SK_CPU_ARM64)
184 return vdivq_f32(fVec, o.fVec); 184 return vdivq_f32(fVec, o.fVec);
185 #else 185 #else
186 return vmulq_f32(fVec, o.invert().fVec); 186 return vmulq_f32(fVec, o.invert().fVec);
187 #endif 187 #endif
188 } 188 }
189 189
190 SkNf operator==(const SkNf& o) const { return vreinterpretq_f32_u32(vceqq_f32(fVec, o.fVec)); } 190 SkNx operator==(const SkNx& o) const { return vreinterpretq_f32_u32(vceqq_f32(fVec, o.fVec)); }
191 SkNf operator <(const SkNf& o) const { return vreinterpretq_f32_u32(vcltq_f32(fVec, o.fVec)); } 191 SkNx operator <(const SkNx& o) const { return vreinterpretq_f32_u32(vcltq_f32(fVec, o.fVec)); }
192 SkNf operator >(const SkNf& o) const { return vreinterpretq_f32_u32(vcgtq_f32(fVec, o.fVec)); } 192 SkNx operator >(const SkNx& o) const { return vreinterpretq_f32_u32(vcgtq_f32(fVec, o.fVec)); }
193 SkNf operator<=(const SkNf& o) const { return vreinterpretq_f32_u32(vcleq_f32(fVec, o.fVec)); } 193 SkNx operator<=(const SkNx& o) const { return vreinterpretq_f32_u32(vcleq_f32(fVec, o.fVec)); }
194 SkNf operator>=(const SkNf& o) const { return vreinterpretq_f32_u32(vcgeq_f32(fVec, o.fVec)); } 194 SkNx operator>=(const SkNx& o) const { return vreinterpretq_f32_u32(vcgeq_f32(fVec, o.fVec)); }
195 SkNf operator!=(const SkNf& o) const { 195 SkNx operator!=(const SkNx& o) const {
196 return vreinterpretq_f32_u32(vmvnq_u32(vceqq_f32(fVec, o.fVec))); 196 return vreinterpretq_f32_u32(vmvnq_u32(vceqq_f32(fVec, o.fVec)));
197 } 197 }
198 198
199 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f32(l.fVec, r.fVec); } 199 static SkNx Min(const SkNx& l, const SkNx& r) { return vminq_f32(l.fVec, r.fVec); }
200 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f32(l.fVec, r.fVec); } 200 static SkNx Max(const SkNx& l, const SkNx& r) { return vmaxq_f32(l.fVec, r.fVec); }
201 201
202 SkNf rsqrt0() const { return vrsqrteq_f32(fVec); } 202 SkNx rsqrt0() const { return vrsqrteq_f32(fVec); }
203 SkNf rsqrt1() const { 203 SkNx rsqrt1() const {
204 float32x4_t est0 = this->rsqrt0().fVec; 204 float32x4_t est0 = this->rsqrt0().fVec;
205 return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0); 205 return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0);
206 } 206 }
207 SkNf rsqrt2() const { 207 SkNx rsqrt2() const {
208 float32x4_t est1 = this->rsqrt1().fVec; 208 float32x4_t est1 = this->rsqrt1().fVec;
209 return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1); 209 return vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1);
210 } 210 }
211 211
212 SkNf sqrt() const { 212 SkNx sqrt() const {
213 #if defined(SK_CPU_ARM64) 213 #if defined(SK_CPU_ARM64)
214 return vsqrtq_f32(fVec); 214 return vsqrtq_f32(fVec);
215 #else 215 #else
216 return *this * this->rsqrt2(); 216 return *this * this->rsqrt2();
217 #endif 217 #endif
218 } 218 }
219 219
220 template <int k> float kth() const { 220 template <int k> float kth() const {
221 SkASSERT(0 <= k && k < 4); 221 SkASSERT(0 <= k && k < 4);
222 return vgetq_lane_f32(fVec, k&3); 222 return vgetq_lane_f32(fVec, k&3);
223 } 223 }
224 224
225 bool allTrue() const { 225 bool allTrue() const {
226 auto v = vreinterpretq_u32_f32(fVec); 226 auto v = vreinterpretq_u32_f32(fVec);
227 return vgetq_lane_u32(v,0) && vgetq_lane_u32(v,1) 227 return vgetq_lane_u32(v,0) && vgetq_lane_u32(v,1)
228 && vgetq_lane_u32(v,2) && vgetq_lane_u32(v,3); 228 && vgetq_lane_u32(v,2) && vgetq_lane_u32(v,3);
229 } 229 }
230 bool anyTrue() const { 230 bool anyTrue() const {
231 auto v = vreinterpretq_u32_f32(fVec); 231 auto v = vreinterpretq_u32_f32(fVec);
232 return vgetq_lane_u32(v,0) || vgetq_lane_u32(v,1) 232 return vgetq_lane_u32(v,0) || vgetq_lane_u32(v,1)
233 || vgetq_lane_u32(v,2) || vgetq_lane_u32(v,3); 233 || vgetq_lane_u32(v,2) || vgetq_lane_u32(v,3);
234 } 234 }
235 235
236 SkNf thenElse(const SkNf& t, const SkNf& e) const { 236 SkNx thenElse(const SkNx& t, const SkNx& e) const {
237 return vbslq_f32(vreinterpretq_u32_f32(fVec), t.fVec, e.fVec); 237 return vbslq_f32(vreinterpretq_u32_f32(fVec), t.fVec, e.fVec);
238 } 238 }
239 239
240 float32x4_t fVec; 240 float32x4_t fVec;
241 }; 241 };
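
Note: FromBytes()/toBytes() round-trip four 8-bit values through float. vmovl widens u8 to u32 on load, and the vqmovn narrowing on store saturates anything outside 0..255 (note that vcvtq_u32_f32 truncates; it does not round). A hedged usage sketch, assuming this header is in effect (`scale_pixel` is a hypothetical helper):

    #include <cstdint>

    // Scale one 4-byte pixel in float and store it back.
    void scale_pixel(uint8_t px[4], float s) {
        SkNx<4, float> v = SkNx<4, float>::FromBytes(px);
        (v * SkNx<4, float>(s)).toBytes(px);  // narrowing store saturates to 0..255
    }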
242 242
243 template <> 243 template <>
244 class SkNi<8, uint16_t> { 244 class SkNx<8, uint16_t> {
245 public: 245 public:
246 SkNi(const uint16x8_t& vec) : fVec(vec) {} 246 SkNx(const uint16x8_t& vec) : fVec(vec) {}
247 247
248 SkNi() {} 248 SkNx() {}
249 SkNi(uint16_t val) : fVec(vdupq_n_u16(val)) {} 249 SkNx(uint16_t val) : fVec(vdupq_n_u16(val)) {}
250 static SkNi Load(const uint16_t vals[8]) { return vld1q_u16(vals); } 250 static SkNx Load(const uint16_t vals[8]) { return vld1q_u16(vals); }
251 251
252 SkNi(uint16_t a, uint16_t b, uint16_t c, uint16_t d, 252 SkNx(uint16_t a, uint16_t b, uint16_t c, uint16_t d,
253 uint16_t e, uint16_t f, uint16_t g, uint16_t h) { 253 uint16_t e, uint16_t f, uint16_t g, uint16_t h) {
254 fVec = (uint16x8_t) { a,b,c,d, e,f,g,h }; 254 fVec = (uint16x8_t) { a,b,c,d, e,f,g,h };
255 } 255 }
256 256
257 void store(uint16_t vals[8]) const { vst1q_u16(vals, fVec); } 257 void store(uint16_t vals[8]) const { vst1q_u16(vals, fVec); }
258 258
259 SkNi operator + (const SkNi& o) const { return vaddq_u16(fVec, o.fVec); } 259 SkNx operator + (const SkNx& o) const { return vaddq_u16(fVec, o.fVec); }
260 SkNi operator - (const SkNi& o) const { return vsubq_u16(fVec, o.fVec); } 260 SkNx operator - (const SkNx& o) const { return vsubq_u16(fVec, o.fVec); }
261 SkNi operator * (const SkNi& o) const { return vmulq_u16(fVec, o.fVec); } 261 SkNx operator * (const SkNx& o) const { return vmulq_u16(fVec, o.fVec); }
262 262
263 SkNi operator << (int bits) const { SHIFT16(vshlq_n_u16, fVec, bits); } 263 SkNx operator << (int bits) const { SHIFT16(vshlq_n_u16, fVec, bits); }
264 SkNi operator >> (int bits) const { SHIFT16(vshrq_n_u16, fVec, bits); } 264 SkNx operator >> (int bits) const { SHIFT16(vshrq_n_u16, fVec, bits); }
265 265
266 static SkNi Min(const SkNi& a, const SkNi& b) { return vminq_u16(a.fVec, b.fVec); } 266 static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u16(a.fVec, b.fVec); }
267 267
268 template <int k> uint16_t kth() const { 268 template <int k> uint16_t kth() const {
269 SkASSERT(0 <= k && k < 8); 269 SkASSERT(0 <= k && k < 8);
270 return vgetq_lane_u16(fVec, k&7); 270 return vgetq_lane_u16(fVec, k&7);
271 } 271 }
272 272
273 SkNi thenElse(const SkNi& t, const SkNi& e) const { 273 SkNx thenElse(const SkNx& t, const SkNx& e) const {
274 return vbslq_u16(fVec, t.fVec, e.fVec); 274 return vbslq_u16(fVec, t.fVec, e.fVec);
275 } 275 }
276 276
277 uint16x8_t fVec; 277 uint16x8_t fVec;
278 }; 278 };
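
Note: thenElse() lowers to vbslq_u16, a bitwise select, so it only behaves like a lane-wise ?: when each mask lane is all-ones or all-zeros, i.e. the output of a comparison. A hedged sketch of that pattern in raw intrinsics (`clamp_to` is a hypothetical helper, not Skia code):

    #include <arm_neon.h>
    #include <cstdint>

    // Clamp each u16 lane of v to at most hi.
    static uint16x8_t clamp_to(uint16x8_t v, uint16_t hi) {
        uint16x8_t limit = vdupq_n_u16(hi);
        uint16x8_t mask  = vcgtq_u16(v, limit);  // 0xFFFF where v > hi, else 0
        return vbslq_u16(mask, limit, v);        // take limit where mask is set
    }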
279 279
280 template <> 280 template <>
281 class SkNi<16, uint8_t> { 281 class SkNx<16, uint8_t> {
282 public: 282 public:
283 SkNi(const uint8x16_t& vec) : fVec(vec) {} 283 SkNx(const uint8x16_t& vec) : fVec(vec) {}
284 284
285 SkNi() {} 285 SkNx() {}
286 SkNi(uint8_t val) : fVec(vdupq_n_u8(val)) {} 286 SkNx(uint8_t val) : fVec(vdupq_n_u8(val)) {}
287 static SkNi Load(const uint8_t vals[16]) { return vld1q_u8(vals); } 287 static SkNx Load(const uint8_t vals[16]) { return vld1q_u8(vals); }
288 288
289 SkNi(uint8_t a, uint8_t b, uint8_t c, uint8_t d, 289 SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d,
290 uint8_t e, uint8_t f, uint8_t g, uint8_t h, 290 uint8_t e, uint8_t f, uint8_t g, uint8_t h,
291 uint8_t i, uint8_t j, uint8_t k, uint8_t l, 291 uint8_t i, uint8_t j, uint8_t k, uint8_t l,
292 uint8_t m, uint8_t n, uint8_t o, uint8_t p) { 292 uint8_t m, uint8_t n, uint8_t o, uint8_t p) {
293 fVec = (uint8x16_t) { a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p }; 293 fVec = (uint8x16_t) { a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p };
294 } 294 }
295 295
296 void store(uint8_t vals[16]) const { vst1q_u8(vals, fVec); } 296 void store(uint8_t vals[16]) const { vst1q_u8(vals, fVec); }
297 297
298 SkNi saturatedAdd(const SkNi& o) const { return vqaddq_u8(fVec, o.fVec); } 298 SkNx saturatedAdd(const SkNx& o) const { return vqaddq_u8(fVec, o.fVec); }
299 299
300 SkNi operator + (const SkNi& o) const { return vaddq_u8(fVec, o.fVec); } 300 SkNx operator + (const SkNx& o) const { return vaddq_u8(fVec, o.fVec); }
301 SkNi operator - (const SkNi& o) const { return vsubq_u8(fVec, o.fVec); } 301 SkNx operator - (const SkNx& o) const { return vsubq_u8(fVec, o.fVec); }
302 302
303 static SkNi Min(const SkNi& a, const SkNi& b) { return vminq_u8(a.fVec, b.fVec); } 303 static SkNx Min(const SkNx& a, const SkNx& b) { return vminq_u8(a.fVec, b.fVec); }
304 SkNi operator < (const SkNi& o) const { return vcltq_u8(fVec, o.fVec); } 304 SkNx operator < (const SkNx& o) const { return vcltq_u8(fVec, o.fVec); }
305 305
306 template <int k> uint8_t kth() const { 306 template <int k> uint8_t kth() const {
307 SkASSERT(0 <= k && k < 16); 307 SkASSERT(0 <= k && k < 16);
308 return vgetq_lane_u8(fVec, k&15); 308 return vgetq_lane_u8(fVec, k&15);
309 } 309 }
310 310
311 SkNi thenElse(const SkNi& t, const SkNi& e) const { 311 SkNx thenElse(const SkNx& t, const SkNx& e) const {
312 return vbslq_u8(fVec, t.fVec, e.fVec); 312 return vbslq_u8(fVec, t.fVec, e.fVec);
313 } 313 }
314 314
315 uint8x16_t fVec; 315 uint8x16_t fVec;
316 }; 316 };
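
Note: saturatedAdd() above maps to vqaddq_u8, which clamps each lane at 255 instead of wrapping; that is the behavior pixel blends rely on. A scalar model of the per-lane behavior (illustration only):

    #include <cstdint>

    uint8_t saturated_add_u8(uint8_t a, uint8_t b) {
        unsigned sum = unsigned(a) + unsigned(b);
        return sum > 255 ? 255 : uint8_t(sum);  // clamp instead of wrapping
    }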
317 317
318 #undef SHIFT32 318 #undef SHIFT32
319 #undef SHIFT16 319 #undef SHIFT16
320 #undef SHIFT8 320 #undef SHIFT8
321 321
322 template<>
323 inline SkNx<4, int> SkNx_cast<int, float, 4>(const SkNx<4, float>& src) {
324 return vcvtq_s32_f32(src.fVec);
325 }
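
Note: the new specialization implements the float-to-int conversion with vcvtq_s32_f32, which truncates toward zero. A hedged usage sketch (Sk4f/Sk4i are the usual Skia aliases for SkNx<4, float>/SkNx<4, int>, assumed available from SkNx.h):

    Sk4f floats(0.5f, 1.5f, -2.5f, 3.0f);
    Sk4i ints = SkNx_cast<int, float, 4>(floats);  // lanes: 0, 1, -2, 3 (truncated)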
326
322 } // namespace 327 } // namespace
323 328
324 #endif//SkNx_neon_DEFINED 329 #endif//SkNx_neon_DEFINED