src/opts/SkNx_neon.h - Issue 1411563008: prune unused SkNx features

Side by Side Diff: src/opts/SkNx_neon.h

Issue 1411563008: prune unused SkNx features (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: 0.001f Created 5 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * Copyright 2015 Google Inc.	2 * Copyright 2015 Google Inc.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license that can be	4 * Use of this source code is governed by a BSD-style license that can be

5 * found in the LICENSE file.	5 * found in the LICENSE file.

6 */	6 */

7	7

8 #ifndef SkNx_neon_DEFINED	8 #ifndef SkNx_neon_DEFINED

9 #define SkNx_neon_DEFINED	9 #define SkNx_neon_DEFINED

10	10

(...skipping 15 matching lines...) Expand all Loading...
26	26

27 #define SHIFT32(op, v, bits) if (bits < 16) { SHIFT16(op, v, bits); } switch(bit s) { \	27 #define SHIFT32(op, v, bits) if (bits < 16) { SHIFT16(op, v, bits); } switch(bit s) { \

28 case 16: return op(v, 16); case 17: return op(v, 17); case 18: return op(v , 18); \	28 case 16: return op(v, 16); case 17: return op(v, 17); case 18: return op(v , 18); \

29 case 19: return op(v, 19); case 20: return op(v, 20); case 21: return op(v , 21); \	29 case 19: return op(v, 19); case 20: return op(v, 20); case 21: return op(v , 21); \

30 case 22: return op(v, 22); case 23: return op(v, 23); case 24: return op(v , 24); \	30 case 22: return op(v, 22); case 23: return op(v, 23); case 24: return op(v , 24); \

31 case 25: return op(v, 25); case 26: return op(v, 26); case 27: return op(v , 27); \	31 case 25: return op(v, 25); case 26: return op(v, 26); case 27: return op(v , 27); \

32 case 28: return op(v, 28); case 29: return op(v, 29); case 30: return op(v , 30); \	32 case 28: return op(v, 28); case 29: return op(v, 29); case 30: return op(v , 30); \

33 case 31: return op(v, 31); } return fVec	33 case 31: return op(v, 31); } return fVec

34	34

35 template <>	35 template <>

36 class SkNf<2, float> {	36 class SkNf<2> {

37 public:	37 public:

38 SkNf(float32x2_t vec) : fVec(vec) {}	38 SkNf(float32x2_t vec) : fVec(vec) {}

39	39

40 SkNf() {}	40 SkNf() {}

41 explicit SkNf(float val) : fVec(vdup_n_f32(val)) {}	41 explicit SkNf(float val) : fVec(vdup_n_f32(val)) {}

42 static SkNf Load(const float vals[2]) { return vld1_f32(vals); }	42 static SkNf Load(const float vals[2]) { return vld1_f32(vals); }

43 SkNf(float a, float b) { fVec = (float32x2_t) { a, b }; }	43 SkNf(float a, float b) { fVec = (float32x2_t) { a, b }; }

44	44

45 void store(float vals[2]) const { vst1_f32(vals, fVec); }	45 void store(float vals[2]) const { vst1_f32(vals, fVec); }

46	46

(...skipping 59 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
106 return vget_lane_u32(v,0) && vget_lane_u32(v,1);	106 return vget_lane_u32(v,0) && vget_lane_u32(v,1);

107 }	107 }

108 bool anyTrue() const {	108 bool anyTrue() const {

109 auto v = vreinterpret_u32_f32(fVec);	109 auto v = vreinterpret_u32_f32(fVec);

110 return vget_lane_u32(v,0) \|\| vget_lane_u32(v,1);	110 return vget_lane_u32(v,0) \|\| vget_lane_u32(v,1);

111 }	111 }

112	112

113 float32x2_t fVec;	113 float32x2_t fVec;

114 };	114 };

115	115

116 #if defined(SK_CPU_ARM64)

117 template <>

118 class SkNf<2, double> {

119 public:

120 SkNf(float64x2_t vec) : fVec(vec) {}

121

122 SkNf() {}

123 explicit SkNf(double val) : fVec(vdupq_n_f64(val)) {}

124 static SkNf Load(const double vals[2]) { return vld1q_f64(vals); }

125 SkNf(double a, double b) { fVec = (float64x2_t) { a, b }; }

126

127 void store(double vals[2]) const { vst1q_f64(vals, fVec); }

128

129 SkNf operator + (const SkNf& o) const { return vaddq_f64(fVec, o.fVec); }

130 SkNf operator - (const SkNf& o) const { return vsubq_f64(fVec, o.fVec); }

131 SkNf operator * (const SkNf& o) const { return vmulq_f64(fVec, o.fVec); }

132 SkNf operator / (const SkNf& o) const { return vdivq_f64(fVec, o.fVec); }

133

134 // vreinterpretq_f64_u64 and vreinterpretq_f64_u32 don't seem to exist.... weird.

135 SkNf operator==(const SkNf& o) const { return (float64x2_t)(vceqq_f64(fVec, o.fVec)); }

136 SkNf operator <(const SkNf& o) const { return (float64x2_t)(vcltq_f64(fVec, o.fVec)); }

137 SkNf operator >(const SkNf& o) const { return (float64x2_t)(vcgtq_f64(fVec, o.fVec)); }

138 SkNf operator<=(const SkNf& o) const { return (float64x2_t)(vcleq_f64(fVec, o.fVec)); }

139 SkNf operator>=(const SkNf& o) const { return (float64x2_t)(vcgeq_f64(fVec, o.fVec)); }

140 SkNf operator != (const SkNf& o) const {

141 return (float64x2_t)(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(fVec, o.f Vec))));

142 }

143

144 static SkNf Min(const SkNf& l, const SkNf& r) { return vminq_f64(l.fVec, r.f Vec); }

145 static SkNf Max(const SkNf& l, const SkNf& r) { return vmaxq_f64(l.fVec, r.f Vec); }

146

147 SkNf sqrt() const { return vsqrtq_f64(fVec); }

148

149 SkNf rsqrt0() const { return vrsqrteq_f64(fVec); }

150 SkNf rsqrt1() const {

151 float64x2_t est0 = this->rsqrt0().fVec;

152 return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est0, est0)), est0);

153 }

154 SkNf rsqrt2() const {

155 float64x2_t est1 = this->rsqrt1().fVec;

156 return vmulq_f64(vrsqrtsq_f64(fVec, vmulq_f64(est1, est1)), est1);

157 }

158

159 SkNf approxInvert() const {

160 float64x2_t est0 = vrecpeq_f64(fVec),

161 est1 = vmulq_f64(vrecpsq_f64(est0, fVec), est0);

162 return est1;

163 }

164

165 SkNf invert() const {

166 float64x2_t est1 = this->approxInvert().fVec,

167 est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1),

168 est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2);

169 return est3;

170 }

171

172 template <int k> double kth() const {

173 SkASSERT(0 <= k && k < 2);

174 return vgetq_lane_f64(fVec, k&1);

175 }

176

177 // vreinterpretq_u64_f64 doesn't seem to exist.... weird.

178 bool allTrue() const {

179 auto v = (uint64x2_t)(fVec);

180 return vgetq_lane_u64(v,0) && vgetq_lane_u64(v,1);

181 }

182 bool anyTrue() const {

183 auto v = (uint64x2_t)(fVec);

184 return vgetq_lane_u64(v,0) \|\| vgetq_lane_u64(v,1);

185 }

186

187 float64x2_t fVec;

188 };

189 #endif//defined(SK_CPU_ARM64)

190

191 template <>	116 template <>

192 class SkNi<4, int> {	117 class SkNi<4, int> {

193 public:	118 public:

194 SkNi(const int32x4_t& vec) : fVec(vec) {}	119 SkNi(const int32x4_t& vec) : fVec(vec) {}

195	120

196 SkNi() {}	121 SkNi() {}

197 explicit SkNi(int val) : fVec(vdupq_n_s32(val)) {}	122 explicit SkNi(int val) : fVec(vdupq_n_s32(val)) {}

198 static SkNi Load(const int vals[4]) { return vld1q_s32(vals); }	123 static SkNi Load(const int vals[4]) { return vld1q_s32(vals); }

199 SkNi(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; }	124 SkNi(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; }

200	125

201 void store(int vals[4]) const { vst1q_s32(vals, fVec); }	126 void store(int vals[4]) const { vst1q_s32(vals, fVec); }

202	127

203 SkNi operator + (const SkNi& o) const { return vaddq_s32(fVec, o.fVec); }	128 SkNi operator + (const SkNi& o) const { return vaddq_s32(fVec, o.fVec); }

204 SkNi operator - (const SkNi& o) const { return vsubq_s32(fVec, o.fVec); }	129 SkNi operator - (const SkNi& o) const { return vsubq_s32(fVec, o.fVec); }

205 SkNi operator * (const SkNi& o) const { return vmulq_s32(fVec, o.fVec); }	130 SkNi operator * (const SkNi& o) const { return vmulq_s32(fVec, o.fVec); }

206	131

207 SkNi operator << (int bits) const { SHIFT32(vshlq_n_s32, fVec, bits); }	132 SkNi operator << (int bits) const { SHIFT32(vshlq_n_s32, fVec, bits); }

208 SkNi operator >> (int bits) const { SHIFT32(vshrq_n_s32, fVec, bits); }	133 SkNi operator >> (int bits) const { SHIFT32(vshrq_n_s32, fVec, bits); }

209	134

210 template <int k> int kth() const {	135 template <int k> int kth() const {

211 SkASSERT(0 <= k && k < 4);	136 SkASSERT(0 <= k && k < 4);

212 return vgetq_lane_s32(fVec, k&3);	137 return vgetq_lane_s32(fVec, k&3);

213 }	138 }

214	139

215 int32x4_t fVec;	140 int32x4_t fVec;

216 };	141 };

217	142

218 template <>	143 template <>

219 class SkNf<4, float> {	144 class SkNf<4> {

220 public:	145 public:

221 SkNf(float32x4_t vec) : fVec(vec) {}	146 SkNf(float32x4_t vec) : fVec(vec) {}

222	147

223 SkNf() {}	148 SkNf() {}

224 explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {}	149 explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {}

225 static SkNf Load(const float vals[4]) { return vld1q_f32(vals); }	150 static SkNf Load(const float vals[4]) { return vld1q_f32(vals); }

226 static SkNf FromBytes(const uint8_t vals[4]) {	151 static SkNf FromBytes(const uint8_t vals[4]) {

227 uint8x8_t fix8 = (uint8x8_t)vld1_dup_u32((const uint32_t*)vals);	152 uint8x8_t fix8 = (uint8x8_t)vld1_dup_u32((const uint32_t*)vals);

228 uint16x8_t fix8_16 = vmovl_u8(fix8);	153 uint16x8_t fix8_16 = vmovl_u8(fix8);

229 uint32x4_t fix8_32 = vmovl_u16(vget_low_u16(fix8_16));	154 uint32x4_t fix8_32 = vmovl_u16(vget_low_u16(fix8_16));

230 return SkNf(vcvtq_f32_u32(fix8_32));	155 return SkNf(vcvtq_f32_u32(fix8_32));

231 }	156 }

232	157

233 SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; }	158 SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d }; }

234	159

235 void store(float vals[4]) const { vst1q_f32(vals, fVec); }	160 void store(float vals[4]) const { vst1q_f32(vals, fVec); }

236 void toBytes(uint8_t bytes[4]) const {	161 void toBytes(uint8_t bytes[4]) const {

237 uint32x4_t fix8_32 = vcvtq_u32_f32(fVec);	162 uint32x4_t fix8_32 = vcvtq_u32_f32(fVec);

238 uint16x4_t fix8_16 = vqmovn_u32(fix8_32);	163 uint16x4_t fix8_16 = vqmovn_u32(fix8_32);

239 uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0)));	164 uint8x8_t fix8 = vqmovn_u16(vcombine_u16(fix8_16, vdup_n_u16(0)));

240 vst1_lane_u32((uint32_t*)bytes, (uint32x2_t)fix8, 0);	165 vst1_lane_u32((uint32_t*)bytes, (uint32x2_t)fix8, 0);

241 }	166 }

242	167

243 SkNi<4, int> castTrunc() const { return vcvtq_s32_f32(fVec); }

244

245 SkNf approxInvert() const {	168 SkNf approxInvert() const {

246 float32x4_t est0 = vrecpeq_f32(fVec),	169 float32x4_t est0 = vrecpeq_f32(fVec),

247 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0);	170 est1 = vmulq_f32(vrecpsq_f32(est0, fVec), est0);

248 return est1;	171 return est1;

249 }	172 }

250 SkNf invert() const {	173 SkNf invert() const {

251 float32x4_t est1 = this->approxInvert().fVec,	174 float32x4_t est1 = this->approxInvert().fVec,

252 est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1);	175 est2 = vmulq_f32(vrecpsq_f32(est1, fVec), est1);

253 return est2;	176 return est2;

254 }	177 }

(...skipping 137 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
392 uint8x16_t fVec;	315 uint8x16_t fVec;

393 };	316 };

394	317

395 #undef SHIFT32	318 #undef SHIFT32

396 #undef SHIFT16	319 #undef SHIFT16

397 #undef SHIFT8	320 #undef SHIFT8

398	321

399 } // namespace	322 } // namespace

400	323

401 #endif//SkNx_neon_DEFINED	324 #endif//SkNx_neon_DEFINED

OLD	NEW

« no previous file with comments | « src/effects/gradients/SkRadialGradient.cpp ('k') | src/opts/SkNx_sse.h » ('j') | no next file with comments »