| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
| 9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
| 10 | 10 |
| (...skipping 20 matching lines...) Expand all Loading... |
| 31 case 25: return op(v, 25); case 26: return op(v, 26); case 27: return op(v
, 27); \ | 31 case 25: return op(v, 25); case 26: return op(v, 26); case 27: return op(v
, 27); \ |
| 32 case 28: return op(v, 28); case 29: return op(v, 29); case 30: return op(v
, 30); \ | 32 case 28: return op(v, 28); case 29: return op(v, 29); case 30: return op(v
, 30); \ |
| 33 case 31: return op(v, 31); } return fVec | 33 case 31: return op(v, 31); } return fVec |
| 34 | 34 |
| 35 template <> | 35 template <> |
| 36 class SkNf<2> { | 36 class SkNf<2> { |
| 37 public: | 37 public: |
| 38 SkNf(float32x2_t vec) : fVec(vec) {} | 38 SkNf(float32x2_t vec) : fVec(vec) {} |
| 39 | 39 |
| 40 SkNf() {} | 40 SkNf() {} |
| 41 explicit SkNf(float val) : fVec(vdup_n_f32(val)) {} | 41 SkNf(float val) : fVec(vdup_n_f32(val)) {} |
| 42 static SkNf Load(const float vals[2]) { return vld1_f32(vals); } | 42 static SkNf Load(const float vals[2]) { return vld1_f32(vals); } |
| 43 SkNf(float a, float b) { fVec = (float32x2_t) { a, b }; } | 43 SkNf(float a, float b) { fVec = (float32x2_t) { a, b }; } |
| 44 | 44 |
| 45 void store(float vals[2]) const { vst1_f32(vals, fVec); } | 45 void store(float vals[2]) const { vst1_f32(vals, fVec); } |
| 46 | 46 |
| 47 SkNf approxInvert() const { | 47 SkNf approxInvert() const { |
| 48 float32x2_t est0 = vrecpe_f32(fVec), | 48 float32x2_t est0 = vrecpe_f32(fVec), |
| 49 est1 = vmul_f32(vrecps_f32(est0, fVec), est0); | 49 est1 = vmul_f32(vrecps_f32(est0, fVec), est0); |
| 50 return est1; | 50 return est1; |
| 51 } | 51 } |
| (...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 112 | 112 |
| 113 float32x2_t fVec; | 113 float32x2_t fVec; |
| 114 }; | 114 }; |
| 115 | 115 |
| 116 template <> | 116 template <> |
| 117 class SkNi<4, int> { | 117 class SkNi<4, int> { |
| 118 public: | 118 public: |
| 119 SkNi(const int32x4_t& vec) : fVec(vec) {} | 119 SkNi(const int32x4_t& vec) : fVec(vec) {} |
| 120 | 120 |
| 121 SkNi() {} | 121 SkNi() {} |
| 122 explicit SkNi(int val) : fVec(vdupq_n_s32(val)) {} | 122 SkNi(int val) : fVec(vdupq_n_s32(val)) {} |
| 123 static SkNi Load(const int vals[4]) { return vld1q_s32(vals); } | 123 static SkNi Load(const int vals[4]) { return vld1q_s32(vals); } |
| 124 SkNi(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; } | 124 SkNi(int a, int b, int c, int d) { fVec = (int32x4_t) { a, b, c, d }; } |
| 125 | 125 |
| 126 void store(int vals[4]) const { vst1q_s32(vals, fVec); } | 126 void store(int vals[4]) const { vst1q_s32(vals, fVec); } |
| 127 | 127 |
| 128 SkNi operator + (const SkNi& o) const { return vaddq_s32(fVec, o.fVec); } | 128 SkNi operator + (const SkNi& o) const { return vaddq_s32(fVec, o.fVec); } |
| 129 SkNi operator - (const SkNi& o) const { return vsubq_s32(fVec, o.fVec); } | 129 SkNi operator - (const SkNi& o) const { return vsubq_s32(fVec, o.fVec); } |
| 130 SkNi operator * (const SkNi& o) const { return vmulq_s32(fVec, o.fVec); } | 130 SkNi operator * (const SkNi& o) const { return vmulq_s32(fVec, o.fVec); } |
| 131 | 131 |
| 132 SkNi operator << (int bits) const { SHIFT32(vshlq_n_s32, fVec, bits); } | 132 SkNi operator << (int bits) const { SHIFT32(vshlq_n_s32, fVec, bits); } |
| 133 SkNi operator >> (int bits) const { SHIFT32(vshrq_n_s32, fVec, bits); } | 133 SkNi operator >> (int bits) const { SHIFT32(vshrq_n_s32, fVec, bits); } |
| 134 | 134 |
| 135 template <int k> int kth() const { | 135 template <int k> int kth() const { |
| 136 SkASSERT(0 <= k && k < 4); | 136 SkASSERT(0 <= k && k < 4); |
| 137 return vgetq_lane_s32(fVec, k&3); | 137 return vgetq_lane_s32(fVec, k&3); |
| 138 } | 138 } |
| 139 | 139 |
| 140 int32x4_t fVec; | 140 int32x4_t fVec; |
| 141 }; | 141 }; |
| 142 | 142 |
| 143 template <> | 143 template <> |
| 144 class SkNf<4> { | 144 class SkNf<4> { |
| 145 public: | 145 public: |
| 146 SkNf(float32x4_t vec) : fVec(vec) {} | 146 SkNf(float32x4_t vec) : fVec(vec) {} |
| 147 | 147 |
| 148 SkNf() {} | 148 SkNf() {} |
| 149 explicit SkNf(float val) : fVec(vdupq_n_f32(val)) {} | 149 SkNf(float val) : fVec(vdupq_n_f32(val)) {} |
| 150 static SkNf Load(const float vals[4]) { return vld1q_f32(vals); } | 150 static SkNf Load(const float vals[4]) { return vld1q_f32(vals); } |
| 151 static SkNf FromBytes(const uint8_t vals[4]) { | 151 static SkNf FromBytes(const uint8_t vals[4]) { |
| 152 uint8x8_t fix8 = (uint8x8_t)vld1_dup_u32((const uint32_t*)vals); | 152 uint8x8_t fix8 = (uint8x8_t)vld1_dup_u32((const uint32_t*)vals); |
| 153 uint16x8_t fix8_16 = vmovl_u8(fix8); | 153 uint16x8_t fix8_16 = vmovl_u8(fix8); |
| 154 uint32x4_t fix8_32 = vmovl_u16(vget_low_u16(fix8_16)); | 154 uint32x4_t fix8_32 = vmovl_u16(vget_low_u16(fix8_16)); |
| 155 return SkNf(vcvtq_f32_u32(fix8_32)); | 155 return SkNf(vcvtq_f32_u32(fix8_32)); |
| 156 } | 156 } |
| 157 | 157 |
| 158 SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d
}; } | 158 SkNf(float a, float b, float c, float d) { fVec = (float32x4_t) { a, b, c, d
}; } |
| 159 | 159 |
| (...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 239 | 239 |
| 240 float32x4_t fVec; | 240 float32x4_t fVec; |
| 241 }; | 241 }; |
| 242 | 242 |
| 243 template <> | 243 template <> |
| 244 class SkNi<8, uint16_t> { | 244 class SkNi<8, uint16_t> { |
| 245 public: | 245 public: |
| 246 SkNi(const uint16x8_t& vec) : fVec(vec) {} | 246 SkNi(const uint16x8_t& vec) : fVec(vec) {} |
| 247 | 247 |
| 248 SkNi() {} | 248 SkNi() {} |
| 249 explicit SkNi(uint16_t val) : fVec(vdupq_n_u16(val)) {} | 249 SkNi(uint16_t val) : fVec(vdupq_n_u16(val)) {} |
| 250 static SkNi Load(const uint16_t vals[8]) { return vld1q_u16(vals); } | 250 static SkNi Load(const uint16_t vals[8]) { return vld1q_u16(vals); } |
| 251 | 251 |
| 252 SkNi(uint16_t a, uint16_t b, uint16_t c, uint16_t d, | 252 SkNi(uint16_t a, uint16_t b, uint16_t c, uint16_t d, |
| 253 uint16_t e, uint16_t f, uint16_t g, uint16_t h) { | 253 uint16_t e, uint16_t f, uint16_t g, uint16_t h) { |
| 254 fVec = (uint16x8_t) { a,b,c,d, e,f,g,h }; | 254 fVec = (uint16x8_t) { a,b,c,d, e,f,g,h }; |
| 255 } | 255 } |
| 256 | 256 |
| 257 void store(uint16_t vals[8]) const { vst1q_u16(vals, fVec); } | 257 void store(uint16_t vals[8]) const { vst1q_u16(vals, fVec); } |
| 258 | 258 |
| 259 SkNi operator + (const SkNi& o) const { return vaddq_u16(fVec, o.fVec); } | 259 SkNi operator + (const SkNi& o) const { return vaddq_u16(fVec, o.fVec); } |
| (...skipping 16 matching lines...) Expand all Loading... |
| 276 | 276 |
| 277 uint16x8_t fVec; | 277 uint16x8_t fVec; |
| 278 }; | 278 }; |
| 279 | 279 |
| 280 template <> | 280 template <> |
| 281 class SkNi<16, uint8_t> { | 281 class SkNi<16, uint8_t> { |
| 282 public: | 282 public: |
| 283 SkNi(const uint8x16_t& vec) : fVec(vec) {} | 283 SkNi(const uint8x16_t& vec) : fVec(vec) {} |
| 284 | 284 |
| 285 SkNi() {} | 285 SkNi() {} |
| 286 explicit SkNi(uint8_t val) : fVec(vdupq_n_u8(val)) {} | 286 SkNi(uint8_t val) : fVec(vdupq_n_u8(val)) {} |
| 287 static SkNi Load(const uint8_t vals[16]) { return vld1q_u8(vals); } | 287 static SkNi Load(const uint8_t vals[16]) { return vld1q_u8(vals); } |
| 288 | 288 |
| 289 SkNi(uint8_t a, uint8_t b, uint8_t c, uint8_t d, | 289 SkNi(uint8_t a, uint8_t b, uint8_t c, uint8_t d, |
| 290 uint8_t e, uint8_t f, uint8_t g, uint8_t h, | 290 uint8_t e, uint8_t f, uint8_t g, uint8_t h, |
| 291 uint8_t i, uint8_t j, uint8_t k, uint8_t l, | 291 uint8_t i, uint8_t j, uint8_t k, uint8_t l, |
| 292 uint8_t m, uint8_t n, uint8_t o, uint8_t p) { | 292 uint8_t m, uint8_t n, uint8_t o, uint8_t p) { |
| 293 fVec = (uint8x16_t) { a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p }; | 293 fVec = (uint8x16_t) { a,b,c,d, e,f,g,h, i,j,k,l, m,n,o,p }; |
| 294 } | 294 } |
| 295 | 295 |
| 296 void store(uint8_t vals[16]) const { vst1q_u8(vals, fVec); } | 296 void store(uint8_t vals[16]) const { vst1q_u8(vals, fVec); } |
| (...skipping 18 matching lines...) Expand all Loading... |
| 315 uint8x16_t fVec; | 315 uint8x16_t fVec; |
| 316 }; | 316 }; |
| 317 | 317 |
| 318 #undef SHIFT32 | 318 #undef SHIFT32 |
| 319 #undef SHIFT16 | 319 #undef SHIFT16 |
| 320 #undef SHIFT8 | 320 #undef SHIFT8 |
| 321 | 321 |
| 322 } // namespace | 322 } // namespace |
| 323 | 323 |
| 324 #endif//SkNx_neon_DEFINED | 324 #endif//SkNx_neon_DEFINED |
| OLD | NEW |