OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2015 Google Inc. | 2 * Copyright 2015 Google Inc. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
6 */ | 6 */ |
7 | 7 |
8 #ifndef SkNx_neon_DEFINED | 8 #ifndef SkNx_neon_DEFINED |
9 #define SkNx_neon_DEFINED | 9 #define SkNx_neon_DEFINED |
10 | 10 |
(...skipping 278 matching lines...)
// Bitwise select that uses this vector as the mask: vbslq_u16 takes each result
// bit from t where the matching bit of fVec is 1 and from e where it is 0.
// Both diff columns are identical for this method.
289 SkNx thenElse(const SkNx& t, const SkNx& e) const { | 289 SkNx thenElse(const SkNx& t, const SkNx& e) const { |
290 return vbslq_u16(fVec, t.fVec, e.fVec); | 290 return vbslq_u16(fVec, t.fVec, e.fVec); |
291 } | 291 } |
292 | 292 |
293 uint16x8_t fVec; | 293 uint16x8_t fVec; |
294 }; | 294 }; |
295 | 295 |
296 template <> | 296 template <> |
297 class SkNx<4, uint8_t> { | 297 class SkNx<4, uint8_t> { |
298 public: | 298 public: |
// NEW column only: a uint32_t type whose alignment is lowered to 1 byte with the
// GCC/Clang aligned attribute. Load() and store() in the NEW column cast their
// void* argument to a pointer to this type instead of to plain uint32_t.
| 299 typedef uint32_t __attribute__((aligned(1))) unaligned_uint32_t; |
| 300 |
// Wraps an existing 8-lane NEON byte vector without modifying it.
299 SkNx(const uint8x8_t& vec) : fVec(vec) {} | 301 SkNx(const uint8x8_t& vec) : fVec(vec) {} |
300 | 302 |
// Default constructor: fVec is not given a value here.
301 SkNx() {} | 303 SkNx() {} |
// Builds the vector from four bytes: a,b,c,d occupy lanes 0-3 and the
// remaining lanes 4-7 of the underlying uint8x8_t are zero.
302 SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d) { | 304 SkNx(uint8_t a, uint8_t b, uint8_t c, uint8_t d) { |
303 fVec = (uint8x8_t){a,b,c,d, 0,0,0,0}; | 305 fVec = (uint8x8_t){a,b,c,d, 0,0,0,0}; |
304 } | 306 } |
// Reads one 32-bit word (four bytes) from ptr. vld1_dup_u32 copies that word
// into both 32-bit lanes, so after the cast to uint8x8_t the four bytes sit in
// lanes 0-3 and are repeated in lanes 4-7.
// The columns differ only in the pointer cast: OLD uses const uint32_t*, NEW
// uses const unaligned_uint32_t* (the aligned(1) typedef on NEW line 299),
// presumably so that ptr is not assumed to be 4-byte aligned.
305 static SkNx Load(const void* ptr) { | 307 static SkNx Load(const void* ptr) { |
306 return (uint8x8_t)vld1_dup_u32((const uint32_t*)ptr); | 308 return (uint8x8_t)vld1_dup_u32((const unaligned_uint32_t*)ptr); |
307 } | 309 }
// Writes four bytes to ptr: fVec is reinterpreted as two 32-bit lanes and
// vst1_lane_u32 stores lane 0, i.e. byte lanes 0-3. Lanes 4-7 are not written.
// The `return` forwards the intrinsic's void result; nothing is returned.
// The columns differ only in the pointer cast: OLD uses uint32_t*, NEW uses
// unaligned_uint32_t* (the aligned(1) typedef on NEW line 299), presumably so
// that ptr is not assumed to be 4-byte aligned.
308 void store(void* ptr) const { | 310 void store(void* ptr) const { |
309 return vst1_lane_u32((uint32_t*)ptr, (uint32x2_t)fVec, 0); | 311 return vst1_lane_u32((unaligned_uint32_t*)ptr, (uint32x2_t)fVec, 0); |
310 } | 312 }
// Returns byte lane k of the vector; k is asserted to be in [0, 4).
// Both diff columns are identical for this method.
311 uint8_t operator[](int k) const { | 313 uint8_t operator[](int k) const { |
312 SkASSERT(0 <= k && k < 4); | 314 SkASSERT(0 <= k && k < 4); |
// Copy the vector into a union so its lanes can be indexed as plain bytes.
313 union { uint8x8_t v; uint8_t us[8]; } pun = {fVec}; | 315 union { uint8x8_t v; uint8_t us[8]; } pun = {fVec}; |
// The & 3 mask keeps the index within the first four bytes for any k.
314 return pun.us[k&3]; | 316 return pun.us[k&3]; |
315 } | 317 }
316 | 318 |
317 // TODO as needed | 319 // TODO as needed |
318 | 320 |
319 uint8x8_t fVec; | 321 uint8x8_t fVec; |
(...skipping 176 matching lines...)
496 uint16x4x4_t rgba = {{ | 498 uint16x4x4_t rgba = {{ |
497 r.fVec, | 499 r.fVec, |
498 g.fVec, | 500 g.fVec, |
499 b.fVec, | 501 b.fVec, |
500 a.fVec, | 502 a.fVec, |
501 }}; | 503 }}; |
502 vst4_u16((uint16_t*) dst, rgba); | 504 vst4_u16((uint16_t*) dst, rgba); |
503 } | 505 } |
504 | 506 |
505 #endif//SkNx_neon_DEFINED | 507 #endif//SkNx_neon_DEFINED |
OLD | NEW |