| OLD | NEW | 
|    1 // It is important _not_ to put header guards here. |    1 // It is important _not_ to put header guards here. | 
|    2 // This file will be intentionally included three times. |    2 // This file will be intentionally included three times. | 
|    3  |    3  | 
|    4 #include "SkTypes.h"  // Keep this before any #ifdef for skbug.com/3362 |    4 #include "SkTypes.h"  // Keep this before any #ifdef for skbug.com/3362 | 
|    5  |    5  | 
|    6 #if defined(SK4X_PREAMBLE) |    6 #if defined(SK4X_PREAMBLE) | 
|    7     #include <arm_neon.h> |    7     #include <arm_neon.h> | 
|    8  |    8  | 
|    9     // Template metaprogramming to map scalar types to vector types. |    9     // Template metaprogramming to map scalar types to vector types. | 
|   10     template <typename T> struct SkScalarToSIMD; |   10     template <typename T> struct SkScalarToSIMD; | 
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|   75 M(Sk4f) multiply(const Sk4f& o) const { return vmulq_f32(fVec, o.fVec); } |   75 M(Sk4f) multiply(const Sk4f& o) const { return vmulq_f32(fVec, o.fVec); } | 
|   76  |   76  | 
|   77 M(Sk4f) divide  (const Sk4f& o) const { |   77 M(Sk4f) divide  (const Sk4f& o) const { | 
|   78     float32x4_t est0 = vrecpeq_f32(o.fVec); |   78     float32x4_t est0 = vrecpeq_f32(o.fVec); | 
|   79     float32x4_t est1 = vmulq_f32(vrecpsq_f32(est0, o.fVec), est0); |   79     float32x4_t est1 = vmulq_f32(vrecpsq_f32(est0, o.fVec), est0); | 
|   80     float32x4_t est2 = vmulq_f32(vrecpsq_f32(est1, o.fVec), est1); |   80     float32x4_t est2 = vmulq_f32(vrecpsq_f32(est1, o.fVec), est1); | 
|   81     return vmulq_f32(est2, fVec); |   81     return vmulq_f32(est2, fVec); | 
|   82 } |   82 } | 
|   83  |   83  | 
|   84 M(Sk4f) rsqrt() const { |   84 M(Sk4f) rsqrt() const { | 
|   85     float32x4_t est0 = vrsqrteq_f32(fVec); |   85     float32x4_t est0 = vrsqrteq_f32(fVec), | 
|   86     float32x4_t est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0
     ); |   86                 est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0
     ); | 
|   87     float32x4_t est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1
     ); |   87     return est1; | 
|   88     return est2; |  | 
|   89 } |   88 } | 
|   90  |   89  | 
|   91 M(Sk4f)  sqrt() const { return this->multiply(this->rsqrt()); } |   90 M(Sk4f)  sqrt() const { | 
 |   91     float32x4_t est1 = this->rsqrt().fVec, | 
 |   92     // An extra step of Newton's method to refine the estimate of 1/sqrt(this). | 
 |   93                 est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1
     ); | 
 |   94     return vmulq_f32(fVec, est2); | 
 |   95 } | 
|   92  |   96  | 
|   93 M(Sk4i) equal           (const Sk4f& o) const { return vreinterpretq_s32_u32(vce
     qq_f32(fVec, o.fVec)); } |   97 M(Sk4i) equal           (const Sk4f& o) const { return vreinterpretq_s32_u32(vce
     qq_f32(fVec, o.fVec)); } | 
|   94 M(Sk4i) notEqual        (const Sk4f& o) const { return vreinterpretq_s32_u32(vmv
     nq_u32(vceqq_f32(fVec, o.fVec))); } |   98 M(Sk4i) notEqual        (const Sk4f& o) const { return vreinterpretq_s32_u32(vmv
     nq_u32(vceqq_f32(fVec, o.fVec))); } | 
|   95 M(Sk4i) lessThan        (const Sk4f& o) const { return vreinterpretq_s32_u32(vcl
     tq_f32(fVec, o.fVec)); } |   99 M(Sk4i) lessThan        (const Sk4f& o) const { return vreinterpretq_s32_u32(vcl
     tq_f32(fVec, o.fVec)); } | 
|   96 M(Sk4i) greaterThan     (const Sk4f& o) const { return vreinterpretq_s32_u32(vcg
     tq_f32(fVec, o.fVec)); } |  100 M(Sk4i) greaterThan     (const Sk4f& o) const { return vreinterpretq_s32_u32(vcg
     tq_f32(fVec, o.fVec)); } | 
|   97 M(Sk4i) lessThanEqual   (const Sk4f& o) const { return vreinterpretq_s32_u32(vcl
     eq_f32(fVec, o.fVec)); } |  101 M(Sk4i) lessThanEqual   (const Sk4f& o) const { return vreinterpretq_s32_u32(vcl
     eq_f32(fVec, o.fVec)); } | 
|   98 M(Sk4i) greaterThanEqual(const Sk4f& o) const { return vreinterpretq_s32_u32(vcg
     eq_f32(fVec, o.fVec)); } |  102 M(Sk4i) greaterThanEqual(const Sk4f& o) const { return vreinterpretq_s32_u32(vcg
     eq_f32(fVec, o.fVec)); } | 
|   99  |  103  | 
|  100 M(Sk4f) Min(const Sk4f& a, const Sk4f& b) { return vminq_f32(a.fVec, b.fVec); } |  104 M(Sk4f) Min(const Sk4f& a, const Sk4f& b) { return vminq_f32(a.fVec, b.fVec); } | 
|  101 M(Sk4f) Max(const Sk4f& a, const Sk4f& b) { return vmaxq_f32(a.fVec, b.fVec); } |  105 M(Sk4f) Max(const Sk4f& a, const Sk4f& b) { return vmaxq_f32(a.fVec, b.fVec); } | 
| (...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
|  213 } |  217 } | 
|  214 M(Sk4i) ZWCD(const Sk4i& xyzw, const Sk4i& abcd) { |  218 M(Sk4i) ZWCD(const Sk4i& xyzw, const Sk4i& abcd) { | 
|  215     int32x4x2_t xayb_zcwd = vzipq_s32(xyzw.fVec, abcd.fVec); |  219     int32x4x2_t xayb_zcwd = vzipq_s32(xyzw.fVec, abcd.fVec); | 
|  216     int32x4x2_t axby_czdw = vzipq_s32(abcd.fVec, xyzw.fVec); |  220     int32x4x2_t axby_czdw = vzipq_s32(abcd.fVec, xyzw.fVec); | 
|  217     return vuzpq_s32(xayb_zcwd.val[1], axby_czdw.val[1]).val[0]; |  221     return vuzpq_s32(xayb_zcwd.val[1], axby_czdw.val[1]).val[0]; | 
|  218 } |  222 } | 
|  219  |  223  | 
|  220 #undef M |  224 #undef M | 
|  221  |  225  | 
|  222 #endif |  226 #endif | 
| OLD | NEW |