Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(441)

Side by Side Diff: src/opts/Sk4x_neon.h

Issue 1027753003: Add divide to Sk2x, use native vdiv and vsqrt on ARM 64. (Closed) Base URL: https://skia.googlesource.com/skia@master
Patch Set: Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/Sk2x_sse.h ('k') | tests/Sk2xTest.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // It is important _not_ to put header guards here. 1 // It is important _not_ to put header guards here.
2 // This file will be intentionally included three times. 2 // This file will be intentionally included three times.
3 3
4 #include "SkTypes.h" // Keep this before any #ifdef for skbug.com/3362 4 #include "SkTypes.h" // Keep this before any #ifdef for skbug.com/3362
5 5
6 #if defined(SK4X_PREAMBLE) 6 #if defined(SK4X_PREAMBLE)
7 #include <arm_neon.h> 7 #include <arm_neon.h>
8 8
9 // Template metaprogramming to map scalar types to vector types. 9 // Template metaprogramming to map scalar types to vector types.
10 template <typename T> struct SkScalarToSIMD; 10 template <typename T> struct SkScalarToSIMD;
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
68 M(Sk4i) cast<Sk4i>() const { return vcvtq_s32_f32(fVec); } 68 M(Sk4i) cast<Sk4i>() const { return vcvtq_s32_f32(fVec); }
69 69
70 // We're going to skip allTrue(), anyTrue(), and bit-manipulators 70 // We're going to skip allTrue(), anyTrue(), and bit-manipulators
71 // for Sk4f. Code that calls them probably does so accidentally. 71 // for Sk4f. Code that calls them probably does so accidentally.
72 // Ask msarett or mtklein to fill these in if you really need them. 72 // Ask msarett or mtklein to fill these in if you really need them.
73 M(Sk4f) add (const Sk4f& o) const { return vaddq_f32(fVec, o.fVec); } 73 M(Sk4f) add (const Sk4f& o) const { return vaddq_f32(fVec, o.fVec); }
74 M(Sk4f) subtract(const Sk4f& o) const { return vsubq_f32(fVec, o.fVec); } 74 M(Sk4f) subtract(const Sk4f& o) const { return vsubq_f32(fVec, o.fVec); }
75 M(Sk4f) multiply(const Sk4f& o) const { return vmulq_f32(fVec, o.fVec); } 75 M(Sk4f) multiply(const Sk4f& o) const { return vmulq_f32(fVec, o.fVec); }
76 76
77 M(Sk4f) divide (const Sk4f& o) const { 77 M(Sk4f) divide (const Sk4f& o) const {
78 float32x4_t est0 = vrecpeq_f32(o.fVec); 78 #if defined(SK_CPU_ARM64)
79 float32x4_t est1 = vmulq_f32(vrecpsq_f32(est0, o.fVec), est0); 79 return vdivq_f32(fVec, o.fVec);
80 float32x4_t est2 = vmulq_f32(vrecpsq_f32(est1, o.fVec), est1); 80 #else
81 float32x4_t est0 = vrecpeq_f32(o.fVec),
82 est1 = vmulq_f32(vrecpsq_f32(est0, o.fVec), est0),
83 est2 = vmulq_f32(vrecpsq_f32(est1, o.fVec), est1);
81 return vmulq_f32(est2, fVec); 84 return vmulq_f32(est2, fVec);
85 #endif
82 } 86 }
83 87
84 M(Sk4f) rsqrt() const { 88 M(Sk4f) rsqrt() const {
85 float32x4_t est0 = vrsqrteq_f32(fVec), 89 float32x4_t est0 = vrsqrteq_f32(fVec),
86 est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0); 90 est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0);
87 return est1; 91 return est1;
88 } 92 }
89 93
90 M(Sk4f) sqrt() const { 94 M(Sk4f) sqrt() const {
95 #if defined(SK_CPU_ARM64)
96 return vsqrtq_f32(fVec);
97 #else
91 float32x4_t est1 = this->rsqrt().fVec, 98 float32x4_t est1 = this->rsqrt().fVec,
92 // An extra step of Newton's method to refine the estimate of 1/sqrt(this). 99 // An extra step of Newton's method to refine the estimate of 1/sqrt(this).
93 est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1); 100 est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1);
94 return vmulq_f32(fVec, est2); 101 return vmulq_f32(fVec, est2);
102 #endif
95 } 103 }
96 104
97 M(Sk4i) equal (const Sk4f& o) const { return vreinterpretq_s32_u32(vceqq_f32(fVec, o.fVec)); } 105 M(Sk4i) equal (const Sk4f& o) const { return vreinterpretq_s32_u32(vceqq_f32(fVec, o.fVec)); }
98 M(Sk4i) notEqual (const Sk4f& o) const { return vreinterpretq_s32_u32(vmvnq_u32(vceqq_f32(fVec, o.fVec))); } 106 M(Sk4i) notEqual (const Sk4f& o) const { return vreinterpretq_s32_u32(vmvnq_u32(vceqq_f32(fVec, o.fVec))); }
99 M(Sk4i) lessThan (const Sk4f& o) const { return vreinterpretq_s32_u32(vcltq_f32(fVec, o.fVec)); } 107 M(Sk4i) lessThan (const Sk4f& o) const { return vreinterpretq_s32_u32(vcltq_f32(fVec, o.fVec)); }
100 M(Sk4i) greaterThan (const Sk4f& o) const { return vreinterpretq_s32_u32(vcgtq_f32(fVec, o.fVec)); } 108 M(Sk4i) greaterThan (const Sk4f& o) const { return vreinterpretq_s32_u32(vcgtq_f32(fVec, o.fVec)); }
101 M(Sk4i) lessThanEqual (const Sk4f& o) const { return vreinterpretq_s32_u32(vcleq_f32(fVec, o.fVec)); } 109 M(Sk4i) lessThanEqual (const Sk4f& o) const { return vreinterpretq_s32_u32(vcleq_f32(fVec, o.fVec)); }
102 M(Sk4i) greaterThanEqual(const Sk4f& o) const { return vreinterpretq_s32_u32(vcgeq_f32(fVec, o.fVec)); } 110 M(Sk4i) greaterThanEqual(const Sk4f& o) const { return vreinterpretq_s32_u32(vcgeq_f32(fVec, o.fVec)); }
103 111
104 M(Sk4f) Min(const Sk4f& a, const Sk4f& b) { return vminq_f32(a.fVec, b.fVec); } 112 M(Sk4f) Min(const Sk4f& a, const Sk4f& b) { return vminq_f32(a.fVec, b.fVec); }
(...skipping 112 matching lines...) Expand 10 before | Expand all | Expand 10 after
217 } 225 }
218 M(Sk4i) ZWCD(const Sk4i& xyzw, const Sk4i& abcd) { 226 M(Sk4i) ZWCD(const Sk4i& xyzw, const Sk4i& abcd) {
219 int32x4x2_t xayb_zcwd = vzipq_s32(xyzw.fVec, abcd.fVec); 227 int32x4x2_t xayb_zcwd = vzipq_s32(xyzw.fVec, abcd.fVec);
220 int32x4x2_t axby_czdw = vzipq_s32(abcd.fVec, xyzw.fVec); 228 int32x4x2_t axby_czdw = vzipq_s32(abcd.fVec, xyzw.fVec);
221 return vuzpq_s32(xayb_zcwd.val[1], axby_czdw.val[1]).val[0]; 229 return vuzpq_s32(xayb_zcwd.val[1], axby_czdw.val[1]).val[0];
222 } 230 }
223 231
224 #undef M 232 #undef M
225 233
226 #endif 234 #endif
OLDNEW
« no previous file with comments | « src/opts/Sk2x_sse.h ('k') | tests/Sk2xTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698