Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/opts/Sk4x_neon.h

Issue 1018423003: Sk2x for NEON (Closed) Base URL: https://skia.googlesource.com/skia@master
Patch Set: unused header Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/Sk2x_neon.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // It is important _not_ to put header guards here. 1 // It is important _not_ to put header guards here.
2 // This file will be intentionally included three times. 2 // This file will be intentionally included three times.
3 3
4 #include "SkTypes.h" // Keep this before any #ifdef for skbug.com/3362 4 #include "SkTypes.h" // Keep this before any #ifdef for skbug.com/3362
5 5
6 #if defined(SK4X_PREAMBLE) 6 #if defined(SK4X_PREAMBLE)
7 #include <arm_neon.h> 7 #include <arm_neon.h>
8 8
9 // Template metaprogramming to map scalar types to vector types. 9 // Template metaprogramming to map scalar types to vector types.
10 template <typename T> struct SkScalarToSIMD; 10 template <typename T> struct SkScalarToSIMD;
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
75 M(Sk4f) multiply(const Sk4f& o) const { return vmulq_f32(fVec, o.fVec); } 75 M(Sk4f) multiply(const Sk4f& o) const { return vmulq_f32(fVec, o.fVec); }
76 76
77 M(Sk4f) divide (const Sk4f& o) const { 77 M(Sk4f) divide (const Sk4f& o) const {
78 float32x4_t est0 = vrecpeq_f32(o.fVec); 78 float32x4_t est0 = vrecpeq_f32(o.fVec);
79 float32x4_t est1 = vmulq_f32(vrecpsq_f32(est0, o.fVec), est0); 79 float32x4_t est1 = vmulq_f32(vrecpsq_f32(est0, o.fVec), est0);
80 float32x4_t est2 = vmulq_f32(vrecpsq_f32(est1, o.fVec), est1); 80 float32x4_t est2 = vmulq_f32(vrecpsq_f32(est1, o.fVec), est1);
81 return vmulq_f32(est2, fVec); 81 return vmulq_f32(est2, fVec);
82 } 82 }
83 83
84 M(Sk4f) rsqrt() const { 84 M(Sk4f) rsqrt() const {
85 float32x4_t est0 = vrsqrteq_f32(fVec); 85 float32x4_t est0 = vrsqrteq_f32(fVec),
86 float32x4_t est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0); 86 est1 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est0, est0)), est0);
87 float32x4_t est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1); 87 return est1;
88 return est2;
89 } 88 }
90 89
91 M(Sk4f) sqrt() const { return this->multiply(this->rsqrt()); } 90 M(Sk4f) sqrt() const {
91 float32x4_t est1 = this->rsqrt().fVec,
92 // An extra step of Newton's method to refine the estimate of 1/sqrt(this).
93 est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1);
94 return vmulq_f32(fVec, est2);
95 }
92 96
93 M(Sk4i) equal (const Sk4f& o) const { return vreinterpretq_s32_u32(vceqq_f32(fVec, o.fVec)); } 97 M(Sk4i) equal (const Sk4f& o) const { return vreinterpretq_s32_u32(vceqq_f32(fVec, o.fVec)); }
94 M(Sk4i) notEqual (const Sk4f& o) const { return vreinterpretq_s32_u32(vmvnq_u32(vceqq_f32(fVec, o.fVec))); } 98 M(Sk4i) notEqual (const Sk4f& o) const { return vreinterpretq_s32_u32(vmvnq_u32(vceqq_f32(fVec, o.fVec))); }
95 M(Sk4i) lessThan (const Sk4f& o) const { return vreinterpretq_s32_u32(vcltq_f32(fVec, o.fVec)); } 99 M(Sk4i) lessThan (const Sk4f& o) const { return vreinterpretq_s32_u32(vcltq_f32(fVec, o.fVec)); }
96 M(Sk4i) greaterThan (const Sk4f& o) const { return vreinterpretq_s32_u32(vcgtq_f32(fVec, o.fVec)); } 100 M(Sk4i) greaterThan (const Sk4f& o) const { return vreinterpretq_s32_u32(vcgtq_f32(fVec, o.fVec)); }
97 M(Sk4i) lessThanEqual (const Sk4f& o) const { return vreinterpretq_s32_u32(vcleq_f32(fVec, o.fVec)); } 101 M(Sk4i) lessThanEqual (const Sk4f& o) const { return vreinterpretq_s32_u32(vcleq_f32(fVec, o.fVec)); }
98 M(Sk4i) greaterThanEqual(const Sk4f& o) const { return vreinterpretq_s32_u32(vcgeq_f32(fVec, o.fVec)); } 102 M(Sk4i) greaterThanEqual(const Sk4f& o) const { return vreinterpretq_s32_u32(vcgeq_f32(fVec, o.fVec)); }
99 103
100 M(Sk4f) Min(const Sk4f& a, const Sk4f& b) { return vminq_f32(a.fVec, b.fVec); } 104 M(Sk4f) Min(const Sk4f& a, const Sk4f& b) { return vminq_f32(a.fVec, b.fVec); }
101 M(Sk4f) Max(const Sk4f& a, const Sk4f& b) { return vmaxq_f32(a.fVec, b.fVec); } 105 M(Sk4f) Max(const Sk4f& a, const Sk4f& b) { return vmaxq_f32(a.fVec, b.fVec); }
(...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after
213 } 217 }
214 M(Sk4i) ZWCD(const Sk4i& xyzw, const Sk4i& abcd) { 218 M(Sk4i) ZWCD(const Sk4i& xyzw, const Sk4i& abcd) {
215 int32x4x2_t xayb_zcwd = vzipq_s32(xyzw.fVec, abcd.fVec); 219 int32x4x2_t xayb_zcwd = vzipq_s32(xyzw.fVec, abcd.fVec);
216 int32x4x2_t axby_czdw = vzipq_s32(abcd.fVec, xyzw.fVec); 220 int32x4x2_t axby_czdw = vzipq_s32(abcd.fVec, xyzw.fVec);
217 return vuzpq_s32(xayb_zcwd.val[1], axby_czdw.val[1]).val[0]; 221 return vuzpq_s32(xayb_zcwd.val[1], axby_czdw.val[1]).val[0];
218 } 222 }
219 223
220 #undef M 224 #undef M
221 225
222 #endif 226 #endif
OLDNEW
« no previous file with comments | « src/opts/Sk2x_neon.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698