src/opts/Sk2x_neon.h - Issue 1024993002: Sk2x::invert() and Sk2x::approxInvert()

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: src/opts/Sk2x_neon.h

Issue 1024993002: Sk2x::invert() and Sk2x::approxInvert() (Closed) Base URL: https://skia.googlesource.com/skia@master

Patch Set: invert() and approxInvert() Created 5 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/opts/Sk2x_neon.h

diff --git a/src/opts/Sk2x_neon.h b/src/opts/Sk2x_neon.h

index ef61df4823a0327cc50d6331e6625fe59c6444ab..8e6e46164b9fde728303ed43854cd5798335055a 100644

--- a/src/opts/Sk2x_neon.h

+++ b/src/opts/Sk2x_neon.h

@@ -38,6 +38,18 @@ M(Sk2f&) operator=(const Sk2f& o) { fVec = o.fVec; return *this; }

M(Sk2f) Load(const float vals[2]) { return vld1_f32(vals); }

M(void) store(float vals[2]) const { vst1_f32(vals, fVec); }

+M(Sk2f) approxInvert() const {

+ float32x2_t est0 = vrecpe_f32(fVec),

+ est1 = vmul_f32(vrecps_f32(est0, fVec), est0);

+ return est1;

+}

+M(Sk2f) invert() const {

+ float32x2_t est1 = this->approxInvert().fVec,

+ est2 = vmul_f32(vrecps_f32(est1, fVec), est1);

+ return est2;

+}

M(Sk2f) add(const Sk2f& o) const { return vadd_f32(fVec, o.fVec); }

M(Sk2f) subtract(const Sk2f& o) const { return vsub_f32(fVec, o.fVec); }

M(Sk2f) multiply(const Sk2f& o) const { return vmul_f32(fVec, o.fVec); }

@@ -45,10 +57,7 @@ M(Sk2f) divide(const Sk2f& o) const {

#if defined(SK_CPU_ARM64)

return vdiv_f32(fVec, o.fVec);

#else

- float32x2_t est0 = vrecpe_f32(o.fVec),

- est1 = vmul_f32(vrecps_f32(est0, o.fVec), est0),

- est2 = vmul_f32(vrecps_f32(est1, o.fVec), est1);

- return vmul_f32(est2, fVec);

+ return vmul_f32(fVec, o.invert().fVec);

#endif

}

@@ -99,6 +108,19 @@ M(Sk2f) sqrt() const {

}

M(Sk2d) sqrt() const { return vsqrtq_f64(fVec); }

+ M(Sk2d) approxInvert() const {

+ float64x2_t est0 = vrecpeq_f64(fVec),

+ est1 = vmulq_f64(vrecpsq_f64(est0, fVec), est0);

+ return est1;

+ }

+ M(Sk2d) invert() const {

+ float64x2_t est1 = this->approxInvert().fVec,

+ est2 = vmulq_f64(vrecpsq_f64(est1, fVec), est1),

+ est3 = vmulq_f64(vrecpsq_f64(est2, fVec), est2);

+ return est3;

+ }

#else // Scalar implementation for 32-bit chips, which don't have float64x2_t.

M() Sk2x() {}

M() Sk2x(double val) { fVec[0] = fVec[1] = val; }

@@ -126,6 +148,9 @@ M(Sk2f) sqrt() const {

M(Sk2d) rsqrt() const { return Sk2d(1.0/::sqrt(fVec[0]), 1.0/::sqrt(fVec[1])); }

M(Sk2d) sqrt() const { return Sk2d( ::sqrt(fVec[0]), ::sqrt(fVec[1])); }

+ M(Sk2d) invert() const { return Sk2d(1.0 / fVec[0], 1.0 / fVec[1]); }

+ M(Sk2d) approxInvert() const { return this->invert(); }

#endif

#undef M

« no previous file with comments | « src/core/Sk2x.h ('k') | src/opts/Sk2x_none.h » ('j') | no next file with comments »