Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(706)

Unified Diff: src/opts/Sk4x_neon.h

Issue 1027753003: Add divide to Sk2x, use native vdiv and vsqrt on ARM 64. (Closed) Base URL: https://skia.googlesource.com/skia@master
Patch Set: Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/opts/Sk2x_sse.h ('k') | tests/Sk2xTest.cpp » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
Index: src/opts/Sk4x_neon.h
diff --git a/src/opts/Sk4x_neon.h b/src/opts/Sk4x_neon.h
index 3f35fe785b9af2c39ae9ca948e5cacee698ca41f..c86fdea6894dbe285c48bd477f405d14c09feb90 100644
--- a/src/opts/Sk4x_neon.h
+++ b/src/opts/Sk4x_neon.h
@@ -75,10 +75,14 @@ M(Sk4f) subtract(const Sk4f& o) const { return vsubq_f32(fVec, o.fVec); }
M(Sk4f) multiply(const Sk4f& o) const { return vmulq_f32(fVec, o.fVec); }
M(Sk4f) divide (const Sk4f& o) const {
- float32x4_t est0 = vrecpeq_f32(o.fVec);
- float32x4_t est1 = vmulq_f32(vrecpsq_f32(est0, o.fVec), est0);
- float32x4_t est2 = vmulq_f32(vrecpsq_f32(est1, o.fVec), est1);
+#if defined(SK_CPU_ARM64)
+ return vdivq_f32(fVec, o.fVec);
+#else
+ float32x4_t est0 = vrecpeq_f32(o.fVec),
+ est1 = vmulq_f32(vrecpsq_f32(est0, o.fVec), est0),
+ est2 = vmulq_f32(vrecpsq_f32(est1, o.fVec), est1);
return vmulq_f32(est2, fVec);
+#endif
}
M(Sk4f) rsqrt() const {
@@ -88,10 +92,14 @@ M(Sk4f) rsqrt() const {
}
M(Sk4f) sqrt() const {
+#if defined(SK_CPU_ARM64)
+ return vsqrtq_f32(fVec);
+#else
float32x4_t est1 = this->rsqrt().fVec,
// An extra step of Newton's method to refine the estimate of 1/sqrt(this).
est2 = vmulq_f32(vrsqrtsq_f32(fVec, vmulq_f32(est1, est1)), est1);
return vmulq_f32(fVec, est2);
+#endif
}
M(Sk4i) equal (const Sk4f& o) const { return vreinterpretq_s32_u32(vceqq_f32(fVec, o.fVec)); }
« no previous file with comments | « src/opts/Sk2x_sse.h ('k') | tests/Sk2xTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698