Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(135)

Unified Diff: src/core/SkCanvas.cpp

Issue 2225393002: Optimized implementation of quickReject() (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Cleaning things up Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « include/core/SkCanvas.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/core/SkCanvas.cpp
diff --git a/src/core/SkCanvas.cpp b/src/core/SkCanvas.cpp
index 0b0434dab895963f0132fcbabe7ca99d93b3e8a2..3442c57245def31c09b898a1df1d9efb26adf279 100644
--- a/src/core/SkCanvas.cpp
+++ b/src/core/SkCanvas.cpp
@@ -22,6 +22,7 @@
#include "SkLatticeIter.h"
#include "SkMatrixUtils.h"
#include "SkMetaData.h"
+#include "SkNx.h"
#include "SkPaintPriv.h"
#include "SkPatchUtils.h"
#include "SkPicture.h"
@@ -264,6 +265,13 @@ struct DeviceCM {
}
};
+
+static inline void set_qr_clip_bounds(SkRect* dst, const SkIRect& src) {
mtklein 2016/08/11 01:24:41 Give that this is all inline, it'll probably make
msarett 2016/08/11 16:04:01 Agreed. Went with this->setCachedClipDeviceBounds
msarett 2016/08/11 20:15:16 Changed my mind. Going with qr_clip_bounds(). It
+ // Expand bounds out by 1 in case we are anti-aliasing. We store the
+ // bounds as floats to enable a faster quick reject implementation.
+ SkNx_cast<float>(Sk4i::Load(&src.fLeft) + Sk4i(-1, -1, 1, 1)).store(&dst->fLeft);
+}
+
/* This is the record we keep for each save/restore level in the stack.
Since a level optionally copies the matrix and/or stack, we have pointers
for these fields. If the value is copied for this level, the copy is
@@ -300,8 +308,9 @@ public:
// don't bother initializing fNext
inc_rec();
}
- MCRec(const MCRec& prev) : fRasterClip(prev.fRasterClip), fMatrix(prev.fMatrix),
- fCurDrawDepth(prev.fCurDrawDepth) {
+ MCRec(const MCRec& prev) : fRasterClip(prev.fRasterClip) , fMatrix(prev.fMatrix)
+ , fCurDrawDepth(prev.fCurDrawDepth)
+ {
fFilter = SkSafeRef(prev.fFilter);
fLayer = nullptr;
fTopLayer = prev.fTopLayer;
@@ -632,14 +641,14 @@ bool AutoDrawLooper::doNext(SkDrawFilter::Type drawType) {
void SkCanvas::resetForNextPicture(const SkIRect& bounds) {
this->restoreToCount(1);
- fCachedLocalClipBounds.setEmpty();
- fCachedLocalClipBoundsDirty = true;
fClipStack->reset();
fMCRec->reset(bounds);
// We're peering through a lot of structs here. Only at this scope do we
// know that the device is an SkBitmapDevice (really an SkNoPixelsBitmapDevice).
static_cast<SkBitmapDevice*>(fMCRec->fLayer->fDevice)->setNewSize(bounds.size());
+ set_qr_clip_bounds(&fDeviceClipBounds, bounds);
+ fIsScaleTranslate = true;
}
SkBaseDevice* SkCanvas::init(SkBaseDevice* device, InitFlags flags) {
@@ -650,8 +659,6 @@ SkBaseDevice* SkCanvas::init(SkBaseDevice* device, InitFlags flags) {
// const-cast.
*const_cast<bool*>(&fConservativeRasterClip) = SkToBool(flags & kConservativeRasterClip_InitFlag);
- fCachedLocalClipBounds.setEmpty();
- fCachedLocalClipBoundsDirty = true;
fAllowSoftClip = true;
fAllowSimplifyClip = false;
fDeviceCMDirty = true;
@@ -680,7 +687,10 @@ SkBaseDevice* SkCanvas::init(SkBaseDevice* device, InitFlags flags) {
SkASSERT(fProps.pixelGeometry() == device->surfaceProps().pixelGeometry());
fMCRec->fLayer->fDevice = SkRef(device);
fMCRec->fRasterClip.setRect(device->getGlobalBounds());
+ set_qr_clip_bounds(&fDeviceClipBounds, device->getGlobalBounds());
+ fIsScaleTranslate = true;
mtklein 2016/08/11 01:24:41 Might want to name fConservativeIsScaleTranslate,
msarett 2016/08/11 16:04:00 Done.
}
+
return device;
}
@@ -1095,8 +1105,8 @@ bool SkCanvas::clipRectBounds(const SkRect* bounds, SaveLayerFlags saveLayerFlag
// early exit if the layer's bounds are clipped out
if (!ir.intersect(clipBounds)) {
if (BoundsAffectsClip(saveLayerFlags)) {
- fCachedLocalClipBoundsDirty = true;
fMCRec->fRasterClip.setEmpty();
+ fDeviceClipBounds.setEmpty();
}
return false;
}
@@ -1107,9 +1117,9 @@ bool SkCanvas::clipRectBounds(const SkRect* bounds, SaveLayerFlags saveLayerFlag
if (BoundsAffectsClip(saveLayerFlags)) {
// Simplify the current clips since they will be applied properly during restore()
- fCachedLocalClipBoundsDirty = true;
fClipStack->clipDevRect(ir, SkRegion::kReplace_Op);
fMCRec->fRasterClip.setRect(ir);
+ set_qr_clip_bounds(&fDeviceClipBounds, ir);
}
if (intersection) {
@@ -1301,7 +1311,6 @@ void SkCanvas::internalRestore() {
SkASSERT(fMCStack.count() != 0);
fDeviceCMDirty = true;
- fCachedLocalClipBoundsDirty = true;
fClipStack->restore();
@@ -1335,6 +1344,11 @@ void SkCanvas::internalRestore() {
// no need to update fMCRec, 'cause we're killing the canvas
}
}
+
+ if (fMCRec) {
+ fIsScaleTranslate = fMCRec->fMatrix.isScaleTranslate();
+ set_qr_clip_bounds(&fDeviceClipBounds, fMCRec->fRasterClip.getBounds());
+ }
}
sk_sp<SkSurface> SkCanvas::makeSurface(const SkImageInfo& info, const SkSurfaceProps* props) {
@@ -1489,21 +1503,20 @@ void SkCanvas::concat(const SkMatrix& matrix) {
this->checkForDeferredSave();
fDeviceCMDirty = true;
- fCachedLocalClipBoundsDirty = true;
fMCRec->fMatrix.preConcat(matrix);
-
+ fIsScaleTranslate = fMCRec->fMatrix.isScaleTranslate();
mtklein 2016/08/11 01:24:41 I think we can conservatively approximate this as
mtklein 2016/08/11 01:25:43 err, meant to write not-st x not-st == st.
msarett 2016/08/11 16:04:01 Got it, I think this is a good guess.
this->didConcat(matrix);
}
void SkCanvas::internalSetMatrix(const SkMatrix& matrix) {
fDeviceCMDirty = true;
- fCachedLocalClipBoundsDirty = true;
fMCRec->fMatrix = matrix;
}
void SkCanvas::setMatrix(const SkMatrix& matrix) {
this->checkForDeferredSave();
this->internalSetMatrix(matrix);
+ fIsScaleTranslate = matrix.isScaleTranslate();
this->didSetMatrix(matrix);
}
@@ -1604,7 +1617,6 @@ void SkCanvas::onClipRect(const SkRect& rect, SkRegion::Op op, ClipEdgeStyle edg
AutoValidateClip avc(this);
fDeviceCMDirty = true;
- fCachedLocalClipBoundsDirty = true;
if (isScaleTrans) {
const bool isAA = kSoft_ClipEdgeStyle == edgeStyle;
@@ -1620,6 +1632,8 @@ void SkCanvas::onClipRect(const SkRect& rect, SkRegion::Op op, ClipEdgeStyle edg
path.addRect(rect);
this->SkCanvas::onClipPath(path, op, edgeStyle);
}
+
+ set_qr_clip_bounds(&fDeviceClipBounds, fMCRec->fRasterClip.getBounds());
}
void SkCanvas::clipRRect(const SkRRect& rrect, SkRegion::Op op, bool doAA) {
@@ -1638,7 +1652,6 @@ void SkCanvas::onClipRRect(const SkRRect& rrect, SkRegion::Op op, ClipEdgeStyle
AutoValidateClip avc(this);
fDeviceCMDirty = true;
- fCachedLocalClipBoundsDirty = true;
if (!fAllowSoftClip) {
edgeStyle = kHard_ClipEdgeStyle;
}
@@ -1647,6 +1660,7 @@ void SkCanvas::onClipRRect(const SkRRect& rrect, SkRegion::Op op, ClipEdgeStyle
fMCRec->fRasterClip.op(transformedRRect, this->getTopLayerBounds(), op,
kSoft_ClipEdgeStyle == edgeStyle);
+ set_qr_clip_bounds(&fDeviceClipBounds, fMCRec->fRasterClip.getBounds());
return;
}
@@ -1702,7 +1716,6 @@ void SkCanvas::onClipPath(const SkPath& path, SkRegion::Op op, ClipEdgeStyle edg
AutoValidateClip avc(this);
fDeviceCMDirty = true;
- fCachedLocalClipBoundsDirty = true;
if (!fAllowSoftClip) {
edgeStyle = kHard_ClipEdgeStyle;
}
@@ -1733,6 +1746,7 @@ void SkCanvas::onClipPath(const SkPath& path, SkRegion::Op op, ClipEdgeStyle edg
}
fMCRec->fRasterClip.op(devPath, this->getTopLayerBounds(), op, edgeStyle);
+ set_qr_clip_bounds(&fDeviceClipBounds, fMCRec->fRasterClip.getBounds());
}
void SkCanvas::clipRegion(const SkRegion& rgn, SkRegion::Op op) {
@@ -1744,13 +1758,13 @@ void SkCanvas::onClipRegion(const SkRegion& rgn, SkRegion::Op op) {
AutoValidateClip avc(this);
fDeviceCMDirty = true;
- fCachedLocalClipBoundsDirty = true;
// todo: signal fClipStack that we have a region, and therefore (I guess)
// we have to ignore it, and use the region directly?
fClipStack->clipDevRect(rgn.getBounds(), op);
fMCRec->fRasterClip.op(rgn, op);
+ set_qr_clip_bounds(&fDeviceClipBounds, fMCRec->fRasterClip.getBounds());
}
#ifdef SK_DEBUG
@@ -1807,31 +1821,75 @@ bool SkCanvas::isClipRect() const {
return fMCRec->fRasterClip.isRect();
}
-bool SkCanvas::quickReject(const SkRect& rect) const {
- if (!rect.isFinite())
- return true;
+static inline bool is_nan_or_clipped(const Sk4f& devRect, const Sk4f& devClip) {
+#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2
+ __m128 lltt = _mm_unpacklo_ps(devRect.fVec, devClip.fVec);
mtklein 2016/08/11 01:24:41 Might help to make one rect ltrb and the other LTR
msarett 2016/08/11 16:04:01 Agreed, done.
+ __m128 rrbb = _mm_unpackhi_ps(devClip.fVec, devRect.fVec);
+ __m128 mask = _mm_cmplt_ps(lltt, rrbb);
+ return 0xF != _mm_movemask_ps(mask);
+#elif defined(SK_ARM_HAS_NEON)
+ float32x4_t lltt = vzipq_f32(devRect.fVec, devClip.fVec).val[0];
+ float32x4_t rrbb = vzipq_f32(devClip.fVec, devRect.fVec).val[1];
+ uint32x4_t mask = vcltq_f32(lltt, rrbb);
+ return 0xFFFFFFFFFFFFFFFF != (uint64_t) vmovn_u32(mask);
+#else
+ SkASSERT(false);
mtklein 2016/08/11 01:24:41 ಠ_ಠ
msarett 2016/08/11 16:04:00 Guessing this means I should implement the scalar
+ return false;
+#endif
+}
- if (fMCRec->fRasterClip.isEmpty()) {
- return true;
+// It's important for this function to not be inlined. Otherwise the compiler will share code
+// between the fast path and the slow path, resulting in two slow paths.
+static __attribute__((noinline)) bool quick_reject_slow_path(const SkRect& src,
mtklein 2016/08/11 01:24:41 Let's add SK_NEVER_INLINE alongside SK_ALWAYS_INLI
msarett 2016/08/11 16:04:01 SK_NEVER_INLINE sgtm.
+ const SkRect& deviceClip,
+ const SkMatrix& matrix) {
+ SkRect deviceRect;
+ matrix.mapRect(&deviceRect, src);
+ return !deviceRect.isFinite() || !deviceRect.intersect(deviceClip);
+}
+
+bool SkCanvas::quickReject(const SkRect& src) const {
+ if (!fIsScaleTranslate) {
+ return quick_reject_slow_path(src, fDeviceClipBounds, fMCRec->fMatrix);
mtklein 2016/08/11 01:24:41 We might follow up to see how bad things are if we
msarett 2016/08/11 16:04:00 SGTM
}
- if (fMCRec->fMatrix.hasPerspective()) {
- SkRect dst;
- fMCRec->fMatrix.mapRect(&dst, rect);
- return !SkIRect::Intersects(dst.roundOut(), fMCRec->fRasterClip.getBounds());
- } else {
- const SkRect& clipR = this->getLocalClipBounds();
+ // We inline the implementation of mapScaleTranslate() for the fast path.
+ float sx = fMCRec->fMatrix.getScaleX();
+ float sy = fMCRec->fMatrix.getScaleY();
+ float tx = fMCRec->fMatrix.getTranslateX();
+ float ty = fMCRec->fMatrix.getTranslateY();
+ Sk4f scale(sx, sy, sx, sy);
+ Sk4f trans(tx, ty, tx, ty);
- // for speed, do the most likely reject compares first
- // TODO: should we use | instead, or compare all 4 at once?
- if (rect.fTop >= clipR.fBottom || rect.fBottom <= clipR.fTop) {
- return true;
- }
- if (rect.fLeft >= clipR.fRight || rect.fRight <= clipR.fLeft) {
- return true;
- }
+ // Apply matrix.
+ Sk4f ltrb = Sk4f::Load(&src.fLeft) * scale + trans;
+
+ // Make sure left < right, top < bottom.
+ Sk4f rblt(ltrb[2], ltrb[3], ltrb[0], ltrb[1]);
+ Sk4f min = Sk4f::Min(ltrb, rblt);
+ Sk4f max = Sk4f::Max(ltrb, rblt);
+ // We can extract either pair [0,1] or [2,3] from min and max and be correct, but on
+ // ARM this sequence generates the fastest (a single instruction).
+ Sk4f devRect = Sk4f(min[2], min[3], max[0], max[1]);
+
+ // Check if the device rect is NaN or outside the clip.
+ return is_nan_or_clipped(devRect, Sk4f::Load(&fDeviceClipBounds.fLeft));
+}
+
+bool SkCanvas::quickRejectY(SkScalar top, SkScalar bottom) const {
mtklein 2016/08/11 01:24:41 I think we should consider a pre-patch deleting qu
msarett 2016/08/11 16:04:00 SGTM. I wanted to delete it... Glad to know the
+ if (!fIsScaleTranslate) {
+ // This is only called when we draw text. Will we ever draw rotated or perspective text?
return false;
}
+
+ top = top * fMCRec->fMatrix.getScaleY() + fMCRec->fMatrix.getTranslateY();
mtklein 2016/08/11 01:24:41 :,( "top " lines up so much more nicely with "b
msarett 2016/08/11 16:04:01 Acknowledged.
+ bottom = bottom * fMCRec->fMatrix.getScaleY() + fMCRec->fMatrix.getTranslateY();
+
+ // In the case where the clip is empty and we are provided with a
+ // negative top and positive bottom parameter then this test will return
+ // false even though it will be clipped. We have chosen to exclude that
+ // check as it is rare and would result double the comparisons.
+ return top >= fDeviceClipBounds.fBottom || bottom <= fDeviceClipBounds.fTop;
}
bool SkCanvas::quickReject(const SkPath& path) const {
« no previous file with comments | « include/core/SkCanvas.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698