Chromium Code Reviews| Index: src/core/SkCanvas.cpp |
| diff --git a/src/core/SkCanvas.cpp b/src/core/SkCanvas.cpp |
| index 0b0434dab895963f0132fcbabe7ca99d93b3e8a2..2dd38154ef8894dac9a6e8977586f615d20a3ff6 100644 |
| --- a/src/core/SkCanvas.cpp |
| +++ b/src/core/SkCanvas.cpp |
| @@ -22,6 +22,7 @@ |
| #include "SkLatticeIter.h" |
| #include "SkMatrixUtils.h" |
| #include "SkMetaData.h" |
| +#include "SkNx.h" |
| #include "SkPaintPriv.h" |
| #include "SkPatchUtils.h" |
| #include "SkPicture.h" |
| @@ -632,14 +633,14 @@ bool AutoDrawLooper::doNext(SkDrawFilter::Type drawType) { |
| void SkCanvas::resetForNextPicture(const SkIRect& bounds) { |
| this->restoreToCount(1); |
| - fCachedLocalClipBounds.setEmpty(); |
| - fCachedLocalClipBoundsDirty = true; |
| fClipStack->reset(); |
| fMCRec->reset(bounds); |
| // We're peering through a lot of structs here. Only at this scope do we |
| // know that the device is an SkBitmapDevice (really an SkNoPixelsBitmapDevice). |
| static_cast<SkBitmapDevice*>(fMCRec->fLayer->fDevice)->setNewSize(bounds.size()); |
| + this->setCachedClipDeviceBounds(bounds); |
| + fConservativeIsScaleTranslate = true; |
| } |
| SkBaseDevice* SkCanvas::init(SkBaseDevice* device, InitFlags flags) { |
| @@ -650,8 +651,6 @@ SkBaseDevice* SkCanvas::init(SkBaseDevice* device, InitFlags flags) { |
| // const-cast. |
| *const_cast<bool*>(&fConservativeRasterClip) = SkToBool(flags & kConservativeRasterClip_InitFlag); |
| - fCachedLocalClipBounds.setEmpty(); |
| - fCachedLocalClipBoundsDirty = true; |
| fAllowSoftClip = true; |
| fAllowSimplifyClip = false; |
| fDeviceCMDirty = true; |
| @@ -680,7 +679,10 @@ SkBaseDevice* SkCanvas::init(SkBaseDevice* device, InitFlags flags) { |
| SkASSERT(fProps.pixelGeometry() == device->surfaceProps().pixelGeometry()); |
| fMCRec->fLayer->fDevice = SkRef(device); |
| fMCRec->fRasterClip.setRect(device->getGlobalBounds()); |
| + this->setCachedClipDeviceBounds(device->getGlobalBounds()); |
| + fConservativeIsScaleTranslate = true; |
| } |
| + |
| return device; |
| } |
| @@ -1095,8 +1097,8 @@ bool SkCanvas::clipRectBounds(const SkRect* bounds, SaveLayerFlags saveLayerFlag |
| // early exit if the layer's bounds are clipped out |
| if (!ir.intersect(clipBounds)) { |
| if (BoundsAffectsClip(saveLayerFlags)) { |
| - fCachedLocalClipBoundsDirty = true; |
| fMCRec->fRasterClip.setEmpty(); |
| + fDeviceClipBounds.setEmpty(); |
| } |
| return false; |
| } |
| @@ -1107,9 +1109,9 @@ bool SkCanvas::clipRectBounds(const SkRect* bounds, SaveLayerFlags saveLayerFlag |
| if (BoundsAffectsClip(saveLayerFlags)) { |
| // Simplify the current clips since they will be applied properly during restore() |
| - fCachedLocalClipBoundsDirty = true; |
| fClipStack->clipDevRect(ir, SkRegion::kReplace_Op); |
| fMCRec->fRasterClip.setRect(ir); |
| + this->setCachedClipDeviceBounds(ir); |
| } |
| if (intersection) { |
| @@ -1301,7 +1303,6 @@ void SkCanvas::internalRestore() { |
| SkASSERT(fMCStack.count() != 0); |
| fDeviceCMDirty = true; |
| - fCachedLocalClipBoundsDirty = true; |
| fClipStack->restore(); |
| @@ -1335,6 +1336,11 @@ void SkCanvas::internalRestore() { |
| // no need to update fMCRec, 'cause we're killing the canvas |
| } |
| } |
| + |
| + if (fMCRec) { |
| + fConservativeIsScaleTranslate = fMCRec->fMatrix.isScaleTranslate(); |
| + this->setCachedClipDeviceBounds(fMCRec->fRasterClip.getBounds()); |
| + } |
| } |
| sk_sp<SkSurface> SkCanvas::makeSurface(const SkImageInfo& info, const SkSurfaceProps* props) { |
| @@ -1489,21 +1495,23 @@ void SkCanvas::concat(const SkMatrix& matrix) { |
| this->checkForDeferredSave(); |
| fDeviceCMDirty = true; |
| - fCachedLocalClipBoundsDirty = true; |
| fMCRec->fMatrix.preConcat(matrix); |
| + // We may miss some rare cases here. Ex: rotate 45 then -45. But being data |
| + // independent of preConcat() may allow us to go faster. |
| + fConservativeIsScaleTranslate = fConservativeIsScaleTranslate && matrix.isScaleTranslate(); |
| this->didConcat(matrix); |
| } |
| void SkCanvas::internalSetMatrix(const SkMatrix& matrix) { |
| fDeviceCMDirty = true; |
| - fCachedLocalClipBoundsDirty = true; |
| fMCRec->fMatrix = matrix; |
| } |
| void SkCanvas::setMatrix(const SkMatrix& matrix) { |
| this->checkForDeferredSave(); |
| this->internalSetMatrix(matrix); |
| + fConservativeIsScaleTranslate = matrix.isScaleTranslate(); |
| this->didSetMatrix(matrix); |
| } |
| @@ -1604,7 +1612,6 @@ void SkCanvas::onClipRect(const SkRect& rect, SkRegion::Op op, ClipEdgeStyle edg |
| AutoValidateClip avc(this); |
| fDeviceCMDirty = true; |
| - fCachedLocalClipBoundsDirty = true; |
| if (isScaleTrans) { |
| const bool isAA = kSoft_ClipEdgeStyle == edgeStyle; |
| @@ -1620,6 +1627,8 @@ void SkCanvas::onClipRect(const SkRect& rect, SkRegion::Op op, ClipEdgeStyle edg |
| path.addRect(rect); |
| this->SkCanvas::onClipPath(path, op, edgeStyle); |
| } |
| + |
| + this->setCachedClipDeviceBounds(fMCRec->fRasterClip.getBounds()); |
| } |
| void SkCanvas::clipRRect(const SkRRect& rrect, SkRegion::Op op, bool doAA) { |
| @@ -1638,7 +1647,6 @@ void SkCanvas::onClipRRect(const SkRRect& rrect, SkRegion::Op op, ClipEdgeStyle |
| AutoValidateClip avc(this); |
| fDeviceCMDirty = true; |
| - fCachedLocalClipBoundsDirty = true; |
| if (!fAllowSoftClip) { |
| edgeStyle = kHard_ClipEdgeStyle; |
| } |
| @@ -1647,6 +1655,7 @@ void SkCanvas::onClipRRect(const SkRRect& rrect, SkRegion::Op op, ClipEdgeStyle |
| fMCRec->fRasterClip.op(transformedRRect, this->getTopLayerBounds(), op, |
| kSoft_ClipEdgeStyle == edgeStyle); |
| + this->setCachedClipDeviceBounds(fMCRec->fRasterClip.getBounds()); |
| return; |
| } |
| @@ -1702,7 +1711,6 @@ void SkCanvas::onClipPath(const SkPath& path, SkRegion::Op op, ClipEdgeStyle edg |
| AutoValidateClip avc(this); |
| fDeviceCMDirty = true; |
| - fCachedLocalClipBoundsDirty = true; |
| if (!fAllowSoftClip) { |
| edgeStyle = kHard_ClipEdgeStyle; |
| } |
| @@ -1733,6 +1741,7 @@ void SkCanvas::onClipPath(const SkPath& path, SkRegion::Op op, ClipEdgeStyle edg |
| } |
| fMCRec->fRasterClip.op(devPath, this->getTopLayerBounds(), op, edgeStyle); |
| + this->setCachedClipDeviceBounds(fMCRec->fRasterClip.getBounds()); |
| } |
| void SkCanvas::clipRegion(const SkRegion& rgn, SkRegion::Op op) { |
| @@ -1744,13 +1753,13 @@ void SkCanvas::onClipRegion(const SkRegion& rgn, SkRegion::Op op) { |
| AutoValidateClip avc(this); |
| fDeviceCMDirty = true; |
| - fCachedLocalClipBoundsDirty = true; |
| // todo: signal fClipStack that we have a region, and therefore (I guess) |
| // we have to ignore it, and use the region directly? |
| fClipStack->clipDevRect(rgn.getBounds(), op); |
| fMCRec->fRasterClip.op(rgn, op); |
| + this->setCachedClipDeviceBounds(fMCRec->fRasterClip.getBounds()); |
| } |
| #ifdef SK_DEBUG |
| @@ -1807,31 +1816,61 @@ bool SkCanvas::isClipRect() const { |
| return fMCRec->fRasterClip.isRect(); |
| } |
| -bool SkCanvas::quickReject(const SkRect& rect) const { |
| - if (!rect.isFinite()) |
| - return true; |
| +static inline bool is_nan_or_clipped(const Sk4f& devRect, const Sk4f& devClip) { |
| +#if SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_SSE2 |
| + __m128 lLtT = _mm_unpacklo_ps(devRect.fVec, devClip.fVec); |
| + __m128 RrBb = _mm_unpackhi_ps(devClip.fVec, devRect.fVec); |
| + __m128 mask = _mm_cmplt_ps(lLtT, RrBb); |
| + return 0xF != _mm_movemask_ps(mask); |
| +#elif defined(SK_ARM_HAS_NEON) |
| + float32x4_t lLtT = vzipq_f32(devRect.fVec, devClip.fVec).val[0]; |
| + float32x4_t RrBb = vzipq_f32(devClip.fVec, devRect.fVec).val[1]; |
| + uint32x4_t mask = vcltq_f32(lLtT, RrBb); |
| + return 0xFFFFFFFFFFFFFFFF != (uint64_t) vmovn_u32(mask); |
| +#else |
| + SkRect devRectAsRect; |
| + SkRect devClipAsRect; |
| + devRect.store(&devRectAsRect.fLeft); |
| + devClip.store(&devClipAsRect.fLeft); |
| + return !devRectAsRect.isFinite() || !devRectAsRect.intersect(devClipAsRect); |
| +#endif |
| +} |
| - if (fMCRec->fRasterClip.isEmpty()) { |
| - return true; |
| +// It's important for this function to not be inlined. Otherwise the compiler will share code |
| +// between the fast path and the slow path, resulting in two slow paths. |
| +static SK_NEVER_INLINE bool quick_reject_slow_path(const SkRect& src, const SkRect& deviceClip, |
| + const SkMatrix& matrix) { |
| + SkRect deviceRect; |
| + matrix.mapRect(&deviceRect, src); |
| + return !deviceRect.isFinite() || !deviceRect.intersect(deviceClip); |
| +} |
| + |
| +bool SkCanvas::quickReject(const SkRect& src) const { |
|
reed1
2016/08/11 18:36:28
SkASSERT(compute_float_from_rasterclip_bounds == f
msarett
2016/08/11 20:15:16
Done.
|
| + if (!fConservativeIsScaleTranslate) { |
| + return quick_reject_slow_path(src, fDeviceClipBounds, fMCRec->fMatrix); |
| } |
| - if (fMCRec->fMatrix.hasPerspective()) { |
| - SkRect dst; |
| - fMCRec->fMatrix.mapRect(&dst, rect); |
| - return !SkIRect::Intersects(dst.roundOut(), fMCRec->fRasterClip.getBounds()); |
| - } else { |
| - const SkRect& clipR = this->getLocalClipBounds(); |
| + // We inline the implementation of mapScaleTranslate() for the fast path. |
| + float sx = fMCRec->fMatrix.getScaleX(); |
| + float sy = fMCRec->fMatrix.getScaleY(); |
| + float tx = fMCRec->fMatrix.getTranslateX(); |
| + float ty = fMCRec->fMatrix.getTranslateY(); |
| + Sk4f scale(sx, sy, sx, sy); |
| + Sk4f trans(tx, ty, tx, ty); |
| - // for speed, do the most likely reject compares first |
| - // TODO: should we use | instead, or compare all 4 at once? |
| - if (rect.fTop >= clipR.fBottom || rect.fBottom <= clipR.fTop) { |
| - return true; |
| - } |
| - if (rect.fLeft >= clipR.fRight || rect.fRight <= clipR.fLeft) { |
| - return true; |
| - } |
| - return false; |
| - } |
| + // Apply matrix. |
| + Sk4f ltrb = Sk4f::Load(&src.fLeft) * scale + trans; |
| + |
| + // Make sure left < right, top < bottom. |
| + Sk4f rblt(ltrb[2], ltrb[3], ltrb[0], ltrb[1]); |
| + Sk4f min = Sk4f::Min(ltrb, rblt); |
| + Sk4f max = Sk4f::Max(ltrb, rblt); |
| + // We can extract either pair [0,1] or [2,3] from min and max and be correct, but on |
| + // ARM this sequence generates the fastest code (a single instruction). |
| + Sk4f devRect = Sk4f(min[2], min[3], max[0], max[1]); |
| + |
| + // Check if the device rect is NaN or outside the clip. |
| + return is_nan_or_clipped(devRect, Sk4f::Load(&fDeviceClipBounds.fLeft)); |
| } |
| bool SkCanvas::quickReject(const SkPath& path) const { |
| @@ -1880,6 +1919,12 @@ bool SkCanvas::getClipDeviceBounds(SkIRect* bounds) const { |
| return true; |
| } |
| +void SkCanvas::setCachedClipDeviceBounds(const SkIRect& bounds) { |
| + // Expand bounds out by 1 in case we are anti-aliasing. We store the |
| + // bounds as floats to enable a faster quick reject implementation. |
| + SkNx_cast<float>(Sk4i::Load(&bounds.fLeft) + Sk4i(-1,-1,1,1)).store(&fDeviceClipBounds.fLeft); |
| +} |
| + |
| const SkMatrix& SkCanvas::getTotalMatrix() const { |
| return fMCRec->fMatrix; |
| } |