Index: src/core/SkMatrix.cpp |
diff --git a/src/core/SkMatrix.cpp b/src/core/SkMatrix.cpp |
index f9c8c9d07e78f0ebf4f43ffc183ba27cef2fb5bb..0fd802087fc0c5f52e6da2be56edbfb1b9fcc442 100644 |
--- a/src/core/SkMatrix.cpp |
+++ b/src/core/SkMatrix.cpp |
@@ -1097,7 +1097,8 @@ |
} |
} |
-SkRect SkMatrix::mapRectScaleTranslate(SkRect src) const { |
+void SkMatrix::mapRectScaleTranslate(SkRect* dst, const SkRect& src) const { |
+ SkASSERT(dst); |
SkASSERT(this->isScaleTranslate()); |
SkScalar sx = fMat[kMScaleX]; |
@@ -1106,25 +1107,22 @@ |
SkScalar ty = fMat[kMTransY]; |
Sk4f scale(sx, sy, sx, sy); |
Sk4f trans(tx, ty, tx, ty); |
- |
+ |
Sk4f ltrb = Sk4f::Load(&src.fLeft) * scale + trans; |
// need to sort so we're not inverted |
Sk4f rblt(ltrb[2], ltrb[3], ltrb[0], ltrb[1]); |
Sk4f min = Sk4f::Min(ltrb, rblt); |
Sk4f max = Sk4f::Max(ltrb, rblt); |
- // We can extract either pair [0,1] or [2,3] from min and max and be correct. |
- // However, the current ABI for returning multiple floats is to use only 2 slots in each |
- // vector register. Thus we take [0..1] from min and max, as that perfectly matches the ABI. |
- SkRect dst; |
- Sk4f(min[0], min[1], max[0], max[1]).store(&dst.fLeft); |
- return dst; |
+ // We can extract either pair [0,1] or [2,3] from min and max and be correct, but on |
+ // ARM this sequence generates the fastest (a single instruction). |
+ Sk4f(min[2], min[3], max[0], max[1]).store(&dst->fLeft); |
} |
bool SkMatrix::mapRect(SkRect* dst, const SkRect& src) const { |
SkASSERT(dst); |
if (this->isScaleTranslate()) { |
- *dst = this->mapRectScaleTranslate(src); |
+ this->mapRectScaleTranslate(dst, src); |
return true; |
} else { |
SkPoint quad[4]; |