| Index: src/opts/SkBitmapFilter_opts_SSE2.cpp
|
| diff --git a/src/opts/SkBitmapFilter_opts_SSE2.cpp b/src/opts/SkBitmapFilter_opts_SSE2.cpp
|
| index 04f14863d744fa46c1bd2258efe05c8ac445bcc3..2996f535d97abe1413cb7ed810f631668b29033d 100644
|
| --- a/src/opts/SkBitmapFilter_opts_SSE2.cpp
|
| +++ b/src/opts/SkBitmapFilter_opts_SSE2.cpp
|
| @@ -49,6 +49,7 @@ void highQualityFilter_SSE2(const SkBitmapProcState& s, int x, int y,
|
| const int maxX = s.fBitmap->width();
|
| const int maxY = s.fBitmap->height();
|
| SkAutoTMalloc<SkScalar> xWeights(maxX);
|
| + const SkBitmapFilter* filter = s.getBitmapFilter();
|
|
|
| while (count-- > 0) {
|
| SkPoint srcPt;
|
| @@ -59,34 +60,37 @@ void highQualityFilter_SSE2(const SkBitmapProcState& s, int x, int y,
|
| __m128 weight = _mm_setzero_ps();
|
| __m128 accum = _mm_setzero_ps();
|
|
|
| - int y0 = SkClampMax(SkScalarCeilToInt(srcPt.fY-s.getBitmapFilter()->width()), maxY);
|
| - int y1 = SkClampMax(SkScalarFloorToInt(srcPt.fY+s.getBitmapFilter()->width()+1), maxY);
|
| - int x0 = SkClampMax(SkScalarCeilToInt(srcPt.fX-s.getBitmapFilter()->width()), maxX);
|
| - int x1 = SkClampMax(SkScalarFloorToInt(srcPt.fX+s.getBitmapFilter()->width())+1, maxX);
|
| + int y0 = SkClampMax(SkScalarCeilToInt(srcPt.fY - filter->width()), maxY);
|
| + int y1 = SkClampMax(SkScalarFloorToInt(srcPt.fY + filter->width() + 1), maxY);
|
| + int x0 = SkClampMax(SkScalarCeilToInt(srcPt.fX - filter->width()), maxX);
|
| + int x1 = SkClampMax(SkScalarFloorToInt(srcPt.fX + filter->width()) + 1, maxX);
|
|
|
| for (int srcX = x0; srcX < x1 ; srcX++) {
|
| // Looking these up once instead of each loop is a ~15% speedup.
|
| - xWeights[srcX - x0] = s.getBitmapFilter()->lookupScalar((srcPt.fX - srcX));
|
| + xWeights[srcX - x0] = filter->lookupScalar((srcPt.fX - srcX));
|
| }
|
|
|
| for (int srcY = y0; srcY < y1; srcY++) {
|
| - SkScalar yWeight = s.getBitmapFilter()->lookupScalar((srcPt.fY - srcY));
|
| + SkScalar yWeight = filter->lookupScalar((srcPt.fY - srcY));
|
|
|
| for (int srcX = x0; srcX < x1 ; srcX++) {
|
| SkScalar xWeight = xWeights[srcX - x0];
|
|
|
| SkScalar combined_weight = SkScalarMul(xWeight, yWeight);
|
| + __m128 weightVector = _mm_set1_ps(combined_weight);
|
| + weight = _mm_add_ps( weight, weightVector );
|
|
|
| SkPMColor color = *s.fBitmap->getAddr32(srcX, srcY);
|
| + if (!color) {
|
| + continue;
|
| + }
|
|
|
| __m128i c = _mm_cvtsi32_si128(color);
|
| c = _mm_unpacklo_epi8(c, _mm_setzero_si128());
|
| c = _mm_unpacklo_epi16(c, _mm_setzero_si128());
|
| __m128 cfloat = _mm_cvtepi32_ps(c);
|
|
|
| - __m128 weightVector = _mm_set1_ps(combined_weight);
|
| accum = _mm_add_ps(accum, _mm_mul_ps(cfloat, weightVector));
|
| - weight = _mm_add_ps( weight, weightVector );
|
| }
|
| }
|
|
|
|
|