| Index: src/opts/SkBitmapFilter_opts_SSE2.cpp
|
| diff --git a/src/opts/SkBitmapFilter_opts_SSE2.cpp b/src/opts/SkBitmapFilter_opts_SSE2.cpp
|
| index 17aaa229097e71e4006e208bc55f70dd4835d391..b0405669218ba635017e36d308374235eae44953 100644
|
| --- a/src/opts/SkBitmapFilter_opts_SSE2.cpp
|
| +++ b/src/opts/SkBitmapFilter_opts_SSE2.cpp
|
| @@ -46,45 +46,45 @@
|
| void highQualityFilter_SSE2(const SkBitmapProcState& s, int x, int y,
|
| SkPMColor* SK_RESTRICT colors, int count) {
|
|
|
| - const int maxX = s.fBitmap->width();
|
| - const int maxY = s.fBitmap->height();
|
| - SkAutoTMalloc<SkScalar> xWeights(maxX);
|
| + const int maxX = s.fBitmap->width() - 1;
|
| + const int maxY = s.fBitmap->height() - 1;
|
|
|
| while (count-- > 0) {
|
| SkPoint srcPt;
|
| - s.fInvProc(s.fInvMatrix, x + 0.5f, y + 0.5f, &srcPt);
|
| + s.fInvProc(s.fInvMatrix, SkIntToScalar(x),
|
| + SkIntToScalar(y), &srcPt);
|
| srcPt.fX -= SK_ScalarHalf;
|
| srcPt.fY -= SK_ScalarHalf;
|
|
|
| + int sx = SkScalarFloorToInt(srcPt.fX);
|
| + int sy = SkScalarFloorToInt(srcPt.fY);
|
| +
|
| __m128 weight = _mm_setzero_ps();
|
| __m128 accum = _mm_setzero_ps();
|
|
|
| - int y0 = SkClampMax(SkScalarCeilToInt(srcPt.fY-s.getBitmapFilter()->width()), maxY);
|
| - int y1 = SkClampMax(SkScalarFloorToInt(srcPt.fY+s.getBitmapFilter()->width()+1), maxY);
|
| - int x0 = SkClampMax(SkScalarCeilToInt(srcPt.fX-s.getBitmapFilter()->width()), maxX);
|
| - int x1 = SkClampMax(SkScalarFloorToInt(srcPt.fX+s.getBitmapFilter()->width())+1, maxX);
|
| -
|
| - for (int srcX = x0; srcX < x1 ; srcX++) {
|
| - // Looking these up once instead of each loop is a ~15% speedup.
|
| - xWeights[srcX - x0] = s.getBitmapFilter()->lookupScalar((srcPt.fX - srcX));
|
| - }
|
| -
|
| - for (int srcY = y0; srcY < y1; srcY++) {
|
| - SkScalar yWeight = s.getBitmapFilter()->lookupScalar((srcPt.fY - srcY));
|
| -
|
| - for (int srcX = x0; srcX < x1 ; srcX++) {
|
| - SkScalar xWeight = xWeights[srcX - x0];
|
| -
|
| - SkScalar combined_weight = SkScalarMul(xWeight, yWeight);
|
| -
|
| - SkPMColor color = *s.fBitmap->getAddr32(srcX, srcY);
|
| -
|
| - __m128i c = _mm_cvtsi32_si128(color);
|
| + int y0 = SkTMax(0, int(ceil(sy-s.getBitmapFilter()->width() + 0.5f)));
|
| + int y1 = SkTMin(maxY, int(floor(sy+s.getBitmapFilter()->width() + 0.5f)));
|
| + int x0 = SkTMax(0, int(ceil(sx-s.getBitmapFilter()->width() + 0.5f)));
|
| + int x1 = SkTMin(maxX, int(floor(sx+s.getBitmapFilter()->width() + 0.5f)));
|
| +
|
| + for (int src_y = y0; src_y <= y1; src_y++) {
|
| + float yweight = SkScalarToFloat(s.getBitmapFilter()->lookupScalar(srcPt.fY - src_y));
|
| +
|
| + for (int src_x = x0; src_x <= x1 ; src_x++) {
|
| + float xweight = SkScalarToFloat(s.getBitmapFilter()->lookupScalar(srcPt.fX - src_x));
|
| +
|
| + float combined_weight = xweight * yweight;
|
| +
|
| + SkPMColor color = *s.fBitmap->getAddr32(src_x, src_y);
|
| +
|
| + __m128i c = _mm_cvtsi32_si128( color );
|
| c = _mm_unpacklo_epi8(c, _mm_setzero_si128());
|
| c = _mm_unpacklo_epi16(c, _mm_setzero_si128());
|
| - __m128 cfloat = _mm_cvtepi32_ps(c);
|
| +
|
| + __m128 cfloat = _mm_cvtepi32_ps( c );
|
|
|
| __m128 weightVector = _mm_set1_ps(combined_weight);
|
| +
|
| accum = _mm_add_ps(accum, _mm_mul_ps(cfloat, weightVector));
|
| weight = _mm_add_ps( weight, weightVector );
|
| }
|
| @@ -92,13 +92,15 @@
|
|
|
| accum = _mm_div_ps(accum, weight);
|
| accum = _mm_add_ps(accum, _mm_set1_ps(0.5f));
|
| - __m128i accumInt = _mm_cvttps_epi32(accum);
|
| -
|
| - int* localResult = (int*)(&accumInt);
|
| - int a = SkClampMax(localResult[3], 255);
|
| - int r = SkClampMax(localResult[2], a);
|
| - int g = SkClampMax(localResult[1], a);
|
| - int b = SkClampMax(localResult[0], a);
|
| +
|
| + __m128i accumInt = _mm_cvtps_epi32( accum );
|
| +
|
| + int localResult[4];
|
| + _mm_storeu_si128((__m128i *) (localResult), accumInt);
|
| + int a = SkClampMax(localResult[0], 255);
|
| + int r = SkClampMax(localResult[1], a);
|
| + int g = SkClampMax(localResult[2], a);
|
| + int b = SkClampMax(localResult[3], a);
|
|
|
| *colors++ = SkPackARGB32(a, r, g, b);
|
|
|
|
|