| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2013 Google Inc. | 2 * Copyright 2013 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include <emmintrin.h> | 8 #include <emmintrin.h> |
| 9 #include "SkBitmap.h" | 9 #include "SkBitmap.h" |
| 10 #include "SkBitmapFilter_opts_SSE2.h" | 10 #include "SkBitmapFilter_opts_SSE2.h" |
| (...skipping 22 matching lines...) Expand all Loading... |
| 33 v[8], v[9], v[10], v[11], v[12], v[13], v[14], v[15] | 33 v[8], v[9], v[10], v[11], v[12], v[13], v[14], v[15] |
| 34 ); | 34 ); |
| 35 } | 35 } |
| 36 | 36 |
| 37 static inline void print128f(__m128 value) { | 37 static inline void print128f(__m128 value) { |
| 38 float *f = (float*) &value; | 38 float *f = (float*) &value; |
| 39 printf("%3.4f %3.4f %3.4f %3.4f\n", f[0], f[1], f[2], f[3]); | 39 printf("%3.4f %3.4f %3.4f %3.4f\n", f[0], f[1], f[2], f[3]); |
| 40 } | 40 } |
| 41 #endif | 41 #endif |
| 42 | 42 |
| 43 // because the border is handled specially, this is guaranteed to have all 16 pi
xels | |
| 44 // available to it without running off the bitmap's edge. | |
| 45 | |
| 46 void highQualityFilter_SSE2(const SkBitmapProcState& s, int x, int y, | |
| 47 SkPMColor* SK_RESTRICT colors, int count) { | |
| 48 | |
| 49 const int maxX = s.fBitmap->width(); | |
| 50 const int maxY = s.fBitmap->height(); | |
| 51 SkAutoTMalloc<SkScalar> xWeights(maxX); | |
| 52 const SkBitmapFilter* filter = s.getBitmapFilter(); | |
| 53 | |
| 54 while (count-- > 0) { | |
| 55 SkPoint srcPt; | |
| 56 s.fInvProc(s.fInvMatrix, x + 0.5f, y + 0.5f, &srcPt); | |
| 57 srcPt.fX -= SK_ScalarHalf; | |
| 58 srcPt.fY -= SK_ScalarHalf; | |
| 59 | |
| 60 __m128 weight = _mm_setzero_ps(); | |
| 61 __m128 accum = _mm_setzero_ps(); | |
| 62 | |
| 63 int y0 = SkClampMax(SkScalarCeilToInt(srcPt.fY - filter->width()), maxY)
; | |
| 64 int y1 = SkClampMax(SkScalarFloorToInt(srcPt.fY + filter->width() + 1),
maxY); | |
| 65 int x0 = SkClampMax(SkScalarCeilToInt(srcPt.fX - filter->width()), maxX)
; | |
| 66 int x1 = SkClampMax(SkScalarFloorToInt(srcPt.fX + filter->width()) + 1,
maxX); | |
| 67 | |
| 68 for (int srcX = x0; srcX < x1 ; srcX++) { | |
| 69 // Looking these up once instead of each loop is a ~15% speedup. | |
| 70 xWeights[srcX - x0] = filter->lookupScalar((srcPt.fX - srcX)); | |
| 71 } | |
| 72 | |
| 73 for (int srcY = y0; srcY < y1; srcY++) { | |
| 74 SkScalar yWeight = filter->lookupScalar((srcPt.fY - srcY)); | |
| 75 | |
| 76 for (int srcX = x0; srcX < x1 ; srcX++) { | |
| 77 SkScalar xWeight = xWeights[srcX - x0]; | |
| 78 | |
| 79 SkScalar combined_weight = SkScalarMul(xWeight, yWeight); | |
| 80 __m128 weightVector = _mm_set1_ps(combined_weight); | |
| 81 weight = _mm_add_ps( weight, weightVector ); | |
| 82 | |
| 83 SkPMColor color = *s.fBitmap->getAddr32(srcX, srcY); | |
| 84 if (!color) { | |
| 85 continue; | |
| 86 } | |
| 87 | |
| 88 __m128i c = _mm_cvtsi32_si128(color); | |
| 89 c = _mm_unpacklo_epi8(c, _mm_setzero_si128()); | |
| 90 c = _mm_unpacklo_epi16(c, _mm_setzero_si128()); | |
| 91 __m128 cfloat = _mm_cvtepi32_ps(c); | |
| 92 | |
| 93 accum = _mm_add_ps(accum, _mm_mul_ps(cfloat, weightVector)); | |
| 94 } | |
| 95 } | |
| 96 | |
| 97 accum = _mm_div_ps(accum, weight); | |
| 98 accum = _mm_add_ps(accum, _mm_set1_ps(0.5f)); | |
| 99 __m128i accumInt = _mm_cvttps_epi32(accum); | |
| 100 accumInt = _mm_packs_epi32(accumInt, _mm_setzero_si128()); | |
| 101 accumInt = _mm_packus_epi16(accumInt, _mm_setzero_si128()); | |
| 102 SkPMColor c = _mm_cvtsi128_si32(accumInt); | |
| 103 | |
| 104 int a = SkClampMax(SkGetPackedA32(c), 255); | |
| 105 int r = SkClampMax(SkGetPackedR32(c), a); | |
| 106 int g = SkClampMax(SkGetPackedG32(c), a); | |
| 107 int b = SkClampMax(SkGetPackedB32(c), a); | |
| 108 | |
| 109 *colors++ = SkPackARGB32(a, r, g, b); | |
| 110 | |
| 111 x++; | |
| 112 } | |
| 113 } | |
| 114 | |
| 115 // Convolves horizontally along a single row. The row data is given in | 43 // Convolves horizontally along a single row. The row data is given in |
| 116 // |src_data| and continues for the num_values() of the filter. | 44 // |src_data| and continues for the num_values() of the filter. |
| 117 void convolveHorizontally_SSE2(const unsigned char* src_data, | 45 void convolveHorizontally_SSE2(const unsigned char* src_data, |
| 118 const SkConvolutionFilter1D& filter, | 46 const SkConvolutionFilter1D& filter, |
| 119 unsigned char* out_row, | 47 unsigned char* out_row, |
| 120 bool /*has_alpha*/) { | 48 bool /*has_alpha*/) { |
| 121 int num_values = filter.numValues(); | 49 int num_values = filter.numValues(); |
| 122 | 50 |
| 123 int filter_offset, filter_length; | 51 int filter_offset, filter_length; |
| 124 __m128i zero = _mm_setzero_si128(); | 52 __m128i zero = _mm_setzero_si128(); |
| (...skipping 432 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 557 void applySIMDPadding_SSE2(SkConvolutionFilter1D *filter) { | 485 void applySIMDPadding_SSE2(SkConvolutionFilter1D *filter) { |
| 558 // Padding |paddingCount| of more dummy coefficients after the coefficients | 486 // Padding |paddingCount| of more dummy coefficients after the coefficients |
| 559 // of last filter to prevent SIMD instructions which load 8 or 16 bytes | 487 // of last filter to prevent SIMD instructions which load 8 or 16 bytes |
| 560 // together to access invalid memory areas. We are not trying to align the | 488 // together to access invalid memory areas. We are not trying to align the |
| 561 // coefficients right now due to the opaqueness of <vector> implementation. | 489 // coefficients right now due to the opaqueness of <vector> implementation. |
| 562 // This has to be done after all |AddFilter| calls. | 490 // This has to be done after all |AddFilter| calls. |
| 563 for (int i = 0; i < 8; ++i) { | 491 for (int i = 0; i < 8; ++i) { |
| 564 filter->addFilterValue(static_cast<SkConvolutionFilter1D::ConvolutionFix
ed>(0)); | 492 filter->addFilterValue(static_cast<SkConvolutionFilter1D::ConvolutionFix
ed>(0)); |
| 565 } | 493 } |
| 566 } | 494 } |
| OLD | NEW |