| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2013 Google Inc. | 2 * Copyright 2013 Google Inc. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be | 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. | 5 * found in the LICENSE file. |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include <emmintrin.h> | 8 #include <emmintrin.h> |
| 9 #include "SkBitmap.h" | 9 #include "SkBitmap.h" |
| 10 #include "SkBitmapFilter_opts_SSE2.h" | 10 #include "SkBitmapFilter_opts_SSE2.h" |
| (...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 101 int r = SkClampMax(localResult[1], a); | 101 int r = SkClampMax(localResult[1], a); |
| 102 int g = SkClampMax(localResult[2], a); | 102 int g = SkClampMax(localResult[2], a); |
| 103 int b = SkClampMax(localResult[3], a); | 103 int b = SkClampMax(localResult[3], a); |
| 104 | 104 |
| 105 *colors++ = SkPackARGB32(a, r, g, b); | 105 *colors++ = SkPackARGB32(a, r, g, b); |
| 106 | 106 |
| 107 x++; | 107 x++; |
| 108 } | 108 } |
| 109 } | 109 } |
| 110 | 110 |
| 111 void highQualityFilter_ScaleOnly_SSE2(const SkBitmapProcState &s, int x, int y, | |
| 112 SkPMColor *SK_RESTRICT colors, int count) { | |
| 113 const int maxX = s.fBitmap->width() - 1; | |
| 114 const int maxY = s.fBitmap->height() - 1; | |
| 115 | |
| 116 SkPoint srcPt; | |
| 117 s.fInvProc(s.fInvMatrix, SkIntToScalar(x), | |
| 118 SkIntToScalar(y), &srcPt); | |
| 119 srcPt.fY -= SK_ScalarHalf; | |
| 120 int sy = SkScalarFloorToInt(srcPt.fY); | |
| 121 | |
| 122 int y0 = SkTMax(0, int(ceil(sy-s.getBitmapFilter()->width() + 0.5f))); | |
| 123 int y1 = SkTMin(maxY, int(floor(sy+s.getBitmapFilter()->width() + 0.5f))); | |
| 124 | |
| 125 while (count-- > 0) { | |
| 126 srcPt.fX -= SK_ScalarHalf; | |
| 127 srcPt.fY -= SK_ScalarHalf; | |
| 128 | |
| 129 int sx = SkScalarFloorToInt(srcPt.fX); | |
| 130 | |
| 131 float weight = 0; | |
| 132 __m128 accum = _mm_setzero_ps(); | |
| 133 | |
| 134 int x0 = SkTMax(0, int(ceil(sx-s.getBitmapFilter()->width() + 0.5f))); | |
| 135 int x1 = SkTMin(maxX, int(floor(sx+s.getBitmapFilter()->width() + 0.5f))
); | |
| 136 | |
| 137 for (int src_y = y0; src_y <= y1; src_y++) { | |
| 138 float yweight = SkScalarToFloat(s.getBitmapFilter()->lookupScalar(sr
cPt.fY - src_y)); | |
| 139 | |
| 140 for (int src_x = x0; src_x <= x1 ; src_x++) { | |
| 141 float xweight = SkScalarToFloat(s.getBitmapFilter()->lookupScala
r(srcPt.fX - src_x)); | |
| 142 | |
| 143 float combined_weight = xweight * yweight; | |
| 144 | |
| 145 SkPMColor color = *s.fBitmap->getAddr32(src_x, src_y); | |
| 146 | |
| 147 __m128 c = _mm_set_ps((float)SkGetPackedB32(color), | |
| 148 (float)SkGetPackedG32(color), | |
| 149 (float)SkGetPackedR32(color), | |
| 150 (float)SkGetPackedA32(color)); | |
| 151 | |
| 152 __m128 weightVector = _mm_set1_ps(combined_weight); | |
| 153 | |
| 154 accum = _mm_add_ps(accum, _mm_mul_ps(c, weightVector)); | |
| 155 weight += combined_weight; | |
| 156 } | |
| 157 } | |
| 158 | |
| 159 __m128 totalWeightVector = _mm_set1_ps(weight); | |
| 160 accum = _mm_div_ps(accum, totalWeightVector); | |
| 161 accum = _mm_add_ps(accum, _mm_set1_ps(0.5f)); | |
| 162 | |
| 163 float localResult[4]; | |
| 164 _mm_storeu_ps(localResult, accum); | |
| 165 int a = SkClampMax(int(localResult[0]), 255); | |
| 166 int r = SkClampMax(int(localResult[1]), a); | |
| 167 int g = SkClampMax(int(localResult[2]), a); | |
| 168 int b = SkClampMax(int(localResult[3]), a); | |
| 169 | |
| 170 *colors++ = SkPackARGB32(a, r, g, b); | |
| 171 | |
| 172 x++; | |
| 173 | |
| 174 s.fInvProc(s.fInvMatrix, SkIntToScalar(x), | |
| 175 SkIntToScalar(y), &srcPt); | |
| 176 } | |
| 177 } | |
| 178 | |
| 179 // Convolves horizontally along a single row. The row data is given in | 111 // Convolves horizontally along a single row. The row data is given in |
| 180 // |src_data| and continues for the num_values() of the filter. | 112 // |src_data| and continues for the num_values() of the filter. |
| 181 void convolveHorizontally_SSE2(const unsigned char* src_data, | 113 void convolveHorizontally_SSE2(const unsigned char* src_data, |
| 182 const SkConvolutionFilter1D& filter, | 114 const SkConvolutionFilter1D& filter, |
| 183 unsigned char* out_row, | 115 unsigned char* out_row, |
| 184 bool /*has_alpha*/) { | 116 bool /*has_alpha*/) { |
| 185 int num_values = filter.numValues(); | 117 int num_values = filter.numValues(); |
| 186 | 118 |
| 187 int filter_offset, filter_length; | 119 int filter_offset, filter_length; |
| 188 __m128i zero = _mm_setzero_si128(); | 120 __m128i zero = _mm_setzero_si128(); |
| (...skipping 432 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 621 void applySIMDPadding_SSE2(SkConvolutionFilter1D *filter) { | 553 void applySIMDPadding_SSE2(SkConvolutionFilter1D *filter) { |
| 622 // Padding |paddingCount| of more dummy coefficients after the coefficients | 554 // Padding |paddingCount| of more dummy coefficients after the coefficients |
| 623 // of last filter to prevent SIMD instructions which load 8 or 16 bytes | 555 // of last filter to prevent SIMD instructions which load 8 or 16 bytes |
| 624 // together to access invalid memory areas. We are not trying to align the | 556 // together to access invalid memory areas. We are not trying to align the |
| 625 // coefficients right now due to the opaqueness of <vector> implementation. | 557 // coefficients right now due to the opaqueness of <vector> implementation. |
| 626 // This has to be done after all |AddFilter| calls. | 558 // This has to be done after all |AddFilter| calls. |
| 627 for (int i = 0; i < 8; ++i) { | 559 for (int i = 0; i < 8; ++i) { |
| 628 filter->addFilterValue(static_cast<SkConvolutionFilter1D::ConvolutionFix
ed>(0)); | 560 filter->addFilterValue(static_cast<SkConvolutionFilter1D::ConvolutionFix
ed>(0)); |
| 629 } | 561 } |
| 630 } | 562 } |
| OLD | NEW |