OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright 2013 Google Inc. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. |
| 6 */ |
| 7 |
| 8 #include "SkBitmapProcState.h" |
| 9 #include "SkBitmap.h" |
| 10 #include "SkColor.h" |
| 11 #include "SkColorPriv.h" |
| 12 #include "SkUnPreMultiply.h" |
| 13 #include "SkShader.h" |
| 14 |
| 15 #include <emmintrin.h> |
| 16 |
#if 0
// Debugging helpers that dump the lanes of an SSE register to stdout.
// They are compiled out; change the guard to 1 to use them locally.
// Each helper copies the register into a correctly typed local array with an
// unaligned store rather than reinterpreting the register through a pointer
// cast, so the lane reads do not depend on type punning.

// Prints the register as four signed 32-bit lanes.
static inline void print128i(__m128i value) {
    int v[4];
    _mm_storeu_si128((__m128i*) v, value);
    printf("% .11d % .11d % .11d % .11d\n", v[0], v[1], v[2], v[3]);
}

// Prints the register as eight signed 16-bit lanes.
static inline void print128i_16(__m128i value) {
    short v[8];
    _mm_storeu_si128((__m128i*) v, value);
    printf("% .5d % .5d % .5d % .5d % .5d % .5d % .5d % .5d\n",
           v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
}

// Prints the register as sixteen unsigned 8-bit lanes.
static inline void print128i_8(__m128i value) {
    unsigned char v[16];
    _mm_storeu_si128((__m128i*) v, value);
    printf("%.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u\n",
           v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7],
           v[8], v[9], v[10], v[11], v[12], v[13], v[14], v[15]
    );
}

// Prints the register as four single-precision float lanes.
static inline void print128f(__m128 value) {
    float f[4];
    _mm_storeu_ps(f, value);
    printf("%3.4f %3.4f %3.4f %3.4f\n", f[0], f[1], f[2], f[3]);
}
#endif
| 41 |
| 42 // because the border is handled specially, this is guaranteed to have all 16 pi
xels |
| 43 // available to it without running off the bitmap's edge. |
| 44 |
// NOTE(review): these two globals are not referenced by any code visible in
// this file; presumably they hold the coordinates of a pixel to trace while
// debugging. Confirm nothing else links against them before removing.
int debug_x = 20;
int debug_y = 255;
| 47 |
| 48 void highQualityFilter_SSE2(const SkBitmapProcState& s, int x, int y, |
| 49 SkPMColor* SK_RESTRICT colors, int count) { |
| 50 |
| 51 SkPMColor *orig_colors = colors; |
| 52 |
| 53 const int maxX = s.fBitmap->width() - 1; |
| 54 const int maxY = s.fBitmap->height() - 1; |
| 55 |
| 56 while (count-- > 0) { |
| 57 SkPoint srcPt; |
| 58 s.fInvProc(*s.fInvMatrix, SkIntToScalar(x), |
| 59 SkIntToScalar(y), &srcPt); |
| 60 srcPt.fX -= SK_ScalarHalf; |
| 61 srcPt.fY -= SK_ScalarHalf; |
| 62 SkScalar fractx = srcPt.fX - SkScalarFloorToScalar(srcPt.fX); |
| 63 SkScalar fracty = srcPt.fY - SkScalarFloorToScalar(srcPt.fY); |
| 64 |
| 65 int sx = SkScalarFloorToInt(srcPt.fX); |
| 66 int sy = SkScalarFloorToInt(srcPt.fY); |
| 67 |
| 68 __m128 weight = _mm_setzero_ps(); |
| 69 __m128 accum = _mm_setzero_ps(); |
| 70 |
| 71 int y0 = SkTMax(0, int(ceil(sy-s.getBitmapFilter()->width() + 0.5f))); |
| 72 int y1 = SkTMin(maxY, int(floor(sy+s.getBitmapFilter()->width() + 0.5f))
); |
| 73 int x0 = SkTMax(0, int(ceil(sx-s.getBitmapFilter()->width() + 0.5f))); |
| 74 int x1 = SkTMin(maxX, int(floor(sx+s.getBitmapFilter()->width() + 0.5f))
); |
| 75 |
| 76 for (int src_y = y0; src_y <= y1; src_y++) { |
| 77 float yweight = s.getBitmapFilter()->lookupFloat( (srcPt.fY - src_y)
); |
| 78 |
| 79 for (int src_x = x0; src_x <= x1 ; src_x++) { |
| 80 float xweight = s.getBitmapFilter()->lookupFloat( (srcPt.fX - sr
c_x) ); |
| 81 |
| 82 float combined_weight = xweight * yweight; |
| 83 |
| 84 SkPMColor color = *s.fBitmap->getAddr32(src_x, src_y); |
| 85 |
| 86 __m128i c = _mm_cvtsi32_si128( color ); |
| 87 c = _mm_unpacklo_epi8(c, _mm_setzero_si128()); |
| 88 c = _mm_unpacklo_epi16(c, _mm_setzero_si128()); |
| 89 |
| 90 __m128 cfloat = _mm_cvtepi32_ps( c ); |
| 91 |
| 92 __m128 weightVector = _mm_set1_ps(combined_weight); |
| 93 |
| 94 accum = _mm_add_ps(accum, _mm_mul_ps(cfloat, weightVector)); |
| 95 weight = _mm_add_ps( weight, weightVector ); |
| 96 } |
| 97 } |
| 98 |
| 99 accum = _mm_div_ps(accum, weight); |
| 100 accum = _mm_add_ps(accum, _mm_set1_ps(0.5f)); |
| 101 |
| 102 __m128i accumInt = _mm_cvtps_epi32( accum ); |
| 103 |
| 104 int localResult[4]; |
| 105 _mm_storeu_si128((__m128i *) (localResult), accumInt); |
| 106 int a = SkClampMax(localResult[0], 255); |
| 107 int r = SkClampMax(localResult[1], a); |
| 108 int g = SkClampMax(localResult[2], a); |
| 109 int b = SkClampMax(localResult[3], a); |
| 110 |
| 111 *colors++ = SkPackARGB32(a, r, g, b); |
| 112 |
| 113 x++; |
| 114 } |
| 115 } |
| 116 |
| 117 void highQualityFilter_ScaleOnly_SSE2(const SkBitmapProcState &s, int x, int y, |
| 118 SkPMColor *SK_RESTRICT colors, int count) { |
| 119 SkPMColor *orig_colors = colors; |
| 120 |
| 121 const int maxX = s.fBitmap->width() - 1; |
| 122 const int maxY = s.fBitmap->height() - 1; |
| 123 |
| 124 SkPoint srcPt; |
| 125 s.fInvProc(*s.fInvMatrix, SkIntToScalar(x), |
| 126 SkIntToScalar(y), &srcPt); |
| 127 srcPt.fY -= SK_ScalarHalf; |
| 128 int sy = SkScalarFloorToInt(srcPt.fY); |
| 129 |
| 130 SkScalar fracty = srcPt.fY - SkScalarFloorToScalar(srcPt.fY); |
| 131 |
| 132 int y0 = SkTMax(0, int(ceil(sy-s.getBitmapFilter()->width() + 0.5f))); |
| 133 int y1 = SkTMin(maxY, int(floor(sy+s.getBitmapFilter()->width() + 0.5f))); |
| 134 |
| 135 while (count-- > 0) { |
| 136 srcPt.fX -= SK_ScalarHalf; |
| 137 srcPt.fY -= SK_ScalarHalf; |
| 138 SkScalar fractx = srcPt.fX - SkScalarFloorToScalar(srcPt.fX); |
| 139 |
| 140 int sx = SkScalarFloorToInt(srcPt.fX); |
| 141 |
| 142 float weight = 0; |
| 143 __m128 accum = _mm_setzero_ps(); |
| 144 |
| 145 int x0 = SkTMax(0, int(ceil(sx-s.getBitmapFilter()->width() + 0.5f))); |
| 146 int x1 = SkTMin(maxX, int(floor(sx+s.getBitmapFilter()->width() + 0.5f))
); |
| 147 |
| 148 for (int src_y = y0; src_y <= y1; src_y++) { |
| 149 float yweight = s.getBitmapFilter()->lookupFloat( (srcPt.fY - src_y)
); |
| 150 |
| 151 for (int src_x = x0; src_x <= x1 ; src_x++) { |
| 152 float xweight = s.getBitmapFilter()->lookupFloat( (srcPt.fX - sr
c_x) ); |
| 153 |
| 154 float combined_weight = xweight * yweight; |
| 155 |
| 156 SkPMColor color = *s.fBitmap->getAddr32(src_x, src_y); |
| 157 |
| 158 __m128 c = _mm_set_ps(SkGetPackedB32(color), |
| 159 SkGetPackedG32(color), |
| 160 SkGetPackedR32(color), |
| 161 SkGetPackedA32(color)); |
| 162 |
| 163 __m128 weightVector = _mm_set1_ps(combined_weight); |
| 164 |
| 165 accum = _mm_add_ps(accum, _mm_mul_ps(c, weightVector)); |
| 166 weight += combined_weight; |
| 167 } |
| 168 } |
| 169 |
| 170 __m128 totalWeightVector = _mm_set1_ps(weight); |
| 171 accum = _mm_div_ps(accum, totalWeightVector); |
| 172 accum = _mm_add_ps(accum, _mm_set1_ps(0.5f)); |
| 173 |
| 174 float localResult[4]; |
| 175 _mm_storeu_ps(localResult, accum); |
| 176 int a = SkClampMax(int(localResult[0]), 255); |
| 177 int r = SkClampMax(int(localResult[1]), a); |
| 178 int g = SkClampMax(int(localResult[2]), a); |
| 179 int b = SkClampMax(int(localResult[3]), a); |
| 180 |
| 181 *colors++ = SkPackARGB32(a, r, g, b); |
| 182 |
| 183 x++; |
| 184 |
| 185 s.fInvProc(*s.fInvMatrix, SkIntToScalar(x), |
| 186 SkIntToScalar(y), &srcPt); |
| 187 |
| 188 } |
| 189 } |
OLD | NEW |