OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright 2013 Google Inc. |
| 3 * |
| 4 * Use of this source code is governed by a BSD-style license that can be |
| 5 * found in the LICENSE file. |
| 6 */ |
| 7 |
| 8 #include "SkBitmapProcState.h" |
| 9 #include "SkBitmap.h" |
| 10 #include "SkColor.h" |
| 11 #include "SkColorPriv.h" |
| 12 #include "SkUnPreMultiply.h" |
| 13 #include "SkShader.h" |
| 14 |
| 15 #include "SkBitmapFilter_opts_SSE2.h" |
| 16 |
| 17 #include <emmintrin.h> |
| 18 |
| 19 #if 0 |
// Debug helper: prints the four 32-bit integer lanes of a 128-bit integer
// vector on one line of stdout, lowest lane first.
static inline void print128i(__m128i value) {
    int lanes[4];
    // Copy the lanes out with an unaligned store instead of reading the
    // vector object through a cast int*, which is type-punning.
    _mm_storeu_si128((__m128i *) lanes, value);
    printf("% .11d % .11d % .11d % .11d\n", lanes[0], lanes[1], lanes[2], lanes[3]);
}
| 24 |
// Debug helper: prints the eight 16-bit lanes of a 128-bit integer vector as
// signed values on one line of stdout, lowest lane first.
static inline void print128i_16(__m128i value) {
    short lanes[8];
    // Copy the lanes out with an unaligned store instead of reading the
    // vector object through a cast short*, which is type-punning.
    _mm_storeu_si128((__m128i *) lanes, value);
    printf("% .5d % .5d % .5d % .5d % .5d % .5d % .5d % .5d\n",
           lanes[0], lanes[1], lanes[2], lanes[3],
           lanes[4], lanes[5], lanes[6], lanes[7]);
}
| 29 |
// Debug helper: prints the sixteen bytes of a 128-bit integer vector as
// unsigned values on one line of stdout, lowest byte first.
static inline void print128i_8(__m128i value) {
    unsigned char bytes[16];
    // Copy the bytes out with an unaligned store so this helper extracts
    // lanes the same way as the other print128* helpers.
    _mm_storeu_si128((__m128i *) bytes, value);
    printf("%.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u\n",
           bytes[0], bytes[1], bytes[2], bytes[3], bytes[4], bytes[5], bytes[6], bytes[7],
           bytes[8], bytes[9], bytes[10], bytes[11], bytes[12], bytes[13], bytes[14], bytes[15]
    );
}
| 37 |
// Debug helper: prints the four single-precision lanes of a 128-bit float
// vector on one line of stdout, lowest lane first.
static inline void print128f(__m128 value) {
    float lanes[4];
    // Copy the lanes out with an unaligned store instead of reading the
    // vector object through a cast float*.
    _mm_storeu_ps(lanes, value);
    printf("%3.4f %3.4f %3.4f %3.4f\n", lanes[0], lanes[1], lanes[2], lanes[3]);
}
| 42 #endif |
| 43 |
// Because the border is handled specially, this is guaranteed to have all 16
// pixels available to it without running off the bitmap's edge.
| 46 |
// Pixel coordinates kept around as a debugging aid. Nothing in the visible
// part of this file reads them; NOTE(review): confirm no other translation
// unit refers to them before removing or making them static.
int debug_x = 20;
int debug_y = 255;
| 49 |
| 50 void highQualityFilter_SSE2(const SkBitmapProcState& s, int x, int y, |
| 51 SkPMColor* SK_RESTRICT colors, int count) { |
| 52 |
| 53 const int maxX = s.fBitmap->width() - 1; |
| 54 const int maxY = s.fBitmap->height() - 1; |
| 55 |
| 56 while (count-- > 0) { |
| 57 SkPoint srcPt; |
| 58 s.fInvProc(*s.fInvMatrix, SkIntToScalar(x), |
| 59 SkIntToScalar(y), &srcPt); |
| 60 srcPt.fX -= SK_ScalarHalf; |
| 61 srcPt.fY -= SK_ScalarHalf; |
| 62 |
| 63 int sx = SkScalarFloorToInt(srcPt.fX); |
| 64 int sy = SkScalarFloorToInt(srcPt.fY); |
| 65 |
| 66 __m128 weight = _mm_setzero_ps(); |
| 67 __m128 accum = _mm_setzero_ps(); |
| 68 |
| 69 int y0 = SkTMax(0, int(ceil(sy-s.getBitmapFilter()->width() + 0.5f))); |
| 70 int y1 = SkTMin(maxY, int(floor(sy+s.getBitmapFilter()->width() + 0.5f))
); |
| 71 int x0 = SkTMax(0, int(ceil(sx-s.getBitmapFilter()->width() + 0.5f))); |
| 72 int x1 = SkTMin(maxX, int(floor(sx+s.getBitmapFilter()->width() + 0.5f))
); |
| 73 |
| 74 for (int src_y = y0; src_y <= y1; src_y++) { |
| 75 float yweight = s.getBitmapFilter()->lookupFloat( (srcPt.fY - src_y)
); |
| 76 |
| 77 for (int src_x = x0; src_x <= x1 ; src_x++) { |
| 78 float xweight = s.getBitmapFilter()->lookupFloat( (srcPt.fX - sr
c_x) ); |
| 79 |
| 80 float combined_weight = xweight * yweight; |
| 81 |
| 82 SkPMColor color = *s.fBitmap->getAddr32(src_x, src_y); |
| 83 |
| 84 __m128i c = _mm_cvtsi32_si128( color ); |
| 85 c = _mm_unpacklo_epi8(c, _mm_setzero_si128()); |
| 86 c = _mm_unpacklo_epi16(c, _mm_setzero_si128()); |
| 87 |
| 88 __m128 cfloat = _mm_cvtepi32_ps( c ); |
| 89 |
| 90 __m128 weightVector = _mm_set1_ps(combined_weight); |
| 91 |
| 92 accum = _mm_add_ps(accum, _mm_mul_ps(cfloat, weightVector)); |
| 93 weight = _mm_add_ps( weight, weightVector ); |
| 94 } |
| 95 } |
| 96 |
| 97 accum = _mm_div_ps(accum, weight); |
| 98 accum = _mm_add_ps(accum, _mm_set1_ps(0.5f)); |
| 99 |
| 100 __m128i accumInt = _mm_cvtps_epi32( accum ); |
| 101 |
| 102 int localResult[4]; |
| 103 _mm_storeu_si128((__m128i *) (localResult), accumInt); |
| 104 int a = SkClampMax(localResult[0], 255); |
| 105 int r = SkClampMax(localResult[1], a); |
| 106 int g = SkClampMax(localResult[2], a); |
| 107 int b = SkClampMax(localResult[3], a); |
| 108 |
| 109 *colors++ = SkPackARGB32(a, r, g, b); |
| 110 |
| 111 x++; |
| 112 } |
| 113 } |
| 114 |
| 115 void highQualityFilter_ScaleOnly_SSE2(const SkBitmapProcState &s, int x, int y, |
| 116 SkPMColor *SK_RESTRICT colors, int count) { |
| 117 const int maxX = s.fBitmap->width() - 1; |
| 118 const int maxY = s.fBitmap->height() - 1; |
| 119 |
| 120 SkPoint srcPt; |
| 121 s.fInvProc(*s.fInvMatrix, SkIntToScalar(x), |
| 122 SkIntToScalar(y), &srcPt); |
| 123 srcPt.fY -= SK_ScalarHalf; |
| 124 int sy = SkScalarFloorToInt(srcPt.fY); |
| 125 |
| 126 int y0 = SkTMax(0, int(ceil(sy-s.getBitmapFilter()->width() + 0.5f))); |
| 127 int y1 = SkTMin(maxY, int(floor(sy+s.getBitmapFilter()->width() + 0.5f))); |
| 128 |
| 129 while (count-- > 0) { |
| 130 srcPt.fX -= SK_ScalarHalf; |
| 131 srcPt.fY -= SK_ScalarHalf; |
| 132 |
| 133 int sx = SkScalarFloorToInt(srcPt.fX); |
| 134 |
| 135 float weight = 0; |
| 136 __m128 accum = _mm_setzero_ps(); |
| 137 |
| 138 int x0 = SkTMax(0, int(ceil(sx-s.getBitmapFilter()->width() + 0.5f))); |
| 139 int x1 = SkTMin(maxX, int(floor(sx+s.getBitmapFilter()->width() + 0.5f))
); |
| 140 |
| 141 for (int src_y = y0; src_y <= y1; src_y++) { |
| 142 float yweight = s.getBitmapFilter()->lookupFloat( (srcPt.fY - src_y)
); |
| 143 |
| 144 for (int src_x = x0; src_x <= x1 ; src_x++) { |
| 145 float xweight = s.getBitmapFilter()->lookupFloat( (srcPt.fX - sr
c_x) ); |
| 146 |
| 147 float combined_weight = xweight * yweight; |
| 148 |
| 149 SkPMColor color = *s.fBitmap->getAddr32(src_x, src_y); |
| 150 |
| 151 __m128 c = _mm_set_ps(SkGetPackedB32(color), |
| 152 SkGetPackedG32(color), |
| 153 SkGetPackedR32(color), |
| 154 SkGetPackedA32(color)); |
| 155 |
| 156 __m128 weightVector = _mm_set1_ps(combined_weight); |
| 157 |
| 158 accum = _mm_add_ps(accum, _mm_mul_ps(c, weightVector)); |
| 159 weight += combined_weight; |
| 160 } |
| 161 } |
| 162 |
| 163 __m128 totalWeightVector = _mm_set1_ps(weight); |
| 164 accum = _mm_div_ps(accum, totalWeightVector); |
| 165 accum = _mm_add_ps(accum, _mm_set1_ps(0.5f)); |
| 166 |
| 167 float localResult[4]; |
| 168 _mm_storeu_ps(localResult, accum); |
| 169 int a = SkClampMax(int(localResult[0]), 255); |
| 170 int r = SkClampMax(int(localResult[1]), a); |
| 171 int g = SkClampMax(int(localResult[2]), a); |
| 172 int b = SkClampMax(int(localResult[3]), a); |
| 173 |
| 174 *colors++ = SkPackARGB32(a, r, g, b); |
| 175 |
| 176 x++; |
| 177 |
| 178 s.fInvProc(*s.fInvMatrix, SkIntToScalar(x), |
| 179 SkIntToScalar(y), &srcPt); |
| 180 |
| 181 } |
| 182 } |
OLD | NEW |