Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(274)

Side by Side Diff: src/opts/SkBitmapFilter_opts_SSE2.cpp

Issue 17381008: More general version of image filtering; reworked to be robust and easier to SSE (Closed) Base URL: https://skia.googlecode.com/svn/trunk
Patch Set: Created 7 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "SkBitmapProcState.h"
9 #include "SkBitmap.h"
10 #include "SkColor.h"
11 #include "SkColorPriv.h"
12 #include "SkUnPreMultiply.h"
13 #include "SkShader.h"
14
15 #include <emmintrin.h>
16
// Shorthand: DS(x) expands to SkDoubleToScalar(x).
17 #define DS(x) SkDoubleToScalar(x)
18
// Fully parenthesized product of its two arguments; each argument is expanded
// exactly once.
19 #define MUL(a, b) ((a) * (b))
20
// Debug helper: prints the four signed 32-bit lanes of |value| to stdout,
// lowest lane first, followed by a newline.
static inline void print128i(__m128i value) {
    // Copy the lanes into a plain array with an unaligned store instead of
    // reading the vector object through a casted int pointer.
    int lanes[4];
    _mm_storeu_si128((__m128i*) lanes, value);
    printf("% .11d % .11d % .11d % .11d\n", lanes[0], lanes[1], lanes[2], lanes[3]);
}
25
// Debug helper: prints the eight signed 16-bit lanes of |value| to stdout,
// lowest lane first, followed by a newline.
static inline void print128i_16(__m128i value) {
    // Copy the lanes into a plain array with an unaligned store instead of
    // reading the vector object through a casted short pointer.
    short lanes[8];
    _mm_storeu_si128((__m128i*) lanes, value);
    printf("% .5d % .5d % .5d % .5d % .5d % .5d % .5d % .5d\n",
           lanes[0], lanes[1], lanes[2], lanes[3],
           lanes[4], lanes[5], lanes[6], lanes[7]);
}
30
// Debug helper: prints the sixteen unsigned 8-bit lanes of |value| to stdout,
// lowest lane first, followed by a newline.
static inline void print128i_8(__m128i value) {
    // Copy the lanes into a plain array with an unaligned store, the same way
    // the other print128* helpers extract their lanes.
    unsigned char lanes[16];
    _mm_storeu_si128((__m128i*) lanes, value);
    // Every one of the sixteen conversions is "%.3u": at least three digits,
    // zero padded.
    printf("%.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u "
           "%.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u\n",
           lanes[0], lanes[1], lanes[2],  lanes[3],  lanes[4],  lanes[5],  lanes[6],  lanes[7],
           lanes[8], lanes[9], lanes[10], lanes[11], lanes[12], lanes[13], lanes[14], lanes[15]);
}
38
// Debug helper: prints the four float lanes of |value| to stdout, lowest lane
// first, followed by a newline.
static inline void print128f(__m128 value) {
    // Copy the lanes into a plain array with an unaligned store instead of
    // reading the vector object through a casted float pointer.
    float lanes[4];
    _mm_storeu_ps(lanes, value);
    printf("%3.4f %3.4f %3.4f %3.4f\n", lanes[0], lanes[1], lanes[2], lanes[3]);
}
43
// Debug-only statement wrapper: BLAH(s) runs the statement |s| when a variable
// named `debug` is true. `debug` is not a macro parameter, so it must already be
// in scope wherever the macro is expanded.
// The expansion is a bare `if (...) { ... }`, so an `else` written directly
// after a use would bind to this hidden `if`.
44 #define BLAH( s ) if (debug) { s; }
45
46 // some SSE blending code adapted from theowl84's bilinear blog:
47 // http://fastcpp.blogspot.com/2011/06/bilinear-pixel-interpolation-using-sse.html
48
// NOTE(review): presumably the number of fractional bits intended for
// fixed-point filter weights (1 << WEIGHT_BITS == 64) — confirm with the author.
49 #define WEIGHT_BITS 6
50
51 static inline SkPMColor cubicBlendSSE(__m128 coeffs, SkPMColor *p, bool debug=fa lse) {
52
53 BLAH( printf( "*(p+0) = (%.3u %.3u %.3u %.3u)\n", SkGetPackedR32(*(p+0)), Sk GetPackedG32(*(p+0)), SkGetPackedB32(*(p+0)), SkGetPackedA32(*(p+0)) ) )
54 BLAH( printf( "*(p+1) = (%.3u %.3u %.3u %.3u)\n", SkGetPackedR32(*(p+1)), Sk GetPackedG32(*(p+1)), SkGetPackedB32(*(p+1)), SkGetPackedA32(*(p+1)) ) )
55 BLAH( printf( "*(p+2) = (%.3u %.3u %.3u %.3u)\n", SkGetPackedR32(*(p+2)), Sk GetPackedG32(*(p+2)), SkGetPackedB32(*(p+2)), SkGetPackedA32(*(p+2)) ) )
56 BLAH( printf( "*(p+3) = (%.3u %.3u %.3u %.3u)\n", SkGetPackedR32(*(p+3)), Sk GetPackedG32(*(p+3)), SkGetPackedB32(*(p+3)), SkGetPackedA32(*(p+3)) ) )
57
58 __m128 p1 = _mm_cvtpu8_ps( _mm_set1_pi32( *(p+0) ) );
59 __m128 p2 = _mm_cvtpu8_ps( _mm_set1_pi32( *(p+1) ) );
60 __m128 p3 = _mm_cvtpu8_ps( _mm_set1_pi32( *(p+2) ) );
61 __m128 p4 = _mm_cvtpu8_ps( _mm_set1_pi32( *(p+3) ) );
62
63 BLAH( printf( "p1 = " ) );
64 BLAH( print128f( p1 ));
65 BLAH( printf( "p2 = " ) );
66 BLAH( print128f( p1 ));
67 BLAH( printf( "p3 = " ) );
68 BLAH( print128f( p1 ));
69 BLAH( printf( "p4 = " ) );
70 BLAH( print128f( p1 ));
71
72
73
74 return (SkPMColor) 0;
75
76 }
77
78 static __m128 build_coeff_sse(float t, bool debug=false) {
79 static __m128 coeffs[4];
80 static const __m128 CONST_WEIGHT_FIXED = _mm_set1_ps(1 << WEIGHT_BITS);
81 static bool once = true;
82 if (once) {
83 once = false;
84 // note these coefficients are stored as the transpose of the ones in th e
85 // scalar CPU code so we can do all four dot products at once (SOA organ ization)
86 coeffs[0] = _mm_set_ps( 0.0f / 18.0f , 1.0f / 18.0f, 16.0f / 18.0f , 1.0f / 18.0f);
87 coeffs[1] = _mm_set_ps( 0.0f / 18.0f , 9.0f / 18.0f, 0.0f / 18.0f , -9.0f / 18.0f);
88 coeffs[2] = _mm_set_ps( -6.0f / 18.0f , 27.0f / 18.0f, -36.0f / 18.0f , 15.0f / 18.0f);
89 coeffs[3] = _mm_set_ps( 7.0f / 18.0f ,-21.0f / 18.0f, 21.0f / 18.0f , -7.0f / 18.0f);
90 };
91
92 // evaluate the cubic polynomial
93
94 __m128 tvec = _mm_set1_ps( t );
95 __m128 t2vec = _mm_mul_ps( tvec, tvec );
96 __m128 t3vec = _mm_mul_ps( tvec, t2vec );
97
98 __m128 weight = _mm_mul_ps( t3vec, coeffs[3] );
99 weight = _mm_add_ps( weight, _mm_mul_ps( t2vec, coeffs[2]));
100 weight = _mm_add_ps( weight, _mm_mul_ps( tvec, coeffs[1]));
101 weight = _mm_add_ps( weight, coeffs[0] );
102
103 BLAH( printf( "t = %f\n", t ) )
104 BLAH( printf( "weight = " ) )
105 BLAH( print128f( weight ) )
106
107 return weight;
108 }
109
110 // because the border is handled specially, this is guaranteed to have all 16 pi xels
111 // available to it without running off the bitmap's edge.
112
113 // the coefficients are split across two registers as prepared by the build_coef f function above.
114 static SkPMColor doBicubicFilterSSE(const SkBitmap *bm,
115 __m128 coeffX, __m128 coeffY,
116 int sx, int sy,
117 bool debug=false )
118 {
119 SkPMColor x_blends[4];
120
121 const int maxX = bm->width() - 1;
122 const int maxY = bm->height() - 1;
123
124 SkPMColor temp[4][4]; // used for special-casing the border
125 SkPMColor *rows[4];
126
127 BLAH( printf( "in doBicubicFilterSSE with sx=%d, sy=%d\n", sx, sy))
128
129 if (sx <= 0 || sy <= 0 || sx >= maxX-2 || sy >= maxY-2) {
130 // handle border specially
131 int x0 = SkClampMax(sx - 1, maxX);
132 int x1 = SkClampMax(sx , maxX);
133 int x2 = SkClampMax(sx + 1, maxX);
134 int x3 = SkClampMax(sx + 2, maxX);
135 int y0 = SkClampMax(sy - 1, maxY);
136 int y1 = SkClampMax(sy , maxY);
137 int y2 = SkClampMax(sy + 1, maxY);
138 int y3 = SkClampMax(sy + 2, maxY);
139 temp[0][0] = *bm->getAddr32(x0, y0);
140 temp[0][1] = *bm->getAddr32(x1, y0);
141 temp[0][2] = *bm->getAddr32(x2, y0);
142 temp[0][3] = *bm->getAddr32(x3, y0);
143 temp[1][0] = *bm->getAddr32(x0, y1);
144 temp[1][1] = *bm->getAddr32(x1, y1);
145 temp[1][2] = *bm->getAddr32(x2, y1);
146 temp[1][3] = *bm->getAddr32(x3, y1);
147 temp[2][0] = *bm->getAddr32(x0, y2);
148 temp[2][1] = *bm->getAddr32(x1, y2);
149 temp[2][2] = *bm->getAddr32(x2, y2);
150 temp[2][3] = *bm->getAddr32(x3, y2);
151 temp[3][0] = *bm->getAddr32(x0, y3);
152 temp[3][1] = *bm->getAddr32(x1, y3);
153 temp[3][2] = *bm->getAddr32(x2, y3);
154 temp[3][3] = *bm->getAddr32(x3, y3);
155
156 rows[0] = temp[0];
157 rows[1] = temp[1];
158 rows[2] = temp[2];
159 rows[3] = temp[3];
160 } else {
161 rows[0] = bm->getAddr32(sx-1,sy-1);
162 rows[1] = bm->getAddr32(sx-1,sy-0);
163 rows[2] = bm->getAddr32(sx-1,sy+1);
164 rows[3] = bm->getAddr32(sx-1,sy+2);
165 }
166
167 x_blends[0] = cubicBlendSSE(coeffX, rows[0], debug);
168 x_blends[1] = cubicBlendSSE(coeffX, rows[1], debug);
169 x_blends[2] = cubicBlendSSE(coeffX, rows[2], debug);
170 x_blends[3] = cubicBlendSSE(coeffX, rows[3], debug);
171
172 return cubicBlendSSE(coeffY, x_blends, debug);
173 }
174
175
// NOTE(review): presumably the coordinates of a pixel to trace while
// debugging — confirm. Neither variable is read in the visible part of this
// file, and both have external linkage because they are not declared static.
176 int debug_x = 20;
177 int debug_y = 255;
178
179 void highQualityFilter_SSE2(const SkBitmapProcState& s, int x, int y,
180 SkPMColor* SK_RESTRICT colors, int count) {
181
182 SkPMColor *orig_colors = colors;
183
184 while (count-- > 0) {
185 SkPoint srcPt;
186 s.fInvProc(*s.fInvMatrix, SkIntToScalar(x),
187 SkIntToScalar(y), &srcPt);
188
189 srcPt.fX -= SK_ScalarHalf;
190 srcPt.fY -= SK_ScalarHalf;
191 SkScalar fractx = srcPt.fX - SkScalarFloorToScalar(srcPt.fX);
192 SkScalar fracty = srcPt.fY - SkScalarFloorToScalar(srcPt.fY);
193
194 __m128 coeffX, coeffY;
195 int sx = SkScalarFloorToInt(srcPt.fX);
196 int sy = SkScalarFloorToInt(srcPt.fY);
197
198 coeffX = build_coeff_sse(fractx);
199 coeffY = build_coeff_sse(fracty);
200
201 SkPMColor color = doBicubicFilterSSE( s.fBitmap, coeffX, coeffY, sx, sy, 1 );
202 SkPMColorAssert(color);
203 *colors++ = color;
204 x++;
205 }
206
207 }
208
209 void highQualityFilter_ScaleOnly_SSE2(const SkBitmapProcState &s, int x, int y,
210 SkPMColor *SK_RESTRICT colors, int count) {
211 const int maxX = s.fBitmap->width() - 1;
212 const int maxY = s.fBitmap->height() - 1;
213
214 SkPoint srcPt;
215 s.fInvProc(*s.fInvMatrix, SkIntToScalar(x), SkIntToScalar(y), &srcPt);
216 srcPt.fY -= SK_ScalarHalf;
217 SkScalar fracty = srcPt.fY - SkScalarFloorToScalar(srcPt.fY);
218 __m128 coeffX, coeffY;
219 coeffY = build_coeff_sse(fracty);
220 int sy = SkScalarFloorToInt(srcPt.fY);
221
222 while (count-- > 0) {
223 s.fInvProc(*s.fInvMatrix, SkIntToScalar(x), SkIntToScalar(y), &srcPt);
224 srcPt.fX -= SK_ScalarHalf;
225 SkScalar fractx = srcPt.fX - SkScalarFloorToScalar(srcPt.fX);
226
227 int sx = SkScalarFloorToInt(srcPt.fX);
228
229 coeffX = build_coeff_sse(fractx);
230 SkPMColor c = doBicubicFilterSSE( s.fBitmap, coeffX, coeffY, sx, sy );
231 SkPMColorAssert(c);
232 *colors++ = c;
233
234 x++;
235 }
236 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698