Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(501)

Side by Side Diff: src/opts/SkBitmapFilter_opts_SSE2.cpp

Issue 17381008: More general version of image filtering; reworked to be robust and easier to SSE (Closed) Base URL: https://skia.googlecode.com/svn/trunk
Patch Set: make temp. scale function private; disable GM and bench for that function Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "SkBitmapProcState.h"
9 #include "SkBitmap.h"
10 #include "SkColor.h"
11 #include "SkColorPriv.h"
12 #include "SkUnPreMultiply.h"
13 #include "SkShader.h"
14
15 #include <emmintrin.h>
16
17 #if 0
// Debug helper: prints the four 32-bit signed integer lanes of |value|,
// lowest lane first, followed by a newline.
static inline void print128i(__m128i value) {
    int v[4];
    // Copy the lanes out with an unaligned store instead of reading the
    // vector through a casted int pointer, which violates strict aliasing.
    _mm_storeu_si128((__m128i *) v, value);
    printf("% .11d % .11d % .11d % .11d\n", v[0], v[1], v[2], v[3]);
}
22
// Debug helper: prints the eight 16-bit signed integer lanes of |value|,
// lowest lane first, followed by a newline.
static inline void print128i_16(__m128i value) {
    short v[8];
    // Copy the lanes out with an unaligned store instead of reading the
    // vector through a casted short pointer, which violates strict aliasing.
    _mm_storeu_si128((__m128i *) v, value);
    printf("% .5d % .5d % .5d % .5d % .5d % .5d % .5d % .5d\n",
           v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
}
27
// Debug helper: prints the sixteen unsigned 8-bit lanes of |value|, lowest
// lane first, each zero-padded to three digits, followed by a newline.
static inline void print128i_8(__m128i value) {
    unsigned char v[16];
    _mm_storeu_si128((__m128i *) v, value);
    // Every conversion must be exactly "%.3u"; a space between the precision
    // and the 'u' makes the format string invalid.
    printf("%.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u "
           "%.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u\n",
           v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7],
           v[8], v[9], v[10], v[11], v[12], v[13], v[14], v[15]);
}
35
// Debug helper: prints the four single-precision float lanes of |value|,
// lowest lane first, followed by a newline.
static inline void print128f(__m128 value) {
    float f[4];
    // Copy the lanes out with an unaligned store instead of reading the
    // vector through a casted float pointer.
    _mm_storeu_ps(f, value);
    printf("%3.4f %3.4f %3.4f %3.4f\n", f[0], f[1], f[2], f[3]);
}
40 #endif
41
// Because the border is handled specially, this is guaranteed to have all 16
// pixels available to it without running off the bitmap's edge.
44
// NOTE(review): file-scope, externally visible debug values. Nothing in the
// code visible in this file reads them -- presumably leftover debugging aids;
// confirm no other translation unit references them before removing.
int debug_x = 20;
int debug_y = 255;
47
48 void highQualityFilter_SSE2(const SkBitmapProcState& s, int x, int y,
49 SkPMColor* SK_RESTRICT colors, int count) {
50
51 SkPMColor *orig_colors = colors;
52
53 const int maxX = s.fBitmap->width() - 1;
54 const int maxY = s.fBitmap->height() - 1;
55
56 while (count-- > 0) {
57 SkPoint srcPt;
58 s.fInvProc(*s.fInvMatrix, SkIntToScalar(x),
59 SkIntToScalar(y), &srcPt);
60 srcPt.fX -= SK_ScalarHalf;
61 srcPt.fY -= SK_ScalarHalf;
62 SkScalar fractx = srcPt.fX - SkScalarFloorToScalar(srcPt.fX);
63 SkScalar fracty = srcPt.fY - SkScalarFloorToScalar(srcPt.fY);
64
65 int sx = SkScalarFloorToInt(srcPt.fX);
66 int sy = SkScalarFloorToInt(srcPt.fY);
67
68 __m128 weight = _mm_setzero_ps();
69 __m128 accum = _mm_setzero_ps();
70
71 int y0 = SkTMax(0, int(ceil(sy-s.getBitmapFilter()->width() + 0.5f)));
72 int y1 = SkTMin(maxY, int(floor(sy+s.getBitmapFilter()->width() + 0.5f)) );
73 int x0 = SkTMax(0, int(ceil(sx-s.getBitmapFilter()->width() + 0.5f)));
74 int x1 = SkTMin(maxX, int(floor(sx+s.getBitmapFilter()->width() + 0.5f)) );
75
76 for (int src_y = y0; src_y <= y1; src_y++) {
77 float yweight = s.getBitmapFilter()->lookupFloat( (srcPt.fY - src_y) );
78
79 for (int src_x = x0; src_x <= x1 ; src_x++) {
80 float xweight = s.getBitmapFilter()->lookupFloat( (srcPt.fX - sr c_x) );
81
82 float combined_weight = xweight * yweight;
83
84 SkPMColor color = *s.fBitmap->getAddr32(src_x, src_y);
85
86 __m128i c = _mm_cvtsi32_si128( color );
87 c = _mm_unpacklo_epi8(c, _mm_setzero_si128());
88 c = _mm_unpacklo_epi16(c, _mm_setzero_si128());
89
90 __m128 cfloat = _mm_cvtepi32_ps( c );
91
92 __m128 weightVector = _mm_set1_ps(combined_weight);
93
94 accum = _mm_add_ps(accum, _mm_mul_ps(cfloat, weightVector));
95 weight = _mm_add_ps( weight, weightVector );
96 }
97 }
98
99 accum = _mm_div_ps(accum, weight);
100 accum = _mm_add_ps(accum, _mm_set1_ps(0.5f));
101
102 __m128i accumInt = _mm_cvtps_epi32( accum );
103
104 int localResult[4];
105 _mm_storeu_si128((__m128i *) (localResult), accumInt);
106 int a = SkClampMax(localResult[0], 255);
107 int r = SkClampMax(localResult[1], a);
108 int g = SkClampMax(localResult[2], a);
109 int b = SkClampMax(localResult[3], a);
110
111 *colors++ = SkPackARGB32(a, r, g, b);
112
113 x++;
114 }
115 }
116
117 void highQualityFilter_ScaleOnly_SSE2(const SkBitmapProcState &s, int x, int y,
118 SkPMColor *SK_RESTRICT colors, int count) {
119 SkPMColor *orig_colors = colors;
120
121 const int maxX = s.fBitmap->width() - 1;
122 const int maxY = s.fBitmap->height() - 1;
123
124 SkPoint srcPt;
125 s.fInvProc(*s.fInvMatrix, SkIntToScalar(x),
126 SkIntToScalar(y), &srcPt);
127 srcPt.fY -= SK_ScalarHalf;
128 int sy = SkScalarFloorToInt(srcPt.fY);
129
130 SkScalar fracty = srcPt.fY - SkScalarFloorToScalar(srcPt.fY);
131
132 int y0 = SkTMax(0, int(ceil(sy-s.getBitmapFilter()->width() + 0.5f)));
133 int y1 = SkTMin(maxY, int(floor(sy+s.getBitmapFilter()->width() + 0.5f)));
134
135 while (count-- > 0) {
136 srcPt.fX -= SK_ScalarHalf;
137 srcPt.fY -= SK_ScalarHalf;
138 SkScalar fractx = srcPt.fX - SkScalarFloorToScalar(srcPt.fX);
139
140 int sx = SkScalarFloorToInt(srcPt.fX);
141
142 float weight = 0;
143 __m128 accum = _mm_setzero_ps();
144
145 int x0 = SkTMax(0, int(ceil(sx-s.getBitmapFilter()->width() + 0.5f)));
146 int x1 = SkTMin(maxX, int(floor(sx+s.getBitmapFilter()->width() + 0.5f)) );
147
148 for (int src_y = y0; src_y <= y1; src_y++) {
149 float yweight = s.getBitmapFilter()->lookupFloat( (srcPt.fY - src_y) );
150
151 for (int src_x = x0; src_x <= x1 ; src_x++) {
152 float xweight = s.getBitmapFilter()->lookupFloat( (srcPt.fX - sr c_x) );
153
154 float combined_weight = xweight * yweight;
155
156 SkPMColor color = *s.fBitmap->getAddr32(src_x, src_y);
157
158 __m128 c = _mm_set_ps(SkGetPackedB32(color),
159 SkGetPackedG32(color),
160 SkGetPackedR32(color),
161 SkGetPackedA32(color));
162
163 __m128 weightVector = _mm_set1_ps(combined_weight);
164
165 accum = _mm_add_ps(accum, _mm_mul_ps(c, weightVector));
166 weight += combined_weight;
167 }
168 }
169
170 __m128 totalWeightVector = _mm_set1_ps(weight);
171 accum = _mm_div_ps(accum, totalWeightVector);
172 accum = _mm_add_ps(accum, _mm_set1_ps(0.5f));
173
174 float localResult[4];
175 _mm_storeu_ps(localResult, accum);
176 int a = SkClampMax(int(localResult[0]), 255);
177 int r = SkClampMax(int(localResult[1]), a);
178 int g = SkClampMax(int(localResult[2]), a);
179 int b = SkClampMax(int(localResult[3]), a);
180
181 *colors++ = SkPackARGB32(a, r, g, b);
182
183 x++;
184
185 s.fInvProc(*s.fInvMatrix, SkIntToScalar(x),
186 SkIntToScalar(y), &srcPt);
187
188 }
189 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698