Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(6)

Side by Side Diff: src/core/SkConvolver.cpp

Issue 2500113004: Port convolve functions to SkOpts (Closed)
Patch Set: Fix typo Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/core/SkConvolver.h ('k') | src/core/SkOpts.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "SkConvolver.h" 5 #include "SkConvolver.h"
6 #include "SkOpts.h"
6 #include "SkTArray.h" 7 #include "SkTArray.h"
7 8
8 namespace { 9 namespace {
9
10 // Converts the argument to an 8-bit unsigned value by clamping to the range
11 // 0-255.
12 inline unsigned char ClampTo8(int a) {
13 if (static_cast<unsigned>(a) < 256) {
14 return a; // Avoid the extra check in the common case.
15 }
16 if (a < 0) {
17 return 0;
18 }
19 return 255;
20 }
21
22 // Stores a list of rows in a circular buffer. The usage is you write into it 10 // Stores a list of rows in a circular buffer. The usage is you write into it
23 // by calling AdvanceRow. It will keep track of which row in the buffer it 11 // by calling AdvanceRow. It will keep track of which row in the buffer it
24 // should use next, and the total number of rows added. 12 // should use next, and the total number of rows added.
25 class CircularRowBuffer { 13 class CircularRowBuffer {
26 public: 14 public:
27 // The number of pixels in each row is given in |sourceRowPixelWidth|. 15 // The number of pixels in each row is given in |sourceRowPixelWidth|.
28 // The maximum number of rows needed in the buffer is |maxYFilterSize| 16 // The maximum number of rows needed in the buffer is |maxYFilterSize|
29 // (we only need to store enough rows for the biggest filter). 17 // (we only need to store enough rows for the biggest filter).
30 // 18 //
31 // We use the |firstInputRow| to compute the coordinates of all of the 19 // We use the |firstInputRow| to compute the coordinates of all of the
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
101 int fNextRow; 89 int fNextRow;
102 90
103 // The y coordinate of the |fNextRow|. This is incremented each time a 91 // The y coordinate of the |fNextRow|. This is incremented each time a
104 // new row is appended and does not wrap. 92 // new row is appended and does not wrap.
105 int fNextRowCoordinate; 93 int fNextRowCoordinate;
106 94
107 // Buffer used by GetRowAddresses(). 95 // Buffer used by GetRowAddresses().
108 SkTArray<unsigned char*> fRowAddresses; 96 SkTArray<unsigned char*> fRowAddresses;
109 }; 97 };
110 98
111 // Convolves horizontally along a single row. The row data is given in
112 // |srcData| and continues for the numValues() of the filter.
113 template<bool hasAlpha>
114 void ConvolveHorizontally(const unsigned char* srcData,
115 const SkConvolutionFilter1D& filter,
116 unsigned char* outRow) {
117 // Loop over each pixel on this row in the output image.
118 int numValues = filter.numValues();
119 for (int outX = 0; outX < numValues; outX++) {
120 // Get the filter that determines the current output pixel.
121 int filterOffset, filterLength;
122 const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
123 filter.FilterForValue(outX, &filterOffset, &filterLength);
124
125 // Compute the first pixel in this row that the filter affects. It will
126 // touch |filterLength| pixels (4 bytes each) after this.
127 const unsigned char* rowToFilter = &srcData[filterOffset * 4];
128
129 // Apply the filter to the row to get the destination pixel in |accum|.
130 int accum[4] = {0};
131 for (int filterX = 0; filterX < filterLength; filterX++) {
132 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues [filterX];
133 accum[0] += curFilter * rowToFilter[filterX * 4 + 0];
134 accum[1] += curFilter * rowToFilter[filterX * 4 + 1];
135 accum[2] += curFilter * rowToFilter[filterX * 4 + 2];
136 if (hasAlpha) {
137 accum[3] += curFilter * rowToFilter[filterX * 4 + 3];
138 }
139 }
140
141 // Bring this value back in range. All of the filter scaling factors
142 // are in fixed point with kShiftBits bits of fractional part.
143 accum[0] >>= SkConvolutionFilter1D::kShiftBits;
144 accum[1] >>= SkConvolutionFilter1D::kShiftBits;
145 accum[2] >>= SkConvolutionFilter1D::kShiftBits;
146 if (hasAlpha) {
147 accum[3] >>= SkConvolutionFilter1D::kShiftBits;
148 }
149
150 // Store the new pixel.
151 outRow[outX * 4 + 0] = ClampTo8(accum[0]);
152 outRow[outX * 4 + 1] = ClampTo8(accum[1]);
153 outRow[outX * 4 + 2] = ClampTo8(accum[2]);
154 if (hasAlpha) {
155 outRow[outX * 4 + 3] = ClampTo8(accum[3]);
156 }
157 }
158 }
159
160 // There's a bug somewhere here with GCC autovectorization (-ftree-vectorize). We originally
161 // thought this was 32 bit only, but subsequent tests show that some 64 bit gcc compiles
162 // suffer here too.
163 //
164 // Dropping to -O2 disables -ftree-vectorize. GCC 4.6 needs noinline. https://bug.skia.org/2575
165 #if SK_HAS_ATTRIBUTE(optimize) && defined(SK_RELEASE)
166 #define SK_MAYBE_DISABLE_VECTORIZATION __attribute__((optimize("O2"), no inline))
167 #else
168 #define SK_MAYBE_DISABLE_VECTORIZATION
169 #endif
170
171 SK_MAYBE_DISABLE_VECTORIZATION
172 static void ConvolveHorizontallyAlpha(const unsigned char* srcData,
173 const SkConvolutionFilter1D& filter,
174 unsigned char* outRow) {
175 return ConvolveHorizontally<true>(srcData, filter, outRow);
176 }
177
178 SK_MAYBE_DISABLE_VECTORIZATION
179 static void ConvolveHorizontallyNoAlpha(const unsigned char* srcData,
180 const SkConvolutionFilter1D& filter,
181 unsigned char* outRow) {
182 return ConvolveHorizontally<false>(srcData, filter, outRow);
183 }
184
185 #undef SK_MAYBE_DISABLE_VECTORIZATION
186
187
188 // Does vertical convolution to produce one output row. The filter values and
189 // length are given in the first two parameters. These are applied to each
190 // of the rows pointed to in the |sourceDataRows| array, with each row
191 // being |pixelWidth| wide.
192 //
193 // The output must have room for |pixelWidth * 4| bytes.
194 template<bool hasAlpha>
195 void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filte rValues,
196 int filterLength,
197 unsigned char* const* sourceDataRows,
198 int pixelWidth,
199 unsigned char* outRow) {
200 // We go through each column in the output and do a vertical convolution,
201 // generating one output pixel each time.
202 for (int outX = 0; outX < pixelWidth; outX++) {
203 // Compute the number of bytes over in each row that the current column
204 // we're convolving starts at. The pixel will cover the next 4 bytes.
205 int byteOffset = outX * 4;
206
207 // Apply the filter to one column of pixels.
208 int accum[4] = {0};
209 for (int filterY = 0; filterY < filterLength; filterY++) {
210 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues [filterY];
211 accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0];
212 accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1];
213 accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2];
214 if (hasAlpha) {
215 accum[3] += curFilter * sourceDataRows[filterY][byteOffset + 3];
216 }
217 }
218
219 // Bring this value back in range. All of the filter scaling factors
220 // are in fixed point with kShiftBits bits of precision.
221 accum[0] >>= SkConvolutionFilter1D::kShiftBits;
222 accum[1] >>= SkConvolutionFilter1D::kShiftBits;
223 accum[2] >>= SkConvolutionFilter1D::kShiftBits;
224 if (hasAlpha) {
225 accum[3] >>= SkConvolutionFilter1D::kShiftBits;
226 }
227
228 // Store the new pixel.
229 outRow[byteOffset + 0] = ClampTo8(accum[0]);
230 outRow[byteOffset + 1] = ClampTo8(accum[1]);
231 outRow[byteOffset + 2] = ClampTo8(accum[2]);
232 if (hasAlpha) {
233 unsigned char alpha = ClampTo8(accum[3]);
234
235 // Make sure the alpha channel doesn't come out smaller than any of the
236 // color channels. We use premultiplied alpha channels, so this should
237 // never happen, but rounding errors will cause this from time to time.
238 // These "impossible" colors will cause overflows (and hence random pixel
239 // values) when the resulting bitmap is drawn to the screen.
240 //
241 // We only need to do this when generating the final output row (here).
242 int maxColorChannel = SkTMax(outRow[byteOffset + 0],
243 SkTMax(outRow[byteOffset + 1],
244 outRow[byteOffset + 2]));
245 if (alpha < maxColorChannel) {
246 outRow[byteOffset + 3] = maxColorChannel;
247 } else {
248 outRow[byteOffset + 3] = alpha;
249 }
250 } else {
251 // No alpha channel, the image is opaque.
252 outRow[byteOffset + 3] = 0xff;
253 }
254 }
255 }
256
257 void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filte rValues,
258 int filterLength,
259 unsigned char* const* sourceDataRows,
260 int pixelWidth,
261 unsigned char* outRow,
262 bool sourceHasAlpha) {
263 if (sourceHasAlpha) {
264 ConvolveVertically<true>(filterValues, filterLength,
265 sourceDataRows, pixelWidth,
266 outRow);
267 } else {
268 ConvolveVertically<false>(filterValues, filterLength,
269 sourceDataRows, pixelWidth,
270 outRow);
271 }
272 }
273
274 } // namespace 99 } // namespace
275 100
276 // SkConvolutionFilter1D ------------------------------------------------------- -- 101 // SkConvolutionFilter1D ------------------------------------------------------- --
277 102
278 SkConvolutionFilter1D::SkConvolutionFilter1D() 103 SkConvolutionFilter1D::SkConvolutionFilter1D()
279 : fMaxFilter(0) { 104 : fMaxFilter(0) {
280 } 105 }
281 106
282 SkConvolutionFilter1D::~SkConvolutionFilter1D() { 107 SkConvolutionFilter1D::~SkConvolutionFilter1D() {
283 } 108 }
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
339 164
340 return &fFilterValues[filter.fDataLocation]; 165 return &fFilterValues[filter.fDataLocation];
341 } 166 }
342 167
343 bool BGRAConvolve2D(const unsigned char* sourceData, 168 bool BGRAConvolve2D(const unsigned char* sourceData,
344 int sourceByteRowStride, 169 int sourceByteRowStride,
345 bool sourceHasAlpha, 170 bool sourceHasAlpha,
346 const SkConvolutionFilter1D& filterX, 171 const SkConvolutionFilter1D& filterX,
347 const SkConvolutionFilter1D& filterY, 172 const SkConvolutionFilter1D& filterY,
348 int outputByteRowStride, 173 int outputByteRowStride,
349 unsigned char* output, 174 unsigned char* output) {
350 const SkConvolutionProcs& convolveProcs,
351 bool useSimdIfPossible) {
352 175
353 int maxYFilterSize = filterY.maxFilter(); 176 int maxYFilterSize = filterY.maxFilter();
354 177
355 // The next row in the input that we will generate a horizontally 178 // The next row in the input that we will generate a horizontally
356 // convolved row for. If the filter doesn't start at the beginning of the 179 // convolved row for. If the filter doesn't start at the beginning of the
357 // image (this is the case when we are only resizing a subset), then we 180 // image (this is the case when we are only resizing a subset), then we
358 // don't want to generate any output rows before that. Compute the starting 181 // don't want to generate any output rows before that. Compute the starting
359 // row for convolution as the first pixel for the first vertical filter. 182 // row for convolution as the first pixel for the first vertical filter.
360 int filterOffset, filterLength; 183 int filterOffset, filterLength;
361 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = 184 const SkConvolutionFilter1D::ConvolutionFixed* filterValues =
362 filterY.FilterForValue(0, &filterOffset, &filterLength); 185 filterY.FilterForValue(0, &filterOffset, &filterLength);
363 int nextXRow = filterOffset; 186 int nextXRow = filterOffset;
364 187
365 // We loop over each row in the input doing a horizontal convolution. This 188 // We loop over each row in the input doing a horizontal convolution. This
366 // will result in a horizontally convolved image. We write the results into 189 // will result in a horizontally convolved image. We write the results into
367 // a circular buffer of convolved rows and do vertical convolution as rows 190 // a circular buffer of convolved rows and do vertical convolution as rows
368 // are available. This prevents us from having to store the entire 191 // are available. This prevents us from having to store the entire
369 // intermediate image and helps cache coherency. 192 // intermediate image and helps cache coherency.
370 // We will need four extra rows so that horizontal convolution can be done 193 // We will need four extra rows so that horizontal convolution can be done
371 // simultaneously. We also pad each row in row buffer to be aligned-up to 194 // simultaneously. We also pad each row in row buffer to be aligned-up to
372 // 16 bytes. 195 // 16 bytes.
373 // TODO(jiesun): We do not use aligned load from row buffer in vertical 196 // TODO(jiesun): We do not use aligned load from row buffer in vertical
374 // convolution pass yet. Somehow Windows does not like it. 197 // convolution pass yet. Somehow Windows does not like it.
375 int rowBufferWidth = (filterX.numValues() + 15) & ~0xF; 198 int rowBufferWidth = (filterX.numValues() + 15) & ~0xF;
376 int rowBufferHeight = maxYFilterSize + 199 int rowBufferHeight = maxYFilterSize +
377 (convolveProcs.fConvolve4RowsHorizontally ? 4 : 0); 200 (SkOpts::convolve_4_rows_horizontally != nullptr ? 4 : 0);
378 201
379 // check for too-big allocation requests : crbug.com/528628 202 // check for too-big allocation requests : crbug.com/528628
380 { 203 {
381 int64_t size = sk_64_mul(rowBufferWidth, rowBufferHeight); 204 int64_t size = sk_64_mul(rowBufferWidth, rowBufferHeight);
382 // need some limit, to avoid over-committing success from malloc, but then 205 // need some limit, to avoid over-committing success from malloc, but then
383 // crashing when we try to actually use the memory. 206 // crashing when we try to actually use the memory.
384 // 100meg seems big enough to allow "normal" zoom factors and image sizes through 207 // 100meg seems big enough to allow "normal" zoom factors and image sizes through
385 // while avoiding the crash seen by the bug (crbug.com/528628) 208 // while avoiding the crash seen by the bug (crbug.com/528628)
386 if (size > 100 * 1024 * 1024) { 209 if (size > 100 * 1024 * 1024) {
387 // SkDebugf("BGRAConvolve2D: tmp allocation [%lld] too big\n", size); 210 // SkDebugf("BGRAConvolve2D: tmp allocation [%lld] too big\n", size);
(...skipping 15 matching lines...) Expand all
403 int lastFilterOffset, lastFilterLength; 226 int lastFilterOffset, lastFilterLength;
404 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, 227 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset,
405 &lastFilterLength); 228 &lastFilterLength);
406 229
407 for (int outY = 0; outY < numOutputRows; outY++) { 230 for (int outY = 0; outY < numOutputRows; outY++) {
408 filterValues = filterY.FilterForValue(outY, 231 filterValues = filterY.FilterForValue(outY,
409 &filterOffset, &filterLength); 232 &filterOffset, &filterLength);
410 233
411 // Generate output rows until we have enough to run the current filter. 234 // Generate output rows until we have enough to run the current filter.
412 while (nextXRow < filterOffset + filterLength) { 235 while (nextXRow < filterOffset + filterLength) {
413 if (convolveProcs.fConvolve4RowsHorizontally && 236 if (SkOpts::convolve_4_rows_horizontally != nullptr &&
414 nextXRow + 3 < lastFilterOffset + lastFilterLength) { 237 nextXRow + 3 < lastFilterOffset + lastFilterLength) {
415 const unsigned char* src[4]; 238 const unsigned char* src[4];
416 unsigned char* outRow[4]; 239 unsigned char* outRow[4];
417 for (int i = 0; i < 4; ++i) { 240 for (int i = 0; i < 4; ++i) {
418 src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRo wStride]; 241 src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRo wStride];
419 outRow[i] = rowBuffer.advanceRow(); 242 outRow[i] = rowBuffer.advanceRow();
420 } 243 }
421 convolveProcs.fConvolve4RowsHorizontally(src, filterX, outRow, 4 *rowBufferWidth); 244 SkOpts::convolve_4_rows_horizontally(src, filterX, outRow, 4*row BufferWidth);
422 nextXRow += 4; 245 nextXRow += 4;
423 } else { 246 } else {
424 if (convolveProcs.fConvolveHorizontally) { 247 SkOpts::convolve_horizontally(
425 convolveProcs.fConvolveHorizontally(
426 &sourceData[(uint64_t)nextXRow * sourceByteRowStride], 248 &sourceData[(uint64_t)nextXRow * sourceByteRowStride],
427 filterX, rowBuffer.advanceRow(), sourceHasAlpha); 249 filterX, rowBuffer.advanceRow(), sourceHasAlpha);
428 } else {
429 if (sourceHasAlpha) {
430 ConvolveHorizontallyAlpha(
431 &sourceData[(uint64_t)nextXRow * sourceByteRowStride ],
432 filterX, rowBuffer.advanceRow());
433 } else {
434 ConvolveHorizontallyNoAlpha(
435 &sourceData[(uint64_t)nextXRow * sourceByteRowStride ],
436 filterX, rowBuffer.advanceRow());
437 }
438 }
439 nextXRow++; 250 nextXRow++;
440 } 251 }
441 } 252 }
442 253
443 // Compute where in the output image this row of final data will go. 254 // Compute where in the output image this row of final data will go.
444 unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStri de]; 255 unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStri de];
445 256
446 // Get the list of rows that the circular buffer has, in order. 257 // Get the list of rows that the circular buffer has, in order.
447 int firstRowInCircularBuffer; 258 int firstRowInCircularBuffer;
448 unsigned char* const* rowsToConvolve = 259 unsigned char* const* rowsToConvolve =
449 rowBuffer.GetRowAddresses(&firstRowInCircularBuffer); 260 rowBuffer.GetRowAddresses(&firstRowInCircularBuffer);
450 261
451 // Now compute the start of the subset of those rows that the filter 263 // Now compute the start of the subset of those rows that the filter needs.
452 // needs.
453 unsigned char* const* firstRowForFilter = 263 unsigned char* const* firstRowForFilter =
454 &rowsToConvolve[filterOffset - firstRowInCircularBuffer]; 264 &rowsToConvolve[filterOffset - firstRowInCircularBuffer];
455 265
456 if (convolveProcs.fConvolveVertically) { 266 SkOpts::convolve_vertically(filterValues, filterLength,
457 convolveProcs.fConvolveVertically(filterValues, filterLength, 267 firstRowForFilter,
458 firstRowForFilter, 268 filterX.numValues(), curOutputRow,
459 filterX.numValues(), curOutputRow , 269 sourceHasAlpha);
460 sourceHasAlpha);
461 } else {
462 ConvolveVertically(filterValues, filterLength,
463 firstRowForFilter,
464 filterX.numValues(), curOutputRow,
465 sourceHasAlpha);
466 }
467 } 270 }
468 return true; 271 return true;
469 } 272 }
OLDNEW
« no previous file with comments | « src/core/SkConvolver.h ('k') | src/core/SkOpts.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698