OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "SkConvolver.h" | 5 #include "SkConvolver.h" |
| 6 #include "SkOpts.h" |
6 #include "SkTArray.h" | 7 #include "SkTArray.h" |
7 | 8 |
8 namespace { | 9 namespace { |
9 | |
10 // Converts the argument to an 8-bit unsigned value by clamping to the range | |
11 // 0-255. | |
12 inline unsigned char ClampTo8(int a) { | |
13 if (static_cast<unsigned>(a) < 256) { | |
14 return a; // Avoid the extra check in the common case. | |
15 } | |
16 if (a < 0) { | |
17 return 0; | |
18 } | |
19 return 255; | |
20 } | |
21 | |
22 // Stores a list of rows in a circular buffer. You write into it | 10 // Stores a list of rows in a circular buffer. You write into it |
23 // by calling advanceRow(). It will keep track of which row in the buffer it | 11 // by calling advanceRow(). It will keep track of which row in the buffer it |
24 // should use next, and the total number of rows added. | 12 // should use next, and the total number of rows added. |
25 class CircularRowBuffer { | 13 class CircularRowBuffer { |
26 public: | 14 public: |
27 // The number of pixels in each row is given in |sourceRowPixelWidth|. | 15 // The number of pixels in each row is given in |sourceRowPixelWidth|. |
28 // The maximum number of rows needed in the buffer is |maxYFilterSize| | 16 // The maximum number of rows needed in the buffer is |maxYFilterSize| |
29 // (we only need to store enough rows for the biggest filter). | 17 // (we only need to store enough rows for the biggest filter). |
30 // | 18 // |
31 // We use the |firstInputRow| to compute the coordinates of all of the | 19 // We use the |firstInputRow| to compute the coordinates of all of the |
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
101 int fNextRow; | 89 int fNextRow; |
102 | 90 |
103 // The y coordinate of the |fNextRow|. This is incremented each time a | 91 // The y coordinate of the |fNextRow|. This is incremented each time a |
104 // new row is appended and does not wrap. | 92 // new row is appended and does not wrap. |
105 int fNextRowCoordinate; | 93 int fNextRowCoordinate; |
106 | 94 |
107 // Buffer used by GetRowAddresses(). | 95 // Buffer used by GetRowAddresses(). |
108 SkTArray<unsigned char*> fRowAddresses; | 96 SkTArray<unsigned char*> fRowAddresses; |
109 }; | 97 }; |
110 | 98 |
111 // Convolves horizontally along a single row. The row data is given in | |
112 // |srcData| and continues for the numValues() of the filter. | |
113 template<bool hasAlpha> | |
114 void ConvolveHorizontally(const unsigned char* srcData, | |
115 const SkConvolutionFilter1D& filter, | |
116 unsigned char* outRow) { | |
117 // Loop over each pixel on this row in the output image. | |
118 int numValues = filter.numValues(); | |
119 for (int outX = 0; outX < numValues; outX++) { | |
120 // Get the filter that determines the current output pixel. | |
121 int filterOffset, filterLength; | |
122 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = | |
123 filter.FilterForValue(outX, &filterOffset, &filterLength); | |
124 | |
125 // Compute the first pixel in this row that the filter affects. It w
ill | |
126 // touch |filterLength| pixels (4 bytes each) after this. | |
127 const unsigned char* rowToFilter = &srcData[filterOffset * 4]; | |
128 | |
129 // Apply the filter to the row to get the destination pixel in |accu
m|. | |
130 int accum[4] = {0}; | |
131 for (int filterX = 0; filterX < filterLength; filterX++) { | |
132 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues
[filterX]; | |
133 accum[0] += curFilter * rowToFilter[filterX * 4 + 0]; | |
134 accum[1] += curFilter * rowToFilter[filterX * 4 + 1]; | |
135 accum[2] += curFilter * rowToFilter[filterX * 4 + 2]; | |
136 if (hasAlpha) { | |
137 accum[3] += curFilter * rowToFilter[filterX * 4 + 3]; | |
138 } | |
139 } | |
140 | |
141 // Bring this value back in range. All of the filter scaling factors | |
142 // are in fixed point with kShiftBits bits of fractional part. | |
143 accum[0] >>= SkConvolutionFilter1D::kShiftBits; | |
144 accum[1] >>= SkConvolutionFilter1D::kShiftBits; | |
145 accum[2] >>= SkConvolutionFilter1D::kShiftBits; | |
146 if (hasAlpha) { | |
147 accum[3] >>= SkConvolutionFilter1D::kShiftBits; | |
148 } | |
149 | |
150 // Store the new pixel. | |
151 outRow[outX * 4 + 0] = ClampTo8(accum[0]); | |
152 outRow[outX * 4 + 1] = ClampTo8(accum[1]); | |
153 outRow[outX * 4 + 2] = ClampTo8(accum[2]); | |
154 if (hasAlpha) { | |
155 outRow[outX * 4 + 3] = ClampTo8(accum[3]); | |
156 } | |
157 } | |
158 } | |
159 | |
160 // There's a bug somewhere here with GCC autovectorization (-ftree-vectorize
). We originally | |
161 // thought this was 32 bit only, but subsequent tests show that some 64 bit
gcc compiles | |
162 // suffer here too. | |
163 // | |
164 // Dropping to -O2 disables -ftree-vectorize. GCC 4.6 needs noinline. http
s://bug.skia.org/2575 | |
165 #if SK_HAS_ATTRIBUTE(optimize) && defined(SK_RELEASE) | |
166 #define SK_MAYBE_DISABLE_VECTORIZATION __attribute__((optimize("O2"), no
inline)) | |
167 #else | |
168 #define SK_MAYBE_DISABLE_VECTORIZATION | |
169 #endif | |
170 | |
171 SK_MAYBE_DISABLE_VECTORIZATION | |
172 static void ConvolveHorizontallyAlpha(const unsigned char* srcData, | |
173 const SkConvolutionFilter1D& filter, | |
174 unsigned char* outRow) { | |
175 return ConvolveHorizontally<true>(srcData, filter, outRow); | |
176 } | |
177 | |
178 SK_MAYBE_DISABLE_VECTORIZATION | |
179 static void ConvolveHorizontallyNoAlpha(const unsigned char* srcData, | |
180 const SkConvolutionFilter1D& filter, | |
181 unsigned char* outRow) { | |
182 return ConvolveHorizontally<false>(srcData, filter, outRow); | |
183 } | |
184 | |
185 #undef SK_MAYBE_DISABLE_VECTORIZATION | |
186 | |
187 | |
188 // Does vertical convolution to produce one output row. The filter values and | |
189 // length are given in the first two parameters. These are applied to each | |
190 // of the rows pointed to in the |sourceDataRows| array, with each row | |
191 // being |pixelWidth| wide. | |
192 // | |
193 // The output must have room for |pixelWidth * 4| bytes. | |
194 template<bool hasAlpha> | |
195 void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filte
rValues, | |
196 int filterLength, | |
197 unsigned char* const* sourceDataRows, | |
198 int pixelWidth, | |
199 unsigned char* outRow) { | |
200 // We go through each column in the output and do a vertical convolution
, | |
201 // generating one output pixel each time. | |
202 for (int outX = 0; outX < pixelWidth; outX++) { | |
203 // Compute the number of bytes over in each row that the current col
umn | |
204 // we're convolving starts at. The pixel will cover the next 4 bytes
. | |
205 int byteOffset = outX * 4; | |
206 | |
207 // Apply the filter to one column of pixels. | |
208 int accum[4] = {0}; | |
209 for (int filterY = 0; filterY < filterLength; filterY++) { | |
210 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues
[filterY]; | |
211 accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0]; | |
212 accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1]; | |
213 accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2]; | |
214 if (hasAlpha) { | |
215 accum[3] += curFilter * sourceDataRows[filterY][byteOffset +
3]; | |
216 } | |
217 } | |
218 | |
219 // Bring this value back in range. All of the filter scaling factors | |
220 // are in fixed point with kShiftBits bits of precision. | |
221 accum[0] >>= SkConvolutionFilter1D::kShiftBits; | |
222 accum[1] >>= SkConvolutionFilter1D::kShiftBits; | |
223 accum[2] >>= SkConvolutionFilter1D::kShiftBits; | |
224 if (hasAlpha) { | |
225 accum[3] >>= SkConvolutionFilter1D::kShiftBits; | |
226 } | |
227 | |
228 // Store the new pixel. | |
229 outRow[byteOffset + 0] = ClampTo8(accum[0]); | |
230 outRow[byteOffset + 1] = ClampTo8(accum[1]); | |
231 outRow[byteOffset + 2] = ClampTo8(accum[2]); | |
232 if (hasAlpha) { | |
233 unsigned char alpha = ClampTo8(accum[3]); | |
234 | |
235 // Make sure the alpha channel doesn't come out smaller than any
of the | |
236 // color channels. We use premultiplied alpha channels, so this s
hould | |
237 // never happen, but rounding errors will cause this from time t
o time. | |
238 // These "impossible" colors will cause overflows (and hence ran
dom pixel | |
239 // values) when the resulting bitmap is drawn to the screen. | |
240 // | |
241 // We only need to do this when generating the final output row
(here). | |
242 int maxColorChannel = SkTMax(outRow[byteOffset + 0], | |
243 SkTMax(outRow[byteOffset + 1], | |
244 outRow[byteOffset + 2])); | |
245 if (alpha < maxColorChannel) { | |
246 outRow[byteOffset + 3] = maxColorChannel; | |
247 } else { | |
248 outRow[byteOffset + 3] = alpha; | |
249 } | |
250 } else { | |
251 // No alpha channel, the image is opaque. | |
252 outRow[byteOffset + 3] = 0xff; | |
253 } | |
254 } | |
255 } | |
256 | |
257 void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filte
rValues, | |
258 int filterLength, | |
259 unsigned char* const* sourceDataRows, | |
260 int pixelWidth, | |
261 unsigned char* outRow, | |
262 bool sourceHasAlpha) { | |
263 if (sourceHasAlpha) { | |
264 ConvolveVertically<true>(filterValues, filterLength, | |
265 sourceDataRows, pixelWidth, | |
266 outRow); | |
267 } else { | |
268 ConvolveVertically<false>(filterValues, filterLength, | |
269 sourceDataRows, pixelWidth, | |
270 outRow); | |
271 } | |
272 } | |
273 | |
274 } // namespace | 99 } // namespace |
275 | 100 |
276 // SkConvolutionFilter1D -------------------------------------------------------
-- | 101 // SkConvolutionFilter1D -------------------------------------------------------
-- |
277 | 102 |
278 SkConvolutionFilter1D::SkConvolutionFilter1D() | 103 SkConvolutionFilter1D::SkConvolutionFilter1D() |
279 : fMaxFilter(0) { | 104 : fMaxFilter(0) { |
280 } | 105 } |
281 | 106 |
282 SkConvolutionFilter1D::~SkConvolutionFilter1D() { | 107 SkConvolutionFilter1D::~SkConvolutionFilter1D() { |
283 } | 108 } |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
339 | 164 |
340 return &fFilterValues[filter.fDataLocation]; | 165 return &fFilterValues[filter.fDataLocation]; |
341 } | 166 } |
342 | 167 |
343 bool BGRAConvolve2D(const unsigned char* sourceData, | 168 bool BGRAConvolve2D(const unsigned char* sourceData, |
344 int sourceByteRowStride, | 169 int sourceByteRowStride, |
345 bool sourceHasAlpha, | 170 bool sourceHasAlpha, |
346 const SkConvolutionFilter1D& filterX, | 171 const SkConvolutionFilter1D& filterX, |
347 const SkConvolutionFilter1D& filterY, | 172 const SkConvolutionFilter1D& filterY, |
348 int outputByteRowStride, | 173 int outputByteRowStride, |
349 unsigned char* output, | 174 unsigned char* output) { |
350 const SkConvolutionProcs& convolveProcs, | |
351 bool useSimdIfPossible) { | |
352 | 175 |
353 int maxYFilterSize = filterY.maxFilter(); | 176 int maxYFilterSize = filterY.maxFilter(); |
354 | 177 |
355 // The next row in the input that we will generate a horizontally | 178 // The next row in the input that we will generate a horizontally |
356 // convolved row for. If the filter doesn't start at the beginning of the | 179 // convolved row for. If the filter doesn't start at the beginning of the |
357 // image (this is the case when we are only resizing a subset), then we | 180 // image (this is the case when we are only resizing a subset), then we |
358 // don't want to generate any output rows before that. Compute the starting | 181 // don't want to generate any output rows before that. Compute the starting |
359 // row for convolution as the first pixel for the first vertical filter. | 182 // row for convolution as the first pixel for the first vertical filter. |
360 int filterOffset, filterLength; | 183 int filterOffset, filterLength; |
361 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = | 184 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = |
362 filterY.FilterForValue(0, &filterOffset, &filterLength); | 185 filterY.FilterForValue(0, &filterOffset, &filterLength); |
363 int nextXRow = filterOffset; | 186 int nextXRow = filterOffset; |
364 | 187 |
365 // We loop over each row in the input doing a horizontal convolution. This | 188 // We loop over each row in the input doing a horizontal convolution. This |
366 // will result in a horizontally convolved image. We write the results into | 189 // will result in a horizontally convolved image. We write the results into |
367 // a circular buffer of convolved rows and do vertical convolution as rows | 190 // a circular buffer of convolved rows and do vertical convolution as rows |
368 // are available. This prevents us from having to store the entire | 191 // are available. This prevents us from having to store the entire |
369 // intermediate image and helps cache coherency. | 192 // intermediate image and helps cache coherency. |
370 // We will need four extra rows so that horizontal convolution can be done on | 193 // We will need four extra rows so that horizontal convolution can be done on |
371 // four rows simultaneously. We also pad each row in row buffer to be aligned-up to | 194 // four rows simultaneously. We also pad each row in row buffer to be aligned-up to |
372 // 16 bytes. | 195 // 16 bytes. |
373 // TODO(jiesun): We do not use aligned load from row buffer in vertical | 196 // TODO(jiesun): We do not use aligned load from row buffer in vertical |
374 // convolution pass yet. Somehow Windows does not like it. | 197 // convolution pass yet. Somehow Windows does not like it. |
375 int rowBufferWidth = (filterX.numValues() + 15) & ~0xF; | 198 int rowBufferWidth = (filterX.numValues() + 15) & ~0xF; |
376 int rowBufferHeight = maxYFilterSize + | 199 int rowBufferHeight = maxYFilterSize + |
377 (convolveProcs.fConvolve4RowsHorizontally ? 4 : 0); | 200 (SkOpts::convolve_4_rows_horizontally != nullptr ? 4 :
0); |
378 | 201 |
379 // check for too-big allocation requests : crbug.com/528628 | 202 // check for too-big allocation requests : crbug.com/528628 |
380 { | 203 { |
381 int64_t size = sk_64_mul(rowBufferWidth, rowBufferHeight); | 204 int64_t size = sk_64_mul(rowBufferWidth, rowBufferHeight); |
382 // need some limit, to avoid over-committing success from malloc, but th
en | 205 // need some limit, to avoid over-committing success from malloc, but th
en |
383 // crashing when we try to actually use the memory. | 206 // crashing when we try to actually use the memory. |
384 // 100meg seems big enough to allow "normal" zoom factors and image size
s through | 207 // 100meg seems big enough to allow "normal" zoom factors and image size
s through |
385 // while avoiding the crash seen by the bug (crbug.com/528628) | 208 // while avoiding the crash seen by the bug (crbug.com/528628) |
386 if (size > 100 * 1024 * 1024) { | 209 if (size > 100 * 1024 * 1024) { |
387 // SkDebugf("BGRAConvolve2D: tmp allocation [%lld] too big\n", size); | 210 // SkDebugf("BGRAConvolve2D: tmp allocation [%lld] too big\n", size); |
(...skipping 15 matching lines...) Expand all Loading... |
403 int lastFilterOffset, lastFilterLength; | 226 int lastFilterOffset, lastFilterLength; |
404 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, | 227 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, |
405 &lastFilterLength); | 228 &lastFilterLength); |
406 | 229 |
407 for (int outY = 0; outY < numOutputRows; outY++) { | 230 for (int outY = 0; outY < numOutputRows; outY++) { |
408 filterValues = filterY.FilterForValue(outY, | 231 filterValues = filterY.FilterForValue(outY, |
409 &filterOffset, &filterLength); | 232 &filterOffset, &filterLength); |
410 | 233 |
411 // Generate output rows until we have enough to run the current filter. | 234 // Generate output rows until we have enough to run the current filter. |
412 while (nextXRow < filterOffset + filterLength) { | 235 while (nextXRow < filterOffset + filterLength) { |
413 if (convolveProcs.fConvolve4RowsHorizontally && | 236 if (SkOpts::convolve_4_rows_horizontally != nullptr && |
414 nextXRow + 3 < lastFilterOffset + lastFilterLength) { | 237 nextXRow + 3 < lastFilterOffset + lastFilterLength) { |
415 const unsigned char* src[4]; | 238 const unsigned char* src[4]; |
416 unsigned char* outRow[4]; | 239 unsigned char* outRow[4]; |
417 for (int i = 0; i < 4; ++i) { | 240 for (int i = 0; i < 4; ++i) { |
418 src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRo
wStride]; | 241 src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRo
wStride]; |
419 outRow[i] = rowBuffer.advanceRow(); | 242 outRow[i] = rowBuffer.advanceRow(); |
420 } | 243 } |
421 convolveProcs.fConvolve4RowsHorizontally(src, filterX, outRow, 4
*rowBufferWidth); | 244 SkOpts::convolve_4_rows_horizontally(src, filterX, outRow, 4*row
BufferWidth); |
422 nextXRow += 4; | 245 nextXRow += 4; |
423 } else { | 246 } else { |
424 if (convolveProcs.fConvolveHorizontally) { | 247 SkOpts::convolve_horizontally( |
425 convolveProcs.fConvolveHorizontally( | |
426 &sourceData[(uint64_t)nextXRow * sourceByteRowStride], | 248 &sourceData[(uint64_t)nextXRow * sourceByteRowStride], |
427 filterX, rowBuffer.advanceRow(), sourceHasAlpha); | 249 filterX, rowBuffer.advanceRow(), sourceHasAlpha); |
428 } else { | |
429 if (sourceHasAlpha) { | |
430 ConvolveHorizontallyAlpha( | |
431 &sourceData[(uint64_t)nextXRow * sourceByteRowStride
], | |
432 filterX, rowBuffer.advanceRow()); | |
433 } else { | |
434 ConvolveHorizontallyNoAlpha( | |
435 &sourceData[(uint64_t)nextXRow * sourceByteRowStride
], | |
436 filterX, rowBuffer.advanceRow()); | |
437 } | |
438 } | |
439 nextXRow++; | 250 nextXRow++; |
440 } | 251 } |
441 } | 252 } |
442 | 253 |
443 // Compute where in the output image this row of final data will go. | 254 // Compute where in the output image this row of final data will go. |
444 unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStri
de]; | 255 unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStri
de]; |
445 | 256 |
446 // Get the list of rows that the circular buffer has, in order. | 257 // Get the list of rows that the circular buffer has, in order. |
447 int firstRowInCircularBuffer; | 258 int firstRowInCircularBuffer; |
448 unsigned char* const* rowsToConvolve = | 259 unsigned char* const* rowsToConvolve = |
449 rowBuffer.GetRowAddresses(&firstRowInCircularBuffer); | 260 rowBuffer.GetRowAddresses(&firstRowInCircularBuffer); |
450 | 261 |
451 // Now compute the start of the subset of those rows that the filter | 262 // Now compute the start of the subset of those rows that the filter nee
ds. |
452 // needs. | |
453 unsigned char* const* firstRowForFilter = | 263 unsigned char* const* firstRowForFilter = |
454 &rowsToConvolve[filterOffset - firstRowInCircularBuffer]; | 264 &rowsToConvolve[filterOffset - firstRowInCircularBuffer]; |
455 | 265 |
456 if (convolveProcs.fConvolveVertically) { | 266 SkOpts::convolve_vertically(filterValues, filterLength, |
457 convolveProcs.fConvolveVertically(filterValues, filterLength, | 267 firstRowForFilter, |
458 firstRowForFilter, | 268 filterX.numValues(), curOutputRow, |
459 filterX.numValues(), curOutputRow
, | 269 sourceHasAlpha); |
460 sourceHasAlpha); | |
461 } else { | |
462 ConvolveVertically(filterValues, filterLength, | |
463 firstRowForFilter, | |
464 filterX.numValues(), curOutputRow, | |
465 sourceHasAlpha); | |
466 } | |
467 } | 270 } |
468 return true; | 271 return true; |
469 } | 272 } |
OLD | NEW |