| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "SkConvolver.h" | 5 #include "SkConvolver.h" |
| 6 #include "SkOpts.h" |
| 6 #include "SkTArray.h" | 7 #include "SkTArray.h" |
| 7 | 8 |
| 8 namespace { | 9 namespace { |
| 9 | |
// Saturates |a| to the range [0, 255] and returns it as an 8-bit unsigned
// value: negative inputs become 0, inputs above 255 become 255, and anything
// already in range is returned unchanged.
inline unsigned char ClampTo8(int a) {
    // Reinterpreting |a| as unsigned turns every negative value into a large
    // positive one, so a single comparison detects both kinds of out-of-range
    // input. In-range values (the common case) skip the second test.
    if (static_cast<unsigned>(a) >= 256) {
        return a < 0 ? 0 : 255;
    }
    return a;
}
| 21 | |
| 22 // Stores a list of rows in a circular buffer. The usage is you write into i
t | 10 // Stores a list of rows in a circular buffer. The usage is you write into i
t |
| 23 // by calling AdvanceRow. It will keep track of which row in the buffer it | 11 // by calling AdvanceRow. It will keep track of which row in the buffer it |
| 24 // should use next, and the total number of rows added. | 12 // should use next, and the total number of rows added. |
| 25 class CircularRowBuffer { | 13 class CircularRowBuffer { |
| 26 public: | 14 public: |
| 27 // The number of pixels in each row is given in |sourceRowPixelWidth|. | 15 // The number of pixels in each row is given in |sourceRowPixelWidth|. |
| 28 // The maximum number of rows needed in the buffer is |maxYFilterSize| | 16 // The maximum number of rows needed in the buffer is |maxYFilterSize| |
| 29 // (we only need to store enough rows for the biggest filter). | 17 // (we only need to store enough rows for the biggest filter). |
| 30 // | 18 // |
| 31 // We use the |firstInputRow| to compute the coordinates of all of the | 19 // We use the |firstInputRow| to compute the coordinates of all of the |
| (...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 101 int fNextRow; | 89 int fNextRow; |
| 102 | 90 |
| 103 // The y coordinate of the |fNextRow|. This is incremented each time a | 91 // The y coordinate of the |fNextRow|. This is incremented each time a |
| 104 // new row is appended and does not wrap. | 92 // new row is appended and does not wrap. |
| 105 int fNextRowCoordinate; | 93 int fNextRowCoordinate; |
| 106 | 94 |
| 107 // Buffer used by GetRowAddresses(). | 95 // Buffer used by GetRowAddresses(). |
| 108 SkTArray<unsigned char*> fRowAddresses; | 96 SkTArray<unsigned char*> fRowAddresses; |
| 109 }; | 97 }; |
| 110 | 98 |
| 111 // Convolves horizontally along a single row. The row data is given in | |
| 112 // |srcData| and continues for the numValues() of the filter. | |
| 113 template<bool hasAlpha> | |
| 114 void ConvolveHorizontally(const unsigned char* srcData, | |
| 115 const SkConvolutionFilter1D& filter, | |
| 116 unsigned char* outRow) { | |
| 117 // Loop over each pixel on this row in the output image. | |
| 118 int numValues = filter.numValues(); | |
| 119 for (int outX = 0; outX < numValues; outX++) { | |
| 120 // Get the filter that determines the current output pixel. | |
| 121 int filterOffset, filterLength; | |
| 122 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = | |
| 123 filter.FilterForValue(outX, &filterOffset, &filterLength); | |
| 124 | |
| 125 // Compute the first pixel in this row that the filter affects. It w
ill | |
| 126 // touch |filterLength| pixels (4 bytes each) after this. | |
| 127 const unsigned char* rowToFilter = &srcData[filterOffset * 4]; | |
| 128 | |
| 129 // Apply the filter to the row to get the destination pixel in |accu
m|. | |
| 130 int accum[4] = {0}; | |
| 131 for (int filterX = 0; filterX < filterLength; filterX++) { | |
| 132 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues
[filterX]; | |
| 133 accum[0] += curFilter * rowToFilter[filterX * 4 + 0]; | |
| 134 accum[1] += curFilter * rowToFilter[filterX * 4 + 1]; | |
| 135 accum[2] += curFilter * rowToFilter[filterX * 4 + 2]; | |
| 136 if (hasAlpha) { | |
| 137 accum[3] += curFilter * rowToFilter[filterX * 4 + 3]; | |
| 138 } | |
| 139 } | |
| 140 | |
| 141 // Bring this value back in range. All of the filter scaling factors | |
| 142 // are in fixed point with kShiftBits bits of fractional part. | |
| 143 accum[0] >>= SkConvolutionFilter1D::kShiftBits; | |
| 144 accum[1] >>= SkConvolutionFilter1D::kShiftBits; | |
| 145 accum[2] >>= SkConvolutionFilter1D::kShiftBits; | |
| 146 if (hasAlpha) { | |
| 147 accum[3] >>= SkConvolutionFilter1D::kShiftBits; | |
| 148 } | |
| 149 | |
| 150 // Store the new pixel. | |
| 151 outRow[outX * 4 + 0] = ClampTo8(accum[0]); | |
| 152 outRow[outX * 4 + 1] = ClampTo8(accum[1]); | |
| 153 outRow[outX * 4 + 2] = ClampTo8(accum[2]); | |
| 154 if (hasAlpha) { | |
| 155 outRow[outX * 4 + 3] = ClampTo8(accum[3]); | |
| 156 } | |
| 157 } | |
| 158 } | |
| 159 | |
| 160 // There's a bug somewhere here with GCC autovectorization (-ftree-vectorize
). We originally | |
| 161 // thought this was 32 bit only, but subsequent tests show that some 64 bit
gcc compiles | |
| 162 // suffer here too. | |
| 163 // | |
| 164 // Dropping to -O2 disables -ftree-vectorize. GCC 4.6 needs noinline. http
s://bug.skia.org/2575 | |
| 165 #if SK_HAS_ATTRIBUTE(optimize) && defined(SK_RELEASE) | |
| 166 #define SK_MAYBE_DISABLE_VECTORIZATION __attribute__((optimize("O2"), no
inline)) | |
| 167 #else | |
| 168 #define SK_MAYBE_DISABLE_VECTORIZATION | |
| 169 #endif | |
| 170 | |
| 171 SK_MAYBE_DISABLE_VECTORIZATION | |
| 172 static void ConvolveHorizontallyAlpha(const unsigned char* srcData, | |
| 173 const SkConvolutionFilter1D& filter, | |
| 174 unsigned char* outRow) { | |
| 175 return ConvolveHorizontally<true>(srcData, filter, outRow); | |
| 176 } | |
| 177 | |
| 178 SK_MAYBE_DISABLE_VECTORIZATION | |
| 179 static void ConvolveHorizontallyNoAlpha(const unsigned char* srcData, | |
| 180 const SkConvolutionFilter1D& filter, | |
| 181 unsigned char* outRow) { | |
| 182 return ConvolveHorizontally<false>(srcData, filter, outRow); | |
| 183 } | |
| 184 | |
| 185 #undef SK_MAYBE_DISABLE_VECTORIZATION | |
| 186 | |
| 187 | |
| 188 // Does vertical convolution to produce one output row. The filter values and | |
| 189 // length are given in the first two parameters. These are applied to each | |
| 190 // of the rows pointed to in the |sourceDataRows| array, with each row | |
| 191 // being |pixelWidth| wide. | |
| 192 // | |
| 193 // The output must have room for |pixelWidth * 4| bytes. | |
| 194 template<bool hasAlpha> | |
| 195 void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filte
rValues, | |
| 196 int filterLength, | |
| 197 unsigned char* const* sourceDataRows, | |
| 198 int pixelWidth, | |
| 199 unsigned char* outRow) { | |
| 200 // We go through each column in the output and do a vertical convolution
, | |
| 201 // generating one output pixel each time. | |
| 202 for (int outX = 0; outX < pixelWidth; outX++) { | |
| 203 // Compute the number of bytes over in each row that the current col
umn | |
| 204 // we're convolving starts at. The pixel will cover the next 4 bytes
. | |
| 205 int byteOffset = outX * 4; | |
| 206 | |
| 207 // Apply the filter to one column of pixels. | |
| 208 int accum[4] = {0}; | |
| 209 for (int filterY = 0; filterY < filterLength; filterY++) { | |
| 210 SkConvolutionFilter1D::ConvolutionFixed curFilter = filterValues
[filterY]; | |
| 211 accum[0] += curFilter * sourceDataRows[filterY][byteOffset + 0]; | |
| 212 accum[1] += curFilter * sourceDataRows[filterY][byteOffset + 1]; | |
| 213 accum[2] += curFilter * sourceDataRows[filterY][byteOffset + 2]; | |
| 214 if (hasAlpha) { | |
| 215 accum[3] += curFilter * sourceDataRows[filterY][byteOffset +
3]; | |
| 216 } | |
| 217 } | |
| 218 | |
| 219 // Bring this value back in range. All of the filter scaling factors | |
| 220 // are in fixed point with kShiftBits bits of precision. | |
| 221 accum[0] >>= SkConvolutionFilter1D::kShiftBits; | |
| 222 accum[1] >>= SkConvolutionFilter1D::kShiftBits; | |
| 223 accum[2] >>= SkConvolutionFilter1D::kShiftBits; | |
| 224 if (hasAlpha) { | |
| 225 accum[3] >>= SkConvolutionFilter1D::kShiftBits; | |
| 226 } | |
| 227 | |
| 228 // Store the new pixel. | |
| 229 outRow[byteOffset + 0] = ClampTo8(accum[0]); | |
| 230 outRow[byteOffset + 1] = ClampTo8(accum[1]); | |
| 231 outRow[byteOffset + 2] = ClampTo8(accum[2]); | |
| 232 if (hasAlpha) { | |
| 233 unsigned char alpha = ClampTo8(accum[3]); | |
| 234 | |
| 235 // Make sure the alpha channel doesn't come out smaller than any
of the | |
| 236 // color channels. We use premultipled alpha channels, so this s
hould | |
| 237 // never happen, but rounding errors will cause this from time t
o time. | |
| 238 // These "impossible" colors will cause overflows (and hence ran
dom pixel | |
| 239 // values) when the resulting bitmap is drawn to the screen. | |
| 240 // | |
| 241 // We only need to do this when generating the final output row
(here). | |
| 242 int maxColorChannel = SkTMax(outRow[byteOffset + 0], | |
| 243 SkTMax(outRow[byteOffset + 1], | |
| 244 outRow[byteOffset + 2])); | |
| 245 if (alpha < maxColorChannel) { | |
| 246 outRow[byteOffset + 3] = maxColorChannel; | |
| 247 } else { | |
| 248 outRow[byteOffset + 3] = alpha; | |
| 249 } | |
| 250 } else { | |
| 251 // No alpha channel, the image is opaque. | |
| 252 outRow[byteOffset + 3] = 0xff; | |
| 253 } | |
| 254 } | |
| 255 } | |
| 256 | |
| 257 void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filte
rValues, | |
| 258 int filterLength, | |
| 259 unsigned char* const* sourceDataRows, | |
| 260 int pixelWidth, | |
| 261 unsigned char* outRow, | |
| 262 bool sourceHasAlpha) { | |
| 263 if (sourceHasAlpha) { | |
| 264 ConvolveVertically<true>(filterValues, filterLength, | |
| 265 sourceDataRows, pixelWidth, | |
| 266 outRow); | |
| 267 } else { | |
| 268 ConvolveVertically<false>(filterValues, filterLength, | |
| 269 sourceDataRows, pixelWidth, | |
| 270 outRow); | |
| 271 } | |
| 272 } | |
| 273 | |
| 274 } // namespace | 99 } // namespace |
| 275 | 100 |
| 276 // SkConvolutionFilter1D -------------------------------------------------------
-- | 101 // SkConvolutionFilter1D -------------------------------------------------------
-- |
| 277 | 102 |
| 278 SkConvolutionFilter1D::SkConvolutionFilter1D() | 103 SkConvolutionFilter1D::SkConvolutionFilter1D() |
| 279 : fMaxFilter(0) { | 104 : fMaxFilter(0) { |
| 280 } | 105 } |
| 281 | 106 |
| 282 SkConvolutionFilter1D::~SkConvolutionFilter1D() { | 107 SkConvolutionFilter1D::~SkConvolutionFilter1D() { |
| 283 } | 108 } |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 339 | 164 |
| 340 return &fFilterValues[filter.fDataLocation]; | 165 return &fFilterValues[filter.fDataLocation]; |
| 341 } | 166 } |
| 342 | 167 |
| 343 bool BGRAConvolve2D(const unsigned char* sourceData, | 168 bool BGRAConvolve2D(const unsigned char* sourceData, |
| 344 int sourceByteRowStride, | 169 int sourceByteRowStride, |
| 345 bool sourceHasAlpha, | 170 bool sourceHasAlpha, |
| 346 const SkConvolutionFilter1D& filterX, | 171 const SkConvolutionFilter1D& filterX, |
| 347 const SkConvolutionFilter1D& filterY, | 172 const SkConvolutionFilter1D& filterY, |
| 348 int outputByteRowStride, | 173 int outputByteRowStride, |
| 349 unsigned char* output, | 174 unsigned char* output) { |
| 350 const SkConvolutionProcs& convolveProcs, | |
| 351 bool useSimdIfPossible) { | |
| 352 | 175 |
| 353 int maxYFilterSize = filterY.maxFilter(); | 176 int maxYFilterSize = filterY.maxFilter(); |
| 354 | 177 |
| 355 // The next row in the input that we will generate a horizontally | 178 // The next row in the input that we will generate a horizontally |
| 356 // convolved row for. If the filter doesn't start at the beginning of the | 179 // convolved row for. If the filter doesn't start at the beginning of the |
| 357 // image (this is the case when we are only resizing a subset), then we | 180 // image (this is the case when we are only resizing a subset), then we |
| 358 // don't want to generate any output rows before that. Compute the starting | 181 // don't want to generate any output rows before that. Compute the starting |
| 359 // row for convolution as the first pixel for the first vertical filter. | 182 // row for convolution as the first pixel for the first vertical filter. |
| 360 int filterOffset, filterLength; | 183 int filterOffset, filterLength; |
| 361 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = | 184 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = |
| 362 filterY.FilterForValue(0, &filterOffset, &filterLength); | 185 filterY.FilterForValue(0, &filterOffset, &filterLength); |
| 363 int nextXRow = filterOffset; | 186 int nextXRow = filterOffset; |
| 364 | 187 |
| 365 // We loop over each row in the input doing a horizontal convolution. This | 188 // We loop over each row in the input doing a horizontal convolution. This |
| 366 // will result in a horizontally convolved image. We write the results into | 189 // will result in a horizontally convolved image. We write the results into |
| 367 // a circular buffer of convolved rows and do vertical convolution as rows | 190 // a circular buffer of convolved rows and do vertical convolution as rows |
| 368 // are available. This prevents us from having to store the entire | 191 // are available. This prevents us from having to store the entire |
| 369 // intermediate image and helps cache coherency. | 192 // intermediate image and helps cache coherency. |
| 370 // We will need four extra rows to allow horizontal convolution could be don
e | 193 // We will need four extra rows to allow horizontal convolution could be don
e |
| 371 // simultaneously. We also pad each row in row buffer to be aligned-up to | 194 // simultaneously. We also pad each row in row buffer to be aligned-up to |
| 372 // 16 bytes. | 195 // 16 bytes. |
| 373 // TODO(jiesun): We do not use aligned load from row buffer in vertical | 196 // TODO(jiesun): We do not use aligned load from row buffer in vertical |
| 374 // convolution pass yet. Somehow Windows does not like it. | 197 // convolution pass yet. Somehow Windows does not like it. |
| 375 int rowBufferWidth = (filterX.numValues() + 15) & ~0xF; | 198 int rowBufferWidth = (filterX.numValues() + 15) & ~0xF; |
| 376 int rowBufferHeight = maxYFilterSize + | 199 int rowBufferHeight = maxYFilterSize + |
| 377 (convolveProcs.fConvolve4RowsHorizontally ? 4 : 0); | 200 (SkOpts::convolve_4_rows_horizontally != nullptr ? 4 :
0); |
| 378 | 201 |
| 379 // check for too-big allocation requests : crbug.com/528628 | 202 // check for too-big allocation requests : crbug.com/528628 |
| 380 { | 203 { |
| 381 int64_t size = sk_64_mul(rowBufferWidth, rowBufferHeight); | 204 int64_t size = sk_64_mul(rowBufferWidth, rowBufferHeight); |
| 382 // need some limit, to avoid over-committing success from malloc, but th
en | 205 // need some limit, to avoid over-committing success from malloc, but th
en |
| 383 // crashing when we try to actually use the memory. | 206 // crashing when we try to actually use the memory. |
| 384 // 100meg seems big enough to allow "normal" zoom factors and image size
s through | 207 // 100meg seems big enough to allow "normal" zoom factors and image size
s through |
| 385 // while avoiding the crash seen by the bug (crbug.com/528628) | 208 // while avoiding the crash seen by the bug (crbug.com/528628) |
| 386 if (size > 100 * 1024 * 1024) { | 209 if (size > 100 * 1024 * 1024) { |
| 387 // SkDebugf("BGRAConvolve2D: tmp allocation [%lld] too big\n", size); | 210 // SkDebugf("BGRAConvolve2D: tmp allocation [%lld] too big\n", size); |
| (...skipping 15 matching lines...) Expand all Loading... |
| 403 int lastFilterOffset, lastFilterLength; | 226 int lastFilterOffset, lastFilterLength; |
| 404 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, | 227 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, |
| 405 &lastFilterLength); | 228 &lastFilterLength); |
| 406 | 229 |
| 407 for (int outY = 0; outY < numOutputRows; outY++) { | 230 for (int outY = 0; outY < numOutputRows; outY++) { |
| 408 filterValues = filterY.FilterForValue(outY, | 231 filterValues = filterY.FilterForValue(outY, |
| 409 &filterOffset, &filterLength); | 232 &filterOffset, &filterLength); |
| 410 | 233 |
| 411 // Generate output rows until we have enough to run the current filter. | 234 // Generate output rows until we have enough to run the current filter. |
| 412 while (nextXRow < filterOffset + filterLength) { | 235 while (nextXRow < filterOffset + filterLength) { |
| 413 if (convolveProcs.fConvolve4RowsHorizontally && | 236 if (SkOpts::convolve_4_rows_horizontally != nullptr && |
| 414 nextXRow + 3 < lastFilterOffset + lastFilterLength) { | 237 nextXRow + 3 < lastFilterOffset + lastFilterLength) { |
| 415 const unsigned char* src[4]; | 238 const unsigned char* src[4]; |
| 416 unsigned char* outRow[4]; | 239 unsigned char* outRow[4]; |
| 417 for (int i = 0; i < 4; ++i) { | 240 for (int i = 0; i < 4; ++i) { |
| 418 src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRo
wStride]; | 241 src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRo
wStride]; |
| 419 outRow[i] = rowBuffer.advanceRow(); | 242 outRow[i] = rowBuffer.advanceRow(); |
| 420 } | 243 } |
| 421 convolveProcs.fConvolve4RowsHorizontally(src, filterX, outRow, 4
*rowBufferWidth); | 244 SkOpts::convolve_4_rows_horizontally(src, filterX, outRow, 4*row
BufferWidth); |
| 422 nextXRow += 4; | 245 nextXRow += 4; |
| 423 } else { | 246 } else { |
| 424 if (convolveProcs.fConvolveHorizontally) { | 247 SkOpts::convolve_horizontally( |
| 425 convolveProcs.fConvolveHorizontally( | |
| 426 &sourceData[(uint64_t)nextXRow * sourceByteRowStride], | 248 &sourceData[(uint64_t)nextXRow * sourceByteRowStride], |
| 427 filterX, rowBuffer.advanceRow(), sourceHasAlpha); | 249 filterX, rowBuffer.advanceRow(), sourceHasAlpha); |
| 428 } else { | |
| 429 if (sourceHasAlpha) { | |
| 430 ConvolveHorizontallyAlpha( | |
| 431 &sourceData[(uint64_t)nextXRow * sourceByteRowStride
], | |
| 432 filterX, rowBuffer.advanceRow()); | |
| 433 } else { | |
| 434 ConvolveHorizontallyNoAlpha( | |
| 435 &sourceData[(uint64_t)nextXRow * sourceByteRowStride
], | |
| 436 filterX, rowBuffer.advanceRow()); | |
| 437 } | |
| 438 } | |
| 439 nextXRow++; | 250 nextXRow++; |
| 440 } | 251 } |
| 441 } | 252 } |
| 442 | 253 |
| 443 // Compute where in the output image this row of final data will go. | 254 // Compute where in the output image this row of final data will go. |
| 444 unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStri
de]; | 255 unsigned char* curOutputRow = &output[(uint64_t)outY * outputByteRowStri
de]; |
| 445 | 256 |
| 446 // Get the list of rows that the circular buffer has, in order. | 257 // Get the list of rows that the circular buffer has, in order. |
| 447 int firstRowInCircularBuffer; | 258 int firstRowInCircularBuffer; |
| 448 unsigned char* const* rowsToConvolve = | 259 unsigned char* const* rowsToConvolve = |
| 449 rowBuffer.GetRowAddresses(&firstRowInCircularBuffer); | 260 rowBuffer.GetRowAddresses(&firstRowInCircularBuffer); |
| 450 | 261 |
| 451 // Now compute the start of the subset of those rows that the filter | 262 // Now compute the start of the subset of those rows that the filter nee
ds. |
| 452 // needs. | |
| 453 unsigned char* const* firstRowForFilter = | 263 unsigned char* const* firstRowForFilter = |
| 454 &rowsToConvolve[filterOffset - firstRowInCircularBuffer]; | 264 &rowsToConvolve[filterOffset - firstRowInCircularBuffer]; |
| 455 | 265 |
| 456 if (convolveProcs.fConvolveVertically) { | 266 SkOpts::convolve_vertically(filterValues, filterLength, |
| 457 convolveProcs.fConvolveVertically(filterValues, filterLength, | 267 firstRowForFilter, |
| 458 firstRowForFilter, | 268 filterX.numValues(), curOutputRow, |
| 459 filterX.numValues(), curOutputRow
, | 269 sourceHasAlpha); |
| 460 sourceHasAlpha); | |
| 461 } else { | |
| 462 ConvolveVertically(filterValues, filterLength, | |
| 463 firstRowForFilter, | |
| 464 filterX.numValues(), curOutputRow, | |
| 465 sourceHasAlpha); | |
| 466 } | |
| 467 } | 270 } |
| 468 return true; | 271 return true; |
| 469 } | 272 } |
| OLD | NEW |