| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "SkConvolver.h" | 5 #include "SkConvolver.h" |
| 6 #include "SkSize.h" | 6 #include "SkSize.h" |
| 7 #include "SkTypes.h" | 7 #include "SkTypes.h" |
| 8 | 8 |
| 9 namespace { | 9 namespace { |
| 10 | 10 |
| (...skipping 319 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 330 return &fFilterValues[filter.fDataLocation]; | 330 return &fFilterValues[filter.fDataLocation]; |
| 331 } | 331 } |
| 332 | 332 |
| 333 void BGRAConvolve2D(const unsigned char* sourceData, | 333 void BGRAConvolve2D(const unsigned char* sourceData, |
| 334 int sourceByteRowStride, | 334 int sourceByteRowStride, |
| 335 bool sourceHasAlpha, | 335 bool sourceHasAlpha, |
| 336 const SkConvolutionFilter1D& filterX, | 336 const SkConvolutionFilter1D& filterX, |
| 337 const SkConvolutionFilter1D& filterY, | 337 const SkConvolutionFilter1D& filterY, |
| 338 int outputByteRowStride, | 338 int outputByteRowStride, |
| 339 unsigned char* output, | 339 unsigned char* output, |
| 340 SkConvolutionProcs* convolveProcs, | 340 const SkConvolutionProcs& convolveProcs, |
| 341 bool useSimdIfPossible) { | 341 bool useSimdIfPossible) { |
| 342 | 342 |
| 343 int maxYFilterSize = filterY.maxFilter(); | 343 int maxYFilterSize = filterY.maxFilter(); |
| 344 | 344 |
| 345 // The next row in the input that we will generate a horizontally | 345 // The next row in the input that we will generate a horizontally |
| 346 // convolved row for. If the filter doesn't start at the beginning of the | 346 // convolved row for. If the filter doesn't start at the beginning of the |
| 347 // image (this is the case when we are only resizing a subset), then we | 347 // image (this is the case when we are only resizing a subset), then we |
| 348 // don't want to generate any output rows before that. Compute the starting | 348 // don't want to generate any output rows before that. Compute the starting |
| 349 // row for convolution as the first pixel for the first vertical filter. | 349 // row for convolution as the first pixel for the first vertical filter. |
| 350 int filterOffset, filterLength; | 350 int filterOffset, filterLength; |
| 351 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = | 351 const SkConvolutionFilter1D::ConvolutionFixed* filterValues = |
| 352 filterY.FilterForValue(0, &filterOffset, &filterLength); | 352 filterY.FilterForValue(0, &filterOffset, &filterLength); |
| 353 int nextXRow = filterOffset; | 353 int nextXRow = filterOffset; |
| 354 | 354 |
| 355 // We loop over each row in the input doing a horizontal convolution. This | 355 // We loop over each row in the input doing a horizontal convolution. This |
| 356 // will result in a horizontally convolved image. We write the results into | 356 // will result in a horizontally convolved image. We write the results into |
| 357 // a circular buffer of convolved rows and do vertical convolution as rows | 357 // a circular buffer of convolved rows and do vertical convolution as rows |
| 358 // are available. This prevents us from having to store the entire | 358 // are available. This prevents us from having to store the entire |
| 359 // intermediate image and helps cache coherency. | 359 // intermediate image and helps cache coherency. |
| 360 // We will need four extra rows to allow horizontal convolution could be done | 360 // We will need four extra rows to allow horizontal convolution could be done |
| 361 // simultaneously. We also pad each row in row buffer to be aligned-up to | 361 // simultaneously. We also pad each row in row buffer to be aligned-up to |
| 362 // 16 bytes. | 362 // 16 bytes. |
| 363 // TODO(jiesun): We do not use aligned load from row buffer in vertical | 363 // TODO(jiesun): We do not use aligned load from row buffer in vertical |
| 364 // convolution pass yet. Somehow Windows does not like it. | 364 // convolution pass yet. Somehow Windows does not like it. |
| 365 int rowBufferWidth = (filterX.numValues() + 15) & ~0xF; | 365 int rowBufferWidth = (filterX.numValues() + 15) & ~0xF; |
| 366 int rowBufferHeight = maxYFilterSize + | 366 int rowBufferHeight = maxYFilterSize + |
| 367 (convolveProcs->fConvolve4RowsHorizontally ? 4 : 0); | 367 (convolveProcs.fConvolve4RowsHorizontally ? 4 : 0); |
| 368 CircularRowBuffer rowBuffer(rowBufferWidth, | 368 CircularRowBuffer rowBuffer(rowBufferWidth, |
| 369 rowBufferHeight, | 369 rowBufferHeight, |
| 370 filterOffset); | 370 filterOffset); |
| 371 | 371 |
| 372 // Loop over every possible output row, processing just enough horizontal | 372 // Loop over every possible output row, processing just enough horizontal |
| 373 // convolutions to run each subsequent vertical convolution. | 373 // convolutions to run each subsequent vertical convolution. |
| 374 SkASSERT(outputByteRowStride >= filterX.numValues() * 4); | 374 SkASSERT(outputByteRowStride >= filterX.numValues() * 4); |
| 375 int numOutputRows = filterY.numValues(); | 375 int numOutputRows = filterY.numValues(); |
| 376 | 376 |
| 377 // We need to check which is the last line to convolve before we advance 4 | 377 // We need to check which is the last line to convolve before we advance 4 |
| 378 // lines in one iteration. | 378 // lines in one iteration. |
| 379 int lastFilterOffset, lastFilterLength; | 379 int lastFilterOffset, lastFilterLength; |
| 380 | 380 |
| 381 // SSE2 can access up to 3 extra pixels past the end of the | 381 // SSE2 can access up to 3 extra pixels past the end of the |
| 382 // buffer. At the bottom of the image, we have to be careful | 382 // buffer. At the bottom of the image, we have to be careful |
| 383 // not to access data past the end of the buffer. Normally | 383 // not to access data past the end of the buffer. Normally |
| 384 // we fall back to the C++ implementation for the last row. | 384 // we fall back to the C++ implementation for the last row. |
| 385 // If the last row is less than 3 pixels wide, we may have to fall | 385 // If the last row is less than 3 pixels wide, we may have to fall |
| 386 // back to the C++ version for more rows. Compute how many | 386 // back to the C++ version for more rows. Compute how many |
| 387 // rows we need to avoid the SSE implementation for here. | 387 // rows we need to avoid the SSE implementation for here. |
| 388 filterX.FilterForValue(filterX.numValues() - 1, &lastFilterOffset, | 388 filterX.FilterForValue(filterX.numValues() - 1, &lastFilterOffset, |
| 389 &lastFilterLength); | 389 &lastFilterLength); |
| 390 int avoidSimdRows = 1 + convolveProcs->fExtraHorizontalReads / | 390 int avoidSimdRows = 1 + convolveProcs.fExtraHorizontalReads / |
| 391 (lastFilterOffset + lastFilterLength); | 391 (lastFilterOffset + lastFilterLength); |
| 392 | 392 |
| 393 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, | 393 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, |
| 394 &lastFilterLength); | 394 &lastFilterLength); |
| 395 | 395 |
| 396 for (int outY = 0; outY < numOutputRows; outY++) { | 396 for (int outY = 0; outY < numOutputRows; outY++) { |
| 397 filterValues = filterY.FilterForValue(outY, | 397 filterValues = filterY.FilterForValue(outY, |
| 398 &filterOffset, &filterLength); | 398 &filterOffset, &filterLength); |
| 399 | 399 |
| 400 // Generate output rows until we have enough to run the current filter. | 400 // Generate output rows until we have enough to run the current filter. |
| 401 while (nextXRow < filterOffset + filterLength) { | 401 while (nextXRow < filterOffset + filterLength) { |
| 402 if (convolveProcs->fConvolve4RowsHorizontally && | 402 if (convolveProcs.fConvolve4RowsHorizontally && |
| 403 nextXRow + 3 < lastFilterOffset + lastFilterLength - | 403 nextXRow + 3 < lastFilterOffset + lastFilterLength - |
| 404 avoidSimdRows) { | 404 avoidSimdRows) { |
| 405 const unsigned char* src[4]; | 405 const unsigned char* src[4]; |
| 406 unsigned char* outRow[4]; | 406 unsigned char* outRow[4]; |
| 407 for (int i = 0; i < 4; ++i) { | 407 for (int i = 0; i < 4; ++i) { |
| 408 src[i] = &sourceData[(nextXRow + i) * sourceByteRowStride]; | 408 src[i] = &sourceData[(nextXRow + i) * sourceByteRowStride]; |
| 409 outRow[i] = rowBuffer.advanceRow(); | 409 outRow[i] = rowBuffer.advanceRow(); |
| 410 } | 410 } |
| 411 convolveProcs->fConvolve4RowsHorizontally(src, filterX, outRow); | 411 convolveProcs.fConvolve4RowsHorizontally(src, filterX, outRow); |
| 412 nextXRow += 4; | 412 nextXRow += 4; |
| 413 } else { | 413 } else { |
| 414 // Check if we need to avoid SSE2 for this row. | 414 // Check if we need to avoid SSE2 for this row. |
| 415 if (convolveProcs->fConvolveHorizontally && | 415 if (convolveProcs.fConvolveHorizontally && |
| 416 nextXRow < lastFilterOffset + lastFilterLength - | 416 nextXRow < lastFilterOffset + lastFilterLength - |
| 417 avoidSimdRows) { | 417 avoidSimdRows) { |
| 418 convolveProcs->fConvolveHorizontally( | 418 convolveProcs.fConvolveHorizontally( |
| 419 &sourceData[nextXRow * sourceByteRowStride], | 419 &sourceData[nextXRow * sourceByteRowStride], |
| 420 filterX, rowBuffer.advanceRow(), sourceHasAlpha); | 420 filterX, rowBuffer.advanceRow(), sourceHasAlpha); |
| 421 } else { | 421 } else { |
| 422 if (sourceHasAlpha) { | 422 if (sourceHasAlpha) { |
| 423 ConvolveHorizontally<true>( | 423 ConvolveHorizontally<true>( |
| 424 &sourceData[nextXRow * sourceByteRowStride], | 424 &sourceData[nextXRow * sourceByteRowStride], |
| 425 filterX, rowBuffer.advanceRow()); | 425 filterX, rowBuffer.advanceRow()); |
| 426 } else { | 426 } else { |
| 427 ConvolveHorizontally<false>( | 427 ConvolveHorizontally<false>( |
| 428 &sourceData[nextXRow * sourceByteRowStride], | 428 &sourceData[nextXRow * sourceByteRowStride], |
| (...skipping 10 matching lines...) Expand all Loading... |
| 439 // Get the list of rows that the circular buffer has, in order. | 439 // Get the list of rows that the circular buffer has, in order. |
| 440 int firstRowInCircularBuffer; | 440 int firstRowInCircularBuffer; |
| 441 unsigned char* const* rowsToConvolve = | 441 unsigned char* const* rowsToConvolve = |
| 442 rowBuffer.GetRowAddresses(&firstRowInCircularBuffer); | 442 rowBuffer.GetRowAddresses(&firstRowInCircularBuffer); |
| 443 | 443 |
| 444 // Now compute the start of the subset of those rows that the filter | 444 // Now compute the start of the subset of those rows that the filter |
| 445 // needs. | 445 // needs. |
| 446 unsigned char* const* firstRowForFilter = | 446 unsigned char* const* firstRowForFilter = |
| 447 &rowsToConvolve[filterOffset - firstRowInCircularBuffer]; | 447 &rowsToConvolve[filterOffset - firstRowInCircularBuffer]; |
| 448 | 448 |
| 449 if (convolveProcs->fConvolveVertically) { | 449 if (convolveProcs.fConvolveVertically) { |
| 450 convolveProcs->fConvolveVertically(filterValues, filterLength, | 450 convolveProcs.fConvolveVertically(filterValues, filterLength, |
| 451 firstRowForFilter, | 451 firstRowForFilter, |
| 452 filterX.numValues(), curOutputRow, | 452 filterX.numValues(), curOutputRow, |
| 453 sourceHasAlpha); | 453 sourceHasAlpha); |
| 454 } else { | 454 } else { |
| 455 ConvolveVertically(filterValues, filterLength, | 455 ConvolveVertically(filterValues, filterLength, |
| 456 firstRowForFilter, | 456 firstRowForFilter, |
| 457 filterX.numValues(), curOutputRow, | 457 filterX.numValues(), curOutputRow, |
| 458 sourceHasAlpha); | 458 sourceHasAlpha); |
| 459 } | 459 } |
| 460 } | 460 } |
| 461 } | 461 } |
| OLD | NEW |