| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "SkConvolver.h" | 5 #include "SkConvolver.h" |
| 6 #include "SkTArray.h" | 6 #include "SkTArray.h" |
| 7 | 7 |
| 8 namespace { | 8 namespace { |
| 9 | 9 |
| 10 // Converts the argument to an 8-bit unsigned value by clamping to the range | 10 // Converts the argument to an 8-bit unsigned value by clamping to the range |
| (...skipping 383 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 394 filterOffset); | 394 filterOffset); |
| 395 | 395 |
| 396 // Loop over every possible output row, processing just enough horizontal | 396 // Loop over every possible output row, processing just enough horizontal |
| 397 // convolutions to run each subsequent vertical convolution. | 397 // convolutions to run each subsequent vertical convolution. |
| 398 SkASSERT(outputByteRowStride >= filterX.numValues() * 4); | 398 SkASSERT(outputByteRowStride >= filterX.numValues() * 4); |
| 399 int numOutputRows = filterY.numValues(); | 399 int numOutputRows = filterY.numValues(); |
| 400 | 400 |
| 401 // We need to check which is the last line to convolve before we advance 4 | 401 // We need to check which is the last line to convolve before we advance 4 |
| 402 // lines in one iteration. | 402 // lines in one iteration. |
| 403 int lastFilterOffset, lastFilterLength; | 403 int lastFilterOffset, lastFilterLength; |
| 404 | |
| 405 // SSE2 can access up to 3 extra pixels past the end of the | |
| 406 // buffer. At the bottom of the image, we have to be careful | |
| 407 // not to access data past the end of the buffer. Normally | |
| 408 // we fall back to the C++ implementation for the last row. | |
| 409 // If the last row is less than 3 pixels wide, we may have to fall | |
| 410 // back to the C++ version for more rows. Compute how many | |
| 411 // rows we need to avoid the SSE implementation for here. | |
| 412 filterX.FilterForValue(filterX.numValues() - 1, &lastFilterOffset, | |
| 413 &lastFilterLength); | |
| 414 int avoidSimdRows = 1 + convolveProcs.fExtraHorizontalReads / | |
| 415 (lastFilterOffset + lastFilterLength); | |
| 416 | |
| 417 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, | 404 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, |
| 418 &lastFilterLength); | 405 &lastFilterLength); |
| 419 | 406 |
| 420 for (int outY = 0; outY < numOutputRows; outY++) { | 407 for (int outY = 0; outY < numOutputRows; outY++) { |
| 421 filterValues = filterY.FilterForValue(outY, | 408 filterValues = filterY.FilterForValue(outY, |
| 422 &filterOffset, &filterLength); | 409 &filterOffset, &filterLength); |
| 423 | 410 |
| 424 // Generate output rows until we have enough to run the current filter. | 411 // Generate output rows until we have enough to run the current filter. |
| 425 while (nextXRow < filterOffset + filterLength) { | 412 while (nextXRow < filterOffset + filterLength) { |
| 426 if (convolveProcs.fConvolve4RowsHorizontally && | 413 if (convolveProcs.fConvolve4RowsHorizontally && |
| 427 nextXRow + 3 < lastFilterOffset + lastFilterLength - | 414 nextXRow + 3 < lastFilterOffset + lastFilterLength) { |
| 428 avoidSimdRows) { | |
| 429 const unsigned char* src[4]; | 415 const unsigned char* src[4]; |
| 430 unsigned char* outRow[4]; | 416 unsigned char* outRow[4]; |
| 431 for (int i = 0; i < 4; ++i) { | 417 for (int i = 0; i < 4; ++i) { |
| 432 src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRo
wStride]; | 418 src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRo
wStride]; |
| 433 outRow[i] = rowBuffer.advanceRow(); | 419 outRow[i] = rowBuffer.advanceRow(); |
| 434 } | 420 } |
| 435 convolveProcs.fConvolve4RowsHorizontally(src, filterX, outRow, 4
*rowBufferWidth); | 421 convolveProcs.fConvolve4RowsHorizontally(src, filterX, outRow, 4
*rowBufferWidth); |
| 436 nextXRow += 4; | 422 nextXRow += 4; |
| 437 } else { | 423 } else { |
| 438 // Check if we need to avoid SSE2 for this row. | 424 if (convolveProcs.fConvolveHorizontally) { |
| 439 if (convolveProcs.fConvolveHorizontally && | |
| 440 nextXRow < lastFilterOffset + lastFilterLength - | |
| 441 avoidSimdRows) { | |
| 442 convolveProcs.fConvolveHorizontally( | 425 convolveProcs.fConvolveHorizontally( |
| 443 &sourceData[(uint64_t)nextXRow * sourceByteRowStride], | 426 &sourceData[(uint64_t)nextXRow * sourceByteRowStride], |
| 444 filterX, rowBuffer.advanceRow(), sourceHasAlpha); | 427 filterX, rowBuffer.advanceRow(), sourceHasAlpha); |
| 445 } else { | 428 } else { |
| 446 if (sourceHasAlpha) { | 429 if (sourceHasAlpha) { |
| 447 ConvolveHorizontallyAlpha( | 430 ConvolveHorizontallyAlpha( |
| 448 &sourceData[(uint64_t)nextXRow * sourceByteRowStride
], | 431 &sourceData[(uint64_t)nextXRow * sourceByteRowStride
], |
| 449 filterX, rowBuffer.advanceRow()); | 432 filterX, rowBuffer.advanceRow()); |
| 450 } else { | 433 } else { |
| 451 ConvolveHorizontallyNoAlpha( | 434 ConvolveHorizontallyNoAlpha( |
| (...skipping 25 matching lines...) Expand all Loading... |
| 477 sourceHasAlpha); | 460 sourceHasAlpha); |
| 478 } else { | 461 } else { |
| 479 ConvolveVertically(filterValues, filterLength, | 462 ConvolveVertically(filterValues, filterLength, |
| 480 firstRowForFilter, | 463 firstRowForFilter, |
| 481 filterX.numValues(), curOutputRow, | 464 filterX.numValues(), curOutputRow, |
| 482 sourceHasAlpha); | 465 sourceHasAlpha); |
| 483 } | 466 } |
| 484 } | 467 } |
| 485 return true; | 468 return true; |
| 486 } | 469 } |
| OLD | NEW |