OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "SkConvolver.h" | 5 #include "SkConvolver.h" |
6 #include "SkTArray.h" | 6 #include "SkTArray.h" |
7 | 7 |
8 namespace { | 8 namespace { |
9 | 9 |
10 // Converts the argument to an 8-bit unsigned value by clamping to the range | 10 // Converts the argument to an 8-bit unsigned value by clamping to the range |
(...skipping 383 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
394 filterOffset); | 394 filterOffset); |
395 | 395 |
396 // Loop over every possible output row, processing just enough horizontal | 396 // Loop over every possible output row, processing just enough horizontal |
397 // convolutions to run each subsequent vertical convolution. | 397 // convolutions to run each subsequent vertical convolution. |
398 SkASSERT(outputByteRowStride >= filterX.numValues() * 4); | 398 SkASSERT(outputByteRowStride >= filterX.numValues() * 4); |
399 int numOutputRows = filterY.numValues(); | 399 int numOutputRows = filterY.numValues(); |
400 | 400 |
401 // We need to check which is the last line to convolve before we advance 4 | 401 // We need to check which is the last line to convolve before we advance 4 |
402 // lines in one iteration. | 402 // lines in one iteration. |
403 int lastFilterOffset, lastFilterLength; | 403 int lastFilterOffset, lastFilterLength; |
404 | |
405 // SSE2 can access up to 3 extra pixels past the end of the | |
406 // buffer. At the bottom of the image, we have to be careful | |
407 // not to access data past the end of the buffer. Normally | |
408 // we fall back to the C++ implementation for the last row. | |
409 // If the last row is less than 3 pixels wide, we may have to fall | |
410 // back to the C++ version for more rows. Compute how many | |
411 // rows we need to avoid the SSE implementation for here. | |
412 filterX.FilterForValue(filterX.numValues() - 1, &lastFilterOffset, | |
413 &lastFilterLength); | |
414 int avoidSimdRows = 1 + convolveProcs.fExtraHorizontalReads / | |
415 (lastFilterOffset + lastFilterLength); | |
416 | |
417 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, | 404 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset, |
418 &lastFilterLength); | 405 &lastFilterLength); |
419 | 406 |
420 for (int outY = 0; outY < numOutputRows; outY++) { | 407 for (int outY = 0; outY < numOutputRows; outY++) { |
421 filterValues = filterY.FilterForValue(outY, | 408 filterValues = filterY.FilterForValue(outY, |
422 &filterOffset, &filterLength); | 409 &filterOffset, &filterLength); |
423 | 410 |
424 // Generate output rows until we have enough to run the current filter. | 411 // Generate output rows until we have enough to run the current filter. |
425 while (nextXRow < filterOffset + filterLength) { | 412 while (nextXRow < filterOffset + filterLength) { |
426 if (convolveProcs.fConvolve4RowsHorizontally && | 413 if (convolveProcs.fConvolve4RowsHorizontally && |
427 nextXRow + 3 < lastFilterOffset + lastFilterLength - | 414 nextXRow + 3 < lastFilterOffset + lastFilterLength) { |
428 avoidSimdRows) { | |
429 const unsigned char* src[4]; | 415 const unsigned char* src[4]; |
430 unsigned char* outRow[4]; | 416 unsigned char* outRow[4]; |
431 for (int i = 0; i < 4; ++i) { | 417 for (int i = 0; i < 4; ++i) { |
432 src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRo
wStride]; | 418 src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRo
wStride]; |
433 outRow[i] = rowBuffer.advanceRow(); | 419 outRow[i] = rowBuffer.advanceRow(); |
434 } | 420 } |
435 convolveProcs.fConvolve4RowsHorizontally(src, filterX, outRow, 4
*rowBufferWidth); | 421 convolveProcs.fConvolve4RowsHorizontally(src, filterX, outRow, 4
*rowBufferWidth); |
436 nextXRow += 4; | 422 nextXRow += 4; |
437 } else { | 423 } else { |
438 // Check if we need to avoid SSE2 for this row. | 424 if (convolveProcs.fConvolveHorizontally) { |
439 if (convolveProcs.fConvolveHorizontally && | |
440 nextXRow < lastFilterOffset + lastFilterLength - | |
441 avoidSimdRows) { | |
442 convolveProcs.fConvolveHorizontally( | 425 convolveProcs.fConvolveHorizontally( |
443 &sourceData[(uint64_t)nextXRow * sourceByteRowStride], | 426 &sourceData[(uint64_t)nextXRow * sourceByteRowStride], |
444 filterX, rowBuffer.advanceRow(), sourceHasAlpha); | 427 filterX, rowBuffer.advanceRow(), sourceHasAlpha); |
445 } else { | 428 } else { |
446 if (sourceHasAlpha) { | 429 if (sourceHasAlpha) { |
447 ConvolveHorizontallyAlpha( | 430 ConvolveHorizontallyAlpha( |
448 &sourceData[(uint64_t)nextXRow * sourceByteRowStride
], | 431 &sourceData[(uint64_t)nextXRow * sourceByteRowStride
], |
449 filterX, rowBuffer.advanceRow()); | 432 filterX, rowBuffer.advanceRow()); |
450 } else { | 433 } else { |
451 ConvolveHorizontallyNoAlpha( | 434 ConvolveHorizontallyNoAlpha( |
(...skipping 25 matching lines...) Expand all Loading... |
477 sourceHasAlpha); | 460 sourceHasAlpha); |
478 } else { | 461 } else { |
479 ConvolveVertically(filterValues, filterLength, | 462 ConvolveVertically(filterValues, filterLength, |
480 firstRowForFilter, | 463 firstRowForFilter, |
481 filterX.numValues(), curOutputRow, | 464 filterX.numValues(), curOutputRow, |
482 sourceHasAlpha); | 465 sourceHasAlpha); |
483 } | 466 } |
484 } | 467 } |
485 return true; | 468 return true; |
486 } | 469 } |
OLD | NEW |