src/core/SkConvolver.cpp - Issue 2481733003: Make SSE2/Neon convolution functions not to read extra bytes

Side by Side Diff: src/core/SkConvolver.cpp

Issue 2481733003: Make SSE2/Neon convolution functions not to read extra bytes (Closed)

Patch Set: Change macros to functions Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "SkConvolver.h"	5 #include "SkConvolver.h"

6 #include "SkTArray.h"	6 #include "SkTArray.h"

7	7

8 namespace {	8 namespace {

9	9

10 // Converts the argument to an 8-bit unsigned value by clamping to the range	10 // Converts the argument to an 8-bit unsigned value by clamping to the range

(...skipping 383 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
394 filterOffset);	394 filterOffset);

395	395

396 // Loop over every possible output row, processing just enough horizontal	396 // Loop over every possible output row, processing just enough horizontal

397 // convolutions to run each subsequent vertical convolution.	397 // convolutions to run each subsequent vertical convolution.

398 SkASSERT(outputByteRowStride >= filterX.numValues() * 4);	398 SkASSERT(outputByteRowStride >= filterX.numValues() * 4);

399 int numOutputRows = filterY.numValues();	399 int numOutputRows = filterY.numValues();

400	400

401 // We need to check which is the last line to convolve before we advance 4	401 // We need to check which is the last line to convolve before we advance 4

402 // lines in one iteration.	402 // lines in one iteration.

403 int lastFilterOffset, lastFilterLength;	403 int lastFilterOffset, lastFilterLength;

404

405 // SSE2 can access up to 3 extra pixels past the end of the

406 // buffer. At the bottom of the image, we have to be careful

407 // not to access data past the end of the buffer. Normally

408 // we fall back to the C++ implementation for the last row.

409 // If the last row is less than 3 pixels wide, we may have to fall

410 // back to the C++ version for more rows. Compute how many

411 // rows we need to avoid the SSE implementation for here.

412 filterX.FilterForValue(filterX.numValues() - 1, &lastFilterOffset,

413 &lastFilterLength);

414 int avoidSimdRows = 1 + convolveProcs.fExtraHorizontalReads /

415 (lastFilterOffset + lastFilterLength);

416

417 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset,	404 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset,

418 &lastFilterLength);	405 &lastFilterLength);

419	406

420 for (int outY = 0; outY < numOutputRows; outY++) {	407 for (int outY = 0; outY < numOutputRows; outY++) {

421 filterValues = filterY.FilterForValue(outY,	408 filterValues = filterY.FilterForValue(outY,

422 &filterOffset, &filterLength);	409 &filterOffset, &filterLength);

423	410

424 // Generate output rows until we have enough to run the current filter.	411 // Generate output rows until we have enough to run the current filter.

425 while (nextXRow < filterOffset + filterLength) {	412 while (nextXRow < filterOffset + filterLength) {

426 if (convolveProcs.fConvolve4RowsHorizontally &&	413 if (convolveProcs.fConvolve4RowsHorizontally &&

427 nextXRow + 3 < lastFilterOffset + lastFilterLength -	414 nextXRow + 3 < lastFilterOffset + lastFilterLength) {

428 avoidSimdRows) {

429 const unsigned char* src[4];	415 const unsigned char* src[4];

430 unsigned char* outRow[4];	416 unsigned char* outRow[4];

431 for (int i = 0; i < 4; ++i) {	417 for (int i = 0; i < 4; ++i) {

432 src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRowStride];	418 src[i] = &sourceData[(uint64_t)(nextXRow + i) * sourceByteRowStride];

433 outRow[i] = rowBuffer.advanceRow();	419 outRow[i] = rowBuffer.advanceRow();

434 }	420 }

435 convolveProcs.fConvolve4RowsHorizontally(src, filterX, outRow, 4*rowBufferWidth);	421 convolveProcs.fConvolve4RowsHorizontally(src, filterX, outRow, 4*rowBufferWidth);

436 nextXRow += 4;	422 nextXRow += 4;

437 } else {	423 } else {

438 // Check if we need to avoid SSE2 for this row.	424 if (convolveProcs.fConvolveHorizontally) {

439 if (convolveProcs.fConvolveHorizontally &&

440 nextXRow < lastFilterOffset + lastFilterLength -

441 avoidSimdRows) {

442 convolveProcs.fConvolveHorizontally(	425 convolveProcs.fConvolveHorizontally(

443 &sourceData[(uint64_t)nextXRow * sourceByteRowStride],	426 &sourceData[(uint64_t)nextXRow * sourceByteRowStride],

444 filterX, rowBuffer.advanceRow(), sourceHasAlpha);	427 filterX, rowBuffer.advanceRow(), sourceHasAlpha);

445 } else {	428 } else {

446 if (sourceHasAlpha) {	429 if (sourceHasAlpha) {

447 ConvolveHorizontallyAlpha(	430 ConvolveHorizontallyAlpha(

448 &sourceData[(uint64_t)nextXRow * sourceByteRowStride],	431 &sourceData[(uint64_t)nextXRow * sourceByteRowStride],

449 filterX, rowBuffer.advanceRow());	432 filterX, rowBuffer.advanceRow());

450 } else {	433 } else {

451 ConvolveHorizontallyNoAlpha(	434 ConvolveHorizontallyNoAlpha(

(...skipping 25 matching lines...) Expand all Loading...
477 sourceHasAlpha);	460 sourceHasAlpha);

478 } else {	461 } else {

479 ConvolveVertically(filterValues, filterLength,	462 ConvolveVertically(filterValues, filterLength,

480 firstRowForFilter,	463 firstRowForFilter,

481 filterX.numValues(), curOutputRow,	464 filterX.numValues(), curOutputRow,

482 sourceHasAlpha);	465 sourceHasAlpha);

483 }	466 }

484 }	467 }

485 return true;	468 return true;

486 }	469 }

OLD	NEW

« no previous file with comments | « src/core/SkConvolver.h ('k') | src/opts/SkBitmapFilter_opts_SSE2.cpp » ('j') | no next file with comments »