src/core/SkConvolver.cpp - Issue 19335002: Production quality fast image up/downsampler

Side by Side Diff: src/core/SkConvolver.cpp

Issue 19335002: Production quality fast image up/downsampler (Closed) Base URL: https://skia.googlecode.com/svn/trunk

Patch Set: Mike style nits Created 7 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "SkConvolver.h"

	6 #include "SkSize.h"

	7 #include "SkTypes.h"

	8

	9 namespace {

	10

	// Saturates |a| to the range [0, 255] and returns it as an 8-bit unsigned
	// value: negative inputs become 0 and inputs above 255 become 255.
	inline unsigned char ClampTo8(int a) {
	    // Viewed as unsigned, a negative |a| becomes a very large number, so this
	    // one comparison accepts exactly the values that are already in range.
	    const unsigned asUnsigned = static_cast<unsigned>(a);
	    if (asUnsigned <= 255u) {
	        return static_cast<unsigned char>(a);
	    }
	    // Out of range: saturate towards whichever end was overshot.
	    if (a >= 0) {
	        return 255;
	    }
	    return 0;
	}

	20

	21 // Takes the value produced by accumulating element-wise product of image wi th

	22 // a kernel and brings it back into range.

	23 // All of the filter scaling factors are in fixed point with kShiftBits bits of

	24 // fractional part.

	25 inline unsigned char BringBackTo8(int a, bool takeAbsolute) {

	26 a >>= SkConvolutionFilter1D::kShiftBits;

	27 if (takeAbsolute) {
	reed1 2013/07/19 18:39:05 Thanks for this fix. Just about every "if" in this Thanks for this fix. Just about every "if" in this file (and likely the other files that were copied from chrome) has the same problem. If we're not going to address these now, it should be a high-priority to do as a next CL.
	28 a = abs(a);

	29 }

	30 return ClampTo8(a);

	31 }

	32

	33 // Stores a list of rows in a circular buffer. The usage is you write into i t

	34 // by calling AdvanceRow. It will keep track of which row in the buffer it

	35 // should use next, and the total number of rows added.

	36 class CircularRowBuffer {

	37 public:

	38 // The number of pixels in each row is given in \|source_row_pixel_width\| .

	39 // The maximum number of rows needed in the buffer is \|max_y_filter_size \|

	40 // (we only need to store enough rows for the biggest filter).

	41 //

	42 // We use the \|first_input_row\| to compute the coordinates of all of the

	43 // following rows returned by Advance().

	44 CircularRowBuffer(int destRowPixelWidth, int maxYFilterSize,

	45 int firstInputRow)

	46 : fRowByteWidth(destRowPixelWidth * 4),

	47 fNumRows(maxYFilterSize),

	48 fNextRow(0),

	49 fNextRowCoordinate(firstInputRow) {

	50 fBuffer.reset(fRowByteWidth * maxYFilterSize);

	51 fRowAddresses.reset(fNumRows);

	52 }

	53

	54 // Moves to the next row in the buffer, returning a pointer to the begin ning

	55 // of it.

	56 unsigned char* advanceRow() {

	57 unsigned char* row = &fBuffer[fNextRow * fRowByteWidth];

	58 fNextRowCoordinate++;

	59

	60 // Set the pointer to the next row to use, wrapping around if necess ary.

	61 fNextRow++;

	62 if (fNextRow == fNumRows)

	63 fNextRow = 0;

	64 return row;

	65 }

	66

	67 // Returns a pointer to an "unrolled" array of rows. These rows will sta rt

	68 // at the y coordinate placed into \|*first_row_index\| and will continue in

	69 // order for the maximum number of rows in this circular buffer.

	70 //

	71 // The \|first_row_index_\| may be negative. This means the circular buffe r

	72 // starts before the top of the image (it hasn't been filled yet).

	73 unsigned char* const* GetRowAddresses(int* firstRowIndex) {

	74 // Example for a 4-element circular buffer holding coords 6-9.

	75 // Row 0 Coord 8

	76 // Row 1 Coord 9

	77 // Row 2 Coord 6 <- fNextRow = 2, fNextRowCoordinate = 10.

	78 // Row 3 Coord 7

	79 //

	80 // The "next" row is also the first (lowest) coordinate. This comput ation

	81 // may yield a negative value, but that's OK, the math will work out

	82 // since the user of this buffer will compute the offset relative

	83 // to the firstRowIndex and the negative rows will never be used.

	84 *firstRowIndex = fNextRowCoordinate - fNumRows;

	85

	86 int cur_row = fNextRow;

	87 for (int i = 0; i < fNumRows; i++) {

	88 fRowAddresses[i] = &fBuffer[cur_row * fRowByteWidth];

	89

	90 // Advance to the next row, wrapping if necessary.

	91 cur_row++;

	92 if (cur_row == fNumRows)

	93 cur_row = 0;

	94 }

	95 return &fRowAddresses[0];

	96 }

	97

	98 private:

	99 // The buffer storing the rows. They are packed, each one fRowByteWidth.

	100 SkTArray<unsigned char> fBuffer;

	101

	102 // Number of bytes per row in the \|buffer_\|.

	103 int fRowByteWidth;

	104

	105 // The number of rows available in the buffer.

	106 int fNumRows;

	107

	108 // The next row index we should write into. This wraps around as the

	109 // circular buffer is used.

	110 int fNextRow;

	111

	112 // The y coordinate of the \|fNextRow\|. This is incremented each time a

	113 // new row is appended and does not wrap.

	114 int fNextRowCoordinate;

	115

	116 // Buffer used by GetRowAddresses().

	117 SkTArray<unsigned char*> fRowAddresses;

	118 };

	119

	120 // Convolves horizontally along a single row. The row data is given in

	121 // \|src_data\| and continues for the numValues() of the filter.

	122 template<bool has_alpha>

	123 void ConvolveHorizontally(const unsigned char* src_data,

	124 const SkConvolutionFilter1D& filter,

	125 unsigned char* out_row) {

	126 // Loop over each pixel on this row in the output image.

	127 int numValues = filter.numValues();

	128 for (int out_x = 0; out_x < numValues; out_x++) {

	129 // Get the filter that determines the current output pixel.

	130 int filterOffset, filterLength;

	131 const SkConvolutionFilter1D::ConvolutionFixed* filterValues =

	132 filter.FilterForValue(out_x, &filterOffset, &filterLength);

	133

	134 // Compute the first pixel in this row that the filter affects. It will

	135 // touch \|filterLength\| pixels (4 bytes each) after this.

	136 const unsigned char* row_to_filter = &src_data[filterOffset * 4];

	137

	138 // Apply the filter to the row to get the destination pixel in \|accum\|.

	139 int accum[4] = {0};

	140 for (int filter_x = 0; filter_x < filterLength; filter_x++) {

	141 SkConvolutionFilter1D::ConvolutionFixed cur_filter = filterValue s[filter_x];

	142 accum[0] += cur_filter * row_to_filter[filter_x * 4 + 0];

	143 accum[1] += cur_filter * row_to_filter[filter_x * 4 + 1];

	144 accum[2] += cur_filter * row_to_filter[filter_x * 4 + 2];

	145 if (has_alpha)

	146 accum[3] += cur_filter * row_to_filter[filter_x * 4 + 3];

	147 }

	148

	149 // Bring this value back in range. All of the filter scaling factors

	150 // are in fixed point with kShiftBits bits of fractional part.

	151 accum[0] >>= SkConvolutionFilter1D::kShiftBits;

	152 accum[1] >>= SkConvolutionFilter1D::kShiftBits;

	153 accum[2] >>= SkConvolutionFilter1D::kShiftBits;

	154 if (has_alpha)

	155 accum[3] >>= SkConvolutionFilter1D::kShiftBits;

	156

	157 // Store the new pixel.

	158 out_row[out_x * 4 + 0] = ClampTo8(accum[0]);

	159 out_row[out_x * 4 + 1] = ClampTo8(accum[1]);

	160 out_row[out_x * 4 + 2] = ClampTo8(accum[2]);

	161 if (has_alpha)

	162 out_row[out_x * 4 + 3] = ClampTo8(accum[3]);

	163 }

	164 }

	165

	166 // Does vertical convolution to produce one output row. The filter values and

	167 // length are given in the first two parameters. These are applied to each

	168 // of the rows pointed to in the \|source_data_rows\| array, with each row

	169 // being \|pixel_width\| wide.

	170 //

	171 // The output must have room for \|pixel_width * 4\| bytes.

	172 template<bool has_alpha>

	173 void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filte rValues,

	174 int filterLength,

	175 unsigned char* const* source_data_rows,

	176 int pixel_width,

	177 unsigned char* out_row) {

	178 // We go through each column in the output and do a vertical convolution,

	179 // generating one output pixel each time.

	180 for (int out_x = 0; out_x < pixel_width; out_x++) {

	181 // Compute the number of bytes over in each row that the current column

	182 // we're convolving starts at. The pixel will cover the next 4 bytes.

	183 int byte_offset = out_x * 4;

	184

	185 // Apply the filter to one column of pixels.

	186 int accum[4] = {0};

	187 for (int filter_y = 0; filter_y < filterLength; filter_y++) {

	188 SkConvolutionFilter1D::ConvolutionFixed cur_filter = filterValue s[filter_y];

	189 accum[0] += cur_filter * source_data_rows[filter_y][byte_offset + 0];

	190 accum[1] += cur_filter * source_data_rows[filter_y][byte_offset + 1];

	191 accum[2] += cur_filter * source_data_rows[filter_y][byte_offset + 2];

	192 if (has_alpha)

	193 accum[3] += cur_filter * source_data_rows[filter_y][byte_off set + 3];

	194 }

	195

	196 // Bring this value back in range. All of the filter scaling factors

	197 // are in fixed point with kShiftBits bits of precision.

	198 accum[0] >>= SkConvolutionFilter1D::kShiftBits;

	199 accum[1] >>= SkConvolutionFilter1D::kShiftBits;

	200 accum[2] >>= SkConvolutionFilter1D::kShiftBits;

	201 if (has_alpha)

	202 accum[3] >>= SkConvolutionFilter1D::kShiftBits;

	203

	204 // Store the new pixel.

	205 out_row[byte_offset + 0] = ClampTo8(accum[0]);

	206 out_row[byte_offset + 1] = ClampTo8(accum[1]);

	207 out_row[byte_offset + 2] = ClampTo8(accum[2]);

	208 if (has_alpha) {

	209 unsigned char alpha = ClampTo8(accum[3]);

	210

	211 // Make sure the alpha channel doesn't come out smaller than any of the

	212 // color channels. We use premultipled alpha channels, so this should

	213 // never happen, but rounding errors will cause this from time to time.

	214 // These "impossible" colors will cause overflows (and hence random pixel

	215 // values) when the resulting bitmap is drawn to the screen.

	216 //

	217 // We only need to do this when generating the final output row (here).

	218 int max_color_channel = SkTMax(out_row[byte_offset + 0],

	219 SkTMax(out_row[byte_offset + 1], out_row[byte_offset + 2]));

	220 if (alpha < max_color_channel)

	221 out_row[byte_offset + 3] = max_color_channel;

	222 else

	223 out_row[byte_offset + 3] = alpha;

	224 } else {

	225 // No alpha channel, the image is opaque.

	226 out_row[byte_offset + 3] = 0xff;

	227 }

	228 }

	229 }

	230

	231 void ConvolveVertically(const SkConvolutionFilter1D::ConvolutionFixed* filte rValues,

	232 int filterLength,

	233 unsigned char* const* source_data_rows,

	234 int pixel_width,

	235 unsigned char* out_row,

	236 bool source_has_alpha) {

	237 if (source_has_alpha) {

	238 ConvolveVertically<true>(filterValues, filterLength,

	239 source_data_rows,

	240 pixel_width,

	241 out_row);

	242 } else {

	243 ConvolveVertically<false>(filterValues, filterLength,

	244 source_data_rows,

	245 pixel_width,

	246 out_row);

	247 }

	248 }

	249

	250 } // namespace

	251

	252 // SkConvolutionFilter1D ---------------------------------------------------------

	253

	254 SkConvolutionFilter1D::SkConvolutionFilter1D()

	255 : fMaxFilter(0) {

	256 }

	257

	258 SkConvolutionFilter1D::~SkConvolutionFilter1D() {

	259 }

	260

	261 void SkConvolutionFilter1D::AddFilter(int filterOffset,

	262 const float* filterValues,

	263 int filterLength) {

	264 SkASSERT(filterLength > 0);

	265

	266 SkTArray<ConvolutionFixed> fixed_values;

	267 fixed_values.reset(filterLength);

	268

	269 for (int i = 0; i < filterLength; ++i)

	270 fixed_values.push_back(FloatToFixed(filterValues[i]));

	271

	272 AddFilter(filterOffset, &fixed_values[0], filterLength);

	273 }

	274

	275 void SkConvolutionFilter1D::AddFilter(int filterOffset,

	276 const ConvolutionFixed* filterValues,

	277 int filterLength) {

	278 // It is common for leading/trailing filter values to be zeros. In such

	279 // cases it is beneficial to only store the central factors.

	280 // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on

	281 // a 1080p image this optimization gives a ~10% speed improvement.

	282 int filter_size = filterLength;

	283 int first_non_zero = 0;

	284 while (first_non_zero < filterLength && filterValues[first_non_zero] == 0)

	285 first_non_zero++;

	286

	287 if (first_non_zero < filterLength) {

	288 // Here we have at least one non-zero factor.

	289 int last_non_zero = filterLength - 1;

	290 while (last_non_zero >= 0 && filterValues[last_non_zero] == 0)

	291 last_non_zero--;

	292

	293 filterOffset += first_non_zero;

	294 filterLength = last_non_zero + 1 - first_non_zero;

	295 SkASSERT(filterLength > 0);

	296

	297 for (int i = first_non_zero; i <= last_non_zero; i++)

	298 fFilterValues.push_back(filterValues[i]);

	299 } else {

	300 // Here all the factors were zeroes.

	301 filterLength = 0;

	302 }

	303

	304 FilterInstance instance;

	305

	306 // We pushed filterLength elements onto fFilterValues

	307 instance.fDataLocation = (static_cast<int>(fFilterValues.count()) -

	308 filterLength);

	309 instance.fOffset = filterOffset;

	310 instance.fTrimmedLength = filterLength;

	311 instance.fLength = filter_size;

	312 fFilters.push_back(instance);

	313

	314 fMaxFilter = SkTMax(fMaxFilter, filterLength);

	315 }

	316

	317 const SkConvolutionFilter1D::ConvolutionFixed* SkConvolutionFilter1D::GetSingleF ilter(

	318 int* specified_filterLength,

	319 int* filterOffset,

	320 int* filterLength) const {

	321 const FilterInstance& filter = fFilters[0];

	322 *filterOffset = filter.fOffset;

	323 *filterLength = filter.fTrimmedLength;

	324 *specified_filterLength = filter.fLength;

	325 if (filter.fTrimmedLength == 0) {

	326 return NULL;

	327 }

	328

	329 return &fFilterValues[filter.fDataLocation];

	330 }

	331

	332 void BGRAConvolve2D(const unsigned char* sourceData,

	333 int sourceByteRowStride,

	334 bool sourceHasAlpha,

	335 const SkConvolutionFilter1D& filterX,

	336 const SkConvolutionFilter1D& filterY,

	337 int outputByteRowStride,

	338 unsigned char* output,

	339 SkConvolutionProcs *convolveProcs,

	340 bool useSimdIfPossible) {

	341

	342 int maxYFilterSize = filterY.maxFilter();

	343

	344 // The next row in the input that we will generate a horizontally

	345 // convolved row for. If the filter doesn't start at the beginning of the

	346 // image (this is the case when we are only resizing a subset), then we

	347 // don't want to generate any output rows before that. Compute the starting

	348 // row for convolution as the first pixel for the first vertical filter.

	349 int filterOffset, filterLength;

	350 const SkConvolutionFilter1D::ConvolutionFixed* filterValues =

	351 filterY.FilterForValue(0, &filterOffset, &filterLength);

	352 int nextXRow = filterOffset;

	353

	354 // We loop over each row in the input doing a horizontal convolution. This

	355 // will result in a horizontally convolved image. We write the results into

	356 // a circular buffer of convolved rows and do vertical convolution as rows

	357 // are available. This prevents us from having to store the entire

	358 // intermediate image and helps cache coherency.

	359 // We will need four extra rows to allow horizontal convolution could be don e

	360 // simultaneously. We also pad each row in row buffer to be aligned-up to

	361 // 16 bytes.

	362 // TODO(jiesun): We do not use aligned load from row buffer in vertical

	363 // convolution pass yet. Somehow Windows does not like it.

	364 int rowBufferWidth = (filterX.numValues() + 15) & ~0xF;

	365 int rowBufferHeight = maxYFilterSize +

	366 (convolveProcs->fConvolve4RowsHorizontally ? 4 : 0);

	367 CircularRowBuffer rowBuffer(rowBufferWidth,

	368 rowBufferHeight,

	369 filterOffset);

	370

	371 // Loop over every possible output row, processing just enough horizontal

	372 // convolutions to run each subsequent vertical convolution.

	373 SkASSERT(outputByteRowStride >= filterX.numValues() * 4);

	374 int numOutputRows = filterY.numValues();

	375

	376 // We need to check which is the last line to convolve before we advance 4

	377 // lines in one iteration.

	378 int lastFilterOffset, lastFilterLength;

	379

	380 // SSE2 can access up to 3 extra pixels past the end of the

	381 // buffer. At the bottom of the image, we have to be careful

	382 // not to access data past the end of the buffer. Normally

	383 // we fall back to the C++ implementation for the last row.

	384 // If the last row is less than 3 pixels wide, we may have to fall

	385 // back to the C++ version for more rows. Compute how many

	386 // rows we need to avoid the SSE implementation for here.

	387 filterX.FilterForValue(filterX.numValues() - 1, &lastFilterOffset,

	388 &lastFilterLength);

	389 int avoidSimdRows = 1 + convolveProcs->fExtraHorizontalReads /

	390 (lastFilterOffset + lastFilterLength);

	391

	392 filterY.FilterForValue(numOutputRows - 1, &lastFilterOffset,

	393 &lastFilterLength);

	394

	395 for (int outY = 0; outY < numOutputRows; outY++) {

	396 filterValues = filterY.FilterForValue(outY,

	397 &filterOffset, &filterLength);

	398

	399 // Generate output rows until we have enough to run the current filter.

	400 while (nextXRow < filterOffset + filterLength) {

	401 if (convolveProcs->fConvolve4RowsHorizontally &&

	402 nextXRow + 3 < lastFilterOffset + lastFilterLength -

	403 avoidSimdRows) {

	404 const unsigned char* src[4];

	405 unsigned char* outRow[4];

	406 for (int i = 0; i < 4; ++i) {

	407 src[i] = &sourceData[(nextXRow + i) * sourceByteRowStride];

	408 outRow[i] = rowBuffer.advanceRow();

	409 }

	410 convolveProcs->fConvolve4RowsHorizontally(src, filterX, outRow);

	411 nextXRow += 4;

	412 } else {

	413 // Check if we need to avoid SSE2 for this row.

	414 if (convolveProcs->fConvolveHorizontally &&

	415 nextXRow < lastFilterOffset + lastFilterLength -

	416 avoidSimdRows) {

	417 convolveProcs->fConvolveHorizontally(

	418 &sourceData[nextXRow * sourceByteRowStride],

	419 filterX, rowBuffer.advanceRow(), sourceHasAlpha);

	420 } else {

	421 if (sourceHasAlpha) {

	422 ConvolveHorizontally<true>(

	423 &sourceData[nextXRow * sourceByteRowStride],

	424 filterX, rowBuffer.advanceRow());

	425 } else {

	426 ConvolveHorizontally<false>(

	427 &sourceData[nextXRow * sourceByteRowStride],

	428 filterX, rowBuffer.advanceRow());

	429 }

	430 }

	431 nextXRow++;

	432 }

	433 }

	434

	435 // Compute where in the output image this row of final data will go.

	436 unsigned char* curOutputRow = &output[outY * outputByteRowStride];

	437

	438 // Get the list of rows that the circular buffer has, in order.

	439 int firstRowInCircularBuffer;

	440 unsigned char* const* rowsToConvolve =

	441 rowBuffer.GetRowAddresses(&firstRowInCircularBuffer);

	442

	443 // Now compute the start of the subset of those rows that the filter

	444 // needs.

	445 unsigned char* const* firstRowForFilter =

	446 &rowsToConvolve[filterOffset - firstRowInCircularBuffer];

	447

	448 if (convolveProcs->fConvolveVertically) {

	449 convolveProcs->fConvolveVertically(filterValues, filterLength,

	450 firstRowForFilter,

	451 filterX.numValues(), curOutputRow,

	452 sourceHasAlpha);

	453 } else {

	454 ConvolveVertically(filterValues, filterLength,

	455 firstRowForFilter,

	456 filterX.numValues(), curOutputRow,

	457 sourceHasAlpha);

	458 }

	459 }

	460 }

OLD	NEW

« no previous file with comments | « src/core/SkConvolver.h ('k') | src/opts/SkBitmapFilter_opts_SSE2.h » ('j') | no next file with comments »