OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <vector>

#include "base/logging.h"
#include "skia/ext/convolver.h"
#include "skia/ext/convolver_SSE2.h"
#include "skia/ext/convolver_mips_dspr2.h"
#include "third_party/skia/include/core/SkSize.h"
#include "third_party/skia/include/core/SkTypes.h"
| 13 |
| 14 namespace skia { |
| 15 |
| 16 namespace { |
| 17 |
// Clamps |a| into the representable range of an 8-bit unsigned value
// ([0, 255]) and returns it.
inline unsigned char ClampTo8(int a) {
  // Common case first: the unsigned cast folds the 0 <= a < 256 test into a
  // single comparison (negative values wrap to huge unsigned numbers).
  if (static_cast<unsigned>(a) < 256)
    return a;
  return (a < 0) ? 0 : 255;
}
| 27 |
| 28 // Takes the value produced by accumulating element-wise product of image with |
| 29 // a kernel and brings it back into range. |
| 30 // All of the filter scaling factors are in fixed point with kShiftBits bits of |
| 31 // fractional part. |
| 32 inline unsigned char BringBackTo8(int a, bool take_absolute) { |
| 33 a >>= ConvolutionFilter1D::kShiftBits; |
| 34 if (take_absolute) |
| 35 a = std::abs(a); |
| 36 return ClampTo8(a); |
| 37 } |
| 38 |
// A circular buffer of image rows. Rows are appended with AdvanceRow(), which
// hands out storage and tracks both the slot to reuse next and the running
// input-row coordinate. Once full, new rows silently overwrite the oldest.
class CircularRowBuffer {
 public:
  // |dest_row_pixel_width| is the number of pixels per stored row (4 bytes
  // each). |max_y_filter_size| is the largest vertical filter support, i.e.
  // the number of rows that must be retained simultaneously.
  // |first_input_row| seeds the coordinate of the first row returned by
  // AdvanceRow().
  CircularRowBuffer(int dest_row_pixel_width,
                    int max_y_filter_size,
                    int first_input_row)
      : row_byte_width_(dest_row_pixel_width * 4),
        num_rows_(max_y_filter_size),
        next_row_(0),
        next_row_coordinate_(first_input_row) {
    buffer_.resize(row_byte_width_ * max_y_filter_size);
    row_addresses_.resize(num_rows_);
  }

  // Claims the next slot in the ring and returns a pointer to its storage.
  // The caller is expected to fill the returned row.
  unsigned char* AdvanceRow() {
    unsigned char* const row = &buffer_[next_row_ * row_byte_width_];
    ++next_row_coordinate_;
    next_row_ = (next_row_ + 1) % num_rows_;  // Wrap around the ring.
    return row;
  }

  // Returns the rows "unrolled" into ascending coordinate order, writing the
  // coordinate of the first entry to |*first_row_index|.
  //
  // |*first_row_index| may be negative when the buffer has not been filled
  // yet (it logically starts above the top of the image); callers index
  // relative to it and never touch the not-yet-written entries.
  unsigned char* const* GetRowAddresses(int* first_row_index) {
    // Example for a 4-element ring holding coords 6-9:
    //   slot 0 -> coord 8
    //   slot 1 -> coord 9
    //   slot 2 -> coord 6   <- next_row_ = 2, next_row_coordinate_ = 10
    //   slot 3 -> coord 7
    // The slot about to be overwritten is also the lowest coordinate, so the
    // unrolled list starts there.
    *first_row_index = next_row_coordinate_ - num_rows_;

    int slot = next_row_;
    for (int i = 0; i < num_rows_; ++i) {
      row_addresses_[i] = &buffer_[slot * row_byte_width_];
      slot = (slot + 1) % num_rows_;
    }
    return &row_addresses_[0];
  }

 private:
  // Packed backing store; each row occupies exactly row_byte_width_ bytes.
  std::vector<unsigned char> buffer_;

  // Bytes per row in |buffer_|.
  int row_byte_width_;

  // Capacity of the ring, in rows.
  int num_rows_;

  // Ring slot that the next AdvanceRow() call will hand out.
  int next_row_;

  // Input-image y coordinate of that next row; grows monotonically.
  int next_row_coordinate_;

  // Scratch output for GetRowAddresses().
  std::vector<unsigned char*> row_addresses_;
};
| 126 |
| 127 // Convolves horizontally along a single row. The row data is given in |
| 128 // |src_data| and continues for the num_values() of the filter. |
| 129 template <bool has_alpha> |
| 130 void ConvolveHorizontally(const unsigned char* src_data, |
| 131 const ConvolutionFilter1D& filter, |
| 132 unsigned char* out_row) { |
| 133 // Loop over each pixel on this row in the output image. |
| 134 int num_values = filter.num_values(); |
| 135 for (int out_x = 0; out_x < num_values; out_x++) { |
| 136 // Get the filter that determines the current output pixel. |
| 137 int filter_offset, filter_length; |
| 138 const ConvolutionFilter1D::Fixed* filter_values = |
| 139 filter.FilterForValue(out_x, &filter_offset, &filter_length); |
| 140 |
| 141 // Compute the first pixel in this row that the filter affects. It will |
| 142 // touch |filter_length| pixels (4 bytes each) after this. |
| 143 const unsigned char* row_to_filter = &src_data[filter_offset * 4]; |
| 144 |
| 145 // Apply the filter to the row to get the destination pixel in |accum|. |
| 146 int accum[4] = {0}; |
| 147 for (int filter_x = 0; filter_x < filter_length; filter_x++) { |
| 148 ConvolutionFilter1D::Fixed cur_filter = filter_values[filter_x]; |
| 149 accum[0] += cur_filter * row_to_filter[filter_x * 4 + 0]; |
| 150 accum[1] += cur_filter * row_to_filter[filter_x * 4 + 1]; |
| 151 accum[2] += cur_filter * row_to_filter[filter_x * 4 + 2]; |
| 152 if (has_alpha) |
| 153 accum[3] += cur_filter * row_to_filter[filter_x * 4 + 3]; |
| 154 } |
| 155 |
| 156 // Bring this value back in range. All of the filter scaling factors |
| 157 // are in fixed point with kShiftBits bits of fractional part. |
| 158 accum[0] >>= ConvolutionFilter1D::kShiftBits; |
| 159 accum[1] >>= ConvolutionFilter1D::kShiftBits; |
| 160 accum[2] >>= ConvolutionFilter1D::kShiftBits; |
| 161 if (has_alpha) |
| 162 accum[3] >>= ConvolutionFilter1D::kShiftBits; |
| 163 |
| 164 // Store the new pixel. |
| 165 out_row[out_x * 4 + 0] = ClampTo8(accum[0]); |
| 166 out_row[out_x * 4 + 1] = ClampTo8(accum[1]); |
| 167 out_row[out_x * 4 + 2] = ClampTo8(accum[2]); |
| 168 if (has_alpha) |
| 169 out_row[out_x * 4 + 3] = ClampTo8(accum[3]); |
| 170 } |
| 171 } |
| 172 |
| 173 // Does vertical convolution to produce one output row. The filter values and |
| 174 // length are given in the first two parameters. These are applied to each |
| 175 // of the rows pointed to in the |source_data_rows| array, with each row |
| 176 // being |pixel_width| wide. |
| 177 // |
| 178 // The output must have room for |pixel_width * 4| bytes. |
| 179 template <bool has_alpha> |
| 180 void ConvolveVertically(const ConvolutionFilter1D::Fixed* filter_values, |
| 181 int filter_length, |
| 182 unsigned char* const* source_data_rows, |
| 183 int pixel_width, |
| 184 unsigned char* out_row) { |
| 185 // We go through each column in the output and do a vertical convolution, |
| 186 // generating one output pixel each time. |
| 187 for (int out_x = 0; out_x < pixel_width; out_x++) { |
| 188 // Compute the number of bytes over in each row that the current column |
| 189 // we're convolving starts at. The pixel will cover the next 4 bytes. |
| 190 int byte_offset = out_x * 4; |
| 191 |
| 192 // Apply the filter to one column of pixels. |
| 193 int accum[4] = {0}; |
| 194 for (int filter_y = 0; filter_y < filter_length; filter_y++) { |
| 195 ConvolutionFilter1D::Fixed cur_filter = filter_values[filter_y]; |
| 196 accum[0] += cur_filter * source_data_rows[filter_y][byte_offset + 0]; |
| 197 accum[1] += cur_filter * source_data_rows[filter_y][byte_offset + 1]; |
| 198 accum[2] += cur_filter * source_data_rows[filter_y][byte_offset + 2]; |
| 199 if (has_alpha) |
| 200 accum[3] += cur_filter * source_data_rows[filter_y][byte_offset + 3]; |
| 201 } |
| 202 |
| 203 // Bring this value back in range. All of the filter scaling factors |
| 204 // are in fixed point with kShiftBits bits of precision. |
| 205 accum[0] >>= ConvolutionFilter1D::kShiftBits; |
| 206 accum[1] >>= ConvolutionFilter1D::kShiftBits; |
| 207 accum[2] >>= ConvolutionFilter1D::kShiftBits; |
| 208 if (has_alpha) |
| 209 accum[3] >>= ConvolutionFilter1D::kShiftBits; |
| 210 |
| 211 // Store the new pixel. |
| 212 out_row[byte_offset + 0] = ClampTo8(accum[0]); |
| 213 out_row[byte_offset + 1] = ClampTo8(accum[1]); |
| 214 out_row[byte_offset + 2] = ClampTo8(accum[2]); |
| 215 if (has_alpha) { |
| 216 unsigned char alpha = ClampTo8(accum[3]); |
| 217 |
| 218 // Make sure the alpha channel doesn't come out smaller than any of the |
| 219 // color channels. We use premultipled alpha channels, so this should |
| 220 // never happen, but rounding errors will cause this from time to time. |
| 221 // These "impossible" colors will cause overflows (and hence random pixel |
| 222 // values) when the resulting bitmap is drawn to the screen. |
| 223 // |
| 224 // We only need to do this when generating the final output row (here). |
| 225 int max_color_channel = std::max( |
| 226 out_row[byte_offset + 0], |
| 227 std::max(out_row[byte_offset + 1], out_row[byte_offset + 2])); |
| 228 if (alpha < max_color_channel) |
| 229 out_row[byte_offset + 3] = max_color_channel; |
| 230 else |
| 231 out_row[byte_offset + 3] = alpha; |
| 232 } else { |
| 233 // No alpha channel, the image is opaque. |
| 234 out_row[byte_offset + 3] = 0xff; |
| 235 } |
| 236 } |
| 237 } |
| 238 |
| 239 void ConvolveVertically(const ConvolutionFilter1D::Fixed* filter_values, |
| 240 int filter_length, |
| 241 unsigned char* const* source_data_rows, |
| 242 int pixel_width, |
| 243 unsigned char* out_row, |
| 244 bool source_has_alpha) { |
| 245 if (source_has_alpha) { |
| 246 ConvolveVertically<true>(filter_values, filter_length, source_data_rows, |
| 247 pixel_width, out_row); |
| 248 } else { |
| 249 ConvolveVertically<false>(filter_values, filter_length, source_data_rows, |
| 250 pixel_width, out_row); |
| 251 } |
| 252 } |
| 253 |
| 254 } // namespace |
| 255 |
// ConvolutionFilter1D ---------------------------------------------------------

// Starts with no filters added; |max_filter_| tracks the longest trimmed
// filter as AddFilter() is called.
ConvolutionFilter1D::ConvolutionFilter1D() : max_filter_(0) {}

ConvolutionFilter1D::~ConvolutionFilter1D() {}
| 261 |
| 262 void ConvolutionFilter1D::AddFilter(int filter_offset, |
| 263 const float* filter_values, |
| 264 int filter_length) { |
| 265 SkASSERT(filter_length > 0); |
| 266 |
| 267 std::vector<Fixed> fixed_values; |
| 268 fixed_values.reserve(filter_length); |
| 269 |
| 270 for (int i = 0; i < filter_length; ++i) |
| 271 fixed_values.push_back(FloatToFixed(filter_values[i])); |
| 272 |
| 273 AddFilter(filter_offset, &fixed_values[0], filter_length); |
| 274 } |
| 275 |
// Appends one per-output-pixel filter. |filter_values| has |filter_length|
// fixed-point taps; |filter_offset| is the index of the first source pixel
// the filter touches. Zero taps at either end are trimmed before storage
// (adjusting the stored offset/length), but the caller-specified length is
// remembered in FilterInstance::length.
void ConvolutionFilter1D::AddFilter(int filter_offset,
                                    const Fixed* filter_values,
                                    int filter_length) {
  // It is common for leading/trailing filter values to be zeros. In such
  // cases it is beneficial to only store the central factors.
  // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on
  // a 1080p image this optimization gives a ~10% speed improvement.
  int filter_size = filter_length;
  int first_non_zero = 0;
  while (first_non_zero < filter_length && filter_values[first_non_zero] == 0)
    first_non_zero++;

  if (first_non_zero < filter_length) {
    // Here we have at least one non-zero factor.
    int last_non_zero = filter_length - 1;
    while (last_non_zero >= 0 && filter_values[last_non_zero] == 0)
      last_non_zero--;

    // Shift the offset past the trimmed leading zeros and shrink the length
    // to cover only the non-zero central run.
    filter_offset += first_non_zero;
    filter_length = last_non_zero + 1 - first_non_zero;
    SkASSERT(filter_length > 0);

    for (int i = first_non_zero; i <= last_non_zero; i++)
      filter_values_.push_back(filter_values[i]);
  } else {
    // Here all the factors were zeroes.
    filter_length = 0;
  }

  FilterInstance instance;

  // We pushed filter_length elements onto filter_values_, so the data for
  // this instance starts that many entries before the current end.
  instance.data_location =
      (static_cast<int>(filter_values_.size()) - filter_length);
  instance.offset = filter_offset;
  instance.trimmed_length = filter_length;
  instance.length = filter_size;
  filters_.push_back(instance);

  max_filter_ = std::max(max_filter_, filter_length);
}
| 317 |
| 318 const ConvolutionFilter1D::Fixed* ConvolutionFilter1D::GetSingleFilter( |
| 319 int* specified_filter_length, |
| 320 int* filter_offset, |
| 321 int* filter_length) const { |
| 322 const FilterInstance& filter = filters_[0]; |
| 323 *filter_offset = filter.offset; |
| 324 *filter_length = filter.trimmed_length; |
| 325 *specified_filter_length = filter.length; |
| 326 if (filter.trimmed_length == 0) |
| 327 return NULL; |
| 328 |
| 329 return &filter_values_[filter.data_location]; |
| 330 } |
| 331 |
// Function-pointer types matching the platform-specific (SIMD) convolution
// entry points. A NULL pointer means no accelerated implementation is
// available for that operation.
typedef void (*ConvolveVertically_pointer)(
    const ConvolutionFilter1D::Fixed* filter_values,
    int filter_length,
    unsigned char* const* source_data_rows,
    int pixel_width,
    unsigned char* out_row,
    bool has_alpha);
typedef void (*Convolve4RowsHorizontally_pointer)(
    const unsigned char* src_data[4],
    const ConvolutionFilter1D& filter,
    unsigned char* out_row[4]);
typedef void (*ConvolveHorizontally_pointer)(const unsigned char* src_data,
                                             const ConvolutionFilter1D& filter,
                                             unsigned char* out_row,
                                             bool has_alpha);

// Bundle of convolution implementations chosen at runtime by SetupSIMD().
struct ConvolveProcs {
  // This is how many extra pixels may be read by the
  // convolve*horizontally functions.
  int extra_horizontal_reads;
  ConvolveVertically_pointer convolve_vertically;
  Convolve4RowsHorizontally_pointer convolve_4rows_horizontally;
  ConvolveHorizontally_pointer convolve_horizontally;
};
| 356 |
// Fills |procs| with the fastest implementations compiled into this build.
// Entries with no SIMD version are left untouched, so callers must
// pre-initialize the struct to NULLs/0 first (as BGRAConvolve2D does).
void SetupSIMD(ConvolveProcs* procs) {
#ifdef SIMD_SSE2
  procs->extra_horizontal_reads = 3;
  procs->convolve_vertically = &ConvolveVertically_SSE2;
  procs->convolve_4rows_horizontally = &Convolve4RowsHorizontally_SSE2;
  procs->convolve_horizontally = &ConvolveHorizontally_SSE2;
#elif defined SIMD_MIPS_DSPR2
  // Note: no 4-row horizontal variant exists for MIPS DSPr2; that pointer
  // stays NULL and the caller falls back to the single-row path.
  procs->extra_horizontal_reads = 3;
  procs->convolve_vertically = &ConvolveVertically_mips_dspr2;
  procs->convolve_horizontally = &ConvolveHorizontally_mips_dspr2;
#endif
}
| 369 |
// Performs a separable 2D convolution of a 4-byte-per-pixel image: each input
// row is convolved with |filter_x|, the intermediate rows are held in a
// circular buffer, and |filter_y| is applied down the columns to emit each
// output row. SIMD implementations are used where available unless
// |use_simd_if_possible| is false.
void BGRAConvolve2D(const unsigned char* source_data,
                    int source_byte_row_stride,
                    bool source_has_alpha,
                    const ConvolutionFilter1D& filter_x,
                    const ConvolutionFilter1D& filter_y,
                    int output_byte_row_stride,
                    unsigned char* output,
                    bool use_simd_if_possible) {
  // Start with the portable defaults; SetupSIMD overwrites only the entries
  // it has accelerated versions for.
  ConvolveProcs simd;
  simd.extra_horizontal_reads = 0;
  simd.convolve_vertically = NULL;
  simd.convolve_4rows_horizontally = NULL;
  simd.convolve_horizontally = NULL;
  if (use_simd_if_possible) {
    SetupSIMD(&simd);
  }

  int max_y_filter_size = filter_y.max_filter();

  // The next row in the input that we will generate a horizontally
  // convolved row for. If the filter doesn't start at the beginning of the
  // image (this is the case when we are only resizing a subset), then we
  // don't want to generate any output rows before that. Compute the starting
  // row for convolution as the first pixel for the first vertical filter.
  int filter_offset, filter_length;
  const ConvolutionFilter1D::Fixed* filter_values =
      filter_y.FilterForValue(0, &filter_offset, &filter_length);
  int next_x_row = filter_offset;

  // We loop over each row in the input doing a horizontal convolution. This
  // will result in a horizontally convolved image. We write the results into
  // a circular buffer of convolved rows and do vertical convolution as rows
  // are available. This prevents us from having to store the entire
  // intermediate image and helps cache coherency.
  // We will need four extra rows to allow horizontal convolution could be done
  // simultaneously. We also padding each row in row buffer to be aligned-up to
  // 16 bytes.
  // TODO(jiesun): We do not use aligned load from row buffer in vertical
  // convolution pass yet. Somehow Windows does not like it.
  int row_buffer_width = (filter_x.num_values() + 15) & ~0xF;
  int row_buffer_height =
      max_y_filter_size + (simd.convolve_4rows_horizontally ? 4 : 0);
  CircularRowBuffer row_buffer(row_buffer_width, row_buffer_height,
                               filter_offset);

  // Loop over every possible output row, processing just enough horizontal
  // convolutions to run each subsequent vertical convolution.
  SkASSERT(output_byte_row_stride >= filter_x.num_values() * 4);
  int num_output_rows = filter_y.num_values();

  // We need to check which is the last line to convolve before we advance 4
  // lines in one iteration.
  int last_filter_offset, last_filter_length;

  // SSE2 can access up to 3 extra pixels past the end of the
  // buffer. At the bottom of the image, we have to be careful
  // not to access data past the end of the buffer. Normally
  // we fall back to the C++ implementation for the last row.
  // If the last row is less than 3 pixels wide, we may have to fall
  // back to the C++ version for more rows. Compute how many
  // rows we need to avoid the SSE implementation for here.
  filter_x.FilterForValue(filter_x.num_values() - 1, &last_filter_offset,
                          &last_filter_length);
  int avoid_simd_rows =
      1 +
      simd.extra_horizontal_reads / (last_filter_offset + last_filter_length);

  // |last_filter_offset|/|last_filter_length| are reused below for the
  // vertical filter of the final output row.
  filter_y.FilterForValue(num_output_rows - 1, &last_filter_offset,
                          &last_filter_length);

  for (int out_y = 0; out_y < num_output_rows; out_y++) {
    filter_values =
        filter_y.FilterForValue(out_y, &filter_offset, &filter_length);

    // Generate output rows until we have enough to run the current filter.
    while (next_x_row < filter_offset + filter_length) {
      if (simd.convolve_4rows_horizontally &&
          next_x_row + 3 <
              last_filter_offset + last_filter_length - avoid_simd_rows) {
        // Fast path: convolve four source rows at once with SIMD.
        const unsigned char* src[4];
        unsigned char* out_row[4];
        for (int i = 0; i < 4; ++i) {
          src[i] = &source_data[(next_x_row + i) * source_byte_row_stride];
          out_row[i] = row_buffer.AdvanceRow();
        }
        simd.convolve_4rows_horizontally(src, filter_x, out_row);
        next_x_row += 4;
      } else {
        // Check if we need to avoid SSE2 for this row.
        if (simd.convolve_horizontally &&
            next_x_row <
                last_filter_offset + last_filter_length - avoid_simd_rows) {
          simd.convolve_horizontally(
              &source_data[next_x_row * source_byte_row_stride], filter_x,
              row_buffer.AdvanceRow(), source_has_alpha);
        } else {
          // Portable fallback near the bottom of the image where SIMD could
          // over-read.
          if (source_has_alpha) {
            ConvolveHorizontally<true>(
                &source_data[next_x_row * source_byte_row_stride], filter_x,
                row_buffer.AdvanceRow());
          } else {
            ConvolveHorizontally<false>(
                &source_data[next_x_row * source_byte_row_stride], filter_x,
                row_buffer.AdvanceRow());
          }
        }
        next_x_row++;
      }
    }

    // Compute where in the output image this row of final data will go.
    unsigned char* cur_output_row = &output[out_y * output_byte_row_stride];

    // Get the list of rows that the circular buffer has, in order.
    int first_row_in_circular_buffer;
    unsigned char* const* rows_to_convolve =
        row_buffer.GetRowAddresses(&first_row_in_circular_buffer);

    // Now compute the start of the subset of those rows that the filter
    // needs.
    unsigned char* const* first_row_for_filter =
        &rows_to_convolve[filter_offset - first_row_in_circular_buffer];

    if (simd.convolve_vertically) {
      simd.convolve_vertically(filter_values, filter_length,
                               first_row_for_filter, filter_x.num_values(),
                               cur_output_row, source_has_alpha);
    } else {
      ConvolveVertically(filter_values, filter_length, first_row_for_filter,
                         filter_x.num_values(), cur_output_row,
                         source_has_alpha);
    }
  }
}
| 504 |
// Convolves one interleaved channel of an image horizontally with a single
// shared filter, writing into one channel of |output|. The image is edge-
// padded: columns past either border reuse the nearest border pixel. When
// |absolute_values| is set, the magnitude of each result is stored (useful
// for derivative filters whose taps can be negative).
void SingleChannelConvolveX1D(const unsigned char* source_data,
                              int source_byte_row_stride,
                              int input_channel_index,
                              int input_channel_count,
                              const ConvolutionFilter1D& filter,
                              const SkISize& image_size,
                              unsigned char* output,
                              int output_byte_row_stride,
                              int output_channel_index,
                              int output_channel_count,
                              bool absolute_values) {
  int filter_offset, filter_length, filter_size;
  // Very much unlike BGRAConvolve2D, here we expect to have the same filter
  // for all pixels.
  const ConvolutionFilter1D::Fixed* filter_values =
      filter.GetSingleFilter(&filter_size, &filter_offset, &filter_length);

  if (filter_values == NULL || image_size.width() < filter_size) {
    NOTREACHED();
    return;
  }

  int centrepoint = filter_length / 2;
  if (filter_size - filter_offset != 2 * filter_offset) {
    // This means the original filter was not symmetrical AND
    // got clipped from one side more than from the other.
    centrepoint = filter_size / 2 - filter_offset;
  }

  const unsigned char* source_data_row = source_data;
  unsigned char* output_row = output;

  for (int r = 0; r < image_size.height(); ++r) {
    unsigned char* target_byte = output_row + output_channel_index;
    // Process the lead part, padding image to the left with the first pixel.
    int c = 0;
    for (; c < centrepoint; ++c, target_byte += output_channel_count) {
      int accval = 0;
      int i = 0;
      int pixel_byte_index = input_channel_index;
      // Taps that fall off the left edge all read the first pixel.
      for (; i < centrepoint - c; ++i)  // Padding part.
        accval += filter_values[i] * source_data_row[pixel_byte_index];

      for (; i < filter_length; ++i, pixel_byte_index += input_channel_count)
        accval += filter_values[i] * source_data_row[pixel_byte_index];

      *target_byte = BringBackTo8(accval, absolute_values);
    }

    // Now for the main event.
    for (; c < image_size.width() - centrepoint;
         ++c, target_byte += output_channel_count) {
      int accval = 0;
      int pixel_byte_index =
          (c - centrepoint) * input_channel_count + input_channel_index;

      for (int i = 0; i < filter_length;
           ++i, pixel_byte_index += input_channel_count) {
        accval += filter_values[i] * source_data_row[pixel_byte_index];
      }

      *target_byte = BringBackTo8(accval, absolute_values);
    }

    // Trailing edge: taps that fall off the right edge keep reading the last
    // in-bounds pixel (pixel_byte_index stops advancing).
    for (; c < image_size.width(); ++c, target_byte += output_channel_count) {
      int accval = 0;
      int overlap_taps = image_size.width() - c + centrepoint;
      int pixel_byte_index =
          (c - centrepoint) * input_channel_count + input_channel_index;
      int i = 0;
      for (; i < overlap_taps - 1; ++i, pixel_byte_index += input_channel_count)
        accval += filter_values[i] * source_data_row[pixel_byte_index];

      for (; i < filter_length; ++i)
        accval += filter_values[i] * source_data_row[pixel_byte_index];

      *target_byte = BringBackTo8(accval, absolute_values);
    }

    source_data_row += source_byte_row_stride;
    output_row += output_byte_row_stride;
  }
}
| 588 |
// Vertical counterpart of SingleChannelConvolveX1D: convolves one interleaved
// channel down each column with a single shared filter, edge-padding rows
// past the top and bottom borders with the nearest border pixel.
void SingleChannelConvolveY1D(const unsigned char* source_data,
                              int source_byte_row_stride,
                              int input_channel_index,
                              int input_channel_count,
                              const ConvolutionFilter1D& filter,
                              const SkISize& image_size,
                              unsigned char* output,
                              int output_byte_row_stride,
                              int output_channel_index,
                              int output_channel_count,
                              bool absolute_values) {
  int filter_offset, filter_length, filter_size;
  // Very much unlike BGRAConvolve2D, here we expect to have the same filter
  // for all pixels.
  const ConvolutionFilter1D::Fixed* filter_values =
      filter.GetSingleFilter(&filter_size, &filter_offset, &filter_length);

  if (filter_values == NULL || image_size.height() < filter_size) {
    NOTREACHED();
    return;
  }

  int centrepoint = filter_length / 2;
  if (filter_size - filter_offset != 2 * filter_offset) {
    // This means the original filter was not symmetrical AND
    // got clipped from one side more than from the other.
    centrepoint = filter_size / 2 - filter_offset;
  }

  for (int c = 0; c < image_size.width(); ++c) {
    unsigned char* target_byte =
        output + c * output_channel_count + output_channel_index;
    int r = 0;

    // Lead part: taps that fall above the image all read the top pixel.
    for (; r < centrepoint; ++r, target_byte += output_byte_row_stride) {
      int accval = 0;
      int i = 0;
      int pixel_byte_index = c * input_channel_count + input_channel_index;

      for (; i < centrepoint - r; ++i)  // Padding part.
        accval += filter_values[i] * source_data[pixel_byte_index];

      for (; i < filter_length; ++i, pixel_byte_index += source_byte_row_stride)
        accval += filter_values[i] * source_data[pixel_byte_index];

      *target_byte = BringBackTo8(accval, absolute_values);
    }

    // Main part: the whole filter is inside the image.
    for (; r < image_size.height() - centrepoint;
         ++r, target_byte += output_byte_row_stride) {
      int accval = 0;
      int pixel_byte_index = (r - centrepoint) * source_byte_row_stride +
                             c * input_channel_count + input_channel_index;
      for (int i = 0; i < filter_length;
           ++i, pixel_byte_index += source_byte_row_stride) {
        accval += filter_values[i] * source_data[pixel_byte_index];
      }

      *target_byte = BringBackTo8(accval, absolute_values);
    }

    // Trailing edge: taps that fall below the image keep reading the last
    // in-bounds row (pixel_byte_index stops advancing).
    for (; r < image_size.height();
         ++r, target_byte += output_byte_row_stride) {
      int accval = 0;
      int overlap_taps = image_size.height() - r + centrepoint;
      int pixel_byte_index = (r - centrepoint) * source_byte_row_stride +
                             c * input_channel_count + input_channel_index;
      int i = 0;
      for (; i < overlap_taps - 1;
           ++i, pixel_byte_index += source_byte_row_stride) {
        accval += filter_values[i] * source_data[pixel_byte_index];
      }

      for (; i < filter_length; ++i)
        accval += filter_values[i] * source_data[pixel_byte_index];

      *target_byte = BringBackTo8(accval, absolute_values);
    }
  }
}
| 669 |
| 670 void SetUpGaussianConvolutionKernel(ConvolutionFilter1D* filter, |
| 671 float kernel_sigma, |
| 672 bool derivative) { |
| 673 DCHECK(filter != NULL); |
| 674 DCHECK_GT(kernel_sigma, 0.0); |
| 675 const int tail_length = static_cast<int>(4.0f * kernel_sigma + 0.5f); |
| 676 const int kernel_size = tail_length * 2 + 1; |
| 677 const float sigmasq = kernel_sigma * kernel_sigma; |
| 678 std::vector<float> kernel_weights(kernel_size, 0.0); |
| 679 float kernel_sum = 1.0f; |
| 680 |
| 681 kernel_weights[tail_length] = 1.0f; |
| 682 |
| 683 for (int ii = 1; ii <= tail_length; ++ii) { |
| 684 float v = std::exp(-0.5f * ii * ii / sigmasq); |
| 685 kernel_weights[tail_length + ii] = v; |
| 686 kernel_weights[tail_length - ii] = v; |
| 687 kernel_sum += 2.0f * v; |
| 688 } |
| 689 |
| 690 for (int i = 0; i < kernel_size; ++i) |
| 691 kernel_weights[i] /= kernel_sum; |
| 692 |
| 693 if (derivative) { |
| 694 kernel_weights[tail_length] = 0.0; |
| 695 for (int ii = 1; ii <= tail_length; ++ii) { |
| 696 float v = sigmasq * kernel_weights[tail_length + ii] / ii; |
| 697 kernel_weights[tail_length + ii] = v; |
| 698 kernel_weights[tail_length - ii] = -v; |
| 699 } |
| 700 } |
| 701 |
| 702 filter->AddFilter(0, &kernel_weights[0], kernel_weights.size()); |
| 703 } |
| 704 |
| 705 } // namespace skia |
OLD | NEW |