Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(591)

Side by Side Diff: skia/ext/convolver.cc

Issue 2011713003: Roll skia to 8cc209111876b7c78b5ec577c9221d8ed5e21024 (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « skia/ext/convolver.h ('k') | skia/ext/convolver_SSE2.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <algorithm>
6
7 #include "base/logging.h"
8 #include "skia/ext/convolver.h"
9 #include "skia/ext/convolver_SSE2.h"
10 #include "skia/ext/convolver_mips_dspr2.h"
11 #include "third_party/skia/include/core/SkSize.h"
12 #include "third_party/skia/include/core/SkTypes.h"
13
14 namespace skia {
15
16 namespace {
17
// Saturates |a| to the range [0, 255] and returns it as an 8-bit unsigned
// value: negative inputs become 0, inputs above 255 become 255.
inline unsigned char ClampTo8(int a) {
  // Viewed as unsigned, any value outside [0, 255] (including negatives) has
  // at least one bit set above the low byte, so one test covers the common
  // in-range case.
  const bool fits_in_byte = (static_cast<unsigned>(a) & ~0xFFu) == 0;
  if (fits_in_byte)
    return a;
  return (a < 0) ? 0 : 255;
}
27
28 // Takes the value produced by accumulating element-wise product of image with
29 // a kernel and brings it back into range.
30 // All of the filter scaling factors are in fixed point with kShiftBits bits of
31 // fractional part.
32 inline unsigned char BringBackTo8(int a, bool take_absolute) {
33 a >>= ConvolutionFilter1D::kShiftBits;
34 if (take_absolute)
35 a = std::abs(a);
36 return ClampTo8(a);
37 }
38
// A fixed-capacity ring of pixel rows. Rows are handed out for writing one at
// a time through AdvanceRow(); the buffer remembers which slot is written
// next and the image y coordinate that slot corresponds to.
class CircularRowBuffer {
 public:
  // |dest_row_pixel_width| is the number of pixels per row; each pixel takes
  // 4 bytes. |max_y_filter_size| is the number of rows the buffer holds (only
  // enough rows for the biggest filter are needed). |first_input_row| is the
  // y coordinate assigned to the first row returned by AdvanceRow(); later
  // rows count up from there.
  CircularRowBuffer(int dest_row_pixel_width,
                    int max_y_filter_size,
                    int first_input_row)
      : row_byte_width_(dest_row_pixel_width * 4),
        num_rows_(max_y_filter_size),
        next_row_(0),
        next_row_coordinate_(first_input_row) {
    buffer_.resize(row_byte_width_ * max_y_filter_size);
    row_addresses_.resize(num_rows_);
  }

  // Returns a pointer to the start of the slot to write next, then moves the
  // write position (and its y coordinate) forward by one row.
  unsigned char* AdvanceRow() {
    unsigned char* const slot_start = &buffer_[next_row_ * row_byte_width_];
    ++next_row_coordinate_;

    // Step to the following slot, wrapping to slot 0 at the end of the ring.
    if (++next_row_ == num_rows_)
      next_row_ = 0;
    return slot_start;
  }

  // Returns an array of |num_rows_| row pointers in increasing y order, and
  // stores the y coordinate of the first of them in |*first_row_index|.
  //
  // |*first_row_index| may be negative: that means the ring has not been
  // filled yet and logically starts above the top of the image.
  unsigned char* const* GetRowAddresses(int* first_row_index) {
    // Example for a 4-slot ring holding coordinates 6-9:
    //   Slot 0  Coord 8
    //   Slot 1  Coord 9
    //   Slot 2  Coord 6   <- next_row_ = 2, next_row_coordinate_ = 10.
    //   Slot 3  Coord 7
    //
    // The slot that will be overwritten next is also the one with the lowest
    // coordinate. The subtraction can go negative; callers index relative to
    // |*first_row_index| and never touch the negative rows.
    *first_row_index = next_row_coordinate_ - num_rows_;

    int slot = next_row_;
    for (int i = 0; i < num_rows_; ++i) {
      row_addresses_[i] = &buffer_[slot * row_byte_width_];
      // Move to the following slot, wrapping if necessary.
      slot = (slot + 1 == num_rows_) ? 0 : slot + 1;
    }
    return &row_addresses_[0];
  }

 private:
  // Backing store for all rows, packed back to back, |row_byte_width_| each.
  std::vector<unsigned char> buffer_;

  // Size in bytes of one row inside |buffer_|.
  int row_byte_width_;

  // Number of row slots in the ring.
  int num_rows_;

  // Index of the slot the next AdvanceRow() call returns. Wraps around.
  int next_row_;

  // The y coordinate of |next_row_|. Incremented on every AdvanceRow() and
  // never wraps.
  int next_row_coordinate_;

  // Scratch array filled in and returned by GetRowAddresses().
  std::vector<unsigned char*> row_addresses_;
};
126
127 // Convolves horizontally along a single row. The row data is given in
128 // |src_data| and continues for the num_values() of the filter.
129 template <bool has_alpha>
130 void ConvolveHorizontally(const unsigned char* src_data,
131 const ConvolutionFilter1D& filter,
132 unsigned char* out_row) {
133 // Loop over each pixel on this row in the output image.
134 int num_values = filter.num_values();
135 for (int out_x = 0; out_x < num_values; out_x++) {
136 // Get the filter that determines the current output pixel.
137 int filter_offset, filter_length;
138 const ConvolutionFilter1D::Fixed* filter_values =
139 filter.FilterForValue(out_x, &filter_offset, &filter_length);
140
141 // Compute the first pixel in this row that the filter affects. It will
142 // touch |filter_length| pixels (4 bytes each) after this.
143 const unsigned char* row_to_filter = &src_data[filter_offset * 4];
144
145 // Apply the filter to the row to get the destination pixel in |accum|.
146 int accum[4] = {0};
147 for (int filter_x = 0; filter_x < filter_length; filter_x++) {
148 ConvolutionFilter1D::Fixed cur_filter = filter_values[filter_x];
149 accum[0] += cur_filter * row_to_filter[filter_x * 4 + 0];
150 accum[1] += cur_filter * row_to_filter[filter_x * 4 + 1];
151 accum[2] += cur_filter * row_to_filter[filter_x * 4 + 2];
152 if (has_alpha)
153 accum[3] += cur_filter * row_to_filter[filter_x * 4 + 3];
154 }
155
156 // Bring this value back in range. All of the filter scaling factors
157 // are in fixed point with kShiftBits bits of fractional part.
158 accum[0] >>= ConvolutionFilter1D::kShiftBits;
159 accum[1] >>= ConvolutionFilter1D::kShiftBits;
160 accum[2] >>= ConvolutionFilter1D::kShiftBits;
161 if (has_alpha)
162 accum[3] >>= ConvolutionFilter1D::kShiftBits;
163
164 // Store the new pixel.
165 out_row[out_x * 4 + 0] = ClampTo8(accum[0]);
166 out_row[out_x * 4 + 1] = ClampTo8(accum[1]);
167 out_row[out_x * 4 + 2] = ClampTo8(accum[2]);
168 if (has_alpha)
169 out_row[out_x * 4 + 3] = ClampTo8(accum[3]);
170 }
171 }
172
173 // Does vertical convolution to produce one output row. The filter values and
174 // length are given in the first two parameters. These are applied to each
175 // of the rows pointed to in the |source_data_rows| array, with each row
176 // being |pixel_width| wide.
177 //
178 // The output must have room for |pixel_width * 4| bytes.
179 template <bool has_alpha>
180 void ConvolveVertically(const ConvolutionFilter1D::Fixed* filter_values,
181 int filter_length,
182 unsigned char* const* source_data_rows,
183 int pixel_width,
184 unsigned char* out_row) {
185 // We go through each column in the output and do a vertical convolution,
186 // generating one output pixel each time.
187 for (int out_x = 0; out_x < pixel_width; out_x++) {
188 // Compute the number of bytes over in each row that the current column
189 // we're convolving starts at. The pixel will cover the next 4 bytes.
190 int byte_offset = out_x * 4;
191
192 // Apply the filter to one column of pixels.
193 int accum[4] = {0};
194 for (int filter_y = 0; filter_y < filter_length; filter_y++) {
195 ConvolutionFilter1D::Fixed cur_filter = filter_values[filter_y];
196 accum[0] += cur_filter * source_data_rows[filter_y][byte_offset + 0];
197 accum[1] += cur_filter * source_data_rows[filter_y][byte_offset + 1];
198 accum[2] += cur_filter * source_data_rows[filter_y][byte_offset + 2];
199 if (has_alpha)
200 accum[3] += cur_filter * source_data_rows[filter_y][byte_offset + 3];
201 }
202
203 // Bring this value back in range. All of the filter scaling factors
204 // are in fixed point with kShiftBits bits of precision.
205 accum[0] >>= ConvolutionFilter1D::kShiftBits;
206 accum[1] >>= ConvolutionFilter1D::kShiftBits;
207 accum[2] >>= ConvolutionFilter1D::kShiftBits;
208 if (has_alpha)
209 accum[3] >>= ConvolutionFilter1D::kShiftBits;
210
211 // Store the new pixel.
212 out_row[byte_offset + 0] = ClampTo8(accum[0]);
213 out_row[byte_offset + 1] = ClampTo8(accum[1]);
214 out_row[byte_offset + 2] = ClampTo8(accum[2]);
215 if (has_alpha) {
216 unsigned char alpha = ClampTo8(accum[3]);
217
218 // Make sure the alpha channel doesn't come out smaller than any of the
219 // color channels. We use premultipled alpha channels, so this should
220 // never happen, but rounding errors will cause this from time to time.
221 // These "impossible" colors will cause overflows (and hence random pixel
222 // values) when the resulting bitmap is drawn to the screen.
223 //
224 // We only need to do this when generating the final output row (here).
225 int max_color_channel = std::max(
226 out_row[byte_offset + 0],
227 std::max(out_row[byte_offset + 1], out_row[byte_offset + 2]));
228 if (alpha < max_color_channel)
229 out_row[byte_offset + 3] = max_color_channel;
230 else
231 out_row[byte_offset + 3] = alpha;
232 } else {
233 // No alpha channel, the image is opaque.
234 out_row[byte_offset + 3] = 0xff;
235 }
236 }
237 }
238
239 void ConvolveVertically(const ConvolutionFilter1D::Fixed* filter_values,
240 int filter_length,
241 unsigned char* const* source_data_rows,
242 int pixel_width,
243 unsigned char* out_row,
244 bool source_has_alpha) {
245 if (source_has_alpha) {
246 ConvolveVertically<true>(filter_values, filter_length, source_data_rows,
247 pixel_width, out_row);
248 } else {
249 ConvolveVertically<false>(filter_values, filter_length, source_data_rows,
250 pixel_width, out_row);
251 }
252 }
253
254 } // namespace
255
256 // ConvolutionFilter1D ---------------------------------------------------------
257
258 ConvolutionFilter1D::ConvolutionFilter1D() : max_filter_(0) {}
259
260 ConvolutionFilter1D::~ConvolutionFilter1D() {}
261
262 void ConvolutionFilter1D::AddFilter(int filter_offset,
263 const float* filter_values,
264 int filter_length) {
265 SkASSERT(filter_length > 0);
266
267 std::vector<Fixed> fixed_values;
268 fixed_values.reserve(filter_length);
269
270 for (int i = 0; i < filter_length; ++i)
271 fixed_values.push_back(FloatToFixed(filter_values[i]));
272
273 AddFilter(filter_offset, &fixed_values[0], filter_length);
274 }
275
276 void ConvolutionFilter1D::AddFilter(int filter_offset,
277 const Fixed* filter_values,
278 int filter_length) {
279 // It is common for leading/trailing filter values to be zeros. In such
280 // cases it is beneficial to only store the central factors.
281 // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on
282 // a 1080p image this optimization gives a ~10% speed improvement.
283 int filter_size = filter_length;
284 int first_non_zero = 0;
285 while (first_non_zero < filter_length && filter_values[first_non_zero] == 0)
286 first_non_zero++;
287
288 if (first_non_zero < filter_length) {
289 // Here we have at least one non-zero factor.
290 int last_non_zero = filter_length - 1;
291 while (last_non_zero >= 0 && filter_values[last_non_zero] == 0)
292 last_non_zero--;
293
294 filter_offset += first_non_zero;
295 filter_length = last_non_zero + 1 - first_non_zero;
296 SkASSERT(filter_length > 0);
297
298 for (int i = first_non_zero; i <= last_non_zero; i++)
299 filter_values_.push_back(filter_values[i]);
300 } else {
301 // Here all the factors were zeroes.
302 filter_length = 0;
303 }
304
305 FilterInstance instance;
306
307 // We pushed filter_length elements onto filter_values_
308 instance.data_location =
309 (static_cast<int>(filter_values_.size()) - filter_length);
310 instance.offset = filter_offset;
311 instance.trimmed_length = filter_length;
312 instance.length = filter_size;
313 filters_.push_back(instance);
314
315 max_filter_ = std::max(max_filter_, filter_length);
316 }
317
318 const ConvolutionFilter1D::Fixed* ConvolutionFilter1D::GetSingleFilter(
319 int* specified_filter_length,
320 int* filter_offset,
321 int* filter_length) const {
322 const FilterInstance& filter = filters_[0];
323 *filter_offset = filter.offset;
324 *filter_length = filter.trimmed_length;
325 *specified_filter_length = filter.length;
326 if (filter.trimmed_length == 0)
327 return NULL;
328
329 return &filter_values_[filter.data_location];
330 }
331
332 typedef void (*ConvolveVertically_pointer)(
333 const ConvolutionFilter1D::Fixed* filter_values,
334 int filter_length,
335 unsigned char* const* source_data_rows,
336 int pixel_width,
337 unsigned char* out_row,
338 bool has_alpha);
339 typedef void (*Convolve4RowsHorizontally_pointer)(
340 const unsigned char* src_data[4],
341 const ConvolutionFilter1D& filter,
342 unsigned char* out_row[4]);
343 typedef void (*ConvolveHorizontally_pointer)(const unsigned char* src_data,
344 const ConvolutionFilter1D& filter,
345 unsigned char* out_row,
346 bool has_alpha);
347
348 struct ConvolveProcs {
349 // This is how many extra pixels may be read by the
350 // conolve*horizontally functions.
351 int extra_horizontal_reads;
352 ConvolveVertically_pointer convolve_vertically;
353 Convolve4RowsHorizontally_pointer convolve_4rows_horizontally;
354 ConvolveHorizontally_pointer convolve_horizontally;
355 };
356
357 void SetupSIMD(ConvolveProcs* procs) {
358 #ifdef SIMD_SSE2
359 procs->extra_horizontal_reads = 3;
360 procs->convolve_vertically = &ConvolveVertically_SSE2;
361 procs->convolve_4rows_horizontally = &Convolve4RowsHorizontally_SSE2;
362 procs->convolve_horizontally = &ConvolveHorizontally_SSE2;
363 #elif defined SIMD_MIPS_DSPR2
364 procs->extra_horizontal_reads = 3;
365 procs->convolve_vertically = &ConvolveVertically_mips_dspr2;
366 procs->convolve_horizontally = &ConvolveHorizontally_mips_dspr2;
367 #endif
368 }
369
370 void BGRAConvolve2D(const unsigned char* source_data,
371 int source_byte_row_stride,
372 bool source_has_alpha,
373 const ConvolutionFilter1D& filter_x,
374 const ConvolutionFilter1D& filter_y,
375 int output_byte_row_stride,
376 unsigned char* output,
377 bool use_simd_if_possible) {
378 ConvolveProcs simd;
379 simd.extra_horizontal_reads = 0;
380 simd.convolve_vertically = NULL;
381 simd.convolve_4rows_horizontally = NULL;
382 simd.convolve_horizontally = NULL;
383 if (use_simd_if_possible) {
384 SetupSIMD(&simd);
385 }
386
387 int max_y_filter_size = filter_y.max_filter();
388
389 // The next row in the input that we will generate a horizontally
390 // convolved row for. If the filter doesn't start at the beginning of the
391 // image (this is the case when we are only resizing a subset), then we
392 // don't want to generate any output rows before that. Compute the starting
393 // row for convolution as the first pixel for the first vertical filter.
394 int filter_offset, filter_length;
395 const ConvolutionFilter1D::Fixed* filter_values =
396 filter_y.FilterForValue(0, &filter_offset, &filter_length);
397 int next_x_row = filter_offset;
398
399 // We loop over each row in the input doing a horizontal convolution. This
400 // will result in a horizontally convolved image. We write the results into
401 // a circular buffer of convolved rows and do vertical convolution as rows
402 // are available. This prevents us from having to store the entire
403 // intermediate image and helps cache coherency.
404 // We will need four extra rows to allow horizontal convolution could be done
405 // simultaneously. We also padding each row in row buffer to be aligned-up to
406 // 16 bytes.
407 // TODO(jiesun): We do not use aligned load from row buffer in vertical
408 // convolution pass yet. Somehow Windows does not like it.
409 int row_buffer_width = (filter_x.num_values() + 15) & ~0xF;
410 int row_buffer_height =
411 max_y_filter_size + (simd.convolve_4rows_horizontally ? 4 : 0);
412 CircularRowBuffer row_buffer(row_buffer_width, row_buffer_height,
413 filter_offset);
414
415 // Loop over every possible output row, processing just enough horizontal
416 // convolutions to run each subsequent vertical convolution.
417 SkASSERT(output_byte_row_stride >= filter_x.num_values() * 4);
418 int num_output_rows = filter_y.num_values();
419
420 // We need to check which is the last line to convolve before we advance 4
421 // lines in one iteration.
422 int last_filter_offset, last_filter_length;
423
424 // SSE2 can access up to 3 extra pixels past the end of the
425 // buffer. At the bottom of the image, we have to be careful
426 // not to access data past the end of the buffer. Normally
427 // we fall back to the C++ implementation for the last row.
428 // If the last row is less than 3 pixels wide, we may have to fall
429 // back to the C++ version for more rows. Compute how many
430 // rows we need to avoid the SSE implementation for here.
431 filter_x.FilterForValue(filter_x.num_values() - 1, &last_filter_offset,
432 &last_filter_length);
433 int avoid_simd_rows =
434 1 +
435 simd.extra_horizontal_reads / (last_filter_offset + last_filter_length);
436
437 filter_y.FilterForValue(num_output_rows - 1, &last_filter_offset,
438 &last_filter_length);
439
440 for (int out_y = 0; out_y < num_output_rows; out_y++) {
441 filter_values =
442 filter_y.FilterForValue(out_y, &filter_offset, &filter_length);
443
444 // Generate output rows until we have enough to run the current filter.
445 while (next_x_row < filter_offset + filter_length) {
446 if (simd.convolve_4rows_horizontally &&
447 next_x_row + 3 <
448 last_filter_offset + last_filter_length - avoid_simd_rows) {
449 const unsigned char* src[4];
450 unsigned char* out_row[4];
451 for (int i = 0; i < 4; ++i) {
452 src[i] = &source_data[(next_x_row + i) * source_byte_row_stride];
453 out_row[i] = row_buffer.AdvanceRow();
454 }
455 simd.convolve_4rows_horizontally(src, filter_x, out_row);
456 next_x_row += 4;
457 } else {
458 // Check if we need to avoid SSE2 for this row.
459 if (simd.convolve_horizontally &&
460 next_x_row <
461 last_filter_offset + last_filter_length - avoid_simd_rows) {
462 simd.convolve_horizontally(
463 &source_data[next_x_row * source_byte_row_stride], filter_x,
464 row_buffer.AdvanceRow(), source_has_alpha);
465 } else {
466 if (source_has_alpha) {
467 ConvolveHorizontally<true>(
468 &source_data[next_x_row * source_byte_row_stride], filter_x,
469 row_buffer.AdvanceRow());
470 } else {
471 ConvolveHorizontally<false>(
472 &source_data[next_x_row * source_byte_row_stride], filter_x,
473 row_buffer.AdvanceRow());
474 }
475 }
476 next_x_row++;
477 }
478 }
479
480 // Compute where in the output image this row of final data will go.
481 unsigned char* cur_output_row = &output[out_y * output_byte_row_stride];
482
483 // Get the list of rows that the circular buffer has, in order.
484 int first_row_in_circular_buffer;
485 unsigned char* const* rows_to_convolve =
486 row_buffer.GetRowAddresses(&first_row_in_circular_buffer);
487
488 // Now compute the start of the subset of those rows that the filter
489 // needs.
490 unsigned char* const* first_row_for_filter =
491 &rows_to_convolve[filter_offset - first_row_in_circular_buffer];
492
493 if (simd.convolve_vertically) {
494 simd.convolve_vertically(filter_values, filter_length,
495 first_row_for_filter, filter_x.num_values(),
496 cur_output_row, source_has_alpha);
497 } else {
498 ConvolveVertically(filter_values, filter_length, first_row_for_filter,
499 filter_x.num_values(), cur_output_row,
500 source_has_alpha);
501 }
502 }
503 }
504
505 void SingleChannelConvolveX1D(const unsigned char* source_data,
506 int source_byte_row_stride,
507 int input_channel_index,
508 int input_channel_count,
509 const ConvolutionFilter1D& filter,
510 const SkISize& image_size,
511 unsigned char* output,
512 int output_byte_row_stride,
513 int output_channel_index,
514 int output_channel_count,
515 bool absolute_values) {
516 int filter_offset, filter_length, filter_size;
517 // Very much unlike BGRAConvolve2D, here we expect to have the same filter
518 // for all pixels.
519 const ConvolutionFilter1D::Fixed* filter_values =
520 filter.GetSingleFilter(&filter_size, &filter_offset, &filter_length);
521
522 if (filter_values == NULL || image_size.width() < filter_size) {
523 NOTREACHED();
524 return;
525 }
526
527 int centrepoint = filter_length / 2;
528 if (filter_size - filter_offset != 2 * filter_offset) {
529 // This means the original filter was not symmetrical AND
530 // got clipped from one side more than from the other.
531 centrepoint = filter_size / 2 - filter_offset;
532 }
533
534 const unsigned char* source_data_row = source_data;
535 unsigned char* output_row = output;
536
537 for (int r = 0; r < image_size.height(); ++r) {
538 unsigned char* target_byte = output_row + output_channel_index;
539 // Process the lead part, padding image to the left with the first pixel.
540 int c = 0;
541 for (; c < centrepoint; ++c, target_byte += output_channel_count) {
542 int accval = 0;
543 int i = 0;
544 int pixel_byte_index = input_channel_index;
545 for (; i < centrepoint - c; ++i) // Padding part.
546 accval += filter_values[i] * source_data_row[pixel_byte_index];
547
548 for (; i < filter_length; ++i, pixel_byte_index += input_channel_count)
549 accval += filter_values[i] * source_data_row[pixel_byte_index];
550
551 *target_byte = BringBackTo8(accval, absolute_values);
552 }
553
554 // Now for the main event.
555 for (; c < image_size.width() - centrepoint;
556 ++c, target_byte += output_channel_count) {
557 int accval = 0;
558 int pixel_byte_index =
559 (c - centrepoint) * input_channel_count + input_channel_index;
560
561 for (int i = 0; i < filter_length;
562 ++i, pixel_byte_index += input_channel_count) {
563 accval += filter_values[i] * source_data_row[pixel_byte_index];
564 }
565
566 *target_byte = BringBackTo8(accval, absolute_values);
567 }
568
569 for (; c < image_size.width(); ++c, target_byte += output_channel_count) {
570 int accval = 0;
571 int overlap_taps = image_size.width() - c + centrepoint;
572 int pixel_byte_index =
573 (c - centrepoint) * input_channel_count + input_channel_index;
574 int i = 0;
575 for (; i < overlap_taps - 1; ++i, pixel_byte_index += input_channel_count)
576 accval += filter_values[i] * source_data_row[pixel_byte_index];
577
578 for (; i < filter_length; ++i)
579 accval += filter_values[i] * source_data_row[pixel_byte_index];
580
581 *target_byte = BringBackTo8(accval, absolute_values);
582 }
583
584 source_data_row += source_byte_row_stride;
585 output_row += output_byte_row_stride;
586 }
587 }
588
589 void SingleChannelConvolveY1D(const unsigned char* source_data,
590 int source_byte_row_stride,
591 int input_channel_index,
592 int input_channel_count,
593 const ConvolutionFilter1D& filter,
594 const SkISize& image_size,
595 unsigned char* output,
596 int output_byte_row_stride,
597 int output_channel_index,
598 int output_channel_count,
599 bool absolute_values) {
600 int filter_offset, filter_length, filter_size;
601 // Very much unlike BGRAConvolve2D, here we expect to have the same filter
602 // for all pixels.
603 const ConvolutionFilter1D::Fixed* filter_values =
604 filter.GetSingleFilter(&filter_size, &filter_offset, &filter_length);
605
606 if (filter_values == NULL || image_size.height() < filter_size) {
607 NOTREACHED();
608 return;
609 }
610
611 int centrepoint = filter_length / 2;
612 if (filter_size - filter_offset != 2 * filter_offset) {
613 // This means the original filter was not symmetrical AND
614 // got clipped from one side more than from the other.
615 centrepoint = filter_size / 2 - filter_offset;
616 }
617
618 for (int c = 0; c < image_size.width(); ++c) {
619 unsigned char* target_byte =
620 output + c * output_channel_count + output_channel_index;
621 int r = 0;
622
623 for (; r < centrepoint; ++r, target_byte += output_byte_row_stride) {
624 int accval = 0;
625 int i = 0;
626 int pixel_byte_index = c * input_channel_count + input_channel_index;
627
628 for (; i < centrepoint - r; ++i) // Padding part.
629 accval += filter_values[i] * source_data[pixel_byte_index];
630
631 for (; i < filter_length; ++i, pixel_byte_index += source_byte_row_stride)
632 accval += filter_values[i] * source_data[pixel_byte_index];
633
634 *target_byte = BringBackTo8(accval, absolute_values);
635 }
636
637 for (; r < image_size.height() - centrepoint;
638 ++r, target_byte += output_byte_row_stride) {
639 int accval = 0;
640 int pixel_byte_index = (r - centrepoint) * source_byte_row_stride +
641 c * input_channel_count + input_channel_index;
642 for (int i = 0; i < filter_length;
643 ++i, pixel_byte_index += source_byte_row_stride) {
644 accval += filter_values[i] * source_data[pixel_byte_index];
645 }
646
647 *target_byte = BringBackTo8(accval, absolute_values);
648 }
649
650 for (; r < image_size.height();
651 ++r, target_byte += output_byte_row_stride) {
652 int accval = 0;
653 int overlap_taps = image_size.height() - r + centrepoint;
654 int pixel_byte_index = (r - centrepoint) * source_byte_row_stride +
655 c * input_channel_count + input_channel_index;
656 int i = 0;
657 for (; i < overlap_taps - 1;
658 ++i, pixel_byte_index += source_byte_row_stride) {
659 accval += filter_values[i] * source_data[pixel_byte_index];
660 }
661
662 for (; i < filter_length; ++i)
663 accval += filter_values[i] * source_data[pixel_byte_index];
664
665 *target_byte = BringBackTo8(accval, absolute_values);
666 }
667 }
668 }
669
670 void SetUpGaussianConvolutionKernel(ConvolutionFilter1D* filter,
671 float kernel_sigma,
672 bool derivative) {
673 DCHECK(filter != NULL);
674 DCHECK_GT(kernel_sigma, 0.0);
675 const int tail_length = static_cast<int>(4.0f * kernel_sigma + 0.5f);
676 const int kernel_size = tail_length * 2 + 1;
677 const float sigmasq = kernel_sigma * kernel_sigma;
678 std::vector<float> kernel_weights(kernel_size, 0.0);
679 float kernel_sum = 1.0f;
680
681 kernel_weights[tail_length] = 1.0f;
682
683 for (int ii = 1; ii <= tail_length; ++ii) {
684 float v = std::exp(-0.5f * ii * ii / sigmasq);
685 kernel_weights[tail_length + ii] = v;
686 kernel_weights[tail_length - ii] = v;
687 kernel_sum += 2.0f * v;
688 }
689
690 for (int i = 0; i < kernel_size; ++i)
691 kernel_weights[i] /= kernel_sum;
692
693 if (derivative) {
694 kernel_weights[tail_length] = 0.0;
695 for (int ii = 1; ii <= tail_length; ++ii) {
696 float v = sigmasq * kernel_weights[tail_length + ii] / ii;
697 kernel_weights[tail_length + ii] = v;
698 kernel_weights[tail_length - ii] = -v;
699 }
700 }
701
702 filter->AddFilter(0, &kernel_weights[0], kernel_weights.size());
703 }
704
705 } // namespace skia
OLDNEW
« no previous file with comments | « skia/ext/convolver.h ('k') | skia/ext/convolver_SSE2.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698