OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <algorithm> | 5 #include <algorithm> |
6 | 6 |
7 #include "skia/ext/convolver.h" | 7 #include "skia/ext/convolver.h" |
8 #include "skia/ext/convolver_SSE2.h" | 8 #include "skia/ext/convolver_SSE2.h" |
9 #include "third_party/skia/include/core/SkSize.h" | |
9 #include "third_party/skia/include/core/SkTypes.h" | 10 #include "third_party/skia/include/core/SkTypes.h" |
10 | 11 |
11 namespace skia { | 12 namespace skia { |
12 | 13 |
13 namespace { | 14 namespace { |
14 | 15 |
15 // Converts the argument to an 8-bit unsigned value by clamping to the range | 16 // Converts the argument to an 8-bit unsigned value by clamping to the range |
16 // 0-255. | 17 // 0-255. |
inline unsigned char ClampTo8(int a) {
  // A single unsigned comparison covers both "negative" and "above 255":
  // negative inputs wrap around to very large unsigned values.
  const bool fits_in_byte = static_cast<unsigned>(a) <= 255u;
  if (fits_in_byte)
    return static_cast<unsigned char>(a);  // Common case, no further checks.

  // Out of range: saturate to the nearest end of [0, 255].
  return (a < 0) ? 0 : 255;
}
24 | 25 |
26 // Takes the value produced by accumulating element-wise product of image with | |
27 // a kernel and brings it back into range. | |
28 // All of the filter scaling factors are in fixed point with kShiftBits bits of | |
29 // fractional part. | |
30 inline unsigned char BringBackTo8(int a, bool take_absolute) { | |
31 a >>= ConvolutionFilter1D::kShiftBits; | |
32 if (take_absolute) | |
33 a = std::abs(a); | |
34 return ClampTo8(a); | |
35 } | |
36 | |
25 // Stores a list of rows in a circular buffer. The usage is you write into it | 37 // Stores a list of rows in a circular buffer. The usage is you write into it |
26 // by calling AdvanceRow. It will keep track of which row in the buffer it | 38 // by calling AdvanceRow. It will keep track of which row in the buffer it |
27 // should use next, and the total number of rows added. | 39 // should use next, and the total number of rows added. |
28 class CircularRowBuffer { | 40 class CircularRowBuffer { |
29 public: | 41 public: |
30 // The number of pixels in each row is given in |source_row_pixel_width|. | 42 // The number of pixels in each row is given in |source_row_pixel_width|. |
31 // The maximum number of rows needed in the buffer is |max_y_filter_size| | 43 // The maximum number of rows needed in the buffer is |max_y_filter_size| |
32 // (we only need to store enough rows for the biggest filter). | 44 // (we only need to store enough rows for the biggest filter). |
33 // | 45 // |
34 // We use the |first_input_row| to compute the coordinates of all of the | 46 // We use the |first_input_row| to compute the coordinates of all of the |
(...skipping 229 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
264 AddFilter(filter_offset, &fixed_values[0], filter_length); | 276 AddFilter(filter_offset, &fixed_values[0], filter_length); |
265 } | 277 } |
266 | 278 |
267 void ConvolutionFilter1D::AddFilter(int filter_offset, | 279 void ConvolutionFilter1D::AddFilter(int filter_offset, |
268 const Fixed* filter_values, | 280 const Fixed* filter_values, |
269 int filter_length) { | 281 int filter_length) { |
270 // It is common for leading/trailing filter values to be zeros. In such | 282 // It is common for leading/trailing filter values to be zeros. In such |
271 // cases it is beneficial to only store the central factors. | 283 // cases it is beneficial to only store the central factors. |
272 // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on | 284 // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on |
273 // a 1080p image this optimization gives a ~10% speed improvement. | 285 // a 1080p image this optimization gives a ~10% speed improvement. |
286 int filter_size = filter_length; | |
274 int first_non_zero = 0; | 287 int first_non_zero = 0; |
275 while (first_non_zero < filter_length && filter_values[first_non_zero] == 0) | 288 while (first_non_zero < filter_length && filter_values[first_non_zero] == 0) |
276 first_non_zero++; | 289 first_non_zero++; |
277 | 290 |
278 if (first_non_zero < filter_length) { | 291 if (first_non_zero < filter_length) { |
279 // Here we have at least one non-zero factor. | 292 // Here we have at least one non-zero factor. |
280 int last_non_zero = filter_length - 1; | 293 int last_non_zero = filter_length - 1; |
281 while (last_non_zero >= 0 && filter_values[last_non_zero] == 0) | 294 while (last_non_zero >= 0 && filter_values[last_non_zero] == 0) |
282 last_non_zero--; | 295 last_non_zero--; |
283 | 296 |
284 filter_offset += first_non_zero; | 297 filter_offset += first_non_zero; |
285 filter_length = last_non_zero + 1 - first_non_zero; | 298 filter_length = last_non_zero + 1 - first_non_zero; |
286 SkASSERT(filter_length > 0); | 299 SkASSERT(filter_length > 0); |
287 | 300 |
288 for (int i = first_non_zero; i <= last_non_zero; i++) | 301 for (int i = first_non_zero; i <= last_non_zero; i++) |
289 filter_values_.push_back(filter_values[i]); | 302 filter_values_.push_back(filter_values[i]); |
290 } else { | 303 } else { |
291 // Here all the factors were zeroes. | 304 // Here all the factors were zeroes. |
292 filter_length = 0; | 305 filter_length = 0; |
293 } | 306 } |
294 | 307 |
295 FilterInstance instance; | 308 FilterInstance instance; |
296 | 309 |
297 // We pushed filter_length elements onto filter_values_ | 310 // We pushed filter_length elements onto filter_values_ |
298 instance.data_location = (static_cast<int>(filter_values_.size()) - | 311 instance.data_location = (static_cast<int>(filter_values_.size()) - |
299 filter_length); | 312 filter_length); |
300 instance.offset = filter_offset; | 313 instance.offset = filter_offset; |
301 instance.length = filter_length; | 314 instance.trimmed_length = filter_length; |
315 instance.length = filter_size; | |
302 filters_.push_back(instance); | 316 filters_.push_back(instance); |
303 | 317 |
304 max_filter_ = std::max(max_filter_, filter_length); | 318 max_filter_ = std::max(max_filter_, filter_length); |
305 } | 319 } |
306 | 320 |
321 const ConvolutionFilter1D::Fixed* ConvolutionFilter1D::GetSingleFilter( | |
322 int* specified_filter_length, | |
323 int* filter_offset, | |
324 int* filter_length) const { | |
325 const FilterInstance& filter = filters_[0]; | |
326 *filter_offset = filter.offset; | |
327 *filter_length = filter.trimmed_length; | |
328 *specified_filter_length = filter.length; | |
329 if (filter.trimmed_length == 0) | |
330 return NULL; | |
331 | |
332 return &filter_values_[filter.data_location]; | |
333 } | |
334 | |
307 typedef void (*ConvolveVertically_pointer)( | 335 typedef void (*ConvolveVertically_pointer)( |
308 const ConvolutionFilter1D::Fixed* filter_values, | 336 const ConvolutionFilter1D::Fixed* filter_values, |
309 int filter_length, | 337 int filter_length, |
310 unsigned char* const* source_data_rows, | 338 unsigned char* const* source_data_rows, |
311 int pixel_width, | 339 int pixel_width, |
312 unsigned char* out_row, | 340 unsigned char* out_row, |
313 bool has_alpha); | 341 bool has_alpha); |
314 typedef void (*Convolve4RowsHorizontally_pointer)( | 342 typedef void (*Convolve4RowsHorizontally_pointer)( |
315 const unsigned char* src_data[4], | 343 const unsigned char* src_data[4], |
316 const ConvolutionFilter1D& filter, | 344 const ConvolutionFilter1D& filter, |
(...skipping 154 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
471 source_has_alpha); | 499 source_has_alpha); |
472 } else { | 500 } else { |
473 ConvolveVertically(filter_values, filter_length, | 501 ConvolveVertically(filter_values, filter_length, |
474 first_row_for_filter, | 502 first_row_for_filter, |
475 filter_x.num_values(), cur_output_row, | 503 filter_x.num_values(), cur_output_row, |
476 source_has_alpha); | 504 source_has_alpha); |
477 } | 505 } |
478 } | 506 } |
479 } | 507 } |
480 | 508 |
509 void SingleChannelConvolveX1D(const unsigned char* source_data, | |
510 int source_byte_row_stride, | |
511 int input_channel_index, | |
512 int input_channel_count, | |
513 const ConvolutionFilter1D& filter, | |
514 const SkISize& image_size, | |
515 unsigned char* output, | |
516 int output_byte_row_stride, | |
517 int output_channel_index, | |
518 int output_channel_count, | |
519 bool absolute_values) { | |
520 int filter_offset, filter_length, filter_size; | |
521 // Very much unlike BGRAConvolve2D, here we expect to have the same filter | |
522 // for all pixels. | |
523 const ConvolutionFilter1D::Fixed* filter_values = | |
524 filter.GetSingleFilter(&filter_size, &filter_offset, &filter_length); | |
525 | |
526 if (filter_values == NULL) | |
527 return; | |
528 | |
529 int centrepoint = filter_length / 2; | |
530 if (filter_size - filter_offset != 2 * filter_offset) { | |
531 // This means the original filter was not symmetrical AND | |
532 // got clipped from one side more than from the other. | |
533 centrepoint = filter_size / 2 - filter_offset; | |
534 } | |
535 | |
536 const unsigned char* source_data_row = source_data; | |
537 unsigned char* output_row = output; | |
538 | |
539 for (int r = 0; r < image_size.height(); ++r) { | |
540 unsigned char* target_byte = output_row + output_channel_index; | |
541 // Process the lead part, padding image to the left with the first pixel. | |
542 int c = 0; | |
543 for (; c < centrepoint; ++c, target_byte += output_channel_count) { | |
544 int accval = 0; | |
545 int i = 0; | |
546 int pixel_byte_index = input_channel_index; | |
547 for (; i < centrepoint - c; ++i) // Padding part. | |
548 accval += filter_values[i] * source_data_row[pixel_byte_index]; | |
549 | |
550 for (; i < filter_length; ++i, pixel_byte_index += input_channel_count) | |
551 accval += filter_values[i] * source_data_row[pixel_byte_index]; | |
552 | |
553 *target_byte = BringBackTo8(accval, absolute_values); | |
554 } | |
555 | |
556 // Now for the main event. | |
557 for (; c < image_size.width() - centrepoint; | |
558 ++c, target_byte += output_channel_count) { | |
559 int accval = 0; | |
560 int pixel_byte_index = (c - centrepoint) * input_channel_count + | |
561 input_channel_index; | |
562 | |
563 for (int i = 0; i < filter_length; | |
564 ++i, pixel_byte_index += input_channel_count) { | |
565 accval += filter_values[i] * source_data_row[pixel_byte_index]; | |
566 } | |
567 | |
568 *target_byte = BringBackTo8(accval, absolute_values); | |
569 } | |
570 | |
571 for (; c < image_size.width(); ++c, target_byte += output_channel_count) { | |
572 int accval = 0; | |
573 int overlap_taps = image_size.width() - c + centrepoint; | |
Stephen White
2013/04/12 12:15:21
Does this handle the case where the image width is
| |
574 int pixel_byte_index = (c - centrepoint) * input_channel_count + | |
575 input_channel_index; | |
576 int i = 0; | |
577 for (; i < overlap_taps - 1; ++i, pixel_byte_index += input_channel_count) | |
578 accval += filter_values[i] * source_data_row[pixel_byte_index]; | |
579 | |
580 for (; i < filter_length; ++i) | |
581 accval += filter_values[i] * source_data_row[pixel_byte_index]; | |
582 | |
583 *target_byte = BringBackTo8(accval, absolute_values); | |
584 } | |
585 | |
586 source_data_row += source_byte_row_stride; | |
587 output_row += output_byte_row_stride; | |
588 } | |
589 } | |
590 | |
591 void SingleChannelConvolveY1D(const unsigned char* source_data, | |
592 int source_byte_row_stride, | |
593 int input_channel_index, | |
594 int input_channel_count, | |
595 const ConvolutionFilter1D& filter, | |
596 const SkISize& image_size, | |
597 unsigned char* output, | |
598 int output_byte_row_stride, | |
599 int output_channel_index, | |
600 int output_channel_count, | |
601 bool absolute_values) { | |
602 int filter_offset, filter_length, filter_size; | |
603 // Very much unlike BGRAConvolve2D, here we expect to have the same filter | |
604 // for all pixels. | |
605 const ConvolutionFilter1D::Fixed* filter_values = | |
606 filter.GetSingleFilter(&filter_size, &filter_offset, &filter_length); | |
607 | |
608 if (filter_values == NULL) | |
609 return; | |
610 | |
611 int centrepoint = filter_length / 2; | |
612 if (filter_size - filter_offset != 2 * filter_offset) { | |
613 // This means the original filter was not symmetrical AND | |
614 // got clipped from one side more than from the other. | |
615 centrepoint = filter_size / 2 - filter_offset; | |
616 } | |
617 | |
618 for (int c = 0; c < image_size.width(); ++c) { | |
619 unsigned char* target_byte = output + c * output_channel_count + | |
620 output_channel_index; | |
621 int r = 0; | |
622 | |
623 for (; r < centrepoint; ++r, target_byte += output_byte_row_stride) { | |
624 int accval = 0; | |
625 int i = 0; | |
626 int pixel_byte_index = c * input_channel_count + input_channel_index; | |
627 | |
628 for (; i < centrepoint - r; ++i) // Padding part. | |
629 accval += filter_values[i] * source_data[pixel_byte_index]; | |
630 | |
631 for (; i < filter_length; ++i, pixel_byte_index += source_byte_row_stride) | |
632 accval += filter_values[i] * source_data[pixel_byte_index]; | |
633 | |
634 *target_byte = BringBackTo8(accval, absolute_values); | |
635 } | |
636 | |
637 for (; r < image_size.height() - centrepoint; | |
638 ++r, target_byte += output_byte_row_stride) { | |
639 int accval = 0; | |
640 int pixel_byte_index = (r - centrepoint) * source_byte_row_stride + | |
641 c * input_channel_count + input_channel_index; | |
642 for (int i = 0; i < filter_length; | |
643 ++i, pixel_byte_index += source_byte_row_stride) { | |
644 accval += filter_values[i] * source_data[pixel_byte_index]; | |
645 } | |
646 | |
647 *target_byte = BringBackTo8(accval, absolute_values); | |
648 } | |
649 | |
650 for (; r < image_size.height(); | |
651 ++r, target_byte += output_byte_row_stride) { | |
652 int accval = 0; | |
653 int overlap_taps = image_size.height() - r + centrepoint; | |
654 int pixel_byte_index = (r - centrepoint) * source_byte_row_stride + | |
655 c * input_channel_count + input_channel_index; | |
656 int i = 0; | |
657 for (; i < overlap_taps - 1; | |
658 ++i, pixel_byte_index += source_byte_row_stride) { | |
659 accval += filter_values[i] * source_data[pixel_byte_index]; | |
660 } | |
661 | |
662 for (; i < filter_length; ++i) | |
663 accval += filter_values[i] * source_data[pixel_byte_index]; | |
664 | |
665 *target_byte = BringBackTo8(accval, absolute_values); | |
666 } | |
667 } | |
668 } | |
669 | |
481 } // namespace skia | 670 } // namespace skia |
OLD | NEW |