OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <algorithm> | 5 #include <algorithm> |
6 | 6 |
7 #include "skia/ext/convolver.h" | 7 #include "skia/ext/convolver.h" |
8 #include "skia/ext/convolver_SSE2.h" | 8 #include "skia/ext/convolver_SSE2.h" |
9 #include "third_party/skia/include/core/SkSize.h" | |
9 #include "third_party/skia/include/core/SkTypes.h" | 10 #include "third_party/skia/include/core/SkTypes.h" |
10 | 11 |
11 namespace skia { | 12 namespace skia { |
12 | 13 |
13 namespace { | 14 namespace { |
14 | 15 |
15 // Converts the argument to an 8-bit unsigned value by clamping to the range | 16 // Converts the argument to an 8-bit unsigned value by clamping to the range |
16 // 0-255. | 17 // 0-255. |
17 inline unsigned char ClampTo8(int a) { | 18 inline unsigned char ClampTo8(int a) { |
18 if (static_cast<unsigned>(a) < 256) | 19 if (static_cast<unsigned>(a) < 256) |
(...skipping 245 matching lines...)
264 AddFilter(filter_offset, &fixed_values[0], filter_length); | 265 AddFilter(filter_offset, &fixed_values[0], filter_length); |
265 } | 266 } |
266 | 267 |
267 void ConvolutionFilter1D::AddFilter(int filter_offset, | 268 void ConvolutionFilter1D::AddFilter(int filter_offset, |
268 const Fixed* filter_values, | 269 const Fixed* filter_values, |
269 int filter_length) { | 270 int filter_length) { |
270 // It is common for leading/trailing filter values to be zeros. In such | 271 // It is common for leading/trailing filter values to be zeros. In such |
271 // cases it is beneficial to only store the central factors. | 272 // cases it is beneficial to only store the central factors. |
272 // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on | 273 // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on |
273 // a 1080p image this optimization gives a ~10% speed improvement. | 274 // a 1080p image this optimization gives a ~10% speed improvement. |
275 int filter_size = filter_length; | |
274 int first_non_zero = 0; | 276 int first_non_zero = 0; |
275 while (first_non_zero < filter_length && filter_values[first_non_zero] == 0) | 277 while (first_non_zero < filter_length && filter_values[first_non_zero] == 0) |
276 first_non_zero++; | 278 first_non_zero++; |
277 | 279 |
278 if (first_non_zero < filter_length) { | 280 if (first_non_zero < filter_length) { |
279 // Here we have at least one non-zero factor. | 281 // Here we have at least one non-zero factor. |
280 int last_non_zero = filter_length - 1; | 282 int last_non_zero = filter_length - 1; |
281 while (last_non_zero >= 0 && filter_values[last_non_zero] == 0) | 283 while (last_non_zero >= 0 && filter_values[last_non_zero] == 0) |
282 last_non_zero--; | 284 last_non_zero--; |
283 | 285 |
284 filter_offset += first_non_zero; | 286 filter_offset += first_non_zero; |
285 filter_length = last_non_zero + 1 - first_non_zero; | 287 filter_length = last_non_zero + 1 - first_non_zero; |
286 SkASSERT(filter_length > 0); | 288 SkASSERT(filter_length > 0); |
287 | 289 |
288 for (int i = first_non_zero; i <= last_non_zero; i++) | 290 for (int i = first_non_zero; i <= last_non_zero; i++) |
289 filter_values_.push_back(filter_values[i]); | 291 filter_values_.push_back(filter_values[i]); |
290 } else { | 292 } else { |
291 // Here all the factors were zeroes. | 293 // Here all the factors were zeroes. |
292 filter_length = 0; | 294 filter_length = 0; |
293 } | 295 } |
294 | 296 |
295 FilterInstance instance; | 297 FilterInstance instance; |
296 | 298 |
297 // We pushed filter_length elements onto filter_values_ | 299 // We pushed filter_length elements onto filter_values_ |
298 instance.data_location = (static_cast<int>(filter_values_.size()) - | 300 instance.data_location = (static_cast<int>(filter_values_.size()) - |
299 filter_length); | 301 filter_length); |
300 instance.offset = filter_offset; | 302 instance.offset = filter_offset; |
301 instance.length = filter_length; | 303 instance.trimmed_length = filter_length; |
304 instance.length = filter_size; | |
302 filters_.push_back(instance); | 305 filters_.push_back(instance); |
303 | 306 |
304 max_filter_ = std::max(max_filter_, filter_length); | 307 max_filter_ = std::max(max_filter_, filter_length); |
305 } | 308 } |
306 | 309 |
310 const ConvolutionFilter1D::Fixed* ConvolutionFilter1D::GetSingleFilter( | |
311 int* specified_filter_length, | |
312 int* filter_offset, | |
313 int* filter_length) const { | |
314 const FilterInstance& filter = filters_[0]; | |
315 *filter_offset = filter.offset; | |
316 *filter_length = filter.trimmed_length; | |
317 *specified_filter_length = filter.length; | |
318 if (filter.trimmed_length == 0) { | |
Alexei Svitkine (slow)
2013/04/11 18:25:37
Nit: Remove {}'s.
motek.
2013/04/12 10:50:59
Done.
| |
319 return NULL; | |
320 } | |
321 return &filter_values_[filter.data_location]; | |
322 } | |
323 | |
307 typedef void (*ConvolveVertically_pointer)( | 324 typedef void (*ConvolveVertically_pointer)( |
308 const ConvolutionFilter1D::Fixed* filter_values, | 325 const ConvolutionFilter1D::Fixed* filter_values, |
309 int filter_length, | 326 int filter_length, |
310 unsigned char* const* source_data_rows, | 327 unsigned char* const* source_data_rows, |
311 int pixel_width, | 328 int pixel_width, |
312 unsigned char* out_row, | 329 unsigned char* out_row, |
313 bool has_alpha); | 330 bool has_alpha); |
314 typedef void (*Convolve4RowsHorizontally_pointer)( | 331 typedef void (*Convolve4RowsHorizontally_pointer)( |
315 const unsigned char* src_data[4], | 332 const unsigned char* src_data[4], |
316 const ConvolutionFilter1D& filter, | 333 const ConvolutionFilter1D& filter, |
(...skipping 154 matching lines...)
471 source_has_alpha); | 488 source_has_alpha); |
472 } else { | 489 } else { |
473 ConvolveVertically(filter_values, filter_length, | 490 ConvolveVertically(filter_values, filter_length, |
474 first_row_for_filter, | 491 first_row_for_filter, |
475 filter_x.num_values(), cur_output_row, | 492 filter_x.num_values(), cur_output_row, |
476 source_has_alpha); | 493 source_has_alpha); |
477 } | 494 } |
478 } | 495 } |
479 } | 496 } |
480 | 497 |
498 void SingleChannelConvolveX1D(const unsigned char* source_data, | |
499 int source_byte_row_stride, | |
500 int input_channel_index, | |
501 int input_channel_count, | |
502 const ConvolutionFilter1D& filter, | |
503 const SkISize& image_size, | |
504 unsigned char* output, | |
505 int output_byte_row_stride, | |
506 int output_channel_index, | |
507 int output_channel_count, | |
508 bool absolute_values) { | |
509 int filter_offset, filter_length, filter_size; | |
510 // Very much unlike BGRAConvolve2D, here we expect to have the same filter | |
511 // for all pixels. | |
512 const ConvolutionFilter1D::Fixed* filter_values = | |
513 filter.GetSingleFilter(&filter_size, &filter_offset, &filter_length); | |
514 | |
515 if (filter_values == NULL) | |
516 return; | |
517 | |
518 int centrepoint = filter_length / 2; | |
519 if (filter_size - filter_offset != 2 * filter_offset) { | |
520 // This means the original filter was not symmetrical AND | |
521 // got clipped from one side more than from the other. | |
522 centrepoint = filter_size / 2 - filter_offset; | |
523 } | |
524 | |
525 const unsigned char* source_data_row = source_data; | |
526 unsigned char* output_row = output; | |
527 | |
528 for (int r = 0; r < image_size.height(); ++r) { | |
529 unsigned char* target_byte = output_row + output_channel_index; | |
530 // Process the lead part, padding image to the left with the first pixel. | |
531 int c = 0; | |
532 for (; c < centrepoint; ++c, target_byte += output_channel_count) { | |
533 int accval = 0; | |
534 int i = 0; | |
535 int pixel_byte_index = input_channel_index; | |
536 for (; i < centrepoint - c; ++i) // Padding part. | |
537 accval += filter_values[i] * source_data_row[pixel_byte_index]; | |
538 | |
539 for (; i < filter_length; ++i, pixel_byte_index += input_channel_count) | |
540 accval += filter_values[i] * source_data_row[pixel_byte_index]; | |
541 | |
542 accval >>= ConvolutionFilter1D::kShiftBits; | |
543 if (absolute_values) | |
544 accval = std::abs(accval); | |
545 *target_byte = ClampTo8(accval); | |
546 } | |
547 | |
548 // Now for the main event. | |
549 for (; c < image_size.width() - centrepoint; | |
550 ++c, target_byte += output_channel_count) { | |
551 int accval = 0; | |
552 int pixel_byte_index = (c - centrepoint) * input_channel_count + | |
553 input_channel_index; | |
554 | |
555 for (int i = 0; i < filter_length; | |
556 ++i, pixel_byte_index += input_channel_count) { | |
557 accval += filter_values[i] * source_data_row[pixel_byte_index]; | |
558 } | |
559 // Bring this value back in range. All of the filter scaling factors | |
560 // are in fixed point with kShiftBits bits of fractional part. | |
561 accval >>= ConvolutionFilter1D::kShiftBits; | |
562 if (absolute_values) | |
563 accval = std::abs(accval); | |
564 *target_byte = ClampTo8(accval); | |
565 } | |
566 | |
567 for (; c < image_size.width(); ++c, target_byte += output_channel_count) { | |
568 int accval = 0; | |
569 int overlap_taps = image_size.width() - c + centrepoint; | |
570 int pixel_byte_index = (c - centrepoint) * input_channel_count + | |
571 input_channel_index; | |
572 int i = 0; | |
573 for (; i < overlap_taps - 1; ++i, pixel_byte_index += input_channel_count) | |
574 accval += filter_values[i] * source_data_row[pixel_byte_index]; | |
575 | |
576 for (; i < filter_length; ++i) | |
577 accval += filter_values[i] * source_data_row[pixel_byte_index]; | |
578 | |
579 accval >>= ConvolutionFilter1D::kShiftBits; | |
580 if (absolute_values) | |
581 accval = std::abs(accval); | |
582 *target_byte = ClampTo8(accval); | |
583 } | |
584 | |
585 source_data_row += source_byte_row_stride; | |
586 output_row += output_byte_row_stride; | |
587 } | |
588 } | |
589 | |
590 void SingleChannelConvolveY1D(const unsigned char* source_data, | |
591 int source_byte_row_stride, | |
592 int input_channel_index, | |
593 int input_channel_count, | |
594 const ConvolutionFilter1D& filter, | |
595 const SkISize& image_size, | |
596 unsigned char* output, | |
597 int output_byte_row_stride, | |
598 int output_channel_index, | |
599 int output_channel_count, | |
600 bool absolute_values) { | |
601 int filter_offset, filter_length, filter_size; | |
602 // Very much unlike BGRAConvolve2D, here we expect to have the same filter | |
603 // for all pixels. | |
604 const ConvolutionFilter1D::Fixed* filter_values = | |
605 filter.GetSingleFilter(&filter_size, &filter_offset, &filter_length); | |
606 | |
607 if (filter_values == NULL) | |
608 return; | |
609 | |
610 int centrepoint = filter_length / 2; | |
611 if (filter_size - filter_offset != 2 * filter_offset) { | |
612 // This means the original filter was not symmetrical AND | |
613 // got clipped from one side more than from the other. | |
614 centrepoint = filter_size / 2 - filter_offset; | |
615 } | |
616 | |
617 for (int c = 0; c < image_size.width(); ++c) { | |
618 unsigned char* target_byte = output + c * output_channel_count + | |
619 output_channel_index; | |
620 int r = 0; | |
621 | |
622 for (; r < centrepoint; ++r, target_byte += output_byte_row_stride) { | |
623 int accval = 0; | |
624 int i = 0; | |
625 int pixel_byte_index = c * input_channel_count + input_channel_index; | |
626 | |
627 for (; i < centrepoint - r; ++i) // Padding part. | |
Alexei Svitkine (slow)
2013/04/11 18:25:37
This logic seems identical to SingleChannelConvolv
motek.
2013/04/12 10:50:59
Sort of, kind of. I have plucked a fragment that w
| |
628 accval += filter_values[i] * source_data[pixel_byte_index]; | |
629 | |
630 for (; i < filter_length; ++i, pixel_byte_index += source_byte_row_stride) | |
631 accval += filter_values[i] * source_data[pixel_byte_index]; | |
632 | |
633 accval >>= ConvolutionFilter1D::kShiftBits; | |
634 if (absolute_values) | |
635 accval = std::abs(accval); | |
636 *target_byte = ClampTo8(accval); | |
637 } | |
638 | |
639 for (; r < image_size.height() - centrepoint; | |
640 ++r, target_byte += output_byte_row_stride) { | |
641 int accval = 0; | |
642 int pixel_byte_index = (r - centrepoint) * source_byte_row_stride + | |
643 c * input_channel_count + input_channel_index; | |
644 for (int i = 0; i < filter_length; | |
645 ++i, pixel_byte_index += source_byte_row_stride) { | |
646 accval += filter_values[i] * source_data[pixel_byte_index]; | |
647 } | |
648 | |
649 accval >>= ConvolutionFilter1D::kShiftBits; | |
650 if (absolute_values) | |
651 accval = std::abs(accval); | |
652 *target_byte = ClampTo8(accval); | |
653 } | |
654 | |
655 for (; r < image_size.height(); | |
656 ++r, target_byte += output_byte_row_stride) { | |
657 int accval = 0; | |
658 int overlap_taps = image_size.height() - r + centrepoint; | |
659 int pixel_byte_index = (r - centrepoint) * source_byte_row_stride + | |
660 c * input_channel_count + input_channel_index; | |
661 int i = 0; | |
662 for (; i < overlap_taps - 1; | |
663 ++i, pixel_byte_index += source_byte_row_stride) { | |
664 accval += filter_values[i] * source_data[pixel_byte_index]; | |
665 } | |
666 | |
667 for (; i < filter_length; ++i) | |
668 accval += filter_values[i] * source_data[pixel_byte_index]; | |
669 | |
670 accval >>= ConvolutionFilter1D::kShiftBits; | |
671 if (absolute_values) | |
672 accval = std::abs(accval); | |
673 *target_byte = ClampTo8(accval); | |
674 } | |
675 } | |
676 } | |
677 | |
481 } // namespace skia | 678 } // namespace skia |
OLD | NEW |