OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include <algorithm> | 5 #include <algorithm> |
6 | 6 |
7 #include "skia/ext/convolver.h" | 7 #include "skia/ext/convolver.h" |
8 #include "third_party/skia/include/core/SkSize.h" | |
8 #include "third_party/skia/include/core/SkTypes.h" | 9 #include "third_party/skia/include/core/SkTypes.h" |
9 | 10 |
10 #if defined(SIMD_SSE2) | 11 #if defined(SIMD_SSE2) |
11 #include <emmintrin.h> // ARCH_CPU_X86_FAMILY was defined in build/config.h | 12 #include <emmintrin.h> // ARCH_CPU_X86_FAMILY was defined in build/config.h |
12 #endif | 13 #endif |
13 | 14 |
14 namespace skia { | 15 namespace skia { |
15 | 16 |
16 namespace { | 17 namespace { |
17 | 18 |
(... 657 matching lines omitted ...) | |
675 AddFilter(filter_offset, &fixed_values[0], filter_length); | 676 AddFilter(filter_offset, &fixed_values[0], filter_length); |
676 } | 677 } |
677 | 678 |
678 void ConvolutionFilter1D::AddFilter(int filter_offset, | 679 void ConvolutionFilter1D::AddFilter(int filter_offset, |
679 const Fixed* filter_values, | 680 const Fixed* filter_values, |
680 int filter_length) { | 681 int filter_length) { |
681 // It is common for leading/trailing filter values to be zeros. In such | 682 // It is common for leading/trailing filter values to be zeros. In such |
682 // cases it is beneficial to only store the central factors. | 683 // cases it is beneficial to only store the central factors. |
683 // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on | 684 // For a scaling to 1/4th in each dimension using a Lanczos-2 filter on |
684 // a 1080p image this optimization gives a ~10% speed improvement. | 685 // a 1080p image this optimization gives a ~10% speed improvement. |
686 int filter_size = filter_length; | |
685 int first_non_zero = 0; | 687 int first_non_zero = 0; |
686 while (first_non_zero < filter_length && filter_values[first_non_zero] == 0) | 688 while (first_non_zero < filter_length && filter_values[first_non_zero] == 0) |
687 first_non_zero++; | 689 first_non_zero++; |
688 | 690 |
689 if (first_non_zero < filter_length) { | 691 if (first_non_zero < filter_length) { |
690 // Here we have at least one non-zero factor. | 692 // Here we have at least one non-zero factor. |
691 int last_non_zero = filter_length - 1; | 693 int last_non_zero = filter_length - 1; |
692 while (last_non_zero >= 0 && filter_values[last_non_zero] == 0) | 694 while (last_non_zero >= 0 && filter_values[last_non_zero] == 0) |
693 last_non_zero--; | 695 last_non_zero--; |
694 | 696 |
695 filter_offset += first_non_zero; | 697 filter_offset += first_non_zero; |
696 filter_length = last_non_zero + 1 - first_non_zero; | 698 filter_length = last_non_zero + 1 - first_non_zero; |
697 SkASSERT(filter_length > 0); | 699 SkASSERT(filter_length > 0); |
698 | 700 |
699 for (int i = first_non_zero; i <= last_non_zero; i++) | 701 for (int i = first_non_zero; i <= last_non_zero; i++) |
700 filter_values_.push_back(filter_values[i]); | 702 filter_values_.push_back(filter_values[i]); |
701 } else { | 703 } else { |
702 // Here all the factors were zeroes. | 704 // Here all the factors were zeroes. |
703 filter_length = 0; | 705 filter_length = 0; |
704 } | 706 } |
705 | 707 |
706 FilterInstance instance; | 708 FilterInstance instance; |
707 | 709 |
708 // We pushed filter_length elements onto filter_values_ | 710 // We pushed filter_length elements onto filter_values_ |
709 instance.data_location = (static_cast<int>(filter_values_.size()) - | 711 instance.data_location = (static_cast<int>(filter_values_.size()) - |
710 filter_length); | 712 filter_length); |
711 instance.offset = filter_offset; | 713 instance.offset = filter_offset; |
712 instance.length = filter_length; | 714 instance.length = filter_length; |
715 instance.filter_size = filter_size; | |
713 filters_.push_back(instance); | 716 filters_.push_back(instance); |
714 | 717 |
715 max_filter_ = std::max(max_filter_, filter_length); | 718 max_filter_ = std::max(max_filter_, filter_length); |
716 } | 719 } |
717 | 720 |
718 void BGRAConvolve2D(const unsigned char* source_data, | 721 void BGRAConvolve2D(const unsigned char* source_data, |
719 int source_byte_row_stride, | 722 int source_byte_row_stride, |
720 bool source_has_alpha, | 723 bool source_has_alpha, |
721 const ConvolutionFilter1D& filter_x, | 724 const ConvolutionFilter1D& filter_x, |
722 const ConvolutionFilter1D& filter_y, | 725 const ConvolutionFilter1D& filter_y, |
(... 127 matching lines omitted ...) | |
850 filter_x.num_values(), cur_output_row); | 853 filter_x.num_values(), cur_output_row); |
851 } else { | 854 } else { |
852 ConvolveVertically<false>(filter_values, filter_length, | 855 ConvolveVertically<false>(filter_values, filter_length, |
853 first_row_for_filter, | 856 first_row_for_filter, |
854 filter_x.num_values(), cur_output_row); | 857 filter_x.num_values(), cur_output_row); |
855 } | 858 } |
856 } | 859 } |
857 } | 860 } |
858 } | 861 } |
859 | 862 |
863 void SingleChannelConvolve1D_X(const unsigned char* source_data, | |
864 int source_byte_row_stride, | |
865 int input_channel_index, | |
866 int input_channel_count, | |
867 const ConvolutionFilter1D& filter, | |
868 const SkISize& image_size, | |
869 unsigned char* output, | |
870 int output_byte_row_stride, | |
871 int output_channel_index, | |
872 int output_channel_count, | |
873 bool absolute_values) { | |
874 int filter_offset, filter_length, filter_size; | |
875 // Very much unlike BGRAConvolve2D, here we expect to have the same filter | |
876 // for all pixels. | |
877 const ConvolutionFilter1D::Fixed* filter_values = | |
878 filter.GetSingleFilter(&filter_size, &filter_offset, &filter_length); | |
879 | |
880 if (filter_values == NULL) | |
881 return; | |
882 | |
883 int centrepoint = filter_length / 2; | |
884 if (filter_size - filter_offset != 2 * filter_offset) { | |
885 // This means the original filter was not symmetrical AND | |
886 // got clipped from one side more than from the other. | |
887 centrepoint = filter_size / 2 - filter_offset; | |
888 } | |
889 | |
890 const unsigned char* source_data_row = source_data; | |
891 unsigned char* output_row = output; | |
892 | |
893 for (int r = 0; r < image_size.height(); ++r) { | |
894 unsigned char* target_byte = output_row + output_channel_index; | |
895 for (int c = 0; c < image_size.width(); ++c) { | |
896 int accval = 0; | |
897 for (int i = 0; i < filter_length; ++i) { | |
898 ConvolutionFilter1D::Fixed cur_filter = filter_values[i]; | |
899 int pixel_index = c + i - centrepoint; | |
Stephen White
2013/04/10 15:20:31
Perf: could probably hoist this out of the loop.
motek.
2013/04/11 16:18:38
I have reworked that from the perf angle.
| |
900 // Handling of edges is always wrap-around. Clip to 0 / width. | |
Stephen White
2013/04/10 15:20:31
Seems strange to wrap-around instead of clamp, but
motek.
2013/04/11 16:18:38
Padding would be most reasonable for this applicat
| |
901 pixel_index = (pixel_index + image_size.width()) % image_size.width(); | |
Stephen White
2013/04/10 15:20:31
Perf: could compute the margins in separate inner
motek.
2013/04/11 16:18:38
Done.
| |
902 const unsigned char src_value = source_data_row[ | |
903 pixel_index * input_channel_count + input_channel_index]; | |
904 accval += cur_filter * src_value; | |
905 } | |
906 // Bring this value back in range. All of the filter scaling factors | |
907 // are in fixed point with kShiftBits bits of fractional part. | |
908 accval >>= ConvolutionFilter1D::kShiftBits; | |
909 if (absolute_values) | |
910 accval = std::abs(accval); | |
911 *target_byte = ClampTo8(accval); | |
912 target_byte += output_channel_count; | |
913 } | |
914 | |
915 source_data_row += source_byte_row_stride; | |
916 output_row += output_byte_row_stride; | |
917 } | |
918 } | |
919 | |
920 void SingleChannelConvolve1D_Y(const unsigned char* source_data, | |
921 int source_byte_row_stride, | |
922 int input_channel_index, | |
923 int input_channel_count, | |
924 const ConvolutionFilter1D& filter, | |
925 const SkISize& image_size, | |
926 unsigned char* output, | |
927 int output_byte_row_stride, | |
928 int output_channel_index, | |
929 int output_channel_count, | |
930 bool absolute_values) { | |
931 int filter_offset, filter_length, filter_size; | |
932 // Very much unlike BGRAConvolve2D, here we expect to have the same filter | |
933 // for all pixels. | |
934 const ConvolutionFilter1D::Fixed* filter_values = | |
935 filter.GetSingleFilter(&filter_size, &filter_offset, &filter_length); | |
936 | |
937 if (filter_values == NULL) | |
938 return; | |
939 | |
940 int centrepoint = filter_length / 2; | |
941 if (filter_size - filter_offset != 2 * filter_offset) { | |
942 // This means the original filter was not symmetrical AND | |
943 // got clipped from one side more than from the other. | |
944 centrepoint = filter_size / 2 - filter_offset; | |
945 } | |
946 | |
947 for (int c = 0; c < image_size.width(); ++c) { | |
948 for (int r = 0; r < image_size.height(); ++r) { | |
949 unsigned char* target_byte = output + | |
950 r * output_byte_row_stride + | |
951 c * output_channel_count + | |
952 output_channel_index; | |
953 int accval = 0; | |
954 for (int i = 0; i < filter_length; ++i) { | |
955 ConvolutionFilter1D::Fixed cur_filter = filter_values[i]; | |
956 int pixel_index = r + i - centrepoint; | |
957 pixel_index = (pixel_index + image_size.height()) % image_size.height(); | |
958 | |
959 const unsigned char src_value = source_data[ | |
960 pixel_index * source_byte_row_stride + | |
961 c * input_channel_count + | |
962 input_channel_index]; | |
963 accval += cur_filter * src_value; | |
964 } | |
965 accval >>= ConvolutionFilter1D::kShiftBits; | |
966 if (absolute_values) | |
967 accval = std::abs(accval); | |
968 *target_byte = ClampTo8(accval); | |
969 } | |
970 } | |
971 } | |
972 | |
860 } // namespace skia | 973 } // namespace skia |
OLD | NEW |