OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef SKIA_EXT_CONVOLVER_H_ | 5 #ifndef SKIA_EXT_CONVOLVER_H_ |
6 #define SKIA_EXT_CONVOLVER_H_ | 6 #define SKIA_EXT_CONVOLVER_H_ |
7 #pragma once | 7 #pragma once |
8 | 8 |
9 #include <cmath> | 9 #include <cmath> |
10 #include <vector> | 10 #include <vector> |
11 | 11 |
12 #include "base/basictypes.h" | 12 #include "base/basictypes.h" |
| 13 #include "base/cpu.h" |
| 14 |
| 15 #if defined(ARCH_CPU_X86_FAMILY) |
| 16 #if defined(__x86_64__) || defined(_M_X64) || defined(__SSE2__) || _M_IX86_FP==2 |
| 17 // The compiler can emit SSE2 instructions for this target. |
| 18 #define SIMD_SSE2 1 |
| 19 #endif |
| 20 #endif |
13 | 21 |
14 // avoid confusion with Mac OS X's math library (Carbon) | 22 // avoid confusion with Mac OS X's math library (Carbon) |
15 #if defined(__APPLE__) | 23 #if defined(__APPLE__) |
16 #undef FloatToFixed | 24 #undef FloatToFixed |
17 #undef FixedToFloat | 25 #undef FixedToFloat |
18 #endif | 26 #endif |
19 | 27 |
20 namespace skia { | 28 namespace skia { |
21 | 29 |
22 // Represents a filter in one dimension. Each output pixel has one entry in this | 30 // Represents a filter in one dimension. Each output pixel has one entry in this |
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
91 int* filter_length) const { | 99 int* filter_length) const { |
92 const FilterInstance& filter = filters_[value_offset]; | 100 const FilterInstance& filter = filters_[value_offset]; |
93 *filter_offset = filter.offset; | 101 *filter_offset = filter.offset; |
94 *filter_length = filter.length; | 102 *filter_length = filter.length; |
95 if (filter.length == 0) { | 103 if (filter.length == 0) { |
96 return NULL; | 104 return NULL; |
97 } | 105 } |
98 return &filter_values_[filter.data_location]; | 106 return &filter_values_[filter.data_location]; |
99 } | 107 } |
100 | 108 |
| 109 |
| 110 inline void PaddingForSIMD(int padding_count) { |
| 111 // Appends |padding_count| zero-valued dummy coefficients after the |
| 112 // coefficients of the last filter, so that SIMD instructions that load 8 or |
| 113 // 16 bytes at a time do not access invalid memory areas. No attempt is made |
| 114 // to align the coefficients, due to the opaqueness of the <vector> |
| 115 // implementation. This has to be called after all |AddFilter| calls. |
| 116 for (int i = 0; i < padding_count; ++i) |
| 117 filter_values_.push_back(static_cast<Fixed>(0)); |
| 118 } |
| 119 |
101 private: | 120 private: |
102 struct FilterInstance { | 121 struct FilterInstance { |
103 // Offset within filter_values for this instance of the filter. | 122 // Offset within filter_values for this instance of the filter. |
104 int data_location; | 123 int data_location; |
105 | 124 |
106 // Distance from the left of the filter to the center. IN PIXELS | 125 // Distance from the left of the filter to the center. IN PIXELS |
107 int offset; | 126 int offset; |
108 | 127 |
109 // Number of values in this filter instance. | 128 // Number of values in this filter instance. |
110 int length; | 129 int length; |
(...skipping 28 matching lines...) Expand all Loading... |
139 // percent faster if you know the image is opaque. | 158 // percent faster if you know the image is opaque. |
140 // | 159 // |
141 // The layout in memory is assumed to be 4-bytes per pixel in B-G-R-A order | 160 // The layout in memory is assumed to be 4-bytes per pixel in B-G-R-A order |
142 // (this is ARGB when loaded into 32-bit words on a little-endian machine). | 161 // (this is ARGB when loaded into 32-bit words on a little-endian machine). |
143 void BGRAConvolve2D(const unsigned char* source_data, | 162 void BGRAConvolve2D(const unsigned char* source_data, |
144 int source_byte_row_stride, | 163 int source_byte_row_stride, |
145 bool source_has_alpha, | 164 bool source_has_alpha, |
146 const ConvolutionFilter1D& xfilter, | 165 const ConvolutionFilter1D& xfilter, |
147 const ConvolutionFilter1D& yfilter, | 166 const ConvolutionFilter1D& yfilter, |
148 int output_byte_row_stride, | 167 int output_byte_row_stride, |
149 unsigned char* output); | 168 unsigned char* output, |
150 | 169 bool use_sse2); |
151 } // namespace skia | 170 } // namespace skia |
152 | 171 |
153 #endif // SKIA_EXT_CONVOLVER_H_ | 172 #endif // SKIA_EXT_CONVOLVER_H_ |
OLD | NEW |