Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(71)

Side by Side Diff: src/opts/SkBitmapFilter_opts_SSE2.cpp

Issue 2500113004: Port convolve functions to SkOpts (Closed)
Patch Set: Fix typo Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/opts/SkBitmapFilter_opts_SSE2.h ('k') | src/opts/SkBitmapProcState_arm_neon.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 /*
2 * Copyright 2013 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include <emmintrin.h>
9 #include "SkBitmap.h"
10 #include "SkBitmapFilter_opts_SSE2.h"
11 #include "SkBitmapProcState.h"
12 #include "SkColor.h"
13 #include "SkColorPriv.h"
14 #include "SkConvolver.h"
15 #include "SkShader.h"
16 #include "SkUnPreMultiply.h"
17
#if 0
// Debug-only helpers that print the lanes of an SSE register to stdout, lowest
// lane first. They are compiled out by default; enabling them additionally
// requires <stdio.h> for printf.
//
// Each helper copies the register into a correctly typed local array with an
// unaligned store instead of reading the register through a casted pointer.

// Prints |value| as four signed 32-bit integers.
static inline void print128i(__m128i value) {
    int v[4];
    _mm_storeu_si128(reinterpret_cast<__m128i*>(v), value);
    printf("% .11d % .11d % .11d % .11d\n", v[0], v[1], v[2], v[3]);
}

// Prints |value| as eight signed 16-bit integers.
static inline void print128i_16(__m128i value) {
    short v[8];
    _mm_storeu_si128(reinterpret_cast<__m128i*>(v), value);
    printf("% .5d % .5d % .5d % .5d % .5d % .5d % .5d % .5d\n",
           v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
}

// Prints |value| as sixteen unsigned 8-bit integers.
static inline void print128i_8(__m128i value) {
    unsigned char v[16];
    _mm_storeu_si128(reinterpret_cast<__m128i*>(v), value);
    printf("%.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u "
           "%.3u %.3u %.3u %.3u %.3u %.3u %.3u %.3u\n",
           v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7],
           v[8], v[9], v[10], v[11], v[12], v[13], v[14], v[15]);
}

// Prints |value| as four single-precision floats.
static inline void print128f(__m128 value) {
    float f[4];
    _mm_storeu_ps(f, value);
    printf("%3.4f %3.4f %3.4f %3.4f\n", f[0], f[1], f[2], f[3]);
}
#endif
42
43 static SK_ALWAYS_INLINE void accum_remainder(const unsigned char* pixels_left,
44 const SkConvolutionFilter1D::ConvolutionFixed* filter_values, __m128i& a ccum, int r) {
45 int remainder[4] = {0};
46 for (int i = 0; i < r; i++) {
47 SkConvolutionFilter1D::ConvolutionFixed coeff = filter_values[i];
48 remainder[0] += coeff * pixels_left[i * 4 + 0];
49 remainder[1] += coeff * pixels_left[i * 4 + 1];
50 remainder[2] += coeff * pixels_left[i * 4 + 2];
51 remainder[3] += coeff * pixels_left[i * 4 + 3];
52 }
53 __m128i t = _mm_setr_epi32(remainder[0], remainder[1], remainder[2], remaind er[3]);
54 accum = _mm_add_epi32(accum, t);
55 }
56
57 // Convolves horizontally along a single row. The row data is given in
58 // |src_data| and continues for the num_values() of the filter.
59 void convolveHorizontally_SSE2(const unsigned char* src_data,
60 const SkConvolutionFilter1D& filter,
61 unsigned char* out_row,
62 bool /*has_alpha*/) {
63 int num_values = filter.numValues();
64
65 int filter_offset, filter_length;
66 __m128i zero = _mm_setzero_si128();
67
68 // Output one pixel each iteration, calculating all channels (RGBA) together .
69 for (int out_x = 0; out_x < num_values; out_x++) {
70 const SkConvolutionFilter1D::ConvolutionFixed* filter_values =
71 filter.FilterForValue(out_x, &filter_offset, &filter_length);
72
73 __m128i accum = _mm_setzero_si128();
74
75 // Compute the first pixel in this row that the filter affects. It will
76 // touch |filter_length| pixels (4 bytes each) after this.
77 const __m128i* row_to_filter =
78 reinterpret_cast<const __m128i*>(&src_data[filter_offset << 2]);
79
80 // We will load and accumulate with four coefficients per iteration.
81 for (int filter_x = 0; filter_x < filter_length >> 2; filter_x++) {
82
83 // Load 4 coefficients => duplicate 1st and 2nd of them for all chan nels.
84 __m128i coeff, coeff16;
85 // [16] xx xx xx xx c3 c2 c1 c0
86 coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filter_valu es));
87 // [16] xx xx xx xx c1 c1 c0 c0
88 coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
89 // [16] c1 c1 c1 c1 c0 c0 c0 c0
90 coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
91
92 // Load four pixels => unpack the first two pixels to 16 bits =>
93 // multiply with coefficients => accumulate the convolution result.
94 // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
95 __m128i src8 = _mm_loadu_si128(row_to_filter);
96 // [16] a1 b1 g1 r1 a0 b0 g0 r0
97 __m128i src16 = _mm_unpacklo_epi8(src8, zero);
98 __m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
99 __m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
100 // [32] a0*c0 b0*c0 g0*c0 r0*c0
101 __m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
102 accum = _mm_add_epi32(accum, t);
103 // [32] a1*c1 b1*c1 g1*c1 r1*c1
104 t = _mm_unpackhi_epi16(mul_lo, mul_hi);
105 accum = _mm_add_epi32(accum, t);
106
107 // Duplicate 3rd and 4th coefficients for all channels =>
108 // unpack the 3rd and 4th pixels to 16 bits => multiply with coeffic ients
109 // => accumulate the convolution results.
110 // [16] xx xx xx xx c3 c3 c2 c2
111 coeff16 = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
112 // [16] c3 c3 c3 c3 c2 c2 c2 c2
113 coeff16 = _mm_unpacklo_epi16(coeff16, coeff16);
114 // [16] a3 g3 b3 r3 a2 g2 b2 r2
115 src16 = _mm_unpackhi_epi8(src8, zero);
116 mul_hi = _mm_mulhi_epi16(src16, coeff16);
117 mul_lo = _mm_mullo_epi16(src16, coeff16);
118 // [32] a2*c2 b2*c2 g2*c2 r2*c2
119 t = _mm_unpacklo_epi16(mul_lo, mul_hi);
120 accum = _mm_add_epi32(accum, t);
121 // [32] a3*c3 b3*c3 g3*c3 r3*c3
122 t = _mm_unpackhi_epi16(mul_lo, mul_hi);
123 accum = _mm_add_epi32(accum, t);
124
125 // Advance the pixel and coefficients pointers.
126 row_to_filter += 1;
127 filter_values += 4;
128 }
129
130 // When |filter_length| is not divisible by 4, we accumulate the last 1 - 3
131 // coefficients one at a time.
132 int r = filter_length & 3;
133 if (r) {
134 int remainder_offset = (filter_offset + filter_length - r) * 4;
135 accum_remainder(src_data + remainder_offset, filter_values, accum, r );
136 }
137
138 // Shift right for fixed point implementation.
139 accum = _mm_srai_epi32(accum, SkConvolutionFilter1D::kShiftBits);
140
141 // Packing 32 bits |accum| to 16 bits per channel (signed saturation).
142 accum = _mm_packs_epi32(accum, zero);
143 // Packing 16 bits |accum| to 8 bits per channel (unsigned saturation).
144 accum = _mm_packus_epi16(accum, zero);
145
146 // Store the pixel value of 32 bits.
147 *(reinterpret_cast<int*>(out_row)) = _mm_cvtsi128_si32(accum);
148 out_row += 4;
149 }
150 }
151
152 // Convolves horizontally along four rows. The row data is given in
153 // |src_data| and continues for the num_values() of the filter.
154 // The algorithm is almost same as |ConvolveHorizontally_SSE2|. Please
155 // refer to that function for detailed comments.
156 void convolve4RowsHorizontally_SSE2(const unsigned char* src_data[4],
157 const SkConvolutionFilter1D& filter,
158 unsigned char* out_row[4],
159 size_t outRowBytes) {
160 SkDEBUGCODE(const unsigned char* out_row_0_start = out_row[0];)
161
162 int num_values = filter.numValues();
163
164 int filter_offset, filter_length;
165 __m128i zero = _mm_setzero_si128();
166
167 // Output one pixel each iteration, calculating all channels (RGBA) together .
168 for (int out_x = 0; out_x < num_values; out_x++) {
169 const SkConvolutionFilter1D::ConvolutionFixed* filter_values =
170 filter.FilterForValue(out_x, &filter_offset, &filter_length);
171
172 // four pixels in a column per iteration.
173 __m128i accum0 = _mm_setzero_si128();
174 __m128i accum1 = _mm_setzero_si128();
175 __m128i accum2 = _mm_setzero_si128();
176 __m128i accum3 = _mm_setzero_si128();
177 int start = (filter_offset<<2);
178 // We will load and accumulate with four coefficients per iteration.
179 for (int filter_x = 0; filter_x < (filter_length >> 2); filter_x++) {
180 __m128i coeff, coeff16lo, coeff16hi;
181 // [16] xx xx xx xx c3 c2 c1 c0
182 coeff = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(filter_valu es));
183 // [16] xx xx xx xx c1 c1 c0 c0
184 coeff16lo = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(1, 1, 0, 0));
185 // [16] c1 c1 c1 c1 c0 c0 c0 c0
186 coeff16lo = _mm_unpacklo_epi16(coeff16lo, coeff16lo);
187 // [16] xx xx xx xx c3 c3 c2 c2
188 coeff16hi = _mm_shufflelo_epi16(coeff, _MM_SHUFFLE(3, 3, 2, 2));
189 // [16] c3 c3 c3 c3 c2 c2 c2 c2
190 coeff16hi = _mm_unpacklo_epi16(coeff16hi, coeff16hi);
191
192 __m128i src8, src16, mul_hi, mul_lo, t;
193
194 #define ITERATION(src, accum) \
195 src8 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src)); \
196 src16 = _mm_unpacklo_epi8(src8, zero); \
197 mul_hi = _mm_mulhi_epi16(src16, coeff16lo); \
198 mul_lo = _mm_mullo_epi16(src16, coeff16lo); \
199 t = _mm_unpacklo_epi16(mul_lo, mul_hi); \
200 accum = _mm_add_epi32(accum, t); \
201 t = _mm_unpackhi_epi16(mul_lo, mul_hi); \
202 accum = _mm_add_epi32(accum, t); \
203 src16 = _mm_unpackhi_epi8(src8, zero); \
204 mul_hi = _mm_mulhi_epi16(src16, coeff16hi); \
205 mul_lo = _mm_mullo_epi16(src16, coeff16hi); \
206 t = _mm_unpacklo_epi16(mul_lo, mul_hi); \
207 accum = _mm_add_epi32(accum, t); \
208 t = _mm_unpackhi_epi16(mul_lo, mul_hi); \
209 accum = _mm_add_epi32(accum, t)
210
211 ITERATION(src_data[0] + start, accum0);
212 ITERATION(src_data[1] + start, accum1);
213 ITERATION(src_data[2] + start, accum2);
214 ITERATION(src_data[3] + start, accum3);
215
216 start += 16;
217 filter_values += 4;
218 }
219
220 int r = filter_length & 3;
221 if (r) {
222 int remainder_offset = (filter_offset + filter_length - r) * 4;
223 accum_remainder(src_data[0] + remainder_offset, filter_values, accum 0, r);
224 accum_remainder(src_data[1] + remainder_offset, filter_values, accum 1, r);
225 accum_remainder(src_data[2] + remainder_offset, filter_values, accum 2, r);
226 accum_remainder(src_data[3] + remainder_offset, filter_values, accum 3, r);
227 }
228
229 accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits);
230 accum0 = _mm_packs_epi32(accum0, zero);
231 accum0 = _mm_packus_epi16(accum0, zero);
232 accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
233 accum1 = _mm_packs_epi32(accum1, zero);
234 accum1 = _mm_packus_epi16(accum1, zero);
235 accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
236 accum2 = _mm_packs_epi32(accum2, zero);
237 accum2 = _mm_packus_epi16(accum2, zero);
238 accum3 = _mm_srai_epi32(accum3, SkConvolutionFilter1D::kShiftBits);
239 accum3 = _mm_packs_epi32(accum3, zero);
240 accum3 = _mm_packus_epi16(accum3, zero);
241
242 // We seem to be running off the edge here (chromium:491660).
243 SkASSERT(((size_t)out_row[0] - (size_t)out_row_0_start) < outRowBytes);
244
245 *(reinterpret_cast<int*>(out_row[0])) = _mm_cvtsi128_si32(accum0);
246 *(reinterpret_cast<int*>(out_row[1])) = _mm_cvtsi128_si32(accum1);
247 *(reinterpret_cast<int*>(out_row[2])) = _mm_cvtsi128_si32(accum2);
248 *(reinterpret_cast<int*>(out_row[3])) = _mm_cvtsi128_si32(accum3);
249
250 out_row[0] += 4;
251 out_row[1] += 4;
252 out_row[2] += 4;
253 out_row[3] += 4;
254 }
255 }
256
257 // Does vertical convolution to produce one output row. The filter values and
258 // length are given in the first two parameters. These are applied to each
259 // of the rows pointed to in the |source_data_rows| array, with each row
260 // being |pixel_width| wide.
261 //
262 // The output must have room for |pixel_width * 4| bytes.
263 template<bool has_alpha>
264 void convolveVertically_SSE2(const SkConvolutionFilter1D::ConvolutionFixed* filt er_values,
265 int filter_length,
266 unsigned char* const* source_data_rows,
267 int pixel_width,
268 unsigned char* out_row) {
269 int width = pixel_width & ~3;
270
271 __m128i zero = _mm_setzero_si128();
272 __m128i accum0, accum1, accum2, accum3, coeff16;
273 const __m128i* src;
274 // Output four pixels per iteration (16 bytes).
275 for (int out_x = 0; out_x < width; out_x += 4) {
276
277 // Accumulated result for each pixel. 32 bits per RGBA channel.
278 accum0 = _mm_setzero_si128();
279 accum1 = _mm_setzero_si128();
280 accum2 = _mm_setzero_si128();
281 accum3 = _mm_setzero_si128();
282
283 // Convolve with one filter coefficient per iteration.
284 for (int filter_y = 0; filter_y < filter_length; filter_y++) {
285
286 // Duplicate the filter coefficient 8 times.
287 // [16] cj cj cj cj cj cj cj cj
288 coeff16 = _mm_set1_epi16(filter_values[filter_y]);
289
290 // Load four pixels (16 bytes) together.
291 // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
292 src = reinterpret_cast<const __m128i*>(
293 &source_data_rows[filter_y][out_x << 2]);
294 __m128i src8 = _mm_loadu_si128(src);
295
296 // Unpack 1st and 2nd pixels from 8 bits to 16 bits for each channel s =>
297 // multiply with current coefficient => accumulate the result.
298 // [16] a1 b1 g1 r1 a0 b0 g0 r0
299 __m128i src16 = _mm_unpacklo_epi8(src8, zero);
300 __m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
301 __m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
302 // [32] a0 b0 g0 r0
303 __m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
304 accum0 = _mm_add_epi32(accum0, t);
305 // [32] a1 b1 g1 r1
306 t = _mm_unpackhi_epi16(mul_lo, mul_hi);
307 accum1 = _mm_add_epi32(accum1, t);
308
309 // Unpack 3rd and 4th pixels from 8 bits to 16 bits for each channel s =>
310 // multiply with current coefficient => accumulate the result.
311 // [16] a3 b3 g3 r3 a2 b2 g2 r2
312 src16 = _mm_unpackhi_epi8(src8, zero);
313 mul_hi = _mm_mulhi_epi16(src16, coeff16);
314 mul_lo = _mm_mullo_epi16(src16, coeff16);
315 // [32] a2 b2 g2 r2
316 t = _mm_unpacklo_epi16(mul_lo, mul_hi);
317 accum2 = _mm_add_epi32(accum2, t);
318 // [32] a3 b3 g3 r3
319 t = _mm_unpackhi_epi16(mul_lo, mul_hi);
320 accum3 = _mm_add_epi32(accum3, t);
321 }
322
323 // Shift right for fixed point implementation.
324 accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits);
325 accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
326 accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
327 accum3 = _mm_srai_epi32(accum3, SkConvolutionFilter1D::kShiftBits);
328
329 // Packing 32 bits |accum| to 16 bits per channel (signed saturation).
330 // [16] a1 b1 g1 r1 a0 b0 g0 r0
331 accum0 = _mm_packs_epi32(accum0, accum1);
332 // [16] a3 b3 g3 r3 a2 b2 g2 r2
333 accum2 = _mm_packs_epi32(accum2, accum3);
334
335 // Packing 16 bits |accum| to 8 bits per channel (unsigned saturation).
336 // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
337 accum0 = _mm_packus_epi16(accum0, accum2);
338
339 if (has_alpha) {
340 // Compute the max(ri, gi, bi) for each pixel.
341 // [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0
342 __m128i a = _mm_srli_epi32(accum0, 8);
343 // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
344 __m128i b = _mm_max_epu8(a, accum0); // Max of r and g.
345 // [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0
346 a = _mm_srli_epi32(accum0, 16);
347 // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
348 b = _mm_max_epu8(a, b); // Max of r and g and b.
349 // [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00
350 b = _mm_slli_epi32(b, 24);
351
352 // Make sure the value of alpha channel is always larger than maximu m
353 // value of color channels.
354 accum0 = _mm_max_epu8(b, accum0);
355 } else {
356 // Set value of alpha channels to 0xFF.
357 __m128i mask = _mm_set1_epi32(0xff000000);
358 accum0 = _mm_or_si128(accum0, mask);
359 }
360
361 // Store the convolution result (16 bytes) and advance the pixel pointer s.
362 _mm_storeu_si128(reinterpret_cast<__m128i*>(out_row), accum0);
363 out_row += 16;
364 }
365
366 // When the width of the output is not divisible by 4, We need to save one
367 // pixel (4 bytes) each time. And also the fourth pixel is always absent.
368 if (pixel_width & 3) {
369 accum0 = _mm_setzero_si128();
370 accum1 = _mm_setzero_si128();
371 accum2 = _mm_setzero_si128();
372 for (int filter_y = 0; filter_y < filter_length; ++filter_y) {
373 coeff16 = _mm_set1_epi16(filter_values[filter_y]);
374 // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
375 src = reinterpret_cast<const __m128i*>(
376 &source_data_rows[filter_y][width<<2]);
377 __m128i src8 = _mm_loadu_si128(src);
378 // [16] a1 b1 g1 r1 a0 b0 g0 r0
379 __m128i src16 = _mm_unpacklo_epi8(src8, zero);
380 __m128i mul_hi = _mm_mulhi_epi16(src16, coeff16);
381 __m128i mul_lo = _mm_mullo_epi16(src16, coeff16);
382 // [32] a0 b0 g0 r0
383 __m128i t = _mm_unpacklo_epi16(mul_lo, mul_hi);
384 accum0 = _mm_add_epi32(accum0, t);
385 // [32] a1 b1 g1 r1
386 t = _mm_unpackhi_epi16(mul_lo, mul_hi);
387 accum1 = _mm_add_epi32(accum1, t);
388 // [16] a3 b3 g3 r3 a2 b2 g2 r2
389 src16 = _mm_unpackhi_epi8(src8, zero);
390 mul_hi = _mm_mulhi_epi16(src16, coeff16);
391 mul_lo = _mm_mullo_epi16(src16, coeff16);
392 // [32] a2 b2 g2 r2
393 t = _mm_unpacklo_epi16(mul_lo, mul_hi);
394 accum2 = _mm_add_epi32(accum2, t);
395 }
396
397 accum0 = _mm_srai_epi32(accum0, SkConvolutionFilter1D::kShiftBits);
398 accum1 = _mm_srai_epi32(accum1, SkConvolutionFilter1D::kShiftBits);
399 accum2 = _mm_srai_epi32(accum2, SkConvolutionFilter1D::kShiftBits);
400 // [16] a1 b1 g1 r1 a0 b0 g0 r0
401 accum0 = _mm_packs_epi32(accum0, accum1);
402 // [16] a3 b3 g3 r3 a2 b2 g2 r2
403 accum2 = _mm_packs_epi32(accum2, zero);
404 // [8] a3 b3 g3 r3 a2 b2 g2 r2 a1 b1 g1 r1 a0 b0 g0 r0
405 accum0 = _mm_packus_epi16(accum0, accum2);
406 if (has_alpha) {
407 // [8] xx a3 b3 g3 xx a2 b2 g2 xx a1 b1 g1 xx a0 b0 g0
408 __m128i a = _mm_srli_epi32(accum0, 8);
409 // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
410 __m128i b = _mm_max_epu8(a, accum0); // Max of r and g.
411 // [8] xx xx a3 b3 xx xx a2 b2 xx xx a1 b1 xx xx a0 b0
412 a = _mm_srli_epi32(accum0, 16);
413 // [8] xx xx xx max3 xx xx xx max2 xx xx xx max1 xx xx xx max0
414 b = _mm_max_epu8(a, b); // Max of r and g and b.
415 // [8] max3 00 00 00 max2 00 00 00 max1 00 00 00 max0 00 00 00
416 b = _mm_slli_epi32(b, 24);
417 accum0 = _mm_max_epu8(b, accum0);
418 } else {
419 __m128i mask = _mm_set1_epi32(0xff000000);
420 accum0 = _mm_or_si128(accum0, mask);
421 }
422
423 for (int out_x = width; out_x < pixel_width; out_x++) {
424 *(reinterpret_cast<int*>(out_row)) = _mm_cvtsi128_si32(accum0);
425 accum0 = _mm_srli_si128(accum0, 4);
426 out_row += 4;
427 }
428 }
429 }
430
431 void convolveVertically_SSE2(const SkConvolutionFilter1D::ConvolutionFixed* filt er_values,
432 int filter_length,
433 unsigned char* const* source_data_rows,
434 int pixel_width,
435 unsigned char* out_row,
436 bool has_alpha) {
437 if (has_alpha) {
438 convolveVertically_SSE2<true>(filter_values,
439 filter_length,
440 source_data_rows,
441 pixel_width,
442 out_row);
443 } else {
444 convolveVertically_SSE2<false>(filter_values,
445 filter_length,
446 source_data_rows,
447 pixel_width,
448 out_row);
449 }
450 }
OLDNEW
« no previous file with comments | « src/opts/SkBitmapFilter_opts_SSE2.h ('k') | src/opts/SkBitmapProcState_arm_neon.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698