Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(149)

Side by Side Diff: src/opts/SkBitmapProcState_matrix_neon.h

Issue 23835006: ARM Skia NEON patches - 20 - New improved BitmapProcState code (Closed) Base URL: https://skia.googlecode.com/svn/trunk
Patch Set: Rebase to remove conflicts on ignored-tests.txt Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1
2 #include <arm_neon.h>
3
4
// Per-instantiation names of the six matrix procs. MAKENAME is expected to be
// supplied by the file that includes this header.
#define SCALE_NOFILTER_NAME     MAKENAME(_nofilter_scale)
#define SCALE_FILTER_NAME       MAKENAME(_filter_scale)
#define AFFINE_NOFILTER_NAME    MAKENAME(_nofilter_affine)
#define AFFINE_FILTER_NAME      MAKENAME(_filter_affine)
#define PERSP_NOFILTER_NAME     MAKENAME(_nofilter_persp)
#define PERSP_FILTER_NAME       MAKENAME(_filter_persp)

// Per-instantiation names of the scalar and 4-wide coordinate packers used by
// the filtering procs.
#define PACK_FILTER_X_NAME      MAKENAME(_pack_filter_x)
#define PACK_FILTER_Y_NAME      MAKENAME(_pack_filter_y)
#define PACK_FILTER_X4_NAME     MAKENAME(_pack_filter_x4)
#define PACK_FILTER_Y4_NAME     MAKENAME(_pack_filter_y4)

// When the including file supplies no PREAMBLE, every preamble hook expands to
// nothing: no extra locals, no extra parameters, no extra arguments.
#ifndef PREAMBLE
    #define PREAMBLE(state)
    #define PREAMBLE_PARAM_X
    #define PREAMBLE_PARAM_Y
    #define PREAMBLE_ARG_X
    #define PREAMBLE_ARG_Y
#endif
24
// Matrix proc for unfiltered sampling when the inverse matrix contains only
// translate and/or scale.
//
// Output written to xy: one 32-bit entry holding the tiled Y coordinate,
// followed by `count` 16-bit tiled X coordinates packed back to back.
static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s,
                                uint32_t xy[], int count, int x, int y) {
    SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
                             SkMatrix::kScale_Mask)) == 0);

    PREAMBLE(s);

    const unsigned maxX = s.fBitmap->width() - 1;
    const unsigned maxY = s.fBitmap->height() - 1;

    // Map the centre of device pixel (x, y) through s.fInvProc.
    SkPoint mapped;
    s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
                             SkIntToScalar(y) + SK_ScalarHalf, &mapped);

    // The single Y value is stored first; everything after it is X.
    const SkFixed fy = SkScalarToFixed(mapped.fY);
    *xy++ = TILEY_PROCF(fy, maxY);
    SkFixed fx = SkScalarToFixed(mapped.fX);

    if (0 == maxX) {
        // maxX == 0: all of the following X values must be 0.
        memset(xy, 0, count * sizeof(uint16_t));
        return;
    }

    const SkFixed dx = s.fInvSx;

#ifdef CHECK_FOR_DECAL
    // Test whether the tile proc can be skipped for this whole span.
    if (can_truncate_to_fixed_for_decal(fx, dx, count, maxX)) {
        decal_nofilter_scale_neon(xy, fx, dx, count);
        return;
    }
#endif

    // From here on the destination is addressed as 16-bit entries.
    uint16_t* out = reinterpret_cast<uint16_t*>(xy);

    if (count >= 8) {
        const SkFixed dx4 = dx * 4;
        const SkFixed dx8 = dx * 8;

        // xLow  lanes: fx + {0, 1, 2, 3} * dx
        // xHigh lanes: fx + {4, 5, 6, 7} * dx
        int32x4_t xLow = vdupq_n_s32(fx);
        xLow = vsetq_lane_s32(fx + dx, xLow, 1);
        xLow = vsetq_lane_s32(fx + dx + dx, xLow, 2);
        xLow = vsetq_lane_s32(fx + dx + dx + dx, xLow, 3);
        int32x4_t xHigh = vaddq_s32(xLow, vdupq_n_s32(dx4));

        const int32x4_t step8 = vdupq_n_s32(dx8);

        // Tile and store eight X coordinates per iteration.
        do {
            const int16x8_t tiled = TILEX_PROCF_NEON8(xLow, xHigh, maxX);
            vst1q_s16(reinterpret_cast<int16_t*>(out), tiled);

            xLow = vaddq_s32(xLow, step8);
            xHigh = vaddq_s32(xHigh, step8);
            out += 8;
            count -= 8;
            fx += dx8;  // keep the scalar cursor in sync for the tail loop
        } while (count >= 8);
    }

    // Scalar tail for whatever is left (fewer than eight entries).
    for (; count > 0; --count) {
        *out++ = TILEX_PROCF(fx, maxX);
        fx += dx;
    }
}
106
// Matrix proc for unfiltered sampling under an affine inverse matrix.
//
// Emits `count` 32-bit entries. The scalar path packs each entry as
// (tiledY << 16) | tiledX; the NEON path stores the same data as interleaved
// 16-bit X, Y pairs.
static void AFFINE_NOFILTER_NAME(const SkBitmapProcState& s,
                                 uint32_t xy[], int count, int x, int y) {
    SkASSERT(s.fInvType & SkMatrix::kAffine_Mask);
    SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
                             SkMatrix::kScale_Mask |
                             SkMatrix::kAffine_Mask)) == 0);

    PREAMBLE(s);

    // Map the centre of device pixel (x, y) through s.fInvProc.
    SkPoint origin;
    s.fInvProc(s.fInvMatrix,
               SkIntToScalar(x) + SK_ScalarHalf,
               SkIntToScalar(y) + SK_ScalarHalf, &origin);

    SkFixed fx = SkScalarToFixed(origin.fX);
    SkFixed fy = SkScalarToFixed(origin.fY);
    const SkFixed dx = s.fInvSx;
    const SkFixed dy = s.fInvKy;
    const int maxX = s.fBitmap->width() - 1;
    const int maxY = s.fBitmap->height() - 1;

    if (count >= 8) {
        const SkFixed dx8 = dx * 8;
        const SkFixed dy8 = dy * 8;

        // Lanes 0..3 of each vector: start + {0, 1, 2, 3} * delta.
        int32x4_t xLow = vdupq_n_s32(fx);
        xLow = vsetq_lane_s32(fx + dx, xLow, 1);
        xLow = vsetq_lane_s32(fx + dx + dx, xLow, 2);
        xLow = vsetq_lane_s32(fx + dx + dx + dx, xLow, 3);

        int32x4_t yLow = vdupq_n_s32(fy);
        yLow = vsetq_lane_s32(fy + dy, yLow, 1);
        yLow = vsetq_lane_s32(fy + dy + dy, yLow, 2);
        yLow = vsetq_lane_s32(fy + dy + dy + dy, yLow, 3);

        // The "high" vectors cover the next four positions (+4 * delta).
        int32x4_t xHigh = vaddq_s32(xLow, vdupq_n_s32(dx * 4));
        int32x4_t yHigh = vaddq_s32(yLow, vdupq_n_s32(dy * 4));

        const int32x4_t xStep = vdupq_n_s32(dx8);
        const int32x4_t yStep = vdupq_n_s32(dy8);

        int16_t* out16 = reinterpret_cast<int16_t*>(xy);
        while (count >= 8) {
            int16x8x2_t tiled;
            tiled.val[0] = TILEX_PROCF_NEON8(xLow, xHigh, maxX);
            tiled.val[1] = TILEY_PROCF_NEON8(yLow, yHigh, maxY);

            // Interleaving store: X0, Y0, X1, Y1, ...
            vst2q_s16(out16, tiled);

            xLow = vaddq_s32(xLow, xStep);
            yLow = vaddq_s32(yLow, yStep);
            xHigh = vaddq_s32(xHigh, xStep);
            yHigh = vaddq_s32(yHigh, yStep);

            out16 += 16;  // eight 32-bit entries == sixteen 16-bit values
            count -= 8;
            fx += dx8;
            fy += dy8;
        }
        xy = reinterpret_cast<uint32_t*>(out16);
    }

    // Scalar tail for the remaining (fewer than eight) entries.
    for (; count > 0; --count) {
        *xy++ = (TILEY_PROCF(fy, maxY) << 16) | TILEX_PROCF(fx, maxX);
        fx += dx;
        fy += dy;
    }
}
180
// Matrix proc for unfiltered sampling under a perspective inverse matrix.
//
// Source coordinates come from SkPerspIter in batches of interleaved
// (x, y) SkFixed pairs. For each pair one 32-bit entry is written; the scalar
// path packs it as (tiledY << 16) | tiledX.
static void PERSP_NOFILTER_NAME(const SkBitmapProcState& s,
                                uint32_t* SK_RESTRICT xy,
                                int count, int x, int y) {
    SkASSERT(s.fInvType & SkMatrix::kPerspective_Mask);

    PREAMBLE(s);
    // max{X,Y} are int here, but later shown/assumed to fit in 16 bits
    const int maxX = s.fBitmap->width() - 1;
    const int maxY = s.fBitmap->height() - 1;

    SkPerspIter iter(s.fInvMatrix,
                     SkIntToScalar(x) + SK_ScalarHalf,
                     SkIntToScalar(y) + SK_ScalarHalf, count);

    // Each call to next() yields the size of the next batch; 0 ends the walk.
    while ((count = iter.next()) != 0) {
        const SkFixed* SK_RESTRICT srcXY = iter.getXY();

        if (count >= 8) {
            int16_t* out16 = reinterpret_cast<int16_t*>(xy);
            while (count >= 8) {
                // De-interleaving loads: val[0] gets the Xs, val[1] the Ys,
                // for points 0..3 and 4..7 respectively.
                const int32x4x2_t first4 = vld2q_s32(srcXY);
                const int32x4x2_t next4 = vld2q_s32(srcXY + 8);

                int16x8x2_t tiled;
                tiled.val[0] = TILEX_PROCF_NEON8(first4.val[0], next4.val[0], maxX);
                tiled.val[1] = TILEY_PROCF_NEON8(first4.val[1], next4.val[1], maxY);

                // Interleaving store: X0, Y0, X1, Y1, ...
                vst2q_s16(out16, tiled);

                count -= 8;    // eight points consumed
                srcXY += 16;   // sixteen SkFixed values read
                out16 += 16;   // sixteen 16-bit values written
            }
            xy = reinterpret_cast<uint32_t*>(out16);
        }

        // Scalar tail for the remainder of this batch.
        for (; count > 0; --count) {
            *xy++ = (TILEY_PROCF(srcXY[1], maxY) << 16) |
                     TILEX_PROCF(srcXY[0], maxX);
            srcXY += 2;
        }
    }
}
229
230 static inline uint32_t PACK_FILTER_Y_NAME(SkFixed f, unsigned max,
231 SkFixed one PREAMBLE_PARAM_Y) {
232 unsigned i = TILEY_PROCF(f, max);
233 i = (i << 4) | TILEY_LOW_BITS(f, max);
234 return (i << 14) | (TILEY_PROCF((f + one), max));
235 }
236
237 static inline uint32_t PACK_FILTER_X_NAME(SkFixed f, unsigned max,
238 SkFixed one PREAMBLE_PARAM_X) {
239 unsigned i = TILEX_PROCF(f, max);
240 i = (i << 4) | TILEX_LOW_BITS(f, max);
241 return (i << 14) | (TILEX_PROCF((f + one), max));
242 }
243
// Four-lane version of PACK_FILTER_X_NAME: packs four filtered X samples at
// once. Each 32-bit lane ends up as
//   ((TILEX_PROCF(f) << 4 | low 4 bits of TILEX_LOW_BITS(f)) << 14)
//       | TILEX_PROCF(f + one)
//
// f    - four fixed-point X positions
// max  - passed through to the TILEX_* macros
// one  - offset added to f to obtain the neighbouring sample position
static inline int32x4_t PACK_FILTER_X4_NAME(int32x4_t f, unsigned max,
                                            SkFixed one PREAMBLE_PARAM_X) {
    // Step 1: tiled coordinate of the first sample.
    const int32x4_t coord0 = TILEX_PROCF_NEON4(f, max);

    // Step 2: start from the low bits, then shift-left-insert coord0 above
    // them: packed = (coord0 << 4) | (packed & 0xF).
    int32x4_t packed = TILEX_LOW_BITS_NEON4(f, max);
    packed = vsliq_n_s32(packed, coord0, 4);

    // Step 3: tiled coordinate of the neighbouring sample. The add uses the
    // intrinsic rather than the compiler-specific '+' on vector types, to
    // match the rest of this file.
    const int32x4_t fNext = vaddq_s32(f, vdupq_n_s32(one));
    const int32x4_t coord1 = TILEX_PROCF_NEON4(fNext, max);

    return vorrq_s32(vshlq_n_s32(packed, 14), coord1);
}
264
// Four-lane version of PACK_FILTER_Y_NAME: packs four filtered Y samples at
// once. Each 32-bit lane ends up as
//   ((TILEY_PROCF(f) << 4 | low 4 bits of TILEY_LOW_BITS(f)) << 14)
//       | TILEY_PROCF(f + one)
//
// f    - four fixed-point Y positions
// max  - passed through to the TILEY_* macros
// one  - offset added to f to obtain the neighbouring sample position
//
// The extra-parameter hook is PREAMBLE_PARAM_Y (not _X): the body uses the
// TILEY_* macros and every caller passes PREAMBLE_ARG_Y.
static inline int32x4_t PACK_FILTER_Y4_NAME(int32x4_t f, unsigned max,
                                            SkFixed one PREAMBLE_PARAM_Y) {
    // Step 1: tiled coordinate of the first sample.
    const int32x4_t coord0 = TILEY_PROCF_NEON4(f, max);

    // Step 2: start from the low bits, then shift-left-insert coord0 above
    // them: packed = (coord0 << 4) | (packed & 0xF).
    int32x4_t packed = TILEY_LOW_BITS_NEON4(f, max);
    packed = vsliq_n_s32(packed, coord0, 4);

    // Step 3: tiled coordinate of the neighbouring sample. The add uses the
    // intrinsic rather than the compiler-specific '+' on vector types, to
    // match the rest of this file.
    const int32x4_t fNext = vaddq_s32(f, vdupq_n_s32(one));
    const int32x4_t coord1 = TILEY_PROCF_NEON4(fNext, max);

    return vorrq_s32(vshlq_n_s32(packed, 14), coord1);
}
285
// Matrix proc for filtered sampling when the inverse matrix contains only
// translate and/or scale.
//
// Output written to xy: one packed Y entry (see PACK_FILTER_Y_NAME) followed
// by `count` packed X entries (see PACK_FILTER_X_NAME), all 32 bits wide.
static void SCALE_FILTER_NAME(const SkBitmapProcState& s,
                              uint32_t xy[], int count, int x, int y) {
    SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
                             SkMatrix::kScale_Mask)) == 0);
    SkASSERT(s.fInvKy == 0);

    PREAMBLE(s);

    const unsigned maxX = s.fBitmap->width() - 1;
    const SkFixed one = s.fFilterOneX;
    const SkFixed dx = s.fInvSx;
    SkFixed fx;

    {
        // Map the centre of device pixel (x, y) through s.fInvProc.
        SkPoint pt;
        s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
                                 SkIntToScalar(y) + SK_ScalarHalf, &pt);
        // Both positions are moved back by half of their "one" step before
        // being packed.
        const SkFixed fy = SkScalarToFixed(pt.fY) - (s.fFilterOneY >> 1);
        const unsigned maxY = s.fBitmap->height() - 1;
        // compute our two Y values up front
        *xy++ = PACK_FILTER_Y_NAME(fy, maxY, s.fFilterOneY PREAMBLE_ARG_Y);
        // now initialize fx
        fx = SkScalarToFixed(pt.fX) - (one >> 1);
    }

#ifdef CHECK_FOR_DECAL
    // test if we don't need to apply the tile proc
    if (can_truncate_to_fixed_for_decal(fx, dx, count, maxX)) {
        decal_filter_scale_neon(xy, fx, dx, count);
        return;
    }
#endif

    if (count >= 4) {
        const SkFixed dx4 = dx + dx + dx + dx;
        const int32x4_t step4 = vdupq_n_s32(dx4);

        // Lanes: fx + {0, 1, 2, 3} * dx
        int32x4_t wide_fx = vdupq_n_s32(fx);
        wide_fx = vsetq_lane_s32(fx + dx, wide_fx, 1);
        wide_fx = vsetq_lane_s32(fx + dx + dx, wide_fx, 2);
        wide_fx = vsetq_lane_s32(fx + dx + dx + dx, wide_fx, 3);

        while (count >= 4) {
            const int32x4_t res =
                    PACK_FILTER_X4_NAME(wide_fx, maxX, one PREAMBLE_ARG_X);

            vst1q_u32(xy, vreinterpretq_u32_s32(res));

            // Advance with the intrinsic rather than the compiler-specific
            // '+=' on vector types.
            wide_fx = vaddq_s32(wide_fx, step4);
            fx += dx4;  // keep the scalar cursor in sync for the tail loop
            xy += 4;
            count -= 4;
        }
    }

    // Scalar tail for the remaining (fewer than four) entries.
    while (--count >= 0) {
        *xy++ = PACK_FILTER_X_NAME(fx, maxX, one PREAMBLE_ARG_X);
        fx += dx;
    }
}
349
// Matrix proc for filtered sampling under an affine inverse matrix.
//
// For each of the `count` positions two 32-bit entries are written, Y first
// and then X, each packed as described on PACK_FILTER_Y_NAME /
// PACK_FILTER_X_NAME.
static void AFFINE_FILTER_NAME(const SkBitmapProcState& s,
                               uint32_t xy[], int count, int x, int y) {
    SkASSERT(s.fInvType & SkMatrix::kAffine_Mask);
    SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
                             SkMatrix::kScale_Mask |
                             SkMatrix::kAffine_Mask)) == 0);

    PREAMBLE(s);

    // Map the centre of device pixel (x, y) through s.fInvProc.
    SkPoint srcPt;
    s.fInvProc(s.fInvMatrix,
               SkIntToScalar(x) + SK_ScalarHalf,
               SkIntToScalar(y) + SK_ScalarHalf, &srcPt);

    const SkFixed oneX = s.fFilterOneX;
    const SkFixed oneY = s.fFilterOneY;
    // Both positions are moved back by half of their "one" step.
    SkFixed fx = SkScalarToFixed(srcPt.fX) - (oneX >> 1);
    SkFixed fy = SkScalarToFixed(srcPt.fY) - (oneY >> 1);
    const SkFixed dx = s.fInvSx;
    const SkFixed dy = s.fInvKy;
    const unsigned maxX = s.fBitmap->width() - 1;
    const unsigned maxY = s.fBitmap->height() - 1;

    if (count >= 4) {
        const SkFixed dx4 = dx + dx + dx + dx;
        const SkFixed dy4 = dy + dy + dy + dy;
        const int32x4_t xStep = vdupq_n_s32(dx4);
        const int32x4_t yStep = vdupq_n_s32(dy4);

        // Lanes: start + {0, 1, 2, 3} * delta
        int32x4_t wide_fx = vdupq_n_s32(fx);
        wide_fx = vsetq_lane_s32(fx + dx, wide_fx, 1);
        wide_fx = vsetq_lane_s32(fx + dx + dx, wide_fx, 2);
        wide_fx = vsetq_lane_s32(fx + dx + dx + dx, wide_fx, 3);

        int32x4_t wide_fy = vdupq_n_s32(fy);
        wide_fy = vsetq_lane_s32(fy + dy, wide_fy, 1);
        wide_fy = vsetq_lane_s32(fy + dy + dy, wide_fy, 2);
        wide_fy = vsetq_lane_s32(fy + dy + dy + dy, wide_fy, 3);

        while (count >= 4) {
            int32x4x2_t vxy;

            // Pack the Y side into val[0] and the X side into val[1]; the
            // interleaving store below then emits Y, X, Y, X, ...
            vxy.val[0] = PACK_FILTER_Y4_NAME(wide_fy, maxY, oneY PREAMBLE_ARG_Y);
            vxy.val[1] = PACK_FILTER_X4_NAME(wide_fx, maxX, oneX PREAMBLE_ARG_X);

            // interleave as YXYXYXYX as part of the storing
            vst2q_s32(reinterpret_cast<int32_t*>(xy), vxy);

            // Prepare the next iteration. The vector adds use the intrinsic
            // rather than the compiler-specific '+=' on vector types.
            wide_fx = vaddq_s32(wide_fx, xStep);
            fx += dx4;
            wide_fy = vaddq_s32(wide_fy, yStep);
            fy += dy4;
            xy += 8;  // 4 x's, 4 y's
            count -= 4;
        }
    }

    // Scalar tail for the remaining (fewer than four) positions.
    while (--count >= 0) {
        // NB: writing Y/X
        *xy++ = PACK_FILTER_Y_NAME(fy, maxY, oneY PREAMBLE_ARG_Y);
        fy += dy;
        *xy++ = PACK_FILTER_X_NAME(fx, maxX, oneX PREAMBLE_ARG_X);
        fx += dx;
    }
}
413
// Matrix proc for filtered sampling under a perspective inverse matrix.
//
// Source coordinates come from SkPerspIter in batches of interleaved
// (x, y) SkFixed pairs. For every pair two 32-bit entries are written, Y
// first and then X, each packed as described on PACK_FILTER_Y_NAME /
// PACK_FILTER_X_NAME.
static void PERSP_FILTER_NAME(const SkBitmapProcState& s,
                              uint32_t* SK_RESTRICT xy, int count,
                              int x, int y) {
    SkASSERT(s.fInvType & SkMatrix::kPerspective_Mask);

    PREAMBLE(s);
    const unsigned maxX = s.fBitmap->width() - 1;
    const unsigned maxY = s.fBitmap->height() - 1;
    const SkFixed oneX = s.fFilterOneX;
    const SkFixed oneY = s.fFilterOneY;

    SkPerspIter iter(s.fInvMatrix,
                     SkIntToScalar(x) + SK_ScalarHalf,
                     SkIntToScalar(y) + SK_ScalarHalf, count);

    // Each call to next() yields the size of the next batch; 0 ends the walk.
    while ((count = iter.next()) != 0) {
        const SkFixed* SK_RESTRICT srcXY = iter.getXY();

        while (count >= 4) {
            // load src: x-y-x-y-x-y-x-y (val[0] gets the Xs, val[1] the Ys)
            const int32x4x2_t vxy = vld2q_s32(srcXY);

            // Move both positions back by half of their "one" step.
            const int32x4_t wide_x = vsubq_s32(vxy.val[0], vdupq_n_s32(oneX >> 1));
            const int32x4_t wide_y = vsubq_s32(vxy.val[1], vdupq_n_s32(oneY >> 1));

            // Pack the Y side into val[0] and the X side into val[1].
            int32x4x2_t vresyx;
            vresyx.val[0] = PACK_FILTER_Y4_NAME(wide_y, maxY, oneY PREAMBLE_ARG_Y);
            vresyx.val[1] = PACK_FILTER_X4_NAME(wide_x, maxX, oneX PREAMBLE_ARG_X);

            // store interleaved as y-x-y-x-y-x-y-x (NB != read order)
            vst2q_s32(reinterpret_cast<int32_t*>(xy), vresyx);

            // on to the next iteration
            srcXY += 2 * 4;
            count -= 4;
            xy += 2 * 4;
        }

        // Scalar tail for the remainder of this batch.
        while (--count >= 0) {
            // NB: we read x/y, we write y/x
            *xy++ = PACK_FILTER_Y_NAME(srcXY[1] - (oneY >> 1), maxY,
                                       oneY PREAMBLE_ARG_Y);
            *xy++ = PACK_FILTER_X_NAME(srcXY[0] - (oneX >> 1), maxX,
                                       oneX PREAMBLE_ARG_X);
            srcXY += 2;
        }
    }
}
465
466 const SkBitmapProcState::MatrixProc MAKENAME(_Procs)[] = {
467 SCALE_NOFILTER_NAME,
468 SCALE_FILTER_NAME,
469 AFFINE_NOFILTER_NAME,
470 AFFINE_FILTER_NAME,
471 PERSP_NOFILTER_NAME,
472 PERSP_FILTER_NAME
473 };
474
// Clean up every per-instantiation macro so this header can be included
// again with a different set of definitions.

// NEON tiling hooks.
#undef TILEX_PROCF_NEON8
#undef TILEY_PROCF_NEON8
#undef TILEX_PROCF_NEON4
#undef TILEY_PROCF_NEON4
#undef TILEX_LOW_BITS_NEON4
#undef TILEY_LOW_BITS_NEON4

// Scalar tiling hooks and naming macro.
#undef MAKENAME
#undef TILEX_PROCF
#undef TILEY_PROCF
#ifdef CHECK_FOR_DECAL
    #undef CHECK_FOR_DECAL
#endif

// Proc names defined at the top of this header.
#undef SCALE_NOFILTER_NAME
#undef SCALE_FILTER_NAME
#undef AFFINE_NOFILTER_NAME
#undef AFFINE_FILTER_NAME
#undef PERSP_NOFILTER_NAME
#undef PERSP_FILTER_NAME

// Packer names defined at the top of this header; previously these were the
// only macros left defined after inclusion.
#undef PACK_FILTER_X_NAME
#undef PACK_FILTER_Y_NAME
#undef PACK_FILTER_X4_NAME
#undef PACK_FILTER_Y4_NAME

// Preamble hooks.
#undef PREAMBLE
#undef PREAMBLE_PARAM_X
#undef PREAMBLE_PARAM_Y
#undef PREAMBLE_ARG_X
#undef PREAMBLE_ARG_Y

#undef TILEX_LOW_BITS
#undef TILEY_LOW_BITS
504
OLDNEW
« no previous file with comments | « src/opts/SkBitmapProcState_matrix_clamp_neon.h ('k') | src/opts/SkBitmapProcState_matrix_repeat_neon.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698