OLD | NEW |
1 | 1 |
2 #include <arm_neon.h> | 2 #include <arm_neon.h> |
3 | 3 |
4 | 4 |
5 #define SCALE_NOFILTER_NAME MAKENAME(_nofilter_scale) | 5 #define SCALE_NOFILTER_NAME MAKENAME(_nofilter_scale) |
6 #define SCALE_FILTER_NAME MAKENAME(_filter_scale) | 6 #define SCALE_FILTER_NAME MAKENAME(_filter_scale) |
7 #define AFFINE_NOFILTER_NAME MAKENAME(_nofilter_affine) | 7 #define AFFINE_NOFILTER_NAME MAKENAME(_nofilter_affine) |
8 #define AFFINE_FILTER_NAME MAKENAME(_filter_affine) | 8 #define AFFINE_FILTER_NAME MAKENAME(_filter_affine) |
9 #define PERSP_NOFILTER_NAME MAKENAME(_nofilter_persp) | 9 #define PERSP_NOFILTER_NAME MAKENAME(_nofilter_persp) |
10 #define PERSP_FILTER_NAME MAKENAME(_filter_persp) | 10 #define PERSP_FILTER_NAME MAKENAME(_filter_persp) |
(...skipping 13 matching lines...) Expand all Loading... |
24 | 24 |
25 static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s, | 25 static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s, |
26 uint32_t xy[], int count, int x, int y) { | 26 uint32_t xy[], int count, int x, int y) { |
27 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask | | 27 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask | |
28 SkMatrix::kScale_Mask)) == 0); | 28 SkMatrix::kScale_Mask)) == 0); |
29 | 29 |
30 PREAMBLE(s); | 30 PREAMBLE(s); |
31 | 31 |
32 // we store y, x, x, x, x, x | 32 // we store y, x, x, x, x, x |
33 const unsigned maxX = s.fBitmap->width() - 1; | 33 const unsigned maxX = s.fBitmap->width() - 1; |
34 SkFixed fx; | 34 SkFractionalInt fx; |
35 { | 35 { |
36 SkPoint pt; | 36 SkPoint pt; |
37 s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf, | 37 s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf, |
38 SkIntToScalar(y) + SK_ScalarHalf, &pt); | 38 SkIntToScalar(y) + SK_ScalarHalf, &pt); |
39 fx = SkScalarToFixed(pt.fY); | 39 fx = SkScalarToFractionalInt(pt.fY); |
40 const unsigned maxY = s.fBitmap->height() - 1; | 40 const unsigned maxY = s.fBitmap->height() - 1; |
41 *xy++ = TILEY_PROCF(fx, maxY); | 41 *xy++ = TILEY_PROCF(SkFractionalIntToFixed(fx), maxY); |
42 fx = SkScalarToFixed(pt.fX); | 42 fx = SkScalarToFractionalInt(pt.fX); |
43 } | 43 } |
44 | 44 |
45 if (0 == maxX) { | 45 if (0 == maxX) { |
46 // all of the following X values must be 0 | 46 // all of the following X values must be 0 |
47 memset(xy, 0, count * sizeof(uint16_t)); | 47 memset(xy, 0, count * sizeof(uint16_t)); |
48 return; | 48 return; |
49 } | 49 } |
50 | 50 |
51 const SkFixed dx = s.fInvSx; | 51 const SkFractionalInt dx = s.fInvSxFractionalInt; |
52 | 52 |
53 #ifdef CHECK_FOR_DECAL | 53 #ifdef CHECK_FOR_DECAL |
54 // test if we don't need to apply the tile proc | 54 // test if we don't need to apply the tile proc |
55 if (can_truncate_to_fixed_for_decal(fx, dx, count, maxX)) { | 55 if (can_truncate_to_fixed_for_decal(fx, dx, count, maxX)) { |
56 decal_nofilter_scale_neon(xy, fx, dx, count); | 56 decal_nofilter_scale_neon(xy, SkFractionalIntToFixed(fx), |
| 57 SkFractionalIntToFixed(dx), count); |
57 return; | 58 return; |
58 } | 59 } |
59 #endif | 60 #endif |
60 | 61 |
61 if (count >= 8) { | 62 if (count >= 8) { |
62 SkFixed dx2 = dx+dx; | 63 SkFractionalInt dx2 = dx+dx; |
63 SkFixed dx4 = dx2+dx2; | 64 SkFractionalInt dx4 = dx2+dx2; |
64 SkFixed dx8 = dx4+dx4; | 65 SkFractionalInt dx8 = dx4+dx4; |
65 | 66 |
66 // now build fx/fx+dx/fx+2dx/fx+3dx | 67 // now build fx/fx+dx/fx+2dx/fx+3dx |
67 SkFixed fx1, fx2, fx3; | 68 SkFractionalInt fx1, fx2, fx3; |
68 int32x4_t lbase, hbase; | 69 int32x4_t lbase, hbase; |
69 int16_t *dst16 = (int16_t *)xy; | 70 int16_t *dst16 = (int16_t *)xy; |
70 | 71 |
71 fx1 = fx+dx; | 72 fx1 = fx+dx; |
72 fx2 = fx1+dx; | 73 fx2 = fx1+dx; |
73 fx3 = fx2+dx; | 74 fx3 = fx2+dx; |
74 | 75 |
75 lbase = vdupq_n_s32(fx); | 76 lbase = vdupq_n_s32(SkFractionalIntToFixed(fx)); |
76 lbase = vsetq_lane_s32(fx1, lbase, 1); | 77 lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx1), lbase, 1); |
77 lbase = vsetq_lane_s32(fx2, lbase, 2); | 78 lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx2), lbase, 2); |
78 lbase = vsetq_lane_s32(fx3, lbase, 3); | 79 lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx3), lbase, 3); |
79 hbase = vaddq_s32(lbase, vdupq_n_s32(dx4)); | 80 hbase = vaddq_s32(lbase, vdupq_n_s32(SkFractionalIntToFixed(dx4))); |
80 | 81 |
81 // store & bump | 82 // store & bump |
82 while (count >= 8) { | 83 while (count >= 8) { |
83 | 84 |
84 int16x8_t fx8; | 85 int16x8_t fx8; |
85 | 86 |
86 fx8 = TILEX_PROCF_NEON8(lbase, hbase, maxX); | 87 fx8 = TILEX_PROCF_NEON8(lbase, hbase, maxX); |
87 | 88 |
88 vst1q_s16(dst16, fx8); | 89 vst1q_s16(dst16, fx8); |
89 | 90 |
90 // but preserving base & on to the next | 91 // but preserving base & on to the next |
91 lbase = vaddq_s32 (lbase, vdupq_n_s32(dx8)); | 92 lbase = vaddq_s32 (lbase, vdupq_n_s32(SkFractionalIntToFixed(dx8))); |
92 hbase = vaddq_s32 (hbase, vdupq_n_s32(dx8)); | 93 hbase = vaddq_s32 (hbase, vdupq_n_s32(SkFractionalIntToFixed(dx8))); |
93 dst16 += 8; | 94 dst16 += 8; |
94 count -= 8; | 95 count -= 8; |
95 fx += dx8; | 96 fx += dx8; |
96 }; | 97 }; |
97 xy = (uint32_t *) dst16; | 98 xy = (uint32_t *) dst16; |
98 } | 99 } |
99 | 100 |
100 uint16_t* xx = (uint16_t*)xy; | 101 uint16_t* xx = (uint16_t*)xy; |
101 for (int i = count; i > 0; --i) { | 102 for (int i = count; i > 0; --i) { |
102 *xx++ = TILEX_PROCF(fx, maxX); | 103 *xx++ = TILEX_PROCF(SkFractionalIntToFixed(fx), maxX); |
103 fx += dx; | 104 fx += dx; |
104 } | 105 } |
105 } | 106 } |
106 | 107 |
107 static void AFFINE_NOFILTER_NAME(const SkBitmapProcState& s, | 108 static void AFFINE_NOFILTER_NAME(const SkBitmapProcState& s, |
108 uint32_t xy[], int count, int x, int y) { | 109 uint32_t xy[], int count, int x, int y) { |
109 SkASSERT(s.fInvType & SkMatrix::kAffine_Mask); | 110 SkASSERT(s.fInvType & SkMatrix::kAffine_Mask); |
110 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask | | 111 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask | |
111 SkMatrix::kScale_Mask | | 112 SkMatrix::kScale_Mask | |
112 SkMatrix::kAffine_Mask)) == 0); | 113 SkMatrix::kAffine_Mask)) == 0); |
113 | 114 |
114 PREAMBLE(s); | 115 PREAMBLE(s); |
115 SkPoint srcPt; | 116 SkPoint srcPt; |
116 s.fInvProc(s.fInvMatrix, | 117 s.fInvProc(s.fInvMatrix, |
117 SkIntToScalar(x) + SK_ScalarHalf, | 118 SkIntToScalar(x) + SK_ScalarHalf, |
118 SkIntToScalar(y) + SK_ScalarHalf, &srcPt); | 119 SkIntToScalar(y) + SK_ScalarHalf, &srcPt); |
119 | 120 |
120 SkFixed fx = SkScalarToFixed(srcPt.fX); | 121 SkFractionalInt fx = SkScalarToFractionalInt(srcPt.fX); |
121 SkFixed fy = SkScalarToFixed(srcPt.fY); | 122 SkFractionalInt fy = SkScalarToFractionalInt(srcPt.fY); |
122 SkFixed dx = s.fInvSx; | 123 SkFractionalInt dx = s.fInvSxFractionalInt; |
123 SkFixed dy = s.fInvKy; | 124 SkFractionalInt dy = s.fInvKyFractionalInt; |
124 int maxX = s.fBitmap->width() - 1; | 125 int maxX = s.fBitmap->width() - 1; |
125 int maxY = s.fBitmap->height() - 1; | 126 int maxY = s.fBitmap->height() - 1; |
126 | 127 |
127 if (count >= 8) { | 128 if (count >= 8) { |
128 SkFixed dx4 = dx * 4; | 129 SkFractionalInt dx4 = dx * 4; |
129 SkFixed dy4 = dy * 4; | 130 SkFractionalInt dy4 = dy * 4; |
130 SkFixed dx8 = dx * 8; | 131 SkFractionalInt dx8 = dx * 8; |
131 SkFixed dy8 = dy * 8; | 132 SkFractionalInt dy8 = dy * 8; |
132 | 133 |
133 int32x4_t xbase, ybase; | 134 int32x4_t xbase, ybase; |
134 int32x4_t x2base, y2base; | 135 int32x4_t x2base, y2base; |
135 int16_t *dst16 = (int16_t *) xy; | 136 int16_t *dst16 = (int16_t *) xy; |
136 | 137 |
137 // now build fx, fx+dx, fx+2dx, fx+3dx | 138 // now build fx, fx+dx, fx+2dx, fx+3dx |
138 xbase = vdupq_n_s32(fx); | 139 xbase = vdupq_n_s32(SkFractionalIntToFixed(fx)); |
139 xbase = vsetq_lane_s32(fx+dx, xbase, 1); | 140 xbase = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx), xbase, 1); |
140 xbase = vsetq_lane_s32(fx+dx+dx, xbase, 2); | 141 xbase = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx), xbase, 2); |
141 xbase = vsetq_lane_s32(fx+dx+dx+dx, xbase, 3); | 142 xbase = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx+dx), xbase, 3); |
142 | 143 |
143 // same for fy | 144 // same for fy |
144 ybase = vdupq_n_s32(fy); | 145 ybase = vdupq_n_s32(SkFractionalIntToFixed(fy)); |
145 ybase = vsetq_lane_s32(fy+dy, ybase, 1); | 146 ybase = vsetq_lane_s32(SkFractionalIntToFixed(fy+dy), ybase, 1); |
146 ybase = vsetq_lane_s32(fy+dy+dy, ybase, 2); | 147 ybase = vsetq_lane_s32(SkFractionalIntToFixed(fy+dy+dy), ybase, 2); |
147 ybase = vsetq_lane_s32(fy+dy+dy+dy, ybase, 3); | 148 ybase = vsetq_lane_s32(SkFractionalIntToFixed(fy+dy+dy+dy), ybase, 3); |
148 | 149 |
149 x2base = vaddq_s32(xbase, vdupq_n_s32(dx4)); | 150 x2base = vaddq_s32(xbase, vdupq_n_s32(SkFractionalIntToFixed(dx4))); |
150 y2base = vaddq_s32(ybase, vdupq_n_s32(dy4)); | 151 y2base = vaddq_s32(ybase, vdupq_n_s32(SkFractionalIntToFixed(dy4))); |
151 | 152 |
152 // store & bump | 153 // store & bump |
153 do { | 154 do { |
154 int16x8x2_t hi16; | 155 int16x8x2_t hi16; |
155 | 156 |
156 hi16.val[0] = TILEX_PROCF_NEON8(xbase, x2base, maxX); | 157 hi16.val[0] = TILEX_PROCF_NEON8(xbase, x2base, maxX); |
157 hi16.val[1] = TILEY_PROCF_NEON8(ybase, y2base, maxY); | 158 hi16.val[1] = TILEY_PROCF_NEON8(ybase, y2base, maxY); |
158 | 159 |
159 vst2q_s16(dst16, hi16); | 160 vst2q_s16(dst16, hi16); |
160 | 161 |
161 // moving base and on to the next | 162 // moving base and on to the next |
162 xbase = vaddq_s32(xbase, vdupq_n_s32(dx8)); | 163 xbase = vaddq_s32(xbase, vdupq_n_s32(SkFractionalIntToFixed(dx8))); |
163 ybase = vaddq_s32(ybase, vdupq_n_s32(dy8)); | 164 ybase = vaddq_s32(ybase, vdupq_n_s32(SkFractionalIntToFixed(dy8))); |
164 x2base = vaddq_s32(x2base, vdupq_n_s32(dx8)); | 165 x2base = vaddq_s32(x2base, vdupq_n_s32(SkFractionalIntToFixed(dx8))); |
165 y2base = vaddq_s32(y2base, vdupq_n_s32(dy8)); | 166 y2base = vaddq_s32(y2base, vdupq_n_s32(SkFractionalIntToFixed(dy8))); |
166 | 167 |
167 dst16 += 16; // 8x32 aka 16x16 | 168 dst16 += 16; // 8x32 aka 16x16 |
168 count -= 8; | 169 count -= 8; |
169 fx += dx8; | 170 fx += dx8; |
170 fy += dy8; | 171 fy += dy8; |
171 } while (count >= 8); | 172 } while (count >= 8); |
172 xy = (uint32_t *) dst16; | 173 xy = (uint32_t *) dst16; |
173 } | 174 } |
174 | 175 |
175 for (int i = count; i > 0; --i) { | 176 for (int i = count; i > 0; --i) { |
176 *xy++ = (TILEY_PROCF(fy, maxY) << 16) | TILEX_PROCF(fx, maxX); | 177 *xy++ = (TILEY_PROCF(SkFractionalIntToFixed(fy), maxY) << 16) | |
| 178 TILEX_PROCF(SkFractionalIntToFixed(fx), maxX); |
177 fx += dx; fy += dy; | 179 fx += dx; fy += dy; |
178 } | 180 } |
179 } | 181 } |
180 | 182 |
181 static void PERSP_NOFILTER_NAME(const SkBitmapProcState& s, | 183 static void PERSP_NOFILTER_NAME(const SkBitmapProcState& s, |
182 uint32_t* SK_RESTRICT xy, | 184 uint32_t* SK_RESTRICT xy, |
183 int count, int x, int y) { | 185 int count, int x, int y) { |
184 SkASSERT(s.fInvType & SkMatrix::kPerspective_Mask); | 186 SkASSERT(s.fInvType & SkMatrix::kPerspective_Mask); |
185 | 187 |
186 PREAMBLE(s); | 188 PREAMBLE(s); |
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
286 static void SCALE_FILTER_NAME(const SkBitmapProcState& s, | 288 static void SCALE_FILTER_NAME(const SkBitmapProcState& s, |
287 uint32_t xy[], int count, int x, int y) { | 289 uint32_t xy[], int count, int x, int y) { |
288 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask | | 290 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask | |
289 SkMatrix::kScale_Mask)) == 0); | 291 SkMatrix::kScale_Mask)) == 0); |
290 SkASSERT(s.fInvKy == 0); | 292 SkASSERT(s.fInvKy == 0); |
291 | 293 |
292 PREAMBLE(s); | 294 PREAMBLE(s); |
293 | 295 |
294 const unsigned maxX = s.fBitmap->width() - 1; | 296 const unsigned maxX = s.fBitmap->width() - 1; |
295 const SkFixed one = s.fFilterOneX; | 297 const SkFixed one = s.fFilterOneX; |
296 const SkFixed dx = s.fInvSx; | 298 const SkFractionalInt dx = s.fInvSxFractionalInt; |
297 SkFixed fx; | 299 SkFractionalInt fx; |
298 | 300 |
299 { | 301 { |
300 SkPoint pt; | 302 SkPoint pt; |
301 s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf, | 303 s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf, |
302 SkIntToScalar(y) + SK_ScalarHalf, &pt); | 304 SkIntToScalar(y) + SK_ScalarHalf, &pt); |
303 const SkFixed fy = SkScalarToFixed(pt.fY) - (s.fFilterOneY >> 1); | 305 const SkFixed fy = SkScalarToFixed(pt.fY) - (s.fFilterOneY >> 1); |
304 const unsigned maxY = s.fBitmap->height() - 1; | 306 const unsigned maxY = s.fBitmap->height() - 1; |
305 // compute our two Y values up front | 307 // compute our two Y values up front |
306 *xy++ = PACK_FILTER_Y_NAME(fy, maxY, s.fFilterOneY PREAMBLE_ARG_Y); | 308 *xy++ = PACK_FILTER_Y_NAME(fy, maxY, s.fFilterOneY PREAMBLE_ARG_Y); |
307 // now initialize fx | 309 // now initialize fx |
308 fx = SkScalarToFixed(pt.fX) - (one >> 1); | 310 fx = SkScalarToFractionalInt(pt.fX) - (SkFixedToFractionalInt(one) >> 1); |
309 } | 311 } |
310 | 312 |
311 #ifdef CHECK_FOR_DECAL | 313 #ifdef CHECK_FOR_DECAL |
312 // test if we don't need to apply the tile proc | 314 // test if we don't need to apply the tile proc |
313 if (can_truncate_to_fixed_for_decal(fx, dx, count, maxX)) { | 315 if (can_truncate_to_fixed_for_decal(fx, dx, count, maxX)) { |
314 decal_filter_scale_neon(xy, fx, dx, count); | 316 decal_filter_scale_neon(xy, SkFractionalIntToFixed(fx), |
| 317 SkFractionalIntToFixed(dx), count); |
315 return; | 318 return; |
316 } | 319 } |
317 #endif | 320 #endif |
318 { | 321 { |
319 | 322 |
320 if (count >= 4) { | 323 if (count >= 4) { |
321 int32x4_t wide_fx; | 324 int32x4_t wide_fx; |
322 | 325 |
323 wide_fx = vdupq_n_s32(fx); | 326 wide_fx = vdupq_n_s32(SkFractionalIntToFixed(fx)); |
324 wide_fx = vsetq_lane_s32(fx+dx, wide_fx, 1); | 327 wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx), wide_fx, 1); |
325 wide_fx = vsetq_lane_s32(fx+dx+dx, wide_fx, 2); | 328 wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx), wide_fx, 2); |
326 wide_fx = vsetq_lane_s32(fx+dx+dx+dx, wide_fx, 3); | 329 wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx+dx), wide_fx, 3); |
327 | 330 |
328 while (count >= 4) { | 331 while (count >= 4) { |
329 int32x4_t res; | 332 int32x4_t res; |
330 | 333 |
331 res = PACK_FILTER_X4_NAME(wide_fx, maxX, one PREAMBLE_ARG_X); | 334 res = PACK_FILTER_X4_NAME(wide_fx, maxX, one PREAMBLE_ARG_X); |
332 | 335 |
333 vst1q_u32(xy, vreinterpretq_u32_s32(res)); | 336 vst1q_u32(xy, vreinterpretq_u32_s32(res)); |
334 | 337 |
335 wide_fx += vdupq_n_s32(dx+dx+dx+dx); | 338 wide_fx += vdupq_n_s32(SkFractionalIntToFixed(dx+dx+dx+dx)); |
336 fx += dx+dx+dx+dx; | 339 fx += dx+dx+dx+dx; |
337 xy += 4; | 340 xy += 4; |
338 count -= 4; | 341 count -= 4; |
339 } | 342 } |
340 } | 343 } |
341 | 344 |
342 while (--count >= 0) { | 345 while (--count >= 0) { |
343 *xy++ = PACK_FILTER_X_NAME(fx, maxX, one PREAMBLE_ARG_X); | 346 *xy++ = PACK_FILTER_X_NAME(SkFractionalIntToFixed(fx), maxX, one PREAMBLE_ARG_X); |
344 fx += dx; | 347 fx += dx; |
345 } | 348 } |
346 | 349 |
347 } | 350 } |
348 } | 351 } |
349 | 352 |
350 static void AFFINE_FILTER_NAME(const SkBitmapProcState& s, | 353 static void AFFINE_FILTER_NAME(const SkBitmapProcState& s, |
351 uint32_t xy[], int count, int x, int y) { | 354 uint32_t xy[], int count, int x, int y) { |
352 SkASSERT(s.fInvType & SkMatrix::kAffine_Mask); | 355 SkASSERT(s.fInvType & SkMatrix::kAffine_Mask); |
353 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask | | 356 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask | |
(...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
494 #undef PERSP_FILTER_NAME | 497 #undef PERSP_FILTER_NAME |
495 | 498 |
496 #undef PREAMBLE | 499 #undef PREAMBLE |
497 #undef PREAMBLE_PARAM_X | 500 #undef PREAMBLE_PARAM_X |
498 #undef PREAMBLE_PARAM_Y | 501 #undef PREAMBLE_PARAM_Y |
499 #undef PREAMBLE_ARG_X | 502 #undef PREAMBLE_ARG_X |
500 #undef PREAMBLE_ARG_Y | 503 #undef PREAMBLE_ARG_Y |
501 | 504 |
502 #undef TILEX_LOW_BITS | 505 #undef TILEX_LOW_BITS |
503 #undef TILEY_LOW_BITS | 506 #undef TILEY_LOW_BITS |
OLD | NEW |