Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(672)

Side by Side Diff: src/opts/SkBitmapProcState_matrix_neon.h

Issue 167433002: ARM Skia NEON patches - 20bis - BitmapProcState accuracy (Closed) Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: Rebase Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « expectations/gm/ignored-tests.txt ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 1
2 #include <arm_neon.h> 2 #include <arm_neon.h>
3 3
4 4
5 #define SCALE_NOFILTER_NAME MAKENAME(_nofilter_scale) 5 #define SCALE_NOFILTER_NAME MAKENAME(_nofilter_scale)
6 #define SCALE_FILTER_NAME MAKENAME(_filter_scale) 6 #define SCALE_FILTER_NAME MAKENAME(_filter_scale)
7 #define AFFINE_NOFILTER_NAME MAKENAME(_nofilter_affine) 7 #define AFFINE_NOFILTER_NAME MAKENAME(_nofilter_affine)
8 #define AFFINE_FILTER_NAME MAKENAME(_filter_affine) 8 #define AFFINE_FILTER_NAME MAKENAME(_filter_affine)
9 #define PERSP_NOFILTER_NAME MAKENAME(_nofilter_persp) 9 #define PERSP_NOFILTER_NAME MAKENAME(_nofilter_persp)
10 #define PERSP_FILTER_NAME MAKENAME(_filter_persp) 10 #define PERSP_FILTER_NAME MAKENAME(_filter_persp)
(...skipping 13 matching lines...) Expand all
24 24
25 static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s, 25 static void SCALE_NOFILTER_NAME(const SkBitmapProcState& s,
26 uint32_t xy[], int count, int x, int y) { 26 uint32_t xy[], int count, int x, int y) {
27 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask | 27 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
28 SkMatrix::kScale_Mask)) == 0); 28 SkMatrix::kScale_Mask)) == 0);
29 29
30 PREAMBLE(s); 30 PREAMBLE(s);
31 31
32 // we store y, x, x, x, x, x 32 // we store y, x, x, x, x, x
33 const unsigned maxX = s.fBitmap->width() - 1; 33 const unsigned maxX = s.fBitmap->width() - 1;
34 SkFixed fx; 34 SkFractionalInt fx;
35 { 35 {
36 SkPoint pt; 36 SkPoint pt;
37 s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf, 37 s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
38 SkIntToScalar(y) + SK_ScalarHalf, &pt); 38 SkIntToScalar(y) + SK_ScalarHalf, &pt);
39 fx = SkScalarToFixed(pt.fY); 39 fx = SkScalarToFractionalInt(pt.fY);
40 const unsigned maxY = s.fBitmap->height() - 1; 40 const unsigned maxY = s.fBitmap->height() - 1;
41 *xy++ = TILEY_PROCF(fx, maxY); 41 *xy++ = TILEY_PROCF(SkFractionalIntToFixed(fx), maxY);
42 fx = SkScalarToFixed(pt.fX); 42 fx = SkScalarToFractionalInt(pt.fX);
43 } 43 }
44 44
45 if (0 == maxX) { 45 if (0 == maxX) {
46 // all of the following X values must be 0 46 // all of the following X values must be 0
47 memset(xy, 0, count * sizeof(uint16_t)); 47 memset(xy, 0, count * sizeof(uint16_t));
48 return; 48 return;
49 } 49 }
50 50
51 const SkFixed dx = s.fInvSx; 51 const SkFractionalInt dx = s.fInvSxFractionalInt;
52 52
53 #ifdef CHECK_FOR_DECAL 53 #ifdef CHECK_FOR_DECAL
54 // test if we don't need to apply the tile proc 54 // test if we don't need to apply the tile proc
55 if (can_truncate_to_fixed_for_decal(fx, dx, count, maxX)) { 55 if (can_truncate_to_fixed_for_decal(fx, dx, count, maxX)) {
56 decal_nofilter_scale_neon(xy, fx, dx, count); 56 decal_nofilter_scale_neon(xy, SkFractionalIntToFixed(fx),
57 SkFractionalIntToFixed(dx), count);
57 return; 58 return;
58 } 59 }
59 #endif 60 #endif
60 61
61 if (count >= 8) { 62 if (count >= 8) {
62 SkFixed dx2 = dx+dx; 63 SkFractionalInt dx2 = dx+dx;
63 SkFixed dx4 = dx2+dx2; 64 SkFractionalInt dx4 = dx2+dx2;
64 SkFixed dx8 = dx4+dx4; 65 SkFractionalInt dx8 = dx4+dx4;
65 66
66 // now build fx/fx+dx/fx+2dx/fx+3dx 67 // now build fx/fx+dx/fx+2dx/fx+3dx
67 SkFixed fx1, fx2, fx3; 68 SkFractionalInt fx1, fx2, fx3;
68 int32x4_t lbase, hbase; 69 int32x4_t lbase, hbase;
69 int16_t *dst16 = (int16_t *)xy; 70 int16_t *dst16 = (int16_t *)xy;
70 71
71 fx1 = fx+dx; 72 fx1 = fx+dx;
72 fx2 = fx1+dx; 73 fx2 = fx1+dx;
73 fx3 = fx2+dx; 74 fx3 = fx2+dx;
74 75
75 lbase = vdupq_n_s32(fx); 76 lbase = vdupq_n_s32(SkFractionalIntToFixed(fx));
76 lbase = vsetq_lane_s32(fx1, lbase, 1); 77 lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx1), lbase, 1);
77 lbase = vsetq_lane_s32(fx2, lbase, 2); 78 lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx2), lbase, 2);
78 lbase = vsetq_lane_s32(fx3, lbase, 3); 79 lbase = vsetq_lane_s32(SkFractionalIntToFixed(fx3), lbase, 3);
79 hbase = vaddq_s32(lbase, vdupq_n_s32(dx4)); 80 hbase = vaddq_s32(lbase, vdupq_n_s32(SkFractionalIntToFixed(dx4)));
80 81
81 // store & bump 82 // store & bump
82 while (count >= 8) { 83 while (count >= 8) {
83 84
84 int16x8_t fx8; 85 int16x8_t fx8;
85 86
86 fx8 = TILEX_PROCF_NEON8(lbase, hbase, maxX); 87 fx8 = TILEX_PROCF_NEON8(lbase, hbase, maxX);
87 88
88 vst1q_s16(dst16, fx8); 89 vst1q_s16(dst16, fx8);
89 90
90 // but preserving base & on to the next 91 // but preserving base & on to the next
91 lbase = vaddq_s32 (lbase, vdupq_n_s32(dx8)); 92 lbase = vaddq_s32 (lbase, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
92 hbase = vaddq_s32 (hbase, vdupq_n_s32(dx8)); 93 hbase = vaddq_s32 (hbase, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
93 dst16 += 8; 94 dst16 += 8;
94 count -= 8; 95 count -= 8;
95 fx += dx8; 96 fx += dx8;
96 }; 97 };
97 xy = (uint32_t *) dst16; 98 xy = (uint32_t *) dst16;
98 } 99 }
99 100
100 uint16_t* xx = (uint16_t*)xy; 101 uint16_t* xx = (uint16_t*)xy;
101 for (int i = count; i > 0; --i) { 102 for (int i = count; i > 0; --i) {
102 *xx++ = TILEX_PROCF(fx, maxX); 103 *xx++ = TILEX_PROCF(SkFractionalIntToFixed(fx), maxX);
103 fx += dx; 104 fx += dx;
104 } 105 }
105 } 106 }
106 107
107 static void AFFINE_NOFILTER_NAME(const SkBitmapProcState& s, 108 static void AFFINE_NOFILTER_NAME(const SkBitmapProcState& s,
108 uint32_t xy[], int count, int x, int y) { 109 uint32_t xy[], int count, int x, int y) {
109 SkASSERT(s.fInvType & SkMatrix::kAffine_Mask); 110 SkASSERT(s.fInvType & SkMatrix::kAffine_Mask);
110 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask | 111 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
111 SkMatrix::kScale_Mask | 112 SkMatrix::kScale_Mask |
112 SkMatrix::kAffine_Mask)) == 0); 113 SkMatrix::kAffine_Mask)) == 0);
113 114
114 PREAMBLE(s); 115 PREAMBLE(s);
115 SkPoint srcPt; 116 SkPoint srcPt;
116 s.fInvProc(s.fInvMatrix, 117 s.fInvProc(s.fInvMatrix,
117 SkIntToScalar(x) + SK_ScalarHalf, 118 SkIntToScalar(x) + SK_ScalarHalf,
118 SkIntToScalar(y) + SK_ScalarHalf, &srcPt); 119 SkIntToScalar(y) + SK_ScalarHalf, &srcPt);
119 120
120 SkFixed fx = SkScalarToFixed(srcPt.fX); 121 SkFractionalInt fx = SkScalarToFractionalInt(srcPt.fX);
121 SkFixed fy = SkScalarToFixed(srcPt.fY); 122 SkFractionalInt fy = SkScalarToFractionalInt(srcPt.fY);
122 SkFixed dx = s.fInvSx; 123 SkFractionalInt dx = s.fInvSxFractionalInt;
123 SkFixed dy = s.fInvKy; 124 SkFractionalInt dy = s.fInvKyFractionalInt;
124 int maxX = s.fBitmap->width() - 1; 125 int maxX = s.fBitmap->width() - 1;
125 int maxY = s.fBitmap->height() - 1; 126 int maxY = s.fBitmap->height() - 1;
126 127
127 if (count >= 8) { 128 if (count >= 8) {
128 SkFixed dx4 = dx * 4; 129 SkFractionalInt dx4 = dx * 4;
129 SkFixed dy4 = dy * 4; 130 SkFractionalInt dy4 = dy * 4;
130 SkFixed dx8 = dx * 8; 131 SkFractionalInt dx8 = dx * 8;
131 SkFixed dy8 = dy * 8; 132 SkFractionalInt dy8 = dy * 8;
132 133
133 int32x4_t xbase, ybase; 134 int32x4_t xbase, ybase;
134 int32x4_t x2base, y2base; 135 int32x4_t x2base, y2base;
135 int16_t *dst16 = (int16_t *) xy; 136 int16_t *dst16 = (int16_t *) xy;
136 137
137 // now build fx, fx+dx, fx+2dx, fx+3dx 138 // now build fx, fx+dx, fx+2dx, fx+3dx
138 xbase = vdupq_n_s32(fx); 139 xbase = vdupq_n_s32(SkFractionalIntToFixed(fx));
139 xbase = vsetq_lane_s32(fx+dx, xbase, 1); 140 xbase = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx), xbase, 1);
140 xbase = vsetq_lane_s32(fx+dx+dx, xbase, 2); 141 xbase = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx), xbase, 2);
141 xbase = vsetq_lane_s32(fx+dx+dx+dx, xbase, 3); 142 xbase = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx+dx), xbase, 3);
142 143
143 // same for fy 144 // same for fy
144 ybase = vdupq_n_s32(fy); 145 ybase = vdupq_n_s32(SkFractionalIntToFixed(fy));
145 ybase = vsetq_lane_s32(fy+dy, ybase, 1); 146 ybase = vsetq_lane_s32(SkFractionalIntToFixed(fy+dy), ybase, 1);
146 ybase = vsetq_lane_s32(fy+dy+dy, ybase, 2); 147 ybase = vsetq_lane_s32(SkFractionalIntToFixed(fy+dy+dy), ybase, 2);
147 ybase = vsetq_lane_s32(fy+dy+dy+dy, ybase, 3); 148 ybase = vsetq_lane_s32(SkFractionalIntToFixed(fy+dy+dy+dy), ybase, 3);
148 149
149 x2base = vaddq_s32(xbase, vdupq_n_s32(dx4)); 150 x2base = vaddq_s32(xbase, vdupq_n_s32(SkFractionalIntToFixed(dx4)));
150 y2base = vaddq_s32(ybase, vdupq_n_s32(dy4)); 151 y2base = vaddq_s32(ybase, vdupq_n_s32(SkFractionalIntToFixed(dy4)));
151 152
152 // store & bump 153 // store & bump
153 do { 154 do {
154 int16x8x2_t hi16; 155 int16x8x2_t hi16;
155 156
156 hi16.val[0] = TILEX_PROCF_NEON8(xbase, x2base, maxX); 157 hi16.val[0] = TILEX_PROCF_NEON8(xbase, x2base, maxX);
157 hi16.val[1] = TILEY_PROCF_NEON8(ybase, y2base, maxY); 158 hi16.val[1] = TILEY_PROCF_NEON8(ybase, y2base, maxY);
158 159
159 vst2q_s16(dst16, hi16); 160 vst2q_s16(dst16, hi16);
160 161
161 // moving base and on to the next 162 // moving base and on to the next
162 xbase = vaddq_s32(xbase, vdupq_n_s32(dx8)); 163 xbase = vaddq_s32(xbase, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
163 ybase = vaddq_s32(ybase, vdupq_n_s32(dy8)); 164 ybase = vaddq_s32(ybase, vdupq_n_s32(SkFractionalIntToFixed(dy8)));
164 x2base = vaddq_s32(x2base, vdupq_n_s32(dx8)); 165 x2base = vaddq_s32(x2base, vdupq_n_s32(SkFractionalIntToFixed(dx8)));
165 y2base = vaddq_s32(y2base, vdupq_n_s32(dy8)); 166 y2base = vaddq_s32(y2base, vdupq_n_s32(SkFractionalIntToFixed(dy8)));
166 167
167 dst16 += 16; // 8x32 aka 16x16 168 dst16 += 16; // 8x32 aka 16x16
168 count -= 8; 169 count -= 8;
169 fx += dx8; 170 fx += dx8;
170 fy += dy8; 171 fy += dy8;
171 } while (count >= 8); 172 } while (count >= 8);
172 xy = (uint32_t *) dst16; 173 xy = (uint32_t *) dst16;
173 } 174 }
174 175
175 for (int i = count; i > 0; --i) { 176 for (int i = count; i > 0; --i) {
176 *xy++ = (TILEY_PROCF(fy, maxY) << 16) | TILEX_PROCF(fx, maxX); 177 *xy++ = (TILEY_PROCF(SkFractionalIntToFixed(fy), maxY) << 16) |
178 TILEX_PROCF(SkFractionalIntToFixed(fx), maxX);
177 fx += dx; fy += dy; 179 fx += dx; fy += dy;
178 } 180 }
179 } 181 }
180 182
181 static void PERSP_NOFILTER_NAME(const SkBitmapProcState& s, 183 static void PERSP_NOFILTER_NAME(const SkBitmapProcState& s,
182 uint32_t* SK_RESTRICT xy, 184 uint32_t* SK_RESTRICT xy,
183 int count, int x, int y) { 185 int count, int x, int y) {
184 SkASSERT(s.fInvType & SkMatrix::kPerspective_Mask); 186 SkASSERT(s.fInvType & SkMatrix::kPerspective_Mask);
185 187
186 PREAMBLE(s); 188 PREAMBLE(s);
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after
286 static void SCALE_FILTER_NAME(const SkBitmapProcState& s, 288 static void SCALE_FILTER_NAME(const SkBitmapProcState& s,
287 uint32_t xy[], int count, int x, int y) { 289 uint32_t xy[], int count, int x, int y) {
288 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask | 290 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
289 SkMatrix::kScale_Mask)) == 0); 291 SkMatrix::kScale_Mask)) == 0);
290 SkASSERT(s.fInvKy == 0); 292 SkASSERT(s.fInvKy == 0);
291 293
292 PREAMBLE(s); 294 PREAMBLE(s);
293 295
294 const unsigned maxX = s.fBitmap->width() - 1; 296 const unsigned maxX = s.fBitmap->width() - 1;
295 const SkFixed one = s.fFilterOneX; 297 const SkFixed one = s.fFilterOneX;
296 const SkFixed dx = s.fInvSx; 298 const SkFractionalInt dx = s.fInvSxFractionalInt;
297 SkFixed fx; 299 SkFractionalInt fx;
298 300
299 { 301 {
300 SkPoint pt; 302 SkPoint pt;
301 s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf, 303 s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
302 SkIntToScalar(y) + SK_ScalarHalf, &pt); 304 SkIntToScalar(y) + SK_ScalarHalf, &pt);
303 const SkFixed fy = SkScalarToFixed(pt.fY) - (s.fFilterOneY >> 1); 305 const SkFixed fy = SkScalarToFixed(pt.fY) - (s.fFilterOneY >> 1);
304 const unsigned maxY = s.fBitmap->height() - 1; 306 const unsigned maxY = s.fBitmap->height() - 1;
305 // compute our two Y values up front 307 // compute our two Y values up front
306 *xy++ = PACK_FILTER_Y_NAME(fy, maxY, s.fFilterOneY PREAMBLE_ARG_Y); 308 *xy++ = PACK_FILTER_Y_NAME(fy, maxY, s.fFilterOneY PREAMBLE_ARG_Y);
307 // now initialize fx 309 // now initialize fx
308 fx = SkScalarToFixed(pt.fX) - (one >> 1); 310 fx = SkScalarToFractionalInt(pt.fX) - (SkFixedToFractionalInt(one) >> 1);
309 } 311 }
310 312
311 #ifdef CHECK_FOR_DECAL 313 #ifdef CHECK_FOR_DECAL
312 // test if we don't need to apply the tile proc 314 // test if we don't need to apply the tile proc
313 if (can_truncate_to_fixed_for_decal(fx, dx, count, maxX)) { 315 if (can_truncate_to_fixed_for_decal(fx, dx, count, maxX)) {
314 decal_filter_scale_neon(xy, fx, dx, count); 316 decal_filter_scale_neon(xy, SkFractionalIntToFixed(fx),
317 SkFractionalIntToFixed(dx), count);
315 return; 318 return;
316 } 319 }
317 #endif 320 #endif
318 { 321 {
319 322
320 if (count >= 4) { 323 if (count >= 4) {
321 int32x4_t wide_fx; 324 int32x4_t wide_fx;
322 325
323 wide_fx = vdupq_n_s32(fx); 326 wide_fx = vdupq_n_s32(SkFractionalIntToFixed(fx));
324 wide_fx = vsetq_lane_s32(fx+dx, wide_fx, 1); 327 wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx), wide_fx, 1);
325 wide_fx = vsetq_lane_s32(fx+dx+dx, wide_fx, 2); 328 wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx), wide_fx, 2);
326 wide_fx = vsetq_lane_s32(fx+dx+dx+dx, wide_fx, 3); 329 wide_fx = vsetq_lane_s32(SkFractionalIntToFixed(fx+dx+dx+dx), wide_fx, 3);
327 330
328 while (count >= 4) { 331 while (count >= 4) {
329 int32x4_t res; 332 int32x4_t res;
330 333
331 res = PACK_FILTER_X4_NAME(wide_fx, maxX, one PREAMBLE_ARG_X); 334 res = PACK_FILTER_X4_NAME(wide_fx, maxX, one PREAMBLE_ARG_X);
332 335
333 vst1q_u32(xy, vreinterpretq_u32_s32(res)); 336 vst1q_u32(xy, vreinterpretq_u32_s32(res));
334 337
335 wide_fx += vdupq_n_s32(dx+dx+dx+dx); 338 wide_fx += vdupq_n_s32(SkFractionalIntToFixed(dx+dx+dx+dx));
336 fx += dx+dx+dx+dx; 339 fx += dx+dx+dx+dx;
337 xy += 4; 340 xy += 4;
338 count -= 4; 341 count -= 4;
339 } 342 }
340 } 343 }
341 344
342 while (--count >= 0) { 345 while (--count >= 0) {
343 *xy++ = PACK_FILTER_X_NAME(fx, maxX, one PREAMBLE_ARG_X); 346 *xy++ = PACK_FILTER_X_NAME(SkFractionalIntToFixed(fx), maxX, one PREAMBLE_ARG_X);
344 fx += dx; 347 fx += dx;
345 } 348 }
346 349
347 } 350 }
348 } 351 }
349 352
350 static void AFFINE_FILTER_NAME(const SkBitmapProcState& s, 353 static void AFFINE_FILTER_NAME(const SkBitmapProcState& s,
351 uint32_t xy[], int count, int x, int y) { 354 uint32_t xy[], int count, int x, int y) {
352 SkASSERT(s.fInvType & SkMatrix::kAffine_Mask); 355 SkASSERT(s.fInvType & SkMatrix::kAffine_Mask);
353 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask | 356 SkASSERT((s.fInvType & ~(SkMatrix::kTranslate_Mask |
(...skipping 140 matching lines...) Expand 10 before | Expand all | Expand 10 after
494 #undef PERSP_FILTER_NAME 497 #undef PERSP_FILTER_NAME
495 498
496 #undef PREAMBLE 499 #undef PREAMBLE
497 #undef PREAMBLE_PARAM_X 500 #undef PREAMBLE_PARAM_X
498 #undef PREAMBLE_PARAM_Y 501 #undef PREAMBLE_PARAM_Y
499 #undef PREAMBLE_ARG_X 502 #undef PREAMBLE_ARG_X
500 #undef PREAMBLE_ARG_Y 503 #undef PREAMBLE_ARG_Y
501 504
502 #undef TILEX_LOW_BITS 505 #undef TILEX_LOW_BITS
503 #undef TILEY_LOW_BITS 506 #undef TILEY_LOW_BITS
OLDNEW
« no previous file with comments | « expectations/gm/ignored-tests.txt ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698