Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(459)

Side by Side Diff: src/core/SkPoint.cpp

Issue 60083014: Add sk_float_rsqrt with SSE + NEON fast paths. (Closed) Base URL: https://skia.googlecode.com/svn/trunk
Patch Set: test Created 7 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « include/core/SkPoint.h ('k') | tests/PointTest.cpp » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 1
2 /* 2 /*
3 * Copyright 2008 The Android Open Source Project 3 * Copyright 2008 The Android Open Source Project
4 * 4 *
5 * Use of this source code is governed by a BSD-style license that can be 5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file. 6 * found in the LICENSE file.
7 */ 7 */
8 8
9 9
10 #include "SkPoint.h" 10 #include "SkPoint.h"
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
80 } 80 }
81 81
82 bool SkPoint::setNormalize(SkScalar x, SkScalar y) { 82 bool SkPoint::setNormalize(SkScalar x, SkScalar y) {
83 return this->setLength(x, y, SK_Scalar1); 83 return this->setLength(x, y, SK_Scalar1);
84 } 84 }
85 85
86 bool SkPoint::setLength(SkScalar length) { 86 bool SkPoint::setLength(SkScalar length) {
87 return this->setLength(fX, fY, length); 87 return this->setLength(fX, fY, length);
88 } 88 }
89 89
90 #ifdef SK_SCALAR_IS_FLOAT
91
92 // Returns the square of the Euclidian distance to (dx,dy). 90 // Returns the square of the Euclidian distance to (dx,dy).
93 static inline float getLengthSquared(float dx, float dy) { 91 static inline float getLengthSquared(float dx, float dy) {
94 return dx * dx + dy * dy; 92 return dx * dx + dy * dy;
95 } 93 }
96 94
97 // Calculates the square of the Euclidian distance to (dx,dy) and stores it in 95 // Calculates the square of the Euclidian distance to (dx,dy) and stores it in
98 // *lengthSquared. Returns true if the distance is judged to be "nearly zero". 96 // *lengthSquared. Returns true if the distance is judged to be "nearly zero".
99 // 97 //
100 // This logic is encapsulated in a helper method to make it explicit that we 98 // This logic is encapsulated in a helper method to make it explicit that we
101 // always perform this check in the same manner, to avoid inconsistencies 99 // always perform this check in the same manner, to avoid inconsistencies
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
170 // divide by inf. and return (0,0) vector. 168 // divide by inf. and return (0,0) vector.
171 double xx = x; 169 double xx = x;
172 double yy = y; 170 double yy = y;
173 scale = (float)(length / sqrt(xx * xx + yy * yy)); 171 scale = (float)(length / sqrt(xx * xx + yy * yy));
174 } 172 }
175 fX = x * scale; 173 fX = x * scale;
176 fY = y * scale; 174 fY = y * scale;
177 return true; 175 return true;
178 } 176 }
179 177
180 #else 178 bool SkPoint::setLengthFast(float length) {
181 179 return this->setLengthFast(fX, fY, length);
182 #include "Sk64.h"
183
184 // Returns the square of the Euclidian distance to (dx,dy) in *result.
185 static inline void getLengthSquared(SkScalar dx, SkScalar dy, Sk64 *result) {
186 Sk64 dySqr;
187
188 result->setMul(dx, dx);
189 dySqr.setMul(dy, dy);
190 result->add(dySqr);
191 } 180 }
192 181
193 // Calculates the square of the Euclidian distance to (dx,dy) and stores it in 182 bool SkPoint::setLengthFast(float x, float y, float length) {
194 // *lengthSquared. Returns true if the distance is judged to be "nearly zero". 183 float mag2;
195 // 184 if (isLengthNearlyZero(x, y, &mag2)) {
196 // This logic is encapsulated in a helper method to make it explicit that we 185 return false;
197 // always perform this check in the same manner, to avoid inconsistencies 186 }
198 // (see http://code.google.com/p/skia/issues/detail?id=560 ).
199 static inline bool isLengthNearlyZero(SkScalar dx, SkScalar dy,
200 Sk64 *lengthSquared) {
201 Sk64 tolSqr;
202 getLengthSquared(dx, dy, lengthSquared);
203 187
204 // we want nearlyzero^2, but to compute it fast we want to just do a 188 float scale;
205 // 32bit multiply, so we require that it not exceed 31bits. That is true 189 if (SkScalarIsFinite(mag2)) {
206 // if nearlyzero is <= 0xB504, which should be trivial, since usually 190 scale = length * sk_float_rsqrt(mag2); // <--- this is the difference
207 // nearlyzero is a very small fixed-point value. 191 } else {
208 SkASSERT(SK_ScalarNearlyZero <= 0xB504); 192 // our mag2 step overflowed to infinity, so use doubles instead.
209 193 // much slower, but needed when x or y are very large, other wise we
210 tolSqr.set(0, SK_ScalarNearlyZero * SK_ScalarNearlyZero); 194 // divide by inf. and return (0,0) vector.
211 return *lengthSquared <= tolSqr; 195 double xx = x;
196 double yy = y;
197 scale = (float)(length / sqrt(xx * xx + yy * yy));
198 }
199 fX = x * scale;
200 fY = y * scale;
201 return true;
212 } 202 }
213 203
214 SkScalar SkPoint::Normalize(SkPoint* pt) {
215 Sk64 mag2;
216 if (!isLengthNearlyZero(pt->fX, pt->fY, &mag2)) {
217 SkScalar mag = mag2.getSqrt();
218 SkScalar scale = SkScalarInvert(mag);
219 pt->fX = SkScalarMul(pt->fX, scale);
220 pt->fY = SkScalarMul(pt->fY, scale);
221 return mag;
222 }
223 return 0;
224 }
225
226 bool SkPoint::CanNormalize(SkScalar dx, SkScalar dy) {
227 Sk64 mag2_unused;
228 return !isLengthNearlyZero(dx, dy, &mag2_unused);
229 }
230
231 SkScalar SkPoint::Length(SkScalar dx, SkScalar dy) {
232 Sk64 tmp;
233 getLengthSquared(dx, dy, &tmp);
234 return tmp.getSqrt();
235 }
236
237 #ifdef SK_DEBUGx
238 static SkFixed fixlen(SkFixed x, SkFixed y) {
239 float fx = (float)x;
240 float fy = (float)y;
241
242 return (int)floorf(sqrtf(fx*fx + fy*fy) + 0.5f);
243 }
244 #endif
245
246 static inline uint32_t squarefixed(unsigned x) {
247 x >>= 16;
248 return x*x;
249 }
250
251 #if 1 // Newton iter for setLength
252
253 static inline unsigned invsqrt_iter(unsigned V, unsigned U) {
254 unsigned x = V * U >> 14;
255 x = x * U >> 14;
256 x = (3 << 14) - x;
257 x = (U >> 1) * x >> 14;
258 return x;
259 }
260
261 static const uint16_t gInvSqrt14GuessTable[] = {
262 0x4000, 0x3c57, 0x393e, 0x3695, 0x3441, 0x3235, 0x3061,
263 0x2ebd, 0x2d41, 0x2be7, 0x2aaa, 0x2987, 0x287a, 0x2780,
264 0x2698, 0x25be, 0x24f3, 0x2434, 0x2380, 0x22d6, 0x2235,
265 0x219d, 0x210c, 0x2083, 0x2000, 0x1f82, 0x1f0b, 0x1e99,
266 0x1e2b, 0x1dc2, 0x1d5d, 0x1cfc, 0x1c9f, 0x1c45, 0x1bee,
267 0x1b9b, 0x1b4a, 0x1afc, 0x1ab0, 0x1a67, 0x1a20, 0x19dc,
268 0x1999, 0x1959, 0x191a, 0x18dd, 0x18a2, 0x1868, 0x1830,
269 0x17fa, 0x17c4, 0x1791, 0x175e, 0x172d, 0x16fd, 0x16ce
270 };
271
272 #define BUILD_INVSQRT_TABLEx
273 #ifdef BUILD_INVSQRT_TABLE
274 static void build_invsqrt14_guess_table() {
275 for (int i = 8; i <= 63; i++) {
276 unsigned x = SkToU16((1 << 28) / SkSqrt32(i << 25));
277 printf("0x%x, ", x);
278 }
279 printf("\n");
280 }
281 #endif
282
283 static unsigned fast_invsqrt(uint32_t x) {
284 #ifdef BUILD_INVSQRT_TABLE
285 unsigned top2 = x >> 25;
286 SkASSERT(top2 >= 8 && top2 <= 63);
287
288 static bool gOnce;
289 if (!gOnce) {
290 build_invsqrt14_guess_table();
291 gOnce = true;
292 }
293 #endif
294
295 unsigned V = x >> 14; // make V .14
296
297 unsigned top = x >> 25;
298 SkASSERT(top >= 8 && top <= 63);
299 SkASSERT(top - 8 < SK_ARRAY_COUNT(gInvSqrt14GuessTable));
300 unsigned U = gInvSqrt14GuessTable[top - 8];
301
302 U = invsqrt_iter(V, U);
303 return invsqrt_iter(V, U);
304 }
305
306 /* We "normalize" x,y to be .14 values (so we can square them and stay 32bits.
307 Then we Newton-iterate this in .14 space to compute the invser-sqrt, and
308 scale by it at the end. The .14 space means we can execute our iterations
309 and stay in 32bits as well, making the multiplies much cheaper than calling
310 SkFixedMul.
311 */
312 bool SkPoint::setLength(SkFixed ox, SkFixed oy, SkFixed length) {
313 if (ox == 0) {
314 if (oy == 0) {
315 return false;
316 }
317 this->set(0, SkApplySign(length, SkExtractSign(oy)));
318 return true;
319 }
320 if (oy == 0) {
321 this->set(SkApplySign(length, SkExtractSign(ox)), 0);
322 return true;
323 }
324
325 unsigned x = SkAbs32(ox);
326 unsigned y = SkAbs32(oy);
327 int zeros = SkCLZ(x | y);
328
329 // make x,y 1.14 values so our fast sqr won't overflow
330 if (zeros > 17) {
331 x <<= zeros - 17;
332 y <<= zeros - 17;
333 } else {
334 x >>= 17 - zeros;
335 y >>= 17 - zeros;
336 }
337 SkASSERT((x | y) <= 0x7FFF);
338
339 unsigned invrt = fast_invsqrt(x*x + y*y);
340
341 x = x * invrt >> 12;
342 y = y * invrt >> 12;
343
344 if (length != SK_Fixed1) {
345 x = SkFixedMul(x, length);
346 y = SkFixedMul(y, length);
347 }
348 this->set(SkApplySign(x, SkExtractSign(ox)),
349 SkApplySign(y, SkExtractSign(oy)));
350 return true;
351 }
352 #else
353 /*
354 Normalize x,y, and then scale them by length.
355
356 The obvious way to do this would be the following:
357 S64 tmp1, tmp2;
358 tmp1.setMul(x,x);
359 tmp2.setMul(y,y);
360 tmp1.add(tmp2);
361 len = tmp1.getSqrt();
362 x' = SkFixedDiv(x, len);
363 y' = SkFixedDiv(y, len);
364 This is fine, but slower than what we do below.
365
366 The present technique does not compute the starting length, but
367 rather fiddles with x,y iteratively, all the while checking its
368 magnitude^2 (avoiding a sqrt).
369
370 We normalize by first shifting x,y so that at least one of them
371 has bit 31 set (after taking the abs of them).
372 Then we loop, refining x,y by squaring them and comparing
373 against a very large 1.0 (1 << 28), and then adding or subtracting
374 a delta (which itself is reduced by half each time through the loop).
375 For speed we want the squaring to be with a simple integer mul. To keep
376 that from overflowing we shift our coordinates down until we are dealing
377 with at most 15 bits (2^15-1)^2 * 2 says withing 32 bits)
378 When our square is close to 1.0, we shift x,y down into fixed range.
379 */
380 bool SkPoint::setLength(SkFixed ox, SkFixed oy, SkFixed length) {
381 if (ox == 0) {
382 if (oy == 0)
383 return false;
384 this->set(0, SkApplySign(length, SkExtractSign(oy)));
385 return true;
386 }
387 if (oy == 0) {
388 this->set(SkApplySign(length, SkExtractSign(ox)), 0);
389 return true;
390 }
391
392 SkFixed x = SkAbs32(ox);
393 SkFixed y = SkAbs32(oy);
394
395 // shift x,y so that the greater of them is 15bits (1.14 fixed point)
396 {
397 int shift = SkCLZ(x | y);
398 // make them .30
399 x <<= shift - 1;
400 y <<= shift - 1;
401 }
402
403 SkFixed dx = x;
404 SkFixed dy = y;
405
406 for (int i = 0; i < 17; i++) {
407 dx >>= 1;
408 dy >>= 1;
409
410 U32 len2 = squarefixed(x) + squarefixed(y);
411 if (len2 >> 28) {
412 x -= dx;
413 y -= dy;
414 } else {
415 x += dx;
416 y += dy;
417 }
418 }
419 x >>= 14;
420 y >>= 14;
421
422 #ifdef SK_DEBUGx // measure how far we are from unit-length
423 {
424 static int gMaxError;
425 static int gMaxDiff;
426
427 SkFixed len = fixlen(x, y);
428 int err = len - SK_Fixed1;
429 err = SkAbs32(err);
430
431 if (err > gMaxError) {
432 gMaxError = err;
433 SkDebugf("gMaxError %d\n", err);
434 }
435
436 float fx = SkAbs32(ox)/65536.0f;
437 float fy = SkAbs32(oy)/65536.0f;
438 float mag = sqrtf(fx*fx + fy*fy);
439 fx /= mag;
440 fy /= mag;
441 SkFixed xx = (int)floorf(fx * 65536 + 0.5f);
442 SkFixed yy = (int)floorf(fy * 65536 + 0.5f);
443 err = SkMax32(SkAbs32(xx-x), SkAbs32(yy-y));
444 if (err > gMaxDiff) {
445 gMaxDiff = err;
446 SkDebugf("gMaxDiff %d\n", err);
447 }
448 }
449 #endif
450
451 x = SkApplySign(x, SkExtractSign(ox));
452 y = SkApplySign(y, SkExtractSign(oy));
453 if (length != SK_Fixed1) {
454 x = SkFixedMul(x, length);
455 y = SkFixedMul(y, length);
456 }
457
458 this->set(x, y);
459 return true;
460 }
461 #endif
462
463 #endif
464 204
465 /////////////////////////////////////////////////////////////////////////////// 205 ///////////////////////////////////////////////////////////////////////////////
466 206
467 SkScalar SkPoint::distanceToLineBetweenSqd(const SkPoint& a, 207 SkScalar SkPoint::distanceToLineBetweenSqd(const SkPoint& a,
468 const SkPoint& b, 208 const SkPoint& b,
469 Side* side) const { 209 Side* side) const {
470 210
471 SkVector u = b - a; 211 SkVector u = b - a;
472 SkVector v = *this - a; 212 SkVector v = *this - a;
473 213
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
508 248
509 if (uDotV <= 0) { 249 if (uDotV <= 0) {
510 return v.lengthSqd(); 250 return v.lengthSqd();
511 } else if (uDotV > uLengthSqd) { 251 } else if (uDotV > uLengthSqd) {
512 return b.distanceToSqd(*this); 252 return b.distanceToSqd(*this);
513 } else { 253 } else {
514 SkScalar det = u.cross(v); 254 SkScalar det = u.cross(v);
515 return SkScalarMulDiv(det, det, uLengthSqd); 255 return SkScalarMulDiv(det, det, uLengthSqd);
516 } 256 }
517 } 257 }
OLDNEW
« no previous file with comments | « include/core/SkPoint.h ('k') | tests/PointTest.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698