OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2005, 2006 Apple Computer, Inc. All rights reserved. | 2 * Copyright (C) 2005, 2006 Apple Computer, Inc. All rights reserved. |
3 * Copyright (C) 2009 Torch Mobile, Inc. | 3 * Copyright (C) 2009 Torch Mobile, Inc. |
4 * Copyright (C) 2013 Google Inc. All rights reserved. | 4 * Copyright (C) 2013 Google Inc. All rights reserved. |
5 * | 5 * |
6 * Redistribution and use in source and binary forms, with or without | 6 * Redistribution and use in source and binary forms, with or without |
7 * modification, are permitted provided that the following conditions | 7 * modification, are permitted provided that the following conditions |
8 * are met: | 8 * are met: |
9 * 1. Redistributions of source code must retain the above copyright | 9 * 1. Redistributions of source code must retain the above copyright |
10 * notice, this list of conditions and the following disclaimer. | 10 * notice, this list of conditions and the following disclaimer. |
(...skipping 335 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
346 "fmul v25.2d, v25.2d, v30.d[0] \n\t" | 346 "fmul v25.2d, v25.2d, v30.d[0] \n\t" |
347 "fmul v26.2d, v26.2d, v30.d[0] \n\t" | 347 "fmul v26.2d, v26.2d, v30.d[0] \n\t" |
348 "fmul v27.2d, v27.2d, v30.d[0] \n\t" | 348 "fmul v27.2d, v27.2d, v30.d[0] \n\t" |
349 "st1 {v24.2d - v27.2d}, [%[pr]] \n\t" | 349 "st1 {v24.2d - v27.2d}, [%[pr]] \n\t" |
350 : [mat] "+r"(mat), [pr] "+r"(pr) | 350 : [mat] "+r"(mat), [pr] "+r"(pr) |
351 : [rdet] "r"(rdet) | 351 : [rdet] "r"(rdet) |
352 : "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", | 352 : "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", |
353 "v18", "v19", "v20", "v21", "v22", "v23", "24", "25", "v26", "v27", | 353 "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", |
354 "v28", "v29", "v30"); | 354 "v28", "v29", "v30"); |
355 #elif HAVE(MIPS_MSA_INTRINSICS) | 355 #elif HAVE(MIPS_MSA_INTRINSICS) |
356 const double rDet = 1/det; | 356 const double rDet = 1 / det; |
357 const double* mat = &(matrix[0][0]); | 357 const double* mat = &(matrix[0][0]); |
358 v2f64 mat0, mat1, mat2, mat3, mat4, mat5, mat6, mat7; | 358 v2f64 mat0, mat1, mat2, mat3, mat4, mat5, mat6, mat7; |
359 v2f64 rev2, rev3, rev4, rev5, rev6, rev7; | 359 v2f64 rev2, rev3, rev4, rev5, rev6, rev7; |
360 v2f64 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; | 360 v2f64 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
361 v2f64 det0, det1, det2, tmp8, tmp9, tmp10, tmp11; | 361 v2f64 det0, det1, det2, tmp8, tmp9, tmp10, tmp11; |
362 const v2f64 rdet = COPY_DOUBLE_TO_VECTOR(rDet); | 362 const v2f64 rdet = COPY_DOUBLE_TO_VECTOR(rDet); |
363 // mat0 mat1 --> m00 m01 m02 m03 | 363 // mat0 mat1 --> m00 m01 m02 m03 |
364 // mat2 mat3 --> m10 m11 m12 m13 | 364 // mat2 mat3 --> m10 m11 m12 m13 |
365 // mat4 mat5 --> m20 m21 m22 m23 | 365 // mat4 mat5 --> m20 m21 m22 m23 |
366 // mat6 mat7 --> m30 m31 m32 m33 | 366 // mat6 mat7 --> m30 m31 m32 m33 |
367 LD_DP8(mat, 2, mat0, mat1, mat2, mat3, mat4, mat5, mat6, mat7); | 367 LD_DP8(mat, 2, mat0, mat1, mat2, mat3, mat4, mat5, mat6, mat7); |
368 | 368 |
369 // Right half | 369 // Right half |
370 rev3 = SLDI_D(mat3, mat3, 8); // m13 m12 | 370 rev3 = SLDI_D(mat3, mat3, 8); // m13 m12 |
371 rev5 = SLDI_D(mat5, mat5, 8); // m23 m22 | 371 rev5 = SLDI_D(mat5, mat5, 8); // m23 m22 |
372 rev7 = SLDI_D(mat7, mat7, 8); // m33 m32 | 372 rev7 = SLDI_D(mat7, mat7, 8); // m33 m32 |
373 | 373 |
374 // 2*2 Determinants | 374 // 2*2 Determinants |
375 // for A00 & A01 | 375 // for A00 & A01 |
376 tmp0 = mat5 * rev7; | 376 tmp0 = mat5 * rev7; |
377 tmp1 = mat3 * rev7; | 377 tmp1 = mat3 * rev7; |
378 tmp2 = mat3 * rev5; | 378 tmp2 = mat3 * rev5; |
379 // for A10 & A11 | 379 // for A10 & A11 |
380 tmp3 = mat1 * rev7; | 380 tmp3 = mat1 * rev7; |
381 tmp4 = mat1 * rev5; | 381 tmp4 = mat1 * rev5; |
382 // for A20 & A21 | 382 // for A20 & A21 |
383 tmp5 = mat1 * rev3; | 383 tmp5 = mat1 * rev3; |
384 // for A30 & A31 | 384 // for A30 & A31 |
385 tmp6 = (v2f64) __msa_ilvr_d((v2i64) tmp1, (v2i64) tmp0); | 385 tmp6 = (v2f64)__msa_ilvr_d((v2i64)tmp1, (v2i64)tmp0); |
386 tmp7 = (v2f64) __msa_ilvl_d((v2i64) tmp1, (v2i64) tmp0); | 386 tmp7 = (v2f64)__msa_ilvl_d((v2i64)tmp1, (v2i64)tmp0); |
387 det0 = tmp6 - tmp7; | 387 det0 = tmp6 - tmp7; |
388 tmp6 = (v2f64) __msa_ilvr_d((v2i64) tmp3, (v2i64) tmp2); | 388 tmp6 = (v2f64)__msa_ilvr_d((v2i64)tmp3, (v2i64)tmp2); |
389 tmp7 = (v2f64) __msa_ilvl_d((v2i64) tmp3, (v2i64) tmp2); | 389 tmp7 = (v2f64)__msa_ilvl_d((v2i64)tmp3, (v2i64)tmp2); |
390 det1 = tmp6 - tmp7; | 390 det1 = tmp6 - tmp7; |
391 tmp6 = (v2f64) __msa_ilvr_d((v2i64) tmp5, (v2i64) tmp4); | 391 tmp6 = (v2f64)__msa_ilvr_d((v2i64)tmp5, (v2i64)tmp4); |
392 tmp7 = (v2f64) __msa_ilvl_d((v2i64) tmp5, (v2i64) tmp4); | 392 tmp7 = (v2f64)__msa_ilvl_d((v2i64)tmp5, (v2i64)tmp4); |
393 det2 = tmp6 - tmp7; | 393 det2 = tmp6 - tmp7; |
394 | 394 |
395 // Co-factors | 395 // Co-factors |
396 tmp0 = mat0 * (v2f64) __msa_splati_d((v2i64) det0, 0); | 396 tmp0 = mat0 * (v2f64)__msa_splati_d((v2i64)det0, 0); |
397 tmp1 = mat0 * (v2f64) __msa_splati_d((v2i64) det0, 1); | 397 tmp1 = mat0 * (v2f64)__msa_splati_d((v2i64)det0, 1); |
398 tmp2 = mat0 * (v2f64) __msa_splati_d((v2i64) det1, 0); | 398 tmp2 = mat0 * (v2f64)__msa_splati_d((v2i64)det1, 0); |
399 tmp3 = mat2 * (v2f64) __msa_splati_d((v2i64) det0, 0); | 399 tmp3 = mat2 * (v2f64)__msa_splati_d((v2i64)det0, 0); |
400 tmp4 = mat2 * (v2f64) __msa_splati_d((v2i64) det1, 1); | 400 tmp4 = mat2 * (v2f64)__msa_splati_d((v2i64)det1, 1); |
401 tmp5 = mat2 * (v2f64) __msa_splati_d((v2i64) det2, 0); | 401 tmp5 = mat2 * (v2f64)__msa_splati_d((v2i64)det2, 0); |
402 tmp6 = mat4 * (v2f64) __msa_splati_d((v2i64) det0, 1); | 402 tmp6 = mat4 * (v2f64)__msa_splati_d((v2i64)det0, 1); |
403 tmp7 = mat4 * (v2f64) __msa_splati_d((v2i64) det1, 1); | 403 tmp7 = mat4 * (v2f64)__msa_splati_d((v2i64)det1, 1); |
404 tmp8 = mat4 * (v2f64) __msa_splati_d((v2i64) det2, 1); | 404 tmp8 = mat4 * (v2f64)__msa_splati_d((v2i64)det2, 1); |
405 tmp9 = mat6 * (v2f64) __msa_splati_d((v2i64) det1, 0); | 405 tmp9 = mat6 * (v2f64)__msa_splati_d((v2i64)det1, 0); |
406 tmp10 = mat6 * (v2f64) __msa_splati_d((v2i64) det2, 0); | 406 tmp10 = mat6 * (v2f64)__msa_splati_d((v2i64)det2, 0); |
407 tmp11 = mat6 * (v2f64) __msa_splati_d((v2i64) det2, 1); | 407 tmp11 = mat6 * (v2f64)__msa_splati_d((v2i64)det2, 1); |
408 | 408 |
409 tmp0 -= tmp7; | 409 tmp0 -= tmp7; |
410 tmp1 -= tmp4; | 410 tmp1 -= tmp4; |
411 tmp2 -= tmp5; | 411 tmp2 -= tmp5; |
412 tmp3 -= tmp6; | 412 tmp3 -= tmp6; |
413 tmp0 += tmp10; | 413 tmp0 += tmp10; |
414 tmp1 += tmp11; | 414 tmp1 += tmp11; |
415 tmp2 += tmp8; | 415 tmp2 += tmp8; |
416 tmp3 += tmp9; | 416 tmp3 += tmp9; |
417 | 417 |
418 // Multiply with 1/det | 418 // Multiply with 1/det |
419 tmp0 *= rdet; | 419 tmp0 *= rdet; |
420 tmp1 *= rdet; | 420 tmp1 *= rdet; |
421 tmp2 *= rdet; | 421 tmp2 *= rdet; |
422 tmp3 *= rdet; | 422 tmp3 *= rdet; |
423 | 423 |
424 // Inverse: Upper half | 424 // Inverse: Upper half |
425 result[0][0] = tmp3[1]; | 425 result[0][0] = tmp3[1]; |
426 result[0][1] = -tmp0[1]; | 426 result[0][1] = -tmp0[1]; |
427 result[0][2] = tmp1[1]; | 427 result[0][2] = tmp1[1]; |
428 result[0][3] = -tmp2[1]; | 428 result[0][3] = -tmp2[1]; |
429 result[1][0] = -tmp3[0]; | 429 result[1][0] = -tmp3[0]; |
430 result[1][1] = tmp0[0]; | 430 result[1][1] = tmp0[0]; |
431 result[1][2] = -tmp1[0]; | 431 result[1][2] = -tmp1[0]; |
432 result[1][3] = tmp2[0]; | 432 result[1][3] = tmp2[0]; |
433 // Left half | 433 // Left half |
434 rev2 = SLDI_D(mat2, mat2, 8); // m13 m12 | 434 rev2 = SLDI_D(mat2, mat2, 8); // m13 m12 |
435 rev4 = SLDI_D(mat4, mat4, 8); // m23 m22 | 435 rev4 = SLDI_D(mat4, mat4, 8); // m23 m22 |
436 rev6 = SLDI_D(mat6, mat6, 8); // m33 m32 | 436 rev6 = SLDI_D(mat6, mat6, 8); // m33 m32 |
437 | 437 |
438 // 2*2 Determinants | 438 // 2*2 Determinants |
439 // for A00 & A01 | 439 // for A00 & A01 |
440 tmp0 = mat4 * rev6; | 440 tmp0 = mat4 * rev6; |
441 tmp1 = mat2 * rev6; | 441 tmp1 = mat2 * rev6; |
442 tmp2 = mat2 * rev4; | 442 tmp2 = mat2 * rev4; |
443 // for A10 & A11 | 443 // for A10 & A11 |
444 tmp3 = mat0 * rev6; | 444 tmp3 = mat0 * rev6; |
445 tmp4 = mat0 * rev4; | 445 tmp4 = mat0 * rev4; |
446 // for A20 & A21 | 446 // for A20 & A21 |
447 tmp5 = mat0 * rev2; | 447 tmp5 = mat0 * rev2; |
448 // for A30 & A31 | 448 // for A30 & A31 |
449 tmp6 = (v2f64) __msa_ilvr_d((v2i64) tmp1, (v2i64) tmp0); | 449 tmp6 = (v2f64)__msa_ilvr_d((v2i64)tmp1, (v2i64)tmp0); |
450 tmp7 = (v2f64) __msa_ilvl_d((v2i64) tmp1, (v2i64) tmp0); | 450 tmp7 = (v2f64)__msa_ilvl_d((v2i64)tmp1, (v2i64)tmp0); |
451 det0 = tmp6 - tmp7; | 451 det0 = tmp6 - tmp7; |
452 tmp6 = (v2f64) __msa_ilvr_d((v2i64) tmp3, (v2i64) tmp2); | 452 tmp6 = (v2f64)__msa_ilvr_d((v2i64)tmp3, (v2i64)tmp2); |
453 tmp7 = (v2f64) __msa_ilvl_d((v2i64) tmp3, (v2i64) tmp2); | 453 tmp7 = (v2f64)__msa_ilvl_d((v2i64)tmp3, (v2i64)tmp2); |
454 det1 = tmp6 - tmp7; | 454 det1 = tmp6 - tmp7; |
455 tmp6 = (v2f64) __msa_ilvr_d((v2i64) tmp5, (v2i64) tmp4); | 455 tmp6 = (v2f64)__msa_ilvr_d((v2i64)tmp5, (v2i64)tmp4); |
456 tmp7 = (v2f64) __msa_ilvl_d((v2i64) tmp5, (v2i64) tmp4); | 456 tmp7 = (v2f64)__msa_ilvl_d((v2i64)tmp5, (v2i64)tmp4); |
457 det2 = tmp6 - tmp7; | 457 det2 = tmp6 - tmp7; |
458 | 458 |
459 // Co-factors | 459 // Co-factors |
460 tmp0 = mat3 * (v2f64) __msa_splati_d((v2i64) det0, 0); | 460 tmp0 = mat3 * (v2f64)__msa_splati_d((v2i64)det0, 0); |
461 tmp1 = mat1 * (v2f64) __msa_splati_d((v2i64) det0, 1); | 461 tmp1 = mat1 * (v2f64)__msa_splati_d((v2i64)det0, 1); |
462 tmp2 = mat1 * (v2f64) __msa_splati_d((v2i64) det0, 0); | 462 tmp2 = mat1 * (v2f64)__msa_splati_d((v2i64)det0, 0); |
463 tmp3 = mat1 * (v2f64) __msa_splati_d((v2i64) det1, 0); | 463 tmp3 = mat1 * (v2f64)__msa_splati_d((v2i64)det1, 0); |
464 tmp4 = mat3 * (v2f64) __msa_splati_d((v2i64) det1, 1); | 464 tmp4 = mat3 * (v2f64)__msa_splati_d((v2i64)det1, 1); |
465 tmp5 = mat3 * (v2f64) __msa_splati_d((v2i64) det2, 0); | 465 tmp5 = mat3 * (v2f64)__msa_splati_d((v2i64)det2, 0); |
466 tmp6 = mat5 * (v2f64) __msa_splati_d((v2i64) det0, 1); | 466 tmp6 = mat5 * (v2f64)__msa_splati_d((v2i64)det0, 1); |
467 tmp7 = mat5 * (v2f64) __msa_splati_d((v2i64) det1, 1); | 467 tmp7 = mat5 * (v2f64)__msa_splati_d((v2i64)det1, 1); |
468 tmp8 = mat5 * (v2f64) __msa_splati_d((v2i64) det2, 1); | 468 tmp8 = mat5 * (v2f64)__msa_splati_d((v2i64)det2, 1); |
469 tmp9 = mat7 * (v2f64) __msa_splati_d((v2i64) det1, 0); | 469 tmp9 = mat7 * (v2f64)__msa_splati_d((v2i64)det1, 0); |
470 tmp10 = mat7 * (v2f64) __msa_splati_d((v2i64) det2, 0); | 470 tmp10 = mat7 * (v2f64)__msa_splati_d((v2i64)det2, 0); |
471 tmp11 = mat7 * (v2f64) __msa_splati_d((v2i64) det2, 1); | 471 tmp11 = mat7 * (v2f64)__msa_splati_d((v2i64)det2, 1); |
472 tmp0 -= tmp6; | 472 tmp0 -= tmp6; |
473 tmp1 -= tmp4; | 473 tmp1 -= tmp4; |
474 tmp2 -= tmp7; | 474 tmp2 -= tmp7; |
475 tmp3 -= tmp5; | 475 tmp3 -= tmp5; |
476 tmp0 += tmp9; | 476 tmp0 += tmp9; |
477 tmp1 += tmp11; | 477 tmp1 += tmp11; |
478 tmp2 += tmp10; | 478 tmp2 += tmp10; |
479 tmp3 += tmp8; | 479 tmp3 += tmp8; |
480 | 480 |
481 // Multiply with 1/det | 481 // Multiply with 1/det |
(...skipping 850 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1332 | 1332 |
1333 "st1 {v0.2d - v3.2d}, [x9], 64 \t\n" | 1333 "st1 {v0.2d - v3.2d}, [x9], 64 \t\n" |
1334 "st1 {v4.2d - v7.2d}, [x9] \t\n" | 1334 "st1 {v4.2d - v7.2d}, [x9] \t\n" |
1335 : [leftMatrix] "+r"(leftMatrix), [rightMatrix] "+r"(rightMatrix) | 1335 : [leftMatrix] "+r"(leftMatrix), [rightMatrix] "+r"(rightMatrix) |
1336 : | 1336 : |
1337 : "memory", "x9", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", | 1337 : "memory", "x9", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", |
1338 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "v0", "v1", | 1338 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "v0", "v1", |
1339 "v2", "v3", "v4", "v5", "v6", "v7"); | 1339 "v2", "v3", "v4", "v5", "v6", "v7"); |
1340 #elif HAVE(MIPS_MSA_INTRINSICS) | 1340 #elif HAVE(MIPS_MSA_INTRINSICS) |
1341 v2f64 vleftM0, vleftM1, vleftM2, vleftM3, vleftM4, vleftM5, vleftM6, vleftM7; | 1341 v2f64 vleftM0, vleftM1, vleftM2, vleftM3, vleftM4, vleftM5, vleftM6, vleftM7; |
1342 v2f64 vRightM0, vRightM1, vRightM2, vRightM3, vRightM4, vRightM5, vRightM6, vR
ightM7; | 1342 v2f64 vRightM0, vRightM1, vRightM2, vRightM3, vRightM4, vRightM5, vRightM6, |
| 1343 vRightM7; |
1343 v2f64 vTmpM0, vTmpM1, vTmpM2, vTmpM3; | 1344 v2f64 vTmpM0, vTmpM1, vTmpM2, vTmpM3; |
1344 | 1345 |
1345 vRightM0 = LD_DP(&(m_matrix[0][0])); | 1346 vRightM0 = LD_DP(&(m_matrix[0][0])); |
1346 vRightM1 = LD_DP(&(m_matrix[0][2])); | 1347 vRightM1 = LD_DP(&(m_matrix[0][2])); |
1347 vRightM2 = LD_DP(&(m_matrix[1][0])); | 1348 vRightM2 = LD_DP(&(m_matrix[1][0])); |
1348 vRightM3 = LD_DP(&(m_matrix[1][2])); | 1349 vRightM3 = LD_DP(&(m_matrix[1][2])); |
1349 vRightM4 = LD_DP(&(m_matrix[2][0])); | 1350 vRightM4 = LD_DP(&(m_matrix[2][0])); |
1350 vRightM5 = LD_DP(&(m_matrix[2][2])); | 1351 vRightM5 = LD_DP(&(m_matrix[2][2])); |
1351 vRightM6 = LD_DP(&(m_matrix[3][0])); | 1352 vRightM6 = LD_DP(&(m_matrix[3][0])); |
1352 vRightM7 = LD_DP(&(m_matrix[3][2])); | 1353 vRightM7 = LD_DP(&(m_matrix[3][2])); |
(...skipping 567 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1920 decomposition.translateZ, decomposition.scaleX, decomposition.scaleY, | 1921 decomposition.translateZ, decomposition.scaleX, decomposition.scaleY, |
1921 decomposition.scaleZ, decomposition.skewXY, decomposition.skewXZ, | 1922 decomposition.scaleZ, decomposition.skewXY, decomposition.skewXZ, |
1922 decomposition.skewYZ, decomposition.quaternionX, | 1923 decomposition.skewYZ, decomposition.quaternionX, |
1923 decomposition.quaternionY, decomposition.quaternionZ, | 1924 decomposition.quaternionY, decomposition.quaternionZ, |
1924 decomposition.quaternionW, decomposition.perspectiveX, | 1925 decomposition.quaternionW, decomposition.perspectiveX, |
1925 decomposition.perspectiveY, decomposition.perspectiveZ, | 1926 decomposition.perspectiveY, decomposition.perspectiveZ, |
1926 decomposition.perspectiveW); | 1927 decomposition.perspectiveW); |
1927 } | 1928 } |
1928 | 1929 |
1929 } // namespace blink | 1930 } // namespace blink |
OLD | NEW |