| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2005, 2006 Apple Computer, Inc. All rights reserved. | 2 * Copyright (C) 2005, 2006 Apple Computer, Inc. All rights reserved. |
| 3 * Copyright (C) 2009 Torch Mobile, Inc. | 3 * Copyright (C) 2009 Torch Mobile, Inc. |
| 4 * Copyright (C) 2013 Google Inc. All rights reserved. | 4 * Copyright (C) 2013 Google Inc. All rights reserved. |
| 5 * | 5 * |
| 6 * Redistribution and use in source and binary forms, with or without | 6 * Redistribution and use in source and binary forms, with or without |
| 7 * modification, are permitted provided that the following conditions | 7 * modification, are permitted provided that the following conditions |
| 8 * are met: | 8 * are met: |
| 9 * 1. Redistributions of source code must retain the above copyright | 9 * 1. Redistributions of source code must retain the above copyright |
| 10 * notice, this list of conditions and the following disclaimer. | 10 * notice, this list of conditions and the following disclaimer. |
| (...skipping 335 matching lines...) |
| 346 "fmul v25.2d, v25.2d, v30.d[0] \n\t" | 346 "fmul v25.2d, v25.2d, v30.d[0] \n\t" |
| 347 "fmul v26.2d, v26.2d, v30.d[0] \n\t" | 347 "fmul v26.2d, v26.2d, v30.d[0] \n\t" |
| 348 "fmul v27.2d, v27.2d, v30.d[0] \n\t" | 348 "fmul v27.2d, v27.2d, v30.d[0] \n\t" |
| 349 "st1 {v24.2d - v27.2d}, [%[pr]] \n\t" | 349 "st1 {v24.2d - v27.2d}, [%[pr]] \n\t" |
| 350 : [mat] "+r"(mat), [pr] "+r"(pr) | 350 : [mat] "+r"(mat), [pr] "+r"(pr) |
| 351 : [rdet] "r"(rdet) | 351 : [rdet] "r"(rdet) |
| 352 : "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", | 352 : "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", |
| 353 "v18", "v19", "v20", "v21", "v22", "v23", "24", "25", "v26", "v27", | 353 "v18", "v19", "v20", "v21", "v22", "v23", "24", "25", "v26", "v27", |
| 354 "v28", "v29", "v30"); | 354 "v28", "v29", "v30"); |
| 355 #elif HAVE(MIPS_MSA_INTRINSICS) | 355 #elif HAVE(MIPS_MSA_INTRINSICS) |
| 356 const double rDet = 1/det; | 356 const double rDet = 1 / det; |
| 357 const double* mat = &(matrix[0][0]); | 357 const double* mat = &(matrix[0][0]); |
| 358 v2f64 mat0, mat1, mat2, mat3, mat4, mat5, mat6, mat7; | 358 v2f64 mat0, mat1, mat2, mat3, mat4, mat5, mat6, mat7; |
| 359 v2f64 rev2, rev3, rev4, rev5, rev6, rev7; | 359 v2f64 rev2, rev3, rev4, rev5, rev6, rev7; |
| 360 v2f64 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; | 360 v2f64 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; |
| 361 v2f64 det0, det1, det2, tmp8, tmp9, tmp10, tmp11; | 361 v2f64 det0, det1, det2, tmp8, tmp9, tmp10, tmp11; |
| 362 const v2f64 rdet = COPY_DOUBLE_TO_VECTOR(rDet); | 362 const v2f64 rdet = COPY_DOUBLE_TO_VECTOR(rDet); |
| 363 // mat0 mat1 --> m00 m01 m02 m03 | 363 // mat0 mat1 --> m00 m01 m02 m03 |
| 364 // mat2 mat3 --> m10 m11 m12 m13 | 364 // mat2 mat3 --> m10 m11 m12 m13 |
| 365 // mat4 mat5 --> m20 m21 m22 m23 | 365 // mat4 mat5 --> m20 m21 m22 m23 |
| 366 // mat6 mat7 --> m30 m31 m32 m33 | 366 // mat6 mat7 --> m30 m31 m32 m33 |
| 367 LD_DP8(mat, 2, mat0, mat1, mat2, mat3, mat4, mat5, mat6, mat7); | 367 LD_DP8(mat, 2, mat0, mat1, mat2, mat3, mat4, mat5, mat6, mat7); |
| 368 | 368 |
| 369 // Right half | 369 // Right half |
| 370 rev3 = SLDI_D(mat3, mat3, 8); // m13 m12 | 370 rev3 = SLDI_D(mat3, mat3, 8); // m13 m12 |
| 371 rev5 = SLDI_D(mat5, mat5, 8); // m23 m22 | 371 rev5 = SLDI_D(mat5, mat5, 8); // m23 m22 |
| 372 rev7 = SLDI_D(mat7, mat7, 8); // m33 m32 | 372 rev7 = SLDI_D(mat7, mat7, 8); // m33 m32 |
| 373 | 373 |
| 374 // 2*2 Determinants | 374 // 2*2 Determinants |
| 375 // for A00 & A01 | 375 // for A00 & A01 |
| 376 tmp0 = mat5 * rev7; | 376 tmp0 = mat5 * rev7; |
| 377 tmp1 = mat3 * rev7; | 377 tmp1 = mat3 * rev7; |
| 378 tmp2 = mat3 * rev5; | 378 tmp2 = mat3 * rev5; |
| 379 // for A10 & A11 | 379 // for A10 & A11 |
| 380 tmp3 = mat1 * rev7; | 380 tmp3 = mat1 * rev7; |
| 381 tmp4 = mat1 * rev5; | 381 tmp4 = mat1 * rev5; |
| 382 // for A20 & A21 | 382 // for A20 & A21 |
| 383 tmp5 = mat1 * rev3; | 383 tmp5 = mat1 * rev3; |
| 384 // for A30 & A31 | 384 // for A30 & A31 |
| 385 tmp6 = (v2f64) __msa_ilvr_d((v2i64) tmp1, (v2i64) tmp0); | 385 tmp6 = (v2f64)__msa_ilvr_d((v2i64)tmp1, (v2i64)tmp0); |
| 386 tmp7 = (v2f64) __msa_ilvl_d((v2i64) tmp1, (v2i64) tmp0); | 386 tmp7 = (v2f64)__msa_ilvl_d((v2i64)tmp1, (v2i64)tmp0); |
| 387 det0 = tmp6 - tmp7; | 387 det0 = tmp6 - tmp7; |
| 388 tmp6 = (v2f64) __msa_ilvr_d((v2i64) tmp3, (v2i64) tmp2); | 388 tmp6 = (v2f64)__msa_ilvr_d((v2i64)tmp3, (v2i64)tmp2); |
| 389 tmp7 = (v2f64) __msa_ilvl_d((v2i64) tmp3, (v2i64) tmp2); | 389 tmp7 = (v2f64)__msa_ilvl_d((v2i64)tmp3, (v2i64)tmp2); |
| 390 det1 = tmp6 - tmp7; | 390 det1 = tmp6 - tmp7; |
| 391 tmp6 = (v2f64) __msa_ilvr_d((v2i64) tmp5, (v2i64) tmp4); | 391 tmp6 = (v2f64)__msa_ilvr_d((v2i64)tmp5, (v2i64)tmp4); |
| 392 tmp7 = (v2f64) __msa_ilvl_d((v2i64) tmp5, (v2i64) tmp4); | 392 tmp7 = (v2f64)__msa_ilvl_d((v2i64)tmp5, (v2i64)tmp4); |
| 393 det2 = tmp6 - tmp7; | 393 det2 = tmp6 - tmp7; |
| 394 | 394 |
| 395 // Co-factors | 395 // Co-factors |
| 396 tmp0 = mat0 * (v2f64) __msa_splati_d((v2i64) det0, 0); | 396 tmp0 = mat0 * (v2f64)__msa_splati_d((v2i64)det0, 0); |
| 397 tmp1 = mat0 * (v2f64) __msa_splati_d((v2i64) det0, 1); | 397 tmp1 = mat0 * (v2f64)__msa_splati_d((v2i64)det0, 1); |
| 398 tmp2 = mat0 * (v2f64) __msa_splati_d((v2i64) det1, 0); | 398 tmp2 = mat0 * (v2f64)__msa_splati_d((v2i64)det1, 0); |
| 399 tmp3 = mat2 * (v2f64) __msa_splati_d((v2i64) det0, 0); | 399 tmp3 = mat2 * (v2f64)__msa_splati_d((v2i64)det0, 0); |
| 400 tmp4 = mat2 * (v2f64) __msa_splati_d((v2i64) det1, 1); | 400 tmp4 = mat2 * (v2f64)__msa_splati_d((v2i64)det1, 1); |
| 401 tmp5 = mat2 * (v2f64) __msa_splati_d((v2i64) det2, 0); | 401 tmp5 = mat2 * (v2f64)__msa_splati_d((v2i64)det2, 0); |
| 402 tmp6 = mat4 * (v2f64) __msa_splati_d((v2i64) det0, 1); | 402 tmp6 = mat4 * (v2f64)__msa_splati_d((v2i64)det0, 1); |
| 403 tmp7 = mat4 * (v2f64) __msa_splati_d((v2i64) det1, 1); | 403 tmp7 = mat4 * (v2f64)__msa_splati_d((v2i64)det1, 1); |
| 404 tmp8 = mat4 * (v2f64) __msa_splati_d((v2i64) det2, 1); | 404 tmp8 = mat4 * (v2f64)__msa_splati_d((v2i64)det2, 1); |
| 405 tmp9 = mat6 * (v2f64) __msa_splati_d((v2i64) det1, 0); | 405 tmp9 = mat6 * (v2f64)__msa_splati_d((v2i64)det1, 0); |
| 406 tmp10 = mat6 * (v2f64) __msa_splati_d((v2i64) det2, 0); | 406 tmp10 = mat6 * (v2f64)__msa_splati_d((v2i64)det2, 0); |
| 407 tmp11 = mat6 * (v2f64) __msa_splati_d((v2i64) det2, 1); | 407 tmp11 = mat6 * (v2f64)__msa_splati_d((v2i64)det2, 1); |
| 408 | 408 |
| 409 tmp0 -= tmp7; | 409 tmp0 -= tmp7; |
| 410 tmp1 -= tmp4; | 410 tmp1 -= tmp4; |
| 411 tmp2 -= tmp5; | 411 tmp2 -= tmp5; |
| 412 tmp3 -= tmp6; | 412 tmp3 -= tmp6; |
| 413 tmp0 += tmp10; | 413 tmp0 += tmp10; |
| 414 tmp1 += tmp11; | 414 tmp1 += tmp11; |
| 415 tmp2 += tmp8; | 415 tmp2 += tmp8; |
| 416 tmp3 += tmp9; | 416 tmp3 += tmp9; |
| 417 | 417 |
| 418 // Multiply with 1/det | 418 // Multiply with 1/det |
| 419 tmp0 *= rdet; | 419 tmp0 *= rdet; |
| 420 tmp1 *= rdet; | 420 tmp1 *= rdet; |
| 421 tmp2 *= rdet; | 421 tmp2 *= rdet; |
| 422 tmp3 *= rdet; | 422 tmp3 *= rdet; |
| 423 | 423 |
| 424 // Inverse: Upper half | 424 // Inverse: Upper half |
| 425 result[0][0] = tmp3[1]; | 425 result[0][0] = tmp3[1]; |
| 426 result[0][1] = -tmp0[1]; | 426 result[0][1] = -tmp0[1]; |
| 427 result[0][2] = tmp1[1]; | 427 result[0][2] = tmp1[1]; |
| 428 result[0][3] = -tmp2[1]; | 428 result[0][3] = -tmp2[1]; |
| 429 result[1][0] = -tmp3[0]; | 429 result[1][0] = -tmp3[0]; |
| 430 result[1][1] = tmp0[0]; | 430 result[1][1] = tmp0[0]; |
| 431 result[1][2] = -tmp1[0]; | 431 result[1][2] = -tmp1[0]; |
| 432 result[1][3] = tmp2[0]; | 432 result[1][3] = tmp2[0]; |
| 433 // Left half | 433 // Left half |
| 434 rev2 = SLDI_D(mat2, mat2, 8); // m13 m12 | 434 rev2 = SLDI_D(mat2, mat2, 8); // m13 m12 |
| 435 rev4 = SLDI_D(mat4, mat4, 8); // m23 m22 | 435 rev4 = SLDI_D(mat4, mat4, 8); // m23 m22 |
| 436 rev6 = SLDI_D(mat6, mat6, 8); // m33 m32 | 436 rev6 = SLDI_D(mat6, mat6, 8); // m33 m32 |
| 437 | 437 |
| 438 // 2*2 Determinants | 438 // 2*2 Determinants |
| 439 // for A00 & A01 | 439 // for A00 & A01 |
| 440 tmp0 = mat4 * rev6; | 440 tmp0 = mat4 * rev6; |
| 441 tmp1 = mat2 * rev6; | 441 tmp1 = mat2 * rev6; |
| 442 tmp2 = mat2 * rev4; | 442 tmp2 = mat2 * rev4; |
| 443 // for A10 & A11 | 443 // for A10 & A11 |
| 444 tmp3 = mat0 * rev6; | 444 tmp3 = mat0 * rev6; |
| 445 tmp4 = mat0 * rev4; | 445 tmp4 = mat0 * rev4; |
| 446 // for A20 & A21 | 446 // for A20 & A21 |
| 447 tmp5 = mat0 * rev2; | 447 tmp5 = mat0 * rev2; |
| 448 // for A30 & A31 | 448 // for A30 & A31 |
| 449 tmp6 = (v2f64) __msa_ilvr_d((v2i64) tmp1, (v2i64) tmp0); | 449 tmp6 = (v2f64)__msa_ilvr_d((v2i64)tmp1, (v2i64)tmp0); |
| 450 tmp7 = (v2f64) __msa_ilvl_d((v2i64) tmp1, (v2i64) tmp0); | 450 tmp7 = (v2f64)__msa_ilvl_d((v2i64)tmp1, (v2i64)tmp0); |
| 451 det0 = tmp6 - tmp7; | 451 det0 = tmp6 - tmp7; |
| 452 tmp6 = (v2f64) __msa_ilvr_d((v2i64) tmp3, (v2i64) tmp2); | 452 tmp6 = (v2f64)__msa_ilvr_d((v2i64)tmp3, (v2i64)tmp2); |
| 453 tmp7 = (v2f64) __msa_ilvl_d((v2i64) tmp3, (v2i64) tmp2); | 453 tmp7 = (v2f64)__msa_ilvl_d((v2i64)tmp3, (v2i64)tmp2); |
| 454 det1 = tmp6 - tmp7; | 454 det1 = tmp6 - tmp7; |
| 455 tmp6 = (v2f64) __msa_ilvr_d((v2i64) tmp5, (v2i64) tmp4); | 455 tmp6 = (v2f64)__msa_ilvr_d((v2i64)tmp5, (v2i64)tmp4); |
| 456 tmp7 = (v2f64) __msa_ilvl_d((v2i64) tmp5, (v2i64) tmp4); | 456 tmp7 = (v2f64)__msa_ilvl_d((v2i64)tmp5, (v2i64)tmp4); |
| 457 det2 = tmp6 - tmp7; | 457 det2 = tmp6 - tmp7; |
| 458 | 458 |
| 459 // Co-factors | 459 // Co-factors |
| 460 tmp0 = mat3 * (v2f64) __msa_splati_d((v2i64) det0, 0); | 460 tmp0 = mat3 * (v2f64)__msa_splati_d((v2i64)det0, 0); |
| 461 tmp1 = mat1 * (v2f64) __msa_splati_d((v2i64) det0, 1); | 461 tmp1 = mat1 * (v2f64)__msa_splati_d((v2i64)det0, 1); |
| 462 tmp2 = mat1 * (v2f64) __msa_splati_d((v2i64) det0, 0); | 462 tmp2 = mat1 * (v2f64)__msa_splati_d((v2i64)det0, 0); |
| 463 tmp3 = mat1 * (v2f64) __msa_splati_d((v2i64) det1, 0); | 463 tmp3 = mat1 * (v2f64)__msa_splati_d((v2i64)det1, 0); |
| 464 tmp4 = mat3 * (v2f64) __msa_splati_d((v2i64) det1, 1); | 464 tmp4 = mat3 * (v2f64)__msa_splati_d((v2i64)det1, 1); |
| 465 tmp5 = mat3 * (v2f64) __msa_splati_d((v2i64) det2, 0); | 465 tmp5 = mat3 * (v2f64)__msa_splati_d((v2i64)det2, 0); |
| 466 tmp6 = mat5 * (v2f64) __msa_splati_d((v2i64) det0, 1); | 466 tmp6 = mat5 * (v2f64)__msa_splati_d((v2i64)det0, 1); |
| 467 tmp7 = mat5 * (v2f64) __msa_splati_d((v2i64) det1, 1); | 467 tmp7 = mat5 * (v2f64)__msa_splati_d((v2i64)det1, 1); |
| 468 tmp8 = mat5 * (v2f64) __msa_splati_d((v2i64) det2, 1); | 468 tmp8 = mat5 * (v2f64)__msa_splati_d((v2i64)det2, 1); |
| 469 tmp9 = mat7 * (v2f64) __msa_splati_d((v2i64) det1, 0); | 469 tmp9 = mat7 * (v2f64)__msa_splati_d((v2i64)det1, 0); |
| 470 tmp10 = mat7 * (v2f64) __msa_splati_d((v2i64) det2, 0); | 470 tmp10 = mat7 * (v2f64)__msa_splati_d((v2i64)det2, 0); |
| 471 tmp11 = mat7 * (v2f64) __msa_splati_d((v2i64) det2, 1); | 471 tmp11 = mat7 * (v2f64)__msa_splati_d((v2i64)det2, 1); |
| 472 tmp0 -= tmp6; | 472 tmp0 -= tmp6; |
| 473 tmp1 -= tmp4; | 473 tmp1 -= tmp4; |
| 474 tmp2 -= tmp7; | 474 tmp2 -= tmp7; |
| 475 tmp3 -= tmp5; | 475 tmp3 -= tmp5; |
| 476 tmp0 += tmp9; | 476 tmp0 += tmp9; |
| 477 tmp1 += tmp11; | 477 tmp1 += tmp11; |
| 478 tmp2 += tmp10; | 478 tmp2 += tmp10; |
| 479 tmp3 += tmp8; | 479 tmp3 += tmp8; |
| 480 | 480 |
| 481 // Multiply with 1/det | 481 // Multiply with 1/det |
| (...skipping 850 matching lines...) |
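Both SIMD paths above (the AArch64 inline assembly and the MSA intrinsics) take the same general route to the inverse: 2x2 sub-determinants from two rows, cofactors built from the other two, then a scale by 1/det (the `rdet` / `v30.d[0]` factor). A minimal scalar sketch of that cofactor-expansion inverse, assuming a plain row-major `double[4][4]` layout; `Matrix4` and `Invert4x4` are illustrative names, not the functions used in this file:

```cpp
// Sketch only: cofactor-expansion inverse of a row-major 4x4 matrix,
// the same computation the NEON/MSA paths above vectorize.
#include <cmath>

using Matrix4 = double[4][4];

bool Invert4x4(const Matrix4& m, Matrix4& inv) {
  // 2x2 sub-determinants of the top two rows...
  const double s0 = m[0][0] * m[1][1] - m[1][0] * m[0][1];
  const double s1 = m[0][0] * m[1][2] - m[1][0] * m[0][2];
  const double s2 = m[0][0] * m[1][3] - m[1][0] * m[0][3];
  const double s3 = m[0][1] * m[1][2] - m[1][1] * m[0][2];
  const double s4 = m[0][1] * m[1][3] - m[1][1] * m[0][3];
  const double s5 = m[0][2] * m[1][3] - m[1][2] * m[0][3];
  // ...and of the bottom two rows.
  const double c5 = m[2][2] * m[3][3] - m[3][2] * m[2][3];
  const double c4 = m[2][1] * m[3][3] - m[3][1] * m[2][3];
  const double c3 = m[2][1] * m[3][2] - m[3][1] * m[2][2];
  const double c2 = m[2][0] * m[3][3] - m[3][0] * m[2][3];
  const double c1 = m[2][0] * m[3][2] - m[3][0] * m[2][2];
  const double c0 = m[2][0] * m[3][1] - m[3][0] * m[2][1];

  const double det = s0 * c5 - s1 * c4 + s2 * c3 + s3 * c2 - s4 * c1 + s5 * c0;
  if (std::fabs(det) < 1e-12)  // tolerance is illustrative, not the file's
    return false;
  const double rdet = 1.0 / det;  // corresponds to rDet / v30.d[0] above

  inv[0][0] = ( m[1][1] * c5 - m[1][2] * c4 + m[1][3] * c3) * rdet;
  inv[0][1] = (-m[0][1] * c5 + m[0][2] * c4 - m[0][3] * c3) * rdet;
  inv[0][2] = ( m[3][1] * s5 - m[3][2] * s4 + m[3][3] * s3) * rdet;
  inv[0][3] = (-m[2][1] * s5 + m[2][2] * s4 - m[2][3] * s3) * rdet;
  inv[1][0] = (-m[1][0] * c5 + m[1][2] * c2 - m[1][3] * c1) * rdet;
  inv[1][1] = ( m[0][0] * c5 - m[0][2] * c2 + m[0][3] * c1) * rdet;
  inv[1][2] = (-m[3][0] * s5 + m[3][2] * s2 - m[3][3] * s1) * rdet;
  inv[1][3] = ( m[2][0] * s5 - m[2][2] * s2 + m[2][3] * s1) * rdet;
  inv[2][0] = ( m[1][0] * c4 - m[1][1] * c2 + m[1][3] * c0) * rdet;
  inv[2][1] = (-m[0][0] * c4 + m[0][1] * c2 - m[0][3] * c0) * rdet;
  inv[2][2] = ( m[3][0] * s4 - m[3][1] * s2 + m[3][3] * s0) * rdet;
  inv[2][3] = (-m[2][0] * s4 + m[2][1] * s2 - m[2][3] * s0) * rdet;
  inv[3][0] = (-m[1][0] * c3 + m[1][1] * c1 - m[1][2] * c0) * rdet;
  inv[3][1] = ( m[0][0] * c3 - m[0][1] * c1 + m[0][2] * c0) * rdet;
  inv[3][2] = (-m[3][0] * s3 + m[3][1] * s1 - m[3][2] * s0) * rdet;
  inv[3][3] = ( m[2][0] * s3 - m[2][1] * s1 + m[2][2] * s0) * rdet;
  return true;
}
```

The SIMD code organizes the same arithmetic by half-rows (right half, then left half) so that each `v2f64` register or NEON vector carries two adjacent elements, but the determinants, cofactors, and final 1/det scale are the same quantities computed here one scalar at a time.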
| 1332 | 1332 |
| 1333 "st1 {v0.2d - v3.2d}, [x9], 64 \t\n" | 1333 "st1 {v0.2d - v3.2d}, [x9], 64 \t\n" |
| 1334 "st1 {v4.2d - v7.2d}, [x9] \t\n" | 1334 "st1 {v4.2d - v7.2d}, [x9] \t\n" |
| 1335 : [leftMatrix] "+r"(leftMatrix), [rightMatrix] "+r"(rightMatrix) | 1335 : [leftMatrix] "+r"(leftMatrix), [rightMatrix] "+r"(rightMatrix) |
| 1336 : | 1336 : |
| 1337 : "memory", "x9", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", | 1337 : "memory", "x9", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", |
| 1338 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "v0", "v1", | 1338 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "v0", "v1", |
| 1339 "v2", "v3", "v4", "v5", "v6", "v7"); | 1339 "v2", "v3", "v4", "v5", "v6", "v7"); |
| 1340 #elif HAVE(MIPS_MSA_INTRINSICS) | 1340 #elif HAVE(MIPS_MSA_INTRINSICS) |
| 1341 v2f64 vleftM0, vleftM1, vleftM2, vleftM3, vleftM4, vleftM5, vleftM6, vleftM7; | 1341 v2f64 vleftM0, vleftM1, vleftM2, vleftM3, vleftM4, vleftM5, vleftM6, vleftM7; |
| 1342 v2f64 vRightM0, vRightM1, vRightM2, vRightM3, vRightM4, vRightM5, vRightM6, vRightM7; | 1342 v2f64 vRightM0, vRightM1, vRightM2, vRightM3, vRightM4, vRightM5, vRightM6, |
| | 1343     vRightM7; |
| 1343 v2f64 vTmpM0, vTmpM1, vTmpM2, vTmpM3; | 1344 v2f64 vTmpM0, vTmpM1, vTmpM2, vTmpM3; |
| 1344 | 1345 |
| 1345 vRightM0 = LD_DP(&(m_matrix[0][0])); | 1346 vRightM0 = LD_DP(&(m_matrix[0][0])); |
| 1346 vRightM1 = LD_DP(&(m_matrix[0][2])); | 1347 vRightM1 = LD_DP(&(m_matrix[0][2])); |
| 1347 vRightM2 = LD_DP(&(m_matrix[1][0])); | 1348 vRightM2 = LD_DP(&(m_matrix[1][0])); |
| 1348 vRightM3 = LD_DP(&(m_matrix[1][2])); | 1349 vRightM3 = LD_DP(&(m_matrix[1][2])); |
| 1349 vRightM4 = LD_DP(&(m_matrix[2][0])); | 1350 vRightM4 = LD_DP(&(m_matrix[2][0])); |
| 1350 vRightM5 = LD_DP(&(m_matrix[2][2])); | 1351 vRightM5 = LD_DP(&(m_matrix[2][2])); |
| 1351 vRightM6 = LD_DP(&(m_matrix[3][0])); | 1352 vRightM6 = LD_DP(&(m_matrix[3][0])); |
| 1352 vRightM7 = LD_DP(&(m_matrix[3][2])); | 1353 vRightM7 = LD_DP(&(m_matrix[3][2])); |
| (...skipping 567 matching lines...) |
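The multiply hunk above loads each half-row of m_matrix into a v2f64 register (vRightM0..vRightM7) before the elided arithmetic, mirroring what the AArch64 assembly does with its vector registers. For reference, a minimal scalar sketch of a row-major 4x4 product; `Matrix4` and `Multiply4x4` are illustrative names only, and the operand order used by the class's Multiply() is whatever its scalar fallback defines, which this sketch does not claim to reproduce:

```cpp
// Sketch only: straightforward row-major 4x4 double-precision product.
// 'out' must not alias 'a' or 'b' in this simple form.
using Matrix4 = double[4][4];

void Multiply4x4(const Matrix4& a, const Matrix4& b, Matrix4& out) {
  for (int row = 0; row < 4; ++row) {
    for (int col = 0; col < 4; ++col) {
      double sum = 0.0;
      for (int k = 0; k < 4; ++k)
        sum += a[row][k] * b[k][col];  // row of 'a' dotted with column of 'b'
      out[row][col] = sum;
    }
  }
}
```

The vectorized versions compute two result elements at a time per register, which is why the operands are loaded as pairs of doubles per half-row rather than element by element.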
| 1920 decomposition.translateZ, decomposition.scaleX, decomposition.scaleY, | 1921 decomposition.translateZ, decomposition.scaleX, decomposition.scaleY, |
| 1921 decomposition.scaleZ, decomposition.skewXY, decomposition.skewXZ, | 1922 decomposition.scaleZ, decomposition.skewXY, decomposition.skewXZ, |
| 1922 decomposition.skewYZ, decomposition.quaternionX, | 1923 decomposition.skewYZ, decomposition.quaternionX, |
| 1923 decomposition.quaternionY, decomposition.quaternionZ, | 1924 decomposition.quaternionY, decomposition.quaternionZ, |
| 1924 decomposition.quaternionW, decomposition.perspectiveX, | 1925 decomposition.quaternionW, decomposition.perspectiveX, |
| 1925 decomposition.perspectiveY, decomposition.perspectiveZ, | 1926 decomposition.perspectiveY, decomposition.perspectiveZ, |
| 1926 decomposition.perspectiveW); | 1927 decomposition.perspectiveW); |
| 1927 } | 1928 } |
| 1928 | 1929 |
| 1929 } // namespace blink | 1930 } // namespace blink |
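The formatting call near the end enumerates the decomposition's translation, scale, skew, quaternion, and perspective components. A hypothetical struct mirroring just those field names, for orientation only; Blink's actual decomposition type is declared elsewhere and is not reproduced here:

```cpp
// Illustrative only: field names are taken from the call site above;
// nothing else about Blink's real decomposition type is implied.
struct DecomposedTransform {
  double translateX, translateY, translateZ;
  double scaleX, scaleY, scaleZ;
  double skewXY, skewXZ, skewYZ;
  double quaternionX, quaternionY, quaternionZ, quaternionW;
  double perspectiveX, perspectiveY, perspectiveZ, perspectiveW;
};
```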