Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(109)

Side by Side Diff: third_party/WebKit/Source/platform/transforms/TransformationMatrix.cpp

Issue 2539803003: Add back ARMv7 NEON optimization for TransformationMatrix::multiply
Patch Set: Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/WebKit/Source/platform/transforms/TransformationMatrix.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright (C) 2005, 2006 Apple Computer, Inc. All rights reserved. 2 * Copyright (C) 2005, 2006 Apple Computer, Inc. All rights reserved.
3 * Copyright (C) 2009 Torch Mobile, Inc. 3 * Copyright (C) 2009 Torch Mobile, Inc.
4 * Copyright (C) 2013 Google Inc. All rights reserved. 4 * Copyright (C) 2013 Google Inc. All rights reserved.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 7 * modification, are permitted provided that the following conditions
8 * are met: 8 * are met:
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer. 10 * notice, this list of conditions and the following disclaimer.
(...skipping 1329 matching lines...) Expand 10 before | Expand all | Expand 10 after
1340 "fmla v6.2d, v30.2d, v23.d[1] \t\n" 1340 "fmla v6.2d, v30.2d, v23.d[1] \t\n"
1341 "fmla v7.2d, v31.2d, v23.d[1] \t\n" 1341 "fmla v7.2d, v31.2d, v23.d[1] \t\n"
1342 1342
1343 "st1 {v0.2d - v3.2d}, [x9], 64 \t\n" 1343 "st1 {v0.2d - v3.2d}, [x9], 64 \t\n"
1344 "st1 {v4.2d - v7.2d}, [x9] \t\n" 1344 "st1 {v4.2d - v7.2d}, [x9] \t\n"
1345 : [leftMatrix] "+r"(leftMatrix), [rightMatrix] "+r"(rightMatrix) 1345 : [leftMatrix] "+r"(leftMatrix), [rightMatrix] "+r"(rightMatrix)
1346 : 1346 :
1347 : "memory", "x9", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", 1347 : "memory", "x9", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
1348 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "v0", "v1", 1348 "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "v0", "v1",
1349 "v2", "v3", "v4", "v5", "v6", "v7"); 1349 "v2", "v3", "v4", "v5", "v6", "v7");
1350 #elif CPU(ARM) && CPU(ARM_NEON)
1351 double* leftMatrix = &(m_matrix[0][0]);
1352 const double* rightMatrix = &(mat.m_matrix[0][0]);
1353 asm volatile (// First row of leftMatrix.
1354 "mov r3, %[leftMatrix]\n\t"
1355 "vld1.64 { d16-d19 }, [%[leftMatrix], :128]!\n\t"
1356 "vld1.64 { d0-d3}, [%[rightMatrix], :128]!\n\t"
1357 "vmul.f64 d4, d0, d16\n\t"
1358 "vld1.64 { d20-d23 }, [%[leftMatrix], :128]!\n\t"
1359 "vmla.f64 d4, d1, d20\n\t"
1360 "vld1.64 { d24-d27 }, [%[leftMatrix], :128]!\n\t"
1361 "vmla.f64 d4, d2, d24\n\t"
1362 "vld1.64 { d28-d31 }, [%[leftMatrix], :128]!\n\t"
1363 "vmla.f64 d4, d3, d28\n\t"
1364
1365 "vmul.f64 d5, d0, d17\n\t"
1366 "vmla.f64 d5, d1, d21\n\t"
1367 "vmla.f64 d5, d2, d25\n\t"
1368 "vmla.f64 d5, d3, d29\n\t"
1369
1370 "vmul.f64 d6, d0, d18\n\t"
1371 "vmla.f64 d6, d1, d22\n\t"
1372 "vmla.f64 d6, d2, d26\n\t"
1373 "vmla.f64 d6, d3, d30\n\t"
1374
1375 "vmul.f64 d7, d0, d19\n\t"
1376 "vmla.f64 d7, d1, d23\n\t"
1377 "vmla.f64 d7, d2, d27\n\t"
1378 "vmla.f64 d7, d3, d31\n\t"
1379 "vld1.64 { d0-d3}, [%[rightMatrix], :128]!\n\t"
1380 "vst1.64 { d4-d7 }, [r3, :128]!\n\t"
1381
1382 // Second row of leftMatrix.
1383 "vmul.f64 d4, d0, d16\n\t"
1384 "vmla.f64 d4, d1, d20\n\t"
1385 "vmla.f64 d4, d2, d24\n\t"
1386 "vmla.f64 d4, d3, d28\n\t"
1387
1388 "vmul.f64 d5, d0, d17\n\t"
1389 "vmla.f64 d5, d1, d21\n\t"
1390 "vmla.f64 d5, d2, d25\n\t"
1391 "vmla.f64 d5, d3, d29\n\t"
1392
1393 "vmul.f64 d6, d0, d18\n\t"
1394 "vmla.f64 d6, d1, d22\n\t"
1395 "vmla.f64 d6, d2, d26\n\t"
1396 "vmla.f64 d6, d3, d30\n\t"
1397
1398 "vmul.f64 d7, d0, d19\n\t"
1399 "vmla.f64 d7, d1, d23\n\t"
1400 "vmla.f64 d7, d2, d27\n\t"
1401 "vmla.f64 d7, d3, d31\n\t"
1402 "vld1.64 { d0-d3}, [%[rightMatrix], :128]!\n\t"
1403 "vst1.64 { d4-d7 }, [r3, :128]!\n\t"
1404
1405 // Third row of leftMatrix.
1406 "vmul.f64 d4, d0, d16\n\t"
1407 "vmla.f64 d4, d1, d20\n\t"
1408 "vmla.f64 d4, d2, d24\n\t"
1409 "vmla.f64 d4, d3, d28\n\t"
1410
1411 "vmul.f64 d5, d0, d17\n\t"
1412 "vmla.f64 d5, d1, d21\n\t"
1413 "vmla.f64 d5, d2, d25\n\t"
1414 "vmla.f64 d5, d3, d29\n\t"
1415
1416 "vmul.f64 d6, d0, d18\n\t"
1417 "vmla.f64 d6, d1, d22\n\t"
1418 "vmla.f64 d6, d2, d26\n\t"
1419 "vmla.f64 d6, d3, d30\n\t"
1420
1421 "vmul.f64 d7, d0, d19\n\t"
1422 "vmla.f64 d7, d1, d23\n\t"
1423 "vmla.f64 d7, d2, d27\n\t"
1424 "vmla.f64 d7, d3, d31\n\t"
1425 "vld1.64 { d0-d3}, [%[rightMatrix], :128]\n\t"
1426 "vst1.64 { d4-d7 }, [r3, :128]!\n\t"
1427
1428 // Fourth and last row of leftMatrix.
1429 "vmul.f64 d4, d0, d16\n\t"
1430 "vmla.f64 d4, d1, d20\n\t"
1431 "vmla.f64 d4, d2, d24\n\t"
1432 "vmla.f64 d4, d3, d28\n\t"
1433
1434 "vmul.f64 d5, d0, d17\n\t"
1435 "vmla.f64 d5, d1, d21\n\t"
1436 "vmla.f64 d5, d2, d25\n\t"
1437 "vmla.f64 d5, d3, d29\n\t"
1438
1439 "vmul.f64 d6, d0, d18\n\t"
1440 "vmla.f64 d6, d1, d22\n\t"
1441 "vmla.f64 d6, d2, d26\n\t"
1442 "vmla.f64 d6, d3, d30\n\t"
1443
1444 "vmul.f64 d7, d0, d19\n\t"
1445 "vmla.f64 d7, d1, d23\n\t"
1446 "vmla.f64 d7, d2, d27\n\t"
1447 "vmla.f64 d7, d3, d31\n\t"
1448 "vst1.64 { d4-d7 }, [r3, :128]\n\t"
1449 : [leftMatrix]"+r"(leftMatrix), [rightMatrix]"+r"(rightMatrix)
1450 :
1451 : "memory", "r3", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31");
1350 #elif HAVE(MIPS_MSA_INTRINSICS) 1452 #elif HAVE(MIPS_MSA_INTRINSICS)
1351 v2f64 vleftM0, vleftM1, vleftM2, vleftM3, vleftM4, vleftM5, vleftM6, vleftM7; 1453 v2f64 vleftM0, vleftM1, vleftM2, vleftM3, vleftM4, vleftM5, vleftM6, vleftM7;
1352 v2f64 vRightM0, vRightM1, vRightM2, vRightM3, vRightM4, vRightM5, vRightM6, 1454 v2f64 vRightM0, vRightM1, vRightM2, vRightM3, vRightM4, vRightM5, vRightM6,
1353 vRightM7; 1455 vRightM7;
1354 v2f64 vTmpM0, vTmpM1, vTmpM2, vTmpM3; 1456 v2f64 vTmpM0, vTmpM1, vTmpM2, vTmpM3;
1355 1457
1356 vRightM0 = LD_DP(&(m_matrix[0][0])); 1458 vRightM0 = LD_DP(&(m_matrix[0][0]));
1357 vRightM1 = LD_DP(&(m_matrix[0][2])); 1459 vRightM1 = LD_DP(&(m_matrix[0][2]));
1358 vRightM2 = LD_DP(&(m_matrix[1][0])); 1460 vRightM2 = LD_DP(&(m_matrix[1][0]));
1359 vRightM3 = LD_DP(&(m_matrix[1][2])); 1461 vRightM3 = LD_DP(&(m_matrix[1][2]));
(...skipping 571 matching lines...) Expand 10 before | Expand all | Expand 10 after
1931 decomposition.translateZ, decomposition.scaleX, decomposition.scaleY, 2033 decomposition.translateZ, decomposition.scaleX, decomposition.scaleY,
1932 decomposition.scaleZ, decomposition.skewXY, decomposition.skewXZ, 2034 decomposition.scaleZ, decomposition.skewXY, decomposition.skewXZ,
1933 decomposition.skewYZ, decomposition.quaternionX, 2035 decomposition.skewYZ, decomposition.quaternionX,
1934 decomposition.quaternionY, decomposition.quaternionZ, 2036 decomposition.quaternionY, decomposition.quaternionZ,
1935 decomposition.quaternionW, decomposition.perspectiveX, 2037 decomposition.quaternionW, decomposition.perspectiveX,
1936 decomposition.perspectiveY, decomposition.perspectiveZ, 2038 decomposition.perspectiveY, decomposition.perspectiveZ,
1937 decomposition.perspectiveW); 2039 decomposition.perspectiveW);
1938 } 2040 }
1939 2041
1940 } // namespace blink 2042 } // namespace blink
OLD | NEW
« no previous file with comments | « third_party/WebKit/Source/platform/transforms/TransformationMatrix.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698