third_party/WebKit/Source/platform/transforms/TransformationMatrix.cpp - Issue 2539803003: Add back ARMv7 NEON optimization for TransformationMatrix::multiply

Unified Diff: third_party/WebKit/Source/platform/transforms/TransformationMatrix.cpp

Issue 2539803003: Add back ARMv7 NEON optimization for TransformationMatrix::multiply

Patch Set: Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: third_party/WebKit/Source/platform/transforms/TransformationMatrix.cpp

diff --git a/third_party/WebKit/Source/platform/transforms/TransformationMatrix.cpp b/third_party/WebKit/Source/platform/transforms/TransformationMatrix.cpp

index 6b7411b86608e5764f5b816337ce6dce6bfc4aa7..af1d4871742e2a52e5ed1ee230a83bdbc7e63a68 100644

--- a/third_party/WebKit/Source/platform/transforms/TransformationMatrix.cpp

+++ b/third_party/WebKit/Source/platform/transforms/TransformationMatrix.cpp

@@ -1347,6 +1347,108 @@ TransformationMatrix& TransformationMatrix::multiply(

: "memory", "x9", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",

"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "v0", "v1",

"v2", "v3", "v4", "v5", "v6", "v7");

+#elif CPU(ARM) && CPU(ARM_NEON)  // ARMv7 path: inline asm computing result[i][j] = sum over k of right[i][k] * left[k][j], written back over left.

+ double* leftMatrix = &(m_matrix[0][0]);  // Left operand and destination: its 16 doubles are overwritten with the result.

+ const double* rightMatrix = &(mat.m_matrix[0][0]);  // Right operand: only loaded from, one row of 4 doubles at a time.

+ asm volatile (// First row of leftMatrix.

+ "mov r3, %[leftMatrix]\n\t"  // r3 keeps the base address for the stores; the loads below post-increment the leftMatrix operand.

+ "vld1.64 { d16-d19 }, [%[leftMatrix], :128]!\n\t"  // d16-d19 = left[0][0..3]. NOTE(review): :128 is an alignment qualifier - confirm m_matrix is 16-byte aligned in the header.

+ "vld1.64 { d0-d3}, [%[rightMatrix], :128]!\n\t"  // d0-d3 = right[0][0..3]

+ "vmul.f64 d4, d0, d16\n\t"  // d4 = right[0][0] * left[0][0]

+ "vld1.64 { d20-d23 }, [%[leftMatrix], :128]!\n\t"  // d20-d23 = left[1][0..3]; remaining left rows are loaded between the arithmetic steps.

+ "vmla.f64 d4, d1, d20\n\t"  // d4 += right[0][1] * left[1][0]

+ "vld1.64 { d24-d27 }, [%[leftMatrix], :128]!\n\t"  // d24-d27 = left[2][0..3]

+ "vmla.f64 d4, d2, d24\n\t"  // d4 += right[0][2] * left[2][0]

+ "vld1.64 { d28-d31 }, [%[leftMatrix], :128]!\n\t"  // d28-d31 = left[3][0..3]; all 16 left values now live in d16-d31.

+ "vmla.f64 d4, d3, d28\n\t"  // d4 += right[0][3] * left[3][0], so d4 = result[0][0]

+ "vmul.f64 d5, d0, d17\n\t"  // d5 accumulates result[0][1] from column 1 of left (d17, d21, d25, d29)

+ "vmla.f64 d5, d1, d21\n\t"

+ "vmla.f64 d5, d2, d25\n\t"

+ "vmla.f64 d5, d3, d29\n\t"

+ "vmul.f64 d6, d0, d18\n\t"  // d6 accumulates result[0][2] from column 2 of left (d18, d22, d26, d30)

+ "vmla.f64 d6, d1, d22\n\t"

+ "vmla.f64 d6, d2, d26\n\t"

+ "vmla.f64 d6, d3, d30\n\t"

+ "vmul.f64 d7, d0, d19\n\t"  // d7 accumulates result[0][3] from column 3 of left (d19, d23, d27, d31)

+ "vmla.f64 d7, d1, d23\n\t"

+ "vmla.f64 d7, d2, d27\n\t"

+ "vmla.f64 d7, d3, d31\n\t"

+ "vld1.64 { d0-d3}, [%[rightMatrix], :128]!\n\t"  // d0-d3 = right[1][0..3], input for the next result row

+ "vst1.64 { d4-d7 }, [r3, :128]!\n\t"  // Store result row 0 over left row 0; the old left values are already held in d16-d31.

+ // Second row of leftMatrix.

+ "vmul.f64 d4, d0, d16\n\t"  // Same 16-instruction pattern as above, now producing result[1][0..3] in d4-d7

+ "vmla.f64 d4, d1, d20\n\t"

+ "vmla.f64 d4, d2, d24\n\t"

+ "vmla.f64 d4, d3, d28\n\t"

+ "vmul.f64 d5, d0, d17\n\t"

+ "vmla.f64 d5, d1, d21\n\t"

+ "vmla.f64 d5, d2, d25\n\t"

+ "vmla.f64 d5, d3, d29\n\t"

+ "vmul.f64 d6, d0, d18\n\t"

+ "vmla.f64 d6, d1, d22\n\t"

+ "vmla.f64 d6, d2, d26\n\t"

+ "vmla.f64 d6, d3, d30\n\t"

+ "vmul.f64 d7, d0, d19\n\t"

+ "vmla.f64 d7, d1, d23\n\t"

+ "vmla.f64 d7, d2, d27\n\t"

+ "vmla.f64 d7, d3, d31\n\t"

+ "vld1.64 { d0-d3}, [%[rightMatrix], :128]!\n\t"  // d0-d3 = right[2][0..3]

+ "vst1.64 { d4-d7 }, [r3, :128]!\n\t"  // Store result row 1

+ // Third row of leftMatrix.

+ "vmul.f64 d4, d0, d16\n\t"  // Same pattern, producing result[2][0..3] in d4-d7

+ "vmla.f64 d4, d1, d20\n\t"

+ "vmla.f64 d4, d2, d24\n\t"

+ "vmla.f64 d4, d3, d28\n\t"

+ "vmul.f64 d5, d0, d17\n\t"

+ "vmla.f64 d5, d1, d21\n\t"

+ "vmla.f64 d5, d2, d25\n\t"

+ "vmla.f64 d5, d3, d29\n\t"

+ "vmul.f64 d6, d0, d18\n\t"

+ "vmla.f64 d6, d1, d22\n\t"

+ "vmla.f64 d6, d2, d26\n\t"

+ "vmla.f64 d6, d3, d30\n\t"

+ "vmul.f64 d7, d0, d19\n\t"

+ "vmla.f64 d7, d1, d23\n\t"

+ "vmla.f64 d7, d2, d27\n\t"

+ "vmla.f64 d7, d3, d31\n\t"

+ "vld1.64 { d0-d3}, [%[rightMatrix], :128]\n\t"  // d0-d3 = right[3][0..3]; this is the last load, so there is no post-increment.

+ "vst1.64 { d4-d7 }, [r3, :128]!\n\t"  // Store result row 2

+ // Fourth and last row of leftMatrix.

+ "vmul.f64 d4, d0, d16\n\t"  // Same pattern, producing result[3][0..3] in d4-d7

+ "vmla.f64 d4, d1, d20\n\t"

+ "vmla.f64 d4, d2, d24\n\t"

+ "vmla.f64 d4, d3, d28\n\t"

+ "vmul.f64 d5, d0, d17\n\t"

+ "vmla.f64 d5, d1, d21\n\t"

+ "vmla.f64 d5, d2, d25\n\t"

+ "vmla.f64 d5, d3, d29\n\t"

+ "vmul.f64 d6, d0, d18\n\t"

+ "vmla.f64 d6, d1, d22\n\t"

+ "vmla.f64 d6, d2, d26\n\t"

+ "vmla.f64 d6, d3, d30\n\t"

+ "vmul.f64 d7, d0, d19\n\t"

+ "vmla.f64 d7, d1, d23\n\t"

+ "vmla.f64 d7, d2, d27\n\t"

+ "vmla.f64 d7, d3, d31\n\t"

+ "vst1.64 { d4-d7 }, [r3, :128]\n\t"  // Store result row 3; final store, so r3 is not advanced.

+ : [leftMatrix]"+r"(leftMatrix), [rightMatrix]"+r"(rightMatrix)  // Both pointers are read-write operands because the post-incremented loads modify them.

+ :

+ : "memory", "r3", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31");  // Clobbers: memory (the matrix is written through r3), the scratch register r3, and every d register used above.

#elif HAVE(MIPS_MSA_INTRINSICS)

v2f64 vleftM0, vleftM1, vleftM2, vleftM3, vleftM4, vleftM5, vleftM6, vleftM7;

v2f64 vRightM0, vRightM1, vRightM2, vRightM3, vRightM4, vRightM5, vRightM6,

« no previous file with comments | « third_party/WebKit/Source/platform/transforms/TransformationMatrix.h ('k') | no next file » | no next file with comments »