src/compiler/arm/code-generator-arm.cc - Issue 2804883008: [WASM SIMD] Implement horizontal add for float and integer types.

Unified Diff: src/compiler/arm/code-generator-arm.cc

Issue 2804883008: [WASM SIMD] Implement horizontal add for float and integer types. (Closed)

Patch Set: Fix MIPS. Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: src/compiler/arm/code-generator-arm.cc

diff --git a/src/compiler/arm/code-generator-arm.cc b/src/compiler/arm/code-generator-arm.cc

index 5e916f2352a0c16593540c0aaaff7b437e3ae769..bc2f2985a70ce7697bc9c7644e21344f53ecf621 100644

--- a/src/compiler/arm/code-generator-arm.cc

+++ b/src/compiler/arm/code-generator-arm.cc

@@ -1611,6 +1611,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(

i.InputSimd128Register(1));

break;

}

+ case kArmF32x4AddHoriz: {

+ Simd128Register dst = i.OutputSimd128Register(),

+ src0 = i.InputSimd128Register(0),

+ src1 = i.InputSimd128Register(1);

+ // Make sure we don't overwrite source data before it's used.

+ if (dst.is(src0)) {

+ __ vpadd(dst.low(), src0.low(), src0.high());

georgia.kouveli 2017/04/20 14:53:06 The inputs to all the vpadd instructions seem to b…

bbudge 2017/04/21 20:18:58 I changed the tests to use the permutation test ma…

+ if (dst.is(src1)) {

+ __ vmov(dst.high(), dst.low());

+ } else {

+ __ vpadd(dst.high(), src1.low(), src1.high());

+ }

+ } else {

+ __ vpadd(dst.high(), src1.low(), src1.high());

+ __ vpadd(dst.low(), src0.low(), src0.high());

+ }

+ break;

+ }

case kArmF32x4Sub: {

__ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0),

i.InputSimd128Register(1));

@@ -1709,6 +1727,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(

i.InputSimd128Register(1));

break;

}

+ case kArmI32x4AddHoriz: {

+ Simd128Register dst = i.OutputSimd128Register(),

+ src0 = i.InputSimd128Register(0),

+ src1 = i.InputSimd128Register(1);

+ // Make sure we don't overwrite source data before it's used.

georgia.kouveli 2017/04/20 14:53:06 Might make sense to factor out this code, since it…

bbudge 2017/04/21 20:18:58 Done. (And I did the same for the repetitious narr…

+ if (dst.is(src0)) {

+ __ vpadd(Neon32, dst.low(), src0.low(), src0.high());

+ if (dst.is(src1)) {

+ __ vmov(dst.high(), dst.low());

+ } else {

+ __ vpadd(Neon32, dst.high(), src1.low(), src1.high());

+ }

+ } else {

+ __ vpadd(Neon32, dst.high(), src1.low(), src1.high());

+ __ vpadd(Neon32, dst.low(), src0.low(), src0.high());

+ }

+ break;

+ }

case kArmI32x4Sub: {

__ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),

i.InputSimd128Register(1));

@@ -1857,6 +1893,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(

i.InputSimd128Register(1));

break;

}

+ case kArmI16x8AddHoriz: {

+ Simd128Register dst = i.OutputSimd128Register(),

+ src0 = i.InputSimd128Register(0),

+ src1 = i.InputSimd128Register(1);

+ // Make sure we don't overwrite source data before it's used.

+ if (dst.is(src0)) {

+ __ vpadd(Neon16, dst.low(), src0.low(), src0.high());

+ if (dst.is(src1)) {

+ __ vmov(dst.high(), dst.low());

+ } else {

+ __ vpadd(Neon16, dst.high(), src1.low(), src1.high());

+ }

+ } else {

+ __ vpadd(Neon16, dst.high(), src1.low(), src1.high());

+ __ vpadd(Neon16, dst.low(), src0.low(), src0.high());

+ }

+ break;

+ }

case kArmI16x8Sub: {

__ vsub(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),

i.InputSimd128Register(1));

@@ -2025,6 +2079,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(

i.InputSimd128Register(1));

break;

}

+ case kArmI8x16AddHoriz: {

+ Simd128Register dst = i.OutputSimd128Register(),

+ src0 = i.InputSimd128Register(0),

+ src1 = i.InputSimd128Register(1);

+ // Make sure we don't overwrite source data before it's used.

+ if (dst.is(src0)) {

+ __ vpadd(Neon8, dst.low(), src0.low(), src0.high());

+ if (dst.is(src1)) {

+ __ vmov(dst.high(), dst.low());

+ } else {

+ __ vpadd(Neon8, dst.high(), src1.low(), src1.high());

+ }

+ } else {

+ __ vpadd(Neon8, dst.high(), src1.low(), src1.high());

+ __ vpadd(Neon8, dst.low(), src0.low(), src0.high());

+ }

+ break;

+ }

case kArmI8x16Sub: {

__ vsub(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),

i.InputSimd128Register(1));

« src/arm/assembler-arm.h ('K') | « src/arm/simulator-arm.cc ('k') | src/compiler/arm/instruction-codes-arm.h » ('j') | src/compiler/machine-operator.cc » ('J')