src/compiler/arm/code-generator-arm.cc - Issue 2804883008: [WASM SIMD] Implement horizontal add for float and integer types.

Unified Diff: src/compiler/arm/code-generator-arm.cc

Issue 2804883008: [WASM SIMD] Implement horizontal add for float and integer types. (Closed)

Patch Set: Rebase, reformat. Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: src/compiler/arm/code-generator-arm.cc

diff --git a/src/compiler/arm/code-generator-arm.cc b/src/compiler/arm/code-generator-arm.cc

index e8c94eab889a356bc78ff048e2f7ec301496ef3f..331a866662e3210086caa934389785823ab9b186 100644

--- a/src/compiler/arm/code-generator-arm.cc

+++ b/src/compiler/arm/code-generator-arm.cc

@@ -496,6 +496,41 @@ Condition FlagsConditionToCondition(FlagsCondition condition) {

DCHECK_EQ(LeaveCC, i.OutputSBit()); \

} while (0)

+#define ASSEMBLE_NEON_NARROWING_OP(dt) \

+ do { /* vqmovn src0 into dst.low() and src1 into dst.high(). */ \

+ Simd128Register dst = i.OutputSimd128Register(), \

+ src0 = i.InputSimd128Register(0), \

+ src1 = i.InputSimd128Register(1); \

+ if (dst.is(src0) && dst.is(src1)) { /* Same source twice: narrow once, then copy. */ \

+ __ vqmovn(dt, dst.low(), src0); \

+ __ vmov(dst.high(), dst.low()); \

+ } else if (dst.is(src0)) { /* dst is src0, so narrow src0 first. */ \

+ __ vqmovn(dt, dst.low(), src0); \

+ __ vqmovn(dt, dst.high(), src1); \

+ } else { /* dst may alias src1, so narrow src1 first. */ \

+ __ vqmovn(dt, dst.high(), src1); \

+ __ vqmovn(dt, dst.low(), src0); \

+ } \

+ } while (0)

+#define ASSEMBLE_NEON_PAIRWISE_OP(op, size) \

+ do { /* dst.low() = op over src0's halves; dst.high() = op over src1's halves. */ \

+ Simd128Register dst = i.OutputSimd128Register(), \

+ src0 = i.InputSimd128Register(0), \

+ src1 = i.InputSimd128Register(1); \

+ if (dst.is(src0)) { /* dst is src0, so consume src0 before writing dst.high(). */ \

+ __ op(size, dst.low(), src0.low(), src0.high()); \

+ if (dst.is(src1)) { /* Same source twice: reuse the result just computed. */ \

+ __ vmov(dst.high(), dst.low()); \

+ } else { \

+ __ op(size, dst.high(), src1.low(), src1.high()); \

+ } \

+ } else { /* dst may alias src1, so consume src1 before writing dst.low(). */ \

+ __ op(size, dst.high(), src1.low(), src1.high()); \

+ __ op(size, dst.low(), src0.low(), src0.high()); \

+ } \

+ } while (0)

void CodeGenerator::AssembleDeconstructFrame() {

__ LeaveFrame(StackFrame::MANUAL);

unwinding_info_writer_.MarkFrameDeconstructed(__ pc_offset());

@@ -1611,6 +1646,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(

i.InputSimd128Register(1));

break;

}

+ case kArmF32x4AddHoriz: {

+ Simd128Register dst = i.OutputSimd128Register(),

+ src0 = i.InputSimd128Register(0),

+ src1 = i.InputSimd128Register(1);

+ // Make sure we don't overwrite source data before it's used.

+ if (dst.is(src0)) {

+ __ vpadd(dst.low(), src0.low(), src0.high());

+ if (dst.is(src1)) {

+ __ vmov(dst.high(), dst.low());

+ } else {

+ __ vpadd(dst.high(), src1.low(), src1.high());

+ }

+ } else {

+ __ vpadd(dst.high(), src1.low(), src1.high());

+ __ vpadd(dst.low(), src0.low(), src0.high());

+ }

+ break;

+ }

case kArmF32x4Sub: {

__ vsub(i.OutputSimd128Register(), i.InputSimd128Register(0),

i.InputSimd128Register(1));

@@ -1699,6 +1752,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(

i.InputSimd128Register(1));

break;

}

+ case kArmI32x4AddHoriz:

+ ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon32);

+ break;

case kArmI32x4Sub: {

__ vsub(Neon32, i.OutputSimd128Register(), i.InputSimd128Register(0),

i.InputSimd128Register(1));

@@ -1818,25 +1874,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(

i.InputInt4(1));

break;

}

- case kArmI16x8SConvertI32x4: {

- Simd128Register dst = i.OutputSimd128Register(),

- src0 = i.InputSimd128Register(0),

- src1 = i.InputSimd128Register(1);

- // Take care not to overwrite a source register before it's used.

- if (dst.is(src0) && dst.is(src1)) {

- __ vqmovn(NeonS16, dst.low(), src0);

- __ vmov(dst.high(), dst.low());

- } else if (dst.is(src0)) {

- // dst is src0, so narrow src0 first.

- __ vqmovn(NeonS16, dst.low(), src0);

- __ vqmovn(NeonS16, dst.high(), src1);

- } else {

- // dst may alias src1, so narrow src1 first.

- __ vqmovn(NeonS16, dst.high(), src1);

- __ vqmovn(NeonS16, dst.low(), src0);

- }

+ case kArmI16x8SConvertI32x4:

+ ASSEMBLE_NEON_NARROWING_OP(NeonS16);

break;

- }

case kArmI16x8Add: {

__ vadd(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),

i.InputSimd128Register(1));

@@ -1847,6 +1887,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(

i.InputSimd128Register(1));

break;

}

+ case kArmI16x8AddHoriz:

+ ASSEMBLE_NEON_PAIRWISE_OP(vpadd, Neon16);

+ break;

case kArmI16x8Sub: {

__ vsub(Neon16, i.OutputSimd128Register(), i.InputSimd128Register(0),

i.InputSimd128Register(1));

@@ -1909,25 +1952,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(

i.InputInt4(1));

break;

}

- case kArmI16x8UConvertI32x4: {

- Simd128Register dst = i.OutputSimd128Register(),

- src0 = i.InputSimd128Register(0),

- src1 = i.InputSimd128Register(1);

- // Take care not to overwrite a source register before it's used.

- if (dst.is(src0) && dst.is(src1)) {

- __ vqmovn(NeonU16, dst.low(), src0);

- __ vmov(dst.high(), dst.low());

- } else if (dst.is(src0)) {

- // dst is src0, so narrow src0 first.

- __ vqmovn(NeonU16, dst.low(), src0);

- __ vqmovn(NeonU16, dst.high(), src1);

- } else {

- // dst may alias src1, so narrow src1 first.

- __ vqmovn(NeonU16, dst.high(), src1);

- __ vqmovn(NeonU16, dst.low(), src0);

- }

+ case kArmI16x8UConvertI32x4:

+ ASSEMBLE_NEON_NARROWING_OP(NeonU16);

break;

- }

case kArmI16x8AddSaturateU: {

__ vqadd(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),

i.InputSimd128Register(1));

@@ -1986,25 +2013,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(

i.InputInt3(1));

break;

}

- case kArmI8x16SConvertI16x8: {

- Simd128Register dst = i.OutputSimd128Register(),

- src0 = i.InputSimd128Register(0),

- src1 = i.InputSimd128Register(1);

- // Take care not to overwrite a source register before it's used.

- if (dst.is(src0) && dst.is(src1)) {

- __ vqmovn(NeonS8, dst.low(), src0);

- __ vmov(dst.high(), dst.low());

- } else if (dst.is(src0)) {

- // dst is src0, so narrow src0 first.

- __ vqmovn(NeonS8, dst.low(), src0);

- __ vqmovn(NeonS8, dst.high(), src1);

- } else {

- // dst may alias src1, so narrow src1 first.

- __ vqmovn(NeonS8, dst.high(), src1);

- __ vqmovn(NeonS8, dst.low(), src0);

- }

+ case kArmI8x16SConvertI16x8:

+ ASSEMBLE_NEON_NARROWING_OP(NeonS8);

break;

- }

case kArmI8x16Add: {

__ vadd(Neon8, i.OutputSimd128Register(), i.InputSimd128Register(0),

i.InputSimd128Register(1));

@@ -2066,25 +2077,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(

i.InputInt3(1));

break;

}

- case kArmI8x16UConvertI16x8: {

- Simd128Register dst = i.OutputSimd128Register(),

- src0 = i.InputSimd128Register(0),

- src1 = i.InputSimd128Register(1);

- // Take care not to overwrite a source register before it's used.

- if (dst.is(src0) && dst.is(src1)) {

- __ vqmovn(NeonU8, dst.low(), src0);

- __ vmov(dst.high(), dst.low());

- } else if (dst.is(src0)) {

- // dst is src0, so narrow src0 first.

- __ vqmovn(NeonU8, dst.low(), src0);

- __ vqmovn(NeonU8, dst.high(), src1);

- } else {

- // dst may alias src1, so narrow src1 first.

- __ vqmovn(NeonU8, dst.high(), src1);

- __ vqmovn(NeonU8, dst.low(), src0);

- }

+ case kArmI8x16UConvertI16x8:

+ ASSEMBLE_NEON_NARROWING_OP(NeonU8);

break;

- }

case kArmI8x16AddSaturateU: {

__ vqadd(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),

i.InputSimd128Register(1));

« no previous file with comments | « src/arm/simulator-arm.cc ('k') | src/compiler/arm/instruction-codes-arm.h » ('j') | no next file with comments »