Index: src/compiler/arm/code-generator-arm.cc
diff --git a/src/compiler/arm/code-generator-arm.cc b/src/compiler/arm/code-generator-arm.cc
index a3302a1ac11b8c5f8a7e98d509ddcf73c18573ea..e9f83bdd07723ff7c884fcef7bcc6bc243d7e03d 100644
--- a/src/compiler/arm/code-generator-arm.cc
+++ b/src/compiler/arm/code-generator-arm.cc
@@ -1636,8 +1636,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmInt32x4GreaterThanOrEqual: {
-      Simd128Register dst = i.OutputSimd128Register();
-      __ vcge(NeonS32, dst, i.InputSimd128Register(0),
+      __ vcge(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
       break;
     }
@@ -1662,8 +1661,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmUint32x4GreaterThanOrEqual: {
-      Simd128Register dst = i.OutputSimd128Register();
-      __ vcge(NeonU32, dst, i.InputSimd128Register(0),
+      __ vcge(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
       break;
     }
@@ -1748,8 +1746,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmInt16x8GreaterThanOrEqual: {
-      Simd128Register dst = i.OutputSimd128Register();
-      __ vcge(NeonS16, dst, i.InputSimd128Register(0),
+      __ vcge(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
       break;
     }
@@ -1784,8 +1781,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmUint16x8GreaterThanOrEqual: {
-      Simd128Register dst = i.OutputSimd128Register();
-      __ vcge(NeonU16, dst, i.InputSimd128Register(0),
+      __ vcge(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
       break;
     }
@@ -1869,8 +1865,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmInt8x16GreaterThanOrEqual: {
-      Simd128Register dst = i.OutputSimd128Register();
-      __ vcge(NeonS8, dst, i.InputSimd128Register(0),
+      __ vcge(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
       break;
     }
@@ -1905,8 +1900,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kArmUint8x16GreaterThanOrEqual: {
-      Simd128Register dst = i.OutputSimd128Register();
-      __ vcge(NeonU8, dst, i.InputSimd128Register(0),
+      __ vcge(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0),
               i.InputSimd128Register(1));
       break;
     }
@@ -1934,15 +1928,79 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0));
       break;
     }
-    case kArmSimd32x4Select:
-    case kArmSimd16x8Select:
-    case kArmSimd8x16Select: {
+    case kArmSimd128Select: {
       // vbsl clobbers the mask input so make sure it was DefineSameAsFirst.
       DCHECK(i.OutputSimd128Register().is(i.InputSimd128Register(0)));
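+      // vbsl computes dst = (dst & src1) | (~dst & src2); the mask is in
+      // dst, so set mask bits select bits from the first input.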
       __ vbsl(i.OutputSimd128Register(), i.InputSimd128Register(1),
               i.InputSimd128Register(2));
       break;
     }
+    case kArmSimd1x4AnyTrue: {
+      const QwNeonRegister& src = i.InputSimd128Register(0);
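+      // Two rounds of pairwise max fold the four 32-bit lanes into lane 0,
+      // which is non-zero iff any input lane was non-zero.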
+      __ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high());
+      __ vpmax(NeonU32, kScratchDoubleReg, kScratchDoubleReg,
+               kScratchDoubleReg);
+      __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0);
+      break;
+    }
+    case kArmSimd1x4AllTrue: {
+      const QwNeonRegister& src = i.InputSimd128Register(0);
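+      // Pairwise min folds to lane 0 as above, but is non-zero only if
+      // every input lane was non-zero.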
+      __ vpmin(NeonU32, kScratchDoubleReg, src.low(), src.high());
+      __ vpmin(NeonU32, kScratchDoubleReg, kScratchDoubleReg,
+               kScratchDoubleReg);
+      __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0);
+      break;
+    }
+    case kArmSimd1x8AnyTrue: {
+      const QwNeonRegister& src = i.InputSimd128Register(0);
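+      // Eight 16-bit lanes take three rounds of pairwise max.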
+      __ vpmax(NeonU16, kScratchDoubleReg, src.low(), src.high());
+      __ vpmax(NeonU16, kScratchDoubleReg, kScratchDoubleReg,
+               kScratchDoubleReg);
+      __ vpmax(NeonU16, kScratchDoubleReg, kScratchDoubleReg,
+               kScratchDoubleReg);
+      __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS16, 0);
+      break;
+    }
+    case kArmSimd1x8AllTrue: {
+      const QwNeonRegister& src = i.InputSimd128Register(0);
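+      // Eight 16-bit lanes take three rounds of pairwise min.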
+      __ vpmin(NeonU16, kScratchDoubleReg, src.low(), src.high());
+      __ vpmin(NeonU16, kScratchDoubleReg, kScratchDoubleReg,
+               kScratchDoubleReg);
+      __ vpmin(NeonU16, kScratchDoubleReg, kScratchDoubleReg,
+               kScratchDoubleReg);
+      __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS16, 0);
+      break;
+    }
+    case kArmSimd1x16AnyTrue: {
+      const QwNeonRegister& src = i.InputSimd128Register(0);
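+      // Two rounds of vpmax reduce all 16 byte lanes into the low 32 bits.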
+      __ vpmax(NeonU8, kScratchDoubleReg, src.low(), src.high());
+      __ vpmax(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      // vtst to detect any bits in the bottom 32 bits of kScratchDoubleReg.
+      // This saves an instruction vs. the naive sequence of vpmax.
+      // kDoubleRegZero is not changed, since it is 0.
+      __ vtst(Neon32, kScratchQuadReg, kScratchQuadReg, kScratchQuadReg);
+      __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS32, 0);
+      break;
+    }
+    case kArmSimd1x16AllTrue: {
+      const QwNeonRegister& src = i.InputSimd128Register(0);
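+      // Sixteen byte lanes take four rounds of pairwise min.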
+      __ vpmin(NeonU8, kScratchDoubleReg, src.low(), src.high());
+      __ vpmin(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpmin(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ vpmin(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+      __ ExtractLane(i.OutputRegister(), kScratchDoubleReg, NeonS8, 0);
+      break;
+    }
     case kCheckedLoadInt8:
       ASSEMBLE_CHECKED_LOAD_INTEGER(ldrsb);
       break;
|