Chromium Code Reviews| Index: src/compiler/arm/code-generator-arm.cc |
| diff --git a/src/compiler/arm/code-generator-arm.cc b/src/compiler/arm/code-generator-arm.cc |
| index a4d09c65b2dc5c6fd15f8c68c09bffae51fb4025..731276d89ffe8a35a730f67b5e85b9be77ec226b 100644 |
| --- a/src/compiler/arm/code-generator-arm.cc |
| +++ b/src/compiler/arm/code-generator-arm.cc |
| @@ -1634,8 +1634,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
| break; |
| } |
| case kArmInt32x4GreaterThanOrEqual: { |
| - Simd128Register dst = i.OutputSimd128Register(); |
| - __ vcge(NeonS32, dst, i.InputSimd128Register(0), |
| + __ vcge(NeonS32, i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| @@ -1660,8 +1659,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
| break; |
| } |
| case kArmUint32x4GreaterThanOrEqual: { |
| - Simd128Register dst = i.OutputSimd128Register(); |
| - __ vcge(NeonU32, dst, i.InputSimd128Register(0), |
| + __ vcge(NeonU32, i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| @@ -1746,8 +1744,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
| break; |
| } |
| case kArmInt16x8GreaterThanOrEqual: { |
| - Simd128Register dst = i.OutputSimd128Register(); |
| - __ vcge(NeonS16, dst, i.InputSimd128Register(0), |
| + __ vcge(NeonS16, i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| @@ -1782,8 +1779,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
| break; |
| } |
| case kArmUint16x8GreaterThanOrEqual: { |
| - Simd128Register dst = i.OutputSimd128Register(); |
| - __ vcge(NeonU16, dst, i.InputSimd128Register(0), |
| + __ vcge(NeonU16, i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| @@ -1867,8 +1863,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
| break; |
| } |
| case kArmInt8x16GreaterThanOrEqual: { |
| - Simd128Register dst = i.OutputSimd128Register(); |
| - __ vcge(NeonS8, dst, i.InputSimd128Register(0), |
| + __ vcge(NeonS8, i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| @@ -1903,8 +1898,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
| break; |
| } |
| case kArmUint8x16GreaterThanOrEqual: { |
| - Simd128Register dst = i.OutputSimd128Register(); |
| - __ vcge(NeonU8, dst, i.InputSimd128Register(0), |
| + __ vcge(NeonU8, i.OutputSimd128Register(), i.InputSimd128Register(0), |
| i.InputSimd128Register(1)); |
| break; |
| } |
| @@ -1932,15 +1926,73 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( |
| __ vmvn(i.OutputSimd128Register(), i.InputSimd128Register(0)); |
| break; |
| } |
| - case kArmSimd32x4Select: |
| - case kArmSimd16x8Select: |
| - case kArmSimd8x16Select: { |
| + case kArmSimd128Select: { |
| // vbsl clobbers the mask input so make sure it was DefineSameAsFirst. |
| DCHECK(i.OutputSimd128Register().is(i.InputSimd128Register(0))); |
| __ vbsl(i.OutputSimd128Register(), i.InputSimd128Register(1), |
| i.InputSimd128Register(2)); |
| break; |
| } |
| + case kArmSimd1x4AnyTrue: { |
| + const QwNeonRegister& src = i.InputSimd128Register(0); |
| + __ vpmax(NeonU32, kScratchDoubleReg, src.low(), src.high()); |
| + __ vpmax(NeonU32, kScratchDoubleReg, kScratchDoubleReg, |
| + kScratchDoubleReg); |
| + // kScratchDoubleReg is the top half of kScratchQuadReg, read lane 2. |
| + __ ExtractLane(i.OutputRegister(), kScratchQuadReg, NeonS32, 2); |
|
martyn.capewell
2017/03/01 14:04:05
An ExtractLane function that accepts a double register […]
bbudge
2017/03/02 00:34:47
That's a nice improvement. Done.
|
| + break; |
| + } |
| + case kArmSimd1x4AllTrue: { |
| + const QwNeonRegister& src = i.InputSimd128Register(0); |
| + __ vpmin(NeonU32, kScratchDoubleReg, src.low(), src.high()); |
| + __ vpmin(NeonU32, kScratchDoubleReg, kScratchDoubleReg, |
| + kScratchDoubleReg); |
| + // kScratchDoubleReg is the top half of kScratchQuadReg, read lane 2. |
| + __ ExtractLane(i.OutputRegister(), kScratchQuadReg, NeonS32, 2); |
| + break; |
| + } |
| + case kArmSimd1x8AnyTrue: { |
| + const QwNeonRegister& src = i.InputSimd128Register(0); |
| + __ vpmax(NeonU16, kScratchDoubleReg, src.low(), src.high()); |
| + __ vpmax(NeonU16, kScratchDoubleReg, kScratchDoubleReg, |
| + kScratchDoubleReg); |
| + __ vpmax(NeonU16, kScratchDoubleReg, kScratchDoubleReg, |
| + kScratchDoubleReg); |
| + // kScratchDoubleReg is the top half of kScratchQuadReg, read S16 lane 4. |
| + __ ExtractLane(i.OutputRegister(), kScratchQuadReg, NeonS16, 4); |
| + break; |
| + } |
| + case kArmSimd1x8AllTrue: { |
| + const QwNeonRegister& src = i.InputSimd128Register(0); |
| + __ vpmin(NeonU16, kScratchDoubleReg, src.low(), src.high()); |
| + __ vpmin(NeonU16, kScratchDoubleReg, kScratchDoubleReg, |
| + kScratchDoubleReg); |
| + __ vpmin(NeonU16, kScratchDoubleReg, kScratchDoubleReg, |
| + kScratchDoubleReg); |
| + // kScratchDoubleReg is the top half of kScratchQuadReg, read S16 lane 4. |
| + __ ExtractLane(i.OutputRegister(), kScratchQuadReg, NeonS16, 4); |
| + break; |
| + } |
| + case kArmSimd1x16AnyTrue: { |
|
martyn.capewell
2017/03/01 14:04:05
I think we could save an instruction here:
__ vo
bbudge
2017/03/02 00:34:47
I'd rather not add the DwVfpRegister versions of N
|
| + const QwNeonRegister& src = i.InputSimd128Register(0); |
| + __ vpmax(NeonU16, kScratchDoubleReg, src.low(), src.high()); |
|
bbudge
2017/03/02 00:34:47
Whoops, typo: NeonU16 on the line above should be NeonU8, matching the other vpmax calls in this case.
|
| + __ vpmax(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); |
| + __ vpmax(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); |
| + __ vpmax(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); |
| + // kScratchDoubleReg is the top half of kScratchQuadReg, read S8 lane 8. |
| + __ ExtractLane(i.OutputRegister(), kScratchQuadReg, NeonS8, 8); |
| + break; |
| + } |
| + case kArmSimd1x16AllTrue: { |
| + const QwNeonRegister& src = i.InputSimd128Register(0); |
| + __ vpmin(NeonU8, kScratchDoubleReg, src.low(), src.high()); |
| + __ vpmin(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); |
| + __ vpmin(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); |
| + __ vpmin(NeonU8, kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg); |
| + // kScratchDoubleReg is the top half of kScratchQuadReg, read S8 lane 8. |
| + __ ExtractLane(i.OutputRegister(), kScratchQuadReg, NeonS8, 8); |
| + break; |
| + } |
| case kCheckedLoadInt8: |
| ASSEMBLE_CHECKED_LOAD_INTEGER(ldrsb); |
| break; |