| Index: test/unittests/compiler/x64/instruction-selector-x64-unittest.cc
|
| diff --git a/test/unittests/compiler/x64/instruction-selector-x64-unittest.cc b/test/unittests/compiler/x64/instruction-selector-x64-unittest.cc
|
| index 6faa6ba975d86a17d998c579ea7766c69dc3e4d8..c4cadaf627f101b8249d0bea33d03a5af99619a7 100644
|
| --- a/test/unittests/compiler/x64/instruction-selector-x64-unittest.cc
|
| +++ b/test/unittests/compiler/x64/instruction-selector-x64-unittest.cc
|
| @@ -265,14 +265,18 @@ TEST_F(InstructionSelectorTest, Int32AddConstantAsLeaSingle) {
|
| StreamBuilder m(this, kMachInt32, kMachInt32);
|
| Node* const p0 = m.Parameter(0);
|
| Node* const c0 = m.Int32Constant(15);
|
| - // If there is only a single use of an add's input, use an "addl" not a
|
| - // "leal", it is faster.
|
| + // If one of the add's operands is only used once, use an "leal", even though
|
| + // an "addl" could be used. The "leal" has proven faster--our best guess is
|
| + // that it gives the register allocation more freedom and it doesn't set
|
| + // flags, reducing pressure in the CPU's pipeline. If we're lucky with
|
| + // register allocation, then code generation will select an "addl" later for
|
| + // the cases that have been measured to be faster.
|
| Node* const v0 = m.Int32Add(p0, c0);
|
| m.Return(v0);
|
| Stream s = m.Build();
|
| ASSERT_EQ(1U, s.size());
|
| - EXPECT_EQ(kX64Add32, s[0]->arch_opcode());
|
| - EXPECT_EQ(kMode_None, s[0]->addressing_mode());
|
| + EXPECT_EQ(kX64Lea32, s[0]->arch_opcode());
|
| + EXPECT_EQ(kMode_MRI, s[0]->addressing_mode());
|
| ASSERT_EQ(2U, s[0]->InputCount());
|
| EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
|
| EXPECT_TRUE(s[0]->InputAt(1)->IsImmediate());
|
| @@ -284,12 +288,13 @@ TEST_F(InstructionSelectorTest, Int32AddConstantAsAdd) {
|
| Node* const p0 = m.Parameter(0);
|
| Node* const c0 = m.Int32Constant(1);
|
| // If there is only a single use of an add's input and the immediate constant
|
| - // for the add is 1, use inc.
|
| + // for the add is 1, don't use an inc. It is much slower on modern Intel
|
| + // architectures.
|
| m.Return(m.Int32Add(p0, c0));
|
| Stream s = m.Build();
|
| ASSERT_EQ(1U, s.size());
|
| - EXPECT_EQ(kX64Add32, s[0]->arch_opcode());
|
| - EXPECT_EQ(kMode_None, s[0]->addressing_mode());
|
| + EXPECT_EQ(kX64Lea32, s[0]->arch_opcode());
|
| + EXPECT_EQ(kMode_MRI, s[0]->addressing_mode());
|
| ASSERT_EQ(2U, s[0]->InputCount());
|
| EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
|
| EXPECT_TRUE(s[0]->InputAt(1)->IsImmediate());
|
| @@ -317,12 +322,17 @@ TEST_F(InstructionSelectorTest, Int32AddCommutedConstantAsLeaSingle) {
|
| StreamBuilder m(this, kMachInt32, kMachInt32);
|
| Node* const p0 = m.Parameter(0);
|
| Node* const c0 = m.Int32Constant(15);
|
| - // If there is only a single use of an add's input, use "addl"
|
| + // If one of the add's operands is only used once, use an "leal", even though
|
| + // an "addl" could be used. The "leal" has proven faster--our best guess is
|
| + // that it gives the register allocation more freedom and it doesn't set
|
| + // flags, reducing pressure in the CPU's pipeline. If we're lucky with
|
| + // register allocation, then code generation will select an "addl" later for
|
| + // the cases that have been measured to be faster.
|
| m.Return(m.Int32Add(c0, p0));
|
| Stream s = m.Build();
|
| ASSERT_EQ(1U, s.size());
|
| - EXPECT_EQ(kX64Add32, s[0]->arch_opcode());
|
| - EXPECT_EQ(kMode_None, s[0]->addressing_mode());
|
| + EXPECT_EQ(kX64Lea32, s[0]->arch_opcode());
|
| + EXPECT_EQ(kMode_MRI, s[0]->addressing_mode());
|
| ASSERT_EQ(2U, s[0]->InputCount());
|
| EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
|
| EXPECT_TRUE(s[0]->InputAt(1)->IsImmediate());
|
| @@ -351,12 +361,17 @@ TEST_F(InstructionSelectorTest, Int32AddSimpleAsAdd) {
|
| StreamBuilder m(this, kMachInt32, kMachInt32, kMachInt32);
|
| Node* const p0 = m.Parameter(0);
|
| Node* const p1 = m.Parameter(1);
|
| - // If one of the add's operands is only used once, use an "addl".
|
| + // If one of the add's operands is only used once, use an "leal", even though
|
| + // an "addl" could be used. The "leal" has proven faster--our best guess is
|
| + // that it gives the register allocation more freedom and it doesn't set
|
| + // flags, reducing pressure in the CPU's pipeline. If we're lucky with
|
| + // register allocation, then code generation will select an "addl" later for
|
| + // the cases that have been measured to be faster.
|
| m.Return(m.Int32Add(p0, p1));
|
| Stream s = m.Build();
|
| ASSERT_EQ(1U, s.size());
|
| - EXPECT_EQ(kX64Add32, s[0]->arch_opcode());
|
| - EXPECT_EQ(kMode_None, s[0]->addressing_mode());
|
| + EXPECT_EQ(kX64Lea32, s[0]->arch_opcode());
|
| + EXPECT_EQ(kMode_MR1, s[0]->addressing_mode());
|
| ASSERT_EQ(2U, s[0]->InputCount());
|
| EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
|
| EXPECT_EQ(s.ToVreg(p1), s.ToVreg(s[0]->InputAt(1)));
|
| @@ -715,8 +730,8 @@ TEST_F(InstructionSelectorTest, Int32SubConstantAsSub) {
|
| m.Return(m.Int32Sub(p0, c0));
|
| Stream s = m.Build();
|
| ASSERT_EQ(1U, s.size());
|
| - EXPECT_EQ(kX64Sub32, s[0]->arch_opcode());
|
| - EXPECT_EQ(kMode_None, s[0]->addressing_mode());
|
| + EXPECT_EQ(kX64Lea32, s[0]->arch_opcode());
|
| + EXPECT_EQ(kMode_MRI, s[0]->addressing_mode());
|
| ASSERT_EQ(2U, s[0]->InputCount());
|
| EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
|
| EXPECT_TRUE(s[0]->InputAt(1)->IsImmediate());
|
| @@ -759,7 +774,7 @@ TEST_F(InstructionSelectorTest, Int32AddScaled2Other) {
|
| EXPECT_EQ(s.ToVreg(p1), s.ToVreg(s[0]->InputAt(1)));
|
| EXPECT_EQ(s.ToVreg(a0), s.ToVreg(s[0]->OutputAt(0)));
|
| ASSERT_EQ(2U, s[1]->InputCount());
|
| - EXPECT_EQ(kX64Add32, s[1]->arch_opcode());
|
| + EXPECT_EQ(kX64Lea32, s[1]->arch_opcode());
|
| EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[1]->InputAt(0)));
|
| EXPECT_EQ(s.ToVreg(a0), s.ToVreg(s[1]->InputAt(1)));
|
| EXPECT_EQ(s.ToVreg(a1), s.ToVreg(s[1]->OutputAt(0)));
|
|
|