Index: test/unittests/compiler/x64/instruction-selector-x64-unittest.cc
diff --git a/test/unittests/compiler/x64/instruction-selector-x64-unittest.cc b/test/unittests/compiler/x64/instruction-selector-x64-unittest.cc
index 6faa6ba975d86a17d998c579ea7766c69dc3e4d8..c4cadaf627f101b8249d0bea33d03a5af99619a7 100644
--- a/test/unittests/compiler/x64/instruction-selector-x64-unittest.cc
+++ b/test/unittests/compiler/x64/instruction-selector-x64-unittest.cc
@@ -265,14 +265,18 @@ TEST_F(InstructionSelectorTest, Int32AddConstantAsLeaSingle) {
StreamBuilder m(this, kMachInt32, kMachInt32);
Node* const p0 = m.Parameter(0);
Node* const c0 = m.Int32Constant(15);
- // If there is only a single use of an add's input, use an "addl" not a
- // "leal", it is faster.
+ // If one of the add's operands is only used once, use a "leal", even though
+ // an "addl" could be used. The "leal" has proven faster; our best guess is
+ // that it gives the register allocator more freedom, and it doesn't set
+ // flags, reducing pressure in the CPU's pipeline. If we're lucky with
+ // register allocation, then code generation will select an "addl" later for
+ // the cases that have been measured to be faster.
Node* const v0 = m.Int32Add(p0, c0);
m.Return(v0);
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
- EXPECT_EQ(kX64Add32, s[0]->arch_opcode());
- EXPECT_EQ(kMode_None, s[0]->addressing_mode());
+ EXPECT_EQ(kX64Lea32, s[0]->arch_opcode());
+ EXPECT_EQ(kMode_MRI, s[0]->addressing_mode());
ASSERT_EQ(2U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
EXPECT_TRUE(s[0]->InputAt(1)->IsImmediate());
@@ -284,12 +288,13 @@ TEST_F(InstructionSelectorTest, Int32AddConstantAsAdd) {
Node* const p0 = m.Parameter(0);
Node* const c0 = m.Int32Constant(1);
// If there is only a single use of an add's input and the immediate constant
- // for the add is 1, use inc.
+ // for the add is 1, don't use an inc. It is slower on modern Intel
+ // architectures, where its partial flags update creates false dependencies.
m.Return(m.Int32Add(p0, c0));
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
- EXPECT_EQ(kX64Add32, s[0]->arch_opcode());
- EXPECT_EQ(kMode_None, s[0]->addressing_mode());
+ EXPECT_EQ(kX64Lea32, s[0]->arch_opcode());
+ EXPECT_EQ(kMode_MRI, s[0]->addressing_mode());
ASSERT_EQ(2U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
EXPECT_TRUE(s[0]->InputAt(1)->IsImmediate());
@@ -317,12 +322,17 @@ TEST_F(InstructionSelectorTest, Int32AddCommutedConstantAsLeaSingle) {
StreamBuilder m(this, kMachInt32, kMachInt32);
Node* const p0 = m.Parameter(0);
Node* const c0 = m.Int32Constant(15);
- // If there is only a single use of an add's input, use "addl"
+ // If one of the add's operands is only used once, use a "leal", even though
+ // an "addl" could be used. The "leal" has proven faster; our best guess is
+ // that it gives the register allocator more freedom, and it doesn't set
+ // flags, reducing pressure in the CPU's pipeline. If we're lucky with
+ // register allocation, then code generation will select an "addl" later for
+ // the cases that have been measured to be faster.
m.Return(m.Int32Add(c0, p0));
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
- EXPECT_EQ(kX64Add32, s[0]->arch_opcode());
- EXPECT_EQ(kMode_None, s[0]->addressing_mode());
+ EXPECT_EQ(kX64Lea32, s[0]->arch_opcode());
+ EXPECT_EQ(kMode_MRI, s[0]->addressing_mode());
ASSERT_EQ(2U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
EXPECT_TRUE(s[0]->InputAt(1)->IsImmediate());
@@ -351,12 +361,17 @@ TEST_F(InstructionSelectorTest, Int32AddSimpleAsAdd) {
StreamBuilder m(this, kMachInt32, kMachInt32, kMachInt32);
Node* const p0 = m.Parameter(0);
Node* const p1 = m.Parameter(1);
- // If one of the add's operands is only used once, use an "addl".
+ // If one of the add's operands is only used once, use a "leal", even though
+ // an "addl" could be used. The "leal" has proven faster; our best guess is
+ // that it gives the register allocator more freedom, and it doesn't set
+ // flags, reducing pressure in the CPU's pipeline. If we're lucky with
+ // register allocation, then code generation will select an "addl" later for
+ // the cases that have been measured to be faster.
m.Return(m.Int32Add(p0, p1));
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
- EXPECT_EQ(kX64Add32, s[0]->arch_opcode());
- EXPECT_EQ(kMode_None, s[0]->addressing_mode());
+ EXPECT_EQ(kX64Lea32, s[0]->arch_opcode());
+ EXPECT_EQ(kMode_MR1, s[0]->addressing_mode());
ASSERT_EQ(2U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
EXPECT_EQ(s.ToVreg(p1), s.ToVreg(s[0]->InputAt(1)));
@@ -715,8 +730,8 @@ TEST_F(InstructionSelectorTest, Int32SubConstantAsSub) {
m.Return(m.Int32Sub(p0, c0));
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
- EXPECT_EQ(kX64Sub32, s[0]->arch_opcode());
- EXPECT_EQ(kMode_None, s[0]->addressing_mode());
+ EXPECT_EQ(kX64Lea32, s[0]->arch_opcode());
+ EXPECT_EQ(kMode_MRI, s[0]->addressing_mode());
ASSERT_EQ(2U, s[0]->InputCount());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[0]->InputAt(0)));
EXPECT_TRUE(s[0]->InputAt(1)->IsImmediate());
@@ -759,7 +774,7 @@ TEST_F(InstructionSelectorTest, Int32AddScaled2Other) {
EXPECT_EQ(s.ToVreg(p1), s.ToVreg(s[0]->InputAt(1)));
EXPECT_EQ(s.ToVreg(a0), s.ToVreg(s[0]->OutputAt(0)));
ASSERT_EQ(2U, s[1]->InputCount());
- EXPECT_EQ(kX64Add32, s[1]->arch_opcode());
+ EXPECT_EQ(kX64Lea32, s[1]->arch_opcode());
EXPECT_EQ(s.ToVreg(p0), s.ToVreg(s[1]->InputAt(0)));
EXPECT_EQ(s.ToVreg(a0), s.ToVreg(s[1]->InputAt(1)));
EXPECT_EQ(s.ToVreg(a1), s.ToVreg(s[1]->OutputAt(0)));
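
For reference, a minimal sketch of the machine-level tradeoff the new comments
describe (illustrative AT&T-syntax assembly, not part of the patch): "addl" is
destructive and writes EFLAGS, while "leal" performs a three-operand add
through an addressing-mode computation and leaves the flags alone.

    addl $15, %edi        # edi += 15; clobbers edi and sets flags
    leal 15(%rdi), %eax   # eax = rdi + 15; rdi and the flags are untouched

The addressing modes asserted above correspond to these operand shapes:
kMode_MRI is a base register plus an immediate displacement, as in 15(%rdi),
and kMode_MR1 is a base register plus an index register scaled by 1, as in
(%rdi,%rsi,1). Similarly for the inc case: "inc" updates only part of EFLAGS
(it preserves CF), which is the partial-flags hazard the second test's comment
refers to.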