Chromium Code Reviews| Index: src/IceTargetLoweringX86BaseImpl.h |
| diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h |
| index 3820d99a0f159b5ea1604600543410f836fd9225..3de1c660363898faec880c80df74271259df8797 100644 |
| --- a/src/IceTargetLoweringX86BaseImpl.h |
| +++ b/src/IceTargetLoweringX86BaseImpl.h |
| @@ -1184,115 +1184,237 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { |
| _mov(DestHi, T_4Hi); |
| } break; |
| case InstArithmetic::Shl: { |
| - // TODO: Refactor the similarities between Shl, Lshr, and Ashr. |
|
Jim Stichnoth
2015/09/22 05:48:46
Don't delete this TODO unless you're taking care o
sehr
2015/09/22 16:03:17
Done.
|
| - // gcc does the following: |
| - // a=b<<c ==> |
| - // t1:ecx = c.lo & 0xff |
| - // t2 = b.lo |
| - // t3 = b.hi |
| - // t3 = shld t3, t2, t1 |
| - // t2 = shl t2, t1 |
| - // test t1, 0x20 |
| - // je L1 |
| - // use(t3) |
| - // t3 = t2 |
| - // t2 = 0 |
| - // L1: |
| - // a.lo = t2 |
| - // a.hi = t3 |
| Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; |
| - Constant *BitTest = Ctx->getConstantInt32(0x20); |
| Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| - typename Traits::Insts::Label *Label = |
| - Traits::Insts::Label::create(Func, this); |
| - _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); |
| - _mov(T_2, Src0Lo); |
| - _mov(T_3, Src0Hi); |
| - _shld(T_3, T_2, T_1); |
| - _shl(T_2, T_1); |
| - _test(T_1, BitTest); |
| - _br(Traits::Cond::Br_e, Label); |
| - // T_2 and T_3 are being assigned again because of the intra-block |
| - // control flow, so we need the _mov_nonkillable variant to avoid |
| - // liveness problems. |
| - _mov_nonkillable(T_3, T_2); |
| - _mov_nonkillable(T_2, Zero); |
| - Context.insert(Label); |
| - _mov(DestLo, T_2); |
| - _mov(DestHi, T_3); |
| + if (const auto *ConstantShiftAmount = |
| + llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { |
| + uint32_t ShiftAmount = ConstantShiftAmount->getValue(); |
| + if (ShiftAmount > 32) { |
| + // a=b<<c ==> |
| + // t2 = b.lo |
| + // t2 = shl t2, ShiftAmount-32 |
| + // t3 = t2 |
| + // t2 = 0 |
| + _mov(T_2, Src0Lo); |
| + _shl(T_2, Ctx->getConstantInt32(ShiftAmount-32)); |
|
Jim Stichnoth
2015/09/22 05:48:46
Please run "make -f Makefile.standalone format" to
sehr
2015/09/22 16:03:17
Done.
|
| + _mov(DestHi, T_2); |
| + _mov(DestLo, Zero); |
| + } else if (ShiftAmount == 32) { |
| + // a=b<<c ==> |
| + // t2 = b.lo |
| + // a.hi = t2 |
| + // a.lo = 0 |
| + _mov(T_2, Src0Lo); |
| + _mov(DestHi, T_2); |
| + _mov(DestLo, Zero); |
| + } else { |
| + // a=b<<c ==> |
| + // t2 = b.lo |
| + // t3 = b.hi |
| + // t3 = shld t3, t2, ShiftAmount |
| + // t2 = shl t2, ShiftAmount |
| + // a.lo = t2 |
| + // a.hi = t3 |
| + _mov(T_2, Src0Lo); |
| + _mov(T_3, Src0Hi); |
| + _shld(T_3, T_2, Ctx->getConstantInt32(ShiftAmount)); |
| + _shl(T_2, Ctx->getConstantInt32(ShiftAmount)); |
| + // Move T_2 first to reduce register pressure. |
| + _mov(DestLo, T_2); |
| + _mov(DestHi, T_3); |
| + } |
| + } else { |
| + // a=b<<c ==> |
| + // t1:ecx = c.lo & 0xff |
| + // t2 = b.lo |
| + // t3 = b.hi |
| + // t3 = shld t3, t2, t1 |
| + // t2 = shl t2, t1 |
| + // test t1, 0x20 |
| + // je L1 |
| + // use(t3) |
| + // t3 = t2 |
| + // t2 = 0 |
| + // L1: |
| + // a.lo = t2 |
| + // a.hi = t3 |
| + Constant *BitTest = Ctx->getConstantInt32(0x20); |
| + typename Traits::Insts::Label *Label = |
| + Traits::Insts::Label::create(Func, this); |
| + _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); |
| + _mov(T_2, Src0Lo); |
| + _mov(T_3, Src0Hi); |
| + _shld(T_3, T_2, T_1); |
| + _shl(T_2, T_1); |
| + _test(T_1, BitTest); |
| + _br(Traits::Cond::Br_e, Label); |
| + // T_2 and T_3 are being assigned again because of the intra-block |
| + // control flow, so we need the _mov_nonkillable variant to avoid |
| + // liveness problems. |
| + _mov_nonkillable(T_3, T_2); |
| + _mov_nonkillable(T_2, Zero); |
| + Context.insert(Label); |
| + _mov(DestLo, T_2); |
| + _mov(DestHi, T_3); |
| + } |
| } break; |
| case InstArithmetic::Lshr: { |
| - // a=b>>c (unsigned) ==> |
| - // t1:ecx = c.lo & 0xff |
| - // t2 = b.lo |
| - // t3 = b.hi |
| - // t2 = shrd t2, t3, t1 |
| - // t3 = shr t3, t1 |
| - // test t1, 0x20 |
| - // je L1 |
| - // use(t2) |
| - // t2 = t3 |
| - // t3 = 0 |
| - // L1: |
| - // a.lo = t2 |
| - // a.hi = t3 |
| Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; |
| - Constant *BitTest = Ctx->getConstantInt32(0x20); |
| Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| - typename Traits::Insts::Label *Label = |
| - Traits::Insts::Label::create(Func, this); |
| - _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); |
| - _mov(T_2, Src0Lo); |
| - _mov(T_3, Src0Hi); |
| - _shrd(T_2, T_3, T_1); |
| - _shr(T_3, T_1); |
| - _test(T_1, BitTest); |
| - _br(Traits::Cond::Br_e, Label); |
| - // T_2 and T_3 are being assigned again because of the intra-block |
| - // control flow, so we need the _mov_nonkillable variant to avoid |
| - // liveness problems. |
| - _mov_nonkillable(T_2, T_3); |
| - _mov_nonkillable(T_3, Zero); |
| - Context.insert(Label); |
| - _mov(DestLo, T_2); |
| - _mov(DestHi, T_3); |
| + if (const auto *ConstantShiftAmount = |
| + llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { |
| + uint32_t ShiftAmount = ConstantShiftAmount->getValue(); |
| + if (ShiftAmount > 32) { |
| + // a=b>>c (unsigned) ==> |
| + // t3 = b.hi |
| + // t3 = shr t3, ShiftAmount-32 |
| + // a.lo = t3 |
| + // a.hi = 0 |
| + _mov(T_3, Src0Hi); |
| + _shr(T_3, Ctx->getConstantInt32(ShiftAmount-32)); |
| + _mov(DestLo, T_3); |
| + _mov(DestHi, Zero); |
| + } else if (ShiftAmount == 32) { |
| + // a=b>>c (unsigned) ==> |
| + // t3 = b.hi |
| + // a.lo = t3 |
| + // a.hi = 0 |
| + _mov(T_3, Src0Hi); |
| + _mov(DestLo, T_3); |
| + _mov(DestHi, Zero); |
| + } else { |
| + // a=b>>c (unsigned) ==> |
| + // t2 = b.lo |
| + // t3 = b.hi |
| + // t2 = shrd t2, t3, ShiftAmount |
| + // t3 = shr t3, ShiftAmount |
| + // a.lo = t2 |
| + // a.hi = t3 |
| + _mov(T_2, Src0Lo); |
| + _mov(T_3, Src0Hi); |
| + _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount)); |
| + _shr(T_3, Ctx->getConstantInt32(ShiftAmount)); |
| + // Move T_3 first to reduce register pressure. |
| + _mov(DestHi, T_3); |
| + _mov(DestLo, T_2); |
| + } |
| + } else { |
| + // a=b>>c (unsigned) ==> |
| + // t1:ecx = c.lo & 0xff |
| + // t2 = b.lo |
| + // t3 = b.hi |
| + // t2 = shrd t2, t3, t1 |
| + // t3 = shr t3, t1 |
| + // test t1, 0x20 |
| + // je L1 |
| + // use(t2) |
| + // t2 = t3 |
| + // t3 = 0 |
| + // L1: |
| + // a.lo = t2 |
| + // a.hi = t3 |
| + Constant *BitTest = Ctx->getConstantInt32(0x20); |
| + typename Traits::Insts::Label *Label = |
| + Traits::Insts::Label::create(Func, this); |
| + _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); |
| + _mov(T_2, Src0Lo); |
| + _mov(T_3, Src0Hi); |
| + _shrd(T_2, T_3, T_1); |
| + _shr(T_3, T_1); |
| + _test(T_1, BitTest); |
| + _br(Traits::Cond::Br_e, Label); |
| + // T_2 and T_3 are being assigned again because of the intra-block |
| + // control flow, so we need the _mov_nonkillable variant to avoid |
| + // liveness problems. |
| + _mov_nonkillable(T_2, T_3); |
| + _mov_nonkillable(T_3, Zero); |
| + Context.insert(Label); |
| + _mov(DestLo, T_2); |
| + _mov(DestHi, T_3); |
| + } |
| } break; |
| case InstArithmetic::Ashr: { |
| - // a=b>>c (signed) ==> |
| - // t1:ecx = c.lo & 0xff |
| - // t2 = b.lo |
| - // t3 = b.hi |
| - // t2 = shrd t2, t3, t1 |
| - // t3 = sar t3, t1 |
| - // test t1, 0x20 |
| - // je L1 |
| - // use(t2) |
| - // t2 = t3 |
| - // t3 = sar t3, 0x1f |
| - // L1: |
| - // a.lo = t2 |
| - // a.hi = t3 |
| Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; |
| - Constant *BitTest = Ctx->getConstantInt32(0x20); |
| - Constant *SignExtend = Ctx->getConstantInt32(0x1f); |
| - typename Traits::Insts::Label *Label = |
| - Traits::Insts::Label::create(Func, this); |
| - _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); |
| - _mov(T_2, Src0Lo); |
| - _mov(T_3, Src0Hi); |
| - _shrd(T_2, T_3, T_1); |
| - _sar(T_3, T_1); |
| - _test(T_1, BitTest); |
| - _br(Traits::Cond::Br_e, Label); |
| - // T_2 and T_3 are being assigned again because of the intra-block |
| - // control flow, so T_2 needs the _mov_nonkillable variant to avoid |
| - // liveness problems. T_3 doesn't need special treatment because it is |
| - // reassigned via _sar instead of _mov. |
| - _mov_nonkillable(T_2, T_3); |
| - _sar(T_3, SignExtend); |
| - Context.insert(Label); |
| - _mov(DestLo, T_2); |
| - _mov(DestHi, T_3); |
| + if (const auto *ConstantShiftAmount = |
| + llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { |
| + uint32_t ShiftAmount = ConstantShiftAmount->getValue(); |
| + if (ShiftAmount > 32) { |
| + // a=b>>c (signed) ==> |
| + // t2 = b.hi |
| + // t3 = b.hi |
| + // t3 = sar t3, 0x1f |
| + // t2 = shrd t2, t3, ShiftAmount-32 |
| + // a.lo = t2 |
| + // a.hi = t3 |
| + _mov(T_2, Src0Hi); |
| + _mov(T_3, Src0Hi); |
| + _sar(T_3, Ctx->getConstantInt32(0x1f)); |
| + _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount-32)); |
| + _mov(DestLo, T_2); |
| + _mov(DestHi, T_3); |
| + } else if (ShiftAmount == 32) { |
| + // a=b>>c (signed) ==> |
| + // t2 = b.hi |
| + // a.lo = t2 |
| + // t3 = b.hi |
| + // t3 = sar t3, 0x1f |
| + // a.hi = t3 |
| + _mov(T_2, Src0Hi); |
| + _mov(DestLo, T_2); |
| + _mov(T_3, Src0Hi); |
| + _sar(T_3, Ctx->getConstantInt32(0x1f)); |
| + _mov(DestHi, T_3); |
| + } else { |
| + // a=b>>c (signed) ==> |
| + // t2 = b.lo |
| + // t3 = b.hi |
| + // t2 = shrd t2, t3, ShiftAmount |
| + // t3 = sar t3, ShiftAmount |
| + // a.lo = t2 |
| + // a.hi = t3 |
| + _mov(T_2, Src0Lo); |
| + _mov(T_3, Src0Hi); |
| + _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount)); |
| + _sar(T_3, Ctx->getConstantInt32(ShiftAmount)); |
| + _mov(DestLo, T_2); |
| + _mov(DestHi, T_3); |
| + } |
| + } else { |
| + // a=b>>c (signed) ==> |
| + // t1:ecx = c.lo & 0xff |
| + // t2 = b.lo |
| + // t3 = b.hi |
| + // t2 = shrd t2, t3, t1 |
| + // t3 = sar t3, t1 |
| + // test t1, 0x20 |
| + // je L1 |
| + // use(t2) |
| + // t2 = t3 |
| + // t3 = sar t3, 0x1f |
| + // L1: |
| + // a.lo = t2 |
| + // a.hi = t3 |
| + Constant *BitTest = Ctx->getConstantInt32(0x20); |
| + Constant *SignExtend = Ctx->getConstantInt32(0x1f); |
| + typename Traits::Insts::Label *Label = |
| + Traits::Insts::Label::create(Func, this); |
| + _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); |
| + _mov(T_2, Src0Lo); |
| + _mov(T_3, Src0Hi); |
| + _shrd(T_2, T_3, T_1); |
| + _sar(T_3, T_1); |
| + _test(T_1, BitTest); |
| + _br(Traits::Cond::Br_e, Label); |
| + // T_2 and T_3 are being assigned again because of the intra-block |
| + // control flow, so T_2 needs the _mov_nonkillable variant to avoid |
| + // liveness problems. T_3 doesn't need special treatment because it is |
| + // reassigned via _sar instead of _mov. |
| + _mov_nonkillable(T_2, T_3); |
| + _sar(T_3, SignExtend); |
| + Context.insert(Label); |
| + _mov(DestLo, T_2); |
| + _mov(DestHi, T_3); |
| + } |
| } break; |
| case InstArithmetic::Fadd: |
| case InstArithmetic::Fsub: |