Chromium Code Reviews| Index: src/IceTargetLoweringX86BaseImpl.h |
| diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h |
| index 3820d99a0f159b5ea1604600543410f836fd9225..245819a5344c1caee049dae16090179334944796 100644 |
| --- a/src/IceTargetLoweringX86BaseImpl.h |
| +++ b/src/IceTargetLoweringX86BaseImpl.h |
| @@ -1185,114 +1185,237 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { |
| } break; |
| case InstArithmetic::Shl: { |
| // TODO: Refactor the similarities between Shl, Lshr, and Ashr. |
| - // gcc does the following: |
| - // a=b<<c ==> |
| - // t1:ecx = c.lo & 0xff |
| - // t2 = b.lo |
| - // t3 = b.hi |
| - // t3 = shld t3, t2, t1 |
| - // t2 = shl t2, t1 |
| - // test t1, 0x20 |
| - // je L1 |
| - // use(t3) |
| - // t3 = t2 |
| - // t2 = 0 |
| - // L1: |
| - // a.lo = t2 |
| - // a.hi = t3 |
| Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; |
| - Constant *BitTest = Ctx->getConstantInt32(0x20); |
| Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| - typename Traits::Insts::Label *Label = |
| - Traits::Insts::Label::create(Func, this); |
| - _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); |
| - _mov(T_2, Src0Lo); |
| - _mov(T_3, Src0Hi); |
| - _shld(T_3, T_2, T_1); |
| - _shl(T_2, T_1); |
| - _test(T_1, BitTest); |
| - _br(Traits::Cond::Br_e, Label); |
| - // T_2 and T_3 are being assigned again because of the intra-block |
| - // control flow, so we need the _mov_nonkillable variant to avoid |
| - // liveness problems. |
| - _mov_nonkillable(T_3, T_2); |
| - _mov_nonkillable(T_2, Zero); |
| - Context.insert(Label); |
| - _mov(DestLo, T_2); |
| - _mov(DestHi, T_3); |
| + if (const auto *ConstantShiftAmount = |
| + llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { |
| + uint32_t ShiftAmount = ConstantShiftAmount->getValue(); |
| + if (ShiftAmount > 32) { |
| + // a=b<<c ==> |
| + // t2 = b.lo |
| + // t2 = shl t2, ShiftAmount-32 |
| + // t3 = t2 |
| + // t2 = 0 |
| + _mov(T_2, Src0Lo); |
| + _shl(T_2, Ctx->getConstantInt32(ShiftAmount - 32)); |
| + _mov(DestHi, T_2); |
| + _mov(DestLo, Zero); |
| + } else if (ShiftAmount == 32) { |
| + // a=b<<c ==> |
| + // t2 = b.lo |
| + // a.hi = t2 |
| + // a.lo = 0 |
| + _mov(T_2, Src0Lo); |
| + _mov(DestHi, T_2); |
| + _mov(DestLo, Zero); |
| + } else { |
| + // a=b<<c ==> |
| + // t2 = b.lo |
| + // t3 = b.hi |
| + // t3 = shld t3, t2, ShiftAmount |
| + // t2 = shl t2, ShiftAmount |
| + // a.lo = t2 |
| + // a.hi = t3 |
| + _mov(T_2, Src0Lo); |
| + _mov(T_3, Src0Hi); |
| + _shld(T_3, T_2, Ctx->getConstantInt32(ShiftAmount)); |
| + _shl(T_2, Ctx->getConstantInt32(ShiftAmount)); |
| + // Move T_2 first to reduce register pressure. |
| + _mov(DestLo, T_2); |
| + _mov(DestHi, T_3); |
| + } |
| + } else { |
| + // a=b<<c ==> |
| + // t1:ecx = c.lo & 0xff |
| + // t2 = b.lo |
| + // t3 = b.hi |
| + // t3 = shld t3, t2, t1 |
| + // t2 = shl t2, t1 |
| + // test t1, 0x20 |
| + // je L1 |
| + // use(t3) |
| + // t3 = t2 |
| + // t2 = 0 |
| + // L1: |
| + // a.lo = t2 |
| + // a.hi = t3 |
| + Constant *BitTest = Ctx->getConstantInt32(0x20); |
| + typename Traits::Insts::Label *Label = |
| + Traits::Insts::Label::create(Func, this); |
| + _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); |
| + _mov(T_2, Src0Lo); |
| + _mov(T_3, Src0Hi); |
| + _shld(T_3, T_2, T_1); |
| + _shl(T_2, T_1); |
| + _test(T_1, BitTest); |
|
John
2015/09/22 16:18:37
would _cmp be safer here? (e.g., T_1 == 512)
|
| + _br(Traits::Cond::Br_e, Label); |
| + // T_2 and T_3 are being assigned again because of the intra-block |
| + // control flow, so we need the _mov_nonkillable variant to avoid |
| + // liveness problems. |
| + _mov_nonkillable(T_3, T_2); |
| + _mov_nonkillable(T_2, Zero); |
| + Context.insert(Label); |
| + _mov(DestLo, T_2); |
| + _mov(DestHi, T_3); |
| + } |
| } break; |
| case InstArithmetic::Lshr: { |
| - // a=b>>c (unsigned) ==> |
| - // t1:ecx = c.lo & 0xff |
| - // t2 = b.lo |
| - // t3 = b.hi |
| - // t2 = shrd t2, t3, t1 |
| - // t3 = shr t3, t1 |
| - // test t1, 0x20 |
| - // je L1 |
| - // use(t2) |
| - // t2 = t3 |
| - // t3 = 0 |
| - // L1: |
| - // a.lo = t2 |
| - // a.hi = t3 |
| Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; |
| - Constant *BitTest = Ctx->getConstantInt32(0x20); |
| Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| - typename Traits::Insts::Label *Label = |
| - Traits::Insts::Label::create(Func, this); |
| - _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); |
| - _mov(T_2, Src0Lo); |
| - _mov(T_3, Src0Hi); |
| - _shrd(T_2, T_3, T_1); |
| - _shr(T_3, T_1); |
| - _test(T_1, BitTest); |
| - _br(Traits::Cond::Br_e, Label); |
| - // T_2 and T_3 are being assigned again because of the intra-block |
| - // control flow, so we need the _mov_nonkillable variant to avoid |
| - // liveness problems. |
| - _mov_nonkillable(T_2, T_3); |
| - _mov_nonkillable(T_3, Zero); |
| - Context.insert(Label); |
| - _mov(DestLo, T_2); |
| - _mov(DestHi, T_3); |
| + if (const auto *ConstantShiftAmount = |
| + llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { |
| + uint32_t ShiftAmount = ConstantShiftAmount->getValue(); |
| + if (ShiftAmount > 32) { |
| + // a=b>>c (unsigned) ==> |
| + // t3 = b.hi |
| + // t3 = shr t3, ShiftAmount-32 |
| + // a.lo = t3 |
| + // a.hi = 0 |
| + _mov(T_3, Src0Hi); |
| + _shr(T_3, Ctx->getConstantInt32(ShiftAmount - 32)); |
| + _mov(DestLo, T_3); |
| + _mov(DestHi, Zero); |
| + } else if (ShiftAmount == 32) { |
| + // a=b>>c (unsigned) ==> |
| + // t3 = b.hi |
| + // a.lo = t3 |
| + // a.hi = 0 |
| + _mov(T_3, Src0Hi); |
| + _mov(DestLo, T_3); |
| + _mov(DestHi, Zero); |
| + } else { |
| + // a=b>>c (unsigned) ==> |
| + // t2 = b.lo |
| + // t3 = b.hi |
| + // t2 = shrd t2, t3, ShiftAmount |
| + // t3 = shr t3, ShiftAmount |
| + // a.lo = t2 |
| + // a.hi = t3 |
| + _mov(T_2, Src0Lo); |
| + _mov(T_3, Src0Hi); |
| + _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount)); |
| + _shr(T_3, Ctx->getConstantInt32(ShiftAmount)); |
| + // Move T_3 first to reduce register pressure. |
| + _mov(DestHi, T_3); |
| + _mov(DestLo, T_2); |
| + } |
| + } else { |
| + // a=b>>c (unsigned) ==> |
| + // t1:ecx = c.lo & 0xff |
| + // t2 = b.lo |
| + // t3 = b.hi |
| + // t2 = shrd t2, t3, t1 |
| + // t3 = shr t3, t1 |
| + // test t1, 0x20 |
| + // je L1 |
| + // use(t2) |
| + // t2 = t3 |
| + // t3 = 0 |
| + // L1: |
| + // a.lo = t2 |
| + // a.hi = t3 |
| + Constant *BitTest = Ctx->getConstantInt32(0x20); |
| + typename Traits::Insts::Label *Label = |
| + Traits::Insts::Label::create(Func, this); |
| + _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); |
| + _mov(T_2, Src0Lo); |
| + _mov(T_3, Src0Hi); |
| + _shrd(T_2, T_3, T_1); |
| + _shr(T_3, T_1); |
| + _test(T_1, BitTest); |
| + _br(Traits::Cond::Br_e, Label); |
| + // T_2 and T_3 are being assigned again because of the intra-block |
| + // control flow, so we need the _mov_nonkillable variant to avoid |
| + // liveness problems. |
| + _mov_nonkillable(T_2, T_3); |
| + _mov_nonkillable(T_3, Zero); |
| + Context.insert(Label); |
| + _mov(DestLo, T_2); |
| + _mov(DestHi, T_3); |
| + } |
| } break; |
| case InstArithmetic::Ashr: { |
| - // a=b>>c (signed) ==> |
| - // t1:ecx = c.lo & 0xff |
| - // t2 = b.lo |
| - // t3 = b.hi |
| - // t2 = shrd t2, t3, t1 |
| - // t3 = sar t3, t1 |
| - // test t1, 0x20 |
| - // je L1 |
| - // use(t2) |
| - // t2 = t3 |
| - // t3 = sar t3, 0x1f |
| - // L1: |
| - // a.lo = t2 |
| - // a.hi = t3 |
| Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; |
| - Constant *BitTest = Ctx->getConstantInt32(0x20); |
| - Constant *SignExtend = Ctx->getConstantInt32(0x1f); |
| - typename Traits::Insts::Label *Label = |
| - Traits::Insts::Label::create(Func, this); |
| - _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); |
| - _mov(T_2, Src0Lo); |
| - _mov(T_3, Src0Hi); |
| - _shrd(T_2, T_3, T_1); |
| - _sar(T_3, T_1); |
| - _test(T_1, BitTest); |
| - _br(Traits::Cond::Br_e, Label); |
| - // T_2 and T_3 are being assigned again because of the intra-block |
| - // control flow, so T_2 needs the _mov_nonkillable variant to avoid |
| - // liveness problems. T_3 doesn't need special treatment because it is |
| - // reassigned via _sar instead of _mov. |
| - _mov_nonkillable(T_2, T_3); |
| - _sar(T_3, SignExtend); |
| - Context.insert(Label); |
| - _mov(DestLo, T_2); |
| - _mov(DestHi, T_3); |
| + if (const auto *ConstantShiftAmount = |
| + llvm::dyn_cast<ConstantInteger32>(Src1Lo)) { |
| + uint32_t ShiftAmount = ConstantShiftAmount->getValue(); |
| + if (ShiftAmount > 32) { |
| + // a=b>>c (signed) ==> |
| + // t2 = b.hi |
| + // t3 = b.hi |
| + // t3 = sar t3, 0x1f |
| + // t2 = shrd t2, t3, ShiftAmount-32 |
| + // a.lo = t2 |
| + // a.hi = t3 |
| + _mov(T_2, Src0Hi); |
| + _mov(T_3, Src0Hi); |
| + _sar(T_3, Ctx->getConstantInt32(0x1f)); |
| + _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount - 32)); |
| + _mov(DestLo, T_2); |
| + _mov(DestHi, T_3); |
| + } else if (ShiftAmount == 32) { |
| + // a=b>>c (signed) ==> |
| + // t2 = b.hi |
| + // a.lo = t2 |
| + // t3 = b.hi |
| + // t3 = sar t3, 0x1f |
| + // a.hi = t3 |
| + _mov(T_2, Src0Hi); |
| + _mov(DestLo, T_2); |
| + _mov(T_3, Src0Hi); |
| + _sar(T_3, Ctx->getConstantInt32(0x1f)); |
| + _mov(DestHi, T_3); |
| + } else { |
| + // a=b>>c (signed) ==> |
| + // t2 = b.lo |
| + // t3 = b.hi |
| + // t2 = shrd t2, t3, ShiftAmount |
| + // t3 = sar t3, ShiftAmount |
| + // a.lo = t2 |
| + // a.hi = t3 |
| + _mov(T_2, Src0Lo); |
| + _mov(T_3, Src0Hi); |
| + _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount)); |
| + _sar(T_3, Ctx->getConstantInt32(ShiftAmount)); |
| + _mov(DestLo, T_2); |
| + _mov(DestHi, T_3); |
| + } |
| + } else { |
| + // a=b>>c (signed) ==> |
| + // t1:ecx = c.lo & 0xff |
| + // t2 = b.lo |
| + // t3 = b.hi |
| + // t2 = shrd t2, t3, t1 |
| + // t3 = sar t3, t1 |
| + // test t1, 0x20 |
| + // je L1 |
| + // use(t2) |
| + // t2 = t3 |
| + // t3 = sar t3, 0x1f |
| + // L1: |
| + // a.lo = t2 |
| + // a.hi = t3 |
| + Constant *BitTest = Ctx->getConstantInt32(0x20); |
| + Constant *SignExtend = Ctx->getConstantInt32(0x1f); |
| + typename Traits::Insts::Label *Label = |
| + Traits::Insts::Label::create(Func, this); |
| + _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); |
| + _mov(T_2, Src0Lo); |
| + _mov(T_3, Src0Hi); |
| + _shrd(T_2, T_3, T_1); |
| + _sar(T_3, T_1); |
| + _test(T_1, BitTest); |
| + _br(Traits::Cond::Br_e, Label); |
| + // T_2 and T_3 are being assigned again because of the intra-block |
| + // control flow, so T_2 needs the _mov_nonkillable variant to avoid |
| + // liveness problems. T_3 doesn't need special treatment because it is |
| + // reassigned via _sar instead of _mov. |
| + _mov_nonkillable(T_2, T_3); |
| + _sar(T_3, SignExtend); |
| + Context.insert(Label); |
| + _mov(DestLo, T_2); |
| + _mov(DestHi, T_3); |
| + } |
| } break; |
| case InstArithmetic::Fadd: |
| case InstArithmetic::Fsub: |