Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(13)

Unified Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1351133003: Optimize 64-bit shifts by constants for x86-32 (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Code review updates Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/llvm2ice_tests/shift.ll » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/IceTargetLoweringX86BaseImpl.h
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 3820d99a0f159b5ea1604600543410f836fd9225..245819a5344c1caee049dae16090179334944796 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -1185,114 +1185,237 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
} break;
case InstArithmetic::Shl: {
// TODO: Refactor the similarities between Shl, Lshr, and Ashr.
- // gcc does the following:
- // a=b<<c ==>
- // t1:ecx = c.lo & 0xff
- // t2 = b.lo
- // t3 = b.hi
- // t3 = shld t3, t2, t1
- // t2 = shl t2, t1
- // test t1, 0x20
- // je L1
- // use(t3)
- // t3 = t2
- // t2 = 0
- // L1:
- // a.lo = t2
- // a.hi = t3
Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
- Constant *BitTest = Ctx->getConstantInt32(0x20);
Constant *Zero = Ctx->getConstantZero(IceType_i32);
- typename Traits::Insts::Label *Label =
- Traits::Insts::Label::create(Func, this);
- _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
- _mov(T_2, Src0Lo);
- _mov(T_3, Src0Hi);
- _shld(T_3, T_2, T_1);
- _shl(T_2, T_1);
- _test(T_1, BitTest);
- _br(Traits::Cond::Br_e, Label);
- // T_2 and T_3 are being assigned again because of the intra-block
- // control flow, so we need the _mov_nonkillable variant to avoid
- // liveness problems.
- _mov_nonkillable(T_3, T_2);
- _mov_nonkillable(T_2, Zero);
- Context.insert(Label);
- _mov(DestLo, T_2);
- _mov(DestHi, T_3);
+ if (const auto *ConstantShiftAmount =
+ llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
+ uint32_t ShiftAmount = ConstantShiftAmount->getValue();
+ if (ShiftAmount > 32) {
+ // a=b<<c ==>
+ // t2 = b.lo
+ // t2 = shl t2, ShiftAmount-32
+ // t3 = t2
+ // t2 = 0
+ _mov(T_2, Src0Lo);
+ _shl(T_2, Ctx->getConstantInt32(ShiftAmount - 32));
+ _mov(DestHi, T_2);
+ _mov(DestLo, Zero);
+ } else if (ShiftAmount == 32) {
+ // a=b<<c ==>
+ // t2 = b.lo
+ // a.hi = t2
+ // a.lo = 0
+ _mov(T_2, Src0Lo);
+ _mov(DestHi, T_2);
+ _mov(DestLo, Zero);
+ } else {
+ // a=b<<c ==>
+ // t2 = b.lo
+ // t3 = b.hi
+ // t3 = shld t3, t2, ShiftAmount
+ // t2 = shl t2, ShiftAmount
+ // a.lo = t2
+ // a.hi = t3
+ _mov(T_2, Src0Lo);
+ _mov(T_3, Src0Hi);
+ _shld(T_3, T_2, Ctx->getConstantInt32(ShiftAmount));
+ _shl(T_2, Ctx->getConstantInt32(ShiftAmount));
+ // Move T_2 first to reduce register pressure.
+ _mov(DestLo, T_2);
+ _mov(DestHi, T_3);
+ }
+ } else {
+ // a=b<<c ==>
+ // t1:ecx = c.lo & 0xff
+ // t2 = b.lo
+ // t3 = b.hi
+ // t3 = shld t3, t2, t1
+ // t2 = shl t2, t1
+ // test t1, 0x20
+ // je L1
+ // use(t3)
+ // t3 = t2
+ // t2 = 0
+ // L1:
+ // a.lo = t2
+ // a.hi = t3
+ Constant *BitTest = Ctx->getConstantInt32(0x20);
+ typename Traits::Insts::Label *Label =
+ Traits::Insts::Label::create(Func, this);
+ _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
+ _mov(T_2, Src0Lo);
+ _mov(T_3, Src0Hi);
+ _shld(T_3, T_2, T_1);
+ _shl(T_2, T_1);
+ _test(T_1, BitTest);
John 2015/09/22 16:18:37 would _cmp be safer here? (e.g., T_1 == 512)
+ _br(Traits::Cond::Br_e, Label);
+ // T_2 and T_3 are being assigned again because of the intra-block
+ // control flow, so we need the _mov_nonkillable variant to avoid
+ // liveness problems.
+ _mov_nonkillable(T_3, T_2);
+ _mov_nonkillable(T_2, Zero);
+ Context.insert(Label);
+ _mov(DestLo, T_2);
+ _mov(DestHi, T_3);
+ }
} break;
case InstArithmetic::Lshr: {
- // a=b>>c (unsigned) ==>
- // t1:ecx = c.lo & 0xff
- // t2 = b.lo
- // t3 = b.hi
- // t2 = shrd t2, t3, t1
- // t3 = shr t3, t1
- // test t1, 0x20
- // je L1
- // use(t2)
- // t2 = t3
- // t3 = 0
- // L1:
- // a.lo = t2
- // a.hi = t3
Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
- Constant *BitTest = Ctx->getConstantInt32(0x20);
Constant *Zero = Ctx->getConstantZero(IceType_i32);
- typename Traits::Insts::Label *Label =
- Traits::Insts::Label::create(Func, this);
- _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
- _mov(T_2, Src0Lo);
- _mov(T_3, Src0Hi);
- _shrd(T_2, T_3, T_1);
- _shr(T_3, T_1);
- _test(T_1, BitTest);
- _br(Traits::Cond::Br_e, Label);
- // T_2 and T_3 are being assigned again because of the intra-block
- // control flow, so we need the _mov_nonkillable variant to avoid
- // liveness problems.
- _mov_nonkillable(T_2, T_3);
- _mov_nonkillable(T_3, Zero);
- Context.insert(Label);
- _mov(DestLo, T_2);
- _mov(DestHi, T_3);
+ if (const auto *ConstantShiftAmount =
+ llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
+ uint32_t ShiftAmount = ConstantShiftAmount->getValue();
+ if (ShiftAmount > 32) {
+ // a=b>>c (unsigned) ==>
+ // t3 = b.hi
+ // t3 = shr t3, ShiftAmount-32
+ // a.lo = t3
+ // a.hi = 0
+ _mov(T_3, Src0Hi);
+ _shr(T_3, Ctx->getConstantInt32(ShiftAmount - 32));
+ _mov(DestLo, T_3);
+ _mov(DestHi, Zero);
+ } else if (ShiftAmount == 32) {
+ // a=b>>c (unsigned) ==>
+ // t3 = b.hi
+ // a.lo = t3
+ // a.hi = 0
+ _mov(T_3, Src0Hi);
+ _mov(DestLo, T_3);
+ _mov(DestHi, Zero);
+ } else {
+ // a=b>>c (unsigned) ==>
+ // t2 = b.lo
+ // t3 = b.hi
+ // t2 = shrd t2, t3, ShiftAmount
+ // t3 = shr t3, ShiftAmount
+ // a.lo = t2
+ // a.hi = t3
+ _mov(T_2, Src0Lo);
+ _mov(T_3, Src0Hi);
+ _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount));
+ _shr(T_3, Ctx->getConstantInt32(ShiftAmount));
+ // Move T_3 first to reduce register pressure.
+ _mov(DestHi, T_3);
+ _mov(DestLo, T_2);
+ }
+ } else {
+ // a=b>>c (unsigned) ==>
+ // t1:ecx = c.lo & 0xff
+ // t2 = b.lo
+ // t3 = b.hi
+ // t2 = shrd t2, t3, t1
+ // t3 = shr t3, t1
+ // test t1, 0x20
+ // je L1
+ // use(t2)
+ // t2 = t3
+ // t3 = 0
+ // L1:
+ // a.lo = t2
+ // a.hi = t3
+ Constant *BitTest = Ctx->getConstantInt32(0x20);
+ typename Traits::Insts::Label *Label =
+ Traits::Insts::Label::create(Func, this);
+ _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
+ _mov(T_2, Src0Lo);
+ _mov(T_3, Src0Hi);
+ _shrd(T_2, T_3, T_1);
+ _shr(T_3, T_1);
+ _test(T_1, BitTest);
+ _br(Traits::Cond::Br_e, Label);
+ // T_2 and T_3 are being assigned again because of the intra-block
+ // control flow, so we need the _mov_nonkillable variant to avoid
+ // liveness problems.
+ _mov_nonkillable(T_2, T_3);
+ _mov_nonkillable(T_3, Zero);
+ Context.insert(Label);
+ _mov(DestLo, T_2);
+ _mov(DestHi, T_3);
+ }
} break;
case InstArithmetic::Ashr: {
- // a=b>>c (signed) ==>
- // t1:ecx = c.lo & 0xff
- // t2 = b.lo
- // t3 = b.hi
- // t2 = shrd t2, t3, t1
- // t3 = sar t3, t1
- // test t1, 0x20
- // je L1
- // use(t2)
- // t2 = t3
- // t3 = sar t3, 0x1f
- // L1:
- // a.lo = t2
- // a.hi = t3
Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
- Constant *BitTest = Ctx->getConstantInt32(0x20);
- Constant *SignExtend = Ctx->getConstantInt32(0x1f);
- typename Traits::Insts::Label *Label =
- Traits::Insts::Label::create(Func, this);
- _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
- _mov(T_2, Src0Lo);
- _mov(T_3, Src0Hi);
- _shrd(T_2, T_3, T_1);
- _sar(T_3, T_1);
- _test(T_1, BitTest);
- _br(Traits::Cond::Br_e, Label);
- // T_2 and T_3 are being assigned again because of the intra-block
- // control flow, so T_2 needs the _mov_nonkillable variant to avoid
- // liveness problems. T_3 doesn't need special treatment because it is
- // reassigned via _sar instead of _mov.
- _mov_nonkillable(T_2, T_3);
- _sar(T_3, SignExtend);
- Context.insert(Label);
- _mov(DestLo, T_2);
- _mov(DestHi, T_3);
+ if (const auto *ConstantShiftAmount =
+ llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
+ uint32_t ShiftAmount = ConstantShiftAmount->getValue();
+ if (ShiftAmount > 32) {
+ // a=b>>c (signed) ==>
+ // t2 = b.hi
+ // t3 = b.hi
+ // t3 = sar t3, 0x1f
+ // t2 = shrd t2, t3, ShiftAmount-32
+ // a.lo = t2
+ // a.hi = t3
+ _mov(T_2, Src0Hi);
+ _mov(T_3, Src0Hi);
+ _sar(T_3, Ctx->getConstantInt32(0x1f));
+ _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount - 32));
+ _mov(DestLo, T_2);
+ _mov(DestHi, T_3);
+ } else if (ShiftAmount == 32) {
+ // a=b>>c (signed) ==>
+ // t2 = b.hi
+ // a.lo = t2
+ // t3 = b.hi
+ // t3 = sar t3, 0x1f
+ // a.hi = t3
+ _mov(T_2, Src0Hi);
+ _mov(DestLo, T_2);
+ _mov(T_3, Src0Hi);
+ _sar(T_3, Ctx->getConstantInt32(0x1f));
+ _mov(DestHi, T_3);
+ } else {
+ // a=b>>c (signed) ==>
+ // t2 = b.lo
+ // t3 = b.hi
+ // t2 = shrd t2, t3, ShiftAmount
+ // t3 = sar t3, ShiftAmount
+ // a.lo = t2
+ // a.hi = t3
+ _mov(T_2, Src0Lo);
+ _mov(T_3, Src0Hi);
+ _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount));
+ _sar(T_3, Ctx->getConstantInt32(ShiftAmount));
+ _mov(DestLo, T_2);
+ _mov(DestHi, T_3);
+ }
+ } else {
+ // a=b>>c (signed) ==>
+ // t1:ecx = c.lo & 0xff
+ // t2 = b.lo
+ // t3 = b.hi
+ // t2 = shrd t2, t3, t1
+ // t3 = sar t3, t1
+ // test t1, 0x20
+ // je L1
+ // use(t2)
+ // t2 = t3
+ // t3 = sar t3, 0x1f
+ // L1:
+ // a.lo = t2
+ // a.hi = t3
+ Constant *BitTest = Ctx->getConstantInt32(0x20);
+ Constant *SignExtend = Ctx->getConstantInt32(0x1f);
+ typename Traits::Insts::Label *Label =
+ Traits::Insts::Label::create(Func, this);
+ _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
+ _mov(T_2, Src0Lo);
+ _mov(T_3, Src0Hi);
+ _shrd(T_2, T_3, T_1);
+ _sar(T_3, T_1);
+ _test(T_1, BitTest);
+ _br(Traits::Cond::Br_e, Label);
+ // T_2 and T_3 are being assigned again because of the intra-block
+ // control flow, so T_2 needs the _mov_nonkillable variant to avoid
+ // liveness problems. T_3 doesn't need special treatment because it is
+ // reassigned via _sar instead of _mov.
+ _mov_nonkillable(T_2, T_3);
+ _sar(T_3, SignExtend);
+ Context.insert(Label);
+ _mov(DestLo, T_2);
+ _mov(DestHi, T_3);
+ }
} break;
case InstArithmetic::Fadd:
case InstArithmetic::Fsub:
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | tests_lit/llvm2ice_tests/shift.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698