Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(25)

Unified Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1351133003: Optimize 64-bit shifts by constants for x86-32 (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/IceTargetLoweringX86BaseImpl.h
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 3820d99a0f159b5ea1604600543410f836fd9225..3de1c660363898faec880c80df74271259df8797 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -1184,115 +1184,237 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
_mov(DestHi, T_4Hi);
} break;
case InstArithmetic::Shl: {
- // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
Jim Stichnoth 2015/09/22 05:48:46 Don't delete this TODO unless you're taking care o
sehr 2015/09/22 16:03:17 Done.
- // gcc does the following:
- // a=b<<c ==>
- // t1:ecx = c.lo & 0xff
- // t2 = b.lo
- // t3 = b.hi
- // t3 = shld t3, t2, t1
- // t2 = shl t2, t1
- // test t1, 0x20
- // je L1
- // use(t3)
- // t3 = t2
- // t2 = 0
- // L1:
- // a.lo = t2
- // a.hi = t3
Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
- Constant *BitTest = Ctx->getConstantInt32(0x20);
Constant *Zero = Ctx->getConstantZero(IceType_i32);
- typename Traits::Insts::Label *Label =
- Traits::Insts::Label::create(Func, this);
- _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
- _mov(T_2, Src0Lo);
- _mov(T_3, Src0Hi);
- _shld(T_3, T_2, T_1);
- _shl(T_2, T_1);
- _test(T_1, BitTest);
- _br(Traits::Cond::Br_e, Label);
- // T_2 and T_3 are being assigned again because of the intra-block
- // control flow, so we need the _mov_nonkillable variant to avoid
- // liveness problems.
- _mov_nonkillable(T_3, T_2);
- _mov_nonkillable(T_2, Zero);
- Context.insert(Label);
- _mov(DestLo, T_2);
- _mov(DestHi, T_3);
+ if (const auto *ConstantShiftAmount =
+ llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
+ uint32_t ShiftAmount = ConstantShiftAmount->getValue();
+ if (ShiftAmount > 32) {
+ // a=b<<c ==>
+ // t2 = b.lo
+ // t2 = shl t2, ShiftAmount-32
+ // t3 = t2
+ // t2 = 0
+ _mov(T_2, Src0Lo);
+ _shl(T_2, Ctx->getConstantInt32(ShiftAmount-32));
Jim Stichnoth 2015/09/22 05:48:46 Please run "make -f Makefile.standalone format" to
sehr 2015/09/22 16:03:17 Done.
+ _mov(DestHi, T_2);
+ _mov(DestLo, Zero);
+ } else if (ShiftAmount == 32) {
+ // a=b<<c ==>
+ // t2 = b.lo
+ // a.hi = t2
+ // a.lo = 0
+ _mov(T_2, Src0Lo);
+ _mov(DestHi, T_2);
+ _mov(DestLo, Zero);
+ } else {
+ // a=b<<c ==>
+ // t2 = b.lo
+ // t3 = b.hi
+ // t3 = shld t3, t2, ShiftAmount
+ // t2 = shl t2, ShiftAmount
+ // a.lo = t2
+ // a.hi = t3
+ _mov(T_2, Src0Lo);
+ _mov(T_3, Src0Hi);
+ _shld(T_3, T_2, Ctx->getConstantInt32(ShiftAmount));
+ _shl(T_2, Ctx->getConstantInt32(ShiftAmount));
+ // Move T_2 first to reduce register pressure.
+ _mov(DestLo, T_2);
+ _mov(DestHi, T_3);
+ }
+ } else {
+ // a=b<<c ==>
+ // t1:ecx = c.lo & 0xff
+ // t2 = b.lo
+ // t3 = b.hi
+ // t3 = shld t3, t2, t1
+ // t2 = shl t2, t1
+ // test t1, 0x20
+ // je L1
+ // use(t3)
+ // t3 = t2
+ // t2 = 0
+ // L1:
+ // a.lo = t2
+ // a.hi = t3
+ Constant *BitTest = Ctx->getConstantInt32(0x20);
+ typename Traits::Insts::Label *Label =
+ Traits::Insts::Label::create(Func, this);
+ _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
+ _mov(T_2, Src0Lo);
+ _mov(T_3, Src0Hi);
+ _shld(T_3, T_2, T_1);
+ _shl(T_2, T_1);
+ _test(T_1, BitTest);
+ _br(Traits::Cond::Br_e, Label);
+ // T_2 and T_3 are being assigned again because of the intra-block
+ // control flow, so we need the _mov_nonkillable variant to avoid
+ // liveness problems.
+ _mov_nonkillable(T_3, T_2);
+ _mov_nonkillable(T_2, Zero);
+ Context.insert(Label);
+ _mov(DestLo, T_2);
+ _mov(DestHi, T_3);
+ }
} break;
case InstArithmetic::Lshr: {
- // a=b>>c (unsigned) ==>
- // t1:ecx = c.lo & 0xff
- // t2 = b.lo
- // t3 = b.hi
- // t2 = shrd t2, t3, t1
- // t3 = shr t3, t1
- // test t1, 0x20
- // je L1
- // use(t2)
- // t2 = t3
- // t3 = 0
- // L1:
- // a.lo = t2
- // a.hi = t3
Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
- Constant *BitTest = Ctx->getConstantInt32(0x20);
Constant *Zero = Ctx->getConstantZero(IceType_i32);
- typename Traits::Insts::Label *Label =
- Traits::Insts::Label::create(Func, this);
- _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
- _mov(T_2, Src0Lo);
- _mov(T_3, Src0Hi);
- _shrd(T_2, T_3, T_1);
- _shr(T_3, T_1);
- _test(T_1, BitTest);
- _br(Traits::Cond::Br_e, Label);
- // T_2 and T_3 are being assigned again because of the intra-block
- // control flow, so we need the _mov_nonkillable variant to avoid
- // liveness problems.
- _mov_nonkillable(T_2, T_3);
- _mov_nonkillable(T_3, Zero);
- Context.insert(Label);
- _mov(DestLo, T_2);
- _mov(DestHi, T_3);
+ if (const auto *ConstantShiftAmount =
+ llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
+ uint32_t ShiftAmount = ConstantShiftAmount->getValue();
+ if (ShiftAmount > 32) {
+ // a=b>>c (unsigned) ==>
+ // t3 = b.hi
+ // t3 = shr t3, ShiftAmount-32
+ // a.lo = t3
+ // a.hi = 0
+ _mov(T_3, Src0Hi);
+ _shr(T_3, Ctx->getConstantInt32(ShiftAmount-32));
+ _mov(DestLo, T_3);
+ _mov(DestHi, Zero);
+ } else if (ShiftAmount == 32) {
+ // a=b>>c (unsigned) ==>
+ // t3 = b.hi
+ // a.lo = t3
+ // a.hi = 0
+ _mov(T_3, Src0Hi);
+ _mov(DestLo, T_3);
+ _mov(DestHi, Zero);
+ } else {
+ // a=b>>c (unsigned) ==>
+ // t2 = b.lo
+ // t3 = b.hi
+ // t2 = shrd t2, t3, ShiftAmount
+ // t3 = shr t3, ShiftAmount
+ // a.lo = t2
+ // a.hi = t3
+ _mov(T_2, Src0Lo);
+ _mov(T_3, Src0Hi);
+ _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount));
+ _shr(T_3, Ctx->getConstantInt32(ShiftAmount));
+ // Move T_3 first to reduce register pressure.
+ _mov(DestHi, T_3);
+ _mov(DestLo, T_2);
+ }
+ } else {
+ // a=b>>c (unsigned) ==>
+ // t1:ecx = c.lo & 0xff
+ // t2 = b.lo
+ // t3 = b.hi
+ // t2 = shrd t2, t3, t1
+ // t3 = shr t3, t1
+ // test t1, 0x20
+ // je L1
+ // use(t2)
+ // t2 = t3
+ // t3 = 0
+ // L1:
+ // a.lo = t2
+ // a.hi = t3
+ Constant *BitTest = Ctx->getConstantInt32(0x20);
+ typename Traits::Insts::Label *Label =
+ Traits::Insts::Label::create(Func, this);
+ _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
+ _mov(T_2, Src0Lo);
+ _mov(T_3, Src0Hi);
+ _shrd(T_2, T_3, T_1);
+ _shr(T_3, T_1);
+ _test(T_1, BitTest);
+ _br(Traits::Cond::Br_e, Label);
+ // T_2 and T_3 are being assigned again because of the intra-block
+ // control flow, so we need the _mov_nonkillable variant to avoid
+ // liveness problems.
+ _mov_nonkillable(T_2, T_3);
+ _mov_nonkillable(T_3, Zero);
+ Context.insert(Label);
+ _mov(DestLo, T_2);
+ _mov(DestHi, T_3);
+ }
} break;
case InstArithmetic::Ashr: {
- // a=b>>c (signed) ==>
- // t1:ecx = c.lo & 0xff
- // t2 = b.lo
- // t3 = b.hi
- // t2 = shrd t2, t3, t1
- // t3 = sar t3, t1
- // test t1, 0x20
- // je L1
- // use(t2)
- // t2 = t3
- // t3 = sar t3, 0x1f
- // L1:
- // a.lo = t2
- // a.hi = t3
Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
- Constant *BitTest = Ctx->getConstantInt32(0x20);
- Constant *SignExtend = Ctx->getConstantInt32(0x1f);
- typename Traits::Insts::Label *Label =
- Traits::Insts::Label::create(Func, this);
- _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
- _mov(T_2, Src0Lo);
- _mov(T_3, Src0Hi);
- _shrd(T_2, T_3, T_1);
- _sar(T_3, T_1);
- _test(T_1, BitTest);
- _br(Traits::Cond::Br_e, Label);
- // T_2 and T_3 are being assigned again because of the intra-block
- // control flow, so T_2 needs the _mov_nonkillable variant to avoid
- // liveness problems. T_3 doesn't need special treatment because it is
- // reassigned via _sar instead of _mov.
- _mov_nonkillable(T_2, T_3);
- _sar(T_3, SignExtend);
- Context.insert(Label);
- _mov(DestLo, T_2);
- _mov(DestHi, T_3);
+ if (const auto *ConstantShiftAmount =
+ llvm::dyn_cast<ConstantInteger32>(Src1Lo)) {
+ uint32_t ShiftAmount = ConstantShiftAmount->getValue();
+ if (ShiftAmount > 32) {
+ // a=b>>c (signed) ==>
+ // t2 = b.hi
+ // t3 = b.hi
+ // t3 = sar t3, 0x1f
+ // t2 = shrd t2, t3, ShiftAmount-32
+ // a.lo = t2
+ // a.hi = t3
+ _mov(T_2, Src0Hi);
+ _mov(T_3, Src0Hi);
+ _sar(T_3, Ctx->getConstantInt32(0x1f));
+ _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount-32));
+ _mov(DestLo, T_2);
+ _mov(DestHi, T_3);
+ } else if (ShiftAmount == 32) {
+ // a=b>>c (signed) ==>
+ // t2 = b.hi
+ // a.lo = t2
+ // t3 = b.hi
+ // t3 = sar t3, 0x1f
+ // a.hi = t3
+ _mov(T_2, Src0Hi);
+ _mov(DestLo, T_2);
+ _mov(T_3, Src0Hi);
+ _sar(T_3, Ctx->getConstantInt32(0x1f));
+ _mov(DestHi, T_3);
+ } else {
+ // a=b>>c (signed) ==>
+ // t2 = b.lo
+ // t3 = b.hi
+ // t2 = shrd t2, t3, ShiftAmount
+ // t3 = sar t3, ShiftAmount
+ // a.lo = t2
+ // a.hi = t3
+ _mov(T_2, Src0Lo);
+ _mov(T_3, Src0Hi);
+ _shrd(T_2, T_3, Ctx->getConstantInt32(ShiftAmount));
+ _sar(T_3, Ctx->getConstantInt32(ShiftAmount));
+ _mov(DestLo, T_2);
+ _mov(DestHi, T_3);
+ }
+ } else {
+ // a=b>>c (signed) ==>
+ // t1:ecx = c.lo & 0xff
+ // t2 = b.lo
+ // t3 = b.hi
+ // t2 = shrd t2, t3, t1
+ // t3 = sar t3, t1
+ // test t1, 0x20
+ // je L1
+ // use(t2)
+ // t2 = t3
+ // t3 = sar t3, 0x1f
+ // L1:
+ // a.lo = t2
+ // a.hi = t3
+ Constant *BitTest = Ctx->getConstantInt32(0x20);
+ Constant *SignExtend = Ctx->getConstantInt32(0x1f);
+ typename Traits::Insts::Label *Label =
+ Traits::Insts::Label::create(Func, this);
+ _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
+ _mov(T_2, Src0Lo);
+ _mov(T_3, Src0Hi);
+ _shrd(T_2, T_3, T_1);
+ _sar(T_3, T_1);
+ _test(T_1, BitTest);
+ _br(Traits::Cond::Br_e, Label);
+ // T_2 and T_3 are being assigned again because of the intra-block
+ // control flow, so T_2 needs the _mov_nonkillable variant to avoid
+ // liveness problems. T_3 doesn't need special treatment because it is
+ // reassigned via _sar instead of _mov.
+ _mov_nonkillable(T_2, T_3);
+ _sar(T_3, SignExtend);
+ Context.insert(Label);
+ _mov(DestLo, T_2);
+ _mov(DestHi, T_3);
+ }
} break;
case InstArithmetic::Fadd:
case InstArithmetic::Fsub:

Powered by Google App Engine
This is Rietveld 408576698