Chromium Code Reviews
Index: src/IceTargetLoweringX8632.cpp
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index bf11573b69c4e9b0ed1c084a35150bd887550607..90d25332bccd7f01ceffcba635431ea0aecf21ce 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -39,7 +39,7 @@ namespace {
 const struct TableFcmp_ {
   uint32_t Default;
   bool SwapOperands;
-  InstX8632Br::BrCond C1, C2;
+  InstX8632::BrCond C1, C2;
 } TableFcmp[] = {
 #define X(val, dflt, swap, C1, C2) \
   { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \
@@ -54,7 +54,7 @@ const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
 // x86 conditional branch instruction.
 const struct TableIcmp32_ {
-  InstX8632Br::BrCond Mapping;
+  InstX8632::BrCond Mapping;
 } TableIcmp32[] = {
 #define X(val, C_32, C1_64, C2_64, C3_64) \
   { InstX8632Br::C_32 } \
@@ -69,7 +69,7 @@ const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
 // conditional branches are needed. For the other conditions, three separate
 // conditional branches are needed.
 const struct TableIcmp64_ {
-  InstX8632Br::BrCond C1, C2, C3;
+  InstX8632::BrCond C1, C2, C3;
 } TableIcmp64[] = {
 #define X(val, C_32, C1_64, C2_64, C3_64) \
   { InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 } \
@@ -79,7 +79,7 @@ const struct TableIcmp64_ {
 };
 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
-InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
+InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
   size_t Index = static_cast<size_t>(Cond);
   assert(Index < TableIcmp32Size);
   return TableIcmp32[Index].Mapping;
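Aside on the tables above: they are built with the X-macro pattern, where one condition list is expanded once into the IR-level condition enum and once into the parallel branch-condition table, so getIcmp32Mapping stays in sync with the enum by construction. A minimal, self-contained sketch of that pattern follows; the ICMP_TABLE list, enum names, and condition values here are hypothetical stand-ins, not the real .def-file entries.

#include <cassert>
#include <cstddef>

// Hypothetical condition list; the real tables are driven by .def includes.
#define ICMP_TABLE \
  X(Eq, Br_e)      \
  X(Ne, Br_ne)     \
  X(Ult, Br_b)     \
  X(Slt, Br_l)

enum BrCond { Br_e, Br_ne, Br_b, Br_l };

// Expansion 1: the IR-level condition enum.
enum ICond {
#define X(val, C_32) Icmp_##val,
  ICMP_TABLE
#undef X
};

// Expansion 2: the parallel condition-code table, indexed by ICond.
const struct TableIcmp32_ {
  BrCond Mapping;
} TableIcmp32[] = {
#define X(val, C_32) { C_32 },
  ICMP_TABLE
#undef X
};
const size_t TableIcmp32Size = sizeof(TableIcmp32) / sizeof(TableIcmp32[0]);

BrCond getIcmp32Mapping(ICond Cond) {
  size_t Index = static_cast<size_t>(Cond);
  assert(Index < TableIcmp32Size); // Catches drift between the two expansions.
  return TableIcmp32[Index].Mapping;
}

int main() {
  assert(getIcmp32Mapping(Icmp_Ult) == Br_b);
  return 0;
}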
@@ -2108,11 +2108,36 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
     return;
   }
   case Intrinsics::Bswap:
+    Func->setError("Unhandled intrinsic");
+    return;
+  case Intrinsics::Ctpop: {
+    Variable *Dest = Instr->getDest();
+    Operand *Val = Instr->getArg(0);
+    InstCall *Call = makeHelperCall(Val->getType() == IceType_i32 ?
+        "__popcountsi2" : "__popcountdi2", Dest, 1);
+    Call->addArg(Val);
+    lowerCall(Call);
+    // The popcount helpers always return 32-bit values, while the intrinsic's
+    // signature matches the native POPCNT instruction and fills a 64-bit reg
+    // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
+    // the user doesn't do that in the IR. If the user does that in the IR,
+    // then this zero'ing instruction is dead and gets optimized out.
+    if (Val->getType() == IceType_i64) {
+      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+      Constant *Zero = Ctx->getConstantZero(IceType_i32);
+      _mov(DestHi, Zero);
+    }
+    return;
+  }
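A note on why clearing only DestHi is enough here: the helper returns a 32-bit count, and even a 64-bit input has at most 64 set bits, so the 64-bit result is just that count zero-extended. The following is a minimal value-level sketch of this lowering, assuming a loop-based stand-in for compiler-rt's __popcountdi2; it models the computed value, not the emitted instructions.

#include <cassert>
#include <cstdint>

// Loop-based stand-in for compiler-rt's __popcountdi2 (hypothetical body;
// only the "returns a 32-bit count" contract matters here).
static int popcountdi2(uint64_t X) {
  int Count = 0;
  for (; X != 0; X &= X - 1) // Clear the lowest set bit each iteration.
    ++Count;
  return Count;
}

// Value-level model of the i64 Ctpop lowering: the call fills the low word,
// and the explicit _mov(DestHi, Zero) supplies the high word.
static uint64_t popcount64Lowered(uint64_t Val) {
  uint32_t DestLo = static_cast<uint32_t>(popcountdi2(Val));
  uint32_t DestHi = 0;
  return (static_cast<uint64_t>(DestHi) << 32) | DestLo;
}

int main() {
  assert(popcount64Lowered(0) == 0);
  assert(popcount64Lowered(~0ull) == 64);
  assert(popcount64Lowered(0xF0F0ull) == 8);
  return 0;
}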
   case Intrinsics::Ctlz:
-  case Intrinsics::Ctpop:
+    // The "is zero undef" parameter is ignored and we always return
+    // a well-defined value.
+    lowerCtlz(Instr->getDest(), Instr->getArg(0));
+    return;
   case Intrinsics::Cttz:
-    // TODO(jvoung): fill it in.
-    Func->setError("Unhandled intrinsic");
+    // The "is zero undef" parameter is ignored and we always return
+    // a well-defined value.
+    lowerCttz(Instr->getDest(), Instr->getArg(0));
     return;
   case Intrinsics::Longjmp: {
     InstCall *Call = makeHelperCall("longjmp", NULL, 2);
@@ -2407,6 +2432,98 @@ void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
   _mov(Dest, T_eax);
 }
+// We could do constant folding here, but that should have
+// been done by the front-end/middle-end optimizations.
+void TargetX8632::lowerCtlz(Variable *Dest, Operand *Val) {
+  // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
+  // Then the instructions will handle the Val == 0 case much more simply
+  // and won't require conversion from bit position to number of zeros.
+  //
+  // Otherwise:
+  // bsr IF_NOT_ZERO, Val
+  // mov DEST, 63
+  // cmovne DEST, IF_NOT_ZERO
|
Jim Stichnoth, 2014/07/14 23:20:45:
Might want to document that the cmov instruction r
jvoung (off chromium), 2014/07/15 21:30:23:
Done.
I think the cvt instructions and some/most
+  // xor DEST, 31
+  //
+  // The xor DEST, 31 converts a bit position to # of leading zeroes.
+  // E.g., for 000... 00001100, bsr will say that the most significant bit
+  // set is at position 3, while the number of leading zeros is 28. Xor is
+  // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros case).
+  //
+  // Similar for 64-bit, but start w/ speculating that the upper 32 bits
+  // are all zero, and compute the result for that case (checking the lower
+  // 32 bits). Then actually compute the result for the upper bits and
+  // cmov in the result from the lower computation if the earlier speculation
+  // was correct.
+  Type Ty = Val->getType();
+  assert(Ty == IceType_i32 || Ty == IceType_i64);
+  Val = legalize(Val);
+  Operand *FirstVal = Ty == IceType_i32 ? Val : loOperand(Val);
+  Variable *T = makeReg(IceType_i32);
+  _bsr(T, FirstVal);
+  Variable *T_Dest = makeReg(IceType_i32);
+  Constant *SixtyThree = Ctx->getConstantInt(IceType_i32, 63);
+  Constant *ThirtyOne = Ctx->getConstantInt(IceType_i32, 31);
+  _mov(T_Dest, SixtyThree);
+  _cmov(T_Dest, T, InstX8632::Br_ne);
+  _xor(T_Dest, ThirtyOne);
+  if (Ty == IceType_i32) {
+    _mov(Dest, T_Dest);
+    return;
+  }
+  Constant *ThirtyTwo = Ctx->getConstantInt(IceType_i32, 32);
+  _add(T_Dest, ThirtyTwo);
+  Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+  Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+  // Will be using "test" on this, so we need a registerized version.
+  Variable *HigherVar = legalizeToVar(hiOperand(Val));
+  Variable *T_Dest2 = makeReg(IceType_i32);
+  _bsr(T_Dest2, HigherVar);
+  _xor(T_Dest2, ThirtyOne);
+  _test(HigherVar, HigherVar);
+  _cmov(T_Dest2, T_Dest, InstX8632::Br_e);
+  _mov(DestLo, T_Dest2);
+  _mov(DestHi, Ctx->getConstantZero(IceType_i32));
+}
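To make the bsr/xor trick concrete, here is a minimal C++ sketch of the value this lowering computes, assuming a loop-based stand-in for bsr; it models the result only, not the emitted code.

#include <cassert>
#include <cstdint>

// Stand-in for bsr: bit index of the most significant 1. The zero case is
// handled by the caller (mov 63 + cmovne), mirroring the lowering.
static uint32_t bsr32(uint32_t X) {
  uint32_t Pos = 0;
  for (uint32_t I = 0; I < 32; ++I)
    if (X & (1u << I))
      Pos = I;
  return Pos;
}

static uint32_t ctlz32(uint32_t X) {
  // bsr T, X ; mov T_Dest, 63 ; cmovne T_Dest, T ; xor T_Dest, 31
  uint32_t T_Dest = (X != 0) ? bsr32(X) : 63;
  return T_Dest ^ 31; // 63 ^ 31 == 32 for the all-zeros input.
}

static uint64_t ctlz64(uint64_t X) {
  uint32_t Lo = static_cast<uint32_t>(X);
  uint32_t Hi = static_cast<uint32_t>(X >> 32);
  // Speculate that the upper 32 bits are zero: 32 + the lower-half result.
  uint32_t Speculated = ctlz32(Lo) + 32;
  // Compute the upper-half result, then keep the speculation only if Hi == 0
  // (test HigherVar, HigherVar ; cmove T_Dest2, T_Dest).
  uint32_t UpperResult = ((Hi != 0) ? bsr32(Hi) : 63) ^ 31;
  return (Hi == 0) ? Speculated : UpperResult; // DestHi is written with zero.
}

int main() {
  assert(ctlz32(0x0000000Cu) == 28); // matches the example in the comment
  assert(ctlz32(0) == 32);
  assert(ctlz64(1) == 63);
  assert(ctlz64(0) == 64);
  return 0;
}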
+
+void TargetX8632::lowerCttz(Variable *Dest, Operand *Val) {
+  // TODO(jvoung): Determine if the user CPU supports TZCNT (BMI).
Jim Stichnoth, 2014/07/14 23:20:45:
Would it make sense to refactor this and lowerCtlz
jvoung (off chromium), 2014/07/15 21:30:23:
Hmm, I tried that in the new patch set.
There are
+  // Then the instructions will handle the Val == 0 case much more simply.
+  //
+  // Otherwise:
+  // bsf IF_NOT_ZERO, Val
+  // mov IF_ZERO, 32
+  // cmovne IF_ZERO, IF_NOT_ZERO
+  //
+  // Similar for 64-bit, but start w/ speculating that the bottom 32 bits
+  // are all zero.
+  Type Ty = Val->getType();
+  assert(Ty == IceType_i32 || Ty == IceType_i64);
+  Val = legalize(Val);
+  Operand *FirstVal = Ty == IceType_i32 ? Val : hiOperand(Val);
+  Variable *T = makeReg(IceType_i32);
+  _bsf(T, FirstVal);
+  Variable *T_Dest = makeReg(IceType_i32);
+  Constant *ThirtyTwo = Ctx->getConstantInt(IceType_i32, 32);
+  _mov(T_Dest, ThirtyTwo);
+  _cmov(T_Dest, T, InstX8632::Br_ne);
+  if (Ty == IceType_i32) {
+    _mov(Dest, T_Dest);
+    return;
+  }
+  _add(T_Dest, ThirtyTwo);
+  Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+  Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+  // Will be using "test" on this, so we need a registerized version.
+  Variable *LowerVar = legalizeToVar(loOperand(Val));
+  Variable *T_Dest2 = makeReg(IceType_i32);
+  _bsf(T_Dest2, LowerVar);
+  _test(LowerVar, LowerVar);
+  _cmov(T_Dest2, T_Dest, InstX8632::Br_e);
+  _mov(DestLo, T_Dest2);
+  _mov(DestHi, Ctx->getConstantZero(IceType_i32));
+}
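The matching sketch for the cttz lowering, again assuming a loop-based stand-in for bsf and modeling only the computed value:

#include <cassert>
#include <cstdint>

// Stand-in for bsf: bit index of the least significant 1. The zero case is
// handled by the caller (mov 32 + cmovne), mirroring the lowering.
static uint32_t bsf32(uint32_t X) {
  for (uint32_t I = 0; I < 32; ++I)
    if (X & (1u << I))
      return I;
  return 0; // Unreached for nonzero X; hardware bsf leaves the dest undefined.
}

static uint32_t cttz32(uint32_t X) {
  // bsf T, X ; mov T_Dest, 32 ; cmovne T_Dest, T
  return (X != 0) ? bsf32(X) : 32;
}

static uint64_t cttz64(uint64_t X) {
  uint32_t Lo = static_cast<uint32_t>(X);
  uint32_t Hi = static_cast<uint32_t>(X >> 32);
  // Speculate that the bottom 32 bits are zero: 32 + the upper-half result.
  uint32_t Speculated = cttz32(Hi) + 32;
  // If the low word is nonzero its bsf wins; otherwise keep the speculation
  // (test LowerVar, LowerVar ; cmove T_Dest2, T_Dest).
  return (Lo != 0) ? bsf32(Lo) : Speculated; // DestHi is written with zero.
}

int main() {
  assert(cttz32(12) == 2);
  assert(cttz32(0) == 32);
  assert(cttz64(1ull << 40) == 40);
  assert(cttz64(0) == 64);
  return 0;
}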
+
 namespace {
 bool isAdd(const Inst *Inst) {