Index: src/IceTargetLoweringX8632.cpp
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 38b6fc63e8cbd4fc7b9e11a60097a02c7c4092cc..45c31514e0eff1b61f8f5a97e3a96ca09307f93a 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -39,7 +39,7 @@ namespace {
 const struct TableFcmp_ {
   uint32_t Default;
   bool SwapOperands;
-  InstX8632Br::BrCond C1, C2;
+  InstX8632::BrCond C1, C2;
 } TableFcmp[] = {
 #define X(val, dflt, swap, C1, C2) \
   { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \
@@ -54,7 +54,7 @@ const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
 // x86 conditional branch instruction.
 
 const struct TableIcmp32_ {
-  InstX8632Br::BrCond Mapping;
+  InstX8632::BrCond Mapping;
 } TableIcmp32[] = {
 #define X(val, C_32, C1_64, C2_64, C3_64) \
   { InstX8632Br::C_32 } \
@@ -69,7 +69,7 @@ const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
 // conditional branches are needed. For the other conditions, three separate
 // conditional branches are needed.
 const struct TableIcmp64_ {
-  InstX8632Br::BrCond C1, C2, C3;
+  InstX8632::BrCond C1, C2, C3;
 } TableIcmp64[] = {
 #define X(val, C_32, C1_64, C2_64, C3_64) \
   { InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 } \
@@ -79,7 +79,7 @@ const struct TableIcmp64_ {
 };
 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
 
-InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
+InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
   size_t Index = static_cast<size_t>(Cond);
   assert(Index < TableIcmp32Size);
   return TableIcmp32[Index].Mapping;
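
Note (editorial, not part of the patch): the hunks above move BrCond from
InstX8632Br up to the InstX8632 base, yet the X() macro bodies still qualify
the enumerators as InstX8632Br::C1 and friends. That keeps compiling if, as
assumed here, InstX8632Br derives from InstX8632, since enumerators declared
in a base class remain visible through the derived class's scope. A minimal
C++ sketch of that lookup rule, with hypothetical names:

    struct InstBase { enum Cond { Br_e, Br_ne }; }; // enum lives in the base
    struct InstBr : InstBase {};                    // branch inst derives
    // The derived-qualified name still denotes the base's enumerator.
    static_assert(InstBr::Br_ne == InstBase::Br_ne,
                  "base enumerators are reachable via the derived class");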
@@ -2109,12 +2109,61 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
     return;
   }
   case Intrinsics::Bswap:
-  case Intrinsics::Ctlz:
-  case Intrinsics::Ctpop:
-  case Intrinsics::Cttz:
-    // TODO(jvoung): fill it in.
     Func->setError("Unhandled intrinsic");
     return;
+  case Intrinsics::Ctpop: {
+    Variable *Dest = Instr->getDest();
+    Operand *Val = Instr->getArg(0);
+    InstCall *Call = makeHelperCall(Val->getType() == IceType_i64 ?
+                                    "__popcountdi2" : "__popcountsi2", Dest, 1);
+    Call->addArg(Val);
+    lowerCall(Call);
+    // The popcount helpers always return 32-bit values, while the intrinsic's
+    // signature matches the native POPCNT instruction and fills a 64-bit reg
+    // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
+    // the user doesn't do that in the IR. If the user does that in the IR,
+    // then this zeroing instruction is dead and gets optimized out.
+    if (Val->getType() == IceType_i64) {
+      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+      Constant *Zero = Ctx->getConstantZero(IceType_i32);
+      _mov(DestHi, Zero);
+    }
+    return;
+  }
+  case Intrinsics::Ctlz: {
+    // The "is zero undef" parameter is ignored and we always return
+    // a well-defined value.
+    Operand *Val = legalize(Instr->getArg(0));
+    Operand *FirstVal;
+    Operand *SecondVal = NULL;
+    if (Val->getType() == IceType_i64) {
+      FirstVal = loOperand(Val);
+      SecondVal = hiOperand(Val);
+    } else {
+      FirstVal = Val;
+    }
+    const bool IsCttz = false;
+    lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
+                    SecondVal);
+    return;
+  }
+  case Intrinsics::Cttz: {
+    // The "is zero undef" parameter is ignored and we always return
+    // a well-defined value.
+    Operand *Val = legalize(Instr->getArg(0));
+    Operand *FirstVal;
+    Operand *SecondVal = NULL;
+    if (Val->getType() == IceType_i64) {
+      FirstVal = hiOperand(Val);
+      SecondVal = loOperand(Val);
+    } else {
+      FirstVal = Val;
+    }
+    const bool IsCttz = true;
+    lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
+                    SecondVal);
+    return;
+  }
   case Intrinsics::Longjmp: {
     InstCall *Call = makeHelperCall("longjmp", NULL, 2);
     Call->addArg(Instr->getArg(0));
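
Note (editorial, not part of the patch): a plain C++ sketch of what the new
Ctpop/Ctlz/Cttz cases compute for i64 operands, under two stated assumptions:
the libgcc-style signature declared below for __popcountdi2, and ctlz32/cttz32
as hypothetical stand-ins for the bsr/bsf sequences that lowerCountZeros emits
in the next hunk (a zero input yields 32, per the "well-defined value"
comments above):

    #include <cstdint>

    extern "C" int __popcountdi2(uint64_t); // assumed helper signature

    // Portable stand-ins for the lowered 32-bit count sequences.
    static uint32_t ctlz32(uint32_t X) {
      uint32_t N = 0;
      while (N < 32 && !(X & (UINT32_C(1) << (31 - N))))
        ++N;
      return N; // 32 when X == 0
    }
    static uint32_t cttz32(uint32_t X) {
      uint32_t N = 0;
      while (N < 32 && !(X & (UINT32_C(1) << N)))
        ++N;
      return N; // 32 when X == 0
    }

    uint64_t Ctpop64(uint64_t X) {
      // The helper returns a 32-bit count (at most 64), which is why the
      // lowering explicitly zeroes the upper half of the i64 destination.
      return (uint32_t)__popcountdi2(X);
    }
    uint32_t Ctlz64(uint32_t Lo, uint32_t Hi) {
      // FirstVal is the low word: speculate that the high word is zero and
      // take 32 + ctlz(Lo); a cmov picks ctlz(Hi) instead when Hi != 0.
      return Hi != 0 ? ctlz32(Hi) : 32 + ctlz32(Lo);
    }
    uint32_t Cttz64(uint32_t Lo, uint32_t Hi) {
      // Mirrored for cttz: FirstVal is the high word, and the low word
      // decides the result unless it is zero.
      return Lo != 0 ? cttz32(Lo) : 32 + cttz32(Hi);
    }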
@@ -2408,6 +2457,81 @@ void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
   _mov(Dest, T_eax);
 }
 
+// Lowers the count {trailing, leading} zeros intrinsics.
+//
+// We could do constant folding here, but that should have
+// been done by the front-end/middle-end optimizations.
+void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
+                                  Operand *FirstVal, Operand *SecondVal) {
+  // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
+  // Then the instructions will handle the Val == 0 case much more simply
+  // and won't require conversion from bit position to number of zeros.
+  //
+  // Otherwise:
+  //   bsr IF_NOT_ZERO, Val
+  //   mov T_DEST, 63
+  //   cmovne T_DEST, IF_NOT_ZERO
+  //   xor T_DEST, 31
+  //   mov DEST, T_DEST
+  //
+  // NOTE: T_DEST must be a register because cmov requires its dest to be a
+  // register. Also, bsf and bsr require their dest to be a register.
+  //
+  // The xor T_DEST, 31 converts a bit position to a number of leading
+  // zeros. E.g., for 000...00001100, bsr says the most significant set bit
+  // is at position 3, while the number of leading zeros is 28. Xor with 31
+  // gives (31 - N) for N <= 31, and converts 63 to 32 (the all-zeros case).
+  //
+  // The 64-bit case is similar, but starts by speculating that the upper
+  // 32 bits are all zero, and computes the result for that case (checking
+  // the lower 32 bits). Then it actually computes the result for the upper
+  // bits and cmovs in the result from the lower computation if the earlier
+  // speculation was correct.
+  //
+  // Cttz is similar, but uses bsf instead, doesn't require the xor
+  // bit-position conversion, and reverses the speculation.
+  assert(Ty == IceType_i32 || Ty == IceType_i64);
+  Variable *T = makeReg(IceType_i32);
+  if (Cttz) {
+    _bsf(T, FirstVal);
+  } else {
+    _bsr(T, FirstVal);
+  }
+  Variable *T_Dest = makeReg(IceType_i32);
+  Constant *ThirtyTwo = Ctx->getConstantInt(IceType_i32, 32);
+  Constant *ThirtyOne = Ctx->getConstantInt(IceType_i32, 31);
+  if (Cttz) {
+    _mov(T_Dest, ThirtyTwo);
+  } else {
+    Constant *SixtyThree = Ctx->getConstantInt(IceType_i32, 63);
+    _mov(T_Dest, SixtyThree);
+  }
+  _cmov(T_Dest, T, InstX8632::Br_ne);
+  if (!Cttz) {
+    _xor(T_Dest, ThirtyOne);
+  }
+  if (Ty == IceType_i32) {
+    _mov(Dest, T_Dest);
+    return;
+  }
+  _add(T_Dest, ThirtyTwo);
+  Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+  Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+  // We will be using "test" on this, so the variable must be registerized.
+  Variable *SecondVar = legalizeToVar(SecondVal);
+  Variable *T_Dest2 = makeReg(IceType_i32);
+  if (Cttz) {
+    _bsf(T_Dest2, SecondVar);
+  } else {
+    _bsr(T_Dest2, SecondVar);
+    _xor(T_Dest2, ThirtyOne);
+  }
+  _test(SecondVar, SecondVar);
+  _cmov(T_Dest2, T_Dest, InstX8632::Br_e);
+  _mov(DestLo, T_Dest2);
+  _mov(DestHi, Ctx->getConstantZero(IceType_i32));
+}
+
 namespace {
 
 bool isAdd(const Inst *Inst) {
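
Note (editorial, not part of the patch): a quick check of the bit arithmetic
lowerCountZeros relies on. Xor with 31 flips only the low five bits, so
N ^ 31 == 31 - N for any bsr result N in [0, 31], turning a bit position into
a leading-zero count, and the 63 seeded for the all-zeros case comes out as
32. Cttz needs no such conversion, because bsf already returns the
trailing-zero count, so its zero case is seeded with 32 directly. In C++:

    static_assert((3 ^ 31) == 28, "bsr(0x0000000C) == 3 -> 28 leading zeros");
    static_assert((31 ^ 31) == 0, "bit 31 set -> no leading zeros");
    static_assert((63 ^ 31) == 32, "all-zero input: the seeded 63 becomes 32");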
|