Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(61)

Unified Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1273153002: Subzero. Native 64-bit int arithmetic on x86-64. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments. Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« src/IceTargetLoweringX86Base.h ('K') | « src/IceTargetLoweringX86Base.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/IceTargetLoweringX86BaseImpl.h
diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h
index 74fa5d7cbd564ef687632070ecc1c4ecd22adca7..a73be566a172e016d18cfcc0e4d52d442af6d670 100644
--- a/src/IceTargetLoweringX86BaseImpl.h
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -77,6 +77,7 @@ template <class MachineTraits> class BoolFolding {
public:
enum BoolFoldingProducerKind {
PK_None,
+ // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative.
PK_Icmp32,
PK_Icmp64,
PK_Fcmp,
@@ -120,7 +121,7 @@ template <class MachineTraits>
typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
if (llvm::isa<InstIcmp>(Instr)) {
- if (Instr->getSrc(0)->getType() != IceType_i64)
+ if (MachineTraits::Is64Bit || Instr->getSrc(0)->getType() != IceType_i64)
return PK_Icmp32;
return PK_None; // TODO(stichnot): actually PK_Icmp64;
}
@@ -643,10 +644,10 @@ template <class Machine> void TargetX86Base<Machine>::doLoadOpt() {
} else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
// An AtomicLoad intrinsic qualifies as long as it has a valid
// memory ordering, and can be implemented in a single
- // instruction (i.e., not i64).
+ // instruction (i.e., not i64 on x86-32).
Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
if (ID == Intrinsics::AtomicLoad &&
- Intrin->getDest()->getType() != IceType_i64 &&
+ (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&
Intrinsics::isMemoryOrderValid(
ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
LoadDest = Intrin->getDest();
@@ -724,6 +725,10 @@ bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
template <class Machine>
Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
+ // Special case: never allow partial reads/writes to/from %rBP and %rSP.
+ if (RegNum == Traits::RegisterSet::Reg_esp ||
+ RegNum == Traits::RegisterSet::Reg_ebp)
+ Ty = Traits::WordType;
if (Ty == IceType_void)
Ty = IceType_i32;
if (PhysicalRegisters[Ty].empty())
@@ -770,7 +775,7 @@ void TargetX86Base<Machine>::emitVariable(const Variable *Var) const {
}
if (Offset)
Str << Offset;
- const Type FrameSPTy = IceType_i32;
+ const Type FrameSPTy = Traits::WordType;
Str << "(%" << getRegName(BaseRegNum, FrameSPTy) << ")";
}
@@ -810,8 +815,7 @@ void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
Variable *Lo = Arg->getLo();
Variable *Hi = Arg->getHi();
Type Ty = Arg->getType();
- if (Lo && Hi && Ty == IceType_i64) {
- // TODO(jpp): This special case is not needed for x86-64.
+ if (!Traits::Is64Bit && Lo && Hi && Ty == IceType_i64) {
assert(Lo->getType() != IceType_i64); // don't want infinite recursion
assert(Hi->getType() != IceType_i64); // don't want infinite recursion
finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
@@ -824,7 +828,7 @@ void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
if (Arg->hasReg()) {
- assert(Ty != IceType_i64);
+ assert(Ty != IceType_i64 || Traits::Is64Bit);
typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create(
Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
if (isVectorType(Arg->getType())) {
@@ -840,11 +844,13 @@ void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
}
template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
- // TODO(jpp): this is wrong for x86-64.
- return IceType_i32;
+ return Traits::WordType;
}
-template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) {
+template <class Machine>
+template <typename T>
+typename std::enable_if<!T::Is64Bit, void>::type
+TargetX86Base<Machine>::split64(Variable *Var) {
switch (Var->getType()) {
default:
return;
@@ -876,7 +882,9 @@ template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) {
}
template <class Machine>
-Operand *TargetX86Base<Machine>::loOperand(Operand *Operand) {
+template <typename T>
+typename std::enable_if<!T::Is64Bit, Operand>::type *
+TargetX86Base<Machine>::loOperand(Operand *Operand) {
assert(Operand->getType() == IceType_i64 ||
Operand->getType() == IceType_f64);
if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
@@ -905,7 +913,9 @@ Operand *TargetX86Base<Machine>::loOperand(Operand *Operand) {
}
template <class Machine>
-Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) {
+template <typename T>
+typename std::enable_if<!T::Is64Bit, Operand>::type *
+TargetX86Base<Machine>::hiOperand(Operand *Operand) {
assert(Operand->getType() == IceType_i64 ||
Operand->getType() == IceType_f64);
if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
@@ -1107,8 +1117,8 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))
std::swap(Src0, Src1);
}
- if (Dest->getType() == IceType_i64) {
- // These helper-call-involved instructions are lowered in this
+ if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
+ // These x86-32 helper-call-involved instructions are lowered in this
// separate switch. This is because loOperand() and hiOperand()
// may insert redundant instructions for constant blinding and
// pooling. Such redundant instructions will fail liveness analysis
@@ -1656,7 +1666,8 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
Context.insert(InstFakeUse::create(Func, T_eax));
} else {
Constant *Zero = Ctx->getConstantZero(IceType_i32);
- _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);
+ T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
+ _mov(T_edx, Zero);
_mov(T, Src0, Traits::RegisterSet::Reg_eax);
_div(T_edx, Src1, T);
_mov(Dest, T_edx);
@@ -1721,7 +1732,7 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
_mov(Dest, T);
Context.insert(InstFakeUse::create(Func, T_eax));
} else {
- T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
+ T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
_mov(T, Src0, Traits::RegisterSet::Reg_eax);
_cbwdq(T_edx, T);
_idiv(T_edx, Src1, T);
@@ -1765,7 +1776,7 @@ void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
Variable *Dest = Inst->getDest();
Operand *Src0 = Inst->getSrc(0);
assert(Dest->getType() == Src0->getType());
- if (Dest->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
Src0 = legalize(Src0);
Operand *Src0Lo = loOperand(Src0);
Operand *Src0Hi = hiOperand(Src0);
@@ -1870,7 +1881,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
_psra(T, ShiftConstant);
_movp(Dest, T);
}
- } else if (Dest->getType() == IceType_i64) {
+ } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
// t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
Constant *Shift = Ctx->getConstantInt32(31);
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
@@ -1930,7 +1941,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
_movp(T, Src0RM);
_pand(T, OneMask);
_movp(Dest, T);
- } else if (Dest->getType() == IceType_i64) {
+ } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
// t1=movzx src; dst.lo=t1; dst.hi=0
Constant *Zero = Ctx->getConstantZero(IceType_i32);
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
@@ -1952,15 +1963,21 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
Constant *One = Ctx->getConstantInt32(1);
Type DestTy = Dest->getType();
Variable *T;
- if (DestTy == IceType_i8) {
- T = makeReg(DestTy);
- _mov(T, Src0RM);
- } else {
- // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
- T = makeReg(IceType_i32);
- _movzx(T, Src0RM);
- }
+ T = makeReg(IceType_i32);
+ _mov(T, Src0RM);
_and(T, One);
+ if (!Traits::Is64Bit) {
+ assert(DestTy != IceType_i64);
+ } else if (DestTy == IceType_i64) {
+ // In x86-64 we should be able to rely on mov reg, reg to zero extend T
+ // into Dest. At this point we can't ensure Dest will live in a
+ // register. Therefore, we use _movzx, which the assembler rightly
+ // converts to a 32-bit mov. A new temporary is created because the
+ // assembler does not know how to movzx to a memory location.
+ Variable *T_1 = makeReg(IceType_i64);
+ _movzx(T_1, T);
+ T = T_1;
+ }
_mov(Dest, T);
} else {
// t1 = movzx src; dst = t1
@@ -1982,7 +1999,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
_movp(Dest, T);
} else {
Operand *Src0 = legalizeUndef(Inst->getSrc(0));
- if (Src0->getType() == IceType_i64)
+ if (!Traits::Is64Bit && Src0->getType() == IceType_i64)
Src0 = loOperand(Src0);
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
// t1 = trunc Src0RM; Dest = t1
@@ -2013,7 +2030,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
Variable *T = makeReg(Dest->getType());
_cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
_movp(Dest, T);
- } else if (Dest->getType() == IceType_i64) {
+ } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
// Use a helper for converting floating-point values to 64-bit
// integers. SSE2 appears to have no way to convert from xmm
// registers to something like the edx:eax register pair, and
@@ -2032,7 +2049,15 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
} else {
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
// t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
- Variable *T_1 = makeReg(IceType_i32);
+ Variable *T_1 = nullptr;
+ if (Traits::Is64Bit && Dest->getType() == IceType_i64) {
+ T_1 = makeReg(IceType_i64);
+ } else {
+ assert(Dest->getType() != IceType_i64);
+ T_1 = makeReg(IceType_i32);
+ }
+ // cvt() requires its integer argument to be a GPR.
+ T_1->setWeightInfinite();
Variable *T_2 = makeReg(Dest->getType());
_cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
_mov(T_2, T_1); // T_1 and T_2 may have different integer types
@@ -2050,14 +2075,18 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
Call->addArg(Inst->getSrc(0));
lowerCall(Call);
} else if (Dest->getType() == IceType_i64 ||
- Dest->getType() == IceType_i32) {
+ (!Traits::Is64Bit && Dest->getType() == IceType_i32)) {
// Use a helper for both x86-32 and x86-64.
- split64(Dest);
+ if (!Traits::Is64Bit)
+ split64(Dest);
const SizeT MaxSrcs = 1;
Type DestType = Dest->getType();
Type SrcType = Inst->getSrc(0)->getType();
IceString TargetString;
- if (isInt32Asserting32Or64(DestType)) {
+ if (Traits::Is64Bit) {
+ TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
+ : H_fptoui_f64_i64;
+ } else if (isInt32Asserting32Or64(DestType)) {
TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
: H_fptoui_f64_i32;
} else {
@@ -2071,7 +2100,15 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
} else {
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
// t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
- Variable *T_1 = makeReg(IceType_i32);
+ assert(Dest->getType() != IceType_i64);
+ Variable *T_1 = nullptr;
+ if (Traits::Is64Bit && Dest->getType() == IceType_i32) {
+ T_1 = makeReg(IceType_i64);
+ } else {
+ assert(Dest->getType() != IceType_i32);
+ T_1 = makeReg(IceType_i32);
+ }
+ T_1->setWeightInfinite();
Variable *T_2 = makeReg(Dest->getType());
_cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
_mov(T_2, T_1); // T_1 and T_2 may have different integer types
@@ -2090,7 +2127,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
Variable *T = makeReg(Dest->getType());
_cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
_movp(Dest, T);
- } else if (Inst->getSrc(0)->getType() == IceType_i64) {
+ } else if (!Traits::Is64Bit && Inst->getSrc(0)->getType() == IceType_i64) {
// Use a helper for x86-32.
const SizeT MaxSrcs = 1;
Type DestType = Dest->getType();
@@ -2106,9 +2143,16 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
// Sign-extend the operand.
// t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
- Variable *T_1 = makeReg(IceType_i32);
+ Variable *T_1 = nullptr;
+ if (Traits::Is64Bit && Src0RM->getType() == IceType_i64) {
+ T_1 = makeReg(IceType_i64);
+ } else {
+ assert(Src0RM->getType() != IceType_i64);
+ T_1 = makeReg(IceType_i32);
+ }
+ T_1->setWeightInfinite();
Variable *T_2 = makeReg(Dest->getType());
- if (Src0RM->getType() == IceType_i32)
+ if (Src0RM->getType() == T_1->getType())
_mov(T_1, Src0RM);
else
_movsx(T_1, Src0RM);
@@ -2126,7 +2170,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
Call->addArg(Src0);
lowerCall(Call);
} else if (Src0->getType() == IceType_i64 ||
- Src0->getType() == IceType_i32) {
+ (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
// Use a helper for x86-32 and x86-64. Also use a helper for
// i32 on x86-32.
const SizeT MaxSrcs = 1;
@@ -2147,9 +2191,17 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
// Zero-extend the operand.
// t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
- Variable *T_1 = makeReg(IceType_i32);
+ Variable *T_1 = nullptr;
+ if (Traits::Is64Bit && Src0RM->getType() == IceType_i32) {
+ T_1 = makeReg(IceType_i64);
+ } else {
+ assert(Src0RM->getType() != IceType_i64);
+ assert(Traits::Is64Bit || Src0RM->getType() != IceType_i32);
+ T_1 = makeReg(IceType_i32);
+ }
+ T_1->setWeightInfinite();
Variable *T_2 = makeReg(Dest->getType());
- if (Src0RM->getType() == IceType_i32)
+ if (Src0RM->getType() == T_1->getType())
_mov(T_1, Src0RM);
else
_movzx(T_1, Src0RM);
@@ -2205,77 +2257,96 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
_mov(Dest, Spill);
} break;
case IceType_i64: {
- Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
- assert(Src0RM->getType() == IceType_f64);
- // a.i64 = bitcast b.f64 ==>
- // s.f64 = spill b.f64
- // t_lo.i32 = lo(s.f64)
- // a_lo.i32 = t_lo.i32
- // t_hi.i32 = hi(s.f64)
- // a_hi.i32 = t_hi.i32
- Operand *SpillLo, *SpillHi;
- if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
- typename Traits::SpillVariable *SpillVar =
- Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
- SpillVar->setLinkedTo(Src0Var);
- Variable *Spill = SpillVar;
- Spill->setWeight(RegWeight::Zero);
- _movq(Spill, Src0RM);
- SpillLo = Traits::VariableSplit::create(Func, Spill,
- Traits::VariableSplit::Low);
- SpillHi = Traits::VariableSplit::create(Func, Spill,
- Traits::VariableSplit::High);
+ assert(Src0->getType() == IceType_f64);
+ if (Traits::Is64Bit) {
+ // Movd requires its fp argument (in this case, the bitcast source) to
+ // be an xmm register.
+ Variable *Src0R = legalizeToReg(Src0);
+ Variable *T = makeReg(IceType_i64);
+ _movd(T, Src0R);
+ _mov(Dest, T);
} else {
- SpillLo = loOperand(Src0RM);
- SpillHi = hiOperand(Src0RM);
- }
+ Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
+ // a.i64 = bitcast b.f64 ==>
+ // s.f64 = spill b.f64
+ // t_lo.i32 = lo(s.f64)
+ // a_lo.i32 = t_lo.i32
+ // t_hi.i32 = hi(s.f64)
+ // a_hi.i32 = t_hi.i32
+ Operand *SpillLo, *SpillHi;
+ if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
+ typename Traits::SpillVariable *SpillVar =
+ Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
+ SpillVar->setLinkedTo(Src0Var);
+ Variable *Spill = SpillVar;
+ Spill->setWeight(RegWeight::Zero);
+ _movq(Spill, Src0RM);
+ SpillLo = Traits::VariableSplit::create(Func, Spill,
+ Traits::VariableSplit::Low);
+ SpillHi = Traits::VariableSplit::create(Func, Spill,
+ Traits::VariableSplit::High);
+ } else {
+ SpillLo = loOperand(Src0RM);
+ SpillHi = hiOperand(Src0RM);
+ }
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Variable *T_Lo = makeReg(IceType_i32);
- Variable *T_Hi = makeReg(IceType_i32);
+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+ Variable *T_Lo = makeReg(IceType_i32);
+ Variable *T_Hi = makeReg(IceType_i32);
- _mov(T_Lo, SpillLo);
- _mov(DestLo, T_Lo);
- _mov(T_Hi, SpillHi);
- _mov(DestHi, T_Hi);
+ _mov(T_Lo, SpillLo);
+ _mov(DestLo, T_Lo);
+ _mov(T_Hi, SpillHi);
+ _mov(DestHi, T_Hi);
+ }
} break;
case IceType_f64: {
- Src0 = legalize(Src0);
assert(Src0->getType() == IceType_i64);
- if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
- Variable *T = Func->makeVariable(Dest->getType());
- _movq(T, Src0);
- _movq(Dest, T);
- break;
- }
- // a.f64 = bitcast b.i64 ==>
- // t_lo.i32 = b_lo.i32
- // FakeDef(s.f64)
- // lo(s.f64) = t_lo.i32
- // t_hi.i32 = b_hi.i32
- // hi(s.f64) = t_hi.i32
- // a.f64 = s.f64
- typename Traits::SpillVariable *SpillVar =
- Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
- SpillVar->setLinkedTo(Dest);
- Variable *Spill = SpillVar;
- Spill->setWeight(RegWeight::Zero);
+ if (Traits::Is64Bit) {
+ Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
+ Variable *T = makeReg(IceType_f64);
+ // Movd requires its fp argument (in this case, the bitcast destination)
+ // to be an xmm register.
+ T->setWeightInfinite();
+ _movd(T, Src0RM);
+ _mov(Dest, T);
+ } else {
+ Src0 = legalize(Src0);
+ if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
+ Variable *T = Func->makeVariable(Dest->getType());
+ _movq(T, Src0);
+ _movq(Dest, T);
+ break;
+ }
+ // a.f64 = bitcast b.i64 ==>
+ // t_lo.i32 = b_lo.i32
+ // FakeDef(s.f64)
+ // lo(s.f64) = t_lo.i32
+ // t_hi.i32 = b_hi.i32
+ // hi(s.f64) = t_hi.i32
+ // a.f64 = s.f64
+ typename Traits::SpillVariable *SpillVar =
+ Func->makeVariable<typename Traits::SpillVariable>(IceType_f64);
+ SpillVar->setLinkedTo(Dest);
+ Variable *Spill = SpillVar;
+ Spill->setWeight(RegWeight::Zero);
- Variable *T_Lo = nullptr, *T_Hi = nullptr;
- typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
- Func, Spill, Traits::VariableSplit::Low);
- typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
- Func, Spill, Traits::VariableSplit::High);
- _mov(T_Lo, loOperand(Src0));
- // Technically, the Spill is defined after the _store happens, but
- // SpillLo is considered a "use" of Spill so define Spill before it
- // is used.
- Context.insert(InstFakeDef::create(Func, Spill));
- _store(T_Lo, SpillLo);
- _mov(T_Hi, hiOperand(Src0));
- _store(T_Hi, SpillHi);
- _movq(Dest, Spill);
+ Variable *T_Lo = nullptr, *T_Hi = nullptr;
+ typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
+ Func, Spill, Traits::VariableSplit::Low);
+ typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
+ Func, Spill, Traits::VariableSplit::High);
+ _mov(T_Lo, loOperand(Src0));
+ // Technically, the Spill is defined after the _store happens, but
+ // SpillLo is considered a "use" of Spill so define Spill before it
+ // is used.
+ Context.insert(InstFakeDef::create(Func, Spill));
+ _store(T_Lo, SpillLo);
+ _mov(T_Hi, hiOperand(Src0));
+ _store(T_Hi, SpillHi);
+ _movq(Dest, Spill);
+ }
} break;
case IceType_v8i1: {
assert(Src0->getType() == IceType_i8);
@@ -2615,32 +2686,8 @@ void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Inst) {
return;
}
- // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
- if (Src0->getType() == IceType_i64) {
- InstIcmp::ICond Condition = Inst->getCondition();
- size_t Index = static_cast<size_t>(Condition);
- assert(Index < Traits::TableIcmp64Size);
- Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
- Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
- Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
- Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
- Constant *Zero = Ctx->getConstantZero(IceType_i32);
- Constant *One = Ctx->getConstantInt32(1);
- typename Traits::Insts::Label *LabelFalse =
- Traits::Insts::Label::create(Func, this);
- typename Traits::Insts::Label *LabelTrue =
- Traits::Insts::Label::create(Func, this);
- _mov(Dest, One);
- _cmp(Src0HiRM, Src1HiRI);
- if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
- _br(Traits::TableIcmp64[Index].C1, LabelTrue);
- if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
- _br(Traits::TableIcmp64[Index].C2, LabelFalse);
- _cmp(Src0LoRM, Src1LoRI);
- _br(Traits::TableIcmp64[Index].C3, LabelTrue);
- Context.insert(LabelFalse);
- _mov_nonkillable(Dest, Zero);
- Context.insert(LabelTrue);
+ if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
+ lowerIcmp64(Inst);
return;
}
@@ -2650,6 +2697,40 @@ void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Inst) {
_setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition()));
}
+template <typename Machine>
+template <typename T>
+typename std::enable_if<!T::Is64Bit, void>::type
+TargetX86Base<Machine>::lowerIcmp64(const InstIcmp *Inst) {
+ // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
+ Operand *Src0 = legalize(Inst->getSrc(0));
+ Operand *Src1 = legalize(Inst->getSrc(1));
+ Variable *Dest = Inst->getDest();
+ InstIcmp::ICond Condition = Inst->getCondition();
+ size_t Index = static_cast<size_t>(Condition);
+ assert(Index < Traits::TableIcmp64Size);
+ Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
+ Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
+ Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
+ Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
+ Constant *Zero = Ctx->getConstantZero(IceType_i32);
+ Constant *One = Ctx->getConstantInt32(1);
+ typename Traits::Insts::Label *LabelFalse =
+ Traits::Insts::Label::create(Func, this);
+ typename Traits::Insts::Label *LabelTrue =
+ Traits::Insts::Label::create(Func, this);
+ _mov(Dest, One);
+ _cmp(Src0HiRM, Src1HiRI);
+ if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
+ _br(Traits::TableIcmp64[Index].C1, LabelTrue);
+ if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
+ _br(Traits::TableIcmp64[Index].C2, LabelFalse);
+ _cmp(Src0LoRM, Src1LoRI);
+ _br(Traits::TableIcmp64[Index].C3, LabelTrue);
+ Context.insert(LabelFalse);
+ _mov_nonkillable(Dest, Zero);
+ Context.insert(LabelTrue);
+}
+
template <class Machine>
void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
Operand *SourceVectNotLegalized = Inst->getSrc(0);
@@ -2848,7 +2929,7 @@ void TargetX86Base<Machine>::lowerIntrinsicCall(
return;
}
Variable *Dest = Instr->getDest();
- if (Dest->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
// Follow what GCC does and use a movq instead of what lowerLoad()
// normally does (split the load into two).
// Thus, this skips load/arithmetic op folding. Load/arithmetic folding
@@ -2898,7 +2979,7 @@ void TargetX86Base<Machine>::lowerIntrinsicCall(
// Add a fence after the store to make it visible.
Operand *Value = Instr->getArg(0);
Operand *Ptr = Instr->getArg(1);
- if (Value->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Value->getType() == IceType_i64) {
// Use a movq instead of what lowerStore() normally does
// (split the store into two), following what GCC does.
// Cast the bits from int -> to an xmm register first.
@@ -2922,7 +3003,7 @@ void TargetX86Base<Machine>::lowerIntrinsicCall(
Operand *Val = Instr->getArg(0);
// In 32-bit mode, bswap only works on 32-bit arguments, and the
// argument must be a register. Use rotate left for 16-bit bswap.
- if (Val->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
Val = legalizeUndef(Val);
Variable *T_Lo = legalizeToReg(loOperand(Val));
Variable *T_Hi = legalizeToReg(hiOperand(Val));
@@ -2932,7 +3013,8 @@ void TargetX86Base<Machine>::lowerIntrinsicCall(
_bswap(T_Hi);
_mov(DestLo, T_Hi);
_mov(DestHi, T_Lo);
- } else if (Val->getType() == IceType_i32) {
+ } else if ((Traits::Is64Bit && Val->getType() == IceType_i64) ||
+ Val->getType() == IceType_i32) {
Variable *T = legalizeToReg(Val);
_bswap(T);
_mov(Dest, T);
@@ -2949,11 +3031,28 @@ void TargetX86Base<Machine>::lowerIntrinsicCall(
}
case Intrinsics::Ctpop: {
Variable *Dest = Instr->getDest();
+ Variable *T = nullptr;
Operand *Val = Instr->getArg(0);
- InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
- ? H_call_ctpop_i32
- : H_call_ctpop_i64,
- Dest, 1);
+ Type ValTy = Val->getType();
+ assert(ValTy == IceType_i32 || ValTy == IceType_i64);
+
+ if (!Traits::Is64Bit) {
+ T = Dest;
+ } else {
+ T = makeReg(IceType_i64);
+ if (ValTy == IceType_i32) {
+ // in x86-64, __popcountsi2 is not defined, so we cheat a bit by
+ // converting it to a 64-bit value, and using ctpop_i64. _movzx should
+ // ensure we will not have any bits set on Val's upper 32 bits.
+ Variable *V = makeReg(IceType_i64);
+ _movzx(V, Val);
+ Val = V;
+ }
+ ValTy = IceType_i64;
+ }
+
+ InstCall *Call = makeHelperCall(
+ ValTy == IceType_i32 ? H_call_ctpop_i32 : H_call_ctpop_i64, T, 1);
Call->addArg(Val);
lowerCall(Call);
// The popcount helpers always return 32-bit values, while the intrinsic's
@@ -2961,10 +3060,33 @@ void TargetX86Base<Machine>::lowerIntrinsicCall(
// (in 64-bit mode). Thus, clear the upper bits of the dest just in case
// the user doesn't do that in the IR. If the user does that in the IR,
// then this zero'ing instruction is dead and gets optimized out.
- if (Val->getType() == IceType_i64) {
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Constant *Zero = Ctx->getConstantZero(IceType_i32);
- _mov(DestHi, Zero);
+ if (!Traits::Is64Bit) {
+ assert(T == Dest);
+ if (Val->getType() == IceType_i64) {
+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
+ Constant *Zero = Ctx->getConstantZero(IceType_i32);
+ _mov(DestHi, Zero);
+ }
+ } else {
+ assert(Val->getType() == IceType_i64);
+ // T is 64 bit. It needs to be copied to dest. We need to:
+ //
+ // T_1.32 = trunc T.64 to i32
+ // T_2.64 = zext T_1.32 to i64
+ // Dest.<<right_size>> = T_2.<<right_size>>
+ //
+ // which ensures the upper 32 bits will always be cleared. Just doing a
+ //
+ // mov Dest.32 = trunc T.32 to i32
+ //
+ // is dangerous because there's a chance the compiler will optimize this
+ // copy out. To use _movzx we need two new registers (one 32-, and
+ // another 64-bit wide.)
+ Variable *T_1 = makeReg(IceType_i32);
+ _mov(T_1, T);
+ Variable *T_2 = makeReg(IceType_i64);
+ _movzx(T_2, T_1);
+ _mov(Dest, T_2);
}
return;
}
@@ -2974,7 +3096,7 @@ void TargetX86Base<Machine>::lowerIntrinsicCall(
Operand *Val = legalize(Instr->getArg(0));
Operand *FirstVal;
Operand *SecondVal = nullptr;
- if (Val->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
FirstVal = loOperand(Val);
SecondVal = hiOperand(Val);
} else {
@@ -2991,7 +3113,7 @@ void TargetX86Base<Machine>::lowerIntrinsicCall(
Operand *Val = legalize(Instr->getArg(0));
Operand *FirstVal;
Operand *SecondVal = nullptr;
- if (Val->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
FirstVal = hiOperand(Val);
SecondVal = loOperand(Val);
} else {
@@ -3105,7 +3227,7 @@ template <class Machine>
void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
Operand *Ptr, Operand *Expected,
Operand *Desired) {
- if (Expected->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Expected->getType() == IceType_i64) {
// Reserve the pre-colored registers first, before adding any more
// infinite-weight variables from formMemoryOperand's legalization.
Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
@@ -3223,7 +3345,7 @@ void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
Func->setError("Unknown AtomicRMW operation");
return;
case Intrinsics::AtomicAdd: {
- if (Dest->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
// All the fall-through paths must set this to true, but use this
// for asserting.
NeedsCmpxchg = true;
@@ -3241,7 +3363,7 @@ void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
return;
}
case Intrinsics::AtomicSub: {
- if (Dest->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
NeedsCmpxchg = true;
Op_Lo = &TargetX86Base<Machine>::_sub;
Op_Hi = &TargetX86Base<Machine>::_sbb;
@@ -3278,7 +3400,7 @@ void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
Op_Hi = &TargetX86Base<Machine>::_xor;
break;
case Intrinsics::AtomicExchange:
- if (Dest->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
NeedsCmpxchg = true;
// NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
// just need to be moved to the ecx and ebx registers.
@@ -3332,7 +3454,7 @@ void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo,
// If Op_{Lo,Hi} are nullptr, then just copy the value.
Val = legalize(Val);
Type Ty = Val->getType();
- if (Ty == IceType_i64) {
+ if (!Traits::Is64Bit && Ty == IceType_i64) {
Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
@@ -3464,7 +3586,7 @@ void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
if (!Cttz) {
_xor(T_Dest, ThirtyOne);
}
- if (Ty == IceType_i32) {
+ if (Traits::Is64Bit || Ty == IceType_i32) {
_mov(Dest, T_Dest);
return;
}
@@ -3891,7 +4013,8 @@ inline void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
template <class Machine>
void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) {
- // A Load instruction can be treated the same as an Assign instruction, after
+ // A Load instruction can be treated the same as an Assign instruction,
+ // after
Jim Stichnoth 2015/08/11 16:01:37 formatting
John 2015/08/12 19:27:55 Done.
// the source operand is transformed into an Traits::X86OperandMem operand.
// Note that the address mode optimization already creates an
// Traits::X86OperandMem operand, so it doesn't need another level of
@@ -4053,7 +4176,7 @@ void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) {
std::swap(SrcT, SrcF);
Cond = InstX86Base<Machine>::getOppositeCondition(Cond);
}
- if (DestTy == IceType_i64) {
+ if (!Traits::Is64Bit && DestTy == IceType_i64) {
SrcT = legalizeUndef(SrcT);
SrcF = legalizeUndef(SrcF);
// Set the low portion.
@@ -4075,7 +4198,8 @@ void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) {
return;
}
- assert(DestTy == IceType_i16 || DestTy == IceType_i32);
+ assert(DestTy == IceType_i16 || DestTy == IceType_i32 ||
+ (Traits::Is64Bit && DestTy == IceType_i64));
Variable *T = nullptr;
SrcF = legalize(SrcF);
_mov(T, SrcF);
@@ -4092,7 +4216,7 @@ void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {
formMemoryOperand(Addr, Value->getType());
Type Ty = NewAddr->getType();
- if (Ty == IceType_i64) {
+ if (!Traits::Is64Bit && Ty == IceType_i64) {
Value = legalizeUndef(Value);
Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
@@ -4140,7 +4264,7 @@ Operand *TargetX86Base<Machine>::lowerCmpRange(Operand *Comparison,
uint64_t Min, uint64_t Max) {
// TODO(ascull): 64-bit should not reach here but only because it is not
// implemented yet. This should be able to handle the 64-bit case.
- assert(Comparison->getType() != IceType_i64);
+ assert(Traits::Is64Bit || Comparison->getType() != IceType_i64);
// Subtracting 0 is a nop so don't do it
if (Min != 0) {
// Avoid clobbering the comparison by copying it
@@ -4239,7 +4363,7 @@ void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {
assert(CaseClusters.size() != 0); // Should always be at least one
- if (Src0->getType() == IceType_i64) {
+ if (!Traits::Is64Bit && Src0->getType() == IceType_i64) {
Src0 = legalize(Src0); // get Base/Index into physical registers
Operand *Src0Lo = loOperand(Src0);
Operand *Src0Hi = hiOperand(Src0);
@@ -4444,7 +4568,7 @@ void TargetX86Base<Machine>::lowerRMW(
Operand *Src = RMW->getData();
Type Ty = Src->getType();
typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
- if (Ty == IceType_i64) {
+ if (!Traits::Is64Bit && Ty == IceType_i64) {
Src = legalizeUndef(Src);
Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
@@ -4478,7 +4602,8 @@ void TargetX86Base<Machine>::lowerRMW(
return;
}
} else {
- // i8, i16, i32
+ // x86-32: i8, i16, i32
+ // x86-64: i8, i16, i32, i64
switch (RMW->getOp()) {
default:
// TODO(stichnot): Implement other arithmetic operators.
@@ -4523,7 +4648,14 @@ void TargetX86Base<Machine>::lowerOther(const Inst *Instr) {
/// turned into zeroes, since loOperand() and hiOperand() don't expect
/// Undef input.
template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
- // Pause constant blinding or pooling, blinding or pooling will be done later
+ if (Traits::Is64Bit) {
+    // On x86-64 we don't need to prelower phis -- the architecture can handle
+    // 64-bit integers natively.
+ return;
+ }
+
+ // Pause constant blinding or pooling, blinding or pooling will be done
+ // later
Jim Stichnoth 2015/08/11 16:01:37 formatting
John 2015/08/12 19:27:55 Done.
// during phi lowering assignments
BoolFlagSaver B(RandomizationPoolingPaused, true);
PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(
@@ -4685,6 +4817,16 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
// There should be no constants of vector type (other than undef).
assert(!isVectorType(Ty));
+ // If the operand is a 64 bit constant integer we need to legalize it to a
+ // register in x86-64.
+ if (Traits::Is64Bit) {
+ if (auto *C = llvm::dyn_cast<ConstantInteger64>(Const)) {
+ Variable *V = copyToReg(C, RegNum);
+ V->setWeightInfinite();
+ return V;
+ }
+ }
+
// If the operand is an 32 bit constant integer, we should check
// whether we need to randomize it or pool it.
if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
@@ -4822,7 +4964,7 @@ TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, Type Ty,
template <class Machine>
Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {
// There aren't any 64-bit integer registers for x86-32.
- assert(Type != IceType_i64);
+ assert(Traits::Is64Bit || Type != IceType_i64);
Variable *Reg = Func->makeVariable(Type);
if (RegNum == Variable::NoRegister)
Reg->setWeightInfinite();
@@ -4854,8 +4996,15 @@ void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const {
}
template <class Machine>
-void TargetX86Base<Machine>::emit(const ConstantInteger64 *) const {
- llvm::report_fatal_error("Not expecting to emit 64-bit integers");
+void TargetX86Base<Machine>::emit(const ConstantInteger64 *C) const {
+ if (!Traits::Is64Bit) {
+ llvm::report_fatal_error("Not expecting to emit 64-bit integers");
+ } else {
+ if (!BuildDefs::dump())
+ return;
+ Ostream &Str = Ctx->getStrEmit();
+ Str << getConstantPrefix() << C->getValue();
+ }
}
template <class Machine>
@@ -5000,7 +5149,8 @@ TargetX86Base<Machine>::randomizeOrPoolImmediate(
MemOperand->getBase(), Mask1);
// If we have already assigned a physical register, we must come from
// advancedPhiLowering()=>lowerAssign(). In this case we should reuse
-      // the assigned register as this assignment is that start of its use-def
+      // the assigned register as this assignment is the start of its
+      // use-def
Jim Stichnoth 2015/08/11 16:01:37 formatting
John 2015/08/12 19:27:55 Done.
// chain. So we add RegNum argument here.
Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
_lea(RegTemp, TempMemOperand);
« src/IceTargetLoweringX86Base.h ('K') | « src/IceTargetLoweringX86Base.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698