Index: src/IceTargetLoweringX8632.cpp |
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp |
index a5704b530a20158506602b4271ed386faedc6838..5d71573e41f9042beb30e3a3b1ff531f6aee8ca4 100644 |
--- a/src/IceTargetLoweringX8632.cpp |
+++ b/src/IceTargetLoweringX8632.cpp |
@@ -124,6 +124,20 @@ Type getInVectorElementType(Type Ty) { |
const unsigned X86_MAX_XMM_ARGS = 4; |
// The number of bits in a byte |
const unsigned X86_CHAR_BIT = 8; |
+// Stack alignment |
+const unsigned X86_STACK_ALIGNMENT_BYTES = 16; |
+// Size of the return address on the stack |
+const unsigned X86_RET_IP_SIZE_BYTES = 4; |
+ |
+// Return the difference between Size and the next highest multiple of |
+// the stack alignment. All values are in bytes. |
+uint32_t getAdjustmentToAlignStackInBytes(uint32_t Size) { |
Jim Stichnoth
2014/08/05 18:09:27
Almost all the calls to this routine have a patter
wala
2014/08/05 23:57:03
Done.
|
+ uint32_t Offset = Size % X86_STACK_ALIGNMENT_BYTES; |
+ if (Offset) { |
+ return X86_STACK_ALIGNMENT_BYTES - Offset; |
+ } |
+ return 0; |
+} |
// Instruction set options |
namespace cl = ::llvm::cl; |
@@ -248,8 +262,8 @@ void __attribute__((unused)) xMacroIntegrityCheck() { |
TargetX8632::TargetX8632(Cfg *Func) |
: TargetLowering(Func), InstructionSet(CLInstructionSet), |
- IsEbpBasedFrame(false), FrameSizeLocals(0), LocalsSizeBytes(0), |
- NextLabelNumber(0), ComputedLiveRanges(false), |
+ IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), |
+ LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false), |
PhysicalRegisters(VarList(Reg_NUM)) { |
// TODO: Don't initialize IntegerRegisters and friends every time. |
// Instead, initialize in some sort of static initializer for the |
@@ -543,6 +557,9 @@ void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
return; |
} |
+ if (isVectorType(Ty)) { |
+ InArgsSizeBytes += getAdjustmentToAlignStackInBytes(InArgsSizeBytes); |
+ } |
Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
if (Arg->hasReg()) { |
@@ -570,7 +587,6 @@ void TargetX8632::addProlog(CfgNode *Node) { |
// or B. |
const bool SimpleCoalescing = true; |
size_t InArgsSizeBytes = 0; |
- size_t RetIpSizeBytes = 4; |
size_t PreservedRegsSizeBytes = 0; |
LocalsSizeBytes = 0; |
Context.init(Node); |
@@ -657,6 +673,11 @@ void TargetX8632::addProlog(CfgNode *Node) { |
_mov(ebp, esp); |
} |
+ if (NeedsStackAlignment) { |
+ LocalsSizeBytes += getAdjustmentToAlignStackInBytes( |
+ X86_RET_IP_SIZE_BYTES + LocalsSizeBytes + PreservedRegsSizeBytes); |
+ } |
+ |
// Generate "sub esp, LocalsSizeBytes" |
if (LocalsSizeBytes) |
_sub(getPhysicalRegister(Reg_esp), |
@@ -668,7 +689,7 @@ void TargetX8632::addProlog(CfgNode *Node) { |
// for those that were register-allocated. Args are pushed right to |
// left, so Arg[0] is closest to the stack/frame pointer. |
Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
- size_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes; |
+ size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; |
if (!IsEbpBasedFrame) |
BasicFrameOffset += LocalsSizeBytes; |
@@ -959,12 +980,29 @@ llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include, |
void TargetX8632::lowerAlloca(const InstAlloca *Inst) { |
IsEbpBasedFrame = true; |
- // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize |
- // the number of adjustments of esp, etc. |
+ NeedsStackAlignment = true; |
Jim Stichnoth
2014/08/05 18:09:27
A couple things here.
1. It looks like alloca low
wala
2014/08/05 23:57:03
Done.
|
+ // TODO(sehr,stichnot): align allocated memory, minimize the number of |
+ // adjustments of esp, etc. |
+ |
Variable *esp = getPhysicalRegister(Reg_esp); |
Operand *TotalSize = legalize(Inst->getSizeInBytes()); |
Variable *Dest = Inst->getDest(); |
- _sub(esp, TotalSize); |
+ if (ConstantInteger *ConstantTotalSize = |
+ llvm::dyn_cast<ConstantInteger>(TotalSize)) { |
+ uint32_t Value = ConstantTotalSize->getValue(); |
+ Value += getAdjustmentToAlignStackInBytes(Value); |
+ if (Value) { |
+ _sub(esp, Ctx->getConstantInt(IceType_i32, Value)); |
+ } |
+ } else { |
+ Variable *T = makeReg(IceType_i32); |
+ _mov(T, TotalSize); |
+ // Non-constant sizes need to be adjusted to the next highest |
+ // multiple of the stack alignment at runtime. |
+ _add(T, Ctx->getConstantInt(IceType_i32, 15)); |
Jim Stichnoth
2014/08/05 18:09:26
Should these constants be in terms of X86_STACK_AL
wala
2014/08/05 23:57:03
Done.
|
+ _and(T, Ctx->getConstantInt(IceType_i32, -16)); |
+ _sub(esp, T); |
+ } |
_mov(Dest, esp); |
} |
@@ -1592,51 +1630,77 @@ void TargetX8632::lowerBr(const InstBr *Inst) { |
} |
void TargetX8632::lowerCall(const InstCall *Instr) { |
+ // x86-32 calling convention: |
+ // |
+ // * At the point before the call, the stack must be aligned to 16 |
+ // bytes. |
+ // |
+ // * The first four arguments of vector type, regardless of their |
+ // position relative to the other arguments in the argument list, are |
+ // placed in registers xmm0 - xmm3. |
+ // |
+ // * Other arguments are placed on the stack ordered according to the |
Jim Stichnoth
2014/08/05 18:09:27
This is unclear to me (and possibly wrong). Does
wala
2014/08/05 23:57:03
Done.
|
+ // argument list and get assigned stack locations with increasing |
+ // addresses. The first stack argument is placed at what is the bottom |
+ // of the stack at the point before the call. |
+ // |
+ // * Stack arguments of vector type are aligned to the next highest |
+ // multiple of 16 bytes. Other stack arguments are aligned to the |
+ // next highest multiple of 4 bytes. |
+ NeedsStackAlignment = true; |
+ |
+ OperandList XmmArgs; |
+ OperandList StackArgs, StackArgLocations; |
+ uint32_t ParameterAreaSizeBytes = 0; |
+ |
// Classify each argument operand according to the location where the |
// argument is passed. |
- OperandList XmmArgs; |
- OperandList StackArgs; |
for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { |
Operand *Arg = Instr->getArg(i); |
- if (isVectorType(Arg->getType()) && XmmArgs.size() < X86_MAX_XMM_ARGS) { |
+ Type Ty = Arg->getType(); |
+ // The PNaCl ABI requires the width of arguments to be at least 32 bits. |
+ assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 || |
+ Ty == IceType_f64 || isVectorType(Ty)); |
+ if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { |
XmmArgs.push_back(Arg); |
} else { |
StackArgs.push_back(Arg); |
+ if (isVectorType(Arg->getType())) { |
+ ParameterAreaSizeBytes += |
+ getAdjustmentToAlignStackInBytes(ParameterAreaSizeBytes); |
+ } |
+ Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); |
+ Constant *Loc = Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes); |
+ StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); |
+ ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); |
} |
} |
- // For stack arguments, generate a sequence of push instructions, |
- // pushing right to left, keeping track of stack offsets in case a |
- // push involves a stack operand and we are using an esp-based frame. |
- uint32_t StackOffset = 0; |
- // TODO: Consolidate the stack adjustment for function calls by |
- // reserving enough space for the arguments only once. |
+ |
+ // Adjust the parameter area so that the stack is aligned. It is |
+ // assumed that the stack is already aligned at the start of the |
+ // calling sequence. |
+ ParameterAreaSizeBytes += |
+ getAdjustmentToAlignStackInBytes(ParameterAreaSizeBytes); |
+ |
+ // Subtract the appropriate amount for the argument area. This also |
+ // takes care of setting the stack adjustment during emission. |
// |
// TODO: If for some reason the call instruction gets dead-code |
// eliminated after lowering, we would need to ensure that the |
- // pre-call push instructions and the post-call esp adjustment get |
- // eliminated as well. |
- for (OperandList::reverse_iterator I = StackArgs.rbegin(), |
- E = StackArgs.rend(); I != E; ++I) { |
- Operand *Arg = legalize(*I); |
- if (Arg->getType() == IceType_i64) { |
- _push(hiOperand(Arg)); |
- _push(loOperand(Arg)); |
- } else if (Arg->getType() == IceType_f64 || isVectorType(Arg->getType())) { |
- // If the Arg turns out to be a memory operand, more than one push |
- // instruction is required. This ends up being somewhat clumsy in |
- // the current IR, so we use a workaround. Force the operand into |
- // a (xmm) register, and then push the register. An xmm register |
- // push is actually not possible in x86, but the Push instruction |
- // emitter handles this by decrementing the stack pointer and |
- // directly writing the xmm register value. |
- _push(legalize(Arg, Legal_Reg)); |
- } else { |
- // Otherwise PNaCl requires parameter types to be at least 32-bits. |
- assert(Arg->getType() == IceType_f32 || Arg->getType() == IceType_i32); |
- _push(Arg); |
- } |
- StackOffset += typeWidthInBytesOnStack(Arg->getType()); |
+ // pre-call and the post-call esp adjustment get eliminated as well. |
+ if (ParameterAreaSizeBytes) { |
+ _adjust_stack(ParameterAreaSizeBytes); |
} |
+ |
+ // Copy arguments that are passed on the stack to the appropriate |
+ // stack locations. |
+ for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { |
+ lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); |
+ // TODO: Consider calling postLower() here to reduce the register |
+ // pressure associated with using too many infinite weight |
+ // temporaries when lowering the call sequence in -Om1 mode. |
+ } |
+ |
// Copy arguments to be passed in registers to the appropriate |
// registers. |
// TODO: Investigate the impact of lowering arguments passed in |
@@ -1700,10 +1764,11 @@ void TargetX8632::lowerCall(const InstCall *Instr) { |
if (ReturnRegHi) |
Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
- // Add the appropriate offset to esp. |
- if (StackOffset) { |
+ // Add the appropriate offset to esp. The call instruction takes care |
+ // of resetting the stack offset during emission. |
+ if (ParameterAreaSizeBytes) { |
Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); |
- _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset)); |
+ _add(esp, Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes)); |
} |
// Insert a register-kill pseudo instruction. |
@@ -2182,9 +2247,9 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { |
} else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
// Use pshufd and movd/movss. |
// |
- // ALIGNHACK: Force vector operands to registers in instructions that |
- // require aligned memory operands until support for stack alignment |
- // is implemented. |
+ // ALIGNHACK: Force vector operands to registers in instructions |
+ // that require aligned memory operands until support for data |
+ // alignment is implemented. |
#define ALIGN_HACK(Vect) legalizeToVar((Vect)) |
Operand *SourceVectRM = |
legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
@@ -2269,8 +2334,8 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { |
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
- // ALIGNHACK: Without support for stack alignment, both operands to |
- // cmpps need to be forced into registers. Once support for stack |
+ // ALIGNHACK: Without support for data alignment, both operands to |
+ // cmpps need to be forced into registers. Once support for data |
// alignment is implemented, remove LEGAL_HACK. |
#define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
switch (Condition) { |
@@ -2410,8 +2475,8 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
} |
// TODO: ALIGNHACK: Both operands to compare instructions need to be |
- // in registers until stack alignment support is implemented. Once |
- // there is support for stack alignment, LEGAL_HACK can be removed. |
+ // in registers until data alignment support is implemented. Once |
+ // there is support for data alignment, LEGAL_HACK can be removed. |
#define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
Variable *T = makeReg(Ty); |
switch (Condition) { |
@@ -2631,9 +2696,9 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { |
Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); |
Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); |
- // ALIGNHACK: Force vector operands to registers in instructions that |
- // require aligned memory operands until support for stack alignment |
- // is implemented. |
+ // ALIGNHACK: Force vector operands to registers in instructions |
+ // that require aligned memory operands until support for data |
+ // alignment is implemented. |
#define ALIGN_HACK(Vect) legalizeToVar((Vect)) |
if (Index == 1) { |
SourceVectRM = ALIGN_HACK(SourceVectRM); |
@@ -2921,7 +2986,8 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
} |
case Intrinsics::Memset: { |
// The value operand needs to be extended to a stack slot size |
- // because "push" only works for a specific operand size. |
+ // because the PNaCl ABI requires arguments to be at least 32 bits |
+ // wide. |
Operand *ValOp = Instr->getArg(1); |
assert(ValOp->getType() == IceType_i8); |
Variable *ValExt = Func->makeVariable(stackSlotType(), Context.getNode()); |
@@ -3590,9 +3656,9 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { |
Variable *T = makeReg(SrcTy); |
Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); |
Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); |
- // ALIGNHACK: Until stack alignment support is implemented, vector |
+ // ALIGNHACK: Until data alignment support is implemented, vector |
// instructions need to have vector operands in registers. Once |
- // there is support for stack alignment, LEGAL_HACK can be removed. |
+ // there is support for data alignment, LEGAL_HACK can be removed. |
#define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
if (InstructionSet >= SSE4_1) { |
// TODO(wala): If the condition operand is a constant, use blendps |
@@ -3687,13 +3753,16 @@ void TargetX8632::lowerStore(const InstStore *Inst) { |
Operand *Value = Inst->getData(); |
Operand *Addr = Inst->getAddr(); |
OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType()); |
+ Type Ty = NewAddr->getType(); |
- if (NewAddr->getType() == IceType_i64) { |
+ if (Ty == IceType_i64) { |
Value = legalize(Value); |
Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true); |
Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true); |
_store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); |
_store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); |
+ } else if (isVectorType(Ty)) { |
+ _storep(legalizeToVar(Value), NewAddr); |
} else { |
Value = legalize(Value, Legal_Reg | Legal_Imm, true); |
_store(Value, NewAddr); |
@@ -4036,9 +4105,9 @@ void TargetX8632::postLower() { |
llvm::SmallBitVector AvailableTypedRegisters = |
AvailableRegisters & getRegisterSetForType(Var->getType()); |
if (!AvailableTypedRegisters.any()) { |
- // This is a hack in case we run out of physical registers |
- // due to an excessive number of "push" instructions from |
- // lowering a call. |
+ // This is a hack in case we run out of physical registers due |
+ // to an excessively long code sequence, as might happen when |
+ // lowering arguments in lowerCall(). |
AvailableRegisters = WhiteList; |
AvailableTypedRegisters = |
AvailableRegisters & getRegisterSetForType(Var->getType()); |