Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(19)

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 444443002: Subzero: Align the stack at the point of function calls. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: First round of comments. Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/64bit.pnacl.ll » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/IceTargetLoweringX8632.cpp
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index a5704b530a20158506602b4271ed386faedc6838..f9c0df4abe5392f409748f459bb39be33c15f5d0 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -124,6 +124,18 @@ Type getInVectorElementType(Type Ty) {
const unsigned X86_MAX_XMM_ARGS = 4;
// The number of bits in a byte
const unsigned X86_CHAR_BIT = 8;
+// Stack alignment
+const unsigned X86_STACK_ALIGNMENT_BYTES = 16;
+// Size of the return address on the stack
+const unsigned X86_RET_IP_SIZE_BYTES = 4;
+
+// Value is a size in bytes. Return Value adjusted to the next highest
+// multiple of the stack alignment.
+uint32_t applyStackAlignment(uint32_t Value) {
+ // power of 2
+ assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
+ return (Value + X86_STACK_ALIGNMENT_BYTES - 1) & -X86_STACK_ALIGNMENT_BYTES;
+}
// Instruction set options
namespace cl = ::llvm::cl;
@@ -248,8 +260,8 @@ void __attribute__((unused)) xMacroIntegrityCheck() {
TargetX8632::TargetX8632(Cfg *Func)
: TargetLowering(Func), InstructionSet(CLInstructionSet),
- IsEbpBasedFrame(false), FrameSizeLocals(0), LocalsSizeBytes(0),
- NextLabelNumber(0), ComputedLiveRanges(false),
+ IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0),
+ LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
PhysicalRegisters(VarList(Reg_NUM)) {
// TODO: Don't initialize IntegerRegisters and friends every time.
// Instead, initialize in some sort of static initializer for the
@@ -543,6 +555,9 @@ void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
return;
}
+ if (isVectorType(Ty)) {
+ InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
+ }
Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
if (Arg->hasReg()) {
@@ -570,7 +585,6 @@ void TargetX8632::addProlog(CfgNode *Node) {
// or B.
const bool SimpleCoalescing = true;
size_t InArgsSizeBytes = 0;
- size_t RetIpSizeBytes = 4;
size_t PreservedRegsSizeBytes = 0;
LocalsSizeBytes = 0;
Context.init(Node);
@@ -657,6 +671,13 @@ void TargetX8632::addProlog(CfgNode *Node) {
_mov(ebp, esp);
}
+ if (NeedsStackAlignment) {
+ uint32_t StackSize = applyStackAlignment(
+ X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes + LocalsSizeBytes);
+ LocalsSizeBytes =
+ StackSize - X86_RET_IP_SIZE_BYTES - PreservedRegsSizeBytes;
+ }
+
// Generate "sub esp, LocalsSizeBytes"
if (LocalsSizeBytes)
_sub(getPhysicalRegister(Reg_esp),
@@ -668,7 +689,7 @@ void TargetX8632::addProlog(CfgNode *Node) {
// for those that were register-allocated. Args are pushed right to
// left, so Arg[0] is closest to the stack/frame pointer.
Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
- size_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;
+ size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
if (!IsEbpBasedFrame)
BasicFrameOffset += LocalsSizeBytes;
@@ -959,12 +980,48 @@ llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
IsEbpBasedFrame = true;
- // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize
- // the number of adjustments of esp, etc.
+ // Conservatively require the stack to be aligned. Some stack
+ // adjustment operations implemented below assume that the stack is
+ // aligned before the alloca. All the alloca code ensures that the
+ // stack alignment is preserved after the alloca. The stack alignment
+ // restriction can be relaxed in some cases.
+ NeedsStackAlignment = true;
+
+ // TODO(sehr,stichnot): align allocated memory, minimize the number of
+ // adjustments of esp, etc.
Variable *esp = getPhysicalRegister(Reg_esp);
Operand *TotalSize = legalize(Inst->getSizeInBytes());
Variable *Dest = Inst->getDest();
- _sub(esp, TotalSize);
+ uint32_t AlignmentParam = Inst->getAlignInBytes();
+
+ // LLVM enforces power of 2 alignment.
+ assert((AlignmentParam & (AlignmentParam - 1)) == 0);
+ assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
+ bool AdjustAlignment = AlignmentParam > X86_STACK_ALIGNMENT_BYTES;
+ uint32_t Alignment =
+ AdjustAlignment ? AlignmentParam : X86_STACK_ALIGNMENT_BYTES;
+
+ if (AdjustAlignment) {
+ _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment));
+ }
+ if (ConstantInteger *ConstantTotalSize =
+ llvm::dyn_cast<ConstantInteger>(TotalSize)) {
+ uint32_t Value = ConstantTotalSize->getValue();
+ // Round Value up to the next highest multiple of the alignment.
+ Value = (Value + Alignment - 1) & -Alignment;
+ _sub(esp, Ctx->getConstantInt(IceType_i32, Value));
+ } else {
+ // Non-constant sizes need to be adjusted to the next highest
+ // multiple of the stack alignment at runtime.
+ Variable *T = makeReg(IceType_i32);
+ _mov(T, TotalSize);
+ _add(T, Ctx->getConstantInt(IceType_i32, X86_STACK_ALIGNMENT_BYTES - 1));
+ _and(T, Ctx->getConstantInt(IceType_i32, -X86_STACK_ALIGNMENT_BYTES));
+ _sub(esp, T);
+ if (AdjustAlignment) {
wala 2014/08/06 03:07:30 1018 - 1019 is better expressed as add T, (Alignm
+ _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment));
+ }
+ }
_mov(Dest, esp);
}
@@ -1592,51 +1649,78 @@ void TargetX8632::lowerBr(const InstBr *Inst) {
}
void TargetX8632::lowerCall(const InstCall *Instr) {
+ // x86-32 calling convention:
+ //
+ // * At the point before the call, the stack must be aligned to 16
+ // bytes.
+ //
+ // * The first four arguments of vector type, regardless of their
+ // position relative to the other arguments in the argument list, are
+ // placed in registers xmm0 - xmm3.
+ //
+ // * Other arguments are pushed onto the stack in right-to-left order,
+ // such that the left-most argument ends up on the top of the stack at
+ // the lowest memory address.
+ //
+ // * Stack arguments of vector type are aligned to start at the next
+ // highest multiple of 16 bytes. Other stack arguments are aligned to
+ // 4 bytes.
+ //
+ // This intends to match the section "IA-32 Function Calling
+ // Convention" of the document "OS X ABI Function Call Guide" by
+ // Apple.
+ NeedsStackAlignment = true;
+
+ OperandList XmmArgs;
+ OperandList StackArgs, StackArgLocations;
+ uint32_t ParameterAreaSizeBytes = 0;
+
// Classify each argument operand according to the location where the
// argument is passed.
- OperandList XmmArgs;
- OperandList StackArgs;
for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
Operand *Arg = Instr->getArg(i);
- if (isVectorType(Arg->getType()) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
+ Type Ty = Arg->getType();
+ // The PNaCl ABI requires the width of arguments to be at least 32 bits.
+ assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 ||
+ Ty == IceType_f64 || isVectorType(Ty));
+ if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
XmmArgs.push_back(Arg);
} else {
StackArgs.push_back(Arg);
+ if (isVectorType(Arg->getType())) {
+ ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
+ }
+ Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
+ Constant *Loc = Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes);
+ StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
+ ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
}
}
- // For stack arguments, generate a sequence of push instructions,
- // pushing right to left, keeping track of stack offsets in case a
- // push involves a stack operand and we are using an esp-based frame.
- uint32_t StackOffset = 0;
- // TODO: Consolidate the stack adjustment for function calls by
- // reserving enough space for the arguments only once.
+
+ // Adjust the parameter area so that the stack is aligned. It is
+ // assumed that the stack is already aligned at the start of the
+ // calling sequence.
+ ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
+
+ // Subtract the appropriate amount for the argument area. This also
+ // takes care of setting the stack adjustment during emission.
//
// TODO: If for some reason the call instruction gets dead-code
// eliminated after lowering, we would need to ensure that the
- // pre-call push instructions and the post-call esp adjustment get
- // eliminated as well.
- for (OperandList::reverse_iterator I = StackArgs.rbegin(),
- E = StackArgs.rend(); I != E; ++I) {
- Operand *Arg = legalize(*I);
- if (Arg->getType() == IceType_i64) {
- _push(hiOperand(Arg));
- _push(loOperand(Arg));
- } else if (Arg->getType() == IceType_f64 || isVectorType(Arg->getType())) {
- // If the Arg turns out to be a memory operand, more than one push
- // instruction is required. This ends up being somewhat clumsy in
- // the current IR, so we use a workaround. Force the operand into
- // a (xmm) register, and then push the register. An xmm register
- // push is actually not possible in x86, but the Push instruction
- // emitter handles this by decrementing the stack pointer and
- // directly writing the xmm register value.
- _push(legalize(Arg, Legal_Reg));
- } else {
- // Otherwise PNaCl requires parameter types to be at least 32-bits.
- assert(Arg->getType() == IceType_f32 || Arg->getType() == IceType_i32);
- _push(Arg);
- }
- StackOffset += typeWidthInBytesOnStack(Arg->getType());
+ // pre-call and the post-call esp adjustment get eliminated as well.
+ if (ParameterAreaSizeBytes) {
+ _adjust_stack(ParameterAreaSizeBytes);
}
+
+ // Copy arguments that are passed on the stack to the appropriate
+ // stack locations.
+ for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
+ lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
+ // TODO: Consider calling postLower() here to reduce the register
+ // pressure associated with using too many infinite weight
+ // temporaries when lowering the call sequence in -Om1 mode.
+ }
+
// Copy arguments to be passed in registers to the appropriate
// registers.
// TODO: Investigate the impact of lowering arguments passed in
@@ -1700,10 +1784,11 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
if (ReturnRegHi)
Context.insert(InstFakeDef::create(Func, ReturnRegHi));
- // Add the appropriate offset to esp.
- if (StackOffset) {
+ // Add the appropriate offset to esp. The call instruction takes care
+ // of resetting the stack offset during emission.
+ if (ParameterAreaSizeBytes) {
Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
- _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset));
+ _add(esp, Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes));
}
// Insert a register-kill pseudo instruction.
@@ -2182,9 +2267,9 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
} else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
// Use pshufd and movd/movss.
//
- // ALIGNHACK: Force vector operands to registers in instructions that
- // require aligned memory operands until support for stack alignment
- // is implemented.
+ // ALIGNHACK: Force vector operands to registers in instructions
+ // that require aligned memory operands until support for data
+ // alignment is implemented.
#define ALIGN_HACK(Vect) legalizeToVar((Vect))
Operand *SourceVectRM =
legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
@@ -2269,8 +2354,8 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
- // ALIGNHACK: Without support for stack alignment, both operands to
- // cmpps need to be forced into registers. Once support for stack
+ // ALIGNHACK: Without support for data alignment, both operands to
+ // cmpps need to be forced into registers. Once support for data
// alignment is implemented, remove LEGAL_HACK.
#define LEGAL_HACK(Vect) legalizeToVar((Vect))
switch (Condition) {
@@ -2410,8 +2495,8 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
}
// TODO: ALIGNHACK: Both operands to compare instructions need to be
- // in registers until stack alignment support is implemented. Once
- // there is support for stack alignment, LEGAL_HACK can be removed.
+ // in registers until data alignment support is implemented. Once
+ // there is support for data alignment, LEGAL_HACK can be removed.
#define LEGAL_HACK(Vect) legalizeToVar((Vect))
Variable *T = makeReg(Ty);
switch (Condition) {
@@ -2631,9 +2716,9 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]);
Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]);
- // ALIGNHACK: Force vector operands to registers in instructions that
- // require aligned memory operands until support for stack alignment
- // is implemented.
+ // ALIGNHACK: Force vector operands to registers in instructions
+ // that require aligned memory operands until support for data
+ // alignment is implemented.
#define ALIGN_HACK(Vect) legalizeToVar((Vect))
if (Index == 1) {
SourceVectRM = ALIGN_HACK(SourceVectRM);
@@ -2921,7 +3006,8 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
}
case Intrinsics::Memset: {
// The value operand needs to be extended to a stack slot size
- // because "push" only works for a specific operand size.
+ // because the PNaCl ABI requires arguments to be at least 32 bits
+ // wide.
Operand *ValOp = Instr->getArg(1);
assert(ValOp->getType() == IceType_i8);
Variable *ValExt = Func->makeVariable(stackSlotType(), Context.getNode());
@@ -3590,9 +3676,9 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
Variable *T = makeReg(SrcTy);
Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
- // ALIGNHACK: Until stack alignment support is implemented, vector
+ // ALIGNHACK: Until data alignment support is implemented, vector
// instructions need to have vector operands in registers. Once
- // there is support for stack alignment, LEGAL_HACK can be removed.
+ // there is support for data alignment, LEGAL_HACK can be removed.
#define LEGAL_HACK(Vect) legalizeToVar((Vect))
if (InstructionSet >= SSE4_1) {
// TODO(wala): If the condition operand is a constant, use blendps
@@ -3687,13 +3773,16 @@ void TargetX8632::lowerStore(const InstStore *Inst) {
Operand *Value = Inst->getData();
Operand *Addr = Inst->getAddr();
OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType());
+ Type Ty = NewAddr->getType();
- if (NewAddr->getType() == IceType_i64) {
+ if (Ty == IceType_i64) {
Value = legalize(Value);
Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true);
Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true);
_store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
_store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
+ } else if (isVectorType(Ty)) {
+ _storep(legalizeToVar(Value), NewAddr);
} else {
Value = legalize(Value, Legal_Reg | Legal_Imm, true);
_store(Value, NewAddr);
@@ -4036,9 +4125,9 @@ void TargetX8632::postLower() {
llvm::SmallBitVector AvailableTypedRegisters =
AvailableRegisters & getRegisterSetForType(Var->getType());
if (!AvailableTypedRegisters.any()) {
- // This is a hack in case we run out of physical registers
- // due to an excessive number of "push" instructions from
- // lowering a call.
+ // This is a hack in case we run out of physical registers due
+ // to an excessively long code sequence, as might happen when
+ // lowering arguments in lowerCall().
AvailableRegisters = WhiteList;
AvailableTypedRegisters =
AvailableRegisters & getRegisterSetForType(Var->getType());
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/64bit.pnacl.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698