Index: src/IceTargetLoweringX8632.cpp |
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp |
index 7630d37624bdd787fcc3f6ce7b2b69913c4fb210..e322c68b8a990a1c032f0b0aa51f1bbb6c4a6956 100644 |
--- a/src/IceTargetLoweringX8632.cpp |
+++ b/src/IceTargetLoweringX8632.cpp |
@@ -412,34 +412,76 @@ void TargetX8632::emitVariable(const Variable *Var, const Cfg *Func) const { |
Str << "]"; |
} |
-// Helper function for addProlog(). Sets the frame offset for Arg, |
-// updates InArgsSizeBytes according to Arg's width, and generates an |
-// instruction to copy Arg into its assigned register if applicable. |
-// For an I64 arg that has been split into Lo and Hi components, it |
-// calls itself recursively on the components, taking care to handle |
-// Lo first because of the little-endian architecture. |
-void TargetX8632::setArgOffsetAndCopy(Variable *Arg, Variable *FramePtr, |
- size_t BasicFrameOffset, |
- size_t &InArgsSizeBytes) { |
- Variable *Lo = Arg->getLo(); |
- Variable *Hi = Arg->getHi(); |
- Type Ty = Arg->getType(); |
- if (Lo && Hi && Ty == IceType_i64) { |
- assert(Lo->getType() != IceType_i64); // don't want infinite recursion |
- assert(Hi->getType() != IceType_i64); // don't want infinite recursion |
- setArgOffsetAndCopy(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
- setArgOffsetAndCopy(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
- return; |
+// Classify each argument as stack or register. Add a hint to the |
Jim Stichnoth
2014/07/08 18:03:15
As we discussed in person, I think a simpler appro
wala
2014/07/08 22:38:57
Done.
|
+// register allocator that register arguments should go in their home |
+// registers. |
+void TargetX8632::lowerArguments() { |
+ const VarList &Args = Func->getArgs(); |
+ unsigned NumXmmArgs = 0; |
+ |
+ for (VarList::const_iterator I = Args.begin(), E = Args.end(); I != E; ++I) { |
+ Variable *Arg = *I; |
+ // The first four vector arguments go in xmm0 - xmm3. |
+ if (isVectorType(Arg->getType()) && NumXmmArgs < 4) { |
+ Arg->setArgLoc(Variable::RegisterArgLoc); |
+ Variable *HomeReg = makeReg(Arg->getType(), Reg_xmm0 + NumXmmArgs); |
+ Arg->setPreferredRegister(HomeReg, false); |
+ Arg->setHomeRegister(HomeReg); |
+ ++NumXmmArgs; |
+ } else { |
+ Arg->setArgLoc(Variable::StackArgLoc); |
+ } |
} |
- Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
- if (Arg->hasReg()) { |
- assert(Ty != IceType_i64); |
- OperandX8632Mem *Mem = OperandX8632Mem::create( |
- Func, Ty, FramePtr, |
- Ctx->getConstantInt(IceType_i32, Arg->getStackOffset())); |
- _mov(Arg, Mem); |
+} |
+ |
+// Helper function for addProlog(). |
+// |
+// If Arg is an argument passed on the stack, this sets the frame offset |
+// for Arg and updates InArgsSizeBytes according to Arg's width. For an |
+// I64 arg that has been split into Lo and Hi components, it calls |
+// itself recursively on the components, taking care to handle Lo first |
+// because of the little-endian architecture. Lastly, this function |
+// generates an instruction to copy Arg into its assigned register if |
+// applicable. |
+// |
+// If Arg is an argument passed in a register, this generates |
+// instructions to copy Arg from its home register into its assigned |
+// location. |
+void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
+ size_t BasicFrameOffset, |
+ size_t &InArgsSizeBytes) { |
+ if (Arg->getArgLoc() == Variable::StackArgLoc) { |
+ Variable *Lo = Arg->getLo(); |
+ Variable *Hi = Arg->getHi(); |
+ Type Ty = Arg->getType(); |
+ if (Lo && Hi && Ty == IceType_i64) { |
+ assert(Lo->getType() != IceType_i64); // don't want infinite recursion |
+ assert(Hi->getType() != IceType_i64); // don't want infinite recursion |
+ finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
+ finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
+ return; |
+ } |
+ Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
+ InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
+ if (Arg->hasReg()) { |
+ assert(Ty != IceType_i64); |
+ OperandX8632Mem *Mem = OperandX8632Mem::create( |
+ Func, Ty, FramePtr, |
+ Ctx->getConstantInt(IceType_i32, Arg->getStackOffset())); |
+ if (isVectorType(Arg->getType())) { |
+ _movp(Arg, Mem); |
+ } else { |
+ _mov(Arg, Mem); |
+ } |
+ } |
+ } else if (Arg->getArgLoc() == Variable::RegisterArgLoc) { |
+ // Make sure that Arg has a location before emitting the copy |
+ // instruction. When using computed live ranges, Arg is not |
+ // assigned any location if its live range is empty. |
+ if (!ComputedLiveRanges || !Arg->getLiveRange().isEmpty()) { |
+ _movp(Arg, Arg->getHomeRegister()); |
+ } |
} |
- InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
} |
Type TargetX8632::stackSlotType() { return IceType_i32; } |
@@ -490,7 +532,7 @@ void TargetX8632::addProlog(CfgNode *Node) { |
continue; |
} |
// An argument passed on the stack already has a stack slot. |
- if (Var->getIsArg()) |
+ if (Var->getArgLoc() == Variable::StackArgLoc) |
continue; |
// An unreferenced variable doesn't need a stack slot. |
if (ComputedLiveRanges && Var->getLiveRange().isEmpty()) |
@@ -547,23 +589,19 @@ void TargetX8632::addProlog(CfgNode *Node) { |
resetStackAdjustment(); |
- // Fill in stack offsets for args, and copy args into registers for |
- // those that were register-allocated. Args are pushed right to |
+ // Fill in stack offsets for stack args, and copy args into registers |
+ // for those that were register-allocated. Args are pushed right to |
// left, so Arg[0] is closest to the stack/frame pointer. |
// |
- // TODO: Make this right for different width args, calling |
- // conventions, etc. For one thing, args passed in registers will |
- // need to be copied/shuffled to their home registers (the |
- // RegManager code may have some permutation logic to leverage), |
- // and if they have no home register, home space will need to be |
- // allocated on the stack to copy into. |
+ // Args passed in registers will need to be copied / shuffled into |
+ // their home locations. |
Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
size_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes; |
if (!IsEbpBasedFrame) |
BasicFrameOffset += LocalsSizeBytes; |
for (SizeT i = 0; i < Args.size(); ++i) { |
Variable *Arg = Args[i]; |
- setArgOffsetAndCopy(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
+ finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
} |
// Fill in stack offsets for locals. |
@@ -578,7 +616,7 @@ void TargetX8632::addProlog(CfgNode *Node) { |
RegsUsed[Var->getRegNum()] = true; |
continue; |
} |
- if (Var->getIsArg()) |
+ if (Var->getArgLoc() == Variable::StackArgLoc) |
continue; |
if (ComputedLiveRanges && Var->getLiveRange().isEmpty()) |
continue; |
@@ -754,7 +792,9 @@ void TargetX8632::split64(Variable *Var) { |
Var->setLoHi(Lo, Hi); |
if (Var->getIsArg()) { |
Lo->setIsArg(Func); |
+ Lo->setArgLoc(Var->getArgLoc()); |
Hi->setIsArg(Func); |
+ Hi->setArgLoc(Var->getArgLoc()); |
} |
} |
@@ -1269,16 +1309,21 @@ void TargetX8632::lowerBr(const InstBr *Inst) { |
} |
void TargetX8632::lowerCall(const InstCall *Instr) { |
- // Generate a sequence of push instructions, pushing right to left, |
- // keeping track of stack offsets in case a push involves a stack |
- // operand and we are using an esp-based frame. |
+ // For stack arguments, generate a sequence of push instructions, |
+ // pushing right to left, keeping track of stack offsets in case a |
+ // push involves a stack operand and we are using an esp-based frame. |
uint32_t StackOffset = 0; |
+ // Keep track of the number of xmm registers that get used to pass |
+ // arguments. |
+ unsigned NumXmmArgs = 0; |
+ VarList RegisterArgs; |
// TODO: If for some reason the call instruction gets dead-code |
// eliminated after lowering, we would need to ensure that the |
// pre-call push instructions and the post-call esp adjustment get |
// eliminated as well. |
for (SizeT NumArgs = Instr->getNumArgs(), i = 0; i < NumArgs; ++i) { |
Operand *Arg = legalize(Instr->getArg(NumArgs - i - 1)); |
+ bool ArgInRegister = false; |
if (Arg->getType() == IceType_i64) { |
_push(hiOperand(Arg)); |
_push(loOperand(Arg)); |
@@ -1294,18 +1339,43 @@ void TargetX8632::lowerCall(const InstCall *Instr) { |
Variable *T = NULL; |
_mov(T, Arg); |
_push(T); |
+ } else if (isVectorType(Arg->getType())) { |
+ if (NumXmmArgs < 4) { |
+ Variable *Reg = legalizeToVar(Arg, false, Reg_xmm0 + NumXmmArgs); |
+ ++NumXmmArgs; |
+ ArgInRegister = true; |
+ RegisterArgs.push_back(Reg); |
+ } else { |
+ // sub esp, 16 |
+ // movups [esp], legalize_to_reg(Arg) |
+ Variable *esp = getPhysicalRegister(Reg_esp); |
+ _sub(esp, Ctx->getConstantInt(IceType_i8, 16)); |
Jim Stichnoth
2014/07/08 18:03:15
Use a symbolic constant instead of 16. (And reuse
wala
2014/07/08 22:38:58
Done, but by using the argument width directly wit
|
+ Constant *Zero = Ctx->getConstantZero(IceType_i8); |
+ OperandX8632Mem *Dest = |
+ OperandX8632Mem::create(Func, Arg->getType(), esp, Zero); |
+ _storep(legalize(Arg, Legal_Reg), Dest); |
+ } |
} else { |
// Otherwise PNaCl requires parameter types to be at least 32-bits. |
assert(Arg->getType() == IceType_f32 || Arg->getType() == IceType_i32); |
_push(Arg); |
} |
- StackOffset += typeWidthInBytesOnStack(Arg->getType()); |
+ if (!ArgInRegister) { |
+ StackOffset += typeWidthInBytesOnStack(Arg->getType()); |
+ } |
+ } |
+ // Generate a FakeUse of all register arguments so that they do not |
+ // get dead code eliminated. |
Jim Stichnoth
2014/07/08 18:03:15
Mention in the comment that the dead code eliminat
wala
2014/07/08 22:38:57
Done.
|
+ for (VarList::const_iterator I = RegisterArgs.begin(), E = RegisterArgs.end(); |
+ I != E; ++I) { |
+ Context.insert(InstFakeUse::create(Func, *I)); |
} |
// Generate the call instruction. Assign its result to a temporary |
// with high register allocation weight. |
Variable *Dest = Instr->getDest(); |
Variable *eax = NULL; // doubles as RegLo as necessary |
Variable *edx = NULL; |
+ Variable *xmm0 = NULL; |
Jim Stichnoth
2014/07/08 18:03:15
Can you rename variable eax (above) to something l
wala
2014/07/08 22:38:57
Done.
|
if (Dest) { |
switch (Dest->getType()) { |
case IceType_NUM: |
@@ -1334,21 +1404,15 @@ void TargetX8632::lowerCall(const InstCall *Instr) { |
case IceType_v16i8: |
case IceType_v8i16: |
case IceType_v4i32: |
- case IceType_v4f32: { |
- // TODO(wala): Handle return values of vector type in the caller. |
- IceString Ty; |
- llvm::raw_string_ostream BaseOS(Ty); |
- Ostream OS(&BaseOS); |
- OS << Dest->getType(); |
- Func->setError("Unhandled dest type: " + BaseOS.str()); |
- return; |
- } |
+ case IceType_v4f32: |
+ xmm0 = makeReg(Dest->getType(), Reg_xmm0); |
+ break; |
} |
} |
// TODO(stichnot): LEAHACK: remove Legal_All (and use default) once |
// a proper emitter is used. |
Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All); |
- Inst *NewCall = InstX8632Call::create(Func, eax, CallTarget); |
+ Inst *NewCall = InstX8632Call::create(Func, eax ? eax : xmm0, CallTarget); |
Context.insert(NewCall); |
if (edx) |
Context.insert(InstFakeDef::create(Func, edx)); |
@@ -1368,13 +1432,17 @@ void TargetX8632::lowerCall(const InstCall *Instr) { |
Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall)); |
// Generate a FakeUse to keep the call live if necessary. |
- if (Instr->hasSideEffects() && eax) { |
- Inst *FakeUse = InstFakeUse::create(Func, eax); |
+ if (Instr->hasSideEffects() && (eax || xmm0)) { |
+ Inst *FakeUse = InstFakeUse::create(Func, eax ? eax : xmm0); |
Context.insert(FakeUse); |
} |
- // Generate Dest=eax assignment. |
- if (Dest && eax) { |
+ if (!Dest) { |
+ return; |
+ } |
+ |
+ // Assign the result of the call to Dest. |
+ if (eax) { |
if (edx) { |
split64(Dest); |
Variable *DestLo = Dest->getLo(); |
@@ -1387,15 +1455,15 @@ void TargetX8632::lowerCall(const InstCall *Instr) { |
Dest->setPreferredRegister(eax, false); |
_mov(Dest, eax); |
} |
- } |
- |
- // Special treatment for an FP function which returns its result in |
- // st(0). |
- if (Dest && |
- (Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64)) { |
+ } else if (xmm0) { |
+ Dest->setPreferredRegister(xmm0, false); |
+ _movp(Dest, xmm0); |
+ } else if (Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64) { |
+ // Special treatment for an FP function which returns its result in |
+ // st(0). |
_fstp(Dest); |
- // If Dest ends up being a physical xmm register, the fstp emit |
- // code will route st(0) through a temporary stack slot. |
+ // If Dest ends up being a physical xmm register, the fstp emit code |
+ // will route st(0) through a temporary stack slot. |
} |
} |