Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(17)

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 372113005: Add support for passing and returning vectors in accordance with the x86 calling convention. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/IceTargetLoweringX8632.cpp
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 7630d37624bdd787fcc3f6ce7b2b69913c4fb210..e322c68b8a990a1c032f0b0aa51f1bbb6c4a6956 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -412,34 +412,76 @@ void TargetX8632::emitVariable(const Variable *Var, const Cfg *Func) const {
Str << "]";
}
-// Helper function for addProlog(). Sets the frame offset for Arg,
-// updates InArgsSizeBytes according to Arg's width, and generates an
-// instruction to copy Arg into its assigned register if applicable.
-// For an I64 arg that has been split into Lo and Hi components, it
-// calls itself recursively on the components, taking care to handle
-// Lo first because of the little-endian architecture.
-void TargetX8632::setArgOffsetAndCopy(Variable *Arg, Variable *FramePtr,
- size_t BasicFrameOffset,
- size_t &InArgsSizeBytes) {
- Variable *Lo = Arg->getLo();
- Variable *Hi = Arg->getHi();
- Type Ty = Arg->getType();
- if (Lo && Hi && Ty == IceType_i64) {
- assert(Lo->getType() != IceType_i64); // don't want infinite recursion
- assert(Hi->getType() != IceType_i64); // don't want infinite recursion
- setArgOffsetAndCopy(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
- setArgOffsetAndCopy(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
- return;
+// Classify each argument as stack or register. Add a hint to the
Jim Stichnoth 2014/07/08 18:03:15 As we discussed in person, I think a simpler appro
wala 2014/07/08 22:38:57 Done.
+// register allocator that register arguments should go in their home
+// registers.
+void TargetX8632::lowerArguments() {
+ const VarList &Args = Func->getArgs();
+ unsigned NumXmmArgs = 0;
+
+ for (VarList::const_iterator I = Args.begin(), E = Args.end(); I != E; ++I) {
+ Variable *Arg = *I;
+ // The first four vector arguments go in xmm0 - xmm3.
+ if (isVectorType(Arg->getType()) && NumXmmArgs < 4) {
+ Arg->setArgLoc(Variable::RegisterArgLoc);
+ Variable *HomeReg = makeReg(Arg->getType(), Reg_xmm0 + NumXmmArgs);
+ Arg->setPreferredRegister(HomeReg, false);
+ Arg->setHomeRegister(HomeReg);
+ ++NumXmmArgs;
+ } else {
+ Arg->setArgLoc(Variable::StackArgLoc);
+ }
}
- Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
- if (Arg->hasReg()) {
- assert(Ty != IceType_i64);
- OperandX8632Mem *Mem = OperandX8632Mem::create(
- Func, Ty, FramePtr,
- Ctx->getConstantInt(IceType_i32, Arg->getStackOffset()));
- _mov(Arg, Mem);
+}
+
+// Helper function for addProlog().
+//
+// If Arg is an argument passed on the stack, this sets the frame offset
+// for Arg and updates InArgsSizeBytes according to Arg's width. For an
+// I64 arg that has been split into Lo and Hi components, it calls
+// itself recursively on the components, taking care to handle Lo first
+// because of the little-endian architecture. Lastly, this function
+// generates an instruction to copy Arg into its assigned register if
+// applicable.
+//
+// If Arg is an argument passed in a register, this generates
+// instructions to copy Arg from its home register into its assigned
+// location.
+void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
+ size_t BasicFrameOffset,
+ size_t &InArgsSizeBytes) {
+ if (Arg->getArgLoc() == Variable::StackArgLoc) {
+ Variable *Lo = Arg->getLo();
+ Variable *Hi = Arg->getHi();
+ Type Ty = Arg->getType();
+ if (Lo && Hi && Ty == IceType_i64) {
+ assert(Lo->getType() != IceType_i64); // don't want infinite recursion
+ assert(Hi->getType() != IceType_i64); // don't want infinite recursion
+ finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
+ finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
+ return;
+ }
+ Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
+ InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
+ if (Arg->hasReg()) {
+ assert(Ty != IceType_i64);
+ OperandX8632Mem *Mem = OperandX8632Mem::create(
+ Func, Ty, FramePtr,
+ Ctx->getConstantInt(IceType_i32, Arg->getStackOffset()));
+ if (isVectorType(Arg->getType())) {
+ _movp(Arg, Mem);
+ } else {
+ _mov(Arg, Mem);
+ }
+ }
+ } else if (Arg->getArgLoc() == Variable::RegisterArgLoc) {
+ // Make sure that Arg has a location before emitting the copy
+ // instruction. When using computed live ranges, Arg is not
+ // assigned any location if its live range is empty.
+ if (!ComputedLiveRanges || !Arg->getLiveRange().isEmpty()) {
+ _movp(Arg, Arg->getHomeRegister());
+ }
}
- InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
}
Type TargetX8632::stackSlotType() { return IceType_i32; }
@@ -490,7 +532,7 @@ void TargetX8632::addProlog(CfgNode *Node) {
continue;
}
// An argument passed on the stack already has a stack slot.
- if (Var->getIsArg())
+ if (Var->getArgLoc() == Variable::StackArgLoc)
continue;
// An unreferenced variable doesn't need a stack slot.
if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
@@ -547,23 +589,19 @@ void TargetX8632::addProlog(CfgNode *Node) {
resetStackAdjustment();
- // Fill in stack offsets for args, and copy args into registers for
- // those that were register-allocated. Args are pushed right to
+ // Fill in stack offsets for stack args, and copy args into registers
+ // for those that were register-allocated. Args are pushed right to
// left, so Arg[0] is closest to the stack/frame pointer.
//
- // TODO: Make this right for different width args, calling
- // conventions, etc. For one thing, args passed in registers will
- // need to be copied/shuffled to their home registers (the
- // RegManager code may have some permutation logic to leverage),
- // and if they have no home register, home space will need to be
- // allocated on the stack to copy into.
+ // Args passed in registers will need to be copied / shuffled into
+ // their home locations.
Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
size_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;
if (!IsEbpBasedFrame)
BasicFrameOffset += LocalsSizeBytes;
for (SizeT i = 0; i < Args.size(); ++i) {
Variable *Arg = Args[i];
- setArgOffsetAndCopy(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
+ finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
}
// Fill in stack offsets for locals.
@@ -578,7 +616,7 @@ void TargetX8632::addProlog(CfgNode *Node) {
RegsUsed[Var->getRegNum()] = true;
continue;
}
- if (Var->getIsArg())
+ if (Var->getArgLoc() == Variable::StackArgLoc)
continue;
if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
continue;
@@ -754,7 +792,9 @@ void TargetX8632::split64(Variable *Var) {
Var->setLoHi(Lo, Hi);
if (Var->getIsArg()) {
Lo->setIsArg(Func);
+ Lo->setArgLoc(Var->getArgLoc());
Hi->setIsArg(Func);
+ Hi->setArgLoc(Var->getArgLoc());
}
}
@@ -1269,16 +1309,21 @@ void TargetX8632::lowerBr(const InstBr *Inst) {
}
void TargetX8632::lowerCall(const InstCall *Instr) {
- // Generate a sequence of push instructions, pushing right to left,
- // keeping track of stack offsets in case a push involves a stack
- // operand and we are using an esp-based frame.
+ // For stack arguments, generate a sequence of push instructions,
+ // pushing right to left, keeping track of stack offsets in case a
+ // push involves a stack operand and we are using an esp-based frame.
uint32_t StackOffset = 0;
+ // Keep track of the number of xmm registers that get used to pass
+ // arguments.
+ unsigned NumXmmArgs = 0;
+ VarList RegisterArgs;
// TODO: If for some reason the call instruction gets dead-code
// eliminated after lowering, we would need to ensure that the
// pre-call push instructions and the post-call esp adjustment get
// eliminated as well.
for (SizeT NumArgs = Instr->getNumArgs(), i = 0; i < NumArgs; ++i) {
Operand *Arg = legalize(Instr->getArg(NumArgs - i - 1));
+ bool ArgInRegister = false;
if (Arg->getType() == IceType_i64) {
_push(hiOperand(Arg));
_push(loOperand(Arg));
@@ -1294,18 +1339,43 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
Variable *T = NULL;
_mov(T, Arg);
_push(T);
+ } else if (isVectorType(Arg->getType())) {
+ if (NumXmmArgs < 4) {
+ Variable *Reg = legalizeToVar(Arg, false, Reg_xmm0 + NumXmmArgs);
+ ++NumXmmArgs;
+ ArgInRegister = true;
+ RegisterArgs.push_back(Reg);
+ } else {
+ // sub esp, 16
+ // movups [esp], legalize_to_reg(Arg)
+ Variable *esp = getPhysicalRegister(Reg_esp);
+ _sub(esp, Ctx->getConstantInt(IceType_i8, 16));
Jim Stichnoth 2014/07/08 18:03:15 Use a symbolic constant instead of 16. (And reuse
wala 2014/07/08 22:38:58 Done, but by using the argument width directly wit
+ Constant *Zero = Ctx->getConstantZero(IceType_i8);
+ OperandX8632Mem *Dest =
+ OperandX8632Mem::create(Func, Arg->getType(), esp, Zero);
+ _storep(legalize(Arg, Legal_Reg), Dest);
+ }
} else {
// Otherwise PNaCl requires parameter types to be at least 32-bits.
assert(Arg->getType() == IceType_f32 || Arg->getType() == IceType_i32);
_push(Arg);
}
- StackOffset += typeWidthInBytesOnStack(Arg->getType());
+ if (!ArgInRegister) {
+ StackOffset += typeWidthInBytesOnStack(Arg->getType());
+ }
+ }
+ // Generate a FakeUse of all register arguments so that they do not
+ // get dead code eliminated.
Jim Stichnoth 2014/07/08 18:03:15 Mention in the comment that the dead code eliminat
wala 2014/07/08 22:38:57 Done.
+ for (VarList::const_iterator I = RegisterArgs.begin(), E = RegisterArgs.end();
+ I != E; ++I) {
+ Context.insert(InstFakeUse::create(Func, *I));
}
// Generate the call instruction. Assign its result to a temporary
// with high register allocation weight.
Variable *Dest = Instr->getDest();
Variable *eax = NULL; // doubles as RegLo as necessary
Variable *edx = NULL;
+ Variable *xmm0 = NULL;
Jim Stichnoth 2014/07/08 18:03:15 Can you rename variable eax (above) to something l
wala 2014/07/08 22:38:57 Done.
if (Dest) {
switch (Dest->getType()) {
case IceType_NUM:
@@ -1334,21 +1404,15 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
case IceType_v16i8:
case IceType_v8i16:
case IceType_v4i32:
- case IceType_v4f32: {
- // TODO(wala): Handle return values of vector type in the caller.
- IceString Ty;
- llvm::raw_string_ostream BaseOS(Ty);
- Ostream OS(&BaseOS);
- OS << Dest->getType();
- Func->setError("Unhandled dest type: " + BaseOS.str());
- return;
- }
+ case IceType_v4f32:
+ xmm0 = makeReg(Dest->getType(), Reg_xmm0);
+ break;
}
}
// TODO(stichnot): LEAHACK: remove Legal_All (and use default) once
// a proper emitter is used.
Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All);
- Inst *NewCall = InstX8632Call::create(Func, eax, CallTarget);
+ Inst *NewCall = InstX8632Call::create(Func, eax ? eax : xmm0, CallTarget);
Context.insert(NewCall);
if (edx)
Context.insert(InstFakeDef::create(Func, edx));
@@ -1368,13 +1432,17 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall));
// Generate a FakeUse to keep the call live if necessary.
- if (Instr->hasSideEffects() && eax) {
- Inst *FakeUse = InstFakeUse::create(Func, eax);
+ if (Instr->hasSideEffects() && (eax || xmm0)) {
+ Inst *FakeUse = InstFakeUse::create(Func, eax ? eax : xmm0);
Context.insert(FakeUse);
}
- // Generate Dest=eax assignment.
- if (Dest && eax) {
+ if (!Dest) {
+ return;
+ }
+
+ // Assign the result of the call to Dest.
+ if (eax) {
if (edx) {
split64(Dest);
Variable *DestLo = Dest->getLo();
@@ -1387,15 +1455,15 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
Dest->setPreferredRegister(eax, false);
_mov(Dest, eax);
}
- }
-
- // Special treatment for an FP function which returns its result in
- // st(0).
- if (Dest &&
- (Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64)) {
+ } else if (xmm0) {
+ Dest->setPreferredRegister(xmm0, false);
+ _movp(Dest, xmm0);
+ } else if (Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64) {
+ // Special treatment for an FP function which returns its result in
+ // st(0).
_fstp(Dest);
- // If Dest ends up being a physical xmm register, the fstp emit
- // code will route st(0) through a temporary stack slot.
+ // If Dest ends up being a physical xmm register, the fstp emit code
+ // will route st(0) through a temporary stack slot.
}
}

Powered by Google App Engine
This is Rietveld 408576698