Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 1261383002: Subzero. Moves code around in preparation for 64-bit lowering. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Moves methods in TargetLoweringX8632.cpp so they match TargetLoweringX8664.cpp Created 5 years, 4 months ago
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 71 matching lines...)
82 }; 82 };
83 83
84 const size_t MachineTraits<TargetX8632>::TableTypeX8632AttributesSize = 84 const size_t MachineTraits<TargetX8632>::TableTypeX8632AttributesSize =
85 llvm::array_lengthof(TableTypeX8632Attributes); 85 llvm::array_lengthof(TableTypeX8632Attributes);
86 86
87 const uint32_t MachineTraits<TargetX8632>::X86_STACK_ALIGNMENT_BYTES = 16; 87 const uint32_t MachineTraits<TargetX8632>::X86_STACK_ALIGNMENT_BYTES = 16;
88 const char *MachineTraits<TargetX8632>::TargetName = "X8632"; 88 const char *MachineTraits<TargetX8632>::TargetName = "X8632";
89 89
90 } // end of namespace X86Internal 90 } // end of namespace X86Internal
91 91
92 //------------------------------------------------------------------------------
93 // __ ______ __ __ ______ ______ __ __ __ ______
94 // /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\
95 // \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \
96 // \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
97 // \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
98 //
99 //------------------------------------------------------------------------------
100 void TargetX8632::lowerCall(const InstCall *Instr) {
101 // x86-32 calling convention:
102 //
103 // * At the point before the call, the stack must be aligned to 16
104 // bytes.
105 //
106 // * The first four arguments of vector type, regardless of their
107 // position relative to the other arguments in the argument list, are
108 // placed in registers xmm0 - xmm3.
109 //
110 // * Other arguments are pushed onto the stack in right-to-left order,
111 // such that the left-most argument ends up on the top of the stack at
112 // the lowest memory address.
113 //
114 // * Stack arguments of vector type are aligned to start at the next
115 // highest multiple of 16 bytes. Other stack arguments are aligned to
116 // 4 bytes.
117 //
118 // This is intended to match the section "IA-32 Function Calling
119 // Convention" of the document "OS X ABI Function Call Guide" by
120 // Apple.
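// Worked example (illustrative, not part of this CL): for a call
// f(<4 x i32> %v0, %v1, %v2, %v3, %v4, i32 %a), %v0-%v3 go in
// xmm0-xmm3, and the two remaining arguments are laid out as
//   esp+0  : %v4 (16 bytes, already 16-byte aligned)
//   esp+16 : %a  (4 bytes)
// giving a 20-byte parameter area that is then padded to 32 bytes so
// the stack stays 16-byte aligned at the call.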
121 NeedsStackAlignment = true;
122
123 typedef std::vector<Operand *> OperandList;
124 OperandList XmmArgs;
125 OperandList StackArgs, StackArgLocations;
126 uint32_t ParameterAreaSizeBytes = 0;
127
128 // Classify each argument operand according to the location where the
129 // argument is passed.
130 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
131 Operand *Arg = Instr->getArg(i);
132 Type Ty = Arg->getType();
133 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
134 assert(typeWidthInBytes(Ty) >= 4);
135 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
136 XmmArgs.push_back(Arg);
137 } else {
138 StackArgs.push_back(Arg);
139 if (isVectorType(Arg->getType())) {
140 ParameterAreaSizeBytes =
141 Traits::applyStackAlignment(ParameterAreaSizeBytes);
142 }
143 Variable *esp =
144 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
145 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
146 StackArgLocations.push_back(
147 Traits::X86OperandMem::create(Func, Ty, esp, Loc));
148 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
149 }
150 }
151
152 // Adjust the parameter area so that the stack is aligned. It is
153 // assumed that the stack is already aligned at the start of the
154 // calling sequence.
155 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
156
157 // Subtract the appropriate amount for the argument area. This also
158 // takes care of setting the stack adjustment during emission.
159 //
160 // TODO: If for some reason the call instruction gets dead-code
161 // eliminated after lowering, we would need to ensure that the
162 // pre-call and the post-call esp adjustment get eliminated as well.
163 if (ParameterAreaSizeBytes) {
164 _adjust_stack(ParameterAreaSizeBytes);
165 }
166
167 // Copy arguments that are passed on the stack to the appropriate
168 // stack locations.
169 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
170 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
171 }
172
173 // Copy arguments to be passed in registers to the appropriate
174 // registers.
175 // TODO: Investigate the impact of lowering arguments passed in
176 // registers after lowering stack arguments as opposed to the other
177 // way around. Lowering register arguments after stack arguments may
178 // reduce register pressure. On the other hand, lowering register
179 // arguments first (before stack arguments) may result in more compact
180 // code, as the memory operand displacements may end up being smaller
181 // before any stack adjustment is done.
182 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
183 Variable *Reg =
184 legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
185 // Generate a FakeUse of register arguments so that they do not get
186 // dead code eliminated as a result of the FakeKill of scratch
187 // registers after the call.
188 Context.insert(InstFakeUse::create(Func, Reg));
189 }
190 // Generate the call instruction. Assign its result to a temporary
191 // with high register allocation weight.
192 Variable *Dest = Instr->getDest();
193 // ReturnReg doubles as ReturnRegLo as necessary.
194 Variable *ReturnReg = nullptr;
195 Variable *ReturnRegHi = nullptr;
196 if (Dest) {
197 switch (Dest->getType()) {
198 case IceType_NUM:
199 llvm_unreachable("Invalid Call dest type");
200 break;
201 case IceType_void:
Jim Stichnoth 2015/08/06 13:49:19 Wasn't there discussion on another CL that IceType
John 2015/08/06 14:44:08 There was a comment about it, but I did not unders
202 break;
203 case IceType_i1:
204 case IceType_i8:
205 case IceType_i16:
206 case IceType_i32:
207 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
208 break;
209 case IceType_i64:
210 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
211 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
212 break;
213 case IceType_f32:
214 case IceType_f64:
215 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
216 // the fstp instruction.
217 break;
218 case IceType_v4i1:
219 case IceType_v8i1:
220 case IceType_v16i1:
221 case IceType_v16i8:
222 case IceType_v8i16:
223 case IceType_v4i32:
224 case IceType_v4f32:
225 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
226 break;
227 }
228 }
229 Operand *CallTarget = legalize(Instr->getCallTarget());
230 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
231 if (NeedSandboxing) {
232 if (llvm::isa<Constant>(CallTarget)) {
233 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
234 } else {
235 Variable *CallTargetVar = nullptr;
236 _mov(CallTargetVar, CallTarget);
237 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
238 const SizeT BundleSize =
239 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
240 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
241 CallTarget = CallTargetVar;
242 }
243 }
244 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
245 Context.insert(NewCall);
246 if (NeedSandboxing)
247 _bundle_unlock();
248 if (ReturnRegHi)
249 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
250
251 // Add the appropriate offset to esp. The call instruction takes care
252 // of resetting the stack offset during emission.
253 if (ParameterAreaSizeBytes) {
254 Variable *esp =
255 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
256 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
257 }
258
259 // Insert a register-kill pseudo instruction.
260 Context.insert(InstFakeKill::create(Func, NewCall));
261
262 // Generate a FakeUse to keep the call live if necessary.
263 if (Instr->hasSideEffects() && ReturnReg) {
264 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
265 Context.insert(FakeUse);
266 }
267
268 if (!Dest)
269 return;
270
271 // Assign the result of the call to Dest.
272 if (ReturnReg) {
273 if (ReturnRegHi) {
274 assert(Dest->getType() == IceType_i64);
275 split64(Dest);
276 Variable *DestLo = Dest->getLo();
277 Variable *DestHi = Dest->getHi();
278 _mov(DestLo, ReturnReg);
279 _mov(DestHi, ReturnRegHi);
280 } else {
281 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
282 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
283 isVectorType(Dest->getType()));
284 if (isVectorType(Dest->getType())) {
285 _movp(Dest, ReturnReg);
286 } else {
287 _mov(Dest, ReturnReg);
288 }
289 }
290 } else if (isScalarFloatingType(Dest->getType())) {
291 // Special treatment for an FP function which returns its result in
292 // st(0).
293 // If Dest ends up being a physical xmm register, the fstp emit code
294 // will route st(0) through a temporary stack slot.
295 _fstp(Dest);
296 // Create a fake use of Dest in case it actually isn't used,
297 // because st(0) still needs to be popped.
298 Context.insert(InstFakeUse::create(Func, Dest));
299 }
300 }
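As an aside for reviewers, the classification loop above can be summarized by this standalone sketch (illustrative only; the names and the 16-byte rounding assumed for applyStackAlignment are not code from this CL):

#include <cstdint>
#include <vector>

struct ArgInfo {
  bool IsVector;         // isVectorType(Ty)
  uint32_t BytesOnStack; // typeWidthInBytesOnStack(Ty)
};

// Mirrors lowerCall(): at most four vector arguments are assigned to
// xmm0-xmm3; every other argument receives an esp-relative offset, and
// vector stack slots are first rounded up to a 16-byte boundary.
static void classifyArgs(const std::vector<ArgInfo> &Args,
                         std::vector<int> &XmmIndex,         // -1 => stack
                         std::vector<uint32_t> &StackOffset, // esp-relative
                         uint32_t &ParameterAreaSizeBytes) {
  constexpr unsigned MaxXmmArgs = 4; // Traits::X86_MAX_XMM_ARGS
  unsigned NumXmm = 0;
  ParameterAreaSizeBytes = 0;
  for (const ArgInfo &A : Args) {
    if (A.IsVector && NumXmm < MaxXmmArgs) {
      XmmIndex.push_back(static_cast<int>(NumXmm++));
      StackOffset.push_back(0); // unused for register args
    } else {
      if (A.IsVector) // assumed effect of applyStackAlignment
        ParameterAreaSizeBytes = (ParameterAreaSizeBytes + 15) & ~15u;
      XmmIndex.push_back(-1);
      StackOffset.push_back(ParameterAreaSizeBytes);
      ParameterAreaSizeBytes += A.BytesOnStack;
    }
  }
  // The whole area is then itself padded to a 16-byte multiple.
  ParameterAreaSizeBytes = (ParameterAreaSizeBytes + 15) & ~15u;
}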
301
302 void TargetX8632::lowerArguments() {
303 VarList &Args = Func->getArgs();
304 // The first four arguments of vector type, regardless of their
305 // position relative to the other arguments in the argument list, are
306 // passed in registers xmm0 - xmm3.
307 unsigned NumXmmArgs = 0;
308
309 Context.init(Func->getEntryNode());
310 Context.setInsertPoint(Context.getCur());
311
312 for (SizeT I = 0, E = Args.size();
313 I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) {
314 Variable *Arg = Args[I];
315 Type Ty = Arg->getType();
316 if (!isVectorType(Ty))
317 continue;
318 // Replace Arg in the argument list with the home register. Then
319 // generate an instruction in the prolog to copy the home register
320 // to the assigned location of Arg.
321 int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs;
322 ++NumXmmArgs;
323 Variable *RegisterArg = Func->makeVariable(Ty);
324 if (BuildDefs::dump())
325 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
326 RegisterArg->setRegNum(RegNum);
327 RegisterArg->setIsArg();
328 Arg->setIsArg(false);
329
330 Args[I] = RegisterArg;
331 Context.insert(InstAssign::create(Func, Arg, RegisterArg));
332 }
333 }
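To make the rewrite concrete (schematic, not compiler output): for a function taking <4 x i32> %v as its first vector argument, %v is replaced in Args by a new variable named home_reg:%v pinned to xmm0 (the home_reg: prefix comes from the setName call above), and the assignment

  %v = home_reg:%v

is inserted into the entry node so the original variable receives its value from the home register.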
334
335 void TargetX8632::lowerRet(const InstRet *Inst) {
336 Variable *Reg = nullptr;
337 if (Inst->hasRetValue()) {
338 Operand *Src0 = legalize(Inst->getRetValue());
339 // TODO(jpp): this is not needed.
340 if (Src0->getType() == IceType_i64) {
341 Variable *eax =
342 legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax);
343 Variable *edx =
344 legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx);
345 Reg = eax;
346 Context.insert(InstFakeUse::create(Func, edx));
347 } else if (isScalarFloatingType(Src0->getType())) {
348 _fld(Src0);
349 } else if (isVectorType(Src0->getType())) {
350 Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0);
351 } else {
352 _mov(Reg, Src0, Traits::RegisterSet::Reg_eax);
353 }
354 }
355 // Add a ret instruction even if sandboxing is enabled, because
356 // addEpilog explicitly looks for a ret instruction as a marker for
357 // where to insert the frame removal instructions.
358 _ret(Reg);
359 // Add a fake use of esp to make sure esp stays alive for the entire
360 // function. Otherwise post-call esp adjustments get dead-code
361 // eliminated. TODO: Are there more places where the fake use
362 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
363 // have a ret instruction.
364 Variable *esp =
365 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
366 Context.insert(InstFakeUse::create(Func, esp));
367 }
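Aside: the eax/edx pairing used here for i64 mirrors the ReturnReg/ReturnRegHi handling in lowerCall above. A minimal sketch of how a caller reassembles the value (the helper name is hypothetical):

#include <cstdint>

// eax carries the low 32 bits of an i64 return value, edx the high 32.
static inline uint64_t combineEaxEdx(uint32_t Eax, uint32_t Edx) {
  return (static_cast<uint64_t>(Edx) << 32) | Eax;
}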
368
369 void TargetX8632::addProlog(CfgNode *Node) {
370 // Stack frame layout:
371 //
372 // +------------------------+
373 // | 1. return address |
374 // +------------------------+
375 // | 2. preserved registers |
376 // +------------------------+
377 // | 3. padding |
378 // +------------------------+
379 // | 4. global spill area |
380 // +------------------------+
381 // | 5. padding |
382 // +------------------------+
383 // | 6. local spill area |
384 // +------------------------+
385 // | 7. padding |
386 // +------------------------+
387 // | 8. allocas |
388 // +------------------------+
389 //
390 // The following variables record the size in bytes of the given areas:
391 // * X86_RET_IP_SIZE_BYTES: area 1
392 // * PreservedRegsSizeBytes: area 2
393 // * SpillAreaPaddingBytes: area 3
394 // * GlobalsSize: area 4
395 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5
396 // * LocalsSpillAreaSize: area 6
397 // * SpillAreaSizeBytes: areas 3 - 7
398
399 // Determine stack frame offsets for each Variable without a
400 // register assignment. This can be done as one variable per stack
401 // slot. Or, do coalescing by running the register allocator again
402 // with an infinite set of registers (as a side effect, this gives
403 // variables a second chance at physical register assignment).
404 //
405 // A middle ground approach is to leverage sparsity and allocate one
406 // block of space on the frame for globals (variables with
407 // multi-block lifetime), and one block to share for locals
408 // (single-block lifetime).
409
410 Context.init(Node);
411 Context.setInsertPoint(Context.getCur());
412
413 llvm::SmallBitVector CalleeSaves =
414 getRegisterSet(RegSet_CalleeSave, RegSet_None);
415 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
416 VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
417 size_t GlobalsSize = 0;
418 // If there is a separate locals area, this represents that area.
419 // Otherwise it counts any variable not counted by GlobalsSize.
420 SpillAreaSizeBytes = 0;
421 // If there is a separate locals area, this specifies the alignment
422 // for it.
423 uint32_t LocalsSlotsAlignmentBytes = 0;
424 // The entire spill locations area gets aligned to largest natural
425 // alignment of the variables that have a spill slot.
426 uint32_t SpillAreaAlignmentBytes = 0;
427 // A spill slot linked to a variable with a stack slot should reuse
428 // that stack slot.
429 std::function<bool(Variable *)> TargetVarHook =
430 [&VariablesLinkedToSpillSlots](Variable *Var) {
431 if (auto *SpillVar =
432 llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
433 assert(Var->getWeight().isZero());
434 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
435 VariablesLinkedToSpillSlots.push_back(Var);
436 return true;
437 }
438 }
439 return false;
440 };
441
442 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
443 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
444 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
445 &LocalsSlotsAlignmentBytes, TargetVarHook);
446 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
447 SpillAreaSizeBytes += GlobalsSize;
448
449 // Add push instructions for preserved registers.
450 uint32_t NumCallee = 0;
451 size_t PreservedRegsSizeBytes = 0;
452 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
453 if (CalleeSaves[i] && RegsUsed[i]) {
454 ++NumCallee;
455 PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32);
456 _push(getPhysicalRegister(i));
457 }
458 }
459 Ctx->statsUpdateRegistersSaved(NumCallee);
460
461 // Generate "push ebp; mov ebp, esp"
462 if (IsEbpBasedFrame) {
463 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
464 .count() == 0);
465 PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32);
466 Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
467 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
468 _push(ebp);
469 _mov(ebp, esp);
470 // Keep ebp live for late-stage liveness analysis
471 // (e.g. asm-verbose mode).
472 Context.insert(InstFakeUse::create(Func, ebp));
473 }
474
475 // Align the variables area. SpillAreaPaddingBytes is the size of
476 // the region after the preserved registers and before the spill areas.
477 // LocalsSlotsPaddingBytes is the amount of padding between the globals
478 // and locals area if they are separate.
479 assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
480 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
481 uint32_t SpillAreaPaddingBytes = 0;
482 uint32_t LocalsSlotsPaddingBytes = 0;
483 alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
484 SpillAreaAlignmentBytes, GlobalsSize,
485 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
486 &LocalsSlotsPaddingBytes);
487 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
488 uint32_t GlobalsAndSubsequentPaddingSize =
489 GlobalsSize + LocalsSlotsPaddingBytes;
490
491 // Align esp if necessary.
492 if (NeedsStackAlignment) {
493 uint32_t StackOffset =
494 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
495 uint32_t StackSize =
496 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
497 SpillAreaSizeBytes = StackSize - StackOffset;
498 }
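// Worked example (illustrative): with X86_RET_IP_SIZE_BYTES == 4 and two
// pushed callee-save registers, StackOffset == 4 + 8 == 12. A 24-byte
// spill area then gives StackSize == applyStackAlignment(36) == 48, so
// SpillAreaSizeBytes grows to 48 - 12 == 36 and the "sub esp" below
// leaves esp 16-byte aligned.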
499
500 // Generate "sub esp, SpillAreaSizeBytes"
501 if (SpillAreaSizeBytes)
502 _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp),
503 Ctx->getConstantInt32(SpillAreaSizeBytes));
504 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
505
506 resetStackAdjustment();
507
508 // Fill in stack offsets for stack args, and copy args into registers
509 // for those that were register-allocated. Args are pushed right to
510 // left, so Arg[0] is closest to the stack/frame pointer.
511 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
512 size_t BasicFrameOffset =
513 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
514 if (!IsEbpBasedFrame)
515 BasicFrameOffset += SpillAreaSizeBytes;
516
517 const VarList &Args = Func->getArgs();
518 size_t InArgsSizeBytes = 0;
519 unsigned NumXmmArgs = 0;
520 for (Variable *Arg : Args) {
521 // Skip arguments passed in registers.
522 if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
523 ++NumXmmArgs;
524 continue;
525 }
526 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
527 }
528
529 // Fill in stack offsets for locals.
530 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
531 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
532 IsEbpBasedFrame);
533 // Assign stack offsets to variables that have been linked to spilled
534 // variables.
535 for (Variable *Var : VariablesLinkedToSpillSlots) {
536 Variable *Linked =
537 (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
538 Var->setStackOffset(Linked->getStackOffset());
539 }
540 this->HasComputedFrame = true;
541
542 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
543 OstreamLocker L(Func->getContext());
544 Ostream &Str = Func->getContext()->getStrDump();
545
546 Str << "Stack layout:\n";
547 uint32_t EspAdjustmentPaddingSize =
548 SpillAreaSizeBytes - LocalsSpillAreaSize -
549 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
550 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
551 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
552 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
553 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
554 << " globals spill area = " << GlobalsSize << " bytes\n"
555 << " globals-locals spill areas intermediate padding = "
556 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
557 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
558 << " esp alignment padding = " << EspAdjustmentPaddingSize
559 << " bytes\n";
560
561 Str << "Stack details:\n"
562 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
563 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
564 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
565 << " bytes\n"
566 << " is ebp based = " << IsEbpBasedFrame << "\n";
567 }
568 }
569
570 void TargetX8632::addEpilog(CfgNode *Node) {
571 InstList &Insts = Node->getInsts();
572 InstList::reverse_iterator RI, E;
573 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
574 if (llvm::isa<typename Traits::Insts::Ret>(*RI))
575 break;
576 }
577 if (RI == E)
578 return;
579
580 // Convert the reverse_iterator position into its corresponding
581 // (forward) iterator position.
582 InstList::iterator InsertPoint = RI.base();
583 --InsertPoint;
584 Context.init(Node);
585 Context.setInsertPoint(InsertPoint);
586
587 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
588 if (IsEbpBasedFrame) {
589 Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp);
590 // For late-stage liveness analysis (e.g. asm-verbose mode),
591 // adding a fake use of esp before the assignment of esp=ebp keeps
592 // previous esp adjustments from being dead-code eliminated.
593 Context.insert(InstFakeUse::create(Func, esp));
594 _mov(esp, ebp);
595 _pop(ebp);
596 } else {
597 // add esp, SpillAreaSizeBytes
598 if (SpillAreaSizeBytes)
599 _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
600 }
601
602 // Add pop instructions for preserved registers.
603 llvm::SmallBitVector CalleeSaves =
604 getRegisterSet(RegSet_CalleeSave, RegSet_None);
605 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
606 SizeT j = CalleeSaves.size() - i - 1;
607 if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame)
608 continue;
609 if (CalleeSaves[j] && RegsUsed[j]) {
610 _pop(getPhysicalRegister(j));
611 }
612 }
613
614 if (!Ctx->getFlags().getUseSandboxing())
615 return;
616 // Change the original ret instruction into a sandboxed return sequence.
617 // t:ecx = pop
618 // bundle_lock
619 // and t, ~31
620 // jmp *t
621 // bundle_unlock
622 // FakeUse <original_ret_operand>
623 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
624 _pop(T_ecx);
625 lowerIndirectJump(T_ecx);
626 if (RI->getSrcSize()) {
627 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
628 Context.insert(InstFakeUse::create(Func, RetValue));
629 }
630 RI->setDeleted();
631 }
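Note on the mask: the "and t, ~31" in the sequence above hard-codes a 32-byte NaCl bundle, while lowerCall derives the equivalent mask from the assembler. A sketch of that computation, assuming 32-byte bundles:

// getBundleAlignLog2Bytes() == 5 for a 32-byte bundle:
const SizeT BundleSize = 1 << 5;                              // 32
const uint32_t Mask = ~static_cast<uint32_t>(BundleSize - 1); // 0xFFFFFFE0, i.e. ~31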
632
633 void TargetX8632::emitJumpTable(const Cfg *Func,
634 const InstJumpTable *JumpTable) const {
635 if (!BuildDefs::dump())
636 return;
637 Ostream &Str = Ctx->getStrEmit();
638 IceString MangledName = Ctx->mangleName(Func->getFunctionName());
639 Str << "\t.section\t.rodata." << MangledName
640 << "$jumptable,\"a\",@progbits\n";
641 Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
642 Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":";
643
644 // On X8632 pointers are 32-bit hence the use of .long
645 for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
646 Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
647 Str << "\n";
648 }
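For a function whose mangled name is f and a jump table with three targets, the code above emits roughly the following (the label comes from InstJumpTable::makeName and the target names from getAsmName(); both are shown schematically):

	.section	.rodata.f$jumptable,"a",@progbits
	.align	4
<jump table label>:
	.long	<target 0 asm name>
	.long	<target 1 asm name>
	.long	<target 2 asm name>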
649
92 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx) 650 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx)
93 : TargetDataLowering(Ctx) {} 651 : TargetDataLowering(Ctx) {}
94 652
95 namespace { 653 namespace {
96 template <typename T> struct PoolTypeConverter {}; 654 template <typename T> struct PoolTypeConverter {};
97 655
98 template <> struct PoolTypeConverter<float> { 656 template <> struct PoolTypeConverter<float> {
99 typedef uint32_t PrimitiveIntType; 657 typedef uint32_t PrimitiveIntType;
100 typedef ConstantFloat IceType; 658 typedef ConstantFloat IceType;
101 static const Type Ty = IceType_f32; 659 static const Type Ty = IceType_f32;
(...skipping 50 matching lines...)
152 static const Type Ty = IceType_i8; 710 static const Type Ty = IceType_i8;
153 static const char *TypeName; 711 static const char *TypeName;
154 static const char *AsmTag; 712 static const char *AsmTag;
155 static const char *PrintfString; 713 static const char *PrintfString;
156 }; 714 };
157 const char *PoolTypeConverter<uint8_t>::TypeName = "i8"; 715 const char *PoolTypeConverter<uint8_t>::TypeName = "i8";
158 const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte"; 716 const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte";
159 const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x"; 717 const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x";
160 } // end of anonymous namespace 718 } // end of anonymous namespace
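Each converter pairs an Ice constant type with a primitive integer of the same width so the emitter can print the raw bit pattern. A minimal sketch of that idea for f32 (illustrative; the .long directive here is an assumption, chosen by analogy with the .byte tag of the uint8_t specialization above):

#include <cstdint>
#include <cstdio>
#include <cstring>

static void emitF32Bits(float Value) {
  uint32_t Bits; // PoolTypeConverter<float>::PrimitiveIntType
  static_assert(sizeof(Bits) == sizeof(Value), "width mismatch");
  std::memcpy(&Bits, &Value, sizeof(Bits)); // reinterpret the bits without UB
  std::printf("\t.long\t0x%x\n", Bits);     // AsmTag + PrintfString style
}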
161 719
162 void TargetX8632::emitJumpTable(const Cfg *Func,
163 const InstJumpTable *JumpTable) const {
164 if (!BuildDefs::dump())
165 return;
166 Ostream &Str = Ctx->getStrEmit();
167 IceString MangledName = Ctx->mangleName(Func->getFunctionName());
168 Str << "\t.section\t.rodata." << MangledName
169 << "$jumptable,\"a\",@progbits\n";
170 Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n";
171 Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":";
172
173 // On X8632 pointers are 32-bit hence the use of .long
174 for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I)
175 Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName();
176 Str << "\n";
177 }
178
179 template <typename T> 720 template <typename T>
180 void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) { 721 void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) {
181 if (!BuildDefs::dump()) 722 if (!BuildDefs::dump())
182 return; 723 return;
183 Ostream &Str = Ctx->getStrEmit(); 724 Ostream &Str = Ctx->getStrEmit();
184 Type Ty = T::Ty; 725 Type Ty = T::Ty;
185 SizeT Align = typeAlignInBytes(Ty); 726 SizeT Align = typeAlignInBytes(Ty);
186 ConstantList Pool = Ctx->getConstantPool(Ty); 727 ConstantList Pool = Ctx->getConstantPool(Ty);
187 728
188 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align 729 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
(...skipping 211 matching lines...)
400 // Repeat the static asserts with respect to the high-level table 941 // Repeat the static asserts with respect to the high-level table
401 // entries in case the high-level table has extra entries. 942 // entries in case the high-level table has extra entries.
402 #define X(tag, sizeLog2, align, elts, elty, str) \ 943 #define X(tag, sizeLog2, align, elts, elty, str) \
403 static_assert(_table1_##tag == _table2_##tag, \ 944 static_assert(_table1_##tag == _table2_##tag, \
404 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); 945 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
405 ICETYPE_TABLE 946 ICETYPE_TABLE
406 #undef X 947 #undef X
407 } // end of namespace dummy3 948 } // end of namespace dummy3
408 } // end of anonymous namespace 949 } // end of anonymous namespace
409 950
410 //------------------------------------------------------------------------------
411 // __ ______ __ __ ______ ______ __ __ __ ______
412 // /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\
413 // \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \
414 // \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\
415 // \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/
416 //
417 //------------------------------------------------------------------------------
418 void TargetX8632::lowerCall(const InstCall *Instr) {
419 // x86-32 calling convention:
420 //
421 // * At the point before the call, the stack must be aligned to 16
422 // bytes.
423 //
424 // * The first four arguments of vector type, regardless of their
425 // position relative to the other arguments in the argument list, are
426 // placed in registers xmm0 - xmm3.
427 //
428 // * Other arguments are pushed onto the stack in right-to-left order,
429 // such that the left-most argument ends up on the top of the stack at
430 // the lowest memory address.
431 //
432 // * Stack arguments of vector type are aligned to start at the next
433 // highest multiple of 16 bytes. Other stack arguments are aligned to
434 // 4 bytes.
435 //
436 // This is intended to match the section "IA-32 Function Calling
437 // Convention" of the document "OS X ABI Function Call Guide" by
438 // Apple.
439 NeedsStackAlignment = true;
440
441 typedef std::vector<Operand *> OperandList;
442 OperandList XmmArgs;
443 OperandList StackArgs, StackArgLocations;
444 uint32_t ParameterAreaSizeBytes = 0;
445
446 // Classify each argument operand according to the location where the
447 // argument is passed.
448 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
449 Operand *Arg = Instr->getArg(i);
450 Type Ty = Arg->getType();
451 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
452 assert(typeWidthInBytes(Ty) >= 4);
453 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
454 XmmArgs.push_back(Arg);
455 } else {
456 StackArgs.push_back(Arg);
457 if (isVectorType(Arg->getType())) {
458 ParameterAreaSizeBytes =
459 Traits::applyStackAlignment(ParameterAreaSizeBytes);
460 }
461 Variable *esp =
462 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
463 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
464 StackArgLocations.push_back(
465 Traits::X86OperandMem::create(Func, Ty, esp, Loc));
466 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
467 }
468 }
469
470 // Adjust the parameter area so that the stack is aligned. It is
471 // assumed that the stack is already aligned at the start of the
472 // calling sequence.
473 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
474
475 // Subtract the appropriate amount for the argument area. This also
476 // takes care of setting the stack adjustment during emission.
477 //
478 // TODO: If for some reason the call instruction gets dead-code
479 // eliminated after lowering, we would need to ensure that the
480 // pre-call and the post-call esp adjustment get eliminated as well.
481 if (ParameterAreaSizeBytes) {
482 _adjust_stack(ParameterAreaSizeBytes);
483 }
484
485 // Copy arguments that are passed on the stack to the appropriate
486 // stack locations.
487 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
488 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
489 }
490
491 // Copy arguments to be passed in registers to the appropriate
492 // registers.
493 // TODO: Investigate the impact of lowering arguments passed in
494 // registers after lowering stack arguments as opposed to the other
495 // way around. Lowering register arguments after stack arguments may
496 // reduce register pressure. On the other hand, lowering register
497 // arguments first (before stack arguments) may result in more compact
498 // code, as the memory operand displacements may end up being smaller
499 // before any stack adjustment is done.
500 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
501 Variable *Reg =
502 legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i);
503 // Generate a FakeUse of register arguments so that they do not get
504 // dead code eliminated as a result of the FakeKill of scratch
505 // registers after the call.
506 Context.insert(InstFakeUse::create(Func, Reg));
507 }
508 // Generate the call instruction. Assign its result to a temporary
509 // with high register allocation weight.
510 Variable *Dest = Instr->getDest();
511 // ReturnReg doubles as ReturnRegLo as necessary.
512 Variable *ReturnReg = nullptr;
513 Variable *ReturnRegHi = nullptr;
514 if (Dest) {
515 switch (Dest->getType()) {
516 case IceType_NUM:
517 llvm_unreachable("Invalid Call dest type");
518 break;
519 case IceType_void:
520 break;
521 case IceType_i1:
522 case IceType_i8:
523 case IceType_i16:
524 case IceType_i32:
525 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax);
526 break;
527 case IceType_i64:
528 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
529 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
530 break;
531 case IceType_f32:
532 case IceType_f64:
533 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
534 // the fstp instruction.
535 break;
536 case IceType_v4i1:
537 case IceType_v8i1:
538 case IceType_v16i1:
539 case IceType_v16i8:
540 case IceType_v8i16:
541 case IceType_v4i32:
542 case IceType_v4f32:
543 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0);
544 break;
545 }
546 }
547 Operand *CallTarget = legalize(Instr->getCallTarget());
548 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
549 if (NeedSandboxing) {
550 if (llvm::isa<Constant>(CallTarget)) {
551 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
552 } else {
553 Variable *CallTargetVar = nullptr;
554 _mov(CallTargetVar, CallTarget);
555 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
556 const SizeT BundleSize =
557 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
558 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
559 CallTarget = CallTargetVar;
560 }
561 }
562 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
563 Context.insert(NewCall);
564 if (NeedSandboxing)
565 _bundle_unlock();
566 if (ReturnRegHi)
567 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
568
569 // Add the appropriate offset to esp. The call instruction takes care
570 // of resetting the stack offset during emission.
571 if (ParameterAreaSizeBytes) {
572 Variable *esp =
573 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
574 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
575 }
576
577 // Insert a register-kill pseudo instruction.
578 Context.insert(InstFakeKill::create(Func, NewCall));
579
580 // Generate a FakeUse to keep the call live if necessary.
581 if (Instr->hasSideEffects() && ReturnReg) {
582 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
583 Context.insert(FakeUse);
584 }
585
586 if (!Dest)
587 return;
588
589 // Assign the result of the call to Dest.
590 if (ReturnReg) {
591 if (ReturnRegHi) {
592 assert(Dest->getType() == IceType_i64);
593 split64(Dest);
594 Variable *DestLo = Dest->getLo();
595 Variable *DestHi = Dest->getHi();
596 _mov(DestLo, ReturnReg);
597 _mov(DestHi, ReturnRegHi);
598 } else {
599 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
600 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
601 isVectorType(Dest->getType()));
602 if (isVectorType(Dest->getType())) {
603 _movp(Dest, ReturnReg);
604 } else {
605 _mov(Dest, ReturnReg);
606 }
607 }
608 } else if (isScalarFloatingType(Dest->getType())) {
609 // Special treatment for an FP function which returns its result in
610 // st(0).
611 // If Dest ends up being a physical xmm register, the fstp emit code
612 // will route st(0) through a temporary stack slot.
613 _fstp(Dest);
614 // Create a fake use of Dest in case it actually isn't used,
615 // because st(0) still needs to be popped.
616 Context.insert(InstFakeUse::create(Func, Dest));
617 }
618 }
619
620 } // end of namespace Ice 951 } // end of namespace Ice