| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 71 matching lines...) |
| 82 }; | 82 }; |
| 83 | 83 |
| 84 const size_t MachineTraits<TargetX8632>::TableTypeX8632AttributesSize = | 84 const size_t MachineTraits<TargetX8632>::TableTypeX8632AttributesSize = |
| 85 llvm::array_lengthof(TableTypeX8632Attributes); | 85 llvm::array_lengthof(TableTypeX8632Attributes); |
| 86 | 86 |
| 87 const uint32_t MachineTraits<TargetX8632>::X86_STACK_ALIGNMENT_BYTES = 16; | 87 const uint32_t MachineTraits<TargetX8632>::X86_STACK_ALIGNMENT_BYTES = 16; |
| 88 const char *MachineTraits<TargetX8632>::TargetName = "X8632"; | 88 const char *MachineTraits<TargetX8632>::TargetName = "X8632"; |
| 89 | 89 |
| 90 } // end of namespace X86Internal | 90 } // end of namespace X86Internal |
| 91 | 91 |
| 92 //------------------------------------------------------------------------------ |
| 93 // __ ______ __ __ ______ ______ __ __ __ ______ |
| 94 // /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\ |
| 95 // \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \ |
| 96 // \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\ |
| 97 // \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/ |
| 98 // |
| 99 //------------------------------------------------------------------------------ |
| 100 void TargetX8632::lowerCall(const InstCall *Instr) { |
| 101 // x86-32 calling convention: |
| 102 // |
| 103 // * At the point before the call, the stack must be aligned to 16 |
| 104 // bytes. |
| 105 // |
| 106 // * The first four arguments of vector type, regardless of their |
| 107 // position relative to the other arguments in the argument list, are |
| 108 // placed in registers xmm0 - xmm3. |
| 109 // |
| 110 // * Other arguments are pushed onto the stack in right-to-left order, |
| 111 // such that the left-most argument ends up on the top of the stack at |
| 112 // the lowest memory address. |
| 113 // |
| 114 // * Stack arguments of vector type are aligned to start at the next |
| 115 // highest multiple of 16 bytes. Other stack arguments are aligned to |
| 116 // 4 bytes. |
| 117 // |
| 118 // This is intended to match the section "IA-32 Function Calling
| 119 // Convention" of the document "OS X ABI Function Call Guide" by |
| 120 // Apple. |
| 121 NeedsStackAlignment = true; |
| 122 |
| 123 typedef std::vector<Operand *> OperandList; |
| 124 OperandList XmmArgs; |
| 125 OperandList StackArgs, StackArgLocations; |
| 126 uint32_t ParameterAreaSizeBytes = 0; |
| 127 |
| 128 // Classify each argument operand according to the location where the |
| 129 // argument is passed. |
| 130 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { |
| 131 Operand *Arg = Instr->getArg(i); |
| 132 Type Ty = Arg->getType(); |
| 133 // The PNaCl ABI requires the width of arguments to be at least 32 bits. |
| 134 assert(typeWidthInBytes(Ty) >= 4); |
| 135 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { |
| 136 XmmArgs.push_back(Arg); |
| 137 } else { |
| 138 StackArgs.push_back(Arg); |
| 139 if (isVectorType(Arg->getType())) { |
| 140 ParameterAreaSizeBytes = |
| 141 Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| 142 } |
| 143 Variable *esp = |
| 144 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 145 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); |
| 146 StackArgLocations.push_back( |
| 147 Traits::X86OperandMem::create(Func, Ty, esp, Loc)); |
| 148 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); |
| 149 } |
| 150 } |
| 151 |
| 152 // Adjust the parameter area so that the stack is aligned. It is |
| 153 // assumed that the stack is already aligned at the start of the |
| 154 // calling sequence. |
| 155 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); |
| 156 |
| 157 // Subtract the appropriate amount for the argument area. This also |
| 158 // takes care of setting the stack adjustment during emission. |
| 159 // |
| 160 // TODO: If for some reason the call instruction gets dead-code |
| 161 // eliminated after lowering, we would need to ensure that the |
| 162 // pre-call and the post-call esp adjustment get eliminated as well. |
| 163 if (ParameterAreaSizeBytes) { |
| 164 _adjust_stack(ParameterAreaSizeBytes); |
| 165 } |
| 166 |
| 167 // Copy arguments that are passed on the stack to the appropriate |
| 168 // stack locations. |
| 169 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { |
| 170 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); |
| 171 } |
| 172 |
| 173 // Copy arguments to be passed in registers to the appropriate |
| 174 // registers. |
| 175 // TODO: Investigate the impact of lowering arguments passed in |
| 176 // registers after lowering stack arguments as opposed to the other |
| 177 // way around. Lowering register arguments after stack arguments may |
| 178 // reduce register pressure. On the other hand, lowering register |
| 179 // arguments first (before stack arguments) may result in more compact |
| 180 // code, as the memory operand displacements may end up being smaller |
| 181 // before any stack adjustment is done. |
| 182 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { |
| 183 Variable *Reg = |
| 184 legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i); |
| 185 // Generate a FakeUse of register arguments so that they do not get |
| 186 // dead code eliminated as a result of the FakeKill of scratch |
| 187 // registers after the call. |
| 188 Context.insert(InstFakeUse::create(Func, Reg)); |
| 189 } |
| 190 // Generate the call instruction. Assign its result to a temporary |
| 191 // with high register allocation weight. |
| 192 Variable *Dest = Instr->getDest(); |
| 193 // ReturnReg doubles as ReturnRegLo as necessary. |
| 194 Variable *ReturnReg = nullptr; |
| 195 Variable *ReturnRegHi = nullptr; |
| 196 if (Dest) { |
| 197 switch (Dest->getType()) { |
| 198 case IceType_NUM: |
| 199 case IceType_void: |
| 200 llvm::report_fatal_error("Invalid Call dest type"); |
| 201 break; |
| 202 case IceType_i1: |
| 203 case IceType_i8: |
| 204 case IceType_i16: |
| 205 case IceType_i32: |
| 206 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax); |
| 207 break; |
| 208 case IceType_i64: |
| 209 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); |
| 210 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); |
| 211 break; |
| 212 case IceType_f32: |
| 213 case IceType_f64: |
| 214 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with |
| 215 // the fstp instruction. |
| 216 break; |
| 217 case IceType_v4i1: |
| 218 case IceType_v8i1: |
| 219 case IceType_v16i1: |
| 220 case IceType_v16i8: |
| 221 case IceType_v8i16: |
| 222 case IceType_v4i32: |
| 223 case IceType_v4f32: |
| 224 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0); |
| 225 break; |
| 226 } |
| 227 } |
| 228 Operand *CallTarget = legalize(Instr->getCallTarget()); |
| 229 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); |
| 230 if (NeedSandboxing) { |
| 231 if (llvm::isa<Constant>(CallTarget)) { |
| 232 _bundle_lock(InstBundleLock::Opt_AlignToEnd); |
| 233 } else { |
| 234 Variable *CallTargetVar = nullptr; |
| 235 _mov(CallTargetVar, CallTarget); |
| 236 _bundle_lock(InstBundleLock::Opt_AlignToEnd); |
| 237 const SizeT BundleSize = |
| 238 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); |
| 239 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); |
| 240 CallTarget = CallTargetVar; |
| 241 } |
| 242 } |
| 243 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget); |
| 244 Context.insert(NewCall); |
| 245 if (NeedSandboxing) |
| 246 _bundle_unlock(); |
| 247 if (ReturnRegHi) |
| 248 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
| 249 |
| 250 // Add the appropriate offset to esp. The call instruction takes care |
| 251 // of resetting the stack offset during emission. |
| 252 if (ParameterAreaSizeBytes) { |
| 253 Variable *esp = |
| 254 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 255 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes)); |
| 256 } |
| 257 |
| 258 // Insert a register-kill pseudo instruction. |
| 259 Context.insert(InstFakeKill::create(Func, NewCall)); |
| 260 |
| 261 // Generate a FakeUse to keep the call live if necessary. |
| 262 if (Instr->hasSideEffects() && ReturnReg) { |
| 263 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); |
| 264 Context.insert(FakeUse); |
| 265 } |
| 266 |
| 267 if (!Dest) |
| 268 return; |
| 269 |
| 270 // Assign the result of the call to Dest. |
| 271 if (ReturnReg) { |
| 272 if (ReturnRegHi) { |
| 273 assert(Dest->getType() == IceType_i64); |
| 274 split64(Dest); |
| 275 Variable *DestLo = Dest->getLo(); |
| 276 Variable *DestHi = Dest->getHi(); |
| 277 _mov(DestLo, ReturnReg); |
| 278 _mov(DestHi, ReturnRegHi); |
| 279 } else { |
| 280 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 || |
| 281 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 || |
| 282 isVectorType(Dest->getType())); |
| 283 if (isVectorType(Dest->getType())) { |
| 284 _movp(Dest, ReturnReg); |
| 285 } else { |
| 286 _mov(Dest, ReturnReg); |
| 287 } |
| 288 } |
| 289 } else if (isScalarFloatingType(Dest->getType())) { |
| 290 // Special treatment for an FP function which returns its result in |
| 291 // st(0). |
| 292 // If Dest ends up being a physical xmm register, the fstp emit code |
| 293 // will route st(0) through a temporary stack slot. |
| 294 _fstp(Dest); |
| 295 // Create a fake use of Dest in case it actually isn't used, |
| 296 // because st(0) still needs to be popped. |
| 297 Context.insert(InstFakeUse::create(Func, Dest)); |
| 298 } |
| 299 } |
| 300 |
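A minimal standalone sketch of the argument-area bookkeeping in lowerCall above, assuming the usual 16-byte X86_STACK_ALIGNMENT_BYTES; the helper and the argument list are hypothetical stand-ins for Traits::applyStackAlignment and the operands of a real call:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Round Offset up to the next multiple of 16, mirroring what
    // Traits::applyStackAlignment does in the lowering above.
    static uint32_t applyStackAlignment(uint32_t Offset) {
      const uint32_t Align = 16; // X86_STACK_ALIGNMENT_BYTES
      return (Offset + Align - 1) & ~(Align - 1);
    }

    int main() {
      // Hypothetical stack arguments: two i32 scalars, then one v4f32 vector.
      struct StackArg { const char *Name; uint32_t Bytes; bool IsVector; };
      const std::vector<StackArg> StackArgs = {
          {"a", 4, false}, {"b", 4, false}, {"v", 16, true}};
      uint32_t ParameterAreaSizeBytes = 0;
      for (const StackArg &A : StackArgs) {
        // Vector arguments start at the next 16-byte boundary; scalar
        // arguments use their natural 4-byte slots.
        if (A.IsVector)
          ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
        std::printf("%s -> [esp + %u]\n", A.Name, ParameterAreaSizeBytes);
        ParameterAreaSizeBytes += A.Bytes;
      }
      // Pad the whole area so the stack is still 16-byte aligned at the call.
      // Prints: a -> [esp + 0], b -> [esp + 4], v -> [esp + 16], then 32.
      std::printf("%u\n", applyStackAlignment(ParameterAreaSizeBytes));
      return 0;
    }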
| 301 void TargetX8632::lowerArguments() { |
| 302 VarList &Args = Func->getArgs(); |
| 303 // The first four arguments of vector type, regardless of their |
| 304 // position relative to the other arguments in the argument list, are |
| 305 // passed in registers xmm0 - xmm3. |
| 306 unsigned NumXmmArgs = 0; |
| 307 |
| 308 Context.init(Func->getEntryNode()); |
| 309 Context.setInsertPoint(Context.getCur()); |
| 310 |
| 311 for (SizeT I = 0, E = Args.size(); |
| 312 I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) { |
| 313 Variable *Arg = Args[I]; |
| 314 Type Ty = Arg->getType(); |
| 315 if (!isVectorType(Ty)) |
| 316 continue; |
| 317 // Replace Arg in the argument list with the home register. Then |
| 318 // generate an instruction in the prolog to copy the home register |
| 319 // to the assigned location of Arg. |
| 320 int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs; |
| 321 ++NumXmmArgs; |
| 322 Variable *RegisterArg = Func->makeVariable(Ty); |
| 323 if (BuildDefs::dump()) |
| 324 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); |
| 325 RegisterArg->setRegNum(RegNum); |
| 326 RegisterArg->setIsArg(); |
| 327 Arg->setIsArg(false); |
| 328 |
| 329 Args[I] = RegisterArg; |
| 330 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); |
| 331 } |
| 332 } |
| 333 |
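The rewrite above can be pictured on a hypothetical function with one vector parameter; the home_reg prefix matches the setName call, but the IR spelling is illustrative:

    // Before lowering: %v may be assigned any location, even though the
    // caller put it in xmm0.
    //   define @f(v4f32 %v, i32 %n) { ... uses of %v ... }
    //
    // After lowering: a copy pinned to xmm0 becomes the formal argument,
    // and the prolog assigns it back, so only a short live range is pinned.
    //   define @f(v4f32 %home_reg:v /* xmm0 */, i32 %n) {
    //     %v = %home_reg:v   // the InstAssign inserted above
    //     ... uses of %v ...
    //   }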
| 334 void TargetX8632::lowerRet(const InstRet *Inst) { |
| 335 Variable *Reg = nullptr; |
| 336 if (Inst->hasRetValue()) { |
| 337 Operand *Src0 = legalize(Inst->getRetValue()); |
| 338 // TODO(jpp): this is not needed. |
| 339 if (Src0->getType() == IceType_i64) { |
| 340 Variable *eax = |
| 341 legalizeToReg(loOperand(Src0), Traits::RegisterSet::Reg_eax); |
| 342 Variable *edx = |
| 343 legalizeToReg(hiOperand(Src0), Traits::RegisterSet::Reg_edx); |
| 344 Reg = eax; |
| 345 Context.insert(InstFakeUse::create(Func, edx)); |
| 346 } else if (isScalarFloatingType(Src0->getType())) { |
| 347 _fld(Src0); |
| 348 } else if (isVectorType(Src0->getType())) { |
| 349 Reg = legalizeToReg(Src0, Traits::RegisterSet::Reg_xmm0); |
| 350 } else { |
| 351 _mov(Reg, Src0, Traits::RegisterSet::Reg_eax); |
| 352 } |
| 353 } |
| 354 // Add a ret instruction even if sandboxing is enabled, because |
| 355 // addEpilog explicitly looks for a ret instruction as a marker for |
| 356 // where to insert the frame removal instructions. |
| 357 _ret(Reg); |
| 358 // Add a fake use of esp to make sure esp stays alive for the entire |
| 359 // function. Otherwise post-call esp adjustments get dead-code |
| 360 // eliminated. TODO: Are there more places where the fake use |
| 361 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not |
| 362 // have a ret instruction. |
| 363 Variable *esp = |
| 364 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 365 Context.insert(InstFakeUse::create(Func, esp)); |
| 366 } |
| 367 |
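In summary, the return-value placement produced by the cases above (operand names are illustrative; the register choices follow the code):

    // ret i32/i16/i8 %x -> mov eax, %x ; ret
    // ret i64 %x        -> mov eax, lo(%x) ; mov edx, hi(%x) ; ret
    //                      (the FakeUse keeps the edx copy alive)
    // ret f32/f64 %x    -> fld %x ; ret          (result left in st(0))
    // ret v4f32 %x      -> movups xmm0, %x ; ret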
| 368 void TargetX8632::addProlog(CfgNode *Node) { |
| 369 // Stack frame layout: |
| 370 // |
| 371 // +------------------------+ |
| 372 // | 1. return address | |
| 373 // +------------------------+ |
| 374 // | 2. preserved registers | |
| 375 // +------------------------+ |
| 376 // | 3. padding | |
| 377 // +------------------------+ |
| 378 // | 4. global spill area | |
| 379 // +------------------------+ |
| 380 // | 5. padding | |
| 381 // +------------------------+ |
| 382 // | 6. local spill area | |
| 383 // +------------------------+ |
| 384 // | 7. padding | |
| 385 // +------------------------+ |
| 386 // | 8. allocas | |
| 387 // +------------------------+ |
| 388 // |
| 389 // The following variables record the size in bytes of the given areas: |
| 390 // * X86_RET_IP_SIZE_BYTES: area 1 |
| 391 // * PreservedRegsSizeBytes: area 2 |
| 392 // * SpillAreaPaddingBytes: area 3 |
| 393 // * GlobalsSize: area 4 |
| 394 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 |
| 395 // * LocalsSpillAreaSize: area 6 |
| 396 // * SpillAreaSizeBytes: areas 3 - 7 |
| 397 |
| 398 // Determine stack frame offsets for each Variable without a |
| 399 // register assignment. This can be done as one variable per stack |
| 400 // slot. Or, do coalescing by running the register allocator again |
| 401 // with an infinite set of registers (as a side effect, this gives |
| 402 // variables a second chance at physical register assignment). |
| 403 // |
| 404 // A middle ground approach is to leverage sparsity and allocate one |
| 405 // block of space on the frame for globals (variables with |
| 406 // multi-block lifetime), and one block to share for locals |
| 407 // (single-block lifetime). |
| 408 |
| 409 Context.init(Node); |
| 410 Context.setInsertPoint(Context.getCur()); |
| 411 |
| 412 llvm::SmallBitVector CalleeSaves = |
| 413 getRegisterSet(RegSet_CalleeSave, RegSet_None); |
| 414 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); |
| 415 VarList SortedSpilledVariables, VariablesLinkedToSpillSlots; |
| 416 size_t GlobalsSize = 0; |
| 417 // If there is a separate locals area, this represents that area. |
| 418 // Otherwise it counts any variable not counted by GlobalsSize. |
| 419 SpillAreaSizeBytes = 0; |
| 420 // If there is a separate locals area, this specifies the alignment |
| 421 // for it. |
| 422 uint32_t LocalsSlotsAlignmentBytes = 0; |
| 423 // The entire spill locations area gets aligned to the largest natural |
| 424 // alignment of the variables that have a spill slot. |
| 425 uint32_t SpillAreaAlignmentBytes = 0; |
| 426 // A spill slot linked to a variable with a stack slot should reuse |
| 427 // that stack slot. |
| 428 std::function<bool(Variable *)> TargetVarHook = |
| 429 [&VariablesLinkedToSpillSlots](Variable *Var) { |
| 430 if (auto *SpillVar = |
| 431 llvm::dyn_cast<typename Traits::SpillVariable>(Var)) { |
| 432 assert(Var->getWeight().isZero()); |
| 433 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) { |
| 434 VariablesLinkedToSpillSlots.push_back(Var); |
| 435 return true; |
| 436 } |
| 437 } |
| 438 return false; |
| 439 }; |
| 440 |
| 441 // Compute the list of spilled variables and bounds for GlobalsSize, etc. |
| 442 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, |
| 443 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, |
| 444 &LocalsSlotsAlignmentBytes, TargetVarHook); |
| 445 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; |
| 446 SpillAreaSizeBytes += GlobalsSize; |
| 447 |
| 448 // Add push instructions for preserved registers. |
| 449 uint32_t NumCallee = 0; |
| 450 size_t PreservedRegsSizeBytes = 0; |
| 451 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
| 452 if (CalleeSaves[i] && RegsUsed[i]) { |
| 453 ++NumCallee; |
| 454 PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32); |
| 455 _push(getPhysicalRegister(i)); |
| 456 } |
| 457 } |
| 458 Ctx->statsUpdateRegistersSaved(NumCallee); |
| 459 |
| 460 // Generate "push ebp; mov ebp, esp" |
| 461 if (IsEbpBasedFrame) { |
| 462 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) |
| 463 .count() == 0); |
| 464 PreservedRegsSizeBytes += typeWidthInBytes(IceType_i32); |
| 465 Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp); |
| 466 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 467 _push(ebp); |
| 468 _mov(ebp, esp); |
| 469 // Keep ebp live for late-stage liveness analysis |
| 470 // (e.g. asm-verbose mode). |
| 471 Context.insert(InstFakeUse::create(Func, ebp)); |
| 472 } |
| 473 |
| 474 // Align the variables area. SpillAreaPaddingBytes is the size of |
| 475 // the region after the preserved registers and before the spill areas. |
| 476 // LocalsSlotsPaddingBytes is the amount of padding between the globals |
| 477 // and locals area if they are separate. |
| 478 assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES); |
| 479 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
| 480 uint32_t SpillAreaPaddingBytes = 0; |
| 481 uint32_t LocalsSlotsPaddingBytes = 0; |
| 482 alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes, |
| 483 SpillAreaAlignmentBytes, GlobalsSize, |
| 484 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes, |
| 485 &LocalsSlotsPaddingBytes); |
| 486 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; |
| 487 uint32_t GlobalsAndSubsequentPaddingSize = |
| 488 GlobalsSize + LocalsSlotsPaddingBytes; |
| 489 |
| 490 // Align esp if necessary. |
| 491 if (NeedsStackAlignment) { |
| 492 uint32_t StackOffset = |
| 493 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; |
| 494 uint32_t StackSize = |
| 495 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes); |
| 496 SpillAreaSizeBytes = StackSize - StackOffset; |
| 497 } |
| 498 |
| 499 // Generate "sub esp, SpillAreaSizeBytes" |
| 500 if (SpillAreaSizeBytes) |
| 501 _sub(getPhysicalRegister(Traits::RegisterSet::Reg_esp), |
| 502 Ctx->getConstantInt32(SpillAreaSizeBytes)); |
| 503 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
| 504 |
| 505 resetStackAdjustment(); |
| 506 |
| 507 // Fill in stack offsets for stack args, and copy args into registers |
| 508 // for those that were register-allocated. Args are pushed right to |
| 509 // left, so Arg[0] is closest to the stack/frame pointer. |
| 510 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
| 511 size_t BasicFrameOffset = |
| 512 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES; |
| 513 if (!IsEbpBasedFrame) |
| 514 BasicFrameOffset += SpillAreaSizeBytes; |
| 515 |
| 516 const VarList &Args = Func->getArgs(); |
| 517 size_t InArgsSizeBytes = 0; |
| 518 unsigned NumXmmArgs = 0; |
| 519 for (Variable *Arg : Args) { |
| 520 // Skip arguments passed in registers. |
| 521 if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) { |
| 522 ++NumXmmArgs; |
| 523 continue; |
| 524 } |
| 525 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| 526 } |
| 527 |
| 528 // Fill in stack offsets for locals. |
| 529 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, |
| 530 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, |
| 531 IsEbpBasedFrame); |
| 532 // Assign stack offsets to variables that have been linked to spilled |
| 533 // variables. |
| 534 for (Variable *Var : VariablesLinkedToSpillSlots) { |
| 535 Variable *Linked = |
| 536 (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo(); |
| 537 Var->setStackOffset(Linked->getStackOffset()); |
| 538 } |
| 539 this->HasComputedFrame = true; |
| 540 |
| 541 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { |
| 542 OstreamLocker L(Func->getContext()); |
| 543 Ostream &Str = Func->getContext()->getStrDump(); |
| 544 |
| 545 Str << "Stack layout:\n"; |
| 546 uint32_t EspAdjustmentPaddingSize = |
| 547 SpillAreaSizeBytes - LocalsSpillAreaSize - |
| 548 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; |
| 549 Str << " in-args = " << InArgsSizeBytes << " bytes\n" |
| 550 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n" |
| 551 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" |
| 552 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" |
| 553 << " globals spill area = " << GlobalsSize << " bytes\n" |
| 554 << " globals-locals spill areas intermediate padding = " |
| 555 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" |
| 556 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" |
| 557 << " esp alignment padding = " << EspAdjustmentPaddingSize |
| 558 << " bytes\n"; |
| 559 |
| 560 Str << "Stack details:\n" |
| 561 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" |
| 562 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" |
| 563 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes |
| 564 << " bytes\n" |
| 565 << " is ebp based = " << IsEbpBasedFrame << "\n"; |
| 566 } |
| 567 } |
| 568 |
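A worked instance of the frame-layout arithmetic above, with hypothetical sizes: one callee-save register pushed, 12 bytes of globals, 8 bytes of locals, a 16-byte spill-area alignment, and a locals alignment small enough that areas 4 and 6 need no intermediate padding:

    #include <cstdint>
    #include <cstdio>

    // Round Offset up to a multiple of Align (a power of two), as
    // alignStackSpillAreas and applyStackAlignment do above.
    static uint32_t alignUp(uint32_t Offset, uint32_t Align) {
      return (Offset + Align - 1) & ~(Align - 1);
    }

    int main() {
      const uint32_t RetIpBytes = 4;         // X86_RET_IP_SIZE_BYTES (area 1)
      const uint32_t PreservedRegsBytes = 4; // one pushed register (area 2)
      const uint32_t GlobalsSize = 12;       // area 4
      const uint32_t LocalsSize = 8;         // area 6
      const uint32_t SpillAreaAlign = 16;    // largest spill-slot alignment

      // Area 3: padding so the spill area starts on its required boundary.
      const uint32_t Offset = RetIpBytes + PreservedRegsBytes;        // 8
      const uint32_t SpillAreaPadding =
          alignUp(Offset, SpillAreaAlign) - Offset;                   // 8
      uint32_t SpillAreaSizeBytes =
          SpillAreaPadding + GlobalsSize + LocalsSize;                // 28

      // Area 7: grow the area so esp is 16-byte aligned after the sub.
      SpillAreaSizeBytes = alignUp(Offset + SpillAreaSizeBytes, 16) - Offset;
      std::printf("sub esp, %u\n", SpillAreaSizeBytes); // sub esp, 40
      return 0;
    }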
| 569 void TargetX8632::addEpilog(CfgNode *Node) { |
| 570 InstList &Insts = Node->getInsts(); |
| 571 InstList::reverse_iterator RI, E; |
| 572 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { |
| 573 if (llvm::isa<typename Traits::Insts::Ret>(*RI)) |
| 574 break; |
| 575 } |
| 576 if (RI == E) |
| 577 return; |
| 578 |
| 579 // Convert the reverse_iterator position into its corresponding |
| 580 // (forward) iterator position. |
| 581 InstList::iterator InsertPoint = RI.base(); |
| 582 --InsertPoint; |
| 583 Context.init(Node); |
| 584 Context.setInsertPoint(InsertPoint); |
| 585 |
| 586 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
| 587 if (IsEbpBasedFrame) { |
| 588 Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp); |
| 589 // For late-stage liveness analysis (e.g. asm-verbose mode), |
| 590 // adding a fake use of esp before the assignment of esp=ebp keeps |
| 591 // previous esp adjustments from being dead-code eliminated. |
| 592 Context.insert(InstFakeUse::create(Func, esp)); |
| 593 _mov(esp, ebp); |
| 594 _pop(ebp); |
| 595 } else { |
| 596 // add esp, SpillAreaSizeBytes |
| 597 if (SpillAreaSizeBytes) |
| 598 _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes)); |
| 599 } |
| 600 |
| 601 // Add pop instructions for preserved registers. |
| 602 llvm::SmallBitVector CalleeSaves = |
| 603 getRegisterSet(RegSet_CalleeSave, RegSet_None); |
| 604 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
| 605 SizeT j = CalleeSaves.size() - i - 1; |
| 606 if (j == Traits::RegisterSet::Reg_ebp && IsEbpBasedFrame) |
| 607 continue; |
| 608 if (CalleeSaves[j] && RegsUsed[j]) { |
| 609 _pop(getPhysicalRegister(j)); |
| 610 } |
| 611 } |
| 612 |
| 613 if (!Ctx->getFlags().getUseSandboxing()) |
| 614 return; |
| 615 // Change the original ret instruction into a sandboxed return sequence. |
| 616 // t:ecx = pop |
| 617 // bundle_lock |
| 618 // and t, ~31 |
| 619 // jmp *t |
| 620 // bundle_unlock |
| 621 // FakeUse <original_ret_operand> |
| 622 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); |
| 623 _pop(T_ecx); |
| 624 lowerIndirectJump(T_ecx); |
| 625 if (RI->getSrcSize()) { |
| 626 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); |
| 627 Context.insert(InstFakeUse::create(Func, RetValue)); |
| 628 } |
| 629 RI->setDeleted(); |
| 630 } |
| 631 |
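The `and t, ~31` in the sequence above is what confines the indirect jump: with 32-byte bundles, clearing the low five bits of the popped address can only yield a bundle-aligned target. A small sketch of the mask arithmetic (the bundle size and the address are assumptions):

    #include <cstdint>
    #include <cstdio>

    int main() {
      // ~(BundleSize - 1), as computed from getBundleAlignLog2Bytes() in
      // the sandboxed call lowering; 32-byte bundles are assumed here.
      const uint32_t BundleSize = 1u << 5;
      const uint32_t Mask = ~(BundleSize - 1u); // 0xffffffe0, i.e. ~31
      const uint32_t RetAddr = 0x0804812bu;     // hypothetical popped address
      // Prints "0x0804812b -> 0x08048120": the jump lands on a bundle start.
      std::printf("0x%08x -> 0x%08x\n", RetAddr, RetAddr & Mask);
      return 0;
    }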
| 632 void TargetX8632::emitJumpTable(const Cfg *Func, |
| 633 const InstJumpTable *JumpTable) const { |
| 634 if (!BuildDefs::dump()) |
| 635 return; |
| 636 Ostream &Str = Ctx->getStrEmit(); |
| 637 IceString MangledName = Ctx->mangleName(Func->getFunctionName()); |
| 638 Str << "\t.section\t.rodata." << MangledName |
| 639 << "$jumptable,\"a\",@progbits\n"; |
| 640 Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"; |
| 641 Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":"; |
| 642 |
| 643 // On X8632 pointers are 32-bit, hence the use of .long |
| 644 for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I) |
| 645 Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName(); |
| 646 Str << "\n"; |
| 647 } |
| 648 |
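Given the directives above, a three-target table for a hypothetical function foo would be emitted roughly as follows; the label spellings come from InstJumpTable::makeName and getAsmName, so placeholders are used here:

    	.section	.rodata.foo$jumptable,"a",@progbits
    	.align	4
    <label from InstJumpTable::makeName>:
    	.long	<target 0 asm name>
    	.long	<target 1 asm name>
    	.long	<target 2 asm name>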
| 92 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx) | 649 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx) |
| 93 : TargetDataLowering(Ctx) {} | 650 : TargetDataLowering(Ctx) {} |
| 94 | 651 |
| 95 namespace { | 652 namespace { |
| 96 template <typename T> struct PoolTypeConverter {}; | 653 template <typename T> struct PoolTypeConverter {}; |
| 97 | 654 |
| 98 template <> struct PoolTypeConverter<float> { | 655 template <> struct PoolTypeConverter<float> { |
| 99 typedef uint32_t PrimitiveIntType; | 656 typedef uint32_t PrimitiveIntType; |
| 100 typedef ConstantFloat IceType; | 657 typedef ConstantFloat IceType; |
| 101 static const Type Ty = IceType_f32; | 658 static const Type Ty = IceType_f32; |
| (...skipping 50 matching lines...) |
| 152 static const Type Ty = IceType_i8; | 709 static const Type Ty = IceType_i8; |
| 153 static const char *TypeName; | 710 static const char *TypeName; |
| 154 static const char *AsmTag; | 711 static const char *AsmTag; |
| 155 static const char *PrintfString; | 712 static const char *PrintfString; |
| 156 }; | 713 }; |
| 157 const char *PoolTypeConverter<uint8_t>::TypeName = "i8"; | 714 const char *PoolTypeConverter<uint8_t>::TypeName = "i8"; |
| 158 const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte"; | 715 const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte"; |
| 159 const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x"; | 716 const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x"; |
| 160 } // end of anonymous namespace | 717 } // end of anonymous namespace |
| 161 | 718 |
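The PoolTypeConverter trait pairs each pool type with a same-width integer (PrimitiveIntType) so constants can be emitted bit-exactly rather than round-tripped through decimal formatting. A minimal sketch of that idea, assuming f32 and the ".long"/"0x%x" tags shown above:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      // Reinterpret a float's bits as its PrimitiveIntType (uint32_t) and
      // print those bits exactly, as the f32 pool emitter does.
      const float F = 1.5f;
      uint32_t Bits;
      static_assert(sizeof Bits == sizeof F, "width mismatch");
      std::memcpy(&Bits, &F, sizeof Bits);
      std::printf("\t.long\t0x%x\t/* %g */\n", Bits, F);
      // Prints: .long 0x3fc00000 /* 1.5 */
      return 0;
    }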
| 162 void TargetX8632::emitJumpTable(const Cfg *Func, | |
| 163 const InstJumpTable *JumpTable) const { | |
| 164 if (!BuildDefs::dump()) | |
| 165 return; | |
| 166 Ostream &Str = Ctx->getStrEmit(); | |
| 167 IceString MangledName = Ctx->mangleName(Func->getFunctionName()); | |
| 168 Str << "\t.section\t.rodata." << MangledName | |
| 169 << "$jumptable,\"a\",@progbits\n"; | |
| 170 Str << "\t.align\t" << typeWidthInBytes(getPointerType()) << "\n"; | |
| 171 Str << InstJumpTable::makeName(MangledName, JumpTable->getId()) << ":"; | |
| 172 | |
| 173 // On X8632 pointers are 32-bit, hence the use of .long | 
| 174 for (SizeT I = 0; I < JumpTable->getNumTargets(); ++I) | |
| 175 Str << "\n\t.long\t" << JumpTable->getTarget(I)->getAsmName(); | |
| 176 Str << "\n"; | |
| 177 } | |
| 178 | |
| 179 template <typename T> | 719 template <typename T> |
| 180 void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) { | 720 void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) { |
| 181 if (!BuildDefs::dump()) | 721 if (!BuildDefs::dump()) |
| 182 return; | 722 return; |
| 183 Ostream &Str = Ctx->getStrEmit(); | 723 Ostream &Str = Ctx->getStrEmit(); |
| 184 Type Ty = T::Ty; | 724 Type Ty = T::Ty; |
| 185 SizeT Align = typeAlignInBytes(Ty); | 725 SizeT Align = typeAlignInBytes(Ty); |
| 186 ConstantList Pool = Ctx->getConstantPool(Ty); | 726 ConstantList Pool = Ctx->getConstantPool(Ty); |
| 187 | 727 |
| 188 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align | 728 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align |
| (...skipping 211 matching lines...) |
| 400 // Repeat the static asserts with respect to the high-level table | 940 // Repeat the static asserts with respect to the high-level table |
| 401 // entries in case the high-level table has extra entries. | 941 // entries in case the high-level table has extra entries. |
| 402 #define X(tag, sizeLog2, align, elts, elty, str) \ | 942 #define X(tag, sizeLog2, align, elts, elty, str) \ |
| 403 static_assert(_table1_##tag == _table2_##tag, \ | 943 static_assert(_table1_##tag == _table2_##tag, \ |
| 404 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); | 944 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |
| 405 ICETYPE_TABLE | 945 ICETYPE_TABLE |
| 406 #undef X | 946 #undef X |
| 407 } // end of namespace dummy3 | 947 } // end of namespace dummy3 |
| 408 } // end of anonymous namespace | 948 } // end of anonymous namespace |
| 409 | 949 |
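The consistency check above uses the usual X-macro trick: both tables are expanded with the same X(...) shape so per-tag constants can be compared at compile time. A toy version of the pattern (table contents hypothetical):

    // Two tables that must stay in sync; each row expands X(tag, value).
    #define TABLE1 X(A, 1) X(B, 2)
    #define TABLE2 X(A, 1) X(B, 2)

    namespace dummy {
    // Expand each table once to get per-tag constants...
    #define X(tag, value) static const int _table1_##tag = value;
    TABLE1
    #undef X
    #define X(tag, value) static const int _table2_##tag = value;
    TABLE2
    #undef X
    // ...then expand again to compare them at compile time.
    #define X(tag, value) \
      static_assert(_table1_##tag == _table2_##tag, "tables out of sync");
    TABLE1
    #undef X
    } // end of namespace dummy

    int main() { return 0; } // compiles only if the tables agree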
| 410 //------------------------------------------------------------------------------ | |
| 411 // __ ______ __ __ ______ ______ __ __ __ ______ | |
| 412 // /\ \ /\ __ \/\ \ _ \ \/\ ___\/\ == \/\ \/\ "-.\ \/\ ___\ | |
| 413 // \ \ \___\ \ \/\ \ \ \/ ".\ \ \ __\\ \ __<\ \ \ \ \-. \ \ \__ \ | |
| 414 // \ \_____\ \_____\ \__/".~\_\ \_____\ \_\ \_\ \_\ \_\\"\_\ \_____\ | |
| 415 // \/_____/\/_____/\/_/ \/_/\/_____/\/_/ /_/\/_/\/_/ \/_/\/_____/ | |
| 416 // | |
| 417 //------------------------------------------------------------------------------ | |
| 418 void TargetX8632::lowerCall(const InstCall *Instr) { | |
| 419 // x86-32 calling convention: | |
| 420 // | |
| 421 // * At the point before the call, the stack must be aligned to 16 | |
| 422 // bytes. | |
| 423 // | |
| 424 // * The first four arguments of vector type, regardless of their | |
| 425 // position relative to the other arguments in the argument list, are | |
| 426 // placed in registers xmm0 - xmm3. | |
| 427 // | |
| 428 // * Other arguments are pushed onto the stack in right-to-left order, | |
| 429 // such that the left-most argument ends up on the top of the stack at | |
| 430 // the lowest memory address. | |
| 431 // | |
| 432 // * Stack arguments of vector type are aligned to start at the next | |
| 433 // highest multiple of 16 bytes. Other stack arguments are aligned to | |
| 434 // 4 bytes. | |
| 435 // | |
| 436 // This is intended to match the section "IA-32 Function Calling | 
| 437 // Convention" of the document "OS X ABI Function Call Guide" by | |
| 438 // Apple. | |
| 439 NeedsStackAlignment = true; | |
| 440 | |
| 441 typedef std::vector<Operand *> OperandList; | |
| 442 OperandList XmmArgs; | |
| 443 OperandList StackArgs, StackArgLocations; | |
| 444 uint32_t ParameterAreaSizeBytes = 0; | |
| 445 | |
| 446 // Classify each argument operand according to the location where the | |
| 447 // argument is passed. | |
| 448 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | |
| 449 Operand *Arg = Instr->getArg(i); | |
| 450 Type Ty = Arg->getType(); | |
| 451 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | |
| 452 assert(typeWidthInBytes(Ty) >= 4); | |
| 453 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) { | |
| 454 XmmArgs.push_back(Arg); | |
| 455 } else { | |
| 456 StackArgs.push_back(Arg); | |
| 457 if (isVectorType(Arg->getType())) { | |
| 458 ParameterAreaSizeBytes = | |
| 459 Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
| 460 } | |
| 461 Variable *esp = | |
| 462 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
| 463 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | |
| 464 StackArgLocations.push_back( | |
| 465 Traits::X86OperandMem::create(Func, Ty, esp, Loc)); | |
| 466 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | |
| 467 } | |
| 468 } | |
| 469 | |
| 470 // Adjust the parameter area so that the stack is aligned. It is | |
| 471 // assumed that the stack is already aligned at the start of the | |
| 472 // calling sequence. | |
| 473 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); | |
| 474 | |
| 475 // Subtract the appropriate amount for the argument area. This also | |
| 476 // takes care of setting the stack adjustment during emission. | |
| 477 // | |
| 478 // TODO: If for some reason the call instruction gets dead-code | |
| 479 // eliminated after lowering, we would need to ensure that the | |
| 480 // pre-call and the post-call esp adjustment get eliminated as well. | |
| 481 if (ParameterAreaSizeBytes) { | |
| 482 _adjust_stack(ParameterAreaSizeBytes); | |
| 483 } | |
| 484 | |
| 485 // Copy arguments that are passed on the stack to the appropriate | |
| 486 // stack locations. | |
| 487 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { | |
| 488 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | |
| 489 } | |
| 490 | |
| 491 // Copy arguments to be passed in registers to the appropriate | |
| 492 // registers. | |
| 493 // TODO: Investigate the impact of lowering arguments passed in | |
| 494 // registers after lowering stack arguments as opposed to the other | |
| 495 // way around. Lowering register arguments after stack arguments may | |
| 496 // reduce register pressure. On the other hand, lowering register | |
| 497 // arguments first (before stack arguments) may result in more compact | |
| 498 // code, as the memory operand displacements may end up being smaller | |
| 499 // before any stack adjustment is done. | |
| 500 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | |
| 501 Variable *Reg = | |
| 502 legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i); | |
| 503 // Generate a FakeUse of register arguments so that they do not get | |
| 504 // dead code eliminated as a result of the FakeKill of scratch | |
| 505 // registers after the call. | |
| 506 Context.insert(InstFakeUse::create(Func, Reg)); | |
| 507 } | |
| 508 // Generate the call instruction. Assign its result to a temporary | |
| 509 // with high register allocation weight. | |
| 510 Variable *Dest = Instr->getDest(); | |
| 511 // ReturnReg doubles as ReturnRegLo as necessary. | |
| 512 Variable *ReturnReg = nullptr; | |
| 513 Variable *ReturnRegHi = nullptr; | |
| 514 if (Dest) { | |
| 515 switch (Dest->getType()) { | |
| 516 case IceType_NUM: | |
| 517 llvm_unreachable("Invalid Call dest type"); | |
| 518 break; | |
| 519 case IceType_void: | |
| 520 break; | |
| 521 case IceType_i1: | |
| 522 case IceType_i8: | |
| 523 case IceType_i16: | |
| 524 case IceType_i32: | |
| 525 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_eax); | |
| 526 break; | |
| 527 case IceType_i64: | |
| 528 ReturnReg = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); | |
| 529 ReturnRegHi = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); | |
| 530 break; | |
| 531 case IceType_f32: | |
| 532 case IceType_f64: | |
| 533 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with | |
| 534 // the fstp instruction. | |
| 535 break; | |
| 536 case IceType_v4i1: | |
| 537 case IceType_v8i1: | |
| 538 case IceType_v16i1: | |
| 539 case IceType_v16i8: | |
| 540 case IceType_v8i16: | |
| 541 case IceType_v4i32: | |
| 542 case IceType_v4f32: | |
| 543 ReturnReg = makeReg(Dest->getType(), Traits::RegisterSet::Reg_xmm0); | |
| 544 break; | |
| 545 } | |
| 546 } | |
| 547 Operand *CallTarget = legalize(Instr->getCallTarget()); | |
| 548 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); | |
| 549 if (NeedSandboxing) { | |
| 550 if (llvm::isa<Constant>(CallTarget)) { | |
| 551 _bundle_lock(InstBundleLock::Opt_AlignToEnd); | |
| 552 } else { | |
| 553 Variable *CallTargetVar = nullptr; | |
| 554 _mov(CallTargetVar, CallTarget); | |
| 555 _bundle_lock(InstBundleLock::Opt_AlignToEnd); | |
| 556 const SizeT BundleSize = | |
| 557 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | |
| 558 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); | |
| 559 CallTarget = CallTargetVar; | |
| 560 } | |
| 561 } | |
| 562 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget); | |
| 563 Context.insert(NewCall); | |
| 564 if (NeedSandboxing) | |
| 565 _bundle_unlock(); | |
| 566 if (ReturnRegHi) | |
| 567 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | |
| 568 | |
| 569 // Add the appropriate offset to esp. The call instruction takes care | |
| 570 // of resetting the stack offset during emission. | |
| 571 if (ParameterAreaSizeBytes) { | |
| 572 Variable *esp = | |
| 573 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); | |
| 574 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes)); | |
| 575 } | |
| 576 | |
| 577 // Insert a register-kill pseudo instruction. | |
| 578 Context.insert(InstFakeKill::create(Func, NewCall)); | |
| 579 | |
| 580 // Generate a FakeUse to keep the call live if necessary. | |
| 581 if (Instr->hasSideEffects() && ReturnReg) { | |
| 582 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); | |
| 583 Context.insert(FakeUse); | |
| 584 } | |
| 585 | |
| 586 if (!Dest) | |
| 587 return; | |
| 588 | |
| 589 // Assign the result of the call to Dest. | |
| 590 if (ReturnReg) { | |
| 591 if (ReturnRegHi) { | |
| 592 assert(Dest->getType() == IceType_i64); | |
| 593 split64(Dest); | |
| 594 Variable *DestLo = Dest->getLo(); | |
| 595 Variable *DestHi = Dest->getHi(); | |
| 596 _mov(DestLo, ReturnReg); | |
| 597 _mov(DestHi, ReturnRegHi); | |
| 598 } else { | |
| 599 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 || | |
| 600 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 || | |
| 601 isVectorType(Dest->getType())); | |
| 602 if (isVectorType(Dest->getType())) { | |
| 603 _movp(Dest, ReturnReg); | |
| 604 } else { | |
| 605 _mov(Dest, ReturnReg); | |
| 606 } | |
| 607 } | |
| 608 } else if (isScalarFloatingType(Dest->getType())) { | |
| 609 // Special treatment for an FP function which returns its result in | |
| 610 // st(0). | |
| 611 // If Dest ends up being a physical xmm register, the fstp emit code | |
| 612 // will route st(0) through a temporary stack slot. | |
| 613 _fstp(Dest); | |
| 614 // Create a fake use of Dest in case it actually isn't used, | |
| 615 // because st(0) still needs to be popped. | |
| 616 Context.insert(InstFakeUse::create(Func, Dest)); | |
| 617 } | |
| 618 } | |
| 619 | |
| 620 } // end of namespace Ice | 950 } // end of namespace Ice |