src/IceTargetLoweringX8632.cpp - Issue 265703002: Add Om1 lowering with no optimizations

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 265703002: Add Om1 lowering with no optimizations (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master

Patch Set: Created 6 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//

	2 //

	3 // The Subzero Code Generator

	4 //

	5 // This file is distributed under the University of Illinois Open Source

	6 // License. See LICENSE.TXT for details.

	7 //

	8 //===----------------------------------------------------------------------===//

	9 //

	10 // This file implements the TargetLoweringX8632 class, which

	11 // consists almost entirely of the lowering sequence for each

	12 // high-level instruction. It also implements

	13 // TargetX8632Fast::postLower() which does the simplest possible

	14 // register allocation for the "fast" target.

	15 //

	16 //===----------------------------------------------------------------------===//

	17

	18 #include "IceDefs.h"

	19 #include "IceCfg.h"

	20 #include "IceCfgNode.h"

	21 #include "IceInstX8632.h"

	22 #include "IceOperand.h"

	23 #include "IceTargetLoweringX8632.def"

	24 #include "IceTargetLoweringX8632.h"

	25

	26 namespace Ice {

	27

	28 namespace {

	29

	30 // The following table summarizes the logic for lowering the fcmp instruction.

	31 // There is one table entry for each of the 16 conditions. A comment in

	32 // lowerFcmp() describes the lowering template. In the most general case, there

	33 // is a compare followed by two conditional branches, because some fcmp

	34 // conditions don't map to a single x86 conditional branch. However, in many

	35 // cases it is possible to swap the operands in the comparison and have a single

	36 // conditional branch. Since it's quite tedious to validate the table by hand,

	37 // good execution tests are helpful.

	38

	39 const struct _TableFcmp {

	40 uint32_t Default;

	41 bool SwapOperands;

	42 InstX8632Br::BrCond C1, C2;

	43 } TableFcmp[] = {

	44 #define X(val, dflt, swap, C1, C2) \

	45 { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \

	46 ,

	47 FCMPX8632_TABLE

	48 #undef X

	49 };

	50 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);

	51

	52 // The following table summarizes the logic for lowering the icmp instruction

	53 // for i32 and narrower types. Each icmp condition has a clear mapping to an

	54 // x86 conditional branch instruction.

	55

	56 const struct _TableIcmp32 {

	57 InstX8632Br::BrCond Mapping;

	58 } TableIcmp32[] = {

	59 #define X(val, C_32, C1_64, C2_64, C3_64) \

	60 { InstX8632Br::C_32 } \

	61 ,

	62 ICMPX8632_TABLE

	63 #undef X

	64 };

	65 const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);

	66

	67 // The following table summarizes the logic for lowering the icmp instruction

	68 // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and

	69 // conditional branches are needed. For the other conditions, three separate

	70 // conditional branches are needed.

	71 const struct _TableIcmp64 {

	72 InstX8632Br::BrCond C1, C2, C3;

	73 } TableIcmp64[] = {

	74 #define X(val, C_32, C1_64, C2_64, C3_64) \

	75 { InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 } \

	76 ,

	77 ICMPX8632_TABLE

	78 #undef X

	79 };

	80 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);

	81

	82 InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {

	83 size_t Index = static_cast<size_t>(Cond);

	84 assert(Index < TableIcmp32Size);

	85 return TableIcmp32[Index].Mapping;

	86 }

	87

	88 // In some cases, there are x-macros tables for both high-level and

	89 // low-level instructions/operands that use the same enum key value.

	90 // The tables are kept separate to maintain a proper separation

	91 // between abstraction layers. There is a risk that the tables

	92 // could get out of sync if enum values are reordered or if entries

	93 // are added or deleted. This dummy function uses static_assert to

	94 // ensure everything is kept in sync.

	95 void xMacroIntegrityCheck() {

	96 // Validate the enum values in FCMPX8632_TABLE.

	97 {

	98 // Define a temporary set of enum values based on low-level

	99 // table entries.

	100 enum _tmp_enum {

	101 #define X(val, dflt, swap, C1, C2) _tmp_##val,

	102 FCMPX8632_TABLE

	103 #undef X

	104 };

	105 // Define a set of constants based on high-level table entries.

	106 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag;

	107 ICEINSTFCMP_TABLE;

	108 #undef X

	109 // Define a set of constants based on low-level table entries,

	110 // and ensure the table entry keys are consistent.

	111 #define X(val, dflt, swap, C1, C2) \

	112 static const int _table2_##val = _tmp_##val; \

	113 STATIC_ASSERT(_table1_##val == _table2_##val);

	114 FCMPX8632_TABLE;

	115 #undef X

	116 // Repeat the static asserts with respect to the high-level

	117 // table entries in case the high-level table has extra entries.

	118 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);

	119 ICEINSTFCMP_TABLE;

	120 #undef X

	121 }

	122

	123 // Validate the enum values in ICMPX8632_TABLE.

	124 {

	125 // Define a temporary set of enum values based on low-level

	126 // table entries.

	127 enum _tmp_enum {

	128 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,

	129 ICMPX8632_TABLE

	130 #undef X

	131 };

	132 // Define a set of constants based on high-level table entries.

	133 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag;

	134 ICEINSTICMP_TABLE;

	135 #undef X

	136 // Define a set of constants based on low-level table entries,

	137 // and ensure the table entry keys are consistent.

	138 #define X(val, C_32, C1_64, C2_64, C3_64) \

	139 static const int _table2_##val = _tmp_##val; \

	140 STATIC_ASSERT(_table1_##val == _table2_##val);

	141 ICMPX8632_TABLE;

	142 #undef X

	143 // Repeat the static asserts with respect to the high-level

	144 // table entries in case the high-level table has extra entries.

	145 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);

	146 ICEINSTICMP_TABLE;

	147 #undef X

	148 }

	149

	150 // Validate the enum values in ICETYPEX8632_TABLE.

	151 {

	152 // Define a temporary set of enum values based on low-level

	153 // table entries.

	154 enum _tmp_enum {

	155 #define X(tag, cvt, sdss, width) _tmp_##tag,

	156 ICETYPEX8632_TABLE

	157 #undef X

	158 };

	159 // Define a set of constants based on high-level table entries.

	160 #define X(tag, size, align, str) static const int _table1_##tag = tag;

	161 ICETYPE_TABLE;

	162 #undef X

	163 // Define a set of constants based on low-level table entries,

	164 // and ensure the table entry keys are consistent.

	165 #define X(tag, cvt, sdss, width) \

	166 static const int _table2_##tag = _tmp_##tag; \

	167 STATIC_ASSERT(_table1_##tag == _table2_##tag);

	168 ICETYPEX8632_TABLE;

	169 #undef X

	170 // Repeat the static asserts with respect to the high-level

	171 // table entries in case the high-level table has extra entries.

	172 #define X(tag, size, align, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);

	173 ICETYPE_TABLE;

	174 #undef X

	175 }

	176 }

	177

	178 } // end of anonymous namespace

	179

	180 TargetX8632::TargetX8632(Cfg *Func)

	181 : TargetLowering(Func), IsEbpBasedFrame(false), FrameSizeLocals(0),

	182 LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),

	183 PhysicalRegisters(VarList(Reg_NUM)) {

	184 llvm::SmallBitVector IntegerRegisters(Reg_NUM);

	185 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM);

	186 llvm::SmallBitVector FloatRegisters(Reg_NUM);

	187 llvm::SmallBitVector InvalidRegisters(Reg_NUM);

	188 ScratchRegs.resize(Reg_NUM);

	189 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \

	190 frameptr, isI8, isInt, isFP) \

	191 IntegerRegisters[val] = isInt; \

	192 IntegerRegistersI8[val] = isI8; \

	193 FloatRegisters[val] = isFP; \

	194 ScratchRegs[val] = scratch;

	195 REGX8632_TABLE;

	196 #undef X

	197 TypeToRegisterSet[IceType_void] = InvalidRegisters;

	198 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;

	199 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;

	200 TypeToRegisterSet[IceType_i16] = IntegerRegisters;

	201 TypeToRegisterSet[IceType_i32] = IntegerRegisters;

	202 TypeToRegisterSet[IceType_i64] = IntegerRegisters;

	203 TypeToRegisterSet[IceType_f32] = FloatRegisters;

	204 TypeToRegisterSet[IceType_f64] = FloatRegisters;

	205 }

	206

	207 void TargetX8632::translateOm1() {

	208 GlobalContext *Context = Func->getContext();

	209 Ostream &Str = Context->getStrDump();

	210 Timer T_placePhiLoads;

	211 Func->placePhiLoads();

	212 if (Func->hasError())

	213 return;

	214 T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");

	215 Timer T_placePhiStores;

	216 Func->placePhiStores();

	217 if (Func->hasError())

	218 return;

	219 T_placePhiStores.printElapsedUs(Context, "placePhiStores()");

	220 Timer T_deletePhis;

	221 Func->deletePhis();

	222 if (Func->hasError())

	223 return;

	224 T_deletePhis.printElapsedUs(Context, "deletePhis()");

	225 if (Context->isVerbose())

	226 Str << "================ After Phi lowering ================\n";

	227 Func->dump();

	228

	229 Timer T_genCode;

	230 Func->genCode();

	231 if (Func->hasError())

	232 return;

	233 T_genCode.printElapsedUs(Context, "genCode()");

	234 if (Context->isVerbose())

	235 Str << "================ After initial x8632 codegen ================\n";

	236 Func->dump();

	237

	238 Timer T_genFrame;

	239 Func->genFrame();

	240 if (Func->hasError())

	241 return;

	242 T_genFrame.printElapsedUs(Context, "genFrame()");

	243 if (Context->isVerbose())

	244 Str << "================ After stack frame mapping ================\n";

	245 Func->dump();

	246 }

	247

	248 IceString TargetX8632::RegNames[] = {

	249 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \

	250 frameptr, isI8, isInt, isFP) \

	251 name,

	252 REGX8632_TABLE

	253 #undef X

	254 };

	255

	256 Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) {

	257 assert(RegNum < PhysicalRegisters.size());

	258 Variable *Reg = PhysicalRegisters[RegNum];

	259 if (Reg == NULL) {

	260 CfgNode *Node = NULL; // NULL means multi-block lifetime

	261 Reg = Func->makeVariable(IceType_i32, Node);

	262 Reg->setRegNum(RegNum);

	263 PhysicalRegisters[RegNum] = Reg;

	264 }

	265 return Reg;

	266 }

	267

	268 IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {

	269 assert(RegNum < Reg_NUM);

	270 static IceString RegNames8[] = {

	271 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \

	272 frameptr, isI8, isInt, isFP) \

	273 "" name8,

	274 REGX8632_TABLE

	275 #undef X

	276 };

	277 static IceString RegNames16[] = {

	278 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \

	279 frameptr, isI8, isInt, isFP) \

	280 "" name16,

	281 REGX8632_TABLE

	282 #undef X

	283 };

	284 switch (Ty) {

	285 case IceType_i1:

	286 case IceType_i8:

	287 return RegNames8[RegNum];

	288 case IceType_i16:

	289 return RegNames16[RegNum];

	290 default:

	291 return RegNames[RegNum];

	292 }

	293 }

	294

	295 void TargetX8632::emitVariable(const Variable *Var,

	296 const Cfg *Func) const {

	297 Ostream &Str = Ctx->getStrEmit();

	298 assert(Var->getLocalUseNode() == NULL \|\| Var->getLocalUseNode() == Func->getCu rrentNode());

	299 if (Var->hasReg()) {

	300 Str << getRegName(Var->getRegNum(), Var->getType());

	301 return;

	302 }

	303 Str << InstX8632::getWidthString(Var->getType());

	304 Str << " [" << getRegName(

	305 getFrameOrStackReg(), IceType_i32);

	306 int32_t Offset = Var->getStackOffset() + getStackAdjustment();

	307 if (Offset) {

	308 if (Offset > 0)

	309 Str << "+";

	310 Str << Offset;

	311 }

	312 Str << "]";

	313 }

	314

	315 // Helper function for addProlog(). Sets the frame offset for Arg,

	316 // updates InArgsSizeBytes according to Arg's width, and generates an

	317 // instruction to copy Arg into its assigned register if applicable.

	318 // For an I64 arg that has been split into Lo and Hi components, it

	319 // calls itself recursively on the components, taking care to handle

	320 // Lo first because of the little-endian architecture.

	321 void TargetX8632::setArgOffsetAndCopy(Variable Arg, Variable FramePtr,

	322 int32_t BasicFrameOffset,

	323 int32_t &InArgsSizeBytes) {

	324 Variable *Lo = Arg->getLo();

	325 Variable *Hi = Arg->getHi();

	326 Type Ty = Arg->getType();

	327 if (Lo && Hi && Ty == IceType_i64) {

	328 assert(Lo->getType() != IceType_i64); // don't want infinite recursion

	329 assert(Hi->getType() != IceType_i64); // don't want infinite recursion

	330 setArgOffsetAndCopy(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);

	331 setArgOffsetAndCopy(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);

	332 return;

	333 }

	334 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);

	335 if (Arg->hasReg()) {

	336 assert(Ty != IceType_i64);

	337 OperandX8632Mem *Mem = OperandX8632Mem::create(

	338 Func, Ty, FramePtr,

	339 Ctx->getConstantInt(IceType_i32, Arg->getStackOffset()));

	340 _mov(Arg, Mem);

	341 }

	342 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);

	343 }

	344

	345 void TargetX8632::addProlog(CfgNode *Node) {

	346 // If SimpleCoalescing is false, each variable without a register

	347 // gets its own unique stack slot, which leads to large stack

	348 // frames. If SimpleCoalescing is true, then each "global" variable

	349 // without a register gets its own slot, but "local" variable slots

	350 // are reused across basic blocks. E.g., if A and B are local to

	351 // block 1 and C is local to block 2, then C may share a slot with A

	352 // or B.

	353 const bool SimpleCoalescing = true;

	354 int32_t InArgsSizeBytes = 0;

	355 int32_t RetIpSizeBytes = 4;

	356 int32_t PreservedRegsSizeBytes = 0;

	357 LocalsSizeBytes = 0;

	358 Context.init(Node);

	359 Context.setInsertPoint(Context.getCur());

	360

	361 // Determine stack frame offsets for each Variable without a

	362 // register assignment. This can be done as one variable per stack

	363 // slot. Or, do coalescing by running the register allocator again

	364 // with an infinite set of registers (as a side effect, this gives

	365 // variables a second chance at physical register assignment).

	366 //

	367 // A middle ground approach is to leverage sparsity and allocate one

	368 // block of space on the frame for globals (variables with

	369 // multi-block lifetime), and one block to share for locals

	370 // (single-block lifetime).

	371

	372 llvm::SmallBitVector CalleeSaves =

	373 getRegisterSet(RegSet_CalleeSave, RegSet_None);

	374

	375 int32_t GlobalsSize = 0;

	376 std::vector<int> LocalsSize(Func->getNumNodes());

	377

	378 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and

	379 // LocalsSizeBytes.

	380 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());

	381 const VarList &Variables = Func->getVariables();

	382 const VarList &Args = Func->getArgs();

	383 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();

	384 I != E; ++I) {

	385 Variable Var = I;

	386 if (Var->hasReg()) {

	387 RegsUsed[Var->getRegNum()] = true;

	388 continue;

	389 }

	390 // An argument passed on the stack already has a stack slot.

	391 if (Var->getIsArg())

	392 continue;

	393 // A spill slot linked to a variable with a stack slot should reuse

	394 // that stack slot.

	395 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {

	396 if (Variable *Linked = Var->getPreferredRegister()) {

	397 if (!Linked->hasReg())

	398 continue;

	399 }

	400 }

	401 int32_t Increment = typeWidthInBytesOnStack(Var->getType());

	402 if (SimpleCoalescing) {

	403 if (Var->isMultiblockLife()) {

	404 GlobalsSize += Increment;

	405 } else {

	406 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();

	407 LocalsSize[NodeIndex] += Increment;

	408 if (LocalsSize[NodeIndex] > LocalsSizeBytes)

	409 LocalsSizeBytes = LocalsSize[NodeIndex];

	410 }

	411 } else {

	412 LocalsSizeBytes += Increment;

	413 }

	414 }

	415 LocalsSizeBytes += GlobalsSize;

	416

	417 // Add push instructions for preserved registers.

	418 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {

	419 if (CalleeSaves[i] && RegsUsed[i]) {

	420 PreservedRegsSizeBytes += 4;

	421 const bool SuppressStackAdjustment = true;

	422 _push(getPhysicalRegister(i), SuppressStackAdjustment);

	423 }

	424 }

	425

	426 // Generate "push ebp; mov ebp, esp"

	427 if (IsEbpBasedFrame) {

	428 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))

	429 .count() == 0);

	430 PreservedRegsSizeBytes += 4;

	431 Variable *ebp = getPhysicalRegister(Reg_ebp);

	432 Variable *esp = getPhysicalRegister(Reg_esp);

	433 const bool SuppressStackAdjustment = true;

	434 _push(ebp, SuppressStackAdjustment);

	435 _mov(ebp, esp);

	436 }

	437

	438 // Generate "sub esp, LocalsSizeBytes"

	439 if (LocalsSizeBytes)

	440 _sub(getPhysicalRegister(Reg_esp),

	441 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));

	442

	443 resetStackAdjustment();

	444

	445 // Fill in stack offsets for args, and copy args into registers for

	446 // those that were register-allocated. Args are pushed right to

	447 // left, so Arg[0] is closest to the stack/frame pointer.

	448 //

	449 // TODO: Make this right for different width args, calling

	450 // conventions, etc. For one thing, args passed in registers will

	451 // need to be copied/shuffled to their home registers (the

	452 // RegManager code may have some permutation logic to leverage),

	453 // and if they have no home register, home space will need to be

	454 // allocated on the stack to copy into.

	455 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());

	456 int32_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;

	457 if (!IsEbpBasedFrame)

	458 BasicFrameOffset += LocalsSizeBytes;

	459 for (SizeT i = 0; i < Args.size(); ++i) {

	460 Variable *Arg = Args[i];

	461 setArgOffsetAndCopy(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);

	462 }

	463

	464 // Fill in stack offsets for locals.

	465 int32_t TotalGlobalsSize = GlobalsSize;

	466 GlobalsSize = 0;

	467 LocalsSize.assign(LocalsSize.size(), 0);

	468 int32_t NextStackOffset = 0;

	469 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();

	470 I != E; ++I) {

	471 Variable Var = I;

	472 if (Var->hasReg()) {

	473 RegsUsed[Var->getRegNum()] = true;

	474 continue;

	475 }

	476 if (Var->getIsArg())

	477 continue;

	478 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {

	479 if (Variable *Linked = Var->getPreferredRegister()) {

	480 if (!Linked->hasReg()) {

	481 // TODO: Make sure Linked has already been assigned a stack

	482 // slot.

	483 Var->setStackOffset(Linked->getStackOffset());

	484 continue;

	485 }

	486 }

	487 }

	488 int32_t Increment = typeWidthInBytesOnStack(Var->getType());

	489 if (SimpleCoalescing) {

	490 if (Var->isMultiblockLife()) {

	491 GlobalsSize += Increment;

	492 NextStackOffset = GlobalsSize;

	493 } else {

	494 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();

	495 LocalsSize[NodeIndex] += Increment;

	496 NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex];

	497 }

	498 } else {

	499 NextStackOffset += Increment;

	500 }

	501 if (IsEbpBasedFrame)

	502 Var->setStackOffset(-NextStackOffset);

	503 else

	504 Var->setStackOffset(LocalsSizeBytes - NextStackOffset);

	505 }

	506 this->FrameSizeLocals = NextStackOffset;

	507 this->HasComputedFrame = true;

	508

	509 if (Func->getContext()->isVerbose(IceV_Frame)) {

	510 Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes

	511 << "\n"

	512 << "InArgsSizeBytes=" << InArgsSizeBytes

	513 << "\n"

	514 << "PreservedRegsSizeBytes="

	515 << PreservedRegsSizeBytes << "\n";

	516 }

	517 }

	518

	519 void TargetX8632::addEpilog(CfgNode *Node) {

	520 InstList &Insts = Node->getInsts();

	521 InstList::reverse_iterator RI, E;

	522 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {

	523 if (llvm::isa<InstX8632Ret>(*RI))

	524 break;

	525 }

	526 if (RI == E)

	527 return;

	528

	529 // Convert the reverse_iterator position into its corresponding

	530 // (forward) iterator position.

	531 InstList::iterator InsertPoint = RI.base();

	532 --InsertPoint;

	533 Context.init(Node);

	534 Context.setInsertPoint(InsertPoint);

	535

	536 Variable *esp = getPhysicalRegister(Reg_esp);

	537 if (IsEbpBasedFrame) {

	538 Variable *ebp = getPhysicalRegister(Reg_ebp);

	539 _mov(esp, ebp);

	540 _pop(ebp);

	541 } else {

	542 // add esp, LocalsSizeBytes

	543 if (LocalsSizeBytes)

	544 _add(esp, Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));

	545 }

	546

	547 // Add pop instructions for preserved registers.

	548 llvm::SmallBitVector CalleeSaves =

	549 getRegisterSet(RegSet_CalleeSave, RegSet_None);

	550 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {

	551 SizeT j = CalleeSaves.size() - i - 1;

	552 if (j == Reg_ebp && IsEbpBasedFrame)

	553 continue;

	554 if (CalleeSaves[j] && RegsUsed[j]) {

	555 _pop(getPhysicalRegister(j));

	556 }

	557 }

	558 }

	559

	560 void TargetX8632::split64(Variable *Var) {

	561 switch (Var->getType()) {

	562 default:

	563 return;

	564 case IceType_i64:

	565 // TODO: Only consider F64 if we need to push each half when

	566 // passing as an argument to a function call. Note that each half

	567 // is still typed as I32.

	568 case IceType_f64:

	569 break;

	570 }

	571 Variable *Lo = Var->getLo();

	572 Variable *Hi = Var->getHi();

	573 if (Lo) {

	574 assert(Hi);

	575 return;

	576 }

	577 assert(Hi == NULL);

	578 Lo = Func->makeVariable(IceType_i32, Context.getNode(),

	579 Var->getName() + "__lo");

	580 Hi = Func->makeVariable(IceType_i32, Context.getNode(),

	581 Var->getName() + "__hi");

	582 Var->setLoHi(Lo, Hi);

	583 if (Var->getIsArg()) {

	584 Lo->setIsArg(Func);

	585 Hi->setIsArg(Func);

	586 }

	587 }

	588

	589 Operand TargetX8632::loOperand(Operand Operand) {

	590 assert(Operand->getType() == IceType_i64);

	591 if (Operand->getType() != IceType_i64)

	592 return Operand;

	593 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {

	594 split64(Var);

	595 return Var->getLo();

	596 }

	597 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {

	598 uint64_t Mask = (1ull << 32) - 1;

	599 return Ctx->getConstantInt(IceType_i32, Const->getValue() & Mask);

	600 }

	601 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {

	602 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),

	603 Mem->getOffset(), Mem->getIndex(),

	604 Mem->getShift());

	605 }

	606 llvm_unreachable("Unsupported operand type");

	607 return NULL;

	608 }

	609

	610 Operand TargetX8632::hiOperand(Operand Operand) {

	611 assert(Operand->getType() == IceType_i64);

	612 if (Operand->getType() != IceType_i64)

	613 return Operand;

	614 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {

	615 split64(Var);

	616 return Var->getHi();

	617 }

	618 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {

	619 return Ctx->getConstantInt(IceType_i32, Const->getValue() >> 32);

	620 }

	621 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {

	622 Constant *Offset = Mem->getOffset();

	623 if (Offset == NULL)

	624 Offset = Ctx->getConstantInt(IceType_i32, 4);

	625 else if (ConstantInteger *IntOffset =

	626 llvm::dyn_cast<ConstantInteger>(Offset)) {

	627 Offset = Ctx->getConstantInt(IceType_i32, 4 + IntOffset->getValue());

	628 } else if (ConstantRelocatable *SymOffset =

	629 llvm::dyn_cast<ConstantRelocatable>(Offset)) {

	630 Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(),

	631 SymOffset->getName());

	632 }

	633 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,

	634 Mem->getIndex(), Mem->getShift());

	635 }

	636 llvm_unreachable("Unsupported operand type");

	637 return NULL;

	638 }

	639

	640 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,

	641 RegSetMask Exclude) const {

	642 llvm::SmallBitVector Registers(Reg_NUM);

	643

	644 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \

	645 frameptr, isI8, isInt, isFP) \

	646 if (scratch && (Include & RegSet_CallerSave)) \

	647 Registers[val] = true; \

	648 if (preserved && (Include & RegSet_CalleeSave)) \

	649 Registers[val] = true; \

	650 if (stackptr && (Include & RegSet_StackPointer)) \

	651 Registers[val] = true; \

	652 if (frameptr && (Include & RegSet_FramePointer)) \

	653 Registers[val] = true; \

	654 if (scratch && (Exclude & RegSet_CallerSave)) \

	655 Registers[val] = false; \

	656 if (preserved && (Exclude & RegSet_CalleeSave)) \

	657 Registers[val] = false; \

	658 if (stackptr && (Exclude & RegSet_StackPointer)) \

	659 Registers[val] = false; \

	660 if (frameptr && (Exclude & RegSet_FramePointer)) \

	661 Registers[val] = false;

	662

	663 REGX8632_TABLE

	664

	665 #undef X

	666

	667 return Registers;

	668 }

	669

	670 void TargetX8632::lowerAlloca(const InstAlloca *Inst) {

	671 IsEbpBasedFrame = true;

	672 // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize

	673 // the number of adjustments of esp, etc.

	674 Variable *esp = getPhysicalRegister(Reg_esp);

	675 Operand *TotalSize = legalize(Inst->getSizeInBytes());

	676 Variable *Dest = Inst->getDest();

	677 _sub(esp, TotalSize);

	678 _mov(Dest, esp);

	679 }

	680

	681 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {

	682 Variable *Dest = Inst->getDest();

	683 Operand *Src0 = legalize(Inst->getSrc(0));

	684 Operand *Src1 = legalize(Inst->getSrc(1));

	685 if (Dest->getType() == IceType_i64) {

	686 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

	687 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

	688 Operand *Src0Lo = loOperand(Src0);

	689 Operand *Src0Hi = hiOperand(Src0);

	690 Operand *Src1Lo = loOperand(Src1);

	691 Operand *Src1Hi = hiOperand(Src1);

	692 Variable T_Lo = NULL, T_Hi = NULL;

	693 switch (Inst->getOp()) {

	694 case InstArithmetic::Add:

	695 _mov(T_Lo, Src0Lo);

	696 _add(T_Lo, Src1Lo);

	697 _mov(DestLo, T_Lo);

	698 _mov(T_Hi, Src0Hi);

	699 _adc(T_Hi, Src1Hi);

	700 _mov(DestHi, T_Hi);

	701 break;

	702 case InstArithmetic::And:

	703 _mov(T_Lo, Src0Lo);

	704 _and(T_Lo, Src1Lo);

	705 _mov(DestLo, T_Lo);

	706 _mov(T_Hi, Src0Hi);

	707 _and(T_Hi, Src1Hi);

	708 _mov(DestHi, T_Hi);

	709 break;

	710 case InstArithmetic::Or:

	711 _mov(T_Lo, Src0Lo);

	712 _or(T_Lo, Src1Lo);

	713 _mov(DestLo, T_Lo);

	714 _mov(T_Hi, Src0Hi);

	715 _or(T_Hi, Src1Hi);

	716 _mov(DestHi, T_Hi);

	717 break;

	718 case InstArithmetic::Xor:

	719 _mov(T_Lo, Src0Lo);

	720 _xor(T_Lo, Src1Lo);

	721 _mov(DestLo, T_Lo);

	722 _mov(T_Hi, Src0Hi);

	723 _xor(T_Hi, Src1Hi);

	724 _mov(DestHi, T_Hi);

	725 break;

	726 case InstArithmetic::Sub:

	727 _mov(T_Lo, Src0Lo);

	728 _sub(T_Lo, Src1Lo);

	729 _mov(DestLo, T_Lo);

	730 _mov(T_Hi, Src0Hi);

	731 _sbb(T_Hi, Src1Hi);

	732 _mov(DestHi, T_Hi);

	733 break;

	734 case InstArithmetic::Mul: {

	735 Variable T_1 = NULL, T_2 = NULL, *T_3 = NULL;

	736 Variable *T_4Lo = makeReg(IceType_i32, Reg_eax);

	737 Variable *T_4Hi = makeReg(IceType_i32, Reg_edx);

	738 // gcc does the following:

	739 // a=b*c ==>

	740 // t1 = b.hi; t1 *=(imul) c.lo

	741 // t2 = c.hi; t2 *=(imul) b.lo

	742 // t3:eax = b.lo

	743 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo

	744 // a.lo = t4.lo

	745 // t4.hi += t1

	746 // t4.hi += t2

	747 // a.hi = t4.hi

	748 _mov(T_1, Src0Hi);

	749 _imul(T_1, Src1Lo);

	750 _mov(T_2, Src1Hi);

	751 _imul(T_2, Src0Lo);

	752 _mov(T_3, Src0Lo, Reg_eax);

	753 _mul(T_4Lo, T_3, Src1Lo);

	754 // The mul instruction produces two dest variables, edx:eax. We

	755 // create a fake definition of edx to account for this.

	756 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));

	757 _mov(DestLo, T_4Lo);

	758 _add(T_4Hi, T_1);

	759 _add(T_4Hi, T_2);

	760 _mov(DestHi, T_4Hi);

	761 } break;

	762 case InstArithmetic::Shl: {

	763 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.

	764 // gcc does the following:

	765 // a=b<<c ==>

	766 // t1:ecx = c.lo & 0xff

	767 // t2 = b.lo

	768 // t3 = b.hi

	769 // t3 = shld t3, t2, t1

	770 // t2 = shl t2, t1

	771 // test t1, 0x20

	772 // je L1

	773 // use(t3)

	774 // t3 = t2

	775 // t2 = 0

	776 // L1:

	777 // a.lo = t2

	778 // a.hi = t3

	779 Variable T_1 = NULL, T_2 = NULL, *T_3 = NULL;

	780 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);

	781 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);

	782 InstX8632Label *Label = InstX8632Label::create(Func, this);

	783 _mov(T_1, Src1Lo, Reg_ecx);

	784 _mov(T_2, Src0Lo);

	785 _mov(T_3, Src0Hi);

	786 _shld(T_3, T_2, T_1);

	787 _shl(T_2, T_1);

	788 _test(T_1, BitTest);

	789 _br(InstX8632Br::Br_e, Label);

	790 // Because of the intra-block control flow, we need to fake a use

	791 // of T_3 to prevent its earlier definition from being dead-code

	792 // eliminated in the presence of its later definition.

	793 Context.insert(InstFakeUse::create(Func, T_3));

	794 _mov(T_3, T_2);

	795 _mov(T_2, Zero);

	796 Context.insert(Label);

	797 _mov(DestLo, T_2);

	798 _mov(DestHi, T_3);

	799 } break;

	800 case InstArithmetic::Lshr: {

	801 // a=b>>c (unsigned) ==>

	802 // t1:ecx = c.lo & 0xff

	803 // t2 = b.lo

	804 // t3 = b.hi

	805 // t2 = shrd t2, t3, t1

	806 // t3 = shr t3, t1

	807 // test t1, 0x20

	808 // je L1

	809 // use(t2)

	810 // t2 = t3

	811 // t3 = 0

	812 // L1:

	813 // a.lo = t2

	814 // a.hi = t3

	815 Variable T_1 = NULL, T_2 = NULL, *T_3 = NULL;

	816 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);

	817 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);

	818 InstX8632Label *Label = InstX8632Label::create(Func, this);

	819 _mov(T_1, Src1Lo, Reg_ecx);

	820 _mov(T_2, Src0Lo);

	821 _mov(T_3, Src0Hi);

	822 _shrd(T_2, T_3, T_1);

	823 _shr(T_3, T_1);

	824 _test(T_1, BitTest);

	825 _br(InstX8632Br::Br_e, Label);

	826 // Because of the intra-block control flow, we need to fake a use

	827 // of T_2 to prevent its earlier definition from being dead-code

	828 // eliminated in the presence of its later definition.

	829 Context.insert(InstFakeUse::create(Func, T_2));

	830 _mov(T_2, T_3);

	831 _mov(T_3, Zero);

	832 Context.insert(Label);

	833 _mov(DestLo, T_2);

	834 _mov(DestHi, T_3);

	835 } break;

	836 case InstArithmetic::Ashr: {

	837 // a=b>>c (signed) ==>

	838 // t1:ecx = c.lo & 0xff

	839 // t2 = b.lo

	840 // t3 = b.hi

	841 // t2 = shrd t2, t3, t1

	842 // t3 = sar t3, t1

	843 // test t1, 0x20

	844 // je L1

	845 // use(t2)

	846 // t2 = t3

	847 // t3 = sar t3, 0x1f

	848 // L1:

	849 // a.lo = t2

	850 // a.hi = t3

	851 Variable T_1 = NULL, T_2 = NULL, *T_3 = NULL;

	852 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);

	853 Constant *SignExtend = Ctx->getConstantInt(IceType_i32, 0x1f);

	854 InstX8632Label *Label = InstX8632Label::create(Func, this);

	855 _mov(T_1, Src1Lo, Reg_ecx);

	856 _mov(T_2, Src0Lo);

	857 _mov(T_3, Src0Hi);

	858 _shrd(T_2, T_3, T_1);

	859 _sar(T_3, T_1);

	860 _test(T_1, BitTest);

	861 _br(InstX8632Br::Br_e, Label);

	862 // Because of the intra-block control flow, we need to fake a use

	863 // of T_2 to prevent its earlier definition from being dead-code

	864 // eliminated in the presence of its later definition.

	865 Context.insert(InstFakeUse::create(Func, T_2));

	866 _mov(T_2, T_3);

	867 _sar(T_3, SignExtend);

	868 Context.insert(Label);

	869 _mov(DestLo, T_2);

	870 _mov(DestHi, T_3);

	871 } break;

	872 case InstArithmetic::Udiv: {

	873 const SizeT MaxSrcs = 2;

	874 InstCall *Call = makeHelperCall("__udivdi3", Dest, MaxSrcs);

	875 Call->addArg(Inst->getSrc(0));

	876 Call->addArg(Inst->getSrc(1));

	877 lowerCall(Call);

	878 } break;

	879 case InstArithmetic::Sdiv: {

	880 const SizeT MaxSrcs = 2;

	881 InstCall *Call = makeHelperCall("__divdi3", Dest, MaxSrcs);

	882 Call->addArg(Inst->getSrc(0));

	883 Call->addArg(Inst->getSrc(1));

	884 lowerCall(Call);

	885 } break;

	886 case InstArithmetic::Urem: {

	887 const SizeT MaxSrcs = 2;

	888 InstCall *Call = makeHelperCall("__umoddi3", Dest, MaxSrcs);

	889 Call->addArg(Inst->getSrc(0));

	890 Call->addArg(Inst->getSrc(1));

	891 lowerCall(Call);

	892 } break;

	893 case InstArithmetic::Srem: {

	894 const SizeT MaxSrcs = 2;

	895 InstCall *Call = makeHelperCall("__moddi3", Dest, MaxSrcs);

	896 Call->addArg(Inst->getSrc(0));

	897 Call->addArg(Inst->getSrc(1));

	898 lowerCall(Call);

	899 } break;

	900 case InstArithmetic::Fadd:

	901 case InstArithmetic::Fsub:

	902 case InstArithmetic::Fmul:

	903 case InstArithmetic::Fdiv:

	904 case InstArithmetic::Frem:

	905 llvm_unreachable("FP instruction with i64 type");

	906 break;

	907 }

	908 } else { // Dest->getType() != IceType_i64

	909 Variable *T_edx = NULL;

	910 Variable *T = NULL;

	911 switch (Inst->getOp()) {

	912 case InstArithmetic::Add:

	913 _mov(T, Src0);

	914 _add(T, Src1);

	915 _mov(Dest, T);

	916 break;

	917 case InstArithmetic::And:

	918 _mov(T, Src0);

	919 _and(T, Src1);

	920 _mov(Dest, T);

	921 break;

	922 case InstArithmetic::Or:

	923 _mov(T, Src0);

	924 _or(T, Src1);

	925 _mov(Dest, T);

	926 break;

	927 case InstArithmetic::Xor:

	928 _mov(T, Src0);

	929 _xor(T, Src1);

	930 _mov(Dest, T);

	931 break;

	932 case InstArithmetic::Sub:

	933 _mov(T, Src0);

	934 _sub(T, Src1);

	935 _mov(Dest, T);

	936 break;

	937 case InstArithmetic::Mul:

	938 // TODO: Optimize for llvm::isa<Constant>(Src1)

	939 // TODO: Strength-reduce multiplications by a constant,

	940 // particularly -1 and powers of 2. Advanced: use lea to

	941 // multiply by 3, 5, 9.

	942 //

	943 // The 8-bit version of imul only allows the form "imul r/m8"

	944 // where T must be in eax.

	945 if (Dest->getType() == IceType_i8)

	946 _mov(T, Src0, Reg_eax);

	947 else

	948 _mov(T, Src0);

	949 _imul(T, Src1);

	950 _mov(Dest, T);

	951 break;

	952 case InstArithmetic::Shl:

	953 _mov(T, Src0);

	954 if (!llvm::isa<Constant>(Src1))

	955 Src1 = legalizeToVar(Src1, false, Reg_ecx);

	956 _shl(T, Src1);

	957 _mov(Dest, T);

	958 break;

	959 case InstArithmetic::Lshr:

	960 _mov(T, Src0);

	961 if (!llvm::isa<Constant>(Src1))

	962 Src1 = legalizeToVar(Src1, false, Reg_ecx);

	963 _shr(T, Src1);

	964 _mov(Dest, T);

	965 break;

	966 case InstArithmetic::Ashr:

	967 _mov(T, Src0);

	968 if (!llvm::isa<Constant>(Src1))

	969 Src1 = legalizeToVar(Src1, false, Reg_ecx);

	970 _sar(T, Src1);

	971 _mov(Dest, T);

	972 break;

	973 case InstArithmetic::Udiv:

	974 if (Dest->getType() == IceType_i8) {

	975 Variable *T_ah = NULL;

	976 Constant *Zero = Ctx->getConstantInt(IceType_i8, 0);

	977 _mov(T, Src0, Reg_eax);

	978 _mov(T_ah, Zero, Reg_ah);

	979 _div(T_ah, Src1, T);

	980 Context.insert(InstFakeUse::create(Func, T_ah));

	981 _mov(Dest, T);

	982 } else {

	983 // TODO: fix for 8-bit, see Urem

	984 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);

	985 _mov(T, Src0, Reg_eax);

	986 _mov(T_edx, Zero, Reg_edx);

	987 _div(T, Src1, T_edx);

	988 _mov(Dest, T);

	989 }

	990 break;

	991 case InstArithmetic::Sdiv:

	992 T_edx = makeReg(IceType_i32, Reg_edx);

	993 _mov(T, Src0, Reg_eax);

	994 _cdq(T_edx, T);

	995 _idiv(T, Src1, T_edx);

	996 _mov(Dest, T);

	997 break;

	998 case InstArithmetic::Urem:

	999 if (Dest->getType() == IceType_i8) {

	1000 Variable *T_ah = NULL;

	1001 Constant *Zero = Ctx->getConstantInt(IceType_i8, 0);

	1002 _mov(T, Src0, Reg_eax);

	1003 _mov(T_ah, Zero, Reg_ah);

	1004 _div(T_ah, Src1, T);

	1005 _mov(Dest, T_ah);

	1006 } else {

	1007 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);

	1008 _mov(T_edx, Zero, Reg_edx);

	1009 _mov(T, Src0, Reg_eax);

	1010 _div(T_edx, Src1, T);

	1011 _mov(Dest, T_edx);

	1012 }

	1013 break;

	1014 case InstArithmetic::Srem:

	1015 T_edx = makeReg(IceType_i32, Reg_edx);

	1016 _mov(T, Src0, Reg_eax);

	1017 _cdq(T_edx, T);

	1018 _idiv(T_edx, Src1, T);

	1019 _mov(Dest, T_edx);

	1020 break;

	1021 case InstArithmetic::Fadd:

	1022 _mov(T, Src0);

	1023 _addss(T, Src1);

	1024 _mov(Dest, T);

	1025 break;

	1026 case InstArithmetic::Fsub:

	1027 _mov(T, Src0);

	1028 _subss(T, Src1);

	1029 _mov(Dest, T);

	1030 break;

	1031 case InstArithmetic::Fmul:

	1032 _mov(T, Src0);

	1033 _mulss(T, Src1);

	1034 _mov(Dest, T);

	1035 break;

	1036 case InstArithmetic::Fdiv:

	1037 _mov(T, Src0);

	1038 _divss(T, Src1);

	1039 _mov(Dest, T);

	1040 break;

	1041 case InstArithmetic::Frem: {

	1042 const SizeT MaxSrcs = 2;

	1043 Type Ty = Dest->getType();

	1044 InstCall *Call =

	1045 makeHelperCall(Ty == IceType_f32 ? "fmodf" : "fmod", Dest, MaxSrcs);

	1046 Call->addArg(Src0);

	1047 Call->addArg(Src1);

	1048 return lowerCall(Call);

	1049 } break;

	1050 }

	1051 }

	1052 }

	1053

	1054 void TargetX8632::lowerAssign(const InstAssign *Inst) {

	1055 Variable *Dest = Inst->getDest();

	1056 Operand *Src0 = legalize(Inst->getSrc(0));

	1057 assert(Dest->getType() == Src0->getType());

	1058 if (Dest->getType() == IceType_i64) {

	1059 Operand *Src0Lo = loOperand(Src0);

	1060 Operand *Src0Hi = hiOperand(Src0);

	1061 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

	1062 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

	1063 Variable T_Lo = NULL, T_Hi = NULL;

	1064 _mov(T_Lo, Src0Lo);

	1065 _mov(DestLo, T_Lo);

	1066 _mov(T_Hi, Src0Hi);

	1067 _mov(DestHi, T_Hi);

	1068 } else {

	1069 const bool AllowOverlap = true;

	1070 // RI is either a physical register or an immediate.

	1071 Operand *RI = legalize(Src0, Legal_Reg \| Legal_Imm, AllowOverlap);

	1072 _mov(Dest, RI);

	1073 }

	1074 }

	1075

	1076 void TargetX8632::lowerBr(const InstBr *Inst) {

	1077 if (Inst->isUnconditional()) {

	1078 _br(Inst->getTargetUnconditional());

	1079 } else {

	1080 Operand *Src0 = legalize(Inst->getCondition());

	1081 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);

	1082 _cmp(Src0, Zero);

	1083 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());

	1084 }

	1085 }

	1086

	1087 void TargetX8632::lowerCall(const InstCall *Instr) {

	1088 // Generate a sequence of push instructions, pushing right to left,

	1089 // keeping track of stack offsets in case a push involves a stack

	1090 // operand and we are using an esp-based frame.

	1091 uint32_t StackOffset = 0;

	1092 // TODO: If for some reason the call instruction gets dead-code

	1093 // eliminated after lowering, we would need to ensure that the

	1094 // pre-call push instructions and the post-call esp adjustment get

	1095 // eliminated as well.

	1096 for (SizeT NumArgs = Instr->getNumArgs(), i = 0; i < NumArgs; ++i) {

	1097 Operand *Arg = legalize(Instr->getArg(NumArgs - i - 1));

	1098 if (Arg->getType() == IceType_i64) {

	1099 _push(hiOperand(Arg));

	1100 _push(loOperand(Arg));

	1101 } else if (Arg->getType() == IceType_f64) {

	1102 // If the Arg turns out to be a memory operand, we need to push

	1103 // 8 bytes, which requires two push instructions. This ends up

	1104 // being somewhat clumsy in the current IR, so we use a

	1105 // workaround. Force the operand into a (xmm) register, and

	1106 // then push the register. An xmm register push is actually not

	1107 // possible in x86, but the Push instruction emitter handles

	1108 // this by decrementing the stack pointer and directly writing

	1109 // the xmm register value.

	1110 Variable *T = NULL;

	1111 _mov(T, Arg);

	1112 _push(T);

	1113 } else {

	1114 _push(Arg);

	1115 }

	1116 StackOffset += typeWidthInBytesOnStack(Arg->getType());

	1117 }

	1118 // Generate the call instruction. Assign its result to a temporary

	1119 // with high register allocation weight.

	1120 Variable *Dest = Instr->getDest();

	1121 Variable *eax = NULL; // doubles as RegLo as necessary

	1122 Variable *edx = NULL;

	1123 if (Dest) {

	1124 switch (Dest->getType()) {

	1125 case IceType_NUM:

	1126 llvm_unreachable("Invalid Call dest type");

	1127 break;

	1128 case IceType_void:

	1129 break;

	1130 case IceType_i1:

	1131 case IceType_i8:

	1132 case IceType_i16:

	1133 case IceType_i32:

	1134 eax = makeReg(Dest->getType(), Reg_eax);

	1135 break;

	1136 case IceType_i64:

	1137 eax = makeReg(IceType_i32, Reg_eax);

	1138 edx = makeReg(IceType_i32, Reg_edx);

	1139 break;

	1140 case IceType_f32:

	1141 case IceType_f64:

	1142 // Leave eax==edx==NULL, and capture the result with the fstp

	1143 // instruction.

	1144 break;

	1145 }

	1146 }

	1147 Operand *CallTarget = legalize(Instr->getCallTarget());

	1148 Inst *NewCall = InstX8632Call::create(Func, eax, CallTarget);

	1149 Context.insert(NewCall);

	1150 if (edx)

	1151 Context.insert(InstFakeDef::create(Func, edx));

	1152

	1153 // Add the appropriate offset to esp.

	1154 if (StackOffset) {

	1155 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);

	1156 _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset));

	1157 }

	1158

	1159 // Insert a register-kill pseudo instruction.

	1160 VarList KilledRegs;

	1161 for (SizeT i = 0; i < ScratchRegs.size(); ++i) {

	1162 if (ScratchRegs[i])

	1163 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));

	1164 }

	1165 if (!KilledRegs.empty()) {

	1166 Inst *Kill = InstFakeKill::create(Func, KilledRegs, NewCall);

	1167 Context.insert(Kill);

	1168 }

	1169

	1170 // Generate a FakeUse to keep the call live if necessary.

	1171 if (Instr->hasSideEffects() && eax) {

	1172 Inst *FakeUse = InstFakeUse::create(Func, eax);

	1173 Context.insert(FakeUse);

	1174 }

	1175

	1176 // Generate Dest=eax assignment.

	1177 if (Dest && eax) {

	1178 if (edx) {

	1179 split64(Dest);

	1180 Variable *DestLo = Dest->getLo();

	1181 Variable *DestHi = Dest->getHi();

	1182 DestLo->setPreferredRegister(eax, false);

	1183 DestHi->setPreferredRegister(edx, false);

	1184 _mov(DestLo, eax);

	1185 _mov(DestHi, edx);

	1186 } else {

	1187 Dest->setPreferredRegister(eax, false);

	1188 _mov(Dest, eax);

	1189 }

	1190 }

	1191

	1192 // Special treatment for an FP function which returns its result in

	1193 // st(0).

	1194 if (Dest &&

	1195 (Dest->getType() == IceType_f32 \|\| Dest->getType() == IceType_f64)) {

	1196 _fstp(Dest);

	1197 // If Dest ends up being a physical xmm register, the fstp emit

	1198 // code will route st(0) through a temporary stack slot.

	1199 }

	1200 }

	1201

	1202 void TargetX8632::lowerCast(const InstCast *Inst) {

	1203 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)

	1204 InstCast::OpKind CastKind = Inst->getCastKind();

	1205 Variable *Dest = Inst->getDest();

	1206 // Src0RM is the source operand legalized to physical register or memory, but

	1207 // not immediate, since the relevant x86 native instructions don't allow an

	1208 // immediate operand. If the operand is an immediate, we could consider

	1209 // computing the strength-reduced result at translation time, but we're

	1210 // unlikely to see something like that in the bitcode that the optimizer

	1211 // wouldn't have already taken care of.

	1212 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg \| Legal_Mem, true);

	1213 switch (CastKind) {

	1214 default:

	1215 Func->setError("Cast type not supported");

	1216 return;

	1217 case InstCast::Sext:

	1218 if (Dest->getType() == IceType_i64) {

	1219 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2

	1220 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

	1221 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

	1222 Variable *T_Lo = makeReg(DestLo->getType());

	1223 if (Src0RM->getType() == IceType_i32)

	1224 _mov(T_Lo, Src0RM);

	1225 else

	1226 _movsx(T_Lo, Src0RM);

	1227 _mov(DestLo, T_Lo);

	1228 Variable *T_Hi = NULL;

	1229 Constant *Shift = Ctx->getConstantInt(IceType_i32, 31);

	1230 _mov(T_Hi, T_Lo);

	1231 _sar(T_Hi, Shift);

	1232 _mov(DestHi, T_Hi);

	1233 } else {

	1234 // TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and

	1235 // also copy to the high operand of a 64-bit variable.

	1236 // t1 = movsx src; dst = t1

	1237 Variable *T = makeReg(Dest->getType());

	1238 _movsx(T, Src0RM);

	1239 _mov(Dest, T);

	1240 }

	1241 break;

	1242 case InstCast::Zext:

	1243 if (Dest->getType() == IceType_i64) {

	1244 // t1=movzx src; dst.lo=t1; dst.hi=0

	1245 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);

	1246 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

	1247 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

	1248 Variable *Tmp = makeReg(DestLo->getType());

	1249 if (Src0RM->getType() == IceType_i32)

	1250 _mov(Tmp, Src0RM);

	1251 else

	1252 _movzx(Tmp, Src0RM);

	1253 _mov(DestLo, Tmp);

	1254 _mov(DestHi, Zero);

	1255 } else if (Src0RM->getType() == IceType_i1) {

	1256 // t = Src0RM; t &= 1; Dest = t

	1257 Operand *One = Ctx->getConstantInt(IceType_i32, 1);

	1258 Variable *T = makeReg(IceType_i32);

	1259 _movzx(T, Src0RM);

	1260 _and(T, One);

	1261 _mov(Dest, T);

	1262 } else {

	1263 // t1 = movzx src; dst = t1

	1264 Variable *T = makeReg(Dest->getType());

	1265 _movzx(T, Src0RM);

	1266 _mov(Dest, T);

	1267 }

	1268 break;

	1269 case InstCast::Trunc: {

	1270 if (Src0RM->getType() == IceType_i64)

	1271 Src0RM = loOperand(Src0RM);

	1272 // t1 = trunc Src0RM; Dest = t1

	1273 Variable *T = NULL;

	1274 _mov(T, Src0RM);

	1275 _mov(Dest, T);

	1276 break;

	1277 }

	1278 case InstCast::Fptrunc:

	1279 case InstCast::Fpext: {

	1280 // t1 = cvt Src0RM; Dest = t1

	1281 Variable *T = makeReg(Dest->getType());

	1282 _cvt(T, Src0RM);

	1283 _mov(Dest, T);

	1284 break;

	1285 }

	1286 case InstCast::Fptosi:

	1287 if (Dest->getType() == IceType_i64) {

	1288 // Use a helper for converting floating-point values to 64-bit

	1289 // integers. SSE2 appears to have no way to convert from xmm

	1290 // registers to something like the edx:eax register pair, and

	1291 // gcc and clang both want to use x87 instructions complete with

	1292 // temporary manipulation of the status word. This helper is

	1293 // not needed for x86-64.

	1294 split64(Dest);

	1295 const SizeT MaxSrcs = 1;

	1296 Type SrcType = Inst->getSrc(0)->getType();

	1297 InstCall *Call = makeHelperCall(

	1298 SrcType == IceType_f32 ? "cvtftosi64" : "cvtdtosi64", Dest, MaxSrcs);

	1299 Call->addArg(Inst->getSrc(0));

	1300 lowerCall(Call);

	1301 } else {

	1302 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type

	1303 Variable *T_1 = makeReg(IceType_i32);

	1304 Variable *T_2 = makeReg(Dest->getType());

	1305 _cvt(T_1, Src0RM);

	1306 _mov(T_2, T_1); // T_1 and T_2 may have different integer types

	1307 _mov(Dest, T_2);

	1308 T_2->setPreferredRegister(T_1, true);

	1309 }

	1310 break;

	1311 case InstCast::Fptoui:

	1312 if (Dest->getType() == IceType_i64 \|\| Dest->getType() == IceType_i32) {

	1313 // Use a helper for both x86-32 and x86-64.

	1314 split64(Dest);

	1315 const SizeT MaxSrcs = 1;

	1316 Type DestType = Dest->getType();

	1317 Type SrcType = Src0RM->getType();

	1318 IceString DstSubstring = (DestType == IceType_i64 ? "64" : "32");

	1319 IceString SrcSubstring = (SrcType == IceType_f32 ? "f" : "d");

	1320 // Possibilities are cvtftoui32, cvtdtoui32, cvtftoui64, cvtdtoui64

	1321 IceString TargetString = "cvt" + SrcSubstring + "toui" + DstSubstring;

	1322 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);

	1323 Call->addArg(Inst->getSrc(0));

	1324 lowerCall(Call);

	1325 return;

	1326 } else {

	1327 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type

	1328 Variable *T_1 = makeReg(IceType_i32);

	1329 Variable *T_2 = makeReg(Dest->getType());

	1330 _cvt(T_1, Src0RM);

	1331 _mov(T_2, T_1); // T_1 and T_2 may have different integer types

	1332 _mov(Dest, T_2);

	1333 T_2->setPreferredRegister(T_1, true);

	1334 }

	1335 break;

	1336 case InstCast::Sitofp:

	1337 if (Src0RM->getType() == IceType_i64) {

	1338 // Use a helper for x86-32.

	1339 const SizeT MaxSrcs = 1;

	1340 Type DestType = Dest->getType();

	1341 InstCall *Call = makeHelperCall(

	1342 DestType == IceType_f32 ? "cvtsi64tof" : "cvtsi64tod", Dest, MaxSrcs);

	1343 Call->addArg(Inst->getSrc(0));

	1344 lowerCall(Call);

	1345 return;

	1346 } else {

	1347 // Sign-extend the operand.

	1348 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2

	1349 Variable *T_1 = makeReg(IceType_i32);

	1350 Variable *T_2 = makeReg(Dest->getType());

	1351 if (Src0RM->getType() == IceType_i32)

	1352 _mov(T_1, Src0RM);

	1353 else

	1354 _movsx(T_1, Src0RM);

	1355 _cvt(T_2, T_1);

	1356 _mov(Dest, T_2);

	1357 }

	1358 break;

	1359 case InstCast::Uitofp:

	1360 if (Src0RM->getType() == IceType_i64 \|\| Src0RM->getType() == IceType_i32) {

	1361 // Use a helper for x86-32 and x86-64. Also use a helper for

	1362 // i32 on x86-32.

	1363 const SizeT MaxSrcs = 1;

	1364 Type DestType = Dest->getType();

	1365 IceString SrcSubstring = (Src0RM->getType() == IceType_i64 ? "64" : "32");

	1366 IceString DstSubstring = (DestType == IceType_f32 ? "f" : "d");

	1367 // Possibilities are cvtui32tof, cvtui32tod, cvtui64tof, cvtui64tod

	1368 IceString TargetString = "cvtui" + SrcSubstring + "to" + DstSubstring;

	1369 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);

	1370 Call->addArg(Inst->getSrc(0));

	1371 lowerCall(Call);

	1372 return;

	1373 } else {

	1374 // Zero-extend the operand.

	1375 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2

	1376 Variable *T_1 = makeReg(IceType_i32);

	1377 Variable *T_2 = makeReg(Dest->getType());

	1378 if (Src0RM->getType() == IceType_i32)

	1379 _mov(T_1, Src0RM);

	1380 else

	1381 _movzx(T_1, Src0RM);

	1382 _cvt(T_2, T_1);

	1383 _mov(Dest, T_2);

	1384 }

	1385 break;

	1386 case InstCast::Bitcast:

	1387 if (Dest->getType() == Src0RM->getType()) {

	1388 InstAssign *Assign = InstAssign::create(Func, Dest, Src0RM);

	1389 lowerAssign(Assign);

	1390 llvm_unreachable("Pointer bitcasts aren't lowered correctly.");

	1391 return;

	1392 }

	1393 switch (Dest->getType()) {

	1394 default:

	1395 llvm_unreachable("Unexpected Bitcast dest type");

	1396 case IceType_i32:

	1397 case IceType_f32: {

	1398 Type DestType = Dest->getType();

	1399 Type SrcType = Src0RM->getType();

	1400 assert((DestType == IceType_i32 && SrcType == IceType_f32) \|\|

	1401 (DestType == IceType_f32 && SrcType == IceType_i32));

	1402 // a.i32 = bitcast b.f32 ==>

	1403 // t.f32 = b.f32

	1404 // s.f32 = spill t.f32

	1405 // a.i32 = s.f32

	1406 Variable *T = NULL;

	1407 // TODO: Should be able to force a spill setup by calling legalize() with

	1408 // Legal_Mem and not Legal_Reg or Legal_Imm.

	1409 Variable *Spill = Func->makeVariable(SrcType, Context.getNode());

	1410 Spill->setWeight(RegWeight::Zero);

	1411 Spill->setPreferredRegister(Dest, true);

	1412 _mov(T, Src0RM);

	1413 _mov(Spill, T);

	1414 _mov(Dest, Spill);

	1415 } break;

	1416 case IceType_i64: {

	1417 assert(Src0RM->getType() == IceType_f64);

	1418 // a.i64 = bitcast b.f64 ==>

	1419 // s.f64 = spill b.f64

	1420 // t_lo.i32 = lo(s.f64)

	1421 // a_lo.i32 = t_lo.i32

	1422 // t_hi.i32 = hi(s.f64)

	1423 // a_hi.i32 = t_hi.i32

	1424 Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());

	1425 Spill->setWeight(RegWeight::Zero);

	1426 Spill->setPreferredRegister(llvm::dyn_cast<Variable>(Src0RM), true);

	1427 _mov(Spill, Src0RM);

	1428

	1429 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

	1430 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

	1431 Variable *T_Lo = makeReg(IceType_i32);

	1432 Variable *T_Hi = makeReg(IceType_i32);

	1433 VariableSplit *SpillLo =

	1434 VariableSplit::create(Func, Spill, VariableSplit::Low);

	1435 VariableSplit *SpillHi =

	1436 VariableSplit::create(Func, Spill, VariableSplit::High);

	1437

	1438 _mov(T_Lo, SpillLo);

	1439 _mov(DestLo, T_Lo);

	1440 _mov(T_Hi, SpillHi);

	1441 _mov(DestHi, T_Hi);

	1442 } break;

	1443 case IceType_f64: {

	1444 assert(Src0RM->getType() == IceType_i64);

	1445 // a.f64 = bitcast b.i64 ==>

	1446 // t_lo.i32 = b_lo.i32

	1447 // lo(s.f64) = t_lo.i32

	1448 // FakeUse(s.f64)

	1449 // t_hi.i32 = b_hi.i32

	1450 // hi(s.f64) = t_hi.i32

	1451 // a.f64 = s.f64

	1452 Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());

	1453 Spill->setWeight(RegWeight::Zero);

	1454 Spill->setPreferredRegister(Dest, true);

	1455

	1456 Context.insert(InstFakeDef::create(Func, Spill));

	1457

	1458 Variable T_Lo = NULL, T_Hi = NULL;

	1459 VariableSplit *SpillLo =

	1460 VariableSplit::create(Func, Spill, VariableSplit::Low);

	1461 VariableSplit *SpillHi =

	1462 VariableSplit::create(Func, Spill, VariableSplit::High);

	1463 _mov(T_Lo, loOperand(Src0RM));

	1464 _store(T_Lo, SpillLo);

	1465 _mov(T_Hi, hiOperand(Src0RM));

	1466 _store(T_Hi, SpillHi);

	1467 _mov(Dest, Spill);

	1468 } break;

	1469 }

	1470 break;

	1471 }

	1472 }

	1473

	1474 void TargetX8632::lowerFcmp(const InstFcmp *Inst) {

	1475 Operand *Src0 = Inst->getSrc(0);

	1476 Operand *Src1 = Inst->getSrc(1);

	1477 Variable *Dest = Inst->getDest();

	1478 // Lowering a = fcmp cond, b, c

	1479 // ucomiss b, c /* only if C1 != Br_None */

	1480 // /* but swap b,c order if SwapOperands==true */

	1481 // mov a, <default>

	1482 // j<C1> label /* only if C1 != Br_None */

	1483 // j<C2> label /* only if C2 != Br_None */

	1484 // FakeUse(a) /* only if C1 != Br_None */

	1485 // mov a, !<default> /* only if C1 != Br_None */

	1486 // label: /* only if C1 != Br_None */

	1487 InstFcmp::FCond Condition = Inst->getCondition();

	1488 size_t Index = static_cast<size_t>(Condition);

	1489 assert(Index < TableFcmpSize);

	1490 // The table is indexed by InstFcmp::Condition. Make sure it didn't fall

	1491 // out of order.

	1492 if (TableFcmp[Index].SwapOperands) {

	1493 Operand *Tmp = Src0;

	1494 Src0 = Src1;

	1495 Src1 = Tmp;

	1496 }

	1497 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);

	1498 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);

	1499 if (HasC1) {

	1500 Src0 = legalize(Src0);

	1501 Operand *Src1RM = legalize(Src1, Legal_Reg \| Legal_Mem);

	1502 Variable *T = NULL;

	1503 _mov(T, Src0);

	1504 _ucomiss(T, Src1RM);

	1505 }

	1506 Constant *Default =

	1507 Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default);

	1508 _mov(Dest, Default);

	1509 if (HasC1) {

	1510 InstX8632Label *Label = InstX8632Label::create(Func, this);

	1511 _br(TableFcmp[Index].C1, Label);

	1512 if (HasC2) {

	1513 _br(TableFcmp[Index].C2, Label);

	1514 }

	1515 Context.insert(InstFakeUse::create(Func, Dest));

	1516 Constant *NonDefault =

	1517 Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default);

	1518 _mov(Dest, NonDefault);

	1519 Context.insert(Label);

	1520 }

	1521 }

	1522

	1523 void TargetX8632::lowerIcmp(const InstIcmp *Inst) {

	1524 Operand *Src0 = legalize(Inst->getSrc(0));

	1525 Operand *Src1 = legalize(Inst->getSrc(1));

	1526 Variable *Dest = Inst->getDest();

	1527

	1528 // If Src1 is an immediate, or known to be a physical register, we can

	1529 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into

	1530 // a physical register. (Actually, either Src0 or Src1 can be chosen for

	1531 // the physical register, but unfortunately we have to commit to one or

	1532 // the other before register allocation.)

	1533 bool IsSrc1ImmOrReg = false;

	1534 if (llvm::isa<Constant>(Src1))

	1535 IsSrc1ImmOrReg = true;

	1536 else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {

	1537 if (Var->hasReg())

	1538 IsSrc1ImmOrReg = true;

	1539 }

	1540

	1541 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:

	1542 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);

	1543 Constant *One = Ctx->getConstantInt(IceType_i32, 1);

	1544 if (Src0->getType() == IceType_i64) {

	1545 InstIcmp::ICond Condition = Inst->getCondition();

	1546 size_t Index = static_cast<size_t>(Condition);

	1547 assert(Index < TableIcmp64Size);

	1548 // The table is indexed by InstIcmp::Condition. Make sure it didn't fall

	1549 // out of order.

	1550 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg \| Legal_Imm);

	1551 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg \| Legal_Imm);

	1552 if (Condition == InstIcmp::Eq \|\| Condition == InstIcmp::Ne) {

	1553 InstX8632Label *Label = InstX8632Label::create(Func, this);

	1554 _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));

	1555 _cmp(loOperand(Src0), Src1LoRI);

	1556 _br(InstX8632Br::Br_ne, Label);

	1557 _cmp(hiOperand(Src0), Src1HiRI);

	1558 _br(InstX8632Br::Br_ne, Label);

	1559 Context.insert(InstFakeUse::create(Func, Dest));

	1560 _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));

	1561 Context.insert(Label);

	1562 } else {

	1563 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);

	1564 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);

	1565 _mov(Dest, One);

	1566 _cmp(hiOperand(Src0), Src1HiRI);

	1567 _br(TableIcmp64[Index].C1, LabelTrue);

	1568 _br(TableIcmp64[Index].C2, LabelFalse);

	1569 _cmp(loOperand(Src0), Src1LoRI);

	1570 _br(TableIcmp64[Index].C3, LabelTrue);

	1571 Context.insert(LabelFalse);

	1572 Context.insert(InstFakeUse::create(Func, Dest));

	1573 _mov(Dest, Zero);

	1574 Context.insert(LabelTrue);

	1575 }

	1576 return;

	1577 }

	1578 // cmp b, c

	1579 Operand *Src0New =

	1580 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);

	1581 InstX8632Label *Label = InstX8632Label::create(Func, this);

	1582 _cmp(Src0New, Src1);

	1583 _mov(Dest, One);

	1584 _br(getIcmp32Mapping(Inst->getCondition()), Label);

	1585 Context.insert(InstFakeUse::create(Func, Dest));

	1586 _mov(Dest, Zero);

	1587 Context.insert(Label);

	1588 }

	1589

	1590 void TargetX8632::lowerLoad(const InstLoad *Inst) {

	1591 // A Load instruction can be treated the same as an Assign

	1592 // instruction, after the source operand is transformed into an

	1593 // OperandX8632Mem operand. Note that the address mode

	1594 // optimization already creates an OperandX8632Mem operand, so it

	1595 // doesn't need another level of transformation.

	1596 Type Ty = Inst->getDest()->getType();

	1597 Operand *Src0 = Inst->getSourceAddress();

	1598 // Address mode optimization already creates an OperandX8632Mem

	1599 // operand, so it doesn't need another level of transformation.

	1600 if (!llvm::isa<OperandX8632Mem>(Src0)) {

	1601 Variable *Base = llvm::dyn_cast<Variable>(Src0);

	1602 Constant *Offset = llvm::dyn_cast<Constant>(Src0);

	1603 assert(Base \|\| Offset);

	1604 Src0 = OperandX8632Mem::create(Func, Ty, Base, Offset);

	1605 }

	1606

	1607 InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);

	1608 lowerAssign(Assign);

	1609 }

	1610

	1611 void TargetX8632::lowerPhi(const InstPhi * /Inst/) {

	1612 Func->setError("Phi lowering not implemented");

	1613 }

	1614

	1615 void TargetX8632::lowerRet(const InstRet *Inst) {

	1616 Variable *Reg = NULL;

	1617 if (Inst->hasRetValue()) {

	1618 Operand *Src0 = legalize(Inst->getRetValue());

	1619 if (Src0->getType() == IceType_i64) {

	1620 Variable *eax = legalizeToVar(loOperand(Src0), false, Reg_eax);

	1621 Variable *edx = legalizeToVar(hiOperand(Src0), false, Reg_edx);

	1622 Reg = eax;

	1623 Context.insert(InstFakeUse::create(Func, edx));

	1624 } else if (Src0->getType() == IceType_f32 \|\|

	1625 Src0->getType() == IceType_f64) {

	1626 _fld(Src0);

	1627 } else {

	1628 _mov(Reg, Src0, Reg_eax);

	1629 }

	1630 }

	1631 _ret(Reg);

	1632 // Add a fake use of esp to make sure esp stays alive for the entire

	1633 // function. Otherwise post-call esp adjustments get dead-code

	1634 // eliminated. TODO: Are there more places where the fake use

	1635 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not

	1636 // have a ret instruction.

	1637 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);

	1638 Context.insert(InstFakeUse::create(Func, esp));

	1639 }

	1640

	1641 void TargetX8632::lowerSelect(const InstSelect *Inst) {

	1642 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:

	1643 Variable *Dest = Inst->getDest();

	1644 Operand *SrcT = Inst->getTrueOperand();

	1645 Operand *SrcF = Inst->getFalseOperand();

	1646 Operand *Condition = legalize(Inst->getCondition());

	1647 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);

	1648 InstX8632Label *Label = InstX8632Label::create(Func, this);

	1649

	1650 if (Dest->getType() == IceType_i64) {

	1651 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

	1652 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

	1653 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg \| Legal_Imm, true);

	1654 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg \| Legal_Imm, true);

	1655 _cmp(Condition, Zero);

	1656 _mov(DestLo, SrcLoRI);

	1657 _mov(DestHi, SrcHiRI);

	1658 _br(InstX8632Br::Br_ne, Label);

	1659 Context.insert(InstFakeUse::create(Func, DestLo));

	1660 Context.insert(InstFakeUse::create(Func, DestHi));

	1661 Operand *SrcFLo = loOperand(SrcF);

	1662 Operand *SrcFHi = hiOperand(SrcF);

	1663 SrcLoRI = legalize(SrcFLo, Legal_Reg \| Legal_Imm, true);

	1664 SrcHiRI = legalize(SrcFHi, Legal_Reg \| Legal_Imm, true);

	1665 _mov(DestLo, SrcLoRI);

	1666 _mov(DestHi, SrcHiRI);

	1667 } else {

	1668 _cmp(Condition, Zero);

	1669 SrcT = legalize(SrcT, Legal_Reg \| Legal_Imm, true);

	1670 _mov(Dest, SrcT);

	1671 _br(InstX8632Br::Br_ne, Label);

	1672 Context.insert(InstFakeUse::create(Func, Dest));

	1673 SrcF = legalize(SrcF, Legal_Reg \| Legal_Imm, true);

	1674 _mov(Dest, SrcF);

	1675 }

	1676

	1677 Context.insert(Label);

	1678 }

	1679

	1680 void TargetX8632::lowerStore(const InstStore *Inst) {

	1681 Operand *Value = Inst->getData();

	1682 Operand *Addr = Inst->getAddr();

	1683 OperandX8632Mem *NewAddr = llvm::dyn_cast<OperandX8632Mem>(Addr);

	1684 // Address mode optimization already creates an OperandX8632Mem

	1685 // operand, so it doesn't need another level of transformation.

	1686 if (!NewAddr) {

	1687 // The address will be either a constant (which represents a global

	1688 // variable) or a variable, so either the Base or Offset component

	1689 // of the OperandX8632Mem will be set.

	1690 Variable *Base = llvm::dyn_cast<Variable>(Addr);

	1691 Constant *Offset = llvm::dyn_cast<Constant>(Addr);

	1692 assert(Base \|\| Offset);

	1693 NewAddr = OperandX8632Mem::create(Func, Value->getType(), Base, Offset);

	1694 }

	1695 NewAddr = llvm::cast<OperandX8632Mem>(legalize(NewAddr));

	1696

	1697 if (NewAddr->getType() == IceType_i64) {

	1698 Value = legalize(Value);

	1699 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg \| Legal_Imm, true);

	1700 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg \| Legal_Imm, true);

	1701 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));

	1702 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));

	1703 } else {

	1704 Value = legalize(Value, Legal_Reg \| Legal_Imm, true);

	1705 _store(Value, NewAddr);

	1706 }

	1707 }

	1708

	1709 void TargetX8632::lowerSwitch(const InstSwitch *Inst) {

	1710 // This implements the most naive possible lowering.

	1711 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default

	1712 Operand *Src0 = Inst->getComparison();

	1713 SizeT NumCases = Inst->getNumCases();

	1714 // OK, we'll be slightly less naive by forcing Src into a physical

	1715 // register if there are 2 or more uses.

	1716 if (NumCases >= 2)

	1717 Src0 = legalizeToVar(Src0, true);

	1718 else

	1719 Src0 = legalize(Src0, Legal_All, true);

	1720 for (SizeT I = 0; I < NumCases; ++I) {

	1721 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I));

	1722 _cmp(Src0, Value);

	1723 _br(InstX8632Br::Br_e, Inst->getLabel(I));

	1724 }

	1725

	1726 _br(Inst->getLabelDefault());

	1727 }

	1728

	1729 void TargetX8632::lowerUnreachable(const InstUnreachable * /Inst/) {

	1730 const SizeT MaxSrcs = 0;

	1731 Variable *Dest = NULL;

	1732 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);

	1733 lowerCall(Call);

	1734 }

	1735

	1736 Operand TargetX8632::legalize(Operand From, LegalMask Allowed,

	1737 bool AllowOverlap, int32_t RegNum) {

	1738 assert(Allowed & Legal_Reg);

	1739 assert(RegNum == Variable::NoRegister \|\| Allowed == Legal_Reg);

	1740 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {

	1741 Variable *Base = Mem->getBase();

	1742 Variable *Index = Mem->getIndex();

	1743 Variable *RegBase = Base;

	1744 Variable *RegIndex = Index;

	1745 if (Base) {

	1746 RegBase = legalizeToVar(Base, true);

	1747 }

	1748 if (Index) {

	1749 RegIndex = legalizeToVar(Index, true);

	1750 }

	1751 if (Base != RegBase \|\| Index != RegIndex) {

	1752 From =

	1753 OperandX8632Mem::create(Func, Mem->getType(), RegBase,

	1754 Mem->getOffset(), RegIndex, Mem->getShift());

	1755 }

	1756

	1757 if (!(Allowed & Legal_Mem)) {

	1758 Variable *Reg = makeReg(From->getType(), RegNum);

	1759 _mov(Reg, From, RegNum);

	1760 From = Reg;

	1761 }

	1762 return From;

	1763 }

	1764 if (llvm::isa<Constant>(From)) {

	1765 if (!(Allowed & Legal_Imm)) {

	1766 Variable *Reg = makeReg(From->getType(), RegNum);

	1767 _mov(Reg, From);

	1768 From = Reg;

	1769 }

	1770 return From;

	1771 }

	1772 if (Variable *Var = llvm::dyn_cast<Variable>(From)) {

	1773 // We need a new physical register for the operand if:

	1774 // Mem is not allowed and Var->getRegNum() is unknown, or

	1775 // RegNum is required and Var->getRegNum() doesn't match.

	1776 if ((!(Allowed & Legal_Mem) && !Var->hasReg()) \|\|

	1777 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {

	1778 Variable *Reg = makeReg(From->getType(), RegNum);

	1779 if (RegNum == Variable::NoRegister) {

	1780 Reg->setPreferredRegister(Var, AllowOverlap);

	1781 }

	1782 _mov(Reg, From);

	1783 From = Reg;

	1784 }

	1785 return From;

	1786 }

	1787 llvm_unreachable("Unhandled operand kind in legalize()");

	1788 return From;

	1789 }

	1790

	1791 Variable TargetX8632::legalizeToVar(Operand From, bool AllowOverlap,

	1792 int32_t RegNum) {

	1793 return llvm::cast<Variable>(legalize(From, Legal_Reg, AllowOverlap, RegNum));

	1794 }

	1795

	1796 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {

	1797 Variable *Reg = Func->makeVariable(Type, Context.getNode());

	1798 if (RegNum == Variable::NoRegister)

	1799 Reg->setWeightInfinite();

	1800 else

	1801 Reg->setRegNum(RegNum);

	1802 return Reg;

	1803 }

	1804

	1805 void TargetX8632::postLower() {

	1806 if (Ctx->getOptLevel() != Opt_m1)

	1807 return;

	1808 // TODO: Avoid recomputing WhiteList every instruction.

	1809 llvm::SmallBitVector WhiteList = getRegisterSet(RegSet_All, RegSet_None);

	1810 // Make one pass to black-list pre-colored registers. TODO: If

	1811 // there was some prior register allocation pass that made register

	1812 // assignments, those registers need to be black-listed here as

	1813 // well.

	1814 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;

	1815 ++I) {

	1816 const Inst Inst = I;

	1817 if (Inst->isDeleted())

	1818 continue;

	1819 if (llvm::isa<InstFakeKill>(Inst))

	1820 continue;

	1821 SizeT VarIndex = 0;

	1822 for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {

	1823 Operand *Src = Inst->getSrc(SrcNum);

	1824 SizeT NumVars = Src->getNumVars();

	1825 for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {

	1826 const Variable *Var = Src->getVar(J);

	1827 if (!Var->hasReg())

	1828 continue;

	1829 WhiteList[Var->getRegNum()] = false;

	1830 }

	1831 }

	1832 }

	1833 // The second pass colors infinite-weight variables.

	1834 llvm::SmallBitVector AvailableRegisters = WhiteList;

	1835 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;

	1836 ++I) {

	1837 const Inst Inst = I;

	1838 if (Inst->isDeleted())

	1839 continue;

	1840 SizeT VarIndex = 0;

	1841 for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {

	1842 Operand *Src = Inst->getSrc(SrcNum);

	1843 SizeT NumVars = Src->getNumVars();

	1844 for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {

	1845 Variable *Var = Src->getVar(J);

	1846 if (Var->hasReg())

	1847 continue;

	1848 if (!Var->getWeight().isInf())

	1849 continue;

	1850 llvm::SmallBitVector AvailableTypedRegisters =

	1851 AvailableRegisters & getRegisterSetForType(Var->getType());

	1852 if (!AvailableTypedRegisters.any()) {

	1853 // This is a hack in case we run out of physical registers

	1854 // due to an excessive number of "push" instructions from

	1855 // lowering a call.

	1856 AvailableRegisters = WhiteList;

	1857 AvailableTypedRegisters =

	1858 AvailableRegisters & getRegisterSetForType(Var->getType());

	1859 }

	1860 assert(AvailableTypedRegisters.any());

	1861 int32_t RegNum = AvailableTypedRegisters.find_first();

	1862 Var->setRegNum(RegNum);

	1863 AvailableRegisters[RegNum] = false;

	1864 }

	1865 }

	1866 }

	1867 }

	1868

	1869 } // end of namespace Ice

OLD	NEW

« src/IceTargetLoweringX8632.h ('K') | « src/IceTargetLoweringX8632.h ('k') | src/IceTargetLoweringX8632.def » ('j') | no next file with comments »