OLD | NEW |
---|---|
(Empty) | |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | |
2 // | |
3 // The Subzero Code Generator | |
4 // | |
5 // This file is distributed under the University of Illinois Open Source | |
6 // License. See LICENSE.TXT for details. | |
7 // | |
8 //===----------------------------------------------------------------------===// | |
9 // | |
10 // This file implements the TargetLoweringX8632 class, which | |
11 // consists almost entirely of the lowering sequence for each | |
12 // high-level instruction. It also implements | |
13 // TargetX8632Fast::postLower() which does the simplest possible | |
14 // register allocation for the "fast" target. | |
15 // | |
16 //===----------------------------------------------------------------------===// | |
17 | |
18 #include "IceDefs.h" | |
19 #include "IceCfg.h" | |
20 #include "IceCfgNode.h" | |
21 #include "IceInstX8632.h" | |
22 #include "IceOperand.h" | |
23 #include "IceTargetLoweringX8632.def" | |
24 #include "IceTargetLoweringX8632.h" | |
25 | |
26 namespace Ice { | |
27 | |
28 namespace { | |
29 | |
30 // The following table summarizes the logic for lowering the fcmp instruction. | |
31 // There is one table entry for each of the 16 conditions. A comment in | |
32 // lowerFcmp() describes the lowering template. In the most general case, there | |
33 // is a compare followed by two conditional branches, because some fcmp | |
34 // conditions don't map to a single x86 conditional branch. However, in many | |
35 // cases it is possible to swap the operands in the comparison and have a single | |
36 // conditional branch. Since it's quite tedious to validate the table by hand, | |
37 // good execution tests are helpful. | |
38 | |
// Per-condition lowering data for fcmp: the default result value, whether
// the compare operands should be swapped, and the branch condition(s) C1/C2
// emitted after the compare.  Entries are generated from FCMPX8632_TABLE so
// they stay keyed by the fcmp condition enum.
const struct TableFcmp_ {
  uint32_t Default;
  bool SwapOperands;
  InstX8632Br::BrCond C1, C2;
} TableFcmp[] = {
#define X(val, dflt, swap, C1, C2)                                             \
  { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 }                             \
  ,
    FCMPX8632_TABLE
#undef X
};
const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
51 | |
52 // The following table summarizes the logic for lowering the icmp instruction | |
53 // for i32 and narrower types. Each icmp condition has a clear mapping to an | |
54 // x86 conditional branch instruction. | |
55 | |
// Maps each icmp condition (for i32 and narrower operands) to its single
// x86 branch condition.  Generated from ICMPX8632_TABLE, indexed by the
// icmp condition enum.
const struct TableIcmp32_ {
  InstX8632Br::BrCond Mapping;
} TableIcmp32[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { InstX8632Br::C_32 }                                                        \
  ,
    ICMPX8632_TABLE
#undef X
};
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
66 | |
67 // The following table summarizes the logic for lowering the icmp instruction | |
68 // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and | |
69 // conditional branches are needed. For the other conditions, three separate | |
70 // conditional branches are needed. | |
// Per-condition branch triples for i64 icmp lowering.  Generated from
// ICMPX8632_TABLE, indexed by the icmp condition enum; up to three
// conditional branches (C1..C3) may be emitted per comparison.
const struct TableIcmp64_ {
  InstX8632Br::BrCond C1, C2, C3;
} TableIcmp64[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 }               \
  ,
    ICMPX8632_TABLE
#undef X
};
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
81 | |
82 InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { | |
83 size_t Index = static_cast<size_t>(Cond); | |
84 assert(Index < TableIcmp32Size); | |
85 return TableIcmp32[Index].Mapping; | |
86 } | |
87 | |
88 // In some cases, there are x-macros tables for both high-level and | |
89 // low-level instructions/operands that use the same enum key value. | |
90 // The tables are kept separate to maintain a proper separation | |
91 // between abstraction layers. There is a risk that the tables | |
92 // could get out of sync if enum values are reordered or if entries | |
93 // are added or deleted. This dummy function uses static_assert to | |
94 // ensure everything is kept in sync. | |
// Compile-time cross-check of the x-macro table pairs: for each pair of
// high-level/low-level tables, the entry keys must appear in the same order
// with the same enum values.  All checks are STATIC_ASSERTs, so this
// function generates no runtime code.
void xMacroIntegrityCheck() {
  // Validate the enum values in FCMPX8632_TABLE.
  {
    // Define a temporary set of enum values based on low-level
    // table entries.
    enum _tmp_enum {
#define X(val, dflt, swap, C1, C2) _tmp_##val,
      FCMPX8632_TABLE
#undef X
    };
    // Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
    ICEINSTFCMP_TABLE;
#undef X
    // Define a set of constants based on low-level table entries,
    // and ensure the table entry keys are consistent.
#define X(val, dflt, swap, C1, C2)                                             \
  static const int _table2_##val = _tmp_##val;                                 \
  STATIC_ASSERT(_table1_##val == _table2_##val);
    FCMPX8632_TABLE;
#undef X
    // Repeat the static asserts with respect to the high-level
    // table entries in case the high-level table has extra entries.
#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICEINSTFCMP_TABLE;
#undef X
  }

  // Validate the enum values in ICMPX8632_TABLE.
  {
    // Define a temporary set of enum values based on low-level
    // table entries.
    enum _tmp_enum {
#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
      ICMPX8632_TABLE
#undef X
    };
    // Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
    ICEINSTICMP_TABLE;
#undef X
    // Define a set of constants based on low-level table entries,
    // and ensure the table entry keys are consistent.
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  static const int _table2_##val = _tmp_##val;                                 \
  STATIC_ASSERT(_table1_##val == _table2_##val);
    ICMPX8632_TABLE;
#undef X
    // Repeat the static asserts with respect to the high-level
    // table entries in case the high-level table has extra entries.
#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICEINSTICMP_TABLE;
#undef X
  }

  // Validate the enum values in ICETYPEX8632_TABLE.
  {
    // Define a temporary set of enum values based on low-level
    // table entries.
    enum _tmp_enum {
#define X(tag, cvt, sdss, width) _tmp_##tag,
      ICETYPEX8632_TABLE
#undef X
    };
    // Define a set of constants based on high-level table entries.
#define X(tag, size, align, str) static const int _table1_##tag = tag;
    ICETYPE_TABLE;
#undef X
    // Define a set of constants based on low-level table entries,
    // and ensure the table entry keys are consistent.
#define X(tag, cvt, sdss, width)                                               \
  static const int _table2_##tag = _tmp_##tag;                                 \
  STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICETYPEX8632_TABLE;
#undef X
    // Repeat the static asserts with respect to the high-level
    // table entries in case the high-level table has extra entries.
#define X(tag, size, align, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICETYPE_TABLE;
#undef X
  }
}
177 | |
178 } // end of anonymous namespace | |
179 | |
// Constructs the x86-32 lowering target: initializes frame-related state
// and builds the per-type register sets from REGX8632_TABLE.
TargetX8632::TargetX8632(Cfg *Func)
    : TargetLowering(Func), IsEbpBasedFrame(false), FrameSizeLocals(0),
      LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
      PhysicalRegisters(VarList(Reg_NUM)) {
  // Masks of registers usable for each class of type, filled in from the
  // attribute columns of the register table below.
  llvm::SmallBitVector IntegerRegisters(Reg_NUM);
  llvm::SmallBitVector IntegerRegistersI8(Reg_NUM);
  llvm::SmallBitVector FloatRegisters(Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(Reg_NUM);
  ScratchRegs.resize(Reg_NUM);
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  IntegerRegisters[val] = isInt;                                               \
  IntegerRegistersI8[val] = isI8;                                              \
  FloatRegisters[val] = isFP;                                                  \
  ScratchRegs[val] = scratch;
  REGX8632_TABLE;
#undef X
  // TODO(review): this eager per-type initialization could be done
  // lazily/on demand instead (see code-review discussion).
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  // i64 values are split into 32-bit halves, so they draw from the same
  // integer register pool.
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
}
206 | |
// The "Om1" (fast) translation pipeline: lower phis, generate target code,
// then lay out the stack frame.  Each phase is timed, and translation
// aborts as soon as the Cfg records an error.
void TargetX8632::translateOm1() {
  GlobalContext *Context = Func->getContext();
  Ostream &Str = Context->getStrDump();
  Timer T_placePhiLoads;
  Func->placePhiLoads();
  if (Func->hasError())
    return;
  T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
  Timer T_placePhiStores;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
  Timer T_deletePhis;
  Func->deletePhis();
  if (Func->hasError())
    return;
  T_deletePhis.printElapsedUs(Context, "deletePhis()");
  // Only the banner is verbosity-gated here; Func->dump() is presumably
  // verbosity-aware internally — TODO confirm.
  if (Context->isVerbose())
    Str << "================ After Phi lowering ================\n";
  Func->dump();

  Timer T_genCode;
  Func->genCode();
  if (Func->hasError())
    return;
  T_genCode.printElapsedUs(Context, "genCode()");
  if (Context->isVerbose())
    Str << "================ After initial x8632 codegen ================\n";
  Func->dump();

  Timer T_genFrame;
  Func->genFrame();
  if (Func->hasError())
    return;
  T_genFrame.printElapsedUs(Context, "genFrame()");
  if (Context->isVerbose())
    Str << "================ After stack frame mapping ================\n";
  Func->dump();
}
247 | |
// 32-bit register names, indexed by register number (generated from
// REGX8632_TABLE).  The 8/16-bit name variants live in getRegName().
IceString TargetX8632::RegNames[] = {
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  name,
    REGX8632_TABLE
#undef X
};
255 | |
256 Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) { | |
257 assert(RegNum < PhysicalRegisters.size()); | |
258 Variable *Reg = PhysicalRegisters[RegNum]; | |
259 if (Reg == NULL) { | |
260 CfgNode *Node = NULL; // NULL means multi-block lifetime | |
261 Reg = Func->makeVariable(IceType_i32, Node); | |
262 Reg->setRegNum(RegNum); | |
263 PhysicalRegisters[RegNum] = Reg; | |
264 } | |
265 return Reg; | |
266 } | |
267 | |
// Returns the name of register RegNum sized for Ty: the 8-bit name for
// i1/i8, the 16-bit name for i16, and the 32-bit name otherwise.
IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < Reg_NUM);
  // The "" prefix concatenates with the table's name literal — presumably
  // to force a compile error if an entry is not a string literal.
  static IceString RegNames8[] = {
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  "" name8,
      REGX8632_TABLE
#undef X
  };
  static IceString RegNames16[] = {
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  "" name16,
      REGX8632_TABLE
#undef X
  };
  switch (Ty) {
  case IceType_i1:
  case IceType_i8:
    return RegNames8[RegNum];
  case IceType_i16:
    return RegNames16[RegNum];
  default:
    return RegNames[RegNum];
  }
}
294 | |
295 void TargetX8632::emitVariable(const Variable *Var, const Cfg *Func) const { | |
296 Ostream &Str = Ctx->getStrEmit(); | |
297 assert(Var->getLocalUseNode() == NULL || | |
298 Var->getLocalUseNode() == Func->getCurrentNode()); | |
299 if (Var->hasReg()) { | |
300 Str << getRegName(Var->getRegNum(), Var->getType()); | |
301 return; | |
302 } | |
303 Str << InstX8632::getWidthString(Var->getType()); | |
304 Str << " [" << getRegName(getFrameOrStackReg(), IceType_i32); | |
305 int32_t Offset = Var->getStackOffset() + getStackAdjustment(); | |
306 if (Offset) { | |
307 if (Offset > 0) | |
308 Str << "+"; | |
309 Str << Offset; | |
310 } | |
311 Str << "]"; | |
312 } | |
313 | |
314 // Helper function for addProlog(). Sets the frame offset for Arg, | |
315 // updates InArgsSizeBytes according to Arg's width, and generates an | |
316 // instruction to copy Arg into its assigned register if applicable. | |
317 // For an I64 arg that has been split into Lo and Hi components, it | |
318 // calls itself recursively on the components, taking care to handle | |
319 // Lo first because of the little-endian architecture. | |
void TargetX8632::setArgOffsetAndCopy(Variable *Arg, Variable *FramePtr,
                                      int32_t BasicFrameOffset,
                                      int32_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  // A split i64 arg is handled by recursing on its halves, Lo first so
  // the low word gets the lower address (little-endian layout).
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    setArgOffsetAndCopy(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    setArgOffsetAndCopy(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  // If the arg was register-allocated, copy it from its stack home into
  // the assigned register.
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    OperandX8632Mem *Mem = OperandX8632Mem::create(
        Func, Ty, FramePtr,
        Ctx->getConstantInt(IceType_i32, Arg->getStackOffset()));
    _mov(Arg, Mem);
  }
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
}
343 | |
// Emits the function prolog into Node: pushes preserved registers, sets up
// the (optional) ebp frame, reserves locals space, and assigns stack
// offsets to args and register-less variables.
void TargetX8632::addProlog(CfgNode *Node) {
  // If SimpleCoalescing is false, each variable without a register
  // gets its own unique stack slot, which leads to large stack
  // frames.  If SimpleCoalescing is true, then each "global" variable
  // without a register gets its own slot, but "local" variable slots
  // are reused across basic blocks.  E.g., if A and B are local to
  // block 1 and C is local to block 2, then C may share a slot with A
  // or B.
  // NOTE(review): deliberately a compile-time constant rather than a
  // command-line flag; the 'false' path is kept for experimentation.
  const bool SimpleCoalescing = true;
  int32_t InArgsSizeBytes = 0;
  int32_t RetIpSizeBytes = 4; // size of the pushed return address
  int32_t PreservedRegsSizeBytes = 0;
  LocalsSizeBytes = 0;
  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  // Determine stack frame offsets for each Variable without a
  // register assignment.  This can be done as one variable per stack
  // slot.  Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);

  int32_t GlobalsSize = 0;
  // Per-node running total of locals space; only the maximum across nodes
  // is needed because local slots are shared between nodes.
  std::vector<int> LocalsSize(Func->getNumNodes());

  // Prepass.  Compute RegsUsed, PreservedRegsSizeBytes, and
  // LocalsSizeBytes.
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  const VarList &Variables = Func->getVariables();
  const VarList &Args = Func->getArgs();
  for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
       I != E; ++I) {
    Variable *Var = *I;
    if (Var->hasReg()) {
      RegsUsed[Var->getRegNum()] = true;
      continue;
    }
    // An argument passed on the stack already has a stack slot.
    if (Var->getIsArg())
      continue;
    // A spill slot linked to a variable with a stack slot should reuse
    // that stack slot.
    if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
      if (Variable *Linked = Var->getPreferredRegister()) {
        if (!Linked->hasReg())
          continue;
      }
    }
    int32_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing) {
      if (Var->isMultiblockLife()) {
        GlobalsSize += Increment;
      } else {
        SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
        LocalsSize[NodeIndex] += Increment;
        if (LocalsSize[NodeIndex] > LocalsSizeBytes)
          LocalsSizeBytes = LocalsSize[NodeIndex];
      }
    } else {
      LocalsSizeBytes += Increment;
    }
  }
  LocalsSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      PreservedRegsSizeBytes += 4;
      const bool SuppressStackAdjustment = true;
      _push(getPhysicalRegister(i), SuppressStackAdjustment);
    }
  }

  // Generate "push ebp; mov ebp, esp"
  if (IsEbpBasedFrame) {
    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
               .count() == 0);
    PreservedRegsSizeBytes += 4;
    Variable *ebp = getPhysicalRegister(Reg_ebp);
    Variable *esp = getPhysicalRegister(Reg_esp);
    const bool SuppressStackAdjustment = true;
    _push(ebp, SuppressStackAdjustment);
    _mov(ebp, esp);
  }

  // Generate "sub esp, LocalsSizeBytes"
  if (LocalsSizeBytes)
    _sub(getPhysicalRegister(Reg_esp),
         Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));

  // NOTE(review): the stack adjustment should already be zero at this
  // point; the reset is defensive.
  resetStackAdjustment();

  // Fill in stack offsets for args, and copy args into registers for
  // those that were register-allocated.  Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  //
  // TODO: Make this right for different width args, calling
  // conventions, etc.  For one thing, args passed in registers will
  // need to be copied/shuffled to their home registers (the
  // RegManager code may have some permutation logic to leverage),
  // and if they have no home register, home space will need to be
  // allocated on the stack to copy into.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  int32_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;
  if (!IsEbpBasedFrame)
    BasicFrameOffset += LocalsSizeBytes;
  for (SizeT i = 0; i < Args.size(); ++i) {
    Variable *Arg = Args[i];
    setArgOffsetAndCopy(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.  This is a second pass over the
  // same variables, now assigning concrete offsets using the sizes
  // computed in the prepass.
  int32_t TotalGlobalsSize = GlobalsSize;
  GlobalsSize = 0;
  LocalsSize.assign(LocalsSize.size(), 0);
  int32_t NextStackOffset = 0;
  for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
       I != E; ++I) {
    Variable *Var = *I;
    if (Var->hasReg()) {
      RegsUsed[Var->getRegNum()] = true;
      continue;
    }
    if (Var->getIsArg())
      continue;
    if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
      if (Variable *Linked = Var->getPreferredRegister()) {
        if (!Linked->hasReg()) {
          // TODO: Make sure Linked has already been assigned a stack
          // slot.
          Var->setStackOffset(Linked->getStackOffset());
          continue;
        }
      }
    }
    int32_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing) {
      if (Var->isMultiblockLife()) {
        GlobalsSize += Increment;
        NextStackOffset = GlobalsSize;
      } else {
        SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
        LocalsSize[NodeIndex] += Increment;
        NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex];
      }
    } else {
      NextStackOffset += Increment;
    }
    // Offsets are negative relative to ebp, or positive relative to esp.
    if (IsEbpBasedFrame)
      Var->setStackOffset(-NextStackOffset);
    else
      Var->setStackOffset(LocalsSizeBytes - NextStackOffset);
  }
  this->FrameSizeLocals = NextStackOffset;
  this->HasComputedFrame = true;

  if (Func->getContext()->isVerbose(IceV_Frame)) {
    Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes
                                     << "\n"
                                     << "InArgsSizeBytes=" << InArgsSizeBytes
                                     << "\n"
                                     << "PreservedRegsSizeBytes="
                                     << PreservedRegsSizeBytes << "\n";
  }
}
517 | |
// Emits the function epilog (frame teardown plus pops of preserved
// registers) immediately before the block's ret instruction, if any.
void TargetX8632::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  // Scan backward for the ret: it is not necessarily the last instruction
  // (per review discussion, lowerRet() adds a FakeUse of esp after it —
  // TODO confirm).
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstX8632Ret>(*RI))
      break;
  }
  // No ret in this node: nothing to do.
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *esp = getPhysicalRegister(Reg_esp);
  if (IsEbpBasedFrame) {
    // mov esp, ebp; pop ebp
    Variable *ebp = getPhysicalRegister(Reg_ebp);
    _mov(esp, ebp);
    _pop(ebp);
  } else {
    // add esp, LocalsSizeBytes
    if (LocalsSizeBytes)
      _add(esp, Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
  }

  // Add pop instructions for preserved registers, in reverse order of the
  // prolog's pushes.  ebp was already popped above for ebp-based frames.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    SizeT j = CalleeSaves.size() - i - 1;
    if (j == Reg_ebp && IsEbpBasedFrame)
      continue;
    if (CalleeSaves[j] && RegsUsed[j]) {
      _pop(getPhysicalRegister(j));
    }
  }
}
558 | |
559 void TargetX8632::split64(Variable *Var) { | |
560 switch (Var->getType()) { | |
561 default: | |
562 return; | |
563 case IceType_i64: | |
564 // TODO: Only consider F64 if we need to push each half when | |
565 // passing as an argument to a function call. Note that each half | |
566 // is still typed as I32. | |
567 case IceType_f64: | |
568 break; | |
569 } | |
570 Variable *Lo = Var->getLo(); | |
571 Variable *Hi = Var->getHi(); | |
572 if (Lo) { | |
573 assert(Hi); | |
574 return; | |
575 } | |
576 assert(Hi == NULL); | |
577 Lo = Func->makeVariable(IceType_i32, Context.getNode(), | |
578 Var->getName() + "__lo"); | |
579 Hi = Func->makeVariable(IceType_i32, Context.getNode(), | |
580 Var->getName() + "__hi"); | |
581 Var->setLoHi(Lo, Hi); | |
582 if (Var->getIsArg()) { | |
583 Lo->setIsArg(Func); | |
584 Hi->setIsArg(Func); | |
585 } | |
586 } | |
587 | |
// Returns an i32 operand representing the low 32 bits of a 64-bit operand:
// the Lo half of a (possibly newly split) variable, the masked low word of
// a constant, or a memory operand at the original address.
Operand *TargetX8632::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  // Defensive fallback for non-debug builds.
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getLo();
  }
  if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
    // Mask down to the low 32 bits.
    uint64_t Mask = (1ull << 32) - 1;
    return Ctx->getConstantInt(IceType_i32, Const->getValue() & Mask);
  }
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
    // Little-endian: the low word is at the unmodified address.
    return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
                                   Mem->getOffset(), Mem->getIndex(),
                                   Mem->getShift());
  }
  llvm_unreachable("Unsupported operand type");
  return NULL;
}
608 | |
// Returns an i32 operand representing the high 32 bits of a 64-bit
// operand: the Hi half of a (possibly newly split) variable, the shifted
// high word of a constant, or a memory operand at offset+4.
Operand *TargetX8632::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  // Defensive fallback for non-debug builds.
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getHi();
  }
  if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
    return Ctx->getConstantInt(IceType_i32, Const->getValue() >> 32);
  }
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
    // Little-endian: the high word lives 4 bytes past the low word, so
    // fold +4 into whatever form the offset takes (absent, integer, or
    // relocatable symbol).
    Constant *Offset = Mem->getOffset();
    if (Offset == NULL)
      Offset = Ctx->getConstantInt(IceType_i32, 4);
    else if (ConstantInteger *IntOffset =
                 llvm::dyn_cast<ConstantInteger>(Offset)) {
      Offset = Ctx->getConstantInt(IceType_i32, 4 + IntOffset->getValue());
    } else if (ConstantRelocatable *SymOffset =
                   llvm::dyn_cast<ConstantRelocatable>(Offset)) {
      Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(),
                                   SymOffset->getName());
    }
    return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
                                   Mem->getIndex(), Mem->getShift());
  }
  llvm_unreachable("Unsupported operand type");
  return NULL;
}
638 | |
// Builds the set of physical registers selected by the Include mask and
// then filtered by the Exclude mask (Exclude wins when both match), using
// the attribute columns of REGX8632_TABLE.
// NOTE(review): this is more general than most callers strictly need; the
// generality is cheap and kept for flexibility.
llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
                                                 RegSetMask Exclude) const {
  llvm::SmallBitVector Registers(Reg_NUM);

#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[val] = true;                                                     \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[val] = true;                                                     \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[val] = true;                                                     \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[val] = true;                                                     \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[val] = false;                                                    \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[val] = false;                                                    \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[val] = false;                                                    \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[val] = false;

  REGX8632_TABLE

#undef X

  return Registers;
}
668 | |
669 void TargetX8632::lowerAlloca(const InstAlloca *Inst) { | |
670 IsEbpBasedFrame = true; | |
671 // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize | |
672 // the number of adjustments of esp, etc. | |
673 Variable *esp = getPhysicalRegister(Reg_esp); | |
674 Operand *TotalSize = legalize(Inst->getSizeInBytes()); | |
675 Variable *Dest = Inst->getDest(); | |
676 _sub(esp, TotalSize); | |
677 _mov(Dest, esp); | |
678 } | |
679 | |
680 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | |
681 Variable *Dest = Inst->getDest(); | |
682 Operand *Src0 = legalize(Inst->getSrc(0)); | |
683 Operand *Src1 = legalize(Inst->getSrc(1)); | |
684 if (Dest->getType() == IceType_i64) { | |
685 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
686 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
687 Operand *Src0Lo = loOperand(Src0); | |
688 Operand *Src0Hi = hiOperand(Src0); | |
689 Operand *Src1Lo = loOperand(Src1); | |
690 Operand *Src1Hi = hiOperand(Src1); | |
691 Variable *T_Lo = NULL, *T_Hi = NULL; | |
692 switch (Inst->getOp()) { | |
693 case InstArithmetic::Add: | |
694 _mov(T_Lo, Src0Lo); | |
695 _add(T_Lo, Src1Lo); | |
696 _mov(DestLo, T_Lo); | |
697 _mov(T_Hi, Src0Hi); | |
698 _adc(T_Hi, Src1Hi); | |
699 _mov(DestHi, T_Hi); | |
700 break; | |
701 case InstArithmetic::And: | |
702 _mov(T_Lo, Src0Lo); | |
703 _and(T_Lo, Src1Lo); | |
704 _mov(DestLo, T_Lo); | |
705 _mov(T_Hi, Src0Hi); | |
706 _and(T_Hi, Src1Hi); | |
707 _mov(DestHi, T_Hi); | |
708 break; | |
709 case InstArithmetic::Or: | |
710 _mov(T_Lo, Src0Lo); | |
711 _or(T_Lo, Src1Lo); | |
712 _mov(DestLo, T_Lo); | |
713 _mov(T_Hi, Src0Hi); | |
714 _or(T_Hi, Src1Hi); | |
715 _mov(DestHi, T_Hi); | |
716 break; | |
717 case InstArithmetic::Xor: | |
718 _mov(T_Lo, Src0Lo); | |
719 _xor(T_Lo, Src1Lo); | |
720 _mov(DestLo, T_Lo); | |
721 _mov(T_Hi, Src0Hi); | |
722 _xor(T_Hi, Src1Hi); | |
723 _mov(DestHi, T_Hi); | |
724 break; | |
725 case InstArithmetic::Sub: | |
726 _mov(T_Lo, Src0Lo); | |
727 _sub(T_Lo, Src1Lo); | |
728 _mov(DestLo, T_Lo); | |
729 _mov(T_Hi, Src0Hi); | |
730 _sbb(T_Hi, Src1Hi); | |
731 _mov(DestHi, T_Hi); | |
732 break; | |
733 case InstArithmetic::Mul: { | |
734 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; | |
735 Variable *T_4Lo = makeReg(IceType_i32, Reg_eax); | |
jvoung (off chromium)
2014/05/15 23:47:34
When is it appropriate to use getPhysicalRegister(
Jim Stichnoth
2014/05/17 14:14:32
That's right. getPhysicalRegister() is used for t
| |
736 Variable *T_4Hi = makeReg(IceType_i32, Reg_edx); | |
737 // gcc does the following: | |
738 // a=b*c ==> | |
739 // t1 = b.hi; t1 *=(imul) c.lo | |
740 // t2 = c.hi; t2 *=(imul) b.lo | |
741 // t3:eax = b.lo | |
742 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo | |
743 // a.lo = t4.lo | |
744 // t4.hi += t1 | |
745 // t4.hi += t2 | |
746 // a.hi = t4.hi | |
747 _mov(T_1, Src0Hi); | |
748 _imul(T_1, Src1Lo); | |
749 _mov(T_2, Src1Hi); | |
750 _imul(T_2, Src0Lo); | |
751 _mov(T_3, Src0Lo, Reg_eax); | |
752 _mul(T_4Lo, T_3, Src1Lo); | |
753 // The mul instruction produces two dest variables, edx:eax. We | |
754 // create a fake definition of edx to account for this. | |
755 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); | |
756 _mov(DestLo, T_4Lo); | |
757 _add(T_4Hi, T_1); | |
758 _add(T_4Hi, T_2); | |
759 _mov(DestHi, T_4Hi); | |
760 } break; | |
761 case InstArithmetic::Shl: { | |
762 // TODO: Refactor the similarities between Shl, Lshr, and Ashr. | |
763 // gcc does the following: | |
764 // a=b<<c ==> | |
765 // t1:ecx = c.lo & 0xff | |
766 // t2 = b.lo | |
767 // t3 = b.hi | |
768 // t3 = shld t3, t2, t1 | |
769 // t2 = shl t2, t1 | |
770 // test t1, 0x20 | |
771 // je L1 | |
772 // use(t3) | |
773 // t3 = t2 | |
774 // t2 = 0 | |
775 // L1: | |
776 // a.lo = t2 | |
777 // a.hi = t3 | |
778 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; | |
779 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20); | |
780 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | |
781 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
782 _mov(T_1, Src1Lo, Reg_ecx); | |
783 _mov(T_2, Src0Lo); | |
784 _mov(T_3, Src0Hi); | |
785 _shld(T_3, T_2, T_1); | |
786 _shl(T_2, T_1); | |
787 _test(T_1, BitTest); | |
788 _br(InstX8632Br::Br_e, Label); | |
789 // Because of the intra-block control flow, we need to fake a use | |
790 // of T_3 to prevent its earlier definition from being dead-code | |
791 // eliminated in the presence of its later definition. | |
792 Context.insert(InstFakeUse::create(Func, T_3)); | |
793 _mov(T_3, T_2); | |
794 _mov(T_2, Zero); | |
795 Context.insert(Label); | |
796 _mov(DestLo, T_2); | |
797 _mov(DestHi, T_3); | |
798 } break; | |
799 case InstArithmetic::Lshr: { | |
800 // a=b>>c (unsigned) ==> | |
801 // t1:ecx = c.lo & 0xff | |
802 // t2 = b.lo | |
803 // t3 = b.hi | |
804 // t2 = shrd t2, t3, t1 | |
805 // t3 = shr t3, t1 | |
806 // test t1, 0x20 | |
807 // je L1 | |
808 // use(t2) | |
809 // t2 = t3 | |
810 // t3 = 0 | |
811 // L1: | |
812 // a.lo = t2 | |
813 // a.hi = t3 | |
814 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; | |
815 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20); | |
816 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | |
817 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
818 _mov(T_1, Src1Lo, Reg_ecx); | |
819 _mov(T_2, Src0Lo); | |
820 _mov(T_3, Src0Hi); | |
821 _shrd(T_2, T_3, T_1); | |
822 _shr(T_3, T_1); | |
823 _test(T_1, BitTest); | |
824 _br(InstX8632Br::Br_e, Label); | |
825 // Because of the intra-block control flow, we need to fake a use | |
826 // of T_3 to prevent its earlier definition from being dead-code | |
827 // eliminated in the presence of its later definition. | |
828 Context.insert(InstFakeUse::create(Func, T_2)); | |
829 _mov(T_2, T_3); | |
830 _mov(T_3, Zero); | |
831 Context.insert(Label); | |
832 _mov(DestLo, T_2); | |
833 _mov(DestHi, T_3); | |
834 } break; | |
835 case InstArithmetic::Ashr: { | |
836 // a=b>>c (signed) ==> | |
837 // t1:ecx = c.lo & 0xff | |
838 // t2 = b.lo | |
839 // t3 = b.hi | |
840 // t2 = shrd t2, t3, t1 | |
841 // t3 = sar t3, t1 | |
842 // test t1, 0x20 | |
843 // je L1 | |
844 // use(t2) | |
845 // t2 = t3 | |
846 // t3 = sar t3, 0x1f | |
847 // L1: | |
848 // a.lo = t2 | |
849 // a.hi = t3 | |
850 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; | |
851 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20); | |
852 Constant *SignExtend = Ctx->getConstantInt(IceType_i32, 0x1f); | |
853 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
854 _mov(T_1, Src1Lo, Reg_ecx); | |
855 _mov(T_2, Src0Lo); | |
856 _mov(T_3, Src0Hi); | |
857 _shrd(T_2, T_3, T_1); | |
858 _sar(T_3, T_1); | |
859 _test(T_1, BitTest); | |
860 _br(InstX8632Br::Br_e, Label); | |
861 // Because of the intra-block control flow, we need to fake a use | |
862 // of T_3 to prevent its earlier definition from being dead-code | |
863 // eliminated in the presence of its later definition. | |
864 Context.insert(InstFakeUse::create(Func, T_2)); | |
865 _mov(T_2, T_3); | |
866 _sar(T_3, SignExtend); | |
867 Context.insert(Label); | |
868 _mov(DestLo, T_2); | |
869 _mov(DestHi, T_3); | |
870 } break; | |
871 case InstArithmetic::Udiv: { | |
872 const SizeT MaxSrcs = 2; | |
873 InstCall *Call = makeHelperCall("__udivdi3", Dest, MaxSrcs); | |
874 Call->addArg(Inst->getSrc(0)); | |
875 Call->addArg(Inst->getSrc(1)); | |
876 lowerCall(Call); | |
877 } break; | |
878 case InstArithmetic::Sdiv: { | |
879 const SizeT MaxSrcs = 2; | |
880 InstCall *Call = makeHelperCall("__divdi3", Dest, MaxSrcs); | |
881 Call->addArg(Inst->getSrc(0)); | |
882 Call->addArg(Inst->getSrc(1)); | |
883 lowerCall(Call); | |
884 } break; | |
885 case InstArithmetic::Urem: { | |
886 const SizeT MaxSrcs = 2; | |
887 InstCall *Call = makeHelperCall("__umoddi3", Dest, MaxSrcs); | |
888 Call->addArg(Inst->getSrc(0)); | |
889 Call->addArg(Inst->getSrc(1)); | |
890 lowerCall(Call); | |
891 } break; | |
892 case InstArithmetic::Srem: { | |
893 const SizeT MaxSrcs = 2; | |
894 InstCall *Call = makeHelperCall("__moddi3", Dest, MaxSrcs); | |
895 Call->addArg(Inst->getSrc(0)); | |
896 Call->addArg(Inst->getSrc(1)); | |
897 lowerCall(Call); | |
898 } break; | |
899 case InstArithmetic::Fadd: | |
900 case InstArithmetic::Fsub: | |
901 case InstArithmetic::Fmul: | |
902 case InstArithmetic::Fdiv: | |
903 case InstArithmetic::Frem: | |
904 llvm_unreachable("FP instruction with i64 type"); | |
905 break; | |
906 } | |
907 } else { // Dest->getType() != IceType_i64 | |
908 Variable *T_edx = NULL; | |
909 Variable *T = NULL; | |
910 switch (Inst->getOp()) { | |
911 case InstArithmetic::Add: | |
912 _mov(T, Src0); | |
913 _add(T, Src1); | |
914 _mov(Dest, T); | |
915 break; | |
916 case InstArithmetic::And: | |
917 _mov(T, Src0); | |
918 _and(T, Src1); | |
919 _mov(Dest, T); | |
920 break; | |
921 case InstArithmetic::Or: | |
922 _mov(T, Src0); | |
923 _or(T, Src1); | |
924 _mov(Dest, T); | |
925 break; | |
926 case InstArithmetic::Xor: | |
927 _mov(T, Src0); | |
928 _xor(T, Src1); | |
929 _mov(Dest, T); | |
930 break; | |
931 case InstArithmetic::Sub: | |
932 _mov(T, Src0); | |
933 _sub(T, Src1); | |
934 _mov(Dest, T); | |
935 break; | |
936 case InstArithmetic::Mul: | |
937 // TODO: Optimize for llvm::isa<Constant>(Src1) | |
938 // TODO: Strength-reduce multiplications by a constant, | |
939 // particularly -1 and powers of 2. Advanced: use lea to | |
940 // multiply by 3, 5, 9. | |
941 // | |
942 // The 8-bit version of imul only allows the form "imul r/m8" | |
943 // where T must be in eax. | |
944 if (Dest->getType() == IceType_i8) | |
945 _mov(T, Src0, Reg_eax); | |
946 else | |
947 _mov(T, Src0); | |
948 _imul(T, Src1); | |
949 _mov(Dest, T); | |
950 break; | |
951 case InstArithmetic::Shl: | |
952 _mov(T, Src0); | |
953 if (!llvm::isa<Constant>(Src1)) | |
954 Src1 = legalizeToVar(Src1, false, Reg_ecx); | |
955 _shl(T, Src1); | |
956 _mov(Dest, T); | |
957 break; | |
958 case InstArithmetic::Lshr: | |
959 _mov(T, Src0); | |
960 if (!llvm::isa<Constant>(Src1)) | |
961 Src1 = legalizeToVar(Src1, false, Reg_ecx); | |
962 _shr(T, Src1); | |
963 _mov(Dest, T); | |
964 break; | |
965 case InstArithmetic::Ashr: | |
966 _mov(T, Src0); | |
967 if (!llvm::isa<Constant>(Src1)) | |
968 Src1 = legalizeToVar(Src1, false, Reg_ecx); | |
969 _sar(T, Src1); | |
970 _mov(Dest, T); | |
971 break; | |
972 case InstArithmetic::Udiv: | |
973 if (Dest->getType() == IceType_i8) { | |
974 Variable *T_ah = NULL; | |
975 Constant *Zero = Ctx->getConstantInt(IceType_i8, 0); | |
976 _mov(T, Src0, Reg_eax); | |
977 _mov(T_ah, Zero, Reg_ah); | |
978 _div(T_ah, Src1, T); | |
jvoung (off chromium)
2014/05/15 23:47:34
I'm probably missing something:
why is it _div(T_
Jim Stichnoth
2014/05/17 14:14:32
Nice - your simpler suggestion seems to work. :)
| |
979 Context.insert(InstFakeUse::create(Func, T_ah)); | |
980 _mov(Dest, T); | |
981 } else { | |
982 // TODO: fix for 8-bit, see Urem | |
jvoung (off chromium)
2014/05/15 23:47:34
Should the TODO be under the above branch for == I
Jim Stichnoth
2014/05/17 14:14:32
Done. I think that TODO was left in by accident.
| |
983 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | |
984 _mov(T, Src0, Reg_eax); | |
985 _mov(T_edx, Zero, Reg_edx); | |
986 _div(T, Src1, T_edx); | |
987 _mov(Dest, T); | |
988 } | |
989 break; | |
990 case InstArithmetic::Sdiv: | |
991 T_edx = makeReg(IceType_i32, Reg_edx); | |
992 _mov(T, Src0, Reg_eax); | |
993 _cdq(T_edx, T); | |
994 _idiv(T, Src1, T_edx); | |
995 _mov(Dest, T); | |
996 break; | |
997 case InstArithmetic::Urem: | |
998 if (Dest->getType() == IceType_i8) { | |
999 Variable *T_ah = NULL; | |
1000 Constant *Zero = Ctx->getConstantInt(IceType_i8, 0); | |
1001 _mov(T, Src0, Reg_eax); | |
1002 _mov(T_ah, Zero, Reg_ah); | |
1003 _div(T_ah, Src1, T); | |
1004 _mov(Dest, T_ah); | |
1005 } else { | |
1006 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | |
1007 _mov(T_edx, Zero, Reg_edx); | |
1008 _mov(T, Src0, Reg_eax); | |
1009 _div(T_edx, Src1, T); | |
1010 _mov(Dest, T_edx); | |
1011 } | |
1012 break; | |
1013 case InstArithmetic::Srem: | |
1014 T_edx = makeReg(IceType_i32, Reg_edx); | |
1015 _mov(T, Src0, Reg_eax); | |
1016 _cdq(T_edx, T); | |
1017 _idiv(T_edx, Src1, T); | |
1018 _mov(Dest, T_edx); | |
1019 break; | |
1020 case InstArithmetic::Fadd: | |
1021 _mov(T, Src0); | |
1022 _addss(T, Src1); | |
1023 _mov(Dest, T); | |
1024 break; | |
1025 case InstArithmetic::Fsub: | |
1026 _mov(T, Src0); | |
1027 _subss(T, Src1); | |
1028 _mov(Dest, T); | |
1029 break; | |
1030 case InstArithmetic::Fmul: | |
1031 _mov(T, Src0); | |
1032 _mulss(T, Src1); | |
1033 _mov(Dest, T); | |
1034 break; | |
1035 case InstArithmetic::Fdiv: | |
1036 _mov(T, Src0); | |
1037 _divss(T, Src1); | |
1038 _mov(Dest, T); | |
1039 break; | |
1040 case InstArithmetic::Frem: { | |
1041 const SizeT MaxSrcs = 2; | |
1042 Type Ty = Dest->getType(); | |
1043 InstCall *Call = | |
1044 makeHelperCall(Ty == IceType_f32 ? "fmodf" : "fmod", Dest, MaxSrcs); | |
1045 Call->addArg(Src0); | |
1046 Call->addArg(Src1); | |
1047 return lowerCall(Call); | |
1048 } break; | |
1049 } | |
1050 } | |
1051 } | |
1052 | |
1053 void TargetX8632::lowerAssign(const InstAssign *Inst) { | |
1054 Variable *Dest = Inst->getDest(); | |
1055 Operand *Src0 = legalize(Inst->getSrc(0)); | |
jvoung (off chromium)
2014/05/19 20:28:54
Probably on your mind already since you noted a TO
Jim Stichnoth
2014/05/20 18:20:08
My thought on these kinds of opportunities (e.g. s
| |
1056 assert(Dest->getType() == Src0->getType()); | |
1057 if (Dest->getType() == IceType_i64) { | |
1058 Operand *Src0Lo = loOperand(Src0); | |
1059 Operand *Src0Hi = hiOperand(Src0); | |
1060 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
1061 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
1062 Variable *T_Lo = NULL, *T_Hi = NULL; | |
1063 _mov(T_Lo, Src0Lo); | |
1064 _mov(DestLo, T_Lo); | |
1065 _mov(T_Hi, Src0Hi); | |
1066 _mov(DestHi, T_Hi); | |
1067 } else { | |
1068 const bool AllowOverlap = true; | |
1069 // RI is either a physical register or an immediate. | |
1070 Operand *RI = legalize(Src0, Legal_Reg | Legal_Imm, AllowOverlap); | |
jvoung (off chromium)
2014/05/19 20:28:54
Does this mean that Inst->getSrc(0) gets legalized
Jim Stichnoth
2014/05/20 18:20:08
Done.
| |
1071 _mov(Dest, RI); | |
1072 } | |
1073 } | |
1074 | |
1075 void TargetX8632::lowerBr(const InstBr *Inst) { | |
1076 if (Inst->isUnconditional()) { | |
1077 _br(Inst->getTargetUnconditional()); | |
1078 } else { | |
1079 Operand *Src0 = legalize(Inst->getCondition()); | |
1080 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | |
1081 _cmp(Src0, Zero); | |
jvoung (off chromium)
2014/05/19 20:28:54
Is it better to _test reg,reg than _cmp reg, zero?
Jim Stichnoth
2014/05/20 18:20:08
That's right. That could be done as a peephole as
| |
1082 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); | |
1083 } | |
1084 } | |
1085 | |
void TargetX8632::lowerCall(const InstCall *Instr) {
  // Lowers a call: pushes arguments right-to-left, emits the call,
  // models the ABI's register side effects with fake instructions, and
  // copies any return value out of eax / edx:eax / st(0).
  //
  // Generate a sequence of push instructions, pushing right to left,
  // keeping track of stack offsets in case a push involves a stack
  // operand and we are using an esp-based frame.
  uint32_t StackOffset = 0;
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call push instructions and the post-call esp adjustment get
  // eliminated as well.
  for (SizeT NumArgs = Instr->getNumArgs(), i = 0; i < NumArgs; ++i) {
    Operand *Arg = legalize(Instr->getArg(NumArgs - i - 1));
    if (Arg->getType() == IceType_i64) {
      // 64-bit integers are pushed as two 32-bit halves, high half first.
      _push(hiOperand(Arg));
      _push(loOperand(Arg));
    } else if (Arg->getType() == IceType_f64) {
      // If the Arg turns out to be a memory operand, we need to push
      // 8 bytes, which requires two push instructions.  This ends up
      // being somewhat clumsy in the current IR, so we use a
      // workaround.  Force the operand into a (xmm) register, and
      // then push the register.  An xmm register push is actually not
      // possible in x86, but the Push instruction emitter handles
      // this by decrementing the stack pointer and directly writing
      // the xmm register value.
      Variable *T = NULL;
      _mov(T, Arg);
      _push(T);
    } else {
      _push(Arg);
    }
    StackOffset += typeWidthInBytesOnStack(Arg->getType());
  }
  // Generate the call instruction.  Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  Variable *eax = NULL; // doubles as RegLo as necessary
  Variable *edx = NULL;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      // Integer results up to 32 bits come back in eax.
      eax = makeReg(Dest->getType(), Reg_eax);
      break;
    case IceType_i64:
      // 64-bit results come back in the edx:eax pair.
      eax = makeReg(IceType_i32, Reg_eax);
      edx = makeReg(IceType_i32, Reg_edx);
      break;
    case IceType_f32:
    case IceType_f64:
      // Leave eax==edx==NULL, and capture the result with the fstp
      // instruction.
      break;
    }
  }
  Operand *CallTarget = legalize(Instr->getCallTarget());
  Inst *NewCall = InstX8632Call::create(Func, eax, CallTarget);
  Context.insert(NewCall);
  // The call instruction itself only names eax as a dest; fake a
  // definition of edx so the high half of a 64-bit result is not
  // treated as undefined/dead by later passes.
  if (edx)
    Context.insert(InstFakeDef::create(Func, edx));

  // Add the appropriate offset to esp to pop the pushed arguments.
  if (StackOffset) {
    Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
    _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset));
  }

  // Insert a register-kill pseudo instruction, modeling that the
  // callee may clobber any of the scratch registers.
  VarList KilledRegs;
  for (SizeT i = 0; i < ScratchRegs.size(); ++i) {
    if (ScratchRegs[i])
      KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));
  }
  // On x86 ScratchRegs is never all-empty in practice (per the review
  // thread); the guard is kept for hypothetical targets without
  // scratch registers.
  if (!KilledRegs.empty()) {
    Inst *Kill = InstFakeKill::create(Func, KilledRegs, NewCall);
    Context.insert(Kill);
  }

  // Generate a FakeUse to keep the call live if necessary.
  // (hasSideEffects() is currently always true for calls -- the check
  // anticipates future support for pure functions; see review thread.)
  if (Instr->hasSideEffects() && eax) {
    Inst *FakeUse = InstFakeUse::create(Func, eax);
    Context.insert(FakeUse);
  }

  // Generate Dest=eax assignment.
  if (Dest && eax) {
    if (edx) {
      // 64-bit result: split Dest into lo/hi halves and copy each,
      // hinting the register allocator toward eax/edx so the copies
      // can be coalesced away.
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      DestLo->setPreferredRegister(eax, false);
      DestHi->setPreferredRegister(edx, false);
      _mov(DestLo, eax);
      _mov(DestHi, edx);
    } else {
      Dest->setPreferredRegister(eax, false);
      _mov(Dest, eax);
    }
  }

  // Special treatment for an FP function which returns its result in
  // st(0).
  if (Dest &&
      (Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64)) {
    _fstp(Dest);
    // If Dest ends up being a physical xmm register, the fstp emit
    // code will route st(0) through a temporary stack slot.
  }
}
1200 | |
void TargetX8632::lowerCast(const InstCast *Inst) {
  // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  // Src0RM is the source operand legalized to physical register or memory, but
  // not immediate, since the relevant x86 native instructions don't allow an
  // immediate operand.  If the operand is an immediate, we could consider
  // computing the strength-reduced result at translation time, but we're
  // unlikely to see something like that in the bitcode that the optimizer
  // wouldn't have already taken care of.
  Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem, true);
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext:
    if (Dest->getType() == IceType_i64) {
      // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      // A 32-bit source needs no sign extension for the low half; a
      // plain mov suffices.
      if (Src0RM->getType() == IceType_i32)
        _mov(T_Lo, Src0RM);
      else
        _movsx(T_Lo, Src0RM);
      _mov(DestLo, T_Lo);
      Variable *T_Hi = NULL;
      Constant *Shift = Ctx->getConstantInt(IceType_i32, 31);
      // The high half is the sign bit replicated: arithmetic shift
      // right by 31.
      _mov(T_Hi, T_Lo);
      _sar(T_Hi, Shift);
      _mov(DestHi, T_Hi);
    } else {
      // TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and
      // also copy to the high operand of a 64-bit variable.
      // t1 = movsx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movsx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  case InstCast::Zext:
    if (Dest->getType() == IceType_i64) {
      // t1=movzx src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *Tmp = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(Tmp, Src0RM);
      else
        _movzx(Tmp, Src0RM);
      _mov(DestLo, Tmp);
      _mov(DestHi, Zero);
    } else if (Src0RM->getType() == IceType_i1) {
      // t = Src0RM; t &= 1; Dest = t
      // Mask with 1 so that only bit 0 of the i1 value survives.
      Operand *One = Ctx->getConstantInt(IceType_i32, 1);
      Variable *T = makeReg(IceType_i32);
      _movzx(T, Src0RM);
      _and(T, One);
      _mov(Dest, T);
    } else {
      // t1 = movzx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movzx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  case InstCast::Trunc: {
    // Truncation just copies the low-order portion of the source.
    if (Src0RM->getType() == IceType_i64)
      Src0RM = loOperand(Src0RM);
    // t1 = trunc Src0RM; Dest = t1
    Variable *T = NULL;
    _mov(T, Src0RM);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptrunc:
  case InstCast::Fpext: {
    // t1 = cvt Src0RM; Dest = t1
    Variable *T = makeReg(Dest->getType());
    _cvt(T, Src0RM);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptosi:
    if (Dest->getType() == IceType_i64) {
      // Use a helper for converting floating-point values to 64-bit
      // integers.  SSE2 appears to have no way to convert from xmm
      // registers to something like the edx:eax register pair, and
      // gcc and clang both want to use x87 instructions complete with
      // temporary manipulation of the status word.  This helper is
      // not needed for x86-64.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type SrcType = Inst->getSrc(0)->getType();
      InstCall *Call = makeHelperCall(
          SrcType == IceType_f32 ? "cvtftosi64" : "cvtdtosi64", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else {
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      _mov(Dest, T_2);
      T_2->setPreferredRegister(T_1, true);
    }
    break;
  case InstCast::Fptoui:
    if (Dest->getType() == IceType_i64 || Dest->getType() == IceType_i32) {
      // Use a helper for both x86-32 and x86-64.
      // NOTE(review): split64 is invoked even in the i32 case --
      // presumably a no-op for non-i64 variables; confirm.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      IceString DstSubstring = (DestType == IceType_i64 ? "64" : "32");
      IceString SrcSubstring = (SrcType == IceType_f32 ? "f" : "d");
      // Possibilities are cvtftoui32, cvtdtoui32, cvtftoui64, cvtdtoui64
      IceString TargetString = "cvt" + SrcSubstring + "toui" + DstSubstring;
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      _mov(Dest, T_2);
      T_2->setPreferredRegister(T_1, true);
    }
    break;
  case InstCast::Sitofp:
    if (Src0RM->getType() == IceType_i64) {
      // Use a helper for x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      InstCall *Call = makeHelperCall(
          DestType == IceType_f32 ? "cvtsi64tof" : "cvtsi64tod", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // Sign-extend the operand.
      // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movsx(T_1, Src0RM);
      _cvt(T_2, T_1);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Uitofp:
    if (Src0RM->getType() == IceType_i64 || Src0RM->getType() == IceType_i32) {
      // Use a helper for x86-32 and x86-64.  Also use a helper for
      // i32 on x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      IceString SrcSubstring = (Src0RM->getType() == IceType_i64 ? "64" : "32");
      IceString DstSubstring = (DestType == IceType_f32 ? "f" : "d");
      // Possibilities are cvtui32tof, cvtui32tod, cvtui64tof, cvtui64tod
      IceString TargetString = "cvtui" + SrcSubstring + "to" + DstSubstring;
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // Zero-extend the operand.
      // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movzx(T_1, Src0RM);
      _cvt(T_2, T_1);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Bitcast:
    if (Dest->getType() == Src0RM->getType()) {
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0RM);
      lowerAssign(Assign);
      // NOTE(review): reaching here aborts in debug builds even though
      // the assignment above was already emitted; same-type (pointer)
      // bitcasts are flagged as unsupported for now -- confirm intent.
      llvm_unreachable("Pointer bitcasts aren't lowered correctly.");
      return;
    }
    switch (Dest->getType()) {
    default:
      llvm_unreachable("Unexpected Bitcast dest type");
    case IceType_i32:
    case IceType_f32: {
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
             (DestType == IceType_f32 && SrcType == IceType_i32));
      // a.i32 = bitcast b.f32 ==>
      //   t.f32 = b.f32
      //   s.f32 = spill t.f32
      //   a.i32 = s.f32
      // The zero-weight Spill variable forces the value through a
      // stack slot, which reinterprets the bits across register files.
      Variable *T = NULL;
      // TODO: Should be able to force a spill setup by calling legalize() with
      // Legal_Mem and not Legal_Reg or Legal_Imm.
      Variable *Spill = Func->makeVariable(SrcType, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(Dest, true);
      _mov(T, Src0RM);
      _mov(Spill, T);
      _mov(Dest, Spill);
    } break;
    case IceType_i64: {
      assert(Src0RM->getType() == IceType_f64);
      // a.i64 = bitcast b.f64 ==>
      //   s.f64 = spill b.f64
      //   t_lo.i32 = lo(s.f64)
      //   a_lo.i32 = t_lo.i32
      //   t_hi.i32 = hi(s.f64)
      //   a_hi.i32 = t_hi.i32
      Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(llvm::dyn_cast<Variable>(Src0RM), true);
      _mov(Spill, Src0RM);

      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *T_Hi = makeReg(IceType_i32);
      // VariableSplit views address the low/high 32-bit halves of the
      // 64-bit stack slot.
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);

      _mov(T_Lo, SpillLo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, SpillHi);
      _mov(DestHi, T_Hi);
    } break;
    case IceType_f64: {
      assert(Src0RM->getType() == IceType_i64);
      // a.f64 = bitcast b.i64 ==>
      //   t_lo.i32 = b_lo.i32
      //   lo(s.f64) = t_lo.i32
      //   FakeUse(s.f64)
      //   t_hi.i32 = b_hi.i32
      //   hi(s.f64) = t_hi.i32
      //   a.f64 = s.f64
      Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(Dest, true);

      // The FakeDef establishes Spill as defined before its halves are
      // stored through the VariableSplit views below.
      Context.insert(InstFakeDef::create(Func, Spill));

      Variable *T_Lo = NULL, *T_Hi = NULL;
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);
      _mov(T_Lo, loOperand(Src0RM));
      _store(T_Lo, SpillLo);
      _mov(T_Hi, hiOperand(Src0RM));
      _store(T_Hi, SpillHi);
      _mov(Dest, Spill);
    } break;
    }
    break;
  }
}
1472 | |
void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
  Operand *Src0 = Inst->getSrc(0);
  Operand *Src1 = Inst->getSrc(1);
  Variable *Dest = Inst->getDest();
  // Lowering a = fcmp cond, b, c
  //   ucomiss b, c       /* only if C1 != Br_None */
  //   /* but swap b,c order if SwapOperands==true */
  //   mov a, <default>
  //   j<C1> label        /* only if C1 != Br_None */
  //   j<C2> label        /* only if C2 != Br_None */
  //   FakeUse(a)         /* only if C1 != Br_None */
  //   mov a, !<default>  /* only if C1 != Br_None */
  //   label:             /* only if C1 != Br_None */
  InstFcmp::FCond Condition = Inst->getCondition();
  size_t Index = static_cast<size_t>(Condition);
  assert(Index < TableFcmpSize);
  // The table is indexed by InstFcmp::Condition.  Make sure it didn't fall
  // out of order.
  if (TableFcmp[Index].SwapOperands) {
    // Per the table, some conditions map onto fewer branches once the
    // comparison operands are swapped.
    Operand *Tmp = Src0;
    Src0 = Src1;
    Src1 = Tmp;
  }
  bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);
  bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);
  if (HasC1) {
    // Force the first ucomiss operand through a register temporary;
    // the second may be register or memory.
    Src0 = legalize(Src0);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
    Variable *T = NULL;
    _mov(T, Src0);
    _ucomiss(T, Src1RM);
  }
  Constant *Default =
      Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default);
  _mov(Dest, Default);
  if (HasC1) {
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    _br(TableFcmp[Index].C1, Label);
    if (HasC2) {
      _br(TableFcmp[Index].C2, Label);
    }
    // The FakeUse keeps the "mov Dest, Default" above from being
    // dead-code eliminated despite the redefinition below; the
    // intra-block branches hide the real use.
    Context.insert(InstFakeUse::create(Func, Dest));
    Constant *NonDefault =
        Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default);
    _mov(Dest, NonDefault);
    Context.insert(Label);
  }
}
1521 | |
void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  Variable *Dest = Inst->getDest();

  // If Src1 is an immediate, or known to be a physical register, we can
  // allow Src0 to be a memory operand.  Otherwise, Src0 must be copied into
  // a physical register.  (Actually, either Src0 or Src1 can be chosen for
  // the physical register, but unfortunately we have to commit to one or
  // the other before register allocation.)
  bool IsSrc1ImmOrReg = false;
  if (llvm::isa<Constant>(Src1))
    IsSrc1ImmOrReg = true;
  else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
    if (Var->hasReg())
      IsSrc1ImmOrReg = true;
  }

  // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
  Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
  Constant *One = Ctx->getConstantInt(IceType_i32, 1);
  if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableIcmp64Size);
    // The table is indexed by InstIcmp::Condition.  Make sure it didn't fall
    // out of order.
    Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
    Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
    if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
      // 64-bit (in)equality: the answer is known as soon as either
      // half differs, so a single label suffices.
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
      _cmp(loOperand(Src0), Src1LoRI);
      _br(InstX8632Br::Br_ne, Label);
      _cmp(hiOperand(Src0), Src1HiRI);
      _br(InstX8632Br::Br_ne, Label);
      // FakeUse keeps the first mov of Dest alive despite the
      // redefinition below (the branches hide the real control flow).
      Context.insert(InstFakeUse::create(Func, Dest));
      _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));
      Context.insert(Label);
    } else {
      // 64-bit ordered comparison: the high halves are compared first
      // (C1/C2); only if neither branch fires does the low-half
      // comparison (C3) decide the result.
      InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
      InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
      _mov(Dest, One);
      _cmp(hiOperand(Src0), Src1HiRI);
      _br(TableIcmp64[Index].C1, LabelTrue);
      _br(TableIcmp64[Index].C2, LabelFalse);
      _cmp(loOperand(Src0), Src1LoRI);
      _br(TableIcmp64[Index].C3, LabelTrue);
      Context.insert(LabelFalse);
      Context.insert(InstFakeUse::create(Func, Dest));
      _mov(Dest, Zero);
      Context.insert(LabelTrue);
    }
    return;
  }
  // cmp b, c
  Operand *Src0New =
      legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
  InstX8632Label *Label = InstX8632Label::create(Func, this);
  _cmp(Src0New, Src1);
  _mov(Dest, One);
  _br(getIcmp32Mapping(Inst->getCondition()), Label);
  Context.insert(InstFakeUse::create(Func, Dest));
  _mov(Dest, Zero);
  Context.insert(Label);
}
1588 | |
void TargetX8632::lowerLoad(const InstLoad *Inst) {
  // A Load instruction can be treated the same as an Assign
  // instruction, after the source operand is transformed into an
  // OperandX8632Mem operand.  Note that the address mode
  // optimization already creates an OperandX8632Mem operand, so it
  // doesn't need another level of transformation.
  Type Ty = Inst->getDest()->getType();
  Operand *Src0 = Inst->getSourceAddress();
  if (!llvm::isa<OperandX8632Mem>(Src0)) {
    // The address is either a constant (representing a global) or a
    // variable, so exactly one of Base/Offset ends up non-NULL.
    Variable *Base = llvm::dyn_cast<Variable>(Src0);
    Constant *Offset = llvm::dyn_cast<Constant>(Src0);
    assert(Base || Offset);
    Src0 = OperandX8632Mem::create(Func, Ty, Base, Offset);
  }

  // Reuse the Assign lowering path for the actual move.
  InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);
  lowerAssign(Assign);
}
1609 | |
1610 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) { | |
1611 Func->setError("Phi lowering not implemented"); | |
jvoung (off chromium)
2014/05/15 23:47:34
nit: "not implemented" sounds like something will
Jim Stichnoth
2014/05/17 14:14:32
My longer-term plan is to delay phi lowering until
| |
1612 } | |
1613 | |
1614 void TargetX8632::lowerRet(const InstRet *Inst) { | |
1615 Variable *Reg = NULL; | |
1616 if (Inst->hasRetValue()) { | |
1617 Operand *Src0 = legalize(Inst->getRetValue()); | |
1618 if (Src0->getType() == IceType_i64) { | |
1619 Variable *eax = legalizeToVar(loOperand(Src0), false, Reg_eax); | |
1620 Variable *edx = legalizeToVar(hiOperand(Src0), false, Reg_edx); | |
1621 Reg = eax; | |
1622 Context.insert(InstFakeUse::create(Func, edx)); | |
1623 } else if (Src0->getType() == IceType_f32 || | |
1624 Src0->getType() == IceType_f64) { | |
1625 _fld(Src0); | |
1626 } else { | |
1627 _mov(Reg, Src0, Reg_eax); | |
1628 } | |
1629 } | |
1630 _ret(Reg); | |
1631 // Add a fake use of esp to make sure esp stays alive for the entire | |
1632 // function. Otherwise post-call esp adjustments get dead-code | |
1633 // eliminated. TODO: Are there more places where the fake use | |
1634 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not | |
1635 // have a ret instruction. | |
1636 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); | |
1637 Context.insert(InstFakeUse::create(Func, esp)); | |
1638 } | |
1639 | |
void TargetX8632::lowerSelect(const InstSelect *Inst) {
  // Lowering template:
  //   a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
  // i.e. unconditionally assign the "true" value, then skip the
  // "false" assignment when the condition is nonzero.
  Variable *Dest = Inst->getDest();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = legalize(Inst->getCondition());
  Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
  InstX8632Label *Label = InstX8632Label::create(Func, this);

  if (Dest->getType() == IceType_i64) {
    // 64-bit case: apply the same template to the lo/hi halves
    // separately.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true);
    Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true);
    _cmp(Condition, Zero);
    _mov(DestLo, SrcLoRI);
    _mov(DestHi, SrcHiRI);
    _br(InstX8632Br::Br_ne, Label);
    // The fake uses keep the first pair of assignments from being
    // dead-code eliminated (the second pair overwrites them on the
    // fall-through path).
    Context.insert(InstFakeUse::create(Func, DestLo));
    Context.insert(InstFakeUse::create(Func, DestHi));
    Operand *SrcFLo = loOperand(SrcF);
    Operand *SrcFHi = hiOperand(SrcF);
    SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true);
    SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true);
    _mov(DestLo, SrcLoRI);
    _mov(DestHi, SrcHiRI);
  } else {
    _cmp(Condition, Zero);
    SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true);
    _mov(Dest, SrcT);
    _br(InstX8632Br::Br_ne, Label);
    // Fake use so the first assignment isn't dead-code eliminated.
    Context.insert(InstFakeUse::create(Func, Dest));
    SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true);
    _mov(Dest, SrcF);
  }

  Context.insert(Label);
}
1678 | |
1679 void TargetX8632::lowerStore(const InstStore *Inst) { | |
1680 Operand *Value = Inst->getData(); | |
1681 Operand *Addr = Inst->getAddr(); | |
1682 OperandX8632Mem *NewAddr = llvm::dyn_cast<OperandX8632Mem>(Addr); | |
1683 // Address mode optimization already creates an OperandX8632Mem | |
1684 // operand, so it doesn't need another level of transformation. | |
1685 if (!NewAddr) { | |
1686 // The address will be either a constant (which represents a global | |
1687 // variable) or a variable, so either the Base or Offset component | |
1688 // of the OperandX8632Mem will be set. | |
1689 Variable *Base = llvm::dyn_cast<Variable>(Addr); | |
1690 Constant *Offset = llvm::dyn_cast<Constant>(Addr); | |
1691 assert(Base || Offset); | |
1692 NewAddr = OperandX8632Mem::create(Func, Value->getType(), Base, Offset); | |
1693 } | |
1694 NewAddr = llvm::cast<OperandX8632Mem>(legalize(NewAddr)); | |
1695 | |
1696 if (NewAddr->getType() == IceType_i64) { | |
1697 Value = legalize(Value); | |
1698 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true); | |
1699 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true); | |
1700 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); | |
1701 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); | |
1702 } else { | |
1703 Value = legalize(Value, Legal_Reg | Legal_Imm, true); | |
1704 _store(Value, NewAddr); | |
1705 } | |
1706 } | |
1707 | |
1708 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { | |
1709 // This implements the most naive possible lowering. | |
1710 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default | |
1711 Operand *Src0 = Inst->getComparison(); | |
1712 SizeT NumCases = Inst->getNumCases(); | |
1713 // OK, we'll be slightly less naive by forcing Src into a physical | |
1714 // register if there are 2 or more uses. | |
1715 if (NumCases >= 2) | |
1716 Src0 = legalizeToVar(Src0, true); | |
1717 else | |
1718 Src0 = legalize(Src0, Legal_All, true); | |
1719 for (SizeT I = 0; I < NumCases; ++I) { | |
1720 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I)); | |
1721 _cmp(Src0, Value); | |
1722 _br(InstX8632Br::Br_e, Inst->getLabel(I)); | |
1723 } | |
1724 | |
1725 _br(Inst->getLabelDefault()); | |
1726 } | |
1727 | |
1728 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { | |
1729 const SizeT MaxSrcs = 0; | |
1730 Variable *Dest = NULL; | |
1731 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs); | |
1732 lowerCall(Call); | |
1733 } | |
1734 | |
Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
                               bool AllowOverlap, int32_t RegNum) {
  // Return an operand equivalent to From that satisfies the Allowed
  // mask, inserting moves into fresh registers where necessary.
  // Legal_Reg must always be allowed as the fallback form; a specific
  // RegNum may only be requested together with Allowed == Legal_Reg.
  assert(Allowed & Legal_Reg);
  assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
    // Memory operand: the base and index components must themselves
    // live in physical registers, so legalize each non-NULL component
    // and rebuild the operand only if something changed.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = Base;
    Variable *RegIndex = Index;
    if (Base) {
      RegBase = legalizeToVar(Base, true);
    }
    if (Index) {
      RegIndex = legalizeToVar(Index, true);
    }
    if (Base != RegBase || Index != RegIndex) {
      From =
          OperandX8632Mem::create(Func, Mem->getType(), RegBase,
                                  Mem->getOffset(), RegIndex, Mem->getShift());
    }

    // If a memory form isn't acceptable, load it into a register.
    if (!(Allowed & Legal_Mem)) {
      Variable *Reg = makeReg(From->getType(), RegNum);
      _mov(Reg, From, RegNum);
      From = Reg;
    }
    return From;
  }
  if (llvm::isa<Constant>(From)) {
    // Constant: acceptable as-is when immediates are allowed,
    // otherwise materialize it into a register.
    if (!(Allowed & Legal_Imm)) {
      Variable *Reg = makeReg(From->getType(), RegNum);
      _mov(Reg, From);
      From = Reg;
    }
    return From;
  }
  if (Variable *Var = llvm::dyn_cast<Variable>(From)) {
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var->getRegNum() is unknown, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !Var->hasReg()) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      Variable *Reg = makeReg(From->getType(), RegNum);
      if (RegNum == Variable::NoRegister) {
        // No specific register required: hint the allocator to reuse
        // Var's register (possibly overlapping live ranges).
        Reg->setPreferredRegister(Var, AllowOverlap);
      }
      _mov(Reg, From);
      From = Reg;
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");
  return From;
}
1789 | |
1790 Variable *TargetX8632::legalizeToVar(Operand *From, bool AllowOverlap, | |
1791 int32_t RegNum) { | |
1792 return llvm::cast<Variable>(legalize(From, Legal_Reg, AllowOverlap, RegNum)); | |
1793 } | |
1794 | |
1795 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { | |
1796 Variable *Reg = Func->makeVariable(Type, Context.getNode()); | |
1797 if (RegNum == Variable::NoRegister) | |
1798 Reg->setWeightInfinite(); | |
1799 else | |
1800 Reg->setRegNum(RegNum); | |
1801 return Reg; | |
1802 } | |
1803 | |
void TargetX8632::postLower() {
  // Simplest-possible per-instruction-context register assignment for
  // the "fast" (-Om1) target: only infinite-weight (must-have-register)
  // variables get colored, from the set of registers not already
  // pre-colored in this context.
  if (Ctx->getOptLevel() != Opt_m1)
    return;
  // TODO: Avoid recomputing WhiteList every instruction.
  llvm::SmallBitVector WhiteList = getRegisterSet(RegSet_All, RegSet_None);
  // Make one pass to black-list pre-colored registers.  TODO: If
  // there was some prior register allocation pass that made register
  // assignments, those registers need to be black-listed here as
  // well.
  for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
       ++I) {
    const Inst *Inst = *I;
    if (Inst->isDeleted())
      continue;
    // FakeKill instructions don't constrain register availability.
    if (llvm::isa<InstFakeKill>(Inst))
      continue;
    // NOTE(review): VarIndex is incremented but never read in this
    // pass; presumably kept for symmetry with similar operand walks.
    SizeT VarIndex = 0;
    for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
      Operand *Src = Inst->getSrc(SrcNum);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {
        const Variable *Var = Src->getVar(J);
        if (!Var->hasReg())
          continue;
        // This register is already claimed; remove it from the pool.
        WhiteList[Var->getRegNum()] = false;
      }
    }
  }
  // The second pass colors infinite-weight variables.
  llvm::SmallBitVector AvailableRegisters = WhiteList;
  for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
       ++I) {
    const Inst *Inst = *I;
    if (Inst->isDeleted())
      continue;
    SizeT VarIndex = 0;
    for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
      Operand *Src = Inst->getSrc(SrcNum);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {
        Variable *Var = Src->getVar(J);
        // Only color variables that don't yet have a register but are
        // required to get one (infinite weight).
        if (Var->hasReg())
          continue;
        if (!Var->getWeight().isInf())
          continue;
        llvm::SmallBitVector AvailableTypedRegisters =
            AvailableRegisters & getRegisterSetForType(Var->getType());
        if (!AvailableTypedRegisters.any()) {
          // This is a hack in case we run out of physical registers
          // due to an excessive number of "push" instructions from
          // lowering a call.
          AvailableRegisters = WhiteList;
          AvailableTypedRegisters =
              AvailableRegisters & getRegisterSetForType(Var->getType());
        }
        assert(AvailableTypedRegisters.any());
        // Greedily take the first suitable register and retire it from
        // the available pool.
        int32_t RegNum = AvailableTypedRegisters.find_first();
        Var->setRegNum(RegNum);
        AvailableRegisters[RegNum] = false;
      }
    }
  }
}
1867 | |
1868 } // end of namespace Ice | |
OLD | NEW |