src/IceTargetLoweringX8632.cpp - Issue 265703002: Add Om1 lowering with no optimizations

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 265703002: Add Om1 lowering with no optimizations (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master

Patch Set: Created 6 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/IceTargetLoweringX8632.cpp

diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp

new file mode 100644

index 0000000000000000000000000000000000000000..cca4c1fdeb0672d89e1cd7d6160ecf980278c38f

--- /dev/null

+++ b/src/IceTargetLoweringX8632.cpp

@@ -0,0 +1,1869 @@

+//===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//

+//

+// The Subzero Code Generator

+//

+// This file is distributed under the University of Illinois Open Source

+// License. See LICENSE.TXT for details.

+//

+//===----------------------------------------------------------------------===//

+//

+// This file implements the TargetLoweringX8632 class, which

+// consists almost entirely of the lowering sequence for each

+// high-level instruction. It also implements

+// TargetX8632Fast::postLower() which does the simplest possible

+// register allocation for the "fast" target.

+//

+//===----------------------------------------------------------------------===//

+#include "IceDefs.h"

+#include "IceCfg.h"

+#include "IceCfgNode.h"

+#include "IceInstX8632.h"

+#include "IceOperand.h"

+#include "IceTargetLoweringX8632.def"

+#include "IceTargetLoweringX8632.h"

+namespace Ice {

+namespace {

+// The following table summarizes the logic for lowering the fcmp instruction.

+// There is one table entry for each of the 16 conditions. A comment in

+// lowerFcmp() describes the lowering template. In the most general case, there

+// is a compare followed by two conditional branches, because some fcmp

+// conditions don't map to a single x86 conditional branch. However, in many

+// cases it is possible to swap the operands in the comparison and have a single

+// conditional branch. Since it's quite tedious to validate the table by hand,

+// good execution tests are helpful.

+const struct _TableFcmp {

+ uint32_t Default;

+ bool SwapOperands;

+ InstX8632Br::BrCond C1, C2;

+} TableFcmp[] = {

+#define X(val, dflt, swap, C1, C2) \

+ { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \

+ ,

+ FCMPX8632_TABLE

+#undef X

+ };

+const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);

+// The following table summarizes the logic for lowering the icmp instruction

+// for i32 and narrower types. Each icmp condition has a clear mapping to an

+// x86 conditional branch instruction.

+const struct _TableIcmp32 {

+ InstX8632Br::BrCond Mapping;

+} TableIcmp32[] = {

+#define X(val, C_32, C1_64, C2_64, C3_64) \

+ { InstX8632Br::C_32 } \

+ ,

+ ICMPX8632_TABLE

+#undef X

+ };

+const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);

+// The following table summarizes the logic for lowering the icmp instruction

+// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and

+// conditional branches are needed. For the other conditions, three separate

+// conditional branches are needed.

+const struct _TableIcmp64 {

+ InstX8632Br::BrCond C1, C2, C3;

+} TableIcmp64[] = {

+#define X(val, C_32, C1_64, C2_64, C3_64) \

+ { InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 } \

+ ,

+ ICMPX8632_TABLE

+#undef X

+ };

+const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);

+InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {

+ size_t Index = static_cast<size_t>(Cond);

+ assert(Index < TableIcmp32Size);

+ return TableIcmp32[Index].Mapping;

+// In some cases, there are x-macros tables for both high-level and

+// low-level instructions/operands that use the same enum key value.

+// The tables are kept separate to maintain a proper separation

+// between abstraction layers. There is a risk that the tables

+// could get out of sync if enum values are reordered or if entries

+// are added or deleted. This dummy function uses static_assert to

+// ensure everything is kept in sync.

+void xMacroIntegrityCheck() {

+ // Validate the enum values in FCMPX8632_TABLE.

+ {

+ // Define a temporary set of enum values based on low-level

+ // table entries.

+ enum _tmp_enum {

+#define X(val, dflt, swap, C1, C2) _tmp_##val,

+ FCMPX8632_TABLE

+#undef X

+ };

+// Define a set of constants based on high-level table entries.

+#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;

+ ICEINSTFCMP_TABLE;

+#undef X

+// Define a set of constants based on low-level table entries,

+// and ensure the table entry keys are consistent.

+#define X(val, dflt, swap, C1, C2) \

+ static const int _table2_##val = _tmp_##val; \

+ STATIC_ASSERT(_table1_##val == _table2_##val);

+ FCMPX8632_TABLE;

+#undef X

+// Repeat the static asserts with respect to the high-level

+// table entries in case the high-level table has extra entries.

+#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);

+ ICEINSTFCMP_TABLE;

+#undef X

+ }

+ // Validate the enum values in ICMPX8632_TABLE.

+ {

+ // Define a temporary set of enum values based on low-level

+ // table entries.

+ enum _tmp_enum {

+#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,

+ ICMPX8632_TABLE

+#undef X

+ };

+// Define a set of constants based on high-level table entries.

+#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;

+ ICEINSTICMP_TABLE;

+#undef X

+// Define a set of constants based on low-level table entries,

+// and ensure the table entry keys are consistent.

+#define X(val, C_32, C1_64, C2_64, C3_64) \

+ static const int _table2_##val = _tmp_##val; \

+ STATIC_ASSERT(_table1_##val == _table2_##val);

+ ICMPX8632_TABLE;

+#undef X

+// Repeat the static asserts with respect to the high-level

+// table entries in case the high-level table has extra entries.

+#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);

+ ICEINSTICMP_TABLE;

+#undef X

+ }

+ // Validate the enum values in ICETYPEX8632_TABLE.

+ {

+ // Define a temporary set of enum values based on low-level

+ // table entries.

+ enum _tmp_enum {

+#define X(tag, cvt, sdss, width) _tmp_##tag,

+ ICETYPEX8632_TABLE

+#undef X

+ };

+// Define a set of constants based on high-level table entries.

+#define X(tag, size, align, str) static const int _table1_##tag = tag;

+ ICETYPE_TABLE;

+#undef X

+// Define a set of constants based on low-level table entries,

+// and ensure the table entry keys are consistent.

+#define X(tag, cvt, sdss, width) \

+ static const int _table2_##tag = _tmp_##tag; \

+ STATIC_ASSERT(_table1_##tag == _table2_##tag);

+ ICETYPEX8632_TABLE;

+#undef X

+// Repeat the static asserts with respect to the high-level

+// table entries in case the high-level table has extra entries.

+#define X(tag, size, align, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);

+ ICETYPE_TABLE;

+#undef X

+ }

+} // end of anonymous namespace

+TargetX8632::TargetX8632(Cfg *Func)

+ : TargetLowering(Func), IsEbpBasedFrame(false), FrameSizeLocals(0),

+ LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),

+ PhysicalRegisters(VarList(Reg_NUM)) {

+ llvm::SmallBitVector IntegerRegisters(Reg_NUM);

+ llvm::SmallBitVector IntegerRegistersI8(Reg_NUM);

+ llvm::SmallBitVector FloatRegisters(Reg_NUM);

+ llvm::SmallBitVector InvalidRegisters(Reg_NUM);

+ ScratchRegs.resize(Reg_NUM);

+#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \

+ frameptr, isI8, isInt, isFP) \

+ IntegerRegisters[val] = isInt; \

+ IntegerRegistersI8[val] = isI8; \

+ FloatRegisters[val] = isFP; \

+ ScratchRegs[val] = scratch;

+ REGX8632_TABLE;

+#undef X

+ TypeToRegisterSet[IceType_void] = InvalidRegisters;

+ TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;

+ TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;

+ TypeToRegisterSet[IceType_i16] = IntegerRegisters;

+ TypeToRegisterSet[IceType_i32] = IntegerRegisters;

+ TypeToRegisterSet[IceType_i64] = IntegerRegisters;

+ TypeToRegisterSet[IceType_f32] = FloatRegisters;

+ TypeToRegisterSet[IceType_f64] = FloatRegisters;

+void TargetX8632::translateOm1() {

+ GlobalContext *Context = Func->getContext();

+ Ostream &Str = Context->getStrDump();

+ Timer T_placePhiLoads;

+ Func->placePhiLoads();

+ if (Func->hasError())

+ return;

+ T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");

+ Timer T_placePhiStores;

+ Func->placePhiStores();

+ if (Func->hasError())

+ return;

+ T_placePhiStores.printElapsedUs(Context, "placePhiStores()");

+ Timer T_deletePhis;

+ Func->deletePhis();

+ if (Func->hasError())

+ return;

+ T_deletePhis.printElapsedUs(Context, "deletePhis()");

+ if (Context->isVerbose())

+ Str << "================ After Phi lowering ================\n";

+ Func->dump();

+ Timer T_genCode;

+ Func->genCode();

+ if (Func->hasError())

+ return;

+ T_genCode.printElapsedUs(Context, "genCode()");

+ if (Context->isVerbose())

+ Str << "================ After initial x8632 codegen ================\n";

+ Func->dump();

+ Timer T_genFrame;

+ Func->genFrame();

+ if (Func->hasError())

+ return;

+ T_genFrame.printElapsedUs(Context, "genFrame()");

+ if (Context->isVerbose())

+ Str << "================ After stack frame mapping ================\n";

+ Func->dump();

+IceString TargetX8632::RegNames[] = {

+#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \

+ frameptr, isI8, isInt, isFP) \

+ name,

+ REGX8632_TABLE

+#undef X

+};

+Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) {

+ assert(RegNum < PhysicalRegisters.size());

+ Variable *Reg = PhysicalRegisters[RegNum];

+ if (Reg == NULL) {

+ CfgNode *Node = NULL; // NULL means multi-block lifetime

+ Reg = Func->makeVariable(IceType_i32, Node);

+ Reg->setRegNum(RegNum);

+ PhysicalRegisters[RegNum] = Reg;

+ }

+ return Reg;

+IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {

+ assert(RegNum < Reg_NUM);

+ static IceString RegNames8[] = {

+#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \

+ frameptr, isI8, isInt, isFP) \

+ "" name8,

+ REGX8632_TABLE

+#undef X

+ };

+ static IceString RegNames16[] = {

+#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \

+ frameptr, isI8, isInt, isFP) \

+ "" name16,

+ REGX8632_TABLE

+#undef X

+ };

+ switch (Ty) {

+ case IceType_i1:

+ case IceType_i8:

+ return RegNames8[RegNum];

+ case IceType_i16:

+ return RegNames16[RegNum];

+ default:

+ return RegNames[RegNum];

+ }

+void TargetX8632::emitVariable(const Variable *Var,

+ const Cfg *Func) const {

+ Ostream &Str = Ctx->getStrEmit();

+ assert(Var->getLocalUseNode() == NULL || Var->getLocalUseNode() == Func->getCurrentNode());

+ if (Var->hasReg()) {

+ Str << getRegName(Var->getRegNum(), Var->getType());

+ return;

+ }

+ Str << InstX8632::getWidthString(Var->getType());

+ Str << " [" << getRegName(

+ getFrameOrStackReg(), IceType_i32);

+ int32_t Offset = Var->getStackOffset() + getStackAdjustment();

+ if (Offset) {

+ if (Offset > 0)

+ Str << "+";

+ Str << Offset;

+ }

+ Str << "]";

+// Helper function for addProlog(). Sets the frame offset for Arg,

+// updates InArgsSizeBytes according to Arg's width, and generates an

+// instruction to copy Arg into its assigned register if applicable.

+// For an I64 arg that has been split into Lo and Hi components, it

+// calls itself recursively on the components, taking care to handle

+// Lo first because of the little-endian architecture.

+void TargetX8632::setArgOffsetAndCopy(Variable *Arg, Variable *FramePtr,

+ int32_t BasicFrameOffset,

+ int32_t &InArgsSizeBytes) {

+ Variable *Lo = Arg->getLo();

+ Variable *Hi = Arg->getHi();

+ Type Ty = Arg->getType();

+ if (Lo && Hi && Ty == IceType_i64) {

+ assert(Lo->getType() != IceType_i64); // don't want infinite recursion

+ assert(Hi->getType() != IceType_i64); // don't want infinite recursion

+ setArgOffsetAndCopy(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);

+ setArgOffsetAndCopy(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);

+ return;

+ }

+ Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);

+ if (Arg->hasReg()) {

+ assert(Ty != IceType_i64);

+ OperandX8632Mem *Mem = OperandX8632Mem::create(

+ Func, Ty, FramePtr,

+ Ctx->getConstantInt(IceType_i32, Arg->getStackOffset()));

+ _mov(Arg, Mem);

+ }

+ InArgsSizeBytes += typeWidthInBytesOnStack(Ty);

+void TargetX8632::addProlog(CfgNode *Node) {

+ // If SimpleCoalescing is false, each variable without a register

+ // gets its own unique stack slot, which leads to large stack

+ // frames. If SimpleCoalescing is true, then each "global" variable

+ // without a register gets its own slot, but "local" variable slots

+ // are reused across basic blocks. E.g., if A and B are local to

+ // block 1 and C is local to block 2, then C may share a slot with A

+ // or B.

+ const bool SimpleCoalescing = true;

+ int32_t InArgsSizeBytes = 0;

+ int32_t RetIpSizeBytes = 4;

+ int32_t PreservedRegsSizeBytes = 0;

+ LocalsSizeBytes = 0;

+ Context.init(Node);

+ Context.setInsertPoint(Context.getCur());

+ // Determine stack frame offsets for each Variable without a

+ // register assignment. This can be done as one variable per stack

+ // slot. Or, do coalescing by running the register allocator again

+ // with an infinite set of registers (as a side effect, this gives

+ // variables a second chance at physical register assignment).

+ //

+ // A middle ground approach is to leverage sparsity and allocate one

+ // block of space on the frame for globals (variables with

+ // multi-block lifetime), and one block to share for locals

+ // (single-block lifetime).

+ llvm::SmallBitVector CalleeSaves =

+ getRegisterSet(RegSet_CalleeSave, RegSet_None);

+ int32_t GlobalsSize = 0;

+ std::vector<int> LocalsSize(Func->getNumNodes());

+ // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and

+ // LocalsSizeBytes.

+ RegsUsed = llvm::SmallBitVector(CalleeSaves.size());

+ const VarList &Variables = Func->getVariables();

+ const VarList &Args = Func->getArgs();

+ for (VarList::const_iterator I = Variables.begin(), E = Variables.end();

+ I != E; ++I) {

+ Variable *Var = *I;

+ if (Var->hasReg()) {

+ RegsUsed[Var->getRegNum()] = true;

+ continue;

+ }

+ // An argument passed on the stack already has a stack slot.

+ if (Var->getIsArg())

+ continue;

+ // A spill slot linked to a variable with a stack slot should reuse

+ // that stack slot.

+ if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {

+ if (Variable *Linked = Var->getPreferredRegister()) {

+ if (!Linked->hasReg())

+ continue;

+ }

+ int32_t Increment = typeWidthInBytesOnStack(Var->getType());

+ if (SimpleCoalescing) {

+ if (Var->isMultiblockLife()) {

+ GlobalsSize += Increment;

+ } else {

+ SizeT NodeIndex = Var->getLocalUseNode()->getIndex();

+ LocalsSize[NodeIndex] += Increment;

+ if (LocalsSize[NodeIndex] > LocalsSizeBytes)

+ LocalsSizeBytes = LocalsSize[NodeIndex];

+ }

+ } else {

+ LocalsSizeBytes += Increment;

+ }

+ LocalsSizeBytes += GlobalsSize;

+ // Add push instructions for preserved registers.

+ for (SizeT i = 0; i < CalleeSaves.size(); ++i) {

+ if (CalleeSaves[i] && RegsUsed[i]) {

+ PreservedRegsSizeBytes += 4;

+ const bool SuppressStackAdjustment = true;

+ _push(getPhysicalRegister(i), SuppressStackAdjustment);

+ }

+ // Generate "push ebp; mov ebp, esp"

+ if (IsEbpBasedFrame) {

+ assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))

+ .count() == 0);

+ PreservedRegsSizeBytes += 4;

+ Variable *ebp = getPhysicalRegister(Reg_ebp);

+ Variable *esp = getPhysicalRegister(Reg_esp);

+ const bool SuppressStackAdjustment = true;

+ _push(ebp, SuppressStackAdjustment);

+ _mov(ebp, esp);

+ }

+ // Generate "sub esp, LocalsSizeBytes"

+ if (LocalsSizeBytes)

+ _sub(getPhysicalRegister(Reg_esp),

+ Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));

+ resetStackAdjustment();

+ // Fill in stack offsets for args, and copy args into registers for

+ // those that were register-allocated. Args are pushed right to

+ // left, so Arg[0] is closest to the stack/frame pointer.

+ //

+ // TODO: Make this right for different width args, calling

+ // conventions, etc. For one thing, args passed in registers will

+ // need to be copied/shuffled to their home registers (the

+ // RegManager code may have some permutation logic to leverage),

+ // and if they have no home register, home space will need to be

+ // allocated on the stack to copy into.

+ Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());

+ int32_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;

+ if (!IsEbpBasedFrame)

+ BasicFrameOffset += LocalsSizeBytes;

+ for (SizeT i = 0; i < Args.size(); ++i) {

+ Variable *Arg = Args[i];

+ setArgOffsetAndCopy(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);

+ }

+ // Fill in stack offsets for locals.

+ int32_t TotalGlobalsSize = GlobalsSize;

+ GlobalsSize = 0;

+ LocalsSize.assign(LocalsSize.size(), 0);

+ int32_t NextStackOffset = 0;

+ for (VarList::const_iterator I = Variables.begin(), E = Variables.end();

+ I != E; ++I) {

+ Variable *Var = *I;

+ if (Var->hasReg()) {

+ RegsUsed[Var->getRegNum()] = true;

+ continue;

+ }

+ if (Var->getIsArg())

+ continue;

+ if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {

+ if (Variable *Linked = Var->getPreferredRegister()) {

+ if (!Linked->hasReg()) {

+ // TODO: Make sure Linked has already been assigned a stack

+ // slot.

+ Var->setStackOffset(Linked->getStackOffset());

+ continue;

+ }

+ int32_t Increment = typeWidthInBytesOnStack(Var->getType());

+ if (SimpleCoalescing) {

+ if (Var->isMultiblockLife()) {

+ GlobalsSize += Increment;

+ NextStackOffset = GlobalsSize;

+ } else {

+ SizeT NodeIndex = Var->getLocalUseNode()->getIndex();

+ LocalsSize[NodeIndex] += Increment;

+ NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex];

+ }

+ } else {

+ NextStackOffset += Increment;

+ }

+ if (IsEbpBasedFrame)

+ Var->setStackOffset(-NextStackOffset);

+ else

+ Var->setStackOffset(LocalsSizeBytes - NextStackOffset);

+ }

+ this->FrameSizeLocals = NextStackOffset;

+ this->HasComputedFrame = true;

+ if (Func->getContext()->isVerbose(IceV_Frame)) {

+ Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes

+ << "\n"

+ << "InArgsSizeBytes=" << InArgsSizeBytes

+ << "\n"

+ << "PreservedRegsSizeBytes="

+ << PreservedRegsSizeBytes << "\n";

+ }

+void TargetX8632::addEpilog(CfgNode *Node) {

+ InstList &Insts = Node->getInsts();

+ InstList::reverse_iterator RI, E;

+ for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {

+ if (llvm::isa<InstX8632Ret>(*RI))

+ break;

+ }

+ if (RI == E)

+ return;

+ // Convert the reverse_iterator position into its corresponding

+ // (forward) iterator position.

+ InstList::iterator InsertPoint = RI.base();

+ --InsertPoint;

+ Context.init(Node);

+ Context.setInsertPoint(InsertPoint);

+ Variable *esp = getPhysicalRegister(Reg_esp);

+ if (IsEbpBasedFrame) {

+ Variable *ebp = getPhysicalRegister(Reg_ebp);

+ _mov(esp, ebp);

+ _pop(ebp);

+ } else {

+ // add esp, LocalsSizeBytes

+ if (LocalsSizeBytes)

+ _add(esp, Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));

+ }

+ // Add pop instructions for preserved registers.

+ llvm::SmallBitVector CalleeSaves =

+ getRegisterSet(RegSet_CalleeSave, RegSet_None);

+ for (SizeT i = 0; i < CalleeSaves.size(); ++i) {

+ SizeT j = CalleeSaves.size() - i - 1;

+ if (j == Reg_ebp && IsEbpBasedFrame)

+ continue;

+ if (CalleeSaves[j] && RegsUsed[j]) {

+ _pop(getPhysicalRegister(j));

+ }

+void TargetX8632::split64(Variable *Var) {

+ switch (Var->getType()) {

+ default:

+ return;

+ case IceType_i64:

+ // TODO: Only consider F64 if we need to push each half when

+ // passing as an argument to a function call. Note that each half

+ // is still typed as I32.

+ case IceType_f64:

+ break;

+ }

+ Variable *Lo = Var->getLo();

+ Variable *Hi = Var->getHi();

+ if (Lo) {

+ assert(Hi);

+ return;

+ }

+ assert(Hi == NULL);

+ Lo = Func->makeVariable(IceType_i32, Context.getNode(),

+ Var->getName() + "__lo");

+ Hi = Func->makeVariable(IceType_i32, Context.getNode(),

+ Var->getName() + "__hi");

+ Var->setLoHi(Lo, Hi);

+ if (Var->getIsArg()) {

+ Lo->setIsArg(Func);

+ Hi->setIsArg(Func);

+ }

+Operand *TargetX8632::loOperand(Operand *Operand) {

+ assert(Operand->getType() == IceType_i64);

+ if (Operand->getType() != IceType_i64)

+ return Operand;

+ if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {

+ split64(Var);

+ return Var->getLo();

+ }

+ if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {

+ uint64_t Mask = (1ull << 32) - 1;

+ return Ctx->getConstantInt(IceType_i32, Const->getValue() & Mask);

+ }

+ if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {

+ return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),

+ Mem->getOffset(), Mem->getIndex(),

+ Mem->getShift());

+ }

+ llvm_unreachable("Unsupported operand type");

+ return NULL;

+Operand *TargetX8632::hiOperand(Operand *Operand) {

+ assert(Operand->getType() == IceType_i64);

+ if (Operand->getType() != IceType_i64)

+ return Operand;

+ if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {

+ split64(Var);

+ return Var->getHi();

+ }

+ if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {

+ return Ctx->getConstantInt(IceType_i32, Const->getValue() >> 32);

+ }

+ if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {

+ Constant *Offset = Mem->getOffset();

+ if (Offset == NULL)

+ Offset = Ctx->getConstantInt(IceType_i32, 4);

+ else if (ConstantInteger *IntOffset =

+ llvm::dyn_cast<ConstantInteger>(Offset)) {

+ Offset = Ctx->getConstantInt(IceType_i32, 4 + IntOffset->getValue());

+ } else if (ConstantRelocatable *SymOffset =

+ llvm::dyn_cast<ConstantRelocatable>(Offset)) {

+ Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(),

+ SymOffset->getName());

+ }

+ return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,

+ Mem->getIndex(), Mem->getShift());

+ }

+ llvm_unreachable("Unsupported operand type");

+ return NULL;

+llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,

+ RegSetMask Exclude) const {

+ llvm::SmallBitVector Registers(Reg_NUM);

+#define X(val, init, name, name16, name8, scratch, preserved, stackptr, \

+ frameptr, isI8, isInt, isFP) \

+ if (scratch && (Include & RegSet_CallerSave)) \

+ Registers[val] = true; \

+ if (preserved && (Include & RegSet_CalleeSave)) \

+ Registers[val] = true; \

+ if (stackptr && (Include & RegSet_StackPointer)) \

+ Registers[val] = true; \

+ if (frameptr && (Include & RegSet_FramePointer)) \

+ Registers[val] = true; \

+ if (scratch && (Exclude & RegSet_CallerSave)) \

+ Registers[val] = false; \

+ if (preserved && (Exclude & RegSet_CalleeSave)) \

+ Registers[val] = false; \

+ if (stackptr && (Exclude & RegSet_StackPointer)) \

+ Registers[val] = false; \

+ if (frameptr && (Exclude & RegSet_FramePointer)) \

+ Registers[val] = false;

+ REGX8632_TABLE

+#undef X

+ return Registers;

+void TargetX8632::lowerAlloca(const InstAlloca *Inst) {

+ IsEbpBasedFrame = true;

+ // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize

+ // the number of adjustments of esp, etc.

+ Variable *esp = getPhysicalRegister(Reg_esp);

+ Operand *TotalSize = legalize(Inst->getSizeInBytes());

+ Variable *Dest = Inst->getDest();

+ _sub(esp, TotalSize);

+ _mov(Dest, esp);

+void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {

+ Variable *Dest = Inst->getDest();

+ Operand *Src0 = legalize(Inst->getSrc(0));

+ Operand *Src1 = legalize(Inst->getSrc(1));

+ if (Dest->getType() == IceType_i64) {

+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

+ Operand *Src0Lo = loOperand(Src0);

+ Operand *Src0Hi = hiOperand(Src0);

+ Operand *Src1Lo = loOperand(Src1);

+ Operand *Src1Hi = hiOperand(Src1);

+ Variable *T_Lo = NULL, *T_Hi = NULL;

+ switch (Inst->getOp()) {

+ case InstArithmetic::Add:

+ _mov(T_Lo, Src0Lo);

+ _add(T_Lo, Src1Lo);

+ _mov(DestLo, T_Lo);

+ _mov(T_Hi, Src0Hi);

+ _adc(T_Hi, Src1Hi);

+ _mov(DestHi, T_Hi);

+ break;

+ case InstArithmetic::And:

+ _mov(T_Lo, Src0Lo);

+ _and(T_Lo, Src1Lo);

+ _mov(DestLo, T_Lo);

+ _mov(T_Hi, Src0Hi);

+ _and(T_Hi, Src1Hi);

+ _mov(DestHi, T_Hi);

+ break;

+ case InstArithmetic::Or:

+ _mov(T_Lo, Src0Lo);

+ _or(T_Lo, Src1Lo);

+ _mov(DestLo, T_Lo);

+ _mov(T_Hi, Src0Hi);

+ _or(T_Hi, Src1Hi);

+ _mov(DestHi, T_Hi);

+ break;

+ case InstArithmetic::Xor:

+ _mov(T_Lo, Src0Lo);

+ _xor(T_Lo, Src1Lo);

+ _mov(DestLo, T_Lo);

+ _mov(T_Hi, Src0Hi);

+ _xor(T_Hi, Src1Hi);

+ _mov(DestHi, T_Hi);

+ break;

+ case InstArithmetic::Sub:

+ _mov(T_Lo, Src0Lo);

+ _sub(T_Lo, Src1Lo);

+ _mov(DestLo, T_Lo);

+ _mov(T_Hi, Src0Hi);

+ _sbb(T_Hi, Src1Hi);

+ _mov(DestHi, T_Hi);

+ break;

+ case InstArithmetic::Mul: {

+ Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;

+ Variable *T_4Lo = makeReg(IceType_i32, Reg_eax);

+ Variable *T_4Hi = makeReg(IceType_i32, Reg_edx);

+ // gcc does the following:

+ // a=b*c ==>

+ // t1 = b.hi; t1 *=(imul) c.lo

+ // t2 = c.hi; t2 *=(imul) b.lo

+ // t3:eax = b.lo

+ // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo

+ // a.lo = t4.lo

+ // t4.hi += t1

+ // t4.hi += t2

+ // a.hi = t4.hi

+ _mov(T_1, Src0Hi);

+ _imul(T_1, Src1Lo);

+ _mov(T_2, Src1Hi);

+ _imul(T_2, Src0Lo);

+ _mov(T_3, Src0Lo, Reg_eax);

+ _mul(T_4Lo, T_3, Src1Lo);

+ // The mul instruction produces two dest variables, edx:eax. We

+ // create a fake definition of edx to account for this.

+ Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));

+ _mov(DestLo, T_4Lo);

+ _add(T_4Hi, T_1);

+ _add(T_4Hi, T_2);

+ _mov(DestHi, T_4Hi);

+ } break;

+ case InstArithmetic::Shl: {

+ // TODO: Refactor the similarities between Shl, Lshr, and Ashr.

+ // gcc does the following:

+ // a=b<<c ==>

+ // t1:ecx = c.lo & 0xff

+ // t2 = b.lo

+ // t3 = b.hi

+ // t3 = shld t3, t2, t1

+ // t2 = shl t2, t1

+ // test t1, 0x20

+ // je L1

+ // use(t3)

+ // t3 = t2

+ // t2 = 0

+ // L1:

+ // a.lo = t2

+ // a.hi = t3

+ Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;

+ Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);

+ Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);

+ InstX8632Label *Label = InstX8632Label::create(Func, this);

+ _mov(T_1, Src1Lo, Reg_ecx);

+ _mov(T_2, Src0Lo);

+ _mov(T_3, Src0Hi);

+ _shld(T_3, T_2, T_1);

+ _shl(T_2, T_1);

+ _test(T_1, BitTest);

+ _br(InstX8632Br::Br_e, Label);

+ // Because of the intra-block control flow, we need to fake a use

+ // of T_3 to prevent its earlier definition from being dead-code

+ // eliminated in the presence of its later definition.

+ Context.insert(InstFakeUse::create(Func, T_3));

+ _mov(T_3, T_2);

+ _mov(T_2, Zero);

+ Context.insert(Label);

+ _mov(DestLo, T_2);

+ _mov(DestHi, T_3);

+ } break;

+ case InstArithmetic::Lshr: {

+ // a=b>>c (unsigned) ==>

+ // t1:ecx = c.lo & 0xff

+ // t2 = b.lo

+ // t3 = b.hi

+ // t2 = shrd t2, t3, t1

+ // t3 = shr t3, t1

+ // test t1, 0x20

+ // je L1

+ // use(t2)

+ // t2 = t3

+ // t3 = 0

+ // L1:

+ // a.lo = t2

+ // a.hi = t3

+ Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;

+ Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);

+ Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);

+ InstX8632Label *Label = InstX8632Label::create(Func, this);

+ _mov(T_1, Src1Lo, Reg_ecx);

+ _mov(T_2, Src0Lo);

+ _mov(T_3, Src0Hi);

+ _shrd(T_2, T_3, T_1);

+ _shr(T_3, T_1);

+ _test(T_1, BitTest);

+ _br(InstX8632Br::Br_e, Label);

+ // Because of the intra-block control flow, we need to fake a use

+ // of T_3 to prevent its earlier definition from being dead-code

+ // eliminated in the presence of its later definition.

+ Context.insert(InstFakeUse::create(Func, T_2));

+ _mov(T_2, T_3);

+ _mov(T_3, Zero);

+ Context.insert(Label);

+ _mov(DestLo, T_2);

+ _mov(DestHi, T_3);

+ } break;

+ case InstArithmetic::Ashr: {

+ // a=b>>c (signed) ==>

+ // t1:ecx = c.lo & 0xff

+ // t2 = b.lo

+ // t3 = b.hi

+ // t2 = shrd t2, t3, t1

+ // t3 = sar t3, t1

+ // test t1, 0x20

+ // je L1

+ // use(t2)

+ // t2 = t3

+ // t3 = sar t3, 0x1f

+ // L1:

+ // a.lo = t2

+ // a.hi = t3

+ Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;

+ Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);

+ Constant *SignExtend = Ctx->getConstantInt(IceType_i32, 0x1f);

+ InstX8632Label *Label = InstX8632Label::create(Func, this);

+ _mov(T_1, Src1Lo, Reg_ecx);

+ _mov(T_2, Src0Lo);

+ _mov(T_3, Src0Hi);

+ _shrd(T_2, T_3, T_1);

+ _sar(T_3, T_1);

+ _test(T_1, BitTest);

+ _br(InstX8632Br::Br_e, Label);

+ // Because of the intra-block control flow, we need to fake a use

+ // of T_3 to prevent its earlier definition from being dead-code

+ // eliminated in the presence of its later definition.

+ Context.insert(InstFakeUse::create(Func, T_2));

+ _mov(T_2, T_3);

+ _sar(T_3, SignExtend);

+ Context.insert(Label);

+ _mov(DestLo, T_2);

+ _mov(DestHi, T_3);

+ } break;

+ case InstArithmetic::Udiv: {

+ const SizeT MaxSrcs = 2;

+ InstCall *Call = makeHelperCall("__udivdi3", Dest, MaxSrcs);

+ Call->addArg(Inst->getSrc(0));

+ Call->addArg(Inst->getSrc(1));

+ lowerCall(Call);

+ } break;

+ case InstArithmetic::Sdiv: {

+ const SizeT MaxSrcs = 2;

+ InstCall *Call = makeHelperCall("__divdi3", Dest, MaxSrcs);

+ Call->addArg(Inst->getSrc(0));

+ Call->addArg(Inst->getSrc(1));

+ lowerCall(Call);

+ } break;

+ case InstArithmetic::Urem: {

+ const SizeT MaxSrcs = 2;

+ InstCall *Call = makeHelperCall("__umoddi3", Dest, MaxSrcs);

+ Call->addArg(Inst->getSrc(0));

+ Call->addArg(Inst->getSrc(1));

+ lowerCall(Call);

+ } break;

+ case InstArithmetic::Srem: {

+ const SizeT MaxSrcs = 2;

+ InstCall *Call = makeHelperCall("__moddi3", Dest, MaxSrcs);

+ Call->addArg(Inst->getSrc(0));

+ Call->addArg(Inst->getSrc(1));

+ lowerCall(Call);

+ } break;

+ case InstArithmetic::Fadd:

+ case InstArithmetic::Fsub:

+ case InstArithmetic::Fmul:

+ case InstArithmetic::Fdiv:

+ case InstArithmetic::Frem:

+ llvm_unreachable("FP instruction with i64 type");

+ break;

+ }

+ } else { // Dest->getType() != IceType_i64

+ Variable *T_edx = NULL;

+ Variable *T = NULL;

+ switch (Inst->getOp()) {

+ case InstArithmetic::Add:

+ _mov(T, Src0);

+ _add(T, Src1);

+ _mov(Dest, T);

+ break;

+ case InstArithmetic::And:

+ _mov(T, Src0);

+ _and(T, Src1);

+ _mov(Dest, T);

+ break;

+ case InstArithmetic::Or:

+ _mov(T, Src0);

+ _or(T, Src1);

+ _mov(Dest, T);

+ break;

+ case InstArithmetic::Xor:

+ _mov(T, Src0);

+ _xor(T, Src1);

+ _mov(Dest, T);

+ break;

+ case InstArithmetic::Sub:

+ _mov(T, Src0);

+ _sub(T, Src1);

+ _mov(Dest, T);

+ break;

+ case InstArithmetic::Mul:

+ // TODO: Optimize for llvm::isa<Constant>(Src1)

+ // TODO: Strength-reduce multiplications by a constant,

+ // particularly -1 and powers of 2. Advanced: use lea to

+ // multiply by 3, 5, 9.

+ //

+ // The 8-bit version of imul only allows the form "imul r/m8"

+ // where T must be in eax.

+ if (Dest->getType() == IceType_i8)

+ _mov(T, Src0, Reg_eax);

+ else

+ _mov(T, Src0);

+ _imul(T, Src1);

+ _mov(Dest, T);

+ break;

+ case InstArithmetic::Shl:

+ _mov(T, Src0);

+ if (!llvm::isa<Constant>(Src1))

+ Src1 = legalizeToVar(Src1, false, Reg_ecx);

+ _shl(T, Src1);

+ _mov(Dest, T);

+ break;

+ case InstArithmetic::Lshr:

+ _mov(T, Src0);

+ if (!llvm::isa<Constant>(Src1))

+ Src1 = legalizeToVar(Src1, false, Reg_ecx);

+ _shr(T, Src1);

+ _mov(Dest, T);

+ break;

+ case InstArithmetic::Ashr:

+ _mov(T, Src0);

+ if (!llvm::isa<Constant>(Src1))

+ Src1 = legalizeToVar(Src1, false, Reg_ecx);

+ _sar(T, Src1);

+ _mov(Dest, T);

+ break;

+ case InstArithmetic::Udiv:

+ if (Dest->getType() == IceType_i8) {

+ Variable *T_ah = NULL;

+ Constant *Zero = Ctx->getConstantInt(IceType_i8, 0);

+ _mov(T, Src0, Reg_eax);

+ _mov(T_ah, Zero, Reg_ah);

+ _div(T_ah, Src1, T);

+ Context.insert(InstFakeUse::create(Func, T_ah));

+ _mov(Dest, T);

+ } else {

+ // TODO: fix for 8-bit, see Urem

+ Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);

+ _mov(T, Src0, Reg_eax);

+ _mov(T_edx, Zero, Reg_edx);

+ _div(T, Src1, T_edx);

+ _mov(Dest, T);

+ }

+ break;

+ case InstArithmetic::Sdiv:

+ T_edx = makeReg(IceType_i32, Reg_edx);

+ _mov(T, Src0, Reg_eax);

+ _cdq(T_edx, T);

+ _idiv(T, Src1, T_edx);

+ _mov(Dest, T);

+ break;

+ case InstArithmetic::Urem:

+ if (Dest->getType() == IceType_i8) {

+ Variable *T_ah = NULL;

+ Constant *Zero = Ctx->getConstantInt(IceType_i8, 0);

+ _mov(T, Src0, Reg_eax);

+ _mov(T_ah, Zero, Reg_ah);

+ _div(T_ah, Src1, T);

+ _mov(Dest, T_ah);

+ } else {

+ Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);

+ _mov(T_edx, Zero, Reg_edx);

+ _mov(T, Src0, Reg_eax);

+ _div(T_edx, Src1, T);

+ _mov(Dest, T_edx);

+ }

+ break;

+ case InstArithmetic::Srem:

+ T_edx = makeReg(IceType_i32, Reg_edx);

+ _mov(T, Src0, Reg_eax);

+ _cdq(T_edx, T);

+ _idiv(T_edx, Src1, T);

+ _mov(Dest, T_edx);

+ break;

+ case InstArithmetic::Fadd:

+ _mov(T, Src0);

+ _addss(T, Src1);

+ _mov(Dest, T);

+ break;

+ case InstArithmetic::Fsub:

+ _mov(T, Src0);

+ _subss(T, Src1);

+ _mov(Dest, T);

+ break;

+ case InstArithmetic::Fmul:

+ _mov(T, Src0);

+ _mulss(T, Src1);

+ _mov(Dest, T);

+ break;

+ case InstArithmetic::Fdiv:

+ _mov(T, Src0);

+ _divss(T, Src1);

+ _mov(Dest, T);

+ break;

+ case InstArithmetic::Frem: {

+ const SizeT MaxSrcs = 2;

+ Type Ty = Dest->getType();

+ InstCall *Call =

+ makeHelperCall(Ty == IceType_f32 ? "fmodf" : "fmod", Dest, MaxSrcs);

+ Call->addArg(Src0);

+ Call->addArg(Src1);

+ return lowerCall(Call);

+ } break;

+ }

+void TargetX8632::lowerAssign(const InstAssign *Inst) {

+ Variable *Dest = Inst->getDest();

+ Operand *Src0 = legalize(Inst->getSrc(0));

+ assert(Dest->getType() == Src0->getType());

+ if (Dest->getType() == IceType_i64) {

+ Operand *Src0Lo = loOperand(Src0);

+ Operand *Src0Hi = hiOperand(Src0);

+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

+ Variable *T_Lo = NULL, *T_Hi = NULL;

+ _mov(T_Lo, Src0Lo);

+ _mov(DestLo, T_Lo);

+ _mov(T_Hi, Src0Hi);

+ _mov(DestHi, T_Hi);

+ } else {

+ const bool AllowOverlap = true;

+ // RI is either a physical register or an immediate.

+ Operand *RI = legalize(Src0, Legal_Reg | Legal_Imm, AllowOverlap);

+ _mov(Dest, RI);

+ }

+void TargetX8632::lowerBr(const InstBr *Inst) {

+ if (Inst->isUnconditional()) {

+ _br(Inst->getTargetUnconditional());

+ } else {

+ Operand *Src0 = legalize(Inst->getCondition());

+ Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);

+ _cmp(Src0, Zero);

+ _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());

+ }

+void TargetX8632::lowerCall(const InstCall *Instr) {

+ // Generate a sequence of push instructions, pushing right to left,

+ // keeping track of stack offsets in case a push involves a stack

+ // operand and we are using an esp-based frame.

+ uint32_t StackOffset = 0;

+ // TODO: If for some reason the call instruction gets dead-code

+ // eliminated after lowering, we would need to ensure that the

+ // pre-call push instructions and the post-call esp adjustment get

+ // eliminated as well.

+ for (SizeT NumArgs = Instr->getNumArgs(), i = 0; i < NumArgs; ++i) {

+ Operand *Arg = legalize(Instr->getArg(NumArgs - i - 1));

+ if (Arg->getType() == IceType_i64) {

+ _push(hiOperand(Arg));

+ _push(loOperand(Arg));

+ } else if (Arg->getType() == IceType_f64) {

+ // If the Arg turns out to be a memory operand, we need to push

+ // 8 bytes, which requires two push instructions. This ends up

+ // being somewhat clumsy in the current IR, so we use a

+ // workaround. Force the operand into a (xmm) register, and

+ // then push the register. An xmm register push is actually not

+ // possible in x86, but the Push instruction emitter handles

+ // this by decrementing the stack pointer and directly writing

+ // the xmm register value.

+ Variable *T = NULL;

+ _mov(T, Arg);

+ _push(T);

+ } else {

+ _push(Arg);

+ }

+ StackOffset += typeWidthInBytesOnStack(Arg->getType());

+ }

+ // Generate the call instruction. Assign its result to a temporary

+ // with high register allocation weight.

+ Variable *Dest = Instr->getDest();

+ Variable *eax = NULL; // doubles as RegLo as necessary

+ Variable *edx = NULL;

+ if (Dest) {

+ switch (Dest->getType()) {

+ case IceType_NUM:

+ llvm_unreachable("Invalid Call dest type");

+ break;

+ case IceType_void:

+ break;

+ case IceType_i1:

+ case IceType_i8:

+ case IceType_i16:

+ case IceType_i32:

+ eax = makeReg(Dest->getType(), Reg_eax);

+ break;

+ case IceType_i64:

+ eax = makeReg(IceType_i32, Reg_eax);

+ edx = makeReg(IceType_i32, Reg_edx);

+ break;

+ case IceType_f32:

+ case IceType_f64:

+ // Leave eax==edx==NULL, and capture the result with the fstp

+ // instruction.

+ break;

+ }

+ Operand *CallTarget = legalize(Instr->getCallTarget());

+ Inst *NewCall = InstX8632Call::create(Func, eax, CallTarget);

+ Context.insert(NewCall);

+ if (edx)

+ Context.insert(InstFakeDef::create(Func, edx));

+ // Add the appropriate offset to esp.

+ if (StackOffset) {

+ Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);

+ _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset));

+ }

+ // Insert a register-kill pseudo instruction.

+ VarList KilledRegs;

+ for (SizeT i = 0; i < ScratchRegs.size(); ++i) {

+ if (ScratchRegs[i])

+ KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));

+ }

+ if (!KilledRegs.empty()) {

+ Inst *Kill = InstFakeKill::create(Func, KilledRegs, NewCall);

+ Context.insert(Kill);

+ }

+ // Generate a FakeUse to keep the call live if necessary.

+ if (Instr->hasSideEffects() && eax) {

+ Inst *FakeUse = InstFakeUse::create(Func, eax);

+ Context.insert(FakeUse);

+ }

+ // Generate Dest=eax assignment.

+ if (Dest && eax) {

+ if (edx) {

+ split64(Dest);

+ Variable *DestLo = Dest->getLo();

+ Variable *DestHi = Dest->getHi();

+ DestLo->setPreferredRegister(eax, false);

+ DestHi->setPreferredRegister(edx, false);

+ _mov(DestLo, eax);

+ _mov(DestHi, edx);

+ } else {

+ Dest->setPreferredRegister(eax, false);

+ _mov(Dest, eax);

+ }

+ // Special treatment for an FP function which returns its result in

+ // st(0).

+ if (Dest &&

+ (Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64)) {

+ _fstp(Dest);

+ // If Dest ends up being a physical xmm register, the fstp emit

+ // code will route st(0) through a temporary stack slot.

+ }

+void TargetX8632::lowerCast(const InstCast *Inst) {

+ // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)

+ InstCast::OpKind CastKind = Inst->getCastKind();

+ Variable *Dest = Inst->getDest();

+ // Src0RM is the source operand legalized to physical register or memory, but

+ // not immediate, since the relevant x86 native instructions don't allow an

+ // immediate operand. If the operand is an immediate, we could consider

+ // computing the strength-reduced result at translation time, but we're

+ // unlikely to see something like that in the bitcode that the optimizer

+ // wouldn't have already taken care of.

+ Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem, true);

+ switch (CastKind) {

+ default:

+ Func->setError("Cast type not supported");

+ return;

+ case InstCast::Sext:

+ if (Dest->getType() == IceType_i64) {

+ // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2

+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

+ Variable *T_Lo = makeReg(DestLo->getType());

+ if (Src0RM->getType() == IceType_i32)

+ _mov(T_Lo, Src0RM);

+ else

+ _movsx(T_Lo, Src0RM);

+ _mov(DestLo, T_Lo);

+ Variable *T_Hi = NULL;

+ Constant *Shift = Ctx->getConstantInt(IceType_i32, 31);

+ _mov(T_Hi, T_Lo);

+ _sar(T_Hi, Shift);

+ _mov(DestHi, T_Hi);

+ } else {

+ // TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and

+ // also copy to the high operand of a 64-bit variable.

+ // t1 = movsx src; dst = t1

+ Variable *T = makeReg(Dest->getType());

+ _movsx(T, Src0RM);

+ _mov(Dest, T);

+ }

+ break;

+ case InstCast::Zext:

+ if (Dest->getType() == IceType_i64) {

+ // t1=movzx src; dst.lo=t1; dst.hi=0

+ Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);

+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

+ Variable *Tmp = makeReg(DestLo->getType());

+ if (Src0RM->getType() == IceType_i32)

+ _mov(Tmp, Src0RM);

+ else

+ _movzx(Tmp, Src0RM);

+ _mov(DestLo, Tmp);

+ _mov(DestHi, Zero);

+ } else if (Src0RM->getType() == IceType_i1) {

+ // t = Src0RM; t &= 1; Dest = t

+ Operand *One = Ctx->getConstantInt(IceType_i32, 1);

+ Variable *T = makeReg(IceType_i32);

+ _movzx(T, Src0RM);

+ _and(T, One);

+ _mov(Dest, T);

+ } else {

+ // t1 = movzx src; dst = t1

+ Variable *T = makeReg(Dest->getType());

+ _movzx(T, Src0RM);

+ _mov(Dest, T);

+ }

+ break;

+ case InstCast::Trunc: {

+ if (Src0RM->getType() == IceType_i64)

+ Src0RM = loOperand(Src0RM);

+ // t1 = trunc Src0RM; Dest = t1

+ Variable *T = NULL;

+ _mov(T, Src0RM);

+ _mov(Dest, T);

+ break;

+ }

+ case InstCast::Fptrunc:

+ case InstCast::Fpext: {

+ // t1 = cvt Src0RM; Dest = t1

+ Variable *T = makeReg(Dest->getType());

+ _cvt(T, Src0RM);

+ _mov(Dest, T);

+ break;

+ }

+ case InstCast::Fptosi:

+ if (Dest->getType() == IceType_i64) {

+ // Use a helper for converting floating-point values to 64-bit

+ // integers. SSE2 appears to have no way to convert from xmm

+ // registers to something like the edx:eax register pair, and

+ // gcc and clang both want to use x87 instructions complete with

+ // temporary manipulation of the status word. This helper is

+ // not needed for x86-64.

+ split64(Dest);

+ const SizeT MaxSrcs = 1;

+ Type SrcType = Inst->getSrc(0)->getType();

+ InstCall *Call = makeHelperCall(

+ SrcType == IceType_f32 ? "cvtftosi64" : "cvtdtosi64", Dest, MaxSrcs);

+ Call->addArg(Inst->getSrc(0));

+ lowerCall(Call);

+ } else {

+ // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type

+ Variable *T_1 = makeReg(IceType_i32);

+ Variable *T_2 = makeReg(Dest->getType());

+ _cvt(T_1, Src0RM);

+ _mov(T_2, T_1); // T_1 and T_2 may have different integer types

+ _mov(Dest, T_2);

+ T_2->setPreferredRegister(T_1, true);

+ }

+ break;

+ case InstCast::Fptoui:

+ if (Dest->getType() == IceType_i64 || Dest->getType() == IceType_i32) {

+ // Use a helper for both x86-32 and x86-64.

+ split64(Dest);

+ const SizeT MaxSrcs = 1;

+ Type DestType = Dest->getType();

+ Type SrcType = Src0RM->getType();

+ IceString DstSubstring = (DestType == IceType_i64 ? "64" : "32");

+ IceString SrcSubstring = (SrcType == IceType_f32 ? "f" : "d");

+ // Possibilities are cvtftoui32, cvtdtoui32, cvtftoui64, cvtdtoui64

+ IceString TargetString = "cvt" + SrcSubstring + "toui" + DstSubstring;

+ InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);

+ Call->addArg(Inst->getSrc(0));

+ lowerCall(Call);

+ return;

+ } else {

+ // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type

+ Variable *T_1 = makeReg(IceType_i32);

+ Variable *T_2 = makeReg(Dest->getType());

+ _cvt(T_1, Src0RM);

+ _mov(T_2, T_1); // T_1 and T_2 may have different integer types

+ _mov(Dest, T_2);

+ T_2->setPreferredRegister(T_1, true);

+ }

+ break;

+ case InstCast::Sitofp:

+ if (Src0RM->getType() == IceType_i64) {

+ // Use a helper for x86-32.

+ const SizeT MaxSrcs = 1;

+ Type DestType = Dest->getType();

+ InstCall *Call = makeHelperCall(

+ DestType == IceType_f32 ? "cvtsi64tof" : "cvtsi64tod", Dest, MaxSrcs);

+ Call->addArg(Inst->getSrc(0));

+ lowerCall(Call);

+ return;

+ } else {

+ // Sign-extend the operand.

+ // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2

+ Variable *T_1 = makeReg(IceType_i32);

+ Variable *T_2 = makeReg(Dest->getType());

+ if (Src0RM->getType() == IceType_i32)

+ _mov(T_1, Src0RM);

+ else

+ _movsx(T_1, Src0RM);

+ _cvt(T_2, T_1);

+ _mov(Dest, T_2);

+ }

+ break;

+ case InstCast::Uitofp:

+ if (Src0RM->getType() == IceType_i64 || Src0RM->getType() == IceType_i32) {

+ // Use a helper for x86-32 and x86-64. Also use a helper for

+ // i32 on x86-32.

+ const SizeT MaxSrcs = 1;

+ Type DestType = Dest->getType();

+ IceString SrcSubstring = (Src0RM->getType() == IceType_i64 ? "64" : "32");

+ IceString DstSubstring = (DestType == IceType_f32 ? "f" : "d");

+ // Possibilities are cvtui32tof, cvtui32tod, cvtui64tof, cvtui64tod

+ IceString TargetString = "cvtui" + SrcSubstring + "to" + DstSubstring;

+ InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);

+ Call->addArg(Inst->getSrc(0));

+ lowerCall(Call);

+ return;

+ } else {

+ // Zero-extend the operand.

+ // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2

+ Variable *T_1 = makeReg(IceType_i32);

+ Variable *T_2 = makeReg(Dest->getType());

+ if (Src0RM->getType() == IceType_i32)

+ _mov(T_1, Src0RM);

+ else

+ _movzx(T_1, Src0RM);

+ _cvt(T_2, T_1);

+ _mov(Dest, T_2);

+ }

+ break;

+ case InstCast::Bitcast:

+ if (Dest->getType() == Src0RM->getType()) {

+ InstAssign *Assign = InstAssign::create(Func, Dest, Src0RM);

+ lowerAssign(Assign);

+ llvm_unreachable("Pointer bitcasts aren't lowered correctly.");

+ return;

+ }

+ switch (Dest->getType()) {

+ default:

+ llvm_unreachable("Unexpected Bitcast dest type");

+ case IceType_i32:

+ case IceType_f32: {

+ Type DestType = Dest->getType();

+ Type SrcType = Src0RM->getType();

+ assert((DestType == IceType_i32 && SrcType == IceType_f32) ||

+ (DestType == IceType_f32 && SrcType == IceType_i32));

+ // a.i32 = bitcast b.f32 ==>

+ // t.f32 = b.f32

+ // s.f32 = spill t.f32

+ // a.i32 = s.f32

+ Variable *T = NULL;

+ // TODO: Should be able to force a spill setup by calling legalize() with

+ // Legal_Mem and not Legal_Reg or Legal_Imm.

+ Variable *Spill = Func->makeVariable(SrcType, Context.getNode());

+ Spill->setWeight(RegWeight::Zero);

+ Spill->setPreferredRegister(Dest, true);

+ _mov(T, Src0RM);

+ _mov(Spill, T);

+ _mov(Dest, Spill);

+ } break;

+ case IceType_i64: {

+ assert(Src0RM->getType() == IceType_f64);

+ // a.i64 = bitcast b.f64 ==>

+ // s.f64 = spill b.f64

+ // t_lo.i32 = lo(s.f64)

+ // a_lo.i32 = t_lo.i32

+ // t_hi.i32 = hi(s.f64)

+ // a_hi.i32 = t_hi.i32

+ Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());

+ Spill->setWeight(RegWeight::Zero);

+ Spill->setPreferredRegister(llvm::dyn_cast<Variable>(Src0RM), true);

+ _mov(Spill, Src0RM);

+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

+ Variable *T_Lo = makeReg(IceType_i32);

+ Variable *T_Hi = makeReg(IceType_i32);

+ VariableSplit *SpillLo =

+ VariableSplit::create(Func, Spill, VariableSplit::Low);

+ VariableSplit *SpillHi =

+ VariableSplit::create(Func, Spill, VariableSplit::High);

+ _mov(T_Lo, SpillLo);

+ _mov(DestLo, T_Lo);

+ _mov(T_Hi, SpillHi);

+ _mov(DestHi, T_Hi);

+ } break;

+ case IceType_f64: {

+ assert(Src0RM->getType() == IceType_i64);

+ // a.f64 = bitcast b.i64 ==>

+ // t_lo.i32 = b_lo.i32

+ // lo(s.f64) = t_lo.i32

+ // FakeUse(s.f64)

+ // t_hi.i32 = b_hi.i32

+ // hi(s.f64) = t_hi.i32

+ // a.f64 = s.f64

+ Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());

+ Spill->setWeight(RegWeight::Zero);

+ Spill->setPreferredRegister(Dest, true);

+ Context.insert(InstFakeDef::create(Func, Spill));

+ Variable *T_Lo = NULL, *T_Hi = NULL;

+ VariableSplit *SpillLo =

+ VariableSplit::create(Func, Spill, VariableSplit::Low);

+ VariableSplit *SpillHi =

+ VariableSplit::create(Func, Spill, VariableSplit::High);

+ _mov(T_Lo, loOperand(Src0RM));

+ _store(T_Lo, SpillLo);

+ _mov(T_Hi, hiOperand(Src0RM));

+ _store(T_Hi, SpillHi);

+ _mov(Dest, Spill);

+ } break;

+ }

+ break;

+ }

+void TargetX8632::lowerFcmp(const InstFcmp *Inst) {

+ Operand *Src0 = Inst->getSrc(0);

+ Operand *Src1 = Inst->getSrc(1);

+ Variable *Dest = Inst->getDest();

+ // Lowering a = fcmp cond, b, c

+ // ucomiss b, c /* only if C1 != Br_None */

+ // /* but swap b,c order if SwapOperands==true */

+ // mov a, <default>

+ // j<C1> label /* only if C1 != Br_None */

+ // j<C2> label /* only if C2 != Br_None */

+ // FakeUse(a) /* only if C1 != Br_None */

+ // mov a, !<default> /* only if C1 != Br_None */

+ // label: /* only if C1 != Br_None */

+ InstFcmp::FCond Condition = Inst->getCondition();

+ size_t Index = static_cast<size_t>(Condition);

+ assert(Index < TableFcmpSize);

+ // The table is indexed by InstFcmp::Condition. Make sure it didn't fall

+ // out of order.

+ if (TableFcmp[Index].SwapOperands) {

+ Operand *Tmp = Src0;

+ Src0 = Src1;

+ Src1 = Tmp;

+ }

+ bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);

+ bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);

+ if (HasC1) {

+ Src0 = legalize(Src0);

+ Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

+ Variable *T = NULL;

+ _mov(T, Src0);

+ _ucomiss(T, Src1RM);

+ }

+ Constant *Default =

+ Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default);

+ _mov(Dest, Default);

+ if (HasC1) {

+ InstX8632Label *Label = InstX8632Label::create(Func, this);

+ _br(TableFcmp[Index].C1, Label);

+ if (HasC2) {

+ _br(TableFcmp[Index].C2, Label);

+ }

+ Context.insert(InstFakeUse::create(Func, Dest));

+ Constant *NonDefault =

+ Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default);

+ _mov(Dest, NonDefault);

+ Context.insert(Label);

+ }

+void TargetX8632::lowerIcmp(const InstIcmp *Inst) {

+ Operand *Src0 = legalize(Inst->getSrc(0));

+ Operand *Src1 = legalize(Inst->getSrc(1));

+ Variable *Dest = Inst->getDest();

+ // If Src1 is an immediate, or known to be a physical register, we can

+ // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into

+ // a physical register. (Actually, either Src0 or Src1 can be chosen for

+ // the physical register, but unfortunately we have to commit to one or

+ // the other before register allocation.)

+ bool IsSrc1ImmOrReg = false;

+ if (llvm::isa<Constant>(Src1))

+ IsSrc1ImmOrReg = true;

+ else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {

+ if (Var->hasReg())

+ IsSrc1ImmOrReg = true;

+ }

+ // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:

+ Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);

+ Constant *One = Ctx->getConstantInt(IceType_i32, 1);

+ if (Src0->getType() == IceType_i64) {

+ InstIcmp::ICond Condition = Inst->getCondition();

+ size_t Index = static_cast<size_t>(Condition);

+ assert(Index < TableIcmp64Size);

+ // The table is indexed by InstIcmp::Condition. Make sure it didn't fall

+ // out of order.

+ Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);

+ Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);

+ if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {

+ InstX8632Label *Label = InstX8632Label::create(Func, this);

+ _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));

+ _cmp(loOperand(Src0), Src1LoRI);

+ _br(InstX8632Br::Br_ne, Label);

+ _cmp(hiOperand(Src0), Src1HiRI);

+ _br(InstX8632Br::Br_ne, Label);

+ Context.insert(InstFakeUse::create(Func, Dest));

+ _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));

+ Context.insert(Label);

+ } else {

+ InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);

+ InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);

+ _mov(Dest, One);

+ _cmp(hiOperand(Src0), Src1HiRI);

+ _br(TableIcmp64[Index].C1, LabelTrue);

+ _br(TableIcmp64[Index].C2, LabelFalse);

+ _cmp(loOperand(Src0), Src1LoRI);

+ _br(TableIcmp64[Index].C3, LabelTrue);

+ Context.insert(LabelFalse);

+ Context.insert(InstFakeUse::create(Func, Dest));

+ _mov(Dest, Zero);

+ Context.insert(LabelTrue);

+ }

+ return;

+ }

+ // cmp b, c

+ Operand *Src0New =

+ legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);

+ InstX8632Label *Label = InstX8632Label::create(Func, this);

+ _cmp(Src0New, Src1);

+ _mov(Dest, One);

+ _br(getIcmp32Mapping(Inst->getCondition()), Label);

+ Context.insert(InstFakeUse::create(Func, Dest));

+ _mov(Dest, Zero);

+ Context.insert(Label);

+void TargetX8632::lowerLoad(const InstLoad *Inst) {

+ // A Load instruction can be treated the same as an Assign

+ // instruction, after the source operand is transformed into an

+ // OperandX8632Mem operand. Note that the address mode

+ // optimization already creates an OperandX8632Mem operand, so it

+ // doesn't need another level of transformation.

+ Type Ty = Inst->getDest()->getType();

+ Operand *Src0 = Inst->getSourceAddress();

+ // Address mode optimization already creates an OperandX8632Mem

+ // operand, so it doesn't need another level of transformation.

+ if (!llvm::isa<OperandX8632Mem>(Src0)) {

+ Variable *Base = llvm::dyn_cast<Variable>(Src0);

+ Constant *Offset = llvm::dyn_cast<Constant>(Src0);

+ assert(Base || Offset);

+ Src0 = OperandX8632Mem::create(Func, Ty, Base, Offset);

+ }

+ InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);

+ lowerAssign(Assign);

+void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {

+ Func->setError("Phi lowering not implemented");

+void TargetX8632::lowerRet(const InstRet *Inst) {

+ Variable *Reg = NULL;

+ if (Inst->hasRetValue()) {

+ Operand *Src0 = legalize(Inst->getRetValue());

+ if (Src0->getType() == IceType_i64) {

+ Variable *eax = legalizeToVar(loOperand(Src0), false, Reg_eax);

+ Variable *edx = legalizeToVar(hiOperand(Src0), false, Reg_edx);

+ Reg = eax;

+ Context.insert(InstFakeUse::create(Func, edx));

+ } else if (Src0->getType() == IceType_f32 ||

+ Src0->getType() == IceType_f64) {

+ _fld(Src0);

+ } else {

+ _mov(Reg, Src0, Reg_eax);

+ }

+ _ret(Reg);

+ // Add a fake use of esp to make sure esp stays alive for the entire

+ // function. Otherwise post-call esp adjustments get dead-code

+ // eliminated. TODO: Are there more places where the fake use

+ // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not

+ // have a ret instruction.

+ Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);

+ Context.insert(InstFakeUse::create(Func, esp));

+void TargetX8632::lowerSelect(const InstSelect *Inst) {

+ // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:

+ Variable *Dest = Inst->getDest();

+ Operand *SrcT = Inst->getTrueOperand();

+ Operand *SrcF = Inst->getFalseOperand();

+ Operand *Condition = legalize(Inst->getCondition());

+ Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);

+ InstX8632Label *Label = InstX8632Label::create(Func, this);

+ if (Dest->getType() == IceType_i64) {

+ Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

+ Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

+ Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true);

+ Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true);

+ _cmp(Condition, Zero);

+ _mov(DestLo, SrcLoRI);

+ _mov(DestHi, SrcHiRI);

+ _br(InstX8632Br::Br_ne, Label);

+ Context.insert(InstFakeUse::create(Func, DestLo));

+ Context.insert(InstFakeUse::create(Func, DestHi));

+ Operand *SrcFLo = loOperand(SrcF);

+ Operand *SrcFHi = hiOperand(SrcF);

+ SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true);

+ SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true);

+ _mov(DestLo, SrcLoRI);

+ _mov(DestHi, SrcHiRI);

+ } else {

+ _cmp(Condition, Zero);

+ SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true);

+ _mov(Dest, SrcT);

+ _br(InstX8632Br::Br_ne, Label);

+ Context.insert(InstFakeUse::create(Func, Dest));

+ SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true);

+ _mov(Dest, SrcF);

+ }

+ Context.insert(Label);

+void TargetX8632::lowerStore(const InstStore *Inst) {

+ Operand *Value = Inst->getData();

+ Operand *Addr = Inst->getAddr();

+ OperandX8632Mem *NewAddr = llvm::dyn_cast<OperandX8632Mem>(Addr);

+ // Address mode optimization already creates an OperandX8632Mem

+ // operand, so it doesn't need another level of transformation.

+ if (!NewAddr) {

+ // The address will be either a constant (which represents a global

+ // variable) or a variable, so either the Base or Offset component

+ // of the OperandX8632Mem will be set.

+ Variable *Base = llvm::dyn_cast<Variable>(Addr);

+ Constant *Offset = llvm::dyn_cast<Constant>(Addr);

+ assert(Base || Offset);

+ NewAddr = OperandX8632Mem::create(Func, Value->getType(), Base, Offset);

+ }

+ NewAddr = llvm::cast<OperandX8632Mem>(legalize(NewAddr));

+ if (NewAddr->getType() == IceType_i64) {

+ Value = legalize(Value);

+ Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true);

+ Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true);

+ _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));

+ _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));

+ } else {

+ Value = legalize(Value, Legal_Reg | Legal_Imm, true);

+ _store(Value, NewAddr);

+ }

+void TargetX8632::lowerSwitch(const InstSwitch *Inst) {

+ // This implements the most naive possible lowering.

+ // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default

+ Operand *Src0 = Inst->getComparison();

+ SizeT NumCases = Inst->getNumCases();

+ // OK, we'll be slightly less naive by forcing Src into a physical

+ // register if there are 2 or more uses.

+ if (NumCases >= 2)

+ Src0 = legalizeToVar(Src0, true);

+ else

+ Src0 = legalize(Src0, Legal_All, true);

+ for (SizeT I = 0; I < NumCases; ++I) {

+ Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I));

+ _cmp(Src0, Value);

+ _br(InstX8632Br::Br_e, Inst->getLabel(I));

+ }

+ _br(Inst->getLabelDefault());

+void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {

+ const SizeT MaxSrcs = 0;

+ Variable *Dest = NULL;

+ InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);

+ lowerCall(Call);

+Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,

+ bool AllowOverlap, int32_t RegNum) {

+ assert(Allowed & Legal_Reg);

+ assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);

+ if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {

+ Variable *Base = Mem->getBase();

+ Variable *Index = Mem->getIndex();

+ Variable *RegBase = Base;

+ Variable *RegIndex = Index;

+ if (Base) {

+ RegBase = legalizeToVar(Base, true);

+ }

+ if (Index) {

+ RegIndex = legalizeToVar(Index, true);

+ }

+ if (Base != RegBase || Index != RegIndex) {

+ From =

+ OperandX8632Mem::create(Func, Mem->getType(), RegBase,

+ Mem->getOffset(), RegIndex, Mem->getShift());

+ }

+ if (!(Allowed & Legal_Mem)) {

+ Variable *Reg = makeReg(From->getType(), RegNum);

+ _mov(Reg, From, RegNum);

+ From = Reg;

+ }

+ return From;

+ }

+ if (llvm::isa<Constant>(From)) {

+ if (!(Allowed & Legal_Imm)) {

+ Variable *Reg = makeReg(From->getType(), RegNum);

+ _mov(Reg, From);

+ From = Reg;

+ }

+ return From;

+ }

+ if (Variable *Var = llvm::dyn_cast<Variable>(From)) {

+ // We need a new physical register for the operand if:

+ // Mem is not allowed and Var->getRegNum() is unknown, or

+ // RegNum is required and Var->getRegNum() doesn't match.

+ if ((!(Allowed & Legal_Mem) && !Var->hasReg()) ||

+ (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {

+ Variable *Reg = makeReg(From->getType(), RegNum);

+ if (RegNum == Variable::NoRegister) {

+ Reg->setPreferredRegister(Var, AllowOverlap);

+ }

+ _mov(Reg, From);

+ From = Reg;

+ }

+ return From;

+ }

+ llvm_unreachable("Unhandled operand kind in legalize()");

+ return From;

+Variable *TargetX8632::legalizeToVar(Operand *From, bool AllowOverlap,

+ int32_t RegNum) {

+ return llvm::cast<Variable>(legalize(From, Legal_Reg, AllowOverlap, RegNum));

+Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {

+ Variable *Reg = Func->makeVariable(Type, Context.getNode());

+ if (RegNum == Variable::NoRegister)

+ Reg->setWeightInfinite();

+ else

+ Reg->setRegNum(RegNum);

+ return Reg;

+void TargetX8632::postLower() {

+ if (Ctx->getOptLevel() != Opt_m1)

+ return;

+ // TODO: Avoid recomputing WhiteList every instruction.

+ llvm::SmallBitVector WhiteList = getRegisterSet(RegSet_All, RegSet_None);

+ // Make one pass to black-list pre-colored registers. TODO: If

+ // there was some prior register allocation pass that made register

+ // assignments, those registers need to be black-listed here as

+ // well.

+ for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;

+ ++I) {

+ const Inst *Inst = *I;

+ if (Inst->isDeleted())

+ continue;

+ if (llvm::isa<InstFakeKill>(Inst))

+ continue;

+ SizeT VarIndex = 0;

+ for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {

+ Operand *Src = Inst->getSrc(SrcNum);

+ SizeT NumVars = Src->getNumVars();

+ for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {

+ const Variable *Var = Src->getVar(J);

+ if (!Var->hasReg())

+ continue;

+ WhiteList[Var->getRegNum()] = false;

+ }

+ // The second pass colors infinite-weight variables.

+ llvm::SmallBitVector AvailableRegisters = WhiteList;

+ for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;

+ ++I) {

+ const Inst *Inst = *I;

+ if (Inst->isDeleted())

+ continue;

+ SizeT VarIndex = 0;

+ for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {

+ Operand *Src = Inst->getSrc(SrcNum);

+ SizeT NumVars = Src->getNumVars();

+ for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {

+ Variable *Var = Src->getVar(J);

+ if (Var->hasReg())

+ continue;

+ if (!Var->getWeight().isInf())

+ continue;

+ llvm::SmallBitVector AvailableTypedRegisters =

+ AvailableRegisters & getRegisterSetForType(Var->getType());

+ if (!AvailableTypedRegisters.any()) {

+ // This is a hack in case we run out of physical registers

+ // due to an excessive number of "push" instructions from

+ // lowering a call.

+ AvailableRegisters = WhiteList;

+ AvailableTypedRegisters =

+ AvailableRegisters & getRegisterSetForType(Var->getType());

+ }

+ assert(AvailableTypedRegisters.any());

+ int32_t RegNum = AvailableTypedRegisters.find_first();

+ Var->setRegNum(RegNum);

+ AvailableRegisters[RegNum] = false;

+ }

+} // end of namespace Ice

« src/IceTargetLoweringX8632.h ('K') | « src/IceTargetLoweringX8632.h ('k') | src/IceTargetLoweringX8632.def » ('j') | no next file with comments »