Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(22)

Unified Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1202533003: Extracts a TargetX86Base target which will be used as the common X86{32,64} implementation. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments. Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/IceTargetLoweringX86BaseImpl.h
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX86BaseImpl.h
similarity index 88%
copy from src/IceTargetLoweringX8632.cpp
copy to src/IceTargetLoweringX86BaseImpl.h
index c1ba40429a8f788b78ed600e2a8f731d1f3ac83b..b02339ac8a29b979f1239a0fbe101a70caf545c4 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX86BaseImpl.h
@@ -1,4 +1,4 @@
-//===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
+//===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering *- C++ -*-===//
//
// The Subzero Code Generator
//
@@ -7,12 +7,15 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the TargetLoweringX8632 class, which
+// This file implements the TargetLoweringX86Base class, which
// consists almost entirely of the lowering sequence for each
// high-level instruction.
//
//===----------------------------------------------------------------------===//
+#ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
+#define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
+
#include "llvm/Support/MathExtras.h"
#include "IceCfg.h"
@@ -30,221 +33,7 @@
#include "IceUtils.h"
namespace Ice {
-
-namespace {
-
-// The following table summarizes the logic for lowering the fcmp
-// instruction. There is one table entry for each of the 16 conditions.
-//
-// The first four columns describe the case when the operands are
-// floating point scalar values. A comment in lowerFcmp() describes the
-// lowering template. In the most general case, there is a compare
-// followed by two conditional branches, because some fcmp conditions
-// don't map to a single x86 conditional branch. However, in many cases
-// it is possible to swap the operands in the comparison and have a
-// single conditional branch. Since it's quite tedious to validate the
-// table by hand, good execution tests are helpful.
-//
-// The last two columns describe the case when the operands are vectors
-// of floating point values. For most fcmp conditions, there is a clear
-// mapping to a single x86 cmpps instruction variant. Some fcmp
-// conditions require special code to handle and these are marked in the
-// table with a Cmpps_Invalid predicate.
-const struct TableFcmp_ {
- uint32_t Default;
- bool SwapScalarOperands;
- CondX86::BrCond C1, C2;
- bool SwapVectorOperands;
- CondX86::CmppsCond Predicate;
-} TableFcmp[] = {
-#define X(val, dflt, swapS, C1, C2, swapV, pred) \
- { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \
- ,
- FCMPX8632_TABLE
-#undef X
-};
-const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
-
-// The following table summarizes the logic for lowering the icmp instruction
-// for i32 and narrower types. Each icmp condition has a clear mapping to an
-// x86 conditional branch instruction.
-
-const struct TableIcmp32_ {
- CondX86::BrCond Mapping;
-} TableIcmp32[] = {
-#define X(val, C_32, C1_64, C2_64, C3_64) \
- { CondX86::C_32 } \
- ,
- ICMPX8632_TABLE
-#undef X
-};
-const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
-
-// The following table summarizes the logic for lowering the icmp instruction
-// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
-// conditional branches are needed. For the other conditions, three separate
-// conditional branches are needed.
-const struct TableIcmp64_ {
- CondX86::BrCond C1, C2, C3;
-} TableIcmp64[] = {
-#define X(val, C_32, C1_64, C2_64, C3_64) \
- { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 } \
- ,
- ICMPX8632_TABLE
-#undef X
-};
-const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
-
-CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
- size_t Index = static_cast<size_t>(Cond);
- assert(Index < TableIcmp32Size);
- return TableIcmp32[Index].Mapping;
-}
-
-const struct TableTypeX8632Attributes_ {
- Type InVectorElementType;
-} TableTypeX8632Attributes[] = {
-#define X(tag, elementty, cvt, sdss, pack, width, fld) \
- { elementty } \
- ,
- ICETYPEX8632_TABLE
-#undef X
-};
-const size_t TableTypeX8632AttributesSize =
- llvm::array_lengthof(TableTypeX8632Attributes);
-
-// Return the type which the elements of the vector have in the X86
-// representation of the vector.
-Type getInVectorElementType(Type Ty) {
- assert(isVectorType(Ty));
- size_t Index = static_cast<size_t>(Ty);
- (void)Index;
- assert(Index < TableTypeX8632AttributesSize);
- return TableTypeX8632Attributes[Ty].InVectorElementType;
-}
-
-// The maximum number of arguments to pass in XMM registers
-const uint32_t X86_MAX_XMM_ARGS = 4;
-// The number of bits in a byte
-const uint32_t X86_CHAR_BIT = 8;
-// Stack alignment
-const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
-// Size of the return address on the stack
-const uint32_t X86_RET_IP_SIZE_BYTES = 4;
-// The number of different NOP instructions
-const uint32_t X86_NUM_NOP_VARIANTS = 5;
-
-// Value is in bytes. Return Value adjusted to the next highest multiple
-// of the stack alignment.
-uint32_t applyStackAlignment(uint32_t Value) {
- return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
-}
-
-// In some cases, there are x-macros tables for both high-level and
-// low-level instructions/operands that use the same enum key value.
-// The tables are kept separate to maintain a proper separation
-// between abstraction layers. There is a risk that the tables could
-// get out of sync if enum values are reordered or if entries are
-// added or deleted. The following dummy namespaces use
-// static_asserts to ensure everything is kept in sync.
-
-// Validate the enum values in FCMPX8632_TABLE.
-namespace dummy1 {
-// Define a temporary set of enum values based on low-level table
-// entries.
-enum _tmp_enum {
-#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
- FCMPX8632_TABLE
-#undef X
- _num
-};
-// Define a set of constants based on high-level table entries.
-#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
-ICEINSTFCMP_TABLE
-#undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
-#define X(val, dflt, swapS, C1, C2, swapV, pred) \
- static const int _table2_##val = _tmp_##val; \
- static_assert( \
- _table1_##val == _table2_##val, \
- "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
-FCMPX8632_TABLE
-#undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
-#define X(tag, str) \
- static_assert( \
- _table1_##tag == _table2_##tag, \
- "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
-ICEINSTFCMP_TABLE
-#undef X
-} // end of namespace dummy1
-
-// Validate the enum values in ICMPX8632_TABLE.
-namespace dummy2 {
-// Define a temporary set of enum values based on low-level table
-// entries.
-enum _tmp_enum {
-#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
- ICMPX8632_TABLE
-#undef X
- _num
-};
-// Define a set of constants based on high-level table entries.
-#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
-ICEINSTICMP_TABLE
-#undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
-#define X(val, C_32, C1_64, C2_64, C3_64) \
- static const int _table2_##val = _tmp_##val; \
- static_assert( \
- _table1_##val == _table2_##val, \
- "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
-ICMPX8632_TABLE
-#undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
-#define X(tag, str) \
- static_assert( \
- _table1_##tag == _table2_##tag, \
- "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
-ICEINSTICMP_TABLE
-#undef X
-} // end of namespace dummy2
-
-// Validate the enum values in ICETYPEX8632_TABLE.
-namespace dummy3 {
-// Define a temporary set of enum values based on low-level table
-// entries.
-enum _tmp_enum {
-#define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,
- ICETYPEX8632_TABLE
-#undef X
- _num
-};
-// Define a set of constants based on high-level table entries.
-#define X(tag, size, align, elts, elty, str) \
- static const int _table1_##tag = tag;
-ICETYPE_TABLE
-#undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
-#define X(tag, elementty, cvt, sdss, pack, width, fld) \
- static const int _table2_##tag = _tmp_##tag; \
- static_assert(_table1_##tag == _table2_##tag, \
- "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
-ICETYPEX8632_TABLE
-#undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
-#define X(tag, size, align, elts, elty, str) \
- static_assert(_table1_##tag == _table2_##tag, \
- "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
-ICETYPE_TABLE
-#undef X
-} // end of namespace dummy3
+namespace X86Internal {
// A helper class to ease the settings of RandomizationPoolingPause
// to disable constant blinding or pooling for some translation phases.
@@ -262,13 +51,76 @@ private:
bool &Flag;
};
-} // end of anonymous namespace
+template <class MachineTraits> class BoolFoldingEntry {
+ BoolFoldingEntry(const BoolFoldingEntry &) = delete;
+
+public:
+ BoolFoldingEntry() = default;
+ explicit BoolFoldingEntry(Inst *I);
+ BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;
+ // Instr is the instruction producing the i1-type variable of interest.
+ Inst *Instr = nullptr;
+ // IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
+ bool IsComplex = false;
+ // IsLiveOut is initialized conservatively to true, and is set to false when
+ // we encounter an instruction that ends Var's live range. We disable the
+ // folding optimization when Var is live beyond this basic block. Note that
+ // if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
+ // always be true and the folding optimization will never be performed.
+ bool IsLiveOut = true;
+ // NumUses counts the number of times Var is used as a source operand in the
+ // basic block. If IsComplex is true and there is more than one use of Var,
+ // then the folding optimization is disabled for Var.
+ uint32_t NumUses = 0;
+};
+
+template <class MachineTraits> class BoolFolding {
+public:
+ enum BoolFoldingProducerKind {
+ PK_None,
+ PK_Icmp32,
+ PK_Icmp64,
+ PK_Fcmp,
+ PK_Trunc
+ };
+
+ // Currently the actual enum values are not used (other than CK_None), but we
+ // go
+ // ahead and produce them anyway for symmetry with the
+ // BoolFoldingProducerKind.
+ enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };
+
+private:
+ BoolFolding(const BoolFolding &) = delete;
+ BoolFolding &operator=(const BoolFolding &) = delete;
+
+public:
+ BoolFolding() = default;
+ static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
+ static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);
+ static bool hasComplexLowering(const Inst *Instr);
+ void init(CfgNode *Node);
+ const Inst *getProducerFor(const Operand *Opnd) const;
+ void dump(const Cfg *Func) const;
+
+private:
+ // Returns true if Producers contains a valid entry for the given VarNum.
+ bool containsValid(SizeT VarNum) const {
+ auto Element = Producers.find(VarNum);
+ return Element != Producers.end() && Element->second.Instr != nullptr;
+ }
+ void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
+ // Producers maps Variable::Number to a BoolFoldingEntry.
+ std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers;
+};
-BoolFoldingEntry::BoolFoldingEntry(Inst *I)
- : Instr(I), IsComplex(BoolFolding::hasComplexLowering(I)) {}
+template <class MachineTraits>
+BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I)
+ : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {}
-BoolFolding::BoolFoldingProducerKind
-BoolFolding::getProducerKind(const Inst *Instr) {
+template <class MachineTraits>
+typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
+BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
if (llvm::isa<InstIcmp>(Instr)) {
if (Instr->getSrc(0)->getType() != IceType_i64)
return PK_Icmp32;
@@ -289,8 +141,9 @@ BoolFolding::getProducerKind(const Inst *Instr) {
return PK_None;
}
-BoolFolding::BoolFoldingConsumerKind
-BoolFolding::getConsumerKind(const Inst *Instr) {
+template <class MachineTraits>
+typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind
+BoolFolding<MachineTraits>::getConsumerKind(const Inst *Instr) {
if (llvm::isa<InstBr>(Instr))
return CK_Br;
if (llvm::isa<InstSelect>(Instr))
@@ -316,19 +169,21 @@ BoolFolding::getConsumerKind(const Inst *Instr) {
// and some floating-point compares. When this is true, and there is
// more than one consumer, we prefer to disable the folding
// optimization because it minimizes branches.
-bool BoolFolding::hasComplexLowering(const Inst *Instr) {
+template <class MachineTraits>
+bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
switch (getProducerKind(Instr)) {
default:
return false;
case PK_Icmp64:
return true;
case PK_Fcmp:
- return TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 !=
- CondX86::Br_None;
+ return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]
+ .C2 != CondX86::Br_None;
}
}
-void BoolFolding::init(CfgNode *Node) {
+template <class MachineTraits>
+void BoolFolding<MachineTraits>::init(CfgNode *Node) {
Producers.clear();
for (Inst &Instr : Node->getInsts()) {
// Check whether Instr is a valid producer.
@@ -337,7 +192,7 @@ void BoolFolding::init(CfgNode *Node) {
&& Var // only instructions with an actual dest var
&& Var->getType() == IceType_i1 // only bool-type dest vars
&& getProducerKind(&Instr) != PK_None) { // white-listed instructions
- Producers[Var->getIndex()] = BoolFoldingEntry(&Instr);
+ Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr);
}
// Check each src variable against the map.
for (SizeT I = 0; I < Instr.getSrcSize(); ++I) {
@@ -379,7 +234,9 @@ void BoolFolding::init(CfgNode *Node) {
}
}
-const Inst *BoolFolding::getProducerFor(const Operand *Opnd) const {
+template <class MachineTraits>
+const Inst *
+BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const {
auto *Var = llvm::dyn_cast<const Variable>(Opnd);
if (Var == nullptr)
return nullptr;
@@ -390,7 +247,8 @@ const Inst *BoolFolding::getProducerFor(const Operand *Opnd) const {
return Element->second.Instr;
}
-void BoolFolding::dump(const Cfg *Func) const {
+template <class MachineTraits>
+void BoolFolding<MachineTraits>::dump(const Cfg *Func) const {
if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding))
return;
OstreamLocker L(Func->getContext());
@@ -404,22 +262,26 @@ void BoolFolding::dump(const Cfg *Func) const {
}
}
-void TargetX8632::initNodeForLowering(CfgNode *Node) {
+template <class Machine>
+void TargetX86Base<Machine>::initNodeForLowering(CfgNode *Node) {
FoldingInfo.init(Node);
FoldingInfo.dump(Func);
}
-TargetX8632::TargetX8632(Cfg *Func) : TargetLowering(Func) {
- static_assert((X86InstructionSet::End - X86InstructionSet::Begin) ==
- (TargetInstructionSet::X86InstructionSet_End -
- TargetInstructionSet::X86InstructionSet_Begin),
- "X86InstructionSet range different from TargetInstructionSet");
+template <class Machine>
+TargetX86Base<Machine>::TargetX86Base(Cfg *Func)
+ : Machine(Func) {
+ static_assert(
+ (Traits::InstructionSet::End - Traits::InstructionSet::Begin) ==
+ (TargetInstructionSet::X86InstructionSet_End -
+ TargetInstructionSet::X86InstructionSet_Begin),
+ "Traits::InstructionSet range different from TargetInstructionSet");
if (Func->getContext()->getFlags().getTargetInstructionSet() !=
TargetInstructionSet::BaseInstructionSet) {
- InstructionSet = static_cast<X86InstructionSet>(
+ InstructionSet = static_cast<typename Traits::InstructionSet>(
(Func->getContext()->getFlags().getTargetInstructionSet() -
TargetInstructionSet::X86InstructionSet_Begin) +
- X86InstructionSet::Begin);
+ Traits::InstructionSet::Begin);
}
// TODO: Don't initialize IntegerRegisters and friends every time.
// Instead, initialize in some sort of static initializer for the
@@ -456,7 +318,7 @@ TargetX8632::TargetX8632(Cfg *Func) : TargetLowering(Func) {
TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
}
-void TargetX8632::translateO2() {
+template <class Machine> void TargetX86Base<Machine>::translateO2() {
TimerMarker T(TimerStack::TT_O2, Func);
if (!Ctx->getFlags().getPhiEdgeSplit()) {
@@ -568,7 +430,7 @@ void TargetX8632::translateO2() {
}
}
-void TargetX8632::translateOm1() {
+template <class Machine> void TargetX86Base<Machine>::translateOm1() {
TimerMarker T(TimerStack::TT_Om1, Func);
Func->placePhiLoads();
@@ -605,8 +467,6 @@ void TargetX8632::translateOm1() {
}
}
-namespace {
-
bool canRMW(const InstArithmetic *Arith) {
Type Ty = Arith->getDest()->getType();
// X86 vector instructions write to a register and have no RMW
@@ -652,9 +512,7 @@ bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
return false;
}
-} // end of anonymous namespace
-
-void TargetX8632::findRMW() {
+template <class Machine> void TargetX86Base<Machine>::findRMW() {
Func->dump("Before RMW");
OstreamLocker L(Func->getContext());
Ostream &Str = Func->getContext()->getStrDump();
@@ -746,8 +604,6 @@ void TargetX8632::findRMW() {
}
}
-namespace {
-
// Converts a ConstantInteger32 operand into its constant value, or
// MemoryOrderInvalid if the operand is not a ConstantInteger32.
uint64_t getConstantMemoryOrder(Operand *Opnd) {
@@ -774,9 +630,7 @@ bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
return false;
}
-} // end of anonymous namespace
-
-void TargetX8632::doLoadOpt() {
+template <class Machine> void TargetX86Base<Machine>::doLoadOpt() {
for (CfgNode *Node : Func->getNodes()) {
Context.init(Node);
while (!Context.atEnd()) {
@@ -866,14 +720,16 @@ void TargetX8632::doLoadOpt() {
Func->dump("After load optimization");
}
-bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {
+template <class Machine>
+bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {
return Br->optimizeBranch(NextNode);
}
return false;
}
-IceString TargetX8632::RegNames[] = {
+template <class Machine>
+IceString TargetX86Base<Machine>::RegNames[] = {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
frameptr, isI8, isInt, isFP) \
name,
@@ -881,7 +737,8 @@ IceString TargetX8632::RegNames[] = {
#undef X
};
-Variable *TargetX8632::getPhysicalRegister(SizeT RegNum, Type Ty) {
+template <class Machine>
+Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
if (Ty == IceType_void)
Ty = IceType_i32;
if (PhysicalRegisters[Ty].empty())
@@ -902,7 +759,8 @@ Variable *TargetX8632::getPhysicalRegister(SizeT RegNum, Type Ty) {
return Reg;
}
-IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
+template <class Machine>
+IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const {
assert(RegNum < RegX8632::Reg_NUM);
static IceString RegNames8[] = {
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
@@ -929,7 +787,8 @@ IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
}
}
-void TargetX8632::emitVariable(const Variable *Var) const {
+template <class Machine>
+void TargetX86Base<Machine>::emitVariable(const Variable *Var) const {
Ostream &Str = Ctx->getStrEmit();
if (Var->hasReg()) {
Str << "%" << getRegName(Var->getRegNum(), Var->getType());
@@ -947,7 +806,9 @@ void TargetX8632::emitVariable(const Variable *Var) const {
Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")";
}
-X8632::Address TargetX8632::stackVarToAsmOperand(const Variable *Var) const {
+template <class Machine>
+X8632::Address
+TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {
if (Var->hasReg())
llvm_unreachable("Stack Variable has a register assigned");
if (Var->getWeight().isInf()) {
@@ -959,7 +820,7 @@ X8632::Address TargetX8632::stackVarToAsmOperand(const Variable *Var) const {
return X8632::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset);
}
-void TargetX8632::lowerArguments() {
+template <class Machine> void TargetX86Base<Machine>::lowerArguments() {
VarList &Args = Func->getArgs();
// The first four arguments of vector type, regardless of their
// position relative to the other arguments in the argument list, are
@@ -969,8 +830,8 @@ void TargetX8632::lowerArguments() {
Context.init(Func->getEntryNode());
Context.setInsertPoint(Context.getCur());
- for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS;
- ++I) {
+ for (SizeT I = 0, E = Args.size();
+ I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) {
Variable *Arg = Args[I];
Type Ty = Arg->getType();
if (!isVectorType(Ty))
@@ -1001,9 +862,11 @@ void TargetX8632::lowerArguments() {
// Lo first because of the little-endian architecture. Lastly, this
// function generates an instruction to copy Arg into its assigned
// register if applicable.
-void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
- size_t BasicFrameOffset,
- size_t &InArgsSizeBytes) {
+template <class Machine>
+void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
+ Variable *FramePtr,
+ size_t BasicFrameOffset,
+ size_t &InArgsSizeBytes) {
Variable *Lo = Arg->getLo();
Variable *Hi = Arg->getHi();
Type Ty = Arg->getType();
@@ -1015,7 +878,7 @@ void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
return;
}
if (isVectorType(Ty)) {
- InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
+ InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);
}
Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
@@ -1036,9 +899,11 @@ void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
}
}
-Type TargetX8632::stackSlotType() { return IceType_i32; }
+template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
+ return IceType_i32;
+}
-void TargetX8632::addProlog(CfgNode *Node) {
+template <class Machine> void TargetX86Base<Machine>::addProlog(CfgNode *Node) {
// Stack frame layout:
//
// +------------------------+
@@ -1147,11 +1012,11 @@ void TargetX8632::addProlog(CfgNode *Node) {
// the region after the preserved registers and before the spill areas.
// LocalsSlotsPaddingBytes is the amount of padding between the globals
// and locals area if they are separate.
- assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES);
+ assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);
assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
uint32_t SpillAreaPaddingBytes = 0;
uint32_t LocalsSlotsPaddingBytes = 0;
- alignStackSpillAreas(X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
+ alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
SpillAreaAlignmentBytes, GlobalsSize,
LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
&LocalsSlotsPaddingBytes);
@@ -1161,8 +1026,10 @@ void TargetX8632::addProlog(CfgNode *Node) {
// Align esp if necessary.
if (NeedsStackAlignment) {
- uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
- uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
+ uint32_t StackOffset =
+ Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
+ uint32_t StackSize =
+ Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);
SpillAreaSizeBytes = StackSize - StackOffset;
}
@@ -1178,7 +1045,8 @@ void TargetX8632::addProlog(CfgNode *Node) {
// for those that were register-allocated. Args are pushed right to
// left, so Arg[0] is closest to the stack/frame pointer.
Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
- size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
+ size_t BasicFrameOffset =
+ PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;
if (!IsEbpBasedFrame)
BasicFrameOffset += SpillAreaSizeBytes;
@@ -1187,7 +1055,7 @@ void TargetX8632::addProlog(CfgNode *Node) {
unsigned NumXmmArgs = 0;
for (Variable *Arg : Args) {
// Skip arguments passed in registers.
- if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) {
+ if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {
++NumXmmArgs;
continue;
}
@@ -1215,7 +1083,7 @@ void TargetX8632::addProlog(CfgNode *Node) {
SpillAreaSizeBytes - LocalsSpillAreaSize -
GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
Str << " in-args = " << InArgsSizeBytes << " bytes\n"
- << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n"
+ << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"
<< " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
<< " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
<< " globals spill area = " << GlobalsSize << " bytes\n"
@@ -1234,7 +1102,7 @@ void TargetX8632::addProlog(CfgNode *Node) {
}
}
-void TargetX8632::addEpilog(CfgNode *Node) {
+template <class Machine> void TargetX86Base<Machine>::addEpilog(CfgNode *Node) {
InstList &Insts = Node->getInsts();
InstList::reverse_iterator RI, E;
for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
@@ -1287,8 +1155,8 @@ void TargetX8632::addEpilog(CfgNode *Node) {
// jmp *t
// bundle_unlock
// FakeUse <original_ret_operand>
- const SizeT BundleSize = 1
- << Func->getAssembler<>()->getBundleAlignLog2Bytes();
+ const SizeT BundleSize =
+ 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes();
Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
_pop(T_ecx);
_bundle_lock();
@@ -1302,7 +1170,7 @@ void TargetX8632::addEpilog(CfgNode *Node) {
RI->setDeleted();
}
-void TargetX8632::split64(Variable *Var) {
+template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) {
switch (Var->getType()) {
default:
return;
@@ -1333,7 +1201,8 @@ void TargetX8632::split64(Variable *Var) {
}
}
-Operand *TargetX8632::loOperand(Operand *Operand) {
+template <class Machine>
+Operand *TargetX86Base<Machine>::loOperand(Operand *Operand) {
assert(Operand->getType() == IceType_i64 ||
Operand->getType() == IceType_f64);
if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
@@ -1360,7 +1229,8 @@ Operand *TargetX8632::loOperand(Operand *Operand) {
return nullptr;
}
-Operand *TargetX8632::hiOperand(Operand *Operand) {
+template <class Machine>
+Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) {
assert(Operand->getType() == IceType_i64 ||
Operand->getType() == IceType_f64);
if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
@@ -1401,8 +1271,10 @@ Operand *TargetX8632::hiOperand(Operand *Operand) {
return nullptr;
}
-llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
- RegSetMask Exclude) const {
+template <class Machine>
+llvm::SmallBitVector
+TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,
+ RegSetMask Exclude) const {
llvm::SmallBitVector Registers(RegX8632::Reg_NUM);
#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
@@ -1431,7 +1303,8 @@ llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
return Registers;
}
-void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
IsEbpBasedFrame = true;
// Conservatively require the stack to be aligned. Some stack
// adjustment operations implemented below assume that the stack is
@@ -1451,10 +1324,11 @@ void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
// LLVM enforces power of 2 alignment.
assert(llvm::isPowerOf2_32(AlignmentParam));
- assert(llvm::isPowerOf2_32(X86_STACK_ALIGNMENT_BYTES));
+ assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));
- uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
- if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
+ uint32_t Alignment =
+ std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
+ if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) {
_and(esp, Ctx->getConstantInt32(-Alignment));
}
if (const auto *ConstantTotalSize =
@@ -1480,8 +1354,9 @@ void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
// to multiply by powers of 2. These can be combined such that
// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,
// combined with left-shifting by 2.
-bool TargetX8632::optimizeScalarMul(Variable *Dest, Operand *Src0,
- int32_t Src1) {
+template <class Machine>
+bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,
+ int32_t Src1) {
// Disable this optimization for Om1 and O0, just to keep things
// simple there.
if (Ctx->getFlags().getOptLevel() < Opt_1)
@@ -1570,7 +1445,8 @@ bool TargetX8632::optimizeScalarMul(Variable *Dest, Operand *Src0,
return true;
}
-void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {
Variable *Dest = Inst->getDest();
Operand *Src0 = legalize(Inst->getSrc(0));
Operand *Src1 = legalize(Inst->getSrc(1));
@@ -1873,7 +1749,7 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
bool TypesAreValidForPmull =
Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
bool InstructionSetIsValidForPmull =
- Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1;
+ Dest->getType() == IceType_v8i16 || InstructionSet >= Machine::SSE4_1;
if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
Variable *T = makeReg(Dest->getType());
_movp(T, Src0);
@@ -2067,7 +1943,7 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
// add t,src
// sar t,log
// dest=t
- uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty);
+ uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
_mov(T, Src0);
// If for some reason we are dividing by 1, just treat it
// like an assignment.
@@ -2136,7 +2012,7 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
// sub t,src
// neg t
// dest=t
- uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty);
+ uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
// If for some reason we are dividing by 1, just assign 0.
if (LogDiv == 0) {
_mov(Dest, Ctx->getConstantZero(Ty));
@@ -2204,7 +2080,8 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
}
}
-void TargetX8632::lowerAssign(const InstAssign *Inst) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {
Variable *Dest = Inst->getDest();
Operand *Src0 = Inst->getSrc(0);
assert(Dest->getType() == Src0->getType());
@@ -2249,7 +2126,8 @@ void TargetX8632::lowerAssign(const InstAssign *Inst) {
}
}
-void TargetX8632::lowerBr(const InstBr *Inst) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) {
if (Inst->isUnconditional()) {
_br(Inst->getTargetUnconditional());
return;
@@ -2270,7 +2148,7 @@ void TargetX8632::lowerBr(const InstBr *Inst) {
Operand *Src1 = legalize(Producer->getSrc(1));
Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
_cmp(Src0RM, Src1);
- _br(getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(),
+ _br(Traits::getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(),
Inst->getTargetFalse());
return;
}
@@ -2283,7 +2161,8 @@ void TargetX8632::lowerBr(const InstBr *Inst) {
_br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
}
-void TargetX8632::lowerCall(const InstCall *Instr) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerCall(const InstCall *Instr) {
// x86-32 calling convention:
//
// * At the point before the call, the stack must be aligned to 16
@@ -2318,12 +2197,13 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
Type Ty = Arg->getType();
// The PNaCl ABI requires the width of arguments to be at least 32 bits.
assert(typeWidthInBytes(Ty) >= 4);
- if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
+ if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {
XmmArgs.push_back(Arg);
} else {
StackArgs.push_back(Arg);
if (isVectorType(Arg->getType())) {
- ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
+ ParameterAreaSizeBytes =
+ Traits::applyStackAlignment(ParameterAreaSizeBytes);
}
Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
@@ -2335,7 +2215,7 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
// Adjust the parameter area so that the stack is aligned. It is
// assumed that the stack is already aligned at the start of the
// calling sequence.
- ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
+ ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
// Subtract the appropriate amount for the argument area. This also
// takes care of setting the stack adjustment during emission.
@@ -2418,7 +2298,7 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
_mov(CallTargetVar, CallTarget);
_bundle_lock(InstBundleLock::Opt_AlignToEnd);
const SizeT BundleSize =
- 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
+ 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes();
_and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
CallTarget = CallTargetVar;
}
@@ -2480,7 +2360,8 @@ void TargetX8632::lowerCall(const InstCall *Instr) {
}
}
-void TargetX8632::lowerCast(const InstCast *Inst) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {
// a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
InstCast::OpKind CastKind = Inst->getCastKind();
Variable *Dest = Inst->getDest();
@@ -2510,7 +2391,8 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
} else {
// width = width(elty) - 1; dest = (src << width) >> width
SizeT ShiftAmount =
- X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;
+ Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -
+ 1;
Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
Variable *T = makeReg(DestTy);
_movp(T, Src0RM);
@@ -2545,7 +2427,8 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
// shl t1, dst_bitwidth - 1
// sar t1, dst_bitwidth - 1
// dst = t1
- size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
+ size_t DestBits =
+ Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
Variable *T = makeReg(Dest->getType());
if (typeWidthInBytes(Dest->getType()) <=
@@ -2950,7 +2833,9 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
}
}
-void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerExtractElement(
+ const InstExtractElement *Inst) {
Operand *SourceVectNotLegalized = Inst->getSrc(0);
ConstantInteger32 *ElementIndex =
llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
@@ -2960,12 +2845,12 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
unsigned Index = ElementIndex->getValue();
Type Ty = SourceVectNotLegalized->getType();
Type ElementTy = typeElementType(Ty);
- Type InVectorElementTy = getInVectorElementType(Ty);
+ Type InVectorElementTy = Traits::getInVectorElementType(Ty);
Variable *ExtractedElementR = makeReg(InVectorElementTy);
// TODO(wala): Determine the best lowering sequences for each type.
- bool CanUsePextr =
- Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
+ bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
+ InstructionSet >= Machine::SSE4_1;
if (CanUsePextr && Ty != IceType_v4f32) {
// Use pextrb, pextrw, or pextrd.
Constant *Mask = Ctx->getConstantInt32(Index);
@@ -3026,7 +2911,8 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
_mov(Dest, ExtractedElementR);
}
-void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) {
Operand *Src0 = Inst->getSrc(0);
Operand *Src1 = Inst->getSrc(1);
Variable *Dest = Inst->getDest();
@@ -3034,9 +2920,9 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
if (isVectorType(Dest->getType())) {
InstFcmp::FCond Condition = Inst->getCondition();
size_t Index = static_cast<size_t>(Condition);
- assert(Index < TableFcmpSize);
+ assert(Index < Traits::TableFcmpSize);
- if (TableFcmp[Index].SwapVectorOperands) {
+ if (Traits::TableFcmp[Index].SwapVectorOperands) {
Operand *T = Src0;
Src0 = Src1;
Src1 = T;
@@ -3057,7 +2943,7 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
switch (Condition) {
default: {
- CondX86::CmppsCond Predicate = TableFcmp[Index].Predicate;
+ CondX86::CmppsCond Predicate = Traits::TableFcmp[Index].Predicate;
assert(Predicate != CondX86::Cmpps_Invalid);
T = makeReg(Src0RM->getType());
_movp(T, Src0RM);
@@ -3106,11 +2992,11 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
// setcc a, C1
InstFcmp::FCond Condition = Inst->getCondition();
size_t Index = static_cast<size_t>(Condition);
- assert(Index < TableFcmpSize);
- if (TableFcmp[Index].SwapScalarOperands)
+ assert(Index < Traits::TableFcmpSize);
+ if (Traits::TableFcmp[Index].SwapScalarOperands)
std::swap(Src0, Src1);
- bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None);
- bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None);
+ bool HasC1 = (Traits::TableFcmp[Index].C1 != CondX86::Br_None);
+ bool HasC2 = (Traits::TableFcmp[Index].C2 != CondX86::Br_None);
if (HasC1) {
Src0 = legalize(Src0);
Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
@@ -3118,26 +3004,28 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
_mov(T, Src0);
_ucomiss(T, Src1RM);
if (!HasC2) {
- assert(TableFcmp[Index].Default);
- _setcc(Dest, TableFcmp[Index].C1);
+ assert(Traits::TableFcmp[Index].Default);
+ _setcc(Dest, Traits::TableFcmp[Index].C1);
return;
}
}
- Constant *Default = Ctx->getConstantInt32(TableFcmp[Index].Default);
+ Constant *Default = Ctx->getConstantInt32(Traits::TableFcmp[Index].Default);
_mov(Dest, Default);
if (HasC1) {
InstX8632Label *Label = InstX8632Label::create(Func, this);
- _br(TableFcmp[Index].C1, Label);
+ _br(Traits::TableFcmp[Index].C1, Label);
if (HasC2) {
- _br(TableFcmp[Index].C2, Label);
+ _br(Traits::TableFcmp[Index].C2, Label);
}
- Constant *NonDefault = Ctx->getConstantInt32(!TableFcmp[Index].Default);
+ Constant *NonDefault =
+ Ctx->getConstantInt32(!Traits::TableFcmp[Index].Default);
_mov_nonkillable(Dest, NonDefault);
Context.insert(Label);
}
}
-void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Inst) {
Operand *Src0 = legalize(Inst->getSrc(0));
Operand *Src1 = legalize(Inst->getSrc(1));
Variable *Dest = Inst->getDest();
@@ -3255,7 +3143,7 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
if (Src0->getType() == IceType_i64) {
InstIcmp::ICond Condition = Inst->getCondition();
size_t Index = static_cast<size_t>(Condition);
- assert(Index < TableIcmp64Size);
+ assert(Index < Traits::TableIcmp64Size);
Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
@@ -3266,12 +3154,12 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
_mov(Dest, One);
_cmp(Src0HiRM, Src1HiRI);
- if (TableIcmp64[Index].C1 != CondX86::Br_None)
- _br(TableIcmp64[Index].C1, LabelTrue);
- if (TableIcmp64[Index].C2 != CondX86::Br_None)
- _br(TableIcmp64[Index].C2, LabelFalse);
+ if (Traits::TableIcmp64[Index].C1 != CondX86::Br_None)
+ _br(Traits::TableIcmp64[Index].C1, LabelTrue);
+ if (Traits::TableIcmp64[Index].C2 != CondX86::Br_None)
+ _br(Traits::TableIcmp64[Index].C2, LabelFalse);
_cmp(Src0LoRM, Src1LoRI);
- _br(TableIcmp64[Index].C3, LabelTrue);
+ _br(Traits::TableIcmp64[Index].C3, LabelTrue);
Context.insert(LabelFalse);
_mov_nonkillable(Dest, Zero);
Context.insert(LabelTrue);
@@ -3281,10 +3169,11 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
// cmp b, c
Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
_cmp(Src0RM, Src1);
- _setcc(Dest, getIcmp32Mapping(Inst->getCondition()));
+ _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition()));
}
-void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {
Operand *SourceVectNotLegalized = Inst->getSrc(0);
Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
ConstantInteger32 *ElementIndex =
@@ -3296,7 +3185,7 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
Type Ty = SourceVectNotLegalized->getType();
Type ElementTy = typeElementType(Ty);
- Type InVectorElementTy = getInVectorElementType(Ty);
+ Type InVectorElementTy = Traits::getInVectorElementType(Ty);
if (ElementTy == IceType_i1) {
// Expand the element to the appropriate size for it to be inserted
@@ -3308,7 +3197,8 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
ElementToInsertNotLegalized = Expanded;
}
- if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) {
+ if (Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
+ InstructionSet >= Machine::SSE4_1) {
// Use insertps, pinsrb, pinsrw, or pinsrd.
Operand *ElementRM =
legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
@@ -3407,7 +3297,9 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
}
}
-void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerIntrinsicCall(
+ const InstIntrinsicCall *Instr) {
switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
case Intrinsics::AtomicCmpxchg: {
if (!Intrinsics::isMemoryOrderValid(
@@ -3510,11 +3402,10 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
Func->setError("Unexpected memory ordering for AtomicRMW");
return;
}
- lowerAtomicRMW(
- Instr->getDest(),
- static_cast<uint32_t>(
- llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
- Instr->getArg(1), Instr->getArg(2));
+ lowerAtomicRMW(Instr->getDest(),
+ static_cast<uint32_t>(llvm::cast<ConstantInteger32>(
+ Instr->getArg(0))->getValue()),
+ Instr->getArg(1), Instr->getArg(2));
return;
case Intrinsics::AtomicStore: {
if (!Intrinsics::isMemoryOrderValid(
@@ -3740,8 +3631,10 @@ void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return;
}
-void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
- Operand *Expected, Operand *Desired) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,
+ Operand *Ptr, Operand *Expected,
+ Operand *Desired) {
if (Expected->getType() == IceType_i64) {
// Reserve the pre-colored registers first, before adding any more
// infinite-weight variables from formMemoryOperand's legalization.
@@ -3771,9 +3664,11 @@ void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
_mov(DestPrev, T_eax);
}
-bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,
- Operand *Expected,
- Operand *Desired) {
+template <class Machine>
+bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
+ Operand *PtrToMem,
+ Operand *Expected,
+ Operand *Desired) {
if (Ctx->getFlags().getOptLevel() == Opt_m1)
return false;
// Peek ahead a few instructions and see how Dest is used.
@@ -3844,8 +3739,9 @@ bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,
return false;
}
-void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
- Operand *Ptr, Operand *Val) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
+ Operand *Ptr, Operand *Val) {
bool NeedsCmpxchg = false;
LowerBinOp Op_Lo = nullptr;
LowerBinOp Op_Hi = nullptr;
@@ -3858,8 +3754,8 @@ void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
// All the fall-through paths must set this to true, but use this
// for asserting.
NeedsCmpxchg = true;
- Op_Lo = &TargetX8632::_add;
- Op_Hi = &TargetX8632::_adc;
+ Op_Lo = &TargetX86Base<Machine>::_add;
+ Op_Hi = &TargetX86Base<Machine>::_adc;
break;
}
OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
@@ -3873,8 +3769,8 @@ void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
case Intrinsics::AtomicSub: {
if (Dest->getType() == IceType_i64) {
NeedsCmpxchg = true;
- Op_Lo = &TargetX8632::_sub;
- Op_Hi = &TargetX8632::_sbb;
+ Op_Lo = &TargetX86Base<Machine>::_sub;
+ Op_Hi = &TargetX86Base<Machine>::_sbb;
break;
}
OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
@@ -3893,18 +3789,18 @@ void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
// xadd is probably fine vs lock add for add, and xchg is fine
// vs an atomic store.
NeedsCmpxchg = true;
- Op_Lo = &TargetX8632::_or;
- Op_Hi = &TargetX8632::_or;
+ Op_Lo = &TargetX86Base<Machine>::_or;
+ Op_Hi = &TargetX86Base<Machine>::_or;
break;
case Intrinsics::AtomicAnd:
NeedsCmpxchg = true;
- Op_Lo = &TargetX8632::_and;
- Op_Hi = &TargetX8632::_and;
+ Op_Lo = &TargetX86Base<Machine>::_and;
+ Op_Hi = &TargetX86Base<Machine>::_and;
break;
case Intrinsics::AtomicXor:
NeedsCmpxchg = true;
- Op_Lo = &TargetX8632::_xor;
- Op_Hi = &TargetX8632::_xor;
+ Op_Lo = &TargetX86Base<Machine>::_xor;
+ Op_Hi = &TargetX86Base<Machine>::_xor;
break;
case Intrinsics::AtomicExchange:
if (Dest->getType() == IceType_i64) {
@@ -3928,9 +3824,12 @@ void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
}
-void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
- Variable *Dest, Operand *Ptr,
- Operand *Val) {
+template <class Machine>
+void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo,
+ LowerBinOp Op_Hi,
+ Variable *Dest,
+ Operand *Ptr,
+ Operand *Val) {
// Expand a more complex RMW operation as a cmpxchg loop:
// For 64-bit:
// mov eax, [ptr]
@@ -4035,8 +3934,10 @@ void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
//
// We could do constant folding here, but that should have
// been done by the front-end/middle-end optimizations.
-void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
- Operand *FirstVal, Operand *SecondVal) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
+ Operand *FirstVal,
+ Operand *SecondVal) {
// TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
// Then the instructions will handle the Val == 0 case much more simply
// and won't require conversion from bit position to number of zeros.
@@ -4107,8 +4008,6 @@ void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
_mov(DestHi, Ctx->getConstantZero(IceType_i32));
}
-namespace {
-
bool isAdd(const Inst *Inst) {
if (const InstArithmetic *Arith =
llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
@@ -4349,9 +4248,8 @@ void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
}
}
-} // anonymous namespace
-
-void TargetX8632::lowerLoad(const InstLoad *Load) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) {
// A Load instruction can be treated the same as an Assign
// instruction, after the source operand is transformed into an
// OperandX8632Mem operand. Note that the address mode
@@ -4364,7 +4262,7 @@ void TargetX8632::lowerLoad(const InstLoad *Load) {
lowerAssign(Assign);
}
-void TargetX8632::doAddressOptLoad() {
+template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() {
Inst *Inst = Context.getCur();
Variable *Dest = Inst->getDest();
Operand *Addr = Inst->getSrc(0);
@@ -4388,18 +4286,21 @@ void TargetX8632::doAddressOptLoad() {
}
}
-void TargetX8632::randomlyInsertNop(float Probability) {
+template <class Machine>
+void TargetX86Base<Machine>::randomlyInsertNop(float Probability) {
RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
if (RNG.getTrueWithProbability(Probability)) {
- _nop(RNG(X86_NUM_NOP_VARIANTS));
+ _nop(RNG(Traits::X86_NUM_NOP_VARIANTS));
}
}
-void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) {
Func->setError("Phi found in regular instruction list");
}
-void TargetX8632::lowerRet(const InstRet *Inst) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerRet(const InstRet *Inst) {
Variable *Reg = nullptr;
if (Inst->hasRetValue()) {
Operand *Src0 = legalize(Inst->getRetValue());
@@ -4429,7 +4330,8 @@ void TargetX8632::lowerRet(const InstRet *Inst) {
Context.insert(InstFakeUse::create(Func, esp));
}
-void TargetX8632::lowerSelect(const InstSelect *Inst) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) {
Variable *Dest = Inst->getDest();
Type DestTy = Dest->getType();
Operand *SrcT = Inst->getTrueOperand();
@@ -4441,7 +4343,7 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
Variable *T = makeReg(SrcTy);
Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
- if (InstructionSet >= SSE4_1) {
+ if (InstructionSet >= Machine::SSE4_1) {
// TODO(wala): If the condition operand is a constant, use blendps
// or pblendw.
//
@@ -4467,7 +4369,7 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
}
return;
}
- // Lower select without SSE4.1:
+ // Lower select without SSE4.1:
// a=d?b:c ==>
// if elementtype(d) != i1:
// d=sext(d);
@@ -4505,7 +4407,7 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
break;
case BoolFolding::PK_Icmp32: {
auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
- Cond = getIcmp32Mapping(Cmp->getCondition());
+ Cond = Traits::getIcmp32Mapping(Cmp->getCondition());
CmpOpnd1 = legalize(Producer->getSrc(1));
CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1);
} break;
@@ -4569,7 +4471,8 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) {
_mov(Dest, T);
}
-void TargetX8632::lowerStore(const InstStore *Inst) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {
Operand *Value = Inst->getData();
Operand *Addr = Inst->getAddr();
OperandX8632Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
@@ -4589,7 +4492,7 @@ void TargetX8632::lowerStore(const InstStore *Inst) {
}
}
-void TargetX8632::doAddressOptStore() {
+template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() {
InstStore *Inst = llvm::cast<InstStore>(Context.getCur());
Operand *Data = Inst->getData();
Operand *Addr = Inst->getAddr();
@@ -4616,7 +4519,8 @@ void TargetX8632::doAddressOptStore() {
}
}
-void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {
// This implements the most naive possible lowering.
// cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
Operand *Src0 = Inst->getComparison();
@@ -4660,9 +4564,10 @@ void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
_br(Inst->getLabelDefault());
}
-void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,
- Variable *Dest, Operand *Src0,
- Operand *Src1) {
+template <class Machine>
+void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind,
+ Variable *Dest, Operand *Src0,
+ Operand *Src1) {
assert(isVectorType(Dest->getType()));
Type Ty = Dest->getType();
Type ElementTy = typeElementType(Ty);
@@ -4699,7 +4604,8 @@ void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,
// We can eliminate the sext operation by copying the result of pcmpeqd,
// pcmpgtd, or cmpps (which produce sign extended results) to the result
// of the sext operation.
-void TargetX8632::eliminateNextVectorSextInstruction(
+template <class Machine>
+void TargetX86Base<Machine>::eliminateNextVectorSextInstruction(
Variable *SignExtendedResult) {
if (InstCast *NextCast =
llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
@@ -4713,9 +4619,14 @@ void TargetX8632::eliminateNextVectorSextInstruction(
}
}
-void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); }
+template <class Machine>
+void TargetX86Base<Machine>::lowerUnreachable(
+ const InstUnreachable * /*Inst*/) {
+ _ud2();
+}
-void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerRMW(const InstX8632FakeRMW *RMW) {
// If the beacon variable's live range does not end in this
// instruction, then it must end in the modified Store instruction
// that follows. This means that the original Store instruction is
@@ -4789,7 +4700,8 @@ void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) {
llvm::report_fatal_error("Couldn't lower RMW instruction");
}
-void TargetX8632::lowerOther(const Inst *Instr) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerOther(const Inst *Instr) {
if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) {
lowerRMW(RMW);
} else {
@@ -4801,7 +4713,7 @@ void TargetX8632::lowerOther(const Inst *Instr) {
// preserve integrity of liveness analysis. Undef values are also
// turned into zeroes, since loOperand() and hiOperand() don't expect
// Undef input.
-void TargetX8632::prelowerPhis() {
+template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
// Pause constant blinding or pooling, blinding or pooling will be done later
// during phi lowering assignments
BoolFlagSaver B(RandomizationPoolingPaused, true);
@@ -4832,8 +4744,6 @@ void TargetX8632::prelowerPhis() {
}
}
-namespace {
-
bool isMemoryOperand(const Operand *Opnd) {
if (const auto Var = llvm::dyn_cast<Variable>(Opnd))
return !Var->hasReg();
@@ -4848,12 +4758,11 @@ bool isMemoryOperand(const Operand *Opnd) {
return true;
}
-} // end of anonymous namespace
-
// Lower the pre-ordered list of assignments into mov instructions.
// Also has to do some ad-hoc register allocation as necessary.
-void TargetX8632::lowerPhiAssignments(CfgNode *Node,
- const AssignList &Assignments) {
+template <class Machine>
+void TargetX86Base<Machine>::lowerPhiAssignments(
+ CfgNode *Node, const AssignList &Assignments) {
// Check that this is a properly initialized shell of a node.
assert(Node->getOutEdges().size() == 1);
assert(Node->getInsts().empty());
@@ -5004,7 +4913,8 @@ void TargetX8632::lowerPhiAssignments(CfgNode *Node,
// TODO(wala): Add limited support for vector constants so that
// complex initialization in registers is unnecessary.
-Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
+template <class Machine>
+Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) {
Variable *Reg = makeReg(Ty, RegNum);
// Insert a FakeDef, since otherwise the live range of Reg might
// be overestimated.
@@ -5013,7 +4923,9 @@ Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
return Reg;
}
-Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {
+template <class Machine>
+Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty,
+ int32_t RegNum) {
Variable *MinusOnes = makeReg(Ty, RegNum);
// Insert a FakeDef so the live range of MinusOnes is not overestimated.
Context.insert(InstFakeDef::create(Func, MinusOnes));
@@ -5021,19 +4933,23 @@ Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {
return MinusOnes;
}
-Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
+template <class Machine>
+Variable *TargetX86Base<Machine>::makeVectorOfOnes(Type Ty, int32_t RegNum) {
Variable *Dest = makeVectorOfZeros(Ty, RegNum);
Variable *MinusOne = makeVectorOfMinusOnes(Ty);
_psub(Dest, MinusOne);
return Dest;
}
-Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
+template <class Machine>
+Variable *TargetX86Base<Machine>::makeVectorOfHighOrderBits(Type Ty,
+ int32_t RegNum) {
assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
Ty == IceType_v16i8);
if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
Variable *Reg = makeVectorOfOnes(Ty, RegNum);
- SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
+ SizeT Shift =
+ typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;
_psll(Reg, Ctx->getConstantInt8(Shift));
return Reg;
} else {
@@ -5053,15 +4969,18 @@ Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
// for f64. Construct it as vector of ones logically right shifted
// one bit. TODO(stichnot): Fix the wala TODO above, to represent
// vector constants in memory.
-Variable *TargetX8632::makeVectorOfFabsMask(Type Ty, int32_t RegNum) {
+template <class Machine>
+Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty,
+ int32_t RegNum) {
Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
_psrl(Reg, Ctx->getConstantInt8(1));
return Reg;
}
-OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
- Variable *Slot,
- uint32_t Offset) {
+template <class Machine>
+OperandX8632Mem *
+TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
+ uint32_t Offset) {
// Ensure that Loc is a stack slot.
assert(Slot->getWeight().isZero());
assert(Slot->getRegNum() == Variable::NoRegister);
@@ -5078,7 +4997,8 @@ OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
// Helper for legalize() to emit the right code to lower an operand to a
// register of the appropriate type.
-Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
+template <class Machine>
+Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
Type Ty = Src->getType();
Variable *Reg = makeReg(Ty, RegNum);
if (isVectorType(Ty)) {
@@ -5089,8 +5009,9 @@ Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
return Reg;
}
-Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
- int32_t RegNum) {
+template <class Machine>
+Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
+ int32_t RegNum) {
Type Ty = From->getType();
// Assert that a physical register is allowed. To date, all calls
// to legalize() allow a physical register. If a physical register
@@ -5203,7 +5124,8 @@ Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
}
// Provide a trivial wrapper to legalize() for this common usage.
-Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) {
+template <class Machine>
+Variable *TargetX86Base<Machine>::legalizeToVar(Operand *From, int32_t RegNum) {
return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}
@@ -5213,7 +5135,9 @@ Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) {
// (Actually, either Src0 or Src1 can be chosen for the physical
// register, but unfortunately we have to commit to one or the other
// before register allocation.)
-Operand *TargetX8632::legalizeSrc0ForCmp(Operand *Src0, Operand *Src1) {
+template <class Machine>
+Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0,
+ Operand *Src1) {
bool IsSrc1ImmOrReg = false;
if (llvm::isa<Constant>(Src1)) {
IsSrc1ImmOrReg = true;
@@ -5224,8 +5148,10 @@ Operand *TargetX8632::legalizeSrc0ForCmp(Operand *Src0, Operand *Src1) {
return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
}
-OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Opnd, Type Ty,
- bool DoLegalize) {
+template <class Machine>
+OperandX8632Mem *TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd,
+ Type Ty,
+ bool DoLegalize) {
OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Opnd);
// It may be the case that address mode optimization already creates
// an OperandX8632Mem, so in that case it wouldn't need another level
@@ -5257,7 +5183,8 @@ OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Opnd, Type Ty,
DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
}
-Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
+template <class Machine>
+Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {
// There aren't any 64-bit integer registers for x86-32.
assert(Type != IceType_i64);
Variable *Reg = Func->makeVariable(Type);
@@ -5268,13 +5195,14 @@ Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
return Reg;
}
-void TargetX8632::postLower() {
+template <class Machine> void TargetX86Base<Machine>::postLower() {
if (Ctx->getFlags().getOptLevel() == Opt_m1)
return;
inferTwoAddress();
}
-void TargetX8632::makeRandomRegisterPermutation(
+template <class Machine>
+void TargetX86Base<Machine>::makeRandomRegisterPermutation(
llvm::SmallVectorImpl<int32_t> &Permutation,
const llvm::SmallBitVector &ExcludeRegisters) const {
// TODO(stichnot): Declaring Permutation this way loses type/size
@@ -5341,192 +5269,44 @@ void TargetX8632::makeRandomRegisterPermutation(
}
}
-void TargetX8632::emit(const ConstantInteger32 *C) const {
+template <class Machine>
+void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const {
if (!ALLOW_DUMP)
return;
Ostream &Str = Ctx->getStrEmit();
Str << getConstantPrefix() << C->getValue();
}
-void TargetX8632::emit(const ConstantInteger64 *) const {
+template <class Machine>
+void TargetX86Base<Machine>::emit(const ConstantInteger64 *) const {
llvm::report_fatal_error("Not expecting to emit 64-bit integers");
}
-void TargetX8632::emit(const ConstantFloat *C) const {
+template <class Machine>
+void TargetX86Base<Machine>::emit(const ConstantFloat *C) const {
if (!ALLOW_DUMP)
return;
Ostream &Str = Ctx->getStrEmit();
C->emitPoolLabel(Str);
}
-void TargetX8632::emit(const ConstantDouble *C) const {
+template <class Machine>
+void TargetX86Base<Machine>::emit(const ConstantDouble *C) const {
if (!ALLOW_DUMP)
return;
Ostream &Str = Ctx->getStrEmit();
C->emitPoolLabel(Str);
}
-void TargetX8632::emit(const ConstantUndef *) const {
+template <class Machine>
+void TargetX86Base<Machine>::emit(const ConstantUndef *) const {
llvm::report_fatal_error("undef value encountered by emitter.");
}
-TargetDataX8632::TargetDataX8632(GlobalContext *Ctx)
- : TargetDataLowering(Ctx) {}
-
-void TargetDataX8632::lowerGlobals(const VariableDeclarationList &Vars,
- const IceString &SectionSuffix) {
- switch (Ctx->getFlags().getOutFileType()) {
- case FT_Elf: {
- ELFObjectWriter *Writer = Ctx->getObjectWriter();
- Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix);
- } break;
- case FT_Asm:
- case FT_Iasm: {
- const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
- OstreamLocker L(Ctx);
- for (const VariableDeclaration *Var : Vars) {
- if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
- emitGlobal(*Var, SectionSuffix);
- }
- }
- } break;
- }
-}
-
-template <typename T> struct PoolTypeConverter {};
-
-template <> struct PoolTypeConverter<float> {
- typedef uint32_t PrimitiveIntType;
- typedef ConstantFloat IceType;
- static const Type Ty = IceType_f32;
- static const char *TypeName;
- static const char *AsmTag;
- static const char *PrintfString;
-};
-const char *PoolTypeConverter<float>::TypeName = "float";
-const char *PoolTypeConverter<float>::AsmTag = ".long";
-const char *PoolTypeConverter<float>::PrintfString = "0x%x";
-
-template <> struct PoolTypeConverter<double> {
- typedef uint64_t PrimitiveIntType;
- typedef ConstantDouble IceType;
- static const Type Ty = IceType_f64;
- static const char *TypeName;
- static const char *AsmTag;
- static const char *PrintfString;
-};
-const char *PoolTypeConverter<double>::TypeName = "double";
-const char *PoolTypeConverter<double>::AsmTag = ".quad";
-const char *PoolTypeConverter<double>::PrintfString = "0x%llx";
-
-// Add converter for int type constant pooling
-template <> struct PoolTypeConverter<uint32_t> {
- typedef uint32_t PrimitiveIntType;
- typedef ConstantInteger32 IceType;
- static const Type Ty = IceType_i32;
- static const char *TypeName;
- static const char *AsmTag;
- static const char *PrintfString;
-};
-const char *PoolTypeConverter<uint32_t>::TypeName = "i32";
-const char *PoolTypeConverter<uint32_t>::AsmTag = ".long";
-const char *PoolTypeConverter<uint32_t>::PrintfString = "0x%x";
-
-// Add converter for int type constant pooling
-template <> struct PoolTypeConverter<uint16_t> {
- typedef uint32_t PrimitiveIntType;
- typedef ConstantInteger32 IceType;
- static const Type Ty = IceType_i16;
- static const char *TypeName;
- static const char *AsmTag;
- static const char *PrintfString;
-};
-const char *PoolTypeConverter<uint16_t>::TypeName = "i16";
-const char *PoolTypeConverter<uint16_t>::AsmTag = ".short";
-const char *PoolTypeConverter<uint16_t>::PrintfString = "0x%x";
-
-// Add converter for int type constant pooling
-template <> struct PoolTypeConverter<uint8_t> {
- typedef uint32_t PrimitiveIntType;
- typedef ConstantInteger32 IceType;
- static const Type Ty = IceType_i8;
- static const char *TypeName;
- static const char *AsmTag;
- static const char *PrintfString;
-};
-const char *PoolTypeConverter<uint8_t>::TypeName = "i8";
-const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte";
-const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x";
-
-template <typename T>
-void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) {
- if (!ALLOW_DUMP)
- return;
- Ostream &Str = Ctx->getStrEmit();
- Type Ty = T::Ty;
- SizeT Align = typeAlignInBytes(Ty);
- ConstantList Pool = Ctx->getConstantPool(Ty);
-
- Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align
- << "\n";
- Str << "\t.align\t" << Align << "\n";
- for (Constant *C : Pool) {
- if (!C->getShouldBePooled())
- continue;
- typename T::IceType *Const = llvm::cast<typename T::IceType>(C);
- typename T::IceType::PrimType Value = Const->getValue();
- // Use memcpy() to copy bits from Value into RawValue in a way
- // that avoids breaking strict-aliasing rules.
- typename T::PrimitiveIntType RawValue;
- memcpy(&RawValue, &Value, sizeof(Value));
- char buf[30];
- int CharsPrinted =
- snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);
- assert(CharsPrinted >= 0 &&
- (size_t)CharsPrinted < llvm::array_lengthof(buf));
- (void)CharsPrinted; // avoid warnings if asserts are disabled
- Const->emitPoolLabel(Str);
- Str << ":\n\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " "
- << Value << "\n";
- }
-}
-
-void TargetDataX8632::lowerConstants() {
- if (Ctx->getFlags().getDisableTranslation())
- return;
- // No need to emit constants from the int pool since (for x86) they
- // are embedded as immediates in the instructions, just emit float/double.
- switch (Ctx->getFlags().getOutFileType()) {
- case FT_Elf: {
- ELFObjectWriter *Writer = Ctx->getObjectWriter();
-
- Writer->writeConstantPool<ConstantInteger32>(IceType_i8);
- Writer->writeConstantPool<ConstantInteger32>(IceType_i16);
- Writer->writeConstantPool<ConstantInteger32>(IceType_i32);
-
- Writer->writeConstantPool<ConstantFloat>(IceType_f32);
- Writer->writeConstantPool<ConstantDouble>(IceType_f64);
- } break;
- case FT_Asm:
- case FT_Iasm: {
- OstreamLocker L(Ctx);
-
- emitConstantPool<PoolTypeConverter<uint8_t>>(Ctx);
- emitConstantPool<PoolTypeConverter<uint16_t>>(Ctx);
- emitConstantPool<PoolTypeConverter<uint32_t>>(Ctx);
-
- emitConstantPool<PoolTypeConverter<float>>(Ctx);
- emitConstantPool<PoolTypeConverter<double>>(Ctx);
- } break;
- }
-}
-
-TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx)
- : TargetHeaderLowering(Ctx) {}
-
// Randomize or pool an Immediate.
-Operand *TargetX8632::randomizeOrPoolImmediate(Constant *Immediate,
- int32_t RegNum) {
+template <class Machine>
+Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate,
+ int32_t RegNum) {
assert(llvm::isa<ConstantInteger32>(Immediate) ||
llvm::isa<ConstantRelocatable>(Immediate));
if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
@@ -5602,9 +5382,10 @@ Operand *TargetX8632::randomizeOrPoolImmediate(Constant *Immediate,
return Immediate;
}
+template <class Machine>
OperandX8632Mem *
-TargetX8632::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand,
- int32_t RegNum) {
+TargetX86Base<Machine>::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand,
+ int32_t RegNum) {
assert(MemOperand);
if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
RandomizationPoolingPaused == true) {
@@ -5629,9 +5410,8 @@ TargetX8632::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand,
// TO:
// insert: lea offset+cookie[base], RegTemp
// => -cookie[RegTemp, index, shift]
- uint32_t Value =
- llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset())
- ->getValue();
+ uint32_t Value = llvm::dyn_cast<ConstantInteger32>(
+ MemOperand->getOffset())->getValue();
uint32_t Cookie = Ctx->getRandomizationCookie();
Constant *Mask1 = Ctx->getConstantInt(
MemOperand->getOffset()->getType(), Cookie + Value);
@@ -5717,4 +5497,8 @@ TargetX8632::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand,
return MemOperand;
}
+} // end of namespace X86Internal
} // end of namespace Ice
+
+#endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
+
Jim Stichnoth 2015/06/22 23:04:05 git (or someone) warns about this "trailing whitespace" on the final line.
John 2015/06/22 23:09:55 Done. I usually add the extra newline so that ca
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698