| Index: src/IceTargetLoweringX8632.cpp
|
| diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
|
| index c1ba40429a8f788b78ed600e2a8f731d1f3ac83b..55a0bfc638789fa9a2d49f03783d79762f9b0f5f 100644
|
| --- a/src/IceTargetLoweringX8632.cpp
|
| +++ b/src/IceTargetLoweringX8632.cpp
|
| @@ -2,5397 +2,165 @@
|
| //
|
| // The Subzero Code Generator
|
| //
|
| -// This file is distributed under the University of Illinois Open Source
|
| -// License. See LICENSE.TXT for details.
|
| -//
|
| -//===----------------------------------------------------------------------===//
|
| -//
|
| -// This file implements the TargetLoweringX8632 class, which
|
| -// consists almost entirely of the lowering sequence for each
|
| -// high-level instruction.
|
| -//
|
| -//===----------------------------------------------------------------------===//
|
| -
|
| -#include "llvm/Support/MathExtras.h"
|
| -
|
| -#include "IceCfg.h"
|
| -#include "IceCfgNode.h"
|
| -#include "IceClFlags.h"
|
| -#include "IceDefs.h"
|
| -#include "IceELFObjectWriter.h"
|
| -#include "IceGlobalInits.h"
|
| -#include "IceInstX8632.h"
|
| -#include "IceLiveness.h"
|
| -#include "IceOperand.h"
|
| -#include "IceRegistersX8632.h"
|
| -#include "IceTargetLoweringX8632.def"
|
| -#include "IceTargetLoweringX8632.h"
|
| -#include "IceUtils.h"
|
| -
|
| -namespace Ice {
|
| -
|
| -namespace {
|
| -
|
| -// The following table summarizes the logic for lowering the fcmp
|
| -// instruction. There is one table entry for each of the 16 conditions.
|
| -//
|
| -// The first four columns describe the case when the operands are
|
| -// floating point scalar values. A comment in lowerFcmp() describes the
|
| -// lowering template. In the most general case, there is a compare
|
| -// followed by two conditional branches, because some fcmp conditions
|
| -// don't map to a single x86 conditional branch. However, in many cases
|
| -// it is possible to swap the operands in the comparison and have a
|
| -// single conditional branch. Since it's quite tedious to validate the
|
| -// table by hand, good execution tests are helpful.
|
| -//
|
| -// The last two columns describe the case when the operands are vectors
|
| -// of floating point values. For most fcmp conditions, there is a clear
|
| -// mapping to a single x86 cmpps instruction variant. Some fcmp
|
| -// conditions require special code to handle and these are marked in the
|
| -// table with a Cmpps_Invalid predicate.
|
| -const struct TableFcmp_ {
|
| - uint32_t Default;
|
| - bool SwapScalarOperands;
|
| - CondX86::BrCond C1, C2;
|
| - bool SwapVectorOperands;
|
| - CondX86::CmppsCond Predicate;
|
| -} TableFcmp[] = {
|
| -#define X(val, dflt, swapS, C1, C2, swapV, pred) \
|
| - { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \
|
| - ,
|
| - FCMPX8632_TABLE
|
| -#undef X
|
| -};
|
| -const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
|
| -
|
| -// The following table summarizes the logic for lowering the icmp instruction
|
| -// for i32 and narrower types. Each icmp condition has a clear mapping to an
|
| -// x86 conditional branch instruction.
|
| -
|
| -const struct TableIcmp32_ {
|
| - CondX86::BrCond Mapping;
|
| -} TableIcmp32[] = {
|
| -#define X(val, C_32, C1_64, C2_64, C3_64) \
|
| - { CondX86::C_32 } \
|
| - ,
|
| - ICMPX8632_TABLE
|
| -#undef X
|
| -};
|
| -const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
|
| -
|
| -// The following table summarizes the logic for lowering the icmp instruction
|
| -// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
|
| -// conditional branches are needed. For the other conditions, three separate
|
| -// conditional branches are needed.
|
| -const struct TableIcmp64_ {
|
| - CondX86::BrCond C1, C2, C3;
|
| -} TableIcmp64[] = {
|
| -#define X(val, C_32, C1_64, C2_64, C3_64) \
|
| - { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 } \
|
| - ,
|
| - ICMPX8632_TABLE
|
| -#undef X
|
| -};
|
| -const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
|
| -
|
| -CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
|
| - size_t Index = static_cast<size_t>(Cond);
|
| - assert(Index < TableIcmp32Size);
|
| - return TableIcmp32[Index].Mapping;
|
| -}
|
| -
|
| -const struct TableTypeX8632Attributes_ {
|
| - Type InVectorElementType;
|
| -} TableTypeX8632Attributes[] = {
|
| -#define X(tag, elementty, cvt, sdss, pack, width, fld) \
|
| - { elementty } \
|
| - ,
|
| - ICETYPEX8632_TABLE
|
| -#undef X
|
| -};
|
| -const size_t TableTypeX8632AttributesSize =
|
| - llvm::array_lengthof(TableTypeX8632Attributes);
|
| -
|
| -// Return the type which the elements of the vector have in the X86
|
| -// representation of the vector.
|
| -Type getInVectorElementType(Type Ty) {
|
| - assert(isVectorType(Ty));
|
| - size_t Index = static_cast<size_t>(Ty);
|
| - (void)Index;
|
| - assert(Index < TableTypeX8632AttributesSize);
|
| - return TableTypeX8632Attributes[Ty].InVectorElementType;
|
| -}
|
| -
|
| -// The maximum number of arguments to pass in XMM registers
|
| -const uint32_t X86_MAX_XMM_ARGS = 4;
|
| -// The number of bits in a byte
|
| -const uint32_t X86_CHAR_BIT = 8;
|
| -// Stack alignment
|
| -const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
|
| -// Size of the return address on the stack
|
| -const uint32_t X86_RET_IP_SIZE_BYTES = 4;
|
| -// The number of different NOP instructions
|
| -const uint32_t X86_NUM_NOP_VARIANTS = 5;
|
| -
|
| -// Value is in bytes. Return Value adjusted to the next highest multiple
|
| -// of the stack alignment.
|
| -uint32_t applyStackAlignment(uint32_t Value) {
|
| - return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
|
| -}
|
| -
|
| -// In some cases, there are x-macros tables for both high-level and
|
| -// low-level instructions/operands that use the same enum key value.
|
| -// The tables are kept separate to maintain a proper separation
|
| -// between abstraction layers. There is a risk that the tables could
|
| -// get out of sync if enum values are reordered or if entries are
|
| -// added or deleted. The following dummy namespaces use
|
| -// static_asserts to ensure everything is kept in sync.
|
| -
|
| -// Validate the enum values in FCMPX8632_TABLE.
|
| -namespace dummy1 {
|
| -// Define a temporary set of enum values based on low-level table
|
| -// entries.
|
| -enum _tmp_enum {
|
| -#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
|
| - FCMPX8632_TABLE
|
| -#undef X
|
| - _num
|
| -};
|
| -// Define a set of constants based on high-level table entries.
|
| -#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
|
| -ICEINSTFCMP_TABLE
|
| -#undef X
|
| -// Define a set of constants based on low-level table entries, and
|
| -// ensure the table entry keys are consistent.
|
| -#define X(val, dflt, swapS, C1, C2, swapV, pred) \
|
| - static const int _table2_##val = _tmp_##val; \
|
| - static_assert( \
|
| - _table1_##val == _table2_##val, \
|
| - "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
|
| -FCMPX8632_TABLE
|
| -#undef X
|
| -// Repeat the static asserts with respect to the high-level table
|
| -// entries in case the high-level table has extra entries.
|
| -#define X(tag, str) \
|
| - static_assert( \
|
| - _table1_##tag == _table2_##tag, \
|
| - "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
|
| -ICEINSTFCMP_TABLE
|
| -#undef X
|
| -} // end of namespace dummy1
|
| -
|
| -// Validate the enum values in ICMPX8632_TABLE.
|
| -namespace dummy2 {
|
| -// Define a temporary set of enum values based on low-level table
|
| -// entries.
|
| -enum _tmp_enum {
|
| -#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
|
| - ICMPX8632_TABLE
|
| -#undef X
|
| - _num
|
| -};
|
| -// Define a set of constants based on high-level table entries.
|
| -#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
|
| -ICEINSTICMP_TABLE
|
| -#undef X
|
| -// Define a set of constants based on low-level table entries, and
|
| -// ensure the table entry keys are consistent.
|
| -#define X(val, C_32, C1_64, C2_64, C3_64) \
|
| - static const int _table2_##val = _tmp_##val; \
|
| - static_assert( \
|
| - _table1_##val == _table2_##val, \
|
| - "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
|
| -ICMPX8632_TABLE
|
| -#undef X
|
| -// Repeat the static asserts with respect to the high-level table
|
| -// entries in case the high-level table has extra entries.
|
| -#define X(tag, str) \
|
| - static_assert( \
|
| - _table1_##tag == _table2_##tag, \
|
| - "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
|
| -ICEINSTICMP_TABLE
|
| -#undef X
|
| -} // end of namespace dummy2
|
| -
|
| -// Validate the enum values in ICETYPEX8632_TABLE.
|
| -namespace dummy3 {
|
| -// Define a temporary set of enum values based on low-level table
|
| -// entries.
|
| -enum _tmp_enum {
|
| -#define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,
|
| - ICETYPEX8632_TABLE
|
| -#undef X
|
| - _num
|
| -};
|
| -// Define a set of constants based on high-level table entries.
|
| -#define X(tag, size, align, elts, elty, str) \
|
| - static const int _table1_##tag = tag;
|
| -ICETYPE_TABLE
|
| -#undef X
|
| -// Define a set of constants based on low-level table entries, and
|
| -// ensure the table entry keys are consistent.
|
| -#define X(tag, elementty, cvt, sdss, pack, width, fld) \
|
| - static const int _table2_##tag = _tmp_##tag; \
|
| - static_assert(_table1_##tag == _table2_##tag, \
|
| - "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
|
| -ICETYPEX8632_TABLE
|
| -#undef X
|
| -// Repeat the static asserts with respect to the high-level table
|
| -// entries in case the high-level table has extra entries.
|
| -#define X(tag, size, align, elts, elty, str) \
|
| - static_assert(_table1_##tag == _table2_##tag, \
|
| - "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
|
| -ICETYPE_TABLE
|
| -#undef X
|
| -} // end of namespace dummy3
|
| -
|
| -// A helper class to ease the settings of RandomizationPoolingPause
|
| -// to disable constant blinding or pooling for some translation phases.
|
| -class BoolFlagSaver {
|
| - BoolFlagSaver() = delete;
|
| - BoolFlagSaver(const BoolFlagSaver &) = delete;
|
| - BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;
|
| -
|
| -public:
|
| - BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; }
|
| - ~BoolFlagSaver() { Flag = OldValue; }
|
| -
|
| -private:
|
| - const bool OldValue;
|
| - bool &Flag;
|
| -};
|
| -
|
| -} // end of anonymous namespace
|
| -
|
| -BoolFoldingEntry::BoolFoldingEntry(Inst *I)
|
| - : Instr(I), IsComplex(BoolFolding::hasComplexLowering(I)) {}
|
| -
|
| -BoolFolding::BoolFoldingProducerKind
|
| -BoolFolding::getProducerKind(const Inst *Instr) {
|
| - if (llvm::isa<InstIcmp>(Instr)) {
|
| - if (Instr->getSrc(0)->getType() != IceType_i64)
|
| - return PK_Icmp32;
|
| - return PK_None; // TODO(stichnot): actually PK_Icmp64;
|
| - }
|
| - return PK_None; // TODO(stichnot): remove this
|
| -
|
| - if (llvm::isa<InstFcmp>(Instr))
|
| - return PK_Fcmp;
|
| - if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
|
| - switch (Cast->getCastKind()) {
|
| - default:
|
| - return PK_None;
|
| - case InstCast::Trunc:
|
| - return PK_Trunc;
|
| - }
|
| - }
|
| - return PK_None;
|
| -}
|
| -
|
| -BoolFolding::BoolFoldingConsumerKind
|
| -BoolFolding::getConsumerKind(const Inst *Instr) {
|
| - if (llvm::isa<InstBr>(Instr))
|
| - return CK_Br;
|
| - if (llvm::isa<InstSelect>(Instr))
|
| - return CK_Select;
|
| - return CK_None; // TODO(stichnot): remove this
|
| -
|
| - if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
|
| - switch (Cast->getCastKind()) {
|
| - default:
|
| - return CK_None;
|
| - case InstCast::Sext:
|
| - return CK_Sext;
|
| - case InstCast::Zext:
|
| - return CK_Zext;
|
| - }
|
| - }
|
| - return CK_None;
|
| -}
|
| -
|
| -// Returns true if the producing instruction has a "complex" lowering
|
| -// sequence. This generally means that its lowering sequence requires
|
| -// more than one conditional branch, namely 64-bit integer compares
|
| -// and some floating-point compares. When this is true, and there is
|
| -// more than one consumer, we prefer to disable the folding
|
| -// optimization because it minimizes branches.
|
| -bool BoolFolding::hasComplexLowering(const Inst *Instr) {
|
| - switch (getProducerKind(Instr)) {
|
| - default:
|
| - return false;
|
| - case PK_Icmp64:
|
| - return true;
|
| - case PK_Fcmp:
|
| - return TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 !=
|
| - CondX86::Br_None;
|
| - }
|
| -}
|
| -
|
| -void BoolFolding::init(CfgNode *Node) {
|
| - Producers.clear();
|
| - for (Inst &Instr : Node->getInsts()) {
|
| - // Check whether Instr is a valid producer.
|
| - Variable *Var = Instr.getDest();
|
| - if (!Instr.isDeleted() // only consider non-deleted instructions
|
| - && Var // only instructions with an actual dest var
|
| - && Var->getType() == IceType_i1 // only bool-type dest vars
|
| - && getProducerKind(&Instr) != PK_None) { // white-listed instructions
|
| - Producers[Var->getIndex()] = BoolFoldingEntry(&Instr);
|
| - }
|
| - // Check each src variable against the map.
|
| - for (SizeT I = 0; I < Instr.getSrcSize(); ++I) {
|
| - Operand *Src = Instr.getSrc(I);
|
| - SizeT NumVars = Src->getNumVars();
|
| - for (SizeT J = 0; J < NumVars; ++J) {
|
| - const Variable *Var = Src->getVar(J);
|
| - SizeT VarNum = Var->getIndex();
|
| - if (containsValid(VarNum)) {
|
| - if (I != 0 // All valid consumers use Var as the first source operand
|
| - || getConsumerKind(&Instr) == CK_None // must be white-listed
|
| - || (Producers[VarNum].IsComplex && // complex can't be multi-use
|
| - Producers[VarNum].NumUses > 0)) {
|
| - setInvalid(VarNum);
|
| - continue;
|
| - }
|
| - ++Producers[VarNum].NumUses;
|
| - if (Instr.isLastUse(Var)) {
|
| - Producers[VarNum].IsLiveOut = false;
|
| - }
|
| - }
|
| - }
|
| - }
|
| - }
|
| - for (auto &I : Producers) {
|
| - // Ignore entries previously marked invalid.
|
| - if (I.second.Instr == nullptr)
|
| - continue;
|
| - // Disable the producer if its dest may be live beyond this block.
|
| - if (I.second.IsLiveOut) {
|
| - setInvalid(I.first);
|
| - continue;
|
| - }
|
| - // Mark as "dead" rather than outright deleting. This is so that
|
| - // other peephole style optimizations during or before lowering
|
| - // have access to this instruction in undeleted form. See for
|
| - // example tryOptimizedCmpxchgCmpBr().
|
| - I.second.Instr->setDead();
|
| - }
|
| -}
|
| -
|
| -const Inst *BoolFolding::getProducerFor(const Operand *Opnd) const {
|
| - auto *Var = llvm::dyn_cast<const Variable>(Opnd);
|
| - if (Var == nullptr)
|
| - return nullptr;
|
| - SizeT VarNum = Var->getIndex();
|
| - auto Element = Producers.find(VarNum);
|
| - if (Element == Producers.end())
|
| - return nullptr;
|
| - return Element->second.Instr;
|
| -}
|
| -
|
| -void BoolFolding::dump(const Cfg *Func) const {
|
| - if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding))
|
| - return;
|
| - OstreamLocker L(Func->getContext());
|
| - Ostream &Str = Func->getContext()->getStrDump();
|
| - for (auto &I : Producers) {
|
| - if (I.second.Instr == nullptr)
|
| - continue;
|
| - Str << "Found foldable producer:\n ";
|
| - I.second.Instr->dump(Func);
|
| - Str << "\n";
|
| - }
|
| -}
|
| -
|
| -void TargetX8632::initNodeForLowering(CfgNode *Node) {
|
| - FoldingInfo.init(Node);
|
| - FoldingInfo.dump(Func);
|
| -}
|
| -
|
| -TargetX8632::TargetX8632(Cfg *Func) : TargetLowering(Func) {
|
| - static_assert((X86InstructionSet::End - X86InstructionSet::Begin) ==
|
| - (TargetInstructionSet::X86InstructionSet_End -
|
| - TargetInstructionSet::X86InstructionSet_Begin),
|
| - "X86InstructionSet range different from TargetInstructionSet");
|
| - if (Func->getContext()->getFlags().getTargetInstructionSet() !=
|
| - TargetInstructionSet::BaseInstructionSet) {
|
| - InstructionSet = static_cast<X86InstructionSet>(
|
| - (Func->getContext()->getFlags().getTargetInstructionSet() -
|
| - TargetInstructionSet::X86InstructionSet_Begin) +
|
| - X86InstructionSet::Begin);
|
| - }
|
| - // TODO: Don't initialize IntegerRegisters and friends every time.
|
| - // Instead, initialize in some sort of static initializer for the
|
| - // class.
|
| - llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM);
|
| - llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM);
|
| - llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM);
|
| - llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM);
|
| - llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM);
|
| - ScratchRegs.resize(RegX8632::Reg_NUM);
|
| -#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
|
| - frameptr, isI8, isInt, isFP) \
|
| - IntegerRegisters[RegX8632::val] = isInt; \
|
| - IntegerRegistersI8[RegX8632::val] = isI8; \
|
| - FloatRegisters[RegX8632::val] = isFP; \
|
| - VectorRegisters[RegX8632::val] = isFP; \
|
| - ScratchRegs[RegX8632::val] = scratch;
|
| - REGX8632_TABLE;
|
| -#undef X
|
| - TypeToRegisterSet[IceType_void] = InvalidRegisters;
|
| - TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
|
| - TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
|
| - TypeToRegisterSet[IceType_i16] = IntegerRegisters;
|
| - TypeToRegisterSet[IceType_i32] = IntegerRegisters;
|
| - TypeToRegisterSet[IceType_i64] = IntegerRegisters;
|
| - TypeToRegisterSet[IceType_f32] = FloatRegisters;
|
| - TypeToRegisterSet[IceType_f64] = FloatRegisters;
|
| - TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
|
| - TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
|
| - TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
|
| - TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
|
| - TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
|
| - TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
|
| - TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
|
| -}
|
| -
|
| -void TargetX8632::translateO2() {
|
| - TimerMarker T(TimerStack::TT_O2, Func);
|
| -
|
| - if (!Ctx->getFlags().getPhiEdgeSplit()) {
|
| - // Lower Phi instructions.
|
| - Func->placePhiLoads();
|
| - if (Func->hasError())
|
| - return;
|
| - Func->placePhiStores();
|
| - if (Func->hasError())
|
| - return;
|
| - Func->deletePhis();
|
| - if (Func->hasError())
|
| - return;
|
| - Func->dump("After Phi lowering");
|
| - }
|
| -
|
| - // Address mode optimization.
|
| - Func->getVMetadata()->init(VMK_SingleDefs);
|
| - Func->doAddressOpt();
|
| -
|
| - // Find read-modify-write opportunities. Do this after address mode
|
| - // optimization so that doAddressOpt() doesn't need to be applied to RMW
|
| - // instructions as well.
|
| - findRMW();
|
| - Func->dump("After RMW transform");
|
| -
|
| - // Argument lowering
|
| - Func->doArgLowering();
|
| -
|
| - // Target lowering. This requires liveness analysis for some parts
|
| - // of the lowering decisions, such as compare/branch fusing. If
|
| - // non-lightweight liveness analysis is used, the instructions need
|
| - // to be renumbered first. TODO: This renumbering should only be
|
| - // necessary if we're actually calculating live intervals, which we
|
| - // only do for register allocation.
|
| - Func->renumberInstructions();
|
| - if (Func->hasError())
|
| - return;
|
| -
|
| - // TODO: It should be sufficient to use the fastest liveness
|
| - // calculation, i.e. livenessLightweight(). However, for some
|
| - // reason that slows down the rest of the translation. Investigate.
|
| - Func->liveness(Liveness_Basic);
|
| - if (Func->hasError())
|
| - return;
|
| - Func->dump("After x86 address mode opt");
|
| -
|
| - // Disable constant blinding or pooling for load optimization.
|
| - {
|
| - BoolFlagSaver B(RandomizationPoolingPaused, true);
|
| - doLoadOpt();
|
| - }
|
| - Func->genCode();
|
| - if (Func->hasError())
|
| - return;
|
| - Func->dump("After x86 codegen");
|
| -
|
| - // Register allocation. This requires instruction renumbering and
|
| - // full liveness analysis.
|
| - Func->renumberInstructions();
|
| - if (Func->hasError())
|
| - return;
|
| - Func->liveness(Liveness_Intervals);
|
| - if (Func->hasError())
|
| - return;
|
| - // Validate the live range computations. The expensive validation
|
| - // call is deliberately only made when assertions are enabled.
|
| - assert(Func->validateLiveness());
|
| - // The post-codegen dump is done here, after liveness analysis and
|
| - // associated cleanup, to make the dump cleaner and more useful.
|
| - Func->dump("After initial x8632 codegen");
|
| - Func->getVMetadata()->init(VMK_All);
|
| - regAlloc(RAK_Global);
|
| - if (Func->hasError())
|
| - return;
|
| - Func->dump("After linear scan regalloc");
|
| -
|
| - if (Ctx->getFlags().getPhiEdgeSplit()) {
|
| - // We need to pause constant blinding or pooling during advanced
|
| - // phi lowering, unless the lowering assignment has a physical
|
| - // register for the dest Variable.
|
| - {
|
| - BoolFlagSaver B(RandomizationPoolingPaused, true);
|
| - Func->advancedPhiLowering();
|
| - }
|
| - Func->dump("After advanced Phi lowering");
|
| - }
|
| -
|
| - // Stack frame mapping.
|
| - Func->genFrame();
|
| - if (Func->hasError())
|
| - return;
|
| - Func->dump("After stack frame mapping");
|
| -
|
| - Func->contractEmptyNodes();
|
| - Func->reorderNodes();
|
| -
|
| - // Branch optimization. This needs to be done just before code
|
| - // emission. In particular, no transformations that insert or
|
| - // reorder CfgNodes should be done after branch optimization. We go
|
| - // ahead and do it before nop insertion to reduce the amount of work
|
| - // needed for searching for opportunities.
|
| - Func->doBranchOpt();
|
| - Func->dump("After branch optimization");
|
| -
|
| - // Nop insertion
|
| - if (Ctx->getFlags().shouldDoNopInsertion()) {
|
| - Func->doNopInsertion();
|
| - }
|
| -}
|
| -
|
| -void TargetX8632::translateOm1() {
|
| - TimerMarker T(TimerStack::TT_Om1, Func);
|
| -
|
| - Func->placePhiLoads();
|
| - if (Func->hasError())
|
| - return;
|
| - Func->placePhiStores();
|
| - if (Func->hasError())
|
| - return;
|
| - Func->deletePhis();
|
| - if (Func->hasError())
|
| - return;
|
| - Func->dump("After Phi lowering");
|
| -
|
| - Func->doArgLowering();
|
| -
|
| - Func->genCode();
|
| - if (Func->hasError())
|
| - return;
|
| - Func->dump("After initial x8632 codegen");
|
| -
|
| - regAlloc(RAK_InfOnly);
|
| - if (Func->hasError())
|
| - return;
|
| - Func->dump("After regalloc of infinite-weight variables");
|
| -
|
| - Func->genFrame();
|
| - if (Func->hasError())
|
| - return;
|
| - Func->dump("After stack frame mapping");
|
| -
|
| - // Nop insertion
|
| - if (Ctx->getFlags().shouldDoNopInsertion()) {
|
| - Func->doNopInsertion();
|
| - }
|
| -}
|
| -
|
| -namespace {
|
| -
|
| -bool canRMW(const InstArithmetic *Arith) {
|
| - Type Ty = Arith->getDest()->getType();
|
| - // X86 vector instructions write to a register and have no RMW
|
| - // option.
|
| - if (isVectorType(Ty))
|
| - return false;
|
| - bool isI64 = Ty == IceType_i64;
|
| -
|
| - switch (Arith->getOp()) {
|
| - // Not handled for lack of simple lowering:
|
| - // shift on i64
|
| - // mul, udiv, urem, sdiv, srem, frem
|
| - // Not handled for lack of RMW instructions:
|
| - // fadd, fsub, fmul, fdiv (also vector types)
|
| - default:
|
| - return false;
|
| - case InstArithmetic::Add:
|
| - case InstArithmetic::Sub:
|
| - case InstArithmetic::And:
|
| - case InstArithmetic::Or:
|
| - case InstArithmetic::Xor:
|
| - return true;
|
| - case InstArithmetic::Shl:
|
| - case InstArithmetic::Lshr:
|
| - case InstArithmetic::Ashr:
|
| - return false; // TODO(stichnot): implement
|
| - return !isI64;
|
| - }
|
| -}
|
| -
|
| -bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
|
| - if (A == B)
|
| - return true;
|
| - if (auto *MemA = llvm::dyn_cast<OperandX8632Mem>(A)) {
|
| - if (auto *MemB = llvm::dyn_cast<OperandX8632Mem>(B)) {
|
| - return MemA->getBase() == MemB->getBase() &&
|
| - MemA->getOffset() == MemB->getOffset() &&
|
| - MemA->getIndex() == MemB->getIndex() &&
|
| - MemA->getShift() == MemB->getShift() &&
|
| - MemA->getSegmentRegister() == MemB->getSegmentRegister();
|
| - }
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -} // end of anonymous namespace
|
| -
|
| -void TargetX8632::findRMW() {
|
| - Func->dump("Before RMW");
|
| - OstreamLocker L(Func->getContext());
|
| - Ostream &Str = Func->getContext()->getStrDump();
|
| - for (CfgNode *Node : Func->getNodes()) {
|
| - // Walk through the instructions, considering each sequence of 3
|
| - // instructions, and look for the particular RMW pattern. Note that this
|
| - // search can be "broken" (false negatives) if there are intervening deleted
|
| - // instructions, or intervening instructions that could be safely moved out
|
| - // of the way to reveal an RMW pattern.
|
| - auto E = Node->getInsts().end();
|
| - auto I1 = E, I2 = E, I3 = Node->getInsts().begin();
|
| - for (; I3 != E; I1 = I2, I2 = I3, ++I3) {
|
| - // Make I3 skip over deleted instructions.
|
| - while (I3 != E && I3->isDeleted())
|
| - ++I3;
|
| - if (I1 == E || I2 == E || I3 == E)
|
| - continue;
|
| - assert(!I1->isDeleted());
|
| - assert(!I2->isDeleted());
|
| - assert(!I3->isDeleted());
|
| - if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) {
|
| - if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) {
|
| - if (auto *Store = llvm::dyn_cast<InstStore>(I3)) {
|
| - // Look for:
|
| - // a = Load addr
|
| - // b = <op> a, other
|
| - // Store b, addr
|
| - // Change to:
|
| - // a = Load addr
|
| - // b = <op> a, other
|
| - // x = FakeDef
|
| - // RMW <op>, addr, other, x
|
| - // b = Store b, addr, x
|
| - // Note that inferTwoAddress() makes sure setDestNonKillable() gets
|
| - // called on the updated Store instruction, to avoid liveness
|
| - // problems later.
|
| - //
|
| - // With this transformation, the Store instruction acquires a Dest
|
| - // variable and is now subject to dead code elimination if there are
|
| - // no more uses of "b". Variable "x" is a beacon for determining
|
| - // whether the Store instruction gets dead-code eliminated. If the
|
| - // Store instruction is eliminated, then it must be the case that
|
| - // the RMW instruction ends x's live range, and therefore the RMW
|
| - // instruction will be retained and later lowered. On the other
|
| - // hand, if the RMW instruction does not end x's live range, then
|
| - // the Store instruction must still be present, and therefore the
|
| - // RMW instruction is ignored during lowering because it is
|
| - // redundant with the Store instruction.
|
| - //
|
| - // Note that if "a" has further uses, the RMW transformation may
|
| - // still trigger, resulting in two loads and one store, which is
|
| - // worse than the original one load and one store. However, this is
|
| - // probably rare, and caching probably keeps it just as fast.
|
| - if (!isSameMemAddressOperand(Load->getSourceAddress(),
|
| - Store->getAddr()))
|
| - continue;
|
| - Operand *ArithSrcFromLoad = Arith->getSrc(0);
|
| - Operand *ArithSrcOther = Arith->getSrc(1);
|
| - if (ArithSrcFromLoad != Load->getDest()) {
|
| - if (!Arith->isCommutative() || ArithSrcOther != Load->getDest())
|
| - continue;
|
| - std::swap(ArithSrcFromLoad, ArithSrcOther);
|
| - }
|
| - if (Arith->getDest() != Store->getData())
|
| - continue;
|
| - if (!canRMW(Arith))
|
| - continue;
|
| - if (Func->isVerbose(IceV_RMW)) {
|
| - Str << "Found RMW in " << Func->getFunctionName() << ":\n ";
|
| - Load->dump(Func);
|
| - Str << "\n ";
|
| - Arith->dump(Func);
|
| - Str << "\n ";
|
| - Store->dump(Func);
|
| - Str << "\n";
|
| - }
|
| - Variable *Beacon = Func->makeVariable(IceType_i32);
|
| - Beacon->setWeight(0);
|
| - Store->setRmwBeacon(Beacon);
|
| - InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
|
| - Node->getInsts().insert(I3, BeaconDef);
|
| - InstX8632FakeRMW *RMW = InstX8632FakeRMW::create(
|
| - Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp());
|
| - Node->getInsts().insert(I3, RMW);
|
| - }
|
| - }
|
| - }
|
| - }
|
| - }
|
| -}
|
| -
|
| -namespace {
|
| -
|
| -// Converts a ConstantInteger32 operand into its constant value, or
|
| -// MemoryOrderInvalid if the operand is not a ConstantInteger32.
|
| -uint64_t getConstantMemoryOrder(Operand *Opnd) {
|
| - if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
|
| - return Integer->getValue();
|
| - return Intrinsics::MemoryOrderInvalid;
|
| -}
|
| -
|
| -// Determines whether the dest of a Load instruction can be folded
|
| -// into one of the src operands of a 2-operand instruction. This is
|
| -// true as long as the load dest matches exactly one of the binary
|
| -// instruction's src operands. Replaces Src0 or Src1 with LoadSrc if
|
| -// the answer is true.
|
| -bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
|
| - Operand *&Src0, Operand *&Src1) {
|
| - if (Src0 == LoadDest && Src1 != LoadDest) {
|
| - Src0 = LoadSrc;
|
| - return true;
|
| - }
|
| - if (Src0 != LoadDest && Src1 == LoadDest) {
|
| - Src1 = LoadSrc;
|
| - return true;
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -} // end of anonymous namespace
|
| -
|
| -void TargetX8632::doLoadOpt() {
|
| - for (CfgNode *Node : Func->getNodes()) {
|
| - Context.init(Node);
|
| - while (!Context.atEnd()) {
|
| - Variable *LoadDest = nullptr;
|
| - Operand *LoadSrc = nullptr;
|
| - Inst *CurInst = Context.getCur();
|
| - Inst *Next = Context.getNextInst();
|
| - // Determine whether the current instruction is a Load
|
| - // instruction or equivalent.
|
| - if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
|
| - // An InstLoad always qualifies.
|
| - LoadDest = Load->getDest();
|
| - const bool DoLegalize = false;
|
| - LoadSrc = formMemoryOperand(Load->getSourceAddress(),
|
| - LoadDest->getType(), DoLegalize);
|
| - } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
|
| - // An AtomicLoad intrinsic qualifies as long as it has a valid
|
| - // memory ordering, and can be implemented in a single
|
| - // instruction (i.e., not i64).
|
| - Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
|
| - if (ID == Intrinsics::AtomicLoad &&
|
| - Intrin->getDest()->getType() != IceType_i64 &&
|
| - Intrinsics::isMemoryOrderValid(
|
| - ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
|
| - LoadDest = Intrin->getDest();
|
| - const bool DoLegalize = false;
|
| - LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
|
| - DoLegalize);
|
| - }
|
| - }
|
| - // A Load instruction can be folded into the following
|
| - // instruction only if the following instruction ends the Load's
|
| - // Dest variable's live range.
|
| - if (LoadDest && Next && Next->isLastUse(LoadDest)) {
|
| - assert(LoadSrc);
|
| - Inst *NewInst = nullptr;
|
| - if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) {
|
| - Operand *Src0 = Arith->getSrc(0);
|
| - Operand *Src1 = Arith->getSrc(1);
|
| - if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
|
| - NewInst = InstArithmetic::create(Func, Arith->getOp(),
|
| - Arith->getDest(), Src0, Src1);
|
| - }
|
| - } else if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Next)) {
|
| - Operand *Src0 = Icmp->getSrc(0);
|
| - Operand *Src1 = Icmp->getSrc(1);
|
| - if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
|
| - NewInst = InstIcmp::create(Func, Icmp->getCondition(),
|
| - Icmp->getDest(), Src0, Src1);
|
| - }
|
| - } else if (auto *Fcmp = llvm::dyn_cast<InstFcmp>(Next)) {
|
| - Operand *Src0 = Fcmp->getSrc(0);
|
| - Operand *Src1 = Fcmp->getSrc(1);
|
| - if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
|
| - NewInst = InstFcmp::create(Func, Fcmp->getCondition(),
|
| - Fcmp->getDest(), Src0, Src1);
|
| - }
|
| - } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) {
|
| - Operand *Src0 = Select->getTrueOperand();
|
| - Operand *Src1 = Select->getFalseOperand();
|
| - if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
|
| - NewInst = InstSelect::create(Func, Select->getDest(),
|
| - Select->getCondition(), Src0, Src1);
|
| - }
|
| - } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) {
|
| - // The load dest can always be folded into a Cast
|
| - // instruction.
|
| - Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0));
|
| - if (Src0 == LoadDest) {
|
| - NewInst = InstCast::create(Func, Cast->getCastKind(),
|
| - Cast->getDest(), LoadSrc);
|
| - }
|
| - }
|
| - if (NewInst) {
|
| - CurInst->setDeleted();
|
| - Next->setDeleted();
|
| - Context.insert(NewInst);
|
| - // Update NewInst->LiveRangesEnded so that target lowering
|
| - // may benefit. Also update NewInst->HasSideEffects.
|
| - NewInst->spliceLivenessInfo(Next, CurInst);
|
| - }
|
| - }
|
| - Context.advanceCur();
|
| - Context.advanceNext();
|
| - }
|
| - }
|
| - Func->dump("After load optimization");
|
| -}
|
| -
|
| -bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {
|
| - if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {
|
| - return Br->optimizeBranch(NextNode);
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -IceString TargetX8632::RegNames[] = {
|
| -#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
|
| - frameptr, isI8, isInt, isFP) \
|
| - name,
|
| - REGX8632_TABLE
|
| -#undef X
|
| -};
|
| -
|
| -Variable *TargetX8632::getPhysicalRegister(SizeT RegNum, Type Ty) {
|
| - if (Ty == IceType_void)
|
| - Ty = IceType_i32;
|
| - if (PhysicalRegisters[Ty].empty())
|
| - PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM);
|
| - assert(RegNum < PhysicalRegisters[Ty].size());
|
| - Variable *Reg = PhysicalRegisters[Ty][RegNum];
|
| - if (Reg == nullptr) {
|
| - Reg = Func->makeVariable(Ty);
|
| - Reg->setRegNum(RegNum);
|
| - PhysicalRegisters[Ty][RegNum] = Reg;
|
| - // Specially mark esp as an "argument" so that it is considered
|
| - // live upon function entry.
|
| - if (RegNum == RegX8632::Reg_esp) {
|
| - Func->addImplicitArg(Reg);
|
| - Reg->setIgnoreLiveness();
|
| - }
|
| - }
|
| - return Reg;
|
| -}
|
| -
|
| -IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
|
| - assert(RegNum < RegX8632::Reg_NUM);
|
| - static IceString RegNames8[] = {
|
| -#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
|
| - frameptr, isI8, isInt, isFP) \
|
| - name8,
|
| - REGX8632_TABLE
|
| -#undef X
|
| - };
|
| - static IceString RegNames16[] = {
|
| -#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
|
| - frameptr, isI8, isInt, isFP) \
|
| - name16,
|
| - REGX8632_TABLE
|
| -#undef X
|
| - };
|
| - switch (Ty) {
|
| - case IceType_i1:
|
| - case IceType_i8:
|
| - return RegNames8[RegNum];
|
| - case IceType_i16:
|
| - return RegNames16[RegNum];
|
| - default:
|
| - return RegNames[RegNum];
|
| - }
|
| -}
|
| -
|
| -void TargetX8632::emitVariable(const Variable *Var) const {
|
| - Ostream &Str = Ctx->getStrEmit();
|
| - if (Var->hasReg()) {
|
| - Str << "%" << getRegName(Var->getRegNum(), Var->getType());
|
| - return;
|
| - }
|
| - if (Var->getWeight().isInf()) {
|
| - llvm_unreachable("Infinite-weight Variable has no register assigned");
|
| - }
|
| - int32_t Offset = Var->getStackOffset();
|
| - if (!hasFramePointer())
|
| - Offset += getStackAdjustment();
|
| - if (Offset)
|
| - Str << Offset;
|
| - const Type FrameSPTy = IceType_i32;
|
| - Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")";
|
| -}
|
| -
|
| -X8632::Address TargetX8632::stackVarToAsmOperand(const Variable *Var) const {
|
| - if (Var->hasReg())
|
| - llvm_unreachable("Stack Variable has a register assigned");
|
| - if (Var->getWeight().isInf()) {
|
| - llvm_unreachable("Infinite-weight Variable has no register assigned");
|
| - }
|
| - int32_t Offset = Var->getStackOffset();
|
| - if (!hasFramePointer())
|
| - Offset += getStackAdjustment();
|
| - return X8632::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset);
|
| -}
|
| -
|
| -void TargetX8632::lowerArguments() {
|
| - VarList &Args = Func->getArgs();
|
| - // The first four arguments of vector type, regardless of their
|
| - // position relative to the other arguments in the argument list, are
|
| - // passed in registers xmm0 - xmm3.
|
| - unsigned NumXmmArgs = 0;
|
| -
|
| - Context.init(Func->getEntryNode());
|
| - Context.setInsertPoint(Context.getCur());
|
| -
|
| - for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS;
|
| - ++I) {
|
| - Variable *Arg = Args[I];
|
| - Type Ty = Arg->getType();
|
| - if (!isVectorType(Ty))
|
| - continue;
|
| - // Replace Arg in the argument list with the home register. Then
|
| - // generate an instruction in the prolog to copy the home register
|
| - // to the assigned location of Arg.
|
| - int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs;
|
| - ++NumXmmArgs;
|
| - Variable *RegisterArg = Func->makeVariable(Ty);
|
| - if (ALLOW_DUMP)
|
| - RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
|
| - RegisterArg->setRegNum(RegNum);
|
| - RegisterArg->setIsArg();
|
| - Arg->setIsArg(false);
|
| -
|
| - Args[I] = RegisterArg;
|
| - Context.insert(InstAssign::create(Func, Arg, RegisterArg));
|
| - }
|
| -}
|
| -
|
| -// Helper function for addProlog().
|
| -//
|
| -// This assumes Arg is an argument passed on the stack. This sets the
|
| -// frame offset for Arg and updates InArgsSizeBytes according to Arg's
|
| -// width. For an I64 arg that has been split into Lo and Hi components,
|
| -// it calls itself recursively on the components, taking care to handle
|
| -// Lo first because of the little-endian architecture. Lastly, this
|
| -// function generates an instruction to copy Arg into its assigned
|
| -// register if applicable.
|
| -void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
|
| - size_t BasicFrameOffset,
|
| - size_t &InArgsSizeBytes) {
|
| - Variable *Lo = Arg->getLo();
|
| - Variable *Hi = Arg->getHi();
|
| - Type Ty = Arg->getType();
|
| - if (Lo && Hi && Ty == IceType_i64) {
|
| - assert(Lo->getType() != IceType_i64); // don't want infinite recursion
|
| - assert(Hi->getType() != IceType_i64); // don't want infinite recursion
|
| - finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
|
| - finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
|
| - return;
|
| - }
|
| - if (isVectorType(Ty)) {
|
| - InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
|
| - }
|
| - Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
|
| - InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
|
| - if (Arg->hasReg()) {
|
| - assert(Ty != IceType_i64);
|
| - OperandX8632Mem *Mem = OperandX8632Mem::create(
|
| - Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
|
| - if (isVectorType(Arg->getType())) {
|
| - _movp(Arg, Mem);
|
| - } else {
|
| - _mov(Arg, Mem);
|
| - }
|
| - // This argument-copying instruction uses an explicit
|
| - // OperandX8632Mem operand instead of a Variable, so its
|
| - // fill-from-stack operation has to be tracked separately for
|
| - // statistics.
|
| - Ctx->statsUpdateFills();
|
| - }
|
| -}
|
| -
|
| -Type TargetX8632::stackSlotType() { return IceType_i32; }
|
| -
|
| -void TargetX8632::addProlog(CfgNode *Node) {
|
| - // Stack frame layout:
|
| - //
|
| - // +------------------------+
|
| - // | 1. return address |
|
| - // +------------------------+
|
| - // | 2. preserved registers |
|
| - // +------------------------+
|
| - // | 3. padding |
|
| - // +------------------------+
|
| - // | 4. global spill area |
|
| - // +------------------------+
|
| - // | 5. padding |
|
| - // +------------------------+
|
| - // | 6. local spill area |
|
| - // +------------------------+
|
| - // | 7. padding |
|
| - // +------------------------+
|
| - // | 8. allocas |
|
| - // +------------------------+
|
| - //
|
| - // The following variables record the size in bytes of the given areas:
|
| - // * X86_RET_IP_SIZE_BYTES: area 1
|
| - // * PreservedRegsSizeBytes: area 2
|
| - // * SpillAreaPaddingBytes: area 3
|
| - // * GlobalsSize: area 4
|
| - // * GlobalsAndSubsequentPaddingSize: areas 4 - 5
|
| - // * LocalsSpillAreaSize: area 6
|
| - // * SpillAreaSizeBytes: areas 3 - 7
|
| -
|
| - // Determine stack frame offsets for each Variable without a
|
| - // register assignment. This can be done as one variable per stack
|
| - // slot. Or, do coalescing by running the register allocator again
|
| - // with an infinite set of registers (as a side effect, this gives
|
| - // variables a second chance at physical register assignment).
|
| - //
|
| - // A middle ground approach is to leverage sparsity and allocate one
|
| - // block of space on the frame for globals (variables with
|
| - // multi-block lifetime), and one block to share for locals
|
| - // (single-block lifetime).
|
| -
|
| - Context.init(Node);
|
| - Context.setInsertPoint(Context.getCur());
|
| -
|
| - llvm::SmallBitVector CalleeSaves =
|
| - getRegisterSet(RegSet_CalleeSave, RegSet_None);
|
| - RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
|
| - VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
|
| - size_t GlobalsSize = 0;
|
| - // If there is a separate locals area, this represents that area.
|
| - // Otherwise it counts any variable not counted by GlobalsSize.
|
| - SpillAreaSizeBytes = 0;
|
| - // If there is a separate locals area, this specifies the alignment
|
| - // for it.
|
| - uint32_t LocalsSlotsAlignmentBytes = 0;
|
| - // The entire spill locations area gets aligned to largest natural
|
| - // alignment of the variables that have a spill slot.
|
| - uint32_t SpillAreaAlignmentBytes = 0;
|
| - // A spill slot linked to a variable with a stack slot should reuse
|
| - // that stack slot.
|
| - std::function<bool(Variable *)> TargetVarHook =
|
| - [&VariablesLinkedToSpillSlots](Variable *Var) {
|
| - if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) {
|
| - assert(Var->getWeight().isZero());
|
| - if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
|
| - VariablesLinkedToSpillSlots.push_back(Var);
|
| - return true;
|
| - }
|
| - }
|
| - return false;
|
| - };
|
| -
|
| - // Compute the list of spilled variables and bounds for GlobalsSize, etc.
|
| - getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
|
| - &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
|
| - &LocalsSlotsAlignmentBytes, TargetVarHook);
|
| - uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
|
| - SpillAreaSizeBytes += GlobalsSize;
|
| -
|
| - // Add push instructions for preserved registers.
|
| - uint32_t NumCallee = 0;
|
| - size_t PreservedRegsSizeBytes = 0;
|
| - for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
|
| - if (CalleeSaves[i] && RegsUsed[i]) {
|
| - ++NumCallee;
|
| - PreservedRegsSizeBytes += 4;
|
| - _push(getPhysicalRegister(i));
|
| - }
|
| - }
|
| - Ctx->statsUpdateRegistersSaved(NumCallee);
|
| -
|
| - // Generate "push ebp; mov ebp, esp"
|
| - if (IsEbpBasedFrame) {
|
| - assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
|
| - .count() == 0);
|
| - PreservedRegsSizeBytes += 4;
|
| - Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
|
| - Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
|
| - _push(ebp);
|
| - _mov(ebp, esp);
|
| - // Keep ebp live for late-stage liveness analysis
|
| - // (e.g. asm-verbose mode).
|
| - Context.insert(InstFakeUse::create(Func, ebp));
|
| - }
|
| -
|
| - // Align the variables area. SpillAreaPaddingBytes is the size of
|
| - // the region after the preserved registers and before the spill areas.
|
| - // LocalsSlotsPaddingBytes is the amount of padding between the globals
|
| - // and locals area if they are separate.
|
| - assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES);
|
| - assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
|
| - uint32_t SpillAreaPaddingBytes = 0;
|
| - uint32_t LocalsSlotsPaddingBytes = 0;
|
| - alignStackSpillAreas(X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
|
| - SpillAreaAlignmentBytes, GlobalsSize,
|
| - LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
|
| - &LocalsSlotsPaddingBytes);
|
| - SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
|
| - uint32_t GlobalsAndSubsequentPaddingSize =
|
| - GlobalsSize + LocalsSlotsPaddingBytes;
|
| -
|
| - // Align esp if necessary.
|
| - if (NeedsStackAlignment) {
|
| - uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
|
| - uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
|
| - SpillAreaSizeBytes = StackSize - StackOffset;
|
| - }
|
| -
|
| - // Generate "sub esp, SpillAreaSizeBytes"
|
| - if (SpillAreaSizeBytes)
|
| - _sub(getPhysicalRegister(RegX8632::Reg_esp),
|
| - Ctx->getConstantInt32(SpillAreaSizeBytes));
|
| - Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
|
| -
|
| - resetStackAdjustment();
|
| -
|
| - // Fill in stack offsets for stack args, and copy args into registers
|
| - // for those that were register-allocated. Args are pushed right to
|
| - // left, so Arg[0] is closest to the stack/frame pointer.
|
| - Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
|
| - size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
|
| - if (!IsEbpBasedFrame)
|
| - BasicFrameOffset += SpillAreaSizeBytes;
|
| -
|
| - const VarList &Args = Func->getArgs();
|
| - size_t InArgsSizeBytes = 0;
|
| - unsigned NumXmmArgs = 0;
|
| - for (Variable *Arg : Args) {
|
| - // Skip arguments passed in registers.
|
| - if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) {
|
| - ++NumXmmArgs;
|
| - continue;
|
| - }
|
| - finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
|
| - }
|
| -
|
| - // Fill in stack offsets for locals.
|
| - assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
|
| - SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
|
| - IsEbpBasedFrame);
|
| - // Assign stack offsets to variables that have been linked to spilled
|
| - // variables.
|
| - for (Variable *Var : VariablesLinkedToSpillSlots) {
|
| - Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo();
|
| - Var->setStackOffset(Linked->getStackOffset());
|
| - }
|
| - this->HasComputedFrame = true;
|
| -
|
| - if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) {
|
| - OstreamLocker L(Func->getContext());
|
| - Ostream &Str = Func->getContext()->getStrDump();
|
| -
|
| - Str << "Stack layout:\n";
|
| - uint32_t EspAdjustmentPaddingSize =
|
| - SpillAreaSizeBytes - LocalsSpillAreaSize -
|
| - GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
|
| - Str << " in-args = " << InArgsSizeBytes << " bytes\n"
|
| - << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n"
|
| - << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
|
| - << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
|
| - << " globals spill area = " << GlobalsSize << " bytes\n"
|
| - << " globals-locals spill areas intermediate padding = "
|
| - << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
|
| - << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
|
| - << " esp alignment padding = " << EspAdjustmentPaddingSize
|
| - << " bytes\n";
|
| -
|
| - Str << "Stack details:\n"
|
| - << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
|
| - << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
|
| - << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
|
| - << " bytes\n"
|
| - << " is ebp based = " << IsEbpBasedFrame << "\n";
|
| - }
|
| -}
|
| -
|
| -void TargetX8632::addEpilog(CfgNode *Node) {
|
| - InstList &Insts = Node->getInsts();
|
| - InstList::reverse_iterator RI, E;
|
| - for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
|
| - if (llvm::isa<InstX8632Ret>(*RI))
|
| - break;
|
| - }
|
| - if (RI == E)
|
| - return;
|
| -
|
| - // Convert the reverse_iterator position into its corresponding
|
| - // (forward) iterator position.
|
| - InstList::iterator InsertPoint = RI.base();
|
| - --InsertPoint;
|
| - Context.init(Node);
|
| - Context.setInsertPoint(InsertPoint);
|
| -
|
| - Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
|
| - if (IsEbpBasedFrame) {
|
| - Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
|
| - // For late-stage liveness analysis (e.g. asm-verbose mode),
|
| - // adding a fake use of esp before the assignment of esp=ebp keeps
|
| - // previous esp adjustments from being dead-code eliminated.
|
| - Context.insert(InstFakeUse::create(Func, esp));
|
| - _mov(esp, ebp);
|
| - _pop(ebp);
|
| - } else {
|
| - // add esp, SpillAreaSizeBytes
|
| - if (SpillAreaSizeBytes)
|
| - _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
|
| - }
|
| -
|
| - // Add pop instructions for preserved registers.
|
| - llvm::SmallBitVector CalleeSaves =
|
| - getRegisterSet(RegSet_CalleeSave, RegSet_None);
|
| - for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
|
| - SizeT j = CalleeSaves.size() - i - 1;
|
| - if (j == RegX8632::Reg_ebp && IsEbpBasedFrame)
|
| - continue;
|
| - if (CalleeSaves[j] && RegsUsed[j]) {
|
| - _pop(getPhysicalRegister(j));
|
| - }
|
| - }
|
| -
|
| - if (!Ctx->getFlags().getUseSandboxing())
|
| - return;
|
| - // Change the original ret instruction into a sandboxed return sequence.
|
| - // t:ecx = pop
|
| - // bundle_lock
|
| - // and t, ~31
|
| - // jmp *t
|
| - // bundle_unlock
|
| - // FakeUse <original_ret_operand>
|
| - const SizeT BundleSize = 1
|
| - << Func->getAssembler<>()->getBundleAlignLog2Bytes();
|
| - Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
|
| - _pop(T_ecx);
|
| - _bundle_lock();
|
| - _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1)));
|
| - _jmp(T_ecx);
|
| - _bundle_unlock();
|
| - if (RI->getSrcSize()) {
|
| - Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
|
| - Context.insert(InstFakeUse::create(Func, RetValue));
|
| - }
|
| - RI->setDeleted();
|
| -}
|
| -
|
| -void TargetX8632::split64(Variable *Var) {
|
| - switch (Var->getType()) {
|
| - default:
|
| - return;
|
| - case IceType_i64:
|
| - // TODO: Only consider F64 if we need to push each half when
|
| - // passing as an argument to a function call. Note that each half
|
| - // is still typed as I32.
|
| - case IceType_f64:
|
| - break;
|
| - }
|
| - Variable *Lo = Var->getLo();
|
| - Variable *Hi = Var->getHi();
|
| - if (Lo) {
|
| - assert(Hi);
|
| - return;
|
| - }
|
| - assert(Hi == nullptr);
|
| - Lo = Func->makeVariable(IceType_i32);
|
| - Hi = Func->makeVariable(IceType_i32);
|
| - if (ALLOW_DUMP) {
|
| - Lo->setName(Func, Var->getName(Func) + "__lo");
|
| - Hi->setName(Func, Var->getName(Func) + "__hi");
|
| - }
|
| - Var->setLoHi(Lo, Hi);
|
| - if (Var->getIsArg()) {
|
| - Lo->setIsArg();
|
| - Hi->setIsArg();
|
| - }
|
| -}
|
| -
|
| -Operand *TargetX8632::loOperand(Operand *Operand) {
|
| - assert(Operand->getType() == IceType_i64 ||
|
| - Operand->getType() == IceType_f64);
|
| - if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
|
| - return Operand;
|
| - if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
|
| - split64(Var);
|
| - return Var->getLo();
|
| - }
|
| - if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
|
| - ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>(
|
| - Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
|
| - return legalize(ConstInt);
|
| - }
|
| - if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
|
| - OperandX8632Mem *MemOperand = OperandX8632Mem::create(
|
| - Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
|
| - Mem->getShift(), Mem->getSegmentRegister());
|
| - // Test if we should randomize or pool the offset, if so randomize it or
|
| - // pool it then create mem operand with the blinded/pooled constant.
|
| - // Otherwise, return the mem operand as ordinary mem operand.
|
| - return legalize(MemOperand);
|
| - }
|
| - llvm_unreachable("Unsupported operand type");
|
| - return nullptr;
|
| -}
|
| -
|
| -Operand *TargetX8632::hiOperand(Operand *Operand) {
|
| - assert(Operand->getType() == IceType_i64 ||
|
| - Operand->getType() == IceType_f64);
|
| - if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
|
| - return Operand;
|
| - if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
|
| - split64(Var);
|
| - return Var->getHi();
|
| - }
|
| - if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
|
| - ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>(
|
| - Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32)));
|
| - // check if we need to blind/pool the constant
|
| - return legalize(ConstInt);
|
| - }
|
| - if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
|
| - Constant *Offset = Mem->getOffset();
|
| - if (Offset == nullptr) {
|
| - Offset = Ctx->getConstantInt32(4);
|
| - } else if (ConstantInteger32 *IntOffset =
|
| - llvm::dyn_cast<ConstantInteger32>(Offset)) {
|
| - Offset = Ctx->getConstantInt32(4 + IntOffset->getValue());
|
| - } else if (ConstantRelocatable *SymOffset =
|
| - llvm::dyn_cast<ConstantRelocatable>(Offset)) {
|
| - assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
|
| - Offset =
|
| - Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(),
|
| - SymOffset->getSuppressMangling());
|
| - }
|
| - OperandX8632Mem *MemOperand = OperandX8632Mem::create(
|
| - Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(),
|
| - Mem->getShift(), Mem->getSegmentRegister());
|
| - // Test if the Offset is an eligible i32 constants for randomization and
|
| - // pooling. Blind/pool it if it is. Otherwise return as oridinary mem
|
| - // operand.
|
| - return legalize(MemOperand);
|
| - }
|
| - llvm_unreachable("Unsupported operand type");
|
| - return nullptr;
|
| -}
|
| -
|
| -llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
|
| - RegSetMask Exclude) const {
|
| - llvm::SmallBitVector Registers(RegX8632::Reg_NUM);
|
| -
|
| -#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
|
| - frameptr, isI8, isInt, isFP) \
|
| - if (scratch && (Include & RegSet_CallerSave)) \
|
| - Registers[RegX8632::val] = true; \
|
| - if (preserved && (Include & RegSet_CalleeSave)) \
|
| - Registers[RegX8632::val] = true; \
|
| - if (stackptr && (Include & RegSet_StackPointer)) \
|
| - Registers[RegX8632::val] = true; \
|
| - if (frameptr && (Include & RegSet_FramePointer)) \
|
| - Registers[RegX8632::val] = true; \
|
| - if (scratch && (Exclude & RegSet_CallerSave)) \
|
| - Registers[RegX8632::val] = false; \
|
| - if (preserved && (Exclude & RegSet_CalleeSave)) \
|
| - Registers[RegX8632::val] = false; \
|
| - if (stackptr && (Exclude & RegSet_StackPointer)) \
|
| - Registers[RegX8632::val] = false; \
|
| - if (frameptr && (Exclude & RegSet_FramePointer)) \
|
| - Registers[RegX8632::val] = false;
|
| -
|
| - REGX8632_TABLE
|
| -
|
| -#undef X
|
| -
|
| - return Registers;
|
| -}
|
| -
|
| -void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
|
| - IsEbpBasedFrame = true;
|
| - // Conservatively require the stack to be aligned. Some stack
|
| - // adjustment operations implemented below assume that the stack is
|
| - // aligned before the alloca. All the alloca code ensures that the
|
| - // stack alignment is preserved after the alloca. The stack alignment
|
| - // restriction can be relaxed in some cases.
|
| - NeedsStackAlignment = true;
|
| -
|
| - // TODO(stichnot): minimize the number of adjustments of esp, etc.
|
| - Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
|
| - Operand *TotalSize = legalize(Inst->getSizeInBytes());
|
| - Variable *Dest = Inst->getDest();
|
| - uint32_t AlignmentParam = Inst->getAlignInBytes();
|
| - // For default align=0, set it to the real value 1, to avoid any
|
| - // bit-manipulation problems below.
|
| - AlignmentParam = std::max(AlignmentParam, 1u);
|
| -
|
| - // LLVM enforces power of 2 alignment.
|
| - assert(llvm::isPowerOf2_32(AlignmentParam));
|
| - assert(llvm::isPowerOf2_32(X86_STACK_ALIGNMENT_BYTES));
|
| -
|
| - uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
|
| - if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
|
| - _and(esp, Ctx->getConstantInt32(-Alignment));
|
| - }
|
| - if (const auto *ConstantTotalSize =
|
| - llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
|
| - uint32_t Value = ConstantTotalSize->getValue();
|
| - Value = Utils::applyAlignment(Value, Alignment);
|
| - _sub(esp, Ctx->getConstantInt32(Value));
|
| - } else {
|
| - // Non-constant sizes need to be adjusted to the next highest
|
| - // multiple of the required alignment at runtime.
|
| - Variable *T = makeReg(IceType_i32);
|
| - _mov(T, TotalSize);
|
| - _add(T, Ctx->getConstantInt32(Alignment - 1));
|
| - _and(T, Ctx->getConstantInt32(-Alignment));
|
| - _sub(esp, T);
|
| - }
|
| - _mov(Dest, esp);
|
| -}
|
| -
|
| -// Strength-reduce scalar integer multiplication by a constant (for
|
| -// i32 or narrower) for certain constants. The lea instruction can be
|
| -// used to multiply by 3, 5, or 9, and the lsh instruction can be used
|
| -// to multiply by powers of 2. These can be combined such that
|
| -// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,
|
| -// combined with left-shifting by 2.
|
| -bool TargetX8632::optimizeScalarMul(Variable *Dest, Operand *Src0,
|
| - int32_t Src1) {
|
| - // Disable this optimization for Om1 and O0, just to keep things
|
| - // simple there.
|
| - if (Ctx->getFlags().getOptLevel() < Opt_1)
|
| - return false;
|
| - Type Ty = Dest->getType();
|
| - Variable *T = nullptr;
|
| - if (Src1 == -1) {
|
| - _mov(T, Src0);
|
| - _neg(T);
|
| - _mov(Dest, T);
|
| - return true;
|
| - }
|
| - if (Src1 == 0) {
|
| - _mov(Dest, Ctx->getConstantZero(Ty));
|
| - return true;
|
| - }
|
| - if (Src1 == 1) {
|
| - _mov(T, Src0);
|
| - _mov(Dest, T);
|
| - return true;
|
| - }
|
| - // Don't bother with the edge case where Src1 == MININT.
|
| - if (Src1 == -Src1)
|
| - return false;
|
| - const bool Src1IsNegative = Src1 < 0;
|
| - if (Src1IsNegative)
|
| - Src1 = -Src1;
|
| - uint32_t Count9 = 0;
|
| - uint32_t Count5 = 0;
|
| - uint32_t Count3 = 0;
|
| - uint32_t Count2 = 0;
|
| - uint32_t CountOps = 0;
|
| - while (Src1 > 1) {
|
| - if (Src1 % 9 == 0) {
|
| - ++CountOps;
|
| - ++Count9;
|
| - Src1 /= 9;
|
| - } else if (Src1 % 5 == 0) {
|
| - ++CountOps;
|
| - ++Count5;
|
| - Src1 /= 5;
|
| - } else if (Src1 % 3 == 0) {
|
| - ++CountOps;
|
| - ++Count3;
|
| - Src1 /= 3;
|
| - } else if (Src1 % 2 == 0) {
|
| - if (Count2 == 0)
|
| - ++CountOps;
|
| - ++Count2;
|
| - Src1 /= 2;
|
| - } else {
|
| - return false;
|
| - }
|
| - }
|
| - // Lea optimization only works for i16 and i32 types, not i8.
|
| - if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))
|
| - return false;
|
| - // Limit the number of lea/shl operations for a single multiply, to
|
| - // a somewhat arbitrary choice of 3.
|
| - const uint32_t MaxOpsForOptimizedMul = 3;
|
| - if (CountOps > MaxOpsForOptimizedMul)
|
| - return false;
|
| - _mov(T, Src0);
|
| - Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| - for (uint32_t i = 0; i < Count9; ++i) {
|
| - const uint16_t Shift = 3; // log2(9-1)
|
| - _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));
|
| - _set_dest_nonkillable();
|
| - }
|
| - for (uint32_t i = 0; i < Count5; ++i) {
|
| - const uint16_t Shift = 2; // log2(5-1)
|
| - _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));
|
| - _set_dest_nonkillable();
|
| - }
|
| - for (uint32_t i = 0; i < Count3; ++i) {
|
| - const uint16_t Shift = 1; // log2(3-1)
|
| - _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));
|
| - _set_dest_nonkillable();
|
| - }
|
| - if (Count2) {
|
| - _shl(T, Ctx->getConstantInt(Ty, Count2));
|
| - }
|
| - if (Src1IsNegative)
|
| - _neg(T);
|
| - _mov(Dest, T);
|
| - return true;
|
| -}
|
| -
|
| -void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
|
| - Variable *Dest = Inst->getDest();
|
| - Operand *Src0 = legalize(Inst->getSrc(0));
|
| - Operand *Src1 = legalize(Inst->getSrc(1));
|
| - if (Inst->isCommutative()) {
|
| - if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))
|
| - std::swap(Src0, Src1);
|
| - if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))
|
| - std::swap(Src0, Src1);
|
| - }
|
| - if (Dest->getType() == IceType_i64) {
|
| - // These helper-call-involved instructions are lowered in this
|
| - // separate switch. This is because loOperand() and hiOperand()
|
| - // may insert redundant instructions for constant blinding and
|
| - // pooling. Such redundant instructions will fail liveness analysis
|
| - // under -Om1 setting. And, actually these arguments do not need
|
| - // to be processed with loOperand() and hiOperand() to be used.
|
| - switch (Inst->getOp()) {
|
| - case InstArithmetic::Udiv: {
|
| - const SizeT MaxSrcs = 2;
|
| - InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);
|
| - Call->addArg(Inst->getSrc(0));
|
| - Call->addArg(Inst->getSrc(1));
|
| - lowerCall(Call);
|
| - return;
|
| - }
|
| - case InstArithmetic::Sdiv: {
|
| - const SizeT MaxSrcs = 2;
|
| - InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs);
|
| - Call->addArg(Inst->getSrc(0));
|
| - Call->addArg(Inst->getSrc(1));
|
| - lowerCall(Call);
|
| - return;
|
| - }
|
| - case InstArithmetic::Urem: {
|
| - const SizeT MaxSrcs = 2;
|
| - InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs);
|
| - Call->addArg(Inst->getSrc(0));
|
| - Call->addArg(Inst->getSrc(1));
|
| - lowerCall(Call);
|
| - return;
|
| - }
|
| - case InstArithmetic::Srem: {
|
| - const SizeT MaxSrcs = 2;
|
| - InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs);
|
| - Call->addArg(Inst->getSrc(0));
|
| - Call->addArg(Inst->getSrc(1));
|
| - lowerCall(Call);
|
| - return;
|
| - }
|
| - default:
|
| - break;
|
| - }
|
| -
|
| - Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
|
| - Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
|
| - Operand *Src0Lo = loOperand(Src0);
|
| - Operand *Src0Hi = hiOperand(Src0);
|
| - Operand *Src1Lo = loOperand(Src1);
|
| - Operand *Src1Hi = hiOperand(Src1);
|
| - Variable *T_Lo = nullptr, *T_Hi = nullptr;
|
| - switch (Inst->getOp()) {
|
| - case InstArithmetic::_num:
|
| - llvm_unreachable("Unknown arithmetic operator");
|
| - break;
|
| - case InstArithmetic::Add:
|
| - _mov(T_Lo, Src0Lo);
|
| - _add(T_Lo, Src1Lo);
|
| - _mov(DestLo, T_Lo);
|
| - _mov(T_Hi, Src0Hi);
|
| - _adc(T_Hi, Src1Hi);
|
| - _mov(DestHi, T_Hi);
|
| - break;
|
| - case InstArithmetic::And:
|
| - _mov(T_Lo, Src0Lo);
|
| - _and(T_Lo, Src1Lo);
|
| - _mov(DestLo, T_Lo);
|
| - _mov(T_Hi, Src0Hi);
|
| - _and(T_Hi, Src1Hi);
|
| - _mov(DestHi, T_Hi);
|
| - break;
|
| - case InstArithmetic::Or:
|
| - _mov(T_Lo, Src0Lo);
|
| - _or(T_Lo, Src1Lo);
|
| - _mov(DestLo, T_Lo);
|
| - _mov(T_Hi, Src0Hi);
|
| - _or(T_Hi, Src1Hi);
|
| - _mov(DestHi, T_Hi);
|
| - break;
|
| - case InstArithmetic::Xor:
|
| - _mov(T_Lo, Src0Lo);
|
| - _xor(T_Lo, Src1Lo);
|
| - _mov(DestLo, T_Lo);
|
| - _mov(T_Hi, Src0Hi);
|
| - _xor(T_Hi, Src1Hi);
|
| - _mov(DestHi, T_Hi);
|
| - break;
|
| - case InstArithmetic::Sub:
|
| - _mov(T_Lo, Src0Lo);
|
| - _sub(T_Lo, Src1Lo);
|
| - _mov(DestLo, T_Lo);
|
| - _mov(T_Hi, Src0Hi);
|
| - _sbb(T_Hi, Src1Hi);
|
| - _mov(DestHi, T_Hi);
|
| - break;
|
| - case InstArithmetic::Mul: {
|
| - Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
|
| - Variable *T_4Lo = makeReg(IceType_i32, RegX8632::Reg_eax);
|
| - Variable *T_4Hi = makeReg(IceType_i32, RegX8632::Reg_edx);
|
| - // gcc does the following:
|
| - // a=b*c ==>
|
| - // t1 = b.hi; t1 *=(imul) c.lo
|
| - // t2 = c.hi; t2 *=(imul) b.lo
|
| - // t3:eax = b.lo
|
| - // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
|
| - // a.lo = t4.lo
|
| - // t4.hi += t1
|
| - // t4.hi += t2
|
| - // a.hi = t4.hi
|
| - // The mul instruction cannot take an immediate operand.
|
| - Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
|
| - _mov(T_1, Src0Hi);
|
| - _imul(T_1, Src1Lo);
|
| - _mov(T_2, Src1Hi);
|
| - _imul(T_2, Src0Lo);
|
| - _mov(T_3, Src0Lo, RegX8632::Reg_eax);
|
| - _mul(T_4Lo, T_3, Src1Lo);
|
| - // The mul instruction produces two dest variables, edx:eax. We
|
| - // create a fake definition of edx to account for this.
|
| - Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
|
| - _mov(DestLo, T_4Lo);
|
| - _add(T_4Hi, T_1);
|
| - _add(T_4Hi, T_2);
|
| - _mov(DestHi, T_4Hi);
|
| - } break;
|
| - case InstArithmetic::Shl: {
|
| - // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
|
| - // gcc does the following:
|
| - // a=b<<c ==>
|
| - // t1:ecx = c.lo & 0xff
|
| - // t2 = b.lo
|
| - // t3 = b.hi
|
| - // t3 = shld t3, t2, t1
|
| - // t2 = shl t2, t1
|
| - // test t1, 0x20
|
| - // je L1
|
| - // use(t3)
|
| - // t3 = t2
|
| - // t2 = 0
|
| - // L1:
|
| - // a.lo = t2
|
| - // a.hi = t3
|
| - Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
|
| - Constant *BitTest = Ctx->getConstantInt32(0x20);
|
| - Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| - InstX8632Label *Label = InstX8632Label::create(Func, this);
|
| - _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
|
| - _mov(T_2, Src0Lo);
|
| - _mov(T_3, Src0Hi);
|
| - _shld(T_3, T_2, T_1);
|
| - _shl(T_2, T_1);
|
| - _test(T_1, BitTest);
|
| - _br(CondX86::Br_e, Label);
|
| - // T_2 and T_3 are being assigned again because of the
|
| - // intra-block control flow, so we need the _mov_nonkillable
|
| - // variant to avoid liveness problems.
|
| - _mov_nonkillable(T_3, T_2);
|
| - _mov_nonkillable(T_2, Zero);
|
| - Context.insert(Label);
|
| - _mov(DestLo, T_2);
|
| - _mov(DestHi, T_3);
|
| - } break;
|
| - case InstArithmetic::Lshr: {
|
| - // a=b>>c (unsigned) ==>
|
| - // t1:ecx = c.lo & 0xff
|
| - // t2 = b.lo
|
| - // t3 = b.hi
|
| - // t2 = shrd t2, t3, t1
|
| - // t3 = shr t3, t1
|
| - // test t1, 0x20
|
| - // je L1
|
| - // use(t2)
|
| - // t2 = t3
|
| - // t3 = 0
|
| - // L1:
|
| - // a.lo = t2
|
| - // a.hi = t3
|
| - Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
|
| - Constant *BitTest = Ctx->getConstantInt32(0x20);
|
| - Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| - InstX8632Label *Label = InstX8632Label::create(Func, this);
|
| - _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
|
| - _mov(T_2, Src0Lo);
|
| - _mov(T_3, Src0Hi);
|
| - _shrd(T_2, T_3, T_1);
|
| - _shr(T_3, T_1);
|
| - _test(T_1, BitTest);
|
| - _br(CondX86::Br_e, Label);
|
| - // T_2 and T_3 are being assigned again because of the
|
| - // intra-block control flow, so we need the _mov_nonkillable
|
| - // variant to avoid liveness problems.
|
| - _mov_nonkillable(T_2, T_3);
|
| - _mov_nonkillable(T_3, Zero);
|
| - Context.insert(Label);
|
| - _mov(DestLo, T_2);
|
| - _mov(DestHi, T_3);
|
| - } break;
|
| - case InstArithmetic::Ashr: {
|
| - // a=b>>c (signed) ==>
|
| - // t1:ecx = c.lo & 0xff
|
| - // t2 = b.lo
|
| - // t3 = b.hi
|
| - // t2 = shrd t2, t3, t1
|
| - // t3 = sar t3, t1
|
| - // test t1, 0x20
|
| - // je L1
|
| - // use(t2)
|
| - // t2 = t3
|
| - // t3 = sar t3, 0x1f
|
| - // L1:
|
| - // a.lo = t2
|
| - // a.hi = t3
|
| - Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
|
| - Constant *BitTest = Ctx->getConstantInt32(0x20);
|
| - Constant *SignExtend = Ctx->getConstantInt32(0x1f);
|
| - InstX8632Label *Label = InstX8632Label::create(Func, this);
|
| - _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
|
| - _mov(T_2, Src0Lo);
|
| - _mov(T_3, Src0Hi);
|
| - _shrd(T_2, T_3, T_1);
|
| - _sar(T_3, T_1);
|
| - _test(T_1, BitTest);
|
| - _br(CondX86::Br_e, Label);
|
| - // T_2 and T_3 are being assigned again because of the
|
| - // intra-block control flow, so T_2 needs the _mov_nonkillable
|
| - // variant to avoid liveness problems. T_3 doesn't need special
|
| - // treatment because it is reassigned via _sar instead of _mov.
|
| - _mov_nonkillable(T_2, T_3);
|
| - _sar(T_3, SignExtend);
|
| - Context.insert(Label);
|
| - _mov(DestLo, T_2);
|
| - _mov(DestHi, T_3);
|
| - } break;
|
| - case InstArithmetic::Fadd:
|
| - case InstArithmetic::Fsub:
|
| - case InstArithmetic::Fmul:
|
| - case InstArithmetic::Fdiv:
|
| - case InstArithmetic::Frem:
|
| - llvm_unreachable("FP instruction with i64 type");
|
| - break;
|
| - case InstArithmetic::Udiv:
|
| - case InstArithmetic::Sdiv:
|
| - case InstArithmetic::Urem:
|
| - case InstArithmetic::Srem:
|
| - llvm_unreachable("Call-helper-involved instruction for i64 type \
|
| - should have already been handled before");
|
| - break;
|
| - }
|
| - return;
|
| - }
|
| - if (isVectorType(Dest->getType())) {
|
| - // TODO: Trap on integer divide and integer modulo by zero.
|
| - // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
|
| - if (llvm::isa<OperandX8632Mem>(Src1))
|
| - Src1 = legalizeToVar(Src1);
|
| - switch (Inst->getOp()) {
|
| - case InstArithmetic::_num:
|
| - llvm_unreachable("Unknown arithmetic operator");
|
| - break;
|
| - case InstArithmetic::Add: {
|
| - Variable *T = makeReg(Dest->getType());
|
| - _movp(T, Src0);
|
| - _padd(T, Src1);
|
| - _movp(Dest, T);
|
| - } break;
|
| - case InstArithmetic::And: {
|
| - Variable *T = makeReg(Dest->getType());
|
| - _movp(T, Src0);
|
| - _pand(T, Src1);
|
| - _movp(Dest, T);
|
| - } break;
|
| - case InstArithmetic::Or: {
|
| - Variable *T = makeReg(Dest->getType());
|
| - _movp(T, Src0);
|
| - _por(T, Src1);
|
| - _movp(Dest, T);
|
| - } break;
|
| - case InstArithmetic::Xor: {
|
| - Variable *T = makeReg(Dest->getType());
|
| - _movp(T, Src0);
|
| - _pxor(T, Src1);
|
| - _movp(Dest, T);
|
| - } break;
|
| - case InstArithmetic::Sub: {
|
| - Variable *T = makeReg(Dest->getType());
|
| - _movp(T, Src0);
|
| - _psub(T, Src1);
|
| - _movp(Dest, T);
|
| - } break;
|
| - case InstArithmetic::Mul: {
|
| - bool TypesAreValidForPmull =
|
| - Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
|
| - bool InstructionSetIsValidForPmull =
|
| - Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1;
|
| - if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
|
| - Variable *T = makeReg(Dest->getType());
|
| - _movp(T, Src0);
|
| - _pmull(T, Src1);
|
| - _movp(Dest, T);
|
| - } else if (Dest->getType() == IceType_v4i32) {
|
| - // Lowering sequence:
|
| - // Note: The mask arguments have index 0 on the left.
|
| - //
|
| - // movups T1, Src0
|
| - // pshufd T2, Src0, {1,0,3,0}
|
| - // pshufd T3, Src1, {1,0,3,0}
|
| - // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
|
| - // pmuludq T1, Src1
|
| - // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
|
| - // pmuludq T2, T3
|
| - // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
|
| - // shufps T1, T2, {0,2,0,2}
|
| - // pshufd T4, T1, {0,2,1,3}
|
| - // movups Dest, T4
|
| -
|
| - // Mask that directs pshufd to create a vector with entries
|
| - // Src[1, 0, 3, 0]
|
| - const unsigned Constant1030 = 0x31;
|
| - Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
|
| - // Mask that directs shufps to create a vector with entries
|
| - // Dest[0, 2], Src[0, 2]
|
| - const unsigned Mask0202 = 0x88;
|
| - // Mask that directs pshufd to create a vector with entries
|
| - // Src[0, 2, 1, 3]
|
| - const unsigned Mask0213 = 0xd8;
|
| - Variable *T1 = makeReg(IceType_v4i32);
|
| - Variable *T2 = makeReg(IceType_v4i32);
|
| - Variable *T3 = makeReg(IceType_v4i32);
|
| - Variable *T4 = makeReg(IceType_v4i32);
|
| - _movp(T1, Src0);
|
| - _pshufd(T2, Src0, Mask1030);
|
| - _pshufd(T3, Src1, Mask1030);
|
| - _pmuludq(T1, Src1);
|
| - _pmuludq(T2, T3);
|
| - _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
|
| - _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
|
| - _movp(Dest, T4);
|
| - } else {
|
| - assert(Dest->getType() == IceType_v16i8);
|
| - scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
|
| - }
|
| - } break;
|
| - case InstArithmetic::Shl:
|
| - case InstArithmetic::Lshr:
|
| - case InstArithmetic::Ashr:
|
| - case InstArithmetic::Udiv:
|
| - case InstArithmetic::Urem:
|
| - case InstArithmetic::Sdiv:
|
| - case InstArithmetic::Srem:
|
| - scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
|
| - break;
|
| - case InstArithmetic::Fadd: {
|
| - Variable *T = makeReg(Dest->getType());
|
| - _movp(T, Src0);
|
| - _addps(T, Src1);
|
| - _movp(Dest, T);
|
| - } break;
|
| - case InstArithmetic::Fsub: {
|
| - Variable *T = makeReg(Dest->getType());
|
| - _movp(T, Src0);
|
| - _subps(T, Src1);
|
| - _movp(Dest, T);
|
| - } break;
|
| - case InstArithmetic::Fmul: {
|
| - Variable *T = makeReg(Dest->getType());
|
| - _movp(T, Src0);
|
| - _mulps(T, Src1);
|
| - _movp(Dest, T);
|
| - } break;
|
| - case InstArithmetic::Fdiv: {
|
| - Variable *T = makeReg(Dest->getType());
|
| - _movp(T, Src0);
|
| - _divps(T, Src1);
|
| - _movp(Dest, T);
|
| - } break;
|
| - case InstArithmetic::Frem:
|
| - scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
|
| - break;
|
| - }
|
| - return;
|
| - }
|
| - Variable *T_edx = nullptr;
|
| - Variable *T = nullptr;
|
| - switch (Inst->getOp()) {
|
| - case InstArithmetic::_num:
|
| - llvm_unreachable("Unknown arithmetic operator");
|
| - break;
|
| - case InstArithmetic::Add:
|
| - _mov(T, Src0);
|
| - _add(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::And:
|
| - _mov(T, Src0);
|
| - _and(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Or:
|
| - _mov(T, Src0);
|
| - _or(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Xor:
|
| - _mov(T, Src0);
|
| - _xor(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Sub:
|
| - _mov(T, Src0);
|
| - _sub(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Mul:
|
| - if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
|
| - if (optimizeScalarMul(Dest, Src0, C->getValue()))
|
| - return;
|
| - }
|
| - // The 8-bit version of imul only allows the form "imul r/m8"
|
| - // where T must be in eax.
|
| - if (isByteSizedArithType(Dest->getType())) {
|
| - _mov(T, Src0, RegX8632::Reg_eax);
|
| - Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - } else {
|
| - _mov(T, Src0);
|
| - }
|
| - _imul(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Shl:
|
| - _mov(T, Src0);
|
| - if (!llvm::isa<Constant>(Src1))
|
| - Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
|
| - _shl(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Lshr:
|
| - _mov(T, Src0);
|
| - if (!llvm::isa<Constant>(Src1))
|
| - Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
|
| - _shr(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Ashr:
|
| - _mov(T, Src0);
|
| - if (!llvm::isa<Constant>(Src1))
|
| - Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
|
| - _sar(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Udiv:
|
| - // div and idiv are the few arithmetic operators that do not allow
|
| - // immediates as the operand.
|
| - Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - if (isByteSizedArithType(Dest->getType())) {
|
| - Variable *T_ah = nullptr;
|
| - Constant *Zero = Ctx->getConstantZero(IceType_i8);
|
| - _mov(T, Src0, RegX8632::Reg_eax);
|
| - _mov(T_ah, Zero, RegX8632::Reg_ah);
|
| - _div(T, Src1, T_ah);
|
| - _mov(Dest, T);
|
| - } else {
|
| - Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| - _mov(T, Src0, RegX8632::Reg_eax);
|
| - _mov(T_edx, Zero, RegX8632::Reg_edx);
|
| - _div(T, Src1, T_edx);
|
| - _mov(Dest, T);
|
| - }
|
| - break;
|
| - case InstArithmetic::Sdiv:
|
| - // TODO(stichnot): Enable this after doing better performance
|
| - // and cross testing.
|
| - if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
|
| - // Optimize division by constant power of 2, but not for Om1
|
| - // or O0, just to keep things simple there.
|
| - if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
|
| - int32_t Divisor = C->getValue();
|
| - uint32_t UDivisor = static_cast<uint32_t>(Divisor);
|
| - if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
|
| - uint32_t LogDiv = llvm::Log2_32(UDivisor);
|
| - Type Ty = Dest->getType();
|
| - // LLVM does the following for dest=src/(1<<log):
|
| - // t=src
|
| - // sar t,typewidth-1 // -1 if src is negative, 0 if not
|
| - // shr t,typewidth-log
|
| - // add t,src
|
| - // sar t,log
|
| - // dest=t
|
| - uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty);
|
| - _mov(T, Src0);
|
| - // If for some reason we are dividing by 1, just treat it
|
| - // like an assignment.
|
| - if (LogDiv > 0) {
|
| - // The initial sar is unnecessary when dividing by 2.
|
| - if (LogDiv > 1)
|
| - _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
|
| - _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
|
| - _add(T, Src0);
|
| - _sar(T, Ctx->getConstantInt(Ty, LogDiv));
|
| - }
|
| - _mov(Dest, T);
|
| - return;
|
| - }
|
| - }
|
| - }
|
| - Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - if (isByteSizedArithType(Dest->getType())) {
|
| - _mov(T, Src0, RegX8632::Reg_eax);
|
| - _cbwdq(T, T);
|
| - _idiv(T, Src1, T);
|
| - _mov(Dest, T);
|
| - } else {
|
| - T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
|
| - _mov(T, Src0, RegX8632::Reg_eax);
|
| - _cbwdq(T_edx, T);
|
| - _idiv(T, Src1, T_edx);
|
| - _mov(Dest, T);
|
| - }
|
| - break;
|
| - case InstArithmetic::Urem:
|
| - Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - if (isByteSizedArithType(Dest->getType())) {
|
| - Variable *T_ah = nullptr;
|
| - Constant *Zero = Ctx->getConstantZero(IceType_i8);
|
| - _mov(T, Src0, RegX8632::Reg_eax);
|
| - _mov(T_ah, Zero, RegX8632::Reg_ah);
|
| - _div(T_ah, Src1, T);
|
| - _mov(Dest, T_ah);
|
| - } else {
|
| - Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| - _mov(T_edx, Zero, RegX8632::Reg_edx);
|
| - _mov(T, Src0, RegX8632::Reg_eax);
|
| - _div(T_edx, Src1, T);
|
| - _mov(Dest, T_edx);
|
| - }
|
| - break;
|
| - case InstArithmetic::Srem:
|
| - // TODO(stichnot): Enable this after doing better performance
|
| - // and cross testing.
|
| - if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
|
| - // Optimize mod by constant power of 2, but not for Om1 or O0,
|
| - // just to keep things simple there.
|
| - if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
|
| - int32_t Divisor = C->getValue();
|
| - uint32_t UDivisor = static_cast<uint32_t>(Divisor);
|
| - if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
|
| - uint32_t LogDiv = llvm::Log2_32(UDivisor);
|
| - Type Ty = Dest->getType();
|
| - // LLVM does the following for dest=src%(1<<log):
|
| - // t=src
|
| - // sar t,typewidth-1 // -1 if src is negative, 0 if not
|
| - // shr t,typewidth-log
|
| - // add t,src
|
| - // and t, -(1<<log)
|
| - // sub t,src
|
| - // neg t
|
| - // dest=t
|
| - uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty);
|
| - // If for some reason we are dividing by 1, just assign 0.
|
| - if (LogDiv == 0) {
|
| - _mov(Dest, Ctx->getConstantZero(Ty));
|
| - return;
|
| - }
|
| - _mov(T, Src0);
|
| - // The initial sar is unnecessary when dividing by 2.
|
| - if (LogDiv > 1)
|
| - _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
|
| - _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
|
| - _add(T, Src0);
|
| - _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
|
| - _sub(T, Src0);
|
| - _neg(T);
|
| - _mov(Dest, T);
|
| - return;
|
| - }
|
| - }
|
| - }
|
| - Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - if (isByteSizedArithType(Dest->getType())) {
|
| - Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah);
|
| - _mov(T, Src0, RegX8632::Reg_eax);
|
| - _cbwdq(T, T);
|
| - Context.insert(InstFakeDef::create(Func, T_ah));
|
| - _idiv(T_ah, Src1, T);
|
| - _mov(Dest, T_ah);
|
| - } else {
|
| - T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
|
| - _mov(T, Src0, RegX8632::Reg_eax);
|
| - _cbwdq(T_edx, T);
|
| - _idiv(T_edx, Src1, T);
|
| - _mov(Dest, T_edx);
|
| - }
|
| - break;
|
| - case InstArithmetic::Fadd:
|
| - _mov(T, Src0);
|
| - _addss(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Fsub:
|
| - _mov(T, Src0);
|
| - _subss(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Fmul:
|
| - _mov(T, Src0);
|
| - _mulss(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Fdiv:
|
| - _mov(T, Src0);
|
| - _divss(T, Src1);
|
| - _mov(Dest, T);
|
| - break;
|
| - case InstArithmetic::Frem: {
|
| - const SizeT MaxSrcs = 2;
|
| - Type Ty = Dest->getType();
|
| - InstCall *Call = makeHelperCall(
|
| - isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
|
| - Call->addArg(Src0);
|
| - Call->addArg(Src1);
|
| - return lowerCall(Call);
|
| - }
|
| - }
|
| -}
|
| -
|
| -void TargetX8632::lowerAssign(const InstAssign *Inst) {
|
| - Variable *Dest = Inst->getDest();
|
| - Operand *Src0 = Inst->getSrc(0);
|
| - assert(Dest->getType() == Src0->getType());
|
| - if (Dest->getType() == IceType_i64) {
|
| - Src0 = legalize(Src0);
|
| - Operand *Src0Lo = loOperand(Src0);
|
| - Operand *Src0Hi = hiOperand(Src0);
|
| - Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
|
| - Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
|
| - Variable *T_Lo = nullptr, *T_Hi = nullptr;
|
| - _mov(T_Lo, Src0Lo);
|
| - _mov(DestLo, T_Lo);
|
| - _mov(T_Hi, Src0Hi);
|
| - _mov(DestHi, T_Hi);
|
| - } else {
|
| - Operand *RI;
|
| - if (Dest->hasReg()) {
|
| - // If Dest already has a physical register, then legalize the
|
| - // Src operand into a Variable with the same register
|
| - // assignment. This is mostly a workaround for advanced phi
|
| - // lowering's ad-hoc register allocation which assumes no
|
| - // register allocation is needed when at least one of the
|
| - // operands is non-memory.
|
| -
|
| - // If we have a physical register for the dest variable, we can
|
| - // enable our constant blinding or pooling again. Note this is
|
| - // only for advancedPhiLowering(), the flag flip should leave
|
| - // no other side effect.
|
| - {
|
| - BoolFlagSaver B(RandomizationPoolingPaused, false);
|
| - RI = legalize(Src0, Legal_Reg, Dest->getRegNum());
|
| - }
|
| - } else {
|
| - // If Dest could be a stack operand, then RI must be a physical
|
| - // register or a scalar integer immediate.
|
| - RI = legalize(Src0, Legal_Reg | Legal_Imm);
|
| - }
|
| - if (isVectorType(Dest->getType()))
|
| - _movp(Dest, RI);
|
| - else
|
| - _mov(Dest, RI);
|
| - }
|
| -}
|
| -
|
| -void TargetX8632::lowerBr(const InstBr *Inst) {
|
| - if (Inst->isUnconditional()) {
|
| - _br(Inst->getTargetUnconditional());
|
| - return;
|
| - }
|
| - Operand *Cond = Inst->getCondition();
|
| -
|
| - // Handle folding opportunities.
|
| - if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
|
| - assert(Producer->isDeleted());
|
| - switch (BoolFolding::getProducerKind(Producer)) {
|
| - default:
|
| - break;
|
| - case BoolFolding::PK_Icmp32: {
|
| - // TODO(stichnot): Refactor similarities between this block and
|
| - // the corresponding code in lowerIcmp().
|
| - auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
|
| - Operand *Src0 = Producer->getSrc(0);
|
| - Operand *Src1 = legalize(Producer->getSrc(1));
|
| - Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
|
| - _cmp(Src0RM, Src1);
|
| - _br(getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(),
|
| - Inst->getTargetFalse());
|
| - return;
|
| - }
|
| - }
|
| - }
|
| -
|
| - Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem);
|
| - Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| - _cmp(Src0, Zero);
|
| - _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
|
| -}
|
| -
|
| -void TargetX8632::lowerCall(const InstCall *Instr) {
|
| - // x86-32 calling convention:
|
| - //
|
| - // * At the point before the call, the stack must be aligned to 16
|
| - // bytes.
|
| - //
|
| - // * The first four arguments of vector type, regardless of their
|
| - // position relative to the other arguments in the argument list, are
|
| - // placed in registers xmm0 - xmm3.
|
| - //
|
| - // * Other arguments are pushed onto the stack in right-to-left order,
|
| - // such that the left-most argument ends up on the top of the stack at
|
| - // the lowest memory address.
|
| - //
|
| - // * Stack arguments of vector type are aligned to start at the next
|
| - // highest multiple of 16 bytes. Other stack arguments are aligned to
|
| - // 4 bytes.
|
| - //
|
| - // This intends to match the section "IA-32 Function Calling
|
| - // Convention" of the document "OS X ABI Function Call Guide" by
|
| - // Apple.
|
| - NeedsStackAlignment = true;
|
| -
|
| - typedef std::vector<Operand *> OperandList;
|
| - OperandList XmmArgs;
|
| - OperandList StackArgs, StackArgLocations;
|
| - uint32_t ParameterAreaSizeBytes = 0;
|
| -
|
| - // Classify each argument operand according to the location where the
|
| - // argument is passed.
|
| - for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
|
| - Operand *Arg = Instr->getArg(i);
|
| - Type Ty = Arg->getType();
|
| - // The PNaCl ABI requires the width of arguments to be at least 32 bits.
|
| - assert(typeWidthInBytes(Ty) >= 4);
|
| - if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
|
| - XmmArgs.push_back(Arg);
|
| - } else {
|
| - StackArgs.push_back(Arg);
|
| - if (isVectorType(Arg->getType())) {
|
| - ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
|
| - }
|
| - Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
|
| - Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
|
| - StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
|
| - ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
|
| - }
|
| - }
|
| -
|
| - // Adjust the parameter area so that the stack is aligned. It is
|
| - // assumed that the stack is already aligned at the start of the
|
| - // calling sequence.
|
| - ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
|
| -
|
| - // Subtract the appropriate amount for the argument area. This also
|
| - // takes care of setting the stack adjustment during emission.
|
| - //
|
| - // TODO: If for some reason the call instruction gets dead-code
|
| - // eliminated after lowering, we would need to ensure that the
|
| - // pre-call and the post-call esp adjustment get eliminated as well.
|
| - if (ParameterAreaSizeBytes) {
|
| - _adjust_stack(ParameterAreaSizeBytes);
|
| - }
|
| -
|
| - // Copy arguments that are passed on the stack to the appropriate
|
| - // stack locations.
|
| - for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
|
| - lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
|
| - }
|
| -
|
| - // Copy arguments to be passed in registers to the appropriate
|
| - // registers.
|
| - // TODO: Investigate the impact of lowering arguments passed in
|
| - // registers after lowering stack arguments as opposed to the other
|
| - // way around. Lowering register arguments after stack arguments may
|
| - // reduce register pressure. On the other hand, lowering register
|
| - // arguments first (before stack arguments) may result in more compact
|
| - // code, as the memory operand displacements may end up being smaller
|
| - // before any stack adjustment is done.
|
| - for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
|
| - Variable *Reg = legalizeToVar(XmmArgs[i], RegX8632::Reg_xmm0 + i);
|
| - // Generate a FakeUse of register arguments so that they do not get
|
| - // dead code eliminated as a result of the FakeKill of scratch
|
| - // registers after the call.
|
| - Context.insert(InstFakeUse::create(Func, Reg));
|
| - }
|
| - // Generate the call instruction. Assign its result to a temporary
|
| - // with high register allocation weight.
|
| - Variable *Dest = Instr->getDest();
|
| - // ReturnReg doubles as ReturnRegLo as necessary.
|
| - Variable *ReturnReg = nullptr;
|
| - Variable *ReturnRegHi = nullptr;
|
| - if (Dest) {
|
| - switch (Dest->getType()) {
|
| - case IceType_NUM:
|
| - llvm_unreachable("Invalid Call dest type");
|
| - break;
|
| - case IceType_void:
|
| - break;
|
| - case IceType_i1:
|
| - case IceType_i8:
|
| - case IceType_i16:
|
| - case IceType_i32:
|
| - ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_eax);
|
| - break;
|
| - case IceType_i64:
|
| - ReturnReg = makeReg(IceType_i32, RegX8632::Reg_eax);
|
| - ReturnRegHi = makeReg(IceType_i32, RegX8632::Reg_edx);
|
| - break;
|
| - case IceType_f32:
|
| - case IceType_f64:
|
| - // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
|
| - // the fstp instruction.
|
| - break;
|
| - case IceType_v4i1:
|
| - case IceType_v8i1:
|
| - case IceType_v16i1:
|
| - case IceType_v16i8:
|
| - case IceType_v8i16:
|
| - case IceType_v4i32:
|
| - case IceType_v4f32:
|
| - ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_xmm0);
|
| - break;
|
| - }
|
| - }
|
| - Operand *CallTarget = legalize(Instr->getCallTarget());
|
| - const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
|
| - if (NeedSandboxing) {
|
| - if (llvm::isa<Constant>(CallTarget)) {
|
| - _bundle_lock(InstBundleLock::Opt_AlignToEnd);
|
| - } else {
|
| - Variable *CallTargetVar = nullptr;
|
| - _mov(CallTargetVar, CallTarget);
|
| - _bundle_lock(InstBundleLock::Opt_AlignToEnd);
|
| - const SizeT BundleSize =
|
| - 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
|
| - _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
|
| - CallTarget = CallTargetVar;
|
| - }
|
| - }
|
| - Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
|
| - Context.insert(NewCall);
|
| - if (NeedSandboxing)
|
| - _bundle_unlock();
|
| - if (ReturnRegHi)
|
| - Context.insert(InstFakeDef::create(Func, ReturnRegHi));
|
| -
|
| - // Add the appropriate offset to esp. The call instruction takes care
|
| - // of resetting the stack offset during emission.
|
| - if (ParameterAreaSizeBytes) {
|
| - Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
|
| - _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
|
| - }
|
| -
|
| - // Insert a register-kill pseudo instruction.
|
| - Context.insert(InstFakeKill::create(Func, NewCall));
|
| -
|
| - // Generate a FakeUse to keep the call live if necessary.
|
| - if (Instr->hasSideEffects() && ReturnReg) {
|
| - Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
|
| - Context.insert(FakeUse);
|
| - }
|
| -
|
| - if (!Dest)
|
| - return;
|
| -
|
| - // Assign the result of the call to Dest.
|
| - if (ReturnReg) {
|
| - if (ReturnRegHi) {
|
| - assert(Dest->getType() == IceType_i64);
|
| - split64(Dest);
|
| - Variable *DestLo = Dest->getLo();
|
| - Variable *DestHi = Dest->getHi();
|
| - _mov(DestLo, ReturnReg);
|
| - _mov(DestHi, ReturnRegHi);
|
| - } else {
|
| - assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
|
| - Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
|
| - isVectorType(Dest->getType()));
|
| - if (isVectorType(Dest->getType())) {
|
| - _movp(Dest, ReturnReg);
|
| - } else {
|
| - _mov(Dest, ReturnReg);
|
| - }
|
| - }
|
| - } else if (isScalarFloatingType(Dest->getType())) {
|
| - // Special treatment for an FP function which returns its result in
|
| - // st(0).
|
| - // If Dest ends up being a physical xmm register, the fstp emit code
|
| - // will route st(0) through a temporary stack slot.
|
| - _fstp(Dest);
|
| - // Create a fake use of Dest in case it actually isn't used,
|
| - // because st(0) still needs to be popped.
|
| - Context.insert(InstFakeUse::create(Func, Dest));
|
| - }
|
| -}
|
| -
|
| -void TargetX8632::lowerCast(const InstCast *Inst) {
|
| - // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
|
| - InstCast::OpKind CastKind = Inst->getCastKind();
|
| - Variable *Dest = Inst->getDest();
|
| - switch (CastKind) {
|
| - default:
|
| - Func->setError("Cast type not supported");
|
| - return;
|
| - case InstCast::Sext: {
|
| - // Src0RM is the source operand legalized to physical register or memory,
|
| - // but not immediate, since the relevant x86 native instructions don't
|
| - // allow an immediate operand. If the operand is an immediate, we could
|
| - // consider computing the strength-reduced result at translation time,
|
| - // but we're unlikely to see something like that in the bitcode that
|
| - // the optimizer wouldn't have already taken care of.
|
| - Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
|
| - if (isVectorType(Dest->getType())) {
|
| - Type DestTy = Dest->getType();
|
| - if (DestTy == IceType_v16i8) {
|
| - // onemask = materialize(1,1,...); dst = (src & onemask) > 0
|
| - Variable *OneMask = makeVectorOfOnes(Dest->getType());
|
| - Variable *T = makeReg(DestTy);
|
| - _movp(T, Src0RM);
|
| - _pand(T, OneMask);
|
| - Variable *Zeros = makeVectorOfZeros(Dest->getType());
|
| - _pcmpgt(T, Zeros);
|
| - _movp(Dest, T);
|
| - } else {
|
| - // width = width(elty) - 1; dest = (src << width) >> width
|
| - SizeT ShiftAmount =
|
| - X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;
|
| - Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
|
| - Variable *T = makeReg(DestTy);
|
| - _movp(T, Src0RM);
|
| - _psll(T, ShiftConstant);
|
| - _psra(T, ShiftConstant);
|
| - _movp(Dest, T);
|
| - }
|
| - } else if (Dest->getType() == IceType_i64) {
|
| - // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
|
| - Constant *Shift = Ctx->getConstantInt32(31);
|
| - Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
|
| - Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
|
| - Variable *T_Lo = makeReg(DestLo->getType());
|
| - if (Src0RM->getType() == IceType_i32) {
|
| - _mov(T_Lo, Src0RM);
|
| - } else if (Src0RM->getType() == IceType_i1) {
|
| - _movzx(T_Lo, Src0RM);
|
| - _shl(T_Lo, Shift);
|
| - _sar(T_Lo, Shift);
|
| - } else {
|
| - _movsx(T_Lo, Src0RM);
|
| - }
|
| - _mov(DestLo, T_Lo);
|
| - Variable *T_Hi = nullptr;
|
| - _mov(T_Hi, T_Lo);
|
| - if (Src0RM->getType() != IceType_i1)
|
| - // For i1, the sar instruction is already done above.
|
| - _sar(T_Hi, Shift);
|
| - _mov(DestHi, T_Hi);
|
| - } else if (Src0RM->getType() == IceType_i1) {
|
| - // t1 = src
|
| - // shl t1, dst_bitwidth - 1
|
| - // sar t1, dst_bitwidth - 1
|
| - // dst = t1
|
| - size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
|
| - Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
|
| - Variable *T = makeReg(Dest->getType());
|
| - if (typeWidthInBytes(Dest->getType()) <=
|
| - typeWidthInBytes(Src0RM->getType())) {
|
| - _mov(T, Src0RM);
|
| - } else {
|
| - // Widen the source using movsx or movzx. (It doesn't matter
|
| - // which one, since the following shl/sar overwrite the bits.)
|
| - _movzx(T, Src0RM);
|
| - }
|
| - _shl(T, ShiftAmount);
|
| - _sar(T, ShiftAmount);
|
| - _mov(Dest, T);
|
| - } else {
|
| - // t1 = movsx src; dst = t1
|
| - Variable *T = makeReg(Dest->getType());
|
| - _movsx(T, Src0RM);
|
| - _mov(Dest, T);
|
| - }
|
| - break;
|
| - }
|
| - case InstCast::Zext: {
|
| - Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
|
| - if (isVectorType(Dest->getType())) {
|
| - // onemask = materialize(1,1,...); dest = onemask & src
|
| - Type DestTy = Dest->getType();
|
| - Variable *OneMask = makeVectorOfOnes(DestTy);
|
| - Variable *T = makeReg(DestTy);
|
| - _movp(T, Src0RM);
|
| - _pand(T, OneMask);
|
| - _movp(Dest, T);
|
| - } else if (Dest->getType() == IceType_i64) {
|
| - // t1=movzx src; dst.lo=t1; dst.hi=0
|
| - Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| - Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
|
| - Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
|
| - Variable *Tmp = makeReg(DestLo->getType());
|
| - if (Src0RM->getType() == IceType_i32) {
|
| - _mov(Tmp, Src0RM);
|
| - } else {
|
| - _movzx(Tmp, Src0RM);
|
| - }
|
| - if (Src0RM->getType() == IceType_i1) {
|
| - Constant *One = Ctx->getConstantInt32(1);
|
| - _and(Tmp, One);
|
| - }
|
| - _mov(DestLo, Tmp);
|
| - _mov(DestHi, Zero);
|
| - } else if (Src0RM->getType() == IceType_i1) {
|
| - // t = Src0RM; t &= 1; Dest = t
|
| - Constant *One = Ctx->getConstantInt32(1);
|
| - Type DestTy = Dest->getType();
|
| - Variable *T;
|
| - if (DestTy == IceType_i8) {
|
| - T = makeReg(DestTy);
|
| - _mov(T, Src0RM);
|
| - } else {
|
| - // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
|
| - T = makeReg(IceType_i32);
|
| - _movzx(T, Src0RM);
|
| - }
|
| - _and(T, One);
|
| - _mov(Dest, T);
|
| - } else {
|
| - // t1 = movzx src; dst = t1
|
| - Variable *T = makeReg(Dest->getType());
|
| - _movzx(T, Src0RM);
|
| - _mov(Dest, T);
|
| - }
|
| - break;
|
| - }
|
| - case InstCast::Trunc: {
|
| - if (isVectorType(Dest->getType())) {
|
| - // onemask = materialize(1,1,...); dst = src & onemask
|
| - Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
|
| - Type Src0Ty = Src0RM->getType();
|
| - Variable *OneMask = makeVectorOfOnes(Src0Ty);
|
| - Variable *T = makeReg(Dest->getType());
|
| - _movp(T, Src0RM);
|
| - _pand(T, OneMask);
|
| - _movp(Dest, T);
|
| - } else {
|
| - Operand *Src0 = Inst->getSrc(0);
|
| - if (Src0->getType() == IceType_i64)
|
| - Src0 = loOperand(Src0);
|
| - Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
|
| - // t1 = trunc Src0RM; Dest = t1
|
| - Variable *T = nullptr;
|
| - _mov(T, Src0RM);
|
| - if (Dest->getType() == IceType_i1)
|
| - _and(T, Ctx->getConstantInt1(1));
|
| - _mov(Dest, T);
|
| - }
|
| - break;
|
| - }
|
| - case InstCast::Fptrunc:
|
| - case InstCast::Fpext: {
|
| - Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
|
| - // t1 = cvt Src0RM; Dest = t1
|
| - Variable *T = makeReg(Dest->getType());
|
| - _cvt(T, Src0RM, InstX8632Cvt::Float2float);
|
| - _mov(Dest, T);
|
| - break;
|
| - }
|
| - case InstCast::Fptosi:
|
| - if (isVectorType(Dest->getType())) {
|
| - assert(Dest->getType() == IceType_v4i32 &&
|
| - Inst->getSrc(0)->getType() == IceType_v4f32);
|
| - Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
|
| - if (llvm::isa<OperandX8632Mem>(Src0RM))
|
| - Src0RM = legalizeToVar(Src0RM);
|
| - Variable *T = makeReg(Dest->getType());
|
| - _cvt(T, Src0RM, InstX8632Cvt::Tps2dq);
|
| - _movp(Dest, T);
|
| - } else if (Dest->getType() == IceType_i64) {
|
| - // Use a helper for converting floating-point values to 64-bit
|
| - // integers. SSE2 appears to have no way to convert from xmm
|
| - // registers to something like the edx:eax register pair, and
|
| - // gcc and clang both want to use x87 instructions complete with
|
| - // temporary manipulation of the status word. This helper is
|
| - // not needed for x86-64.
|
| - split64(Dest);
|
| - const SizeT MaxSrcs = 1;
|
| - Type SrcType = Inst->getSrc(0)->getType();
|
| - InstCall *Call =
|
| - makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
|
| - : H_fptosi_f64_i64,
|
| - Dest, MaxSrcs);
|
| - Call->addArg(Inst->getSrc(0));
|
| - lowerCall(Call);
|
| - } else {
|
| - Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
|
| - // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
|
| - Variable *T_1 = makeReg(IceType_i32);
|
| - Variable *T_2 = makeReg(Dest->getType());
|
| - _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
|
| - _mov(T_2, T_1); // T_1 and T_2 may have different integer types
|
| - if (Dest->getType() == IceType_i1)
|
| - _and(T_2, Ctx->getConstantInt1(1));
|
| - _mov(Dest, T_2);
|
| - }
|
| - break;
|
| - case InstCast::Fptoui:
|
| - if (isVectorType(Dest->getType())) {
|
| - assert(Dest->getType() == IceType_v4i32 &&
|
| - Inst->getSrc(0)->getType() == IceType_v4f32);
|
| - const SizeT MaxSrcs = 1;
|
| - InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
|
| - Call->addArg(Inst->getSrc(0));
|
| - lowerCall(Call);
|
| - } else if (Dest->getType() == IceType_i64 ||
|
| - Dest->getType() == IceType_i32) {
|
| - // Use a helper for both x86-32 and x86-64.
|
| - split64(Dest);
|
| - const SizeT MaxSrcs = 1;
|
| - Type DestType = Dest->getType();
|
| - Type SrcType = Inst->getSrc(0)->getType();
|
| - IceString TargetString;
|
| - if (isInt32Asserting32Or64(DestType)) {
|
| - TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
|
| - : H_fptoui_f64_i32;
|
| - } else {
|
| - TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
|
| - : H_fptoui_f64_i64;
|
| - }
|
| - InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
|
| - Call->addArg(Inst->getSrc(0));
|
| - lowerCall(Call);
|
| - return;
|
| - } else {
|
| - Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
|
| - // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
|
| - Variable *T_1 = makeReg(IceType_i32);
|
| - Variable *T_2 = makeReg(Dest->getType());
|
| - _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
|
| - _mov(T_2, T_1); // T_1 and T_2 may have different integer types
|
| - if (Dest->getType() == IceType_i1)
|
| - _and(T_2, Ctx->getConstantInt1(1));
|
| - _mov(Dest, T_2);
|
| - }
|
| - break;
|
| - case InstCast::Sitofp:
|
| - if (isVectorType(Dest->getType())) {
|
| - assert(Dest->getType() == IceType_v4f32 &&
|
| - Inst->getSrc(0)->getType() == IceType_v4i32);
|
| - Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
|
| - if (llvm::isa<OperandX8632Mem>(Src0RM))
|
| - Src0RM = legalizeToVar(Src0RM);
|
| - Variable *T = makeReg(Dest->getType());
|
| - _cvt(T, Src0RM, InstX8632Cvt::Dq2ps);
|
| - _movp(Dest, T);
|
| - } else if (Inst->getSrc(0)->getType() == IceType_i64) {
|
| - // Use a helper for x86-32.
|
| - const SizeT MaxSrcs = 1;
|
| - Type DestType = Dest->getType();
|
| - InstCall *Call =
|
| - makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
|
| - : H_sitofp_i64_f64,
|
| - Dest, MaxSrcs);
|
| - // TODO: Call the correct compiler-rt helper function.
|
| - Call->addArg(Inst->getSrc(0));
|
| - lowerCall(Call);
|
| - return;
|
| - } else {
|
| - Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
|
| - // Sign-extend the operand.
|
| - // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
|
| - Variable *T_1 = makeReg(IceType_i32);
|
| - Variable *T_2 = makeReg(Dest->getType());
|
| - if (Src0RM->getType() == IceType_i32)
|
| - _mov(T_1, Src0RM);
|
| - else
|
| - _movsx(T_1, Src0RM);
|
| - _cvt(T_2, T_1, InstX8632Cvt::Si2ss);
|
| - _mov(Dest, T_2);
|
| - }
|
| - break;
|
| - case InstCast::Uitofp: {
|
| - Operand *Src0 = Inst->getSrc(0);
|
| - if (isVectorType(Src0->getType())) {
|
| - assert(Dest->getType() == IceType_v4f32 &&
|
| - Src0->getType() == IceType_v4i32);
|
| - const SizeT MaxSrcs = 1;
|
| - InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
|
| - Call->addArg(Src0);
|
| - lowerCall(Call);
|
| - } else if (Src0->getType() == IceType_i64 ||
|
| - Src0->getType() == IceType_i32) {
|
| - // Use a helper for x86-32 and x86-64. Also use a helper for
|
| - // i32 on x86-32.
|
| - const SizeT MaxSrcs = 1;
|
| - Type DestType = Dest->getType();
|
| - IceString TargetString;
|
| - if (isInt32Asserting32Or64(Src0->getType())) {
|
| - TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
|
| - : H_uitofp_i32_f64;
|
| - } else {
|
| - TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
|
| - : H_uitofp_i64_f64;
|
| - }
|
| - InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
|
| - Call->addArg(Src0);
|
| - lowerCall(Call);
|
| - return;
|
| - } else {
|
| - Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
|
| - // Zero-extend the operand.
|
| - // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
|
| - Variable *T_1 = makeReg(IceType_i32);
|
| - Variable *T_2 = makeReg(Dest->getType());
|
| - if (Src0RM->getType() == IceType_i32)
|
| - _mov(T_1, Src0RM);
|
| - else
|
| - _movzx(T_1, Src0RM);
|
| - _cvt(T_2, T_1, InstX8632Cvt::Si2ss);
|
| - _mov(Dest, T_2);
|
| - }
|
| - break;
|
| - }
|
| - case InstCast::Bitcast: {
|
| - Operand *Src0 = Inst->getSrc(0);
|
| - if (Dest->getType() == Src0->getType()) {
|
| - InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
|
| - lowerAssign(Assign);
|
| - return;
|
| - }
|
| - switch (Dest->getType()) {
|
| - default:
|
| - llvm_unreachable("Unexpected Bitcast dest type");
|
| - case IceType_i8: {
|
| - assert(Src0->getType() == IceType_v8i1);
|
| - InstCall *Call = makeHelperCall(H_bitcast_8xi1_i8, Dest, 1);
|
| - Call->addArg(Src0);
|
| - lowerCall(Call);
|
| - } break;
|
| - case IceType_i16: {
|
| - assert(Src0->getType() == IceType_v16i1);
|
| - InstCall *Call = makeHelperCall(H_bitcast_16xi1_i16, Dest, 1);
|
| - Call->addArg(Src0);
|
| - lowerCall(Call);
|
| - } break;
|
| - case IceType_i32:
|
| - case IceType_f32: {
|
| - Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
|
| - Type DestType = Dest->getType();
|
| - Type SrcType = Src0RM->getType();
|
| - (void)DestType;
|
| - assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
|
| - (DestType == IceType_f32 && SrcType == IceType_i32));
|
| - // a.i32 = bitcast b.f32 ==>
|
| - // t.f32 = b.f32
|
| - // s.f32 = spill t.f32
|
| - // a.i32 = s.f32
|
| - Variable *T = nullptr;
|
| - // TODO: Should be able to force a spill setup by calling legalize() with
|
| - // Legal_Mem and not Legal_Reg or Legal_Imm.
|
| - SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(SrcType);
|
| - SpillVar->setLinkedTo(Dest);
|
| - Variable *Spill = SpillVar;
|
| - Spill->setWeight(RegWeight::Zero);
|
| - _mov(T, Src0RM);
|
| - _mov(Spill, T);
|
| - _mov(Dest, Spill);
|
| - } break;
|
| - case IceType_i64: {
|
| - Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
|
| - assert(Src0RM->getType() == IceType_f64);
|
| - // a.i64 = bitcast b.f64 ==>
|
| - // s.f64 = spill b.f64
|
| - // t_lo.i32 = lo(s.f64)
|
| - // a_lo.i32 = t_lo.i32
|
| - // t_hi.i32 = hi(s.f64)
|
| - // a_hi.i32 = t_hi.i32
|
| - Operand *SpillLo, *SpillHi;
|
| - if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
|
| - SpillVariable *SpillVar =
|
| - Func->makeVariable<SpillVariable>(IceType_f64);
|
| - SpillVar->setLinkedTo(Src0Var);
|
| - Variable *Spill = SpillVar;
|
| - Spill->setWeight(RegWeight::Zero);
|
| - _movq(Spill, Src0RM);
|
| - SpillLo = VariableSplit::create(Func, Spill, VariableSplit::Low);
|
| - SpillHi = VariableSplit::create(Func, Spill, VariableSplit::High);
|
| - } else {
|
| - SpillLo = loOperand(Src0RM);
|
| - SpillHi = hiOperand(Src0RM);
|
| - }
|
| -
|
| - Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
|
| - Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
|
| - Variable *T_Lo = makeReg(IceType_i32);
|
| - Variable *T_Hi = makeReg(IceType_i32);
|
| -
|
| - _mov(T_Lo, SpillLo);
|
| - _mov(DestLo, T_Lo);
|
| - _mov(T_Hi, SpillHi);
|
| - _mov(DestHi, T_Hi);
|
| - } break;
|
| - case IceType_f64: {
|
| - Src0 = legalize(Src0);
|
| - assert(Src0->getType() == IceType_i64);
|
| - if (llvm::isa<OperandX8632Mem>(Src0)) {
|
| - Variable *T = Func->makeVariable(Dest->getType());
|
| - _movq(T, Src0);
|
| - _movq(Dest, T);
|
| - break;
|
| - }
|
| - // a.f64 = bitcast b.i64 ==>
|
| - // t_lo.i32 = b_lo.i32
|
| - // FakeDef(s.f64)
|
| - // lo(s.f64) = t_lo.i32
|
| - // t_hi.i32 = b_hi.i32
|
| - // hi(s.f64) = t_hi.i32
|
| - // a.f64 = s.f64
|
| - SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(IceType_f64);
|
| - SpillVar->setLinkedTo(Dest);
|
| - Variable *Spill = SpillVar;
|
| - Spill->setWeight(RegWeight::Zero);
|
| -
|
| - Variable *T_Lo = nullptr, *T_Hi = nullptr;
|
| - VariableSplit *SpillLo =
|
| - VariableSplit::create(Func, Spill, VariableSplit::Low);
|
| - VariableSplit *SpillHi =
|
| - VariableSplit::create(Func, Spill, VariableSplit::High);
|
| - _mov(T_Lo, loOperand(Src0));
|
| - // Technically, the Spill is defined after the _store happens, but
|
| - // SpillLo is considered a "use" of Spill so define Spill before it
|
| - // is used.
|
| - Context.insert(InstFakeDef::create(Func, Spill));
|
| - _store(T_Lo, SpillLo);
|
| - _mov(T_Hi, hiOperand(Src0));
|
| - _store(T_Hi, SpillHi);
|
| - _movq(Dest, Spill);
|
| - } break;
|
| - case IceType_v8i1: {
|
| - assert(Src0->getType() == IceType_i8);
|
| - InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1);
|
| - Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
|
| - // Arguments to functions are required to be at least 32 bits wide.
|
| - lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
|
| - Call->addArg(Src0AsI32);
|
| - lowerCall(Call);
|
| - } break;
|
| - case IceType_v16i1: {
|
| - assert(Src0->getType() == IceType_i16);
|
| - InstCall *Call = makeHelperCall(H_bitcast_i16_16xi1, Dest, 1);
|
| - Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
|
| - // Arguments to functions are required to be at least 32 bits wide.
|
| - lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0));
|
| - Call->addArg(Src0AsI32);
|
| - lowerCall(Call);
|
| - } break;
|
| - case IceType_v8i16:
|
| - case IceType_v16i8:
|
| - case IceType_v4i32:
|
| - case IceType_v4f32: {
|
| - _movp(Dest, legalizeToVar(Src0));
|
| - } break;
|
| - }
|
| - break;
|
| - }
|
| - }
|
| -}
|
| -
|
| -void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
|
| - Operand *SourceVectNotLegalized = Inst->getSrc(0);
|
| - ConstantInteger32 *ElementIndex =
|
| - llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
|
| - // Only constant indices are allowed in PNaCl IR.
|
| - assert(ElementIndex);
|
| -
|
| - unsigned Index = ElementIndex->getValue();
|
| - Type Ty = SourceVectNotLegalized->getType();
|
| - Type ElementTy = typeElementType(Ty);
|
| - Type InVectorElementTy = getInVectorElementType(Ty);
|
| - Variable *ExtractedElementR = makeReg(InVectorElementTy);
|
| -
|
| - // TODO(wala): Determine the best lowering sequences for each type.
|
| - bool CanUsePextr =
|
| - Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
|
| - if (CanUsePextr && Ty != IceType_v4f32) {
|
| - // Use pextrb, pextrw, or pextrd.
|
| - Constant *Mask = Ctx->getConstantInt32(Index);
|
| - Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
|
| - _pextr(ExtractedElementR, SourceVectR, Mask);
|
| - } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
|
| - // Use pshufd and movd/movss.
|
| - Variable *T = nullptr;
|
| - if (Index) {
|
| - // The shuffle only needs to occur if the element to be extracted
|
| - // is not at the lowest index.
|
| - Constant *Mask = Ctx->getConstantInt32(Index);
|
| - T = makeReg(Ty);
|
| - _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
|
| - } else {
|
| - T = legalizeToVar(SourceVectNotLegalized);
|
| - }
|
| -
|
| - if (InVectorElementTy == IceType_i32) {
|
| - _movd(ExtractedElementR, T);
|
| - } else { // Ty == IceType_f32
|
| - // TODO(wala): _movss is only used here because _mov does not
|
| - // allow a vector source and a scalar destination. _mov should be
|
| - // able to be used here.
|
| - // _movss is a binary instruction, so the FakeDef is needed to
|
| - // keep the live range analysis consistent.
|
| - Context.insert(InstFakeDef::create(Func, ExtractedElementR));
|
| - _movss(ExtractedElementR, T);
|
| - }
|
| - } else {
|
| - assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
|
| - // Spill the value to a stack slot and do the extraction in memory.
|
| - //
|
| - // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
|
| - // support for legalizing to mem is implemented.
|
| - Variable *Slot = Func->makeVariable(Ty);
|
| - Slot->setWeight(RegWeight::Zero);
|
| - _movp(Slot, legalizeToVar(SourceVectNotLegalized));
|
| -
|
| - // Compute the location of the element in memory.
|
| - unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
|
| - OperandX8632Mem *Loc =
|
| - getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
|
| - _mov(ExtractedElementR, Loc);
|
| - }
|
| -
|
| - if (ElementTy == IceType_i1) {
|
| - // Truncate extracted integers to i1s if necessary.
|
| - Variable *T = makeReg(IceType_i1);
|
| - InstCast *Cast =
|
| - InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
|
| - lowerCast(Cast);
|
| - ExtractedElementR = T;
|
| - }
|
| -
|
| - // Copy the element to the destination.
|
| - Variable *Dest = Inst->getDest();
|
| - _mov(Dest, ExtractedElementR);
|
| -}
|
| -
|
| -void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
|
| - Operand *Src0 = Inst->getSrc(0);
|
| - Operand *Src1 = Inst->getSrc(1);
|
| - Variable *Dest = Inst->getDest();
|
| -
|
| - if (isVectorType(Dest->getType())) {
|
| - InstFcmp::FCond Condition = Inst->getCondition();
|
| - size_t Index = static_cast<size_t>(Condition);
|
| - assert(Index < TableFcmpSize);
|
| -
|
| - if (TableFcmp[Index].SwapVectorOperands) {
|
| - Operand *T = Src0;
|
| - Src0 = Src1;
|
| - Src1 = T;
|
| - }
|
| -
|
| - Variable *T = nullptr;
|
| -
|
| - if (Condition == InstFcmp::True) {
|
| - // makeVectorOfOnes() requires an integer vector type.
|
| - T = makeVectorOfMinusOnes(IceType_v4i32);
|
| - } else if (Condition == InstFcmp::False) {
|
| - T = makeVectorOfZeros(Dest->getType());
|
| - } else {
|
| - Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
|
| - Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - if (llvm::isa<OperandX8632Mem>(Src1RM))
|
| - Src1RM = legalizeToVar(Src1RM);
|
| -
|
| - switch (Condition) {
|
| - default: {
|
| - CondX86::CmppsCond Predicate = TableFcmp[Index].Predicate;
|
| - assert(Predicate != CondX86::Cmpps_Invalid);
|
| - T = makeReg(Src0RM->getType());
|
| - _movp(T, Src0RM);
|
| - _cmpps(T, Src1RM, Predicate);
|
| - } break;
|
| - case InstFcmp::One: {
|
| - // Check both unequal and ordered.
|
| - T = makeReg(Src0RM->getType());
|
| - Variable *T2 = makeReg(Src0RM->getType());
|
| - _movp(T, Src0RM);
|
| - _cmpps(T, Src1RM, CondX86::Cmpps_neq);
|
| - _movp(T2, Src0RM);
|
| - _cmpps(T2, Src1RM, CondX86::Cmpps_ord);
|
| - _pand(T, T2);
|
| - } break;
|
| - case InstFcmp::Ueq: {
|
| - // Check both equal or unordered.
|
| - T = makeReg(Src0RM->getType());
|
| - Variable *T2 = makeReg(Src0RM->getType());
|
| - _movp(T, Src0RM);
|
| - _cmpps(T, Src1RM, CondX86::Cmpps_eq);
|
| - _movp(T2, Src0RM);
|
| - _cmpps(T2, Src1RM, CondX86::Cmpps_unord);
|
| - _por(T, T2);
|
| - } break;
|
| - }
|
| - }
|
| -
|
| - _movp(Dest, T);
|
| - eliminateNextVectorSextInstruction(Dest);
|
| - return;
|
| - }
|
| -
|
| - // Lowering a = fcmp cond, b, c
|
| - // ucomiss b, c /* only if C1 != Br_None */
|
| - // /* but swap b,c order if SwapOperands==true */
|
| - // mov a, <default>
|
| - // j<C1> label /* only if C1 != Br_None */
|
| - // j<C2> label /* only if C2 != Br_None */
|
| - // FakeUse(a) /* only if C1 != Br_None */
|
| - // mov a, !<default> /* only if C1 != Br_None */
|
| - // label: /* only if C1 != Br_None */
|
| - //
|
| - // setcc lowering when C1 != Br_None && C2 == Br_None:
|
| - // ucomiss b, c /* but swap b,c order if SwapOperands==true */
|
| - // setcc a, C1
|
| - InstFcmp::FCond Condition = Inst->getCondition();
|
| - size_t Index = static_cast<size_t>(Condition);
|
| - assert(Index < TableFcmpSize);
|
| - if (TableFcmp[Index].SwapScalarOperands)
|
| - std::swap(Src0, Src1);
|
| - bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None);
|
| - bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None);
|
| - if (HasC1) {
|
| - Src0 = legalize(Src0);
|
| - Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
|
| - Variable *T = nullptr;
|
| - _mov(T, Src0);
|
| - _ucomiss(T, Src1RM);
|
| - if (!HasC2) {
|
| - assert(TableFcmp[Index].Default);
|
| - _setcc(Dest, TableFcmp[Index].C1);
|
| - return;
|
| - }
|
| - }
|
| - Constant *Default = Ctx->getConstantInt32(TableFcmp[Index].Default);
|
| - _mov(Dest, Default);
|
| - if (HasC1) {
|
| - InstX8632Label *Label = InstX8632Label::create(Func, this);
|
| - _br(TableFcmp[Index].C1, Label);
|
| - if (HasC2) {
|
| - _br(TableFcmp[Index].C2, Label);
|
| - }
|
| - Constant *NonDefault = Ctx->getConstantInt32(!TableFcmp[Index].Default);
|
| - _mov_nonkillable(Dest, NonDefault);
|
| - Context.insert(Label);
|
| - }
|
| -}
|
| -
|
| -void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
|
| - Operand *Src0 = legalize(Inst->getSrc(0));
|
| - Operand *Src1 = legalize(Inst->getSrc(1));
|
| - Variable *Dest = Inst->getDest();
|
| -
|
| - if (isVectorType(Dest->getType())) {
|
| - Type Ty = Src0->getType();
|
| - // Promote i1 vectors to 128 bit integer vector types.
|
| - if (typeElementType(Ty) == IceType_i1) {
|
| - Type NewTy = IceType_NUM;
|
| - switch (Ty) {
|
| - default:
|
| - llvm_unreachable("unexpected type");
|
| - break;
|
| - case IceType_v4i1:
|
| - NewTy = IceType_v4i32;
|
| - break;
|
| - case IceType_v8i1:
|
| - NewTy = IceType_v8i16;
|
| - break;
|
| - case IceType_v16i1:
|
| - NewTy = IceType_v16i8;
|
| - break;
|
| - }
|
| - Variable *NewSrc0 = Func->makeVariable(NewTy);
|
| - Variable *NewSrc1 = Func->makeVariable(NewTy);
|
| - lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0));
|
| - lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1));
|
| - Src0 = NewSrc0;
|
| - Src1 = NewSrc1;
|
| - Ty = NewTy;
|
| - }
|
| -
|
| - InstIcmp::ICond Condition = Inst->getCondition();
|
| -
|
| - Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
|
| - Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
|
| -
|
| - // SSE2 only has signed comparison operations. Transform unsigned
|
| - // inputs in a manner that allows for the use of signed comparison
|
| - // operations by flipping the high order bits.
|
| - if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
|
| - Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
|
| - Variable *T0 = makeReg(Ty);
|
| - Variable *T1 = makeReg(Ty);
|
| - Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
|
| - _movp(T0, Src0RM);
|
| - _pxor(T0, HighOrderBits);
|
| - _movp(T1, Src1RM);
|
| - _pxor(T1, HighOrderBits);
|
| - Src0RM = T0;
|
| - Src1RM = T1;
|
| - }
|
| -
|
| - Variable *T = makeReg(Ty);
|
| - switch (Condition) {
|
| - default:
|
| - llvm_unreachable("unexpected condition");
|
| - break;
|
| - case InstIcmp::Eq: {
|
| - if (llvm::isa<OperandX8632Mem>(Src1RM))
|
| - Src1RM = legalizeToVar(Src1RM);
|
| - _movp(T, Src0RM);
|
| - _pcmpeq(T, Src1RM);
|
| - } break;
|
| - case InstIcmp::Ne: {
|
| - if (llvm::isa<OperandX8632Mem>(Src1RM))
|
| - Src1RM = legalizeToVar(Src1RM);
|
| - _movp(T, Src0RM);
|
| - _pcmpeq(T, Src1RM);
|
| - Variable *MinusOne = makeVectorOfMinusOnes(Ty);
|
| - _pxor(T, MinusOne);
|
| - } break;
|
| - case InstIcmp::Ugt:
|
| - case InstIcmp::Sgt: {
|
| - if (llvm::isa<OperandX8632Mem>(Src1RM))
|
| - Src1RM = legalizeToVar(Src1RM);
|
| - _movp(T, Src0RM);
|
| - _pcmpgt(T, Src1RM);
|
| - } break;
|
| - case InstIcmp::Uge:
|
| - case InstIcmp::Sge: {
|
| - // !(Src1RM > Src0RM)
|
| - if (llvm::isa<OperandX8632Mem>(Src0RM))
|
| - Src0RM = legalizeToVar(Src0RM);
|
| - _movp(T, Src1RM);
|
| - _pcmpgt(T, Src0RM);
|
| - Variable *MinusOne = makeVectorOfMinusOnes(Ty);
|
| - _pxor(T, MinusOne);
|
| - } break;
|
| - case InstIcmp::Ult:
|
| - case InstIcmp::Slt: {
|
| - if (llvm::isa<OperandX8632Mem>(Src0RM))
|
| - Src0RM = legalizeToVar(Src0RM);
|
| - _movp(T, Src1RM);
|
| - _pcmpgt(T, Src0RM);
|
| - } break;
|
| - case InstIcmp::Ule:
|
| - case InstIcmp::Sle: {
|
| - // !(Src0RM > Src1RM)
|
| - if (llvm::isa<OperandX8632Mem>(Src1RM))
|
| - Src1RM = legalizeToVar(Src1RM);
|
| - _movp(T, Src0RM);
|
| - _pcmpgt(T, Src1RM);
|
| - Variable *MinusOne = makeVectorOfMinusOnes(Ty);
|
| - _pxor(T, MinusOne);
|
| - } break;
|
| - }
|
| -
|
| - _movp(Dest, T);
|
| - eliminateNextVectorSextInstruction(Dest);
|
| - return;
|
| - }
|
| -
|
| - // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
|
| - if (Src0->getType() == IceType_i64) {
|
| - InstIcmp::ICond Condition = Inst->getCondition();
|
| - size_t Index = static_cast<size_t>(Condition);
|
| - assert(Index < TableIcmp64Size);
|
| - Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
|
| - Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
|
| - Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
|
| - Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
|
| - Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| - Constant *One = Ctx->getConstantInt32(1);
|
| - InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
|
| - InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
|
| - _mov(Dest, One);
|
| - _cmp(Src0HiRM, Src1HiRI);
|
| - if (TableIcmp64[Index].C1 != CondX86::Br_None)
|
| - _br(TableIcmp64[Index].C1, LabelTrue);
|
| - if (TableIcmp64[Index].C2 != CondX86::Br_None)
|
| - _br(TableIcmp64[Index].C2, LabelFalse);
|
| - _cmp(Src0LoRM, Src1LoRI);
|
| - _br(TableIcmp64[Index].C3, LabelTrue);
|
| - Context.insert(LabelFalse);
|
| - _mov_nonkillable(Dest, Zero);
|
| - Context.insert(LabelTrue);
|
| - return;
|
| - }
|
| -
|
| - // cmp b, c
|
| - Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
|
| - _cmp(Src0RM, Src1);
|
| - _setcc(Dest, getIcmp32Mapping(Inst->getCondition()));
|
| -}
|
| -
|
| -void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
|
| - Operand *SourceVectNotLegalized = Inst->getSrc(0);
|
| - Operand *ElementToInsertNotLegalized = Inst->getSrc(1);
|
| - ConstantInteger32 *ElementIndex =
|
| - llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
|
| - // Only constant indices are allowed in PNaCl IR.
|
| - assert(ElementIndex);
|
| - unsigned Index = ElementIndex->getValue();
|
| - assert(Index < typeNumElements(SourceVectNotLegalized->getType()));
|
| -
|
| - Type Ty = SourceVectNotLegalized->getType();
|
| - Type ElementTy = typeElementType(Ty);
|
| - Type InVectorElementTy = getInVectorElementType(Ty);
|
| -
|
| - if (ElementTy == IceType_i1) {
|
| - // Expand the element to the appropriate size for it to be inserted
|
| - // in the vector.
|
| - Variable *Expanded = Func->makeVariable(InVectorElementTy);
|
| - InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
|
| - ElementToInsertNotLegalized);
|
| - lowerCast(Cast);
|
| - ElementToInsertNotLegalized = Expanded;
|
| - }
|
| -
|
| - if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) {
|
| - // Use insertps, pinsrb, pinsrw, or pinsrd.
|
| - Operand *ElementRM =
|
| - legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
|
| - Operand *SourceVectRM =
|
| - legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
|
| - Variable *T = makeReg(Ty);
|
| - _movp(T, SourceVectRM);
|
| - if (Ty == IceType_v4f32)
|
| - _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));
|
| - else
|
| - _pinsr(T, ElementRM, Ctx->getConstantInt32(Index));
|
| - _movp(Inst->getDest(), T);
|
| - } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
|
| - // Use shufps or movss.
|
| - Variable *ElementR = nullptr;
|
| - Operand *SourceVectRM =
|
| - legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
|
| -
|
| - if (InVectorElementTy == IceType_f32) {
|
| - // ElementR will be in an XMM register since it is floating point.
|
| - ElementR = legalizeToVar(ElementToInsertNotLegalized);
|
| - } else {
|
| - // Copy an integer to an XMM register.
|
| - Operand *T = legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);
|
| - ElementR = makeReg(Ty);
|
| - _movd(ElementR, T);
|
| - }
|
| -
|
| - if (Index == 0) {
|
| - Variable *T = makeReg(Ty);
|
| - _movp(T, SourceVectRM);
|
| - _movss(T, ElementR);
|
| - _movp(Inst->getDest(), T);
|
| - return;
|
| - }
|
| -
|
| - // shufps treats the source and desination operands as vectors of
|
| - // four doublewords. The destination's two high doublewords are
|
| - // selected from the source operand and the two low doublewords are
|
| - // selected from the (original value of) the destination operand.
|
| - // An insertelement operation can be effected with a sequence of two
|
| - // shufps operations with appropriate masks. In all cases below,
|
| - // Element[0] is being inserted into SourceVectOperand. Indices are
|
| - // ordered from left to right.
|
| - //
|
| - // insertelement into index 1 (result is stored in ElementR):
|
| - // ElementR := ElementR[0, 0] SourceVectRM[0, 0]
|
| - // ElementR := ElementR[3, 0] SourceVectRM[2, 3]
|
| - //
|
| - // insertelement into index 2 (result is stored in T):
|
| - // T := SourceVectRM
|
| - // ElementR := ElementR[0, 0] T[0, 3]
|
| - // T := T[0, 1] ElementR[0, 3]
|
| - //
|
| - // insertelement into index 3 (result is stored in T):
|
| - // T := SourceVectRM
|
| - // ElementR := ElementR[0, 0] T[0, 2]
|
| - // T := T[0, 1] ElementR[3, 0]
|
| - const unsigned char Mask1[3] = {0, 192, 128};
|
| - const unsigned char Mask2[3] = {227, 196, 52};
|
| -
|
| - Constant *Mask1Constant = Ctx->getConstantInt32(Mask1[Index - 1]);
|
| - Constant *Mask2Constant = Ctx->getConstantInt32(Mask2[Index - 1]);
|
| -
|
| - if (Index == 1) {
|
| - _shufps(ElementR, SourceVectRM, Mask1Constant);
|
| - _shufps(ElementR, SourceVectRM, Mask2Constant);
|
| - _movp(Inst->getDest(), ElementR);
|
| - } else {
|
| - Variable *T = makeReg(Ty);
|
| - _movp(T, SourceVectRM);
|
| - _shufps(ElementR, T, Mask1Constant);
|
| - _shufps(T, ElementR, Mask2Constant);
|
| - _movp(Inst->getDest(), T);
|
| - }
|
| - } else {
|
| - assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
|
| - // Spill the value to a stack slot and perform the insertion in
|
| - // memory.
|
| - //
|
| - // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
|
| - // support for legalizing to mem is implemented.
|
| - Variable *Slot = Func->makeVariable(Ty);
|
| - Slot->setWeight(RegWeight::Zero);
|
| - _movp(Slot, legalizeToVar(SourceVectNotLegalized));
|
| -
|
| - // Compute the location of the position to insert in memory.
|
| - unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
|
| - OperandX8632Mem *Loc =
|
| - getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
|
| - _store(legalizeToVar(ElementToInsertNotLegalized), Loc);
|
| -
|
| - Variable *T = makeReg(Ty);
|
| - _movp(T, Slot);
|
| - _movp(Inst->getDest(), T);
|
| - }
|
| -}
|
| -
|
| -void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
|
| - switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {
|
| - case Intrinsics::AtomicCmpxchg: {
|
| - if (!Intrinsics::isMemoryOrderValid(
|
| - ID, getConstantMemoryOrder(Instr->getArg(3)),
|
| - getConstantMemoryOrder(Instr->getArg(4)))) {
|
| - Func->setError("Unexpected memory ordering for AtomicCmpxchg");
|
| - return;
|
| - }
|
| - Variable *DestPrev = Instr->getDest();
|
| - Operand *PtrToMem = Instr->getArg(0);
|
| - Operand *Expected = Instr->getArg(1);
|
| - Operand *Desired = Instr->getArg(2);
|
| - if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired))
|
| - return;
|
| - lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
|
| - return;
|
| - }
|
| - case Intrinsics::AtomicFence:
|
| - if (!Intrinsics::isMemoryOrderValid(
|
| - ID, getConstantMemoryOrder(Instr->getArg(0)))) {
|
| - Func->setError("Unexpected memory ordering for AtomicFence");
|
| - return;
|
| - }
|
| - _mfence();
|
| - return;
|
| - case Intrinsics::AtomicFenceAll:
|
| - // NOTE: FenceAll should prevent and load/store from being moved
|
| - // across the fence (both atomic and non-atomic). The InstX8632Mfence
|
| - // instruction is currently marked coarsely as "HasSideEffects".
|
| - _mfence();
|
| - return;
|
| - case Intrinsics::AtomicIsLockFree: {
|
| - // X86 is always lock free for 8/16/32/64 bit accesses.
|
| - // TODO(jvoung): Since the result is constant when given a constant
|
| - // byte size, this opens up DCE opportunities.
|
| - Operand *ByteSize = Instr->getArg(0);
|
| - Variable *Dest = Instr->getDest();
|
| - if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
|
| - Constant *Result;
|
| - switch (CI->getValue()) {
|
| - default:
|
| - // Some x86-64 processors support the cmpxchg16b intruction, which
|
| - // can make 16-byte operations lock free (when used with the LOCK
|
| - // prefix). However, that's not supported in 32-bit mode, so just
|
| - // return 0 even for large sizes.
|
| - Result = Ctx->getConstantZero(IceType_i32);
|
| - break;
|
| - case 1:
|
| - case 2:
|
| - case 4:
|
| - case 8:
|
| - Result = Ctx->getConstantInt32(1);
|
| - break;
|
| - }
|
| - _mov(Dest, Result);
|
| - return;
|
| - }
|
| - // The PNaCl ABI requires the byte size to be a compile-time constant.
|
| - Func->setError("AtomicIsLockFree byte size should be compile-time const");
|
| - return;
|
| - }
|
| - case Intrinsics::AtomicLoad: {
|
| - // We require the memory address to be naturally aligned.
|
| - // Given that is the case, then normal loads are atomic.
|
| - if (!Intrinsics::isMemoryOrderValid(
|
| - ID, getConstantMemoryOrder(Instr->getArg(1)))) {
|
| - Func->setError("Unexpected memory ordering for AtomicLoad");
|
| - return;
|
| - }
|
| - Variable *Dest = Instr->getDest();
|
| - if (Dest->getType() == IceType_i64) {
|
| - // Follow what GCC does and use a movq instead of what lowerLoad()
|
| - // normally does (split the load into two).
|
| - // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
|
| - // can't happen anyway, since this is x86-32 and integer arithmetic only
|
| - // happens on 32-bit quantities.
|
| - Variable *T = makeReg(IceType_f64);
|
| - OperandX8632Mem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64);
|
| - _movq(T, Addr);
|
| - // Then cast the bits back out of the XMM register to the i64 Dest.
|
| - InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
|
| - lowerCast(Cast);
|
| - // Make sure that the atomic load isn't elided when unused.
|
| - Context.insert(InstFakeUse::create(Func, Dest->getLo()));
|
| - Context.insert(InstFakeUse::create(Func, Dest->getHi()));
|
| - return;
|
| - }
|
| - InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
|
| - lowerLoad(Load);
|
| - // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
|
| - // Since lowerLoad may fuse the load w/ an arithmetic instruction,
|
| - // insert the FakeUse on the last-inserted instruction's dest.
|
| - Context.insert(
|
| - InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
|
| - return;
|
| - }
|
| - case Intrinsics::AtomicRMW:
|
| - if (!Intrinsics::isMemoryOrderValid(
|
| - ID, getConstantMemoryOrder(Instr->getArg(3)))) {
|
| - Func->setError("Unexpected memory ordering for AtomicRMW");
|
| - return;
|
| - }
|
| - lowerAtomicRMW(
|
| - Instr->getDest(),
|
| - static_cast<uint32_t>(
|
| - llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
|
| - Instr->getArg(1), Instr->getArg(2));
|
| - return;
|
| - case Intrinsics::AtomicStore: {
|
| - if (!Intrinsics::isMemoryOrderValid(
|
| - ID, getConstantMemoryOrder(Instr->getArg(2)))) {
|
| - Func->setError("Unexpected memory ordering for AtomicStore");
|
| - return;
|
| - }
|
| - // We require the memory address to be naturally aligned.
|
| - // Given that is the case, then normal stores are atomic.
|
| - // Add a fence after the store to make it visible.
|
| - Operand *Value = Instr->getArg(0);
|
| - Operand *Ptr = Instr->getArg(1);
|
| - if (Value->getType() == IceType_i64) {
|
| - // Use a movq instead of what lowerStore() normally does
|
| - // (split the store into two), following what GCC does.
|
| - // Cast the bits from int -> to an xmm register first.
|
| - Variable *T = makeReg(IceType_f64);
|
| - InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
|
| - lowerCast(Cast);
|
| - // Then store XMM w/ a movq.
|
| - OperandX8632Mem *Addr = formMemoryOperand(Ptr, IceType_f64);
|
| - _storeq(T, Addr);
|
| - _mfence();
|
| - return;
|
| - }
|
| - InstStore *Store = InstStore::create(Func, Value, Ptr);
|
| - lowerStore(Store);
|
| - _mfence();
|
| - return;
|
| - }
|
| - case Intrinsics::Bswap: {
|
| - Variable *Dest = Instr->getDest();
|
| - Operand *Val = Instr->getArg(0);
|
| - // In 32-bit mode, bswap only works on 32-bit arguments, and the
|
| - // argument must be a register. Use rotate left for 16-bit bswap.
|
| - if (Val->getType() == IceType_i64) {
|
| - Variable *T_Lo = legalizeToVar(loOperand(Val));
|
| - Variable *T_Hi = legalizeToVar(hiOperand(Val));
|
| - Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
|
| - Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
|
| - _bswap(T_Lo);
|
| - _bswap(T_Hi);
|
| - _mov(DestLo, T_Hi);
|
| - _mov(DestHi, T_Lo);
|
| - } else if (Val->getType() == IceType_i32) {
|
| - Variable *T = legalizeToVar(Val);
|
| - _bswap(T);
|
| - _mov(Dest, T);
|
| - } else {
|
| - assert(Val->getType() == IceType_i16);
|
| - Val = legalize(Val);
|
| - Constant *Eight = Ctx->getConstantInt16(8);
|
| - Variable *T = nullptr;
|
| - _mov(T, Val);
|
| - _rol(T, Eight);
|
| - _mov(Dest, T);
|
| - }
|
| - return;
|
| - }
|
| - case Intrinsics::Ctpop: {
|
| - Variable *Dest = Instr->getDest();
|
| - Operand *Val = Instr->getArg(0);
|
| - InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
|
| - ? H_call_ctpop_i32
|
| - : H_call_ctpop_i64,
|
| - Dest, 1);
|
| - Call->addArg(Val);
|
| - lowerCall(Call);
|
| - // The popcount helpers always return 32-bit values, while the intrinsic's
|
| - // signature matches the native POPCNT instruction and fills a 64-bit reg
|
| - // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
|
| - // the user doesn't do that in the IR. If the user does that in the IR,
|
| - // then this zero'ing instruction is dead and gets optimized out.
|
| - if (Val->getType() == IceType_i64) {
|
| - Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
|
| - Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| - _mov(DestHi, Zero);
|
| - }
|
| - return;
|
| - }
|
| - case Intrinsics::Ctlz: {
|
| - // The "is zero undef" parameter is ignored and we always return
|
| - // a well-defined value.
|
| - Operand *Val = legalize(Instr->getArg(0));
|
| - Operand *FirstVal;
|
| - Operand *SecondVal = nullptr;
|
| - if (Val->getType() == IceType_i64) {
|
| - FirstVal = loOperand(Val);
|
| - SecondVal = hiOperand(Val);
|
| - } else {
|
| - FirstVal = Val;
|
| - }
|
| - const bool IsCttz = false;
|
| - lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
|
| - SecondVal);
|
| - return;
|
| - }
|
| - case Intrinsics::Cttz: {
|
| - // The "is zero undef" parameter is ignored and we always return
|
| - // a well-defined value.
|
| - Operand *Val = legalize(Instr->getArg(0));
|
| - Operand *FirstVal;
|
| - Operand *SecondVal = nullptr;
|
| - if (Val->getType() == IceType_i64) {
|
| - FirstVal = hiOperand(Val);
|
| - SecondVal = loOperand(Val);
|
| - } else {
|
| - FirstVal = Val;
|
| - }
|
| - const bool IsCttz = true;
|
| - lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
|
| - SecondVal);
|
| - return;
|
| - }
|
| - case Intrinsics::Fabs: {
|
| - Operand *Src = legalize(Instr->getArg(0));
|
| - Type Ty = Src->getType();
|
| - Variable *Dest = Instr->getDest();
|
| - Variable *T = makeVectorOfFabsMask(Ty);
|
| - // The pand instruction operates on an m128 memory operand, so if
|
| - // Src is an f32 or f64, we need to make sure it's in a register.
|
| - if (isVectorType(Ty)) {
|
| - if (llvm::isa<OperandX8632Mem>(Src))
|
| - Src = legalizeToVar(Src);
|
| - } else {
|
| - Src = legalizeToVar(Src);
|
| - }
|
| - _pand(T, Src);
|
| - if (isVectorType(Ty))
|
| - _movp(Dest, T);
|
| - else
|
| - _mov(Dest, T);
|
| - return;
|
| - }
|
| - case Intrinsics::Longjmp: {
|
| - InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
|
| - Call->addArg(Instr->getArg(0));
|
| - Call->addArg(Instr->getArg(1));
|
| - lowerCall(Call);
|
| - return;
|
| - }
|
| - case Intrinsics::Memcpy: {
|
| - // In the future, we could potentially emit an inline memcpy/memset, etc.
|
| - // for intrinsic calls w/ a known length.
|
| - InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
|
| - Call->addArg(Instr->getArg(0));
|
| - Call->addArg(Instr->getArg(1));
|
| - Call->addArg(Instr->getArg(2));
|
| - lowerCall(Call);
|
| - return;
|
| - }
|
| - case Intrinsics::Memmove: {
|
| - InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
|
| - Call->addArg(Instr->getArg(0));
|
| - Call->addArg(Instr->getArg(1));
|
| - Call->addArg(Instr->getArg(2));
|
| - lowerCall(Call);
|
| - return;
|
| - }
|
| - case Intrinsics::Memset: {
|
| - // The value operand needs to be extended to a stack slot size
|
| - // because the PNaCl ABI requires arguments to be at least 32 bits
|
| - // wide.
|
| - Operand *ValOp = Instr->getArg(1);
|
| - assert(ValOp->getType() == IceType_i8);
|
| - Variable *ValExt = Func->makeVariable(stackSlotType());
|
| - lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
|
| - InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
|
| - Call->addArg(Instr->getArg(0));
|
| - Call->addArg(ValExt);
|
| - Call->addArg(Instr->getArg(2));
|
| - lowerCall(Call);
|
| - return;
|
| - }
|
| - case Intrinsics::NaClReadTP: {
|
| - if (Ctx->getFlags().getUseSandboxing()) {
|
| - Constant *Zero = Ctx->getConstantZero(IceType_i32);
|
| - Operand *Src =
|
| - OperandX8632Mem::create(Func, IceType_i32, nullptr, Zero, nullptr, 0,
|
| - OperandX8632Mem::SegReg_GS);
|
| - Variable *Dest = Instr->getDest();
|
| - Variable *T = nullptr;
|
| - _mov(T, Src);
|
| - _mov(Dest, T);
|
| - } else {
|
| - InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
|
| - lowerCall(Call);
|
| - }
|
| - return;
|
| - }
|
| - case Intrinsics::Setjmp: {
|
| - InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
|
| - Call->addArg(Instr->getArg(0));
|
| - lowerCall(Call);
|
| - return;
|
| - }
|
| - case Intrinsics::Sqrt: {
|
| - Operand *Src = legalize(Instr->getArg(0));
|
| - Variable *Dest = Instr->getDest();
|
| - Variable *T = makeReg(Dest->getType());
|
| - _sqrtss(T, Src);
|
| - _mov(Dest, T);
|
| - return;
|
| - }
|
| - case Intrinsics::Stacksave: {
|
| - Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
|
| - Variable *Dest = Instr->getDest();
|
| - _mov(Dest, esp);
|
| - return;
|
| - }
|
| - case Intrinsics::Stackrestore: {
|
| - Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
|
| - _mov_nonkillable(esp, Instr->getArg(0));
|
| - return;
|
| - }
|
| - case Intrinsics::Trap:
|
| - _ud2();
|
| - return;
|
| - case Intrinsics::UnknownIntrinsic:
|
| - Func->setError("Should not be lowering UnknownIntrinsic");
|
| - return;
|
| - }
|
| - return;
|
| -}
|
| -
|
| -void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
|
| - Operand *Expected, Operand *Desired) {
|
| - if (Expected->getType() == IceType_i64) {
|
| - // Reserve the pre-colored registers first, before adding any more
|
| - // infinite-weight variables from formMemoryOperand's legalization.
|
| - Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
|
| - Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
|
| - Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
|
| - Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
|
| - _mov(T_eax, loOperand(Expected));
|
| - _mov(T_edx, hiOperand(Expected));
|
| - _mov(T_ebx, loOperand(Desired));
|
| - _mov(T_ecx, hiOperand(Desired));
|
| - OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());
|
| - const bool Locked = true;
|
| - _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
|
| - Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
|
| - Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
|
| - _mov(DestLo, T_eax);
|
| - _mov(DestHi, T_edx);
|
| - return;
|
| - }
|
| - Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax);
|
| - _mov(T_eax, Expected);
|
| - OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());
|
| - Variable *DesiredReg = legalizeToVar(Desired);
|
| - const bool Locked = true;
|
| - _cmpxchg(Addr, T_eax, DesiredReg, Locked);
|
| - _mov(DestPrev, T_eax);
|
| -}
|
| -
|
| -bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,
|
| - Operand *Expected,
|
| - Operand *Desired) {
|
| - if (Ctx->getFlags().getOptLevel() == Opt_m1)
|
| - return false;
|
| - // Peek ahead a few instructions and see how Dest is used.
|
| - // It's very common to have:
|
| - //
|
| - // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
|
| - // [%y_phi = ...] // list of phi stores
|
| - // %p = icmp eq i32 %x, %expected
|
| - // br i1 %p, label %l1, label %l2
|
| - //
|
| - // which we can optimize into:
|
| - //
|
| - // %x = <cmpxchg code>
|
| - // [%y_phi = ...] // list of phi stores
|
| - // br eq, %l1, %l2
|
| - InstList::iterator I = Context.getCur();
|
| - // I is currently the InstIntrinsicCall. Peek past that.
|
| - // This assumes that the atomic cmpxchg has not been lowered yet,
|
| - // so that the instructions seen in the scan from "Cur" is simple.
|
| - assert(llvm::isa<InstIntrinsicCall>(*I));
|
| - Inst *NextInst = Context.getNextInst(I);
|
| - if (!NextInst)
|
| - return false;
|
| - // There might be phi assignments right before the compare+branch, since this
|
| - // could be a backward branch for a loop. This placement of assignments is
|
| - // determined by placePhiStores().
|
| - std::vector<InstAssign *> PhiAssigns;
|
| - while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) {
|
| - if (PhiAssign->getDest() == Dest)
|
| - return false;
|
| - PhiAssigns.push_back(PhiAssign);
|
| - NextInst = Context.getNextInst(I);
|
| - if (!NextInst)
|
| - return false;
|
| - }
|
| - if (InstIcmp *NextCmp = llvm::dyn_cast<InstIcmp>(NextInst)) {
|
| - if (!(NextCmp->getCondition() == InstIcmp::Eq &&
|
| - ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) ||
|
| - (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) {
|
| - return false;
|
| - }
|
| - NextInst = Context.getNextInst(I);
|
| - if (!NextInst)
|
| - return false;
|
| - if (InstBr *NextBr = llvm::dyn_cast<InstBr>(NextInst)) {
|
| - if (!NextBr->isUnconditional() &&
|
| - NextCmp->getDest() == NextBr->getCondition() &&
|
| - NextBr->isLastUse(NextCmp->getDest())) {
|
| - lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
|
| - for (size_t i = 0; i < PhiAssigns.size(); ++i) {
|
| - // Lower the phi assignments now, before the branch (same placement
|
| - // as before).
|
| - InstAssign *PhiAssign = PhiAssigns[i];
|
| - PhiAssign->setDeleted();
|
| - lowerAssign(PhiAssign);
|
| - Context.advanceNext();
|
| - }
|
| - _br(CondX86::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse());
|
| - // Skip over the old compare and branch, by deleting them.
|
| - NextCmp->setDeleted();
|
| - NextBr->setDeleted();
|
| - Context.advanceNext();
|
| - Context.advanceNext();
|
| - return true;
|
| - }
|
| - }
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
|
| - Operand *Ptr, Operand *Val) {
|
| - bool NeedsCmpxchg = false;
|
| - LowerBinOp Op_Lo = nullptr;
|
| - LowerBinOp Op_Hi = nullptr;
|
| - switch (Operation) {
|
| - default:
|
| - Func->setError("Unknown AtomicRMW operation");
|
| - return;
|
| - case Intrinsics::AtomicAdd: {
|
| - if (Dest->getType() == IceType_i64) {
|
| - // All the fall-through paths must set this to true, but use this
|
| - // for asserting.
|
| - NeedsCmpxchg = true;
|
| - Op_Lo = &TargetX8632::_add;
|
| - Op_Hi = &TargetX8632::_adc;
|
| - break;
|
| - }
|
| - OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
|
| - const bool Locked = true;
|
| - Variable *T = nullptr;
|
| - _mov(T, Val);
|
| - _xadd(Addr, T, Locked);
|
| - _mov(Dest, T);
|
| - return;
|
| - }
|
| - case Intrinsics::AtomicSub: {
|
| - if (Dest->getType() == IceType_i64) {
|
| - NeedsCmpxchg = true;
|
| - Op_Lo = &TargetX8632::_sub;
|
| - Op_Hi = &TargetX8632::_sbb;
|
| - break;
|
| - }
|
| - OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
|
| - const bool Locked = true;
|
| - Variable *T = nullptr;
|
| - _mov(T, Val);
|
| - _neg(T);
|
| - _xadd(Addr, T, Locked);
|
| - _mov(Dest, T);
|
| - return;
|
| - }
|
| - case Intrinsics::AtomicOr:
|
| - // TODO(jvoung): If Dest is null or dead, then some of these
|
| - // operations do not need an "exchange", but just a locked op.
|
| - // That appears to be "worth" it for sub, or, and, and xor.
|
| - // xadd is probably fine vs lock add for add, and xchg is fine
|
| - // vs an atomic store.
|
| - NeedsCmpxchg = true;
|
| - Op_Lo = &TargetX8632::_or;
|
| - Op_Hi = &TargetX8632::_or;
|
| - break;
|
| - case Intrinsics::AtomicAnd:
|
| - NeedsCmpxchg = true;
|
| - Op_Lo = &TargetX8632::_and;
|
| - Op_Hi = &TargetX8632::_and;
|
| - break;
|
| - case Intrinsics::AtomicXor:
|
| - NeedsCmpxchg = true;
|
| - Op_Lo = &TargetX8632::_xor;
|
| - Op_Hi = &TargetX8632::_xor;
|
| - break;
|
| - case Intrinsics::AtomicExchange:
|
| - if (Dest->getType() == IceType_i64) {
|
| - NeedsCmpxchg = true;
|
| - // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
|
| - // just need to be moved to the ecx and ebx registers.
|
| - Op_Lo = nullptr;
|
| - Op_Hi = nullptr;
|
| - break;
|
| - }
|
| - OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
|
| - Variable *T = nullptr;
|
| - _mov(T, Val);
|
| - _xchg(Addr, T);
|
| - _mov(Dest, T);
|
| - return;
|
| - }
|
| - // Otherwise, we need a cmpxchg loop.
|
| - (void)NeedsCmpxchg;
|
| - assert(NeedsCmpxchg);
|
| - expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
|
| -}
|
| -
|
| -void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
|
| - Variable *Dest, Operand *Ptr,
|
| - Operand *Val) {
|
| - // Expand a more complex RMW operation as a cmpxchg loop:
|
| - // For 64-bit:
|
| - // mov eax, [ptr]
|
| - // mov edx, [ptr + 4]
|
| - // .LABEL:
|
| - // mov ebx, eax
|
| - // <Op_Lo> ebx, <desired_adj_lo>
|
| - // mov ecx, edx
|
| - // <Op_Hi> ecx, <desired_adj_hi>
|
| - // lock cmpxchg8b [ptr]
|
| - // jne .LABEL
|
| - // mov <dest_lo>, eax
|
| - // mov <dest_lo>, edx
|
| - //
|
| - // For 32-bit:
|
| - // mov eax, [ptr]
|
| - // .LABEL:
|
| - // mov <reg>, eax
|
| - // op <reg>, [desired_adj]
|
| - // lock cmpxchg [ptr], <reg>
|
| - // jne .LABEL
|
| - // mov <dest>, eax
|
| - //
|
| - // If Op_{Lo,Hi} are nullptr, then just copy the value.
|
| - Val = legalize(Val);
|
| - Type Ty = Val->getType();
|
| - if (Ty == IceType_i64) {
|
| - Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
|
| - Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
|
| - OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty);
|
| - _mov(T_eax, loOperand(Addr));
|
| - _mov(T_edx, hiOperand(Addr));
|
| - Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
|
| - Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
|
| - InstX8632Label *Label = InstX8632Label::create(Func, this);
|
| - const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
|
| - if (!IsXchg8b) {
|
| - Context.insert(Label);
|
| - _mov(T_ebx, T_eax);
|
| - (this->*Op_Lo)(T_ebx, loOperand(Val));
|
| - _mov(T_ecx, T_edx);
|
| - (this->*Op_Hi)(T_ecx, hiOperand(Val));
|
| - } else {
|
| - // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
|
| - // It just needs the Val loaded into ebx and ecx.
|
| - // That can also be done before the loop.
|
| - _mov(T_ebx, loOperand(Val));
|
| - _mov(T_ecx, hiOperand(Val));
|
| - Context.insert(Label);
|
| - }
|
| - const bool Locked = true;
|
| - _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
|
| - _br(CondX86::Br_ne, Label);
|
| - if (!IsXchg8b) {
|
| - // If Val is a variable, model the extended live range of Val through
|
| - // the end of the loop, since it will be re-used by the loop.
|
| - if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
|
| - Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
|
| - Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
|
| - Context.insert(InstFakeUse::create(Func, ValLo));
|
| - Context.insert(InstFakeUse::create(Func, ValHi));
|
| - }
|
| - } else {
|
| - // For xchg, the loop is slightly smaller and ebx/ecx are used.
|
| - Context.insert(InstFakeUse::create(Func, T_ebx));
|
| - Context.insert(InstFakeUse::create(Func, T_ecx));
|
| - }
|
| - // The address base (if any) is also reused in the loop.
|
| - if (Variable *Base = Addr->getBase())
|
| - Context.insert(InstFakeUse::create(Func, Base));
|
| - Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
|
| - Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
|
| - _mov(DestLo, T_eax);
|
| - _mov(DestHi, T_edx);
|
| - return;
|
| - }
|
| - OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty);
|
| - Variable *T_eax = makeReg(Ty, RegX8632::Reg_eax);
|
| - _mov(T_eax, Addr);
|
| - InstX8632Label *Label = InstX8632Label::create(Func, this);
|
| - Context.insert(Label);
|
| - // We want to pick a different register for T than Eax, so don't use
|
| - // _mov(T == nullptr, T_eax).
|
| - Variable *T = makeReg(Ty);
|
| - _mov(T, T_eax);
|
| - (this->*Op_Lo)(T, Val);
|
| - const bool Locked = true;
|
| - _cmpxchg(Addr, T_eax, T, Locked);
|
| - _br(CondX86::Br_ne, Label);
|
| - // If Val is a variable, model the extended live range of Val through
|
| - // the end of the loop, since it will be re-used by the loop.
|
| - if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
|
| - Context.insert(InstFakeUse::create(Func, ValVar));
|
| - }
|
| - // The address base (if any) is also reused in the loop.
|
| - if (Variable *Base = Addr->getBase())
|
| - Context.insert(InstFakeUse::create(Func, Base));
|
| - _mov(Dest, T_eax);
|
| -}
|
| -
|
| -// Lowers count {trailing, leading} zeros intrinsic.
|
| -//
|
| -// We could do constant folding here, but that should have
|
| -// been done by the front-end/middle-end optimizations.
|
| -void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
|
| - Operand *FirstVal, Operand *SecondVal) {
|
| - // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
|
| - // Then the instructions will handle the Val == 0 case much more simply
|
| - // and won't require conversion from bit position to number of zeros.
|
| - //
|
| - // Otherwise:
|
| - // bsr IF_NOT_ZERO, Val
|
| - // mov T_DEST, 63
|
| - // cmovne T_DEST, IF_NOT_ZERO
|
| - // xor T_DEST, 31
|
| - // mov DEST, T_DEST
|
| - //
|
| - // NOTE: T_DEST must be a register because cmov requires its dest to be a
|
| - // register. Also, bsf and bsr require their dest to be a register.
|
| - //
|
| - // The xor DEST, 31 converts a bit position to # of leading zeroes.
|
| - // E.g., for 000... 00001100, bsr will say that the most significant bit
|
| - // set is at position 3, while the number of leading zeros is 28. Xor is
|
| - // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros case).
|
| - //
|
| - // Similar for 64-bit, but start w/ speculating that the upper 32 bits
|
| - // are all zero, and compute the result for that case (checking the lower
|
| - // 32 bits). Then actually compute the result for the upper bits and
|
| - // cmov in the result from the lower computation if the earlier speculation
|
| - // was correct.
|
| - //
|
| - // Cttz, is similar, but uses bsf instead, and doesn't require the xor
|
| - // bit position conversion, and the speculation is reversed.
|
| - assert(Ty == IceType_i32 || Ty == IceType_i64);
|
| - Variable *T = makeReg(IceType_i32);
|
| - Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
|
| - if (Cttz) {
|
| - _bsf(T, FirstValRM);
|
| - } else {
|
| - _bsr(T, FirstValRM);
|
| - }
|
| - Variable *T_Dest = makeReg(IceType_i32);
|
| - Constant *ThirtyTwo = Ctx->getConstantInt32(32);
|
| - Constant *ThirtyOne = Ctx->getConstantInt32(31);
|
| - if (Cttz) {
|
| - _mov(T_Dest, ThirtyTwo);
|
| - } else {
|
| - Constant *SixtyThree = Ctx->getConstantInt32(63);
|
| - _mov(T_Dest, SixtyThree);
|
| - }
|
| - _cmov(T_Dest, T, CondX86::Br_ne);
|
| - if (!Cttz) {
|
| - _xor(T_Dest, ThirtyOne);
|
| - }
|
| - if (Ty == IceType_i32) {
|
| - _mov(Dest, T_Dest);
|
| - return;
|
| - }
|
| - _add(T_Dest, ThirtyTwo);
|
| - Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
|
| - Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
|
| - // Will be using "test" on this, so we need a registerized variable.
|
| - Variable *SecondVar = legalizeToVar(SecondVal);
|
| - Variable *T_Dest2 = makeReg(IceType_i32);
|
| - if (Cttz) {
|
| - _bsf(T_Dest2, SecondVar);
|
| - } else {
|
| - _bsr(T_Dest2, SecondVar);
|
| - _xor(T_Dest2, ThirtyOne);
|
| - }
|
| - _test(SecondVar, SecondVar);
|
| - _cmov(T_Dest2, T_Dest, CondX86::Br_e);
|
| - _mov(DestLo, T_Dest2);
|
| - _mov(DestHi, Ctx->getConstantZero(IceType_i32));
|
| -}
|
| -
|
| -namespace {
|
| -
|
| -bool isAdd(const Inst *Inst) {
|
| - if (const InstArithmetic *Arith =
|
| - llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
|
| - return (Arith->getOp() == InstArithmetic::Add);
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -void dumpAddressOpt(const Cfg *Func, const Variable *Base,
|
| - const Variable *Index, uint16_t Shift, int32_t Offset,
|
| - const Inst *Reason) {
|
| - if (!ALLOW_DUMP)
|
| - return;
|
| - if (!Func->isVerbose(IceV_AddrOpt))
|
| - return;
|
| - OstreamLocker L(Func->getContext());
|
| - Ostream &Str = Func->getContext()->getStrDump();
|
| - Str << "Instruction: ";
|
| - Reason->dumpDecorated(Func);
|
| - Str << " results in Base=";
|
| - if (Base)
|
| - Base->dump(Func);
|
| - else
|
| - Str << "<null>";
|
| - Str << ", Index=";
|
| - if (Index)
|
| - Index->dump(Func);
|
| - else
|
| - Str << "<null>";
|
| - Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n";
|
| -}
|
| -
|
| -bool matchTransitiveAssign(const VariablesMetadata *VMetadata, Variable *&Var,
|
| - const Inst *&Reason) {
|
| - // Var originates from Var=SrcVar ==>
|
| - // set Var:=SrcVar
|
| - if (Var == nullptr)
|
| - return false;
|
| - if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) {
|
| - assert(!VMetadata->isMultiDef(Var));
|
| - if (llvm::isa<InstAssign>(VarAssign)) {
|
| - Operand *SrcOp = VarAssign->getSrc(0);
|
| - assert(SrcOp);
|
| - if (Variable *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
|
| - if (!VMetadata->isMultiDef(SrcVar) &&
|
| - // TODO: ensure SrcVar stays single-BB
|
| - true) {
|
| - Var = SrcVar;
|
| - Reason = VarAssign;
|
| - return true;
|
| - }
|
| - }
|
| - }
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable *&Base,
|
| - Variable *&Index, uint16_t &Shift,
|
| - const Inst *&Reason) {
|
| - // Index==nullptr && Base is Base=Var1+Var2 ==>
|
| - // set Base=Var1, Index=Var2, Shift=0
|
| - if (Base == nullptr)
|
| - return false;
|
| - if (Index != nullptr)
|
| - return false;
|
| - const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
|
| - if (BaseInst == nullptr)
|
| - return false;
|
| - assert(!VMetadata->isMultiDef(Base));
|
| - if (BaseInst->getSrcSize() < 2)
|
| - return false;
|
| - if (Variable *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0))) {
|
| - if (VMetadata->isMultiDef(Var1))
|
| - return false;
|
| - if (Variable *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1))) {
|
| - if (VMetadata->isMultiDef(Var2))
|
| - return false;
|
| - if (isAdd(BaseInst) &&
|
| - // TODO: ensure Var1 and Var2 stay single-BB
|
| - true) {
|
| - Base = Var1;
|
| - Index = Var2;
|
| - Shift = 0; // should already have been 0
|
| - Reason = BaseInst;
|
| - return true;
|
| - }
|
| - }
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -bool matchShiftedIndex(const VariablesMetadata *VMetadata, Variable *&Index,
|
| - uint16_t &Shift, const Inst *&Reason) {
|
| - // Index is Index=Var*Const && log2(Const)+Shift<=3 ==>
|
| - // Index=Var, Shift+=log2(Const)
|
| - if (Index == nullptr)
|
| - return false;
|
| - const Inst *IndexInst = VMetadata->getSingleDefinition(Index);
|
| - if (IndexInst == nullptr)
|
| - return false;
|
| - assert(!VMetadata->isMultiDef(Index));
|
| - if (IndexInst->getSrcSize() < 2)
|
| - return false;
|
| - if (const InstArithmetic *ArithInst =
|
| - llvm::dyn_cast<InstArithmetic>(IndexInst)) {
|
| - if (Variable *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
|
| - if (ConstantInteger32 *Const =
|
| - llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) {
|
| - if (ArithInst->getOp() == InstArithmetic::Mul &&
|
| - !VMetadata->isMultiDef(Var) && Const->getType() == IceType_i32) {
|
| - uint64_t Mult = Const->getValue();
|
| - uint32_t LogMult;
|
| - switch (Mult) {
|
| - case 1:
|
| - LogMult = 0;
|
| - break;
|
| - case 2:
|
| - LogMult = 1;
|
| - break;
|
| - case 4:
|
| - LogMult = 2;
|
| - break;
|
| - case 8:
|
| - LogMult = 3;
|
| - break;
|
| - default:
|
| - return false;
|
| - }
|
| - if (Shift + LogMult <= 3) {
|
| - Index = Var;
|
| - Shift += LogMult;
|
| - Reason = IndexInst;
|
| - return true;
|
| - }
|
| - }
|
| - }
|
| - }
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base,
|
| - int32_t &Offset, const Inst *&Reason) {
|
| - // Base is Base=Var+Const || Base is Base=Const+Var ==>
|
| - // set Base=Var, Offset+=Const
|
| - // Base is Base=Var-Const ==>
|
| - // set Base=Var, Offset-=Const
|
| - if (Base == nullptr)
|
| - return false;
|
| - const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
|
| - if (BaseInst == nullptr)
|
| - return false;
|
| - assert(!VMetadata->isMultiDef(Base));
|
| - if (const InstArithmetic *ArithInst =
|
| - llvm::dyn_cast<const InstArithmetic>(BaseInst)) {
|
| - if (ArithInst->getOp() != InstArithmetic::Add &&
|
| - ArithInst->getOp() != InstArithmetic::Sub)
|
| - return false;
|
| - bool IsAdd = ArithInst->getOp() == InstArithmetic::Add;
|
| - Variable *Var = nullptr;
|
| - ConstantInteger32 *Const = nullptr;
|
| - if (Variable *VariableOperand =
|
| - llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
|
| - Var = VariableOperand;
|
| - Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
|
| - } else if (IsAdd) {
|
| - Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(0));
|
| - Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1));
|
| - }
|
| - if (Var == nullptr || Const == nullptr || VMetadata->isMultiDef(Var))
|
| - return false;
|
| - int32_t MoreOffset = IsAdd ? Const->getValue() : -Const->getValue();
|
| - if (Utils::WouldOverflowAdd(Offset, MoreOffset))
|
| - return false;
|
| - Base = Var;
|
| - Offset += MoreOffset;
|
| - Reason = BaseInst;
|
| - return true;
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
|
| - Variable *&Index, uint16_t &Shift, int32_t &Offset) {
|
| - Func->resetCurrentNode();
|
| - if (Func->isVerbose(IceV_AddrOpt)) {
|
| - OstreamLocker L(Func->getContext());
|
| - Ostream &Str = Func->getContext()->getStrDump();
|
| - Str << "\nStarting computeAddressOpt for instruction:\n ";
|
| - Instr->dumpDecorated(Func);
|
| - }
|
| - (void)Offset; // TODO: pattern-match for non-zero offsets.
|
| - if (Base == nullptr)
|
| - return;
|
| - // If the Base has more than one use or is live across multiple
|
| - // blocks, then don't go further. Alternatively (?), never consider
|
| - // a transformation that would change a variable that is currently
|
| - // *not* live across basic block boundaries into one that *is*.
|
| - if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/)
|
| - return;
|
| -
|
| - const VariablesMetadata *VMetadata = Func->getVMetadata();
|
| - bool Continue = true;
|
| - while (Continue) {
|
| - const Inst *Reason = nullptr;
|
| - if (matchTransitiveAssign(VMetadata, Base, Reason) ||
|
| - matchTransitiveAssign(VMetadata, Index, Reason) ||
|
| - matchCombinedBaseIndex(VMetadata, Base, Index, Shift, Reason) ||
|
| - matchShiftedIndex(VMetadata, Index, Shift, Reason) ||
|
| - matchOffsetBase(VMetadata, Base, Offset, Reason)) {
|
| - dumpAddressOpt(Func, Base, Index, Shift, Offset, Reason);
|
| - } else {
|
| - Continue = false;
|
| - }
|
| -
|
| - // Index is Index=Var<<Const && Const+Shift<=3 ==>
|
| - // Index=Var, Shift+=Const
|
| -
|
| - // Index is Index=Const*Var && log2(Const)+Shift<=3 ==>
|
| - // Index=Var, Shift+=log2(Const)
|
| -
|
| - // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
|
| - // swap(Index,Base)
|
| - // Similar for Base=Const*Var and Base=Var<<Const
|
| -
|
| - // Index is Index=Var+Const ==>
|
| - // set Index=Var, Offset+=(Const<<Shift)
|
| -
|
| - // Index is Index=Const+Var ==>
|
| - // set Index=Var, Offset+=(Const<<Shift)
|
| -
|
| - // Index is Index=Var-Const ==>
|
| - // set Index=Var, Offset-=(Const<<Shift)
|
| -
|
| - // TODO: consider overflow issues with respect to Offset.
|
| - // TODO: handle symbolic constants.
|
| - }
|
| -}
|
| -
|
| -} // anonymous namespace
|
| -
|
| -void TargetX8632::lowerLoad(const InstLoad *Load) {
|
| - // A Load instruction can be treated the same as an Assign
|
| - // instruction, after the source operand is transformed into an
|
| - // OperandX8632Mem operand. Note that the address mode
|
| - // optimization already creates an OperandX8632Mem operand, so it
|
| - // doesn't need another level of transformation.
|
| - Variable *DestLoad = Load->getDest();
|
| - Type Ty = DestLoad->getType();
|
| - Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
|
| - InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
|
| - lowerAssign(Assign);
|
| -}
|
| -
|
| -void TargetX8632::doAddressOptLoad() {
|
| - Inst *Inst = Context.getCur();
|
| - Variable *Dest = Inst->getDest();
|
| - Operand *Addr = Inst->getSrc(0);
|
| - Variable *Index = nullptr;
|
| - uint16_t Shift = 0;
|
| - int32_t Offset = 0; // TODO: make Constant
|
| - // Vanilla ICE load instructions should not use the segment registers,
|
| - // and computeAddressOpt only works at the level of Variables and Constants,
|
| - // not other OperandX8632Mem, so there should be no mention of segment
|
| - // registers there either.
|
| - const OperandX8632Mem::SegmentRegisters SegmentReg =
|
| - OperandX8632Mem::DefaultSegment;
|
| - Variable *Base = llvm::dyn_cast<Variable>(Addr);
|
| - computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
|
| - if (Base && Addr != Base) {
|
| - Inst->setDeleted();
|
| - Constant *OffsetOp = Ctx->getConstantInt32(Offset);
|
| - Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
|
| - Shift, SegmentReg);
|
| - Context.insert(InstLoad::create(Func, Dest, Addr));
|
| - }
|
| -}
|
| -
|
| -void TargetX8632::randomlyInsertNop(float Probability) {
|
| - RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
|
| - if (RNG.getTrueWithProbability(Probability)) {
|
| - _nop(RNG(X86_NUM_NOP_VARIANTS));
|
| - }
|
| -}
|
| -
|
| -void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
|
| - Func->setError("Phi found in regular instruction list");
|
| -}
|
| -
|
| -void TargetX8632::lowerRet(const InstRet *Inst) {
|
| - Variable *Reg = nullptr;
|
| - if (Inst->hasRetValue()) {
|
| - Operand *Src0 = legalize(Inst->getRetValue());
|
| - if (Src0->getType() == IceType_i64) {
|
| - Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax);
|
| - Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx);
|
| - Reg = eax;
|
| - Context.insert(InstFakeUse::create(Func, edx));
|
| - } else if (isScalarFloatingType(Src0->getType())) {
|
| - _fld(Src0);
|
| - } else if (isVectorType(Src0->getType())) {
|
| - Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0);
|
| - } else {
|
| - _mov(Reg, Src0, RegX8632::Reg_eax);
|
| - }
|
| - }
|
| - // Add a ret instruction even if sandboxing is enabled, because
|
| - // addEpilog explicitly looks for a ret instruction as a marker for
|
| - // where to insert the frame removal instructions.
|
| - _ret(Reg);
|
| - // Add a fake use of esp to make sure esp stays alive for the entire
|
| - // function. Otherwise post-call esp adjustments get dead-code
|
| - // eliminated. TODO: Are there more places where the fake use
|
| - // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
|
| - // have a ret instruction.
|
| - Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
|
| - Context.insert(InstFakeUse::create(Func, esp));
|
| -}
|
| -
|
| -void TargetX8632::lowerSelect(const InstSelect *Inst) {
|
| - Variable *Dest = Inst->getDest();
|
| - Type DestTy = Dest->getType();
|
| - Operand *SrcT = Inst->getTrueOperand();
|
| - Operand *SrcF = Inst->getFalseOperand();
|
| - Operand *Condition = Inst->getCondition();
|
| -
|
| - if (isVectorType(DestTy)) {
|
| - Type SrcTy = SrcT->getType();
|
| - Variable *T = makeReg(SrcTy);
|
| - Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
|
| - Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
|
| - if (InstructionSet >= SSE4_1) {
|
| - // TODO(wala): If the condition operand is a constant, use blendps
|
| - // or pblendw.
|
| - //
|
| - // Use blendvps or pblendvb to implement select.
|
| - if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
|
| - SrcTy == IceType_v4f32) {
|
| - Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
|
| - Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0);
|
| - _movp(xmm0, ConditionRM);
|
| - _psll(xmm0, Ctx->getConstantInt8(31));
|
| - _movp(T, SrcFRM);
|
| - _blendvps(T, SrcTRM, xmm0);
|
| - _movp(Dest, T);
|
| - } else {
|
| - assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
|
| - Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
|
| - : IceType_v16i8;
|
| - Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0);
|
| - lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
|
| - _movp(T, SrcFRM);
|
| - _pblendvb(T, SrcTRM, xmm0);
|
| - _movp(Dest, T);
|
| - }
|
| - return;
|
| - }
|
| - // Lower select without SSE4.1:
|
| - // a=d?b:c ==>
|
| - // if elementtype(d) != i1:
|
| - // d=sext(d);
|
| - // a=(b&d)|(c&~d);
|
| - Variable *T2 = makeReg(SrcTy);
|
| - // Sign extend the condition operand if applicable.
|
| - if (SrcTy == IceType_v4f32) {
|
| - // The sext operation takes only integer arguments.
|
| - Variable *T3 = Func->makeVariable(IceType_v4i32);
|
| - lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
|
| - _movp(T, T3);
|
| - } else if (typeElementType(SrcTy) != IceType_i1) {
|
| - lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
|
| - } else {
|
| - Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
|
| - _movp(T, ConditionRM);
|
| - }
|
| - _movp(T2, T);
|
| - _pand(T, SrcTRM);
|
| - _pandn(T2, SrcFRM);
|
| - _por(T, T2);
|
| - _movp(Dest, T);
|
| -
|
| - return;
|
| - }
|
| -
|
| - CondX86::BrCond Cond = CondX86::Br_ne;
|
| - Operand *CmpOpnd0 = nullptr;
|
| - Operand *CmpOpnd1 = nullptr;
|
| - // Handle folding opportunities.
|
| - if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
|
| - assert(Producer->isDeleted());
|
| - switch (BoolFolding::getProducerKind(Producer)) {
|
| - default:
|
| - break;
|
| - case BoolFolding::PK_Icmp32: {
|
| - auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
|
| - Cond = getIcmp32Mapping(Cmp->getCondition());
|
| - CmpOpnd1 = legalize(Producer->getSrc(1));
|
| - CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1);
|
| - } break;
|
| - }
|
| - }
|
| - if (CmpOpnd0 == nullptr) {
|
| - CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem);
|
| - CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
|
| - }
|
| - assert(CmpOpnd0);
|
| - assert(CmpOpnd1);
|
| -
|
| - _cmp(CmpOpnd0, CmpOpnd1);
|
| - if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
|
| - // The cmov instruction doesn't allow 8-bit or FP operands, so
|
| - // we need explicit control flow.
|
| - // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
|
| - InstX8632Label *Label = InstX8632Label::create(Func, this);
|
| - SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
|
| - _mov(Dest, SrcT);
|
| - _br(Cond, Label);
|
| - SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
|
| - _mov_nonkillable(Dest, SrcF);
|
| - Context.insert(Label);
|
| - return;
|
| - }
|
| - // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
|
| - // But if SrcT is immediate, we might be able to do better, as
|
| - // the cmov instruction doesn't allow an immediate operand:
|
| - // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
|
| - if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
|
| - std::swap(SrcT, SrcF);
|
| - Cond = InstX8632::getOppositeCondition(Cond);
|
| - }
|
| - if (DestTy == IceType_i64) {
|
| - // Set the low portion.
|
| - Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
|
| - Variable *TLo = nullptr;
|
| - Operand *SrcFLo = legalize(loOperand(SrcF));
|
| - _mov(TLo, SrcFLo);
|
| - Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
|
| - _cmov(TLo, SrcTLo, Cond);
|
| - _mov(DestLo, TLo);
|
| - // Set the high portion.
|
| - Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
|
| - Variable *THi = nullptr;
|
| - Operand *SrcFHi = legalize(hiOperand(SrcF));
|
| - _mov(THi, SrcFHi);
|
| - Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem);
|
| - _cmov(THi, SrcTHi, Cond);
|
| - _mov(DestHi, THi);
|
| - return;
|
| - }
|
| -
|
| - assert(DestTy == IceType_i16 || DestTy == IceType_i32);
|
| - Variable *T = nullptr;
|
| - SrcF = legalize(SrcF);
|
| - _mov(T, SrcF);
|
| - SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
|
| - _cmov(T, SrcT, Cond);
|
| - _mov(Dest, T);
|
| -}
|
| -
|
| -void TargetX8632::lowerStore(const InstStore *Inst) {
|
| - Operand *Value = Inst->getData();
|
| - Operand *Addr = Inst->getAddr();
|
| - OperandX8632Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
|
| - Type Ty = NewAddr->getType();
|
| -
|
| - if (Ty == IceType_i64) {
|
| - Value = legalize(Value);
|
| - Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
|
| - Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
|
| - _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
|
| - _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
|
| - } else if (isVectorType(Ty)) {
|
| - _storep(legalizeToVar(Value), NewAddr);
|
| - } else {
|
| - Value = legalize(Value, Legal_Reg | Legal_Imm);
|
| - _store(Value, NewAddr);
|
| - }
|
| -}
|
| -
|
| -void TargetX8632::doAddressOptStore() {
|
| - InstStore *Inst = llvm::cast<InstStore>(Context.getCur());
|
| - Operand *Data = Inst->getData();
|
| - Operand *Addr = Inst->getAddr();
|
| - Variable *Index = nullptr;
|
| - uint16_t Shift = 0;
|
| - int32_t Offset = 0; // TODO: make Constant
|
| - Variable *Base = llvm::dyn_cast<Variable>(Addr);
|
| - // Vanilla ICE store instructions should not use the segment registers,
|
| - // and computeAddressOpt only works at the level of Variables and Constants,
|
| - // not other OperandX8632Mem, so there should be no mention of segment
|
| - // registers there either.
|
| - const OperandX8632Mem::SegmentRegisters SegmentReg =
|
| - OperandX8632Mem::DefaultSegment;
|
| - computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
|
| - if (Base && Addr != Base) {
|
| - Inst->setDeleted();
|
| - Constant *OffsetOp = Ctx->getConstantInt32(Offset);
|
| - Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
|
| - Shift, SegmentReg);
|
| - InstStore *NewStore = InstStore::create(Func, Data, Addr);
|
| - if (Inst->getDest())
|
| - NewStore->setRmwBeacon(Inst->getRmwBeacon());
|
| - Context.insert(NewStore);
|
| - }
|
| -}
|
| -
|
| -void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
|
| - // This implements the most naive possible lowering.
|
| - // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
|
| - Operand *Src0 = Inst->getComparison();
|
| - SizeT NumCases = Inst->getNumCases();
|
| - if (Src0->getType() == IceType_i64) {
|
| - Src0 = legalize(Src0); // get Base/Index into physical registers
|
| - Operand *Src0Lo = loOperand(Src0);
|
| - Operand *Src0Hi = hiOperand(Src0);
|
| - if (NumCases >= 2) {
|
| - Src0Lo = legalizeToVar(Src0Lo);
|
| - Src0Hi = legalizeToVar(Src0Hi);
|
| - } else {
|
| - Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
|
| - Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
|
| - }
|
| - for (SizeT I = 0; I < NumCases; ++I) {
|
| - Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
|
| - Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
|
| - InstX8632Label *Label = InstX8632Label::create(Func, this);
|
| - _cmp(Src0Lo, ValueLo);
|
| - _br(CondX86::Br_ne, Label);
|
| - _cmp(Src0Hi, ValueHi);
|
| - _br(CondX86::Br_e, Inst->getLabel(I));
|
| - Context.insert(Label);
|
| - }
|
| - _br(Inst->getLabelDefault());
|
| - return;
|
| - }
|
| - // OK, we'll be slightly less naive by forcing Src into a physical
|
| - // register if there are 2 or more uses.
|
| - if (NumCases >= 2)
|
| - Src0 = legalizeToVar(Src0);
|
| - else
|
| - Src0 = legalize(Src0, Legal_Reg | Legal_Mem);
|
| - for (SizeT I = 0; I < NumCases; ++I) {
|
| - Constant *Value = Ctx->getConstantInt32(Inst->getValue(I));
|
| - _cmp(Src0, Value);
|
| - _br(CondX86::Br_e, Inst->getLabel(I));
|
| - }
|
| -
|
| - _br(Inst->getLabelDefault());
|
| -}
|
| -
|
| -void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,
|
| - Variable *Dest, Operand *Src0,
|
| - Operand *Src1) {
|
| - assert(isVectorType(Dest->getType()));
|
| - Type Ty = Dest->getType();
|
| - Type ElementTy = typeElementType(Ty);
|
| - SizeT NumElements = typeNumElements(Ty);
|
| -
|
| - Operand *T = Ctx->getConstantUndef(Ty);
|
| - for (SizeT I = 0; I < NumElements; ++I) {
|
| - Constant *Index = Ctx->getConstantInt32(I);
|
| -
|
| - // Extract the next two inputs.
|
| - Variable *Op0 = Func->makeVariable(ElementTy);
|
| - lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index));
|
| - Variable *Op1 = Func->makeVariable(ElementTy);
|
| - lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index));
|
| -
|
| - // Perform the arithmetic as a scalar operation.
|
| - Variable *Res = Func->makeVariable(ElementTy);
|
| - lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1));
|
| -
|
| - // Insert the result into position.
|
| - Variable *DestT = Func->makeVariable(Ty);
|
| - lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));
|
| - T = DestT;
|
| - }
|
| -
|
| - lowerAssign(InstAssign::create(Func, Dest, T));
|
| -}
|
| -
|
| -// The following pattern occurs often in lowered C and C++ code:
|
| -//
|
| -// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
|
| -// %cmp.ext = sext <n x i1> %cmp to <n x ty>
|
| -//
|
| -// We can eliminate the sext operation by copying the result of pcmpeqd,
|
| -// pcmpgtd, or cmpps (which produce sign extended results) to the result
|
| -// of the sext operation.
|
| -void TargetX8632::eliminateNextVectorSextInstruction(
|
| - Variable *SignExtendedResult) {
|
| - if (InstCast *NextCast =
|
| - llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
|
| - if (NextCast->getCastKind() == InstCast::Sext &&
|
| - NextCast->getSrc(0) == SignExtendedResult) {
|
| - NextCast->setDeleted();
|
| - _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
|
| - // Skip over the instruction.
|
| - Context.advanceNext();
|
| - }
|
| - }
|
| -}
|
| -
|
| -void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); }
|
| -
|
| -void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) {
|
| - // If the beacon variable's live range does not end in this
|
| - // instruction, then it must end in the modified Store instruction
|
| - // that follows. This means that the original Store instruction is
|
| - // still there, either because the value being stored is used beyond
|
| - // the Store instruction, or because dead code elimination did not
|
| - // happen. In either case, we cancel RMW lowering (and the caller
|
| - // deletes the RMW instruction).
|
| - if (!RMW->isLastUse(RMW->getBeacon()))
|
| - return;
|
| - Operand *Src = RMW->getData();
|
| - Type Ty = Src->getType();
|
| - OperandX8632Mem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
|
| - if (Ty == IceType_i64) {
|
| - Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
|
| - Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
|
| - OperandX8632Mem *AddrLo = llvm::cast<OperandX8632Mem>(loOperand(Addr));
|
| - OperandX8632Mem *AddrHi = llvm::cast<OperandX8632Mem>(hiOperand(Addr));
|
| - switch (RMW->getOp()) {
|
| - default:
|
| - // TODO(stichnot): Implement other arithmetic operators.
|
| - break;
|
| - case InstArithmetic::Add:
|
| - _add_rmw(AddrLo, SrcLo);
|
| - _adc_rmw(AddrHi, SrcHi);
|
| - return;
|
| - case InstArithmetic::Sub:
|
| - _sub_rmw(AddrLo, SrcLo);
|
| - _sbb_rmw(AddrHi, SrcHi);
|
| - return;
|
| - case InstArithmetic::And:
|
| - _and_rmw(AddrLo, SrcLo);
|
| - _and_rmw(AddrHi, SrcHi);
|
| - return;
|
| - case InstArithmetic::Or:
|
| - _or_rmw(AddrLo, SrcLo);
|
| - _or_rmw(AddrHi, SrcHi);
|
| - return;
|
| - case InstArithmetic::Xor:
|
| - _xor_rmw(AddrLo, SrcLo);
|
| - _xor_rmw(AddrHi, SrcHi);
|
| - return;
|
| - }
|
| - } else {
|
| - // i8, i16, i32
|
| - switch (RMW->getOp()) {
|
| - default:
|
| - // TODO(stichnot): Implement other arithmetic operators.
|
| - break;
|
| - case InstArithmetic::Add:
|
| - Src = legalize(Src, Legal_Reg | Legal_Imm);
|
| - _add_rmw(Addr, Src);
|
| - return;
|
| - case InstArithmetic::Sub:
|
| - Src = legalize(Src, Legal_Reg | Legal_Imm);
|
| - _sub_rmw(Addr, Src);
|
| - return;
|
| - case InstArithmetic::And:
|
| - Src = legalize(Src, Legal_Reg | Legal_Imm);
|
| - _and_rmw(Addr, Src);
|
| - return;
|
| - case InstArithmetic::Or:
|
| - Src = legalize(Src, Legal_Reg | Legal_Imm);
|
| - _or_rmw(Addr, Src);
|
| - return;
|
| - case InstArithmetic::Xor:
|
| - Src = legalize(Src, Legal_Reg | Legal_Imm);
|
| - _xor_rmw(Addr, Src);
|
| - return;
|
| - }
|
| - }
|
| - llvm::report_fatal_error("Couldn't lower RMW instruction");
|
| -}
|
| -
|
| -void TargetX8632::lowerOther(const Inst *Instr) {
|
| - if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) {
|
| - lowerRMW(RMW);
|
| - } else {
|
| - TargetLowering::lowerOther(Instr);
|
| - }
|
| -}
|
| -
|
| -// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
|
| -// preserve integrity of liveness analysis. Undef values are also
|
| -// turned into zeroes, since loOperand() and hiOperand() don't expect
|
| -// Undef input.
|
| -void TargetX8632::prelowerPhis() {
|
| - // Pause constant blinding or pooling, blinding or pooling will be done later
|
| - // during phi lowering assignments
|
| - BoolFlagSaver B(RandomizationPoolingPaused, true);
|
| -
|
| - CfgNode *Node = Context.getNode();
|
| - for (Inst &I : Node->getPhis()) {
|
| - auto Phi = llvm::dyn_cast<InstPhi>(&I);
|
| - if (Phi->isDeleted())
|
| - continue;
|
| - Variable *Dest = Phi->getDest();
|
| - if (Dest->getType() == IceType_i64) {
|
| - Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
|
| - Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
|
| - InstPhi *PhiLo = InstPhi::create(Func, Phi->getSrcSize(), DestLo);
|
| - InstPhi *PhiHi = InstPhi::create(Func, Phi->getSrcSize(), DestHi);
|
| - for (SizeT I = 0; I < Phi->getSrcSize(); ++I) {
|
| - Operand *Src = Phi->getSrc(I);
|
| - CfgNode *Label = Phi->getLabel(I);
|
| - if (llvm::isa<ConstantUndef>(Src))
|
| - Src = Ctx->getConstantZero(Dest->getType());
|
| - PhiLo->addArgument(loOperand(Src), Label);
|
| - PhiHi->addArgument(hiOperand(Src), Label);
|
| - }
|
| - Node->getPhis().push_back(PhiLo);
|
| - Node->getPhis().push_back(PhiHi);
|
| - Phi->setDeleted();
|
| - }
|
| - }
|
| -}
|
| -
|
| -namespace {
|
| -
|
| -bool isMemoryOperand(const Operand *Opnd) {
|
| - if (const auto Var = llvm::dyn_cast<Variable>(Opnd))
|
| - return !Var->hasReg();
|
| - // We treat vector undef values the same as a memory operand,
|
| - // because they do in fact need a register to materialize the vector
|
| - // of zeroes into.
|
| - if (llvm::isa<ConstantUndef>(Opnd))
|
| - return isScalarFloatingType(Opnd->getType()) ||
|
| - isVectorType(Opnd->getType());
|
| - if (llvm::isa<Constant>(Opnd))
|
| - return isScalarFloatingType(Opnd->getType());
|
| - return true;
|
| -}
|
| -
|
| -} // end of anonymous namespace
|
| -
|
| -// Lower the pre-ordered list of assignments into mov instructions.
|
| -// Also has to do some ad-hoc register allocation as necessary.
|
| -void TargetX8632::lowerPhiAssignments(CfgNode *Node,
|
| - const AssignList &Assignments) {
|
| - // Check that this is a properly initialized shell of a node.
|
| - assert(Node->getOutEdges().size() == 1);
|
| - assert(Node->getInsts().empty());
|
| - assert(Node->getPhis().empty());
|
| - CfgNode *Succ = Node->getOutEdges().front();
|
| - getContext().init(Node);
|
| - // Register set setup similar to regAlloc().
|
| - RegSetMask RegInclude = RegSet_All;
|
| - RegSetMask RegExclude = RegSet_StackPointer;
|
| - if (hasFramePointer())
|
| - RegExclude |= RegSet_FramePointer;
|
| - llvm::SmallBitVector Available = getRegisterSet(RegInclude, RegExclude);
|
| - bool NeedsRegs = false;
|
| - // Initialize the set of available registers to the set of what is
|
| - // available (not live) at the beginning of the successor block,
|
| - // minus all registers used as Dest operands in the Assignments. To
|
| - // do this, we start off assuming all registers are available, then
|
| - // iterate through the Assignments and remove Dest registers.
|
| - // During this iteration, we also determine whether we will actually
|
| - // need any extra registers for memory-to-memory copies. If so, we
|
| - // do the actual work of removing the live-in registers from the
|
| - // set. TODO(stichnot): This work is being repeated for every split
|
| - // edge to the successor, so consider updating LiveIn just once
|
| - // after all the edges are split.
|
| - for (const Inst &I : Assignments) {
|
| - Variable *Dest = I.getDest();
|
| - if (Dest->hasReg()) {
|
| - Available[Dest->getRegNum()] = false;
|
| - } else if (isMemoryOperand(I.getSrc(0))) {
|
| - NeedsRegs = true; // Src and Dest are both in memory
|
| - }
|
| - }
|
| - if (NeedsRegs) {
|
| - LivenessBV &LiveIn = Func->getLiveness()->getLiveIn(Succ);
|
| - for (int i = LiveIn.find_first(); i != -1; i = LiveIn.find_next(i)) {
|
| - Variable *Var = Func->getLiveness()->getVariable(i, Succ);
|
| - if (Var->hasReg())
|
| - Available[Var->getRegNum()] = false;
|
| - }
|
| - }
|
| - // Iterate backwards through the Assignments. After lowering each
|
| - // assignment, add Dest to the set of available registers, and
|
| - // remove Src from the set of available registers. Iteration is
|
| - // done backwards to enable incremental updates of the available
|
| - // register set, and the lowered instruction numbers may be out of
|
| - // order, but that can be worked around by renumbering the block
|
| - // afterwards if necessary.
|
| - for (const Inst &I : reverse_range(Assignments)) {
|
| - Context.rewind();
|
| - auto Assign = llvm::dyn_cast<InstAssign>(&I);
|
| - Variable *Dest = Assign->getDest();
|
| -
|
| - // If the source operand is ConstantUndef, do not legalize it.
|
| - // In function test_split_undef_int_vec, the advanced phi
|
| - // lowering process will find an assignment of undefined
|
| - // vector. This vector, as the Src here, will crash if it
|
| - // go through legalize(). legalize() will create new variable
|
| - // with makeVectorOfZeros(), but this new variable will be
|
| - // assigned a stack slot. This will fail the assertion in
|
| - // IceInstX8632.cpp:789, as XmmEmitterRegOp() complain:
|
| - // Var->hasReg() fails. Note this failure is irrelevant to
|
| - // randomization or pooling of constants.
|
| - // So, we do not call legalize() to add pool label for the
|
| - // src operands of phi assignment instructions.
|
| - // Instead, we manually add pool label for constant float and
|
| - // constant double values here.
|
| - // Note going through legalize() does not affect the testing
|
| - // results of SPEC2K and xtests.
|
| - Operand *Src = Assign->getSrc(0);
|
| - if (!llvm::isa<ConstantUndef>(Assign->getSrc(0))) {
|
| - Src = legalize(Src);
|
| - }
|
| -
|
| - Variable *SrcVar = llvm::dyn_cast<Variable>(Src);
|
| - // Use normal assignment lowering, except lower mem=mem specially
|
| - // so we can register-allocate at the same time.
|
| - if (!isMemoryOperand(Dest) || !isMemoryOperand(Src)) {
|
| - lowerAssign(Assign);
|
| - } else {
|
| - assert(Dest->getType() == Src->getType());
|
| - const llvm::SmallBitVector &RegsForType =
|
| - getRegisterSetForType(Dest->getType());
|
| - llvm::SmallBitVector AvailRegsForType = RegsForType & Available;
|
| - Variable *SpillLoc = nullptr;
|
| - Variable *Preg = nullptr;
|
| - // TODO(stichnot): Opportunity for register randomization.
|
| - int32_t RegNum = AvailRegsForType.find_first();
|
| - bool IsVector = isVectorType(Dest->getType());
|
| - bool NeedSpill = (RegNum == -1);
|
| - if (NeedSpill) {
|
| - // Pick some register to spill and update RegNum.
|
| - // TODO(stichnot): Opportunity for register randomization.
|
| - RegNum = RegsForType.find_first();
|
| - Preg = getPhysicalRegister(RegNum, Dest->getType());
|
| - SpillLoc = Func->makeVariable(Dest->getType());
|
| - // Create a fake def of the physical register to avoid
|
| - // liveness inconsistency problems during late-stage liveness
|
| - // analysis (e.g. asm-verbose mode).
|
| - Context.insert(InstFakeDef::create(Func, Preg));
|
| - if (IsVector)
|
| - _movp(SpillLoc, Preg);
|
| - else
|
| - _mov(SpillLoc, Preg);
|
| - }
|
| - assert(RegNum >= 0);
|
| - if (llvm::isa<ConstantUndef>(Src))
|
| - // Materialize an actual constant instead of undef. RegNum is
|
| - // passed in for vector types because undef vectors are
|
| - // lowered to vector register of zeroes.
|
| - Src =
|
| - legalize(Src, Legal_All, IsVector ? RegNum : Variable::NoRegister);
|
| - Variable *Tmp = makeReg(Dest->getType(), RegNum);
|
| - if (IsVector) {
|
| - _movp(Tmp, Src);
|
| - _movp(Dest, Tmp);
|
| - } else {
|
| - _mov(Tmp, Src);
|
| - _mov(Dest, Tmp);
|
| - }
|
| - if (NeedSpill) {
|
| - // Restore the spilled register.
|
| - if (IsVector)
|
| - _movp(Preg, SpillLoc);
|
| - else
|
| - _mov(Preg, SpillLoc);
|
| - // Create a fake use of the physical register to keep it live
|
| - // for late-stage liveness analysis (e.g. asm-verbose mode).
|
| - Context.insert(InstFakeUse::create(Func, Preg));
|
| - }
|
| - }
|
| - // Update register availability before moving to the previous
|
| - // instruction on the Assignments list.
|
| - if (Dest->hasReg())
|
| - Available[Dest->getRegNum()] = true;
|
| - if (SrcVar && SrcVar->hasReg())
|
| - Available[SrcVar->getRegNum()] = false;
|
| - }
|
| -
|
| - // Add the terminator branch instruction to the end.
|
| - Context.setInsertPoint(Context.getEnd());
|
| - _br(Succ);
|
| -}
|
| -
|
| -// There is no support for loading or emitting vector constants, so the
|
| -// vector values returned from makeVectorOfZeros, makeVectorOfOnes,
|
| -// etc. are initialized with register operations.
|
| +//===----------------------------------------------------------------------===//
|
| //
|
| -// TODO(wala): Add limited support for vector constants so that
|
| -// complex initialization in registers is unnecessary.
|
| -
|
| -Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
|
| - Variable *Reg = makeReg(Ty, RegNum);
|
| - // Insert a FakeDef, since otherwise the live range of Reg might
|
| - // be overestimated.
|
| - Context.insert(InstFakeDef::create(Func, Reg));
|
| - _pxor(Reg, Reg);
|
| - return Reg;
|
| -}
|
| -
|
| -Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {
|
| - Variable *MinusOnes = makeReg(Ty, RegNum);
|
| - // Insert a FakeDef so the live range of MinusOnes is not overestimated.
|
| - Context.insert(InstFakeDef::create(Func, MinusOnes));
|
| - _pcmpeq(MinusOnes, MinusOnes);
|
| - return MinusOnes;
|
| -}
|
| -
|
| -Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
|
| - Variable *Dest = makeVectorOfZeros(Ty, RegNum);
|
| - Variable *MinusOne = makeVectorOfMinusOnes(Ty);
|
| - _psub(Dest, MinusOne);
|
| - return Dest;
|
| -}
|
| -
|
| -Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
|
| - assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
|
| - Ty == IceType_v16i8);
|
| - if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
|
| - Variable *Reg = makeVectorOfOnes(Ty, RegNum);
|
| - SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
|
| - _psll(Reg, Ctx->getConstantInt8(Shift));
|
| - return Reg;
|
| - } else {
|
| - // SSE has no left shift operation for vectors of 8 bit integers.
|
| - const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
|
| - Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
|
| - Variable *Reg = makeReg(Ty, RegNum);
|
| - _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
|
| - _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
|
| - return Reg;
|
| - }
|
| -}
|
| -
|
| -// Construct a mask in a register that can be and'ed with a
|
| -// floating-point value to mask off its sign bit. The value will be
|
| -// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff>
|
| -// for f64. Construct it as vector of ones logically right shifted
|
| -// one bit. TODO(stichnot): Fix the wala TODO above, to represent
|
| -// vector constants in memory.
|
| -Variable *TargetX8632::makeVectorOfFabsMask(Type Ty, int32_t RegNum) {
|
| - Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
|
| - _psrl(Reg, Ctx->getConstantInt8(1));
|
| - return Reg;
|
| -}
|
| -
|
| -OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
|
| - Variable *Slot,
|
| - uint32_t Offset) {
|
| - // Ensure that Loc is a stack slot.
|
| - assert(Slot->getWeight().isZero());
|
| - assert(Slot->getRegNum() == Variable::NoRegister);
|
| - // Compute the location of Loc in memory.
|
| - // TODO(wala,stichnot): lea should not be required. The address of
|
| - // the stack slot is known at compile time (although not until after
|
| - // addProlog()).
|
| - const Type PointerType = IceType_i32;
|
| - Variable *Loc = makeReg(PointerType);
|
| - _lea(Loc, Slot);
|
| - Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
|
| - return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
|
| -}
|
| -
|
| -// Helper for legalize() to emit the right code to lower an operand to a
|
| -// register of the appropriate type.
|
| -Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
|
| - Type Ty = Src->getType();
|
| - Variable *Reg = makeReg(Ty, RegNum);
|
| - if (isVectorType(Ty)) {
|
| - _movp(Reg, Src);
|
| - } else {
|
| - _mov(Reg, Src);
|
| - }
|
| - return Reg;
|
| -}
|
| -
|
| -Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
|
| - int32_t RegNum) {
|
| - Type Ty = From->getType();
|
| - // Assert that a physical register is allowed. To date, all calls
|
| - // to legalize() allow a physical register. If a physical register
|
| - // needs to be explicitly disallowed, then new code will need to be
|
| - // written to force a spill.
|
| - assert(Allowed & Legal_Reg);
|
| - // If we're asking for a specific physical register, make sure we're
|
| - // not allowing any other operand kinds. (This could be future
|
| - // work, e.g. allow the shl shift amount to be either an immediate
|
| - // or in ecx.)
|
| - assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
|
| -
|
| - if (auto Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
|
| - // Before doing anything with a Mem operand, we need to ensure
|
| - // that the Base and Index components are in physical registers.
|
| - Variable *Base = Mem->getBase();
|
| - Variable *Index = Mem->getIndex();
|
| - Variable *RegBase = nullptr;
|
| - Variable *RegIndex = nullptr;
|
| - if (Base) {
|
| - RegBase = legalizeToVar(Base);
|
| - }
|
| - if (Index) {
|
| - RegIndex = legalizeToVar(Index);
|
| - }
|
| - if (Base != RegBase || Index != RegIndex) {
|
| - Mem =
|
| - OperandX8632Mem::create(Func, Ty, RegBase, Mem->getOffset(), RegIndex,
|
| - Mem->getShift(), Mem->getSegmentRegister());
|
| - }
|
| -
|
| - // For all Memory Operands, we do randomization/pooling here
|
| - From = randomizeOrPoolImmediate(Mem);
|
| -
|
| - if (!(Allowed & Legal_Mem)) {
|
| - From = copyToReg(From, RegNum);
|
| - }
|
| - return From;
|
| - }
|
| - if (auto *Const = llvm::dyn_cast<Constant>(From)) {
|
| - if (llvm::isa<ConstantUndef>(Const)) {
|
| - // Lower undefs to zero. Another option is to lower undefs to an
|
| - // uninitialized register; however, using an uninitialized register
|
| - // results in less predictable code.
|
| - //
|
| - // If in the future the implementation is changed to lower undef
|
| - // values to uninitialized registers, a FakeDef will be needed:
|
| - // Context.insert(InstFakeDef::create(Func, Reg));
|
| - // This is in order to ensure that the live range of Reg is not
|
| - // overestimated. If the constant being lowered is a 64 bit value,
|
| - // then the result should be split and the lo and hi components will
|
| - // need to go in uninitialized registers.
|
| - if (isVectorType(Ty))
|
| - return makeVectorOfZeros(Ty, RegNum);
|
| - Const = Ctx->getConstantZero(Ty);
|
| - From = Const;
|
| - }
|
| - // There should be no constants of vector type (other than undef).
|
| - assert(!isVectorType(Ty));
|
| -
|
| - // If the operand is an 32 bit constant integer, we should check
|
| - // whether we need to randomize it or pool it.
|
| - if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
|
| - Operand *NewConst = randomizeOrPoolImmediate(C, RegNum);
|
| - if (NewConst != Const) {
|
| - return NewConst;
|
| - }
|
| - }
|
| -
|
| - // Convert a scalar floating point constant into an explicit
|
| - // memory operand.
|
| - if (isScalarFloatingType(Ty)) {
|
| - Variable *Base = nullptr;
|
| - std::string Buffer;
|
| - llvm::raw_string_ostream StrBuf(Buffer);
|
| - llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
|
| - llvm::cast<Constant>(From)->setShouldBePooled(true);
|
| - Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
|
| - From = OperandX8632Mem::create(Func, Ty, Base, Offset);
|
| - }
|
| - bool NeedsReg = false;
|
| - if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty))
|
| - // Immediate specifically not allowed
|
| - NeedsReg = true;
|
| - if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))
|
| - // On x86, FP constants are lowered to mem operands.
|
| - NeedsReg = true;
|
| - if (NeedsReg) {
|
| - From = copyToReg(From, RegNum);
|
| - }
|
| - return From;
|
| - }
|
| - if (auto Var = llvm::dyn_cast<Variable>(From)) {
|
| - // Check if the variable is guaranteed a physical register. This
|
| - // can happen either when the variable is pre-colored or when it is
|
| - // assigned infinite weight.
|
| - bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf());
|
| - // We need a new physical register for the operand if:
|
| - // Mem is not allowed and Var isn't guaranteed a physical
|
| - // register, or
|
| - // RegNum is required and Var->getRegNum() doesn't match.
|
| - if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
|
| - (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
|
| - From = copyToReg(From, RegNum);
|
| - }
|
| - return From;
|
| - }
|
| - llvm_unreachable("Unhandled operand kind in legalize()");
|
| - return From;
|
| -}
|
| -
|
| -// Provide a trivial wrapper to legalize() for this common usage.
|
| -Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) {
|
| - return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
|
| -}
|
| +// This file implements the TargetLoweringX8632 class, which
|
| +// consists almost entirely of the lowering sequence for each
|
| +// high-level instruction.
|
| +//
|
| +//===----------------------------------------------------------------------===//
|
|
|
| -// For the cmp instruction, if Src1 is an immediate, or known to be a
|
| -// physical register, we can allow Src0 to be a memory operand.
|
| -// Otherwise, Src0 must be copied into a physical register.
|
| -// (Actually, either Src0 or Src1 can be chosen for the physical
|
| -// register, but unfortunately we have to commit to one or the other
|
| -// before register allocation.)
|
| -Operand *TargetX8632::legalizeSrc0ForCmp(Operand *Src0, Operand *Src1) {
|
| - bool IsSrc1ImmOrReg = false;
|
| - if (llvm::isa<Constant>(Src1)) {
|
| - IsSrc1ImmOrReg = true;
|
| - } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
|
| - if (Var->hasReg())
|
| - IsSrc1ImmOrReg = true;
|
| - }
|
| - return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
|
| -}
|
| +#include "IceTargetLoweringX8632.h"
|
|
|
| -OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Opnd, Type Ty,
|
| - bool DoLegalize) {
|
| - OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Opnd);
|
| - // It may be the case that address mode optimization already creates
|
| - // an OperandX8632Mem, so in that case it wouldn't need another level
|
| - // of transformation.
|
| - if (!Mem) {
|
| - Variable *Base = llvm::dyn_cast<Variable>(Opnd);
|
| - Constant *Offset = llvm::dyn_cast<Constant>(Opnd);
|
| - assert(Base || Offset);
|
| - if (Offset) {
|
| - // During memory operand building, we do not blind or pool
|
| - // the constant offset, we will work on the whole memory
|
| - // operand later as one entity later, this save one instruction.
|
| - // By turning blinding and pooling off, we guarantee
|
| - // legalize(Offset) will return a constant*.
|
| - {
|
| - BoolFlagSaver B(RandomizationPoolingPaused, true);
|
| +#include "IceTargetLoweringX86Base.h"
|
|
|
| - Offset = llvm::cast<Constant>(legalize(Offset));
|
| - }
|
| +namespace Ice {
|
| +namespace X86Internal {
|
| +template <> struct MachineTraits<TargetX8632> {
|
| + using InstructionSet = TargetX8632::X86InstructionSet;
|
|
|
| - assert(llvm::isa<ConstantInteger32>(Offset) ||
|
| - llvm::isa<ConstantRelocatable>(Offset));
|
| - }
|
| - Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
|
| + // The following table summarizes the logic for lowering the fcmp
|
| + // instruction. There is one table entry for each of the 16 conditions.
|
| + //
|
| + // The first four columns describe the case when the operands are
|
| + // floating point scalar values. A comment in lowerFcmp() describes the
|
| + // lowering template. In the most general case, there is a compare
|
| + // followed by two conditional branches, because some fcmp conditions
|
| + // don't map to a single x86 conditional branch. However, in many cases
|
| + // it is possible to swap the operands in the comparison and have a
|
| + // single conditional branch. Since it's quite tedious to validate the
|
| + // table by hand, good execution tests are helpful.
|
| + //
|
| + // The last two columns describe the case when the operands are vectors
|
| + // of floating point values. For most fcmp conditions, there is a clear
|
| + // mapping to a single x86 cmpps instruction variant. Some fcmp
|
| + // conditions require special code to handle and these are marked in the
|
| + // table with a Cmpps_Invalid predicate.
|
| + static const struct TableFcmpType {
|
| + uint32_t Default;
|
| + bool SwapScalarOperands;
|
| + CondX86::BrCond C1, C2;
|
| + bool SwapVectorOperands;
|
| + CondX86::CmppsCond Predicate;
|
| + } TableFcmp[];
|
| + static const size_t TableFcmpSize;
|
| +
|
| + // The following table summarizes the logic for lowering the icmp instruction
|
| + // for i32 and narrower types. Each icmp condition has a clear mapping to an
|
| + // x86 conditional branch instruction.
|
| +
|
| + static const struct TableIcmp32Type {
|
| + CondX86::BrCond Mapping;
|
| + } TableIcmp32[];
|
| + static const size_t TableIcmp32Size;
|
| +
|
| + // The following table summarizes the logic for lowering the icmp instruction
|
| + // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
|
| + // conditional branches are needed. For the other conditions, three separate
|
| + // conditional branches are needed.
|
| + static const struct TableIcmp64Type {
|
| + CondX86::BrCond C1, C2, C3;
|
| + } TableIcmp64[];
|
| + static const size_t TableIcmp64Size;
|
| +
|
| + static CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
|
| + size_t Index = static_cast<size_t>(Cond);
|
| + assert(Index < TableIcmp32Size);
|
| + return TableIcmp32[Index].Mapping;
|
| + }
|
| +
|
| + static const struct TableTypeX8632AttributesType {
|
| + Type InVectorElementType;
|
| + } TableTypeX8632Attributes[];
|
| + static const size_t TableTypeX8632AttributesSize;
|
| +
|
| + // Return the type which the elements of the vector have in the X86
|
| + // representation of the vector.
|
| + static Type getInVectorElementType(Type Ty) {
|
| + assert(isVectorType(Ty));
|
| + size_t Index = static_cast<size_t>(Ty);
|
| + (void)Index;
|
| + assert(Index < TableTypeX8632AttributesSize);
|
| + return TableTypeX8632Attributes[Ty].InVectorElementType;
|
| + }
|
| +
|
| + // The maximum number of arguments to pass in XMM registers
|
| + static constexpr uint32_t X86_MAX_XMM_ARGS = 4;
|
| + // The number of bits in a byte
|
| + static constexpr uint32_t X86_CHAR_BIT = 8;
|
| + // Stack alignment
|
| + static const uint32_t X86_STACK_ALIGNMENT_BYTES;
|
| + // Size of the return address on the stack
|
| + static constexpr uint32_t X86_RET_IP_SIZE_BYTES = 4;
|
| + // The number of different NOP instructions
|
| + static constexpr uint32_t X86_NUM_NOP_VARIANTS = 5;
|
| +
|
| + // Value is in bytes. Return Value adjusted to the next highest multiple
|
| + // of the stack alignment.
|
| + static uint32_t applyStackAlignment(uint32_t Value) {
|
| + return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
|
| }
|
| - // Do legalization, which contains randomization/pooling
|
| - // or do randomization/pooling.
|
| - return llvm::cast<OperandX8632Mem>(
|
| - DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
|
| -}
|
| -
|
| -Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
|
| - // There aren't any 64-bit integer registers for x86-32.
|
| - assert(Type != IceType_i64);
|
| - Variable *Reg = Func->makeVariable(Type);
|
| - if (RegNum == Variable::NoRegister)
|
| - Reg->setWeightInfinite();
|
| - else
|
| - Reg->setRegNum(RegNum);
|
| - return Reg;
|
| -}
|
| -
|
| -void TargetX8632::postLower() {
|
| - if (Ctx->getFlags().getOptLevel() == Opt_m1)
|
| - return;
|
| - inferTwoAddress();
|
| -}
|
| -
|
| -void TargetX8632::makeRandomRegisterPermutation(
|
| - llvm::SmallVectorImpl<int32_t> &Permutation,
|
| - const llvm::SmallBitVector &ExcludeRegisters) const {
|
| - // TODO(stichnot): Declaring Permutation this way loses type/size
|
| - // information. Fix this in conjunction with the caller-side TODO.
|
| - assert(Permutation.size() >= RegX8632::Reg_NUM);
|
| - // Expected upper bound on the number of registers in a single
|
| - // equivalence class. For x86-32, this would comprise the 8 XMM
|
| - // registers. This is for performance, not correctness.
|
| - static const unsigned MaxEquivalenceClassSize = 8;
|
| - typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList;
|
| - typedef std::map<uint32_t, RegisterList> EquivalenceClassMap;
|
| - EquivalenceClassMap EquivalenceClasses;
|
| - SizeT NumShuffled = 0, NumPreserved = 0;
|
| +};
|
|
|
| -// Build up the equivalence classes of registers by looking at the
|
| -// register properties as well as whether the registers should be
|
| -// explicitly excluded from shuffling.
|
| -#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
|
| - frameptr, isI8, isInt, isFP) \
|
| - if (ExcludeRegisters[RegX8632::val]) { \
|
| - /* val stays the same in the resulting permutation. */ \
|
| - Permutation[RegX8632::val] = RegX8632::val; \
|
| - ++NumPreserved; \
|
| - } else { \
|
| - const uint32_t Index = (scratch << 0) | (preserved << 1) | (isI8 << 2) | \
|
| - (isInt << 3) | (isFP << 4); \
|
| - /* val is assigned to an equivalence class based on its properties. */ \
|
| - EquivalenceClasses[Index].push_back(RegX8632::val); \
|
| - }
|
| - REGX8632_TABLE
|
| +const MachineTraits<TargetX8632>::TableFcmpType
|
| + MachineTraits<TargetX8632>::TableFcmp[] = {
|
| +#define X(val, dflt, swapS, C1, C2, swapV, pred) \
|
| + { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \
|
| + ,
|
| + FCMPX8632_TABLE
|
| #undef X
|
| +};
|
|
|
| - RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
|
| +constexpr size_t MachineTraits<TargetX8632>::TableFcmpSize =
|
| + llvm::array_lengthof(TableFcmp);
|
|
|
| - // Shuffle the resulting equivalence classes.
|
| - for (auto I : EquivalenceClasses) {
|
| - const RegisterList &List = I.second;
|
| - RegisterList Shuffled(List);
|
| - RandomShuffle(Shuffled.begin(), Shuffled.end(), RNG);
|
| - for (size_t SI = 0, SE = Shuffled.size(); SI < SE; ++SI) {
|
| - Permutation[List[SI]] = Shuffled[SI];
|
| - ++NumShuffled;
|
| - }
|
| - }
|
| +const MachineTraits<TargetX8632>::TableIcmp32Type
|
| + MachineTraits<TargetX8632>::TableIcmp32[] = {
|
| +#define X(val, C_32, C1_64, C2_64, C3_64) \
|
| + { CondX86::C_32 } \
|
| + ,
|
| + ICMPX8632_TABLE
|
| +#undef X
|
| +};
|
|
|
| - assert(NumShuffled + NumPreserved == RegX8632::Reg_NUM);
|
| +constexpr size_t MachineTraits<TargetX8632>::TableIcmp32Size =
|
| + llvm::array_lengthof(TableIcmp32);
|
|
|
| - if (Func->isVerbose(IceV_Random)) {
|
| - OstreamLocker L(Func->getContext());
|
| - Ostream &Str = Func->getContext()->getStrDump();
|
| - Str << "Register equivalence classes:\n";
|
| - for (auto I : EquivalenceClasses) {
|
| - Str << "{";
|
| - const RegisterList &List = I.second;
|
| - bool First = true;
|
| - for (int32_t Register : List) {
|
| - if (!First)
|
| - Str << " ";
|
| - First = false;
|
| - Str << getRegName(Register, IceType_i32);
|
| - }
|
| - Str << "}\n";
|
| - }
|
| - }
|
| -}
|
| +const MachineTraits<TargetX8632>::TableIcmp64Type
|
| + MachineTraits<TargetX8632>::TableIcmp64[] = {
|
| +#define X(val, C_32, C1_64, C2_64, C3_64) \
|
| + { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 } \
|
| + ,
|
| + ICMPX8632_TABLE
|
| +#undef X
|
| +};
|
|
|
| -void TargetX8632::emit(const ConstantInteger32 *C) const {
|
| - if (!ALLOW_DUMP)
|
| - return;
|
| - Ostream &Str = Ctx->getStrEmit();
|
| - Str << getConstantPrefix() << C->getValue();
|
| -}
|
| +constexpr size_t MachineTraits<TargetX8632>::TableIcmp64Size =
|
| + llvm::array_lengthof(TableIcmp64);
|
|
|
| -void TargetX8632::emit(const ConstantInteger64 *) const {
|
| - llvm::report_fatal_error("Not expecting to emit 64-bit integers");
|
| -}
|
| +const MachineTraits<TargetX8632>::TableTypeX8632AttributesType
|
| + MachineTraits<TargetX8632>::TableTypeX8632Attributes[] = {
|
| +#define X(tag, elementty, cvt, sdss, pack, width, fld) \
|
| + { elementty } \
|
| + ,
|
| + ICETYPEX8632_TABLE
|
| +#undef X
|
| +};
|
|
|
| -void TargetX8632::emit(const ConstantFloat *C) const {
|
| - if (!ALLOW_DUMP)
|
| - return;
|
| - Ostream &Str = Ctx->getStrEmit();
|
| - C->emitPoolLabel(Str);
|
| -}
|
| +constexpr size_t MachineTraits<TargetX8632>::TableTypeX8632AttributesSize =
|
| + llvm::array_lengthof(TableTypeX8632Attributes);
|
|
|
| -void TargetX8632::emit(const ConstantDouble *C) const {
|
| - if (!ALLOW_DUMP)
|
| - return;
|
| - Ostream &Str = Ctx->getStrEmit();
|
| - C->emitPoolLabel(Str);
|
| -}
|
| +const uint32_t MachineTraits<TargetX8632>::X86_STACK_ALIGNMENT_BYTES = 16;
|
| +} // end of namespace X86Internal
|
|
|
| -void TargetX8632::emit(const ConstantUndef *) const {
|
| - llvm::report_fatal_error("undef value encountered by emitter.");
|
| +TargetX8632 *TargetX8632::create(Cfg *Func) {
|
| + return X86Internal::TargetX86Base<TargetX8632>::create(Func);
|
| }
|
|
|
| TargetDataX8632::TargetDataX8632(GlobalContext *Ctx)
|
| : TargetDataLowering(Ctx) {}
|
|
|
| -void TargetDataX8632::lowerGlobals(const VariableDeclarationList &Vars,
|
| - const IceString &SectionSuffix) {
|
| - switch (Ctx->getFlags().getOutFileType()) {
|
| - case FT_Elf: {
|
| - ELFObjectWriter *Writer = Ctx->getObjectWriter();
|
| - Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix);
|
| - } break;
|
| - case FT_Asm:
|
| - case FT_Iasm: {
|
| - const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
|
| - OstreamLocker L(Ctx);
|
| - for (const VariableDeclaration *Var : Vars) {
|
| - if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
|
| - emitGlobal(*Var, SectionSuffix);
|
| - }
|
| - }
|
| - } break;
|
| - }
|
| -}
|
| -
|
| +namespace {
|
| template <typename T> struct PoolTypeConverter {};
|
|
|
| template <> struct PoolTypeConverter<float> {
|
| @@ -5457,6 +225,7 @@ template <> struct PoolTypeConverter<uint8_t> {
|
| const char *PoolTypeConverter<uint8_t>::TypeName = "i8";
|
| const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte";
|
| const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x";
|
| +} // end of anonymous namespace
|
|
|
| template <typename T>
|
| void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) {
|
| @@ -5521,200 +290,135 @@ void TargetDataX8632::lowerConstants() {
|
| }
|
| }
|
|
|
| -TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx)
|
| - : TargetHeaderLowering(Ctx) {}
|
| -
|
| -// Randomize or pool an Immediate.
|
| -Operand *TargetX8632::randomizeOrPoolImmediate(Constant *Immediate,
|
| - int32_t RegNum) {
|
| - assert(llvm::isa<ConstantInteger32>(Immediate) ||
|
| - llvm::isa<ConstantRelocatable>(Immediate));
|
| - if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
|
| - RandomizationPoolingPaused == true) {
|
| - // Immediates randomization/pooling off or paused
|
| - return Immediate;
|
| - }
|
| - if (Immediate->shouldBeRandomizedOrPooled(Ctx)) {
|
| - Ctx->statsUpdateRPImms();
|
| - if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
|
| - RPI_Randomize) {
|
| - // blind the constant
|
| - // FROM:
|
| - // imm
|
| - // TO:
|
| - // insert: mov imm+cookie, Reg
|
| - // insert: lea -cookie[Reg], Reg
|
| - // => Reg
|
| - // If we have already assigned a phy register, we must come from
|
| - // andvancedPhiLowering()=>lowerAssign(). In this case we should reuse
|
| - // the assigned register as this assignment is that start of its use-def
|
| - // chain. So we add RegNum argument here.
|
| - // Note we use 'lea' instruction instead of 'xor' to avoid affecting
|
| - // the flags.
|
| - Variable *Reg = makeReg(IceType_i32, RegNum);
|
| - ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate);
|
| - uint32_t Value = Integer->getValue();
|
| - uint32_t Cookie = Ctx->getRandomizationCookie();
|
| - _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value));
|
| - Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie);
|
| - _lea(Reg,
|
| - OperandX8632Mem::create(Func, IceType_i32, Reg, Offset, nullptr, 0));
|
| - // make sure liveness analysis won't kill this variable, otherwise a
|
| - // liveness
|
| - // assertion will be triggered.
|
| - _set_dest_nonkillable();
|
| - if (Immediate->getType() != IceType_i32) {
|
| - Variable *TruncReg = makeReg(Immediate->getType(), RegNum);
|
| - _mov(TruncReg, Reg);
|
| - return TruncReg;
|
| +void TargetDataX8632::lowerGlobals(const VariableDeclarationList &Vars,
|
| + const IceString &SectionSuffix) {
|
| + switch (Ctx->getFlags().getOutFileType()) {
|
| + case FT_Elf: {
|
| + ELFObjectWriter *Writer = Ctx->getObjectWriter();
|
| + Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix);
|
| + } break;
|
| + case FT_Asm:
|
| + case FT_Iasm: {
|
| + const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
|
| + OstreamLocker L(Ctx);
|
| + for (const VariableDeclaration *Var : Vars) {
|
| + if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
|
| + emitGlobal(*Var, SectionSuffix);
|
| }
|
| - return Reg;
|
| - }
|
| - if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
|
| - // pool the constant
|
| - // FROM:
|
| - // imm
|
| - // TO:
|
| - // insert: mov $label, Reg
|
| - // => Reg
|
| - assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool);
|
| - Immediate->setShouldBePooled(true);
|
| - // if we have already assigned a phy register, we must come from
|
| - // andvancedPhiLowering()=>lowerAssign(). In this case we should reuse
|
| - // the assigned register as this assignment is that start of its use-def
|
| - // chain. So we add RegNum argument here.
|
| - Variable *Reg = makeReg(Immediate->getType(), RegNum);
|
| - IceString Label;
|
| - llvm::raw_string_ostream Label_stream(Label);
|
| - Immediate->emitPoolLabel(Label_stream);
|
| - const RelocOffsetT Offset = 0;
|
| - const bool SuppressMangling = true;
|
| - Constant *Symbol =
|
| - Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling);
|
| - OperandX8632Mem *MemOperand =
|
| - OperandX8632Mem::create(Func, Immediate->getType(), nullptr, Symbol);
|
| - _mov(Reg, MemOperand);
|
| - return Reg;
|
| }
|
| - assert("Unsupported -randomize-pool-immediates option" && false);
|
| + } break;
|
| }
|
| - // the constant Immediate is not eligible for blinding/pooling
|
| - return Immediate;
|
| }
|
|
|
| -OperandX8632Mem *
|
| -TargetX8632::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand,
|
| - int32_t RegNum) {
|
| - assert(MemOperand);
|
| - if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
|
| - RandomizationPoolingPaused == true) {
|
| - // immediates randomization/pooling is turned off
|
| - return MemOperand;
|
| - }
|
| -
|
| - // If this memory operand is already a randommized one, we do
|
| - // not randomize it again.
|
| - if (MemOperand->getRandomized())
|
| - return MemOperand;
|
| +TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx)
|
| + : TargetHeaderLowering(Ctx) {}
|
|
|
| - if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) {
|
| - if (C->shouldBeRandomizedOrPooled(Ctx)) {
|
| - // The offset of this mem operand should be blinded or pooled
|
| - Ctx->statsUpdateRPImms();
|
| - if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
|
| - RPI_Randomize) {
|
| - // blind the constant offset
|
| - // FROM:
|
| - // offset[base, index, shift]
|
| - // TO:
|
| - // insert: lea offset+cookie[base], RegTemp
|
| - // => -cookie[RegTemp, index, shift]
|
| - uint32_t Value =
|
| - llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset())
|
| - ->getValue();
|
| - uint32_t Cookie = Ctx->getRandomizationCookie();
|
| - Constant *Mask1 = Ctx->getConstantInt(
|
| - MemOperand->getOffset()->getType(), Cookie + Value);
|
| - Constant *Mask2 =
|
| - Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);
|
| +// In some cases, there are x-macros tables for both high-level and
|
| +// low-level instructions/operands that use the same enum key value.
|
| +// The tables are kept separate to maintain a proper separation
|
| +// between abstraction layers. There is a risk that the tables could
|
| +// get out of sync if enum values are reordered or if entries are
|
| +// added or deleted. The following dummy namespaces use
|
| +// static_asserts to ensure everything is kept in sync.
|
|
|
| - OperandX8632Mem *TempMemOperand = OperandX8632Mem::create(
|
| - Func, MemOperand->getType(), MemOperand->getBase(), Mask1);
|
| - // If we have already assigned a physical register, we must come from
|
| - // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
|
| - // the assigned register as this assignment is that start of its use-def
|
| - // chain. So we add RegNum argument here.
|
| - Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
|
| - _lea(RegTemp, TempMemOperand);
|
| - // As source operand doesn't use the dstreg, we don't need to add
|
| - // _set_dest_nonkillable().
|
| - // But if we use the same Dest Reg, that is, with RegNum
|
| - // assigned, we should add this _set_dest_nonkillable()
|
| - if (RegNum != Variable::NoRegister)
|
| - _set_dest_nonkillable();
|
| +namespace {
|
| +// Validate the enum values in FCMPX8632_TABLE.
|
| +namespace dummy1 {
|
| +// Define a temporary set of enum values based on low-level table
|
| +// entries.
|
| +enum _tmp_enum {
|
| +#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
|
| + FCMPX8632_TABLE
|
| +#undef X
|
| + _num
|
| +};
|
| +// Define a set of constants based on high-level table entries.
|
| +#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
|
| +ICEINSTFCMP_TABLE
|
| +#undef X
|
| +// Define a set of constants based on low-level table entries, and
|
| +// ensure the table entry keys are consistent.
|
| +#define X(val, dflt, swapS, C1, C2, swapV, pred) \
|
| + static const int _table2_##val = _tmp_##val; \
|
| + static_assert( \
|
| + _table1_##val == _table2_##val, \
|
| + "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
|
| +FCMPX8632_TABLE
|
| +#undef X
|
| +// Repeat the static asserts with respect to the high-level table
|
| +// entries in case the high-level table has extra entries.
|
| +#define X(tag, str) \
|
| + static_assert( \
|
| + _table1_##tag == _table2_##tag, \
|
| + "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
|
| +ICEINSTFCMP_TABLE
|
| +#undef X
|
| +} // end of namespace dummy1
|
|
|
| - OperandX8632Mem *NewMemOperand = OperandX8632Mem::create(
|
| - Func, MemOperand->getType(), RegTemp, Mask2, MemOperand->getIndex(),
|
| - MemOperand->getShift(), MemOperand->getSegmentRegister());
|
| +// Validate the enum values in ICMPX8632_TABLE.
|
| +namespace dummy2 {
|
| +// Define a temporary set of enum values based on low-level table
|
| +// entries.
|
| +enum _tmp_enum {
|
| +#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
|
| + ICMPX8632_TABLE
|
| +#undef X
|
| + _num
|
| +};
|
| +// Define a set of constants based on high-level table entries.
|
| +#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
|
| +ICEINSTICMP_TABLE
|
| +#undef X
|
| +// Define a set of constants based on low-level table entries, and
|
| +// ensure the table entry keys are consistent.
|
| +#define X(val, C_32, C1_64, C2_64, C3_64) \
|
| + static const int _table2_##val = _tmp_##val; \
|
| + static_assert( \
|
| + _table1_##val == _table2_##val, \
|
| + "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
|
| +ICMPX8632_TABLE
|
| +#undef X
|
| +// Repeat the static asserts with respect to the high-level table
|
| +// entries in case the high-level table has extra entries.
|
| +#define X(tag, str) \
|
| + static_assert( \
|
| + _table1_##tag == _table2_##tag, \
|
| + "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
|
| +ICEINSTICMP_TABLE
|
| +#undef X
|
| +} // end of namespace dummy2
|
|
|
| - // Label this memory operand as randomize, so we won't randomize it
|
| - // again in case we call legalize() mutiple times on this memory
|
| - // operand.
|
| - NewMemOperand->setRandomized(true);
|
| - return NewMemOperand;
|
| - }
|
| - if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
|
| - // pool the constant offset
|
| - // FROM:
|
| - // offset[base, index, shift]
|
| - // TO:
|
| - // insert: mov $label, RegTemp
|
| - // insert: lea [base, RegTemp], RegTemp
|
| - // =>[RegTemp, index, shift]
|
| - assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
|
| - RPI_Pool);
|
| - // Memory operand should never exist as source operands in phi
|
| - // lowering assignments, so there is no need to reuse any registers
|
| - // here. For phi lowering, we should not ask for new physical
|
| - // registers in general.
|
| - // However, if we do meet Memory Operand during phi lowering, we
|
| - // should not blind or pool the immediates for now.
|
| - if (RegNum != Variable::NoRegister)
|
| - return MemOperand;
|
| - Variable *RegTemp = makeReg(IceType_i32);
|
| - IceString Label;
|
| - llvm::raw_string_ostream Label_stream(Label);
|
| - MemOperand->getOffset()->emitPoolLabel(Label_stream);
|
| - MemOperand->getOffset()->setShouldBePooled(true);
|
| - const RelocOffsetT SymOffset = 0;
|
| - bool SuppressMangling = true;
|
| - Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(),
|
| - SuppressMangling);
|
| - OperandX8632Mem *SymbolOperand = OperandX8632Mem::create(
|
| - Func, MemOperand->getOffset()->getType(), nullptr, Symbol);
|
| - _mov(RegTemp, SymbolOperand);
|
| - // If we have a base variable here, we should add the lea instruction
|
| - // to add the value of the base variable to RegTemp. If there is no
|
| - // base variable, we won't need this lea instruction.
|
| - if (MemOperand->getBase()) {
|
| - OperandX8632Mem *CalculateOperand = OperandX8632Mem::create(
|
| - Func, MemOperand->getType(), MemOperand->getBase(), nullptr,
|
| - RegTemp, 0, MemOperand->getSegmentRegister());
|
| - _lea(RegTemp, CalculateOperand);
|
| - _set_dest_nonkillable();
|
| - }
|
| - OperandX8632Mem *NewMemOperand = OperandX8632Mem::create(
|
| - Func, MemOperand->getType(), RegTemp, nullptr,
|
| - MemOperand->getIndex(), MemOperand->getShift(),
|
| - MemOperand->getSegmentRegister());
|
| - return NewMemOperand;
|
| - }
|
| - assert("Unsupported -randomize-pool-immediates option" && false);
|
| - }
|
| - }
|
| - // the offset is not eligible for blinding or pooling, return the original
|
| - // mem operand
|
| - return MemOperand;
|
| -}
|
| +// Validate the enum values in ICETYPEX8632_TABLE.
|
| +namespace dummy3 {
|
| +// Define a temporary set of enum values based on low-level table
|
| +// entries.
|
| +enum _tmp_enum {
|
| +#define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,
|
| + ICETYPEX8632_TABLE
|
| +#undef X
|
| + _num
|
| +};
|
| +// Define a set of constants based on high-level table entries.
|
| +#define X(tag, size, align, elts, elty, str) \
|
| + static const int _table1_##tag = tag;
|
| +ICETYPE_TABLE
|
| +#undef X
|
| +// Define a set of constants based on low-level table entries, and
|
| +// ensure the table entry keys are consistent.
|
| +#define X(tag, elementty, cvt, sdss, pack, width, fld) \
|
| + static const int _table2_##tag = _tmp_##tag; \
|
| + static_assert(_table1_##tag == _table2_##tag, \
|
| + "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
|
| +ICETYPEX8632_TABLE
|
| +#undef X
|
| +// Repeat the static asserts with respect to the high-level table
|
| +// entries in case the high-level table has extra entries.
|
| +#define X(tag, size, align, elts, elty, str) \
|
| + static_assert(_table1_##tag == _table2_##tag, \
|
| + "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
|
| +ICETYPE_TABLE
|
| +#undef X
|
| +} // end of namespace dummy3
|
| +} // end of anonymous namespace
|
|
|
| } // end of namespace Ice
|
|
|