Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(45)

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 1202533003: Extracts an TargetX86Base target which will be used as the common X86{32,64} implementation. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: src/IceTargetLoweringX8632.cpp
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index c1ba40429a8f788b78ed600e2a8f731d1f3ac83b..55a0bfc638789fa9a2d49f03783d79762f9b0f5f 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -2,5397 +2,165 @@
//
// The Subzero Code Generator
//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the TargetLoweringX8632 class, which
-// consists almost entirely of the lowering sequence for each
-// high-level instruction.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/MathExtras.h"
-
-#include "IceCfg.h"
-#include "IceCfgNode.h"
-#include "IceClFlags.h"
-#include "IceDefs.h"
-#include "IceELFObjectWriter.h"
-#include "IceGlobalInits.h"
-#include "IceInstX8632.h"
-#include "IceLiveness.h"
-#include "IceOperand.h"
-#include "IceRegistersX8632.h"
-#include "IceTargetLoweringX8632.def"
-#include "IceTargetLoweringX8632.h"
-#include "IceUtils.h"
-
-namespace Ice {
-
-namespace {
-
-// The following table summarizes the logic for lowering the fcmp
-// instruction. There is one table entry for each of the 16 conditions.
-//
-// The first four columns describe the case when the operands are
-// floating point scalar values. A comment in lowerFcmp() describes the
-// lowering template. In the most general case, there is a compare
-// followed by two conditional branches, because some fcmp conditions
-// don't map to a single x86 conditional branch. However, in many cases
-// it is possible to swap the operands in the comparison and have a
-// single conditional branch. Since it's quite tedious to validate the
-// table by hand, good execution tests are helpful.
-//
-// The last two columns describe the case when the operands are vectors
-// of floating point values. For most fcmp conditions, there is a clear
-// mapping to a single x86 cmpps instruction variant. Some fcmp
-// conditions require special code to handle and these are marked in the
-// table with a Cmpps_Invalid predicate.
-const struct TableFcmp_ {
- uint32_t Default;
- bool SwapScalarOperands;
- CondX86::BrCond C1, C2;
- bool SwapVectorOperands;
- CondX86::CmppsCond Predicate;
-} TableFcmp[] = {
-#define X(val, dflt, swapS, C1, C2, swapV, pred) \
- { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \
- ,
- FCMPX8632_TABLE
-#undef X
-};
-const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
-
-// The following table summarizes the logic for lowering the icmp instruction
-// for i32 and narrower types. Each icmp condition has a clear mapping to an
-// x86 conditional branch instruction.
-
-const struct TableIcmp32_ {
- CondX86::BrCond Mapping;
-} TableIcmp32[] = {
-#define X(val, C_32, C1_64, C2_64, C3_64) \
- { CondX86::C_32 } \
- ,
- ICMPX8632_TABLE
-#undef X
-};
-const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
-
-// The following table summarizes the logic for lowering the icmp instruction
-// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
-// conditional branches are needed. For the other conditions, three separate
-// conditional branches are needed.
-const struct TableIcmp64_ {
- CondX86::BrCond C1, C2, C3;
-} TableIcmp64[] = {
-#define X(val, C_32, C1_64, C2_64, C3_64) \
- { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 } \
- ,
- ICMPX8632_TABLE
-#undef X
-};
-const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
-
-CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
- size_t Index = static_cast<size_t>(Cond);
- assert(Index < TableIcmp32Size);
- return TableIcmp32[Index].Mapping;
-}
-
-const struct TableTypeX8632Attributes_ {
- Type InVectorElementType;
-} TableTypeX8632Attributes[] = {
-#define X(tag, elementty, cvt, sdss, pack, width, fld) \
- { elementty } \
- ,
- ICETYPEX8632_TABLE
-#undef X
-};
-const size_t TableTypeX8632AttributesSize =
- llvm::array_lengthof(TableTypeX8632Attributes);
-
-// Return the type which the elements of the vector have in the X86
-// representation of the vector.
-Type getInVectorElementType(Type Ty) {
- assert(isVectorType(Ty));
- size_t Index = static_cast<size_t>(Ty);
- (void)Index;
- assert(Index < TableTypeX8632AttributesSize);
- return TableTypeX8632Attributes[Ty].InVectorElementType;
-}
-
-// The maximum number of arguments to pass in XMM registers
-const uint32_t X86_MAX_XMM_ARGS = 4;
-// The number of bits in a byte
-const uint32_t X86_CHAR_BIT = 8;
-// Stack alignment
-const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
-// Size of the return address on the stack
-const uint32_t X86_RET_IP_SIZE_BYTES = 4;
-// The number of different NOP instructions
-const uint32_t X86_NUM_NOP_VARIANTS = 5;
-
-// Value is in bytes. Return Value adjusted to the next highest multiple
-// of the stack alignment.
-uint32_t applyStackAlignment(uint32_t Value) {
- return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
-}
-
-// In some cases, there are x-macros tables for both high-level and
-// low-level instructions/operands that use the same enum key value.
-// The tables are kept separate to maintain a proper separation
-// between abstraction layers. There is a risk that the tables could
-// get out of sync if enum values are reordered or if entries are
-// added or deleted. The following dummy namespaces use
-// static_asserts to ensure everything is kept in sync.
-
-// Validate the enum values in FCMPX8632_TABLE.
-namespace dummy1 {
-// Define a temporary set of enum values based on low-level table
-// entries.
-enum _tmp_enum {
-#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
- FCMPX8632_TABLE
-#undef X
- _num
-};
-// Define a set of constants based on high-level table entries.
-#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
-ICEINSTFCMP_TABLE
-#undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
-#define X(val, dflt, swapS, C1, C2, swapV, pred) \
- static const int _table2_##val = _tmp_##val; \
- static_assert( \
- _table1_##val == _table2_##val, \
- "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
-FCMPX8632_TABLE
-#undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
-#define X(tag, str) \
- static_assert( \
- _table1_##tag == _table2_##tag, \
- "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
-ICEINSTFCMP_TABLE
-#undef X
-} // end of namespace dummy1
-
-// Validate the enum values in ICMPX8632_TABLE.
-namespace dummy2 {
-// Define a temporary set of enum values based on low-level table
-// entries.
-enum _tmp_enum {
-#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
- ICMPX8632_TABLE
-#undef X
- _num
-};
-// Define a set of constants based on high-level table entries.
-#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
-ICEINSTICMP_TABLE
-#undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
-#define X(val, C_32, C1_64, C2_64, C3_64) \
- static const int _table2_##val = _tmp_##val; \
- static_assert( \
- _table1_##val == _table2_##val, \
- "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
-ICMPX8632_TABLE
-#undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
-#define X(tag, str) \
- static_assert( \
- _table1_##tag == _table2_##tag, \
- "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
-ICEINSTICMP_TABLE
-#undef X
-} // end of namespace dummy2
-
-// Validate the enum values in ICETYPEX8632_TABLE.
-namespace dummy3 {
-// Define a temporary set of enum values based on low-level table
-// entries.
-enum _tmp_enum {
-#define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,
- ICETYPEX8632_TABLE
-#undef X
- _num
-};
-// Define a set of constants based on high-level table entries.
-#define X(tag, size, align, elts, elty, str) \
- static const int _table1_##tag = tag;
-ICETYPE_TABLE
-#undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
-#define X(tag, elementty, cvt, sdss, pack, width, fld) \
- static const int _table2_##tag = _tmp_##tag; \
- static_assert(_table1_##tag == _table2_##tag, \
- "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
-ICETYPEX8632_TABLE
-#undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
-#define X(tag, size, align, elts, elty, str) \
- static_assert(_table1_##tag == _table2_##tag, \
- "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
-ICETYPE_TABLE
-#undef X
-} // end of namespace dummy3
-
-// A helper class to ease the settings of RandomizationPoolingPause
-// to disable constant blinding or pooling for some translation phases.
-class BoolFlagSaver {
- BoolFlagSaver() = delete;
- BoolFlagSaver(const BoolFlagSaver &) = delete;
- BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;
-
-public:
- BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; }
- ~BoolFlagSaver() { Flag = OldValue; }
-
-private:
- const bool OldValue;
- bool &Flag;
-};
-
-} // end of anonymous namespace
-
-BoolFoldingEntry::BoolFoldingEntry(Inst *I)
- : Instr(I), IsComplex(BoolFolding::hasComplexLowering(I)) {}
-
-BoolFolding::BoolFoldingProducerKind
-BoolFolding::getProducerKind(const Inst *Instr) {
- if (llvm::isa<InstIcmp>(Instr)) {
- if (Instr->getSrc(0)->getType() != IceType_i64)
- return PK_Icmp32;
- return PK_None; // TODO(stichnot): actually PK_Icmp64;
- }
- return PK_None; // TODO(stichnot): remove this
-
- if (llvm::isa<InstFcmp>(Instr))
- return PK_Fcmp;
- if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
- switch (Cast->getCastKind()) {
- default:
- return PK_None;
- case InstCast::Trunc:
- return PK_Trunc;
- }
- }
- return PK_None;
-}
-
-BoolFolding::BoolFoldingConsumerKind
-BoolFolding::getConsumerKind(const Inst *Instr) {
- if (llvm::isa<InstBr>(Instr))
- return CK_Br;
- if (llvm::isa<InstSelect>(Instr))
- return CK_Select;
- return CK_None; // TODO(stichnot): remove this
-
- if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {
- switch (Cast->getCastKind()) {
- default:
- return CK_None;
- case InstCast::Sext:
- return CK_Sext;
- case InstCast::Zext:
- return CK_Zext;
- }
- }
- return CK_None;
-}
-
-// Returns true if the producing instruction has a "complex" lowering
-// sequence. This generally means that its lowering sequence requires
-// more than one conditional branch, namely 64-bit integer compares
-// and some floating-point compares. When this is true, and there is
-// more than one consumer, we prefer to disable the folding
-// optimization because it minimizes branches.
-bool BoolFolding::hasComplexLowering(const Inst *Instr) {
- switch (getProducerKind(Instr)) {
- default:
- return false;
- case PK_Icmp64:
- return true;
- case PK_Fcmp:
- return TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 !=
- CondX86::Br_None;
- }
-}
-
-void BoolFolding::init(CfgNode *Node) {
- Producers.clear();
- for (Inst &Instr : Node->getInsts()) {
- // Check whether Instr is a valid producer.
- Variable *Var = Instr.getDest();
- if (!Instr.isDeleted() // only consider non-deleted instructions
- && Var // only instructions with an actual dest var
- && Var->getType() == IceType_i1 // only bool-type dest vars
- && getProducerKind(&Instr) != PK_None) { // white-listed instructions
- Producers[Var->getIndex()] = BoolFoldingEntry(&Instr);
- }
- // Check each src variable against the map.
- for (SizeT I = 0; I < Instr.getSrcSize(); ++I) {
- Operand *Src = Instr.getSrc(I);
- SizeT NumVars = Src->getNumVars();
- for (SizeT J = 0; J < NumVars; ++J) {
- const Variable *Var = Src->getVar(J);
- SizeT VarNum = Var->getIndex();
- if (containsValid(VarNum)) {
- if (I != 0 // All valid consumers use Var as the first source operand
- || getConsumerKind(&Instr) == CK_None // must be white-listed
- || (Producers[VarNum].IsComplex && // complex can't be multi-use
- Producers[VarNum].NumUses > 0)) {
- setInvalid(VarNum);
- continue;
- }
- ++Producers[VarNum].NumUses;
- if (Instr.isLastUse(Var)) {
- Producers[VarNum].IsLiveOut = false;
- }
- }
- }
- }
- }
- for (auto &I : Producers) {
- // Ignore entries previously marked invalid.
- if (I.second.Instr == nullptr)
- continue;
- // Disable the producer if its dest may be live beyond this block.
- if (I.second.IsLiveOut) {
- setInvalid(I.first);
- continue;
- }
- // Mark as "dead" rather than outright deleting. This is so that
- // other peephole style optimizations during or before lowering
- // have access to this instruction in undeleted form. See for
- // example tryOptimizedCmpxchgCmpBr().
- I.second.Instr->setDead();
- }
-}
-
-const Inst *BoolFolding::getProducerFor(const Operand *Opnd) const {
- auto *Var = llvm::dyn_cast<const Variable>(Opnd);
- if (Var == nullptr)
- return nullptr;
- SizeT VarNum = Var->getIndex();
- auto Element = Producers.find(VarNum);
- if (Element == Producers.end())
- return nullptr;
- return Element->second.Instr;
-}
-
-void BoolFolding::dump(const Cfg *Func) const {
- if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding))
- return;
- OstreamLocker L(Func->getContext());
- Ostream &Str = Func->getContext()->getStrDump();
- for (auto &I : Producers) {
- if (I.second.Instr == nullptr)
- continue;
- Str << "Found foldable producer:\n ";
- I.second.Instr->dump(Func);
- Str << "\n";
- }
-}
-
-void TargetX8632::initNodeForLowering(CfgNode *Node) {
- FoldingInfo.init(Node);
- FoldingInfo.dump(Func);
-}
-
-TargetX8632::TargetX8632(Cfg *Func) : TargetLowering(Func) {
- static_assert((X86InstructionSet::End - X86InstructionSet::Begin) ==
- (TargetInstructionSet::X86InstructionSet_End -
- TargetInstructionSet::X86InstructionSet_Begin),
- "X86InstructionSet range different from TargetInstructionSet");
- if (Func->getContext()->getFlags().getTargetInstructionSet() !=
- TargetInstructionSet::BaseInstructionSet) {
- InstructionSet = static_cast<X86InstructionSet>(
- (Func->getContext()->getFlags().getTargetInstructionSet() -
- TargetInstructionSet::X86InstructionSet_Begin) +
- X86InstructionSet::Begin);
- }
- // TODO: Don't initialize IntegerRegisters and friends every time.
- // Instead, initialize in some sort of static initializer for the
- // class.
- llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM);
- llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM);
- llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM);
- llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM);
- llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM);
- ScratchRegs.resize(RegX8632::Reg_NUM);
-#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
- frameptr, isI8, isInt, isFP) \
- IntegerRegisters[RegX8632::val] = isInt; \
- IntegerRegistersI8[RegX8632::val] = isI8; \
- FloatRegisters[RegX8632::val] = isFP; \
- VectorRegisters[RegX8632::val] = isFP; \
- ScratchRegs[RegX8632::val] = scratch;
- REGX8632_TABLE;
-#undef X
- TypeToRegisterSet[IceType_void] = InvalidRegisters;
- TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
- TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
- TypeToRegisterSet[IceType_i16] = IntegerRegisters;
- TypeToRegisterSet[IceType_i32] = IntegerRegisters;
- TypeToRegisterSet[IceType_i64] = IntegerRegisters;
- TypeToRegisterSet[IceType_f32] = FloatRegisters;
- TypeToRegisterSet[IceType_f64] = FloatRegisters;
- TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
- TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
- TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
- TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
- TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
- TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
- TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
-}
-
-void TargetX8632::translateO2() {
- TimerMarker T(TimerStack::TT_O2, Func);
-
- if (!Ctx->getFlags().getPhiEdgeSplit()) {
- // Lower Phi instructions.
- Func->placePhiLoads();
- if (Func->hasError())
- return;
- Func->placePhiStores();
- if (Func->hasError())
- return;
- Func->deletePhis();
- if (Func->hasError())
- return;
- Func->dump("After Phi lowering");
- }
-
- // Address mode optimization.
- Func->getVMetadata()->init(VMK_SingleDefs);
- Func->doAddressOpt();
-
- // Find read-modify-write opportunities. Do this after address mode
- // optimization so that doAddressOpt() doesn't need to be applied to RMW
- // instructions as well.
- findRMW();
- Func->dump("After RMW transform");
-
- // Argument lowering
- Func->doArgLowering();
-
- // Target lowering. This requires liveness analysis for some parts
- // of the lowering decisions, such as compare/branch fusing. If
- // non-lightweight liveness analysis is used, the instructions need
- // to be renumbered first. TODO: This renumbering should only be
- // necessary if we're actually calculating live intervals, which we
- // only do for register allocation.
- Func->renumberInstructions();
- if (Func->hasError())
- return;
-
- // TODO: It should be sufficient to use the fastest liveness
- // calculation, i.e. livenessLightweight(). However, for some
- // reason that slows down the rest of the translation. Investigate.
- Func->liveness(Liveness_Basic);
- if (Func->hasError())
- return;
- Func->dump("After x86 address mode opt");
-
- // Disable constant blinding or pooling for load optimization.
- {
- BoolFlagSaver B(RandomizationPoolingPaused, true);
- doLoadOpt();
- }
- Func->genCode();
- if (Func->hasError())
- return;
- Func->dump("After x86 codegen");
-
- // Register allocation. This requires instruction renumbering and
- // full liveness analysis.
- Func->renumberInstructions();
- if (Func->hasError())
- return;
- Func->liveness(Liveness_Intervals);
- if (Func->hasError())
- return;
- // Validate the live range computations. The expensive validation
- // call is deliberately only made when assertions are enabled.
- assert(Func->validateLiveness());
- // The post-codegen dump is done here, after liveness analysis and
- // associated cleanup, to make the dump cleaner and more useful.
- Func->dump("After initial x8632 codegen");
- Func->getVMetadata()->init(VMK_All);
- regAlloc(RAK_Global);
- if (Func->hasError())
- return;
- Func->dump("After linear scan regalloc");
-
- if (Ctx->getFlags().getPhiEdgeSplit()) {
- // We need to pause constant blinding or pooling during advanced
- // phi lowering, unless the lowering assignment has a physical
- // register for the dest Variable.
- {
- BoolFlagSaver B(RandomizationPoolingPaused, true);
- Func->advancedPhiLowering();
- }
- Func->dump("After advanced Phi lowering");
- }
-
- // Stack frame mapping.
- Func->genFrame();
- if (Func->hasError())
- return;
- Func->dump("After stack frame mapping");
-
- Func->contractEmptyNodes();
- Func->reorderNodes();
-
- // Branch optimization. This needs to be done just before code
- // emission. In particular, no transformations that insert or
- // reorder CfgNodes should be done after branch optimization. We go
- // ahead and do it before nop insertion to reduce the amount of work
- // needed for searching for opportunities.
- Func->doBranchOpt();
- Func->dump("After branch optimization");
-
- // Nop insertion
- if (Ctx->getFlags().shouldDoNopInsertion()) {
- Func->doNopInsertion();
- }
-}
-
-void TargetX8632::translateOm1() {
- TimerMarker T(TimerStack::TT_Om1, Func);
-
- Func->placePhiLoads();
- if (Func->hasError())
- return;
- Func->placePhiStores();
- if (Func->hasError())
- return;
- Func->deletePhis();
- if (Func->hasError())
- return;
- Func->dump("After Phi lowering");
-
- Func->doArgLowering();
-
- Func->genCode();
- if (Func->hasError())
- return;
- Func->dump("After initial x8632 codegen");
-
- regAlloc(RAK_InfOnly);
- if (Func->hasError())
- return;
- Func->dump("After regalloc of infinite-weight variables");
-
- Func->genFrame();
- if (Func->hasError())
- return;
- Func->dump("After stack frame mapping");
-
- // Nop insertion
- if (Ctx->getFlags().shouldDoNopInsertion()) {
- Func->doNopInsertion();
- }
-}
-
-namespace {
-
-bool canRMW(const InstArithmetic *Arith) {
- Type Ty = Arith->getDest()->getType();
- // X86 vector instructions write to a register and have no RMW
- // option.
- if (isVectorType(Ty))
- return false;
- bool isI64 = Ty == IceType_i64;
-
- switch (Arith->getOp()) {
- // Not handled for lack of simple lowering:
- // shift on i64
- // mul, udiv, urem, sdiv, srem, frem
- // Not handled for lack of RMW instructions:
- // fadd, fsub, fmul, fdiv (also vector types)
- default:
- return false;
- case InstArithmetic::Add:
- case InstArithmetic::Sub:
- case InstArithmetic::And:
- case InstArithmetic::Or:
- case InstArithmetic::Xor:
- return true;
- case InstArithmetic::Shl:
- case InstArithmetic::Lshr:
- case InstArithmetic::Ashr:
- return false; // TODO(stichnot): implement
- return !isI64;
- }
-}
-
-bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
- if (A == B)
- return true;
- if (auto *MemA = llvm::dyn_cast<OperandX8632Mem>(A)) {
- if (auto *MemB = llvm::dyn_cast<OperandX8632Mem>(B)) {
- return MemA->getBase() == MemB->getBase() &&
- MemA->getOffset() == MemB->getOffset() &&
- MemA->getIndex() == MemB->getIndex() &&
- MemA->getShift() == MemB->getShift() &&
- MemA->getSegmentRegister() == MemB->getSegmentRegister();
- }
- }
- return false;
-}
-
-} // end of anonymous namespace
-
-void TargetX8632::findRMW() {
- Func->dump("Before RMW");
- OstreamLocker L(Func->getContext());
- Ostream &Str = Func->getContext()->getStrDump();
- for (CfgNode *Node : Func->getNodes()) {
- // Walk through the instructions, considering each sequence of 3
- // instructions, and look for the particular RMW pattern. Note that this
- // search can be "broken" (false negatives) if there are intervening deleted
- // instructions, or intervening instructions that could be safely moved out
- // of the way to reveal an RMW pattern.
- auto E = Node->getInsts().end();
- auto I1 = E, I2 = E, I3 = Node->getInsts().begin();
- for (; I3 != E; I1 = I2, I2 = I3, ++I3) {
- // Make I3 skip over deleted instructions.
- while (I3 != E && I3->isDeleted())
- ++I3;
- if (I1 == E || I2 == E || I3 == E)
- continue;
- assert(!I1->isDeleted());
- assert(!I2->isDeleted());
- assert(!I3->isDeleted());
- if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) {
- if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) {
- if (auto *Store = llvm::dyn_cast<InstStore>(I3)) {
- // Look for:
- // a = Load addr
- // b = <op> a, other
- // Store b, addr
- // Change to:
- // a = Load addr
- // b = <op> a, other
- // x = FakeDef
- // RMW <op>, addr, other, x
- // b = Store b, addr, x
- // Note that inferTwoAddress() makes sure setDestNonKillable() gets
- // called on the updated Store instruction, to avoid liveness
- // problems later.
- //
- // With this transformation, the Store instruction acquires a Dest
- // variable and is now subject to dead code elimination if there are
- // no more uses of "b". Variable "x" is a beacon for determining
- // whether the Store instruction gets dead-code eliminated. If the
- // Store instruction is eliminated, then it must be the case that
- // the RMW instruction ends x's live range, and therefore the RMW
- // instruction will be retained and later lowered. On the other
- // hand, if the RMW instruction does not end x's live range, then
- // the Store instruction must still be present, and therefore the
- // RMW instruction is ignored during lowering because it is
- // redundant with the Store instruction.
- //
- // Note that if "a" has further uses, the RMW transformation may
- // still trigger, resulting in two loads and one store, which is
- // worse than the original one load and one store. However, this is
- // probably rare, and caching probably keeps it just as fast.
- if (!isSameMemAddressOperand(Load->getSourceAddress(),
- Store->getAddr()))
- continue;
- Operand *ArithSrcFromLoad = Arith->getSrc(0);
- Operand *ArithSrcOther = Arith->getSrc(1);
- if (ArithSrcFromLoad != Load->getDest()) {
- if (!Arith->isCommutative() || ArithSrcOther != Load->getDest())
- continue;
- std::swap(ArithSrcFromLoad, ArithSrcOther);
- }
- if (Arith->getDest() != Store->getData())
- continue;
- if (!canRMW(Arith))
- continue;
- if (Func->isVerbose(IceV_RMW)) {
- Str << "Found RMW in " << Func->getFunctionName() << ":\n ";
- Load->dump(Func);
- Str << "\n ";
- Arith->dump(Func);
- Str << "\n ";
- Store->dump(Func);
- Str << "\n";
- }
- Variable *Beacon = Func->makeVariable(IceType_i32);
- Beacon->setWeight(0);
- Store->setRmwBeacon(Beacon);
- InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
- Node->getInsts().insert(I3, BeaconDef);
- InstX8632FakeRMW *RMW = InstX8632FakeRMW::create(
- Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp());
- Node->getInsts().insert(I3, RMW);
- }
- }
- }
- }
- }
-}
-
-namespace {
-
-// Converts a ConstantInteger32 operand into its constant value, or
-// MemoryOrderInvalid if the operand is not a ConstantInteger32.
-uint64_t getConstantMemoryOrder(Operand *Opnd) {
- if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
- return Integer->getValue();
- return Intrinsics::MemoryOrderInvalid;
-}
-
-// Determines whether the dest of a Load instruction can be folded
-// into one of the src operands of a 2-operand instruction. This is
-// true as long as the load dest matches exactly one of the binary
-// instruction's src operands. Replaces Src0 or Src1 with LoadSrc if
-// the answer is true.
-bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
- Operand *&Src0, Operand *&Src1) {
- if (Src0 == LoadDest && Src1 != LoadDest) {
- Src0 = LoadSrc;
- return true;
- }
- if (Src0 != LoadDest && Src1 == LoadDest) {
- Src1 = LoadSrc;
- return true;
- }
- return false;
-}
-
-} // end of anonymous namespace
-
-void TargetX8632::doLoadOpt() {
- for (CfgNode *Node : Func->getNodes()) {
- Context.init(Node);
- while (!Context.atEnd()) {
- Variable *LoadDest = nullptr;
- Operand *LoadSrc = nullptr;
- Inst *CurInst = Context.getCur();
- Inst *Next = Context.getNextInst();
- // Determine whether the current instruction is a Load
- // instruction or equivalent.
- if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
- // An InstLoad always qualifies.
- LoadDest = Load->getDest();
- const bool DoLegalize = false;
- LoadSrc = formMemoryOperand(Load->getSourceAddress(),
- LoadDest->getType(), DoLegalize);
- } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
- // An AtomicLoad intrinsic qualifies as long as it has a valid
- // memory ordering, and can be implemented in a single
- // instruction (i.e., not i64).
- Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
- if (ID == Intrinsics::AtomicLoad &&
- Intrin->getDest()->getType() != IceType_i64 &&
- Intrinsics::isMemoryOrderValid(
- ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
- LoadDest = Intrin->getDest();
- const bool DoLegalize = false;
- LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
- DoLegalize);
- }
- }
- // A Load instruction can be folded into the following
- // instruction only if the following instruction ends the Load's
- // Dest variable's live range.
- if (LoadDest && Next && Next->isLastUse(LoadDest)) {
- assert(LoadSrc);
- Inst *NewInst = nullptr;
- if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) {
- Operand *Src0 = Arith->getSrc(0);
- Operand *Src1 = Arith->getSrc(1);
- if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
- NewInst = InstArithmetic::create(Func, Arith->getOp(),
- Arith->getDest(), Src0, Src1);
- }
- } else if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Next)) {
- Operand *Src0 = Icmp->getSrc(0);
- Operand *Src1 = Icmp->getSrc(1);
- if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
- NewInst = InstIcmp::create(Func, Icmp->getCondition(),
- Icmp->getDest(), Src0, Src1);
- }
- } else if (auto *Fcmp = llvm::dyn_cast<InstFcmp>(Next)) {
- Operand *Src0 = Fcmp->getSrc(0);
- Operand *Src1 = Fcmp->getSrc(1);
- if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
- NewInst = InstFcmp::create(Func, Fcmp->getCondition(),
- Fcmp->getDest(), Src0, Src1);
- }
- } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) {
- Operand *Src0 = Select->getTrueOperand();
- Operand *Src1 = Select->getFalseOperand();
- if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
- NewInst = InstSelect::create(Func, Select->getDest(),
- Select->getCondition(), Src0, Src1);
- }
- } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) {
- // The load dest can always be folded into a Cast
- // instruction.
- Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0));
- if (Src0 == LoadDest) {
- NewInst = InstCast::create(Func, Cast->getCastKind(),
- Cast->getDest(), LoadSrc);
- }
- }
- if (NewInst) {
- CurInst->setDeleted();
- Next->setDeleted();
- Context.insert(NewInst);
- // Update NewInst->LiveRangesEnded so that target lowering
- // may benefit. Also update NewInst->HasSideEffects.
- NewInst->spliceLivenessInfo(Next, CurInst);
- }
- }
- Context.advanceCur();
- Context.advanceNext();
- }
- }
- Func->dump("After load optimization");
-}
-
-bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {
- if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {
- return Br->optimizeBranch(NextNode);
- }
- return false;
-}
-
-IceString TargetX8632::RegNames[] = {
-#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
- frameptr, isI8, isInt, isFP) \
- name,
- REGX8632_TABLE
-#undef X
-};
-
-Variable *TargetX8632::getPhysicalRegister(SizeT RegNum, Type Ty) {
- if (Ty == IceType_void)
- Ty = IceType_i32;
- if (PhysicalRegisters[Ty].empty())
- PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM);
- assert(RegNum < PhysicalRegisters[Ty].size());
- Variable *Reg = PhysicalRegisters[Ty][RegNum];
- if (Reg == nullptr) {
- Reg = Func->makeVariable(Ty);
- Reg->setRegNum(RegNum);
- PhysicalRegisters[Ty][RegNum] = Reg;
- // Specially mark esp as an "argument" so that it is considered
- // live upon function entry.
- if (RegNum == RegX8632::Reg_esp) {
- Func->addImplicitArg(Reg);
- Reg->setIgnoreLiveness();
- }
- }
- return Reg;
-}
-
-IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
- assert(RegNum < RegX8632::Reg_NUM);
- static IceString RegNames8[] = {
-#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
- frameptr, isI8, isInt, isFP) \
- name8,
- REGX8632_TABLE
-#undef X
- };
- static IceString RegNames16[] = {
-#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
- frameptr, isI8, isInt, isFP) \
- name16,
- REGX8632_TABLE
-#undef X
- };
- switch (Ty) {
- case IceType_i1:
- case IceType_i8:
- return RegNames8[RegNum];
- case IceType_i16:
- return RegNames16[RegNum];
- default:
- return RegNames[RegNum];
- }
-}
-
-// Writes Var to the emit stream as an operand: "%reg" when Var has a
-// register, otherwise "offset(%basereg)" relative to the frame or
-// stack register, where a zero offset is left out.
-void TargetX8632::emitVariable(const Variable *Var) const {
-  Ostream &Str = Ctx->getStrEmit();
-  if (!Var->hasReg()) {
-    if (Var->getWeight().isInf()) {
-      llvm_unreachable("Infinite-weight Variable has no register assigned");
-    }
-    int32_t Disp = Var->getStackOffset();
-    // Without a frame pointer, the current stack adjustment is folded
-    // into the displacement.
-    if (!hasFramePointer())
-      Disp += getStackAdjustment();
-    if (Disp != 0)
-      Str << Disp;
-    const Type FrameSPTy = IceType_i32;
-    Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")";
-    return;
-  }
-  Str << "%" << getRegName(Var->getRegNum(), Var->getType());
-}
-
-// Builds the assembler address of a stack-resident Var: the frame or
-// stack register as base plus Var's stack offset (including the stack
-// adjustment when there is no frame pointer). Var must not have a
-// register and must not have infinite weight.
-X8632::Address TargetX8632::stackVarToAsmOperand(const Variable *Var) const {
-  if (Var->hasReg()) {
-    llvm_unreachable("Stack Variable has a register assigned");
-  }
-  if (Var->getWeight().isInf())
-    llvm_unreachable("Infinite-weight Variable has no register assigned");
-  int32_t Disp = Var->getStackOffset();
-  if (!hasFramePointer())
-    Disp += getStackAdjustment();
-  const auto BaseReg = RegX8632::getEncodedGPR(getFrameOrStackReg());
-  return X8632::Address(BaseReg, Disp);
-}
-
-// Rewrites the function's argument list for vector arguments passed in
-// registers. Up to X86_MAX_XMM_ARGS arguments of vector type,
-// regardless of their position among the other arguments, are
-// assigned consecutive registers starting at xmm0. Each such argument
-// is replaced in the list by a new "home register" variable, and an
-// assignment from the home register to the original argument is
-// inserted at the start of the entry node.
-void TargetX8632::lowerArguments() {
-  VarList &Args = Func->getArgs();
-  unsigned XmmArgsSeen = 0;
-
-  Context.init(Func->getEntryNode());
-  Context.setInsertPoint(Context.getCur());
-
-  const SizeT NumArgs = Args.size();
-  for (SizeT Index = 0; Index < NumArgs && XmmArgsSeen < X86_MAX_XMM_ARGS;
-       ++Index) {
-    Variable *Arg = Args[Index];
-    const Type ArgTy = Arg->getType();
-    if (isVectorType(ArgTy)) {
-      const int32_t HomeRegNum = RegX8632::Reg_xmm0 + XmmArgsSeen;
-      ++XmmArgsSeen;
-      Variable *HomeReg = Func->makeVariable(ArgTy);
-      if (ALLOW_DUMP)
-        HomeReg->setName(Func, "home_reg:" + Arg->getName(Func));
-      HomeReg->setRegNum(HomeRegNum);
-      // The home register takes over the "is an argument" role.
-      HomeReg->setIsArg();
-      Arg->setIsArg(false);
-
-      Args[Index] = HomeReg;
-      Context.insert(InstAssign::create(Func, Arg, HomeReg));
-    }
-  }
-}
-
-// Helper for addProlog(), for an argument that is passed on the stack.
-//
-// Assigns Arg its frame offset (BasicFrameOffset plus the bytes of
-// in-args laid out so far) and grows InArgsSizeBytes by Arg's on-stack
-// width; vector arguments first round InArgsSizeBytes up to the stack
-// alignment. An i64 argument that has been split into Lo and Hi is
-// handled by recursing on the two halves, Lo first because the
-// architecture is little-endian. If Arg has been assigned a register,
-// a load from its stack location into that register is emitted.
-void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
-                                         size_t BasicFrameOffset,
-                                         size_t &InArgsSizeBytes) {
-  const Type Ty = Arg->getType();
-  Variable *LoHalf = Arg->getLo();
-  Variable *HiHalf = Arg->getHi();
-  if (Ty == IceType_i64 && LoHalf && HiHalf) {
-    // The halves must not be i64 themselves, or this would recurse
-    // forever.
-    assert(LoHalf->getType() != IceType_i64);
-    assert(HiHalf->getType() != IceType_i64);
-    finishArgumentLowering(LoHalf, FramePtr, BasicFrameOffset,
-                           InArgsSizeBytes);
-    finishArgumentLowering(HiHalf, FramePtr, BasicFrameOffset,
-                           InArgsSizeBytes);
-    return;
-  }
-
-  const bool IsVector = isVectorType(Ty);
-  if (IsVector)
-    InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
-  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
-  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
-
-  if (!Arg->hasReg())
-    return;
-
-  assert(Ty != IceType_i64);
-  OperandX8632Mem *StackSlot = OperandX8632Mem::create(
-      Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
-  if (IsVector)
-    _movp(Arg, StackSlot);
-  else
-    _mov(Arg, StackSlot);
-  // The copy above reads through an explicit OperandX8632Mem rather
-  // than a Variable, so this fill from the stack has to be counted in
-  // the statistics by hand.
-  Ctx->statsUpdateFills();
-}
-
-// Reports the type used for a stack slot on this target: i32.
-Type TargetX8632::stackSlotType() {
-  return IceType_i32;
-}
-
-// Emits the function prolog at the start of Node and computes the stack
-// frame layout. In order, this: collects the spilled variables and the
-// sizes/alignments of the spill areas, pushes the callee-saved
-// registers that are in use, sets up ebp when the frame is ebp-based,
-// pads and aligns the spill areas, subtracts the frame size from esp,
-// assigns stack offsets to stack-passed arguments and to spilled
-// variables, and finally (when frame dumping is enabled) prints the
-// resulting layout.
-void TargetX8632::addProlog(CfgNode *Node) {
- // Stack frame layout:
- //
- // +------------------------+
- // | 1. return address |
- // +------------------------+
- // | 2. preserved registers |
- // +------------------------+
- // | 3. padding |
- // +------------------------+
- // | 4. global spill area |
- // +------------------------+
- // | 5. padding |
- // +------------------------+
- // | 6. local spill area |
- // +------------------------+
- // | 7. padding |
- // +------------------------+
- // | 8. allocas |
- // +------------------------+
- //
- // The following variables record the size in bytes of the given areas:
- // * X86_RET_IP_SIZE_BYTES: area 1
- // * PreservedRegsSizeBytes: area 2
- // * SpillAreaPaddingBytes: area 3
- // * GlobalsSize: area 4
- // * GlobalsAndSubsequentPaddingSize: areas 4 - 5
- // * LocalsSpillAreaSize: area 6
- // * SpillAreaSizeBytes: areas 3 - 7
-
- // Determine stack frame offsets for each Variable without a
- // register assignment. This can be done as one variable per stack
- // slot. Or, do coalescing by running the register allocator again
- // with an infinite set of registers (as a side effect, this gives
- // variables a second chance at physical register assignment).
- //
- // A middle ground approach is to leverage sparsity and allocate one
- // block of space on the frame for globals (variables with
- // multi-block lifetime), and one block to share for locals
- // (single-block lifetime).
-
- Context.init(Node);
- Context.setInsertPoint(Context.getCur());
-
- llvm::SmallBitVector CalleeSaves =
- getRegisterSet(RegSet_CalleeSave, RegSet_None);
- RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
- VarList SortedSpilledVariables, VariablesLinkedToSpillSlots;
- size_t GlobalsSize = 0;
- // If there is a separate locals area, this represents that area.
- // Otherwise it counts any variable not counted by GlobalsSize.
- SpillAreaSizeBytes = 0;
- // If there is a separate locals area, this specifies the alignment
- // for it.
- uint32_t LocalsSlotsAlignmentBytes = 0;
- // The entire spill locations area gets aligned to largest natural
- // alignment of the variables that have a spill slot.
- uint32_t SpillAreaAlignmentBytes = 0;
- // A spill slot linked to a variable with a stack slot should reuse
- // that stack slot.
- // The hook returns true for such a SpillVariable (after recording it in
- // VariablesLinkedToSpillSlots) and false for every other variable.
- std::function<bool(Variable *)> TargetVarHook =
- [&VariablesLinkedToSpillSlots](Variable *Var) {
- if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) {
- assert(Var->getWeight().isZero());
- if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
- VariablesLinkedToSpillSlots.push_back(Var);
- return true;
- }
- }
- return false;
- };
-
- // Compute the list of spilled variables and bounds for GlobalsSize, etc.
- getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
- &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
- &LocalsSlotsAlignmentBytes, TargetVarHook);
- // Remember the locals-only size before the globals size is added in.
- uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
- SpillAreaSizeBytes += GlobalsSize;
-
- // Add push instructions for preserved registers.
- uint32_t NumCallee = 0;
- size_t PreservedRegsSizeBytes = 0;
- for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
- if (CalleeSaves[i] && RegsUsed[i]) {
- ++NumCallee;
- PreservedRegsSizeBytes += 4;
- _push(getPhysicalRegister(i));
- }
- }
- Ctx->statsUpdateRegistersSaved(NumCallee);
-
- // Generate "push ebp; mov ebp, esp"
- if (IsEbpBasedFrame) {
- assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
- .count() == 0);
- PreservedRegsSizeBytes += 4;
- Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
- Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
- _push(ebp);
- _mov(ebp, esp);
- // Keep ebp live for late-stage liveness analysis
- // (e.g. asm-verbose mode).
- Context.insert(InstFakeUse::create(Func, ebp));
- }
-
- // Align the variables area. SpillAreaPaddingBytes is the size of
- // the region after the preserved registers and before the spill areas.
- // LocalsSlotsPaddingBytes is the amount of padding between the globals
- // and locals area if they are separate.
- assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES);
- assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
- uint32_t SpillAreaPaddingBytes = 0;
- uint32_t LocalsSlotsPaddingBytes = 0;
- alignStackSpillAreas(X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,
- SpillAreaAlignmentBytes, GlobalsSize,
- LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,
- &LocalsSlotsPaddingBytes);
- SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
- uint32_t GlobalsAndSubsequentPaddingSize =
- GlobalsSize + LocalsSlotsPaddingBytes;
-
- // Align esp if necessary.
- if (NeedsStackAlignment) {
- uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
- uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
- SpillAreaSizeBytes = StackSize - StackOffset;
- }
-
- // Generate "sub esp, SpillAreaSizeBytes"
- if (SpillAreaSizeBytes)
- _sub(getPhysicalRegister(RegX8632::Reg_esp),
- Ctx->getConstantInt32(SpillAreaSizeBytes));
- Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
-
- resetStackAdjustment();
-
- // Fill in stack offsets for stack args, and copy args into registers
- // for those that were register-allocated. Args are pushed right to
- // left, so Arg[0] is closest to the stack/frame pointer.
- Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
- size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
- // With an esp-based frame the spill area also lies between the base
- // register and the incoming arguments.
- if (!IsEbpBasedFrame)
- BasicFrameOffset += SpillAreaSizeBytes;
-
- const VarList &Args = Func->getArgs();
- size_t InArgsSizeBytes = 0;
- unsigned NumXmmArgs = 0;
- for (Variable *Arg : Args) {
- // Skip arguments passed in registers.
- if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) {
- ++NumXmmArgs;
- continue;
- }
- finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
- }
-
- // Fill in stack offsets for locals.
- assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
- SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
- IsEbpBasedFrame);
- // Assign stack offsets to variables that have been linked to spilled
- // variables.
- for (Variable *Var : VariablesLinkedToSpillSlots) {
- Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo();
- Var->setStackOffset(Linked->getStackOffset());
- }
- this->HasComputedFrame = true;
-
- // Optional dump of the computed frame layout.
- if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) {
- OstreamLocker L(Func->getContext());
- Ostream &Str = Func->getContext()->getStrDump();
-
- Str << "Stack layout:\n";
- // Whatever remains of the esp adjustment after the locals, globals (with
- // their trailing padding) and leading padding are subtracted.
- uint32_t EspAdjustmentPaddingSize =
- SpillAreaSizeBytes - LocalsSpillAreaSize -
- GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
- Str << " in-args = " << InArgsSizeBytes << " bytes\n"
- << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n"
- << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
- << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
- << " globals spill area = " << GlobalsSize << " bytes\n"
- << " globals-locals spill areas intermediate padding = "
- << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
- << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
- << " esp alignment padding = " << EspAdjustmentPaddingSize
- << " bytes\n";
-
- Str << "Stack details:\n"
- << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"
- << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
- << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
- << " bytes\n"
- << " is ebp based = " << IsEbpBasedFrame << "\n";
- }
-}
-
-// Emits the function epilog in Node, immediately before the last ret
-// instruction found in it; a node without a ret is left untouched.
-// The epilog restores esp (from ebp for an ebp-based frame, otherwise
-// by adding back SpillAreaSizeBytes), pops the callee-saved registers
-// in the reverse of their register-number order, and, when sandboxing
-// is enabled, replaces the ret with a sandboxed return sequence.
-void TargetX8632::addEpilog(CfgNode *Node) {
-  InstList &Insts = Node->getInsts();
-  // Scan backward for the return instruction.
-  InstList::reverse_iterator RI = Insts.rbegin();
-  InstList::reverse_iterator REnd = Insts.rend();
-  while (RI != REnd && !llvm::isa<InstX8632Ret>(*RI))
-    ++RI;
-  if (RI == REnd)
-    return;
-
-  // Convert the reverse_iterator position into its corresponding
-  // (forward) iterator position.
-  InstList::iterator RetPos = RI.base();
-  --RetPos;
-  Context.init(Node);
-  Context.setInsertPoint(RetPos);
-
-  Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
-  if (!IsEbpBasedFrame) {
-    // add esp, SpillAreaSizeBytes
-    if (SpillAreaSizeBytes)
-      _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes));
-  } else {
-    Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
-    // For late-stage liveness analysis (e.g. asm-verbose mode),
-    // adding a fake use of esp before the assignment of esp=ebp keeps
-    // previous esp adjustments from being dead-code eliminated.
-    Context.insert(InstFakeUse::create(Func, esp));
-    _mov(esp, ebp);
-    _pop(ebp);
-  }
-
-  // Add pop instructions for preserved registers, walking the register
-  // numbers from highest to lowest.
-  llvm::SmallBitVector CalleeSaves =
-      getRegisterSet(RegSet_CalleeSave, RegSet_None);
-  for (SizeT Remaining = CalleeSaves.size(); Remaining > 0; --Remaining) {
-    const SizeT RegNum = Remaining - 1;
-    // ebp has already been popped above for an ebp-based frame.
-    if (RegNum == RegX8632::Reg_ebp && IsEbpBasedFrame)
-      continue;
-    if (CalleeSaves[RegNum] && RegsUsed[RegNum])
-      _pop(getPhysicalRegister(RegNum));
-  }
-
-  if (!Ctx->getFlags().getUseSandboxing())
-    return;
-  // Change the original ret instruction into a sandboxed return sequence.
-  //   t:ecx = pop
-  //   bundle_lock
-  //   and t, ~31
-  //   jmp *t
-  //   bundle_unlock
-  //   FakeUse <original_ret_operand>
-  const SizeT BundleSize = 1
-                           << Func->getAssembler<>()->getBundleAlignLog2Bytes();
-  Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
-  _pop(T_ecx);
-  _bundle_lock();
-  _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1)));
-  _jmp(T_ecx);
-  _bundle_unlock();
-  if (RI->getSrcSize()) {
-    Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
-    Context.insert(InstFakeUse::create(Func, RetValue));
-  }
-  RI->setDeleted();
-}
-
-// Ensures that a 64-bit Var (i64 or f64) has Lo and Hi halves, each a
-// fresh i32 variable. Variables of any other type, and variables that
-// already have their halves, are left alone. If Var is an argument,
-// both halves are marked as arguments too.
-void TargetX8632::split64(Variable *Var) {
-  const Type VarTy = Var->getType();
-  // TODO: Only consider F64 if we need to push each half when
-  // passing as an argument to a function call. Note that each half
-  // is still typed as I32.
-  if (VarTy != IceType_i64 && VarTy != IceType_f64)
-    return;
-
-  Variable *LoHalf = Var->getLo();
-  Variable *HiHalf = Var->getHi();
-  if (LoHalf != nullptr) {
-    // Already split: both halves must be present.
-    assert(HiHalf);
-    return;
-  }
-  assert(HiHalf == nullptr);
-
-  LoHalf = Func->makeVariable(IceType_i32);
-  HiHalf = Func->makeVariable(IceType_i32);
-  if (ALLOW_DUMP) {
-    LoHalf->setName(Func, Var->getName(Func) + "__lo");
-    HiHalf->setName(Func, Var->getName(Func) + "__hi");
-  }
-  Var->setLoHi(LoHalf, HiHalf);
-  if (!Var->getIsArg())
-    return;
-  LoHalf->setIsArg();
-  HiHalf->setIsArg();
-}
-
-// Returns an operand for the low 32 bits of a 64-bit (i64 or f64)
-// Operand:
-//   * Variable: the Lo half, splitting the variable first if needed.
-//   * ConstantInteger64: a legalized i32 constant holding the value
-//     truncated to 32 bits.
-//   * OperandX8632Mem: a legalized i32 memory operand with the same
-//     base, offset, index, shift and segment register.
-// Any other operand kind is unsupported. In builds without asserts, an
-// operand that is not i64/f64 is returned unchanged.
-Operand *TargetX8632::loOperand(Operand *Operand) {
-  assert(Operand->getType() == IceType_i64 ||
-         Operand->getType() == IceType_f64);
-  if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
-    return Operand;
-  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
-    split64(Var);
-    return Var->getLo();
-  }
-  if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
-    // The conversion is expected to always succeed, so use cast<>
-    // (which asserts on a mismatch) rather than an unchecked dyn_cast<>
-    // whose null result would be handed straight to legalize().
-    ConstantInteger32 *ConstInt = llvm::cast<ConstantInteger32>(
-        Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
-    return legalize(ConstInt);
-  }
-  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
-    OperandX8632Mem *MemOperand = OperandX8632Mem::create(
-        Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
-        Mem->getShift(), Mem->getSegmentRegister());
-    // Test if we should randomize or pool the offset, if so randomize it or
-    // pool it then create mem operand with the blinded/pooled constant.
-    // Otherwise, return the mem operand as ordinary mem operand.
-    return legalize(MemOperand);
-  }
-  llvm_unreachable("Unsupported operand type");
-  return nullptr;
-}
-
-// Returns an operand for the high 32 bits of a 64-bit (i64 or f64)
-// Operand:
-//   * Variable: the Hi half, splitting the variable first if needed.
-//   * ConstantInteger64: a legalized i32 constant holding the value
-//     shifted right by 32 bits.
-//   * OperandX8632Mem: a legalized i32 memory operand addressing 4
-//     bytes past the original one (the offset is 4 when there was
-//     none, otherwise the integer or relocatable offset plus 4).
-// Any other operand kind is unsupported. In builds without asserts, an
-// operand that is not i64/f64 is returned unchanged.
-Operand *TargetX8632::hiOperand(Operand *Operand) {
-  assert(Operand->getType() == IceType_i64 ||
-         Operand->getType() == IceType_f64);
-  if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
-    return Operand;
-  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
-    split64(Var);
-    return Var->getHi();
-  }
-  if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
-    // The conversion is expected to always succeed, so use cast<>
-    // (which asserts on a mismatch) rather than an unchecked dyn_cast<>
-    // whose null result would be handed straight to legalize().
-    ConstantInteger32 *ConstInt = llvm::cast<ConstantInteger32>(
-        Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32)));
-    // check if we need to blind/pool the constant
-    return legalize(ConstInt);
-  }
-  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
-    Constant *Offset = Mem->getOffset();
-    if (Offset == nullptr) {
-      Offset = Ctx->getConstantInt32(4);
-    } else if (ConstantInteger32 *IntOffset =
-                   llvm::dyn_cast<ConstantInteger32>(Offset)) {
-      // Same overflow guard as the relocatable case below.
-      assert(!Utils::WouldOverflowAdd(IntOffset->getValue(), 4));
-      Offset = Ctx->getConstantInt32(4 + IntOffset->getValue());
-    } else if (ConstantRelocatable *SymOffset =
-                   llvm::dyn_cast<ConstantRelocatable>(Offset)) {
-      assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
-      Offset =
-          Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(),
-                              SymOffset->getSuppressMangling());
-    }
-    OperandX8632Mem *MemOperand = OperandX8632Mem::create(
-        Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(),
-        Mem->getShift(), Mem->getSegmentRegister());
-    // Test if the Offset is an eligible i32 constant for randomization and
-    // pooling. Blind/pool it if it is. Otherwise return it as an ordinary
-    // mem operand.
-    return legalize(MemOperand);
-  }
-  llvm_unreachable("Unsupported operand type");
-  return nullptr;
-}
-
-// Builds a bit vector with one bit per register (RegX8632::Reg_NUM
-// bits in total). For each row of REGX8632_TABLE, the register's bit is
-// set when one of its attributes (scratch, preserved, stackptr,
-// frameptr) matches a class requested in Include, and is then cleared
-// when one of its attributes matches a class listed in Exclude. The
-// Exclude tests come after the Include tests, so Exclude wins when a
-// register matches both.
-llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
- RegSetMask Exclude) const {
- llvm::SmallBitVector Registers(RegX8632::Reg_NUM);
-
-#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
- frameptr, isI8, isInt, isFP) \
- if (scratch && (Include & RegSet_CallerSave)) \
- Registers[RegX8632::val] = true; \
- if (preserved && (Include & RegSet_CalleeSave)) \
- Registers[RegX8632::val] = true; \
- if (stackptr && (Include & RegSet_StackPointer)) \
- Registers[RegX8632::val] = true; \
- if (frameptr && (Include & RegSet_FramePointer)) \
- Registers[RegX8632::val] = true; \
- if (scratch && (Exclude & RegSet_CallerSave)) \
- Registers[RegX8632::val] = false; \
- if (preserved && (Exclude & RegSet_CalleeSave)) \
- Registers[RegX8632::val] = false; \
- if (stackptr && (Exclude & RegSet_StackPointer)) \
- Registers[RegX8632::val] = false; \
- if (frameptr && (Exclude & RegSet_FramePointer)) \
- Registers[RegX8632::val] = false;
-
- // Expand the tests above once per register in the table.
- REGX8632_TABLE
-
-#undef X
-
- return Registers;
-}
-
-// Lowers an alloca by moving esp down and handing the new esp to the
-// instruction's Dest. The allocation size is rounded up to a multiple
-// of the effective alignment, which is the larger of the requested
-// alignment and X86_STACK_ALIGNMENT_BYTES; when the requested alignment
-// exceeds the stack alignment, esp is masked down to it first. Any
-// function containing an alloca gets an ebp-based frame.
-void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
-  IsEbpBasedFrame = true;
-  // Conservatively require the stack to be aligned. Some stack
-  // adjustment operations implemented below assume that the stack is
-  // aligned before the alloca. All the alloca code ensures that the
-  // stack alignment is preserved after the alloca. The stack alignment
-  // restriction can be relaxed in some cases.
-  NeedsStackAlignment = true;
-
-  // TODO(stichnot): minimize the number of adjustments of esp, etc.
-  Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
-  Operand *TotalSize = legalize(Inst->getSizeInBytes());
-  Variable *Dest = Inst->getDest();
-  // A default align=0 is treated as the real value 1, to avoid any
-  // bit-manipulation problems below.
-  const uint32_t RequestedAlign = Inst->getAlignInBytes();
-  const uint32_t AlignmentParam = std::max(RequestedAlign, 1u);
-
-  // LLVM enforces power of 2 alignment.
-  assert(llvm::isPowerOf2_32(AlignmentParam));
-  assert(llvm::isPowerOf2_32(X86_STACK_ALIGNMENT_BYTES));
-
-  const uint32_t Alignment =
-      std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
-  if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
-    _and(esp, Ctx->getConstantInt32(-Alignment));
-  }
-
-  const auto *ConstantTotalSize = llvm::dyn_cast<ConstantInteger32>(TotalSize);
-  if (ConstantTotalSize == nullptr) {
-    // Non-constant sizes need to be adjusted to the next highest
-    // multiple of the required alignment at runtime.
-    Variable *T = makeReg(IceType_i32);
-    _mov(T, TotalSize);
-    _add(T, Ctx->getConstantInt32(Alignment - 1));
-    _and(T, Ctx->getConstantInt32(-Alignment));
-    _sub(esp, T);
-  } else {
-    // Constant sizes are rounded up at compile time.
-    const uint32_t RawSize = ConstantTotalSize->getValue();
-    const uint32_t AlignedSize = Utils::applyAlignment(RawSize, Alignment);
-    _sub(esp, Ctx->getConstantInt32(AlignedSize));
-  }
-  _mov(Dest, esp);
-}
-
-// Strength-reduce scalar integer multiplication by a constant (for
-// i32 or narrower) for certain constants. The lea instruction can be
-// used to multiply by 3, 5, or 9, and the shl instruction can be used
-// to multiply by powers of 2. These can be combined such that
-// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,
-// combined with left-shifting by 2.
-//
-// Dest receives Src0 * Src1. Returns true if the multiply was lowered
-// here, or false if nothing was emitted and the caller must lower it
-// some other way. The optimization is declined below Opt_1, for
-// Src1 == INT32_MIN, for constants with a prime factor other than 2, 3
-// and 5, for lea-based factors on types other than i16/i32, and when
-// more than MaxOpsForOptimizedMul lea/shl operations would be needed.
-bool TargetX8632::optimizeScalarMul(Variable *Dest, Operand *Src0,
-                                    int32_t Src1) {
-  // Disable this optimization for Om1 and O0, just to keep things
-  // simple there.
-  if (Ctx->getFlags().getOptLevel() < Opt_1)
-    return false;
-  Type Ty = Dest->getType();
-  Variable *T = nullptr;
-  if (Src1 == -1) {
-    _mov(T, Src0);
-    _neg(T);
-    _mov(Dest, T);
-    return true;
-  }
-  if (Src1 == 0) {
-    _mov(Dest, Ctx->getConstantZero(Ty));
-    return true;
-  }
-  if (Src1 == 1) {
-    _mov(T, Src0);
-    _mov(Dest, T);
-    return true;
-  }
-  // Don't bother with the edge case where Src1 == MININT. The test is
-  // done on the unsigned bit pattern because negating MININT as a
-  // signed value (as in "Src1 == -Src1") overflows, which is undefined
-  // behavior and may let the compiler drop the check altogether.
-  if (static_cast<uint32_t>(Src1) == 0x80000000u)
-    return false;
-  const bool Src1IsNegative = Src1 < 0;
-  if (Src1IsNegative)
-    Src1 = -Src1;
-  // Factor |Src1| into 9s, 5s, 3s and 2s. Each 9/5/3 factor costs one
-  // lea; all the factors of 2 together cost a single shl.
-  uint32_t Count9 = 0;
-  uint32_t Count5 = 0;
-  uint32_t Count3 = 0;
-  uint32_t Count2 = 0;
-  uint32_t CountOps = 0;
-  while (Src1 > 1) {
-    if (Src1 % 9 == 0) {
-      ++CountOps;
-      ++Count9;
-      Src1 /= 9;
-    } else if (Src1 % 5 == 0) {
-      ++CountOps;
-      ++Count5;
-      Src1 /= 5;
-    } else if (Src1 % 3 == 0) {
-      ++CountOps;
-      ++Count3;
-      Src1 /= 3;
-    } else if (Src1 % 2 == 0) {
-      if (Count2 == 0)
-        ++CountOps;
-      ++Count2;
-      Src1 /= 2;
-    } else {
-      return false;
-    }
-  }
-  // Lea optimization only works for i16 and i32 types, not i8.
-  if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))
-    return false;
-  // Limit the number of lea/shl operations for a single multiply, to
-  // a somewhat arbitrary choice of 3.
-  const uint32_t MaxOpsForOptimizedMul = 3;
-  if (CountOps > MaxOpsForOptimizedMul)
-    return false;
-  _mov(T, Src0);
-  Constant *Zero = Ctx->getConstantZero(IceType_i32);
-  // Each lea computes T + (T << Shift), i.e. T * (2^Shift + 1).
-  for (uint32_t i = 0; i < Count9; ++i) {
-    const uint16_t Shift = 3; // log2(9-1)
-    _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));
-    _set_dest_nonkillable();
-  }
-  for (uint32_t i = 0; i < Count5; ++i) {
-    const uint16_t Shift = 2; // log2(5-1)
-    _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));
-    _set_dest_nonkillable();
-  }
-  for (uint32_t i = 0; i < Count3; ++i) {
-    const uint16_t Shift = 1; // log2(3-1)
-    _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));
-    _set_dest_nonkillable();
-  }
-  if (Count2) {
-    _shl(T, Ctx->getConstantInt(Ty, Count2));
-  }
-  if (Src1IsNegative)
-    _neg(T);
-  _mov(Dest, T);
-  return true;
-}
-
-void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
- Variable *Dest = Inst->getDest();
- Operand *Src0 = legalize(Inst->getSrc(0));
- Operand *Src1 = legalize(Inst->getSrc(1));
- if (Inst->isCommutative()) {
- if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))
- std::swap(Src0, Src1);
- if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))
- std::swap(Src0, Src1);
- }
- if (Dest->getType() == IceType_i64) {
- // These helper-call-involved instructions are lowered in this
- // separate switch. This is because loOperand() and hiOperand()
- // may insert redundant instructions for constant blinding and
- // pooling. Such redundant instructions will fail liveness analysis
- // under -Om1 setting. And, actually these arguments do not need
- // to be processed with loOperand() and hiOperand() to be used.
- switch (Inst->getOp()) {
- case InstArithmetic::Udiv: {
- const SizeT MaxSrcs = 2;
- InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);
- Call->addArg(Inst->getSrc(0));
- Call->addArg(Inst->getSrc(1));
- lowerCall(Call);
- return;
- }
- case InstArithmetic::Sdiv: {
- const SizeT MaxSrcs = 2;
- InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs);
- Call->addArg(Inst->getSrc(0));
- Call->addArg(Inst->getSrc(1));
- lowerCall(Call);
- return;
- }
- case InstArithmetic::Urem: {
- const SizeT MaxSrcs = 2;
- InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs);
- Call->addArg(Inst->getSrc(0));
- Call->addArg(Inst->getSrc(1));
- lowerCall(Call);
- return;
- }
- case InstArithmetic::Srem: {
- const SizeT MaxSrcs = 2;
- InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs);
- Call->addArg(Inst->getSrc(0));
- Call->addArg(Inst->getSrc(1));
- lowerCall(Call);
- return;
- }
- default:
- break;
- }
-
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Operand *Src0Lo = loOperand(Src0);
- Operand *Src0Hi = hiOperand(Src0);
- Operand *Src1Lo = loOperand(Src1);
- Operand *Src1Hi = hiOperand(Src1);
- Variable *T_Lo = nullptr, *T_Hi = nullptr;
- switch (Inst->getOp()) {
- case InstArithmetic::_num:
- llvm_unreachable("Unknown arithmetic operator");
- break;
- case InstArithmetic::Add:
- _mov(T_Lo, Src0Lo);
- _add(T_Lo, Src1Lo);
- _mov(DestLo, T_Lo);
- _mov(T_Hi, Src0Hi);
- _adc(T_Hi, Src1Hi);
- _mov(DestHi, T_Hi);
- break;
- case InstArithmetic::And:
- _mov(T_Lo, Src0Lo);
- _and(T_Lo, Src1Lo);
- _mov(DestLo, T_Lo);
- _mov(T_Hi, Src0Hi);
- _and(T_Hi, Src1Hi);
- _mov(DestHi, T_Hi);
- break;
- case InstArithmetic::Or:
- _mov(T_Lo, Src0Lo);
- _or(T_Lo, Src1Lo);
- _mov(DestLo, T_Lo);
- _mov(T_Hi, Src0Hi);
- _or(T_Hi, Src1Hi);
- _mov(DestHi, T_Hi);
- break;
- case InstArithmetic::Xor:
- _mov(T_Lo, Src0Lo);
- _xor(T_Lo, Src1Lo);
- _mov(DestLo, T_Lo);
- _mov(T_Hi, Src0Hi);
- _xor(T_Hi, Src1Hi);
- _mov(DestHi, T_Hi);
- break;
- case InstArithmetic::Sub:
- _mov(T_Lo, Src0Lo);
- _sub(T_Lo, Src1Lo);
- _mov(DestLo, T_Lo);
- _mov(T_Hi, Src0Hi);
- _sbb(T_Hi, Src1Hi);
- _mov(DestHi, T_Hi);
- break;
- case InstArithmetic::Mul: {
- Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
- Variable *T_4Lo = makeReg(IceType_i32, RegX8632::Reg_eax);
- Variable *T_4Hi = makeReg(IceType_i32, RegX8632::Reg_edx);
- // gcc does the following:
- // a=b*c ==>
- // t1 = b.hi; t1 *=(imul) c.lo
- // t2 = c.hi; t2 *=(imul) b.lo
- // t3:eax = b.lo
- // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
- // a.lo = t4.lo
- // t4.hi += t1
- // t4.hi += t2
- // a.hi = t4.hi
- // The mul instruction cannot take an immediate operand.
- Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
- _mov(T_1, Src0Hi);
- _imul(T_1, Src1Lo);
- _mov(T_2, Src1Hi);
- _imul(T_2, Src0Lo);
- _mov(T_3, Src0Lo, RegX8632::Reg_eax);
- _mul(T_4Lo, T_3, Src1Lo);
- // The mul instruction produces two dest variables, edx:eax. We
- // create a fake definition of edx to account for this.
- Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
- _mov(DestLo, T_4Lo);
- _add(T_4Hi, T_1);
- _add(T_4Hi, T_2);
- _mov(DestHi, T_4Hi);
- } break;
- case InstArithmetic::Shl: {
- // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
- // gcc does the following:
- // a=b<<c ==>
- // t1:ecx = c.lo & 0xff
- // t2 = b.lo
- // t3 = b.hi
- // t3 = shld t3, t2, t1
- // t2 = shl t2, t1
- // test t1, 0x20
- // je L1
- // use(t3)
- // t3 = t2
- // t2 = 0
- // L1:
- // a.lo = t2
- // a.hi = t3
- Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
- Constant *BitTest = Ctx->getConstantInt32(0x20);
- Constant *Zero = Ctx->getConstantZero(IceType_i32);
- InstX8632Label *Label = InstX8632Label::create(Func, this);
- _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
- _mov(T_2, Src0Lo);
- _mov(T_3, Src0Hi);
- _shld(T_3, T_2, T_1);
- _shl(T_2, T_1);
- _test(T_1, BitTest);
- _br(CondX86::Br_e, Label);
- // T_2 and T_3 are being assigned again because of the
- // intra-block control flow, so we need the _mov_nonkillable
- // variant to avoid liveness problems.
- _mov_nonkillable(T_3, T_2);
- _mov_nonkillable(T_2, Zero);
- Context.insert(Label);
- _mov(DestLo, T_2);
- _mov(DestHi, T_3);
- } break;
- case InstArithmetic::Lshr: {
- // a=b>>c (unsigned) ==>
- // t1:ecx = c.lo & 0xff
- // t2 = b.lo
- // t3 = b.hi
- // t2 = shrd t2, t3, t1
- // t3 = shr t3, t1
- // test t1, 0x20
- // je L1
- // use(t2)
- // t2 = t3
- // t3 = 0
- // L1:
- // a.lo = t2
- // a.hi = t3
- Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
- Constant *BitTest = Ctx->getConstantInt32(0x20);
- Constant *Zero = Ctx->getConstantZero(IceType_i32);
- InstX8632Label *Label = InstX8632Label::create(Func, this);
- _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
- _mov(T_2, Src0Lo);
- _mov(T_3, Src0Hi);
- _shrd(T_2, T_3, T_1);
- _shr(T_3, T_1);
- _test(T_1, BitTest);
- _br(CondX86::Br_e, Label);
- // T_2 and T_3 are being assigned again because of the
- // intra-block control flow, so we need the _mov_nonkillable
- // variant to avoid liveness problems.
- _mov_nonkillable(T_2, T_3);
- _mov_nonkillable(T_3, Zero);
- Context.insert(Label);
- _mov(DestLo, T_2);
- _mov(DestHi, T_3);
- } break;
- case InstArithmetic::Ashr: {
- // a=b>>c (signed) ==>
- // t1:ecx = c.lo & 0xff
- // t2 = b.lo
- // t3 = b.hi
- // t2 = shrd t2, t3, t1
- // t3 = sar t3, t1
- // test t1, 0x20
- // je L1
- // use(t2)
- // t2 = t3
- // t3 = sar t3, 0x1f
- // L1:
- // a.lo = t2
- // a.hi = t3
- Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
- Constant *BitTest = Ctx->getConstantInt32(0x20);
- Constant *SignExtend = Ctx->getConstantInt32(0x1f);
- InstX8632Label *Label = InstX8632Label::create(Func, this);
- _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
- _mov(T_2, Src0Lo);
- _mov(T_3, Src0Hi);
- _shrd(T_2, T_3, T_1);
- _sar(T_3, T_1);
- _test(T_1, BitTest);
- _br(CondX86::Br_e, Label);
- // T_2 and T_3 are being assigned again because of the
- // intra-block control flow, so T_2 needs the _mov_nonkillable
- // variant to avoid liveness problems. T_3 doesn't need special
- // treatment because it is reassigned via _sar instead of _mov.
- _mov_nonkillable(T_2, T_3);
- _sar(T_3, SignExtend);
- Context.insert(Label);
- _mov(DestLo, T_2);
- _mov(DestHi, T_3);
- } break;
- case InstArithmetic::Fadd:
- case InstArithmetic::Fsub:
- case InstArithmetic::Fmul:
- case InstArithmetic::Fdiv:
- case InstArithmetic::Frem:
- llvm_unreachable("FP instruction with i64 type");
- break;
- case InstArithmetic::Udiv:
- case InstArithmetic::Sdiv:
- case InstArithmetic::Urem:
- case InstArithmetic::Srem:
- llvm_unreachable("Call-helper-involved instruction for i64 type \
- should have already been handled before");
- break;
- }
- return;
- }
- if (isVectorType(Dest->getType())) {
- // TODO: Trap on integer divide and integer modulo by zero.
- // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
- if (llvm::isa<OperandX8632Mem>(Src1))
- Src1 = legalizeToVar(Src1);
- switch (Inst->getOp()) {
- case InstArithmetic::_num:
- llvm_unreachable("Unknown arithmetic operator");
- break;
- case InstArithmetic::Add: {
- Variable *T = makeReg(Dest->getType());
- _movp(T, Src0);
- _padd(T, Src1);
- _movp(Dest, T);
- } break;
- case InstArithmetic::And: {
- Variable *T = makeReg(Dest->getType());
- _movp(T, Src0);
- _pand(T, Src1);
- _movp(Dest, T);
- } break;
- case InstArithmetic::Or: {
- Variable *T = makeReg(Dest->getType());
- _movp(T, Src0);
- _por(T, Src1);
- _movp(Dest, T);
- } break;
- case InstArithmetic::Xor: {
- Variable *T = makeReg(Dest->getType());
- _movp(T, Src0);
- _pxor(T, Src1);
- _movp(Dest, T);
- } break;
- case InstArithmetic::Sub: {
- Variable *T = makeReg(Dest->getType());
- _movp(T, Src0);
- _psub(T, Src1);
- _movp(Dest, T);
- } break;
- case InstArithmetic::Mul: {
- bool TypesAreValidForPmull =
- Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
- bool InstructionSetIsValidForPmull =
- Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1;
- if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
- Variable *T = makeReg(Dest->getType());
- _movp(T, Src0);
- _pmull(T, Src1);
- _movp(Dest, T);
- } else if (Dest->getType() == IceType_v4i32) {
- // Lowering sequence:
- // Note: The mask arguments have index 0 on the left.
- //
- // movups T1, Src0
- // pshufd T2, Src0, {1,0,3,0}
- // pshufd T3, Src1, {1,0,3,0}
- // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
- // pmuludq T1, Src1
- // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
- // pmuludq T2, T3
- // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
- // shufps T1, T2, {0,2,0,2}
- // pshufd T4, T1, {0,2,1,3}
- // movups Dest, T4
-
- // Mask that directs pshufd to create a vector with entries
- // Src[1, 0, 3, 0]
- const unsigned Constant1030 = 0x31;
- Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
- // Mask that directs shufps to create a vector with entries
- // Dest[0, 2], Src[0, 2]
- const unsigned Mask0202 = 0x88;
- // Mask that directs pshufd to create a vector with entries
- // Src[0, 2, 1, 3]
- const unsigned Mask0213 = 0xd8;
- Variable *T1 = makeReg(IceType_v4i32);
- Variable *T2 = makeReg(IceType_v4i32);
- Variable *T3 = makeReg(IceType_v4i32);
- Variable *T4 = makeReg(IceType_v4i32);
- _movp(T1, Src0);
- _pshufd(T2, Src0, Mask1030);
- _pshufd(T3, Src1, Mask1030);
- _pmuludq(T1, Src1);
- _pmuludq(T2, T3);
- _shufps(T1, T2, Ctx->getConstantInt32(Mask0202));
- _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213));
- _movp(Dest, T4);
- } else {
- assert(Dest->getType() == IceType_v16i8);
- scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
- }
- } break;
- case InstArithmetic::Shl:
- case InstArithmetic::Lshr:
- case InstArithmetic::Ashr:
- case InstArithmetic::Udiv:
- case InstArithmetic::Urem:
- case InstArithmetic::Sdiv:
- case InstArithmetic::Srem:
- scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
- break;
- case InstArithmetic::Fadd: {
- Variable *T = makeReg(Dest->getType());
- _movp(T, Src0);
- _addps(T, Src1);
- _movp(Dest, T);
- } break;
- case InstArithmetic::Fsub: {
- Variable *T = makeReg(Dest->getType());
- _movp(T, Src0);
- _subps(T, Src1);
- _movp(Dest, T);
- } break;
- case InstArithmetic::Fmul: {
- Variable *T = makeReg(Dest->getType());
- _movp(T, Src0);
- _mulps(T, Src1);
- _movp(Dest, T);
- } break;
- case InstArithmetic::Fdiv: {
- Variable *T = makeReg(Dest->getType());
- _movp(T, Src0);
- _divps(T, Src1);
- _movp(Dest, T);
- } break;
- case InstArithmetic::Frem:
- scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
- break;
- }
- return;
- }
- Variable *T_edx = nullptr;
- Variable *T = nullptr;
- switch (Inst->getOp()) {
- case InstArithmetic::_num:
- llvm_unreachable("Unknown arithmetic operator");
- break;
- case InstArithmetic::Add:
- _mov(T, Src0);
- _add(T, Src1);
- _mov(Dest, T);
- break;
- case InstArithmetic::And:
- _mov(T, Src0);
- _and(T, Src1);
- _mov(Dest, T);
- break;
- case InstArithmetic::Or:
- _mov(T, Src0);
- _or(T, Src1);
- _mov(Dest, T);
- break;
- case InstArithmetic::Xor:
- _mov(T, Src0);
- _xor(T, Src1);
- _mov(Dest, T);
- break;
- case InstArithmetic::Sub:
- _mov(T, Src0);
- _sub(T, Src1);
- _mov(Dest, T);
- break;
- case InstArithmetic::Mul:
- if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
- if (optimizeScalarMul(Dest, Src0, C->getValue()))
- return;
- }
- // The 8-bit version of imul only allows the form "imul r/m8"
- // where T must be in eax.
- if (isByteSizedArithType(Dest->getType())) {
- _mov(T, Src0, RegX8632::Reg_eax);
- Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
- } else {
- _mov(T, Src0);
- }
- _imul(T, Src1);
- _mov(Dest, T);
- break;
- case InstArithmetic::Shl:
- _mov(T, Src0);
- if (!llvm::isa<Constant>(Src1))
- Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
- _shl(T, Src1);
- _mov(Dest, T);
- break;
- case InstArithmetic::Lshr:
- _mov(T, Src0);
- if (!llvm::isa<Constant>(Src1))
- Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
- _shr(T, Src1);
- _mov(Dest, T);
- break;
- case InstArithmetic::Ashr:
- _mov(T, Src0);
- if (!llvm::isa<Constant>(Src1))
- Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx);
- _sar(T, Src1);
- _mov(Dest, T);
- break;
- case InstArithmetic::Udiv:
- // div and idiv are the few arithmetic operators that do not allow
- // immediates as the operand.
- Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
- if (isByteSizedArithType(Dest->getType())) {
- Variable *T_ah = nullptr;
- Constant *Zero = Ctx->getConstantZero(IceType_i8);
- _mov(T, Src0, RegX8632::Reg_eax);
- _mov(T_ah, Zero, RegX8632::Reg_ah);
- _div(T, Src1, T_ah);
- _mov(Dest, T);
- } else {
- Constant *Zero = Ctx->getConstantZero(IceType_i32);
- _mov(T, Src0, RegX8632::Reg_eax);
- _mov(T_edx, Zero, RegX8632::Reg_edx);
- _div(T, Src1, T_edx);
- _mov(Dest, T);
- }
- break;
- case InstArithmetic::Sdiv:
- // TODO(stichnot): Enable this after doing better performance
- // and cross testing.
- if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
- // Optimize division by constant power of 2, but not for Om1
- // or O0, just to keep things simple there.
- if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
- int32_t Divisor = C->getValue();
- uint32_t UDivisor = static_cast<uint32_t>(Divisor);
- if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
- uint32_t LogDiv = llvm::Log2_32(UDivisor);
- Type Ty = Dest->getType();
- // LLVM does the following for dest=src/(1<<log):
- // t=src
- // sar t,typewidth-1 // -1 if src is negative, 0 if not
- // shr t,typewidth-log
- // add t,src
- // sar t,log
- // dest=t
- uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty);
- _mov(T, Src0);
- // If for some reason we are dividing by 1, just treat it
- // like an assignment.
- if (LogDiv > 0) {
- // The initial sar is unnecessary when dividing by 2.
- if (LogDiv > 1)
- _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
- _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
- _add(T, Src0);
- _sar(T, Ctx->getConstantInt(Ty, LogDiv));
- }
- _mov(Dest, T);
- return;
- }
- }
- }
- Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
- if (isByteSizedArithType(Dest->getType())) {
- _mov(T, Src0, RegX8632::Reg_eax);
- _cbwdq(T, T);
- _idiv(T, Src1, T);
- _mov(Dest, T);
- } else {
- T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
- _mov(T, Src0, RegX8632::Reg_eax);
- _cbwdq(T_edx, T);
- _idiv(T, Src1, T_edx);
- _mov(Dest, T);
- }
- break;
- case InstArithmetic::Urem:
- Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
- if (isByteSizedArithType(Dest->getType())) {
- Variable *T_ah = nullptr;
- Constant *Zero = Ctx->getConstantZero(IceType_i8);
- _mov(T, Src0, RegX8632::Reg_eax);
- _mov(T_ah, Zero, RegX8632::Reg_ah);
- _div(T_ah, Src1, T);
- _mov(Dest, T_ah);
- } else {
- Constant *Zero = Ctx->getConstantZero(IceType_i32);
- _mov(T_edx, Zero, RegX8632::Reg_edx);
- _mov(T, Src0, RegX8632::Reg_eax);
- _div(T_edx, Src1, T);
- _mov(Dest, T_edx);
- }
- break;
- case InstArithmetic::Srem:
- // TODO(stichnot): Enable this after doing better performance
- // and cross testing.
- if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
- // Optimize mod by constant power of 2, but not for Om1 or O0,
- // just to keep things simple there.
- if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
- int32_t Divisor = C->getValue();
- uint32_t UDivisor = static_cast<uint32_t>(Divisor);
- if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
- uint32_t LogDiv = llvm::Log2_32(UDivisor);
- Type Ty = Dest->getType();
- // LLVM does the following for dest=src%(1<<log):
- // t=src
- // sar t,typewidth-1 // -1 if src is negative, 0 if not
- // shr t,typewidth-log
- // add t,src
- // and t, -(1<<log)
- // sub t,src
- // neg t
- // dest=t
- uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty);
- // If for some reason we are dividing by 1, just assign 0.
- if (LogDiv == 0) {
- _mov(Dest, Ctx->getConstantZero(Ty));
- return;
- }
- _mov(T, Src0);
- // The initial sar is unnecessary when dividing by 2.
- if (LogDiv > 1)
- _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
- _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
- _add(T, Src0);
- _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv)));
- _sub(T, Src0);
- _neg(T);
- _mov(Dest, T);
- return;
- }
- }
- }
- Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
- if (isByteSizedArithType(Dest->getType())) {
- Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah);
- _mov(T, Src0, RegX8632::Reg_eax);
- _cbwdq(T, T);
- Context.insert(InstFakeDef::create(Func, T_ah));
- _idiv(T_ah, Src1, T);
- _mov(Dest, T_ah);
- } else {
- T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
- _mov(T, Src0, RegX8632::Reg_eax);
- _cbwdq(T_edx, T);
- _idiv(T_edx, Src1, T);
- _mov(Dest, T_edx);
- }
- break;
- case InstArithmetic::Fadd:
- _mov(T, Src0);
- _addss(T, Src1);
- _mov(Dest, T);
- break;
- case InstArithmetic::Fsub:
- _mov(T, Src0);
- _subss(T, Src1);
- _mov(Dest, T);
- break;
- case InstArithmetic::Fmul:
- _mov(T, Src0);
- _mulss(T, Src1);
- _mov(Dest, T);
- break;
- case InstArithmetic::Fdiv:
- _mov(T, Src0);
- _divss(T, Src1);
- _mov(Dest, T);
- break;
- case InstArithmetic::Frem: {
- const SizeT MaxSrcs = 2;
- Type Ty = Dest->getType();
- InstCall *Call = makeHelperCall(
- isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);
- Call->addArg(Src0);
- Call->addArg(Src1);
- return lowerCall(Call);
- }
- }
-}
-
-void TargetX8632::lowerAssign(const InstAssign *Inst) { // Lowers Dest = Src0, with separate paths for i64, vector, and other types.
- Variable *Dest = Inst->getDest();
- Operand *Src0 = Inst->getSrc(0);
- assert(Dest->getType() == Src0->getType()); // An assignment never changes the type.
- if (Dest->getType() == IceType_i64) { // i64: copy the lo and hi halves one after the other, each through its own temporary.
- Src0 = legalize(Src0);
- Operand *Src0Lo = loOperand(Src0);
- Operand *Src0Hi = hiOperand(Src0);
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Variable *T_Lo = nullptr, *T_Hi = nullptr; // NOTE(review): presumably _mov() creates the temporary when handed a null dest -- confirm.
- _mov(T_Lo, Src0Lo);
- _mov(DestLo, T_Lo);
- _mov(T_Hi, Src0Hi);
- _mov(DestHi, T_Hi);
- } else {
- Operand *RI; // Legalized source: a register, or (only when Dest has no register yet) possibly an immediate.
- if (Dest->hasReg()) {
- // If Dest already has a physical register, then legalize the
- // Src operand into a Variable with the same register
- // assignment. This is mostly a workaround for advanced phi
- // lowering's ad-hoc register allocation which assumes no
- // register allocation is needed when at least one of the
- // operands is non-memory.
-
- // If we have a physical register for the dest variable, we can
- // enable our constant blinding or pooling again. Note this is
- // only for advancedPhiLowering(), the flag flip should leave
- // no other side effect.
- { // Extra scope so that B lives only across this one legalize() call.
- BoolFlagSaver B(RandomizationPoolingPaused, false);
- RI = legalize(Src0, Legal_Reg, Dest->getRegNum());
- }
- } else {
- // If Dest could be a stack operand, then RI must be a physical
- // register or a scalar integer immediate.
- RI = legalize(Src0, Legal_Reg | Legal_Imm);
- }
- if (isVectorType(Dest->getType())) // Vector types are copied with _movp, everything else with _mov.
- _movp(Dest, RI);
- else
- _mov(Dest, RI);
- }
-}
-
-void TargetX8632::lowerBr(const InstBr *Inst) { // Lowers a branch: unconditional, folded icmp producer, or generic test against zero.
- if (Inst->isUnconditional()) {
- _br(Inst->getTargetUnconditional());
- return;
- }
- Operand *Cond = Inst->getCondition();
-
- // Handle folding opportunities. (The class keyword below is needed because the parameter Inst hides the type name.)
- if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
- assert(Producer->isDeleted()); // The folded producer is expected to be deleted already.
- switch (BoolFolding::getProducerKind(Producer)) {
- default: // Any other producer kind is not folded here and takes the generic path below.
- break;
- case BoolFolding::PK_Icmp32: {
- // TODO(stichnot): Refactor similarities between this block and
- // the corresponding code in lowerIcmp().
- auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); // NOTE(review): dereferenced below without a null check; llvm::cast may be the better fit.
- Operand *Src0 = Producer->getSrc(0);
- Operand *Src1 = legalize(Producer->getSrc(1));
- Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
- _cmp(Src0RM, Src1); // Compare the icmp operands directly; the branch condition comes from the icmp condition.
- _br(getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(),
- Inst->getTargetFalse());
- return;
- }
- }
- }
-
- Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); // Generic path: compare Cond with zero and take the true target when they differ.
- Constant *Zero = Ctx->getConstantZero(IceType_i32);
- _cmp(Src0, Zero);
- _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
-}
-
-void TargetX8632::lowerCall(const InstCall *Instr) { // Lowers a call: places the arguments, emits the call, then copies the result into Dest.
- // x86-32 calling convention:
- //
- // * At the point before the call, the stack must be aligned to 16
- // bytes.
- //
- // * The first four arguments of vector type, regardless of their
- // position relative to the other arguments in the argument list, are
- // placed in registers xmm0 - xmm3.
- //
- // * Other arguments are pushed onto the stack in right-to-left order,
- // such that the left-most argument ends up on the top of the stack at
- // the lowest memory address.
- //
- // * Stack arguments of vector type are aligned to start at the next
- // highest multiple of 16 bytes. Other stack arguments are aligned to
- // 4 bytes.
- //
- // This intends to match the section "IA-32 Function Calling
- // Convention" of the document "OS X ABI Function Call Guide" by
- // Apple.
- NeedsStackAlignment = true;
-
- typedef std::vector<Operand *> OperandList;
- OperandList XmmArgs;
- OperandList StackArgs, StackArgLocations; // Parallel lists: StackArgLocations[i] is the esp-relative slot for StackArgs[i].
- uint32_t ParameterAreaSizeBytes = 0; // Running byte offset of the next stack argument; ends up as the size of the whole area.
-
- // Classify each argument operand according to the location where the
- // argument is passed.
- for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
- Operand *Arg = Instr->getArg(i);
- Type Ty = Arg->getType();
- // The PNaCl ABI requires the width of arguments to be at least 32 bits.
- assert(typeWidthInBytes(Ty) >= 4);
- if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { // Vector argument while the xmm argument limit has not been reached.
- XmmArgs.push_back(Arg);
- } else {
- StackArgs.push_back(Arg);
- if (isVectorType(Arg->getType())) { // A vector passed on the stack starts at a stack-aligned offset.
- ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
- }
- Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
- Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
- StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
- ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); // Advance past the slot just assigned.
- }
- }
-
- // Adjust the parameter area so that the stack is aligned. It is
- // assumed that the stack is already aligned at the start of the
- // calling sequence.
- ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
-
- // Subtract the appropriate amount for the argument area. This also
- // takes care of setting the stack adjustment during emission.
- //
- // TODO: If for some reason the call instruction gets dead-code
- // eliminated after lowering, we would need to ensure that the
- // pre-call and the post-call esp adjustment get eliminated as well.
- if (ParameterAreaSizeBytes) {
- _adjust_stack(ParameterAreaSizeBytes);
- }
-
- // Copy arguments that are passed on the stack to the appropriate
- // stack locations.
- for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
- lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
- }
-
- // Copy arguments to be passed in registers to the appropriate
- // registers.
- // TODO: Investigate the impact of lowering arguments passed in
- // registers after lowering stack arguments as opposed to the other
- // way around. Lowering register arguments after stack arguments may
- // reduce register pressure. On the other hand, lowering register
- // arguments first (before stack arguments) may result in more compact
- // code, as the memory operand displacements may end up being smaller
- // before any stack adjustment is done.
- for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
- Variable *Reg = legalizeToVar(XmmArgs[i], RegX8632::Reg_xmm0 + i); // The i-th xmm argument goes into register number Reg_xmm0 + i.
- // Generate a FakeUse of register arguments so that they do not get
- // dead code eliminated as a result of the FakeKill of scratch
- // registers after the call.
- Context.insert(InstFakeUse::create(Func, Reg));
- }
- // Generate the call instruction. Assign its result to a temporary
- // with high register allocation weight.
- Variable *Dest = Instr->getDest();
- // ReturnReg doubles as ReturnRegLo as necessary.
- Variable *ReturnReg = nullptr;
- Variable *ReturnRegHi = nullptr;
- if (Dest) {
- switch (Dest->getType()) { // Pick the return register(s) from the type of Dest.
- case IceType_NUM:
- llvm_unreachable("Invalid Call dest type");
- break;
- case IceType_void:
- break;
- case IceType_i1:
- case IceType_i8:
- case IceType_i16:
- case IceType_i32:
- ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_eax);
- break;
- case IceType_i64: // 64-bit result: low half in eax, high half in edx.
- ReturnReg = makeReg(IceType_i32, RegX8632::Reg_eax);
- ReturnRegHi = makeReg(IceType_i32, RegX8632::Reg_edx);
- break;
- case IceType_f32:
- case IceType_f64:
- // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with
- // the fstp instruction.
- break;
- case IceType_v4i1:
- case IceType_v8i1:
- case IceType_v16i1:
- case IceType_v16i8:
- case IceType_v8i16:
- case IceType_v4i32:
- case IceType_v4f32:
- ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_xmm0);
- break;
- }
- }
- Operand *CallTarget = legalize(Instr->getCallTarget());
- const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
- if (NeedSandboxing) {
- if (llvm::isa<Constant>(CallTarget)) { // Constant target: only the bundle lock is emitted, the target is not masked.
- _bundle_lock(InstBundleLock::Opt_AlignToEnd);
- } else {
- Variable *CallTargetVar = nullptr;
- _mov(CallTargetVar, CallTarget);
- _bundle_lock(InstBundleLock::Opt_AlignToEnd);
- const SizeT BundleSize =
- 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();
- _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); // Clear the low bits so the target is a multiple of BundleSize.
- CallTarget = CallTargetVar;
- }
- }
- Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
- Context.insert(NewCall);
- if (NeedSandboxing)
- _bundle_unlock();
- if (ReturnRegHi) // Only ReturnReg is passed to the call instruction, so the hi register gets its definition from a FakeDef.
- Context.insert(InstFakeDef::create(Func, ReturnRegHi));
-
- // Add the appropriate offset to esp. The call instruction takes care
- // of resetting the stack offset during emission.
- if (ParameterAreaSizeBytes) {
- Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
- _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes));
- }
-
- // Insert a register-kill pseudo instruction.
- Context.insert(InstFakeKill::create(Func, NewCall));
-
- // Generate a FakeUse to keep the call live if necessary.
- if (Instr->hasSideEffects() && ReturnReg) {
- Inst *FakeUse = InstFakeUse::create(Func, ReturnReg);
- Context.insert(FakeUse);
- }
-
- if (!Dest) // No result to copy out.
- return;
-
- // Assign the result of the call to Dest.
- if (ReturnReg) {
- if (ReturnRegHi) {
- assert(Dest->getType() == IceType_i64);
- split64(Dest);
- Variable *DestLo = Dest->getLo();
- Variable *DestHi = Dest->getHi();
- _mov(DestLo, ReturnReg);
- _mov(DestHi, ReturnRegHi);
- } else {
- assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
- Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
- isVectorType(Dest->getType()));
- if (isVectorType(Dest->getType())) {
- _movp(Dest, ReturnReg);
- } else {
- _mov(Dest, ReturnReg);
- }
- }
- } else if (isScalarFloatingType(Dest->getType())) {
- // Special treatment for an FP function which returns its result in
- // st(0).
- // If Dest ends up being a physical xmm register, the fstp emit code
- // will route st(0) through a temporary stack slot.
- _fstp(Dest);
- // Create a fake use of Dest in case it actually isn't used,
- // because st(0) still needs to be popped.
- Context.insert(InstFakeUse::create(Func, Dest));
- }
-}
-
-void TargetX8632::lowerCast(const InstCast *Inst) { // Lowers a cast instruction; each cast kind has its own case below.
- // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
- InstCast::OpKind CastKind = Inst->getCastKind();
- Variable *Dest = Inst->getDest();
- switch (CastKind) {
- default: // Unhandled cast kinds are reported through Func->setError().
- Func->setError("Cast type not supported");
- return;
- case InstCast::Sext: {
- // Src0RM is the source operand legalized to physical register or memory,
- // but not immediate, since the relevant x86 native instructions don't
- // allow an immediate operand. If the operand is an immediate, we could
- // consider computing the strength-reduced result at translation time,
- // but we're unlikely to see something like that in the bitcode that
- // the optimizer wouldn't have already taken care of.
- Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
- if (isVectorType(Dest->getType())) {
- Type DestTy = Dest->getType();
- if (DestTy == IceType_v16i8) {
- // onemask = materialize(1,1,...); dst = (src & onemask) > 0
- Variable *OneMask = makeVectorOfOnes(Dest->getType());
- Variable *T = makeReg(DestTy);
- _movp(T, Src0RM);
- _pand(T, OneMask);
- Variable *Zeros = makeVectorOfZeros(Dest->getType());
- _pcmpgt(T, Zeros);
- _movp(Dest, T);
- } else {
- // width = width(elty) - 1; dest = (src << width) >> width
- SizeT ShiftAmount =
- X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;
- Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
- Variable *T = makeReg(DestTy);
- _movp(T, Src0RM);
- _psll(T, ShiftConstant);
- _psra(T, ShiftConstant);
- _movp(Dest, T);
- }
- } else if (Dest->getType() == IceType_i64) {
- // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
- Constant *Shift = Ctx->getConstantInt32(31);
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Variable *T_Lo = makeReg(DestLo->getType());
- if (Src0RM->getType() == IceType_i32) {
- _mov(T_Lo, Src0RM);
- } else if (Src0RM->getType() == IceType_i1) { // i1: zero-extend, then shl/sar by 31 so that bit 0 fills the whole low word.
- _movzx(T_Lo, Src0RM);
- _shl(T_Lo, Shift);
- _sar(T_Lo, Shift);
- } else {
- _movsx(T_Lo, Src0RM);
- }
- _mov(DestLo, T_Lo);
- Variable *T_Hi = nullptr;
- _mov(T_Hi, T_Lo);
- if (Src0RM->getType() != IceType_i1)
- // For i1, the sar instruction is already done above.
- _sar(T_Hi, Shift);
- _mov(DestHi, T_Hi);
- } else if (Src0RM->getType() == IceType_i1) {
- // t1 = src
- // shl t1, dst_bitwidth - 1
- // sar t1, dst_bitwidth - 1
- // dst = t1
- size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
- Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
- Variable *T = makeReg(Dest->getType());
- if (typeWidthInBytes(Dest->getType()) <=
- typeWidthInBytes(Src0RM->getType())) {
- _mov(T, Src0RM);
- } else {
- // Widen the source using movsx or movzx. (It doesn't matter
- // which one, since the following shl/sar overwrite the bits.)
- _movzx(T, Src0RM);
- }
- _shl(T, ShiftAmount);
- _sar(T, ShiftAmount);
- _mov(Dest, T);
- } else {
- // t1 = movsx src; dst = t1
- Variable *T = makeReg(Dest->getType());
- _movsx(T, Src0RM);
- _mov(Dest, T);
- }
- break;
- }
- case InstCast::Zext: {
- Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
- if (isVectorType(Dest->getType())) {
- // onemask = materialize(1,1,...); dest = onemask & src
- Type DestTy = Dest->getType();
- Variable *OneMask = makeVectorOfOnes(DestTy);
- Variable *T = makeReg(DestTy);
- _movp(T, Src0RM);
- _pand(T, OneMask);
- _movp(Dest, T);
- } else if (Dest->getType() == IceType_i64) {
- // t1=movzx src; dst.lo=t1; dst.hi=0
- Constant *Zero = Ctx->getConstantZero(IceType_i32);
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Variable *Tmp = makeReg(DestLo->getType());
- if (Src0RM->getType() == IceType_i32) {
- _mov(Tmp, Src0RM);
- } else {
- _movzx(Tmp, Src0RM);
- }
- if (Src0RM->getType() == IceType_i1) { // For an i1 source, additionally keep only bit 0.
- Constant *One = Ctx->getConstantInt32(1);
- _and(Tmp, One);
- }
- _mov(DestLo, Tmp);
- _mov(DestHi, Zero);
- } else if (Src0RM->getType() == IceType_i1) {
- // t = Src0RM; t &= 1; Dest = t
- Constant *One = Ctx->getConstantInt32(1);
- Type DestTy = Dest->getType();
- Variable *T;
- if (DestTy == IceType_i8) { // i8 dest: plain mov, no movzx.
- T = makeReg(DestTy);
- _mov(T, Src0RM);
- } else {
- // Use 32-bit for both 16-bit and 32-bit, since 32-bit ops are shorter.
- T = makeReg(IceType_i32);
- _movzx(T, Src0RM);
- }
- _and(T, One);
- _mov(Dest, T);
- } else {
- // t1 = movzx src; dst = t1
- Variable *T = makeReg(Dest->getType());
- _movzx(T, Src0RM);
- _mov(Dest, T);
- }
- break;
- }
- case InstCast::Trunc: {
- if (isVectorType(Dest->getType())) {
- // onemask = materialize(1,1,...); dst = src & onemask
- Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
- Type Src0Ty = Src0RM->getType();
- Variable *OneMask = makeVectorOfOnes(Src0Ty);
- Variable *T = makeReg(Dest->getType());
- _movp(T, Src0RM);
- _pand(T, OneMask);
- _movp(Dest, T);
- } else {
- Operand *Src0 = Inst->getSrc(0);
- if (Src0->getType() == IceType_i64) // Truncating an i64 only reads its low half.
- Src0 = loOperand(Src0);
- Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
- // t1 = trunc Src0RM; Dest = t1
- Variable *T = nullptr;
- _mov(T, Src0RM);
- if (Dest->getType() == IceType_i1) // An i1 result keeps only bit 0.
- _and(T, Ctx->getConstantInt1(1));
- _mov(Dest, T);
- }
- break;
- }
- case InstCast::Fptrunc:
- case InstCast::Fpext: { // Both directions share the same Float2float conversion.
- Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
- // t1 = cvt Src0RM; Dest = t1
- Variable *T = makeReg(Dest->getType());
- _cvt(T, Src0RM, InstX8632Cvt::Float2float);
- _mov(Dest, T);
- break;
- }
- case InstCast::Fptosi:
- if (isVectorType(Dest->getType())) {
- assert(Dest->getType() == IceType_v4i32 &&
- Inst->getSrc(0)->getType() == IceType_v4f32);
- Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
- if (llvm::isa<OperandX8632Mem>(Src0RM)) // A memory operand is first legalized into a Variable.
- Src0RM = legalizeToVar(Src0RM);
- Variable *T = makeReg(Dest->getType());
- _cvt(T, Src0RM, InstX8632Cvt::Tps2dq);
- _movp(Dest, T);
- } else if (Dest->getType() == IceType_i64) {
- // Use a helper for converting floating-point values to 64-bit
- // integers. SSE2 appears to have no way to convert from xmm
- // registers to something like the edx:eax register pair, and
- // gcc and clang both want to use x87 instructions complete with
- // temporary manipulation of the status word. This helper is
- // not needed for x86-64.
- split64(Dest);
- const SizeT MaxSrcs = 1;
- Type SrcType = Inst->getSrc(0)->getType();
- InstCall *Call =
- makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
- : H_fptosi_f64_i64,
- Dest, MaxSrcs);
- Call->addArg(Inst->getSrc(0));
- lowerCall(Call);
- } else {
- Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
- // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
- Variable *T_1 = makeReg(IceType_i32);
- Variable *T_2 = makeReg(Dest->getType());
- _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
- _mov(T_2, T_1); // T_1 and T_2 may have different integer types
- if (Dest->getType() == IceType_i1)
- _and(T_2, Ctx->getConstantInt1(1));
- _mov(Dest, T_2);
- }
- break;
- case InstCast::Fptoui:
- if (isVectorType(Dest->getType())) {
- assert(Dest->getType() == IceType_v4i32 &&
- Inst->getSrc(0)->getType() == IceType_v4f32);
- const SizeT MaxSrcs = 1;
- InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs);
- Call->addArg(Inst->getSrc(0));
- lowerCall(Call);
- } else if (Dest->getType() == IceType_i64 ||
- Dest->getType() == IceType_i32) {
- // Use a helper for both x86-32 and x86-64.
- split64(Dest); // NOTE(review): also reached when Dest is i32 -- confirm split64 tolerates a non-i64 variable.
- const SizeT MaxSrcs = 1;
- Type DestType = Dest->getType();
- Type SrcType = Inst->getSrc(0)->getType();
- IceString TargetString;
- if (isInt32Asserting32Or64(DestType)) {
- TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32
- : H_fptoui_f64_i32;
- } else {
- TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64
- : H_fptoui_f64_i64;
- }
- InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
- Call->addArg(Inst->getSrc(0));
- lowerCall(Call);
- return;
- } else {
- Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
- // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
- Variable *T_1 = makeReg(IceType_i32);
- Variable *T_2 = makeReg(Dest->getType());
- _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si);
- _mov(T_2, T_1); // T_1 and T_2 may have different integer types
- if (Dest->getType() == IceType_i1)
- _and(T_2, Ctx->getConstantInt1(1));
- _mov(Dest, T_2);
- }
- break;
- case InstCast::Sitofp:
- if (isVectorType(Dest->getType())) {
- assert(Dest->getType() == IceType_v4f32 &&
- Inst->getSrc(0)->getType() == IceType_v4i32);
- Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
- if (llvm::isa<OperandX8632Mem>(Src0RM)) // A memory operand is first legalized into a Variable.
- Src0RM = legalizeToVar(Src0RM);
- Variable *T = makeReg(Dest->getType());
- _cvt(T, Src0RM, InstX8632Cvt::Dq2ps);
- _movp(Dest, T);
- } else if (Inst->getSrc(0)->getType() == IceType_i64) {
- // Use a helper for x86-32.
- const SizeT MaxSrcs = 1;
- Type DestType = Dest->getType();
- InstCall *Call =
- makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
- : H_sitofp_i64_f64,
- Dest, MaxSrcs);
- // TODO: Call the correct compiler-rt helper function.
- Call->addArg(Inst->getSrc(0));
- lowerCall(Call);
- return;
- } else {
- Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
- // Sign-extend the operand.
- // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
- Variable *T_1 = makeReg(IceType_i32);
- Variable *T_2 = makeReg(Dest->getType());
- if (Src0RM->getType() == IceType_i32)
- _mov(T_1, Src0RM);
- else
- _movsx(T_1, Src0RM);
- _cvt(T_2, T_1, InstX8632Cvt::Si2ss);
- _mov(Dest, T_2);
- }
- break;
- case InstCast::Uitofp: {
- Operand *Src0 = Inst->getSrc(0);
- if (isVectorType(Src0->getType())) {
- assert(Dest->getType() == IceType_v4f32 &&
- Src0->getType() == IceType_v4i32);
- const SizeT MaxSrcs = 1;
- InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
- Call->addArg(Src0);
- lowerCall(Call);
- } else if (Src0->getType() == IceType_i64 ||
- Src0->getType() == IceType_i32) {
- // Use a helper for x86-32 and x86-64. Also use a helper for
- // i32 on x86-32.
- const SizeT MaxSrcs = 1;
- Type DestType = Dest->getType();
- IceString TargetString;
- if (isInt32Asserting32Or64(Src0->getType())) {
- TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
- : H_uitofp_i32_f64;
- } else {
- TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
- : H_uitofp_i64_f64;
- }
- InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
- Call->addArg(Src0);
- lowerCall(Call);
- return;
- } else {
- Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
- // Zero-extend the operand.
- // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
- Variable *T_1 = makeReg(IceType_i32);
- Variable *T_2 = makeReg(Dest->getType());
- if (Src0RM->getType() == IceType_i32) // NOTE(review): looks unreachable, since i32 sources take the helper branch above -- confirm.
- _mov(T_1, Src0RM);
- else
- _movzx(T_1, Src0RM);
- _cvt(T_2, T_1, InstX8632Cvt::Si2ss); // Si2ss is applied to the zero-extended 32-bit temporary.
- _mov(Dest, T_2);
- }
- break;
- }
- case InstCast::Bitcast: {
- Operand *Src0 = Inst->getSrc(0);
- if (Dest->getType() == Src0->getType()) { // Same type on both sides: lower as a plain assignment.
- InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
- lowerAssign(Assign);
- return;
- }
- switch (Dest->getType()) {
- default:
- llvm_unreachable("Unexpected Bitcast dest type");
- case IceType_i8: {
- assert(Src0->getType() == IceType_v8i1);
- InstCall *Call = makeHelperCall(H_bitcast_8xi1_i8, Dest, 1);
- Call->addArg(Src0);
- lowerCall(Call);
- } break;
- case IceType_i16: {
- assert(Src0->getType() == IceType_v16i1);
- InstCall *Call = makeHelperCall(H_bitcast_16xi1_i16, Dest, 1);
- Call->addArg(Src0);
- lowerCall(Call);
- } break;
- case IceType_i32:
- case IceType_f32: {
- Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
- Type DestType = Dest->getType();
- Type SrcType = Src0RM->getType();
- (void)DestType; // DestType is otherwise only read inside the assert below.
- assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
- (DestType == IceType_f32 && SrcType == IceType_i32));
- // a.i32 = bitcast b.f32 ==>
- // t.f32 = b.f32
- // s.f32 = spill t.f32
- // a.i32 = s.f32
- Variable *T = nullptr;
- // TODO: Should be able to force a spill setup by calling legalize() with
- // Legal_Mem and not Legal_Reg or Legal_Imm.
- SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(SrcType);
- SpillVar->setLinkedTo(Dest);
- Variable *Spill = SpillVar;
- Spill->setWeight(RegWeight::Zero);
- _mov(T, Src0RM);
- _mov(Spill, T);
- _mov(Dest, Spill);
- } break;
- case IceType_i64: {
- Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
- assert(Src0RM->getType() == IceType_f64);
- // a.i64 = bitcast b.f64 ==>
- // s.f64 = spill b.f64
- // t_lo.i32 = lo(s.f64)
- // a_lo.i32 = t_lo.i32
- // t_hi.i32 = hi(s.f64)
- // a_hi.i32 = t_hi.i32
- Operand *SpillLo, *SpillHi;
- if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
- SpillVariable *SpillVar =
- Func->makeVariable<SpillVariable>(IceType_f64);
- SpillVar->setLinkedTo(Src0Var);
- Variable *Spill = SpillVar;
- Spill->setWeight(RegWeight::Zero);
- _movq(Spill, Src0RM);
- SpillLo = VariableSplit::create(Func, Spill, VariableSplit::Low);
- SpillHi = VariableSplit::create(Func, Spill, VariableSplit::High);
- } else { // Source is not a Variable: take its lo and hi halves directly, without a spill variable.
- SpillLo = loOperand(Src0RM);
- SpillHi = hiOperand(Src0RM);
- }
-
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Variable *T_Lo = makeReg(IceType_i32);
- Variable *T_Hi = makeReg(IceType_i32);
-
- _mov(T_Lo, SpillLo);
- _mov(DestLo, T_Lo);
- _mov(T_Hi, SpillHi);
- _mov(DestHi, T_Hi);
- } break;
- case IceType_f64: {
- Src0 = legalize(Src0);
- assert(Src0->getType() == IceType_i64);
- if (llvm::isa<OperandX8632Mem>(Src0)) { // Memory source: movq through a temporary, no spill variable is created.
- Variable *T = Func->makeVariable(Dest->getType());
- _movq(T, Src0);
- _movq(Dest, T);
- break;
- }
- // a.f64 = bitcast b.i64 ==>
- // t_lo.i32 = b_lo.i32
- // FakeDef(s.f64)
- // lo(s.f64) = t_lo.i32
- // t_hi.i32 = b_hi.i32
- // hi(s.f64) = t_hi.i32
- // a.f64 = s.f64
- SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(IceType_f64);
- SpillVar->setLinkedTo(Dest);
- Variable *Spill = SpillVar;
- Spill->setWeight(RegWeight::Zero);
-
- Variable *T_Lo = nullptr, *T_Hi = nullptr;
- VariableSplit *SpillLo =
- VariableSplit::create(Func, Spill, VariableSplit::Low);
- VariableSplit *SpillHi =
- VariableSplit::create(Func, Spill, VariableSplit::High);
- _mov(T_Lo, loOperand(Src0));
- // Technically, the Spill is defined after the _store happens, but
- // SpillLo is considered a "use" of Spill so define Spill before it
- // is used.
- Context.insert(InstFakeDef::create(Func, Spill));
- _store(T_Lo, SpillLo);
- _mov(T_Hi, hiOperand(Src0));
- _store(T_Hi, SpillHi);
- _movq(Dest, Spill);
- } break;
- case IceType_v8i1: {
- assert(Src0->getType() == IceType_i8);
- InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1);
- Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
- // Arguments to functions are required to be at least 32 bits wide.
- lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); // Recursive call: widen the i8 source with a Zext first.
- Call->addArg(Src0AsI32);
- lowerCall(Call);
- } break;
- case IceType_v16i1: {
- assert(Src0->getType() == IceType_i16);
- InstCall *Call = makeHelperCall(H_bitcast_i16_16xi1, Dest, 1);
- Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
- // Arguments to functions are required to be at least 32 bits wide.
- lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); // Recursive call: widen the i16 source with a Zext first.
- Call->addArg(Src0AsI32);
- lowerCall(Call);
- } break;
- case IceType_v8i16:
- case IceType_v16i8:
- case IceType_v4i32:
- case IceType_v4f32: { // These vector-to-vector bitcasts are a single _movp.
- _movp(Dest, legalizeToVar(Src0));
- } break;
- }
- break;
- }
- }
-}
-
-// Lowers an extractelement instruction: reads the element at a constant index
-// out of the source vector (Src(0)) and copies it to the instruction's
-// destination. The index operand (Src(1)) must be a ConstantInteger32.
-//
-// Three strategies are used, depending on the vector type and InstructionSet:
-//   * pextr{b,w,d} for v8i16/v8i1, and for any other non-v4f32 type when
-//     SSE4.1 is available;
-//   * pshufd (skipped for index 0) followed by movd/movss for the remaining
-//     v4i32/v4f32/v4i1 cases;
-//   * a copy to a zero-weight stack slot plus a scalar load for v16i8/v16i1.
-// Elements of i1 vectors are truncated to i1 before the final copy.
-void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
-  Operand *SourceVectNotLegalized = Inst->getSrc(0);
-  ConstantInteger32 *ElementIndex =
-      llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));
-  // Only constant indices are allowed in PNaCl IR.
-  assert(ElementIndex);
-
-  unsigned Index = ElementIndex->getValue();
-  Type Ty = SourceVectNotLegalized->getType();
-  // Same bounds check as lowerInsertElement(): an out-of-range index would
-  // otherwise silently turn into a bad pextr/pshufd immediate or a memory
-  // offset past the end of the spilled vector.
-  assert(Index < typeNumElements(Ty));
-  Type ElementTy = typeElementType(Ty);
-  Type InVectorElementTy = getInVectorElementType(Ty);
-  Variable *ExtractedElementR = makeReg(InVectorElementTy);
-
-  // TODO(wala): Determine the best lowering sequences for each type.
-  bool CanUsePextr =
-      Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
-  if (CanUsePextr && Ty != IceType_v4f32) {
-    // Use pextrb, pextrw, or pextrd.
-    Constant *Mask = Ctx->getConstantInt32(Index);
-    Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
-    _pextr(ExtractedElementR, SourceVectR, Mask);
-  } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
-    // Use pshufd and movd/movss.
-    Variable *T = nullptr;
-    if (Index) {
-      // The shuffle only needs to occur if the element to be extracted
-      // is not at the lowest index.
-      Constant *Mask = Ctx->getConstantInt32(Index);
-      T = makeReg(Ty);
-      _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
-    } else {
-      T = legalizeToVar(SourceVectNotLegalized);
-    }
-
-    if (InVectorElementTy == IceType_i32) {
-      _movd(ExtractedElementR, T);
-    } else { // InVectorElementTy is not i32, i.e. the v4f32 case.
-      // TODO(wala): _movss is only used here because _mov does not
-      // allow a vector source and a scalar destination.  _mov should be
-      // able to be used here.
-      // _movss is a binary instruction, so the FakeDef is needed to
-      // keep the live range analysis consistent.
-      Context.insert(InstFakeDef::create(Func, ExtractedElementR));
-      _movss(ExtractedElementR, T);
-    }
-  } else {
-    assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
-    // Spill the value to a stack slot and do the extraction in memory.
-    //
-    // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
-    // support for legalizing to mem is implemented.
-    Variable *Slot = Func->makeVariable(Ty);
-    Slot->setWeight(RegWeight::Zero);
-    _movp(Slot, legalizeToVar(SourceVectNotLegalized));
-
-    // Compute the location of the element in memory.
-    unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
-    OperandX8632Mem *Loc =
-        getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
-    _mov(ExtractedElementR, Loc);
-  }
-
-  if (ElementTy == IceType_i1) {
-    // Truncate extracted integers to i1s if necessary.
-    Variable *T = makeReg(IceType_i1);
-    InstCast *Cast =
-        InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
-    lowerCast(Cast);
-    ExtractedElementR = T;
-  }
-
-  // Copy the element to the destination.
-  Variable *Dest = Inst->getDest();
-  _mov(Dest, ExtractedElementR);
-}
-
-// Lowers an fcmp instruction, for both vector and scalar destinations.
-//
-// Vector compares: the operands are swapped when TableFcmp asks for it and
-// the result mask is then built as follows:
-//   * True  -> a vector of minus ones (built as v4i32, because
-//              makeVectorOfMinusOnes() requires an integer vector type);
-//   * False -> a vector of zeros;
-//   * One   -> cmpps(neq) AND cmpps(ord);
-//   * Ueq   -> cmpps(eq)  OR  cmpps(unord);
-//   * any other condition -> a single cmpps with the TableFcmp predicate.
-//
-// Scalar compares, a = fcmp cond, b, c:
-//   ucomiss b, c       /* only if C1 != Br_None */
-//                      /* but swap b,c order if SwapScalarOperands */
-//   mov a, <default>
-//   j<C1> label        /* only if C1 != Br_None */
-//   j<C2> label        /* only if C2 != Br_None */
-//   mov a, !<default>  /* only if C1 != Br_None; non-killable mov */
-//   label:             /* only if C1 != Br_None */
-// When C1 != Br_None and C2 == Br_None, a shorter setcc form is used:
-//   ucomiss b, c
-//   setcc a, C1
-void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
-  Operand *Src0 = Inst->getSrc(0);
-  Operand *Src1 = Inst->getSrc(1);
-  Variable *Dest = Inst->getDest();
-
-  const InstFcmp::FCond Cond = Inst->getCondition();
-  const size_t TableIdx = static_cast<size_t>(Cond);
-  assert(TableIdx < TableFcmpSize);
-
-  if (isVectorType(Dest->getType())) {
-    if (TableFcmp[TableIdx].SwapVectorOperands)
-      std::swap(Src0, Src1);
-
-    Variable *Result = nullptr;
-
-    if (Cond == InstFcmp::True) {
-      // makeVectorOfMinusOnes() requires an integer vector type.
-      Result = makeVectorOfMinusOnes(IceType_v4i32);
-    } else if (Cond == InstFcmp::False) {
-      Result = makeVectorOfZeros(Dest->getType());
-    } else {
-      Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
-      Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
-      if (llvm::isa<OperandX8632Mem>(Src1RM))
-        Src1RM = legalizeToVar(Src1RM);
-
-      const bool IsOne = (Cond == InstFcmp::One);
-      const bool IsUeq = (Cond == InstFcmp::Ueq);
-      if (IsOne || IsUeq) {
-        // Two comparisons are required:
-        //   One: unequal AND ordered.
-        //   Ueq: equal OR unordered.
-        Result = makeReg(Src0RM->getType());
-        Variable *Second = makeReg(Src0RM->getType());
-        _movp(Result, Src0RM);
-        _cmpps(Result, Src1RM, IsOne ? CondX86::Cmpps_neq : CondX86::Cmpps_eq);
-        _movp(Second, Src0RM);
-        _cmpps(Second, Src1RM,
-               IsOne ? CondX86::Cmpps_ord : CondX86::Cmpps_unord);
-        if (IsOne)
-          _pand(Result, Second);
-        else
-          _por(Result, Second);
-      } else {
-        // A single cmpps predicate from the table is enough.
-        CondX86::CmppsCond Predicate = TableFcmp[TableIdx].Predicate;
-        assert(Predicate != CondX86::Cmpps_Invalid);
-        Result = makeReg(Src0RM->getType());
-        _movp(Result, Src0RM);
-        _cmpps(Result, Src1RM, Predicate);
-      }
-    }
-
-    _movp(Dest, Result);
-    eliminateNextVectorSextInstruction(Dest);
-    return;
-  }
-
-  // Scalar case; see the function comment for the emitted sequence.
-  if (TableFcmp[TableIdx].SwapScalarOperands)
-    std::swap(Src0, Src1);
-  const bool HasC1 = (TableFcmp[TableIdx].C1 != CondX86::Br_None);
-  const bool HasC2 = (TableFcmp[TableIdx].C2 != CondX86::Br_None);
-  if (HasC1) {
-    Src0 = legalize(Src0);
-    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
-    Variable *Lhs = nullptr;
-    _mov(Lhs, Src0);
-    _ucomiss(Lhs, Src1RM);
-    if (!HasC2) {
-      // One condition code suffices, so setcc replaces the branches.
-      assert(TableFcmp[TableIdx].Default);
-      _setcc(Dest, TableFcmp[TableIdx].C1);
-      return;
-    }
-  }
-  Constant *Default = Ctx->getConstantInt32(TableFcmp[TableIdx].Default);
-  _mov(Dest, Default);
-  if (!HasC1)
-    return;
-
-  InstX8632Label *Label = InstX8632Label::create(Func, this);
-  _br(TableFcmp[TableIdx].C1, Label);
-  if (HasC2)
-    _br(TableFcmp[TableIdx].C2, Label);
-  Constant *NonDefault = Ctx->getConstantInt32(!TableFcmp[TableIdx].Default);
-  _mov_nonkillable(Dest, NonDefault);
-  Context.insert(Label);
-}
-
-// Lowers an icmp instruction. Three cases are handled:
-//   * vector destinations: i1 vectors are first sign-extended to the 128-bit
-//     integer vector type with the same element count, unsigned orderings
-//     are turned into signed ones by flipping the high-order bits, and the
-//     mask is then built from pcmpeq/pcmpgt (plus a pxor with all ones when
-//     the result must be inverted);
-//   * i64 scalars: the high halves are compared first, then the low halves,
-//     with the branch conditions taken from TableIcmp64;
-//   * all other scalars: cmp followed by setcc.
-void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
-  Operand *Src0 = legalize(Inst->getSrc(0));
-  Operand *Src1 = legalize(Inst->getSrc(1));
-  Variable *Dest = Inst->getDest();
-  const InstIcmp::ICond Cond = Inst->getCondition();
-
-  if (isVectorType(Dest->getType())) {
-    Type Ty = Src0->getType();
-    // Promote i1 vectors to 128 bit integer vector types.
-    if (typeElementType(Ty) == IceType_i1) {
-      Type WideTy = IceType_NUM;
-      switch (Ty) {
-      case IceType_v4i1:
-        WideTy = IceType_v4i32;
-        break;
-      case IceType_v8i1:
-        WideTy = IceType_v8i16;
-        break;
-      case IceType_v16i1:
-        WideTy = IceType_v16i8;
-        break;
-      default:
-        llvm_unreachable("unexpected type");
-        break;
-      }
-      Variable *Wide0 = Func->makeVariable(WideTy);
-      Variable *Wide1 = Func->makeVariable(WideTy);
-      lowerCast(InstCast::create(Func, InstCast::Sext, Wide0, Src0));
-      lowerCast(InstCast::create(Func, InstCast::Sext, Wide1, Src1));
-      Src0 = Wide0;
-      Src1 = Wide1;
-      Ty = WideTy;
-    }
-
-    Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
-    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
-
-    // SSE2 only has signed comparison operations.  Transform unsigned
-    // inputs in a manner that allows for the use of signed comparison
-    // operations by flipping the high order bits.
-    const bool IsUnsignedOrdering =
-        Cond == InstIcmp::Ugt || Cond == InstIcmp::Uge ||
-        Cond == InstIcmp::Ult || Cond == InstIcmp::Ule;
-    if (IsUnsignedOrdering) {
-      Variable *Flipped0 = makeReg(Ty);
-      Variable *Flipped1 = makeReg(Ty);
-      Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
-      _movp(Flipped0, Src0RM);
-      _pxor(Flipped0, HighOrderBits);
-      _movp(Flipped1, Src1RM);
-      _pxor(Flipped1, HighOrderBits);
-      Src0RM = Flipped0;
-      Src1RM = Flipped1;
-    }
-
-    Variable *T = makeReg(Ty);
-
-    // Every condition is expressed as
-    //   T = Lhs;  T = pcmp{eq,gt}(T, Rhs);  [T ^= all-ones]
-    // and only differs in the three choices below.
-    bool UseEquality = false;  // pcmpeq instead of pcmpgt
-    bool SwapOperands = false; // compare (Src1, Src0) instead of (Src0, Src1)
-    bool InvertResult = false; // xor the mask with a vector of minus ones
-    switch (Cond) {
-    case InstIcmp::Eq:
-      UseEquality = true;
-      break;
-    case InstIcmp::Ne:
-      UseEquality = true;
-      InvertResult = true;
-      break;
-    case InstIcmp::Ugt:
-    case InstIcmp::Sgt:
-      break;
-    case InstIcmp::Uge:
-    case InstIcmp::Sge:
-      // !(Src1RM > Src0RM)
-      SwapOperands = true;
-      InvertResult = true;
-      break;
-    case InstIcmp::Ult:
-    case InstIcmp::Slt:
-      SwapOperands = true;
-      break;
-    case InstIcmp::Ule:
-    case InstIcmp::Sle:
-      // !(Src0RM > Src1RM)
-      InvertResult = true;
-      break;
-    default:
-      llvm_unreachable("unexpected condition");
-      break;
-    }
-
-    Operand *Lhs = SwapOperands ? Src1RM : Src0RM;
-    Operand *Rhs = SwapOperands ? Src0RM : Src1RM;
-    // The second operand of the compare is moved to a register if it is a
-    // memory operand.
-    if (llvm::isa<OperandX8632Mem>(Rhs))
-      Rhs = legalizeToVar(Rhs);
-    _movp(T, Lhs);
-    if (UseEquality)
-      _pcmpeq(T, Rhs);
-    else
-      _pcmpgt(T, Rhs);
-    if (InvertResult) {
-      Variable *MinusOne = makeVectorOfMinusOnes(Ty);
-      _pxor(T, MinusOne);
-    }
-
-    _movp(Dest, T);
-    eliminateNextVectorSextInstruction(Dest);
-    return;
-  }
-
-  if (Src0->getType() == IceType_i64) {
-    // Dest starts as 1.  The high halves are compared first and may branch
-    // straight to the "true" or "false" label (C1/C2); otherwise the low
-    // halves decide via C3.  Falling through to the "false" label resets
-    // Dest to 0.
-    const size_t TableIdx = static_cast<size_t>(Cond);
-    assert(TableIdx < TableIcmp64Size);
-    Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
-    Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
-    Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
-    Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
-    Constant *Zero = Ctx->getConstantZero(IceType_i32);
-    Constant *One = Ctx->getConstantInt32(1);
-    InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
-    InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
-    _mov(Dest, One);
-    _cmp(Src0HiRM, Src1HiRI);
-    const CondX86::BrCond HiTrue = TableIcmp64[TableIdx].C1;
-    const CondX86::BrCond HiFalse = TableIcmp64[TableIdx].C2;
-    if (HiTrue != CondX86::Br_None)
-      _br(HiTrue, LabelTrue);
-    if (HiFalse != CondX86::Br_None)
-      _br(HiFalse, LabelFalse);
-    _cmp(Src0LoRM, Src1LoRI);
-    _br(TableIcmp64[TableIdx].C3, LabelTrue);
-    Context.insert(LabelFalse);
-    _mov_nonkillable(Dest, Zero);
-    Context.insert(LabelTrue);
-    return;
-  }
-
-  // Remaining scalar types: cmp b, c followed by setcc.
-  Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
-  _cmp(Src0RM, Src1);
-  _setcc(Dest, getIcmp32Mapping(Cond));
-}
-
-// Lowers an insertelement instruction: produces a copy of the source vector
-// (Src(0)) in which the element at the constant index Src(2) is replaced by
-// Src(1), and stores it in the instruction's destination.
-//
-// An i1 element is first zero-extended to the in-vector element type.  The
-// insertion itself uses one of three strategies:
-//   * insertps/pinsr{b,w,d} for v8i16/v8i1, or for any type with SSE4.1;
-//   * movss (index 0) or a pair of shufps for v4i32/v4f32/v4i1;
-//   * a round trip through a zero-weight stack slot for v16i8/v16i1.
-void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
-  Operand *VectNotLegalized = Inst->getSrc(0);
-  Operand *ElementNotLegalized = Inst->getSrc(1);
-  ConstantInteger32 *IndexConstant =
-      llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));
-  // Only constant indices are allowed in PNaCl IR.
-  assert(IndexConstant);
-  const unsigned Index = IndexConstant->getValue();
-  assert(Index < typeNumElements(VectNotLegalized->getType()));
-
-  const Type Ty = VectNotLegalized->getType();
-  const Type ElementTy = typeElementType(Ty);
-  const Type InVectorElementTy = getInVectorElementType(Ty);
-
-  if (ElementTy == IceType_i1) {
-    // Expand the element to the appropriate size for it to be inserted
-    // in the vector.
-    Variable *Widened = Func->makeVariable(InVectorElementTy);
-    lowerCast(
-        InstCast::create(Func, InstCast::Zext, Widened, ElementNotLegalized));
-    ElementNotLegalized = Widened;
-  }
-
-  const bool HasInsertInstruction =
-      Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
-  if (HasInsertInstruction) {
-    // Use insertps, pinsrb, pinsrw, or pinsrd.
-    Operand *ElementRM = legalize(ElementNotLegalized, Legal_Reg | Legal_Mem);
-    Operand *VectRM = legalize(VectNotLegalized, Legal_Reg | Legal_Mem);
-    Variable *Result = makeReg(Ty);
-    _movp(Result, VectRM);
-    if (Ty == IceType_v4f32)
-      _insertps(Result, ElementRM, Ctx->getConstantInt32(Index << 4));
-    else
-      _pinsr(Result, ElementRM, Ctx->getConstantInt32(Index));
-    _movp(Inst->getDest(), Result);
-    return;
-  }
-
-  if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
-    // Use shufps or movss.
-    Variable *ElementR = nullptr;
-    Operand *VectRM = legalize(VectNotLegalized, Legal_Reg | Legal_Mem);
-
-    if (InVectorElementTy == IceType_f32) {
-      // ElementR will be in an XMM register since it is floating point.
-      ElementR = legalizeToVar(ElementNotLegalized);
-    } else {
-      // Copy an integer to an XMM register.
-      Operand *ElementRM =
-          legalize(ElementNotLegalized, Legal_Reg | Legal_Mem);
-      ElementR = makeReg(Ty);
-      _movd(ElementR, ElementRM);
-    }
-
-    if (Index == 0) {
-      // The lowest element can be replaced directly with movss.
-      Variable *Result = makeReg(Ty);
-      _movp(Result, VectRM);
-      _movss(Result, ElementR);
-      _movp(Inst->getDest(), Result);
-      return;
-    }
-
-    // shufps treats the source and destination operands as vectors of
-    // four doublewords.  The destination's two high doublewords are
-    // selected from the source operand and the two low doublewords are
-    // selected from the (original value of) the destination operand.
-    // An insertelement operation can be effected with a sequence of two
-    // shufps operations with appropriate masks.  In all cases below,
-    // Element[0] is being inserted into SourceVectOperand.  Indices are
-    // ordered from left to right.
-    //
-    // insertelement into index 1 (result is stored in ElementR):
-    //   ElementR := ElementR[0, 0] VectRM[0, 0]
-    //   ElementR := ElementR[3, 0] VectRM[2, 3]
-    //
-    // insertelement into index 2 (result is stored in T):
-    //   T := VectRM
-    //   ElementR := ElementR[0, 0] T[0, 3]
-    //   T := T[0, 1] ElementR[0, 3]
-    //
-    // insertelement into index 3 (result is stored in T):
-    //   T := VectRM
-    //   ElementR := ElementR[0, 0] T[0, 2]
-    //   T := T[0, 1] ElementR[3, 0]
-    const unsigned char FirstShuffleMask[3] = {0, 192, 128};
-    const unsigned char SecondShuffleMask[3] = {227, 196, 52};
-
-    // The tables start at index 1, because index 0 was handled above.
-    Constant *Mask1Constant = Ctx->getConstantInt32(FirstShuffleMask[Index - 1]);
-    Constant *Mask2Constant =
-        Ctx->getConstantInt32(SecondShuffleMask[Index - 1]);
-
-    if (Index == 1) {
-      _shufps(ElementR, VectRM, Mask1Constant);
-      _shufps(ElementR, VectRM, Mask2Constant);
-      _movp(Inst->getDest(), ElementR);
-      return;
-    }
-
-    Variable *T = makeReg(Ty);
-    _movp(T, VectRM);
-    _shufps(ElementR, T, Mask1Constant);
-    _shufps(T, ElementR, Mask2Constant);
-    _movp(Inst->getDest(), T);
-    return;
-  }
-
-  assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
-  // Spill the value to a stack slot and perform the insertion in
-  // memory.
-  //
-  // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
-  // support for legalizing to mem is implemented.
-  Variable *Slot = Func->makeVariable(Ty);
-  Slot->setWeight(RegWeight::Zero);
-  _movp(Slot, legalizeToVar(VectNotLegalized));
-
-  // Compute the location of the position to insert in memory.
-  const unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
-  OperandX8632Mem *Loc =
-      getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
-  _store(legalizeToVar(ElementNotLegalized), Loc);
-
-  // Reload the updated vector and copy it to the destination.
-  Variable *Reloaded = makeReg(Ty);
-  _movp(Reloaded, Slot);
-  _movp(Inst->getDest(), Reloaded);
-}
-
-void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { // Lowers one intrinsic call; each case below returns once it has emitted its code.
- switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { // Dispatch on the intrinsic ID.
- case Intrinsics::AtomicCmpxchg: {
- if (!Intrinsics::isMemoryOrderValid(
- ID, getConstantMemoryOrder(Instr->getArg(3)),
- getConstantMemoryOrder(Instr->getArg(4)))) {
- Func->setError("Unexpected memory ordering for AtomicCmpxchg");
- return;
- }
- Variable *DestPrev = Instr->getDest();
- Operand *PtrToMem = Instr->getArg(0);
- Operand *Expected = Instr->getArg(1);
- Operand *Desired = Instr->getArg(2);
- if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired)) // Fused cmpxchg + compare + branch form was emitted.
- return;
- lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired);
- return;
- }
- case Intrinsics::AtomicFence:
- if (!Intrinsics::isMemoryOrderValid(
- ID, getConstantMemoryOrder(Instr->getArg(0)))) {
- Func->setError("Unexpected memory ordering for AtomicFence");
- return;
- }
- _mfence();
- return;
- case Intrinsics::AtomicFenceAll:
- // NOTE: FenceAll should prevent any load/store from being moved
- // across the fence (both atomic and non-atomic). The InstX8632Mfence
- // instruction is currently marked coarsely as "HasSideEffects".
- _mfence();
- return;
- case Intrinsics::AtomicIsLockFree: {
- // X86 is always lock free for 8/16/32/64 bit accesses.
- // TODO(jvoung): Since the result is constant when given a constant
- // byte size, this opens up DCE opportunities.
- Operand *ByteSize = Instr->getArg(0);
- Variable *Dest = Instr->getDest();
- if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
- Constant *Result;
- switch (CI->getValue()) {
- default:
- // Some x86-64 processors support the cmpxchg16b instruction, which
- // can make 16-byte operations lock free (when used with the LOCK
- // prefix). However, that's not supported in 32-bit mode, so just
- // return 0 even for large sizes.
- Result = Ctx->getConstantZero(IceType_i32);
- break;
- case 1:
- case 2:
- case 4:
- case 8:
- Result = Ctx->getConstantInt32(1);
- break;
- }
- _mov(Dest, Result);
- return;
- }
- // The PNaCl ABI requires the byte size to be a compile-time constant.
- Func->setError("AtomicIsLockFree byte size should be compile-time const");
- return;
- }
- case Intrinsics::AtomicLoad: {
- // We require the memory address to be naturally aligned.
- // Given that is the case, then normal loads are atomic.
- if (!Intrinsics::isMemoryOrderValid(
- ID, getConstantMemoryOrder(Instr->getArg(1)))) {
- Func->setError("Unexpected memory ordering for AtomicLoad");
- return;
- }
- Variable *Dest = Instr->getDest();
- if (Dest->getType() == IceType_i64) {
- // Follow what GCC does and use a movq instead of what lowerLoad()
- // normally does (split the load into two).
- // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
- // can't happen anyway, since this is x86-32 and integer arithmetic only
- // happens on 32-bit quantities.
- Variable *T = makeReg(IceType_f64);
- OperandX8632Mem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64);
- _movq(T, Addr);
- // Then cast the bits back out of the XMM register to the i64 Dest.
- InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
- lowerCast(Cast);
- // Make sure that the atomic load isn't elided when unused.
- Context.insert(InstFakeUse::create(Func, Dest->getLo()));
- Context.insert(InstFakeUse::create(Func, Dest->getHi()));
- return;
- }
- InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
- lowerLoad(Load);
- // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
- // Since lowerLoad may fuse the load w/ an arithmetic instruction,
- // insert the FakeUse on the last-inserted instruction's dest.
- Context.insert(
- InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
- return;
- }
- case Intrinsics::AtomicRMW:
- if (!Intrinsics::isMemoryOrderValid(
- ID, getConstantMemoryOrder(Instr->getArg(3)))) {
- Func->setError("Unexpected memory ordering for AtomicRMW");
- return;
- }
- lowerAtomicRMW(
- Instr->getDest(),
- static_cast<uint32_t>(
- llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), // Arg 0 is the RMW operation code.
- Instr->getArg(1), Instr->getArg(2));
- return;
- case Intrinsics::AtomicStore: {
- if (!Intrinsics::isMemoryOrderValid(
- ID, getConstantMemoryOrder(Instr->getArg(2)))) {
- Func->setError("Unexpected memory ordering for AtomicStore");
- return;
- }
- // We require the memory address to be naturally aligned.
- // Given that is the case, then normal stores are atomic.
- // Add a fence after the store to make it visible.
- Operand *Value = Instr->getArg(0);
- Operand *Ptr = Instr->getArg(1);
- if (Value->getType() == IceType_i64) {
- // Use a movq instead of what lowerStore() normally does
- // (split the store into two), following what GCC does.
- // Cast the bits from int -> to an xmm register first.
- Variable *T = makeReg(IceType_f64);
- InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
- lowerCast(Cast);
- // Then store XMM w/ a movq.
- OperandX8632Mem *Addr = formMemoryOperand(Ptr, IceType_f64);
- _storeq(T, Addr);
- _mfence();
- return;
- }
- InstStore *Store = InstStore::create(Func, Value, Ptr);
- lowerStore(Store);
- _mfence();
- return;
- }
- case Intrinsics::Bswap: {
- Variable *Dest = Instr->getDest();
- Operand *Val = Instr->getArg(0);
- // In 32-bit mode, bswap only works on 32-bit arguments, and the
- // argument must be a register. Use rotate left for 16-bit bswap.
- if (Val->getType() == IceType_i64) {
- Variable *T_Lo = legalizeToVar(loOperand(Val));
- Variable *T_Hi = legalizeToVar(hiOperand(Val));
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- _bswap(T_Lo);
- _bswap(T_Hi);
- _mov(DestLo, T_Hi); // The halves trade places: the swapped high word becomes the low word.
- _mov(DestHi, T_Lo);
- } else if (Val->getType() == IceType_i32) {
- Variable *T = legalizeToVar(Val);
- _bswap(T);
- _mov(Dest, T);
- } else {
- assert(Val->getType() == IceType_i16);
- Val = legalize(Val);
- Constant *Eight = Ctx->getConstantInt16(8);
- Variable *T = nullptr;
- _mov(T, Val);
- _rol(T, Eight);
- _mov(Dest, T);
- }
- return;
- }
- case Intrinsics::Ctpop: {
- Variable *Dest = Instr->getDest();
- Operand *Val = Instr->getArg(0);
- InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
- ? H_call_ctpop_i32
- : H_call_ctpop_i64,
- Dest, 1);
- Call->addArg(Val);
- lowerCall(Call);
- // The popcount helpers always return 32-bit values, while the intrinsic's
- // signature matches the native POPCNT instruction and fills a 64-bit reg
- // (in 64-bit mode). Thus, clear the upper bits of the dest just in case
- // the user doesn't do that in the IR. If the user does that in the IR,
- // then this zero'ing instruction is dead and gets optimized out.
- if (Val->getType() == IceType_i64) {
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- Constant *Zero = Ctx->getConstantZero(IceType_i32);
- _mov(DestHi, Zero);
- }
- return;
- }
- case Intrinsics::Ctlz: {
- // The "is zero undef" parameter is ignored and we always return
- // a well-defined value.
- Operand *Val = legalize(Instr->getArg(0));
- Operand *FirstVal;
- Operand *SecondVal = nullptr;
- if (Val->getType() == IceType_i64) {
- FirstVal = loOperand(Val);
- SecondVal = hiOperand(Val);
- } else {
- FirstVal = Val;
- }
- const bool IsCttz = false;
- lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
- SecondVal);
- return;
- }
- case Intrinsics::Cttz: {
- // The "is zero undef" parameter is ignored and we always return
- // a well-defined value.
- Operand *Val = legalize(Instr->getArg(0));
- Operand *FirstVal;
- Operand *SecondVal = nullptr;
- if (Val->getType() == IceType_i64) {
- FirstVal = hiOperand(Val); // Note: the halves are passed in the opposite order from Ctlz.
- SecondVal = loOperand(Val);
- } else {
- FirstVal = Val;
- }
- const bool IsCttz = true;
- lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
- SecondVal);
- return;
- }
- case Intrinsics::Fabs: {
- Operand *Src = legalize(Instr->getArg(0));
- Type Ty = Src->getType();
- Variable *Dest = Instr->getDest();
- Variable *T = makeVectorOfFabsMask(Ty);
- // The pand instruction operates on an m128 memory operand, so if
- // Src is an f32 or f64, we need to make sure it's in a register.
- if (isVectorType(Ty)) {
- if (llvm::isa<OperandX8632Mem>(Src))
- Src = legalizeToVar(Src);
- } else {
- Src = legalizeToVar(Src);
- }
- _pand(T, Src);
- if (isVectorType(Ty))
- _movp(Dest, T);
- else
- _mov(Dest, T);
- return;
- }
- case Intrinsics::Longjmp: {
- InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2);
- Call->addArg(Instr->getArg(0));
- Call->addArg(Instr->getArg(1));
- lowerCall(Call);
- return;
- }
- case Intrinsics::Memcpy: {
- // In the future, we could potentially emit an inline memcpy/memset, etc.
- // for intrinsic calls w/ a known length.
- InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3);
- Call->addArg(Instr->getArg(0));
- Call->addArg(Instr->getArg(1));
- Call->addArg(Instr->getArg(2));
- lowerCall(Call);
- return;
- }
- case Intrinsics::Memmove: {
- InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3);
- Call->addArg(Instr->getArg(0));
- Call->addArg(Instr->getArg(1));
- Call->addArg(Instr->getArg(2));
- lowerCall(Call);
- return;
- }
- case Intrinsics::Memset: {
- // The value operand needs to be extended to a stack slot size
- // because the PNaCl ABI requires arguments to be at least 32 bits
- // wide.
- Operand *ValOp = Instr->getArg(1);
- assert(ValOp->getType() == IceType_i8);
- Variable *ValExt = Func->makeVariable(stackSlotType());
- lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
- InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
- Call->addArg(Instr->getArg(0));
- Call->addArg(ValExt);
- Call->addArg(Instr->getArg(2));
- lowerCall(Call);
- return;
- }
- case Intrinsics::NaClReadTP: {
- if (Ctx->getFlags().getUseSandboxing()) { // Sandboxed: load from offset 0 of the GS segment.
- Constant *Zero = Ctx->getConstantZero(IceType_i32);
- Operand *Src =
- OperandX8632Mem::create(Func, IceType_i32, nullptr, Zero, nullptr, 0,
- OperandX8632Mem::SegReg_GS);
- Variable *Dest = Instr->getDest();
- Variable *T = nullptr;
- _mov(T, Src);
- _mov(Dest, T);
- } else { // Not sandboxed: call the read_tp helper instead.
- InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
- lowerCall(Call);
- }
- return;
- }
- case Intrinsics::Setjmp: {
- InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1);
- Call->addArg(Instr->getArg(0));
- lowerCall(Call);
- return;
- }
- case Intrinsics::Sqrt: {
- Operand *Src = legalize(Instr->getArg(0));
- Variable *Dest = Instr->getDest();
- Variable *T = makeReg(Dest->getType());
- _sqrtss(T, Src);
- _mov(Dest, T);
- return;
- }
- case Intrinsics::Stacksave: {
- Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
- Variable *Dest = Instr->getDest();
- _mov(Dest, esp);
- return;
- }
- case Intrinsics::Stackrestore: {
- Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
- _mov_nonkillable(esp, Instr->getArg(0));
- return;
- }
- case Intrinsics::Trap:
- _ud2();
- return;
- case Intrinsics::UnknownIntrinsic:
- Func->setError("Should not be lowering UnknownIntrinsic");
- return;
- }
- return; // Only reached for an ID without a case above; every listed case returns.
-}
-
-// Emits a locked compare-and-exchange on the memory at Ptr.
-//
-// For i64 operands, Expected is placed in edx:eax and Desired in ecx:ebx, a
-// locked cmpxchg8b is emitted, and edx:eax is copied to the two halves of
-// DestPrev.  For all other types, Expected is placed in eax, Desired in a
-// register, a locked cmpxchg is emitted, and eax is copied to DestPrev.
-void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
-                                     Operand *Expected, Operand *Desired) {
-  const bool Locked = true;
-
-  if (Expected->getType() != IceType_i64) {
-    Variable *Accumulator = makeReg(Expected->getType(), RegX8632::Reg_eax);
-    _mov(Accumulator, Expected);
-    OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());
-    Variable *DesiredReg = legalizeToVar(Desired);
-    _cmpxchg(Addr, Accumulator, DesiredReg, Locked);
-    _mov(DestPrev, Accumulator);
-    return;
-  }
-
-  // 64-bit case.  Reserve the pre-colored registers first, before adding any
-  // more infinite-weight variables from formMemoryOperand's legalization.
-  Variable *ExpectedHi = makeReg(IceType_i32, RegX8632::Reg_edx);
-  Variable *ExpectedLo = makeReg(IceType_i32, RegX8632::Reg_eax);
-  Variable *DesiredHi = makeReg(IceType_i32, RegX8632::Reg_ecx);
-  Variable *DesiredLo = makeReg(IceType_i32, RegX8632::Reg_ebx);
-  _mov(ExpectedLo, loOperand(Expected));
-  _mov(ExpectedHi, hiOperand(Expected));
-  _mov(DesiredLo, loOperand(Desired));
-  _mov(DesiredHi, hiOperand(Desired));
-  OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());
-  _cmpxchg8b(Addr, ExpectedHi, ExpectedLo, DesiredHi, DesiredLo, Locked);
-  // Copy edx:eax, as left by the cmpxchg8b, into the destination halves.
-  Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
-  Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
-  _mov(DestLo, ExpectedLo);
-  _mov(DestHi, ExpectedHi);
-}
-
-// Tries to fuse an atomic cmpxchg with the compare-and-branch that follows
-// it.  Returns true if the fused sequence was emitted (the caller must then
-// not lower the cmpxchg again), false if nothing was emitted.
-//
-// The pattern looked for is:
-//
-//   %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)
-//   [%y_phi = ...] // list of phi stores
-//   %p = icmp eq i32 %x, %expected
-//   br i1 %p, label %l1, label %l2
-//
-// which is lowered as:
-//
-//   %x = <cmpxchg code>
-//   [%y_phi = ...] // list of phi stores
-//   br eq, %l1, %l2
-//
-// The optimization is never attempted at Opt_m1.
-bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,
-                                           Operand *Expected,
-                                           Operand *Desired) {
-  if (Ctx->getFlags().getOptLevel() == Opt_m1)
-    return false;
-
-  // Peek ahead a few instructions and see how Dest is used.
-  InstList::iterator Cursor = Context.getCur();
-  // Cursor is currently the InstIntrinsicCall.  Peek past that.
-  // This assumes that the atomic cmpxchg has not been lowered yet,
-  // so that the instructions seen in the scan from "Cur" is simple.
-  assert(llvm::isa<InstIntrinsicCall>(*Cursor));
-  Inst *Upcoming = Context.getNextInst(Cursor);
-  if (Upcoming == nullptr)
-    return false;
-
-  // There might be phi assignments right before the compare+branch, since
-  // this could be a backward branch for a loop.  This placement of
-  // assignments is determined by placePhiStores().
-  std::vector<InstAssign *> PendingPhiAssigns;
-  while (auto *Assign = llvm::dyn_cast<InstAssign>(Upcoming)) {
-    // Give up if one of the assignments overwrites Dest.
-    if (Assign->getDest() == Dest)
-      return false;
-    PendingPhiAssigns.push_back(Assign);
-    Upcoming = Context.getNextInst(Cursor);
-    if (Upcoming == nullptr)
-      return false;
-  }
-
-  // Next must come "icmp eq" between Dest and Expected, in either order.
-  auto *Cmp = llvm::dyn_cast<InstIcmp>(Upcoming);
-  if (Cmp == nullptr)
-    return false;
-  const bool ComparesDestWithExpected =
-      (Cmp->getSrc(0) == Dest && Cmp->getSrc(1) == Expected) ||
-      (Cmp->getSrc(1) == Dest && Cmp->getSrc(0) == Expected);
-  if (Cmp->getCondition() != InstIcmp::Eq || !ComparesDestWithExpected)
-    return false;
-
-  // Finally a conditional branch on the compare result, which must also be
-  // the last use of that result.
-  Upcoming = Context.getNextInst(Cursor);
-  if (Upcoming == nullptr)
-    return false;
-  auto *Br = llvm::dyn_cast<InstBr>(Upcoming);
-  if (Br == nullptr)
-    return false;
-  if (Br->isUnconditional() || Cmp->getDest() != Br->getCondition() ||
-      !Br->isLastUse(Cmp->getDest()))
-    return false;
-
-  // The pattern matched: emit the fused sequence.
-  lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
-  for (InstAssign *Assign : PendingPhiAssigns) {
-    // Lower the phi assignments now, before the branch (same placement
-    // as before).
-    Assign->setDeleted();
-    lowerAssign(Assign);
-    Context.advanceNext();
-  }
-  _br(CondX86::Br_e, Br->getTargetTrue(), Br->getTargetFalse());
-  // Skip over the old compare and branch, by deleting them.
-  Cmp->setDeleted();
-  Br->setDeleted();
-  Context.advanceNext();
-  Context.advanceNext();
-  return true;
-}
-
-// Lowers an atomic read-modify-write intrinsic.
-//
-// Operation selects the Intrinsics::Atomic* kind, Ptr is the address operand,
-// Val is the operand combined with memory, and Dest receives the value left
-// in the temporary by xadd/xchg (or in eax/edx by the cmpxchg loop).
-//
-// Add, sub and exchange on non-i64 types are emitted directly with a locked
-// xadd or an xchg. Everything else (i64 of any kind, and or/and/xor of any
-// type) is expanded through expandAtomicRMWAsCmpxchg(). An unrecognized
-// Operation records an error on Func and emits nothing.
-void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
- Operand *Ptr, Operand *Val) {
- bool NeedsCmpxchg = false;
- // Emitters for the low/high 32-bit halves, used only by the cmpxchg loop.
- LowerBinOp Op_Lo = nullptr;
- LowerBinOp Op_Hi = nullptr;
- switch (Operation) {
- default:
- Func->setError("Unknown AtomicRMW operation");
- return;
- case Intrinsics::AtomicAdd: {
- if (Dest->getType() == IceType_i64) {
- // All the fall-through paths must set this to true, but use this
- // for asserting.
- NeedsCmpxchg = true;
- Op_Lo = &TargetX8632::_add;
- Op_Hi = &TargetX8632::_adc;
- break;
- }
- // Non-i64 add: mov T, Val; lock xadd [Addr], T; mov Dest, T.
- OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
- const bool Locked = true;
- Variable *T = nullptr;
- _mov(T, Val);
- _xadd(Addr, T, Locked);
- _mov(Dest, T);
- return;
- }
- case Intrinsics::AtomicSub: {
- if (Dest->getType() == IceType_i64) {
- NeedsCmpxchg = true;
- Op_Lo = &TargetX8632::_sub;
- Op_Hi = &TargetX8632::_sbb;
- break;
- }
- // Non-i64 sub: same as add, but the operand is negated before the xadd.
- OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
- const bool Locked = true;
- Variable *T = nullptr;
- _mov(T, Val);
- _neg(T);
- _xadd(Addr, T, Locked);
- _mov(Dest, T);
- return;
- }
- case Intrinsics::AtomicOr:
- // TODO(jvoung): If Dest is null or dead, then some of these
- // operations do not need an "exchange", but just a locked op.
- // That appears to be "worth" it for sub, or, and, and xor.
- // xadd is probably fine vs lock add for add, and xchg is fine
- // vs an atomic store.
- NeedsCmpxchg = true;
- Op_Lo = &TargetX8632::_or;
- Op_Hi = &TargetX8632::_or;
- break;
- case Intrinsics::AtomicAnd:
- NeedsCmpxchg = true;
- Op_Lo = &TargetX8632::_and;
- Op_Hi = &TargetX8632::_and;
- break;
- case Intrinsics::AtomicXor:
- NeedsCmpxchg = true;
- Op_Lo = &TargetX8632::_xor;
- Op_Hi = &TargetX8632::_xor;
- break;
- case Intrinsics::AtomicExchange:
- if (Dest->getType() == IceType_i64) {
- NeedsCmpxchg = true;
- // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
- // just need to be moved to the ecx and ebx registers.
- Op_Lo = nullptr;
- Op_Hi = nullptr;
- break;
- }
- // Non-i64 exchange: mov T, Val; xchg [Addr], T; mov Dest, T.
- OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());
- Variable *T = nullptr;
- _mov(T, Val);
- _xchg(Addr, T);
- _mov(Dest, T);
- return;
- }
- // Otherwise, we need a cmpxchg loop.
- // The (void) cast keeps NeedsCmpxchg "used" when assert() compiles away.
- (void)NeedsCmpxchg;
- assert(NeedsCmpxchg);
- expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
-}
-
-// Emits a compare-exchange retry loop implementing an atomic RMW operation.
-//
-// Op_Lo/Op_Hi are the emitters applied to the low/high 32-bit halves (only
-// Op_Lo is used for non-i64 types). Both being nullptr selects the i64
-// exchange form, where Val is simply loaded into ebx/ecx before the loop.
-// Dest receives the value held in eax (and edx for i64) after the loop.
-void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,
- Variable *Dest, Operand *Ptr,
- Operand *Val) {
- // Expand a more complex RMW operation as a cmpxchg loop:
- // For 64-bit:
- // mov eax, [ptr]
- // mov edx, [ptr + 4]
- // .LABEL:
- // mov ebx, eax
- // <Op_Lo> ebx, <desired_adj_lo>
- // mov ecx, edx
- // <Op_Hi> ecx, <desired_adj_hi>
- // lock cmpxchg8b [ptr]
- // jne .LABEL
- // mov <dest_lo>, eax
- // mov <dest_hi>, edx
- //
- // For 32-bit:
- // mov eax, [ptr]
- // .LABEL:
- // mov <reg>, eax
- // op <reg>, [desired_adj]
- // lock cmpxchg [ptr], <reg>
- // jne .LABEL
- // mov <dest>, eax
- //
- // If Op_{Lo,Hi} are nullptr, then just copy the value.
- Val = legalize(Val);
- Type Ty = Val->getType();
- if (Ty == IceType_i64) {
- // 64-bit path: edx:eax holds the loaded value, ecx:ebx the value to store.
- Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
- Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
- OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty);
- _mov(T_eax, loOperand(Addr));
- _mov(T_edx, hiOperand(Addr));
- Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
- Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
- InstX8632Label *Label = InstX8632Label::create(Func, this);
- const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
- if (!IsXchg8b) {
- // The label comes first: ebx/ecx are recomputed on every iteration.
- Context.insert(Label);
- _mov(T_ebx, T_eax);
- (this->*Op_Lo)(T_ebx, loOperand(Val));
- _mov(T_ecx, T_edx);
- (this->*Op_Hi)(T_ecx, hiOperand(Val));
- } else {
- // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
- // It just needs the Val loaded into ebx and ecx.
- // That can also be done before the loop.
- _mov(T_ebx, loOperand(Val));
- _mov(T_ecx, hiOperand(Val));
- Context.insert(Label);
- }
- const bool Locked = true;
- _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
- _br(CondX86::Br_ne, Label);
- if (!IsXchg8b) {
- // If Val is a variable, model the extended live range of Val through
- // the end of the loop, since it will be re-used by the loop.
- if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
- Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
- Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
- Context.insert(InstFakeUse::create(Func, ValLo));
- Context.insert(InstFakeUse::create(Func, ValHi));
- }
- } else {
- // For xchg, the loop is slightly smaller and ebx/ecx are used.
- Context.insert(InstFakeUse::create(Func, T_ebx));
- Context.insert(InstFakeUse::create(Func, T_ecx));
- }
- // The address base (if any) is also reused in the loop.
- if (Variable *Base = Addr->getBase())
- Context.insert(InstFakeUse::create(Func, Base));
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- _mov(DestLo, T_eax);
- _mov(DestHi, T_edx);
- return;
- }
- // Non-i64 path: a single cmpxchg on eax and one scratch register.
- OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty);
- Variable *T_eax = makeReg(Ty, RegX8632::Reg_eax);
- _mov(T_eax, Addr);
- InstX8632Label *Label = InstX8632Label::create(Func, this);
- Context.insert(Label);
- // We want to pick a different register for T than Eax, so don't use
- // _mov(T == nullptr, T_eax).
- Variable *T = makeReg(Ty);
- _mov(T, T_eax);
- (this->*Op_Lo)(T, Val);
- const bool Locked = true;
- _cmpxchg(Addr, T_eax, T, Locked);
- _br(CondX86::Br_ne, Label);
- // If Val is a variable, model the extended live range of Val through
- // the end of the loop, since it will be re-used by the loop.
- if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
- Context.insert(InstFakeUse::create(Func, ValVar));
- }
- // The address base (if any) is also reused in the loop.
- if (Variable *Base = Addr->getBase())
- Context.insert(InstFakeUse::create(Func, Base));
- _mov(Dest, T_eax);
-}
-
-// Lowers count {trailing, leading} zeros intrinsic.
-//
-// We could do constant folding here, but that should have
-// been done by the front-end/middle-end optimizations.
-//
-// Cttz selects bsf (count trailing) over bsr (count leading). Ty must be
-// i32 or i64. For i32 only FirstVal is examined. For i64 the count derived
-// from FirstVal is increased by 32 and is used only when SecondVal is zero;
-// otherwise the count derived from SecondVal is used. The high half of an
-// i64 Dest is always set to zero.
-void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
- Operand *FirstVal, Operand *SecondVal) {
- // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
- // Then the instructions will handle the Val == 0 case much more simply
- // and won't require conversion from bit position to number of zeros.
- //
- // Otherwise:
- // bsr IF_NOT_ZERO, Val
- // mov T_DEST, 63
- // cmovne T_DEST, IF_NOT_ZERO
- // xor T_DEST, 31
- // mov DEST, T_DEST
- //
- // NOTE: T_DEST must be a register because cmov requires its dest to be a
- // register. Also, bsf and bsr require their dest to be a register.
- //
- // The xor DEST, 31 converts a bit position to # of leading zeroes.
- // E.g., for 000... 00001100, bsr will say that the most significant bit
- // set is at position 3, while the number of leading zeros is 28. Xor is
- // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros case).
- //
- // Similar for 64-bit, but start w/ speculating that the upper 32 bits
- // are all zero, and compute the result for that case (checking the lower
- // 32 bits). Then actually compute the result for the upper bits and
- // cmov in the result from the lower computation if the earlier speculation
- // was correct.
- //
- // Cttz, is similar, but uses bsf instead, and doesn't require the xor
- // bit position conversion, and the speculation is reversed.
- assert(Ty == IceType_i32 || Ty == IceType_i64);
- Variable *T = makeReg(IceType_i32);
- Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg);
- if (Cttz) {
- _bsf(T, FirstValRM);
- } else {
- _bsr(T, FirstValRM);
- }
- Variable *T_Dest = makeReg(IceType_i32);
- Constant *ThirtyTwo = Ctx->getConstantInt32(32);
- Constant *ThirtyOne = Ctx->getConstantInt32(31);
- // Preload the value used when the cmov below does not fire.
- if (Cttz) {
- _mov(T_Dest, ThirtyTwo);
- } else {
- Constant *SixtyThree = Ctx->getConstantInt32(63);
- _mov(T_Dest, SixtyThree);
- }
- _cmov(T_Dest, T, CondX86::Br_ne);
- if (!Cttz) {
- _xor(T_Dest, ThirtyOne);
- }
- if (Ty == IceType_i32) {
- _mov(Dest, T_Dest);
- return;
- }
- // i64 only from here on: T_Dest becomes the count for the case where
- // SecondVal is zero.
- _add(T_Dest, ThirtyTwo);
- Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
- Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
- // Will be using "test" on this, so we need a registerized variable.
- Variable *SecondVar = legalizeToVar(SecondVal);
- Variable *T_Dest2 = makeReg(IceType_i32);
- if (Cttz) {
- _bsf(T_Dest2, SecondVar);
- } else {
- _bsr(T_Dest2, SecondVar);
- _xor(T_Dest2, ThirtyOne);
- }
- // If SecondVar is zero, take the speculated result in T_Dest instead.
- _test(SecondVar, SecondVar);
- _cmov(T_Dest2, T_Dest, CondX86::Br_e);
- _mov(DestLo, T_Dest2);
- _mov(DestHi, Ctx->getConstantZero(IceType_i32));
-}
-
-namespace {
-
-// Returns true iff Inst is a non-null InstArithmetic whose opcode is Add.
-bool isAdd(const Inst *Inst) {
-  const auto *Arith = llvm::dyn_cast_or_null<const InstArithmetic>(Inst);
-  if (Arith == nullptr)
-    return false;
-  return Arith->getOp() == InstArithmetic::Add;
-}
-
-// Writes one address-optimization step to the dump stream: the instruction
-// that justified it (Reason) and the resulting Base/Index/Shift/Offset.
-// Does nothing unless ALLOW_DUMP is set and IceV_AddrOpt verbosity is on.
-void dumpAddressOpt(const Cfg *Func, const Variable *Base,
-                    const Variable *Index, uint16_t Shift, int32_t Offset,
-                    const Inst *Reason) {
-  if (!ALLOW_DUMP || !Func->isVerbose(IceV_AddrOpt))
-    return;
-  OstreamLocker L(Func->getContext());
-  Ostream &Str = Func->getContext()->getStrDump();
-  // Dumps a variable, or a placeholder when the component is absent.
-  auto DumpVarOrNull = [&Str, Func](const Variable *Var) {
-    if (Var != nullptr)
-      Var->dump(Func);
-    else
-      Str << "<null>";
-  };
-  Str << "Instruction: ";
-  Reason->dumpDecorated(Func);
-  Str << " results in Base=";
-  DumpVarOrNull(Base);
-  Str << ", Index=";
-  DumpVarOrNull(Index);
-  Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n";
-}
-
-// Pattern: Var has a single definition that is a plain assignment
-// "Var = SrcVar", and SrcVar is not multiply defined.
-// Rewrite: Var := SrcVar, with Reason set to that assignment.
-// Returns true iff the rewrite was applied.
-bool matchTransitiveAssign(const VariablesMetadata *VMetadata, Variable *&Var,
-                           const Inst *&Reason) {
-  if (Var == nullptr)
-    return false;
-  const Inst *Def = VMetadata->getSingleDefinition(Var);
-  if (Def == nullptr)
-    return false;
-  assert(!VMetadata->isMultiDef(Var));
-  if (!llvm::isa<InstAssign>(Def))
-    return false;
-  Operand *Source = Def->getSrc(0);
-  assert(Source);
-  auto *SourceVar = llvm::dyn_cast<Variable>(Source);
-  if (SourceVar == nullptr || VMetadata->isMultiDef(SourceVar))
-    return false;
-  // TODO: ensure SrcVar stays single-BB
-  Var = SourceVar;
-  Reason = Def;
-  return true;
-}
-
-// Pattern: Index is still unset and Base is defined as "Base = Var1 + Var2"
-// with neither variable multiply defined.
-// Rewrite: Base := Var1, Index := Var2, Shift := 0, Reason := that add.
-// Returns true iff the rewrite was applied.
-bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable *&Base,
-                            Variable *&Index, uint16_t &Shift,
-                            const Inst *&Reason) {
-  if (Base == nullptr || Index != nullptr)
-    return false;
-  const Inst *Def = VMetadata->getSingleDefinition(Base);
-  if (Def == nullptr)
-    return false;
-  assert(!VMetadata->isMultiDef(Base));
-  if (Def->getSrcSize() < 2)
-    return false;
-  auto *Lhs = llvm::dyn_cast<Variable>(Def->getSrc(0));
-  if (Lhs == nullptr || VMetadata->isMultiDef(Lhs))
-    return false;
-  auto *Rhs = llvm::dyn_cast<Variable>(Def->getSrc(1));
-  if (Rhs == nullptr || VMetadata->isMultiDef(Rhs))
-    return false;
-  // TODO: ensure Var1 and Var2 stay single-BB
-  if (!isAdd(Def))
-    return false;
-  Base = Lhs;
-  Index = Rhs;
-  Shift = 0; // should already have been 0
-  Reason = Def;
-  return true;
-}
-
-// Pattern: Index is defined as "Index = Var * Const" where Const is 1, 2, 4
-// or 8 and log2(Const) + Shift <= 3.
-// Rewrite: Index := Var, Shift += log2(Const), Reason := that multiply.
-// Returns true iff the rewrite was applied.
-bool matchShiftedIndex(const VariablesMetadata *VMetadata, Variable *&Index,
-                       uint16_t &Shift, const Inst *&Reason) {
-  if (Index == nullptr)
-    return false;
-  const Inst *Def = VMetadata->getSingleDefinition(Index);
-  if (Def == nullptr)
-    return false;
-  assert(!VMetadata->isMultiDef(Index));
-  if (Def->getSrcSize() < 2)
-    return false;
-  const auto *Arith = llvm::dyn_cast<InstArithmetic>(Def);
-  if (Arith == nullptr)
-    return false;
-  auto *Var = llvm::dyn_cast<Variable>(Arith->getSrc(0));
-  if (Var == nullptr)
-    return false;
-  auto *Const = llvm::dyn_cast<ConstantInteger32>(Arith->getSrc(1));
-  if (Const == nullptr)
-    return false;
-  if (Arith->getOp() != InstArithmetic::Mul || VMetadata->isMultiDef(Var) ||
-      Const->getType() != IceType_i32)
-    return false;
-  // Only multipliers that are powers of two up to 8 are accepted.
-  const uint64_t Mult = Const->getValue();
-  uint32_t LogMult = 0;
-  while (LogMult <= 3 && Mult != (static_cast<uint64_t>(1) << LogMult))
-    ++LogMult;
-  if (LogMult > 3)
-    return false;
-  if (Shift + LogMult > 3)
-    return false;
-  Index = Var;
-  Shift += LogMult;
-  Reason = Def;
-  return true;
-}
-
-// Pattern: Base is defined as "Base = Var + Const", "Base = Const + Var" or
-// "Base = Var - Const", and Var is not multiply defined.
-// Rewrite: Base := Var, Offset += Const (or -= Const for the subtraction),
-// Reason := the defining instruction.
-// The match is rejected, leaving all arguments untouched, if negating Const
-// or updating Offset would overflow int32_t. Returns true iff the rewrite
-// was applied.
-bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base,
-                     int32_t &Offset, const Inst *&Reason) {
-  if (Base == nullptr)
-    return false;
-  const Inst *BaseInst = VMetadata->getSingleDefinition(Base);
-  if (BaseInst == nullptr)
-    return false;
-  assert(!VMetadata->isMultiDef(Base));
-  const auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst);
-  if (ArithInst == nullptr)
-    return false;
-  const bool IsAdd = ArithInst->getOp() == InstArithmetic::Add;
-  const bool IsSub = ArithInst->getOp() == InstArithmetic::Sub;
-  if (!IsAdd && !IsSub)
-    return false;
-  Variable *Var = nullptr;
-  ConstantInteger32 *Const = nullptr;
-  if (Variable *VariableOperand =
-          llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) {
-    Var = VariableOperand;
-    Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
-  } else if (IsAdd) {
-    // Only addition is commutative, so "Const - Var" is never matched.
-    Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(0));
-    Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1));
-  }
-  if (Var == nullptr || Const == nullptr || VMetadata->isMultiDef(Var))
-    return false;
-  const int32_t ConstValue = Const->getValue();
-  // -INT32_MIN is not representable in int32_t; negating it would be signed
-  // overflow (undefined behavior), so give up on that case.
-  if (!IsAdd && ConstValue == INT32_MIN)
-    return false;
-  const int32_t MoreOffset = IsAdd ? ConstValue : -ConstValue;
-  if (Utils::WouldOverflowAdd(Offset, MoreOffset))
-    return false;
-  Base = Var;
-  Offset += MoreOffset;
-  Reason = BaseInst;
-  return true;
-}
-
-// Repeatedly applies the address-mode patterns defined in this file to fold
-// the computation feeding Base into a Base/Index/Shift/Offset tuple. The
-// arguments are updated in place; Instr is used only for diagnostic output.
-// Nothing is changed when Base is null or is live across multiple blocks.
-void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base,
-                       Variable *&Index, uint16_t &Shift, int32_t &Offset) {
-  Func->resetCurrentNode();
-  // Guarded by ALLOW_DUMP for consistency with dumpAddressOpt().
-  if (ALLOW_DUMP && Func->isVerbose(IceV_AddrOpt)) {
-    OstreamLocker L(Func->getContext());
-    Ostream &Str = Func->getContext()->getStrDump();
-    Str << "\nStarting computeAddressOpt for instruction:\n ";
-    Instr->dumpDecorated(Func);
-  }
-  if (Base == nullptr)
-    return;
-  // If the Base has more than one use or is live across multiple
-  // blocks, then don't go further. Alternatively (?), never consider
-  // a transformation that would change a variable that is currently
-  // *not* live across basic block boundaries into one that *is*.
-  if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/)
-    return;
-
-  const VariablesMetadata *VMetadata = Func->getVMetadata();
-  // Keep iterating until no pattern fires; each successful match may expose
-  // another one.
-  bool Continue = true;
-  while (Continue) {
-    const Inst *Reason = nullptr;
-    if (matchTransitiveAssign(VMetadata, Base, Reason) ||
-        matchTransitiveAssign(VMetadata, Index, Reason) ||
-        matchCombinedBaseIndex(VMetadata, Base, Index, Shift, Reason) ||
-        matchShiftedIndex(VMetadata, Index, Shift, Reason) ||
-        matchOffsetBase(VMetadata, Base, Offset, Reason)) {
-      dumpAddressOpt(Func, Base, Index, Shift, Offset, Reason);
-    } else {
-      Continue = false;
-    }
-
-    // Patterns not yet implemented:
-
-    // Index is Index=Var<<Const && Const+Shift<=3 ==>
-    //   Index=Var, Shift+=Const
-
-    // Index is Index=Const*Var && log2(Const)+Shift<=3 ==>
-    //   Index=Var, Shift+=log2(Const)
-
-    // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==>
-    //   swap(Index,Base)
-    // Similar for Base=Const*Var and Base=Var<<Const
-
-    // Index is Index=Var+Const ==>
-    //   set Index=Var, Offset+=(Const<<Shift)
-
-    // Index is Index=Const+Var ==>
-    //   set Index=Var, Offset+=(Const<<Shift)
-
-    // Index is Index=Var-Const ==>
-    //   set Index=Var, Offset-=(Const<<Shift)
-
-    // TODO: consider overflow issues with respect to Offset.
-    // TODO: handle symbolic constants.
-  }
-}
-
-} // anonymous namespace
-
-// Lowers a load by turning its source address into a memory operand and then
-// lowering the equivalent assignment "Dest = [mem]". An address that the
-// address-mode optimization already turned into an OperandX8632Mem needs no
-// further transformation.
-void TargetX8632::lowerLoad(const InstLoad *Load) {
-  Variable *Dest = Load->getDest();
-  const Type DestTy = Dest->getType();
-  Operand *MemSrc = formMemoryOperand(Load->getSourceAddress(), DestTy);
-  lowerAssign(InstAssign::create(Func, Dest, MemSrc));
-}
-
-// Runs address-mode optimization on the current instruction (a load). If
-// computeAddressOpt() found a different base, the instruction is deleted and
-// replaced by a new load from the combined memory operand.
-void TargetX8632::doAddressOptLoad() {
-  Inst *Load = Context.getCur();
-  Variable *Dest = Load->getDest();
-  Operand *OrigAddr = Load->getSrc(0);
-  Variable *Base = llvm::dyn_cast<Variable>(OrigAddr);
-  Variable *Index = nullptr;
-  uint16_t Shift = 0;
-  int32_t Offset = 0; // TODO: make Constant
-  computeAddressOpt(Func, Load, Base, Index, Shift, Offset);
-  if (Base == nullptr || OrigAddr == Base)
-    return;
-  Load->setDeleted();
-  Constant *OffsetOp = Ctx->getConstantInt32(Offset);
-  // Vanilla ICE load instructions should not use the segment registers,
-  // and computeAddressOpt only works at the level of Variables and Constants,
-  // not other OperandX8632Mem, so there should be no mention of segment
-  // registers there either.
-  Operand *NewAddr =
-      OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
-                              Shift, OperandX8632Mem::DefaultSegment);
-  Context.insert(InstLoad::create(Func, Dest, NewAddr));
-}
-
-// With the given Probability, emits one nop whose variant is drawn from the
-// context's random number generator.
-void TargetX8632::randomlyInsertNop(float Probability) {
-  RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
-  if (!RNG.getTrueWithProbability(Probability))
-    return;
-  _nop(RNG(X86_NUM_NOP_VARIANTS));
-}
-
-// A Phi reaching this lowering hook is reported as an error on Func; the
-// instruction itself is ignored.
-void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
- Func->setError("Phi found in regular instruction list");
-}
-
-// Lowers a return. An i64 value is placed in edx:eax, a scalar float is
-// pushed with fld, a vector goes to xmm0, and any other value to eax.
-void TargetX8632::lowerRet(const InstRet *Inst) {
-  Variable *RetReg = nullptr;
-  if (Inst->hasRetValue()) {
-    Operand *RetVal = legalize(Inst->getRetValue());
-    const Type RetTy = RetVal->getType();
-    if (RetTy == IceType_i64) {
-      Variable *LoReg = legalizeToVar(loOperand(RetVal), RegX8632::Reg_eax);
-      Variable *HiReg = legalizeToVar(hiOperand(RetVal), RegX8632::Reg_edx);
-      RetReg = LoReg;
-      // The ret instruction only names eax, so add a fake use of edx.
-      Context.insert(InstFakeUse::create(Func, HiReg));
-    } else if (isScalarFloatingType(RetTy)) {
-      _fld(RetVal);
-    } else if (isVectorType(RetTy)) {
-      RetReg = legalizeToVar(RetVal, RegX8632::Reg_xmm0);
-    } else {
-      _mov(RetReg, RetVal, RegX8632::Reg_eax);
-    }
-  }
-  // Add a ret instruction even if sandboxing is enabled, because
-  // addEpilog explicitly looks for a ret instruction as a marker for
-  // where to insert the frame removal instructions.
-  _ret(RetReg);
-  // Add a fake use of esp to make sure esp stays alive for the entire
-  // function. Otherwise post-call esp adjustments get dead-code
-  // eliminated. TODO: Are there more places where the fake use
-  // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
-  // have a ret instruction.
-  Variable *StackPtr =
-      Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
-  Context.insert(InstFakeUse::create(Func, StackPtr));
-}
-
-// Lowers a select instruction: Dest = Condition ? SrcT : SrcF.
-//
-// Vector selects use blendvps/pblendvb when InstructionSet >= SSE4_1, and a
-// pand/pandn/por mask sequence otherwise. Scalar selects compare the
-// condition (or the operands of a folded 32-bit icmp producer) and then use
-// either a branch around a second mov (8-bit and floating-point
-// destinations) or cmov (i16/i32, and i64 as two 32-bit halves).
-void TargetX8632::lowerSelect(const InstSelect *Inst) {
-  Variable *Dest = Inst->getDest();
-  Type DestTy = Dest->getType();
-  Operand *SrcT = Inst->getTrueOperand();
-  Operand *SrcF = Inst->getFalseOperand();
-  Operand *Condition = Inst->getCondition();
-
-  if (isVectorType(DestTy)) {
-    Type SrcTy = SrcT->getType();
-    Variable *T = makeReg(SrcTy);
-    Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
-    Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
-    if (InstructionSet >= SSE4_1) {
-      // TODO(wala): If the condition operand is a constant, use blendps
-      // or pblendw.
-      //
-      // Use blendvps or pblendvb to implement select.
-      if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
-          SrcTy == IceType_v4f32) {
-        Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
-        Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0);
-        _movp(xmm0, ConditionRM);
-        _psll(xmm0, Ctx->getConstantInt8(31));
-        _movp(T, SrcFRM);
-        _blendvps(T, SrcTRM, xmm0);
-        _movp(Dest, T);
-      } else {
-        assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
-        Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
-                                                              : IceType_v16i8;
-        Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0);
-        lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
-        _movp(T, SrcFRM);
-        _pblendvb(T, SrcTRM, xmm0);
-        _movp(Dest, T);
-      }
-      return;
-    }
-    // Lower select without SSE4.1:
-    // a=d?b:c ==>
-    //   if elementtype(d) != i1:
-    //      d=sext(d);
-    //   a=(b&d)|(c&~d);
-    Variable *T2 = makeReg(SrcTy);
-    // Sign extend the condition operand if applicable.
-    if (SrcTy == IceType_v4f32) {
-      // The sext operation takes only integer arguments.
-      Variable *T3 = Func->makeVariable(IceType_v4i32);
-      lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
-      _movp(T, T3);
-    } else if (typeElementType(SrcTy) != IceType_i1) {
-      lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
-    } else {
-      Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
-      _movp(T, ConditionRM);
-    }
-    _movp(T2, T);
-    _pand(T, SrcTRM);
-    _pandn(T2, SrcFRM);
-    _por(T, T2);
-    _movp(Dest, T);
-
-    return;
-  }
-
-  CondX86::BrCond Cond = CondX86::Br_ne;
-  Operand *CmpOpnd0 = nullptr;
-  Operand *CmpOpnd1 = nullptr;
-  // Handle folding opportunities.
-  if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {
-    assert(Producer->isDeleted());
-    switch (BoolFolding::getProducerKind(Producer)) {
-    default:
-      break;
-    case BoolFolding::PK_Icmp32: {
-      // The result is dereferenced unconditionally, so use the asserting
-      // cast<> instead of dyn_cast<>, whose null result was never checked.
-      auto *Cmp = llvm::cast<InstIcmp>(Producer);
-      Cond = getIcmp32Mapping(Cmp->getCondition());
-      CmpOpnd1 = legalize(Producer->getSrc(1));
-      CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1);
-    } break;
-    }
-  }
-  // No folded producer: compare the condition itself against zero.
-  if (CmpOpnd0 == nullptr) {
-    CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem);
-    CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
-  }
-  assert(CmpOpnd0);
-  assert(CmpOpnd1);
-
-  _cmp(CmpOpnd0, CmpOpnd1);
-  if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
-    // The cmov instruction doesn't allow 8-bit or FP operands, so
-    // we need explicit control flow.
-    // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
-    InstX8632Label *Label = InstX8632Label::create(Func, this);
-    SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
-    _mov(Dest, SrcT);
-    _br(Cond, Label);
-    SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
-    _mov_nonkillable(Dest, SrcF);
-    Context.insert(Label);
-    return;
-  }
-  // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
-  // But if SrcT is immediate, we might be able to do better, as
-  // the cmov instruction doesn't allow an immediate operand:
-  // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
-  if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
-    std::swap(SrcT, SrcF);
-    Cond = InstX8632::getOppositeCondition(Cond);
-  }
-  if (DestTy == IceType_i64) {
-    // Set the low portion.
-    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
-    Variable *TLo = nullptr;
-    Operand *SrcFLo = legalize(loOperand(SrcF));
-    _mov(TLo, SrcFLo);
-    Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
-    _cmov(TLo, SrcTLo, Cond);
-    _mov(DestLo, TLo);
-    // Set the high portion.
-    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
-    Variable *THi = nullptr;
-    Operand *SrcFHi = legalize(hiOperand(SrcF));
-    _mov(THi, SrcFHi);
-    Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem);
-    _cmov(THi, SrcTHi, Cond);
-    _mov(DestHi, THi);
-    return;
-  }
-
-  assert(DestTy == IceType_i16 || DestTy == IceType_i32);
-  Variable *T = nullptr;
-  SrcF = legalize(SrcF);
-  _mov(T, SrcF);
-  SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
-  _cmov(T, SrcT, Cond);
-  _mov(Dest, T);
-}
-
-// Lowers a store. An i64 value is written as two 32-bit stores (high half
-// first), a vector uses storep from a register, and any other type is a
-// single store of a register or immediate.
-void TargetX8632::lowerStore(const InstStore *Inst) {
-  Operand *Data = Inst->getData();
-  OperandX8632Mem *Mem = formMemoryOperand(Inst->getAddr(), Data->getType());
-  const Type Ty = Mem->getType();
-
-  if (Ty == IceType_i64) {
-    Data = legalize(Data);
-    Operand *DataHi = legalize(hiOperand(Data), Legal_Reg | Legal_Imm);
-    Operand *DataLo = legalize(loOperand(Data), Legal_Reg | Legal_Imm);
-    _store(DataHi, llvm::cast<OperandX8632Mem>(hiOperand(Mem)));
-    _store(DataLo, llvm::cast<OperandX8632Mem>(loOperand(Mem)));
-    return;
-  }
-  if (isVectorType(Ty)) {
-    _storep(legalizeToVar(Data), Mem);
-    return;
-  }
-  Data = legalize(Data, Legal_Reg | Legal_Imm);
-  _store(Data, Mem);
-}
-
-// Runs address-mode optimization on the current store. If
-// computeAddressOpt() found a different base, the store is deleted and
-// replaced by a new store to the combined memory operand, carrying over the
-// RMW beacon when the original store has a dest.
-void TargetX8632::doAddressOptStore() {
-  InstStore *Store = llvm::cast<InstStore>(Context.getCur());
-  Operand *Data = Store->getData();
-  Operand *OrigAddr = Store->getAddr();
-  Variable *Base = llvm::dyn_cast<Variable>(OrigAddr);
-  Variable *Index = nullptr;
-  uint16_t Shift = 0;
-  int32_t Offset = 0; // TODO: make Constant
-  computeAddressOpt(Func, Store, Base, Index, Shift, Offset);
-  if (Base == nullptr || OrigAddr == Base)
-    return;
-  Store->setDeleted();
-  Constant *OffsetOp = Ctx->getConstantInt32(Offset);
-  // Vanilla ICE store instructions should not use the segment registers,
-  // and computeAddressOpt only works at the level of Variables and Constants,
-  // not other OperandX8632Mem, so there should be no mention of segment
-  // registers there either.
-  Operand *NewAddr =
-      OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
-                              Shift, OperandX8632Mem::DefaultSegment);
-  InstStore *NewStore = InstStore::create(Func, Data, NewAddr);
-  if (Store->getDest())
-    NewStore->setRmwBeacon(Store->getRmwBeacon());
-  Context.insert(NewStore);
-}
-
-// Lowers a switch as a linear chain of compares and conditional branches,
-// followed by an unconditional branch to the default label:
-//   cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
-// An i64 comparison checks the low half first and skips the high-half
-// compare when the low halves differ.
-void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
-  Operand *Cmp = Inst->getComparison();
-  const SizeT NumCases = Inst->getNumCases();
-  // With two or more cases the compared value is read repeatedly, so it is
-  // forced into a register; otherwise a memory operand is acceptable too.
-  const bool ForceToReg = NumCases >= 2;
-
-  if (Cmp->getType() != IceType_i64) {
-    if (ForceToReg)
-      Cmp = legalizeToVar(Cmp);
-    else
-      Cmp = legalize(Cmp, Legal_Reg | Legal_Mem);
-    for (SizeT CaseIdx = 0; CaseIdx < NumCases; ++CaseIdx) {
-      Constant *CaseValue = Ctx->getConstantInt32(Inst->getValue(CaseIdx));
-      _cmp(Cmp, CaseValue);
-      _br(CondX86::Br_e, Inst->getLabel(CaseIdx));
-    }
-
-    _br(Inst->getLabelDefault());
-    return;
-  }
-
-  Cmp = legalize(Cmp); // get Base/Index into physical registers
-  Operand *CmpLo = loOperand(Cmp);
-  Operand *CmpHi = hiOperand(Cmp);
-  if (ForceToReg) {
-    CmpLo = legalizeToVar(CmpLo);
-    CmpHi = legalizeToVar(CmpHi);
-  } else {
-    CmpLo = legalize(CmpLo, Legal_Reg | Legal_Mem);
-    CmpHi = legalize(CmpHi, Legal_Reg | Legal_Mem);
-  }
-  for (SizeT CaseIdx = 0; CaseIdx < NumCases; ++CaseIdx) {
-    Constant *CaseLo = Ctx->getConstantInt32(Inst->getValue(CaseIdx));
-    Constant *CaseHi = Ctx->getConstantInt32(Inst->getValue(CaseIdx) >> 32);
-    // Local label used to skip the high-half compare on a low-half mismatch.
-    InstX8632Label *NextCase = InstX8632Label::create(Func, this);
-    _cmp(CmpLo, CaseLo);
-    _br(CondX86::Br_ne, NextCase);
-    _cmp(CmpHi, CaseHi);
-    _br(CondX86::Br_e, Inst->getLabel(CaseIdx));
-    Context.insert(NextCase);
-  }
-  _br(Inst->getLabelDefault());
-}
-
-// Lowers a vector arithmetic operation one element at a time: each pair of
-// elements is extracted from Src0/Src1, combined with the scalar form of
-// Kind, and inserted into a running result vector that starts out undef.
-// The final vector is assigned to Dest.
-void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,
-                                      Variable *Dest, Operand *Src0,
-                                      Operand *Src1) {
-  assert(isVectorType(Dest->getType()));
-  const Type VecTy = Dest->getType();
-  const Type ElemTy = typeElementType(VecTy);
-  const SizeT Count = typeNumElements(VecTy);
-
-  Operand *Accum = Ctx->getConstantUndef(VecTy);
-  for (SizeT Lane = 0; Lane < Count; ++Lane) {
-    Constant *LaneIdx = Ctx->getConstantInt32(Lane);
-
-    // Pull out this lane of both inputs.
-    Variable *Lhs = Func->makeVariable(ElemTy);
-    lowerExtractElement(InstExtractElement::create(Func, Lhs, Src0, LaneIdx));
-    Variable *Rhs = Func->makeVariable(ElemTy);
-    lowerExtractElement(InstExtractElement::create(Func, Rhs, Src1, LaneIdx));
-
-    // Do the scalar arithmetic.
-    Variable *Scalar = Func->makeVariable(ElemTy);
-    lowerArithmetic(InstArithmetic::create(Func, Kind, Scalar, Lhs, Rhs));
-
-    // Write the scalar result into this lane of a fresh vector.
-    Variable *NextAccum = Func->makeVariable(VecTy);
-    lowerInsertElement(
-        InstInsertElement::create(Func, NextAccum, Accum, Scalar, LaneIdx));
-    Accum = NextAccum;
-  }
-
-  lowerAssign(InstAssign::create(Func, Dest, Accum));
-}
-
-// Lowered C and C++ code frequently contains this pair:
-//
-//   %cmp = fcmp/icmp pred <n x ty> %src0, %src1
-//   %cmp.ext = sext <n x i1> %cmp to <n x ty>
-//
-// pcmpeqd, pcmpgtd and cmpps already produce sign extended results, so when
-// the next instruction is a sext of SignExtendedResult, it is deleted and
-// replaced by a copy of SignExtendedResult into its destination.
-void TargetX8632::eliminateNextVectorSextInstruction(
-    Variable *SignExtendedResult) {
-  auto *NextCast = llvm::dyn_cast_or_null<InstCast>(Context.getNextInst());
-  if (NextCast == nullptr)
-    return;
-  if (NextCast->getCastKind() != InstCast::Sext ||
-      NextCast->getSrc(0) != SignExtendedResult)
-    return;
-  NextCast->setDeleted();
-  _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
-  // Step past the instruction that was just replaced.
-  Context.advanceNext();
-}
-
-// Lowers an unreachable instruction by emitting ud2 via _ud2().
-void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); }
-
-// Lowers a fake RMW instruction into a memory-destination arithmetic
-// instruction (add/sub/and/or/xor; i64 uses a lo/hi pair such as add+adc).
-// Lowering is cancelled when the beacon is not last-used here, and any other
-// operator is a fatal error.
-void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) {
-  // If the beacon variable's live range does not end in this
-  // instruction, then it must end in the modified Store instruction
-  // that follows. This means that the original Store instruction is
-  // still there, either because the value being stored is used beyond
-  // the Store instruction, or because dead code elimination did not
-  // happen. In either case, we cancel RMW lowering (and the caller
-  // deletes the RMW instruction).
-  if (!RMW->isLastUse(RMW->getBeacon()))
-    return;
-  Operand *Data = RMW->getData();
-  const Type Ty = Data->getType();
-  OperandX8632Mem *Mem = formMemoryOperand(RMW->getAddr(), Ty);
-  if (Ty == IceType_i64) {
-    Operand *DataLo = legalize(loOperand(Data), Legal_Reg | Legal_Imm);
-    Operand *DataHi = legalize(hiOperand(Data), Legal_Reg | Legal_Imm);
-    OperandX8632Mem *MemLo = llvm::cast<OperandX8632Mem>(loOperand(Mem));
-    OperandX8632Mem *MemHi = llvm::cast<OperandX8632Mem>(hiOperand(Mem));
-    switch (RMW->getOp()) {
-    case InstArithmetic::Add:
-      _add_rmw(MemLo, DataLo);
-      _adc_rmw(MemHi, DataHi);
-      return;
-    case InstArithmetic::Sub:
-      _sub_rmw(MemLo, DataLo);
-      _sbb_rmw(MemHi, DataHi);
-      return;
-    case InstArithmetic::And:
-      _and_rmw(MemLo, DataLo);
-      _and_rmw(MemHi, DataHi);
-      return;
-    case InstArithmetic::Or:
-      _or_rmw(MemLo, DataLo);
-      _or_rmw(MemHi, DataHi);
-      return;
-    case InstArithmetic::Xor:
-      _xor_rmw(MemLo, DataLo);
-      _xor_rmw(MemHi, DataHi);
-      return;
-    default:
-      // TODO(stichnot): Implement other arithmetic operators.
-      break;
-    }
-  } else {
-    // i8, i16, i32
-    switch (RMW->getOp()) {
-    case InstArithmetic::Add:
-      Data = legalize(Data, Legal_Reg | Legal_Imm);
-      _add_rmw(Mem, Data);
-      return;
-    case InstArithmetic::Sub:
-      Data = legalize(Data, Legal_Reg | Legal_Imm);
-      _sub_rmw(Mem, Data);
-      return;
-    case InstArithmetic::And:
-      Data = legalize(Data, Legal_Reg | Legal_Imm);
-      _and_rmw(Mem, Data);
-      return;
-    case InstArithmetic::Or:
-      Data = legalize(Data, Legal_Reg | Legal_Imm);
-      _or_rmw(Mem, Data);
-      return;
-    case InstArithmetic::Xor:
-      Data = legalize(Data, Legal_Reg | Legal_Imm);
-      _xor_rmw(Mem, Data);
-      return;
-    default:
-      // TODO(stichnot): Implement other arithmetic operators.
-      break;
-    }
-  }
-  // Only unsupported operators reach this point.
-  llvm::report_fatal_error("Couldn't lower RMW instruction");
-}
-
-// Handles target-specific instructions: a fake RMW goes to lowerRMW(), and
-// anything else is passed to the base-class implementation.
-void TargetX8632::lowerOther(const Inst *Instr) {
-  const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr);
-  if (RMW == nullptr) {
-    TargetLowering::lowerOther(Instr);
-    return;
-  }
-  lowerRMW(RMW);
-}
-
-// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
-// preserve integrity of liveness analysis. Undef values are also
-// turned into zeroes, since loOperand() and hiOperand() don't expect
-// Undef input.
-//
-// The new lo/hi Phis are appended to the current node's Phi list and the
-// original i64 Phi is marked deleted. Phis of other types are left alone.
-void TargetX8632::prelowerPhis() {
-  // Pause constant blinding or pooling, blinding or pooling will be done later
-  // during phi lowering assignments
-  BoolFlagSaver B(RandomizationPoolingPaused, true);
-
-  CfgNode *Node = Context.getNode();
-  for (Inst &I : Node->getPhis()) {
-    // The result is dereferenced unconditionally, so use the asserting
-    // cast<> rather than dyn_cast<>, whose null result was never checked.
-    auto *Phi = llvm::cast<InstPhi>(&I);
-    if (Phi->isDeleted())
-      continue;
-    Variable *Dest = Phi->getDest();
-    if (Dest->getType() != IceType_i64)
-      continue;
-    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
-    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
-    InstPhi *PhiLo = InstPhi::create(Func, Phi->getSrcSize(), DestLo);
-    InstPhi *PhiHi = InstPhi::create(Func, Phi->getSrcSize(), DestHi);
-    // ArgIdx rather than I, so the loop index does not shadow the outer
-    // instruction reference.
-    for (SizeT ArgIdx = 0; ArgIdx < Phi->getSrcSize(); ++ArgIdx) {
-      Operand *Src = Phi->getSrc(ArgIdx);
-      CfgNode *Label = Phi->getLabel(ArgIdx);
-      if (llvm::isa<ConstantUndef>(Src))
-        Src = Ctx->getConstantZero(Dest->getType());
-      PhiLo->addArgument(loOperand(Src), Label);
-      PhiHi->addArgument(hiOperand(Src), Label);
-    }
-    Node->getPhis().push_back(PhiLo);
-    Node->getPhis().push_back(PhiHi);
-    Phi->setDeleted();
-  }
-}
-
-namespace {
-
-// Decide whether Opnd counts as a memory operand:
-//  - a Variable counts when it has no register assigned;
-//  - a ConstantUndef counts when its type is scalar floating point or vector;
-//  - any other Constant counts when its type is scalar floating point;
-//  - every remaining operand kind counts as a memory operand.
-bool isMemoryOperand(const Operand *Opnd) {
- if (const auto Var = llvm::dyn_cast<Variable>(Opnd))
- return !Var->hasReg();
- // We treat vector undef values the same as a memory operand,
- // because they do in fact need a register to materialize the vector
- // of zeroes into.
- if (llvm::isa<ConstantUndef>(Opnd))
- return isScalarFloatingType(Opnd->getType()) ||
- isVectorType(Opnd->getType());
- if (llvm::isa<Constant>(Opnd))
- return isScalarFloatingType(Opnd->getType());
- return true;
-}
-
-} // end of anonymous namespace
-
-// Lower the pre-ordered list of assignments into mov instructions.
-// Also has to do some ad-hoc register allocation as necessary.
-//
-// Node must be an empty node with exactly one out-edge (asserted below).
-// The assignments are lowered into Node and a branch to that single
-// successor is appended at the end.
-void TargetX8632::lowerPhiAssignments(CfgNode *Node,
- const AssignList &Assignments) {
- // Check that this is a properly initialized shell of a node.
- assert(Node->getOutEdges().size() == 1);
- assert(Node->getInsts().empty());
- assert(Node->getPhis().empty());
- CfgNode *Succ = Node->getOutEdges().front();
- getContext().init(Node);
- // Register set setup similar to regAlloc().
- RegSetMask RegInclude = RegSet_All;
- RegSetMask RegExclude = RegSet_StackPointer;
- if (hasFramePointer())
- RegExclude |= RegSet_FramePointer;
- llvm::SmallBitVector Available = getRegisterSet(RegInclude, RegExclude);
- bool NeedsRegs = false;
- // Initialize the set of available registers to the set of what is
- // available (not live) at the beginning of the successor block,
- // minus all registers used as Dest operands in the Assignments. To
- // do this, we start off assuming all registers are available, then
- // iterate through the Assignments and remove Dest registers.
- // During this iteration, we also determine whether we will actually
- // need any extra registers for memory-to-memory copies. If so, we
- // do the actual work of removing the live-in registers from the
- // set. TODO(stichnot): This work is being repeated for every split
- // edge to the successor, so consider updating LiveIn just once
- // after all the edges are split.
- for (const Inst &I : Assignments) {
- Variable *Dest = I.getDest();
- if (Dest->hasReg()) {
- Available[Dest->getRegNum()] = false;
- } else if (isMemoryOperand(I.getSrc(0))) {
- NeedsRegs = true; // Src and Dest are both in memory
- }
- }
- if (NeedsRegs) {
- // Remove every register held by a variable that is live-in to Succ.
- LivenessBV &LiveIn = Func->getLiveness()->getLiveIn(Succ);
- for (int i = LiveIn.find_first(); i != -1; i = LiveIn.find_next(i)) {
- Variable *Var = Func->getLiveness()->getVariable(i, Succ);
- if (Var->hasReg())
- Available[Var->getRegNum()] = false;
- }
- }
- // Iterate backwards through the Assignments. After lowering each
- // assignment, add Dest to the set of available registers, and
- // remove Src from the set of available registers. Iteration is
- // done backwards to enable incremental updates of the available
- // register set, and the lowered instruction numbers may be out of
- // order, but that can be worked around by renumbering the block
- // afterwards if necessary.
- for (const Inst &I : reverse_range(Assignments)) {
- Context.rewind();
- // NOTE(review): the dyn_cast result is dereferenced without a null
- // check, so this relies on every entry of Assignments being an InstAssign.
- auto Assign = llvm::dyn_cast<InstAssign>(&I);
- Variable *Dest = Assign->getDest();
-
- // If the source operand is ConstantUndef, do not legalize it.
- // In function test_split_undef_int_vec, the advanced phi
- // lowering process will find an assignment of an undefined
- // vector. This vector, as the Src here, will crash if it
- // goes through legalize(). legalize() will create a new variable
- // with makeVectorOfZeros(), but this new variable will be
- // assigned a stack slot. This will fail the assertion in
- // IceInstX8632.cpp:789, as XmmEmitterRegOp() complains that
- // Var->hasReg() fails. Note this failure is irrelevant to
- // randomization or pooling of constants.
- // So, we do not call legalize() to add pool label for the
- // src operands of phi assignment instructions.
- // Instead, we manually add pool label for constant float and
- // constant double values here.
- // Note going through legalize() does not affect the testing
- // results of SPEC2K and xtests.
- // NOTE(review): as written below, only ConstantUndef sources bypass
- // legalize(); every other source is legalized. No manual pool-label
- // handling is visible in this function -- confirm the comment above.
- Operand *Src = Assign->getSrc(0);
- if (!llvm::isa<ConstantUndef>(Assign->getSrc(0))) {
- Src = legalize(Src);
- }
-
- Variable *SrcVar = llvm::dyn_cast<Variable>(Src);
- // Use normal assignment lowering, except lower mem=mem specially
- // so we can register-allocate at the same time.
- if (!isMemoryOperand(Dest) || !isMemoryOperand(Src)) {
- lowerAssign(Assign);
- } else {
- assert(Dest->getType() == Src->getType());
- const llvm::SmallBitVector &RegsForType =
- getRegisterSetForType(Dest->getType());
- llvm::SmallBitVector AvailRegsForType = RegsForType & Available;
- Variable *SpillLoc = nullptr;
- Variable *Preg = nullptr;
- // TODO(stichnot): Opportunity for register randomization.
- int32_t RegNum = AvailRegsForType.find_first();
- bool IsVector = isVectorType(Dest->getType());
- // find_first() returning -1 means no register of this type is free.
- bool NeedSpill = (RegNum == -1);
- if (NeedSpill) {
- // Pick some register to spill and update RegNum.
- // TODO(stichnot): Opportunity for register randomization.
- RegNum = RegsForType.find_first();
- Preg = getPhysicalRegister(RegNum, Dest->getType());
- SpillLoc = Func->makeVariable(Dest->getType());
- // Create a fake def of the physical register to avoid
- // liveness inconsistency problems during late-stage liveness
- // analysis (e.g. asm-verbose mode).
- Context.insert(InstFakeDef::create(Func, Preg));
- if (IsVector)
- _movp(SpillLoc, Preg);
- else
- _mov(SpillLoc, Preg);
- }
- assert(RegNum >= 0);
- // NOTE(review): legalize() asserts that Allowed == Legal_Reg whenever
- // a specific RegNum is given, but the call below passes Legal_All
- // together with RegNum for vector types -- confirm this path is safe.
- if (llvm::isa<ConstantUndef>(Src))
- // Materialize an actual constant instead of undef. RegNum is
- // passed in for vector types because undef vectors are
- // lowered to a vector register of zeroes.
- Src =
- legalize(Src, Legal_All, IsVector ? RegNum : Variable::NoRegister);
- // Copy Src to Dest through a temporary pinned to RegNum.
- Variable *Tmp = makeReg(Dest->getType(), RegNum);
- if (IsVector) {
- _movp(Tmp, Src);
- _movp(Dest, Tmp);
- } else {
- _mov(Tmp, Src);
- _mov(Dest, Tmp);
- }
- if (NeedSpill) {
- // Restore the spilled register.
- if (IsVector)
- _movp(Preg, SpillLoc);
- else
- _mov(Preg, SpillLoc);
- // Create a fake use of the physical register to keep it live
- // for late-stage liveness analysis (e.g. asm-verbose mode).
- Context.insert(InstFakeUse::create(Func, Preg));
- }
- }
- // Update register availability before moving to the previous
- // instruction on the Assignments list.
- if (Dest->hasReg())
- Available[Dest->getRegNum()] = true;
- if (SrcVar && SrcVar->hasReg())
- Available[SrcVar->getRegNum()] = false;
- }
-
- // Add the terminator branch instruction to the end.
- Context.setInsertPoint(Context.getEnd());
- _br(Succ);
-}
-
-// There is no support for loading or emitting vector constants, so the
-// vector values returned from makeVectorOfZeros, makeVectorOfOnes,
-// etc. are initialized with register operations.
+//===----------------------------------------------------------------------===//
//
-// TODO(wala): Add limited support for vector constants so that
-// complex initialization in registers is unnecessary.
-
-// Return a new register variable of type Ty (bound to RegNum via makeReg())
-// that is zeroed by emitting pxor of the register with itself.
-Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {
- Variable *Reg = makeReg(Ty, RegNum);
- // Insert a FakeDef, since otherwise the live range of Reg might
- // be overestimated.
- Context.insert(InstFakeDef::create(Func, Reg));
- _pxor(Reg, Reg);
- return Reg;
-}
-
-// Return a new register variable of type Ty (bound to RegNum via makeReg())
-// that is initialized by emitting pcmpeq of the register with itself.
-Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {
- Variable *MinusOnes = makeReg(Ty, RegNum);
- // Insert a FakeDef so the live range of MinusOnes is not overestimated.
- Context.insert(InstFakeDef::create(Func, MinusOnes));
- _pcmpeq(MinusOnes, MinusOnes);
- return MinusOnes;
-}
-
-// Build a vector of ones by subtracting a vector of minus ones from a
-// vector of zeroes. Only the result (Dest) is created with RegNum; the
-// temporary minus-ones vector is created without passing RegNum.
-Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
- Variable *Dest = makeVectorOfZeros(Ty, RegNum);
- Variable *MinusOne = makeVectorOfMinusOnes(Ty);
- _psub(Dest, MinusOne);
- return Dest;
-}
-
-// Return a register holding a vector in which only the high-order bit of
-// each element is set. Ty must be v4i32, v4f32, v8i16 or v16i8 (asserted).
-Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {
- assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
- Ty == IceType_v16i8);
- if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
- // Start from a vector of ones and shift each element left by
- // (element width in bits - 1).
- Variable *Reg = makeVectorOfOnes(Ty, RegNum);
- SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;
- _psll(Reg, Ctx->getConstantInt8(Shift));
- return Reg;
- } else {
- // SSE has no left shift operation for vectors of 8 bit integers.
- // Instead, move the 32-bit pattern 0x80808080 into the register with
- // movd and then apply pshufd with a zero selector.
- const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
- Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
- Variable *Reg = makeReg(Ty, RegNum);
- _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
- _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
- return Reg;
- }
-}
-
-// Construct a mask in a register that can be and'ed with a
-// floating-point value to mask off its sign bit. The value will be
-// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff>
-// for f64. Construct it as a vector of minus ones (all bits set) logically
-// right shifted by one bit. TODO(stichnot): Fix the wala TODO above, to
-// represent vector constants in memory.
-Variable *TargetX8632::makeVectorOfFabsMask(Type Ty, int32_t RegNum) {
- Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
- _psrl(Reg, Ctx->getConstantInt8(1));
- return Reg;
-}
-
-// Return a memory operand of type Ty that addresses Offset bytes past the
-// stack slot Slot. The slot address is first loaded into a fresh i32
-// register with lea, and that register becomes the base of the operand.
-OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
- Variable *Slot,
- uint32_t Offset) {
- // Ensure that Slot is a stack slot.
- assert(Slot->getWeight().isZero());
- assert(Slot->getRegNum() == Variable::NoRegister);
- // Compute the location of Slot in memory.
- // TODO(wala,stichnot): lea should not be required. The address of
- // the stack slot is known at compile time (although not until after
- // addProlog()).
- const Type PointerType = IceType_i32;
- Variable *Loc = makeReg(PointerType);
- _lea(Loc, Slot);
- Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
- return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
-}
-
-// Helper for legalize() to emit the right code to lower an operand to a
-// register of the appropriate type. A new register variable of Src's type
-// is created with makeReg(Ty, RegNum); vector types are copied with movp
-// and all other types with mov. The new register variable is returned.
-Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
- Type Ty = Src->getType();
- Variable *Reg = makeReg(Ty, RegNum);
- if (isVectorType(Ty)) {
- _movp(Reg, Src);
- } else {
- _mov(Reg, Src);
- }
- return Reg;
-}
-
-// Return an operand equivalent to From whose kind is permitted by Allowed
-// (which must include Legal_Reg), emitting whatever instructions are needed.
-// When RegNum is not Variable::NoRegister, Allowed must be exactly Legal_Reg
-// and any register copy made here is created with that RegNum.
-//
-// Three operand kinds are handled: OperandX8632Mem, Constant and Variable.
-// Any other kind reaches llvm_unreachable().
-Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
- int32_t RegNum) {
- Type Ty = From->getType();
- // Assert that a physical register is allowed. To date, all calls
- // to legalize() allow a physical register. If a physical register
- // needs to be explicitly disallowed, then new code will need to be
- // written to force a spill.
- assert(Allowed & Legal_Reg);
- // If we're asking for a specific physical register, make sure we're
- // not allowing any other operand kinds. (This could be future
- // work, e.g. allow the shl shift amount to be either an immediate
- // or in ecx.)
- assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
-
- if (auto Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
- // Before doing anything with a Mem operand, we need to ensure
- // that the Base and Index components are in physical registers.
- Variable *Base = Mem->getBase();
- Variable *Index = Mem->getIndex();
- Variable *RegBase = nullptr;
- Variable *RegIndex = nullptr;
- if (Base) {
- RegBase = legalizeToVar(Base);
- }
- if (Index) {
- RegIndex = legalizeToVar(Index);
- }
- // Rebuild the memory operand only if legalization replaced Base or Index.
- if (Base != RegBase || Index != RegIndex) {
- Mem =
- OperandX8632Mem::create(Func, Ty, RegBase, Mem->getOffset(), RegIndex,
- Mem->getShift(), Mem->getSegmentRegister());
- }
-
- // For all Memory Operands, we do randomization/pooling here
- From = randomizeOrPoolImmediate(Mem);
-
- if (!(Allowed & Legal_Mem)) {
- From = copyToReg(From, RegNum);
- }
- return From;
- }
- if (auto *Const = llvm::dyn_cast<Constant>(From)) {
- if (llvm::isa<ConstantUndef>(Const)) {
- // Lower undefs to zero. Another option is to lower undefs to an
- // uninitialized register; however, using an uninitialized register
- // results in less predictable code.
- //
- // If in the future the implementation is changed to lower undef
- // values to uninitialized registers, a FakeDef will be needed:
- // Context.insert(InstFakeDef::create(Func, Reg));
- // This is in order to ensure that the live range of Reg is not
- // overestimated. If the constant being lowered is a 64 bit value,
- // then the result should be split and the lo and hi components will
- // need to go in uninitialized registers.
- if (isVectorType(Ty))
- return makeVectorOfZeros(Ty, RegNum);
- Const = Ctx->getConstantZero(Ty);
- From = Const;
- }
- // There should be no constants of vector type (other than undef).
- assert(!isVectorType(Ty));
-
- // If the operand is a 32-bit constant integer, we should check
- // whether we need to randomize it or pool it.
- if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
- Operand *NewConst = randomizeOrPoolImmediate(C, RegNum);
- // A different operand coming back means the immediate was replaced;
- // return the replacement as-is.
- if (NewConst != Const) {
- return NewConst;
- }
- }
-
- // Convert a scalar floating point constant into an explicit
- // memory operand.
- if (isScalarFloatingType(Ty)) {
- Variable *Base = nullptr;
- std::string Buffer;
- llvm::raw_string_ostream StrBuf(Buffer);
- llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
- llvm::cast<Constant>(From)->setShouldBePooled(true);
- Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
- From = OperandX8632Mem::create(Func, Ty, Base, Offset);
- }
- bool NeedsReg = false;
- if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty))
- // Immediate specifically not allowed
- NeedsReg = true;
- if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))
- // On x86, FP constants are lowered to mem operands.
- NeedsReg = true;
- if (NeedsReg) {
- From = copyToReg(From, RegNum);
- }
- return From;
- }
- if (auto Var = llvm::dyn_cast<Variable>(From)) {
- // Check if the variable is guaranteed a physical register. This
- // can happen either when the variable is pre-colored or when it is
- // assigned infinite weight.
- bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf());
- // We need a new physical register for the operand if:
- // Mem is not allowed and Var isn't guaranteed a physical
- // register, or
- // RegNum is required and Var->getRegNum() doesn't match.
- if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
- (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
- From = copyToReg(From, RegNum);
- }
- return From;
- }
- llvm_unreachable("Unhandled operand kind in legalize()");
- return From;
-}
-
-// Provide a trivial wrapper to legalize() for this common usage: legalize
-// From with only Legal_Reg allowed and cast the result to a Variable.
-Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) {
- return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
-}
+// This file implements the TargetLoweringX8632 class, which
+// consists almost entirely of the lowering sequence for each
+// high-level instruction.
+//
+//===----------------------------------------------------------------------===//
-// For the cmp instruction, if Src1 is an immediate, or known to be a
-// physical register, we can allow Src0 to be a memory operand.
-// Otherwise, Src0 must be copied into a physical register.
-// (Actually, either Src0 or Src1 can be chosen for the physical
-// register, but unfortunately we have to commit to one or the other
-// before register allocation.)
-Operand *TargetX8632::legalizeSrc0ForCmp(Operand *Src0, Operand *Src1) {
- // True when Src1 is any Constant, or a Variable that already has a
- // register.
- bool IsSrc1ImmOrReg = false;
- if (llvm::isa<Constant>(Src1)) {
- IsSrc1ImmOrReg = true;
- } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
- if (Var->hasReg())
- IsSrc1ImmOrReg = true;
- }
- return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
-}
+#include "IceTargetLoweringX8632.h"
-// Build an OperandX8632Mem of type Ty from Opnd. An operand that already is
-// an OperandX8632Mem is reused; otherwise Opnd must be a Variable (used as
-// the base) or a Constant (used as the offset). The result is passed through
-// legalize() when DoLegalize is true, and only through
-// randomizeOrPoolImmediate() otherwise.
-OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Opnd, Type Ty,
- bool DoLegalize) {
- OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Opnd);
- // It may be the case that address mode optimization already creates
- // an OperandX8632Mem, so in that case it wouldn't need another level
- // of transformation.
- if (!Mem) {
- Variable *Base = llvm::dyn_cast<Variable>(Opnd);
- Constant *Offset = llvm::dyn_cast<Constant>(Opnd);
- assert(Base || Offset);
- if (Offset) {
- // During memory operand building, we do not blind or pool
- // the constant offset; we work on the whole memory operand
- // as one entity later, which saves one instruction.
- // By turning blinding and pooling off, we guarantee
- // legalize(Offset) will return a Constant*.
- {
- BoolFlagSaver B(RandomizationPoolingPaused, true);
+#include "IceTargetLoweringX86Base.h"
- Offset = llvm::cast<Constant>(legalize(Offset));
- }
+namespace Ice {
+namespace X86Internal {
+template <> struct MachineTraits<TargetX8632> {
+ using InstructionSet = TargetX8632::X86InstructionSet;
- assert(llvm::isa<ConstantInteger32>(Offset) ||
- llvm::isa<ConstantRelocatable>(Offset));
- }
- Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);
+ // The following table summarizes the logic for lowering the fcmp
+ // instruction. There is one table entry for each of the 16 conditions.
+ //
+ // The first four columns describe the case when the operands are
+ // floating point scalar values. A comment in lowerFcmp() describes the
+ // lowering template. In the most general case, there is a compare
+ // followed by two conditional branches, because some fcmp conditions
+ // don't map to a single x86 conditional branch. However, in many cases
+ // it is possible to swap the operands in the comparison and have a
+ // single conditional branch. Since it's quite tedious to validate the
+ // table by hand, good execution tests are helpful.
+ //
+ // The last two columns describe the case when the operands are vectors
+ // of floating point values. For most fcmp conditions, there is a clear
+ // mapping to a single x86 cmpps instruction variant. Some fcmp
+ // conditions require special code to handle and these are marked in the
+ // table with a Cmpps_Invalid predicate.
+ static const struct TableFcmpType {
+ uint32_t Default;
+ bool SwapScalarOperands;
+ CondX86::BrCond C1, C2;
+ bool SwapVectorOperands;
+ CondX86::CmppsCond Predicate;
+ } TableFcmp[];
+ static const size_t TableFcmpSize;
+
+ // The following table summarizes the logic for lowering the icmp instruction
+ // for i32 and narrower types. Each icmp condition has a clear mapping to an
+ // x86 conditional branch instruction.
+
+ static const struct TableIcmp32Type {
+ CondX86::BrCond Mapping;
+ } TableIcmp32[];
+ static const size_t TableIcmp32Size;
+
+ // The following table summarizes the logic for lowering the icmp instruction
+ // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
+ // conditional branches are needed. For the other conditions, three separate
+ // conditional branches are needed.
+ static const struct TableIcmp64Type {
+ CondX86::BrCond C1, C2, C3;
+ } TableIcmp64[];
+ static const size_t TableIcmp64Size;
+
+ static CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
+ size_t Index = static_cast<size_t>(Cond);
+ assert(Index < TableIcmp32Size);
+ return TableIcmp32[Index].Mapping;
+ }
+
+ static const struct TableTypeX8632AttributesType {
+ Type InVectorElementType;
+ } TableTypeX8632Attributes[];
+ static const size_t TableTypeX8632AttributesSize;
+
+ // Return the type which the elements of the vector have in the X86
+ // representation of the vector.
+ static Type getInVectorElementType(Type Ty) {
+ assert(isVectorType(Ty));
+ size_t Index = static_cast<size_t>(Ty);
+ (void)Index;
+ assert(Index < TableTypeX8632AttributesSize);
+ return TableTypeX8632Attributes[Ty].InVectorElementType;
+ }
+
+ // The maximum number of arguments to pass in XMM registers
+ static constexpr uint32_t X86_MAX_XMM_ARGS = 4;
+ // The number of bits in a byte
+ static constexpr uint32_t X86_CHAR_BIT = 8;
+ // Stack alignment
+ static const uint32_t X86_STACK_ALIGNMENT_BYTES;
+ // Size of the return address on the stack
+ static constexpr uint32_t X86_RET_IP_SIZE_BYTES = 4;
+ // The number of different NOP instructions
+ static constexpr uint32_t X86_NUM_NOP_VARIANTS = 5;
+
+ // Value is in bytes. Return Value adjusted to the next highest multiple
+ // of the stack alignment.
+ static uint32_t applyStackAlignment(uint32_t Value) {
+ return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
 }
- // Do legalization, which contains randomization/pooling
- // or do randomization/pooling.
- return llvm::cast<OperandX8632Mem>(
- DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
-}
-
-// Create a new Variable of the given type that is destined for a register.
-// With RegNum == Variable::NoRegister the variable is given infinite
-// weight; otherwise it is assigned the register RegNum directly.
-Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
- // There aren't any 64-bit integer registers for x86-32.
- assert(Type != IceType_i64);
- Variable *Reg = Func->makeVariable(Type);
- if (RegNum == Variable::NoRegister)
- Reg->setWeightInfinite();
- else
- Reg->setRegNum(RegNum);
- return Reg;
-}
-
-// Post-lowering hook: runs inferTwoAddress(), except when the optimization
-// level is Opt_m1, in which case nothing is done.
-void TargetX8632::postLower() {
- if (Ctx->getFlags().getOptLevel() == Opt_m1)
- return;
- inferTwoAddress();
-}
-
-// Fill Permutation with a randomized register mapping. Registers set in
-// ExcludeRegisters map to themselves. All other registers are grouped into
-// equivalence classes keyed by their (scratch, preserved, isI8, isInt, isFP)
-// properties, and each class is shuffled among its own members.
-// Permutation must have at least RegX8632::Reg_NUM entries (asserted).
-void TargetX8632::makeRandomRegisterPermutation(
- llvm::SmallVectorImpl<int32_t> &Permutation,
- const llvm::SmallBitVector &ExcludeRegisters) const {
- // TODO(stichnot): Declaring Permutation this way loses type/size
- // information. Fix this in conjunction with the caller-side TODO.
- assert(Permutation.size() >= RegX8632::Reg_NUM);
- // Expected upper bound on the number of registers in a single
- // equivalence class. For x86-32, this would comprise the 8 XMM
- // registers. This is for performance, not correctness.
- static const unsigned MaxEquivalenceClassSize = 8;
- typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList;
- typedef std::map<uint32_t, RegisterList> EquivalenceClassMap;
- EquivalenceClassMap EquivalenceClasses;
- SizeT NumShuffled = 0, NumPreserved = 0;
+};
-// Build up the equivalence classes of registers by looking at the
-// register properties as well as whether the registers should be
-// explicitly excluded from shuffling.
-#define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
- frameptr, isI8, isInt, isFP) \
- if (ExcludeRegisters[RegX8632::val]) { \
- /* val stays the same in the resulting permutation. */ \
- Permutation[RegX8632::val] = RegX8632::val; \
- ++NumPreserved; \
- } else { \
- const uint32_t Index = (scratch << 0) | (preserved << 1) | (isI8 << 2) | \
- (isInt << 3) | (isFP << 4); \
- /* val is assigned to an equivalence class based on its properties. */ \
- EquivalenceClasses[Index].push_back(RegX8632::val); \
- }
- REGX8632_TABLE
+const MachineTraits<TargetX8632>::TableFcmpType
+ MachineTraits<TargetX8632>::TableFcmp[] = {
+#define X(val, dflt, swapS, C1, C2, swapV, pred) \
+ { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \
+ ,
+ FCMPX8632_TABLE
 #undef X
+};
- RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
+constexpr size_t MachineTraits<TargetX8632>::TableFcmpSize =
+ llvm::array_lengthof(TableFcmp);
- // Shuffle the resulting equivalence classes.
- // NOTE(review): `auto I` takes each map entry by value, so the
- // RegisterList is copied on every iteration.
- for (auto I : EquivalenceClasses) {
- const RegisterList &List = I.second;
- RegisterList Shuffled(List);
- RandomShuffle(Shuffled.begin(), Shuffled.end(), RNG);
- for (size_t SI = 0, SE = Shuffled.size(); SI < SE; ++SI) {
- Permutation[List[SI]] = Shuffled[SI];
- ++NumShuffled;
- }
- }
+const MachineTraits<TargetX8632>::TableIcmp32Type
+ MachineTraits<TargetX8632>::TableIcmp32[] = {
+#define X(val, C_32, C1_64, C2_64, C3_64) \
+ { CondX86::C_32 } \
+ ,
+ ICMPX8632_TABLE
+#undef X
+};
- // Every register must have been either preserved or shuffled.
- assert(NumShuffled + NumPreserved == RegX8632::Reg_NUM);
+constexpr size_t MachineTraits<TargetX8632>::TableIcmp32Size =
+ llvm::array_lengthof(TableIcmp32);
- // In IceV_Random verbose mode, dump each equivalence class as a
- // brace-enclosed, space-separated list of register names.
- if (Func->isVerbose(IceV_Random)) {
- OstreamLocker L(Func->getContext());
- Ostream &Str = Func->getContext()->getStrDump();
- Str << "Register equivalence classes:\n";
- for (auto I : EquivalenceClasses) {
- Str << "{";
- const RegisterList &List = I.second;
- bool First = true;
- for (int32_t Register : List) {
- if (!First)
- Str << " ";
- First = false;
- Str << getRegName(Register, IceType_i32);
- }
- Str << "}\n";
- }
- }
-}
+// Definition of TableIcmp64: one {C1, C2, C3} entry is generated for each
+// row of ICMPX8632_TABLE from its C1_64, C2_64 and C3_64 columns.
+const MachineTraits<TargetX8632>::TableIcmp64Type
+ MachineTraits<TargetX8632>::TableIcmp64[] = {
+#define X(val, C_32, C1_64, C2_64, C3_64) \
+ { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 } \
+ ,
+ ICMPX8632_TABLE
+#undef X
+};
-// Emit a 32-bit integer constant to the emit stream as getConstantPrefix()
-// followed by the value. Does nothing when ALLOW_DUMP is false.
-void TargetX8632::emit(const ConstantInteger32 *C) const {
- if (!ALLOW_DUMP)
- return;
- Ostream &Str = Ctx->getStrEmit();
- Str << getConstantPrefix() << C->getValue();
-}
+// Number of entries in TableIcmp64, computed from the array definition.
+constexpr size_t MachineTraits<TargetX8632>::TableIcmp64Size =
+ llvm::array_lengthof(TableIcmp64);
-// 64-bit integer constants are never expected to be emitted here; reaching
-// this overload is reported as a fatal error.
-void TargetX8632::emit(const ConstantInteger64 *) const {
- llvm::report_fatal_error("Not expecting to emit 64-bit integers");
-}
+// Definition of TableTypeX8632Attributes: one entry is generated for each
+// row of ICETYPEX8632_TABLE, holding that row's elementty column.
+const MachineTraits<TargetX8632>::TableTypeX8632AttributesType
+ MachineTraits<TargetX8632>::TableTypeX8632Attributes[] = {
+#define X(tag, elementty, cvt, sdss, pack, width, fld) \
+ { elementty } \
+ ,
+ ICETYPEX8632_TABLE
+#undef X
+};
-// Emit a float constant by writing its pool label to the emit stream.
-// Does nothing when ALLOW_DUMP is false.
-void TargetX8632::emit(const ConstantFloat *C) const {
- if (!ALLOW_DUMP)
- return;
- Ostream &Str = Ctx->getStrEmit();
- C->emitPoolLabel(Str);
-}
+// Number of entries in TableTypeX8632Attributes, computed from the array
+// definition.
+constexpr size_t MachineTraits<TargetX8632>::TableTypeX8632AttributesSize =
+ llvm::array_lengthof(TableTypeX8632Attributes);
-// Emit a double constant by writing its pool label to the emit stream.
-// Does nothing when ALLOW_DUMP is false.
-void TargetX8632::emit(const ConstantDouble *C) const {
- if (!ALLOW_DUMP)
- return;
- Ostream &Str = Ctx->getStrEmit();
- C->emitPoolLabel(Str);
-}
+// Out-of-class definition of the stack alignment, in bytes.
+const uint32_t MachineTraits<TargetX8632>::X86_STACK_ALIGNMENT_BYTES = 16;
+} // end of namespace X86Internal
-// Undef constants must not reach the emitter; reaching this overload is
-// reported as a fatal error.
-void TargetX8632::emit(const ConstantUndef *) const {
- llvm::report_fatal_error("undef value encountered by emitter.");
+// Factory for the x86-32 target: forwards to
+// X86Internal::TargetX86Base<TargetX8632>::create().
+TargetX8632 *TargetX8632::create(Cfg *Func) {
+ return X86Internal::TargetX86Base<TargetX8632>::create(Func);
 }
 // Construct the x86-32 data lowering object; Ctx is forwarded to the
 // TargetDataLowering base class.
 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx)
 : TargetDataLowering(Ctx) {}
-// Lower the global variable declarations in Vars according to the output
-// file type. For FT_Elf the object writer writes them as a data section
-// using the R_386_32 relocation. For FT_Asm and FT_Iasm each variable whose
-// name matches the translate-only flag is emitted with emitGlobal().
-void TargetDataX8632::lowerGlobals(const VariableDeclarationList &Vars,
- const IceString &SectionSuffix) {
- switch (Ctx->getFlags().getOutFileType()) {
- case FT_Elf: {
- ELFObjectWriter *Writer = Ctx->getObjectWriter();
- Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix);
- } break;
- case FT_Asm:
- case FT_Iasm: {
- const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
- // Hold the output stream lock while the globals are emitted.
- OstreamLocker L(Ctx);
- for (const VariableDeclaration *Var : Vars) {
- if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
- emitGlobal(*Var, SectionSuffix);
- }
- }
- } break;
- }
-}
-
+namespace {
template <typename T> struct PoolTypeConverter {};
template <> struct PoolTypeConverter<float> {
@@ -5457,6 +225,7 @@ template <> struct PoolTypeConverter<uint8_t> {
const char *PoolTypeConverter<uint8_t>::TypeName = "i8";
const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte";
const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x";
+} // end of anonymous namespace
template <typename T>
void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) {
@@ -5521,200 +290,135 @@ void TargetDataX8632::lowerConstants() {
}
}
-TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx)
- : TargetHeaderLowering(Ctx) {}
-
-// Randomize or pool an Immediate.
-Operand *TargetX8632::randomizeOrPoolImmediate(Constant *Immediate,
- int32_t RegNum) {
- assert(llvm::isa<ConstantInteger32>(Immediate) ||
- llvm::isa<ConstantRelocatable>(Immediate));
- if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
- RandomizationPoolingPaused == true) {
- // Immediates randomization/pooling off or paused
- return Immediate;
- }
- if (Immediate->shouldBeRandomizedOrPooled(Ctx)) {
- Ctx->statsUpdateRPImms();
- if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
- RPI_Randomize) {
- // blind the constant
- // FROM:
- // imm
- // TO:
- // insert: mov imm+cookie, Reg
- // insert: lea -cookie[Reg], Reg
- // => Reg
- // If we have already assigned a phy register, we must come from
- // andvancedPhiLowering()=>lowerAssign(). In this case we should reuse
- // the assigned register as this assignment is that start of its use-def
- // chain. So we add RegNum argument here.
- // Note we use 'lea' instruction instead of 'xor' to avoid affecting
- // the flags.
- Variable *Reg = makeReg(IceType_i32, RegNum);
- ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate);
- uint32_t Value = Integer->getValue();
- uint32_t Cookie = Ctx->getRandomizationCookie();
- _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value));
- Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie);
- _lea(Reg,
- OperandX8632Mem::create(Func, IceType_i32, Reg, Offset, nullptr, 0));
- // make sure liveness analysis won't kill this variable, otherwise a
- // liveness
- // assertion will be triggered.
- _set_dest_nonkillable();
- if (Immediate->getType() != IceType_i32) {
- Variable *TruncReg = makeReg(Immediate->getType(), RegNum);
- _mov(TruncReg, Reg);
- return TruncReg;
+void TargetDataX8632::lowerGlobals(const VariableDeclarationList &Vars,
+ const IceString &SectionSuffix) {
+ switch (Ctx->getFlags().getOutFileType()) {
+ case FT_Elf: {
+ ELFObjectWriter *Writer = Ctx->getObjectWriter();
+ Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix);
+ } break;
+ case FT_Asm:
+ case FT_Iasm: {
+ const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();
+ OstreamLocker L(Ctx);
+ for (const VariableDeclaration *Var : Vars) {
+ if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {
+ emitGlobal(*Var, SectionSuffix);
}
- return Reg;
- }
- if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
- // pool the constant
- // FROM:
- // imm
- // TO:
- // insert: mov $label, Reg
- // => Reg
- assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool);
- Immediate->setShouldBePooled(true);
- // if we have already assigned a phy register, we must come from
- // andvancedPhiLowering()=>lowerAssign(). In this case we should reuse
- // the assigned register as this assignment is that start of its use-def
- // chain. So we add RegNum argument here.
- Variable *Reg = makeReg(Immediate->getType(), RegNum);
- IceString Label;
- llvm::raw_string_ostream Label_stream(Label);
- Immediate->emitPoolLabel(Label_stream);
- const RelocOffsetT Offset = 0;
- const bool SuppressMangling = true;
- Constant *Symbol =
- Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling);
- OperandX8632Mem *MemOperand =
- OperandX8632Mem::create(Func, Immediate->getType(), nullptr, Symbol);
- _mov(Reg, MemOperand);
- return Reg;
}
- assert("Unsupported -randomize-pool-immediates option" && false);
+ } break;
}
- // the constant Immediate is not eligible for blinding/pooling
- return Immediate;
}
-OperandX8632Mem *
-TargetX8632::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand,
- int32_t RegNum) {
- assert(MemOperand);
- if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
- RandomizationPoolingPaused == true) {
- // immediates randomization/pooling is turned off
- return MemOperand;
- }
-
- // If this memory operand is already a randommized one, we do
- // not randomize it again.
- if (MemOperand->getRandomized())
- return MemOperand;
+TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx)
+ : TargetHeaderLowering(Ctx) {}
- if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) {
- if (C->shouldBeRandomizedOrPooled(Ctx)) {
- // The offset of this mem operand should be blinded or pooled
- Ctx->statsUpdateRPImms();
- if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
- RPI_Randomize) {
- // blind the constant offset
- // FROM:
- // offset[base, index, shift]
- // TO:
- // insert: lea offset+cookie[base], RegTemp
- // => -cookie[RegTemp, index, shift]
- uint32_t Value =
- llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset())
- ->getValue();
- uint32_t Cookie = Ctx->getRandomizationCookie();
- Constant *Mask1 = Ctx->getConstantInt(
- MemOperand->getOffset()->getType(), Cookie + Value);
- Constant *Mask2 =
- Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);
+// In some cases, there are x-macro tables for both high-level and
+// low-level instructions/operands that use the same enum key value.
+// The tables are kept separate to maintain a proper separation
+// between abstraction layers. There is a risk that the tables could
+// get out of sync if enum values are reordered or if entries are
+// added or deleted. The following dummy namespaces use
+// static_asserts to ensure everything is kept in sync.
- OperandX8632Mem *TempMemOperand = OperandX8632Mem::create(
- Func, MemOperand->getType(), MemOperand->getBase(), Mask1);
- // If we have already assigned a physical register, we must come from
- // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
- // the assigned register as this assignment is that start of its use-def
- // chain. So we add RegNum argument here.
- Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
- _lea(RegTemp, TempMemOperand);
- // As source operand doesn't use the dstreg, we don't need to add
- // _set_dest_nonkillable().
- // But if we use the same Dest Reg, that is, with RegNum
- // assigned, we should add this _set_dest_nonkillable()
- if (RegNum != Variable::NoRegister)
- _set_dest_nonkillable();
+namespace {
+// Validate the enum values in FCMPX8632_TABLE.
+namespace dummy1 {
+// Define a temporary set of enum values based on low-level table
+// entries.
+enum _tmp_enum {
+#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,
+ FCMPX8632_TABLE
+#undef X
+ _num
+};
+// Define a set of constants based on high-level table entries.
+#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
+ICEINSTFCMP_TABLE
+#undef X
+// Define a set of constants based on low-level table entries, and
+// ensure the table entry keys are consistent.
+#define X(val, dflt, swapS, C1, C2, swapV, pred) \
+ static const int _table2_##val = _tmp_##val; \
+ static_assert( \
+ _table1_##val == _table2_##val, \
+ "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
+FCMPX8632_TABLE
+#undef X
+// Repeat the static asserts with respect to the high-level table
+// entries in case the high-level table has extra entries.
+#define X(tag, str) \
+ static_assert( \
+ _table1_##tag == _table2_##tag, \
+ "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");
+ICEINSTFCMP_TABLE
+#undef X
+} // end of namespace dummy1
- OperandX8632Mem *NewMemOperand = OperandX8632Mem::create(
- Func, MemOperand->getType(), RegTemp, Mask2, MemOperand->getIndex(),
- MemOperand->getShift(), MemOperand->getSegmentRegister());
+// Validate the enum values in ICMPX8632_TABLE.
+namespace dummy2 {
+// Define a temporary set of enum values based on low-level table
+// entries.
+enum _tmp_enum {
+#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
+ ICMPX8632_TABLE
+#undef X
+ _num
+};
+// Define a set of constants based on high-level table entries.
+#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
+ICEINSTICMP_TABLE
+#undef X
+// Define a set of constants based on low-level table entries, and
+// ensure the table entry keys are consistent.
+#define X(val, C_32, C1_64, C2_64, C3_64) \
+ static const int _table2_##val = _tmp_##val; \
+ static_assert( \
+ _table1_##val == _table2_##val, \
+ "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
+ICMPX8632_TABLE
+#undef X
+// Repeat the static asserts with respect to the high-level table
+// entries in case the high-level table has extra entries.
+#define X(tag, str) \
+ static_assert( \
+ _table1_##tag == _table2_##tag, \
+ "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");
+ICEINSTICMP_TABLE
+#undef X
+} // end of namespace dummy2
- // Label this memory operand as randomize, so we won't randomize it
- // again in case we call legalize() mutiple times on this memory
- // operand.
- NewMemOperand->setRandomized(true);
- return NewMemOperand;
- }
- if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
- // pool the constant offset
- // FROM:
- // offset[base, index, shift]
- // TO:
- // insert: mov $label, RegTemp
- // insert: lea [base, RegTemp], RegTemp
- // =>[RegTemp, index, shift]
- assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
- RPI_Pool);
- // Memory operand should never exist as source operands in phi
- // lowering assignments, so there is no need to reuse any registers
- // here. For phi lowering, we should not ask for new physical
- // registers in general.
- // However, if we do meet Memory Operand during phi lowering, we
- // should not blind or pool the immediates for now.
- if (RegNum != Variable::NoRegister)
- return MemOperand;
- Variable *RegTemp = makeReg(IceType_i32);
- IceString Label;
- llvm::raw_string_ostream Label_stream(Label);
- MemOperand->getOffset()->emitPoolLabel(Label_stream);
- MemOperand->getOffset()->setShouldBePooled(true);
- const RelocOffsetT SymOffset = 0;
- bool SuppressMangling = true;
- Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(),
- SuppressMangling);
- OperandX8632Mem *SymbolOperand = OperandX8632Mem::create(
- Func, MemOperand->getOffset()->getType(), nullptr, Symbol);
- _mov(RegTemp, SymbolOperand);
- // If we have a base variable here, we should add the lea instruction
- // to add the value of the base variable to RegTemp. If there is no
- // base variable, we won't need this lea instruction.
- if (MemOperand->getBase()) {
- OperandX8632Mem *CalculateOperand = OperandX8632Mem::create(
- Func, MemOperand->getType(), MemOperand->getBase(), nullptr,
- RegTemp, 0, MemOperand->getSegmentRegister());
- _lea(RegTemp, CalculateOperand);
- _set_dest_nonkillable();
- }
- OperandX8632Mem *NewMemOperand = OperandX8632Mem::create(
- Func, MemOperand->getType(), RegTemp, nullptr,
- MemOperand->getIndex(), MemOperand->getShift(),
- MemOperand->getSegmentRegister());
- return NewMemOperand;
- }
- assert("Unsupported -randomize-pool-immediates option" && false);
- }
- }
- // the offset is not eligible for blinding or pooling, return the original
- // mem operand
- return MemOperand;
-}
+// Validate the enum values in ICETYPEX8632_TABLE.
+namespace dummy3 {
+// Define a temporary set of enum values based on low-level table
+// entries.
+enum _tmp_enum {
+#define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,
+ ICETYPEX8632_TABLE
+#undef X
+ _num
+};
+// Define a set of constants based on high-level table entries.
+#define X(tag, size, align, elts, elty, str) \
+ static const int _table1_##tag = tag;
+ICETYPE_TABLE
+#undef X
+// Define a set of constants based on low-level table entries, and
+// ensure the table entry keys are consistent.
+#define X(tag, elementty, cvt, sdss, pack, width, fld) \
+ static const int _table2_##tag = _tmp_##tag; \
+ static_assert(_table1_##tag == _table2_##tag, \
+ "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
+ICETYPEX8632_TABLE
+#undef X
+// Repeat the static asserts with respect to the high-level table
+// entries in case the high-level table has extra entries.
+#define X(tag, size, align, elts, elty, str) \
+ static_assert(_table1_##tag == _table2_##tag, \
+ "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");
+ICETYPE_TABLE
+#undef X
+} // end of namespace dummy3
+} // end of anonymous namespace
} // end of namespace Ice

Powered by Google App Engine
This is Rietveld 408576698