Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(629)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1216963007: Doxygenize the documentation comments (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Rebase to master Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | src/IceThreading.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 ///
10 // This file implements the TargetLoweringX86Base class, which 10 /// \file
11 // consists almost entirely of the lowering sequence for each 11 /// This file implements the TargetLoweringX86Base class, which
12 // high-level instruction. 12 /// consists almost entirely of the lowering sequence for each
13 // 13 /// high-level instruction.
14 ///
14 //===----------------------------------------------------------------------===// 15 //===----------------------------------------------------------------------===//
15 16
16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 17 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
17 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 18 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
18 19
19 #include "IceCfg.h" 20 #include "IceCfg.h"
20 #include "IceCfgNode.h" 21 #include "IceCfgNode.h"
21 #include "IceClFlags.h" 22 #include "IceClFlags.h"
22 #include "IceDefs.h" 23 #include "IceDefs.h"
23 #include "IceELFObjectWriter.h" 24 #include "IceELFObjectWriter.h"
24 #include "IceGlobalInits.h" 25 #include "IceGlobalInits.h"
25 #include "IceInstX8632.h" 26 #include "IceInstX8632.h"
26 #include "IceLiveness.h" 27 #include "IceLiveness.h"
27 #include "IceOperand.h" 28 #include "IceOperand.h"
28 #include "IceRegistersX8632.h" 29 #include "IceRegistersX8632.h"
29 #include "IceTargetLoweringX8632.def" 30 #include "IceTargetLoweringX8632.def"
30 #include "IceTargetLoweringX8632.h" 31 #include "IceTargetLoweringX8632.h"
31 #include "IceUtils.h" 32 #include "IceUtils.h"
32 #include "llvm/Support/MathExtras.h" 33 #include "llvm/Support/MathExtras.h"
33 34
34 namespace Ice { 35 namespace Ice {
35 namespace X86Internal { 36 namespace X86Internal {
36 37
/// RAII helper that temporarily overrides a bool flag (such as
/// RandomizationPoolingPause, to disable constant blinding or pooling for
/// some translation phases) and restores the original value on scope exit.
class BoolFlagSaver {
  BoolFlagSaver() = delete;
  BoolFlagSaver(const BoolFlagSaver &) = delete;
  BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;

public:
  /// Records the current value of F, then overwrites F with NewValue.
  BoolFlagSaver(bool &F, bool NewValue) : SavedValue(F), TheFlag(F) {
    F = NewValue;
  }
  /// Restores the flag to the value it held at construction time.
  ~BoolFlagSaver() { TheFlag = SavedValue; }

private:
  const bool SavedValue; ///< Value of the flag before the override.
  bool &TheFlag;         ///< Reference to the flag being managed.
};
52 53
53 template <class MachineTraits> class BoolFoldingEntry { 54 template <class MachineTraits> class BoolFoldingEntry {
54 BoolFoldingEntry(const BoolFoldingEntry &) = delete; 55 BoolFoldingEntry(const BoolFoldingEntry &) = delete;
55 56
56 public: 57 public:
57 BoolFoldingEntry() = default; 58 BoolFoldingEntry() = default;
58 explicit BoolFoldingEntry(Inst *I); 59 explicit BoolFoldingEntry(Inst *I);
59 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; 60 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;
60 // Instr is the instruction producing the i1-type variable of interest. 61 /// Instr is the instruction producing the i1-type variable of interest.
61 Inst *Instr = nullptr; 62 Inst *Instr = nullptr;
62 // IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). 63 /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
63 bool IsComplex = false; 64 bool IsComplex = false;
64 // IsLiveOut is initialized conservatively to true, and is set to false when 65 /// IsLiveOut is initialized conservatively to true, and is set to false when
65 // we encounter an instruction that ends Var's live range. We disable the 66 /// we encounter an instruction that ends Var's live range. We disable the
66 // folding optimization when Var is live beyond this basic block. Note that 67 /// folding optimization when Var is live beyond this basic block. Note that
67 // if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will 68 /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
68 // always be true and the folding optimization will never be performed. 69 /// always be true and the folding optimization will never be performed.
69 bool IsLiveOut = true; 70 bool IsLiveOut = true;
70 // NumUses counts the number of times Var is used as a source operand in the 71 // NumUses counts the number of times Var is used as a source operand in the
71 // basic block. If IsComplex is true and there is more than one use of Var, 72 // basic block. If IsComplex is true and there is more than one use of Var,
72 // then the folding optimization is disabled for Var. 73 // then the folding optimization is disabled for Var.
73 uint32_t NumUses = 0; 74 uint32_t NumUses = 0;
74 }; 75 };
75 76
76 template <class MachineTraits> class BoolFolding { 77 template <class MachineTraits> class BoolFolding {
77 public: 78 public:
78 enum BoolFoldingProducerKind { 79 enum BoolFoldingProducerKind {
79 PK_None, 80 PK_None,
80 PK_Icmp32, 81 PK_Icmp32,
81 PK_Icmp64, 82 PK_Icmp64,
82 PK_Fcmp, 83 PK_Fcmp,
83 PK_Trunc 84 PK_Trunc
84 }; 85 };
85 86
86 // Currently the actual enum values are not used (other than CK_None), but we 87 /// Currently the actual enum values are not used (other than CK_None), but we
87 // go 88 /// go
88 // ahead and produce them anyway for symmetry with the 89 /// ahead and produce them anyway for symmetry with the
89 // BoolFoldingProducerKind. 90 /// BoolFoldingProducerKind.
90 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; 91 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };
91 92
92 private: 93 private:
93 BoolFolding(const BoolFolding &) = delete; 94 BoolFolding(const BoolFolding &) = delete;
94 BoolFolding &operator=(const BoolFolding &) = delete; 95 BoolFolding &operator=(const BoolFolding &) = delete;
95 96
96 public: 97 public:
97 BoolFolding() = default; 98 BoolFolding() = default;
98 static BoolFoldingProducerKind getProducerKind(const Inst *Instr); 99 static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
99 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr); 100 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);
100 static bool hasComplexLowering(const Inst *Instr); 101 static bool hasComplexLowering(const Inst *Instr);
101 void init(CfgNode *Node); 102 void init(CfgNode *Node);
102 const Inst *getProducerFor(const Operand *Opnd) const; 103 const Inst *getProducerFor(const Operand *Opnd) const;
103 void dump(const Cfg *Func) const; 104 void dump(const Cfg *Func) const;
104 105
105 private: 106 private:
106 // Returns true if Producers contains a valid entry for the given VarNum. 107 /// Returns true if Producers contains a valid entry for the given VarNum.
107 bool containsValid(SizeT VarNum) const { 108 bool containsValid(SizeT VarNum) const {
108 auto Element = Producers.find(VarNum); 109 auto Element = Producers.find(VarNum);
109 return Element != Producers.end() && Element->second.Instr != nullptr; 110 return Element != Producers.end() && Element->second.Instr != nullptr;
110 } 111 }
111 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; } 112 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
112 // Producers maps Variable::Number to a BoolFoldingEntry. 113 /// Producers maps Variable::Number to a BoolFoldingEntry.
113 std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers; 114 std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers;
114 }; 115 };
115 116
116 template <class MachineTraits> 117 template <class MachineTraits>
117 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) 118 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I)
118 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} 119 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {}
119 120
120 template <class MachineTraits> 121 template <class MachineTraits>
121 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind 122 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
122 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { 123 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
155 return CK_None; 156 return CK_None;
156 case InstCast::Sext: 157 case InstCast::Sext:
157 return CK_Sext; 158 return CK_Sext;
158 case InstCast::Zext: 159 case InstCast::Zext:
159 return CK_Zext; 160 return CK_Zext;
160 } 161 }
161 } 162 }
162 return CK_None; 163 return CK_None;
163 } 164 }
164 165
165 // Returns true if the producing instruction has a "complex" lowering 166 /// Returns true if the producing instruction has a "complex" lowering
166 // sequence. This generally means that its lowering sequence requires 167 /// sequence. This generally means that its lowering sequence requires
167 // more than one conditional branch, namely 64-bit integer compares 168 /// more than one conditional branch, namely 64-bit integer compares
168 // and some floating-point compares. When this is true, and there is 169 /// and some floating-point compares. When this is true, and there is
169 // more than one consumer, we prefer to disable the folding 170 /// more than one consumer, we prefer to disable the folding
170 // optimization because it minimizes branches. 171 /// optimization because it minimizes branches.
171 template <class MachineTraits> 172 template <class MachineTraits>
172 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { 173 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
173 switch (getProducerKind(Instr)) { 174 switch (getProducerKind(Instr)) {
174 default: 175 default:
175 return false; 176 return false;
176 case PK_Icmp64: 177 case PK_Icmp64:
177 return true; 178 return true;
178 case PK_Fcmp: 179 case PK_Fcmp:
179 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] 180 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]
180 .C2 != MachineTraits::Cond::Br_None; 181 .C2 != MachineTraits::Cond::Br_None;
(...skipping 422 matching lines...) Expand 10 before | Expand all | Expand 10 after
603 } 604 }
604 605
605 // Converts a ConstantInteger32 operand into its constant value, or 606 // Converts a ConstantInteger32 operand into its constant value, or
606 // MemoryOrderInvalid if the operand is not a ConstantInteger32. 607 // MemoryOrderInvalid if the operand is not a ConstantInteger32.
607 uint64_t getConstantMemoryOrder(Operand *Opnd) { 608 uint64_t getConstantMemoryOrder(Operand *Opnd) {
608 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) 609 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
609 return Integer->getValue(); 610 return Integer->getValue();
610 return Intrinsics::MemoryOrderInvalid; 611 return Intrinsics::MemoryOrderInvalid;
611 } 612 }
612 613
613 // Determines whether the dest of a Load instruction can be folded 614 /// Determines whether the dest of a Load instruction can be folded
614 // into one of the src operands of a 2-operand instruction. This is 615 /// into one of the src operands of a 2-operand instruction. This is
615 // true as long as the load dest matches exactly one of the binary 616 /// true as long as the load dest matches exactly one of the binary
616 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if 617 /// instruction's src operands. Replaces Src0 or Src1 with LoadSrc if
617 // the answer is true. 618 /// the answer is true.
618 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, 619 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
619 Operand *&Src0, Operand *&Src1) { 620 Operand *&Src0, Operand *&Src1) {
620 if (Src0 == LoadDest && Src1 != LoadDest) { 621 if (Src0 == LoadDest && Src1 != LoadDest) {
621 Src0 = LoadSrc; 622 Src0 = LoadSrc;
622 return true; 623 return true;
623 } 624 }
624 if (Src0 != LoadDest && Src1 == LoadDest) { 625 if (Src0 != LoadDest && Src1 == LoadDest) {
625 Src1 = LoadSrc; 626 Src1 = LoadSrc;
626 return true; 627 return true;
627 } 628 }
(...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after
845 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); 846 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
846 RegisterArg->setRegNum(RegNum); 847 RegisterArg->setRegNum(RegNum);
847 RegisterArg->setIsArg(); 848 RegisterArg->setIsArg();
848 Arg->setIsArg(false); 849 Arg->setIsArg(false);
849 850
850 Args[I] = RegisterArg; 851 Args[I] = RegisterArg;
851 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); 852 Context.insert(InstAssign::create(Func, Arg, RegisterArg));
852 } 853 }
853 } 854 }
854 855
855 // Helper function for addProlog(). 856 /// Helper function for addProlog().
856 // 857 ///
857 // This assumes Arg is an argument passed on the stack. This sets the 858 /// This assumes Arg is an argument passed on the stack. This sets the
858 // frame offset for Arg and updates InArgsSizeBytes according to Arg's 859 /// frame offset for Arg and updates InArgsSizeBytes according to Arg's
859 // width. For an I64 arg that has been split into Lo and Hi components, 860 /// width. For an I64 arg that has been split into Lo and Hi components,
860 // it calls itself recursively on the components, taking care to handle 861 /// it calls itself recursively on the components, taking care to handle
861 // Lo first because of the little-endian architecture. Lastly, this 862 /// Lo first because of the little-endian architecture. Lastly, this
862 // function generates an instruction to copy Arg into its assigned 863 /// function generates an instruction to copy Arg into its assigned
863 // register if applicable. 864 /// register if applicable.
864 template <class Machine> 865 template <class Machine>
865 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, 866 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
866 Variable *FramePtr, 867 Variable *FramePtr,
867 size_t BasicFrameOffset, 868 size_t BasicFrameOffset,
868 size_t &InArgsSizeBytes) { 869 size_t &InArgsSizeBytes) {
869 Variable *Lo = Arg->getLo(); 870 Variable *Lo = Arg->getLo();
870 Variable *Hi = Arg->getHi(); 871 Variable *Hi = Arg->getHi();
871 Type Ty = Arg->getType(); 872 Type Ty = Arg->getType();
872 if (Lo && Hi && Ty == IceType_i64) { 873 if (Lo && Hi && Ty == IceType_i64) {
873 assert(Lo->getType() != IceType_i64); // don't want infinite recursion 874 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
(...skipping 466 matching lines...) Expand 10 before | Expand all | Expand 10 after
1340 // multiple of the required alignment at runtime. 1341 // multiple of the required alignment at runtime.
1341 Variable *T = makeReg(IceType_i32); 1342 Variable *T = makeReg(IceType_i32);
1342 _mov(T, TotalSize); 1343 _mov(T, TotalSize);
1343 _add(T, Ctx->getConstantInt32(Alignment - 1)); 1344 _add(T, Ctx->getConstantInt32(Alignment - 1));
1344 _and(T, Ctx->getConstantInt32(-Alignment)); 1345 _and(T, Ctx->getConstantInt32(-Alignment));
1345 _sub(esp, T); 1346 _sub(esp, T);
1346 } 1347 }
1347 _mov(Dest, esp); 1348 _mov(Dest, esp);
1348 } 1349 }
1349 1350
1350 // Strength-reduce scalar integer multiplication by a constant (for 1351 /// Strength-reduce scalar integer multiplication by a constant (for
1351 // i32 or narrower) for certain constants. The lea instruction can be 1352 /// i32 or narrower) for certain constants. The lea instruction can be
1352 // used to multiply by 3, 5, or 9, and the lsh instruction can be used 1353 /// used to multiply by 3, 5, or 9, and the lsh instruction can be used
1353 // to multiply by powers of 2. These can be combined such that 1354 /// to multiply by powers of 2. These can be combined such that
1354 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, 1355 /// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,
1355 // combined with left-shifting by 2. 1356 /// combined with left-shifting by 2.
1356 template <class Machine> 1357 template <class Machine>
1357 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, 1358 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,
1358 int32_t Src1) { 1359 int32_t Src1) {
1359 // Disable this optimization for Om1 and O0, just to keep things 1360 // Disable this optimization for Om1 and O0, just to keep things
1360 // simple there. 1361 // simple there.
1361 if (Ctx->getFlags().getOptLevel() < Opt_1) 1362 if (Ctx->getFlags().getOptLevel() < Opt_1)
1362 return false; 1363 return false;
1363 Type Ty = Dest->getType(); 1364 Type Ty = Dest->getType();
1364 Variable *T = nullptr; 1365 Variable *T = nullptr;
1365 if (Src1 == -1) { 1366 if (Src1 == -1) {
(...skipping 1018 matching lines...) Expand 10 before | Expand all | Expand 10 after
2384 if (DestTy == IceType_v16i8) { 2385 if (DestTy == IceType_v16i8) {
2385 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 2386 // onemask = materialize(1,1,...); dst = (src & onemask) > 0
2386 Variable *OneMask = makeVectorOfOnes(Dest->getType()); 2387 Variable *OneMask = makeVectorOfOnes(Dest->getType());
2387 Variable *T = makeReg(DestTy); 2388 Variable *T = makeReg(DestTy);
2388 _movp(T, Src0RM); 2389 _movp(T, Src0RM);
2389 _pand(T, OneMask); 2390 _pand(T, OneMask);
2390 Variable *Zeros = makeVectorOfZeros(Dest->getType()); 2391 Variable *Zeros = makeVectorOfZeros(Dest->getType());
2391 _pcmpgt(T, Zeros); 2392 _pcmpgt(T, Zeros);
2392 _movp(Dest, T); 2393 _movp(Dest, T);
2393 } else { 2394 } else {
2394 // width = width(elty) - 1; dest = (src << width) >> width 2395 /// width = width(elty) - 1; dest = (src << width) >> width
2395 SizeT ShiftAmount = 2396 SizeT ShiftAmount =
2396 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 2397 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -
2397 1; 2398 1;
2398 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); 2399 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
2399 Variable *T = makeReg(DestTy); 2400 Variable *T = makeReg(DestTy);
2400 _movp(T, Src0RM); 2401 _movp(T, Src0RM);
2401 _psll(T, ShiftConstant); 2402 _psll(T, ShiftConstant);
2402 _psra(T, ShiftConstant); 2403 _psra(T, ShiftConstant);
2403 _movp(Dest, T); 2404 _movp(Dest, T);
2404 } 2405 }
(...skipping 1527 matching lines...) Expand 10 before | Expand all | Expand 10 after
3932 // the end of the loop, since it will be re-used by the loop. 3933 // the end of the loop, since it will be re-used by the loop.
3933 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { 3934 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3934 Context.insert(InstFakeUse::create(Func, ValVar)); 3935 Context.insert(InstFakeUse::create(Func, ValVar));
3935 } 3936 }
3936 // The address base (if any) is also reused in the loop. 3937 // The address base (if any) is also reused in the loop.
3937 if (Variable *Base = Addr->getBase()) 3938 if (Variable *Base = Addr->getBase())
3938 Context.insert(InstFakeUse::create(Func, Base)); 3939 Context.insert(InstFakeUse::create(Func, Base));
3939 _mov(Dest, T_eax); 3940 _mov(Dest, T_eax);
3940 } 3941 }
3941 3942
3942 // Lowers count {trailing, leading} zeros intrinsic. 3943 /// Lowers count {trailing, leading} zeros intrinsic.
3943 // 3944 ///
3944 // We could do constant folding here, but that should have 3945 /// We could do constant folding here, but that should have
3945 // been done by the front-end/middle-end optimizations. 3946 /// been done by the front-end/middle-end optimizations.
3946 template <class Machine> 3947 template <class Machine>
3947 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, 3948 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
3948 Operand *FirstVal, 3949 Operand *FirstVal,
3949 Operand *SecondVal) { 3950 Operand *SecondVal) {
3950 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). 3951 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
3951 // Then the instructions will handle the Val == 0 case much more simply 3952 // Then the instructions will handle the Val == 0 case much more simply
3952 // and won't require conversion from bit position to number of zeros. 3953 // and won't require conversion from bit position to number of zeros.
3953 // 3954 //
3954 // Otherwise: 3955 // Otherwise:
3955 // bsr IF_NOT_ZERO, Val 3956 // bsr IF_NOT_ZERO, Val
(...skipping 645 matching lines...) Expand 10 before | Expand all | Expand 10 after
4601 4602
4602 // Insert the result into position. 4603 // Insert the result into position.
4603 Variable *DestT = Func->template makeVariable(Ty); 4604 Variable *DestT = Func->template makeVariable(Ty);
4604 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); 4605 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));
4605 T = DestT; 4606 T = DestT;
4606 } 4607 }
4607 4608
4608 lowerAssign(InstAssign::create(Func, Dest, T)); 4609 lowerAssign(InstAssign::create(Func, Dest, T));
4609 } 4610 }
4610 4611
4611 // The following pattern occurs often in lowered C and C++ code: 4612 /// The following pattern occurs often in lowered C and C++ code:
4612 // 4613 ///
4613 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 4614 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
4614 // %cmp.ext = sext <n x i1> %cmp to <n x ty> 4615 /// %cmp.ext = sext <n x i1> %cmp to <n x ty>
4615 // 4616 ///
4616 // We can eliminate the sext operation by copying the result of pcmpeqd, 4617 /// We can eliminate the sext operation by copying the result of pcmpeqd,
4617 // pcmpgtd, or cmpps (which produce sign extended results) to the result 4618 /// pcmpgtd, or cmpps (which produce sign extended results) to the result
4618 // of the sext operation. 4619 /// of the sext operation.
4619 template <class Machine> 4620 template <class Machine>
4620 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction( 4621 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction(
4621 Variable *SignExtendedResult) { 4622 Variable *SignExtendedResult) {
4622 if (InstCast *NextCast = 4623 if (InstCast *NextCast =
4623 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { 4624 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
4624 if (NextCast->getCastKind() == InstCast::Sext && 4625 if (NextCast->getCastKind() == InstCast::Sext &&
4625 NextCast->getSrc(0) == SignExtendedResult) { 4626 NextCast->getSrc(0) == SignExtendedResult) {
4626 NextCast->setDeleted(); 4627 NextCast->setDeleted();
4627 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); 4628 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
4628 // Skip over the instruction. 4629 // Skip over the instruction.
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
4714 4715
4715 template <class Machine> 4716 template <class Machine>
4716 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { 4717 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) {
4717 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { 4718 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) {
4718 lowerRMW(RMW); 4719 lowerRMW(RMW);
4719 } else { 4720 } else {
4720 TargetLowering::lowerOther(Instr); 4721 TargetLowering::lowerOther(Instr);
4721 } 4722 }
4722 } 4723 }
4723 4724
4724 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to 4725 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
4725 // preserve integrity of liveness analysis. Undef values are also 4726 /// preserve integrity of liveness analysis. Undef values are also
4726 // turned into zeroes, since loOperand() and hiOperand() don't expect 4727 /// turned into zeroes, since loOperand() and hiOperand() don't expect
4727 // Undef input. 4728 /// Undef input.
4728 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { 4729 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
4729 // Pause constant blinding or pooling, blinding or pooling will be done later 4730 // Pause constant blinding or pooling, blinding or pooling will be done later
4730 // during phi lowering assignments 4731 // during phi lowering assignments
4731 BoolFlagSaver B(RandomizationPoolingPaused, true); 4732 BoolFlagSaver B(RandomizationPoolingPaused, true);
4732 4733
4733 CfgNode *Node = Context.getNode(); 4734 CfgNode *Node = Context.getNode();
4734 for (Inst &I : Node->getPhis()) { 4735 for (Inst &I : Node->getPhis()) {
4735 auto Phi = llvm::dyn_cast<InstPhi>(&I); 4736 auto Phi = llvm::dyn_cast<InstPhi>(&I);
4736 if (Phi->isDeleted()) 4737 if (Phi->isDeleted())
4737 continue; 4738 continue;
(...skipping 25 matching lines...) Expand all
4763 // because they do in fact need a register to materialize the vector 4764 // because they do in fact need a register to materialize the vector
4764 // of zeroes into. 4765 // of zeroes into.
4765 if (llvm::isa<ConstantUndef>(Opnd)) 4766 if (llvm::isa<ConstantUndef>(Opnd))
4766 return isScalarFloatingType(Opnd->getType()) || 4767 return isScalarFloatingType(Opnd->getType()) ||
4767 isVectorType(Opnd->getType()); 4768 isVectorType(Opnd->getType());
4768 if (llvm::isa<Constant>(Opnd)) 4769 if (llvm::isa<Constant>(Opnd))
4769 return isScalarFloatingType(Opnd->getType()); 4770 return isScalarFloatingType(Opnd->getType());
4770 return true; 4771 return true;
4771 } 4772 }
4772 4773
4773 // Lower the pre-ordered list of assignments into mov instructions. 4774 /// Lower the pre-ordered list of assignments into mov instructions.
4774 // Also has to do some ad-hoc register allocation as necessary. 4775 /// Also has to do some ad-hoc register allocation as necessary.
4775 template <class Machine> 4776 template <class Machine>
4776 void TargetX86Base<Machine>::lowerPhiAssignments( 4777 void TargetX86Base<Machine>::lowerPhiAssignments(
4777 CfgNode *Node, const AssignList &Assignments) { 4778 CfgNode *Node, const AssignList &Assignments) {
4778 // Check that this is a properly initialized shell of a node. 4779 // Check that this is a properly initialized shell of a node.
4779 assert(Node->getOutEdges().size() == 1); 4780 assert(Node->getOutEdges().size() == 1);
4780 assert(Node->getInsts().empty()); 4781 assert(Node->getInsts().empty());
4781 assert(Node->getPhis().empty()); 4782 assert(Node->getPhis().empty());
4782 CfgNode *Succ = Node->getOutEdges().front(); 4783 CfgNode *Succ = Node->getOutEdges().front();
4783 getContext().init(Node); 4784 getContext().init(Node);
4784 // Register set setup similar to regAlloc(). 4785 // Register set setup similar to regAlloc().
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after
4968 // SSE has no left shift operation for vectors of 8 bit integers. 4969 // SSE has no left shift operation for vectors of 8 bit integers.
4969 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; 4970 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
4970 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); 4971 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
4971 Variable *Reg = makeReg(Ty, RegNum); 4972 Variable *Reg = makeReg(Ty, RegNum);
4972 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); 4973 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
4973 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); 4974 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
4974 return Reg; 4975 return Reg;
4975 } 4976 }
4976 } 4977 }
4977 4978
4978 // Construct a mask in a register that can be and'ed with a 4979 /// Construct a mask in a register that can be and'ed with a
4979 // floating-point value to mask off its sign bit. The value will be 4980 /// floating-point value to mask off its sign bit. The value will be
4980 // <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> 4981 /// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff>
4981 // for f64. Construct it as vector of ones logically right shifted 4982 /// for f64. Construct it as vector of ones logically right shifted
4982 // one bit. TODO(stichnot): Fix the wala TODO above, to represent 4983 /// one bit. TODO(stichnot): Fix the wala TODO above, to represent
4983 // vector constants in memory. 4984 /// vector constants in memory.
4984 template <class Machine> 4985 template <class Machine>
4985 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, 4986 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty,
4986 int32_t RegNum) { 4987 int32_t RegNum) {
4987 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); 4988 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
4988 _psrl(Reg, Ctx->getConstantInt8(1)); 4989 _psrl(Reg, Ctx->getConstantInt8(1));
4989 return Reg; 4990 return Reg;
4990 } 4991 }
4991 4992
4992 template <class Machine> 4993 template <class Machine>
4993 OperandX8632Mem * 4994 OperandX8632Mem *
4994 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, 4995 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
4995 uint32_t Offset) { 4996 uint32_t Offset) {
4996 // Ensure that Loc is a stack slot. 4997 // Ensure that Loc is a stack slot.
4997 assert(Slot->getWeight().isZero()); 4998 assert(Slot->getWeight().isZero());
4998 assert(Slot->getRegNum() == Variable::NoRegister); 4999 assert(Slot->getRegNum() == Variable::NoRegister);
4999 // Compute the location of Loc in memory. 5000 // Compute the location of Loc in memory.
5000 // TODO(wala,stichnot): lea should not be required. The address of 5001 // TODO(wala,stichnot): lea should not be required. The address of
5001 // the stack slot is known at compile time (although not until after 5002 // the stack slot is known at compile time (although not until after
5002 // addProlog()). 5003 // addProlog()).
5003 const Type PointerType = IceType_i32; 5004 const Type PointerType = IceType_i32;
5004 Variable *Loc = makeReg(PointerType); 5005 Variable *Loc = makeReg(PointerType);
5005 _lea(Loc, Slot); 5006 _lea(Loc, Slot);
5006 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); 5007 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
5007 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); 5008 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
5008 } 5009 }
5009 5010
5010 // Helper for legalize() to emit the right code to lower an operand to a 5011 /// Helper for legalize() to emit the right code to lower an operand to a
5011 // register of the appropriate type. 5012 /// register of the appropriate type.
5012 template <class Machine> 5013 template <class Machine>
5013 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { 5014 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
5014 Type Ty = Src->getType(); 5015 Type Ty = Src->getType();
5015 Variable *Reg = makeReg(Ty, RegNum); 5016 Variable *Reg = makeReg(Ty, RegNum);
5016 if (isVectorType(Ty)) { 5017 if (isVectorType(Ty)) {
5017 _movp(Reg, Src); 5018 _movp(Reg, Src);
5018 } else { 5019 } else {
5019 _mov(Reg, Src); 5020 _mov(Reg, Src);
5020 } 5021 }
5021 return Reg; 5022 return Reg;
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
5128 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || 5129 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
5129 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { 5130 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
5130 From = copyToReg(From, RegNum); 5131 From = copyToReg(From, RegNum);
5131 } 5132 }
5132 return From; 5133 return From;
5133 } 5134 }
5134 llvm_unreachable("Unhandled operand kind in legalize()"); 5135 llvm_unreachable("Unhandled operand kind in legalize()");
5135 return From; 5136 return From;
5136 } 5137 }
5137 5138
5138 // Provide a trivial wrapper to legalize() for this common usage. 5139 /// Provide a trivial wrapper to legalize() for this common usage.
5139 template <class Machine> 5140 template <class Machine>
5140 Variable *TargetX86Base<Machine>::legalizeToVar(Operand *From, int32_t RegNum) { 5141 Variable *TargetX86Base<Machine>::legalizeToVar(Operand *From, int32_t RegNum) {
5141 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); 5142 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
5142 } 5143 }
5143 5144
5144 // For the cmp instruction, if Src1 is an immediate, or known to be a 5145 /// For the cmp instruction, if Src1 is an immediate, or known to be a
5145 // physical register, we can allow Src0 to be a memory operand. 5146 /// physical register, we can allow Src0 to be a memory operand.
5146 // Otherwise, Src0 must be copied into a physical register. 5147 /// Otherwise, Src0 must be copied into a physical register.
5147 // (Actually, either Src0 or Src1 can be chosen for the physical 5148 /// (Actually, either Src0 or Src1 can be chosen for the physical
5148 // register, but unfortunately we have to commit to one or the other 5149 /// register, but unfortunately we have to commit to one or the other
5149 // before register allocation.) 5150 /// before register allocation.)
5150 template <class Machine> 5151 template <class Machine>
5151 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0, 5152 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0,
5152 Operand *Src1) { 5153 Operand *Src1) {
5153 bool IsSrc1ImmOrReg = false; 5154 bool IsSrc1ImmOrReg = false;
5154 if (llvm::isa<Constant>(Src1)) { 5155 if (llvm::isa<Constant>(Src1)) {
5155 IsSrc1ImmOrReg = true; 5156 IsSrc1ImmOrReg = true;
5156 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { 5157 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
5157 if (Var->hasReg()) 5158 if (Var->hasReg())
5158 IsSrc1ImmOrReg = true; 5159 IsSrc1ImmOrReg = true;
5159 } 5160 }
(...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after
5308 return; 5309 return;
5309 Ostream &Str = Ctx->getStrEmit(); 5310 Ostream &Str = Ctx->getStrEmit();
5310 C->emitPoolLabel(Str); 5311 C->emitPoolLabel(Str);
5311 } 5312 }
5312 5313
5313 template <class Machine> 5314 template <class Machine>
5314 void TargetX86Base<Machine>::emit(const ConstantUndef *) const { 5315 void TargetX86Base<Machine>::emit(const ConstantUndef *) const {
5315 llvm::report_fatal_error("undef value encountered by emitter."); 5316 llvm::report_fatal_error("undef value encountered by emitter.");
5316 } 5317 }
5317 5318
5318 // Randomize or pool an Immediate. 5319 /// Randomize or pool an Immediate.
5319 template <class Machine> 5320 template <class Machine>
5320 Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate, 5321 Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate,
5321 int32_t RegNum) { 5322 int32_t RegNum) {
5322 assert(llvm::isa<ConstantInteger32>(Immediate) || 5323 assert(llvm::isa<ConstantInteger32>(Immediate) ||
5323 llvm::isa<ConstantRelocatable>(Immediate)); 5324 llvm::isa<ConstantRelocatable>(Immediate));
5324 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || 5325 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
5325 RandomizationPoolingPaused == true) { 5326 RandomizationPoolingPaused == true) {
5326 // Immediates randomization/pooling off or paused 5327 // Immediates randomization/pooling off or paused
5327 return Immediate; 5328 return Immediate;
5328 } 5329 }
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after
5507 } 5508 }
5508 // the offset is not eligible for blinding or pooling, return the original 5509 // the offset is not eligible for blinding or pooling, return the original
5509 // mem operand 5510 // mem operand
5510 return MemOperand; 5511 return MemOperand;
5511 } 5512 }
5512 5513
5513 } // end of namespace X86Internal 5514 } // end of namespace X86Internal
5514 } // end of namespace Ice 5515 } // end of namespace Ice
5515 5516
5516 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5517 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | src/IceThreading.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698