Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(183)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1216963007: Doxygenize the documentation comments (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX86Base class, which 10 // This file implements the TargetLoweringX86Base class, which
(...skipping 16 matching lines...) Expand all
27 #include "IceOperand.h" 27 #include "IceOperand.h"
28 #include "IceRegistersX8632.h" 28 #include "IceRegistersX8632.h"
29 #include "IceTargetLoweringX8632.def" 29 #include "IceTargetLoweringX8632.def"
30 #include "IceTargetLoweringX8632.h" 30 #include "IceTargetLoweringX8632.h"
31 #include "IceUtils.h" 31 #include "IceUtils.h"
32 #include "llvm/Support/MathExtras.h" 32 #include "llvm/Support/MathExtras.h"
33 33
34 namespace Ice { 34 namespace Ice {
35 namespace X86Internal { 35 namespace X86Internal {
36 36
37 // A helper class to ease the settings of RandomizationPoolingPause 37 /// A helper class to ease the settings of RandomizationPoolingPause
38 // to disable constant blinding or pooling for some translation phases. 38 /// to disable constant blinding or pooling for some translation phases.
39 class BoolFlagSaver { 39 class BoolFlagSaver {
40 BoolFlagSaver() = delete; 40 BoolFlagSaver() = delete;
41 BoolFlagSaver(const BoolFlagSaver &) = delete; 41 BoolFlagSaver(const BoolFlagSaver &) = delete;
42 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete; 42 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;
43 43
44 public: 44 public:
45 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; } 45 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; }
46 ~BoolFlagSaver() { Flag = OldValue; } 46 ~BoolFlagSaver() { Flag = OldValue; }
47 47
48 private: 48 private:
49 const bool OldValue; 49 const bool OldValue;
50 bool &Flag; 50 bool &Flag;
51 }; 51 };
52 52
53 template <class MachineTraits> class BoolFoldingEntry { 53 template <class MachineTraits> class BoolFoldingEntry {
54 BoolFoldingEntry(const BoolFoldingEntry &) = delete; 54 BoolFoldingEntry(const BoolFoldingEntry &) = delete;
55 55
56 public: 56 public:
57 BoolFoldingEntry() = default; 57 BoolFoldingEntry() = default;
58 explicit BoolFoldingEntry(Inst *I); 58 explicit BoolFoldingEntry(Inst *I);
59 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; 59 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;
60 // Instr is the instruction producing the i1-type variable of interest. 60 /// Instr is the instruction producing the i1-type variable of interest.
61 Inst *Instr = nullptr; 61 Inst *Instr = nullptr;
62 // IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). 62 /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
63 bool IsComplex = false; 63 bool IsComplex = false;
64 // IsLiveOut is initialized conservatively to true, and is set to false when 64 /// IsLiveOut is initialized conservatively to true, and is set to false when
65 // we encounter an instruction that ends Var's live range. We disable the 65 /// we encounter an instruction that ends Var's live range. We disable the
66 // folding optimization when Var is live beyond this basic block. Note that 66 /// folding optimization when Var is live beyond this basic block. Note that
67 // if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will 67 /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
68 // always be true and the folding optimization will never be performed. 68 /// always be true and the folding optimization will never be performed.
69 bool IsLiveOut = true; 69 bool IsLiveOut = true;
70 // NumUses counts the number of times Var is used as a source operand in the 70 // NumUses counts the number of times Var is used as a source operand in the
71 // basic block. If IsComplex is true and there is more than one use of Var, 71 // basic block. If IsComplex is true and there is more than one use of Var,
72 // then the folding optimization is disabled for Var. 72 // then the folding optimization is disabled for Var.
73 uint32_t NumUses = 0; 73 uint32_t NumUses = 0;
74 }; 74 };
75 75
76 template <class MachineTraits> class BoolFolding { 76 template <class MachineTraits> class BoolFolding {
77 public: 77 public:
78 enum BoolFoldingProducerKind { 78 enum BoolFoldingProducerKind {
79 PK_None, 79 PK_None,
80 PK_Icmp32, 80 PK_Icmp32,
81 PK_Icmp64, 81 PK_Icmp64,
82 PK_Fcmp, 82 PK_Fcmp,
83 PK_Trunc 83 PK_Trunc
84 }; 84 };
85 85
86 // Currently the actual enum values are not used (other than CK_None), but we 86 /// Currently the actual enum values are not used (other than CK_None), but we
87 // go 87 /// go
88 // ahead and produce them anyway for symmetry with the 88 /// ahead and produce them anyway for symmetry with the
89 // BoolFoldingProducerKind. 89 /// BoolFoldingProducerKind.
90 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; 90 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };
91 91
92 private: 92 private:
93 BoolFolding(const BoolFolding &) = delete; 93 BoolFolding(const BoolFolding &) = delete;
94 BoolFolding &operator=(const BoolFolding &) = delete; 94 BoolFolding &operator=(const BoolFolding &) = delete;
95 95
96 public: 96 public:
97 BoolFolding() = default; 97 BoolFolding() = default;
98 static BoolFoldingProducerKind getProducerKind(const Inst *Instr); 98 static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
99 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr); 99 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);
100 static bool hasComplexLowering(const Inst *Instr); 100 static bool hasComplexLowering(const Inst *Instr);
101 void init(CfgNode *Node); 101 void init(CfgNode *Node);
102 const Inst *getProducerFor(const Operand *Opnd) const; 102 const Inst *getProducerFor(const Operand *Opnd) const;
103 void dump(const Cfg *Func) const; 103 void dump(const Cfg *Func) const;
104 104
105 private: 105 private:
106 // Returns true if Producers contains a valid entry for the given VarNum. 106 /// Returns true if Producers contains a valid entry for the given VarNum.
107 bool containsValid(SizeT VarNum) const { 107 bool containsValid(SizeT VarNum) const {
108 auto Element = Producers.find(VarNum); 108 auto Element = Producers.find(VarNum);
109 return Element != Producers.end() && Element->second.Instr != nullptr; 109 return Element != Producers.end() && Element->second.Instr != nullptr;
110 } 110 }
111 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; } 111 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }
112 // Producers maps Variable::Number to a BoolFoldingEntry. 112 /// Producers maps Variable::Number to a BoolFoldingEntry.
113 std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers; 113 std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers;
114 }; 114 };
115 115
116 template <class MachineTraits> 116 template <class MachineTraits>
117 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) 117 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I)
118 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} 118 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {}
119 119
120 template <class MachineTraits> 120 template <class MachineTraits>
121 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind 121 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind
122 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { 122 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
155 return CK_None; 155 return CK_None;
156 case InstCast::Sext: 156 case InstCast::Sext:
157 return CK_Sext; 157 return CK_Sext;
158 case InstCast::Zext: 158 case InstCast::Zext:
159 return CK_Zext; 159 return CK_Zext;
160 } 160 }
161 } 161 }
162 return CK_None; 162 return CK_None;
163 } 163 }
164 164
165 // Returns true if the producing instruction has a "complex" lowering 165 /// Returns true if the producing instruction has a "complex" lowering
166 // sequence. This generally means that its lowering sequence requires 166 /// sequence. This generally means that its lowering sequence requires
167 // more than one conditional branch, namely 64-bit integer compares 167 /// more than one conditional branch, namely 64-bit integer compares
168 // and some floating-point compares. When this is true, and there is 168 /// and some floating-point compares. When this is true, and there is
169 // more than one consumer, we prefer to disable the folding 169 /// more than one consumer, we prefer to disable the folding
170 // optimization because it minimizes branches. 170 /// optimization because it minimizes branches.
171 template <class MachineTraits> 171 template <class MachineTraits>
172 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { 172 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
173 switch (getProducerKind(Instr)) { 173 switch (getProducerKind(Instr)) {
174 default: 174 default:
175 return false; 175 return false;
176 case PK_Icmp64: 176 case PK_Icmp64:
177 return true; 177 return true;
178 case PK_Fcmp: 178 case PK_Fcmp:
179 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] 179 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]
180 .C2 != CondX86::Br_None; 180 .C2 != CondX86::Br_None;
(...skipping 422 matching lines...) Expand 10 before | Expand all | Expand 10 after
603 } 603 }
604 604
605 // Converts a ConstantInteger32 operand into its constant value, or 605 // Converts a ConstantInteger32 operand into its constant value, or
606 // MemoryOrderInvalid if the operand is not a ConstantInteger32. 606 // MemoryOrderInvalid if the operand is not a ConstantInteger32.
607 uint64_t getConstantMemoryOrder(Operand *Opnd) { 607 uint64_t getConstantMemoryOrder(Operand *Opnd) {
608 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) 608 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
609 return Integer->getValue(); 609 return Integer->getValue();
610 return Intrinsics::MemoryOrderInvalid; 610 return Intrinsics::MemoryOrderInvalid;
611 } 611 }
612 612
613 // Determines whether the dest of a Load instruction can be folded 613 /// Determines whether the dest of a Load instruction can be folded
614 // into one of the src operands of a 2-operand instruction. This is 614 /// into one of the src operands of a 2-operand instruction. This is
615 // true as long as the load dest matches exactly one of the binary 615 /// true as long as the load dest matches exactly one of the binary
616 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if 616 /// instruction's src operands. Replaces Src0 or Src1 with LoadSrc if
617 // the answer is true. 617 /// the answer is true.
618 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, 618 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
619 Operand *&Src0, Operand *&Src1) { 619 Operand *&Src0, Operand *&Src1) {
620 if (Src0 == LoadDest && Src1 != LoadDest) { 620 if (Src0 == LoadDest && Src1 != LoadDest) {
621 Src0 = LoadSrc; 621 Src0 = LoadSrc;
622 return true; 622 return true;
623 } 623 }
624 if (Src0 != LoadDest && Src1 == LoadDest) { 624 if (Src0 != LoadDest && Src1 == LoadDest) {
625 Src1 = LoadSrc; 625 Src1 = LoadSrc;
626 return true; 626 return true;
627 } 627 }
(...skipping 216 matching lines...) Expand 10 before | Expand all | Expand 10 after
844 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); 844 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
845 RegisterArg->setRegNum(RegNum); 845 RegisterArg->setRegNum(RegNum);
846 RegisterArg->setIsArg(); 846 RegisterArg->setIsArg();
847 Arg->setIsArg(false); 847 Arg->setIsArg(false);
848 848
849 Args[I] = RegisterArg; 849 Args[I] = RegisterArg;
850 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); 850 Context.insert(InstAssign::create(Func, Arg, RegisterArg));
851 } 851 }
852 } 852 }
853 853
854 // Helper function for addProlog(). 854 /// Helper function for addProlog().
855 // 855 ///
856 // This assumes Arg is an argument passed on the stack. This sets the 856 /// This assumes Arg is an argument passed on the stack. This sets the
857 // frame offset for Arg and updates InArgsSizeBytes according to Arg's 857 /// frame offset for Arg and updates InArgsSizeBytes according to Arg's
858 // width. For an I64 arg that has been split into Lo and Hi components, 858 /// width. For an I64 arg that has been split into Lo and Hi components,
859 // it calls itself recursively on the components, taking care to handle 859 /// it calls itself recursively on the components, taking care to handle
860 // Lo first because of the little-endian architecture. Lastly, this 860 /// Lo first because of the little-endian architecture. Lastly, this
861 // function generates an instruction to copy Arg into its assigned 861 /// function generates an instruction to copy Arg into its assigned
862 // register if applicable. 862 /// register if applicable.
863 template <class Machine> 863 template <class Machine>
864 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, 864 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
865 Variable *FramePtr, 865 Variable *FramePtr,
866 size_t BasicFrameOffset, 866 size_t BasicFrameOffset,
867 size_t &InArgsSizeBytes) { 867 size_t &InArgsSizeBytes) {
868 Variable *Lo = Arg->getLo(); 868 Variable *Lo = Arg->getLo();
869 Variable *Hi = Arg->getHi(); 869 Variable *Hi = Arg->getHi();
870 Type Ty = Arg->getType(); 870 Type Ty = Arg->getType();
871 if (Lo && Hi && Ty == IceType_i64) { 871 if (Lo && Hi && Ty == IceType_i64) {
872 assert(Lo->getType() != IceType_i64); // don't want infinite recursion 872 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
(...skipping 22 matching lines...) Expand all
895 // statistics. 895 // statistics.
896 Ctx->statsUpdateFills(); 896 Ctx->statsUpdateFills();
897 } 897 }
898 } 898 }
899 899
900 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { 900 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
901 return IceType_i32; 901 return IceType_i32;
902 } 902 }
903 903
904 template <class Machine> void TargetX86Base<Machine>::addProlog(CfgNode *Node) { 904 template <class Machine> void TargetX86Base<Machine>::addProlog(CfgNode *Node) {
905 // Stack frame layout: 905 // Stack frame layout:
Karl 2015/07/06 18:08:48 Not clear if this should be a doxygen comment.
ascull 2015/07/06 19:29:09 If it should be documented then it needs to be mov
jvoung (off chromium) 2015/07/06 20:07:43 Let's not doxygenize it for now. This is explaini
906 // 906 //
907 // +------------------------+ 907 // +------------------------+
908 // | 1. return address | 908 // | 1. return address |
909 // +------------------------+ 909 // +------------------------+
910 // | 2. preserved registers | 910 // | 2. preserved registers |
911 // +------------------------+ 911 // +------------------------+
912 // | 3. padding | 912 // | 3. padding |
913 // +------------------------+ 913 // +------------------------+
914 // | 4. global spill area | 914 // | 4. global spill area |
915 // +------------------------+ 915 // +------------------------+
(...skipping 223 matching lines...) Expand 10 before | Expand all | Expand 10 after
1139 SizeT j = CalleeSaves.size() - i - 1; 1139 SizeT j = CalleeSaves.size() - i - 1;
1140 if (j == RegX8632::Reg_ebp && IsEbpBasedFrame) 1140 if (j == RegX8632::Reg_ebp && IsEbpBasedFrame)
1141 continue; 1141 continue;
1142 if (CalleeSaves[j] && RegsUsed[j]) { 1142 if (CalleeSaves[j] && RegsUsed[j]) {
1143 _pop(getPhysicalRegister(j)); 1143 _pop(getPhysicalRegister(j));
1144 } 1144 }
1145 } 1145 }
1146 1146
1147 if (!Ctx->getFlags().getUseSandboxing()) 1147 if (!Ctx->getFlags().getUseSandboxing())
1148 return; 1148 return;
1149 // Change the original ret instruction into a sandboxed return sequence. 1149 /// Change the original ret instruction into a sandboxed return sequence.
Karl 2015/07/06 18:08:49 This is in the middle of a method. Should it have
ascull 2015/07/06 19:29:09 Done.
1150 // t:ecx = pop 1150 /// t:ecx = pop
1151 // bundle_lock 1151 /// bundle_lock
1152 // and t, ~31 1152 /// and t, ~31
1153 // jmp *t 1153 /// jmp *t
1154 // bundle_unlock 1154 /// bundle_unlock
1155 // FakeUse <original_ret_operand> 1155 /// FakeUse <original_ret_operand>
1156 const SizeT BundleSize = 1156 const SizeT BundleSize =
1157 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes(); 1157 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes();
1158 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); 1158 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
1159 _pop(T_ecx); 1159 _pop(T_ecx);
1160 _bundle_lock(); 1160 _bundle_lock();
1161 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1))); 1161 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1)));
1162 _jmp(T_ecx); 1162 _jmp(T_ecx);
1163 _bundle_unlock(); 1163 _bundle_unlock();
1164 if (RI->getSrcSize()) { 1164 if (RI->getSrcSize()) {
1165 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); 1165 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));
(...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after
1339 // multiple of the required alignment at runtime. 1339 // multiple of the required alignment at runtime.
1340 Variable *T = makeReg(IceType_i32); 1340 Variable *T = makeReg(IceType_i32);
1341 _mov(T, TotalSize); 1341 _mov(T, TotalSize);
1342 _add(T, Ctx->getConstantInt32(Alignment - 1)); 1342 _add(T, Ctx->getConstantInt32(Alignment - 1));
1343 _and(T, Ctx->getConstantInt32(-Alignment)); 1343 _and(T, Ctx->getConstantInt32(-Alignment));
1344 _sub(esp, T); 1344 _sub(esp, T);
1345 } 1345 }
1346 _mov(Dest, esp); 1346 _mov(Dest, esp);
1347 } 1347 }
1348 1348
1349 // Strength-reduce scalar integer multiplication by a constant (for 1349 /// Strength-reduce scalar integer multiplication by a constant (for
1350 // i32 or narrower) for certain constants. The lea instruction can be 1350 /// i32 or narrower) for certain constants. The lea instruction can be
1351 // used to multiply by 3, 5, or 9, and the lsh instruction can be used 1351 /// used to multiply by 3, 5, or 9, and the lsh instruction can be used
1352 // to multiply by powers of 2. These can be combined such that 1352 /// to multiply by powers of 2. These can be combined such that
1353 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, 1353 /// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,
1354 // combined with left-shifting by 2. 1354 /// combined with left-shifting by 2.
1355 template <class Machine> 1355 template <class Machine>
1356 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, 1356 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,
1357 int32_t Src1) { 1357 int32_t Src1) {
1358 // Disable this optimization for Om1 and O0, just to keep things 1358 // Disable this optimization for Om1 and O0, just to keep things
1359 // simple there. 1359 // simple there.
1360 if (Ctx->getFlags().getOptLevel() < Opt_1) 1360 if (Ctx->getFlags().getOptLevel() < Opt_1)
1361 return false; 1361 return false;
1362 Type Ty = Dest->getType(); 1362 Type Ty = Dest->getType();
1363 Variable *T = nullptr; 1363 Variable *T = nullptr;
1364 if (Src1 == -1) { 1364 if (Src1 == -1) {
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
1405 ++CountOps; 1405 ++CountOps;
1406 ++Count2; 1406 ++Count2;
1407 Src1 /= 2; 1407 Src1 /= 2;
1408 } else { 1408 } else {
1409 return false; 1409 return false;
1410 } 1410 }
1411 } 1411 }
1412 // Lea optimization only works for i16 and i32 types, not i8. 1412 // Lea optimization only works for i16 and i32 types, not i8.
1413 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) 1413 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))
1414 return false; 1414 return false;
1415 // Limit the number of lea/shl operations for a single multiply, to 1415 /// Limit the number of lea/shl operations for a single multiply, to
Karl 2015/07/06 18:08:49 Again, this is within a method. Should it have dox
ascull 2015/07/06 19:29:09 Done.
1416 // a somewhat arbitrary choice of 3. 1416 /// a somewhat arbitrary choice of 3.
1417 const uint32_t MaxOpsForOptimizedMul = 3; 1417 const uint32_t MaxOpsForOptimizedMul = 3;
1418 if (CountOps > MaxOpsForOptimizedMul) 1418 if (CountOps > MaxOpsForOptimizedMul)
1419 return false; 1419 return false;
1420 _mov(T, Src0); 1420 _mov(T, Src0);
1421 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1421 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1422 for (uint32_t i = 0; i < Count9; ++i) { 1422 for (uint32_t i = 0; i < Count9; ++i) {
1423 const uint16_t Shift = 3; // log2(9-1) 1423 const uint16_t Shift = 3; // log2(9-1)
1424 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift)); 1424 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift));
1425 _set_dest_nonkillable(); 1425 _set_dest_nonkillable();
1426 } 1426 }
(...skipping 335 matching lines...) Expand 10 before | Expand all | Expand 10 after
1762 // pshufd T3, Src1, {1,0,3,0} 1762 // pshufd T3, Src1, {1,0,3,0}
1763 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} 1763 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
1764 // pmuludq T1, Src1 1764 // pmuludq T1, Src1
1765 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} 1765 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]}
1766 // pmuludq T2, T3 1766 // pmuludq T2, T3
1767 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} 1767 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])}
1768 // shufps T1, T2, {0,2,0,2} 1768 // shufps T1, T2, {0,2,0,2}
1769 // pshufd T4, T1, {0,2,1,3} 1769 // pshufd T4, T1, {0,2,1,3}
1770 // movups Dest, T4 1770 // movups Dest, T4
1771 1771
1772 // Mask that directs pshufd to create a vector with entries 1772 /// Mask that directs pshufd to create a vector with entries
Karl 2015/07/06 18:08:48 Similar question here (not at declaration level).
ascull 2015/07/06 19:29:09 Done.
1773 // Src[1, 0, 3, 0] 1773 /// Src[1, 0, 3, 0]
1774 const unsigned Constant1030 = 0x31; 1774 const unsigned Constant1030 = 0x31;
1775 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030); 1775 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030);
1776 // Mask that directs shufps to create a vector with entries 1776 /// Mask that directs shufps to create a vector with entries
1777 // Dest[0, 2], Src[0, 2] 1777 /// Dest[0, 2], Src[0, 2]
1778 const unsigned Mask0202 = 0x88; 1778 const unsigned Mask0202 = 0x88;
1779 // Mask that directs pshufd to create a vector with entries 1779 /// Mask that directs pshufd to create a vector with entries
1780 // Src[0, 2, 1, 3] 1780 /// Src[0, 2, 1, 3]
1781 const unsigned Mask0213 = 0xd8; 1781 const unsigned Mask0213 = 0xd8;
1782 Variable *T1 = makeReg(IceType_v4i32); 1782 Variable *T1 = makeReg(IceType_v4i32);
1783 Variable *T2 = makeReg(IceType_v4i32); 1783 Variable *T2 = makeReg(IceType_v4i32);
1784 Variable *T3 = makeReg(IceType_v4i32); 1784 Variable *T3 = makeReg(IceType_v4i32);
1785 Variable *T4 = makeReg(IceType_v4i32); 1785 Variable *T4 = makeReg(IceType_v4i32);
1786 _movp(T1, Src0); 1786 _movp(T1, Src0);
1787 _pshufd(T2, Src0, Mask1030); 1787 _pshufd(T2, Src0, Mask1030);
1788 _pshufd(T3, Src1, Mask1030); 1788 _pshufd(T3, Src1, Mask1030);
1789 _pmuludq(T1, Src1); 1789 _pmuludq(T1, Src1);
1790 _pmuludq(T2, T3); 1790 _pmuludq(T2, T3);
(...skipping 589 matching lines...) Expand 10 before | Expand all | Expand 10 after
2380 if (DestTy == IceType_v16i8) { 2380 if (DestTy == IceType_v16i8) {
2381 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 2381 // onemask = materialize(1,1,...); dst = (src & onemask) > 0
2382 Variable *OneMask = makeVectorOfOnes(Dest->getType()); 2382 Variable *OneMask = makeVectorOfOnes(Dest->getType());
2383 Variable *T = makeReg(DestTy); 2383 Variable *T = makeReg(DestTy);
2384 _movp(T, Src0RM); 2384 _movp(T, Src0RM);
2385 _pand(T, OneMask); 2385 _pand(T, OneMask);
2386 Variable *Zeros = makeVectorOfZeros(Dest->getType()); 2386 Variable *Zeros = makeVectorOfZeros(Dest->getType());
2387 _pcmpgt(T, Zeros); 2387 _pcmpgt(T, Zeros);
2388 _movp(Dest, T); 2388 _movp(Dest, T);
2389 } else { 2389 } else {
2390 // width = width(elty) - 1; dest = (src << width) >> width 2390 /// width = width(elty) - 1; dest = (src << width) >> width
2391 SizeT ShiftAmount = 2391 SizeT ShiftAmount =
2392 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 2392 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -
2393 1; 2393 1;
2394 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); 2394 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);
2395 Variable *T = makeReg(DestTy); 2395 Variable *T = makeReg(DestTy);
2396 _movp(T, Src0RM); 2396 _movp(T, Src0RM);
2397 _psll(T, ShiftConstant); 2397 _psll(T, ShiftConstant);
2398 _psra(T, ShiftConstant); 2398 _psra(T, ShiftConstant);
2399 _movp(Dest, T); 2399 _movp(Dest, T);
2400 } 2400 }
(...skipping 211 matching lines...) Expand 10 before | Expand all | Expand 10 after
2612 if (isVectorType(Dest->getType())) { 2612 if (isVectorType(Dest->getType())) {
2613 assert(Dest->getType() == IceType_v4f32 && 2613 assert(Dest->getType() == IceType_v4f32 &&
2614 Inst->getSrc(0)->getType() == IceType_v4i32); 2614 Inst->getSrc(0)->getType() == IceType_v4i32);
2615 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2615 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2616 if (llvm::isa<OperandX8632Mem>(Src0RM)) 2616 if (llvm::isa<OperandX8632Mem>(Src0RM))
2617 Src0RM = legalizeToVar(Src0RM); 2617 Src0RM = legalizeToVar(Src0RM);
2618 Variable *T = makeReg(Dest->getType()); 2618 Variable *T = makeReg(Dest->getType());
2619 _cvt(T, Src0RM, InstX8632Cvt::Dq2ps); 2619 _cvt(T, Src0RM, InstX8632Cvt::Dq2ps);
2620 _movp(Dest, T); 2620 _movp(Dest, T);
2621 } else if (Inst->getSrc(0)->getType() == IceType_i64) { 2621 } else if (Inst->getSrc(0)->getType() == IceType_i64) {
2622 // Use a helper for x86-32. 2622 /// Use a helper for x86-32.
Karl 2015/07/06 18:08:49 Should this be a doxygen comment (inside method).
ascull 2015/07/06 19:29:09 Done.
2623 const SizeT MaxSrcs = 1; 2623 const SizeT MaxSrcs = 1;
2624 Type DestType = Dest->getType(); 2624 Type DestType = Dest->getType();
2625 InstCall *Call = 2625 InstCall *Call =
2626 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 2626 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
2627 : H_sitofp_i64_f64, 2627 : H_sitofp_i64_f64,
2628 Dest, MaxSrcs); 2628 Dest, MaxSrcs);
2629 // TODO: Call the correct compiler-rt helper function. 2629 // TODO: Call the correct compiler-rt helper function.
2630 Call->addArg(Inst->getSrc(0)); 2630 Call->addArg(Inst->getSrc(0));
2631 lowerCall(Call); 2631 lowerCall(Call);
2632 return; 2632 return;
(...skipping 15 matching lines...) Expand all
2648 Operand *Src0 = Inst->getSrc(0); 2648 Operand *Src0 = Inst->getSrc(0);
2649 if (isVectorType(Src0->getType())) { 2649 if (isVectorType(Src0->getType())) {
2650 assert(Dest->getType() == IceType_v4f32 && 2650 assert(Dest->getType() == IceType_v4f32 &&
2651 Src0->getType() == IceType_v4i32); 2651 Src0->getType() == IceType_v4i32);
2652 const SizeT MaxSrcs = 1; 2652 const SizeT MaxSrcs = 1;
2653 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); 2653 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
2654 Call->addArg(Src0); 2654 Call->addArg(Src0);
2655 lowerCall(Call); 2655 lowerCall(Call);
2656 } else if (Src0->getType() == IceType_i64 || 2656 } else if (Src0->getType() == IceType_i64 ||
2657 Src0->getType() == IceType_i32) { 2657 Src0->getType() == IceType_i32) {
2658 // Use a helper for x86-32 and x86-64. Also use a helper for 2658 /// Use a helper for x86-32 and x86-64. Also use a helper for
2659 // i32 on x86-32. 2659 /// i32 on x86-32.
Karl 2015/07/06 18:08:48 Again, not a declaration. Should it be /// ?
ascull 2015/07/06 19:29:09 Done.
2660 const SizeT MaxSrcs = 1; 2660 const SizeT MaxSrcs = 1;
2661 Type DestType = Dest->getType(); 2661 Type DestType = Dest->getType();
2662 IceString TargetString; 2662 IceString TargetString;
2663 if (isInt32Asserting32Or64(Src0->getType())) { 2663 if (isInt32Asserting32Or64(Src0->getType())) {
2664 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 2664 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
2665 : H_uitofp_i32_f64; 2665 : H_uitofp_i32_f64;
2666 } else { 2666 } else {
2667 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 2667 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
2668 : H_uitofp_i64_f64; 2668 : H_uitofp_i64_f64;
2669 } 2669 }
(...skipping 558 matching lines...) Expand 10 before | Expand all | Expand 10 after
3228 } 3228 }
3229 3229
3230 if (Index == 0) { 3230 if (Index == 0) {
3231 Variable *T = makeReg(Ty); 3231 Variable *T = makeReg(Ty);
3232 _movp(T, SourceVectRM); 3232 _movp(T, SourceVectRM);
3233 _movss(T, ElementR); 3233 _movss(T, ElementR);
3234 _movp(Inst->getDest(), T); 3234 _movp(Inst->getDest(), T);
3235 return; 3235 return;
3236 } 3236 }
3237 3237
3238 // shufps treats the source and desination operands as vectors of 3238 // shufps treats the source and destination operands as vectors of
3239 // four doublewords. The destination's two high doublewords are 3239 // four doublewords. The destination's two high doublewords are
3240 // selected from the source operand and the two low doublewords are 3240 // selected from the source operand and the two low doublewords are
3241 // selected from the (original value of) the destination operand. 3241 // selected from the (original value of) the destination operand.
3242 // An insertelement operation can be effected with a sequence of two 3242 // An insertelement operation can be effected with a sequence of two
3243 // shufps operations with appropriate masks. In all cases below, 3243 // shufps operations with appropriate masks. In all cases below,
3244 // Element[0] is being inserted into SourceVectOperand. Indices are 3244 // Element[0] is being inserted into SourceVectOperand. Indices are
3245 // ordered from left to right. 3245 // ordered from left to right.
3246 // 3246 //
3247 // insertelement into index 1 (result is stored in ElementR): 3247 // insertelement into index 1 (result is stored in ElementR):
3248 // ElementR := ElementR[0, 0] SourceVectRM[0, 0] 3248 // ElementR := ElementR[0, 0] SourceVectRM[0, 0]
3249 // ElementR := ElementR[3, 0] SourceVectRM[2, 3] 3249 // ElementR := ElementR[3, 0] SourceVectRM[2, 3]
3250 // 3250 //
3251 // insertelement into index 2 (result is stored in T): 3251 // insertelement into index 2 (result is stored in T):
3252 // T := SourceVectRM 3252 // T := SourceVectRM
3253 // ElementR := ElementR[0, 0] T[0, 3] 3253 // ElementR := ElementR[0, 0] T[0, 3]
3254 // T := T[0, 1] ElementR[0, 3] 3254 // T := T[0, 1] ElementR[0, 3]
3255 // 3255 //
3256 // insertelement into index 3 (result is stored in T): 3256 // insertelement into index 3 (result is stored in T):
3257 // T := SourceVectRM 3257 // T := SourceVectRM
3258 // ElementR := ElementR[0, 0] T[0, 2] 3258 // ElementR := ElementR[0, 0] T[0, 2]
3259 // T := T[0, 1] ElementR[3, 0] 3259 // T := T[0, 1] ElementR[3, 0]
Karl 2015/07/06 18:08:49 Again, inside method. Should this be /// ?
ascull 2015/07/06 19:29:09 Done.
3260 const unsigned char Mask1[3] = {0, 192, 128}; 3260 const unsigned char Mask1[3] = {0, 192, 128};
3261 const unsigned char Mask2[3] = {227, 196, 52}; 3261 const unsigned char Mask2[3] = {227, 196, 52};
3262 3262
3263 Constant *Mask1Constant = Ctx->getConstantInt32(Mask1[Index - 1]); 3263 Constant *Mask1Constant = Ctx->getConstantInt32(Mask1[Index - 1]);
3264 Constant *Mask2Constant = Ctx->getConstantInt32(Mask2[Index - 1]); 3264 Constant *Mask2Constant = Ctx->getConstantInt32(Mask2[Index - 1]);
3265 3265
3266 if (Index == 1) { 3266 if (Index == 1) {
3267 _shufps(ElementR, SourceVectRM, Mask1Constant); 3267 _shufps(ElementR, SourceVectRM, Mask1Constant);
3268 _shufps(ElementR, SourceVectRM, Mask2Constant); 3268 _shufps(ElementR, SourceVectRM, Mask2Constant);
3269 _movp(Inst->getDest(), ElementR); 3269 _movp(Inst->getDest(), ElementR);
(...skipping 654 matching lines...) Expand 10 before | Expand all | Expand 10 after
3924 // the end of the loop, since it will be re-used by the loop. 3924 // the end of the loop, since it will be re-used by the loop.
3925 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { 3925 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3926 Context.insert(InstFakeUse::create(Func, ValVar)); 3926 Context.insert(InstFakeUse::create(Func, ValVar));
3927 } 3927 }
3928 // The address base (if any) is also reused in the loop. 3928 // The address base (if any) is also reused in the loop.
3929 if (Variable *Base = Addr->getBase()) 3929 if (Variable *Base = Addr->getBase())
3930 Context.insert(InstFakeUse::create(Func, Base)); 3930 Context.insert(InstFakeUse::create(Func, Base));
3931 _mov(Dest, T_eax); 3931 _mov(Dest, T_eax);
3932 } 3932 }
3933 3933
3934 // Lowers count {trailing, leading} zeros intrinsic. 3934 /// Lowers count {trailing, leading} zeros intrinsic.
3935 // 3935 ///
3936 // We could do constant folding here, but that should have 3936 /// We could do constant folding here, but that should have
3937 // been done by the front-end/middle-end optimizations. 3937 /// been done by the front-end/middle-end optimizations.
3938 template <class Machine> 3938 template <class Machine>
3939 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, 3939 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,
3940 Operand *FirstVal, 3940 Operand *FirstVal,
3941 Operand *SecondVal) { 3941 Operand *SecondVal) {
3942 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). 3942 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).
3943 // Then the instructions will handle the Val == 0 case much more simply 3943 // Then the instructions will handle the Val == 0 case much more simply
3944 // and won't require conversion from bit position to number of zeros. 3944 // and won't require conversion from bit position to number of zeros.
3945 // 3945 //
3946 // Otherwise: 3946 // Otherwise:
3947 // bsr IF_NOT_ZERO, Val 3947 // bsr IF_NOT_ZERO, Val
(...skipping 315 matching lines...) Expand 10 before | Expand all | Expand 10 after
4263 lowerAssign(Assign); 4263 lowerAssign(Assign);
4264 } 4264 }
4265 4265
4266 template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() { 4266 template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() {
4267 Inst *Inst = Context.getCur(); 4267 Inst *Inst = Context.getCur();
4268 Variable *Dest = Inst->getDest(); 4268 Variable *Dest = Inst->getDest();
4269 Operand *Addr = Inst->getSrc(0); 4269 Operand *Addr = Inst->getSrc(0);
4270 Variable *Index = nullptr; 4270 Variable *Index = nullptr;
4271 uint16_t Shift = 0; 4271 uint16_t Shift = 0;
4272 int32_t Offset = 0; // TODO: make Constant 4272 int32_t Offset = 0; // TODO: make Constant
4273 // Vanilla ICE load instructions should not use the segment registers, 4273 // Vanilla ICE load instructions should not use the segment registers,
4274 // and computeAddressOpt only works at the level of Variables and Constants, 4274 // and computeAddressOpt only works at the level of Variables and Constants,
4275 // not other OperandX8632Mem, so there should be no mention of segment 4275 // not other OperandX8632Mem, so there should be no mention of segment
4276 // registers there either. 4276 // registers there either.
Karl 2015/07/06 18:08:48 Should this be /// (inside method).
ascull 2015/07/06 19:29:09 Done.
4277 const OperandX8632Mem::SegmentRegisters SegmentReg = 4277 const OperandX8632Mem::SegmentRegisters SegmentReg =
4278 OperandX8632Mem::DefaultSegment; 4278 OperandX8632Mem::DefaultSegment;
4279 Variable *Base = llvm::dyn_cast<Variable>(Addr); 4279 Variable *Base = llvm::dyn_cast<Variable>(Addr);
4280 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 4280 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
4281 if (Base && Addr != Base) { 4281 if (Base && Addr != Base) {
4282 Inst->setDeleted(); 4282 Inst->setDeleted();
4283 Constant *OffsetOp = Ctx->getConstantInt32(Offset); 4283 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
4284 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, 4284 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,
4285 Shift, SegmentReg); 4285 Shift, SegmentReg);
4286 Context.insert(InstLoad::create(Func, Dest, Addr)); 4286 Context.insert(InstLoad::create(Func, Dest, Addr));
(...skipping 207 matching lines...) Expand 10 before | Expand all | Expand 10 after
4494 } 4494 }
4495 4495
4496 template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() { 4496 template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() {
4497 InstStore *Inst = llvm::cast<InstStore>(Context.getCur()); 4497 InstStore *Inst = llvm::cast<InstStore>(Context.getCur());
4498 Operand *Data = Inst->getData(); 4498 Operand *Data = Inst->getData();
4499 Operand *Addr = Inst->getAddr(); 4499 Operand *Addr = Inst->getAddr();
4500 Variable *Index = nullptr; 4500 Variable *Index = nullptr;
4501 uint16_t Shift = 0; 4501 uint16_t Shift = 0;
4502 int32_t Offset = 0; // TODO: make Constant 4502 int32_t Offset = 0; // TODO: make Constant
4503 Variable *Base = llvm::dyn_cast<Variable>(Addr); 4503 Variable *Base = llvm::dyn_cast<Variable>(Addr);
4504 // Vanilla ICE store instructions should not use the segment registers, 4504 // Vanilla ICE store instructions should not use the segment registers,
4505 // and computeAddressOpt only works at the level of Variables and Constants, 4505 // and computeAddressOpt only works at the level of Variables and Constants,
4506 // not other OperandX8632Mem, so there should be no mention of segment 4506 // not other OperandX8632Mem, so there should be no mention of segment
4507 // registers there either. 4507 // registers there either.
Karl 2015/07/06 18:08:48 Similar here (inside method).
ascull 2015/07/06 19:29:10 Done.
4508 const OperandX8632Mem::SegmentRegisters SegmentReg = 4508 const OperandX8632Mem::SegmentRegisters SegmentReg =
4509 OperandX8632Mem::DefaultSegment; 4509 OperandX8632Mem::DefaultSegment;
4510 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 4510 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
4511 if (Base && Addr != Base) { 4511 if (Base && Addr != Base) {
4512 Inst->setDeleted(); 4512 Inst->setDeleted();
4513 Constant *OffsetOp = Ctx->getConstantInt32(Offset); 4513 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
4514 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, 4514 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,
4515 Shift, SegmentReg); 4515 Shift, SegmentReg);
4516 InstStore *NewStore = InstStore::create(Func, Data, Addr); 4516 InstStore *NewStore = InstStore::create(Func, Data, Addr);
4517 if (Inst->getDest()) 4517 if (Inst->getDest())
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
4590 4590
4591 // Insert the result into position. 4591 // Insert the result into position.
4592 Variable *DestT = Func->template makeVariable(Ty); 4592 Variable *DestT = Func->template makeVariable(Ty);
4593 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); 4593 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index));
4594 T = DestT; 4594 T = DestT;
4595 } 4595 }
4596 4596
4597 lowerAssign(InstAssign::create(Func, Dest, T)); 4597 lowerAssign(InstAssign::create(Func, Dest, T));
4598 } 4598 }
4599 4599
4600 // The following pattern occurs often in lowered C and C++ code: 4600 /// The following pattern occurs often in lowered C and C++ code:
4601 // 4601 ///
4602 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 4602 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
4603 // %cmp.ext = sext <n x i1> %cmp to <n x ty> 4603 /// %cmp.ext = sext <n x i1> %cmp to <n x ty>
4604 // 4604 ///
4605 // We can eliminate the sext operation by copying the result of pcmpeqd, 4605 /// We can eliminate the sext operation by copying the result of pcmpeqd,
4606 // pcmpgtd, or cmpps (which produce sign extended results) to the result 4606 /// pcmpgtd, or cmpps (which produce sign extended results) to the result
4607 // of the sext operation. 4607 /// of the sext operation.
4608 template <class Machine> 4608 template <class Machine>
4609 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction( 4609 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction(
4610 Variable *SignExtendedResult) { 4610 Variable *SignExtendedResult) {
4611 if (InstCast *NextCast = 4611 if (InstCast *NextCast =
4612 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { 4612 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
4613 if (NextCast->getCastKind() == InstCast::Sext && 4613 if (NextCast->getCastKind() == InstCast::Sext &&
4614 NextCast->getSrc(0) == SignExtendedResult) { 4614 NextCast->getSrc(0) == SignExtendedResult) {
4615 NextCast->setDeleted(); 4615 NextCast->setDeleted();
4616 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); 4616 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));
4617 // Skip over the instruction. 4617 // Skip over the instruction.
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
4703 4703
4704 template <class Machine> 4704 template <class Machine>
4705 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { 4705 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) {
4706 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { 4706 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) {
4707 lowerRMW(RMW); 4707 lowerRMW(RMW);
4708 } else { 4708 } else {
4709 TargetLowering::lowerOther(Instr); 4709 TargetLowering::lowerOther(Instr);
4710 } 4710 }
4711 } 4711 }
4712 4712
4713 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to 4713 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
4714 // preserve integrity of liveness analysis. Undef values are also 4714 /// preserve integrity of liveness analysis. Undef values are also
4715 // turned into zeroes, since loOperand() and hiOperand() don't expect 4715 /// turned into zeroes, since loOperand() and hiOperand() don't expect
4716 // Undef input. 4716 /// Undef input.
4717 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { 4717 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
4718 // Pause constant blinding or pooling, blinding or pooling will be done later 4718 // Pause constant blinding or pooling, blinding or pooling will be done later
4719 // during phi lowering assignments 4719 // during phi lowering assignments
4720 BoolFlagSaver B(RandomizationPoolingPaused, true); 4720 BoolFlagSaver B(RandomizationPoolingPaused, true);
4721 4721
4722 CfgNode *Node = Context.getNode(); 4722 CfgNode *Node = Context.getNode();
4723 for (Inst &I : Node->getPhis()) { 4723 for (Inst &I : Node->getPhis()) {
4724 auto Phi = llvm::dyn_cast<InstPhi>(&I); 4724 auto Phi = llvm::dyn_cast<InstPhi>(&I);
4725 if (Phi->isDeleted()) 4725 if (Phi->isDeleted())
4726 continue; 4726 continue;
(...skipping 25 matching lines...) Expand all
4752 // because they do in fact need a register to materialize the vector 4752 // because they do in fact need a register to materialize the vector
4753 // of zeroes into. 4753 // of zeroes into.
4754 if (llvm::isa<ConstantUndef>(Opnd)) 4754 if (llvm::isa<ConstantUndef>(Opnd))
4755 return isScalarFloatingType(Opnd->getType()) || 4755 return isScalarFloatingType(Opnd->getType()) ||
4756 isVectorType(Opnd->getType()); 4756 isVectorType(Opnd->getType());
4757 if (llvm::isa<Constant>(Opnd)) 4757 if (llvm::isa<Constant>(Opnd))
4758 return isScalarFloatingType(Opnd->getType()); 4758 return isScalarFloatingType(Opnd->getType());
4759 return true; 4759 return true;
4760 } 4760 }
4761 4761
4762 // Lower the pre-ordered list of assignments into mov instructions. 4762 /// Lower the pre-ordered list of assignments into mov instructions.
4763 // Also has to do some ad-hoc register allocation as necessary. 4763 /// Also has to do some ad-hoc register allocation as necessary.
4764 template <class Machine> 4764 template <class Machine>
4765 void TargetX86Base<Machine>::lowerPhiAssignments( 4765 void TargetX86Base<Machine>::lowerPhiAssignments(
4766 CfgNode *Node, const AssignList &Assignments) { 4766 CfgNode *Node, const AssignList &Assignments) {
4767 // Check that this is a properly initialized shell of a node. 4767 // Check that this is a properly initialized shell of a node.
4768 assert(Node->getOutEdges().size() == 1); 4768 assert(Node->getOutEdges().size() == 1);
4769 assert(Node->getInsts().empty()); 4769 assert(Node->getInsts().empty());
4770 assert(Node->getPhis().empty()); 4770 assert(Node->getPhis().empty());
4771 CfgNode *Succ = Node->getOutEdges().front(); 4771 CfgNode *Succ = Node->getOutEdges().front();
4772 getContext().init(Node); 4772 getContext().init(Node);
4773 // Register set setup similar to regAlloc(). 4773 // Register set setup similar to regAlloc().
(...skipping 173 matching lines...) Expand 10 before | Expand all | Expand 10 after
4947 int32_t RegNum) { 4947 int32_t RegNum) {
4948 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || 4948 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||
4949 Ty == IceType_v16i8); 4949 Ty == IceType_v16i8);
4950 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { 4950 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {
4951 Variable *Reg = makeVectorOfOnes(Ty, RegNum); 4951 Variable *Reg = makeVectorOfOnes(Ty, RegNum);
4952 SizeT Shift = 4952 SizeT Shift =
4953 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1; 4953 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;
4954 _psll(Reg, Ctx->getConstantInt8(Shift)); 4954 _psll(Reg, Ctx->getConstantInt8(Shift));
4955 return Reg; 4955 return Reg;
4956 } else { 4956 } else {
4957 // SSE has no left shift operation for vectors of 8 bit integers. 4957 // SSE has no left shift operation for vectors of 8 bit integers.
Karl 2015/07/06 18:08:49 Why? (inside method).
ascull 2015/07/06 19:29:09 Done.
4958 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; 4958 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
4959 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); 4959 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
4960 Variable *Reg = makeReg(Ty, RegNum); 4960 Variable *Reg = makeReg(Ty, RegNum);
4961 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); 4961 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
4962 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); 4962 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
4963 return Reg; 4963 return Reg;
4964 } 4964 }
4965 } 4965 }
4966 4966
4967 // Construct a mask in a register that can be and'ed with a 4967 /// Construct a mask in a register that can be and'ed with a
4968 // floating-point value to mask off its sign bit. The value will be 4968 /// floating-point value to mask off its sign bit. The value will be
4969 // <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> 4969 /// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff>
4970 // for f64. Construct it as vector of ones logically right shifted 4970 /// for f64. Construct it as vector of ones logically right shifted
4971 // one bit. TODO(stichnot): Fix the wala TODO above, to represent 4971 /// one bit. TODO(stichnot): Fix the wala TODO above, to represent
4972 // vector constants in memory. 4972 /// vector constants in memory.
4973 template <class Machine> 4973 template <class Machine>
4974 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, 4974 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty,
4975 int32_t RegNum) { 4975 int32_t RegNum) {
4976 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); 4976 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
4977 _psrl(Reg, Ctx->getConstantInt8(1)); 4977 _psrl(Reg, Ctx->getConstantInt8(1));
4978 return Reg; 4978 return Reg;
4979 } 4979 }
4980 4980
4981 template <class Machine> 4981 template <class Machine>
4982 OperandX8632Mem * 4982 OperandX8632Mem *
4983 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, 4983 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
4984 uint32_t Offset) { 4984 uint32_t Offset) {
4985 // Ensure that Loc is a stack slot. 4985 // Ensure that Loc is a stack slot.
4986 assert(Slot->getWeight().isZero()); 4986 assert(Slot->getWeight().isZero());
4987 assert(Slot->getRegNum() == Variable::NoRegister); 4987 assert(Slot->getRegNum() == Variable::NoRegister);
4988 // Compute the location of Loc in memory. 4988 // Compute the location of Loc in memory.
Karl 2015/07/06 18:08:49 Why? (inside method).
ascull 2015/07/06 19:29:09 Done.
4989 // TODO(wala,stichnot): lea should not be required. The address of 4989 // TODO(wala,stichnot): lea should not be required. The address of
4990 // the stack slot is known at compile time (although not until after 4990 // the stack slot is known at compile time (although not until after
4991 // addProlog()). 4991 // addProlog()).
4992 const Type PointerType = IceType_i32; 4992 const Type PointerType = IceType_i32;
4993 Variable *Loc = makeReg(PointerType); 4993 Variable *Loc = makeReg(PointerType);
4994 _lea(Loc, Slot); 4994 _lea(Loc, Slot);
4995 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); 4995 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
4996 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); 4996 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
4997 } 4997 }
4998 4998
4999 // Helper for legalize() to emit the right code to lower an operand to a 4999 /// Helper for legalize() to emit the right code to lower an operand to a
5000 // register of the appropriate type. 5000 /// register of the appropriate type.
5001 template <class Machine> 5001 template <class Machine>
5002 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { 5002 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
5003 Type Ty = Src->getType(); 5003 Type Ty = Src->getType();
5004 Variable *Reg = makeReg(Ty, RegNum); 5004 Variable *Reg = makeReg(Ty, RegNum);
5005 if (isVectorType(Ty)) { 5005 if (isVectorType(Ty)) {
5006 _movp(Reg, Src); 5006 _movp(Reg, Src);
5007 } else { 5007 } else {
5008 _mov(Reg, Src); 5008 _mov(Reg, Src);
5009 } 5009 }
5010 return Reg; 5010 return Reg;
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after
5099 NeedsReg = true; 5099 NeedsReg = true;
5100 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) 5100 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))
5101 // On x86, FP constants are lowered to mem operands. 5101 // On x86, FP constants are lowered to mem operands.
5102 NeedsReg = true; 5102 NeedsReg = true;
5103 if (NeedsReg) { 5103 if (NeedsReg) {
5104 From = copyToReg(From, RegNum); 5104 From = copyToReg(From, RegNum);
5105 } 5105 }
5106 return From; 5106 return From;
5107 } 5107 }
5108 if (auto Var = llvm::dyn_cast<Variable>(From)) { 5108 if (auto Var = llvm::dyn_cast<Variable>(From)) {
5109 // Check if the variable is guaranteed a physical register. This 5109 // Check if the variable is guaranteed a physical register. This
5110 // can happen either when the variable is pre-colored or when it is 5110 // can happen either when the variable is pre-colored or when it is
5111 // assigned infinite weight. 5111 // assigned infinite weight.
Karl 2015/07/06 18:08:49 Why? (inside method).
ascull 2015/07/06 19:29:09 Done.
5112 bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf()); 5112 bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf());
5113 // We need a new physical register for the operand if: 5113 // We need a new physical register for the operand if:
5114 // Mem is not allowed and Var isn't guaranteed a physical 5114 // Mem is not allowed and Var isn't guaranteed a physical
5115 // register, or 5115 // register, or
5116 // RegNum is required and Var->getRegNum() doesn't match. 5116 // RegNum is required and Var->getRegNum() doesn't match.
5117 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || 5117 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
5118 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { 5118 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
5119 From = copyToReg(From, RegNum); 5119 From = copyToReg(From, RegNum);
5120 } 5120 }
5121 return From; 5121 return From;
5122 } 5122 }
5123 llvm_unreachable("Unhandled operand kind in legalize()"); 5123 llvm_unreachable("Unhandled operand kind in legalize()");
5124 return From; 5124 return From;
5125 } 5125 }
5126 5126
5127 // Provide a trivial wrapper to legalize() for this common usage. 5127 /// Provide a trivial wrapper to legalize() for this common usage.
5128 template <class Machine> 5128 template <class Machine>
5129 Variable *TargetX86Base<Machine>::legalizeToVar(Operand *From, int32_t RegNum) { 5129 Variable *TargetX86Base<Machine>::legalizeToVar(Operand *From, int32_t RegNum) {
5130 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); 5130 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
5131 } 5131 }
5132 5132
5133 // For the cmp instruction, if Src1 is an immediate, or known to be a 5133 /// For the cmp instruction, if Src1 is an immediate, or known to be a
5134 // physical register, we can allow Src0 to be a memory operand. 5134 /// physical register, we can allow Src0 to be a memory operand.
5135 // Otherwise, Src0 must be copied into a physical register. 5135 /// Otherwise, Src0 must be copied into a physical register.
5136 // (Actually, either Src0 or Src1 can be chosen for the physical 5136 /// (Actually, either Src0 or Src1 can be chosen for the physical
5137 // register, but unfortunately we have to commit to one or the other 5137 /// register, but unfortunately we have to commit to one or the other
5138 // before register allocation.) 5138 /// before register allocation.)
5139 template <class Machine> 5139 template <class Machine>
5140 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0, 5140 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0,
5141 Operand *Src1) { 5141 Operand *Src1) {
5142 bool IsSrc1ImmOrReg = false; 5142 bool IsSrc1ImmOrReg = false;
5143 if (llvm::isa<Constant>(Src1)) { 5143 if (llvm::isa<Constant>(Src1)) {
5144 IsSrc1ImmOrReg = true; 5144 IsSrc1ImmOrReg = true;
5145 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { 5145 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
5146 if (Var->hasReg()) 5146 if (Var->hasReg())
5147 IsSrc1ImmOrReg = true; 5147 IsSrc1ImmOrReg = true;
5148 } 5148 }
(...skipping 53 matching lines...) Expand 10 before | Expand all | Expand 10 after
5202 inferTwoAddress(); 5202 inferTwoAddress();
5203 } 5203 }
5204 5204
5205 template <class Machine> 5205 template <class Machine>
5206 void TargetX86Base<Machine>::makeRandomRegisterPermutation( 5206 void TargetX86Base<Machine>::makeRandomRegisterPermutation(
5207 llvm::SmallVectorImpl<int32_t> &Permutation, 5207 llvm::SmallVectorImpl<int32_t> &Permutation,
5208 const llvm::SmallBitVector &ExcludeRegisters) const { 5208 const llvm::SmallBitVector &ExcludeRegisters) const {
5209 // TODO(stichnot): Declaring Permutation this way loses type/size 5209 // TODO(stichnot): Declaring Permutation this way loses type/size
5210 // information. Fix this in conjunction with the caller-side TODO. 5210 // information. Fix this in conjunction with the caller-side TODO.
5211 assert(Permutation.size() >= RegX8632::Reg_NUM); 5211 assert(Permutation.size() >= RegX8632::Reg_NUM);
5212 // Expected upper bound on the number of registers in a single 5212 // Expected upper bound on the number of registers in a single
5213 // equivalence class. For x86-32, this would comprise the 8 XMM 5213 // equivalence class. For x86-32, this would comprise the 8 XMM
5214 // registers. This is for performance, not correctness. 5214 // registers. This is for performance, not correctness.
Karl 2015/07/06 18:08:49 Why ? (inside method).
ascull 2015/07/06 19:29:09 Done.
5215 static const unsigned MaxEquivalenceClassSize = 8; 5215 static const unsigned MaxEquivalenceClassSize = 8;
5216 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList; 5216 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList;
5217 typedef std::map<uint32_t, RegisterList> EquivalenceClassMap; 5217 typedef std::map<uint32_t, RegisterList> EquivalenceClassMap;
5218 EquivalenceClassMap EquivalenceClasses; 5218 EquivalenceClassMap EquivalenceClasses;
5219 SizeT NumShuffled = 0, NumPreserved = 0; 5219 SizeT NumShuffled = 0, NumPreserved = 0;
5220 5220
5221 // Build up the equivalence classes of registers by looking at the 5221 // Build up the equivalence classes of registers by looking at the
5222 // register properties as well as whether the registers should be 5222 // register properties as well as whether the registers should be
5223 // explicitly excluded from shuffling. 5223 // explicitly excluded from shuffling.
5224 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ 5224 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
5297 return; 5297 return;
5298 Ostream &Str = Ctx->getStrEmit(); 5298 Ostream &Str = Ctx->getStrEmit();
5299 C->emitPoolLabel(Str); 5299 C->emitPoolLabel(Str);
5300 } 5300 }
5301 5301
5302 template <class Machine> 5302 template <class Machine>
5303 void TargetX86Base<Machine>::emit(const ConstantUndef *) const { 5303 void TargetX86Base<Machine>::emit(const ConstantUndef *) const {
5304 llvm::report_fatal_error("undef value encountered by emitter."); 5304 llvm::report_fatal_error("undef value encountered by emitter.");
5305 } 5305 }
5306 5306
5307 // Randomize or pool an Immediate. 5307 /// Randomize or pool an Immediate.
5308 template <class Machine> 5308 template <class Machine>
5309 Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate, 5309 Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate,
5310 int32_t RegNum) { 5310 int32_t RegNum) {
5311 assert(llvm::isa<ConstantInteger32>(Immediate) || 5311 assert(llvm::isa<ConstantInteger32>(Immediate) ||
5312 llvm::isa<ConstantRelocatable>(Immediate)); 5312 llvm::isa<ConstantRelocatable>(Immediate));
5313 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || 5313 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
5314 RandomizationPoolingPaused == true) { 5314 RandomizationPoolingPaused == true) {
5315 // Immediates randomization/pooling off or paused 5315 // Immediates randomization/pooling off or paused
5316 return Immediate; 5316 return Immediate;
5317 } 5317 }
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after
5496 } 5496 }
5497 // the offset is not eligible for blinding or pooling, return the original 5497 // the offset is not eligible for blinding or pooling, return the original
5498 // mem operand 5498 // mem operand
5499 return MemOperand; 5499 return MemOperand;
5500 } 5500 }
5501 5501
5502 } // end of namespace X86Internal 5502 } // end of namespace X86Internal
5503 } // end of namespace Ice 5503 } // end of namespace Ice
5504 5504
5505 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5505 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698