src/IceTargetLoweringX86BaseImpl.h - Issue 1202533003: Extracts a TargetX86Base target which will be used as the common X86{32,64} implementation.

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1202533003: Extracts a TargetX86Base target which will be used as the common X86{32,64} implementation. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: Addresses comments. Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//	1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -- C++ --==//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 //	9 //

10 // This file implements the TargetLoweringX8632 class, which	10 // This file implements the TargetLoweringX86Base class, which

11 // consists almost entirely of the lowering sequence for each	11 // consists almost entirely of the lowering sequence for each

12 // high-level instruction.	12 // high-level instruction.

13 //	13 //

14 //===----------------------------------------------------------------------===//	14 //===----------------------------------------------------------------------===//

15	15

	16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H

	17 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H

	18

16 #include "llvm/Support/MathExtras.h"	19 #include "llvm/Support/MathExtras.h"

17	20

18 #include "IceCfg.h"	21 #include "IceCfg.h"

19 #include "IceCfgNode.h"	22 #include "IceCfgNode.h"

20 #include "IceClFlags.h"	23 #include "IceClFlags.h"

21 #include "IceDefs.h"	24 #include "IceDefs.h"

22 #include "IceELFObjectWriter.h"	25 #include "IceELFObjectWriter.h"

23 #include "IceGlobalInits.h"	26 #include "IceGlobalInits.h"

24 #include "IceInstX8632.h"	27 #include "IceInstX8632.h"

25 #include "IceLiveness.h"	28 #include "IceLiveness.h"

26 #include "IceOperand.h"	29 #include "IceOperand.h"

27 #include "IceRegistersX8632.h"	30 #include "IceRegistersX8632.h"

28 #include "IceTargetLoweringX8632.def"	31 #include "IceTargetLoweringX8632.def"

29 #include "IceTargetLoweringX8632.h"	32 #include "IceTargetLoweringX8632.h"

30 #include "IceUtils.h"	33 #include "IceUtils.h"

31	34

32 namespace Ice {	35 namespace Ice {

33	36 namespace X86Internal {

34 namespace {

35

36 // The following table summarizes the logic for lowering the fcmp

37 // instruction. There is one table entry for each of the 16 conditions.

38 //

39 // The first four columns describe the case when the operands are

40 // floating point scalar values. A comment in lowerFcmp() describes the

41 // lowering template. In the most general case, there is a compare

42 // followed by two conditional branches, because some fcmp conditions

43 // don't map to a single x86 conditional branch. However, in many cases

44 // it is possible to swap the operands in the comparison and have a

45 // single conditional branch. Since it's quite tedious to validate the

46 // table by hand, good execution tests are helpful.

47 //

48 // The last two columns describe the case when the operands are vectors

49 // of floating point values. For most fcmp conditions, there is a clear

50 // mapping to a single x86 cmpps instruction variant. Some fcmp

51 // conditions require special code to handle and these are marked in the

52 // table with a Cmpps_Invalid predicate.

53 const struct TableFcmp_ {

54 uint32_t Default;

55 bool SwapScalarOperands;

56 CondX86::BrCond C1, C2;

57 bool SwapVectorOperands;

58 CondX86::CmppsCond Predicate;

59 } TableFcmp[] = {

60 #define X(val, dflt, swapS, C1, C2, swapV, pred) \

61 { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \

62 ,

63 FCMPX8632_TABLE

64 #undef X

65 };

66 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);

67

68 // The following table summarizes the logic for lowering the icmp instruction

69 // for i32 and narrower types. Each icmp condition has a clear mapping to an

70 // x86 conditional branch instruction.

71

72 const struct TableIcmp32_ {

73 CondX86::BrCond Mapping;

74 } TableIcmp32[] = {

75 #define X(val, C_32, C1_64, C2_64, C3_64) \

76 { CondX86::C_32 } \

77 ,

78 ICMPX8632_TABLE

79 #undef X

80 };

81 const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);

82

83 // The following table summarizes the logic for lowering the icmp instruction

84 // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and

85 // conditional branches are needed. For the other conditions, three separate

86 // conditional branches are needed.

87 const struct TableIcmp64_ {

88 CondX86::BrCond C1, C2, C3;

89 } TableIcmp64[] = {

90 #define X(val, C_32, C1_64, C2_64, C3_64) \

91 { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 } \

92 ,

93 ICMPX8632_TABLE

94 #undef X

95 };

96 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);

97

98 CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {

99 size_t Index = static_cast<size_t>(Cond);

100 assert(Index < TableIcmp32Size);

101 return TableIcmp32[Index].Mapping;

102 }

103

104 const struct TableTypeX8632Attributes_ {

105 Type InVectorElementType;

106 } TableTypeX8632Attributes[] = {

107 #define X(tag, elementty, cvt, sdss, pack, width, fld) \

108 { elementty } \

109 ,

110 ICETYPEX8632_TABLE

111 #undef X

112 };

113 const size_t TableTypeX8632AttributesSize =

114 llvm::array_lengthof(TableTypeX8632Attributes);

115

116 // Return the type which the elements of the vector have in the X86

117 // representation of the vector.

118 Type getInVectorElementType(Type Ty) {

119 assert(isVectorType(Ty));

120 size_t Index = static_cast<size_t>(Ty);

121 (void)Index;

122 assert(Index < TableTypeX8632AttributesSize);

123 return TableTypeX8632Attributes[Ty].InVectorElementType;

124 }

125

126 // The maximum number of arguments to pass in XMM registers

127 const uint32_t X86_MAX_XMM_ARGS = 4;

128 // The number of bits in a byte

129 const uint32_t X86_CHAR_BIT = 8;

130 // Stack alignment

131 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;

132 // Size of the return address on the stack

133 const uint32_t X86_RET_IP_SIZE_BYTES = 4;

134 // The number of different NOP instructions

135 const uint32_t X86_NUM_NOP_VARIANTS = 5;

136

137 // Value is in bytes. Return Value adjusted to the next highest multiple

138 // of the stack alignment.

139 uint32_t applyStackAlignment(uint32_t Value) {

140 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);

141 }

142

143 // In some cases, there are x-macros tables for both high-level and

144 // low-level instructions/operands that use the same enum key value.

145 // The tables are kept separate to maintain a proper separation

146 // between abstraction layers. There is a risk that the tables could

147 // get out of sync if enum values are reordered or if entries are

148 // added or deleted. The following dummy namespaces use

149 // static_asserts to ensure everything is kept in sync.

150

151 // Validate the enum values in FCMPX8632_TABLE.

152 namespace dummy1 {

153 // Define a temporary set of enum values based on low-level table

154 // entries.

155 enum _tmp_enum {

156 #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val,

157 FCMPX8632_TABLE

158 #undef X

159 _num

160 };

161 // Define a set of constants based on high-level table entries.

162 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag;

163 ICEINSTFCMP_TABLE

164 #undef X

165 // Define a set of constants based on low-level table entries, and

166 // ensure the table entry keys are consistent.

167 #define X(val, dflt, swapS, C1, C2, swapV, pred) \

168 static const int _table2_##val = _tmp_##val; \

169 static_assert( \

170 _table1_##val == _table2_##val, \

171 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");

172 FCMPX8632_TABLE

173 #undef X

174 // Repeat the static asserts with respect to the high-level table

175 // entries in case the high-level table has extra entries.

176 #define X(tag, str) \

177 static_assert( \

178 _table1_##tag == _table2_##tag, \

179 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE");

180 ICEINSTFCMP_TABLE

181 #undef X

182 } // end of namespace dummy1

183

184 // Validate the enum values in ICMPX8632_TABLE.

185 namespace dummy2 {

186 // Define a temporary set of enum values based on low-level table

187 // entries.

188 enum _tmp_enum {

189 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,

190 ICMPX8632_TABLE

191 #undef X

192 _num

193 };

194 // Define a set of constants based on high-level table entries.

195 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag;

196 ICEINSTICMP_TABLE

197 #undef X

198 // Define a set of constants based on low-level table entries, and

199 // ensure the table entry keys are consistent.

200 #define X(val, C_32, C1_64, C2_64, C3_64) \

201 static const int _table2_##val = _tmp_##val; \

202 static_assert( \

203 _table1_##val == _table2_##val, \

204 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");

205 ICMPX8632_TABLE

206 #undef X

207 // Repeat the static asserts with respect to the high-level table

208 // entries in case the high-level table has extra entries.

209 #define X(tag, str) \

210 static_assert( \

211 _table1_##tag == _table2_##tag, \

212 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE");

213 ICEINSTICMP_TABLE

214 #undef X

215 } // end of namespace dummy2

216

217 // Validate the enum values in ICETYPEX8632_TABLE.

218 namespace dummy3 {

219 // Define a temporary set of enum values based on low-level table

220 // entries.

221 enum _tmp_enum {

222 #define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag,

223 ICETYPEX8632_TABLE

224 #undef X

225 _num

226 };

227 // Define a set of constants based on high-level table entries.

228 #define X(tag, size, align, elts, elty, str) \

229 static const int _table1_##tag = tag;

230 ICETYPE_TABLE

231 #undef X

232 // Define a set of constants based on low-level table entries, and

233 // ensure the table entry keys are consistent.

234 #define X(tag, elementty, cvt, sdss, pack, width, fld) \

235 static const int _table2_##tag = _tmp_##tag; \

236 static_assert(_table1_##tag == _table2_##tag, \

237 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");

238 ICETYPEX8632_TABLE

239 #undef X

240 // Repeat the static asserts with respect to the high-level table

241 // entries in case the high-level table has extra entries.

242 #define X(tag, size, align, elts, elty, str) \

243 static_assert(_table1_##tag == _table2_##tag, \

244 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE");

245 ICETYPE_TABLE

246 #undef X

247 } // end of namespace dummy3

248	37

249 // A helper class to ease the settings of RandomizationPoolingPause	38 // A helper class to ease the settings of RandomizationPoolingPause

250 // to disable constant blinding or pooling for some translation phases.	39 // to disable constant blinding or pooling for some translation phases.

251 class BoolFlagSaver {	40 class BoolFlagSaver {

252 BoolFlagSaver() = delete;	41 BoolFlagSaver() = delete;

253 BoolFlagSaver(const BoolFlagSaver &) = delete;	42 BoolFlagSaver(const BoolFlagSaver &) = delete;

254 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;	43 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;

255	44

256 public:	45 public:

257 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; }	46 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; }

258 ~BoolFlagSaver() { Flag = OldValue; }	47 ~BoolFlagSaver() { Flag = OldValue; }

259	48

260 private:	49 private:

261 const bool OldValue;	50 const bool OldValue;

262 bool &Flag;	51 bool &Flag;

263 };	52 };

264	53

265 } // end of anonymous namespace	54 template <class MachineTraits> class BoolFoldingEntry {

	55 BoolFoldingEntry(const BoolFoldingEntry &) = delete;

266	56

267 BoolFoldingEntry::BoolFoldingEntry(Inst *I)	57 public:

268 : Instr(I), IsComplex(BoolFolding::hasComplexLowering(I)) {}	58 BoolFoldingEntry() = default;

	59 explicit BoolFoldingEntry(Inst *I);

	60 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;

	61 // Instr is the instruction producing the i1-type variable of interest.

	62 Inst *Instr = nullptr;

	63 // IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).

	64 bool IsComplex = false;

	65 // IsLiveOut is initialized conservatively to true, and is set to false when

	66 // we encounter an instruction that ends Var's live range. We disable the

	67 // folding optimization when Var is live beyond this basic block. Note that

	68 // if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will

	69 // always be true and the folding optimization will never be performed.

	70 bool IsLiveOut = true;

	71 // NumUses counts the number of times Var is used as a source operand in the

	72 // basic block. If IsComplex is true and there is more than one use of Var,

	73 // then the folding optimization is disabled for Var.

	74 uint32_t NumUses = 0;

	75 };

269	76

270 BoolFolding::BoolFoldingProducerKind	77 template <class MachineTraits> class BoolFolding {

271 BoolFolding::getProducerKind(const Inst *Instr) {	78 public:

	79 enum BoolFoldingProducerKind {

	80 PK_None,

	81 PK_Icmp32,

	82 PK_Icmp64,

	83 PK_Fcmp,

	84 PK_Trunc

	85 };

	86

	87 // Currently the actual enum values are not used (other than CK_None), but we

	88 // go

	89 // ahead and produce them anyway for symmetry with the

	90 // BoolFoldingProducerKind.

	91 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };

	92

	93 private:

	94 BoolFolding(const BoolFolding &) = delete;

	95 BoolFolding &operator=(const BoolFolding &) = delete;

	96

	97 public:

	98 BoolFolding() = default;

	99 static BoolFoldingProducerKind getProducerKind(const Inst *Instr);

	100 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr);

	101 static bool hasComplexLowering(const Inst *Instr);

	102 void init(CfgNode *Node);

	103 const Inst *getProducerFor(const Operand *Opnd) const;

	104 void dump(const Cfg *Func) const;

	105

	106 private:

	107 // Returns true if Producers contains a valid entry for the given VarNum.

	108 bool containsValid(SizeT VarNum) const {

	109 auto Element = Producers.find(VarNum);

	110 return Element != Producers.end() && Element->second.Instr != nullptr;

	111 }

	112 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; }

	113 // Producers maps Variable::Number to a BoolFoldingEntry.

	114 std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers;

	115 };

	116

	117 template <class MachineTraits>

	118 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I)

	119 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {}

	120

	121 template <class MachineTraits>

	122 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind

	123 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) {

272 if (llvm::isa<InstIcmp>(Instr)) {	124 if (llvm::isa<InstIcmp>(Instr)) {

273 if (Instr->getSrc(0)->getType() != IceType_i64)	125 if (Instr->getSrc(0)->getType() != IceType_i64)

274 return PK_Icmp32;	126 return PK_Icmp32;

275 return PK_None; // TODO(stichnot): actually PK_Icmp64;	127 return PK_None; // TODO(stichnot): actually PK_Icmp64;

276 }	128 }

277 return PK_None; // TODO(stichnot): remove this	129 return PK_None; // TODO(stichnot): remove this

278	130

279 if (llvm::isa<InstFcmp>(Instr))	131 if (llvm::isa<InstFcmp>(Instr))

280 return PK_Fcmp;	132 return PK_Fcmp;

281 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {	133 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {

282 switch (Cast->getCastKind()) {	134 switch (Cast->getCastKind()) {

283 default:	135 default:

284 return PK_None;	136 return PK_None;

285 case InstCast::Trunc:	137 case InstCast::Trunc:

286 return PK_Trunc;	138 return PK_Trunc;

287 }	139 }

288 }	140 }

289 return PK_None;	141 return PK_None;

290 }	142 }

291	143

292 BoolFolding::BoolFoldingConsumerKind	144 template <class MachineTraits>

293 BoolFolding::getConsumerKind(const Inst *Instr) {	145 typename BoolFolding<MachineTraits>::BoolFoldingConsumerKind

	146 BoolFolding<MachineTraits>::getConsumerKind(const Inst *Instr) {

294 if (llvm::isa<InstBr>(Instr))	147 if (llvm::isa<InstBr>(Instr))

295 return CK_Br;	148 return CK_Br;

296 if (llvm::isa<InstSelect>(Instr))	149 if (llvm::isa<InstSelect>(Instr))

297 return CK_Select;	150 return CK_Select;

298 return CK_None; // TODO(stichnot): remove this	151 return CK_None; // TODO(stichnot): remove this

299	152

300 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {	153 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) {

301 switch (Cast->getCastKind()) {	154 switch (Cast->getCastKind()) {

302 default:	155 default:

303 return CK_None;	156 return CK_None;

304 case InstCast::Sext:	157 case InstCast::Sext:

305 return CK_Sext;	158 return CK_Sext;

306 case InstCast::Zext:	159 case InstCast::Zext:

307 return CK_Zext;	160 return CK_Zext;

308 }	161 }

309 }	162 }

310 return CK_None;	163 return CK_None;

311 }	164 }

312	165

313 // Returns true if the producing instruction has a "complex" lowering	166 // Returns true if the producing instruction has a "complex" lowering

314 // sequence. This generally means that its lowering sequence requires	167 // sequence. This generally means that its lowering sequence requires

315 // more than one conditional branch, namely 64-bit integer compares	168 // more than one conditional branch, namely 64-bit integer compares

316 // and some floating-point compares. When this is true, and there is	169 // and some floating-point compares. When this is true, and there is

317 // more than one consumer, we prefer to disable the folding	170 // more than one consumer, we prefer to disable the folding

318 // optimization because it minimizes branches.	171 // optimization because it minimizes branches.

319 bool BoolFolding::hasComplexLowering(const Inst *Instr) {	172 template <class MachineTraits>

	173 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {

320 switch (getProducerKind(Instr)) {	174 switch (getProducerKind(Instr)) {

321 default:	175 default:

322 return false;	176 return false;

323 case PK_Icmp64:	177 case PK_Icmp64:

324 return true;	178 return true;

325 case PK_Fcmp:	179 case PK_Fcmp:

326 return TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 !=	180 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]

327 CondX86::Br_None;	181 .C2 != CondX86::Br_None;

328 }	182 }

329 }	183 }

330	184

331 void BoolFolding::init(CfgNode *Node) {	185 template <class MachineTraits>

	186 void BoolFolding<MachineTraits>::init(CfgNode *Node) {

332 Producers.clear();	187 Producers.clear();

333 for (Inst &Instr : Node->getInsts()) {	188 for (Inst &Instr : Node->getInsts()) {

334 // Check whether Instr is a valid producer.	189 // Check whether Instr is a valid producer.

335 Variable *Var = Instr.getDest();	190 Variable *Var = Instr.getDest();

336 if (!Instr.isDeleted() // only consider non-deleted instructions	191 if (!Instr.isDeleted() // only consider non-deleted instructions

337 && Var // only instructions with an actual dest var	192 && Var // only instructions with an actual dest var

338 && Var->getType() == IceType_i1 // only bool-type dest vars	193 && Var->getType() == IceType_i1 // only bool-type dest vars

339 && getProducerKind(&Instr) != PK_None) { // white-listed instructions	194 && getProducerKind(&Instr) != PK_None) { // white-listed instructions

340 Producers[Var->getIndex()] = BoolFoldingEntry(&Instr);	195 Producers[Var->getIndex()] = BoolFoldingEntry<MachineTraits>(&Instr);

341 }	196 }

342 // Check each src variable against the map.	197 // Check each src variable against the map.

343 for (SizeT I = 0; I < Instr.getSrcSize(); ++I) {	198 for (SizeT I = 0; I < Instr.getSrcSize(); ++I) {

344 Operand *Src = Instr.getSrc(I);	199 Operand *Src = Instr.getSrc(I);

345 SizeT NumVars = Src->getNumVars();	200 SizeT NumVars = Src->getNumVars();

346 for (SizeT J = 0; J < NumVars; ++J) {	201 for (SizeT J = 0; J < NumVars; ++J) {

347 const Variable *Var = Src->getVar(J);	202 const Variable *Var = Src->getVar(J);

348 SizeT VarNum = Var->getIndex();	203 SizeT VarNum = Var->getIndex();

349 if (containsValid(VarNum)) {	204 if (containsValid(VarNum)) {

350 if (I != 0 // All valid consumers use Var as the first source operand	205 if (I != 0 // All valid consumers use Var as the first source operand

(...skipping 21 matching lines...) Expand all Loading...
372 continue;	227 continue;

373 }	228 }

374 // Mark as "dead" rather than outright deleting. This is so that	229 // Mark as "dead" rather than outright deleting. This is so that

375 // other peephole style optimizations during or before lowering	230 // other peephole style optimizations during or before lowering

376 // have access to this instruction in undeleted form. See for	231 // have access to this instruction in undeleted form. See for

377 // example tryOptimizedCmpxchgCmpBr().	232 // example tryOptimizedCmpxchgCmpBr().

378 I.second.Instr->setDead();	233 I.second.Instr->setDead();

379 }	234 }

380 }	235 }

381	236

382 const Inst *BoolFolding::getProducerFor(const Operand *Opnd) const {	237 template <class MachineTraits>

	238 const Inst *

	239 BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const {

383 auto *Var = llvm::dyn_cast<const Variable>(Opnd);	240 auto *Var = llvm::dyn_cast<const Variable>(Opnd);

384 if (Var == nullptr)	241 if (Var == nullptr)

385 return nullptr;	242 return nullptr;

386 SizeT VarNum = Var->getIndex();	243 SizeT VarNum = Var->getIndex();

387 auto Element = Producers.find(VarNum);	244 auto Element = Producers.find(VarNum);

388 if (Element == Producers.end())	245 if (Element == Producers.end())

389 return nullptr;	246 return nullptr;

390 return Element->second.Instr;	247 return Element->second.Instr;

391 }	248 }

392	249

393 void BoolFolding::dump(const Cfg *Func) const {	250 template <class MachineTraits>

	251 void BoolFolding<MachineTraits>::dump(const Cfg *Func) const {

394 if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding))	252 if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding))

395 return;	253 return;

396 OstreamLocker L(Func->getContext());	254 OstreamLocker L(Func->getContext());

397 Ostream &Str = Func->getContext()->getStrDump();	255 Ostream &Str = Func->getContext()->getStrDump();

398 for (auto &I : Producers) {	256 for (auto &I : Producers) {

399 if (I.second.Instr == nullptr)	257 if (I.second.Instr == nullptr)

400 continue;	258 continue;

401 Str << "Found foldable producer:\n ";	259 Str << "Found foldable producer:\n ";

402 I.second.Instr->dump(Func);	260 I.second.Instr->dump(Func);

403 Str << "\n";	261 Str << "\n";

404 }	262 }

405 }	263 }

406	264

407 void TargetX8632::initNodeForLowering(CfgNode *Node) {	265 template <class Machine>

	266 void TargetX86Base<Machine>::initNodeForLowering(CfgNode *Node) {

408 FoldingInfo.init(Node);	267 FoldingInfo.init(Node);

409 FoldingInfo.dump(Func);	268 FoldingInfo.dump(Func);

410 }	269 }

411	270

412 TargetX8632::TargetX8632(Cfg *Func) : TargetLowering(Func) {	271 template <class Machine>

413 static_assert((X86InstructionSet::End - X86InstructionSet::Begin) ==	272 TargetX86Base<Machine>::TargetX86Base(Cfg *Func)

414 (TargetInstructionSet::X86InstructionSet_End -	273 : Machine(Func) {

415 TargetInstructionSet::X86InstructionSet_Begin),	274 static_assert(

416 "X86InstructionSet range different from TargetInstructionSet");	275 (Traits::InstructionSet::End - Traits::InstructionSet::Begin) ==

	276 (TargetInstructionSet::X86InstructionSet_End -

	277 TargetInstructionSet::X86InstructionSet_Begin),

	278 "Traits::InstructionSet range different from TargetInstructionSet");

417 if (Func->getContext()->getFlags().getTargetInstructionSet() !=	279 if (Func->getContext()->getFlags().getTargetInstructionSet() !=

418 TargetInstructionSet::BaseInstructionSet) {	280 TargetInstructionSet::BaseInstructionSet) {

419 InstructionSet = static_cast<X86InstructionSet>(	281 InstructionSet = static_cast<typename Traits::InstructionSet>(

420 (Func->getContext()->getFlags().getTargetInstructionSet() -	282 (Func->getContext()->getFlags().getTargetInstructionSet() -

421 TargetInstructionSet::X86InstructionSet_Begin) +	283 TargetInstructionSet::X86InstructionSet_Begin) +

422 X86InstructionSet::Begin);	284 Traits::InstructionSet::Begin);

423 }	285 }

424 // TODO: Don't initialize IntegerRegisters and friends every time.	286 // TODO: Don't initialize IntegerRegisters and friends every time.

425 // Instead, initialize in some sort of static initializer for the	287 // Instead, initialize in some sort of static initializer for the

426 // class.	288 // class.

427 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM);	289 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM);

428 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM);	290 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM);

429 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM);	291 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM);

430 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM);	292 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM);

431 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM);	293 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM);

432 ScratchRegs.resize(RegX8632::Reg_NUM);	294 ScratchRegs.resize(RegX8632::Reg_NUM);

(...skipping 16 matching lines...) Expand all Loading...
449 TypeToRegisterSet[IceType_f64] = FloatRegisters;	311 TypeToRegisterSet[IceType_f64] = FloatRegisters;

450 TypeToRegisterSet[IceType_v4i1] = VectorRegisters;	312 TypeToRegisterSet[IceType_v4i1] = VectorRegisters;

451 TypeToRegisterSet[IceType_v8i1] = VectorRegisters;	313 TypeToRegisterSet[IceType_v8i1] = VectorRegisters;

452 TypeToRegisterSet[IceType_v16i1] = VectorRegisters;	314 TypeToRegisterSet[IceType_v16i1] = VectorRegisters;

453 TypeToRegisterSet[IceType_v16i8] = VectorRegisters;	315 TypeToRegisterSet[IceType_v16i8] = VectorRegisters;

454 TypeToRegisterSet[IceType_v8i16] = VectorRegisters;	316 TypeToRegisterSet[IceType_v8i16] = VectorRegisters;

455 TypeToRegisterSet[IceType_v4i32] = VectorRegisters;	317 TypeToRegisterSet[IceType_v4i32] = VectorRegisters;

456 TypeToRegisterSet[IceType_v4f32] = VectorRegisters;	318 TypeToRegisterSet[IceType_v4f32] = VectorRegisters;

457 }	319 }

458	320

459 void TargetX8632::translateO2() {	321 template <class Machine> void TargetX86Base<Machine>::translateO2() {

460 TimerMarker T(TimerStack::TT_O2, Func);	322 TimerMarker T(TimerStack::TT_O2, Func);

461	323

462 if (!Ctx->getFlags().getPhiEdgeSplit()) {	324 if (!Ctx->getFlags().getPhiEdgeSplit()) {

463 // Lower Phi instructions.	325 // Lower Phi instructions.

464 Func->placePhiLoads();	326 Func->placePhiLoads();

465 if (Func->hasError())	327 if (Func->hasError())

466 return;	328 return;

467 Func->placePhiStores();	329 Func->placePhiStores();

468 if (Func->hasError())	330 if (Func->hasError())

469 return;	331 return;

(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
561 // needed for searching for opportunities.	423 // needed for searching for opportunities.

562 Func->doBranchOpt();	424 Func->doBranchOpt();

563 Func->dump("After branch optimization");	425 Func->dump("After branch optimization");

564	426

565 // Nop insertion	427 // Nop insertion

566 if (Ctx->getFlags().shouldDoNopInsertion()) {	428 if (Ctx->getFlags().shouldDoNopInsertion()) {

567 Func->doNopInsertion();	429 Func->doNopInsertion();

568 }	430 }

569 }	431 }

570	432

571 void TargetX8632::translateOm1() {	433 template <class Machine> void TargetX86Base<Machine>::translateOm1() {

572 TimerMarker T(TimerStack::TT_Om1, Func);	434 TimerMarker T(TimerStack::TT_Om1, Func);

573	435

574 Func->placePhiLoads();	436 Func->placePhiLoads();

575 if (Func->hasError())	437 if (Func->hasError())

576 return;	438 return;

577 Func->placePhiStores();	439 Func->placePhiStores();

578 if (Func->hasError())	440 if (Func->hasError())

579 return;	441 return;

580 Func->deletePhis();	442 Func->deletePhis();

581 if (Func->hasError())	443 if (Func->hasError())

(...skipping 16 matching lines...) Expand all Loading...
598 if (Func->hasError())	460 if (Func->hasError())

599 return;	461 return;

600 Func->dump("After stack frame mapping");	462 Func->dump("After stack frame mapping");

601	463

602 // Nop insertion	464 // Nop insertion

603 if (Ctx->getFlags().shouldDoNopInsertion()) {	465 if (Ctx->getFlags().shouldDoNopInsertion()) {

604 Func->doNopInsertion();	466 Func->doNopInsertion();

605 }	467 }

606 }	468 }

607	469

608 namespace {

609

610 bool canRMW(const InstArithmetic *Arith) {	470 bool canRMW(const InstArithmetic *Arith) {

611 Type Ty = Arith->getDest()->getType();	471 Type Ty = Arith->getDest()->getType();

612 // X86 vector instructions write to a register and have no RMW	472 // X86 vector instructions write to a register and have no RMW

613 // option.	473 // option.

614 if (isVectorType(Ty))	474 if (isVectorType(Ty))

615 return false;	475 return false;

616 bool isI64 = Ty == IceType_i64;	476 bool isI64 = Ty == IceType_i64;

617	477

618 switch (Arith->getOp()) {	478 switch (Arith->getOp()) {

619 // Not handled for lack of simple lowering:	479 // Not handled for lack of simple lowering:

(...skipping 25 matching lines...) Expand all Loading...
645 return MemA->getBase() == MemB->getBase() &&	505 return MemA->getBase() == MemB->getBase() &&

646 MemA->getOffset() == MemB->getOffset() &&	506 MemA->getOffset() == MemB->getOffset() &&

647 MemA->getIndex() == MemB->getIndex() &&	507 MemA->getIndex() == MemB->getIndex() &&

648 MemA->getShift() == MemB->getShift() &&	508 MemA->getShift() == MemB->getShift() &&

649 MemA->getSegmentRegister() == MemB->getSegmentRegister();	509 MemA->getSegmentRegister() == MemB->getSegmentRegister();

650 }	510 }

651 }	511 }

652 return false;	512 return false;

653 }	513 }

654	514

655 } // end of anonymous namespace	515 template <class Machine> void TargetX86Base<Machine>::findRMW() {

656

657 void TargetX8632::findRMW() {

658 Func->dump("Before RMW");	516 Func->dump("Before RMW");

659 OstreamLocker L(Func->getContext());	517 OstreamLocker L(Func->getContext());

660 Ostream &Str = Func->getContext()->getStrDump();	518 Ostream &Str = Func->getContext()->getStrDump();

661 for (CfgNode *Node : Func->getNodes()) {	519 for (CfgNode *Node : Func->getNodes()) {

662 // Walk through the instructions, considering each sequence of 3	520 // Walk through the instructions, considering each sequence of 3

663 // instructions, and look for the particular RMW pattern. Note that this	521 // instructions, and look for the particular RMW pattern. Note that this

664 // search can be "broken" (false negatives) if there are intervening deleted	522 // search can be "broken" (false negatives) if there are intervening deleted

665 // instructions, or intervening instructions that could be safely moved out	523 // instructions, or intervening instructions that could be safely moved out

666 // of the way to reveal an RMW pattern.	524 // of the way to reveal an RMW pattern.

667 auto E = Node->getInsts().end();	525 auto E = Node->getInsts().end();

(...skipping 71 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
739 InstX8632FakeRMW *RMW = InstX8632FakeRMW::create(	597 InstX8632FakeRMW *RMW = InstX8632FakeRMW::create(

740 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp());	598 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp());

741 Node->getInsts().insert(I3, RMW);	599 Node->getInsts().insert(I3, RMW);

742 }	600 }

743 }	601 }

744 }	602 }

745 }	603 }

746 }	604 }

747 }	605 }

748	606

749 namespace {

750

751 // Converts a ConstantInteger32 operand into its constant value, or	607 // Converts a ConstantInteger32 operand into its constant value, or

752 // MemoryOrderInvalid if the operand is not a ConstantInteger32.	608 // MemoryOrderInvalid if the operand is not a ConstantInteger32.

753 uint64_t getConstantMemoryOrder(Operand *Opnd) {	609 uint64_t getConstantMemoryOrder(Operand *Opnd) {

754 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))	610 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))

755 return Integer->getValue();	611 return Integer->getValue();

756 return Intrinsics::MemoryOrderInvalid;	612 return Intrinsics::MemoryOrderInvalid;

757 }	613 }

758	614

759 // Determines whether the dest of a Load instruction can be folded	615 // Determines whether the dest of a Load instruction can be folded

760 // into one of the src operands of a 2-operand instruction. This is	616 // into one of the src operands of a 2-operand instruction. This is

761 // true as long as the load dest matches exactly one of the binary	617 // true as long as the load dest matches exactly one of the binary

762 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if	618 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if

763 // the answer is true.	619 // the answer is true.

764 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,	620 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,

765 Operand *&Src0, Operand *&Src1) {	621 Operand *&Src0, Operand *&Src1) {

766 if (Src0 == LoadDest && Src1 != LoadDest) {	622 if (Src0 == LoadDest && Src1 != LoadDest) {

767 Src0 = LoadSrc;	623 Src0 = LoadSrc;

768 return true;	624 return true;

769 }	625 }

770 if (Src0 != LoadDest && Src1 == LoadDest) {	626 if (Src0 != LoadDest && Src1 == LoadDest) {

771 Src1 = LoadSrc;	627 Src1 = LoadSrc;

772 return true;	628 return true;

773 }	629 }

774 return false;	630 return false;

775 }	631 }

776	632

777 } // end of anonymous namespace	633 template <class Machine> void TargetX86Base<Machine>::doLoadOpt() {

778

779 void TargetX8632::doLoadOpt() {

780 for (CfgNode *Node : Func->getNodes()) {	634 for (CfgNode *Node : Func->getNodes()) {

781 Context.init(Node);	635 Context.init(Node);

782 while (!Context.atEnd()) {	636 while (!Context.atEnd()) {

783 Variable *LoadDest = nullptr;	637 Variable *LoadDest = nullptr;

784 Operand *LoadSrc = nullptr;	638 Operand *LoadSrc = nullptr;

785 Inst *CurInst = Context.getCur();	639 Inst *CurInst = Context.getCur();

786 Inst *Next = Context.getNextInst();	640 Inst *Next = Context.getNextInst();

787 // Determine whether the current instruction is a Load	641 // Determine whether the current instruction is a Load

788 // instruction or equivalent.	642 // instruction or equivalent.

789 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {	643 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {

(...skipping 69 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
859 NewInst->spliceLivenessInfo(Next, CurInst);	713 NewInst->spliceLivenessInfo(Next, CurInst);

860 }	714 }

861 }	715 }

862 Context.advanceCur();	716 Context.advanceCur();

863 Context.advanceNext();	717 Context.advanceNext();

864 }	718 }

865 }	719 }

866 Func->dump("After load optimization");	720 Func->dump("After load optimization");

867 }	721 }

868	722

869 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {	723 template <class Machine>

	724 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {

870 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {	725 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {

871 return Br->optimizeBranch(NextNode);	726 return Br->optimizeBranch(NextNode);

872 }	727 }

873 return false;	728 return false;

874 }	729 }

875	730

876 IceString TargetX8632::RegNames[] = {	731 template <class Machine>

	732 IceString TargetX86Base<Machine>::RegNames[] = {

877 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \	733 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \

878 frameptr, isI8, isInt, isFP) \	734 frameptr, isI8, isInt, isFP) \

879 name,	735 name,

880 REGX8632_TABLE	736 REGX8632_TABLE

881 #undef X	737 #undef X

882 };	738 };

883	739

884 Variable *TargetX8632::getPhysicalRegister(SizeT RegNum, Type Ty) {	740 template <class Machine>

	741 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {

885 if (Ty == IceType_void)	742 if (Ty == IceType_void)

886 Ty = IceType_i32;	743 Ty = IceType_i32;

887 if (PhysicalRegisters[Ty].empty())	744 if (PhysicalRegisters[Ty].empty())

888 PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM);	745 PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM);

889 assert(RegNum < PhysicalRegisters[Ty].size());	746 assert(RegNum < PhysicalRegisters[Ty].size());

890 Variable *Reg = PhysicalRegisters[Ty][RegNum];	747 Variable *Reg = PhysicalRegisters[Ty][RegNum];

891 if (Reg == nullptr) {	748 if (Reg == nullptr) {

892 Reg = Func->makeVariable(Ty);	749 Reg = Func->makeVariable(Ty);

893 Reg->setRegNum(RegNum);	750 Reg->setRegNum(RegNum);

894 PhysicalRegisters[Ty][RegNum] = Reg;	751 PhysicalRegisters[Ty][RegNum] = Reg;

895 // Specially mark esp as an "argument" so that it is considered	752 // Specially mark esp as an "argument" so that it is considered

896 // live upon function entry.	753 // live upon function entry.

897 if (RegNum == RegX8632::Reg_esp) {	754 if (RegNum == RegX8632::Reg_esp) {

898 Func->addImplicitArg(Reg);	755 Func->addImplicitArg(Reg);

899 Reg->setIgnoreLiveness();	756 Reg->setIgnoreLiveness();

900 }	757 }

901 }	758 }

902 return Reg;	759 return Reg;

903 }	760 }

904	761

905 IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {	762 template <class Machine>

	763 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const {

906 assert(RegNum < RegX8632::Reg_NUM);	764 assert(RegNum < RegX8632::Reg_NUM);

907 static IceString RegNames8[] = {	765 static IceString RegNames8[] = {

908 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \	766 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \

909 frameptr, isI8, isInt, isFP) \	767 frameptr, isI8, isInt, isFP) \

910 name8,	768 name8,

911 REGX8632_TABLE	769 REGX8632_TABLE

912 #undef X	770 #undef X

913 };	771 };

914 static IceString RegNames16[] = {	772 static IceString RegNames16[] = {

915 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \	773 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \

916 frameptr, isI8, isInt, isFP) \	774 frameptr, isI8, isInt, isFP) \

917 name16,	775 name16,

918 REGX8632_TABLE	776 REGX8632_TABLE

919 #undef X	777 #undef X

920 };	778 };

921 switch (Ty) {	779 switch (Ty) {

922 case IceType_i1:	780 case IceType_i1:

923 case IceType_i8:	781 case IceType_i8:

924 return RegNames8[RegNum];	782 return RegNames8[RegNum];

925 case IceType_i16:	783 case IceType_i16:

926 return RegNames16[RegNum];	784 return RegNames16[RegNum];

927 default:	785 default:

928 return RegNames[RegNum];	786 return RegNames[RegNum];

929 }	787 }

930 }	788 }

931	789

932 void TargetX8632::emitVariable(const Variable *Var) const {	790 template <class Machine>

	791 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const {

933 Ostream &Str = Ctx->getStrEmit();	792 Ostream &Str = Ctx->getStrEmit();

934 if (Var->hasReg()) {	793 if (Var->hasReg()) {

935 Str << "%" << getRegName(Var->getRegNum(), Var->getType());	794 Str << "%" << getRegName(Var->getRegNum(), Var->getType());

936 return;	795 return;

937 }	796 }

938 if (Var->getWeight().isInf()) {	797 if (Var->getWeight().isInf()) {

939 llvm_unreachable("Infinite-weight Variable has no register assigned");	798 llvm_unreachable("Infinite-weight Variable has no register assigned");

940 }	799 }

941 int32_t Offset = Var->getStackOffset();	800 int32_t Offset = Var->getStackOffset();

942 if (!hasFramePointer())	801 if (!hasFramePointer())

943 Offset += getStackAdjustment();	802 Offset += getStackAdjustment();

944 if (Offset)	803 if (Offset)

945 Str << Offset;	804 Str << Offset;

946 const Type FrameSPTy = IceType_i32;	805 const Type FrameSPTy = IceType_i32;

947 Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")";	806 Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")";

948 }	807 }

949	808

950 X8632::Address TargetX8632::stackVarToAsmOperand(const Variable *Var) const {	809 template <class Machine>

	810 X8632::Address

	811 TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const {

951 if (Var->hasReg())	812 if (Var->hasReg())

952 llvm_unreachable("Stack Variable has a register assigned");	813 llvm_unreachable("Stack Variable has a register assigned");

953 if (Var->getWeight().isInf()) {	814 if (Var->getWeight().isInf()) {

954 llvm_unreachable("Infinite-weight Variable has no register assigned");	815 llvm_unreachable("Infinite-weight Variable has no register assigned");

955 }	816 }

956 int32_t Offset = Var->getStackOffset();	817 int32_t Offset = Var->getStackOffset();

957 if (!hasFramePointer())	818 if (!hasFramePointer())

958 Offset += getStackAdjustment();	819 Offset += getStackAdjustment();

959 return X8632::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset);	820 return X8632::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset);

960 }	821 }

961	822

962 void TargetX8632::lowerArguments() {	823 template <class Machine> void TargetX86Base<Machine>::lowerArguments() {

963 VarList &Args = Func->getArgs();	824 VarList &Args = Func->getArgs();

964 // The first four arguments of vector type, regardless of their	825 // The first four arguments of vector type, regardless of their

965 // position relative to the other arguments in the argument list, are	826 // position relative to the other arguments in the argument list, are

966 // passed in registers xmm0 - xmm3.	827 // passed in registers xmm0 - xmm3.

967 unsigned NumXmmArgs = 0;	828 unsigned NumXmmArgs = 0;

968	829

969 Context.init(Func->getEntryNode());	830 Context.init(Func->getEntryNode());

970 Context.setInsertPoint(Context.getCur());	831 Context.setInsertPoint(Context.getCur());

971	832

972 for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS;	833 for (SizeT I = 0, E = Args.size();

973 ++I) {	834 I < E && NumXmmArgs < Traits::X86_MAX_XMM_ARGS; ++I) {

974 Variable *Arg = Args[I];	835 Variable *Arg = Args[I];

975 Type Ty = Arg->getType();	836 Type Ty = Arg->getType();

976 if (!isVectorType(Ty))	837 if (!isVectorType(Ty))

977 continue;	838 continue;

978 // Replace Arg in the argument list with the home register. Then	839 // Replace Arg in the argument list with the home register. Then

979 // generate an instruction in the prolog to copy the home register	840 // generate an instruction in the prolog to copy the home register

980 // to the assigned location of Arg.	841 // to the assigned location of Arg.

981 int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs;	842 int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs;

982 ++NumXmmArgs;	843 ++NumXmmArgs;

983 Variable *RegisterArg = Func->makeVariable(Ty);	844 Variable *RegisterArg = Func->makeVariable(Ty);

(...skipping 10 matching lines...) Expand all Loading...
994	855

995 // Helper function for addProlog().	856 // Helper function for addProlog().

996 //	857 //

997 // This assumes Arg is an argument passed on the stack. This sets the	858 // This assumes Arg is an argument passed on the stack. This sets the

998 // frame offset for Arg and updates InArgsSizeBytes according to Arg's	859 // frame offset for Arg and updates InArgsSizeBytes according to Arg's

999 // width. For an I64 arg that has been split into Lo and Hi components,	860 // width. For an I64 arg that has been split into Lo and Hi components,

1000 // it calls itself recursively on the components, taking care to handle	861 // it calls itself recursively on the components, taking care to handle

1001 // Lo first because of the little-endian architecture. Lastly, this	862 // Lo first because of the little-endian architecture. Lastly, this

1002 // function generates an instruction to copy Arg into its assigned	863 // function generates an instruction to copy Arg into its assigned

1003 // register if applicable.	864 // register if applicable.

1004 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,	865 template <class Machine>

1005 size_t BasicFrameOffset,	866 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,

1006 size_t &InArgsSizeBytes) {	867 Variable *FramePtr,

	868 size_t BasicFrameOffset,

	869 size_t &InArgsSizeBytes) {

1007 Variable *Lo = Arg->getLo();	870 Variable *Lo = Arg->getLo();

1008 Variable *Hi = Arg->getHi();	871 Variable *Hi = Arg->getHi();

1009 Type Ty = Arg->getType();	872 Type Ty = Arg->getType();

1010 if (Lo && Hi && Ty == IceType_i64) {	873 if (Lo && Hi && Ty == IceType_i64) {

1011 assert(Lo->getType() != IceType_i64); // don't want infinite recursion	874 assert(Lo->getType() != IceType_i64); // don't want infinite recursion

1012 assert(Hi->getType() != IceType_i64); // don't want infinite recursion	875 assert(Hi->getType() != IceType_i64); // don't want infinite recursion

1013 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);	876 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);

1014 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);	877 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);

1015 return;	878 return;

1016 }	879 }

1017 if (isVectorType(Ty)) {	880 if (isVectorType(Ty)) {

1018 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);	881 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);

1019 }	882 }

1020 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);	883 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);

1021 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);	884 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);

1022 if (Arg->hasReg()) {	885 if (Arg->hasReg()) {

1023 assert(Ty != IceType_i64);	886 assert(Ty != IceType_i64);

1024 OperandX8632Mem *Mem = OperandX8632Mem::create(	887 OperandX8632Mem *Mem = OperandX8632Mem::create(

1025 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));	888 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));

1026 if (isVectorType(Arg->getType())) {	889 if (isVectorType(Arg->getType())) {

1027 _movp(Arg, Mem);	890 _movp(Arg, Mem);

1028 } else {	891 } else {

1029 _mov(Arg, Mem);	892 _mov(Arg, Mem);

1030 }	893 }

1031 // This argument-copying instruction uses an explicit	894 // This argument-copying instruction uses an explicit

1032 // OperandX8632Mem operand instead of a Variable, so its	895 // OperandX8632Mem operand instead of a Variable, so its

1033 // fill-from-stack operation has to be tracked separately for	896 // fill-from-stack operation has to be tracked separately for

1034 // statistics.	897 // statistics.

1035 Ctx->statsUpdateFills();	898 Ctx->statsUpdateFills();

1036 }	899 }

1037 }	900 }

1038	901

1039 Type TargetX8632::stackSlotType() { return IceType_i32; }	902 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {

	903 return IceType_i32;

	904 }

1040	905

1041 void TargetX8632::addProlog(CfgNode *Node) {	906 template <class Machine> void TargetX86Base<Machine>::addProlog(CfgNode *Node) {

1042 // Stack frame layout:	907 // Stack frame layout:

1043 //	908 //

1044 // +------------------------+	909 // +------------------------+

1045 // | 1. return address |	910 // | 1. return address |

1046 // +------------------------+	911 // +------------------------+

1047 // | 2. preserved registers |	912 // | 2. preserved registers |

1048 // +------------------------+	913 // +------------------------+

1049 // | 3. padding |	914 // | 3. padding |

1050 // +------------------------+	915 // +------------------------+

1051 // | 4. global spill area |	916 // | 4. global spill area |

(...skipping 88 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1140 _mov(ebp, esp);	1005 _mov(ebp, esp);

1141 // Keep ebp live for late-stage liveness analysis	1006 // Keep ebp live for late-stage liveness analysis

1142 // (e.g. asm-verbose mode).	1007 // (e.g. asm-verbose mode).

1143 Context.insert(InstFakeUse::create(Func, ebp));	1008 Context.insert(InstFakeUse::create(Func, ebp));

1144 }	1009 }

1145	1010

1146 // Align the variables area. SpillAreaPaddingBytes is the size of	1011 // Align the variables area. SpillAreaPaddingBytes is the size of

1147 // the region after the preserved registers and before the spill areas.	1012 // the region after the preserved registers and before the spill areas.

1148 // LocalsSlotsPaddingBytes is the amount of padding between the globals	1013 // LocalsSlotsPaddingBytes is the amount of padding between the globals

1149 // and locals area if they are separate.	1014 // and locals area if they are separate.

1150 assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES);	1015 assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES);

1151 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);	1016 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);

1152 uint32_t SpillAreaPaddingBytes = 0;	1017 uint32_t SpillAreaPaddingBytes = 0;

1153 uint32_t LocalsSlotsPaddingBytes = 0;	1018 uint32_t LocalsSlotsPaddingBytes = 0;

1154 alignStackSpillAreas(X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,	1019 alignStackSpillAreas(Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes,

1155 SpillAreaAlignmentBytes, GlobalsSize,	1020 SpillAreaAlignmentBytes, GlobalsSize,

1156 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,	1021 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes,

1157 &LocalsSlotsPaddingBytes);	1022 &LocalsSlotsPaddingBytes);

1158 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;	1023 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;

1159 uint32_t GlobalsAndSubsequentPaddingSize =	1024 uint32_t GlobalsAndSubsequentPaddingSize =

1160 GlobalsSize + LocalsSlotsPaddingBytes;	1025 GlobalsSize + LocalsSlotsPaddingBytes;

1161	1026

1162 // Align esp if necessary.	1027 // Align esp if necessary.

1163 if (NeedsStackAlignment) {	1028 if (NeedsStackAlignment) {

1164 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;	1029 uint32_t StackOffset =

1165 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);	1030 Traits::X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;

	1031 uint32_t StackSize =

	1032 Traits::applyStackAlignment(StackOffset + SpillAreaSizeBytes);

1166 SpillAreaSizeBytes = StackSize - StackOffset;	1033 SpillAreaSizeBytes = StackSize - StackOffset;

1167 }	1034 }

1168	1035

1169 // Generate "sub esp, SpillAreaSizeBytes"	1036 // Generate "sub esp, SpillAreaSizeBytes"

1170 if (SpillAreaSizeBytes)	1037 if (SpillAreaSizeBytes)

1171 _sub(getPhysicalRegister(RegX8632::Reg_esp),	1038 _sub(getPhysicalRegister(RegX8632::Reg_esp),

1172 Ctx->getConstantInt32(SpillAreaSizeBytes));	1039 Ctx->getConstantInt32(SpillAreaSizeBytes));

1173 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);	1040 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

1174	1041

1175 resetStackAdjustment();	1042 resetStackAdjustment();

1176	1043

1177 // Fill in stack offsets for stack args, and copy args into registers	1044 // Fill in stack offsets for stack args, and copy args into registers

1178 // for those that were register-allocated. Args are pushed right to	1045 // for those that were register-allocated. Args are pushed right to

1179 // left, so Arg[0] is closest to the stack/frame pointer.	1046 // left, so Arg[0] is closest to the stack/frame pointer.

1180 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());	1047 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());

1181 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;	1048 size_t BasicFrameOffset =

	1049 PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES;

1182 if (!IsEbpBasedFrame)	1050 if (!IsEbpBasedFrame)

1183 BasicFrameOffset += SpillAreaSizeBytes;	1051 BasicFrameOffset += SpillAreaSizeBytes;

1184	1052

1185 const VarList &Args = Func->getArgs();	1053 const VarList &Args = Func->getArgs();

1186 size_t InArgsSizeBytes = 0;	1054 size_t InArgsSizeBytes = 0;

1187 unsigned NumXmmArgs = 0;	1055 unsigned NumXmmArgs = 0;

1188 for (Variable *Arg : Args) {	1056 for (Variable *Arg : Args) {

1189 // Skip arguments passed in registers.	1057 // Skip arguments passed in registers.

1190 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) {	1058 if (isVectorType(Arg->getType()) && NumXmmArgs < Traits::X86_MAX_XMM_ARGS) {

1191 ++NumXmmArgs;	1059 ++NumXmmArgs;

1192 continue;	1060 continue;

1193 }	1061 }

1194 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);	1062 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);

1195 }	1063 }

1196	1064

1197 // Fill in stack offsets for locals.	1065 // Fill in stack offsets for locals.

1198 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,	1066 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,

1199 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,	1067 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,

1200 IsEbpBasedFrame);	1068 IsEbpBasedFrame);

1201 // Assign stack offsets to variables that have been linked to spilled	1069 // Assign stack offsets to variables that have been linked to spilled

1202 // variables.	1070 // variables.

1203 for (Variable *Var : VariablesLinkedToSpillSlots) {	1071 for (Variable *Var : VariablesLinkedToSpillSlots) {

1204 Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo();	1072 Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo();

1205 Var->setStackOffset(Linked->getStackOffset());	1073 Var->setStackOffset(Linked->getStackOffset());

1206 }	1074 }

1207 this->HasComputedFrame = true;	1075 this->HasComputedFrame = true;

1208	1076

1209 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) {	1077 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) {

1210 OstreamLocker L(Func->getContext());	1078 OstreamLocker L(Func->getContext());

1211 Ostream &Str = Func->getContext()->getStrDump();	1079 Ostream &Str = Func->getContext()->getStrDump();

1212	1080

1213 Str << "Stack layout:\n";	1081 Str << "Stack layout:\n";

1214 uint32_t EspAdjustmentPaddingSize =	1082 uint32_t EspAdjustmentPaddingSize =

1215 SpillAreaSizeBytes - LocalsSpillAreaSize -	1083 SpillAreaSizeBytes - LocalsSpillAreaSize -

1216 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;	1084 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;

1217 Str << " in-args = " << InArgsSizeBytes << " bytes\n"	1085 Str << " in-args = " << InArgsSizeBytes << " bytes\n"

1218 << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n"	1086 << " return address = " << Traits::X86_RET_IP_SIZE_BYTES << " bytes\n"

1219 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"	1087 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"

1220 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"	1088 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"

1221 << " globals spill area = " << GlobalsSize << " bytes\n"	1089 << " globals spill area = " << GlobalsSize << " bytes\n"

1222 << " globals-locals spill areas intermediate padding = "	1090 << " globals-locals spill areas intermediate padding = "

1223 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"	1091 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"

1224 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"	1092 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"

1225 << " esp alignment padding = " << EspAdjustmentPaddingSize	1093 << " esp alignment padding = " << EspAdjustmentPaddingSize

1226 << " bytes\n";	1094 << " bytes\n";

1227	1095

1228 Str << "Stack details:\n"	1096 Str << "Stack details:\n"

1229 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"	1097 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n"

1230 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"	1098 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"

1231 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes	1099 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes

1232 << " bytes\n"	1100 << " bytes\n"

1233 << " is ebp based = " << IsEbpBasedFrame << "\n";	1101 << " is ebp based = " << IsEbpBasedFrame << "\n";

1234 }	1102 }

1235 }	1103 }

1236	1104

1237 void TargetX8632::addEpilog(CfgNode *Node) {	1105 template <class Machine> void TargetX86Base<Machine>::addEpilog(CfgNode *Node) {

1238 InstList &Insts = Node->getInsts();	1106 InstList &Insts = Node->getInsts();

1239 InstList::reverse_iterator RI, E;	1107 InstList::reverse_iterator RI, E;

1240 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {	1108 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {

1241 if (llvm::isa<InstX8632Ret>(*RI))	1109 if (llvm::isa<InstX8632Ret>(*RI))

1242 break;	1110 break;

1243 }	1111 }

1244 if (RI == E)	1112 if (RI == E)

1245 return;	1113 return;

1246	1114

1247 // Convert the reverse_iterator position into its corresponding	1115 // Convert the reverse_iterator position into its corresponding

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1280	1148

1281 if (!Ctx->getFlags().getUseSandboxing())	1149 if (!Ctx->getFlags().getUseSandboxing())

1282 return;	1150 return;

1283 // Change the original ret instruction into a sandboxed return sequence.	1151 // Change the original ret instruction into a sandboxed return sequence.

1284 // t:ecx = pop	1152 // t:ecx = pop

1285 // bundle_lock	1153 // bundle_lock

1286 // and t, ~31	1154 // and t, ~31

1287 // jmp *t	1155 // jmp *t

1288 // bundle_unlock	1156 // bundle_unlock

1289 // FakeUse <original_ret_operand>	1157 // FakeUse <original_ret_operand>

1290 const SizeT BundleSize = 1	1158 const SizeT BundleSize =

1291 << Func->getAssembler<>()->getBundleAlignLog2Bytes();	1159 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes();

1292 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);	1160 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);

1293 _pop(T_ecx);	1161 _pop(T_ecx);

1294 _bundle_lock();	1162 _bundle_lock();

1295 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1)));	1163 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1)));

1296 _jmp(T_ecx);	1164 _jmp(T_ecx);

1297 _bundle_unlock();	1165 _bundle_unlock();

1298 if (RI->getSrcSize()) {	1166 if (RI->getSrcSize()) {

1299 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));	1167 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0));

1300 Context.insert(InstFakeUse::create(Func, RetValue));	1168 Context.insert(InstFakeUse::create(Func, RetValue));

1301 }	1169 }

1302 RI->setDeleted();	1170 RI->setDeleted();

1303 }	1171 }

1304	1172

1305 void TargetX8632::split64(Variable *Var) {	1173 template <class Machine> void TargetX86Base<Machine>::split64(Variable *Var) {

1306 switch (Var->getType()) {	1174 switch (Var->getType()) {

1307 default:	1175 default:

1308 return;	1176 return;

1309 case IceType_i64:	1177 case IceType_i64:

1310 // TODO: Only consider F64 if we need to push each half when	1178 // TODO: Only consider F64 if we need to push each half when

1311 // passing as an argument to a function call. Note that each half	1179 // passing as an argument to a function call. Note that each half

1312 // is still typed as I32.	1180 // is still typed as I32.

1313 case IceType_f64:	1181 case IceType_f64:

1314 break;	1182 break;

1315 }	1183 }

(...skipping 10 matching lines...) Expand all Loading...
1326 Lo->setName(Func, Var->getName(Func) + "__lo");	1194 Lo->setName(Func, Var->getName(Func) + "__lo");

1327 Hi->setName(Func, Var->getName(Func) + "__hi");	1195 Hi->setName(Func, Var->getName(Func) + "__hi");

1328 }	1196 }

1329 Var->setLoHi(Lo, Hi);	1197 Var->setLoHi(Lo, Hi);

1330 if (Var->getIsArg()) {	1198 if (Var->getIsArg()) {

1331 Lo->setIsArg();	1199 Lo->setIsArg();

1332 Hi->setIsArg();	1200 Hi->setIsArg();

1333 }	1201 }

1334 }	1202 }

1335	1203

1336 Operand *TargetX8632::loOperand(Operand *Operand) {	1204 template <class Machine>

	1205 Operand *TargetX86Base<Machine>::loOperand(Operand *Operand) {

1337 assert(Operand->getType() == IceType_i64 ||	1206 assert(Operand->getType() == IceType_i64 ||

1338 Operand->getType() == IceType_f64);	1207 Operand->getType() == IceType_f64);

1339 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)	1208 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)

1340 return Operand;	1209 return Operand;

1341 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {	1210 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {

1342 split64(Var);	1211 split64(Var);

1343 return Var->getLo();	1212 return Var->getLo();

1344 }	1213 }

1345 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {	1214 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {

1346 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>(	1215 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>(

1347 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));	1216 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));

1348 return legalize(ConstInt);	1217 return legalize(ConstInt);

1349 }	1218 }

1350 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {	1219 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {

1351 OperandX8632Mem *MemOperand = OperandX8632Mem::create(	1220 OperandX8632Mem *MemOperand = OperandX8632Mem::create(

1352 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),	1221 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),

1353 Mem->getShift(), Mem->getSegmentRegister());	1222 Mem->getShift(), Mem->getSegmentRegister());

1354 // Test if we should randomize or pool the offset, if so randomize it or	1223 // Test if we should randomize or pool the offset, if so randomize it or

1355 // pool it then create mem operand with the blinded/pooled constant.	1224 // pool it then create mem operand with the blinded/pooled constant.

1356 // Otherwise, return the mem operand as ordinary mem operand.	1225 // Otherwise, return the mem operand as ordinary mem operand.

1357 return legalize(MemOperand);	1226 return legalize(MemOperand);

1358 }	1227 }

1359 llvm_unreachable("Unsupported operand type");	1228 llvm_unreachable("Unsupported operand type");

1360 return nullptr;	1229 return nullptr;

1361 }	1230 }

1362	1231

1363 Operand *TargetX8632::hiOperand(Operand *Operand) {	1232 template <class Machine>

	1233 Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) {

1364 assert(Operand->getType() == IceType_i64 ||	1234 assert(Operand->getType() == IceType_i64 ||

1365 Operand->getType() == IceType_f64);	1235 Operand->getType() == IceType_f64);

1366 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)	1236 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)

1367 return Operand;	1237 return Operand;

1368 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {	1238 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {

1369 split64(Var);	1239 split64(Var);

1370 return Var->getHi();	1240 return Var->getHi();

1371 }	1241 }

1372 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {	1242 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {

1373 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>(	1243 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>(

(...skipping 20 matching lines...) Expand all Loading...
1394 Mem->getShift(), Mem->getSegmentRegister());	1264 Mem->getShift(), Mem->getSegmentRegister());

1395 // Test if the Offset is an eligible i32 constant for randomization and	1265 // Test if the Offset is an eligible i32 constant for randomization and

1396 // pooling. Blind/pool it if it is. Otherwise return as ordinary mem	1266 // pooling. Blind/pool it if it is. Otherwise return as ordinary mem

1397 // operand.	1267 // operand.

1398 return legalize(MemOperand);	1268 return legalize(MemOperand);

1399 }	1269 }

1400 llvm_unreachable("Unsupported operand type");	1270 llvm_unreachable("Unsupported operand type");

1401 return nullptr;	1271 return nullptr;

1402 }	1272 }

1403	1273

1404 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,	1274 template <class Machine>

1405 RegSetMask Exclude) const {	1275 llvm::SmallBitVector

	1276 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,

	1277 RegSetMask Exclude) const {

1406 llvm::SmallBitVector Registers(RegX8632::Reg_NUM);	1278 llvm::SmallBitVector Registers(RegX8632::Reg_NUM);

1407	1279

1408 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \	1280 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \

1409 frameptr, isI8, isInt, isFP) \	1281 frameptr, isI8, isInt, isFP) \

1410 if (scratch && (Include & RegSet_CallerSave)) \	1282 if (scratch && (Include & RegSet_CallerSave)) \

1411 Registers[RegX8632::val] = true; \	1283 Registers[RegX8632::val] = true; \

1412 if (preserved && (Include & RegSet_CalleeSave)) \	1284 if (preserved && (Include & RegSet_CalleeSave)) \

1413 Registers[RegX8632::val] = true; \	1285 Registers[RegX8632::val] = true; \

1414 if (stackptr && (Include & RegSet_StackPointer)) \	1286 if (stackptr && (Include & RegSet_StackPointer)) \

1415 Registers[RegX8632::val] = true; \	1287 Registers[RegX8632::val] = true; \

1416 if (frameptr && (Include & RegSet_FramePointer)) \	1288 if (frameptr && (Include & RegSet_FramePointer)) \

1417 Registers[RegX8632::val] = true; \	1289 Registers[RegX8632::val] = true; \

1418 if (scratch && (Exclude & RegSet_CallerSave)) \	1290 if (scratch && (Exclude & RegSet_CallerSave)) \

1419 Registers[RegX8632::val] = false; \	1291 Registers[RegX8632::val] = false; \

1420 if (preserved && (Exclude & RegSet_CalleeSave)) \	1292 if (preserved && (Exclude & RegSet_CalleeSave)) \

1421 Registers[RegX8632::val] = false; \	1293 Registers[RegX8632::val] = false; \

1422 if (stackptr && (Exclude & RegSet_StackPointer)) \	1294 if (stackptr && (Exclude & RegSet_StackPointer)) \

1423 Registers[RegX8632::val] = false; \	1295 Registers[RegX8632::val] = false; \

1424 if (frameptr && (Exclude & RegSet_FramePointer)) \	1296 if (frameptr && (Exclude & RegSet_FramePointer)) \

1425 Registers[RegX8632::val] = false;	1297 Registers[RegX8632::val] = false;

1426	1298

1427 REGX8632_TABLE	1299 REGX8632_TABLE

1428	1300

1429 #undef X	1301 #undef X

1430	1302

1431 return Registers;	1303 return Registers;

1432 }	1304 }

1433	1305

1434 void TargetX8632::lowerAlloca(const InstAlloca *Inst) {	1306 template <class Machine>

	1307 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {

1435 IsEbpBasedFrame = true;	1308 IsEbpBasedFrame = true;

1436 // Conservatively require the stack to be aligned. Some stack	1309 // Conservatively require the stack to be aligned. Some stack

1437 // adjustment operations implemented below assume that the stack is	1310 // adjustment operations implemented below assume that the stack is

1438 // aligned before the alloca. All the alloca code ensures that the	1311 // aligned before the alloca. All the alloca code ensures that the

1439 // stack alignment is preserved after the alloca. The stack alignment	1312 // stack alignment is preserved after the alloca. The stack alignment

1440 // restriction can be relaxed in some cases.	1313 // restriction can be relaxed in some cases.

1441 NeedsStackAlignment = true;	1314 NeedsStackAlignment = true;

1442	1315

1443 // TODO(stichnot): minimize the number of adjustments of esp, etc.	1316 // TODO(stichnot): minimize the number of adjustments of esp, etc.

1444 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);	1317 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);

1445 Operand *TotalSize = legalize(Inst->getSizeInBytes());	1318 Operand *TotalSize = legalize(Inst->getSizeInBytes());

1446 Variable *Dest = Inst->getDest();	1319 Variable *Dest = Inst->getDest();

1447 uint32_t AlignmentParam = Inst->getAlignInBytes();	1320 uint32_t AlignmentParam = Inst->getAlignInBytes();

1448 // For default align=0, set it to the real value 1, to avoid any	1321 // For default align=0, set it to the real value 1, to avoid any

1449 // bit-manipulation problems below.	1322 // bit-manipulation problems below.

1450 AlignmentParam = std::max(AlignmentParam, 1u);	1323 AlignmentParam = std::max(AlignmentParam, 1u);

1451	1324

1452 // LLVM enforces power of 2 alignment.	1325 // LLVM enforces power of 2 alignment.

1453 assert(llvm::isPowerOf2_32(AlignmentParam));	1326 assert(llvm::isPowerOf2_32(AlignmentParam));

1454 assert(llvm::isPowerOf2_32(X86_STACK_ALIGNMENT_BYTES));	1327 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));

1455	1328

1456 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);	1329 uint32_t Alignment =

1457 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {	1330 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);

	1331 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) {

1458 _and(esp, Ctx->getConstantInt32(-Alignment));	1332 _and(esp, Ctx->getConstantInt32(-Alignment));

1459 }	1333 }

1460 if (const auto *ConstantTotalSize =	1334 if (const auto *ConstantTotalSize =

1461 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {	1335 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {

1462 uint32_t Value = ConstantTotalSize->getValue();	1336 uint32_t Value = ConstantTotalSize->getValue();

1463 Value = Utils::applyAlignment(Value, Alignment);	1337 Value = Utils::applyAlignment(Value, Alignment);

1464 _sub(esp, Ctx->getConstantInt32(Value));	1338 _sub(esp, Ctx->getConstantInt32(Value));

1465 } else {	1339 } else {

1466 // Non-constant sizes need to be adjusted to the next highest	1340 // Non-constant sizes need to be adjusted to the next highest

1467 // multiple of the required alignment at runtime.	1341 // multiple of the required alignment at runtime.

1468 Variable *T = makeReg(IceType_i32);	1342 Variable *T = makeReg(IceType_i32);

1469 _mov(T, TotalSize);	1343 _mov(T, TotalSize);

1470 _add(T, Ctx->getConstantInt32(Alignment - 1));	1344 _add(T, Ctx->getConstantInt32(Alignment - 1));

1471 _and(T, Ctx->getConstantInt32(-Alignment));	1345 _and(T, Ctx->getConstantInt32(-Alignment));

1472 _sub(esp, T);	1346 _sub(esp, T);

1473 }	1347 }

1474 _mov(Dest, esp);	1348 _mov(Dest, esp);

1475 }	1349 }

1476	1350

1477 // Strength-reduce scalar integer multiplication by a constant (for	1351 // Strength-reduce scalar integer multiplication by a constant (for

1478 // i32 or narrower) for certain constants. The lea instruction can be	1352 // i32 or narrower) for certain constants. The lea instruction can be

1479 // used to multiply by 3, 5, or 9, and the shl instruction can be used	1353 // used to multiply by 3, 5, or 9, and the shl instruction can be used

1480 // to multiply by powers of 2. These can be combined such that	1354 // to multiply by powers of 2. These can be combined such that

1481 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,	1355 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5,

1482 // combined with left-shifting by 2.	1356 // combined with left-shifting by 2.

1483 bool TargetX8632::optimizeScalarMul(Variable *Dest, Operand *Src0,	1357 template <class Machine>

1484 int32_t Src1) {	1358 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,

	1359 int32_t Src1) {

1485 // Disable this optimization for Om1 and O0, just to keep things	1360 // Disable this optimization for Om1 and O0, just to keep things

1486 // simple there.	1361 // simple there.

1487 if (Ctx->getFlags().getOptLevel() < Opt_1)	1362 if (Ctx->getFlags().getOptLevel() < Opt_1)

1488 return false;	1363 return false;

1489 Type Ty = Dest->getType();	1364 Type Ty = Dest->getType();

1490 Variable *T = nullptr;	1365 Variable *T = nullptr;

1491 if (Src1 == -1) {	1366 if (Src1 == -1) {

1492 _mov(T, Src0);	1367 _mov(T, Src0);

1493 _neg(T);	1368 _neg(T);

1494 _mov(Dest, T);	1369 _mov(Dest, T);

(...skipping 68 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1563 }	1438 }

1564 if (Count2) {	1439 if (Count2) {

1565 _shl(T, Ctx->getConstantInt(Ty, Count2));	1440 _shl(T, Ctx->getConstantInt(Ty, Count2));

1566 }	1441 }

1567 if (Src1IsNegative)	1442 if (Src1IsNegative)

1568 _neg(T);	1443 _neg(T);

1569 _mov(Dest, T);	1444 _mov(Dest, T);

1570 return true;	1445 return true;

1571 }	1446 }

1572	1447

1573 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {	1448 template <class Machine>

	1449 void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) {

1574 Variable *Dest = Inst->getDest();	1450 Variable *Dest = Inst->getDest();

1575 Operand *Src0 = legalize(Inst->getSrc(0));	1451 Operand *Src0 = legalize(Inst->getSrc(0));

1576 Operand *Src1 = legalize(Inst->getSrc(1));	1452 Operand *Src1 = legalize(Inst->getSrc(1));

1577 if (Inst->isCommutative()) {	1453 if (Inst->isCommutative()) {

1578 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))	1454 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))

1579 std::swap(Src0, Src1);	1455 std::swap(Src0, Src1);

1580 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))	1456 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))

1581 std::swap(Src0, Src1);	1457 std::swap(Src0, Src1);

1582 }	1458 }

1583 if (Dest->getType() == IceType_i64) {	1459 if (Dest->getType() == IceType_i64) {

(...skipping 282 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1866 case InstArithmetic::Sub: {	1742 case InstArithmetic::Sub: {

1867 Variable *T = makeReg(Dest->getType());	1743 Variable *T = makeReg(Dest->getType());

1868 _movp(T, Src0);	1744 _movp(T, Src0);

1869 _psub(T, Src1);	1745 _psub(T, Src1);

1870 _movp(Dest, T);	1746 _movp(Dest, T);

1871 } break;	1747 } break;

1872 case InstArithmetic::Mul: {	1748 case InstArithmetic::Mul: {

1873 bool TypesAreValidForPmull =	1749 bool TypesAreValidForPmull =

1874 Dest->getType() == IceType_v4i32 \|\| Dest->getType() == IceType_v8i16;	1750 Dest->getType() == IceType_v4i32 \|\| Dest->getType() == IceType_v8i16;

1875 bool InstructionSetIsValidForPmull =	1751 bool InstructionSetIsValidForPmull =

1876 Dest->getType() == IceType_v8i16 \|\| InstructionSet >= SSE4_1;	1752 Dest->getType() == IceType_v8i16 \|\| InstructionSet >= Machine::SSE4_1;

1877 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {	1753 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {

1878 Variable *T = makeReg(Dest->getType());	1754 Variable *T = makeReg(Dest->getType());

1879 _movp(T, Src0);	1755 _movp(T, Src0);

1880 _pmull(T, Src1);	1756 _pmull(T, Src1);

1881 _movp(Dest, T);	1757 _movp(Dest, T);

1882 } else if (Dest->getType() == IceType_v4i32) {	1758 } else if (Dest->getType() == IceType_v4i32) {

1883 // Lowering sequence:	1759 // Lowering sequence:

1884 // Note: The mask arguments have index 0 on the left.	1760 // Note: The mask arguments have index 0 on the left.

1885 //	1761 //

1886 // movups T1, Src0	1762 // movups T1, Src0

(...skipping 173 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2060 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {	1936 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {

2061 uint32_t LogDiv = llvm::Log2_32(UDivisor);	1937 uint32_t LogDiv = llvm::Log2_32(UDivisor);

2062 Type Ty = Dest->getType();	1938 Type Ty = Dest->getType();

2063 // LLVM does the following for dest=src/(1<<log):	1939 // LLVM does the following for dest=src/(1<<log):

2064 // t=src	1940 // t=src

2065 // sar t,typewidth-1 // -1 if src is negative, 0 if not	1941 // sar t,typewidth-1 // -1 if src is negative, 0 if not

2066 // shr t,typewidth-log	1942 // shr t,typewidth-log

2067 // add t,src	1943 // add t,src

2068 // sar t,log	1944 // sar t,log

2069 // dest=t	1945 // dest=t

2070 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty);	1946 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);

2071 _mov(T, Src0);	1947 _mov(T, Src0);

2072 // If for some reason we are dividing by 1, just treat it	1948 // If for some reason we are dividing by 1, just treat it

2073 // like an assignment.	1949 // like an assignment.

2074 if (LogDiv > 0) {	1950 if (LogDiv > 0) {

2075 // The initial sar is unnecessary when dividing by 2.	1951 // The initial sar is unnecessary when dividing by 2.

2076 if (LogDiv > 1)	1952 if (LogDiv > 1)

2077 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));	1953 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));

2078 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));	1954 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));

2079 _add(T, Src0);	1955 _add(T, Src0);

2080 _sar(T, Ctx->getConstantInt(Ty, LogDiv));	1956 _sar(T, Ctx->getConstantInt(Ty, LogDiv));

(...skipping 48 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2129 Type Ty = Dest->getType();	2005 Type Ty = Dest->getType();

2130 // LLVM does the following for dest=src%(1<<log):	2006 // LLVM does the following for dest=src%(1<<log):

2131 // t=src	2007 // t=src

2132 // sar t,typewidth-1 // -1 if src is negative, 0 if not	2008 // sar t,typewidth-1 // -1 if src is negative, 0 if not

2133 // shr t,typewidth-log	2009 // shr t,typewidth-log

2134 // add t,src	2010 // add t,src

2135 // and t, -(1<<log)	2011 // and t, -(1<<log)

2136 // sub t,src	2012 // sub t,src

2137 // neg t	2013 // neg t

2138 // dest=t	2014 // dest=t

2139 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty);	2015 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);

2140 // If for some reason we are dividing by 1, just assign 0.	2016 // If for some reason we are dividing by 1, just assign 0.

2141 if (LogDiv == 0) {	2017 if (LogDiv == 0) {

2142 _mov(Dest, Ctx->getConstantZero(Ty));	2018 _mov(Dest, Ctx->getConstantZero(Ty));

2143 return;	2019 return;

2144 }	2020 }

2145 _mov(T, Src0);	2021 _mov(T, Src0);

2146 // The initial sar is unnecessary when dividing by 2.	2022 // The initial sar is unnecessary when dividing by 2.

2147 if (LogDiv > 1)	2023 if (LogDiv > 1)

2148 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));	2024 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));

2149 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));	2025 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));

(...skipping 47 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2197 Type Ty = Dest->getType();	2073 Type Ty = Dest->getType();

2198 InstCall *Call = makeHelperCall(	2074 InstCall *Call = makeHelperCall(

2199 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);	2075 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs);

2200 Call->addArg(Src0);	2076 Call->addArg(Src0);

2201 Call->addArg(Src1);	2077 Call->addArg(Src1);

2202 return lowerCall(Call);	2078 return lowerCall(Call);

2203 }	2079 }

2204 }	2080 }

2205 }	2081 }

2206	2082

2207 void TargetX8632::lowerAssign(const InstAssign *Inst) {	2083 template <class Machine>

	2084 void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) {

2208 Variable *Dest = Inst->getDest();	2085 Variable *Dest = Inst->getDest();

2209 Operand *Src0 = Inst->getSrc(0);	2086 Operand *Src0 = Inst->getSrc(0);

2210 assert(Dest->getType() == Src0->getType());	2087 assert(Dest->getType() == Src0->getType());

2211 if (Dest->getType() == IceType_i64) {	2088 if (Dest->getType() == IceType_i64) {

2212 Src0 = legalize(Src0);	2089 Src0 = legalize(Src0);

2213 Operand *Src0Lo = loOperand(Src0);	2090 Operand *Src0Lo = loOperand(Src0);

2214 Operand *Src0Hi = hiOperand(Src0);	2091 Operand *Src0Hi = hiOperand(Src0);

2215 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));	2092 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

2216 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));	2093 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

2217 Variable *T_Lo = nullptr, *T_Hi = nullptr;	2094 Variable *T_Lo = nullptr, *T_Hi = nullptr;

(...skipping 24 matching lines...) Expand all Loading...
2242 // register or a scalar integer immediate.	2119 // register or a scalar integer immediate.

2243 RI = legalize(Src0, Legal_Reg \| Legal_Imm);	2120 RI = legalize(Src0, Legal_Reg \| Legal_Imm);

2244 }	2121 }

2245 if (isVectorType(Dest->getType()))	2122 if (isVectorType(Dest->getType()))

2246 _movp(Dest, RI);	2123 _movp(Dest, RI);

2247 else	2124 else

2248 _mov(Dest, RI);	2125 _mov(Dest, RI);

2249 }	2126 }

2250 }	2127 }

2251	2128

2252 void TargetX8632::lowerBr(const InstBr *Inst) {	2129 template <class Machine>

	2130 void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) {

2253 if (Inst->isUnconditional()) {	2131 if (Inst->isUnconditional()) {

2254 _br(Inst->getTargetUnconditional());	2132 _br(Inst->getTargetUnconditional());

2255 return;	2133 return;

2256 }	2134 }

2257 Operand *Cond = Inst->getCondition();	2135 Operand *Cond = Inst->getCondition();

2258	2136

2259 // Handle folding opportunities.	2137 // Handle folding opportunities.

2260 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) {	2138 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) {

2261 assert(Producer->isDeleted());	2139 assert(Producer->isDeleted());

2262 switch (BoolFolding::getProducerKind(Producer)) {	2140 switch (BoolFolding::getProducerKind(Producer)) {

2263 default:	2141 default:

2264 break;	2142 break;

2265 case BoolFolding::PK_Icmp32: {	2143 case BoolFolding::PK_Icmp32: {

2266 // TODO(stichnot): Refactor similarities between this block and	2144 // TODO(stichnot): Refactor similarities between this block and

2267 // the corresponding code in lowerIcmp().	2145 // the corresponding code in lowerIcmp().

2268 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);	2146 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);

2269 Operand *Src0 = Producer->getSrc(0);	2147 Operand *Src0 = Producer->getSrc(0);

2270 Operand *Src1 = legalize(Producer->getSrc(1));	2148 Operand *Src1 = legalize(Producer->getSrc(1));

2271 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);	2149 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);

2272 _cmp(Src0RM, Src1);	2150 _cmp(Src0RM, Src1);

2273 _br(getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(),	2151 _br(Traits::getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(),

2274 Inst->getTargetFalse());	2152 Inst->getTargetFalse());

2275 return;	2153 return;

2276 }	2154 }

2277 }	2155 }

2278 }	2156 }

2279	2157

2280 Operand *Src0 = legalize(Cond, Legal_Reg \| Legal_Mem);	2158 Operand *Src0 = legalize(Cond, Legal_Reg \| Legal_Mem);

2281 Constant *Zero = Ctx->getConstantZero(IceType_i32);	2159 Constant *Zero = Ctx->getConstantZero(IceType_i32);

2282 _cmp(Src0, Zero);	2160 _cmp(Src0, Zero);

2283 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());	2161 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());

2284 }	2162 }

2285	2163

2286 void TargetX8632::lowerCall(const InstCall *Instr) {	2164 template <class Machine>

	2165 void TargetX86Base<Machine>::lowerCall(const InstCall *Instr) {

2287 // x86-32 calling convention:	2166 // x86-32 calling convention:

2288 //	2167 //

2289 // * At the point before the call, the stack must be aligned to 16	2168 // * At the point before the call, the stack must be aligned to 16

2290 // bytes.	2169 // bytes.

2291 //	2170 //

2292 // * The first four arguments of vector type, regardless of their	2171 // * The first four arguments of vector type, regardless of their

2293 // position relative to the other arguments in the argument list, are	2172 // position relative to the other arguments in the argument list, are

2294 // placed in registers xmm0 - xmm3.	2173 // placed in registers xmm0 - xmm3.

2295 //	2174 //

2296 // * Other arguments are pushed onto the stack in right-to-left order,	2175 // * Other arguments are pushed onto the stack in right-to-left order,

(...skipping 14 matching lines...) Expand all Loading...
2311 OperandList StackArgs, StackArgLocations;	2190 OperandList StackArgs, StackArgLocations;

2312 uint32_t ParameterAreaSizeBytes = 0;	2191 uint32_t ParameterAreaSizeBytes = 0;

2313	2192

2314 // Classify each argument operand according to the location where the	2193 // Classify each argument operand according to the location where the

2315 // argument is passed.	2194 // argument is passed.

2316 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {	2195 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {

2317 Operand *Arg = Instr->getArg(i);	2196 Operand *Arg = Instr->getArg(i);

2318 Type Ty = Arg->getType();	2197 Type Ty = Arg->getType();

2319 // The PNaCl ABI requires the width of arguments to be at least 32 bits.	2198 // The PNaCl ABI requires the width of arguments to be at least 32 bits.

2320 assert(typeWidthInBytes(Ty) >= 4);	2199 assert(typeWidthInBytes(Ty) >= 4);

2321 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {	2200 if (isVectorType(Ty) && XmmArgs.size() < Traits::X86_MAX_XMM_ARGS) {

2322 XmmArgs.push_back(Arg);	2201 XmmArgs.push_back(Arg);

2323 } else {	2202 } else {

2324 StackArgs.push_back(Arg);	2203 StackArgs.push_back(Arg);

2325 if (isVectorType(Arg->getType())) {	2204 if (isVectorType(Arg->getType())) {

2326 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);	2205 ParameterAreaSizeBytes =

	2206 Traits::applyStackAlignment(ParameterAreaSizeBytes);

2327 }	2207 }

2328 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);	2208 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);

2329 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);	2209 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);

2330 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));	2210 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));

2331 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());	2211 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());

2332 }	2212 }

2333 }	2213 }

2334	2214

2335 // Adjust the parameter area so that the stack is aligned. It is	2215 // Adjust the parameter area so that the stack is aligned. It is

2336 // assumed that the stack is already aligned at the start of the	2216 // assumed that the stack is already aligned at the start of the

2337 // calling sequence.	2217 // calling sequence.

2338 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);	2218 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);

2339	2219

2340 // Subtract the appropriate amount for the argument area. This also	2220 // Subtract the appropriate amount for the argument area. This also

2341 // takes care of setting the stack adjustment during emission.	2221 // takes care of setting the stack adjustment during emission.

2342 //	2222 //

2343 // TODO: If for some reason the call instruction gets dead-code	2223 // TODO: If for some reason the call instruction gets dead-code

2344 // eliminated after lowering, we would need to ensure that the	2224 // eliminated after lowering, we would need to ensure that the

2345 // pre-call and the post-call esp adjustment get eliminated as well.	2225 // pre-call and the post-call esp adjustment get eliminated as well.

2346 if (ParameterAreaSizeBytes) {	2226 if (ParameterAreaSizeBytes) {

2347 _adjust_stack(ParameterAreaSizeBytes);	2227 _adjust_stack(ParameterAreaSizeBytes);

2348 }	2228 }

(...skipping 62 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2411 Operand *CallTarget = legalize(Instr->getCallTarget());	2291 Operand *CallTarget = legalize(Instr->getCallTarget());

2412 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();	2292 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();

2413 if (NeedSandboxing) {	2293 if (NeedSandboxing) {

2414 if (llvm::isa<Constant>(CallTarget)) {	2294 if (llvm::isa<Constant>(CallTarget)) {

2415 _bundle_lock(InstBundleLock::Opt_AlignToEnd);	2295 _bundle_lock(InstBundleLock::Opt_AlignToEnd);

2416 } else {	2296 } else {

2417 Variable *CallTargetVar = nullptr;	2297 Variable *CallTargetVar = nullptr;

2418 _mov(CallTargetVar, CallTarget);	2298 _mov(CallTargetVar, CallTarget);

2419 _bundle_lock(InstBundleLock::Opt_AlignToEnd);	2299 _bundle_lock(InstBundleLock::Opt_AlignToEnd);

2420 const SizeT BundleSize =	2300 const SizeT BundleSize =

2421 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes();	2301 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes();

2422 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));	2302 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));

2423 CallTarget = CallTargetVar;	2303 CallTarget = CallTargetVar;

2424 }	2304 }

2425 }	2305 }

2426 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);	2306 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);

2427 Context.insert(NewCall);	2307 Context.insert(NewCall);

2428 if (NeedSandboxing)	2308 if (NeedSandboxing)

2429 _bundle_unlock();	2309 _bundle_unlock();

2430 if (ReturnRegHi)	2310 if (ReturnRegHi)

2431 Context.insert(InstFakeDef::create(Func, ReturnRegHi));	2311 Context.insert(InstFakeDef::create(Func, ReturnRegHi));

(...skipping 41 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2473 // st(0).	2353 // st(0).

2474 // If Dest ends up being a physical xmm register, the fstp emit code	2354 // If Dest ends up being a physical xmm register, the fstp emit code

2475 // will route st(0) through a temporary stack slot.	2355 // will route st(0) through a temporary stack slot.

2476 _fstp(Dest);	2356 _fstp(Dest);

2477 // Create a fake use of Dest in case it actually isn't used,	2357 // Create a fake use of Dest in case it actually isn't used,

2478 // because st(0) still needs to be popped.	2358 // because st(0) still needs to be popped.

2479 Context.insert(InstFakeUse::create(Func, Dest));	2359 Context.insert(InstFakeUse::create(Func, Dest));

2480 }	2360 }

2481 }	2361 }

2482	2362

2483 void TargetX8632::lowerCast(const InstCast *Inst) {	2363 template <class Machine>

	2364 void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) {

2484 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)	2365 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)

2485 InstCast::OpKind CastKind = Inst->getCastKind();	2366 InstCast::OpKind CastKind = Inst->getCastKind();

2486 Variable *Dest = Inst->getDest();	2367 Variable *Dest = Inst->getDest();

2487 switch (CastKind) {	2368 switch (CastKind) {

2488 default:	2369 default:

2489 Func->setError("Cast type not supported");	2370 Func->setError("Cast type not supported");

2490 return;	2371 return;

2491 case InstCast::Sext: {	2372 case InstCast::Sext: {

2492 // Src0RM is the source operand legalized to physical register or memory,	2373 // Src0RM is the source operand legalized to physical register or memory,

2493 // but not immediate, since the relevant x86 native instructions don't	2374 // but not immediate, since the relevant x86 native instructions don't

2494 // allow an immediate operand. If the operand is an immediate, we could	2375 // allow an immediate operand. If the operand is an immediate, we could

2495 // consider computing the strength-reduced result at translation time,	2376 // consider computing the strength-reduced result at translation time,

2496 // but we're unlikely to see something like that in the bitcode that	2377 // but we're unlikely to see something like that in the bitcode that

2497 // the optimizer wouldn't have already taken care of.	2378 // the optimizer wouldn't have already taken care of.

2498 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg \| Legal_Mem);	2379 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg \| Legal_Mem);

2499 if (isVectorType(Dest->getType())) {	2380 if (isVectorType(Dest->getType())) {

2500 Type DestTy = Dest->getType();	2381 Type DestTy = Dest->getType();

2501 if (DestTy == IceType_v16i8) {	2382 if (DestTy == IceType_v16i8) {

2502 // onemask = materialize(1,1,...); dst = (src & onemask) > 0	2383 // onemask = materialize(1,1,...); dst = (src & onemask) > 0

2503 Variable *OneMask = makeVectorOfOnes(Dest->getType());	2384 Variable *OneMask = makeVectorOfOnes(Dest->getType());

2504 Variable *T = makeReg(DestTy);	2385 Variable *T = makeReg(DestTy);

2505 _movp(T, Src0RM);	2386 _movp(T, Src0RM);

2506 _pand(T, OneMask);	2387 _pand(T, OneMask);

2507 Variable *Zeros = makeVectorOfZeros(Dest->getType());	2388 Variable *Zeros = makeVectorOfZeros(Dest->getType());

2508 _pcmpgt(T, Zeros);	2389 _pcmpgt(T, Zeros);

2509 _movp(Dest, T);	2390 _movp(Dest, T);

2510 } else {	2391 } else {

2511 // width = width(elty) - 1; dest = (src << width) >> width	2392 // width = width(elty) - 1; dest = (src << width) >> width

2512 SizeT ShiftAmount =	2393 SizeT ShiftAmount =

2513 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1;	2394 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) -

	2395 1;

2514 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);	2396 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount);

2515 Variable *T = makeReg(DestTy);	2397 Variable *T = makeReg(DestTy);

2516 _movp(T, Src0RM);	2398 _movp(T, Src0RM);

2517 _psll(T, ShiftConstant);	2399 _psll(T, ShiftConstant);

2518 _psra(T, ShiftConstant);	2400 _psra(T, ShiftConstant);

2519 _movp(Dest, T);	2401 _movp(Dest, T);

2520 }	2402 }

2521 } else if (Dest->getType() == IceType_i64) {	2403 } else if (Dest->getType() == IceType_i64) {

2522 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2	2404 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2

2523 Constant *Shift = Ctx->getConstantInt32(31);	2405 Constant *Shift = Ctx->getConstantInt32(31);

(...skipping 14 matching lines...) Expand all Loading...
2538 _mov(T_Hi, T_Lo);	2420 _mov(T_Hi, T_Lo);

2539 if (Src0RM->getType() != IceType_i1)	2421 if (Src0RM->getType() != IceType_i1)

2540 // For i1, the sar instruction is already done above.	2422 // For i1, the sar instruction is already done above.

2541 _sar(T_Hi, Shift);	2423 _sar(T_Hi, Shift);

2542 _mov(DestHi, T_Hi);	2424 _mov(DestHi, T_Hi);

2543 } else if (Src0RM->getType() == IceType_i1) {	2425 } else if (Src0RM->getType() == IceType_i1) {

2544 // t1 = src	2426 // t1 = src

2545 // shl t1, dst_bitwidth - 1	2427 // shl t1, dst_bitwidth - 1

2546 // sar t1, dst_bitwidth - 1	2428 // sar t1, dst_bitwidth - 1

2547 // dst = t1	2429 // dst = t1

2548 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType());	2430 size_t DestBits =

	2431 Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType());

2549 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);	2432 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);

2550 Variable *T = makeReg(Dest->getType());	2433 Variable *T = makeReg(Dest->getType());

2551 if (typeWidthInBytes(Dest->getType()) <=	2434 if (typeWidthInBytes(Dest->getType()) <=

2552 typeWidthInBytes(Src0RM->getType())) {	2435 typeWidthInBytes(Src0RM->getType())) {

2553 _mov(T, Src0RM);	2436 _mov(T, Src0RM);

2554 } else {	2437 } else {

2555 // Widen the source using movsx or movzx. (It doesn't matter	2438 // Widen the source using movsx or movzx. (It doesn't matter

2556 // which one, since the following shl/sar overwrite the bits.)	2439 // which one, since the following shl/sar overwrite the bits.)

2557 _movzx(T, Src0RM);	2440 _movzx(T, Src0RM);

2558 }	2441 }

(...skipping 384 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2943 case IceType_v4i32:	2826 case IceType_v4i32:

2944 case IceType_v4f32: {	2827 case IceType_v4f32: {

2945 _movp(Dest, legalizeToVar(Src0));	2828 _movp(Dest, legalizeToVar(Src0));

2946 } break;	2829 } break;

2947 }	2830 }

2948 break;	2831 break;

2949 }	2832 }

2950 }	2833 }

2951 }	2834 }

2952	2835

2953 void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {	2836 template <class Machine>

	2837 void TargetX86Base<Machine>::lowerExtractElement(

	2838 const InstExtractElement *Inst) {

2954 Operand *SourceVectNotLegalized = Inst->getSrc(0);	2839 Operand *SourceVectNotLegalized = Inst->getSrc(0);

2955 ConstantInteger32 *ElementIndex =	2840 ConstantInteger32 *ElementIndex =

2956 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));	2841 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1));

2957 // Only constant indices are allowed in PNaCl IR.	2842 // Only constant indices are allowed in PNaCl IR.

2958 assert(ElementIndex);	2843 assert(ElementIndex);

2959	2844

2960 unsigned Index = ElementIndex->getValue();	2845 unsigned Index = ElementIndex->getValue();

2961 Type Ty = SourceVectNotLegalized->getType();	2846 Type Ty = SourceVectNotLegalized->getType();

2962 Type ElementTy = typeElementType(Ty);	2847 Type ElementTy = typeElementType(Ty);

2963 Type InVectorElementTy = getInVectorElementType(Ty);	2848 Type InVectorElementTy = Traits::getInVectorElementType(Ty);

2964 Variable *ExtractedElementR = makeReg(InVectorElementTy);	2849 Variable *ExtractedElementR = makeReg(InVectorElementTy);

2965	2850

2966 // TODO(wala): Determine the best lowering sequences for each type.	2851 // TODO(wala): Determine the best lowering sequences for each type.

2967 bool CanUsePextr =	2852 bool CanUsePextr = Ty == IceType_v8i16 || Ty == IceType_v8i1 ||

2968 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;	2853 InstructionSet >= Machine::SSE4_1;

2969 if (CanUsePextr && Ty != IceType_v4f32) {	2854 if (CanUsePextr && Ty != IceType_v4f32) {

2970 // Use pextrb, pextrw, or pextrd.	2855 // Use pextrb, pextrw, or pextrd.

2971 Constant *Mask = Ctx->getConstantInt32(Index);	2856 Constant *Mask = Ctx->getConstantInt32(Index);

2972 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);	2857 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);

2973 _pextr(ExtractedElementR, SourceVectR, Mask);	2858 _pextr(ExtractedElementR, SourceVectR, Mask);

2974 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {	2859 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {

2975 // Use pshufd and movd/movss.	2860 // Use pshufd and movd/movss.

2976 Variable *T = nullptr;	2861 Variable *T = nullptr;

2977 if (Index) {	2862 if (Index) {

2978 // The shuffle only needs to occur if the element to be extracted	2863 // The shuffle only needs to occur if the element to be extracted

(...skipping 40 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3019 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);	2904 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);

3020 lowerCast(Cast);	2905 lowerCast(Cast);

3021 ExtractedElementR = T;	2906 ExtractedElementR = T;

3022 }	2907 }

3023	2908

3024 // Copy the element to the destination.	2909 // Copy the element to the destination.

3025 Variable *Dest = Inst->getDest();	2910 Variable *Dest = Inst->getDest();

3026 _mov(Dest, ExtractedElementR);	2911 _mov(Dest, ExtractedElementR);

3027 }	2912 }

3028	2913

3029 void TargetX8632::lowerFcmp(const InstFcmp *Inst) {	2914 template <class Machine>

	2915 void TargetX86Base<Machine>::lowerFcmp(const InstFcmp *Inst) {

3030 Operand *Src0 = Inst->getSrc(0);	2916 Operand *Src0 = Inst->getSrc(0);

3031 Operand *Src1 = Inst->getSrc(1);	2917 Operand *Src1 = Inst->getSrc(1);

3032 Variable *Dest = Inst->getDest();	2918 Variable *Dest = Inst->getDest();

3033	2919

3034 if (isVectorType(Dest->getType())) {	2920 if (isVectorType(Dest->getType())) {

3035 InstFcmp::FCond Condition = Inst->getCondition();	2921 InstFcmp::FCond Condition = Inst->getCondition();

3036 size_t Index = static_cast<size_t>(Condition);	2922 size_t Index = static_cast<size_t>(Condition);

3037 assert(Index < TableFcmpSize);	2923 assert(Index < Traits::TableFcmpSize);

3038	2924

3039 if (TableFcmp[Index].SwapVectorOperands) {	2925 if (Traits::TableFcmp[Index].SwapVectorOperands) {

3040 Operand *T = Src0;	2926 Operand *T = Src0;

3041 Src0 = Src1;	2927 Src0 = Src1;

3042 Src1 = T;	2928 Src1 = T;

3043 }	2929 }

3044	2930

3045 Variable *T = nullptr;	2931 Variable *T = nullptr;

3046	2932

3047 if (Condition == InstFcmp::True) {	2933 if (Condition == InstFcmp::True) {

3048 // makeVectorOfOnes() requires an integer vector type.	2934 // makeVectorOfOnes() requires an integer vector type.

3049 T = makeVectorOfMinusOnes(IceType_v4i32);	2935 T = makeVectorOfMinusOnes(IceType_v4i32);

3050 } else if (Condition == InstFcmp::False) {	2936 } else if (Condition == InstFcmp::False) {

3051 T = makeVectorOfZeros(Dest->getType());	2937 T = makeVectorOfZeros(Dest->getType());

3052 } else {	2938 } else {

3053 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);	2939 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);

3054 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);	2940 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

3055 if (llvm::isa<OperandX8632Mem>(Src1RM))	2941 if (llvm::isa<OperandX8632Mem>(Src1RM))

3056 Src1RM = legalizeToVar(Src1RM);	2942 Src1RM = legalizeToVar(Src1RM);

3057	2943

3058 switch (Condition) {	2944 switch (Condition) {

3059 default: {	2945 default: {

3060 CondX86::CmppsCond Predicate = TableFcmp[Index].Predicate;	2946 CondX86::CmppsCond Predicate = Traits::TableFcmp[Index].Predicate;

3061 assert(Predicate != CondX86::Cmpps_Invalid);	2947 assert(Predicate != CondX86::Cmpps_Invalid);

3062 T = makeReg(Src0RM->getType());	2948 T = makeReg(Src0RM->getType());

3063 _movp(T, Src0RM);	2949 _movp(T, Src0RM);

3064 _cmpps(T, Src1RM, Predicate);	2950 _cmpps(T, Src1RM, Predicate);

3065 } break;	2951 } break;

3066 case InstFcmp::One: {	2952 case InstFcmp::One: {

3067 // Check both unequal and ordered.	2953 // Check both unequal and ordered.

3068 T = makeReg(Src0RM->getType());	2954 T = makeReg(Src0RM->getType());

3069 Variable *T2 = makeReg(Src0RM->getType());	2955 Variable *T2 = makeReg(Src0RM->getType());

3070 _movp(T, Src0RM);	2956 _movp(T, Src0RM);

(...skipping 28 matching lines...) Expand all Loading...
3099 // j<C2> label /* only if C2 != Br_None */	2985 // j<C2> label /* only if C2 != Br_None */

3100 // FakeUse(a) /* only if C1 != Br_None */	2986 // FakeUse(a) /* only if C1 != Br_None */

3101 // mov a, !<default> /* only if C1 != Br_None */	2987 // mov a, !<default> /* only if C1 != Br_None */

3102 // label: /* only if C1 != Br_None */	2988 // label: /* only if C1 != Br_None */

3103 //	2989 //

3104 // setcc lowering when C1 != Br_None && C2 == Br_None:	2990 // setcc lowering when C1 != Br_None && C2 == Br_None:

3105 // ucomiss b, c /* but swap b,c order if SwapOperands==true */	2991 // ucomiss b, c /* but swap b,c order if SwapOperands==true */

3106 // setcc a, C1	2992 // setcc a, C1

3107 InstFcmp::FCond Condition = Inst->getCondition();	2993 InstFcmp::FCond Condition = Inst->getCondition();

3108 size_t Index = static_cast<size_t>(Condition);	2994 size_t Index = static_cast<size_t>(Condition);

3109 assert(Index < TableFcmpSize);	2995 assert(Index < Traits::TableFcmpSize);

3110 if (TableFcmp[Index].SwapScalarOperands)	2996 if (Traits::TableFcmp[Index].SwapScalarOperands)

3111 std::swap(Src0, Src1);	2997 std::swap(Src0, Src1);

3112 bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None);	2998 bool HasC1 = (Traits::TableFcmp[Index].C1 != CondX86::Br_None);

3113 bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None);	2999 bool HasC2 = (Traits::TableFcmp[Index].C2 != CondX86::Br_None);

3114 if (HasC1) {	3000 if (HasC1) {

3115 Src0 = legalize(Src0);	3001 Src0 = legalize(Src0);

3116 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);	3002 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

3117 Variable *T = nullptr;	3003 Variable *T = nullptr;

3118 _mov(T, Src0);	3004 _mov(T, Src0);

3119 _ucomiss(T, Src1RM);	3005 _ucomiss(T, Src1RM);

3120 if (!HasC2) {	3006 if (!HasC2) {

3121 assert(TableFcmp[Index].Default);	3007 assert(Traits::TableFcmp[Index].Default);

3122 _setcc(Dest, TableFcmp[Index].C1);	3008 _setcc(Dest, Traits::TableFcmp[Index].C1);

3123 return;	3009 return;

3124 }	3010 }

3125 }	3011 }

3126 Constant *Default = Ctx->getConstantInt32(TableFcmp[Index].Default);	3012 Constant *Default = Ctx->getConstantInt32(Traits::TableFcmp[Index].Default);

3127 _mov(Dest, Default);	3013 _mov(Dest, Default);

3128 if (HasC1) {	3014 if (HasC1) {

3129 InstX8632Label *Label = InstX8632Label::create(Func, this);	3015 InstX8632Label *Label = InstX8632Label::create(Func, this);

3130 _br(TableFcmp[Index].C1, Label);	3016 _br(Traits::TableFcmp[Index].C1, Label);

3131 if (HasC2) {	3017 if (HasC2) {

3132 _br(TableFcmp[Index].C2, Label);	3018 _br(Traits::TableFcmp[Index].C2, Label);

3133 }	3019 }

3134 Constant *NonDefault = Ctx->getConstantInt32(!TableFcmp[Index].Default);	3020 Constant *NonDefault =

	3021 Ctx->getConstantInt32(!Traits::TableFcmp[Index].Default);

3135 _mov_nonkillable(Dest, NonDefault);	3022 _mov_nonkillable(Dest, NonDefault);

3136 Context.insert(Label);	3023 Context.insert(Label);

3137 }	3024 }

3138 }	3025 }

3139	3026

3140 void TargetX8632::lowerIcmp(const InstIcmp *Inst) {	3027 template <class Machine>

	3028 void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Inst) {

3141 Operand *Src0 = legalize(Inst->getSrc(0));	3029 Operand *Src0 = legalize(Inst->getSrc(0));

3142 Operand *Src1 = legalize(Inst->getSrc(1));	3030 Operand *Src1 = legalize(Inst->getSrc(1));

3143 Variable *Dest = Inst->getDest();	3031 Variable *Dest = Inst->getDest();

3144	3032

3145 if (isVectorType(Dest->getType())) {	3033 if (isVectorType(Dest->getType())) {

3146 Type Ty = Src0->getType();	3034 Type Ty = Src0->getType();

3147 // Promote i1 vectors to 128 bit integer vector types.	3035 // Promote i1 vectors to 128 bit integer vector types.

3148 if (typeElementType(Ty) == IceType_i1) {	3036 if (typeElementType(Ty) == IceType_i1) {

3149 Type NewTy = IceType_NUM;	3037 Type NewTy = IceType_NUM;

3150 switch (Ty) {	3038 switch (Ty) {

(...skipping 97 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3248	3136

3249 _movp(Dest, T);	3137 _movp(Dest, T);

3250 eliminateNextVectorSextInstruction(Dest);	3138 eliminateNextVectorSextInstruction(Dest);

3251 return;	3139 return;

3252 }	3140 }

3253	3141

3254 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:	3142 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:

3255 if (Src0->getType() == IceType_i64) {	3143 if (Src0->getType() == IceType_i64) {

3256 InstIcmp::ICond Condition = Inst->getCondition();	3144 InstIcmp::ICond Condition = Inst->getCondition();

3257 size_t Index = static_cast<size_t>(Condition);	3145 size_t Index = static_cast<size_t>(Condition);

3258 assert(Index < TableIcmp64Size);	3146 assert(Index < Traits::TableIcmp64Size);

3259 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);	3147 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);

3260 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);	3148 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);

3261 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);	3149 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);

3262 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);	3150 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);

3263 Constant *Zero = Ctx->getConstantZero(IceType_i32);	3151 Constant *Zero = Ctx->getConstantZero(IceType_i32);

3264 Constant *One = Ctx->getConstantInt32(1);	3152 Constant *One = Ctx->getConstantInt32(1);

3265 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);	3153 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);

3266 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);	3154 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);

3267 _mov(Dest, One);	3155 _mov(Dest, One);

3268 _cmp(Src0HiRM, Src1HiRI);	3156 _cmp(Src0HiRM, Src1HiRI);

3269 if (TableIcmp64[Index].C1 != CondX86::Br_None)	3157 if (Traits::TableIcmp64[Index].C1 != CondX86::Br_None)

3270 _br(TableIcmp64[Index].C1, LabelTrue);	3158 _br(Traits::TableIcmp64[Index].C1, LabelTrue);

3271 if (TableIcmp64[Index].C2 != CondX86::Br_None)	3159 if (Traits::TableIcmp64[Index].C2 != CondX86::Br_None)

3272 _br(TableIcmp64[Index].C2, LabelFalse);	3160 _br(Traits::TableIcmp64[Index].C2, LabelFalse);

3273 _cmp(Src0LoRM, Src1LoRI);	3161 _cmp(Src0LoRM, Src1LoRI);

3274 _br(TableIcmp64[Index].C3, LabelTrue);	3162 _br(Traits::TableIcmp64[Index].C3, LabelTrue);

3275 Context.insert(LabelFalse);	3163 Context.insert(LabelFalse);

3276 _mov_nonkillable(Dest, Zero);	3164 _mov_nonkillable(Dest, Zero);

3277 Context.insert(LabelTrue);	3165 Context.insert(LabelTrue);

3278 return;	3166 return;

3279 }	3167 }

3280	3168

3281 // cmp b, c	3169 // cmp b, c

3282 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);	3170 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);

3283 _cmp(Src0RM, Src1);	3171 _cmp(Src0RM, Src1);

3284 _setcc(Dest, getIcmp32Mapping(Inst->getCondition()));	3172 _setcc(Dest, Traits::getIcmp32Mapping(Inst->getCondition()));

3285 }	3173 }

3286	3174

3287 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {	3175 template <class Machine>

	3176 void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) {

3288 Operand *SourceVectNotLegalized = Inst->getSrc(0);	3177 Operand *SourceVectNotLegalized = Inst->getSrc(0);

3289 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);	3178 Operand *ElementToInsertNotLegalized = Inst->getSrc(1);

3290 ConstantInteger32 *ElementIndex =	3179 ConstantInteger32 *ElementIndex =

3291 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));	3180 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2));

3292 // Only constant indices are allowed in PNaCl IR.	3181 // Only constant indices are allowed in PNaCl IR.

3293 assert(ElementIndex);	3182 assert(ElementIndex);

3294 unsigned Index = ElementIndex->getValue();	3183 unsigned Index = ElementIndex->getValue();

3295 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));	3184 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));

3296	3185

3297 Type Ty = SourceVectNotLegalized->getType();	3186 Type Ty = SourceVectNotLegalized->getType();

3298 Type ElementTy = typeElementType(Ty);	3187 Type ElementTy = typeElementType(Ty);

3299 Type InVectorElementTy = getInVectorElementType(Ty);	3188 Type InVectorElementTy = Traits::getInVectorElementType(Ty);

3300	3189

3301 if (ElementTy == IceType_i1) {	3190 if (ElementTy == IceType_i1) {

3302 // Expand the element to the appropriate size for it to be inserted	3191 // Expand the element to the appropriate size for it to be inserted

3303 // in the vector.	3192 // in the vector.

3304 Variable *Expanded = Func->makeVariable(InVectorElementTy);	3193 Variable *Expanded = Func->makeVariable(InVectorElementTy);

3305 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,	3194 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,

3306 ElementToInsertNotLegalized);	3195 ElementToInsertNotLegalized);

3307 lowerCast(Cast);	3196 lowerCast(Cast);

3308 ElementToInsertNotLegalized = Expanded;	3197 ElementToInsertNotLegalized = Expanded;

3309 }	3198 }

3310	3199

3311 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) {	3200 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 ||

	3201 InstructionSet >= Machine::SSE4_1) {

3312 // Use insertps, pinsrb, pinsrw, or pinsrd.	3202 // Use insertps, pinsrb, pinsrw, or pinsrd.

3313 Operand *ElementRM =	3203 Operand *ElementRM =

3314 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);	3204 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem);

3315 Operand *SourceVectRM =	3205 Operand *SourceVectRM =

3316 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);	3206 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);

3317 Variable *T = makeReg(Ty);	3207 Variable *T = makeReg(Ty);

3318 _movp(T, SourceVectRM);	3208 _movp(T, SourceVectRM);

3319 if (Ty == IceType_v4f32)	3209 if (Ty == IceType_v4f32)

3320 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));	3210 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4));

3321 else	3211 else

(...skipping 78 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3400 OperandX8632Mem *Loc =	3290 OperandX8632Mem *Loc =

3401 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);	3291 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);

3402 _store(legalizeToVar(ElementToInsertNotLegalized), Loc);	3292 _store(legalizeToVar(ElementToInsertNotLegalized), Loc);

3403	3293

3404 Variable *T = makeReg(Ty);	3294 Variable *T = makeReg(Ty);

3405 _movp(T, Slot);	3295 _movp(T, Slot);

3406 _movp(Inst->getDest(), T);	3296 _movp(Inst->getDest(), T);

3407 }	3297 }

3408 }	3298 }

3409	3299

3410 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {	3300 template <class Machine>

	3301 void TargetX86Base<Machine>::lowerIntrinsicCall(

	3302 const InstIntrinsicCall *Instr) {

3411 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {	3303 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) {

3412 case Intrinsics::AtomicCmpxchg: {	3304 case Intrinsics::AtomicCmpxchg: {

3413 if (!Intrinsics::isMemoryOrderValid(	3305 if (!Intrinsics::isMemoryOrderValid(

3414 ID, getConstantMemoryOrder(Instr->getArg(3)),	3306 ID, getConstantMemoryOrder(Instr->getArg(3)),

3415 getConstantMemoryOrder(Instr->getArg(4)))) {	3307 getConstantMemoryOrder(Instr->getArg(4)))) {

3416 Func->setError("Unexpected memory ordering for AtomicCmpxchg");	3308 Func->setError("Unexpected memory ordering for AtomicCmpxchg");

3417 return;	3309 return;

3418 }	3310 }

3419 Variable *DestPrev = Instr->getDest();	3311 Variable *DestPrev = Instr->getDest();

3420 Operand *PtrToMem = Instr->getArg(0);	3312 Operand *PtrToMem = Instr->getArg(0);

(...skipping 82 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3503 Context.insert(	3395 Context.insert(

3504 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));	3396 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));

3505 return;	3397 return;

3506 }	3398 }

3507 case Intrinsics::AtomicRMW:	3399 case Intrinsics::AtomicRMW:

3508 if (!Intrinsics::isMemoryOrderValid(	3400 if (!Intrinsics::isMemoryOrderValid(

3509 ID, getConstantMemoryOrder(Instr->getArg(3)))) {	3401 ID, getConstantMemoryOrder(Instr->getArg(3)))) {

3510 Func->setError("Unexpected memory ordering for AtomicRMW");	3402 Func->setError("Unexpected memory ordering for AtomicRMW");

3511 return;	3403 return;

3512 }	3404 }

3513 lowerAtomicRMW(	3405 lowerAtomicRMW(Instr->getDest(),

3514 Instr->getDest(),	3406 static_cast<uint32_t>(llvm::cast<ConstantInteger32>(

3515 static_cast<uint32_t>(	3407 Instr->getArg(0))->getValue()),

3516 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),	3408 Instr->getArg(1), Instr->getArg(2));

3517 Instr->getArg(1), Instr->getArg(2));

3518 return;	3409 return;

3519 case Intrinsics::AtomicStore: {	3410 case Intrinsics::AtomicStore: {

3520 if (!Intrinsics::isMemoryOrderValid(	3411 if (!Intrinsics::isMemoryOrderValid(

3521 ID, getConstantMemoryOrder(Instr->getArg(2)))) {	3412 ID, getConstantMemoryOrder(Instr->getArg(2)))) {

3522 Func->setError("Unexpected memory ordering for AtomicStore");	3413 Func->setError("Unexpected memory ordering for AtomicStore");

3523 return;	3414 return;

3524 }	3415 }

3525 // We require the memory address to be naturally aligned.	3416 // We require the memory address to be naturally aligned.

3526 // Given that is the case, then normal stores are atomic.	3417 // Given that is the case, then normal stores are atomic.

3527 // Add a fence after the store to make it visible.	3418 // Add a fence after the store to make it visible.

(...skipping 205 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3733 case Intrinsics::Trap:	3624 case Intrinsics::Trap:

3734 _ud2();	3625 _ud2();

3735 return;	3626 return;

3736 case Intrinsics::UnknownIntrinsic:	3627 case Intrinsics::UnknownIntrinsic:

3737 Func->setError("Should not be lowering UnknownIntrinsic");	3628 Func->setError("Should not be lowering UnknownIntrinsic");

3738 return;	3629 return;

3739 }	3630 }

3740 return;	3631 return;

3741 }	3632 }

3742	3633

3743 void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,	3634 template <class Machine>

3744 Operand *Expected, Operand *Desired) {	3635 void TargetX86Base<Machine>::lowerAtomicCmpxchg(Variable *DestPrev,

	3636 Operand *Ptr, Operand *Expected,

	3637 Operand *Desired) {

3745 if (Expected->getType() == IceType_i64) {	3638 if (Expected->getType() == IceType_i64) {

3746 // Reserve the pre-colored registers first, before adding any more	3639 // Reserve the pre-colored registers first, before adding any more

3747 // infinite-weight variables from formMemoryOperand's legalization.	3640 // infinite-weight variables from formMemoryOperand's legalization.

3748 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);	3641 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);

3749 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);	3642 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);

3750 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);	3643 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);

3751 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);	3644 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);

3752 _mov(T_eax, loOperand(Expected));	3645 _mov(T_eax, loOperand(Expected));

3753 _mov(T_edx, hiOperand(Expected));	3646 _mov(T_edx, hiOperand(Expected));

3754 _mov(T_ebx, loOperand(Desired));	3647 _mov(T_ebx, loOperand(Desired));

3755 _mov(T_ecx, hiOperand(Desired));	3648 _mov(T_ecx, hiOperand(Desired));

3756 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());	3649 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());

3757 const bool Locked = true;	3650 const bool Locked = true;

3758 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);	3651 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);

3759 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));	3652 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));

3760 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));	3653 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));

3761 _mov(DestLo, T_eax);	3654 _mov(DestLo, T_eax);

3762 _mov(DestHi, T_edx);	3655 _mov(DestHi, T_edx);

3763 return;	3656 return;

3764 }	3657 }

3765 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax);	3658 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax);

3766 _mov(T_eax, Expected);	3659 _mov(T_eax, Expected);

3767 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());	3660 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType());

3768 Variable *DesiredReg = legalizeToVar(Desired);	3661 Variable *DesiredReg = legalizeToVar(Desired);

3769 const bool Locked = true;	3662 const bool Locked = true;

3770 _cmpxchg(Addr, T_eax, DesiredReg, Locked);	3663 _cmpxchg(Addr, T_eax, DesiredReg, Locked);

3771 _mov(DestPrev, T_eax);	3664 _mov(DestPrev, T_eax);

3772 }	3665 }

3773	3666

3774 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,	3667 template <class Machine>

3775 Operand *Expected,	3668 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest,

3776 Operand *Desired) {	3669 Operand *PtrToMem,

	3670 Operand *Expected,

	3671 Operand *Desired) {

3777 if (Ctx->getFlags().getOptLevel() == Opt_m1)	3672 if (Ctx->getFlags().getOptLevel() == Opt_m1)

3778 return false;	3673 return false;

3779 // Peek ahead a few instructions and see how Dest is used.	3674 // Peek ahead a few instructions and see how Dest is used.

3780 // It's very common to have:	3675 // It's very common to have:

3781 //	3676 //

3782 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)	3677 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...)

3783 // [%y_phi = ...] // list of phi stores	3678 // [%y_phi = ...] // list of phi stores

3784 // %p = icmp eq i32 %x, %expected	3679 // %p = icmp eq i32 %x, %expected

3785 // br i1 %p, label %l1, label %l2	3680 // br i1 %p, label %l1, label %l2

3786 //	3681 //

(...skipping 50 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3837 NextBr->setDeleted();	3732 NextBr->setDeleted();

3838 Context.advanceNext();	3733 Context.advanceNext();

3839 Context.advanceNext();	3734 Context.advanceNext();

3840 return true;	3735 return true;

3841 }	3736 }

3842 }	3737 }

3843 }	3738 }

3844 return false;	3739 return false;

3845 }	3740 }

3846	3741

3847 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation,	3742 template <class Machine>

3848 Operand *Ptr, Operand *Val) {	3743 void TargetX86Base<Machine>::lowerAtomicRMW(Variable *Dest, uint32_t Operation,

	3744 Operand *Ptr, Operand *Val) {

3849 bool NeedsCmpxchg = false;	3745 bool NeedsCmpxchg = false;

3850 LowerBinOp Op_Lo = nullptr;	3746 LowerBinOp Op_Lo = nullptr;

3851 LowerBinOp Op_Hi = nullptr;	3747 LowerBinOp Op_Hi = nullptr;

3852 switch (Operation) {	3748 switch (Operation) {

3853 default:	3749 default:

3854 Func->setError("Unknown AtomicRMW operation");	3750 Func->setError("Unknown AtomicRMW operation");

3855 return;	3751 return;

3856 case Intrinsics::AtomicAdd: {	3752 case Intrinsics::AtomicAdd: {

3857 if (Dest->getType() == IceType_i64) {	3753 if (Dest->getType() == IceType_i64) {

3858 // All the fall-through paths must set this to true, but use this	3754 // All the fall-through paths must set this to true, but use this

3859 // for asserting.	3755 // for asserting.

3860 NeedsCmpxchg = true;	3756 NeedsCmpxchg = true;

3861 Op_Lo = &TargetX8632::_add;	3757 Op_Lo = &TargetX86Base<Machine>::_add;

3862 Op_Hi = &TargetX8632::_adc;	3758 Op_Hi = &TargetX86Base<Machine>::_adc;

3863 break;	3759 break;

3864 }	3760 }

3865 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());	3761 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());

3866 const bool Locked = true;	3762 const bool Locked = true;

3867 Variable *T = nullptr;	3763 Variable *T = nullptr;

3868 _mov(T, Val);	3764 _mov(T, Val);

3869 _xadd(Addr, T, Locked);	3765 _xadd(Addr, T, Locked);

3870 _mov(Dest, T);	3766 _mov(Dest, T);

3871 return;	3767 return;

3872 }	3768 }

3873 case Intrinsics::AtomicSub: {	3769 case Intrinsics::AtomicSub: {

3874 if (Dest->getType() == IceType_i64) {	3770 if (Dest->getType() == IceType_i64) {

3875 NeedsCmpxchg = true;	3771 NeedsCmpxchg = true;

3876 Op_Lo = &TargetX8632::_sub;	3772 Op_Lo = &TargetX86Base<Machine>::_sub;

3877 Op_Hi = &TargetX8632::_sbb;	3773 Op_Hi = &TargetX86Base<Machine>::_sbb;

3878 break;	3774 break;

3879 }	3775 }

3880 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());	3776 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());

3881 const bool Locked = true;	3777 const bool Locked = true;

3882 Variable *T = nullptr;	3778 Variable *T = nullptr;

3883 _mov(T, Val);	3779 _mov(T, Val);

3884 _neg(T);	3780 _neg(T);

3885 _xadd(Addr, T, Locked);	3781 _xadd(Addr, T, Locked);

3886 _mov(Dest, T);	3782 _mov(Dest, T);

3887 return;	3783 return;

3888 }	3784 }

3889 case Intrinsics::AtomicOr:	3785 case Intrinsics::AtomicOr:

3890 // TODO(jvoung): If Dest is null or dead, then some of these	3786 // TODO(jvoung): If Dest is null or dead, then some of these

3891 // operations do not need an "exchange", but just a locked op.	3787 // operations do not need an "exchange", but just a locked op.

3892 // That appears to be "worth" it for sub, or, and, and xor.	3788 // That appears to be "worth" it for sub, or, and, and xor.

3893 // xadd is probably fine vs lock add for add, and xchg is fine	3789 // xadd is probably fine vs lock add for add, and xchg is fine

3894 // vs an atomic store.	3790 // vs an atomic store.

3895 NeedsCmpxchg = true;	3791 NeedsCmpxchg = true;

3896 Op_Lo = &TargetX8632::_or;	3792 Op_Lo = &TargetX86Base<Machine>::_or;

3897 Op_Hi = &TargetX8632::_or;	3793 Op_Hi = &TargetX86Base<Machine>::_or;

3898 break;	3794 break;

3899 case Intrinsics::AtomicAnd:	3795 case Intrinsics::AtomicAnd:

3900 NeedsCmpxchg = true;	3796 NeedsCmpxchg = true;

3901 Op_Lo = &TargetX8632::_and;	3797 Op_Lo = &TargetX86Base<Machine>::_and;

3902 Op_Hi = &TargetX8632::_and;	3798 Op_Hi = &TargetX86Base<Machine>::_and;

3903 break;	3799 break;

3904 case Intrinsics::AtomicXor:	3800 case Intrinsics::AtomicXor:

3905 NeedsCmpxchg = true;	3801 NeedsCmpxchg = true;

3906 Op_Lo = &TargetX8632::_xor;	3802 Op_Lo = &TargetX86Base<Machine>::_xor;

3907 Op_Hi = &TargetX8632::_xor;	3803 Op_Hi = &TargetX86Base<Machine>::_xor;

3908 break;	3804 break;

3909 case Intrinsics::AtomicExchange:	3805 case Intrinsics::AtomicExchange:

3910 if (Dest->getType() == IceType_i64) {	3806 if (Dest->getType() == IceType_i64) {

3911 NeedsCmpxchg = true;	3807 NeedsCmpxchg = true;

3912 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values	3808 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values

3913 // just need to be moved to the ecx and ebx registers.	3809 // just need to be moved to the ecx and ebx registers.

3914 Op_Lo = nullptr;	3810 Op_Lo = nullptr;

3915 Op_Hi = nullptr;	3811 Op_Hi = nullptr;

3916 break;	3812 break;

3917 }	3813 }

3918 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());	3814 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType());

3919 Variable *T = nullptr;	3815 Variable *T = nullptr;

3920 _mov(T, Val);	3816 _mov(T, Val);

3921 _xchg(Addr, T);	3817 _xchg(Addr, T);

3922 _mov(Dest, T);	3818 _mov(Dest, T);

3923 return;	3819 return;

3924 }	3820 }

3925 // Otherwise, we need a cmpxchg loop.	3821 // Otherwise, we need a cmpxchg loop.

3926 (void)NeedsCmpxchg;	3822 (void)NeedsCmpxchg;

3927 assert(NeedsCmpxchg);	3823 assert(NeedsCmpxchg);

3928 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);	3824 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);

3929 }	3825 }

3930	3826

3931 void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi,	3827 template <class Machine>

3932 Variable *Dest, Operand *Ptr,	3828 void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo,

3933 Operand *Val) {	3829 LowerBinOp Op_Hi,

	3830 Variable *Dest,

	3831 Operand *Ptr,

	3832 Operand *Val) {

3934 // Expand a more complex RMW operation as a cmpxchg loop:	3833 // Expand a more complex RMW operation as a cmpxchg loop:

3935 // For 64-bit:	3834 // For 64-bit:

3936 // mov eax, [ptr]	3835 // mov eax, [ptr]

3937 // mov edx, [ptr + 4]	3836 // mov edx, [ptr + 4]

3938 // .LABEL:	3837 // .LABEL:

3939 // mov ebx, eax	3838 // mov ebx, eax

3940 // <Op_Lo> ebx, <desired_adj_lo>	3839 // <Op_Lo> ebx, <desired_adj_lo>

3941 // mov ecx, edx	3840 // mov ecx, edx

3942 // <Op_Hi> ecx, <desired_adj_hi>	3841 // <Op_Hi> ecx, <desired_adj_hi>

3943 // lock cmpxchg8b [ptr]	3842 // lock cmpxchg8b [ptr]

(...skipping 84 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4028 // The address base (if any) is also reused in the loop.	3927 // The address base (if any) is also reused in the loop.

4029 if (Variable *Base = Addr->getBase())	3928 if (Variable *Base = Addr->getBase())

4030 Context.insert(InstFakeUse::create(Func, Base));	3929 Context.insert(InstFakeUse::create(Func, Base));

4031 _mov(Dest, T_eax);	3930 _mov(Dest, T_eax);

4032 }	3931 }

4033	3932

4034 // Lowers count {trailing, leading} zeros intrinsic.	3933 // Lowers count {trailing, leading} zeros intrinsic.

4035 //	3934 //

4036 // We could do constant folding here, but that should have	3935 // We could do constant folding here, but that should have

4037 // been done by the front-end/middle-end optimizations.	3936 // been done by the front-end/middle-end optimizations.

4038 void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,	3937 template <class Machine>

4039 Operand *FirstVal, Operand *SecondVal) {	3938 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest,

	3939 Operand *FirstVal,

	3940 Operand *SecondVal) {

4040 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).	3941 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI).

4041 // Then the instructions will handle the Val == 0 case much more simply	3942 // Then the instructions will handle the Val == 0 case much more simply

4042 // and won't require conversion from bit position to number of zeros.	3943 // and won't require conversion from bit position to number of zeros.

4043 //	3944 //

4044 // Otherwise:	3945 // Otherwise:

4045 // bsr IF_NOT_ZERO, Val	3946 // bsr IF_NOT_ZERO, Val

4046 // mov T_DEST, 63	3947 // mov T_DEST, 63

4047 // cmovne T_DEST, IF_NOT_ZERO	3948 // cmovne T_DEST, IF_NOT_ZERO

4048 // xor T_DEST, 31	3949 // xor T_DEST, 31

4049 // mov DEST, T_DEST	3950 // mov DEST, T_DEST

(...skipping 50 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4100 } else {	4001 } else {

4101 _bsr(T_Dest2, SecondVar);	4002 _bsr(T_Dest2, SecondVar);

4102 _xor(T_Dest2, ThirtyOne);	4003 _xor(T_Dest2, ThirtyOne);

4103 }	4004 }

4104 _test(SecondVar, SecondVar);	4005 _test(SecondVar, SecondVar);

4105 _cmov(T_Dest2, T_Dest, CondX86::Br_e);	4006 _cmov(T_Dest2, T_Dest, CondX86::Br_e);

4106 _mov(DestLo, T_Dest2);	4007 _mov(DestLo, T_Dest2);

4107 _mov(DestHi, Ctx->getConstantZero(IceType_i32));	4008 _mov(DestHi, Ctx->getConstantZero(IceType_i32));

4108 }	4009 }

4109	4010

4110 namespace {

4111

4112 bool isAdd(const Inst *Inst) {	4011 bool isAdd(const Inst *Inst) {

4113 if (const InstArithmetic *Arith =	4012 if (const InstArithmetic *Arith =

4114 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {	4013 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {

4115 return (Arith->getOp() == InstArithmetic::Add);	4014 return (Arith->getOp() == InstArithmetic::Add);

4116 }	4015 }

4117 return false;	4016 return false;

4118 }	4017 }

4119	4018

4120 void dumpAddressOpt(const Cfg *Func, const Variable *Base,	4019 void dumpAddressOpt(const Cfg *Func, const Variable *Base,

4121 const Variable *Index, uint16_t Shift, int32_t Offset,	4020 const Variable *Index, uint16_t Shift, int32_t Offset,

(...skipping 220 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4342 // set Index=Var, Offset+=(Const<<Shift)	4241 // set Index=Var, Offset+=(Const<<Shift)

4343	4242

4344 // Index is Index=Var-Const ==>	4243 // Index is Index=Var-Const ==>

4345 // set Index=Var, Offset-=(Const<<Shift)	4244 // set Index=Var, Offset-=(Const<<Shift)

4346	4245

4347 // TODO: consider overflow issues with respect to Offset.	4246 // TODO: consider overflow issues with respect to Offset.

4348 // TODO: handle symbolic constants.	4247 // TODO: handle symbolic constants.

4349 }	4248 }

4350 }	4249 }

4351	4250

4352 } // anonymous namespace	4251 template <class Machine>

4353	4252 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) {

4354 void TargetX8632::lowerLoad(const InstLoad *Load) {

4355 // A Load instruction can be treated the same as an Assign	4253 // A Load instruction can be treated the same as an Assign

4356 // instruction, after the source operand is transformed into an	4254 // instruction, after the source operand is transformed into an

4357 // OperandX8632Mem operand. Note that the address mode	4255 // OperandX8632Mem operand. Note that the address mode

4358 // optimization already creates an OperandX8632Mem operand, so it	4256 // optimization already creates an OperandX8632Mem operand, so it

4359 // doesn't need another level of transformation.	4257 // doesn't need another level of transformation.

4360 Variable *DestLoad = Load->getDest();	4258 Variable *DestLoad = Load->getDest();

4361 Type Ty = DestLoad->getType();	4259 Type Ty = DestLoad->getType();

4362 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);	4260 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);

4363 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);	4261 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);

4364 lowerAssign(Assign);	4262 lowerAssign(Assign);

4365 }	4263 }

4366	4264

4367 void TargetX8632::doAddressOptLoad() {	4265 template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() {

4368 Inst *Inst = Context.getCur();	4266 Inst *Inst = Context.getCur();

4369 Variable *Dest = Inst->getDest();	4267 Variable *Dest = Inst->getDest();

4370 Operand *Addr = Inst->getSrc(0);	4268 Operand *Addr = Inst->getSrc(0);

4371 Variable *Index = nullptr;	4269 Variable *Index = nullptr;

4372 uint16_t Shift = 0;	4270 uint16_t Shift = 0;

4373 int32_t Offset = 0; // TODO: make Constant	4271 int32_t Offset = 0; // TODO: make Constant

4374 // Vanilla ICE load instructions should not use the segment registers,	4272 // Vanilla ICE load instructions should not use the segment registers,

4375 // and computeAddressOpt only works at the level of Variables and Constants,	4273 // and computeAddressOpt only works at the level of Variables and Constants,

4376 // not other OperandX8632Mem, so there should be no mention of segment	4274 // not other OperandX8632Mem, so there should be no mention of segment

4377 // registers there either.	4275 // registers there either.

4378 const OperandX8632Mem::SegmentRegisters SegmentReg =	4276 const OperandX8632Mem::SegmentRegisters SegmentReg =

4379 OperandX8632Mem::DefaultSegment;	4277 OperandX8632Mem::DefaultSegment;

4380 Variable *Base = llvm::dyn_cast<Variable>(Addr);	4278 Variable *Base = llvm::dyn_cast<Variable>(Addr);

4381 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);	4279 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);

4382 if (Base && Addr != Base) {	4280 if (Base && Addr != Base) {

4383 Inst->setDeleted();	4281 Inst->setDeleted();

4384 Constant *OffsetOp = Ctx->getConstantInt32(Offset);	4282 Constant *OffsetOp = Ctx->getConstantInt32(Offset);

4385 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,	4283 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index,

4386 Shift, SegmentReg);	4284 Shift, SegmentReg);

4387 Context.insert(InstLoad::create(Func, Dest, Addr));	4285 Context.insert(InstLoad::create(Func, Dest, Addr));

4388 }	4286 }

4389 }	4287 }

4390	4288

4391 void TargetX8632::randomlyInsertNop(float Probability) {	4289 template <class Machine>

	4290 void TargetX86Base<Machine>::randomlyInsertNop(float Probability) {

4392 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());	4291 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());

4393 if (RNG.getTrueWithProbability(Probability)) {	4292 if (RNG.getTrueWithProbability(Probability)) {

4394 _nop(RNG(X86_NUM_NOP_VARIANTS));	4293 _nop(RNG(Traits::X86_NUM_NOP_VARIANTS));

4395 }	4294 }

4396 }	4295 }

4397	4296

4398 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {	4297 template <class Machine>

	4298 void TargetX86Base<Machine>::lowerPhi(const InstPhi * /*Inst*/) {

4399 Func->setError("Phi found in regular instruction list");	4299 Func->setError("Phi found in regular instruction list");

4400 }	4300 }

4401	4301

4402 void TargetX8632::lowerRet(const InstRet *Inst) {	4302 template <class Machine>

	4303 void TargetX86Base<Machine>::lowerRet(const InstRet *Inst) {

4403 Variable *Reg = nullptr;	4304 Variable *Reg = nullptr;

4404 if (Inst->hasRetValue()) {	4305 if (Inst->hasRetValue()) {

4405 Operand *Src0 = legalize(Inst->getRetValue());	4306 Operand *Src0 = legalize(Inst->getRetValue());

4406 if (Src0->getType() == IceType_i64) {	4307 if (Src0->getType() == IceType_i64) {

4407 Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax);	4308 Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax);

4408 Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx);	4309 Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx);

4409 Reg = eax;	4310 Reg = eax;

4410 Context.insert(InstFakeUse::create(Func, edx));	4311 Context.insert(InstFakeUse::create(Func, edx));

4411 } else if (isScalarFloatingType(Src0->getType())) {	4312 } else if (isScalarFloatingType(Src0->getType())) {

4412 _fld(Src0);	4313 _fld(Src0);

4413 } else if (isVectorType(Src0->getType())) {	4314 } else if (isVectorType(Src0->getType())) {

4414 Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0);	4315 Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0);

4415 } else {	4316 } else {

4416 _mov(Reg, Src0, RegX8632::Reg_eax);	4317 _mov(Reg, Src0, RegX8632::Reg_eax);

4417 }	4318 }

4418 }	4319 }

4419 // Add a ret instruction even if sandboxing is enabled, because	4320 // Add a ret instruction even if sandboxing is enabled, because

4420 // addEpilog explicitly looks for a ret instruction as a marker for	4321 // addEpilog explicitly looks for a ret instruction as a marker for

4421 // where to insert the frame removal instructions.	4322 // where to insert the frame removal instructions.

4422 _ret(Reg);	4323 _ret(Reg);

4423 // Add a fake use of esp to make sure esp stays alive for the entire	4324 // Add a fake use of esp to make sure esp stays alive for the entire

4424 // function. Otherwise post-call esp adjustments get dead-code	4325 // function. Otherwise post-call esp adjustments get dead-code

4425 // eliminated. TODO: Are there more places where the fake use	4326 // eliminated. TODO: Are there more places where the fake use

4426 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not	4327 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not

4427 // have a ret instruction.	4328 // have a ret instruction.

4428 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);	4329 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);

4429 Context.insert(InstFakeUse::create(Func, esp));	4330 Context.insert(InstFakeUse::create(Func, esp));

4430 }	4331 }

4431	4332

4432 void TargetX8632::lowerSelect(const InstSelect *Inst) {	4333 template <class Machine>

	4334 void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) {

4433 Variable *Dest = Inst->getDest();	4335 Variable *Dest = Inst->getDest();

4434 Type DestTy = Dest->getType();	4336 Type DestTy = Dest->getType();

4435 Operand *SrcT = Inst->getTrueOperand();	4337 Operand *SrcT = Inst->getTrueOperand();

4436 Operand *SrcF = Inst->getFalseOperand();	4338 Operand *SrcF = Inst->getFalseOperand();

4437 Operand *Condition = Inst->getCondition();	4339 Operand *Condition = Inst->getCondition();

4438	4340

4439 if (isVectorType(DestTy)) {	4341 if (isVectorType(DestTy)) {

4440 Type SrcTy = SrcT->getType();	4342 Type SrcTy = SrcT->getType();

4441 Variable *T = makeReg(SrcTy);	4343 Variable *T = makeReg(SrcTy);

4442 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);	4344 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);

4443 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);	4345 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);

4444 if (InstructionSet >= SSE4_1) {	4346 if (InstructionSet >= Machine::SSE4_1) {

4445 // TODO(wala): If the condition operand is a constant, use blendps	4347 // TODO(wala): If the condition operand is a constant, use blendps

4446 // or pblendw.	4348 // or pblendw.

4447 //	4349 //

4448 // Use blendvps or pblendvb to implement select.	4350 // Use blendvps or pblendvb to implement select.

4449 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||	4351 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||

4450 SrcTy == IceType_v4f32) {	4352 SrcTy == IceType_v4f32) {

4451 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);	4353 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);

4452 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0);	4354 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0);

4453 _movp(xmm0, ConditionRM);	4355 _movp(xmm0, ConditionRM);

4454 _psll(xmm0, Ctx->getConstantInt8(31));	4356 _psll(xmm0, Ctx->getConstantInt8(31));

4455 _movp(T, SrcFRM);	4357 _movp(T, SrcFRM);

4456 _blendvps(T, SrcTRM, xmm0);	4358 _blendvps(T, SrcTRM, xmm0);

4457 _movp(Dest, T);	4359 _movp(Dest, T);

4458 } else {	4360 } else {

4459 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);	4361 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);

4460 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16	4362 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16

4461 : IceType_v16i8;	4363 : IceType_v16i8;

4462 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0);	4364 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0);

4463 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));	4365 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));

4464 _movp(T, SrcFRM);	4366 _movp(T, SrcFRM);

4465 _pblendvb(T, SrcTRM, xmm0);	4367 _pblendvb(T, SrcTRM, xmm0);

4466 _movp(Dest, T);	4368 _movp(Dest, T);

4467 }	4369 }

4468 return;	4370 return;

4469 }	4371 }

4470 // Lower select without SSE4.1:	4372 // Lower select without Machine::SSE4.1:

4471 // a=d?b:c ==>	4373 // a=d?b:c ==>

4472 // if elementtype(d) != i1:	4374 // if elementtype(d) != i1:

4473 // d=sext(d);	4375 // d=sext(d);

4474 // a=(b&d)|(c&~d);	4376 // a=(b&d)|(c&~d);

4475 Variable *T2 = makeReg(SrcTy);	4377 Variable *T2 = makeReg(SrcTy);

4476 // Sign extend the condition operand if applicable.	4378 // Sign extend the condition operand if applicable.

4477 if (SrcTy == IceType_v4f32) {	4379 if (SrcTy == IceType_v4f32) {

4478 // The sext operation takes only integer arguments.	4380 // The sext operation takes only integer arguments.

4479 Variable *T3 = Func->makeVariable(IceType_v4i32);	4381 Variable *T3 = Func->makeVariable(IceType_v4i32);

4480 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));	4382 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));

(...skipping 17 matching lines...) Expand all Loading...
4498 Operand *CmpOpnd0 = nullptr;	4400 Operand *CmpOpnd0 = nullptr;

4499 Operand *CmpOpnd1 = nullptr;	4401 Operand *CmpOpnd1 = nullptr;

4500 // Handle folding opportunities.	4402 // Handle folding opportunities.

4501 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {	4403 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) {

4502 assert(Producer->isDeleted());	4404 assert(Producer->isDeleted());

4503 switch (BoolFolding::getProducerKind(Producer)) {	4405 switch (BoolFolding::getProducerKind(Producer)) {

4504 default:	4406 default:

4505 break;	4407 break;

4506 case BoolFolding::PK_Icmp32: {	4408 case BoolFolding::PK_Icmp32: {

4507 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);	4409 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);

4508 Cond = getIcmp32Mapping(Cmp->getCondition());	4410 Cond = Traits::getIcmp32Mapping(Cmp->getCondition());

4509 CmpOpnd1 = legalize(Producer->getSrc(1));	4411 CmpOpnd1 = legalize(Producer->getSrc(1));

4510 CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1);	4412 CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1);

4511 } break;	4413 } break;

4512 }	4414 }

4513 }	4415 }

4514 if (CmpOpnd0 == nullptr) {	4416 if (CmpOpnd0 == nullptr) {

4515 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem);	4417 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem);

4516 CmpOpnd1 = Ctx->getConstantZero(IceType_i32);	4418 CmpOpnd1 = Ctx->getConstantZero(IceType_i32);

4517 }	4419 }

4518 assert(CmpOpnd0);	4420 assert(CmpOpnd0);

(...skipping 43 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4562	4464

4563 assert(DestTy == IceType_i16 || DestTy == IceType_i32);	4465 assert(DestTy == IceType_i16 || DestTy == IceType_i32);

4564 Variable *T = nullptr;	4466 Variable *T = nullptr;

4565 SrcF = legalize(SrcF);	4467 SrcF = legalize(SrcF);

4566 _mov(T, SrcF);	4468 _mov(T, SrcF);

4567 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);	4469 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);

4568 _cmov(T, SrcT, Cond);	4470 _cmov(T, SrcT, Cond);

4569 _mov(Dest, T);	4471 _mov(Dest, T);

4570 }	4472 }

4571	4473

4572 void TargetX8632::lowerStore(const InstStore *Inst) {	4474 template <class Machine>

	4475 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {

4573 Operand *Value = Inst->getData();	4476 Operand *Value = Inst->getData();

4574 Operand *Addr = Inst->getAddr();	4477 Operand *Addr = Inst->getAddr();

4575 OperandX8632Mem *NewAddr = formMemoryOperand(Addr, Value->getType());	4478 OperandX8632Mem *NewAddr = formMemoryOperand(Addr, Value->getType());

4576 Type Ty = NewAddr->getType();	4479 Type Ty = NewAddr->getType();

4577	4480

4578 if (Ty == IceType_i64) {	4481 if (Ty == IceType_i64) {

4579 Value = legalize(Value);	4482 Value = legalize(Value);

4580 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);	4483 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);

4581 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);	4484 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);

4582 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));	4485 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));

4583 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));	4486 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));

4584 } else if (isVectorType(Ty)) {	4487 } else if (isVectorType(Ty)) {

4585 _storep(legalizeToVar(Value), NewAddr);	4488 _storep(legalizeToVar(Value), NewAddr);

4586 } else {	4489 } else {

4587 Value = legalize(Value, Legal_Reg | Legal_Imm);	4490 Value = legalize(Value, Legal_Reg | Legal_Imm);

4588 _store(Value, NewAddr);	4491 _store(Value, NewAddr);

4589 }	4492 }

4590 }	4493 }

4591	4494

4592 void TargetX8632::doAddressOptStore() {	4495 template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() {

4593 InstStore *Inst = llvm::cast<InstStore>(Context.getCur());	4496 InstStore *Inst = llvm::cast<InstStore>(Context.getCur());

4594 Operand *Data = Inst->getData();	4497 Operand *Data = Inst->getData();

4595 Operand *Addr = Inst->getAddr();	4498 Operand *Addr = Inst->getAddr();

4596 Variable *Index = nullptr;	4499 Variable *Index = nullptr;

4597 uint16_t Shift = 0;	4500 uint16_t Shift = 0;

4598 int32_t Offset = 0; // TODO: make Constant	4501 int32_t Offset = 0; // TODO: make Constant

4599 Variable *Base = llvm::dyn_cast<Variable>(Addr);	4502 Variable *Base = llvm::dyn_cast<Variable>(Addr);

4600 // Vanilla ICE store instructions should not use the segment registers,	4503 // Vanilla ICE store instructions should not use the segment registers,

4601 // and computeAddressOpt only works at the level of Variables and Constants,	4504 // and computeAddressOpt only works at the level of Variables and Constants,

4602 // not other OperandX8632Mem, so there should be no mention of segment	4505 // not other OperandX8632Mem, so there should be no mention of segment

4603 // registers there either.	4506 // registers there either.

4604 const OperandX8632Mem::SegmentRegisters SegmentReg =	4507 const OperandX8632Mem::SegmentRegisters SegmentReg =

4605 OperandX8632Mem::DefaultSegment;	4508 OperandX8632Mem::DefaultSegment;

4606 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);	4509 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);

4607 if (Base && Addr != Base) {	4510 if (Base && Addr != Base) {

4608 Inst->setDeleted();	4511 Inst->setDeleted();

4609 Constant *OffsetOp = Ctx->getConstantInt32(Offset);	4512 Constant *OffsetOp = Ctx->getConstantInt32(Offset);

4610 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,	4513 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index,

4611 Shift, SegmentReg);	4514 Shift, SegmentReg);

4612 InstStore *NewStore = InstStore::create(Func, Data, Addr);	4515 InstStore *NewStore = InstStore::create(Func, Data, Addr);

4613 if (Inst->getDest())	4516 if (Inst->getDest())

4614 NewStore->setRmwBeacon(Inst->getRmwBeacon());	4517 NewStore->setRmwBeacon(Inst->getRmwBeacon());

4615 Context.insert(NewStore);	4518 Context.insert(NewStore);

4616 }	4519 }

4617 }	4520 }

4618	4521

4619 void TargetX8632::lowerSwitch(const InstSwitch *Inst) {	4522 template <class Machine>

	4523 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {

4620 // This implements the most naive possible lowering.	4524 // This implements the most naive possible lowering.

4621 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default	4525 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default

4622 Operand *Src0 = Inst->getComparison();	4526 Operand *Src0 = Inst->getComparison();

4623 SizeT NumCases = Inst->getNumCases();	4527 SizeT NumCases = Inst->getNumCases();

4624 if (Src0->getType() == IceType_i64) {	4528 if (Src0->getType() == IceType_i64) {

4625 Src0 = legalize(Src0); // get Base/Index into physical registers	4529 Src0 = legalize(Src0); // get Base/Index into physical registers

4626 Operand *Src0Lo = loOperand(Src0);	4530 Operand *Src0Lo = loOperand(Src0);

4627 Operand *Src0Hi = hiOperand(Src0);	4531 Operand *Src0Hi = hiOperand(Src0);

4628 if (NumCases >= 2) {	4532 if (NumCases >= 2) {

4629 Src0Lo = legalizeToVar(Src0Lo);	4533 Src0Lo = legalizeToVar(Src0Lo);

(...skipping 23 matching lines...) Expand all Loading...
4653 Src0 = legalize(Src0, Legal_Reg | Legal_Mem);	4557 Src0 = legalize(Src0, Legal_Reg | Legal_Mem);

4654 for (SizeT I = 0; I < NumCases; ++I) {	4558 for (SizeT I = 0; I < NumCases; ++I) {

4655 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I));	4559 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I));

4656 _cmp(Src0, Value);	4560 _cmp(Src0, Value);

4657 _br(CondX86::Br_e, Inst->getLabel(I));	4561 _br(CondX86::Br_e, Inst->getLabel(I));

4658 }	4562 }

4659	4563

4660 _br(Inst->getLabelDefault());	4564 _br(Inst->getLabelDefault());

4661 }	4565 }

4662	4566

4663 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,	4567 template <class Machine>

4664 Variable *Dest, Operand *Src0,	4568 void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind,

4665 Operand *Src1) {	4569 Variable *Dest, Operand *Src0,

	4570 Operand *Src1) {

4666 assert(isVectorType(Dest->getType()));	4571 assert(isVectorType(Dest->getType()));

4667 Type Ty = Dest->getType();	4572 Type Ty = Dest->getType();

4668 Type ElementTy = typeElementType(Ty);	4573 Type ElementTy = typeElementType(Ty);

4669 SizeT NumElements = typeNumElements(Ty);	4574 SizeT NumElements = typeNumElements(Ty);

4670	4575

4671 Operand *T = Ctx->getConstantUndef(Ty);	4576 Operand *T = Ctx->getConstantUndef(Ty);

4672 for (SizeT I = 0; I < NumElements; ++I) {	4577 for (SizeT I = 0; I < NumElements; ++I) {

4673 Constant *Index = Ctx->getConstantInt32(I);	4578 Constant *Index = Ctx->getConstantInt32(I);

4674	4579

4675 // Extract the next two inputs.	4580 // Extract the next two inputs.

(...skipping 16 matching lines...) Expand all Loading...
4692 }	4597 }

4693	4598

4694 // The following pattern occurs often in lowered C and C++ code:	4599 // The following pattern occurs often in lowered C and C++ code:

4695 //	4600 //

4696 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1	4601 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1

4697 // %cmp.ext = sext <n x i1> %cmp to <n x ty>	4602 // %cmp.ext = sext <n x i1> %cmp to <n x ty>

4698 //	4603 //

4699 // We can eliminate the sext operation by copying the result of pcmpeqd,	4604 // We can eliminate the sext operation by copying the result of pcmpeqd,

4700 // pcmpgtd, or cmpps (which produce sign extended results) to the result	4605 // pcmpgtd, or cmpps (which produce sign extended results) to the result

4701 // of the sext operation.	4606 // of the sext operation.

4702 void TargetX8632::eliminateNextVectorSextInstruction(	4607 template <class Machine>

	4608 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction(

4703 Variable *SignExtendedResult) {	4609 Variable *SignExtendedResult) {

4704 if (InstCast *NextCast =	4610 if (InstCast *NextCast =

4705 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {	4611 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {

4706 if (NextCast->getCastKind() == InstCast::Sext &&	4612 if (NextCast->getCastKind() == InstCast::Sext &&

4707 NextCast->getSrc(0) == SignExtendedResult) {	4613 NextCast->getSrc(0) == SignExtendedResult) {

4708 NextCast->setDeleted();	4614 NextCast->setDeleted();

4709 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));	4615 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult));

4710 // Skip over the instruction.	4616 // Skip over the instruction.

4711 Context.advanceNext();	4617 Context.advanceNext();

4712 }	4618 }

4713 }	4619 }

4714 }	4620 }

4715	4621

4716 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); }	4622 template <class Machine>

	4623 void TargetX86Base<Machine>::lowerUnreachable(

	4624 const InstUnreachable * /*Inst*/) {

	4625 _ud2();

	4626 }

4717	4627

4718 void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) {	4628 template <class Machine>

	4629 void TargetX86Base<Machine>::lowerRMW(const InstX8632FakeRMW *RMW) {

4719 // If the beacon variable's live range does not end in this	4630 // If the beacon variable's live range does not end in this

4720 // instruction, then it must end in the modified Store instruction	4631 // instruction, then it must end in the modified Store instruction

4721 // that follows. This means that the original Store instruction is	4632 // that follows. This means that the original Store instruction is

4722 // still there, either because the value being stored is used beyond	4633 // still there, either because the value being stored is used beyond

4723 // the Store instruction, or because dead code elimination did not	4634 // the Store instruction, or because dead code elimination did not

4724 // happen. In either case, we cancel RMW lowering (and the caller	4635 // happen. In either case, we cancel RMW lowering (and the caller

4725 // deletes the RMW instruction).	4636 // deletes the RMW instruction).

4726 if (!RMW->isLastUse(RMW->getBeacon()))	4637 if (!RMW->isLastUse(RMW->getBeacon()))

4727 return;	4638 return;

4728 Operand *Src = RMW->getData();	4639 Operand *Src = RMW->getData();

(...skipping 53 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4782 return;	4693 return;

4783 case InstArithmetic::Xor:	4694 case InstArithmetic::Xor:

4784 Src = legalize(Src, Legal_Reg | Legal_Imm);	4695 Src = legalize(Src, Legal_Reg | Legal_Imm);

4785 _xor_rmw(Addr, Src);	4696 _xor_rmw(Addr, Src);

4786 return;	4697 return;

4787 }	4698 }

4788 }	4699 }

4789 llvm::report_fatal_error("Couldn't lower RMW instruction");	4700 llvm::report_fatal_error("Couldn't lower RMW instruction");

4790 }	4701 }

4791	4702

4792 void TargetX8632::lowerOther(const Inst *Instr) {	4703 template <class Machine>

	4704 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) {

4793 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) {	4705 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) {

4794 lowerRMW(RMW);	4706 lowerRMW(RMW);

4795 } else {	4707 } else {

4796 TargetLowering::lowerOther(Instr);	4708 TargetLowering::lowerOther(Instr);

4797 }	4709 }

4798 }	4710 }

4799	4711

4800 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to	4712 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to

4801 // preserve integrity of liveness analysis. Undef values are also	4713 // preserve integrity of liveness analysis. Undef values are also

4802 // turned into zeroes, since loOperand() and hiOperand() don't expect	4714 // turned into zeroes, since loOperand() and hiOperand() don't expect

4803 // Undef input.	4715 // Undef input.

4804 void TargetX8632::prelowerPhis() {	4716 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {

4805 // Pause constant blinding or pooling, blinding or pooling will be done later	4717 // Pause constant blinding or pooling, blinding or pooling will be done later

4806 // during phi lowering assignments	4718 // during phi lowering assignments

4807 BoolFlagSaver B(RandomizationPoolingPaused, true);	4719 BoolFlagSaver B(RandomizationPoolingPaused, true);

4808	4720

4809 CfgNode *Node = Context.getNode();	4721 CfgNode *Node = Context.getNode();

4810 for (Inst &I : Node->getPhis()) {	4722 for (Inst &I : Node->getPhis()) {

4811 auto Phi = llvm::dyn_cast<InstPhi>(&I);	4723 auto Phi = llvm::dyn_cast<InstPhi>(&I);

4812 if (Phi->isDeleted())	4724 if (Phi->isDeleted())

4813 continue;	4725 continue;

4814 Variable *Dest = Phi->getDest();	4726 Variable *Dest = Phi->getDest();

(...skipping 10 matching lines...) Expand all Loading...
4825 PhiLo->addArgument(loOperand(Src), Label);	4737 PhiLo->addArgument(loOperand(Src), Label);

4826 PhiHi->addArgument(hiOperand(Src), Label);	4738 PhiHi->addArgument(hiOperand(Src), Label);

4827 }	4739 }

4828 Node->getPhis().push_back(PhiLo);	4740 Node->getPhis().push_back(PhiLo);

4829 Node->getPhis().push_back(PhiHi);	4741 Node->getPhis().push_back(PhiHi);

4830 Phi->setDeleted();	4742 Phi->setDeleted();

4831 }	4743 }

4832 }	4744 }

4833 }	4745 }

4834	4746

4835 namespace {

4836

4837 bool isMemoryOperand(const Operand *Opnd) {	4747 bool isMemoryOperand(const Operand *Opnd) {

4838 if (const auto Var = llvm::dyn_cast<Variable>(Opnd))	4748 if (const auto Var = llvm::dyn_cast<Variable>(Opnd))

4839 return !Var->hasReg();	4749 return !Var->hasReg();

4840 // We treat vector undef values the same as a memory operand,	4750 // We treat vector undef values the same as a memory operand,

4841 // because they do in fact need a register to materialize the vector	4751 // because they do in fact need a register to materialize the vector

4842 // of zeroes into.	4752 // of zeroes into.

4843 if (llvm::isa<ConstantUndef>(Opnd))	4753 if (llvm::isa<ConstantUndef>(Opnd))

4844 return isScalarFloatingType(Opnd->getType()) ||	4754 return isScalarFloatingType(Opnd->getType()) ||

4845 isVectorType(Opnd->getType());	4755 isVectorType(Opnd->getType());

4846 if (llvm::isa<Constant>(Opnd))	4756 if (llvm::isa<Constant>(Opnd))

4847 return isScalarFloatingType(Opnd->getType());	4757 return isScalarFloatingType(Opnd->getType());

4848 return true;	4758 return true;

4849 }	4759 }

4850	4760

4851 } // end of anonymous namespace

4852

4853 // Lower the pre-ordered list of assignments into mov instructions.	4761 // Lower the pre-ordered list of assignments into mov instructions.

4854 // Also has to do some ad-hoc register allocation as necessary.	4762 // Also has to do some ad-hoc register allocation as necessary.

4855 void TargetX8632::lowerPhiAssignments(CfgNode *Node,	4763 template <class Machine>

4856 const AssignList &Assignments) {	4764 void TargetX86Base<Machine>::lowerPhiAssignments(

	4765 CfgNode *Node, const AssignList &Assignments) {

4857 // Check that this is a properly initialized shell of a node.	4766 // Check that this is a properly initialized shell of a node.

4858 assert(Node->getOutEdges().size() == 1);	4767 assert(Node->getOutEdges().size() == 1);

4859 assert(Node->getInsts().empty());	4768 assert(Node->getInsts().empty());

4860 assert(Node->getPhis().empty());	4769 assert(Node->getPhis().empty());

4861 CfgNode *Succ = Node->getOutEdges().front();	4770 CfgNode *Succ = Node->getOutEdges().front();

4862 getContext().init(Node);	4771 getContext().init(Node);

4863 // Register set setup similar to regAlloc().	4772 // Register set setup similar to regAlloc().

4864 RegSetMask RegInclude = RegSet_All;	4773 RegSetMask RegInclude = RegSet_All;

4865 RegSetMask RegExclude = RegSet_StackPointer;	4774 RegSetMask RegExclude = RegSet_StackPointer;

4866 if (hasFramePointer())	4775 if (hasFramePointer())

(...skipping 130 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4997 _br(Succ);	4906 _br(Succ);

4998 }	4907 }

4999	4908

5000 // There is no support for loading or emitting vector constants, so the	4909 // There is no support for loading or emitting vector constants, so the

5001 // vector values returned from makeVectorOfZeros, makeVectorOfOnes,	4910 // vector values returned from makeVectorOfZeros, makeVectorOfOnes,

5002 // etc. are initialized with register operations.	4911 // etc. are initialized with register operations.

5003 //	4912 //

5004 // TODO(wala): Add limited support for vector constants so that	4913 // TODO(wala): Add limited support for vector constants so that

5005 // complex initialization in registers is unnecessary.	4914 // complex initialization in registers is unnecessary.

5006	4915

5007 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) {	4916 template <class Machine>

	4917 Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) {

5008 Variable *Reg = makeReg(Ty, RegNum);	4918 Variable *Reg = makeReg(Ty, RegNum);

5009 // Insert a FakeDef, since otherwise the live range of Reg might	4919 // Insert a FakeDef, since otherwise the live range of Reg might

5010 // be overestimated.	4920 // be overestimated.

5011 Context.insert(InstFakeDef::create(Func, Reg));	4921 Context.insert(InstFakeDef::create(Func, Reg));

5012 _pxor(Reg, Reg);	4922 _pxor(Reg, Reg);

5013 return Reg;	4923 return Reg;

5014 }	4924 }

5015	4925

5016 Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) {	4926 template <class Machine>

	4927 Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty,

	4928 int32_t RegNum) {

5017 Variable *MinusOnes = makeReg(Ty, RegNum);	4929 Variable *MinusOnes = makeReg(Ty, RegNum);

5018 // Insert a FakeDef so the live range of MinusOnes is not overestimated.	4930 // Insert a FakeDef so the live range of MinusOnes is not overestimated.

5019 Context.insert(InstFakeDef::create(Func, MinusOnes));	4931 Context.insert(InstFakeDef::create(Func, MinusOnes));

5020 _pcmpeq(MinusOnes, MinusOnes);	4932 _pcmpeq(MinusOnes, MinusOnes);

5021 return MinusOnes;	4933 return MinusOnes;

5022 }	4934 }

5023	4935

5024 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {	4936 template <class Machine>

	4937 Variable *TargetX86Base<Machine>::makeVectorOfOnes(Type Ty, int32_t RegNum) {

5025 Variable *Dest = makeVectorOfZeros(Ty, RegNum);	4938 Variable *Dest = makeVectorOfZeros(Ty, RegNum);

5026 Variable *MinusOne = makeVectorOfMinusOnes(Ty);	4939 Variable *MinusOne = makeVectorOfMinusOnes(Ty);

5027 _psub(Dest, MinusOne);	4940 _psub(Dest, MinusOne);

5028 return Dest;	4941 return Dest;

5029 }	4942 }

5030	4943

5031 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) {	4944 template <class Machine>

	4945 Variable *TargetX86Base<Machine>::makeVectorOfHighOrderBits(Type Ty,

	4946 int32_t RegNum) {

5032 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||	4947 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 ||

5033 Ty == IceType_v16i8);	4948 Ty == IceType_v16i8);

5034 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {	4949 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) {

5035 Variable *Reg = makeVectorOfOnes(Ty, RegNum);	4950 Variable *Reg = makeVectorOfOnes(Ty, RegNum);

5036 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1;	4951 SizeT Shift =

	4952 typeWidthInBytes(typeElementType(Ty)) * Traits::X86_CHAR_BIT - 1;

5037 _psll(Reg, Ctx->getConstantInt8(Shift));	4953 _psll(Reg, Ctx->getConstantInt8(Shift));

5038 return Reg;	4954 return Reg;

5039 } else {	4955 } else {

5040 // SSE has no left shift operation for vectors of 8 bit integers.	4956 // SSE has no left shift operation for vectors of 8 bit integers.

5041 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;	4957 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;

5042 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);	4958 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);

5043 Variable *Reg = makeReg(Ty, RegNum);	4959 Variable *Reg = makeReg(Ty, RegNum);

5044 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));	4960 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));

5045 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));	4961 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));

5046 return Reg;	4962 return Reg;

5047 }	4963 }

5048 }	4964 }

5049	4965

5050 // Construct a mask in a register that can be and'ed with a	4966 // Construct a mask in a register that can be and'ed with a

5051 // floating-point value to mask off its sign bit. The value will be	4967 // floating-point value to mask off its sign bit. The value will be

5052 // <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff>	4968 // <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff>

5053 // for f64. Construct it as vector of ones logically right shifted	4969 // for f64. Construct it as vector of ones logically right shifted

5054 // one bit. TODO(stichnot): Fix the wala TODO above, to represent	4970 // one bit. TODO(stichnot): Fix the wala TODO above, to represent

5055 // vector constants in memory.	4971 // vector constants in memory.

5056 Variable *TargetX8632::makeVectorOfFabsMask(Type Ty, int32_t RegNum) {	4972 template <class Machine>

	4973 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty,

	4974 int32_t RegNum) {

5057 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);	4975 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);

5058 _psrl(Reg, Ctx->getConstantInt8(1));	4976 _psrl(Reg, Ctx->getConstantInt8(1));

5059 return Reg;	4977 return Reg;

5060 }	4978 }

5061	4979

5062 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,	4980 template <class Machine>

5063 Variable *Slot,	4981 OperandX8632Mem *

5064 uint32_t Offset) {	4982 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,

	4983 uint32_t Offset) {

5065 // Ensure that Slot is a stack slot.	4984 // Ensure that Slot is a stack slot.

5066 assert(Slot->getWeight().isZero());	4985 assert(Slot->getWeight().isZero());

5067 assert(Slot->getRegNum() == Variable::NoRegister);	4986 assert(Slot->getRegNum() == Variable::NoRegister);

5068 // Compute the location of Loc in memory.	4987 // Compute the location of Loc in memory.

5069 // TODO(wala,stichnot): lea should not be required. The address of	4988 // TODO(wala,stichnot): lea should not be required. The address of

5070 // the stack slot is known at compile time (although not until after	4989 // the stack slot is known at compile time (although not until after

5071 // addProlog()).	4990 // addProlog()).

5072 const Type PointerType = IceType_i32;	4991 const Type PointerType = IceType_i32;

5073 Variable *Loc = makeReg(PointerType);	4992 Variable *Loc = makeReg(PointerType);

5074 _lea(Loc, Slot);	4993 _lea(Loc, Slot);

5075 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);	4994 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);

5076 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);	4995 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);

5077 }	4996 }

5078	4997

5079 // Helper for legalize() to emit the right code to lower an operand to a	4998 // Helper for legalize() to emit the right code to lower an operand to a

5080 // register of the appropriate type.	4999 // register of the appropriate type.

5081 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {	5000 template <class Machine>

	5001 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {

5082 Type Ty = Src->getType();	5002 Type Ty = Src->getType();

5083 Variable *Reg = makeReg(Ty, RegNum);	5003 Variable *Reg = makeReg(Ty, RegNum);

5084 if (isVectorType(Ty)) {	5004 if (isVectorType(Ty)) {

5085 _movp(Reg, Src);	5005 _movp(Reg, Src);

5086 } else {	5006 } else {

5087 _mov(Reg, Src);	5007 _mov(Reg, Src);

5088 }	5008 }

5089 return Reg;	5009 return Reg;

5090 }	5010 }

5091	5011

5092 Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,	5012 template <class Machine>

5093 int32_t RegNum) {	5013 Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,

	5014 int32_t RegNum) {

5094 Type Ty = From->getType();	5015 Type Ty = From->getType();

5095 // Assert that a physical register is allowed. To date, all calls	5016 // Assert that a physical register is allowed. To date, all calls

5096 // to legalize() allow a physical register. If a physical register	5017 // to legalize() allow a physical register. If a physical register

5097 // needs to be explicitly disallowed, then new code will need to be	5018 // needs to be explicitly disallowed, then new code will need to be

5098 // written to force a spill.	5019 // written to force a spill.

5099 assert(Allowed & Legal_Reg);	5020 assert(Allowed & Legal_Reg);

5100 // If we're asking for a specific physical register, make sure we're	5021 // If we're asking for a specific physical register, make sure we're

5101 // not allowing any other operand kinds. (This could be future	5022 // not allowing any other operand kinds. (This could be future

5102 // work, e.g. allow the shl shift amount to be either an immediate	5023 // work, e.g. allow the shl shift amount to be either an immediate

5103 // or in ecx.)	5024 // or in ecx.)

(...skipping 92 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5196 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {	5117 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {

5197 From = copyToReg(From, RegNum);	5118 From = copyToReg(From, RegNum);

5198 }	5119 }

5199 return From;	5120 return From;

5200 }	5121 }

5201 llvm_unreachable("Unhandled operand kind in legalize()");	5122 llvm_unreachable("Unhandled operand kind in legalize()");

5202 return From;	5123 return From;

5203 }	5124 }

5204	5125

5205 // Provide a trivial wrapper to legalize() for this common usage.	5126 // Provide a trivial wrapper to legalize() for this common usage.

5206 Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) {	5127 template <class Machine>

	5128 Variable *TargetX86Base<Machine>::legalizeToVar(Operand *From, int32_t RegNum) {

5207 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));	5129 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));

5208 }	5130 }

5209	5131

5210 // For the cmp instruction, if Src1 is an immediate, or known to be a	5132 // For the cmp instruction, if Src1 is an immediate, or known to be a

5211 // physical register, we can allow Src0 to be a memory operand.	5133 // physical register, we can allow Src0 to be a memory operand.

5212 // Otherwise, Src0 must be copied into a physical register.	5134 // Otherwise, Src0 must be copied into a physical register.

5213 // (Actually, either Src0 or Src1 can be chosen for the physical	5135 // (Actually, either Src0 or Src1 can be chosen for the physical

5214 // register, but unfortunately we have to commit to one or the other	5136 // register, but unfortunately we have to commit to one or the other

5215 // before register allocation.)	5137 // before register allocation.)

5216 Operand *TargetX8632::legalizeSrc0ForCmp(Operand *Src0, Operand *Src1) {	5138 template <class Machine>

	5139 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0,

	5140 Operand *Src1) {

5217 bool IsSrc1ImmOrReg = false;	5141 bool IsSrc1ImmOrReg = false;

5218 if (llvm::isa<Constant>(Src1)) {	5142 if (llvm::isa<Constant>(Src1)) {

5219 IsSrc1ImmOrReg = true;	5143 IsSrc1ImmOrReg = true;

5220 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {	5144 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {

5221 if (Var->hasReg())	5145 if (Var->hasReg())

5222 IsSrc1ImmOrReg = true;	5146 IsSrc1ImmOrReg = true;

5223 }	5147 }

5224 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);	5148 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);

5225 }	5149 }

5226	5150

5227 OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Opnd, Type Ty,	5151 template <class Machine>

5228 bool DoLegalize) {	5152 OperandX8632Mem *TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd,

	5153 Type Ty,

	5154 bool DoLegalize) {

5229 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Opnd);	5155 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Opnd);

5230 // It may be the case that address mode optimization already creates	5156 // It may be the case that address mode optimization already creates

5231 // an OperandX8632Mem, so in that case it wouldn't need another level	5157 // an OperandX8632Mem, so in that case it wouldn't need another level

5232 // of transformation.	5158 // of transformation.

5233 if (!Mem) {	5159 if (!Mem) {

5234 Variable *Base = llvm::dyn_cast<Variable>(Opnd);	5160 Variable *Base = llvm::dyn_cast<Variable>(Opnd);

5235 Constant *Offset = llvm::dyn_cast<Constant>(Opnd);	5161 Constant *Offset = llvm::dyn_cast<Constant>(Opnd);

5236 assert(Base || Offset);	5162 assert(Base || Offset);

5237 if (Offset) {	5163 if (Offset) {

5238 // During memory operand building, we do not blind or pool	5164 // During memory operand building, we do not blind or pool

(...skipping 11 matching lines...) Expand all Loading...
5250 llvm::isa<ConstantRelocatable>(Offset));	5176 llvm::isa<ConstantRelocatable>(Offset));

5251 }	5177 }

5252 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);	5178 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset);

5253 }	5179 }

5254 // Do legalization, which contains randomization/pooling	5180 // Do legalization, which contains randomization/pooling

5255 // or do randomization/pooling.	5181 // or do randomization/pooling.

5256 return llvm::cast<OperandX8632Mem>(	5182 return llvm::cast<OperandX8632Mem>(

5257 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));	5183 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));

5258 }	5184 }

5259	5185

5260 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {	5186 template <class Machine>

	5187 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {

5261 // There aren't any 64-bit integer registers for x86-32.	5188 // There aren't any 64-bit integer registers for x86-32.

5262 assert(Type != IceType_i64);	5189 assert(Type != IceType_i64);

5263 Variable *Reg = Func->makeVariable(Type);	5190 Variable *Reg = Func->makeVariable(Type);

5264 if (RegNum == Variable::NoRegister)	5191 if (RegNum == Variable::NoRegister)

5265 Reg->setWeightInfinite();	5192 Reg->setWeightInfinite();

5266 else	5193 else

5267 Reg->setRegNum(RegNum);	5194 Reg->setRegNum(RegNum);

5268 return Reg;	5195 return Reg;

5269 }	5196 }

5270	5197

5271 void TargetX8632::postLower() {	5198 template <class Machine> void TargetX86Base<Machine>::postLower() {

5272 if (Ctx->getFlags().getOptLevel() == Opt_m1)	5199 if (Ctx->getFlags().getOptLevel() == Opt_m1)

5273 return;	5200 return;

5274 inferTwoAddress();	5201 inferTwoAddress();

5275 }	5202 }

5276	5203

5277 void TargetX8632::makeRandomRegisterPermutation(	5204 template <class Machine>

	5205 void TargetX86Base<Machine>::makeRandomRegisterPermutation(

5278 llvm::SmallVectorImpl<int32_t> &Permutation,	5206 llvm::SmallVectorImpl<int32_t> &Permutation,

5279 const llvm::SmallBitVector &ExcludeRegisters) const {	5207 const llvm::SmallBitVector &ExcludeRegisters) const {

5280 // TODO(stichnot): Declaring Permutation this way loses type/size	5208 // TODO(stichnot): Declaring Permutation this way loses type/size

5281 // information. Fix this in conjunction with the caller-side TODO.	5209 // information. Fix this in conjunction with the caller-side TODO.

5282 assert(Permutation.size() >= RegX8632::Reg_NUM);	5210 assert(Permutation.size() >= RegX8632::Reg_NUM);

5283 // Expected upper bound on the number of registers in a single	5211 // Expected upper bound on the number of registers in a single

5284 // equivalence class. For x86-32, this would comprise the 8 XMM	5212 // equivalence class. For x86-32, this would comprise the 8 XMM

5285 // registers. This is for performance, not correctness.	5213 // registers. This is for performance, not correctness.

5286 static const unsigned MaxEquivalenceClassSize = 8;	5214 static const unsigned MaxEquivalenceClassSize = 8;

5287 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList;	5215 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList;

(...skipping 46 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5334 if (!First)	5262 if (!First)

5335 Str << " ";	5263 Str << " ";

5336 First = false;	5264 First = false;

5337 Str << getRegName(Register, IceType_i32);	5265 Str << getRegName(Register, IceType_i32);

5338 }	5266 }

5339 Str << "}\n";	5267 Str << "}\n";

5340 }	5268 }

5341 }	5269 }

5342 }	5270 }

5343	5271

5344 void TargetX8632::emit(const ConstantInteger32 *C) const {	5272 template <class Machine>

	5273 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const {

5345 if (!ALLOW_DUMP)	5274 if (!ALLOW_DUMP)

5346 return;	5275 return;

5347 Ostream &Str = Ctx->getStrEmit();	5276 Ostream &Str = Ctx->getStrEmit();

5348 Str << getConstantPrefix() << C->getValue();	5277 Str << getConstantPrefix() << C->getValue();

5349 }	5278 }

5350	5279

5351 void TargetX8632::emit(const ConstantInteger64 *) const {	5280 template <class Machine>

	5281 void TargetX86Base<Machine>::emit(const ConstantInteger64 *) const {

5352 llvm::report_fatal_error("Not expecting to emit 64-bit integers");	5282 llvm::report_fatal_error("Not expecting to emit 64-bit integers");

5353 }	5283 }

5354	5284

5355 void TargetX8632::emit(const ConstantFloat *C) const {	5285 template <class Machine>

	5286 void TargetX86Base<Machine>::emit(const ConstantFloat *C) const {

5356 if (!ALLOW_DUMP)	5287 if (!ALLOW_DUMP)

5357 return;	5288 return;

5358 Ostream &Str = Ctx->getStrEmit();	5289 Ostream &Str = Ctx->getStrEmit();

5359 C->emitPoolLabel(Str);	5290 C->emitPoolLabel(Str);

5360 }	5291 }

5361	5292

5362 void TargetX8632::emit(const ConstantDouble *C) const {	5293 template <class Machine>

	5294 void TargetX86Base<Machine>::emit(const ConstantDouble *C) const {

5363 if (!ALLOW_DUMP)	5295 if (!ALLOW_DUMP)

5364 return;	5296 return;

5365 Ostream &Str = Ctx->getStrEmit();	5297 Ostream &Str = Ctx->getStrEmit();

5366 C->emitPoolLabel(Str);	5298 C->emitPoolLabel(Str);

5367 }	5299 }

5368	5300

5369 void TargetX8632::emit(const ConstantUndef *) const {	5301 template <class Machine>

	5302 void TargetX86Base<Machine>::emit(const ConstantUndef *) const {

5370 llvm::report_fatal_error("undef value encountered by emitter.");	5303 llvm::report_fatal_error("undef value encountered by emitter.");

5371 }	5304 }

5372	5305

5373 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx)

5374 : TargetDataLowering(Ctx) {}

5375

5376 void TargetDataX8632::lowerGlobals(const VariableDeclarationList &Vars,

5377 const IceString &SectionSuffix) {

5378 switch (Ctx->getFlags().getOutFileType()) {

5379 case FT_Elf: {

5380 ELFObjectWriter *Writer = Ctx->getObjectWriter();

5381 Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix);

5382 } break;

5383 case FT_Asm:

5384 case FT_Iasm: {

5385 const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly();

5386 OstreamLocker L(Ctx);

5387 for (const VariableDeclaration *Var : Vars) {

5388 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) {

5389 emitGlobal(*Var, SectionSuffix);

5390 }

5391 }

5392 } break;

5393 }

5394 }

5395

5396 template <typename T> struct PoolTypeConverter {};

5397

5398 template <> struct PoolTypeConverter<float> {

5399 typedef uint32_t PrimitiveIntType;

5400 typedef ConstantFloat IceType;

5401 static const Type Ty = IceType_f32;

5402 static const char *TypeName;

5403 static const char *AsmTag;

5404 static const char *PrintfString;

5405 };

5406 const char *PoolTypeConverter<float>::TypeName = "float";

5407 const char *PoolTypeConverter<float>::AsmTag = ".long";

5408 const char *PoolTypeConverter<float>::PrintfString = "0x%x";

5409

5410 template <> struct PoolTypeConverter<double> {

5411 typedef uint64_t PrimitiveIntType;

5412 typedef ConstantDouble IceType;

5413 static const Type Ty = IceType_f64;

5414 static const char *TypeName;

5415 static const char *AsmTag;

5416 static const char *PrintfString;

5417 };

5418 const char *PoolTypeConverter<double>::TypeName = "double";

5419 const char *PoolTypeConverter<double>::AsmTag = ".quad";

5420 const char *PoolTypeConverter<double>::PrintfString = "0x%llx";

5421

5422 // Add converter for int type constant pooling

5423 template <> struct PoolTypeConverter<uint32_t> {

5424 typedef uint32_t PrimitiveIntType;

5425 typedef ConstantInteger32 IceType;

5426 static const Type Ty = IceType_i32;

5427 static const char *TypeName;

5428 static const char *AsmTag;

5429 static const char *PrintfString;

5430 };

5431 const char *PoolTypeConverter<uint32_t>::TypeName = "i32";

5432 const char *PoolTypeConverter<uint32_t>::AsmTag = ".long";

5433 const char *PoolTypeConverter<uint32_t>::PrintfString = "0x%x";

5434

5435 // Add converter for int type constant pooling

5436 template <> struct PoolTypeConverter<uint16_t> {

5437 typedef uint32_t PrimitiveIntType;

5438 typedef ConstantInteger32 IceType;

5439 static const Type Ty = IceType_i16;

5440 static const char *TypeName;

5441 static const char *AsmTag;

5442 static const char *PrintfString;

5443 };

5444 const char *PoolTypeConverter<uint16_t>::TypeName = "i16";

5445 const char *PoolTypeConverter<uint16_t>::AsmTag = ".short";

5446 const char *PoolTypeConverter<uint16_t>::PrintfString = "0x%x";

5447

5448 // Add converter for int type constant pooling

5449 template <> struct PoolTypeConverter<uint8_t> {

5450 typedef uint32_t PrimitiveIntType;

5451 typedef ConstantInteger32 IceType;

5452 static const Type Ty = IceType_i8;

5453 static const char *TypeName;

5454 static const char *AsmTag;

5455 static const char *PrintfString;

5456 };

5457 const char *PoolTypeConverter<uint8_t>::TypeName = "i8";

5458 const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte";

5459 const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x";

5460

5461 template <typename T>

5462 void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) {

5463 if (!ALLOW_DUMP)

5464 return;

5465 Ostream &Str = Ctx->getStrEmit();

5466 Type Ty = T::Ty;

5467 SizeT Align = typeAlignInBytes(Ty);

5468 ConstantList Pool = Ctx->getConstantPool(Ty);

5469

5470 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",@progbits," << Align

5471 << "\n";

5472 Str << "\t.align\t" << Align << "\n";

5473 for (Constant *C : Pool) {

5474 if (!C->getShouldBePooled())

5475 continue;

5476 typename T::IceType *Const = llvm::cast<typename T::IceType>(C);

5477 typename T::IceType::PrimType Value = Const->getValue();

5478 // Use memcpy() to copy bits from Value into RawValue in a way

5479 // that avoids breaking strict-aliasing rules.

5480 typename T::PrimitiveIntType RawValue;

5481 memcpy(&RawValue, &Value, sizeof(Value));

5482 char buf[30];

5483 int CharsPrinted =

5484 snprintf(buf, llvm::array_lengthof(buf), T::PrintfString, RawValue);

5485 assert(CharsPrinted >= 0 &&

5486 (size_t)CharsPrinted < llvm::array_lengthof(buf));

5487 (void)CharsPrinted; // avoid warnings if asserts are disabled

5488 Const->emitPoolLabel(Str);

5489 Str << ":\n\t" << T::AsmTag << "\t" << buf << "\t# " << T::TypeName << " "

5490 << Value << "\n";

5491 }

5492 }

5493

5494 void TargetDataX8632::lowerConstants() {

5495 if (Ctx->getFlags().getDisableTranslation())

5496 return;

5497 // No need to emit constants from the int pool since (for x86) they

5498 // are embedded as immediates in the instructions, just emit float/double.

5499 switch (Ctx->getFlags().getOutFileType()) {

5500 case FT_Elf: {

5501 ELFObjectWriter *Writer = Ctx->getObjectWriter();

5502

5503 Writer->writeConstantPool<ConstantInteger32>(IceType_i8);

5504 Writer->writeConstantPool<ConstantInteger32>(IceType_i16);

5505 Writer->writeConstantPool<ConstantInteger32>(IceType_i32);

5506

5507 Writer->writeConstantPool<ConstantFloat>(IceType_f32);

5508 Writer->writeConstantPool<ConstantDouble>(IceType_f64);

5509 } break;

5510 case FT_Asm:

5511 case FT_Iasm: {

5512 OstreamLocker L(Ctx);

5513

5514 emitConstantPool<PoolTypeConverter<uint8_t>>(Ctx);

5515 emitConstantPool<PoolTypeConverter<uint16_t>>(Ctx);

5516 emitConstantPool<PoolTypeConverter<uint32_t>>(Ctx);

5517

5518 emitConstantPool<PoolTypeConverter<float>>(Ctx);

5519 emitConstantPool<PoolTypeConverter<double>>(Ctx);

5520 } break;

5521 }

5522 }

5523

5524 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx)

5525 : TargetHeaderLowering(Ctx) {}

5526

5527 // Randomize or pool an Immediate.	5306 // Randomize or pool an Immediate.

5528 Operand *TargetX8632::randomizeOrPoolImmediate(Constant *Immediate,	5307 template <class Machine>

5529 int32_t RegNum) {	5308 Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate,

	5309 int32_t RegNum) {

5530 assert(llvm::isa<ConstantInteger32>(Immediate) ||	5310 assert(llvm::isa<ConstantInteger32>(Immediate) ||

5531 llvm::isa<ConstantRelocatable>(Immediate));	5311 llvm::isa<ConstantRelocatable>(Immediate));

5532 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||	5312 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||

5533 RandomizationPoolingPaused == true) {	5313 RandomizationPoolingPaused == true) {

5534 // Immediates randomization/pooling off or paused	5314 // Immediates randomization/pooling off or paused

5535 return Immediate;	5315 return Immediate;

5536 }	5316 }

5537 if (Immediate->shouldBeRandomizedOrPooled(Ctx)) {	5317 if (Immediate->shouldBeRandomizedOrPooled(Ctx)) {

5538 Ctx->statsUpdateRPImms();	5318 Ctx->statsUpdateRPImms();

5539 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==	5319 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==

(...skipping 55 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5595 OperandX8632Mem::create(Func, Immediate->getType(), nullptr, Symbol);	5375 OperandX8632Mem::create(Func, Immediate->getType(), nullptr, Symbol);

5596 _mov(Reg, MemOperand);	5376 _mov(Reg, MemOperand);

5597 return Reg;	5377 return Reg;

5598 }	5378 }

5599 assert("Unsupported -randomize-pool-immediates option" && false);	5379 assert("Unsupported -randomize-pool-immediates option" && false);

5600 }	5380 }

5601 // the constant Immediate is not eligible for blinding/pooling	5381 // the constant Immediate is not eligible for blinding/pooling

5602 return Immediate;	5382 return Immediate;

5603 }	5383 }

5604	5384

	5385 template <class Machine>

5605 OperandX8632Mem *	5386 OperandX8632Mem *

5606 TargetX8632::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand,	5387 TargetX86Base<Machine>::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand,

5607 int32_t RegNum) {	5388 int32_t RegNum) {

5608 assert(MemOperand);	5389 assert(MemOperand);

5609 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||	5390 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||

5610 RandomizationPoolingPaused == true) {	5391 RandomizationPoolingPaused == true) {

5611 // immediates randomization/pooling is turned off or paused	5392 // immediates randomization/pooling is turned off or paused

5612 return MemOperand;	5393 return MemOperand;

5613 }	5394 }

5614	5395

5615 // If this memory operand is already a randomized one, we do	5396 // If this memory operand is already a randomized one, we do

5616 // not randomize it again.	5397 // not randomize it again.

5617 if (MemOperand->getRandomized())	5398 if (MemOperand->getRandomized())

5618 return MemOperand;	5399 return MemOperand;

5619	5400

5620 if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) {	5401 if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) {

5621 if (C->shouldBeRandomizedOrPooled(Ctx)) {	5402 if (C->shouldBeRandomizedOrPooled(Ctx)) {

5622 // The offset of this mem operand should be blinded or pooled	5403 // The offset of this mem operand should be blinded or pooled

5623 Ctx->statsUpdateRPImms();	5404 Ctx->statsUpdateRPImms();

5624 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==	5405 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==

5625 RPI_Randomize) {	5406 RPI_Randomize) {

5626 // blind the constant offset	5407 // blind the constant offset

5627 // FROM:	5408 // FROM:

5628 // offset[base, index, shift]	5409 // offset[base, index, shift]

5629 // TO:	5410 // TO:

5630 // insert: lea offset+cookie[base], RegTemp	5411 // insert: lea offset+cookie[base], RegTemp

5631 // => -cookie[RegTemp, index, shift]	5412 // => -cookie[RegTemp, index, shift]

5632 uint32_t Value =	5413 uint32_t Value = llvm::dyn_cast<ConstantInteger32>(

5633 llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset())	5414 MemOperand->getOffset())->getValue();

5634 ->getValue();

5635 uint32_t Cookie = Ctx->getRandomizationCookie();	5415 uint32_t Cookie = Ctx->getRandomizationCookie();

5636 Constant *Mask1 = Ctx->getConstantInt(	5416 Constant *Mask1 = Ctx->getConstantInt(

5637 MemOperand->getOffset()->getType(), Cookie + Value);	5417 MemOperand->getOffset()->getType(), Cookie + Value);

5638 Constant *Mask2 =	5418 Constant *Mask2 =

5639 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);	5419 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);

5640	5420

5641 OperandX8632Mem *TempMemOperand = OperandX8632Mem::create(	5421 OperandX8632Mem *TempMemOperand = OperandX8632Mem::create(

5642 Func, MemOperand->getType(), MemOperand->getBase(), Mask1);	5422 Func, MemOperand->getType(), MemOperand->getBase(), Mask1);

5643 // If we have already assigned a physical register, we must come from	5423 // If we have already assigned a physical register, we must come from

5644 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse	5424 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse

(...skipping 65 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
5710 return NewMemOperand;	5490 return NewMemOperand;

5711 }	5491 }

5712 assert("Unsupported -randomize-pool-immediates option" && false);	5492 assert("Unsupported -randomize-pool-immediates option" && false);

5713 }	5493 }

5714 }	5494 }

5715 // the offset is not eligible for blinding or pooling, return the original	5495 // the offset is not eligible for blinding or pooling, return the original

5716 // mem operand	5496 // mem operand

5717 return MemOperand;	5497 return MemOperand;

5718 }	5498 }

5719	5499

	5500 } // end of namespace X86Internal

5720 } // end of namespace Ice	5501 } // end of namespace Ice

	5502

	5503 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H

OLD	NEW

« src/IceTargetLoweringX8632.h ('K') | « src/IceTargetLoweringX86Base.h ('k') | no next file » | no next file with comments »