| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | |
| 6 // License. See LICENSE.TXT for details. | |
| 7 // | |
| 8 //===----------------------------------------------------------------------===// | 5 //===----------------------------------------------------------------------===// |
| 9 // | 6 // |
| 10 // This file implements the TargetLoweringX8632 class, which | 7 // This file implements the TargetLoweringX8632 class, which |
| 11 // consists almost entirely of the lowering sequence for each | 8 // consists almost entirely of the lowering sequence for each |
| 12 // high-level instruction. | 9 // high-level instruction. |
| 13 // | 10 // |
| 14 //===----------------------------------------------------------------------===// | 11 //===----------------------------------------------------------------------===// |
| 15 | 12 |
| 16 #include "llvm/Support/MathExtras.h" | 13 #include "IceTargetLoweringX8632.h" |
| 17 | 14 |
| 18 #include "IceCfg.h" | 15 #include "IceTargetLoweringX86Base.h" |
| 19 #include "IceCfgNode.h" | |
| 20 #include "IceClFlags.h" | |
| 21 #include "IceDefs.h" | |
| 22 #include "IceELFObjectWriter.h" | |
| 23 #include "IceGlobalInits.h" | |
| 24 #include "IceInstX8632.h" | |
| 25 #include "IceLiveness.h" | |
| 26 #include "IceOperand.h" | |
| 27 #include "IceRegistersX8632.h" | |
| 28 #include "IceTargetLoweringX8632.def" | |
| 29 #include "IceTargetLoweringX8632.h" | |
| 30 #include "IceUtils.h" | |
| 31 | 16 |
| 32 namespace Ice { | 17 namespace Ice { |
| 18 namespace X86Internal { |
| 19 template <> struct MachineTraits<TargetX8632> { |
| 20 using InstructionSet = TargetX8632::X86InstructionSet; |
| 33 | 21 |
| 34 namespace { | 22 // The following table summarizes the logic for lowering the fcmp |
| 23 // instruction. There is one table entry for each of the 16 conditions. |
| 24 // |
| 25 // The first four columns describe the case when the operands are |
| 26 // floating point scalar values. A comment in lowerFcmp() describes the |
| 27 // lowering template. In the most general case, there is a compare |
| 28 // followed by two conditional branches, because some fcmp conditions |
| 29 // don't map to a single x86 conditional branch. However, in many cases |
| 30 // it is possible to swap the operands in the comparison and have a |
| 31 // single conditional branch. Since it's quite tedious to validate the |
| 32 // table by hand, good execution tests are helpful. |
| 33 // |
| 34 // The last two columns describe the case when the operands are vectors |
| 35 // of floating point values. For most fcmp conditions, there is a clear |
| 36 // mapping to a single x86 cmpps instruction variant. Some fcmp |
| 37 // conditions require special code to handle and these are marked in the |
| 38 // table with a Cmpps_Invalid predicate. |
| 39 static const struct TableFcmpType { |
| 40 uint32_t Default; |
| 41 bool SwapScalarOperands; |
| 42 CondX86::BrCond C1, C2; |
| 43 bool SwapVectorOperands; |
| 44 CondX86::CmppsCond Predicate; |
| 45 } TableFcmp[]; |
| 46 static const size_t TableFcmpSize; |
| 35 | 47 |
| 36 // The following table summarizes the logic for lowering the fcmp | 48 // The following table summarizes the logic for lowering the icmp instruction |
| 37 // instruction. There is one table entry for each of the 16 conditions. | 49 // for i32 and narrower types. Each icmp condition has a clear mapping to an |
| 38 // | 50 // x86 conditional branch instruction. |
| 39 // The first four columns describe the case when the operands are | 51 |
| 40 // floating point scalar values. A comment in lowerFcmp() describes the | 52 static const struct TableIcmp32Type { |
| 41 // lowering template. In the most general case, there is a compare | 53 CondX86::BrCond Mapping; |
| 42 // followed by two conditional branches, because some fcmp conditions | 54 } TableIcmp32[]; |
| 43 // don't map to a single x86 conditional branch. However, in many cases | 55 static const size_t TableIcmp32Size; |
| 44 // it is possible to swap the operands in the comparison and have a | 56 |
| 45 // single conditional branch. Since it's quite tedious to validate the | 57 // The following table summarizes the logic for lowering the icmp instruction |
| 46 // table by hand, good execution tests are helpful. | 58 // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and |
| 47 // | 59 // conditional branches are needed. For the other conditions, three separate |
| 48 // The last two columns describe the case when the operands are vectors | 60 // conditional branches are needed. |
| 49 // of floating point values. For most fcmp conditions, there is a clear | 61 static const struct TableIcmp64Type { |
| 50 // mapping to a single x86 cmpps instruction variant. Some fcmp | 62 CondX86::BrCond C1, C2, C3; |
| 51 // conditions require special code to handle and these are marked in the | 63 } TableIcmp64[]; |
| 52 // table with a Cmpps_Invalid predicate. | 64 static const size_t TableIcmp64Size; |
| 53 const struct TableFcmp_ { | 65 |
| 54 uint32_t Default; | 66 static CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { |
| 55 bool SwapScalarOperands; | 67 size_t Index = static_cast<size_t>(Cond); |
| 56 CondX86::BrCond C1, C2; | 68 assert(Index < TableIcmp32Size); |
| 57 bool SwapVectorOperands; | 69 return TableIcmp32[Index].Mapping; |
| 58 CondX86::CmppsCond Predicate; | 70 } |
| 59 } TableFcmp[] = { | 71 |
| 72 static const struct TableTypeX8632AttributesType { |
| 73 Type InVectorElementType; |
| 74 } TableTypeX8632Attributes[]; |
| 75 static const size_t TableTypeX8632AttributesSize; |
| 76 |
| 77 // Return the type which the elements of the vector have in the X86 |
| 78 // representation of the vector. |
| 79 static Type getInVectorElementType(Type Ty) { |
| 80 assert(isVectorType(Ty)); |
| 81 size_t Index = static_cast<size_t>(Ty); |
| 82 (void)Index; |
| 83 assert(Index < TableTypeX8632AttributesSize); |
| 84 return TableTypeX8632Attributes[Ty].InVectorElementType; |
| 85 } |
| 86 |
| 87 // The maximum number of arguments to pass in XMM registers |
| 88 static constexpr uint32_t X86_MAX_XMM_ARGS = 4; |
| 89 // The number of bits in a byte |
| 90 static constexpr uint32_t X86_CHAR_BIT = 8; |
| 91 // Stack alignment |
| 92 static const uint32_t X86_STACK_ALIGNMENT_BYTES; |
| 93 // Size of the return address on the stack |
| 94 static constexpr uint32_t X86_RET_IP_SIZE_BYTES = 4; |
| 95 // The number of different NOP instructions |
| 96 static constexpr uint32_t X86_NUM_NOP_VARIANTS = 5; |
| 97 |
| 98 // Value is in bytes. Return Value adjusted to the next highest multiple |
| 99 // of the stack alignment. |
| 100 static uint32_t applyStackAlignment(uint32_t Value) { |
| 101 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); |
| 102 } |
| 103 }; |
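The rounding that applyStackAlignment() delegates to Utils::applyAlignment() is the usual power-of-two round-up. A minimal standalone sketch (illustrative only, not part of this diff):

```cpp
#include <cassert>
#include <cstdint>

// Round Value up to the next multiple of the 16-byte x86 stack alignment.
// The bit trick works only because the alignment is a power of two.
static uint32_t roundUpToStackAlignment(uint32_t Value) {
  const uint32_t Align = 16; // X86_STACK_ALIGNMENT_BYTES
  return (Value + Align - 1) & ~(Align - 1);
}

int main() {
  assert(roundUpToStackAlignment(0) == 0);
  assert(roundUpToStackAlignment(1) == 16);
  assert(roundUpToStackAlignment(16) == 16);
  assert(roundUpToStackAlignment(17) == 32);
  return 0;
}
```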
| 104 |
| 105 const MachineTraits<TargetX8632>::TableFcmpType |
| 106 MachineTraits<TargetX8632>::TableFcmp[] = { |
| 60 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ | 107 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ |
| 61 { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \ | 108 { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \ |
| 62 , | 109 , |
| 63 FCMPX8632_TABLE | 110 FCMPX8632_TABLE |
| 64 #undef X | 111 #undef X |
| 65 }; | 112 }; |
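Why some scalar fcmp conditions in the table above need a second conditional branch: after ucomiss, ZF=1 signals both "equal" and "unordered" (a NaN operand), so a condition like oeq cannot be decided by a single jcc; the lowering must also branch on PF to reject the NaN case. A hedged C++ model of the oeq semantics (illustrative only, not Subzero code):

```cpp
#include <cassert>
#include <cmath>

// fcmp oeq: true only when neither operand is NaN and the values compare
// equal. A lone "je" after ucomiss would also accept the unordered case.
static bool fcmpOeq(float A, float B) {
  return !std::isnan(A) && !std::isnan(B) && A == B;
}

int main() {
  assert(fcmpOeq(1.0f, 1.0f));
  assert(!fcmpOeq(1.0f, 2.0f));
  assert(!fcmpOeq(NAN, NAN)); // unordered: oeq is false
  return 0;
}
```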
| 66 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp); | |
| 67 | 113 |
| 68 // The following table summarizes the logic for lowering the icmp instruction | 114 constexpr size_t MachineTraits<TargetX8632>::TableFcmpSize = |
| 69 // for i32 and narrower types. Each icmp condition has a clear mapping to an | 115 llvm::array_lengthof(TableFcmp); |
| 70 // x86 conditional branch instruction. | |
| 71 | 116 |
| 72 const struct TableIcmp32_ { | 117 const MachineTraits<TargetX8632>::TableIcmp32Type |
| 73 CondX86::BrCond Mapping; | 118 MachineTraits<TargetX8632>::TableIcmp32[] = { |
| 74 } TableIcmp32[] = { | |
| 75 #define X(val, C_32, C1_64, C2_64, C3_64) \ | 119 #define X(val, C_32, C1_64, C2_64, C3_64) \ |
| 76 { CondX86::C_32 } \ | 120 { CondX86::C_32 } \ |
| 77 , | 121 , |
| 78 ICMPX8632_TABLE | 122 ICMPX8632_TABLE |
| 79 #undef X | 123 #undef X |
| 80 }; | 124 }; |
| 81 const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32); | |
| 82 | 125 |
| 83 // The following table summarizes the logic for lowering the icmp instruction | 126 constexpr size_t MachineTraits<TargetX8632>::TableIcmp32Size = |
| 84 // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and | 127 llvm::array_lengthof(TableIcmp32); |
| 85 // conditional branches are needed. For the other conditions, three separate | 128 |
| 86 // conditional branches are needed. | 129 const MachineTraits<TargetX8632>::TableIcmp64Type |
| 87 const struct TableIcmp64_ { | 130 MachineTraits<TargetX8632>::TableIcmp64[] = { |
| 88 CondX86::BrCond C1, C2, C3; | |
| 89 } TableIcmp64[] = { | |
| 90 #define X(val, C_32, C1_64, C2_64, C3_64) \ | 131 #define X(val, C_32, C1_64, C2_64, C3_64) \ |
| 91 { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 } \ | 132 { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 } \ |
| 92 , | 133 , |
| 93 ICMPX8632_TABLE | 134 ICMPX8632_TABLE |
| 94 #undef X | 135 #undef X |
| 95 }; | 136 }; |
| 96 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64); | |
| 97 | 137 |
| 98 CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { | 138 constexpr size_t MachineTraits<TargetX8632>::TableIcmp64Size = |
| 99 size_t Index = static_cast<size_t>(Cond); | 139 llvm::array_lengthof(TableIcmp64); |
| 100 assert(Index < TableIcmp32Size); | |
| 101 return TableIcmp32[Index].Mapping; | |
| 102 } | |
| 103 | 140 |
| 104 const struct TableTypeX8632Attributes_ { | 141 const MachineTraits<TargetX8632>::TableTypeX8632AttributesType |
| 105 Type InVectorElementType; | 142 MachineTraits<TargetX8632>::TableTypeX8632Attributes[] = { |
| 106 } TableTypeX8632Attributes[] = { | |
| 107 #define X(tag, elementty, cvt, sdss, pack, width, fld) \ | 143 #define X(tag, elementty, cvt, sdss, pack, width, fld) \ |
| 108 { elementty } \ | 144 { elementty } \ |
| 109 , | 145 , |
| 110 ICETYPEX8632_TABLE | 146 ICETYPEX8632_TABLE |
| 111 #undef X | |
| 112 }; | |
| 113 const size_t TableTypeX8632AttributesSize = | |
| 114 llvm::array_lengthof(TableTypeX8632Attributes); | |
| 115 | |
| 116 // Return the type which the elements of the vector have in the X86 | |
| 117 // representation of the vector. | |
| 118 Type getInVectorElementType(Type Ty) { | |
| 119 assert(isVectorType(Ty)); | |
| 120 size_t Index = static_cast<size_t>(Ty); | |
| 121 (void)Index; | |
| 122 assert(Index < TableTypeX8632AttributesSize); | |
| 123 return TableTypeX8632Attributes[Ty].InVectorElementType; | |
| 124 } | |
| 125 | |
| 126 // The maximum number of arguments to pass in XMM registers | |
| 127 const uint32_t X86_MAX_XMM_ARGS = 4; | |
| 128 // The number of bits in a byte | |
| 129 const uint32_t X86_CHAR_BIT = 8; | |
| 130 // Stack alignment | |
| 131 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16; | |
| 132 // Size of the return address on the stack | |
| 133 const uint32_t X86_RET_IP_SIZE_BYTES = 4; | |
| 134 // The number of different NOP instructions | |
| 135 const uint32_t X86_NUM_NOP_VARIANTS = 5; | |
| 136 | |
| 137 // Value is in bytes. Return Value adjusted to the next highest multiple | |
| 138 // of the stack alignment. | |
| 139 uint32_t applyStackAlignment(uint32_t Value) { | |
| 140 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); | |
| 141 } | |
| 142 | |
| 143 // In some cases, there are x-macro tables for both high-level and | |
| 144 // low-level instructions/operands that use the same enum key value. | |
| 145 // The tables are kept separate to maintain a proper separation | |
| 146 // between abstraction layers. There is a risk that the tables could | |
| 147 // get out of sync if enum values are reordered or if entries are | |
| 148 // added or deleted. The following dummy namespaces use | |
| 149 // static_asserts to ensure everything is kept in sync. | |
| 150 | |
| 151 // Validate the enum values in FCMPX8632_TABLE. | |
| 152 namespace dummy1 { | |
| 153 // Define a temporary set of enum values based on low-level table | |
| 154 // entries. | |
| 155 enum _tmp_enum { | |
| 156 #define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val, | |
| 157 FCMPX8632_TABLE | |
| 158 #undef X | |
| 159 _num | |
| 160 }; | |
| 161 // Define a set of constants based on high-level table entries. | |
| 162 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag; | |
| 163 ICEINSTFCMP_TABLE | |
| 164 #undef X | |
| 165 // Define a set of constants based on low-level table entries, and | |
| 166 // ensure the table entry keys are consistent. | |
| 167 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ | |
| 168 static const int _table2_##val = _tmp_##val; \ | |
| 169 static_assert( \ | |
| 170 _table1_##val == _table2_##val, \ | |
| 171 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE"); | |
| 172 FCMPX8632_TABLE | |
| 173 #undef X | |
| 174 // Repeat the static asserts with respect to the high-level table | |
| 175 // entries in case the high-level table has extra entries. | |
| 176 #define X(tag, str) \ | |
| 177 static_assert( \ | |
| 178 _table1_##tag == _table2_##tag, \ | |
| 179 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE"); | |
| 180 ICEINSTFCMP_TABLE | |
| 181 #undef X | |
| 182 } // end of namespace dummy1 | |
| 183 | |
| 184 // Validate the enum values in ICMPX8632_TABLE. | |
| 185 namespace dummy2 { | |
| 186 // Define a temporary set of enum values based on low-level table | |
| 187 // entries. | |
| 188 enum _tmp_enum { | |
| 189 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val, | |
| 190 ICMPX8632_TABLE | |
| 191 #undef X | |
| 192 _num | |
| 193 }; | |
| 194 // Define a set of constants based on high-level table entries. | |
| 195 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; | |
| 196 ICEINSTICMP_TABLE | |
| 197 #undef X | |
| 198 // Define a set of constants based on low-level table entries, and | |
| 199 // ensure the table entry keys are consistent. | |
| 200 #define X(val, C_32, C1_64, C2_64, C3_64) \ | |
| 201 static const int _table2_##val = _tmp_##val; \ | |
| 202 static_assert( \ | |
| 203 _table1_##val == _table2_##val, \ | |
| 204 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE"); | |
| 205 ICMPX8632_TABLE | |
| 206 #undef X | |
| 207 // Repeat the static asserts with respect to the high-level table | |
| 208 // entries in case the high-level table has extra entries. | |
| 209 #define X(tag, str) \ | |
| 210 static_assert( \ | |
| 211 _table1_##tag == _table2_##tag, \ | |
| 212 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE"); | |
| 213 ICEINSTICMP_TABLE | |
| 214 #undef X | |
| 215 } // end of namespace dummy2 | |
| 216 | |
| 217 // Validate the enum values in ICETYPEX8632_TABLE. | |
| 218 namespace dummy3 { | |
| 219 // Define a temporary set of enum values based on low-level table | |
| 220 // entries. | |
| 221 enum _tmp_enum { | |
| 222 #define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag, | |
| 223 ICETYPEX8632_TABLE | |
| 224 #undef X | |
| 225 _num | |
| 226 }; | |
| 227 // Define a set of constants based on high-level table entries. | |
| 228 #define X(tag, size, align, elts, elty, str) \ | |
| 229 static const int _table1_##tag = tag; | |
| 230 ICETYPE_TABLE | |
| 231 #undef X | |
| 232 // Define a set of constants based on low-level table entries, and | |
| 233 // ensure the table entry keys are consistent. | |
| 234 #define X(tag, elementty, cvt, sdss, pack, width, fld) \ | |
| 235 static const int _table2_##tag = _tmp_##tag; \ | |
| 236 static_assert(_table1_##tag == _table2_##tag, \ | |
| 237 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); | |
| 238 ICETYPEX8632_TABLE | |
| 239 #undef X | |
| 240 // Repeat the static asserts with respect to the high-level table | |
| 241 // entries in case the high-level table has extra entries. | |
| 242 #define X(tag, size, align, elts, elty, str) \ | |
| 243 static_assert(_table1_##tag == _table2_##tag, \ | |
| 244 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); | |
| 245 ICETYPE_TABLE | |
| 246 #undef X | |
| 247 } // end of namespace dummy3 | |
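A stripped-down sketch of the validation pattern the three dummy namespaces apply (the table names and keys here are illustrative only): two x-macro tables define parallel enums, and a third expansion static_asserts that corresponding keys landed on the same values, so any reordering breaks the build.

```cpp
// Hypothetical high-level and low-level tables that must stay in sync.
#define HIGH_TABLE X(Eq) X(Ne) X(Lt)
#define LOW_TABLE X(Eq) X(Ne) X(Lt)

enum HighEnum {
#define X(tag) High_##tag,
  HIGH_TABLE
#undef X
};

enum LowEnum {
#define X(tag) Low_##tag,
  LOW_TABLE
#undef X
};

// One assert per key; reordering or editing either table is caught here.
#define X(tag) \
  static_assert(int(High_##tag) == int(Low_##tag), "tables out of sync");
LOW_TABLE
#undef X

int main() { return 0; }
```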
| 248 | |
| 249 // A helper class to ease the setting of RandomizationPoolingPaused | |
| 250 // to disable constant blinding or pooling for some translation phases. | |
| 251 class BoolFlagSaver { | |
| 252 BoolFlagSaver() = delete; | |
| 253 BoolFlagSaver(const BoolFlagSaver &) = delete; | |
| 254 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete; | |
| 255 | |
| 256 public: | |
| 257 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; } | |
| 258 ~BoolFlagSaver() { Flag = OldValue; } | |
| 259 | |
| 260 private: | |
| 261 const bool OldValue; | |
| 262 bool &Flag; | |
| 263 }; | |
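Usage sketch for the guard above, mirroring how translateO2() wraps doLoadOpt() and advancedPhiLowering() below; the class is restated so the snippet stands alone:

```cpp
#include <cassert>

class BoolFlagSaver { // restated from above for a self-contained example
public:
  BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; }
  ~BoolFlagSaver() { Flag = OldValue; }

private:
  const bool OldValue;
  bool &Flag;
};

int main() {
  bool RandomizationPoolingPaused = false;
  {
    BoolFlagSaver B(RandomizationPoolingPaused, true);
    assert(RandomizationPoolingPaused); // forced true while the guard lives
  }
  assert(!RandomizationPoolingPaused); // restored at scope exit
  return 0;
}
```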
| 264 | |
| 265 } // end of anonymous namespace | |
| 266 | |
| 267 BoolFoldingEntry::BoolFoldingEntry(Inst *I) | |
| 268 : Instr(I), IsComplex(BoolFolding::hasComplexLowering(I)) {} | |
| 269 | |
| 270 BoolFolding::BoolFoldingProducerKind | |
| 271 BoolFolding::getProducerKind(const Inst *Instr) { | |
| 272 if (llvm::isa<InstIcmp>(Instr)) { | |
| 273 if (Instr->getSrc(0)->getType() != IceType_i64) | |
| 274 return PK_Icmp32; | |
| 275 return PK_None; // TODO(stichnot): actually PK_Icmp64; | |
| 276 } | |
| 277 return PK_None; // TODO(stichnot): remove this | |
| 278 | |
| 279 if (llvm::isa<InstFcmp>(Instr)) | |
| 280 return PK_Fcmp; | |
| 281 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { | |
| 282 switch (Cast->getCastKind()) { | |
| 283 default: | |
| 284 return PK_None; | |
| 285 case InstCast::Trunc: | |
| 286 return PK_Trunc; | |
| 287 } | |
| 288 } | |
| 289 return PK_None; | |
| 290 } | |
| 291 | |
| 292 BoolFolding::BoolFoldingConsumerKind | |
| 293 BoolFolding::getConsumerKind(const Inst *Instr) { | |
| 294 if (llvm::isa<InstBr>(Instr)) | |
| 295 return CK_Br; | |
| 296 if (llvm::isa<InstSelect>(Instr)) | |
| 297 return CK_Select; | |
| 298 return CK_None; // TODO(stichnot): remove this | |
| 299 | |
| 300 if (auto *Cast = llvm::dyn_cast<InstCast>(Instr)) { | |
| 301 switch (Cast->getCastKind()) { | |
| 302 default: | |
| 303 return CK_None; | |
| 304 case InstCast::Sext: | |
| 305 return CK_Sext; | |
| 306 case InstCast::Zext: | |
| 307 return CK_Zext; | |
| 308 } | |
| 309 } | |
| 310 return CK_None; | |
| 311 } | |
| 312 | |
| 313 // Returns true if the producing instruction has a "complex" lowering | |
| 314 // sequence. This generally means that its lowering sequence requires | |
| 315 // more than one conditional branch, namely 64-bit integer compares | |
| 316 // and some floating-point compares. When this is true, and there is | |
| 317 // more than one consumer, we prefer to disable the folding | |
| 318 // optimization, since disabling it minimizes branches. | |
| 319 bool BoolFolding::hasComplexLowering(const Inst *Instr) { | |
| 320 switch (getProducerKind(Instr)) { | |
| 321 default: | |
| 322 return false; | |
| 323 case PK_Icmp64: | |
| 324 return true; | |
| 325 case PK_Fcmp: | |
| 326 return TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()].C2 != | |
| 327 CondX86::Br_None; | |
| 328 } | |
| 329 } | |
| 330 | |
| 331 void BoolFolding::init(CfgNode *Node) { | |
| 332 Producers.clear(); | |
| 333 for (Inst &Instr : Node->getInsts()) { | |
| 334 // Check whether Instr is a valid producer. | |
| 335 Variable *Var = Instr.getDest(); | |
| 336 if (!Instr.isDeleted() // only consider non-deleted instructions | |
| 337 && Var // only instructions with an actual dest var | |
| 338 && Var->getType() == IceType_i1 // only bool-type dest vars | |
| 339 && getProducerKind(&Instr) != PK_None) { // white-listed instructions | |
| 340 Producers[Var->getIndex()] = BoolFoldingEntry(&Instr); | |
| 341 } | |
| 342 // Check each src variable against the map. | |
| 343 for (SizeT I = 0; I < Instr.getSrcSize(); ++I) { | |
| 344 Operand *Src = Instr.getSrc(I); | |
| 345 SizeT NumVars = Src->getNumVars(); | |
| 346 for (SizeT J = 0; J < NumVars; ++J) { | |
| 347 const Variable *Var = Src->getVar(J); | |
| 348 SizeT VarNum = Var->getIndex(); | |
| 349 if (containsValid(VarNum)) { | |
| 350 if (I != 0 // All valid consumers use Var as the first source operand | |
| 351 || getConsumerKind(&Instr) == CK_None // must be white-listed | |
| 352 || (Producers[VarNum].IsComplex && // complex can't be multi-use | |
| 353 Producers[VarNum].NumUses > 0)) { | |
| 354 setInvalid(VarNum); | |
| 355 continue; | |
| 356 } | |
| 357 ++Producers[VarNum].NumUses; | |
| 358 if (Instr.isLastUse(Var)) { | |
| 359 Producers[VarNum].IsLiveOut = false; | |
| 360 } | |
| 361 } | |
| 362 } | |
| 363 } | |
| 364 } | |
| 365 for (auto &I : Producers) { | |
| 366 // Ignore entries previously marked invalid. | |
| 367 if (I.second.Instr == nullptr) | |
| 368 continue; | |
| 369 // Disable the producer if its dest may be live beyond this block. | |
| 370 if (I.second.IsLiveOut) { | |
| 371 setInvalid(I.first); | |
| 372 continue; | |
| 373 } | |
| 374 // Mark as "dead" rather than outright deleting. This is so that | |
| 375 // other peephole style optimizations during or before lowering | |
| 376 // have access to this instruction in undeleted form. See for | |
| 377 // example tryOptimizedCmpxchgCmpBr(). | |
| 378 I.second.Instr->setDead(); | |
| 379 } | |
| 380 } | |
| 381 | |
| 382 const Inst *BoolFolding::getProducerFor(const Operand *Opnd) const { | |
| 383 auto *Var = llvm::dyn_cast<const Variable>(Opnd); | |
| 384 if (Var == nullptr) | |
| 385 return nullptr; | |
| 386 SizeT VarNum = Var->getIndex(); | |
| 387 auto Element = Producers.find(VarNum); | |
| 388 if (Element == Producers.end()) | |
| 389 return nullptr; | |
| 390 return Element->second.Instr; | |
| 391 } | |
| 392 | |
| 393 void BoolFolding::dump(const Cfg *Func) const { | |
| 394 if (!ALLOW_DUMP || !Func->isVerbose(IceV_Folding)) | |
| 395 return; | |
| 396 OstreamLocker L(Func->getContext()); | |
| 397 Ostream &Str = Func->getContext()->getStrDump(); | |
| 398 for (auto &I : Producers) { | |
| 399 if (I.second.Instr == nullptr) | |
| 400 continue; | |
| 401 Str << "Found foldable producer:\n "; | |
| 402 I.second.Instr->dump(Func); | |
| 403 Str << "\n"; | |
| 404 } | |
| 405 } | |
| 406 | |
| 407 void TargetX8632::initNodeForLowering(CfgNode *Node) { | |
| 408 FoldingInfo.init(Node); | |
| 409 FoldingInfo.dump(Func); | |
| 410 } | |
| 411 | |
| 412 TargetX8632::TargetX8632(Cfg *Func) : TargetLowering(Func) { | |
| 413 static_assert((X86InstructionSet::End - X86InstructionSet::Begin) == | |
| 414 (TargetInstructionSet::X86InstructionSet_End - | |
| 415 TargetInstructionSet::X86InstructionSet_Begin), | |
| 416 "X86InstructionSet range different from TargetInstructionSet"); | |
| 417 if (Func->getContext()->getFlags().getTargetInstructionSet() != | |
| 418 TargetInstructionSet::BaseInstructionSet) { | |
| 419 InstructionSet = static_cast<X86InstructionSet>( | |
| 420 (Func->getContext()->getFlags().getTargetInstructionSet() - | |
| 421 TargetInstructionSet::X86InstructionSet_Begin) + | |
| 422 X86InstructionSet::Begin); | |
| 423 } | |
| 424 // TODO: Don't initialize IntegerRegisters and friends every time. | |
| 425 // Instead, initialize in some sort of static initializer for the | |
| 426 // class. | |
| 427 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM); | |
| 428 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM); | |
| 429 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM); | |
| 430 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM); | |
| 431 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM); | |
| 432 ScratchRegs.resize(RegX8632::Reg_NUM); | |
| 433 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | |
| 434 frameptr, isI8, isInt, isFP) \ | |
| 435 IntegerRegisters[RegX8632::val] = isInt; \ | |
| 436 IntegerRegistersI8[RegX8632::val] = isI8; \ | |
| 437 FloatRegisters[RegX8632::val] = isFP; \ | |
| 438 VectorRegisters[RegX8632::val] = isFP; \ | |
| 439 ScratchRegs[RegX8632::val] = scratch; | |
| 440 REGX8632_TABLE; | |
| 441 #undef X | |
| 442 TypeToRegisterSet[IceType_void] = InvalidRegisters; | |
| 443 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8; | |
| 444 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8; | |
| 445 TypeToRegisterSet[IceType_i16] = IntegerRegisters; | |
| 446 TypeToRegisterSet[IceType_i32] = IntegerRegisters; | |
| 447 TypeToRegisterSet[IceType_i64] = IntegerRegisters; | |
| 448 TypeToRegisterSet[IceType_f32] = FloatRegisters; | |
| 449 TypeToRegisterSet[IceType_f64] = FloatRegisters; | |
| 450 TypeToRegisterSet[IceType_v4i1] = VectorRegisters; | |
| 451 TypeToRegisterSet[IceType_v8i1] = VectorRegisters; | |
| 452 TypeToRegisterSet[IceType_v16i1] = VectorRegisters; | |
| 453 TypeToRegisterSet[IceType_v16i8] = VectorRegisters; | |
| 454 TypeToRegisterSet[IceType_v8i16] = VectorRegisters; | |
| 455 TypeToRegisterSet[IceType_v4i32] = VectorRegisters; | |
| 456 TypeToRegisterSet[IceType_v4f32] = VectorRegisters; | |
| 457 } | |
| 458 | |
| 459 void TargetX8632::translateO2() { | |
| 460 TimerMarker T(TimerStack::TT_O2, Func); | |
| 461 | |
| 462 if (!Ctx->getFlags().getPhiEdgeSplit()) { | |
| 463 // Lower Phi instructions. | |
| 464 Func->placePhiLoads(); | |
| 465 if (Func->hasError()) | |
| 466 return; | |
| 467 Func->placePhiStores(); | |
| 468 if (Func->hasError()) | |
| 469 return; | |
| 470 Func->deletePhis(); | |
| 471 if (Func->hasError()) | |
| 472 return; | |
| 473 Func->dump("After Phi lowering"); | |
| 474 } | |
| 475 | |
| 476 // Address mode optimization. | |
| 477 Func->getVMetadata()->init(VMK_SingleDefs); | |
| 478 Func->doAddressOpt(); | |
| 479 | |
| 480 // Find read-modify-write opportunities. Do this after address mode | |
| 481 // optimization so that doAddressOpt() doesn't need to be applied to RMW | |
| 482 // instructions as well. | |
| 483 findRMW(); | |
| 484 Func->dump("After RMW transform"); | |
| 485 | |
| 486 // Argument lowering | |
| 487 Func->doArgLowering(); | |
| 488 | |
| 489 // Target lowering. This requires liveness analysis for some of | |
| 490 // the lowering decisions, such as compare/branch fusing. If | |
| 491 // non-lightweight liveness analysis is used, the instructions need | |
| 492 // to be renumbered first. TODO: This renumbering should only be | |
| 493 // necessary if we're actually calculating live intervals, which we | |
| 494 // only do for register allocation. | |
| 495 Func->renumberInstructions(); | |
| 496 if (Func->hasError()) | |
| 497 return; | |
| 498 | |
| 499 // TODO: It should be sufficient to use the fastest liveness | |
| 500 // calculation, i.e. livenessLightweight(). However, for some | |
| 501 // reason that slows down the rest of the translation. Investigate. | |
| 502 Func->liveness(Liveness_Basic); | |
| 503 if (Func->hasError()) | |
| 504 return; | |
| 505 Func->dump("After x86 address mode opt"); | |
| 506 | |
| 507 // Disable constant blinding or pooling for load optimization. | |
| 508 { | |
| 509 BoolFlagSaver B(RandomizationPoolingPaused, true); | |
| 510 doLoadOpt(); | |
| 511 } | |
| 512 Func->genCode(); | |
| 513 if (Func->hasError()) | |
| 514 return; | |
| 515 Func->dump("After x86 codegen"); | |
| 516 | |
| 517 // Register allocation. This requires instruction renumbering and | |
| 518 // full liveness analysis. | |
| 519 Func->renumberInstructions(); | |
| 520 if (Func->hasError()) | |
| 521 return; | |
| 522 Func->liveness(Liveness_Intervals); | |
| 523 if (Func->hasError()) | |
| 524 return; | |
| 525 // Validate the live range computations. The expensive validation | |
| 526 // call is deliberately only made when assertions are enabled. | |
| 527 assert(Func->validateLiveness()); | |
| 528 // The post-codegen dump is done here, after liveness analysis and | |
| 529 // associated cleanup, to make the dump cleaner and more useful. | |
| 530 Func->dump("After initial x8632 codegen"); | |
| 531 Func->getVMetadata()->init(VMK_All); | |
| 532 regAlloc(RAK_Global); | |
| 533 if (Func->hasError()) | |
| 534 return; | |
| 535 Func->dump("After linear scan regalloc"); | |
| 536 | |
| 537 if (Ctx->getFlags().getPhiEdgeSplit()) { | |
| 538 // We need to pause constant blinding or pooling during advanced | |
| 539 // phi lowering, unless the lowering assignment has a physical | |
| 540 // register for the dest Variable. | |
| 541 { | |
| 542 BoolFlagSaver B(RandomizationPoolingPaused, true); | |
| 543 Func->advancedPhiLowering(); | |
| 544 } | |
| 545 Func->dump("After advanced Phi lowering"); | |
| 546 } | |
| 547 | |
| 548 // Stack frame mapping. | |
| 549 Func->genFrame(); | |
| 550 if (Func->hasError()) | |
| 551 return; | |
| 552 Func->dump("After stack frame mapping"); | |
| 553 | |
| 554 Func->contractEmptyNodes(); | |
| 555 Func->reorderNodes(); | |
| 556 | |
| 557 // Branch optimization. This needs to be done just before code | |
| 558 // emission. In particular, no transformations that insert or | |
| 559 // reorder CfgNodes should be done after branch optimization. We go | |
| 560 // ahead and do it before nop insertion to reduce the amount of work | |
| 561 // needed for searching for opportunities. | |
| 562 Func->doBranchOpt(); | |
| 563 Func->dump("After branch optimization"); | |
| 564 | |
| 565 // Nop insertion | |
| 566 if (Ctx->getFlags().shouldDoNopInsertion()) { | |
| 567 Func->doNopInsertion(); | |
| 568 } | |
| 569 } | |
| 570 | |
| 571 void TargetX8632::translateOm1() { | |
| 572 TimerMarker T(TimerStack::TT_Om1, Func); | |
| 573 | |
| 574 Func->placePhiLoads(); | |
| 575 if (Func->hasError()) | |
| 576 return; | |
| 577 Func->placePhiStores(); | |
| 578 if (Func->hasError()) | |
| 579 return; | |
| 580 Func->deletePhis(); | |
| 581 if (Func->hasError()) | |
| 582 return; | |
| 583 Func->dump("After Phi lowering"); | |
| 584 | |
| 585 Func->doArgLowering(); | |
| 586 | |
| 587 Func->genCode(); | |
| 588 if (Func->hasError()) | |
| 589 return; | |
| 590 Func->dump("After initial x8632 codegen"); | |
| 591 | |
| 592 regAlloc(RAK_InfOnly); | |
| 593 if (Func->hasError()) | |
| 594 return; | |
| 595 Func->dump("After regalloc of infinite-weight variables"); | |
| 596 | |
| 597 Func->genFrame(); | |
| 598 if (Func->hasError()) | |
| 599 return; | |
| 600 Func->dump("After stack frame mapping"); | |
| 601 | |
| 602 // Nop insertion | |
| 603 if (Ctx->getFlags().shouldDoNopInsertion()) { | |
| 604 Func->doNopInsertion(); | |
| 605 } | |
| 606 } | |
| 607 | |
| 608 namespace { | |
| 609 | |
| 610 bool canRMW(const InstArithmetic *Arith) { | |
| 611 Type Ty = Arith->getDest()->getType(); | |
| 612 // X86 vector instructions write to a register and have no RMW | |
| 613 // option. | |
| 614 if (isVectorType(Ty)) | |
| 615 return false; | |
| 616 bool isI64 = Ty == IceType_i64; | |
| 617 | |
| 618 switch (Arith->getOp()) { | |
| 619 // Not handled for lack of simple lowering: | |
| 620 // shift on i64 | |
| 621 // mul, udiv, urem, sdiv, srem, frem | |
| 622 // Not handled for lack of RMW instructions: | |
| 623 // fadd, fsub, fmul, fdiv (also vector types) | |
| 624 default: | |
| 625 return false; | |
| 626 case InstArithmetic::Add: | |
| 627 case InstArithmetic::Sub: | |
| 628 case InstArithmetic::And: | |
| 629 case InstArithmetic::Or: | |
| 630 case InstArithmetic::Xor: | |
| 631 return true; | |
| 632 case InstArithmetic::Shl: | |
| 633 case InstArithmetic::Lshr: | |
| 634 case InstArithmetic::Ashr: | |
| 635 return false; // TODO(stichnot): implement | |
| 636 return !isI64; | |
| 637 } | |
| 638 } | |
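For intuition, the ops for which canRMW() returns true are those with x86 memory-destination encodings, so a load/op/store triple over one address can collapse into a single instruction. A hedged sketch of the source-level shape (exact emitted code depends on compiler and flags):

```cpp
#include <cstdint>

// Load, modify, store through the same address -- the pattern findRMW()
// looks for below. An optimizing x86 compiler can emit this as one
// read-modify-write instruction, e.g. "add DWORD PTR [mem], 7".
void rmwAdd(uint32_t *Addr) {
  *Addr = *Addr + 7;
}
```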
| 639 | |
| 640 bool isSameMemAddressOperand(const Operand *A, const Operand *B) { | |
| 641 if (A == B) | |
| 642 return true; | |
| 643 if (auto *MemA = llvm::dyn_cast<OperandX8632Mem>(A)) { | |
| 644 if (auto *MemB = llvm::dyn_cast<OperandX8632Mem>(B)) { | |
| 645 return MemA->getBase() == MemB->getBase() && | |
| 646 MemA->getOffset() == MemB->getOffset() && | |
| 647 MemA->getIndex() == MemB->getIndex() && | |
| 648 MemA->getShift() == MemB->getShift() && | |
| 649 MemA->getSegmentRegister() == MemB->getSegmentRegister(); | |
| 650 } | |
| 651 } | |
| 652 return false; | |
| 653 } | |
| 654 | |
| 655 } // end of anonymous namespace | |
| 656 | |
| 657 void TargetX8632::findRMW() { | |
| 658 Func->dump("Before RMW"); | |
| 659 OstreamLocker L(Func->getContext()); | |
| 660 Ostream &Str = Func->getContext()->getStrDump(); | |
| 661 for (CfgNode *Node : Func->getNodes()) { | |
| 662 // Walk through the instructions, considering each sequence of 3 | |
| 663 // instructions, and look for the particular RMW pattern. Note that this | |
| 664 // search can be "broken" (false negatives) if there are intervening deleted | |
| 665 // instructions, or intervening instructions that could be safely moved out | |
| 666 // of the way to reveal an RMW pattern. | |
| 667 auto E = Node->getInsts().end(); | |
| 668 auto I1 = E, I2 = E, I3 = Node->getInsts().begin(); | |
| 669 for (; I3 != E; I1 = I2, I2 = I3, ++I3) { | |
| 670 // Make I3 skip over deleted instructions. | |
| 671 while (I3 != E && I3->isDeleted()) | |
| 672 ++I3; | |
| 673 if (I1 == E || I2 == E || I3 == E) | |
| 674 continue; | |
| 675 assert(!I1->isDeleted()); | |
| 676 assert(!I2->isDeleted()); | |
| 677 assert(!I3->isDeleted()); | |
| 678 if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) { | |
| 679 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) { | |
| 680 if (auto *Store = llvm::dyn_cast<InstStore>(I3)) { | |
| 681 // Look for: | |
| 682 // a = Load addr | |
| 683 // b = <op> a, other | |
| 684 // Store b, addr | |
| 685 // Change to: | |
| 686 // a = Load addr | |
| 687 // b = <op> a, other | |
| 688 // x = FakeDef | |
| 689 // RMW <op>, addr, other, x | |
| 690 // b = Store b, addr, x | |
| 691 // Note that inferTwoAddress() makes sure setDestNonKillable() gets | |
| 692 // called on the updated Store instruction, to avoid liveness | |
| 693 // problems later. | |
| 694 // | |
| 695 // With this transformation, the Store instruction acquires a Dest | |
| 696 // variable and is now subject to dead code elimination if there are | |
| 697 // no more uses of "b". Variable "x" is a beacon for determining | |
| 698 // whether the Store instruction gets dead-code eliminated. If the | |
| 699 // Store instruction is eliminated, then it must be the case that | |
| 700 // the RMW instruction ends x's live range, and therefore the RMW | |
| 701 // instruction will be retained and later lowered. On the other | |
| 702 // hand, if the RMW instruction does not end x's live range, then | |
| 703 // the Store instruction must still be present, and therefore the | |
| 704 // RMW instruction is ignored during lowering because it is | |
| 705 // redundant with the Store instruction. | |
| 706 // | |
| 707 // Note that if "a" has further uses, the RMW transformation may | |
| 708 // still trigger, resulting in two loads and one store, which is | |
| 709 // worse than the original one load and one store. However, this is | |
| 710 // probably rare, and caching probably keeps it just as fast. | |
| 711 if (!isSameMemAddressOperand(Load->getSourceAddress(), | |
| 712 Store->getAddr())) | |
| 713 continue; | |
| 714 Operand *ArithSrcFromLoad = Arith->getSrc(0); | |
| 715 Operand *ArithSrcOther = Arith->getSrc(1); | |
| 716 if (ArithSrcFromLoad != Load->getDest()) { | |
| 717 if (!Arith->isCommutative() || ArithSrcOther != Load->getDest()) | |
| 718 continue; | |
| 719 std::swap(ArithSrcFromLoad, ArithSrcOther); | |
| 720 } | |
| 721 if (Arith->getDest() != Store->getData()) | |
| 722 continue; | |
| 723 if (!canRMW(Arith)) | |
| 724 continue; | |
| 725 if (Func->isVerbose(IceV_RMW)) { | |
| 726 Str << "Found RMW in " << Func->getFunctionName() << ":\n "; | |
| 727 Load->dump(Func); | |
| 728 Str << "\n "; | |
| 729 Arith->dump(Func); | |
| 730 Str << "\n "; | |
| 731 Store->dump(Func); | |
| 732 Str << "\n"; | |
| 733 } | |
| 734 Variable *Beacon = Func->makeVariable(IceType_i32); | |
| 735 Beacon->setWeight(0); | |
| 736 Store->setRmwBeacon(Beacon); | |
| 737 InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon); | |
| 738 Node->getInsts().insert(I3, BeaconDef); | |
| 739 InstX8632FakeRMW *RMW = InstX8632FakeRMW::create( | |
| 740 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp()); | |
| 741 Node->getInsts().insert(I3, RMW); | |
| 742 } | |
| 743 } | |
| 744 } | |
| 745 } | |
| 746 } | |
| 747 } | |
| 748 | |
| 749 namespace { | |
| 750 | |
| 751 // Converts a ConstantInteger32 operand into its constant value, or | |
| 752 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | |
| 753 uint64_t getConstantMemoryOrder(Operand *Opnd) { | |
| 754 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | |
| 755 return Integer->getValue(); | |
| 756 return Intrinsics::MemoryOrderInvalid; | |
| 757 } | |
| 758 | |
| 759 // Determines whether the dest of a Load instruction can be folded | |
| 760 // into one of the src operands of a 2-operand instruction. This is | |
| 761 // true as long as the load dest matches exactly one of the binary | |
| 762 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if | |
| 763 // the answer is true. | |
| 764 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, | |
| 765 Operand *&Src0, Operand *&Src1) { | |
| 766 if (Src0 == LoadDest && Src1 != LoadDest) { | |
| 767 Src0 = LoadSrc; | |
| 768 return true; | |
| 769 } | |
| 770 if (Src0 != LoadDest && Src1 == LoadDest) { | |
| 771 Src1 = LoadSrc; | |
| 772 return true; | |
| 773 } | |
| 774 return false; | |
| 775 } | |
| 776 | |
| 777 } // end of anonymous namespace | |
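The "matches exactly one source operand" rule in canFoldLoadIntoBinaryInst() can be modeled in isolation (illustrative types only, not Subzero code):

```cpp
#include <cassert>

// A load's dest folds into a two-operand consumer only when it appears as
// exactly one of the two sources; matching both (or neither) is rejected.
static bool foldsUniquely(int Src0, int Src1, int LoadDest) {
  return (Src0 == LoadDest) != (Src1 == LoadDest);
}

int main() {
  assert(foldsUniquely(1, 2, /*LoadDest=*/1));  // folds into Src0
  assert(!foldsUniquely(1, 1, /*LoadDest=*/1)); // ambiguous: both match
  assert(!foldsUniquely(2, 3, /*LoadDest=*/1)); // no source matches
  return 0;
}
```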
| 778 | |
| 779 void TargetX8632::doLoadOpt() { | |
| 780 for (CfgNode *Node : Func->getNodes()) { | |
| 781 Context.init(Node); | |
| 782 while (!Context.atEnd()) { | |
| 783 Variable *LoadDest = nullptr; | |
| 784 Operand *LoadSrc = nullptr; | |
| 785 Inst *CurInst = Context.getCur(); | |
| 786 Inst *Next = Context.getNextInst(); | |
| 787 // Determine whether the current instruction is a Load | |
| 788 // instruction or equivalent. | |
| 789 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { | |
| 790 // An InstLoad always qualifies. | |
| 791 LoadDest = Load->getDest(); | |
| 792 const bool DoLegalize = false; | |
| 793 LoadSrc = formMemoryOperand(Load->getSourceAddress(), | |
| 794 LoadDest->getType(), DoLegalize); | |
| 795 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { | |
| 796 // An AtomicLoad intrinsic qualifies as long as it has a valid | |
| 797 // memory ordering, and can be implemented in a single | |
| 798 // instruction (i.e., not i64). | |
| 799 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; | |
| 800 if (ID == Intrinsics::AtomicLoad && | |
| 801 Intrin->getDest()->getType() != IceType_i64 && | |
| 802 Intrinsics::isMemoryOrderValid( | |
| 803 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { | |
| 804 LoadDest = Intrin->getDest(); | |
| 805 const bool DoLegalize = false; | |
| 806 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), | |
| 807 DoLegalize); | |
| 808 } | |
| 809 } | |
| 810 // A Load instruction can be folded into the following | |
| 811 // instruction only if the following instruction ends the Load's | |
| 812 // Dest variable's live range. | |
| 813 if (LoadDest && Next && Next->isLastUse(LoadDest)) { | |
| 814 assert(LoadSrc); | |
| 815 Inst *NewInst = nullptr; | |
| 816 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) { | |
| 817 Operand *Src0 = Arith->getSrc(0); | |
| 818 Operand *Src1 = Arith->getSrc(1); | |
| 819 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { | |
| 820 NewInst = InstArithmetic::create(Func, Arith->getOp(), | |
| 821 Arith->getDest(), Src0, Src1); | |
| 822 } | |
| 823 } else if (auto *Icmp = llvm::dyn_cast<InstIcmp>(Next)) { | |
| 824 Operand *Src0 = Icmp->getSrc(0); | |
| 825 Operand *Src1 = Icmp->getSrc(1); | |
| 826 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { | |
| 827 NewInst = InstIcmp::create(Func, Icmp->getCondition(), | |
| 828 Icmp->getDest(), Src0, Src1); | |
| 829 } | |
| 830 } else if (auto *Fcmp = llvm::dyn_cast<InstFcmp>(Next)) { | |
| 831 Operand *Src0 = Fcmp->getSrc(0); | |
| 832 Operand *Src1 = Fcmp->getSrc(1); | |
| 833 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { | |
| 834 NewInst = InstFcmp::create(Func, Fcmp->getCondition(), | |
| 835 Fcmp->getDest(), Src0, Src1); | |
| 836 } | |
| 837 } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) { | |
| 838 Operand *Src0 = Select->getTrueOperand(); | |
| 839 Operand *Src1 = Select->getFalseOperand(); | |
| 840 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { | |
| 841 NewInst = InstSelect::create(Func, Select->getDest(), | |
| 842 Select->getCondition(), Src0, Src1); | |
| 843 } | |
| 844 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) { | |
| 845 // The load dest can always be folded into a Cast | |
| 846 // instruction. | |
| 847 Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0)); | |
| 848 if (Src0 == LoadDest) { | |
| 849 NewInst = InstCast::create(Func, Cast->getCastKind(), | |
| 850 Cast->getDest(), LoadSrc); | |
| 851 } | |
| 852 } | |
| 853 if (NewInst) { | |
| 854 CurInst->setDeleted(); | |
| 855 Next->setDeleted(); | |
| 856 Context.insert(NewInst); | |
| 857 // Update NewInst->LiveRangesEnded so that target lowering | |
| 858 // may benefit. Also update NewInst->HasSideEffects. | |
| 859 NewInst->spliceLivenessInfo(Next, CurInst); | |
| 860 } | |
| 861 } | |
| 862 Context.advanceCur(); | |
| 863 Context.advanceNext(); | |
| 864 } | |
| 865 } | |
| 866 Func->dump("After load optimization"); | |
| 867 } | |
| 868 | |
| 869 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) { | |
| 870 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { | |
| 871 return Br->optimizeBranch(NextNode); | |
| 872 } | |
| 873 return false; | |
| 874 } | |
| 875 | |
| 876 IceString TargetX8632::RegNames[] = { | |
| 877 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | |
| 878 frameptr, isI8, isInt, isFP) \ | |
| 879 name, | |
| 880 REGX8632_TABLE | |
| 881 #undef X | 147 #undef X |
| 882 }; | 148 }; |
| 883 | 149 |
| 884 Variable *TargetX8632::getPhysicalRegister(SizeT RegNum, Type Ty) { | 150 constexpr size_t MachineTraits<TargetX8632>::TableTypeX8632AttributesSize = |
| 885 if (Ty == IceType_void) | 151 llvm::array_lengthof(TableTypeX8632Attributes); |
| 886 Ty = IceType_i32; | |
| 887 if (PhysicalRegisters[Ty].empty()) | |
| 888 PhysicalRegisters[Ty].resize(RegX8632::Reg_NUM); | |
| 889 assert(RegNum < PhysicalRegisters[Ty].size()); | |
| 890 Variable *Reg = PhysicalRegisters[Ty][RegNum]; | |
| 891 if (Reg == nullptr) { | |
| 892 Reg = Func->makeVariable(Ty); | |
| 893 Reg->setRegNum(RegNum); | |
| 894 PhysicalRegisters[Ty][RegNum] = Reg; | |
| 895 // Specially mark esp as an "argument" so that it is considered | |
| 896 // live upon function entry. | |
| 897 if (RegNum == RegX8632::Reg_esp) { | |
| 898 Func->addImplicitArg(Reg); | |
| 899 Reg->setIgnoreLiveness(); | |
| 900 } | |
| 901 } | |
| 902 return Reg; | |
| 903 } | |
| 904 | 152 |
| 905 IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const { | 153 const uint32_t MachineTraits<TargetX8632>::X86_STACK_ALIGNMENT_BYTES = 16; |
| 906 assert(RegNum < RegX8632::Reg_NUM); | 154 } // end of namespace X86Internal |
| 907 static IceString RegNames8[] = { | |
| 908 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | |
| 909 frameptr, isI8, isInt, isFP) \ | |
| 910 name8, | |
| 911 REGX8632_TABLE | |
| 912 #undef X | |
| 913 }; | |
| 914 static IceString RegNames16[] = { | |
| 915 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | |
| 916 frameptr, isI8, isInt, isFP) \ | |
| 917 name16, | |
| 918 REGX8632_TABLE | |
| 919 #undef X | |
| 920 }; | |
| 921 switch (Ty) { | |
| 922 case IceType_i1: | |
| 923 case IceType_i8: | |
| 924 return RegNames8[RegNum]; | |
| 925 case IceType_i16: | |
| 926 return RegNames16[RegNum]; | |
| 927 default: | |
| 928 return RegNames[RegNum]; | |
| 929 } | |
| 930 } | |
| 931 | 155 |
| 932 void TargetX8632::emitVariable(const Variable *Var) const { | 156 TargetX8632 *TargetX8632::create(Cfg *Func) { |
| 933 Ostream &Str = Ctx->getStrEmit(); | 157 return X86Internal::TargetX86Base<TargetX8632>::create(Func); |
| 934 if (Var->hasReg()) { | |
| 935 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); | |
| 936 return; | |
| 937 } | |
| 938 if (Var->getWeight().isInf()) { | |
| 939 llvm_unreachable("Infinite-weight Variable has no register assigned"); | |
| 940 } | |
| 941 int32_t Offset = Var->getStackOffset(); | |
| 942 if (!hasFramePointer()) | |
| 943 Offset += getStackAdjustment(); | |
| 944 if (Offset) | |
| 945 Str << Offset; | |
| 946 const Type FrameSPTy = IceType_i32; | |
| 947 Str << "(%" << getRegName(getFrameOrStackReg(), FrameSPTy) << ")"; | |
| 948 } | |
| 949 | |
| 950 X8632::Address TargetX8632::stackVarToAsmOperand(const Variable *Var) const { | |
| 951 if (Var->hasReg()) | |
| 952 llvm_unreachable("Stack Variable has a register assigned"); | |
| 953 if (Var->getWeight().isInf()) { | |
| 954 llvm_unreachable("Infinite-weight Variable has no register assigned"); | |
| 955 } | |
| 956 int32_t Offset = Var->getStackOffset(); | |
| 957 if (!hasFramePointer()) | |
| 958 Offset += getStackAdjustment(); | |
| 959 return X8632::Address(RegX8632::getEncodedGPR(getFrameOrStackReg()), Offset); | |
| 960 } | |
| 961 | |
| 962 void TargetX8632::lowerArguments() { | |
| 963 VarList &Args = Func->getArgs(); | |
| 964 // The first four arguments of vector type, regardless of their | |
| 965 // position relative to the other arguments in the argument list, are | |
| 966 // passed in registers xmm0 - xmm3. | |
| 967 unsigned NumXmmArgs = 0; | |
| 968 | |
| 969 Context.init(Func->getEntryNode()); | |
| 970 Context.setInsertPoint(Context.getCur()); | |
| 971 | |
| 972 for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS; | |
| 973 ++I) { | |
| 974 Variable *Arg = Args[I]; | |
| 975 Type Ty = Arg->getType(); | |
| 976 if (!isVectorType(Ty)) | |
| 977 continue; | |
| 978 // Replace Arg in the argument list with the home register. Then | |
| 979 // generate an instruction in the prolog to copy the home register | |
| 980 // to the assigned location of Arg. | |
| 981 int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs; | |
| 982 ++NumXmmArgs; | |
| 983 Variable *RegisterArg = Func->makeVariable(Ty); | |
| 984 if (ALLOW_DUMP) | |
| 985 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); | |
| 986 RegisterArg->setRegNum(RegNum); | |
| 987 RegisterArg->setIsArg(); | |
| 988 Arg->setIsArg(false); | |
| 989 | |
| 990 Args[I] = RegisterArg; | |
| 991 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); | |
| 992 } | |
| 993 } | |
| 994 | |
| 995 // Helper function for addProlog(). | |
| 996 // | |
| 997 // This assumes Arg is an argument passed on the stack. It sets the | |
| 998 // frame offset for Arg and updates InArgsSizeBytes according to Arg's | |
| 999 // width. For an I64 arg that has been split into Lo and Hi components, | |
| 1000 // it calls itself recursively on the components, taking care to handle | |
| 1001 // Lo first because of the little-endian architecture. Lastly, this | |
| 1002 // function generates an instruction to copy Arg into its assigned | |
| 1003 // register if applicable. | |
| 1004 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, | |
| 1005 size_t BasicFrameOffset, | |
| 1006 size_t &InArgsSizeBytes) { | |
| 1007 Variable *Lo = Arg->getLo(); | |
| 1008 Variable *Hi = Arg->getHi(); | |
| 1009 Type Ty = Arg->getType(); | |
| 1010 if (Lo && Hi && Ty == IceType_i64) { | |
| 1011 assert(Lo->getType() != IceType_i64); // don't want infinite recursion | |
| 1012 assert(Hi->getType() != IceType_i64); // don't want infinite recursion | |
| 1013 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); | |
| 1014 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); | |
| 1015 return; | |
| 1016 } | |
| 1017 if (isVectorType(Ty)) { | |
| 1018 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes); | |
| 1019 } | |
| 1020 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); | |
| 1021 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | |
| 1022 if (Arg->hasReg()) { | |
| 1023 assert(Ty != IceType_i64); | |
| 1024 OperandX8632Mem *Mem = OperandX8632Mem::create( | |
| 1025 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); | |
| 1026 if (isVectorType(Arg->getType())) { | |
| 1027 _movp(Arg, Mem); | |
| 1028 } else { | |
| 1029 _mov(Arg, Mem); | |
| 1030 } | |
| 1031 // This argument-copying instruction uses an explicit | |
| 1032 // OperandX8632Mem operand instead of a Variable, so its | |
| 1033 // fill-from-stack operation has to be tracked separately for | |
| 1034 // statistics. | |
| 1035 Ctx->statsUpdateFills(); | |
| 1036 } | |
| 1037 } | |
| 1038 | |
| 1039 Type TargetX8632::stackSlotType() { return IceType_i32; } | |
| 1040 | |
| 1041 void TargetX8632::addProlog(CfgNode *Node) { | |
| 1042 // Stack frame layout: | |
| 1043 // | |
| 1044 // +------------------------+ | |
| 1045 // | 1. return address | | |
| 1046 // +------------------------+ | |
| 1047 // | 2. preserved registers | | |
| 1048 // +------------------------+ | |
| 1049 // | 3. padding | | |
| 1050 // +------------------------+ | |
| 1051 // | 4. global spill area | | |
| 1052 // +------------------------+ | |
| 1053 // | 5. padding | | |
| 1054 // +------------------------+ | |
| 1055 // | 6. local spill area | | |
| 1056 // +------------------------+ | |
| 1057 // | 7. padding | | |
| 1058 // +------------------------+ | |
| 1059 // | 8. allocas | | |
| 1060 // +------------------------+ | |
| 1061 // | |
| 1062 // The following variables record the size in bytes of the given areas: | |
| 1063 // * X86_RET_IP_SIZE_BYTES: area 1 | |
| 1064 // * PreservedRegsSizeBytes: area 2 | |
| 1065 // * SpillAreaPaddingBytes: area 3 | |
| 1066 // * GlobalsSize: area 4 | |
| 1067 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 | |
| 1068 // * LocalsSpillAreaSize: area 6 | |
| 1069 // * SpillAreaSizeBytes: areas 3 - 7 | |
| 1070 | |
| 1071 // Determine stack frame offsets for each Variable without a | |
| 1072 // register assignment. This can be done as one variable per stack | |
| 1073 // slot. Or, do coalescing by running the register allocator again | |
| 1074 // with an infinite set of registers (as a side effect, this gives | |
| 1075 // variables a second chance at physical register assignment). | |
| 1076 // | |
| 1077 // A middle ground approach is to leverage sparsity and allocate one | |
| 1078 // block of space on the frame for globals (variables with | |
| 1079 // multi-block lifetime), and one block to share for locals | |
| 1080 // (single-block lifetime). | |
| 1081 | |
| 1082 Context.init(Node); | |
| 1083 Context.setInsertPoint(Context.getCur()); | |
| 1084 | |
| 1085 llvm::SmallBitVector CalleeSaves = | |
| 1086 getRegisterSet(RegSet_CalleeSave, RegSet_None); | |
| 1087 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); | |
| 1088 VarList SortedSpilledVariables, VariablesLinkedToSpillSlots; | |
| 1089 size_t GlobalsSize = 0; | |
| 1090 // If there is a separate locals area, this represents that area. | |
| 1091 // Otherwise it counts any variable not counted by GlobalsSize. | |
| 1092 SpillAreaSizeBytes = 0; | |
| 1093 // If there is a separate locals area, this specifies the alignment | |
| 1094 // for it. | |
| 1095 uint32_t LocalsSlotsAlignmentBytes = 0; | |
| 1096 // The entire spill locations area gets aligned to largest natural | |
| 1097 // alignment of the variables that have a spill slot. | |
| 1098 uint32_t SpillAreaAlignmentBytes = 0; | |
| 1099 // A spill slot linked to a variable with a stack slot should reuse | |
| 1100 // that stack slot. | |
| 1101 std::function<bool(Variable *)> TargetVarHook = | |
| 1102 [&VariablesLinkedToSpillSlots](Variable *Var) { | |
| 1103 if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) { | |
| 1104 assert(Var->getWeight().isZero()); | |
| 1105 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) { | |
| 1106 VariablesLinkedToSpillSlots.push_back(Var); | |
| 1107 return true; | |
| 1108 } | |
| 1109 } | |
| 1110 return false; | |
| 1111 }; | |
| 1112 | |
| 1113 // Compute the list of spilled variables and bounds for GlobalsSize, etc. | |
| 1114 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, | |
| 1115 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, | |
| 1116 &LocalsSlotsAlignmentBytes, TargetVarHook); | |
| 1117 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; | |
| 1118 SpillAreaSizeBytes += GlobalsSize; | |
| 1119 | |
| 1120 // Add push instructions for preserved registers. | |
| 1121 uint32_t NumCallee = 0; | |
| 1122 size_t PreservedRegsSizeBytes = 0; | |
| 1123 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | |
| 1124 if (CalleeSaves[i] && RegsUsed[i]) { | |
| 1125 ++NumCallee; | |
| 1126 PreservedRegsSizeBytes += 4; | |
| 1127 _push(getPhysicalRegister(i)); | |
| 1128 } | |
| 1129 } | |
| 1130 Ctx->statsUpdateRegistersSaved(NumCallee); | |
| 1131 | |
| 1132 // Generate "push ebp; mov ebp, esp" | |
| 1133 if (IsEbpBasedFrame) { | |
| 1134 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) | |
| 1135 .count() == 0); | |
| 1136 PreservedRegsSizeBytes += 4; | |
| 1137 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp); | |
| 1138 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); | |
| 1139 _push(ebp); | |
| 1140 _mov(ebp, esp); | |
| 1141 // Keep ebp live for late-stage liveness analysis | |
| 1142 // (e.g. asm-verbose mode). | |
| 1143 Context.insert(InstFakeUse::create(Func, ebp)); | |
| 1144 } | |
| 1145 | |
| 1146 // Align the variables area. SpillAreaPaddingBytes is the size of | |
| 1147 // the region after the preserved registers and before the spill areas. | |
| 1148 // LocalsSlotsPaddingBytes is the amount of padding between the globals | |
| 1149 // and locals area if they are separate. | |
| 1150 assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES); | |
| 1151 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); | |
| 1152 uint32_t SpillAreaPaddingBytes = 0; | |
| 1153 uint32_t LocalsSlotsPaddingBytes = 0; | |
| 1154 alignStackSpillAreas(X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes, | |
| 1155 SpillAreaAlignmentBytes, GlobalsSize, | |
| 1156 LocalsSlotsAlignmentBytes, &SpillAreaPaddingBytes, | |
| 1157 &LocalsSlotsPaddingBytes); | |
| 1158 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; | |
| 1159 uint32_t GlobalsAndSubsequentPaddingSize = | |
| 1160 GlobalsSize + LocalsSlotsPaddingBytes; | |
| 1161 | |
| 1162 // Align esp if necessary. | |
| 1163 if (NeedsStackAlignment) { | |
| 1164 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; | |
| 1165 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); | |
| 1166 SpillAreaSizeBytes = StackSize - StackOffset; | |
| 1167 } | |
| 1168 | |
| 1169 // Generate "sub esp, SpillAreaSizeBytes" | |
| 1170 if (SpillAreaSizeBytes) | |
| 1171 _sub(getPhysicalRegister(RegX8632::Reg_esp), | |
| 1172 Ctx->getConstantInt32(SpillAreaSizeBytes)); | |
| 1173 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | |
| 1174 | |
| 1175 resetStackAdjustment(); | |
| 1176 | |
| 1177 // Fill in stack offsets for stack args, and copy args into registers | |
| 1178 // for those that were register-allocated. Args are pushed right to | |
| 1179 // left, so Arg[0] is closest to the stack/frame pointer. | |
| 1180 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | |
| 1181 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; | |
| 1182 if (!IsEbpBasedFrame) | |
| 1183 BasicFrameOffset += SpillAreaSizeBytes; | |
| 1184 | |
| 1185 const VarList &Args = Func->getArgs(); | |
| 1186 size_t InArgsSizeBytes = 0; | |
| 1187 unsigned NumXmmArgs = 0; | |
| 1188 for (Variable *Arg : Args) { | |
| 1189 // Skip arguments passed in registers. | |
| 1190 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { | |
| 1191 ++NumXmmArgs; | |
| 1192 continue; | |
| 1193 } | |
| 1194 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); | |
| 1195 } | |
| 1196 | |
| 1197 // Fill in stack offsets for locals. | |
| 1198 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, | |
| 1199 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, | |
| 1200 IsEbpBasedFrame); | |
| 1201 // Assign stack offsets to variables that have been linked to spilled | |
| 1202 // variables. | |
| 1203 for (Variable *Var : VariablesLinkedToSpillSlots) { | |
| 1204 Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo(); | |
| 1205 Var->setStackOffset(Linked->getStackOffset()); | |
| 1206 } | |
| 1207 this->HasComputedFrame = true; | |
| 1208 | |
| 1209 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) { | |
| 1210 OstreamLocker L(Func->getContext()); | |
| 1211 Ostream &Str = Func->getContext()->getStrDump(); | |
| 1212 | |
| 1213 Str << "Stack layout:\n"; | |
| 1214 uint32_t EspAdjustmentPaddingSize = | |
| 1215 SpillAreaSizeBytes - LocalsSpillAreaSize - | |
| 1216 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; | |
| 1217 Str << " in-args = " << InArgsSizeBytes << " bytes\n" | |
| 1218 << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n" | |
| 1219 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" | |
| 1220 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" | |
| 1221 << " globals spill area = " << GlobalsSize << " bytes\n" | |
| 1222 << " globals-locals spill areas intermediate padding = " | |
| 1223 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" | |
| 1224 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" | |
| 1225 << " esp alignment padding = " << EspAdjustmentPaddingSize | |
| 1226 << " bytes\n"; | |
| 1227 | |
| 1228 Str << "Stack details:\n" | |
| 1229 << " esp adjustment = " << SpillAreaSizeBytes << " bytes\n" | |
| 1230 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" | |
| 1231 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes | |
| 1232 << " bytes\n" | |
| 1233 << " is ebp based = " << IsEbpBasedFrame << "\n"; | |
| 1234 } | |
| 1235 } | |
| 1236 | |
| 1237 void TargetX8632::addEpilog(CfgNode *Node) { | |
| 1238 InstList &Insts = Node->getInsts(); | |
| 1239 InstList::reverse_iterator RI, E; | |
| 1240 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { | |
| 1241 if (llvm::isa<InstX8632Ret>(*RI)) | |
| 1242 break; | |
| 1243 } | |
| 1244 if (RI == E) | |
| 1245 return; | |
| 1246 | |
| 1247 // Convert the reverse_iterator position into its corresponding | |
| 1248 // (forward) iterator position. | |
| 1249 InstList::iterator InsertPoint = RI.base(); | |
| 1250 --InsertPoint; | |
| 1251 Context.init(Node); | |
| 1252 Context.setInsertPoint(InsertPoint); | |
| 1253 | |
| 1254 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); | |
| 1255 if (IsEbpBasedFrame) { | |
| 1256 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp); | |
| 1257 // For late-stage liveness analysis (e.g. asm-verbose mode), | |
| 1258 // adding a fake use of esp before the assignment of esp=ebp keeps | |
| 1259 // previous esp adjustments from being dead-code eliminated. | |
| 1260 Context.insert(InstFakeUse::create(Func, esp)); | |
| 1261 _mov(esp, ebp); | |
| 1262 _pop(ebp); | |
| 1263 } else { | |
| 1264 // add esp, SpillAreaSizeBytes | |
| 1265 if (SpillAreaSizeBytes) | |
| 1266 _add(esp, Ctx->getConstantInt32(SpillAreaSizeBytes)); | |
| 1267 } | |
| 1268 | |
| 1269 // Add pop instructions for preserved registers. | |
| 1270 llvm::SmallBitVector CalleeSaves = | |
| 1271 getRegisterSet(RegSet_CalleeSave, RegSet_None); | |
| 1272 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | |
| 1273 SizeT j = CalleeSaves.size() - i - 1; | |
| 1274 if (j == RegX8632::Reg_ebp && IsEbpBasedFrame) | |
| 1275 continue; | |
| 1276 if (CalleeSaves[j] && RegsUsed[j]) { | |
| 1277 _pop(getPhysicalRegister(j)); | |
| 1278 } | |
| 1279 } | |
| 1280 | |
| 1281 if (!Ctx->getFlags().getUseSandboxing()) | |
| 1282 return; | |
| 1283 // Change the original ret instruction into a sandboxed return sequence. | |
| 1284 // t:ecx = pop | |
| 1285 // bundle_lock | |
| 1286 // and t, ~31 | |
| 1287 // jmp *t | |
| 1288 // bundle_unlock | |
| 1289 // FakeUse <original_ret_operand> | |
| 1290 const SizeT BundleSize = 1 | |
| 1291 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | |
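| // E.g., assuming NaCl's 32-byte bundles (BundleAlignLog2Bytes == 5), | |
| // BundleSize == 32 and the mask below is ~31, clearing the low five bits | |
| // so that the jmp target is bundle-aligned. | |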
| 1292 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); | |
| 1293 _pop(T_ecx); | |
| 1294 _bundle_lock(); | |
| 1295 _and(T_ecx, Ctx->getConstantInt32(~(BundleSize - 1))); | |
| 1296 _jmp(T_ecx); | |
| 1297 _bundle_unlock(); | |
| 1298 if (RI->getSrcSize()) { | |
| 1299 Variable *RetValue = llvm::cast<Variable>(RI->getSrc(0)); | |
| 1300 Context.insert(InstFakeUse::create(Func, RetValue)); | |
| 1301 } | |
| 1302 RI->setDeleted(); | |
| 1303 } | |
| 1304 | |
| 1305 void TargetX8632::split64(Variable *Var) { | |
| 1306 switch (Var->getType()) { | |
| 1307 default: | |
| 1308 return; | |
| 1309 case IceType_i64: | |
| 1310 // TODO: Only consider F64 if we need to push each half when | |
| 1311 // passing as an argument to a function call. Note that each half | |
| 1312 // is still typed as I32. | |
| 1313 case IceType_f64: | |
| 1314 break; | |
| 1315 } | |
| 1316 Variable *Lo = Var->getLo(); | |
| 1317 Variable *Hi = Var->getHi(); | |
| 1318 if (Lo) { | |
| 1319 assert(Hi); | |
| 1320 return; | |
| 1321 } | |
| 1322 assert(Hi == nullptr); | |
| 1323 Lo = Func->makeVariable(IceType_i32); | |
| 1324 Hi = Func->makeVariable(IceType_i32); | |
| 1325 if (ALLOW_DUMP) { | |
| 1326 Lo->setName(Func, Var->getName(Func) + "__lo"); | |
| 1327 Hi->setName(Func, Var->getName(Func) + "__hi"); | |
| 1328 } | |
| 1329 Var->setLoHi(Lo, Hi); | |
| 1330 if (Var->getIsArg()) { | |
| 1331 Lo->setIsArg(); | |
| 1332 Hi->setIsArg(); | |
| 1333 } | |
| 1334 } | |
| 1335 | |
| 1336 Operand *TargetX8632::loOperand(Operand *Operand) { | |
| 1337 assert(Operand->getType() == IceType_i64 || | |
| 1338 Operand->getType() == IceType_f64); | |
| 1339 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) | |
| 1340 return Operand; | |
| 1341 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { | |
| 1342 split64(Var); | |
| 1343 return Var->getLo(); | |
| 1344 } | |
| 1345 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | |
| 1346 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>( | |
| 1347 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); | |
| 1348 return legalize(ConstInt); | |
| 1349 } | |
| 1350 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { | |
| 1351 OperandX8632Mem *MemOperand = OperandX8632Mem::create( | |
| 1352 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), | |
| 1353 Mem->getShift(), Mem->getSegmentRegister()); | |
| 1354 // Test whether we should randomize or pool the offset; if so, blind or | |
| 1355 // pool it and create the mem operand with the blinded/pooled constant. | |
| 1356 // Otherwise, return it as an ordinary mem operand. | |
| 1357 return legalize(MemOperand); | |
| 1358 } | |
| 1359 llvm_unreachable("Unsupported operand type"); | |
| 1360 return nullptr; | |
| 1361 } | |
| 1362 | |
| 1363 Operand *TargetX8632::hiOperand(Operand *Operand) { | |
| 1364 assert(Operand->getType() == IceType_i64 || | |
| 1365 Operand->getType() == IceType_f64); | |
| 1366 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) | |
| 1367 return Operand; | |
| 1368 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { | |
| 1369 split64(Var); | |
| 1370 return Var->getHi(); | |
| 1371 } | |
| 1372 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | |
| 1373 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>( | |
| 1374 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32))); | |
| 1375 // Check whether the constant needs to be blinded or pooled. | |
| 1376 return legalize(ConstInt); | |
| 1377 } | |
| 1378 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { | |
| 1379 Constant *Offset = Mem->getOffset(); | |
| 1380 if (Offset == nullptr) { | |
| 1381 Offset = Ctx->getConstantInt32(4); | |
| 1382 } else if (ConstantInteger32 *IntOffset = | |
| 1383 llvm::dyn_cast<ConstantInteger32>(Offset)) { | |
| 1384 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue()); | |
| 1385 } else if (ConstantRelocatable *SymOffset = | |
| 1386 llvm::dyn_cast<ConstantRelocatable>(Offset)) { | |
| 1387 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4)); | |
| 1388 Offset = | |
| 1389 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(), | |
| 1390 SymOffset->getSuppressMangling()); | |
| 1391 } | |
| 1392 OperandX8632Mem *MemOperand = OperandX8632Mem::create( | |
| 1393 Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(), | |
| 1394 Mem->getShift(), Mem->getSegmentRegister()); | |
| 1395 // Test whether the Offset is an i32 constant eligible for randomization | |
| 1396 // and pooling. Blind/pool it if it is; otherwise return an ordinary mem | |
| 1397 // operand. | |
| 1398 return legalize(MemOperand); | |
| 1399 } | |
| 1400 llvm_unreachable("Unsupported operand type"); | |
| 1401 return nullptr; | |
| 1402 } | |
| 1403 | |
| 1404 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include, | |
| 1405 RegSetMask Exclude) const { | |
| 1406 llvm::SmallBitVector Registers(RegX8632::Reg_NUM); | |
| 1407 | |
| 1408 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | |
| 1409 frameptr, isI8, isInt, isFP) \ | |
| 1410 if (scratch && (Include & RegSet_CallerSave)) \ | |
| 1411 Registers[RegX8632::val] = true; \ | |
| 1412 if (preserved && (Include & RegSet_CalleeSave)) \ | |
| 1413 Registers[RegX8632::val] = true; \ | |
| 1414 if (stackptr && (Include & RegSet_StackPointer)) \ | |
| 1415 Registers[RegX8632::val] = true; \ | |
| 1416 if (frameptr && (Include & RegSet_FramePointer)) \ | |
| 1417 Registers[RegX8632::val] = true; \ | |
| 1418 if (scratch && (Exclude & RegSet_CallerSave)) \ | |
| 1419 Registers[RegX8632::val] = false; \ | |
| 1420 if (preserved && (Exclude & RegSet_CalleeSave)) \ | |
| 1421 Registers[RegX8632::val] = false; \ | |
| 1422 if (stackptr && (Exclude & RegSet_StackPointer)) \ | |
| 1423 Registers[RegX8632::val] = false; \ | |
| 1424 if (frameptr && (Exclude & RegSet_FramePointer)) \ | |
| 1425 Registers[RegX8632::val] = false; | |
| 1426 | |
| 1427 REGX8632_TABLE | |
| 1428 | |
| 1429 #undef X | |
| 1430 | |
| 1431 return Registers; | |
| 1432 } | |
| 1433 | |
| 1434 void TargetX8632::lowerAlloca(const InstAlloca *Inst) { | |
| 1435 IsEbpBasedFrame = true; | |
| 1436 // Conservatively require the stack to be aligned. Some stack | |
| 1437 // adjustment operations implemented below assume that the stack is | |
| 1438 // aligned before the alloca. All the alloca code ensures that the | |
| 1439 // stack alignment is preserved after the alloca. The stack alignment | |
| 1440 // restriction can be relaxed in some cases. | |
| 1441 NeedsStackAlignment = true; | |
| 1442 | |
| 1443 // TODO(stichnot): minimize the number of adjustments of esp, etc. | |
| 1444 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); | |
| 1445 Operand *TotalSize = legalize(Inst->getSizeInBytes()); | |
| 1446 Variable *Dest = Inst->getDest(); | |
| 1447 uint32_t AlignmentParam = Inst->getAlignInBytes(); | |
| 1448 // For default align=0, set it to the real value 1, to avoid any | |
| 1449 // bit-manipulation problems below. | |
| 1450 AlignmentParam = std::max(AlignmentParam, 1u); | |
| 1451 | |
| 1452 // LLVM enforces power of 2 alignment. | |
| 1453 assert(llvm::isPowerOf2_32(AlignmentParam)); | |
| 1454 assert(llvm::isPowerOf2_32(X86_STACK_ALIGNMENT_BYTES)); | |
| 1455 | |
| 1456 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); | |
| 1457 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { | |
| 1458 _and(esp, Ctx->getConstantInt32(-Alignment)); | |
| 1459 } | |
| 1460 if (const auto *ConstantTotalSize = | |
| 1461 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { | |
| 1462 uint32_t Value = ConstantTotalSize->getValue(); | |
| 1463 Value = Utils::applyAlignment(Value, Alignment); | |
| 1464 _sub(esp, Ctx->getConstantInt32(Value)); | |
| 1465 } else { | |
| 1466 // Non-constant sizes need to be adjusted to the next highest | |
| 1467 // multiple of the required alignment at runtime. | |
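| // E.g., for Alignment == 16 this computes T = (TotalSize + 15) & -16. | |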
| 1468 Variable *T = makeReg(IceType_i32); | |
| 1469 _mov(T, TotalSize); | |
| 1470 _add(T, Ctx->getConstantInt32(Alignment - 1)); | |
| 1471 _and(T, Ctx->getConstantInt32(-Alignment)); | |
| 1472 _sub(esp, T); | |
| 1473 } | |
| 1474 _mov(Dest, esp); | |
| 1475 } | |
| 1476 | |
| 1477 // Strength-reduce scalar integer multiplication by a constant (for | |
| 1478 // i32 or narrower) for certain constants. The lea instruction can be | |
| 1479 // used to multiply by 3, 5, or 9, and the shl instruction can be used | |
| 1480 // to multiply by powers of 2. These can be combined such that | |
| 1481 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, | |
| 1482 // combined with left-shifting by 2. | |
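| // For instance, since 100 == 5*5*4, the emitted sequence is roughly: | |
| //   lea t, [t+4*t]  ; t *= 5 | |
| //   lea t, [t+4*t]  ; t *= 5 | |
| //   shl t, 2        ; t *= 4 | |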
| 1483 bool TargetX8632::optimizeScalarMul(Variable *Dest, Operand *Src0, | |
| 1484 int32_t Src1) { | |
| 1485 // Disable this optimization for Om1 and O0, just to keep things | |
| 1486 // simple there. | |
| 1487 if (Ctx->getFlags().getOptLevel() < Opt_1) | |
| 1488 return false; | |
| 1489 Type Ty = Dest->getType(); | |
| 1490 Variable *T = nullptr; | |
| 1491 if (Src1 == -1) { | |
| 1492 _mov(T, Src0); | |
| 1493 _neg(T); | |
| 1494 _mov(Dest, T); | |
| 1495 return true; | |
| 1496 } | |
| 1497 if (Src1 == 0) { | |
| 1498 _mov(Dest, Ctx->getConstantZero(Ty)); | |
| 1499 return true; | |
| 1500 } | |
| 1501 if (Src1 == 1) { | |
| 1502 _mov(T, Src0); | |
| 1503 _mov(Dest, T); | |
| 1504 return true; | |
| 1505 } | |
| 1506 // Don't bother with the edge case where Src1 == MININT. | |
| 1507 if (Src1 == -Src1) | |
| 1508 return false; | |
| 1509 const bool Src1IsNegative = Src1 < 0; | |
| 1510 if (Src1IsNegative) | |
| 1511 Src1 = -Src1; | |
| 1512 uint32_t Count9 = 0; | |
| 1513 uint32_t Count5 = 0; | |
| 1514 uint32_t Count3 = 0; | |
| 1515 uint32_t Count2 = 0; | |
| 1516 uint32_t CountOps = 0; | |
| 1517 while (Src1 > 1) { | |
| 1518 if (Src1 % 9 == 0) { | |
| 1519 ++CountOps; | |
| 1520 ++Count9; | |
| 1521 Src1 /= 9; | |
| 1522 } else if (Src1 % 5 == 0) { | |
| 1523 ++CountOps; | |
| 1524 ++Count5; | |
| 1525 Src1 /= 5; | |
| 1526 } else if (Src1 % 3 == 0) { | |
| 1527 ++CountOps; | |
| 1528 ++Count3; | |
| 1529 Src1 /= 3; | |
| 1530 } else if (Src1 % 2 == 0) { | |
| 1531 if (Count2 == 0) | |
| 1532 ++CountOps; | |
| 1533 ++Count2; | |
| 1534 Src1 /= 2; | |
| 1535 } else { | |
| 1536 return false; | |
| 1537 } | |
| 1538 } | |
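| // E.g. Src1 == 45 factors as 9*5, giving Count9 == 1, Count5 == 1, and | |
| // CountOps == 2, well within the limit below. | |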
| 1539 // Lea optimization only works for i16 and i32 types, not i8. | |
| 1540 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) | |
| 1541 return false; | |
| 1542 // Limit the number of lea/shl operations for a single multiply to a | |
| 1543 // somewhat arbitrary maximum of 3. | |
| 1544 const uint32_t MaxOpsForOptimizedMul = 3; | |
| 1545 if (CountOps > MaxOpsForOptimizedMul) | |
| 1546 return false; | |
| 1547 _mov(T, Src0); | |
| 1548 Constant *Zero = Ctx->getConstantZero(IceType_i32); | |
| 1549 for (uint32_t i = 0; i < Count9; ++i) { | |
| 1550 const uint16_t Shift = 3; // log2(9-1) | |
| 1551 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift)); | |
| 1552 _set_dest_nonkillable(); | |
| 1553 } | |
| 1554 for (uint32_t i = 0; i < Count5; ++i) { | |
| 1555 const uint16_t Shift = 2; // log2(5-1) | |
| 1556 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift)); | |
| 1557 _set_dest_nonkillable(); | |
| 1558 } | |
| 1559 for (uint32_t i = 0; i < Count3; ++i) { | |
| 1560 const uint16_t Shift = 1; // log2(3-1) | |
| 1561 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift)); | |
| 1562 _set_dest_nonkillable(); | |
| 1563 } | |
| 1564 if (Count2) { | |
| 1565 _shl(T, Ctx->getConstantInt(Ty, Count2)); | |
| 1566 } | |
| 1567 if (Src1IsNegative) | |
| 1568 _neg(T); | |
| 1569 _mov(Dest, T); | |
| 1570 return true; | |
| 1571 } | |
| 1572 | |
| 1573 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | |
| 1574 Variable *Dest = Inst->getDest(); | |
| 1575 Operand *Src0 = legalize(Inst->getSrc(0)); | |
| 1576 Operand *Src1 = legalize(Inst->getSrc(1)); | |
| 1577 if (Inst->isCommutative()) { | |
| 1578 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) | |
| 1579 std::swap(Src0, Src1); | |
| 1580 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) | |
| 1581 std::swap(Src0, Src1); | |
| 1582 } | |
| 1583 if (Dest->getType() == IceType_i64) { | |
| 1584 // These helper-call-involved instructions are lowered in this | |
| 1585 // separate switch because loOperand() and hiOperand() may insert | |
| 1586 // redundant instructions for constant blinding and pooling, and | |
| 1587 // such redundant instructions fail liveness analysis under -Om1. | |
| 1588 // Besides, these arguments do not need to be processed by | |
| 1589 // loOperand() and hiOperand() to be used. | |
| 1590 switch (Inst->getOp()) { | |
| 1591 case InstArithmetic::Udiv: { | |
| 1592 const SizeT MaxSrcs = 2; | |
| 1593 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); | |
| 1594 Call->addArg(Inst->getSrc(0)); | |
| 1595 Call->addArg(Inst->getSrc(1)); | |
| 1596 lowerCall(Call); | |
| 1597 return; | |
| 1598 } | |
| 1599 case InstArithmetic::Sdiv: { | |
| 1600 const SizeT MaxSrcs = 2; | |
| 1601 InstCall *Call = makeHelperCall(H_sdiv_i64, Dest, MaxSrcs); | |
| 1602 Call->addArg(Inst->getSrc(0)); | |
| 1603 Call->addArg(Inst->getSrc(1)); | |
| 1604 lowerCall(Call); | |
| 1605 return; | |
| 1606 } | |
| 1607 case InstArithmetic::Urem: { | |
| 1608 const SizeT MaxSrcs = 2; | |
| 1609 InstCall *Call = makeHelperCall(H_urem_i64, Dest, MaxSrcs); | |
| 1610 Call->addArg(Inst->getSrc(0)); | |
| 1611 Call->addArg(Inst->getSrc(1)); | |
| 1612 lowerCall(Call); | |
| 1613 return; | |
| 1614 } | |
| 1615 case InstArithmetic::Srem: { | |
| 1616 const SizeT MaxSrcs = 2; | |
| 1617 InstCall *Call = makeHelperCall(H_srem_i64, Dest, MaxSrcs); | |
| 1618 Call->addArg(Inst->getSrc(0)); | |
| 1619 Call->addArg(Inst->getSrc(1)); | |
| 1620 lowerCall(Call); | |
| 1621 return; | |
| 1622 } | |
| 1623 default: | |
| 1624 break; | |
| 1625 } | |
| 1626 | |
| 1627 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 1628 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 1629 Operand *Src0Lo = loOperand(Src0); | |
| 1630 Operand *Src0Hi = hiOperand(Src0); | |
| 1631 Operand *Src1Lo = loOperand(Src1); | |
| 1632 Operand *Src1Hi = hiOperand(Src1); | |
| 1633 Variable *T_Lo = nullptr, *T_Hi = nullptr; | |
| 1634 switch (Inst->getOp()) { | |
| 1635 case InstArithmetic::_num: | |
| 1636 llvm_unreachable("Unknown arithmetic operator"); | |
| 1637 break; | |
| 1638 case InstArithmetic::Add: | |
| 1639 _mov(T_Lo, Src0Lo); | |
| 1640 _add(T_Lo, Src1Lo); | |
| 1641 _mov(DestLo, T_Lo); | |
| 1642 _mov(T_Hi, Src0Hi); | |
| 1643 _adc(T_Hi, Src1Hi); | |
| 1644 _mov(DestHi, T_Hi); | |
| 1645 break; | |
| 1646 case InstArithmetic::And: | |
| 1647 _mov(T_Lo, Src0Lo); | |
| 1648 _and(T_Lo, Src1Lo); | |
| 1649 _mov(DestLo, T_Lo); | |
| 1650 _mov(T_Hi, Src0Hi); | |
| 1651 _and(T_Hi, Src1Hi); | |
| 1652 _mov(DestHi, T_Hi); | |
| 1653 break; | |
| 1654 case InstArithmetic::Or: | |
| 1655 _mov(T_Lo, Src0Lo); | |
| 1656 _or(T_Lo, Src1Lo); | |
| 1657 _mov(DestLo, T_Lo); | |
| 1658 _mov(T_Hi, Src0Hi); | |
| 1659 _or(T_Hi, Src1Hi); | |
| 1660 _mov(DestHi, T_Hi); | |
| 1661 break; | |
| 1662 case InstArithmetic::Xor: | |
| 1663 _mov(T_Lo, Src0Lo); | |
| 1664 _xor(T_Lo, Src1Lo); | |
| 1665 _mov(DestLo, T_Lo); | |
| 1666 _mov(T_Hi, Src0Hi); | |
| 1667 _xor(T_Hi, Src1Hi); | |
| 1668 _mov(DestHi, T_Hi); | |
| 1669 break; | |
| 1670 case InstArithmetic::Sub: | |
| 1671 _mov(T_Lo, Src0Lo); | |
| 1672 _sub(T_Lo, Src1Lo); | |
| 1673 _mov(DestLo, T_Lo); | |
| 1674 _mov(T_Hi, Src0Hi); | |
| 1675 _sbb(T_Hi, Src1Hi); | |
| 1676 _mov(DestHi, T_Hi); | |
| 1677 break; | |
| 1678 case InstArithmetic::Mul: { | |
| 1679 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; | |
| 1680 Variable *T_4Lo = makeReg(IceType_i32, RegX8632::Reg_eax); | |
| 1681 Variable *T_4Hi = makeReg(IceType_i32, RegX8632::Reg_edx); | |
| 1682 // gcc does the following: | |
| 1683 // a=b*c ==> | |
| 1684 // t1 = b.hi; t1 *=(imul) c.lo | |
| 1685 // t2 = c.hi; t2 *=(imul) b.lo | |
| 1686 // t3:eax = b.lo | |
| 1687 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo | |
| 1688 // a.lo = t4.lo | |
| 1689 // t4.hi += t1 | |
| 1690 // t4.hi += t2 | |
| 1691 // a.hi = t4.hi | |
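| // This works because, mod 2^64, | |
| //   (b.hi*2^32 + b.lo) * (c.hi*2^32 + c.lo) | |
| //     == b.lo*c.lo + 2^32*(b.hi*c.lo + b.lo*c.hi). | |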
| 1692 // The mul instruction cannot take an immediate operand. | |
| 1693 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem); | |
| 1694 _mov(T_1, Src0Hi); | |
| 1695 _imul(T_1, Src1Lo); | |
| 1696 _mov(T_2, Src1Hi); | |
| 1697 _imul(T_2, Src0Lo); | |
| 1698 _mov(T_3, Src0Lo, RegX8632::Reg_eax); | |
| 1699 _mul(T_4Lo, T_3, Src1Lo); | |
| 1700 // The mul instruction produces two dest variables, edx:eax. We | |
| 1701 // create a fake definition of edx to account for this. | |
| 1702 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); | |
| 1703 _mov(DestLo, T_4Lo); | |
| 1704 _add(T_4Hi, T_1); | |
| 1705 _add(T_4Hi, T_2); | |
| 1706 _mov(DestHi, T_4Hi); | |
| 1707 } break; | |
| 1708 case InstArithmetic::Shl: { | |
| 1709 // TODO: Refactor the similarities between Shl, Lshr, and Ashr. | |
| 1710 // gcc does the following: | |
| 1711 // a=b<<c ==> | |
| 1712 // t1:ecx = c.lo & 0xff | |
| 1713 // t2 = b.lo | |
| 1714 // t3 = b.hi | |
| 1715 // t3 = shld t3, t2, t1 | |
| 1716 // t2 = shl t2, t1 | |
| 1717 // test t1, 0x20 | |
| 1718 // je L1 | |
| 1719 // use(t3) | |
| 1720 // t3 = t2 | |
| 1721 // t2 = 0 | |
| 1722 // L1: | |
| 1723 // a.lo = t2 | |
| 1724 // a.hi = t3 | |
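| // The test/je pair fixes up shift amounts of 32..63: x86 shifts mask | |
| // the count to 5 bits, so for c >= 32 the result must be corrected to | |
| // a.hi = b.lo << (c-32) and a.lo = 0. | |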
| 1725 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; | |
| 1726 Constant *BitTest = Ctx->getConstantInt32(0x20); | |
| 1727 Constant *Zero = Ctx->getConstantZero(IceType_i32); | |
| 1728 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
| 1729 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); | |
| 1730 _mov(T_2, Src0Lo); | |
| 1731 _mov(T_3, Src0Hi); | |
| 1732 _shld(T_3, T_2, T_1); | |
| 1733 _shl(T_2, T_1); | |
| 1734 _test(T_1, BitTest); | |
| 1735 _br(CondX86::Br_e, Label); | |
| 1736 // T_2 and T_3 are being assigned again because of the | |
| 1737 // intra-block control flow, so we need the _mov_nonkillable | |
| 1738 // variant to avoid liveness problems. | |
| 1739 _mov_nonkillable(T_3, T_2); | |
| 1740 _mov_nonkillable(T_2, Zero); | |
| 1741 Context.insert(Label); | |
| 1742 _mov(DestLo, T_2); | |
| 1743 _mov(DestHi, T_3); | |
| 1744 } break; | |
| 1745 case InstArithmetic::Lshr: { | |
| 1746 // a=b>>c (unsigned) ==> | |
| 1747 // t1:ecx = c.lo & 0xff | |
| 1748 // t2 = b.lo | |
| 1749 // t3 = b.hi | |
| 1750 // t2 = shrd t2, t3, t1 | |
| 1751 // t3 = shr t3, t1 | |
| 1752 // test t1, 0x20 | |
| 1753 // je L1 | |
| 1754 // use(t2) | |
| 1755 // t2 = t3 | |
| 1756 // t3 = 0 | |
| 1757 // L1: | |
| 1758 // a.lo = t2 | |
| 1759 // a.hi = t3 | |
| 1760 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; | |
| 1761 Constant *BitTest = Ctx->getConstantInt32(0x20); | |
| 1762 Constant *Zero = Ctx->getConstantZero(IceType_i32); | |
| 1763 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
| 1764 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); | |
| 1765 _mov(T_2, Src0Lo); | |
| 1766 _mov(T_3, Src0Hi); | |
| 1767 _shrd(T_2, T_3, T_1); | |
| 1768 _shr(T_3, T_1); | |
| 1769 _test(T_1, BitTest); | |
| 1770 _br(CondX86::Br_e, Label); | |
| 1771 // T_2 and T_3 are being assigned again because of the | |
| 1772 // intra-block control flow, so we need the _mov_nonkillable | |
| 1773 // variant to avoid liveness problems. | |
| 1774 _mov_nonkillable(T_2, T_3); | |
| 1775 _mov_nonkillable(T_3, Zero); | |
| 1776 Context.insert(Label); | |
| 1777 _mov(DestLo, T_2); | |
| 1778 _mov(DestHi, T_3); | |
| 1779 } break; | |
| 1780 case InstArithmetic::Ashr: { | |
| 1781 // a=b>>c (signed) ==> | |
| 1782 // t1:ecx = c.lo & 0xff | |
| 1783 // t2 = b.lo | |
| 1784 // t3 = b.hi | |
| 1785 // t2 = shrd t2, t3, t1 | |
| 1786 // t3 = sar t3, t1 | |
| 1787 // test t1, 0x20 | |
| 1788 // je L1 | |
| 1789 // use(t2) | |
| 1790 // t2 = t3 | |
| 1791 // t3 = sar t3, 0x1f | |
| 1792 // L1: | |
| 1793 // a.lo = t2 | |
| 1794 // a.hi = t3 | |
| 1795 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; | |
| 1796 Constant *BitTest = Ctx->getConstantInt32(0x20); | |
| 1797 Constant *SignExtend = Ctx->getConstantInt32(0x1f); | |
| 1798 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
| 1799 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); | |
| 1800 _mov(T_2, Src0Lo); | |
| 1801 _mov(T_3, Src0Hi); | |
| 1802 _shrd(T_2, T_3, T_1); | |
| 1803 _sar(T_3, T_1); | |
| 1804 _test(T_1, BitTest); | |
| 1805 _br(CondX86::Br_e, Label); | |
| 1806 // T_2 and T_3 are being assigned again because of the | |
| 1807 // intra-block control flow, so T_2 needs the _mov_nonkillable | |
| 1808 // variant to avoid liveness problems. T_3 doesn't need special | |
| 1809 // treatment because it is reassigned via _sar instead of _mov. | |
| 1810 _mov_nonkillable(T_2, T_3); | |
| 1811 _sar(T_3, SignExtend); | |
| 1812 Context.insert(Label); | |
| 1813 _mov(DestLo, T_2); | |
| 1814 _mov(DestHi, T_3); | |
| 1815 } break; | |
| 1816 case InstArithmetic::Fadd: | |
| 1817 case InstArithmetic::Fsub: | |
| 1818 case InstArithmetic::Fmul: | |
| 1819 case InstArithmetic::Fdiv: | |
| 1820 case InstArithmetic::Frem: | |
| 1821 llvm_unreachable("FP instruction with i64 type"); | |
| 1822 break; | |
| 1823 case InstArithmetic::Udiv: | |
| 1824 case InstArithmetic::Sdiv: | |
| 1825 case InstArithmetic::Urem: | |
| 1826 case InstArithmetic::Srem: | |
| 1827 llvm_unreachable("Call-helper-involved instruction for i64 type " | |
| 1828 "should have already been handled before"); | |
| 1829 break; | |
| 1830 } | |
| 1831 return; | |
| 1832 } | |
| 1833 if (isVectorType(Dest->getType())) { | |
| 1834 // TODO: Trap on integer divide and integer modulo by zero. | |
| 1835 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 | |
| 1836 if (llvm::isa<OperandX8632Mem>(Src1)) | |
| 1837 Src1 = legalizeToVar(Src1); | |
| 1838 switch (Inst->getOp()) { | |
| 1839 case InstArithmetic::_num: | |
| 1840 llvm_unreachable("Unknown arithmetic operator"); | |
| 1841 break; | |
| 1842 case InstArithmetic::Add: { | |
| 1843 Variable *T = makeReg(Dest->getType()); | |
| 1844 _movp(T, Src0); | |
| 1845 _padd(T, Src1); | |
| 1846 _movp(Dest, T); | |
| 1847 } break; | |
| 1848 case InstArithmetic::And: { | |
| 1849 Variable *T = makeReg(Dest->getType()); | |
| 1850 _movp(T, Src0); | |
| 1851 _pand(T, Src1); | |
| 1852 _movp(Dest, T); | |
| 1853 } break; | |
| 1854 case InstArithmetic::Or: { | |
| 1855 Variable *T = makeReg(Dest->getType()); | |
| 1856 _movp(T, Src0); | |
| 1857 _por(T, Src1); | |
| 1858 _movp(Dest, T); | |
| 1859 } break; | |
| 1860 case InstArithmetic::Xor: { | |
| 1861 Variable *T = makeReg(Dest->getType()); | |
| 1862 _movp(T, Src0); | |
| 1863 _pxor(T, Src1); | |
| 1864 _movp(Dest, T); | |
| 1865 } break; | |
| 1866 case InstArithmetic::Sub: { | |
| 1867 Variable *T = makeReg(Dest->getType()); | |
| 1868 _movp(T, Src0); | |
| 1869 _psub(T, Src1); | |
| 1870 _movp(Dest, T); | |
| 1871 } break; | |
| 1872 case InstArithmetic::Mul: { | |
| 1873 bool TypesAreValidForPmull = | |
| 1874 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; | |
| 1875 bool InstructionSetIsValidForPmull = | |
| 1876 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1; | |
| 1877 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { | |
| 1878 Variable *T = makeReg(Dest->getType()); | |
| 1879 _movp(T, Src0); | |
| 1880 _pmull(T, Src1); | |
| 1881 _movp(Dest, T); | |
| 1882 } else if (Dest->getType() == IceType_v4i32) { | |
| 1883 // Lowering sequence: | |
| 1884 // Note: The mask arguments have index 0 on the left. | |
| 1885 // | |
| 1886 // movups T1, Src0 | |
| 1887 // pshufd T2, Src0, {1,0,3,0} | |
| 1888 // pshufd T3, Src1, {1,0,3,0} | |
| 1889 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} | |
| 1890 // pmuludq T1, Src1 | |
| 1891 // # T2 = {Src0[1] * Src1[1], Src0[3] * Src1[3]} | |
| 1892 // pmuludq T2, T3 | |
| 1893 // # T1 = {lo(T1[0]), lo(T1[2]), lo(T2[0]), lo(T2[2])} | |
| 1894 // shufps T1, T2, {0,2,0,2} | |
| 1895 // pshufd T4, T1, {0,2,1,3} | |
| 1896 // movups Dest, T4 | |
| 1897 | |
| 1898 // Mask that directs pshufd to create a vector with entries | |
| 1899 // Src[1, 0, 3, 0] | |
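| // (pshufd immediates pack one 2-bit source index per destination | |
| // element, least-significant field first: 0x31 == 0b00110001 selects | |
| // elements {1,0,3,0}.) | |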
| 1900 const unsigned Constant1030 = 0x31; | |
| 1901 Constant *Mask1030 = Ctx->getConstantInt32(Constant1030); | |
| 1902 // Mask that directs shufps to create a vector with entries | |
| 1903 // Dest[0, 2], Src[0, 2] | |
| 1904 const unsigned Mask0202 = 0x88; | |
| 1905 // Mask that directs pshufd to create a vector with entries | |
| 1906 // Src[0, 2, 1, 3] | |
| 1907 const unsigned Mask0213 = 0xd8; | |
| 1908 Variable *T1 = makeReg(IceType_v4i32); | |
| 1909 Variable *T2 = makeReg(IceType_v4i32); | |
| 1910 Variable *T3 = makeReg(IceType_v4i32); | |
| 1911 Variable *T4 = makeReg(IceType_v4i32); | |
| 1912 _movp(T1, Src0); | |
| 1913 _pshufd(T2, Src0, Mask1030); | |
| 1914 _pshufd(T3, Src1, Mask1030); | |
| 1915 _pmuludq(T1, Src1); | |
| 1916 _pmuludq(T2, T3); | |
| 1917 _shufps(T1, T2, Ctx->getConstantInt32(Mask0202)); | |
| 1918 _pshufd(T4, T1, Ctx->getConstantInt32(Mask0213)); | |
| 1919 _movp(Dest, T4); | |
| 1920 } else { | |
| 1921 assert(Dest->getType() == IceType_v16i8); | |
| 1922 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | |
| 1923 } | |
| 1924 } break; | |
| 1925 case InstArithmetic::Shl: | |
| 1926 case InstArithmetic::Lshr: | |
| 1927 case InstArithmetic::Ashr: | |
| 1928 case InstArithmetic::Udiv: | |
| 1929 case InstArithmetic::Urem: | |
| 1930 case InstArithmetic::Sdiv: | |
| 1931 case InstArithmetic::Srem: | |
| 1932 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | |
| 1933 break; | |
| 1934 case InstArithmetic::Fadd: { | |
| 1935 Variable *T = makeReg(Dest->getType()); | |
| 1936 _movp(T, Src0); | |
| 1937 _addps(T, Src1); | |
| 1938 _movp(Dest, T); | |
| 1939 } break; | |
| 1940 case InstArithmetic::Fsub: { | |
| 1941 Variable *T = makeReg(Dest->getType()); | |
| 1942 _movp(T, Src0); | |
| 1943 _subps(T, Src1); | |
| 1944 _movp(Dest, T); | |
| 1945 } break; | |
| 1946 case InstArithmetic::Fmul: { | |
| 1947 Variable *T = makeReg(Dest->getType()); | |
| 1948 _movp(T, Src0); | |
| 1949 _mulps(T, Src1); | |
| 1950 _movp(Dest, T); | |
| 1951 } break; | |
| 1952 case InstArithmetic::Fdiv: { | |
| 1953 Variable *T = makeReg(Dest->getType()); | |
| 1954 _movp(T, Src0); | |
| 1955 _divps(T, Src1); | |
| 1956 _movp(Dest, T); | |
| 1957 } break; | |
| 1958 case InstArithmetic::Frem: | |
| 1959 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | |
| 1960 break; | |
| 1961 } | |
| 1962 return; | |
| 1963 } | |
| 1964 Variable *T_edx = nullptr; | |
| 1965 Variable *T = nullptr; | |
| 1966 switch (Inst->getOp()) { | |
| 1967 case InstArithmetic::_num: | |
| 1968 llvm_unreachable("Unknown arithmetic operator"); | |
| 1969 break; | |
| 1970 case InstArithmetic::Add: | |
| 1971 _mov(T, Src0); | |
| 1972 _add(T, Src1); | |
| 1973 _mov(Dest, T); | |
| 1974 break; | |
| 1975 case InstArithmetic::And: | |
| 1976 _mov(T, Src0); | |
| 1977 _and(T, Src1); | |
| 1978 _mov(Dest, T); | |
| 1979 break; | |
| 1980 case InstArithmetic::Or: | |
| 1981 _mov(T, Src0); | |
| 1982 _or(T, Src1); | |
| 1983 _mov(Dest, T); | |
| 1984 break; | |
| 1985 case InstArithmetic::Xor: | |
| 1986 _mov(T, Src0); | |
| 1987 _xor(T, Src1); | |
| 1988 _mov(Dest, T); | |
| 1989 break; | |
| 1990 case InstArithmetic::Sub: | |
| 1991 _mov(T, Src0); | |
| 1992 _sub(T, Src1); | |
| 1993 _mov(Dest, T); | |
| 1994 break; | |
| 1995 case InstArithmetic::Mul: | |
| 1996 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | |
| 1997 if (optimizeScalarMul(Dest, Src0, C->getValue())) | |
| 1998 return; | |
| 1999 } | |
| 2000 // The 8-bit version of imul only allows the form "imul r/m8" | |
| 2001 // where T must be in eax. | |
| 2002 if (isByteSizedArithType(Dest->getType())) { | |
| 2003 _mov(T, Src0, RegX8632::Reg_eax); | |
| 2004 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | |
| 2005 } else { | |
| 2006 _mov(T, Src0); | |
| 2007 } | |
| 2008 _imul(T, Src1); | |
| 2009 _mov(Dest, T); | |
| 2010 break; | |
| 2011 case InstArithmetic::Shl: | |
| 2012 _mov(T, Src0); | |
| 2013 if (!llvm::isa<Constant>(Src1)) | |
| 2014 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx); | |
| 2015 _shl(T, Src1); | |
| 2016 _mov(Dest, T); | |
| 2017 break; | |
| 2018 case InstArithmetic::Lshr: | |
| 2019 _mov(T, Src0); | |
| 2020 if (!llvm::isa<Constant>(Src1)) | |
| 2021 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx); | |
| 2022 _shr(T, Src1); | |
| 2023 _mov(Dest, T); | |
| 2024 break; | |
| 2025 case InstArithmetic::Ashr: | |
| 2026 _mov(T, Src0); | |
| 2027 if (!llvm::isa<Constant>(Src1)) | |
| 2028 Src1 = legalizeToVar(Src1, RegX8632::Reg_ecx); | |
| 2029 _sar(T, Src1); | |
| 2030 _mov(Dest, T); | |
| 2031 break; | |
| 2032 case InstArithmetic::Udiv: | |
| 2033 // div and idiv are among the few arithmetic operators that do not | |
| 2034 // allow an immediate operand. | |
| 2035 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | |
| 2036 if (isByteSizedArithType(Dest->getType())) { | |
| 2037 Variable *T_ah = nullptr; | |
| 2038 Constant *Zero = Ctx->getConstantZero(IceType_i8); | |
| 2039 _mov(T, Src0, RegX8632::Reg_eax); | |
| 2040 _mov(T_ah, Zero, RegX8632::Reg_ah); | |
| 2041 _div(T, Src1, T_ah); | |
| 2042 _mov(Dest, T); | |
| 2043 } else { | |
| 2044 Constant *Zero = Ctx->getConstantZero(IceType_i32); | |
| 2045 _mov(T, Src0, RegX8632::Reg_eax); | |
| 2046 _mov(T_edx, Zero, RegX8632::Reg_edx); | |
| 2047 _div(T, Src1, T_edx); | |
| 2048 _mov(Dest, T); | |
| 2049 } | |
| 2050 break; | |
| 2051 case InstArithmetic::Sdiv: | |
| 2052 // TODO(stichnot): Enable this after doing better performance | |
| 2053 // and cross testing. | |
| 2054 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | |
| 2055 // Optimize division by constant power of 2, but not for Om1 | |
| 2056 // or O0, just to keep things simple there. | |
| 2057 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | |
| 2058 int32_t Divisor = C->getValue(); | |
| 2059 uint32_t UDivisor = static_cast<uint32_t>(Divisor); | |
| 2060 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { | |
| 2061 uint32_t LogDiv = llvm::Log2_32(UDivisor); | |
| 2062 Type Ty = Dest->getType(); | |
| 2063 // LLVM does the following for dest=src/(1<<log): | |
| 2064 // t=src | |
| 2065 // sar t,typewidth-1 // -1 if src is negative, 0 if not | |
| 2066 // shr t,typewidth-log | |
| 2067 // add t,src | |
| 2068 // sar t,log | |
| 2069 // dest=t | |
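| // Worked example for src == -7, log == 2 (divide by 4): sar gives -1, | |
| // shr by 30 gives 3, add src gives -4, sar by 2 gives -1, matching the | |
| // truncated quotient -7/4 == -1. | |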
| 2070 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty); | |
| 2071 _mov(T, Src0); | |
| 2072 // If for some reason we are dividing by 1, just treat it | |
| 2073 // like an assignment. | |
| 2074 if (LogDiv > 0) { | |
| 2075 // The initial sar is unnecessary when dividing by 2. | |
| 2076 if (LogDiv > 1) | |
| 2077 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); | |
| 2078 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); | |
| 2079 _add(T, Src0); | |
| 2080 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); | |
| 2081 } | |
| 2082 _mov(Dest, T); | |
| 2083 return; | |
| 2084 } | |
| 2085 } | |
| 2086 } | |
| 2087 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | |
| 2088 if (isByteSizedArithType(Dest->getType())) { | |
| 2089 _mov(T, Src0, RegX8632::Reg_eax); | |
| 2090 _cbwdq(T, T); | |
| 2091 _idiv(T, Src1, T); | |
| 2092 _mov(Dest, T); | |
| 2093 } else { | |
| 2094 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); | |
| 2095 _mov(T, Src0, RegX8632::Reg_eax); | |
| 2096 _cbwdq(T_edx, T); | |
| 2097 _idiv(T, Src1, T_edx); | |
| 2098 _mov(Dest, T); | |
| 2099 } | |
| 2100 break; | |
| 2101 case InstArithmetic::Urem: | |
| 2102 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | |
| 2103 if (isByteSizedArithType(Dest->getType())) { | |
| 2104 Variable *T_ah = nullptr; | |
| 2105 Constant *Zero = Ctx->getConstantZero(IceType_i8); | |
| 2106 _mov(T, Src0, RegX8632::Reg_eax); | |
| 2107 _mov(T_ah, Zero, RegX8632::Reg_ah); | |
| 2108 _div(T_ah, Src1, T); | |
| 2109 _mov(Dest, T_ah); | |
| 2110 } else { | |
| 2111 Constant *Zero = Ctx->getConstantZero(IceType_i32); | |
| 2112 _mov(T_edx, Zero, RegX8632::Reg_edx); | |
| 2113 _mov(T, Src0, RegX8632::Reg_eax); | |
| 2114 _div(T_edx, Src1, T); | |
| 2115 _mov(Dest, T_edx); | |
| 2116 } | |
| 2117 break; | |
| 2118 case InstArithmetic::Srem: | |
| 2119 // TODO(stichnot): Enable this after doing better performance | |
| 2120 // and cross testing. | |
| 2121 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { | |
| 2122 // Optimize mod by constant power of 2, but not for Om1 or O0, | |
| 2123 // just to keep things simple there. | |
| 2124 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { | |
| 2125 int32_t Divisor = C->getValue(); | |
| 2126 uint32_t UDivisor = static_cast<uint32_t>(Divisor); | |
| 2127 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { | |
| 2128 uint32_t LogDiv = llvm::Log2_32(UDivisor); | |
| 2129 Type Ty = Dest->getType(); | |
| 2130 // LLVM does the following for dest=src%(1<<log): | |
| 2131 // t=src | |
| 2132 // sar t,typewidth-1 // -1 if src is negative, 0 if not | |
| 2133 // shr t,typewidth-log | |
| 2134 // add t,src | |
| 2135 // and t, -(1<<log) | |
| 2136 // sub t,src | |
| 2137 // neg t | |
| 2138 // dest=t | |
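| // Worked example for src == -7, log == 2 (mod 4): sar gives -1, shr by | |
| // 30 gives 3, add src gives -4, and with -4 gives -4, sub src gives 3, | |
| // neg gives -3, matching -7 % 4 == -3. | |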
| 2139 uint32_t TypeWidth = X86_CHAR_BIT * typeWidthInBytes(Ty); | |
| 2140 // If for some reason we are dividing by 1, just assign 0. | |
| 2141 if (LogDiv == 0) { | |
| 2142 _mov(Dest, Ctx->getConstantZero(Ty)); | |
| 2143 return; | |
| 2144 } | |
| 2145 _mov(T, Src0); | |
| 2146 // The initial sar is unnecessary when dividing by 2. | |
| 2147 if (LogDiv > 1) | |
| 2148 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); | |
| 2149 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); | |
| 2150 _add(T, Src0); | |
| 2151 _and(T, Ctx->getConstantInt(Ty, -(1 << LogDiv))); | |
| 2152 _sub(T, Src0); | |
| 2153 _neg(T); | |
| 2154 _mov(Dest, T); | |
| 2155 return; | |
| 2156 } | |
| 2157 } | |
| 2158 } | |
| 2159 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | |
| 2160 if (isByteSizedArithType(Dest->getType())) { | |
| 2161 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah); | |
| 2162 _mov(T, Src0, RegX8632::Reg_eax); | |
| 2163 _cbwdq(T, T); | |
| 2164 Context.insert(InstFakeDef::create(Func, T_ah)); | |
| 2165 _idiv(T_ah, Src1, T); | |
| 2166 _mov(Dest, T_ah); | |
| 2167 } else { | |
| 2168 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); | |
| 2169 _mov(T, Src0, RegX8632::Reg_eax); | |
| 2170 _cbwdq(T_edx, T); | |
| 2171 _idiv(T_edx, Src1, T); | |
| 2172 _mov(Dest, T_edx); | |
| 2173 } | |
| 2174 break; | |
| 2175 case InstArithmetic::Fadd: | |
| 2176 _mov(T, Src0); | |
| 2177 _addss(T, Src1); | |
| 2178 _mov(Dest, T); | |
| 2179 break; | |
| 2180 case InstArithmetic::Fsub: | |
| 2181 _mov(T, Src0); | |
| 2182 _subss(T, Src1); | |
| 2183 _mov(Dest, T); | |
| 2184 break; | |
| 2185 case InstArithmetic::Fmul: | |
| 2186 _mov(T, Src0); | |
| 2187 _mulss(T, Src1); | |
| 2188 _mov(Dest, T); | |
| 2189 break; | |
| 2190 case InstArithmetic::Fdiv: | |
| 2191 _mov(T, Src0); | |
| 2192 _divss(T, Src1); | |
| 2193 _mov(Dest, T); | |
| 2194 break; | |
| 2195 case InstArithmetic::Frem: { | |
| 2196 const SizeT MaxSrcs = 2; | |
| 2197 Type Ty = Dest->getType(); | |
| 2198 InstCall *Call = makeHelperCall( | |
| 2199 isFloat32Asserting32Or64(Ty) ? H_frem_f32 : H_frem_f64, Dest, MaxSrcs); | |
| 2200 Call->addArg(Src0); | |
| 2201 Call->addArg(Src1); | |
| 2202 return lowerCall(Call); | |
| 2203 } | |
| 2204 } | |
| 2205 } | |
| 2206 | |
| 2207 void TargetX8632::lowerAssign(const InstAssign *Inst) { | |
| 2208 Variable *Dest = Inst->getDest(); | |
| 2209 Operand *Src0 = Inst->getSrc(0); | |
| 2210 assert(Dest->getType() == Src0->getType()); | |
| 2211 if (Dest->getType() == IceType_i64) { | |
| 2212 Src0 = legalize(Src0); | |
| 2213 Operand *Src0Lo = loOperand(Src0); | |
| 2214 Operand *Src0Hi = hiOperand(Src0); | |
| 2215 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 2216 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 2217 Variable *T_Lo = nullptr, *T_Hi = nullptr; | |
| 2218 _mov(T_Lo, Src0Lo); | |
| 2219 _mov(DestLo, T_Lo); | |
| 2220 _mov(T_Hi, Src0Hi); | |
| 2221 _mov(DestHi, T_Hi); | |
| 2222 } else { | |
| 2223 Operand *RI; | |
| 2224 if (Dest->hasReg()) { | |
| 2225 // If Dest already has a physical register, then legalize the | |
| 2226 // Src operand into a Variable with the same register | |
| 2227 // assignment. This is mostly a workaround for advanced phi | |
| 2228 // lowering's ad-hoc register allocation which assumes no | |
| 2229 // register allocation is needed when at least one of the | |
| 2230 // operands is non-memory. | |
| 2231 | |
| 2232 // If we have a physical register for the dest variable, we can | |
| 2233 // enable our constant blinding or pooling again. Note that this is | |
| 2234 // only for advancedPhiLowering(); the flag flip should have no | |
| 2235 // other side effect. | |
| 2236 { | |
| 2237 BoolFlagSaver B(RandomizationPoolingPaused, false); | |
| 2238 RI = legalize(Src0, Legal_Reg, Dest->getRegNum()); | |
| 2239 } | |
| 2240 } else { | |
| 2241 // If Dest could be a stack operand, then RI must be a physical | |
| 2242 // register or a scalar integer immediate. | |
| 2243 RI = legalize(Src0, Legal_Reg | Legal_Imm); | |
| 2244 } | |
| 2245 if (isVectorType(Dest->getType())) | |
| 2246 _movp(Dest, RI); | |
| 2247 else | |
| 2248 _mov(Dest, RI); | |
| 2249 } | |
| 2250 } | |
| 2251 | |
| 2252 void TargetX8632::lowerBr(const InstBr *Inst) { | |
| 2253 if (Inst->isUnconditional()) { | |
| 2254 _br(Inst->getTargetUnconditional()); | |
| 2255 return; | |
| 2256 } | |
| 2257 Operand *Cond = Inst->getCondition(); | |
| 2258 | |
| 2259 // Handle folding opportunities. | |
| 2260 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) { | |
| 2261 assert(Producer->isDeleted()); | |
| 2262 switch (BoolFolding::getProducerKind(Producer)) { | |
| 2263 default: | |
| 2264 break; | |
| 2265 case BoolFolding::PK_Icmp32: { | |
| 2266 // TODO(stichnot): Refactor similarities between this block and | |
| 2267 // the corresponding code in lowerIcmp(). | |
| 2268 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); | |
| 2269 Operand *Src0 = Producer->getSrc(0); | |
| 2270 Operand *Src1 = legalize(Producer->getSrc(1)); | |
| 2271 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); | |
| 2272 _cmp(Src0RM, Src1); | |
| 2273 _br(getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(), | |
| 2274 Inst->getTargetFalse()); | |
| 2275 return; | |
| 2276 } | |
| 2277 } | |
| 2278 } | |
| 2279 | |
| 2280 Operand *Src0 = legalize(Cond, Legal_Reg | Legal_Mem); | |
| 2281 Constant *Zero = Ctx->getConstantZero(IceType_i32); | |
| 2282 _cmp(Src0, Zero); | |
| 2283 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); | |
| 2284 } | |
| 2285 | |
| 2286 void TargetX8632::lowerCall(const InstCall *Instr) { | |
| 2287 // x86-32 calling convention: | |
| 2288 // | |
| 2289 // * At the point before the call, the stack must be aligned to 16 | |
| 2290 // bytes. | |
| 2291 // | |
| 2292 // * The first four arguments of vector type, regardless of their | |
| 2293 // position relative to the other arguments in the argument list, are | |
| 2294 // placed in registers xmm0 - xmm3. | |
| 2295 // | |
| 2296 // * Other arguments are pushed onto the stack in right-to-left order, | |
| 2297 // such that the left-most argument ends up on the top of the stack at | |
| 2298 // the lowest memory address. | |
| 2299 // | |
| 2300 // * Stack arguments of vector type are aligned to start at the next | |
| 2301 // highest multiple of 16 bytes. Other stack arguments are aligned to | |
| 2302 // 4 bytes. | |
| 2303 // | |
| 2304 // This is intended to match the section "IA-32 Function Calling | |
| 2305 // Convention" of the document "OS X ABI Function Call Guide" by | |
| 2306 // Apple. | |
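| // E.g., for a hypothetical call f(i32 a, v4f32 b, i32 c): b is passed | |
| // in xmm0, while a and c end up at [esp] and [esp+4] respectively at | |
| // the point of the call. | |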
| 2307 NeedsStackAlignment = true; | |
| 2308 | |
| 2309 typedef std::vector<Operand *> OperandList; | |
| 2310 OperandList XmmArgs; | |
| 2311 OperandList StackArgs, StackArgLocations; | |
| 2312 uint32_t ParameterAreaSizeBytes = 0; | |
| 2313 | |
| 2314 // Classify each argument operand according to the location where the | |
| 2315 // argument is passed. | |
| 2316 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | |
| 2317 Operand *Arg = Instr->getArg(i); | |
| 2318 Type Ty = Arg->getType(); | |
| 2319 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | |
| 2320 assert(typeWidthInBytes(Ty) >= 4); | |
| 2321 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { | |
| 2322 XmmArgs.push_back(Arg); | |
| 2323 } else { | |
| 2324 StackArgs.push_back(Arg); | |
| 2325 if (isVectorType(Arg->getType())) { | |
| 2326 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); | |
| 2327 } | |
| 2328 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); | |
| 2329 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); | |
| 2330 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); | |
| 2331 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | |
| 2332 } | |
| 2333 } | |
| 2334 | |
| 2335 // Adjust the parameter area so that the stack is aligned. It is | |
| 2336 // assumed that the stack is already aligned at the start of the | |
| 2337 // calling sequence. | |
| 2338 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); | |
| 2339 | |
| 2340 // Subtract the appropriate amount for the argument area. This also | |
| 2341 // takes care of setting the stack adjustment during emission. | |
| 2342 // | |
| 2343 // TODO: If for some reason the call instruction gets dead-code | |
| 2344 // eliminated after lowering, we would need to ensure that the | |
| 2345 // pre-call and post-call esp adjustments get eliminated as well. | |
| 2346 if (ParameterAreaSizeBytes) { | |
| 2347 _adjust_stack(ParameterAreaSizeBytes); | |
| 2348 } | |
| 2349 | |
| 2350 // Copy arguments that are passed on the stack to the appropriate | |
| 2351 // stack locations. | |
| 2352 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { | |
| 2353 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | |
| 2354 } | |
| 2355 | |
| 2356 // Copy arguments to be passed in registers to the appropriate | |
| 2357 // registers. | |
| 2358 // TODO: Investigate the impact of lowering arguments passed in | |
| 2359 // registers after lowering stack arguments as opposed to the other | |
| 2360 // way around. Lowering register arguments after stack arguments may | |
| 2361 // reduce register pressure. On the other hand, lowering register | |
| 2362 // arguments first (before stack arguments) may result in more compact | |
| 2363 // code, as the memory operand displacements may end up being smaller | |
| 2364 // before any stack adjustment is done. | |
| 2365 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | |
| 2366 Variable *Reg = legalizeToVar(XmmArgs[i], RegX8632::Reg_xmm0 + i); | |
| 2367 // Generate a FakeUse of register arguments so that they do not get | |
| 2368 // dead code eliminated as a result of the FakeKill of scratch | |
| 2369 // registers after the call. | |
| 2370 Context.insert(InstFakeUse::create(Func, Reg)); | |
| 2371 } | |
| 2372 // Generate the call instruction. Assign its result to a temporary | |
| 2373 // with high register allocation weight. | |
| 2374 Variable *Dest = Instr->getDest(); | |
| 2375 // ReturnReg doubles as ReturnRegLo as necessary. | |
| 2376 Variable *ReturnReg = nullptr; | |
| 2377 Variable *ReturnRegHi = nullptr; | |
| 2378 if (Dest) { | |
| 2379 switch (Dest->getType()) { | |
| 2380 case IceType_NUM: | |
| 2381 llvm_unreachable("Invalid Call dest type"); | |
| 2382 break; | |
| 2383 case IceType_void: | |
| 2384 break; | |
| 2385 case IceType_i1: | |
| 2386 case IceType_i8: | |
| 2387 case IceType_i16: | |
| 2388 case IceType_i32: | |
| 2389 ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_eax); | |
| 2390 break; | |
| 2391 case IceType_i64: | |
| 2392 ReturnReg = makeReg(IceType_i32, RegX8632::Reg_eax); | |
| 2393 ReturnRegHi = makeReg(IceType_i32, RegX8632::Reg_edx); | |
| 2394 break; | |
| 2395 case IceType_f32: | |
| 2396 case IceType_f64: | |
| 2397 // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with | |
| 2398 // the fstp instruction. | |
| 2399 break; | |
| 2400 case IceType_v4i1: | |
| 2401 case IceType_v8i1: | |
| 2402 case IceType_v16i1: | |
| 2403 case IceType_v16i8: | |
| 2404 case IceType_v8i16: | |
| 2405 case IceType_v4i32: | |
| 2406 case IceType_v4f32: | |
| 2407 ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_xmm0); | |
| 2408 break; | |
| 2409 } | |
| 2410 } | |
| 2411 Operand *CallTarget = legalize(Instr->getCallTarget()); | |
| 2412 const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); | |
| 2413 if (NeedSandboxing) { | |
| 2414 if (llvm::isa<Constant>(CallTarget)) { | |
| 2415 _bundle_lock(InstBundleLock::Opt_AlignToEnd); | |
| 2416 } else { | |
| 2417 Variable *CallTargetVar = nullptr; | |
| 2418 _mov(CallTargetVar, CallTarget); | |
| 2419 _bundle_lock(InstBundleLock::Opt_AlignToEnd); | |
| 2420 const SizeT BundleSize = | |
| 2421 1 << Func->getAssembler<>()->getBundleAlignLog2Bytes(); | |
| 2422 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); | |
| 2423 CallTarget = CallTargetVar; | |
| 2424 } | |
| 2425 } | |
| 2426 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); | |
| 2427 Context.insert(NewCall); | |
| 2428 if (NeedSandboxing) | |
| 2429 _bundle_unlock(); | |
| 2430 if (ReturnRegHi) | |
| 2431 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | |
| 2432 | |
| 2433 // Add the appropriate offset to esp. The call instruction takes care | |
| 2434 // of resetting the stack offset during emission. | |
| 2435 if (ParameterAreaSizeBytes) { | |
| 2436 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); | |
| 2437 _add(esp, Ctx->getConstantInt32(ParameterAreaSizeBytes)); | |
| 2438 } | |
| 2439 | |
| 2440 // Insert a register-kill pseudo instruction. | |
| 2441 Context.insert(InstFakeKill::create(Func, NewCall)); | |
| 2442 | |
| 2443 // Generate a FakeUse to keep the call live if necessary. | |
| 2444 if (Instr->hasSideEffects() && ReturnReg) { | |
| 2445 Inst *FakeUse = InstFakeUse::create(Func, ReturnReg); | |
| 2446 Context.insert(FakeUse); | |
| 2447 } | |
| 2448 | |
| 2449 if (!Dest) | |
| 2450 return; | |
| 2451 | |
| 2452 // Assign the result of the call to Dest. | |
| 2453 if (ReturnReg) { | |
| 2454 if (ReturnRegHi) { | |
| 2455 assert(Dest->getType() == IceType_i64); | |
| 2456 split64(Dest); | |
| 2457 Variable *DestLo = Dest->getLo(); | |
| 2458 Variable *DestHi = Dest->getHi(); | |
| 2459 _mov(DestLo, ReturnReg); | |
| 2460 _mov(DestHi, ReturnRegHi); | |
| 2461 } else { | |
| 2462 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 || | |
| 2463 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 || | |
| 2464 isVectorType(Dest->getType())); | |
| 2465 if (isVectorType(Dest->getType())) { | |
| 2466 _movp(Dest, ReturnReg); | |
| 2467 } else { | |
| 2468 _mov(Dest, ReturnReg); | |
| 2469 } | |
| 2470 } | |
| 2471 } else if (isScalarFloatingType(Dest->getType())) { | |
| 2472 // Special treatment for an FP function which returns its result in | |
| 2473 // st(0). | |
| 2474 // If Dest ends up being a physical xmm register, the fstp emit code | |
| 2475 // will route st(0) through a temporary stack slot. | |
| 2476 _fstp(Dest); | |
| 2477 // Create a fake use of Dest in case it actually isn't used, | |
| 2478 // because st(0) still needs to be popped. | |
| 2479 Context.insert(InstFakeUse::create(Func, Dest)); | |
| 2480 } | |
| 2481 } | |
| 2482 | |
| 2483 void TargetX8632::lowerCast(const InstCast *Inst) { | |
| 2484 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) | |
| 2485 InstCast::OpKind CastKind = Inst->getCastKind(); | |
| 2486 Variable *Dest = Inst->getDest(); | |
| 2487 switch (CastKind) { | |
| 2488 default: | |
| 2489 Func->setError("Cast type not supported"); | |
| 2490 return; | |
| 2491 case InstCast::Sext: { | |
| 2492 // Src0RM is the source operand legalized to physical register or memory, | |
| 2493 // but not immediate, since the relevant x86 native instructions don't | |
| 2494 // allow an immediate operand. If the operand is an immediate, we could | |
| 2495 // consider computing the strength-reduced result at translation time, | |
| 2496 // but the optimizer will almost certainly have folded such a case | |
| 2497 // away in the bitcode already. | |
| 2498 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | |
| 2499 if (isVectorType(Dest->getType())) { | |
| 2500 Type DestTy = Dest->getType(); | |
| 2501 if (DestTy == IceType_v16i8) { | |
| 2502 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 | |
| 2503 Variable *OneMask = makeVectorOfOnes(Dest->getType()); | |
| 2504 Variable *T = makeReg(DestTy); | |
| 2505 _movp(T, Src0RM); | |
| 2506 _pand(T, OneMask); | |
| 2507 Variable *Zeros = makeVectorOfZeros(Dest->getType()); | |
| 2508 _pcmpgt(T, Zeros); | |
| 2509 _movp(Dest, T); | |
| 2510 } else { | |
| 2511 // width = width(elty) - 1; dest = (src << width) >> width | |
| 2512 SizeT ShiftAmount = | |
| 2513 X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - 1; | |
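| // E.g. ShiftAmount == 15 for v8i16 and 31 for v4i32. | |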
| 2514 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); | |
| 2515 Variable *T = makeReg(DestTy); | |
| 2516 _movp(T, Src0RM); | |
| 2517 _psll(T, ShiftConstant); | |
| 2518 _psra(T, ShiftConstant); | |
| 2519 _movp(Dest, T); | |
| 2520 } | |
| 2521 } else if (Dest->getType() == IceType_i64) { | |
| 2522 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 | |
| 2523 Constant *Shift = Ctx->getConstantInt32(31); | |
| 2524 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 2525 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 2526 Variable *T_Lo = makeReg(DestLo->getType()); | |
| 2527 if (Src0RM->getType() == IceType_i32) { | |
| 2528 _mov(T_Lo, Src0RM); | |
| 2529 } else if (Src0RM->getType() == IceType_i1) { | |
| 2530 _movzx(T_Lo, Src0RM); | |
| 2531 _shl(T_Lo, Shift); | |
| 2532 _sar(T_Lo, Shift); | |
| 2533 } else { | |
| 2534 _movsx(T_Lo, Src0RM); | |
| 2535 } | |
| 2536 _mov(DestLo, T_Lo); | |
| 2537 Variable *T_Hi = nullptr; | |
| 2538 _mov(T_Hi, T_Lo); | |
| 2539 if (Src0RM->getType() != IceType_i1) | |
| 2540 // For i1, the sar instruction is already done above. | |
| 2541 _sar(T_Hi, Shift); | |
| 2542 _mov(DestHi, T_Hi); | |
| 2543 } else if (Src0RM->getType() == IceType_i1) { | |
| 2544 // t1 = src | |
| 2545 // shl t1, dst_bitwidth - 1 | |
| 2546 // sar t1, dst_bitwidth - 1 | |
| 2547 // dst = t1 | |
| 2548 size_t DestBits = X86_CHAR_BIT * typeWidthInBytes(Dest->getType()); | |
| 2549 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1); | |
| 2550 Variable *T = makeReg(Dest->getType()); | |
| 2551 if (typeWidthInBytes(Dest->getType()) <= | |
| 2552 typeWidthInBytes(Src0RM->getType())) { | |
| 2553 _mov(T, Src0RM); | |
| 2554 } else { | |
| 2555 // Widen the source using movsx or movzx. (It doesn't matter | |
| 2556 // which one, since the following shl/sar overwrite the bits.) | |
| 2557 _movzx(T, Src0RM); | |
| 2558 } | |
| 2559 _shl(T, ShiftAmount); | |
| 2560 _sar(T, ShiftAmount); | |
| 2561 _mov(Dest, T); | |
| 2562 } else { | |
| 2563 // t1 = movsx src; dst = t1 | |
| 2564 Variable *T = makeReg(Dest->getType()); | |
| 2565 _movsx(T, Src0RM); | |
| 2566 _mov(Dest, T); | |
| 2567 } | |
| 2568 break; | |
| 2569 } | |
| 2570 case InstCast::Zext: { | |
| 2571 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | |
| 2572 if (isVectorType(Dest->getType())) { | |
| 2573 // onemask = materialize(1,1,...); dest = onemask & src | |
| 2574 Type DestTy = Dest->getType(); | |
| 2575 Variable *OneMask = makeVectorOfOnes(DestTy); | |
| 2576 Variable *T = makeReg(DestTy); | |
| 2577 _movp(T, Src0RM); | |
| 2578 _pand(T, OneMask); | |
| 2579 _movp(Dest, T); | |
| 2580 } else if (Dest->getType() == IceType_i64) { | |
| 2581 // t1=movzx src; dst.lo=t1; dst.hi=0 | |
| 2582 Constant *Zero = Ctx->getConstantZero(IceType_i32); | |
| 2583 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 2584 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 2585 Variable *Tmp = makeReg(DestLo->getType()); | |
| 2586 if (Src0RM->getType() == IceType_i32) { | |
| 2587 _mov(Tmp, Src0RM); | |
| 2588 } else { | |
| 2589 _movzx(Tmp, Src0RM); | |
| 2590 } | |
| 2591 if (Src0RM->getType() == IceType_i1) { | |
| 2592 Constant *One = Ctx->getConstantInt32(1); | |
| 2593 _and(Tmp, One); | |
| 2594 } | |
| 2595 _mov(DestLo, Tmp); | |
| 2596 _mov(DestHi, Zero); | |
| 2597 } else if (Src0RM->getType() == IceType_i1) { | |
| 2598 // t = Src0RM; t &= 1; Dest = t | |
| 2599 Constant *One = Ctx->getConstantInt32(1); | |
| 2600 Type DestTy = Dest->getType(); | |
| 2601 Variable *T; | |
| 2602 if (DestTy == IceType_i8) { | |
| 2603 T = makeReg(DestTy); | |
| 2604 _mov(T, Src0RM); | |
| 2605 } else { | |
| 2606 // Use a 32-bit register for i16 and i32 alike, since 32-bit ops are shorter. | |
| 2607 T = makeReg(IceType_i32); | |
| 2608 _movzx(T, Src0RM); | |
| 2609 } | |
| 2610 _and(T, One); | |
| 2611 _mov(Dest, T); | |
| 2612 } else { | |
| 2613 // t1 = movzx src; dst = t1 | |
| 2614 Variable *T = makeReg(Dest->getType()); | |
| 2615 _movzx(T, Src0RM); | |
| 2616 _mov(Dest, T); | |
| 2617 } | |
| 2618 break; | |
| 2619 } | |
| 2620 case InstCast::Trunc: { | |
| 2621 if (isVectorType(Dest->getType())) { | |
| 2622 // onemask = materialize(1,1,...); dst = src & onemask | |
| 2623 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | |
| 2624 Type Src0Ty = Src0RM->getType(); | |
| 2625 Variable *OneMask = makeVectorOfOnes(Src0Ty); | |
| 2626 Variable *T = makeReg(Dest->getType()); | |
| 2627 _movp(T, Src0RM); | |
| 2628 _pand(T, OneMask); | |
| 2629 _movp(Dest, T); | |
| 2630 } else { | |
| 2631 Operand *Src0 = Inst->getSrc(0); | |
| 2632 if (Src0->getType() == IceType_i64) | |
| 2633 Src0 = loOperand(Src0); | |
| 2634 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | |
| 2635 // t1 = trunc Src0RM; Dest = t1 | |
| 2636 Variable *T = nullptr; | |
| 2637 _mov(T, Src0RM); | |
| 2638 if (Dest->getType() == IceType_i1) | |
| 2639 _and(T, Ctx->getConstantInt1(1)); | |
| 2640 _mov(Dest, T); | |
| 2641 } | |
| 2642 break; | |
| 2643 } | |
| 2644 case InstCast::Fptrunc: | |
| 2645 case InstCast::Fpext: { | |
| 2646 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | |
| 2647 // t1 = cvt Src0RM; Dest = t1 | |
| 2648 Variable *T = makeReg(Dest->getType()); | |
| 2649 _cvt(T, Src0RM, InstX8632Cvt::Float2float); | |
| 2650 _mov(Dest, T); | |
| 2651 break; | |
| 2652 } | |
| 2653 case InstCast::Fptosi: | |
| 2654 if (isVectorType(Dest->getType())) { | |
| 2655 assert(Dest->getType() == IceType_v4i32 && | |
| 2656 Inst->getSrc(0)->getType() == IceType_v4f32); | |
| 2657 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | |
| 2658 if (llvm::isa<OperandX8632Mem>(Src0RM)) | |
| 2659 Src0RM = legalizeToVar(Src0RM); | |
| 2660 Variable *T = makeReg(Dest->getType()); | |
| 2661 _cvt(T, Src0RM, InstX8632Cvt::Tps2dq); | |
| 2662 _movp(Dest, T); | |
| 2663 } else if (Dest->getType() == IceType_i64) { | |
| 2664 // Use a helper for converting floating-point values to 64-bit | |
| 2665 // integers. SSE2 appears to have no way to convert from xmm | |
| 2666 // registers to something like the edx:eax register pair, and | |
| 2667 // gcc and clang both want to use x87 instructions complete with | |
| 2668 // temporary manipulation of the status word. This helper is | |
| 2669 // not needed for x86-64. | |
| 2670 split64(Dest); | |
| 2671 const SizeT MaxSrcs = 1; | |
| 2672 Type SrcType = Inst->getSrc(0)->getType(); | |
| 2673 InstCall *Call = | |
| 2674 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 | |
| 2675 : H_fptosi_f64_i64, | |
| 2676 Dest, MaxSrcs); | |
| 2677 Call->addArg(Inst->getSrc(0)); | |
| 2678 lowerCall(Call); | |
| 2679 } else { | |
| 2680 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | |
| 2681 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type | |
| 2682 Variable *T_1 = makeReg(IceType_i32); | |
| 2683 Variable *T_2 = makeReg(Dest->getType()); | |
| 2684 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si); | |
| 2685 _mov(T_2, T_1); // T_1 and T_2 may have different integer types | |
| 2686 if (Dest->getType() == IceType_i1) | |
| 2687 _and(T_2, Ctx->getConstantInt1(1)); | |
| 2688 _mov(Dest, T_2); | |
| 2689 } | |
| 2690 break; | |
| 2691 case InstCast::Fptoui: | |
| 2692 if (isVectorType(Dest->getType())) { | |
| 2693 assert(Dest->getType() == IceType_v4i32 && | |
| 2694 Inst->getSrc(0)->getType() == IceType_v4f32); | |
| 2695 const SizeT MaxSrcs = 1; | |
| 2696 InstCall *Call = makeHelperCall(H_fptoui_4xi32_f32, Dest, MaxSrcs); | |
| 2697 Call->addArg(Inst->getSrc(0)); | |
| 2698 lowerCall(Call); | |
| 2699 } else if (Dest->getType() == IceType_i64 || | |
| 2700 Dest->getType() == IceType_i32) { | |
| 2701 // Use a helper for both x86-32 and x86-64, since the cvtt instructions | |
| // only produce signed results. | |
| 2702 split64(Dest); | |
| 2703 const SizeT MaxSrcs = 1; | |
| 2704 Type DestType = Dest->getType(); | |
| 2705 Type SrcType = Inst->getSrc(0)->getType(); | |
| 2706 IceString TargetString; | |
| 2707 if (isInt32Asserting32Or64(DestType)) { | |
| 2708 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i32 | |
| 2709 : H_fptoui_f64_i32; | |
| 2710 } else { | |
| 2711 TargetString = isFloat32Asserting32Or64(SrcType) ? H_fptoui_f32_i64 | |
| 2712 : H_fptoui_f64_i64; | |
| 2713 } | |
| 2714 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); | |
| 2715 Call->addArg(Inst->getSrc(0)); | |
| 2716 lowerCall(Call); | |
| 2717 return; | |
| 2718 } else { | |
| 2719 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | |
| 2720 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type | |
| 2721 Variable *T_1 = makeReg(IceType_i32); | |
| 2722 Variable *T_2 = makeReg(Dest->getType()); | |
| 2723 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si); | |
| 2724 _mov(T_2, T_1); // T_1 and T_2 may have different integer types | |
| 2725 if (Dest->getType() == IceType_i1) | |
| 2726 _and(T_2, Ctx->getConstantInt1(1)); | |
| 2727 _mov(Dest, T_2); | |
| 2728 } | |
| 2729 break; | |
| 2730 case InstCast::Sitofp: | |
| 2731 if (isVectorType(Dest->getType())) { | |
| 2732 assert(Dest->getType() == IceType_v4f32 && | |
| 2733 Inst->getSrc(0)->getType() == IceType_v4i32); | |
| 2734 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | |
| 2735 if (llvm::isa<OperandX8632Mem>(Src0RM)) | |
| 2736 Src0RM = legalizeToVar(Src0RM); | |
| 2737 Variable *T = makeReg(Dest->getType()); | |
| 2738 _cvt(T, Src0RM, InstX8632Cvt::Dq2ps); | |
| 2739 _movp(Dest, T); | |
| 2740 } else if (Inst->getSrc(0)->getType() == IceType_i64) { | |
| 2741 // Use a helper for x86-32. | |
| 2742 const SizeT MaxSrcs = 1; | |
| 2743 Type DestType = Dest->getType(); | |
| 2744 InstCall *Call = | |
| 2745 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 | |
| 2746 : H_sitofp_i64_f64, | |
| 2747 Dest, MaxSrcs); | |
| 2748 // TODO: Call the correct compiler-rt helper function. | |
| 2749 Call->addArg(Inst->getSrc(0)); | |
| 2750 lowerCall(Call); | |
| 2751 return; | |
| 2752 } else { | |
| 2753 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); | |
| 2754 // Sign-extend the operand. | |
| 2755 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 | |
| 2756 Variable *T_1 = makeReg(IceType_i32); | |
| 2757 Variable *T_2 = makeReg(Dest->getType()); | |
| 2758 if (Src0RM->getType() == IceType_i32) | |
| 2759 _mov(T_1, Src0RM); | |
| 2760 else | |
| 2761 _movsx(T_1, Src0RM); | |
| 2762 _cvt(T_2, T_1, InstX8632Cvt::Si2ss); | |
| 2763 _mov(Dest, T_2); | |
| 2764 } | |
| 2765 break; | |
| 2766 case InstCast::Uitofp: { | |
| 2767 Operand *Src0 = Inst->getSrc(0); | |
| 2768 if (isVectorType(Src0->getType())) { | |
| 2769 assert(Dest->getType() == IceType_v4f32 && | |
| 2770 Src0->getType() == IceType_v4i32); | |
| 2771 const SizeT MaxSrcs = 1; | |
| 2772 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); | |
| 2773 Call->addArg(Src0); | |
| 2774 lowerCall(Call); | |
| 2775 } else if (Src0->getType() == IceType_i64 || | |
| 2776 Src0->getType() == IceType_i32) { | |
| 2777 // Use a helper for i64 on both x86-32 and x86-64. Also use a helper | |
| 2778 // for i32 on x86-32, since cvtsi2ss/cvtsi2sd only accept signed sources. | |
| 2779 const SizeT MaxSrcs = 1; | |
| 2780 Type DestType = Dest->getType(); | |
| 2781 IceString TargetString; | |
| 2782 if (isInt32Asserting32Or64(Src0->getType())) { | |
| 2783 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 | |
| 2784 : H_uitofp_i32_f64; | |
| 2785 } else { | |
| 2786 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 | |
| 2787 : H_uitofp_i64_f64; | |
| 2788 } | |
| 2789 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); | |
| 2790 Call->addArg(Src0); | |
| 2791 lowerCall(Call); | |
| 2792 return; | |
| 2793 } else { | |
| 2794 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | |
| 2795 // Zero-extend the operand. | |
| 2796 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 | |
| 2797 Variable *T_1 = makeReg(IceType_i32); | |
| 2798 Variable *T_2 = makeReg(Dest->getType()); | |
| 2799 if (Src0RM->getType() == IceType_i32) | |
| 2800 _mov(T_1, Src0RM); | |
| 2801 else | |
| 2802 _movzx(T_1, Src0RM); | |
| 2803 _cvt(T_2, T_1, InstX8632Cvt::Si2ss); | |
| 2804 _mov(Dest, T_2); | |
| 2805 } | |
| 2806 break; | |
| 2807 } | |
| 2808 case InstCast::Bitcast: { | |
| 2809 Operand *Src0 = Inst->getSrc(0); | |
| 2810 if (Dest->getType() == Src0->getType()) { | |
| 2811 InstAssign *Assign = InstAssign::create(Func, Dest, Src0); | |
| 2812 lowerAssign(Assign); | |
| 2813 return; | |
| 2814 } | |
| 2815 switch (Dest->getType()) { | |
| 2816 default: | |
| 2817 llvm_unreachable("Unexpected Bitcast dest type"); | |
| 2818 case IceType_i8: { | |
| 2819 assert(Src0->getType() == IceType_v8i1); | |
| 2820 InstCall *Call = makeHelperCall(H_bitcast_8xi1_i8, Dest, 1); | |
| 2821 Call->addArg(Src0); | |
| 2822 lowerCall(Call); | |
| 2823 } break; | |
| 2824 case IceType_i16: { | |
| 2825 assert(Src0->getType() == IceType_v16i1); | |
| 2826 InstCall *Call = makeHelperCall(H_bitcast_16xi1_i16, Dest, 1); | |
| 2827 Call->addArg(Src0); | |
| 2828 lowerCall(Call); | |
| 2829 } break; | |
| 2830 case IceType_i32: | |
| 2831 case IceType_f32: { | |
| 2832 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | |
| 2833 Type DestType = Dest->getType(); | |
| 2834 Type SrcType = Src0RM->getType(); | |
| 2835 (void)DestType; | |
| 2836 assert((DestType == IceType_i32 && SrcType == IceType_f32) || | |
| 2837 (DestType == IceType_f32 && SrcType == IceType_i32)); | |
| 2838 // a.i32 = bitcast b.f32 ==> | |
| 2839 // t.f32 = b.f32 | |
| 2840 // s.f32 = spill t.f32 | |
| 2841 // a.i32 = s.f32 | |
| 2842 Variable *T = nullptr; | |
| 2843 // TODO: Should be able to force a spill setup by calling legalize() with | |
| 2844 // Legal_Mem and not Legal_Reg or Legal_Imm. | |
| 2845 SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(SrcType); | |
| 2846 SpillVar->setLinkedTo(Dest); | |
| 2847 Variable *Spill = SpillVar; | |
| 2848 Spill->setWeight(RegWeight::Zero); | |
| 2849 _mov(T, Src0RM); | |
| 2850 _mov(Spill, T); | |
| 2851 _mov(Dest, Spill); | |
| 2852 } break; | |
| 2853 case IceType_i64: { | |
| 2854 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | |
| 2855 assert(Src0RM->getType() == IceType_f64); | |
| 2856 // a.i64 = bitcast b.f64 ==> | |
| 2857 // s.f64 = spill b.f64 | |
| 2858 // t_lo.i32 = lo(s.f64) | |
| 2859 // a_lo.i32 = t_lo.i32 | |
| 2860 // t_hi.i32 = hi(s.f64) | |
| 2861 // a_hi.i32 = t_hi.i32 | |
| 2862 Operand *SpillLo, *SpillHi; | |
| 2863 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) { | |
| 2864 SpillVariable *SpillVar = | |
| 2865 Func->makeVariable<SpillVariable>(IceType_f64); | |
| 2866 SpillVar->setLinkedTo(Src0Var); | |
| 2867 Variable *Spill = SpillVar; | |
| 2868 Spill->setWeight(RegWeight::Zero); | |
| 2869 _movq(Spill, Src0RM); | |
| 2870 SpillLo = VariableSplit::create(Func, Spill, VariableSplit::Low); | |
| 2871 SpillHi = VariableSplit::create(Func, Spill, VariableSplit::High); | |
| 2872 } else { | |
| 2873 SpillLo = loOperand(Src0RM); | |
| 2874 SpillHi = hiOperand(Src0RM); | |
| 2875 } | |
| 2876 | |
| 2877 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 2878 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 2879 Variable *T_Lo = makeReg(IceType_i32); | |
| 2880 Variable *T_Hi = makeReg(IceType_i32); | |
| 2881 | |
| 2882 _mov(T_Lo, SpillLo); | |
| 2883 _mov(DestLo, T_Lo); | |
| 2884 _mov(T_Hi, SpillHi); | |
| 2885 _mov(DestHi, T_Hi); | |
| 2886 } break; | |
| 2887 case IceType_f64: { | |
| 2888 Src0 = legalize(Src0); | |
| 2889 assert(Src0->getType() == IceType_i64); | |
| 2890 if (llvm::isa<OperandX8632Mem>(Src0)) { | |
| 2891 Variable *T = Func->makeVariable(Dest->getType()); | |
| 2892 _movq(T, Src0); | |
| 2893 _movq(Dest, T); | |
| 2894 break; | |
| 2895 } | |
| 2896 // a.f64 = bitcast b.i64 ==> | |
| 2897 // t_lo.i32 = b_lo.i32 | |
| 2898 // FakeDef(s.f64) | |
| 2899 // lo(s.f64) = t_lo.i32 | |
| 2900 // t_hi.i32 = b_hi.i32 | |
| 2901 // hi(s.f64) = t_hi.i32 | |
| 2902 // a.f64 = s.f64 | |
| 2903 SpillVariable *SpillVar = Func->makeVariable<SpillVariable>(IceType_f64); | |
| 2904 SpillVar->setLinkedTo(Dest); | |
| 2905 Variable *Spill = SpillVar; | |
| 2906 Spill->setWeight(RegWeight::Zero); | |
| 2907 | |
| 2908 Variable *T_Lo = nullptr, *T_Hi = nullptr; | |
| 2909 VariableSplit *SpillLo = | |
| 2910 VariableSplit::create(Func, Spill, VariableSplit::Low); | |
| 2911 VariableSplit *SpillHi = | |
| 2912 VariableSplit::create(Func, Spill, VariableSplit::High); | |
| 2913 _mov(T_Lo, loOperand(Src0)); | |
| 2914 // Technically, the Spill is defined after the _store happens, but | |
| 2915 // SpillLo is considered a "use" of Spill so define Spill before it | |
| 2916 // is used. | |
| 2917 Context.insert(InstFakeDef::create(Func, Spill)); | |
| 2918 _store(T_Lo, SpillLo); | |
| 2919 _mov(T_Hi, hiOperand(Src0)); | |
| 2920 _store(T_Hi, SpillHi); | |
| 2921 _movq(Dest, Spill); | |
| 2922 } break; | |
| 2923 case IceType_v8i1: { | |
| 2924 assert(Src0->getType() == IceType_i8); | |
| 2925 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1); | |
| 2926 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); | |
| 2927 // Arguments to functions are required to be at least 32 bits wide. | |
| 2928 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); | |
| 2929 Call->addArg(Src0AsI32); | |
| 2930 lowerCall(Call); | |
| 2931 } break; | |
| 2932 case IceType_v16i1: { | |
| 2933 assert(Src0->getType() == IceType_i16); | |
| 2934 InstCall *Call = makeHelperCall(H_bitcast_i16_16xi1, Dest, 1); | |
| 2935 Variable *Src0AsI32 = Func->makeVariable(stackSlotType()); | |
| 2936 // Arguments to functions are required to be at least 32 bits wide. | |
| 2937 lowerCast(InstCast::create(Func, InstCast::Zext, Src0AsI32, Src0)); | |
| 2938 Call->addArg(Src0AsI32); | |
| 2939 lowerCall(Call); | |
| 2940 } break; | |
| 2941 case IceType_v8i16: | |
| 2942 case IceType_v16i8: | |
| 2943 case IceType_v4i32: | |
| 2944 case IceType_v4f32: { | |
| 2945 _movp(Dest, legalizeToVar(Src0)); | |
| 2946 } break; | |
| 2947 } | |
| 2948 break; | |
| 2949 } | |
| 2950 } | |
| 2951 } | |
| 2952 | |
| 2953 void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { | |
| 2954 Operand *SourceVectNotLegalized = Inst->getSrc(0); | |
| 2955 ConstantInteger32 *ElementIndex = | |
| 2956 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(1)); | |
| 2957 // Only constant indices are allowed in PNaCl IR. | |
| 2958 assert(ElementIndex); | |
| 2959 | |
| 2960 unsigned Index = ElementIndex->getValue(); | |
| 2961 Type Ty = SourceVectNotLegalized->getType(); | |
| 2962 Type ElementTy = typeElementType(Ty); | |
| 2963 Type InVectorElementTy = getInVectorElementType(Ty); | |
| 2964 Variable *ExtractedElementR = makeReg(InVectorElementTy); | |
| 2965 | |
| 2966 // TODO(wala): Determine the best lowering sequences for each type. | |
| 2967 bool CanUsePextr = | |
| 2968 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; | |
| 2969 if (CanUsePextr && Ty != IceType_v4f32) { | |
| 2970 // Use pextrb, pextrw, or pextrd. | |
| 2971 Constant *Mask = Ctx->getConstantInt32(Index); | |
| 2972 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); | |
| 2973 _pextr(ExtractedElementR, SourceVectR, Mask); | |
| 2974 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | |
| 2975 // Use pshufd and movd/movss. | |
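| // Sketch (illustrative register names): extracting element 2 of a | |
| // v4i32 becomes roughly | |
| //   pshufd x1, x0, 2 ; imm bits [1:0] pick the element for slot 0 | |
| //   movd r, x1 | |
| // When Index == 0 the pshufd is skipped and movd reads slot 0 directly. | |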
| 2976 Variable *T = nullptr; | |
| 2977 if (Index) { | |
| 2978 // The shuffle only needs to occur if the element to be extracted | |
| 2979 // is not at the lowest index. | |
| 2980 Constant *Mask = Ctx->getConstantInt32(Index); | |
| 2981 T = makeReg(Ty); | |
| 2982 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask); | |
| 2983 } else { | |
| 2984 T = legalizeToVar(SourceVectNotLegalized); | |
| 2985 } | |
| 2986 | |
| 2987 if (InVectorElementTy == IceType_i32) { | |
| 2988 _movd(ExtractedElementR, T); | |
| 2989 } else { // InVectorElementTy == IceType_f32 | |
| 2990 // TODO(wala): _movss is only used here because _mov does not | |
| 2991 // allow a vector source and a scalar destination. _mov should be | |
| 2992 // able to be used here. | |
| 2993 // _movss is a binary instruction, so the FakeDef is needed to | |
| 2994 // keep the live range analysis consistent. | |
| 2995 Context.insert(InstFakeDef::create(Func, ExtractedElementR)); | |
| 2996 _movss(ExtractedElementR, T); | |
| 2997 } | |
| 2998 } else { | |
| 2999 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); | |
| 3000 // Spill the value to a stack slot and do the extraction in memory. | |
| 3001 // | |
| 3002 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when | |
| 3003 // support for legalizing to mem is implemented. | |
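| // Sketch: extracting element 5 of a v16i8 spills the vector to a | |
| // 16-byte stack slot and loads one byte back (Offset = 5 * 1): | |
| //   movups [slot], x0 | |
| //   mov dst8, [slot + 5] | |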
| 3004 Variable *Slot = Func->makeVariable(Ty); | |
| 3005 Slot->setWeight(RegWeight::Zero); | |
| 3006 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); | |
| 3007 | |
| 3008 // Compute the location of the element in memory. | |
| 3009 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); | |
| 3010 OperandX8632Mem *Loc = | |
| 3011 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); | |
| 3012 _mov(ExtractedElementR, Loc); | |
| 3013 } | |
| 3014 | |
| 3015 if (ElementTy == IceType_i1) { | |
| 3016 // Truncate extracted integers to i1s if necessary. | |
| 3017 Variable *T = makeReg(IceType_i1); | |
| 3018 InstCast *Cast = | |
| 3019 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR); | |
| 3020 lowerCast(Cast); | |
| 3021 ExtractedElementR = T; | |
| 3022 } | |
| 3023 | |
| 3024 // Copy the element to the destination. | |
| 3025 Variable *Dest = Inst->getDest(); | |
| 3026 _mov(Dest, ExtractedElementR); | |
| 3027 } | |
| 3028 | |
| 3029 void TargetX8632::lowerFcmp(const InstFcmp *Inst) { | |
| 3030 Operand *Src0 = Inst->getSrc(0); | |
| 3031 Operand *Src1 = Inst->getSrc(1); | |
| 3032 Variable *Dest = Inst->getDest(); | |
| 3033 | |
| 3034 if (isVectorType(Dest->getType())) { | |
| 3035 InstFcmp::FCond Condition = Inst->getCondition(); | |
| 3036 size_t Index = static_cast<size_t>(Condition); | |
| 3037 assert(Index < TableFcmpSize); | |
| 3038 | |
| 3039 if (TableFcmp[Index].SwapVectorOperands) { | |
| 3040 Operand *T = Src0; | |
| 3041 Src0 = Src1; | |
| 3042 Src1 = T; | |
| 3043 } | |
| 3044 | |
| 3045 Variable *T = nullptr; | |
| 3046 | |
| 3047 if (Condition == InstFcmp::True) { | |
| 3048 // makeVectorOfMinusOnes() requires an integer vector type, so use | |
| // v4i32 rather than the f32 vector type. | |
| 3049 T = makeVectorOfMinusOnes(IceType_v4i32); | |
| 3050 } else if (Condition == InstFcmp::False) { | |
| 3051 T = makeVectorOfZeros(Dest->getType()); | |
| 3052 } else { | |
| 3053 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | |
| 3054 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | |
| 3055 if (llvm::isa<OperandX8632Mem>(Src1RM)) | |
| 3056 Src1RM = legalizeToVar(Src1RM); | |
| 3057 | |
| 3058 switch (Condition) { | |
| 3059 default: { | |
| 3060 CondX86::CmppsCond Predicate = TableFcmp[Index].Predicate; | |
| 3061 assert(Predicate != CondX86::Cmpps_Invalid); | |
| 3062 T = makeReg(Src0RM->getType()); | |
| 3063 _movp(T, Src0RM); | |
| 3064 _cmpps(T, Src1RM, Predicate); | |
| 3065 } break; | |
| 3066 case InstFcmp::One: { | |
| 3067 // Check both unequal and ordered. | |
| 3068 T = makeReg(Src0RM->getType()); | |
| 3069 Variable *T2 = makeReg(Src0RM->getType()); | |
| 3070 _movp(T, Src0RM); | |
| 3071 _cmpps(T, Src1RM, CondX86::Cmpps_neq); | |
| 3072 _movp(T2, Src0RM); | |
| 3073 _cmpps(T2, Src1RM, CondX86::Cmpps_ord); | |
| 3074 _pand(T, T2); | |
| 3075 } break; | |
| 3076 case InstFcmp::Ueq: { | |
| 3077 // Check both equal or unordered. | |
| 3078 T = makeReg(Src0RM->getType()); | |
| 3079 Variable *T2 = makeReg(Src0RM->getType()); | |
| 3080 _movp(T, Src0RM); | |
| 3081 _cmpps(T, Src1RM, CondX86::Cmpps_eq); | |
| 3082 _movp(T2, Src0RM); | |
| 3083 _cmpps(T2, Src1RM, CondX86::Cmpps_unord); | |
| 3084 _por(T, T2); | |
| 3085 } break; | |
| 3086 } | |
| 3087 } | |
| 3088 | |
| 3089 _movp(Dest, T); | |
| 3090 eliminateNextVectorSextInstruction(Dest); | |
| 3091 return; | |
| 3092 } | |
| 3093 | |
| 3094 // Lowering a = fcmp cond, b, c | |
| 3095 // ucomiss b, c /* only if C1 != Br_None */ | |
| 3096 // /* but swap b,c order if SwapScalarOperands==true */ | |
| 3097 // mov a, <default> | |
| 3098 // j<C1> label /* only if C1 != Br_None */ | |
| 3099 // j<C2> label /* only if C2 != Br_None */ | |
| 3100 // FakeUse(a) /* only if C1 != Br_None */ | |
| 3101 // mov a, !<default> /* only if C1 != Br_None */ | |
| 3102 // label: /* only if C1 != Br_None */ | |
| 3103 // | |
| 3104 // setcc lowering when C1 != Br_None && C2 == Br_None: | |
| 3105 // ucomiss b, c /* but swap b,c order if SwapScalarOperands==true */ | |
| 3106 // setcc a, C1 | |
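| // E.g., assuming the TableFcmp entry for ogt is (Default=1, C1=Br_a, | |
| // C2=Br_None), an ordered greater-than takes the setcc form: | |
| //   ucomiss b, c | |
| //   seta a | |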
| 3107 InstFcmp::FCond Condition = Inst->getCondition(); | |
| 3108 size_t Index = static_cast<size_t>(Condition); | |
| 3109 assert(Index < TableFcmpSize); | |
| 3110 if (TableFcmp[Index].SwapScalarOperands) | |
| 3111 std::swap(Src0, Src1); | |
| 3112 bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None); | |
| 3113 bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None); | |
| 3114 if (HasC1) { | |
| 3115 Src0 = legalize(Src0); | |
| 3116 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | |
| 3117 Variable *T = nullptr; | |
| 3118 _mov(T, Src0); | |
| 3119 _ucomiss(T, Src1RM); | |
| 3120 if (!HasC2) { | |
| 3121 assert(TableFcmp[Index].Default); | |
| 3122 _setcc(Dest, TableFcmp[Index].C1); | |
| 3123 return; | |
| 3124 } | |
| 3125 } | |
| 3126 Constant *Default = Ctx->getConstantInt32(TableFcmp[Index].Default); | |
| 3127 _mov(Dest, Default); | |
| 3128 if (HasC1) { | |
| 3129 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
| 3130 _br(TableFcmp[Index].C1, Label); | |
| 3131 if (HasC2) { | |
| 3132 _br(TableFcmp[Index].C2, Label); | |
| 3133 } | |
| 3134 Constant *NonDefault = Ctx->getConstantInt32(!TableFcmp[Index].Default); | |
| 3135 _mov_nonkillable(Dest, NonDefault); | |
| 3136 Context.insert(Label); | |
| 3137 } | |
| 3138 } | |
| 3139 | |
| 3140 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { | |
| 3141 Operand *Src0 = legalize(Inst->getSrc(0)); | |
| 3142 Operand *Src1 = legalize(Inst->getSrc(1)); | |
| 3143 Variable *Dest = Inst->getDest(); | |
| 3144 | |
| 3145 if (isVectorType(Dest->getType())) { | |
| 3146 Type Ty = Src0->getType(); | |
| 3147 // Promote i1 vectors to 128-bit integer vector types. | |
| 3148 if (typeElementType(Ty) == IceType_i1) { | |
| 3149 Type NewTy = IceType_NUM; | |
| 3150 switch (Ty) { | |
| 3151 default: | |
| 3152 llvm_unreachable("unexpected type"); | |
| 3153 break; | |
| 3154 case IceType_v4i1: | |
| 3155 NewTy = IceType_v4i32; | |
| 3156 break; | |
| 3157 case IceType_v8i1: | |
| 3158 NewTy = IceType_v8i16; | |
| 3159 break; | |
| 3160 case IceType_v16i1: | |
| 3161 NewTy = IceType_v16i8; | |
| 3162 break; | |
| 3163 } | |
| 3164 Variable *NewSrc0 = Func->makeVariable(NewTy); | |
| 3165 Variable *NewSrc1 = Func->makeVariable(NewTy); | |
| 3166 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc0, Src0)); | |
| 3167 lowerCast(InstCast::create(Func, InstCast::Sext, NewSrc1, Src1)); | |
| 3168 Src0 = NewSrc0; | |
| 3169 Src1 = NewSrc1; | |
| 3170 Ty = NewTy; | |
| 3171 } | |
| 3172 | |
| 3173 InstIcmp::ICond Condition = Inst->getCondition(); | |
| 3174 | |
| 3175 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | |
| 3176 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | |
| 3177 | |
| 3178 // SSE2 only has signed comparison operations. Transform unsigned | |
| 3179 // inputs in a manner that allows for the use of signed comparison | |
| 3180 // operations by flipping the high order bits. | |
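| // For example, with i32 elements each operand is XORed with | |
| // 0x80000000: 0x00000000 (unsigned min) becomes 0x80000000 (signed | |
| // min) and 0xFFFFFFFF (unsigned max) becomes 0x7FFFFFFF (signed max), | |
| // so the signed pcmpgt below orders the values correctly. | |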
| 3181 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge || | |
| 3182 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) { | |
| 3183 Variable *T0 = makeReg(Ty); | |
| 3184 Variable *T1 = makeReg(Ty); | |
| 3185 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); | |
| 3186 _movp(T0, Src0RM); | |
| 3187 _pxor(T0, HighOrderBits); | |
| 3188 _movp(T1, Src1RM); | |
| 3189 _pxor(T1, HighOrderBits); | |
| 3190 Src0RM = T0; | |
| 3191 Src1RM = T1; | |
| 3192 } | |
| 3193 | |
| 3194 Variable *T = makeReg(Ty); | |
| 3195 switch (Condition) { | |
| 3196 default: | |
| 3197 llvm_unreachable("unexpected condition"); | |
| 3198 break; | |
| 3199 case InstIcmp::Eq: { | |
| 3200 if (llvm::isa<OperandX8632Mem>(Src1RM)) | |
| 3201 Src1RM = legalizeToVar(Src1RM); | |
| 3202 _movp(T, Src0RM); | |
| 3203 _pcmpeq(T, Src1RM); | |
| 3204 } break; | |
| 3205 case InstIcmp::Ne: { | |
| 3206 if (llvm::isa<OperandX8632Mem>(Src1RM)) | |
| 3207 Src1RM = legalizeToVar(Src1RM); | |
| 3208 _movp(T, Src0RM); | |
| 3209 _pcmpeq(T, Src1RM); | |
| 3210 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | |
| 3211 _pxor(T, MinusOne); | |
| 3212 } break; | |
| 3213 case InstIcmp::Ugt: | |
| 3214 case InstIcmp::Sgt: { | |
| 3215 if (llvm::isa<OperandX8632Mem>(Src1RM)) | |
| 3216 Src1RM = legalizeToVar(Src1RM); | |
| 3217 _movp(T, Src0RM); | |
| 3218 _pcmpgt(T, Src1RM); | |
| 3219 } break; | |
| 3220 case InstIcmp::Uge: | |
| 3221 case InstIcmp::Sge: { | |
| 3222 // !(Src1RM > Src0RM) | |
| 3223 if (llvm::isa<OperandX8632Mem>(Src0RM)) | |
| 3224 Src0RM = legalizeToVar(Src0RM); | |
| 3225 _movp(T, Src1RM); | |
| 3226 _pcmpgt(T, Src0RM); | |
| 3227 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | |
| 3228 _pxor(T, MinusOne); | |
| 3229 } break; | |
| 3230 case InstIcmp::Ult: | |
| 3231 case InstIcmp::Slt: { | |
| 3232 if (llvm::isa<OperandX8632Mem>(Src0RM)) | |
| 3233 Src0RM = legalizeToVar(Src0RM); | |
| 3234 _movp(T, Src1RM); | |
| 3235 _pcmpgt(T, Src0RM); | |
| 3236 } break; | |
| 3237 case InstIcmp::Ule: | |
| 3238 case InstIcmp::Sle: { | |
| 3239 // !(Src0RM > Src1RM) | |
| 3240 if (llvm::isa<OperandX8632Mem>(Src1RM)) | |
| 3241 Src1RM = legalizeToVar(Src1RM); | |
| 3242 _movp(T, Src0RM); | |
| 3243 _pcmpgt(T, Src1RM); | |
| 3244 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | |
| 3245 _pxor(T, MinusOne); | |
| 3246 } break; | |
| 3247 } | |
| 3248 | |
| 3249 _movp(Dest, T); | |
| 3250 eliminateNextVectorSextInstruction(Dest); | |
| 3251 return; | |
| 3252 } | |
| 3253 | |
| 3254 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: | |
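| // For i64, the compare is split across the two halves, driven by | |
| // TableIcmp64. A sketch for ult, assuming its entry is (C1=Br_b, | |
| // C2=Br_a, C3=Br_b): | |
| //   mov a, 1 | |
| //   cmp b_hi, c_hi | |
| //   jb L_true | |
| //   ja L_false | |
| //   cmp b_lo, c_lo | |
| //   jb L_true | |
| // L_false: | |
| //   mov a, 0 | |
| // L_true: | |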
| 3255 if (Src0->getType() == IceType_i64) { | |
| 3256 InstIcmp::ICond Condition = Inst->getCondition(); | |
| 3257 size_t Index = static_cast<size_t>(Condition); | |
| 3258 assert(Index < TableIcmp64Size); | |
| 3259 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); | |
| 3260 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); | |
| 3261 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); | |
| 3262 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); | |
| 3263 Constant *Zero = Ctx->getConstantZero(IceType_i32); | |
| 3264 Constant *One = Ctx->getConstantInt32(1); | |
| 3265 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); | |
| 3266 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); | |
| 3267 _mov(Dest, One); | |
| 3268 _cmp(Src0HiRM, Src1HiRI); | |
| 3269 if (TableIcmp64[Index].C1 != CondX86::Br_None) | |
| 3270 _br(TableIcmp64[Index].C1, LabelTrue); | |
| 3271 if (TableIcmp64[Index].C2 != CondX86::Br_None) | |
| 3272 _br(TableIcmp64[Index].C2, LabelFalse); | |
| 3273 _cmp(Src0LoRM, Src1LoRI); | |
| 3274 _br(TableIcmp64[Index].C3, LabelTrue); | |
| 3275 Context.insert(LabelFalse); | |
| 3276 _mov_nonkillable(Dest, Zero); | |
| 3277 Context.insert(LabelTrue); | |
| 3278 return; | |
| 3279 } | |
| 3280 | |
| 3281 // cmp b, c | |
| 3282 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); | |
| 3283 _cmp(Src0RM, Src1); | |
| 3284 _setcc(Dest, getIcmp32Mapping(Inst->getCondition())); | |
| 3285 } | |
| 3286 | |
| 3287 void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { | |
| 3288 Operand *SourceVectNotLegalized = Inst->getSrc(0); | |
| 3289 Operand *ElementToInsertNotLegalized = Inst->getSrc(1); | |
| 3290 ConstantInteger32 *ElementIndex = | |
| 3291 llvm::dyn_cast<ConstantInteger32>(Inst->getSrc(2)); | |
| 3292 // Only constant indices are allowed in PNaCl IR. | |
| 3293 assert(ElementIndex); | |
| 3294 unsigned Index = ElementIndex->getValue(); | |
| 3295 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); | |
| 3296 | |
| 3297 Type Ty = SourceVectNotLegalized->getType(); | |
| 3298 Type ElementTy = typeElementType(Ty); | |
| 3299 Type InVectorElementTy = getInVectorElementType(Ty); | |
| 3300 | |
| 3301 if (ElementTy == IceType_i1) { | |
| 3302 // Expand the element to the appropriate size for it to be inserted | |
| 3303 // in the vector. | |
| 3304 Variable *Expanded = Func->makeVariable(InVectorElementTy); | |
| 3305 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded, | |
| 3306 ElementToInsertNotLegalized); | |
| 3307 lowerCast(Cast); | |
| 3308 ElementToInsertNotLegalized = Expanded; | |
| 3309 } | |
| 3310 | |
| 3311 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1) { | |
| 3312 // Use insertps, pinsrb, pinsrw, or pinsrd. | |
| 3313 Operand *ElementRM = | |
| 3314 legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); | |
| 3315 Operand *SourceVectRM = | |
| 3316 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | |
| 3317 Variable *T = makeReg(Ty); | |
| 3318 _movp(T, SourceVectRM); | |
| 3319 if (Ty == IceType_v4f32) | |
| 3320 _insertps(T, ElementRM, Ctx->getConstantInt32(Index << 4)); | |
| 3321 else | |
| 3322 _pinsr(T, ElementRM, Ctx->getConstantInt32(Index)); | |
| 3323 _movp(Inst->getDest(), T); | |
| 3324 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | |
| 3325 // Use shufps or movss. | |
| 3326 Variable *ElementR = nullptr; | |
| 3327 Operand *SourceVectRM = | |
| 3328 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | |
| 3329 | |
| 3330 if (InVectorElementTy == IceType_f32) { | |
| 3331 // ElementR will be in an XMM register since it is floating point. | |
| 3332 ElementR = legalizeToVar(ElementToInsertNotLegalized); | |
| 3333 } else { | |
| 3334 // Copy an integer to an XMM register. | |
| 3335 Operand *T = legalize(ElementToInsertNotLegalized, Legal_Reg | Legal_Mem); | |
| 3336 ElementR = makeReg(Ty); | |
| 3337 _movd(ElementR, T); | |
| 3338 } | |
| 3339 | |
| 3340 if (Index == 0) { | |
| 3341 Variable *T = makeReg(Ty); | |
| 3342 _movp(T, SourceVectRM); | |
| 3343 _movss(T, ElementR); | |
| 3344 _movp(Inst->getDest(), T); | |
| 3345 return; | |
| 3346 } | |
| 3347 | |
| 3348 // shufps treats the source and destination operands as vectors of | |
| 3349 // four doublewords. The destination's two high doublewords are | |
| 3350 // selected from the source operand and the two low doublewords are | |
| 3351 // selected from (the original value of) the destination operand. | |
| 3352 // An insertelement operation can be effected with a sequence of two | |
| 3353 // shufps operations with appropriate masks. In all cases below, | |
| 3354 // Element[0] is being inserted into SourceVectOperand. Indices are | |
| 3355 // ordered from left to right. | |
| 3356 // | |
| 3357 // insertelement into index 1 (result is stored in ElementR): | |
| 3358 // ElementR := ElementR[0, 0] SourceVectRM[0, 0] | |
| 3359 // ElementR := ElementR[3, 0] SourceVectRM[2, 3] | |
| 3360 // | |
| 3361 // insertelement into index 2 (result is stored in T): | |
| 3362 // T := SourceVectRM | |
| 3363 // ElementR := ElementR[0, 0] T[0, 3] | |
| 3364 // T := T[0, 1] ElementR[0, 3] | |
| 3365 // | |
| 3366 // insertelement into index 3 (result is stored in T): | |
| 3367 // T := SourceVectRM | |
| 3368 // ElementR := ElementR[0, 0] T[0, 2] | |
| 3369 // T := T[0, 1] ElementR[3, 0] | |
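| // The masks pack four 2-bit element selectors, low field first. E.g. | |
| // Mask2[0] == 227 == 0b11'10'00'11 selects fields 3,0,2,3 (low to | |
| // high), i.e. the "ElementR[3, 0] SourceVectRM[2, 3]" step above, and | |
| // Mask1[2] == 128 == 0b10'00'00'00 is the "ElementR[0, 0] T[0, 2]" | |
| // step for index 3. | |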
| 3370 const unsigned char Mask1[3] = {0, 192, 128}; | |
| 3371 const unsigned char Mask2[3] = {227, 196, 52}; | |
| 3372 | |
| 3373 Constant *Mask1Constant = Ctx->getConstantInt32(Mask1[Index - 1]); | |
| 3374 Constant *Mask2Constant = Ctx->getConstantInt32(Mask2[Index - 1]); | |
| 3375 | |
| 3376 if (Index == 1) { | |
| 3377 _shufps(ElementR, SourceVectRM, Mask1Constant); | |
| 3378 _shufps(ElementR, SourceVectRM, Mask2Constant); | |
| 3379 _movp(Inst->getDest(), ElementR); | |
| 3380 } else { | |
| 3381 Variable *T = makeReg(Ty); | |
| 3382 _movp(T, SourceVectRM); | |
| 3383 _shufps(ElementR, T, Mask1Constant); | |
| 3384 _shufps(T, ElementR, Mask2Constant); | |
| 3385 _movp(Inst->getDest(), T); | |
| 3386 } | |
| 3387 } else { | |
| 3388 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); | |
| 3389 // Spill the value to a stack slot and perform the insertion in | |
| 3390 // memory. | |
| 3391 // | |
| 3392 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when | |
| 3393 // support for legalizing to mem is implemented. | |
| 3394 Variable *Slot = Func->makeVariable(Ty); | |
| 3395 Slot->setWeight(RegWeight::Zero); | |
| 3396 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); | |
| 3397 | |
| 3398 // Compute the location of the position to insert in memory. | |
| 3399 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); | |
| 3400 OperandX8632Mem *Loc = | |
| 3401 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); | |
| 3402 _store(legalizeToVar(ElementToInsertNotLegalized), Loc); | |
| 3403 | |
| 3404 Variable *T = makeReg(Ty); | |
| 3405 _movp(T, Slot); | |
| 3406 _movp(Inst->getDest(), T); | |
| 3407 } | |
| 3408 } | |
| 3409 | |
| 3410 void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | |
| 3411 switch (Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID) { | |
| 3412 case Intrinsics::AtomicCmpxchg: { | |
| 3413 if (!Intrinsics::isMemoryOrderValid( | |
| 3414 ID, getConstantMemoryOrder(Instr->getArg(3)), | |
| 3415 getConstantMemoryOrder(Instr->getArg(4)))) { | |
| 3416 Func->setError("Unexpected memory ordering for AtomicCmpxchg"); | |
| 3417 return; | |
| 3418 } | |
| 3419 Variable *DestPrev = Instr->getDest(); | |
| 3420 Operand *PtrToMem = Instr->getArg(0); | |
| 3421 Operand *Expected = Instr->getArg(1); | |
| 3422 Operand *Desired = Instr->getArg(2); | |
| 3423 if (tryOptimizedCmpxchgCmpBr(DestPrev, PtrToMem, Expected, Desired)) | |
| 3424 return; | |
| 3425 lowerAtomicCmpxchg(DestPrev, PtrToMem, Expected, Desired); | |
| 3426 return; | |
| 3427 } | |
| 3428 case Intrinsics::AtomicFence: | |
| 3429 if (!Intrinsics::isMemoryOrderValid( | |
| 3430 ID, getConstantMemoryOrder(Instr->getArg(0)))) { | |
| 3431 Func->setError("Unexpected memory ordering for AtomicFence"); | |
| 3432 return; | |
| 3433 } | |
| 3434 _mfence(); | |
| 3435 return; | |
| 3436 case Intrinsics::AtomicFenceAll: | |
| 3437 // NOTE: FenceAll should prevent any load/store from being moved | |
| 3438 // across the fence (both atomic and non-atomic). The InstX8632Mfence | |
| 3439 // instruction is currently marked coarsely as "HasSideEffects". | |
| 3440 _mfence(); | |
| 3441 return; | |
| 3442 case Intrinsics::AtomicIsLockFree: { | |
| 3443 // X86 is always lock free for 8/16/32/64 bit accesses. | |
| 3444 // TODO(jvoung): Since the result is constant when given a constant | |
| 3445 // byte size, this opens up DCE opportunities. | |
| 3446 Operand *ByteSize = Instr->getArg(0); | |
| 3447 Variable *Dest = Instr->getDest(); | |
| 3448 if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) { | |
| 3449 Constant *Result; | |
| 3450 switch (CI->getValue()) { | |
| 3451 default: | |
| 3452 // Some x86-64 processors support the cmpxchg16b instruction, which | |
| 3453 // can make 16-byte operations lock free (when used with the LOCK | |
| 3454 // prefix). However, that's not supported in 32-bit mode, so just | |
| 3455 // return 0 even for large sizes. | |
| 3456 Result = Ctx->getConstantZero(IceType_i32); | |
| 3457 break; | |
| 3458 case 1: | |
| 3459 case 2: | |
| 3460 case 4: | |
| 3461 case 8: | |
| 3462 Result = Ctx->getConstantInt32(1); | |
| 3463 break; | |
| 3464 } | |
| 3465 _mov(Dest, Result); | |
| 3466 return; | |
| 3467 } | |
| 3468 // The PNaCl ABI requires the byte size to be a compile-time constant. | |
| 3469 Func->setError("AtomicIsLockFree byte size should be compile-time const"); | |
| 3470 return; | |
| 3471 } | |
| 3472 case Intrinsics::AtomicLoad: { | |
| 3473 // We require the memory address to be naturally aligned. | |
| 3474 // Given that, normal loads are atomic. | |
| 3475 if (!Intrinsics::isMemoryOrderValid( | |
| 3476 ID, getConstantMemoryOrder(Instr->getArg(1)))) { | |
| 3477 Func->setError("Unexpected memory ordering for AtomicLoad"); | |
| 3478 return; | |
| 3479 } | |
| 3480 Variable *Dest = Instr->getDest(); | |
| 3481 if (Dest->getType() == IceType_i64) { | |
| 3482 // Follow what GCC does and use a movq instead of what lowerLoad() | |
| 3483 // normally does (split the load into two). | |
| 3484 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding | |
| 3485 // can't happen anyway, since this is x86-32 and integer arithmetic only | |
| 3486 // happens on 32-bit quantities. | |
| 3487 Variable *T = makeReg(IceType_f64); | |
| 3488 OperandX8632Mem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64); | |
| 3489 _movq(T, Addr); | |
| 3490 // Then cast the bits back out of the XMM register to the i64 Dest. | |
| 3491 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); | |
| 3492 lowerCast(Cast); | |
| 3493 // Make sure that the atomic load isn't elided when unused. | |
| 3494 Context.insert(InstFakeUse::create(Func, Dest->getLo())); | |
| 3495 Context.insert(InstFakeUse::create(Func, Dest->getHi())); | |
| 3496 return; | |
| 3497 } | |
| 3498 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); | |
| 3499 lowerLoad(Load); | |
| 3500 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. | |
| 3501 // Since lowerLoad may fuse the load w/ an arithmetic instruction, | |
| 3502 // insert the FakeUse on the last-inserted instruction's dest. | |
| 3503 Context.insert( | |
| 3504 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); | |
| 3505 return; | |
| 3506 } | |
| 3507 case Intrinsics::AtomicRMW: | |
| 3508 if (!Intrinsics::isMemoryOrderValid( | |
| 3509 ID, getConstantMemoryOrder(Instr->getArg(3)))) { | |
| 3510 Func->setError("Unexpected memory ordering for AtomicRMW"); | |
| 3511 return; | |
| 3512 } | |
| 3513 lowerAtomicRMW( | |
| 3514 Instr->getDest(), | |
| 3515 static_cast<uint32_t>( | |
| 3516 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), | |
| 3517 Instr->getArg(1), Instr->getArg(2)); | |
| 3518 return; | |
| 3519 case Intrinsics::AtomicStore: { | |
| 3520 if (!Intrinsics::isMemoryOrderValid( | |
| 3521 ID, getConstantMemoryOrder(Instr->getArg(2)))) { | |
| 3522 Func->setError("Unexpected memory ordering for AtomicStore"); | |
| 3523 return; | |
| 3524 } | |
| 3525 // We require the memory address to be naturally aligned. | |
| 3526 // Given that, normal stores are atomic. | |
| 3527 // Add a fence after the store to make it visible. | |
| 3528 Operand *Value = Instr->getArg(0); | |
| 3529 Operand *Ptr = Instr->getArg(1); | |
| 3530 if (Value->getType() == IceType_i64) { | |
| 3531 // Use a movq instead of what lowerStore() normally does | |
| 3532 // (split the store into two), following what GCC does. | |
| 3533 // Bitcast the i64 value into an XMM register first. | |
| 3534 Variable *T = makeReg(IceType_f64); | |
| 3535 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); | |
| 3536 lowerCast(Cast); | |
| 3537 // Then store XMM w/ a movq. | |
| 3538 OperandX8632Mem *Addr = formMemoryOperand(Ptr, IceType_f64); | |
| 3539 _storeq(T, Addr); | |
| 3540 _mfence(); | |
| 3541 return; | |
| 3542 } | |
| 3543 InstStore *Store = InstStore::create(Func, Value, Ptr); | |
| 3544 lowerStore(Store); | |
| 3545 _mfence(); | |
| 3546 return; | |
| 3547 } | |
| 3548 case Intrinsics::Bswap: { | |
| 3549 Variable *Dest = Instr->getDest(); | |
| 3550 Operand *Val = Instr->getArg(0); | |
| 3551 // In 32-bit mode, bswap only works on 32-bit arguments, and the | |
| 3552 // argument must be a register. Use rotate left for 16-bit bswap. | |
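| // Sketch for i64 (illustrative registers, lo in eax and hi in edx): | |
| //   bswap eax | |
| //   bswap edx | |
| //   mov dest_lo, edx | |
| //   mov dest_hi, eax | |
| // e.g. 0x0011223344556677 becomes 0x7766554433221100. | |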
| 3553 if (Val->getType() == IceType_i64) { | |
| 3554 Variable *T_Lo = legalizeToVar(loOperand(Val)); | |
| 3555 Variable *T_Hi = legalizeToVar(hiOperand(Val)); | |
| 3556 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 3557 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 3558 _bswap(T_Lo); | |
| 3559 _bswap(T_Hi); | |
| 3560 _mov(DestLo, T_Hi); | |
| 3561 _mov(DestHi, T_Lo); | |
| 3562 } else if (Val->getType() == IceType_i32) { | |
| 3563 Variable *T = legalizeToVar(Val); | |
| 3564 _bswap(T); | |
| 3565 _mov(Dest, T); | |
| 3566 } else { | |
| 3567 assert(Val->getType() == IceType_i16); | |
| 3568 Val = legalize(Val); | |
| 3569 Constant *Eight = Ctx->getConstantInt16(8); | |
| 3570 Variable *T = nullptr; | |
| 3571 _mov(T, Val); | |
| 3572 _rol(T, Eight); | |
| 3573 _mov(Dest, T); | |
| 3574 } | |
| 3575 return; | |
| 3576 } | |
| 3577 case Intrinsics::Ctpop: { | |
| 3578 Variable *Dest = Instr->getDest(); | |
| 3579 Operand *Val = Instr->getArg(0); | |
| 3580 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) | |
| 3581 ? H_call_ctpop_i32 | |
| 3582 : H_call_ctpop_i64, | |
| 3583 Dest, 1); | |
| 3584 Call->addArg(Val); | |
| 3585 lowerCall(Call); | |
| 3586 // The popcount helpers always return 32-bit values, while the intrinsic's | |
| 3587 // signature matches the native POPCNT instruction and fills a 64-bit reg | |
| 3588 // (in 64-bit mode). Thus, clear the upper bits of the dest just in case | |
| 3589 // the user doesn't do that in the IR. If the user does that in the IR, | |
| 3590 // then this zeroing instruction is dead and gets optimized out. | |
| 3591 if (Val->getType() == IceType_i64) { | |
| 3592 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 3593 Constant *Zero = Ctx->getConstantZero(IceType_i32); | |
| 3594 _mov(DestHi, Zero); | |
| 3595 } | |
| 3596 return; | |
| 3597 } | |
| 3598 case Intrinsics::Ctlz: { | |
| 3599 // The "is zero undef" parameter is ignored and we always return | |
| 3600 // a well-defined value. | |
| 3601 Operand *Val = legalize(Instr->getArg(0)); | |
| 3602 Operand *FirstVal; | |
| 3603 Operand *SecondVal = nullptr; | |
| 3604 if (Val->getType() == IceType_i64) { | |
| 3605 FirstVal = loOperand(Val); | |
| 3606 SecondVal = hiOperand(Val); | |
| 3607 } else { | |
| 3608 FirstVal = Val; | |
| 3609 } | |
| 3610 const bool IsCttz = false; | |
| 3611 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, | |
| 3612 SecondVal); | |
| 3613 return; | |
| 3614 } | |
| 3615 case Intrinsics::Cttz: { | |
| 3616 // The "is zero undef" parameter is ignored and we always return | |
| 3617 // a well-defined value. | |
| 3618 Operand *Val = legalize(Instr->getArg(0)); | |
| 3619 Operand *FirstVal; | |
| 3620 Operand *SecondVal = nullptr; | |
| 3621 if (Val->getType() == IceType_i64) { | |
| 3622 FirstVal = hiOperand(Val); | |
| 3623 SecondVal = loOperand(Val); | |
| 3624 } else { | |
| 3625 FirstVal = Val; | |
| 3626 } | |
| 3627 const bool IsCttz = true; | |
| 3628 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, | |
| 3629 SecondVal); | |
| 3630 return; | |
| 3631 } | |
| 3632 case Intrinsics::Fabs: { | |
| 3633 Operand *Src = legalize(Instr->getArg(0)); | |
| 3634 Type Ty = Src->getType(); | |
| 3635 Variable *Dest = Instr->getDest(); | |
| 3636 Variable *T = makeVectorOfFabsMask(Ty); | |
| 3637 // The pand instruction operates on an m128 memory operand, so if | |
| 3638 // Src is an f32 or f64, we need to make sure it's in a register. | |
| 3639 if (isVectorType(Ty)) { | |
| 3640 if (llvm::isa<OperandX8632Mem>(Src)) | |
| 3641 Src = legalizeToVar(Src); | |
| 3642 } else { | |
| 3643 Src = legalizeToVar(Src); | |
| 3644 } | |
| 3645 _pand(T, Src); | |
| 3646 if (isVectorType(Ty)) | |
| 3647 _movp(Dest, T); | |
| 3648 else | |
| 3649 _mov(Dest, T); | |
| 3650 return; | |
| 3651 } | |
| 3652 case Intrinsics::Longjmp: { | |
| 3653 InstCall *Call = makeHelperCall(H_call_longjmp, nullptr, 2); | |
| 3654 Call->addArg(Instr->getArg(0)); | |
| 3655 Call->addArg(Instr->getArg(1)); | |
| 3656 lowerCall(Call); | |
| 3657 return; | |
| 3658 } | |
| 3659 case Intrinsics::Memcpy: { | |
| 3660 // In the future, we could potentially emit an inline memcpy/memset, etc. | |
| 3661 // for intrinsic calls w/ a known length. | |
| 3662 InstCall *Call = makeHelperCall(H_call_memcpy, nullptr, 3); | |
| 3663 Call->addArg(Instr->getArg(0)); | |
| 3664 Call->addArg(Instr->getArg(1)); | |
| 3665 Call->addArg(Instr->getArg(2)); | |
| 3666 lowerCall(Call); | |
| 3667 return; | |
| 3668 } | |
| 3669 case Intrinsics::Memmove: { | |
| 3670 InstCall *Call = makeHelperCall(H_call_memmove, nullptr, 3); | |
| 3671 Call->addArg(Instr->getArg(0)); | |
| 3672 Call->addArg(Instr->getArg(1)); | |
| 3673 Call->addArg(Instr->getArg(2)); | |
| 3674 lowerCall(Call); | |
| 3675 return; | |
| 3676 } | |
| 3677 case Intrinsics::Memset: { | |
| 3678 // The value operand needs to be extended to a stack slot size | |
| 3679 // because the PNaCl ABI requires arguments to be at least 32 bits | |
| 3680 // wide. | |
| 3681 Operand *ValOp = Instr->getArg(1); | |
| 3682 assert(ValOp->getType() == IceType_i8); | |
| 3683 Variable *ValExt = Func->makeVariable(stackSlotType()); | |
| 3684 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp)); | |
| 3685 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3); | |
| 3686 Call->addArg(Instr->getArg(0)); | |
| 3687 Call->addArg(ValExt); | |
| 3688 Call->addArg(Instr->getArg(2)); | |
| 3689 lowerCall(Call); | |
| 3690 return; | |
| 3691 } | |
| 3692 case Intrinsics::NaClReadTP: { | |
| 3693 if (Ctx->getFlags().getUseSandboxing()) { | |
| 3694 Constant *Zero = Ctx->getConstantZero(IceType_i32); | |
| 3695 Operand *Src = | |
| 3696 OperandX8632Mem::create(Func, IceType_i32, nullptr, Zero, nullptr, 0, | |
| 3697 OperandX8632Mem::SegReg_GS); | |
| 3698 Variable *Dest = Instr->getDest(); | |
| 3699 Variable *T = nullptr; | |
| 3700 _mov(T, Src); | |
| 3701 _mov(Dest, T); | |
| 3702 } else { | |
| 3703 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0); | |
| 3704 lowerCall(Call); | |
| 3705 } | |
| 3706 return; | |
| 3707 } | |
| 3708 case Intrinsics::Setjmp: { | |
| 3709 InstCall *Call = makeHelperCall(H_call_setjmp, Instr->getDest(), 1); | |
| 3710 Call->addArg(Instr->getArg(0)); | |
| 3711 lowerCall(Call); | |
| 3712 return; | |
| 3713 } | |
| 3714 case Intrinsics::Sqrt: { | |
| 3715 Operand *Src = legalize(Instr->getArg(0)); | |
| 3716 Variable *Dest = Instr->getDest(); | |
| 3717 Variable *T = makeReg(Dest->getType()); | |
| 3718 _sqrtss(T, Src); | |
| 3719 _mov(Dest, T); | |
| 3720 return; | |
| 3721 } | |
| 3722 case Intrinsics::Stacksave: { | |
| 3723 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); | |
| 3724 Variable *Dest = Instr->getDest(); | |
| 3725 _mov(Dest, esp); | |
| 3726 return; | |
| 3727 } | |
| 3728 case Intrinsics::Stackrestore: { | |
| 3729 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); | |
| 3730 _mov_nonkillable(esp, Instr->getArg(0)); | |
| 3731 return; | |
| 3732 } | |
| 3733 case Intrinsics::Trap: | |
| 3734 _ud2(); | |
| 3735 return; | |
| 3736 case Intrinsics::UnknownIntrinsic: | |
| 3737 Func->setError("Should not be lowering UnknownIntrinsic"); | |
| 3738 return; | |
| 3739 } | |
| 3740 return; | |
| 3741 } | |
| 3742 | |
| 3743 void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, | |
| 3744 Operand *Expected, Operand *Desired) { | |
| 3745 if (Expected->getType() == IceType_i64) { | |
| 3746 // Reserve the pre-colored registers first, before adding any more | |
| 3747 // infinite-weight variables from formMemoryOperand's legalization. | |
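| // A sketch of the sequence below (the registers are dictated by | |
| // cmpxchg8b itself): | |
| //   mov eax, lo(expected) | |
| //   mov edx, hi(expected) | |
| //   mov ebx, lo(desired) | |
| //   mov ecx, hi(desired) | |
| //   lock cmpxchg8b [addr] | |
| //   mov dest_lo, eax ; edx:eax hold the value originally in memory | |
| //   mov dest_hi, edx | |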
| 3748 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); | |
| 3749 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax); | |
| 3750 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); | |
| 3751 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx); | |
| 3752 _mov(T_eax, loOperand(Expected)); | |
| 3753 _mov(T_edx, hiOperand(Expected)); | |
| 3754 _mov(T_ebx, loOperand(Desired)); | |
| 3755 _mov(T_ecx, hiOperand(Desired)); | |
| 3756 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); | |
| 3757 const bool Locked = true; | |
| 3758 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | |
| 3759 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); | |
| 3760 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); | |
| 3761 _mov(DestLo, T_eax); | |
| 3762 _mov(DestHi, T_edx); | |
| 3763 return; | |
| 3764 } | |
| 3765 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax); | |
| 3766 _mov(T_eax, Expected); | |
| 3767 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); | |
| 3768 Variable *DesiredReg = legalizeToVar(Desired); | |
| 3769 const bool Locked = true; | |
| 3770 _cmpxchg(Addr, T_eax, DesiredReg, Locked); | |
| 3771 _mov(DestPrev, T_eax); | |
| 3772 } | |
| 3773 | |
| 3774 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem, | |
| 3775 Operand *Expected, | |
| 3776 Operand *Desired) { | |
| 3777 if (Ctx->getFlags().getOptLevel() == Opt_m1) | |
| 3778 return false; | |
| 3779 // Peek ahead a few instructions and see how Dest is used. | |
| 3780 // It's very common to have: | |
| 3781 // | |
| 3782 // %x = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* ptr, i32 %expected, ...) | |
| 3783 // [%y_phi = ...] // list of phi stores | |
| 3784 // %p = icmp eq i32 %x, %expected | |
| 3785 // br i1 %p, label %l1, label %l2 | |
| 3786 // | |
| 3787 // which we can optimize into: | |
| 3788 // | |
| 3789 // %x = <cmpxchg code> | |
| 3790 // [%y_phi = ...] // list of phi stores | |
| 3791 // br eq, %l1, %l2 | |
| 3792 InstList::iterator I = Context.getCur(); | |
| 3793 // I is currently the InstIntrinsicCall. Peek past that. | |
| 3794 // This assumes that the atomic cmpxchg has not been lowered yet, | |
| 3795 // so that the instructions seen in the scan from "Cur" are simple. | |
| 3796 assert(llvm::isa<InstIntrinsicCall>(*I)); | |
| 3797 Inst *NextInst = Context.getNextInst(I); | |
| 3798 if (!NextInst) | |
| 3799 return false; | |
| 3800 // There might be phi assignments right before the compare+branch, since this | |
| 3801 // could be a backward branch for a loop. This placement of assignments is | |
| 3802 // determined by placePhiStores(). | |
| 3803 std::vector<InstAssign *> PhiAssigns; | |
| 3804 while (InstAssign *PhiAssign = llvm::dyn_cast<InstAssign>(NextInst)) { | |
| 3805 if (PhiAssign->getDest() == Dest) | |
| 3806 return false; | |
| 3807 PhiAssigns.push_back(PhiAssign); | |
| 3808 NextInst = Context.getNextInst(I); | |
| 3809 if (!NextInst) | |
| 3810 return false; | |
| 3811 } | |
| 3812 if (InstIcmp *NextCmp = llvm::dyn_cast<InstIcmp>(NextInst)) { | |
| 3813 if (!(NextCmp->getCondition() == InstIcmp::Eq && | |
| 3814 ((NextCmp->getSrc(0) == Dest && NextCmp->getSrc(1) == Expected) || | |
| 3815 (NextCmp->getSrc(1) == Dest && NextCmp->getSrc(0) == Expected)))) { | |
| 3816 return false; | |
| 3817 } | |
| 3818 NextInst = Context.getNextInst(I); | |
| 3819 if (!NextInst) | |
| 3820 return false; | |
| 3821 if (InstBr *NextBr = llvm::dyn_cast<InstBr>(NextInst)) { | |
| 3822 if (!NextBr->isUnconditional() && | |
| 3823 NextCmp->getDest() == NextBr->getCondition() && | |
| 3824 NextBr->isLastUse(NextCmp->getDest())) { | |
| 3825 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired); | |
| 3826 for (size_t i = 0; i < PhiAssigns.size(); ++i) { | |
| 3827 // Lower the phi assignments now, before the branch (same placement | |
| 3828 // as before). | |
| 3829 InstAssign *PhiAssign = PhiAssigns[i]; | |
| 3830 PhiAssign->setDeleted(); | |
| 3831 lowerAssign(PhiAssign); | |
| 3832 Context.advanceNext(); | |
| 3833 } | |
| 3834 _br(CondX86::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse()); | |
| 3835 // Skip over the old compare and branch, by deleting them. | |
| 3836 NextCmp->setDeleted(); | |
| 3837 NextBr->setDeleted(); | |
| 3838 Context.advanceNext(); | |
| 3839 Context.advanceNext(); | |
| 3840 return true; | |
| 3841 } | |
| 3842 } | |
| 3843 } | |
| 3844 return false; | |
| 3845 } | |
| 3846 | |
| 3847 void TargetX8632::lowerAtomicRMW(Variable *Dest, uint32_t Operation, | |
| 3848 Operand *Ptr, Operand *Val) { | |
| 3849 bool NeedsCmpxchg = false; | |
| 3850 LowerBinOp Op_Lo = nullptr; | |
| 3851 LowerBinOp Op_Hi = nullptr; | |
| 3852 switch (Operation) { | |
| 3853 default: | |
| 3854 Func->setError("Unknown AtomicRMW operation"); | |
| 3855 return; | |
| 3856 case Intrinsics::AtomicAdd: { | |
| 3857 if (Dest->getType() == IceType_i64) { | |
| 3858 // All the fall-through paths must set this to true; the variable | |
| 3859 // is only read by the assert at the bottom. | |
| 3860 NeedsCmpxchg = true; | |
| 3861 Op_Lo = &TargetX8632::_add; | |
| 3862 Op_Hi = &TargetX8632::_adc; | |
| 3863 break; | |
| 3864 } | |
| 3865 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); | |
| 3866 const bool Locked = true; | |
| 3867 Variable *T = nullptr; | |
| 3868 _mov(T, Val); | |
| 3869 _xadd(Addr, T, Locked); | |
| 3870 _mov(Dest, T); | |
| 3871 return; | |
| 3872 } | |
| 3873 case Intrinsics::AtomicSub: { | |
| 3874 if (Dest->getType() == IceType_i64) { | |
| 3875 NeedsCmpxchg = true; | |
| 3876 Op_Lo = &TargetX8632::_sub; | |
| 3877 Op_Hi = &TargetX8632::_sbb; | |
| 3878 break; | |
| 3879 } | |
| 3880 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); | |
| 3881 const bool Locked = true; | |
| 3882 Variable *T = nullptr; | |
| 3883 _mov(T, Val); | |
| 3884 _neg(T); | |
| 3885 _xadd(Addr, T, Locked); | |
| 3886 _mov(Dest, T); | |
| 3887 return; | |
| 3888 } | |
| 3889 case Intrinsics::AtomicOr: | |
| 3890 // TODO(jvoung): If Dest is null or dead, then some of these | |
| 3891 // operations do not need an "exchange", but just a locked op. | |
| 3892 // That appears to be "worth" it for sub, or, and, and xor. | |
| 3893 // xadd is probably fine vs lock add for add, and xchg is fine | |
| 3894 // vs an atomic store. | |
| 3895 NeedsCmpxchg = true; | |
| 3896 Op_Lo = &TargetX8632::_or; | |
| 3897 Op_Hi = &TargetX8632::_or; | |
| 3898 break; | |
| 3899 case Intrinsics::AtomicAnd: | |
| 3900 NeedsCmpxchg = true; | |
| 3901 Op_Lo = &TargetX8632::_and; | |
| 3902 Op_Hi = &TargetX8632::_and; | |
| 3903 break; | |
| 3904 case Intrinsics::AtomicXor: | |
| 3905 NeedsCmpxchg = true; | |
| 3906 Op_Lo = &TargetX8632::_xor; | |
| 3907 Op_Hi = &TargetX8632::_xor; | |
| 3908 break; | |
| 3909 case Intrinsics::AtomicExchange: | |
| 3910 if (Dest->getType() == IceType_i64) { | |
| 3911 NeedsCmpxchg = true; | |
| 3912 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values | |
| 3913 // just need to be moved to the ecx and ebx registers. | |
| 3914 Op_Lo = nullptr; | |
| 3915 Op_Hi = nullptr; | |
| 3916 break; | |
| 3917 } | |
| 3918 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); | |
| 3919 Variable *T = nullptr; | |
| 3920 _mov(T, Val); | |
| 3921 _xchg(Addr, T); | |
| 3922 _mov(Dest, T); | |
| 3923 return; | |
| 3924 } | |
| 3925 // Otherwise, we need a cmpxchg loop. | |
| 3926 (void)NeedsCmpxchg; | |
| 3927 assert(NeedsCmpxchg); | |
| 3928 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val); | |
| 3929 } | |
| 3930 | |
| 3931 void TargetX8632::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, LowerBinOp Op_Hi, | |
| 3932 Variable *Dest, Operand *Ptr, | |
| 3933 Operand *Val) { | |
| 3934 // Expand a more complex RMW operation as a cmpxchg loop: | |
| 3935 // For 64-bit: | |
| 3936 // mov eax, [ptr] | |
| 3937 // mov edx, [ptr + 4] | |
| 3938 // .LABEL: | |
| 3939 // mov ebx, eax | |
| 3940 // <Op_Lo> ebx, <desired_adj_lo> | |
| 3941 // mov ecx, edx | |
| 3942 // <Op_Hi> ecx, <desired_adj_hi> | |
| 3943 // lock cmpxchg8b [ptr] | |
| 3944 // jne .LABEL | |
| 3945 // mov <dest_lo>, eax | |
| 3946 // mov <dest_hi>, edx | |
| 3947 // | |
| 3948 // For 32-bit: | |
| 3949 // mov eax, [ptr] | |
| 3950 // .LABEL: | |
| 3951 // mov <reg>, eax | |
| 3952 // op <reg>, <desired_adj> | |
| 3953 // lock cmpxchg [ptr], <reg> | |
| 3954 // jne .LABEL | |
| 3955 // mov <dest>, eax | |
| 3956 // | |
| 3957 // If Op_{Lo,Hi} are nullptr, then just copy the value. | |
| 3958 Val = legalize(Val); | |
| 3959 Type Ty = Val->getType(); | |
| 3960 if (Ty == IceType_i64) { | |
| 3961 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); | |
| 3962 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax); | |
| 3963 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty); | |
| 3964 _mov(T_eax, loOperand(Addr)); | |
| 3965 _mov(T_edx, hiOperand(Addr)); | |
| 3966 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); | |
| 3967 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx); | |
| 3968 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
| 3969 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr; | |
| 3970 if (!IsXchg8b) { | |
| 3971 Context.insert(Label); | |
| 3972 _mov(T_ebx, T_eax); | |
| 3973 (this->*Op_Lo)(T_ebx, loOperand(Val)); | |
| 3974 _mov(T_ecx, T_edx); | |
| 3975 (this->*Op_Hi)(T_ecx, hiOperand(Val)); | |
| 3976 } else { | |
| 3977 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. | |
| 3978 // It just needs the Val loaded into ebx and ecx. | |
| 3979 // That can also be done before the loop. | |
| 3980 _mov(T_ebx, loOperand(Val)); | |
| 3981 _mov(T_ecx, hiOperand(Val)); | |
| 3982 Context.insert(Label); | |
| 3983 } | |
| 3984 const bool Locked = true; | |
| 3985 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | |
| 3986 _br(CondX86::Br_ne, Label); | |
| 3987 if (!IsXchg8b) { | |
| 3988 // If Val is a variable, model the extended live range of Val through | |
| 3989 // the end of the loop, since it will be re-used by the loop. | |
| 3990 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { | |
| 3991 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar)); | |
| 3992 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); | |
| 3993 Context.insert(InstFakeUse::create(Func, ValLo)); | |
| 3994 Context.insert(InstFakeUse::create(Func, ValHi)); | |
| 3995 } | |
| 3996 } else { | |
| 3997 // For xchg, the loop is slightly smaller and ebx/ecx are used. | |
| 3998 Context.insert(InstFakeUse::create(Func, T_ebx)); | |
| 3999 Context.insert(InstFakeUse::create(Func, T_ecx)); | |
| 4000 } | |
| 4001 // The address base (if any) is also reused in the loop. | |
| 4002 if (Variable *Base = Addr->getBase()) | |
| 4003 Context.insert(InstFakeUse::create(Func, Base)); | |
| 4004 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 4005 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 4006 _mov(DestLo, T_eax); | |
| 4007 _mov(DestHi, T_edx); | |
| 4008 return; | |
| 4009 } | |
| 4010 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty); | |
| 4011 Variable *T_eax = makeReg(Ty, RegX8632::Reg_eax); | |
| 4012 _mov(T_eax, Addr); | |
| 4013 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
| 4014 Context.insert(Label); | |
| 4015 // We want to pick a different register for T than eax, so don't use | |
| 4016 // _mov(T == nullptr, T_eax). | |
| 4017 Variable *T = makeReg(Ty); | |
| 4018 _mov(T, T_eax); | |
| 4019 (this->*Op_Lo)(T, Val); | |
| 4020 const bool Locked = true; | |
| 4021 _cmpxchg(Addr, T_eax, T, Locked); | |
| 4022 _br(CondX86::Br_ne, Label); | |
| 4023 // If Val is a variable, model the extended live range of Val through | |
| 4024 // the end of the loop, since it will be re-used by the loop. | |
| 4025 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { | |
| 4026 Context.insert(InstFakeUse::create(Func, ValVar)); | |
| 4027 } | |
| 4028 // The address base (if any) is also reused in the loop. | |
| 4029 if (Variable *Base = Addr->getBase()) | |
| 4030 Context.insert(InstFakeUse::create(Func, Base)); | |
| 4031 _mov(Dest, T_eax); | |
| 4032 } | |
| 4033 | |
| 4034 // Lowers count {trailing, leading} zeros intrinsic. | |
| 4035 // | |
| 4036 // We could do constant folding here, but that should have | |
| 4037 // been done by the front-end/middle-end optimizations. | |
| 4038 void TargetX8632::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, | |
| 4039 Operand *FirstVal, Operand *SecondVal) { | |
| 4040 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). | |
| 4041 // Then the instructions will handle the Val == 0 case much more simply | |
| 4042 // and won't require conversion from bit position to number of zeros. | |
| 4043 // | |
| 4044 // Otherwise: | |
| 4045 // bsr IF_NOT_ZERO, Val | |
| 4046 // mov T_DEST, 63 | |
| 4047 // cmovne T_DEST, IF_NOT_ZERO | |
| 4048 // xor T_DEST, 31 | |
| 4049 // mov DEST, T_DEST | |
| 4050 // | |
| 4051 // NOTE: T_DEST must be a register because cmov requires its dest to be a | |
| 4052 // register. Also, bsf and bsr require their dest to be a register. | |
| 4053 // | |
| 4054 // The xor DEST, 31 converts a bit position to # of leading zeroes. | |
| 4055 // E.g., for 000... 00001100, bsr will say that the most significant bit | |
| 4056 // set is at position 3, while the number of leading zeros is 28. Xor is | |
| 4057 // like (31 - N) for N <= 31, and converts 63 to 32 (for the all-zeros case). | |
| 4058 // | |
| 4059 // Similarly for 64-bit, but start by speculating that the upper 32 bits | |
| 4060 // are all zero, and compute the result for that case (checking the lower | |
| 4061 // 32 bits). Then actually compute the result for the upper bits and | |
| 4062 // cmov in the result from the lower computation if the earlier speculation | |
| 4063 // was correct. | |
| 4064 // | |
| 4065 // Cttz is similar, but uses bsf instead, doesn't require the xor | |
| 4066 // bit-position conversion, and the speculation is reversed. | |
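| // | |
| // For example, the 32-bit cttz case emits roughly: | |
| //   bsf IF_NOT_ZERO, Val | |
| //   mov T_DEST, 32 | |
| //   cmovne T_DEST, IF_NOT_ZERO | |
| //   mov DEST, T_DEST | |
| // where the mov of 32 provides the Val == 0 result directly, since | |
| // bsf leaves its dest undefined when the source is zero. | |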
| 4067 assert(Ty == IceType_i32 || Ty == IceType_i64); | |
| 4068 Variable *T = makeReg(IceType_i32); | |
| 4069 Operand *FirstValRM = legalize(FirstVal, Legal_Mem | Legal_Reg); | |
| 4070 if (Cttz) { | |
| 4071 _bsf(T, FirstValRM); | |
| 4072 } else { | |
| 4073 _bsr(T, FirstValRM); | |
| 4074 } | |
| 4075 Variable *T_Dest = makeReg(IceType_i32); | |
| 4076 Constant *ThirtyTwo = Ctx->getConstantInt32(32); | |
| 4077 Constant *ThirtyOne = Ctx->getConstantInt32(31); | |
| 4078 if (Cttz) { | |
| 4079 _mov(T_Dest, ThirtyTwo); | |
| 4080 } else { | |
| 4081 Constant *SixtyThree = Ctx->getConstantInt32(63); | |
| 4082 _mov(T_Dest, SixtyThree); | |
| 4083 } | |
| 4084 _cmov(T_Dest, T, CondX86::Br_ne); | |
| 4085 if (!Cttz) { | |
| 4086 _xor(T_Dest, ThirtyOne); | |
| 4087 } | |
| 4088 if (Ty == IceType_i32) { | |
| 4089 _mov(Dest, T_Dest); | |
| 4090 return; | |
| 4091 } | |
| 4092 _add(T_Dest, ThirtyTwo); | |
| 4093 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 4094 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 4095 // We will be using "test" on this, so we need a registerized variable. | |
| 4096 Variable *SecondVar = legalizeToVar(SecondVal); | |
| 4097 Variable *T_Dest2 = makeReg(IceType_i32); | |
| 4098 if (Cttz) { | |
| 4099 _bsf(T_Dest2, SecondVar); | |
| 4100 } else { | |
| 4101 _bsr(T_Dest2, SecondVar); | |
| 4102 _xor(T_Dest2, ThirtyOne); | |
| 4103 } | |
| 4104 _test(SecondVar, SecondVar); | |
| 4105 _cmov(T_Dest2, T_Dest, CondX86::Br_e); | |
| 4106 _mov(DestLo, T_Dest2); | |
| 4107 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | |
| 4108 } | |
| 4109 | |
| 4110 namespace { | |
| 4111 | |
| 4112 bool isAdd(const Inst *Inst) { | |
| 4113 if (const InstArithmetic *Arith = | |
| 4114 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { | |
| 4115 return (Arith->getOp() == InstArithmetic::Add); | |
| 4116 } | |
| 4117 return false; | |
| 4118 } | |
| 4119 | |
| 4120 void dumpAddressOpt(const Cfg *Func, const Variable *Base, | |
| 4121 const Variable *Index, uint16_t Shift, int32_t Offset, | |
| 4122 const Inst *Reason) { | |
| 4123 if (!ALLOW_DUMP) | |
| 4124 return; | |
| 4125 if (!Func->isVerbose(IceV_AddrOpt)) | |
| 4126 return; | |
| 4127 OstreamLocker L(Func->getContext()); | |
| 4128 Ostream &Str = Func->getContext()->getStrDump(); | |
| 4129 Str << "Instruction: "; | |
| 4130 Reason->dumpDecorated(Func); | |
| 4131 Str << " results in Base="; | |
| 4132 if (Base) | |
| 4133 Base->dump(Func); | |
| 4134 else | |
| 4135 Str << "<null>"; | |
| 4136 Str << ", Index="; | |
| 4137 if (Index) | |
| 4138 Index->dump(Func); | |
| 4139 else | |
| 4140 Str << "<null>"; | |
| 4141 Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n"; | |
| 4142 } | |
| 4143 | |
| 4144 bool matchTransitiveAssign(const VariablesMetadata *VMetadata, Variable *&Var, | |
| 4145 const Inst *&Reason) { | |
| 4146 // Var originates from Var=SrcVar ==> | |
| 4147 // set Var:=SrcVar | |
| 4148 if (Var == nullptr) | |
| 4149 return false; | |
| 4150 if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) { | |
| 4151 assert(!VMetadata->isMultiDef(Var)); | |
| 4152 if (llvm::isa<InstAssign>(VarAssign)) { | |
| 4153 Operand *SrcOp = VarAssign->getSrc(0); | |
| 4154 assert(SrcOp); | |
| 4155 if (Variable *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) { | |
| 4156 if (!VMetadata->isMultiDef(SrcVar) && | |
| 4157 // TODO: ensure SrcVar stays single-BB | |
| 4158 true) { | |
| 4159 Var = SrcVar; | |
| 4160 Reason = VarAssign; | |
| 4161 return true; | |
| 4162 } | |
| 4163 } | |
| 4164 } | |
| 4165 } | |
| 4166 return false; | |
| 4167 } | |
| 4168 | |
| 4169 bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable *&Base, | |
| 4170 Variable *&Index, uint16_t &Shift, | |
| 4171 const Inst *&Reason) { | |
| 4172 // Index==nullptr && Base is Base=Var1+Var2 ==> | |
| 4173 // set Base=Var1, Index=Var2, Shift=0 | |
| 4174 if (Base == nullptr) | |
| 4175 return false; | |
| 4176 if (Index != nullptr) | |
| 4177 return false; | |
| 4178 const Inst *BaseInst = VMetadata->getSingleDefinition(Base); | |
| 4179 if (BaseInst == nullptr) | |
| 4180 return false; | |
| 4181 assert(!VMetadata->isMultiDef(Base)); | |
| 4182 if (BaseInst->getSrcSize() < 2) | |
| 4183 return false; | |
| 4184 if (Variable *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0))) { | |
| 4185 if (VMetadata->isMultiDef(Var1)) | |
| 4186 return false; | |
| 4187 if (Variable *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1))) { | |
| 4188 if (VMetadata->isMultiDef(Var2)) | |
| 4189 return false; | |
| 4190 if (isAdd(BaseInst) && | |
| 4191 // TODO: ensure Var1 and Var2 stay single-BB | |
| 4192 true) { | |
| 4193 Base = Var1; | |
| 4194 Index = Var2; | |
| 4195 Shift = 0; // should already have been 0 | |
| 4196 Reason = BaseInst; | |
| 4197 return true; | |
| 4198 } | |
| 4199 } | |
| 4200 } | |
| 4201 return false; | |
| 4202 } | |
| 4203 | |
| 4204 bool matchShiftedIndex(const VariablesMetadata *VMetadata, Variable *&Index, | |
| 4205 uint16_t &Shift, const Inst *&Reason) { | |
| 4206 // Index is Index=Var*Const && log2(Const)+Shift<=3 ==> | |
| 4207 // Index=Var, Shift+=log2(Const) | |
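| // e.g., if Index was defined by "t = x * 4" and Shift is 1, the match | |
| // yields Index = x, Shift = 3 (an effective scale of 8). | |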
| 4208 if (Index == nullptr) | |
| 4209 return false; | |
| 4210 const Inst *IndexInst = VMetadata->getSingleDefinition(Index); | |
| 4211 if (IndexInst == nullptr) | |
| 4212 return false; | |
| 4213 assert(!VMetadata->isMultiDef(Index)); | |
| 4214 if (IndexInst->getSrcSize() < 2) | |
| 4215 return false; | |
| 4216 if (const InstArithmetic *ArithInst = | |
| 4217 llvm::dyn_cast<InstArithmetic>(IndexInst)) { | |
| 4218 if (Variable *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) { | |
| 4219 if (ConstantInteger32 *Const = | |
| 4220 llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1))) { | |
| 4221 if (ArithInst->getOp() == InstArithmetic::Mul && | |
| 4222 !VMetadata->isMultiDef(Var) && Const->getType() == IceType_i32) { | |
| 4223 uint64_t Mult = Const->getValue(); | |
| 4224 uint32_t LogMult; | |
| 4225 switch (Mult) { | |
| 4226 case 1: | |
| 4227 LogMult = 0; | |
| 4228 break; | |
| 4229 case 2: | |
| 4230 LogMult = 1; | |
| 4231 break; | |
| 4232 case 4: | |
| 4233 LogMult = 2; | |
| 4234 break; | |
| 4235 case 8: | |
| 4236 LogMult = 3; | |
| 4237 break; | |
| 4238 default: | |
| 4239 return false; | |
| 4240 } | |
| 4241 if (Shift + LogMult <= 3) { | |
| 4242 Index = Var; | |
| 4243 Shift += LogMult; | |
| 4244 Reason = IndexInst; | |
| 4245 return true; | |
| 4246 } | |
| 4247 } | |
| 4248 } | |
| 4249 } | |
| 4250 } | |
| 4251 return false; | |
| 4252 } | |
| 4253 | |
| 4254 bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable *&Base, | |
| 4255 int32_t &Offset, const Inst *&Reason) { | |
| 4256 // Base is Base=Var+Const || Base is Base=Const+Var ==> | |
| 4257 // set Base=Var, Offset+=Const | |
| 4258 // Base is Base=Var-Const ==> | |
| 4259 // set Base=Var, Offset-=Const | |
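| // e.g., if Base was defined by "a = b - 16" and Offset is 4, the match | |
| // yields Base = b, Offset = -12. | |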
| 4260 if (Base == nullptr) | |
| 4261 return false; | |
| 4262 const Inst *BaseInst = VMetadata->getSingleDefinition(Base); | |
| 4263 if (BaseInst == nullptr) | |
| 4264 return false; | |
| 4265 assert(!VMetadata->isMultiDef(Base)); | |
| 4266 if (const InstArithmetic *ArithInst = | |
| 4267 llvm::dyn_cast<const InstArithmetic>(BaseInst)) { | |
| 4268 if (ArithInst->getOp() != InstArithmetic::Add && | |
| 4269 ArithInst->getOp() != InstArithmetic::Sub) | |
| 4270 return false; | |
| 4271 bool IsAdd = ArithInst->getOp() == InstArithmetic::Add; | |
| 4272 Variable *Var = nullptr; | |
| 4273 ConstantInteger32 *Const = nullptr; | |
| 4274 if (Variable *VariableOperand = | |
| 4275 llvm::dyn_cast<Variable>(ArithInst->getSrc(0))) { | |
| 4276 Var = VariableOperand; | |
| 4277 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1)); | |
| 4278 } else if (IsAdd) { | |
| 4279 Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(0)); | |
| 4280 Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(1)); | |
| 4281 } | |
| 4282 if (Var == nullptr || Const == nullptr || VMetadata->isMultiDef(Var)) | |
| 4283 return false; | |
| 4284 int32_t MoreOffset = IsAdd ? Const->getValue() : -Const->getValue(); | |
| 4285 if (Utils::WouldOverflowAdd(Offset, MoreOffset)) | |
| 4286 return false; | |
| 4287 Base = Var; | |
| 4288 Offset += MoreOffset; | |
| 4289 Reason = BaseInst; | |
| 4290 return true; | |
| 4291 } | |
| 4292 return false; | |
| 4293 } | |
| 4294 | |
| 4295 void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base, | |
| 4296 Variable *&Index, uint16_t &Shift, int32_t &Offset) { | |
| 4297 Func->resetCurrentNode(); | |
| 4298 if (Func->isVerbose(IceV_AddrOpt)) { | |
| 4299 OstreamLocker L(Func->getContext()); | |
| 4300 Ostream &Str = Func->getContext()->getStrDump(); | |
| 4301 Str << "\nStarting computeAddressOpt for instruction:\n "; | |
| 4302 Instr->dumpDecorated(Func); | |
| 4303 } | |
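| // The matchers below try to fold the instructions defining Base/Index | |
| // into the x86 addressing form Base + (Index << Shift) + Offset. | |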
| 4304 (void)Offset; // TODO: pattern-match for non-zero offsets. | |
| 4305 if (Base == nullptr) | |
| 4306 return; | |
| 4307 // If the Base has more than one use or is live across multiple | |
| 4308 // blocks, then don't go further. Alternatively (?), never consider | |
| 4309 // a transformation that would change a variable that is currently | |
| 4310 // *not* live across basic block boundaries into one that *is*. | |
| 4311 if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/) | |
| 4312 return; | |
| 4313 | |
| 4314 const VariablesMetadata *VMetadata = Func->getVMetadata(); | |
| 4315 bool Continue = true; | |
| 4316 while (Continue) { | |
| 4317 const Inst *Reason = nullptr; | |
| 4318 if (matchTransitiveAssign(VMetadata, Base, Reason) || | |
| 4319 matchTransitiveAssign(VMetadata, Index, Reason) || | |
| 4320 matchCombinedBaseIndex(VMetadata, Base, Index, Shift, Reason) || | |
| 4321 matchShiftedIndex(VMetadata, Index, Shift, Reason) || | |
| 4322 matchOffsetBase(VMetadata, Base, Offset, Reason)) { | |
| 4323 dumpAddressOpt(Func, Base, Index, Shift, Offset, Reason); | |
| 4324 } else { | |
| 4325 Continue = false; | |
| 4326 } | |
| 4327 | |
| 4328 // Index is Index=Var<<Const && Const+Shift<=3 ==> | |
| 4329 // Index=Var, Shift+=Const | |
| 4330 | |
| 4331 // Index is Index=Const*Var && log2(Const)+Shift<=3 ==> | |
| 4332 // Index=Var, Shift+=log2(Const) | |
| 4333 | |
| 4334 // Index && Shift==0 && Base is Base=Var*Const && log2(Const)+Shift<=3 ==> | |
| 4335 // swap(Index,Base) | |
| 4336 // Similar for Base=Const*Var and Base=Var<<Const | |
| 4337 | |
| 4338 // Index is Index=Var+Const ==> | |
| 4339 // set Index=Var, Offset+=(Const<<Shift) | |
| 4340 | |
| 4341 // Index is Index=Const+Var ==> | |
| 4342 // set Index=Var, Offset+=(Const<<Shift) | |
| 4343 | |
| 4344 // Index is Index=Var-Const ==> | |
| 4345 // set Index=Var, Offset-=(Const<<Shift) | |
| 4346 | |
| 4347 // TODO: consider overflow issues with respect to Offset. | |
| 4348 // TODO: handle symbolic constants. | |
| 4349 } | |
| 4350 } | |
| 4351 | |
| 4352 } // anonymous namespace | |
| 4353 | |
| 4354 void TargetX8632::lowerLoad(const InstLoad *Load) { | |
| 4355 // A Load instruction can be treated the same as an Assign | |
| 4356 // instruction, after the source operand is transformed into an | |
| 4357 // OperandX8632Mem operand. Note that the address mode | |
| 4358 // optimization already creates an OperandX8632Mem operand, so it | |
| 4359 // doesn't need another level of transformation. | |
| 4360 Variable *DestLoad = Load->getDest(); | |
| 4361 Type Ty = DestLoad->getType(); | |
| 4362 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); | |
| 4363 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); | |
| 4364 lowerAssign(Assign); | |
| 4365 } | |
| 4366 | |
| 4367 void TargetX8632::doAddressOptLoad() { | |
| 4368 Inst *Inst = Context.getCur(); | |
| 4369 Variable *Dest = Inst->getDest(); | |
| 4370 Operand *Addr = Inst->getSrc(0); | |
| 4371 Variable *Index = nullptr; | |
| 4372 uint16_t Shift = 0; | |
| 4373 int32_t Offset = 0; // TODO: make Constant | |
| 4374 // Vanilla ICE load instructions should not use the segment registers, | |
| 4375 // and computeAddressOpt only works at the level of Variables and Constants, | |
| 4376 // not other OperandX8632Mem, so there should be no mention of segment | |
| 4377 // registers there either. | |
| 4378 const OperandX8632Mem::SegmentRegisters SegmentReg = | |
| 4379 OperandX8632Mem::DefaultSegment; | |
| 4380 Variable *Base = llvm::dyn_cast<Variable>(Addr); | |
| 4381 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); | |
| 4382 if (Base && Addr != Base) { | |
| 4383 Inst->setDeleted(); | |
| 4384 Constant *OffsetOp = Ctx->getConstantInt32(Offset); | |
| 4385 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, | |
| 4386 Shift, SegmentReg); | |
| 4387 Context.insert(InstLoad::create(Func, Dest, Addr)); | |
| 4388 } | |
| 4389 } | |
| 4390 | |
| 4391 void TargetX8632::randomlyInsertNop(float Probability) { | |
| 4392 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); | |
| 4393 if (RNG.getTrueWithProbability(Probability)) { | |
| 4394 _nop(RNG(X86_NUM_NOP_VARIANTS)); | |
| 4395 } | |
| 4396 } | |
| 4397 | |
| 4398 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) { | |
| 4399 Func->setError("Phi found in regular instruction list"); | |
| 4400 } | |
| 4401 | |
| 4402 void TargetX8632::lowerRet(const InstRet *Inst) { | |
| 4403 Variable *Reg = nullptr; | |
| 4404 if (Inst->hasRetValue()) { | |
| 4405 Operand *Src0 = legalize(Inst->getRetValue()); | |
| 4406 if (Src0->getType() == IceType_i64) { | |
| 4407 Variable *eax = legalizeToVar(loOperand(Src0), RegX8632::Reg_eax); | |
| 4408 Variable *edx = legalizeToVar(hiOperand(Src0), RegX8632::Reg_edx); | |
| 4409 Reg = eax; | |
| 4410 Context.insert(InstFakeUse::create(Func, edx)); | |
| 4411 } else if (isScalarFloatingType(Src0->getType())) { | |
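| // The x86-32 calling convention returns scalar FP values on the | |
| // x87 stack in st(0), hence the fld here. | |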
| 4412 _fld(Src0); | |
| 4413 } else if (isVectorType(Src0->getType())) { | |
| 4414 Reg = legalizeToVar(Src0, RegX8632::Reg_xmm0); | |
| 4415 } else { | |
| 4416 _mov(Reg, Src0, RegX8632::Reg_eax); | |
| 4417 } | |
| 4418 } | |
| 4419 // Add a ret instruction even if sandboxing is enabled, because | |
| 4420 // addEpilog explicitly looks for a ret instruction as a marker for | |
| 4421 // where to insert the frame removal instructions. | |
| 4422 _ret(Reg); | |
| 4423 // Add a fake use of esp to make sure esp stays alive for the entire | |
| 4424 // function. Otherwise post-call esp adjustments get dead-code | |
| 4425 // eliminated. TODO: Are there more places where the fake use | |
| 4426 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not | |
| 4427 // have a ret instruction. | |
| 4428 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); | |
| 4429 Context.insert(InstFakeUse::create(Func, esp)); | |
| 4430 } | |
| 4431 | |
| 4432 void TargetX8632::lowerSelect(const InstSelect *Inst) { | |
| 4433 Variable *Dest = Inst->getDest(); | |
| 4434 Type DestTy = Dest->getType(); | |
| 4435 Operand *SrcT = Inst->getTrueOperand(); | |
| 4436 Operand *SrcF = Inst->getFalseOperand(); | |
| 4437 Operand *Condition = Inst->getCondition(); | |
| 4438 | |
| 4439 if (isVectorType(DestTy)) { | |
| 4440 Type SrcTy = SrcT->getType(); | |
| 4441 Variable *T = makeReg(SrcTy); | |
| 4442 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); | |
| 4443 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); | |
| 4444 if (InstructionSet >= SSE4_1) { | |
| 4445 // TODO(wala): If the condition operand is a constant, use blendps | |
| 4446 // or pblendw. | |
| 4447 // | |
| 4448 // Use blendvps or pblendvb to implement select. | |
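| // blendvps/pblendvb select each lane based on the sign bit of the | |
| // mask in xmm0, so the i1 condition is first shifted into the sign | |
| // bit position (psll 31) or sign-extended to full lane width below. | |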
| 4449 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || | |
| 4450 SrcTy == IceType_v4f32) { | |
| 4451 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | |
| 4452 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0); | |
| 4453 _movp(xmm0, ConditionRM); | |
| 4454 _psll(xmm0, Ctx->getConstantInt8(31)); | |
| 4455 _movp(T, SrcFRM); | |
| 4456 _blendvps(T, SrcTRM, xmm0); | |
| 4457 _movp(Dest, T); | |
| 4458 } else { | |
| 4459 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); | |
| 4460 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 | |
| 4461 : IceType_v16i8; | |
| 4462 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0); | |
| 4463 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); | |
| 4464 _movp(T, SrcFRM); | |
| 4465 _pblendvb(T, SrcTRM, xmm0); | |
| 4466 _movp(Dest, T); | |
| 4467 } | |
| 4468 return; | |
| 4469 } | |
| 4470 // Lower select without SSE4.1: | |
| 4471 // a=d?b:c ==> | |
| 4472 // if elementtype(d) != i1: | |
| 4473 // d=sext(d); | |
| 4474 // a=(b&d)|(c&~d); | |
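| // Per lane, d is all-ones or all-zeros after the sext, so d == -1 | |
| // picks b (b&d == b, c&~d == 0) and d == 0 picks c. | |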
| 4475 Variable *T2 = makeReg(SrcTy); | |
| 4476 // Sign extend the condition operand if applicable. | |
| 4477 if (SrcTy == IceType_v4f32) { | |
| 4478 // The sext operation takes only integer arguments. | |
| 4479 Variable *T3 = Func->makeVariable(IceType_v4i32); | |
| 4480 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); | |
| 4481 _movp(T, T3); | |
| 4482 } else if (typeElementType(SrcTy) != IceType_i1) { | |
| 4483 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); | |
| 4484 } else { | |
| 4485 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | |
| 4486 _movp(T, ConditionRM); | |
| 4487 } | |
| 4488 _movp(T2, T); | |
| 4489 _pand(T, SrcTRM); | |
| 4490 _pandn(T2, SrcFRM); | |
| 4491 _por(T, T2); | |
| 4492 _movp(Dest, T); | |
| 4493 | |
| 4494 return; | |
| 4495 } | |
| 4496 | |
| 4497 CondX86::BrCond Cond = CondX86::Br_ne; | |
| 4498 Operand *CmpOpnd0 = nullptr; | |
| 4499 Operand *CmpOpnd1 = nullptr; | |
| 4500 // Handle folding opportunities. | |
| 4501 if (const class Inst *Producer = FoldingInfo.getProducerFor(Condition)) { | |
| 4502 assert(Producer->isDeleted()); | |
| 4503 switch (BoolFolding::getProducerKind(Producer)) { | |
| 4504 default: | |
| 4505 break; | |
| 4506 case BoolFolding::PK_Icmp32: { | |
| 4507 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); | |
| 4508 Cond = getIcmp32Mapping(Cmp->getCondition()); | |
| 4509 CmpOpnd1 = legalize(Producer->getSrc(1)); | |
| 4510 CmpOpnd0 = legalizeSrc0ForCmp(Producer->getSrc(0), CmpOpnd1); | |
| 4511 } break; | |
| 4512 } | |
| 4513 } | |
| 4514 if (CmpOpnd0 == nullptr) { | |
| 4515 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem); | |
| 4516 CmpOpnd1 = Ctx->getConstantZero(IceType_i32); | |
| 4517 } | |
| 4518 assert(CmpOpnd0); | |
| 4519 assert(CmpOpnd1); | |
| 4520 | |
| 4521 _cmp(CmpOpnd0, CmpOpnd1); | |
| 4522 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { | |
| 4523 // The cmov instruction doesn't allow 8-bit or FP operands, so | |
| 4524 // we need explicit control flow. | |
| 4525 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: | |
| 4526 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
| 4527 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); | |
| 4528 _mov(Dest, SrcT); | |
| 4529 _br(Cond, Label); | |
| 4530 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); | |
| 4531 _mov_nonkillable(Dest, SrcF); | |
| 4532 Context.insert(Label); | |
| 4533 return; | |
| 4534 } | |
| 4535 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t | |
| 4536 // But if SrcT is immediate, we might be able to do better, as | |
| 4537 // the cmov instruction doesn't allow an immediate operand: | |
| 4538 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t | |
| 4539 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { | |
| 4540 std::swap(SrcT, SrcF); | |
| 4541 Cond = InstX8632::getOppositeCondition(Cond); | |
| 4542 } | |
| 4543 if (DestTy == IceType_i64) { | |
| 4544 // Set the low portion. | |
| 4545 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 4546 Variable *TLo = nullptr; | |
| 4547 Operand *SrcFLo = legalize(loOperand(SrcF)); | |
| 4548 _mov(TLo, SrcFLo); | |
| 4549 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem); | |
| 4550 _cmov(TLo, SrcTLo, Cond); | |
| 4551 _mov(DestLo, TLo); | |
| 4552 // Set the high portion. | |
| 4553 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 4554 Variable *THi = nullptr; | |
| 4555 Operand *SrcFHi = legalize(hiOperand(SrcF)); | |
| 4556 _mov(THi, SrcFHi); | |
| 4557 Operand *SrcTHi = legalize(hiOperand(SrcT), Legal_Reg | Legal_Mem); | |
| 4558 _cmov(THi, SrcTHi, Cond); | |
| 4559 _mov(DestHi, THi); | |
| 4560 return; | |
| 4561 } | |
| 4562 | |
| 4563 assert(DestTy == IceType_i16 || DestTy == IceType_i32); | |
| 4564 Variable *T = nullptr; | |
| 4565 SrcF = legalize(SrcF); | |
| 4566 _mov(T, SrcF); | |
| 4567 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); | |
| 4568 _cmov(T, SrcT, Cond); | |
| 4569 _mov(Dest, T); | |
| 4570 } | |
| 4571 | |
| 4572 void TargetX8632::lowerStore(const InstStore *Inst) { | |
| 4573 Operand *Value = Inst->getData(); | |
| 4574 Operand *Addr = Inst->getAddr(); | |
| 4575 OperandX8632Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); | |
| 4576 Type Ty = NewAddr->getType(); | |
| 4577 | |
| 4578 if (Ty == IceType_i64) { | |
| 4579 Value = legalize(Value); | |
| 4580 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); | |
| 4581 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); | |
| 4582 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); | |
| 4583 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); | |
| 4584 } else if (isVectorType(Ty)) { | |
| 4585 _storep(legalizeToVar(Value), NewAddr); | |
| 4586 } else { | |
| 4587 Value = legalize(Value, Legal_Reg | Legal_Imm); | |
| 4588 _store(Value, NewAddr); | |
| 4589 } | |
| 4590 } | |
| 4591 | |
| 4592 void TargetX8632::doAddressOptStore() { | |
| 4593 InstStore *Inst = llvm::cast<InstStore>(Context.getCur()); | |
| 4594 Operand *Data = Inst->getData(); | |
| 4595 Operand *Addr = Inst->getAddr(); | |
| 4596 Variable *Index = nullptr; | |
| 4597 uint16_t Shift = 0; | |
| 4598 int32_t Offset = 0; // TODO: make Constant | |
| 4599 Variable *Base = llvm::dyn_cast<Variable>(Addr); | |
| 4600 // Vanilla ICE store instructions should not use the segment registers, | |
| 4601 // and computeAddressOpt only works at the level of Variables and Constants, | |
| 4602 // not other OperandX8632Mem, so there should be no mention of segment | |
| 4603 // registers there either. | |
| 4604 const OperandX8632Mem::SegmentRegisters SegmentReg = | |
| 4605 OperandX8632Mem::DefaultSegment; | |
| 4606 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); | |
| 4607 if (Base && Addr != Base) { | |
| 4608 Inst->setDeleted(); | |
| 4609 Constant *OffsetOp = Ctx->getConstantInt32(Offset); | |
| 4610 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, | |
| 4611 Shift, SegmentReg); | |
| 4612 InstStore *NewStore = InstStore::create(Func, Data, Addr); | |
| 4613 if (Inst->getDest()) | |
| 4614 NewStore->setRmwBeacon(Inst->getRmwBeacon()); | |
| 4615 Context.insert(NewStore); | |
| 4616 } | |
| 4617 } | |
| 4618 | |
| 4619 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { | |
| 4620 // This implements the most naive possible lowering. | |
| 4621 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default | |
| 4622 Operand *Src0 = Inst->getComparison(); | |
| 4623 SizeT NumCases = Inst->getNumCases(); | |
| 4624 if (Src0->getType() == IceType_i64) { | |
| 4625 Src0 = legalize(Src0); // get Base/Index into physical registers | |
| 4626 Operand *Src0Lo = loOperand(Src0); | |
| 4627 Operand *Src0Hi = hiOperand(Src0); | |
| 4628 if (NumCases >= 2) { | |
| 4629 Src0Lo = legalizeToVar(Src0Lo); | |
| 4630 Src0Hi = legalizeToVar(Src0Hi); | |
| 4631 } else { | |
| 4632 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem); | |
| 4633 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); | |
| 4634 } | |
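| // Each 64-bit case tests the low half first and skips to the next | |
| // case on mismatch, jumping to the case label only when both halves | |
| // match; a sketch per case: | |
| //   cmp srcLo, valLo | |
| //   jne .Lskip | |
| //   cmp srcHi, valHi | |
| //   je <case_label> | |
| // .Lskip: | |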
| 4635 for (SizeT I = 0; I < NumCases; ++I) { | |
| 4636 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I)); | |
| 4637 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32); | |
| 4638 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
| 4639 _cmp(Src0Lo, ValueLo); | |
| 4640 _br(CondX86::Br_ne, Label); | |
| 4641 _cmp(Src0Hi, ValueHi); | |
| 4642 _br(CondX86::Br_e, Inst->getLabel(I)); | |
| 4643 Context.insert(Label); | |
| 4644 } | |
| 4645 _br(Inst->getLabelDefault()); | |
| 4646 return; | |
| 4647 } | |
| 4648 // OK, we'll be slightly less naive by forcing Src into a physical | |
| 4649 // register if there are 2 or more uses. | |
| 4650 if (NumCases >= 2) | |
| 4651 Src0 = legalizeToVar(Src0); | |
| 4652 else | |
| 4653 Src0 = legalize(Src0, Legal_Reg | Legal_Mem); | |
| 4654 for (SizeT I = 0; I < NumCases; ++I) { | |
| 4655 Constant *Value = Ctx->getConstantInt32(Inst->getValue(I)); | |
| 4656 _cmp(Src0, Value); | |
| 4657 _br(CondX86::Br_e, Inst->getLabel(I)); | |
| 4658 } | |
| 4659 | |
| 4660 _br(Inst->getLabelDefault()); | |
| 4661 } | |
| 4662 | |
| 4663 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind, | |
| 4664 Variable *Dest, Operand *Src0, | |
| 4665 Operand *Src1) { | |
| 4666 assert(isVectorType(Dest->getType())); | |
| 4667 Type Ty = Dest->getType(); | |
| 4668 Type ElementTy = typeElementType(Ty); | |
| 4669 SizeT NumElements = typeNumElements(Ty); | |
| 4670 | |
| 4671 Operand *T = Ctx->getConstantUndef(Ty); | |
| 4672 for (SizeT I = 0; I < NumElements; ++I) { | |
| 4673 Constant *Index = Ctx->getConstantInt32(I); | |
| 4674 | |
| 4675 // Extract the next two inputs. | |
| 4676 Variable *Op0 = Func->makeVariable(ElementTy); | |
| 4677 lowerExtractElement(InstExtractElement::create(Func, Op0, Src0, Index)); | |
| 4678 Variable *Op1 = Func->makeVariable(ElementTy); | |
| 4679 lowerExtractElement(InstExtractElement::create(Func, Op1, Src1, Index)); | |
| 4680 | |
| 4681 // Perform the arithmetic as a scalar operation. | |
| 4682 Variable *Res = Func->makeVariable(ElementTy); | |
| 4683 lowerArithmetic(InstArithmetic::create(Func, Kind, Res, Op0, Op1)); | |
| 4684 | |
| 4685 // Insert the result into position. | |
| 4686 Variable *DestT = Func->makeVariable(Ty); | |
| 4687 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); | |
| 4688 T = DestT; | |
| 4689 } | |
| 4690 | |
| 4691 lowerAssign(InstAssign::create(Func, Dest, T)); | |
| 4692 } | |
| 4693 | |
| 4694 // The following pattern occurs often in lowered C and C++ code: | |
| 4695 // | |
| 4696 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 | |
| 4697 // %cmp.ext = sext <n x i1> %cmp to <n x ty> | |
| 4698 // | |
| 4699 // We can eliminate the sext operation by copying the result of pcmpeqd, | |
| 4700 // pcmpgtd, or cmpps (which produce sign extended results) to the result | |
| 4701 // of the sext operation. | |
| 4702 void TargetX8632::eliminateNextVectorSextInstruction( | |
| 4703 Variable *SignExtendedResult) { | |
| 4704 if (InstCast *NextCast = | |
| 4705 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { | |
| 4706 if (NextCast->getCastKind() == InstCast::Sext && | |
| 4707 NextCast->getSrc(0) == SignExtendedResult) { | |
| 4708 NextCast->setDeleted(); | |
| 4709 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); | |
| 4710 // Skip over the instruction. | |
| 4711 Context.advanceNext(); | |
| 4712 } | |
| 4713 } | |
| 4714 } | |
| 4715 | |
| 4716 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { _ud2(); } | |
| 4717 | |
| 4718 void TargetX8632::lowerRMW(const InstX8632FakeRMW *RMW) { | |
| 4719 // If the beacon variable's live range does not end in this | |
| 4720 // instruction, then it must end in the modified Store instruction | |
| 4721 // that follows. This means that the original Store instruction is | |
| 4722 // still there, either because the value being stored is used beyond | |
| 4723 // the Store instruction, or because dead code elimination did not | |
| 4724 // happen. In either case, we cancel RMW lowering (and the caller | |
| 4725 // deletes the RMW instruction). | |
| 4726 if (!RMW->isLastUse(RMW->getBeacon())) | |
| 4727 return; | |
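| // e.g., for i32 this turns a load/add/store sequence into a single | |
| // read-modify-write instruction such as "add [addr], src". | |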
| 4728 Operand *Src = RMW->getData(); | |
| 4729 Type Ty = Src->getType(); | |
| 4730 OperandX8632Mem *Addr = formMemoryOperand(RMW->getAddr(), Ty); | |
| 4731 if (Ty == IceType_i64) { | |
| 4732 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm); | |
| 4733 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm); | |
| 4734 OperandX8632Mem *AddrLo = llvm::cast<OperandX8632Mem>(loOperand(Addr)); | |
| 4735 OperandX8632Mem *AddrHi = llvm::cast<OperandX8632Mem>(hiOperand(Addr)); | |
| 4736 switch (RMW->getOp()) { | |
| 4737 default: | |
| 4738 // TODO(stichnot): Implement other arithmetic operators. | |
| 4739 break; | |
| 4740 case InstArithmetic::Add: | |
| 4741 _add_rmw(AddrLo, SrcLo); | |
| 4742 _adc_rmw(AddrHi, SrcHi); | |
| 4743 return; | |
| 4744 case InstArithmetic::Sub: | |
| 4745 _sub_rmw(AddrLo, SrcLo); | |
| 4746 _sbb_rmw(AddrHi, SrcHi); | |
| 4747 return; | |
| 4748 case InstArithmetic::And: | |
| 4749 _and_rmw(AddrLo, SrcLo); | |
| 4750 _and_rmw(AddrHi, SrcHi); | |
| 4751 return; | |
| 4752 case InstArithmetic::Or: | |
| 4753 _or_rmw(AddrLo, SrcLo); | |
| 4754 _or_rmw(AddrHi, SrcHi); | |
| 4755 return; | |
| 4756 case InstArithmetic::Xor: | |
| 4757 _xor_rmw(AddrLo, SrcLo); | |
| 4758 _xor_rmw(AddrHi, SrcHi); | |
| 4759 return; | |
| 4760 } | |
| 4761 } else { | |
| 4762 // i8, i16, i32 | |
| 4763 switch (RMW->getOp()) { | |
| 4764 default: | |
| 4765 // TODO(stichnot): Implement other arithmetic operators. | |
| 4766 break; | |
| 4767 case InstArithmetic::Add: | |
| 4768 Src = legalize(Src, Legal_Reg | Legal_Imm); | |
| 4769 _add_rmw(Addr, Src); | |
| 4770 return; | |
| 4771 case InstArithmetic::Sub: | |
| 4772 Src = legalize(Src, Legal_Reg | Legal_Imm); | |
| 4773 _sub_rmw(Addr, Src); | |
| 4774 return; | |
| 4775 case InstArithmetic::And: | |
| 4776 Src = legalize(Src, Legal_Reg | Legal_Imm); | |
| 4777 _and_rmw(Addr, Src); | |
| 4778 return; | |
| 4779 case InstArithmetic::Or: | |
| 4780 Src = legalize(Src, Legal_Reg | Legal_Imm); | |
| 4781 _or_rmw(Addr, Src); | |
| 4782 return; | |
| 4783 case InstArithmetic::Xor: | |
| 4784 Src = legalize(Src, Legal_Reg | Legal_Imm); | |
| 4785 _xor_rmw(Addr, Src); | |
| 4786 return; | |
| 4787 } | |
| 4788 } | |
| 4789 llvm::report_fatal_error("Couldn't lower RMW instruction"); | |
| 4790 } | |
| 4791 | |
| 4792 void TargetX8632::lowerOther(const Inst *Instr) { | |
| 4793 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { | |
| 4794 lowerRMW(RMW); | |
| 4795 } else { | |
| 4796 TargetLowering::lowerOther(Instr); | |
| 4797 } | |
| 4798 } | |
| 4799 | |
| 4800 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to | |
| 4801 // preserve integrity of liveness analysis. Undef values are also | |
| 4802 // turned into zeroes, since loOperand() and hiOperand() don't expect | |
| 4803 // Undef input. | |
| 4804 void TargetX8632::prelowerPhis() { | |
| 4805 // Pause constant blinding or pooling; blinding or pooling will be done | |
| 4806 // later during phi lowering assignments. | |
| 4807 BoolFlagSaver B(RandomizationPoolingPaused, true); | |
| 4808 | |
| 4809 CfgNode *Node = Context.getNode(); | |
| 4810 for (Inst &I : Node->getPhis()) { | |
| 4811 auto Phi = llvm::dyn_cast<InstPhi>(&I); | |
| 4812 if (Phi->isDeleted()) | |
| 4813 continue; | |
| 4814 Variable *Dest = Phi->getDest(); | |
| 4815 if (Dest->getType() == IceType_i64) { | |
| 4816 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
| 4817 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
| 4818 InstPhi *PhiLo = InstPhi::create(Func, Phi->getSrcSize(), DestLo); | |
| 4819 InstPhi *PhiHi = InstPhi::create(Func, Phi->getSrcSize(), DestHi); | |
| 4820 for (SizeT I = 0; I < Phi->getSrcSize(); ++I) { | |
| 4821 Operand *Src = Phi->getSrc(I); | |
| 4822 CfgNode *Label = Phi->getLabel(I); | |
| 4823 if (llvm::isa<ConstantUndef>(Src)) | |
| 4824 Src = Ctx->getConstantZero(Dest->getType()); | |
| 4825 PhiLo->addArgument(loOperand(Src), Label); | |
| 4826 PhiHi->addArgument(hiOperand(Src), Label); | |
| 4827 } | |
| 4828 Node->getPhis().push_back(PhiLo); | |
| 4829 Node->getPhis().push_back(PhiHi); | |
| 4830 Phi->setDeleted(); | |
| 4831 } | |
| 4832 } | |
| 4833 } | |
| 4834 | |
| 4835 namespace { | |
| 4836 | |
| 4837 bool isMemoryOperand(const Operand *Opnd) { | |
| 4838 if (const auto Var = llvm::dyn_cast<Variable>(Opnd)) | |
| 4839 return !Var->hasReg(); | |
| 4840 // We treat vector undef values the same as a memory operand, | |
| 4841 // because they do in fact need a register to materialize the vector | |
| 4842 // of zeroes into. | |
| 4843 if (llvm::isa<ConstantUndef>(Opnd)) | |
| 4844 return isScalarFloatingType(Opnd->getType()) || | |
| 4845 isVectorType(Opnd->getType()); | |
| 4846 if (llvm::isa<Constant>(Opnd)) | |
| 4847 return isScalarFloatingType(Opnd->getType()); | |
| 4848 return true; | |
| 4849 } | |
| 4850 | |
| 4851 } // end of anonymous namespace | |
| 4852 | |
| 4853 // Lower the pre-ordered list of assignments into mov instructions. | |
| 4854 // Also has to do some ad-hoc register allocation as necessary. | |
| 4855 void TargetX8632::lowerPhiAssignments(CfgNode *Node, | |
| 4856 const AssignList &Assignments) { | |
| 4857 // Check that this is a properly initialized shell of a node. | |
| 4858 assert(Node->getOutEdges().size() == 1); | |
| 4859 assert(Node->getInsts().empty()); | |
| 4860 assert(Node->getPhis().empty()); | |
| 4861 CfgNode *Succ = Node->getOutEdges().front(); | |
| 4862 getContext().init(Node); | |
| 4863 // Register set setup similar to regAlloc(). | |
| 4864 RegSetMask RegInclude = RegSet_All; | |
| 4865 RegSetMask RegExclude = RegSet_StackPointer; | |
| 4866 if (hasFramePointer()) | |
| 4867 RegExclude |= RegSet_FramePointer; | |
| 4868 llvm::SmallBitVector Available = getRegisterSet(RegInclude, RegExclude); | |
| 4869 bool NeedsRegs = false; | |
| 4870 // Initialize the set of available registers to the set of what is | |
| 4871 // available (not live) at the beginning of the successor block, | |
| 4872 // minus all registers used as Dest operands in the Assignments. To | |
| 4873 // do this, we start off assuming all registers are available, then | |
| 4874 // iterate through the Assignments and remove Dest registers. | |
| 4875 // During this iteration, we also determine whether we will actually | |
| 4876 // need any extra registers for memory-to-memory copies. If so, we | |
| 4877 // do the actual work of removing the live-in registers from the | |
| 4878 // set. TODO(stichnot): This work is being repeated for every split | |
| 4879 // edge to the successor, so consider updating LiveIn just once | |
| 4880 // after all the edges are split. | |
| 4881 for (const Inst &I : Assignments) { | |
| 4882 Variable *Dest = I.getDest(); | |
| 4883 if (Dest->hasReg()) { | |
| 4884 Available[Dest->getRegNum()] = false; | |
| 4885 } else if (isMemoryOperand(I.getSrc(0))) { | |
| 4886 NeedsRegs = true; // Src and Dest are both in memory | |
| 4887 } | |
| 4888 } | |
| 4889 if (NeedsRegs) { | |
| 4890 LivenessBV &LiveIn = Func->getLiveness()->getLiveIn(Succ); | |
| 4891 for (int i = LiveIn.find_first(); i != -1; i = LiveIn.find_next(i)) { | |
| 4892 Variable *Var = Func->getLiveness()->getVariable(i, Succ); | |
| 4893 if (Var->hasReg()) | |
| 4894 Available[Var->getRegNum()] = false; | |
| 4895 } | |
| 4896 } | |
| 4897 // Iterate backwards through the Assignments. After lowering each | |
| 4898 // assignment, add Dest to the set of available registers, and | |
| 4899 // remove Src from the set of available registers. Iteration is | |
| 4900 // done backwards to enable incremental updates of the available | |
| 4901 // register set, and the lowered instruction numbers may be out of | |
| 4902 // order, but that can be worked around by renumbering the block | |
| 4903 // afterwards if necessary. | |
| 4904 for (const Inst &I : reverse_range(Assignments)) { | |
| 4905 Context.rewind(); | |
| 4906 auto Assign = llvm::dyn_cast<InstAssign>(&I); | |
| 4907 Variable *Dest = Assign->getDest(); | |
| 4908 | |
| 4909 // If the source operand is ConstantUndef, do not legalize it. | |
| 4910 // In function test_split_undef_int_vec, the advanced phi | |
| 4911 // lowering process will find an assignment of an undefined | |
| 4912 // vector. This vector, as the Src here, will crash if it | |
| 4913 // goes through legalize(). legalize() will create a new variable | |
| 4914 // with makeVectorOfZeros(), but this new variable will be | |
| 4915 // assigned a stack slot. This will fail the assertion in | |
| 4916 // IceInstX8632.cpp:789, as XmmEmitterRegOp() complains that | |
| 4917 // Var->hasReg() fails. Note this failure is irrelevant to | |
| 4918 // randomization or pooling of constants. | |
| 4919 // So, we do not call legalize() to add a pool label for the | |
| 4920 // src operands of phi assignment instructions. | |
| 4921 // Instead, we manually add pool labels for constant float and | |
| 4922 // constant double values here. | |
| 4923 // Note that going through legalize() does not affect the testing | |
| 4924 // results of SPEC2K and xtests. | |
| 4925 Operand *Src = Assign->getSrc(0); | |
| 4926 if (!llvm::isa<ConstantUndef>(Assign->getSrc(0))) { | |
| 4927 Src = legalize(Src); | |
| 4928 } | |
| 4929 | |
| 4930 Variable *SrcVar = llvm::dyn_cast<Variable>(Src); | |
| 4931 // Use normal assignment lowering, except lower mem=mem specially | |
| 4932 // so we can register-allocate at the same time. | |
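| // A mem=mem copy needs a scratch register; a sketch of the spill case: | |
| //   mov <spill_loc>, <preg>   ; free up a physical register | |
| //   mov <preg>, <src_mem> | |
| //   mov <dest_mem>, <preg> | |
| //   mov <preg>, <spill_loc>   ; restore it | |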
| 4933 if (!isMemoryOperand(Dest) || !isMemoryOperand(Src)) { | |
| 4934 lowerAssign(Assign); | |
| 4935 } else { | |
| 4936 assert(Dest->getType() == Src->getType()); | |
| 4937 const llvm::SmallBitVector &RegsForType = | |
| 4938 getRegisterSetForType(Dest->getType()); | |
| 4939 llvm::SmallBitVector AvailRegsForType = RegsForType & Available; | |
| 4940 Variable *SpillLoc = nullptr; | |
| 4941 Variable *Preg = nullptr; | |
| 4942 // TODO(stichnot): Opportunity for register randomization. | |
| 4943 int32_t RegNum = AvailRegsForType.find_first(); | |
| 4944 bool IsVector = isVectorType(Dest->getType()); | |
| 4945 bool NeedSpill = (RegNum == -1); | |
| 4946 if (NeedSpill) { | |
| 4947 // Pick some register to spill and update RegNum. | |
| 4948 // TODO(stichnot): Opportunity for register randomization. | |
| 4949 RegNum = RegsForType.find_first(); | |
| 4950 Preg = getPhysicalRegister(RegNum, Dest->getType()); | |
| 4951 SpillLoc = Func->makeVariable(Dest->getType()); | |
| 4952 // Create a fake def of the physical register to avoid | |
| 4953 // liveness inconsistency problems during late-stage liveness | |
| 4954 // analysis (e.g. asm-verbose mode). | |
| 4955 Context.insert(InstFakeDef::create(Func, Preg)); | |
| 4956 if (IsVector) | |
| 4957 _movp(SpillLoc, Preg); | |
| 4958 else | |
| 4959 _mov(SpillLoc, Preg); | |
| 4960 } | |
| 4961 assert(RegNum >= 0); | |
| 4962 if (llvm::isa<ConstantUndef>(Src)) | |
| 4963 // Materialize an actual constant instead of undef. RegNum is | |
| 4964 // passed in for vector types because undef vectors are | |
| 4965 // lowered to a vector register of zeroes. | |
| 4966 Src = | |
| 4967 legalize(Src, Legal_All, IsVector ? RegNum : Variable::NoRegister); | |
| 4968 Variable *Tmp = makeReg(Dest->getType(), RegNum); | |
| 4969 if (IsVector) { | |
| 4970 _movp(Tmp, Src); | |
| 4971 _movp(Dest, Tmp); | |
| 4972 } else { | |
| 4973 _mov(Tmp, Src); | |
| 4974 _mov(Dest, Tmp); | |
| 4975 } | |
| 4976 if (NeedSpill) { | |
| 4977 // Restore the spilled register. | |
| 4978 if (IsVector) | |
| 4979 _movp(Preg, SpillLoc); | |
| 4980 else | |
| 4981 _mov(Preg, SpillLoc); | |
| 4982 // Create a fake use of the physical register to keep it live | |
| 4983 // for late-stage liveness analysis (e.g. asm-verbose mode). | |
| 4984 Context.insert(InstFakeUse::create(Func, Preg)); | |
| 4985 } | |
| 4986 } | |
| 4987 // Update register availability before moving to the previous | |
| 4988 // instruction on the Assignments list. | |
| 4989 if (Dest->hasReg()) | |
| 4990 Available[Dest->getRegNum()] = true; | |
| 4991 if (SrcVar && SrcVar->hasReg()) | |
| 4992 Available[SrcVar->getRegNum()] = false; | |
| 4993 } | |
| 4994 | |
| 4995 // Add the terminator branch instruction to the end. | |
| 4996 Context.setInsertPoint(Context.getEnd()); | |
| 4997 _br(Succ); | |
| 4998 } | |
| 4999 | |
| 5000 // There is no support for loading or emitting vector constants, so the | |
| 5001 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, | |
| 5002 // etc. are initialized with register operations. | |
| 5003 // | |
| 5004 // TODO(wala): Add limited support for vector constants so that | |
| 5005 // complex initialization in registers is unnecessary. | |
| 5006 | |
| 5007 Variable *TargetX8632::makeVectorOfZeros(Type Ty, int32_t RegNum) { | |
| 5008 Variable *Reg = makeReg(Ty, RegNum); | |
| 5009 // Insert a FakeDef, since otherwise the live range of Reg might | |
| 5010 // be overestimated. | |
| 5011 Context.insert(InstFakeDef::create(Func, Reg)); | |
| 5012 _pxor(Reg, Reg); | |
| 5013 return Reg; | |
| 5014 } | |
| 5015 | |
| 5016 Variable *TargetX8632::makeVectorOfMinusOnes(Type Ty, int32_t RegNum) { | |
| 5017 Variable *MinusOnes = makeReg(Ty, RegNum); | |
| 5018 // Insert a FakeDef so the live range of MinusOnes is not overestimated. | |
| 5019 Context.insert(InstFakeDef::create(Func, MinusOnes)); | |
| 5020 _pcmpeq(MinusOnes, MinusOnes); | |
| 5021 return MinusOnes; | |
| 5022 } | |
| 5023 | |
| 5024 Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) { | |
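| // Build <1,1,...,1> arithmetically: pxor gives all zeros, pcmpeq | |
| // gives all ones (-1 per lane), and psub computes 0 - (-1) == 1. | |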
| 5025 Variable *Dest = makeVectorOfZeros(Ty, RegNum); | |
| 5026 Variable *MinusOne = makeVectorOfMinusOnes(Ty); | |
| 5027 _psub(Dest, MinusOne); | |
| 5028 return Dest; | |
| 5029 } | |
| 5030 | |
| 5031 Variable *TargetX8632::makeVectorOfHighOrderBits(Type Ty, int32_t RegNum) { | |
| 5032 assert(Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v8i16 || | |
| 5033 Ty == IceType_v16i8); | |
| 5034 if (Ty == IceType_v4f32 || Ty == IceType_v4i32 || Ty == IceType_v8i16) { | |
| 5035 Variable *Reg = makeVectorOfOnes(Ty, RegNum); | |
| 5036 SizeT Shift = typeWidthInBytes(typeElementType(Ty)) * X86_CHAR_BIT - 1; | |
| 5037 _psll(Reg, Ctx->getConstantInt8(Shift)); | |
| 5038 return Reg; | |
| 5039 } else { | |
| 5040 // SSE has no left shift operation for vectors of 8 bit integers. | |
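| // Instead, materialize four 0x80 bytes in one GPR dword and broadcast | |
| // it to all four dwords of the xmm register with pshufd(0). | |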
| 5041 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | |
| 5042 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); | |
| 5043 Variable *Reg = makeReg(Ty, RegNum); | |
| 5044 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); | |
| 5045 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); | |
| 5046 return Reg; | |
| 5047 } | |
| 5048 } | |
| 5049 | |
| 5050 // Construct a mask in a register that can be and'ed with a | |
| 5051 // floating-point value to mask off its sign bit. The value will be | |
| 5052 // <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> | |
| 5053 // for f64. Construct it as a vector of ones logically right shifted | |
| 5054 // one bit. TODO(stichnot): Fix the wala TODO above, to represent | |
| 5055 // vector constants in memory. | |
| 5056 Variable *TargetX8632::makeVectorOfFabsMask(Type Ty, int32_t RegNum) { | |
| 5057 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); | |
| 5058 _psrl(Reg, Ctx->getConstantInt8(1)); | |
| 5059 return Reg; | |
| 5060 } | |
| 5061 | |
| 5062 OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty, | |
| 5063 Variable *Slot, | |
| 5064 uint32_t Offset) { | |
| 5065 // Ensure that Slot is a stack slot. | |
| 5066 assert(Slot->getWeight().isZero()); | |
| 5067 assert(Slot->getRegNum() == Variable::NoRegister); | |
| 5068 // Compute the location of Slot in memory. | |
| 5069 // TODO(wala,stichnot): lea should not be required. The address of | |
| 5070 // the stack slot is known at compile time (although not until after | |
| 5071 // addProlog()). | |
| 5072 const Type PointerType = IceType_i32; | |
| 5073 Variable *Loc = makeReg(PointerType); | |
| 5074 _lea(Loc, Slot); | |
| 5075 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); | |
| 5076 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); | |
| 5077 } | |
| 5078 | |
| 5079 // Helper for legalize() to emit the right code to lower an operand to a | |
| 5080 // register of the appropriate type. | |
| 5081 Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) { | |
| 5082 Type Ty = Src->getType(); | |
| 5083 Variable *Reg = makeReg(Ty, RegNum); | |
| 5084 if (isVectorType(Ty)) { | |
| 5085 _movp(Reg, Src); | |
| 5086 } else { | |
| 5087 _mov(Reg, Src); | |
| 5088 } | |
| 5089 return Reg; | |
| 5090 } | |
| 5091 | |
| 5092 Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed, | |
| 5093 int32_t RegNum) { | |
| 5094 Type Ty = From->getType(); | |
| 5095 // Assert that a physical register is allowed. To date, all calls | |
| 5096 // to legalize() allow a physical register. If a physical register | |
| 5097 // needs to be explicitly disallowed, then new code will need to be | |
| 5098 // written to force a spill. | |
| 5099 assert(Allowed & Legal_Reg); | |
| 5100 // If we're asking for a specific physical register, make sure we're | |
| 5101 // not allowing any other operand kinds. (This could be future | |
| 5102 // work, e.g. allow the shl shift amount to be either an immediate | |
| 5103 // or in ecx.) | |
| 5104 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg); | |
| 5105 | |
| 5106 if (auto Mem = llvm::dyn_cast<OperandX8632Mem>(From)) { | |
| 5107 // Before doing anything with a Mem operand, we need to ensure | |
| 5108 // that the Base and Index components are in physical registers. | |
| 5109 Variable *Base = Mem->getBase(); | |
| 5110 Variable *Index = Mem->getIndex(); | |
| 5111 Variable *RegBase = nullptr; | |
| 5112 Variable *RegIndex = nullptr; | |
| 5113 if (Base) { | |
| 5114 RegBase = legalizeToVar(Base); | |
| 5115 } | |
| 5116 if (Index) { | |
| 5117 RegIndex = legalizeToVar(Index); | |
| 5118 } | |
| 5119 if (Base != RegBase || Index != RegIndex) { | |
| 5120 Mem = | |
| 5121 OperandX8632Mem::create(Func, Ty, RegBase, Mem->getOffset(), RegIndex, | |
| 5122 Mem->getShift(), Mem->getSegmentRegister()); | |
| 5123 } | |
| 5124 | |
| 5125 // For all Memory Operands, we do randomization/pooling here | |
| 5126 From = randomizeOrPoolImmediate(Mem); | |
| 5127 | |
| 5128 if (!(Allowed & Legal_Mem)) { | |
| 5129 From = copyToReg(From, RegNum); | |
| 5130 } | |
| 5131 return From; | |
| 5132 } | |
| 5133 if (auto *Const = llvm::dyn_cast<Constant>(From)) { | |
| 5134 if (llvm::isa<ConstantUndef>(Const)) { | |
| 5135 // Lower undefs to zero. Another option is to lower undefs to an | |
| 5136 // uninitialized register; however, using an uninitialized register | |
| 5137 // results in less predictable code. | |
| 5138 // | |
| 5139 // If in the future the implementation is changed to lower undef | |
| 5140 // values to uninitialized registers, a FakeDef will be needed: | |
| 5141 // Context.insert(InstFakeDef::create(Func, Reg)); | |
| 5142 // This is in order to ensure that the live range of Reg is not | |
| 5143 // overestimated. If the constant being lowered is a 64 bit value, | |
| 5144 // then the result should be split and the lo and hi components will | |
| 5145 // need to go in uninitialized registers. | |
| 5146 if (isVectorType(Ty)) | |
| 5147 return makeVectorOfZeros(Ty, RegNum); | |
| 5148 Const = Ctx->getConstantZero(Ty); | |
| 5149 From = Const; | |
| 5150 } | |
| 5151 // There should be no constants of vector type (other than undef). | |
| 5152 assert(!isVectorType(Ty)); | |
| 5153 | |
| 5154 // If the operand is a 32 bit constant integer, we should check | |
| 5155 // whether we need to randomize it or pool it. | |
| 5156 if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) { | |
| 5157 Operand *NewConst = randomizeOrPoolImmediate(C, RegNum); | |
| 5158 if (NewConst != Const) { | |
| 5159 return NewConst; | |
| 5160 } | |
| 5161 } | |
| 5162 | |
| 5163 // Convert a scalar floating point constant into an explicit | |
| 5164 // memory operand. | |
| 5165 if (isScalarFloatingType(Ty)) { | |
| 5166 Variable *Base = nullptr; | |
| 5167 std::string Buffer; | |
| 5168 llvm::raw_string_ostream StrBuf(Buffer); | |
| 5169 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf); | |
| 5170 llvm::cast<Constant>(From)->setShouldBePooled(true); | |
| 5171 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); | |
| 5172 From = OperandX8632Mem::create(Func, Ty, Base, Offset); | |
| 5173 } | |
| 5174 bool NeedsReg = false; | |
| 5175 if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty)) | |
| 5176 // Immediate specifically not allowed | |
| 5177 NeedsReg = true; | |
| 5178 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) | |
| 5179 // On x86, FP constants are lowered to mem operands. | |
| 5180 NeedsReg = true; | |
| 5181 if (NeedsReg) { | |
| 5182 From = copyToReg(From, RegNum); | |
| 5183 } | |
| 5184 return From; | |
| 5185 } | |
| 5186 if (auto Var = llvm::dyn_cast<Variable>(From)) { | |
| 5187 // Check if the variable is guaranteed a physical register. This | |
| 5188 // can happen either when the variable is pre-colored or when it is | |
| 5189 // assigned infinite weight. | |
| 5190 bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf()); | |
| 5191 // We need a new physical register for the operand if: | |
| 5192 // Mem is not allowed and Var isn't guaranteed a physical | |
| 5193 // register, or | |
| 5194 // RegNum is required and Var->getRegNum() doesn't match. | |
| 5195 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || | |
| 5196 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { | |
| 5197 From = copyToReg(From, RegNum); | |
| 5198 } | |
| 5199 return From; | |
| 5200 } | |
| 5201 llvm_unreachable("Unhandled operand kind in legalize()"); | |
| 5202 return From; | |
| 5203 } | |
| 5204 | |
| 5205 // Provide a trivial wrapper to legalize() for this common usage. | |
| 5206 Variable *TargetX8632::legalizeToVar(Operand *From, int32_t RegNum) { | |
| 5207 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); | |
| 5208 } | |
| 5209 | |
| 5210 // For the cmp instruction, if Src1 is an immediate, or known to be a | |
| 5211 // physical register, we can allow Src0 to be a memory operand. | |
| 5212 // Otherwise, Src0 must be copied into a physical register. | |
| 5213 // (Actually, either Src0 or Src1 can be chosen for the physical | |
| 5214 // register, but unfortunately we have to commit to one or the other | |
| 5215 // before register allocation.) | |
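| // e.g., "cmp [mem], imm" and "cmp [mem], reg" are encodable, but | |
| // "cmp [mem], [mem]" is not, so in that case Src0 goes in a register. | |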
| 5216 Operand *TargetX8632::legalizeSrc0ForCmp(Operand *Src0, Operand *Src1) { | |
| 5217 bool IsSrc1ImmOrReg = false; | |
| 5218 if (llvm::isa<Constant>(Src1)) { | |
| 5219 IsSrc1ImmOrReg = true; | |
| 5220 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { | |
| 5221 if (Var->hasReg()) | |
| 5222 IsSrc1ImmOrReg = true; | |
| 5223 } | |
| 5224 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); | |
| 5225 } | |
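| // Illustrative lowering outcomes (assembly sketches, not actual emitter | |
| // output): if Src1 is the immediate 42, Src0 may remain in memory: | |
| //   cmp dword ptr [esp + 8], 42 | |
| // If both operands are stack slots, Src0 must first be copied, because | |
| // x86 cmp permits at most one memory operand: | |
| //   mov eax, dword ptr [esp + 8] | |
| //   cmp eax, dword ptr [esp + 12] | |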
| 5226 | |
| 5227 OperandX8632Mem *TargetX8632::formMemoryOperand(Operand *Opnd, Type Ty, | |
| 5228 bool DoLegalize) { | |
| 5229 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Opnd); | |
| 5230 // Address mode optimization may already have created an | |
| 5231 // OperandX8632Mem, in which case it doesn't need another level | |
| 5232 // of transformation. | |
| 5233 if (!Mem) { | |
| 5234 Variable *Base = llvm::dyn_cast<Variable>(Opnd); | |
| 5235 Constant *Offset = llvm::dyn_cast<Constant>(Opnd); | |
| 5236 assert(Base || Offset); | |
| 5237 if (Offset) { | |
| 5238 // During memory operand building, we do not blind or pool | |
| 5239 // the constant offset; we will work on the whole memory | |
| 5240 // operand as one entity later, which saves one instruction. | |
| 5241 // By turning blinding and pooling off, we guarantee that | |
| 5242 // legalize(Offset) will return a Constant*. | |
| 5243 { | |
| 5244 BoolFlagSaver B(RandomizationPoolingPaused, true); | |
| 5245 | |
| 5246 Offset = llvm::cast<Constant>(legalize(Offset)); | |
| 5247 } | |
| 5248 | |
| 5249 assert(llvm::isa<ConstantInteger32>(Offset) || | |
| 5250 llvm::isa<ConstantRelocatable>(Offset)); | |
| 5251 } | |
| 5252 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); | |
| 5253 } | |
| 5254 // Do legalization, which includes randomization/pooling, | |
| 5255 // or do randomization/pooling only. | |
| 5256 return llvm::cast<OperandX8632Mem>( | |
| 5257 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); | |
| 5258 } | |
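| // Sketch of the two non-memory cases above (illustrative): a bare Variable | |
| // %v yields the operand [%v] (base only, null offset), while a bare | |
| // constant 0x1000 yields [0x1000] (offset only, null base). | |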
| 5259 | |
| 5260 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { | |
| 5261 // There aren't any 64-bit integer registers for x86-32. | |
| 5262 assert(Type != IceType_i64); | |
| 5263 Variable *Reg = Func->makeVariable(Type); | |
| 5264 if (RegNum == Variable::NoRegister) | |
| 5265 Reg->setWeightInfinite(); | |
| 5266 else | |
| 5267 Reg->setRegNum(RegNum); | |
| 5268 return Reg; | |
| 5269 } | |
| 5270 | |
| 5271 void TargetX8632::postLower() { | |
| 5272 if (Ctx->getFlags().getOptLevel() == Opt_m1) | |
| 5273 return; | |
| 5274 inferTwoAddress(); | |
| 5275 } | |
| 5276 | |
| 5277 void TargetX8632::makeRandomRegisterPermutation( | |
| 5278 llvm::SmallVectorImpl<int32_t> &Permutation, | |
| 5279 const llvm::SmallBitVector &ExcludeRegisters) const { | |
| 5280 // TODO(stichnot): Declaring Permutation this way loses type/size | |
| 5281 // information. Fix this in conjunction with the caller-side TODO. | |
| 5282 assert(Permutation.size() >= RegX8632::Reg_NUM); | |
| 5283 // Expected upper bound on the number of registers in a single | |
| 5284 // equivalence class. For x86-32, this would comprise the 8 XMM | |
| 5285 // registers. This is for performance, not correctness. | |
| 5286 static const unsigned MaxEquivalenceClassSize = 8; | |
| 5287 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList; | |
| 5288 typedef std::map<uint32_t, RegisterList> EquivalenceClassMap; | |
| 5289 EquivalenceClassMap EquivalenceClasses; | |
| 5290 SizeT NumShuffled = 0, NumPreserved = 0; | |
| 5291 | |
| 5292 // Build up the equivalence classes of registers by looking at the | |
| 5293 // register properties as well as whether the registers should be | |
| 5294 // explicitly excluded from shuffling. | |
| 5295 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ | |
| 5296 frameptr, isI8, isInt, isFP) \ | |
| 5297 if (ExcludeRegisters[RegX8632::val]) { \ | |
| 5298 /* val stays the same in the resulting permutation. */ \ | |
| 5299 Permutation[RegX8632::val] = RegX8632::val; \ | |
| 5300 ++NumPreserved; \ | |
| 5301 } else { \ | |
| 5302 const uint32_t Index = (scratch << 0) | (preserved << 1) | (isI8 << 2) | \ | |
| 5303 (isInt << 3) | (isFP << 4); \ | |
| 5304 /* val is assigned to an equivalence class based on its properties. */ \ | |
| 5305 EquivalenceClasses[Index].push_back(RegX8632::val); \ | |
| 5306 } | |
| 5307 REGX8632_TABLE | |
| 5308 #undef X | |
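| // Worked example of the Index encoding, assuming the usual x86-32 | |
| // properties: eax is a scratch, i8-capable integer register, so | |
| //   Index = (1 << 0) | (1 << 2) | (1 << 3) = 13 | |
| // while esi is a preserved integer register that is not i8-capable, so | |
| //   Index = (1 << 1) | (1 << 3) = 10. | |
| // The two therefore fall into different equivalence classes and are | |
| // never permuted with each other. | |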
| 5309 | |
| 5310 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); | |
| 5311 | |
| 5312 // Shuffle the resulting equivalence classes. | |
| 5313 for (auto I : EquivalenceClasses) { | |
| 5314 const RegisterList &List = I.second; | |
| 5315 RegisterList Shuffled(List); | |
| 5316 RandomShuffle(Shuffled.begin(), Shuffled.end(), RNG); | |
| 5317 for (size_t SI = 0, SE = Shuffled.size(); SI < SE; ++SI) { | |
| 5318 Permutation[List[SI]] = Shuffled[SI]; | |
| 5319 ++NumShuffled; | |
| 5320 } | |
| 5321 } | |
| 5322 | |
| 5323 assert(NumShuffled + NumPreserved == RegX8632::Reg_NUM); | |
| 5324 | |
| 5325 if (Func->isVerbose(IceV_Random)) { | |
| 5326 OstreamLocker L(Func->getContext()); | |
| 5327 Ostream &Str = Func->getContext()->getStrDump(); | |
| 5328 Str << "Register equivalence classes:\n"; | |
| 5329 for (auto I : EquivalenceClasses) { | |
| 5330 Str << "{"; | |
| 5331 const RegisterList &List = I.second; | |
| 5332 bool First = true; | |
| 5333 for (int32_t Register : List) { | |
| 5334 if (!First) | |
| 5335 Str << " "; | |
| 5336 First = false; | |
| 5337 Str << getRegName(Register, IceType_i32); | |
| 5338 } | |
| 5339 Str << "}\n"; | |
| 5340 } | |
| 5341 } | |
| 5342 } | |
| 5343 | |
| 5344 void TargetX8632::emit(const ConstantInteger32 *C) const { | |
| 5345 if (!ALLOW_DUMP) | |
| 5346 return; | |
| 5347 Ostream &Str = Ctx->getStrEmit(); | |
| 5348 Str << getConstantPrefix() << C->getValue(); | |
| 5349 } | |
| 5350 | |
| 5351 void TargetX8632::emit(const ConstantInteger64 *) const { | |
| 5352 llvm::report_fatal_error("Not expecting to emit 64-bit integers"); | |
| 5353 } | |
| 5354 | |
| 5355 void TargetX8632::emit(const ConstantFloat *C) const { | |
| 5356 if (!ALLOW_DUMP) | |
| 5357 return; | |
| 5358 Ostream &Str = Ctx->getStrEmit(); | |
| 5359 C->emitPoolLabel(Str); | |
| 5360 } | |
| 5361 | |
| 5362 void TargetX8632::emit(const ConstantDouble *C) const { | |
| 5363 if (!ALLOW_DUMP) | |
| 5364 return; | |
| 5365 Ostream &Str = Ctx->getStrEmit(); | |
| 5366 C->emitPoolLabel(Str); | |
| 5367 } | |
| 5368 | |
| 5369 void TargetX8632::emit(const ConstantUndef *) const { | |
| 5370 llvm::report_fatal_error("undef value encountered by emitter."); | |
| 5371 } | 158 } |
| 5372 | 159 |
| 5373 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx) | 160 TargetDataX8632::TargetDataX8632(GlobalContext *Ctx) |
| 5374 : TargetDataLowering(Ctx) {} | 161 : TargetDataLowering(Ctx) {} |
| 5375 | 162 |
| 5376 void TargetDataX8632::lowerGlobals(const VariableDeclarationList &Vars, | 163 namespace { |
| 5377 const IceString &SectionSuffix) { | |
| 5378 switch (Ctx->getFlags().getOutFileType()) { | |
| 5379 case FT_Elf: { | |
| 5380 ELFObjectWriter *Writer = Ctx->getObjectWriter(); | |
| 5381 Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix); | |
| 5382 } break; | |
| 5383 case FT_Asm: | |
| 5384 case FT_Iasm: { | |
| 5385 const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly(); | |
| 5386 OstreamLocker L(Ctx); | |
| 5387 for (const VariableDeclaration *Var : Vars) { | |
| 5388 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) { | |
| 5389 emitGlobal(*Var, SectionSuffix); | |
| 5390 } | |
| 5391 } | |
| 5392 } break; | |
| 5393 } | |
| 5394 } | |
| 5395 | |
| 5396 template <typename T> struct PoolTypeConverter {}; | 164 template <typename T> struct PoolTypeConverter {}; |
| 5397 | 165 |
| 5398 template <> struct PoolTypeConverter<float> { | 166 template <> struct PoolTypeConverter<float> { |
| 5399 typedef uint32_t PrimitiveIntType; | 167 typedef uint32_t PrimitiveIntType; |
| 5400 typedef ConstantFloat IceType; | 168 typedef ConstantFloat IceType; |
| 5401 static const Type Ty = IceType_f32; | 169 static const Type Ty = IceType_f32; |
| 5402 static const char *TypeName; | 170 static const char *TypeName; |
| 5403 static const char *AsmTag; | 171 static const char *AsmTag; |
| 5404 static const char *PrintfString; | 172 static const char *PrintfString; |
| 5405 }; | 173 }; |
| (...skipping 44 matching lines...) | |
| 5450 typedef uint32_t PrimitiveIntType; | 218 typedef uint32_t PrimitiveIntType; |
| 5451 typedef ConstantInteger32 IceType; | 219 typedef ConstantInteger32 IceType; |
| 5452 static const Type Ty = IceType_i8; | 220 static const Type Ty = IceType_i8; |
| 5453 static const char *TypeName; | 221 static const char *TypeName; |
| 5454 static const char *AsmTag; | 222 static const char *AsmTag; |
| 5455 static const char *PrintfString; | 223 static const char *PrintfString; |
| 5456 }; | 224 }; |
| 5457 const char *PoolTypeConverter<uint8_t>::TypeName = "i8"; | 225 const char *PoolTypeConverter<uint8_t>::TypeName = "i8"; |
| 5458 const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte"; | 226 const char *PoolTypeConverter<uint8_t>::AsmTag = ".byte"; |
| 5459 const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x"; | 227 const char *PoolTypeConverter<uint8_t>::PrintfString = "0x%x"; |
| 228 } // end of anonymous namespace |
| 5460 | 229 |
| 5461 template <typename T> | 230 template <typename T> |
| 5462 void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) { | 231 void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) { |
| 5463 if (!ALLOW_DUMP) | 232 if (!ALLOW_DUMP) |
| 5464 return; | 233 return; |
| 5465 Ostream &Str = Ctx->getStrEmit(); | 234 Ostream &Str = Ctx->getStrEmit(); |
| 5466 Type Ty = T::Ty; | 235 Type Ty = T::Ty; |
| 5467 SizeT Align = typeAlignInBytes(Ty); | 236 SizeT Align = typeAlignInBytes(Ty); |
| 5468 ConstantList Pool = Ctx->getConstantPool(Ty); | 237 ConstantList Pool = Ctx->getConstantPool(Ty); |
| 5469 | 238 |
| (...skipping 44 matching lines...) | |
| 5514 emitConstantPool<PoolTypeConverter<uint8_t>>(Ctx); | 283 emitConstantPool<PoolTypeConverter<uint8_t>>(Ctx); |
| 5515 emitConstantPool<PoolTypeConverter<uint16_t>>(Ctx); | 284 emitConstantPool<PoolTypeConverter<uint16_t>>(Ctx); |
| 5516 emitConstantPool<PoolTypeConverter<uint32_t>>(Ctx); | 285 emitConstantPool<PoolTypeConverter<uint32_t>>(Ctx); |
| 5517 | 286 |
| 5518 emitConstantPool<PoolTypeConverter<float>>(Ctx); | 287 emitConstantPool<PoolTypeConverter<float>>(Ctx); |
| 5519 emitConstantPool<PoolTypeConverter<double>>(Ctx); | 288 emitConstantPool<PoolTypeConverter<double>>(Ctx); |
| 5520 } break; | 289 } break; |
| 5521 } | 290 } |
| 5522 } | 291 } |
| 5523 | 292 |
| 293 void TargetDataX8632::lowerGlobals(const VariableDeclarationList &Vars, |
| 294 const IceString &SectionSuffix) { |
| 295 switch (Ctx->getFlags().getOutFileType()) { |
| 296 case FT_Elf: { |
| 297 ELFObjectWriter *Writer = Ctx->getObjectWriter(); |
| 298 Writer->writeDataSection(Vars, llvm::ELF::R_386_32, SectionSuffix); |
| 299 } break; |
| 300 case FT_Asm: |
| 301 case FT_Iasm: { |
| 302 const IceString &TranslateOnly = Ctx->getFlags().getTranslateOnly(); |
| 303 OstreamLocker L(Ctx); |
| 304 for (const VariableDeclaration *Var : Vars) { |
| 305 if (GlobalContext::matchSymbolName(Var->getName(), TranslateOnly)) { |
| 306 emitGlobal(*Var, SectionSuffix); |
| 307 } |
| 308 } |
| 309 } break; |
| 310 } |
| 311 } |
| 312 |
| 5524 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx) | 313 TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx) |
| 5525 : TargetHeaderLowering(Ctx) {} | 314 : TargetHeaderLowering(Ctx) {} |
| 5526 | 315 |
| 5527 // Randomize or pool an Immediate. | 316 // In some cases, there are x-macro tables for both high-level and |
| 5528 Operand *TargetX8632::randomizeOrPoolImmediate(Constant *Immediate, | 317 // low-level instructions/operands that use the same enum key value. |
| 5529 int32_t RegNum) { | 318 // The tables are kept separate to maintain a proper separation |
| 5530 assert(llvm::isa<ConstantInteger32>(Immediate) || | 319 // between abstraction layers. There is a risk that the tables could |
| 5531 llvm::isa<ConstantRelocatable>(Immediate)); | 320 // get out of sync if enum values are reordered or if entries are |
| 5532 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || | 321 // added or deleted. The following dummy namespaces use |
| 5533 RandomizationPoolingPaused == true) { | 322 // static_asserts to ensure everything is kept in sync. |
| 5534 // Immediates randomization/pooling off or paused | |
| 5535 return Immediate; | |
| 5536 } | |
| 5537 if (Immediate->shouldBeRandomizedOrPooled(Ctx)) { | |
| 5538 Ctx->statsUpdateRPImms(); | |
| 5539 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == | |
| 5540 RPI_Randomize) { | |
| 5541 // blind the constant | |
| 5542 // FROM: | |
| 5543 // imm | |
| 5544 // TO: | |
| 5545 // insert: mov imm+cookie, Reg | |
| 5546 // insert: lea -cookie[Reg], Reg | |
| 5547 // => Reg | |
| 5548 // If we have already assigned a physical register, we must come from | |
| 5549 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse | |
| 5550 // the assigned register, as this assignment is the start of its use-def | |
| 5551 // chain. So we add the RegNum argument here. | |
| 5552 // Note we use the 'lea' instruction instead of 'xor' to avoid affecting | |
| 5553 // the flags. | |
| 5554 Variable *Reg = makeReg(IceType_i32, RegNum); | |
| 5555 ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate); | |
| 5556 uint32_t Value = Integer->getValue(); | |
| 5557 uint32_t Cookie = Ctx->getRandomizationCookie(); | |
| 5558 _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value)); | |
| 5559 Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie); | |
| 5560 _lea(Reg, | |
| 5561 OperandX8632Mem::create(Func, IceType_i32, Reg, Offset, nullptr, 0)); | |
| 5562 // Make sure liveness analysis won't kill this variable, otherwise a | |
| 5563 // liveness assertion | |
| 5564 // will be triggered. | |
| 5565 _set_dest_nonkillable(); | |
| 5566 if (Immediate->getType() != IceType_i32) { | |
| 5567 Variable *TruncReg = makeReg(Immediate->getType(), RegNum); | |
| 5568 _mov(TruncReg, Reg); | |
| 5569 return TruncReg; | |
| 5570 } | |
| 5571 return Reg; | |
| 5572 } | |
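| // Worked example with a purely illustrative cookie value: for Value = 7 | |
| // and Cookie = 0x12345678, the emitted sequence is roughly | |
| //   mov Reg, 0x1234567F         ; Cookie + Value | |
| //   lea Reg, [Reg - 0x12345678] ; subtract the cookie back out | |
| // leaving 7 in Reg without the raw immediate appearing in the code. | |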
| 5573 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { | |
| 5574 // pool the constant | |
| 5575 // FROM: | |
| 5576 // imm | |
| 5577 // TO: | |
| 5578 // insert: mov $label, Reg | |
| 5579 // => Reg | |
| 5580 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); | |
| 5581 Immediate->setShouldBePooled(true); | |
| 5582 // If we have already assigned a physical register, we must come from | |
| 5583 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse | |
| 5584 // the assigned register, as this assignment is the start of its use-def | |
| 5585 // chain. So we add the RegNum argument here. | |
| 5586 Variable *Reg = makeReg(Immediate->getType(), RegNum); | |
| 5587 IceString Label; | |
| 5588 llvm::raw_string_ostream Label_stream(Label); | |
| 5589 Immediate->emitPoolLabel(Label_stream); | |
| 5590 const RelocOffsetT Offset = 0; | |
| 5591 const bool SuppressMangling = true; | |
| 5592 Constant *Symbol = | |
| 5593 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); | |
| 5594 OperandX8632Mem *MemOperand = | |
| 5595 OperandX8632Mem::create(Func, Immediate->getType(), nullptr, Symbol); | |
| 5596 _mov(Reg, MemOperand); | |
| 5597 return Reg; | |
| 5598 } | |
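| // Illustrative pooled form (the label name here is hypothetical): the | |
| // immediate is emitted into a constant pool and loaded indirectly, e.g. | |
| //   mov Reg, [L$i32$42] | |
| // so the raw value never appears inline in the instruction stream. | |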
| 5599 assert("Unsupported -randomize-pool-immediates option" && false); | |
| 5600 } | |
| 5601 // The constant Immediate is not eligible for blinding/pooling. | |
| 5602 return Immediate; | |
| 5603 } | |
| 5604 | 323 |
| 5605 OperandX8632Mem * | 324 namespace { |
| 5606 TargetX8632::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand, | 325 // Validate the enum values in FCMPX8632_TABLE. |
| 5607 int32_t RegNum) { | 326 namespace dummy1 { |
| 5608 assert(MemOperand); | 327 // Define a temporary set of enum values based on low-level table |
| 5609 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || | 328 // entries. |
| 5610 RandomizationPoolingPaused == true) { | 329 enum _tmp_enum { |
| 5611 // Immediate randomization/pooling is turned off. | 331 FCMPX8632_TABLE |
| 5612 return MemOperand; | 331 FCMPX8632_TABLE |
| 5613 } | 332 #undef X |
| 333 _num |
| 334 }; |
| 335 // Define a set of constants based on high-level table entries. |
| 336 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag; |
| 337 ICEINSTFCMP_TABLE |
| 338 #undef X |
| 339 // Define a set of constants based on low-level table entries, and |
| 340 // ensure the table entry keys are consistent. |
| 341 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ |
| 342 static const int _table2_##val = _tmp_##val; \ |
| 343 static_assert( \ |
| 344 _table1_##val == _table2_##val, \ |
| 345 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE"); |
| 346 FCMPX8632_TABLE |
| 347 #undef X |
| 348 // Repeat the static asserts with respect to the high-level table |
| 349 // entries in case the high-level table has extra entries. |
| 350 #define X(tag, str) \ |
| 351 static_assert( \ |
| 352 _table1_##tag == _table2_##tag, \ |
| 353 "Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE"); |
| 354 ICEINSTFCMP_TABLE |
| 355 #undef X |
| 356 } // end of namespace dummy1 |
| 5614 | 357 |
| 5615 // If this memory operand is already a randomized one, we do | 358 // Validate the enum values in ICMPX8632_TABLE. |
| 5616 // not randomize it again. | 359 namespace dummy2 { |
| 5617 if (MemOperand->getRandomized()) | 360 // Define a temporary set of enum values based on low-level table |
| 5618 return MemOperand; | 361 // entries. |
| 362 enum _tmp_enum { |
| 363 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val, |
| 364 ICMPX8632_TABLE |
| 365 #undef X |
| 366 _num |
| 367 }; |
| 368 // Define a set of constants based on high-level table entries. |
| 369 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; |
| 370 ICEINSTICMP_TABLE |
| 371 #undef X |
| 372 // Define a set of constants based on low-level table entries, and |
| 373 // ensure the table entry keys are consistent. |
| 374 #define X(val, C_32, C1_64, C2_64, C3_64) \ |
| 375 static const int _table2_##val = _tmp_##val; \ |
| 376 static_assert( \ |
| 377 _table1_##val == _table2_##val, \ |
| 378 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE"); |
| 379 ICMPX8632_TABLE |
| 380 #undef X |
| 381 // Repeat the static asserts with respect to the high-level table |
| 382 // entries in case the high-level table has extra entries. |
| 383 #define X(tag, str) \ |
| 384 static_assert( \ |
| 385 _table1_##tag == _table2_##tag, \ |
| 386 "Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE"); |
| 387 ICEINSTICMP_TABLE |
| 388 #undef X |
| 389 } // end of namespace dummy2 |
| 5619 | 390 |
| 5620 if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) { | 391 // Validate the enum values in ICETYPEX8632_TABLE. |
| 5621 if (C->shouldBeRandomizedOrPooled(Ctx)) { | 392 namespace dummy3 { |
| 5622 // The offset of this mem operand should be blinded or pooled | 393 // Define a temporary set of enum values based on low-level table |
| 5623 Ctx->statsUpdateRPImms(); | 394 // entries. |
| 5624 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == | 395 enum _tmp_enum { |
| 5625 RPI_Randomize) { | 396 #define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag, |
| 5626 // blind the constant offset | 397 ICETYPEX8632_TABLE |
| 5627 // FROM: | 398 #undef X |
| 5628 // offset[base, index, shift] | 399 _num |
| 5629 // TO: | 400 }; |
| 5630 // insert: lea offset+cookie[base], RegTemp | 401 // Define a set of constants based on high-level table entries. |
| 5631 // => -cookie[RegTemp, index, shift] | 402 #define X(tag, size, align, elts, elty, str) \ |
| 5632 uint32_t Value = | 403 static const int _table1_##tag = tag; |
| 5633 llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset()) | 404 ICETYPE_TABLE |
| 5634 ->getValue(); | 405 #undef X |
| 5635 uint32_t Cookie = Ctx->getRandomizationCookie(); | 406 // Define a set of constants based on low-level table entries, and |
| 5636 Constant *Mask1 = Ctx->getConstantInt( | 407 // ensure the table entry keys are consistent. |
| 5637 MemOperand->getOffset()->getType(), Cookie + Value); | 408 #define X(tag, elementty, cvt, sdss, pack, width, fld) \ |
| 5638 Constant *Mask2 = | 409 static const int _table2_##tag = _tmp_##tag; \ |
| 5639 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); | 410 static_assert(_table1_##tag == _table2_##tag, \ |
| 5640 | 411 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |
| 5641 OperandX8632Mem *TempMemOperand = OperandX8632Mem::create( | 412 ICETYPEX8632_TABLE |
| 5642 Func, MemOperand->getType(), MemOperand->getBase(), Mask1); | 413 #undef X |
| 5643 // If we have already assigned a physical register, we must come from | 414 // Repeat the static asserts with respect to the high-level table |
| 5644 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse | 415 // entries in case the high-level table has extra entries. |
| 5645 // the assigned register, as this assignment is the start of its use-def | 416 #define X(tag, size, align, elts, elty, str) \ |
| 5646 // chain. So we add RegNum argument here. | 417 static_assert(_table1_##tag == _table2_##tag, \ |
| 5647 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); | 418 "Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |
| 5648 _lea(RegTemp, TempMemOperand); | 419 ICETYPE_TABLE |
| 5649 // Since the source operand doesn't use the dest register, we don't | 420 #undef X |
| 5650 // need to add _set_dest_nonkillable(). | 421 } // end of namespace dummy3 |
| 5651 // But if we use the same dest register, that is, with RegNum | 422 } // end of anonymous namespace |
| 5652 // assigned, we should add _set_dest_nonkillable(). | |
| 5653 if (RegNum != Variable::NoRegister) | |
| 5654 _set_dest_nonkillable(); | |
| 5655 | |
| 5656 OperandX8632Mem *NewMemOperand = OperandX8632Mem::create( | |
| 5657 Func, MemOperand->getType(), RegTemp, Mask2, MemOperand->getIndex(), | |
| 5658 MemOperand->getShift(), MemOperand->getSegmentRegister()); | |
| 5659 | |
| 5660 // Label this memory operand as randomized, so we won't randomize it | |
| 5661 // again in case we call legalize() multiple times on this memory | |
| 5662 // operand. | |
| 5663 NewMemOperand->setRandomized(true); | |
| 5664 return NewMemOperand; | |
| 5665 } | |
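| // Concrete sketch of the transformation above (cookie value illustrative): | |
| //   mov eax, dword ptr [ebx + ecx*4 + 0x40] | |
| // becomes, with Cookie = 0x12345678, | |
| //   lea edx, [ebx + 0x123456B8]                   ; 0x40 + Cookie | |
| //   mov eax, dword ptr [edx + ecx*4 - 0x12345678] | |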
| 5666 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { | |
| 5667 // pool the constant offset | |
| 5668 // FROM: | |
| 5669 // offset[base, index, shift] | |
| 5670 // TO: | |
| 5671 // insert: mov $label, RegTemp | |
| 5672 // insert: lea [base, RegTemp], RegTemp | |
| 5673 // =>[RegTemp, index, shift] | |
| 5674 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == | |
| 5675 RPI_Pool); | |
| 5676 // A memory operand should never appear as a source operand in phi | |
| 5677 // lowering assignments, so there is no need to reuse any registers | |
| 5678 // here. For phi lowering, we should not ask for new physical | |
| 5679 // registers in general. | |
| 5680 // However, if we do encounter a memory operand during phi lowering, | |
| 5681 // we should not blind or pool its immediates for now. | |
| 5682 if (RegNum != Variable::NoRegister) | |
| 5683 return MemOperand; | |
| 5684 Variable *RegTemp = makeReg(IceType_i32); | |
| 5685 IceString Label; | |
| 5686 llvm::raw_string_ostream Label_stream(Label); | |
| 5687 MemOperand->getOffset()->emitPoolLabel(Label_stream); | |
| 5688 MemOperand->getOffset()->setShouldBePooled(true); | |
| 5689 const RelocOffsetT SymOffset = 0; | |
| 5690 bool SuppressMangling = true; | |
| 5691 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), | |
| 5692 SuppressMangling); | |
| 5693 OperandX8632Mem *SymbolOperand = OperandX8632Mem::create( | |
| 5694 Func, MemOperand->getOffset()->getType(), nullptr, Symbol); | |
| 5695 _mov(RegTemp, SymbolOperand); | |
| 5696 // If we have a base variable here, we should add the lea instruction | |
| 5697 // to add the value of the base variable to RegTemp. If there is no | |
| 5698 // base variable, we won't need this lea instruction. | |
| 5699 if (MemOperand->getBase()) { | |
| 5700 OperandX8632Mem *CalculateOperand = OperandX8632Mem::create( | |
| 5701 Func, MemOperand->getType(), MemOperand->getBase(), nullptr, | |
| 5702 RegTemp, 0, MemOperand->getSegmentRegister()); | |
| 5703 _lea(RegTemp, CalculateOperand); | |
| 5704 _set_dest_nonkillable(); | |
| 5705 } | |
| 5706 OperandX8632Mem *NewMemOperand = OperandX8632Mem::create( | |
| 5707 Func, MemOperand->getType(), RegTemp, nullptr, | |
| 5708 MemOperand->getIndex(), MemOperand->getShift(), | |
| 5709 MemOperand->getSegmentRegister()); | |
| 5710 return NewMemOperand; | |
| 5711 } | |
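| // Sketch of the pooled form (label name hypothetical): for the operand | |
| // 0x40[ebx, ecx, 2], the offset 0x40 is pooled and reloaded: | |
| //   mov edx, [L$i32$0x40] | |
| //   lea edx, [ebx + edx] | |
| // yielding the final operand [edx + ecx*4] with no inline offset. | |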
| 5712 assert("Unsupported -randomize-pool-immediates option" && false); | |
| 5713 } | |
| 5714 } | |
| 5715 // The offset is not eligible for blinding or pooling; return the | |
| 5716 // original memory operand. | |
| 5717 return MemOperand; | |
| 5718 } | |
| 5719 | 423 |
| 5720 } // end of namespace Ice | 424 } // end of namespace Ice |