Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(379)

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1216933015: X8632 Templatization completed. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments. Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
11 /// This file implements the TargetLoweringX86Base class, which 11 /// This file implements the TargetLoweringX86Base class, which
12 /// consists almost entirely of the lowering sequence for each 12 /// consists almost entirely of the lowering sequence for each
13 /// high-level instruction. 13 /// high-level instruction.
14 /// 14 ///
15 //===----------------------------------------------------------------------===// 15 //===----------------------------------------------------------------------===//
16 16
17 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 17 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
18 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 18 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
19 19
20 #include "IceCfg.h" 20 #include "IceCfg.h"
21 #include "IceCfgNode.h" 21 #include "IceCfgNode.h"
22 #include "IceClFlags.h" 22 #include "IceClFlags.h"
23 #include "IceDefs.h" 23 #include "IceDefs.h"
24 #include "IceELFObjectWriter.h" 24 #include "IceELFObjectWriter.h"
25 #include "IceGlobalInits.h" 25 #include "IceGlobalInits.h"
26 #include "IceInstX8632.h"
27 #include "IceLiveness.h" 26 #include "IceLiveness.h"
28 #include "IceOperand.h" 27 #include "IceOperand.h"
29 #include "IceRegistersX8632.h"
30 #include "IceTargetLoweringX8632.def"
31 #include "IceTargetLoweringX8632.h"
32 #include "IceUtils.h" 28 #include "IceUtils.h"
33 #include "llvm/Support/MathExtras.h" 29 #include "llvm/Support/MathExtras.h"
34 30
35 namespace Ice { 31 namespace Ice {
36 namespace X86Internal { 32 namespace X86Internal {
37 33
38 /// A helper class to ease the settings of RandomizationPoolingPause 34 /// A helper class to ease the settings of RandomizationPoolingPause to disable
39 /// to disable constant blinding or pooling for some translation phases. 35 /// constant blinding or pooling for some translation phases.
40 class BoolFlagSaver { 36 class BoolFlagSaver {
41 BoolFlagSaver() = delete; 37 BoolFlagSaver() = delete;
42 BoolFlagSaver(const BoolFlagSaver &) = delete; 38 BoolFlagSaver(const BoolFlagSaver &) = delete;
43 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete; 39 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;
44 40
45 public: 41 public:
46 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; } 42 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; }
47 ~BoolFlagSaver() { Flag = OldValue; } 43 ~BoolFlagSaver() { Flag = OldValue; }
48 44
49 private: 45 private:
(...skipping 28 matching lines...) Expand all
78 public: 74 public:
79 enum BoolFoldingProducerKind { 75 enum BoolFoldingProducerKind {
80 PK_None, 76 PK_None,
81 PK_Icmp32, 77 PK_Icmp32,
82 PK_Icmp64, 78 PK_Icmp64,
83 PK_Fcmp, 79 PK_Fcmp,
84 PK_Trunc 80 PK_Trunc
85 }; 81 };
86 82
87 /// Currently the actual enum values are not used (other than CK_None), but we 83 /// Currently the actual enum values are not used (other than CK_None), but we
88 /// go 84 /// go ahead and produce them anyway for symmetry with the
89 /// ahead and produce them anyway for symmetry with the
90 /// BoolFoldingProducerKind. 85 /// BoolFoldingProducerKind.
91 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; 86 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };
92 87
93 private: 88 private:
94 BoolFolding(const BoolFolding &) = delete; 89 BoolFolding(const BoolFolding &) = delete;
95 BoolFolding &operator=(const BoolFolding &) = delete; 90 BoolFolding &operator=(const BoolFolding &) = delete;
96 91
97 public: 92 public:
98 BoolFolding() = default; 93 BoolFolding() = default;
99 static BoolFoldingProducerKind getProducerKind(const Inst *Instr); 94 static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after
156 return CK_None; 151 return CK_None;
157 case InstCast::Sext: 152 case InstCast::Sext:
158 return CK_Sext; 153 return CK_Sext;
159 case InstCast::Zext: 154 case InstCast::Zext:
160 return CK_Zext; 155 return CK_Zext;
161 } 156 }
162 } 157 }
163 return CK_None; 158 return CK_None;
164 } 159 }
165 160
166 /// Returns true if the producing instruction has a "complex" lowering 161 /// Returns true if the producing instruction has a "complex" lowering sequence.
167 /// sequence. This generally means that its lowering sequence requires 162 /// This generally means that its lowering sequence requires more than one
168 /// more than one conditional branch, namely 64-bit integer compares 163 /// conditional branch, namely 64-bit integer compares and some floating-point
169 /// and some floating-point compares. When this is true, and there is 164 /// compares. When this is true, and there is more than one consumer, we prefer
170 /// more than one consumer, we prefer to disable the folding 165 /// to disable the folding optimization because it minimizes branches.
171 /// optimization because it minimizes branches.
172 template <class MachineTraits> 166 template <class MachineTraits>
173 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { 167 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
174 switch (getProducerKind(Instr)) { 168 switch (getProducerKind(Instr)) {
175 default: 169 default:
176 return false; 170 return false;
177 case PK_Icmp64: 171 case PK_Icmp64:
178 return true; 172 return true;
179 case PK_Fcmp: 173 case PK_Fcmp:
180 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] 174 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]
181 .C2 != MachineTraits::Cond::Br_None; 175 .C2 != MachineTraits::Cond::Br_None;
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after
219 } 213 }
220 for (auto &I : Producers) { 214 for (auto &I : Producers) {
221 // Ignore entries previously marked invalid. 215 // Ignore entries previously marked invalid.
222 if (I.second.Instr == nullptr) 216 if (I.second.Instr == nullptr)
223 continue; 217 continue;
224 // Disable the producer if its dest may be live beyond this block. 218 // Disable the producer if its dest may be live beyond this block.
225 if (I.second.IsLiveOut) { 219 if (I.second.IsLiveOut) {
226 setInvalid(I.first); 220 setInvalid(I.first);
227 continue; 221 continue;
228 } 222 }
229 // Mark as "dead" rather than outright deleting. This is so that 223 // Mark as "dead" rather than outright deleting. This is so that other
230 // other peephole style optimizations during or before lowering 224 // peephole style optimizations during or before lowering have access to
231 // have access to this instruction in undeleted form. See for 225 // this instruction in undeleted form. See for example
232 // example tryOptimizedCmpxchgCmpBr(). 226 // tryOptimizedCmpxchgCmpBr().
233 I.second.Instr->setDead(); 227 I.second.Instr->setDead();
234 } 228 }
235 } 229 }
236 230
237 template <class MachineTraits> 231 template <class MachineTraits>
238 const Inst * 232 const Inst *
239 BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const { 233 BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const {
240 auto *Var = llvm::dyn_cast<const Variable>(Opnd); 234 auto *Var = llvm::dyn_cast<const Variable>(Opnd);
241 if (Var == nullptr) 235 if (Var == nullptr)
242 return nullptr; 236 return nullptr;
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
276 (TargetInstructionSet::X86InstructionSet_End - 270 (TargetInstructionSet::X86InstructionSet_End -
277 TargetInstructionSet::X86InstructionSet_Begin), 271 TargetInstructionSet::X86InstructionSet_Begin),
278 "Traits::InstructionSet range different from TargetInstructionSet"); 272 "Traits::InstructionSet range different from TargetInstructionSet");
279 if (Func->getContext()->getFlags().getTargetInstructionSet() != 273 if (Func->getContext()->getFlags().getTargetInstructionSet() !=
280 TargetInstructionSet::BaseInstructionSet) { 274 TargetInstructionSet::BaseInstructionSet) {
281 InstructionSet = static_cast<typename Traits::InstructionSet>( 275 InstructionSet = static_cast<typename Traits::InstructionSet>(
282 (Func->getContext()->getFlags().getTargetInstructionSet() - 276 (Func->getContext()->getFlags().getTargetInstructionSet() -
283 TargetInstructionSet::X86InstructionSet_Begin) + 277 TargetInstructionSet::X86InstructionSet_Begin) +
284 Traits::InstructionSet::Begin); 278 Traits::InstructionSet::Begin);
285 } 279 }
286 // TODO: Don't initialize IntegerRegisters and friends every time. 280 // TODO: Don't initialize IntegerRegisters and friends every time. Instead,
287 // Instead, initialize in some sort of static initializer for the 281 // initialize in some sort of static initializer for the class.
288 // class.
289 llvm::SmallBitVector IntegerRegisters(Traits::RegisterSet::Reg_NUM); 282 llvm::SmallBitVector IntegerRegisters(Traits::RegisterSet::Reg_NUM);
290 llvm::SmallBitVector IntegerRegistersI8(Traits::RegisterSet::Reg_NUM); 283 llvm::SmallBitVector IntegerRegistersI8(Traits::RegisterSet::Reg_NUM);
291 llvm::SmallBitVector FloatRegisters(Traits::RegisterSet::Reg_NUM); 284 llvm::SmallBitVector FloatRegisters(Traits::RegisterSet::Reg_NUM);
292 llvm::SmallBitVector VectorRegisters(Traits::RegisterSet::Reg_NUM); 285 llvm::SmallBitVector VectorRegisters(Traits::RegisterSet::Reg_NUM);
293 llvm::SmallBitVector InvalidRegisters(Traits::RegisterSet::Reg_NUM); 286 llvm::SmallBitVector InvalidRegisters(Traits::RegisterSet::Reg_NUM);
294 ScratchRegs.resize(Traits::RegisterSet::Reg_NUM); 287 ScratchRegs.resize(Traits::RegisterSet::Reg_NUM);
295 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ 288
296 frameptr, isI8, isInt, isFP) \ 289 Traits::initRegisterSet(&IntegerRegisters, &IntegerRegistersI8,
297 IntegerRegisters[Traits::RegisterSet::val] = isInt; \ 290 &FloatRegisters, &VectorRegisters, &ScratchRegs);
298 IntegerRegistersI8[Traits::RegisterSet::val] = isI8; \ 291
299 FloatRegisters[Traits::RegisterSet::val] = isFP; \
300 VectorRegisters[Traits::RegisterSet::val] = isFP; \
301 ScratchRegs[Traits::RegisterSet::val] = scratch;
302 REGX8632_TABLE;
303 #undef X
304 TypeToRegisterSet[IceType_void] = InvalidRegisters; 292 TypeToRegisterSet[IceType_void] = InvalidRegisters;
305 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8; 293 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
306 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8; 294 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
307 TypeToRegisterSet[IceType_i16] = IntegerRegisters; 295 TypeToRegisterSet[IceType_i16] = IntegerRegisters;
308 TypeToRegisterSet[IceType_i32] = IntegerRegisters; 296 TypeToRegisterSet[IceType_i32] = IntegerRegisters;
309 TypeToRegisterSet[IceType_i64] = IntegerRegisters; 297 TypeToRegisterSet[IceType_i64] = IntegerRegisters;
310 TypeToRegisterSet[IceType_f32] = FloatRegisters; 298 TypeToRegisterSet[IceType_f32] = FloatRegisters;
311 TypeToRegisterSet[IceType_f64] = FloatRegisters; 299 TypeToRegisterSet[IceType_f64] = FloatRegisters;
312 TypeToRegisterSet[IceType_v4i1] = VectorRegisters; 300 TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
313 TypeToRegisterSet[IceType_v8i1] = VectorRegisters; 301 TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
(...skipping 27 matching lines...) Expand all
341 329
342 // Find read-modify-write opportunities. Do this after address mode 330 // Find read-modify-write opportunities. Do this after address mode
343 // optimization so that doAddressOpt() doesn't need to be applied to RMW 331 // optimization so that doAddressOpt() doesn't need to be applied to RMW
344 // instructions as well. 332 // instructions as well.
345 findRMW(); 333 findRMW();
346 Func->dump("After RMW transform"); 334 Func->dump("After RMW transform");
347 335
348 // Argument lowering 336 // Argument lowering
349 Func->doArgLowering(); 337 Func->doArgLowering();
350 338
351 // Target lowering. This requires liveness analysis for some parts 339 // Target lowering. This requires liveness analysis for some parts of the
352 // of the lowering decisions, such as compare/branch fusing. If 340 // lowering decisions, such as compare/branch fusing. If non-lightweight
 353 // non-lightweight liveness analysis is used, the instructions need 341 liveness analysis is used, the instructions need to be renumbered first.
354 // to be renumbered first. TODO: This renumbering should only be 342 // TODO: This renumbering should only be necessary if we're actually
355 // necessary if we're actually calculating live intervals, which we 343 // calculating live intervals, which we only do for register allocation.
356 // only do for register allocation.
357 Func->renumberInstructions(); 344 Func->renumberInstructions();
358 if (Func->hasError()) 345 if (Func->hasError())
359 return; 346 return;
360 347
361 // TODO: It should be sufficient to use the fastest liveness 348 // TODO: It should be sufficient to use the fastest liveness calculation, i.e.
362 // calculation, i.e. livenessLightweight(). However, for some 349 // livenessLightweight(). However, for some reason that slows down the rest
363 // reason that slows down the rest of the translation. Investigate. 350 // of the translation. Investigate.
364 Func->liveness(Liveness_Basic); 351 Func->liveness(Liveness_Basic);
365 if (Func->hasError()) 352 if (Func->hasError())
366 return; 353 return;
367 Func->dump("After x86 address mode opt"); 354 Func->dump("After x86 address mode opt");
368 355
369 // Disable constant blinding or pooling for load optimization. 356 // Disable constant blinding or pooling for load optimization.
370 { 357 {
371 BoolFlagSaver B(RandomizationPoolingPaused, true); 358 BoolFlagSaver B(RandomizationPoolingPaused, true);
372 doLoadOpt(); 359 doLoadOpt();
373 } 360 }
374 Func->genCode(); 361 Func->genCode();
375 if (Func->hasError()) 362 if (Func->hasError())
376 return; 363 return;
377 Func->dump("After x86 codegen"); 364 Func->dump("After x86 codegen");
378 365
379 // Register allocation. This requires instruction renumbering and 366 // Register allocation. This requires instruction renumbering and full
380 // full liveness analysis. 367 // liveness analysis.
381 Func->renumberInstructions(); 368 Func->renumberInstructions();
382 if (Func->hasError()) 369 if (Func->hasError())
383 return; 370 return;
384 Func->liveness(Liveness_Intervals); 371 Func->liveness(Liveness_Intervals);
385 if (Func->hasError()) 372 if (Func->hasError())
386 return; 373 return;
387 // Validate the live range computations. The expensive validation 374 // Validate the live range computations. The expensive validation call is
388 // call is deliberately only made when assertions are enabled. 375 // deliberately only made when assertions are enabled.
389 assert(Func->validateLiveness()); 376 assert(Func->validateLiveness());
390 // The post-codegen dump is done here, after liveness analysis and 377 // The post-codegen dump is done here, after liveness analysis and associated
391 // associated cleanup, to make the dump cleaner and more useful. 378 // cleanup, to make the dump cleaner and more useful.
392 Func->dump("After initial x8632 codegen"); 379 Func->dump("After initial x8632 codegen");
393 Func->getVMetadata()->init(VMK_All); 380 Func->getVMetadata()->init(VMK_All);
394 regAlloc(RAK_Global); 381 regAlloc(RAK_Global);
395 if (Func->hasError()) 382 if (Func->hasError())
396 return; 383 return;
397 Func->dump("After linear scan regalloc"); 384 Func->dump("After linear scan regalloc");
398 385
399 if (Ctx->getFlags().getPhiEdgeSplit()) { 386 if (Ctx->getFlags().getPhiEdgeSplit()) {
400 // We need to pause constant blinding or pooling during advanced 387 // We need to pause constant blinding or pooling during advanced phi
401 // phi lowering, unless the lowering assignment has a physical 388 // lowering, unless the lowering assignment has a physical register for the
402 // register for the dest Variable. 389 // dest Variable.
403 { 390 {
404 BoolFlagSaver B(RandomizationPoolingPaused, true); 391 BoolFlagSaver B(RandomizationPoolingPaused, true);
405 Func->advancedPhiLowering(); 392 Func->advancedPhiLowering();
406 } 393 }
407 Func->dump("After advanced Phi lowering"); 394 Func->dump("After advanced Phi lowering");
408 } 395 }
409 396
410 // Stack frame mapping. 397 // Stack frame mapping.
411 Func->genFrame(); 398 Func->genFrame();
412 if (Func->hasError()) 399 if (Func->hasError())
413 return; 400 return;
414 Func->dump("After stack frame mapping"); 401 Func->dump("After stack frame mapping");
415 402
416 Func->contractEmptyNodes(); 403 Func->contractEmptyNodes();
417 Func->reorderNodes(); 404 Func->reorderNodes();
418 405
419 // Branch optimization. This needs to be done just before code 406 // Branch optimization. This needs to be done just before code emission. In
420 // emission. In particular, no transformations that insert or 407 // particular, no transformations that insert or reorder CfgNodes should be
421 // reorder CfgNodes should be done after branch optimization. We go 408 // done after branch optimization. We go ahead and do it before nop insertion
422 // ahead and do it before nop insertion to reduce the amount of work 409 // to reduce the amount of work needed for searching for opportunities.
423 // needed for searching for opportunities.
424 Func->doBranchOpt(); 410 Func->doBranchOpt();
425 Func->dump("After branch optimization"); 411 Func->dump("After branch optimization");
426 412
427 // Nop insertion 413 // Nop insertion
428 if (Ctx->getFlags().shouldDoNopInsertion()) { 414 if (Ctx->getFlags().shouldDoNopInsertion()) {
429 Func->doNopInsertion(); 415 Func->doNopInsertion();
430 } 416 }
431 } 417 }
432 418
433 template <class Machine> void TargetX86Base<Machine>::translateOm1() { 419 template <class Machine> void TargetX86Base<Machine>::translateOm1() {
(...skipping 27 matching lines...) Expand all
461 Func->dump("After stack frame mapping"); 447 Func->dump("After stack frame mapping");
462 448
463 // Nop insertion 449 // Nop insertion
464 if (Ctx->getFlags().shouldDoNopInsertion()) { 450 if (Ctx->getFlags().shouldDoNopInsertion()) {
465 Func->doNopInsertion(); 451 Func->doNopInsertion();
466 } 452 }
467 } 453 }
468 454
469 bool canRMW(const InstArithmetic *Arith) { 455 bool canRMW(const InstArithmetic *Arith) {
470 Type Ty = Arith->getDest()->getType(); 456 Type Ty = Arith->getDest()->getType();
471 // X86 vector instructions write to a register and have no RMW 457 // X86 vector instructions write to a register and have no RMW option.
472 // option.
473 if (isVectorType(Ty)) 458 if (isVectorType(Ty))
474 return false; 459 return false;
475 bool isI64 = Ty == IceType_i64; 460 bool isI64 = Ty == IceType_i64;
476 461
477 switch (Arith->getOp()) { 462 switch (Arith->getOp()) {
478 // Not handled for lack of simple lowering: 463 // Not handled for lack of simple lowering:
479 // shift on i64 464 // shift on i64
480 // mul, udiv, urem, sdiv, srem, frem 465 // mul, udiv, urem, sdiv, srem, frem
481 // Not handled for lack of RMW instructions: 466 // Not handled for lack of RMW instructions:
482 // fadd, fsub, fmul, fdiv (also vector types) 467 // fadd, fsub, fmul, fdiv (also vector types)
483 default: 468 default:
484 return false; 469 return false;
485 case InstArithmetic::Add: 470 case InstArithmetic::Add:
486 case InstArithmetic::Sub: 471 case InstArithmetic::Sub:
487 case InstArithmetic::And: 472 case InstArithmetic::And:
488 case InstArithmetic::Or: 473 case InstArithmetic::Or:
489 case InstArithmetic::Xor: 474 case InstArithmetic::Xor:
490 return true; 475 return true;
491 case InstArithmetic::Shl: 476 case InstArithmetic::Shl:
492 case InstArithmetic::Lshr: 477 case InstArithmetic::Lshr:
493 case InstArithmetic::Ashr: 478 case InstArithmetic::Ashr:
494 return false; // TODO(stichnot): implement 479 return false; // TODO(stichnot): implement
495 return !isI64; 480 return !isI64;
496 } 481 }
497 } 482 }
498 483
484 template <class Machine>
499 bool isSameMemAddressOperand(const Operand *A, const Operand *B) { 485 bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
500 if (A == B) 486 if (A == B)
501 return true; 487 return true;
502 if (auto *MemA = llvm::dyn_cast<OperandX8632Mem>(A)) { 488 if (auto *MemA = llvm::dyn_cast<
503 if (auto *MemB = llvm::dyn_cast<OperandX8632Mem>(B)) { 489 typename TargetX86Base<Machine>::Traits::X86OperandMem>(A)) {
490 if (auto *MemB = llvm::dyn_cast<
491 typename TargetX86Base<Machine>::Traits::X86OperandMem>(B)) {
504 return MemA->getBase() == MemB->getBase() && 492 return MemA->getBase() == MemB->getBase() &&
505 MemA->getOffset() == MemB->getOffset() && 493 MemA->getOffset() == MemB->getOffset() &&
506 MemA->getIndex() == MemB->getIndex() && 494 MemA->getIndex() == MemB->getIndex() &&
507 MemA->getShift() == MemB->getShift() && 495 MemA->getShift() == MemB->getShift() &&
508 MemA->getSegmentRegister() == MemB->getSegmentRegister(); 496 MemA->getSegmentRegister() == MemB->getSegmentRegister();
509 } 497 }
510 } 498 }
511 return false; 499 return false;
512 } 500 }
513 501
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
558 // instruction will be retained and later lowered. On the other 546 // instruction will be retained and later lowered. On the other
559 // hand, if the RMW instruction does not end x's live range, then 547 // hand, if the RMW instruction does not end x's live range, then
560 // the Store instruction must still be present, and therefore the 548 // the Store instruction must still be present, and therefore the
561 // RMW instruction is ignored during lowering because it is 549 // RMW instruction is ignored during lowering because it is
562 // redundant with the Store instruction. 550 // redundant with the Store instruction.
563 // 551 //
564 // Note that if "a" has further uses, the RMW transformation may 552 // Note that if "a" has further uses, the RMW transformation may
565 // still trigger, resulting in two loads and one store, which is 553 // still trigger, resulting in two loads and one store, which is
566 // worse than the original one load and one store. However, this is 554 // worse than the original one load and one store. However, this is
567 // probably rare, and caching probably keeps it just as fast. 555 // probably rare, and caching probably keeps it just as fast.
568 if (!isSameMemAddressOperand(Load->getSourceAddress(), 556 if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(),
569 Store->getAddr())) 557 Store->getAddr()))
570 continue; 558 continue;
571 Operand *ArithSrcFromLoad = Arith->getSrc(0); 559 Operand *ArithSrcFromLoad = Arith->getSrc(0);
572 Operand *ArithSrcOther = Arith->getSrc(1); 560 Operand *ArithSrcOther = Arith->getSrc(1);
573 if (ArithSrcFromLoad != Load->getDest()) { 561 if (ArithSrcFromLoad != Load->getDest()) {
574 if (!Arith->isCommutative() || ArithSrcOther != Load->getDest()) 562 if (!Arith->isCommutative() || ArithSrcOther != Load->getDest())
575 continue; 563 continue;
576 std::swap(ArithSrcFromLoad, ArithSrcOther); 564 std::swap(ArithSrcFromLoad, ArithSrcOther);
577 } 565 }
578 if (Arith->getDest() != Store->getData()) 566 if (Arith->getDest() != Store->getData())
579 continue; 567 continue;
580 if (!canRMW(Arith)) 568 if (!canRMW(Arith))
581 continue; 569 continue;
582 if (Func->isVerbose(IceV_RMW)) { 570 if (Func->isVerbose(IceV_RMW)) {
583 Str << "Found RMW in " << Func->getFunctionName() << ":\n "; 571 Str << "Found RMW in " << Func->getFunctionName() << ":\n ";
584 Load->dump(Func); 572 Load->dump(Func);
585 Str << "\n "; 573 Str << "\n ";
586 Arith->dump(Func); 574 Arith->dump(Func);
587 Str << "\n "; 575 Str << "\n ";
588 Store->dump(Func); 576 Store->dump(Func);
589 Str << "\n"; 577 Str << "\n";
590 } 578 }
591 Variable *Beacon = Func->template makeVariable(IceType_i32); 579 Variable *Beacon = Func->template makeVariable(IceType_i32);
592 Beacon->setWeight(0); 580 Beacon->setWeight(0);
593 Store->setRmwBeacon(Beacon); 581 Store->setRmwBeacon(Beacon);
594 InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon); 582 InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
595 Node->getInsts().insert(I3, BeaconDef); 583 Node->getInsts().insert(I3, BeaconDef);
596 InstX8632FakeRMW *RMW = InstX8632FakeRMW::create( 584 auto *RMW = Traits::Insts::FakeRMW::create(
597 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp()); 585 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp());
598 Node->getInsts().insert(I3, RMW); 586 Node->getInsts().insert(I3, RMW);
599 } 587 }
600 } 588 }
601 } 589 }
602 } 590 }
603 } 591 }
604 } 592 }
605 593
606 // Converts a ConstantInteger32 operand into its constant value, or 594 // Converts a ConstantInteger32 operand into its constant value, or
(...skipping 107 matching lines...) Expand 10 before | Expand all | Expand 10 after
714 } 702 }
715 Context.advanceCur(); 703 Context.advanceCur();
716 Context.advanceNext(); 704 Context.advanceNext();
717 } 705 }
718 } 706 }
719 Func->dump("After load optimization"); 707 Func->dump("After load optimization");
720 } 708 }
721 709
722 template <class Machine> 710 template <class Machine>
723 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { 711 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
724 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { 712 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) {
725 return Br->optimizeBranch(NextNode); 713 return Br->optimizeBranch(NextNode);
726 } 714 }
727 return false; 715 return false;
728 } 716 }
729 717
730 template <class Machine> 718 template <class Machine>
731 IceString TargetX86Base<Machine>::RegNames[] = {
732 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
733 frameptr, isI8, isInt, isFP) \
734 name,
735 REGX8632_TABLE
736 #undef X
737 };
738
739 template <class Machine>
740 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { 719 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
741 if (Ty == IceType_void) 720 if (Ty == IceType_void)
742 Ty = IceType_i32; 721 Ty = IceType_i32;
743 if (PhysicalRegisters[Ty].empty()) 722 if (PhysicalRegisters[Ty].empty())
744 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); 723 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);
745 assert(RegNum < PhysicalRegisters[Ty].size()); 724 assert(RegNum < PhysicalRegisters[Ty].size());
746 Variable *Reg = PhysicalRegisters[Ty][RegNum]; 725 Variable *Reg = PhysicalRegisters[Ty][RegNum];
747 if (Reg == nullptr) { 726 if (Reg == nullptr) {
748 Reg = Func->template makeVariable(Ty); 727 Reg = Func->template makeVariable(Ty);
749 Reg->setRegNum(RegNum); 728 Reg->setRegNum(RegNum);
750 PhysicalRegisters[Ty][RegNum] = Reg; 729 PhysicalRegisters[Ty][RegNum] = Reg;
751 // Specially mark esp as an "argument" so that it is considered 730 // Specially mark esp as an "argument" so that it is considered
752 // live upon function entry. 731 // live upon function entry.
753 if (RegNum == Traits::RegisterSet::Reg_esp) { 732 if (RegNum == Traits::RegisterSet::Reg_esp) {
754 Func->addImplicitArg(Reg); 733 Func->addImplicitArg(Reg);
755 Reg->setIgnoreLiveness(); 734 Reg->setIgnoreLiveness();
756 } 735 }
757 } 736 }
758 return Reg; 737 return Reg;
759 } 738 }
760 739
761 template <class Machine> 740 template <class Machine>
762 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { 741 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const {
763 assert(RegNum < Traits::RegisterSet::Reg_NUM); 742 return Traits::getRegName(RegNum, Ty);
764 static IceString RegNames8[] = {
765 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
766 frameptr, isI8, isInt, isFP) \
767 name8,
768 REGX8632_TABLE
769 #undef X
770 };
771 static IceString RegNames16[] = {
772 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
773 frameptr, isI8, isInt, isFP) \
774 name16,
775 REGX8632_TABLE
776 #undef X
777 };
778 switch (Ty) {
779 case IceType_i1:
780 case IceType_i8:
781 return RegNames8[RegNum];
782 case IceType_i16:
783 return RegNames16[RegNum];
784 default:
785 return RegNames[RegNum];
786 }
787 } 743 }
788 744
789 template <class Machine> 745 template <class Machine>
790 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const { 746 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const {
791 Ostream &Str = Ctx->getStrEmit(); 747 Ostream &Str = Ctx->getStrEmit();
792 if (Var->hasReg()) { 748 if (Var->hasReg()) {
793 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); 749 Str << "%" << getRegName(Var->getRegNum(), Var->getType());
794 return; 750 return;
795 } 751 }
796 if (Var->getWeight().isInf()) { 752 if (Var->getWeight().isInf()) {
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
877 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); 833 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
878 return; 834 return;
879 } 835 }
880 if (isVectorType(Ty)) { 836 if (isVectorType(Ty)) {
881 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); 837 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);
882 } 838 }
883 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); 839 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
884 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); 840 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
885 if (Arg->hasReg()) { 841 if (Arg->hasReg()) {
886 assert(Ty != IceType_i64); 842 assert(Ty != IceType_i64);
887 OperandX8632Mem *Mem = OperandX8632Mem::create( 843 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create(
888 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); 844 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
889 if (isVectorType(Arg->getType())) { 845 if (isVectorType(Arg->getType())) {
890 _movp(Arg, Mem); 846 _movp(Arg, Mem);
891 } else { 847 } else {
892 _mov(Arg, Mem); 848 _mov(Arg, Mem);
893 } 849 }
894 // This argument-copying instruction uses an explicit 850 // This argument-copying instruction uses an explicit Traits::X86OperandMem
895 // OperandX8632Mem operand instead of a Variable, so its 851 // operand instead of a Variable, so its fill-from-stack operation has to be
896 // fill-from-stack operation has to be tracked separately for 852 // tracked separately for statistics.
897 // statistics.
898 Ctx->statsUpdateFills(); 853 Ctx->statsUpdateFills();
899 } 854 }
900 } 855 }
901 856
902 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { 857 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
903 return IceType_i32; 858 return IceType_i32;
904 } 859 }
905 860
906 template <class Machine> void TargetX86Base<Machine>::addProlog(CfgNode *Node) { 861 template <class Machine> void TargetX86Base<Machine>::addProlog(CfgNode *Node) {
907 // Stack frame layout: 862 // Stack frame layout:
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
958 // If there is a separate locals area, this specifies the alignment 913 // If there is a separate locals area, this specifies the alignment
959 // for it. 914 // for it.
960 uint32_t LocalsSlotsAlignmentBytes = 0; 915 uint32_t LocalsSlotsAlignmentBytes = 0;
961 // The entire spill locations area gets aligned to largest natural 916 // The entire spill locations area gets aligned to largest natural
962 // alignment of the variables that have a spill slot. 917 // alignment of the variables that have a spill slot.
963 uint32_t SpillAreaAlignmentBytes = 0; 918 uint32_t SpillAreaAlignmentBytes = 0;
964 // A spill slot linked to a variable with a stack slot should reuse 919 // A spill slot linked to a variable with a stack slot should reuse
965 // that stack slot. 920 // that stack slot.
966 std::function<bool(Variable *)> TargetVarHook = 921 std::function<bool(Variable *)> TargetVarHook =
967 [&VariablesLinkedToSpillSlots](Variable *Var) { 922 [&VariablesLinkedToSpillSlots](Variable *Var) {
968 if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) { 923 if (auto *SpillVar =
924 llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
969 assert(Var->getWeight().isZero()); 925 assert(Var->getWeight().isZero());
970 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) { 926 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
971 VariablesLinkedToSpillSlots.push_back(Var); 927 VariablesLinkedToSpillSlots.push_back(Var);
972 return true; 928 return true;
973 } 929 }
974 } 930 }
975 return false; 931 return false;
976 }; 932 };
977 933
978 // Compute the list of spilled variables and bounds for GlobalsSize, etc. 934 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
1062 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); 1018 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1063 } 1019 }
1064 1020
1065 // Fill in stack offsets for locals. 1021 // Fill in stack offsets for locals.
1066 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, 1022 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1067 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, 1023 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
1068 IsEbpBasedFrame); 1024 IsEbpBasedFrame);
1069 // Assign stack offsets to variables that have been linked to spilled 1025 // Assign stack offsets to variables that have been linked to spilled
1070 // variables. 1026 // variables.
1071 for (Variable *Var : VariablesLinkedToSpillSlots) { 1027 for (Variable *Var : VariablesLinkedToSpillSlots) {
1072 Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo(); 1028 Variable *Linked =
1029 (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
1073 Var->setStackOffset(Linked->getStackOffset()); 1030 Var->setStackOffset(Linked->getStackOffset());
1074 } 1031 }
1075 this->HasComputedFrame = true; 1032 this->HasComputedFrame = true;
1076 1033
1077 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { 1034 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
1078 OstreamLocker L(Func->getContext()); 1035 OstreamLocker L(Func->getContext());
1079 Ostream &Str = Func->getContext()->getStrDump(); 1036 Ostream &Str = Func->getContext()->getStrDump();
1080 1037
1081 Str << "Stack layout:\n"; 1038 Str << "Stack layout:\n";
1082 uint32_t EspAdjustmentPaddingSize = 1039 uint32_t EspAdjustmentPaddingSize =
(...skipping 16 matching lines...) Expand all
1099 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes 1056 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1100 << " bytes\n" 1057 << " bytes\n"
1101 << " is ebp based = " << IsEbpBasedFrame << "\n"; 1058 << " is ebp based = " << IsEbpBasedFrame << "\n";
1102 } 1059 }
1103 } 1060 }
1104 1061
1105 template <class Machine> void TargetX86Base<Machine>::addEpilog(CfgNode *Node) { 1062 template <class Machine> void TargetX86Base<Machine>::addEpilog(CfgNode *Node) {
1106 InstList &Insts = Node->getInsts(); 1063 InstList &Insts = Node->getInsts();
1107 InstList::reverse_iterator RI, E; 1064 InstList::reverse_iterator RI, E;
1108 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { 1065 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1109 if (llvm::isa<InstX8632Ret>(*RI)) 1066 if (llvm::isa<typename Traits::Insts::Ret>(*RI))
1110 break; 1067 break;
1111 } 1068 }
1112 if (RI == E) 1069 if (RI == E)
1113 return; 1070 return;
1114 1071
1115 // Convert the reverse_iterator position into its corresponding 1072 // Convert the reverse_iterator position into its corresponding
1116 // (forward) iterator position. 1073 // (forward) iterator position.
1117 InstList::iterator InsertPoint = RI.base(); 1074 InstList::iterator InsertPoint = RI.base();
1118 --InsertPoint; 1075 --InsertPoint;
1119 Context.init(Node); 1076 Context.init(Node);
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
1209 return Operand; 1166 return Operand;
1210 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { 1167 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1211 split64(Var); 1168 split64(Var);
1212 return Var->getLo(); 1169 return Var->getLo();
1213 } 1170 }
1214 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 1171 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1215 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>( 1172 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>(
1216 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); 1173 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
1217 return legalize(ConstInt); 1174 return legalize(ConstInt);
1218 } 1175 }
1219 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { 1176 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
1220 OperandX8632Mem *MemOperand = OperandX8632Mem::create( 1177 auto *MemOperand = Traits::X86OperandMem::create(
1221 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), 1178 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
1222 Mem->getShift(), Mem->getSegmentRegister()); 1179 Mem->getShift(), Mem->getSegmentRegister());
1223 // Test if we should randomize or pool the offset, if so randomize it or 1180 // Test if we should randomize or pool the offset, if so randomize it or
1224 // pool it then create mem operand with the blinded/pooled constant. 1181 // pool it then create mem operand with the blinded/pooled constant.
1225 // Otherwise, return the mem operand as ordinary mem operand. 1182 // Otherwise, return the mem operand as ordinary mem operand.
1226 return legalize(MemOperand); 1183 return legalize(MemOperand);
1227 } 1184 }
1228 llvm_unreachable("Unsupported operand type"); 1185 llvm_unreachable("Unsupported operand type");
1229 return nullptr; 1186 return nullptr;
1230 } 1187 }
1231 1188
1232 template <class Machine> 1189 template <class Machine>
1233 Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) { 1190 Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) {
1234 assert(Operand->getType() == IceType_i64 || 1191 assert(Operand->getType() == IceType_i64 ||
1235 Operand->getType() == IceType_f64); 1192 Operand->getType() == IceType_f64);
1236 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) 1193 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
1237 return Operand; 1194 return Operand;
1238 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { 1195 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1239 split64(Var); 1196 split64(Var);
1240 return Var->getHi(); 1197 return Var->getHi();
1241 } 1198 }
1242 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 1199 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1243 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>( 1200 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>(
1244 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32))); 1201 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32)));
1245 // check if we need to blind/pool the constant 1202 // check if we need to blind/pool the constant
1246 return legalize(ConstInt); 1203 return legalize(ConstInt);
1247 } 1204 }
1248 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { 1205 if (auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
1249 Constant *Offset = Mem->getOffset(); 1206 Constant *Offset = Mem->getOffset();
1250 if (Offset == nullptr) { 1207 if (Offset == nullptr) {
1251 Offset = Ctx->getConstantInt32(4); 1208 Offset = Ctx->getConstantInt32(4);
1252 } else if (ConstantInteger32 *IntOffset = 1209 } else if (ConstantInteger32 *IntOffset =
1253 llvm::dyn_cast<ConstantInteger32>(Offset)) { 1210 llvm::dyn_cast<ConstantInteger32>(Offset)) {
1254 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue()); 1211 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue());
1255 } else if (ConstantRelocatable *SymOffset = 1212 } else if (ConstantRelocatable *SymOffset =
1256 llvm::dyn_cast<ConstantRelocatable>(Offset)) { 1213 llvm::dyn_cast<ConstantRelocatable>(Offset)) {
1257 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4)); 1214 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
1258 Offset = 1215 Offset =
1259 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(), 1216 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(),
1260 SymOffset->getSuppressMangling()); 1217 SymOffset->getSuppressMangling());
1261 } 1218 }
1262 OperandX8632Mem *MemOperand = OperandX8632Mem::create( 1219 auto *MemOperand = Traits::X86OperandMem::create(
1263 Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(), 1220 Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(),
1264 Mem->getShift(), Mem->getSegmentRegister()); 1221 Mem->getShift(), Mem->getSegmentRegister());
1265 // Test if the Offset is an eligible i32 constants for randomization and 1222 // Test if the Offset is an eligible i32 constants for randomization and
1266 // pooling. Blind/pool it if it is. Otherwise return as oridinary mem 1223 // pooling. Blind/pool it if it is. Otherwise return as oridinary mem
1267 // operand. 1224 // operand.
1268 return legalize(MemOperand); 1225 return legalize(MemOperand);
1269 } 1226 }
1270 llvm_unreachable("Unsupported operand type"); 1227 llvm_unreachable("Unsupported operand type");
1271 return nullptr; 1228 return nullptr;
1272 } 1229 }
1273 1230
1274 template <class Machine> 1231 template <class Machine>
1275 llvm::SmallBitVector 1232 llvm::SmallBitVector
1276 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, 1233 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,
1277 RegSetMask Exclude) const { 1234 RegSetMask Exclude) const {
1278 llvm::SmallBitVector Registers(Traits::RegisterSet::Reg_NUM); 1235 return Traits::getRegisterSet(Include, Exclude);
1279
1280 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
1281 frameptr, isI8, isInt, isFP) \
1282 if (scratch && (Include & RegSet_CallerSave)) \
1283 Registers[Traits::RegisterSet::val] = true; \
1284 if (preserved && (Include & RegSet_CalleeSave)) \
1285 Registers[Traits::RegisterSet::val] = true; \
1286 if (stackptr && (Include & RegSet_StackPointer)) \
1287 Registers[Traits::RegisterSet::val] = true; \
1288 if (frameptr && (Include & RegSet_FramePointer)) \
1289 Registers[Traits::RegisterSet::val] = true; \
1290 if (scratch && (Exclude & RegSet_CallerSave)) \
1291 Registers[Traits::RegisterSet::val] = false; \
1292 if (preserved && (Exclude & RegSet_CalleeSave)) \
1293 Registers[Traits::RegisterSet::val] = false; \
1294 if (stackptr && (Exclude & RegSet_StackPointer)) \
1295 Registers[Traits::RegisterSet::val] = false; \
1296 if (frameptr && (Exclude & RegSet_FramePointer)) \
1297 Registers[Traits::RegisterSet::val] = false;
1298
1299 REGX8632_TABLE
1300
1301 #undef X
1302
1303 return Registers;
1304 } 1236 }
1305 1237
1306 template <class Machine> 1238 template <class Machine>
1307 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { 1239 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
1308 IsEbpBasedFrame = true; 1240 IsEbpBasedFrame = true;
1309 // Conservatively require the stack to be aligned. Some stack 1241 // Conservatively require the stack to be aligned. Some stack
1310 // adjustment operations implemented below assume that the stack is 1242 // adjustment operations implemented below assume that the stack is
1311 // aligned before the alloca. All the alloca code ensures that the 1243 // aligned before the alloca. All the alloca code ensures that the
1312 // stack alignment is preserved after the alloca. The stack alignment 1244 // stack alignment is preserved after the alloca. The stack alignment
1313 // restriction can be relaxed in some cases. 1245 // restriction can be relaxed in some cases.
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
1416 return false; 1348 return false;
1417 // Limit the number of lea/shl operations for a single multiply, to 1349 // Limit the number of lea/shl operations for a single multiply, to
1418 // a somewhat arbitrary choice of 3. 1350 // a somewhat arbitrary choice of 3.
1419 const uint32_t MaxOpsForOptimizedMul = 3; 1351 const uint32_t MaxOpsForOptimizedMul = 3;
1420 if (CountOps > MaxOpsForOptimizedMul) 1352 if (CountOps > MaxOpsForOptimizedMul)
1421 return false; 1353 return false;
1422 _mov(T, Src0); 1354 _mov(T, Src0);
1423 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1355 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1424 for (uint32_t i = 0; i < Count9; ++i) { 1356 for (uint32_t i = 0; i < Count9; ++i) {
1425 const uint16_t Shift = 3; // log2(9-1) 1357 const uint16_t Shift = 3; // log2(9-1)
1426 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift)); 1358 _lea(T,
1359 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1427 _set_dest_nonkillable(); 1360 _set_dest_nonkillable();
1428 } 1361 }
1429 for (uint32_t i = 0; i < Count5; ++i) { 1362 for (uint32_t i = 0; i < Count5; ++i) {
1430 const uint16_t Shift = 2; // log2(5-1) 1363 const uint16_t Shift = 2; // log2(5-1)
1431 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift)); 1364 _lea(T,
1365 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1432 _set_dest_nonkillable(); 1366 _set_dest_nonkillable();
1433 } 1367 }
1434 for (uint32_t i = 0; i < Count3; ++i) { 1368 for (uint32_t i = 0; i < Count3; ++i) {
1435 const uint16_t Shift = 1; // log2(3-1) 1369 const uint16_t Shift = 1; // log2(3-1)
1436 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift)); 1370 _lea(T,
1371 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1437 _set_dest_nonkillable(); 1372 _set_dest_nonkillable();
1438 } 1373 }
1439 if (Count2) { 1374 if (Count2) {
1440 _shl(T, Ctx->getConstantInt(Ty, Count2)); 1375 _shl(T, Ctx->getConstantInt(Ty, Count2));
1441 } 1376 }
1442 if (Src1IsNegative) 1377 if (Src1IsNegative)
1443 _neg(T); 1378 _neg(T);
1444 _mov(Dest, T); 1379 _mov(Dest, T);
1445 return true; 1380 return true;
1446 } 1381 }
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after
1594 // je L1 1529 // je L1
1595 // use(t3) 1530 // use(t3)
1596 // t3 = t2 1531 // t3 = t2
1597 // t2 = 0 1532 // t2 = 0
1598 // L1: 1533 // L1:
1599 // a.lo = t2 1534 // a.lo = t2
1600 // a.hi = t3 1535 // a.hi = t3
1601 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; 1536 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1602 Constant *BitTest = Ctx->getConstantInt32(0x20); 1537 Constant *BitTest = Ctx->getConstantInt32(0x20);
1603 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1538 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1604 InstX8632Label *Label = InstX8632Label::create(Func, this); 1539 typename Traits::Insts::Label *Label =
1540 Traits::Insts::Label::create(Func, this);
1605 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); 1541 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
1606 _mov(T_2, Src0Lo); 1542 _mov(T_2, Src0Lo);
1607 _mov(T_3, Src0Hi); 1543 _mov(T_3, Src0Hi);
1608 _shld(T_3, T_2, T_1); 1544 _shld(T_3, T_2, T_1);
1609 _shl(T_2, T_1); 1545 _shl(T_2, T_1);
1610 _test(T_1, BitTest); 1546 _test(T_1, BitTest);
1611 _br(Traits::Cond::Br_e, Label); 1547 _br(Traits::Cond::Br_e, Label);
1612 // T_2 and T_3 are being assigned again because of the 1548 // T_2 and T_3 are being assigned again because of the
1613 // intra-block control flow, so we need the _mov_nonkillable 1549 // intra-block control flow, so we need the _mov_nonkillable
1614 // variant to avoid liveness problems. 1550 // variant to avoid liveness problems.
(...skipping 14 matching lines...) Expand all
1629 // je L1 1565 // je L1
1630 // use(t2) 1566 // use(t2)
1631 // t2 = t3 1567 // t2 = t3
1632 // t3 = 0 1568 // t3 = 0
1633 // L1: 1569 // L1:
1634 // a.lo = t2 1570 // a.lo = t2
1635 // a.hi = t3 1571 // a.hi = t3
1636 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; 1572 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1637 Constant *BitTest = Ctx->getConstantInt32(0x20); 1573 Constant *BitTest = Ctx->getConstantInt32(0x20);
1638 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1574 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1639 InstX8632Label *Label = InstX8632Label::create(Func, this); 1575 typename Traits::Insts::Label *Label =
1576 Traits::Insts::Label::create(Func, this);
1640 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); 1577 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
1641 _mov(T_2, Src0Lo); 1578 _mov(T_2, Src0Lo);
1642 _mov(T_3, Src0Hi); 1579 _mov(T_3, Src0Hi);
1643 _shrd(T_2, T_3, T_1); 1580 _shrd(T_2, T_3, T_1);
1644 _shr(T_3, T_1); 1581 _shr(T_3, T_1);
1645 _test(T_1, BitTest); 1582 _test(T_1, BitTest);
1646 _br(Traits::Cond::Br_e, Label); 1583 _br(Traits::Cond::Br_e, Label);
1647 // T_2 and T_3 are being assigned again because of the 1584 // T_2 and T_3 are being assigned again because of the
1648 // intra-block control flow, so we need the _mov_nonkillable 1585 // intra-block control flow, so we need the _mov_nonkillable
1649 // variant to avoid liveness problems. 1586 // variant to avoid liveness problems.
(...skipping 14 matching lines...) Expand all
1664 // je L1 1601 // je L1
1665 // use(t2) 1602 // use(t2)
1666 // t2 = t3 1603 // t2 = t3
1667 // t3 = sar t3, 0x1f 1604 // t3 = sar t3, 0x1f
1668 // L1: 1605 // L1:
1669 // a.lo = t2 1606 // a.lo = t2
1670 // a.hi = t3 1607 // a.hi = t3
1671 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; 1608 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1672 Constant *BitTest = Ctx->getConstantInt32(0x20); 1609 Constant *BitTest = Ctx->getConstantInt32(0x20);
1673 Constant *SignExtend = Ctx->getConstantInt32(0x1f); 1610 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
1674 InstX8632Label *Label = InstX8632Label::create(Func, this); 1611 typename Traits::Insts::Label *Label =
1612 Traits::Insts::Label::create(Func, this);
1675 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); 1613 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
1676 _mov(T_2, Src0Lo); 1614 _mov(T_2, Src0Lo);
1677 _mov(T_3, Src0Hi); 1615 _mov(T_3, Src0Hi);
1678 _shrd(T_2, T_3, T_1); 1616 _shrd(T_2, T_3, T_1);
1679 _sar(T_3, T_1); 1617 _sar(T_3, T_1);
1680 _test(T_1, BitTest); 1618 _test(T_1, BitTest);
1681 _br(Traits::Cond::Br_e, Label); 1619 _br(Traits::Cond::Br_e, Label);
1682 // T_2 and T_3 are being assigned again because of the 1620 // T_2 and T_3 are being assigned again because of the
1683 // intra-block control flow, so T_2 needs the _mov_nonkillable 1621 // intra-block control flow, so T_2 needs the _mov_nonkillable
1684 // variant to avoid liveness problems. T_3 doesn't need special 1622 // variant to avoid liveness problems. T_3 doesn't need special
(...skipping 17 matching lines...) Expand all
1702 case InstArithmetic::Srem: 1640 case InstArithmetic::Srem:
1703 llvm_unreachable("Call-helper-involved instruction for i64 type \ 1641 llvm_unreachable("Call-helper-involved instruction for i64 type \
1704 should have already been handled before"); 1642 should have already been handled before");
1705 break; 1643 break;
1706 } 1644 }
1707 return; 1645 return;
1708 } 1646 }
1709 if (isVectorType(Dest->getType())) { 1647 if (isVectorType(Dest->getType())) {
1710 // TODO: Trap on integer divide and integer modulo by zero. 1648 // TODO: Trap on integer divide and integer modulo by zero.
1711 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 1649 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
1712 if (llvm::isa<OperandX8632Mem>(Src1)) 1650 if (llvm::isa<typename Traits::X86OperandMem>(Src1))
1713 Src1 = legalizeToVar(Src1); 1651 Src1 = legalizeToVar(Src1);
1714 switch (Inst->getOp()) { 1652 switch (Inst->getOp()) {
1715 case InstArithmetic::_num: 1653 case InstArithmetic::_num:
1716 llvm_unreachable("Unknown arithmetic operator"); 1654 llvm_unreachable("Unknown arithmetic operator");
1717 break; 1655 break;
1718 case InstArithmetic::Add: { 1656 case InstArithmetic::Add: {
1719 Variable *T = makeReg(Dest->getType()); 1657 Variable *T = makeReg(Dest->getType());
1720 _movp(T, Src0); 1658 _movp(T, Src0);
1721 _padd(T, Src1); 1659 _padd(T, Src1);
1722 _movp(Dest, T); 1660 _movp(Dest, T);
(...skipping 478 matching lines...) Expand 10 before | Expand all | Expand 10 after
2201 XmmArgs.push_back(Arg); 2139 XmmArgs.push_back(Arg);
2202 } else { 2140 } else {
2203 StackArgs.push_back(Arg); 2141 StackArgs.push_back(Arg);
2204 if (isVectorType(Arg->getType())) { 2142 if (isVectorType(Arg->getType())) {
2205 ParameterAreaSizeBytes = 2143 ParameterAreaSizeBytes =
2206 Traits::applyStackAlignment(ParameterAreaSizeBytes); 2144 Traits::applyStackAlignment(ParameterAreaSizeBytes);
2207 } 2145 }
2208 Variable *esp = 2146 Variable *esp =
2209 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); 2147 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
2210 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); 2148 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
2211 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); 2149 StackArgLocations.push_back(
2150 Traits::X86OperandMem::create(Func, Ty, esp, Loc));
2212 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); 2151 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
2213 } 2152 }
2214 } 2153 }
2215 2154
2216 // Adjust the parameter area so that the stack is aligned. It is 2155 // Adjust the parameter area so that the stack is aligned. It is
2217 // assumed that the stack is already aligned at the start of the 2156 // assumed that the stack is already aligned at the start of the
2218 // calling sequence. 2157 // calling sequence.
2219 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); 2158 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
2220 2159
2221 // Subtract the appropriate amount for the argument area. This also 2160 // Subtract the appropriate amount for the argument area. This also
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
2298 } else { 2237 } else {
2299 Variable *CallTargetVar = nullptr; 2238 Variable *CallTargetVar = nullptr;
2300 _mov(CallTargetVar, CallTarget); 2239 _mov(CallTargetVar, CallTarget);
2301 _bundle_lock(InstBundleLock::Opt_AlignToEnd); 2240 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
2302 const SizeT BundleSize = 2241 const SizeT BundleSize =
2303 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes(); 2242 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes();
2304 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); 2243 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
2305 CallTarget = CallTargetVar; 2244 CallTarget = CallTargetVar;
2306 } 2245 }
2307 } 2246 }
2308 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); 2247 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
2309 Context.insert(NewCall); 2248 Context.insert(NewCall);
2310 if (NeedSandboxing) 2249 if (NeedSandboxing)
2311 _bundle_unlock(); 2250 _bundle_unlock();
2312 if (ReturnRegHi) 2251 if (ReturnRegHi)
2313 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); 2252 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
2314 2253
2315 // Add the appropriate offset to esp. The call instruction takes care 2254 // Add the appropriate offset to esp. The call instruction takes care
2316 // of resetting the stack offset during emission. 2255 // of resetting the stack offset during emission.
2317 if (ParameterAreaSizeBytes) { 2256 if (ParameterAreaSizeBytes) {
2318 Variable *esp = 2257 Variable *esp =
(...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after
2525 _and(T, Ctx->getConstantInt1(1)); 2464 _and(T, Ctx->getConstantInt1(1));
2526 _mov(Dest, T); 2465 _mov(Dest, T);
2527 } 2466 }
2528 break; 2467 break;
2529 } 2468 }
2530 case InstCast::Fptrunc: 2469 case InstCast::Fptrunc:
2531 case InstCast::Fpext: { 2470 case InstCast::Fpext: {
2532 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2471 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2533 // t1 = cvt Src0RM; Dest = t1 2472 // t1 = cvt Src0RM; Dest = t1
2534 Variable *T = makeReg(Dest->getType()); 2473 Variable *T = makeReg(Dest->getType());
2535 _cvt(T, Src0RM, InstX8632Cvt::Float2float); 2474 _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float);
2536 _mov(Dest, T); 2475 _mov(Dest, T);
2537 break; 2476 break;
2538 } 2477 }
2539 case InstCast::Fptosi: 2478 case InstCast::Fptosi:
2540 if (isVectorType(Dest->getType())) { 2479 if (isVectorType(Dest->getType())) {
2541 assert(Dest->getType() == IceType_v4i32 && 2480 assert(Dest->getType() == IceType_v4i32 &&
2542 Inst->getSrc(0)->getType() == IceType_v4f32); 2481 Inst->getSrc(0)->getType() == IceType_v4f32);
2543 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2482 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2544 if (llvm::isa<OperandX8632Mem>(Src0RM)) 2483 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2545 Src0RM = legalizeToVar(Src0RM); 2484 Src0RM = legalizeToVar(Src0RM);
2546 Variable *T = makeReg(Dest->getType()); 2485 Variable *T = makeReg(Dest->getType());
2547 _cvt(T, Src0RM, InstX8632Cvt::Tps2dq); 2486 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
2548 _movp(Dest, T); 2487 _movp(Dest, T);
2549 } else if (Dest->getType() == IceType_i64) { 2488 } else if (Dest->getType() == IceType_i64) {
2550 // Use a helper for converting floating-point values to 64-bit 2489 // Use a helper for converting floating-point values to 64-bit
2551 // integers. SSE2 appears to have no way to convert from xmm 2490 // integers. SSE2 appears to have no way to convert from xmm
2552 // registers to something like the edx:eax register pair, and 2491 // registers to something like the edx:eax register pair, and
2553 // gcc and clang both want to use x87 instructions complete with 2492 // gcc and clang both want to use x87 instructions complete with
2554 // temporary manipulation of the status word. This helper is 2493 // temporary manipulation of the status word. This helper is
2555 // not needed for x86-64. 2494 // not needed for x86-64.
2556 split64(Dest); 2495 split64(Dest);
2557 const SizeT MaxSrcs = 1; 2496 const SizeT MaxSrcs = 1;
2558 Type SrcType = Inst->getSrc(0)->getType(); 2497 Type SrcType = Inst->getSrc(0)->getType();
2559 InstCall *Call = 2498 InstCall *Call =
2560 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 2499 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
2561 : H_fptosi_f64_i64, 2500 : H_fptosi_f64_i64,
2562 Dest, MaxSrcs); 2501 Dest, MaxSrcs);
2563 Call->addArg(Inst->getSrc(0)); 2502 Call->addArg(Inst->getSrc(0));
2564 lowerCall(Call); 2503 lowerCall(Call);
2565 } else { 2504 } else {
2566 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2505 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2567 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2506 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2568 Variable *T_1 = makeReg(IceType_i32); 2507 Variable *T_1 = makeReg(IceType_i32);
2569 Variable *T_2 = makeReg(Dest->getType()); 2508 Variable *T_2 = makeReg(Dest->getType());
2570 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si); 2509 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
2571 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2510 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2572 if (Dest->getType() == IceType_i1) 2511 if (Dest->getType() == IceType_i1)
2573 _and(T_2, Ctx->getConstantInt1(1)); 2512 _and(T_2, Ctx->getConstantInt1(1));
2574 _mov(Dest, T_2); 2513 _mov(Dest, T_2);
2575 } 2514 }
2576 break; 2515 break;
2577 case InstCast::Fptoui: 2516 case InstCast::Fptoui:
2578 if (isVectorType(Dest->getType())) { 2517 if (isVectorType(Dest->getType())) {
2579 assert(Dest->getType() == IceType_v4i32 && 2518 assert(Dest->getType() == IceType_v4i32 &&
2580 Inst->getSrc(0)->getType() == IceType_v4f32); 2519 Inst->getSrc(0)->getType() == IceType_v4f32);
(...skipping 18 matching lines...) Expand all
2599 } 2538 }
2600 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); 2539 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2601 Call->addArg(Inst->getSrc(0)); 2540 Call->addArg(Inst->getSrc(0));
2602 lowerCall(Call); 2541 lowerCall(Call);
2603 return; 2542 return;
2604 } else { 2543 } else {
2605 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2544 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2606 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2545 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2607 Variable *T_1 = makeReg(IceType_i32); 2546 Variable *T_1 = makeReg(IceType_i32);
2608 Variable *T_2 = makeReg(Dest->getType()); 2547 Variable *T_2 = makeReg(Dest->getType());
2609 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si); 2548 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
2610 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2549 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2611 if (Dest->getType() == IceType_i1) 2550 if (Dest->getType() == IceType_i1)
2612 _and(T_2, Ctx->getConstantInt1(1)); 2551 _and(T_2, Ctx->getConstantInt1(1));
2613 _mov(Dest, T_2); 2552 _mov(Dest, T_2);
2614 } 2553 }
2615 break; 2554 break;
2616 case InstCast::Sitofp: 2555 case InstCast::Sitofp:
2617 if (isVectorType(Dest->getType())) { 2556 if (isVectorType(Dest->getType())) {
2618 assert(Dest->getType() == IceType_v4f32 && 2557 assert(Dest->getType() == IceType_v4f32 &&
2619 Inst->getSrc(0)->getType() == IceType_v4i32); 2558 Inst->getSrc(0)->getType() == IceType_v4i32);
2620 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2559 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2621 if (llvm::isa<OperandX8632Mem>(Src0RM)) 2560 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2622 Src0RM = legalizeToVar(Src0RM); 2561 Src0RM = legalizeToVar(Src0RM);
2623 Variable *T = makeReg(Dest->getType()); 2562 Variable *T = makeReg(Dest->getType());
2624 _cvt(T, Src0RM, InstX8632Cvt::Dq2ps); 2563 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
2625 _movp(Dest, T); 2564 _movp(Dest, T);
2626 } else if (Inst->getSrc(0)->getType() == IceType_i64) { 2565 } else if (Inst->getSrc(0)->getType() == IceType_i64) {
2627 // Use a helper for x86-32. 2566 // Use a helper for x86-32.
2628 const SizeT MaxSrcs = 1; 2567 const SizeT MaxSrcs = 1;
2629 Type DestType = Dest->getType(); 2568 Type DestType = Dest->getType();
2630 InstCall *Call = 2569 InstCall *Call =
2631 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 2570 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
2632 : H_sitofp_i64_f64, 2571 : H_sitofp_i64_f64,
2633 Dest, MaxSrcs); 2572 Dest, MaxSrcs);
2634 // TODO: Call the correct compiler-rt helper function. 2573 // TODO: Call the correct compiler-rt helper function.
2635 Call->addArg(Inst->getSrc(0)); 2574 Call->addArg(Inst->getSrc(0));
2636 lowerCall(Call); 2575 lowerCall(Call);
2637 return; 2576 return;
2638 } else { 2577 } else {
2639 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2578 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2640 // Sign-extend the operand. 2579 // Sign-extend the operand.
2641 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 2580 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
2642 Variable *T_1 = makeReg(IceType_i32); 2581 Variable *T_1 = makeReg(IceType_i32);
2643 Variable *T_2 = makeReg(Dest->getType()); 2582 Variable *T_2 = makeReg(Dest->getType());
2644 if (Src0RM->getType() == IceType_i32) 2583 if (Src0RM->getType() == IceType_i32)
2645 _mov(T_1, Src0RM); 2584 _mov(T_1, Src0RM);
2646 else 2585 else
2647 _movsx(T_1, Src0RM); 2586 _movsx(T_1, Src0RM);
2648 _cvt(T_2, T_1, InstX8632Cvt::Si2ss); 2587 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2649 _mov(Dest, T_2); 2588 _mov(Dest, T_2);
2650 } 2589 }
2651 break; 2590 break;
2652 case InstCast::Uitofp: { 2591 case InstCast::Uitofp: {
2653 Operand *Src0 = Inst->getSrc(0); 2592 Operand *Src0 = Inst->getSrc(0);
2654 if (isVectorType(Src0->getType())) { 2593 if (isVectorType(Src0->getType())) {
2655 assert(Dest->getType() == IceType_v4f32 && 2594 assert(Dest->getType() == IceType_v4f32 &&
2656 Src0->getType() == IceType_v4i32); 2595 Src0->getType() == IceType_v4i32);
2657 const SizeT MaxSrcs = 1; 2596 const SizeT MaxSrcs = 1;
2658 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); 2597 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
(...skipping 20 matching lines...) Expand all
2679 } else { 2618 } else {
2680 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2619 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2681 // Zero-extend the operand. 2620 // Zero-extend the operand.
2682 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 2621 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
2683 Variable *T_1 = makeReg(IceType_i32); 2622 Variable *T_1 = makeReg(IceType_i32);
2684 Variable *T_2 = makeReg(Dest->getType()); 2623 Variable *T_2 = makeReg(Dest->getType());
2685 if (Src0RM->getType() == IceType_i32) 2624 if (Src0RM->getType() == IceType_i32)
2686 _mov(T_1, Src0RM); 2625 _mov(T_1, Src0RM);
2687 else 2626 else
2688 _movzx(T_1, Src0RM); 2627 _movzx(T_1, Src0RM);
2689 _cvt(T_2, T_1, InstX8632Cvt::Si2ss); 2628 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2690 _mov(Dest, T_2); 2629 _mov(Dest, T_2);
2691 } 2630 }
2692 break; 2631 break;
2693 } 2632 }
2694 case InstCast::Bitcast: { 2633 case InstCast::Bitcast: {
2695 Operand *Src0 = Inst->getSrc(0); 2634 Operand *Src0 = Inst->getSrc(0);
2696 if (Dest->getType() == Src0->getType()) { 2635 if (Dest->getType() == Src0->getType()) {
2697 InstAssign *Assign = InstAssign::create(Func, Dest, Src0); 2636 InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
2698 lowerAssign(Assign); 2637 lowerAssign(Assign);
2699 return; 2638 return;
(...skipping 21 matching lines...) Expand all
2721 (void)DestType; 2660 (void)DestType;
2722 assert((DestType == IceType_i32 && SrcType == IceType_f32) || 2661 assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
2723 (DestType == IceType_f32 && SrcType == IceType_i32)); 2662 (DestType == IceType_f32 && SrcType == IceType_i32));
2724 // a.i32 = bitcast b.f32 ==> 2663 // a.i32 = bitcast b.f32 ==>
2725 // t.f32 = b.f32 2664 // t.f32 = b.f32
2726 // s.f32 = spill t.f32 2665 // s.f32 = spill t.f32
2727 // a.i32 = s.f32 2666 // a.i32 = s.f32
2728 Variable *T = nullptr; 2667 Variable *T = nullptr;
2729 // TODO: Should be able to force a spill setup by calling legalize() with 2668 // TODO: Should be able to force a spill setup by calling legalize() with
2730 // Legal_Mem and not Legal_Reg or Legal_Imm. 2669 // Legal_Mem and not Legal_Reg or Legal_Imm.
2731 SpillVariable *SpillVar = 2670 typename Traits::SpillVariable *SpillVar =
2732 Func->template makeVariable<SpillVariable>(SrcType); 2671 Func->template makeVariable<typename Traits::SpillVariable>(SrcType);
2733 SpillVar->setLinkedTo(Dest); 2672 SpillVar->setLinkedTo(Dest);
2734 Variable *Spill = SpillVar; 2673 Variable *Spill = SpillVar;
2735 Spill->setWeight(RegWeight::Zero); 2674 Spill->setWeight(RegWeight::Zero);
2736 _mov(T, Src0RM); 2675 _mov(T, Src0RM);
2737 _mov(Spill, T); 2676 _mov(Spill, T);
2738 _mov(Dest, Spill); 2677 _mov(Dest, Spill);
2739 } break; 2678 } break;
2740 case IceType_i64: { 2679 case IceType_i64: {
2741 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2680 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2742 assert(Src0RM->getType() == IceType_f64); 2681 assert(Src0RM->getType() == IceType_f64);
2743 // a.i64 = bitcast b.f64 ==> 2682 // a.i64 = bitcast b.f64 ==>
2744 // s.f64 = spill b.f64 2683 // s.f64 = spill b.f64
2745 // t_lo.i32 = lo(s.f64) 2684 // t_lo.i32 = lo(s.f64)
2746 // a_lo.i32 = t_lo.i32 2685 // a_lo.i32 = t_lo.i32
2747 // t_hi.i32 = hi(s.f64) 2686 // t_hi.i32 = hi(s.f64)
2748 // a_hi.i32 = t_hi.i32 2687 // a_hi.i32 = t_hi.i32
2749 Operand *SpillLo, *SpillHi; 2688 Operand *SpillLo, *SpillHi;
2750 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) { 2689 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
2751 SpillVariable *SpillVar = 2690 typename Traits::SpillVariable *SpillVar =
2752 Func->template makeVariable<SpillVariable>(IceType_f64); 2691 Func->template makeVariable<typename Traits::SpillVariable>(
2692 IceType_f64);
2753 SpillVar->setLinkedTo(Src0Var); 2693 SpillVar->setLinkedTo(Src0Var);
2754 Variable *Spill = SpillVar; 2694 Variable *Spill = SpillVar;
2755 Spill->setWeight(RegWeight::Zero); 2695 Spill->setWeight(RegWeight::Zero);
2756 _movq(Spill, Src0RM); 2696 _movq(Spill, Src0RM);
2757 SpillLo = VariableSplit::create(Func, Spill, VariableSplit::Low); 2697 SpillLo = Traits::VariableSplit::create(Func, Spill,
2758 SpillHi = VariableSplit::create(Func, Spill, VariableSplit::High); 2698 Traits::VariableSplit::Low);
2699 SpillHi = Traits::VariableSplit::create(Func, Spill,
2700 Traits::VariableSplit::High);
2759 } else { 2701 } else {
2760 SpillLo = loOperand(Src0RM); 2702 SpillLo = loOperand(Src0RM);
2761 SpillHi = hiOperand(Src0RM); 2703 SpillHi = hiOperand(Src0RM);
2762 } 2704 }
2763 2705
2764 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2706 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2765 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2707 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2766 Variable *T_Lo = makeReg(IceType_i32); 2708 Variable *T_Lo = makeReg(IceType_i32);
2767 Variable *T_Hi = makeReg(IceType_i32); 2709 Variable *T_Hi = makeReg(IceType_i32);
2768 2710
2769 _mov(T_Lo, SpillLo); 2711 _mov(T_Lo, SpillLo);
2770 _mov(DestLo, T_Lo); 2712 _mov(DestLo, T_Lo);
2771 _mov(T_Hi, SpillHi); 2713 _mov(T_Hi, SpillHi);
2772 _mov(DestHi, T_Hi); 2714 _mov(DestHi, T_Hi);
2773 } break; 2715 } break;
2774 case IceType_f64: { 2716 case IceType_f64: {
2775 Src0 = legalize(Src0); 2717 Src0 = legalize(Src0);
2776 assert(Src0->getType() == IceType_i64); 2718 assert(Src0->getType() == IceType_i64);
2777 if (llvm::isa<OperandX8632Mem>(Src0)) { 2719 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
2778 Variable *T = Func->template makeVariable(Dest->getType()); 2720 Variable *T = Func->template makeVariable(Dest->getType());
2779 _movq(T, Src0); 2721 _movq(T, Src0);
2780 _movq(Dest, T); 2722 _movq(Dest, T);
2781 break; 2723 break;
2782 } 2724 }
2783 // a.f64 = bitcast b.i64 ==> 2725 // a.f64 = bitcast b.i64 ==>
2784 // t_lo.i32 = b_lo.i32 2726 // t_lo.i32 = b_lo.i32
2785 // FakeDef(s.f64) 2727 // FakeDef(s.f64)
2786 // lo(s.f64) = t_lo.i32 2728 // lo(s.f64) = t_lo.i32
2787 // t_hi.i32 = b_hi.i32 2729 // t_hi.i32 = b_hi.i32
2788 // hi(s.f64) = t_hi.i32 2730 // hi(s.f64) = t_hi.i32
2789 // a.f64 = s.f64 2731 // a.f64 = s.f64
2790 SpillVariable *SpillVar = 2732 typename Traits::SpillVariable *SpillVar =
2791 Func->template makeVariable<SpillVariable>(IceType_f64); 2733 Func->template makeVariable<typename Traits::SpillVariable>(
2734 IceType_f64);
2792 SpillVar->setLinkedTo(Dest); 2735 SpillVar->setLinkedTo(Dest);
2793 Variable *Spill = SpillVar; 2736 Variable *Spill = SpillVar;
2794 Spill->setWeight(RegWeight::Zero); 2737 Spill->setWeight(RegWeight::Zero);
2795 2738
2796 Variable *T_Lo = nullptr, *T_Hi = nullptr; 2739 Variable *T_Lo = nullptr, *T_Hi = nullptr;
2797 VariableSplit *SpillLo = 2740 typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
2798 VariableSplit::create(Func, Spill, VariableSplit::Low); 2741 Func, Spill, Traits::VariableSplit::Low);
2799 VariableSplit *SpillHi = 2742 typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
2800 VariableSplit::create(Func, Spill, VariableSplit::High); 2743 Func, Spill, Traits::VariableSplit::High);
2801 _mov(T_Lo, loOperand(Src0)); 2744 _mov(T_Lo, loOperand(Src0));
2802 // Technically, the Spill is defined after the _store happens, but 2745 // Technically, the Spill is defined after the _store happens, but
2803 // SpillLo is considered a "use" of Spill so define Spill before it 2746 // SpillLo is considered a "use" of Spill so define Spill before it
2804 // is used. 2747 // is used.
2805 Context.insert(InstFakeDef::create(Func, Spill)); 2748 Context.insert(InstFakeDef::create(Func, Spill));
2806 _store(T_Lo, SpillLo); 2749 _store(T_Lo, SpillLo);
2807 _mov(T_Hi, hiOperand(Src0)); 2750 _mov(T_Hi, hiOperand(Src0));
2808 _store(T_Hi, SpillHi); 2751 _store(T_Hi, SpillHi);
2809 _movq(Dest, Spill); 2752 _movq(Dest, Spill);
2810 } break; 2753 } break;
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
2890 // Spill the value to a stack slot and do the extraction in memory. 2833 // Spill the value to a stack slot and do the extraction in memory.
2891 // 2834 //
2892 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when 2835 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
2893 // support for legalizing to mem is implemented. 2836 // support for legalizing to mem is implemented.
2894 Variable *Slot = Func->template makeVariable(Ty); 2837 Variable *Slot = Func->template makeVariable(Ty);
2895 Slot->setWeight(RegWeight::Zero); 2838 Slot->setWeight(RegWeight::Zero);
2896 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); 2839 _movp(Slot, legalizeToVar(SourceVectNotLegalized));
2897 2840
2898 // Compute the location of the element in memory. 2841 // Compute the location of the element in memory.
2899 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); 2842 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
2900 OperandX8632Mem *Loc = 2843 typename Traits::X86OperandMem *Loc =
2901 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); 2844 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
2902 _mov(ExtractedElementR, Loc); 2845 _mov(ExtractedElementR, Loc);
2903 } 2846 }
2904 2847
2905 if (ElementTy == IceType_i1) { 2848 if (ElementTy == IceType_i1) {
2906 // Truncate extracted integers to i1s if necessary. 2849 // Truncate extracted integers to i1s if necessary.
2907 Variable *T = makeReg(IceType_i1); 2850 Variable *T = makeReg(IceType_i1);
2908 InstCast *Cast = 2851 InstCast *Cast =
2909 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR); 2852 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
2910 lowerCast(Cast); 2853 lowerCast(Cast);
(...skipping 25 matching lines...) Expand all
2936 Variable *T = nullptr; 2879 Variable *T = nullptr;
2937 2880
2938 if (Condition == InstFcmp::True) { 2881 if (Condition == InstFcmp::True) {
2939 // makeVectorOfOnes() requires an integer vector type. 2882 // makeVectorOfOnes() requires an integer vector type.
2940 T = makeVectorOfMinusOnes(IceType_v4i32); 2883 T = makeVectorOfMinusOnes(IceType_v4i32);
2941 } else if (Condition == InstFcmp::False) { 2884 } else if (Condition == InstFcmp::False) {
2942 T = makeVectorOfZeros(Dest->getType()); 2885 T = makeVectorOfZeros(Dest->getType());
2943 } else { 2886 } else {
2944 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2887 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2945 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2888 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2946 if (llvm::isa<OperandX8632Mem>(Src1RM)) 2889 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2947 Src1RM = legalizeToVar(Src1RM); 2890 Src1RM = legalizeToVar(Src1RM);
2948 2891
2949 switch (Condition) { 2892 switch (Condition) {
2950 default: { 2893 default: {
2951 typename Traits::Cond::CmppsCond Predicate = 2894 typename Traits::Cond::CmppsCond Predicate =
2952 Traits::TableFcmp[Index].Predicate; 2895 Traits::TableFcmp[Index].Predicate;
2953 assert(Predicate != Traits::Cond::Cmpps_Invalid); 2896 assert(Predicate != Traits::Cond::Cmpps_Invalid);
2954 T = makeReg(Src0RM->getType()); 2897 T = makeReg(Src0RM->getType());
2955 _movp(T, Src0RM); 2898 _movp(T, Src0RM);
2956 _cmpps(T, Src1RM, Predicate); 2899 _cmpps(T, Src1RM, Predicate);
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
3011 _ucomiss(T, Src1RM); 2954 _ucomiss(T, Src1RM);
3012 if (!HasC2) { 2955 if (!HasC2) {
3013 assert(Traits::TableFcmp[Index].Default); 2956 assert(Traits::TableFcmp[Index].Default);
3014 _setcc(Dest, Traits::TableFcmp[Index].C1); 2957 _setcc(Dest, Traits::TableFcmp[Index].C1);
3015 return; 2958 return;
3016 } 2959 }
3017 } 2960 }
3018 Constant *Default = Ctx->getConstantInt32(Traits::TableFcmp[Index].Default); 2961 Constant *Default = Ctx->getConstantInt32(Traits::TableFcmp[Index].Default);
3019 _mov(Dest, Default); 2962 _mov(Dest, Default);
3020 if (HasC1) { 2963 if (HasC1) {
3021 InstX8632Label *Label = InstX8632Label::create(Func, this); 2964 typename Traits::Insts::Label *Label =
2965 Traits::Insts::Label::create(Func, this);
3022 _br(Traits::TableFcmp[Index].C1, Label); 2966 _br(Traits::TableFcmp[Index].C1, Label);
3023 if (HasC2) { 2967 if (HasC2) {
3024 _br(Traits::TableFcmp[Index].C2, Label); 2968 _br(Traits::TableFcmp[Index].C2, Label);
3025 } 2969 }
3026 Constant *NonDefault = 2970 Constant *NonDefault =
3027 Ctx->getConstantInt32(!Traits::TableFcmp[Index].Default); 2971 Ctx->getConstantInt32(!Traits::TableFcmp[Index].Default);
3028 _mov_nonkillable(Dest, NonDefault); 2972 _mov_nonkillable(Dest, NonDefault);
3029 Context.insert(Label); 2973 Context.insert(Label);
3030 } 2974 }
3031 } 2975 }
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
3084 Src0RM = T0; 3028 Src0RM = T0;
3085 Src1RM = T1; 3029 Src1RM = T1;
3086 } 3030 }
3087 3031
3088 Variable *T = makeReg(Ty); 3032 Variable *T = makeReg(Ty);
3089 switch (Condition) { 3033 switch (Condition) {
3090 default: 3034 default:
3091 llvm_unreachable("unexpected condition"); 3035 llvm_unreachable("unexpected condition");
3092 break; 3036 break;
3093 case InstIcmp::Eq: { 3037 case InstIcmp::Eq: {
3094 if (llvm::isa<OperandX8632Mem>(Src1RM)) 3038 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
3095 Src1RM = legalizeToVar(Src1RM); 3039 Src1RM = legalizeToVar(Src1RM);
3096 _movp(T, Src0RM); 3040 _movp(T, Src0RM);
3097 _pcmpeq(T, Src1RM); 3041 _pcmpeq(T, Src1RM);
3098 } break; 3042 } break;
3099 case InstIcmp::Ne: { 3043 case InstIcmp::Ne: {
3100 if (llvm::isa<OperandX8632Mem>(Src1RM)) 3044 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
3101 Src1RM = legalizeToVar(Src1RM); 3045 Src1RM = legalizeToVar(Src1RM);
3102 _movp(T, Src0RM); 3046 _movp(T, Src0RM);
3103 _pcmpeq(T, Src1RM); 3047 _pcmpeq(T, Src1RM);
3104 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 3048 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
3105 _pxor(T, MinusOne); 3049 _pxor(T, MinusOne);
3106 } break; 3050 } break;
3107 case InstIcmp::Ugt: 3051 case InstIcmp::Ugt:
3108 case InstIcmp::Sgt: { 3052 case InstIcmp::Sgt: {
3109 if (llvm::isa<OperandX8632Mem>(Src1RM)) 3053 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
3110 Src1RM = legalizeToVar(Src1RM); 3054 Src1RM = legalizeToVar(Src1RM);
3111 _movp(T, Src0RM); 3055 _movp(T, Src0RM);
3112 _pcmpgt(T, Src1RM); 3056 _pcmpgt(T, Src1RM);
3113 } break; 3057 } break;
3114 case InstIcmp::Uge: 3058 case InstIcmp::Uge:
3115 case InstIcmp::Sge: { 3059 case InstIcmp::Sge: {
3116 // !(Src1RM > Src0RM) 3060 // !(Src1RM > Src0RM)
3117 if (llvm::isa<OperandX8632Mem>(Src0RM)) 3061 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
3118 Src0RM = legalizeToVar(Src0RM); 3062 Src0RM = legalizeToVar(Src0RM);
3119 _movp(T, Src1RM); 3063 _movp(T, Src1RM);
3120 _pcmpgt(T, Src0RM); 3064 _pcmpgt(T, Src0RM);
3121 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 3065 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
3122 _pxor(T, MinusOne); 3066 _pxor(T, MinusOne);
3123 } break; 3067 } break;
3124 case InstIcmp::Ult: 3068 case InstIcmp::Ult:
3125 case InstIcmp::Slt: { 3069 case InstIcmp::Slt: {
3126 if (llvm::isa<OperandX8632Mem>(Src0RM)) 3070 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
3127 Src0RM = legalizeToVar(Src0RM); 3071 Src0RM = legalizeToVar(Src0RM);
3128 _movp(T, Src1RM); 3072 _movp(T, Src1RM);
3129 _pcmpgt(T, Src0RM); 3073 _pcmpgt(T, Src0RM);
3130 } break; 3074 } break;
3131 case InstIcmp::Ule: 3075 case InstIcmp::Ule:
3132 case InstIcmp::Sle: { 3076 case InstIcmp::Sle: {
3133 // !(Src0RM > Src1RM) 3077 // !(Src0RM > Src1RM)
3134 if (llvm::isa<OperandX8632Mem>(Src1RM)) 3078 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
3135 Src1RM = legalizeToVar(Src1RM); 3079 Src1RM = legalizeToVar(Src1RM);
3136 _movp(T, Src0RM); 3080 _movp(T, Src0RM);
3137 _pcmpgt(T, Src1RM); 3081 _pcmpgt(T, Src1RM);
3138 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 3082 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
3139 _pxor(T, MinusOne); 3083 _pxor(T, MinusOne);
3140 } break; 3084 } break;
3141 } 3085 }
3142 3086
3143 _movp(Dest, T); 3087 _movp(Dest, T);
3144 eliminateNextVectorSextInstruction(Dest); 3088 eliminateNextVectorSextInstruction(Dest);
3145 return; 3089 return;
3146 } 3090 }
3147 3091
3148 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: 3092 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
3149 if (Src0->getType() == IceType_i64) { 3093 if (Src0->getType() == IceType_i64) {
3150 InstIcmp::ICond Condition = Inst->getCondition(); 3094 InstIcmp::ICond Condition = Inst->getCondition();
3151 size_t Index = static_cast<size_t>(Condition); 3095 size_t Index = static_cast<size_t>(Condition);
3152 assert(Index < Traits::TableIcmp64Size); 3096 assert(Index < Traits::TableIcmp64Size);
3153 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); 3097 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
3154 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); 3098 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
3155 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); 3099 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
3156 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); 3100 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
3157 Constant *Zero = Ctx->getConstantZero(IceType_i32); 3101 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3158 Constant *One = Ctx->getConstantInt32(1); 3102 Constant *One = Ctx->getConstantInt32(1);
3159 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); 3103 typename Traits::Insts::Label *LabelFalse =
3160 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); 3104 Traits::Insts::Label::create(Func, this);
3105 typename Traits::Insts::Label *LabelTrue =
3106 Traits::Insts::Label::create(Func, this);
3161 _mov(Dest, One); 3107 _mov(Dest, One);
3162 _cmp(Src0HiRM, Src1HiRI); 3108 _cmp(Src0HiRM, Src1HiRI);
3163 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) 3109 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
3164 _br(Traits::TableIcmp64[Index].C1, LabelTrue); 3110 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
3165 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) 3111 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
3166 _br(Traits::TableIcmp64[Index].C2, LabelFalse); 3112 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
3167 _cmp(Src0LoRM, Src1LoRI); 3113 _cmp(Src0LoRM, Src1LoRI);
3168 _br(Traits::TableIcmp64[Index].C3, LabelTrue); 3114 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
3169 Context.insert(LabelFalse); 3115 Context.insert(LabelFalse);
3170 _mov_nonkillable(Dest, Zero); 3116 _mov_nonkillable(Dest, Zero);
(...skipping 115 matching lines...) Expand 10 before | Expand all | Expand 10 after
3286 // memory. 3232 // memory.
3287 // 3233 //
3288 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when 3234 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
3289 // support for legalizing to mem is implemented. 3235 // support for legalizing to mem is implemented.
3290 Variable *Slot = Func->template makeVariable(Ty); 3236 Variable *Slot = Func->template makeVariable(Ty);
3291 Slot->setWeight(RegWeight::Zero); 3237 Slot->setWeight(RegWeight::Zero);
3292 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); 3238 _movp(Slot, legalizeToVar(SourceVectNotLegalized));
3293 3239
3294 // Compute the location of the position to insert in memory. 3240 // Compute the location of the position to insert in memory.
3295 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); 3241 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
3296 OperandX8632Mem *Loc = 3242 typename Traits::X86OperandMem *Loc =
3297 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); 3243 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
3298 _store(legalizeToVar(ElementToInsertNotLegalized), Loc); 3244 _store(legalizeToVar(ElementToInsertNotLegalized), Loc);
3299 3245
3300 Variable *T = makeReg(Ty); 3246 Variable *T = makeReg(Ty);
3301 _movp(T, Slot); 3247 _movp(T, Slot);
3302 _movp(Inst->getDest(), T); 3248 _movp(Inst->getDest(), T);
3303 } 3249 }
3304 } 3250 }
3305 3251
3306 template <class Machine> 3252 template <class Machine>
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
3376 return; 3322 return;
3377 } 3323 }
3378 Variable *Dest = Instr->getDest(); 3324 Variable *Dest = Instr->getDest();
3379 if (Dest->getType() == IceType_i64) { 3325 if (Dest->getType() == IceType_i64) {
3380 // Follow what GCC does and use a movq instead of what lowerLoad() 3326 // Follow what GCC does and use a movq instead of what lowerLoad()
3381 // normally does (split the load into two). 3327 // normally does (split the load into two).
3382 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding 3328 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
3383 // can't happen anyway, since this is x86-32 and integer arithmetic only 3329 // can't happen anyway, since this is x86-32 and integer arithmetic only
3384 // happens on 32-bit quantities. 3330 // happens on 32-bit quantities.
3385 Variable *T = makeReg(IceType_f64); 3331 Variable *T = makeReg(IceType_f64);
3386 OperandX8632Mem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64); 3332 typename Traits::X86OperandMem *Addr =
3333 formMemoryOperand(Instr->getArg(0), IceType_f64);
3387 _movq(T, Addr); 3334 _movq(T, Addr);
3388 // Then cast the bits back out of the XMM register to the i64 Dest. 3335 // Then cast the bits back out of the XMM register to the i64 Dest.
3389 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); 3336 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
3390 lowerCast(Cast); 3337 lowerCast(Cast);
3391 // Make sure that the atomic load isn't elided when unused. 3338 // Make sure that the atomic load isn't elided when unused.
3392 Context.insert(InstFakeUse::create(Func, Dest->getLo())); 3339 Context.insert(InstFakeUse::create(Func, Dest->getLo()));
3393 Context.insert(InstFakeUse::create(Func, Dest->getHi())); 3340 Context.insert(InstFakeUse::create(Func, Dest->getHi()));
3394 return; 3341 return;
3395 } 3342 }
3396 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); 3343 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
(...skipping 29 matching lines...) Expand all
3426 Operand *Value = Instr->getArg(0); 3373 Operand *Value = Instr->getArg(0);
3427 Operand *Ptr = Instr->getArg(1); 3374 Operand *Ptr = Instr->getArg(1);
3428 if (Value->getType() == IceType_i64) { 3375 if (Value->getType() == IceType_i64) {
3429 // Use a movq instead of what lowerStore() normally does 3376 // Use a movq instead of what lowerStore() normally does
3430 // (split the store into two), following what GCC does. 3377 // (split the store into two), following what GCC does.
3431 // Cast the bits from int -> to an xmm register first. 3378 // Cast the bits from int -> to an xmm register first.
3432 Variable *T = makeReg(IceType_f64); 3379 Variable *T = makeReg(IceType_f64);
3433 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); 3380 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
3434 lowerCast(Cast); 3381 lowerCast(Cast);
3435 // Then store XMM w/ a movq. 3382 // Then store XMM w/ a movq.
3436 OperandX8632Mem *Addr = formMemoryOperand(Ptr, IceType_f64); 3383 typename Traits::X86OperandMem *Addr =
3384 formMemoryOperand(Ptr, IceType_f64);
3437 _storeq(T, Addr); 3385 _storeq(T, Addr);
3438 _mfence(); 3386 _mfence();
3439 return; 3387 return;
3440 } 3388 }
3441 InstStore *Store = InstStore::create(Func, Value, Ptr); 3389 InstStore *Store = InstStore::create(Func, Value, Ptr);
3442 lowerStore(Store); 3390 lowerStore(Store);
3443 _mfence(); 3391 _mfence();
3444 return; 3392 return;
3445 } 3393 }
3446 case Intrinsics::Bswap: { 3394 case Intrinsics::Bswap: {
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
3528 return; 3476 return;
3529 } 3477 }
3530 case Intrinsics::Fabs: { 3478 case Intrinsics::Fabs: {
3531 Operand *Src = legalize(Instr->getArg(0)); 3479 Operand *Src = legalize(Instr->getArg(0));
3532 Type Ty = Src->getType(); 3480 Type Ty = Src->getType();
3533 Variable *Dest = Instr->getDest(); 3481 Variable *Dest = Instr->getDest();
3534 Variable *T = makeVectorOfFabsMask(Ty); 3482 Variable *T = makeVectorOfFabsMask(Ty);
3535 // The pand instruction operates on an m128 memory operand, so if 3483 // The pand instruction operates on an m128 memory operand, so if
3536 // Src is an f32 or f64, we need to make sure it's in a register. 3484 // Src is an f32 or f64, we need to make sure it's in a register.
3537 if (isVectorType(Ty)) { 3485 if (isVectorType(Ty)) {
3538 if (llvm::isa<OperandX8632Mem>(Src)) 3486 if (llvm::isa<typename Traits::X86OperandMem>(Src))
3539 Src = legalizeToVar(Src); 3487 Src = legalizeToVar(Src);
3540 } else { 3488 } else {
3541 Src = legalizeToVar(Src); 3489 Src = legalizeToVar(Src);
3542 } 3490 }
3543 _pand(T, Src); 3491 _pand(T, Src);
3544 if (isVectorType(Ty)) 3492 if (isVectorType(Ty))
3545 _movp(Dest, T); 3493 _movp(Dest, T);
3546 else 3494 else
3547 _mov(Dest, T); 3495 _mov(Dest, T);
3548 return; 3496 return;
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
3583 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3); 3531 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
3584 Call->addArg(Instr->getArg(0)); 3532 Call->addArg(Instr->getArg(0));
3585 Call->addArg(ValExt); 3533 Call->addArg(ValExt);
3586 Call->addArg(Instr->getArg(2)); 3534 Call->addArg(Instr->getArg(2));
3587 lowerCall(Call); 3535 lowerCall(Call);
3588 return; 3536 return;
3589 } 3537 }
3590 case Intrinsics::NaClReadTP: { 3538 case Intrinsics::NaClReadTP: {
3591 if (Ctx->getFlags().getUseSandboxing()) { 3539 if (Ctx->getFlags().getUseSandboxing()) {
3592 Constant *Zero = Ctx->getConstantZero(IceType_i32); 3540 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3593 Operand *Src = 3541 Operand *Src = Traits::X86OperandMem::create(
3594 OperandX8632Mem::create(Func, IceType_i32, nullptr, Zero, nullptr, 0, 3542 Func, IceType_i32, nullptr, Zero, nullptr, 0,
3595 OperandX8632Mem::SegReg_GS); 3543 Traits::X86OperandMem::SegReg_GS);
3596 Variable *Dest = Instr->getDest(); 3544 Variable *Dest = Instr->getDest();
3597 Variable *T = nullptr; 3545 Variable *T = nullptr;
3598 _mov(T, Src); 3546 _mov(T, Src);
3599 _mov(Dest, T); 3547 _mov(Dest, T);
3600 } else { 3548 } else {
3601 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0); 3549 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
3602 lowerCall(Call); 3550 lowerCall(Call);
3603 } 3551 }
3604 return; 3552 return;
3605 } 3553 }
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
3648 // Reserve the pre-colored registers first, before adding any more 3596 // Reserve the pre-colored registers first, before adding any more
3649 // infinite-weight variables from formMemoryOperand's legalization. 3597 // infinite-weight variables from formMemoryOperand's legalization.
3650 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 3598 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3651 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 3599 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3652 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); 3600 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3653 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); 3601 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
3654 _mov(T_eax, loOperand(Expected)); 3602 _mov(T_eax, loOperand(Expected));
3655 _mov(T_edx, hiOperand(Expected)); 3603 _mov(T_edx, hiOperand(Expected));
3656 _mov(T_ebx, loOperand(Desired)); 3604 _mov(T_ebx, loOperand(Desired));
3657 _mov(T_ecx, hiOperand(Desired)); 3605 _mov(T_ecx, hiOperand(Desired));
3658 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); 3606 typename Traits::X86OperandMem *Addr =
3607 formMemoryOperand(Ptr, Expected->getType());
3659 const bool Locked = true; 3608 const bool Locked = true;
3660 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); 3609 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3661 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); 3610 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
3662 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); 3611 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
3663 _mov(DestLo, T_eax); 3612 _mov(DestLo, T_eax);
3664 _mov(DestHi, T_edx); 3613 _mov(DestHi, T_edx);
3665 return; 3614 return;
3666 } 3615 }
3667 Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax); 3616 Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax);
3668 _mov(T_eax, Expected); 3617 _mov(T_eax, Expected);
3669 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); 3618 typename Traits::X86OperandMem *Addr =
3619 formMemoryOperand(Ptr, Expected->getType());
3670 Variable *DesiredReg = legalizeToVar(Desired); 3620 Variable *DesiredReg = legalizeToVar(Desired);
3671 const bool Locked = true; 3621 const bool Locked = true;
3672 _cmpxchg(Addr, T_eax, DesiredReg, Locked); 3622 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
3673 _mov(DestPrev, T_eax); 3623 _mov(DestPrev, T_eax);
3674 } 3624 }
3675 3625
3676 template <class Machine> 3626 template <class Machine>
3677 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest, 3627 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
3678 Operand *PtrToMem, 3628 Operand *PtrToMem,
3679 Operand *Expected, 3629 Operand *Expected,
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
3761 return; 3711 return;
3762 case Intrinsics::AtomicAdd: { 3712 case Intrinsics::AtomicAdd: {
3763 if (Dest->getType() == IceType_i64) { 3713 if (Dest->getType() == IceType_i64) {
3764 // All the fall-through paths must set this to true, but use this 3714 // All the fall-through paths must set this to true, but use this
3765 // for asserting. 3715 // for asserting.
3766 NeedsCmpxchg = true; 3716 NeedsCmpxchg = true;
3767 Op_Lo = &TargetX86Base<Machine>::_add; 3717 Op_Lo = &TargetX86Base<Machine>::_add;
3768 Op_Hi = &TargetX86Base<Machine>::_adc; 3718 Op_Hi = &TargetX86Base<Machine>::_adc;
3769 break; 3719 break;
3770 } 3720 }
3771 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); 3721 typename Traits::X86OperandMem *Addr =
3722 formMemoryOperand(Ptr, Dest->getType());
3772 const bool Locked = true; 3723 const bool Locked = true;
3773 Variable *T = nullptr; 3724 Variable *T = nullptr;
3774 _mov(T, Val); 3725 _mov(T, Val);
3775 _xadd(Addr, T, Locked); 3726 _xadd(Addr, T, Locked);
3776 _mov(Dest, T); 3727 _mov(Dest, T);
3777 return; 3728 return;
3778 } 3729 }
3779 case Intrinsics::AtomicSub: { 3730 case Intrinsics::AtomicSub: {
3780 if (Dest->getType() == IceType_i64) { 3731 if (Dest->getType() == IceType_i64) {
3781 NeedsCmpxchg = true; 3732 NeedsCmpxchg = true;
3782 Op_Lo = &TargetX86Base<Machine>::_sub; 3733 Op_Lo = &TargetX86Base<Machine>::_sub;
3783 Op_Hi = &TargetX86Base<Machine>::_sbb; 3734 Op_Hi = &TargetX86Base<Machine>::_sbb;
3784 break; 3735 break;
3785 } 3736 }
3786 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); 3737 typename Traits::X86OperandMem *Addr =
3738 formMemoryOperand(Ptr, Dest->getType());
3787 const bool Locked = true; 3739 const bool Locked = true;
3788 Variable *T = nullptr; 3740 Variable *T = nullptr;
3789 _mov(T, Val); 3741 _mov(T, Val);
3790 _neg(T); 3742 _neg(T);
3791 _xadd(Addr, T, Locked); 3743 _xadd(Addr, T, Locked);
3792 _mov(Dest, T); 3744 _mov(Dest, T);
3793 return; 3745 return;
3794 } 3746 }
3795 case Intrinsics::AtomicOr: 3747 case Intrinsics::AtomicOr:
3796 // TODO(jvoung): If Dest is null or dead, then some of these 3748 // TODO(jvoung): If Dest is null or dead, then some of these
(...skipping 17 matching lines...) Expand all
3814 break; 3766 break;
3815 case Intrinsics::AtomicExchange: 3767 case Intrinsics::AtomicExchange:
3816 if (Dest->getType() == IceType_i64) { 3768 if (Dest->getType() == IceType_i64) {
3817 NeedsCmpxchg = true; 3769 NeedsCmpxchg = true;
3818 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values 3770 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
3819 // just need to be moved to the ecx and ebx registers. 3771 // just need to be moved to the ecx and ebx registers.
3820 Op_Lo = nullptr; 3772 Op_Lo = nullptr;
3821 Op_Hi = nullptr; 3773 Op_Hi = nullptr;
3822 break; 3774 break;
3823 } 3775 }
3824 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); 3776 typename Traits::X86OperandMem *Addr =
3777 formMemoryOperand(Ptr, Dest->getType());
3825 Variable *T = nullptr; 3778 Variable *T = nullptr;
3826 _mov(T, Val); 3779 _mov(T, Val);
3827 _xchg(Addr, T); 3780 _xchg(Addr, T);
3828 _mov(Dest, T); 3781 _mov(Dest, T);
3829 return; 3782 return;
3830 } 3783 }
3831 // Otherwise, we need a cmpxchg loop. 3784 // Otherwise, we need a cmpxchg loop.
3832 (void)NeedsCmpxchg; 3785 (void)NeedsCmpxchg;
3833 assert(NeedsCmpxchg); 3786 assert(NeedsCmpxchg);
3834 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val); 3787 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
(...skipping 27 matching lines...) Expand all
3862 // lock cmpxchg [ptr], <reg> 3815 // lock cmpxchg [ptr], <reg>
3863 // jne .LABEL 3816 // jne .LABEL
3864 // mov <dest>, eax 3817 // mov <dest>, eax
3865 // 3818 //
3866 // If Op_{Lo,Hi} are nullptr, then just copy the value. 3819 // If Op_{Lo,Hi} are nullptr, then just copy the value.
3867 Val = legalize(Val); 3820 Val = legalize(Val);
3868 Type Ty = Val->getType(); 3821 Type Ty = Val->getType();
3869 if (Ty == IceType_i64) { 3822 if (Ty == IceType_i64) {
3870 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 3823 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3871 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 3824 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3872 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty); 3825 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3873 _mov(T_eax, loOperand(Addr)); 3826 _mov(T_eax, loOperand(Addr));
3874 _mov(T_edx, hiOperand(Addr)); 3827 _mov(T_edx, hiOperand(Addr));
3875 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); 3828 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3876 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); 3829 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
3877 InstX8632Label *Label = InstX8632Label::create(Func, this); 3830 typename Traits::Insts::Label *Label =
3831 Traits::Insts::Label::create(Func, this);
3878 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr; 3832 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
3879 if (!IsXchg8b) { 3833 if (!IsXchg8b) {
3880 Context.insert(Label); 3834 Context.insert(Label);
3881 _mov(T_ebx, T_eax); 3835 _mov(T_ebx, T_eax);
3882 (this->*Op_Lo)(T_ebx, loOperand(Val)); 3836 (this->*Op_Lo)(T_ebx, loOperand(Val));
3883 _mov(T_ecx, T_edx); 3837 _mov(T_ecx, T_edx);
3884 (this->*Op_Hi)(T_ecx, hiOperand(Val)); 3838 (this->*Op_Hi)(T_ecx, hiOperand(Val));
3885 } else { 3839 } else {
3886 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. 3840 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
3887 // It just needs the Val loaded into ebx and ecx. 3841 // It just needs the Val loaded into ebx and ecx.
(...skipping 21 matching lines...) Expand all
3909 } 3863 }
3910 // The address base (if any) is also reused in the loop. 3864 // The address base (if any) is also reused in the loop.
3911 if (Variable *Base = Addr->getBase()) 3865 if (Variable *Base = Addr->getBase())
3912 Context.insert(InstFakeUse::create(Func, Base)); 3866 Context.insert(InstFakeUse::create(Func, Base));
3913 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3867 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3914 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3868 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3915 _mov(DestLo, T_eax); 3869 _mov(DestLo, T_eax);
3916 _mov(DestHi, T_edx); 3870 _mov(DestHi, T_edx);
3917 return; 3871 return;
3918 } 3872 }
3919 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty); 3873 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3920 Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax); 3874 Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax);
3921 _mov(T_eax, Addr); 3875 _mov(T_eax, Addr);
3922 InstX8632Label *Label = InstX8632Label::create(Func, this); 3876 typename Traits::Insts::Label *Label =
3877 Traits::Insts::Label::create(Func, this);
3923 Context.insert(Label); 3878 Context.insert(Label);
3924 // We want to pick a different register for T than Eax, so don't use 3879 // We want to pick a different register for T than Eax, so don't use
3925 // _mov(T == nullptr, T_eax). 3880 // _mov(T == nullptr, T_eax).
3926 Variable *T = makeReg(Ty); 3881 Variable *T = makeReg(Ty);
3927 _mov(T, T_eax); 3882 _mov(T, T_eax);
3928 (this->*Op_Lo)(T, Val); 3883 (this->*Op_Lo)(T, Val);
3929 const bool Locked = true; 3884 const bool Locked = true;
3930 _cmpxchg(Addr, T_eax, T, Locked); 3885 _cmpxchg(Addr, T_eax, T, Locked);
3931 _br(Traits::Cond::Br_ne, Label); 3886 _br(Traits::Cond::Br_ne, Label);
3932 // If Val is a variable, model the extended live range of Val through 3887 // If Val is a variable, model the extended live range of Val through
(...skipping 320 matching lines...) Expand 10 before | Expand all | Expand 10 after
4253 // Index is Index=Var-Const ==> 4208 // Index is Index=Var-Const ==>
4254 // set Index=Var, Offset-=(Const<<Shift) 4209 // set Index=Var, Offset-=(Const<<Shift)
4255 4210
4256 // TODO: consider overflow issues with respect to Offset. 4211 // TODO: consider overflow issues with respect to Offset.
4257 // TODO: handle symbolic constants. 4212 // TODO: handle symbolic constants.
4258 } 4213 }
4259 } 4214 }
4260 4215
4261 template <class Machine> 4216 template <class Machine>
4262 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) { 4217 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) {
4263 // A Load instruction can be treated the same as an Assign 4218 // A Load instruction can be treated the same as an Assign instruction, after
4264 // instruction, after the source operand is transformed into an 4219 // the source operand is transformed into an Traits::X86OperandMem operand.
4265 // OperandX8632Mem operand. Note that the address mode 4220 // Note that the address mode optimization already creates an
4266 // optimization already creates an OperandX8632Mem operand, so it 4221 // Traits::X86OperandMem operand, so it doesn't need another level of
4267 // doesn't need another level of transformation. 4222 // transformation.
4268 Variable *DestLoad = Load->getDest(); 4223 Variable *DestLoad = Load->getDest();
4269 Type Ty = DestLoad->getType(); 4224 Type Ty = DestLoad->getType();
4270 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); 4225 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
4271 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); 4226 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
4272 lowerAssign(Assign); 4227 lowerAssign(Assign);
4273 } 4228 }
4274 4229
4275 template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() { 4230 template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() {
4276 Inst *Inst = Context.getCur(); 4231 Inst *Inst = Context.getCur();
4277 Variable *Dest = Inst->getDest(); 4232 Variable *Dest = Inst->getDest();
4278 Operand *Addr = Inst->getSrc(0); 4233 Operand *Addr = Inst->getSrc(0);
4279 Variable *Index = nullptr; 4234 Variable *Index = nullptr;
4280 uint16_t Shift = 0; 4235 uint16_t Shift = 0;
4281 int32_t Offset = 0; // TODO: make Constant 4236 int32_t Offset = 0; // TODO: make Constant
4282 // Vanilla ICE load instructions should not use the segment registers, 4237 // Vanilla ICE load instructions should not use the segment registers, and
4283 // and computeAddressOpt only works at the level of Variables and Constants, 4238 // computeAddressOpt only works at the level of Variables and Constants, not
4284 // not other OperandX8632Mem, so there should be no mention of segment 4239 // other Traits::X86OperandMem, so there should be no mention of segment
4285 // registers there either. 4240 // registers there either.
4286 const OperandX8632Mem::SegmentRegisters SegmentReg = 4241 const typename Traits::X86OperandMem::SegmentRegisters SegmentReg =
4287 OperandX8632Mem::DefaultSegment; 4242 Traits::X86OperandMem::DefaultSegment;
4288 Variable *Base = llvm::dyn_cast<Variable>(Addr); 4243 Variable *Base = llvm::dyn_cast<Variable>(Addr);
4289 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 4244 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
4290 if (Base && Addr != Base) { 4245 if (Base && Addr != Base) {
4291 Inst->setDeleted(); 4246 Inst->setDeleted();
4292 Constant *OffsetOp = Ctx->getConstantInt32(Offset); 4247 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
4293 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, 4248 Addr = Traits::X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp,
4294 Shift, SegmentReg); 4249 Index, Shift, SegmentReg);
4295 Context.insert(InstLoad::create(Func, Dest, Addr)); 4250 Context.insert(InstLoad::create(Func, Dest, Addr));
4296 } 4251 }
4297 } 4252 }
4298 4253
4299 template <class Machine> 4254 template <class Machine>
4300 void TargetX86Base<Machine>::randomlyInsertNop(float Probability) { 4255 void TargetX86Base<Machine>::randomlyInsertNop(float Probability) {
4301 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); 4256 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
4302 if (RNG.getTrueWithProbability(Probability)) { 4257 if (RNG.getTrueWithProbability(Probability)) {
4303 _nop(RNG(Traits::X86_NUM_NOP_VARIANTS)); 4258 _nop(RNG(Traits::X86_NUM_NOP_VARIANTS));
4304 } 4259 }
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after
4431 CmpOpnd1 = Ctx->getConstantZero(IceType_i32); 4386 CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
4432 } 4387 }
4433 assert(CmpOpnd0); 4388 assert(CmpOpnd0);
4434 assert(CmpOpnd1); 4389 assert(CmpOpnd1);
4435 4390
4436 _cmp(CmpOpnd0, CmpOpnd1); 4391 _cmp(CmpOpnd0, CmpOpnd1);
4437 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { 4392 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
4438 // The cmov instruction doesn't allow 8-bit or FP operands, so 4393 // The cmov instruction doesn't allow 8-bit or FP operands, so
4439 // we need explicit control flow. 4394 // we need explicit control flow.
4440 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: 4395 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
4441 InstX8632Label *Label = InstX8632Label::create(Func, this); 4396 typename Traits::Insts::Label *Label =
4397 Traits::Insts::Label::create(Func, this);
4442 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); 4398 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
4443 _mov(Dest, SrcT); 4399 _mov(Dest, SrcT);
4444 _br(Cond, Label); 4400 _br(Cond, Label);
4445 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); 4401 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
4446 _mov_nonkillable(Dest, SrcF); 4402 _mov_nonkillable(Dest, SrcF);
4447 Context.insert(Label); 4403 Context.insert(Label);
4448 return; 4404 return;
4449 } 4405 }
4450 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t 4406 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
4451 // But if SrcT is immediate, we might be able to do better, as 4407 // But if SrcT is immediate, we might be able to do better, as
4452 // the cmov instruction doesn't allow an immediate operand: 4408 // the cmov instruction doesn't allow an immediate operand:
4453 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t 4409 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
4454 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { 4410 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
4455 std::swap(SrcT, SrcF); 4411 std::swap(SrcT, SrcF);
4456 Cond = InstX8632::getOppositeCondition(Cond); 4412 Cond = InstX86Base<Machine>::getOppositeCondition(Cond);
4457 } 4413 }
4458 if (DestTy == IceType_i64) { 4414 if (DestTy == IceType_i64) {
4459 // Set the low portion. 4415 // Set the low portion.
4460 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 4416 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4461 Variable *TLo = nullptr; 4417 Variable *TLo = nullptr;
4462 Operand *SrcFLo = legalize(loOperand(SrcF)); 4418 Operand *SrcFLo = legalize(loOperand(SrcF));
4463 _mov(TLo, SrcFLo); 4419 _mov(TLo, SrcFLo);
4464 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem); 4420 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
4465 _cmov(TLo, SrcTLo, Cond); 4421 _cmov(TLo, SrcTLo, Cond);
4466 _mov(DestLo, TLo); 4422 _mov(DestLo, TLo);
(...skipping 14 matching lines...) Expand all
4481 _mov(T, SrcF); 4437 _mov(T, SrcF);
4482 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); 4438 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
4483 _cmov(T, SrcT, Cond); 4439 _cmov(T, SrcT, Cond);
4484 _mov(Dest, T); 4440 _mov(Dest, T);
4485 } 4441 }
4486 4442
4487 template <class Machine> 4443 template <class Machine>
4488 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { 4444 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {
4489 Operand *Value = Inst->getData(); 4445 Operand *Value = Inst->getData();
4490 Operand *Addr = Inst->getAddr(); 4446 Operand *Addr = Inst->getAddr();
4491 OperandX8632Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); 4447 typename Traits::X86OperandMem *NewAddr =
4448 formMemoryOperand(Addr, Value->getType());
4492 Type Ty = NewAddr->getType(); 4449 Type Ty = NewAddr->getType();
4493 4450
4494 if (Ty == IceType_i64) { 4451 if (Ty == IceType_i64) {
4495 Value = legalize(Value); 4452 Value = legalize(Value);
4496 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); 4453 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
4497 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); 4454 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
4498 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); 4455 _store(ValueHi,
4499 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); 4456 llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr)));
4457 _store(ValueLo,
4458 llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr)));
4500 } else if (isVectorType(Ty)) { 4459 } else if (isVectorType(Ty)) {
4501 _storep(legalizeToVar(Value), NewAddr); 4460 _storep(legalizeToVar(Value), NewAddr);
4502 } else { 4461 } else {
4503 Value = legalize(Value, Legal_Reg | Legal_Imm); 4462 Value = legalize(Value, Legal_Reg | Legal_Imm);
4504 _store(Value, NewAddr); 4463 _store(Value, NewAddr);
4505 } 4464 }
4506 } 4465 }
4507 4466
4508 template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() { 4467 template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() {
4509 InstStore *Inst = llvm::cast<InstStore>(Context.getCur()); 4468 InstStore *Inst = llvm::cast<InstStore>(Context.getCur());
4510 Operand *Data = Inst->getData(); 4469 Operand *Data = Inst->getData();
4511 Operand *Addr = Inst->getAddr(); 4470 Operand *Addr = Inst->getAddr();
4512 Variable *Index = nullptr; 4471 Variable *Index = nullptr;
4513 uint16_t Shift = 0; 4472 uint16_t Shift = 0;
4514 int32_t Offset = 0; // TODO: make Constant 4473 int32_t Offset = 0; // TODO: make Constant
4515 Variable *Base = llvm::dyn_cast<Variable>(Addr); 4474 Variable *Base = llvm::dyn_cast<Variable>(Addr);
4516 // Vanilla ICE store instructions should not use the segment registers, 4475 // Vanilla ICE store instructions should not use the segment registers, and
4517 // and computeAddressOpt only works at the level of Variables and Constants, 4476 // computeAddressOpt only works at the level of Variables and Constants, not
4518 // not other OperandX8632Mem, so there should be no mention of segment 4477 // other Traits::X86OperandMem, so there should be no mention of segment
4519 // registers there either. 4478 // registers there either.
4520 const OperandX8632Mem::SegmentRegisters SegmentReg = 4479 const typename Traits::X86OperandMem::SegmentRegisters SegmentReg =
4521 OperandX8632Mem::DefaultSegment; 4480 Traits::X86OperandMem::DefaultSegment;
4522 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 4481 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
4523 if (Base && Addr != Base) { 4482 if (Base && Addr != Base) {
4524 Inst->setDeleted(); 4483 Inst->setDeleted();
4525 Constant *OffsetOp = Ctx->getConstantInt32(Offset); 4484 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
4526 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, 4485 Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp,
4527 Shift, SegmentReg); 4486 Index, Shift, SegmentReg);
4528 InstStore *NewStore = InstStore::create(Func, Data, Addr); 4487 InstStore *NewStore = InstStore::create(Func, Data, Addr);
4529 if (Inst->getDest()) 4488 if (Inst->getDest())
4530 NewStore->setRmwBeacon(Inst->getRmwBeacon()); 4489 NewStore->setRmwBeacon(Inst->getRmwBeacon());
4531 Context.insert(NewStore); 4490 Context.insert(NewStore);
4532 } 4491 }
4533 } 4492 }
4534 4493
4535 template <class Machine> 4494 template <class Machine>
4536 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { 4495 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {
4537 // This implements the most naive possible lowering. 4496 // This implements the most naive possible lowering.
4538 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default 4497 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
4539 Operand *Src0 = Inst->getComparison(); 4498 Operand *Src0 = Inst->getComparison();
4540 SizeT NumCases = Inst->getNumCases(); 4499 SizeT NumCases = Inst->getNumCases();
4541 if (Src0->getType() == IceType_i64) { 4500 if (Src0->getType() == IceType_i64) {
4542 Src0 = legalize(Src0); // get Base/Index into physical registers 4501 Src0 = legalize(Src0); // get Base/Index into physical registers
4543 Operand *Src0Lo = loOperand(Src0); 4502 Operand *Src0Lo = loOperand(Src0);
4544 Operand *Src0Hi = hiOperand(Src0); 4503 Operand *Src0Hi = hiOperand(Src0);
4545 if (NumCases >= 2) { 4504 if (NumCases >= 2) {
4546 Src0Lo = legalizeToVar(Src0Lo); 4505 Src0Lo = legalizeToVar(Src0Lo);
4547 Src0Hi = legalizeToVar(Src0Hi); 4506 Src0Hi = legalizeToVar(Src0Hi);
4548 } else { 4507 } else {
4549 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem); 4508 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
4550 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); 4509 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
4551 } 4510 }
4552 for (SizeT I = 0; I < NumCases; ++I) { 4511 for (SizeT I = 0; I < NumCases; ++I) {
4553 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I)); 4512 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
4554 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32); 4513 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
4555 InstX8632Label *Label = InstX8632Label::create(Func, this); 4514 typename Traits::Insts::Label *Label =
4515 Traits::Insts::Label::create(Func, this);
4556 _cmp(Src0Lo, ValueLo); 4516 _cmp(Src0Lo, ValueLo);
4557 _br(Traits::Cond::Br_ne, Label); 4517 _br(Traits::Cond::Br_ne, Label);
4558 _cmp(Src0Hi, ValueHi); 4518 _cmp(Src0Hi, ValueHi);
4559 _br(Traits::Cond::Br_e, Inst->getLabel(I)); 4519 _br(Traits::Cond::Br_e, Inst->getLabel(I));
4560 Context.insert(Label); 4520 Context.insert(Label);
4561 } 4521 }
4562 _br(Inst->getLabelDefault()); 4522 _br(Inst->getLabelDefault());
4563 return; 4523 return;
4564 } 4524 }
4565 // OK, we'll be slightly less naive by forcing Src into a physical 4525 // OK, we'll be slightly less naive by forcing Src into a physical
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
4632 } 4592 }
4633 } 4593 }
4634 4594
4635 template <class Machine> 4595 template <class Machine>
4636 void TargetX86Base<Machine>::lowerUnreachable( 4596 void TargetX86Base<Machine>::lowerUnreachable(
4637 const InstUnreachable * /*Inst*/) { 4597 const InstUnreachable * /*Inst*/) {
4638 _ud2(); 4598 _ud2();
4639 } 4599 }
4640 4600
4641 template <class Machine> 4601 template <class Machine>
4642 void TargetX86Base<Machine>::lowerRMW(const InstX8632FakeRMW *RMW) { 4602 void TargetX86Base<Machine>::lowerRMW(
4603 const typename Traits::Insts::FakeRMW *RMW) {
4643 // If the beacon variable's live range does not end in this 4604 // If the beacon variable's live range does not end in this
4644 // instruction, then it must end in the modified Store instruction 4605 // instruction, then it must end in the modified Store instruction
4645 // that follows. This means that the original Store instruction is 4606 // that follows. This means that the original Store instruction is
4646 // still there, either because the value being stored is used beyond 4607 // still there, either because the value being stored is used beyond
4647 // the Store instruction, or because dead code elimination did not 4608 // the Store instruction, or because dead code elimination did not
4648 // happen. In either case, we cancel RMW lowering (and the caller 4609 // happen. In either case, we cancel RMW lowering (and the caller
4649 // deletes the RMW instruction). 4610 // deletes the RMW instruction).
4650 if (!RMW->isLastUse(RMW->getBeacon())) 4611 if (!RMW->isLastUse(RMW->getBeacon()))
4651 return; 4612 return;
4652 Operand *Src = RMW->getData(); 4613 Operand *Src = RMW->getData();
4653 Type Ty = Src->getType(); 4614 Type Ty = Src->getType();
4654 OperandX8632Mem *Addr = formMemoryOperand(RMW->getAddr(), Ty); 4615 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
4655 if (Ty == IceType_i64) { 4616 if (Ty == IceType_i64) {
4656 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm); 4617 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
4657 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm); 4618 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
4658 OperandX8632Mem *AddrLo = llvm::cast<OperandX8632Mem>(loOperand(Addr)); 4619 typename Traits::X86OperandMem *AddrLo =
4659 OperandX8632Mem *AddrHi = llvm::cast<OperandX8632Mem>(hiOperand(Addr)); 4620 llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr));
4621 typename Traits::X86OperandMem *AddrHi =
4622 llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr));
4660 switch (RMW->getOp()) { 4623 switch (RMW->getOp()) {
4661 default: 4624 default:
4662 // TODO(stichnot): Implement other arithmetic operators. 4625 // TODO(stichnot): Implement other arithmetic operators.
4663 break; 4626 break;
4664 case InstArithmetic::Add: 4627 case InstArithmetic::Add:
4665 _add_rmw(AddrLo, SrcLo); 4628 _add_rmw(AddrLo, SrcLo);
4666 _adc_rmw(AddrHi, SrcHi); 4629 _adc_rmw(AddrHi, SrcHi);
4667 return; 4630 return;
4668 case InstArithmetic::Sub: 4631 case InstArithmetic::Sub:
4669 _sub_rmw(AddrLo, SrcLo); 4632 _sub_rmw(AddrLo, SrcLo);
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
4708 Src = legalize(Src, Legal_Reg | Legal_Imm); 4671 Src = legalize(Src, Legal_Reg | Legal_Imm);
4709 _xor_rmw(Addr, Src); 4672 _xor_rmw(Addr, Src);
4710 return; 4673 return;
4711 } 4674 }
4712 } 4675 }
4713 llvm::report_fatal_error("Couldn't lower RMW instruction"); 4676 llvm::report_fatal_error("Couldn't lower RMW instruction");
4714 } 4677 }
4715 4678
4716 template <class Machine> 4679 template <class Machine>
4717 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { 4680 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) {
4718 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { 4681 if (const auto *RMW =
4682 llvm::dyn_cast<typename Traits::Insts::FakeRMW>(Instr)) {
4719 lowerRMW(RMW); 4683 lowerRMW(RMW);
4720 } else { 4684 } else {
4721 TargetLowering::lowerOther(Instr); 4685 TargetLowering::lowerOther(Instr);
4722 } 4686 }
4723 } 4687 }
4724 4688
4725 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to 4689 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
4726 /// preserve integrity of liveness analysis. Undef values are also 4690 /// preserve integrity of liveness analysis. Undef values are also
4727 /// turned into zeroes, since loOperand() and hiOperand() don't expect 4691 /// turned into zeroes, since loOperand() and hiOperand() don't expect
4728 /// Undef input. 4692 /// Undef input.
(...skipping 255 matching lines...) Expand 10 before | Expand all | Expand 10 after
4984 /// vector constants in memory. 4948 /// vector constants in memory.
4985 template <class Machine> 4949 template <class Machine>
4986 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, 4950 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty,
4987 int32_t RegNum) { 4951 int32_t RegNum) {
4988 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); 4952 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
4989 _psrl(Reg, Ctx->getConstantInt8(1)); 4953 _psrl(Reg, Ctx->getConstantInt8(1));
4990 return Reg; 4954 return Reg;
4991 } 4955 }
4992 4956
4993 template <class Machine> 4957 template <class Machine>
4994 OperandX8632Mem * 4958 typename TargetX86Base<Machine>::Traits::X86OperandMem *
4995 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, 4959 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
4996 uint32_t Offset) { 4960 uint32_t Offset) {
4997 // Ensure that Loc is a stack slot. 4961 // Ensure that Loc is a stack slot.
4998 assert(Slot->getWeight().isZero()); 4962 assert(Slot->getWeight().isZero());
4999 assert(Slot->getRegNum() == Variable::NoRegister); 4963 assert(Slot->getRegNum() == Variable::NoRegister);
5000 // Compute the location of Loc in memory. 4964 // Compute the location of Loc in memory.
5001 // TODO(wala,stichnot): lea should not be required. The address of 4965 // TODO(wala,stichnot): lea should not be required. The address of
5002 // the stack slot is known at compile time (although not until after 4966 // the stack slot is known at compile time (although not until after
5003 // addProlog()). 4967 // addProlog()).
5004 const Type PointerType = IceType_i32; 4968 const Type PointerType = IceType_i32;
5005 Variable *Loc = makeReg(PointerType); 4969 Variable *Loc = makeReg(PointerType);
5006 _lea(Loc, Slot); 4970 _lea(Loc, Slot);
5007 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); 4971 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
5008 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); 4972 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
5009 } 4973 }
5010 4974
5011 /// Helper for legalize() to emit the right code to lower an operand to a 4975 /// Helper for legalize() to emit the right code to lower an operand to a
5012 /// register of the appropriate type. 4976 /// register of the appropriate type.
5013 template <class Machine> 4977 template <class Machine>
5014 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { 4978 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
5015 Type Ty = Src->getType(); 4979 Type Ty = Src->getType();
5016 Variable *Reg = makeReg(Ty, RegNum); 4980 Variable *Reg = makeReg(Ty, RegNum);
5017 if (isVectorType(Ty)) { 4981 if (isVectorType(Ty)) {
5018 _movp(Reg, Src); 4982 _movp(Reg, Src);
(...skipping 11 matching lines...) Expand all
5030 // to legalize() allow a physical register. If a physical register 4994 // to legalize() allow a physical register. If a physical register
5031 // needs to be explicitly disallowed, then new code will need to be 4995 // needs to be explicitly disallowed, then new code will need to be
5032 // written to force a spill. 4996 // written to force a spill.
5033 assert(Allowed & Legal_Reg); 4997 assert(Allowed & Legal_Reg);
5034 // If we're asking for a specific physical register, make sure we're 4998 // If we're asking for a specific physical register, make sure we're
5035 // not allowing any other operand kinds. (This could be future 4999 // not allowing any other operand kinds. (This could be future
5036 // work, e.g. allow the shl shift amount to be either an immediate 5000 // work, e.g. allow the shl shift amount to be either an immediate
5037 // or in ecx.) 5001 // or in ecx.)
5038 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg); 5002 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
5039 5003
5040 if (auto Mem = llvm::dyn_cast<OperandX8632Mem>(From)) { 5004 if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
5041 // Before doing anything with a Mem operand, we need to ensure 5005 // Before doing anything with a Mem operand, we need to ensure
5042 // that the Base and Index components are in physical registers. 5006 // that the Base and Index components are in physical registers.
5043 Variable *Base = Mem->getBase(); 5007 Variable *Base = Mem->getBase();
5044 Variable *Index = Mem->getIndex(); 5008 Variable *Index = Mem->getIndex();
5045 Variable *RegBase = nullptr; 5009 Variable *RegBase = nullptr;
5046 Variable *RegIndex = nullptr; 5010 Variable *RegIndex = nullptr;
5047 if (Base) { 5011 if (Base) {
5048 RegBase = legalizeToVar(Base); 5012 RegBase = legalizeToVar(Base);
5049 } 5013 }
5050 if (Index) { 5014 if (Index) {
5051 RegIndex = legalizeToVar(Index); 5015 RegIndex = legalizeToVar(Index);
5052 } 5016 }
5053 if (Base != RegBase || Index != RegIndex) { 5017 if (Base != RegBase || Index != RegIndex) {
5054 Mem = 5018 Mem = Traits::X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(),
5055 OperandX8632Mem::create(Func, Ty, RegBase, Mem->getOffset(), RegIndex, 5019 RegIndex, Mem->getShift(),
5056 Mem->getShift(), Mem->getSegmentRegister()); 5020 Mem->getSegmentRegister());
5057 } 5021 }
5058 5022
5059 // For all Memory Operands, we do randomization/pooling here 5023 // For all Memory Operands, we do randomization/pooling here
5060 From = randomizeOrPoolImmediate(Mem); 5024 From = randomizeOrPoolImmediate(Mem);
5061 5025
5062 if (!(Allowed & Legal_Mem)) { 5026 if (!(Allowed & Legal_Mem)) {
5063 From = copyToReg(From, RegNum); 5027 From = copyToReg(From, RegNum);
5064 } 5028 }
5065 return From; 5029 return From;
5066 } 5030 }
(...skipping 29 matching lines...) Expand all
5096 5060
5097 // Convert a scalar floating point constant into an explicit 5061 // Convert a scalar floating point constant into an explicit
5098 // memory operand. 5062 // memory operand.
5099 if (isScalarFloatingType(Ty)) { 5063 if (isScalarFloatingType(Ty)) {
5100 Variable *Base = nullptr; 5064 Variable *Base = nullptr;
5101 std::string Buffer; 5065 std::string Buffer;
5102 llvm::raw_string_ostream StrBuf(Buffer); 5066 llvm::raw_string_ostream StrBuf(Buffer);
5103 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf); 5067 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
5104 llvm::cast<Constant>(From)->setShouldBePooled(true); 5068 llvm::cast<Constant>(From)->setShouldBePooled(true);
5105 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); 5069 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
5106 From = OperandX8632Mem::create(Func, Ty, Base, Offset); 5070 From = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
5107 } 5071 }
5108 bool NeedsReg = false; 5072 bool NeedsReg = false;
5109 if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty)) 5073 if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty))
5110 // Immediate specifically not allowed 5074 // Immediate specifically not allowed
5111 NeedsReg = true; 5075 NeedsReg = true;
5112 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) 5076 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))
5113 // On x86, FP constants are lowered to mem operands. 5077 // On x86, FP constants are lowered to mem operands.
5114 NeedsReg = true; 5078 NeedsReg = true;
5115 if (NeedsReg) { 5079 if (NeedsReg) {
5116 From = copyToReg(From, RegNum); 5080 From = copyToReg(From, RegNum);
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
5155 if (llvm::isa<Constant>(Src1)) { 5119 if (llvm::isa<Constant>(Src1)) {
5156 IsSrc1ImmOrReg = true; 5120 IsSrc1ImmOrReg = true;
5157 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { 5121 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
5158 if (Var->hasReg()) 5122 if (Var->hasReg())
5159 IsSrc1ImmOrReg = true; 5123 IsSrc1ImmOrReg = true;
5160 } 5124 }
5161 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); 5125 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
5162 } 5126 }
5163 5127
5164 template <class Machine> 5128 template <class Machine>
5165 OperandX8632Mem *TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, 5129 typename TargetX86Base<Machine>::Traits::X86OperandMem *
5166 Type Ty, 5130 TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, Type Ty,
5167 bool DoLegalize) { 5131 bool DoLegalize) {
5168 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Opnd); 5132 auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd);
5169 // It may be the case that address mode optimization already creates 5133 // It may be the case that address mode optimization already creates an
5170 // an OperandX8632Mem, so in that case it wouldn't need another level 5134 // Traits::X86OperandMem, so in that case it wouldn't need another level of
5171 // of transformation. 5135 // transformation.
5172 if (!Mem) { 5136 if (!Mem) {
5173 Variable *Base = llvm::dyn_cast<Variable>(Opnd); 5137 Variable *Base = llvm::dyn_cast<Variable>(Opnd);
5174 Constant *Offset = llvm::dyn_cast<Constant>(Opnd); 5138 Constant *Offset = llvm::dyn_cast<Constant>(Opnd);
5175 assert(Base || Offset); 5139 assert(Base || Offset);
5176 if (Offset) { 5140 if (Offset) {
5177 // During memory operand building, we do not blind or pool 5141 // During memory operand building, we do not blind or pool
5178 // the constant offset, we will work on the whole memory 5142 // the constant offset, we will work on the whole memory
5179 // operand later as one entity later, this save one instruction. 5143 // operand later as one entity later, this save one instruction.
5180 // By turning blinding and pooling off, we guarantee 5144 // By turning blinding and pooling off, we guarantee
5181 // legalize(Offset) will return a constant*. 5145 // legalize(Offset) will return a constant*.
5182 { 5146 {
5183 BoolFlagSaver B(RandomizationPoolingPaused, true); 5147 BoolFlagSaver B(RandomizationPoolingPaused, true);
5184 5148
5185 Offset = llvm::cast<Constant>(legalize(Offset)); 5149 Offset = llvm::cast<Constant>(legalize(Offset));
5186 } 5150 }
5187 5151
5188 assert(llvm::isa<ConstantInteger32>(Offset) || 5152 assert(llvm::isa<ConstantInteger32>(Offset) ||
5189 llvm::isa<ConstantRelocatable>(Offset)); 5153 llvm::isa<ConstantRelocatable>(Offset));
5190 } 5154 }
5191 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); 5155 Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
5192 } 5156 }
5193 // Do legalization, which contains randomization/pooling 5157 // Do legalization, which contains randomization/pooling
5194 // or do randomization/pooling. 5158 // or do randomization/pooling.
5195 return llvm::cast<OperandX8632Mem>( 5159 return llvm::cast<typename Traits::X86OperandMem>(
5196 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); 5160 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
5197 } 5161 }
5198 5162
5199 template <class Machine> 5163 template <class Machine>
5200 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) { 5164 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {
5201 // There aren't any 64-bit integer registers for x86-32. 5165 // There aren't any 64-bit integer registers for x86-32.
5202 assert(Type != IceType_i64); 5166 assert(Type != IceType_i64);
5203 Variable *Reg = Func->template makeVariable(Type); 5167 Variable *Reg = Func->template makeVariable(Type);
5204 if (RegNum == Variable::NoRegister) 5168 if (RegNum == Variable::NoRegister)
5205 Reg->setWeightInfinite(); 5169 Reg->setWeightInfinite();
5206 else 5170 else
5207 Reg->setRegNum(RegNum); 5171 Reg->setRegNum(RegNum);
5208 return Reg; 5172 return Reg;
5209 } 5173 }
5210 5174
5211 template <class Machine> void TargetX86Base<Machine>::postLower() { 5175 template <class Machine> void TargetX86Base<Machine>::postLower() {
5212 if (Ctx->getFlags().getOptLevel() == Opt_m1) 5176 if (Ctx->getFlags().getOptLevel() == Opt_m1)
5213 return; 5177 return;
5214 inferTwoAddress(); 5178 inferTwoAddress();
5215 } 5179 }
5216 5180
5217 template <class Machine> 5181 template <class Machine>
5218 void TargetX86Base<Machine>::makeRandomRegisterPermutation( 5182 void TargetX86Base<Machine>::makeRandomRegisterPermutation(
5219 llvm::SmallVectorImpl<int32_t> &Permutation, 5183 llvm::SmallVectorImpl<int32_t> &Permutation,
5220 const llvm::SmallBitVector &ExcludeRegisters) const { 5184 const llvm::SmallBitVector &ExcludeRegisters) const {
5221 // TODO(stichnot): Declaring Permutation this way loses type/size 5185 Traits::makeRandomRegisterPermutation(Ctx, Func, Permutation,
5222 // information. Fix this in conjunction with the caller-side TODO. 5186 ExcludeRegisters);
5223 assert(Permutation.size() >= Traits::RegisterSet::Reg_NUM);
5224 // Expected upper bound on the number of registers in a single
5225 // equivalence class. For x86-32, this would comprise the 8 XMM
5226 // registers. This is for performance, not correctness.
5227 static const unsigned MaxEquivalenceClassSize = 8;
5228 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList;
5229 typedef std::map<uint32_t, RegisterList> EquivalenceClassMap;
5230 EquivalenceClassMap EquivalenceClasses;
5231 SizeT NumShuffled = 0, NumPreserved = 0;
5232
5233 // Build up the equivalence classes of registers by looking at the
5234 // register properties as well as whether the registers should be
5235 // explicitly excluded from shuffling.
5236 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
5237 frameptr, isI8, isInt, isFP) \
5238 if (ExcludeRegisters[Traits::RegisterSet::val]) { \
5239 /* val stays the same in the resulting permutation. */ \
5240 Permutation[Traits::RegisterSet::val] = Traits::RegisterSet::val; \
5241 ++NumPreserved; \
5242 } else { \
5243 const uint32_t Index = (scratch << 0) | (preserved << 1) | (isI8 << 2) | \
5244 (isInt << 3) | (isFP << 4); \
5245 /* val is assigned to an equivalence class based on its properties. */ \
5246 EquivalenceClasses[Index].push_back(Traits::RegisterSet::val); \
5247 }
5248 REGX8632_TABLE
5249 #undef X
5250
5251 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
5252
5253 // Shuffle the resulting equivalence classes.
5254 for (auto I : EquivalenceClasses) {
5255 const RegisterList &List = I.second;
5256 RegisterList Shuffled(List);
5257 RandomShuffle(Shuffled.begin(), Shuffled.end(), RNG);
5258 for (size_t SI = 0, SE = Shuffled.size(); SI < SE; ++SI) {
5259 Permutation[List[SI]] = Shuffled[SI];
5260 ++NumShuffled;
5261 }
5262 }
5263
5264 assert(NumShuffled + NumPreserved == Traits::RegisterSet::Reg_NUM);
5265
5266 if (Func->isVerbose(IceV_Random)) {
5267 OstreamLocker L(Func->getContext());
5268 Ostream &Str = Func->getContext()->getStrDump();
5269 Str << "Register equivalence classes:\n";
5270 for (auto I : EquivalenceClasses) {
5271 Str << "{";
5272 const RegisterList &List = I.second;
5273 bool First = true;
5274 for (int32_t Register : List) {
5275 if (!First)
5276 Str << " ";
5277 First = false;
5278 Str << getRegName(Register, IceType_i32);
5279 }
5280 Str << "}\n";
5281 }
5282 }
5283 } 5187 }
5284 5188
5285 template <class Machine> 5189 template <class Machine>
5286 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const { 5190 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const {
5287 if (!BuildDefs::dump()) 5191 if (!BuildDefs::dump())
5288 return; 5192 return;
5289 Ostream &Str = Ctx->getStrEmit(); 5193 Ostream &Str = Ctx->getStrEmit();
5290 Str << getConstantPrefix() << C->getValue(); 5194 Str << getConstantPrefix() << C->getValue();
5291 } 5195 }
5292 5196
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
5343 // the assigned register as this assignment is that start of its use-def 5247 // the assigned register as this assignment is that start of its use-def
5344 // chain. So we add RegNum argument here. 5248 // chain. So we add RegNum argument here.
5345 // Note we use 'lea' instruction instead of 'xor' to avoid affecting 5249 // Note we use 'lea' instruction instead of 'xor' to avoid affecting
5346 // the flags. 5250 // the flags.
5347 Variable *Reg = makeReg(IceType_i32, RegNum); 5251 Variable *Reg = makeReg(IceType_i32, RegNum);
5348 ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate); 5252 ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate);
5349 uint32_t Value = Integer->getValue(); 5253 uint32_t Value = Integer->getValue();
5350 uint32_t Cookie = Ctx->getRandomizationCookie(); 5254 uint32_t Cookie = Ctx->getRandomizationCookie();
5351 _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value)); 5255 _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value));
5352 Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie); 5256 Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie);
5353 _lea(Reg, 5257 _lea(Reg, Traits::X86OperandMem::create(Func, IceType_i32, Reg, Offset,
5354 OperandX8632Mem::create(Func, IceType_i32, Reg, Offset, nullptr, 0)); 5258 nullptr, 0));
5355 // make sure liveness analysis won't kill this variable, otherwise a 5259 // make sure liveness analysis won't kill this variable, otherwise a
5356 // liveness 5260 // liveness
5357 // assertion will be triggered. 5261 // assertion will be triggered.
5358 _set_dest_nonkillable(); 5262 _set_dest_nonkillable();
5359 if (Immediate->getType() != IceType_i32) { 5263 if (Immediate->getType() != IceType_i32) {
5360 Variable *TruncReg = makeReg(Immediate->getType(), RegNum); 5264 Variable *TruncReg = makeReg(Immediate->getType(), RegNum);
5361 _mov(TruncReg, Reg); 5265 _mov(TruncReg, Reg);
5362 return TruncReg; 5266 return TruncReg;
5363 } 5267 }
5364 return Reg; 5268 return Reg;
(...skipping 12 matching lines...) Expand all
5377 // the assigned register as this assignment is that start of its use-def 5281 // the assigned register as this assignment is that start of its use-def
5378 // chain. So we add RegNum argument here. 5282 // chain. So we add RegNum argument here.
5379 Variable *Reg = makeReg(Immediate->getType(), RegNum); 5283 Variable *Reg = makeReg(Immediate->getType(), RegNum);
5380 IceString Label; 5284 IceString Label;
5381 llvm::raw_string_ostream Label_stream(Label); 5285 llvm::raw_string_ostream Label_stream(Label);
5382 Immediate->emitPoolLabel(Label_stream); 5286 Immediate->emitPoolLabel(Label_stream);
5383 const RelocOffsetT Offset = 0; 5287 const RelocOffsetT Offset = 0;
5384 const bool SuppressMangling = true; 5288 const bool SuppressMangling = true;
5385 Constant *Symbol = 5289 Constant *Symbol =
5386 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); 5290 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling);
5387 OperandX8632Mem *MemOperand = 5291 typename Traits::X86OperandMem *MemOperand =
5388 OperandX8632Mem::create(Func, Immediate->getType(), nullptr, Symbol); 5292 Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr,
5293 Symbol);
5389 _mov(Reg, MemOperand); 5294 _mov(Reg, MemOperand);
5390 return Reg; 5295 return Reg;
5391 } 5296 }
5392 assert("Unsupported -randomize-pool-immediates option" && false); 5297 assert("Unsupported -randomize-pool-immediates option" && false);
5393 } 5298 }
5394 // the constant Immediate is not eligible for blinding/pooling 5299 // the constant Immediate is not eligible for blinding/pooling
5395 return Immediate; 5300 return Immediate;
5396 } 5301 }
5397 5302
5398 template <class Machine> 5303 template <class Machine>
5399 OperandX8632Mem * 5304 typename TargetX86Base<Machine>::Traits::X86OperandMem *
5400 TargetX86Base<Machine>::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand, 5305 TargetX86Base<Machine>::randomizeOrPoolImmediate(
5401 int32_t RegNum) { 5306 typename Traits::X86OperandMem *MemOperand, int32_t RegNum) {
5402 assert(MemOperand); 5307 assert(MemOperand);
5403 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || 5308 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
5404 RandomizationPoolingPaused == true) { 5309 RandomizationPoolingPaused == true) {
5405 // immediates randomization/pooling is turned off 5310 // immediates randomization/pooling is turned off
5406 return MemOperand; 5311 return MemOperand;
5407 } 5312 }
5408 5313
5409 // If this memory operand is already a randommized one, we do 5314 // If this memory operand is already a randommized one, we do
5410 // not randomize it again. 5315 // not randomize it again.
5411 if (MemOperand->getRandomized()) 5316 if (MemOperand->getRandomized())
(...skipping 13 matching lines...) Expand all
5425 // => -cookie[RegTemp, index, shift] 5330 // => -cookie[RegTemp, index, shift]
5426 uint32_t Value = 5331 uint32_t Value =
5427 llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset()) 5332 llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset())
5428 ->getValue(); 5333 ->getValue();
5429 uint32_t Cookie = Ctx->getRandomizationCookie(); 5334 uint32_t Cookie = Ctx->getRandomizationCookie();
5430 Constant *Mask1 = Ctx->getConstantInt( 5335 Constant *Mask1 = Ctx->getConstantInt(
5431 MemOperand->getOffset()->getType(), Cookie + Value); 5336 MemOperand->getOffset()->getType(), Cookie + Value);
5432 Constant *Mask2 = 5337 Constant *Mask2 =
5433 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); 5338 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);
5434 5339
5435 OperandX8632Mem *TempMemOperand = OperandX8632Mem::create( 5340 typename Traits::X86OperandMem *TempMemOperand =
5436 Func, MemOperand->getType(), MemOperand->getBase(), Mask1); 5341 Traits::X86OperandMem::create(Func, MemOperand->getType(),
5342 MemOperand->getBase(), Mask1);
5437 // If we have already assigned a physical register, we must come from 5343 // If we have already assigned a physical register, we must come from
5438 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse 5344 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
5439 // the assigned register as this assignment is that start of its use-def 5345 // the assigned register as this assignment is that start of its use-def
5440 // chain. So we add RegNum argument here. 5346 // chain. So we add RegNum argument here.
5441 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); 5347 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
5442 _lea(RegTemp, TempMemOperand); 5348 _lea(RegTemp, TempMemOperand);
5443 // As source operand doesn't use the dstreg, we don't need to add 5349 // As source operand doesn't use the dstreg, we don't need to add
5444 // _set_dest_nonkillable(). 5350 // _set_dest_nonkillable().
5445 // But if we use the same Dest Reg, that is, with RegNum 5351 // But if we use the same Dest Reg, that is, with RegNum
5446 // assigned, we should add this _set_dest_nonkillable() 5352 // assigned, we should add this _set_dest_nonkillable()
5447 if (RegNum != Variable::NoRegister) 5353 if (RegNum != Variable::NoRegister)
5448 _set_dest_nonkillable(); 5354 _set_dest_nonkillable();
5449 5355
5450 OperandX8632Mem *NewMemOperand = OperandX8632Mem::create( 5356 typename Traits::X86OperandMem *NewMemOperand =
5451 Func, MemOperand->getType(), RegTemp, Mask2, MemOperand->getIndex(), 5357 Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp,
5452 MemOperand->getShift(), MemOperand->getSegmentRegister()); 5358 Mask2, MemOperand->getIndex(),
5359 MemOperand->getShift(),
5360 MemOperand->getSegmentRegister());
5453 5361
5454 // Label this memory operand as randomize, so we won't randomize it 5362 // Label this memory operand as randomize, so we won't randomize it
5455 // again in case we call legalize() mutiple times on this memory 5363 // again in case we call legalize() mutiple times on this memory
5456 // operand. 5364 // operand.
5457 NewMemOperand->setRandomized(true); 5365 NewMemOperand->setRandomized(true);
5458 return NewMemOperand; 5366 return NewMemOperand;
5459 } 5367 }
5460 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { 5368 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
5461 // pool the constant offset 5369 // pool the constant offset
5462 // FROM: 5370 // FROM:
(...skipping 14 matching lines...) Expand all
5477 return MemOperand; 5385 return MemOperand;
5478 Variable *RegTemp = makeReg(IceType_i32); 5386 Variable *RegTemp = makeReg(IceType_i32);
5479 IceString Label; 5387 IceString Label;
5480 llvm::raw_string_ostream Label_stream(Label); 5388 llvm::raw_string_ostream Label_stream(Label);
5481 MemOperand->getOffset()->emitPoolLabel(Label_stream); 5389 MemOperand->getOffset()->emitPoolLabel(Label_stream);
5482 MemOperand->getOffset()->setShouldBePooled(true); 5390 MemOperand->getOffset()->setShouldBePooled(true);
5483 const RelocOffsetT SymOffset = 0; 5391 const RelocOffsetT SymOffset = 0;
5484 bool SuppressMangling = true; 5392 bool SuppressMangling = true;
5485 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), 5393 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(),
5486 SuppressMangling); 5394 SuppressMangling);
5487 OperandX8632Mem *SymbolOperand = OperandX8632Mem::create( 5395 typename Traits::X86OperandMem *SymbolOperand =
5488 Func, MemOperand->getOffset()->getType(), nullptr, Symbol); 5396 Traits::X86OperandMem::create(
5397 Func, MemOperand->getOffset()->getType(), nullptr, Symbol);
5489 _mov(RegTemp, SymbolOperand); 5398 _mov(RegTemp, SymbolOperand);
5490 // If we have a base variable here, we should add the lea instruction 5399 // If we have a base variable here, we should add the lea instruction
5491 // to add the value of the base variable to RegTemp. If there is no 5400 // to add the value of the base variable to RegTemp. If there is no
5492 // base variable, we won't need this lea instruction. 5401 // base variable, we won't need this lea instruction.
5493 if (MemOperand->getBase()) { 5402 if (MemOperand->getBase()) {
5494 OperandX8632Mem *CalculateOperand = OperandX8632Mem::create( 5403 typename Traits::X86OperandMem *CalculateOperand =
5495 Func, MemOperand->getType(), MemOperand->getBase(), nullptr, 5404 Traits::X86OperandMem::create(
5496 RegTemp, 0, MemOperand->getSegmentRegister()); 5405 Func, MemOperand->getType(), MemOperand->getBase(), nullptr,
5406 RegTemp, 0, MemOperand->getSegmentRegister());
5497 _lea(RegTemp, CalculateOperand); 5407 _lea(RegTemp, CalculateOperand);
5498 _set_dest_nonkillable(); 5408 _set_dest_nonkillable();
5499 } 5409 }
5500 OperandX8632Mem *NewMemOperand = OperandX8632Mem::create( 5410 typename Traits::X86OperandMem *NewMemOperand =
5501 Func, MemOperand->getType(), RegTemp, nullptr, 5411 Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp,
5502 MemOperand->getIndex(), MemOperand->getShift(), 5412 nullptr, MemOperand->getIndex(),
5503 MemOperand->getSegmentRegister()); 5413 MemOperand->getShift(),
5414 MemOperand->getSegmentRegister());
5504 return NewMemOperand; 5415 return NewMemOperand;
5505 } 5416 }
5506 assert("Unsupported -randomize-pool-immediates option" && false); 5417 assert("Unsupported -randomize-pool-immediates option" && false);
5507 } 5418 }
5508 } 5419 }
5509 // the offset is not eligible for blinding or pooling, return the original 5420 // the offset is not eligible for blinding or pooling, return the original
5510 // mem operand 5421 // mem operand
5511 return MemOperand; 5422 return MemOperand;
5512 } 5423 }
5513 5424
5514 } // end of namespace X86Internal 5425 } // end of namespace X86Internal
5515 } // end of namespace Ice 5426 } // end of namespace Ice
5516 5427
5517 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5428 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX86Base.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698