Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1216933015: X8632 Templatization completed. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments. Created 5 years, 5 months ago
OLD | NEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX86Base class, which 10 // This file implements the TargetLoweringX86Base class, which
11 // consists almost entirely of the lowering sequence for each 11 // consists almost entirely of the lowering sequence for each
12 // high-level instruction. 12 // high-level instruction.
13 // 13 //
14 //===----------------------------------------------------------------------===// 14 //===----------------------------------------------------------------------===//
15 15
16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
17 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 17 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
18 18
19 #include "IceCfg.h" 19 #include "IceCfg.h"
20 #include "IceCfgNode.h" 20 #include "IceCfgNode.h"
21 #include "IceClFlags.h" 21 #include "IceClFlags.h"
22 #include "IceDefs.h" 22 #include "IceDefs.h"
23 #include "IceELFObjectWriter.h" 23 #include "IceELFObjectWriter.h"
24 #include "IceGlobalInits.h" 24 #include "IceGlobalInits.h"
25 #include "IceInstX8632.h"
26 #include "IceLiveness.h" 25 #include "IceLiveness.h"
27 #include "IceOperand.h" 26 #include "IceOperand.h"
28 #include "IceRegistersX8632.h"
29 #include "IceTargetLoweringX8632.def"
30 #include "IceTargetLoweringX8632.h"
31 #include "IceUtils.h" 27 #include "IceUtils.h"
32 #include "llvm/Support/MathExtras.h" 28 #include "llvm/Support/MathExtras.h"
33 29
34 namespace Ice { 30 namespace Ice {
35 namespace X86Internal { 31 namespace X86Internal {
36 32
37 // A helper class to ease the settings of RandomizationPoolingPause 33 // A helper class to ease the settings of RandomizationPoolingPause to disable
38 // to disable constant blinding or pooling for some translation phases. 34 // constant blinding or pooling for some translation phases.
39 class BoolFlagSaver { 35 class BoolFlagSaver {
40 BoolFlagSaver() = delete; 36 BoolFlagSaver() = delete;
41 BoolFlagSaver(const BoolFlagSaver &) = delete; 37 BoolFlagSaver(const BoolFlagSaver &) = delete;
42 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete; 38 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete;
43 39
44 public: 40 public:
45 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; } 41 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; }
46 ~BoolFlagSaver() { Flag = OldValue; } 42 ~BoolFlagSaver() { Flag = OldValue; }
47 43
48 private: 44 private:
(...skipping 28 matching lines...)
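BoolFlagSaver is a small RAII guard: the constructor records the flag's old value and overwrites it, and the destructor restores it. It is used later in this file as BoolFlagSaver B(RandomizationPoolingPaused, true) around doLoadOpt() and advancedPhiLowering(). A minimal standalone sketch of the same idea, with a hypothetical flag name:

    bool PoolingPaused = false;                // hypothetical flag
    {
      BoolFlagSaver B(PoolingPaused, true);    // saves false, sets the flag to true
      // ... run a phase that must not see constant blinding/pooling ...
    }                                          // destructor restores false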
77 public: 73 public:
78 enum BoolFoldingProducerKind { 74 enum BoolFoldingProducerKind {
79 PK_None, 75 PK_None,
80 PK_Icmp32, 76 PK_Icmp32,
81 PK_Icmp64, 77 PK_Icmp64,
82 PK_Fcmp, 78 PK_Fcmp,
83 PK_Trunc 79 PK_Trunc
84 }; 80 };
85 81
86 // Currently the actual enum values are not used (other than CK_None), but we 82 // Currently the actual enum values are not used (other than CK_None), but we
87 // go 83 // go ahead and produce them anyway for symmetry with the
88 // ahead and produce them anyway for symmetry with the
89 // BoolFoldingProducerKind. 84 // BoolFoldingProducerKind.
90 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; 85 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext };
91 86
92 private: 87 private:
93 BoolFolding(const BoolFolding &) = delete; 88 BoolFolding(const BoolFolding &) = delete;
94 BoolFolding &operator=(const BoolFolding &) = delete; 89 BoolFolding &operator=(const BoolFolding &) = delete;
95 90
96 public: 91 public:
97 BoolFolding() = default; 92 BoolFolding() = default;
98 static BoolFoldingProducerKind getProducerKind(const Inst *Instr); 93 static BoolFoldingProducerKind getProducerKind(const Inst *Instr);
(...skipping 56 matching lines...)
155 return CK_None; 150 return CK_None;
156 case InstCast::Sext: 151 case InstCast::Sext:
157 return CK_Sext; 152 return CK_Sext;
158 case InstCast::Zext: 153 case InstCast::Zext:
159 return CK_Zext; 154 return CK_Zext;
160 } 155 }
161 } 156 }
162 return CK_None; 157 return CK_None;
163 } 158 }
164 159
165 // Returns true if the producing instruction has a "complex" lowering 160 // Returns true if the producing instruction has a "complex" lowering sequence.
166 // sequence. This generally means that its lowering sequence requires 161 // This generally means that its lowering sequence requires more than one
167 // more than one conditional branch, namely 64-bit integer compares 162 // conditional branch, namely 64-bit integer compares and some floating-point
168 // and some floating-point compares. When this is true, and there is 163 // compares. When this is true, and there is more than one consumer, we prefer
169 // more than one consumer, we prefer to disable the folding 164 // to disable the folding optimization because it minimizes branches.
170 // optimization because it minimizes branches.
171 template <class MachineTraits> 165 template <class MachineTraits>
172 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { 166 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
173 switch (getProducerKind(Instr)) { 167 switch (getProducerKind(Instr)) {
174 default: 168 default:
175 return false; 169 return false;
176 case PK_Icmp64: 170 case PK_Icmp64:
177 return true; 171 return true;
178 case PK_Fcmp: 172 case PK_Fcmp:
179 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] 173 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]
180 .C2 != MachineTraits::Cond::Br_None; 174 .C2 != MachineTraits::Cond::Br_None;
(...skipping 37 matching lines...)
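The policy described above is stated only in prose; its intent, in rough pseudocode (the member names are illustrative, not the actual BoolFolding fields):

    // for each producer P recorded for this block:
    //   if (hasComplexLowering(P.Instr) && P has more than one consumer)
    //     setInvalid(P);   // emit the compare once instead of duplicating a
    //                      // multi-branch lowering at every consumer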
218 } 212 }
219 for (auto &I : Producers) { 213 for (auto &I : Producers) {
220 // Ignore entries previously marked invalid. 214 // Ignore entries previously marked invalid.
221 if (I.second.Instr == nullptr) 215 if (I.second.Instr == nullptr)
222 continue; 216 continue;
223 // Disable the producer if its dest may be live beyond this block. 217 // Disable the producer if its dest may be live beyond this block.
224 if (I.second.IsLiveOut) { 218 if (I.second.IsLiveOut) {
225 setInvalid(I.first); 219 setInvalid(I.first);
226 continue; 220 continue;
227 } 221 }
228 // Mark as "dead" rather than outright deleting. This is so that 222 // Mark as "dead" rather than outright deleting. This is so that other
229 // other peephole style optimizations during or before lowering 223 // peephole style optimizations during or before lowering have access to
230 // have access to this instruction in undeleted form. See for 224 // this instruction in undeleted form. See for example
231 // example tryOptimizedCmpxchgCmpBr(). 225 // tryOptimizedCmpxchgCmpBr().
232 I.second.Instr->setDead(); 226 I.second.Instr->setDead();
233 } 227 }
234 } 228 }
235 229
236 template <class MachineTraits> 230 template <class MachineTraits>
237 const Inst * 231 const Inst *
238 BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const { 232 BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const {
239 auto *Var = llvm::dyn_cast<const Variable>(Opnd); 233 auto *Var = llvm::dyn_cast<const Variable>(Opnd);
240 if (Var == nullptr) 234 if (Var == nullptr)
241 return nullptr; 235 return nullptr;
(...skipping 33 matching lines...)
275 (TargetInstructionSet::X86InstructionSet_End - 269 (TargetInstructionSet::X86InstructionSet_End -
276 TargetInstructionSet::X86InstructionSet_Begin), 270 TargetInstructionSet::X86InstructionSet_Begin),
277 "Traits::InstructionSet range different from TargetInstructionSet"); 271 "Traits::InstructionSet range different from TargetInstructionSet");
278 if (Func->getContext()->getFlags().getTargetInstructionSet() != 272 if (Func->getContext()->getFlags().getTargetInstructionSet() !=
279 TargetInstructionSet::BaseInstructionSet) { 273 TargetInstructionSet::BaseInstructionSet) {
280 InstructionSet = static_cast<typename Traits::InstructionSet>( 274 InstructionSet = static_cast<typename Traits::InstructionSet>(
281 (Func->getContext()->getFlags().getTargetInstructionSet() - 275 (Func->getContext()->getFlags().getTargetInstructionSet() -
282 TargetInstructionSet::X86InstructionSet_Begin) + 276 TargetInstructionSet::X86InstructionSet_Begin) +
283 Traits::InstructionSet::Begin); 277 Traits::InstructionSet::Begin);
284 } 278 }
285 // TODO: Don't initialize IntegerRegisters and friends every time. 279 // TODO: Don't initialize IntegerRegisters and friends every time. Instead,
286 // Instead, initialize in some sort of static initializer for the 280 // initialize in some sort of static initializer for the class.
287 // class.
288 llvm::SmallBitVector IntegerRegisters(Traits::RegisterSet::Reg_NUM); 281 llvm::SmallBitVector IntegerRegisters(Traits::RegisterSet::Reg_NUM);
289 llvm::SmallBitVector IntegerRegistersI8(Traits::RegisterSet::Reg_NUM); 282 llvm::SmallBitVector IntegerRegistersI8(Traits::RegisterSet::Reg_NUM);
290 llvm::SmallBitVector FloatRegisters(Traits::RegisterSet::Reg_NUM); 283 llvm::SmallBitVector FloatRegisters(Traits::RegisterSet::Reg_NUM);
291 llvm::SmallBitVector VectorRegisters(Traits::RegisterSet::Reg_NUM); 284 llvm::SmallBitVector VectorRegisters(Traits::RegisterSet::Reg_NUM);
292 llvm::SmallBitVector InvalidRegisters(Traits::RegisterSet::Reg_NUM); 285 llvm::SmallBitVector InvalidRegisters(Traits::RegisterSet::Reg_NUM);
293 ScratchRegs.resize(Traits::RegisterSet::Reg_NUM); 286 ScratchRegs.resize(Traits::RegisterSet::Reg_NUM);
294 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ 287
295 frameptr, isI8, isInt, isFP) \ 288 Traits::initRegisterSet(&IntegerRegisters, &IntegerRegistersI8,
296 IntegerRegisters[Traits::RegisterSet::val] = isInt; \ 289 &FloatRegisters, &VectorRegisters, &ScratchRegs);
297 IntegerRegistersI8[Traits::RegisterSet::val] = isI8; \ 290
298 FloatRegisters[Traits::RegisterSet::val] = isFP; \
299 VectorRegisters[Traits::RegisterSet::val] = isFP; \
300 ScratchRegs[Traits::RegisterSet::val] = scratch;
301 REGX8632_TABLE;
302 #undef X
303 TypeToRegisterSet[IceType_void] = InvalidRegisters; 291 TypeToRegisterSet[IceType_void] = InvalidRegisters;
304 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8; 292 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
305 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8; 293 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
306 TypeToRegisterSet[IceType_i16] = IntegerRegisters; 294 TypeToRegisterSet[IceType_i16] = IntegerRegisters;
307 TypeToRegisterSet[IceType_i32] = IntegerRegisters; 295 TypeToRegisterSet[IceType_i32] = IntegerRegisters;
308 TypeToRegisterSet[IceType_i64] = IntegerRegisters; 296 TypeToRegisterSet[IceType_i64] = IntegerRegisters;
309 TypeToRegisterSet[IceType_f32] = FloatRegisters; 297 TypeToRegisterSet[IceType_f32] = FloatRegisters;
310 TypeToRegisterSet[IceType_f64] = FloatRegisters; 298 TypeToRegisterSet[IceType_f64] = FloatRegisters;
311 TypeToRegisterSet[IceType_v4i1] = VectorRegisters; 299 TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
312 TypeToRegisterSet[IceType_v8i1] = VectorRegisters; 300 TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
(...skipping 27 matching lines...)
340 328
341 // Find read-modify-write opportunities. Do this after address mode 329 // Find read-modify-write opportunities. Do this after address mode
342 // optimization so that doAddressOpt() doesn't need to be applied to RMW 330 // optimization so that doAddressOpt() doesn't need to be applied to RMW
343 // instructions as well. 331 // instructions as well.
344 findRMW(); 332 findRMW();
345 Func->dump("After RMW transform"); 333 Func->dump("After RMW transform");
346 334
347 // Argument lowering 335 // Argument lowering
348 Func->doArgLowering(); 336 Func->doArgLowering();
349 337
350 // Target lowering. This requires liveness analysis for some parts 338 // Target lowering. This requires liveness analysis for some parts of the
351 // of the lowering decisions, such as compare/branch fusing. If 339 // lowering decisions, such as compare/branch fusing. If non-lightweight
352 // non-lightweight liveness analysis is used, the instructions need 340 // liveness analysis is used, the instructions need to be renumbered first
353 // to be renumbered first. TODO: This renumbering should only be 341 // TODO: This renumbering should only be necessary if we're actually
354 // necessary if we're actually calculating live intervals, which we 342 // calculating live intervals, which we only do for register allocation.
355 // only do for register allocation.
356 Func->renumberInstructions(); 343 Func->renumberInstructions();
357 if (Func->hasError()) 344 if (Func->hasError())
358 return; 345 return;
359 346
360 // TODO: It should be sufficient to use the fastest liveness 347 // TODO: It should be sufficient to use the fastest liveness calculation, i.e.
361 // calculation, i.e. livenessLightweight(). However, for some 348 // livenessLightweight(). However, for some reason that slows down the rest
362 // reason that slows down the rest of the translation. Investigate. 349 // of the translation. Investigate.
363 Func->liveness(Liveness_Basic); 350 Func->liveness(Liveness_Basic);
364 if (Func->hasError()) 351 if (Func->hasError())
365 return; 352 return;
366 Func->dump("After x86 address mode opt"); 353 Func->dump("After x86 address mode opt");
367 354
368 // Disable constant blinding or pooling for load optimization. 355 // Disable constant blinding or pooling for load optimization.
369 { 356 {
370 BoolFlagSaver B(RandomizationPoolingPaused, true); 357 BoolFlagSaver B(RandomizationPoolingPaused, true);
371 doLoadOpt(); 358 doLoadOpt();
372 } 359 }
373 Func->genCode(); 360 Func->genCode();
374 if (Func->hasError()) 361 if (Func->hasError())
375 return; 362 return;
376 Func->dump("After x86 codegen"); 363 Func->dump("After x86 codegen");
377 364
378 // Register allocation. This requires instruction renumbering and 365 // Register allocation. This requires instruction renumbering and full
379 // full liveness analysis. 366 // liveness analysis.
380 Func->renumberInstructions(); 367 Func->renumberInstructions();
381 if (Func->hasError()) 368 if (Func->hasError())
382 return; 369 return;
383 Func->liveness(Liveness_Intervals); 370 Func->liveness(Liveness_Intervals);
384 if (Func->hasError()) 371 if (Func->hasError())
385 return; 372 return;
386 // Validate the live range computations. The expensive validation 373 // Validate the live range computations. The expensive validation call is
387 // call is deliberately only made when assertions are enabled. 374 // deliberately only made when assertions are enabled.
388 assert(Func->validateLiveness()); 375 assert(Func->validateLiveness());
389 // The post-codegen dump is done here, after liveness analysis and 376 // The post-codegen dump is done here, after liveness analysis and associated
390 // associated cleanup, to make the dump cleaner and more useful. 377 // cleanup, to make the dump cleaner and more useful.
391 Func->dump("After initial x8632 codegen"); 378 Func->dump("After initial x8632 codegen");
392 Func->getVMetadata()->init(VMK_All); 379 Func->getVMetadata()->init(VMK_All);
393 regAlloc(RAK_Global); 380 regAlloc(RAK_Global);
394 if (Func->hasError()) 381 if (Func->hasError())
395 return; 382 return;
396 Func->dump("After linear scan regalloc"); 383 Func->dump("After linear scan regalloc");
397 384
398 if (Ctx->getFlags().getPhiEdgeSplit()) { 385 if (Ctx->getFlags().getPhiEdgeSplit()) {
399 // We need to pause constant blinding or pooling during advanced 386 // We need to pause constant blinding or pooling during advanced phi
400 // phi lowering, unless the lowering assignment has a physical 387 // lowering, unless the lowering assignment has a physical register for the
401 // register for the dest Variable. 388 // dest Variable.
402 { 389 {
403 BoolFlagSaver B(RandomizationPoolingPaused, true); 390 BoolFlagSaver B(RandomizationPoolingPaused, true);
404 Func->advancedPhiLowering(); 391 Func->advancedPhiLowering();
405 } 392 }
406 Func->dump("After advanced Phi lowering"); 393 Func->dump("After advanced Phi lowering");
407 } 394 }
408 395
409 // Stack frame mapping. 396 // Stack frame mapping.
410 Func->genFrame(); 397 Func->genFrame();
411 if (Func->hasError()) 398 if (Func->hasError())
412 return; 399 return;
413 Func->dump("After stack frame mapping"); 400 Func->dump("After stack frame mapping");
414 401
415 Func->contractEmptyNodes(); 402 Func->contractEmptyNodes();
416 Func->reorderNodes(); 403 Func->reorderNodes();
417 404
418 // Branch optimization. This needs to be done just before code 405 // Branch optimization. This needs to be done just before code emission. In
419 // emission. In particular, no transformations that insert or 406 // particular, no transformations that insert or reorder CfgNodes should be
420 // reorder CfgNodes should be done after branch optimization. We go 407 // done after branch optimization. We go ahead and do it before nop insertion
421 // ahead and do it before nop insertion to reduce the amount of work 408 // to reduce the amount of work needed for searching for opportunities.
jvoung (off chromium) 2015/07/07 00:00:18 extra space between "work" and "needed"
John 2015/07/07 15:12:18 Done.
422 // needed for searching for opportunities.
423 Func->doBranchOpt(); 409 Func->doBranchOpt();
424 Func->dump("After branch optimization"); 410 Func->dump("After branch optimization");
425 411
426 // Nop insertion 412 // Nop insertion
427 if (Ctx->getFlags().shouldDoNopInsertion()) { 413 if (Ctx->getFlags().shouldDoNopInsertion()) {
428 Func->doNopInsertion(); 414 Func->doNopInsertion();
429 } 415 }
430 } 416 }
431 417
432 template <class Machine> void TargetX86Base<Machine>::translateOm1() { 418 template <class Machine> void TargetX86Base<Machine>::translateOm1() {
(...skipping 27 matching lines...)
460 Func->dump("After stack frame mapping"); 446 Func->dump("After stack frame mapping");
461 447
462 // Nop insertion 448 // Nop insertion
463 if (Ctx->getFlags().shouldDoNopInsertion()) { 449 if (Ctx->getFlags().shouldDoNopInsertion()) {
464 Func->doNopInsertion(); 450 Func->doNopInsertion();
465 } 451 }
466 } 452 }
467 453
468 bool canRMW(const InstArithmetic *Arith) { 454 bool canRMW(const InstArithmetic *Arith) {
469 Type Ty = Arith->getDest()->getType(); 455 Type Ty = Arith->getDest()->getType();
470 // X86 vector instructions write to a register and have no RMW 456 // X86 vector instructions write to a register and have no RMW option.
471 // option.
472 if (isVectorType(Ty)) 457 if (isVectorType(Ty))
473 return false; 458 return false;
474 bool isI64 = Ty == IceType_i64; 459 bool isI64 = Ty == IceType_i64;
475 460
476 switch (Arith->getOp()) { 461 switch (Arith->getOp()) {
477 // Not handled for lack of simple lowering: 462 // Not handled for lack of simple lowering:
478 // shift on i64 463 // shift on i64
479 // mul, udiv, urem, sdiv, srem, frem 464 // mul, udiv, urem, sdiv, srem, frem
480 // Not handled for lack of RMW instructions: 465 // Not handled for lack of RMW instructions:
481 // fadd, fsub, fmul, fdiv (also vector types) 466 // fadd, fsub, fmul, fdiv (also vector types)
482 default: 467 default:
483 return false; 468 return false;
484 case InstArithmetic::Add: 469 case InstArithmetic::Add:
485 case InstArithmetic::Sub: 470 case InstArithmetic::Sub:
486 case InstArithmetic::And: 471 case InstArithmetic::And:
487 case InstArithmetic::Or: 472 case InstArithmetic::Or:
488 case InstArithmetic::Xor: 473 case InstArithmetic::Xor:
489 return true; 474 return true;
490 case InstArithmetic::Shl: 475 case InstArithmetic::Shl:
491 case InstArithmetic::Lshr: 476 case InstArithmetic::Lshr:
492 case InstArithmetic::Ashr: 477 case InstArithmetic::Ashr:
493 return false; // TODO(stichnot): implement 478 return false; // TODO(stichnot): implement
494 return !isI64; 479 return !isI64;
495 } 480 }
496 } 481 }
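For orientation, this is the shape of the pattern that findRMW() (below) looks for, in the same informal notation the surrounding comments use; the names are illustrative:

    //   a = load x
    //   b = a <op> c     ; any operator accepted by canRMW() above
    //   store b, x       ; same address operand as the load
    //
    // When a and b have no other uses, the sequence can ultimately be emitted
    // as a single x86 read-modify-write instruction such as add [x], c.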
497 482
483 template <class Machine>
498 bool isSameMemAddressOperand(const Operand *A, const Operand *B) { 484 bool isSameMemAddressOperand(const Operand *A, const Operand *B) {
499 if (A == B) 485 if (A == B)
500 return true; 486 return true;
501 if (auto *MemA = llvm::dyn_cast<OperandX8632Mem>(A)) { 487 if (auto *MemA = llvm::dyn_cast<
502 if (auto *MemB = llvm::dyn_cast<OperandX8632Mem>(B)) { 488 typename TargetX86Base<Machine>::Traits::X86OperandMem>(A)) {
489 if (auto *MemB = llvm::dyn_cast<
490 typename TargetX86Base<Machine>::Traits::X86OperandMem>(B)) {
503 return MemA->getBase() == MemB->getBase() && 491 return MemA->getBase() == MemB->getBase() &&
504 MemA->getOffset() == MemB->getOffset() && 492 MemA->getOffset() == MemB->getOffset() &&
505 MemA->getIndex() == MemB->getIndex() && 493 MemA->getIndex() == MemB->getIndex() &&
506 MemA->getShift() == MemB->getShift() && 494 MemA->getShift() == MemB->getShift() &&
507 MemA->getSegmentRegister() == MemB->getSegmentRegister(); 495 MemA->getSegmentRegister() == MemB->getSegmentRegister();
508 } 496 }
509 } 497 }
510 return false; 498 return false;
511 } 499 }
512 500
(...skipping 44 matching lines...)
557 // instruction will be retained and later lowered. On the other 545 // instruction will be retained and later lowered. On the other
558 // hand, if the RMW instruction does not end x's live range, then 546 // hand, if the RMW instruction does not end x's live range, then
559 // the Store instruction must still be present, and therefore the 547 // the Store instruction must still be present, and therefore the
560 // RMW instruction is ignored during lowering because it is 548 // RMW instruction is ignored during lowering because it is
561 // redundant with the Store instruction. 549 // redundant with the Store instruction.
562 // 550 //
563 // Note that if "a" has further uses, the RMW transformation may 551 // Note that if "a" has further uses, the RMW transformation may
564 // still trigger, resulting in two loads and one store, which is 552 // still trigger, resulting in two loads and one store, which is
565 // worse than the original one load and one store. However, this is 553 // worse than the original one load and one store. However, this is
566 // probably rare, and caching probably keeps it just as fast. 554 // probably rare, and caching probably keeps it just as fast.
567 if (!isSameMemAddressOperand(Load->getSourceAddress(), 555 if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(),
568 Store->getAddr())) 556 Store->getAddr()))
569 continue; 557 continue;
570 Operand *ArithSrcFromLoad = Arith->getSrc(0); 558 Operand *ArithSrcFromLoad = Arith->getSrc(0);
571 Operand *ArithSrcOther = Arith->getSrc(1); 559 Operand *ArithSrcOther = Arith->getSrc(1);
572 if (ArithSrcFromLoad != Load->getDest()) { 560 if (ArithSrcFromLoad != Load->getDest()) {
573 if (!Arith->isCommutative() || ArithSrcOther != Load->getDest()) 561 if (!Arith->isCommutative() || ArithSrcOther != Load->getDest())
574 continue; 562 continue;
575 std::swap(ArithSrcFromLoad, ArithSrcOther); 563 std::swap(ArithSrcFromLoad, ArithSrcOther);
576 } 564 }
577 if (Arith->getDest() != Store->getData()) 565 if (Arith->getDest() != Store->getData())
578 continue; 566 continue;
579 if (!canRMW(Arith)) 567 if (!canRMW(Arith))
580 continue; 568 continue;
581 if (Func->isVerbose(IceV_RMW)) { 569 if (Func->isVerbose(IceV_RMW)) {
582 Str << "Found RMW in " << Func->getFunctionName() << ":\n "; 570 Str << "Found RMW in " << Func->getFunctionName() << ":\n ";
583 Load->dump(Func); 571 Load->dump(Func);
584 Str << "\n "; 572 Str << "\n ";
585 Arith->dump(Func); 573 Arith->dump(Func);
586 Str << "\n "; 574 Str << "\n ";
587 Store->dump(Func); 575 Store->dump(Func);
588 Str << "\n"; 576 Str << "\n";
589 } 577 }
590 Variable *Beacon = Func->template makeVariable(IceType_i32); 578 Variable *Beacon = Func->template makeVariable(IceType_i32);
591 Beacon->setWeight(0); 579 Beacon->setWeight(0);
592 Store->setRmwBeacon(Beacon); 580 Store->setRmwBeacon(Beacon);
593 InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon); 581 InstFakeDef *BeaconDef = InstFakeDef::create(Func, Beacon);
594 Node->getInsts().insert(I3, BeaconDef); 582 Node->getInsts().insert(I3, BeaconDef);
595 InstX8632FakeRMW *RMW = InstX8632FakeRMW::create( 583 typename Traits::Insts::FakeRMW *RMW =
596 Func, ArithSrcOther, Store->getAddr(), Beacon, Arith->getOp()); 584 Traits::Insts::FakeRMW::create(Func, ArithSrcOther,
jvoung (off chromium) 2015/07/07 00:00:18 I wonder if we should allow "auto" for results of
John 2015/07/07 15:12:18 Done.
585 Store->getAddr(), Beacon,
586 Arith->getOp());
597 Node->getInsts().insert(I3, RMW); 587 Node->getInsts().insert(I3, RMW);
598 } 588 }
599 } 589 }
600 } 590 }
601 } 591 }
602 } 592 }
603 } 593 }
604 594
605 // Converts a ConstantInteger32 operand into its constant value, or 595 // Converts a ConstantInteger32 operand into its constant value, or
606 // MemoryOrderInvalid if the operand is not a ConstantInteger32. 596 // MemoryOrderInvalid if the operand is not a ConstantInteger32.
(...skipping 106 matching lines...)
713 } 703 }
714 Context.advanceCur(); 704 Context.advanceCur();
715 Context.advanceNext(); 705 Context.advanceNext();
716 } 706 }
717 } 707 }
718 Func->dump("After load optimization"); 708 Func->dump("After load optimization");
719 } 709 }
720 710
721 template <class Machine> 711 template <class Machine>
722 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) { 712 bool TargetX86Base<Machine>::doBranchOpt(Inst *I, const CfgNode *NextNode) {
723 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { 713 if (auto *Br = llvm::dyn_cast<typename Traits::Insts::Br>(I)) {
724 return Br->optimizeBranch(NextNode); 714 return Br->optimizeBranch(NextNode);
725 } 715 }
726 return false; 716 return false;
727 } 717 }
728 718
729 template <class Machine> 719 template <class Machine>
730 IceString TargetX86Base<Machine>::RegNames[] = {
731 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
732 frameptr, isI8, isInt, isFP) \
733 name,
734 REGX8632_TABLE
735 #undef X
736 };
737
738 template <class Machine>
739 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { 720 Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) {
740 if (Ty == IceType_void) 721 if (Ty == IceType_void)
741 Ty = IceType_i32; 722 Ty = IceType_i32;
742 if (PhysicalRegisters[Ty].empty()) 723 if (PhysicalRegisters[Ty].empty())
743 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); 724 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);
744 assert(RegNum < PhysicalRegisters[Ty].size()); 725 assert(RegNum < PhysicalRegisters[Ty].size());
745 Variable *Reg = PhysicalRegisters[Ty][RegNum]; 726 Variable *Reg = PhysicalRegisters[Ty][RegNum];
746 if (Reg == nullptr) { 727 if (Reg == nullptr) {
747 Reg = Func->template makeVariable(Ty); 728 Reg = Func->template makeVariable(Ty);
748 Reg->setRegNum(RegNum); 729 Reg->setRegNum(RegNum);
749 PhysicalRegisters[Ty][RegNum] = Reg; 730 PhysicalRegisters[Ty][RegNum] = Reg;
750 // Specially mark esp as an "argument" so that it is considered 731 // Specially mark esp as an "argument" so that it is considered
751 // live upon function entry. 732 // live upon function entry.
752 if (RegNum == Traits::RegisterSet::Reg_esp) { 733 if (RegNum == Traits::RegisterSet::Reg_esp) {
753 Func->addImplicitArg(Reg); 734 Func->addImplicitArg(Reg);
754 Reg->setIgnoreLiveness(); 735 Reg->setIgnoreLiveness();
755 } 736 }
756 } 737 }
757 return Reg; 738 return Reg;
758 } 739 }
759 740
760 template <class Machine> 741 template <class Machine>
761 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { 742 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const {
762 assert(RegNum < Traits::RegisterSet::Reg_NUM); 743 return Traits::getRegName(RegNum, Ty);
763 static IceString RegNames8[] = {
764 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
765 frameptr, isI8, isInt, isFP) \
766 name8,
767 REGX8632_TABLE
768 #undef X
769 };
770 static IceString RegNames16[] = {
771 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
772 frameptr, isI8, isInt, isFP) \
773 name16,
774 REGX8632_TABLE
775 #undef X
776 };
777 switch (Ty) {
778 case IceType_i1:
779 case IceType_i8:
780 return RegNames8[RegNum];
781 case IceType_i16:
782 return RegNames16[RegNum];
783 default:
784 return RegNames[RegNum];
785 }
786 } 744 }
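Concretely, the three name columns of REGX8632_TABLE give the width-appropriate spelling of each register; for eax, for example:

    // IceType_i1, IceType_i8 -> "al"   (name8 column)
    // IceType_i16            -> "ax"   (name16 column)
    // any other type         -> "eax"  (name column)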
787 745
788 template <class Machine> 746 template <class Machine>
789 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const { 747 void TargetX86Base<Machine>::emitVariable(const Variable *Var) const {
790 Ostream &Str = Ctx->getStrEmit(); 748 Ostream &Str = Ctx->getStrEmit();
791 if (Var->hasReg()) { 749 if (Var->hasReg()) {
792 Str << "%" << getRegName(Var->getRegNum(), Var->getType()); 750 Str << "%" << getRegName(Var->getRegNum(), Var->getType());
793 return; 751 return;
794 } 752 }
795 if (Var->getWeight().isInf()) { 753 if (Var->getWeight().isInf()) {
(...skipping 80 matching lines...)
876 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); 834 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
877 return; 835 return;
878 } 836 }
879 if (isVectorType(Ty)) { 837 if (isVectorType(Ty)) {
880 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes); 838 InArgsSizeBytes = Traits::applyStackAlignment(InArgsSizeBytes);
881 } 839 }
882 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); 840 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
883 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); 841 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
884 if (Arg->hasReg()) { 842 if (Arg->hasReg()) {
885 assert(Ty != IceType_i64); 843 assert(Ty != IceType_i64);
886 OperandX8632Mem *Mem = OperandX8632Mem::create( 844 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create(
887 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); 845 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
888 if (isVectorType(Arg->getType())) { 846 if (isVectorType(Arg->getType())) {
889 _movp(Arg, Mem); 847 _movp(Arg, Mem);
890 } else { 848 } else {
891 _mov(Arg, Mem); 849 _mov(Arg, Mem);
892 } 850 }
893 // This argument-copying instruction uses an explicit 851 // This argument-copying instruction uses an explicit
894 // OperandX8632Mem operand instead of a Variable, so its 852 // typename Traits::X86OperandMem operand instead of a Variable, so its
895 // fill-from-stack operation has to be tracked separately for 853 // fill-from-stack operation has to be tracked separately for
896 // statistics. 854 // statistics.
897 Ctx->statsUpdateFills(); 855 Ctx->statsUpdateFills();
898 } 856 }
899 } 857 }
900 858
901 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { 859 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
902 return IceType_i32; 860 return IceType_i32;
903 } 861 }
904 862
(...skipping 52 matching lines...)
957 // If there is a separate locals area, this specifies the alignment 915 // If there is a separate locals area, this specifies the alignment
958 // for it. 916 // for it.
959 uint32_t LocalsSlotsAlignmentBytes = 0; 917 uint32_t LocalsSlotsAlignmentBytes = 0;
960 // The entire spill locations area gets aligned to largest natural 918 // The entire spill locations area gets aligned to largest natural
961 // alignment of the variables that have a spill slot. 919 // alignment of the variables that have a spill slot.
962 uint32_t SpillAreaAlignmentBytes = 0; 920 uint32_t SpillAreaAlignmentBytes = 0;
963 // A spill slot linked to a variable with a stack slot should reuse 921 // A spill slot linked to a variable with a stack slot should reuse
964 // that stack slot. 922 // that stack slot.
965 std::function<bool(Variable *)> TargetVarHook = 923 std::function<bool(Variable *)> TargetVarHook =
966 [&VariablesLinkedToSpillSlots](Variable *Var) { 924 [&VariablesLinkedToSpillSlots](Variable *Var) {
967 if (SpillVariable *SpillVar = llvm::dyn_cast<SpillVariable>(Var)) { 925 if (auto *SpillVar =
926 llvm::dyn_cast<typename Traits::SpillVariable>(Var)) {
968 assert(Var->getWeight().isZero()); 927 assert(Var->getWeight().isZero());
969 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) { 928 if (SpillVar->getLinkedTo() && !SpillVar->getLinkedTo()->hasReg()) {
970 VariablesLinkedToSpillSlots.push_back(Var); 929 VariablesLinkedToSpillSlots.push_back(Var);
971 return true; 930 return true;
972 } 931 }
973 } 932 }
974 return false; 933 return false;
975 }; 934 };
976 935
977 // Compute the list of spilled variables and bounds for GlobalsSize, etc. 936 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
(...skipping 83 matching lines...)
1061 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); 1020 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1062 } 1021 }
1063 1022
1064 // Fill in stack offsets for locals. 1023 // Fill in stack offsets for locals.
1065 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, 1024 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1066 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, 1025 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
1067 IsEbpBasedFrame); 1026 IsEbpBasedFrame);
1068 // Assign stack offsets to variables that have been linked to spilled 1027 // Assign stack offsets to variables that have been linked to spilled
1069 // variables. 1028 // variables.
1070 for (Variable *Var : VariablesLinkedToSpillSlots) { 1029 for (Variable *Var : VariablesLinkedToSpillSlots) {
1071 Variable *Linked = (llvm::cast<SpillVariable>(Var))->getLinkedTo(); 1030 Variable *Linked =
1031 (llvm::cast<typename Traits::SpillVariable>(Var))->getLinkedTo();
1072 Var->setStackOffset(Linked->getStackOffset()); 1032 Var->setStackOffset(Linked->getStackOffset());
1073 } 1033 }
1074 this->HasComputedFrame = true; 1034 this->HasComputedFrame = true;
1075 1035
1076 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) { 1036 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
1077 OstreamLocker L(Func->getContext()); 1037 OstreamLocker L(Func->getContext());
1078 Ostream &Str = Func->getContext()->getStrDump(); 1038 Ostream &Str = Func->getContext()->getStrDump();
1079 1039
1080 Str << "Stack layout:\n"; 1040 Str << "Stack layout:\n";
1081 uint32_t EspAdjustmentPaddingSize = 1041 uint32_t EspAdjustmentPaddingSize =
(...skipping 16 matching lines...)
1098 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes 1058 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1099 << " bytes\n" 1059 << " bytes\n"
1100 << " is ebp based = " << IsEbpBasedFrame << "\n"; 1060 << " is ebp based = " << IsEbpBasedFrame << "\n";
1101 } 1061 }
1102 } 1062 }
1103 1063
1104 template <class Machine> void TargetX86Base<Machine>::addEpilog(CfgNode *Node) { 1064 template <class Machine> void TargetX86Base<Machine>::addEpilog(CfgNode *Node) {
1105 InstList &Insts = Node->getInsts(); 1065 InstList &Insts = Node->getInsts();
1106 InstList::reverse_iterator RI, E; 1066 InstList::reverse_iterator RI, E;
1107 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { 1067 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1108 if (llvm::isa<InstX8632Ret>(*RI)) 1068 if (llvm::isa<typename Traits::Insts::Ret>(*RI))
1109 break; 1069 break;
1110 } 1070 }
1111 if (RI == E) 1071 if (RI == E)
1112 return; 1072 return;
1113 1073
1114 // Convert the reverse_iterator position into its corresponding 1074 // Convert the reverse_iterator position into its corresponding
1115 // (forward) iterator position. 1075 // (forward) iterator position.
1116 InstList::iterator InsertPoint = RI.base(); 1076 InstList::iterator InsertPoint = RI.base();
1117 --InsertPoint; 1077 --InsertPoint;
1118 Context.init(Node); 1078 Context.init(Node);
(...skipping 89 matching lines...)
1208 return Operand; 1168 return Operand;
1209 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { 1169 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1210 split64(Var); 1170 split64(Var);
1211 return Var->getLo(); 1171 return Var->getLo();
1212 } 1172 }
1213 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 1173 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1214 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>( 1174 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>(
1215 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue()))); 1175 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue())));
1216 return legalize(ConstInt); 1176 return legalize(ConstInt);
1217 } 1177 }
1218 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { 1178 if (typename Traits::X86OperandMem *Mem =
jvoung (off chromium) 2015/07/07 00:00:18 could declare as "auto *Mem" for this, since it's
John 2015/07/07 15:12:18 Done.
1219 OperandX8632Mem *MemOperand = OperandX8632Mem::create( 1179 llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
1180 typename Traits::X86OperandMem *MemOperand = Traits::X86OperandMem::create(
1220 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(), 1181 Func, IceType_i32, Mem->getBase(), Mem->getOffset(), Mem->getIndex(),
1221 Mem->getShift(), Mem->getSegmentRegister()); 1182 Mem->getShift(), Mem->getSegmentRegister());
1222 // Test if we should randomize or pool the offset, if so randomize it or 1183 // Test if we should randomize or pool the offset, if so randomize it or
1223 // pool it then create mem operand with the blinded/pooled constant. 1184 // pool it then create mem operand with the blinded/pooled constant.
1224 // Otherwise, return the mem operand as ordinary mem operand. 1185 // Otherwise, return the mem operand as ordinary mem operand.
1225 return legalize(MemOperand); 1186 return legalize(MemOperand);
1226 } 1187 }
1227 llvm_unreachable("Unsupported operand type"); 1188 llvm_unreachable("Unsupported operand type");
1228 return nullptr; 1189 return nullptr;
1229 } 1190 }
1230 1191
1231 template <class Machine> 1192 template <class Machine>
1232 Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) { 1193 Operand *TargetX86Base<Machine>::hiOperand(Operand *Operand) {
1233 assert(Operand->getType() == IceType_i64 || 1194 assert(Operand->getType() == IceType_i64 ||
1234 Operand->getType() == IceType_f64); 1195 Operand->getType() == IceType_f64);
1235 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64) 1196 if (Operand->getType() != IceType_i64 && Operand->getType() != IceType_f64)
1236 return Operand; 1197 return Operand;
1237 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { 1198 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
1238 split64(Var); 1199 split64(Var);
1239 return Var->getHi(); 1200 return Var->getHi();
1240 } 1201 }
1241 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 1202 if (ConstantInteger64 *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1242 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>( 1203 ConstantInteger32 *ConstInt = llvm::dyn_cast<ConstantInteger32>(
1243 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32))); 1204 Ctx->getConstantInt32(static_cast<int32_t>(Const->getValue() >> 32)));
1244 // check if we need to blind/pool the constant 1205 // check if we need to blind/pool the constant
1245 return legalize(ConstInt); 1206 return legalize(ConstInt);
1246 } 1207 }
1247 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { 1208 if (typename Traits::X86OperandMem *Mem =
jvoung (off chromium) 2015/07/07 00:00:18 auto
John 2015/07/07 15:12:18 Done.
1209 llvm::dyn_cast<typename Traits::X86OperandMem>(Operand)) {
1248 Constant *Offset = Mem->getOffset(); 1210 Constant *Offset = Mem->getOffset();
1249 if (Offset == nullptr) { 1211 if (Offset == nullptr) {
1250 Offset = Ctx->getConstantInt32(4); 1212 Offset = Ctx->getConstantInt32(4);
1251 } else if (ConstantInteger32 *IntOffset = 1213 } else if (ConstantInteger32 *IntOffset =
1252 llvm::dyn_cast<ConstantInteger32>(Offset)) { 1214 llvm::dyn_cast<ConstantInteger32>(Offset)) {
1253 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue()); 1215 Offset = Ctx->getConstantInt32(4 + IntOffset->getValue());
1254 } else if (ConstantRelocatable *SymOffset = 1216 } else if (ConstantRelocatable *SymOffset =
1255 llvm::dyn_cast<ConstantRelocatable>(Offset)) { 1217 llvm::dyn_cast<ConstantRelocatable>(Offset)) {
1256 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4)); 1218 assert(!Utils::WouldOverflowAdd(SymOffset->getOffset(), 4));
1257 Offset = 1219 Offset =
1258 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(), 1220 Ctx->getConstantSym(4 + SymOffset->getOffset(), SymOffset->getName(),
1259 SymOffset->getSuppressMangling()); 1221 SymOffset->getSuppressMangling());
1260 } 1222 }
1261 OperandX8632Mem *MemOperand = OperandX8632Mem::create( 1223 typename Traits::X86OperandMem *MemOperand = Traits::X86OperandMem::create(
1262 Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(), 1224 Func, IceType_i32, Mem->getBase(), Offset, Mem->getIndex(),
1263 Mem->getShift(), Mem->getSegmentRegister()); 1225 Mem->getShift(), Mem->getSegmentRegister());
1264 // Test if the Offset is an eligible i32 constants for randomization and 1226 // Test if the Offset is an eligible i32 constants for randomization and
1265 // pooling. Blind/pool it if it is. Otherwise return as oridinary mem 1227 // pooling. Blind/pool it if it is. Otherwise return as oridinary mem
1266 // operand. 1228 // operand.
1267 return legalize(MemOperand); 1229 return legalize(MemOperand);
1268 } 1230 }
1269 llvm_unreachable("Unsupported operand type"); 1231 llvm_unreachable("Unsupported operand type");
1270 return nullptr; 1232 return nullptr;
1271 } 1233 }
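A small self-contained illustration of the 64-bit split that loOperand() and hiOperand() implement for an immediate (the value is only an example):

    uint64_t V = 0x1122334455667788ull;
    int32_t Lo = static_cast<int32_t>(V);        // 0x55667788, what loOperand() yields
    int32_t Hi = static_cast<int32_t>(V >> 32);  // 0x11223344, what hiOperand() yields
    // For a memory operand, the low half keeps the original offset and the
    // high half adds 4 to it, as in the Offset handling above.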
1272 1234
1273 template <class Machine> 1235 template <class Machine>
1274 llvm::SmallBitVector 1236 llvm::SmallBitVector
1275 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, 1237 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,
1276 RegSetMask Exclude) const { 1238 RegSetMask Exclude) const {
1277 llvm::SmallBitVector Registers(Traits::RegisterSet::Reg_NUM); 1239 return Traits::getRegisterSet(Include, Exclude);
1278
1279 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
1280 frameptr, isI8, isInt, isFP) \
1281 if (scratch && (Include & RegSet_CallerSave)) \
1282 Registers[Traits::RegisterSet::val] = true; \
1283 if (preserved && (Include & RegSet_CalleeSave)) \
1284 Registers[Traits::RegisterSet::val] = true; \
1285 if (stackptr && (Include & RegSet_StackPointer)) \
1286 Registers[Traits::RegisterSet::val] = true; \
1287 if (frameptr && (Include & RegSet_FramePointer)) \
1288 Registers[Traits::RegisterSet::val] = true; \
1289 if (scratch && (Exclude & RegSet_CallerSave)) \
1290 Registers[Traits::RegisterSet::val] = false; \
1291 if (preserved && (Exclude & RegSet_CalleeSave)) \
1292 Registers[Traits::RegisterSet::val] = false; \
1293 if (stackptr && (Exclude & RegSet_StackPointer)) \
1294 Registers[Traits::RegisterSet::val] = false; \
1295 if (frameptr && (Exclude & RegSet_FramePointer)) \
1296 Registers[Traits::RegisterSet::val] = false;
1297
1298 REGX8632_TABLE
1299
1300 #undef X
1301
1302 return Registers;
1303 } 1240 }
1304 1241
1305 template <class Machine> 1242 template <class Machine>
1306 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { 1243 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
1307 IsEbpBasedFrame = true; 1244 IsEbpBasedFrame = true;
1308 // Conservatively require the stack to be aligned. Some stack 1245 // Conservatively require the stack to be aligned. Some stack
1309 // adjustment operations implemented below assume that the stack is 1246 // adjustment operations implemented below assume that the stack is
1310 // aligned before the alloca. All the alloca code ensures that the 1247 // aligned before the alloca. All the alloca code ensures that the
1311 // stack alignment is preserved after the alloca. The stack alignment 1248 // stack alignment is preserved after the alloca. The stack alignment
1312 // restriction can be relaxed in some cases. 1249 // restriction can be relaxed in some cases.
(...skipping 102 matching lines...)
1415 return false; 1352 return false;
1416 // Limit the number of lea/shl operations for a single multiply, to 1353 // Limit the number of lea/shl operations for a single multiply, to
1417 // a somewhat arbitrary choice of 3. 1354 // a somewhat arbitrary choice of 3.
1418 const uint32_t MaxOpsForOptimizedMul = 3; 1355 const uint32_t MaxOpsForOptimizedMul = 3;
1419 if (CountOps > MaxOpsForOptimizedMul) 1356 if (CountOps > MaxOpsForOptimizedMul)
1420 return false; 1357 return false;
1421 _mov(T, Src0); 1358 _mov(T, Src0);
1422 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1359 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1423 for (uint32_t i = 0; i < Count9; ++i) { 1360 for (uint32_t i = 0; i < Count9; ++i) {
1424 const uint16_t Shift = 3; // log2(9-1) 1361 const uint16_t Shift = 3; // log2(9-1)
1425 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift)); 1362 _lea(T,
1363 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1426 _set_dest_nonkillable(); 1364 _set_dest_nonkillable();
1427 } 1365 }
1428 for (uint32_t i = 0; i < Count5; ++i) { 1366 for (uint32_t i = 0; i < Count5; ++i) {
1429 const uint16_t Shift = 2; // log2(5-1) 1367 const uint16_t Shift = 2; // log2(5-1)
1430 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift)); 1368 _lea(T,
1369 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1431 _set_dest_nonkillable(); 1370 _set_dest_nonkillable();
1432 } 1371 }
1433 for (uint32_t i = 0; i < Count3; ++i) { 1372 for (uint32_t i = 0; i < Count3; ++i) {
1434 const uint16_t Shift = 1; // log2(3-1) 1373 const uint16_t Shift = 1; // log2(3-1)
1435 _lea(T, OperandX8632Mem::create(Func, IceType_void, T, Zero, T, Shift)); 1374 _lea(T,
1375 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1436 _set_dest_nonkillable(); 1376 _set_dest_nonkillable();
1437 } 1377 }
1438 if (Count2) { 1378 if (Count2) {
1439 _shl(T, Ctx->getConstantInt(Ty, Count2)); 1379 _shl(T, Ctx->getConstantInt(Ty, Count2));
1440 } 1380 }
1441 if (Src1IsNegative) 1381 if (Src1IsNegative)
1442 _neg(T); 1382 _neg(T);
1443 _mov(Dest, T); 1383 _mov(Dest, T);
1444 return true; 1384 return true;
1445 } 1385 }
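A concrete instance of the strength reduction above, with illustrative numbers: a multiply by 45 factors as 9 * 5, so Count9 == 1 and Count5 == 1, and the whole multiply becomes two lea instructions (within the MaxOpsForOptimizedMul limit of 3) and no imul:

    //   lea t, [t + 8*t]   ; t *= 9  (Shift == 3)
    //   lea t, [t + 4*t]   ; t *= 5  (Shift == 2)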
(...skipping 147 matching lines...)
1593 // je L1 1533 // je L1
1594 // use(t3) 1534 // use(t3)
1595 // t3 = t2 1535 // t3 = t2
1596 // t2 = 0 1536 // t2 = 0
1597 // L1: 1537 // L1:
1598 // a.lo = t2 1538 // a.lo = t2
1599 // a.hi = t3 1539 // a.hi = t3
1600 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; 1540 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1601 Constant *BitTest = Ctx->getConstantInt32(0x20); 1541 Constant *BitTest = Ctx->getConstantInt32(0x20);
1602 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1542 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1603 InstX8632Label *Label = InstX8632Label::create(Func, this); 1543 typename Traits::Insts::Label *Label =
1544 Traits::Insts::Label::create(Func, this);
1604 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); 1545 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
1605 _mov(T_2, Src0Lo); 1546 _mov(T_2, Src0Lo);
1606 _mov(T_3, Src0Hi); 1547 _mov(T_3, Src0Hi);
1607 _shld(T_3, T_2, T_1); 1548 _shld(T_3, T_2, T_1);
1608 _shl(T_2, T_1); 1549 _shl(T_2, T_1);
1609 _test(T_1, BitTest); 1550 _test(T_1, BitTest);
1610 _br(Traits::Cond::Br_e, Label); 1551 _br(Traits::Cond::Br_e, Label);
1611 // T_2 and T_3 are being assigned again because of the 1552 // T_2 and T_3 are being assigned again because of the
1612 // intra-block control flow, so we need the _mov_nonkillable 1553 // intra-block control flow, so we need the _mov_nonkillable
1613 // variant to avoid liveness problems. 1554 // variant to avoid liveness problems.
(...skipping 14 matching lines...)
1628 // je L1 1569 // je L1
1629 // use(t2) 1570 // use(t2)
1630 // t2 = t3 1571 // t2 = t3
1631 // t3 = 0 1572 // t3 = 0
1632 // L1: 1573 // L1:
1633 // a.lo = t2 1574 // a.lo = t2
1634 // a.hi = t3 1575 // a.hi = t3
1635 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; 1576 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1636 Constant *BitTest = Ctx->getConstantInt32(0x20); 1577 Constant *BitTest = Ctx->getConstantInt32(0x20);
1637 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1578 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1638 InstX8632Label *Label = InstX8632Label::create(Func, this); 1579 typename Traits::Insts::Label *Label =
1580 Traits::Insts::Label::create(Func, this);
1639 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); 1581 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
1640 _mov(T_2, Src0Lo); 1582 _mov(T_2, Src0Lo);
1641 _mov(T_3, Src0Hi); 1583 _mov(T_3, Src0Hi);
1642 _shrd(T_2, T_3, T_1); 1584 _shrd(T_2, T_3, T_1);
1643 _shr(T_3, T_1); 1585 _shr(T_3, T_1);
1644 _test(T_1, BitTest); 1586 _test(T_1, BitTest);
1645 _br(Traits::Cond::Br_e, Label); 1587 _br(Traits::Cond::Br_e, Label);
1646 // T_2 and T_3 are being assigned again because of the 1588 // T_2 and T_3 are being assigned again because of the
1647 // intra-block control flow, so we need the _mov_nonkillable 1589 // intra-block control flow, so we need the _mov_nonkillable
1648 // variant to avoid liveness problems. 1590 // variant to avoid liveness problems.
(...skipping 14 matching lines...)
1663 // je L1 1605 // je L1
1664 // use(t2) 1606 // use(t2)
1665 // t2 = t3 1607 // t2 = t3
1666 // t3 = sar t3, 0x1f 1608 // t3 = sar t3, 0x1f
1667 // L1: 1609 // L1:
1668 // a.lo = t2 1610 // a.lo = t2
1669 // a.hi = t3 1611 // a.hi = t3
1670 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr; 1612 Variable *T_1 = nullptr, *T_2 = nullptr, *T_3 = nullptr;
1671 Constant *BitTest = Ctx->getConstantInt32(0x20); 1613 Constant *BitTest = Ctx->getConstantInt32(0x20);
1672 Constant *SignExtend = Ctx->getConstantInt32(0x1f); 1614 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
1673 InstX8632Label *Label = InstX8632Label::create(Func, this); 1615 typename Traits::Insts::Label *Label =
1616 Traits::Insts::Label::create(Func, this);
1674 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); 1617 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
1675 _mov(T_2, Src0Lo); 1618 _mov(T_2, Src0Lo);
1676 _mov(T_3, Src0Hi); 1619 _mov(T_3, Src0Hi);
1677 _shrd(T_2, T_3, T_1); 1620 _shrd(T_2, T_3, T_1);
1678 _sar(T_3, T_1); 1621 _sar(T_3, T_1);
1679 _test(T_1, BitTest); 1622 _test(T_1, BitTest);
1680 _br(Traits::Cond::Br_e, Label); 1623 _br(Traits::Cond::Br_e, Label);
1681 // T_2 and T_3 are being assigned again because of the 1624 // T_2 and T_3 are being assigned again because of the
1682 // intra-block control flow, so T_2 needs the _mov_nonkillable 1625 // intra-block control flow, so T_2 needs the _mov_nonkillable
1683 // variant to avoid liveness problems. T_3 doesn't need special 1626 // variant to avoid liveness problems. T_3 doesn't need special
(...skipping 17 matching lines...)
1701 case InstArithmetic::Srem: 1644 case InstArithmetic::Srem:
1702 llvm_unreachable("Call-helper-involved instruction for i64 type \ 1645 llvm_unreachable("Call-helper-involved instruction for i64 type \
1703 should have already been handled before"); 1646 should have already been handled before");
1704 break; 1647 break;
1705 } 1648 }
1706 return; 1649 return;
1707 } 1650 }
1708 if (isVectorType(Dest->getType())) { 1651 if (isVectorType(Dest->getType())) {
1709 // TODO: Trap on integer divide and integer modulo by zero. 1652 // TODO: Trap on integer divide and integer modulo by zero.
1710 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 1653 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
1711 if (llvm::isa<OperandX8632Mem>(Src1)) 1654 if (llvm::isa<typename Traits::X86OperandMem>(Src1))
1712 Src1 = legalizeToVar(Src1); 1655 Src1 = legalizeToVar(Src1);
1713 switch (Inst->getOp()) { 1656 switch (Inst->getOp()) {
1714 case InstArithmetic::_num: 1657 case InstArithmetic::_num:
1715 llvm_unreachable("Unknown arithmetic operator"); 1658 llvm_unreachable("Unknown arithmetic operator");
1716 break; 1659 break;
1717 case InstArithmetic::Add: { 1660 case InstArithmetic::Add: {
1718 Variable *T = makeReg(Dest->getType()); 1661 Variable *T = makeReg(Dest->getType());
1719 _movp(T, Src0); 1662 _movp(T, Src0);
1720 _padd(T, Src1); 1663 _padd(T, Src1);
1721 _movp(Dest, T); 1664 _movp(Dest, T);
(...skipping 478 matching lines...)
2200 XmmArgs.push_back(Arg); 2143 XmmArgs.push_back(Arg);
2201 } else { 2144 } else {
2202 StackArgs.push_back(Arg); 2145 StackArgs.push_back(Arg);
2203 if (isVectorType(Arg->getType())) { 2146 if (isVectorType(Arg->getType())) {
2204 ParameterAreaSizeBytes = 2147 ParameterAreaSizeBytes =
2205 Traits::applyStackAlignment(ParameterAreaSizeBytes); 2148 Traits::applyStackAlignment(ParameterAreaSizeBytes);
2206 } 2149 }
2207 Variable *esp = 2150 Variable *esp =
2208 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); 2151 Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp);
2209 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes); 2152 Constant *Loc = Ctx->getConstantInt32(ParameterAreaSizeBytes);
2210 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); 2153 StackArgLocations.push_back(
2154 Traits::X86OperandMem::create(Func, Ty, esp, Loc));
2211 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); 2155 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
2212 } 2156 }
2213 } 2157 }
2214 2158
2215 // Adjust the parameter area so that the stack is aligned. It is 2159 // Adjust the parameter area so that the stack is aligned. It is
2216 // assumed that the stack is already aligned at the start of the 2160 // assumed that the stack is already aligned at the start of the
2217 // calling sequence. 2161 // calling sequence.
2218 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); 2162 ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes);
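Illustrative arithmetic only (the real value comes from Traits): assuming a 16-byte stack alignment, applyStackAlignment() behaves like the hypothetical helper

    uint32_t alignTo16(uint32_t Bytes) { return (Bytes + 15) & ~uint32_t(15); }
    // alignTo16(20) == 32, alignTo16(32) == 32

so after this statement ParameterAreaSizeBytes is a multiple of the alignment before the esp adjustment below.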
2219 2163
2220 // Subtract the appropriate amount for the argument area. This also 2164 // Subtract the appropriate amount for the argument area. This also
(...skipping 76 matching lines...)
2297 } else { 2241 } else {
2298 Variable *CallTargetVar = nullptr; 2242 Variable *CallTargetVar = nullptr;
2299 _mov(CallTargetVar, CallTarget); 2243 _mov(CallTargetVar, CallTarget);
2300 _bundle_lock(InstBundleLock::Opt_AlignToEnd); 2244 _bundle_lock(InstBundleLock::Opt_AlignToEnd);
2301 const SizeT BundleSize = 2245 const SizeT BundleSize =
2302 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes(); 2246 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes();
2303 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1))); 2247 _and(CallTargetVar, Ctx->getConstantInt32(~(BundleSize - 1)));
2304 CallTarget = CallTargetVar; 2248 CallTarget = CallTargetVar;
2305 } 2249 }
2306 } 2250 }
2307 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); 2251 Inst *NewCall = Traits::Insts::Call::create(Func, ReturnReg, CallTarget);
2308 Context.insert(NewCall); 2252 Context.insert(NewCall);
2309 if (NeedSandboxing) 2253 if (NeedSandboxing)
2310 _bundle_unlock(); 2254 _bundle_unlock();
2311 if (ReturnRegHi) 2255 if (ReturnRegHi)
2312 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); 2256 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
2313 2257
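For the sandboxed indirect call above, the target register is masked with ~(BundleSize - 1) so the call can only land at the start of a bundle. A standalone sketch of that masking, not Subzero code; the 32-byte bundle size here is only an assumption for the example:

  #include <cassert>
  #include <cstdint>

  // Clear the low bits of an indirect-call target so it lands on a bundle
  // boundary, mirroring the `_and(CallTargetVar, ~(BundleSize - 1))` above.
  static uint32_t maskToBundleStart(uint32_t Target, uint32_t BundleSize = 32) {
    return Target & ~(BundleSize - 1);
  }

  int main() {
    assert(maskToBundleStart(0x1005) == 0x1000);
    assert(maskToBundleStart(0x10e0) == 0x10e0); // already bundle-aligned
    return 0;
  }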
2314 // Add the appropriate offset to esp. The call instruction takes care 2258 // Add the appropriate offset to esp. The call instruction takes care
2315 // of resetting the stack offset during emission. 2259 // of resetting the stack offset during emission.
2316 if (ParameterAreaSizeBytes) { 2260 if (ParameterAreaSizeBytes) {
2317 Variable *esp = 2261 Variable *esp =
(...skipping 206 matching lines...)
2524 _and(T, Ctx->getConstantInt1(1)); 2468 _and(T, Ctx->getConstantInt1(1));
2525 _mov(Dest, T); 2469 _mov(Dest, T);
2526 } 2470 }
2527 break; 2471 break;
2528 } 2472 }
2529 case InstCast::Fptrunc: 2473 case InstCast::Fptrunc:
2530 case InstCast::Fpext: { 2474 case InstCast::Fpext: {
2531 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2475 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2532 // t1 = cvt Src0RM; Dest = t1 2476 // t1 = cvt Src0RM; Dest = t1
2533 Variable *T = makeReg(Dest->getType()); 2477 Variable *T = makeReg(Dest->getType());
2534 _cvt(T, Src0RM, InstX8632Cvt::Float2float); 2478 _cvt(T, Src0RM, Traits::Insts::Cvt::Float2float);
2535 _mov(Dest, T); 2479 _mov(Dest, T);
2536 break; 2480 break;
2537 } 2481 }
2538 case InstCast::Fptosi: 2482 case InstCast::Fptosi:
2539 if (isVectorType(Dest->getType())) { 2483 if (isVectorType(Dest->getType())) {
2540 assert(Dest->getType() == IceType_v4i32 && 2484 assert(Dest->getType() == IceType_v4i32 &&
2541 Inst->getSrc(0)->getType() == IceType_v4f32); 2485 Inst->getSrc(0)->getType() == IceType_v4f32);
2542 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2486 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2543 if (llvm::isa<OperandX8632Mem>(Src0RM)) 2487 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2544 Src0RM = legalizeToVar(Src0RM); 2488 Src0RM = legalizeToVar(Src0RM);
2545 Variable *T = makeReg(Dest->getType()); 2489 Variable *T = makeReg(Dest->getType());
2546 _cvt(T, Src0RM, InstX8632Cvt::Tps2dq); 2490 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
2547 _movp(Dest, T); 2491 _movp(Dest, T);
2548 } else if (Dest->getType() == IceType_i64) { 2492 } else if (Dest->getType() == IceType_i64) {
2549 // Use a helper for converting floating-point values to 64-bit 2493 // Use a helper for converting floating-point values to 64-bit
2550 // integers. SSE2 appears to have no way to convert from xmm 2494 // integers. SSE2 appears to have no way to convert from xmm
2551 // registers to something like the edx:eax register pair, and 2495 // registers to something like the edx:eax register pair, and
2552 // gcc and clang both want to use x87 instructions complete with 2496 // gcc and clang both want to use x87 instructions complete with
2553 // temporary manipulation of the status word. This helper is 2497 // temporary manipulation of the status word. This helper is
2554 // not needed for x86-64. 2498 // not needed for x86-64.
2555 split64(Dest); 2499 split64(Dest);
2556 const SizeT MaxSrcs = 1; 2500 const SizeT MaxSrcs = 1;
2557 Type SrcType = Inst->getSrc(0)->getType(); 2501 Type SrcType = Inst->getSrc(0)->getType();
2558 InstCall *Call = 2502 InstCall *Call =
2559 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 2503 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
2560 : H_fptosi_f64_i64, 2504 : H_fptosi_f64_i64,
2561 Dest, MaxSrcs); 2505 Dest, MaxSrcs);
2562 Call->addArg(Inst->getSrc(0)); 2506 Call->addArg(Inst->getSrc(0));
2563 lowerCall(Call); 2507 lowerCall(Call);
2564 } else { 2508 } else {
2565 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2509 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2566 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2510 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2567 Variable *T_1 = makeReg(IceType_i32); 2511 Variable *T_1 = makeReg(IceType_i32);
2568 Variable *T_2 = makeReg(Dest->getType()); 2512 Variable *T_2 = makeReg(Dest->getType());
2569 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si); 2513 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
2570 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2514 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2571 if (Dest->getType() == IceType_i1) 2515 if (Dest->getType() == IceType_i1)
2572 _and(T_2, Ctx->getConstantInt1(1)); 2516 _and(T_2, Ctx->getConstantInt1(1));
2573 _mov(Dest, T_2); 2517 _mov(Dest, T_2);
2574 } 2518 }
2575 break; 2519 break;
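The non-i64 Fptosi path above uses a truncating conversion (Tss2si, i.e. cvttss2si) into a 32-bit register. A small standalone sketch of that truncation-toward-zero behavior using the SSE intrinsic; this assumes compilation for an x86 target with SSE and is illustrative only:

  #include <cassert>
  #include <xmmintrin.h> // SSE: _mm_set_ss, _mm_cvttss_si32

  int main() {
    // cvttss2si truncates toward zero, matching the Tss2si conversion used in
    // the scalar Fptosi lowering above.
    assert(_mm_cvttss_si32(_mm_set_ss(3.9f)) == 3);
    assert(_mm_cvttss_si32(_mm_set_ss(-3.9f)) == -3);
    return 0;
  }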
2576 case InstCast::Fptoui: 2520 case InstCast::Fptoui:
2577 if (isVectorType(Dest->getType())) { 2521 if (isVectorType(Dest->getType())) {
2578 assert(Dest->getType() == IceType_v4i32 && 2522 assert(Dest->getType() == IceType_v4i32 &&
2579 Inst->getSrc(0)->getType() == IceType_v4f32); 2523 Inst->getSrc(0)->getType() == IceType_v4f32);
(...skipping 18 matching lines...)
2598 } 2542 }
2599 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); 2543 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
2600 Call->addArg(Inst->getSrc(0)); 2544 Call->addArg(Inst->getSrc(0));
2601 lowerCall(Call); 2545 lowerCall(Call);
2602 return; 2546 return;
2603 } else { 2547 } else {
2604 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2548 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2605 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type 2549 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
2606 Variable *T_1 = makeReg(IceType_i32); 2550 Variable *T_1 = makeReg(IceType_i32);
2607 Variable *T_2 = makeReg(Dest->getType()); 2551 Variable *T_2 = makeReg(Dest->getType());
2608 _cvt(T_1, Src0RM, InstX8632Cvt::Tss2si); 2552 _cvt(T_1, Src0RM, Traits::Insts::Cvt::Tss2si);
2609 _mov(T_2, T_1); // T_1 and T_2 may have different integer types 2553 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
2610 if (Dest->getType() == IceType_i1) 2554 if (Dest->getType() == IceType_i1)
2611 _and(T_2, Ctx->getConstantInt1(1)); 2555 _and(T_2, Ctx->getConstantInt1(1));
2612 _mov(Dest, T_2); 2556 _mov(Dest, T_2);
2613 } 2557 }
2614 break; 2558 break;
2615 case InstCast::Sitofp: 2559 case InstCast::Sitofp:
2616 if (isVectorType(Dest->getType())) { 2560 if (isVectorType(Dest->getType())) {
2617 assert(Dest->getType() == IceType_v4f32 && 2561 assert(Dest->getType() == IceType_v4f32 &&
2618 Inst->getSrc(0)->getType() == IceType_v4i32); 2562 Inst->getSrc(0)->getType() == IceType_v4i32);
2619 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2563 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2620 if (llvm::isa<OperandX8632Mem>(Src0RM)) 2564 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2621 Src0RM = legalizeToVar(Src0RM); 2565 Src0RM = legalizeToVar(Src0RM);
2622 Variable *T = makeReg(Dest->getType()); 2566 Variable *T = makeReg(Dest->getType());
2623 _cvt(T, Src0RM, InstX8632Cvt::Dq2ps); 2567 _cvt(T, Src0RM, Traits::Insts::Cvt::Dq2ps);
2624 _movp(Dest, T); 2568 _movp(Dest, T);
2625 } else if (Inst->getSrc(0)->getType() == IceType_i64) { 2569 } else if (Inst->getSrc(0)->getType() == IceType_i64) {
2626 // Use a helper for x86-32. 2570 // Use a helper for x86-32.
2627 const SizeT MaxSrcs = 1; 2571 const SizeT MaxSrcs = 1;
2628 Type DestType = Dest->getType(); 2572 Type DestType = Dest->getType();
2629 InstCall *Call = 2573 InstCall *Call =
2630 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32 2574 makeHelperCall(isFloat32Asserting32Or64(DestType) ? H_sitofp_i64_f32
2631 : H_sitofp_i64_f64, 2575 : H_sitofp_i64_f64,
2632 Dest, MaxSrcs); 2576 Dest, MaxSrcs);
2633 // TODO: Call the correct compiler-rt helper function. 2577 // TODO: Call the correct compiler-rt helper function.
2634 Call->addArg(Inst->getSrc(0)); 2578 Call->addArg(Inst->getSrc(0));
2635 lowerCall(Call); 2579 lowerCall(Call);
2636 return; 2580 return;
2637 } else { 2581 } else {
2638 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2582 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2639 // Sign-extend the operand. 2583 // Sign-extend the operand.
2640 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 2584 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
2641 Variable *T_1 = makeReg(IceType_i32); 2585 Variable *T_1 = makeReg(IceType_i32);
2642 Variable *T_2 = makeReg(Dest->getType()); 2586 Variable *T_2 = makeReg(Dest->getType());
2643 if (Src0RM->getType() == IceType_i32) 2587 if (Src0RM->getType() == IceType_i32)
2644 _mov(T_1, Src0RM); 2588 _mov(T_1, Src0RM);
2645 else 2589 else
2646 _movsx(T_1, Src0RM); 2590 _movsx(T_1, Src0RM);
2647 _cvt(T_2, T_1, InstX8632Cvt::Si2ss); 2591 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2648 _mov(Dest, T_2); 2592 _mov(Dest, T_2);
2649 } 2593 }
2650 break; 2594 break;
2651 case InstCast::Uitofp: { 2595 case InstCast::Uitofp: {
2652 Operand *Src0 = Inst->getSrc(0); 2596 Operand *Src0 = Inst->getSrc(0);
2653 if (isVectorType(Src0->getType())) { 2597 if (isVectorType(Src0->getType())) {
2654 assert(Dest->getType() == IceType_v4f32 && 2598 assert(Dest->getType() == IceType_v4f32 &&
2655 Src0->getType() == IceType_v4i32); 2599 Src0->getType() == IceType_v4i32);
2656 const SizeT MaxSrcs = 1; 2600 const SizeT MaxSrcs = 1;
2657 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); 2601 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
(...skipping 20 matching lines...)
2678 } else { 2622 } else {
2679 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2623 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2680 // Zero-extend the operand. 2624 // Zero-extend the operand.
2681 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 2625 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
2682 Variable *T_1 = makeReg(IceType_i32); 2626 Variable *T_1 = makeReg(IceType_i32);
2683 Variable *T_2 = makeReg(Dest->getType()); 2627 Variable *T_2 = makeReg(Dest->getType());
2684 if (Src0RM->getType() == IceType_i32) 2628 if (Src0RM->getType() == IceType_i32)
2685 _mov(T_1, Src0RM); 2629 _mov(T_1, Src0RM);
2686 else 2630 else
2687 _movzx(T_1, Src0RM); 2631 _movzx(T_1, Src0RM);
2688 _cvt(T_2, T_1, InstX8632Cvt::Si2ss); 2632 _cvt(T_2, T_1, Traits::Insts::Cvt::Si2ss);
2689 _mov(Dest, T_2); 2633 _mov(Dest, T_2);
2690 } 2634 }
2691 break; 2635 break;
2692 } 2636 }
2693 case InstCast::Bitcast: { 2637 case InstCast::Bitcast: {
2694 Operand *Src0 = Inst->getSrc(0); 2638 Operand *Src0 = Inst->getSrc(0);
2695 if (Dest->getType() == Src0->getType()) { 2639 if (Dest->getType() == Src0->getType()) {
2696 InstAssign *Assign = InstAssign::create(Func, Dest, Src0); 2640 InstAssign *Assign = InstAssign::create(Func, Dest, Src0);
2697 lowerAssign(Assign); 2641 lowerAssign(Assign);
2698 return; 2642 return;
(...skipping 21 matching lines...)
2720 (void)DestType; 2664 (void)DestType;
2721 assert((DestType == IceType_i32 && SrcType == IceType_f32) || 2665 assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
2722 (DestType == IceType_f32 && SrcType == IceType_i32)); 2666 (DestType == IceType_f32 && SrcType == IceType_i32));
2723 // a.i32 = bitcast b.f32 ==> 2667 // a.i32 = bitcast b.f32 ==>
2724 // t.f32 = b.f32 2668 // t.f32 = b.f32
2725 // s.f32 = spill t.f32 2669 // s.f32 = spill t.f32
2726 // a.i32 = s.f32 2670 // a.i32 = s.f32
2727 Variable *T = nullptr; 2671 Variable *T = nullptr;
2728 // TODO: Should be able to force a spill setup by calling legalize() with 2672 // TODO: Should be able to force a spill setup by calling legalize() with
2729 // Legal_Mem and not Legal_Reg or Legal_Imm. 2673 // Legal_Mem and not Legal_Reg or Legal_Imm.
2730 SpillVariable *SpillVar = 2674 typename Traits::SpillVariable *SpillVar =
2731 Func->template makeVariable<SpillVariable>(SrcType); 2675 Func->template makeVariable<typename Traits::SpillVariable>(SrcType);
2732 SpillVar->setLinkedTo(Dest); 2676 SpillVar->setLinkedTo(Dest);
2733 Variable *Spill = SpillVar; 2677 Variable *Spill = SpillVar;
2734 Spill->setWeight(RegWeight::Zero); 2678 Spill->setWeight(RegWeight::Zero);
2735 _mov(T, Src0RM); 2679 _mov(T, Src0RM);
2736 _mov(Spill, T); 2680 _mov(Spill, T);
2737 _mov(Dest, Spill); 2681 _mov(Dest, Spill);
2738 } break; 2682 } break;
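The i32/f32 bitcast above bounces the value through a spill slot so the bits move between the integer and FP register files without conversion. At the C++ level the same semantics are what a memcpy-based bit cast gives; a standalone sketch, not Subzero code:

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  // Reinterpret the bits of a float as a 32-bit integer without converting the
  // value -- the observable effect of the spill-slot sequence above.
  static uint32_t bitcastF32ToI32(float F) {
    uint32_t Bits;
    static_assert(sizeof(Bits) == sizeof(F), "widths must match for a bitcast");
    std::memcpy(&Bits, &F, sizeof(Bits));
    return Bits;
  }

  int main() {
    std::printf("bits of 1.0f = 0x%08x\n", bitcastF32ToI32(1.0f)); // 0x3f800000
    return 0;
  }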
2739 case IceType_i64: { 2683 case IceType_i64: {
2740 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2684 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2741 assert(Src0RM->getType() == IceType_f64); 2685 assert(Src0RM->getType() == IceType_f64);
2742 // a.i64 = bitcast b.f64 ==> 2686 // a.i64 = bitcast b.f64 ==>
2743 // s.f64 = spill b.f64 2687 // s.f64 = spill b.f64
2744 // t_lo.i32 = lo(s.f64) 2688 // t_lo.i32 = lo(s.f64)
2745 // a_lo.i32 = t_lo.i32 2689 // a_lo.i32 = t_lo.i32
2746 // t_hi.i32 = hi(s.f64) 2690 // t_hi.i32 = hi(s.f64)
2747 // a_hi.i32 = t_hi.i32 2691 // a_hi.i32 = t_hi.i32
2748 Operand *SpillLo, *SpillHi; 2692 Operand *SpillLo, *SpillHi;
2749 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) { 2693 if (auto *Src0Var = llvm::dyn_cast<Variable>(Src0RM)) {
2750 SpillVariable *SpillVar = 2694 typename Traits::SpillVariable *SpillVar =
2751 Func->template makeVariable<SpillVariable>(IceType_f64); 2695 Func->template makeVariable<typename Traits::SpillVariable>(
2696 IceType_f64);
2752 SpillVar->setLinkedTo(Src0Var); 2697 SpillVar->setLinkedTo(Src0Var);
2753 Variable *Spill = SpillVar; 2698 Variable *Spill = SpillVar;
2754 Spill->setWeight(RegWeight::Zero); 2699 Spill->setWeight(RegWeight::Zero);
2755 _movq(Spill, Src0RM); 2700 _movq(Spill, Src0RM);
2756 SpillLo = VariableSplit::create(Func, Spill, VariableSplit::Low); 2701 SpillLo = Traits::VariableSplit::create(Func, Spill,
2757 SpillHi = VariableSplit::create(Func, Spill, VariableSplit::High); 2702 Traits::VariableSplit::Low);
2703 SpillHi = Traits::VariableSplit::create(Func, Spill,
2704 Traits::VariableSplit::High);
2758 } else { 2705 } else {
2759 SpillLo = loOperand(Src0RM); 2706 SpillLo = loOperand(Src0RM);
2760 SpillHi = hiOperand(Src0RM); 2707 SpillHi = hiOperand(Src0RM);
2761 } 2708 }
2762 2709
2763 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2710 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2764 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2711 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2765 Variable *T_Lo = makeReg(IceType_i32); 2712 Variable *T_Lo = makeReg(IceType_i32);
2766 Variable *T_Hi = makeReg(IceType_i32); 2713 Variable *T_Hi = makeReg(IceType_i32);
2767 2714
2768 _mov(T_Lo, SpillLo); 2715 _mov(T_Lo, SpillLo);
2769 _mov(DestLo, T_Lo); 2716 _mov(DestLo, T_Lo);
2770 _mov(T_Hi, SpillHi); 2717 _mov(T_Hi, SpillHi);
2771 _mov(DestHi, T_Hi); 2718 _mov(DestHi, T_Hi);
2772 } break; 2719 } break;
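The i64 = bitcast f64 case above spills the double and reads it back as two 32-bit halves via VariableSplit::Low/High. A standalone sketch of the same split on a little-endian x86 target (illustrative only):

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  int main() {
    // On little-endian x86 the low 32 bits of the f64 image sit at the lower
    // address, which is what VariableSplit::Low/High read above.
    double D = 1.0;
    uint64_t Bits;
    std::memcpy(&Bits, &D, sizeof(Bits));
    uint32_t Lo = static_cast<uint32_t>(Bits);       // VariableSplit::Low
    uint32_t Hi = static_cast<uint32_t>(Bits >> 32); // VariableSplit::High
    std::printf("lo=0x%08x hi=0x%08x\n", Lo, Hi);    // lo=0x00000000 hi=0x3ff00000
    return 0;
  }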
2773 case IceType_f64: { 2720 case IceType_f64: {
2774 Src0 = legalize(Src0); 2721 Src0 = legalize(Src0);
2775 assert(Src0->getType() == IceType_i64); 2722 assert(Src0->getType() == IceType_i64);
2776 if (llvm::isa<OperandX8632Mem>(Src0)) { 2723 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
2777 Variable *T = Func->template makeVariable(Dest->getType()); 2724 Variable *T = Func->template makeVariable(Dest->getType());
2778 _movq(T, Src0); 2725 _movq(T, Src0);
2779 _movq(Dest, T); 2726 _movq(Dest, T);
2780 break; 2727 break;
2781 } 2728 }
2782 // a.f64 = bitcast b.i64 ==> 2729 // a.f64 = bitcast b.i64 ==>
2783 // t_lo.i32 = b_lo.i32 2730 // t_lo.i32 = b_lo.i32
2784 // FakeDef(s.f64) 2731 // FakeDef(s.f64)
2785 // lo(s.f64) = t_lo.i32 2732 // lo(s.f64) = t_lo.i32
2786 // t_hi.i32 = b_hi.i32 2733 // t_hi.i32 = b_hi.i32
2787 // hi(s.f64) = t_hi.i32 2734 // hi(s.f64) = t_hi.i32
2788 // a.f64 = s.f64 2735 // a.f64 = s.f64
2789 SpillVariable *SpillVar = 2736 typename Traits::SpillVariable *SpillVar =
2790 Func->template makeVariable<SpillVariable>(IceType_f64); 2737 Func->template makeVariable<typename Traits::SpillVariable>(
2738 IceType_f64);
2791 SpillVar->setLinkedTo(Dest); 2739 SpillVar->setLinkedTo(Dest);
2792 Variable *Spill = SpillVar; 2740 Variable *Spill = SpillVar;
2793 Spill->setWeight(RegWeight::Zero); 2741 Spill->setWeight(RegWeight::Zero);
2794 2742
2795 Variable *T_Lo = nullptr, *T_Hi = nullptr; 2743 Variable *T_Lo = nullptr, *T_Hi = nullptr;
2796 VariableSplit *SpillLo = 2744 typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
2797 VariableSplit::create(Func, Spill, VariableSplit::Low); 2745 Func, Spill, Traits::VariableSplit::Low);
2798 VariableSplit *SpillHi = 2746 typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
2799 VariableSplit::create(Func, Spill, VariableSplit::High); 2747 Func, Spill, Traits::VariableSplit::High);
2800 _mov(T_Lo, loOperand(Src0)); 2748 _mov(T_Lo, loOperand(Src0));
2801 // Technically, the Spill is defined after the _store happens, but 2749 // Technically, the Spill is defined after the _store happens, but
2802 // SpillLo is considered a "use" of Spill so define Spill before it 2750 // SpillLo is considered a "use" of Spill so define Spill before it
2803 // is used. 2751 // is used.
2804 Context.insert(InstFakeDef::create(Func, Spill)); 2752 Context.insert(InstFakeDef::create(Func, Spill));
2805 _store(T_Lo, SpillLo); 2753 _store(T_Lo, SpillLo);
2806 _mov(T_Hi, hiOperand(Src0)); 2754 _mov(T_Hi, hiOperand(Src0));
2807 _store(T_Hi, SpillHi); 2755 _store(T_Hi, SpillHi);
2808 _movq(Dest, Spill); 2756 _movq(Dest, Spill);
2809 } break; 2757 } break;
(...skipping 79 matching lines...)
2889 // Spill the value to a stack slot and do the extraction in memory. 2837 // Spill the value to a stack slot and do the extraction in memory.
2890 // 2838 //
2891 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when 2839 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
2892 // support for legalizing to mem is implemented. 2840 // support for legalizing to mem is implemented.
2893 Variable *Slot = Func->template makeVariable(Ty); 2841 Variable *Slot = Func->template makeVariable(Ty);
2894 Slot->setWeight(RegWeight::Zero); 2842 Slot->setWeight(RegWeight::Zero);
2895 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); 2843 _movp(Slot, legalizeToVar(SourceVectNotLegalized));
2896 2844
2897 // Compute the location of the element in memory. 2845 // Compute the location of the element in memory.
2898 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); 2846 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
2899 OperandX8632Mem *Loc = 2847 typename Traits::X86OperandMem *Loc =
2900 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); 2848 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
2901 _mov(ExtractedElementR, Loc); 2849 _mov(ExtractedElementR, Loc);
2902 } 2850 }
2903 2851
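When the element cannot be pulled out of a register directly, the vector is spilled and the element is loaded from the slot at Index * typeWidthInBytes(element), as getMemoryOperandForStackSlot does above. A standalone C++ sketch of that memory-based extraction (function name and types are illustrative):

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  // Spill a 4 x i32 vector to a stack slot, then read element Index back at
  // byte offset Index * sizeof(element).
  static int32_t extractElementViaStack(const int32_t (&Vec)[4], unsigned Index) {
    alignas(16) int32_t Slot[4];          // the stack slot
    std::memcpy(Slot, Vec, sizeof(Slot)); // movp Slot, vector
    int32_t Element;
    std::memcpy(&Element,
                reinterpret_cast<const char *>(Slot) + Index * sizeof(int32_t),
                sizeof(Element));         // load from Slot + Offset
    return Element;
  }

  int main() {
    const int32_t V[4] = {10, 20, 30, 40};
    assert(extractElementViaStack(V, 2) == 30);
    return 0;
  }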
2904 if (ElementTy == IceType_i1) { 2852 if (ElementTy == IceType_i1) {
2905 // Truncate extracted integers to i1s if necessary. 2853 // Truncate extracted integers to i1s if necessary.
2906 Variable *T = makeReg(IceType_i1); 2854 Variable *T = makeReg(IceType_i1);
2907 InstCast *Cast = 2855 InstCast *Cast =
2908 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR); 2856 InstCast::create(Func, InstCast::Trunc, T, ExtractedElementR);
2909 lowerCast(Cast); 2857 lowerCast(Cast);
(...skipping 25 matching lines...)
2935 Variable *T = nullptr; 2883 Variable *T = nullptr;
2936 2884
2937 if (Condition == InstFcmp::True) { 2885 if (Condition == InstFcmp::True) {
2938 // makeVectorOfOnes() requires an integer vector type. 2886 // makeVectorOfOnes() requires an integer vector type.
2939 T = makeVectorOfMinusOnes(IceType_v4i32); 2887 T = makeVectorOfMinusOnes(IceType_v4i32);
2940 } else if (Condition == InstFcmp::False) { 2888 } else if (Condition == InstFcmp::False) {
2941 T = makeVectorOfZeros(Dest->getType()); 2889 T = makeVectorOfZeros(Dest->getType());
2942 } else { 2890 } else {
2943 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2891 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2944 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2892 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2945 if (llvm::isa<OperandX8632Mem>(Src1RM)) 2893 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
2946 Src1RM = legalizeToVar(Src1RM); 2894 Src1RM = legalizeToVar(Src1RM);
2947 2895
2948 switch (Condition) { 2896 switch (Condition) {
2949 default: { 2897 default: {
2950 typename Traits::Cond::CmppsCond Predicate = 2898 typename Traits::Cond::CmppsCond Predicate =
2951 Traits::TableFcmp[Index].Predicate; 2899 Traits::TableFcmp[Index].Predicate;
2952 assert(Predicate != Traits::Cond::Cmpps_Invalid); 2900 assert(Predicate != Traits::Cond::Cmpps_Invalid);
2953 T = makeReg(Src0RM->getType()); 2901 T = makeReg(Src0RM->getType());
2954 _movp(T, Src0RM); 2902 _movp(T, Src0RM);
2955 _cmpps(T, Src1RM, Predicate); 2903 _cmpps(T, Src1RM, Predicate);
(...skipping 54 matching lines...)
3010 _ucomiss(T, Src1RM); 2958 _ucomiss(T, Src1RM);
3011 if (!HasC2) { 2959 if (!HasC2) {
3012 assert(Traits::TableFcmp[Index].Default); 2960 assert(Traits::TableFcmp[Index].Default);
3013 _setcc(Dest, Traits::TableFcmp[Index].C1); 2961 _setcc(Dest, Traits::TableFcmp[Index].C1);
3014 return; 2962 return;
3015 } 2963 }
3016 } 2964 }
3017 Constant *Default = Ctx->getConstantInt32(Traits::TableFcmp[Index].Default); 2965 Constant *Default = Ctx->getConstantInt32(Traits::TableFcmp[Index].Default);
3018 _mov(Dest, Default); 2966 _mov(Dest, Default);
3019 if (HasC1) { 2967 if (HasC1) {
3020 InstX8632Label *Label = InstX8632Label::create(Func, this); 2968 typename Traits::Insts::Label *Label =
2969 Traits::Insts::Label::create(Func, this);
3021 _br(Traits::TableFcmp[Index].C1, Label); 2970 _br(Traits::TableFcmp[Index].C1, Label);
3022 if (HasC2) { 2971 if (HasC2) {
3023 _br(Traits::TableFcmp[Index].C2, Label); 2972 _br(Traits::TableFcmp[Index].C2, Label);
3024 } 2973 }
3025 Constant *NonDefault = 2974 Constant *NonDefault =
3026 Ctx->getConstantInt32(!Traits::TableFcmp[Index].Default); 2975 Ctx->getConstantInt32(!Traits::TableFcmp[Index].Default);
3027 _mov_nonkillable(Dest, NonDefault); 2976 _mov_nonkillable(Dest, NonDefault);
3028 Context.insert(Label); 2977 Context.insert(Label);
3029 } 2978 }
3030 } 2979 }
(...skipping 52 matching lines...)
3083 Src0RM = T0; 3032 Src0RM = T0;
3084 Src1RM = T1; 3033 Src1RM = T1;
3085 } 3034 }
3086 3035
3087 Variable *T = makeReg(Ty); 3036 Variable *T = makeReg(Ty);
3088 switch (Condition) { 3037 switch (Condition) {
3089 default: 3038 default:
3090 llvm_unreachable("unexpected condition"); 3039 llvm_unreachable("unexpected condition");
3091 break; 3040 break;
3092 case InstIcmp::Eq: { 3041 case InstIcmp::Eq: {
3093 if (llvm::isa<OperandX8632Mem>(Src1RM)) 3042 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
3094 Src1RM = legalizeToVar(Src1RM); 3043 Src1RM = legalizeToVar(Src1RM);
3095 _movp(T, Src0RM); 3044 _movp(T, Src0RM);
3096 _pcmpeq(T, Src1RM); 3045 _pcmpeq(T, Src1RM);
3097 } break; 3046 } break;
3098 case InstIcmp::Ne: { 3047 case InstIcmp::Ne: {
3099 if (llvm::isa<OperandX8632Mem>(Src1RM)) 3048 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
3100 Src1RM = legalizeToVar(Src1RM); 3049 Src1RM = legalizeToVar(Src1RM);
3101 _movp(T, Src0RM); 3050 _movp(T, Src0RM);
3102 _pcmpeq(T, Src1RM); 3051 _pcmpeq(T, Src1RM);
3103 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 3052 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
3104 _pxor(T, MinusOne); 3053 _pxor(T, MinusOne);
3105 } break; 3054 } break;
3106 case InstIcmp::Ugt: 3055 case InstIcmp::Ugt:
3107 case InstIcmp::Sgt: { 3056 case InstIcmp::Sgt: {
3108 if (llvm::isa<OperandX8632Mem>(Src1RM)) 3057 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
3109 Src1RM = legalizeToVar(Src1RM); 3058 Src1RM = legalizeToVar(Src1RM);
3110 _movp(T, Src0RM); 3059 _movp(T, Src0RM);
3111 _pcmpgt(T, Src1RM); 3060 _pcmpgt(T, Src1RM);
3112 } break; 3061 } break;
3113 case InstIcmp::Uge: 3062 case InstIcmp::Uge:
3114 case InstIcmp::Sge: { 3063 case InstIcmp::Sge: {
3115 // !(Src1RM > Src0RM) 3064 // !(Src1RM > Src0RM)
3116 if (llvm::isa<OperandX8632Mem>(Src0RM)) 3065 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
3117 Src0RM = legalizeToVar(Src0RM); 3066 Src0RM = legalizeToVar(Src0RM);
3118 _movp(T, Src1RM); 3067 _movp(T, Src1RM);
3119 _pcmpgt(T, Src0RM); 3068 _pcmpgt(T, Src0RM);
3120 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 3069 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
3121 _pxor(T, MinusOne); 3070 _pxor(T, MinusOne);
3122 } break; 3071 } break;
3123 case InstIcmp::Ult: 3072 case InstIcmp::Ult:
3124 case InstIcmp::Slt: { 3073 case InstIcmp::Slt: {
3125 if (llvm::isa<OperandX8632Mem>(Src0RM)) 3074 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
3126 Src0RM = legalizeToVar(Src0RM); 3075 Src0RM = legalizeToVar(Src0RM);
3127 _movp(T, Src1RM); 3076 _movp(T, Src1RM);
3128 _pcmpgt(T, Src0RM); 3077 _pcmpgt(T, Src0RM);
3129 } break; 3078 } break;
3130 case InstIcmp::Ule: 3079 case InstIcmp::Ule:
3131 case InstIcmp::Sle: { 3080 case InstIcmp::Sle: {
3132 // !(Src0RM > Src1RM) 3081 // !(Src0RM > Src1RM)
3133 if (llvm::isa<OperandX8632Mem>(Src1RM)) 3082 if (llvm::isa<typename Traits::X86OperandMem>(Src1RM))
3134 Src1RM = legalizeToVar(Src1RM); 3083 Src1RM = legalizeToVar(Src1RM);
3135 _movp(T, Src0RM); 3084 _movp(T, Src0RM);
3136 _pcmpgt(T, Src1RM); 3085 _pcmpgt(T, Src1RM);
3137 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 3086 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
3138 _pxor(T, MinusOne); 3087 _pxor(T, MinusOne);
3139 } break; 3088 } break;
3140 } 3089 }
3141 3090
3142 _movp(Dest, T); 3091 _movp(Dest, T);
3143 eliminateNextVectorSextInstruction(Dest); 3092 eliminateNextVectorSextInstruction(Dest);
3144 return; 3093 return;
3145 } 3094 }
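The Sge/Uge and Sle/Ule cases above compute "greater-or-equal" as NOT(greater-than with swapped operands): pcmpgt followed by pxor with a vector of minus-ones. A standalone SSE2 intrinsics sketch of the signed per-lane a >= b (illustrative; assumes an SSE2-capable x86 target):

  #include <cassert>
  #include <cstdint>
  #include <emmintrin.h> // SSE2: _mm_cmpgt_epi32, _mm_xor_si128, _mm_set1_epi32

  int main() {
    // a >= b (signed, per lane) computed as ~(b > a), the pcmpgt + pxor
    // pattern used for Sge above.
    __m128i A = _mm_setr_epi32(1, 5, -3, 7);
    __m128i B = _mm_setr_epi32(2, 5, -4, 9);
    __m128i Gt = _mm_cmpgt_epi32(B, A);                 // b > a
    __m128i Ge = _mm_xor_si128(Gt, _mm_set1_epi32(-1)); // ~(b > a) == a >= b
    alignas(16) int32_t Out[4];
    _mm_store_si128(reinterpret_cast<__m128i *>(Out), Ge);
    assert(Out[0] == 0 && Out[1] == -1 && Out[2] == -1 && Out[3] == 0);
    return 0;
  }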
3146 3095
3147 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: 3096 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
3148 if (Src0->getType() == IceType_i64) { 3097 if (Src0->getType() == IceType_i64) {
3149 InstIcmp::ICond Condition = Inst->getCondition(); 3098 InstIcmp::ICond Condition = Inst->getCondition();
3150 size_t Index = static_cast<size_t>(Condition); 3099 size_t Index = static_cast<size_t>(Condition);
3151 assert(Index < Traits::TableIcmp64Size); 3100 assert(Index < Traits::TableIcmp64Size);
3152 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); 3101 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
3153 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); 3102 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
3154 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); 3103 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
3155 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); 3104 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
3156 Constant *Zero = Ctx->getConstantZero(IceType_i32); 3105 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3157 Constant *One = Ctx->getConstantInt32(1); 3106 Constant *One = Ctx->getConstantInt32(1);
3158 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); 3107 typename Traits::Insts::Label *LabelFalse =
3159 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); 3108 Traits::Insts::Label::create(Func, this);
3109 typename Traits::Insts::Label *LabelTrue =
3110 Traits::Insts::Label::create(Func, this);
3160 _mov(Dest, One); 3111 _mov(Dest, One);
3161 _cmp(Src0HiRM, Src1HiRI); 3112 _cmp(Src0HiRM, Src1HiRI);
3162 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None) 3113 if (Traits::TableIcmp64[Index].C1 != Traits::Cond::Br_None)
3163 _br(Traits::TableIcmp64[Index].C1, LabelTrue); 3114 _br(Traits::TableIcmp64[Index].C1, LabelTrue);
3164 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None) 3115 if (Traits::TableIcmp64[Index].C2 != Traits::Cond::Br_None)
3165 _br(Traits::TableIcmp64[Index].C2, LabelFalse); 3116 _br(Traits::TableIcmp64[Index].C2, LabelFalse);
3166 _cmp(Src0LoRM, Src1LoRI); 3117 _cmp(Src0LoRM, Src1LoRI);
3167 _br(Traits::TableIcmp64[Index].C3, LabelTrue); 3118 _br(Traits::TableIcmp64[Index].C3, LabelTrue);
3168 Context.insert(LabelFalse); 3119 Context.insert(LabelFalse);
3169 _mov_nonkillable(Dest, Zero); 3120 _mov_nonkillable(Dest, Zero);
(...skipping 115 matching lines...)
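For the i64 icmp lowering above: the high words are compared first, and the low words only decide the result (with an unsigned compare) when the high words are equal. A scalar standalone sketch of that shape for signed less-than; it is illustrative and not the table-driven Subzero version:

  #include <cassert>
  #include <cstdint>

  // Signed 64-bit less-than built from 32-bit halves.
  static bool sltViaHalves(int64_t A, int64_t B) {
    int32_t AHi = static_cast<int32_t>(static_cast<uint64_t>(A) >> 32);
    int32_t BHi = static_cast<int32_t>(static_cast<uint64_t>(B) >> 32);
    uint32_t ALo = static_cast<uint32_t>(A), BLo = static_cast<uint32_t>(B);
    if (AHi != BHi)
      return AHi < BHi; // high words decide (signed compare)
    return ALo < BLo;   // tie-break on low words (unsigned compare)
  }

  int main() {
    assert(sltViaHalves(-1, 0));
    assert(!sltViaHalves(0x100000000LL, 0xFFFFFFFFLL));
    assert(!sltViaHalves(5, 5));
    return 0;
  }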
3285 // memory. 3236 // memory.
3286 // 3237 //
3287 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when 3238 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
3288 // support for legalizing to mem is implemented. 3239 // support for legalizing to mem is implemented.
3289 Variable *Slot = Func->template makeVariable(Ty); 3240 Variable *Slot = Func->template makeVariable(Ty);
3290 Slot->setWeight(RegWeight::Zero); 3241 Slot->setWeight(RegWeight::Zero);
3291 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); 3242 _movp(Slot, legalizeToVar(SourceVectNotLegalized));
3292 3243
3293 // Compute the location of the position to insert in memory. 3244 // Compute the location of the position to insert in memory.
3294 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); 3245 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
3295 OperandX8632Mem *Loc = 3246 typename Traits::X86OperandMem *Loc =
3296 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); 3247 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
3297 _store(legalizeToVar(ElementToInsertNotLegalized), Loc); 3248 _store(legalizeToVar(ElementToInsertNotLegalized), Loc);
3298 3249
3299 Variable *T = makeReg(Ty); 3250 Variable *T = makeReg(Ty);
3300 _movp(T, Slot); 3251 _movp(T, Slot);
3301 _movp(Inst->getDest(), T); 3252 _movp(Inst->getDest(), T);
3302 } 3253 }
3303 } 3254 }
3304 3255
3305 template <class Machine> 3256 template <class Machine>
(...skipping 69 matching lines...)
3375 return; 3326 return;
3376 } 3327 }
3377 Variable *Dest = Instr->getDest(); 3328 Variable *Dest = Instr->getDest();
3378 if (Dest->getType() == IceType_i64) { 3329 if (Dest->getType() == IceType_i64) {
3379 // Follow what GCC does and use a movq instead of what lowerLoad() 3330 // Follow what GCC does and use a movq instead of what lowerLoad()
3380 // normally does (split the load into two). 3331 // normally does (split the load into two).
3381 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding 3332 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding
3382 // can't happen anyway, since this is x86-32 and integer arithmetic only 3333 // can't happen anyway, since this is x86-32 and integer arithmetic only
3383 // happens on 32-bit quantities. 3334 // happens on 32-bit quantities.
3384 Variable *T = makeReg(IceType_f64); 3335 Variable *T = makeReg(IceType_f64);
3385 OperandX8632Mem *Addr = formMemoryOperand(Instr->getArg(0), IceType_f64); 3336 typename Traits::X86OperandMem *Addr =
3337 formMemoryOperand(Instr->getArg(0), IceType_f64);
3386 _movq(T, Addr); 3338 _movq(T, Addr);
3387 // Then cast the bits back out of the XMM register to the i64 Dest. 3339 // Then cast the bits back out of the XMM register to the i64 Dest.
3388 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); 3340 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
3389 lowerCast(Cast); 3341 lowerCast(Cast);
3390 // Make sure that the atomic load isn't elided when unused. 3342 // Make sure that the atomic load isn't elided when unused.
3391 Context.insert(InstFakeUse::create(Func, Dest->getLo())); 3343 Context.insert(InstFakeUse::create(Func, Dest->getLo()));
3392 Context.insert(InstFakeUse::create(Func, Dest->getHi())); 3344 Context.insert(InstFakeUse::create(Func, Dest->getHi()));
3393 return; 3345 return;
3394 } 3346 }
3395 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); 3347 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
(...skipping 29 matching lines...)
3425 Operand *Value = Instr->getArg(0); 3377 Operand *Value = Instr->getArg(0);
3426 Operand *Ptr = Instr->getArg(1); 3378 Operand *Ptr = Instr->getArg(1);
3427 if (Value->getType() == IceType_i64) { 3379 if (Value->getType() == IceType_i64) {
3428 // Use a movq instead of what lowerStore() normally does 3380 // Use a movq instead of what lowerStore() normally does
3429 // (split the store into two), following what GCC does. 3381 // (split the store into two), following what GCC does.
3430 // Cast the bits from int -> to an xmm register first. 3382 // Cast the bits from int -> to an xmm register first.
3431 Variable *T = makeReg(IceType_f64); 3383 Variable *T = makeReg(IceType_f64);
3432 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); 3384 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
3433 lowerCast(Cast); 3385 lowerCast(Cast);
3434 // Then store XMM w/ a movq. 3386 // Then store XMM w/ a movq.
3435 OperandX8632Mem *Addr = formMemoryOperand(Ptr, IceType_f64); 3387 typename Traits::X86OperandMem *Addr =
3388 formMemoryOperand(Ptr, IceType_f64);
3436 _storeq(T, Addr); 3389 _storeq(T, Addr);
3437 _mfence(); 3390 _mfence();
3438 return; 3391 return;
3439 } 3392 }
3440 InstStore *Store = InstStore::create(Func, Value, Ptr); 3393 InstStore *Store = InstStore::create(Func, Value, Ptr);
3441 lowerStore(Store); 3394 lowerStore(Store);
3442 _mfence(); 3395 _mfence();
3443 return; 3396 return;
3444 } 3397 }
3445 case Intrinsics::Bswap: { 3398 case Intrinsics::Bswap: {
(...skipping 81 matching lines...)
3527 return; 3480 return;
3528 } 3481 }
3529 case Intrinsics::Fabs: { 3482 case Intrinsics::Fabs: {
3530 Operand *Src = legalize(Instr->getArg(0)); 3483 Operand *Src = legalize(Instr->getArg(0));
3531 Type Ty = Src->getType(); 3484 Type Ty = Src->getType();
3532 Variable *Dest = Instr->getDest(); 3485 Variable *Dest = Instr->getDest();
3533 Variable *T = makeVectorOfFabsMask(Ty); 3486 Variable *T = makeVectorOfFabsMask(Ty);
3534 // The pand instruction operates on an m128 memory operand, so if 3487 // The pand instruction operates on an m128 memory operand, so if
3535 // Src is an f32 or f64, we need to make sure it's in a register. 3488 // Src is an f32 or f64, we need to make sure it's in a register.
3536 if (isVectorType(Ty)) { 3489 if (isVectorType(Ty)) {
3537 if (llvm::isa<OperandX8632Mem>(Src)) 3490 if (llvm::isa<typename Traits::X86OperandMem>(Src))
3538 Src = legalizeToVar(Src); 3491 Src = legalizeToVar(Src);
3539 } else { 3492 } else {
3540 Src = legalizeToVar(Src); 3493 Src = legalizeToVar(Src);
3541 } 3494 }
3542 _pand(T, Src); 3495 _pand(T, Src);
3543 if (isVectorType(Ty)) 3496 if (isVectorType(Ty))
3544 _movp(Dest, T); 3497 _movp(Dest, T);
3545 else 3498 else
3546 _mov(Dest, T); 3499 _mov(Dest, T);
3547 return; 3500 return;
(...skipping 34 matching lines...)
3582 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3); 3535 InstCall *Call = makeHelperCall(H_call_memset, nullptr, 3);
3583 Call->addArg(Instr->getArg(0)); 3536 Call->addArg(Instr->getArg(0));
3584 Call->addArg(ValExt); 3537 Call->addArg(ValExt);
3585 Call->addArg(Instr->getArg(2)); 3538 Call->addArg(Instr->getArg(2));
3586 lowerCall(Call); 3539 lowerCall(Call);
3587 return; 3540 return;
3588 } 3541 }
3589 case Intrinsics::NaClReadTP: { 3542 case Intrinsics::NaClReadTP: {
3590 if (Ctx->getFlags().getUseSandboxing()) { 3543 if (Ctx->getFlags().getUseSandboxing()) {
3591 Constant *Zero = Ctx->getConstantZero(IceType_i32); 3544 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3592 Operand *Src = 3545 Operand *Src = Traits::X86OperandMem::create(
3593 OperandX8632Mem::create(Func, IceType_i32, nullptr, Zero, nullptr, 0, 3546 Func, IceType_i32, nullptr, Zero, nullptr, 0,
3594 OperandX8632Mem::SegReg_GS); 3547 Traits::X86OperandMem::SegReg_GS);
3595 Variable *Dest = Instr->getDest(); 3548 Variable *Dest = Instr->getDest();
3596 Variable *T = nullptr; 3549 Variable *T = nullptr;
3597 _mov(T, Src); 3550 _mov(T, Src);
3598 _mov(Dest, T); 3551 _mov(Dest, T);
3599 } else { 3552 } else {
3600 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0); 3553 InstCall *Call = makeHelperCall(H_call_read_tp, Instr->getDest(), 0);
3601 lowerCall(Call); 3554 lowerCall(Call);
3602 } 3555 }
3603 return; 3556 return;
3604 } 3557 }
(...skipping 42 matching lines...)
3647 // Reserve the pre-colored registers first, before adding any more 3600 // Reserve the pre-colored registers first, before adding any more
3648 // infinite-weight variables from formMemoryOperand's legalization. 3601 // infinite-weight variables from formMemoryOperand's legalization.
3649 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 3602 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3650 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 3603 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3651 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); 3604 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3652 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); 3605 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
3653 _mov(T_eax, loOperand(Expected)); 3606 _mov(T_eax, loOperand(Expected));
3654 _mov(T_edx, hiOperand(Expected)); 3607 _mov(T_edx, hiOperand(Expected));
3655 _mov(T_ebx, loOperand(Desired)); 3608 _mov(T_ebx, loOperand(Desired));
3656 _mov(T_ecx, hiOperand(Desired)); 3609 _mov(T_ecx, hiOperand(Desired));
3657 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); 3610 typename Traits::X86OperandMem *Addr =
3611 formMemoryOperand(Ptr, Expected->getType());
3658 const bool Locked = true; 3612 const bool Locked = true;
3659 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); 3613 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3660 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); 3614 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
3661 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); 3615 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
3662 _mov(DestLo, T_eax); 3616 _mov(DestLo, T_eax);
3663 _mov(DestHi, T_edx); 3617 _mov(DestHi, T_edx);
3664 return; 3618 return;
3665 } 3619 }
3666 Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax); 3620 Variable *T_eax = makeReg(Expected->getType(), Traits::RegisterSet::Reg_eax);
3667 _mov(T_eax, Expected); 3621 _mov(T_eax, Expected);
3668 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Expected->getType()); 3622 typename Traits::X86OperandMem *Addr =
3623 formMemoryOperand(Ptr, Expected->getType());
3669 Variable *DesiredReg = legalizeToVar(Desired); 3624 Variable *DesiredReg = legalizeToVar(Desired);
3670 const bool Locked = true; 3625 const bool Locked = true;
3671 _cmpxchg(Addr, T_eax, DesiredReg, Locked); 3626 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
3672 _mov(DestPrev, T_eax); 3627 _mov(DestPrev, T_eax);
3673 } 3628 }
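lowerAtomicCmpxchg above emits lock cmpxchg (lock cmpxchg8b for i64), which stores Desired only if memory still holds Expected and in either case leaves the previous memory value in eax(:edx) for DestPrev. The C++ counterpart of that contract is compare_exchange_strong; a standalone sketch, not Subzero code:

  #include <atomic>
  #include <cassert>
  #include <cstdint>

  int main() {
    // If *Ptr == Expected, store Desired; either way the caller learns the
    // value that was in memory -- the contract of the lock cmpxchg above.
    std::atomic<uint32_t> Mem{42};
    uint32_t Expected = 42;
    bool Swapped = Mem.compare_exchange_strong(Expected, 7);
    assert(Swapped && Mem.load() == 7 && Expected == 42);

    Expected = 42; // now stale
    Swapped = Mem.compare_exchange_strong(Expected, 9);
    assert(!Swapped && Mem.load() == 7 && Expected == 7); // Expected gets old value
    return 0;
  }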
3674 3629
3675 template <class Machine> 3630 template <class Machine>
3676 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest, 3631 bool TargetX86Base<Machine>::tryOptimizedCmpxchgCmpBr(Variable *Dest,
3677 Operand *PtrToMem, 3632 Operand *PtrToMem,
3678 Operand *Expected, 3633 Operand *Expected,
(...skipping 81 matching lines...)
3760 return; 3715 return;
3761 case Intrinsics::AtomicAdd: { 3716 case Intrinsics::AtomicAdd: {
3762 if (Dest->getType() == IceType_i64) { 3717 if (Dest->getType() == IceType_i64) {
3763 // All the fall-through paths must set this to true, but use this 3718 // All the fall-through paths must set this to true, but use this
3764 // for asserting. 3719 // for asserting.
3765 NeedsCmpxchg = true; 3720 NeedsCmpxchg = true;
3766 Op_Lo = &TargetX86Base<Machine>::_add; 3721 Op_Lo = &TargetX86Base<Machine>::_add;
3767 Op_Hi = &TargetX86Base<Machine>::_adc; 3722 Op_Hi = &TargetX86Base<Machine>::_adc;
3768 break; 3723 break;
3769 } 3724 }
3770 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); 3725 typename Traits::X86OperandMem *Addr =
3726 formMemoryOperand(Ptr, Dest->getType());
3771 const bool Locked = true; 3727 const bool Locked = true;
3772 Variable *T = nullptr; 3728 Variable *T = nullptr;
3773 _mov(T, Val); 3729 _mov(T, Val);
3774 _xadd(Addr, T, Locked); 3730 _xadd(Addr, T, Locked);
3775 _mov(Dest, T); 3731 _mov(Dest, T);
3776 return; 3732 return;
3777 } 3733 }
3778 case Intrinsics::AtomicSub: { 3734 case Intrinsics::AtomicSub: {
3779 if (Dest->getType() == IceType_i64) { 3735 if (Dest->getType() == IceType_i64) {
3780 NeedsCmpxchg = true; 3736 NeedsCmpxchg = true;
3781 Op_Lo = &TargetX86Base<Machine>::_sub; 3737 Op_Lo = &TargetX86Base<Machine>::_sub;
3782 Op_Hi = &TargetX86Base<Machine>::_sbb; 3738 Op_Hi = &TargetX86Base<Machine>::_sbb;
3783 break; 3739 break;
3784 } 3740 }
3785 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); 3741 typename Traits::X86OperandMem *Addr =
3742 formMemoryOperand(Ptr, Dest->getType());
3786 const bool Locked = true; 3743 const bool Locked = true;
3787 Variable *T = nullptr; 3744 Variable *T = nullptr;
3788 _mov(T, Val); 3745 _mov(T, Val);
3789 _neg(T); 3746 _neg(T);
3790 _xadd(Addr, T, Locked); 3747 _xadd(Addr, T, Locked);
3791 _mov(Dest, T); 3748 _mov(Dest, T);
3792 return; 3749 return;
3793 } 3750 }
3794 case Intrinsics::AtomicOr: 3751 case Intrinsics::AtomicOr:
3795 // TODO(jvoung): If Dest is null or dead, then some of these 3752 // TODO(jvoung): If Dest is null or dead, then some of these
(...skipping 17 matching lines...)
3813 break; 3770 break;
3814 case Intrinsics::AtomicExchange: 3771 case Intrinsics::AtomicExchange:
3815 if (Dest->getType() == IceType_i64) { 3772 if (Dest->getType() == IceType_i64) {
3816 NeedsCmpxchg = true; 3773 NeedsCmpxchg = true;
3817 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values 3774 // NeedsCmpxchg, but no real Op_Lo/Op_Hi need to be done. The values
3818 // just need to be moved to the ecx and ebx registers. 3775 // just need to be moved to the ecx and ebx registers.
3819 Op_Lo = nullptr; 3776 Op_Lo = nullptr;
3820 Op_Hi = nullptr; 3777 Op_Hi = nullptr;
3821 break; 3778 break;
3822 } 3779 }
3823 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Dest->getType()); 3780 typename Traits::X86OperandMem *Addr =
3781 formMemoryOperand(Ptr, Dest->getType());
3824 Variable *T = nullptr; 3782 Variable *T = nullptr;
3825 _mov(T, Val); 3783 _mov(T, Val);
3826 _xchg(Addr, T); 3784 _xchg(Addr, T);
3827 _mov(Dest, T); 3785 _mov(Dest, T);
3828 return; 3786 return;
3829 } 3787 }
3830 // Otherwise, we need a cmpxchg loop. 3788 // Otherwise, we need a cmpxchg loop.
3831 (void)NeedsCmpxchg; 3789 (void)NeedsCmpxchg;
3832 assert(NeedsCmpxchg); 3790 assert(NeedsCmpxchg);
3833 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val); 3791 expandAtomicRMWAsCmpxchg(Op_Lo, Op_Hi, Dest, Ptr, Val);
(...skipping 27 matching lines...)
3861 // lock cmpxchg [ptr], <reg> 3819 // lock cmpxchg [ptr], <reg>
3862 // jne .LABEL 3820 // jne .LABEL
3863 // mov <dest>, eax 3821 // mov <dest>, eax
3864 // 3822 //
3865 // If Op_{Lo,Hi} are nullptr, then just copy the value. 3823 // If Op_{Lo,Hi} are nullptr, then just copy the value.
3866 Val = legalize(Val); 3824 Val = legalize(Val);
3867 Type Ty = Val->getType(); 3825 Type Ty = Val->getType();
3868 if (Ty == IceType_i64) { 3826 if (Ty == IceType_i64) {
3869 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx); 3827 Variable *T_edx = makeReg(IceType_i32, Traits::RegisterSet::Reg_edx);
3870 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax); 3828 Variable *T_eax = makeReg(IceType_i32, Traits::RegisterSet::Reg_eax);
3871 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty); 3829 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3872 _mov(T_eax, loOperand(Addr)); 3830 _mov(T_eax, loOperand(Addr));
3873 _mov(T_edx, hiOperand(Addr)); 3831 _mov(T_edx, hiOperand(Addr));
3874 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx); 3832 Variable *T_ecx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ecx);
3875 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx); 3833 Variable *T_ebx = makeReg(IceType_i32, Traits::RegisterSet::Reg_ebx);
3876 InstX8632Label *Label = InstX8632Label::create(Func, this); 3834 typename Traits::Insts::Label *Label =
3835 Traits::Insts::Label::create(Func, this);
3877 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr; 3836 const bool IsXchg8b = Op_Lo == nullptr && Op_Hi == nullptr;
3878 if (!IsXchg8b) { 3837 if (!IsXchg8b) {
3879 Context.insert(Label); 3838 Context.insert(Label);
3880 _mov(T_ebx, T_eax); 3839 _mov(T_ebx, T_eax);
3881 (this->*Op_Lo)(T_ebx, loOperand(Val)); 3840 (this->*Op_Lo)(T_ebx, loOperand(Val));
3882 _mov(T_ecx, T_edx); 3841 _mov(T_ecx, T_edx);
3883 (this->*Op_Hi)(T_ecx, hiOperand(Val)); 3842 (this->*Op_Hi)(T_ecx, hiOperand(Val));
3884 } else { 3843 } else {
3885 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. 3844 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
3886 // It just needs the Val loaded into ebx and ecx. 3845 // It just needs the Val loaded into ebx and ecx.
(...skipping 21 matching lines...)
3908 } 3867 }
3909 // The address base (if any) is also reused in the loop. 3868 // The address base (if any) is also reused in the loop.
3910 if (Variable *Base = Addr->getBase()) 3869 if (Variable *Base = Addr->getBase())
3911 Context.insert(InstFakeUse::create(Func, Base)); 3870 Context.insert(InstFakeUse::create(Func, Base));
3912 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3871 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3913 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3872 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3914 _mov(DestLo, T_eax); 3873 _mov(DestLo, T_eax);
3915 _mov(DestHi, T_edx); 3874 _mov(DestHi, T_edx);
3916 return; 3875 return;
3917 } 3876 }
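The 32-bit path that follows emits the retry loop sketched in the comment above: load eax, apply the operation into a temporary, lock cmpxchg, and branch back on failure. A standalone C++ equivalent of that expansion using compare_exchange_weak (illustrative; the lambda stands in for the Op step):

  #include <atomic>
  #include <cassert>
  #include <cstdint>

  // Generic read-modify-write via a compare-and-swap retry loop: the same shape
  // as the mov / modify / lock cmpxchg / jne sequence expanded here.
  template <typename Op>
  static uint32_t atomicRMW(std::atomic<uint32_t> &Mem, Op Modify) {
    uint32_t Old = Mem.load();
    uint32_t New;
    do {
      New = Modify(Old);                            // the Op step on the loaded value
    } while (!Mem.compare_exchange_weak(Old, New)); // retry if another thread raced
    return Old;                                     // Dest gets the pre-RMW value (like eax)
  }

  int main() {
    std::atomic<uint32_t> Mem{10};
    uint32_t Prev = atomicRMW(Mem, [](uint32_t V) { return V + 5; });
    assert(Prev == 10 && Mem.load() == 15);
    return 0;
  }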
3918 OperandX8632Mem *Addr = formMemoryOperand(Ptr, Ty); 3877 typename Traits::X86OperandMem *Addr = formMemoryOperand(Ptr, Ty);
3919 Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax); 3878 Variable *T_eax = makeReg(Ty, Traits::RegisterSet::Reg_eax);
3920 _mov(T_eax, Addr); 3879 _mov(T_eax, Addr);
3921 InstX8632Label *Label = InstX8632Label::create(Func, this); 3880 typename Traits::Insts::Label *Label =
3881 Traits::Insts::Label::create(Func, this);
3922 Context.insert(Label); 3882 Context.insert(Label);
3923 // We want to pick a different register for T than Eax, so don't use 3883 // We want to pick a different register for T than Eax, so don't use
3924 // _mov(T == nullptr, T_eax). 3884 // _mov(T == nullptr, T_eax).
3925 Variable *T = makeReg(Ty); 3885 Variable *T = makeReg(Ty);
3926 _mov(T, T_eax); 3886 _mov(T, T_eax);
3927 (this->*Op_Lo)(T, Val); 3887 (this->*Op_Lo)(T, Val);
3928 const bool Locked = true; 3888 const bool Locked = true;
3929 _cmpxchg(Addr, T_eax, T, Locked); 3889 _cmpxchg(Addr, T_eax, T, Locked);
3930 _br(Traits::Cond::Br_ne, Label); 3890 _br(Traits::Cond::Br_ne, Label);
3931 // If Val is a variable, model the extended live range of Val through 3891 // If Val is a variable, model the extended live range of Val through
(...skipping 320 matching lines...)
4252 // Index is Index=Var-Const ==> 4212 // Index is Index=Var-Const ==>
4253 // set Index=Var, Offset-=(Const<<Shift) 4213 // set Index=Var, Offset-=(Const<<Shift)
4254 4214
4255 // TODO: consider overflow issues with respect to Offset. 4215 // TODO: consider overflow issues with respect to Offset.
4256 // TODO: handle symbolic constants. 4216 // TODO: handle symbolic constants.
4257 } 4217 }
4258 } 4218 }
4259 4219
4260 template <class Machine> 4220 template <class Machine>
4261 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) { 4221 void TargetX86Base<Machine>::lowerLoad(const InstLoad *Load) {
4262 // A Load instruction can be treated the same as an Assign 4222 // A Load instruction can be treated the same as an Assign instruction, after
4263 // instruction, after the source operand is transformed into an 4223 // the source operand is transformed into an typename Traits::X86OperandMem
jvoung (off chromium) 2015/07/07 00:00:18 Could omit the "typename" within a comment? I supp
John 2015/07/07 15:12:18 Yes, it makes sense. It did not occur to me to /\
4264 // OperandX8632Mem operand. Note that the address mode 4224 // operand. Note that the address mode optimization already creates an
4265 // optimization already creates an OperandX8632Mem operand, so it 4225 // typename Traits::X86OperandMem operand, so it doesn't need another level of
4266 // doesn't need another level of transformation. 4226 // transformation.
4267 Variable *DestLoad = Load->getDest(); 4227 Variable *DestLoad = Load->getDest();
4268 Type Ty = DestLoad->getType(); 4228 Type Ty = DestLoad->getType();
4269 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); 4229 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
4270 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); 4230 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
4271 lowerAssign(Assign); 4231 lowerAssign(Assign);
4272 } 4232 }
4273 4233
4274 template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() { 4234 template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() {
4275 Inst *Inst = Context.getCur(); 4235 Inst *Inst = Context.getCur();
4276 Variable *Dest = Inst->getDest(); 4236 Variable *Dest = Inst->getDest();
4277 Operand *Addr = Inst->getSrc(0); 4237 Operand *Addr = Inst->getSrc(0);
4278 Variable *Index = nullptr; 4238 Variable *Index = nullptr;
4279 uint16_t Shift = 0; 4239 uint16_t Shift = 0;
4280 int32_t Offset = 0; // TODO: make Constant 4240 int32_t Offset = 0; // TODO: make Constant
4281 // Vanilla ICE load instructions should not use the segment registers, 4241 // Vanilla ICE load instructions should not use the segment registers, and
4282 // and computeAddressOpt only works at the level of Variables and Constants, 4242 // computeAddressOpt only works at the level of Variables and Constants, not
4283 // not other OperandX8632Mem, so there should be no mention of segment 4243 // other typename Traits::X86OperandMem, so there should be no mention of
jvoung (off chromium) 2015/07/07 00:00:18 similar, in a few places other than this, if you a
John 2015/07/07 15:12:18 Done.
4284 // registers there either. 4244 // segment registers there either.
4285 const OperandX8632Mem::SegmentRegisters SegmentReg = 4245 const typename Traits::X86OperandMem::SegmentRegisters SegmentReg =
4286 OperandX8632Mem::DefaultSegment; 4246 Traits::X86OperandMem::DefaultSegment;
4287 Variable *Base = llvm::dyn_cast<Variable>(Addr); 4247 Variable *Base = llvm::dyn_cast<Variable>(Addr);
4288 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 4248 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
4289 if (Base && Addr != Base) { 4249 if (Base && Addr != Base) {
4290 Inst->setDeleted(); 4250 Inst->setDeleted();
4291 Constant *OffsetOp = Ctx->getConstantInt32(Offset); 4251 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
4292 Addr = OperandX8632Mem::create(Func, Dest->getType(), Base, OffsetOp, Index, 4252 Addr = Traits::X86OperandMem::create(Func, Dest->getType(), Base, OffsetOp,
4293 Shift, SegmentReg); 4253 Index, Shift, SegmentReg);
4294 Context.insert(InstLoad::create(Func, Dest, Addr)); 4254 Context.insert(InstLoad::create(Func, Dest, Addr));
4295 } 4255 }
4296 } 4256 }
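doAddressOptLoad above folds an address expression into the x86 form base + index*2^shift + offset so it can be encoded directly in the memory operand. A scalar sketch of evaluating that folded form (names and the example numbers are illustrative):

  #include <cassert>
  #include <cstdint>

  // Effective address of the folded operand built above:
  //   base + (index << shift) + offset  ==  [base + index*scale + disp]
  static uint32_t effectiveAddress(uint32_t Base, uint32_t Index, uint16_t Shift,
                                   int32_t Offset) {
    return Base + (Index << Shift) + static_cast<uint32_t>(Offset);
  }

  int main() {
    // e.g. element 3 of an int32 array at 0x1000, with a -4 displacement:
    assert(effectiveAddress(0x1000, 3, 2, -4) == 0x1008);
    return 0;
  }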
4297 4257
4298 template <class Machine> 4258 template <class Machine>
4299 void TargetX86Base<Machine>::randomlyInsertNop(float Probability) { 4259 void TargetX86Base<Machine>::randomlyInsertNop(float Probability) {
4300 RandomNumberGeneratorWrapper RNG(Ctx->getRNG()); 4260 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
4301 if (RNG.getTrueWithProbability(Probability)) { 4261 if (RNG.getTrueWithProbability(Probability)) {
4302 _nop(RNG(Traits::X86_NUM_NOP_VARIANTS)); 4262 _nop(RNG(Traits::X86_NUM_NOP_VARIANTS));
4303 } 4263 }
(...skipping 126 matching lines...)
4430 CmpOpnd1 = Ctx->getConstantZero(IceType_i32); 4390 CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
4431 } 4391 }
4432 assert(CmpOpnd0); 4392 assert(CmpOpnd0);
4433 assert(CmpOpnd1); 4393 assert(CmpOpnd1);
4434 4394
4435 _cmp(CmpOpnd0, CmpOpnd1); 4395 _cmp(CmpOpnd0, CmpOpnd1);
4436 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { 4396 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
4437 // The cmov instruction doesn't allow 8-bit or FP operands, so 4397 // The cmov instruction doesn't allow 8-bit or FP operands, so
4438 // we need explicit control flow. 4398 // we need explicit control flow.
4439 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: 4399 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
4440 InstX8632Label *Label = InstX8632Label::create(Func, this); 4400 typename Traits::Insts::Label *Label =
4401 Traits::Insts::Label::create(Func, this);
4441 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); 4402 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
4442 _mov(Dest, SrcT); 4403 _mov(Dest, SrcT);
4443 _br(Cond, Label); 4404 _br(Cond, Label);
4444 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); 4405 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
4445 _mov_nonkillable(Dest, SrcF); 4406 _mov_nonkillable(Dest, SrcF);
4446 Context.insert(Label); 4407 Context.insert(Label);
4447 return; 4408 return;
4448 } 4409 }
4449 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t 4410 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
4450 // But if SrcT is immediate, we might be able to do better, as 4411 // But if SrcT is immediate, we might be able to do better, as
4451 // the cmov instruction doesn't allow an immediate operand: 4412 // the cmov instruction doesn't allow an immediate operand:
4452 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t 4413 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
4453 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { 4414 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
4454 std::swap(SrcT, SrcF); 4415 std::swap(SrcT, SrcF);
4455 Cond = InstX8632::getOppositeCondition(Cond); 4416 Cond = InstX86Base<Machine>::getOppositeCondition(Cond);
4456 } 4417 }
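The swap above relies on select's operand symmetry: because cmov cannot take an immediate source, cond ? C : x is rewritten as !cond ? x : C so the constant goes into the initial mov and the cmov source is the non-constant operand. A tiny standalone check of that equivalence (illustrative only):

  #include <cassert>

  int main() {
    // cond ? C : x  ==  !cond ? x : C  for every cond and x.
    const int C = 7;
    for (int X = -2; X <= 2; ++X) {
      for (int CondVal = 0; CondVal <= 1; ++CondVal) {
        bool Cond = (CondVal != 0);
        int A = Cond ? C : X;  // original select
        int B = !Cond ? X : C; // swapped operands, opposite condition
        assert(A == B);
      }
    }
    return 0;
  }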
4457 if (DestTy == IceType_i64) { 4418 if (DestTy == IceType_i64) {
4458 // Set the low portion. 4419 // Set the low portion.
4459 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 4420 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
4460 Variable *TLo = nullptr; 4421 Variable *TLo = nullptr;
4461 Operand *SrcFLo = legalize(loOperand(SrcF)); 4422 Operand *SrcFLo = legalize(loOperand(SrcF));
4462 _mov(TLo, SrcFLo); 4423 _mov(TLo, SrcFLo);
4463 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem); 4424 Operand *SrcTLo = legalize(loOperand(SrcT), Legal_Reg | Legal_Mem);
4464 _cmov(TLo, SrcTLo, Cond); 4425 _cmov(TLo, SrcTLo, Cond);
4465 _mov(DestLo, TLo); 4426 _mov(DestLo, TLo);
(...skipping 14 matching lines...)
4480 _mov(T, SrcF); 4441 _mov(T, SrcF);
4481 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem); 4442 SrcT = legalize(SrcT, Legal_Reg | Legal_Mem);
4482 _cmov(T, SrcT, Cond); 4443 _cmov(T, SrcT, Cond);
4483 _mov(Dest, T); 4444 _mov(Dest, T);
4484 } 4445 }
4485 4446
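A minimal sketch of the branch-based select expansion described in the comment above (cmov cannot take 8-bit or floating-point operands, and the operand swap is used because cmov cannot take an immediate source). The function name and scalar types below are illustrative, not part of the patch:

// Dest = Cond ? SrcT : SrcF, expanded as:
//   mov Dest, SrcT; jcc L1; mov Dest, SrcF; L1:
// i.e. assign SrcT unconditionally, then overwrite with SrcF only when the
// condition does not hold.
static int selectViaBranch(bool Cond, int SrcT, int SrcF) {
  int Dest = SrcT; // _mov(Dest, SrcT)
  if (!Cond)       // the emitted jcc skips the next mov when Cond holds
    Dest = SrcF;   // _mov_nonkillable(Dest, SrcF)
  return Dest;     // label L1 is inserted here
}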
4486 template <class Machine> 4447 template <class Machine>
4487 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) { 4448 void TargetX86Base<Machine>::lowerStore(const InstStore *Inst) {
4488 Operand *Value = Inst->getData(); 4449 Operand *Value = Inst->getData();
4489 Operand *Addr = Inst->getAddr(); 4450 Operand *Addr = Inst->getAddr();
4490 OperandX8632Mem *NewAddr = formMemoryOperand(Addr, Value->getType()); 4451 typename Traits::X86OperandMem *NewAddr =
4452 formMemoryOperand(Addr, Value->getType());
4491 Type Ty = NewAddr->getType(); 4453 Type Ty = NewAddr->getType();
4492 4454
4493 if (Ty == IceType_i64) { 4455 if (Ty == IceType_i64) {
4494 Value = legalize(Value); 4456 Value = legalize(Value);
4495 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm); 4457 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm);
4496 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm); 4458 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm);
4497 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); 4459 _store(ValueHi,
4498 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); 4460 llvm::cast<typename Traits::X86OperandMem>(hiOperand(NewAddr)));
4461 _store(ValueLo,
4462 llvm::cast<typename Traits::X86OperandMem>(loOperand(NewAddr)));
4499 } else if (isVectorType(Ty)) { 4463 } else if (isVectorType(Ty)) {
4500 _storep(legalizeToVar(Value), NewAddr); 4464 _storep(legalizeToVar(Value), NewAddr);
4501 } else { 4465 } else {
4502 Value = legalize(Value, Legal_Reg | Legal_Imm); 4466 Value = legalize(Value, Legal_Reg | Legal_Imm);
4503 _store(Value, NewAddr); 4467 _store(Value, NewAddr);
4504 } 4468 }
4505 } 4469 }
4506 4470
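For reference, a hedged model of the 64-bit store split performed above: the value is written as two 32-bit halves through the lo/hi forms of the memory operand, low half at the base address and high half four bytes above it (x86 is little-endian). The helper name is illustrative:

#include <cstdint>
#include <cstring>

// Mirrors _store(ValueLo, loOperand(NewAddr)) / _store(ValueHi, hiOperand(NewAddr)).
static void storeI64AsTwoI32(void *Addr, uint64_t Value) {
  uint32_t Lo = static_cast<uint32_t>(Value);
  uint32_t Hi = static_cast<uint32_t>(Value >> 32);
  std::memcpy(Addr, &Lo, sizeof(Lo));                            // low half at Addr
  std::memcpy(static_cast<char *>(Addr) + 4, &Hi, sizeof(Hi));   // high half at Addr + 4
}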
4507 template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() { 4471 template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() {
4508 InstStore *Inst = llvm::cast<InstStore>(Context.getCur()); 4472 InstStore *Inst = llvm::cast<InstStore>(Context.getCur());
4509 Operand *Data = Inst->getData(); 4473 Operand *Data = Inst->getData();
4510 Operand *Addr = Inst->getAddr(); 4474 Operand *Addr = Inst->getAddr();
4511 Variable *Index = nullptr; 4475 Variable *Index = nullptr;
4512 uint16_t Shift = 0; 4476 uint16_t Shift = 0;
4513 int32_t Offset = 0; // TODO: make Constant 4477 int32_t Offset = 0; // TODO: make Constant
4514 Variable *Base = llvm::dyn_cast<Variable>(Addr); 4478 Variable *Base = llvm::dyn_cast<Variable>(Addr);
4515 // Vanilla ICE store instructions should not use the segment registers, 4479 // Vanilla ICE store instructions should not use the segment registers,
4516 // and computeAddressOpt only works at the level of Variables and Constants, 4480 // and computeAddressOpt only works at the level of Variables and Constants,
4517 // not other OperandX8632Mem, so there should be no mention of segment 4481 // not other typename Traits::X86OperandMem, so there should be no mention of
4482 // segment
jvoung (off chromium) 2015/07/07 00:00:18 could fit rest of comment on this line too
John 2015/07/07 15:12:18 Done.
4518 // registers there either. 4483 // registers there either.
4519 const OperandX8632Mem::SegmentRegisters SegmentReg = 4484 const typename Traits::X86OperandMem::SegmentRegisters SegmentReg =
4520 OperandX8632Mem::DefaultSegment; 4485 Traits::X86OperandMem::DefaultSegment;
4521 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); 4486 computeAddressOpt(Func, Inst, Base, Index, Shift, Offset);
4522 if (Base && Addr != Base) { 4487 if (Base && Addr != Base) {
4523 Inst->setDeleted(); 4488 Inst->setDeleted();
4524 Constant *OffsetOp = Ctx->getConstantInt32(Offset); 4489 Constant *OffsetOp = Ctx->getConstantInt32(Offset);
4525 Addr = OperandX8632Mem::create(Func, Data->getType(), Base, OffsetOp, Index, 4490 Addr = Traits::X86OperandMem::create(Func, Data->getType(), Base, OffsetOp,
4526 Shift, SegmentReg); 4491 Index, Shift, SegmentReg);
4527 InstStore *NewStore = InstStore::create(Func, Data, Addr); 4492 InstStore *NewStore = InstStore::create(Func, Data, Addr);
4528 if (Inst->getDest()) 4493 if (Inst->getDest())
4529 NewStore->setRmwBeacon(Inst->getRmwBeacon()); 4494 NewStore->setRmwBeacon(Inst->getRmwBeacon());
4530 Context.insert(NewStore); 4495 Context.insert(NewStore);
4531 } 4496 }
4532 } 4497 }
4533 4498
4534 template <class Machine> 4499 template <class Machine>
4535 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) { 4500 void TargetX86Base<Machine>::lowerSwitch(const InstSwitch *Inst) {
4536 // This implements the most naive possible lowering. 4501 // This implements the most naive possible lowering.
4537 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default 4502 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
4538 Operand *Src0 = Inst->getComparison(); 4503 Operand *Src0 = Inst->getComparison();
4539 SizeT NumCases = Inst->getNumCases(); 4504 SizeT NumCases = Inst->getNumCases();
4540 if (Src0->getType() == IceType_i64) { 4505 if (Src0->getType() == IceType_i64) {
4541 Src0 = legalize(Src0); // get Base/Index into physical registers 4506 Src0 = legalize(Src0); // get Base/Index into physical registers
4542 Operand *Src0Lo = loOperand(Src0); 4507 Operand *Src0Lo = loOperand(Src0);
4543 Operand *Src0Hi = hiOperand(Src0); 4508 Operand *Src0Hi = hiOperand(Src0);
4544 if (NumCases >= 2) { 4509 if (NumCases >= 2) {
4545 Src0Lo = legalizeToVar(Src0Lo); 4510 Src0Lo = legalizeToVar(Src0Lo);
4546 Src0Hi = legalizeToVar(Src0Hi); 4511 Src0Hi = legalizeToVar(Src0Hi);
4547 } else { 4512 } else {
4548 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem); 4513 Src0Lo = legalize(Src0Lo, Legal_Reg | Legal_Mem);
4549 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem); 4514 Src0Hi = legalize(Src0Hi, Legal_Reg | Legal_Mem);
4550 } 4515 }
4551 for (SizeT I = 0; I < NumCases; ++I) { 4516 for (SizeT I = 0; I < NumCases; ++I) {
4552 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I)); 4517 Constant *ValueLo = Ctx->getConstantInt32(Inst->getValue(I));
4553 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32); 4518 Constant *ValueHi = Ctx->getConstantInt32(Inst->getValue(I) >> 32);
4554 InstX8632Label *Label = InstX8632Label::create(Func, this); 4519 typename Traits::Insts::Label *Label =
4520 Traits::Insts::Label::create(Func, this);
4555 _cmp(Src0Lo, ValueLo); 4521 _cmp(Src0Lo, ValueLo);
4556 _br(Traits::Cond::Br_ne, Label); 4522 _br(Traits::Cond::Br_ne, Label);
4557 _cmp(Src0Hi, ValueHi); 4523 _cmp(Src0Hi, ValueHi);
4558 _br(Traits::Cond::Br_e, Inst->getLabel(I)); 4524 _br(Traits::Cond::Br_e, Inst->getLabel(I));
4559 Context.insert(Label); 4525 Context.insert(Label);
4560 } 4526 }
4561 _br(Inst->getLabelDefault()); 4527 _br(Inst->getLabelDefault());
4562 return; 4528 return;
4563 } 4529 }
4564 // OK, we'll be slightly less naive by forcing Src into a physical 4530 // OK, we'll be slightly less naive by forcing Src into a physical
(...skipping 66 matching lines...)
4631 } 4597 }
4632 } 4598 }
4633 4599
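A small sketch of the per-case test emitted in the i64 path of lowerSwitch above: the low halves are compared first and a mismatch skips to the next case, while the case label is taken only when the high halves also match. The names below are illustrative:

#include <cstdint>

static bool matchesCase64(uint32_t Src0Lo, uint32_t Src0Hi, uint64_t CaseValue) {
  if (Src0Lo != static_cast<uint32_t>(CaseValue))           // cmp Src0Lo, ValueLo; jne Label
    return false;                                           // fall through to the next case
  return Src0Hi == static_cast<uint32_t>(CaseValue >> 32);  // cmp Src0Hi, ValueHi; je target
}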
4634 template <class Machine> 4600 template <class Machine>
4635 void TargetX86Base<Machine>::lowerUnreachable( 4601 void TargetX86Base<Machine>::lowerUnreachable(
4636 const InstUnreachable * /*Inst*/) { 4602 const InstUnreachable * /*Inst*/) {
4637 _ud2(); 4603 _ud2();
4638 } 4604 }
4639 4605
4640 template <class Machine> 4606 template <class Machine>
4641 void TargetX86Base<Machine>::lowerRMW(const InstX8632FakeRMW *RMW) { 4607 void TargetX86Base<Machine>::lowerRMW(
4608 const typename Traits::Insts::FakeRMW *RMW) {
4642 // If the beacon variable's live range does not end in this 4609 // If the beacon variable's live range does not end in this
4643 // instruction, then it must end in the modified Store instruction 4610 // instruction, then it must end in the modified Store instruction
4644 // that follows. This means that the original Store instruction is 4611 // that follows. This means that the original Store instruction is
4645 // still there, either because the value being stored is used beyond 4612 // still there, either because the value being stored is used beyond
4646 // the Store instruction, or because dead code elimination did not 4613 // the Store instruction, or because dead code elimination did not
4647 // happen. In either case, we cancel RMW lowering (and the caller 4614 // happen. In either case, we cancel RMW lowering (and the caller
4648 // deletes the RMW instruction). 4615 // deletes the RMW instruction).
4649 if (!RMW->isLastUse(RMW->getBeacon())) 4616 if (!RMW->isLastUse(RMW->getBeacon()))
4650 return; 4617 return;
4651 Operand *Src = RMW->getData(); 4618 Operand *Src = RMW->getData();
4652 Type Ty = Src->getType(); 4619 Type Ty = Src->getType();
4653 OperandX8632Mem *Addr = formMemoryOperand(RMW->getAddr(), Ty); 4620 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
4654 if (Ty == IceType_i64) { 4621 if (Ty == IceType_i64) {
4655 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm); 4622 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
4656 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm); 4623 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
4657 OperandX8632Mem *AddrLo = llvm::cast<OperandX8632Mem>(loOperand(Addr)); 4624 typename Traits::X86OperandMem *AddrLo =
4658 OperandX8632Mem *AddrHi = llvm::cast<OperandX8632Mem>(hiOperand(Addr)); 4625 llvm::cast<typename Traits::X86OperandMem>(loOperand(Addr));
4626 typename Traits::X86OperandMem *AddrHi =
4627 llvm::cast<typename Traits::X86OperandMem>(hiOperand(Addr));
4659 switch (RMW->getOp()) { 4628 switch (RMW->getOp()) {
4660 default: 4629 default:
4661 // TODO(stichnot): Implement other arithmetic operators. 4630 // TODO(stichnot): Implement other arithmetic operators.
4662 break; 4631 break;
4663 case InstArithmetic::Add: 4632 case InstArithmetic::Add:
4664 _add_rmw(AddrLo, SrcLo); 4633 _add_rmw(AddrLo, SrcLo);
4665 _adc_rmw(AddrHi, SrcHi); 4634 _adc_rmw(AddrHi, SrcHi);
4666 return; 4635 return;
4667 case InstArithmetic::Sub: 4636 case InstArithmetic::Sub:
4668 _sub_rmw(AddrLo, SrcLo); 4637 _sub_rmw(AddrLo, SrcLo);
(...skipping 38 matching lines...)
4707 Src = legalize(Src, Legal_Reg | Legal_Imm); 4676 Src = legalize(Src, Legal_Reg | Legal_Imm);
4708 _xor_rmw(Addr, Src); 4677 _xor_rmw(Addr, Src);
4709 return; 4678 return;
4710 } 4679 }
4711 } 4680 }
4712 llvm::report_fatal_error("Couldn't lower RMW instruction"); 4681 llvm::report_fatal_error("Couldn't lower RMW instruction");
4713 } 4682 }
4714 4683
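A hedged model of the 64-bit read-modify-write Add case above: the low words are added and the carry is propagated into the high words, which is what the _add_rmw/_adc_rmw pair expresses on the two halves of the memory operand. The helper name is illustrative:

#include <cstdint>

static void rmwAdd64(uint32_t *AddrLo, uint32_t *AddrHi, uint64_t Src) {
  uint32_t SrcLo = static_cast<uint32_t>(Src);
  uint32_t SrcHi = static_cast<uint32_t>(Src >> 32);
  uint32_t OldLo = *AddrLo;
  *AddrLo = OldLo + SrcLo;                  // add [AddrLo], SrcLo
  uint32_t Carry = (*AddrLo < OldLo) ? 1u : 0u;
  *AddrHi = *AddrHi + SrcHi + Carry;        // adc [AddrHi], SrcHi
}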
4715 template <class Machine> 4684 template <class Machine>
4716 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { 4685 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) {
4717 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { 4686 if (const auto *RMW =
4687 llvm::dyn_cast<typename Traits::Insts::FakeRMW>(Instr)) {
4718 lowerRMW(RMW); 4688 lowerRMW(RMW);
4719 } else { 4689 } else {
4720 TargetLowering::lowerOther(Instr); 4690 TargetLowering::lowerOther(Instr);
4721 } 4691 }
4722 } 4692 }
4723 4693
4724 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to 4694 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to
4725 // preserve integrity of liveness analysis. Undef values are also 4695 // preserve integrity of liveness analysis. Undef values are also
4726 // turned into zeroes, since loOperand() and hiOperand() don't expect 4696 // turned into zeroes, since loOperand() and hiOperand() don't expect
4727 // Undef input. 4697 // Undef input.
(...skipping 255 matching lines...)
4983 // vector constants in memory. 4953 // vector constants in memory.
4984 template <class Machine> 4954 template <class Machine>
4985 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, 4955 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty,
4986 int32_t RegNum) { 4956 int32_t RegNum) {
4987 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); 4957 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
4988 _psrl(Reg, Ctx->getConstantInt8(1)); 4958 _psrl(Reg, Ctx->getConstantInt8(1));
4989 return Reg; 4959 return Reg;
4990 } 4960 }
4991 4961
4992 template <class Machine> 4962 template <class Machine>
4993 OperandX8632Mem * 4963 typename TargetX86Base<Machine>::Traits::X86OperandMem *
4994 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, 4964 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
4995 uint32_t Offset) { 4965 uint32_t Offset) {
4996 // Ensure that Loc is a stack slot. 4966 // Ensure that Loc is a stack slot.
4997 assert(Slot->getWeight().isZero()); 4967 assert(Slot->getWeight().isZero());
4998 assert(Slot->getRegNum() == Variable::NoRegister); 4968 assert(Slot->getRegNum() == Variable::NoRegister);
4999 // Compute the location of Loc in memory. 4969 // Compute the location of Loc in memory.
5000 // TODO(wala,stichnot): lea should not be required. The address of 4970 // TODO(wala,stichnot): lea should not be required. The address of
5001 // the stack slot is known at compile time (although not until after 4971 // the stack slot is known at compile time (although not until after
5002 // addProlog()). 4972 // addProlog()).
5003 const Type PointerType = IceType_i32; 4973 const Type PointerType = IceType_i32;
5004 Variable *Loc = makeReg(PointerType); 4974 Variable *Loc = makeReg(PointerType);
5005 _lea(Loc, Slot); 4975 _lea(Loc, Slot);
5006 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); 4976 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
5007 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); 4977 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
5008 } 4978 }
5009 4979
5010 // Helper for legalize() to emit the right code to lower an operand to a 4980 // Helper for legalize() to emit the right code to lower an operand to a
5011 // register of the appropriate type. 4981 // register of the appropriate type.
5012 template <class Machine> 4982 template <class Machine>
5013 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { 4983 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
5014 Type Ty = Src->getType(); 4984 Type Ty = Src->getType();
5015 Variable *Reg = makeReg(Ty, RegNum); 4985 Variable *Reg = makeReg(Ty, RegNum);
5016 if (isVectorType(Ty)) { 4986 if (isVectorType(Ty)) {
5017 _movp(Reg, Src); 4987 _movp(Reg, Src);
(...skipping 11 matching lines...)
5029 // to legalize() allow a physical register. If a physical register 4999 // to legalize() allow a physical register. If a physical register
5030 // needs to be explicitly disallowed, then new code will need to be 5000 // needs to be explicitly disallowed, then new code will need to be
5031 // written to force a spill. 5001 // written to force a spill.
5032 assert(Allowed & Legal_Reg); 5002 assert(Allowed & Legal_Reg);
5033 // If we're asking for a specific physical register, make sure we're 5003 // If we're asking for a specific physical register, make sure we're
5034 // not allowing any other operand kinds. (This could be future 5004 // not allowing any other operand kinds. (This could be future
5035 // work, e.g. allow the shl shift amount to be either an immediate 5005 // work, e.g. allow the shl shift amount to be either an immediate
5036 // or in ecx.) 5006 // or in ecx.)
5037 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg); 5007 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
5038 5008
5039 if (auto Mem = llvm::dyn_cast<OperandX8632Mem>(From)) { 5009 if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
5040 // Before doing anything with a Mem operand, we need to ensure 5010 // Before doing anything with a Mem operand, we need to ensure
5041 // that the Base and Index components are in physical registers. 5011 // that the Base and Index components are in physical registers.
5042 Variable *Base = Mem->getBase(); 5012 Variable *Base = Mem->getBase();
5043 Variable *Index = Mem->getIndex(); 5013 Variable *Index = Mem->getIndex();
5044 Variable *RegBase = nullptr; 5014 Variable *RegBase = nullptr;
5045 Variable *RegIndex = nullptr; 5015 Variable *RegIndex = nullptr;
5046 if (Base) { 5016 if (Base) {
5047 RegBase = legalizeToVar(Base); 5017 RegBase = legalizeToVar(Base);
5048 } 5018 }
5049 if (Index) { 5019 if (Index) {
5050 RegIndex = legalizeToVar(Index); 5020 RegIndex = legalizeToVar(Index);
5051 } 5021 }
5052 if (Base != RegBase || Index != RegIndex) { 5022 if (Base != RegBase || Index != RegIndex) {
5053 Mem = 5023 Mem = Traits::X86OperandMem::create(Func, Ty, RegBase, Mem->getOffset(),
5054 OperandX8632Mem::create(Func, Ty, RegBase, Mem->getOffset(), RegIndex, 5024 RegIndex, Mem->getShift(),
5055 Mem->getShift(), Mem->getSegmentRegister()); 5025 Mem->getSegmentRegister());
5056 } 5026 }
5057 5027
5058 // For all Memory Operands, we do randomization/pooling here 5028 // For all Memory Operands, we do randomization/pooling here
5059 From = randomizeOrPoolImmediate(Mem); 5029 From = randomizeOrPoolImmediate(Mem);
5060 5030
5061 if (!(Allowed & Legal_Mem)) { 5031 if (!(Allowed & Legal_Mem)) {
5062 From = copyToReg(From, RegNum); 5032 From = copyToReg(From, RegNum);
5063 } 5033 }
5064 return From; 5034 return From;
5065 } 5035 }
(...skipping 29 matching lines...)
5095 5065
5096 // Convert a scalar floating point constant into an explicit 5066 // Convert a scalar floating point constant into an explicit
5097 // memory operand. 5067 // memory operand.
5098 if (isScalarFloatingType(Ty)) { 5068 if (isScalarFloatingType(Ty)) {
5099 Variable *Base = nullptr; 5069 Variable *Base = nullptr;
5100 std::string Buffer; 5070 std::string Buffer;
5101 llvm::raw_string_ostream StrBuf(Buffer); 5071 llvm::raw_string_ostream StrBuf(Buffer);
5102 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf); 5072 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
5103 llvm::cast<Constant>(From)->setShouldBePooled(true); 5073 llvm::cast<Constant>(From)->setShouldBePooled(true);
5104 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); 5074 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
5105 From = OperandX8632Mem::create(Func, Ty, Base, Offset); 5075 From = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
5106 } 5076 }
5107 bool NeedsReg = false; 5077 bool NeedsReg = false;
5108 if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty)) 5078 if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty))
5109 // Immediate specifically not allowed 5079 // Immediate specifically not allowed
5110 NeedsReg = true; 5080 NeedsReg = true;
5111 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) 5081 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))
5112 // On x86, FP constants are lowered to mem operands. 5082 // On x86, FP constants are lowered to mem operands.
5113 NeedsReg = true; 5083 NeedsReg = true;
5114 if (NeedsReg) { 5084 if (NeedsReg) {
5115 From = copyToReg(From, RegNum); 5085 From = copyToReg(From, RegNum);
(...skipping 38 matching lines...)
5154 if (llvm::isa<Constant>(Src1)) { 5124 if (llvm::isa<Constant>(Src1)) {
5155 IsSrc1ImmOrReg = true; 5125 IsSrc1ImmOrReg = true;
5156 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { 5126 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
5157 if (Var->hasReg()) 5127 if (Var->hasReg())
5158 IsSrc1ImmOrReg = true; 5128 IsSrc1ImmOrReg = true;
5159 } 5129 }
5160 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); 5130 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
5161 } 5131 }
5162 5132
5163 template <class Machine> 5133 template <class Machine>
5164 OperandX8632Mem *TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, 5134 typename TargetX86Base<Machine>::Traits::X86OperandMem *
5165 Type Ty, 5135 TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, Type Ty,
5166 bool DoLegalize) { 5136 bool DoLegalize) {
5167 OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Opnd); 5137 typename Traits::X86OperandMem *Mem =
jvoung (off chromium) 2015/07/07 00:00:18 auto for dyn_cast ?
5138 llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd);
5168 // It may be the case that address mode optimization already creates 5139 // It may be the case that address mode optimization already creates
5169 // an OperandX8632Mem, so in that case it wouldn't need another level 5140 // an typename Traits::X86OperandMem, so in that case it wouldn't need another
5141 // level
jvoung (off chromium) 2015/07/07 00:00:18 could fit rest of comment here
John 2015/07/07 15:12:18 Done.
5170 // of transformation. 5142 // of transformation.
5171 if (!Mem) { 5143 if (!Mem) {
5172 Variable *Base = llvm::dyn_cast<Variable>(Opnd); 5144 Variable *Base = llvm::dyn_cast<Variable>(Opnd);
5173 Constant *Offset = llvm::dyn_cast<Constant>(Opnd); 5145 Constant *Offset = llvm::dyn_cast<Constant>(Opnd);
5174 assert(Base || Offset); 5146 assert(Base || Offset);
5175 if (Offset) { 5147 if (Offset) {
5176 // During memory operand building, we do not blind or pool 5148 // During memory operand building, we do not blind or pool
5177 // the constant offset, we will work on the whole memory 5149 // the constant offset, we will work on the whole memory
5178 // operand later as one entity later, this save one instruction. 5150 // operand later as one entity later, this save one instruction.
5179 // By turning blinding and pooling off, we guarantee 5151 // By turning blinding and pooling off, we guarantee
5180 // legalize(Offset) will return a constant*. 5152 // legalize(Offset) will return a constant*.
5181 { 5153 {
5182 BoolFlagSaver B(RandomizationPoolingPaused, true); 5154 BoolFlagSaver B(RandomizationPoolingPaused, true);
5183 5155
5184 Offset = llvm::cast<Constant>(legalize(Offset)); 5156 Offset = llvm::cast<Constant>(legalize(Offset));
5185 } 5157 }
5186 5158
5187 assert(llvm::isa<ConstantInteger32>(Offset) || 5159 assert(llvm::isa<ConstantInteger32>(Offset) ||
5188 llvm::isa<ConstantRelocatable>(Offset)); 5160 llvm::isa<ConstantRelocatable>(Offset));
5189 } 5161 }
5190 Mem = OperandX8632Mem::create(Func, Ty, Base, Offset); 5162 Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
5191 } 5163 }
5192 // Do legalization, which contains randomization/pooling 5164 // Do legalization, which contains randomization/pooling
5193 // or do randomization/pooling. 5165 // or do randomization/pooling.
5194 return llvm::cast<OperandX8632Mem>( 5166 return llvm::cast<typename Traits::X86OperandMem>(
5195 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); 5167 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
5196 } 5168 }
5197 5169
5198 template <class Machine> 5170 template <class Machine>
5199 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) { 5171 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {
5200 // There aren't any 64-bit integer registers for x86-32. 5172 // There aren't any 64-bit integer registers for x86-32.
5201 assert(Type != IceType_i64); 5173 assert(Type != IceType_i64);
5202 Variable *Reg = Func->template makeVariable(Type); 5174 Variable *Reg = Func->template makeVariable(Type);
5203 if (RegNum == Variable::NoRegister) 5175 if (RegNum == Variable::NoRegister)
5204 Reg->setWeightInfinite(); 5176 Reg->setWeightInfinite();
5205 else 5177 else
5206 Reg->setRegNum(RegNum); 5178 Reg->setRegNum(RegNum);
5207 return Reg; 5179 return Reg;
5208 } 5180 }
5209 5181
5210 template <class Machine> void TargetX86Base<Machine>::postLower() { 5182 template <class Machine> void TargetX86Base<Machine>::postLower() {
5211 if (Ctx->getFlags().getOptLevel() == Opt_m1) 5183 if (Ctx->getFlags().getOptLevel() == Opt_m1)
5212 return; 5184 return;
5213 inferTwoAddress(); 5185 inferTwoAddress();
5214 } 5186 }
5215 5187
5216 template <class Machine> 5188 template <class Machine>
5217 void TargetX86Base<Machine>::makeRandomRegisterPermutation( 5189 void TargetX86Base<Machine>::makeRandomRegisterPermutation(
5218 llvm::SmallVectorImpl<int32_t> &Permutation, 5190 llvm::SmallVectorImpl<int32_t> &Permutation,
5219 const llvm::SmallBitVector &ExcludeRegisters) const { 5191 const llvm::SmallBitVector &ExcludeRegisters) const {
5220 // TODO(stichnot): Declaring Permutation this way loses type/size 5192 Traits::makeRandomRegisterPermutation(Ctx, Func, Permutation,
5221 // information. Fix this in conjunction with the caller-side TODO. 5193 ExcludeRegisters);
5222 assert(Permutation.size() >= Traits::RegisterSet::Reg_NUM);
5223 // Expected upper bound on the number of registers in a single
5224 // equivalence class. For x86-32, this would comprise the 8 XMM
5225 // registers. This is for performance, not correctness.
5226 static const unsigned MaxEquivalenceClassSize = 8;
5227 typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList;
5228 typedef std::map<uint32_t, RegisterList> EquivalenceClassMap;
5229 EquivalenceClassMap EquivalenceClasses;
5230 SizeT NumShuffled = 0, NumPreserved = 0;
5231
5232 // Build up the equivalence classes of registers by looking at the
5233 // register properties as well as whether the registers should be
5234 // explicitly excluded from shuffling.
5235 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
5236 frameptr, isI8, isInt, isFP) \
5237 if (ExcludeRegisters[Traits::RegisterSet::val]) { \
5238 /* val stays the same in the resulting permutation. */ \
5239 Permutation[Traits::RegisterSet::val] = Traits::RegisterSet::val; \
5240 ++NumPreserved; \
5241 } else { \
5242 const uint32_t Index = (scratch << 0) | (preserved << 1) | (isI8 << 2) | \
5243 (isInt << 3) | (isFP << 4); \
5244 /* val is assigned to an equivalence class based on its properties. */ \
5245 EquivalenceClasses[Index].push_back(Traits::RegisterSet::val); \
5246 }
5247 REGX8632_TABLE
5248 #undef X
5249
5250 RandomNumberGeneratorWrapper RNG(Ctx->getRNG());
5251
5252 // Shuffle the resulting equivalence classes.
5253 for (auto I : EquivalenceClasses) {
5254 const RegisterList &List = I.second;
5255 RegisterList Shuffled(List);
5256 RandomShuffle(Shuffled.begin(), Shuffled.end(), RNG);
5257 for (size_t SI = 0, SE = Shuffled.size(); SI < SE; ++SI) {
5258 Permutation[List[SI]] = Shuffled[SI];
5259 ++NumShuffled;
5260 }
5261 }
5262
5263 assert(NumShuffled + NumPreserved == Traits::RegisterSet::Reg_NUM);
5264
5265 if (Func->isVerbose(IceV_Random)) {
5266 OstreamLocker L(Func->getContext());
5267 Ostream &Str = Func->getContext()->getStrDump();
5268 Str << "Register equivalence classes:\n";
5269 for (auto I : EquivalenceClasses) {
5270 Str << "{";
5271 const RegisterList &List = I.second;
5272 bool First = true;
5273 for (int32_t Register : List) {
5274 if (!First)
5275 Str << " ";
5276 First = false;
5277 Str << getRegName(Register, IceType_i32);
5278 }
5279 Str << "}\n";
5280 }
5281 }
5282 } 5194 }
5283 5195
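For context, a compact sketch of the permutation scheme that the removed block implemented and that Traits::makeRandomRegisterPermutation is now expected to provide: registers are grouped into equivalence classes keyed by their property bits and shuffled only within their own class, so a permuted register is always interchangeable with the original. The class keys and container choices below are illustrative:

#include <algorithm>
#include <cstdint>
#include <map>
#include <random>
#include <vector>

// Shuffle registers within property-based equivalence classes; excluded
// registers map to themselves.
static void permuteWithinClasses(std::vector<int32_t> &Permutation,
                                 const std::vector<uint32_t> &ClassKey,
                                 const std::vector<bool> &Exclude,
                                 std::mt19937 &RNG) {
  std::map<uint32_t, std::vector<int32_t>> Classes;
  for (int32_t Reg = 0; Reg < static_cast<int32_t>(Permutation.size()); ++Reg) {
    if (Exclude[Reg])
      Permutation[Reg] = Reg;                 // preserved, stays in place
    else
      Classes[ClassKey[Reg]].push_back(Reg);  // grouped by property bits
  }
  for (auto &Entry : Classes) {
    std::vector<int32_t> Shuffled = Entry.second;
    std::shuffle(Shuffled.begin(), Shuffled.end(), RNG);
    for (size_t I = 0; I < Shuffled.size(); ++I)
      Permutation[Entry.second[I]] = Shuffled[I];
  }
}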
5284 template <class Machine> 5196 template <class Machine>
5285 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const { 5197 void TargetX86Base<Machine>::emit(const ConstantInteger32 *C) const {
5286 if (!BuildDefs::dump()) 5198 if (!BuildDefs::dump())
5287 return; 5199 return;
5288 Ostream &Str = Ctx->getStrEmit(); 5200 Ostream &Str = Ctx->getStrEmit();
5289 Str << getConstantPrefix() << C->getValue(); 5201 Str << getConstantPrefix() << C->getValue();
5290 } 5202 }
5291 5203
(...skipping 50 matching lines...)
5342 // the assigned register as this assignment is that start of its use-def 5254 // the assigned register as this assignment is that start of its use-def
5343 // chain. So we add RegNum argument here. 5255 // chain. So we add RegNum argument here.
5344 // Note we use 'lea' instruction instead of 'xor' to avoid affecting 5256 // Note we use 'lea' instruction instead of 'xor' to avoid affecting
5345 // the flags. 5257 // the flags.
5346 Variable *Reg = makeReg(IceType_i32, RegNum); 5258 Variable *Reg = makeReg(IceType_i32, RegNum);
5347 ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate); 5259 ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate);
5348 uint32_t Value = Integer->getValue(); 5260 uint32_t Value = Integer->getValue();
5349 uint32_t Cookie = Ctx->getRandomizationCookie(); 5261 uint32_t Cookie = Ctx->getRandomizationCookie();
5350 _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value)); 5262 _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value));
5351 Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie); 5263 Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie);
5352 _lea(Reg, 5264 _lea(Reg, Traits::X86OperandMem::create(Func, IceType_i32, Reg, Offset,
5353 OperandX8632Mem::create(Func, IceType_i32, Reg, Offset, nullptr, 0)); 5265 nullptr, 0));
5354 // make sure liveness analysis won't kill this variable, otherwise a 5266 // make sure liveness analysis won't kill this variable, otherwise a
5355 // liveness 5267 // liveness
5356 // assertion will be triggered. 5268 // assertion will be triggered.
5357 _set_dest_nonkillable(); 5269 _set_dest_nonkillable();
5358 if (Immediate->getType() != IceType_i32) { 5270 if (Immediate->getType() != IceType_i32) {
5359 Variable *TruncReg = makeReg(Immediate->getType(), RegNum); 5271 Variable *TruncReg = makeReg(Immediate->getType(), RegNum);
5360 _mov(TruncReg, Reg); 5272 _mov(TruncReg, Reg);
5361 return TruncReg; 5273 return TruncReg;
5362 } 5274 }
5363 return Reg; 5275 return Reg;
(...skipping 12 matching lines...)
5376 // the assigned register as this assignment is that start of its use-def 5288 // the assigned register as this assignment is that start of its use-def
5377 // chain. So we add RegNum argument here. 5289 // chain. So we add RegNum argument here.
5378 Variable *Reg = makeReg(Immediate->getType(), RegNum); 5290 Variable *Reg = makeReg(Immediate->getType(), RegNum);
5379 IceString Label; 5291 IceString Label;
5380 llvm::raw_string_ostream Label_stream(Label); 5292 llvm::raw_string_ostream Label_stream(Label);
5381 Immediate->emitPoolLabel(Label_stream); 5293 Immediate->emitPoolLabel(Label_stream);
5382 const RelocOffsetT Offset = 0; 5294 const RelocOffsetT Offset = 0;
5383 const bool SuppressMangling = true; 5295 const bool SuppressMangling = true;
5384 Constant *Symbol = 5296 Constant *Symbol =
5385 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); 5297 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling);
5386 OperandX8632Mem *MemOperand = 5298 typename Traits::X86OperandMem *MemOperand =
5387 OperandX8632Mem::create(Func, Immediate->getType(), nullptr, Symbol); 5299 Traits::X86OperandMem::create(Func, Immediate->getType(), nullptr,
5300 Symbol);
5388 _mov(Reg, MemOperand); 5301 _mov(Reg, MemOperand);
5389 return Reg; 5302 return Reg;
5390 } 5303 }
5391 assert("Unsupported -randomize-pool-immediates option" && false); 5304 assert("Unsupported -randomize-pool-immediates option" && false);
5392 } 5305 }
5393 // the constant Immediate is not eligible for blinding/pooling 5306 // the constant Immediate is not eligible for blinding/pooling
5394 return Immediate; 5307 return Immediate;
5395 } 5308 }
5396 5309
5397 template <class Machine> 5310 template <class Machine>
5398 OperandX8632Mem * 5311 typename TargetX86Base<Machine>::Traits::X86OperandMem *
5399 TargetX86Base<Machine>::randomizeOrPoolImmediate(OperandX8632Mem *MemOperand, 5312 TargetX86Base<Machine>::randomizeOrPoolImmediate(
5400 int32_t RegNum) { 5313 typename Traits::X86OperandMem *MemOperand, int32_t RegNum) {
5401 assert(MemOperand); 5314 assert(MemOperand);
5402 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || 5315 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
5403 RandomizationPoolingPaused == true) { 5316 RandomizationPoolingPaused == true) {
5404 // immediates randomization/pooling is turned off 5317 // immediates randomization/pooling is turned off
5405 return MemOperand; 5318 return MemOperand;
5406 } 5319 }
5407 5320
5408 // If this memory operand is already a randommized one, we do 5321 // If this memory operand is already a randommized one, we do
5409 // not randomize it again. 5322 // not randomize it again.
5410 if (MemOperand->getRandomized()) 5323 if (MemOperand->getRandomized())
(...skipping 13 matching lines...)
5424 // => -cookie[RegTemp, index, shift] 5337 // => -cookie[RegTemp, index, shift]
5425 uint32_t Value = 5338 uint32_t Value =
5426 llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset()) 5339 llvm::dyn_cast<ConstantInteger32>(MemOperand->getOffset())
5427 ->getValue(); 5340 ->getValue();
5428 uint32_t Cookie = Ctx->getRandomizationCookie(); 5341 uint32_t Cookie = Ctx->getRandomizationCookie();
5429 Constant *Mask1 = Ctx->getConstantInt( 5342 Constant *Mask1 = Ctx->getConstantInt(
5430 MemOperand->getOffset()->getType(), Cookie + Value); 5343 MemOperand->getOffset()->getType(), Cookie + Value);
5431 Constant *Mask2 = 5344 Constant *Mask2 =
5432 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie); 5345 Ctx->getConstantInt(MemOperand->getOffset()->getType(), 0 - Cookie);
5433 5346
5434 OperandX8632Mem *TempMemOperand = OperandX8632Mem::create( 5347 typename Traits::X86OperandMem *TempMemOperand =
5435 Func, MemOperand->getType(), MemOperand->getBase(), Mask1); 5348 Traits::X86OperandMem::create(Func, MemOperand->getType(),
5349 MemOperand->getBase(), Mask1);
5436 // If we have already assigned a physical register, we must come from 5350 // If we have already assigned a physical register, we must come from
5437 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse 5351 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
5438 // the assigned register as this assignment is that start of its use-def 5352 // the assigned register as this assignment is that start of its use-def
5439 // chain. So we add RegNum argument here. 5353 // chain. So we add RegNum argument here.
5440 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); 5354 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
5441 _lea(RegTemp, TempMemOperand); 5355 _lea(RegTemp, TempMemOperand);
5442 // As source operand doesn't use the dstreg, we don't need to add 5356 // As source operand doesn't use the dstreg, we don't need to add
5443 // _set_dest_nonkillable(). 5357 // _set_dest_nonkillable().
5444 // But if we use the same Dest Reg, that is, with RegNum 5358 // But if we use the same Dest Reg, that is, with RegNum
5445 // assigned, we should add this _set_dest_nonkillable() 5359 // assigned, we should add this _set_dest_nonkillable()
5446 if (RegNum != Variable::NoRegister) 5360 if (RegNum != Variable::NoRegister)
5447 _set_dest_nonkillable(); 5361 _set_dest_nonkillable();
5448 5362
5449 OperandX8632Mem *NewMemOperand = OperandX8632Mem::create( 5363 typename Traits::X86OperandMem *NewMemOperand =
5450 Func, MemOperand->getType(), RegTemp, Mask2, MemOperand->getIndex(), 5364 Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp,
5451 MemOperand->getShift(), MemOperand->getSegmentRegister()); 5365 Mask2, MemOperand->getIndex(),
5366 MemOperand->getShift(),
5367 MemOperand->getSegmentRegister());
5452 5368
5453 // Label this memory operand as randomize, so we won't randomize it 5369 // Label this memory operand as randomize, so we won't randomize it
5454 // again in case we call legalize() mutiple times on this memory 5370 // again in case we call legalize() mutiple times on this memory
5455 // operand. 5371 // operand.
5456 NewMemOperand->setRandomized(true); 5372 NewMemOperand->setRandomized(true);
5457 return NewMemOperand; 5373 return NewMemOperand;
5458 } 5374 }
5459 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { 5375 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
5460 // pool the constant offset 5376 // pool the constant offset
5461 // FROM: 5377 // FROM:
(...skipping 14 matching lines...)
5476 return MemOperand; 5392 return MemOperand;
5477 Variable *RegTemp = makeReg(IceType_i32); 5393 Variable *RegTemp = makeReg(IceType_i32);
5478 IceString Label; 5394 IceString Label;
5479 llvm::raw_string_ostream Label_stream(Label); 5395 llvm::raw_string_ostream Label_stream(Label);
5480 MemOperand->getOffset()->emitPoolLabel(Label_stream); 5396 MemOperand->getOffset()->emitPoolLabel(Label_stream);
5481 MemOperand->getOffset()->setShouldBePooled(true); 5397 MemOperand->getOffset()->setShouldBePooled(true);
5482 const RelocOffsetT SymOffset = 0; 5398 const RelocOffsetT SymOffset = 0;
5483 bool SuppressMangling = true; 5399 bool SuppressMangling = true;
5484 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), 5400 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(),
5485 SuppressMangling); 5401 SuppressMangling);
5486 OperandX8632Mem *SymbolOperand = OperandX8632Mem::create( 5402 typename Traits::X86OperandMem *SymbolOperand =
5487 Func, MemOperand->getOffset()->getType(), nullptr, Symbol); 5403 Traits::X86OperandMem::create(
5404 Func, MemOperand->getOffset()->getType(), nullptr, Symbol);
5488 _mov(RegTemp, SymbolOperand); 5405 _mov(RegTemp, SymbolOperand);
5489 // If we have a base variable here, we should add the lea instruction 5406 // If we have a base variable here, we should add the lea instruction
5490 // to add the value of the base variable to RegTemp. If there is no 5407 // to add the value of the base variable to RegTemp. If there is no
5491 // base variable, we won't need this lea instruction. 5408 // base variable, we won't need this lea instruction.
5492 if (MemOperand->getBase()) { 5409 if (MemOperand->getBase()) {
5493 OperandX8632Mem *CalculateOperand = OperandX8632Mem::create( 5410 typename Traits::X86OperandMem *CalculateOperand =
5494 Func, MemOperand->getType(), MemOperand->getBase(), nullptr, 5411 Traits::X86OperandMem::create(
5495 RegTemp, 0, MemOperand->getSegmentRegister()); 5412 Func, MemOperand->getType(), MemOperand->getBase(), nullptr,
5413 RegTemp, 0, MemOperand->getSegmentRegister());
5496 _lea(RegTemp, CalculateOperand); 5414 _lea(RegTemp, CalculateOperand);
5497 _set_dest_nonkillable(); 5415 _set_dest_nonkillable();
5498 } 5416 }
5499 OperandX8632Mem *NewMemOperand = OperandX8632Mem::create( 5417 typename Traits::X86OperandMem *NewMemOperand =
5500 Func, MemOperand->getType(), RegTemp, nullptr, 5418 Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp,
5501 MemOperand->getIndex(), MemOperand->getShift(), 5419 nullptr, MemOperand->getIndex(),
5502 MemOperand->getSegmentRegister()); 5420 MemOperand->getShift(),
5421 MemOperand->getSegmentRegister());
5503 return NewMemOperand; 5422 return NewMemOperand;
5504 } 5423 }
5505 assert("Unsupported -randomize-pool-immediates option" && false); 5424 assert("Unsupported -randomize-pool-immediates option" && false);
5506 } 5425 }
5507 } 5426 }
5508 // the offset is not eligible for blinding or pooling, return the original 5427 // the offset is not eligible for blinding or pooling, return the original
5509 // mem operand 5428 // mem operand
5510 return MemOperand; 5429 return MemOperand;
5511 } 5430 }
5512 5431
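To summarize the two memory-operand rewrites above, a hedged sketch of how the effective address is preserved: under RPI_Randomize the constant displacement is folded through the cookie into a temporary base, and under RPI_Pool it is loaded from the constant pool and then biased by the original base; either way the final operand is disp[RegTemp, index, shift] with the same numeric address. Addresses are modeled as 32-bit integers (this is x86-32 lowering), and plain arithmetic stands in for the emitted mov/lea instructions:

#include <cstdint>

// RPI_Randomize: Value[Base] ==> (0 - Cookie)[RegTemp], with
//   RegTemp = lea (Cookie + Value)(Base).
static uint32_t randomizedAddress(uint32_t Base, uint32_t Value, uint32_t Cookie) {
  uint32_t RegTemp = Base + Cookie + Value;  // _lea(RegTemp, (Cookie + Value)(Base))
  return RegTemp + (0u - Cookie);            // -Cookie[RegTemp, index, shift]
}

// RPI_Pool: Offset[Base] ==> 0[RegTemp], with RegTemp loaded from the pool
// slot that holds Offset and then biased by Base.
static uint32_t pooledAddress(uint32_t Base, const uint32_t *PoolSlot) {
  uint32_t RegTemp = *PoolSlot;  // _mov(RegTemp, [pool label])
  RegTemp += Base;               // _lea(RegTemp, [Base + RegTemp])
  return RegTemp;                // 0[RegTemp, index, shift]
}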
5513 } // end of namespace X86Internal 5432 } // end of namespace X86Internal
5514 } // end of namespace Ice 5433 } // end of namespace Ice
5515 5434
5516 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5435 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H