Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringX86BaseImpl.h

Issue 1341423002: Reflow comments to use the full width. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fix spelling and rebase (created 5 years, 3 months ago)
OLD | NEW
1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
11 /// This file implements the TargetLoweringX86Base class, which 11 /// This file implements the TargetLoweringX86Base class, which consists almost
12 /// consists almost entirely of the lowering sequence for each 12 /// entirely of the lowering sequence for each high-level instruction.
13 /// high-level instruction.
14 /// 13 ///
15 //===----------------------------------------------------------------------===// 14 //===----------------------------------------------------------------------===//
16 15
17 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
18 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 17 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H
19 18
20 #include "IceCfg.h" 19 #include "IceCfg.h"
21 #include "IceCfgNode.h" 20 #include "IceCfgNode.h"
22 #include "IceClFlags.h" 21 #include "IceClFlags.h"
23 #include "IceDefs.h" 22 #include "IceDefs.h"
(...skipping 32 matching lines...)
56 55
57 public: 56 public:
58 BoolFoldingEntry() = default; 57 BoolFoldingEntry() = default;
59 explicit BoolFoldingEntry(Inst *I); 58 explicit BoolFoldingEntry(Inst *I);
60 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; 59 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default;
61 /// Instr is the instruction producing the i1-type variable of interest. 60 /// Instr is the instruction producing the i1-type variable of interest.
62 Inst *Instr = nullptr; 61 Inst *Instr = nullptr;
63 /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). 62 /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr).
64 bool IsComplex = false; 63 bool IsComplex = false;
65 /// IsLiveOut is initialized conservatively to true, and is set to false when 64 /// IsLiveOut is initialized conservatively to true, and is set to false when
66 /// we encounter an instruction that ends Var's live range. We disable the 65 /// we encounter an instruction that ends Var's live range. We disable the
67 /// folding optimization when Var is live beyond this basic block. Note that 66 /// folding optimization when Var is live beyond this basic block. Note that
68 /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will 67 /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will
69 /// always be true and the folding optimization will never be performed. 68 /// always be true and the folding optimization will never be performed.
70 bool IsLiveOut = true; 69 bool IsLiveOut = true;
71 // NumUses counts the number of times Var is used as a source operand in the 70 // NumUses counts the number of times Var is used as a source operand in the
72 // basic block. If IsComplex is true and there is more than one use of Var, 71 // basic block. If IsComplex is true and there is more than one use of Var,
73 // then the folding optimization is disabled for Var. 72 // then the folding optimization is disabled for Var.
74 uint32_t NumUses = 0; 73 uint32_t NumUses = 0;
75 }; 74 };
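A minimal sketch of how the three fields above interact, per the comments: folding is abandoned when the producer's result escapes the basic block, or when a complex producer has more than one consumer. The helper below is hypothetical (not part of the source) and only restates that decision in code.

  // Hypothetical helper, illustration only; the real decision is spread
  // across BoolFolding's init() and related methods.
  bool foldingDisabled(bool IsLiveOut, bool IsComplex, unsigned NumUses) {
    // Live past this basic block, or a complex lowering with multiple uses:
    // either condition disables the compare/branch folding for the variable.
    return IsLiveOut || (IsComplex && NumUses > 1);
  }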
76 75
77 template <class MachineTraits> class BoolFolding { 76 template <class MachineTraits> class BoolFolding {
78 public: 77 public:
79 enum BoolFoldingProducerKind { 78 enum BoolFoldingProducerKind {
80 PK_None, 79 PK_None,
81 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative. 80 // TODO(jpp): PK_Icmp32 is no longer meaningful. Rename to PK_IcmpNative.
82 PK_Icmp32, 81 PK_Icmp32,
(...skipping 76 matching lines...)
159 case InstCast::Zext: 158 case InstCast::Zext:
160 return CK_Zext; 159 return CK_Zext;
161 } 160 }
162 } 161 }
163 return CK_None; 162 return CK_None;
164 } 163 }
165 164
166 /// Returns true if the producing instruction has a "complex" lowering sequence. 165 /// Returns true if the producing instruction has a "complex" lowering sequence.
167 /// This generally means that its lowering sequence requires more than one 166 /// This generally means that its lowering sequence requires more than one
168 /// conditional branch, namely 64-bit integer compares and some floating-point 167 /// conditional branch, namely 64-bit integer compares and some floating-point
169 /// compares. When this is true, and there is more than one consumer, we prefer 168 /// compares. When this is true, and there is more than one consumer, we prefer
170 /// to disable the folding optimization because it minimizes branches. 169 /// to disable the folding optimization because it minimizes branches.
171 template <class MachineTraits> 170 template <class MachineTraits>
172 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { 171 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) {
173 switch (getProducerKind(Instr)) { 172 switch (getProducerKind(Instr)) {
174 default: 173 default:
175 return false; 174 return false;
176 case PK_Icmp64: 175 case PK_Icmp64:
177 return true; 176 return true;
178 case PK_Fcmp: 177 case PK_Fcmp:
179 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] 178 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()]
(...skipping 35 matching lines...)
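For context on why PK_Icmp64 is treated as "complex" above: on a 32-bit target a 64-bit compare decomposes into two 32-bit compares with more than one conditional branch. A rough C++ sketch of that decomposition for an unsigned less-than (hypothetical helper, illustration only):

  bool ult64(unsigned aHi, unsigned aLo, unsigned bHi, unsigned bLo) {
    if (aHi != bHi)
      return aHi < bHi; // first compare/branch decides on the high words
    return aLo < bLo;   // second compare/branch decides on the low words
  }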
215 } 214 }
216 for (auto &I : Producers) { 215 for (auto &I : Producers) {
217 // Ignore entries previously marked invalid. 216 // Ignore entries previously marked invalid.
218 if (I.second.Instr == nullptr) 217 if (I.second.Instr == nullptr)
219 continue; 218 continue;
220 // Disable the producer if its dest may be live beyond this block. 219 // Disable the producer if its dest may be live beyond this block.
221 if (I.second.IsLiveOut) { 220 if (I.second.IsLiveOut) {
222 setInvalid(I.first); 221 setInvalid(I.first);
223 continue; 222 continue;
224 } 223 }
225 // Mark as "dead" rather than outright deleting. This is so that other 224 // Mark as "dead" rather than outright deleting. This is so that other
226 // peephole style optimizations during or before lowering have access to 225 // peephole style optimizations during or before lowering have access to
227 // this instruction in undeleted form. See for example 226 // this instruction in undeleted form. See for example
228 // tryOptimizedCmpxchgCmpBr(). 227 // tryOptimizedCmpxchgCmpBr().
229 I.second.Instr->setDead(); 228 I.second.Instr->setDead();
230 } 229 }
231 } 230 }
232 231
233 template <class MachineTraits> 232 template <class MachineTraits>
234 const Inst * 233 const Inst *
235 BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const { 234 BoolFolding<MachineTraits>::getProducerFor(const Operand *Opnd) const {
236 auto *Var = llvm::dyn_cast<const Variable>(Opnd); 235 auto *Var = llvm::dyn_cast<const Variable>(Opnd);
237 if (Var == nullptr) 236 if (Var == nullptr)
(...skipping 58 matching lines...)
296 if (Func->hasError()) 295 if (Func->hasError())
297 return; 296 return;
298 Func->deletePhis(); 297 Func->deletePhis();
299 if (Func->hasError()) 298 if (Func->hasError())
300 return; 299 return;
301 Func->dump("After Phi lowering"); 300 Func->dump("After Phi lowering");
302 } 301 }
303 302
304 // Run this early so it can be used to focus optimizations on potentially hot 303 // Run this early so it can be used to focus optimizations on potentially hot
305 // code. 304 // code.
306 // TODO(stichnot,ascull): currently only used for regalloc not expensive high 305 // TODO(stichnot,ascull): currently only used for regalloc not
307 // level optimizations which could be focused on potentially hot code. 306 // expensive high level optimizations which could be focused on potentially
307 // hot code.
308 Func->computeLoopNestDepth(); 308 Func->computeLoopNestDepth();
309 Func->dump("After loop nest depth analysis"); 309 Func->dump("After loop nest depth analysis");
310 310
311 // Address mode optimization. 311 // Address mode optimization.
312 Func->getVMetadata()->init(VMK_SingleDefs); 312 Func->getVMetadata()->init(VMK_SingleDefs);
313 Func->doAddressOpt(); 313 Func->doAddressOpt();
314 314
315 // Find read-modify-write opportunities. Do this after address mode 315 // Find read-modify-write opportunities. Do this after address mode
316 // optimization so that doAddressOpt() doesn't need to be applied to RMW 316 // optimization so that doAddressOpt() doesn't need to be applied to RMW
317 // instructions as well. 317 // instructions as well.
318 findRMW(); 318 findRMW();
319 Func->dump("After RMW transform"); 319 Func->dump("After RMW transform");
320 320
321 // Argument lowering 321 // Argument lowering
322 Func->doArgLowering(); 322 Func->doArgLowering();
323 323
324 // Target lowering. This requires liveness analysis for some parts of the 324 // Target lowering. This requires liveness analysis for some parts of the
325 // lowering decisions, such as compare/branch fusing. If non-lightweight 325 // lowering decisions, such as compare/branch fusing. If non-lightweight
326 // liveness analysis is used, the instructions need to be renumbered first 326 // liveness analysis is used, the instructions need to be renumbered first
327 // TODO: This renumbering should only be necessary if we're actually 327 // TODO: This renumbering should only be necessary if we're actually
328 // calculating live intervals, which we only do for register allocation. 328 // calculating live intervals, which we only do for register allocation.
329 Func->renumberInstructions(); 329 Func->renumberInstructions();
330 if (Func->hasError()) 330 if (Func->hasError())
331 return; 331 return;
332 332
333 // TODO: It should be sufficient to use the fastest liveness calculation, i.e. 333 // TODO: It should be sufficient to use the fastest liveness calculation,
334 // livenessLightweight(). However, for some reason that slows down the rest 334 // i.e. livenessLightweight(). However, for some reason that slows down the
335 // of the translation. Investigate. 335 // rest of the translation. Investigate.
336 Func->liveness(Liveness_Basic); 336 Func->liveness(Liveness_Basic);
337 if (Func->hasError()) 337 if (Func->hasError())
338 return; 338 return;
339 Func->dump("After x86 address mode opt"); 339 Func->dump("After x86 address mode opt");
340 340
341 // Disable constant blinding or pooling for load optimization. 341 // Disable constant blinding or pooling for load optimization.
342 { 342 {
343 BoolFlagSaver B(RandomizationPoolingPaused, true); 343 BoolFlagSaver B(RandomizationPoolingPaused, true);
344 doLoadOpt(); 344 doLoadOpt();
345 } 345 }
346 Func->genCode(); 346 Func->genCode();
347 if (Func->hasError()) 347 if (Func->hasError())
348 return; 348 return;
349 Func->dump("After x86 codegen"); 349 Func->dump("After x86 codegen");
350 350
351 // Register allocation. This requires instruction renumbering and full 351 // Register allocation. This requires instruction renumbering and full
352 // liveness analysis. Loops must be identified before liveness so variable 352 // liveness analysis. Loops must be identified before liveness so variable
353 // use weights are correct. 353 // use weights are correct.
354 Func->renumberInstructions(); 354 Func->renumberInstructions();
355 if (Func->hasError()) 355 if (Func->hasError())
356 return; 356 return;
357 Func->liveness(Liveness_Intervals); 357 Func->liveness(Liveness_Intervals);
358 if (Func->hasError()) 358 if (Func->hasError())
359 return; 359 return;
360 // Validate the live range computations. The expensive validation call is 360 // Validate the live range computations. The expensive validation call is
361 // deliberately only made when assertions are enabled. 361 // deliberately only made when assertions are enabled.
362 assert(Func->validateLiveness()); 362 assert(Func->validateLiveness());
363 // The post-codegen dump is done here, after liveness analysis and associated 363 // The post-codegen dump is done here, after liveness analysis and associated
364 // cleanup, to make the dump cleaner and more useful. 364 // cleanup, to make the dump cleaner and more useful.
365 Func->dump("After initial x8632 codegen"); 365 Func->dump("After initial x8632 codegen");
366 Func->getVMetadata()->init(VMK_All); 366 Func->getVMetadata()->init(VMK_All);
367 regAlloc(RAK_Global); 367 regAlloc(RAK_Global);
368 if (Func->hasError()) 368 if (Func->hasError())
369 return; 369 return;
370 Func->dump("After linear scan regalloc"); 370 Func->dump("After linear scan regalloc");
371 371
372 if (Ctx->getFlags().getPhiEdgeSplit()) { 372 if (Ctx->getFlags().getPhiEdgeSplit()) {
373 Func->advancedPhiLowering(); 373 Func->advancedPhiLowering();
374 Func->dump("After advanced Phi lowering"); 374 Func->dump("After advanced Phi lowering");
375 } 375 }
376 376
377 // Stack frame mapping. 377 // Stack frame mapping.
378 Func->genFrame(); 378 Func->genFrame();
379 if (Func->hasError()) 379 if (Func->hasError())
380 return; 380 return;
381 Func->dump("After stack frame mapping"); 381 Func->dump("After stack frame mapping");
382 382
383 Func->contractEmptyNodes(); 383 Func->contractEmptyNodes();
384 Func->reorderNodes(); 384 Func->reorderNodes();
385 385
386 // Shuffle basic block order if -reorder-basic-blocks is enabled. 386 // Shuffle basic block order if -reorder-basic-blocks is enabled.
387 Func->shuffleNodes(); 387 Func->shuffleNodes();
388 388
389 // Branch optimization. This needs to be done just before code emission. In 389 // Branch optimization. This needs to be done just before code emission. In
390 // particular, no transformations that insert or reorder CfgNodes should be 390 // particular, no transformations that insert or reorder CfgNodes should be
391 // done after branch optimization. We go ahead and do it before nop insertion 391 // done after branch optimization. We go ahead and do it before nop insertion
392 // to reduce the amount of work needed for searching for opportunities. 392 // to reduce the amount of work needed for searching for opportunities.
393 Func->doBranchOpt(); 393 Func->doBranchOpt();
394 Func->dump("After branch optimization"); 394 Func->dump("After branch optimization");
395 395
396 // Nop insertion if -nop-insertion is enabled. 396 // Nop insertion if -nop-insertion is enabled.
397 Func->doNopInsertion(); 397 Func->doNopInsertion();
398 398
399 // Mark nodes that require sandbox alignment 399 // Mark nodes that require sandbox alignment
400 if (Ctx->getFlags().getUseSandboxing()) 400 if (Ctx->getFlags().getUseSandboxing())
401 Func->markNodesForSandboxing(); 401 Func->markNodesForSandboxing();
(...skipping 86 matching lines...)
488 } 488 }
489 return false; 489 return false;
490 } 490 }
491 491
492 template <class Machine> void TargetX86Base<Machine>::findRMW() { 492 template <class Machine> void TargetX86Base<Machine>::findRMW() {
493 Func->dump("Before RMW"); 493 Func->dump("Before RMW");
494 OstreamLocker L(Func->getContext()); 494 OstreamLocker L(Func->getContext());
495 Ostream &Str = Func->getContext()->getStrDump(); 495 Ostream &Str = Func->getContext()->getStrDump();
496 for (CfgNode *Node : Func->getNodes()) { 496 for (CfgNode *Node : Func->getNodes()) {
497 // Walk through the instructions, considering each sequence of 3 497 // Walk through the instructions, considering each sequence of 3
498 // instructions, and look for the particular RMW pattern. Note that this 498 // instructions, and look for the particular RMW pattern. Note that this
499 // search can be "broken" (false negatives) if there are intervening deleted 499 // search can be "broken" (false negatives) if there are intervening
500 // instructions, or intervening instructions that could be safely moved out 500 // deleted instructions, or intervening instructions that could be safely
501 // of the way to reveal an RMW pattern. 501 // moved out of the way to reveal an RMW pattern.
502 auto E = Node->getInsts().end(); 502 auto E = Node->getInsts().end();
503 auto I1 = E, I2 = E, I3 = Node->getInsts().begin(); 503 auto I1 = E, I2 = E, I3 = Node->getInsts().begin();
504 for (; I3 != E; I1 = I2, I2 = I3, ++I3) { 504 for (; I3 != E; I1 = I2, I2 = I3, ++I3) {
505 // Make I3 skip over deleted instructions. 505 // Make I3 skip over deleted instructions.
506 while (I3 != E && I3->isDeleted()) 506 while (I3 != E && I3->isDeleted())
507 ++I3; 507 ++I3;
508 if (I1 == E || I2 == E || I3 == E) 508 if (I1 == E || I2 == E || I3 == E)
509 continue; 509 continue;
510 assert(!I1->isDeleted()); 510 assert(!I1->isDeleted());
511 assert(!I2->isDeleted()); 511 assert(!I2->isDeleted());
512 assert(!I3->isDeleted()); 512 assert(!I3->isDeleted());
513 if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) { 513 if (auto *Load = llvm::dyn_cast<InstLoad>(I1)) {
514 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) { 514 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(I2)) {
515 if (auto *Store = llvm::dyn_cast<InstStore>(I3)) { 515 if (auto *Store = llvm::dyn_cast<InstStore>(I3)) {
516 // Look for: 516 // Look for:
517 // a = Load addr 517 // a = Load addr
518 // b = <op> a, other 518 // b = <op> a, other
519 // Store b, addr 519 // Store b, addr
520 // Change to: 520 // Change to:
521 // a = Load addr 521 // a = Load addr
522 // b = <op> a, other 522 // b = <op> a, other
523 // x = FakeDef 523 // x = FakeDef
524 // RMW <op>, addr, other, x 524 // RMW <op>, addr, other, x
525 // b = Store b, addr, x 525 // b = Store b, addr, x
526 // Note that inferTwoAddress() makes sure setDestNonKillable() gets 526 // Note that inferTwoAddress() makes sure setDestNonKillable() gets
527 // called on the updated Store instruction, to avoid liveness 527 // called on the updated Store instruction, to avoid liveness
528 // problems later. 528 // problems later.
529 // 529 //
530 // With this transformation, the Store instruction acquires a Dest 530 // With this transformation, the Store instruction acquires a Dest
531 // variable and is now subject to dead code elimination if there are 531 // variable and is now subject to dead code elimination if there
532 // no more uses of "b". Variable "x" is a beacon for determining 532 // are no more uses of "b". Variable "x" is a beacon for
533 // whether the Store instruction gets dead-code eliminated. If the 533 // determining whether the Store instruction gets dead-code
534 // Store instruction is eliminated, then it must be the case that 534 // eliminated. If the Store instruction is eliminated, then it
535 // the RMW instruction ends x's live range, and therefore the RMW 535 // must be the case that the RMW instruction ends x's live range,
536 // instruction will be retained and later lowered. On the other 536 // and therefore the RMW instruction will be retained and later
537 // hand, if the RMW instruction does not end x's live range, then 537 // lowered. On the other hand, if the RMW instruction does not end
538 // the Store instruction must still be present, and therefore the 538 // x's live range, then the Store instruction must still be
539 // RMW instruction is ignored during lowering because it is 539 // present, and therefore the RMW instruction is ignored during
540 // redundant with the Store instruction. 540 // lowering because it is redundant with the Store instruction.
541 // 541 //
542 // Note that if "a" has further uses, the RMW transformation may 542 // Note that if "a" has further uses, the RMW transformation may
543 // still trigger, resulting in two loads and one store, which is 543 // still trigger, resulting in two loads and one store, which is
544 // worse than the original one load and one store. However, this is 544 // worse than the original one load and one store. However, this
545 // probably rare, and caching probably keeps it just as fast. 545 // is probably rare, and caching probably keeps it just as fast.
546 if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(), 546 if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(),
547 Store->getAddr())) 547 Store->getAddr()))
548 continue; 548 continue;
549 Operand *ArithSrcFromLoad = Arith->getSrc(0); 549 Operand *ArithSrcFromLoad = Arith->getSrc(0);
550 Operand *ArithSrcOther = Arith->getSrc(1); 550 Operand *ArithSrcOther = Arith->getSrc(1);
551 if (ArithSrcFromLoad != Load->getDest()) { 551 if (ArithSrcFromLoad != Load->getDest()) {
552 if (!Arith->isCommutative() || ArithSrcOther != Load->getDest()) 552 if (!Arith->isCommutative() || ArithSrcOther != Load->getDest())
553 continue; 553 continue;
554 std::swap(ArithSrcFromLoad, ArithSrcOther); 554 std::swap(ArithSrcFromLoad, ArithSrcOther);
555 } 555 }
(...skipping 26 matching lines...)
582 } 582 }
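As a plain-C illustration of the pattern findRMW() matches, the load/op/store sequence below is what a single x86 read-modify-write instruction (for example "add [addr], other") can replace; the function is hypothetical and only restates the "Look for / Change to" comment above.

  void rmwAdd(int *Addr, int Other) {
    int A = *Addr;     // a = Load addr
    int B = A + Other; // b = <op> a, other
    *Addr = B;         // Store b, addr
  }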
583 583
584 // Converts a ConstantInteger32 operand into its constant value, or 584 // Converts a ConstantInteger32 operand into its constant value, or
585 // MemoryOrderInvalid if the operand is not a ConstantInteger32. 585 // MemoryOrderInvalid if the operand is not a ConstantInteger32.
586 inline uint64_t getConstantMemoryOrder(Operand *Opnd) { 586 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
587 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) 587 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
588 return Integer->getValue(); 588 return Integer->getValue();
589 return Intrinsics::MemoryOrderInvalid; 589 return Intrinsics::MemoryOrderInvalid;
590 } 590 }
591 591
592 /// Determines whether the dest of a Load instruction can be folded 592 /// Determines whether the dest of a Load instruction can be folded into one of
593 /// into one of the src operands of a 2-operand instruction. This is 593 /// the src operands of a 2-operand instruction. This is true as long as the
594 /// true as long as the load dest matches exactly one of the binary 594 /// load dest matches exactly one of the binary instruction's src operands.
595 /// instruction's src operands. Replaces Src0 or Src1 with LoadSrc if 595 /// Replaces Src0 or Src1 with LoadSrc if the answer is true.
596 /// the answer is true.
597 inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, 596 inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest,
598 Operand *&Src0, Operand *&Src1) { 597 Operand *&Src0, Operand *&Src1) {
599 if (Src0 == LoadDest && Src1 != LoadDest) { 598 if (Src0 == LoadDest && Src1 != LoadDest) {
600 Src0 = LoadSrc; 599 Src0 = LoadSrc;
601 return true; 600 return true;
602 } 601 }
603 if (Src0 != LoadDest && Src1 == LoadDest) { 602 if (Src0 != LoadDest && Src1 == LoadDest) {
604 Src1 = LoadSrc; 603 Src1 = LoadSrc;
605 return true; 604 return true;
606 } 605 }
607 return false; 606 return false;
608 } 607 }
609 608
610 template <class Machine> void TargetX86Base<Machine>::doLoadOpt() { 609 template <class Machine> void TargetX86Base<Machine>::doLoadOpt() {
611 for (CfgNode *Node : Func->getNodes()) { 610 for (CfgNode *Node : Func->getNodes()) {
612 Context.init(Node); 611 Context.init(Node);
613 while (!Context.atEnd()) { 612 while (!Context.atEnd()) {
614 Variable *LoadDest = nullptr; 613 Variable *LoadDest = nullptr;
615 Operand *LoadSrc = nullptr; 614 Operand *LoadSrc = nullptr;
616 Inst *CurInst = Context.getCur(); 615 Inst *CurInst = Context.getCur();
617 Inst *Next = Context.getNextInst(); 616 Inst *Next = Context.getNextInst();
618 // Determine whether the current instruction is a Load 617 // Determine whether the current instruction is a Load instruction or
619 // instruction or equivalent. 618 // equivalent.
620 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { 619 if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) {
621 // An InstLoad always qualifies. 620 // An InstLoad always qualifies.
622 LoadDest = Load->getDest(); 621 LoadDest = Load->getDest();
623 const bool DoLegalize = false; 622 const bool DoLegalize = false;
624 LoadSrc = formMemoryOperand(Load->getSourceAddress(), 623 LoadSrc = formMemoryOperand(Load->getSourceAddress(),
625 LoadDest->getType(), DoLegalize); 624 LoadDest->getType(), DoLegalize);
626 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { 625 } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) {
627 // An AtomicLoad intrinsic qualifies as long as it has a valid 626 // An AtomicLoad intrinsic qualifies as long as it has a valid memory
628 // memory ordering, and can be implemented in a single 627 // ordering, and can be implemented in a single instruction (i.e., not
629 // instruction (i.e., not i64 on x86-32). 628 // i64 on x86-32).
630 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; 629 Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID;
631 if (ID == Intrinsics::AtomicLoad && 630 if (ID == Intrinsics::AtomicLoad &&
632 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) && 631 (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) &&
633 Intrinsics::isMemoryOrderValid( 632 Intrinsics::isMemoryOrderValid(
634 ID, getConstantMemoryOrder(Intrin->getArg(1)))) { 633 ID, getConstantMemoryOrder(Intrin->getArg(1)))) {
635 LoadDest = Intrin->getDest(); 634 LoadDest = Intrin->getDest();
636 const bool DoLegalize = false; 635 const bool DoLegalize = false;
637 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(), 636 LoadSrc = formMemoryOperand(Intrin->getArg(0), LoadDest->getType(),
638 DoLegalize); 637 DoLegalize);
639 } 638 }
640 } 639 }
641 // A Load instruction can be folded into the following 640 // A Load instruction can be folded into the following instruction only
642 // instruction only if the following instruction ends the Load's 641 // if the following instruction ends the Load's Dest variable's live
643 // Dest variable's live range. 642 // range.
644 if (LoadDest && Next && Next->isLastUse(LoadDest)) { 643 if (LoadDest && Next && Next->isLastUse(LoadDest)) {
645 assert(LoadSrc); 644 assert(LoadSrc);
646 Inst *NewInst = nullptr; 645 Inst *NewInst = nullptr;
647 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) { 646 if (auto *Arith = llvm::dyn_cast<InstArithmetic>(Next)) {
648 Operand *Src0 = Arith->getSrc(0); 647 Operand *Src0 = Arith->getSrc(0);
649 Operand *Src1 = Arith->getSrc(1); 648 Operand *Src1 = Arith->getSrc(1);
650 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { 649 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
651 NewInst = InstArithmetic::create(Func, Arith->getOp(), 650 NewInst = InstArithmetic::create(Func, Arith->getOp(),
652 Arith->getDest(), Src0, Src1); 651 Arith->getDest(), Src0, Src1);
653 } 652 }
(...skipping 12 matching lines...)
666 Fcmp->getDest(), Src0, Src1); 665 Fcmp->getDest(), Src0, Src1);
667 } 666 }
668 } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) { 667 } else if (auto *Select = llvm::dyn_cast<InstSelect>(Next)) {
669 Operand *Src0 = Select->getTrueOperand(); 668 Operand *Src0 = Select->getTrueOperand();
670 Operand *Src1 = Select->getFalseOperand(); 669 Operand *Src1 = Select->getFalseOperand();
671 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) { 670 if (canFoldLoadIntoBinaryInst(LoadSrc, LoadDest, Src0, Src1)) {
672 NewInst = InstSelect::create(Func, Select->getDest(), 671 NewInst = InstSelect::create(Func, Select->getDest(),
673 Select->getCondition(), Src0, Src1); 672 Select->getCondition(), Src0, Src1);
674 } 673 }
675 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) { 674 } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) {
676 // The load dest can always be folded into a Cast 675 // The load dest can always be folded into a Cast instruction.
677 // instruction.
678 Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0)); 676 Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0));
679 if (Src0 == LoadDest) { 677 if (Src0 == LoadDest) {
680 NewInst = InstCast::create(Func, Cast->getCastKind(), 678 NewInst = InstCast::create(Func, Cast->getCastKind(),
681 Cast->getDest(), LoadSrc); 679 Cast->getDest(), LoadSrc);
682 } 680 }
683 } 681 }
684 if (NewInst) { 682 if (NewInst) {
685 CurInst->setDeleted(); 683 CurInst->setDeleted();
686 Next->setDeleted(); 684 Next->setDeleted();
687 Context.insert(NewInst); 685 Context.insert(NewInst);
688 // Update NewInst->LiveRangesEnded so that target lowering 686 // Update NewInst->LiveRangesEnded so that target lowering may
689 // may benefit. Also update NewInst->HasSideEffects. 687 // benefit. Also update NewInst->HasSideEffects.
690 NewInst->spliceLivenessInfo(Next, CurInst); 688 NewInst->spliceLivenessInfo(Next, CurInst);
691 } 689 }
692 } 690 }
693 Context.advanceCur(); 691 Context.advanceCur();
694 Context.advanceNext(); 692 Context.advanceNext();
695 } 693 }
696 } 694 }
697 Func->dump("After load optimization"); 695 Func->dump("After load optimization");
698 } 696 }
699 697
(...skipping 14 matching lines...)
714 if (Ty == IceType_void) 712 if (Ty == IceType_void)
715 Ty = IceType_i32; 713 Ty = IceType_i32;
716 if (PhysicalRegisters[Ty].empty()) 714 if (PhysicalRegisters[Ty].empty())
717 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM); 715 PhysicalRegisters[Ty].resize(Traits::RegisterSet::Reg_NUM);
718 assert(RegNum < PhysicalRegisters[Ty].size()); 716 assert(RegNum < PhysicalRegisters[Ty].size());
719 Variable *Reg = PhysicalRegisters[Ty][RegNum]; 717 Variable *Reg = PhysicalRegisters[Ty][RegNum];
720 if (Reg == nullptr) { 718 if (Reg == nullptr) {
721 Reg = Func->makeVariable(Ty); 719 Reg = Func->makeVariable(Ty);
722 Reg->setRegNum(RegNum); 720 Reg->setRegNum(RegNum);
723 PhysicalRegisters[Ty][RegNum] = Reg; 721 PhysicalRegisters[Ty][RegNum] = Reg;
724 // Specially mark esp as an "argument" so that it is considered 722 // Specially mark esp as an "argument" so that it is considered live upon
725 // live upon function entry. 723 // function entry.
726 if (RegNum == Traits::RegisterSet::Reg_esp) { 724 if (RegNum == Traits::RegisterSet::Reg_esp) {
727 Func->addImplicitArg(Reg); 725 Func->addImplicitArg(Reg);
728 Reg->setIgnoreLiveness(); 726 Reg->setIgnoreLiveness();
729 } 727 }
730 } 728 }
731 return Reg; 729 return Reg;
732 } 730 }
733 731
734 template <class Machine> 732 template <class Machine>
735 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const { 733 IceString TargetX86Base<Machine>::getRegName(SizeT RegNum, Type Ty) const {
(...skipping 39 matching lines...)
775 BaseRegNum = getFrameOrStackReg(); 773 BaseRegNum = getFrameOrStackReg();
776 if (!hasFramePointer()) 774 if (!hasFramePointer())
777 Offset += getStackAdjustment(); 775 Offset += getStackAdjustment();
778 } 776 }
779 return typename Traits::Address( 777 return typename Traits::Address(
780 Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset); 778 Traits::RegisterSet::getEncodedGPR(BaseRegNum), Offset);
781 } 779 }
782 780
783 /// Helper function for addProlog(). 781 /// Helper function for addProlog().
784 /// 782 ///
785 /// This assumes Arg is an argument passed on the stack. This sets the 783 /// This assumes Arg is an argument passed on the stack. This sets the frame
786 /// frame offset for Arg and updates InArgsSizeBytes according to Arg's 784 /// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
787 /// width. For an I64 arg that has been split into Lo and Hi components, 785 /// I64 arg that has been split into Lo and Hi components, it calls itself
788 /// it calls itself recursively on the components, taking care to handle 786 /// recursively on the components, taking care to handle Lo first because of the
789 /// Lo first because of the little-endian architecture. Lastly, this 787 /// little-endian architecture. Lastly, this function generates an instruction
790 /// function generates an instruction to copy Arg into its assigned 788 /// to copy Arg into its assigned register if applicable.
791 /// register if applicable.
792 template <class Machine> 789 template <class Machine>
793 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, 790 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg,
794 Variable *FramePtr, 791 Variable *FramePtr,
795 size_t BasicFrameOffset, 792 size_t BasicFrameOffset,
796 size_t &InArgsSizeBytes) { 793 size_t &InArgsSizeBytes) {
797 Variable *Lo = Arg->getLo(); 794 Variable *Lo = Arg->getLo();
798 Variable *Hi = Arg->getHi(); 795 Variable *Hi = Arg->getHi();
799 Type Ty = Arg->getType(); 796 Type Ty = Arg->getType();
800 if (!Traits::Is64Bit && Lo && Hi && Ty == IceType_i64) { 797 if (!Traits::Is64Bit && Lo && Hi && Ty == IceType_i64) {
801 assert(Lo->getType() != IceType_i64); // don't want infinite recursion 798 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
(...skipping 10 matching lines...)
812 if (Arg->hasReg()) { 809 if (Arg->hasReg()) {
813 assert(Ty != IceType_i64 || Traits::Is64Bit); 810 assert(Ty != IceType_i64 || Traits::Is64Bit);
814 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create( 811 typename Traits::X86OperandMem *Mem = Traits::X86OperandMem::create(
815 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset())); 812 Func, Ty, FramePtr, Ctx->getConstantInt32(Arg->getStackOffset()));
816 if (isVectorType(Arg->getType())) { 813 if (isVectorType(Arg->getType())) {
817 _movp(Arg, Mem); 814 _movp(Arg, Mem);
818 } else { 815 } else {
819 _mov(Arg, Mem); 816 _mov(Arg, Mem);
820 } 817 }
821 // This argument-copying instruction uses an explicit Traits::X86OperandMem 818 // This argument-copying instruction uses an explicit Traits::X86OperandMem
822 // operand instead of a Variable, so its fill-from-stack operation has to be 819 // operand instead of a Variable, so its fill-from-stack operation has to
823 // tracked separately for statistics. 820 // be tracked separately for statistics.
824 Ctx->statsUpdateFills(); 821 Ctx->statsUpdateFills();
825 } 822 }
826 } 823 }
827 824
828 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() { 825 template <class Machine> Type TargetX86Base<Machine>::stackSlotType() {
829 return Traits::WordType; 826 return Traits::WordType;
830 } 827 }
831 828
832 template <class Machine> 829 template <class Machine>
833 template <typename T> 830 template <typename T>
834 typename std::enable_if<!T::Is64Bit, void>::type 831 typename std::enable_if<!T::Is64Bit, void>::type
835 TargetX86Base<Machine>::split64(Variable *Var) { 832 TargetX86Base<Machine>::split64(Variable *Var) {
836 switch (Var->getType()) { 833 switch (Var->getType()) {
837 default: 834 default:
838 return; 835 return;
839 case IceType_i64: 836 case IceType_i64:
840 // TODO: Only consider F64 if we need to push each half when 837 // TODO: Only consider F64 if we need to push each half when passing as an
841 // passing as an argument to a function call. Note that each half 838 // argument to a function call. Note that each half is still typed as I32.
842 // is still typed as I32.
843 case IceType_f64: 839 case IceType_f64:
844 break; 840 break;
845 } 841 }
846 Variable *Lo = Var->getLo(); 842 Variable *Lo = Var->getLo();
847 Variable *Hi = Var->getHi(); 843 Variable *Hi = Var->getHi();
848 if (Lo) { 844 if (Lo) {
849 assert(Hi); 845 assert(Hi);
850 return; 846 return;
851 } 847 }
852 assert(Hi == nullptr); 848 assert(Hi == nullptr);
(...skipping 86 matching lines...)
939 template <class Machine> 935 template <class Machine>
940 llvm::SmallBitVector 936 llvm::SmallBitVector
941 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, 937 TargetX86Base<Machine>::getRegisterSet(RegSetMask Include,
942 RegSetMask Exclude) const { 938 RegSetMask Exclude) const {
943 return Traits::getRegisterSet(Include, Exclude); 939 return Traits::getRegisterSet(Include, Exclude);
944 } 940 }
945 941
946 template <class Machine> 942 template <class Machine>
947 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { 943 void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) {
948 IsEbpBasedFrame = true; 944 IsEbpBasedFrame = true;
949 // Conservatively require the stack to be aligned. Some stack 945 // Conservatively require the stack to be aligned. Some stack adjustment
950 // adjustment operations implemented below assume that the stack is 946 // operations implemented below assume that the stack is aligned before the
951 // aligned before the alloca. All the alloca code ensures that the 947 // alloca. All the alloca code ensures that the stack alignment is preserved
952 // stack alignment is preserved after the alloca. The stack alignment 948 // after the alloca. The stack alignment restriction can be relaxed in some
953 // restriction can be relaxed in some cases. 949 // cases.
954 NeedsStackAlignment = true; 950 NeedsStackAlignment = true;
955 951
956 // TODO(stichnot): minimize the number of adjustments of esp, etc. 952 // TODO(stichnot): minimize the number of adjustments of esp, etc.
957 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); 953 Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp);
958 Operand *TotalSize = legalize(Inst->getSizeInBytes()); 954 Operand *TotalSize = legalize(Inst->getSizeInBytes());
959 Variable *Dest = Inst->getDest(); 955 Variable *Dest = Inst->getDest();
960 uint32_t AlignmentParam = Inst->getAlignInBytes(); 956 uint32_t AlignmentParam = Inst->getAlignInBytes();
961 // For default align=0, set it to the real value 1, to avoid any 957 // For default align=0, set it to the real value 1, to avoid any
962 // bit-manipulation problems below. 958 // bit-manipulation problems below.
963 AlignmentParam = std::max(AlignmentParam, 1u); 959 AlignmentParam = std::max(AlignmentParam, 1u);
964 960
965 // LLVM enforces power of 2 alignment. 961 // LLVM enforces power of 2 alignment.
966 assert(llvm::isPowerOf2_32(AlignmentParam)); 962 assert(llvm::isPowerOf2_32(AlignmentParam));
967 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES)); 963 assert(llvm::isPowerOf2_32(Traits::X86_STACK_ALIGNMENT_BYTES));
968 964
969 uint32_t Alignment = 965 uint32_t Alignment =
970 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES); 966 std::max(AlignmentParam, Traits::X86_STACK_ALIGNMENT_BYTES);
971 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) { 967 if (Alignment > Traits::X86_STACK_ALIGNMENT_BYTES) {
972 _and(esp, Ctx->getConstantInt32(-Alignment)); 968 _and(esp, Ctx->getConstantInt32(-Alignment));
973 } 969 }
974 if (const auto *ConstantTotalSize = 970 if (const auto *ConstantTotalSize =
975 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { 971 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
976 uint32_t Value = ConstantTotalSize->getValue(); 972 uint32_t Value = ConstantTotalSize->getValue();
977 Value = Utils::applyAlignment(Value, Alignment); 973 Value = Utils::applyAlignment(Value, Alignment);
978 _sub(esp, Ctx->getConstantInt32(Value)); 974 _sub(esp, Ctx->getConstantInt32(Value));
979 } else { 975 } else {
980 // Non-constant sizes need to be adjusted to the next highest 976 // Non-constant sizes need to be adjusted to the next highest multiple of
981 // multiple of the required alignment at runtime. 977 // the required alignment at runtime.
982 Variable *T = makeReg(IceType_i32); 978 Variable *T = makeReg(IceType_i32);
983 _mov(T, TotalSize); 979 _mov(T, TotalSize);
984 _add(T, Ctx->getConstantInt32(Alignment - 1)); 980 _add(T, Ctx->getConstantInt32(Alignment - 1));
985 _and(T, Ctx->getConstantInt32(-Alignment)); 981 _and(T, Ctx->getConstantInt32(-Alignment));
986 _sub(esp, T); 982 _sub(esp, T);
987 } 983 }
988 _mov(Dest, esp); 984 _mov(Dest, esp);
989 } 985 }
990 986
991 /// Strength-reduce scalar integer multiplication by a constant (for 987 /// Strength-reduce scalar integer multiplication by a constant (for i32 or
992 /// i32 or narrower) for certain constants. The lea instruction can be 988 /// narrower) for certain constants. The lea instruction can be used to multiply
993 /// used to multiply by 3, 5, or 9, and the lsh instruction can be used 989 /// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of
994 /// to multiply by powers of 2. These can be combined such that 990 /// 2. These can be combined such that e.g. multiplying by 100 can be done as 2
995 /// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, 991 /// lea-based multiplies by 5, combined with left-shifting by 2.
996 /// combined with left-shifting by 2.
997 template <class Machine> 992 template <class Machine>
998 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, 993 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0,
999 int32_t Src1) { 994 int32_t Src1) {
1000 // Disable this optimization for Om1 and O0, just to keep things 995 // Disable this optimization for Om1 and O0, just to keep things simple
1001 // simple there. 996 // there.
1002 if (Ctx->getFlags().getOptLevel() < Opt_1) 997 if (Ctx->getFlags().getOptLevel() < Opt_1)
1003 return false; 998 return false;
1004 Type Ty = Dest->getType(); 999 Type Ty = Dest->getType();
1005 Variable *T = nullptr; 1000 Variable *T = nullptr;
1006 if (Src1 == -1) { 1001 if (Src1 == -1) {
1007 _mov(T, Src0); 1002 _mov(T, Src0);
1008 _neg(T); 1003 _neg(T);
1009 _mov(Dest, T); 1004 _mov(Dest, T);
1010 return true; 1005 return true;
1011 } 1006 }
(...skipping 35 matching lines...)
1047 ++CountOps; 1042 ++CountOps;
1048 ++Count2; 1043 ++Count2;
1049 Src1 /= 2; 1044 Src1 /= 2;
1050 } else { 1045 } else {
1051 return false; 1046 return false;
1052 } 1047 }
1053 } 1048 }
1054 // Lea optimization only works for i16 and i32 types, not i8. 1049 // Lea optimization only works for i16 and i32 types, not i8.
1055 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) 1050 if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9))
1056 return false; 1051 return false;
1057 // Limit the number of lea/shl operations for a single multiply, to 1052 // Limit the number of lea/shl operations for a single multiply, to a
1058 // a somewhat arbitrary choice of 3. 1053 // somewhat arbitrary choice of 3.
1059 const uint32_t MaxOpsForOptimizedMul = 3; 1054 const uint32_t MaxOpsForOptimizedMul = 3;
1060 if (CountOps > MaxOpsForOptimizedMul) 1055 if (CountOps > MaxOpsForOptimizedMul)
1061 return false; 1056 return false;
1062 _mov(T, Src0); 1057 _mov(T, Src0);
1063 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1058 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1064 for (uint32_t i = 0; i < Count9; ++i) { 1059 for (uint32_t i = 0; i < Count9; ++i) {
1065 const uint16_t Shift = 3; // log2(9-1) 1060 const uint16_t Shift = 3; // log2(9-1)
1066 _lea(T, 1061 _lea(T,
1067 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift)); 1062 Traits::X86OperandMem::create(Func, IceType_void, T, Zero, T, Shift));
1068 _set_dest_nonkillable(); 1063 _set_dest_nonkillable();
(...skipping 25 matching lines...)
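A worked example of the decomposition described in the optimizeScalarMul() comment above: 100 = 5 * 5 * 4, so multiplying by 100 becomes two lea-based multiplies by 5 plus a shift by 2, three operations in total. The helper is hypothetical and only spells that arithmetic out:

  unsigned mulBy100(unsigned X) {
    unsigned T = X;
    T = T + T * 4; // lea T, [T + T*4]  -> X * 5
    T = T + T * 4; // lea T, [T + T*4]  -> X * 25
    T <<= 2;       // shl T, 2          -> X * 100
    return T;      // three ops, within MaxOpsForOptimizedMul
  }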
1094 Operand *Src0 = legalize(Inst->getSrc(0)); 1089 Operand *Src0 = legalize(Inst->getSrc(0));
1095 Operand *Src1 = legalize(Inst->getSrc(1)); 1090 Operand *Src1 = legalize(Inst->getSrc(1));
1096 if (Inst->isCommutative()) { 1091 if (Inst->isCommutative()) {
1097 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1)) 1092 if (!llvm::isa<Variable>(Src0) && llvm::isa<Variable>(Src1))
1098 std::swap(Src0, Src1); 1093 std::swap(Src0, Src1);
1099 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1)) 1094 if (llvm::isa<Constant>(Src0) && !llvm::isa<Constant>(Src1))
1100 std::swap(Src0, Src1); 1095 std::swap(Src0, Src1);
1101 } 1096 }
1102 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 1097 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
1103 // These x86-32 helper-call-involved instructions are lowered in this 1098 // These x86-32 helper-call-involved instructions are lowered in this
1104 // separate switch. This is because loOperand() and hiOperand() 1099 // separate switch. This is because loOperand() and hiOperand() may insert
1105 // may insert redundant instructions for constant blinding and 1100 // redundant instructions for constant blinding and pooling. Such redundant
1106 // pooling. Such redundant instructions will fail liveness analysis 1101 // instructions will fail liveness analysis under -Om1 setting. And,
1107 // under -Om1 setting. And, actually these arguments do not need 1102 // actually these arguments do not need to be processed with loOperand()
1108 // to be processed with loOperand() and hiOperand() to be used. 1103 // and hiOperand() to be used.
1109 switch (Inst->getOp()) { 1104 switch (Inst->getOp()) {
1110 case InstArithmetic::Udiv: { 1105 case InstArithmetic::Udiv: {
1111 const SizeT MaxSrcs = 2; 1106 const SizeT MaxSrcs = 2;
1112 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs); 1107 InstCall *Call = makeHelperCall(H_udiv_i64, Dest, MaxSrcs);
1113 Call->addArg(Inst->getSrc(0)); 1108 Call->addArg(Inst->getSrc(0));
1114 Call->addArg(Inst->getSrc(1)); 1109 Call->addArg(Inst->getSrc(1));
1115 lowerCall(Call); 1110 lowerCall(Call);
1116 return; 1111 return;
1117 } 1112 }
1118 case InstArithmetic::Sdiv: { 1113 case InstArithmetic::Sdiv: {
(...skipping 90 matching lines...)
1209 // t4.hi += t2 1204 // t4.hi += t2
1210 // a.hi = t4.hi 1205 // a.hi = t4.hi
1211 // The mul instruction cannot take an immediate operand. 1206 // The mul instruction cannot take an immediate operand.
1212 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem); 1207 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
1213 _mov(T_1, Src0Hi); 1208 _mov(T_1, Src0Hi);
1214 _imul(T_1, Src1Lo); 1209 _imul(T_1, Src1Lo);
1215 _mov(T_2, Src1Hi); 1210 _mov(T_2, Src1Hi);
1216 _imul(T_2, Src0Lo); 1211 _imul(T_2, Src0Lo);
1217 _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax); 1212 _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax);
1218 _mul(T_4Lo, T_3, Src1Lo); 1213 _mul(T_4Lo, T_3, Src1Lo);
1219 // The mul instruction produces two dest variables, edx:eax. We 1214 // The mul instruction produces two dest variables, edx:eax. We create a
1220 // create a fake definition of edx to account for this. 1215 // fake definition of edx to account for this.
1221 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); 1216 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
1222 _mov(DestLo, T_4Lo); 1217 _mov(DestLo, T_4Lo);
1223 _add(T_4Hi, T_1); 1218 _add(T_4Hi, T_1);
1224 _add(T_4Hi, T_2); 1219 _add(T_4Hi, T_2);
1225 _mov(DestHi, T_4Hi); 1220 _mov(DestHi, T_4Hi);
1226 } break; 1221 } break;
1227 case InstArithmetic::Shl: { 1222 case InstArithmetic::Shl: {
1228 // TODO: Refactor the similarities between Shl, Lshr, and Ashr. 1223 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
1229 // gcc does the following: 1224 // gcc does the following:
1230 // a=b<<c ==> 1225 // a=b<<c ==>
(...skipping 15 matching lines...)
1246 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1241 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1247 typename Traits::Insts::Label *Label = 1242 typename Traits::Insts::Label *Label =
1248 Traits::Insts::Label::create(Func, this); 1243 Traits::Insts::Label::create(Func, this);
1249 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); 1244 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
1250 _mov(T_2, Src0Lo); 1245 _mov(T_2, Src0Lo);
1251 _mov(T_3, Src0Hi); 1246 _mov(T_3, Src0Hi);
1252 _shld(T_3, T_2, T_1); 1247 _shld(T_3, T_2, T_1);
1253 _shl(T_2, T_1); 1248 _shl(T_2, T_1);
1254 _test(T_1, BitTest); 1249 _test(T_1, BitTest);
1255 _br(Traits::Cond::Br_e, Label); 1250 _br(Traits::Cond::Br_e, Label);
1256 // T_2 and T_3 are being assigned again because of the 1251 // T_2 and T_3 are being assigned again because of the intra-block
1257 // intra-block control flow, so we need the _mov_nonkillable 1252 // control flow, so we need the _mov_nonkillable variant to avoid
1258 // variant to avoid liveness problems. 1253 // liveness problems.
1259 _mov_nonkillable(T_3, T_2); 1254 _mov_nonkillable(T_3, T_2);
1260 _mov_nonkillable(T_2, Zero); 1255 _mov_nonkillable(T_2, Zero);
1261 Context.insert(Label); 1256 Context.insert(Label);
1262 _mov(DestLo, T_2); 1257 _mov(DestLo, T_2);
1263 _mov(DestHi, T_3); 1258 _mov(DestHi, T_3);
1264 } break; 1259 } break;
1265 case InstArithmetic::Lshr: { 1260 case InstArithmetic::Lshr: {
1266 // a=b>>c (unsigned) ==> 1261 // a=b>>c (unsigned) ==>
1267 // t1:ecx = c.lo & 0xff 1262 // t1:ecx = c.lo & 0xff
1268 // t2 = b.lo 1263 // t2 = b.lo
(...skipping 13 matching lines...)
1282 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1277 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1283 typename Traits::Insts::Label *Label = 1278 typename Traits::Insts::Label *Label =
1284 Traits::Insts::Label::create(Func, this); 1279 Traits::Insts::Label::create(Func, this);
1285 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); 1280 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
1286 _mov(T_2, Src0Lo); 1281 _mov(T_2, Src0Lo);
1287 _mov(T_3, Src0Hi); 1282 _mov(T_3, Src0Hi);
1288 _shrd(T_2, T_3, T_1); 1283 _shrd(T_2, T_3, T_1);
1289 _shr(T_3, T_1); 1284 _shr(T_3, T_1);
1290 _test(T_1, BitTest); 1285 _test(T_1, BitTest);
1291 _br(Traits::Cond::Br_e, Label); 1286 _br(Traits::Cond::Br_e, Label);
1292 // T_2 and T_3 are being assigned again because of the 1287 // T_2 and T_3 are being assigned again because of the intra-block
1293 // intra-block control flow, so we need the _mov_nonkillable 1288 // control flow, so we need the _mov_nonkillable variant to avoid
1294 // variant to avoid liveness problems. 1289 // liveness problems.
1295 _mov_nonkillable(T_2, T_3); 1290 _mov_nonkillable(T_2, T_3);
1296 _mov_nonkillable(T_3, Zero); 1291 _mov_nonkillable(T_3, Zero);
1297 Context.insert(Label); 1292 Context.insert(Label);
1298 _mov(DestLo, T_2); 1293 _mov(DestLo, T_2);
1299 _mov(DestHi, T_3); 1294 _mov(DestHi, T_3);
1300 } break; 1295 } break;
1301 case InstArithmetic::Ashr: { 1296 case InstArithmetic::Ashr: {
1302 // a=b>>c (signed) ==> 1297 // a=b>>c (signed) ==>
1303 // t1:ecx = c.lo & 0xff 1298 // t1:ecx = c.lo & 0xff
1304 // t2 = b.lo 1299 // t2 = b.lo
(...skipping 13 matching lines...)
1318 Constant *SignExtend = Ctx->getConstantInt32(0x1f); 1313 Constant *SignExtend = Ctx->getConstantInt32(0x1f);
1319 typename Traits::Insts::Label *Label = 1314 typename Traits::Insts::Label *Label =
1320 Traits::Insts::Label::create(Func, this); 1315 Traits::Insts::Label::create(Func, this);
1321 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx); 1316 _mov(T_1, Src1Lo, Traits::RegisterSet::Reg_ecx);
1322 _mov(T_2, Src0Lo); 1317 _mov(T_2, Src0Lo);
1323 _mov(T_3, Src0Hi); 1318 _mov(T_3, Src0Hi);
1324 _shrd(T_2, T_3, T_1); 1319 _shrd(T_2, T_3, T_1);
1325 _sar(T_3, T_1); 1320 _sar(T_3, T_1);
1326 _test(T_1, BitTest); 1321 _test(T_1, BitTest);
1327 _br(Traits::Cond::Br_e, Label); 1322 _br(Traits::Cond::Br_e, Label);
1328 // T_2 and T_3 are being assigned again because of the 1323 // T_2 and T_3 are being assigned again because of the intra-block
1329 // intra-block control flow, so T_2 needs the _mov_nonkillable 1324 // control flow, so T_2 needs the _mov_nonkillable variant to avoid
1330 // variant to avoid liveness problems. T_3 doesn't need special 1325 // liveness problems. T_3 doesn't need special treatment because it is
1331 // treatment because it is reassigned via _sar instead of _mov. 1326 // reassigned via _sar instead of _mov.
1332 _mov_nonkillable(T_2, T_3); 1327 _mov_nonkillable(T_2, T_3);
1333 _sar(T_3, SignExtend); 1328 _sar(T_3, SignExtend);
1334 Context.insert(Label); 1329 Context.insert(Label);
1335 _mov(DestLo, T_2); 1330 _mov(DestLo, T_2);
1336 _mov(DestHi, T_3); 1331 _mov(DestHi, T_3);
1337 } break; 1332 } break;
1338 case InstArithmetic::Fadd: 1333 case InstArithmetic::Fadd:
1339 case InstArithmetic::Fsub: 1334 case InstArithmetic::Fsub:
1340 case InstArithmetic::Fmul: 1335 case InstArithmetic::Fmul:
1341 case InstArithmetic::Fdiv: 1336 case InstArithmetic::Fdiv:
1342 case InstArithmetic::Frem: 1337 case InstArithmetic::Frem:
1343 llvm_unreachable("FP instruction with i64 type"); 1338 llvm_unreachable("FP instruction with i64 type");
1344 break; 1339 break;
1345 case InstArithmetic::Udiv: 1340 case InstArithmetic::Udiv:
1346 case InstArithmetic::Sdiv: 1341 case InstArithmetic::Sdiv:
1347 case InstArithmetic::Urem: 1342 case InstArithmetic::Urem:
1348 case InstArithmetic::Srem: 1343 case InstArithmetic::Srem:
1349 llvm_unreachable("Call-helper-involved instruction for i64 type \ 1344 llvm_unreachable("Call-helper-involved instruction for i64 type \
1350 should have already been handled before"); 1345 should have already been handled before");
1351 break; 1346 break;
1352 } 1347 }
1353 return; 1348 return;
1354 } 1349 }
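The Shl/Lshr/Ashr cases above pair a double shift (shld/shrd) with a conditional fixup when the count is 32 or more (the _test/_br sequence). For shift counts in the 0..63 range, the left-shift case behaves like this C helper (hypothetical, assumes 32-bit unsigned words; illustration only):

  void shl64(unsigned *Lo, unsigned *Hi, unsigned Count) {
    unsigned L = *Lo, H = *Hi;
    if (Count >= 32) {      // fixup path for counts >= 32 (the _test/_br above)
      H = L << (Count - 32);
      L = 0;
    } else if (Count > 0) { // shld/shl path; a zero count changes nothing
      H = (H << Count) | (L >> (32 - Count));
      L <<= Count;
    }
    *Lo = L;
    *Hi = H;
  }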
1355 if (isVectorType(Dest->getType())) { 1350 if (isVectorType(Dest->getType())) {
1356 // TODO: Trap on integer divide and integer modulo by zero. 1351 // TODO: Trap on integer divide and integer modulo by zero. See:
1357 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 1352 // https://code.google.com/p/nativeclient/issues/detail?id=3899
1358 if (llvm::isa<typename Traits::X86OperandMem>(Src1)) 1353 if (llvm::isa<typename Traits::X86OperandMem>(Src1))
1359 Src1 = legalizeToReg(Src1); 1354 Src1 = legalizeToReg(Src1);
1360 switch (Inst->getOp()) { 1355 switch (Inst->getOp()) {
1361 case InstArithmetic::_num: 1356 case InstArithmetic::_num:
1362 llvm_unreachable("Unknown arithmetic operator"); 1357 llvm_unreachable("Unknown arithmetic operator");
1363 break; 1358 break;
1364 case InstArithmetic::Add: { 1359 case InstArithmetic::Add: {
1365 Variable *T = makeReg(Dest->getType()); 1360 Variable *T = makeReg(Dest->getType());
1366 _movp(T, Src0); 1361 _movp(T, Src0);
1367 _padd(T, Src1); 1362 _padd(T, Src1);
(...skipping 144 matching lines...)
1512 case InstArithmetic::Sub: 1507 case InstArithmetic::Sub:
1513 _mov(T, Src0); 1508 _mov(T, Src0);
1514 _sub(T, Src1); 1509 _sub(T, Src1);
1515 _mov(Dest, T); 1510 _mov(Dest, T);
1516 break; 1511 break;
1517 case InstArithmetic::Mul: 1512 case InstArithmetic::Mul:
1518 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { 1513 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1519 if (optimizeScalarMul(Dest, Src0, C->getValue())) 1514 if (optimizeScalarMul(Dest, Src0, C->getValue()))
1520 return; 1515 return;
1521 } 1516 }
1522 // The 8-bit version of imul only allows the form "imul r/m8" 1517 // The 8-bit version of imul only allows the form "imul r/m8" where T must
1523 // where T must be in eax. 1518 // be in eax.
1524 if (isByteSizedArithType(Dest->getType())) { 1519 if (isByteSizedArithType(Dest->getType())) {
1525 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1520 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1526 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1521 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1527 } else { 1522 } else {
1528 _mov(T, Src0); 1523 _mov(T, Src0);
1529 } 1524 }
1530 _imul(T, Src1); 1525 _imul(T, Src1);
1531 _mov(Dest, T); 1526 _mov(Dest, T);
1532 break; 1527 break;
1533 case InstArithmetic::Shl: 1528 case InstArithmetic::Shl:
(...skipping 39 matching lines...)
1573 Context.insert(InstFakeUse::create(Func, T_eax)); 1568 Context.insert(InstFakeUse::create(Func, T_eax));
1574 } else { 1569 } else {
1575 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1570 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1576 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1571 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1577 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx); 1572 _mov(T_edx, Zero, Traits::RegisterSet::Reg_edx);
1578 _div(T, Src1, T_edx); 1573 _div(T, Src1, T_edx);
1579 _mov(Dest, T); 1574 _mov(Dest, T);
1580 } 1575 }
1581 break; 1576 break;
1582 case InstArithmetic::Sdiv: 1577 case InstArithmetic::Sdiv:
1583 // TODO(stichnot): Enable this after doing better performance 1578 // TODO(stichnot): Enable this after doing better performance and cross
1584 // and cross testing. 1579 // testing.
1585 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 1580 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1586 // Optimize division by constant power of 2, but not for Om1 1581 // Optimize division by constant power of 2, but not for Om1 or O0, just
1587 // or O0, just to keep things simple there. 1582 // to keep things simple there.
1588 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { 1583 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1589 int32_t Divisor = C->getValue(); 1584 int32_t Divisor = C->getValue();
1590 uint32_t UDivisor = static_cast<uint32_t>(Divisor); 1585 uint32_t UDivisor = static_cast<uint32_t>(Divisor);
1591 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { 1586 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
1592 uint32_t LogDiv = llvm::Log2_32(UDivisor); 1587 uint32_t LogDiv = llvm::Log2_32(UDivisor);
1593 Type Ty = Dest->getType(); 1588 Type Ty = Dest->getType();
1594 // LLVM does the following for dest=src/(1<<log): 1589 // LLVM does the following for dest=src/(1<<log):
1595 // t=src 1590 // t=src
1596 // sar t,typewidth-1 // -1 if src is negative, 0 if not 1591 // sar t,typewidth-1 // -1 if src is negative, 0 if not
1597 // shr t,typewidth-log 1592 // shr t,typewidth-log
1598 // add t,src 1593 // add t,src
1599 // sar t,log 1594 // sar t,log
1600 // dest=t 1595 // dest=t
1601 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); 1596 uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty);
1602 _mov(T, Src0); 1597 _mov(T, Src0);
1603 // If for some reason we are dividing by 1, just treat it 1598 // If for some reason we are dividing by 1, just treat it like an
1604 // like an assignment. 1599 // assignment.
1605 if (LogDiv > 0) { 1600 if (LogDiv > 0) {
1606 // The initial sar is unnecessary when dividing by 2. 1601 // The initial sar is unnecessary when dividing by 2.
1607 if (LogDiv > 1) 1602 if (LogDiv > 1)
1608 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1)); 1603 _sar(T, Ctx->getConstantInt(Ty, TypeWidth - 1));
1609 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv)); 1604 _shr(T, Ctx->getConstantInt(Ty, TypeWidth - LogDiv));
1610 _add(T, Src0); 1605 _add(T, Src0);
1611 _sar(T, Ctx->getConstantInt(Ty, LogDiv)); 1606 _sar(T, Ctx->getConstantInt(Ty, LogDiv));
1612 } 1607 }
1613 _mov(Dest, T); 1608 _mov(Dest, T);
1614 return; 1609 return;
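The power-of-two signed division sequence spelled out in the comment above (sar, shr, add, sar) can be sanity-checked in portable C++. This is only a sketch, not Subzero's emitted code: the helper name and the fixed 32-bit width are assumptions for illustration, and it relies on >> of a negative signed value being an arithmetic shift, as the compilers this backend targets provide.

#include <cstdint>

// dest = Src / (1 << LogDiv), rounding toward zero, with no divide instruction.
int32_t sdivByPowerOfTwo(int32_t Src, uint32_t LogDiv) {
  const uint32_t Width = 32;
  int32_t T = Src;
  if (LogDiv > 0) {                     // LogDiv == 0 is just an assignment
    if (LogDiv > 1)
      T >>= (Width - 1);                // sar: -1 if Src is negative, 0 if not
    T = static_cast<int32_t>(static_cast<uint32_t>(T) >> (Width - LogDiv));
                                        // shr: bias = 2^LogDiv - 1 if negative, else 0
    T += Src;                           // add the bias
    T >>= LogDiv;                       // sar: truncating divide of the biased value
  }
  return T;
}

For example, sdivByPowerOfTwo(-7, 1) gives -3, matching C's round-toward-zero -7 / 2.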
(...skipping 34 matching lines...)
1649 } else { 1644 } else {
1650 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1645 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1651 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx); 1646 T_edx = makeReg(Dest->getType(), Traits::RegisterSet::Reg_edx);
1652 _mov(T_edx, Zero); 1647 _mov(T_edx, Zero);
1653 _mov(T, Src0, Traits::RegisterSet::Reg_eax); 1648 _mov(T, Src0, Traits::RegisterSet::Reg_eax);
1654 _div(T_edx, Src1, T); 1649 _div(T_edx, Src1, T);
1655 _mov(Dest, T_edx); 1650 _mov(Dest, T_edx);
1656 } 1651 }
1657 break; 1652 break;
1658 case InstArithmetic::Srem: 1653 case InstArithmetic::Srem:
1659 // TODO(stichnot): Enable this after doing better performance 1654 // TODO(stichnot): Enable this after doing better performance and cross
1660 // and cross testing. 1655 // testing.
1661 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { 1656 if (false && Ctx->getFlags().getOptLevel() >= Opt_1) {
1662 // Optimize mod by constant power of 2, but not for Om1 or O0, 1657 // Optimize mod by constant power of 2, but not for Om1 or O0, just to
1663 // just to keep things simple there. 1658 // keep things simple there.
1664 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { 1659 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
1665 int32_t Divisor = C->getValue(); 1660 int32_t Divisor = C->getValue();
1666 uint32_t UDivisor = static_cast<uint32_t>(Divisor); 1661 uint32_t UDivisor = static_cast<uint32_t>(Divisor);
1667 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) { 1662 if (Divisor > 0 && llvm::isPowerOf2_32(UDivisor)) {
1668 uint32_t LogDiv = llvm::Log2_32(UDivisor); 1663 uint32_t LogDiv = llvm::Log2_32(UDivisor);
1669 Type Ty = Dest->getType(); 1664 Type Ty = Dest->getType();
1670 // LLVM does the following for dest=src%(1<<log): 1665 // LLVM does the following for dest=src%(1<<log):
1671 // t=src 1666 // t=src
1672 // sar t,typewidth-1 // -1 if src is negative, 0 if not 1667 // sar t,typewidth-1 // -1 if src is negative, 0 if not
1673 // shr t,typewidth-log 1668 // shr t,typewidth-log
(...skipping 96 matching lines...)
1770 _mov(T_Hi, Src0Hi); 1765 _mov(T_Hi, Src0Hi);
1771 _mov(DestHi, T_Hi); 1766 _mov(DestHi, T_Hi);
1772 } else { 1767 } else {
1773 Operand *Src0Legal; 1768 Operand *Src0Legal;
1774 if (Dest->hasReg()) { 1769 if (Dest->hasReg()) {
1775 // If Dest already has a physical register, then only basic legalization 1770 // If Dest already has a physical register, then only basic legalization
1776 // is needed, as the source operand can be a register, immediate, or 1771 // is needed, as the source operand can be a register, immediate, or
1777 // memory. 1772 // memory.
1778 Src0Legal = legalize(Src0); 1773 Src0Legal = legalize(Src0);
1779 } else { 1774 } else {
1780 // If Dest could be a stack operand, then RI must be a physical 1775 // If Dest could be a stack operand, then RI must be a physical register
1781 // register or a scalar integer immediate. 1776 // or a scalar integer immediate.
1782 Src0Legal = legalize(Src0, Legal_Reg | Legal_Imm); 1777 Src0Legal = legalize(Src0, Legal_Reg | Legal_Imm);
1783 } 1778 }
1784 if (isVectorType(Dest->getType())) 1779 if (isVectorType(Dest->getType()))
1785 _movp(Dest, Src0Legal); 1780 _movp(Dest, Src0Legal);
1786 else 1781 else
1787 _mov(Dest, Src0Legal); 1782 _mov(Dest, Src0Legal);
1788 } 1783 }
1789 } 1784 }
1790 1785
1791 template <class Machine> 1786 template <class Machine>
1792 void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) { 1787 void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) {
1793 if (Inst->isUnconditional()) { 1788 if (Inst->isUnconditional()) {
1794 _br(Inst->getTargetUnconditional()); 1789 _br(Inst->getTargetUnconditional());
1795 return; 1790 return;
1796 } 1791 }
1797 Operand *Cond = Inst->getCondition(); 1792 Operand *Cond = Inst->getCondition();
1798 1793
1799 // Handle folding opportunities. 1794 // Handle folding opportunities.
1800 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) { 1795 if (const class Inst *Producer = FoldingInfo.getProducerFor(Cond)) {
1801 assert(Producer->isDeleted()); 1796 assert(Producer->isDeleted());
1802 switch (BoolFolding::getProducerKind(Producer)) { 1797 switch (BoolFolding::getProducerKind(Producer)) {
1803 default: 1798 default:
1804 break; 1799 break;
1805 case BoolFolding::PK_Icmp32: { 1800 case BoolFolding::PK_Icmp32: {
1806 // TODO(stichnot): Refactor similarities between this block and 1801 // TODO(stichnot): Refactor similarities between this block and the
1807 // the corresponding code in lowerIcmp(). 1802 // corresponding code in lowerIcmp().
1808 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); 1803 auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer);
1809 Operand *Src0 = Producer->getSrc(0); 1804 Operand *Src0 = Producer->getSrc(0);
1810 Operand *Src1 = legalize(Producer->getSrc(1)); 1805 Operand *Src1 = legalize(Producer->getSrc(1));
1811 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1); 1806 Operand *Src0RM = legalizeSrc0ForCmp(Src0, Src1);
1812 _cmp(Src0RM, Src1); 1807 _cmp(Src0RM, Src1);
1813 _br(Traits::getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(), 1808 _br(Traits::getIcmp32Mapping(Cmp->getCondition()), Inst->getTargetTrue(),
1814 Inst->getTargetFalse()); 1809 Inst->getTargetFalse());
1815 return; 1810 return;
1816 } 1811 }
1817 } 1812 }
(...skipping 10 matching lines...)
1828 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) 1823 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
1829 InstCast::OpKind CastKind = Inst->getCastKind(); 1824 InstCast::OpKind CastKind = Inst->getCastKind();
1830 Variable *Dest = Inst->getDest(); 1825 Variable *Dest = Inst->getDest();
1831 switch (CastKind) { 1826 switch (CastKind) {
1832 default: 1827 default:
1833 Func->setError("Cast type not supported"); 1828 Func->setError("Cast type not supported");
1834 return; 1829 return;
1835 case InstCast::Sext: { 1830 case InstCast::Sext: {
1836 // Src0RM is the source operand legalized to physical register or memory, 1831 // Src0RM is the source operand legalized to physical register or memory,
1837 // but not immediate, since the relevant x86 native instructions don't 1832 // but not immediate, since the relevant x86 native instructions don't
1838 // allow an immediate operand. If the operand is an immediate, we could 1833 // allow an immediate operand. If the operand is an immediate, we could
1839 // consider computing the strength-reduced result at translation time, 1834 // consider computing the strength-reduced result at translation time, but
1840 // but we're unlikely to see something like that in the bitcode that 1835 // we're unlikely to see something like that in the bitcode that the
1841 // the optimizer wouldn't have already taken care of. 1836 // optimizer wouldn't have already taken care of.
1842 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 1837 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
1843 if (isVectorType(Dest->getType())) { 1838 if (isVectorType(Dest->getType())) {
1844 Type DestTy = Dest->getType(); 1839 Type DestTy = Dest->getType();
1845 if (DestTy == IceType_v16i8) { 1840 if (DestTy == IceType_v16i8) {
1846 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 1841 // onemask = materialize(1,1,...); dst = (src & onemask) > 0
1847 Variable *OneMask = makeVectorOfOnes(Dest->getType()); 1842 Variable *OneMask = makeVectorOfOnes(Dest->getType());
1848 Variable *T = makeReg(DestTy); 1843 Variable *T = makeReg(DestTy);
1849 _movp(T, Src0RM); 1844 _movp(T, Src0RM);
1850 _pand(T, OneMask); 1845 _pand(T, OneMask);
1851 Variable *Zeros = makeVectorOfZeros(Dest->getType()); 1846 Variable *Zeros = makeVectorOfZeros(Dest->getType());
(...skipping 39 matching lines...)
1891 // sar t1, dst_bitwidth - 1 1886 // sar t1, dst_bitwidth - 1
1892 // dst = t1 1887 // dst = t1
1893 size_t DestBits = 1888 size_t DestBits =
1894 Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType()); 1889 Traits::X86_CHAR_BIT * typeWidthInBytes(Dest->getType());
1895 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1); 1890 Constant *ShiftAmount = Ctx->getConstantInt32(DestBits - 1);
1896 Variable *T = makeReg(Dest->getType()); 1891 Variable *T = makeReg(Dest->getType());
1897 if (typeWidthInBytes(Dest->getType()) <= 1892 if (typeWidthInBytes(Dest->getType()) <=
1898 typeWidthInBytes(Src0RM->getType())) { 1893 typeWidthInBytes(Src0RM->getType())) {
1899 _mov(T, Src0RM); 1894 _mov(T, Src0RM);
1900 } else { 1895 } else {
1901 // Widen the source using movsx or movzx. (It doesn't matter 1896 // Widen the source using movsx or movzx. (It doesn't matter which one,
1902 // which one, since the following shl/sar overwrite the bits.) 1897 // since the following shl/sar overwrite the bits.)
1903 _movzx(T, Src0RM); 1898 _movzx(T, Src0RM);
1904 } 1899 }
1905 _shl(T, ShiftAmount); 1900 _shl(T, ShiftAmount);
1906 _sar(T, ShiftAmount); 1901 _sar(T, ShiftAmount);
1907 _mov(Dest, T); 1902 _mov(Dest, T);
1908 } else { 1903 } else {
1909 // t1 = movsx src; dst = t1 1904 // t1 = movsx src; dst = t1
1910 Variable *T = makeReg(Dest->getType()); 1905 Variable *T = makeReg(Dest->getType());
1911 _movsx(T, Src0RM); 1906 _movsx(T, Src0RM);
1912 _mov(Dest, T); 1907 _mov(Dest, T);
(...skipping 90 matching lines...)
2003 if (isVectorType(Dest->getType())) { 1998 if (isVectorType(Dest->getType())) {
2004 assert(Dest->getType() == IceType_v4i32 && 1999 assert(Dest->getType() == IceType_v4i32 &&
2005 Inst->getSrc(0)->getType() == IceType_v4f32); 2000 Inst->getSrc(0)->getType() == IceType_v4f32);
2006 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); 2001 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem);
2007 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM)) 2002 if (llvm::isa<typename Traits::X86OperandMem>(Src0RM))
2008 Src0RM = legalizeToReg(Src0RM); 2003 Src0RM = legalizeToReg(Src0RM);
2009 Variable *T = makeReg(Dest->getType()); 2004 Variable *T = makeReg(Dest->getType());
2010 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); 2005 _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq);
2011 _movp(Dest, T); 2006 _movp(Dest, T);
2012 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 2007 } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
2013 // Use a helper for converting floating-point values to 64-bit 2008 // Use a helper for converting floating-point values to 64-bit integers.
2014 // integers. SSE2 appears to have no way to convert from xmm 2009 // SSE2 appears to have no way to convert from xmm registers to something
2015 // registers to something like the edx:eax register pair, and 2010 // like the edx:eax register pair, and gcc and clang both want to use x87
2016 // gcc and clang both want to use x87 instructions complete with 2011 // instructions complete with temporary manipulation of the status word.
2017 // temporary manipulation of the status word. This helper is 2012 // This helper is not needed for x86-64.
2018 // not needed for x86-64.
2019 split64(Dest); 2013 split64(Dest);
2020 const SizeT MaxSrcs = 1; 2014 const SizeT MaxSrcs = 1;
2021 Type SrcType = Inst->getSrc(0)->getType(); 2015 Type SrcType = Inst->getSrc(0)->getType();
2022 InstCall *Call = 2016 InstCall *Call =
2023 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64 2017 makeHelperCall(isFloat32Asserting32Or64(SrcType) ? H_fptosi_f32_i64
2024 : H_fptosi_f64_i64, 2018 : H_fptosi_f64_i64,
2025 Dest, MaxSrcs); 2019 Dest, MaxSrcs);
2026 Call->addArg(Inst->getSrc(0)); 2020 Call->addArg(Inst->getSrc(0));
2027 lowerCall(Call); 2021 lowerCall(Call);
2028 } else { 2022 } else {
(...skipping 114 matching lines...)
2143 Operand *Src0 = Inst->getSrc(0); 2137 Operand *Src0 = Inst->getSrc(0);
2144 if (isVectorType(Src0->getType())) { 2138 if (isVectorType(Src0->getType())) {
2145 assert(Dest->getType() == IceType_v4f32 && 2139 assert(Dest->getType() == IceType_v4f32 &&
2146 Src0->getType() == IceType_v4i32); 2140 Src0->getType() == IceType_v4i32);
2147 const SizeT MaxSrcs = 1; 2141 const SizeT MaxSrcs = 1;
2148 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs); 2142 InstCall *Call = makeHelperCall(H_uitofp_4xi32_4xf32, Dest, MaxSrcs);
2149 Call->addArg(Src0); 2143 Call->addArg(Src0);
2150 lowerCall(Call); 2144 lowerCall(Call);
2151 } else if (Src0->getType() == IceType_i64 || 2145 } else if (Src0->getType() == IceType_i64 ||
2152 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { 2146 (!Traits::Is64Bit && Src0->getType() == IceType_i32)) {
2153 // Use a helper for x86-32 and x86-64. Also use a helper for 2147 // Use a helper for x86-32 and x86-64. Also use a helper for i32 on
2154 // i32 on x86-32. 2148 // x86-32.
2155 const SizeT MaxSrcs = 1; 2149 const SizeT MaxSrcs = 1;
2156 Type DestType = Dest->getType(); 2150 Type DestType = Dest->getType();
2157 IceString TargetString; 2151 IceString TargetString;
2158 if (isInt32Asserting32Or64(Src0->getType())) { 2152 if (isInt32Asserting32Or64(Src0->getType())) {
2159 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32 2153 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i32_f32
2160 : H_uitofp_i32_f64; 2154 : H_uitofp_i32_f64;
2161 } else { 2155 } else {
2162 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32 2156 TargetString = isFloat32Asserting32Or64(DestType) ? H_uitofp_i64_f32
2163 : H_uitofp_i64_f64; 2157 : H_uitofp_i64_f64;
2164 } 2158 }
(...skipping 113 matching lines...)
2278 _mov(DestLo, T_Lo); 2272 _mov(DestLo, T_Lo);
2279 _mov(T_Hi, SpillHi); 2273 _mov(T_Hi, SpillHi);
2280 _mov(DestHi, T_Hi); 2274 _mov(DestHi, T_Hi);
2281 } 2275 }
2282 } break; 2276 } break;
2283 case IceType_f64: { 2277 case IceType_f64: {
2284 assert(Src0->getType() == IceType_i64); 2278 assert(Src0->getType() == IceType_i64);
2285 if (Traits::Is64Bit) { 2279 if (Traits::Is64Bit) {
2286 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2280 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2287 Variable *T = makeReg(IceType_f64); 2281 Variable *T = makeReg(IceType_f64);
2288 // Movd requires its fp argument (in this case, the bitcast destination) 2282 // Movd requires its fp argument (in this case, the bitcast
2289 // to be an xmm register. 2283 // destination) to be an xmm register.
2290 T->setMustHaveReg(); 2284 T->setMustHaveReg();
2291 _movd(T, Src0RM); 2285 _movd(T, Src0RM);
2292 _mov(Dest, T); 2286 _mov(Dest, T);
2293 } else { 2287 } else {
2294 Src0 = legalize(Src0); 2288 Src0 = legalize(Src0);
2295 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) { 2289 if (llvm::isa<typename Traits::X86OperandMem>(Src0)) {
2296 Variable *T = Func->makeVariable(Dest->getType()); 2290 Variable *T = Func->makeVariable(Dest->getType());
2297 _movq(T, Src0); 2291 _movq(T, Src0);
2298 _movq(Dest, T); 2292 _movq(Dest, T);
2299 break; 2293 break;
(...skipping 11 matching lines...)
2311 Variable *Spill = SpillVar; 2305 Variable *Spill = SpillVar;
2312 Spill->setMustNotHaveReg(); 2306 Spill->setMustNotHaveReg();
2313 2307
2314 Variable *T_Lo = nullptr, *T_Hi = nullptr; 2308 Variable *T_Lo = nullptr, *T_Hi = nullptr;
2315 typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create( 2309 typename Traits::VariableSplit *SpillLo = Traits::VariableSplit::create(
2316 Func, Spill, Traits::VariableSplit::Low); 2310 Func, Spill, Traits::VariableSplit::Low);
2317 typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create( 2311 typename Traits::VariableSplit *SpillHi = Traits::VariableSplit::create(
2318 Func, Spill, Traits::VariableSplit::High); 2312 Func, Spill, Traits::VariableSplit::High);
2319 _mov(T_Lo, loOperand(Src0)); 2313 _mov(T_Lo, loOperand(Src0));
2320 // Technically, the Spill is defined after the _store happens, but 2314 // Technically, the Spill is defined after the _store happens, but
2321 // SpillLo is considered a "use" of Spill so define Spill before it 2315 // SpillLo is considered a "use" of Spill so define Spill before it is
2322 // is used. 2316 // used.
2323 Context.insert(InstFakeDef::create(Func, Spill)); 2317 Context.insert(InstFakeDef::create(Func, Spill));
2324 _store(T_Lo, SpillLo); 2318 _store(T_Lo, SpillLo);
2325 _mov(T_Hi, hiOperand(Src0)); 2319 _mov(T_Hi, hiOperand(Src0));
2326 _store(T_Hi, SpillHi); 2320 _store(T_Hi, SpillHi);
2327 _movq(Dest, Spill); 2321 _movq(Dest, Spill);
2328 } 2322 }
2329 } break; 2323 } break;
2330 case IceType_v8i1: { 2324 case IceType_v8i1: {
2331 assert(Src0->getType() == IceType_i8); 2325 assert(Src0->getType() == IceType_i8);
2332 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1); 2326 InstCall *Call = makeHelperCall(H_bitcast_i8_8xi1, Dest, 1);
(...skipping 44 matching lines...)
2377 InstructionSet >= Traits::SSE4_1; 2371 InstructionSet >= Traits::SSE4_1;
2378 if (CanUsePextr && Ty != IceType_v4f32) { 2372 if (CanUsePextr && Ty != IceType_v4f32) {
2379 // Use pextrb, pextrw, or pextrd. 2373 // Use pextrb, pextrw, or pextrd.
2380 Constant *Mask = Ctx->getConstantInt32(Index); 2374 Constant *Mask = Ctx->getConstantInt32(Index);
2381 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized); 2375 Variable *SourceVectR = legalizeToReg(SourceVectNotLegalized);
2382 _pextr(ExtractedElementR, SourceVectR, Mask); 2376 _pextr(ExtractedElementR, SourceVectR, Mask);
2383 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2377 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2384 // Use pshufd and movd/movss. 2378 // Use pshufd and movd/movss.
2385 Variable *T = nullptr; 2379 Variable *T = nullptr;
2386 if (Index) { 2380 if (Index) {
2387 // The shuffle only needs to occur if the element to be extracted 2381 // The shuffle only needs to occur if the element to be extracted is not
2388 // is not at the lowest index. 2382 // at the lowest index.
2389 Constant *Mask = Ctx->getConstantInt32(Index); 2383 Constant *Mask = Ctx->getConstantInt32(Index);
2390 T = makeReg(Ty); 2384 T = makeReg(Ty);
2391 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask); 2385 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
2392 } else { 2386 } else {
2393 T = legalizeToReg(SourceVectNotLegalized); 2387 T = legalizeToReg(SourceVectNotLegalized);
2394 } 2388 }
2395 2389
2396 if (InVectorElementTy == IceType_i32) { 2390 if (InVectorElementTy == IceType_i32) {
2397 _movd(ExtractedElementR, T); 2391 _movd(ExtractedElementR, T);
2398 } else { // Ty == IceType_f32 2392 } else { // Ty == IceType_f32
2399 // TODO(wala): _movss is only used here because _mov does not 2393 // TODO(wala): _movss is only used here because _mov does not allow a
2400 // allow a vector source and a scalar destination. _mov should be 2394 // vector source and a scalar destination. _mov should be able to be
2401 // able to be used here. 2395 // used here.
2402 // _movss is a binary instruction, so the FakeDef is needed to 2396 // _movss is a binary instruction, so the FakeDef is needed to keep the
2403 // keep the live range analysis consistent. 2397 // live range analysis consistent.
2404 Context.insert(InstFakeDef::create(Func, ExtractedElementR)); 2398 Context.insert(InstFakeDef::create(Func, ExtractedElementR));
2405 _movss(ExtractedElementR, T); 2399 _movss(ExtractedElementR, T);
2406 } 2400 }
2407 } else { 2401 } else {
2408 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); 2402 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2409 // Spill the value to a stack slot and do the extraction in memory. 2403 // Spill the value to a stack slot and do the extraction in memory.
2410 // 2404 //
2411 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when 2405 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support
2412 // support for legalizing to mem is implemented. 2406 // for legalizing to mem is implemented.
2413 Variable *Slot = Func->makeVariable(Ty); 2407 Variable *Slot = Func->makeVariable(Ty);
2414 Slot->setMustNotHaveReg(); 2408 Slot->setMustNotHaveReg();
2415 _movp(Slot, legalizeToReg(SourceVectNotLegalized)); 2409 _movp(Slot, legalizeToReg(SourceVectNotLegalized));
2416 2410
2417 // Compute the location of the element in memory. 2411 // Compute the location of the element in memory.
2418 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); 2412 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
2419 typename Traits::X86OperandMem *Loc = 2413 typename Traits::X86OperandMem *Loc =
2420 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); 2414 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
2421 _mov(ExtractedElementR, Loc); 2415 _mov(ExtractedElementR, Loc);
2422 } 2416 }
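The pshufd + movd path above corresponds to the following intrinsics sketch (SSE2); the lane index is hard-coded and the function name invented purely for illustration.

#include <emmintrin.h>

// Extract lane 2 of a v4i32: shuffle the wanted lane into position 0, then movd it out.
int extractLane2(__m128i V) {
  __m128i T = _mm_shuffle_epi32(V, _MM_SHUFFLE(2, 2, 2, 2));  // pshufd: lane 2 -> lane 0
  return _mm_cvtsi128_si32(T);                                // movd xmm -> GPR
}

For index 0 the shuffle is skipped and the movd alone suffices, exactly as the code above does.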
(...skipping 159 matching lines...)
2582 Src0 = NewSrc0; 2576 Src0 = NewSrc0;
2583 Src1 = NewSrc1; 2577 Src1 = NewSrc1;
2584 Ty = NewTy; 2578 Ty = NewTy;
2585 } 2579 }
2586 2580
2587 InstIcmp::ICond Condition = Inst->getCondition(); 2581 InstIcmp::ICond Condition = Inst->getCondition();
2588 2582
2589 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2583 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2590 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2584 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2591 2585
2592 // SSE2 only has signed comparison operations. Transform unsigned 2586 // SSE2 only has signed comparison operations. Transform unsigned inputs in
2593 // inputs in a manner that allows for the use of signed comparison 2587 // a manner that allows for the use of signed comparison operations by
2594 // operations by flipping the high order bits. 2588 // flipping the high order bits.
2595 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge || 2589 if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge ||
2596 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) { 2590 Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) {
2597 Variable *T0 = makeReg(Ty); 2591 Variable *T0 = makeReg(Ty);
2598 Variable *T1 = makeReg(Ty); 2592 Variable *T1 = makeReg(Ty);
2599 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); 2593 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
2600 _movp(T0, Src0RM); 2594 _movp(T0, Src0RM);
2601 _pxor(T0, HighOrderBits); 2595 _pxor(T0, HighOrderBits);
2602 _movp(T1, Src1RM); 2596 _movp(T1, Src1RM);
2603 _pxor(T1, HighOrderBits); 2597 _pxor(T1, HighOrderBits);
2604 Src0RM = T0; 2598 Src0RM = T0;
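The sign-bit flip that turns an unsigned comparison into a signed one can be illustrated with SSE2 intrinsics; the function name below is invented for the sketch.

#include <emmintrin.h>
#include <climits>

// Unsigned per-lane A > B using only SSE2's signed compare: XOR both sides with
// 0x80000000 to flip the high-order bit, then compare signed.
__m128i cmpgtUnsigned32(__m128i A, __m128i B) {
  const __m128i SignBit = _mm_set1_epi32(INT_MIN);
  return _mm_cmpgt_epi32(_mm_xor_si128(A, SignBit), _mm_xor_si128(B, SignBit));
}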
(...skipping 114 matching lines...)
2719 // Only constant indices are allowed in PNaCl IR. 2713 // Only constant indices are allowed in PNaCl IR.
2720 assert(ElementIndex); 2714 assert(ElementIndex);
2721 unsigned Index = ElementIndex->getValue(); 2715 unsigned Index = ElementIndex->getValue();
2722 assert(Index < typeNumElements(SourceVectNotLegalized->getType())); 2716 assert(Index < typeNumElements(SourceVectNotLegalized->getType()));
2723 2717
2724 Type Ty = SourceVectNotLegalized->getType(); 2718 Type Ty = SourceVectNotLegalized->getType();
2725 Type ElementTy = typeElementType(Ty); 2719 Type ElementTy = typeElementType(Ty);
2726 Type InVectorElementTy = Traits::getInVectorElementType(Ty); 2720 Type InVectorElementTy = Traits::getInVectorElementType(Ty);
2727 2721
2728 if (ElementTy == IceType_i1) { 2722 if (ElementTy == IceType_i1) {
2729 // Expand the element to the appropriate size for it to be inserted 2723 // Expand the element to the appropriate size for it to be inserted in the
2730 // in the vector. 2724 // vector.
2731 Variable *Expanded = Func->makeVariable(InVectorElementTy); 2725 Variable *Expanded = Func->makeVariable(InVectorElementTy);
2732 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded, 2726 InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded,
2733 ElementToInsertNotLegalized); 2727 ElementToInsertNotLegalized);
2734 lowerCast(Cast); 2728 lowerCast(Cast);
2735 ElementToInsertNotLegalized = Expanded; 2729 ElementToInsertNotLegalized = Expanded;
2736 } 2730 }
2737 2731
2738 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 || 2732 if (Ty == IceType_v8i16 || Ty == IceType_v8i1 ||
2739 InstructionSet >= Traits::SSE4_1) { 2733 InstructionSet >= Traits::SSE4_1) {
2740 // Use insertps, pinsrb, pinsrw, or pinsrd. 2734 // Use insertps, pinsrb, pinsrw, or pinsrd.
(...skipping 25 matching lines...)
2766 } 2760 }
2767 2761
2768 if (Index == 0) { 2762 if (Index == 0) {
2769 Variable *T = makeReg(Ty); 2763 Variable *T = makeReg(Ty);
2770 _movp(T, SourceVectRM); 2764 _movp(T, SourceVectRM);
2771 _movss(T, ElementR); 2765 _movss(T, ElementR);
2772 _movp(Inst->getDest(), T); 2766 _movp(Inst->getDest(), T);
2773 return; 2767 return;
2774 } 2768 }
2775 2769
2776 // shufps treats the source and desination operands as vectors of 2770 // shufps treats the source and destination operands as vectors of four
2777 // four doublewords. The destination's two high doublewords are 2771 // doublewords. The destination's two high doublewords are selected from
2778 // selected from the source operand and the two low doublewords are 2772 // the source operand and the two low doublewords are selected from the
2779 // selected from the (original value of) the destination operand. 2773 // (original value of) the destination operand. An insertelement operation
2780 // An insertelement operation can be effected with a sequence of two 2774 // can be effected with a sequence of two shufps operations with
2781 // shufps operations with appropriate masks. In all cases below, 2775 // appropriate masks. In all cases below, Element[0] is being inserted into
2782 // Element[0] is being inserted into SourceVectOperand. Indices are 2776 // SourceVectOperand. Indices are ordered from left to right.
2783 // ordered from left to right.
2784 // 2777 //
2785 // insertelement into index 1 (result is stored in ElementR): 2778 // insertelement into index 1 (result is stored in ElementR):
2786 // ElementR := ElementR[0, 0] SourceVectRM[0, 0] 2779 // ElementR := ElementR[0, 0] SourceVectRM[0, 0]
2787 // ElementR := ElementR[3, 0] SourceVectRM[2, 3] 2780 // ElementR := ElementR[3, 0] SourceVectRM[2, 3]
2788 // 2781 //
2789 // insertelement into index 2 (result is stored in T): 2782 // insertelement into index 2 (result is stored in T):
2790 // T := SourceVectRM 2783 // T := SourceVectRM
2791 // ElementR := ElementR[0, 0] T[0, 3] 2784 // ElementR := ElementR[0, 0] T[0, 3]
2792 // T := T[0, 1] ElementR[0, 3] 2785 // T := T[0, 1] ElementR[0, 3]
2793 // 2786 //
(...skipping 13 matching lines...)
2807 _movp(Inst->getDest(), ElementR); 2800 _movp(Inst->getDest(), ElementR);
2808 } else { 2801 } else {
2809 Variable *T = makeReg(Ty); 2802 Variable *T = makeReg(Ty);
2810 _movp(T, SourceVectRM); 2803 _movp(T, SourceVectRM);
2811 _shufps(ElementR, T, Mask1Constant); 2804 _shufps(ElementR, T, Mask1Constant);
2812 _shufps(T, ElementR, Mask2Constant); 2805 _shufps(T, ElementR, Mask2Constant);
2813 _movp(Inst->getDest(), T); 2806 _movp(Inst->getDest(), T);
2814 } 2807 }
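As a concrete instance of the two-shufps recipe described above, inserting a scalar into lane 1 of a v4f32 looks like this with intrinsics. Lane 0 here is the lowest element, and the names are illustrative only.

#include <xmmintrin.h>

// Result is {v0, S, v2, v3}, built from V = {v0, v1, v2, v3} and scalar S.
__m128 insertIntoLane1(__m128 V, float S) {
  __m128 E = _mm_set_ss(S);                                  // {S, 0, 0, 0}
  __m128 T = _mm_shuffle_ps(E, V, _MM_SHUFFLE(0, 0, 0, 0));  // {S, S, v0, v0}
  return _mm_shuffle_ps(T, V, _MM_SHUFFLE(3, 2, 0, 2));      // {v0, S, v2, v3}
}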
2815 } else { 2808 } else {
2816 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); 2809 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2817 // Spill the value to a stack slot and perform the insertion in 2810 // Spill the value to a stack slot and perform the insertion in memory.
2818 // memory.
2819 // 2811 //
2820 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when 2812 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support
2821 // support for legalizing to mem is implemented. 2813 // for legalizing to mem is implemented.
2822 Variable *Slot = Func->makeVariable(Ty); 2814 Variable *Slot = Func->makeVariable(Ty);
2823 Slot->setMustNotHaveReg(); 2815 Slot->setMustNotHaveReg();
2824 _movp(Slot, legalizeToReg(SourceVectNotLegalized)); 2816 _movp(Slot, legalizeToReg(SourceVectNotLegalized));
2825 2817
2826 // Compute the location of the position to insert in memory. 2818 // Compute the location of the position to insert in memory.
2827 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy); 2819 unsigned Offset = Index * typeWidthInBytes(InVectorElementTy);
2828 typename Traits::X86OperandMem *Loc = 2820 typename Traits::X86OperandMem *Loc =
2829 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset); 2821 getMemoryOperandForStackSlot(InVectorElementTy, Slot, Offset);
2830 _store(legalizeToReg(ElementToInsertNotLegalized), Loc); 2822 _store(legalizeToReg(ElementToInsertNotLegalized), Loc);
2831 2823
(...skipping 25 matching lines...)
2857 } 2849 }
2858 case Intrinsics::AtomicFence: 2850 case Intrinsics::AtomicFence:
2859 if (!Intrinsics::isMemoryOrderValid( 2851 if (!Intrinsics::isMemoryOrderValid(
2860 ID, getConstantMemoryOrder(Instr->getArg(0)))) { 2852 ID, getConstantMemoryOrder(Instr->getArg(0)))) {
2861 Func->setError("Unexpected memory ordering for AtomicFence"); 2853 Func->setError("Unexpected memory ordering for AtomicFence");
2862 return; 2854 return;
2863 } 2855 }
2864 _mfence(); 2856 _mfence();
2865 return; 2857 return;
2866 case Intrinsics::AtomicFenceAll: 2858 case Intrinsics::AtomicFenceAll:
2867 // NOTE: FenceAll should prevent any load/store from being moved 2859 // NOTE: FenceAll should prevent any load/store from being moved across the
2868 // across the fence (both atomic and non-atomic). The InstX8632Mfence 2860 // fence (both atomic and non-atomic). The InstX8632Mfence instruction is
2869 // instruction is currently marked coarsely as "HasSideEffects". 2861 // currently marked coarsely as "HasSideEffects".
2870 _mfence(); 2862 _mfence();
2871 return; 2863 return;
2872 case Intrinsics::AtomicIsLockFree: { 2864 case Intrinsics::AtomicIsLockFree: {
2873 // X86 is always lock free for 8/16/32/64 bit accesses. 2865 // X86 is always lock free for 8/16/32/64 bit accesses.
2874 // TODO(jvoung): Since the result is constant when given a constant 2866 // TODO(jvoung): Since the result is constant when given a constant byte
2875 // byte size, this opens up DCE opportunities. 2867 // size, this opens up DCE opportunities.
2876 Operand *ByteSize = Instr->getArg(0); 2868 Operand *ByteSize = Instr->getArg(0);
2877 Variable *Dest = Instr->getDest(); 2869 Variable *Dest = Instr->getDest();
2878 if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) { 2870 if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) {
2879 Constant *Result; 2871 Constant *Result;
2880 switch (CI->getValue()) { 2872 switch (CI->getValue()) {
2881 default: 2873 default:
2882 // Some x86-64 processors support the cmpxchg16b intruction, which 2874 // Some x86-64 processors support the cmpxchg16b instruction, which can
2883 // can make 16-byte operations lock free (when used with the LOCK 2875 // make 16-byte operations lock free (when used with the LOCK prefix).
2884 // prefix). However, that's not supported in 32-bit mode, so just 2876 // However, that's not supported in 32-bit mode, so just return 0 even
2885 // return 0 even for large sizes. 2877 // for large sizes.
2886 Result = Ctx->getConstantZero(IceType_i32); 2878 Result = Ctx->getConstantZero(IceType_i32);
2887 break; 2879 break;
2888 case 1: 2880 case 1:
2889 case 2: 2881 case 2:
2890 case 4: 2882 case 4:
2891 case 8: 2883 case 8:
2892 Result = Ctx->getConstantInt32(1); 2884 Result = Ctx->getConstantInt32(1);
2893 break; 2885 break;
2894 } 2886 }
2895 _mov(Dest, Result); 2887 _mov(Dest, Result);
2896 return; 2888 return;
2897 } 2889 }
2898 // The PNaCl ABI requires the byte size to be a compile-time constant. 2890 // The PNaCl ABI requires the byte size to be a compile-time constant.
2899 Func->setError("AtomicIsLockFree byte size should be compile-time const"); 2891 Func->setError("AtomicIsLockFree byte size should be compile-time const");
2900 return; 2892 return;
2901 } 2893 }
2902 case Intrinsics::AtomicLoad: { 2894 case Intrinsics::AtomicLoad: {
2903 // We require the memory address to be naturally aligned. 2895 // We require the memory address to be naturally aligned. Given that is the
2904 // Given that is the case, then normal loads are atomic. 2896 // case, then normal loads are atomic.
2905 if (!Intrinsics::isMemoryOrderValid( 2897 if (!Intrinsics::isMemoryOrderValid(
2906 ID, getConstantMemoryOrder(Instr->getArg(1)))) { 2898 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
2907 Func->setError("Unexpected memory ordering for AtomicLoad"); 2899 Func->setError("Unexpected memory ordering for AtomicLoad");
2908 return; 2900 return;
2909 } 2901 }
2910 Variable *Dest = Instr->getDest(); 2902 Variable *Dest = Instr->getDest();
2911 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { 2903 if (!Traits::Is64Bit && Dest->getType() == IceType_i64) {
2912 // Follow what GCC does and use a movq instead of what lowerLoad() 2904 // Follow what GCC does and use a movq instead of what lowerLoad()
2913 // normally does (split the load into two). 2905 // normally does (split the load into two). Thus, this skips
2914 // Thus, this skips load/arithmetic op folding. Load/arithmetic folding 2906 // load/arithmetic op folding. Load/arithmetic folding can't happen
2915 // can't happen anyway, since this is x86-32 and integer arithmetic only 2907 // anyway, since this is x86-32 and integer arithmetic only happens on
2916 // happens on 32-bit quantities. 2908 // 32-bit quantities.
2917 Variable *T = makeReg(IceType_f64); 2909 Variable *T = makeReg(IceType_f64);
2918 typename Traits::X86OperandMem *Addr = 2910 typename Traits::X86OperandMem *Addr =
2919 formMemoryOperand(Instr->getArg(0), IceType_f64); 2911 formMemoryOperand(Instr->getArg(0), IceType_f64);
2920 _movq(T, Addr); 2912 _movq(T, Addr);
2921 // Then cast the bits back out of the XMM register to the i64 Dest. 2913 // Then cast the bits back out of the XMM register to the i64 Dest.
2922 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T); 2914 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, Dest, T);
2923 lowerCast(Cast); 2915 lowerCast(Cast);
2924 // Make sure that the atomic load isn't elided when unused. 2916 // Make sure that the atomic load isn't elided when unused.
2925 Context.insert(InstFakeUse::create(Func, Dest->getLo())); 2917 Context.insert(InstFakeUse::create(Func, Dest->getLo()));
2926 Context.insert(InstFakeUse::create(Func, Dest->getHi())); 2918 Context.insert(InstFakeUse::create(Func, Dest->getHi()));
2927 return; 2919 return;
2928 } 2920 }
2929 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); 2921 InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0));
2930 lowerLoad(Load); 2922 lowerLoad(Load);
2931 // Make sure the atomic load isn't elided when unused, by adding a FakeUse. 2923 // Make sure the atomic load isn't elided when unused, by adding a FakeUse.
2932 // Since lowerLoad may fuse the load w/ an arithmetic instruction, 2924 // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert
2933 // insert the FakeUse on the last-inserted instruction's dest. 2925 // the FakeUse on the last-inserted instruction's dest.
2934 Context.insert( 2926 Context.insert(
2935 InstFakeUse::create(Func, Context.getLastInserted()->getDest())); 2927 InstFakeUse::create(Func, Context.getLastInserted()->getDest()));
2936 return; 2928 return;
2937 } 2929 }
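A rough user-level picture of the movq-based 64-bit atomic load described above, written with SSE2 intrinsics. This is illustrative only (client code should use std::atomic<int64_t>), it assumes Ptr is naturally aligned as the comment requires, and the helper name is made up.

#include <emmintrin.h>
#include <cstdint>

// One 8-byte read via movq instead of two 4-byte reads, so the value cannot tear.
int64_t atomicLoad64(const int64_t *Ptr) {
  __m128i T = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(Ptr));  // movq xmm, m64
  int64_t Result;
  _mm_storel_epi64(reinterpret_cast<__m128i *>(&Result), T);            // movq m64, xmm
  return Result;
}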
2938 case Intrinsics::AtomicRMW: 2930 case Intrinsics::AtomicRMW:
2939 if (!Intrinsics::isMemoryOrderValid( 2931 if (!Intrinsics::isMemoryOrderValid(
2940 ID, getConstantMemoryOrder(Instr->getArg(3)))) { 2932 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
2941 Func->setError("Unexpected memory ordering for AtomicRMW"); 2933 Func->setError("Unexpected memory ordering for AtomicRMW");
2942 return; 2934 return;
2943 } 2935 }
2944 lowerAtomicRMW( 2936 lowerAtomicRMW(
2945 Instr->getDest(), 2937 Instr->getDest(),
2946 static_cast<uint32_t>( 2938 static_cast<uint32_t>(
2947 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()), 2939 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
2948 Instr->getArg(1), Instr->getArg(2)); 2940 Instr->getArg(1), Instr->getArg(2));
2949 return; 2941 return;
2950 case Intrinsics::AtomicStore: { 2942 case Intrinsics::AtomicStore: {
2951 if (!Intrinsics::isMemoryOrderValid( 2943 if (!Intrinsics::isMemoryOrderValid(
2952 ID, getConstantMemoryOrder(Instr->getArg(2)))) { 2944 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
2953 Func->setError("Unexpected memory ordering for AtomicStore"); 2945 Func->setError("Unexpected memory ordering for AtomicStore");
2954 return; 2946 return;
2955 } 2947 }
2956 // We require the memory address to be naturally aligned. 2948 // We require the memory address to be naturally aligned. Given that is the
2957 // Given that is the case, then normal stores are atomic. 2949 // case, then normal stores are atomic. Add a fence after the store to make
2958 // Add a fence after the store to make it visible. 2950 // it visible.
2959 Operand *Value = Instr->getArg(0); 2951 Operand *Value = Instr->getArg(0);
2960 Operand *Ptr = Instr->getArg(1); 2952 Operand *Ptr = Instr->getArg(1);
2961 if (!Traits::Is64Bit && Value->getType() == IceType_i64) { 2953 if (!Traits::Is64Bit && Value->getType() == IceType_i64) {
2962 // Use a movq instead of what lowerStore() normally does 2954 // Use a movq instead of what lowerStore() normally does (split the store
2963 // (split the store into two), following what GCC does. 2955 // into two), following what GCC does. Cast the bits from int -> to an
2964 // Cast the bits from int -> to an xmm register first. 2956 // xmm register first.
2965 Variable *T = makeReg(IceType_f64); 2957 Variable *T = makeReg(IceType_f64);
2966 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); 2958 InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value);
2967 lowerCast(Cast); 2959 lowerCast(Cast);
2968 // Then store XMM w/ a movq. 2960 // Then store XMM w/ a movq.
2969 typename Traits::X86OperandMem *Addr = 2961 typename Traits::X86OperandMem *Addr =
2970 formMemoryOperand(Ptr, IceType_f64); 2962 formMemoryOperand(Ptr, IceType_f64);
2971 _storeq(T, Addr); 2963 _storeq(T, Addr);
2972 _mfence(); 2964 _mfence();
2973 return; 2965 return;
2974 } 2966 }
2975 InstStore *Store = InstStore::create(Func, Value, Ptr); 2967 InstStore *Store = InstStore::create(Func, Value, Ptr);
2976 lowerStore(Store); 2968 lowerStore(Store);
2977 _mfence(); 2969 _mfence();
2978 return; 2970 return;
2979 } 2971 }
2980 case Intrinsics::Bswap: { 2972 case Intrinsics::Bswap: {
2981 Variable *Dest = Instr->getDest(); 2973 Variable *Dest = Instr->getDest();
2982 Operand *Val = Instr->getArg(0); 2974 Operand *Val = Instr->getArg(0);
2983 // In 32-bit mode, bswap only works on 32-bit arguments, and the 2975 // In 32-bit mode, bswap only works on 32-bit arguments, and the argument
2984 // argument must be a register. Use rotate left for 16-bit bswap. 2976 // must be a register. Use rotate left for 16-bit bswap.
2985 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { 2977 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
2986 Val = legalizeUndef(Val); 2978 Val = legalizeUndef(Val);
2987 Variable *T_Lo = legalizeToReg(loOperand(Val)); 2979 Variable *T_Lo = legalizeToReg(loOperand(Val));
2988 Variable *T_Hi = legalizeToReg(hiOperand(Val)); 2980 Variable *T_Hi = legalizeToReg(hiOperand(Val));
2989 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2981 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2990 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2982 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2991 _bswap(T_Lo); 2983 _bswap(T_Lo);
2992 _bswap(T_Hi); 2984 _bswap(T_Hi);
2993 _mov(DestLo, T_Hi); 2985 _mov(DestLo, T_Hi);
2994 _mov(DestHi, T_Lo); 2986 _mov(DestHi, T_Lo);
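The 16-bit case mentioned above (rotate left instead of bswap) is equivalent to the portable one-liner below; the helper name is made up.

#include <cstdint>

// Swap the two bytes of a 16-bit value; compilers typically emit rol r16, 8 for this.
uint16_t bswap16(uint16_t X) {
  return static_cast<uint16_t>((X << 8) | (X >> 8));
}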
(...skipping 68 matching lines...)
3063 // another 64-bit wide.) 3055 // another 64-bit wide.)
3064 Variable *T_1 = makeReg(IceType_i32); 3056 Variable *T_1 = makeReg(IceType_i32);
3065 _mov(T_1, T); 3057 _mov(T_1, T);
3066 Variable *T_2 = makeReg(IceType_i64); 3058 Variable *T_2 = makeReg(IceType_i64);
3067 _movzx(T_2, T_1); 3059 _movzx(T_2, T_1);
3068 _mov(Dest, T_2); 3060 _mov(Dest, T_2);
3069 } 3061 }
3070 return; 3062 return;
3071 } 3063 }
3072 case Intrinsics::Ctlz: { 3064 case Intrinsics::Ctlz: {
3073 // The "is zero undef" parameter is ignored and we always return 3065 // The "is zero undef" parameter is ignored and we always return a
3074 // a well-defined value. 3066 // well-defined value.
3075 Operand *Val = legalize(Instr->getArg(0)); 3067 Operand *Val = legalize(Instr->getArg(0));
3076 Operand *FirstVal; 3068 Operand *FirstVal;
3077 Operand *SecondVal = nullptr; 3069 Operand *SecondVal = nullptr;
3078 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { 3070 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
3079 FirstVal = loOperand(Val); 3071 FirstVal = loOperand(Val);
3080 SecondVal = hiOperand(Val); 3072 SecondVal = hiOperand(Val);
3081 } else { 3073 } else {
3082 FirstVal = Val; 3074 FirstVal = Val;
3083 } 3075 }
3084 const bool IsCttz = false; 3076 const bool IsCttz = false;
3085 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, 3077 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3086 SecondVal); 3078 SecondVal);
3087 return; 3079 return;
3088 } 3080 }
3089 case Intrinsics::Cttz: { 3081 case Intrinsics::Cttz: {
3090 // The "is zero undef" parameter is ignored and we always return 3082 // The "is zero undef" parameter is ignored and we always return a
3091 // a well-defined value. 3083 // well-defined value.
3092 Operand *Val = legalize(Instr->getArg(0)); 3084 Operand *Val = legalize(Instr->getArg(0));
3093 Operand *FirstVal; 3085 Operand *FirstVal;
3094 Operand *SecondVal = nullptr; 3086 Operand *SecondVal = nullptr;
3095 if (!Traits::Is64Bit && Val->getType() == IceType_i64) { 3087 if (!Traits::Is64Bit && Val->getType() == IceType_i64) {
3096 FirstVal = hiOperand(Val); 3088 FirstVal = hiOperand(Val);
3097 SecondVal = loOperand(Val); 3089 SecondVal = loOperand(Val);
3098 } else { 3090 } else {
3099 FirstVal = Val; 3091 FirstVal = Val;
3100 } 3092 }
3101 const bool IsCttz = true; 3093 const bool IsCttz = true;
3102 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal, 3094 lowerCountZeros(IsCttz, Val->getType(), Instr->getDest(), FirstVal,
3103 SecondVal); 3095 SecondVal);
3104 return; 3096 return;
3105 } 3097 }
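Because bsr/bsf leave their destination undefined for an input of zero, producing the "well-defined value" promised above means handling zero explicitly. The loop below is only a portable sketch of the required semantics, not the instruction sequence the backend emits, and the function name is invented.

#include <cstdint>

// Count leading zeros with a defined answer (32) for a zero input.
uint32_t ctlz32(uint32_t X) {
  if (X == 0)
    return 32;
  uint32_t N = 0;
  while ((X & 0x80000000u) == 0) {  // walk down from the top bit
    X <<= 1;
    ++N;
  }
  return N;
}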
3106 case Intrinsics::Fabs: { 3098 case Intrinsics::Fabs: {
3107 Operand *Src = legalize(Instr->getArg(0)); 3099 Operand *Src = legalize(Instr->getArg(0));
3108 Type Ty = Src->getType(); 3100 Type Ty = Src->getType();
3109 Variable *Dest = Instr->getDest(); 3101 Variable *Dest = Instr->getDest();
3110 Variable *T = makeVectorOfFabsMask(Ty); 3102 Variable *T = makeVectorOfFabsMask(Ty);
3111 // The pand instruction operates on an m128 memory operand, so if 3103 // The pand instruction operates on an m128 memory operand, so if Src is an
3112 // Src is an f32 or f64, we need to make sure it's in a register. 3104 // f32 or f64, we need to make sure it's in a register.
3113 if (isVectorType(Ty)) { 3105 if (isVectorType(Ty)) {
3114 if (llvm::isa<typename Traits::X86OperandMem>(Src)) 3106 if (llvm::isa<typename Traits::X86OperandMem>(Src))
3115 Src = legalizeToReg(Src); 3107 Src = legalizeToReg(Src);
3116 } else { 3108 } else {
3117 Src = legalizeToReg(Src); 3109 Src = legalizeToReg(Src);
3118 } 3110 }
3119 _pand(T, Src); 3111 _pand(T, Src);
3120 if (isVectorType(Ty)) 3112 if (isVectorType(Ty))
3121 _movp(Dest, T); 3113 _movp(Dest, T);
3122 else 3114 else
(...skipping 564 matching lines...)
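For reference on the Fabs case just above: the pand with the "fabs mask" clears each element's sign bit, which in scalar form is the familiar bit trick below (helper name invented for the sketch).

#include <cstdint>
#include <cstring>

float fabsByMask(float X) {
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  Bits &= 0x7FFFFFFFu;              // keep exponent and mantissa, drop the sign bit
  std::memcpy(&X, &Bits, sizeof(X));
  return X;
}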
3687 3679
3688 Variable *SrcBase = legalizeToReg(Src); 3680 Variable *SrcBase = legalizeToReg(Src);
3689 Variable *DestBase = legalizeToReg(Dest); 3681 Variable *DestBase = legalizeToReg(Dest);
3690 3682
3691 std::tuple<Type, Constant *, Variable *> 3683 std::tuple<Type, Constant *, Variable *>
3692 Moves[Traits::MEMMOVE_UNROLL_LIMIT]; 3684 Moves[Traits::MEMMOVE_UNROLL_LIMIT];
3693 Constant *Offset; 3685 Constant *Offset;
3694 Variable *Reg; 3686 Variable *Reg;
3695 3687
3696 // Copy the data into registers as the source and destination could overlap 3688 // Copy the data into registers as the source and destination could overlap
3697 // so make sure not to clobber the memory. This also means overlapping moves 3689 // so make sure not to clobber the memory. This also means overlapping
3698 // can be used as we are taking a safe snapshot of the memory. 3690 // moves can be used as we are taking a safe snapshot of the memory.
3699 Type Ty = largestTypeInSize(CountValue); 3691 Type Ty = largestTypeInSize(CountValue);
3700 uint32_t TyWidth = typeWidthInBytes(Ty); 3692 uint32_t TyWidth = typeWidthInBytes(Ty);
3701 3693
3702 uint32_t RemainingBytes = CountValue; 3694 uint32_t RemainingBytes = CountValue;
3703 int32_t OffsetAmt = (CountValue & ~(TyWidth - 1)) - TyWidth; 3695 int32_t OffsetAmt = (CountValue & ~(TyWidth - 1)) - TyWidth;
3704 size_t N = 0; 3696 size_t N = 0;
3705 while (RemainingBytes >= TyWidth) { 3697 while (RemainingBytes >= TyWidth) {
3706 assert(N <= Traits::MEMMOVE_UNROLL_LIMIT); 3698 assert(N <= Traits::MEMMOVE_UNROLL_LIMIT);
3707 Offset = Ctx->getConstantInt32(OffsetAmt); 3699 Offset = Ctx->getConstantInt32(OffsetAmt);
3708 Reg = makeReg(Ty); 3700 Reg = makeReg(Ty);
(...skipping 180 matching lines...)
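The overlap-safety argument in the memmove lowering above (read everything into registers before writing anything) is the same idea as this small sketch; the 16-byte size and the function name are assumptions for illustration.

#include <cstdint>
#include <cstring>

// Copy 16 possibly-overlapping bytes: both chunks are loaded before either is stored.
void move16(unsigned char *Dest, const unsigned char *Src) {
  uint64_t A, B;
  std::memcpy(&A, Src, 8);
  std::memcpy(&B, Src + 8, 8);
  std::memcpy(Dest, &A, 8);
  std::memcpy(Dest + 8, &B, 8);
}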
3889 Str << ", Index="; 3881 Str << ", Index=";
3890 if (Index) 3882 if (Index)
3891 Index->dump(Func); 3883 Index->dump(Func);
3892 else 3884 else
3893 Str << "<null>"; 3885 Str << "<null>";
3894 Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n"; 3886 Str << ", Shift=" << Shift << ", Offset=" << Offset << "\n";
3895 } 3887 }
3896 3888
3897 inline bool matchTransitiveAssign(const VariablesMetadata *VMetadata, 3889 inline bool matchTransitiveAssign(const VariablesMetadata *VMetadata,
3898 Variable *&Var, const Inst *&Reason) { 3890 Variable *&Var, const Inst *&Reason) {
3899 // Var originates from Var=SrcVar ==> 3891 // Var originates from Var=SrcVar ==> set Var:=SrcVar
3900 // set Var:=SrcVar
3901 if (Var == nullptr) 3892 if (Var == nullptr)
3902 return false; 3893 return false;
3903 if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) { 3894 if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) {
3904 assert(!VMetadata->isMultiDef(Var)); 3895 assert(!VMetadata->isMultiDef(Var));
3905 if (llvm::isa<InstAssign>(VarAssign)) { 3896 if (llvm::isa<InstAssign>(VarAssign)) {
3906 Operand *SrcOp = VarAssign->getSrc(0); 3897 Operand *SrcOp = VarAssign->getSrc(0);
3907 assert(SrcOp); 3898 assert(SrcOp);
3908 if (Variable *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) { 3899 if (Variable *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
3909 if (!VMetadata->isMultiDef(SrcVar) && 3900 if (!VMetadata->isMultiDef(SrcVar) &&
3910 // TODO: ensure SrcVar stays single-BB 3901 // TODO: ensure SrcVar stays single-BB
(...skipping 141 matching lines...)
4052 Func->resetCurrentNode(); 4043 Func->resetCurrentNode();
4053 if (Func->isVerbose(IceV_AddrOpt)) { 4044 if (Func->isVerbose(IceV_AddrOpt)) {
4054 OstreamLocker L(Func->getContext()); 4045 OstreamLocker L(Func->getContext());
4055 Ostream &Str = Func->getContext()->getStrDump(); 4046 Ostream &Str = Func->getContext()->getStrDump();
4056 Str << "\nStarting computeAddressOpt for instruction:\n "; 4047 Str << "\nStarting computeAddressOpt for instruction:\n ";
4057 Instr->dumpDecorated(Func); 4048 Instr->dumpDecorated(Func);
4058 } 4049 }
4059 (void)Offset; // TODO: pattern-match for non-zero offsets. 4050 (void)Offset; // TODO: pattern-match for non-zero offsets.
4060 if (Base == nullptr) 4051 if (Base == nullptr)
4061 return; 4052 return;
4062 // If the Base has more than one use or is live across multiple 4053 // If the Base has more than one use or is live across multiple blocks, then
4063 // blocks, then don't go further. Alternatively (?), never consider 4054 // don't go further. Alternatively (?), never consider a transformation that
4064 // a transformation that would change a variable that is currently 4055 // would change a variable that is currently *not* live across basic block
4065 // *not* live across basic block boundaries into one that *is*. 4056 // boundaries into one that *is*.
4066 if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/) 4057 if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/)
4067 return; 4058 return;
4068 4059
4069 const bool MockBounds = Func->getContext()->getFlags().getMockBoundsCheck(); 4060 const bool MockBounds = Func->getContext()->getFlags().getMockBoundsCheck();
4070 const VariablesMetadata *VMetadata = Func->getVMetadata(); 4061 const VariablesMetadata *VMetadata = Func->getVMetadata();
4071 bool Continue = true; 4062 bool Continue = true;
4072 while (Continue) { 4063 while (Continue) {
4073 const Inst *Reason = nullptr; 4064 const Inst *Reason = nullptr;
4074 if (matchTransitiveAssign(VMetadata, Base, Reason) || 4065 if (matchTransitiveAssign(VMetadata, Base, Reason) ||
4075 matchTransitiveAssign(VMetadata, Index, Reason) || 4066 matchTransitiveAssign(VMetadata, Index, Reason) ||
(...skipping 149 matching lines...)
4225 Operand *SrcT = Inst->getTrueOperand(); 4216 Operand *SrcT = Inst->getTrueOperand();
4226 Operand *SrcF = Inst->getFalseOperand(); 4217 Operand *SrcF = Inst->getFalseOperand();
4227 Operand *Condition = Inst->getCondition(); 4218 Operand *Condition = Inst->getCondition();
4228 4219
4229 if (isVectorType(DestTy)) { 4220 if (isVectorType(DestTy)) {
4230 Type SrcTy = SrcT->getType(); 4221 Type SrcTy = SrcT->getType();
4231 Variable *T = makeReg(SrcTy); 4222 Variable *T = makeReg(SrcTy);
4232 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); 4223 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
4233 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); 4224 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
4234 if (InstructionSet >= Traits::SSE4_1) { 4225 if (InstructionSet >= Traits::SSE4_1) {
4235 // TODO(wala): If the condition operand is a constant, use blendps 4226 // TODO(wala): If the condition operand is a constant, use blendps or
4236 // or pblendw. 4227 // pblendw.
4237 // 4228 //
4238 // Use blendvps or pblendvb to implement select. 4229 // Use blendvps or pblendvb to implement select.
4239 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || 4230 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
4240 SrcTy == IceType_v4f32) { 4231 SrcTy == IceType_v4f32) {
4241 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 4232 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
4242 Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0); 4233 Variable *xmm0 = makeReg(IceType_v4i32, Traits::RegisterSet::Reg_xmm0);
4243 _movp(xmm0, ConditionRM); 4234 _movp(xmm0, ConditionRM);
4244 _psll(xmm0, Ctx->getConstantInt8(31)); 4235 _psll(xmm0, Ctx->getConstantInt8(31));
4245 _movp(T, SrcFRM); 4236 _movp(T, SrcFRM);
4246 _blendvps(T, SrcTRM, xmm0); 4237 _blendvps(T, SrcTRM, xmm0);
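The blendvps path above keys on each lane's sign bit, which is why the condition is shifted left by 31 first. A hedged intrinsics sketch (SSE4.1; names invented):

#include <smmintrin.h>

// Per-lane select: Cond01 holds 0 or 1 per lane; shift it into the sign bit and blend.
__m128 select4f32(__m128i Cond01, __m128 TrueVal, __m128 FalseVal) {
  __m128i Mask = _mm_slli_epi32(Cond01, 31);                        // psll xmm, 31
  return _mm_blendv_ps(FalseVal, TrueVal, _mm_castsi128_ps(Mask));  // true where sign bit set
}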
(...skipping 56 matching lines...)
4303 } 4294 }
4304 if (CmpOpnd0 == nullptr) { 4295 if (CmpOpnd0 == nullptr) {
4305 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem); 4296 CmpOpnd0 = legalize(Condition, Legal_Reg | Legal_Mem);
4306 CmpOpnd1 = Ctx->getConstantZero(IceType_i32); 4297 CmpOpnd1 = Ctx->getConstantZero(IceType_i32);
4307 } 4298 }
4308 assert(CmpOpnd0); 4299 assert(CmpOpnd0);
4309 assert(CmpOpnd1); 4300 assert(CmpOpnd1);
4310 4301
4311 _cmp(CmpOpnd0, CmpOpnd1); 4302 _cmp(CmpOpnd0, CmpOpnd1);
4312 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { 4303 if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) {
4313 // The cmov instruction doesn't allow 8-bit or FP operands, so 4304 // The cmov instruction doesn't allow 8-bit or FP operands, so we need
4314 // we need explicit control flow. 4305 // explicit control flow.
4315 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: 4306 // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1:
4316 typename Traits::Insts::Label *Label = 4307 typename Traits::Insts::Label *Label =
4317 Traits::Insts::Label::create(Func, this); 4308 Traits::Insts::Label::create(Func, this);
4318 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm); 4309 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm);
4319 _mov(Dest, SrcT); 4310 _mov(Dest, SrcT);
4320 _br(Cond, Label); 4311 _br(Cond, Label);
4321 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm); 4312 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm);
4322 _mov_nonkillable(Dest, SrcF); 4313 _mov_nonkillable(Dest, SrcF);
4323 Context.insert(Label); 4314 Context.insert(Label);
4324 return; 4315 return;
4325 } 4316 }
4326 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t 4317 // mov t, SrcF; cmov_cond t, SrcT; mov dest, t
4327 // But if SrcT is immediate, we might be able to do better, as 4318 // But if SrcT is immediate, we might be able to do better, as the cmov
4328 // the cmov instruction doesn't allow an immediate operand: 4319 // instruction doesn't allow an immediate operand:
4329 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t 4320 // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t
4330 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { 4321 if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) {
4331 std::swap(SrcT, SrcF); 4322 std::swap(SrcT, SrcF);
4332 Cond = InstX86Base<Machine>::getOppositeCondition(Cond); 4323 Cond = InstX86Base<Machine>::getOppositeCondition(Cond);
4333 } 4324 }
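
A small sketch of why the swap above is sound (hypothetical scalar form): selecting with the opposite condition and exchanged arms computes the same value, so a constant SrcT ends up in the plain mov rather than in the cmov.

    // Cond ? SrcT : SrcF  ==  !Cond ? SrcF : SrcT
    static int selectSwapped(bool Cond, int SrcT, int SrcF) {
      return !Cond ? SrcF : SrcT;
    }
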
4334 if (!Traits::Is64Bit && DestTy == IceType_i64) { 4325 if (!Traits::Is64Bit && DestTy == IceType_i64) {
4335 SrcT = legalizeUndef(SrcT); 4326 SrcT = legalizeUndef(SrcT);
4336 SrcF = legalizeUndef(SrcF); 4327 SrcF = legalizeUndef(SrcF);
4337 // Set the low portion. 4328 // Set the low portion.
4338 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 4329 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
(...skipping 340 matching lines...)
4679 4670
4680 lowerAssign(InstAssign::create(Func, Dest, T)); 4671 lowerAssign(InstAssign::create(Func, Dest, T));
4681 } 4672 }
4682 4673
4683 /// The following pattern occurs often in lowered C and C++ code: 4674 /// The following pattern occurs often in lowered C and C++ code:
4684 /// 4675 ///
4685 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 4676 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1
4686 /// %cmp.ext = sext <n x i1> %cmp to <n x ty> 4677 /// %cmp.ext = sext <n x i1> %cmp to <n x ty>
4687 /// 4678 ///
4688 /// We can eliminate the sext operation by copying the result of pcmpeqd, 4679 /// We can eliminate the sext operation by copying the result of pcmpeqd,
4689 /// pcmpgtd, or cmpps (which produce sign extended results) to the result 4680 /// pcmpgtd, or cmpps (which produce sign extended results) to the result of the
4690 /// of the sext operation. 4681 /// sext operation.
4691 template <class Machine> 4682 template <class Machine>
4692 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction( 4683 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction(
4693 Variable *SignExtendedResult) { 4684 Variable *SignExtendedResult) {
4694 if (InstCast *NextCast = 4685 if (InstCast *NextCast =
4695 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { 4686 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) {
4696 if (NextCast->getCastKind() == InstCast::Sext && 4687 if (NextCast->getCastKind() == InstCast::Sext &&
4697 NextCast->getSrc(0) == SignExtendedResult) { 4688 NextCast->getSrc(0) == SignExtendedResult) {
4698 NextCast->setDeleted(); 4689 NextCast->setDeleted();
4699 _movp(NextCast->getDest(), legalizeToReg(SignExtendedResult)); 4690 _movp(NextCast->getDest(), legalizeToReg(SignExtendedResult));
4700 // Skip over the instruction. 4691 // Skip over the instruction.
4701 Context.advanceNext(); 4692 Context.advanceNext();
4702 } 4693 }
4703 } 4694 }
4704 } 4695 }
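
To illustrate the pattern being eliminated, a small SSE2 intrinsics sketch (hypothetical names, independent of Subzero's emitters): pcmpgtd already leaves each lane as all-zeros or all-ones, so the following sext degenerates into a plain register copy.

    #include <emmintrin.h>  // SSE2

    static __m128i icmpThenSext(__m128i Src0, __m128i Src1) {
      __m128i Cmp = _mm_cmpgt_epi32(Src0, Src1); // lanes become 0x00000000 or 0xFFFFFFFF
      __m128i Ext = Cmp;                         // the "sext" is just a movp-style copy
      return Ext;
    }
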
4705 4696
4706 template <class Machine> 4697 template <class Machine>
4707 void TargetX86Base<Machine>::lowerUnreachable( 4698 void TargetX86Base<Machine>::lowerUnreachable(
4708 const InstUnreachable * /*Inst*/) { 4699 const InstUnreachable * /*Inst*/) {
4709 _ud2(); 4700 _ud2();
4710 } 4701 }
4711 4702
4712 template <class Machine> 4703 template <class Machine>
4713 void TargetX86Base<Machine>::lowerRMW( 4704 void TargetX86Base<Machine>::lowerRMW(
4714 const typename Traits::Insts::FakeRMW *RMW) { 4705 const typename Traits::Insts::FakeRMW *RMW) {
4715 // If the beacon variable's live range does not end in this 4706 // If the beacon variable's live range does not end in this instruction, then
4716 // instruction, then it must end in the modified Store instruction 4707 // it must end in the modified Store instruction that follows. This means
4717 // that follows. This means that the original Store instruction is 4708 // that the original Store instruction is still there, either because the
4718 // still there, either because the value being stored is used beyond 4709 // value being stored is used beyond the Store instruction, or because dead
4719 // the Store instruction, or because dead code elimination did not 4710 // code elimination did not happen. In either case, we cancel RMW lowering
4720 // happen. In either case, we cancel RMW lowering (and the caller 4711 // (and the caller deletes the RMW instruction).
4721 // deletes the RMW instruction).
4722 if (!RMW->isLastUse(RMW->getBeacon())) 4712 if (!RMW->isLastUse(RMW->getBeacon()))
4723 return; 4713 return;
4724 Operand *Src = RMW->getData(); 4714 Operand *Src = RMW->getData();
4725 Type Ty = Src->getType(); 4715 Type Ty = Src->getType();
4726 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty); 4716 typename Traits::X86OperandMem *Addr = formMemoryOperand(RMW->getAddr(), Ty);
4727 doMockBoundsCheck(Addr); 4717 doMockBoundsCheck(Addr);
4728 if (!Traits::Is64Bit && Ty == IceType_i64) { 4718 if (!Traits::Is64Bit && Ty == IceType_i64) {
4729 Src = legalizeUndef(Src); 4719 Src = legalizeUndef(Src);
4730 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm); 4720 Operand *SrcLo = legalize(loOperand(Src), Legal_Reg | Legal_Imm);
4731 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm); 4721 Operand *SrcHi = legalize(hiOperand(Src), Legal_Reg | Legal_Imm);
(...skipping 61 matching lines...)
4793 template <class Machine> 4783 template <class Machine>
4794 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { 4784 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) {
4795 if (const auto *RMW = 4785 if (const auto *RMW =
4796 llvm::dyn_cast<typename Traits::Insts::FakeRMW>(Instr)) { 4786 llvm::dyn_cast<typename Traits::Insts::FakeRMW>(Instr)) {
4797 lowerRMW(RMW); 4787 lowerRMW(RMW);
4798 } else { 4788 } else {
4799 TargetLowering::lowerOther(Instr); 4789 TargetLowering::lowerOther(Instr);
4800 } 4790 }
4801 } 4791 }
4802 4792
4803 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to 4793 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
4804 /// preserve integrity of liveness analysis. Undef values are also 4794 /// integrity of liveness analysis. Undef values are also turned into zeroes,
4805 /// turned into zeroes, since loOperand() and hiOperand() don't expect 4795 /// since loOperand() and hiOperand() don't expect Undef input.
4806 /// Undef input.
4807 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { 4796 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() {
4808 if (Traits::Is64Bit) { 4797 if (Traits::Is64Bit) {
4809 // On x86-64 we don't need to prelower phis -- the architecture can handle 4798 // On x86-64 we don't need to prelower phis -- the architecture can handle
4810 // 64-bit integer natively. 4799 // 64-bit integer natively.
4811 return; 4800 return;
4812 } 4801 }
4813 4802
4814 // Pause constant blinding or pooling, blinding or pooling will be done 4803 // Pause constant blinding or pooling, blinding or pooling will be done later
4815 // later during phi lowering assignments 4804 // during phi lowering assignments
4816 BoolFlagSaver B(RandomizationPoolingPaused, true); 4805 BoolFlagSaver B(RandomizationPoolingPaused, true);
4817 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( 4806 PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>(
4818 this, Context.getNode(), Func); 4807 this, Context.getNode(), Func);
4819 } 4808 }
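
A sketch of the 64-bit split that the prelowered phis rely on (a hypothetical helper, not the actual loOperand()/hiOperand() implementation): on x86-32 an i64 value is handled as two i32 halves.

    #include <cstdint>

    static void splitI64(uint64_t V, uint32_t &Lo, uint32_t &Hi) {
      Lo = static_cast<uint32_t>(V);        // the half loOperand() refers to
      Hi = static_cast<uint32_t>(V >> 32);  // the half hiOperand() refers to
    }
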
4820 4809
4821 // There is no support for loading or emitting vector constants, so the 4810 // There is no support for loading or emitting vector constants, so the vector
4822 // vector values returned from makeVectorOfZeros, makeVectorOfOnes, 4811 // values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are
4823 // etc. are initialized with register operations. 4812 // initialized with register operations.
4824 // 4813 //
4825 // TODO(wala): Add limited support for vector constants so that 4814 // TODO(wala): Add limited support for vector constants so that complex
4826 // complex initialization in registers is unnecessary. 4815 // initialization in registers is unnecessary.
4827 4816
4828 template <class Machine> 4817 template <class Machine>
4829 Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) { 4818 Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) {
4830 Variable *Reg = makeReg(Ty, RegNum); 4819 Variable *Reg = makeReg(Ty, RegNum);
4831 // Insert a FakeDef, since otherwise the live range of Reg might 4820 // Insert a FakeDef, since otherwise the live range of Reg might be
4832 // be overestimated. 4821 // overestimated.
4833 Context.insert(InstFakeDef::create(Func, Reg)); 4822 Context.insert(InstFakeDef::create(Func, Reg));
4834 _pxor(Reg, Reg); 4823 _pxor(Reg, Reg);
4835 return Reg; 4824 return Reg;
4836 } 4825 }
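
An intrinsics-level sketch of this register-only initialization (hypothetical wrapper name): xor-ing a register with itself yields all zeros regardless of its prior contents, so no vector constant has to be loaded from memory.

    #include <emmintrin.h>  // SSE2

    static __m128i makeZeros() {
      __m128i Reg = _mm_set1_epi32(0x12345678);  // any prior contents work
      return _mm_xor_si128(Reg, Reg);            // pxor Reg, Reg -> all zeros
    }
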
4837 4826
4838 template <class Machine> 4827 template <class Machine>
4839 Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty, 4828 Variable *TargetX86Base<Machine>::makeVectorOfMinusOnes(Type Ty,
4840 int32_t RegNum) { 4829 int32_t RegNum) {
4841 Variable *MinusOnes = makeReg(Ty, RegNum); 4830 Variable *MinusOnes = makeReg(Ty, RegNum);
4842 // Insert a FakeDef so the live range of MinusOnes is not overestimated. 4831 // Insert a FakeDef so the live range of MinusOnes is not overestimated.
(...skipping 25 matching lines...)
4868 // SSE has no left shift operation for vectors of 8 bit integers. 4857 // SSE has no left shift operation for vectors of 8 bit integers.
4869 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; 4858 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080;
4870 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); 4859 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK);
4871 Variable *Reg = makeReg(Ty, RegNum); 4860 Variable *Reg = makeReg(Ty, RegNum);
4872 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); 4861 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem));
4873 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); 4862 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8));
4874 return Reg; 4863 return Reg;
4875 } 4864 }
4876 } 4865 }
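
A sketch of the movd/pshufd broadcast used above for the byte-element case, with SSE2 intrinsics (hypothetical wrapper name): the 32-bit constant is moved into the low lane and then splatted to all four lanes.

    #include <emmintrin.h>  // SSE2

    static __m128i makeHighOrderBitsMask() {
      const int Mask = static_cast<int>(0x80808080u);
      __m128i Reg = _mm_cvtsi32_si128(Mask);  // movd Reg, 0x80808080
      return _mm_shuffle_epi32(Reg, 0);       // pshufd Reg, Reg, 0 (broadcast lane 0)
    }
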
4877 4866
4878 /// Construct a mask in a register that can be and'ed with a 4867 /// Construct a mask in a register that can be and'ed with a floating-point
4879 /// floating-point value to mask off its sign bit. The value will be 4868 /// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32
4880 /// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> 4869 /// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of
4881 /// for f64. Construct it as vector of ones logically right shifted 4870 /// ones logically right shifted one bit.
4882 /// one bit. TODO(stichnot): Fix the wala TODO above, to represent 4871 // TODO(stichnot): Fix the wala
4883 /// vector constants in memory. 4872 // TODO: above, to represent vector constants in memory.
4884 template <class Machine> 4873 template <class Machine>
4885 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, 4874 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty,
4886 int32_t RegNum) { 4875 int32_t RegNum) {
4887 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); 4876 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum);
4888 _psrl(Reg, Ctx->getConstantInt8(1)); 4877 _psrl(Reg, Ctx->getConstantInt8(1));
4889 return Reg; 4878 return Reg;
4890 } 4879 }
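
For reference, a minimal intrinsics sketch of the fabs-mask construction for the f32 case (hypothetical wrapper name): all-ones lanes are produced without a load and then logically shifted right by one, giving 0x7fffffff per 32-bit lane.

    #include <emmintrin.h>  // SSE2

    static __m128 vectorFabs(__m128 V) {
      __m128i Zero = _mm_setzero_si128();
      __m128i Ones = _mm_cmpeq_epi32(Zero, Zero);    // pcmpeqd -> all ones
      __m128i Mask = _mm_srli_epi32(Ones, 1);        // psrl by 1 -> 0x7fffffff per lane
      return _mm_and_ps(V, _mm_castsi128_ps(Mask));  // clears the sign bits
    }
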
4891 4880
4892 template <class Machine> 4881 template <class Machine>
4893 typename TargetX86Base<Machine>::Traits::X86OperandMem * 4882 typename TargetX86Base<Machine>::Traits::X86OperandMem *
4894 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, 4883 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot,
4895 uint32_t Offset) { 4884 uint32_t Offset) {
4896 // Ensure that Loc is a stack slot. 4885 // Ensure that Loc is a stack slot.
4897 assert(Slot->mustNotHaveReg()); 4886 assert(Slot->mustNotHaveReg());
4898 assert(Slot->getRegNum() == Variable::NoRegister); 4887 assert(Slot->getRegNum() == Variable::NoRegister);
4899 // Compute the location of Loc in memory. 4888 // Compute the location of Loc in memory.
4900 // TODO(wala,stichnot): lea should not be required. The address of 4889 // TODO(wala,stichnot): lea should not
4901 // the stack slot is known at compile time (although not until after 4890 // be required. The address of the stack slot is known at compile time
4902 // addProlog()). 4891 // (although not until after addProlog()).
4903 const Type PointerType = IceType_i32; 4892 const Type PointerType = IceType_i32;
4904 Variable *Loc = makeReg(PointerType); 4893 Variable *Loc = makeReg(PointerType);
4905 _lea(Loc, Slot); 4894 _lea(Loc, Slot);
4906 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); 4895 Constant *ConstantOffset = Ctx->getConstantInt32(Offset);
4907 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset); 4896 return Traits::X86OperandMem::create(Func, Ty, Loc, ConstantOffset);
4908 } 4897 }
4909 4898
4910 /// Helper for legalize() to emit the right code to lower an operand to a 4899 /// Helper for legalize() to emit the right code to lower an operand to a
4911 /// register of the appropriate type. 4900 /// register of the appropriate type.
4912 template <class Machine> 4901 template <class Machine>
4913 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { 4902 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) {
4914 Type Ty = Src->getType(); 4903 Type Ty = Src->getType();
4915 Variable *Reg = makeReg(Ty, RegNum); 4904 Variable *Reg = makeReg(Ty, RegNum);
4916 if (isVectorType(Ty)) { 4905 if (isVectorType(Ty)) {
4917 _movp(Reg, Src); 4906 _movp(Reg, Src);
4918 } else { 4907 } else {
4919 _mov(Reg, Src); 4908 _mov(Reg, Src);
4920 } 4909 }
4921 return Reg; 4910 return Reg;
4922 } 4911 }
4923 4912
4924 template <class Machine> 4913 template <class Machine>
4925 Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, 4914 Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed,
4926 int32_t RegNum) { 4915 int32_t RegNum) {
4927 Type Ty = From->getType(); 4916 Type Ty = From->getType();
4928 // Assert that a physical register is allowed. To date, all calls 4917 // Assert that a physical register is allowed. To date, all calls to
4929 // to legalize() allow a physical register. If a physical register 4918 // legalize() allow a physical register. If a physical register needs to be
4930 // needs to be explicitly disallowed, then new code will need to be 4919 // explicitly disallowed, then new code will need to be written to force a
4931 // written to force a spill. 4920 // spill.
4932 assert(Allowed & Legal_Reg); 4921 assert(Allowed & Legal_Reg);
4933 // If we're asking for a specific physical register, make sure we're 4922 // If we're asking for a specific physical register, make sure we're not
4934 // not allowing any other operand kinds. (This could be future 4923 // allowing any other operand kinds. (This could be future work, e.g. allow
4935 // work, e.g. allow the shl shift amount to be either an immediate 4924 // the shl shift amount to be either an immediate or in ecx.)
4936 // or in ecx.)
4937 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg); 4925 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
4938 4926
4939 if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { 4927 if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) {
4940 // Before doing anything with a Mem operand, we need to ensure 4928 // Before doing anything with a Mem operand, we need to ensure that the
4941 // that the Base and Index components are in physical registers. 4929 // Base and Index components are in physical registers.
4942 Variable *Base = Mem->getBase(); 4930 Variable *Base = Mem->getBase();
4943 Variable *Index = Mem->getIndex(); 4931 Variable *Index = Mem->getIndex();
4944 Variable *RegBase = nullptr; 4932 Variable *RegBase = nullptr;
4945 Variable *RegIndex = nullptr; 4933 Variable *RegIndex = nullptr;
4946 if (Base) { 4934 if (Base) {
4947 RegBase = legalizeToReg(Base); 4935 RegBase = legalizeToReg(Base);
4948 } 4936 }
4949 if (Index) { 4937 if (Index) {
4950 RegIndex = legalizeToReg(Index); 4938 RegIndex = legalizeToReg(Index);
4951 } 4939 }
(...skipping 24 matching lines...)
4976 // If the operand is a 64 bit constant integer we need to legalize it to a 4964 // If the operand is a 64 bit constant integer we need to legalize it to a
4977 // register in x86-64. 4965 // register in x86-64.
4978 if (Traits::Is64Bit) { 4966 if (Traits::Is64Bit) {
4979 if (llvm::isa<ConstantInteger64>(Const)) { 4967 if (llvm::isa<ConstantInteger64>(Const)) {
4980 Variable *V = copyToReg(Const, RegNum); 4968 Variable *V = copyToReg(Const, RegNum);
4981 V->setMustHaveReg(); 4969 V->setMustHaveReg();
4982 return V; 4970 return V;
4983 } 4971 }
4984 } 4972 }
4985 4973
4986 // If the operand is an 32 bit constant integer, we should check 4974 // If the operand is a 32 bit constant integer, we should check whether we
4987 // whether we need to randomize it or pool it. 4975 // need to randomize it or pool it.
4988 if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) { 4976 if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) {
4989 Operand *NewConst = randomizeOrPoolImmediate(C, RegNum); 4977 Operand *NewConst = randomizeOrPoolImmediate(C, RegNum);
4990 if (NewConst != Const) { 4978 if (NewConst != Const) {
4991 return NewConst; 4979 return NewConst;
4992 } 4980 }
4993 } 4981 }
4994 4982
4995 // Convert a scalar floating point constant into an explicit 4983 // Convert a scalar floating point constant into an explicit memory
4996 // memory operand. 4984 // operand.
4997 if (isScalarFloatingType(Ty)) { 4985 if (isScalarFloatingType(Ty)) {
4998 Variable *Base = nullptr; 4986 Variable *Base = nullptr;
4999 std::string Buffer; 4987 std::string Buffer;
5000 llvm::raw_string_ostream StrBuf(Buffer); 4988 llvm::raw_string_ostream StrBuf(Buffer);
5001 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf); 4989 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
5002 llvm::cast<Constant>(From)->setShouldBePooled(true); 4990 llvm::cast<Constant>(From)->setShouldBePooled(true);
5003 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); 4991 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
5004 From = Traits::X86OperandMem::create(Func, Ty, Base, Offset); 4992 From = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
5005 } 4993 }
5006 bool NeedsReg = false; 4994 bool NeedsReg = false;
5007 if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty)) 4995 if (!(Allowed & Legal_Imm) && !isScalarFloatingType(Ty))
5008 // Immediate specifically not allowed 4996 // Immediate specifically not allowed
5009 NeedsReg = true; 4997 NeedsReg = true;
5010 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty)) 4998 if (!(Allowed & Legal_Mem) && isScalarFloatingType(Ty))
5011 // On x86, FP constants are lowered to mem operands. 4999 // On x86, FP constants are lowered to mem operands.
5012 NeedsReg = true; 5000 NeedsReg = true;
5013 if (NeedsReg) { 5001 if (NeedsReg) {
5014 From = copyToReg(From, RegNum); 5002 From = copyToReg(From, RegNum);
5015 } 5003 }
5016 return From; 5004 return From;
5017 } 5005 }
5018 if (auto Var = llvm::dyn_cast<Variable>(From)) { 5006 if (auto Var = llvm::dyn_cast<Variable>(From)) {
5019 // Check if the variable is guaranteed a physical register. This 5007 // Check if the variable is guaranteed a physical register. This can happen
5020 // can happen either when the variable is pre-colored or when it is 5008 // either when the variable is pre-colored or when it is assigned infinite
5021 // assigned infinite weight. 5009 // weight.
5022 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); 5010 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
5023 // We need a new physical register for the operand if: 5011 // We need a new physical register for the operand if:
5024 // Mem is not allowed and Var isn't guaranteed a physical 5012 // Mem is not allowed and Var isn't guaranteed a physical
5025 // register, or 5013 // register, or
5026 // RegNum is required and Var->getRegNum() doesn't match. 5014 // RegNum is required and Var->getRegNum() doesn't match.
5027 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || 5015 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
5028 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { 5016 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
5029 From = copyToReg(From, RegNum); 5017 From = copyToReg(From, RegNum);
5030 } 5018 }
5031 return From; 5019 return From;
5032 } 5020 }
5033 llvm_unreachable("Unhandled operand kind in legalize()"); 5021 llvm_unreachable("Unhandled operand kind in legalize()");
5034 return From; 5022 return From;
5035 } 5023 }
5036 5024
5037 /// Provide a trivial wrapper to legalize() for this common usage. 5025 /// Provide a trivial wrapper to legalize() for this common usage.
5038 template <class Machine> 5026 template <class Machine>
5039 Variable *TargetX86Base<Machine>::legalizeToReg(Operand *From, int32_t RegNum) { 5027 Variable *TargetX86Base<Machine>::legalizeToReg(Operand *From, int32_t RegNum) {
5040 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); 5028 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
5041 } 5029 }
5042 5030
5043 /// Legalize undef values to concrete values. 5031 /// Legalize undef values to concrete values.
5044 template <class Machine> 5032 template <class Machine>
5045 Operand *TargetX86Base<Machine>::legalizeUndef(Operand *From, int32_t RegNum) { 5033 Operand *TargetX86Base<Machine>::legalizeUndef(Operand *From, int32_t RegNum) {
5046 Type Ty = From->getType(); 5034 Type Ty = From->getType();
5047 if (llvm::isa<ConstantUndef>(From)) { 5035 if (llvm::isa<ConstantUndef>(From)) {
5048 // Lower undefs to zero. Another option is to lower undefs to an 5036 // Lower undefs to zero. Another option is to lower undefs to an
5049 // uninitialized register; however, using an uninitialized register 5037 // uninitialized register; however, using an uninitialized register results
5050 // results in less predictable code. 5038 // in less predictable code.
5051 // 5039 //
5052 // If in the future the implementation is changed to lower undef 5040 // If in the future the implementation is changed to lower undef values to
5053 // values to uninitialized registers, a FakeDef will be needed: 5041 // uninitialized registers, a FakeDef will be needed:
5054 // Context.insert(InstFakeDef::create(Func, Reg)); 5042 // Context.insert(InstFakeDef::create(Func, Reg));
5055 // This is in order to ensure that the live range of Reg is not 5043 // This is in order to ensure that the live range of Reg is not
5056 // overestimated. If the constant being lowered is a 64 bit value, 5044 // overestimated. If the constant being lowered is a 64 bit value, then
5057 // then the result should be split and the lo and hi components will 5045 // the result should be split and the lo and hi components will need to go
5058 // need to go in uninitialized registers. 5046 // in uninitialized registers.
5059 if (isVectorType(Ty)) 5047 if (isVectorType(Ty))
5060 return makeVectorOfZeros(Ty, RegNum); 5048 return makeVectorOfZeros(Ty, RegNum);
5061 return Ctx->getConstantZero(Ty); 5049 return Ctx->getConstantZero(Ty);
5062 } 5050 }
5063 return From; 5051 return From;
5064 } 5052 }
5065 5053
5066 /// For the cmp instruction, if Src1 is an immediate, or known to be a 5054 /// For the cmp instruction, if Src1 is an immediate, or known to be a physical
5067 /// physical register, we can allow Src0 to be a memory operand. 5055 /// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be
5068 /// Otherwise, Src0 must be copied into a physical register. 5056 /// copied into a physical register. (Actually, either Src0 or Src1 can be
5069 /// (Actually, either Src0 or Src1 can be chosen for the physical 5057 /// chosen for the physical register, but unfortunately we have to commit to one
5070 /// register, but unfortunately we have to commit to one or the other 5058 /// or the other before register allocation.)
5071 /// before register allocation.)
5072 template <class Machine> 5059 template <class Machine>
5073 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0, 5060 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0,
5074 Operand *Src1) { 5061 Operand *Src1) {
5075 bool IsSrc1ImmOrReg = false; 5062 bool IsSrc1ImmOrReg = false;
5076 if (llvm::isa<Constant>(Src1)) { 5063 if (llvm::isa<Constant>(Src1)) {
5077 IsSrc1ImmOrReg = true; 5064 IsSrc1ImmOrReg = true;
5078 } else if (auto *Var = llvm::dyn_cast<Variable>(Src1)) { 5065 } else if (auto *Var = llvm::dyn_cast<Variable>(Src1)) {
5079 if (Var->hasReg()) 5066 if (Var->hasReg())
5080 IsSrc1ImmOrReg = true; 5067 IsSrc1ImmOrReg = true;
5081 } 5068 }
5082 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg); 5069 return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);
5083 } 5070 }
5084 5071
5085 template <class Machine> 5072 template <class Machine>
5086 typename TargetX86Base<Machine>::Traits::X86OperandMem * 5073 typename TargetX86Base<Machine>::Traits::X86OperandMem *
5087 TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, Type Ty, 5074 TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, Type Ty,
5088 bool DoLegalize) { 5075 bool DoLegalize) {
5089 auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd); 5076 auto *Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(Opnd);
5090 // It may be the case that address mode optimization already creates an 5077 // It may be the case that address mode optimization already creates an
5091 // Traits::X86OperandMem, so in that case it wouldn't need another level of 5078 // Traits::X86OperandMem, so in that case it wouldn't need another level of
5092 // transformation. 5079 // transformation.
5093 if (!Mem) { 5080 if (!Mem) {
5094 Variable *Base = llvm::dyn_cast<Variable>(Opnd); 5081 Variable *Base = llvm::dyn_cast<Variable>(Opnd);
5095 Constant *Offset = llvm::dyn_cast<Constant>(Opnd); 5082 Constant *Offset = llvm::dyn_cast<Constant>(Opnd);
5096 assert(Base || Offset); 5083 assert(Base || Offset);
5097 if (Offset) { 5084 if (Offset) {
5098 // During memory operand building, we do not blind or pool 5085 // During memory operand building, we do not blind or pool the constant
5099 // the constant offset, we will work on the whole memory 5086 // offset; we will work on the whole memory operand as one entity later;
5100 // operand later as one entity later, this save one instruction. 5087 // this saves one instruction. By turning blinding and pooling off,
5101 // By turning blinding and pooling off, we guarantee 5088 // we guarantee legalize(Offset) will return a Constant*.
5102 // legalize(Offset) will return a Constant*.
5103 { 5089 {
5104 BoolFlagSaver B(RandomizationPoolingPaused, true); 5090 BoolFlagSaver B(RandomizationPoolingPaused, true);
5105 5091
5106 Offset = llvm::cast<Constant>(legalize(Offset)); 5092 Offset = llvm::cast<Constant>(legalize(Offset));
5107 } 5093 }
5108 5094
5109 assert(llvm::isa<ConstantInteger32>(Offset) || 5095 assert(llvm::isa<ConstantInteger32>(Offset) ||
5110 llvm::isa<ConstantRelocatable>(Offset)); 5096 llvm::isa<ConstantRelocatable>(Offset));
5111 } 5097 }
5112 Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); 5098 Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset);
5113 } 5099 }
5114 // Do legalization, which contains randomization/pooling 5100 // Do legalization, which contains randomization/pooling or do
5115 // or do randomization/pooling. 5101 // randomization/pooling.
5116 return llvm::cast<typename Traits::X86OperandMem>( 5102 return llvm::cast<typename Traits::X86OperandMem>(
5117 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); 5103 DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem));
5118 } 5104 }
5119 5105
5120 template <class Machine> 5106 template <class Machine>
5121 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) { 5107 Variable *TargetX86Base<Machine>::makeReg(Type Type, int32_t RegNum) {
5122 // There aren't any 64-bit integer registers for x86-32. 5108 // There aren't any 64-bit integer registers for x86-32.
5123 assert(Traits::Is64Bit || Type != IceType_i64); 5109 assert(Traits::Is64Bit || Type != IceType_i64);
5124 Variable *Reg = Func->makeVariable(Type); 5110 Variable *Reg = Func->makeVariable(Type);
5125 if (RegNum == Variable::NoRegister) 5111 if (RegNum == Variable::NoRegister)
(...skipping 102 matching lines...)
5228 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == 5214 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
5229 RPI_Randomize) { 5215 RPI_Randomize) {
5230 // blind the constant 5216 // blind the constant
5231 // FROM: 5217 // FROM:
5232 // imm 5218 // imm
5233 // TO: 5219 // TO:
5234 // insert: mov imm+cookie, Reg 5220 // insert: mov imm+cookie, Reg
5235 // insert: lea -cookie[Reg], Reg 5221 // insert: lea -cookie[Reg], Reg
5236 // => Reg 5222 // => Reg
5237 // If we have already assigned a phy register, we must come from 5223 // If we have already assigned a phy register, we must come from
5238 // andvancedPhiLowering()=>lowerAssign(). In this case we should reuse 5224 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
5239 // the assigned register as this assignment is that start of its use-def 5225 // assigned register as this assignment is the start of its use-def
5240 // chain. So we add RegNum argument here. 5226 // chain. So we add RegNum argument here. Note we use 'lea' instruction
5241 // Note we use 'lea' instruction instead of 'xor' to avoid affecting 5227 // instead of 'xor' to avoid affecting the flags.
5242 // the flags.
5243 Variable *Reg = makeReg(IceType_i32, RegNum); 5228 Variable *Reg = makeReg(IceType_i32, RegNum);
5244 ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate); 5229 ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate);
5245 uint32_t Value = Integer->getValue(); 5230 uint32_t Value = Integer->getValue();
5246 uint32_t Cookie = Func->getConstantBlindingCookie(); 5231 uint32_t Cookie = Func->getConstantBlindingCookie();
5247 _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value)); 5232 _mov(Reg, Ctx->getConstantInt(IceType_i32, Cookie + Value));
5248 Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie); 5233 Constant *Offset = Ctx->getConstantInt(IceType_i32, 0 - Cookie);
5249 _lea(Reg, Traits::X86OperandMem::create(Func, IceType_i32, Reg, Offset, 5234 _lea(Reg, Traits::X86OperandMem::create(Func, IceType_i32, Reg, Offset,
5250 nullptr, 0)); 5235 nullptr, 0));
5251 // make sure liveness analysis won't kill this variable, otherwise a 5236 // make sure liveness analysis won't kill this variable, otherwise a
5252 // liveness assertion will be triggered. 5237 // liveness assertion will be triggered.
5253 _set_dest_nonkillable(); 5238 _set_dest_nonkillable();
5254 if (Immediate->getType() != IceType_i32) { 5239 if (Immediate->getType() != IceType_i32) {
5255 Variable *TruncReg = makeReg(Immediate->getType(), RegNum); 5240 Variable *TruncReg = makeReg(Immediate->getType(), RegNum);
5256 _mov(TruncReg, Reg); 5241 _mov(TruncReg, Reg);
5257 return TruncReg; 5242 return TruncReg;
5258 } 5243 }
5259 return Reg; 5244 return Reg;
5260 } 5245 }
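
A self-contained sketch of the blinding arithmetic (the values below are made up): the instruction stream only ever contains imm+cookie and -cookie, yet the register ends up holding the original immediate, and lea performs the subtraction without touching the flags.

    #include <cassert>
    #include <cstdint>

    static uint32_t blindThenRecover(uint32_t Imm, uint32_t Cookie) {
      uint32_t Reg = Imm + Cookie;  // mov Reg, imm+cookie (wraps mod 2^32, like x86)
      Reg = Reg - Cookie;           // lea Reg, -cookie[Reg]
      return Reg;
    }

    int main() {
      assert(blindThenRecover(0x12345678u, 0xdeadbeefu) == 0x12345678u);
      return 0;
    }
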
5261 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { 5246 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
5262 // pool the constant 5247 // pool the constant
5263 // FROM: 5248 // FROM:
5264 // imm 5249 // imm
5265 // TO: 5250 // TO:
5266 // insert: mov $label, Reg 5251 // insert: mov $label, Reg
5267 // => Reg 5252 // => Reg
5268 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool); 5253 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool);
5269 Immediate->setShouldBePooled(true); 5254 Immediate->setShouldBePooled(true);
5270 // if we have already assigned a phy register, we must come from 5255 // if we have already assigned a phy register, we must come from
5271 // andvancedPhiLowering()=>lowerAssign(). In this case we should reuse 5256 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse the
5272 // the assigned register as this assignment is that start of its use-def 5257 // assigned register as this assignment is the start of its use-def
5273 // chain. So we add RegNum argument here. 5258 // chain. So we add RegNum argument here.
5274 Variable *Reg = makeReg(Immediate->getType(), RegNum); 5259 Variable *Reg = makeReg(Immediate->getType(), RegNum);
5275 IceString Label; 5260 IceString Label;
5276 llvm::raw_string_ostream Label_stream(Label); 5261 llvm::raw_string_ostream Label_stream(Label);
5277 Immediate->emitPoolLabel(Label_stream); 5262 Immediate->emitPoolLabel(Label_stream);
5278 const RelocOffsetT Offset = 0; 5263 const RelocOffsetT Offset = 0;
5279 const bool SuppressMangling = true; 5264 const bool SuppressMangling = true;
5280 Constant *Symbol = 5265 Constant *Symbol =
5281 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling); 5266 Ctx->getConstantSym(Offset, Label_stream.str(), SuppressMangling);
5282 typename Traits::X86OperandMem *MemOperand = 5267 typename Traits::X86OperandMem *MemOperand =
(...skipping 12 matching lines...)
5295 typename TargetX86Base<Machine>::Traits::X86OperandMem * 5280 typename TargetX86Base<Machine>::Traits::X86OperandMem *
5296 TargetX86Base<Machine>::randomizeOrPoolImmediate( 5281 TargetX86Base<Machine>::randomizeOrPoolImmediate(
5297 typename Traits::X86OperandMem *MemOperand, int32_t RegNum) { 5282 typename Traits::X86OperandMem *MemOperand, int32_t RegNum) {
5298 assert(MemOperand); 5283 assert(MemOperand);
5299 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || 5284 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None ||
5300 RandomizationPoolingPaused == true) { 5285 RandomizationPoolingPaused == true) {
5301 // immediates randomization/pooling is turned off 5286 // immediates randomization/pooling is turned off
5302 return MemOperand; 5287 return MemOperand;
5303 } 5288 }
5304 5289
5305 // If this memory operand is already a randommized one, we do 5290 // If this memory operand is already a randomized one, we do not randomize it
5306 // not randomize it again. 5291 // again.
5307 if (MemOperand->getRandomized()) 5292 if (MemOperand->getRandomized())
5308 return MemOperand; 5293 return MemOperand;
5309 5294
5310 if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) { 5295 if (Constant *C = llvm::dyn_cast_or_null<Constant>(MemOperand->getOffset())) {
5311 if (C->shouldBeRandomizedOrPooled(Ctx)) { 5296 if (C->shouldBeRandomizedOrPooled(Ctx)) {
5312 // The offset of this mem operand should be blinded or pooled 5297 // The offset of this mem operand should be blinded or pooled
5313 Ctx->statsUpdateRPImms(); 5298 Ctx->statsUpdateRPImms();
5314 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == 5299 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
5315 RPI_Randomize) { 5300 RPI_Randomize) {
5316 // blind the constant offset 5301 // blind the constant offset
(...skipping 14 matching lines...)
5331 typename Traits::X86OperandMem *TempMemOperand = 5316 typename Traits::X86OperandMem *TempMemOperand =
5332 Traits::X86OperandMem::create(Func, MemOperand->getType(), 5317 Traits::X86OperandMem::create(Func, MemOperand->getType(),
5333 MemOperand->getBase(), Mask1); 5318 MemOperand->getBase(), Mask1);
5334 // If we have already assigned a physical register, we must come from 5319 // If we have already assigned a physical register, we must come from
5335 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse 5320 // advancedPhiLowering()=>lowerAssign(). In this case we should reuse
5336 // the assigned register as this assignment is that start of its 5321 // the assigned register as this assignment is the start of its
5337 // use-def chain. So we add RegNum argument here. 5322 // use-def chain. So we add RegNum argument here.
5338 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); 5323 Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum);
5339 _lea(RegTemp, TempMemOperand); 5324 _lea(RegTemp, TempMemOperand);
5340 // As source operand doesn't use the dstreg, we don't need to add 5325 // As source operand doesn't use the dstreg, we don't need to add
5341 // _set_dest_nonkillable(). 5326 // _set_dest_nonkillable(). But if we use the same Dest Reg, that is,
5342 // But if we use the same Dest Reg, that is, with RegNum 5327 // with RegNum assigned, we should add this _set_dest_nonkillable()
5343 // assigned, we should add this _set_dest_nonkillable()
5344 if (RegNum != Variable::NoRegister) 5328 if (RegNum != Variable::NoRegister)
5345 _set_dest_nonkillable(); 5329 _set_dest_nonkillable();
5346 5330
5347 typename Traits::X86OperandMem *NewMemOperand = 5331 typename Traits::X86OperandMem *NewMemOperand =
5348 Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp, 5332 Traits::X86OperandMem::create(Func, MemOperand->getType(), RegTemp,
5349 Mask2, MemOperand->getIndex(), 5333 Mask2, MemOperand->getIndex(),
5350 MemOperand->getShift(), 5334 MemOperand->getShift(),
5351 MemOperand->getSegmentRegister()); 5335 MemOperand->getSegmentRegister());
5352 5336
5353 // Label this memory operand as randomized, so we won't randomize it 5337 // Label this memory operand as randomized, so we won't randomize it
5354 // again in case we call legalize() multiple times on this memory 5338 // again in case we call legalize() multiple times on this memory
5355 // operand. 5339 // operand.
5356 NewMemOperand->setRandomized(true); 5340 NewMemOperand->setRandomized(true);
5357 return NewMemOperand; 5341 return NewMemOperand;
5358 } 5342 }
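
A sketch of the address arithmetic behind the split displacement (the concrete Mask1/Mask2 values come from the elided lines above; the only assumption made here is that the two masks sum to the original offset):

    #include <cassert>
    #include <cstdint>

    static uint32_t blindedAddress(uint32_t Base, uint32_t Mask1, uint32_t Mask2) {
      uint32_t RegTemp = Base + Mask1;  // lea RegTemp, Mask1[Base]
      return RegTemp + Mask2;           // Mask2[RegTemp, index, shift] (index omitted)
    }

    int main() {
      const uint32_t Offset = 0x40, Cookie = 0xdeadbeefu;
      // With Mask1 = Offset + Cookie and Mask2 = -Cookie the effective address is
      // unchanged; all arithmetic wraps mod 2^32.
      assert(blindedAddress(0x1000, Offset + Cookie, 0u - Cookie) == 0x1000 + Offset);
      return 0;
    }
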
5359 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) { 5343 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_Pool) {
5360 // pool the constant offset 5344 // pool the constant offset
5361 // FROM: 5345 // FROM:
5362 // offset[base, index, shift] 5346 // offset[base, index, shift]
5363 // TO: 5347 // TO:
5364 // insert: mov $label, RegTemp 5348 // insert: mov $label, RegTemp
5365 // insert: lea [base, RegTemp], RegTemp 5349 // insert: lea [base, RegTemp], RegTemp
5366 // =>[RegTemp, index, shift] 5350 // =>[RegTemp, index, shift]
5367 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == 5351 assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() ==
5368 RPI_Pool); 5352 RPI_Pool);
5369 // Memory operand should never exist as source operands in phi 5353 // Memory operand should never exist as source operands in phi lowering
5370 // lowering assignments, so there is no need to reuse any registers 5354 // assignments, so there is no need to reuse any registers here. For
5371 // here. For phi lowering, we should not ask for new physical 5355 // phi lowering, we should not ask for new physical registers in
5372 // registers in general. 5356 // general. However, if we do meet Memory Operand during phi lowering,
5373 // However, if we do meet Memory Operand during phi lowering, we 5357 // we should not blind or pool the immediates for now.
5374 // should not blind or pool the immediates for now.
5375 if (RegNum != Variable::NoRegister) 5358 if (RegNum != Variable::NoRegister)
5376 return MemOperand; 5359 return MemOperand;
5377 Variable *RegTemp = makeReg(IceType_i32); 5360 Variable *RegTemp = makeReg(IceType_i32);
5378 IceString Label; 5361 IceString Label;
5379 llvm::raw_string_ostream Label_stream(Label); 5362 llvm::raw_string_ostream Label_stream(Label);
5380 MemOperand->getOffset()->emitPoolLabel(Label_stream); 5363 MemOperand->getOffset()->emitPoolLabel(Label_stream);
5381 MemOperand->getOffset()->setShouldBePooled(true); 5364 MemOperand->getOffset()->setShouldBePooled(true);
5382 const RelocOffsetT SymOffset = 0; 5365 const RelocOffsetT SymOffset = 0;
5383 bool SuppressMangling = true; 5366 bool SuppressMangling = true;
5384 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(), 5367 Constant *Symbol = Ctx->getConstantSym(SymOffset, Label_stream.str(),
(...skipping 25 matching lines...)
5410 } 5393 }
5411 // the offset is not eligible for blinding or pooling, return the original 5394 // the offset is not eligible for blinding or pooling, return the original
5412 // mem operand 5395 // mem operand
5413 return MemOperand; 5396 return MemOperand;
5414 } 5397 }
5415 5398
5416 } // end of namespace X86Internal 5399 } // end of namespace X86Internal
5417 } // end of namespace Ice 5400 } // end of namespace Ice
5418 5401
5419 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H 5402 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H