OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8664Traits.h - x86-64 traits -*- C++ -*-=// | 1 //===- subzero/src/IceTargetLoweringX8664Traits.h - x86-64 traits -*- C++ -*-=// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 386 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
397 | 397 |
398 return Registers; | 398 return Registers; |
399 } | 399 } |
400 | 400 |
401 static void | 401 static void |
402 makeRandomRegisterPermutation(GlobalContext *Ctx, Cfg *Func, | 402 makeRandomRegisterPermutation(GlobalContext *Ctx, Cfg *Func, |
403 llvm::SmallVectorImpl<int32_t> &Permutation, | 403 llvm::SmallVectorImpl<int32_t> &Permutation, |
404 const llvm::SmallBitVector &ExcludeRegisters, | 404 const llvm::SmallBitVector &ExcludeRegisters, |
405 uint64_t Salt) { | 405 uint64_t Salt) { |
406 // TODO(stichnot): Declaring Permutation this way loses type/size | 406 // TODO(stichnot): Declaring Permutation this way loses type/size |
407 // information. Fix this in conjunction with the caller-side TODO. | 407 // information. Fix this in conjunction with the caller-side TODO. |
408 assert(Permutation.size() >= RegisterSet::Reg_NUM); | 408 assert(Permutation.size() >= RegisterSet::Reg_NUM); |
409 // Expected upper bound on the number of registers in a single equivalence | 409 // Expected upper bound on the number of registers in a single equivalence |
410 // class. NOTE(review): x86-64 has 16 XMM registers but the bound below is 8; | 410 // class. NOTE(review): x86-64 has 16 XMM registers but the bound below is 8; |
411 // confirm. This only affects performance, not correctness. | 411 // confirm. This only affects performance, not correctness. |
412 static const unsigned MaxEquivalenceClassSize = 8; | 412 static const unsigned MaxEquivalenceClassSize = 8; |
413 using RegisterList = llvm::SmallVector<int32_t, MaxEquivalenceClassSize>; | 413 using RegisterList = llvm::SmallVector<int32_t, MaxEquivalenceClassSize>; |
414 using EquivalenceClassMap = std::map<uint32_t, RegisterList>; | 414 using EquivalenceClassMap = std::map<uint32_t, RegisterList>; |
415 EquivalenceClassMap EquivalenceClasses; | 415 EquivalenceClassMap EquivalenceClasses; |
416 SizeT NumShuffled = 0, NumPreserved = 0; | 416 SizeT NumShuffled = 0, NumPreserved = 0; |
417 | 417 |
418 // Build up the equivalence classes of registers by looking at the register | 418 // Build up the equivalence classes of registers by looking at the register |
419 // properties as well as whether the registers should be explicitly excluded | 419 // properties as well as whether the registers should be explicitly excluded |
420 // from shuffling. | 420 // from shuffling. |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
486 /// The number of different NOP instructions | 486 /// The number of different NOP instructions |
487 static const uint32_t X86_NUM_NOP_VARIANTS = 5; | 487 static const uint32_t X86_NUM_NOP_VARIANTS = 5; |
488 | 488 |
489 /// \name Limits for unrolling memory intrinsics. | 489 /// \name Limits for unrolling memory intrinsics. |
490 /// @{ | 490 /// @{ |
491 static constexpr uint32_t MEMCPY_UNROLL_LIMIT = 8; | 491 static constexpr uint32_t MEMCPY_UNROLL_LIMIT = 8; |
492 static constexpr uint32_t MEMMOVE_UNROLL_LIMIT = 8; | 492 static constexpr uint32_t MEMMOVE_UNROLL_LIMIT = 8; |
493 static constexpr uint32_t MEMSET_UNROLL_LIMIT = 16; | 493 static constexpr uint32_t MEMSET_UNROLL_LIMIT = 16; |
494 /// @} | 494 /// @} |
495 | 495 |
496 /// Value is in bytes. Return Value adjusted to the next highest multiple | 496 /// Value is in bytes. Return Value adjusted to the next highest multiple of |
497 /// of the stack alignment. | 497 /// the stack alignment. |
498 static uint32_t applyStackAlignment(uint32_t Value) { | 498 static uint32_t applyStackAlignment(uint32_t Value) { |
499 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); | 499 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); |
500 } | 500 } |
501 | 501 |
502 /// Return the type which the elements of the vector have in the X86 | 502 /// Return the type which the elements of the vector have in the X86 |
503 /// representation of the vector. | 503 /// representation of the vector. |
504 static Type getInVectorElementType(Type Ty) { | 504 static Type getInVectorElementType(Type Ty) { |
505 assert(isVectorType(Ty)); | 505 assert(isVectorType(Ty)); |
506 size_t Index = static_cast<size_t>(Ty); | 506 size_t Index = static_cast<size_t>(Ty); |
507 (void)Index; | 507 (void)Index; |
508 assert(Index < TableTypeX8664AttributesSize); | 508 assert(Index < TableTypeX8664AttributesSize); |
509 return TableTypeX8664Attributes[Ty].InVectorElementType; | 509 return TableTypeX8664Attributes[Ty].InVectorElementType; |
510 } | 510 } |
511 | 511 |
512 // Note: The following data structures are defined in | 512 // Note: The following data structures are defined in |
513 // IceTargetLoweringX8664.cpp. | 513 // IceTargetLoweringX8664.cpp. |
514 | 514 |
515 /// The following table summarizes the logic for lowering the fcmp | 515 /// The following table summarizes the logic for lowering the fcmp |
516 /// instruction. There is one table entry for each of the 16 conditions. | 516 /// instruction. There is one table entry for each of the 16 conditions. |
517 /// | 517 /// |
518 /// The first four columns describe the case when the operands are floating | 518 /// The first four columns describe the case when the operands are floating |
519 /// point scalar values. A comment in lowerFcmp() describes the lowering | 519 /// point scalar values. A comment in lowerFcmp() describes the lowering |
520 /// template. In the most general case, there is a compare followed by two | 520 /// template. In the most general case, there is a compare followed by two |
521 /// conditional branches, because some fcmp conditions don't map to a single | 521 /// conditional branches, because some fcmp conditions don't map to a single |
522 /// x86 conditional branch. However, in many cases it is possible to swap the | 522 /// x86 conditional branch. However, in many cases it is possible to swap the |
523 /// operands in the comparison and have a single conditional branch. Since | 523 /// operands in the comparison and have a single conditional branch. Since |
524 /// it's quite tedious to validate the table by hand, good execution tests are | 524 /// it's quite tedious to validate the table by hand, good execution tests are |
525 /// helpful. | 525 /// helpful. |
526 /// | 526 /// |
527 /// The last two columns describe the case when the operands are vectors of | 527 /// The last two columns describe the case when the operands are vectors of |
528 /// floating point values. For most fcmp conditions, there is a clear mapping | 528 /// floating point values. For most fcmp conditions, there is a clear mapping |
529 /// to a single x86 cmpps instruction variant. Some fcmp conditions require | 529 /// to a single x86 cmpps instruction variant. Some fcmp conditions require |
530 /// special code to handle and these are marked in the table with a | 530 /// special code to handle and these are marked in the table with a |
531 /// Cmpps_Invalid predicate. | 531 /// Cmpps_Invalid predicate. |
532 /// @{ | 532 /// @{ |
533 static const struct TableFcmpType { | 533 static const struct TableFcmpType { |
534 uint32_t Default; | 534 uint32_t Default; |
535 bool SwapScalarOperands; | 535 bool SwapScalarOperands; |
536 Cond::BrCond C1, C2; | 536 Cond::BrCond C1, C2; |
537 bool SwapVectorOperands; | 537 bool SwapVectorOperands; |
538 Cond::CmppsCond Predicate; | 538 Cond::CmppsCond Predicate; |
539 } TableFcmp[]; | 539 } TableFcmp[]; |
540 static const size_t TableFcmpSize; | 540 static const size_t TableFcmpSize; |
541 /// @} | 541 /// @} |
542 | 542 |
543 /// The following table summarizes the logic for lowering the icmp instruction | 543 /// The following table summarizes the logic for lowering the icmp instruction |
544 /// for i32 and narrower types. Each icmp condition has a clear mapping to an | 544 /// for i32 and narrower types. Each icmp condition has a clear mapping to an |
545 /// x86 conditional branch instruction. | 545 /// x86 conditional branch instruction. |
546 /// @{ | 546 /// @{ |
547 static const struct TableIcmp32Type { Cond::BrCond Mapping; } TableIcmp32[]; | 547 static const struct TableIcmp32Type { Cond::BrCond Mapping; } TableIcmp32[]; |
548 static const size_t TableIcmp32Size; | 548 static const size_t TableIcmp32Size; |
549 /// @} | 549 /// @} |
550 | 550 |
551 /// The following table summarizes the logic for lowering the icmp instruction | 551 /// The following table summarizes the logic for lowering the icmp instruction |
552 /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and | 552 /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and |
553 /// conditional branches are needed. For the other conditions, three separate | 553 /// conditional branches are needed. For the other conditions, three separate |
554 /// conditional branches are needed. | 554 /// conditional branches are needed. |
555 /// @{ | 555 /// @{ |
556 static const struct TableIcmp64Type { | 556 static const struct TableIcmp64Type { |
557 Cond::BrCond C1, C2, C3; | 557 Cond::BrCond C1, C2, C3; |
558 } TableIcmp64[]; | 558 } TableIcmp64[]; |
559 static const size_t TableIcmp64Size; | 559 static const size_t TableIcmp64Size; |
560 /// @} | 560 /// @} |
561 | 561 |
562 static Cond::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { | 562 static Cond::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { |
563 size_t Index = static_cast<size_t>(Cond); | 563 size_t Index = static_cast<size_t>(Cond); |
(...skipping 12 matching lines...) Expand all Loading... |
576 // \ \ \ \ \-. \ \___ \/_/\ \/ | 576 // \ \ \ \ \-. \ \___ \/_/\ \/ |
577 // \ \_\ \_\\"\_\/\_____\ \ \_\ | 577 // \ \_\ \_\\"\_\/\_____\ \ \_\ |
578 // \/_/\/_/ \/_/\/_____/ \/_/ | 578 // \/_/\/_/ \/_/\/_____/ \/_/ |
579 // | 579 // |
580 //---------------------------------------------------------------------------- | 580 //---------------------------------------------------------------------------- |
581 using Insts = ::Ice::X86Internal::Insts<TargetX8664>; | 581 using Insts = ::Ice::X86Internal::Insts<TargetX8664>; |
582 | 582 |
583 using TargetLowering = ::Ice::X86Internal::TargetX86Base<TargetX8664>; | 583 using TargetLowering = ::Ice::X86Internal::TargetX86Base<TargetX8664>; |
584 using Assembler = X8664::AssemblerX8664; | 584 using Assembler = X8664::AssemblerX8664; |
585 | 585 |
586 /// X86Operand extends the Operand hierarchy. Its subclasses are | 586 /// X86Operand extends the Operand hierarchy. Its subclasses are X86OperandMem |
587 /// X86OperandMem and VariableSplit. | 587 /// and VariableSplit. |
588 class X86Operand : public ::Ice::Operand { | 588 class X86Operand : public ::Ice::Operand { |
589 X86Operand() = delete; | 589 X86Operand() = delete; |
590 X86Operand(const X86Operand &) = delete; | 590 X86Operand(const X86Operand &) = delete; |
591 X86Operand &operator=(const X86Operand &) = delete; | 591 X86Operand &operator=(const X86Operand &) = delete; |
592 | 592 |
593 public: | 593 public: |
594 enum OperandKindX8664 { k__Start = ::Ice::Operand::kTarget, kMem, kSplit }; | 594 enum OperandKindX8664 { k__Start = ::Ice::Operand::kTarget, kMem, kSplit }; |
595 using ::Ice::Operand::dump; | 595 using ::Ice::Operand::dump; |
596 | 596 |
597 void dump(const Cfg *, Ostream &Str) const override; | 597 void dump(const Cfg *, Ostream &Str) const override; |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
648 Constant *Offset; | 648 Constant *Offset; |
649 Variable *Index; | 649 Variable *Index; |
650 uint16_t Shift; | 650 uint16_t Shift; |
651 /// A flag to show if this memory operand is a randomized one. Randomized | 651 /// A flag to show if this memory operand is a randomized one. Randomized |
652 /// memory operands are generated in | 652 /// memory operands are generated in |
653 /// TargetX86Base::randomizeOrPoolImmediate(). | 653 /// TargetX86Base::randomizeOrPoolImmediate(). |
654 bool Randomized = false; | 654 bool Randomized = false; |
655 }; | 655 }; |
656 | 656 |
657 /// VariableSplit is a way to treat an f64 memory location as a pair of i32 | 657 /// VariableSplit is a way to treat an f64 memory location as a pair of i32 |
658 /// locations (Low and High). This is needed for some cases of the Bitcast | 658 /// locations (Low and High). This is needed for some cases of the Bitcast |
659 /// instruction. Since it's not possible for integer registers to access the | 659 /// instruction. Since it's not possible for integer registers to access the |
660 /// XMM registers and vice versa, the lowering forces the f64 to be spilled to | 660 /// XMM registers and vice versa, the lowering forces the f64 to be spilled to |
661 /// the stack and then accesses through the VariableSplit. | 661 /// the stack and then accesses through the VariableSplit. |
662 // TODO(jpp): remove references to VariableSplit from IceInstX86Base as 64bit | 662 // TODO(jpp): remove references to VariableSplit from IceInstX86Base as 64bit |
663 // targets can natively handle these. | 663 // targets can natively handle these. |
664 class VariableSplit : public X86Operand { | 664 class VariableSplit : public X86Operand { |
665 VariableSplit() = delete; | 665 VariableSplit() = delete; |
666 VariableSplit(const VariableSplit &) = delete; | 666 VariableSplit(const VariableSplit &) = delete; |
667 VariableSplit &operator=(const VariableSplit &) = delete; | 667 VariableSplit &operator=(const VariableSplit &) = delete; |
668 | 668 |
669 public: | 669 public: |
(...skipping 19 matching lines...) Expand all Loading... |
689 assert(Var->getType() == IceType_f64); | 689 assert(Var->getType() == IceType_f64); |
690 Vars = Func->allocateArrayOf<Variable *>(1); | 690 Vars = Func->allocateArrayOf<Variable *>(1); |
691 Vars[0] = Var; | 691 Vars[0] = Var; |
692 NumVars = 1; | 692 NumVars = 1; |
693 } | 693 } |
694 | 694 |
695 Variable *Var; | 695 Variable *Var; |
696 Portion Part; | 696 Portion Part; |
697 }; | 697 }; |
698 | 698 |
699 /// SpillVariable decorates a Variable by linking it to another Variable. | 699 /// SpillVariable decorates a Variable by linking it to another Variable. When |
700 /// When stack frame offsets are computed, the SpillVariable is given a | 700 /// stack frame offsets are computed, the SpillVariable is given a distinct |
701 /// distinct stack slot only if its linked Variable has a register. If the | 701 /// stack slot only if its linked Variable has a register. If the linked |
702 /// linked Variable has a stack slot, then the Variable and SpillVariable | 702 /// Variable has a stack slot, then the Variable and SpillVariable share that |
703 /// share that slot. | 703 /// slot. |
704 class SpillVariable : public Variable { | 704 class SpillVariable : public Variable { |
705 SpillVariable() = delete; | 705 SpillVariable() = delete; |
706 SpillVariable(const SpillVariable &) = delete; | 706 SpillVariable(const SpillVariable &) = delete; |
707 SpillVariable &operator=(const SpillVariable &) = delete; | 707 SpillVariable &operator=(const SpillVariable &) = delete; |
708 | 708 |
709 public: | 709 public: |
710 static SpillVariable *create(Cfg *Func, Type Ty, SizeT Index) { | 710 static SpillVariable *create(Cfg *Func, Type Ty, SizeT Index) { |
711 return new (Func->allocate<SpillVariable>()) SpillVariable(Ty, Index); | 711 return new (Func->allocate<SpillVariable>()) SpillVariable(Ty, Index); |
712 } | 712 } |
713 const static OperandKind SpillVariableKind = | 713 const static OperandKind SpillVariableKind = |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
748 | 748 |
749 } // end of namespace X86Internal | 749 } // end of namespace X86Internal |
750 | 750 |
751 namespace X8664 { | 751 namespace X8664 { |
752 using Traits = ::Ice::X86Internal::MachineTraits<TargetX8664>; | 752 using Traits = ::Ice::X86Internal::MachineTraits<TargetX8664>; |
753 } // end of namespace X8664 | 753 } // end of namespace X8664 |
754 | 754 |
755 } // end of namespace Ice | 755 } // end of namespace Ice |
756 | 756 |
757 #endif // SUBZERO_SRC_ICETARGETLOWERINGX8664TRAITS_H | 757 #endif // SUBZERO_SRC_ICETARGETLOWERINGX8664TRAITS_H |
OLD | NEW |