| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX8664Traits.h - x86-64 traits -*- C++ -*-=// | 1 //===- subzero/src/IceTargetLoweringX8664Traits.h - x86-64 traits -*- C++ -*-=// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 386 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 397 | 397 |
| 398 return Registers; | 398 return Registers; |
| 399 } | 399 } |
| 400 | 400 |
| 401 static void | 401 static void |
| 402 makeRandomRegisterPermutation(GlobalContext *Ctx, Cfg *Func, | 402 makeRandomRegisterPermutation(GlobalContext *Ctx, Cfg *Func, |
| 403 llvm::SmallVectorImpl<int32_t> &Permutation, | 403 llvm::SmallVectorImpl<int32_t> &Permutation, |
| 404 const llvm::SmallBitVector &ExcludeRegisters, | 404 const llvm::SmallBitVector &ExcludeRegisters, |
| 405 uint64_t Salt) { | 405 uint64_t Salt) { |
| 406 // TODO(stichnot): Declaring Permutation this way loses type/size | 406 // TODO(stichnot): Declaring Permutation this way loses type/size |
| 407 // information. Fix this in conjunction with the caller-side TODO. | 407 // information. Fix this in conjunction with the caller-side TODO. |
| 408 assert(Permutation.size() >= RegisterSet::Reg_NUM); | 408 assert(Permutation.size() >= RegisterSet::Reg_NUM); |
| 409 // Expected upper bound on the number of registers in a single equivalence | 409 // Expected upper bound on the number of registers in a single equivalence |
| 410 // class. For x86-64, this would comprise the 16 XMM registers. This is | 410 // class. For x86-64, this would comprise the 16 XMM registers. This is |
| 411 // for performance, not correctness. | 411 // for performance, not correctness. |
| 412 static const unsigned MaxEquivalenceClassSize = 8; | 412 static const unsigned MaxEquivalenceClassSize = 8; |
| 413 using RegisterList = llvm::SmallVector<int32_t, MaxEquivalenceClassSize>; | 413 using RegisterList = llvm::SmallVector<int32_t, MaxEquivalenceClassSize>; |
| 414 using EquivalenceClassMap = std::map<uint32_t, RegisterList>; | 414 using EquivalenceClassMap = std::map<uint32_t, RegisterList>; |
| 415 EquivalenceClassMap EquivalenceClasses; | 415 EquivalenceClassMap EquivalenceClasses; |
| 416 SizeT NumShuffled = 0, NumPreserved = 0; | 416 SizeT NumShuffled = 0, NumPreserved = 0; |
| 417 | 417 |
| 418 // Build up the equivalence classes of registers by looking at the register | 418 // Build up the equivalence classes of registers by looking at the register |
| 419 // properties as well as whether the registers should be explicitly excluded | 419 // properties as well as whether the registers should be explicitly excluded |
| 420 // from shuffling. | 420 // from shuffling. |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 486 /// The number of different NOP instructions | 486 /// The number of different NOP instructions |
| 487 static const uint32_t X86_NUM_NOP_VARIANTS = 5; | 487 static const uint32_t X86_NUM_NOP_VARIANTS = 5; |
| 488 | 488 |
| 489 /// \name Limits for unrolling memory intrinsics. | 489 /// \name Limits for unrolling memory intrinsics. |
| 490 /// @{ | 490 /// @{ |
| 491 static constexpr uint32_t MEMCPY_UNROLL_LIMIT = 8; | 491 static constexpr uint32_t MEMCPY_UNROLL_LIMIT = 8; |
| 492 static constexpr uint32_t MEMMOVE_UNROLL_LIMIT = 8; | 492 static constexpr uint32_t MEMMOVE_UNROLL_LIMIT = 8; |
| 493 static constexpr uint32_t MEMSET_UNROLL_LIMIT = 16; | 493 static constexpr uint32_t MEMSET_UNROLL_LIMIT = 16; |
| 494 /// @} | 494 /// @} |
| 495 | 495 |
| 496 /// Value is in bytes. Return Value adjusted to the next highest multiple | 496 /// Value is in bytes. Return Value adjusted to the next highest multiple of |
| 497 /// of the stack alignment. | 497 /// the stack alignment. |
| 498 static uint32_t applyStackAlignment(uint32_t Value) { | 498 static uint32_t applyStackAlignment(uint32_t Value) { |
| 499 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); | 499 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); |
| 500 } | 500 } |
| 501 | 501 |
| 502 /// Return the type which the elements of the vector have in the X86 | 502 /// Return the type which the elements of the vector have in the X86 |
| 503 /// representation of the vector. | 503 /// representation of the vector. |
| 504 static Type getInVectorElementType(Type Ty) { | 504 static Type getInVectorElementType(Type Ty) { |
| 505 assert(isVectorType(Ty)); | 505 assert(isVectorType(Ty)); |
| 506 size_t Index = static_cast<size_t>(Ty); | 506 size_t Index = static_cast<size_t>(Ty); |
| 507 (void)Index; | 507 (void)Index; |
| 508 assert(Index < TableTypeX8664AttributesSize); | 508 assert(Index < TableTypeX8664AttributesSize); |
| 509 return TableTypeX8664Attributes[Ty].InVectorElementType; | 509 return TableTypeX8664Attributes[Ty].InVectorElementType; |
| 510 } | 510 } |
| 511 | 511 |
| 512 // Note: The following data structures are defined in | 512 // Note: The following data structures are defined in |
| 513 // IceTargetLoweringX8664.cpp. | 513 // IceTargetLoweringX8664.cpp. |
| 514 | 514 |
| 515 /// The following table summarizes the logic for lowering the fcmp | 515 /// The following table summarizes the logic for lowering the fcmp |
| 516 /// instruction. There is one table entry for each of the 16 conditions. | 516 /// instruction. There is one table entry for each of the 16 conditions. |
| 517 /// | 517 /// |
| 518 /// The first four columns describe the case when the operands are floating | 518 /// The first four columns describe the case when the operands are floating |
| 519 /// point scalar values. A comment in lowerFcmp() describes the lowering | 519 /// point scalar values. A comment in lowerFcmp() describes the lowering |
| 520 /// template. In the most general case, there is a compare followed by two | 520 /// template. In the most general case, there is a compare followed by two |
| 521 /// conditional branches, because some fcmp conditions don't map to a single | 521 /// conditional branches, because some fcmp conditions don't map to a single |
| 522 /// x86 conditional branch. However, in many cases it is possible to swap the | 522 /// x86 conditional branch. However, in many cases it is possible to swap the |
| 523 /// operands in the comparison and have a single conditional branch. Since | 523 /// operands in the comparison and have a single conditional branch. Since |
| 524 /// it's quite tedious to validate the table by hand, good execution tests are | 524 /// it's quite tedious to validate the table by hand, good execution tests are |
| 525 /// helpful. | 525 /// helpful. |
| 526 /// | 526 /// |
| 527 /// The last two columns describe the case when the operands are vectors of | 527 /// The last two columns describe the case when the operands are vectors of |
| 528 /// floating point values. For most fcmp conditions, there is a clear mapping | 528 /// floating point values. For most fcmp conditions, there is a clear mapping |
| 529 /// to a single x86 cmpps instruction variant. Some fcmp conditions require | 529 /// to a single x86 cmpps instruction variant. Some fcmp conditions require |
| 530 /// special code to handle and these are marked in the table with a | 530 /// special code to handle and these are marked in the table with a |
| 531 /// Cmpps_Invalid predicate. | 531 /// Cmpps_Invalid predicate. |
| 532 /// @{ | 532 /// @{ |
| 533 static const struct TableFcmpType { | 533 static const struct TableFcmpType { |
| 534 uint32_t Default; | 534 uint32_t Default; |
| 535 bool SwapScalarOperands; | 535 bool SwapScalarOperands; |
| 536 Cond::BrCond C1, C2; | 536 Cond::BrCond C1, C2; |
| 537 bool SwapVectorOperands; | 537 bool SwapVectorOperands; |
| 538 Cond::CmppsCond Predicate; | 538 Cond::CmppsCond Predicate; |
| 539 } TableFcmp[]; | 539 } TableFcmp[]; |
| 540 static const size_t TableFcmpSize; | 540 static const size_t TableFcmpSize; |
| 541 /// @} | 541 /// @} |
| 542 | 542 |
| 543 /// The following table summarizes the logic for lowering the icmp instruction | 543 /// The following table summarizes the logic for lowering the icmp instruction |
| 544 /// for i32 and narrower types. Each icmp condition has a clear mapping to an | 544 /// for i32 and narrower types. Each icmp condition has a clear mapping to an |
| 545 /// x86 conditional branch instruction. | 545 /// x86 conditional branch instruction. |
| 546 /// @{ | 546 /// @{ |
| 547 static const struct TableIcmp32Type { Cond::BrCond Mapping; } TableIcmp32[]; | 547 static const struct TableIcmp32Type { Cond::BrCond Mapping; } TableIcmp32[]; |
| 548 static const size_t TableIcmp32Size; | 548 static const size_t TableIcmp32Size; |
| 549 /// @} | 549 /// @} |
| 550 | 550 |
| 551 /// The following table summarizes the logic for lowering the icmp instruction | 551 /// The following table summarizes the logic for lowering the icmp instruction |
| 552 /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and | 552 /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and |
| 553 /// conditional branches are needed. For the other conditions, three separate | 553 /// conditional branches are needed. For the other conditions, three separate |
| 554 /// conditional branches are needed. | 554 /// conditional branches are needed. |
| 555 /// @{ | 555 /// @{ |
| 556 static const struct TableIcmp64Type { | 556 static const struct TableIcmp64Type { |
| 557 Cond::BrCond C1, C2, C3; | 557 Cond::BrCond C1, C2, C3; |
| 558 } TableIcmp64[]; | 558 } TableIcmp64[]; |
| 559 static const size_t TableIcmp64Size; | 559 static const size_t TableIcmp64Size; |
| 560 /// @} | 560 /// @} |
| 561 | 561 |
| 562 static Cond::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { | 562 static Cond::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { |
| 563 size_t Index = static_cast<size_t>(Cond); | 563 size_t Index = static_cast<size_t>(Cond); |
| (...skipping 12 matching lines...) Expand all Loading... |
| 576 // \ \ \ \ \-. \ \___ \/_/\ \/ | 576 // \ \ \ \ \-. \ \___ \/_/\ \/ |
| 577 // \ \_\ \_\\"\_\/\_____\ \ \_\ | 577 // \ \_\ \_\\"\_\/\_____\ \ \_\ |
| 578 // \/_/\/_/ \/_/\/_____/ \/_/ | 578 // \/_/\/_/ \/_/\/_____/ \/_/ |
| 579 // | 579 // |
| 580 //---------------------------------------------------------------------------- | 580 //---------------------------------------------------------------------------- |
| 581 using Insts = ::Ice::X86Internal::Insts<TargetX8664>; | 581 using Insts = ::Ice::X86Internal::Insts<TargetX8664>; |
| 582 | 582 |
| 583 using TargetLowering = ::Ice::X86Internal::TargetX86Base<TargetX8664>; | 583 using TargetLowering = ::Ice::X86Internal::TargetX86Base<TargetX8664>; |
| 584 using Assembler = X8664::AssemblerX8664; | 584 using Assembler = X8664::AssemblerX8664; |
| 585 | 585 |
| 586 /// X86Operand extends the Operand hierarchy. Its subclasses are | 586 /// X86Operand extends the Operand hierarchy. Its subclasses are X86OperandMem |
| 587 /// X86OperandMem and VariableSplit. | 587 /// and VariableSplit. |
| 588 class X86Operand : public ::Ice::Operand { | 588 class X86Operand : public ::Ice::Operand { |
| 589 X86Operand() = delete; | 589 X86Operand() = delete; |
| 590 X86Operand(const X86Operand &) = delete; | 590 X86Operand(const X86Operand &) = delete; |
| 591 X86Operand &operator=(const X86Operand &) = delete; | 591 X86Operand &operator=(const X86Operand &) = delete; |
| 592 | 592 |
| 593 public: | 593 public: |
| 594 enum OperandKindX8664 { k__Start = ::Ice::Operand::kTarget, kMem, kSplit }; | 594 enum OperandKindX8664 { k__Start = ::Ice::Operand::kTarget, kMem, kSplit }; |
| 595 using ::Ice::Operand::dump; | 595 using ::Ice::Operand::dump; |
| 596 | 596 |
| 597 void dump(const Cfg *, Ostream &Str) const override; | 597 void dump(const Cfg *, Ostream &Str) const override; |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 648 Constant *Offset; | 648 Constant *Offset; |
| 649 Variable *Index; | 649 Variable *Index; |
| 650 uint16_t Shift; | 650 uint16_t Shift; |
| 651 /// A flag to show if this memory operand is a randomized one. Randomized | 651 /// A flag to show if this memory operand is a randomized one. Randomized |
| 652 /// memory operands are generated in | 652 /// memory operands are generated in |
| 653 /// TargetX86Base::randomizeOrPoolImmediate() | 653 /// TargetX86Base::randomizeOrPoolImmediate() |
| 654 bool Randomized = false; | 654 bool Randomized = false; |
| 655 }; | 655 }; |
| 656 | 656 |
| 657 /// VariableSplit is a way to treat an f64 memory location as a pair of i32 | 657 /// VariableSplit is a way to treat an f64 memory location as a pair of i32 |
| 658 /// locations (Low and High). This is needed for some cases of the Bitcast | 658 /// locations (Low and High). This is needed for some cases of the Bitcast |
| 659 /// instruction. Since it's not possible for integer registers to access the | 659 /// instruction. Since it's not possible for integer registers to access the |
| 660 /// XMM registers and vice versa, the lowering forces the f64 to be spilled to | 660 /// XMM registers and vice versa, the lowering forces the f64 to be spilled to |
| 661 /// the stack and then accesses through the VariableSplit. | 661 /// the stack and then accesses through the VariableSplit. |
| 662 // TODO(jpp): remove references to VariableSplit from IceInstX86Base as 64bit | 662 // TODO(jpp): remove references to VariableSplit from IceInstX86Base as 64bit |
| 663 // targets can natively handle these. | 663 // targets can natively handle these. |
| 664 class VariableSplit : public X86Operand { | 664 class VariableSplit : public X86Operand { |
| 665 VariableSplit() = delete; | 665 VariableSplit() = delete; |
| 666 VariableSplit(const VariableSplit &) = delete; | 666 VariableSplit(const VariableSplit &) = delete; |
| 667 VariableSplit &operator=(const VariableSplit &) = delete; | 667 VariableSplit &operator=(const VariableSplit &) = delete; |
| 668 | 668 |
| 669 public: | 669 public: |
| (...skipping 19 matching lines...) Expand all Loading... |
| 689 assert(Var->getType() == IceType_f64); | 689 assert(Var->getType() == IceType_f64); |
| 690 Vars = Func->allocateArrayOf<Variable *>(1); | 690 Vars = Func->allocateArrayOf<Variable *>(1); |
| 691 Vars[0] = Var; | 691 Vars[0] = Var; |
| 692 NumVars = 1; | 692 NumVars = 1; |
| 693 } | 693 } |
| 694 | 694 |
| 695 Variable *Var; | 695 Variable *Var; |
| 696 Portion Part; | 696 Portion Part; |
| 697 }; | 697 }; |
| 698 | 698 |
| 699 /// SpillVariable decorates a Variable by linking it to another Variable. | 699 /// SpillVariable decorates a Variable by linking it to another Variable. When |
| 700 /// When stack frame offsets are computed, the SpillVariable is given a | 700 /// stack frame offsets are computed, the SpillVariable is given a distinct |
| 701 /// distinct stack slot only if its linked Variable has a register. If the | 701 /// stack slot only if its linked Variable has a register. If the linked |
| 702 /// linked Variable has a stack slot, then the Variable and SpillVariable | 702 /// Variable has a stack slot, then the Variable and SpillVariable share that |
| 703 /// share that slot. | 703 /// slot. |
| 704 class SpillVariable : public Variable { | 704 class SpillVariable : public Variable { |
| 705 SpillVariable() = delete; | 705 SpillVariable() = delete; |
| 706 SpillVariable(const SpillVariable &) = delete; | 706 SpillVariable(const SpillVariable &) = delete; |
| 707 SpillVariable &operator=(const SpillVariable &) = delete; | 707 SpillVariable &operator=(const SpillVariable &) = delete; |
| 708 | 708 |
| 709 public: | 709 public: |
| 710 static SpillVariable *create(Cfg *Func, Type Ty, SizeT Index) { | 710 static SpillVariable *create(Cfg *Func, Type Ty, SizeT Index) { |
| 711 return new (Func->allocate<SpillVariable>()) SpillVariable(Ty, Index); | 711 return new (Func->allocate<SpillVariable>()) SpillVariable(Ty, Index); |
| 712 } | 712 } |
| 713 const static OperandKind SpillVariableKind = | 713 const static OperandKind SpillVariableKind = |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 748 | 748 |
| 749 } // end of namespace X86Internal | 749 } // end of namespace X86Internal |
| 750 | 750 |
| 751 namespace X8664 { | 751 namespace X8664 { |
| 752 using Traits = ::Ice::X86Internal::MachineTraits<TargetX8664>; | 752 using Traits = ::Ice::X86Internal::MachineTraits<TargetX8664>; |
| 753 } // end of namespace X8664 | 753 } // end of namespace X8664 |
| 754 | 754 |
| 755 } // end of namespace Ice | 755 } // end of namespace Ice |
| 756 | 756 |
| 757 #endif // SUBZERO_SRC_ICETARGETLOWERINGX8664TRAITS_H | 757 #endif // SUBZERO_SRC_ICETARGETLOWERINGX8664TRAITS_H |
| OLD | NEW |