| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX8632Traits.h - x86-32 traits -*- C++ -*-=// | 1 //===- subzero/src/IceTargetLoweringX8632Traits.h - x86-32 traits -*- C++ -*-=// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 372 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 383 | 383 |
| 384 return Registers; | 384 return Registers; |
| 385 } | 385 } |
| 386 | 386 |
| 387 static void | 387 static void |
| 388 makeRandomRegisterPermutation(GlobalContext *Ctx, Cfg *Func, | 388 makeRandomRegisterPermutation(GlobalContext *Ctx, Cfg *Func, |
| 389 llvm::SmallVectorImpl<int32_t> &Permutation, | 389 llvm::SmallVectorImpl<int32_t> &Permutation, |
| 390 const llvm::SmallBitVector &ExcludeRegisters, | 390 const llvm::SmallBitVector &ExcludeRegisters, |
| 391 uint64_t Salt) { | 391 uint64_t Salt) { |
| 392 // TODO(stichnot): Declaring Permutation this way loses type/size | 392 // TODO(stichnot): Declaring Permutation this way loses type/size |
| 393 // information. Fix this in conjunction with the caller-side TODO. | 393 // information. Fix this in conjunction with the caller-side TODO. |
| 394 assert(Permutation.size() >= RegisterSet::Reg_NUM); | 394 assert(Permutation.size() >= RegisterSet::Reg_NUM); |
| 395 // Expected upper bound on the number of registers in a single equivalence | 395 // Expected upper bound on the number of registers in a single equivalence |
| 396 // class. For x86-32, this would comprise the 8 XMM registers. This is for | 396 // class. For x86-32, this would comprise the 8 XMM registers. This is for |
| 397 // performance, not correctness. | 397 // performance, not correctness. |
| 398 static const unsigned MaxEquivalenceClassSize = 8; | 398 static const unsigned MaxEquivalenceClassSize = 8; |
| 399 using RegisterList = llvm::SmallVector<int32_t, MaxEquivalenceClassSize>; | 399 using RegisterList = llvm::SmallVector<int32_t, MaxEquivalenceClassSize>; |
| 400 using EquivalenceClassMap = std::map<uint32_t, RegisterList>; | 400 using EquivalenceClassMap = std::map<uint32_t, RegisterList>; |
| 401 EquivalenceClassMap EquivalenceClasses; | 401 EquivalenceClassMap EquivalenceClasses; |
| 402 SizeT NumShuffled = 0, NumPreserved = 0; | 402 SizeT NumShuffled = 0, NumPreserved = 0; |
| 403 | 403 |
| 404 // Build up the equivalence classes of registers by looking at the register | 404 // Build up the equivalence classes of registers by looking at the register |
| 405 // properties as well as whether the registers should be explicitly excluded | 405 // properties as well as whether the registers should be explicitly excluded |
| 406 // from shuffling. | 406 // from shuffling. |
| (...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 470 /// The number of different NOP instructions | 470 /// The number of different NOP instructions |
| 471 static const uint32_t X86_NUM_NOP_VARIANTS = 5; | 471 static const uint32_t X86_NUM_NOP_VARIANTS = 5; |
| 472 | 472 |
| 473 /// \name Limits for unrolling memory intrinsics. | 473 /// \name Limits for unrolling memory intrinsics. |
| 474 /// @{ | 474 /// @{ |
| 475 static constexpr uint32_t MEMCPY_UNROLL_LIMIT = 8; | 475 static constexpr uint32_t MEMCPY_UNROLL_LIMIT = 8; |
| 476 static constexpr uint32_t MEMMOVE_UNROLL_LIMIT = 8; | 476 static constexpr uint32_t MEMMOVE_UNROLL_LIMIT = 8; |
| 477 static constexpr uint32_t MEMSET_UNROLL_LIMIT = 16; | 477 static constexpr uint32_t MEMSET_UNROLL_LIMIT = 16; |
| 478 /// @} | 478 /// @} |
| 479 | 479 |
| 480 /// Value is in bytes. Return Value adjusted to the next highest multiple | 480 /// Value is in bytes. Return Value adjusted to the next highest multiple of |
| 481 /// of the stack alignment. | 481 /// the stack alignment. |
| 482 static uint32_t applyStackAlignment(uint32_t Value) { | 482 static uint32_t applyStackAlignment(uint32_t Value) { |
| 483 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); | 483 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); |
| 484 } | 484 } |
| 485 | 485 |
| 486 /// Return the type which the elements of the vector have in the X86 | 486 /// Return the type which the elements of the vector have in the X86 |
| 487 /// representation of the vector. | 487 /// representation of the vector. |
| 488 static Type getInVectorElementType(Type Ty) { | 488 static Type getInVectorElementType(Type Ty) { |
| 489 assert(isVectorType(Ty)); | 489 assert(isVectorType(Ty)); |
| 490 size_t Index = static_cast<size_t>(Ty); | 490 size_t Index = static_cast<size_t>(Ty); |
| 491 (void)Index; | 491 (void)Index; |
| 492 assert(Index < TableTypeX8632AttributesSize); | 492 assert(Index < TableTypeX8632AttributesSize); |
| 493 return TableTypeX8632Attributes[Ty].InVectorElementType; | 493 return TableTypeX8632Attributes[Ty].InVectorElementType; |
| 494 } | 494 } |
| 495 | 495 |
| 496 // Note: The following data structures are defined in | 496 // Note: The following data structures are defined in |
| 497 // IceTargetLoweringX8632.cpp. | 497 // IceTargetLoweringX8632.cpp. |
| 498 | 498 |
| 499 /// The following table summarizes the logic for lowering the fcmp | 499 /// The following table summarizes the logic for lowering the fcmp |
| 500 /// instruction. There is one table entry for each of the 16 conditions. | 500 /// instruction. There is one table entry for each of the 16 conditions. |
| 501 /// | 501 /// |
| 502 /// The first four columns describe the case when the operands are floating | 502 /// The first four columns describe the case when the operands are floating |
| 503 /// point scalar values. A comment in lowerFcmp() describes the lowering | 503 /// point scalar values. A comment in lowerFcmp() describes the lowering |
| 504 /// template. In the most general case, there is a compare followed by two | 504 /// template. In the most general case, there is a compare followed by two |
| 505 /// conditional branches, because some fcmp conditions don't map to a single | 505 /// conditional branches, because some fcmp conditions don't map to a single |
| 506 /// x86 conditional branch. However, in many cases it is possible to swap the | 506 /// x86 conditional branch. However, in many cases it is possible to swap the |
| 507 /// operands in the comparison and have a single conditional branch. Since | 507 /// operands in the comparison and have a single conditional branch. Since |
| 508 /// it's quite tedious to validate the table by hand, good execution tests are | 508 /// it's quite tedious to validate the table by hand, good execution tests are |
| 509 /// helpful. | 509 /// helpful. |
| 510 /// | 510 /// |
| 511 /// The last two columns describe the case when the operands are vectors of | 511 /// The last two columns describe the case when the operands are vectors of |
| 512 /// floating point values. For most fcmp conditions, there is a clear mapping | 512 /// floating point values. For most fcmp conditions, there is a clear mapping |
| 513 /// to a single x86 cmpps instruction variant. Some fcmp conditions require | 513 /// to a single x86 cmpps instruction variant. Some fcmp conditions require |
| 514 /// special code to handle and these are marked in the table with a | 514 /// special code to handle and these are marked in the table with a |
| 515 /// Cmpps_Invalid predicate. | 515 /// Cmpps_Invalid predicate. |
| 516 /// {@ | 516 /// {@ |
| 517 static const struct TableFcmpType { | 517 static const struct TableFcmpType { |
| 518 uint32_t Default; | 518 uint32_t Default; |
| 519 bool SwapScalarOperands; | 519 bool SwapScalarOperands; |
| 520 Cond::BrCond C1, C2; | 520 Cond::BrCond C1, C2; |
| 521 bool SwapVectorOperands; | 521 bool SwapVectorOperands; |
| 522 Cond::CmppsCond Predicate; | 522 Cond::CmppsCond Predicate; |
| 523 } TableFcmp[]; | 523 } TableFcmp[]; |
| 524 static const size_t TableFcmpSize; | 524 static const size_t TableFcmpSize; |
| 525 /// @} | 525 /// @} |
| 526 | 526 |
| 527 /// The following table summarizes the logic for lowering the icmp instruction | 527 /// The following table summarizes the logic for lowering the icmp instruction |
| 528 /// for i32 and narrower types. Each icmp condition has a clear mapping to an | 528 /// for i32 and narrower types. Each icmp condition has a clear mapping to an |
| 529 /// x86 conditional branch instruction. | 529 /// x86 conditional branch instruction. |
| 530 /// {@ | 530 /// {@ |
| 531 static const struct TableIcmp32Type { Cond::BrCond Mapping; } TableIcmp32[]; | 531 static const struct TableIcmp32Type { Cond::BrCond Mapping; } TableIcmp32[]; |
| 532 static const size_t TableIcmp32Size; | 532 static const size_t TableIcmp32Size; |
| 533 /// @} | 533 /// @} |
| 534 | 534 |
| 535 /// The following table summarizes the logic for lowering the icmp instruction | 535 /// The following table summarizes the logic for lowering the icmp instruction |
| 536 /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and | 536 /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and |
| 537 /// conditional branches are needed. For the other conditions, three separate | 537 /// conditional branches are needed. For the other conditions, three separate |
| 538 /// conditional branches are needed. | 538 /// conditional branches are needed. |
| 539 /// {@ | 539 /// {@ |
| 540 static const struct TableIcmp64Type { | 540 static const struct TableIcmp64Type { |
| 541 Cond::BrCond C1, C2, C3; | 541 Cond::BrCond C1, C2, C3; |
| 542 } TableIcmp64[]; | 542 } TableIcmp64[]; |
| 543 static const size_t TableIcmp64Size; | 543 static const size_t TableIcmp64Size; |
| 544 /// @} | 544 /// @} |
| 545 | 545 |
| 546 static Cond::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { | 546 static Cond::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { |
| 547 size_t Index = static_cast<size_t>(Cond); | 547 size_t Index = static_cast<size_t>(Cond); |
| (...skipping 12 matching lines...) Expand all Loading... |
| 560 // \ \ \ \ \-. \ \___ \/_/\ \/ | 560 // \ \ \ \ \-. \ \___ \/_/\ \/ |
| 561 // \ \_\ \_\\"\_\/\_____\ \ \_\ | 561 // \ \_\ \_\\"\_\/\_____\ \ \_\ |
| 562 // \/_/\/_/ \/_/\/_____/ \/_/ | 562 // \/_/\/_/ \/_/\/_____/ \/_/ |
| 563 // | 563 // |
| 564 //---------------------------------------------------------------------------- | 564 //---------------------------------------------------------------------------- |
| 565 using Insts = ::Ice::X86Internal::Insts<TargetX8632>; | 565 using Insts = ::Ice::X86Internal::Insts<TargetX8632>; |
| 566 | 566 |
| 567 using TargetLowering = ::Ice::X86Internal::TargetX86Base<TargetX8632>; | 567 using TargetLowering = ::Ice::X86Internal::TargetX86Base<TargetX8632>; |
| 568 using Assembler = X8632::AssemblerX8632; | 568 using Assembler = X8632::AssemblerX8632; |
| 569 | 569 |
| 570 /// X86Operand extends the Operand hierarchy. Its subclasses are | 570 /// X86Operand extends the Operand hierarchy. Its subclasses are X86OperandMem |
| 571 /// X86OperandMem and VariableSplit. | 571 /// and VariableSplit. |
| 572 class X86Operand : public ::Ice::Operand { | 572 class X86Operand : public ::Ice::Operand { |
| 573 X86Operand() = delete; | 573 X86Operand() = delete; |
| 574 X86Operand(const X86Operand &) = delete; | 574 X86Operand(const X86Operand &) = delete; |
| 575 X86Operand &operator=(const X86Operand &) = delete; | 575 X86Operand &operator=(const X86Operand &) = delete; |
| 576 | 576 |
| 577 public: | 577 public: |
| 578 enum OperandKindX8632 { k__Start = ::Ice::Operand::kTarget, kMem, kSplit }; | 578 enum OperandKindX8632 { k__Start = ::Ice::Operand::kTarget, kMem, kSplit }; |
| 579 using ::Ice::Operand::dump; | 579 using ::Ice::Operand::dump; |
| 580 | 580 |
| 581 void dump(const Cfg *, Ostream &Str) const override; | 581 void dump(const Cfg *, Ostream &Str) const override; |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 637 Variable *Index; | 637 Variable *Index; |
| 638 uint16_t Shift; | 638 uint16_t Shift; |
| 639 SegmentRegisters SegmentReg : 16; | 639 SegmentRegisters SegmentReg : 16; |
| 640 /// A flag to show if this memory operand is a randomized one. Randomized | 640 /// A flag to show if this memory operand is a randomized one. Randomized |
| 641 /// memory operands are generated in | 641 /// memory operands are generated in |
| 642 /// TargetX86Base::randomizeOrPoolImmediate() | 642 /// TargetX86Base::randomizeOrPoolImmediate() |
| 643 bool Randomized; | 643 bool Randomized; |
| 644 }; | 644 }; |
| 645 | 645 |
| 646 /// VariableSplit is a way to treat an f64 memory location as a pair of i32 | 646 /// VariableSplit is a way to treat an f64 memory location as a pair of i32 |
| 647 /// locations (Low and High). This is needed for some cases of the Bitcast | 647 /// locations (Low and High). This is needed for some cases of the Bitcast |
| 648 /// instruction. Since it's not possible for integer registers to access the | 648 /// instruction. Since it's not possible for integer registers to access the |
| 649 /// XMM registers and vice versa, the lowering forces the f64 to be spilled to | 649 /// XMM registers and vice versa, the lowering forces the f64 to be spilled to |
| 650 /// the stack and then accesses through the VariableSplit. | 650 /// the stack and then accesses through the VariableSplit. |
| 651 // TODO(jpp): remove references to VariableSplit from IceInstX86Base as 64bit | 651 // TODO(jpp): remove references to VariableSplit from IceInstX86Base as 64bit |
| 652 // targets can natively handle these. | 652 // targets can natively handle these. |
| 653 class VariableSplit : public X86Operand { | 653 class VariableSplit : public X86Operand { |
| 654 VariableSplit() = delete; | 654 VariableSplit() = delete; |
| 655 VariableSplit(const VariableSplit &) = delete; | 655 VariableSplit(const VariableSplit &) = delete; |
| 656 VariableSplit &operator=(const VariableSplit &) = delete; | 656 VariableSplit &operator=(const VariableSplit &) = delete; |
| 657 | 657 |
| 658 public: | 658 public: |
| (...skipping 19 matching lines...) Expand all Loading... |
| 678 assert(Var->getType() == IceType_f64); | 678 assert(Var->getType() == IceType_f64); |
| 679 Vars = Func->allocateArrayOf<Variable *>(1); | 679 Vars = Func->allocateArrayOf<Variable *>(1); |
| 680 Vars[0] = Var; | 680 Vars[0] = Var; |
| 681 NumVars = 1; | 681 NumVars = 1; |
| 682 } | 682 } |
| 683 | 683 |
| 684 Variable *Var; | 684 Variable *Var; |
| 685 Portion Part; | 685 Portion Part; |
| 686 }; | 686 }; |
| 687 | 687 |
| 688 /// SpillVariable decorates a Variable by linking it to another Variable. | 688 /// SpillVariable decorates a Variable by linking it to another Variable. When |
| 689 /// When stack frame offsets are computed, the SpillVariable is given a | 689 /// stack frame offsets are computed, the SpillVariable is given a distinct |
| 690 /// distinct stack slot only if its linked Variable has a register. If the | 690 /// stack slot only if its linked Variable has a register. If the linked |
| 691 /// linked Variable has a stack slot, then the Variable and SpillVariable | 691 /// Variable has a stack slot, then the Variable and SpillVariable share that |
| 692 /// share that slot. | 692 /// slot. |
| 693 class SpillVariable : public Variable { | 693 class SpillVariable : public Variable { |
| 694 SpillVariable() = delete; | 694 SpillVariable() = delete; |
| 695 SpillVariable(const SpillVariable &) = delete; | 695 SpillVariable(const SpillVariable &) = delete; |
| 696 SpillVariable &operator=(const SpillVariable &) = delete; | 696 SpillVariable &operator=(const SpillVariable &) = delete; |
| 697 | 697 |
| 698 public: | 698 public: |
| 699 static SpillVariable *create(Cfg *Func, Type Ty, SizeT Index) { | 699 static SpillVariable *create(Cfg *Func, Type Ty, SizeT Index) { |
| 700 return new (Func->allocate<SpillVariable>()) SpillVariable(Ty, Index); | 700 return new (Func->allocate<SpillVariable>()) SpillVariable(Ty, Index); |
| 701 } | 701 } |
| 702 const static OperandKind SpillVariableKind = | 702 const static OperandKind SpillVariableKind = |
| (...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 741 | 741 |
| 742 } // end of namespace X86Internal | 742 } // end of namespace X86Internal |
| 743 | 743 |
| 744 namespace X8632 { | 744 namespace X8632 { |
| 745 using Traits = ::Ice::X86Internal::MachineTraits<TargetX8632>; | 745 using Traits = ::Ice::X86Internal::MachineTraits<TargetX8632>; |
| 746 } // end of namespace X8632 | 746 } // end of namespace X8632 |
| 747 | 747 |
| 748 } // end of namespace Ice | 748 } // end of namespace Ice |
| 749 | 749 |
| 750 #endif // SUBZERO_SRC_ICETARGETLOWERINGX8632TRAITS_H | 750 #endif // SUBZERO_SRC_ICETARGETLOWERINGX8632TRAITS_H |
| OLD | NEW |