OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632Traits.h - x86-32 traits -*- C++ -*-=// | 1 //===- subzero/src/IceTargetLoweringX8632Traits.h - x86-32 traits -*- C++ -*-=// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 372 matching lines...) Loading... |
383 | 383 |
384 return Registers; | 384 return Registers; |
385 } | 385 } |
386 | 386 |
387 static void | 387 static void |
388 makeRandomRegisterPermutation(GlobalContext *Ctx, Cfg *Func, | 388 makeRandomRegisterPermutation(GlobalContext *Ctx, Cfg *Func, |
389 llvm::SmallVectorImpl<int32_t> &Permutation, | 389 llvm::SmallVectorImpl<int32_t> &Permutation, |
390 const llvm::SmallBitVector &ExcludeRegisters, | 390 const llvm::SmallBitVector &ExcludeRegisters, |
391 uint64_t Salt) { | 391 uint64_t Salt) { |
392 // TODO(stichnot): Declaring Permutation this way loses type/size | 392 // TODO(stichnot): Declaring Permutation this way loses type/size |
393 // information. Fix this in conjunction with the caller-side TODO. | 393 // information. Fix this in conjunction with the caller-side TODO. |
394 assert(Permutation.size() >= RegisterSet::Reg_NUM); | 394 assert(Permutation.size() >= RegisterSet::Reg_NUM); |
395 // Expected upper bound on the number of registers in a single equivalence | 395 // Expected upper bound on the number of registers in a single equivalence |
396 // class. For x86-32, this would comprise the 8 XMM registers. This is for | 396 // class. For x86-32, this would comprise the 8 XMM registers. This is for |
397 // performance, not correctness. | 397 // performance, not correctness. |
398 static const unsigned MaxEquivalenceClassSize = 8; | 398 static const unsigned MaxEquivalenceClassSize = 8; |
399 using RegisterList = llvm::SmallVector<int32_t, MaxEquivalenceClassSize>; | 399 using RegisterList = llvm::SmallVector<int32_t, MaxEquivalenceClassSize>; |
400 using EquivalenceClassMap = std::map<uint32_t, RegisterList>; | 400 using EquivalenceClassMap = std::map<uint32_t, RegisterList>; |
401 EquivalenceClassMap EquivalenceClasses; | 401 EquivalenceClassMap EquivalenceClasses; |
402 SizeT NumShuffled = 0, NumPreserved = 0; | 402 SizeT NumShuffled = 0, NumPreserved = 0; |
403 | 403 |
404 // Build up the equivalence classes of registers by looking at the register | 404 // Build up the equivalence classes of registers by looking at the register |
405 // properties as well as whether the registers should be explicitly excluded | 405 // properties as well as whether the registers should be explicitly excluded |
406 // from shuffling. | 406 // from shuffling. |
(...skipping 63 matching lines...) Loading... |
470 /// The number of different NOP instructions | 470 /// The number of different NOP instructions |
471 static const uint32_t X86_NUM_NOP_VARIANTS = 5; | 471 static const uint32_t X86_NUM_NOP_VARIANTS = 5; |
472 | 472 |
473 /// \name Limits for unrolling memory intrinsics. | 473 /// \name Limits for unrolling memory intrinsics. |
474 /// @{ | 474 /// @{ |
475 static constexpr uint32_t MEMCPY_UNROLL_LIMIT = 8; | 475 static constexpr uint32_t MEMCPY_UNROLL_LIMIT = 8; |
476 static constexpr uint32_t MEMMOVE_UNROLL_LIMIT = 8; | 476 static constexpr uint32_t MEMMOVE_UNROLL_LIMIT = 8; |
477 static constexpr uint32_t MEMSET_UNROLL_LIMIT = 16; | 477 static constexpr uint32_t MEMSET_UNROLL_LIMIT = 16; |
478 /// @} | 478 /// @} |
479 | 479 |
480 /// Value is in bytes. Return Value adjusted to the next highest multiple | 480 /// Value is in bytes. Return Value adjusted to the next highest multiple of |
481 /// of the stack alignment. | 481 /// the stack alignment. |
482 static uint32_t applyStackAlignment(uint32_t Value) { | 482 static uint32_t applyStackAlignment(uint32_t Value) { |
483 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); | 483 return Utils::applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); |
484 } | 484 } |
485 | 485 |
486 /// Return the type which the elements of the vector have in the X86 | 486 /// Return the type which the elements of the vector have in the X86 |
487 /// representation of the vector. | 487 /// representation of the vector. |
488 static Type getInVectorElementType(Type Ty) { | 488 static Type getInVectorElementType(Type Ty) { |
489 assert(isVectorType(Ty)); | 489 assert(isVectorType(Ty)); |
490 size_t Index = static_cast<size_t>(Ty); | 490 size_t Index = static_cast<size_t>(Ty); |
491 (void)Index; | 491 (void)Index; |
492 assert(Index < TableTypeX8632AttributesSize); | 492 assert(Index < TableTypeX8632AttributesSize); |
493 return TableTypeX8632Attributes[Ty].InVectorElementType; | 493 return TableTypeX8632Attributes[Ty].InVectorElementType; |
494 } | 494 } |
495 | 495 |
496 // Note: The following data structures are defined in | 496 // Note: The following data structures are defined in |
497 // IceTargetLoweringX8632.cpp. | 497 // IceTargetLoweringX8632.cpp. |
498 | 498 |
499 /// The following table summarizes the logic for lowering the fcmp | 499 /// The following table summarizes the logic for lowering the fcmp |
500 /// instruction. There is one table entry for each of the 16 conditions. | 500 /// instruction. There is one table entry for each of the 16 conditions. |
501 /// | 501 /// |
502 /// The first four columns describe the case when the operands are floating | 502 /// The first four columns describe the case when the operands are floating |
503 /// point scalar values. A comment in lowerFcmp() describes the lowering | 503 /// point scalar values. A comment in lowerFcmp() describes the lowering |
504 /// template. In the most general case, there is a compare followed by two | 504 /// template. In the most general case, there is a compare followed by two |
505 /// conditional branches, because some fcmp conditions don't map to a single | 505 /// conditional branches, because some fcmp conditions don't map to a single |
506 /// x86 conditional branch. However, in many cases it is possible to swap the | 506 /// x86 conditional branch. However, in many cases it is possible to swap the |
507 /// operands in the comparison and have a single conditional branch. Since | 507 /// operands in the comparison and have a single conditional branch. Since |
508 /// it's quite tedious to validate the table by hand, good execution tests are | 508 /// it's quite tedious to validate the table by hand, good execution tests are |
509 /// helpful. | 509 /// helpful. |
510 /// | 510 /// |
511 /// The last two columns describe the case when the operands are vectors of | 511 /// The last two columns describe the case when the operands are vectors of |
512 /// floating point values. For most fcmp conditions, there is a clear mapping | 512 /// floating point values. For most fcmp conditions, there is a clear mapping |
513 /// to a single x86 cmpps instruction variant. Some fcmp conditions require | 513 /// to a single x86 cmpps instruction variant. Some fcmp conditions require |
514 /// special code to handle and these are marked in the table with a | 514 /// special code to handle and these are marked in the table with a |
515 /// Cmpps_Invalid predicate. | 515 /// Cmpps_Invalid predicate. |
516 /// @{ | 516 /// @{ |
517 static const struct TableFcmpType { | 517 static const struct TableFcmpType { |
518 uint32_t Default; | 518 uint32_t Default; |
519 bool SwapScalarOperands; | 519 bool SwapScalarOperands; |
520 Cond::BrCond C1, C2; | 520 Cond::BrCond C1, C2; |
521 bool SwapVectorOperands; | 521 bool SwapVectorOperands; |
522 Cond::CmppsCond Predicate; | 522 Cond::CmppsCond Predicate; |
523 } TableFcmp[]; | 523 } TableFcmp[]; |
524 static const size_t TableFcmpSize; | 524 static const size_t TableFcmpSize; |
525 /// @} | 525 /// @} |
526 | 526 |
527 /// The following table summarizes the logic for lowering the icmp instruction | 527 /// The following table summarizes the logic for lowering the icmp instruction |
528 /// for i32 and narrower types. Each icmp condition has a clear mapping to an | 528 /// for i32 and narrower types. Each icmp condition has a clear mapping to an |
529 /// x86 conditional branch instruction. | 529 /// x86 conditional branch instruction. |
530 /// @{ | 530 /// @{ |
531 static const struct TableIcmp32Type { Cond::BrCond Mapping; } TableIcmp32[]; | 531 static const struct TableIcmp32Type { Cond::BrCond Mapping; } TableIcmp32[]; |
532 static const size_t TableIcmp32Size; | 532 static const size_t TableIcmp32Size; |
533 /// @} | 533 /// @} |
534 | 534 |
535 /// The following table summarizes the logic for lowering the icmp instruction | 535 /// The following table summarizes the logic for lowering the icmp instruction |
536 /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and | 536 /// for the i64 type. For Eq and Ne, two separate 32-bit comparisons and |
537 /// conditional branches are needed. For the other conditions, three separate | 537 /// conditional branches are needed. For the other conditions, three separate |
538 /// conditional branches are needed. | 538 /// conditional branches are needed. |
539 /// @{ | 539 /// @{ |
540 static const struct TableIcmp64Type { | 540 static const struct TableIcmp64Type { |
541 Cond::BrCond C1, C2, C3; | 541 Cond::BrCond C1, C2, C3; |
542 } TableIcmp64[]; | 542 } TableIcmp64[]; |
543 static const size_t TableIcmp64Size; | 543 static const size_t TableIcmp64Size; |
544 /// @} | 544 /// @} |
545 | 545 |
546 static Cond::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { | 546 static Cond::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { |
547 size_t Index = static_cast<size_t>(Cond); | 547 size_t Index = static_cast<size_t>(Cond); |
(...skipping 12 matching lines...) Loading... |
560 // \ \ \ \ \-. \ \___ \/_/\ \/ | 560 // \ \ \ \ \-. \ \___ \/_/\ \/ |
561 // \ \_\ \_\\"\_\/\_____\ \ \_\ | 561 // \ \_\ \_\\"\_\/\_____\ \ \_\ |
562 // \/_/\/_/ \/_/\/_____/ \/_/ | 562 // \/_/\/_/ \/_/\/_____/ \/_/ |
563 // | 563 // |
564 //---------------------------------------------------------------------------- | 564 //---------------------------------------------------------------------------- |
565 using Insts = ::Ice::X86Internal::Insts<TargetX8632>; | 565 using Insts = ::Ice::X86Internal::Insts<TargetX8632>; |
566 | 566 |
567 using TargetLowering = ::Ice::X86Internal::TargetX86Base<TargetX8632>; | 567 using TargetLowering = ::Ice::X86Internal::TargetX86Base<TargetX8632>; |
568 using Assembler = X8632::AssemblerX8632; | 568 using Assembler = X8632::AssemblerX8632; |
569 | 569 |
570 /// X86Operand extends the Operand hierarchy. Its subclasses are | 570 /// X86Operand extends the Operand hierarchy. Its subclasses are X86OperandMem |
571 /// X86OperandMem and VariableSplit. | 571 /// and VariableSplit. |
572 class X86Operand : public ::Ice::Operand { | 572 class X86Operand : public ::Ice::Operand { |
573 X86Operand() = delete; | 573 X86Operand() = delete; |
574 X86Operand(const X86Operand &) = delete; | 574 X86Operand(const X86Operand &) = delete; |
575 X86Operand &operator=(const X86Operand &) = delete; | 575 X86Operand &operator=(const X86Operand &) = delete; |
576 | 576 |
577 public: | 577 public: |
578 enum OperandKindX8632 { k__Start = ::Ice::Operand::kTarget, kMem, kSplit }; | 578 enum OperandKindX8632 { k__Start = ::Ice::Operand::kTarget, kMem, kSplit }; |
579 using ::Ice::Operand::dump; | 579 using ::Ice::Operand::dump; |
580 | 580 |
581 void dump(const Cfg *, Ostream &Str) const override; | 581 void dump(const Cfg *, Ostream &Str) const override; |
(...skipping 55 matching lines...) Loading... |
637 Variable *Index; | 637 Variable *Index; |
638 uint16_t Shift; | 638 uint16_t Shift; |
639 SegmentRegisters SegmentReg : 16; | 639 SegmentRegisters SegmentReg : 16; |
640 /// A flag to show if this memory operand is a randomized one. Randomized | 640 /// A flag to show if this memory operand is a randomized one. Randomized |
641 /// memory operands are generated in | 641 /// memory operands are generated in |
642 /// TargetX86Base::randomizeOrPoolImmediate(). | 642 /// TargetX86Base::randomizeOrPoolImmediate(). |
643 bool Randomized; | 643 bool Randomized; |
644 }; | 644 }; |
645 | 645 |
646 /// VariableSplit is a way to treat an f64 memory location as a pair of i32 | 646 /// VariableSplit is a way to treat an f64 memory location as a pair of i32 |
647 /// locations (Low and High). This is needed for some cases of the Bitcast | 647 /// locations (Low and High). This is needed for some cases of the Bitcast |
648 /// instruction. Since it's not possible for integer registers to access the | 648 /// instruction. Since it's not possible for integer registers to access the |
649 /// XMM registers and vice versa, the lowering forces the f64 to be spilled to | 649 /// XMM registers and vice versa, the lowering forces the f64 to be spilled to |
650 /// the stack and then accesses it through the VariableSplit. | 650 /// the stack and then accesses it through the VariableSplit. |
651 // TODO(jpp): remove references to VariableSplit from IceInstX86Base as 64bit | 651 // TODO(jpp): remove references to VariableSplit from IceInstX86Base as 64bit |
652 // targets can natively handle these. | 652 // targets can natively handle these. |
653 class VariableSplit : public X86Operand { | 653 class VariableSplit : public X86Operand { |
654 VariableSplit() = delete; | 654 VariableSplit() = delete; |
655 VariableSplit(const VariableSplit &) = delete; | 655 VariableSplit(const VariableSplit &) = delete; |
656 VariableSplit &operator=(const VariableSplit &) = delete; | 656 VariableSplit &operator=(const VariableSplit &) = delete; |
657 | 657 |
658 public: | 658 public: |
(...skipping 19 matching lines...) Loading... |
678 assert(Var->getType() == IceType_f64); | 678 assert(Var->getType() == IceType_f64); |
679 Vars = Func->allocateArrayOf<Variable *>(1); | 679 Vars = Func->allocateArrayOf<Variable *>(1); |
680 Vars[0] = Var; | 680 Vars[0] = Var; |
681 NumVars = 1; | 681 NumVars = 1; |
682 } | 682 } |
683 | 683 |
684 Variable *Var; | 684 Variable *Var; |
685 Portion Part; | 685 Portion Part; |
686 }; | 686 }; |
687 | 687 |
688 /// SpillVariable decorates a Variable by linking it to another Variable. | 688 /// SpillVariable decorates a Variable by linking it to another Variable. When |
689 /// When stack frame offsets are computed, the SpillVariable is given a | 689 /// stack frame offsets are computed, the SpillVariable is given a distinct |
690 /// distinct stack slot only if its linked Variable has a register. If the | 690 /// stack slot only if its linked Variable has a register. If the linked |
691 /// linked Variable has a stack slot, then the Variable and SpillVariable | 691 /// Variable has a stack slot, then the Variable and SpillVariable share that |
692 /// share that slot. | 692 /// slot. |
693 class SpillVariable : public Variable { | 693 class SpillVariable : public Variable { |
694 SpillVariable() = delete; | 694 SpillVariable() = delete; |
695 SpillVariable(const SpillVariable &) = delete; | 695 SpillVariable(const SpillVariable &) = delete; |
696 SpillVariable &operator=(const SpillVariable &) = delete; | 696 SpillVariable &operator=(const SpillVariable &) = delete; |
697 | 697 |
698 public: | 698 public: |
699 static SpillVariable *create(Cfg *Func, Type Ty, SizeT Index) { | 699 static SpillVariable *create(Cfg *Func, Type Ty, SizeT Index) { |
700 return new (Func->allocate<SpillVariable>()) SpillVariable(Ty, Index); | 700 return new (Func->allocate<SpillVariable>()) SpillVariable(Ty, Index); |
701 } | 701 } |
702 const static OperandKind SpillVariableKind = | 702 const static OperandKind SpillVariableKind = |
(...skipping 38 matching lines...) Loading... |
741 | 741 |
742 } // end of namespace X86Internal | 742 } // end of namespace X86Internal |
743 | 743 |
744 namespace X8632 { | 744 namespace X8632 { |
745 using Traits = ::Ice::X86Internal::MachineTraits<TargetX8632>; | 745 using Traits = ::Ice::X86Internal::MachineTraits<TargetX8632>; |
746 } // end of namespace X8632 | 746 } // end of namespace X8632 |
747 | 747 |
748 } // end of namespace Ice | 748 } // end of namespace Ice |
749 | 749 |
750 #endif // SUBZERO_SRC_ICETARGETLOWERINGX8632TRAITS_H | 750 #endif // SUBZERO_SRC_ICETARGETLOWERINGX8632TRAITS_H |
OLD | NEW |