Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(668)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 465413003: Subzero: Align spill locations to natural alignment. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Comments, round 1 Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/align-spill-locations.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
11 // consists almost entirely of the lowering sequence for each 11 // consists almost entirely of the lowering sequence for each
12 // high-level instruction. It also implements 12 // high-level instruction. It also implements
13 // TargetX8632Fast::postLower() which does the simplest possible 13 // TargetX8632Fast::postLower() which does the simplest possible
14 // register allocation for the "fast" target. 14 // register allocation for the "fast" target.
15 // 15 //
16 //===----------------------------------------------------------------------===// 16 //===----------------------------------------------------------------------===//
17 17
18 #include "IceDefs.h" 18 #include "IceDefs.h"
19 #include "IceCfg.h" 19 #include "IceCfg.h"
20 #include "IceCfgNode.h" 20 #include "IceCfgNode.h"
21 #include "IceInstX8632.h" 21 #include "IceInstX8632.h"
22 #include "IceOperand.h" 22 #include "IceOperand.h"
23 #include "IceTargetLoweringX8632.def" 23 #include "IceTargetLoweringX8632.def"
24 #include "IceTargetLoweringX8632.h" 24 #include "IceTargetLoweringX8632.h"
25 #include "llvm/Support/CommandLine.h" 25 #include "llvm/Support/CommandLine.h"
26 26
27 #include <strings.h>
Jim Stichnoth 2014/08/14 18:21:19 Use MathExtras.h and llvm::findFirstSet(), since J… [comment preview truncated]
28
27 namespace Ice { 29 namespace Ice {
28 30
29 namespace { 31 namespace {
30 32
31 // The following table summarizes the logic for lowering the fcmp 33 // The following table summarizes the logic for lowering the fcmp
32 // instruction. There is one table entry for each of the 16 conditions. 34 // instruction. There is one table entry for each of the 16 conditions.
33 // 35 //
34 // The first four columns describe the case when the operands are 36 // The first four columns describe the case when the operands are
35 // floating point scalar values. A comment in lowerFcmp() describes the 37 // floating point scalar values. A comment in lowerFcmp() describes the
36 // lowering template. In the most general case, there is a compare 38 // lowering template. In the most general case, there is a compare
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
121 } 123 }
122 124
123 // The maximum number of arguments to pass in XMM registers 125 // The maximum number of arguments to pass in XMM registers
124 const uint32_t X86_MAX_XMM_ARGS = 4; 126 const uint32_t X86_MAX_XMM_ARGS = 4;
125 // The number of bits in a byte 127 // The number of bits in a byte
126 const uint32_t X86_CHAR_BIT = 8; 128 const uint32_t X86_CHAR_BIT = 8;
127 // Stack alignment 129 // Stack alignment
128 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16; 130 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
129 // Size of the return address on the stack 131 // Size of the return address on the stack
130 const uint32_t X86_RET_IP_SIZE_BYTES = 4; 132 const uint32_t X86_RET_IP_SIZE_BYTES = 4;
133 // The base 2 logarithm of the width in bytes of the largest supported type
134 const uint32_t X86_LOG2_OF_MAX_TYPE_SIZE = 4;
131 135
132 // Value is a size in bytes. Return Value adjusted to the next highest 136 // Value and Alignment are in bytes. Return Value adjusted to the next
133 // multiple of the stack alignment. 137 // highest multiple of Alignment.
138 uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) {
139 // power of 2
140 assert((Alignment & (Alignment - 1)) == 0);
141 return (Value + Alignment - 1) & -Alignment;
142 }
143
144 // Value is in bytes. Return Value adjusted to the next highest multiple
145 // of the stack alignment.
134 uint32_t applyStackAlignment(uint32_t Value) { 146 uint32_t applyStackAlignment(uint32_t Value) {
135 // power of 2 147 return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
136 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
137 return (Value + X86_STACK_ALIGNMENT_BYTES - 1) & -X86_STACK_ALIGNMENT_BYTES;
138 } 148 }
139 149
140 // Instruction set options 150 // Instruction set options
141 namespace cl = ::llvm::cl; 151 namespace cl = ::llvm::cl;
142 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet( 152 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet(
143 "mattr", cl::desc("X86 target attributes"), 153 "mattr", cl::desc("X86 target attributes"),
144 cl::init(TargetX8632::SSE2), 154 cl::init(TargetX8632::SSE2),
145 cl::values( 155 cl::values(
146 clEnumValN(TargetX8632::SSE2, "sse2", 156 clEnumValN(TargetX8632::SSE2, "sse2",
147 "Enable SSE2 instructions (default)"), 157 "Enable SSE2 instructions (default)"),
(...skipping 365 matching lines...) Expand 10 before | Expand all | Expand 10 after
513 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name); 523 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name);
514 RegisterArg->setRegNum(RegNum); 524 RegisterArg->setRegNum(RegNum);
515 RegisterArg->setIsArg(Func); 525 RegisterArg->setIsArg(Func);
516 Arg->setIsArg(Func, false); 526 Arg->setIsArg(Func, false);
517 527
518 Args[I] = RegisterArg; 528 Args[I] = RegisterArg;
519 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); 529 Context.insert(InstAssign::create(Func, Arg, RegisterArg));
520 } 530 }
521 } 531 }
522 532
533 void TargetX8632::sortByAlignment(VarList &Dest, const VarList &Source) const {
534 const SizeT NumBuckets = X86_LOG2_OF_MAX_TYPE_SIZE + 1;
535 VarList Buckets[NumBuckets];
536
537 for (VarList::const_iterator I = Source.begin(), E = Source.end(); I != E;
538 ++I) {
539 Variable *Var = *I;
540 uint32_t NaturalAlignment = typeWidthInBytesOnStack(Var->getType());
541 SizeT BucketIndex = ffs(NaturalAlignment) - 1;
542 assert(BucketIndex < NumBuckets);
543 Buckets[BucketIndex].push_back(Var);
544 }
545
546 for (SizeT I = 0, E = NumBuckets; I < E; ++I) {
547 VarList &List = Buckets[NumBuckets - I - 1];
548 Dest.insert(Dest.end(), List.begin(), List.end());
549 }
550 }
551
523 // Helper function for addProlog(). 552 // Helper function for addProlog().
524 // 553 //
525 // This assumes Arg is an argument passed on the stack. This sets the 554 // This assumes Arg is an argument passed on the stack. This sets the
526 // frame offset for Arg and updates InArgsSizeBytes according to Arg's 555 // frame offset for Arg and updates InArgsSizeBytes according to Arg's
527 // width. For an I64 arg that has been split into Lo and Hi components, 556 // width. For an I64 arg that has been split into Lo and Hi components,
528 // it calls itself recursively on the components, taking care to handle 557 // it calls itself recursively on the components, taking care to handle
529 // Lo first because of the little-endian architecture. Lastly, this 558 // Lo first because of the little-endian architecture. Lastly, this
530 // function generates an instruction to copy Arg into its assigned 559 // function generates an instruction to copy Arg into its assigned
531 // register if applicable. 560 // register if applicable.
532 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, 561 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
(...skipping 23 matching lines...) Expand all
556 _movp(Arg, Mem); 585 _movp(Arg, Mem);
557 } else { 586 } else {
558 _mov(Arg, Mem); 587 _mov(Arg, Mem);
559 } 588 }
560 } 589 }
561 } 590 }
562 591
563 Type TargetX8632::stackSlotType() { return IceType_i32; } 592 Type TargetX8632::stackSlotType() { return IceType_i32; }
564 593
565 void TargetX8632::addProlog(CfgNode *Node) { 594 void TargetX8632::addProlog(CfgNode *Node) {
595 // Stack frame layout:
596 //
597 // +------------------------+
598 // | 1. return address |
599 // +------------------------+
600 // | 2. preserved registers |
601 // +------------------------+
602 // | 3. padding |
603 // +------------------------+
604 // | 4. global spill area |
605 // +------------------------+
606 // | 5. padding |
607 // +------------------------+
608 // | 6. local spill area |
609 // +------------------------+
610 // | 7. padding |
611 // +------------------------+
612 // | 8. local variables |
613 // +------------------------+
614 //
615 // The following variables record the size in bytes of the given areas:
616 // * X86_RET_IP_SIZE_BYTES: area 1
617 // * PreservedRegsSizeBytes: area 2
618 // * SpillAreaPaddingBytes: area 3
619 // * GlobalsSize: area 4
620 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5
621 // * LocalsSpillAreaSize: area 6
622 // * LocalsSizeBytes: areas 3 - 7
623
566 // If SimpleCoalescing is false, each variable without a register 624 // If SimpleCoalescing is false, each variable without a register
567 // gets its own unique stack slot, which leads to large stack 625 // gets its own unique stack slot, which leads to large stack
568 // frames. If SimpleCoalescing is true, then each "global" variable 626 // frames. If SimpleCoalescing is true, then each "global" variable
569 // without a register gets its own slot, but "local" variable slots 627 // without a register gets its own slot, but "local" variable slots
570 // are reused across basic blocks. E.g., if A and B are local to 628 // are reused across basic blocks. E.g., if A and B are local to
571 // block 1 and C is local to block 2, then C may share a slot with A 629 // block 1 and C is local to block 2, then C may share a slot with A
572 // or B. 630 // or B.
573 const bool SimpleCoalescing = true; 631 const bool SimpleCoalescing = true;
574 size_t InArgsSizeBytes = 0; 632 size_t InArgsSizeBytes = 0;
575 size_t PreservedRegsSizeBytes = 0; 633 size_t PreservedRegsSizeBytes = 0;
(...skipping 16 matching lines...) Expand all
592 getRegisterSet(RegSet_CalleeSave, RegSet_None); 650 getRegisterSet(RegSet_CalleeSave, RegSet_None);
593 651
594 size_t GlobalsSize = 0; 652 size_t GlobalsSize = 0;
595 std::vector<size_t> LocalsSize(Func->getNumNodes()); 653 std::vector<size_t> LocalsSize(Func->getNumNodes());
596 654
597 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and 655 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and
598 // LocalsSizeBytes. 656 // LocalsSizeBytes.
599 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); 657 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
600 const VarList &Variables = Func->getVariables(); 658 const VarList &Variables = Func->getVariables();
601 const VarList &Args = Func->getArgs(); 659 const VarList &Args = Func->getArgs();
660 VarList SpilledVariables, SortedSpilledVariables,
661 VariablesLinkedToSpillSplots;
662
663 // If there is a separate locals area, this specifies the alignment
664 // for it.
665 uint32_t LocalsSlotsAlignmentBytes = 0;
666 // The entire spill locations area gets aligned to largest natural
667 // alignment of the variables that have a spill slot.
668 uint32_t SpillAreaAlignmentBytes = 0;
602 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); 669 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
603 I != E; ++I) { 670 I != E; ++I) {
604 Variable *Var = *I; 671 Variable *Var = *I;
605 if (Var->hasReg()) { 672 if (Var->hasReg()) {
606 RegsUsed[Var->getRegNum()] = true; 673 RegsUsed[Var->getRegNum()] = true;
607 continue; 674 continue;
608 } 675 }
609 // An argument either does not need a stack slot (if passed in a 676 // An argument either does not need a stack slot (if passed in a
610 // register) or already has one (if passed on the stack). 677 // register) or already has one (if passed on the stack).
611 if (Var->getIsArg()) 678 if (Var->getIsArg())
612 continue; 679 continue;
613 // An unreferenced variable doesn't need a stack slot. 680 // An unreferenced variable doesn't need a stack slot.
614 if (ComputedLiveRanges && Var->getLiveRange().isEmpty()) 681 if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
615 continue; 682 continue;
616 // A spill slot linked to a variable with a stack slot should reuse 683 // A spill slot linked to a variable with a stack slot should reuse
617 // that stack slot. 684 // that stack slot.
618 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { 685 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
619 if (Variable *Linked = Var->getPreferredRegister()) { 686 if (Variable *Linked = Var->getPreferredRegister()) {
620 if (!Linked->hasReg()) 687 if (!Linked->hasReg()) {
688 VariablesLinkedToSpillSplots.push_back(Var);
621 continue; 689 continue;
690 }
622 } 691 }
623 } 692 }
693 SpilledVariables.push_back(Var);
694 }
695
696 sortByAlignment(SortedSpilledVariables, SpilledVariables);
697 for (VarList::const_iterator I = SortedSpilledVariables.begin(),
698 E = SortedSpilledVariables.end();
699 I != E; ++I) {
700 Variable *Var = *I;
624 size_t Increment = typeWidthInBytesOnStack(Var->getType()); 701 size_t Increment = typeWidthInBytesOnStack(Var->getType());
702 if (!SpillAreaAlignmentBytes)
703 SpillAreaAlignmentBytes = Increment;
625 if (SimpleCoalescing) { 704 if (SimpleCoalescing) {
626 if (Var->isMultiblockLife()) { 705 if (Var->isMultiblockLife()) {
627 GlobalsSize += Increment; 706 GlobalsSize += Increment;
628 } else { 707 } else {
629 SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); 708 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
630 LocalsSize[NodeIndex] += Increment; 709 LocalsSize[NodeIndex] += Increment;
631 if (LocalsSize[NodeIndex] > LocalsSizeBytes) 710 if (LocalsSize[NodeIndex] > LocalsSizeBytes)
632 LocalsSizeBytes = LocalsSize[NodeIndex]; 711 LocalsSizeBytes = LocalsSize[NodeIndex];
712 if (!LocalsSlotsAlignmentBytes)
713 LocalsSlotsAlignmentBytes = Increment;
633 } 714 }
634 } else { 715 } else {
635 LocalsSizeBytes += Increment; 716 LocalsSizeBytes += Increment;
636 } 717 }
637 } 718 }
719 uint32_t LocalsSpillAreaSize = LocalsSizeBytes;
720
638 LocalsSizeBytes += GlobalsSize; 721 LocalsSizeBytes += GlobalsSize;
639 722
640 // Add push instructions for preserved registers. 723 // Add push instructions for preserved registers.
641 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 724 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
642 if (CalleeSaves[i] && RegsUsed[i]) { 725 if (CalleeSaves[i] && RegsUsed[i]) {
643 PreservedRegsSizeBytes += 4; 726 PreservedRegsSizeBytes += 4;
644 const bool SuppressStackAdjustment = true; 727 const bool SuppressStackAdjustment = true;
645 _push(getPhysicalRegister(i), SuppressStackAdjustment); 728 _push(getPhysicalRegister(i), SuppressStackAdjustment);
646 } 729 }
647 } 730 }
648 731
649 // Generate "push ebp; mov ebp, esp" 732 // Generate "push ebp; mov ebp, esp"
650 if (IsEbpBasedFrame) { 733 if (IsEbpBasedFrame) {
651 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) 734 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
652 .count() == 0); 735 .count() == 0);
653 PreservedRegsSizeBytes += 4; 736 PreservedRegsSizeBytes += 4;
654 Variable *ebp = getPhysicalRegister(Reg_ebp); 737 Variable *ebp = getPhysicalRegister(Reg_ebp);
655 Variable *esp = getPhysicalRegister(Reg_esp); 738 Variable *esp = getPhysicalRegister(Reg_esp);
656 const bool SuppressStackAdjustment = true; 739 const bool SuppressStackAdjustment = true;
657 _push(ebp, SuppressStackAdjustment); 740 _push(ebp, SuppressStackAdjustment);
658 _mov(ebp, esp); 741 _mov(ebp, esp);
659 } 742 }
660 743
744 // Align the variables area. SpillAreaPaddingBytes is the size of
745 // the region after the preserved registers and before the spill
746 // areas.
747 uint32_t SpillAreaPaddingBytes = 0;
748 if (SpillAreaAlignmentBytes) {
749 assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES);
750 uint32_t PaddingStart = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
751 uint32_t SpillAreaStart =
752 applyAlignment(PaddingStart, SpillAreaAlignmentBytes);
753 SpillAreaPaddingBytes = SpillAreaStart - PaddingStart;
754 LocalsSizeBytes += SpillAreaPaddingBytes;
755 }
756
757 // If there are separate globals and locals areas, make sure the
758 // locals area is aligned by padding the end of the globals area.
759 uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize;
760 if (LocalsSlotsAlignmentBytes) {
761 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
762 GlobalsAndSubsequentPaddingSize =
763 applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes);
764 LocalsSizeBytes += GlobalsAndSubsequentPaddingSize - GlobalsSize;
765 }
766
767 // Align esp if necessary.
661 if (NeedsStackAlignment) { 768 if (NeedsStackAlignment) {
662 uint32_t StackSize = applyStackAlignment( 769 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
663 X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes + LocalsSizeBytes); 770 uint32_t StackSize = applyStackAlignment(StackOffset + LocalsSizeBytes);
664 LocalsSizeBytes = 771 LocalsSizeBytes = StackSize - StackOffset;
665 StackSize - X86_RET_IP_SIZE_BYTES - PreservedRegsSizeBytes;
666 } 772 }
667 773
668 // Generate "sub esp, LocalsSizeBytes" 774 // Generate "sub esp, LocalsSizeBytes"
669 if (LocalsSizeBytes) 775 if (LocalsSizeBytes)
670 _sub(getPhysicalRegister(Reg_esp), 776 _sub(getPhysicalRegister(Reg_esp),
671 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes)); 777 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
672 778
673 resetStackAdjustment(); 779 resetStackAdjustment();
674 780
675 // Fill in stack offsets for stack args, and copy args into registers 781 // Fill in stack offsets for stack args, and copy args into registers
676 // for those that were register-allocated. Args are pushed right to 782 // for those that were register-allocated. Args are pushed right to
677 // left, so Arg[0] is closest to the stack/frame pointer. 783 // left, so Arg[0] is closest to the stack/frame pointer.
678 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 784 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
679 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; 785 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
680 if (!IsEbpBasedFrame) 786 if (!IsEbpBasedFrame)
681 BasicFrameOffset += LocalsSizeBytes; 787 BasicFrameOffset += LocalsSizeBytes;
682 788
683 unsigned NumXmmArgs = 0; 789 unsigned NumXmmArgs = 0;
684 for (SizeT i = 0; i < Args.size(); ++i) { 790 for (SizeT i = 0; i < Args.size(); ++i) {
685 Variable *Arg = Args[i]; 791 Variable *Arg = Args[i];
686 // Skip arguments passed in registers. 792 // Skip arguments passed in registers.
687 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { 793 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) {
688 ++NumXmmArgs; 794 ++NumXmmArgs;
689 continue; 795 continue;
690 } 796 }
691 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); 797 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
692 } 798 }
693 799
694 // Fill in stack offsets for locals. 800 // Fill in stack offsets for locals.
695 size_t TotalGlobalsSize = GlobalsSize; 801 size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
696 GlobalsSize = 0;
697 LocalsSize.assign(LocalsSize.size(), 0); 802 LocalsSize.assign(LocalsSize.size(), 0);
698 size_t NextStackOffset = 0; 803 size_t NextStackOffset = GlobalsSpaceUsed;
699 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); 804 for (VarList::const_iterator I = SortedSpilledVariables.begin(),
805 E = SortedSpilledVariables.end();
700 I != E; ++I) { 806 I != E; ++I) {
701 Variable *Var = *I; 807 Variable *Var = *I;
702 if (Var->hasReg()) {
703 RegsUsed[Var->getRegNum()] = true;
704 continue;
705 }
706 if (Var->getIsArg())
707 continue;
708 if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
709 continue;
710 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
711 if (Variable *Linked = Var->getPreferredRegister()) {
712 if (!Linked->hasReg()) {
713 // TODO: Make sure Linked has already been assigned a stack
714 // slot.
715 Var->setStackOffset(Linked->getStackOffset());
716 continue;
717 }
718 }
719 }
720 size_t Increment = typeWidthInBytesOnStack(Var->getType()); 808 size_t Increment = typeWidthInBytesOnStack(Var->getType());
721 if (SimpleCoalescing) { 809 if (SimpleCoalescing) {
722 if (Var->isMultiblockLife()) { 810 if (Var->isMultiblockLife()) {
723 GlobalsSize += Increment; 811 GlobalsSpaceUsed += Increment;
724 NextStackOffset = GlobalsSize; 812 NextStackOffset = GlobalsSpaceUsed;
725 } else { 813 } else {
726 SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); 814 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
727 LocalsSize[NodeIndex] += Increment; 815 LocalsSize[NodeIndex] += Increment;
728 NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex]; 816 NextStackOffset = SpillAreaPaddingBytes +
817 GlobalsAndSubsequentPaddingSize +
818 LocalsSize[NodeIndex];
729 } 819 }
730 } else { 820 } else {
731 NextStackOffset += Increment; 821 NextStackOffset += Increment;
732 } 822 }
733 if (IsEbpBasedFrame) 823 if (IsEbpBasedFrame)
734 Var->setStackOffset(-NextStackOffset); 824 Var->setStackOffset(-NextStackOffset);
735 else 825 else
736 Var->setStackOffset(LocalsSizeBytes - NextStackOffset); 826 Var->setStackOffset(LocalsSizeBytes - NextStackOffset);
737 } 827 }
738 this->FrameSizeLocals = NextStackOffset; 828 this->FrameSizeLocals = NextStackOffset - SpillAreaPaddingBytes;
739 this->HasComputedFrame = true; 829 this->HasComputedFrame = true;
740 830
831 // Assign stack offsets to variables that have been linked to spilled
832 // variables.
833 for (VarList::const_iterator I = VariablesLinkedToSpillSplots.begin(),
834 E = VariablesLinkedToSpillSplots.end();
835 I != E; ++I) {
836 Variable *Var = *I;
837 Variable *Linked = Var->getPreferredRegister();
838 Var->setStackOffset(Linked->getStackOffset());
839 }
840
741 if (Func->getContext()->isVerbose(IceV_Frame)) { 841 if (Func->getContext()->isVerbose(IceV_Frame)) {
742 Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes 842 Ostream &Str = Func->getContext()->getStrDump();
743 << "\n" 843
744 << "InArgsSizeBytes=" << InArgsSizeBytes 844 Str << "Stack layout:\n";
745 << "\n" 845 uint32_t EspAdjustmentPaddingSize =
746 << "PreservedRegsSizeBytes=" 846 LocalsSizeBytes - GlobalsAndSubsequentPaddingSize -
747 << PreservedRegsSizeBytes << "\n"; 847 LocalsSpillAreaSize - PreservedRegsSizeBytes - X86_RET_IP_SIZE_BYTES;
848 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
849 << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n"
850 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
851 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
852 << " globals spill area = " << GlobalsSize << " bytes\n"
853 << " globals-locals spill areas intermediate padding = "
854 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
855 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
856 << " esp alignment padding = " << EspAdjustmentPaddingSize
857 << " bytes\n";
858
859 Str << "Stack details:\n"
860 << " esp adjustment = " << LocalsSizeBytes << " bytes\n"
861 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
862 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
863 << " bytes\n"
864 << " is ebp based = " << IsEbpBasedFrame << "\n";
748 } 865 }
749 } 866 }
750 867
751 void TargetX8632::addEpilog(CfgNode *Node) { 868 void TargetX8632::addEpilog(CfgNode *Node) {
752 InstList &Insts = Node->getInsts(); 869 InstList &Insts = Node->getInsts();
753 InstList::reverse_iterator RI, E; 870 InstList::reverse_iterator RI, E;
754 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { 871 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
755 if (llvm::isa<InstX8632Ret>(*RI)) 872 if (llvm::isa<InstX8632Ret>(*RI))
756 break; 873 break;
757 } 874 }
(...skipping 226 matching lines...) Expand 10 before | Expand all | Expand 10 after
984 assert((AlignmentParam & (AlignmentParam - 1)) == 0); 1101 assert((AlignmentParam & (AlignmentParam - 1)) == 0);
985 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); 1102 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
986 1103
987 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); 1104 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
988 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { 1105 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
989 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment)); 1106 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment));
990 } 1107 }
991 if (ConstantInteger *ConstantTotalSize = 1108 if (ConstantInteger *ConstantTotalSize =
992 llvm::dyn_cast<ConstantInteger>(TotalSize)) { 1109 llvm::dyn_cast<ConstantInteger>(TotalSize)) {
993 uint32_t Value = ConstantTotalSize->getValue(); 1110 uint32_t Value = ConstantTotalSize->getValue();
994 // Round Value up to the next highest multiple of the alignment. 1111 Value = applyAlignment(Value, Alignment);
995 Value = (Value + Alignment - 1) & -Alignment;
996 _sub(esp, Ctx->getConstantInt(IceType_i32, Value)); 1112 _sub(esp, Ctx->getConstantInt(IceType_i32, Value));
997 } else { 1113 } else {
998 // Non-constant sizes need to be adjusted to the next highest 1114 // Non-constant sizes need to be adjusted to the next highest
999 // multiple of the required alignment at runtime. 1115 // multiple of the required alignment at runtime.
1000 Variable *T = makeReg(IceType_i32); 1116 Variable *T = makeReg(IceType_i32);
1001 _mov(T, TotalSize); 1117 _mov(T, TotalSize);
1002 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1)); 1118 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1));
1003 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment)); 1119 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment));
1004 _sub(esp, T); 1120 _sub(esp, T);
1005 } 1121 }
(...skipping 226 matching lines...) Expand 10 before | Expand all | Expand 10 after
1232 case InstArithmetic::Fsub: 1348 case InstArithmetic::Fsub:
1233 case InstArithmetic::Fmul: 1349 case InstArithmetic::Fmul:
1234 case InstArithmetic::Fdiv: 1350 case InstArithmetic::Fdiv:
1235 case InstArithmetic::Frem: 1351 case InstArithmetic::Frem:
1236 llvm_unreachable("FP instruction with i64 type"); 1352 llvm_unreachable("FP instruction with i64 type");
1237 break; 1353 break;
1238 } 1354 }
1239 } else if (isVectorType(Dest->getType())) { 1355 } else if (isVectorType(Dest->getType())) {
1240 // TODO: Trap on integer divide and integer modulo by zero. 1356 // TODO: Trap on integer divide and integer modulo by zero.
1241 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 1357 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
1242 //
1243 // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in
1244 // registers. This is a workaround of the fact that there is no
1245 // support for aligning stack operands. Once there is support,
1246 // remove LEGAL_HACK.
1247 #define LEGAL_HACK(s) legalizeToVar((s))
1248 switch (Inst->getOp()) { 1358 switch (Inst->getOp()) {
1249 case InstArithmetic::_num: 1359 case InstArithmetic::_num:
1250 llvm_unreachable("Unknown arithmetic operator"); 1360 llvm_unreachable("Unknown arithmetic operator");
1251 break; 1361 break;
1252 case InstArithmetic::Add: { 1362 case InstArithmetic::Add: {
1253 Variable *T = makeReg(Dest->getType()); 1363 Variable *T = makeReg(Dest->getType());
1254 _movp(T, Src0); 1364 _movp(T, Src0);
1255 _padd(T, LEGAL_HACK(Src1)); 1365 _padd(T, Src1);
1256 _movp(Dest, T); 1366 _movp(Dest, T);
1257 } break; 1367 } break;
1258 case InstArithmetic::And: { 1368 case InstArithmetic::And: {
1259 Variable *T = makeReg(Dest->getType()); 1369 Variable *T = makeReg(Dest->getType());
1260 _movp(T, Src0); 1370 _movp(T, Src0);
1261 _pand(T, LEGAL_HACK(Src1)); 1371 _pand(T, Src1);
1262 _movp(Dest, T); 1372 _movp(Dest, T);
1263 } break; 1373 } break;
1264 case InstArithmetic::Or: { 1374 case InstArithmetic::Or: {
1265 Variable *T = makeReg(Dest->getType()); 1375 Variable *T = makeReg(Dest->getType());
1266 _movp(T, Src0); 1376 _movp(T, Src0);
1267 _por(T, LEGAL_HACK(Src1)); 1377 _por(T, Src1);
1268 _movp(Dest, T); 1378 _movp(Dest, T);
1269 } break; 1379 } break;
1270 case InstArithmetic::Xor: { 1380 case InstArithmetic::Xor: {
1271 Variable *T = makeReg(Dest->getType()); 1381 Variable *T = makeReg(Dest->getType());
1272 _movp(T, Src0); 1382 _movp(T, Src0);
1273 _pxor(T, LEGAL_HACK(Src1)); 1383 _pxor(T, Src1);
1274 _movp(Dest, T); 1384 _movp(Dest, T);
1275 } break; 1385 } break;
1276 case InstArithmetic::Sub: { 1386 case InstArithmetic::Sub: {
1277 Variable *T = makeReg(Dest->getType()); 1387 Variable *T = makeReg(Dest->getType());
1278 _movp(T, Src0); 1388 _movp(T, Src0);
1279 _psub(T, LEGAL_HACK(Src1)); 1389 _psub(T, Src1);
1280 _movp(Dest, T); 1390 _movp(Dest, T);
1281 } break; 1391 } break;
1282 case InstArithmetic::Mul: { 1392 case InstArithmetic::Mul: {
1283 bool TypesAreValidForPmull = 1393 bool TypesAreValidForPmull =
1284 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; 1394 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
1285 bool InstructionSetIsValidForPmull = 1395 bool InstructionSetIsValidForPmull =
1286 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1; 1396 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1;
1287 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { 1397 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
1288 Variable *T = makeReg(Dest->getType()); 1398 Variable *T = makeReg(Dest->getType());
1289 _movp(T, Src0); 1399 _movp(T, Src0);
1290 _pmull(T, LEGAL_HACK(Src1)); 1400 _pmull(T, Src1);
1291 _movp(Dest, T); 1401 _movp(Dest, T);
1292 } else if (Dest->getType() == IceType_v4i32) { 1402 } else if (Dest->getType() == IceType_v4i32) {
1293 // Lowering sequence: 1403 // Lowering sequence:
1294 // Note: The mask arguments have index 0 on the left. 1404 // Note: The mask arguments have index 0 on the left.
1295 // 1405 //
1296 // movups T1, Src0 1406 // movups T1, Src0
1297 // pshufd T2, Src0, {1,0,3,0} 1407 // pshufd T2, Src0, {1,0,3,0}
1298 // pshufd T3, Src1, {1,0,3,0} 1408 // pshufd T3, Src1, {1,0,3,0}
1299 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} 1409 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
1300 // pmuludq T1, Src1 1410 // pmuludq T1, Src1
(...skipping 12 matching lines...) Expand all
1313 // Dest[0, 2], Src[0, 2] 1423 // Dest[0, 2], Src[0, 2]
1314 const unsigned Mask0202 = 0x88; 1424 const unsigned Mask0202 = 0x88;
1315 // Mask that directs pshufd to create a vector with entries 1425 // Mask that directs pshufd to create a vector with entries
1316 // Src[0, 2, 1, 3] 1426 // Src[0, 2, 1, 3]
1317 const unsigned Mask0213 = 0xd8; 1427 const unsigned Mask0213 = 0xd8;
1318 Variable *T1 = makeReg(IceType_v4i32); 1428 Variable *T1 = makeReg(IceType_v4i32);
1319 Variable *T2 = makeReg(IceType_v4i32); 1429 Variable *T2 = makeReg(IceType_v4i32);
1320 Variable *T3 = makeReg(IceType_v4i32); 1430 Variable *T3 = makeReg(IceType_v4i32);
1321 Variable *T4 = makeReg(IceType_v4i32); 1431 Variable *T4 = makeReg(IceType_v4i32);
1322 _movp(T1, Src0); 1432 _movp(T1, Src0);
1323 // TODO(wala): ALIGHNHACK: Replace Src0R with Src0 and Src1R 1433 _pshufd(T2, Src0, Mask1030);
1324 // with Src1 after stack operand alignment support is 1434 _pshufd(T3, Src1, Mask1030);
1325 // implemented. 1435 _pmuludq(T1, Src1);
1326 Variable *Src0R = LEGAL_HACK(Src0);
1327 Variable *Src1R = LEGAL_HACK(Src1);
1328 _pshufd(T2, Src0R, Mask1030);
1329 _pshufd(T3, Src1R, Mask1030);
1330 _pmuludq(T1, Src1R);
1331 _pmuludq(T2, T3); 1436 _pmuludq(T2, T3);
1332 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); 1437 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));
1333 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); 1438 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));
1334 _movp(Dest, T4); 1439 _movp(Dest, T4);
1335 } else { 1440 } else {
1336 assert(Dest->getType() == IceType_v16i8); 1441 assert(Dest->getType() == IceType_v16i8);
1337 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1442 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1338 } 1443 }
1339 } break; 1444 } break;
1340 case InstArithmetic::Shl: 1445 case InstArithmetic::Shl:
1341 case InstArithmetic::Lshr: 1446 case InstArithmetic::Lshr:
1342 case InstArithmetic::Ashr: 1447 case InstArithmetic::Ashr:
1343 case InstArithmetic::Udiv: 1448 case InstArithmetic::Udiv:
1344 case InstArithmetic::Urem: 1449 case InstArithmetic::Urem:
1345 case InstArithmetic::Sdiv: 1450 case InstArithmetic::Sdiv:
1346 case InstArithmetic::Srem: 1451 case InstArithmetic::Srem:
1347 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1452 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1348 break; 1453 break;
1349 case InstArithmetic::Fadd: { 1454 case InstArithmetic::Fadd: {
1350 Variable *T = makeReg(Dest->getType()); 1455 Variable *T = makeReg(Dest->getType());
1351 _movp(T, Src0); 1456 _movp(T, Src0);
1352 _addps(T, LEGAL_HACK(Src1)); 1457 _addps(T, Src1);
1353 _movp(Dest, T); 1458 _movp(Dest, T);
1354 } break; 1459 } break;
1355 case InstArithmetic::Fsub: { 1460 case InstArithmetic::Fsub: {
1356 Variable *T = makeReg(Dest->getType()); 1461 Variable *T = makeReg(Dest->getType());
1357 _movp(T, Src0); 1462 _movp(T, Src0);
1358 _subps(T, LEGAL_HACK(Src1)); 1463 _subps(T, Src1);
1359 _movp(Dest, T); 1464 _movp(Dest, T);
1360 } break; 1465 } break;
1361 case InstArithmetic::Fmul: { 1466 case InstArithmetic::Fmul: {
1362 Variable *T = makeReg(Dest->getType()); 1467 Variable *T = makeReg(Dest->getType());
1363 _movp(T, Src0); 1468 _movp(T, Src0);
1364 _mulps(T, LEGAL_HACK(Src1)); 1469 _mulps(T, Src1);
1365 _movp(Dest, T); 1470 _movp(Dest, T);
1366 } break; 1471 } break;
1367 case InstArithmetic::Fdiv: { 1472 case InstArithmetic::Fdiv: {
1368 Variable *T = makeReg(Dest->getType()); 1473 Variable *T = makeReg(Dest->getType());
1369 _movp(T, Src0); 1474 _movp(T, Src0);
1370 _divps(T, LEGAL_HACK(Src1)); 1475 _divps(T, Src1);
1371 _movp(Dest, T); 1476 _movp(Dest, T);
1372 } break; 1477 } break;
1373 case InstArithmetic::Frem: 1478 case InstArithmetic::Frem:
1374 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1479 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1375 break; 1480 break;
1376 } 1481 }
1377 #undef LEGAL_HACK
1378 } else { // Dest->getType() is non-i64 scalar 1482 } else { // Dest->getType() is non-i64 scalar
1379 Variable *T_edx = NULL; 1483 Variable *T_edx = NULL;
1380 Variable *T = NULL; 1484 Variable *T = NULL;
1381 switch (Inst->getOp()) { 1485 switch (Inst->getOp()) {
1382 case InstArithmetic::_num: 1486 case InstArithmetic::_num:
1383 llvm_unreachable("Unknown arithmetic operator"); 1487 llvm_unreachable("Unknown arithmetic operator");
1384 break; 1488 break;
1385 case InstArithmetic::Add: 1489 case InstArithmetic::Add:
1386 _mov(T, Src0); 1490 _mov(T, Src0);
1387 _add(T, Src1); 1491 _add(T, Src1);
(...skipping 804 matching lines...) Expand 10 before | Expand all | Expand 10 after
2192 // TODO(wala): Determine the best lowering sequences for each type. 2296 // TODO(wala): Determine the best lowering sequences for each type.
2193 bool CanUsePextr = 2297 bool CanUsePextr =
2194 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; 2298 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
2195 if (CanUsePextr && Ty != IceType_v4f32) { 2299 if (CanUsePextr && Ty != IceType_v4f32) {
2196 // Use pextrb, pextrw, or pextrd. 2300 // Use pextrb, pextrw, or pextrd.
2197 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); 2301 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
2198 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); 2302 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
2199 _pextr(ExtractedElementR, SourceVectR, Mask); 2303 _pextr(ExtractedElementR, SourceVectR, Mask);
2200 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2304 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2201 // Use pshufd and movd/movss. 2305 // Use pshufd and movd/movss.
2202 //
2203 // ALIGNHACK: Force vector operands to registers in instructions
2204 // that require aligned memory operands until support for data
2205 // alignment is implemented.
2206 #define ALIGN_HACK(Vect) legalizeToVar((Vect))
2207 Operand *SourceVectRM =
2208 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2209 Variable *T = NULL; 2306 Variable *T = NULL;
2210 if (Index) { 2307 if (Index) {
2211 // The shuffle only needs to occur if the element to be extracted 2308 // The shuffle only needs to occur if the element to be extracted
2212 // is not at the lowest index. 2309 // is not at the lowest index.
2213 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); 2310 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
2214 T = makeReg(Ty); 2311 T = makeReg(Ty);
2215 _pshufd(T, ALIGN_HACK(SourceVectRM), Mask); 2312 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
2216 } else { 2313 } else {
2217 T = ALIGN_HACK(SourceVectRM); 2314 T = legalizeToVar(SourceVectNotLegalized);
2218 } 2315 }
2219 2316
2220 if (InVectorElementTy == IceType_i32) { 2317 if (InVectorElementTy == IceType_i32) {
2221 _movd(ExtractedElementR, T); 2318 _movd(ExtractedElementR, T);
2222 } else { // InVectorElementTy == IceType_f32 2319 } else { // InVectorElementTy == IceType_f32
2223 // TODO(wala): _movss is only used here because _mov does not 2320 // TODO(wala): _movss is only used here because _mov does not
2224 // allow a vector source and a scalar destination. _mov should be 2321 // allow a vector source and a scalar destination. _mov should be
2225 // able to be used here. 2322 // able to be used here.
2226 // _movss is a binary instruction, so the FakeDef is needed to 2323 // _movss is a binary instruction, so the FakeDef is needed to
2227 // keep the live range analysis consistent. 2324 // keep the live range analysis consistent.
2228 Context.insert(InstFakeDef::create(Func, ExtractedElementR)); 2325 Context.insert(InstFakeDef::create(Func, ExtractedElementR));
2229 _movss(ExtractedElementR, T); 2326 _movss(ExtractedElementR, T);
2230 } 2327 }
2231 #undef ALIGN_HACK
2232 } else { 2328 } else {
2233 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); 2329 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2234 // Spill the value to a stack slot and do the extraction in memory. 2330 // Spill the value to a stack slot and do the extraction in memory.
2235 // 2331 //
2236 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when 2332 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
2237 // support for legalizing to mem is implemented. 2333 // support for legalizing to mem is implemented.
2238 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); 2334 Variable *Slot = Func->makeVariable(Ty, Context.getNode());
2239 Slot->setWeight(RegWeight::Zero); 2335 Slot->setWeight(RegWeight::Zero);
2240 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); 2336 _movp(Slot, legalizeToVar(SourceVectNotLegalized));
2241 2337
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
2280 2376
2281 if (Condition == InstFcmp::True) { 2377 if (Condition == InstFcmp::True) {
2282 // makeVectorOfMinusOnes() requires an integer vector type. 2378 // makeVectorOfMinusOnes() requires an integer vector type.
2283 T = makeVectorOfMinusOnes(IceType_v4i32); 2379 T = makeVectorOfMinusOnes(IceType_v4i32);
2284 } else if (Condition == InstFcmp::False) { 2380 } else if (Condition == InstFcmp::False) {
2285 T = makeVectorOfZeros(Dest->getType()); 2381 T = makeVectorOfZeros(Dest->getType());
2286 } else { 2382 } else {
2287 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2383 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2288 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2384 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2289 2385
2290 // ALIGNHACK: Without support for data alignment, both operands to
2291 // cmpps need to be forced into registers. Once support for data
2292 // alignment is implemented, remove LEGAL_HACK.
2293 #define LEGAL_HACK(Vect) legalizeToVar((Vect))
2294 switch (Condition) { 2386 switch (Condition) {
2295 default: { 2387 default: {
2296 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; 2388 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate;
2297 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); 2389 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);
2298 T = makeReg(Src0RM->getType()); 2390 T = makeReg(Src0RM->getType());
2299 _movp(T, Src0RM); 2391 _movp(T, Src0RM);
2300 _cmpps(T, LEGAL_HACK(Src1RM), Predicate); 2392 _cmpps(T, Src1RM, Predicate);
2301 } break; 2393 } break;
2302 case InstFcmp::One: { 2394 case InstFcmp::One: {
2303 // Check both unequal and ordered. 2395 // Check both unequal and ordered.
2304 T = makeReg(Src0RM->getType()); 2396 T = makeReg(Src0RM->getType());
2305 Variable *T2 = makeReg(Src0RM->getType()); 2397 Variable *T2 = makeReg(Src0RM->getType());
2306 Src1RM = LEGAL_HACK(Src1RM);
2307 _movp(T, Src0RM); 2398 _movp(T, Src0RM);
2308 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq); 2399 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq);
2309 _movp(T2, Src0RM); 2400 _movp(T2, Src0RM);
2310 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord); 2401 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord);
2311 _pand(T, T2); 2402 _pand(T, T2);
2312 } break; 2403 } break;
2313 case InstFcmp::Ueq: { 2404 case InstFcmp::Ueq: {
2314 // Check both equal or unordered. 2405 // Check both equal or unordered.
2315 T = makeReg(Src0RM->getType()); 2406 T = makeReg(Src0RM->getType());
2316 Variable *T2 = makeReg(Src0RM->getType()); 2407 Variable *T2 = makeReg(Src0RM->getType());
2317 Src1RM = LEGAL_HACK(Src1RM);
2318 _movp(T, Src0RM); 2408 _movp(T, Src0RM);
2319 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq); 2409 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq);
2320 _movp(T2, Src0RM); 2410 _movp(T2, Src0RM);
2321 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord); 2411 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord);
2322 _por(T, T2); 2412 _por(T, T2);
2323 } break; 2413 } break;
2324 } 2414 }
2325 #undef LEGAL_HACK
2326 } 2415 }
2327 2416
2328 _movp(Dest, T); 2417 _movp(Dest, T);
2329 eliminateNextVectorSextInstruction(Dest); 2418 eliminateNextVectorSextInstruction(Dest);
2330 return; 2419 return;
2331 } 2420 }
2332 2421
2333 // Lowering a = fcmp cond, b, c 2422 // Lowering a = fcmp cond, b, c
2334 // ucomiss b, c /* only if C1 != Br_None */ 2423 // ucomiss b, c /* only if C1 != Br_None */
2335 // /* but swap b,c order if SwapOperands==true */ 2424 // /* but swap b,c order if SwapOperands==true */
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
2420 Variable *T1 = makeReg(Ty); 2509 Variable *T1 = makeReg(Ty);
2421 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); 2510 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
2422 _movp(T0, Src0RM); 2511 _movp(T0, Src0RM);
2423 _pxor(T0, HighOrderBits); 2512 _pxor(T0, HighOrderBits);
2424 _movp(T1, Src1RM); 2513 _movp(T1, Src1RM);
2425 _pxor(T1, HighOrderBits); 2514 _pxor(T1, HighOrderBits);
2426 Src0RM = T0; 2515 Src0RM = T0;
2427 Src1RM = T1; 2516 Src1RM = T1;
2428 } 2517 }
2429 2518
2430 // TODO: ALIGNHACK: Both operands to compare instructions need to be
2431 // in registers until data alignment support is implemented. Once
2432 // there is support for data alignment, LEGAL_HACK can be removed.
2433 #define LEGAL_HACK(Vect) legalizeToVar((Vect))
2434 Variable *T = makeReg(Ty); 2519 Variable *T = makeReg(Ty);
2435 switch (Condition) { 2520 switch (Condition) {
2436 default: 2521 default:
2437 llvm_unreachable("unexpected condition"); 2522 llvm_unreachable("unexpected condition");
2438 break; 2523 break;
2439 case InstIcmp::Eq: { 2524 case InstIcmp::Eq: {
2440 _movp(T, Src0RM); 2525 _movp(T, Src0RM);
2441 _pcmpeq(T, LEGAL_HACK(Src1RM)); 2526 _pcmpeq(T, Src1RM);
2442 } break; 2527 } break;
2443 case InstIcmp::Ne: { 2528 case InstIcmp::Ne: {
2444 _movp(T, Src0RM); 2529 _movp(T, Src0RM);
2445 _pcmpeq(T, LEGAL_HACK(Src1RM)); 2530 _pcmpeq(T, Src1RM);
2446 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2531 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2447 _pxor(T, MinusOne); 2532 _pxor(T, MinusOne);
2448 } break; 2533 } break;
2449 case InstIcmp::Ugt: 2534 case InstIcmp::Ugt:
2450 case InstIcmp::Sgt: { 2535 case InstIcmp::Sgt: {
2451 _movp(T, Src0RM); 2536 _movp(T, Src0RM);
2452 _pcmpgt(T, LEGAL_HACK(Src1RM)); 2537 _pcmpgt(T, Src1RM);
2453 } break; 2538 } break;
2454 case InstIcmp::Uge: 2539 case InstIcmp::Uge:
2455 case InstIcmp::Sge: { 2540 case InstIcmp::Sge: {
2456 // !(Src1RM > Src0RM) 2541 // !(Src1RM > Src0RM)
2457 _movp(T, Src1RM); 2542 _movp(T, Src1RM);
2458 _pcmpgt(T, LEGAL_HACK(Src0RM)); 2543 _pcmpgt(T, Src0RM);
2459 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2544 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2460 _pxor(T, MinusOne); 2545 _pxor(T, MinusOne);
2461 } break; 2546 } break;
2462 case InstIcmp::Ult: 2547 case InstIcmp::Ult:
2463 case InstIcmp::Slt: { 2548 case InstIcmp::Slt: {
2464 _movp(T, Src1RM); 2549 _movp(T, Src1RM);
2465 _pcmpgt(T, LEGAL_HACK(Src0RM)); 2550 _pcmpgt(T, Src0RM);
2466 } break; 2551 } break;
2467 case InstIcmp::Ule: 2552 case InstIcmp::Ule:
2468 case InstIcmp::Sle: { 2553 case InstIcmp::Sle: {
2469 // !(Src0RM > Src1RM) 2554 // !(Src0RM > Src1RM)
2470 _movp(T, Src0RM); 2555 _movp(T, Src0RM);
2471 _pcmpgt(T, LEGAL_HACK(Src1RM)); 2556 _pcmpgt(T, Src1RM);
2472 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2557 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2473 _pxor(T, MinusOne); 2558 _pxor(T, MinusOne);
2474 } break; 2559 } break;
2475 } 2560 }
2476 #undef LEGAL_HACK
2477 2561
2478 _movp(Dest, T); 2562 _movp(Dest, T);
2479 eliminateNextVectorSextInstruction(Dest); 2563 eliminateNextVectorSextInstruction(Dest);
2480 return; 2564 return;
2481 } 2565 }
2482 2566
2483 // If Src1 is an immediate, or known to be a physical register, we can 2567 // If Src1 is an immediate, or known to be a physical register, we can
2484 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into 2568 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
2485 // a physical register. (Actually, either Src0 or Src1 can be chosen for 2569 // a physical register. (Actually, either Src0 or Src1 can be chosen for
2486 // the physical register, but unfortunately we have to commit to one or 2570 // the physical register, but unfortunately we have to commit to one or
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after
2642 // insertelement into index 3 (result is stored in T): 2726 // insertelement into index 3 (result is stored in T):
2643 // T := SourceVectRM 2727 // T := SourceVectRM
2644 // ElementR := ElementR[0, 0] T[0, 2] 2728 // ElementR := ElementR[0, 0] T[0, 2]
2645 // T := T[0, 1] ElementR[3, 0] 2729 // T := T[0, 1] ElementR[3, 0]
2646 const unsigned char Mask1[3] = {0, 192, 128}; 2730 const unsigned char Mask1[3] = {0, 192, 128};
2647 const unsigned char Mask2[3] = {227, 196, 52}; 2731 const unsigned char Mask2[3] = {227, 196, 52};
2648 2732
2649 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); 2733 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]);
2650 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); 2734 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]);
2651 2735
2652 // ALIGNHACK: Force vector operands to registers in instructions
2653 // that require aligned memory operands until support for data
2654 // alignment is implemented.
2655 #define ALIGN_HACK(Vect) legalizeToVar((Vect))
2656 if (Index == 1) { 2736 if (Index == 1) {
2657 SourceVectRM = ALIGN_HACK(SourceVectRM);
2658 _shufps(ElementR, SourceVectRM, Mask1Constant); 2737 _shufps(ElementR, SourceVectRM, Mask1Constant);
2659 _shufps(ElementR, SourceVectRM, Mask2Constant); 2738 _shufps(ElementR, SourceVectRM, Mask2Constant);
2660 _movp(Inst->getDest(), ElementR); 2739 _movp(Inst->getDest(), ElementR);
2661 } else { 2740 } else {
2662 Variable *T = makeReg(Ty); 2741 Variable *T = makeReg(Ty);
2663 _movp(T, SourceVectRM); 2742 _movp(T, SourceVectRM);
2664 _shufps(ElementR, T, Mask1Constant); 2743 _shufps(ElementR, T, Mask1Constant);
2665 _shufps(T, ElementR, Mask2Constant); 2744 _shufps(T, ElementR, Mask2Constant);
2666 _movp(Inst->getDest(), T); 2745 _movp(Inst->getDest(), T);
2667 } 2746 }
2668 #undef ALIGN_HACK
2669 } else { 2747 } else {
2670 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); 2748 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2671 // Spill the value to a stack slot and perform the insertion in 2749 // Spill the value to a stack slot and perform the insertion in
2672 // memory. 2750 // memory.
2673 // 2751 //
2674 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when 2752 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
2675 // support for legalizing to mem is implemented. 2753 // support for legalizing to mem is implemented.
2676 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); 2754 Variable *Slot = Func->makeVariable(Ty, Context.getNode());
2677 Slot->setWeight(RegWeight::Zero); 2755 Slot->setWeight(RegWeight::Zero);
2678 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); 2756 _movp(Slot, legalizeToVar(SourceVectNotLegalized));
(...skipping 941 matching lines...) Expand 10 before | Expand all | Expand 10 after
3620 Variable *Dest = Inst->getDest(); 3698 Variable *Dest = Inst->getDest();
3621 Operand *SrcT = Inst->getTrueOperand(); 3699 Operand *SrcT = Inst->getTrueOperand();
3622 Operand *SrcF = Inst->getFalseOperand(); 3700 Operand *SrcF = Inst->getFalseOperand();
3623 Operand *Condition = Inst->getCondition(); 3701 Operand *Condition = Inst->getCondition();
3624 3702
3625 if (isVectorType(Dest->getType())) { 3703 if (isVectorType(Dest->getType())) {
3626 Type SrcTy = SrcT->getType(); 3704 Type SrcTy = SrcT->getType();
3627 Variable *T = makeReg(SrcTy); 3705 Variable *T = makeReg(SrcTy);
3628 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); 3706 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
3629 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); 3707 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
3630 // ALIGNHACK: Until data alignment support is implemented, vector
3631 // instructions need to have vector operands in registers. Once
3632 // there is support for data alignment, LEGAL_HACK can be removed.
3633 #define LEGAL_HACK(Vect) legalizeToVar((Vect))
3634 if (InstructionSet >= SSE4_1) { 3708 if (InstructionSet >= SSE4_1) {
3635 // TODO(wala): If the condition operand is a constant, use blendps 3709 // TODO(wala): If the condition operand is a constant, use blendps
3636 // or pblendw. 3710 // or pblendw.
3637 // 3711 //
3638 // Use blendvps or pblendvb to implement select. 3712 // Use blendvps or pblendvb to implement select.
3639 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || 3713 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
3640 SrcTy == IceType_v4f32) { 3714 SrcTy == IceType_v4f32) {
3641 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 3715 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
3642 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); 3716 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0);
3643 _movp(xmm0, ConditionRM); 3717 _movp(xmm0, ConditionRM);
3644 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31)); 3718 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31));
3645 _movp(T, SrcFRM); 3719 _movp(T, SrcFRM);
3646 _blendvps(T, LEGAL_HACK(SrcTRM), xmm0); 3720 _blendvps(T, SrcTRM, xmm0);
3647 _movp(Dest, T); 3721 _movp(Dest, T);
3648 } else { 3722 } else {
3649 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); 3723 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
3650 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 3724 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
3651 : IceType_v16i8; 3725 : IceType_v16i8;
3652 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0); 3726 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0);
3653 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); 3727 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
3654 _movp(T, SrcFRM); 3728 _movp(T, SrcFRM);
3655 _pblendvb(T, LEGAL_HACK(SrcTRM), xmm0); 3729 _pblendvb(T, SrcTRM, xmm0);
3656 _movp(Dest, T); 3730 _movp(Dest, T);
3657 } 3731 }
3658 return; 3732 return;
3659 } 3733 }
3660 // Lower select without SSE4.1: 3734 // Lower select without SSE4.1:
3661 // a=d?b:c ==> 3735 // a=d?b:c ==>
3662 // if elementtype(d) != i1: 3736 // if elementtype(d) != i1:
3663 // d=sext(d); 3737 // d=sext(d);
3664 // a=(b&d)|(c&~d); 3738 // a=(b&d)|(c&~d);
3665 Variable *T2 = makeReg(SrcTy); 3739 Variable *T2 = makeReg(SrcTy);
3666 // Sign extend the condition operand if applicable. 3740 // Sign extend the condition operand if applicable.
3667 if (SrcTy == IceType_v4f32) { 3741 if (SrcTy == IceType_v4f32) {
3668 // The sext operation takes only integer arguments. 3742 // The sext operation takes only integer arguments.
3669 Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode()); 3743 Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode());
3670 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); 3744 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
3671 _movp(T, T3); 3745 _movp(T, T3);
3672 } else if (typeElementType(SrcTy) != IceType_i1) { 3746 } else if (typeElementType(SrcTy) != IceType_i1) {
3673 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); 3747 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
3674 } else { 3748 } else {
3675 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 3749 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
3676 _movp(T, ConditionRM); 3750 _movp(T, ConditionRM);
3677 } 3751 }
3678 _movp(T2, T); 3752 _movp(T2, T);
3679 _pand(T, LEGAL_HACK(SrcTRM)); 3753 _pand(T, SrcTRM);
3680 _pandn(T2, LEGAL_HACK(SrcFRM)); 3754 _pandn(T2, SrcFRM);
3681 _por(T, T2); 3755 _por(T, T2);
3682 _movp(Dest, T); 3756 _movp(Dest, T);
3683 #undef LEGAL_HACK
3684 3757
3685 return; 3758 return;
3686 } 3759 }
3687 3760
3688 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1: 3761 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
3689 Operand *ConditionRMI = legalize(Condition); 3762 Operand *ConditionRMI = legalize(Condition);
3690 Constant *Zero = Ctx->getConstantZero(IceType_i32); 3763 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3691 InstX8632Label *Label = InstX8632Label::create(Func, this); 3764 InstX8632Label *Label = InstX8632Label::create(Func, this);
3692 3765
3693 if (Dest->getType() == IceType_i64) { 3766 if (Dest->getType() == IceType_i64) {
(...skipping 542 matching lines...) Expand 10 before | Expand all | Expand 10 after
4236 for (SizeT i = 0; i < Size; ++i) { 4309 for (SizeT i = 0; i < Size; ++i) {
4237 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 4310 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
4238 } 4311 }
4239 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 4312 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
4240 } 4313 }
4241 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName 4314 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
4242 << "\n"; 4315 << "\n";
4243 } 4316 }
4244 4317
4245 } // end of namespace Ice 4318 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | tests_lit/llvm2ice_tests/align-spill-locations.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698