Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 465413003: Subzero: Align spill locations to natural alignment. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Clarify bucket calculation. Created 6 years, 4 months ago
OLD | NEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
11 // consists almost entirely of the lowering sequence for each 11 // consists almost entirely of the lowering sequence for each
12 // high-level instruction. It also implements 12 // high-level instruction. It also implements
13 // TargetX8632Fast::postLower() which does the simplest possible 13 // TargetX8632Fast::postLower() which does the simplest possible
14 // register allocation for the "fast" target. 14 // register allocation for the "fast" target.
15 // 15 //
16 //===----------------------------------------------------------------------===// 16 //===----------------------------------------------------------------------===//
17 17
18 #include "IceDefs.h" 18 #include "IceDefs.h"
19 #include "IceCfg.h" 19 #include "IceCfg.h"
20 #include "IceCfgNode.h" 20 #include "IceCfgNode.h"
21 #include "IceInstX8632.h" 21 #include "IceInstX8632.h"
22 #include "IceOperand.h" 22 #include "IceOperand.h"
23 #include "IceTargetLoweringX8632.def" 23 #include "IceTargetLoweringX8632.def"
24 #include "IceTargetLoweringX8632.h" 24 #include "IceTargetLoweringX8632.h"
25 #include "llvm/Support/CommandLine.h" 25 #include "llvm/Support/CommandLine.h"
26 26
27 #include <strings.h>
28
27 namespace Ice { 29 namespace Ice {
28 30
29 namespace { 31 namespace {
30 32
31 // The following table summarizes the logic for lowering the fcmp 33 // The following table summarizes the logic for lowering the fcmp
32 // instruction. There is one table entry for each of the 16 conditions. 34 // instruction. There is one table entry for each of the 16 conditions.
33 // 35 //
34 // The first four columns describe the case when the operands are 36 // The first four columns describe the case when the operands are
35 // floating point scalar values. A comment in lowerFcmp() describes the 37 // floating point scalar values. A comment in lowerFcmp() describes the
36 // lowering template. In the most general case, there is a compare 38 // lowering template. In the most general case, there is a compare
(...skipping 84 matching lines...)
121 } 123 }
122 124
123 // The maximum number of arguments to pass in XMM registers 125 // The maximum number of arguments to pass in XMM registers
124 const uint32_t X86_MAX_XMM_ARGS = 4; 126 const uint32_t X86_MAX_XMM_ARGS = 4;
125 // The number of bits in a byte 127 // The number of bits in a byte
126 const uint32_t X86_CHAR_BIT = 8; 128 const uint32_t X86_CHAR_BIT = 8;
127 // Stack alignment 129 // Stack alignment
128 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16; 130 const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
129 // Size of the return address on the stack 131 // Size of the return address on the stack
130 const uint32_t X86_RET_IP_SIZE_BYTES = 4; 132 const uint32_t X86_RET_IP_SIZE_BYTES = 4;
133 // The base 2 logarithm of the width in bytes of the smallest stack slot
134 const uint32_t X86_LOG2_OF_MIN_STACK_SLOT_SIZE = 2;
135 // The base 2 logarithm of the width in bytes of the largest stack slot
136 const uint32_t X86_LOG2_OF_MAX_STACK_SLOT_SIZE = 4;
131 137
132 // Value is a size in bytes. Return Value adjusted to the next highest 138 // Value and Alignment are in bytes. Return Value adjusted to the next
133 // multiple of the stack alignment. 139 // highest multiple of Alignment.
140 uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) {
141 // power of 2
142 assert((Alignment & (Alignment - 1)) == 0);
143 return (Value + Alignment - 1) & -Alignment;
144 }
145
146 // Value is in bytes. Return Value adjusted to the next highest multiple
147 // of the stack alignment.
134 uint32_t applyStackAlignment(uint32_t Value) { 148 uint32_t applyStackAlignment(uint32_t Value) {
135 // power of 2 149 return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES);
136 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
137 return (Value + X86_STACK_ALIGNMENT_BYTES - 1) & -X86_STACK_ALIGNMENT_BYTES;
138 } 150 }
139 151
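A quick worked example of the rounding identity used above: applyAlignment(12, 16) computes (12 + 15) & -16 = 27 & ~15 = 16, while applyAlignment(16, 16) computes 31 & ~15 = 16, so values already at a multiple are left unchanged. The trick depends on Alignment being a power of two, which the assert enforces: -Alignment is then exactly a mask that clears the low log2(Alignment) bits.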
140 // Instruction set options 152 // Instruction set options
141 namespace cl = ::llvm::cl; 153 namespace cl = ::llvm::cl;
142 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet( 154 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet(
143 "mattr", cl::desc("X86 target attributes"), 155 "mattr", cl::desc("X86 target attributes"),
144 cl::init(TargetX8632::SSE2), 156 cl::init(TargetX8632::SSE2),
145 cl::values( 157 cl::values(
146 clEnumValN(TargetX8632::SSE2, "sse2", 158 clEnumValN(TargetX8632::SSE2, "sse2",
147 "Enable SSE2 instructions (default)"), 159 "Enable SSE2 instructions (default)"),
(...skipping 365 matching lines...)
513 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name); 525 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name);
514 RegisterArg->setRegNum(RegNum); 526 RegisterArg->setRegNum(RegNum);
515 RegisterArg->setIsArg(Func); 527 RegisterArg->setIsArg(Func);
516 Arg->setIsArg(Func, false); 528 Arg->setIsArg(Func, false);
517 529
518 Args[I] = RegisterArg; 530 Args[I] = RegisterArg;
519 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); 531 Context.insert(InstAssign::create(Func, Arg, RegisterArg));
520 } 532 }
521 } 533 }
522 534
535 void TargetX8632::sortByAlignment(VarList &Dest, const VarList &Source) const {
Jim Stichnoth 2014/08/14 18:21:19 Does this need to be part of TargetX8632, or can it be a non-member helper?
wala 2014/08/14 18:24:10 It needs access to TargetLoweringX8632::typeWidthInBytesOnStack().
536 // Sort the variables into buckets according to the log of their width
537 // in bytes.
538 const SizeT NumBuckets =
539 X86_LOG2_OF_MAX_STACK_SLOT_SIZE - X86_LOG2_OF_MIN_STACK_SLOT_SIZE + 1;
540 VarList Buckets[NumBuckets];
541
542 for (VarList::const_iterator I = Source.begin(), E = Source.end(); I != E;
543 ++I) {
544 Variable *Var = *I;
545 uint32_t NaturalAlignment = typeWidthInBytesOnStack(Var->getType());
546 SizeT LogNaturalAlignment = ffs(NaturalAlignment) - 1;
547 assert(LogNaturalAlignment >= X86_LOG2_OF_MIN_STACK_SLOT_SIZE);
548 assert(LogNaturalAlignment <= X86_LOG2_OF_MAX_STACK_SLOT_SIZE);
549 SizeT BucketIndex = LogNaturalAlignment - X86_LOG2_OF_MIN_STACK_SLOT_SIZE;
550 Buckets[BucketIndex].push_back(Var);
551 }
552
553 for (SizeT I = 0, E = NumBuckets; I < E; ++I) {
554 VarList &List = Buckets[NumBuckets - I - 1];
555 Dest.insert(Dest.end(), List.begin(), List.end());
556 }
557 }
558
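For readers following the bucket calculation, here is a minimal standalone sketch of the same ordering, using a hypothetical Var struct with a plain width field in place of Subzero's Variable and typeWidthInBytesOnStack() (the real method operates on the target's VarList):

#include <strings.h> // for ffs(), as in the patch
#include <vector>

// Hypothetical stand-in: only the on-stack width (4, 8, or 16 bytes) matters.
struct Var { unsigned WidthBytes; };

std::vector<Var *> sortByAlignment(const std::vector<Var *> &Source) {
  const int MinLog2 = 2, MaxLog2 = 4; // 4-byte through 16-byte stack slots
  const int NumBuckets = MaxLog2 - MinLog2 + 1;
  std::vector<Var *> Buckets[NumBuckets];
  for (size_t I = 0; I < Source.size(); ++I) {
    // For a power of two, ffs(W) - 1 is log2(W); e.g. ffs(16) - 1 == 4.
    int LogAlign = ffs(Source[I]->WidthBytes) - 1;
    Buckets[LogAlign - MinLog2].push_back(Source[I]);
  }
  std::vector<Var *> Dest;
  for (int I = NumBuckets - 1; I >= 0; --I) // largest alignment first
    Dest.insert(Dest.end(), Buckets[I].begin(), Buckets[I].end());
  return Dest;
}

Emitting the largest-alignment bucket first is what lets the rest of the patch align only the start of each spill area: once the area start is aligned to the largest natural alignment present, every following slot lands on its own natural alignment without per-slot padding.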
523 // Helper function for addProlog(). 559 // Helper function for addProlog().
524 // 560 //
525 // This assumes Arg is an argument passed on the stack. This sets the 561 // This assumes Arg is an argument passed on the stack. This sets the
526 // frame offset for Arg and updates InArgsSizeBytes according to Arg's 562 // frame offset for Arg and updates InArgsSizeBytes according to Arg's
527 // width. For an I64 arg that has been split into Lo and Hi components, 563 // width. For an I64 arg that has been split into Lo and Hi components,
528 // it calls itself recursively on the components, taking care to handle 564 // it calls itself recursively on the components, taking care to handle
529 // Lo first because of the little-endian architecture. Lastly, this 565 // Lo first because of the little-endian architecture. Lastly, this
530 // function generates an instruction to copy Arg into its assigned 566 // function generates an instruction to copy Arg into its assigned
531 // register if applicable. 567 // register if applicable.
532 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, 568 void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
(...skipping 23 matching lines...)
556 _movp(Arg, Mem); 592 _movp(Arg, Mem);
557 } else { 593 } else {
558 _mov(Arg, Mem); 594 _mov(Arg, Mem);
559 } 595 }
560 } 596 }
561 } 597 }
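As a concrete (hypothetical) instance of the i64 case described above: for a split argument whose components begin at in-arg offset 8, the Lo half is placed at [frame + BasicFrameOffset + 8] and the Hi half at [frame + BasicFrameOffset + 12], since on this little-endian target the low word sits at the lower address; InArgsSizeBytes advances by 4 past each half in turn.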
562 598
563 Type TargetX8632::stackSlotType() { return IceType_i32; } 599 Type TargetX8632::stackSlotType() { return IceType_i32; }
564 600
565 void TargetX8632::addProlog(CfgNode *Node) { 601 void TargetX8632::addProlog(CfgNode *Node) {
602 // Stack frame layout:
603 //
604 // +------------------------+
605 // | 1. return address |
606 // +------------------------+
607 // | 2. preserved registers |
608 // +------------------------+
609 // | 3. padding |
610 // +------------------------+
611 // | 4. global spill area |
612 // +------------------------+
613 // | 5. padding |
614 // +------------------------+
615 // | 6. local spill area |
616 // +------------------------+
617 // | 7. padding |
618 // +------------------------+
619 // | 8. local variables |
620 // +------------------------+
621 //
622 // The following variables record the size in bytes of the given areas:
623 // * X86_RET_IP_SIZE_BYTES: area 1
624 // * PreservedRegsSizeBytes: area 2
625 // * SpillAreaPaddingBytes: area 3
626 // * GlobalsSize: area 4
627 // * GlobalsAndSubsequentPaddingSize: areas 4 - 5
628 // * LocalsSpillAreaSize: area 6
629 // * LocalsSizeBytes: areas 3 - 7
jvoung (off chromium) 2014/08/14 18:40:59 There's a couple of notions of Locals here: local-to-a-basic-block spill slots vs. the overall LocalsSizeBytes total. Consider renaming to make the distinction clearer.
wala 2014/08/14 19:47:01 Done.
630
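To make the padding arithmetic concrete, take an invented example: two preserved registers (PreservedRegsSizeBytes = 8), a largest spilled type of 16 bytes (SpillAreaAlignmentBytes = 16), GlobalsSize = 20, and LocalsSlotsAlignmentBytes = 16. Areas 1 and 2 then occupy 4 + 8 = 12 bytes, so area 3 pads by 4 bytes to start the spill area at offset 16; area 5 pads the 20-byte globals area up to GlobalsAndSubsequentPaddingSize = 32, so the locals spill area also starts 16-byte aligned.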
566 // If SimpleCoalescing is false, each variable without a register 631 // If SimpleCoalescing is false, each variable without a register
567 // gets its own unique stack slot, which leads to large stack 632 // gets its own unique stack slot, which leads to large stack
568 // frames. If SimpleCoalescing is true, then each "global" variable 633 // frames. If SimpleCoalescing is true, then each "global" variable
569 // without a register gets its own slot, but "local" variable slots 634 // without a register gets its own slot, but "local" variable slots
570 // are reused across basic blocks. E.g., if A and B are local to 635 // are reused across basic blocks. E.g., if A and B are local to
571 // block 1 and C is local to block 2, then C may share a slot with A 636 // block 1 and C is local to block 2, then C may share a slot with A
572 // or B. 637 // or B.
573 const bool SimpleCoalescing = true; 638 const bool SimpleCoalescing = true;
574 size_t InArgsSizeBytes = 0; 639 size_t InArgsSizeBytes = 0;
575 size_t PreservedRegsSizeBytes = 0; 640 size_t PreservedRegsSizeBytes = 0;
(...skipping 16 matching lines...)
592 getRegisterSet(RegSet_CalleeSave, RegSet_None); 657 getRegisterSet(RegSet_CalleeSave, RegSet_None);
593 658
594 size_t GlobalsSize = 0; 659 size_t GlobalsSize = 0;
595 std::vector<size_t> LocalsSize(Func->getNumNodes()); 660 std::vector<size_t> LocalsSize(Func->getNumNodes());
596 661
597 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and 662 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and
598 // LocalsSizeBytes. 663 // LocalsSizeBytes.
599 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); 664 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
600 const VarList &Variables = Func->getVariables(); 665 const VarList &Variables = Func->getVariables();
601 const VarList &Args = Func->getArgs(); 666 const VarList &Args = Func->getArgs();
667 VarList SpilledVariables, SortedSpilledVariables,
668 VariablesLinkedToSpillSlots;
669
670 // If there is a separate locals area, this specifies the alignment
671 // for it.
672 uint32_t LocalsSlotsAlignmentBytes = 0;
673 // The entire spill locations area gets aligned to the largest natural
674 // alignment of the variables that have a spill slot.
675 uint32_t SpillAreaAlignmentBytes = 0;
602 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); 676 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
603 I != E; ++I) { 677 I != E; ++I) {
604 Variable *Var = *I; 678 Variable *Var = *I;
605 if (Var->hasReg()) { 679 if (Var->hasReg()) {
606 RegsUsed[Var->getRegNum()] = true; 680 RegsUsed[Var->getRegNum()] = true;
607 continue; 681 continue;
608 } 682 }
609 // An argument either does not need a stack slot (if passed in a 683 // An argument either does not need a stack slot (if passed in a
610 // register) or already has one (if passed on the stack). 684 // register) or already has one (if passed on the stack).
611 if (Var->getIsArg()) 685 if (Var->getIsArg())
612 continue; 686 continue;
613 // An unreferenced variable doesn't need a stack slot. 687 // An unreferenced variable doesn't need a stack slot.
614 if (ComputedLiveRanges && Var->getLiveRange().isEmpty()) 688 if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
615 continue; 689 continue;
616 // A spill slot linked to a variable with a stack slot should reuse 690 // A spill slot linked to a variable with a stack slot should reuse
617 // that stack slot. 691 // that stack slot.
618 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { 692 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
619 if (Variable *Linked = Var->getPreferredRegister()) { 693 if (Variable *Linked = Var->getPreferredRegister()) {
620 if (!Linked->hasReg()) 694 if (!Linked->hasReg()) {
695 VariablesLinkedToSpillSlots.push_back(Var);
621 continue; 696 continue;
697 }
622 } 698 }
623 } 699 }
700 SpilledVariables.push_back(Var);
701 }
702
703 sortByAlignment(SortedSpilledVariables, SpilledVariables);
704 for (VarList::const_iterator I = SortedSpilledVariables.begin(),
705 E = SortedSpilledVariables.end();
706 I != E; ++I) {
707 Variable *Var = *I;
624 size_t Increment = typeWidthInBytesOnStack(Var->getType()); 708 size_t Increment = typeWidthInBytesOnStack(Var->getType());
709 if (!SpillAreaAlignmentBytes)
710 SpillAreaAlignmentBytes = Increment;
625 if (SimpleCoalescing) { 711 if (SimpleCoalescing) {
626 if (Var->isMultiblockLife()) { 712 if (Var->isMultiblockLife()) {
627 GlobalsSize += Increment; 713 GlobalsSize += Increment;
628 } else { 714 } else {
629 SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); 715 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
630 LocalsSize[NodeIndex] += Increment; 716 LocalsSize[NodeIndex] += Increment;
631 if (LocalsSize[NodeIndex] > LocalsSizeBytes) 717 if (LocalsSize[NodeIndex] > LocalsSizeBytes)
632 LocalsSizeBytes = LocalsSize[NodeIndex]; 718 LocalsSizeBytes = LocalsSize[NodeIndex];
719 if (!LocalsSlotsAlignmentBytes)
720 LocalsSlotsAlignmentBytes = Increment;
633 } 721 }
634 } else { 722 } else {
635 LocalsSizeBytes += Increment; 723 LocalsSizeBytes += Increment;
636 } 724 }
637 } 725 }
726 uint32_t LocalsSpillAreaSize = LocalsSizeBytes;
727
638 LocalsSizeBytes += GlobalsSize; 728 LocalsSizeBytes += GlobalsSize;
639 729
640 // Add push instructions for preserved registers. 730 // Add push instructions for preserved registers.
641 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 731 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
642 if (CalleeSaves[i] && RegsUsed[i]) { 732 if (CalleeSaves[i] && RegsUsed[i]) {
643 PreservedRegsSizeBytes += 4; 733 PreservedRegsSizeBytes += 4;
644 const bool SuppressStackAdjustment = true; 734 const bool SuppressStackAdjustment = true;
645 _push(getPhysicalRegister(i), SuppressStackAdjustment); 735 _push(getPhysicalRegister(i), SuppressStackAdjustment);
646 } 736 }
647 } 737 }
648 738
649 // Generate "push ebp; mov ebp, esp" 739 // Generate "push ebp; mov ebp, esp"
650 if (IsEbpBasedFrame) { 740 if (IsEbpBasedFrame) {
651 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) 741 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
652 .count() == 0); 742 .count() == 0);
653 PreservedRegsSizeBytes += 4; 743 PreservedRegsSizeBytes += 4;
654 Variable *ebp = getPhysicalRegister(Reg_ebp); 744 Variable *ebp = getPhysicalRegister(Reg_ebp);
655 Variable *esp = getPhysicalRegister(Reg_esp); 745 Variable *esp = getPhysicalRegister(Reg_esp);
656 const bool SuppressStackAdjustment = true; 746 const bool SuppressStackAdjustment = true;
657 _push(ebp, SuppressStackAdjustment); 747 _push(ebp, SuppressStackAdjustment);
658 _mov(ebp, esp); 748 _mov(ebp, esp);
659 } 749 }
660 750
751 // Align the variables area. SpillAreaPaddingBytes is the size of
752 // the region after the preserved registers and before the spill
753 // areas.
754 uint32_t SpillAreaPaddingBytes = 0;
755 if (SpillAreaAlignmentBytes) {
756 assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES);
757 uint32_t PaddingStart = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
758 uint32_t SpillAreaStart =
759 applyAlignment(PaddingStart, SpillAreaAlignmentBytes);
760 SpillAreaPaddingBytes = SpillAreaStart - PaddingStart;
761 LocalsSizeBytes += SpillAreaPaddingBytes;
762 }
763
764 // If there are separate globals and locals areas, make sure the
765 // locals area is aligned by padding the end of the globals area.
766 uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize;
767 if (LocalsSlotsAlignmentBytes) {
768 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
769 GlobalsAndSubsequentPaddingSize =
770 applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes);
771 LocalsSizeBytes += GlobalsAndSubsequentPaddingSize - GlobalsSize;
772 }
773
774 // Align esp if necessary.
661 if (NeedsStackAlignment) { 775 if (NeedsStackAlignment) {
662 uint32_t StackSize = applyStackAlignment( 776 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
663 X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes + LocalsSizeBytes); 777 uint32_t StackSize = applyStackAlignment(StackOffset + LocalsSizeBytes);
664 LocalsSizeBytes = 778 LocalsSizeBytes = StackSize - StackOffset;
665 StackSize - X86_RET_IP_SIZE_BYTES - PreservedRegsSizeBytes;
666 } 779 }
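Worked example with invented numbers: if StackOffset = 4 + 8 = 12 and LocalsSizeBytes = 50, then StackSize = applyStackAlignment(62) = 64 and LocalsSizeBytes grows to 64 - 12 = 52, so the "sub esp" emitted below leaves esp 16-byte aligned; if LocalsSizeBytes were already 52, the rounding would change nothing.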
667 780
668 // Generate "sub esp, LocalsSizeBytes" 781 // Generate "sub esp, LocalsSizeBytes"
669 if (LocalsSizeBytes) 782 if (LocalsSizeBytes)
670 _sub(getPhysicalRegister(Reg_esp), 783 _sub(getPhysicalRegister(Reg_esp),
671 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes)); 784 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
672 785
673 resetStackAdjustment(); 786 resetStackAdjustment();
674 787
675 // Fill in stack offsets for stack args, and copy args into registers 788 // Fill in stack offsets for stack args, and copy args into registers
676 // for those that were register-allocated. Args are pushed right to 789 // for those that were register-allocated. Args are pushed right to
677 // left, so Arg[0] is closest to the stack/frame pointer. 790 // left, so Arg[0] is closest to the stack/frame pointer.
678 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 791 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
679 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; 792 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
680 if (!IsEbpBasedFrame) 793 if (!IsEbpBasedFrame)
681 BasicFrameOffset += LocalsSizeBytes; 794 BasicFrameOffset += LocalsSizeBytes;
682 795
683 unsigned NumXmmArgs = 0; 796 unsigned NumXmmArgs = 0;
684 for (SizeT i = 0; i < Args.size(); ++i) { 797 for (SizeT i = 0; i < Args.size(); ++i) {
685 Variable *Arg = Args[i]; 798 Variable *Arg = Args[i];
686 // Skip arguments passed in registers. 799 // Skip arguments passed in registers.
687 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { 800 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) {
688 ++NumXmmArgs; 801 ++NumXmmArgs;
689 continue; 802 continue;
690 } 803 }
691 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); 804 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
692 } 805 }
693 806
694 // Fill in stack offsets for locals. 807 // Fill in stack offsets for locals.
695 size_t TotalGlobalsSize = GlobalsSize; 808 size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
696 GlobalsSize = 0;
697 LocalsSize.assign(LocalsSize.size(), 0); 809 LocalsSize.assign(LocalsSize.size(), 0);
698 size_t NextStackOffset = 0; 810 size_t NextStackOffset = GlobalsSpaceUsed;
699 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); 811 for (VarList::const_iterator I = SortedSpilledVariables.begin(),
812 E = SortedSpilledVariables.end();
700 I != E; ++I) { 813 I != E; ++I) {
701 Variable *Var = *I; 814 Variable *Var = *I;
702 if (Var->hasReg()) {
703 RegsUsed[Var->getRegNum()] = true;
704 continue;
705 }
706 if (Var->getIsArg())
707 continue;
708 if (ComputedLiveRanges && Var->getLiveRange().isEmpty())
709 continue;
710 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
711 if (Variable *Linked = Var->getPreferredRegister()) {
712 if (!Linked->hasReg()) {
713 // TODO: Make sure Linked has already been assigned a stack
714 // slot.
715 Var->setStackOffset(Linked->getStackOffset());
716 continue;
717 }
718 }
719 }
720 size_t Increment = typeWidthInBytesOnStack(Var->getType()); 815 size_t Increment = typeWidthInBytesOnStack(Var->getType());
721 if (SimpleCoalescing) { 816 if (SimpleCoalescing) {
722 if (Var->isMultiblockLife()) { 817 if (Var->isMultiblockLife()) {
723 GlobalsSize += Increment; 818 GlobalsSpaceUsed += Increment;
724 NextStackOffset = GlobalsSize; 819 NextStackOffset = GlobalsSpaceUsed;
725 } else { 820 } else {
726 SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); 821 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
727 LocalsSize[NodeIndex] += Increment; 822 LocalsSize[NodeIndex] += Increment;
728 NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex]; 823 NextStackOffset = SpillAreaPaddingBytes +
824 GlobalsAndSubsequentPaddingSize +
825 LocalsSize[NodeIndex];
729 } 826 }
730 } else { 827 } else {
731 NextStackOffset += Increment; 828 NextStackOffset += Increment;
732 } 829 }
733 if (IsEbpBasedFrame) 830 if (IsEbpBasedFrame)
734 Var->setStackOffset(-NextStackOffset); 831 Var->setStackOffset(-NextStackOffset);
735 else 832 else
736 Var->setStackOffset(LocalsSizeBytes - NextStackOffset); 833 Var->setStackOffset(LocalsSizeBytes - NextStackOffset);
737 } 834 }
738 this->FrameSizeLocals = NextStackOffset; 835 this->FrameSizeLocals = NextStackOffset - SpillAreaPaddingBytes;
739 this->HasComputedFrame = true; 836 this->HasComputedFrame = true;
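An illustration of the two addressing modes (invented numbers): a slot with NextStackOffset = 8 in a frame with LocalsSizeBytes = 52 is addressed as ebp-8 in an ebp-based frame, but as esp+44 (LocalsSizeBytes - NextStackOffset) in an esp-based frame, since after the prolog's "sub esp" the stack pointer sits at the bottom of the allocated area.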
740 837
838 // Assign stack offsets to variables that have been linked to spilled
839 // variables.
840 for (VarList::const_iterator I = VariablesLinkedToSpillSlots.begin(),
841 E = VariablesLinkedToSpillSlots.end();
842 I != E; ++I) {
843 Variable *Var = *I;
844 Variable *Linked = Var->getPreferredRegister();
845 Var->setStackOffset(Linked->getStackOffset());
846 }
847
741 if (Func->getContext()->isVerbose(IceV_Frame)) { 848 if (Func->getContext()->isVerbose(IceV_Frame)) {
742 Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes 849 Ostream &Str = Func->getContext()->getStrDump();
743 << "\n" 850
744 << "InArgsSizeBytes=" << InArgsSizeBytes 851 Str << "Stack layout:\n";
745 << "\n" 852 uint32_t EspAdjustmentPaddingSize = LocalsSizeBytes - LocalsSpillAreaSize -
746 << "PreservedRegsSizeBytes=" 853 GlobalsAndSubsequentPaddingSize -
747 << PreservedRegsSizeBytes << "\n"; 854 SpillAreaPaddingBytes;
855 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
856 << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n"
857 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
858 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
859 << " globals spill area = " << GlobalsSize << " bytes\n"
860 << " globals-locals spill areas intermediate padding = "
861 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
862 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
863 << " esp alignment padding = " << EspAdjustmentPaddingSize
864 << " bytes\n";
865
866 Str << "Stack details:\n"
867 << " esp adjustment = " << LocalsSizeBytes << " bytes\n"
868 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
869 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
870 << " bytes\n"
871 << " is ebp based = " << IsEbpBasedFrame << "\n";
748 } 872 }
749 } 873 }
750 874
751 void TargetX8632::addEpilog(CfgNode *Node) { 875 void TargetX8632::addEpilog(CfgNode *Node) {
752 InstList &Insts = Node->getInsts(); 876 InstList &Insts = Node->getInsts();
753 InstList::reverse_iterator RI, E; 877 InstList::reverse_iterator RI, E;
754 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { 878 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
755 if (llvm::isa<InstX8632Ret>(*RI)) 879 if (llvm::isa<InstX8632Ret>(*RI))
756 break; 880 break;
757 } 881 }
(...skipping 226 matching lines...)
984 assert((AlignmentParam & (AlignmentParam - 1)) == 0); 1108 assert((AlignmentParam & (AlignmentParam - 1)) == 0);
985 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); 1109 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
986 1110
987 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); 1111 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
988 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { 1112 if (Alignment > X86_STACK_ALIGNMENT_BYTES) {
989 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment)); 1113 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment));
990 } 1114 }
991 if (ConstantInteger *ConstantTotalSize = 1115 if (ConstantInteger *ConstantTotalSize =
992 llvm::dyn_cast<ConstantInteger>(TotalSize)) { 1116 llvm::dyn_cast<ConstantInteger>(TotalSize)) {
993 uint32_t Value = ConstantTotalSize->getValue(); 1117 uint32_t Value = ConstantTotalSize->getValue();
994 // Round Value up to the next highest multiple of the alignment. 1118 Value = applyAlignment(Value, Alignment);
995 Value = (Value + Alignment - 1) & -Alignment;
996 _sub(esp, Ctx->getConstantInt(IceType_i32, Value)); 1119 _sub(esp, Ctx->getConstantInt(IceType_i32, Value));
997 } else { 1120 } else {
998 // Non-constant sizes need to be adjusted to the next highest 1121 // Non-constant sizes need to be adjusted to the next highest
999 // multiple of the required alignment at runtime. 1122 // multiple of the required alignment at runtime.
1000 Variable *T = makeReg(IceType_i32); 1123 Variable *T = makeReg(IceType_i32);
1001 _mov(T, TotalSize); 1124 _mov(T, TotalSize);
1002 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1)); 1125 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1));
1003 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment)); 1126 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment));
1004 _sub(esp, T); 1127 _sub(esp, T);
1005 } 1128 }
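The non-constant path rounds the size at runtime with an add/and pair. A scalar model of that three-instruction sequence (a sketch with invented names, not the emitted code):

#include <assert.h>
#include <stdint.h>

// Models _add(T, Alignment - 1); _and(T, -Alignment); esp is then
// decremented by the result.
uint32_t runtimeRoundUp(uint32_t TotalSize, uint32_t Alignment) {
  uint32_t T = TotalSize;
  T += Alignment - 1; // add T, Alignment-1
  T &= -Alignment;    // and T, -Alignment (clears the low bits)
  return T;
}

int main() {
  assert(runtimeRoundUp(1, 16) == 16);
  assert(runtimeRoundUp(32, 16) == 32);
  assert(runtimeRoundUp(33, 32) == 64);
  return 0;
}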
(...skipping 226 matching lines...)
1232 case InstArithmetic::Fsub: 1355 case InstArithmetic::Fsub:
1233 case InstArithmetic::Fmul: 1356 case InstArithmetic::Fmul:
1234 case InstArithmetic::Fdiv: 1357 case InstArithmetic::Fdiv:
1235 case InstArithmetic::Frem: 1358 case InstArithmetic::Frem:
1236 llvm_unreachable("FP instruction with i64 type"); 1359 llvm_unreachable("FP instruction with i64 type");
1237 break; 1360 break;
1238 } 1361 }
1239 } else if (isVectorType(Dest->getType())) { 1362 } else if (isVectorType(Dest->getType())) {
1240 // TODO: Trap on integer divide and integer modulo by zero. 1363 // TODO: Trap on integer divide and integer modulo by zero.
1241 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 1364 // See: https://code.google.com/p/nativeclient/issues/detail?id=3899
1242 //
1243 // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in
1244 // registers. This is a workaround of the fact that there is no
1245 // support for aligning stack operands. Once there is support,
1246 // remove LEGAL_HACK.
1247 #define LEGAL_HACK(s) legalizeToVar((s))
1248 switch (Inst->getOp()) { 1365 switch (Inst->getOp()) {
1249 case InstArithmetic::_num: 1366 case InstArithmetic::_num:
1250 llvm_unreachable("Unknown arithmetic operator"); 1367 llvm_unreachable("Unknown arithmetic operator");
1251 break; 1368 break;
1252 case InstArithmetic::Add: { 1369 case InstArithmetic::Add: {
1253 Variable *T = makeReg(Dest->getType()); 1370 Variable *T = makeReg(Dest->getType());
1254 _movp(T, Src0); 1371 _movp(T, Src0);
1255 _padd(T, LEGAL_HACK(Src1)); 1372 _padd(T, Src1);
1256 _movp(Dest, T); 1373 _movp(Dest, T);
1257 } break; 1374 } break;
1258 case InstArithmetic::And: { 1375 case InstArithmetic::And: {
1259 Variable *T = makeReg(Dest->getType()); 1376 Variable *T = makeReg(Dest->getType());
1260 _movp(T, Src0); 1377 _movp(T, Src0);
1261 _pand(T, LEGAL_HACK(Src1)); 1378 _pand(T, Src1);
1262 _movp(Dest, T); 1379 _movp(Dest, T);
1263 } break; 1380 } break;
1264 case InstArithmetic::Or: { 1381 case InstArithmetic::Or: {
1265 Variable *T = makeReg(Dest->getType()); 1382 Variable *T = makeReg(Dest->getType());
1266 _movp(T, Src0); 1383 _movp(T, Src0);
1267 _por(T, LEGAL_HACK(Src1)); 1384 _por(T, Src1);
1268 _movp(Dest, T); 1385 _movp(Dest, T);
1269 } break; 1386 } break;
1270 case InstArithmetic::Xor: { 1387 case InstArithmetic::Xor: {
1271 Variable *T = makeReg(Dest->getType()); 1388 Variable *T = makeReg(Dest->getType());
1272 _movp(T, Src0); 1389 _movp(T, Src0);
1273 _pxor(T, LEGAL_HACK(Src1)); 1390 _pxor(T, Src1);
1274 _movp(Dest, T); 1391 _movp(Dest, T);
1275 } break; 1392 } break;
1276 case InstArithmetic::Sub: { 1393 case InstArithmetic::Sub: {
1277 Variable *T = makeReg(Dest->getType()); 1394 Variable *T = makeReg(Dest->getType());
1278 _movp(T, Src0); 1395 _movp(T, Src0);
1279 _psub(T, LEGAL_HACK(Src1)); 1396 _psub(T, Src1);
1280 _movp(Dest, T); 1397 _movp(Dest, T);
1281 } break; 1398 } break;
1282 case InstArithmetic::Mul: { 1399 case InstArithmetic::Mul: {
1283 bool TypesAreValidForPmull = 1400 bool TypesAreValidForPmull =
1284 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16; 1401 Dest->getType() == IceType_v4i32 || Dest->getType() == IceType_v8i16;
1285 bool InstructionSetIsValidForPmull = 1402 bool InstructionSetIsValidForPmull =
1286 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1; 1403 Dest->getType() == IceType_v8i16 || InstructionSet >= SSE4_1;
1287 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { 1404 if (TypesAreValidForPmull && InstructionSetIsValidForPmull) {
1288 Variable *T = makeReg(Dest->getType()); 1405 Variable *T = makeReg(Dest->getType());
1289 _movp(T, Src0); 1406 _movp(T, Src0);
1290 _pmull(T, LEGAL_HACK(Src1)); 1407 _pmull(T, Src1);
1291 _movp(Dest, T); 1408 _movp(Dest, T);
1292 } else if (Dest->getType() == IceType_v4i32) { 1409 } else if (Dest->getType() == IceType_v4i32) {
1293 // Lowering sequence: 1410 // Lowering sequence:
1294 // Note: The mask arguments have index 0 on the left. 1411 // Note: The mask arguments have index 0 on the left.
1295 // 1412 //
1296 // movups T1, Src0 1413 // movups T1, Src0
1297 // pshufd T2, Src0, {1,0,3,0} 1414 // pshufd T2, Src0, {1,0,3,0}
1298 // pshufd T3, Src1, {1,0,3,0} 1415 // pshufd T3, Src1, {1,0,3,0}
1299 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]} 1416 // # T1 = {Src0[0] * Src1[0], Src0[2] * Src1[2]}
1300 // pmuludq T1, Src1 1417 // pmuludq T1, Src1
(...skipping 12 matching lines...)
1313 // Dest[0, 2], Src[0, 2] 1430 // Dest[0, 2], Src[0, 2]
1314 const unsigned Mask0202 = 0x88; 1431 const unsigned Mask0202 = 0x88;
1315 // Mask that directs pshufd to create a vector with entries 1432 // Mask that directs pshufd to create a vector with entries
1316 // Src[0, 2, 1, 3] 1433 // Src[0, 2, 1, 3]
1317 const unsigned Mask0213 = 0xd8; 1434 const unsigned Mask0213 = 0xd8;
1318 Variable *T1 = makeReg(IceType_v4i32); 1435 Variable *T1 = makeReg(IceType_v4i32);
1319 Variable *T2 = makeReg(IceType_v4i32); 1436 Variable *T2 = makeReg(IceType_v4i32);
1320 Variable *T3 = makeReg(IceType_v4i32); 1437 Variable *T3 = makeReg(IceType_v4i32);
1321 Variable *T4 = makeReg(IceType_v4i32); 1438 Variable *T4 = makeReg(IceType_v4i32);
1322 _movp(T1, Src0); 1439 _movp(T1, Src0);
1323 // TODO(wala): ALIGHNHACK: Replace Src0R with Src0 and Src1R 1440 _pshufd(T2, Src0, Mask1030);
1324 // with Src1 after stack operand alignment support is 1441 _pshufd(T3, Src1, Mask1030);
1325 // implemented. 1442 _pmuludq(T1, Src1);
1326 Variable *Src0R = LEGAL_HACK(Src0);
1327 Variable *Src1R = LEGAL_HACK(Src1);
1328 _pshufd(T2, Src0R, Mask1030);
1329 _pshufd(T3, Src1R, Mask1030);
1330 _pmuludq(T1, Src1R);
1331 _pmuludq(T2, T3); 1443 _pmuludq(T2, T3);
1332 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); 1444 _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202));
1333 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); 1445 _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213));
1334 _movp(Dest, T4); 1446 _movp(Dest, T4);
1335 } else { 1447 } else {
1336 assert(Dest->getType() == IceType_v16i8); 1448 assert(Dest->getType() == IceType_v16i8);
1337 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1449 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1338 } 1450 }
1339 } break; 1451 } break;
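The pmuludq-based sequence above is easier to see in scalar form. A sketch of what the five SSE instructions compute per lane (illustrative only; the lane bookkeeping follows the comments in the lowering):

#include <assert.h>
#include <stdint.h>

// Scalar model of the v4i32 multiply lowering: pmuludq only multiplies
// lanes 0 and 2 (producing 64-bit results), so lanes 1 and 3 are shuffled
// into the even positions, multiplied separately, and the four low 32-bit
// halves are recombined with shufps/pshufd.
void mulV4I32(const uint32_t A[4], const uint32_t B[4], uint32_t Out[4]) {
  uint64_t P0 = (uint64_t)A[0] * B[0]; // pmuludq T1, Src1 (lanes 0 and 2)
  uint64_t P2 = (uint64_t)A[2] * B[2];
  uint64_t P1 = (uint64_t)A[1] * B[1]; // pshufd {1,0,3,0}; pmuludq T2, T3
  uint64_t P3 = (uint64_t)A[3] * B[3];
  Out[0] = (uint32_t)P0; // shufps/pshufd gather the low halves back
  Out[1] = (uint32_t)P1; // into lane order {0, 1, 2, 3}
  Out[2] = (uint32_t)P2;
  Out[3] = (uint32_t)P3;
}

int main() {
  uint32_t A[4] = {2, 3, 0x10000, 5}, B[4] = {7, 9, 0x10000, 11}, R[4];
  mulV4I32(A, B, R);
  assert(R[0] == 14 && R[1] == 27 && R[2] == 0 && R[3] == 55);
  return 0;
}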
1340 case InstArithmetic::Shl: 1452 case InstArithmetic::Shl:
1341 case InstArithmetic::Lshr: 1453 case InstArithmetic::Lshr:
1342 case InstArithmetic::Ashr: 1454 case InstArithmetic::Ashr:
1343 case InstArithmetic::Udiv: 1455 case InstArithmetic::Udiv:
1344 case InstArithmetic::Urem: 1456 case InstArithmetic::Urem:
1345 case InstArithmetic::Sdiv: 1457 case InstArithmetic::Sdiv:
1346 case InstArithmetic::Srem: 1458 case InstArithmetic::Srem:
1347 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1459 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1348 break; 1460 break;
1349 case InstArithmetic::Fadd: { 1461 case InstArithmetic::Fadd: {
1350 Variable *T = makeReg(Dest->getType()); 1462 Variable *T = makeReg(Dest->getType());
1351 _movp(T, Src0); 1463 _movp(T, Src0);
1352 _addps(T, LEGAL_HACK(Src1)); 1464 _addps(T, Src1);
1353 _movp(Dest, T); 1465 _movp(Dest, T);
1354 } break; 1466 } break;
1355 case InstArithmetic::Fsub: { 1467 case InstArithmetic::Fsub: {
1356 Variable *T = makeReg(Dest->getType()); 1468 Variable *T = makeReg(Dest->getType());
1357 _movp(T, Src0); 1469 _movp(T, Src0);
1358 _subps(T, LEGAL_HACK(Src1)); 1470 _subps(T, Src1);
1359 _movp(Dest, T); 1471 _movp(Dest, T);
1360 } break; 1472 } break;
1361 case InstArithmetic::Fmul: { 1473 case InstArithmetic::Fmul: {
1362 Variable *T = makeReg(Dest->getType()); 1474 Variable *T = makeReg(Dest->getType());
1363 _movp(T, Src0); 1475 _movp(T, Src0);
1364 _mulps(T, LEGAL_HACK(Src1)); 1476 _mulps(T, Src1);
1365 _movp(Dest, T); 1477 _movp(Dest, T);
1366 } break; 1478 } break;
1367 case InstArithmetic::Fdiv: { 1479 case InstArithmetic::Fdiv: {
1368 Variable *T = makeReg(Dest->getType()); 1480 Variable *T = makeReg(Dest->getType());
1369 _movp(T, Src0); 1481 _movp(T, Src0);
1370 _divps(T, LEGAL_HACK(Src1)); 1482 _divps(T, Src1);
1371 _movp(Dest, T); 1483 _movp(Dest, T);
1372 } break; 1484 } break;
1373 case InstArithmetic::Frem: 1485 case InstArithmetic::Frem:
1374 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); 1486 scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1);
1375 break; 1487 break;
1376 } 1488 }
1377 #undef LEGAL_HACK
1378 } else { // Dest->getType() is non-i64 scalar 1489 } else { // Dest->getType() is non-i64 scalar
1379 Variable *T_edx = NULL; 1490 Variable *T_edx = NULL;
1380 Variable *T = NULL; 1491 Variable *T = NULL;
1381 switch (Inst->getOp()) { 1492 switch (Inst->getOp()) {
1382 case InstArithmetic::_num: 1493 case InstArithmetic::_num:
1383 llvm_unreachable("Unknown arithmetic operator"); 1494 llvm_unreachable("Unknown arithmetic operator");
1384 break; 1495 break;
1385 case InstArithmetic::Add: 1496 case InstArithmetic::Add:
1386 _mov(T, Src0); 1497 _mov(T, Src0);
1387 _add(T, Src1); 1498 _add(T, Src1);
(...skipping 804 matching lines...)
2192 // TODO(wala): Determine the best lowering sequences for each type. 2303 // TODO(wala): Determine the best lowering sequences for each type.
2193 bool CanUsePextr = 2304 bool CanUsePextr =
2194 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; 2305 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
2195 if (CanUsePextr && Ty != IceType_v4f32) { 2306 if (CanUsePextr && Ty != IceType_v4f32) {
2196 // Use pextrb, pextrw, or pextrd. 2307 // Use pextrb, pextrw, or pextrd.
2197 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); 2308 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
2198 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); 2309 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
2199 _pextr(ExtractedElementR, SourceVectR, Mask); 2310 _pextr(ExtractedElementR, SourceVectR, Mask);
2200 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { 2311 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
2201 // Use pshufd and movd/movss. 2312 // Use pshufd and movd/movss.
2202 //
2203 // ALIGNHACK: Force vector operands to registers in instructions
2204 // that require aligned memory operands until support for data
2205 // alignment is implemented.
2206 #define ALIGN_HACK(Vect) legalizeToVar((Vect))
2207 Operand *SourceVectRM =
2208 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
2209 Variable *T = NULL; 2313 Variable *T = NULL;
2210 if (Index) { 2314 if (Index) {
2211 // The shuffle only needs to occur if the element to be extracted 2315 // The shuffle only needs to occur if the element to be extracted
2212 // is not at the lowest index. 2316 // is not at the lowest index.
2213 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); 2317 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
2214 T = makeReg(Ty); 2318 T = makeReg(Ty);
2215 _pshufd(T, ALIGN_HACK(SourceVectRM), Mask); 2319 _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask);
2216 } else { 2320 } else {
2217 T = ALIGN_HACK(SourceVectRM); 2321 T = legalizeToVar(SourceVectNotLegalized);
2218 } 2322 }
2219 2323
2220 if (InVectorElementTy == IceType_i32) { 2324 if (InVectorElementTy == IceType_i32) {
2221 _movd(ExtractedElementR, T); 2325 _movd(ExtractedElementR, T);
2222 } else { // Ty == IceType_f32 2326 } else { // Ty == IceType_f32
2223 // TODO(wala): _movss is only used here because _mov does not 2327 // TODO(wala): _movss is only used here because _mov does not
2224 // allow a vector source and a scalar destination. _mov should be 2328 // allow a vector source and a scalar destination. _mov should be
2225 // able to be used here. 2329 // able to be used here.
2226 // _movss is a binary instruction, so the FakeDef is needed to 2330 // _movss is a binary instruction, so the FakeDef is needed to
2227 // keep the live range analysis consistent. 2331 // keep the live range analysis consistent.
2228 Context.insert(InstFakeDef::create(Func, ExtractedElementR)); 2332 Context.insert(InstFakeDef::create(Func, ExtractedElementR));
2229 _movss(ExtractedElementR, T); 2333 _movss(ExtractedElementR, T);
2230 } 2334 }
2231 #undef ALIGN_HACK
2232 } else { 2335 } else {
2233 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); 2336 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2234 // Spill the value to a stack slot and do the extraction in memory. 2337 // Spill the value to a stack slot and do the extraction in memory.
2235 // 2338 //
2236 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when 2339 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
2237 // support for legalizing to mem is implemented. 2340 // support for legalizing to mem is implemented.
2238 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); 2341 Variable *Slot = Func->makeVariable(Ty, Context.getNode());
2239 Slot->setWeight(RegWeight::Zero); 2342 Slot->setWeight(RegWeight::Zero);
2240 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); 2343 _movp(Slot, legalizeToVar(SourceVectNotLegalized));
2241 2344
(...skipping 38 matching lines...)
2280 2383
2281 if (Condition == InstFcmp::True) { 2384 if (Condition == InstFcmp::True) {
2282 // makeVectorOfOnes() requires an integer vector type. 2385 // makeVectorOfOnes() requires an integer vector type.
2283 T = makeVectorOfMinusOnes(IceType_v4i32); 2386 T = makeVectorOfMinusOnes(IceType_v4i32);
2284 } else if (Condition == InstFcmp::False) { 2387 } else if (Condition == InstFcmp::False) {
2285 T = makeVectorOfZeros(Dest->getType()); 2388 T = makeVectorOfZeros(Dest->getType());
2286 } else { 2389 } else {
2287 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2390 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2288 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2391 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2289 2392
2290 // ALIGNHACK: Without support for data alignment, both operands to
2291 // cmpps need to be forced into registers. Once support for data
2292 // alignment is implemented, remove LEGAL_HACK.
2293 #define LEGAL_HACK(Vect) legalizeToVar((Vect))
2294 switch (Condition) { 2393 switch (Condition) {
2295 default: { 2394 default: {
2296 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; 2395 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate;
2297 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); 2396 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);
2298 T = makeReg(Src0RM->getType()); 2397 T = makeReg(Src0RM->getType());
2299 _movp(T, Src0RM); 2398 _movp(T, Src0RM);
2300 _cmpps(T, LEGAL_HACK(Src1RM), Predicate); 2399 _cmpps(T, Src1RM, Predicate);
2301 } break; 2400 } break;
2302 case InstFcmp::One: { 2401 case InstFcmp::One: {
2303 // Check both unequal and ordered. 2402 // Check both unequal and ordered.
2304 T = makeReg(Src0RM->getType()); 2403 T = makeReg(Src0RM->getType());
2305 Variable *T2 = makeReg(Src0RM->getType()); 2404 Variable *T2 = makeReg(Src0RM->getType());
2306 Src1RM = LEGAL_HACK(Src1RM);
2307 _movp(T, Src0RM); 2405 _movp(T, Src0RM);
2308 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq); 2406 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq);
2309 _movp(T2, Src0RM); 2407 _movp(T2, Src0RM);
2310 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord); 2408 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord);
2311 _pand(T, T2); 2409 _pand(T, T2);
2312 } break; 2410 } break;
2313 case InstFcmp::Ueq: { 2411 case InstFcmp::Ueq: {
2314 // Check both equal or unordered. 2412 // Check both equal or unordered.
2315 T = makeReg(Src0RM->getType()); 2413 T = makeReg(Src0RM->getType());
2316 Variable *T2 = makeReg(Src0RM->getType()); 2414 Variable *T2 = makeReg(Src0RM->getType());
2317 Src1RM = LEGAL_HACK(Src1RM);
2318 _movp(T, Src0RM); 2415 _movp(T, Src0RM);
2319 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq); 2416 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq);
2320 _movp(T2, Src0RM); 2417 _movp(T2, Src0RM);
2321 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord); 2418 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord);
2322 _por(T, T2); 2419 _por(T, T2);
2323 } break; 2420 } break;
2324 } 2421 }
2325 #undef LEGAL_HACK
2326 } 2422 }
2327 2423
2328 _movp(Dest, T); 2424 _movp(Dest, T);
2329 eliminateNextVectorSextInstruction(Dest); 2425 eliminateNextVectorSextInstruction(Dest);
2330 return; 2426 return;
2331 } 2427 }
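There is no single cmpps predicate for "one" (ordered and unequal) or "ueq" (unordered or equal), which is why the two cases above each combine a pair of compares with pand/por. A scalar model of the identities (a sketch only):

#include <cassert>
#include <cmath>

bool fcmpOne(float A, float B) {                 // ordered and unequal
  bool Neq = !(A == B);                          // cmpps neq (also true on NaN)
  bool Ord = !std::isnan(A) && !std::isnan(B);   // cmpps ord
  return Neq && Ord;                             // pand
}

bool fcmpUeq(float A, float B) {                 // unordered or equal
  bool Eq = (A == B);                            // cmpps eq
  bool Unord = std::isnan(A) || std::isnan(B);   // cmpps unord
  return Eq || Unord;                            // por
}

int main() {
  float QNaN = std::nanf("");
  assert(fcmpOne(1.0f, 2.0f) && !fcmpOne(1.0f, 1.0f) && !fcmpOne(QNaN, 1.0f));
  assert(fcmpUeq(1.0f, 1.0f) && fcmpUeq(QNaN, 2.0f) && !fcmpUeq(1.0f, 2.0f));
  return 0;
}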
2332 2428
2333 // Lowering a = fcmp cond, b, c 2429 // Lowering a = fcmp cond, b, c
2334 // ucomiss b, c /* only if C1 != Br_None */ 2430 // ucomiss b, c /* only if C1 != Br_None */
2335 // /* but swap b,c order if SwapOperands==true */ 2431 // /* but swap b,c order if SwapOperands==true */
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
2420 Variable *T1 = makeReg(Ty); 2516 Variable *T1 = makeReg(Ty);
2421 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); 2517 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
2422 _movp(T0, Src0RM); 2518 _movp(T0, Src0RM);
2423 _pxor(T0, HighOrderBits); 2519 _pxor(T0, HighOrderBits);
2424 _movp(T1, Src1RM); 2520 _movp(T1, Src1RM);
2425 _pxor(T1, HighOrderBits); 2521 _pxor(T1, HighOrderBits);
2426 Src0RM = T0; 2522 Src0RM = T0;
2427 Src1RM = T1; 2523 Src1RM = T1;
2428 } 2524 }
2429 2525
2430 // TODO: ALIGNHACK: Both operands to compare instructions need to be
2431 // in registers until data alignment support is implemented. Once
2432 // there is support for data alignment, LEGAL_HACK can be removed.
2433 #define LEGAL_HACK(Vect) legalizeToVar((Vect))
2434 Variable *T = makeReg(Ty); 2526 Variable *T = makeReg(Ty);
2435 switch (Condition) { 2527 switch (Condition) {
2436 default: 2528 default:
2437 llvm_unreachable("unexpected condition"); 2529 llvm_unreachable("unexpected condition");
2438 break; 2530 break;
2439 case InstIcmp::Eq: { 2531 case InstIcmp::Eq: {
2440 _movp(T, Src0RM); 2532 _movp(T, Src0RM);
2441 _pcmpeq(T, LEGAL_HACK(Src1RM)); 2533 _pcmpeq(T, Src1RM);
2442 } break; 2534 } break;
2443 case InstIcmp::Ne: { 2535 case InstIcmp::Ne: {
2444 _movp(T, Src0RM); 2536 _movp(T, Src0RM);
2445 _pcmpeq(T, LEGAL_HACK(Src1RM)); 2537 _pcmpeq(T, Src1RM);
2446 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2538 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2447 _pxor(T, MinusOne); 2539 _pxor(T, MinusOne);
2448 } break; 2540 } break;
2449 case InstIcmp::Ugt: 2541 case InstIcmp::Ugt:
2450 case InstIcmp::Sgt: { 2542 case InstIcmp::Sgt: {
2451 _movp(T, Src0RM); 2543 _movp(T, Src0RM);
2452 _pcmpgt(T, LEGAL_HACK(Src1RM)); 2544 _pcmpgt(T, Src1RM);
2453 } break; 2545 } break;
2454 case InstIcmp::Uge: 2546 case InstIcmp::Uge:
2455 case InstIcmp::Sge: { 2547 case InstIcmp::Sge: {
2456 // !(Src1RM > Src0RM) 2548 // !(Src1RM > Src0RM)
2457 _movp(T, Src1RM); 2549 _movp(T, Src1RM);
2458 _pcmpgt(T, LEGAL_HACK(Src0RM)); 2550 _pcmpgt(T, Src0RM);
2459 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2551 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2460 _pxor(T, MinusOne); 2552 _pxor(T, MinusOne);
2461 } break; 2553 } break;
2462 case InstIcmp::Ult: 2554 case InstIcmp::Ult:
2463 case InstIcmp::Slt: { 2555 case InstIcmp::Slt: {
2464 _movp(T, Src1RM); 2556 _movp(T, Src1RM);
2465 _pcmpgt(T, LEGAL_HACK(Src0RM)); 2557 _pcmpgt(T, Src0RM);
2466 } break; 2558 } break;
2467 case InstIcmp::Ule: 2559 case InstIcmp::Ule:
2468 case InstIcmp::Sle: { 2560 case InstIcmp::Sle: {
2469 // !(Src0RM > Src1RM) 2561 // !(Src0RM > Src1RM)
2470 _movp(T, Src0RM); 2562 _movp(T, Src0RM);
2471 _pcmpgt(T, LEGAL_HACK(Src1RM)); 2563 _pcmpgt(T, Src1RM);
2472 Variable *MinusOne = makeVectorOfMinusOnes(Ty); 2564 Variable *MinusOne = makeVectorOfMinusOnes(Ty);
2473 _pxor(T, MinusOne); 2565 _pxor(T, MinusOne);
2474 } break; 2566 } break;
2475 } 2567 }
2476 #undef LEGAL_HACK
2477 2568
2478 _movp(Dest, T); 2569 _movp(Dest, T);
2479 eliminateNextVectorSextInstruction(Dest); 2570 eliminateNextVectorSextInstruction(Dest);
2480 return; 2571 return;
2481 } 2572 }
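The pxor with the vector of high-order bits above implements the unsigned compares on hardware that only has signed pcmpgt: flipping the sign bit of both operands maps unsigned order onto signed order. A per-lane scalar model (a sketch; assumes two's-complement int32_t):

#include <cassert>
#include <cstdint>

// What pcmpgtd computes per lane after both inputs are pxor'ed with
// 0x80000000 (the "high order bits" vector).
bool ugtViaSignedCompare(uint32_t A, uint32_t B) {
  int32_t SA = (int32_t)(A ^ 0x80000000u); // bias: flip the sign bit
  int32_t SB = (int32_t)(B ^ 0x80000000u);
  return SA > SB;
}

int main() {
  assert(ugtViaSignedCompare(0xFFFFFFFFu, 1u)); // unsigned max > 1
  assert(!ugtViaSignedCompare(0u, 0x80000000u));
  assert(ugtViaSignedCompare(0x80000000u, 0x7FFFFFFFu));
  return 0;
}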
2482 2573
2483 // If Src1 is an immediate, or known to be a physical register, we can 2574 // If Src1 is an immediate, or known to be a physical register, we can
2484 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into 2575 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
2485 // a physical register. (Actually, either Src0 or Src1 can be chosen for 2576 // a physical register. (Actually, either Src0 or Src1 can be chosen for
2486 // the physical register, but unfortunately we have to commit to one or 2577 // the physical register, but unfortunately we have to commit to one or
(...skipping 155 matching lines...) Expand 10 before | Expand all | Expand 10 after
2642 // insertelement into index 3 (result is stored in T): 2733 // insertelement into index 3 (result is stored in T):
2643 // T := SourceVectRM 2734 // T := SourceVectRM
2644 // ElementR := ElementR[0, 0] T[0, 2] 2735 // ElementR := ElementR[0, 0] T[0, 2]
2645 // T := T[0, 1] ElementR[3, 0] 2736 // T := T[0, 1] ElementR[3, 0]
2646 const unsigned char Mask1[3] = {0, 192, 128}; 2737 const unsigned char Mask1[3] = {0, 192, 128};
2647 const unsigned char Mask2[3] = {227, 196, 52}; 2738 const unsigned char Mask2[3] = {227, 196, 52};
2648 2739
2649 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); 2740 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]);
2650 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); 2741 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]);
2651 2742
2652 // ALIGNHACK: Force vector operands to registers in instructions
2653 // that require aligned memory operands until support for data
2654 // alignment is implemented.
2655 #define ALIGN_HACK(Vect) legalizeToVar((Vect))
2656 if (Index == 1) { 2743 if (Index == 1) {
2657 SourceVectRM = ALIGN_HACK(SourceVectRM);
2658 _shufps(ElementR, SourceVectRM, Mask1Constant); 2744 _shufps(ElementR, SourceVectRM, Mask1Constant);
2659 _shufps(ElementR, SourceVectRM, Mask2Constant); 2745 _shufps(ElementR, SourceVectRM, Mask2Constant);
2660 _movp(Inst->getDest(), ElementR); 2746 _movp(Inst->getDest(), ElementR);
2661 } else { 2747 } else {
2662 Variable *T = makeReg(Ty); 2748 Variable *T = makeReg(Ty);
2663 _movp(T, SourceVectRM); 2749 _movp(T, SourceVectRM);
2664 _shufps(ElementR, T, Mask1Constant); 2750 _shufps(ElementR, T, Mask1Constant);
2665 _shufps(T, ElementR, Mask2Constant); 2751 _shufps(T, ElementR, Mask2Constant);
2666 _movp(Inst->getDest(), T); 2752 _movp(Inst->getDest(), T);
2667 } 2753 }
2668 #undef ALIGN_HACK
2669 } else { 2754 } else {
2670 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); 2755 assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
2671 // Spill the value to a stack slot and perform the insertion in 2756 // Spill the value to a stack slot and perform the insertion in
2672 // memory. 2757 // memory.
2673 // 2758 //
2674 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when 2759 // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when
2675 // support for legalizing to mem is implemented. 2760 // support for legalizing to mem is implemented.
2676 Variable *Slot = Func->makeVariable(Ty, Context.getNode()); 2761 Variable *Slot = Func->makeVariable(Ty, Context.getNode());
2677 Slot->setWeight(RegWeight::Zero); 2762 Slot->setWeight(RegWeight::Zero);
2678 _movp(Slot, legalizeToVar(SourceVectNotLegalized)); 2763 _movp(Slot, legalizeToVar(SourceVectNotLegalized));
(...skipping 941 matching lines...)
3620 Variable *Dest = Inst->getDest(); 3705 Variable *Dest = Inst->getDest();
3621 Operand *SrcT = Inst->getTrueOperand(); 3706 Operand *SrcT = Inst->getTrueOperand();
3622 Operand *SrcF = Inst->getFalseOperand(); 3707 Operand *SrcF = Inst->getFalseOperand();
3623 Operand *Condition = Inst->getCondition(); 3708 Operand *Condition = Inst->getCondition();
3624 3709
3625 if (isVectorType(Dest->getType())) { 3710 if (isVectorType(Dest->getType())) {
3626 Type SrcTy = SrcT->getType(); 3711 Type SrcTy = SrcT->getType();
3627 Variable *T = makeReg(SrcTy); 3712 Variable *T = makeReg(SrcTy);
3628 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); 3713 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
3629 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); 3714 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
3630 // ALIGNHACK: Until data alignment support is implemented, vector
3631 // instructions need to have vector operands in registers. Once
3632 // there is support for data alignment, LEGAL_HACK can be removed.
3633 #define LEGAL_HACK(Vect) legalizeToVar((Vect))
3634 if (InstructionSet >= SSE4_1) { 3715 if (InstructionSet >= SSE4_1) {
3635 // TODO(wala): If the condition operand is a constant, use blendps 3716 // TODO(wala): If the condition operand is a constant, use blendps
3636 // or pblendw. 3717 // or pblendw.
3637 // 3718 //
3638 // Use blendvps or pblendvb to implement select. 3719 // Use blendvps or pblendvb to implement select.
3639 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || 3720 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
3640 SrcTy == IceType_v4f32) { 3721 SrcTy == IceType_v4f32) {
3641 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 3722 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
3642 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); 3723 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0);
3643 _movp(xmm0, ConditionRM); 3724 _movp(xmm0, ConditionRM);
3644 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31)); 3725 _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31));
3645 _movp(T, SrcFRM); 3726 _movp(T, SrcFRM);
3646 _blendvps(T, LEGAL_HACK(SrcTRM), xmm0); 3727 _blendvps(T, SrcTRM, xmm0);
3647 _movp(Dest, T); 3728 _movp(Dest, T);
3648 } else { 3729 } else {
3649 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); 3730 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
3650 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 3731 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
3651 : IceType_v16i8; 3732 : IceType_v16i8;
3652 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0); 3733 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0);
3653 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); 3734 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
3654 _movp(T, SrcFRM); 3735 _movp(T, SrcFRM);
3655 _pblendvb(T, LEGAL_HACK(SrcTRM), xmm0); 3736 _pblendvb(T, SrcTRM, xmm0);
3656 _movp(Dest, T); 3737 _movp(Dest, T);
3657 } 3738 }
3658 return; 3739 return;
3659 } 3740 }
3660 // Lower select without SSE4.1: 3741 // Lower select without SSE4.1:
3661 // a=d?b:c ==> 3742 // a=d?b:c ==>
3662 // if elementtype(d) != i1: 3743 // if elementtype(d) != i1:
3663 // d=sext(d); 3744 // d=sext(d);
3664 // a=(b&d)|(c&~d); 3745 // a=(b&d)|(c&~d);
3665 Variable *T2 = makeReg(SrcTy); 3746 Variable *T2 = makeReg(SrcTy);
3666 // Sign extend the condition operand if applicable. 3747 // Sign extend the condition operand if applicable.
3667 if (SrcTy == IceType_v4f32) { 3748 if (SrcTy == IceType_v4f32) {
3668 // The sext operation takes only integer arguments. 3749 // The sext operation takes only integer arguments.
3669 Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode()); 3750 Variable *T3 = Func->makeVariable(IceType_v4i32, Context.getNode());
3670 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition)); 3751 lowerCast(InstCast::create(Func, InstCast::Sext, T3, Condition));
3671 _movp(T, T3); 3752 _movp(T, T3);
3672 } else if (typeElementType(SrcTy) != IceType_i1) { 3753 } else if (typeElementType(SrcTy) != IceType_i1) {
3673 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition)); 3754 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
3674 } else { 3755 } else {
3675 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 3756 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
3676 _movp(T, ConditionRM); 3757 _movp(T, ConditionRM);
3677 } 3758 }
3678 _movp(T2, T); 3759 _movp(T2, T);
3679 _pand(T, LEGAL_HACK(SrcTRM)); 3760 _pand(T, SrcTRM);
3680 _pandn(T2, LEGAL_HACK(SrcFRM)); 3761 _pandn(T2, SrcFRM);
3681 _por(T, T2); 3762 _por(T, T2);
3682 _movp(Dest, T); 3763 _movp(Dest, T);
3683 #undef LEGAL_HACK
3684 3764
3685 return; 3765 return;
3686 } 3766 }
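The pre-SSE4.1 select above relies on the condition being sign-extended to all-ones or all-zeros per lane, so that a = (b & d) | (c & ~d) picks whole lanes. A one-lane scalar model (a sketch only):

#include <cassert>
#include <cstdint>

// One lane of the mask-based select: D is the sign-extended i1 condition.
uint32_t selectLane(bool Cond, uint32_t B, uint32_t C) {
  uint32_t D = Cond ? 0xFFFFFFFFu : 0u; // sext of the condition lane
  return (B & D) | (C & ~D);            // pand; pandn; por
}

int main() {
  assert(selectLane(true, 10u, 20u) == 10u);
  assert(selectLane(false, 10u, 20u) == 20u);
  return 0;
}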
3687 3767
3688 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1: 3768 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
3689 Operand *ConditionRMI = legalize(Condition); 3769 Operand *ConditionRMI = legalize(Condition);
3690 Constant *Zero = Ctx->getConstantZero(IceType_i32); 3770 Constant *Zero = Ctx->getConstantZero(IceType_i32);
3691 InstX8632Label *Label = InstX8632Label::create(Func, this); 3771 InstX8632Label *Label = InstX8632Label::create(Func, this);
3692 3772
3693 if (Dest->getType() == IceType_i64) { 3773 if (Dest->getType() == IceType_i64) {
(...skipping 542 matching lines...)
4236 for (SizeT i = 0; i < Size; ++i) { 4316 for (SizeT i = 0; i < Size; ++i) {
4237 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 4317 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
4238 } 4318 }
4239 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 4319 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
4240 } 4320 }
4241 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName 4321 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
4242 << "\n"; 4322 << "\n";
4243 } 4323 }
4244 4324
4245 } // end of namespace Ice 4325 } // end of namespace Ice