Chromium Code Reviews

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 444443002: Subzero: Align the stack at the point of function calls. (Closed)
Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Improve wording of a comment and reorder function in crosstest (created 6 years, 4 months ago)
 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
 //
 // The Subzero Code Generator
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 //
 // This file implements the TargetLoweringX8632 class, which
(...skipping 106 matching lines...)
   size_t Index = static_cast<size_t>(Ty);
   (void)Index;
   assert(Index < TableTypeX8632AttributesSize);
   return TableTypeX8632Attributes[Ty].InVectorElementType;
 }
 
 // The maximum number of arguments to pass in XMM registers
 const unsigned X86_MAX_XMM_ARGS = 4;
 // The number of bits in a byte
 const unsigned X86_CHAR_BIT = 8;
+// Stack alignment
+const unsigned X86_STACK_ALIGNMENT_BYTES = 16;
+// Size of the return address on the stack
+const unsigned X86_RET_IP_SIZE_BYTES = 4;
+
+// Return the difference between Size and the next highest multiple of
+// the stack alignment. All values are in bytes.
+uint32_t getAdjustmentToAlignStackInBytes(uint32_t Size) {
Jim Stichnoth 2014/08/05 18:09:27 Almost all the calls to this routine have a patter
wala 2014/08/05 23:57:03 Done.
+  uint32_t Offset = Size % X86_STACK_ALIGNMENT_BYTES;
+  if (Offset) {
+    return X86_STACK_ALIGNMENT_BYTES - Offset;
+  }
+  return 0;
+}
 
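Aside: the helper's rounding behavior is easy to sanity-check in isolation. A minimal standalone sketch (the test harness below is illustrative only, not part of the patch):

    #include <cassert>
    #include <cstdint>

    const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;

    // Mirrors the patch's helper: the padding needed to reach the next
    // 16-byte boundary, or zero if Size is already aligned.
    uint32_t getAdjustmentToAlignStackInBytes(uint32_t Size) {
      uint32_t Offset = Size % X86_STACK_ALIGNMENT_BYTES;
      return Offset ? X86_STACK_ALIGNMENT_BYTES - Offset : 0;
    }

    int main() {
      assert(getAdjustmentToAlignStackInBytes(0) == 0);   // already aligned
      assert(getAdjustmentToAlignStackInBytes(4) == 12);  // 4 + 12 == 16
      assert(getAdjustmentToAlignStackInBytes(16) == 0);  // already aligned
      assert(getAdjustmentToAlignStackInBytes(20) == 12); // 20 + 12 == 32
      return 0;
    }

The common call pattern in this patch is X += getAdjustmentToAlignStackInBytes(X), which rounds X up to the next multiple of the stack alignment in place.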
 // Instruction set options
 namespace cl = ::llvm::cl;
 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet(
     "mattr", cl::desc("X86 target attributes"),
     cl::init(TargetX8632::SSE2),
     cl::values(
         clEnumValN(TargetX8632::SSE2, "sse2",
                    "Enable SSE2 instructions (default)"),
         clEnumValN(TargetX8632::SSE4_1, "sse4.1",
(...skipping 104 matching lines...)
     STATIC_ASSERT(_table1_##tag == _table2_##tag);
   ICETYPE_TABLE;
 #undef X
   }
 }
 
 } // end of anonymous namespace
 
 TargetX8632::TargetX8632(Cfg *Func)
     : TargetLowering(Func), InstructionSet(CLInstructionSet),
-      IsEbpBasedFrame(false), FrameSizeLocals(0), LocalsSizeBytes(0),
-      NextLabelNumber(0), ComputedLiveRanges(false),
+      IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0),
+      LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
       PhysicalRegisters(VarList(Reg_NUM)) {
   // TODO: Don't initialize IntegerRegisters and friends every time.
   // Instead, initialize in some sort of static initializer for the
   // class.
   llvm::SmallBitVector IntegerRegisters(Reg_NUM);
   llvm::SmallBitVector IntegerRegistersI8(Reg_NUM);
   llvm::SmallBitVector FloatRegisters(Reg_NUM);
   llvm::SmallBitVector VectorRegisters(Reg_NUM);
   llvm::SmallBitVector InvalidRegisters(Reg_NUM);
   ScratchRegs.resize(Reg_NUM);
(...skipping 273 matching lines...)
   Variable *Lo = Arg->getLo();
   Variable *Hi = Arg->getHi();
   Type Ty = Arg->getType();
   if (Lo && Hi && Ty == IceType_i64) {
     assert(Lo->getType() != IceType_i64); // don't want infinite recursion
     assert(Hi->getType() != IceType_i64); // don't want infinite recursion
     finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
     finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
     return;
   }
+  if (isVectorType(Ty)) {
+    InArgsSizeBytes += getAdjustmentToAlignStackInBytes(InArgsSizeBytes);
+  }
   Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
   InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
   if (Arg->hasReg()) {
     assert(Ty != IceType_i64);
     OperandX8632Mem *Mem = OperandX8632Mem::create(
         Func, Ty, FramePtr,
         Ctx->getConstantInt(IceType_i32, Arg->getStackOffset()));
     if (isVectorType(Arg->getType())) {
       _movp(Arg, Mem);
     } else {
       _mov(Arg, Mem);
     }
   }
 }
 
 Type TargetX8632::stackSlotType() { return IceType_i32; }
 
 void TargetX8632::addProlog(CfgNode *Node) {
   // If SimpleCoalescing is false, each variable without a register
   // gets its own unique stack slot, which leads to large stack
   // frames. If SimpleCoalescing is true, then each "global" variable
   // without a register gets its own slot, but "local" variable slots
   // are reused across basic blocks. E.g., if A and B are local to
   // block 1 and C is local to block 2, then C may share a slot with A
   // or B.
   const bool SimpleCoalescing = true;
   size_t InArgsSizeBytes = 0;
-  size_t RetIpSizeBytes = 4;
   size_t PreservedRegsSizeBytes = 0;
   LocalsSizeBytes = 0;
   Context.init(Node);
   Context.setInsertPoint(Context.getCur());
 
   // Determine stack frame offsets for each Variable without a
   // register assignment. This can be done as one variable per stack
   // slot. Or, do coalescing by running the register allocator again
   // with an infinite set of registers (as a side effect, this gives
   // variables a second chance at physical register assignment).
(...skipping 66 matching lines...)
     assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
                .count() == 0);
     PreservedRegsSizeBytes += 4;
     Variable *ebp = getPhysicalRegister(Reg_ebp);
     Variable *esp = getPhysicalRegister(Reg_esp);
     const bool SuppressStackAdjustment = true;
     _push(ebp, SuppressStackAdjustment);
     _mov(ebp, esp);
   }
 
+  if (NeedsStackAlignment) {
+    LocalsSizeBytes += getAdjustmentToAlignStackInBytes(
+        X86_RET_IP_SIZE_BYTES + LocalsSizeBytes + PreservedRegsSizeBytes);
+  }
+
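Aside: a worked example of the prolog padding, with made-up sizes (not taken from the patch). The padding covers everything on the stack at function entry plus the locals about to be allocated:

    // Hypothetical figures, tracing the arithmetic in addProlog():
    uint32_t RetIp = 4, Preserved = 4, Locals = 20;  // 28 bytes total
    Locals += getAdjustmentToAlignStackInBytes(RetIp + Locals + Preserved);
    // Locals is now 24, and RetIp + Preserved + Locals == 32, a multiple
    // of X86_STACK_ALIGNMENT_BYTES, so esp is 16-byte aligned after the
    // prolog's "sub esp".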
   // Generate "sub esp, LocalsSizeBytes"
   if (LocalsSizeBytes)
     _sub(getPhysicalRegister(Reg_esp),
          Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
 
   resetStackAdjustment();
 
   // Fill in stack offsets for stack args, and copy args into registers
   // for those that were register-allocated. Args are pushed right to
   // left, so Arg[0] is closest to the stack/frame pointer.
   Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
-  size_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;
+  size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
   if (!IsEbpBasedFrame)
     BasicFrameOffset += LocalsSizeBytes;
 
   unsigned NumXmmArgs = 0;
   for (SizeT i = 0; i < Args.size(); ++i) {
     Variable *Arg = Args[i];
     // Skip arguments passed in registers.
     if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) {
       ++NumXmmArgs;
       continue;
(...skipping 270 matching lines...)
 
   REGX8632_TABLE
 
 #undef X
 
   return Registers;
 }
 
 void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
   IsEbpBasedFrame = true;
-  // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize
+  NeedsStackAlignment = true;
Jim Stichnoth 2014/08/05 18:09:27 A couple things here. 1. It looks like alloca low
wala 2014/08/05 23:57:03 Done.
-  // the number of adjustments of esp, etc.
+  // TODO(sehr,stichnot): align allocated memory, minimize the number of
+  // adjustments of esp, etc.
+
   Variable *esp = getPhysicalRegister(Reg_esp);
   Operand *TotalSize = legalize(Inst->getSizeInBytes());
   Variable *Dest = Inst->getDest();
-  _sub(esp, TotalSize);
+  if (ConstantInteger *ConstantTotalSize =
+          llvm::dyn_cast<ConstantInteger>(TotalSize)) {
+    uint32_t Value = ConstantTotalSize->getValue();
+    Value += getAdjustmentToAlignStackInBytes(Value);
+    if (Value) {
+      _sub(esp, Ctx->getConstantInt(IceType_i32, Value));
+    }
+  } else {
+    Variable *T = makeReg(IceType_i32);
+    _mov(T, TotalSize);
+    // Non-constant sizes need to be adjusted to the next highest
+    // multiple of the stack alignment at runtime.
+    _add(T, Ctx->getConstantInt(IceType_i32, 15));
Jim Stichnoth 2014/08/05 18:09:26 Should these constants be in terms of X86_STACK_AL
wala 2014/08/05 23:57:03 Done.
+    _and(T, Ctx->getConstantInt(IceType_i32, -16));
+    _sub(esp, T);
+  }
   _mov(Dest, esp);
 }
 
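Aside: the add/and pair is the standard round-up-to-a-power-of-two trick, and the review exchange above suggests the literals 15 and -16 were later rewritten in terms of X86_STACK_ALIGNMENT_BYTES. A standalone sketch of the same computation (illustrative only):

    #include <cassert>
    #include <cstdint>

    // Adding 15 carries into the next 16-byte bucket unless n is already
    // aligned; masking with -16 (i.e. ~15) then clears the low four bits.
    uint32_t roundUpToStackAlignment(uint32_t n) {
      return (n + 15) & static_cast<uint32_t>(-16);
    }

    int main() {
      assert(roundUpToStackAlignment(0) == 0);
      assert(roundUpToStackAlignment(1) == 16);
      assert(roundUpToStackAlignment(16) == 16);
      assert(roundUpToStackAlignment(17) == 32);
      return 0;
    }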
 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
   Variable *Dest = Inst->getDest();
   Operand *Src0 = legalize(Inst->getSrc(0));
   Operand *Src1 = legalize(Inst->getSrc(1));
   if (Dest->getType() == IceType_i64) {
     Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
     Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
(...skipping 607 matching lines...)
     _br(Inst->getTargetUnconditional());
   } else {
     Operand *Src0 = legalize(Inst->getCondition());
     Constant *Zero = Ctx->getConstantZero(IceType_i32);
     _cmp(Src0, Zero);
     _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
   }
 }
 
 void TargetX8632::lowerCall(const InstCall *Instr) {
+  // x86-32 calling convention:
+  //
+  // * At the point before the call, the stack must be aligned to 16
+  // bytes.
+  //
+  // * The first four arguments of vector type, regardless of their
+  // position relative to the other arguments in the argument list, are
+  // placed in registers xmm0 - xmm3.
+  //
+  // * Other arguments are placed on the stack ordered according to the
Jim Stichnoth 2014/08/05 18:09:27 This is unclear to me (and possibly wrong). Does
wala 2014/08/05 23:57:03 Done.
+  // argument list and get assigned stack locations with increasing
+  // addresses. The first stack argument is placed at what is the bottom
+  // of the stack at the point before the call.
+  //
+  // * Stack arguments of vector type are aligned to the next highest
+  // multiple of 16 bytes. Other stack arguments are aligned to the
+  // next highest multiple of 4 bytes.
+  NeedsStackAlignment = true;
+
+  OperandList XmmArgs;
+  OperandList StackArgs, StackArgLocations;
+  uint32_t ParameterAreaSizeBytes = 0;
+
   // Classify each argument operand according to the location where the
   // argument is passed.
-  OperandList XmmArgs;
-  OperandList StackArgs;
   for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
     Operand *Arg = Instr->getArg(i);
-    if (isVectorType(Arg->getType()) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
+    Type Ty = Arg->getType();
+    // The PNaCl ABI requires the width of arguments to be at least 32 bits.
+    assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 ||
+           Ty == IceType_f64 || isVectorType(Ty));
+    if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
       XmmArgs.push_back(Arg);
     } else {
       StackArgs.push_back(Arg);
+      if (isVectorType(Arg->getType())) {
+        ParameterAreaSizeBytes +=
+            getAdjustmentToAlignStackInBytes(ParameterAreaSizeBytes);
+      }
+      Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
+      Constant *Loc = Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes);
+      StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
+      ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
     }
   }
-  // For stack arguments, generate a sequence of push instructions,
-  // pushing right to left, keeping track of stack offsets in case a
-  // push involves a stack operand and we are using an esp-based frame.
-  uint32_t StackOffset = 0;
-  // TODO: Consolidate the stack adjustment for function calls by
-  // reserving enough space for the arguments only once.
+
+  // Adjust the parameter area so that the stack is aligned. It is
+  // assumed that the stack is already aligned at the start of the
+  // calling sequence.
+  ParameterAreaSizeBytes +=
+      getAdjustmentToAlignStackInBytes(ParameterAreaSizeBytes);
+
+  // Subtract the appropriate amount for the argument area. This also
+  // takes care of setting the stack adjustment during emission.
   //
   // TODO: If for some reason the call instruction gets dead-code
   // eliminated after lowering, we would need to ensure that the
-  // pre-call push instructions and the post-call esp adjustment get
-  // eliminated as well.
-  for (OperandList::reverse_iterator I = StackArgs.rbegin(),
-                                     E = StackArgs.rend(); I != E; ++I) {
-    Operand *Arg = legalize(*I);
-    if (Arg->getType() == IceType_i64) {
-      _push(hiOperand(Arg));
-      _push(loOperand(Arg));
-    } else if (Arg->getType() == IceType_f64 || isVectorType(Arg->getType())) {
-      // If the Arg turns out to be a memory operand, more than one push
-      // instruction is required. This ends up being somewhat clumsy in
-      // the current IR, so we use a workaround. Force the operand into
-      // a (xmm) register, and then push the register. An xmm register
-      // push is actually not possible in x86, but the Push instruction
-      // emitter handles this by decrementing the stack pointer and
-      // directly writing the xmm register value.
-      _push(legalize(Arg, Legal_Reg));
-    } else {
-      // Otherwise PNaCl requires parameter types to be at least 32-bits.
-      assert(Arg->getType() == IceType_f32 || Arg->getType() == IceType_i32);
-      _push(Arg);
-    }
-    StackOffset += typeWidthInBytesOnStack(Arg->getType());
-  }
+  // pre-call and the post-call esp adjustment get eliminated as well.
+  if (ParameterAreaSizeBytes) {
+    _adjust_stack(ParameterAreaSizeBytes);
+  }
+
+  // Copy arguments that are passed on the stack to the appropriate
+  // stack locations.
+  for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) {
+    lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i]));
+    // TODO: Consider calling postLower() here to reduce the register
+    // pressure associated with using too many infinite weight
+    // temporaries when lowering the call sequence in -Om1 mode.
+  }
+
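Aside: a sketch of how ParameterAreaSizeBytes advances, using an invented argument list of (i32, 16-byte vector, i64) where the vector is assumed to have missed the four XMM slots (all values below are hypothetical, not from the patch):

    // Hypothetical trace of the classification loop in lowerCall():
    uint32_t ParameterAreaSizeBytes = 0;
    uint32_t Loc0 = ParameterAreaSizeBytes;  // i32 stored at [esp+0]
    ParameterAreaSizeBytes += 4;
    // Vector args are first padded to a 16-byte boundary: 4 -> 16.
    ParameterAreaSizeBytes +=
        getAdjustmentToAlignStackInBytes(ParameterAreaSizeBytes);
    uint32_t Loc1 = ParameterAreaSizeBytes;  // vector stored at [esp+16]
    ParameterAreaSizeBytes += 16;
    uint32_t Loc2 = ParameterAreaSizeBytes;  // i64 stored at [esp+32]
    ParameterAreaSizeBytes += 8;
    // Finally the whole area is padded so the stack stays 16-byte
    // aligned at the call: 40 -> 48, which _adjust_stack reserves.
    ParameterAreaSizeBytes +=
        getAdjustmentToAlignStackInBytes(ParameterAreaSizeBytes);
    // Loc0 == 0, Loc1 == 16, Loc2 == 32, ParameterAreaSizeBytes == 48.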
   // Copy arguments to be passed in registers to the appropriate
   // registers.
   // TODO: Investigate the impact of lowering arguments passed in
   // registers after lowering stack arguments as opposed to the other
   // way around. Lowering register arguments after stack arguments may
   // reduce register pressure. On the other hand, lowering register
   // arguments first (before stack arguments) may result in more compact
   // code, as the memory operand displacements may end up being smaller
   // before any stack adjustment is done.
   for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
(...skipping 43 matching lines...)
     }
   }
   // TODO(stichnot): LEAHACK: remove Legal_All (and use default) once
   // a proper emitter is used.
   Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All);
   Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
   Context.insert(NewCall);
   if (ReturnRegHi)
     Context.insert(InstFakeDef::create(Func, ReturnRegHi));
 
-  // Add the appropriate offset to esp.
-  if (StackOffset) {
+  // Add the appropriate offset to esp. The call instruction takes care
+  // of resetting the stack offset during emission.
+  if (ParameterAreaSizeBytes) {
     Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
-    _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset));
+    _add(esp, Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes));
   }
 
   // Insert a register-kill pseudo instruction.
   VarList KilledRegs;
   for (SizeT i = 0; i < ScratchRegs.size(); ++i) {
     if (ScratchRegs[i])
       KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));
   }
   Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall));
 
(...skipping 458 matching lines...)
   bool CanUsePextr =
       Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1;
   if (CanUsePextr && Ty != IceType_v4f32) {
     // Use pextrb, pextrw, or pextrd.
     Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
     Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized);
     _pextr(ExtractedElementR, SourceVectR, Mask);
   } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
     // Use pshufd and movd/movss.
     //
-    // ALIGNHACK: Force vector operands to registers in instructions that
-    // require aligned memory operands until support for stack alignment
-    // is implemented.
+    // ALIGNHACK: Force vector operands to registers in instructions
+    // that require aligned memory operands until support for data
+    // alignment is implemented.
 #define ALIGN_HACK(Vect) legalizeToVar((Vect))
     Operand *SourceVectRM =
         legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem);
     Variable *T = NULL;
     if (Index) {
       // The shuffle only needs to occur if the element to be extracted
       // is not at the lowest index.
       Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
       T = makeReg(Ty);
       _pshufd(T, ALIGN_HACK(SourceVectRM), Mask);
(...skipping 64 matching lines...)
 
     if (Condition == InstFcmp::True) {
       // makeVectorOfOnes() requires an integer vector type.
       T = makeVectorOfMinusOnes(IceType_v4i32);
     } else if (Condition == InstFcmp::False) {
       T = makeVectorOfZeros(Dest->getType());
     } else {
       Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
       Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
 
-      // ALIGNHACK: Without support for stack alignment, both operands to
-      // cmpps need to be forced into registers. Once support for stack
-      // alignment is implemented, remove LEGAL_HACK.
+      // ALIGNHACK: Without support for data alignment, both operands to
+      // cmpps need to be forced into registers. Once support for data
+      // alignment is implemented, remove LEGAL_HACK.
 #define LEGAL_HACK(Vect) legalizeToVar((Vect))
       switch (Condition) {
       default: {
         InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate;
         assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);
         T = makeReg(Src0RM->getType());
         _movp(T, Src0RM);
         _cmpps(T, LEGAL_HACK(Src1RM), Predicate);
       } break;
(...skipping 119 matching lines...)
       Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty);
       _movp(T0, Src0RM);
       _pxor(T0, HighOrderBits);
       _movp(T1, Src1RM);
       _pxor(T1, HighOrderBits);
       Src0RM = T0;
       Src1RM = T1;
     }
 
     // TODO: ALIGNHACK: Both operands to compare instructions need to be
-    // in registers until stack alignment support is implemented. Once
-    // there is support for stack alignment, LEGAL_HACK can be removed.
+    // in registers until data alignment support is implemented. Once
+    // there is support for data alignment, LEGAL_HACK can be removed.
 #define LEGAL_HACK(Vect) legalizeToVar((Vect))
     Variable *T = makeReg(Ty);
     switch (Condition) {
     default:
       llvm_unreachable("unexpected condition");
       break;
     case InstIcmp::Eq: {
       _movp(T, Src0RM);
       _pcmpeq(T, LEGAL_HACK(Src1RM));
     } break;
(...skipping 199 matching lines...)
     // insertelement into index 3 (result is stored in T):
     //   T := SourceVectRM
     //   ElementR := ElementR[0, 0] T[0, 2]
     //   T := T[0, 1] ElementR[3, 0]
     const unsigned char Mask1[3] = {0, 192, 128};
     const unsigned char Mask2[3] = {227, 196, 52};
 
     Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]);
     Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]);
 
-    // ALIGNHACK: Force vector operands to registers in instructions that
-    // require aligned memory operands until support for stack alignment
-    // is implemented.
+    // ALIGNHACK: Force vector operands to registers in instructions
+    // that require aligned memory operands until support for data
+    // alignment is implemented.
 #define ALIGN_HACK(Vect) legalizeToVar((Vect))
     if (Index == 1) {
       SourceVectRM = ALIGN_HACK(SourceVectRM);
       _shufps(ElementR, SourceVectRM, Mask1Constant);
       _shufps(ElementR, SourceVectRM, Mask2Constant);
       _movp(Inst->getDest(), ElementR);
     } else {
       Variable *T = makeReg(Ty);
       _movp(T, SourceVectRM);
       _shufps(ElementR, T, Mask1Constant);
(...skipping 267 matching lines...)
   case Intrinsics::Memmove: {
     InstCall *Call = makeHelperCall("memmove", NULL, 3);
     Call->addArg(Instr->getArg(0));
     Call->addArg(Instr->getArg(1));
     Call->addArg(Instr->getArg(2));
     lowerCall(Call);
     return;
   }
   case Intrinsics::Memset: {
     // The value operand needs to be extended to a stack slot size
-    // because "push" only works for a specific operand size.
+    // because the PNaCl ABI requires arguments to be at least 32 bits
+    // wide.
     Operand *ValOp = Instr->getArg(1);
     assert(ValOp->getType() == IceType_i8);
     Variable *ValExt = Func->makeVariable(stackSlotType(), Context.getNode());
     lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp));
     InstCall *Call = makeHelperCall("memset", NULL, 3);
     Call->addArg(Instr->getArg(0));
     Call->addArg(ValExt);
     Call->addArg(Instr->getArg(2));
     lowerCall(Call);
     return;
(...skipping 648 matching lines...)
   Variable *Dest = Inst->getDest();
   Operand *SrcT = Inst->getTrueOperand();
   Operand *SrcF = Inst->getFalseOperand();
   Operand *Condition = Inst->getCondition();
 
   if (isVectorType(Dest->getType())) {
     Type SrcTy = SrcT->getType();
     Variable *T = makeReg(SrcTy);
     Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
     Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
-    // ALIGNHACK: Until stack alignment support is implemented, vector
+    // ALIGNHACK: Until data alignment support is implemented, vector
     // instructions need to have vector operands in registers. Once
-    // there is support for stack alignment, LEGAL_HACK can be removed.
+    // there is support for data alignment, LEGAL_HACK can be removed.
 #define LEGAL_HACK(Vect) legalizeToVar((Vect))
     if (InstructionSet >= SSE4_1) {
       // TODO(wala): If the condition operand is a constant, use blendps
       // or pblendw.
       //
       // Use blendvps or pblendvb to implement select.
       if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
           SrcTy == IceType_v4f32) {
         Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
         Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0);
(...skipping 74 matching lines...)
     _mov(Dest, SrcF);
   }
 
   Context.insert(Label);
 }
 
 void TargetX8632::lowerStore(const InstStore *Inst) {
   Operand *Value = Inst->getData();
   Operand *Addr = Inst->getAddr();
   OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType());
+  Type Ty = NewAddr->getType();
 
-  if (NewAddr->getType() == IceType_i64) {
+  if (Ty == IceType_i64) {
     Value = legalize(Value);
     Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true);
     Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true);
     _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
     _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
+  } else if (isVectorType(Ty)) {
+    _storep(legalizeToVar(Value), NewAddr);
   } else {
     Value = legalize(Value, Legal_Reg | Legal_Imm, true);
     _store(Value, NewAddr);
   }
 }
 
 void TargetX8632::doAddressOptStore() {
   InstStore *Inst = llvm::cast<InstStore>(*Context.getCur());
   Operand *Data = Inst->getData();
   Operand *Addr = Inst->getAddr();
(...skipping 322 matching lines...)
   SizeT NumVars = Src->getNumVars();
   for (SizeT J = 0; J < NumVars; ++J) {
     Variable *Var = Src->getVar(J);
     if (Var->hasReg())
       continue;
     if (!Var->getWeight().isInf())
       continue;
     llvm::SmallBitVector AvailableTypedRegisters =
         AvailableRegisters & getRegisterSetForType(Var->getType());
     if (!AvailableTypedRegisters.any()) {
-      // This is a hack in case we run out of physical registers
-      // due to an excessive number of "push" instructions from
-      // lowering a call.
+      // This is a hack in case we run out of physical registers due
+      // to an excessively long code sequence, as might happen when
+      // lowering arguments in lowerCall().
       AvailableRegisters = WhiteList;
       AvailableTypedRegisters =
           AvailableRegisters & getRegisterSetForType(Var->getType());
     }
     assert(AvailableTypedRegisters.any());
     int32_t RegNum = AvailableTypedRegisters.find_first();
     Var->setRegNum(RegNum);
     AvailableRegisters[RegNum] = false;
   }
 }
(...skipping 111 matching lines...)
     for (SizeT i = 0; i < Size; ++i) {
       Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
     }
     Str << "\t.size\t" << MangledName << ", " << Size << "\n";
   }
   Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName
       << "\n";
 }
 
 } // end of namespace Ice