OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
(...skipping 103 matching lines...)
114 // representation of the vector. | 114 // representation of the vector. |
115 Type getInVectorElementType(Type Ty) { | 115 Type getInVectorElementType(Type Ty) { |
116 assert(isVectorType(Ty)); | 116 assert(isVectorType(Ty)); |
117 size_t Index = static_cast<size_t>(Ty); | 117 size_t Index = static_cast<size_t>(Ty); |
118 (void)Index; | 118 (void)Index; |
119 assert(Index < TableTypeX8632AttributesSize); | 119 assert(Index < TableTypeX8632AttributesSize); |
120 return TableTypeX8632Attributes[Ty].InVectorElementType; | 120 return TableTypeX8632Attributes[Ty].InVectorElementType; |
121 } | 121 } |
122 | 122 |
123 // The maximum number of arguments to pass in XMM registers | 123 // The maximum number of arguments to pass in XMM registers |
124 const unsigned X86_MAX_XMM_ARGS = 4; | 124 const unsigned X86_MAX_XMM_ARGS = 4; |
Jim Stichnoth
2014/08/06 18:26:16
Generally, I've been preferring to use uint32_t rather than unsigned.
wala
2014/08/07 18:09:21
Done.
125 // The number of bits in a byte | 125 // The number of bits in a byte |
126 const unsigned X86_CHAR_BIT = 8; | 126 const unsigned X86_CHAR_BIT = 8; |
127 // Stack alignment | |
128 const unsigned X86_STACK_ALIGNMENT_BYTES = 16; | |
129 // Size of the return address on the stack | |
130 const unsigned X86_RET_IP_SIZE_BYTES = 4; | |
131 | |
132 // Value is a size in bytes. Return Value adjusted to the next highest | |
133 // multiple of the stack alignment. | |
134 uint32_t applyStackAlignment(uint32_t Value) { | |
135 // power of 2 | |
136 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); | |
137 return (Value + X86_STACK_ALIGNMENT_BYTES - 1) & -X86_STACK_ALIGNMENT_BYTES; | |
138 } | |
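For intuition, the rounding expression above can be exercised in isolation. A minimal standalone sketch (not part of the change) of the same bit trick, with the 16-byte alignment hard-coded:

    #include <cassert>
    #include <cstdint>

    // Round Value up to the next multiple of 16. Because 16 is a power
    // of 2, -16 is ~15 in two's complement, so the AND clears the low
    // four bits after the +15 carries past any remainder.
    uint32_t roundUpTo16(uint32_t Value) {
      return (Value + 16 - 1) & -16u;
    }

    int main() {
      assert(roundUpTo16(0) == 0);   // already aligned
      assert(roundUpTo16(20) == 32); // 20 + 15 = 35; 35 & ~15 = 32
      assert(roundUpTo16(32) == 32); // exact multiples map to themselves
    }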
127 | 139 |
128 // Instruction set options | 140 // Instruction set options |
129 namespace cl = ::llvm::cl; | 141 namespace cl = ::llvm::cl; |
130 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet( | 142 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet( |
131 "mattr", cl::desc("X86 target attributes"), | 143 "mattr", cl::desc("X86 target attributes"), |
132 cl::init(TargetX8632::SSE2), | 144 cl::init(TargetX8632::SSE2), |
133 cl::values( | 145 cl::values( |
134 clEnumValN(TargetX8632::SSE2, "sse2", | 146 clEnumValN(TargetX8632::SSE2, "sse2", |
135 "Enable SSE2 instructions (default)"), | 147 "Enable SSE2 instructions (default)"), |
136 clEnumValN(TargetX8632::SSE4_1, "sse4.1", | 148 clEnumValN(TargetX8632::SSE4_1, "sse4.1", |
(...skipping 104 matching lines...)
241 STATIC_ASSERT(_table1_##tag == _table2_##tag); | 253 STATIC_ASSERT(_table1_##tag == _table2_##tag); |
242 ICETYPE_TABLE; | 254 ICETYPE_TABLE; |
243 #undef X | 255 #undef X |
244 } | 256 } |
245 } | 257 } |
246 | 258 |
247 } // end of anonymous namespace | 259 } // end of anonymous namespace |
248 | 260 |
249 TargetX8632::TargetX8632(Cfg *Func) | 261 TargetX8632::TargetX8632(Cfg *Func) |
250 : TargetLowering(Func), InstructionSet(CLInstructionSet), | 262 : TargetLowering(Func), InstructionSet(CLInstructionSet), |
251 IsEbpBasedFrame(false), FrameSizeLocals(0), LocalsSizeBytes(0), | 263 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), |
252 NextLabelNumber(0), ComputedLiveRanges(false), | 264 LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false), |
253 PhysicalRegisters(VarList(Reg_NUM)) { | 265 PhysicalRegisters(VarList(Reg_NUM)) { |
254 // TODO: Don't initialize IntegerRegisters and friends every time. | 266 // TODO: Don't initialize IntegerRegisters and friends every time. |
255 // Instead, initialize in some sort of static initializer for the | 267 // Instead, initialize in some sort of static initializer for the |
256 // class. | 268 // class. |
257 llvm::SmallBitVector IntegerRegisters(Reg_NUM); | 269 llvm::SmallBitVector IntegerRegisters(Reg_NUM); |
258 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM); | 270 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM); |
259 llvm::SmallBitVector FloatRegisters(Reg_NUM); | 271 llvm::SmallBitVector FloatRegisters(Reg_NUM); |
260 llvm::SmallBitVector VectorRegisters(Reg_NUM); | 272 llvm::SmallBitVector VectorRegisters(Reg_NUM); |
261 llvm::SmallBitVector InvalidRegisters(Reg_NUM); | 273 llvm::SmallBitVector InvalidRegisters(Reg_NUM); |
262 ScratchRegs.resize(Reg_NUM); | 274 ScratchRegs.resize(Reg_NUM); |
(...skipping 273 matching lines...)
536 Variable *Lo = Arg->getLo(); | 548 Variable *Lo = Arg->getLo(); |
537 Variable *Hi = Arg->getHi(); | 549 Variable *Hi = Arg->getHi(); |
538 Type Ty = Arg->getType(); | 550 Type Ty = Arg->getType(); |
539 if (Lo && Hi && Ty == IceType_i64) { | 551 if (Lo && Hi && Ty == IceType_i64) { |
540 assert(Lo->getType() != IceType_i64); // don't want infinite recursion | 552 assert(Lo->getType() != IceType_i64); // don't want infinite recursion |
541 assert(Hi->getType() != IceType_i64); // don't want infinite recursion | 553 assert(Hi->getType() != IceType_i64); // don't want infinite recursion |
542 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 554 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
543 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 555 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
544 return; | 556 return; |
545 } | 557 } |
558 if (isVectorType(Ty)) { | |
559 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes); | |
560 } | |
546 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); | 561 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
547 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 562 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
548 if (Arg->hasReg()) { | 563 if (Arg->hasReg()) { |
549 assert(Ty != IceType_i64); | 564 assert(Ty != IceType_i64); |
550 OperandX8632Mem *Mem = OperandX8632Mem::create( | 565 OperandX8632Mem *Mem = OperandX8632Mem::create( |
551 Func, Ty, FramePtr, | 566 Func, Ty, FramePtr, |
552 Ctx->getConstantInt(IceType_i32, Arg->getStackOffset())); | 567 Ctx->getConstantInt(IceType_i32, Arg->getStackOffset())); |
553 if (isVectorType(Arg->getType())) { | 568 if (isVectorType(Arg->getType())) { |
554 _movp(Arg, Mem); | 569 _movp(Arg, Mem); |
555 } else { | 570 } else { |
556 _mov(Arg, Mem); | 571 _mov(Arg, Mem); |
557 } | 572 } |
558 } | 573 } |
559 } | 574 } |
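A worked example of the new in-argument layout (illustrative types, not from the change): for stack-passed arguments (i32, i32, <4 x i32>), InArgsSizeBytes advances 0 -> 4 -> 8, is rounded up to 16 by applyStackAlignment() before the vector argument, and ends at 32; the vector therefore lands at BasicFrameOffset + 16 rather than BasicFrameOffset + 8.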
560 | 575 |
561 Type TargetX8632::stackSlotType() { return IceType_i32; } | 576 Type TargetX8632::stackSlotType() { return IceType_i32; } |
562 | 577 |
563 void TargetX8632::addProlog(CfgNode *Node) { | 578 void TargetX8632::addProlog(CfgNode *Node) { |
564 // If SimpleCoalescing is false, each variable without a register | 579 // If SimpleCoalescing is false, each variable without a register |
565 // gets its own unique stack slot, which leads to large stack | 580 // gets its own unique stack slot, which leads to large stack |
566 // frames. If SimpleCoalescing is true, then each "global" variable | 581 // frames. If SimpleCoalescing is true, then each "global" variable |
567 // without a register gets its own slot, but "local" variable slots | 582 // without a register gets its own slot, but "local" variable slots |
568 // are reused across basic blocks. E.g., if A and B are local to | 583 // are reused across basic blocks. E.g., if A and B are local to |
569 // block 1 and C is local to block 2, then C may share a slot with A | 584 // block 1 and C is local to block 2, then C may share a slot with A |
570 // or B. | 585 // or B. |
571 const bool SimpleCoalescing = true; | 586 const bool SimpleCoalescing = true; |
572 size_t InArgsSizeBytes = 0; | 587 size_t InArgsSizeBytes = 0; |
573 size_t RetIpSizeBytes = 4; | |
574 size_t PreservedRegsSizeBytes = 0; | 588 size_t PreservedRegsSizeBytes = 0; |
575 LocalsSizeBytes = 0; | 589 LocalsSizeBytes = 0; |
576 Context.init(Node); | 590 Context.init(Node); |
577 Context.setInsertPoint(Context.getCur()); | 591 Context.setInsertPoint(Context.getCur()); |
578 | 592 |
579 // Determine stack frame offsets for each Variable without a | 593 // Determine stack frame offsets for each Variable without a |
580 // register assignment. This can be done as one variable per stack | 594 // register assignment. This can be done as one variable per stack |
581 // slot. Or, do coalescing by running the register allocator again | 595 // slot. Or, do coalescing by running the register allocator again |
582 // with an infinite set of registers (as a side effect, this gives | 596 // with an infinite set of registers (as a side effect, this gives |
583 // variables a second chance at physical register assignment). | 597 // variables a second chance at physical register assignment). |
(...skipping 66 matching lines...)
650 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) | 664 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) |
651 .count() == 0); | 665 .count() == 0); |
652 PreservedRegsSizeBytes += 4; | 666 PreservedRegsSizeBytes += 4; |
653 Variable *ebp = getPhysicalRegister(Reg_ebp); | 667 Variable *ebp = getPhysicalRegister(Reg_ebp); |
654 Variable *esp = getPhysicalRegister(Reg_esp); | 668 Variable *esp = getPhysicalRegister(Reg_esp); |
655 const bool SuppressStackAdjustment = true; | 669 const bool SuppressStackAdjustment = true; |
656 _push(ebp, SuppressStackAdjustment); | 670 _push(ebp, SuppressStackAdjustment); |
657 _mov(ebp, esp); | 671 _mov(ebp, esp); |
658 } | 672 } |
659 | 673 |
674 if (NeedsStackAlignment) { | |
675 uint32_t StackSize = applyStackAlignment( | |
676 X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes + LocalsSizeBytes); | |
677 LocalsSizeBytes = | |
678 StackSize - X86_RET_IP_SIZE_BYTES - PreservedRegsSizeBytes; | |
679 } | |
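Illustrative numbers only: with X86_RET_IP_SIZE_BYTES = 4, one preserved register (the pushed ebp, so PreservedRegsSizeBytes = 4), and LocalsSizeBytes = 20, StackSize = applyStackAlignment(28) = 32, so LocalsSizeBytes is bumped to 24 and esp ends up 16-byte aligned after the sub generated below.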
680 | |
660 // Generate "sub esp, LocalsSizeBytes" | 681 // Generate "sub esp, LocalsSizeBytes" |
661 if (LocalsSizeBytes) | 682 if (LocalsSizeBytes) |
662 _sub(getPhysicalRegister(Reg_esp), | 683 _sub(getPhysicalRegister(Reg_esp), |
663 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes)); | 684 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes)); |
664 | 685 |
665 resetStackAdjustment(); | 686 resetStackAdjustment(); |
666 | 687 |
667 // Fill in stack offsets for stack args, and copy args into registers | 688 // Fill in stack offsets for stack args, and copy args into registers |
668 // for those that were register-allocated. Args are pushed right to | 689 // for those that were register-allocated. Args are pushed right to |
669 // left, so Arg[0] is closest to the stack/frame pointer. | 690 // left, so Arg[0] is closest to the stack/frame pointer. |
670 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 691 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
671 size_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes; | 692 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; |
672 if (!IsEbpBasedFrame) | 693 if (!IsEbpBasedFrame) |
673 BasicFrameOffset += LocalsSizeBytes; | 694 BasicFrameOffset += LocalsSizeBytes; |
674 | 695 |
675 unsigned NumXmmArgs = 0; | 696 unsigned NumXmmArgs = 0; |
676 for (SizeT i = 0; i < Args.size(); ++i) { | 697 for (SizeT i = 0; i < Args.size(); ++i) { |
677 Variable *Arg = Args[i]; | 698 Variable *Arg = Args[i]; |
678 // Skip arguments passed in registers. | 699 // Skip arguments passed in registers. |
679 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { | 700 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { |
680 ++NumXmmArgs; | 701 ++NumXmmArgs; |
681 continue; | 702 continue; |
(...skipping 270 matching lines...)
952 | 973 |
953 REGX8632_TABLE | 974 REGX8632_TABLE |
954 | 975 |
955 #undef X | 976 #undef X |
956 | 977 |
957 return Registers; | 978 return Registers; |
958 } | 979 } |
959 | 980 |
960 void TargetX8632::lowerAlloca(const InstAlloca *Inst) { | 981 void TargetX8632::lowerAlloca(const InstAlloca *Inst) { |
961 IsEbpBasedFrame = true; | 982 IsEbpBasedFrame = true; |
962 // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize | 983 // Conservatively require the stack to be aligned. Some stack |
963 // the number of adjustments of esp, etc. | 984 // adjustment operations implemented below assume that the stack is |
985 // aligned before the alloca. All the alloca code ensures that the | |
986 // stack alignment is preserved after the alloca. The stack alignment | |
987 // restriction can be relaxed in some cases. | |
988 NeedsStackAlignment = true; | |
989 | |
990 // TODO(sehr,stichnot): align allocated memory, minimize the number of | |
991 // adjustments of esp, etc. | |
964 Variable *esp = getPhysicalRegister(Reg_esp); | 992 Variable *esp = getPhysicalRegister(Reg_esp); |
965 Operand *TotalSize = legalize(Inst->getSizeInBytes()); | 993 Operand *TotalSize = legalize(Inst->getSizeInBytes()); |
966 Variable *Dest = Inst->getDest(); | 994 Variable *Dest = Inst->getDest(); |
967 _sub(esp, TotalSize); | 995 uint32_t AlignmentParam = Inst->getAlignInBytes(); |
996 | |
997 // LLVM enforces power of 2 alignment. | |
998 assert((AlignmentParam & (AlignmentParam - 1)) == 0); | |
999 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); | |
1000 bool AdjustAlignment = AlignmentParam > X86_STACK_ALIGNMENT_BYTES; | |
Jim Stichnoth
2014/08/06 18:26:16
It might be slightly clearer this way:
uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES);
wala
2014/08/07 18:09:21
Done.
1001 uint32_t Alignment = | |
1002 AdjustAlignment ? AlignmentParam : X86_STACK_ALIGNMENT_BYTES; | |
1003 | |
1004 if (AdjustAlignment) { | |
1005 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment)); | |
1006 } | |
1007 if (ConstantInteger *ConstantTotalSize = | |
1008 llvm::dyn_cast<ConstantInteger>(TotalSize)) { | |
1009 uint32_t Value = ConstantTotalSize->getValue(); | |
1010 // Round Value up to the next highest multiple of the alignment. | |
1011 Value = (Value + Alignment - 1) & -Alignment; | |
Jim Stichnoth
2014/08/06 18:26:16
Should this instead do
Value = applyStackAlignment(Value);
wala
2014/08/07 18:09:21
No. Alignment is a power of 2 at least as big as the stack alignment.
1012 _sub(esp, Ctx->getConstantInt(IceType_i32, Value)); | |
1013 } else { | |
1014 // Non-constant sizes need to be adjusted to the next highest | |
1015 // multiple of the required alignment at runtime. | |
1016 Variable *T = makeReg(IceType_i32); | |
1017 _mov(T, TotalSize); | |
1018 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1)); | |
1019 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment)); | |
1020 _sub(esp, T); | |
1021 } | |
968 _mov(Dest, esp); | 1022 _mov(Dest, esp); |
969 } | 1023 } |
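A sketch of the sequence this lowering produces for a non-constant size with a 32-byte alignment request (illustrative x86 only; T stands for whatever register makeReg() selects):

    and  esp, -32        ; AlignmentParam > 16, so realign esp first
    mov  T, <TotalSize>
    add  T, 31           ; round the runtime size up...
    and  T, -32          ; ...to the next multiple of 32
    sub  esp, T
    mov  <Dest>, esp     ; Dest points at the newly reserved block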
970 | 1024 |
971 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | 1025 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
972 Variable *Dest = Inst->getDest(); | 1026 Variable *Dest = Inst->getDest(); |
973 Operand *Src0 = legalize(Inst->getSrc(0)); | 1027 Operand *Src0 = legalize(Inst->getSrc(0)); |
974 Operand *Src1 = legalize(Inst->getSrc(1)); | 1028 Operand *Src1 = legalize(Inst->getSrc(1)); |
975 if (Dest->getType() == IceType_i64) { | 1029 if (Dest->getType() == IceType_i64) { |
976 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1030 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
977 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1031 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
(...skipping 607 matching lines...)
1585 _br(Inst->getTargetUnconditional()); | 1639 _br(Inst->getTargetUnconditional()); |
1586 } else { | 1640 } else { |
1587 Operand *Src0 = legalize(Inst->getCondition()); | 1641 Operand *Src0 = legalize(Inst->getCondition()); |
1588 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1642 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1589 _cmp(Src0, Zero); | 1643 _cmp(Src0, Zero); |
1590 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); | 1644 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); |
1591 } | 1645 } |
1592 } | 1646 } |
1593 | 1647 |
1594 void TargetX8632::lowerCall(const InstCall *Instr) { | 1648 void TargetX8632::lowerCall(const InstCall *Instr) { |
1649 // x86-32 calling convention: | |
1650 // | |
1651 // * At the point before the call, the stack must be aligned to 16 | |
1652 // bytes. | |
1653 // | |
1654 // * The first four arguments of vector type, regardless of their | |
1655 // position relative to the other arguments in the argument list, are | |
1656 // placed in registers xmm0 - xmm3. | |
1657 // | |
1658 // * Other arguments are pushed onto the stack in right-to-left order, | |
1659 // such that the left-most argument ends up on the top of the stack at | |
1660 // the lowest memory address. | |
1661 // | |
1662 // * Stack arguments of vector type are aligned to start at the next | |
1663 // highest multiple of 16 bytes. Other stack arguments are aligned to | |
1664 // 4 bytes. | |
1665 // | |
1666 // This intends to match the section "IA-32 Function Calling | |
1667 // Convention" of the document "OS X ABI Function Call Guide" by | |
1668 // Apple. | |
1669 NeedsStackAlignment = true; | |
1670 | |
1671 OperandList XmmArgs; | |
1672 OperandList StackArgs, StackArgLocations; | |
1673 uint32_t ParameterAreaSizeBytes = 0; | |
1674 | |
1595 // Classify each argument operand according to the location where the | 1675 // Classify each argument operand according to the location where the |
1596 // argument is passed. | 1676 // argument is passed. |
1597 OperandList XmmArgs; | |
1598 OperandList StackArgs; | |
1599 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | 1677 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { |
1600 Operand *Arg = Instr->getArg(i); | 1678 Operand *Arg = Instr->getArg(i); |
1601 if (isVectorType(Arg->getType()) && XmmArgs.size() < X86_MAX_XMM_ARGS) { | 1679 Type Ty = Arg->getType(); |
1680 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | |
1681 assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 || | |
1682 Ty == IceType_f64 || isVectorType(Ty)); | |
1683 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { | |
1602 XmmArgs.push_back(Arg); | 1684 XmmArgs.push_back(Arg); |
1603 } else { | 1685 } else { |
1604 StackArgs.push_back(Arg); | 1686 StackArgs.push_back(Arg); |
1687 if (isVectorType(Arg->getType())) { | |
1688 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); | |
1689 } | |
1690 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); | |
1691 Constant *Loc = Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes); | |
1692 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); | |
1693 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | |
1605 } | 1694 } |
1606 } | 1695 } |
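As a worked example of this classification (hypothetical signature): for a call f(i32 a, <4 x i32> b, float c, <4 x i32> d), b and d land in XmmArgs (eventually xmm0 and xmm1), while a and c go to StackArgs with StackArgLocations [esp + 0] and [esp + 4]; ParameterAreaSizeBytes reaches 8 and is then rounded up to 16 by the applyStackAlignment() call below.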
1607 // For stack arguments, generate a sequence of push instructions, | 1696 |
1608 // pushing right to left, keeping track of stack offsets in case a | 1697 // Adjust the parameter area so that the stack is aligned. It is |
1609 // push involves a stack operand and we are using an esp-based frame. | 1698 // assumed that the stack is already aligned at the start of the |
1610 uint32_t StackOffset = 0; | 1699 // calling sequence. |
1611 // TODO: Consolidate the stack adjustment for function calls by | 1700 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); |
1612 // reserving enough space for the arguments only once. | 1701 |
1702 // Subtract the appropriate amount for the argument area. This also | |
1703 // takes care of setting the stack adjustment during emission. | |
1613 // | 1704 // |
1614 // TODO: If for some reason the call instruction gets dead-code | 1705 // TODO: If for some reason the call instruction gets dead-code |
1615 // eliminated after lowering, we would need to ensure that the | 1706 // eliminated after lowering, we would need to ensure that the |
1616 // pre-call push instructions and the post-call esp adjustment get | 1707 // pre-call and the post-call esp adjustment get eliminated as well. |
1617 // eliminated as well. | 1708 if (ParameterAreaSizeBytes) { |
1618 for (OperandList::reverse_iterator I = StackArgs.rbegin(), | 1709 _adjust_stack(ParameterAreaSizeBytes); |
1619 E = StackArgs.rend(); I != E; ++I) { | |
1620 Operand *Arg = legalize(*I); | |
1621 if (Arg->getType() == IceType_i64) { | |
1622 _push(hiOperand(Arg)); | |
1623 _push(loOperand(Arg)); | |
1624 } else if (Arg->getType() == IceType_f64 || isVectorType(Arg->getType())) { | |
1625 // If the Arg turns out to be a memory operand, more than one push | |
1626 // instruction is required. This ends up being somewhat clumsy in | |
1627 // the current IR, so we use a workaround. Force the operand into | |
1628 // a (xmm) register, and then push the register. An xmm register | |
1629 // push is actually not possible in x86, but the Push instruction | |
1630 // emitter handles this by decrementing the stack pointer and | |
1631 // directly writing the xmm register value. | |
1632 _push(legalize(Arg, Legal_Reg)); | |
1633 } else { | |
1634 // Otherwise PNaCl requires parameter types to be at least 32-bits. | |
1635 assert(Arg->getType() == IceType_f32 || Arg->getType() == IceType_i32); | |
1636 _push(Arg); | |
1637 } | |
1638 StackOffset += typeWidthInBytesOnStack(Arg->getType()); | |
1639 } | 1710 } |
1711 | |
1712 // Copy arguments that are passed on the stack to the appropriate | |
1713 // stack locations. | |
1714 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { | |
1715 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | |
1716 // TODO: Consider calling postLower() here to reduce the register | |
1717 // pressure associated with using too many infinite weight | |
1718 // temporaries when lowering the call sequence in -Om1 mode. | |
1719 } | |
1720 | |
1640 // Copy arguments to be passed in registers to the appropriate | 1721 // Copy arguments to be passed in registers to the appropriate |
1641 // registers. | 1722 // registers. |
1642 // TODO: Investigate the impact of lowering arguments passed in | 1723 // TODO: Investigate the impact of lowering arguments passed in |
1643 // registers after lowering stack arguments as opposed to the other | 1724 // registers after lowering stack arguments as opposed to the other |
1644 // way around. Lowering register arguments after stack arguments may | 1725 // way around. Lowering register arguments after stack arguments may |
1645 // reduce register pressure. On the other hand, lowering register | 1726 // reduce register pressure. On the other hand, lowering register |
1646 // arguments first (before stack arguments) may result in more compact | 1727 // arguments first (before stack arguments) may result in more compact |
1647 // code, as the memory operand displacements may end up being smaller | 1728 // code, as the memory operand displacements may end up being smaller |
1648 // before any stack adjustment is done. | 1729 // before any stack adjustment is done. |
1649 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | 1730 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { |
(...skipping 43 matching lines...)
1693 } | 1774 } |
1694 } | 1775 } |
1695 // TODO(stichnot): LEAHACK: remove Legal_All (and use default) once | 1776 // TODO(stichnot): LEAHACK: remove Legal_All (and use default) once |
1696 // a proper emitter is used. | 1777 // a proper emitter is used. |
1697 Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All); | 1778 Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All); |
1698 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); | 1779 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); |
1699 Context.insert(NewCall); | 1780 Context.insert(NewCall); |
1700 if (ReturnRegHi) | 1781 if (ReturnRegHi) |
1701 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | 1782 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
1702 | 1783 |
1703 // Add the appropriate offset to esp. | 1784 // Add the appropriate offset to esp. The call instruction takes care |
1704 if (StackOffset) { | 1785 // of resetting the stack offset during emission. |
1786 if (ParameterAreaSizeBytes) { | |
1705 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); | 1787 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); |
1706 _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset)); | 1788 _add(esp, Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes)); |
1707 } | 1789 } |
1708 | 1790 |
1709 // Insert a register-kill pseudo instruction. | 1791 // Insert a register-kill pseudo instruction. |
1710 VarList KilledRegs; | 1792 VarList KilledRegs; |
1711 for (SizeT i = 0; i < ScratchRegs.size(); ++i) { | 1793 for (SizeT i = 0; i < ScratchRegs.size(); ++i) { |
1712 if (ScratchRegs[i]) | 1794 if (ScratchRegs[i]) |
1713 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i)); | 1795 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i)); |
1714 } | 1796 } |
1715 Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall)); | 1797 Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall)); |
1716 | 1798 |
(...skipping 458 matching lines...)
2175 bool CanUsePextr = | 2257 bool CanUsePextr = |
2176 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; | 2258 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; |
2177 if (CanUsePextr && Ty != IceType_v4f32) { | 2259 if (CanUsePextr && Ty != IceType_v4f32) { |
2178 // Use pextrb, pextrw, or pextrd. | 2260 // Use pextrb, pextrw, or pextrd. |
2179 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); | 2261 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); |
2180 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); | 2262 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); |
2181 _pextr(ExtractedElementR, SourceVectR, Mask); | 2263 _pextr(ExtractedElementR, SourceVectR, Mask); |
2182 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 2264 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
2183 // Use pshufd and movd/movss. | 2265 // Use pshufd and movd/movss. |
2184 // | 2266 // |
2185 // ALIGNHACK: Force vector operands to registers in instructions that | 2267 // ALIGNHACK: Force vector operands to registers in instructions |
2186 // require aligned memory operands until support for stack alignment | 2268 // that require aligned memory operands until support for data |
2187 // is implemented. | 2269 // alignment is implemented. |
2188 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) | 2270 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) |
2189 Operand *SourceVectRM = | 2271 Operand *SourceVectRM = |
2190 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 2272 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
2191 Variable *T = NULL; | 2273 Variable *T = NULL; |
2192 if (Index) { | 2274 if (Index) { |
2193 // The shuffle only needs to occur if the element to be extracted | 2275 // The shuffle only needs to occur if the element to be extracted |
2194 // is not at the lowest index. | 2276 // is not at the lowest index. |
2195 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); | 2277 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); |
2196 T = makeReg(Ty); | 2278 T = makeReg(Ty); |
2197 _pshufd(T, ALIGN_HACK(SourceVectRM), Mask); | 2279 _pshufd(T, ALIGN_HACK(SourceVectRM), Mask); |
(...skipping 64 matching lines...)
2262 | 2344 |
2263 if (Condition == InstFcmp::True) { | 2345 if (Condition == InstFcmp::True) { |
2264 // makeVectorOfOnes() requires an integer vector type. | 2346 // makeVectorOfOnes() requires an integer vector type. |
2265 T = makeVectorOfMinusOnes(IceType_v4i32); | 2347 T = makeVectorOfMinusOnes(IceType_v4i32); |
2266 } else if (Condition == InstFcmp::False) { | 2348 } else if (Condition == InstFcmp::False) { |
2267 T = makeVectorOfZeros(Dest->getType()); | 2349 T = makeVectorOfZeros(Dest->getType()); |
2268 } else { | 2350 } else { |
2269 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2351 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
2270 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 2352 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
2271 | 2353 |
2272 // ALIGNHACK: Without support for stack alignment, both operands to | 2354 // ALIGNHACK: Without support for data alignment, both operands to |
2273 // cmpps need to be forced into registers. Once support for stack | 2355 // cmpps need to be forced into registers. Once support for data |
2274 // alignment is implemented, remove LEGAL_HACK. | 2356 // alignment is implemented, remove LEGAL_HACK. |
2275 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | 2357 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
2276 switch (Condition) { | 2358 switch (Condition) { |
2277 default: { | 2359 default: { |
2278 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; | 2360 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; |
2279 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); | 2361 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); |
2280 T = makeReg(Src0RM->getType()); | 2362 T = makeReg(Src0RM->getType()); |
2281 _movp(T, Src0RM); | 2363 _movp(T, Src0RM); |
2282 _cmpps(T, LEGAL_HACK(Src1RM), Predicate); | 2364 _cmpps(T, LEGAL_HACK(Src1RM), Predicate); |
2283 } break; | 2365 } break; |
(...skipping 119 matching lines...)
2403 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); | 2485 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); |
2404 _movp(T0, Src0RM); | 2486 _movp(T0, Src0RM); |
2405 _pxor(T0, HighOrderBits); | 2487 _pxor(T0, HighOrderBits); |
2406 _movp(T1, Src1RM); | 2488 _movp(T1, Src1RM); |
2407 _pxor(T1, HighOrderBits); | 2489 _pxor(T1, HighOrderBits); |
2408 Src0RM = T0; | 2490 Src0RM = T0; |
2409 Src1RM = T1; | 2491 Src1RM = T1; |
2410 } | 2492 } |
2411 | 2493 |
2412 // TODO: ALIGNHACK: Both operands to compare instructions need to be | 2494 // TODO: ALIGNHACK: Both operands to compare instructions need to be |
2413 // in registers until stack alignment support is implemented. Once | 2495 // in registers until data alignment support is implemented. Once |
2414 // there is support for stack alignment, LEGAL_HACK can be removed. | 2496 // there is support for data alignment, LEGAL_HACK can be removed. |
2415 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | 2497 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
2416 Variable *T = makeReg(Ty); | 2498 Variable *T = makeReg(Ty); |
2417 switch (Condition) { | 2499 switch (Condition) { |
2418 default: | 2500 default: |
2419 llvm_unreachable("unexpected condition"); | 2501 llvm_unreachable("unexpected condition"); |
2420 break; | 2502 break; |
2421 case InstIcmp::Eq: { | 2503 case InstIcmp::Eq: { |
2422 _movp(T, Src0RM); | 2504 _movp(T, Src0RM); |
2423 _pcmpeq(T, LEGAL_HACK(Src1RM)); | 2505 _pcmpeq(T, LEGAL_HACK(Src1RM)); |
2424 } break; | 2506 } break; |
(...skipping 199 matching lines...)
2624 // insertelement into index 3 (result is stored in T): | 2706 // insertelement into index 3 (result is stored in T): |
2625 // T := SourceVectRM | 2707 // T := SourceVectRM |
2626 // ElementR := ElementR[0, 0] T[0, 2] | 2708 // ElementR := ElementR[0, 0] T[0, 2] |
2627 // T := T[0, 1] ElementR[3, 0] | 2709 // T := T[0, 1] ElementR[3, 0] |
2628 const unsigned char Mask1[3] = {0, 192, 128}; | 2710 const unsigned char Mask1[3] = {0, 192, 128}; |
2629 const unsigned char Mask2[3] = {227, 196, 52}; | 2711 const unsigned char Mask2[3] = {227, 196, 52}; |
2630 | 2712 |
2631 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); | 2713 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); |
2632 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); | 2714 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); |
2633 | 2715 |
2634 // ALIGNHACK: Force vector operands to registers in instructions that | 2716 // ALIGNHACK: Force vector operands to registers in instructions |
2635 // require aligned memory operands until support for stack alignment | 2717 // that require aligned memory operands until support for data |
2636 // is implemented. | 2718 // alignment is implemented. |
2637 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) | 2719 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) |
2638 if (Index == 1) { | 2720 if (Index == 1) { |
2639 SourceVectRM = ALIGN_HACK(SourceVectRM); | 2721 SourceVectRM = ALIGN_HACK(SourceVectRM); |
2640 _shufps(ElementR, SourceVectRM, Mask1Constant); | 2722 _shufps(ElementR, SourceVectRM, Mask1Constant); |
2641 _shufps(ElementR, SourceVectRM, Mask2Constant); | 2723 _shufps(ElementR, SourceVectRM, Mask2Constant); |
2642 _movp(Inst->getDest(), ElementR); | 2724 _movp(Inst->getDest(), ElementR); |
2643 } else { | 2725 } else { |
2644 Variable *T = makeReg(Ty); | 2726 Variable *T = makeReg(Ty); |
2645 _movp(T, SourceVectRM); | 2727 _movp(T, SourceVectRM); |
2646 _shufps(ElementR, T, Mask1Constant); | 2728 _shufps(ElementR, T, Mask1Constant); |
(...skipping 267 matching lines...)
2914 case Intrinsics::Memmove: { | 2996 case Intrinsics::Memmove: { |
2915 InstCall *Call = makeHelperCall("memmove", NULL, 3); | 2997 InstCall *Call = makeHelperCall("memmove", NULL, 3); |
2916 Call->addArg(Instr->getArg(0)); | 2998 Call->addArg(Instr->getArg(0)); |
2917 Call->addArg(Instr->getArg(1)); | 2999 Call->addArg(Instr->getArg(1)); |
2918 Call->addArg(Instr->getArg(2)); | 3000 Call->addArg(Instr->getArg(2)); |
2919 lowerCall(Call); | 3001 lowerCall(Call); |
2920 return; | 3002 return; |
2921 } | 3003 } |
2922 case Intrinsics::Memset: { | 3004 case Intrinsics::Memset: { |
2923 // The value operand needs to be extended to a stack slot size | 3005 // The value operand needs to be extended to a stack slot size |
2924 // because "push" only works for a specific operand size. | 3006 // because the PNaCl ABI requires arguments to be at least 32 bits |
3007 // wide. | |
2925 Operand *ValOp = Instr->getArg(1); | 3008 Operand *ValOp = Instr->getArg(1); |
2926 assert(ValOp->getType() == IceType_i8); | 3009 assert(ValOp->getType() == IceType_i8); |
2927 Variable *ValExt = Func->makeVariable(stackSlotType(), Context.getNode()); | 3010 Variable *ValExt = Func->makeVariable(stackSlotType(), Context.getNode()); |
2928 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp)); | 3011 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp)); |
2929 InstCall *Call = makeHelperCall("memset", NULL, 3); | 3012 InstCall *Call = makeHelperCall("memset", NULL, 3); |
2930 Call->addArg(Instr->getArg(0)); | 3013 Call->addArg(Instr->getArg(0)); |
2931 Call->addArg(ValExt); | 3014 Call->addArg(ValExt); |
2932 Call->addArg(Instr->getArg(2)); | 3015 Call->addArg(Instr->getArg(2)); |
2933 lowerCall(Call); | 3016 lowerCall(Call); |
2934 return; | 3017 return; |
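For instance (illustrative call): llvm.memset(p, i8 42, n) lowers to a zext of 42 into a 32-bit temporary followed by the helper call memset(p, 42, n), so the value travels as a full 32-bit stack argument as the PNaCl ABI requires.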
(...skipping 648 matching lines...)
3583 Variable *Dest = Inst->getDest(); | 3666 Variable *Dest = Inst->getDest(); |
3584 Operand *SrcT = Inst->getTrueOperand(); | 3667 Operand *SrcT = Inst->getTrueOperand(); |
3585 Operand *SrcF = Inst->getFalseOperand(); | 3668 Operand *SrcF = Inst->getFalseOperand(); |
3586 Operand *Condition = Inst->getCondition(); | 3669 Operand *Condition = Inst->getCondition(); |
3587 | 3670 |
3588 if (isVectorType(Dest->getType())) { | 3671 if (isVectorType(Dest->getType())) { |
3589 Type SrcTy = SrcT->getType(); | 3672 Type SrcTy = SrcT->getType(); |
3590 Variable *T = makeReg(SrcTy); | 3673 Variable *T = makeReg(SrcTy); |
3591 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); | 3674 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); |
3592 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); | 3675 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); |
3593 // ALIGNHACK: Until stack alignment support is implemented, vector | 3676 // ALIGNHACK: Until data alignment support is implemented, vector |
3594 // instructions need to have vector operands in registers. Once | 3677 // instructions need to have vector operands in registers. Once |
3595 // there is support for stack alignment, LEGAL_HACK can be removed. | 3678 // there is support for data alignment, LEGAL_HACK can be removed. |
3596 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | 3679 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
3597 if (InstructionSet >= SSE4_1) { | 3680 if (InstructionSet >= SSE4_1) { |
3598 // TODO(wala): If the condition operand is a constant, use blendps | 3681 // TODO(wala): If the condition operand is a constant, use blendps |
3599 // or pblendw. | 3682 // or pblendw. |
3600 // | 3683 // |
3601 // Use blendvps or pblendvb to implement select. | 3684 // Use blendvps or pblendvb to implement select. |
3602 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || | 3685 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || |
3603 SrcTy == IceType_v4f32) { | 3686 SrcTy == IceType_v4f32) { |
3604 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | 3687 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
3605 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); | 3688 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); |
(...skipping 74 matching lines...)
3680 _mov(Dest, SrcF); | 3763 _mov(Dest, SrcF); |
3681 } | 3764 } |
3682 | 3765 |
3683 Context.insert(Label); | 3766 Context.insert(Label); |
3684 } | 3767 } |
3685 | 3768 |
3686 void TargetX8632::lowerStore(const InstStore *Inst) { | 3769 void TargetX8632::lowerStore(const InstStore *Inst) { |
3687 Operand *Value = Inst->getData(); | 3770 Operand *Value = Inst->getData(); |
3688 Operand *Addr = Inst->getAddr(); | 3771 Operand *Addr = Inst->getAddr(); |
3689 OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType()); | 3772 OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType()); |
3773 Type Ty = NewAddr->getType(); | |
3690 | 3774 |
3691 if (NewAddr->getType() == IceType_i64) { | 3775 if (Ty == IceType_i64) { |
3692 Value = legalize(Value); | 3776 Value = legalize(Value); |
3693 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true); | 3777 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true); |
3694 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true); | 3778 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true); |
3695 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); | 3779 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); |
3696 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); | 3780 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); |
3781 } else if (isVectorType(Ty)) { | |
3782 _storep(legalizeToVar(Value), NewAddr); | |
3697 } else { | 3783 } else { |
3698 Value = legalize(Value, Legal_Reg | Legal_Imm, true); | 3784 Value = legalize(Value, Legal_Reg | Legal_Imm, true); |
3699 _store(Value, NewAddr); | 3785 _store(Value, NewAddr); |
3700 } | 3786 } |
3701 } | 3787 } |
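A note on the new vector path (illustrative): storing a <4 x i32> value now forces the data into an xmm register via legalizeToVar() and emits one vector store through _storep, rather than being split apart; the i64 case above, by contrast, still splits into two 32-bit stores of the lo and hi halves.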
3702 | 3788 |
3703 void TargetX8632::doAddressOptStore() { | 3789 void TargetX8632::doAddressOptStore() { |
3704 InstStore *Inst = llvm::cast<InstStore>(*Context.getCur()); | 3790 InstStore *Inst = llvm::cast<InstStore>(*Context.getCur()); |
3705 Operand *Data = Inst->getData(); | 3791 Operand *Data = Inst->getData(); |
3706 Operand *Addr = Inst->getAddr(); | 3792 Operand *Addr = Inst->getAddr(); |
(...skipping 322 matching lines...)
4029 SizeT NumVars = Src->getNumVars(); | 4115 SizeT NumVars = Src->getNumVars(); |
4030 for (SizeT J = 0; J < NumVars; ++J) { | 4116 for (SizeT J = 0; J < NumVars; ++J) { |
4031 Variable *Var = Src->getVar(J); | 4117 Variable *Var = Src->getVar(J); |
4032 if (Var->hasReg()) | 4118 if (Var->hasReg()) |
4033 continue; | 4119 continue; |
4034 if (!Var->getWeight().isInf()) | 4120 if (!Var->getWeight().isInf()) |
4035 continue; | 4121 continue; |
4036 llvm::SmallBitVector AvailableTypedRegisters = | 4122 llvm::SmallBitVector AvailableTypedRegisters = |
4037 AvailableRegisters & getRegisterSetForType(Var->getType()); | 4123 AvailableRegisters & getRegisterSetForType(Var->getType()); |
4038 if (!AvailableTypedRegisters.any()) { | 4124 if (!AvailableTypedRegisters.any()) { |
4039 // This is a hack in case we run out of physical registers | 4125 // This is a hack in case we run out of physical registers due |
4040 // due to an excessive number of "push" instructions from | 4126 // to an excessively long code sequence, as might happen when |
4041 // lowering a call. | 4127 // lowering arguments in lowerCall(). |
4042 AvailableRegisters = WhiteList; | 4128 AvailableRegisters = WhiteList; |
4043 AvailableTypedRegisters = | 4129 AvailableTypedRegisters = |
4044 AvailableRegisters & getRegisterSetForType(Var->getType()); | 4130 AvailableRegisters & getRegisterSetForType(Var->getType()); |
4045 } | 4131 } |
4046 assert(AvailableTypedRegisters.any()); | 4132 assert(AvailableTypedRegisters.any()); |
4047 int32_t RegNum = AvailableTypedRegisters.find_first(); | 4133 int32_t RegNum = AvailableTypedRegisters.find_first(); |
4048 Var->setRegNum(RegNum); | 4134 Var->setRegNum(RegNum); |
4049 AvailableRegisters[RegNum] = false; | 4135 AvailableRegisters[RegNum] = false; |
4050 } | 4136 } |
4051 } | 4137 } |
(...skipping 111 matching lines...)
4163 for (SizeT i = 0; i < Size; ++i) { | 4249 for (SizeT i = 0; i < Size; ++i) { |
4164 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 4250 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
4165 } | 4251 } |
4166 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 4252 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
4167 } | 4253 } |
4168 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 4254 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
4169 << "\n"; | 4255 << "\n"; |
4170 } | 4256 } |
4171 | 4257 |
4172 } // end of namespace Ice | 4258 } // end of namespace Ice |