| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 // |
| 10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
| (...skipping 103 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 114 // representation of the vector. | 114 // representation of the vector. |
| 115 Type getInVectorElementType(Type Ty) { | 115 Type getInVectorElementType(Type Ty) { |
| 116 assert(isVectorType(Ty)); | 116 assert(isVectorType(Ty)); |
| 117 size_t Index = static_cast<size_t>(Ty); | 117 size_t Index = static_cast<size_t>(Ty); |
| 118 (void)Index; | 118 (void)Index; |
| 119 assert(Index < TableTypeX8632AttributesSize); | 119 assert(Index < TableTypeX8632AttributesSize); |
| 120 return TableTypeX8632Attributes[Ty].InVectorElementType; | 120 return TableTypeX8632Attributes[Ty].InVectorElementType; |
| 121 } | 121 } |
| 122 | 122 |
// The maximum number of arguments to pass in XMM registers
const uint32_t X86_MAX_XMM_ARGS = 4;
// The number of bits in a byte
const uint32_t X86_CHAR_BIT = 8;
// Stack alignment
const uint32_t X86_STACK_ALIGNMENT_BYTES = 16;
// Size of the return address on the stack
const uint32_t X86_RET_IP_SIZE_BYTES = 4;

// Value is a size in bytes.  Return Value rounded up to the nearest
// multiple of the stack alignment.
uint32_t applyStackAlignment(uint32_t Value) {
  // The masking trick below requires a power-of-2 alignment.
  assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
  return (Value + X86_STACK_ALIGNMENT_BYTES - 1) &
         ~(X86_STACK_ALIGNMENT_BYTES - 1);
}
| 127 | 139 |
| 128 // Instruction set options | 140 // Instruction set options |
| 129 namespace cl = ::llvm::cl; | 141 namespace cl = ::llvm::cl; |
| 130 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet( | 142 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet( |
| 131 "mattr", cl::desc("X86 target attributes"), | 143 "mattr", cl::desc("X86 target attributes"), |
| 132 cl::init(TargetX8632::SSE2), | 144 cl::init(TargetX8632::SSE2), |
| 133 cl::values( | 145 cl::values( |
| 134 clEnumValN(TargetX8632::SSE2, "sse2", | 146 clEnumValN(TargetX8632::SSE2, "sse2", |
| 135 "Enable SSE2 instructions (default)"), | 147 "Enable SSE2 instructions (default)"), |
| 136 clEnumValN(TargetX8632::SSE4_1, "sse4.1", | 148 clEnumValN(TargetX8632::SSE4_1, "sse4.1", |
| (...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 241 STATIC_ASSERT(_table1_##tag == _table2_##tag); | 253 STATIC_ASSERT(_table1_##tag == _table2_##tag); |
| 242 ICETYPE_TABLE; | 254 ICETYPE_TABLE; |
| 243 #undef X | 255 #undef X |
| 244 } | 256 } |
| 245 } | 257 } |
| 246 | 258 |
| 247 } // end of anonymous namespace | 259 } // end of anonymous namespace |
| 248 | 260 |
| 249 TargetX8632::TargetX8632(Cfg *Func) | 261 TargetX8632::TargetX8632(Cfg *Func) |
| 250 : TargetLowering(Func), InstructionSet(CLInstructionSet), | 262 : TargetLowering(Func), InstructionSet(CLInstructionSet), |
| 251 IsEbpBasedFrame(false), FrameSizeLocals(0), LocalsSizeBytes(0), | 263 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), |
| 252 NextLabelNumber(0), ComputedLiveRanges(false), | 264 LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false), |
| 253 PhysicalRegisters(VarList(Reg_NUM)) { | 265 PhysicalRegisters(VarList(Reg_NUM)) { |
| 254 // TODO: Don't initialize IntegerRegisters and friends every time. | 266 // TODO: Don't initialize IntegerRegisters and friends every time. |
| 255 // Instead, initialize in some sort of static initializer for the | 267 // Instead, initialize in some sort of static initializer for the |
| 256 // class. | 268 // class. |
| 257 llvm::SmallBitVector IntegerRegisters(Reg_NUM); | 269 llvm::SmallBitVector IntegerRegisters(Reg_NUM); |
| 258 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM); | 270 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM); |
| 259 llvm::SmallBitVector FloatRegisters(Reg_NUM); | 271 llvm::SmallBitVector FloatRegisters(Reg_NUM); |
| 260 llvm::SmallBitVector VectorRegisters(Reg_NUM); | 272 llvm::SmallBitVector VectorRegisters(Reg_NUM); |
| 261 llvm::SmallBitVector InvalidRegisters(Reg_NUM); | 273 llvm::SmallBitVector InvalidRegisters(Reg_NUM); |
| 262 ScratchRegs.resize(Reg_NUM); | 274 ScratchRegs.resize(Reg_NUM); |
| (...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 536 Variable *Lo = Arg->getLo(); | 548 Variable *Lo = Arg->getLo(); |
| 537 Variable *Hi = Arg->getHi(); | 549 Variable *Hi = Arg->getHi(); |
| 538 Type Ty = Arg->getType(); | 550 Type Ty = Arg->getType(); |
| 539 if (Lo && Hi && Ty == IceType_i64) { | 551 if (Lo && Hi && Ty == IceType_i64) { |
| 540 assert(Lo->getType() != IceType_i64); // don't want infinite recursion | 552 assert(Lo->getType() != IceType_i64); // don't want infinite recursion |
| 541 assert(Hi->getType() != IceType_i64); // don't want infinite recursion | 553 assert(Hi->getType() != IceType_i64); // don't want infinite recursion |
| 542 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 554 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| 543 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 555 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| 544 return; | 556 return; |
| 545 } | 557 } |
| 558 if (isVectorType(Ty)) { |
| 559 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes); |
| 560 } |
| 546 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); | 561 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
| 547 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 562 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
| 548 if (Arg->hasReg()) { | 563 if (Arg->hasReg()) { |
| 549 assert(Ty != IceType_i64); | 564 assert(Ty != IceType_i64); |
| 550 OperandX8632Mem *Mem = OperandX8632Mem::create( | 565 OperandX8632Mem *Mem = OperandX8632Mem::create( |
| 551 Func, Ty, FramePtr, | 566 Func, Ty, FramePtr, |
| 552 Ctx->getConstantInt(IceType_i32, Arg->getStackOffset())); | 567 Ctx->getConstantInt(IceType_i32, Arg->getStackOffset())); |
| 553 if (isVectorType(Arg->getType())) { | 568 if (isVectorType(Arg->getType())) { |
| 554 _movp(Arg, Mem); | 569 _movp(Arg, Mem); |
| 555 } else { | 570 } else { |
| 556 _mov(Arg, Mem); | 571 _mov(Arg, Mem); |
| 557 } | 572 } |
| 558 } | 573 } |
| 559 } | 574 } |
| 560 | 575 |
| 561 Type TargetX8632::stackSlotType() { return IceType_i32; } | 576 Type TargetX8632::stackSlotType() { return IceType_i32; } |
| 562 | 577 |
| 563 void TargetX8632::addProlog(CfgNode *Node) { | 578 void TargetX8632::addProlog(CfgNode *Node) { |
| 564 // If SimpleCoalescing is false, each variable without a register | 579 // If SimpleCoalescing is false, each variable without a register |
| 565 // gets its own unique stack slot, which leads to large stack | 580 // gets its own unique stack slot, which leads to large stack |
| 566 // frames. If SimpleCoalescing is true, then each "global" variable | 581 // frames. If SimpleCoalescing is true, then each "global" variable |
| 567 // without a register gets its own slot, but "local" variable slots | 582 // without a register gets its own slot, but "local" variable slots |
| 568 // are reused across basic blocks. E.g., if A and B are local to | 583 // are reused across basic blocks. E.g., if A and B are local to |
| 569 // block 1 and C is local to block 2, then C may share a slot with A | 584 // block 1 and C is local to block 2, then C may share a slot with A |
| 570 // or B. | 585 // or B. |
| 571 const bool SimpleCoalescing = true; | 586 const bool SimpleCoalescing = true; |
| 572 size_t InArgsSizeBytes = 0; | 587 size_t InArgsSizeBytes = 0; |
| 573 size_t RetIpSizeBytes = 4; | |
| 574 size_t PreservedRegsSizeBytes = 0; | 588 size_t PreservedRegsSizeBytes = 0; |
| 575 LocalsSizeBytes = 0; | 589 LocalsSizeBytes = 0; |
| 576 Context.init(Node); | 590 Context.init(Node); |
| 577 Context.setInsertPoint(Context.getCur()); | 591 Context.setInsertPoint(Context.getCur()); |
| 578 | 592 |
| 579 // Determine stack frame offsets for each Variable without a | 593 // Determine stack frame offsets for each Variable without a |
| 580 // register assignment. This can be done as one variable per stack | 594 // register assignment. This can be done as one variable per stack |
| 581 // slot. Or, do coalescing by running the register allocator again | 595 // slot. Or, do coalescing by running the register allocator again |
| 582 // with an infinite set of registers (as a side effect, this gives | 596 // with an infinite set of registers (as a side effect, this gives |
| 583 // variables a second chance at physical register assignment). | 597 // variables a second chance at physical register assignment). |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 650 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) | 664 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) |
| 651 .count() == 0); | 665 .count() == 0); |
| 652 PreservedRegsSizeBytes += 4; | 666 PreservedRegsSizeBytes += 4; |
| 653 Variable *ebp = getPhysicalRegister(Reg_ebp); | 667 Variable *ebp = getPhysicalRegister(Reg_ebp); |
| 654 Variable *esp = getPhysicalRegister(Reg_esp); | 668 Variable *esp = getPhysicalRegister(Reg_esp); |
| 655 const bool SuppressStackAdjustment = true; | 669 const bool SuppressStackAdjustment = true; |
| 656 _push(ebp, SuppressStackAdjustment); | 670 _push(ebp, SuppressStackAdjustment); |
| 657 _mov(ebp, esp); | 671 _mov(ebp, esp); |
| 658 } | 672 } |
| 659 | 673 |
| 674 if (NeedsStackAlignment) { |
| 675 uint32_t StackSize = applyStackAlignment( |
| 676 X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes + LocalsSizeBytes); |
| 677 LocalsSizeBytes = |
| 678 StackSize - X86_RET_IP_SIZE_BYTES - PreservedRegsSizeBytes; |
| 679 } |
| 680 |
| 660 // Generate "sub esp, LocalsSizeBytes" | 681 // Generate "sub esp, LocalsSizeBytes" |
| 661 if (LocalsSizeBytes) | 682 if (LocalsSizeBytes) |
| 662 _sub(getPhysicalRegister(Reg_esp), | 683 _sub(getPhysicalRegister(Reg_esp), |
| 663 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes)); | 684 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes)); |
| 664 | 685 |
| 665 resetStackAdjustment(); | 686 resetStackAdjustment(); |
| 666 | 687 |
| 667 // Fill in stack offsets for stack args, and copy args into registers | 688 // Fill in stack offsets for stack args, and copy args into registers |
| 668 // for those that were register-allocated. Args are pushed right to | 689 // for those that were register-allocated. Args are pushed right to |
| 669 // left, so Arg[0] is closest to the stack/frame pointer. | 690 // left, so Arg[0] is closest to the stack/frame pointer. |
| 670 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 691 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
| 671 size_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes; | 692 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; |
| 672 if (!IsEbpBasedFrame) | 693 if (!IsEbpBasedFrame) |
| 673 BasicFrameOffset += LocalsSizeBytes; | 694 BasicFrameOffset += LocalsSizeBytes; |
| 674 | 695 |
| 675 unsigned NumXmmArgs = 0; | 696 unsigned NumXmmArgs = 0; |
| 676 for (SizeT i = 0; i < Args.size(); ++i) { | 697 for (SizeT i = 0; i < Args.size(); ++i) { |
| 677 Variable *Arg = Args[i]; | 698 Variable *Arg = Args[i]; |
| 678 // Skip arguments passed in registers. | 699 // Skip arguments passed in registers. |
| 679 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { | 700 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { |
| 680 ++NumXmmArgs; | 701 ++NumXmmArgs; |
| 681 continue; | 702 continue; |
| (...skipping 270 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 952 | 973 |
| 953 REGX8632_TABLE | 974 REGX8632_TABLE |
| 954 | 975 |
| 955 #undef X | 976 #undef X |
| 956 | 977 |
| 957 return Registers; | 978 return Registers; |
| 958 } | 979 } |
| 959 | 980 |
| 960 void TargetX8632::lowerAlloca(const InstAlloca *Inst) { | 981 void TargetX8632::lowerAlloca(const InstAlloca *Inst) { |
| 961 IsEbpBasedFrame = true; | 982 IsEbpBasedFrame = true; |
| 962 // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize | 983 // Conservatively require the stack to be aligned. Some stack |
| 963 // the number of adjustments of esp, etc. | 984 // adjustment operations implemented below assume that the stack is |
| 985 // aligned before the alloca. All the alloca code ensures that the |
| 986 // stack alignment is preserved after the alloca. The stack alignment |
| 987 // restriction can be relaxed in some cases. |
| 988 NeedsStackAlignment = true; |
| 989 |
| 990 // TODO(sehr,stichnot): minimize the number of adjustments of esp, etc. |
| 964 Variable *esp = getPhysicalRegister(Reg_esp); | 991 Variable *esp = getPhysicalRegister(Reg_esp); |
| 965 Operand *TotalSize = legalize(Inst->getSizeInBytes()); | 992 Operand *TotalSize = legalize(Inst->getSizeInBytes()); |
| 966 Variable *Dest = Inst->getDest(); | 993 Variable *Dest = Inst->getDest(); |
| 967 _sub(esp, TotalSize); | 994 uint32_t AlignmentParam = Inst->getAlignInBytes(); |
| 995 |
| 996 // LLVM enforces power of 2 alignment. |
| 997 assert((AlignmentParam & (AlignmentParam - 1)) == 0); |
| 998 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); |
| 999 |
| 1000 uint32_t Alignment = std::max(AlignmentParam, X86_STACK_ALIGNMENT_BYTES); |
| 1001 if (Alignment > X86_STACK_ALIGNMENT_BYTES) { |
| 1002 _and(esp, Ctx->getConstantInt(IceType_i32, -Alignment)); |
| 1003 } |
| 1004 if (ConstantInteger *ConstantTotalSize = |
| 1005 llvm::dyn_cast<ConstantInteger>(TotalSize)) { |
| 1006 uint32_t Value = ConstantTotalSize->getValue(); |
| 1007 // Round Value up to the next highest multiple of the alignment. |
| 1008 Value = (Value + Alignment - 1) & -Alignment; |
| 1009 _sub(esp, Ctx->getConstantInt(IceType_i32, Value)); |
| 1010 } else { |
| 1011 // Non-constant sizes need to be adjusted to the next highest |
| 1012 // multiple of the required alignment at runtime. |
| 1013 Variable *T = makeReg(IceType_i32); |
| 1014 _mov(T, TotalSize); |
| 1015 _add(T, Ctx->getConstantInt(IceType_i32, Alignment - 1)); |
| 1016 _and(T, Ctx->getConstantInt(IceType_i32, -Alignment)); |
| 1017 _sub(esp, T); |
| 1018 } |
| 968 _mov(Dest, esp); | 1019 _mov(Dest, esp); |
| 969 } | 1020 } |
| 970 | 1021 |
| 971 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | 1022 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
| 972 Variable *Dest = Inst->getDest(); | 1023 Variable *Dest = Inst->getDest(); |
| 973 Operand *Src0 = legalize(Inst->getSrc(0)); | 1024 Operand *Src0 = legalize(Inst->getSrc(0)); |
| 974 Operand *Src1 = legalize(Inst->getSrc(1)); | 1025 Operand *Src1 = legalize(Inst->getSrc(1)); |
| 975 if (Dest->getType() == IceType_i64) { | 1026 if (Dest->getType() == IceType_i64) { |
| 976 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1027 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| 977 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1028 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| (...skipping 559 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1537 _br(Inst->getTargetUnconditional()); | 1588 _br(Inst->getTargetUnconditional()); |
| 1538 } else { | 1589 } else { |
| 1539 Operand *Src0 = legalize(Inst->getCondition(), Legal_Reg | Legal_Mem); | 1590 Operand *Src0 = legalize(Inst->getCondition(), Legal_Reg | Legal_Mem); |
| 1540 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1591 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 1541 _cmp(Src0, Zero); | 1592 _cmp(Src0, Zero); |
| 1542 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); | 1593 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); |
| 1543 } | 1594 } |
| 1544 } | 1595 } |
| 1545 | 1596 |
| 1546 void TargetX8632::lowerCall(const InstCall *Instr) { | 1597 void TargetX8632::lowerCall(const InstCall *Instr) { |
| 1598 // x86-32 calling convention: |
| 1599 // |
| 1600 // * At the point before the call, the stack must be aligned to 16 |
| 1601 // bytes. |
| 1602 // |
| 1603 // * The first four arguments of vector type, regardless of their |
| 1604 // position relative to the other arguments in the argument list, are |
| 1605 // placed in registers xmm0 - xmm3. |
| 1606 // |
| 1607 // * Other arguments are pushed onto the stack in right-to-left order, |
| 1608 // such that the left-most argument ends up on the top of the stack at |
| 1609 // the lowest memory address. |
| 1610 // |
| 1611 // * Stack arguments of vector type are aligned to start at the next |
| 1612 // highest multiple of 16 bytes. Other stack arguments are aligned to |
| 1613 // 4 bytes. |
| 1614 // |
| 1615 // This intends to match the section "IA-32 Function Calling |
| 1616 // Convention" of the document "OS X ABI Function Call Guide" by |
| 1617 // Apple. |
| 1618 NeedsStackAlignment = true; |
| 1619 |
| 1620 OperandList XmmArgs; |
| 1621 OperandList StackArgs, StackArgLocations; |
| 1622 uint32_t ParameterAreaSizeBytes = 0; |
| 1623 |
| 1547 // Classify each argument operand according to the location where the | 1624 // Classify each argument operand according to the location where the |
| 1548 // argument is passed. | 1625 // argument is passed. |
| 1549 OperandList XmmArgs; | |
| 1550 OperandList StackArgs; | |
| 1551 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | 1626 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { |
| 1552 Operand *Arg = Instr->getArg(i); | 1627 Operand *Arg = Instr->getArg(i); |
| 1553 if (isVectorType(Arg->getType()) && XmmArgs.size() < X86_MAX_XMM_ARGS) { | 1628 Type Ty = Arg->getType(); |
| 1629 // The PNaCl ABI requires the width of arguments to be at least 32 bits. |
| 1630 assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 || |
| 1631 Ty == IceType_f64 || isVectorType(Ty)); |
| 1632 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { |
| 1554 XmmArgs.push_back(Arg); | 1633 XmmArgs.push_back(Arg); |
| 1555 } else { | 1634 } else { |
| 1556 StackArgs.push_back(Arg); | 1635 StackArgs.push_back(Arg); |
| 1636 if (isVectorType(Arg->getType())) { |
| 1637 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); |
| 1638 } |
| 1639 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); |
| 1640 Constant *Loc = Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes); |
| 1641 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); |
| 1642 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); |
| 1557 } | 1643 } |
| 1558 } | 1644 } |
| 1559 // For stack arguments, generate a sequence of push instructions, | 1645 |
| 1560 // pushing right to left, keeping track of stack offsets in case a | 1646 // Adjust the parameter area so that the stack is aligned. It is |
| 1561 // push involves a stack operand and we are using an esp-based frame. | 1647 // assumed that the stack is already aligned at the start of the |
| 1562 uint32_t StackOffset = 0; | 1648 // calling sequence. |
| 1563 // TODO: Consolidate the stack adjustment for function calls by | 1649 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); |
| 1564 // reserving enough space for the arguments only once. | 1650 |
| 1651 // Subtract the appropriate amount for the argument area. This also |
| 1652 // takes care of setting the stack adjustment during emission. |
| 1565 // | 1653 // |
| 1566 // TODO: If for some reason the call instruction gets dead-code | 1654 // TODO: If for some reason the call instruction gets dead-code |
| 1567 // eliminated after lowering, we would need to ensure that the | 1655 // eliminated after lowering, we would need to ensure that the |
| 1568 // pre-call push instructions and the post-call esp adjustment get | 1656 // pre-call and the post-call esp adjustment get eliminated as well. |
| 1569 // eliminated as well. | 1657 if (ParameterAreaSizeBytes) { |
| 1570 for (OperandList::reverse_iterator I = StackArgs.rbegin(), | 1658 _adjust_stack(ParameterAreaSizeBytes); |
| 1571 E = StackArgs.rend(); I != E; ++I) { | |
| 1572 Operand *Arg = legalize(*I); | |
| 1573 if (Arg->getType() == IceType_i64) { | |
| 1574 _push(hiOperand(Arg)); | |
| 1575 _push(loOperand(Arg)); | |
| 1576 } else if (Arg->getType() == IceType_f64 || isVectorType(Arg->getType())) { | |
| 1577 // If the Arg turns out to be a memory operand, more than one push | |
| 1578 // instruction is required. This ends up being somewhat clumsy in | |
| 1579 // the current IR, so we use a workaround. Force the operand into | |
| 1580 // a (xmm) register, and then push the register. An xmm register | |
| 1581 // push is actually not possible in x86, but the Push instruction | |
| 1582 // emitter handles this by decrementing the stack pointer and | |
| 1583 // directly writing the xmm register value. | |
| 1584 _push(legalize(Arg, Legal_Reg)); | |
| 1585 } else { | |
| 1586 // Otherwise PNaCl requires parameter types to be at least 32-bits. | |
| 1587 assert(Arg->getType() == IceType_f32 || Arg->getType() == IceType_i32); | |
| 1588 _push(Arg); | |
| 1589 } | |
| 1590 StackOffset += typeWidthInBytesOnStack(Arg->getType()); | |
| 1591 } | 1659 } |
| 1660 |
| 1661 // Copy arguments that are passed on the stack to the appropriate |
| 1662 // stack locations. |
| 1663 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { |
| 1664 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); |
| 1665 // TODO: Consider calling postLower() here to reduce the register |
| 1666 // pressure associated with using too many infinite weight |
| 1667 // temporaries when lowering the call sequence in -Om1 mode. |
| 1668 } |
| 1669 |
| 1592 // Copy arguments to be passed in registers to the appropriate | 1670 // Copy arguments to be passed in registers to the appropriate |
| 1593 // registers. | 1671 // registers. |
| 1594 // TODO: Investigate the impact of lowering arguments passed in | 1672 // TODO: Investigate the impact of lowering arguments passed in |
| 1595 // registers after lowering stack arguments as opposed to the other | 1673 // registers after lowering stack arguments as opposed to the other |
| 1596 // way around. Lowering register arguments after stack arguments may | 1674 // way around. Lowering register arguments after stack arguments may |
| 1597 // reduce register pressure. On the other hand, lowering register | 1675 // reduce register pressure. On the other hand, lowering register |
| 1598 // arguments first (before stack arguments) may result in more compact | 1676 // arguments first (before stack arguments) may result in more compact |
| 1599 // code, as the memory operand displacements may end up being smaller | 1677 // code, as the memory operand displacements may end up being smaller |
| 1600 // before any stack adjustment is done. | 1678 // before any stack adjustment is done. |
| 1601 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | 1679 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1645 } | 1723 } |
| 1646 } | 1724 } |
| 1647 // TODO(stichnot): LEAHACK: remove Legal_All (and use default) once | 1725 // TODO(stichnot): LEAHACK: remove Legal_All (and use default) once |
| 1648 // a proper emitter is used. | 1726 // a proper emitter is used. |
| 1649 Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All); | 1727 Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All); |
| 1650 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); | 1728 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); |
| 1651 Context.insert(NewCall); | 1729 Context.insert(NewCall); |
| 1652 if (ReturnRegHi) | 1730 if (ReturnRegHi) |
| 1653 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | 1731 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
| 1654 | 1732 |
| 1655 // Add the appropriate offset to esp. | 1733 // Add the appropriate offset to esp. The call instruction takes care |
| 1656 if (StackOffset) { | 1734 // of resetting the stack offset during emission. |
| 1735 if (ParameterAreaSizeBytes) { |
| 1657 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); | 1736 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); |
| 1658 _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset)); | 1737 _add(esp, Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes)); |
| 1659 } | 1738 } |
| 1660 | 1739 |
| 1661 // Insert a register-kill pseudo instruction. | 1740 // Insert a register-kill pseudo instruction. |
| 1662 VarList KilledRegs; | 1741 VarList KilledRegs; |
| 1663 for (SizeT i = 0; i < ScratchRegs.size(); ++i) { | 1742 for (SizeT i = 0; i < ScratchRegs.size(); ++i) { |
| 1664 if (ScratchRegs[i]) | 1743 if (ScratchRegs[i]) |
| 1665 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i)); | 1744 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i)); |
| 1666 } | 1745 } |
| 1667 Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall)); | 1746 Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall)); |
| 1668 | 1747 |
| (...skipping 458 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2127 bool CanUsePextr = | 2206 bool CanUsePextr = |
| 2128 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; | 2207 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; |
| 2129 if (CanUsePextr && Ty != IceType_v4f32) { | 2208 if (CanUsePextr && Ty != IceType_v4f32) { |
| 2130 // Use pextrb, pextrw, or pextrd. | 2209 // Use pextrb, pextrw, or pextrd. |
| 2131 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); | 2210 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); |
| 2132 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); | 2211 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); |
| 2133 _pextr(ExtractedElementR, SourceVectR, Mask); | 2212 _pextr(ExtractedElementR, SourceVectR, Mask); |
| 2134 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 2213 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
| 2135 // Use pshufd and movd/movss. | 2214 // Use pshufd and movd/movss. |
| 2136 // | 2215 // |
| 2137 // ALIGNHACK: Force vector operands to registers in instructions that | 2216 // ALIGNHACK: Force vector operands to registers in instructions |
| 2138 // require aligned memory operands until support for stack alignment | 2217 // that require aligned memory operands until support for data |
| 2139 // is implemented. | 2218 // alignment is implemented. |
| 2140 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) | 2219 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) |
| 2141 Operand *SourceVectRM = | 2220 Operand *SourceVectRM = |
| 2142 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 2221 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
| 2143 Variable *T = NULL; | 2222 Variable *T = NULL; |
| 2144 if (Index) { | 2223 if (Index) { |
| 2145 // The shuffle only needs to occur if the element to be extracted | 2224 // The shuffle only needs to occur if the element to be extracted |
| 2146 // is not at the lowest index. | 2225 // is not at the lowest index. |
| 2147 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); | 2226 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); |
| 2148 T = makeReg(Ty); | 2227 T = makeReg(Ty); |
| 2149 _pshufd(T, ALIGN_HACK(SourceVectRM), Mask); | 2228 _pshufd(T, ALIGN_HACK(SourceVectRM), Mask); |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2214 | 2293 |
| 2215 if (Condition == InstFcmp::True) { | 2294 if (Condition == InstFcmp::True) { |
| 2216 // makeVectorOfOnes() requires an integer vector type. | 2295 // makeVectorOfOnes() requires an integer vector type. |
| 2217 T = makeVectorOfMinusOnes(IceType_v4i32); | 2296 T = makeVectorOfMinusOnes(IceType_v4i32); |
| 2218 } else if (Condition == InstFcmp::False) { | 2297 } else if (Condition == InstFcmp::False) { |
| 2219 T = makeVectorOfZeros(Dest->getType()); | 2298 T = makeVectorOfZeros(Dest->getType()); |
| 2220 } else { | 2299 } else { |
| 2221 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2300 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| 2222 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 2301 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| 2223 | 2302 |
| 2224 // ALIGNHACK: Without support for stack alignment, both operands to | 2303 // ALIGNHACK: Without support for data alignment, both operands to |
| 2225 // cmpps need to be forced into registers. Once support for stack | 2304 // cmpps need to be forced into registers. Once support for data |
| 2226 // alignment is implemented, remove LEGAL_HACK. | 2305 // alignment is implemented, remove LEGAL_HACK. |
| 2227 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | 2306 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
| 2228 switch (Condition) { | 2307 switch (Condition) { |
| 2229 default: { | 2308 default: { |
| 2230 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; | 2309 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; |
| 2231 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); | 2310 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); |
| 2232 T = makeReg(Src0RM->getType()); | 2311 T = makeReg(Src0RM->getType()); |
| 2233 _movp(T, Src0RM); | 2312 _movp(T, Src0RM); |
| 2234 _cmpps(T, LEGAL_HACK(Src1RM), Predicate); | 2313 _cmpps(T, LEGAL_HACK(Src1RM), Predicate); |
| 2235 } break; | 2314 } break; |
| (...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2355 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); | 2434 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); |
| 2356 _movp(T0, Src0RM); | 2435 _movp(T0, Src0RM); |
| 2357 _pxor(T0, HighOrderBits); | 2436 _pxor(T0, HighOrderBits); |
| 2358 _movp(T1, Src1RM); | 2437 _movp(T1, Src1RM); |
| 2359 _pxor(T1, HighOrderBits); | 2438 _pxor(T1, HighOrderBits); |
| 2360 Src0RM = T0; | 2439 Src0RM = T0; |
| 2361 Src1RM = T1; | 2440 Src1RM = T1; |
| 2362 } | 2441 } |
| 2363 | 2442 |
| 2364 // TODO: ALIGNHACK: Both operands to compare instructions need to be | 2443 // TODO: ALIGNHACK: Both operands to compare instructions need to be |
| 2365 // in registers until stack alignment support is implemented. Once | 2444 // in registers until data alignment support is implemented. Once |
| 2366 // there is support for stack alignment, LEGAL_HACK can be removed. | 2445 // there is support for data alignment, LEGAL_HACK can be removed. |
| 2367 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | 2446 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
| 2368 Variable *T = makeReg(Ty); | 2447 Variable *T = makeReg(Ty); |
| 2369 switch (Condition) { | 2448 switch (Condition) { |
| 2370 default: | 2449 default: |
| 2371 llvm_unreachable("unexpected condition"); | 2450 llvm_unreachable("unexpected condition"); |
| 2372 break; | 2451 break; |
| 2373 case InstIcmp::Eq: { | 2452 case InstIcmp::Eq: { |
| 2374 _movp(T, Src0RM); | 2453 _movp(T, Src0RM); |
| 2375 _pcmpeq(T, LEGAL_HACK(Src1RM)); | 2454 _pcmpeq(T, LEGAL_HACK(Src1RM)); |
| 2376 } break; | 2455 } break; |
| (...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2576 // insertelement into index 3 (result is stored in T): | 2655 // insertelement into index 3 (result is stored in T): |
| 2577 // T := SourceVectRM | 2656 // T := SourceVectRM |
| 2578 // ElementR := ElementR[0, 0] T[0, 2] | 2657 // ElementR := ElementR[0, 0] T[0, 2] |
| 2579 // T := T[0, 1] ElementR[3, 0] | 2658 // T := T[0, 1] ElementR[3, 0] |
| 2580 const unsigned char Mask1[3] = {0, 192, 128}; | 2659 const unsigned char Mask1[3] = {0, 192, 128}; |
| 2581 const unsigned char Mask2[3] = {227, 196, 52}; | 2660 const unsigned char Mask2[3] = {227, 196, 52}; |
| 2582 | 2661 |
| 2583 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); | 2662 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); |
| 2584 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); | 2663 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); |
| 2585 | 2664 |
| 2586 // ALIGNHACK: Force vector operands to registers in instructions that | 2665 // ALIGNHACK: Force vector operands to registers in instructions |
| 2587 // require aligned memory operands until support for stack alignment | 2666 // that require aligned memory operands until support for data |
| 2588 // is implemented. | 2667 // alignment is implemented. |
| 2589 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) | 2668 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) |
| 2590 if (Index == 1) { | 2669 if (Index == 1) { |
| 2591 SourceVectRM = ALIGN_HACK(SourceVectRM); | 2670 SourceVectRM = ALIGN_HACK(SourceVectRM); |
| 2592 _shufps(ElementR, SourceVectRM, Mask1Constant); | 2671 _shufps(ElementR, SourceVectRM, Mask1Constant); |
| 2593 _shufps(ElementR, SourceVectRM, Mask2Constant); | 2672 _shufps(ElementR, SourceVectRM, Mask2Constant); |
| 2594 _movp(Inst->getDest(), ElementR); | 2673 _movp(Inst->getDest(), ElementR); |
| 2595 } else { | 2674 } else { |
| 2596 Variable *T = makeReg(Ty); | 2675 Variable *T = makeReg(Ty); |
| 2597 _movp(T, SourceVectRM); | 2676 _movp(T, SourceVectRM); |
| 2598 _shufps(ElementR, T, Mask1Constant); | 2677 _shufps(ElementR, T, Mask1Constant); |
| (...skipping 267 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2866 case Intrinsics::Memmove: { | 2945 case Intrinsics::Memmove: { |
| 2867 InstCall *Call = makeHelperCall("memmove", NULL, 3); | 2946 InstCall *Call = makeHelperCall("memmove", NULL, 3); |
| 2868 Call->addArg(Instr->getArg(0)); | 2947 Call->addArg(Instr->getArg(0)); |
| 2869 Call->addArg(Instr->getArg(1)); | 2948 Call->addArg(Instr->getArg(1)); |
| 2870 Call->addArg(Instr->getArg(2)); | 2949 Call->addArg(Instr->getArg(2)); |
| 2871 lowerCall(Call); | 2950 lowerCall(Call); |
| 2872 return; | 2951 return; |
| 2873 } | 2952 } |
| 2874 case Intrinsics::Memset: { | 2953 case Intrinsics::Memset: { |
| 2875 // The value operand needs to be extended to a stack slot size | 2954 // The value operand needs to be extended to a stack slot size |
| 2876 // because "push" only works for a specific operand size. | 2955 // because the PNaCl ABI requires arguments to be at least 32 bits |
| 2956 // wide. |
| 2877 Operand *ValOp = Instr->getArg(1); | 2957 Operand *ValOp = Instr->getArg(1); |
| 2878 assert(ValOp->getType() == IceType_i8); | 2958 assert(ValOp->getType() == IceType_i8); |
| 2879 Variable *ValExt = Func->makeVariable(stackSlotType(), Context.getNode()); | 2959 Variable *ValExt = Func->makeVariable(stackSlotType(), Context.getNode()); |
| 2880 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp)); | 2960 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp)); |
| 2881 InstCall *Call = makeHelperCall("memset", NULL, 3); | 2961 InstCall *Call = makeHelperCall("memset", NULL, 3); |
| 2882 Call->addArg(Instr->getArg(0)); | 2962 Call->addArg(Instr->getArg(0)); |
| 2883 Call->addArg(ValExt); | 2963 Call->addArg(ValExt); |
| 2884 Call->addArg(Instr->getArg(2)); | 2964 Call->addArg(Instr->getArg(2)); |
| 2885 lowerCall(Call); | 2965 lowerCall(Call); |
| 2886 return; | 2966 return; |
| (...skipping 666 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3553 Variable *Dest = Inst->getDest(); | 3633 Variable *Dest = Inst->getDest(); |
| 3554 Operand *SrcT = Inst->getTrueOperand(); | 3634 Operand *SrcT = Inst->getTrueOperand(); |
| 3555 Operand *SrcF = Inst->getFalseOperand(); | 3635 Operand *SrcF = Inst->getFalseOperand(); |
| 3556 Operand *Condition = Inst->getCondition(); | 3636 Operand *Condition = Inst->getCondition(); |
| 3557 | 3637 |
| 3558 if (isVectorType(Dest->getType())) { | 3638 if (isVectorType(Dest->getType())) { |
| 3559 Type SrcTy = SrcT->getType(); | 3639 Type SrcTy = SrcT->getType(); |
| 3560 Variable *T = makeReg(SrcTy); | 3640 Variable *T = makeReg(SrcTy); |
| 3561 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); | 3641 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); |
| 3562 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); | 3642 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); |
| 3563 // ALIGNHACK: Until stack alignment support is implemented, vector | 3643 // ALIGNHACK: Until data alignment support is implemented, vector |
| 3564 // instructions need to have vector operands in registers. Once | 3644 // instructions need to have vector operands in registers. Once |
| 3565 // there is support for stack alignment, LEGAL_HACK can be removed. | 3645 // there is support for data alignment, LEGAL_HACK can be removed. |
| 3566 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | 3646 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
| 3567 if (InstructionSet >= SSE4_1) { | 3647 if (InstructionSet >= SSE4_1) { |
| 3568 // TODO(wala): If the condition operand is a constant, use blendps | 3648 // TODO(wala): If the condition operand is a constant, use blendps |
| 3569 // or pblendw. | 3649 // or pblendw. |
| 3570 // | 3650 // |
| 3571 // Use blendvps or pblendvb to implement select. | 3651 // Use blendvps or pblendvb to implement select. |
| 3572 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || | 3652 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || |
| 3573 SrcTy == IceType_v4f32) { | 3653 SrcTy == IceType_v4f32) { |
| 3574 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | 3654 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
| 3575 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); | 3655 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); |
| (...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3650 _mov(Dest, SrcF); | 3730 _mov(Dest, SrcF); |
| 3651 } | 3731 } |
| 3652 | 3732 |
| 3653 Context.insert(Label); | 3733 Context.insert(Label); |
| 3654 } | 3734 } |
| 3655 | 3735 |
| 3656 void TargetX8632::lowerStore(const InstStore *Inst) { | 3736 void TargetX8632::lowerStore(const InstStore *Inst) { |
| 3657 Operand *Value = Inst->getData(); | 3737 Operand *Value = Inst->getData(); |
| 3658 Operand *Addr = Inst->getAddr(); | 3738 Operand *Addr = Inst->getAddr(); |
| 3659 OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType()); | 3739 OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType()); |
| 3740 Type Ty = NewAddr->getType(); |
| 3660 | 3741 |
| 3661 if (NewAddr->getType() == IceType_i64) { | 3742 if (Ty == IceType_i64) { |
| 3662 Value = legalize(Value); | 3743 Value = legalize(Value); |
| 3663 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true); | 3744 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true); |
| 3664 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true); | 3745 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true); |
| 3665 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); | 3746 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); |
| 3666 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); | 3747 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); |
| 3748 } else if (isVectorType(Ty)) { |
| 3749 _storep(legalizeToVar(Value), NewAddr); |
| 3667 } else { | 3750 } else { |
| 3668 Value = legalize(Value, Legal_Reg | Legal_Imm, true); | 3751 Value = legalize(Value, Legal_Reg | Legal_Imm, true); |
| 3669 _store(Value, NewAddr); | 3752 _store(Value, NewAddr); |
| 3670 } | 3753 } |
| 3671 } | 3754 } |
| 3672 | 3755 |
| 3673 void TargetX8632::doAddressOptStore() { | 3756 void TargetX8632::doAddressOptStore() { |
| 3674 InstStore *Inst = llvm::cast<InstStore>(*Context.getCur()); | 3757 InstStore *Inst = llvm::cast<InstStore>(*Context.getCur()); |
| 3675 Operand *Data = Inst->getData(); | 3758 Operand *Data = Inst->getData(); |
| 3676 Operand *Addr = Inst->getAddr(); | 3759 Operand *Addr = Inst->getAddr(); |
| (...skipping 355 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4032 SizeT NumVars = Src->getNumVars(); | 4115 SizeT NumVars = Src->getNumVars(); |
| 4033 for (SizeT J = 0; J < NumVars; ++J) { | 4116 for (SizeT J = 0; J < NumVars; ++J) { |
| 4034 Variable *Var = Src->getVar(J); | 4117 Variable *Var = Src->getVar(J); |
| 4035 if (Var->hasReg()) | 4118 if (Var->hasReg()) |
| 4036 continue; | 4119 continue; |
| 4037 if (!Var->getWeight().isInf()) | 4120 if (!Var->getWeight().isInf()) |
| 4038 continue; | 4121 continue; |
| 4039 llvm::SmallBitVector AvailableTypedRegisters = | 4122 llvm::SmallBitVector AvailableTypedRegisters = |
| 4040 AvailableRegisters & getRegisterSetForType(Var->getType()); | 4123 AvailableRegisters & getRegisterSetForType(Var->getType()); |
| 4041 if (!AvailableTypedRegisters.any()) { | 4124 if (!AvailableTypedRegisters.any()) { |
| 4042 // This is a hack in case we run out of physical registers | 4125 // This is a hack in case we run out of physical registers due |
| 4043 // due to an excessive number of "push" instructions from | 4126 // to an excessively long code sequence, as might happen when |
| 4044 // lowering a call. | 4127 // lowering arguments in lowerCall(). |
| 4045 AvailableRegisters = WhiteList; | 4128 AvailableRegisters = WhiteList; |
| 4046 AvailableTypedRegisters = | 4129 AvailableTypedRegisters = |
| 4047 AvailableRegisters & getRegisterSetForType(Var->getType()); | 4130 AvailableRegisters & getRegisterSetForType(Var->getType()); |
| 4048 } | 4131 } |
| 4049 assert(AvailableTypedRegisters.any()); | 4132 assert(AvailableTypedRegisters.any()); |
| 4050 int32_t RegNum = AvailableTypedRegisters.find_first(); | 4133 int32_t RegNum = AvailableTypedRegisters.find_first(); |
| 4051 Var->setRegNum(RegNum); | 4134 Var->setRegNum(RegNum); |
| 4052 AvailableRegisters[RegNum] = false; | 4135 AvailableRegisters[RegNum] = false; |
| 4053 } | 4136 } |
| 4054 } | 4137 } |
| (...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4166 for (SizeT i = 0; i < Size; ++i) { | 4249 for (SizeT i = 0; i < Size; ++i) { |
| 4167 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 4250 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
| 4168 } | 4251 } |
| 4169 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 4252 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
| 4170 } | 4253 } |
| 4171 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 4254 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName |
| 4172 << "\n"; | 4255 << "\n"; |
| 4173 } | 4256 } |
| 4174 | 4257 |
| 4175 } // end of namespace Ice | 4258 } // end of namespace Ice |
| OLD | NEW |