 Chromium Code Reviews
 Chromium Code Reviews Issue 444443002:
  Subzero: Align the stack at the point of function calls.  (Closed) 
  Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
    
  
    Issue 444443002:
  Subzero: Align the stack at the point of function calls.  (Closed) 
  Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master| OLD | NEW | 
|---|---|
| 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 
| 2 // | 2 // | 
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator | 
| 4 // | 4 // | 
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source | 
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. | 
| 7 // | 7 // | 
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// | 
| 9 // | 9 // | 
| 10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which | 
| (...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 117 size_t Index = static_cast<size_t>(Ty); | 117 size_t Index = static_cast<size_t>(Ty); | 
| 118 (void)Index; | 118 (void)Index; | 
| 119 assert(Index < TableTypeX8632AttributesSize); | 119 assert(Index < TableTypeX8632AttributesSize); | 
| 120 return TableTypeX8632Attributes[Ty].InVectorElementType; | 120 return TableTypeX8632Attributes[Ty].InVectorElementType; | 
| 121 } | 121 } | 
| 122 | 122 | 
| 123 // The maximum number of arguments to pass in XMM registers | 123 // The maximum number of arguments to pass in XMM registers | 
| 124 const unsigned X86_MAX_XMM_ARGS = 4; | 124 const unsigned X86_MAX_XMM_ARGS = 4; | 
| 125 // The number of bits in a byte | 125 // The number of bits in a byte | 
| 126 const unsigned X86_CHAR_BIT = 8; | 126 const unsigned X86_CHAR_BIT = 8; | 
| 127 // Stack alignment | |
| 128 const unsigned X86_STACK_ALIGNMENT_BYTES = 16; | |
| 129 // Size of the return address on the stack | |
| 130 const unsigned X86_RET_IP_SIZE_BYTES = 4; | |
| 131 | |
| 132 // Return the difference between Size and the next highest multiple of | |
| 133 // the stack alignment. All values are in bytes. | |
| 134 uint32_t getAdjustmentToAlignStackInBytes(uint32_t Size) { | |
| 
Jim Stichnoth
2014/08/05 18:09:27
Almost all the calls to this routine have a pattern [reviewer comment truncated in extraction]
 
wala
2014/08/05 23:57:03
Done.
 | |
| 135 uint32_t Offset = Size % X86_STACK_ALIGNMENT_BYTES; | |
| 136 if (Offset) { | |
| 137 return X86_STACK_ALIGNMENT_BYTES - Offset; | |
| 138 } | |
| 139 return 0; | |
| 140 } | |
| 127 | 141 | 
| 128 // Instruction set options | 142 // Instruction set options | 
| 129 namespace cl = ::llvm::cl; | 143 namespace cl = ::llvm::cl; | 
| 130 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet( | 144 cl::opt<TargetX8632::X86InstructionSet> CLInstructionSet( | 
| 131 "mattr", cl::desc("X86 target attributes"), | 145 "mattr", cl::desc("X86 target attributes"), | 
| 132 cl::init(TargetX8632::SSE2), | 146 cl::init(TargetX8632::SSE2), | 
| 133 cl::values( | 147 cl::values( | 
| 134 clEnumValN(TargetX8632::SSE2, "sse2", | 148 clEnumValN(TargetX8632::SSE2, "sse2", | 
| 135 "Enable SSE2 instructions (default)"), | 149 "Enable SSE2 instructions (default)"), | 
| 136 clEnumValN(TargetX8632::SSE4_1, "sse4.1", | 150 clEnumValN(TargetX8632::SSE4_1, "sse4.1", | 
| (...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 241 STATIC_ASSERT(_table1_##tag == _table2_##tag); | 255 STATIC_ASSERT(_table1_##tag == _table2_##tag); | 
| 242 ICETYPE_TABLE; | 256 ICETYPE_TABLE; | 
| 243 #undef X | 257 #undef X | 
| 244 } | 258 } | 
| 245 } | 259 } | 
| 246 | 260 | 
| 247 } // end of anonymous namespace | 261 } // end of anonymous namespace | 
| 248 | 262 | 
| 249 TargetX8632::TargetX8632(Cfg *Func) | 263 TargetX8632::TargetX8632(Cfg *Func) | 
| 250 : TargetLowering(Func), InstructionSet(CLInstructionSet), | 264 : TargetLowering(Func), InstructionSet(CLInstructionSet), | 
| 251 IsEbpBasedFrame(false), FrameSizeLocals(0), LocalsSizeBytes(0), | 265 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), | 
| 252 NextLabelNumber(0), ComputedLiveRanges(false), | 266 LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false), | 
| 253 PhysicalRegisters(VarList(Reg_NUM)) { | 267 PhysicalRegisters(VarList(Reg_NUM)) { | 
| 254 // TODO: Don't initialize IntegerRegisters and friends every time. | 268 // TODO: Don't initialize IntegerRegisters and friends every time. | 
| 255 // Instead, initialize in some sort of static initializer for the | 269 // Instead, initialize in some sort of static initializer for the | 
| 256 // class. | 270 // class. | 
| 257 llvm::SmallBitVector IntegerRegisters(Reg_NUM); | 271 llvm::SmallBitVector IntegerRegisters(Reg_NUM); | 
| 258 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM); | 272 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM); | 
| 259 llvm::SmallBitVector FloatRegisters(Reg_NUM); | 273 llvm::SmallBitVector FloatRegisters(Reg_NUM); | 
| 260 llvm::SmallBitVector VectorRegisters(Reg_NUM); | 274 llvm::SmallBitVector VectorRegisters(Reg_NUM); | 
| 261 llvm::SmallBitVector InvalidRegisters(Reg_NUM); | 275 llvm::SmallBitVector InvalidRegisters(Reg_NUM); | 
| 262 ScratchRegs.resize(Reg_NUM); | 276 ScratchRegs.resize(Reg_NUM); | 
| (...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 536 Variable *Lo = Arg->getLo(); | 550 Variable *Lo = Arg->getLo(); | 
| 537 Variable *Hi = Arg->getHi(); | 551 Variable *Hi = Arg->getHi(); | 
| 538 Type Ty = Arg->getType(); | 552 Type Ty = Arg->getType(); | 
| 539 if (Lo && Hi && Ty == IceType_i64) { | 553 if (Lo && Hi && Ty == IceType_i64) { | 
| 540 assert(Lo->getType() != IceType_i64); // don't want infinite recursion | 554 assert(Lo->getType() != IceType_i64); // don't want infinite recursion | 
| 541 assert(Hi->getType() != IceType_i64); // don't want infinite recursion | 555 assert(Hi->getType() != IceType_i64); // don't want infinite recursion | 
| 542 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 556 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 
| 543 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 557 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 
| 544 return; | 558 return; | 
| 545 } | 559 } | 
| 560 if (isVectorType(Ty)) { | |
| 561 InArgsSizeBytes += getAdjustmentToAlignStackInBytes(InArgsSizeBytes); | |
| 562 } | |
| 546 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); | 563 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); | 
| 547 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 564 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 
| 548 if (Arg->hasReg()) { | 565 if (Arg->hasReg()) { | 
| 549 assert(Ty != IceType_i64); | 566 assert(Ty != IceType_i64); | 
| 550 OperandX8632Mem *Mem = OperandX8632Mem::create( | 567 OperandX8632Mem *Mem = OperandX8632Mem::create( | 
| 551 Func, Ty, FramePtr, | 568 Func, Ty, FramePtr, | 
| 552 Ctx->getConstantInt(IceType_i32, Arg->getStackOffset())); | 569 Ctx->getConstantInt(IceType_i32, Arg->getStackOffset())); | 
| 553 if (isVectorType(Arg->getType())) { | 570 if (isVectorType(Arg->getType())) { | 
| 554 _movp(Arg, Mem); | 571 _movp(Arg, Mem); | 
| 555 } else { | 572 } else { | 
| 556 _mov(Arg, Mem); | 573 _mov(Arg, Mem); | 
| 557 } | 574 } | 
| 558 } | 575 } | 
| 559 } | 576 } | 
| 560 | 577 | 
| 561 Type TargetX8632::stackSlotType() { return IceType_i32; } | 578 Type TargetX8632::stackSlotType() { return IceType_i32; } | 
| 562 | 579 | 
| 563 void TargetX8632::addProlog(CfgNode *Node) { | 580 void TargetX8632::addProlog(CfgNode *Node) { | 
| 564 // If SimpleCoalescing is false, each variable without a register | 581 // If SimpleCoalescing is false, each variable without a register | 
| 565 // gets its own unique stack slot, which leads to large stack | 582 // gets its own unique stack slot, which leads to large stack | 
| 566 // frames. If SimpleCoalescing is true, then each "global" variable | 583 // frames. If SimpleCoalescing is true, then each "global" variable | 
| 567 // without a register gets its own slot, but "local" variable slots | 584 // without a register gets its own slot, but "local" variable slots | 
| 568 // are reused across basic blocks. E.g., if A and B are local to | 585 // are reused across basic blocks. E.g., if A and B are local to | 
| 569 // block 1 and C is local to block 2, then C may share a slot with A | 586 // block 1 and C is local to block 2, then C may share a slot with A | 
| 570 // or B. | 587 // or B. | 
| 571 const bool SimpleCoalescing = true; | 588 const bool SimpleCoalescing = true; | 
| 572 size_t InArgsSizeBytes = 0; | 589 size_t InArgsSizeBytes = 0; | 
| 573 size_t RetIpSizeBytes = 4; | |
| 574 size_t PreservedRegsSizeBytes = 0; | 590 size_t PreservedRegsSizeBytes = 0; | 
| 575 LocalsSizeBytes = 0; | 591 LocalsSizeBytes = 0; | 
| 576 Context.init(Node); | 592 Context.init(Node); | 
| 577 Context.setInsertPoint(Context.getCur()); | 593 Context.setInsertPoint(Context.getCur()); | 
| 578 | 594 | 
| 579 // Determine stack frame offsets for each Variable without a | 595 // Determine stack frame offsets for each Variable without a | 
| 580 // register assignment. This can be done as one variable per stack | 596 // register assignment. This can be done as one variable per stack | 
| 581 // slot. Or, do coalescing by running the register allocator again | 597 // slot. Or, do coalescing by running the register allocator again | 
| 582 // with an infinite set of registers (as a side effect, this gives | 598 // with an infinite set of registers (as a side effect, this gives | 
| 583 // variables a second chance at physical register assignment). | 599 // variables a second chance at physical register assignment). | 
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 650 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) | 666 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) | 
| 651 .count() == 0); | 667 .count() == 0); | 
| 652 PreservedRegsSizeBytes += 4; | 668 PreservedRegsSizeBytes += 4; | 
| 653 Variable *ebp = getPhysicalRegister(Reg_ebp); | 669 Variable *ebp = getPhysicalRegister(Reg_ebp); | 
| 654 Variable *esp = getPhysicalRegister(Reg_esp); | 670 Variable *esp = getPhysicalRegister(Reg_esp); | 
| 655 const bool SuppressStackAdjustment = true; | 671 const bool SuppressStackAdjustment = true; | 
| 656 _push(ebp, SuppressStackAdjustment); | 672 _push(ebp, SuppressStackAdjustment); | 
| 657 _mov(ebp, esp); | 673 _mov(ebp, esp); | 
| 658 } | 674 } | 
| 659 | 675 | 
| 676 if (NeedsStackAlignment) { | |
| 677 LocalsSizeBytes += getAdjustmentToAlignStackInBytes( | |
| 678 X86_RET_IP_SIZE_BYTES + LocalsSizeBytes + PreservedRegsSizeBytes); | |
| 679 } | |
| 680 | |
| 660 // Generate "sub esp, LocalsSizeBytes" | 681 // Generate "sub esp, LocalsSizeBytes" | 
| 661 if (LocalsSizeBytes) | 682 if (LocalsSizeBytes) | 
| 662 _sub(getPhysicalRegister(Reg_esp), | 683 _sub(getPhysicalRegister(Reg_esp), | 
| 663 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes)); | 684 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes)); | 
| 664 | 685 | 
| 665 resetStackAdjustment(); | 686 resetStackAdjustment(); | 
| 666 | 687 | 
| 667 // Fill in stack offsets for stack args, and copy args into registers | 688 // Fill in stack offsets for stack args, and copy args into registers | 
| 668 // for those that were register-allocated. Args are pushed right to | 689 // for those that were register-allocated. Args are pushed right to | 
| 669 // left, so Arg[0] is closest to the stack/frame pointer. | 690 // left, so Arg[0] is closest to the stack/frame pointer. | 
| 670 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 691 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 
| 671 size_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes; | 692 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; | 
| 672 if (!IsEbpBasedFrame) | 693 if (!IsEbpBasedFrame) | 
| 673 BasicFrameOffset += LocalsSizeBytes; | 694 BasicFrameOffset += LocalsSizeBytes; | 
| 674 | 695 | 
| 675 unsigned NumXmmArgs = 0; | 696 unsigned NumXmmArgs = 0; | 
| 676 for (SizeT i = 0; i < Args.size(); ++i) { | 697 for (SizeT i = 0; i < Args.size(); ++i) { | 
| 677 Variable *Arg = Args[i]; | 698 Variable *Arg = Args[i]; | 
| 678 // Skip arguments passed in registers. | 699 // Skip arguments passed in registers. | 
| 679 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { | 700 if (isVectorType(Arg->getType()) && NumXmmArgs < X86_MAX_XMM_ARGS) { | 
| 680 ++NumXmmArgs; | 701 ++NumXmmArgs; | 
| 681 continue; | 702 continue; | 
| (...skipping 270 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 952 | 973 | 
| 953 REGX8632_TABLE | 974 REGX8632_TABLE | 
| 954 | 975 | 
| 955 #undef X | 976 #undef X | 
| 956 | 977 | 
| 957 return Registers; | 978 return Registers; | 
| 958 } | 979 } | 
| 959 | 980 | 
| 960 void TargetX8632::lowerAlloca(const InstAlloca *Inst) { | 981 void TargetX8632::lowerAlloca(const InstAlloca *Inst) { | 
| 961 IsEbpBasedFrame = true; | 982 IsEbpBasedFrame = true; | 
| 962 // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize | 983 NeedsStackAlignment = true; | 
| 
Jim Stichnoth
2014/08/05 18:09:27
A couple things here.
1. It looks like alloca lowering [reviewer comment truncated in extraction]
 
wala
2014/08/05 23:57:03
Done.
 | |
| 963 // the number of adjustments of esp, etc. | 984 // TODO(sehr,stichnot): align allocated memory, minimize the number of | 
| 985 // adjustments of esp, etc. | |
| 986 | |
| 964 Variable *esp = getPhysicalRegister(Reg_esp); | 987 Variable *esp = getPhysicalRegister(Reg_esp); | 
| 965 Operand *TotalSize = legalize(Inst->getSizeInBytes()); | 988 Operand *TotalSize = legalize(Inst->getSizeInBytes()); | 
| 966 Variable *Dest = Inst->getDest(); | 989 Variable *Dest = Inst->getDest(); | 
| 967 _sub(esp, TotalSize); | 990 if (ConstantInteger *ConstantTotalSize = | 
| 991 llvm::dyn_cast<ConstantInteger>(TotalSize)) { | |
| 992 uint32_t Value = ConstantTotalSize->getValue(); | |
| 993 Value += getAdjustmentToAlignStackInBytes(Value); | |
| 994 if (Value) { | |
| 995 _sub(esp, Ctx->getConstantInt(IceType_i32, Value)); | |
| 996 } | |
| 997 } else { | |
| 998 Variable *T = makeReg(IceType_i32); | |
| 999 _mov(T, TotalSize); | |
| 1000 // Non-constant sizes need to be adjusted to the next highest | |
| 1001 // multiple of the stack alignment at runtime. | |
| 1002 _add(T, Ctx->getConstantInt(IceType_i32, 15)); | |
| 
Jim Stichnoth
2014/08/05 18:09:26
Should these constants be in terms of X86_STACK_ALIGNMENT_BYTES? [reviewer comment truncated in extraction]
 
wala
2014/08/05 23:57:03
Done.
 | |
| 1003 _and(T, Ctx->getConstantInt(IceType_i32, -16)); | |
| 1004 _sub(esp, T); | |
| 1005 } | |
| 968 _mov(Dest, esp); | 1006 _mov(Dest, esp); | 
| 969 } | 1007 } | 
| 970 | 1008 | 
| 971 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | 1009 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | 
| 972 Variable *Dest = Inst->getDest(); | 1010 Variable *Dest = Inst->getDest(); | 
| 973 Operand *Src0 = legalize(Inst->getSrc(0)); | 1011 Operand *Src0 = legalize(Inst->getSrc(0)); | 
| 974 Operand *Src1 = legalize(Inst->getSrc(1)); | 1012 Operand *Src1 = legalize(Inst->getSrc(1)); | 
| 975 if (Dest->getType() == IceType_i64) { | 1013 if (Dest->getType() == IceType_i64) { | 
| 976 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 1014 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 
| 977 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1015 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 
| (...skipping 607 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1585 _br(Inst->getTargetUnconditional()); | 1623 _br(Inst->getTargetUnconditional()); | 
| 1586 } else { | 1624 } else { | 
| 1587 Operand *Src0 = legalize(Inst->getCondition()); | 1625 Operand *Src0 = legalize(Inst->getCondition()); | 
| 1588 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1626 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 
| 1589 _cmp(Src0, Zero); | 1627 _cmp(Src0, Zero); | 
| 1590 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); | 1628 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); | 
| 1591 } | 1629 } | 
| 1592 } | 1630 } | 
| 1593 | 1631 | 
| 1594 void TargetX8632::lowerCall(const InstCall *Instr) { | 1632 void TargetX8632::lowerCall(const InstCall *Instr) { | 
| 1633 // x86-32 calling convention: | |
| 1634 // | |
| 1635 // * At the point before the call, the stack must be aligned to 16 | |
| 1636 // bytes. | |
| 1637 // | |
| 1638 // * The first four arguments of vector type, regardless of their | |
| 1639 // position relative to the other arguments in the argument list, are | |
| 1640 // placed in registers xmm0 - xmm3. | |
| 1641 // | |
| 1642 // * Other arguments are placed on the stack ordered according to the | |
| 
Jim Stichnoth
2014/08/05 18:09:27
This is unclear to me (and possibly wrong).  Does [reviewer comment truncated in extraction]
 
wala
2014/08/05 23:57:03
Done.
 | |
| 1643 // argument list and get assigned stack locations with increasing | |
| 1644 // addresses. The first stack argument is placed at what is the bottom | |
| 1645 // of the stack at the point before the call. | |
| 1646 // | |
| 1647 // * Stack arguments of vector type are aligned to the next highest | |
| 1648 // multiple of 16 bytes. Other stack arguments are aligned to the | |
| 1649 // next highest multiple of 4 bytes. | |
| 1650 NeedsStackAlignment = true; | |
| 1651 | |
| 1652 OperandList XmmArgs; | |
| 1653 OperandList StackArgs, StackArgLocations; | |
| 1654 uint32_t ParameterAreaSizeBytes = 0; | |
| 1655 | |
| 1595 // Classify each argument operand according to the location where the | 1656 // Classify each argument operand according to the location where the | 
| 1596 // argument is passed. | 1657 // argument is passed. | 
| 1597 OperandList XmmArgs; | |
| 1598 OperandList StackArgs; | |
| 1599 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | 1658 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) { | 
| 1600 Operand *Arg = Instr->getArg(i); | 1659 Operand *Arg = Instr->getArg(i); | 
| 1601 if (isVectorType(Arg->getType()) && XmmArgs.size() < X86_MAX_XMM_ARGS) { | 1660 Type Ty = Arg->getType(); | 
| 1661 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | |
| 1662 assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 || | |
| 1663 Ty == IceType_f64 || isVectorType(Ty)); | |
| 1664 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { | |
| 1602 XmmArgs.push_back(Arg); | 1665 XmmArgs.push_back(Arg); | 
| 1603 } else { | 1666 } else { | 
| 1604 StackArgs.push_back(Arg); | 1667 StackArgs.push_back(Arg); | 
| 1668 if (isVectorType(Arg->getType())) { | |
| 1669 ParameterAreaSizeBytes += | |
| 1670 getAdjustmentToAlignStackInBytes(ParameterAreaSizeBytes); | |
| 1671 } | |
| 1672 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); | |
| 1673 Constant *Loc = Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes); | |
| 1674 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); | |
| 1675 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | |
| 1605 } | 1676 } | 
| 1606 } | 1677 } | 
| 1607 // For stack arguments, generate a sequence of push instructions, | 1678 | 
| 1608 // pushing right to left, keeping track of stack offsets in case a | 1679 // Adjust the parameter area so that the stack is aligned. It is | 
| 1609 // push involves a stack operand and we are using an esp-based frame. | 1680 // assumed that the stack is already aligned at the start of the | 
| 1610 uint32_t StackOffset = 0; | 1681 // calling sequence. | 
| 1611 // TODO: Consolidate the stack adjustment for function calls by | 1682 ParameterAreaSizeBytes += | 
| 1612 // reserving enough space for the arguments only once. | 1683 getAdjustmentToAlignStackInBytes(ParameterAreaSizeBytes); | 
| 1684 | |
| 1685 // Subtract the appropriate amount for the argument area. This also | |
| 1686 // takes care of setting the stack adjustment during emission. | |
| 1613 // | 1687 // | 
| 1614 // TODO: If for some reason the call instruction gets dead-code | 1688 // TODO: If for some reason the call instruction gets dead-code | 
| 1615 // eliminated after lowering, we would need to ensure that the | 1689 // eliminated after lowering, we would need to ensure that the | 
| 1616 // pre-call push instructions and the post-call esp adjustment get | 1690 // pre-call and the post-call esp adjustment get eliminated as well. | 
| 1617 // eliminated as well. | 1691 if (ParameterAreaSizeBytes) { | 
| 1618 for (OperandList::reverse_iterator I = StackArgs.rbegin(), | 1692 _adjust_stack(ParameterAreaSizeBytes); | 
| 1619 E = StackArgs.rend(); I != E; ++I) { | |
| 1620 Operand *Arg = legalize(*I); | |
| 1621 if (Arg->getType() == IceType_i64) { | |
| 1622 _push(hiOperand(Arg)); | |
| 1623 _push(loOperand(Arg)); | |
| 1624 } else if (Arg->getType() == IceType_f64 || isVectorType(Arg->getType())) { | |
| 1625 // If the Arg turns out to be a memory operand, more than one push | |
| 1626 // instruction is required. This ends up being somewhat clumsy in | |
| 1627 // the current IR, so we use a workaround. Force the operand into | |
| 1628 // a (xmm) register, and then push the register. An xmm register | |
| 1629 // push is actually not possible in x86, but the Push instruction | |
| 1630 // emitter handles this by decrementing the stack pointer and | |
| 1631 // directly writing the xmm register value. | |
| 1632 _push(legalize(Arg, Legal_Reg)); | |
| 1633 } else { | |
| 1634 // Otherwise PNaCl requires parameter types to be at least 32-bits. | |
| 1635 assert(Arg->getType() == IceType_f32 || Arg->getType() == IceType_i32); | |
| 1636 _push(Arg); | |
| 1637 } | |
| 1638 StackOffset += typeWidthInBytesOnStack(Arg->getType()); | |
| 1639 } | 1693 } | 
| 1694 | |
| 1695 // Copy arguments that are passed on the stack to the appropriate | |
| 1696 // stack locations. | |
| 1697 for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { | |
| 1698 lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); | |
| 1699 // TODO: Consider calling postLower() here to reduce the register | |
| 1700 // pressure associated with using too many infinite weight | |
| 1701 // temporaries when lowering the call sequence in -Om1 mode. | |
| 1702 } | |
| 1703 | |
| 1640 // Copy arguments to be passed in registers to the appropriate | 1704 // Copy arguments to be passed in registers to the appropriate | 
| 1641 // registers. | 1705 // registers. | 
| 1642 // TODO: Investigate the impact of lowering arguments passed in | 1706 // TODO: Investigate the impact of lowering arguments passed in | 
| 1643 // registers after lowering stack arguments as opposed to the other | 1707 // registers after lowering stack arguments as opposed to the other | 
| 1644 // way around. Lowering register arguments after stack arguments may | 1708 // way around. Lowering register arguments after stack arguments may | 
| 1645 // reduce register pressure. On the other hand, lowering register | 1709 // reduce register pressure. On the other hand, lowering register | 
| 1646 // arguments first (before stack arguments) may result in more compact | 1710 // arguments first (before stack arguments) may result in more compact | 
| 1647 // code, as the memory operand displacements may end up being smaller | 1711 // code, as the memory operand displacements may end up being smaller | 
| 1648 // before any stack adjustment is done. | 1712 // before any stack adjustment is done. | 
| 1649 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | 1713 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | 
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1693 } | 1757 } | 
| 1694 } | 1758 } | 
| 1695 // TODO(stichnot): LEAHACK: remove Legal_All (and use default) once | 1759 // TODO(stichnot): LEAHACK: remove Legal_All (and use default) once | 
| 1696 // a proper emitter is used. | 1760 // a proper emitter is used. | 
| 1697 Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All); | 1761 Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All); | 
| 1698 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); | 1762 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); | 
| 1699 Context.insert(NewCall); | 1763 Context.insert(NewCall); | 
| 1700 if (ReturnRegHi) | 1764 if (ReturnRegHi) | 
| 1701 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | 1765 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | 
| 1702 | 1766 | 
| 1703 // Add the appropriate offset to esp. | 1767 // Add the appropriate offset to esp. The call instruction takes care | 
| 1704 if (StackOffset) { | 1768 // of resetting the stack offset during emission. | 
| 1769 if (ParameterAreaSizeBytes) { | |
| 1705 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); | 1770 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); | 
| 1706 _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset)); | 1771 _add(esp, Ctx->getConstantInt(IceType_i32, ParameterAreaSizeBytes)); | 
| 1707 } | 1772 } | 
| 1708 | 1773 | 
| 1709 // Insert a register-kill pseudo instruction. | 1774 // Insert a register-kill pseudo instruction. | 
| 1710 VarList KilledRegs; | 1775 VarList KilledRegs; | 
| 1711 for (SizeT i = 0; i < ScratchRegs.size(); ++i) { | 1776 for (SizeT i = 0; i < ScratchRegs.size(); ++i) { | 
| 1712 if (ScratchRegs[i]) | 1777 if (ScratchRegs[i]) | 
| 1713 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i)); | 1778 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i)); | 
| 1714 } | 1779 } | 
| 1715 Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall)); | 1780 Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall)); | 
| 1716 | 1781 | 
| (...skipping 458 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2175 bool CanUsePextr = | 2240 bool CanUsePextr = | 
| 2176 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; | 2241 Ty == IceType_v8i16 || Ty == IceType_v8i1 || InstructionSet >= SSE4_1; | 
| 2177 if (CanUsePextr && Ty != IceType_v4f32) { | 2242 if (CanUsePextr && Ty != IceType_v4f32) { | 
| 2178 // Use pextrb, pextrw, or pextrd. | 2243 // Use pextrb, pextrw, or pextrd. | 
| 2179 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); | 2244 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); | 
| 2180 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); | 2245 Variable *SourceVectR = legalizeToVar(SourceVectNotLegalized); | 
| 2181 _pextr(ExtractedElementR, SourceVectR, Mask); | 2246 _pextr(ExtractedElementR, SourceVectR, Mask); | 
| 2182 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 2247 } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 
| 2183 // Use pshufd and movd/movss. | 2248 // Use pshufd and movd/movss. | 
| 2184 // | 2249 // | 
| 2185 // ALIGNHACK: Force vector operands to registers in instructions that | 2250 // ALIGNHACK: Force vector operands to registers in instructions | 
| 2186 // require aligned memory operands until support for stack alignment | 2251 // that require aligned memory operands until support for data | 
| 2187 // is implemented. | 2252 // alignment is implemented. | 
| 2188 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) | 2253 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) | 
| 2189 Operand *SourceVectRM = | 2254 Operand *SourceVectRM = | 
| 2190 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 2255 legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 
| 2191 Variable *T = NULL; | 2256 Variable *T = NULL; | 
| 2192 if (Index) { | 2257 if (Index) { | 
| 2193 // The shuffle only needs to occur if the element to be extracted | 2258 // The shuffle only needs to occur if the element to be extracted | 
| 2194 // is not at the lowest index. | 2259 // is not at the lowest index. | 
| 2195 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); | 2260 Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); | 
| 2196 T = makeReg(Ty); | 2261 T = makeReg(Ty); | 
| 2197 _pshufd(T, ALIGN_HACK(SourceVectRM), Mask); | 2262 _pshufd(T, ALIGN_HACK(SourceVectRM), Mask); | 
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2262 | 2327 | 
| 2263 if (Condition == InstFcmp::True) { | 2328 if (Condition == InstFcmp::True) { | 
| 2264 // makeVectorOfOnes() requires an integer vector type. | 2329 // makeVectorOfOnes() requires an integer vector type. | 
| 2265 T = makeVectorOfMinusOnes(IceType_v4i32); | 2330 T = makeVectorOfMinusOnes(IceType_v4i32); | 
| 2266 } else if (Condition == InstFcmp::False) { | 2331 } else if (Condition == InstFcmp::False) { | 
| 2267 T = makeVectorOfZeros(Dest->getType()); | 2332 T = makeVectorOfZeros(Dest->getType()); | 
| 2268 } else { | 2333 } else { | 
| 2269 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2334 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 
| 2270 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 2335 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 
| 2271 | 2336 | 
| 2272 // ALIGNHACK: Without support for stack alignment, both operands to | 2337 // ALIGNHACK: Without support for data alignment, both operands to | 
| 2273 // cmpps need to be forced into registers. Once support for stack | 2338 // cmpps need to be forced into registers. Once support for data | 
| 2274 // alignment is implemented, remove LEGAL_HACK. | 2339 // alignment is implemented, remove LEGAL_HACK. | 
| 2275 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | 2340 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | 
| 2276 switch (Condition) { | 2341 switch (Condition) { | 
| 2277 default: { | 2342 default: { | 
| 2278 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; | 2343 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; | 
| 2279 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); | 2344 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); | 
| 2280 T = makeReg(Src0RM->getType()); | 2345 T = makeReg(Src0RM->getType()); | 
| 2281 _movp(T, Src0RM); | 2346 _movp(T, Src0RM); | 
| 2282 _cmpps(T, LEGAL_HACK(Src1RM), Predicate); | 2347 _cmpps(T, LEGAL_HACK(Src1RM), Predicate); | 
| 2283 } break; | 2348 } break; | 
| (...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2403 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); | 2468 Variable *HighOrderBits = makeVectorOfHighOrderBits(Ty); | 
| 2404 _movp(T0, Src0RM); | 2469 _movp(T0, Src0RM); | 
| 2405 _pxor(T0, HighOrderBits); | 2470 _pxor(T0, HighOrderBits); | 
| 2406 _movp(T1, Src1RM); | 2471 _movp(T1, Src1RM); | 
| 2407 _pxor(T1, HighOrderBits); | 2472 _pxor(T1, HighOrderBits); | 
| 2408 Src0RM = T0; | 2473 Src0RM = T0; | 
| 2409 Src1RM = T1; | 2474 Src1RM = T1; | 
| 2410 } | 2475 } | 
| 2411 | 2476 | 
| 2412 // TODO: ALIGNHACK: Both operands to compare instructions need to be | 2477 // TODO: ALIGNHACK: Both operands to compare instructions need to be | 
| 2413 // in registers until stack alignment support is implemented. Once | 2478 // in registers until data alignment support is implemented. Once | 
| 2414 // there is support for stack alignment, LEGAL_HACK can be removed. | 2479 // there is support for data alignment, LEGAL_HACK can be removed. | 
| 2415 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | 2480 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | 
| 2416 Variable *T = makeReg(Ty); | 2481 Variable *T = makeReg(Ty); | 
| 2417 switch (Condition) { | 2482 switch (Condition) { | 
| 2418 default: | 2483 default: | 
| 2419 llvm_unreachable("unexpected condition"); | 2484 llvm_unreachable("unexpected condition"); | 
| 2420 break; | 2485 break; | 
| 2421 case InstIcmp::Eq: { | 2486 case InstIcmp::Eq: { | 
| 2422 _movp(T, Src0RM); | 2487 _movp(T, Src0RM); | 
| 2423 _pcmpeq(T, LEGAL_HACK(Src1RM)); | 2488 _pcmpeq(T, LEGAL_HACK(Src1RM)); | 
| 2424 } break; | 2489 } break; | 
| (...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2624 // insertelement into index 3 (result is stored in T): | 2689 // insertelement into index 3 (result is stored in T): | 
| 2625 // T := SourceVectRM | 2690 // T := SourceVectRM | 
| 2626 // ElementR := ElementR[0, 0] T[0, 2] | 2691 // ElementR := ElementR[0, 0] T[0, 2] | 
| 2627 // T := T[0, 1] ElementR[3, 0] | 2692 // T := T[0, 1] ElementR[3, 0] | 
| 2628 const unsigned char Mask1[3] = {0, 192, 128}; | 2693 const unsigned char Mask1[3] = {0, 192, 128}; | 
| 2629 const unsigned char Mask2[3] = {227, 196, 52}; | 2694 const unsigned char Mask2[3] = {227, 196, 52}; | 
| 2630 | 2695 | 
| 2631 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); | 2696 Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); | 
| 2632 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); | 2697 Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); | 
| 2633 | 2698 | 
| 2634 // ALIGNHACK: Force vector operands to registers in instructions that | 2699 // ALIGNHACK: Force vector operands to registers in instructions | 
| 2635 // require aligned memory operands until support for stack alignment | 2700 // that require aligned memory operands until support for data | 
| 2636 // is implemented. | 2701 // alignment is implemented. | 
| 2637 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) | 2702 #define ALIGN_HACK(Vect) legalizeToVar((Vect)) | 
| 2638 if (Index == 1) { | 2703 if (Index == 1) { | 
| 2639 SourceVectRM = ALIGN_HACK(SourceVectRM); | 2704 SourceVectRM = ALIGN_HACK(SourceVectRM); | 
| 2640 _shufps(ElementR, SourceVectRM, Mask1Constant); | 2705 _shufps(ElementR, SourceVectRM, Mask1Constant); | 
| 2641 _shufps(ElementR, SourceVectRM, Mask2Constant); | 2706 _shufps(ElementR, SourceVectRM, Mask2Constant); | 
| 2642 _movp(Inst->getDest(), ElementR); | 2707 _movp(Inst->getDest(), ElementR); | 
| 2643 } else { | 2708 } else { | 
| 2644 Variable *T = makeReg(Ty); | 2709 Variable *T = makeReg(Ty); | 
| 2645 _movp(T, SourceVectRM); | 2710 _movp(T, SourceVectRM); | 
| 2646 _shufps(ElementR, T, Mask1Constant); | 2711 _shufps(ElementR, T, Mask1Constant); | 
| (...skipping 267 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2914 case Intrinsics::Memmove: { | 2979 case Intrinsics::Memmove: { | 
| 2915 InstCall *Call = makeHelperCall("memmove", NULL, 3); | 2980 InstCall *Call = makeHelperCall("memmove", NULL, 3); | 
| 2916 Call->addArg(Instr->getArg(0)); | 2981 Call->addArg(Instr->getArg(0)); | 
| 2917 Call->addArg(Instr->getArg(1)); | 2982 Call->addArg(Instr->getArg(1)); | 
| 2918 Call->addArg(Instr->getArg(2)); | 2983 Call->addArg(Instr->getArg(2)); | 
| 2919 lowerCall(Call); | 2984 lowerCall(Call); | 
| 2920 return; | 2985 return; | 
| 2921 } | 2986 } | 
| 2922 case Intrinsics::Memset: { | 2987 case Intrinsics::Memset: { | 
| 2923 // The value operand needs to be extended to a stack slot size | 2988 // The value operand needs to be extended to a stack slot size | 
| 2924 // because "push" only works for a specific operand size. | 2989 // because the PNaCl ABI requires arguments to be at least 32 bits | 
| 2990 // wide. | |
| 2925 Operand *ValOp = Instr->getArg(1); | 2991 Operand *ValOp = Instr->getArg(1); | 
| 2926 assert(ValOp->getType() == IceType_i8); | 2992 assert(ValOp->getType() == IceType_i8); | 
| 2927 Variable *ValExt = Func->makeVariable(stackSlotType(), Context.getNode()); | 2993 Variable *ValExt = Func->makeVariable(stackSlotType(), Context.getNode()); | 
| 2928 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp)); | 2994 lowerCast(InstCast::create(Func, InstCast::Zext, ValExt, ValOp)); | 
| 2929 InstCall *Call = makeHelperCall("memset", NULL, 3); | 2995 InstCall *Call = makeHelperCall("memset", NULL, 3); | 
| 2930 Call->addArg(Instr->getArg(0)); | 2996 Call->addArg(Instr->getArg(0)); | 
| 2931 Call->addArg(ValExt); | 2997 Call->addArg(ValExt); | 
| 2932 Call->addArg(Instr->getArg(2)); | 2998 Call->addArg(Instr->getArg(2)); | 
| 2933 lowerCall(Call); | 2999 lowerCall(Call); | 
| 2934 return; | 3000 return; | 
| (...skipping 648 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3583 Variable *Dest = Inst->getDest(); | 3649 Variable *Dest = Inst->getDest(); | 
| 3584 Operand *SrcT = Inst->getTrueOperand(); | 3650 Operand *SrcT = Inst->getTrueOperand(); | 
| 3585 Operand *SrcF = Inst->getFalseOperand(); | 3651 Operand *SrcF = Inst->getFalseOperand(); | 
| 3586 Operand *Condition = Inst->getCondition(); | 3652 Operand *Condition = Inst->getCondition(); | 
| 3587 | 3653 | 
| 3588 if (isVectorType(Dest->getType())) { | 3654 if (isVectorType(Dest->getType())) { | 
| 3589 Type SrcTy = SrcT->getType(); | 3655 Type SrcTy = SrcT->getType(); | 
| 3590 Variable *T = makeReg(SrcTy); | 3656 Variable *T = makeReg(SrcTy); | 
| 3591 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); | 3657 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); | 
| 3592 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); | 3658 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); | 
| 3593 // ALIGNHACK: Until stack alignment support is implemented, vector | 3659 // ALIGNHACK: Until data alignment support is implemented, vector | 
| 3594 // instructions need to have vector operands in registers. Once | 3660 // instructions need to have vector operands in registers. Once | 
| 3595 // there is support for stack alignment, LEGAL_HACK can be removed. | 3661 // there is support for data alignment, LEGAL_HACK can be removed. | 
| 3596 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | 3662 #define LEGAL_HACK(Vect) legalizeToVar((Vect)) | 
| 3597 if (InstructionSet >= SSE4_1) { | 3663 if (InstructionSet >= SSE4_1) { | 
| 3598 // TODO(wala): If the condition operand is a constant, use blendps | 3664 // TODO(wala): If the condition operand is a constant, use blendps | 
| 3599 // or pblendw. | 3665 // or pblendw. | 
| 3600 // | 3666 // | 
| 3601 // Use blendvps or pblendvb to implement select. | 3667 // Use blendvps or pblendvb to implement select. | 
| 3602 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || | 3668 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || | 
| 3603 SrcTy == IceType_v4f32) { | 3669 SrcTy == IceType_v4f32) { | 
| 3604 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | 3670 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | 
| 3605 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); | 3671 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); | 
| (...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 3680 _mov(Dest, SrcF); | 3746 _mov(Dest, SrcF); | 
| 3681 } | 3747 } | 
| 3682 | 3748 | 
| 3683 Context.insert(Label); | 3749 Context.insert(Label); | 
| 3684 } | 3750 } | 
| 3685 | 3751 | 
| 3686 void TargetX8632::lowerStore(const InstStore *Inst) { | 3752 void TargetX8632::lowerStore(const InstStore *Inst) { | 
| 3687 Operand *Value = Inst->getData(); | 3753 Operand *Value = Inst->getData(); | 
| 3688 Operand *Addr = Inst->getAddr(); | 3754 Operand *Addr = Inst->getAddr(); | 
| 3689 OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType()); | 3755 OperandX8632Mem *NewAddr = FormMemoryOperand(Addr, Value->getType()); | 
| 3756 Type Ty = NewAddr->getType(); | |
| 3690 | 3757 | 
| 3691 if (NewAddr->getType() == IceType_i64) { | 3758 if (Ty == IceType_i64) { | 
| 3692 Value = legalize(Value); | 3759 Value = legalize(Value); | 
| 3693 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true); | 3760 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true); | 
| 3694 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true); | 3761 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true); | 
| 3695 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); | 3762 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); | 
| 3696 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); | 3763 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); | 
| 3764 } else if (isVectorType(Ty)) { | |
| 3765 _storep(legalizeToVar(Value), NewAddr); | |
| 3697 } else { | 3766 } else { | 
| 3698 Value = legalize(Value, Legal_Reg | Legal_Imm, true); | 3767 Value = legalize(Value, Legal_Reg | Legal_Imm, true); | 
| 3699 _store(Value, NewAddr); | 3768 _store(Value, NewAddr); | 
| 3700 } | 3769 } | 
| 3701 } | 3770 } | 
| 3702 | 3771 | 
| 3703 void TargetX8632::doAddressOptStore() { | 3772 void TargetX8632::doAddressOptStore() { | 
| 3704 InstStore *Inst = llvm::cast<InstStore>(*Context.getCur()); | 3773 InstStore *Inst = llvm::cast<InstStore>(*Context.getCur()); | 
| 3705 Operand *Data = Inst->getData(); | 3774 Operand *Data = Inst->getData(); | 
| 3706 Operand *Addr = Inst->getAddr(); | 3775 Operand *Addr = Inst->getAddr(); | 
| (...skipping 322 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4029 SizeT NumVars = Src->getNumVars(); | 4098 SizeT NumVars = Src->getNumVars(); | 
| 4030 for (SizeT J = 0; J < NumVars; ++J) { | 4099 for (SizeT J = 0; J < NumVars; ++J) { | 
| 4031 Variable *Var = Src->getVar(J); | 4100 Variable *Var = Src->getVar(J); | 
| 4032 if (Var->hasReg()) | 4101 if (Var->hasReg()) | 
| 4033 continue; | 4102 continue; | 
| 4034 if (!Var->getWeight().isInf()) | 4103 if (!Var->getWeight().isInf()) | 
| 4035 continue; | 4104 continue; | 
| 4036 llvm::SmallBitVector AvailableTypedRegisters = | 4105 llvm::SmallBitVector AvailableTypedRegisters = | 
| 4037 AvailableRegisters & getRegisterSetForType(Var->getType()); | 4106 AvailableRegisters & getRegisterSetForType(Var->getType()); | 
| 4038 if (!AvailableTypedRegisters.any()) { | 4107 if (!AvailableTypedRegisters.any()) { | 
| 4039 // This is a hack in case we run out of physical registers | 4108 // This is a hack in case we run out of physical registers due | 
| 4040 // due to an excessive number of "push" instructions from | 4109 // to an excessively long code sequence, as might happen when | 
| 4041 // lowering a call. | 4110 // lowering arguments in lowerCall(). | 
| 4042 AvailableRegisters = WhiteList; | 4111 AvailableRegisters = WhiteList; | 
| 4043 AvailableTypedRegisters = | 4112 AvailableTypedRegisters = | 
| 4044 AvailableRegisters & getRegisterSetForType(Var->getType()); | 4113 AvailableRegisters & getRegisterSetForType(Var->getType()); | 
| 4045 } | 4114 } | 
| 4046 assert(AvailableTypedRegisters.any()); | 4115 assert(AvailableTypedRegisters.any()); | 
| 4047 int32_t RegNum = AvailableTypedRegisters.find_first(); | 4116 int32_t RegNum = AvailableTypedRegisters.find_first(); | 
| 4048 Var->setRegNum(RegNum); | 4117 Var->setRegNum(RegNum); | 
| 4049 AvailableRegisters[RegNum] = false; | 4118 AvailableRegisters[RegNum] = false; | 
| 4050 } | 4119 } | 
| 4051 } | 4120 } | 
| (...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 4163 for (SizeT i = 0; i < Size; ++i) { | 4232 for (SizeT i = 0; i < Size; ++i) { | 
| 4164 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 4233 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 
| 4165 } | 4234 } | 
| 4166 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 4235 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 
| 4167 } | 4236 } | 
| 4168 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 4237 Str << "\t" << (IsInternal ? ".local" : ".global") << "\t" << MangledName | 
| 4169 << "\n"; | 4238 << "\n"; | 
| 4170 } | 4239 } | 
| 4171 | 4240 | 
| 4172 } // end of namespace Ice | 4241 } // end of namespace Ice | 
| OLD | NEW |