Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp  (revision 116297)
+++ lib/Target/X86/X86ISelLowering.cpp  (working copy)
@@ -7570,11 +7570,68 @@
 }
 SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
-  // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
-  assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
+  assert(Subtarget->is64Bit() &&
+         "LowerVAARG only handles 64-bit va_arg!");
+  assert((Subtarget->isTargetLinux() ||
+          Subtarget->isTargetDarwin()) &&
+         "Unhandled target in LowerVAARG");
+  assert(Op.getNode()->getNumOperands() == 4);
+  SDValue Chain = Op.getOperand(0);
+  SDValue SrcPtr = Op.getOperand(1);
+  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+  unsigned Align = Op.getConstantOperandVal(3);
+  DebugLoc dl = Op.getDebugLoc();
-  report_fatal_error("VAArgInst is not yet implemented for x86-64!");
-  return SDValue();
+  EVT ArgVT = Op.getNode()->getValueType(0);
+  const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+  uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy);
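+  // For example, given IR like "%v = va_arg i8** %ap, i32", ArgVT is
+  // MVT::i32 and ArgSize is 4; for "va_arg ..., double" they are MVT::f64
+  // and 8.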
+  uint8_t ArgMode;
+
+  // Decide which area this value should be read from.
+  // TODO: Implement the AMD64 ABI in its entirety. This simple
+  // selection mechanism works only for the basic types.
+  if (ArgVT == MVT::f80) {
+    llvm_unreachable("va_arg for f80 not yet implemented");
+  } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
+    ArgMode = 2;  // Argument passed in XMM register. Use fp_offset.
+  } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) {
+    ArgMode = 1;  // Argument passed in GPR64 register(s). Use gp_offset.
+  } else {
+    llvm_unreachable("Unhandled argument type in LowerVAARG");
+  }
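+  // Concretely: an f32 or f64 lands in mode 2 (fp_offset), an i8..i64 in
+  // mode 1 (gp_offset); aggregates and f80 still need the full ABI logic
+  // noted in the TODO above.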
+
+  if (ArgMode == 2) {
+    // Sanity check: using fp_offset only makes sense if the function may
+    // touch the XMM registers at all; otherwise va_start never spilled
+    // them to the register save area.
+    const Function *Fn = DAG.getMachineFunction().getFunction();
+    bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
+    assert(!UseSoftFloat && !NoImplicitFloatOps && Subtarget->hasSSE1());
+  }
+
+  // Insert the VAARG_64 node into the DAG. It returns two values: the
+  // address of the variable argument and the updated chain.
+  SmallVector<SDValue, 11> InstOps;
+  InstOps.push_back(Chain);
+  InstOps.push_back(SrcPtr);
+  InstOps.push_back(DAG.getConstant(ArgSize, MVT::i32));
+  InstOps.push_back(DAG.getConstant(ArgMode, MVT::i8));
+  InstOps.push_back(DAG.getConstant(Align, MVT::i32));
+  SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
+  SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
+                                          VTs, &InstOps[0], InstOps.size(),
+                                          MVT::i64,
+                                          MachinePointerInfo(SV),
+                                          /*Align=*/0,
+                                          /*Volatile=*/false,
+                                          /*ReadMem=*/true,
+                                          /*WriteMem=*/true);
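+  // Note: the single memoperand (MachinePointerInfo(SV)) describes the
+  // va_list object itself; VAARG_64 both reads it and writes the updated
+  // offset/overflow pointer back, hence ReadMem and WriteMem together.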
+  Chain = VAARG.getValue(1);
+
+  // Load the next argument and return it.
+  return DAG.getLoad(ArgVT, dl,
+                     Chain,
+                     VAARG,
+                     MachinePointerInfo(),
+                     false, false, 0);
 }
 SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
@@ -8850,6 +8907,7 @@
   case X86ISD::PUNPCKHDQ:          return "X86ISD::PUNPCKHDQ";
   case X86ISD::PUNPCKHQDQ:         return "X86ISD::PUNPCKHQDQ";
   case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
+  case X86ISD::VAARG_64:           return "X86ISD::VAARG_64";
   case X86ISD::MINGW_ALLOCA:       return "X86ISD::MINGW_ALLOCA";
   }
 }
@@ -9411,6 +9469,261 @@
 }
 MachineBasicBlock *
+X86TargetLowering::EmitVAARG64WithCustomInserter(
+                   MachineInstr *MI,
+                   MachineBasicBlock *MBB) const {
+  // Emit va_arg instruction on X86-64.
+
+  // Operands to this pseudo-instruction:
+  //   0  ) Output  : destination address (reg)
+  //   1-5) Input   : va_list address (addr, i64mem)
+  //   6  ) ArgSize : size (in bytes) of vararg type
+  //   7  ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset
+  //   8  ) Align   : alignment of type
+  //   9  ) EFLAGS (implicit-def)
+  assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
+  assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");
+
+  unsigned DestReg = MI->getOperand(0).getReg();
+  MachineOperand &Base = MI->getOperand(1);
+  MachineOperand &Scale = MI->getOperand(2);
+  MachineOperand &Index = MI->getOperand(3);
+  MachineOperand &Disp = MI->getOperand(4);
+  MachineOperand &Segment = MI->getOperand(5);
+  unsigned ArgSize = MI->getOperand(6).getImm();
+  unsigned ArgMode = MI->getOperand(7).getImm();
+  unsigned Align = MI->getOperand(8).getImm();
+
+  // Memory Reference
+  assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
+  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+  // Machine Information
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
+  const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
+  DebugLoc DL = MI->getDebugLoc();
+
+  // struct va_list {
+  //   i32   gp_offset
+  //   i32   fp_offset
+  //   i64   overflow_area (address)
+  //   i64   reg_save_area (address)
+  // }
+  // sizeof(va_list) = 24
+  // alignment(va_list) = 8
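+  //
+  // This matches the SysV AMD64 ABI's C declaration (sketch, field names
+  // as in the ABI document):
+  //   typedef struct {
+  //     unsigned int gp_offset;    // next GPR arg: 0..48
+  //     unsigned int fp_offset;    // next XMM arg: 48..176
+  //     void *overflow_arg_area;   // next stack-passed argument
+  //     void *reg_save_area;       // base of the register save area
+  //   } va_list[1];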
+
+  unsigned TotalNumIntRegs = 6;
+  unsigned TotalNumXMMRegs = 8;
+  bool UseGPOffset = (ArgMode == 1);
+  bool UseFPOffset = (ArgMode == 2);
+  unsigned MaxOffset = TotalNumIntRegs * 8 +
+                       (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
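+  // i.e. MaxOffset is 48 when pulling from the GPR area (6 x 8 bytes) and
+  // 176 when pulling from the XMM area (48 + 8 x 16 bytes), matching the
+  // fp_offset bound in the ABI.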
+
+  // Round ArgSize up to a multiple of 8.
+  unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
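+  // e.g. ArgSize == 12 gives ArgSizeA8 == 16; multiples of 8 are unchanged.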
+  bool NeedsAlign = (Align > 8);
+
+  MachineBasicBlock *thisMBB = MBB;
+  MachineBasicBlock *overflowMBB;
+  MachineBasicBlock *offsetMBB;
+  MachineBasicBlock *endMBB;
+
+  unsigned OffsetDestReg = 0;    // Argument address computed by offsetMBB
+  unsigned OverflowDestReg = 0;  // Argument address computed by overflowMBB
+  unsigned OffsetReg = 0;
+
+  if (!UseGPOffset && !UseFPOffset) {
+    // If we only pull from the overflow region, we don't need a branch or
+    // any new basic blocks; control flow is unchanged.
+    OffsetDestReg = 0;  // unused
+    OverflowDestReg = DestReg;
+
+    offsetMBB = NULL;
+    overflowMBB = thisMBB;
+    endMBB = thisMBB;
+  } else {
+    // First emit code to check if gp_offset (or fp_offset) is below the
+    // bound. If so, pull the argument from reg_save_area (branch to
+    // offsetMBB); if not, pull it from overflow_area (branch to
+    // overflowMBB).
+    //
+    //       thisMBB
+    //         |     .
+    //         |        .
+    //     offsetMBB   overflowMBB
+    //         |        .
+    //         |     .
+    //        endMBB
+
+    // Registers for the PHI in endMBB.
+    OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
+    OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
+
+    const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+    MachineFunction *MF = MBB->getParent();
+    overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+    offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+    endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+    MachineFunction::iterator MBBIter = MBB;
+    ++MBBIter;
+
+    // Insert the new basic blocks.
+    MF->insert(MBBIter, offsetMBB);
+    MF->insert(MBBIter, overflowMBB);
+    MF->insert(MBBIter, endMBB);
+
+    // Transfer the remainder of MBB and its successor edges to endMBB.
+    endMBB->splice(endMBB->begin(), thisMBB,
+                   llvm::next(MachineBasicBlock::iterator(MI)),
+                   thisMBB->end());
+    endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+    // Make offsetMBB and overflowMBB successors of thisMBB.
+    thisMBB->addSuccessor(offsetMBB);
+    thisMBB->addSuccessor(overflowMBB);
+
+    // endMBB is a successor of both offsetMBB and overflowMBB.
+    offsetMBB->addSuccessor(endMBB);
+    overflowMBB->addSuccessor(endMBB);
+
+    // Load the current gp_offset (or fp_offset) value into a register.
+    OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+    BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
+      .addOperand(Base)
+      .addOperand(Scale)
+      .addOperand(Index)
+      .addDisp(Disp, UseFPOffset ? 4 : 0)
+      .addOperand(Segment)
+      .setMemRefs(MMOBegin, MMOEnd);
+
+    // Check if there is enough room left to pull this argument.
+    BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
+      .addReg(OffsetReg)
+      .addImm(MaxOffset + 8 - ArgSizeA8);
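+    // That is, use the register save area iff OffsetReg + ArgSizeA8 fits
+    // within MaxOffset. Since all three quantities are multiples of 8, the
+    // strict "OffsetReg + ArgSizeA8 > MaxOffset" overflow test can be
+    // rewritten as the unsigned ">= MaxOffset + 8 - ArgSizeA8" compare
+    // tested by the JAE below.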
+
+    // Branch to "overflowMBB" if offset >= max.
+    // Fall through to "offsetMBB" otherwise.
+    BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
+      .addMBB(overflowMBB);
+  }
+
+  // In offsetMBB, emit code to use the reg_save_area.
+  if (offsetMBB) {
+    assert(OffsetReg != 0);
+
+    // Read the reg_save_area address.
+    unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
+    BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
+      .addOperand(Base)
+      .addOperand(Scale)
+      .addOperand(Index)
+      .addDisp(Disp, 16)
+      .addOperand(Segment)
+      .setMemRefs(MMOBegin, MMOEnd);
+
+    // Zero-extend the offset to 64 bits.
+    unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
+    BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
+      .addImm(0)
+      .addReg(OffsetReg)
+      .addImm(X86::sub_32bit);
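+    // SUBREG_TO_REG with a zero immediate asserts that the upper 32 bits
+    // are already zero; that holds here because the 32-bit load of the
+    // offset (MOV32rm) implicitly zero-extends on x86-64.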
+
+    // Add the offset to the reg_save_area to get the final address.
+    BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
+      .addReg(OffsetReg64)
+      .addReg(RegSaveReg);
+
+    // Compute the offset for the next argument.
+    unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+    BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
+      .addReg(OffsetReg)
+      .addImm(UseFPOffset ? 16 : 8);
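+    // Each XMM slot in the register save area is 16 bytes wide and each
+    // GPR slot is 8, regardless of ArgSize.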
+
+    // Store it back into the va_list.
+    BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
+      .addOperand(Base)
+      .addOperand(Scale)
+      .addOperand(Index)
+      .addDisp(Disp, UseFPOffset ? 4 : 0)
+      .addOperand(Segment)
+      .addReg(NextOffsetReg)
+      .setMemRefs(MMOBegin, MMOEnd);
+
+    // Jump to endMBB.
+    BuildMI(offsetMBB, DL, TII->get(X86::JMP_4))
+      .addMBB(endMBB);
+  }
+
+  //
+  // Emit code to use the overflow area.
+  //
+
+  // Load the overflow_area address into a register.
+  unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
+  BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
+    .addOperand(Base)
+    .addOperand(Scale)
+    .addOperand(Index)
+    .addDisp(Disp, 8)
+    .addOperand(Segment)
+    .setMemRefs(MMOBegin, MMOEnd);
+
+  // If we need to align it, do so; otherwise just copy the address to
+  // OverflowDestReg.
+  if (NeedsAlign) {
+    // Align the overflow address.
+    assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2");
+    unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);
+
+    // aligned_addr = (addr + (align-1)) & ~(align-1)
+    BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
+      .addReg(OverflowAddrReg)
+      .addImm(Align-1);
+
+    BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
+      .addReg(TmpReg)
+      .addImm(~(uint64_t)(Align-1));
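+    // e.g. with Align == 16, an overflow address of 0x1008 becomes
+    // 0x1010: (0x1008 + 15) & ~15.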
+  } else {
+    BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
+      .addReg(OverflowAddrReg);
+  }
+
+  // Compute the next overflow address after this argument.
+  // (The overflow address should be kept 8-byte aligned.)
+  unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
+  BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
+    .addReg(OverflowDestReg)
+    .addImm(ArgSizeA8);
+
+  // Store the new overflow address.
+  BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
+    .addOperand(Base)
+    .addOperand(Scale)
+    .addOperand(Index)
+    .addDisp(Disp, 8)
+    .addOperand(Segment)
+    .addReg(NextAddrReg)
+    .setMemRefs(MMOBegin, MMOEnd);
+
+  // If we branched, emit the PHI to the front of endMBB.
+  if (offsetMBB) {
+    BuildMI(*endMBB, endMBB->begin(), DL,
+            TII->get(X86::PHI), DestReg)
+      .addReg(OffsetDestReg).addMBB(offsetMBB)
+      .addReg(OverflowDestReg).addMBB(overflowMBB);
+  }
+
+  // Erase the pseudo instruction.
+  MI->eraseFromParent();
+
+  return endMBB;
+}
+
+MachineBasicBlock *
 X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
                    MachineInstr *MI,
                    MachineBasicBlock *MBB) const {
@@ -9915,6 +10228,9 @@
                      false);
   case X86::VASTART_SAVE_XMM_REGS:
     return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
+
+  case X86::VAARG_64:
+    return EmitVAARG64WithCustomInserter(MI, BB);
   }
 }