Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp  (revision 116297)
+++ lib/Target/X86/X86ISelLowering.cpp  (working copy)
@@ -7570,11 +7570,68 @@
 }
 SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
-  // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
-  assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
+  assert(Subtarget->is64Bit() &&
+         "LowerVAARG only handles 64-bit va_arg!");
+  assert((Subtarget->isTargetLinux() ||
+          Subtarget->isTargetDarwin()) &&
+         "Unhandled target in LowerVAARG");
+  assert(Op.getNode()->getNumOperands() == 4);
+  SDValue Chain = Op.getOperand(0);
+  SDValue SrcPtr = Op.getOperand(1);
+  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+  unsigned Align = Op.getConstantOperandVal(3);
+  DebugLoc dl = Op.getDebugLoc();
-  report_fatal_error("VAArgInst is not yet implemented for x86-64!");
-  return SDValue();
+  EVT ArgVT = Op.getNode()->getValueType(0);
+  const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+  uint32_t ArgSize = getTargetData()->getTypeAllocSize(ArgTy);
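+  // For example, given IR like "%v = va_arg i8** %ap, i32", ArgVT is
+  // MVT::i32 and ArgSize is 4; for "va_arg ..., double" they are MVT::f64
+  // and 8.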
+  uint8_t ArgMode;
+
+  // Decide which area this value should be read from.
+  // TODO: Implement the AMD64 ABI in its entirety. This simple
+  // selection mechanism works only for the basic types.
+  if (ArgVT == MVT::f80) {
+    llvm_unreachable("va_arg for f80 not yet implemented");
+  } else if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
+    ArgMode = 2;  // Argument passed in XMM register. Use fp_offset.
+  } else if (ArgVT.isInteger() && ArgSize <= 32 /*bytes*/) {
+    ArgMode = 1;  // Argument passed in GPR64 register(s). Use gp_offset.
+  } else {
+    llvm_unreachable("Unhandled argument type in LowerVAARG");
+  }
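+  // Concretely: an f32 or f64 lands in mode 2 (fp_offset), an i8..i64 in
+  // mode 1 (gp_offset); aggregates and f80 still need the full ABI logic
+  // noted in the TODO above.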
+
+  if (ArgMode == 2) {
+    // Sanity check: using fp_offset only makes sense if the function may
+    // touch the XMM registers at all; otherwise va_start never spilled
+    // them to the register save area.
+    const Function *Fn = DAG.getMachineFunction().getFunction();
+    bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
+    assert(!UseSoftFloat && !NoImplicitFloatOps && Subtarget->hasSSE1());
+  }
+
+  // Insert the VAARG_64 node into the DAG. It returns two values: the
+  // address of the variable argument and the updated chain.
+  SmallVector<SDValue, 11> InstOps;
+  InstOps.push_back(Chain);
+  InstOps.push_back(SrcPtr);
+  InstOps.push_back(DAG.getConstant(ArgSize, MVT::i32));
+  InstOps.push_back(DAG.getConstant(ArgMode, MVT::i8));
+  InstOps.push_back(DAG.getConstant(Align, MVT::i32));
+  SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
+  SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
+                                          VTs, &InstOps[0], InstOps.size(),
+                                          MVT::i64,
+                                          MachinePointerInfo(SV),
+                                          /*Align=*/0,
+                                          /*Volatile=*/false,
+                                          /*ReadMem=*/true,
+                                          /*WriteMem=*/true);
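+  // Note: the single memoperand (MachinePointerInfo(SV)) describes the
+  // va_list object itself; VAARG_64 both reads it and writes the updated
+  // offset/overflow pointer back, hence ReadMem and WriteMem together.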
+  Chain = VAARG.getValue(1);
+
+  // Load the next argument and return it.
+  return DAG.getLoad(ArgVT, dl,
+                     Chain,
+                     VAARG,
+                     MachinePointerInfo(),
+                     false, false, 0);
 }
 SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
@@ -8850,6 +8907,7 @@
   case X86ISD::PUNPCKHDQ:          return "X86ISD::PUNPCKHDQ";
   case X86ISD::PUNPCKHQDQ:         return "X86ISD::PUNPCKHQDQ";
   case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
+  case X86ISD::VAARG_64:           return "X86ISD::VAARG_64";
   case X86ISD::MINGW_ALLOCA:       return "X86ISD::MINGW_ALLOCA";
   }
 }
@@ -9411,6 +9469,261 @@
 }
 MachineBasicBlock *
+X86TargetLowering::EmitVAARG64WithCustomInserter(
+                   MachineInstr *MI,
+                   MachineBasicBlock *MBB) const {
+  // Emit va_arg instruction on X86-64.
+
+  // Operands to this pseudo-instruction:
+  //   0  ) Output  : destination address (reg)
+  //   1-5) Input   : va_list address (addr, i64mem)
+  //   6  ) ArgSize : size (in bytes) of vararg type
+  //   7  ) ArgMode : 0=overflow only, 1=use gp_offset, 2=use fp_offset
+  //   8  ) Align   : alignment of type
+  //   9  ) EFLAGS (implicit-def)
+  assert(MI->getNumOperands() == 10 && "VAARG_64 should have 10 operands!");
+  assert(X86::AddrNumOperands == 5 && "VAARG_64 assumes 5 address operands");
+
+  unsigned DestReg = MI->getOperand(0).getReg();
+  MachineOperand &Base = MI->getOperand(1);
+  MachineOperand &Scale = MI->getOperand(2);
+  MachineOperand &Index = MI->getOperand(3);
+  MachineOperand &Disp = MI->getOperand(4);
+  MachineOperand &Segment = MI->getOperand(5);
+  unsigned ArgSize = MI->getOperand(6).getImm();
+  unsigned ArgMode = MI->getOperand(7).getImm();
+  unsigned Align = MI->getOperand(8).getImm();
+
+  // Memory Reference
+  assert(MI->hasOneMemOperand() && "Expected VAARG_64 to have one memoperand");
+  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
+  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();
+
+  // Machine Information
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+  const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
+  const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
+  DebugLoc DL = MI->getDebugLoc();
+
+  // struct va_list {
+  //   i32   gp_offset
+  //   i32   fp_offset
+  //   i64   overflow_area (address)
+  //   i64   reg_save_area (address)
+  // }
+  // sizeof(va_list) = 24
+  // alignment(va_list) = 8
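+  //
+  // This matches the SysV AMD64 ABI's C declaration (sketch, field names
+  // as in the ABI document):
+  //   typedef struct {
+  //     unsigned int gp_offset;    // next GPR arg: 0..48
+  //     unsigned int fp_offset;    // next XMM arg: 48..176
+  //     void *overflow_arg_area;   // next stack-passed argument
+  //     void *reg_save_area;       // base of the register save area
+  //   } va_list[1];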
+
+  unsigned TotalNumIntRegs = 6;
+  unsigned TotalNumXMMRegs = 8;
+  bool UseGPOffset = (ArgMode == 1);
+  bool UseFPOffset = (ArgMode == 2);
+  unsigned MaxOffset = TotalNumIntRegs * 8 +
+                       (UseFPOffset ? TotalNumXMMRegs * 16 : 0);
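+  // i.e. MaxOffset is 48 when pulling from the GPR area (6 x 8 bytes) and
+  // 176 when pulling from the XMM area (48 + 8 x 16 bytes), matching the
+  // fp_offset bound in the ABI.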
+
+  // Round ArgSize up to a multiple of 8.
+  unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
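+  // e.g. ArgSize == 12 gives ArgSizeA8 == 16; multiples of 8 are unchanged.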
+  bool NeedsAlign = (Align > 8);
+
+  MachineBasicBlock *thisMBB = MBB;
+  MachineBasicBlock *overflowMBB;
+  MachineBasicBlock *offsetMBB;
+  MachineBasicBlock *endMBB;
+
+  unsigned OffsetDestReg = 0;    // Argument address computed by offsetMBB
+  unsigned OverflowDestReg = 0;  // Argument address computed by overflowMBB
+  unsigned OffsetReg = 0;
+
+  if (!UseGPOffset && !UseFPOffset) {
+    // If we only pull from the overflow region, we don't need a branch or
+    // any new basic blocks; control flow is unchanged.
+    OffsetDestReg = 0;  // unused
+    OverflowDestReg = DestReg;
+
+    offsetMBB = NULL;
+    overflowMBB = thisMBB;
+    endMBB = thisMBB;
+  } else {
+    // First emit code to check if gp_offset (or fp_offset) is below the
+    // bound. If so, pull the argument from reg_save_area (branch to
+    // offsetMBB); if not, pull it from overflow_area (branch to
+    // overflowMBB).
+    //
+    //       thisMBB
+    //         |     .
+    //         |        .
+    //     offsetMBB   overflowMBB
+    //         |        .
+    //         |     .
+    //        endMBB
+
+    // Registers for the PHI in endMBB.
+    OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
+    OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);
+
+    const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+    MachineFunction *MF = MBB->getParent();
+    overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+    offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+    endMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+    MachineFunction::iterator MBBIter = MBB;
+    ++MBBIter;
+
+    // Insert the new basic blocks.
+    MF->insert(MBBIter, offsetMBB);
+    MF->insert(MBBIter, overflowMBB);
+    MF->insert(MBBIter, endMBB);
+
+    // Transfer the remainder of MBB and its successor edges to endMBB.
+    endMBB->splice(endMBB->begin(), thisMBB,
+                   llvm::next(MachineBasicBlock::iterator(MI)),
+                   thisMBB->end());
+    endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
+
+    // Make offsetMBB and overflowMBB successors of thisMBB.
+    thisMBB->addSuccessor(offsetMBB);
+    thisMBB->addSuccessor(overflowMBB);
+
+    // endMBB is a successor of both offsetMBB and overflowMBB.
+    offsetMBB->addSuccessor(endMBB);
+    overflowMBB->addSuccessor(endMBB);
+
+    // Load the current gp_offset (or fp_offset) value into a register.
+    OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+    BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
+      .addOperand(Base)
+      .addOperand(Scale)
+      .addOperand(Index)
+      .addDisp(Disp, UseFPOffset ? 4 : 0)
+      .addOperand(Segment)
+      .setMemRefs(MMOBegin, MMOEnd);
+
+    // Check if there is enough room left to pull this argument.
+    BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
+      .addReg(OffsetReg)
+      .addImm(MaxOffset + 8 - ArgSizeA8);
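+    // That is, use the register save area iff OffsetReg + ArgSizeA8 fits
+    // within MaxOffset. Since all three quantities are multiples of 8, the
+    // strict "OffsetReg + ArgSizeA8 > MaxOffset" overflow test can be
+    // rewritten as the unsigned ">= MaxOffset + 8 - ArgSizeA8" compare
+    // tested by the JAE below.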
+
+    // Branch to "overflowMBB" if offset >= max.
+    // Fall through to "offsetMBB" otherwise.
+    BuildMI(thisMBB, DL, TII->get(X86::GetCondBranchFromCond(X86::COND_AE)))
+      .addMBB(overflowMBB);
+  }
+
+  // In offsetMBB, emit code to use the reg_save_area.
+  if (offsetMBB) {
+    assert(OffsetReg != 0);
+
+    // Read the reg_save_area address.
+    unsigned RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
+    BuildMI(offsetMBB, DL, TII->get(X86::MOV64rm), RegSaveReg)
+      .addOperand(Base)
+      .addOperand(Scale)
+      .addOperand(Index)
+      .addDisp(Disp, 16)
+      .addOperand(Segment)
+      .setMemRefs(MMOBegin, MMOEnd);
+
+    // Zero-extend the offset to 64 bits.
+    unsigned OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
+    BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
+      .addImm(0)
+      .addReg(OffsetReg)
+      .addImm(X86::sub_32bit);
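+    // SUBREG_TO_REG with a zero immediate asserts that the upper 32 bits
+    // are already zero; that holds here because the 32-bit load of the
+    // offset (MOV32rm) implicitly zero-extends on x86-64.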
+
+    // Add the offset to the reg_save_area to get the final address.
+    BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
+      .addReg(OffsetReg64)
+      .addReg(RegSaveReg);
+
+    // Compute the offset for the next argument.
+    unsigned NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
+    BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
+      .addReg(OffsetReg)
+      .addImm(UseFPOffset ? 16 : 8);
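+    // Each XMM slot in the register save area is 16 bytes wide and each
+    // GPR slot is 8, regardless of ArgSize.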
+
+    // Store it back into the va_list.
+    BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
+      .addOperand(Base)
+      .addOperand(Scale)
+      .addOperand(Index)
+      .addDisp(Disp, UseFPOffset ? 4 : 0)
+      .addOperand(Segment)
+      .addReg(NextOffsetReg)
+      .setMemRefs(MMOBegin, MMOEnd);
+
+    // Jump to endMBB.
+    BuildMI(offsetMBB, DL, TII->get(X86::JMP_4))
+      .addMBB(endMBB);
+  }
+
+  //
+  // Emit code to use the overflow area.
+  //
+
+  // Load the overflow_area address into a register.
+  unsigned OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
+  BuildMI(overflowMBB, DL, TII->get(X86::MOV64rm), OverflowAddrReg)
+    .addOperand(Base)
+    .addOperand(Scale)
+    .addOperand(Index)
+    .addDisp(Disp, 8)
+    .addOperand(Segment)
+    .setMemRefs(MMOBegin, MMOEnd);
+
+  // If we need to align it, do so; otherwise just copy the address to
+  // OverflowDestReg.
+  if (NeedsAlign) {
+    // Align the overflow address.
+    assert((Align & (Align-1)) == 0 && "Alignment must be a power of 2");
+    unsigned TmpReg = MRI.createVirtualRegister(AddrRegClass);
+
+    // aligned_addr = (addr + (align-1)) & ~(align-1)
+    BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), TmpReg)
+      .addReg(OverflowAddrReg)
+      .addImm(Align-1);
+
+    BuildMI(overflowMBB, DL, TII->get(X86::AND64ri32), OverflowDestReg)
+      .addReg(TmpReg)
+      .addImm(~(uint64_t)(Align-1));
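+    // e.g. with Align == 16, an overflow address of 0x1008 becomes
+    // 0x1010: (0x1008 + 15) & ~15.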
+  } else {
+    BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
+      .addReg(OverflowAddrReg);
+  }
+
+  // Compute the next overflow address after this argument.
+  // (The overflow address should be kept 8-byte aligned.)
+  unsigned NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
+  BuildMI(overflowMBB, DL, TII->get(X86::ADD64ri32), NextAddrReg)
+    .addReg(OverflowDestReg)
+    .addImm(ArgSizeA8);
+
+  // Store the new overflow address.
+  BuildMI(overflowMBB, DL, TII->get(X86::MOV64mr))
+    .addOperand(Base)
+    .addOperand(Scale)
+    .addOperand(Index)
+    .addDisp(Disp, 8)
+    .addOperand(Segment)
+    .addReg(NextAddrReg)
+    .setMemRefs(MMOBegin, MMOEnd);
+
+  // If we branched, emit the PHI to the front of endMBB.
+  if (offsetMBB) {
+    BuildMI(*endMBB, endMBB->begin(), DL,
+            TII->get(X86::PHI), DestReg)
+      .addReg(OffsetDestReg).addMBB(offsetMBB)
+      .addReg(OverflowDestReg).addMBB(overflowMBB);
+  }
+
+  // Erase the pseudo instruction.
+  MI->eraseFromParent();
+
+  return endMBB;
+}
+
+MachineBasicBlock *
 X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
                    MachineInstr *MI,
                    MachineBasicBlock *MBB) const {
@@ -9915,6 +10228,9 @@
                      false);
   case X86::VASTART_SAVE_XMM_REGS:
     return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
+
+  case X86::VAARG_64:
+    return EmitVAARG64WithCustomInserter(MI, BB);
   }
 }