Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1159013002: Subzero ARM: addProlog/addEpilogue -- share some code with x86. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: typo Created 5 years, 6 months ago
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringARM32 class, which consists almost 10 // This file implements the TargetLoweringARM32 class, which consists almost
(...skipping 108 matching lines...)
119 ICEINSTICMP_TABLE 119 ICEINSTICMP_TABLE
120 #undef X 120 #undef X
121 } // end of namespace dummy1 121 } // end of namespace dummy1
122 122
123 // The maximum number of arguments to pass in GPR registers. 123 // The maximum number of arguments to pass in GPR registers.
124 const uint32_t ARM32_MAX_GPR_ARG = 4; 124 const uint32_t ARM32_MAX_GPR_ARG = 4;
125 125
126 // Stack alignment 126 // Stack alignment
127 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16; 127 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;
128 128
129 // Value is in bytes. Returns Value rounded up to the next multiple
130 // of the stack alignment.
131 uint32_t applyStackAlignment(uint32_t Value) {
132 return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
133 }
134
129 } // end of anonymous namespace 135 } // end of anonymous namespace
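For reference, Utils::applyAlignment rounds Value up to the next multiple of a power-of-two alignment. A minimal sketch of the usual implementation (an illustration of the semantics, not necessarily the exact Subzero code):

  // Round Value up to the next multiple of Alignment, where Alignment is
  // a power of two (ARM32_STACK_ALIGNMENT_BYTES is 16).
  // e.g. applyAlignment(20, 16) == 32; applyAlignment(16, 16) == 16.
  uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) {
    return (Value + Alignment - 1) & ~(Alignment - 1);
  }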
130 136
131 TargetARM32::TargetARM32(Cfg *Func) 137 TargetARM32::TargetARM32(Cfg *Func)
132 : TargetLowering(Func), UsesFramePointer(false) { 138 : TargetLowering(Func), UsesFramePointer(false), NeedsStackAlignment(false),
139 MaybeLeafFunc(true), SpillAreaSizeBytes(0) {
133 // TODO: Don't initialize IntegerRegisters and friends every time. 140 // TODO: Don't initialize IntegerRegisters and friends every time.
134 // Instead, initialize in some sort of static initializer for the 141 // Instead, initialize in some sort of static initializer for the
135 // class. 142 // class.
136 llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM); 143 llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
137 llvm::SmallBitVector FloatRegisters(RegARM32::Reg_NUM); 144 llvm::SmallBitVector FloatRegisters(RegARM32::Reg_NUM);
138 llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM); 145 llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
139 llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM); 146 llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
140 ScratchRegs.resize(RegARM32::Reg_NUM); 147 ScratchRegs.resize(RegARM32::Reg_NUM);
141 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ 148 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
142 isFP) \ 149 isFP) \
(...skipping 246 matching lines...)
389 // TODO(jvoung): handle float/vector types. 396 // TODO(jvoung): handle float/vector types.
390 if (isVectorType(Ty)) { 397 if (isVectorType(Ty)) {
391 UnimplementedError(Func->getContext()->getFlags()); 398 UnimplementedError(Func->getContext()->getFlags());
392 continue; 399 continue;
393 } else if (isFloatingType(Ty)) { 400 } else if (isFloatingType(Ty)) {
394 UnimplementedError(Func->getContext()->getFlags()); 401 UnimplementedError(Func->getContext()->getFlags());
395 continue; 402 continue;
396 } else if (Ty == IceType_i64) { 403 } else if (Ty == IceType_i64) {
397 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) 404 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
398 continue; 405 continue;
399 int32_t RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed; 406 int32_t RegLo;
400 int32_t RegHi = 0; 407 int32_t RegHi;
401 ++NumGPRRegsUsed;
402 // Always start i64 registers at an even register, so this may end 408 // Always start i64 registers at an even register, so this may end
403 // up padding away a register. 409 // up padding away a register.
404 if (RegLo % 2 != 0) { 410 if (NumGPRRegsUsed % 2 != 0) {
405 ++RegLo;
406 ++NumGPRRegsUsed; 411 ++NumGPRRegsUsed;
407 } 412 }
408 // If this leaves us without room to consume another register, 413 RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
409 // leave any previously speculatively consumed registers as consumed. 414 ++NumGPRRegsUsed;
410 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
411 continue;
412 RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed; 415 RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
413 ++NumGPRRegsUsed; 416 ++NumGPRRegsUsed;
417 // If this bumps us past the boundary, don't allocate to a register
418 // and leave any previously speculatively consumed registers as consumed.
419 if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
420 continue;
414 Variable *RegisterArg = Func->makeVariable(Ty); 421 Variable *RegisterArg = Func->makeVariable(Ty);
415 Variable *RegisterLo = Func->makeVariable(IceType_i32); 422 Variable *RegisterLo = Func->makeVariable(IceType_i32);
416 Variable *RegisterHi = Func->makeVariable(IceType_i32); 423 Variable *RegisterHi = Func->makeVariable(IceType_i32);
417 if (ALLOW_DUMP) { 424 if (ALLOW_DUMP) {
418 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); 425 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
419 RegisterLo->setName(Func, "home_reg_lo:" + Arg->getName(Func)); 426 RegisterLo->setName(Func, "home_reg_lo:" + Arg->getName(Func));
420 RegisterHi->setName(Func, "home_reg_hi:" + Arg->getName(Func)); 427 RegisterHi->setName(Func, "home_reg_hi:" + Arg->getName(Func));
421 } 428 }
422 RegisterLo->setRegNum(RegLo); 429 RegisterLo->setRegNum(RegLo);
423 RegisterLo->setIsArg(); 430 RegisterLo->setIsArg();
(...skipping 19 matching lines...)
443 RegisterArg->setRegNum(RegNum); 450 RegisterArg->setRegNum(RegNum);
444 RegisterArg->setIsArg(); 451 RegisterArg->setIsArg();
445 Arg->setIsArg(false); 452 Arg->setIsArg(false);
446 453
447 Args[I] = RegisterArg; 454 Args[I] = RegisterArg;
448 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); 455 Context.insert(InstAssign::create(Func, Arg, RegisterArg));
449 } 456 }
450 } 457 }
451 } 458 }
452 459
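The even-register rule above follows the ARM calling convention (AAPCS): a 64-bit integer argument is passed in an even/odd GPR pair (r0:r1 or r2:r3), padding away an odd register if necessary. A self-contained sketch of that bookkeeping; assignI64GprPair is a hypothetical helper illustrating the pattern, not the actual Subzero code:

  #include <cstdint>

  // Returns the index of the low register for an i64 argument (so the
  // pair is r<idx>:r<idx+1>), or -1 if the pair no longer fits in r0-r3
  // and the argument must be passed on the stack. Mirrors the
  // NumGPRRegsUsed accounting in lowerArguments().
  int assignI64GprPair(uint32_t &NumGPRRegsUsed) {
    const uint32_t MaxGprArgs = 4; // ARM32_MAX_GPR_ARG
    if (NumGPRRegsUsed % 2 != 0)
      ++NumGPRRegsUsed; // start at an even register; pad away the odd one
    if (NumGPRRegsUsed + 2 > MaxGprArgs)
      return -1; // the pair does not fit
    int RegLo = static_cast<int>(NumGPRRegsUsed);
    NumGPRRegsUsed += 2;
    return RegLo;
  }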
460 // Helper function for addProlog().
461 //
462 // This assumes Arg is an argument passed on the stack. This sets the
463 // frame offset for Arg and updates InArgsSizeBytes according to Arg's
464 // width. For an I64 arg that has been split into Lo and Hi components,
465 // it calls itself recursively on the components, taking care to handle
466 // Lo first because of the little-endian architecture. Lastly, this
467 // function generates an instruction to copy Arg into its assigned
468 // register if applicable.
469 void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
470 size_t BasicFrameOffset,
471 size_t &InArgsSizeBytes) {
472 Variable *Lo = Arg->getLo();
473 Variable *Hi = Arg->getHi();
474 Type Ty = Arg->getType();
475 if (Lo && Hi && Ty == IceType_i64) {
476 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
477 assert(Hi->getType() != IceType_i64); // don't want infinite recursion
478 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
479 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
480 return;
481 }
482 if (isVectorType(Ty)) {
483 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
484 }
485 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
486 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
487 // If the argument variable has been assigned a register, we need to load
488 // the value from the stack slot.
489 if (Arg->hasReg()) {
490 assert(Ty != IceType_i64);
491 OperandARM32Mem *Mem = OperandARM32Mem::create(
492 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
493 Ctx->getConstantInt32(Arg->getStackOffset())));
494 if (isVectorType(Arg->getType())) {
495 UnimplementedError(Func->getContext()->getFlags());
496 } else {
497 _ldr(Arg, Mem);
498 }
499 // This argument-copying instruction uses an explicit
500 // OperandARM32Mem operand instead of a Variable, so its
501 // fill-from-stack operation has to be tracked separately for
502 // statistics.
503 Ctx->statsUpdateFills();
504 }
505 }
506
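A worked example of the offset bookkeeping above, with assumed numbers: suppose BasicFrameOffset is 8 and the first stack argument is an i64 split into Lo and Hi components.

  // finishArgumentLowering(Lo, ...): StackOffset = 8 + 0, InArgsSizeBytes -> 4
  // finishArgumentLowering(Hi, ...): StackOffset = 8 + 4, InArgsSizeBytes -> 8
  // Lo is handled first so the low word sits at the lower address,
  // matching ARM's little-endian layout.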
453 Type TargetARM32::stackSlotType() { return IceType_i32; } 507 Type TargetARM32::stackSlotType() { return IceType_i32; }
454 508
455 void TargetARM32::addProlog(CfgNode *Node) { 509 void TargetARM32::addProlog(CfgNode *Node) {
456 (void)Node; 510 // Stack frame layout:
457 UnimplementedError(Func->getContext()->getFlags()); 511 //
512 // +------------------------+
513 // | 1. preserved registers |
514 // +------------------------+
515 // | 2. padding |
516 // +------------------------+
517 // | 3. global spill area |
518 // +------------------------+
519 // | 4. padding |
520 // +------------------------+
521 // | 5. local spill area |
522 // +------------------------+
523 // | 6. padding |
524 // +------------------------+
525 // | 7. allocas |
526 // +------------------------+
527 //
528 // The following variables record the size in bytes of the given areas:
529 // * PreservedRegsSizeBytes: area 1
530 // * SpillAreaPaddingBytes: area 2
531 // * GlobalsSize: area 3
532 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4
533 // * LocalsSpillAreaSize: area 5
534 // * SpillAreaSizeBytes: areas 2 - 6
535 // Determine stack frame offsets for each Variable without a
536 // register assignment. This can be done with one variable per stack
537 // slot, or by coalescing: run the register allocator again with an
538 // infinite set of registers (as a side effect, this gives variables
539 // a second chance at physical register assignment).
540 //
541 // A middle ground approach is to leverage sparsity and allocate one
542 // block of space on the frame for globals (variables with
543 // multi-block lifetime), and one block to share for locals
544 // (single-block lifetime).
545
546 Context.init(Node);
547 Context.setInsertPoint(Context.getCur());
548
549 llvm::SmallBitVector CalleeSaves =
550 getRegisterSet(RegSet_CalleeSave, RegSet_None);
551 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
552 VarList SortedSpilledVariables;
553 size_t GlobalsSize = 0;
554 // If there is a separate locals area, this represents that area.
555 // Otherwise it counts any variable not counted by GlobalsSize.
556 SpillAreaSizeBytes = 0;
557 // If there is a separate locals area, this specifies the alignment
558 // for it.
559 uint32_t LocalsSlotsAlignmentBytes = 0;
560 // The entire spill locations area gets aligned to largest natural
561 // alignment of the variables that have a spill slot.
562 uint32_t SpillAreaAlignmentBytes = 0;
563 // For now, we don't have target-specific variables that need special
564 // treatment (no stack-slot-linked SpillVariable type).
565 std::function<bool(Variable *)> TargetVarHook =
566 [](Variable *) { return false; };
567
568 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
569 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
570 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
571 &LocalsSlotsAlignmentBytes, TargetVarHook);
572 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
573 SpillAreaSizeBytes += GlobalsSize;
574
575 // Add push instructions for preserved registers.
576 // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15).
577 // Unlike x86, ARM also has callee-saved float/vector registers.
578 // The "vpush" instruction can handle a whole list of float/vector
579 // registers, but it only handles contiguous sequences of registers
580 // by specifying the start and the length.
581 VarList GPRsToPreserve;
582 GPRsToPreserve.reserve(CalleeSaves.size());
583 uint32_t NumCallee = 0;
584 size_t PreservedRegsSizeBytes = 0;
585 // Consider FP and LR as callee-save / used as needed.
586 if (UsesFramePointer) {
587 CalleeSaves[RegARM32::Reg_fp] = true;
588 assert(RegsUsed[RegARM32::Reg_fp] == false);
589 RegsUsed[RegARM32::Reg_fp] = true;
590 }
591 if (!MaybeLeafFunc) {
592 CalleeSaves[RegARM32::Reg_lr] = true;
593 RegsUsed[RegARM32::Reg_lr] = true;
594 }
595 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
596 if (CalleeSaves[i] && RegsUsed[i]) {
597 // TODO(jvoung): use a separate vpush for each contiguous floating
598 // point register segment, adding 4 or 8 bytes per register by type.
599 ++NumCallee;
600 PreservedRegsSizeBytes += 4;
601 GPRsToPreserve.push_back(getPhysicalRegister(i));
602 }
603 }
604 Ctx->statsUpdateRegistersSaved(NumCallee);
605 if (!GPRsToPreserve.empty())
606 _push(GPRsToPreserve);
607
608 // Generate "mov FP, SP" if needed.
609 if (UsesFramePointer) {
610 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
611 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
612 _mov(FP, SP);
613 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
614 Context.insert(InstFakeUse::create(Func, FP));
615 }
616
617 // Align the variables area. SpillAreaPaddingBytes is the size of
618 // the region after the preserved registers and before the spill areas.
619 // LocalsSlotsPaddingBytes is the amount of padding between the globals
620 // and locals area if they are separate.
621 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
622 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
623 uint32_t SpillAreaPaddingBytes = 0;
624 uint32_t LocalsSlotsPaddingBytes = 0;
625 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
626 GlobalsSize, LocalsSlotsAlignmentBytes,
627 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
628 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
629 uint32_t GlobalsAndSubsequentPaddingSize =
630 GlobalsSize + LocalsSlotsPaddingBytes;
631
632 // Align SP if necessary.
633 if (NeedsStackAlignment) {
634 uint32_t StackOffset = PreservedRegsSizeBytes;
635 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
636 SpillAreaSizeBytes = StackSize - StackOffset;
637 }
638
639 // Generate "sub sp, SpillAreaSizeBytes"
640 if (SpillAreaSizeBytes) {
641 // Use the IP inter-procedural scratch register if needed to legalize
642 // the immediate.
643 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
644 Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
645 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
646 _sub(SP, SP, SubAmount);
647 }
648 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
649
650 resetStackAdjustment();
651
652 // Fill in stack offsets for stack args, and copy args into registers
653 // for those that were register-allocated. Args are pushed right to
654 // left, so Arg[0] is closest to the stack/frame pointer.
655 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
656 size_t BasicFrameOffset = PreservedRegsSizeBytes;
657 if (!UsesFramePointer)
658 BasicFrameOffset += SpillAreaSizeBytes;
659
660 const VarList &Args = Func->getArgs();
661 size_t InArgsSizeBytes = 0;
662 unsigned NumGPRArgs = 0;
663 for (Variable *Arg : Args) {
664 Type Ty = Arg->getType();
665 // Skip arguments passed in registers.
666 if (isVectorType(Ty)) {
667 UnimplementedError(Func->getContext()->getFlags());
668 continue;
669 } else if (isFloatingType(Ty)) {
670 UnimplementedError(Func->getContext()->getFlags());
671 continue;
672 } else if (Ty == IceType_i64 && NumGPRArgs < ARM32_MAX_GPR_ARG) {
673 // Start at an even register.
674 if (NumGPRArgs % 2 == 1) {
675 ++NumGPRArgs;
676 }
677 NumGPRArgs += 2;
678 if (NumGPRArgs <= ARM32_MAX_GPR_ARG)
679 continue;
680 } else if (NumGPRArgs < ARM32_MAX_GPR_ARG) {
681 ++NumGPRArgs;
682 continue;
683 }
684 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
685 }
686
687 // Fill in stack offsets for locals.
688 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
689 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
690 UsesFramePointer);
691 this->HasComputedFrame = true;
692
693 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) {
694 OstreamLocker L(Func->getContext());
695 Ostream &Str = Func->getContext()->getStrDump();
696
697 Str << "Stack layout:\n";
698 uint32_t SPAdjustmentPaddingSize =
699 SpillAreaSizeBytes - LocalsSpillAreaSize -
700 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
701 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
702 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
703 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
704 << " globals spill area = " << GlobalsSize << " bytes\n"
705 << " globals-locals spill areas intermediate padding = "
706 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
707 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
708 << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
709
710 Str << "Stack details:\n"
711 << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
712 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
713 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
714 << " bytes\n"
715 << " is FP based = " << UsesFramePointer << "\n";
716 }
458 } 717 }
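A worked example of the SP adjustment above, under assumed inputs: if the prolog pushes two callee-saved GPRs (PreservedRegsSizeBytes = 8), the spill areas need 12 bytes, and NeedsStackAlignment is set, then:

  StackSize          = applyStackAlignment(8 + 12) = 32
  SpillAreaSizeBytes = 32 - 8                      = 24   // sub sp, sp, #24

After the push (8 bytes) plus the subtract (24 bytes), SP has moved by 32 bytes in total and remains 16-byte aligned.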
459 718
460 void TargetARM32::addEpilog(CfgNode *Node) { 719 void TargetARM32::addEpilog(CfgNode *Node) {
461 (void)Node; 720 InstList &Insts = Node->getInsts();
462 UnimplementedError(Func->getContext()->getFlags()); 721 InstList::reverse_iterator RI, E;
722 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
723 if (llvm::isa<InstARM32Ret>(*RI))
724 break;
725 }
726 if (RI == E)
727 return;
728
729 // Convert the reverse_iterator position into its corresponding
730 // (forward) iterator position.
731 InstList::iterator InsertPoint = RI.base();
732 --InsertPoint;
733 Context.init(Node);
734 Context.setInsertPoint(InsertPoint);
735
736 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
737 if (UsesFramePointer) {
738 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
739 // For late-stage liveness analysis (e.g. asm-verbose mode),
740 // adding a fake use of SP before the assignment of SP=FP keeps
741 // previous SP adjustments from being dead-code eliminated.
742 Context.insert(InstFakeUse::create(Func, SP));
743 _mov(SP, FP);
744 } else {
745 // add SP, SpillAreaSizeBytes
746 if (SpillAreaSizeBytes) {
747 // Use the IP inter-procedural scratch register if needed to legalize
748 // the immediate. It shouldn't be live at this point.
749 Operand *AddAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
750 Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
751 _add(SP, SP, AddAmount);
752 }
753 }
754
755 // Add pop instructions for preserved registers.
756 llvm::SmallBitVector CalleeSaves =
757 getRegisterSet(RegSet_CalleeSave, RegSet_None);
758 VarList GPRsToRestore;
759 GPRsToRestore.reserve(CalleeSaves.size());
760 // Consider FP and LR as callee-save / used as needed.
761 if (UsesFramePointer) {
762 CalleeSaves[RegARM32::Reg_fp] = true;
763 }
764 if (!MaybeLeafFunc) {
765 CalleeSaves[RegARM32::Reg_lr] = true;
766 }
767 // Pop registers in ascending order just like push
768 // (instead of in reverse order).
769 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
770 if (CalleeSaves[i] && RegsUsed[i]) {
771 GPRsToRestore.push_back(getPhysicalRegister(i));
772 }
773 }
774 if (!GPRsToRestore.empty())
775 _pop(GPRsToRestore);
776
777 if (!Ctx->getFlags().getUseSandboxing())
778 return;
779
780 // Change the original ret instruction into a sandboxed return sequence.
781 // bundle_lock
782 // bic lr, #0xc000000f
783 // bx lr
784 // bundle_unlock
785 // This isn't just alignment to getBundleAlignLog2Bytes(); the mask must
786 // also restrict the branch target to the lower 1GB of the address space.
787 Operand *RetMask =
788 legalize(Ctx->getConstantInt32(0xc000000f), Legal_Reg | Legal_Flex);
789 Variable *LR = makeReg(IceType_i32, RegARM32::Reg_lr);
790 Variable *RetValue = nullptr;
791 if (RI->getSrcSize())
792 RetValue = llvm::cast<Variable>(RI->getSrc(0));
793 _bundle_lock();
794 _bic(LR, LR, RetMask);
795 _ret(LR, RetValue);
796 _bundle_unlock();
797 RI->setDeleted();
463 } 798 }
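The 0xc000000f mask used in the sandboxed return clears the low four bits of lr (forcing 16-byte bundle alignment) and the top two bits (keeping the target below the 1GB boundary). A minimal sketch of the effect of the bic; sandboxReturnAddress is a hypothetical name for illustration:

  #include <cstdint>

  // bic lr, lr, #0xc000000f computes lr & ~0xc000000f (= lr & 0x3ffffff0),
  // so the result is 16-byte aligned and below 0x40000000 (1GB).
  uint32_t sandboxReturnAddress(uint32_t LR) {
    return LR & ~UINT32_C(0xc000000f);
  }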
464 799
465 void TargetARM32::split64(Variable *Var) { 800 void TargetARM32::split64(Variable *Var) {
466 assert(Var->getType() == IceType_i64); 801 assert(Var->getType() == IceType_i64);
467 Variable *Lo = Var->getLo(); 802 Variable *Lo = Var->getLo();
468 Variable *Hi = Var->getHi(); 803 Variable *Hi = Var->getHi();
469 if (Lo) { 804 if (Lo) {
470 assert(Hi); 805 assert(Hi);
471 return; 806 return;
472 } 807 }
(...skipping 401 matching lines...)
874 Operand *Cond = Inst->getCondition(); 1209 Operand *Cond = Inst->getCondition();
875 // TODO(jvoung): Handle folding opportunities. 1210 // TODO(jvoung): Handle folding opportunities.
876 1211
877 Variable *Src0R = legalizeToVar(Cond); 1212 Variable *Src0R = legalizeToVar(Cond);
878 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1213 Constant *Zero = Ctx->getConstantZero(IceType_i32);
879 _cmp(Src0R, Zero); 1214 _cmp(Src0R, Zero);
880 _br(CondARM32::NE, Inst->getTargetTrue(), Inst->getTargetFalse()); 1215 _br(CondARM32::NE, Inst->getTargetTrue(), Inst->getTargetFalse());
881 } 1216 }
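The lowering above materializes the i1 condition into a register, compares it against zero, and branches on NE. In effect it emits the following sequence (register and label names illustrative):

  // cmp <cond_reg>, #0
  // bne <target_true>
  // b   <target_false>   (or fall through, depending on block layout)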
882 1217
883 void TargetARM32::lowerCall(const InstCall *Instr) { 1218 void TargetARM32::lowerCall(const InstCall *Instr) {
1219 MaybeLeafFunc = false;
1220
884 // TODO(jvoung): assign arguments to registers and stack. Also reserve stack. 1221 // TODO(jvoung): assign arguments to registers and stack. Also reserve stack.
885 if (Instr->getNumArgs()) { 1222 if (Instr->getNumArgs()) {
886 UnimplementedError(Func->getContext()->getFlags()); 1223 UnimplementedError(Func->getContext()->getFlags());
887 } 1224 }
888 1225
889 // Generate the call instruction. Assign its result to a temporary 1226 // Generate the call instruction. Assign its result to a temporary
890 // with high register allocation weight. 1227 // with high register allocation weight.
891 Variable *Dest = Instr->getDest(); 1228 Variable *Dest = Instr->getDest();
892 // ReturnReg doubles as ReturnRegLo as necessary. 1229 // ReturnReg doubles as ReturnRegLo as necessary.
893 Variable *ReturnReg = nullptr; 1230 Variable *ReturnReg = nullptr;
(...skipping 666 matching lines...)
1560 Variable *Reg = Func->makeVariable(Type); 1897 Variable *Reg = Func->makeVariable(Type);
1561 if (RegNum == Variable::NoRegister) 1898 if (RegNum == Variable::NoRegister)
1562 Reg->setWeightInfinite(); 1899 Reg->setWeightInfinite();
1563 else 1900 else
1564 Reg->setRegNum(RegNum); 1901 Reg->setRegNum(RegNum);
1565 return Reg; 1902 return Reg;
1566 } 1903 }
1567 1904
1568 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) { 1905 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
1569 assert(llvm::isPowerOf2_32(Align)); 1906 assert(llvm::isPowerOf2_32(Align));
1570 uint32_t RotateAmt = 0; 1907 uint32_t RotateAmt;
1571 uint32_t Immed_8; 1908 uint32_t Immed_8;
1572 Operand *Mask; 1909 Operand *Mask;
1573 // Use AND or BIC to mask off the bits, depending on which immediate fits 1910 // Use AND or BIC to mask off the bits, depending on which immediate fits
1574 // (if it fits at all). Assume Align is usually small, in which case BIC 1911 // (if it fits at all). Assume Align is usually small, in which case BIC
1575 // works better. 1912 // works better. Thus, this rounds down to the alignment.
1576 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) { 1913 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
1577 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex); 1914 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
1578 _bic(Reg, Reg, Mask); 1915 _bic(Reg, Reg, Mask);
1579 } else { 1916 } else {
1580 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex); 1917 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex);
1581 _and(Reg, Reg, Mask); 1918 _and(Reg, Reg, Mask);
1582 } 1919 }
1583 } 1920 }
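Both paths round Reg down to a multiple of Align: for a power-of-two Align, ~(Align - 1) equals -Align in two's complement, so bic with (Align - 1) and and with -Align compute the same mask. The difference is only which constant fits ARM's 8-bit rotated immediate encoding. For example, with Align = 16:

  Reg & ~(16 - 1)   // bic Reg, Reg, #15          (15 encodes directly)
  Reg & -16         // and Reg, Reg, #0xfffffff0  (0xfffffff0 does not encode,
                    //  so the mask is first legalized into a register)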
1584 1921
1585 void TargetARM32::postLower() { 1922 void TargetARM32::postLower() {
(...skipping 63 matching lines...)
1649 } 1986 }
1650 } 1987 }
1651 1988
1652 void TargetDataARM32::lowerConstants() const { 1989 void TargetDataARM32::lowerConstants() const {
1653 if (Ctx->getFlags().getDisableTranslation()) 1990 if (Ctx->getFlags().getDisableTranslation())
1654 return; 1991 return;
1655 UnimplementedError(Ctx->getFlags()); 1992 UnimplementedError(Ctx->getFlags());
1656 } 1993 }
1657 1994
1658 } // end of namespace Ice 1995 } // end of namespace Ice