Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1159013002: Subzero ARM: addProlog/addEpilogue -- share some code with x86. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: typo Created 5 years, 6 months ago
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringARM32 class, which consists almost 10 // This file implements the TargetLoweringARM32 class, which consists almost
(...skipping 108 matching lines...)
119 ICEINSTICMP_TABLE 119 ICEINSTICMP_TABLE
120 #undef X 120 #undef X
121 } // end of namespace dummy1 121 } // end of namespace dummy1
122 122
123 // The maximum number of arguments to pass in GPR registers. 123 // The maximum number of arguments to pass in GPR registers.
124 const uint32_t ARM32_MAX_GPR_ARG = 4; 124 const uint32_t ARM32_MAX_GPR_ARG = 4;
125 125
126 // Stack alignment 126 // Stack alignment
127 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16; 127 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;
128 128
129 // Value is in bytes. Returns Value rounded up to the next multiple
130 // of the stack alignment.
131 uint32_t applyStackAlignment(uint32_t Value) {
132 return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
133 }
134
129 } // end of anonymous namespace 135 } // end of anonymous namespace
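For reference, Utils::applyAlignment rounds Value up to the next multiple of a power-of-two alignment. A minimal sketch of the usual implementation (an illustration of the semantics, not necessarily the exact Subzero code):

  // Round Value up to the next multiple of Alignment, where Alignment is
  // a power of two (ARM32_STACK_ALIGNMENT_BYTES is 16).
  // e.g. applyAlignment(20, 16) == 32; applyAlignment(16, 16) == 16.
  uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) {
    return (Value + Alignment - 1) & ~(Alignment - 1);
  }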
130 136
131 TargetARM32::TargetARM32(Cfg *Func) 137 TargetARM32::TargetARM32(Cfg *Func)
132 : TargetLowering(Func), UsesFramePointer(false) { 138 : TargetLowering(Func), UsesFramePointer(false), NeedsStackAlignment(false),
139 MaybeLeafFunc(true), SpillAreaSizeBytes(0) {
133 // TODO: Don't initialize IntegerRegisters and friends every time. 140 // TODO: Don't initialize IntegerRegisters and friends every time.
134 // Instead, initialize in some sort of static initializer for the 141 // Instead, initialize in some sort of static initializer for the
135 // class. 142 // class.
136 llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM); 143 llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
137 llvm::SmallBitVector FloatRegisters(RegARM32::Reg_NUM); 144 llvm::SmallBitVector FloatRegisters(RegARM32::Reg_NUM);
138 llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM); 145 llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
139 llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM); 146 llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
140 ScratchRegs.resize(RegARM32::Reg_NUM); 147 ScratchRegs.resize(RegARM32::Reg_NUM);
141 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ 148 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
142 isFP) \ 149 isFP) \
(...skipping 246 matching lines...)
389 // TODO(jvoung): handle float/vector types. 396 // TODO(jvoung): handle float/vector types.
390 if (isVectorType(Ty)) { 397 if (isVectorType(Ty)) {
391 UnimplementedError(Func->getContext()->getFlags()); 398 UnimplementedError(Func->getContext()->getFlags());
392 continue; 399 continue;
393 } else if (isFloatingType(Ty)) { 400 } else if (isFloatingType(Ty)) {
394 UnimplementedError(Func->getContext()->getFlags()); 401 UnimplementedError(Func->getContext()->getFlags());
395 continue; 402 continue;
396 } else if (Ty == IceType_i64) { 403 } else if (Ty == IceType_i64) {
397 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) 404 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
398 continue; 405 continue;
399 int32_t RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed; 406 int32_t RegLo;
400 int32_t RegHi = 0; 407 int32_t RegHi;
401 ++NumGPRRegsUsed;
402 // Always start i64 registers at an even register, so this may end 408 // Always start i64 registers at an even register, so this may end
403 // up padding away a register. 409 // up padding away a register.
404 if (RegLo % 2 != 0) { 410 if (NumGPRRegsUsed % 2 != 0) {
405 ++RegLo;
406 ++NumGPRRegsUsed; 411 ++NumGPRRegsUsed;
407 } 412 }
408 // If this leaves us without room to consume another register, 413 RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
409 // leave any previously speculatively consumed registers as consumed. 414 ++NumGPRRegsUsed;
410 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
411 continue;
412 RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed; 415 RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
413 ++NumGPRRegsUsed; 416 ++NumGPRRegsUsed;
417 // If this bumps us past the boundary, don't allocate to a register
418 // and leave any previously speculatively consumed registers as consumed.
419 if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
420 continue;
414 Variable *RegisterArg = Func->makeVariable(Ty); 421 Variable *RegisterArg = Func->makeVariable(Ty);
415 Variable *RegisterLo = Func->makeVariable(IceType_i32); 422 Variable *RegisterLo = Func->makeVariable(IceType_i32);
416 Variable *RegisterHi = Func->makeVariable(IceType_i32); 423 Variable *RegisterHi = Func->makeVariable(IceType_i32);
417 if (ALLOW_DUMP) { 424 if (ALLOW_DUMP) {
418 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); 425 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func));
419 RegisterLo->setName(Func, "home_reg_lo:" + Arg->getName(Func)); 426 RegisterLo->setName(Func, "home_reg_lo:" + Arg->getName(Func));
420 RegisterHi->setName(Func, "home_reg_hi:" + Arg->getName(Func)); 427 RegisterHi->setName(Func, "home_reg_hi:" + Arg->getName(Func));
421 } 428 }
422 RegisterLo->setRegNum(RegLo); 429 RegisterLo->setRegNum(RegLo);
423 RegisterLo->setIsArg(); 430 RegisterLo->setIsArg();
(...skipping 19 matching lines...)
443 RegisterArg->setRegNum(RegNum); 450 RegisterArg->setRegNum(RegNum);
444 RegisterArg->setIsArg(); 451 RegisterArg->setIsArg();
445 Arg->setIsArg(false); 452 Arg->setIsArg(false);
446 453
447 Args[I] = RegisterArg; 454 Args[I] = RegisterArg;
448 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); 455 Context.insert(InstAssign::create(Func, Arg, RegisterArg));
449 } 456 }
450 } 457 }
451 } 458 }
452 459
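The even-register rule above follows the ARM calling convention (AAPCS): a 64-bit integer argument is passed in an even/odd GPR pair (r0:r1 or r2:r3), padding away an odd register if necessary. A self-contained sketch of that bookkeeping; assignI64GprPair is a hypothetical helper illustrating the pattern, not the actual Subzero code:

  #include <cstdint>

  // Returns the index of the low register for an i64 argument (so the
  // pair is r<idx>:r<idx+1>), or -1 if the pair no longer fits in r0-r3
  // and the argument must be passed on the stack. Mirrors the
  // NumGPRRegsUsed accounting in lowerArguments().
  int assignI64GprPair(uint32_t &NumGPRRegsUsed) {
    const uint32_t MaxGprArgs = 4; // ARM32_MAX_GPR_ARG
    if (NumGPRRegsUsed % 2 != 0)
      ++NumGPRRegsUsed; // start at an even register; pad away the odd one
    if (NumGPRRegsUsed + 2 > MaxGprArgs)
      return -1; // the pair does not fit
    int RegLo = static_cast<int>(NumGPRRegsUsed);
    NumGPRRegsUsed += 2;
    return RegLo;
  }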
460 // Helper function for addProlog().
461 //
462 // This assumes Arg is an argument passed on the stack. This sets the
463 // frame offset for Arg and updates InArgsSizeBytes according to Arg's
464 // width. For an I64 arg that has been split into Lo and Hi components,
465 // it calls itself recursively on the components, taking care to handle
466 // Lo first because of the little-endian architecture. Lastly, this
467 // function generates an instruction to copy Arg into its assigned
468 // register if applicable.
469 void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
470 size_t BasicFrameOffset,
471 size_t &InArgsSizeBytes) {
472 Variable *Lo = Arg->getLo();
473 Variable *Hi = Arg->getHi();
474 Type Ty = Arg->getType();
475 if (Lo && Hi && Ty == IceType_i64) {
476 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
477 assert(Hi->getType() != IceType_i64); // don't want infinite recursion
478 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
479 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
480 return;
481 }
482 if (isVectorType(Ty)) {
483 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes);
484 }
485 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
486 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
487 // If the argument variable has been assigned a register, we need to load
488 // the value from the stack slot.
489 if (Arg->hasReg()) {
490 assert(Ty != IceType_i64);
491 OperandARM32Mem *Mem = OperandARM32Mem::create(
492 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
493 Ctx->getConstantInt32(Arg->getStackOffset())));
494 if (isVectorType(Arg->getType())) {
495 UnimplementedError(Func->getContext()->getFlags());
496 } else {
497 _ldr(Arg, Mem);
498 }
499 // This argument-copying instruction uses an explicit
500 // OperandARM32Mem operand instead of a Variable, so its
501 // fill-from-stack operation has to be tracked separately for
502 // statistics.
503 Ctx->statsUpdateFills();
504 }
505 }
506
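A worked example of the offset bookkeeping above, with assumed numbers: suppose BasicFrameOffset is 8 and the first stack argument is an i64 split into Lo and Hi components.

  // finishArgumentLowering(Lo, ...): StackOffset = 8 + 0, InArgsSizeBytes -> 4
  // finishArgumentLowering(Hi, ...): StackOffset = 8 + 4, InArgsSizeBytes -> 8
  // Lo is handled first so the low word sits at the lower address,
  // matching ARM's little-endian layout.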
453 Type TargetARM32::stackSlotType() { return IceType_i32; } 507 Type TargetARM32::stackSlotType() { return IceType_i32; }
454 508
455 void TargetARM32::addProlog(CfgNode *Node) { 509 void TargetARM32::addProlog(CfgNode *Node) {
456 (void)Node; 510 // Stack frame layout:
457 UnimplementedError(Func->getContext()->getFlags()); 511 //
512 // +------------------------+
513 // | 1. preserved registers |
514 // +------------------------+
515 // | 2. padding |
516 // +------------------------+
517 // | 3. global spill area |
518 // +------------------------+
519 // | 4. padding |
520 // +------------------------+
521 // | 5. local spill area |
522 // +------------------------+
523 // | 6. padding |
524 // +------------------------+
525 // | 7. allocas |
526 // +------------------------+
527 //
528 // The following variables record the size in bytes of the given areas:
529 // * PreservedRegsSizeBytes: area 1
530 // * SpillAreaPaddingBytes: area 2
531 // * GlobalsSize: area 3
532 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4
533 // * LocalsSpillAreaSize: area 5
534 // * SpillAreaSizeBytes: areas 2 - 6
535 // Determine stack frame offsets for each Variable without a
536 // register assignment. This can be done with one variable per stack
537 // slot, or by coalescing: run the register allocator again with an
538 // infinite set of registers (as a side effect, this gives variables
539 // a second chance at physical register assignment).
540 //
541 // A middle ground approach is to leverage sparsity and allocate one
542 // block of space on the frame for globals (variables with
543 // multi-block lifetime), and one block to share for locals
544 // (single-block lifetime).
545
546 Context.init(Node);
547 Context.setInsertPoint(Context.getCur());
548
549 llvm::SmallBitVector CalleeSaves =
550 getRegisterSet(RegSet_CalleeSave, RegSet_None);
551 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
552 VarList SortedSpilledVariables;
553 size_t GlobalsSize = 0;
554 // If there is a separate locals area, this represents that area.
555 // Otherwise it counts any variable not counted by GlobalsSize.
556 SpillAreaSizeBytes = 0;
557 // If there is a separate locals area, this specifies the alignment
558 // for it.
559 uint32_t LocalsSlotsAlignmentBytes = 0;
560 // The entire spill locations area gets aligned to largest natural
561 // alignment of the variables that have a spill slot.
562 uint32_t SpillAreaAlignmentBytes = 0;
563 // For now, we don't have target-specific variables that need special
564 // treatment (no stack-slot-linked SpillVariable type).
565 std::function<bool(Variable *)> TargetVarHook =
566 [](Variable *) { return false; };
567
568 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
569 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
570 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
571 &LocalsSlotsAlignmentBytes, TargetVarHook);
572 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
573 SpillAreaSizeBytes += GlobalsSize;
574
575 // Add push instructions for preserved registers.
576 // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15).
577 // Unlike x86, ARM also has callee-saved float/vector registers.
578 // The "vpush" instruction can handle a whole list of float/vector
579 // registers, but it only handles contiguous sequences of registers
580 // by specifying the start and the length.
581 VarList GPRsToPreserve;
582 GPRsToPreserve.reserve(CalleeSaves.size());
583 uint32_t NumCallee = 0;
584 size_t PreservedRegsSizeBytes = 0;
585 // Consider FP and LR as callee-save / used as needed.
586 if (UsesFramePointer) {
587 CalleeSaves[RegARM32::Reg_fp] = true;
588 assert(RegsUsed[RegARM32::Reg_fp] == false);
589 RegsUsed[RegARM32::Reg_fp] = true;
590 }
591 if (!MaybeLeafFunc) {
592 CalleeSaves[RegARM32::Reg_lr] = true;
593 RegsUsed[RegARM32::Reg_lr] = true;
594 }
595 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
596 if (CalleeSaves[i] && RegsUsed[i]) {
597 // TODO(jvoung): use a separate vpush for each contiguous floating
598 // point register segment, adding 4 or 8 bytes per register by type.
599 ++NumCallee;
600 PreservedRegsSizeBytes += 4;
601 GPRsToPreserve.push_back(getPhysicalRegister(i));
602 }
603 }
604 Ctx->statsUpdateRegistersSaved(NumCallee);
605 if (!GPRsToPreserve.empty())
606 _push(GPRsToPreserve);
607
608 // Generate "mov FP, SP" if needed.
609 if (UsesFramePointer) {
610 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
611 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
612 _mov(FP, SP);
613 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
614 Context.insert(InstFakeUse::create(Func, FP));
615 }
616
617 // Align the variables area. SpillAreaPaddingBytes is the size of
618 // the region after the preserved registers and before the spill areas.
619 // LocalsSlotsPaddingBytes is the amount of padding between the globals
620 // and locals area if they are separate.
621 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
622 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
623 uint32_t SpillAreaPaddingBytes = 0;
624 uint32_t LocalsSlotsPaddingBytes = 0;
625 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
626 GlobalsSize, LocalsSlotsAlignmentBytes,
627 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
628 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
629 uint32_t GlobalsAndSubsequentPaddingSize =
630 GlobalsSize + LocalsSlotsPaddingBytes;
631
632 // Align SP if necessary.
633 if (NeedsStackAlignment) {
634 uint32_t StackOffset = PreservedRegsSizeBytes;
635 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
636 SpillAreaSizeBytes = StackSize - StackOffset;
637 }
638
639 // Generate "sub sp, SpillAreaSizeBytes"
640 if (SpillAreaSizeBytes) {
641 // Use the IP inter-procedural scratch register if needed to legalize
642 // the immediate.
643 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
644 Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
645 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
646 _sub(SP, SP, SubAmount);
647 }
648 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
649
650 resetStackAdjustment();
651
652 // Fill in stack offsets for stack args, and copy args into registers
653 // for those that were register-allocated. Args are pushed right to
654 // left, so Arg[0] is closest to the stack/frame pointer.
655 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
656 size_t BasicFrameOffset = PreservedRegsSizeBytes;
657 if (!UsesFramePointer)
658 BasicFrameOffset += SpillAreaSizeBytes;
659
660 const VarList &Args = Func->getArgs();
661 size_t InArgsSizeBytes = 0;
662 unsigned NumGPRArgs = 0;
663 for (Variable *Arg : Args) {
664 Type Ty = Arg->getType();
665 // Skip arguments passed in registers.
666 if (isVectorType(Ty)) {
667 UnimplementedError(Func->getContext()->getFlags());
668 continue;
669 } else if (isFloatingType(Ty)) {
670 UnimplementedError(Func->getContext()->getFlags());
671 continue;
672 } else if (Ty == IceType_i64 && NumGPRArgs < ARM32_MAX_GPR_ARG) {
673 // Start at an even register.
674 if (NumGPRArgs % 2 == 1) {
675 ++NumGPRArgs;
676 }
677 NumGPRArgs += 2;
678 if (NumGPRArgs <= ARM32_MAX_GPR_ARG)
679 continue;
680 } else if (NumGPRArgs < ARM32_MAX_GPR_ARG) {
681 ++NumGPRArgs;
682 continue;
683 }
684 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
685 }
686
687 // Fill in stack offsets for locals.
688 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
689 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
690 UsesFramePointer);
691 this->HasComputedFrame = true;
692
693 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) {
694 OstreamLocker L(Func->getContext());
695 Ostream &Str = Func->getContext()->getStrDump();
696
697 Str << "Stack layout:\n";
698 uint32_t SPAdjustmentPaddingSize =
699 SpillAreaSizeBytes - LocalsSpillAreaSize -
700 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes;
701 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
702 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
703 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
704 << " globals spill area = " << GlobalsSize << " bytes\n"
705 << " globals-locals spill areas intermediate padding = "
706 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
707 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
708 << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
709
710 Str << "Stack details:\n"
711 << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
712 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
713 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
714 << " bytes\n"
715 << " is FP based = " << UsesFramePointer << "\n";
716 }
458 } 717 }
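A worked example of the SP adjustment above, under assumed inputs: if the prolog pushes two callee-saved GPRs (PreservedRegsSizeBytes = 8), the spill areas need 12 bytes, and NeedsStackAlignment is set, then:

  StackSize          = applyStackAlignment(8 + 12) = 32
  SpillAreaSizeBytes = 32 - 8                      = 24   // sub sp, sp, #24

After the push (8 bytes) plus the subtract (24 bytes), SP has moved by 32 bytes in total and remains 16-byte aligned.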
459 718
460 void TargetARM32::addEpilog(CfgNode *Node) { 719 void TargetARM32::addEpilog(CfgNode *Node) {
461 (void)Node; 720 InstList &Insts = Node->getInsts();
462 UnimplementedError(Func->getContext()->getFlags()); 721 InstList::reverse_iterator RI, E;
722 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
723 if (llvm::isa<InstARM32Ret>(*RI))
724 break;
725 }
726 if (RI == E)
727 return;
728
729 // Convert the reverse_iterator position into its corresponding
730 // (forward) iterator position.
731 InstList::iterator InsertPoint = RI.base();
732 --InsertPoint;
733 Context.init(Node);
734 Context.setInsertPoint(InsertPoint);
735
736 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
737 if (UsesFramePointer) {
738 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
739 // For late-stage liveness analysis (e.g. asm-verbose mode),
740 // adding a fake use of SP before the assignment of SP=FP keeps
741 // previous SP adjustments from being dead-code eliminated.
742 Context.insert(InstFakeUse::create(Func, SP));
743 _mov(SP, FP);
744 } else {
745 // add SP, SpillAreaSizeBytes
746 if (SpillAreaSizeBytes) {
747 // Use the IP inter-procedural scratch register if needed to legalize
748 // the immediate. It shouldn't be live at this point.
749 Operand *AddAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
750 Legal_Reg | Legal_Flex, RegARM32::Reg_ip);
751 _add(SP, SP, AddAmount);
752 }
753 }
754
755 // Add pop instructions for preserved registers.
756 llvm::SmallBitVector CalleeSaves =
757 getRegisterSet(RegSet_CalleeSave, RegSet_None);
758 VarList GPRsToRestore;
759 GPRsToRestore.reserve(CalleeSaves.size());
760 // Consider FP and LR as callee-save / used as needed.
761 if (UsesFramePointer) {
762 CalleeSaves[RegARM32::Reg_fp] = true;
763 }
764 if (!MaybeLeafFunc) {
765 CalleeSaves[RegARM32::Reg_lr] = true;
766 }
767 // Pop registers in ascending order just like push
768 // (instead of in reverse order).
769 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
770 if (CalleeSaves[i] && RegsUsed[i]) {
771 GPRsToRestore.push_back(getPhysicalRegister(i));
772 }
773 }
774 if (!GPRsToRestore.empty())
775 _pop(GPRsToRestore);
776
777 if (!Ctx->getFlags().getUseSandboxing())
778 return;
779
780 // Change the original ret instruction into a sandboxed return sequence.
781 // bundle_lock
782 // bic lr, #0xc000000f
783 // bx lr
784 // bundle_unlock
785 // This isn't just alignment to getBundleAlignLog2Bytes(); the mask must
786 // also restrict the branch target to the lower 1GB of the address space.
787 Operand *RetMask =
788 legalize(Ctx->getConstantInt32(0xc000000f), Legal_Reg | Legal_Flex);
789 Variable *LR = makeReg(IceType_i32, RegARM32::Reg_lr);
790 Variable *RetValue = nullptr;
791 if (RI->getSrcSize())
792 RetValue = llvm::cast<Variable>(RI->getSrc(0));
793 _bundle_lock();
794 _bic(LR, LR, RetMask);
795 _ret(LR, RetValue);
796 _bundle_unlock();
797 RI->setDeleted();
463 } 798 }
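The 0xc000000f mask used in the sandboxed return clears the low four bits of lr (forcing 16-byte bundle alignment) and the top two bits (keeping the target below the 1GB boundary). A minimal sketch of the effect of the bic; sandboxReturnAddress is a hypothetical name for illustration:

  #include <cstdint>

  // bic lr, lr, #0xc000000f computes lr & ~0xc000000f (= lr & 0x3ffffff0),
  // so the result is 16-byte aligned and below 0x40000000 (1GB).
  uint32_t sandboxReturnAddress(uint32_t LR) {
    return LR & ~UINT32_C(0xc000000f);
  }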
464 799
465 void TargetARM32::split64(Variable *Var) { 800 void TargetARM32::split64(Variable *Var) {
466 assert(Var->getType() == IceType_i64); 801 assert(Var->getType() == IceType_i64);
467 Variable *Lo = Var->getLo(); 802 Variable *Lo = Var->getLo();
468 Variable *Hi = Var->getHi(); 803 Variable *Hi = Var->getHi();
469 if (Lo) { 804 if (Lo) {
470 assert(Hi); 805 assert(Hi);
471 return; 806 return;
472 } 807 }
(...skipping 401 matching lines...)
874 Operand *Cond = Inst->getCondition(); 1209 Operand *Cond = Inst->getCondition();
875 // TODO(jvoung): Handle folding opportunities. 1210 // TODO(jvoung): Handle folding opportunities.
876 1211
877 Variable *Src0R = legalizeToVar(Cond); 1212 Variable *Src0R = legalizeToVar(Cond);
878 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1213 Constant *Zero = Ctx->getConstantZero(IceType_i32);
879 _cmp(Src0R, Zero); 1214 _cmp(Src0R, Zero);
880 _br(CondARM32::NE, Inst->getTargetTrue(), Inst->getTargetFalse()); 1215 _br(CondARM32::NE, Inst->getTargetTrue(), Inst->getTargetFalse());
881 } 1216 }
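The lowering above materializes the i1 condition into a register, compares it against zero, and branches on NE. In effect it emits the following sequence (register and label names illustrative):

  // cmp <cond_reg>, #0
  // bne <target_true>
  // b   <target_false>   (or fall through, depending on block layout)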
882 1217
883 void TargetARM32::lowerCall(const InstCall *Instr) { 1218 void TargetARM32::lowerCall(const InstCall *Instr) {
1219 MaybeLeafFunc = false;
1220
884 // TODO(jvoung): assign arguments to registers and stack. Also reserve stack. 1221 // TODO(jvoung): assign arguments to registers and stack. Also reserve stack.
885 if (Instr->getNumArgs()) { 1222 if (Instr->getNumArgs()) {
886 UnimplementedError(Func->getContext()->getFlags()); 1223 UnimplementedError(Func->getContext()->getFlags());
887 } 1224 }
888 1225
889 // Generate the call instruction. Assign its result to a temporary 1226 // Generate the call instruction. Assign its result to a temporary
890 // with high register allocation weight. 1227 // with high register allocation weight.
891 Variable *Dest = Instr->getDest(); 1228 Variable *Dest = Instr->getDest();
892 // ReturnReg doubles as ReturnRegLo as necessary. 1229 // ReturnReg doubles as ReturnRegLo as necessary.
893 Variable *ReturnReg = nullptr; 1230 Variable *ReturnReg = nullptr;
(...skipping 666 matching lines...)
1560 Variable *Reg = Func->makeVariable(Type); 1897 Variable *Reg = Func->makeVariable(Type);
1561 if (RegNum == Variable::NoRegister) 1898 if (RegNum == Variable::NoRegister)
1562 Reg->setWeightInfinite(); 1899 Reg->setWeightInfinite();
1563 else 1900 else
1564 Reg->setRegNum(RegNum); 1901 Reg->setRegNum(RegNum);
1565 return Reg; 1902 return Reg;
1566 } 1903 }
1567 1904
1568 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) { 1905 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
1569 assert(llvm::isPowerOf2_32(Align)); 1906 assert(llvm::isPowerOf2_32(Align));
1570 uint32_t RotateAmt = 0; 1907 uint32_t RotateAmt;
1571 uint32_t Immed_8; 1908 uint32_t Immed_8;
1572 Operand *Mask; 1909 Operand *Mask;
1573 // Use AND or BIC to mask off the bits, depending on which immediate fits 1910 // Use AND or BIC to mask off the bits, depending on which immediate fits
1574 // (if it fits at all). Assume Align is usually small, in which case BIC 1911 // (if it fits at all). Assume Align is usually small, in which case BIC
1575 // works better. 1912 // works better. Thus, this rounds down to the alignment.
1576 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) { 1913 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
1577 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex); 1914 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
1578 _bic(Reg, Reg, Mask); 1915 _bic(Reg, Reg, Mask);
1579 } else { 1916 } else {
1580 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex); 1917 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex);
1581 _and(Reg, Reg, Mask); 1918 _and(Reg, Reg, Mask);
1582 } 1919 }
1583 } 1920 }
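Both paths round Reg down to a multiple of Align: for a power-of-two Align, ~(Align - 1) equals -Align in two's complement, so bic with (Align - 1) and and with -Align compute the same mask. The difference is only which constant fits ARM's 8-bit rotated immediate encoding. For example, with Align = 16:

  Reg & ~(16 - 1)   // bic Reg, Reg, #15          (15 encodes directly)
  Reg & -16         // and Reg, Reg, #0xfffffff0  (0xfffffff0 does not encode,
                    //  so the mask is first legalized into a register)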
1584 1921
1585 void TargetARM32::postLower() { 1922 void TargetARM32::postLower() {
(...skipping 63 matching lines...)
1649 } 1986 }
1650 } 1987 }
1651 1988
1652 void TargetDataARM32::lowerConstants() const { 1989 void TargetDataARM32::lowerConstants() const {
1653 if (Ctx->getFlags().getDisableTranslation()) 1990 if (Ctx->getFlags().getDisableTranslation())
1654 return; 1991 return;
1655 UnimplementedError(Ctx->getFlags()); 1992 UnimplementedError(Ctx->getFlags());
1656 } 1993 }
1657 1994
1658 } // end of namespace Ice 1995 } // end of namespace Ice