OLD | NEW |
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringARM32 class, which consists almost | 10 // This file implements the TargetLoweringARM32 class, which consists almost |
(...skipping 108 matching lines...)
119 ICEINSTICMP_TABLE | 119 ICEINSTICMP_TABLE |
120 #undef X | 120 #undef X |
121 } // end of namespace dummy1 | 121 } // end of namespace dummy1 |
122 | 122 |
123 // The maximum number of arguments to pass in GPR registers. | 123 // The maximum number of arguments to pass in GPR registers. |
124 const uint32_t ARM32_MAX_GPR_ARG = 4; | 124 const uint32_t ARM32_MAX_GPR_ARG = 4; |
125 | 125 |
126 // Stack alignment | 126 // Stack alignment |
127 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16; | 127 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16; |
128 | 128 |
| 129 // Value is in bytes. Returns Value adjusted to the next highest multiple |
| 130 // of the stack alignment. |
| 131 uint32_t applyStackAlignment(uint32_t Value) { |
| 132 return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES); |
| 133 } |
| 134 |
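For reference, Utils::applyAlignment rounds up to the next multiple of a power-of-two alignment. A minimal standalone sketch of the assumed behavior (the helper's actual implementation lives in Utils and may differ):

```cpp
#include <cassert>
#include <cstdint>

// Assumed round-up idiom for a power-of-two Align; Utils::applyAlignment
// is expected to behave like this for the call above.
static uint32_t applyAlignmentSketch(uint32_t Value, uint32_t Align) {
  return (Value + Align - 1) & ~(Align - 1);
}

int main() {
  // With ARM32_STACK_ALIGNMENT_BYTES == 16:
  assert(applyAlignmentSketch(0, 16) == 0);
  assert(applyAlignmentSketch(1, 16) == 16);
  assert(applyAlignmentSketch(20, 16) == 32);
  return 0;
}
```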
129 } // end of anonymous namespace | 135 } // end of anonymous namespace |
130 | 136 |
131 TargetARM32::TargetARM32(Cfg *Func) | 137 TargetARM32::TargetARM32(Cfg *Func) |
132 : TargetLowering(Func), UsesFramePointer(false) { | 138 : TargetLowering(Func), UsesFramePointer(false), NeedsStackAlignment(false), |
| 139 IsLeafFunction(true), SpillAreaSizeBytes(0) { |
133 // TODO: Don't initialize IntegerRegisters and friends every time. | 140 // TODO: Don't initialize IntegerRegisters and friends every time. |
134 // Instead, initialize in some sort of static initializer for the | 141 // Instead, initialize in some sort of static initializer for the |
135 // class. | 142 // class. |
136 llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM); | 143 llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM); |
137 llvm::SmallBitVector FloatRegisters(RegARM32::Reg_NUM); | 144 llvm::SmallBitVector FloatRegisters(RegARM32::Reg_NUM); |
138 llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM); | 145 llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM); |
139 llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM); | 146 llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM); |
140 ScratchRegs.resize(RegARM32::Reg_NUM); | 147 ScratchRegs.resize(RegARM32::Reg_NUM); |
141 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ | 148 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ |
142 isFP) \ | 149 isFP) \ |
(...skipping 246 matching lines...)
389 // TODO(jvoung): handle float/vector types. | 396 // TODO(jvoung): handle float/vector types. |
390 if (isVectorType(Ty)) { | 397 if (isVectorType(Ty)) { |
391 UnimplementedError(Func->getContext()->getFlags()); | 398 UnimplementedError(Func->getContext()->getFlags()); |
392 continue; | 399 continue; |
393 } else if (isFloatingType(Ty)) { | 400 } else if (isFloatingType(Ty)) { |
394 UnimplementedError(Func->getContext()->getFlags()); | 401 UnimplementedError(Func->getContext()->getFlags()); |
395 continue; | 402 continue; |
396 } else if (Ty == IceType_i64) { | 403 } else if (Ty == IceType_i64) { |
397 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) | 404 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) |
398 continue; | 405 continue; |
399 int32_t RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed; | 406 int32_t RegLo; |
400 int32_t RegHi = 0; | 407 int32_t RegHi; |
401 ++NumGPRRegsUsed; | |
402 // Always start i64 registers at an even register, so this may end | 408 // Always start i64 registers at an even register, so this may end |
403 // up padding away a register. | 409 // up padding away a register. |
404 if (RegLo % 2 != 0) { | 410 if (NumGPRRegsUsed % 2 != 0) { |
405 ++RegLo; | |
406 ++NumGPRRegsUsed; | 411 ++NumGPRRegsUsed; |
407 } | 412 } |
408 // If this leaves us without room to consume another register, | 413 RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed; |
409 // leave any previously speculatively consumed registers as consumed. | 414 ++NumGPRRegsUsed; |
410 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) | |
411 continue; | |
412 RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed; | 415 RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed; |
413 ++NumGPRRegsUsed; | 416 ++NumGPRRegsUsed; |
| 417 // If this bumps us past the boundary, don't allocate to a register |
| 418 // and leave any previously speculatively consumed registers as consumed. |
| 419 if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG) |
| 420 continue; |
414 Variable *RegisterArg = Func->makeVariable(Ty); | 421 Variable *RegisterArg = Func->makeVariable(Ty); |
415 Variable *RegisterLo = Func->makeVariable(IceType_i32); | 422 Variable *RegisterLo = Func->makeVariable(IceType_i32); |
416 Variable *RegisterHi = Func->makeVariable(IceType_i32); | 423 Variable *RegisterHi = Func->makeVariable(IceType_i32); |
417 if (ALLOW_DUMP) { | 424 if (ALLOW_DUMP) { |
418 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); | 425 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); |
419 RegisterLo->setName(Func, "home_reg_lo:" + Arg->getName(Func)); | 426 RegisterLo->setName(Func, "home_reg_lo:" + Arg->getName(Func)); |
420 RegisterHi->setName(Func, "home_reg_hi:" + Arg->getName(Func)); | 427 RegisterHi->setName(Func, "home_reg_hi:" + Arg->getName(Func)); |
421 } | 428 } |
422 RegisterLo->setRegNum(RegLo); | 429 RegisterLo->setRegNum(RegLo); |
423 RegisterLo->setIsArg(); | 430 RegisterLo->setIsArg(); |
(...skipping 19 matching lines...)
443 RegisterArg->setRegNum(RegNum); | 450 RegisterArg->setRegNum(RegNum); |
444 RegisterArg->setIsArg(); | 451 RegisterArg->setIsArg(); |
445 Arg->setIsArg(false); | 452 Arg->setIsArg(false); |
446 | 453 |
447 Args[I] = RegisterArg; | 454 Args[I] = RegisterArg; |
448 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); | 455 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); |
449 } | 456 } |
450 } | 457 } |
451 } | 458 } |
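The even-register rule above mirrors the AAPCS convention that a 64-bit integer argument occupies an even/odd GPR pair (r0/r1 or r2/r3). A standalone sketch of the consumption logic, with hypothetical arguments (i32, i64, i32); ArgKind is a stand-in for Ice types:

```cpp
#include <cstdint>
#include <cstdio>

// Sketch of the GPR consumption logic above, assuming ARM32_MAX_GPR_ARG == 4
// (argument registers r0-r3).
enum ArgKind { I32, I64 };

int main() {
  const uint32_t MaxGPRArgs = 4;
  const ArgKind Args[] = {I32, I64, I32};
  uint32_t Used = 0;
  for (ArgKind A : Args) {
    if (A == I64) {
      if (Used % 2 != 0)
        ++Used; // pad away an odd register to start at an even one
      Used += 2; // speculatively consume the even/odd pair
      if (Used > MaxGPRArgs) {
        std::printf("i64 -> stack\n");
        continue;
      }
      std::printf("i64 -> r%u/r%u\n", unsigned(Used - 2), unsigned(Used - 1));
    } else {
      if (Used >= MaxGPRArgs) {
        std::printf("i32 -> stack\n");
        continue;
      }
      std::printf("i32 -> r%u\n", unsigned(Used));
      ++Used;
    }
  }
  // Prints: i32 -> r0, i64 -> r2/r3 (r1 is padded away), i32 -> stack.
  return 0;
}
```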
452 | 459 |
| 460 // Helper function for addProlog(). |
| 461 // |
| 462 // This assumes Arg is an argument passed on the stack. This sets the |
| 463 // frame offset for Arg and updates InArgsSizeBytes according to Arg's |
| 464 // width. For an i64 arg that has been split into Lo and Hi components, |
| 465 // it calls itself recursively on the components, taking care to handle |
| 466 // Lo first because of the little-endian architecture. Lastly, this |
| 467 // function generates an instruction to copy Arg into its assigned |
| 468 // register if applicable. |
| 469 void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
| 470 size_t BasicFrameOffset, |
| 471 size_t &InArgsSizeBytes) { |
| 472 Variable *Lo = Arg->getLo(); |
| 473 Variable *Hi = Arg->getHi(); |
| 474 Type Ty = Arg->getType(); |
| 475 if (Lo && Hi && Ty == IceType_i64) { |
| 476 assert(Lo->getType() != IceType_i64); // don't want infinite recursion |
| 477 assert(Hi->getType() != IceType_i64); // don't want infinite recursion |
| 478 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| 479 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| 480 return; |
| 481 } |
| 482 if (isVectorType(Ty)) { |
| 483 InArgsSizeBytes = applyStackAlignment(InArgsSizeBytes); |
| 484 } |
| 485 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
| 486 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
| 487 // If the argument variable has been assigned a register, we need to load |
| 488 // the value from the stack slot. |
| 489 if (Arg->hasReg()) { |
| 490 assert(Ty != IceType_i64); |
| 491 OperandARM32Mem *Mem = OperandARM32Mem::create( |
| 492 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( |
| 493 Ctx->getConstantInt32(Arg->getStackOffset()))); |
| 494 if (isVectorType(Arg->getType())) { |
| 495 UnimplementedError(Func->getContext()->getFlags()); |
| 496 } else { |
| 497 _ldr(Arg, Mem); |
| 498 } |
| 499 // This argument-copying instruction uses an explicit |
| 500 // OperandARM32Mem operand instead of a Variable, so its |
| 501 // fill-from-stack operation has to be tracked separately for |
| 502 // statistics. |
| 503 Ctx->statsUpdateFills(); |
| 504 } |
| 505 } |
| 506 |
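A hypothetical trace of the offset bookkeeping: with BasicFrameOffset = 8 (assumed) and stack arguments (i32, i64), the i32 lands at offset 8, then the split i64 gets Lo at 12 and Hi at 16, since Lo is processed first on this little-endian target and each i32 component advances InArgsSizeBytes by its 4-byte stack width:

```cpp
#include <cstddef>
#include <cstdio>

// Hypothetical trace of finishArgumentLowering's offset bookkeeping,
// assuming a 4-byte stack slot per i32 component and BasicFrameOffset == 8.
int main() {
  size_t BasicFrameOffset = 8;
  size_t InArgsSizeBytes = 0;
  // i32 arg:
  std::printf("i32    at %zu\n", BasicFrameOffset + InArgsSizeBytes); // 8
  InArgsSizeBytes += 4;
  // i64 arg, split into Lo then Hi (Lo first: little-endian layout):
  std::printf("i64.lo at %zu\n", BasicFrameOffset + InArgsSizeBytes); // 12
  InArgsSizeBytes += 4;
  std::printf("i64.hi at %zu\n", BasicFrameOffset + InArgsSizeBytes); // 16
  InArgsSizeBytes += 4;
  return 0;
}
```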
453 Type TargetARM32::stackSlotType() { return IceType_i32; } | 507 Type TargetARM32::stackSlotType() { return IceType_i32; } |
454 | 508 |
455 void TargetARM32::addProlog(CfgNode *Node) { | 509 void TargetARM32::addProlog(CfgNode *Node) { |
456 (void)Node; | 510 // Stack frame layout: |
457 UnimplementedError(Func->getContext()->getFlags()); | 511 // |
| 512 // +------------------------+ |
| 513 // | 1. preserved registers | |
| 514 // +------------------------+ |
| 515 // | 2. padding | |
| 516 // +------------------------+ |
| 517 // | 3. global spill area | |
| 518 // +------------------------+ |
| 519 // | 4. padding | |
| 520 // +------------------------+ |
| 521 // | 5. local spill area | |
| 522 // +------------------------+ |
| 523 // | 6. padding | |
| 524 // +------------------------+ |
| 525 // | 7. allocas | |
| 526 // +------------------------+ |
| 527 // |
| 528 // The following variables record the size in bytes of the given areas: |
| 529 // * PreservedRegsSizeBytes: area 1 |
| 530 // * SpillAreaPaddingBytes: area 2 |
| 531 // * GlobalsSize: area 3 |
| 532 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 |
| 533 // * LocalsSpillAreaSize: area 5 |
| 534 // * SpillAreaSizeBytes: areas 2 - 6 |
| 535 // Determine stack frame offsets for each Variable without a |
| 536 // register assignment. This can be done as one variable per stack |
| 537 // slot. Or, do coalescing by running the register allocator again |
| 538 // with an infinite set of registers (as a side effect, this gives |
| 539 // variables a second chance at physical register assignment). |
| 540 // |
| 541 // A middle ground approach is to leverage sparsity and allocate one |
| 542 // block of space on the frame for globals (variables with |
| 543 // multi-block lifetime), and one block to share for locals |
| 544 // (single-block lifetime). |
| 545 |
| 546 Context.init(Node); |
| 547 Context.setInsertPoint(Context.getCur()); |
| 548 |
| 549 llvm::SmallBitVector CalleeSaves = |
| 550 getRegisterSet(RegSet_CalleeSave, RegSet_None); |
| 551 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); |
| 552 VarList SortedSpilledVariables; |
| 553 size_t GlobalsSize = 0; |
| 554 // If there is a separate locals area, this represents that area. |
| 555 // Otherwise it counts any variable not counted by GlobalsSize. |
| 556 SpillAreaSizeBytes = 0; |
| 557 // If there is a separate locals area, this specifies the alignment |
| 558 // for it. |
| 559 uint32_t LocalsSlotsAlignmentBytes = 0; |
| 560 // The entire spill locations area gets aligned to the largest natural |
| 561 // alignment of the variables that have a spill slot. |
| 562 uint32_t SpillAreaAlignmentBytes = 0; |
| 563 // For now, we don't have target-specific variables that need special |
| 564 // treatment (no stack-slot-linked SpillVariable type). |
| 565 std::function<bool(Variable *)> TargetVarHook = |
| 566 [](Variable *) { return false; }; |
| 567 |
| 568 // Compute the list of spilled variables and bounds for GlobalsSize, etc. |
| 569 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, |
| 570 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, |
| 571 &LocalsSlotsAlignmentBytes, TargetVarHook); |
| 572 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; |
| 573 SpillAreaSizeBytes += GlobalsSize; |
| 574 |
| 575 // Add push instructions for preserved registers. |
| 576 // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15). |
| 577 // Unlike x86, ARM also has callee-saved float/vector registers. |
| 578 // The "vpush" instruction can handle a whole list of float/vector |
| 579 // registers, but it only handles contiguous sequences of registers |
| 580 // by specifying the start and the length. |
| 581 VarList GPRsToPreserve; |
| 582 GPRsToPreserve.reserve(CalleeSaves.size()); |
| 583 uint32_t NumCallee = 0; |
| 584 size_t PreservedRegsSizeBytes = 0; |
| 585 // Consider FP and LR as callee-save / used as needed. |
| 586 if (UsesFramePointer) { |
| 587 CalleeSaves[RegARM32::Reg_fp] = true; |
| 588 assert(RegsUsed[RegARM32::Reg_fp] == false); |
| 589 RegsUsed[RegARM32::Reg_fp] = true; |
| 590 } |
| 591 if (!IsLeafFunction) { |
| 592 CalleeSaves[RegARM32::Reg_lr] = true; |
| 593 RegsUsed[RegARM32::Reg_lr] = true; |
| 594 } |
| 595 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
| 596 if (CalleeSaves[i] && RegsUsed[i]) { |
| 597 // TODO(jvoung): do a separate vpush for each contiguous floating- |
| 598 // point register segment, adding 4 or 8 bytes per register by type. |
| 599 ++NumCallee; |
| 600 PreservedRegsSizeBytes += 4; |
| 601 GPRsToPreserve.push_back(getPhysicalRegister(i)); |
| 602 } |
| 603 } |
| 604 Ctx->statsUpdateRegistersSaved(NumCallee); |
| 605 if (!GPRsToPreserve.empty()) |
| 606 _push(GPRsToPreserve); |
| 607 |
| 608 // Generate "mov FP, SP" if needed. |
| 609 if (UsesFramePointer) { |
| 610 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); |
| 611 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
| 612 _mov(FP, SP); |
| 613 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). |
| 614 Context.insert(InstFakeUse::create(Func, FP)); |
| 615 } |
| 616 |
| 617 // TODO(jvoung): Reuse this code too. |
| 618 // Align the variables area. SpillAreaPaddingBytes is the size of |
| 619 // the region after the preserved registers and before the spill areas. |
| 620 uint32_t SpillAreaPaddingBytes = 0; |
| 621 if (SpillAreaAlignmentBytes) { |
| 622 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES); |
| 623 uint32_t PaddingStart = PreservedRegsSizeBytes; |
| 624 uint32_t SpillAreaStart = |
| 625 Utils::applyAlignment(PaddingStart, SpillAreaAlignmentBytes); |
| 626 SpillAreaPaddingBytes = SpillAreaStart - PaddingStart; |
| 627 SpillAreaSizeBytes += SpillAreaPaddingBytes; |
| 628 } |
| 629 |
| 630 // If there are separate globals and locals areas, make sure the |
| 631 // locals area is aligned by padding the end of the globals area. |
| 632 uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize; |
| 633 if (LocalsSlotsAlignmentBytes) { |
| 634 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
| 635 GlobalsAndSubsequentPaddingSize = |
| 636 Utils::applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes); |
| 637 SpillAreaSizeBytes += GlobalsAndSubsequentPaddingSize - GlobalsSize; |
| 638 } |
| 639 |
| 640 // Align SP if necessary. |
| 641 if (NeedsStackAlignment) { |
| 642 uint32_t StackOffset = PreservedRegsSizeBytes; |
| 643 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); |
| 644 SpillAreaSizeBytes = StackSize - StackOffset; |
| 645 } |
| 646 |
| 647 // Generate "sub sp, SpillAreaSizeBytes" |
| 648 if (SpillAreaSizeBytes) { |
| 649 // Use the IP intra-procedure-call scratch register if needed to legalize |
| 650 // the immediate. |
| 651 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), |
| 652 Legal_Reg | Legal_Flex, RegARM32::Reg_ip); |
| 653 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
| 654 _sub(SP, SP, SubAmount); |
| 655 } |
| 656 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
| 657 |
| 658 resetStackAdjustment(); |
| 659 |
| 660 // Fill in stack offsets for stack args, and copy args into registers |
| 661 // for those that were register-allocated. Args are pushed right to |
| 662 // left, so Arg[0] is closest to the stack/frame pointer. |
| 663 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
| 664 size_t BasicFrameOffset = PreservedRegsSizeBytes; |
| 665 if (!UsesFramePointer) |
| 666 BasicFrameOffset += SpillAreaSizeBytes; |
| 667 |
| 668 const VarList &Args = Func->getArgs(); |
| 669 size_t InArgsSizeBytes = 0; |
| 670 unsigned NumGPRArgs = 0; |
| 671 for (Variable *Arg : Args) { |
| 672 Type Ty = Arg->getType(); |
| 673 // Skip arguments passed in registers. |
| 674 if (isVectorType(Ty)) { |
| 675 UnimplementedError(Func->getContext()->getFlags()); |
| 676 continue; |
| 677 } else if (isFloatingType(Ty)) { |
| 678 UnimplementedError(Func->getContext()->getFlags()); |
| 679 continue; |
| 680 } else if (Ty == IceType_i64 && NumGPRArgs < ARM32_MAX_GPR_ARG) { |
| 681 // Start at an even register. |
| 682 if (NumGPRArgs % 2 == 1) { |
| 683 ++NumGPRArgs; |
| 684 } |
| 685 NumGPRArgs += 2; |
| 686 if (NumGPRArgs <= ARM32_MAX_GPR_ARG) |
| 687 continue; |
| 688 } else if (NumGPRArgs < ARM32_MAX_GPR_ARG) { |
| 689 ++NumGPRArgs; |
| 690 continue; |
| 691 } |
| 692 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
| 693 } |
| 694 |
| 695 // Fill in stack offsets for locals. |
| 696 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes, |
| 697 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize, |
| 698 UsesFramePointer); |
| 699 this->HasComputedFrame = true; |
| 700 |
| 701 if (ALLOW_DUMP && Func->isVerbose(IceV_Frame)) { |
| 702 OstreamLocker L(Func->getContext()); |
| 703 Ostream &Str = Func->getContext()->getStrDump(); |
| 704 |
| 705 Str << "Stack layout:\n"; |
| 706 uint32_t SPAdjustmentPaddingSize = |
| 707 SpillAreaSizeBytes - LocalsSpillAreaSize - |
| 708 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes; |
| 709 Str << " in-args = " << InArgsSizeBytes << " bytes\n" |
| 710 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" |
| 711 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" |
| 712 << " globals spill area = " << GlobalsSize << " bytes\n" |
| 713 << " globals-locals spill areas intermediate padding = " |
| 714 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" |
| 715 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" |
| 716 << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n"; |
| 717 |
| 718 Str << "Stack details:\n" |
| 719 << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n" |
| 720 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" |
| 721 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes |
| 722 << " bytes\n" |
| 723 << " is FP based = " << UsesFramePointer << "\n"; |
| 724 } |
458 } | 725 } |
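To make the size accounting concrete, here is a worked example with assumed inputs (push {fp, lr}, 12 bytes of globals at 8-byte spill-area alignment, 4 bytes of locals at 4-byte alignment, and NeedsStackAlignment set); this is a sketch of the arithmetic above, not a test of the CL:

```cpp
#include <cassert>
#include <cstdint>

// Assumed round-up helper (mirrors Utils::applyAlignment for powers of two).
static uint32_t alignUp(uint32_t Value, uint32_t Align) {
  return (Value + Align - 1) & ~(Align - 1);
}

int main() {
  // Hypothetical non-leaf, FP-using function: push {fp, lr} => 8 bytes.
  uint32_t PreservedRegsSizeBytes = 8;
  uint32_t GlobalsSize = 12;
  uint32_t SpillAreaSizeBytes = 4 + GlobalsSize; // locals + globals = 16

  // Area 2: padding between preserved registers and the spill areas.
  uint32_t SpillAreaStart = alignUp(PreservedRegsSizeBytes, 8);  // 8
  SpillAreaSizeBytes += SpillAreaStart - PreservedRegsSizeBytes; // +0

  // Area 4: pad the end of the globals area so locals stay 4-byte aligned.
  uint32_t GlobalsAndSubsequentPaddingSize = alignUp(GlobalsSize, 4); // 12
  SpillAreaSizeBytes += GlobalsAndSubsequentPaddingSize - GlobalsSize; // +0

  // Area 6: round the total so SP remains 16-byte aligned.
  uint32_t StackSize =
      alignUp(PreservedRegsSizeBytes + SpillAreaSizeBytes, 16);  // 32
  SpillAreaSizeBytes = StackSize - PreservedRegsSizeBytes;       // 24

  assert(SpillAreaSizeBytes == 24); // prolog emits: sub sp, sp, #24
  return 0;
}
```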
459 | 726 |
460 void TargetARM32::addEpilog(CfgNode *Node) { | 727 void TargetARM32::addEpilog(CfgNode *Node) { |
461 (void)Node; | 728 InstList &Insts = Node->getInsts(); |
462 UnimplementedError(Func->getContext()->getFlags()); | 729 InstList::reverse_iterator RI, E; |
| 730 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { |
| 731 if (llvm::isa<InstARM32Ret>(*RI)) |
| 732 break; |
| 733 } |
| 734 if (RI == E) |
| 735 return; |
| 736 |
| 737 // Convert the reverse_iterator position into its corresponding |
| 738 // (forward) iterator position. |
| 739 InstList::iterator InsertPoint = RI.base(); |
| 740 --InsertPoint; |
| 741 Context.init(Node); |
| 742 Context.setInsertPoint(InsertPoint); |
| 743 |
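The decrement after RI.base() follows from the standard C++ guarantee that for any reverse_iterator r, &*r == &*(r.base() - 1); stepping the forward iterator back once lands it on the ret instruction itself. A minimal demonstration:

```cpp
#include <cassert>
#include <list>

int main() {
  std::list<int> L = {1, 2, 3};
  auto RI = L.rbegin(); // refers to 3
  auto Fwd = RI.base(); // one past 3, i.e. L.end()
  --Fwd;                // now refers to 3, the same element as *RI
  assert(&*Fwd == &*RI);
  return 0;
}
```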
| 744 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
| 745 if (UsesFramePointer) { |
| 746 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); |
| 747 // For late-stage liveness analysis (e.g. asm-verbose mode), |
| 748 // adding a fake use of SP before the assignment of SP=FP keeps |
| 749 // previous SP adjustments from being dead-code eliminated. |
| 750 Context.insert(InstFakeUse::create(Func, SP)); |
| 751 _mov(SP, FP); |
| 752 } else { |
| 753 // add SP, SpillAreaSizeBytes |
| 754 if (SpillAreaSizeBytes) { |
| 755 // Use the IP intra-procedure-call scratch register if needed to legalize |
| 756 // the immediate. It shouldn't be live at this point. |
| 757 Operand *AddAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), |
| 758 Legal_Reg | Legal_Flex, RegARM32::Reg_ip); |
| 759 _add(SP, SP, AddAmount); |
| 760 } |
| 761 } |
| 762 |
| 763 // Add pop instructions for preserved registers. |
| 764 llvm::SmallBitVector CalleeSaves = |
| 765 getRegisterSet(RegSet_CalleeSave, RegSet_None); |
| 766 VarList GPRsToRestore; |
| 767 GPRsToRestore.reserve(CalleeSaves.size()); |
| 768 // Consider FP and LR as callee-save / used as needed. |
| 769 if (UsesFramePointer) { |
| 770 CalleeSaves[RegARM32::Reg_fp] = true; |
| 771 } |
| 772 if (!IsLeafFunction) { |
| 773 CalleeSaves[RegARM32::Reg_lr] = true; |
| 774 } |
| 775 // Pop registers in ascending order just like push |
| 776 // (instead of in reverse order). |
| 777 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
| 778 if (CalleeSaves[i] && RegsUsed[i]) { |
| 779 GPRsToRestore.push_back(getPhysicalRegister(i)); |
| 780 } |
| 781 } |
| 782 if (!GPRsToRestore.empty()) |
| 783 _pop(GPRsToRestore); |
| 784 |
| 785 if (!Ctx->getFlags().getUseSandboxing()) |
| 786 return; |
| 787 |
| 788 // Change the original ret instruction into a sandboxed return sequence. |
| 789 // bundle_lock |
| 790 // bic lr, #0xc000000f |
| 791 // bx lr |
| 792 // bundle_unlock |
| 793 // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to |
| 794 // restrict to the lower 1GB as well. |
| 795 Operand *RetMask = |
| 796 legalize(Ctx->getConstantInt32(0xc000000f), Legal_Reg | Legal_Flex); |
| 797 Variable *LR = makeReg(IceType_i32, RegARM32::Reg_lr); |
| 798 Variable *RetValue = nullptr; |
| 799 if (RI->getSrcSize()) |
| 800 RetValue = llvm::cast<Variable>(RI->getSrc(0)); |
| 801 _bundle_lock(); |
| 802 _bic(LR, LR, RetMask); |
| 803 _ret(LR, RetValue); |
| 804 _bundle_unlock(); |
| 805 RI->setDeleted(); |
463 } | 806 } |
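The 0xc000000f mask clears bits 0-3 (forcing 16-byte bundle alignment) and bits 30-31 (keeping the branch target in the low 1 GB). A quick sanity check with a hypothetical return address:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // bic lr, lr, #0xc000000f clears the bits the sandbox reserves:
  // bits 0-3 (16-byte bundle alignment) and bits 30-31 (lower-1GB limit).
  const uint32_t RetMask = 0xc000000fu;
  uint32_t LR = 0xd0001237u;          // hypothetical tainted return address
  uint32_t Sandboxed = LR & ~RetMask; // what BIC computes
  assert(Sandboxed == 0x10001230u);
  assert(Sandboxed % 16 == 0);        // bundle-aligned
  assert(Sandboxed < (1u << 30));     // within the low 1GB
  return 0;
}
```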
464 | 807 |
465 void TargetARM32::split64(Variable *Var) { | 808 void TargetARM32::split64(Variable *Var) { |
466 assert(Var->getType() == IceType_i64); | 809 assert(Var->getType() == IceType_i64); |
467 Variable *Lo = Var->getLo(); | 810 Variable *Lo = Var->getLo(); |
468 Variable *Hi = Var->getHi(); | 811 Variable *Hi = Var->getHi(); |
469 if (Lo) { | 812 if (Lo) { |
470 assert(Hi); | 813 assert(Hi); |
471 return; | 814 return; |
472 } | 815 } |
(...skipping 401 matching lines...)
874 Operand *Cond = Inst->getCondition(); | 1217 Operand *Cond = Inst->getCondition(); |
875 // TODO(jvoung): Handle folding opportunities. | 1218 // TODO(jvoung): Handle folding opportunities. |
876 | 1219 |
877 Variable *Src0R = legalizeToVar(Cond); | 1220 Variable *Src0R = legalizeToVar(Cond); |
878 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1221 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
879 _cmp(Src0R, Zero); | 1222 _cmp(Src0R, Zero); |
880 _br(CondARM32::NE, Inst->getTargetTrue(), Inst->getTargetFalse()); | 1223 _br(CondARM32::NE, Inst->getTargetTrue(), Inst->getTargetFalse()); |
881 } | 1224 } |
882 | 1225 |
883 void TargetARM32::lowerCall(const InstCall *Instr) { | 1226 void TargetARM32::lowerCall(const InstCall *Instr) { |
| 1227 IsLeafFunction = false; |
| 1228 |
884 // TODO(jvoung): assign arguments to registers and stack. Also reserve stack. | 1229 // TODO(jvoung): assign arguments to registers and stack. Also reserve stack. |
885 if (Instr->getNumArgs()) { | 1230 if (Instr->getNumArgs()) { |
886 UnimplementedError(Func->getContext()->getFlags()); | 1231 UnimplementedError(Func->getContext()->getFlags()); |
887 } | 1232 } |
888 | 1233 |
889 // Generate the call instruction. Assign its result to a temporary | 1234 // Generate the call instruction. Assign its result to a temporary |
890 // with high register allocation weight. | 1235 // with high register allocation weight. |
891 Variable *Dest = Instr->getDest(); | 1236 Variable *Dest = Instr->getDest(); |
892 // ReturnReg doubles as ReturnRegLo as necessary. | 1237 // ReturnReg doubles as ReturnRegLo as necessary. |
893 Variable *ReturnReg = nullptr; | 1238 Variable *ReturnReg = nullptr; |
(...skipping 666 matching lines...)
1560 Variable *Reg = Func->makeVariable(Type); | 1905 Variable *Reg = Func->makeVariable(Type); |
1561 if (RegNum == Variable::NoRegister) | 1906 if (RegNum == Variable::NoRegister) |
1562 Reg->setWeightInfinite(); | 1907 Reg->setWeightInfinite(); |
1563 else | 1908 else |
1564 Reg->setRegNum(RegNum); | 1909 Reg->setRegNum(RegNum); |
1565 return Reg; | 1910 return Reg; |
1566 } | 1911 } |
1567 | 1912 |
1568 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) { | 1913 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) { |
1569 assert(llvm::isPowerOf2_32(Align)); | 1914 assert(llvm::isPowerOf2_32(Align)); |
1570 uint32_t RotateAmt = 0; | 1915 uint32_t RotateAmt; |
1571 uint32_t Immed_8; | 1916 uint32_t Immed_8; |
1572 Operand *Mask; | 1917 Operand *Mask; |
1573 // Use AND or BIC to mask off the bits, depending on which immediate fits | 1918 // Use AND or BIC to mask off the bits, depending on which immediate fits |
1574 // (if it fits at all). Assume Align is usually small, in which case BIC | 1919 // (if it fits at all). Assume Align is usually small, in which case BIC |
1575 // works better. | 1920 // works better. Either way, this rounds Reg down to the alignment. |
1576 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) { | 1921 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) { |
1577 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex); | 1922 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex); |
1578 _bic(Reg, Reg, Mask); | 1923 _bic(Reg, Reg, Mask); |
1579 } else { | 1924 } else { |
1580 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex); | 1925 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex); |
1581 _and(Reg, Reg, Mask); | 1926 _and(Reg, Reg, Mask); |
1582 } | 1927 } |
1583 } | 1928 } |
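Both branches compute the same round-down; the only question is which constant (Align - 1 for BIC, -Align for AND) can be encoded as an ARM flexible immediate (an 8-bit value rotated right by an even amount). A quick check of the two's-complement identity behind the equivalence:

```cpp
#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  const uint32_t Align = 16; // must be a power of two
  for (uint32_t Reg : {0u, 1u, 15u, 16u, 17u, 0xfffffff1u}) {
    // bic Reg, Reg, #(Align - 1) computes Reg & ~(Align - 1);
    // and Reg, Reg, #-Align      computes Reg & (0 - Align).
    // Since -Align == ~(Align - 1) in two's complement, both round down.
    assert((Reg & ~(Align - 1)) == (Reg & (0u - Align)));
  }
  return 0;
}
```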
1584 | 1929 |
1585 void TargetARM32::postLower() { | 1930 void TargetARM32::postLower() { |
(...skipping 63 matching lines...)
1649 } | 1994 } |
1650 } | 1995 } |
1651 | 1996 |
1652 void TargetDataARM32::lowerConstants() const { | 1997 void TargetDataARM32::lowerConstants() const { |
1653 if (Ctx->getFlags().getDisableTranslation()) | 1998 if (Ctx->getFlags().getDisableTranslation()) |
1654 return; | 1999 return; |
1655 UnimplementedError(Ctx->getFlags()); | 2000 UnimplementedError(Ctx->getFlags()); |
1656 } | 2001 } |
1657 | 2002 |
1658 } // end of namespace Ice | 2003 } // end of namespace Ice |