OLD | NEW |
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 267 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
278 Func->advancedPhiLowering(); | 278 Func->advancedPhiLowering(); |
279 Func->dump("After advanced Phi lowering"); | 279 Func->dump("After advanced Phi lowering"); |
280 } | 280 } |
281 | 281 |
282 // Stack frame mapping. | 282 // Stack frame mapping. |
283 Func->genFrame(); | 283 Func->genFrame(); |
284 if (Func->hasError()) | 284 if (Func->hasError()) |
285 return; | 285 return; |
286 Func->dump("After stack frame mapping"); | 286 Func->dump("After stack frame mapping"); |
287 | 287 |
| 288 legalizeStackSlots(); |
| 289 if (Func->hasError()) |
| 290 return; |
| 291 Func->dump("After legalizeStackSlots"); |
| 292 |
288 Func->contractEmptyNodes(); | 293 Func->contractEmptyNodes(); |
289 Func->reorderNodes(); | 294 Func->reorderNodes(); |
290 | 295 |
291 // Branch optimization. This needs to be done just before code | 296 // Branch optimization. This needs to be done just before code |
292 // emission. In particular, no transformations that insert or | 297 // emission. In particular, no transformations that insert or |
293 // reorder CfgNodes should be done after branch optimization. We go | 298 // reorder CfgNodes should be done after branch optimization. We go |
294 // ahead and do it before nop insertion to reduce the amount of work | 299 // ahead and do it before nop insertion to reduce the amount of work |
295 // needed for searching for opportunities. | 300 // needed for searching for opportunities. |
296 Func->doBranchOpt(); | 301 Func->doBranchOpt(); |
297 Func->dump("After branch optimization"); | 302 Func->dump("After branch optimization"); |
(...skipping 30 matching lines...) Expand all Loading... |
328 regAlloc(RAK_InfOnly); | 333 regAlloc(RAK_InfOnly); |
329 if (Func->hasError()) | 334 if (Func->hasError()) |
330 return; | 335 return; |
331 Func->dump("After regalloc of infinite-weight variables"); | 336 Func->dump("After regalloc of infinite-weight variables"); |
332 | 337 |
333 Func->genFrame(); | 338 Func->genFrame(); |
334 if (Func->hasError()) | 339 if (Func->hasError()) |
335 return; | 340 return; |
336 Func->dump("After stack frame mapping"); | 341 Func->dump("After stack frame mapping"); |
337 | 342 |
| 343 legalizeStackSlots(); |
| 344 if (Func->hasError()) |
| 345 return; |
| 346 Func->dump("After legalizeStackSlots"); |
| 347 |
338 // Nop insertion | 348 // Nop insertion |
339 if (Ctx->getFlags().shouldDoNopInsertion()) { | 349 if (Ctx->getFlags().shouldDoNopInsertion()) { |
340 Func->doNopInsertion(); | 350 Func->doNopInsertion(); |
341 } | 351 } |
342 } | 352 } |
343 | 353 |
344 bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) { | 354 bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) { |
345 if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) { | 355 if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) { |
346 return Br->optimizeBranch(NextNode); | 356 return Br->optimizeBranch(NextNode); |
347 } | 357 } |
(...skipping 29 matching lines...) Expand all Loading... |
377 // live upon function entry. | 387 // live upon function entry. |
378 if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) { | 388 if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) { |
379 Func->addImplicitArg(Reg); | 389 Func->addImplicitArg(Reg); |
380 Reg->setIgnoreLiveness(); | 390 Reg->setIgnoreLiveness(); |
381 } | 391 } |
382 } | 392 } |
383 return Reg; | 393 return Reg; |
384 } | 394 } |
385 | 395 |
386 void TargetARM32::emitVariable(const Variable *Var) const { | 396 void TargetARM32::emitVariable(const Variable *Var) const { |
| 397 if (!BuildDefs::dump()) |
| 398 return; |
387 Ostream &Str = Ctx->getStrEmit(); | 399 Ostream &Str = Ctx->getStrEmit(); |
388 if (Var->hasReg()) { | 400 if (Var->hasReg()) { |
389 Str << getRegName(Var->getRegNum(), Var->getType()); | 401 Str << getRegName(Var->getRegNum(), Var->getType()); |
390 return; | 402 return; |
391 } | 403 } |
392 if (Var->getWeight().isInf()) { | 404 if (Var->getWeight().isInf()) { |
393 llvm::report_fatal_error( | 405 llvm::report_fatal_error( |
394 "Infinite-weight Variable has no register assigned"); | 406 "Infinite-weight Variable has no register assigned"); |
395 } | 407 } |
396 int32_t Offset = Var->getStackOffset(); | 408 int32_t Offset = Var->getStackOffset(); |
397 if (!hasFramePointer()) | 409 int32_t BaseRegNum = Var->getBaseRegNum(); |
398 Offset += getStackAdjustment(); | 410 if (BaseRegNum == Variable::NoRegister) { |
399 // TODO(jvoung): Handle out of range. Perhaps we need a scratch register | 411 BaseRegNum = getFrameOrStackReg(); |
400 // to materialize a larger offset. | 412 if (!hasFramePointer()) |
401 constexpr bool SignExt = false; | 413 Offset += getStackAdjustment(); |
402 if (!OperandARM32Mem::canHoldOffset(Var->getType(), SignExt, Offset)) { | 414 } |
| 415 if (!isLegalVariableStackOffset(Offset)) { |
403 llvm::report_fatal_error("Illegal stack offset"); | 416 llvm::report_fatal_error("Illegal stack offset"); |
404 } | 417 } |
405 const Type FrameSPTy = IceType_i32; | 418 const Type FrameSPTy = stackSlotType(); |
406 Str << "[" << getRegName(getFrameOrStackReg(), FrameSPTy); | 419 Str << "[" << getRegName(BaseRegNum, FrameSPTy); |
407 if (Offset != 0) { | 420 if (Offset != 0) { |
408 Str << ", " << getConstantPrefix() << Offset; | 421 Str << ", " << getConstantPrefix() << Offset; |
409 } | 422 } |
410 Str << "]"; | 423 Str << "]"; |
411 } | 424 } |
412 | 425 |
413 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) { | 426 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) { |
414 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) | 427 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) |
415 return false; | 428 return false; |
416 int32_t RegLo, RegHi; | 429 int32_t RegLo, RegHi; |
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
549 | 562 |
550 Type TargetARM32::stackSlotType() { return IceType_i32; } | 563 Type TargetARM32::stackSlotType() { return IceType_i32; } |
551 | 564 |
552 void TargetARM32::addProlog(CfgNode *Node) { | 565 void TargetARM32::addProlog(CfgNode *Node) { |
553 // Stack frame layout: | 566 // Stack frame layout: |
554 // | 567 // |
555 // +------------------------+ | 568 // +------------------------+ |
556 // | 1. preserved registers | | 569 // | 1. preserved registers | |
557 // +------------------------+ | 570 // +------------------------+ |
558 // | 2. padding | | 571 // | 2. padding | |
559 // +------------------------+ | 572 // +------------------------+ <--- FramePointer (if used) |
560 // | 3. global spill area | | 573 // | 3. global spill area | |
561 // +------------------------+ | 574 // +------------------------+ |
562 // | 4. padding | | 575 // | 4. padding | |
563 // +------------------------+ | 576 // +------------------------+ |
564 // | 5. local spill area | | 577 // | 5. local spill area | |
565 // +------------------------+ | 578 // +------------------------+ |
566 // | 6. padding | | 579 // | 6. padding | |
567 // +------------------------+ | 580 // +------------------------+ |
568 // | 7. allocas | | 581 // | 7. allocas | |
569 // +------------------------+ | 582 // +------------------------+ <--- StackPointer |
570 // | 583 // |
571 // The following variables record the size in bytes of the given areas: | 584 // The following variables record the size in bytes of the given areas: |
572 // * PreservedRegsSizeBytes: area 1 | 585 // * PreservedRegsSizeBytes: area 1 |
573 // * SpillAreaPaddingBytes: area 2 | 586 // * SpillAreaPaddingBytes: area 2 |
574 // * GlobalsSize: area 3 | 587 // * GlobalsSize: area 3 |
575 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 | 588 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 |
576 // * LocalsSpillAreaSize: area 5 | 589 // * LocalsSpillAreaSize: area 5 |
577 // * SpillAreaSizeBytes: areas 2 - 6 | 590 // * SpillAreaSizeBytes: areas 2 - 6 |
578 // Determine stack frame offsets for each Variable without a | 591 // Determine stack frame offsets for each Variable without a |
579 // register assignment. This can be done as one variable per stack | 592 // register assignment. This can be done as one variable per stack |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
674 | 687 |
675 // Align SP if necessary. | 688 // Align SP if necessary. |
676 if (NeedsStackAlignment) { | 689 if (NeedsStackAlignment) { |
677 uint32_t StackOffset = PreservedRegsSizeBytes; | 690 uint32_t StackOffset = PreservedRegsSizeBytes; |
678 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); | 691 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); |
679 SpillAreaSizeBytes = StackSize - StackOffset; | 692 SpillAreaSizeBytes = StackSize - StackOffset; |
680 } | 693 } |
681 | 694 |
682 // Generate "sub sp, SpillAreaSizeBytes" | 695 // Generate "sub sp, SpillAreaSizeBytes" |
683 if (SpillAreaSizeBytes) { | 696 if (SpillAreaSizeBytes) { |
684 // Use the IP inter-procedural scratch register if needed to legalize | 697 // Use the scratch register if needed to legalize the immediate. |
685 // the immediate. | |
686 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), | 698 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), |
687 Legal_Reg | Legal_Flex, RegARM32::Reg_ip); | 699 Legal_Reg | Legal_Flex, getReservedTmpReg()); |
688 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 700 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
689 _sub(SP, SP, SubAmount); | 701 _sub(SP, SP, SubAmount); |
690 } | 702 } |
691 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | 703 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
692 | 704 |
693 resetStackAdjustment(); | 705 resetStackAdjustment(); |
694 | 706 |
695 // Fill in stack offsets for stack args, and copy args into registers | 707 // Fill in stack offsets for stack args, and copy args into registers |
696 // for those that were register-allocated. Args are pushed right to | 708 // for those that were register-allocated. Args are pushed right to |
697 // left, so Arg[0] is closest to the stack/frame pointer. | 709 // left, so Arg[0] is closest to the stack/frame pointer. |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
778 if (UsesFramePointer) { | 790 if (UsesFramePointer) { |
779 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); | 791 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); |
780 // For late-stage liveness analysis (e.g. asm-verbose mode), | 792 // For late-stage liveness analysis (e.g. asm-verbose mode), |
781 // adding a fake use of SP before the assignment of SP=FP keeps | 793 // adding a fake use of SP before the assignment of SP=FP keeps |
782 // previous SP adjustments from being dead-code eliminated. | 794 // previous SP adjustments from being dead-code eliminated. |
783 Context.insert(InstFakeUse::create(Func, SP)); | 795 Context.insert(InstFakeUse::create(Func, SP)); |
784 _mov(SP, FP); | 796 _mov(SP, FP); |
785 } else { | 797 } else { |
786 // add SP, SpillAreaSizeBytes | 798 // add SP, SpillAreaSizeBytes |
787 if (SpillAreaSizeBytes) { | 799 if (SpillAreaSizeBytes) { |
788 // Use the IP inter-procedural scratch register if needed to legalize | 800 // Use the scratch register if needed to legalize the immediate. |
789 // the immediate. It shouldn't be live at this point. | 801 Operand *AddAmount = |
790 Operand *AddAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), | 802 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), |
791 Legal_Reg | Legal_Flex, RegARM32::Reg_ip); | 803 Legal_Reg | Legal_Flex, getReservedTmpReg()); |
792 _add(SP, SP, AddAmount); | 804 _add(SP, SP, AddAmount); |
793 } | 805 } |
794 } | 806 } |
795 | 807 |
796 // Add pop instructions for preserved registers. | 808 // Add pop instructions for preserved registers. |
797 llvm::SmallBitVector CalleeSaves = | 809 llvm::SmallBitVector CalleeSaves = |
798 getRegisterSet(RegSet_CalleeSave, RegSet_None); | 810 getRegisterSet(RegSet_CalleeSave, RegSet_None); |
799 VarList GPRsToRestore; | 811 VarList GPRsToRestore; |
800 GPRsToRestore.reserve(CalleeSaves.size()); | 812 GPRsToRestore.reserve(CalleeSaves.size()); |
801 // Consider FP and LR as callee-save / used as needed. | 813 // Consider FP and LR as callee-save / used as needed. |
(...skipping 29 matching lines...) Expand all Loading... |
831 Variable *RetValue = nullptr; | 843 Variable *RetValue = nullptr; |
832 if (RI->getSrcSize()) | 844 if (RI->getSrcSize()) |
833 RetValue = llvm::cast<Variable>(RI->getSrc(0)); | 845 RetValue = llvm::cast<Variable>(RI->getSrc(0)); |
834 _bundle_lock(); | 846 _bundle_lock(); |
835 _bic(LR, LR, RetMask); | 847 _bic(LR, LR, RetMask); |
836 _ret(LR, RetValue); | 848 _ret(LR, RetValue); |
837 _bundle_unlock(); | 849 _bundle_unlock(); |
838 RI->setDeleted(); | 850 RI->setDeleted(); |
839 } | 851 } |
840 | 852 |
| 853 bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const { |
| 854 constexpr bool SignExt = false; |
| 855 return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset); |
| 856 } |
| 857 |
| 858 StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var, |
| 859 Variable *OrigBaseReg) { |
| 860 int32_t Offset = Var->getStackOffset(); |
| 861 // Legalize will likely need a movw/movt combination, but if the top |
| 862 // bits are all 0 from negating the offset and subtracting, we could |
| 863 // use that instead. |
| 864 bool ShouldSub = (-Offset & 0xFFFF0000) == 0; |
| 865 if (ShouldSub) |
| 866 Offset = -Offset; |
| 867 Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset), |
| 868 Legal_Reg | Legal_Flex, getReservedTmpReg()); |
| 869 Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg()); |
| 870 if (ShouldSub) |
| 871 _sub(ScratchReg, OrigBaseReg, OffsetVal); |
| 872 else |
| 873 _add(ScratchReg, OrigBaseReg, OffsetVal); |
| 874 StackVariable *NewVar = Func->makeVariable<StackVariable>(stackSlotType()); |
| 875 NewVar->setWeight(RegWeight::Zero); |
| 876 NewVar->setBaseRegNum(ScratchReg->getRegNum()); |
| 877 constexpr int32_t NewOffset = 0; |
| 878 NewVar->setStackOffset(NewOffset); |
| 879 return NewVar; |
| 880 } |
| 881 |
| 882 void TargetARM32::legalizeStackSlots() { |
| 883 // If a stack variable's frame offset doesn't fit, convert from: |
| 884 // ldr X, OFF[SP] |
| 885 // to: |
| 886 // movw/movt TMP, OFF_PART |
| 887 // add TMP, TMP, SP |
| 888 // ldr X, OFF_MORE[TMP] |
| 889 // |
| 890 // This is safe because we have reserved TMP, and add for ARM does not |
| 891 // clobber the flags register. |
| 892 Func->dump("Before legalizeStackSlots"); |
| 893 assert(hasComputedFrame()); |
| 894 // Early exit, if SpillAreaSizeBytes is really small. |
| 895 if (isLegalVariableStackOffset(SpillAreaSizeBytes)) |
| 896 return; |
| 897 Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); |
| 898 int32_t StackAdjust = 0; |
| 899 // Do a fairly naive greedy clustering for now. Pick the first stack slot |
| 900 // that's out of bounds and make a new base reg using the architecture's temp |
| 901 // register. If that works for the next slot, then great. Otherwise, create |
| 902 // a new base register, clobbering the previous base register. Never share a |
| 903 // base reg across different basic blocks. This isn't ideal if local and |
| 904 // multi-block variables are far apart and their references are interspersed. |
| 905 // It may help to be more coordinated about assign stack slot numbers |
| 906 // and may help to assign smaller offsets to higher-weight variables |
| 907 // so that they don't depend on this legalization. |
| 908 for (CfgNode *Node : Func->getNodes()) { |
| 909 Context.init(Node); |
| 910 StackVariable *NewBaseReg = nullptr; |
| 911 int32_t NewBaseOffset = 0; |
| 912 while (!Context.atEnd()) { |
| 913 PostIncrLoweringContext PostIncrement(Context); |
| 914 Inst *CurInstr = Context.getCur(); |
| 915 Variable *Dest = CurInstr->getDest(); |
| 916 // Check if the previous NewBaseReg is clobbered, and reset if needed. |
| 917 if ((Dest && NewBaseReg && Dest->hasReg() && |
| 918 Dest->getRegNum() == NewBaseReg->getBaseRegNum()) || |
| 919 llvm::isa<InstFakeKill>(CurInstr)) { |
| 920 NewBaseReg = nullptr; |
| 921 NewBaseOffset = 0; |
| 922 } |
| 923 // The stack adjustment only matters if we are using SP instead of FP. |
| 924 if (!hasFramePointer()) { |
| 925 if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) { |
| 926 StackAdjust += AdjInst->getAmount(); |
| 927 NewBaseOffset += AdjInst->getAmount(); |
| 928 continue; |
| 929 } |
| 930 if (llvm::isa<InstARM32Call>(CurInstr)) { |
| 931 NewBaseOffset -= StackAdjust; |
| 932 StackAdjust = 0; |
| 933 continue; |
| 934 } |
| 935 } |
| 936 // For now, only Mov instructions can have stack variables. We need to |
| 937 // know the type of instruction because we currently create a fresh one |
| 938 // to replace Dest/Source, rather than mutate in place. |
| 939 auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr); |
| 940 if (!MovInst) { |
| 941 continue; |
| 942 } |
| 943 if (!Dest->hasReg()) { |
| 944 int32_t Offset = Dest->getStackOffset(); |
| 945 Offset += StackAdjust; |
| 946 if (!isLegalVariableStackOffset(Offset)) { |
| 947 if (NewBaseReg) { |
| 948 int32_t OffsetDiff = Offset - NewBaseOffset; |
| 949 if (isLegalVariableStackOffset(OffsetDiff)) { |
| 950 StackVariable *NewDest = |
| 951 Func->makeVariable<StackVariable>(stackSlotType()); |
| 952 NewDest->setWeight(RegWeight::Zero); |
| 953 NewDest->setBaseRegNum(NewBaseReg->getBaseRegNum()); |
| 954 NewDest->setStackOffset(OffsetDiff); |
| 955 Variable *NewDestVar = NewDest; |
| 956 _mov(NewDestVar, MovInst->getSrc(0)); |
| 957 MovInst->setDeleted(); |
| 958 continue; |
| 959 } |
| 960 } |
| 961 StackVariable *LegalDest = legalizeVariableSlot(Dest, OrigBaseReg); |
| 962 assert(LegalDest != Dest); |
| 963 Variable *LegalDestVar = LegalDest; |
| 964 _mov(LegalDestVar, MovInst->getSrc(0)); |
| 965 MovInst->setDeleted(); |
| 966 NewBaseReg = LegalDest; |
| 967 NewBaseOffset = Offset; |
| 968 continue; |
| 969 } |
| 970 } |
| 971 assert(MovInst->getSrcSize() == 1); |
| 972 Variable *Var = llvm::dyn_cast<Variable>(MovInst->getSrc(0)); |
| 973 if (Var && !Var->hasReg()) { |
| 974 int32_t Offset = Var->getStackOffset(); |
| 975 Offset += StackAdjust; |
| 976 if (!isLegalVariableStackOffset(Offset)) { |
| 977 if (NewBaseReg) { |
| 978 int32_t OffsetDiff = Offset - NewBaseOffset; |
| 979 if (isLegalVariableStackOffset(OffsetDiff)) { |
| 980 StackVariable *NewVar = |
| 981 Func->makeVariable<StackVariable>(stackSlotType()); |
| 982 NewVar->setWeight(RegWeight::Zero); |
| 983 NewVar->setBaseRegNum(NewBaseReg->getBaseRegNum()); |
| 984 NewVar->setStackOffset(OffsetDiff); |
| 985 _mov(Dest, NewVar); |
| 986 MovInst->setDeleted(); |
| 987 continue; |
| 988 } |
| 989 } |
| 990 StackVariable *LegalVar = legalizeVariableSlot(Var, OrigBaseReg); |
| 991 assert(LegalVar != Var); |
| 992 _mov(Dest, LegalVar); |
| 993 MovInst->setDeleted(); |
| 994 NewBaseReg = LegalVar; |
| 995 NewBaseOffset = Offset; |
| 996 continue; |
| 997 } |
| 998 } |
| 999 } |
| 1000 } |
| 1001 } |
| 1002 |
841 void TargetARM32::split64(Variable *Var) { | 1003 void TargetARM32::split64(Variable *Var) { |
842 assert(Var->getType() == IceType_i64); | 1004 assert(Var->getType() == IceType_i64); |
843 Variable *Lo = Var->getLo(); | 1005 Variable *Lo = Var->getLo(); |
844 Variable *Hi = Var->getHi(); | 1006 Variable *Hi = Var->getHi(); |
845 if (Lo) { | 1007 if (Lo) { |
846 assert(Hi); | 1008 assert(Hi); |
847 return; | 1009 return; |
848 } | 1010 } |
849 assert(Hi == nullptr); | 1011 assert(Hi == nullptr); |
850 Lo = Func->makeVariable(IceType_i32); | 1012 Lo = Func->makeVariable(IceType_i32); |
(...skipping 1220 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2071 Call->addArg(Val); | 2233 Call->addArg(Val); |
2072 lowerCall(Call); | 2234 lowerCall(Call); |
2073 // The popcount helpers always return 32-bit values, while the intrinsic's | 2235 // The popcount helpers always return 32-bit values, while the intrinsic's |
2074 // signature matches some 64-bit platform's native instructions and | 2236 // signature matches some 64-bit platform's native instructions and |
2075 // expect to fill a 64-bit reg. Thus, clear the upper bits of the dest | 2237 // expect to fill a 64-bit reg. Thus, clear the upper bits of the dest |
2076 // just in case the user doesn't do that in the IR or doesn't toss the bits | 2238 // just in case the user doesn't do that in the IR or doesn't toss the bits |
2077 // via truncate. | 2239 // via truncate. |
2078 if (Val->getType() == IceType_i64) { | 2240 if (Val->getType() == IceType_i64) { |
2079 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2241 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
2080 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2242 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
2081 _mov(DestHi, Zero); | 2243 Variable *T = nullptr; |
| 2244 _mov(T, Zero); |
| 2245 _mov(DestHi, T); |
2082 } | 2246 } |
2083 return; | 2247 return; |
2084 } | 2248 } |
2085 case Intrinsics::Ctlz: { | 2249 case Intrinsics::Ctlz: { |
2086 // The "is zero undef" parameter is ignored and we always return | 2250 // The "is zero undef" parameter is ignored and we always return |
2087 // a well-defined value. | 2251 // a well-defined value. |
2088 Operand *Val = Instr->getArg(0); | 2252 Operand *Val = Instr->getArg(0); |
2089 Variable *ValLoR; | 2253 Variable *ValLoR; |
2090 Variable *ValHiR = nullptr; | 2254 Variable *ValHiR = nullptr; |
2091 if (Val->getType() == IceType_i64) { | 2255 if (Val->getType() == IceType_i64) { |
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2223 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); | 2387 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); |
2224 _cmp(ValHiR, Zero); | 2388 _cmp(ValHiR, Zero); |
2225 Variable *T2 = makeReg(IceType_i32); | 2389 Variable *T2 = makeReg(IceType_i32); |
2226 _add(T2, T, ThirtyTwo); | 2390 _add(T2, T, ThirtyTwo); |
2227 _clz(T2, ValHiR, CondARM32::NE); | 2391 _clz(T2, ValHiR, CondARM32::NE); |
2228 // T2 is actually a source as well when the predicate is not AL | 2392 // T2 is actually a source as well when the predicate is not AL |
2229 // (since it may leave T2 alone). We use set_dest_nonkillable to | 2393 // (since it may leave T2 alone). We use set_dest_nonkillable to |
2230 // prolong the liveness of T2 as if it was used as a source. | 2394 // prolong the liveness of T2 as if it was used as a source. |
2231 _set_dest_nonkillable(); | 2395 _set_dest_nonkillable(); |
2232 _mov(DestLo, T2); | 2396 _mov(DestLo, T2); |
2233 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 2397 Variable *T3 = nullptr; |
| 2398 _mov(T3, Zero); |
| 2399 _mov(DestHi, T3); |
2234 return; | 2400 return; |
2235 } | 2401 } |
2236 _mov(Dest, T); | 2402 _mov(Dest, T); |
2237 return; | 2403 return; |
2238 } | 2404 } |
2239 | 2405 |
2240 void TargetARM32::lowerLoad(const InstLoad *Load) { | 2406 void TargetARM32::lowerLoad(const InstLoad *Load) { |
2241 // A Load instruction can be treated the same as an Assign | 2407 // A Load instruction can be treated the same as an Assign |
2242 // instruction, after the source operand is transformed into an | 2408 // instruction, after the source operand is transformed into an |
2243 // OperandARM32Mem operand. | 2409 // OperandARM32Mem operand. |
(...skipping 520 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2764 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; | 2930 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; |
2765 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { | 2931 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { |
2766 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; | 2932 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; |
2767 } | 2933 } |
2768 // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 2934 // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
2769 // However, for compatibility with current NaCl LLVM, don't claim that. | 2935 // However, for compatibility with current NaCl LLVM, don't claim that. |
2770 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 2936 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
2771 } | 2937 } |
2772 | 2938 |
2773 } // end of namespace Ice | 2939 } // end of namespace Ice |
OLD | NEW |