Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 /// | 9 /// |
| 10 /// \file | 10 /// \file |
| (...skipping 267 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 278 Func->advancedPhiLowering(); | 278 Func->advancedPhiLowering(); |
| 279 Func->dump("After advanced Phi lowering"); | 279 Func->dump("After advanced Phi lowering"); |
| 280 } | 280 } |
| 281 | 281 |
| 282 // Stack frame mapping. | 282 // Stack frame mapping. |
| 283 Func->genFrame(); | 283 Func->genFrame(); |
| 284 if (Func->hasError()) | 284 if (Func->hasError()) |
| 285 return; | 285 return; |
| 286 Func->dump("After stack frame mapping"); | 286 Func->dump("After stack frame mapping"); |
| 287 | 287 |
| 288 legalizeStackSlots(); | |
| 289 if (Func->hasError()) | |
| 290 return; | |
| 291 Func->dump("After legalizeStackSlots"); | |
| 292 | |
| 288 Func->contractEmptyNodes(); | 293 Func->contractEmptyNodes(); |
| 289 Func->reorderNodes(); | 294 Func->reorderNodes(); |
| 290 | 295 |
| 291 // Branch optimization. This needs to be done just before code | 296 // Branch optimization. This needs to be done just before code |
| 292 // emission. In particular, no transformations that insert or | 297 // emission. In particular, no transformations that insert or |
| 293 // reorder CfgNodes should be done after branch optimization. We go | 298 // reorder CfgNodes should be done after branch optimization. We go |
| 294 // ahead and do it before nop insertion to reduce the amount of work | 299 // ahead and do it before nop insertion to reduce the amount of work |
| 295 // needed for searching for opportunities. | 300 // needed for searching for opportunities. |
| 296 Func->doBranchOpt(); | 301 Func->doBranchOpt(); |
| 297 Func->dump("After branch optimization"); | 302 Func->dump("After branch optimization"); |
| (...skipping 30 matching lines...) Expand all Loading... | |
| 328 regAlloc(RAK_InfOnly); | 333 regAlloc(RAK_InfOnly); |
| 329 if (Func->hasError()) | 334 if (Func->hasError()) |
| 330 return; | 335 return; |
| 331 Func->dump("After regalloc of infinite-weight variables"); | 336 Func->dump("After regalloc of infinite-weight variables"); |
| 332 | 337 |
| 333 Func->genFrame(); | 338 Func->genFrame(); |
| 334 if (Func->hasError()) | 339 if (Func->hasError()) |
| 335 return; | 340 return; |
| 336 Func->dump("After stack frame mapping"); | 341 Func->dump("After stack frame mapping"); |
| 337 | 342 |
| 343 legalizeStackSlots(); | |
| 344 if (Func->hasError()) | |
| 345 return; | |
| 346 Func->dump("After legalizeStackSlots"); | |
| 347 | |
| 338 // Nop insertion | 348 // Nop insertion |
| 339 if (Ctx->getFlags().shouldDoNopInsertion()) { | 349 if (Ctx->getFlags().shouldDoNopInsertion()) { |
| 340 Func->doNopInsertion(); | 350 Func->doNopInsertion(); |
| 341 } | 351 } |
| 342 } | 352 } |
| 343 | 353 |
| 344 bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) { | 354 bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) { |
| 345 if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) { | 355 if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) { |
| 346 return Br->optimizeBranch(NextNode); | 356 return Br->optimizeBranch(NextNode); |
| 347 } | 357 } |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 387 Ostream &Str = Ctx->getStrEmit(); | 397 Ostream &Str = Ctx->getStrEmit(); |
| 388 if (Var->hasReg()) { | 398 if (Var->hasReg()) { |
| 389 Str << getRegName(Var->getRegNum(), Var->getType()); | 399 Str << getRegName(Var->getRegNum(), Var->getType()); |
| 390 return; | 400 return; |
| 391 } | 401 } |
| 392 if (Var->getWeight().isInf()) { | 402 if (Var->getWeight().isInf()) { |
| 393 llvm::report_fatal_error( | 403 llvm::report_fatal_error( |
| 394 "Infinite-weight Variable has no register assigned"); | 404 "Infinite-weight Variable has no register assigned"); |
| 395 } | 405 } |
| 396 int32_t Offset = Var->getStackOffset(); | 406 int32_t Offset = Var->getStackOffset(); |
| 397 if (!hasFramePointer()) | 407 int32_t BaseRegNum = Var->getBaseRegNum(); |
| 398 Offset += getStackAdjustment(); | 408 if (BaseRegNum == Variable::NoRegister) { |
| 399 // TODO(jvoung): Handle out of range. Perhaps we need a scratch register | 409 BaseRegNum = getFrameOrStackReg(); |
| 400 // to materialize a larger offset. | 410 if (!hasFramePointer()) |
| 401 constexpr bool SignExt = false; | 411 Offset += getStackAdjustment(); |
| 402 if (!OperandARM32Mem::canHoldOffset(Var->getType(), SignExt, Offset)) { | 412 } |
| 413 if (!isLegalVariableStackOffset(Offset)) { | |
| 403 llvm::report_fatal_error("Illegal stack offset"); | 414 llvm::report_fatal_error("Illegal stack offset"); |
| 404 } | 415 } |
| 405 const Type FrameSPTy = IceType_i32; | 416 const Type FrameSPTy = stackSlotType(); |
| 406 Str << "[" << getRegName(getFrameOrStackReg(), FrameSPTy); | 417 Str << "[" << getRegName(BaseRegNum, FrameSPTy); |
| 407 if (Offset != 0) { | 418 if (Offset != 0) { |
| 408 Str << ", " << getConstantPrefix() << Offset; | 419 Str << ", " << getConstantPrefix() << Offset; |
| 409 } | 420 } |
| 410 Str << "]"; | 421 Str << "]"; |
| 411 } | 422 } |
| 412 | 423 |
| 413 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) { | 424 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) { |
| 414 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) | 425 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) |
| 415 return false; | 426 return false; |
| 416 int32_t RegLo, RegHi; | 427 int32_t RegLo, RegHi; |
| (...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 549 | 560 |
| 550 Type TargetARM32::stackSlotType() { return IceType_i32; } | 561 Type TargetARM32::stackSlotType() { return IceType_i32; } |
| 551 | 562 |
| 552 void TargetARM32::addProlog(CfgNode *Node) { | 563 void TargetARM32::addProlog(CfgNode *Node) { |
| 553 // Stack frame layout: | 564 // Stack frame layout: |
| 554 // | 565 // |
| 555 // +------------------------+ | 566 // +------------------------+ |
| 556 // | 1. preserved registers | | 567 // | 1. preserved registers | |
| 557 // +------------------------+ | 568 // +------------------------+ |
| 558 // | 2. padding | | 569 // | 2. padding | |
| 559 // +------------------------+ | 570 // +------------------------+ <--- FramePointer (if used) |
| 560 // | 3. global spill area | | 571 // | 3. global spill area | |
| 561 // +------------------------+ | 572 // +------------------------+ |
| 562 // | 4. padding | | 573 // | 4. padding | |
| 563 // +------------------------+ | 574 // +------------------------+ |
| 564 // | 5. local spill area | | 575 // | 5. local spill area | |
| 565 // +------------------------+ | 576 // +------------------------+ |
| 566 // | 6. padding | | 577 // | 6. padding | |
| 567 // +------------------------+ | 578 // +------------------------+ |
| 568 // | 7. allocas | | 579 // | 7. allocas | |
| 569 // +------------------------+ | 580 // +------------------------+ <--- StackPointer |
| 570 // | 581 // |
| 571 // The following variables record the size in bytes of the given areas: | 582 // The following variables record the size in bytes of the given areas: |
| 572 // * PreservedRegsSizeBytes: area 1 | 583 // * PreservedRegsSizeBytes: area 1 |
| 573 // * SpillAreaPaddingBytes: area 2 | 584 // * SpillAreaPaddingBytes: area 2 |
| 574 // * GlobalsSize: area 3 | 585 // * GlobalsSize: area 3 |
| 575 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 | 586 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 |
| 576 // * LocalsSpillAreaSize: area 5 | 587 // * LocalsSpillAreaSize: area 5 |
| 577 // * SpillAreaSizeBytes: areas 2 - 6 | 588 // * SpillAreaSizeBytes: areas 2 - 6 |
| 578 // Determine stack frame offsets for each Variable without a | 589 // Determine stack frame offsets for each Variable without a |
| 579 // register assignment. This can be done as one variable per stack | 590 // register assignment. This can be done as one variable per stack |
| (...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 674 | 685 |
| 675 // Align SP if necessary. | 686 // Align SP if necessary. |
| 676 if (NeedsStackAlignment) { | 687 if (NeedsStackAlignment) { |
| 677 uint32_t StackOffset = PreservedRegsSizeBytes; | 688 uint32_t StackOffset = PreservedRegsSizeBytes; |
| 678 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); | 689 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); |
| 679 SpillAreaSizeBytes = StackSize - StackOffset; | 690 SpillAreaSizeBytes = StackSize - StackOffset; |
| 680 } | 691 } |
| 681 | 692 |
| 682 // Generate "sub sp, SpillAreaSizeBytes" | 693 // Generate "sub sp, SpillAreaSizeBytes" |
| 683 if (SpillAreaSizeBytes) { | 694 if (SpillAreaSizeBytes) { |
| 684 // Use the IP inter-procedural scratch register if needed to legalize | 695 // Use the scratch register if needed to legalize the immediate. |
| 685 // the immediate. | |
| 686 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), | 696 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), |
| 687 Legal_Reg | Legal_Flex, RegARM32::Reg_ip); | 697 Legal_Reg | Legal_Flex, getReservedTmpReg()); |
| 688 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 698 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
| 689 _sub(SP, SP, SubAmount); | 699 _sub(SP, SP, SubAmount); |
| 690 } | 700 } |
| 691 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | 701 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
| 692 | 702 |
| 693 resetStackAdjustment(); | 703 resetStackAdjustment(); |
| 694 | 704 |
| 695 // Fill in stack offsets for stack args, and copy args into registers | 705 // Fill in stack offsets for stack args, and copy args into registers |
| 696 // for those that were register-allocated. Args are pushed right to | 706 // for those that were register-allocated. Args are pushed right to |
| 697 // left, so Arg[0] is closest to the stack/frame pointer. | 707 // left, so Arg[0] is closest to the stack/frame pointer. |
| (...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 778 if (UsesFramePointer) { | 788 if (UsesFramePointer) { |
| 779 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); | 789 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); |
| 780 // For late-stage liveness analysis (e.g. asm-verbose mode), | 790 // For late-stage liveness analysis (e.g. asm-verbose mode), |
| 781 // adding a fake use of SP before the assignment of SP=FP keeps | 791 // adding a fake use of SP before the assignment of SP=FP keeps |
| 782 // previous SP adjustments from being dead-code eliminated. | 792 // previous SP adjustments from being dead-code eliminated. |
| 783 Context.insert(InstFakeUse::create(Func, SP)); | 793 Context.insert(InstFakeUse::create(Func, SP)); |
| 784 _mov(SP, FP); | 794 _mov(SP, FP); |
| 785 } else { | 795 } else { |
| 786 // add SP, SpillAreaSizeBytes | 796 // add SP, SpillAreaSizeBytes |
| 787 if (SpillAreaSizeBytes) { | 797 if (SpillAreaSizeBytes) { |
| 788 // Use the IP inter-procedural scratch register if needed to legalize | 798 // Use the scratch register if needed to legalize the immediate. |
| 789 // the immediate. It shouldn't be live at this point. | 799 Operand *AddAmount = |
| 790 Operand *AddAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), | 800 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), |
| 791 Legal_Reg | Legal_Flex, RegARM32::Reg_ip); | 801 Legal_Reg | Legal_Flex, getReservedTmpReg()); |
| 792 _add(SP, SP, AddAmount); | 802 _add(SP, SP, AddAmount); |
| 793 } | 803 } |
| 794 } | 804 } |
| 795 | 805 |
| 796 // Add pop instructions for preserved registers. | 806 // Add pop instructions for preserved registers. |
| 797 llvm::SmallBitVector CalleeSaves = | 807 llvm::SmallBitVector CalleeSaves = |
| 798 getRegisterSet(RegSet_CalleeSave, RegSet_None); | 808 getRegisterSet(RegSet_CalleeSave, RegSet_None); |
| 799 VarList GPRsToRestore; | 809 VarList GPRsToRestore; |
| 800 GPRsToRestore.reserve(CalleeSaves.size()); | 810 GPRsToRestore.reserve(CalleeSaves.size()); |
| 801 // Consider FP and LR as callee-save / used as needed. | 811 // Consider FP and LR as callee-save / used as needed. |
| (...skipping 29 matching lines...) Expand all Loading... | |
| 831 Variable *RetValue = nullptr; | 841 Variable *RetValue = nullptr; |
| 832 if (RI->getSrcSize()) | 842 if (RI->getSrcSize()) |
| 833 RetValue = llvm::cast<Variable>(RI->getSrc(0)); | 843 RetValue = llvm::cast<Variable>(RI->getSrc(0)); |
| 834 _bundle_lock(); | 844 _bundle_lock(); |
| 835 _bic(LR, LR, RetMask); | 845 _bic(LR, LR, RetMask); |
| 836 _ret(LR, RetValue); | 846 _ret(LR, RetValue); |
| 837 _bundle_unlock(); | 847 _bundle_unlock(); |
| 838 RI->setDeleted(); | 848 RI->setDeleted(); |
| 839 } | 849 } |
| 840 | 850 |
| 851 bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const { | |
| 852 constexpr bool SignExt = false; | |
| 853 return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset); | |
| 854 } | |
| 855 | |
| 856 StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var, | |
| 857 Variable *OrigBaseReg) { | |
| 858 int32_t Offset = Var->getStackOffset(); | |
| 859 // Legalize will likely need a movw/movt combination, but if the top | |
| 860 // bits are all 0 from negating the offset and subtracting, we could | |
| 861 // use that instead. | |
| 862 bool ShouldSub = (-Offset & 0xFFFF0000) == 0; | |
| 863 if (ShouldSub) | |
| 864 Offset = -Offset; | |
| 865 Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset), | |
| 866 Legal_Reg | Legal_Flex, getReservedTmpReg()); | |
| 867 Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg()); | |
| 868 if (ShouldSub) | |
| 869 _sub(ScratchReg, OrigBaseReg, OffsetVal); | |
| 870 else | |
| 871 _add(ScratchReg, OrigBaseReg, OffsetVal); | |
| 872 StackVariable *NewVar = Func->makeVariable<StackVariable>(stackSlotType()); | |
| 873 NewVar->setWeight(RegWeight::Zero); | |
| 874 NewVar->setBaseRegNum(ScratchReg->getRegNum()); | |
| 875 constexpr int32_t NewOffset = 0; | |
| 876 NewVar->setStackOffset(NewOffset); | |
| 877 return NewVar; | |
| 878 } | |
| 879 | |
| 880 void TargetARM32::legalizeStackSlots() { | |
| 881 // If a stack variable's frame offset doesn't fit, convert from: | |
| 882 // ldr X, OFF[SP] | |
| 883 // to: | |
| 884 // movw/movt TMP, OFF_PART | |
| 885 // add TMP, TMP, SP | |
| 886 // ldr X, OFF_MORE[TMP] | |
| 887 // | |
| 888 // This is safe because we have reserved TMP, and add for ARM does not | |
| 889 // clobber the flags register. | |
| 890 Func->dump("Before legalizeStackSlots"); | |
| 891 assert(hasComputedFrame()); | |
| 892 // Early exit, if SpillAreaSizeBytes is really small. | |
| 893 if (isLegalVariableStackOffset(SpillAreaSizeBytes)) | |
| 894 return; | |
| 895 Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); | |
| 896 int32_t StackAdjust = 0; | |
| 897 // Do a fairly naive greedy clustering for now. Pick the first stack slot | |
| 898 // that's out of bounds and make a new base reg using the architecture's temp | |
| 899 // register. If that works for the next slot, then great. Otherwise, create | |
| 900 // a new base register, clobbering the previous base register. Never share a | |
| 901 // base reg across different basic blocks. This isn't ideal if local and | |
| 902 // multi-block variables are far apart and their references are interspersed. | |
| 903 // It may help to be more coordinated about assigning stack slot numbers | |
| 904 // and may help to assign smaller offsets to higher-weight variables | |
| 905 // so that they don't depend on this legalization. | |
| 906 for (CfgNode *Node : Func->getNodes()) { | |
| 907 Context.init(Node); | |
| 908 StackVariable *NewBaseReg = nullptr; | |
| 909 int32_t NewBaseOffset = 0; | |
| 910 while (!Context.atEnd()) { | |
| 911 PostIncrLoweringContext PostIncrement(Context); | |
| 912 Inst *CurInstr = Context.getCur(); | |
| 913 Variable *Dest = CurInstr->getDest(); | |
| 914 // Check if the previous NewBaseReg is clobbered, and reset if needed. | |
| 915 if ((Dest && NewBaseReg && Dest->hasReg() && | |
| 916 Dest->getRegNum() == NewBaseReg->getBaseRegNum()) || | |
| 917 llvm::isa<InstFakeKill>(CurInstr)) { | |
| 918 NewBaseReg = nullptr; | |
| 919 NewBaseOffset = 0; | |
| 920 } | |
| 921 // The stack adjustment only matters if we are using SP instead of FP. | |
| 922 if (!hasFramePointer()) { | |
| 923 if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) { | |
| 924 StackAdjust += AdjInst->getAmount(); | |
| 925 NewBaseOffset += AdjInst->getAmount(); | |
| 926 continue; | |
| 927 } | |
| 928 if (llvm::isa<InstARM32Call>(CurInstr)) { | |
| 929 NewBaseOffset -= StackAdjust; | |
| 930 StackAdjust = 0; | |
| 931 continue; | |
| 932 } | |
| 933 } | |
| 934 // For now, only Mov instructions can have stack variables. We need to | |
| 935 // know the type of instruction because we currently create a fresh one | |
| 936 // to replace Dest/Source, rather than mutate in place. | |
| 937 auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr); | |
| 938 if (!MovInst) { | |
| 939 continue; | |
| 940 } | |
| 941 if (!Dest->hasReg()) { | |
| 942 int32_t Offset = Dest->getStackOffset(); | |
| 943 Offset += StackAdjust; | |
| 944 if (!isLegalVariableStackOffset(Offset)) { | |
| 945 if (NewBaseReg) { | |
| 946 int32_t OffsetDiff = Offset - NewBaseOffset; | |
| 947 if (isLegalVariableStackOffset(OffsetDiff)) { | |
| 948 StackVariable *NewDest = | |
| 949 Func->makeVariable<StackVariable>(stackSlotType()); | |
| 950 NewDest->setWeight(RegWeight::Zero); | |
| 951 NewDest->setBaseRegNum(NewBaseReg->getBaseRegNum()); | |
| 952 NewDest->setStackOffset(OffsetDiff); | |
| 953 Variable *NewDestVar = NewDest; | |
| 954 _mov(NewDestVar, MovInst->getSrc(0)); | |
| 955 MovInst->setDeleted(); | |
| 956 continue; | |
| 957 } | |
| 958 } | |
| 959 StackVariable *LegalDest = legalizeVariableSlot(Dest, OrigBaseReg); | |
| 960 assert(LegalDest != Dest); | |
| 961 Variable *LegalDestVar = LegalDest; | |
| 962 _mov(LegalDestVar, MovInst->getSrc(0)); | |
| 963 MovInst->setDeleted(); | |
| 964 NewBaseReg = LegalDest; | |
| 965 NewBaseOffset = Offset; | |
| 966 continue; | |
| 967 } | |
| 968 } | |
| 969 assert(MovInst->getSrcSize() == 1); | |
| 970 Variable *Var = llvm::dyn_cast<Variable>(MovInst->getSrc(0)); | |
| 971 if (Var && !Var->hasReg()) { | |
| 972 int32_t Offset = Var->getStackOffset(); | |
| 973 Offset += StackAdjust; | |
| 974 if (!isLegalVariableStackOffset(Offset)) { | |
| 975 if (NewBaseReg) { | |
| 976 int32_t OffsetDiff = Offset - NewBaseOffset; | |
| 977 if (isLegalVariableStackOffset(OffsetDiff)) { | |
| 978 StackVariable *NewVar = | |
| 979 Func->makeVariable<StackVariable>(stackSlotType()); | |
| 980 NewVar->setWeight(RegWeight::Zero); | |
| 981 NewVar->setBaseRegNum(NewBaseReg->getBaseRegNum()); | |
| 982 NewVar->setStackOffset(OffsetDiff); | |
| 983 _mov(Dest, NewVar); | |
| 984 MovInst->setDeleted(); | |
| 985 continue; | |
| 986 } | |
| 987 } | |
| 988 StackVariable *LegalVar = legalizeVariableSlot(Var, OrigBaseReg); | |
| 989 assert(LegalVar != Var); | |
| 990 _mov(Dest, LegalVar); | |
| 991 MovInst->setDeleted(); | |
| 992 NewBaseReg = LegalVar; | |
| 993 NewBaseOffset = Offset; | |
| 994 continue; | |
| 995 } | |
| 996 } | |
| 997 } | |
| 998 } | |
| 999 } | |
| 1000 | |
| 841 void TargetARM32::split64(Variable *Var) { | 1001 void TargetARM32::split64(Variable *Var) { |
| 842 assert(Var->getType() == IceType_i64); | 1002 assert(Var->getType() == IceType_i64); |
| 843 Variable *Lo = Var->getLo(); | 1003 Variable *Lo = Var->getLo(); |
| 844 Variable *Hi = Var->getHi(); | 1004 Variable *Hi = Var->getHi(); |
| 845 if (Lo) { | 1005 if (Lo) { |
| 846 assert(Hi); | 1006 assert(Hi); |
| 847 return; | 1007 return; |
| 848 } | 1008 } |
| 849 assert(Hi == nullptr); | 1009 assert(Hi == nullptr); |
| 850 Lo = Func->makeVariable(IceType_i32); | 1010 Lo = Func->makeVariable(IceType_i32); |
| (...skipping 1220 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2071 Call->addArg(Val); | 2231 Call->addArg(Val); |
| 2072 lowerCall(Call); | 2232 lowerCall(Call); |
| 2073 // The popcount helpers always return 32-bit values, while the intrinsic's | 2233 // The popcount helpers always return 32-bit values, while the intrinsic's |
| 2074 // signature matches some 64-bit platform's native instructions and | 2234 // signature matches some 64-bit platform's native instructions and |
| 2075 // expects to fill a 64-bit reg. Thus, clear the upper bits of the dest | 2235 // expects to fill a 64-bit reg. Thus, clear the upper bits of the dest |
| 2076 // just in case the user doesn't do that in the IR or doesn't toss the bits | 2236 // just in case the user doesn't do that in the IR or doesn't toss the bits |
| 2077 // via truncate. | 2237 // via truncate. |
| 2078 if (Val->getType() == IceType_i64) { | 2238 if (Val->getType() == IceType_i64) { |
| 2079 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2239 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| 2080 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2240 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| 2081 _mov(DestHi, Zero); | 2241 Variable *T = nullptr; |
| 2242 _mov(T, Zero); | |
|
jvoung (off chromium)
2015/07/23 23:26:32
misc legalization fix for test_bitmanip, now that
| |
| 2243 _mov(DestHi, T); | |
| 2082 } | 2244 } |
| 2083 return; | 2245 return; |
| 2084 } | 2246 } |
| 2085 case Intrinsics::Ctlz: { | 2247 case Intrinsics::Ctlz: { |
| 2086 // The "is zero undef" parameter is ignored and we always return | 2248 // The "is zero undef" parameter is ignored and we always return |
| 2087 // a well-defined value. | 2249 // a well-defined value. |
| 2088 Operand *Val = Instr->getArg(0); | 2250 Operand *Val = Instr->getArg(0); |
| 2089 Variable *ValLoR; | 2251 Variable *ValLoR; |
| 2090 Variable *ValHiR = nullptr; | 2252 Variable *ValHiR = nullptr; |
| 2091 if (Val->getType() == IceType_i64) { | 2253 if (Val->getType() == IceType_i64) { |
| (...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2223 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); | 2385 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); |
| 2224 _cmp(ValHiR, Zero); | 2386 _cmp(ValHiR, Zero); |
| 2225 Variable *T2 = makeReg(IceType_i32); | 2387 Variable *T2 = makeReg(IceType_i32); |
| 2226 _add(T2, T, ThirtyTwo); | 2388 _add(T2, T, ThirtyTwo); |
| 2227 _clz(T2, ValHiR, CondARM32::NE); | 2389 _clz(T2, ValHiR, CondARM32::NE); |
| 2228 // T2 is actually a source as well when the predicate is not AL | 2390 // T2 is actually a source as well when the predicate is not AL |
| 2229 // (since it may leave T2 alone). We use set_dest_nonkillable to | 2391 // (since it may leave T2 alone). We use set_dest_nonkillable to |
| 2230 // prolong the liveness of T2 as if it was used as a source. | 2392 // prolong the liveness of T2 as if it was used as a source. |
| 2231 _set_dest_nonkillable(); | 2393 _set_dest_nonkillable(); |
| 2232 _mov(DestLo, T2); | 2394 _mov(DestLo, T2); |
| 2233 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 2395 Variable *T3 = nullptr; |
| 2396 _mov(T3, Zero); | |
| 2397 _mov(DestHi, T3); | |
| 2234 return; | 2398 return; |
| 2235 } | 2399 } |
| 2236 _mov(Dest, T); | 2400 _mov(Dest, T); |
| 2237 return; | 2401 return; |
| 2238 } | 2402 } |
| 2239 | 2403 |
| 2240 void TargetARM32::lowerLoad(const InstLoad *Load) { | 2404 void TargetARM32::lowerLoad(const InstLoad *Load) { |
| 2241 // A Load instruction can be treated the same as an Assign | 2405 // A Load instruction can be treated the same as an Assign |
| 2242 // instruction, after the source operand is transformed into an | 2406 // instruction, after the source operand is transformed into an |
| 2243 // OperandARM32Mem operand. | 2407 // OperandARM32Mem operand. |
| (...skipping 520 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 2764 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; | 2928 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; |
| 2765 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { | 2929 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { |
| 2766 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; | 2930 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; |
| 2767 } | 2931 } |
| 2768 // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 2932 // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
| 2769 // However, for compatibility with current NaCl LLVM, don't claim that. | 2933 // However, for compatibility with current NaCl LLVM, don't claim that. |
| 2770 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 2934 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
| 2771 } | 2935 } |
| 2772 | 2936 |
| 2773 } // end of namespace Ice | 2937 } // end of namespace Ice |
| OLD | NEW |