OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 267 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
278 Func->advancedPhiLowering(); | 278 Func->advancedPhiLowering(); |
279 Func->dump("After advanced Phi lowering"); | 279 Func->dump("After advanced Phi lowering"); |
280 } | 280 } |
281 | 281 |
282 // Stack frame mapping. | 282 // Stack frame mapping. |
283 Func->genFrame(); | 283 Func->genFrame(); |
284 if (Func->hasError()) | 284 if (Func->hasError()) |
285 return; | 285 return; |
286 Func->dump("After stack frame mapping"); | 286 Func->dump("After stack frame mapping"); |
287 | 287 |
288 legalizeStackSlots(); | |
289 if (Func->hasError()) | |
290 return; | |
291 Func->dump("After legalizeStackSlots"); | |
292 | |
288 Func->contractEmptyNodes(); | 293 Func->contractEmptyNodes(); |
289 Func->reorderNodes(); | 294 Func->reorderNodes(); |
290 | 295 |
291 // Branch optimization. This needs to be done just before code | 296 // Branch optimization. This needs to be done just before code |
292 // emission. In particular, no transformations that insert or | 297 // emission. In particular, no transformations that insert or |
293 // reorder CfgNodes should be done after branch optimization. We go | 298 // reorder CfgNodes should be done after branch optimization. We go |
294 // ahead and do it before nop insertion to reduce the amount of work | 299 // ahead and do it before nop insertion to reduce the amount of work |
295 // needed for searching for opportunities. | 300 // needed for searching for opportunities. |
296 Func->doBranchOpt(); | 301 Func->doBranchOpt(); |
297 Func->dump("After branch optimization"); | 302 Func->dump("After branch optimization"); |
(...skipping 30 matching lines...) Expand all Loading... | |
328 regAlloc(RAK_InfOnly); | 333 regAlloc(RAK_InfOnly); |
329 if (Func->hasError()) | 334 if (Func->hasError()) |
330 return; | 335 return; |
331 Func->dump("After regalloc of infinite-weight variables"); | 336 Func->dump("After regalloc of infinite-weight variables"); |
332 | 337 |
333 Func->genFrame(); | 338 Func->genFrame(); |
334 if (Func->hasError()) | 339 if (Func->hasError()) |
335 return; | 340 return; |
336 Func->dump("After stack frame mapping"); | 341 Func->dump("After stack frame mapping"); |
337 | 342 |
343 legalizeStackSlots(); | |
344 if (Func->hasError()) | |
345 return; | |
346 Func->dump("After legalizeStackSlots"); | |
347 | |
338 // Nop insertion | 348 // Nop insertion |
339 if (Ctx->getFlags().shouldDoNopInsertion()) { | 349 if (Ctx->getFlags().shouldDoNopInsertion()) { |
340 Func->doNopInsertion(); | 350 Func->doNopInsertion(); |
341 } | 351 } |
342 } | 352 } |
343 | 353 |
344 bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) { | 354 bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) { |
345 if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) { | 355 if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) { |
346 return Br->optimizeBranch(NextNode); | 356 return Br->optimizeBranch(NextNode); |
347 } | 357 } |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
387 Ostream &Str = Ctx->getStrEmit(); | 397 Ostream &Str = Ctx->getStrEmit(); |
388 if (Var->hasReg()) { | 398 if (Var->hasReg()) { |
389 Str << getRegName(Var->getRegNum(), Var->getType()); | 399 Str << getRegName(Var->getRegNum(), Var->getType()); |
390 return; | 400 return; |
391 } | 401 } |
392 if (Var->getWeight().isInf()) { | 402 if (Var->getWeight().isInf()) { |
393 llvm::report_fatal_error( | 403 llvm::report_fatal_error( |
394 "Infinite-weight Variable has no register assigned"); | 404 "Infinite-weight Variable has no register assigned"); |
395 } | 405 } |
396 int32_t Offset = Var->getStackOffset(); | 406 int32_t Offset = Var->getStackOffset(); |
397 if (!hasFramePointer()) | 407 int32_t BaseRegNum = Var->getBaseRegNum(); |
398 Offset += getStackAdjustment(); | 408 if (BaseRegNum == Variable::NoRegister) { |
399 // TODO(jvoung): Handle out of range. Perhaps we need a scratch register | 409 BaseRegNum = getFrameOrStackReg(); |
400 // to materialize a larger offset. | 410 if (!hasFramePointer()) |
401 constexpr bool SignExt = false; | 411 Offset += getStackAdjustment(); |
402 if (!OperandARM32Mem::canHoldOffset(Var->getType(), SignExt, Offset)) { | 412 } |
413 if (!isLegalVariableStackOffset(Offset)) { | |
403 llvm::report_fatal_error("Illegal stack offset"); | 414 llvm::report_fatal_error("Illegal stack offset"); |
404 } | 415 } |
405 const Type FrameSPTy = IceType_i32; | 416 const Type FrameSPTy = stackSlotType(); |
406 Str << "[" << getRegName(getFrameOrStackReg(), FrameSPTy); | 417 Str << "[" << getRegName(BaseRegNum, FrameSPTy); |
407 if (Offset != 0) { | 418 if (Offset != 0) { |
408 Str << ", " << getConstantPrefix() << Offset; | 419 Str << ", " << getConstantPrefix() << Offset; |
409 } | 420 } |
410 Str << "]"; | 421 Str << "]"; |
411 } | 422 } |
412 | 423 |
413 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) { | 424 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) { |
414 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) | 425 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) |
415 return false; | 426 return false; |
416 int32_t RegLo, RegHi; | 427 int32_t RegLo, RegHi; |
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
549 | 560 |
550 Type TargetARM32::stackSlotType() { return IceType_i32; } | 561 Type TargetARM32::stackSlotType() { return IceType_i32; } |
551 | 562 |
552 void TargetARM32::addProlog(CfgNode *Node) { | 563 void TargetARM32::addProlog(CfgNode *Node) { |
553 // Stack frame layout: | 564 // Stack frame layout: |
554 // | 565 // |
555 // +------------------------+ | 566 // +------------------------+ |
556 // | 1. preserved registers | | 567 // | 1. preserved registers | |
557 // +------------------------+ | 568 // +------------------------+ |
558 // | 2. padding | | 569 // | 2. padding | |
559 // +------------------------+ | 570 // +------------------------+ <--- FramePointer (if used) |
560 // | 3. global spill area | | 571 // | 3. global spill area | |
561 // +------------------------+ | 572 // +------------------------+ |
562 // | 4. padding | | 573 // | 4. padding | |
563 // +------------------------+ | 574 // +------------------------+ |
564 // | 5. local spill area | | 575 // | 5. local spill area | |
565 // +------------------------+ | 576 // +------------------------+ |
566 // | 6. padding | | 577 // | 6. padding | |
567 // +------------------------+ | 578 // +------------------------+ |
568 // | 7. allocas | | 579 // | 7. allocas | |
569 // +------------------------+ | 580 // +------------------------+ <--- StackPointer |
570 // | 581 // |
571 // The following variables record the size in bytes of the given areas: | 582 // The following variables record the size in bytes of the given areas: |
572 // * PreservedRegsSizeBytes: area 1 | 583 // * PreservedRegsSizeBytes: area 1 |
573 // * SpillAreaPaddingBytes: area 2 | 584 // * SpillAreaPaddingBytes: area 2 |
574 // * GlobalsSize: area 3 | 585 // * GlobalsSize: area 3 |
575 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 | 586 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 |
576 // * LocalsSpillAreaSize: area 5 | 587 // * LocalsSpillAreaSize: area 5 |
577 // * SpillAreaSizeBytes: areas 2 - 6 | 588 // * SpillAreaSizeBytes: areas 2 - 6 |
578 // Determine stack frame offsets for each Variable without a | 589 // Determine stack frame offsets for each Variable without a |
579 // register assignment. This can be done as one variable per stack | 590 // register assignment. This can be done as one variable per stack |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
674 | 685 |
675 // Align SP if necessary. | 686 // Align SP if necessary. |
676 if (NeedsStackAlignment) { | 687 if (NeedsStackAlignment) { |
677 uint32_t StackOffset = PreservedRegsSizeBytes; | 688 uint32_t StackOffset = PreservedRegsSizeBytes; |
678 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); | 689 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); |
679 SpillAreaSizeBytes = StackSize - StackOffset; | 690 SpillAreaSizeBytes = StackSize - StackOffset; |
680 } | 691 } |
681 | 692 |
682 // Generate "sub sp, SpillAreaSizeBytes" | 693 // Generate "sub sp, SpillAreaSizeBytes" |
683 if (SpillAreaSizeBytes) { | 694 if (SpillAreaSizeBytes) { |
684 // Use the IP inter-procedural scratch register if needed to legalize | 695 // Use the scratch register if needed to legalize the immediate. |
685 // the immediate. | |
686 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), | 696 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), |
687 Legal_Reg | Legal_Flex, RegARM32::Reg_ip); | 697 Legal_Reg | Legal_Flex, getReservedTmpReg()); |
688 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 698 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
689 _sub(SP, SP, SubAmount); | 699 _sub(SP, SP, SubAmount); |
690 } | 700 } |
691 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | 701 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
692 | 702 |
693 resetStackAdjustment(); | 703 resetStackAdjustment(); |
694 | 704 |
695 // Fill in stack offsets for stack args, and copy args into registers | 705 // Fill in stack offsets for stack args, and copy args into registers |
696 // for those that were register-allocated. Args are pushed right to | 706 // for those that were register-allocated. Args are pushed right to |
697 // left, so Arg[0] is closest to the stack/frame pointer. | 707 // left, so Arg[0] is closest to the stack/frame pointer. |
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
778 if (UsesFramePointer) { | 788 if (UsesFramePointer) { |
779 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); | 789 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); |
780 // For late-stage liveness analysis (e.g. asm-verbose mode), | 790 // For late-stage liveness analysis (e.g. asm-verbose mode), |
781 // adding a fake use of SP before the assignment of SP=FP keeps | 791 // adding a fake use of SP before the assignment of SP=FP keeps |
782 // previous SP adjustments from being dead-code eliminated. | 792 // previous SP adjustments from being dead-code eliminated. |
783 Context.insert(InstFakeUse::create(Func, SP)); | 793 Context.insert(InstFakeUse::create(Func, SP)); |
784 _mov(SP, FP); | 794 _mov(SP, FP); |
785 } else { | 795 } else { |
786 // add SP, SpillAreaSizeBytes | 796 // add SP, SpillAreaSizeBytes |
787 if (SpillAreaSizeBytes) { | 797 if (SpillAreaSizeBytes) { |
788 // Use the IP inter-procedural scratch register if needed to legalize | 798 // Use the scratch register if needed to legalize the immediate. |
789 // the immediate. It shouldn't be live at this point. | 799 Operand *AddAmount = |
790 Operand *AddAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), | 800 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), |
791 Legal_Reg | Legal_Flex, RegARM32::Reg_ip); | 801 Legal_Reg | Legal_Flex, getReservedTmpReg()); |
792 _add(SP, SP, AddAmount); | 802 _add(SP, SP, AddAmount); |
793 } | 803 } |
794 } | 804 } |
795 | 805 |
796 // Add pop instructions for preserved registers. | 806 // Add pop instructions for preserved registers. |
797 llvm::SmallBitVector CalleeSaves = | 807 llvm::SmallBitVector CalleeSaves = |
798 getRegisterSet(RegSet_CalleeSave, RegSet_None); | 808 getRegisterSet(RegSet_CalleeSave, RegSet_None); |
799 VarList GPRsToRestore; | 809 VarList GPRsToRestore; |
800 GPRsToRestore.reserve(CalleeSaves.size()); | 810 GPRsToRestore.reserve(CalleeSaves.size()); |
801 // Consider FP and LR as callee-save / used as needed. | 811 // Consider FP and LR as callee-save / used as needed. |
(...skipping 29 matching lines...) Expand all Loading... | |
831 Variable *RetValue = nullptr; | 841 Variable *RetValue = nullptr; |
832 if (RI->getSrcSize()) | 842 if (RI->getSrcSize()) |
833 RetValue = llvm::cast<Variable>(RI->getSrc(0)); | 843 RetValue = llvm::cast<Variable>(RI->getSrc(0)); |
834 _bundle_lock(); | 844 _bundle_lock(); |
835 _bic(LR, LR, RetMask); | 845 _bic(LR, LR, RetMask); |
836 _ret(LR, RetValue); | 846 _ret(LR, RetValue); |
837 _bundle_unlock(); | 847 _bundle_unlock(); |
838 RI->setDeleted(); | 848 RI->setDeleted(); |
839 } | 849 } |
840 | 850 |
851 bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const { | |
852 constexpr bool SignExt = false; | |
853 return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset); | |
854 } | |
855 | |
856 StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var, | |
857 Variable *OrigBaseReg) { | |
858 int32_t Offset = Var->getStackOffset(); | |
859 // Legalize will likely need a movw/movt combination, but if the top | |
860 // bits are all 0 from negating the offset and subtracting, we could | |
861 // use that instead. | |
862 bool ShouldSub = (-Offset & 0xFFFF0000) == 0; | |
863 if (ShouldSub) | |
864 Offset = -Offset; | |
865 Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset), | |
866 Legal_Reg | Legal_Flex, getReservedTmpReg()); | |
867 Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg()); | |
868 if (ShouldSub) | |
869 _sub(ScratchReg, OrigBaseReg, OffsetVal); | |
870 else | |
871 _add(ScratchReg, OrigBaseReg, OffsetVal); | |
872 StackVariable *NewVar = Func->makeVariable<StackVariable>(stackSlotType()); | |
873 NewVar->setWeight(RegWeight::Zero); | |
874 NewVar->setBaseRegNum(ScratchReg->getRegNum()); | |
875 constexpr int32_t NewOffset = 0; | |
876 NewVar->setStackOffset(NewOffset); | |
877 return NewVar; | |
878 } | |
879 | |
880 void TargetARM32::legalizeStackSlots() { | |
881 // If a stack variable's frame offset doesn't fit, convert from: | |
882 // ldr X, OFF[SP] | |
883 // to: | |
884 // movw/movt TMP, OFF_PART | |
885 // add TMP, TMP, SP | |
886 // ldr X, OFF_MORE[TMP] | |
887 // | |
888 // This is safe because we have reserved TMP, and add for ARM does not | |
889 // clobber the flags register. | |
890 Func->dump("Before legalizeStackSlots"); | |
891 assert(hasComputedFrame()); | |
892 // Early exit, if SpillAreaSizeBytes is really small. | |
893 if (isLegalVariableStackOffset(SpillAreaSizeBytes)) | |
894 return; | |
895 Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); | |
896 int32_t StackAdjust = 0; | |
897 // Do a fairly naive greedy clustering for now. Pick the first stack slot | |
898 // that's out of bounds and make a new base reg using the architecture's temp | |
899 // register. If that works for the next slot, then great. Otherwise, create | |
900 // a new base register, clobbering the previous base register. Never share a | |
901 // base reg across different basic blocks. This isn't ideal if local and | |
902 // multi-block variables are far apart and their references are interspersed. | |
903 // It may help to be more coordinated about assigning stack slot numbers | |
904 // and may help to assign smaller offsets to higher-weight variables | |
905 // so that they don't depend on this legalization. | |
906 for (CfgNode *Node : Func->getNodes()) { | |
907 Context.init(Node); | |
908 StackVariable *NewBaseReg = nullptr; | |
909 int32_t NewBaseOffset = 0; | |
910 while (!Context.atEnd()) { | |
911 PostIncrLoweringContext PostIncrement(Context); | |
912 Inst *CurInstr = Context.getCur(); | |
913 Variable *Dest = CurInstr->getDest(); | |
914 // Check if the previous NewBaseReg is clobbered, and reset if needed. | |
915 if ((Dest && NewBaseReg && Dest->hasReg() && | |
916 Dest->getRegNum() == NewBaseReg->getBaseRegNum()) || | |
917 llvm::isa<InstFakeKill>(CurInstr)) { | |
918 NewBaseReg = nullptr; | |
919 NewBaseOffset = 0; | |
920 } | |
921 // The stack adjustment only matters if we are using SP instead of FP. | |
922 if (!hasFramePointer()) { | |
923 if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) { | |
924 StackAdjust += AdjInst->getAmount(); | |
925 NewBaseOffset += AdjInst->getAmount(); | |
926 continue; | |
927 } | |
928 if (llvm::isa<InstARM32Call>(CurInstr)) { | |
929 NewBaseOffset -= StackAdjust; | |
930 StackAdjust = 0; | |
931 continue; | |
932 } | |
933 } | |
934 // For now, only Mov instructions can have stack variables. We need to | |
935 // know the type of instruction because we currently create a fresh one | |
936 // to replace Dest/Source, rather than mutate in place. | |
937 auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr); | |
938 if (!MovInst) { | |
939 continue; | |
940 } | |
941 if (!Dest->hasReg()) { | |
942 int32_t Offset = Dest->getStackOffset(); | |
943 Offset += StackAdjust; | |
944 if (!isLegalVariableStackOffset(Offset)) { | |
945 if (NewBaseReg) { | |
946 int32_t OffsetDiff = Offset - NewBaseOffset; | |
947 if (isLegalVariableStackOffset(OffsetDiff)) { | |
948 StackVariable *NewDest = | |
949 Func->makeVariable<StackVariable>(stackSlotType()); | |
950 NewDest->setWeight(RegWeight::Zero); | |
951 NewDest->setBaseRegNum(NewBaseReg->getBaseRegNum()); | |
952 NewDest->setStackOffset(OffsetDiff); | |
953 Variable *NewDestVar = NewDest; | |
954 _mov(NewDestVar, MovInst->getSrc(0)); | |
955 MovInst->setDeleted(); | |
956 continue; | |
957 } | |
958 } | |
959 StackVariable *LegalDest = legalizeVariableSlot(Dest, OrigBaseReg); | |
960 assert(LegalDest != Dest); | |
961 Variable *LegalDestVar = LegalDest; | |
962 _mov(LegalDestVar, MovInst->getSrc(0)); | |
963 MovInst->setDeleted(); | |
964 NewBaseReg = LegalDest; | |
965 NewBaseOffset = Offset; | |
966 continue; | |
967 } | |
968 } | |
969 assert(MovInst->getSrcSize() == 1); | |
970 Variable *Var = llvm::dyn_cast<Variable>(MovInst->getSrc(0)); | |
971 if (Var && !Var->hasReg()) { | |
972 int32_t Offset = Var->getStackOffset(); | |
973 Offset += StackAdjust; | |
974 if (!isLegalVariableStackOffset(Offset)) { | |
975 if (NewBaseReg) { | |
976 int32_t OffsetDiff = Offset - NewBaseOffset; | |
977 if (isLegalVariableStackOffset(OffsetDiff)) { | |
978 StackVariable *NewVar = | |
979 Func->makeVariable<StackVariable>(stackSlotType()); | |
980 NewVar->setWeight(RegWeight::Zero); | |
981 NewVar->setBaseRegNum(NewBaseReg->getBaseRegNum()); | |
982 NewVar->setStackOffset(OffsetDiff); | |
983 _mov(Dest, NewVar); | |
984 MovInst->setDeleted(); | |
985 continue; | |
986 } | |
987 } | |
988 StackVariable *LegalVar = legalizeVariableSlot(Var, OrigBaseReg); | |
989 assert(LegalVar != Var); | |
990 _mov(Dest, LegalVar); | |
991 MovInst->setDeleted(); | |
992 NewBaseReg = LegalVar; | |
993 NewBaseOffset = Offset; | |
994 continue; | |
995 } | |
996 } | |
997 } | |
998 } | |
999 } | |
1000 | |
841 void TargetARM32::split64(Variable *Var) { | 1001 void TargetARM32::split64(Variable *Var) { |
842 assert(Var->getType() == IceType_i64); | 1002 assert(Var->getType() == IceType_i64); |
843 Variable *Lo = Var->getLo(); | 1003 Variable *Lo = Var->getLo(); |
844 Variable *Hi = Var->getHi(); | 1004 Variable *Hi = Var->getHi(); |
845 if (Lo) { | 1005 if (Lo) { |
846 assert(Hi); | 1006 assert(Hi); |
847 return; | 1007 return; |
848 } | 1008 } |
849 assert(Hi == nullptr); | 1009 assert(Hi == nullptr); |
850 Lo = Func->makeVariable(IceType_i32); | 1010 Lo = Func->makeVariable(IceType_i32); |
(...skipping 1220 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2071 Call->addArg(Val); | 2231 Call->addArg(Val); |
2072 lowerCall(Call); | 2232 lowerCall(Call); |
2073 // The popcount helpers always return 32-bit values, while the intrinsic's | 2233 // The popcount helpers always return 32-bit values, while the intrinsic's |
2074 // signature matches some 64-bit platform's native instructions and | 2234 // signature matches some 64-bit platform's native instructions and |
2075 // expect to fill a 64-bit reg. Thus, clear the upper bits of the dest | 2235 // expect to fill a 64-bit reg. Thus, clear the upper bits of the dest |
2076 // just in case the user doesn't do that in the IR or doesn't toss the bits | 2236 // just in case the user doesn't do that in the IR or doesn't toss the bits |
2077 // via truncate. | 2237 // via truncate. |
2078 if (Val->getType() == IceType_i64) { | 2238 if (Val->getType() == IceType_i64) { |
2079 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2239 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
2080 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2240 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
2081 _mov(DestHi, Zero); | 2241 Variable *T = nullptr; |
2242 _mov(T, Zero); | |
jvoung (off chromium)
2015/07/23 23:26:32
misc legalization fix for test_bitmanip, now that
| |
2243 _mov(DestHi, T); | |
2082 } | 2244 } |
2083 return; | 2245 return; |
2084 } | 2246 } |
2085 case Intrinsics::Ctlz: { | 2247 case Intrinsics::Ctlz: { |
2086 // The "is zero undef" parameter is ignored and we always return | 2248 // The "is zero undef" parameter is ignored and we always return |
2087 // a well-defined value. | 2249 // a well-defined value. |
2088 Operand *Val = Instr->getArg(0); | 2250 Operand *Val = Instr->getArg(0); |
2089 Variable *ValLoR; | 2251 Variable *ValLoR; |
2090 Variable *ValHiR = nullptr; | 2252 Variable *ValHiR = nullptr; |
2091 if (Val->getType() == IceType_i64) { | 2253 if (Val->getType() == IceType_i64) { |
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2223 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); | 2385 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); |
2224 _cmp(ValHiR, Zero); | 2386 _cmp(ValHiR, Zero); |
2225 Variable *T2 = makeReg(IceType_i32); | 2387 Variable *T2 = makeReg(IceType_i32); |
2226 _add(T2, T, ThirtyTwo); | 2388 _add(T2, T, ThirtyTwo); |
2227 _clz(T2, ValHiR, CondARM32::NE); | 2389 _clz(T2, ValHiR, CondARM32::NE); |
2228 // T2 is actually a source as well when the predicate is not AL | 2390 // T2 is actually a source as well when the predicate is not AL |
2229 // (since it may leave T2 alone). We use set_dest_nonkillable to | 2391 // (since it may leave T2 alone). We use set_dest_nonkillable to |
2230 // prolong the liveness of T2 as if it was used as a source. | 2392 // prolong the liveness of T2 as if it was used as a source. |
2231 _set_dest_nonkillable(); | 2393 _set_dest_nonkillable(); |
2232 _mov(DestLo, T2); | 2394 _mov(DestLo, T2); |
2233 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 2395 Variable *T3 = nullptr; |
2396 _mov(T3, Zero); | |
2397 _mov(DestHi, T3); | |
2234 return; | 2398 return; |
2235 } | 2399 } |
2236 _mov(Dest, T); | 2400 _mov(Dest, T); |
2237 return; | 2401 return; |
2238 } | 2402 } |
2239 | 2403 |
2240 void TargetARM32::lowerLoad(const InstLoad *Load) { | 2404 void TargetARM32::lowerLoad(const InstLoad *Load) { |
2241 // A Load instruction can be treated the same as an Assign | 2405 // A Load instruction can be treated the same as an Assign |
2242 // instruction, after the source operand is transformed into an | 2406 // instruction, after the source operand is transformed into an |
2243 // OperandARM32Mem operand. | 2407 // OperandARM32Mem operand. |
(...skipping 520 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
2764 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; | 2928 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; |
2765 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { | 2929 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { |
2766 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; | 2930 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; |
2767 } | 2931 } |
2768 // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 2932 // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
2769 // However, for compatibility with current NaCl LLVM, don't claim that. | 2933 // However, for compatibility with current NaCl LLVM, don't claim that. |
2770 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 2934 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
2771 } | 2935 } |
2772 | 2936 |
2773 } // end of namespace Ice | 2937 } // end of namespace Ice |
OLD | NEW |