Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(481)

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1241763002: ARM: Add a postRA pass to legalize stack offsets. Greedy approach (reserve IP). (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: ugh add a virtual? Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 267 matching lines...) Expand 10 before | Expand all | Expand 10 after
278 Func->advancedPhiLowering(); 278 Func->advancedPhiLowering();
279 Func->dump("After advanced Phi lowering"); 279 Func->dump("After advanced Phi lowering");
280 } 280 }
281 281
282 // Stack frame mapping. 282 // Stack frame mapping.
283 Func->genFrame(); 283 Func->genFrame();
284 if (Func->hasError()) 284 if (Func->hasError())
285 return; 285 return;
286 Func->dump("After stack frame mapping"); 286 Func->dump("After stack frame mapping");
287 287
288 legalizeStackSlots();
289 if (Func->hasError())
290 return;
291 Func->dump("After legalizeStackSlots");
292
288 Func->contractEmptyNodes(); 293 Func->contractEmptyNodes();
289 Func->reorderNodes(); 294 Func->reorderNodes();
290 295
291 // Branch optimization. This needs to be done just before code 296 // Branch optimization. This needs to be done just before code
292 // emission. In particular, no transformations that insert or 297 // emission. In particular, no transformations that insert or
293 // reorder CfgNodes should be done after branch optimization. We go 298 // reorder CfgNodes should be done after branch optimization. We go
294 // ahead and do it before nop insertion to reduce the amount of work 299 // ahead and do it before nop insertion to reduce the amount of work
295 // needed for searching for opportunities. 300 // needed for searching for opportunities.
296 Func->doBranchOpt(); 301 Func->doBranchOpt();
297 Func->dump("After branch optimization"); 302 Func->dump("After branch optimization");
(...skipping 30 matching lines...) Expand all
328 regAlloc(RAK_InfOnly); 333 regAlloc(RAK_InfOnly);
329 if (Func->hasError()) 334 if (Func->hasError())
330 return; 335 return;
331 Func->dump("After regalloc of infinite-weight variables"); 336 Func->dump("After regalloc of infinite-weight variables");
332 337
333 Func->genFrame(); 338 Func->genFrame();
334 if (Func->hasError()) 339 if (Func->hasError())
335 return; 340 return;
336 Func->dump("After stack frame mapping"); 341 Func->dump("After stack frame mapping");
337 342
343 legalizeStackSlots();
344 if (Func->hasError())
345 return;
346 Func->dump("After legalizeStackSlots");
347
338 // Nop insertion 348 // Nop insertion
339 if (Ctx->getFlags().shouldDoNopInsertion()) { 349 if (Ctx->getFlags().shouldDoNopInsertion()) {
340 Func->doNopInsertion(); 350 Func->doNopInsertion();
341 } 351 }
342 } 352 }
343 353
344 bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) { 354 bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
345 if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) { 355 if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) {
346 return Br->optimizeBranch(NextNode); 356 return Br->optimizeBranch(NextNode);
347 } 357 }
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after
387 Ostream &Str = Ctx->getStrEmit(); 397 Ostream &Str = Ctx->getStrEmit();
388 if (Var->hasReg()) { 398 if (Var->hasReg()) {
389 Str << getRegName(Var->getRegNum(), Var->getType()); 399 Str << getRegName(Var->getRegNum(), Var->getType());
390 return; 400 return;
391 } 401 }
392 if (Var->getWeight().isInf()) { 402 if (Var->getWeight().isInf()) {
393 llvm::report_fatal_error( 403 llvm::report_fatal_error(
394 "Infinite-weight Variable has no register assigned"); 404 "Infinite-weight Variable has no register assigned");
395 } 405 }
396 int32_t Offset = Var->getStackOffset(); 406 int32_t Offset = Var->getStackOffset();
397 if (!hasFramePointer()) 407 int32_t BaseRegNum = Var->getBaseRegNum();
398 Offset += getStackAdjustment(); 408 if (BaseRegNum == Variable::NoRegister) {
399 // TODO(jvoung): Handle out of range. Perhaps we need a scratch register 409 BaseRegNum = getFrameOrStackReg();
400 // to materialize a larger offset. 410 if (!hasFramePointer())
401 constexpr bool SignExt = false; 411 Offset += getStackAdjustment();
402 if (!OperandARM32Mem::canHoldOffset(Var->getType(), SignExt, Offset)) { 412 }
413 if (!isLegalVariableStackOffset(Offset)) {
403 llvm::report_fatal_error("Illegal stack offset"); 414 llvm::report_fatal_error("Illegal stack offset");
404 } 415 }
405 const Type FrameSPTy = IceType_i32; 416 const Type FrameSPTy = stackSlotType();
406 Str << "[" << getRegName(getFrameOrStackReg(), FrameSPTy); 417 Str << "[" << getRegName(BaseRegNum, FrameSPTy);
407 if (Offset != 0) { 418 if (Offset != 0) {
408 Str << ", " << getConstantPrefix() << Offset; 419 Str << ", " << getConstantPrefix() << Offset;
409 } 420 }
410 Str << "]"; 421 Str << "]";
411 } 422 }
412 423
413 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) { 424 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
414 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) 425 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
415 return false; 426 return false;
416 int32_t RegLo, RegHi; 427 int32_t RegLo, RegHi;
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after
549 560
550 Type TargetARM32::stackSlotType() { return IceType_i32; } 561 Type TargetARM32::stackSlotType() { return IceType_i32; }
551 562
552 void TargetARM32::addProlog(CfgNode *Node) { 563 void TargetARM32::addProlog(CfgNode *Node) {
553 // Stack frame layout: 564 // Stack frame layout:
554 // 565 //
555 // +------------------------+ 566 // +------------------------+
556 // | 1. preserved registers | 567 // | 1. preserved registers |
557 // +------------------------+ 568 // +------------------------+
558 // | 2. padding | 569 // | 2. padding |
559 // +------------------------+ 570 // +------------------------+ <--- FramePointer (if used)
560 // | 3. global spill area | 571 // | 3. global spill area |
561 // +------------------------+ 572 // +------------------------+
562 // | 4. padding | 573 // | 4. padding |
563 // +------------------------+ 574 // +------------------------+
564 // | 5. local spill area | 575 // | 5. local spill area |
565 // +------------------------+ 576 // +------------------------+
566 // | 6. padding | 577 // | 6. padding |
567 // +------------------------+ 578 // +------------------------+
568 // | 7. allocas | 579 // | 7. allocas |
569 // +------------------------+ 580 // +------------------------+ <--- StackPointer
570 // 581 //
571 // The following variables record the size in bytes of the given areas: 582 // The following variables record the size in bytes of the given areas:
572 // * PreservedRegsSizeBytes: area 1 583 // * PreservedRegsSizeBytes: area 1
573 // * SpillAreaPaddingBytes: area 2 584 // * SpillAreaPaddingBytes: area 2
574 // * GlobalsSize: area 3 585 // * GlobalsSize: area 3
575 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 586 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4
576 // * LocalsSpillAreaSize: area 5 587 // * LocalsSpillAreaSize: area 5
577 // * SpillAreaSizeBytes: areas 2 - 6 588 // * SpillAreaSizeBytes: areas 2 - 6
578 // Determine stack frame offsets for each Variable without a 589 // Determine stack frame offsets for each Variable without a
579 // register assignment. This can be done as one variable per stack 590 // register assignment. This can be done as one variable per stack
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
674 685
675 // Align SP if necessary. 686 // Align SP if necessary.
676 if (NeedsStackAlignment) { 687 if (NeedsStackAlignment) {
677 uint32_t StackOffset = PreservedRegsSizeBytes; 688 uint32_t StackOffset = PreservedRegsSizeBytes;
678 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); 689 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
679 SpillAreaSizeBytes = StackSize - StackOffset; 690 SpillAreaSizeBytes = StackSize - StackOffset;
680 } 691 }
681 692
682 // Generate "sub sp, SpillAreaSizeBytes" 693 // Generate "sub sp, SpillAreaSizeBytes"
683 if (SpillAreaSizeBytes) { 694 if (SpillAreaSizeBytes) {
684 // Use the IP inter-procedural scratch register if needed to legalize 695 // Use the scratch register if needed to legalize the immediate.
685 // the immediate.
686 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), 696 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
687 Legal_Reg | Legal_Flex, RegARM32::Reg_ip); 697 Legal_Reg | Legal_Flex, getReservedTmpReg());
688 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 698 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
689 _sub(SP, SP, SubAmount); 699 _sub(SP, SP, SubAmount);
690 } 700 }
691 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); 701 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
692 702
693 resetStackAdjustment(); 703 resetStackAdjustment();
694 704
695 // Fill in stack offsets for stack args, and copy args into registers 705 // Fill in stack offsets for stack args, and copy args into registers
696 // for those that were register-allocated. Args are pushed right to 706 // for those that were register-allocated. Args are pushed right to
697 // left, so Arg[0] is closest to the stack/frame pointer. 707 // left, so Arg[0] is closest to the stack/frame pointer.
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
778 if (UsesFramePointer) { 788 if (UsesFramePointer) {
779 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); 789 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
780 // For late-stage liveness analysis (e.g. asm-verbose mode), 790 // For late-stage liveness analysis (e.g. asm-verbose mode),
781 // adding a fake use of SP before the assignment of SP=FP keeps 791 // adding a fake use of SP before the assignment of SP=FP keeps
782 // previous SP adjustments from being dead-code eliminated. 792 // previous SP adjustments from being dead-code eliminated.
783 Context.insert(InstFakeUse::create(Func, SP)); 793 Context.insert(InstFakeUse::create(Func, SP));
784 _mov(SP, FP); 794 _mov(SP, FP);
785 } else { 795 } else {
786 // add SP, SpillAreaSizeBytes 796 // add SP, SpillAreaSizeBytes
787 if (SpillAreaSizeBytes) { 797 if (SpillAreaSizeBytes) {
788 // Use the IP inter-procedural scratch register if needed to legalize 798 // Use the scratch register if needed to legalize the immediate.
789 // the immediate. It shouldn't be live at this point. 799 Operand *AddAmount =
790 Operand *AddAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), 800 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
791 Legal_Reg | Legal_Flex, RegARM32::Reg_ip); 801 Legal_Reg | Legal_Flex, getReservedTmpReg());
792 _add(SP, SP, AddAmount); 802 _add(SP, SP, AddAmount);
793 } 803 }
794 } 804 }
795 805
796 // Add pop instructions for preserved registers. 806 // Add pop instructions for preserved registers.
797 llvm::SmallBitVector CalleeSaves = 807 llvm::SmallBitVector CalleeSaves =
798 getRegisterSet(RegSet_CalleeSave, RegSet_None); 808 getRegisterSet(RegSet_CalleeSave, RegSet_None);
799 VarList GPRsToRestore; 809 VarList GPRsToRestore;
800 GPRsToRestore.reserve(CalleeSaves.size()); 810 GPRsToRestore.reserve(CalleeSaves.size());
801 // Consider FP and LR as callee-save / used as needed. 811 // Consider FP and LR as callee-save / used as needed.
(...skipping 29 matching lines...) Expand all
831 Variable *RetValue = nullptr; 841 Variable *RetValue = nullptr;
832 if (RI->getSrcSize()) 842 if (RI->getSrcSize())
833 RetValue = llvm::cast<Variable>(RI->getSrc(0)); 843 RetValue = llvm::cast<Variable>(RI->getSrc(0));
834 _bundle_lock(); 844 _bundle_lock();
835 _bic(LR, LR, RetMask); 845 _bic(LR, LR, RetMask);
836 _ret(LR, RetValue); 846 _ret(LR, RetValue);
837 _bundle_unlock(); 847 _bundle_unlock();
838 RI->setDeleted(); 848 RI->setDeleted();
839 } 849 }
840 850
851 bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const {
852 constexpr bool SignExt = false;
853 return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset);
854 }
855
856 StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var,
857 Variable *OrigBaseReg) {
858 int32_t Offset = Var->getStackOffset();
859 // Legalize will likely need a movw/movt combination, but if the top
860 // bits are all 0 from negating the offset and subtracting, we could
861 // use that instead.
862 bool ShouldSub = (-Offset & 0xFFFF0000) == 0;
863 if (ShouldSub)
864 Offset = -Offset;
865 Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset),
866 Legal_Reg | Legal_Flex, getReservedTmpReg());
867 Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg());
868 if (ShouldSub)
869 _sub(ScratchReg, OrigBaseReg, OffsetVal);
870 else
871 _add(ScratchReg, OrigBaseReg, OffsetVal);
872 StackVariable *NewVar = Func->makeVariable<StackVariable>(stackSlotType());
873 NewVar->setWeight(RegWeight::Zero);
874 NewVar->setBaseRegNum(ScratchReg->getRegNum());
875 constexpr int32_t NewOffset = 0;
876 NewVar->setStackOffset(NewOffset);
877 return NewVar;
878 }
879
void TargetARM32::legalizeStackSlots() {
  // If a stack variable's frame offset doesn't fit, convert from:
  //       ldr X, OFF[SP]
  // to:
  //       movw/movt TMP, OFF_PART
  //       add TMP, TMP, SP
  //       ldr X, OFF_MORE[TMP]
  //
  // This is safe because we have reserved TMP, and add for ARM does not
  // clobber the flags register.
  Func->dump("Before legalizeStackSlots");
  assert(hasComputedFrame());
  // Early exit, if SpillAreaSizeBytes is really small: then every slot's
  // offset is within the encodable range and no rewriting can be needed.
  if (isLegalVariableStackOffset(SpillAreaSizeBytes))
    return;
  Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg());
  int32_t StackAdjust = 0;
  // Do a fairly naive greedy clustering for now.  Pick the first stack slot
  // that's out of bounds and make a new base reg using the architecture's temp
  // register.  If that works for the next slot, then great.  Otherwise, create
  // a new base register, clobbering the previous base register.  Never share a
  // base reg across different basic blocks.  This isn't ideal if local and
  // multi-block variables are far apart and their references are interspersed.
  // It may help to be more coordinated about assigning stack slot numbers
  // and may help to assign smaller offsets to higher-weight variables
  // so that they don't depend on this legalization.
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    // NewBaseReg/NewBaseOffset track the most recently created scratch base
    // and the frame offset it corresponds to, valid only within this node.
    StackVariable *NewBaseReg = nullptr;
    int32_t NewBaseOffset = 0;
    while (!Context.atEnd()) {
      // PostIncrement advances Context past the current instruction when it
      // goes out of scope; every `continue` below relies on that implicit
      // advance (there is no explicit Context advance in this loop).
      PostIncrLoweringContext PostIncrement(Context);
      Inst *CurInstr = Context.getCur();
      Variable *Dest = CurInstr->getDest();
      // Check if the previous NewBaseReg is clobbered, and reset if needed.
      // An InstFakeKill conservatively invalidates it as well.
      if ((Dest && NewBaseReg && Dest->hasReg() &&
           Dest->getRegNum() == NewBaseReg->getBaseRegNum()) ||
          llvm::isa<InstFakeKill>(CurInstr)) {
        NewBaseReg = nullptr;
        NewBaseOffset = 0;
      }
      // The stack adjustment only matters if we are using SP instead of FP.
      if (!hasFramePointer()) {
        if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) {
          // SP moved: both the running adjustment and the cached base's
          // SP-relative offset shift by the same amount.
          StackAdjust += AdjInst->getAmount();
          NewBaseOffset += AdjInst->getAmount();
          continue;
        }
        if (llvm::isa<InstARM32Call>(CurInstr)) {
          // The call consumes the outgoing-arg adjustment; rewind the cached
          // base's offset and reset the running adjustment.
          NewBaseOffset -= StackAdjust;
          StackAdjust = 0;
          continue;
        }
      }
      // For now, only Mov instructions can have stack variables.  We need to
      // know the type of instruction because we currently create a fresh one
      // to replace Dest/Source, rather than mutate in place.
      auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr);
      if (!MovInst) {
        continue;
      }
      // Case 1: the destination is a stack slot with an out-of-range offset.
      if (!Dest->hasReg()) {
        int32_t Offset = Dest->getStackOffset();
        Offset += StackAdjust;
        if (!isLegalVariableStackOffset(Offset)) {
          if (NewBaseReg) {
            // Try to reuse the existing scratch base: legal if the delta
            // from its cached offset is itself encodable.
            int32_t OffsetDiff = Offset - NewBaseOffset;
            if (isLegalVariableStackOffset(OffsetDiff)) {
              StackVariable *NewDest =
                  Func->makeVariable<StackVariable>(stackSlotType());
              NewDest->setWeight(RegWeight::Zero);
              NewDest->setBaseRegNum(NewBaseReg->getBaseRegNum());
              NewDest->setStackOffset(OffsetDiff);
              Variable *NewDestVar = NewDest;
              _mov(NewDestVar, MovInst->getSrc(0));
              MovInst->setDeleted();
              continue;
            }
          }
          // No (usable) scratch base: materialize a fresh one and remember
          // it for subsequent slots in this node.
          StackVariable *LegalDest = legalizeVariableSlot(Dest, OrigBaseReg);
          assert(LegalDest != Dest);
          Variable *LegalDestVar = LegalDest;
          _mov(LegalDestVar, MovInst->getSrc(0));
          MovInst->setDeleted();
          NewBaseReg = LegalDest;
          NewBaseOffset = Offset;
          continue;
        }
      }
      // Case 2: the source is a stack slot with an out-of-range offset.
      // This mirrors the destination handling above.
      assert(MovInst->getSrcSize() == 1);
      Variable *Var = llvm::dyn_cast<Variable>(MovInst->getSrc(0));
      if (Var && !Var->hasReg()) {
        int32_t Offset = Var->getStackOffset();
        Offset += StackAdjust;
        if (!isLegalVariableStackOffset(Offset)) {
          if (NewBaseReg) {
            int32_t OffsetDiff = Offset - NewBaseOffset;
            if (isLegalVariableStackOffset(OffsetDiff)) {
              StackVariable *NewVar =
                  Func->makeVariable<StackVariable>(stackSlotType());
              NewVar->setWeight(RegWeight::Zero);
              NewVar->setBaseRegNum(NewBaseReg->getBaseRegNum());
              NewVar->setStackOffset(OffsetDiff);
              _mov(Dest, NewVar);
              MovInst->setDeleted();
              continue;
            }
          }
          StackVariable *LegalVar = legalizeVariableSlot(Var, OrigBaseReg);
          assert(LegalVar != Var);
          _mov(Dest, LegalVar);
          MovInst->setDeleted();
          NewBaseReg = LegalVar;
          NewBaseOffset = Offset;
          continue;
        }
      }
    }
  }
}
1000
841 void TargetARM32::split64(Variable *Var) { 1001 void TargetARM32::split64(Variable *Var) {
842 assert(Var->getType() == IceType_i64); 1002 assert(Var->getType() == IceType_i64);
843 Variable *Lo = Var->getLo(); 1003 Variable *Lo = Var->getLo();
844 Variable *Hi = Var->getHi(); 1004 Variable *Hi = Var->getHi();
845 if (Lo) { 1005 if (Lo) {
846 assert(Hi); 1006 assert(Hi);
847 return; 1007 return;
848 } 1008 }
849 assert(Hi == nullptr); 1009 assert(Hi == nullptr);
850 Lo = Func->makeVariable(IceType_i32); 1010 Lo = Func->makeVariable(IceType_i32);
(...skipping 1220 matching lines...) Expand 10 before | Expand all | Expand 10 after
2071 Call->addArg(Val); 2231 Call->addArg(Val);
2072 lowerCall(Call); 2232 lowerCall(Call);
2073 // The popcount helpers always return 32-bit values, while the intrinsic's 2233 // The popcount helpers always return 32-bit values, while the intrinsic's
2074 // signature matches some 64-bit platform's native instructions and 2234 // signature matches some 64-bit platform's native instructions and
2075 // expect to fill a 64-bit reg. Thus, clear the upper bits of the dest 2235 // expect to fill a 64-bit reg. Thus, clear the upper bits of the dest
2076 // just in case the user doesn't do that in the IR or doesn't toss the bits 2236 // just in case the user doesn't do that in the IR or doesn't toss the bits
2077 // via truncate. 2237 // via truncate.
2078 if (Val->getType() == IceType_i64) { 2238 if (Val->getType() == IceType_i64) {
2079 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2239 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2080 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2240 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2081 _mov(DestHi, Zero); 2241 Variable *T = nullptr;
2242 _mov(T, Zero);
jvoung (off chromium) 2015/07/23 23:26:32 misc legalization fix for test_bitmanip, now that
2243 _mov(DestHi, T);
2082 } 2244 }
2083 return; 2245 return;
2084 } 2246 }
2085 case Intrinsics::Ctlz: { 2247 case Intrinsics::Ctlz: {
2086 // The "is zero undef" parameter is ignored and we always return 2248 // The "is zero undef" parameter is ignored and we always return
2087 // a well-defined value. 2249 // a well-defined value.
2088 Operand *Val = Instr->getArg(0); 2250 Operand *Val = Instr->getArg(0);
2089 Variable *ValLoR; 2251 Variable *ValLoR;
2090 Variable *ValHiR = nullptr; 2252 Variable *ValHiR = nullptr;
2091 if (Val->getType() == IceType_i64) { 2253 if (Val->getType() == IceType_i64) {
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after
2223 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); 2385 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2224 _cmp(ValHiR, Zero); 2386 _cmp(ValHiR, Zero);
2225 Variable *T2 = makeReg(IceType_i32); 2387 Variable *T2 = makeReg(IceType_i32);
2226 _add(T2, T, ThirtyTwo); 2388 _add(T2, T, ThirtyTwo);
2227 _clz(T2, ValHiR, CondARM32::NE); 2389 _clz(T2, ValHiR, CondARM32::NE);
2228 // T2 is actually a source as well when the predicate is not AL 2390 // T2 is actually a source as well when the predicate is not AL
2229 // (since it may leave T2 alone). We use set_dest_nonkillable to 2391 // (since it may leave T2 alone). We use set_dest_nonkillable to
2230 // prolong the liveness of T2 as if it was used as a source. 2392 // prolong the liveness of T2 as if it was used as a source.
2231 _set_dest_nonkillable(); 2393 _set_dest_nonkillable();
2232 _mov(DestLo, T2); 2394 _mov(DestLo, T2);
2233 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); 2395 Variable *T3 = nullptr;
2396 _mov(T3, Zero);
2397 _mov(DestHi, T3);
2234 return; 2398 return;
2235 } 2399 }
2236 _mov(Dest, T); 2400 _mov(Dest, T);
2237 return; 2401 return;
2238 } 2402 }
2239 2403
2240 void TargetARM32::lowerLoad(const InstLoad *Load) { 2404 void TargetARM32::lowerLoad(const InstLoad *Load) {
2241 // A Load instruction can be treated the same as an Assign 2405 // A Load instruction can be treated the same as an Assign
2242 // instruction, after the source operand is transformed into an 2406 // instruction, after the source operand is transformed into an
2243 // OperandARM32Mem operand. 2407 // OperandARM32Mem operand.
(...skipping 520 matching lines...) Expand 10 before | Expand all | Expand 10 after
2764 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; 2928 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
2765 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { 2929 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
2766 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; 2930 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
2767 } 2931 }
2768 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 2932 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
2769 // However, for compatibility with current NaCl LLVM, don't claim that. 2933 // However, for compatibility with current NaCl LLVM, don't claim that.
2770 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 2934 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
2771 } 2935 }
2772 2936
2773 } // end of namespace Ice 2937 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698