Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1241763002: ARM: Add a postRA pass to legalize stack offsets. Greedy approach (reserve IP). (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: review Created 5 years, 4 months ago
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 267 matching lines...)
278 Func->advancedPhiLowering(); 278 Func->advancedPhiLowering();
279 Func->dump("After advanced Phi lowering"); 279 Func->dump("After advanced Phi lowering");
280 } 280 }
281 281
282 // Stack frame mapping. 282 // Stack frame mapping.
283 Func->genFrame(); 283 Func->genFrame();
284 if (Func->hasError()) 284 if (Func->hasError())
285 return; 285 return;
286 Func->dump("After stack frame mapping"); 286 Func->dump("After stack frame mapping");
287 287
288 legalizeStackSlots();
289 if (Func->hasError())
290 return;
291 Func->dump("After legalizeStackSlots");
292
288 Func->contractEmptyNodes(); 293 Func->contractEmptyNodes();
289 Func->reorderNodes(); 294 Func->reorderNodes();
290 295
291 // Branch optimization. This needs to be done just before code 296 // Branch optimization. This needs to be done just before code
292 // emission. In particular, no transformations that insert or 297 // emission. In particular, no transformations that insert or
293 // reorder CfgNodes should be done after branch optimization. We go 298 // reorder CfgNodes should be done after branch optimization. We go
294 // ahead and do it before nop insertion to reduce the amount of work 299 // ahead and do it before nop insertion to reduce the amount of work
295 // needed for searching for opportunities. 300 // needed for searching for opportunities.
296 Func->doBranchOpt(); 301 Func->doBranchOpt();
297 Func->dump("After branch optimization"); 302 Func->dump("After branch optimization");
(...skipping 30 matching lines...)
328 regAlloc(RAK_InfOnly); 333 regAlloc(RAK_InfOnly);
329 if (Func->hasError()) 334 if (Func->hasError())
330 return; 335 return;
331 Func->dump("After regalloc of infinite-weight variables"); 336 Func->dump("After regalloc of infinite-weight variables");
332 337
333 Func->genFrame(); 338 Func->genFrame();
334 if (Func->hasError()) 339 if (Func->hasError())
335 return; 340 return;
336 Func->dump("After stack frame mapping"); 341 Func->dump("After stack frame mapping");
337 342
343 legalizeStackSlots();
344 if (Func->hasError())
345 return;
346 Func->dump("After legalizeStackSlots");
347
338 // Nop insertion 348 // Nop insertion
339 if (Ctx->getFlags().shouldDoNopInsertion()) { 349 if (Ctx->getFlags().shouldDoNopInsertion()) {
340 Func->doNopInsertion(); 350 Func->doNopInsertion();
341 } 351 }
342 } 352 }
343 353
344 bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) { 354 bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
345 if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) { 355 if (InstARM32Br *Br = llvm::dyn_cast<InstARM32Br>(I)) {
346 return Br->optimizeBranch(NextNode); 356 return Br->optimizeBranch(NextNode);
347 } 357 }
(...skipping 29 matching lines...)
377 // live upon function entry. 387 // live upon function entry.
378 if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) { 388 if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) {
379 Func->addImplicitArg(Reg); 389 Func->addImplicitArg(Reg);
380 Reg->setIgnoreLiveness(); 390 Reg->setIgnoreLiveness();
381 } 391 }
382 } 392 }
383 return Reg; 393 return Reg;
384 } 394 }
385 395
386 void TargetARM32::emitVariable(const Variable *Var) const { 396 void TargetARM32::emitVariable(const Variable *Var) const {
397 if (!BuildDefs::dump())
398 return;
387 Ostream &Str = Ctx->getStrEmit(); 399 Ostream &Str = Ctx->getStrEmit();
388 if (Var->hasReg()) { 400 if (Var->hasReg()) {
389 Str << getRegName(Var->getRegNum(), Var->getType()); 401 Str << getRegName(Var->getRegNum(), Var->getType());
390 return; 402 return;
391 } 403 }
392 if (Var->getWeight().isInf()) { 404 if (Var->getWeight().isInf()) {
393 llvm::report_fatal_error( 405 llvm::report_fatal_error(
394 "Infinite-weight Variable has no register assigned"); 406 "Infinite-weight Variable has no register assigned");
395 } 407 }
396 int32_t Offset = Var->getStackOffset(); 408 int32_t Offset = Var->getStackOffset();
397 if (!hasFramePointer()) 409 int32_t BaseRegNum = Var->getBaseRegNum();
398 Offset += getStackAdjustment(); 410 if (BaseRegNum == Variable::NoRegister) {
399 // TODO(jvoung): Handle out of range. Perhaps we need a scratch register 411 BaseRegNum = getFrameOrStackReg();
400 // to materialize a larger offset. 412 if (!hasFramePointer())
401 constexpr bool SignExt = false; 413 Offset += getStackAdjustment();
402 if (!OperandARM32Mem::canHoldOffset(Var->getType(), SignExt, Offset)) { 414 }
415 if (!isLegalVariableStackOffset(Offset)) {
403 llvm::report_fatal_error("Illegal stack offset"); 416 llvm::report_fatal_error("Illegal stack offset");
404 } 417 }
405 const Type FrameSPTy = IceType_i32; 418 const Type FrameSPTy = stackSlotType();
406 Str << "[" << getRegName(getFrameOrStackReg(), FrameSPTy); 419 Str << "[" << getRegName(BaseRegNum, FrameSPTy);
407 if (Offset != 0) { 420 if (Offset != 0) {
408 Str << ", " << getConstantPrefix() << Offset; 421 Str << ", " << getConstantPrefix() << Offset;
409 } 422 }
410 Str << "]"; 423 Str << "]";
411 } 424 }
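To make the new emission path concrete: once legalizeStackSlots() has rebased a slot, Var->getBaseRegNum() overrides the frame/stack register and the stack-adjustment fixup no longer applies. Below is a small self-contained sketch of the operand text that results; the register names, offsets, and the stackOperand helper are all hypothetical, and the real code writes through Ctx->getStrEmit().

#include <cstdio>
#include <string>

// Hypothetical helper mirroring the formatting above: "[base]" when the
// offset is zero, "[base, #offset]" otherwise.
static std::string stackOperand(const std::string &BaseReg, int Offset) {
  if (Offset == 0)
    return "[" + BaseReg + "]";
  return "[" + BaseReg + ", #" + std::to_string(Offset) + "]";
}

int main() {
  // Ordinary slot addressed off the stack pointer.
  std::printf("%s\n", stackOperand("sp", 8).c_str()); // prints [sp, #8]
  // Slot rewritten by legalizeStackSlots(): base overridden, offset now 0.
  std::printf("%s\n", stackOperand("ip", 0).c_str()); // prints [ip]
  return 0;
}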
412 425
413 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) { 426 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
414 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) 427 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
415 return false; 428 return false;
416 int32_t RegLo, RegHi; 429 int32_t RegLo, RegHi;
(...skipping 132 matching lines...)
549 562
550 Type TargetARM32::stackSlotType() { return IceType_i32; } 563 Type TargetARM32::stackSlotType() { return IceType_i32; }
551 564
552 void TargetARM32::addProlog(CfgNode *Node) { 565 void TargetARM32::addProlog(CfgNode *Node) {
553 // Stack frame layout: 566 // Stack frame layout:
554 // 567 //
555 // +------------------------+ 568 // +------------------------+
556 // | 1. preserved registers | 569 // | 1. preserved registers |
557 // +------------------------+ 570 // +------------------------+
558 // | 2. padding | 571 // | 2. padding |
559 // +------------------------+ 572 // +------------------------+ <--- FramePointer (if used)
560 // | 3. global spill area | 573 // | 3. global spill area |
561 // +------------------------+ 574 // +------------------------+
562 // | 4. padding | 575 // | 4. padding |
563 // +------------------------+ 576 // +------------------------+
564 // | 5. local spill area | 577 // | 5. local spill area |
565 // +------------------------+ 578 // +------------------------+
566 // | 6. padding | 579 // | 6. padding |
567 // +------------------------+ 580 // +------------------------+
568 // | 7. allocas | 581 // | 7. allocas |
569 // +------------------------+ 582 // +------------------------+ <--- StackPointer
570 // 583 //
571 // The following variables record the size in bytes of the given areas: 584 // The following variables record the size in bytes of the given areas:
572 // * PreservedRegsSizeBytes: area 1 585 // * PreservedRegsSizeBytes: area 1
573 // * SpillAreaPaddingBytes: area 2 586 // * SpillAreaPaddingBytes: area 2
574 // * GlobalsSize: area 3 587 // * GlobalsSize: area 3
575 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 588 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4
576 // * LocalsSpillAreaSize: area 5 589 // * LocalsSpillAreaSize: area 5
577 // * SpillAreaSizeBytes: areas 2 - 6 590 // * SpillAreaSizeBytes: areas 2 - 6
578 // Determine stack frame offsets for each Variable without a 591 // Determine stack frame offsets for each Variable without a
579 // register assignment. This can be done as one variable per stack 592 // register assignment. This can be done as one variable per stack
(...skipping 94 matching lines...)
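To make the area bookkeeping in the layout above concrete, here is a hypothetical frame with invented sizes: push {fp, lr} preserves 8 bytes (area 1), 16 bytes of global spill with no extra padding (areas 3-4), 24 bytes of locals (area 5), and no padding elsewhere.

#include <cstdio>

int main() {
  const int PreservedRegsSizeBytes = 8;            // area 1: push {fp, lr}
  const int SpillAreaPaddingBytes = 0;             // area 2
  const int GlobalsAndSubsequentPaddingSize = 16;  // areas 3-4
  const int LocalsSpillAreaSize = 24;              // area 5
  const int LocalsPaddingBytes = 0;                // area 6
  const int SpillAreaSizeBytes = SpillAreaPaddingBytes +
                                 GlobalsAndSubsequentPaddingSize +
                                 LocalsSpillAreaSize +
                                 LocalsPaddingBytes;  // areas 2-6 = 40
  std::printf("push %d bytes, then sub sp, sp, #%d\n",
              PreservedRegsSizeBytes, SpillAreaSizeBytes);
  return 0;
}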
674 687
675 // Align SP if necessary. 688 // Align SP if necessary.
676 if (NeedsStackAlignment) { 689 if (NeedsStackAlignment) {
677 uint32_t StackOffset = PreservedRegsSizeBytes; 690 uint32_t StackOffset = PreservedRegsSizeBytes;
678 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); 691 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
679 SpillAreaSizeBytes = StackSize - StackOffset; 692 SpillAreaSizeBytes = StackSize - StackOffset;
680 } 693 }
681 694
682 // Generate "sub sp, SpillAreaSizeBytes" 695 // Generate "sub sp, SpillAreaSizeBytes"
683 if (SpillAreaSizeBytes) { 696 if (SpillAreaSizeBytes) {
684 // Use the IP inter-procedural scratch register if needed to legalize 697 // Use the scratch register if needed to legalize the immediate.
685 // the immediate.
686 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), 698 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
687 Legal_Reg | Legal_Flex, RegARM32::Reg_ip); 699 Legal_Reg | Legal_Flex, getReservedTmpReg());
688 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 700 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
689 _sub(SP, SP, SubAmount); 701 _sub(SP, SP, SubAmount);
690 } 702 }
691 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); 703 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
692 704
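Whether the "sub sp, SpillAreaSizeBytes" above needs the reserved temp register at all depends on ARM's data-processing immediate encoding: an 8-bit value rotated right by an even amount. The check below is a standalone sketch written from that architecture rule, not the code legalize() actually uses.

#include <cstdint>
#include <cstdio>

static bool isEncodableARMImmediate(uint32_t V) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    // Undo a rotate-right-by-Rot; if the result fits in 8 bits, V is encodable.
    uint32_t Undone = (Rot == 0) ? V : ((V << Rot) | (V >> (32 - Rot)));
    if (Undone <= 0xFF)
      return true;
  }
  return false;
}

int main() {
  std::printf("%d\n", isEncodableARMImmediate(4096));  // 1: plain "sub sp, sp, #4096"
  std::printf("%d\n", isEncodableARMImmediate(70000)); // 0: materialize via the temp reg
  return 0;
}

When the size is not encodable, legalize() materializes it in the reserved temp register (previously hard-coded to RegARM32::Reg_ip, now getReservedTmpReg()) and the sub uses that register.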
693 resetStackAdjustment(); 705 resetStackAdjustment();
694 706
695 // Fill in stack offsets for stack args, and copy args into registers 707 // Fill in stack offsets for stack args, and copy args into registers
696 // for those that were register-allocated. Args are pushed right to 708 // for those that were register-allocated. Args are pushed right to
697 // left, so Arg[0] is closest to the stack/frame pointer. 709 // left, so Arg[0] is closest to the stack/frame pointer.
(...skipping 80 matching lines...)
778 if (UsesFramePointer) { 790 if (UsesFramePointer) {
779 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); 791 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
780 // For late-stage liveness analysis (e.g. asm-verbose mode), 792 // For late-stage liveness analysis (e.g. asm-verbose mode),
781 // adding a fake use of SP before the assignment of SP=FP keeps 793 // adding a fake use of SP before the assignment of SP=FP keeps
782 // previous SP adjustments from being dead-code eliminated. 794 // previous SP adjustments from being dead-code eliminated.
783 Context.insert(InstFakeUse::create(Func, SP)); 795 Context.insert(InstFakeUse::create(Func, SP));
784 _mov(SP, FP); 796 _mov(SP, FP);
785 } else { 797 } else {
786 // add SP, SpillAreaSizeBytes 798 // add SP, SpillAreaSizeBytes
787 if (SpillAreaSizeBytes) { 799 if (SpillAreaSizeBytes) {
788 // Use the IP inter-procedural scratch register if needed to legalize 800 // Use the scratch register if needed to legalize the immediate.
789 // the immediate. It shouldn't be live at this point. 801 Operand *AddAmount =
790 Operand *AddAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), 802 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
791 Legal_Reg | Legal_Flex, RegARM32::Reg_ip); 803 Legal_Reg | Legal_Flex, getReservedTmpReg());
792 _add(SP, SP, AddAmount); 804 _add(SP, SP, AddAmount);
793 } 805 }
794 } 806 }
795 807
796 // Add pop instructions for preserved registers. 808 // Add pop instructions for preserved registers.
797 llvm::SmallBitVector CalleeSaves = 809 llvm::SmallBitVector CalleeSaves =
798 getRegisterSet(RegSet_CalleeSave, RegSet_None); 810 getRegisterSet(RegSet_CalleeSave, RegSet_None);
799 VarList GPRsToRestore; 811 VarList GPRsToRestore;
800 GPRsToRestore.reserve(CalleeSaves.size()); 812 GPRsToRestore.reserve(CalleeSaves.size());
801 // Consider FP and LR as callee-save / used as needed. 813 // Consider FP and LR as callee-save / used as needed.
(...skipping 29 matching lines...)
831 Variable *RetValue = nullptr; 843 Variable *RetValue = nullptr;
832 if (RI->getSrcSize()) 844 if (RI->getSrcSize())
833 RetValue = llvm::cast<Variable>(RI->getSrc(0)); 845 RetValue = llvm::cast<Variable>(RI->getSrc(0));
834 _bundle_lock(); 846 _bundle_lock();
835 _bic(LR, LR, RetMask); 847 _bic(LR, LR, RetMask);
836 _ret(LR, RetValue); 848 _ret(LR, RetValue);
837 _bundle_unlock(); 849 _bundle_unlock();
838 RI->setDeleted(); 850 RI->setDeleted();
839 } 851 }
840 852
853 bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const {
854 constexpr bool SignExt = false;
855 return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset);
856 }
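For i32 stack slots the check above should reduce to the ldr/str word-offset range, a 12-bit magnitude with an add/subtract bit; a minimal sketch under that assumption (the authoritative check remains OperandARM32Mem::canHoldOffset()):

#include <cstdint>
#include <cstdio>

// Assumed range for "ldr/str Rt, [Rn, #+/-imm12]" with a 32-bit slot.
static bool fitsWordOffset(int32_t Offset) {
  return Offset >= -4095 && Offset <= 4095;
}

int main() {
  std::printf("%d\n", fitsWordOffset(4092));  // 1: plain SP/FP-relative access
  std::printf("%d\n", fitsWordOffset(70000)); // 0: needs legalizeVariableSlot()
  return 0;
}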
857
858 StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var,
859 Variable *OrigBaseReg) {
860 int32_t Offset = Var->getStackOffset();
 861 // Legalizing the offset will likely need a movw/movt combination, but if
 862 // the negated offset has all of its top 16 bits clear, a single movw (at
 863 // most) plus a subtract can be used instead.
864 bool ShouldSub = (-Offset & 0xFFFF0000) == 0;
865 if (ShouldSub)
866 Offset = -Offset;
867 Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset),
868 Legal_Reg | Legal_Flex, getReservedTmpReg());
869 Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg());
870 if (ShouldSub)
871 _sub(ScratchReg, OrigBaseReg, OffsetVal);
872 else
873 _add(ScratchReg, OrigBaseReg, OffsetVal);
874 StackVariable *NewVar = Func->makeVariable<StackVariable>(stackSlotType());
875 NewVar->setWeight(RegWeight::Zero);
876 NewVar->setBaseRegNum(ScratchReg->getRegNum());
877 constexpr int32_t NewOffset = 0;
878 NewVar->setStackOffset(NewOffset);
879 return NewVar;
880 }
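The add-versus-sub choice above only changes which constant is materialized in the reserved temp register: when the negated offset has no bits above bit 15 it can be loaded with at most a single movw and subtracted, otherwise the offset itself is loaded (movw/movt if needed) and added. A toy standalone version of just that decision, with invented offsets:

#include <cstdio>

int main() {
  for (int Offset : {-4096, -70000, 70000}) {
    bool ShouldSub = (-Offset & 0xFFFF0000) == 0;
    // ShouldSub: at most a single movw of -Offset, then "sub TMP, base, TMP".
    // Otherwise: movw/movt of Offset, then "add TMP, base, TMP".
    std::printf("offset %7d -> %s with constant %d\n", Offset,
                ShouldSub ? "sub" : "add", ShouldSub ? -Offset : Offset);
  }
  return 0;
}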
881
882 void TargetARM32::legalizeStackSlots() {
883 // If a stack variable's frame offset doesn't fit, convert from:
884 // ldr X, OFF[SP]
885 // to:
886 // movw/movt TMP, OFF_PART
887 // add TMP, TMP, SP
888 // ldr X, OFF_MORE[TMP]
889 //
890 // This is safe because we have reserved TMP, and add for ARM does not
891 // clobber the flags register.
892 Func->dump("Before legalizeStackSlots");
893 assert(hasComputedFrame());
894 // Early exit, if SpillAreaSizeBytes is really small.
895 if (isLegalVariableStackOffset(SpillAreaSizeBytes))
896 return;
897 Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg());
898 int32_t StackAdjust = 0;
899 // Do a fairly naive greedy clustering for now. Pick the first stack slot
900 // that's out of bounds and make a new base reg using the architecture's temp
901 // register. If that works for the next slot, then great. Otherwise, create
902 // a new base register, clobbering the previous base register. Never share a
903 // base reg across different basic blocks. This isn't ideal if local and
904 // multi-block variables are far apart and their references are interspersed.
 905 // It may help to be more coordinated about assigning stack slot numbers,
 906 // and to assign smaller offsets to higher-weight variables
907 // so that they don't depend on this legalization.
908 for (CfgNode *Node : Func->getNodes()) {
909 Context.init(Node);
910 StackVariable *NewBaseReg = nullptr;
911 int32_t NewBaseOffset = 0;
912 while (!Context.atEnd()) {
913 PostIncrLoweringContext PostIncrement(Context);
914 Inst *CurInstr = Context.getCur();
915 Variable *Dest = CurInstr->getDest();
916 // Check if the previous NewBaseReg is clobbered, and reset if needed.
917 if ((Dest && NewBaseReg && Dest->hasReg() &&
918 Dest->getRegNum() == NewBaseReg->getBaseRegNum()) ||
919 llvm::isa<InstFakeKill>(CurInstr)) {
920 NewBaseReg = nullptr;
921 NewBaseOffset = 0;
922 }
923 // The stack adjustment only matters if we are using SP instead of FP.
924 if (!hasFramePointer()) {
925 if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) {
926 StackAdjust += AdjInst->getAmount();
927 NewBaseOffset += AdjInst->getAmount();
928 continue;
929 }
930 if (llvm::isa<InstARM32Call>(CurInstr)) {
931 NewBaseOffset -= StackAdjust;
932 StackAdjust = 0;
933 continue;
934 }
935 }
936 // For now, only Mov instructions can have stack variables. We need to
937 // know the type of instruction because we currently create a fresh one
938 // to replace Dest/Source, rather than mutate in place.
939 auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr);
940 if (!MovInst) {
941 continue;
942 }
943 if (!Dest->hasReg()) {
944 int32_t Offset = Dest->getStackOffset();
945 Offset += StackAdjust;
946 if (!isLegalVariableStackOffset(Offset)) {
947 if (NewBaseReg) {
948 int32_t OffsetDiff = Offset - NewBaseOffset;
949 if (isLegalVariableStackOffset(OffsetDiff)) {
950 StackVariable *NewDest =
951 Func->makeVariable<StackVariable>(stackSlotType());
952 NewDest->setWeight(RegWeight::Zero);
953 NewDest->setBaseRegNum(NewBaseReg->getBaseRegNum());
954 NewDest->setStackOffset(OffsetDiff);
955 Variable *NewDestVar = NewDest;
956 _mov(NewDestVar, MovInst->getSrc(0));
957 MovInst->setDeleted();
958 continue;
959 }
960 }
961 StackVariable *LegalDest = legalizeVariableSlot(Dest, OrigBaseReg);
962 assert(LegalDest != Dest);
963 Variable *LegalDestVar = LegalDest;
964 _mov(LegalDestVar, MovInst->getSrc(0));
965 MovInst->setDeleted();
966 NewBaseReg = LegalDest;
967 NewBaseOffset = Offset;
968 continue;
969 }
970 }
971 assert(MovInst->getSrcSize() == 1);
972 Variable *Var = llvm::dyn_cast<Variable>(MovInst->getSrc(0));
973 if (Var && !Var->hasReg()) {
974 int32_t Offset = Var->getStackOffset();
975 Offset += StackAdjust;
976 if (!isLegalVariableStackOffset(Offset)) {
977 if (NewBaseReg) {
978 int32_t OffsetDiff = Offset - NewBaseOffset;
979 if (isLegalVariableStackOffset(OffsetDiff)) {
980 StackVariable *NewVar =
981 Func->makeVariable<StackVariable>(stackSlotType());
982 NewVar->setWeight(RegWeight::Zero);
983 NewVar->setBaseRegNum(NewBaseReg->getBaseRegNum());
984 NewVar->setStackOffset(OffsetDiff);
985 _mov(Dest, NewVar);
986 MovInst->setDeleted();
987 continue;
988 }
989 }
990 StackVariable *LegalVar = legalizeVariableSlot(Var, OrigBaseReg);
991 assert(LegalVar != Var);
992 _mov(Dest, LegalVar);
993 MovInst->setDeleted();
994 NewBaseReg = LegalVar;
995 NewBaseOffset = Offset;
996 continue;
997 }
998 }
999 }
1000 }
1001 }
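As a rough illustration of the greedy reuse above, the toy model below only counts how many times a new base register would be materialized for a stream of slot offsets within one block. It assumes the 4095-byte word-access reach used earlier and ignores the clobber and basic-block resets the real pass performs.

#include <cstdint>
#include <cstdio>
#include <vector>

static bool reachable(int32_t Off) { return Off >= -4095 && Off <= 4095; }

static int countNewBases(const std::vector<int32_t> &Offsets) {
  bool HaveBase = false;
  int32_t BaseOffset = 0;
  int NewBases = 0;
  for (int32_t Off : Offsets) {
    if (reachable(Off))
      continue; // encodable against SP/FP directly
    if (HaveBase && reachable(Off - BaseOffset))
      continue; // reuse the previously created base
    HaveBase = true; // movw/movt + add: new base anchored at Off
    BaseOffset = Off;
    ++NewBases;
  }
  return NewBases;
}

int main() {
  // Two nearby out-of-range slots share one base; a distant one needs another.
  std::printf("%d\n", countNewBases({16, 70000, 70004, 200000})); // prints 2
  return 0;
}

In the example, the slots at 70000 and 70004 share one rebased pointer, while 200000 forces a second movw/movt + add.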
1002
841 void TargetARM32::split64(Variable *Var) { 1003 void TargetARM32::split64(Variable *Var) {
842 assert(Var->getType() == IceType_i64); 1004 assert(Var->getType() == IceType_i64);
843 Variable *Lo = Var->getLo(); 1005 Variable *Lo = Var->getLo();
844 Variable *Hi = Var->getHi(); 1006 Variable *Hi = Var->getHi();
845 if (Lo) { 1007 if (Lo) {
846 assert(Hi); 1008 assert(Hi);
847 return; 1009 return;
848 } 1010 }
849 assert(Hi == nullptr); 1011 assert(Hi == nullptr);
850 Lo = Func->makeVariable(IceType_i32); 1012 Lo = Func->makeVariable(IceType_i32);
(...skipping 1220 matching lines...)
2071 Call->addArg(Val); 2233 Call->addArg(Val);
2072 lowerCall(Call); 2234 lowerCall(Call);
2073 // The popcount helpers always return 32-bit values, while the intrinsic's 2235 // The popcount helpers always return 32-bit values, while the intrinsic's
2074 // signature matches some 64-bit platform's native instructions and 2236 // signature matches some 64-bit platform's native instructions and
2075 // expect to fill a 64-bit reg. Thus, clear the upper bits of the dest 2237 // expect to fill a 64-bit reg. Thus, clear the upper bits of the dest
2076 // just in case the user doesn't do that in the IR or doesn't toss the bits 2238 // just in case the user doesn't do that in the IR or doesn't toss the bits
2077 // via truncate. 2239 // via truncate.
2078 if (Val->getType() == IceType_i64) { 2240 if (Val->getType() == IceType_i64) {
2079 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2241 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2080 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2242 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2081 _mov(DestHi, Zero); 2243 Variable *T = nullptr;
2244 _mov(T, Zero);
2245 _mov(DestHi, T);
2082 } 2246 }
2083 return; 2247 return;
2084 } 2248 }
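A guess at why the zero is now routed through a temporary T rather than moved straight into DestHi: DestHi may end up in a stack slot, and an ARM store needs a register source, so keeping the constant in a register preserves a mov/str shape that the new post-RA legalization can still rewrite if the slot's offset is out of range. The i64 result is unchanged; a host-side illustration of the upper-bit clearing the comment above describes:

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Val = 0xF0F0F0F0F0F0F0F0ull;       // 32 bits set
  uint32_t Count = __builtin_popcountll(Val); // helper returns a 32-bit count
  uint64_t Dest = Count;                      // upper 32 bits end up zero,
                                              // mirroring the explicit DestHi = 0
  std::printf("%llu\n", (unsigned long long)Dest); // prints 32
  return 0;
}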
2085 case Intrinsics::Ctlz: { 2249 case Intrinsics::Ctlz: {
2086 // The "is zero undef" parameter is ignored and we always return 2250 // The "is zero undef" parameter is ignored and we always return
2087 // a well-defined value. 2251 // a well-defined value.
2088 Operand *Val = Instr->getArg(0); 2252 Operand *Val = Instr->getArg(0);
2089 Variable *ValLoR; 2253 Variable *ValLoR;
2090 Variable *ValHiR = nullptr; 2254 Variable *ValHiR = nullptr;
2091 if (Val->getType() == IceType_i64) { 2255 if (Val->getType() == IceType_i64) {
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after
2223 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); 2387 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2224 _cmp(ValHiR, Zero); 2388 _cmp(ValHiR, Zero);
2225 Variable *T2 = makeReg(IceType_i32); 2389 Variable *T2 = makeReg(IceType_i32);
2226 _add(T2, T, ThirtyTwo); 2390 _add(T2, T, ThirtyTwo);
2227 _clz(T2, ValHiR, CondARM32::NE); 2391 _clz(T2, ValHiR, CondARM32::NE);
2228 // T2 is actually a source as well when the predicate is not AL 2392 // T2 is actually a source as well when the predicate is not AL
2229 // (since it may leave T2 alone). We use set_dest_nonkillable to 2393 // (since it may leave T2 alone). We use set_dest_nonkillable to
2230 // prolong the liveness of T2 as if it was used as a source. 2394 // prolong the liveness of T2 as if it was used as a source.
2231 _set_dest_nonkillable(); 2395 _set_dest_nonkillable();
2232 _mov(DestLo, T2); 2396 _mov(DestLo, T2);
2233 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); 2397 Variable *T3 = nullptr;
2398 _mov(T3, Zero);
2399 _mov(DestHi, T3);
2234 return; 2400 return;
2235 } 2401 }
2236 _mov(Dest, T); 2402 _mov(Dest, T);
2237 return; 2403 return;
2238 } 2404 }
2239 2405
2240 void TargetARM32::lowerLoad(const InstLoad *Load) { 2406 void TargetARM32::lowerLoad(const InstLoad *Load) {
2241 // A Load instruction can be treated the same as an Assign 2407 // A Load instruction can be treated the same as an Assign
2242 // instruction, after the source operand is transformed into an 2408 // instruction, after the source operand is transformed into an
2243 // OperandARM32Mem operand. 2409 // OperandARM32Mem operand.
(...skipping 520 matching lines...)
2764 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; 2930 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
2765 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { 2931 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
2766 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; 2932 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
2767 } 2933 }
2768 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 2934 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
2769 // However, for compatibility with current NaCl LLVM, don't claim that. 2935 // However, for compatibility with current NaCl LLVM, don't claim that.
2770 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 2936 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
2771 } 2937 }
2772 2938
2773 } // end of namespace Ice 2939 } // end of namespace Ice