Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1465213002: Subzero. ARM32. Combine allocas. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Addresses comments. Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | tests_lit/assembler/arm32/bic.ll » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 247 matching lines...) Expand 10 before | Expand all | Expand 10 after
258 } 258 }
259 259
260 OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty); 260 OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
261 OutArgsSizeBytes += typeWidthInBytesOnStack(Ty); 261 OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
262 } 262 }
263 263
264 return applyStackAlignment(OutArgsSizeBytes); 264 return applyStackAlignment(OutArgsSizeBytes);
265 } 265 }
266 266
267 void TargetARM32::findMaxStackOutArgsSize() { 267 void TargetARM32::findMaxStackOutArgsSize() {
268 // MinNeededOutArgsBytes should be updated if the Target ever creates an 268 // MinNeededOutArgsBytes should be updated if the Target ever creates a
269 // high-level InstCall that requires more stack bytes. 269 // high-level InstCall that requires more stack bytes.
270 constexpr size_t MinNeededOutArgsBytes = 0; 270 constexpr size_t MinNeededOutArgsBytes = 0;
271 MaxOutArgsSizeBytes = MinNeededOutArgsBytes; 271 MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
272 for (CfgNode *Node : Func->getNodes()) { 272 for (CfgNode *Node : Func->getNodes()) {
273 Context.init(Node); 273 Context.init(Node);
274 while (!Context.atEnd()) { 274 while (!Context.atEnd()) {
275 PostIncrLoweringContext PostIncrement(Context); 275 PostIncrLoweringContext PostIncrement(Context);
276 Inst *CurInstr = Context.getCur(); 276 Inst *CurInstr = Context.getCur();
277 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) { 277 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
278 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call); 278 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
279 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes); 279 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
280 } 280 }
281 } 281 }
282 } 282 }
283 } 283 }
284 284
285 void TargetARM32::translateO2() { 285 void TargetARM32::translateO2() {
286 TimerMarker T(TimerStack::TT_O2, Func); 286 TimerMarker T(TimerStack::TT_O2, Func);
287 287
288 // TODO(stichnot): share passes with X86? 288 // TODO(stichnot): share passes with X86?
289 // https://code.google.com/p/nativeclient/issues/detail?id=4094 289 // https://code.google.com/p/nativeclient/issues/detail?id=4094
290 genTargetHelperCalls(); 290 genTargetHelperCalls();
291 findMaxStackOutArgsSize(); 291 findMaxStackOutArgsSize();
292 292
293 // Do not merge Alloca instructions, and lay out the stack. 293 // Sort and combine Alloca instructions, and lay out the stack.
294 static constexpr bool SortAndCombineAllocas = false; 294 static constexpr bool SortAndCombineAllocas = true;
295 Func->processAllocas(SortAndCombineAllocas); 295 Func->processAllocas(SortAndCombineAllocas);
296 Func->dump("After Alloca processing"); 296 Func->dump("After Alloca processing");
297 297
298 if (!Ctx->getFlags().getPhiEdgeSplit()) { 298 if (!Ctx->getFlags().getPhiEdgeSplit()) {
299 // Lower Phi instructions. 299 // Lower Phi instructions.
300 Func->placePhiLoads(); 300 Func->placePhiLoads();
301 if (Func->hasError()) 301 if (Func->hasError())
302 return; 302 return;
303 Func->placePhiStores(); 303 Func->placePhiStores();
304 if (Func->hasError()) 304 if (Func->hasError())
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
349 // Validate the live range computations. The expensive validation call is 349 // Validate the live range computations. The expensive validation call is
350 // deliberately only made when assertions are enabled. 350 // deliberately only made when assertions are enabled.
351 assert(Func->validateLiveness()); 351 assert(Func->validateLiveness());
352 // The post-codegen dump is done here, after liveness analysis and associated 352 // The post-codegen dump is done here, after liveness analysis and associated
353 // cleanup, to make the dump cleaner and more useful. 353 // cleanup, to make the dump cleaner and more useful.
354 Func->dump("After initial ARM32 codegen"); 354 Func->dump("After initial ARM32 codegen");
355 Func->getVMetadata()->init(VMK_All); 355 Func->getVMetadata()->init(VMK_All);
356 regAlloc(RAK_Global); 356 regAlloc(RAK_Global);
357 if (Func->hasError()) 357 if (Func->hasError())
358 return; 358 return;
359
359 copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); 360 copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
360 Func->dump("After linear scan regalloc"); 361 Func->dump("After linear scan regalloc");
361 362
362 if (Ctx->getFlags().getPhiEdgeSplit()) { 363 if (Ctx->getFlags().getPhiEdgeSplit()) {
363 Func->advancedPhiLowering(); 364 Func->advancedPhiLowering();
364 Func->dump("After advanced Phi lowering"); 365 Func->dump("After advanced Phi lowering");
365 } 366 }
366 367
368 ForbidTemporaryWithoutReg _(this);
369
367 // Stack frame mapping. 370 // Stack frame mapping.
368 Func->genFrame(); 371 Func->genFrame();
369 if (Func->hasError()) 372 if (Func->hasError())
370 return; 373 return;
371 Func->dump("After stack frame mapping"); 374 Func->dump("After stack frame mapping");
372 375
373 legalizeStackSlots(); 376 legalizeStackSlots();
374 if (Func->hasError()) 377 if (Func->hasError())
375 return; 378 return;
376 Func->dump("After legalizeStackSlots"); 379 Func->dump("After legalizeStackSlots");
(...skipping 15 matching lines...) Expand all
392 } 395 }
393 396
394 void TargetARM32::translateOm1() { 397 void TargetARM32::translateOm1() {
395 TimerMarker T(TimerStack::TT_Om1, Func); 398 TimerMarker T(TimerStack::TT_Om1, Func);
396 399
397 // TODO: share passes with X86? 400 // TODO: share passes with X86?
398 genTargetHelperCalls(); 401 genTargetHelperCalls();
399 findMaxStackOutArgsSize(); 402 findMaxStackOutArgsSize();
400 403
401 // Do not merge Alloca instructions, and lay out the stack. 404 // Do not merge Alloca instructions, and lay out the stack.
402 static constexpr bool SortAndCombineAllocas = false; 405 static constexpr bool DontSortAndCombineAllocas = false;
403 Func->processAllocas(SortAndCombineAllocas); 406 Func->processAllocas(DontSortAndCombineAllocas);
404 Func->dump("After Alloca processing"); 407 Func->dump("After Alloca processing");
405 408
406 Func->placePhiLoads(); 409 Func->placePhiLoads();
407 if (Func->hasError()) 410 if (Func->hasError())
408 return; 411 return;
409 Func->placePhiStores(); 412 Func->placePhiStores();
410 if (Func->hasError()) 413 if (Func->hasError())
411 return; 414 return;
412 Func->deletePhis(); 415 Func->deletePhis();
413 if (Func->hasError()) 416 if (Func->hasError())
414 return; 417 return;
415 Func->dump("After Phi lowering"); 418 Func->dump("After Phi lowering");
416 419
417 Func->doArgLowering(); 420 Func->doArgLowering();
418 421
419 Func->genCode(); 422 Func->genCode();
420 if (Func->hasError()) 423 if (Func->hasError())
421 return; 424 return;
422 Func->dump("After initial ARM32 codegen"); 425 Func->dump("After initial ARM32 codegen");
423 426
424 regAlloc(RAK_InfOnly); 427 regAlloc(RAK_InfOnly);
425 if (Func->hasError()) 428 if (Func->hasError())
426 return; 429 return;
430
427 copyRegAllocFromInfWeightVariable64On32(Func->getVariables()); 431 copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
428 Func->dump("After regalloc of infinite-weight variables"); 432 Func->dump("After regalloc of infinite-weight variables");
429 433
434 ForbidTemporaryWithoutReg _(this);
435
430 Func->genFrame(); 436 Func->genFrame();
431 if (Func->hasError()) 437 if (Func->hasError())
432 return; 438 return;
433 Func->dump("After stack frame mapping"); 439 Func->dump("After stack frame mapping");
434 440
435 legalizeStackSlots(); 441 legalizeStackSlots();
436 if (Func->hasError()) 442 if (Func->hasError())
437 return; 443 return;
438 Func->dump("After legalizeStackSlots"); 444 Func->dump("After legalizeStackSlots");
439 445
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
513 return; 519 return;
514 Ostream &Str = Ctx->getStrEmit(); 520 Ostream &Str = Ctx->getStrEmit();
515 if (Var->hasReg()) { 521 if (Var->hasReg()) {
516 Str << getRegName(Var->getRegNum(), Var->getType()); 522 Str << getRegName(Var->getRegNum(), Var->getType());
517 return; 523 return;
518 } 524 }
519 if (Var->mustHaveReg()) { 525 if (Var->mustHaveReg()) {
520 llvm::report_fatal_error( 526 llvm::report_fatal_error(
521 "Infinite-weight Variable has no register assigned"); 527 "Infinite-weight Variable has no register assigned");
522 } 528 }
529 assert(!Var->isRematerializable());
523 int32_t Offset = Var->getStackOffset(); 530 int32_t Offset = Var->getStackOffset();
524 int32_t BaseRegNum = Var->getBaseRegNum(); 531 int32_t BaseRegNum = Var->getBaseRegNum();
525 if (BaseRegNum == Variable::NoRegister) { 532 if (BaseRegNum == Variable::NoRegister) {
526 BaseRegNum = getFrameOrStackReg(); 533 BaseRegNum = getFrameOrStackReg();
527 } 534 }
528 const Type VarTy = Var->getType(); 535 const Type VarTy = Var->getType();
529 Str << "[" << getRegName(BaseRegNum, VarTy); 536 Str << "[" << getRegName(BaseRegNum, VarTy);
530 if (Offset != 0) { 537 if (Offset != 0) {
531 Str << ", " << getConstantPrefix() << Offset; 538 Str << ", " << getConstantPrefix() << Offset;
532 } 539 }
(...skipping 310 matching lines...) Expand 10 before | Expand all | Expand 10 after
843 // Adds the out args space to the stack, and align SP if necessary. 850 // Adds the out args space to the stack, and align SP if necessary.
844 if (!NeedsStackAlignment) { 851 if (!NeedsStackAlignment) {
845 SpillAreaSizeBytes += MaxOutArgsSizeBytes; 852 SpillAreaSizeBytes += MaxOutArgsSizeBytes;
846 } else { 853 } else {
847 uint32_t StackOffset = PreservedRegsSizeBytes; 854 uint32_t StackOffset = PreservedRegsSizeBytes;
848 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); 855 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
849 StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes); 856 StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);
850 SpillAreaSizeBytes = StackSize - StackOffset; 857 SpillAreaSizeBytes = StackSize - StackOffset;
851 } 858 }
852 859
860 // Combine fixed alloca with SpillAreaSize.
861 SpillAreaSizeBytes += FixedAllocaSizeBytes;
862
853 // Generate "sub sp, SpillAreaSizeBytes" 863 // Generate "sub sp, SpillAreaSizeBytes"
854 if (SpillAreaSizeBytes) { 864 if (SpillAreaSizeBytes) {
855 // Use the scratch register if needed to legalize the immediate. 865 // Use the scratch register if needed to legalize the immediate.
856 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), 866 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
857 Legal_Reg | Legal_Flex, getReservedTmpReg()); 867 Legal_Reg | Legal_Flex, getReservedTmpReg());
858 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 868 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
859 _sub(SP, SP, SubAmount); 869 _sub(SP, SP, SubAmount);
870 if (FixedAllocaAlignBytes > ARM32_STACK_ALIGNMENT_BYTES) {
871 alignRegisterPow2(SP, FixedAllocaAlignBytes);
872 }
860 } 873 }
874
861 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); 875 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
862 876
863 // Fill in stack offsets for stack args, and copy args into registers for 877 // Fill in stack offsets for stack args, and copy args into registers for
864 // those that were register-allocated. Args are pushed right to left, so 878 // those that were register-allocated. Args are pushed right to left, so
865 // Arg[0] is closest to the stack/frame pointer. 879 // Arg[0] is closest to the stack/frame pointer.
866 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 880 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
867 size_t BasicFrameOffset = PreservedRegsSizeBytes; 881 size_t BasicFrameOffset = PreservedRegsSizeBytes;
868 if (!UsesFramePointer) 882 if (!UsesFramePointer)
869 BasicFrameOffset += SpillAreaSizeBytes; 883 BasicFrameOffset += SpillAreaSizeBytes;
870 884
(...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after
1027 _sub(ScratchReg, OrigBaseReg, OffsetVal); 1041 _sub(ScratchReg, OrigBaseReg, OffsetVal);
1028 else 1042 else
1029 _add(ScratchReg, OrigBaseReg, OffsetVal); 1043 _add(ScratchReg, OrigBaseReg, OffsetVal);
1030 return ScratchReg; 1044 return ScratchReg;
1031 } 1045 }
1032 1046
1033 OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset, 1047 OperandARM32Mem *TargetARM32::createMemOperand(Type Ty, int32_t Offset,
1034 Variable *OrigBaseReg, 1048 Variable *OrigBaseReg,
1035 Variable **NewBaseReg, 1049 Variable **NewBaseReg,
1036 int32_t *NewBaseOffset) { 1050 int32_t *NewBaseOffset) {
1051 assert(!OrigBaseReg->isRematerializable());
1037 if (isLegalMemOffset(Ty, Offset)) { 1052 if (isLegalMemOffset(Ty, Offset)) {
1038 return OperandARM32Mem::create( 1053 return OperandARM32Mem::create(
1039 Func, Ty, OrigBaseReg, 1054 Func, Ty, OrigBaseReg,
1040 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(Offset)), 1055 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(Offset)),
1041 OperandARM32Mem::Offset); 1056 OperandARM32Mem::Offset);
1042 } 1057 }
1043 1058
1044 if (*NewBaseReg == nullptr) { 1059 if (*NewBaseReg == nullptr) {
1045 *NewBaseReg = newBaseRegister(Offset, OrigBaseReg); 1060 *NewBaseReg = newBaseRegister(Offset, OrigBaseReg);
1046 *NewBaseOffset = Offset; 1061 *NewBaseOffset = Offset;
1047 } 1062 }
1048 1063
1049 int32_t OffsetDiff = Offset - *NewBaseOffset; 1064 int32_t OffsetDiff = Offset - *NewBaseOffset;
1050 if (!isLegalMemOffset(Ty, OffsetDiff)) { 1065 if (!isLegalMemOffset(Ty, OffsetDiff)) {
1051 *NewBaseReg = newBaseRegister(Offset, OrigBaseReg); 1066 *NewBaseReg = newBaseRegister(Offset, OrigBaseReg);
1052 *NewBaseOffset = Offset; 1067 *NewBaseOffset = Offset;
1053 OffsetDiff = 0; 1068 OffsetDiff = 0;
1054 } 1069 }
1055 1070
1071 assert(!(*NewBaseReg)->isRematerializable());
1056 return OperandARM32Mem::create( 1072 return OperandARM32Mem::create(
1057 Func, Ty, *NewBaseReg, 1073 Func, Ty, *NewBaseReg,
1058 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetDiff)), 1074 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetDiff)),
1059 OperandARM32Mem::Offset); 1075 OperandARM32Mem::Offset);
1060 } 1076 }
1061 1077
1062 void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, Variable *OrigBaseReg, 1078 void TargetARM32::legalizeMov(InstARM32Mov *MovInstr, Variable *OrigBaseReg,
1063 Variable **NewBaseReg, int32_t *NewBaseOffset) { 1079 Variable **NewBaseReg, int32_t *NewBaseOffset) {
1064 Variable *Dest = MovInstr->getDest(); 1080 Variable *Dest = MovInstr->getDest();
1065 assert(Dest != nullptr); 1081 assert(Dest != nullptr);
1066 Type DestTy = Dest->getType(); 1082 Type DestTy = Dest->getType();
1067 assert(DestTy != IceType_i64); 1083 assert(DestTy != IceType_i64);
1068 1084
1069 Operand *Src = MovInstr->getSrc(0); 1085 Operand *Src = MovInstr->getSrc(0);
1070 Type SrcTy = Src->getType(); 1086 Type SrcTy = Src->getType();
1071 (void)SrcTy; 1087 (void)SrcTy;
1072 assert(SrcTy != IceType_i64); 1088 assert(SrcTy != IceType_i64);
1073 1089
1074 if (MovInstr->isMultiDest() || MovInstr->isMultiSource()) 1090 if (MovInstr->isMultiDest() || MovInstr->isMultiSource())
1075 return; 1091 return;
1076 1092
1077 bool Legalized = false; 1093 bool Legalized = false;
1078 if (!Dest->hasReg()) { 1094 if (!Dest->hasReg()) {
1079 auto *const SrcR = llvm::cast<Variable>(Src); 1095 auto *SrcR = llvm::cast<Variable>(Src);
1080 assert(SrcR->hasReg()); 1096 assert(SrcR->hasReg());
1097 assert(!SrcR->isRematerializable());
1081 const int32_t Offset = Dest->getStackOffset(); 1098 const int32_t Offset = Dest->getStackOffset();
1082 // This is a _mov(Mem(), Variable), i.e., a store. 1099 // This is a _mov(Mem(), Variable), i.e., a store.
1083 _str(SrcR, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg, 1100 _str(SrcR, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
1084 NewBaseOffset), 1101 NewBaseOffset),
1085 MovInstr->getPredicate()); 1102 MovInstr->getPredicate());
1086 // _str() does not have a Dest, so we add a fake-def(Dest). 1103 // _str() does not have a Dest, so we add a fake-def(Dest).
1087 Context.insert(InstFakeDef::create(Func, Dest)); 1104 Context.insert(InstFakeDef::create(Func, Dest));
1088 Legalized = true; 1105 Legalized = true;
1089 } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) { 1106 } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
1090 if (!Var->hasReg()) { 1107 if (Var->isRematerializable()) {
1091 const int32_t Offset = Var->getStackOffset(); 1108 // Rematerialization arithmetic.
1092 _ldr(Dest, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg, 1109 const int32_t ExtraOffset =
1093 NewBaseOffset), 1110 (static_cast<SizeT>(Var->getRegNum()) == getFrameReg())
1094 MovInstr->getPredicate()); 1111 ? getFrameFixedAllocaOffset()
1112 : 0;
1113
1114 const int32_t Offset = Var->getStackOffset() + ExtraOffset;
1115 Operand *OffsetRF = legalize(Ctx->getConstantInt32(Offset),
1116 Legal_Reg | Legal_Flex, Dest->getRegNum());
1117 _add(Dest, Var, OffsetRF);
1095 Legalized = true; 1118 Legalized = true;
1119 } else {
1120 if (!Var->hasReg()) {
1121 const int32_t Offset = Var->getStackOffset();
1122 _ldr(Dest, createMemOperand(DestTy, Offset, OrigBaseReg, NewBaseReg,
1123 NewBaseOffset),
1124 MovInstr->getPredicate());
1125 Legalized = true;
1126 }
1096 } 1127 }
1097 } 1128 }
1098 1129
1099 if (Legalized) { 1130 if (Legalized) {
1100 if (MovInstr->isDestRedefined()) { 1131 if (MovInstr->isDestRedefined()) {
1101 _set_dest_redefined(); 1132 _set_dest_redefined();
1102 } 1133 }
1103 MovInstr->setDeleted(); 1134 MovInstr->setDeleted();
1104 } 1135 }
1105 } 1136 }
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
1156 return Operand; 1187 return Operand;
1157 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand)) 1188 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
1158 return Var64On32->getLo(); 1189 return Var64On32->getLo();
1159 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) 1190 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand))
1160 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue())); 1191 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
1161 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) { 1192 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
1162 // Conservatively disallow memory operands with side-effects (pre/post 1193 // Conservatively disallow memory operands with side-effects (pre/post
1163 // increment) in case of duplication. 1194 // increment) in case of duplication.
1164 assert(Mem->getAddrMode() == OperandARM32Mem::Offset || 1195 assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
1165 Mem->getAddrMode() == OperandARM32Mem::NegOffset); 1196 Mem->getAddrMode() == OperandARM32Mem::NegOffset);
1197 Variable *BaseR = legalizeToReg(Mem->getBase());
1166 if (Mem->isRegReg()) { 1198 if (Mem->isRegReg()) {
1167 return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(), 1199 Variable *IndexR = legalizeToReg(Mem->getIndex());
1168 Mem->getIndex(), Mem->getShiftOp(), 1200 return OperandARM32Mem::create(Func, IceType_i32, BaseR, IndexR,
1169 Mem->getShiftAmt(), Mem->getAddrMode()); 1201 Mem->getShiftOp(), Mem->getShiftAmt(),
1202 Mem->getAddrMode());
1170 } else { 1203 } else {
1171 return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(), 1204 return OperandARM32Mem::create(Func, IceType_i32, BaseR, Mem->getOffset(),
1172 Mem->getOffset(), Mem->getAddrMode()); 1205 Mem->getAddrMode());
1173 } 1206 }
1174 } 1207 }
1175 llvm_unreachable("Unsupported operand type"); 1208 llvm_unreachable("Unsupported operand type");
1176 return nullptr; 1209 return nullptr;
1177 } 1210 }
1178 1211
1179 Operand *TargetARM32::hiOperand(Operand *Operand) { 1212 Operand *TargetARM32::hiOperand(Operand *Operand) {
1180 assert(Operand->getType() == IceType_i64); 1213 assert(Operand->getType() == IceType_i64);
1181 if (Operand->getType() != IceType_i64) 1214 if (Operand->getType() != IceType_i64)
1182 return Operand; 1215 return Operand;
(...skipping 11 matching lines...) Expand all
1194 const Type SplitType = IceType_i32; 1227 const Type SplitType = IceType_i32;
1195 if (Mem->isRegReg()) { 1228 if (Mem->isRegReg()) {
1196 // We have to make a temp variable T, and add 4 to either Base or Index. 1229 // We have to make a temp variable T, and add 4 to either Base or Index.
1197 // The Index may be shifted, so adding 4 can mean something else. Thus, 1230 // The Index may be shifted, so adding 4 can mean something else. Thus,
1198 // prefer T := Base + 4, and use T as the new Base. 1231 // prefer T := Base + 4, and use T as the new Base.
1199 Variable *Base = Mem->getBase(); 1232 Variable *Base = Mem->getBase();
1200 Constant *Four = Ctx->getConstantInt32(4); 1233 Constant *Four = Ctx->getConstantInt32(4);
1201 Variable *NewBase = Func->makeVariable(Base->getType()); 1234 Variable *NewBase = Func->makeVariable(Base->getType());
1202 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase, 1235 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
1203 Base, Four)); 1236 Base, Four));
1204 return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(), 1237 Variable *BaseR = legalizeToReg(NewBase);
1238 Variable *IndexR = legalizeToReg(Mem->getIndex());
1239 return OperandARM32Mem::create(Func, SplitType, BaseR, IndexR,
1205 Mem->getShiftOp(), Mem->getShiftAmt(), 1240 Mem->getShiftOp(), Mem->getShiftAmt(),
1206 Mem->getAddrMode()); 1241 Mem->getAddrMode());
1207 } else { 1242 } else {
1208 Variable *Base = Mem->getBase(); 1243 Variable *Base = Mem->getBase();
1209 ConstantInteger32 *Offset = Mem->getOffset(); 1244 ConstantInteger32 *Offset = Mem->getOffset();
1210 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4)); 1245 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
1211 int32_t NextOffsetVal = Offset->getValue() + 4; 1246 int32_t NextOffsetVal = Offset->getValue() + 4;
1212 constexpr bool ZeroExt = false; 1247 constexpr bool ZeroExt = false;
1213 if (!OperandARM32Mem::canHoldOffset(SplitType, ZeroExt, NextOffsetVal)) { 1248 if (!OperandARM32Mem::canHoldOffset(SplitType, ZeroExt, NextOffsetVal)) {
1214 // We have to make a temp variable and add 4 to either Base or Offset. 1249 // We have to make a temp variable and add 4 to either Base or Offset.
1215 // If we add 4 to Offset, this will convert a non-RegReg addressing 1250 // If we add 4 to Offset, this will convert a non-RegReg addressing
1216 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows 1251 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
1217 // RegReg addressing modes, prefer adding to base and replacing 1252 // RegReg addressing modes, prefer adding to base and replacing
1218 // instead. Thus we leave the old offset alone. 1253 // instead. Thus we leave the old offset alone.
1219 Constant *Four = Ctx->getConstantInt32(4); 1254 Constant *_4 = Ctx->getConstantInt32(4);
1220 Variable *NewBase = Func->makeVariable(Base->getType()); 1255 Variable *NewBase = Func->makeVariable(Base->getType());
1221 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, 1256 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
1222 NewBase, Base, Four)); 1257 NewBase, Base, _4));
1223 Base = NewBase; 1258 Base = NewBase;
1224 } else { 1259 } else {
1225 Offset = 1260 Offset =
1226 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal)); 1261 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
1227 } 1262 }
1228 return OperandARM32Mem::create(Func, SplitType, Base, Offset, 1263 Variable *BaseR = legalizeToReg(Base);
1264 return OperandARM32Mem::create(Func, SplitType, BaseR, Offset,
1229 Mem->getAddrMode()); 1265 Mem->getAddrMode());
1230 } 1266 }
1231 } 1267 }
1232 llvm_unreachable("Unsupported operand type"); 1268 llvm_unreachable("Unsupported operand type");
1233 return nullptr; 1269 return nullptr;
1234 } 1270 }
1235 1271
1236 llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include, 1272 llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
1237 RegSetMask Exclude) const { 1273 RegSetMask Exclude) const {
1238 llvm::SmallBitVector Registers(RegARM32::Reg_NUM); 1274 llvm::SmallBitVector Registers(RegARM32::Reg_NUM);
(...skipping 18 matching lines...) Expand all
1257 Registers[RegARM32::val] = false; 1293 Registers[RegARM32::val] = false;
1258 1294
1259 REGARM32_TABLE 1295 REGARM32_TABLE
1260 1296
1261 #undef X 1297 #undef X
1262 1298
1263 return Registers; 1299 return Registers;
1264 } 1300 }
1265 1301
1266 void TargetARM32::lowerAlloca(const InstAlloca *Inst) { 1302 void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
1267 UsesFramePointer = true;
1268 // Conservatively require the stack to be aligned. Some stack adjustment 1303 // Conservatively require the stack to be aligned. Some stack adjustment
1269 // operations implemented below assume that the stack is aligned before the 1304 // operations implemented below assume that the stack is aligned before the
1270 // alloca. All the alloca code ensures that the stack alignment is preserved 1305 // alloca. All the alloca code ensures that the stack alignment is preserved
1271 // after the alloca. The stack alignment restriction can be relaxed in some 1306 // after the alloca. The stack alignment restriction can be relaxed in some
1272 // cases. 1307 // cases.
1273 NeedsStackAlignment = true; 1308 NeedsStackAlignment = true;
1274 1309
1275 // TODO(stichnot): minimize the number of adjustments of SP, etc.
1276 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1277 Variable *Dest = Inst->getDest();
1278 uint32_t AlignmentParam = Inst->getAlignInBytes();
1279 // For default align=0, set it to the real value 1, to avoid any 1310 // For default align=0, set it to the real value 1, to avoid any
1280 // bit-manipulation problems below. 1311 // bit-manipulation problems below.
1281 AlignmentParam = std::max(AlignmentParam, 1u); 1312 const uint32_t AlignmentParam = std::max(1u, Inst->getAlignInBytes());
1282 1313
1283 // LLVM enforces power of 2 alignment. 1314 // LLVM enforces power of 2 alignment.
1284 assert(llvm::isPowerOf2_32(AlignmentParam)); 1315 assert(llvm::isPowerOf2_32(AlignmentParam));
1285 assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES)); 1316 assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));
1286 1317
1287 uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES); 1318 const uint32_t Alignment =
1288 if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) { 1319 std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
1320 const bool OverAligned = Alignment > ARM32_STACK_ALIGNMENT_BYTES;
1321 const bool OptM1 = Ctx->getFlags().getOptLevel() == Opt_m1;
1322 const bool AllocaWithKnownOffset = Inst->getKnownFrameOffset();
1323 const bool UseFramePointer =
1324 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
1325
1326 if (UseFramePointer)
1327 setHasFramePointer();
1328
1329 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1330 if (OverAligned) {
1289 alignRegisterPow2(SP, Alignment); 1331 alignRegisterPow2(SP, Alignment);
1290 } 1332 }
1333
1334 Variable *Dest = Inst->getDest();
1291 Operand *TotalSize = Inst->getSizeInBytes(); 1335 Operand *TotalSize = Inst->getSizeInBytes();
1336
1292 if (const auto *ConstantTotalSize = 1337 if (const auto *ConstantTotalSize =
1293 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { 1338 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
1294 uint32_t Value = ConstantTotalSize->getValue(); 1339 const uint32_t Value =
1295 Value = Utils::applyAlignment(Value, Alignment); 1340 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
1296 Operand *SubAmount = legalize(Ctx->getConstantInt32(Value)); 1341 // Constant size alloca.
1297 _sub(SP, SP, SubAmount); 1342 if (!UseFramePointer) {
1343 // If we don't need a Frame Pointer, this alloca has a known offset to the
1344 // stack pointer. We don't need to adjust the stack pointer, nor assign any
1345 // value to Dest, as Dest is rematerializable.
1346 assert(Dest->isRematerializable());
1347 FixedAllocaSizeBytes += Value;
1348 Context.insert(InstFakeDef::create(Func, Dest));
1349 return;
1350 }
1351
1352 // If a frame pointer is required, then we need to store the alloca'd result
1353 // in Dest.
1354 Operand *SubAmountRF =
1355 legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex);
1356 _sub(SP, SP, SubAmountRF);
1298 } else { 1357 } else {
1299 // Non-constant sizes need to be adjusted to the next highest multiple of 1358 // Non-constant sizes need to be adjusted to the next highest multiple of
1300 // the required alignment at runtime. 1359 // the required alignment at runtime.
1301 TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex); 1360 TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
1302 Variable *T = makeReg(IceType_i32); 1361 Variable *T = makeReg(IceType_i32);
1303 _mov(T, TotalSize); 1362 _mov(T, TotalSize);
1304 Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1)); 1363 Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
1305 _add(T, T, AddAmount); 1364 _add(T, T, AddAmount);
1306 alignRegisterPow2(T, Alignment); 1365 alignRegisterPow2(T, Alignment);
1307 _sub(SP, SP, T); 1366 _sub(SP, SP, T);
1308 } 1367 }
1368
1369 // Adds back a few bytes to SP to account for the out args area.
1309 Variable *T = SP; 1370 Variable *T = SP;
1310 if (MaxOutArgsSizeBytes != 0) { 1371 if (MaxOutArgsSizeBytes != 0) {
1311 T = makeReg(getPointerType()); 1372 T = makeReg(getPointerType());
1312 Operand *OutArgsSizeRF = legalize( 1373 Operand *OutArgsSizeRF = legalize(
1313 Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex); 1374 Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex);
1314 _add(T, SP, OutArgsSizeRF); 1375 _add(T, SP, OutArgsSizeRF);
1315 } 1376 }
1377
1316 _mov(Dest, T); 1378 _mov(Dest, T);
1317 } 1379 }
1318 1380
1319 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { 1381 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
1320 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi)) 1382 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
1321 return; 1383 return;
1322 Variable *SrcLoReg = legalizeToReg(SrcLo); 1384 Variable *SrcLoReg = legalizeToReg(SrcLo);
1323 switch (Ty) { 1385 switch (Ty) {
1324 default: 1386 default:
1325 llvm::report_fatal_error("Unexpected type"); 1387 llvm::report_fatal_error("Unexpected type");
(...skipping 643 matching lines...) Expand 10 before | Expand all | Expand 10 after
1969 case InstArithmetic::Urem: 2031 case InstArithmetic::Urem:
1970 case InstArithmetic::Srem: 2032 case InstArithmetic::Srem:
1971 llvm::report_fatal_error("Call-helper-involved instruction for i64 type " 2033 llvm::report_fatal_error("Call-helper-involved instruction for i64 type "
1972 "should have already been handled before"); 2034 "should have already been handled before");
1973 return; 2035 return;
1974 } 2036 }
1975 } 2037 }
1976 2038
1977 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { 2039 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
1978 Variable *Dest = Inst->getDest(); 2040 Variable *Dest = Inst->getDest();
2041
2042 if (Dest->isRematerializable()) {
2043 Context.insert(InstFakeDef::create(Func, Dest));
2044 return;
2045 }
2046
1979 if (Dest->getType() == IceType_i1) { 2047 if (Dest->getType() == IceType_i1) {
1980 lowerInt1Arithmetic(Inst); 2048 lowerInt1Arithmetic(Inst);
1981 return; 2049 return;
1982 } 2050 }
1983 2051
1984 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 2052 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
1985 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); 2053 Operand *Src1 = legalizeUndef(Inst->getSrc(1));
1986 if (Dest->getType() == IceType_i64) { 2054 if (Dest->getType() == IceType_i64) {
1987 lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1); 2055 lowerInt64Arithmetic(Inst->getOp(), Inst->getDest(), Src0, Src1);
1988 return; 2056 return;
(...skipping 143 matching lines...) Expand 10 before | Expand all | Expand 10 after
2132 Variable *Src0R = Srcs.src0R(this); 2200 Variable *Src0R = Srcs.src0R(this);
2133 Operand *Src1RF = Srcs.src1RF(this); 2201 Operand *Src1RF = Srcs.src1RF(this);
2134 _eor(T, Src0R, Src1RF); 2202 _eor(T, Src0R, Src1RF);
2135 _mov(Dest, T); 2203 _mov(Dest, T);
2136 return; 2204 return;
2137 } 2205 }
2138 case InstArithmetic::Sub: { 2206 case InstArithmetic::Sub: {
2139 if (Srcs.hasConstOperand()) { 2207 if (Srcs.hasConstOperand()) {
2140 // TODO(jpp): lowering Src0R here is wrong -- Src0R it is not guaranteed 2208 // TODO(jpp): lowering Src0R here is wrong -- Src0R it is not guaranteed
2141 // to be used. 2209 // to be used.
2142 Variable *Src0R = Srcs.src0R(this);
2143 if (Srcs.immediateIsFlexEncodable()) { 2210 if (Srcs.immediateIsFlexEncodable()) {
2211 Variable *Src0R = Srcs.src0R(this);
2144 Operand *Src1RF = Srcs.src1RF(this); 2212 Operand *Src1RF = Srcs.src1RF(this);
2145 if (Srcs.swappedOperands()) { 2213 if (Srcs.swappedOperands()) {
2146 _rsb(T, Src0R, Src1RF); 2214 _rsb(T, Src0R, Src1RF);
2147 } else { 2215 } else {
2148 _sub(T, Src0R, Src1RF); 2216 _sub(T, Src0R, Src1RF);
2149 } 2217 }
2150 _mov(Dest, T); 2218 _mov(Dest, T);
2151 return; 2219 return;
2152 } 2220 }
2153 if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) { 2221 if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) {
2222 Variable *Src0R = Srcs.src0R(this);
2154 Operand *Src1F = Srcs.negatedSrc1F(this); 2223 Operand *Src1F = Srcs.negatedSrc1F(this);
2155 _add(T, Src0R, Src1F); 2224 _add(T, Src0R, Src1F);
2156 _mov(Dest, T); 2225 _mov(Dest, T);
2157 return; 2226 return;
2158 } 2227 }
2159 } 2228 }
2160 Variable *Src0R = Srcs.unswappedSrc0R(this); 2229 Variable *Src0R = Srcs.unswappedSrc0R(this);
2161 Variable *Src1R = Srcs.unswappedSrc1R(this); 2230 Variable *Src1R = Srcs.unswappedSrc1R(this);
2162 _sub(T, Src0R, Src1R); 2231 _sub(T, Src0R, Src1R);
2163 _mov(Dest, T); 2232 _mov(Dest, T);
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
2208 case InstArithmetic::Fdiv: 2277 case InstArithmetic::Fdiv:
2209 case InstArithmetic::Frem: 2278 case InstArithmetic::Frem:
2210 llvm::report_fatal_error( 2279 llvm::report_fatal_error(
2211 "Floating point arith should have been handled earlier."); 2280 "Floating point arith should have been handled earlier.");
2212 return; 2281 return;
2213 } 2282 }
2214 } 2283 }
2215 2284
2216 void TargetARM32::lowerAssign(const InstAssign *Inst) { 2285 void TargetARM32::lowerAssign(const InstAssign *Inst) {
2217 Variable *Dest = Inst->getDest(); 2286 Variable *Dest = Inst->getDest();
2287
2288 if (Dest->isRematerializable()) {
2289 Context.insert(InstFakeDef::create(Func, Dest));
2290 return;
2291 }
2292
2218 Operand *Src0 = Inst->getSrc(0); 2293 Operand *Src0 = Inst->getSrc(0);
2219 assert(Dest->getType() == Src0->getType()); 2294 assert(Dest->getType() == Src0->getType());
2220 if (Dest->getType() == IceType_i64) { 2295 if (Dest->getType() == IceType_i64) {
2221 Src0 = legalizeUndef(Src0); 2296 Src0 = legalizeUndef(Src0);
2222 2297
2223 Variable *T_Lo = makeReg(IceType_i32); 2298 Variable *T_Lo = makeReg(IceType_i32);
2224 auto *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2299 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
2225 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); 2300 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
2226 _mov(T_Lo, Src0Lo); 2301 _mov(T_Lo, Src0Lo);
2227 _mov(DestLo, T_Lo); 2302 _mov(DestLo, T_Lo);
(...skipping 2190 matching lines...) Expand 10 before | Expand all | Expand 10 after
4418 OffsetImm = 0; 4493 OffsetImm = 0;
4419 } 4494 }
4420 } 4495 }
4421 4496
4422 assert(BaseVar != nullptr); 4497 assert(BaseVar != nullptr);
4423 assert(OffsetImm == 0 || OffsetReg == nullptr); 4498 assert(OffsetImm == 0 || OffsetReg == nullptr);
4424 assert(OffsetReg == nullptr || CanHaveIndex); 4499 assert(OffsetReg == nullptr || CanHaveIndex);
4425 assert(OffsetImm < 0 ? (ValidImmMask & -OffsetImm) == -OffsetImm 4500 assert(OffsetImm < 0 ? (ValidImmMask & -OffsetImm) == -OffsetImm
4426 : (ValidImmMask & OffsetImm) == OffsetImm); 4501 : (ValidImmMask & OffsetImm) == OffsetImm);
4427 4502
4503 Variable *BaseR = makeReg(getPointerType());
4504 Context.insert(InstAssign::create(Func, BaseR, BaseVar));
4428 if (OffsetReg != nullptr) { 4505 if (OffsetReg != nullptr) {
4429 return OperandARM32Mem::create(Func, Ty, BaseVar, OffsetReg, ShiftKind, 4506 Variable *OffsetR = makeReg(getPointerType());
4507 Context.insert(InstAssign::create(Func, OffsetR, OffsetReg));
4508 return OperandARM32Mem::create(Func, Ty, BaseR, OffsetR, ShiftKind,
4430 OffsetRegShamt); 4509 OffsetRegShamt);
4431 } 4510 }
4432 4511
4433 return OperandARM32Mem::create( 4512 return OperandARM32Mem::create(
4434 Func, Ty, BaseVar, 4513 Func, Ty, BaseR,
4435 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm))); 4514 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
4436 } 4515 }
4437 4516
4438 void TargetARM32::doAddressOptLoad() { 4517 void TargetARM32::doAddressOptLoad() {
4439 Inst *Instr = Context.getCur(); 4518 Inst *Instr = Context.getCur();
4440 assert(llvm::isa<InstLoad>(Instr)); 4519 assert(llvm::isa<InstLoad>(Instr));
4441 Variable *Dest = Instr->getDest(); 4520 Variable *Dest = Instr->getDest();
4442 Operand *Addr = Instr->getSrc(0); 4521 Operand *Addr = Instr->getSrc(0);
4443 if (OperandARM32Mem *Mem = 4522 if (OperandARM32Mem *Mem =
4444 formAddressingMode(Dest->getType(), Func, Instr, Addr)) { 4523 formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
(...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after
4623 Type Ty = From->getType(); 4702 Type Ty = From->getType();
4624 // Assert that a physical register is allowed. To date, all calls to 4703 // Assert that a physical register is allowed. To date, all calls to
4625 // legalize() allow a physical register. Legal_Flex converts registers to the 4704 // legalize() allow a physical register. Legal_Flex converts registers to the
4626 // right type OperandARM32FlexReg as needed. 4705 // right type OperandARM32FlexReg as needed.
4627 assert(Allowed & Legal_Reg); 4706 assert(Allowed & Legal_Reg);
4628 4707
4629 // Copied ipsis literis from TargetX86Base<Machine>. 4708 // Copied ipsis literis from TargetX86Base<Machine>.
4630 if (RegNum == Variable::NoRegister) { 4709 if (RegNum == Variable::NoRegister) {
4631 if (Variable *Subst = getContext().availabilityGet(From)) { 4710 if (Variable *Subst = getContext().availabilityGet(From)) {
4632 // At this point we know there is a potential substitution available. 4711 // At this point we know there is a potential substitution available.
4633 if (Subst->mustHaveReg() && !Subst->hasReg()) { 4712 if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
4713 !Subst->hasReg()) {
4634 // At this point we know the substitution will have a register. 4714 // At this point we know the substitution will have a register.
4635 if (From->getType() == Subst->getType()) { 4715 if (From->getType() == Subst->getType()) {
4636 // At this point we know the substitution's register is compatible. 4716 // At this point we know the substitution's register is compatible.
4637 return Subst; 4717 return Subst;
4638 } 4718 }
4639 } 4719 }
4640 } 4720 }
4641 } 4721 }
4642 4722
4643 // Go through the various types of operands: OperandARM32Mem, 4723 // Go through the various types of operands: OperandARM32Mem,
(...skipping 137 matching lines...) Expand 10 before | Expand all | Expand 10 after
4781 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); 4861 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
4782 Variable *BaseReg = makeReg(getPointerType()); 4862 Variable *BaseReg = makeReg(getPointerType());
4783 _movw(BaseReg, Offset); 4863 _movw(BaseReg, Offset);
4784 _movt(BaseReg, Offset); 4864 _movt(BaseReg, Offset);
4785 From = formMemoryOperand(BaseReg, Ty); 4865 From = formMemoryOperand(BaseReg, Ty);
4786 return copyToReg(From, RegNum); 4866 return copyToReg(From, RegNum);
4787 } 4867 }
4788 } 4868 }
4789 4869
4790 if (auto *Var = llvm::dyn_cast<Variable>(From)) { 4870 if (auto *Var = llvm::dyn_cast<Variable>(From)) {
4871 if (Var->isRematerializable()) {
4872 // TODO(jpp): We don't need to rematerialize Var if legalize() was invoked
4873 // for a Variable in a Mem operand.
4874 Variable *T = makeReg(Var->getType(), RegNum);
4875 _mov(T, Var);
4876 return T;
4877 }
4791 // Check if the variable is guaranteed a physical register. This can happen 4878 // Check if the variable is guaranteed a physical register. This can happen
4792 // either when the variable is pre-colored or when it is assigned infinite 4879 // either when the variable is pre-colored or when it is assigned infinite
4793 // weight. 4880 // weight.
4794 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); 4881 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
4795 // We need a new physical register for the operand if: 4882 // We need a new physical register for the operand if:
4796 // Mem is not allowed and Var isn't guaranteed a physical 4883 // Mem is not allowed and Var isn't guaranteed a physical
4797 // register, or 4884 // register, or
4798 // RegNum is required and Var->getRegNum() doesn't match. 4885 // RegNum is required and Var->getRegNum() doesn't match.
4799 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || 4886 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
4800 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { 4887 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
4837 OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand); 4924 OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
4838 // It may be the case that address mode optimization already creates an 4925 // It may be the case that address mode optimization already creates an
4839 // OperandARM32Mem, so in that case it wouldn't need another level of 4926 // OperandARM32Mem, so in that case it wouldn't need another level of
4840 // transformation. 4927 // transformation.
4841 if (Mem) { 4928 if (Mem) {
4842 return llvm::cast<OperandARM32Mem>(legalize(Mem)); 4929 return llvm::cast<OperandARM32Mem>(legalize(Mem));
4843 } 4930 }
4844 // If we didn't do address mode optimization, then we only have a 4931 // If we didn't do address mode optimization, then we only have a
4845 // base/offset to work with. ARM always requires a base register, so 4932 // base/offset to work with. ARM always requires a base register, so
4846 // just use that to hold the operand. 4933 // just use that to hold the operand.
4847 Variable *Base = legalizeToReg(Operand); 4934 Variable *BaseR = legalizeToReg(Operand);
4848 return OperandARM32Mem::create( 4935 return OperandARM32Mem::create(
4849 Func, Ty, Base, 4936 Func, Ty, BaseR,
4850 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); 4937 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
4851 } 4938 }
4852 4939
4853 Variable64On32 *TargetARM32::makeI64RegPair() { 4940 Variable64On32 *TargetARM32::makeI64RegPair() {
4854 Variable64On32 *Reg = 4941 Variable64On32 *Reg =
4855 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64)); 4942 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4856 Reg->setMustHaveReg(); 4943 Reg->setMustHaveReg();
4857 Reg->initHiLo(Func); 4944 Reg->initHiLo(Func);
4858 Reg->getLo()->setMustNotHaveReg(); 4945 Reg->getLo()->setMustNotHaveReg();
4859 Reg->getHi()->setMustNotHaveReg(); 4946 Reg->getHi()->setMustNotHaveReg();
4860 return Reg; 4947 return Reg;
4861 } 4948 }
4862 4949
4863 Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) { 4950 Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
4864 // There aren't any 64-bit integer registers for ARM32. 4951 // There aren't any 64-bit integer registers for ARM32.
4865 assert(Type != IceType_i64); 4952 assert(Type != IceType_i64);
4953 assert(AllowTemporaryWithNoReg || RegNum != Variable::NoRegister);
4866 Variable *Reg = Func->makeVariable(Type); 4954 Variable *Reg = Func->makeVariable(Type);
4867 if (RegNum == Variable::NoRegister) 4955 if (RegNum == Variable::NoRegister)
4868 Reg->setMustHaveReg(); 4956 Reg->setMustHaveReg();
4869 else 4957 else
4870 Reg->setRegNum(RegNum); 4958 Reg->setRegNum(RegNum);
4871 return Reg; 4959 return Reg;
4872 } 4960 }
4873 4961
4874 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) { 4962 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align,
4963 int32_t TmpRegNum) {
4875 assert(llvm::isPowerOf2_32(Align)); 4964 assert(llvm::isPowerOf2_32(Align));
4876 uint32_t RotateAmt; 4965 uint32_t RotateAmt;
4877 uint32_t Immed_8; 4966 uint32_t Immed_8;
4878 Operand *Mask; 4967 Operand *Mask;
4879 // Use AND or BIC to mask off the bits, depending on which immediate fits (if 4968 // Use AND or BIC to mask off the bits, depending on which immediate fits (if
4880 // it fits at all). Assume Align is usually small, in which case BIC works 4969 // it fits at all). Assume Align is usually small, in which case BIC works
4881 // better. Thus, this rounds down to the alignment. 4970 // better. Thus, this rounds down to the alignment.
4882 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) { 4971 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
4883 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex); 4972 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex,
4973 TmpRegNum);
4884 _bic(Reg, Reg, Mask); 4974 _bic(Reg, Reg, Mask);
4885 } else { 4975 } else {
4886 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex); 4976 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex,
4977 TmpRegNum);
4887 _and(Reg, Reg, Mask); 4978 _and(Reg, Reg, Mask);
4888 } 4979 }
4889 } 4980 }
4890 4981
4891 void TargetARM32::postLower() { 4982 void TargetARM32::postLower() {
4892 if (Ctx->getFlags().getOptLevel() == Opt_m1) 4983 if (Ctx->getFlags().getOptLevel() == Opt_m1)
4893 return; 4984 return;
4894 markRedefinitions(); 4985 markRedefinitions();
4895 Context.availabilityUpdate(); 4986 Context.availabilityUpdate();
4896 } 4987 }
(...skipping 515 matching lines...) Expand 10 before | Expand all | Expand 10 after
5412 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 5503 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
5413 // However, for compatibility with current NaCl LLVM, don't claim that. 5504 // However, for compatibility with current NaCl LLVM, don't claim that.
5414 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 5505 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
5415 } 5506 }
5416 5507
5417 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM]; 5508 llvm::SmallBitVector TargetARM32::TypeToRegisterSet[IceType_NUM];
5418 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM]; 5509 llvm::SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
5419 llvm::SmallBitVector TargetARM32::ScratchRegs; 5510 llvm::SmallBitVector TargetARM32::ScratchRegs;
5420 5511
5421 } // end of namespace Ice 5512 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringARM32.h ('k') | tests_lit/assembler/arm32/bic.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698