Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1341423002: Reflow comments to use the full width. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Created 5 years, 3 months ago
OLD | NEW
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 29 matching lines...)
40 do { \ 40 do { \
41 if (!static_cast<const ClFlags &>(Flags).getSkipUnimplemented()) { \ 41 if (!static_cast<const ClFlags &>(Flags).getSkipUnimplemented()) { \
42 /* Use llvm_unreachable instead of report_fatal_error, which gives \ 42 /* Use llvm_unreachable instead of report_fatal_error, which gives \
43 better stack traces. */ \ 43 better stack traces. */ \
44 llvm_unreachable("Not yet implemented"); \ 44 llvm_unreachable("Not yet implemented"); \
45 abort(); \ 45 abort(); \
46 } \ 46 } \
47 } while (0) 47 } while (0)
48 48
49 // The following table summarizes the logic for lowering the icmp instruction 49 // The following table summarizes the logic for lowering the icmp instruction
50 // for i32 and narrower types. Each icmp condition has a clear mapping to an 50 // for i32 and narrower types. Each icmp condition has a clear mapping to an
51 // ARM32 conditional move instruction. 51 // ARM32 conditional move instruction.
52 52
53 const struct TableIcmp32_ { 53 const struct TableIcmp32_ {
54 CondARM32::Cond Mapping; 54 CondARM32::Cond Mapping;
55 } TableIcmp32[] = { 55 } TableIcmp32[] = {
56 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \ 56 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \
57 { CondARM32::C_32 } \ 57 { CondARM32::C_32 } \
58 , 58 ,
59 ICMPARM32_TABLE 59 ICMPARM32_TABLE
60 #undef X 60 #undef X
61 }; 61 };
62 62
63 // The following table summarizes the logic for lowering the icmp instruction 63 // The following table summarizes the logic for lowering the icmp instruction
64 // for the i64 type. Two conditional moves are needed for setting to 1 or 0. 64 // for the i64 type. Two conditional moves are needed for setting to 1 or 0.
65 // The operands may need to be swapped, and there is a slight difference 65 // The operands may need to be swapped, and there is a slight difference for
66 // for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc). 66 // signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
67 const struct TableIcmp64_ { 67 const struct TableIcmp64_ {
68 bool IsSigned; 68 bool IsSigned;
69 bool Swapped; 69 bool Swapped;
70 CondARM32::Cond C1, C2; 70 CondARM32::Cond C1, C2;
71 } TableIcmp64[] = { 71 } TableIcmp64[] = {
72 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \ 72 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \
73 { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 } \ 73 { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 } \
74 , 74 ,
75 ICMPARM32_TABLE 75 ICMPARM32_TABLE
76 #undef X 76 #undef X
77 }; 77 };
78 78
79 CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) { 79 CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
80 size_t Index = static_cast<size_t>(Cond); 80 size_t Index = static_cast<size_t>(Cond);
81 assert(Index < llvm::array_lengthof(TableIcmp32)); 81 assert(Index < llvm::array_lengthof(TableIcmp32));
82 return TableIcmp32[Index].Mapping; 82 return TableIcmp32[Index].Mapping;
83 } 83 }
84 84
85 // In some cases, there are x-macros tables for both high-level and 85 // In some cases, there are x-macros tables for both high-level and low-level
86 // low-level instructions/operands that use the same enum key value. 86 // instructions/operands that use the same enum key value. The tables are kept
87 // The tables are kept separate to maintain a proper separation 87 // separate to maintain a proper separation between abstraction layers. There
88 // between abstraction layers. There is a risk that the tables could 88 // is a risk that the tables could get out of sync if enum values are reordered
89 // get out of sync if enum values are reordered or if entries are 89 // or if entries are added or deleted. The following dummy namespaces use
90 // added or deleted. The following dummy namespaces use
91 // static_asserts to ensure everything is kept in sync. 90 // static_asserts to ensure everything is kept in sync.
92 91
93 // Validate the enum values in ICMPARM32_TABLE. 92 // Validate the enum values in ICMPARM32_TABLE.
94 namespace dummy1 { 93 namespace dummy1 {
95 // Define a temporary set of enum values based on low-level table 94 // Define a temporary set of enum values based on low-level table entries.
96 // entries.
97 enum _tmp_enum { 95 enum _tmp_enum {
98 #define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val, 96 #define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val,
99 ICMPARM32_TABLE 97 ICMPARM32_TABLE
100 #undef X 98 #undef X
101 _num 99 _num
102 }; 100 };
103 // Define a set of constants based on high-level table entries. 101 // Define a set of constants based on high-level table entries.
104 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; 102 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
105 ICEINSTICMP_TABLE 103 ICEINSTICMP_TABLE
106 #undef X 104 #undef X
107 // Define a set of constants based on low-level table entries, and 105 // Define a set of constants based on low-level table entries, and ensure the
108 // ensure the table entry keys are consistent. 106 // table entry keys are consistent.
109 #define X(val, signed, swapped64, C_32, C1_64, C2_64) \ 107 #define X(val, signed, swapped64, C_32, C1_64, C2_64) \
110 static const int _table2_##val = _tmp_##val; \ 108 static const int _table2_##val = _tmp_##val; \
111 static_assert( \ 109 static_assert( \
112 _table1_##val == _table2_##val, \ 110 _table1_##val == _table2_##val, \
113 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE"); 111 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
114 ICMPARM32_TABLE 112 ICMPARM32_TABLE
115 #undef X 113 #undef X
116 // Repeat the static asserts with respect to the high-level table 114 // Repeat the static asserts with respect to the high-level table entries in
117 // entries in case the high-level table has extra entries. 115 // case the high-level table has extra entries.
118 #define X(tag, str) \ 116 #define X(tag, str) \
119 static_assert( \ 117 static_assert( \
120 _table1_##tag == _table2_##tag, \ 118 _table1_##tag == _table2_##tag, \
121 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE"); 119 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
122 ICEINSTICMP_TABLE 120 ICEINSTICMP_TABLE
123 #undef X 121 #undef X
124 } // end of namespace dummy1 122 } // end of namespace dummy1
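The dummy namespaces above guard against the two x-macro tables drifting apart. As a rough, self-contained illustration of that pattern (hypothetical table and names, not code from this file), the same cross-check can be written as:

#define HIGH_LEVEL_TABLE                                                       \
  X(Eq)                                                                        \
  X(Ne)
#define LOW_LEVEL_TABLE                                                        \
  X(Eq, 0 /* ARM cond code */)                                                 \
  X(Ne, 1)

// Enum generated from the high-level table.
enum class MyCond {
#define X(tag) tag,
  HIGH_LEVEL_TABLE
#undef X
};

// Index of each key within the low-level table.
enum {
#define X(tag, cond) _low_##tag,
  LOW_LEVEL_TABLE
#undef X
};

// Cross-check: each key must sit at the same index in both tables, so
// reordering one table without the other fails to compile.
#define X(tag, cond)                                                           \
  static_assert(static_cast<int>(MyCond::tag) == _low_##tag,                   \
                "HIGH_LEVEL_TABLE and LOW_LEVEL_TABLE are out of sync");
LOW_LEVEL_TABLE
#undef X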
125 123
126 // Stack alignment 124 // Stack alignment
127 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16; 125 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;
128 126
129 // Value is in bytes. Return Value adjusted to the next highest multiple 127 // Value is in bytes. Return Value adjusted to the next highest multiple of the
130 // of the stack alignment. 128 // stack alignment.
131 uint32_t applyStackAlignment(uint32_t Value) { 129 uint32_t applyStackAlignment(uint32_t Value) {
132 return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES); 130 return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
133 } 131 }
134 132
135 // Value is in bytes. Return Value adjusted to the next highest multiple 133 // Value is in bytes. Return Value adjusted to the next highest multiple of the
136 // of the stack alignment required for the given type. 134 // stack alignment required for the given type.
137 uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) { 135 uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
138 // Use natural alignment, except that normally (non-NaCl) ARM only 136 // Use natural alignment, except that normally (non-NaCl) ARM only aligns
139 // aligns vectors to 8 bytes. 137 // vectors to 8 bytes.
140 // TODO(jvoung): Check this ... 138 // TODO(jvoung): Check this ...
141 size_t typeAlignInBytes = typeWidthInBytes(Ty); 139 size_t typeAlignInBytes = typeWidthInBytes(Ty);
142 if (isVectorType(Ty)) 140 if (isVectorType(Ty))
143 typeAlignInBytes = 8; 141 typeAlignInBytes = 8;
144 return Utils::applyAlignment(Value, typeAlignInBytes); 142 return Utils::applyAlignment(Value, typeAlignInBytes);
145 } 143 }
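Both helpers above lean on Utils::applyAlignment to round a byte count up to the next multiple of an alignment. A minimal sketch of that round-up arithmetic, assuming the alignment is a power of two (as the 16-byte stack alignment is), looks like:

#include <cassert>
#include <cstdint>

// Round Value up to the next multiple of Align; Align must be a power of two.
uint32_t roundUpToMultiple(uint32_t Value, uint32_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "power-of-two alignment");
  return (Value + Align - 1) & ~(Align - 1);
}
// e.g. roundUpToMultiple(20, 16) == 32 and roundUpToMultiple(32, 16) == 32.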
146 144
147 // Conservatively check if at compile time we know that the operand is 145 // Conservatively check if at compile time we know that the operand is
148 // definitely a non-zero integer. 146 // definitely a non-zero integer.
149 bool isGuaranteedNonzeroInt(const Operand *Op) { 147 bool isGuaranteedNonzeroInt(const Operand *Op) {
(...skipping 15 matching lines...)
165 TargetInstructionSet::BaseInstructionSet) { 163 TargetInstructionSet::BaseInstructionSet) {
166 InstructionSet = static_cast<ARM32InstructionSet>( 164 InstructionSet = static_cast<ARM32InstructionSet>(
167 (Flags.getTargetInstructionSet() - 165 (Flags.getTargetInstructionSet() -
168 TargetInstructionSet::ARM32InstructionSet_Begin) + 166 TargetInstructionSet::ARM32InstructionSet_Begin) +
169 ARM32InstructionSet::Begin); 167 ARM32InstructionSet::Begin);
170 } 168 }
171 } 169 }
172 170
173 TargetARM32::TargetARM32(Cfg *Func) 171 TargetARM32::TargetARM32(Cfg *Func)
174 : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) { 172 : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) {
175 // TODO: Don't initialize IntegerRegisters and friends every time. 173 // TODO: Don't initialize IntegerRegisters and friends every time. Instead,
176 // Instead, initialize in some sort of static initializer for the 174 // initialize in some sort of static initializer for the class.
177 // class.
178 // Limit this size (or do all bitsets need to be the same width)??? 175 // Limit this size (or do all bitsets need to be the same width)???
179 llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM); 176 llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
180 llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM); 177 llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM);
181 llvm::SmallBitVector Float64Registers(RegARM32::Reg_NUM); 178 llvm::SmallBitVector Float64Registers(RegARM32::Reg_NUM);
182 llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM); 179 llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
183 llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM); 180 llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
184 ScratchRegs.resize(RegARM32::Reg_NUM); 181 ScratchRegs.resize(RegARM32::Reg_NUM);
185 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ 182 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
186 isFP32, isFP64, isVec128, alias_init) \ 183 isFP32, isFP64, isVec128, alias_init) \
187 IntegerRegisters[RegARM32::val] = isInt; \ 184 IntegerRegisters[RegARM32::val] = isInt; \
(...skipping 48 matching lines...)
236 Func->dump("After Phi lowering"); 233 Func->dump("After Phi lowering");
237 } 234 }
238 235
239 // Address mode optimization. 236 // Address mode optimization.
240 Func->getVMetadata()->init(VMK_SingleDefs); 237 Func->getVMetadata()->init(VMK_SingleDefs);
241 Func->doAddressOpt(); 238 Func->doAddressOpt();
242 239
243 // Argument lowering 240 // Argument lowering
244 Func->doArgLowering(); 241 Func->doArgLowering();
245 242
246 // Target lowering. This requires liveness analysis for some parts 243 // Target lowering. This requires liveness analysis for some parts of the
247 // of the lowering decisions, such as compare/branch fusing. If 244 // lowering decisions, such as compare/branch fusing. If non-lightweight
248 // non-lightweight liveness analysis is used, the instructions need 245 // liveness analysis is used, the instructions need to be renumbered first.
249 // to be renumbered first. TODO: This renumbering should only be 246 // TODO: This renumbering should only be necessary if we're actually
250 // necessary if we're actually calculating live intervals, which we 247 // calculating live intervals, which we only do for register allocation.
251 // only do for register allocation.
252 Func->renumberInstructions(); 248 Func->renumberInstructions();
253 if (Func->hasError()) 249 if (Func->hasError())
254 return; 250 return;
255 251
256 // TODO: It should be sufficient to use the fastest liveness 252 // TODO: It should be sufficient to use the fastest liveness calculation,
257 // calculation, i.e. livenessLightweight(). However, for some 253 // i.e. livenessLightweight(). However, for some reason that slows down the
258 // reason that slows down the rest of the translation. Investigate. 254 // rest of the translation. Investigate.
259 Func->liveness(Liveness_Basic); 255 Func->liveness(Liveness_Basic);
260 if (Func->hasError()) 256 if (Func->hasError())
261 return; 257 return;
262 Func->dump("After ARM32 address mode opt"); 258 Func->dump("After ARM32 address mode opt");
263 259
264 Func->genCode(); 260 Func->genCode();
265 if (Func->hasError()) 261 if (Func->hasError())
266 return; 262 return;
267 Func->dump("After ARM32 codegen"); 263 Func->dump("After ARM32 codegen");
268 264
269 // Register allocation. This requires instruction renumbering and 265 // Register allocation. This requires instruction renumbering and full
270 // full liveness analysis. 266 // liveness analysis.
271 Func->renumberInstructions(); 267 Func->renumberInstructions();
272 if (Func->hasError()) 268 if (Func->hasError())
273 return; 269 return;
274 Func->liveness(Liveness_Intervals); 270 Func->liveness(Liveness_Intervals);
275 if (Func->hasError()) 271 if (Func->hasError())
276 return; 272 return;
277 // Validate the live range computations. The expensive validation 273 // Validate the live range computations. The expensive validation call is
278 // call is deliberately only made when assertions are enabled. 274 // deliberately only made when assertions are enabled.
279 assert(Func->validateLiveness()); 275 assert(Func->validateLiveness());
280 // The post-codegen dump is done here, after liveness analysis and 276 // The post-codegen dump is done here, after liveness analysis and associated
281 // associated cleanup, to make the dump cleaner and more useful. 277 // cleanup, to make the dump cleaner and more useful.
282 Func->dump("After initial ARM32 codegen"); 278 Func->dump("After initial ARM32 codegen");
283 Func->getVMetadata()->init(VMK_All); 279 Func->getVMetadata()->init(VMK_All);
284 regAlloc(RAK_Global); 280 regAlloc(RAK_Global);
285 if (Func->hasError()) 281 if (Func->hasError())
286 return; 282 return;
287 Func->dump("After linear scan regalloc"); 283 Func->dump("After linear scan regalloc");
288 284
289 if (Ctx->getFlags().getPhiEdgeSplit()) { 285 if (Ctx->getFlags().getPhiEdgeSplit()) {
290 Func->advancedPhiLowering(); 286 Func->advancedPhiLowering();
291 Func->dump("After advanced Phi lowering"); 287 Func->dump("After advanced Phi lowering");
292 } 288 }
293 289
294 // Stack frame mapping. 290 // Stack frame mapping.
295 Func->genFrame(); 291 Func->genFrame();
296 if (Func->hasError()) 292 if (Func->hasError())
297 return; 293 return;
298 Func->dump("After stack frame mapping"); 294 Func->dump("After stack frame mapping");
299 295
300 legalizeStackSlots(); 296 legalizeStackSlots();
301 if (Func->hasError()) 297 if (Func->hasError())
302 return; 298 return;
303 Func->dump("After legalizeStackSlots"); 299 Func->dump("After legalizeStackSlots");
304 300
305 Func->contractEmptyNodes(); 301 Func->contractEmptyNodes();
306 Func->reorderNodes(); 302 Func->reorderNodes();
307 303
308 // Branch optimization. This needs to be done just before code 304 // Branch optimization. This needs to be done just before code emission. In
309 // emission. In particular, no transformations that insert or 305 // particular, no transformations that insert or reorder CfgNodes should be
310 // reorder CfgNodes should be done after branch optimization. We go 306 // done after branch optimization. We go ahead and do it before nop insertion
311 // ahead and do it before nop insertion to reduce the amount of work 307 // to reduce the amount of work needed for searching for opportunities.
312 // needed for searching for opportunities.
313 Func->doBranchOpt(); 308 Func->doBranchOpt();
314 Func->dump("After branch optimization"); 309 Func->dump("After branch optimization");
315 310
316 // Nop insertion 311 // Nop insertion
317 if (Ctx->getFlags().shouldDoNopInsertion()) { 312 if (Ctx->getFlags().shouldDoNopInsertion()) {
318 Func->doNopInsertion(); 313 Func->doNopInsertion();
319 } 314 }
320 } 315 }
321 316
322 void TargetARM32::translateOm1() { 317 void TargetARM32::translateOm1() {
(...skipping 65 matching lines...)
388 if (Ty == IceType_void) 383 if (Ty == IceType_void)
389 Ty = IceType_i32; 384 Ty = IceType_i32;
390 if (PhysicalRegisters[Ty].empty()) 385 if (PhysicalRegisters[Ty].empty())
391 PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM); 386 PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
392 assert(RegNum < PhysicalRegisters[Ty].size()); 387 assert(RegNum < PhysicalRegisters[Ty].size());
393 Variable *Reg = PhysicalRegisters[Ty][RegNum]; 388 Variable *Reg = PhysicalRegisters[Ty][RegNum];
394 if (Reg == nullptr) { 389 if (Reg == nullptr) {
395 Reg = Func->makeVariable(Ty); 390 Reg = Func->makeVariable(Ty);
396 Reg->setRegNum(RegNum); 391 Reg->setRegNum(RegNum);
397 PhysicalRegisters[Ty][RegNum] = Reg; 392 PhysicalRegisters[Ty][RegNum] = Reg;
398 // Specially mark SP and LR as an "argument" so that it is considered 393 // Specially mark SP and LR as an "argument" so that it is considered live
399 // live upon function entry. 394 // upon function entry.
400 if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) { 395 if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) {
401 Func->addImplicitArg(Reg); 396 Func->addImplicitArg(Reg);
402 Reg->setIgnoreLiveness(); 397 Reg->setIgnoreLiveness();
403 } 398 }
404 } 399 }
405 return Reg; 400 return Reg;
406 } 401 }
407 402
408 void TargetARM32::emitJumpTable(const Cfg *Func, 403 void TargetARM32::emitJumpTable(const Cfg *Func,
409 const InstJumpTable *JumpTable) const { 404 const InstJumpTable *JumpTable) const {
(...skipping 28 matching lines...)
438 if (Offset != 0) { 433 if (Offset != 0) {
439 Str << ", " << getConstantPrefix() << Offset; 434 Str << ", " << getConstantPrefix() << Offset;
440 } 435 }
441 Str << "]"; 436 Str << "]";
442 } 437 }
443 438
444 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) { 439 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
445 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) 440 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
446 return false; 441 return false;
447 int32_t RegLo, RegHi; 442 int32_t RegLo, RegHi;
448 // Always start i64 registers at an even register, so this may end 443 // Always start i64 registers at an even register, so this may end up padding
449 // up padding away a register. 444 // away a register.
450 NumGPRRegsUsed = Utils::applyAlignment(NumGPRRegsUsed, 2); 445 NumGPRRegsUsed = Utils::applyAlignment(NumGPRRegsUsed, 2);
451 RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed; 446 RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
452 ++NumGPRRegsUsed; 447 ++NumGPRRegsUsed;
453 RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed; 448 RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
454 ++NumGPRRegsUsed; 449 ++NumGPRRegsUsed;
455 // If this bumps us past the boundary, don't allocate to a register 450 // If this bumps us past the boundary, don't allocate to a register and leave
456 // and leave any previously speculatively consumed registers as consumed. 451 // any previously speculatively consumed registers as consumed.
457 if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG) 452 if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
458 return false; 453 return false;
459 Regs->first = RegLo; 454 Regs->first = RegLo;
460 Regs->second = RegHi; 455 Regs->second = RegHi;
461 return true; 456 return true;
462 } 457 }
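As a self-contained sketch of the even/odd pairing rule above (register names modeled as plain indices 0..3 and ARM32_MAX_GPR_ARG assumed to be 4; both are assumptions of this illustration):

#include <cstdint>
#include <utility>

struct GprState {
  uint32_t Used = 0; // number of r0..r3 argument registers consumed so far
};

bool i64InRegs(GprState &S, std::pair<int, int> *Regs) {
  constexpr uint32_t MaxGprArgs = 4;
  if (S.Used >= MaxGprArgs)
    return false;
  S.Used = (S.Used + 1) & ~1u; // start the pair at an even register index
  const int RegLo = S.Used++;
  const int RegHi = S.Used++;
  // Past the boundary: don't allocate, but keep the speculatively consumed
  // registers consumed, mirroring the lowering code above.
  if (S.Used > MaxGprArgs)
    return false;
  Regs->first = RegLo;  // e.g. r0 or r2
  Regs->second = RegHi; // e.g. r1 or r3
  return true;
}
// For an (i32, i64) argument list: the i32 takes r0, the i64 skips r1 as
// padding and lands in the (r2, r3) pair.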
463 458
464 bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) { 459 bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
465 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) 460 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
466 return false; 461 return false;
467 *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed; 462 *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed;
468 ++NumGPRRegsUsed; 463 ++NumGPRRegsUsed;
469 return true; 464 return true;
470 } 465 }
471 466
472 bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { 467 bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
473 if (NumFPRegUnits >= ARM32_MAX_FP_REG_UNITS) 468 if (NumFPRegUnits >= ARM32_MAX_FP_REG_UNITS)
474 return false; 469 return false;
475 if (isVectorType(Ty)) { 470 if (isVectorType(Ty)) {
476 NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 4); 471 NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 4);
477 // Q registers are declared in reverse order, so 472 // Q registers are declared in reverse order, so RegARM32::Reg_q0 >
478 // RegARM32::Reg_q0 > RegARM32::Reg_q1. Therefore, we need to subtract 473 // RegARM32::Reg_q1. Therefore, we need to subtract NumFPRegUnits from
479 // NumFPRegUnits from Reg_q0. Same thing goes for D registers. 474 // Reg_q0. Same thing goes for D registers.
480 static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1, 475 static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1,
481 "ARM32 Q registers are possibly declared incorrectly."); 476 "ARM32 Q registers are possibly declared incorrectly.");
482 *Reg = RegARM32::Reg_q0 - (NumFPRegUnits / 4); 477 *Reg = RegARM32::Reg_q0 - (NumFPRegUnits / 4);
483 NumFPRegUnits += 4; 478 NumFPRegUnits += 4;
484 // If this bumps us past the boundary, don't allocate to a register 479 // If this bumps us past the boundary, don't allocate to a register and
485 // and leave any previously speculatively consumed registers as consumed. 480 // leave any previously speculatively consumed registers as consumed.
486 if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS) 481 if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
487 return false; 482 return false;
488 } else if (Ty == IceType_f64) { 483 } else if (Ty == IceType_f64) {
489 static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1, 484 static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1,
490 "ARM32 D registers are possibly declared incorrectly."); 485 "ARM32 D registers are possibly declared incorrectly.");
491 NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 2); 486 NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 2);
492 *Reg = RegARM32::Reg_d0 - (NumFPRegUnits / 2); 487 *Reg = RegARM32::Reg_d0 - (NumFPRegUnits / 2);
493 NumFPRegUnits += 2; 488 NumFPRegUnits += 2;
494 // If this bumps us past the boundary, don't allocate to a register 489 // If this bumps us past the boundary, don't allocate to a register and
495 // and leave any previously speculatively consumed registers as consumed. 490 // leave any previously speculatively consumed registers as consumed.
496 if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS) 491 if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
497 return false; 492 return false;
498 } else { 493 } else {
499 static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1, 494 static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1,
500 "ARM32 S registers are possibly declared incorrectly."); 495 "ARM32 S registers are possibly declared incorrectly.");
501 assert(Ty == IceType_f32); 496 assert(Ty == IceType_f32);
502 *Reg = RegARM32::Reg_s0 + NumFPRegUnits; 497 *Reg = RegARM32::Reg_s0 + NumFPRegUnits;
503 ++NumFPRegUnits; 498 ++NumFPRegUnits;
504 } 499 }
505 return true; 500 return true;
506 } 501 }
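The S/D/Q bookkeeping above counts VFP registers in S-register-sized units (D = 2 units, Q = 4 units) and aligns the count before allocating. A simplified standalone sketch, assuming a 16-unit budget (s0-s15) and using plain upward-counting indices instead of the reversed Q/D enumerators:

#include <cstdint>

struct FpState {
  uint32_t Units = 0; // S-register-sized units consumed so far
};

bool f32InReg(FpState &S, int *SReg) {
  constexpr uint32_t MaxUnits = 16;
  if (S.Units >= MaxUnits)
    return false;
  *SReg = S.Units++; // s<N>
  return true;
}

bool f64InReg(FpState &S, int *DReg) {
  constexpr uint32_t MaxUnits = 16;
  S.Units = (S.Units + 1) & ~1u; // align to a D-register boundary
  if (S.Units + 2 > MaxUnits)
    return false;
  *DReg = S.Units / 2; // d<N>
  S.Units += 2;
  return true;
}
// For (f32, f64, f32): s0, then d1 (units 2-3, unit 1 is skipped), then s4.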
507 502
508 void TargetARM32::lowerArguments() { 503 void TargetARM32::lowerArguments() {
509 VarList &Args = Func->getArgs(); 504 VarList &Args = Func->getArgs();
510 TargetARM32::CallingConv CC; 505 TargetARM32::CallingConv CC;
511 506
512 // For each register argument, replace Arg in the argument list with the 507 // For each register argument, replace Arg in the argument list with the home
513 // home register. Then generate an instruction in the prolog to copy the 508 // register. Then generate an instruction in the prolog to copy the home
514 // home register to the assigned location of Arg. 509 // register to the assigned location of Arg.
515 Context.init(Func->getEntryNode()); 510 Context.init(Func->getEntryNode());
516 Context.setInsertPoint(Context.getCur()); 511 Context.setInsertPoint(Context.getCur());
517 512
518 for (SizeT I = 0, E = Args.size(); I < E; ++I) { 513 for (SizeT I = 0, E = Args.size(); I < E; ++I) {
519 Variable *Arg = Args[I]; 514 Variable *Arg = Args[I];
520 Type Ty = Arg->getType(); 515 Type Ty = Arg->getType();
521 if (Ty == IceType_i64) { 516 if (Ty == IceType_i64) {
522 std::pair<int32_t, int32_t> RegPair; 517 std::pair<int32_t, int32_t> RegPair;
523 if (!CC.I64InRegs(&RegPair)) 518 if (!CC.I64InRegs(&RegPair))
524 continue; 519 continue;
(...skipping 36 matching lines...)
561 556
562 Args[I] = RegisterArg; 557 Args[I] = RegisterArg;
563 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); 558 Context.insert(InstAssign::create(Func, Arg, RegisterArg));
564 continue; 559 continue;
565 } 560 }
566 } 561 }
567 } 562 }
568 563
569 // Helper function for addProlog(). 564 // Helper function for addProlog().
570 // 565 //
571 // This assumes Arg is an argument passed on the stack. This sets the 566 // This assumes Arg is an argument passed on the stack. This sets the frame
572 // frame offset for Arg and updates InArgsSizeBytes according to Arg's 567 // offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
573 // width. For an I64 arg that has been split into Lo and Hi components, 568 // I64 arg that has been split into Lo and Hi components, it calls itself
574 // it calls itself recursively on the components, taking care to handle 569 // recursively on the components, taking care to handle Lo first because of the
575 // Lo first because of the little-endian architecture. Lastly, this 570 // little-endian architecture. Lastly, this function generates an instruction
576 // function generates an instruction to copy Arg into its assigned 571 // to copy Arg into its assigned register if applicable.
577 // register if applicable.
578 void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, 572 void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
579 size_t BasicFrameOffset, 573 size_t BasicFrameOffset,
580 size_t &InArgsSizeBytes) { 574 size_t &InArgsSizeBytes) {
581 Variable *Lo = Arg->getLo(); 575 Variable *Lo = Arg->getLo();
582 Variable *Hi = Arg->getHi(); 576 Variable *Hi = Arg->getHi();
583 Type Ty = Arg->getType(); 577 Type Ty = Arg->getType();
584 if (Lo && Hi && Ty == IceType_i64) { 578 if (Lo && Hi && Ty == IceType_i64) {
585 assert(Lo->getType() != IceType_i64); // don't want infinite recursion 579 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
586 assert(Hi->getType() != IceType_i64); // don't want infinite recursion 580 assert(Hi->getType() != IceType_i64); // don't want infinite recursion
587 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); 581 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
588 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); 582 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
589 return; 583 return;
590 } 584 }
591 InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty); 585 InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty);
592 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); 586 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
593 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); 587 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
594 // If the argument variable has been assigned a register, we need to load 588 // If the argument variable has been assigned a register, we need to load the
595 // the value from the stack slot. 589 // value from the stack slot.
596 if (Arg->hasReg()) { 590 if (Arg->hasReg()) {
597 assert(Ty != IceType_i64); 591 assert(Ty != IceType_i64);
598 OperandARM32Mem *Mem = OperandARM32Mem::create( 592 OperandARM32Mem *Mem = OperandARM32Mem::create(
599 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( 593 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
600 Ctx->getConstantInt32(Arg->getStackOffset()))); 594 Ctx->getConstantInt32(Arg->getStackOffset())));
601 if (isVectorType(Arg->getType())) { 595 if (isVectorType(Arg->getType())) {
602 // Use vld1.$elem or something? 596 // Use vld1.$elem or something?
603 UnimplementedError(Func->getContext()->getFlags()); 597 UnimplementedError(Func->getContext()->getFlags());
604 } else if (isFloatingType(Arg->getType())) { 598 } else if (isFloatingType(Arg->getType())) {
605 _vldr(Arg, Mem); 599 _vldr(Arg, Mem);
606 } else { 600 } else {
607 _ldr(Arg, Mem); 601 _ldr(Arg, Mem);
608 } 602 }
609 // This argument-copying instruction uses an explicit 603 // This argument-copying instruction uses an explicit OperandARM32Mem
610 // OperandARM32Mem operand instead of a Variable, so its 604 // operand instead of a Variable, so its fill-from-stack operation has to
611 // fill-from-stack operation has to be tracked separately for 605 // be tracked separately for statistics.
612 // statistics.
613 Ctx->statsUpdateFills(); 606 Ctx->statsUpdateFills();
614 } 607 }
615 } 608 }
616 609
617 Type TargetARM32::stackSlotType() { return IceType_i32; } 610 Type TargetARM32::stackSlotType() { return IceType_i32; }
618 611
619 void TargetARM32::addProlog(CfgNode *Node) { 612 void TargetARM32::addProlog(CfgNode *Node) {
620 // Stack frame layout: 613 // Stack frame layout:
621 // 614 //
622 // +------------------------+ 615 // +------------------------+
(...skipping 12 matching lines...)
635 // | 7. allocas | 628 // | 7. allocas |
636 // +------------------------+ <--- StackPointer 629 // +------------------------+ <--- StackPointer
637 // 630 //
638 // The following variables record the size in bytes of the given areas: 631 // The following variables record the size in bytes of the given areas:
639 // * PreservedRegsSizeBytes: area 1 632 // * PreservedRegsSizeBytes: area 1
640 // * SpillAreaPaddingBytes: area 2 633 // * SpillAreaPaddingBytes: area 2
641 // * GlobalsSize: area 3 634 // * GlobalsSize: area 3
642 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 635 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4
643 // * LocalsSpillAreaSize: area 5 636 // * LocalsSpillAreaSize: area 5
644 // * SpillAreaSizeBytes: areas 2 - 6 637 // * SpillAreaSizeBytes: areas 2 - 6
645 // Determine stack frame offsets for each Variable without a 638 // Determine stack frame offsets for each Variable without a register
646 // register assignment. This can be done as one variable per stack 639 // assignment. This can be done as one variable per stack slot. Or, do
647 // slot. Or, do coalescing by running the register allocator again 640 // coalescing by running the register allocator again with an infinite set of
648 // with an infinite set of registers (as a side effect, this gives 641 // registers (as a side effect, this gives variables a second chance at
649 // variables a second chance at physical register assignment). 642 // physical register assignment).
650 // 643 //
651 // A middle ground approach is to leverage sparsity and allocate one 644 // A middle ground approach is to leverage sparsity and allocate one block of
652 // block of space on the frame for globals (variables with 645 // space on the frame for globals (variables with multi-block lifetime), and
653 // multi-block lifetime), and one block to share for locals 646 // one block to share for locals (single-block lifetime).
654 // (single-block lifetime).
655 647
656 Context.init(Node); 648 Context.init(Node);
657 Context.setInsertPoint(Context.getCur()); 649 Context.setInsertPoint(Context.getCur());
658 650
659 llvm::SmallBitVector CalleeSaves = 651 llvm::SmallBitVector CalleeSaves =
660 getRegisterSet(RegSet_CalleeSave, RegSet_None); 652 getRegisterSet(RegSet_CalleeSave, RegSet_None);
661 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); 653 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
662 VarList SortedSpilledVariables; 654 VarList SortedSpilledVariables;
663 size_t GlobalsSize = 0; 655 size_t GlobalsSize = 0;
664 // If there is a separate locals area, this represents that area. 656 // If there is a separate locals area, this represents that area. Otherwise
665 // Otherwise it counts any variable not counted by GlobalsSize. 657 // it counts any variable not counted by GlobalsSize.
666 SpillAreaSizeBytes = 0; 658 SpillAreaSizeBytes = 0;
667 // If there is a separate locals area, this specifies the alignment 659 // If there is a separate locals area, this specifies the alignment for it.
668 // for it.
669 uint32_t LocalsSlotsAlignmentBytes = 0; 660 uint32_t LocalsSlotsAlignmentBytes = 0;
670 // The entire spill locations area gets aligned to largest natural 661 // The entire spill locations area gets aligned to largest natural alignment
671 // alignment of the variables that have a spill slot. 662 // of the variables that have a spill slot.
672 uint32_t SpillAreaAlignmentBytes = 0; 663 uint32_t SpillAreaAlignmentBytes = 0;
673 // For now, we don't have target-specific variables that need special 664 // For now, we don't have target-specific variables that need special
674 // treatment (no stack-slot-linked SpillVariable type). 665 // treatment (no stack-slot-linked SpillVariable type).
675 std::function<bool(Variable *)> TargetVarHook = 666 std::function<bool(Variable *)> TargetVarHook =
676 [](Variable *) { return false; }; 667 [](Variable *) { return false; };
677 668
678 // Compute the list of spilled variables and bounds for GlobalsSize, etc. 669 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
679 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, 670 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
680 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, 671 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
681 &LocalsSlotsAlignmentBytes, TargetVarHook); 672 &LocalsSlotsAlignmentBytes, TargetVarHook);
682 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; 673 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
683 SpillAreaSizeBytes += GlobalsSize; 674 SpillAreaSizeBytes += GlobalsSize;
684 675
685 // Add push instructions for preserved registers. 676 // Add push instructions for preserved registers. On ARM, "push" can push a
686 // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15). 677 // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
687 // Unlike x86, ARM also has callee-saved float/vector registers. 678 // callee-saved float/vector registers. The "vpush" instruction can handle a
688 // The "vpush" instruction can handle a whole list of float/vector 679 // whole list of float/vector registers, but it only handles contiguous
689 // registers, but it only handles contiguous sequences of registers 680 // sequences of registers by specifying the start and the length.
690 // by specifying the start and the length.
691 VarList GPRsToPreserve; 681 VarList GPRsToPreserve;
692 GPRsToPreserve.reserve(CalleeSaves.size()); 682 GPRsToPreserve.reserve(CalleeSaves.size());
693 uint32_t NumCallee = 0; 683 uint32_t NumCallee = 0;
694 size_t PreservedRegsSizeBytes = 0; 684 size_t PreservedRegsSizeBytes = 0;
695 // Consider FP and LR as callee-save / used as needed. 685 // Consider FP and LR as callee-save / used as needed.
696 if (UsesFramePointer) { 686 if (UsesFramePointer) {
697 CalleeSaves[RegARM32::Reg_fp] = true; 687 CalleeSaves[RegARM32::Reg_fp] = true;
698 assert(RegsUsed[RegARM32::Reg_fp] == false); 688 assert(RegsUsed[RegARM32::Reg_fp] == false);
699 RegsUsed[RegARM32::Reg_fp] = true; 689 RegsUsed[RegARM32::Reg_fp] = true;
700 } 690 }
701 if (!MaybeLeafFunc) { 691 if (!MaybeLeafFunc) {
702 CalleeSaves[RegARM32::Reg_lr] = true; 692 CalleeSaves[RegARM32::Reg_lr] = true;
703 RegsUsed[RegARM32::Reg_lr] = true; 693 RegsUsed[RegARM32::Reg_lr] = true;
704 } 694 }
705 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 695 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
706 if (CalleeSaves[i] && RegsUsed[i]) { 696 if (CalleeSaves[i] && RegsUsed[i]) {
707 // TODO(jvoung): do separate vpush for each floating point 697 // TODO(jvoung): do separate vpush for each floating point register
708 // register segment and += 4, or 8 depending on type. 698 // segment and += 4, or 8 depending on type.
709 ++NumCallee; 699 ++NumCallee;
710 PreservedRegsSizeBytes += 4; 700 PreservedRegsSizeBytes += 4;
711 GPRsToPreserve.push_back(getPhysicalRegister(i)); 701 GPRsToPreserve.push_back(getPhysicalRegister(i));
712 } 702 }
713 } 703 }
714 Ctx->statsUpdateRegistersSaved(NumCallee); 704 Ctx->statsUpdateRegistersSaved(NumCallee);
715 if (!GPRsToPreserve.empty()) 705 if (!GPRsToPreserve.empty())
716 _push(GPRsToPreserve); 706 _push(GPRsToPreserve);
717 707
718 // Generate "mov FP, SP" if needed. 708 // Generate "mov FP, SP" if needed.
719 if (UsesFramePointer) { 709 if (UsesFramePointer) {
720 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); 710 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
721 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 711 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
722 _mov(FP, SP); 712 _mov(FP, SP);
723 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). 713 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
724 Context.insert(InstFakeUse::create(Func, FP)); 714 Context.insert(InstFakeUse::create(Func, FP));
725 } 715 }
726 716
727 // Align the variables area. SpillAreaPaddingBytes is the size of 717 // Align the variables area. SpillAreaPaddingBytes is the size of the region
728 // the region after the preserved registers and before the spill areas. 718 // after the preserved registers and before the spill areas.
729 // LocalsSlotsPaddingBytes is the amount of padding between the globals 719 // LocalsSlotsPaddingBytes is the amount of padding between the globals and
730 // and locals area if they are separate. 720 // locals area if they are separate.
731 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES); 721 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
732 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); 722 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
733 uint32_t SpillAreaPaddingBytes = 0; 723 uint32_t SpillAreaPaddingBytes = 0;
734 uint32_t LocalsSlotsPaddingBytes = 0; 724 uint32_t LocalsSlotsPaddingBytes = 0;
735 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, 725 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
736 GlobalsSize, LocalsSlotsAlignmentBytes, 726 GlobalsSize, LocalsSlotsAlignmentBytes,
737 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); 727 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
738 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; 728 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
739 uint32_t GlobalsAndSubsequentPaddingSize = 729 uint32_t GlobalsAndSubsequentPaddingSize =
740 GlobalsSize + LocalsSlotsPaddingBytes; 730 GlobalsSize + LocalsSlotsPaddingBytes;
(...skipping 10 matching lines...)
751 // Use the scratch register if needed to legalize the immediate. 741 // Use the scratch register if needed to legalize the immediate.
752 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), 742 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
753 Legal_Reg | Legal_Flex, getReservedTmpReg()); 743 Legal_Reg | Legal_Flex, getReservedTmpReg());
754 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 744 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
755 _sub(SP, SP, SubAmount); 745 _sub(SP, SP, SubAmount);
756 } 746 }
757 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); 747 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
758 748
759 resetStackAdjustment(); 749 resetStackAdjustment();
760 750
761 // Fill in stack offsets for stack args, and copy args into registers 751 // Fill in stack offsets for stack args, and copy args into registers for
762 // for those that were register-allocated. Args are pushed right to 752 // those that were register-allocated. Args are pushed right to left, so
763 // left, so Arg[0] is closest to the stack/frame pointer. 753 // Arg[0] is closest to the stack/frame pointer.
764 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 754 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
765 size_t BasicFrameOffset = PreservedRegsSizeBytes; 755 size_t BasicFrameOffset = PreservedRegsSizeBytes;
766 if (!UsesFramePointer) 756 if (!UsesFramePointer)
767 BasicFrameOffset += SpillAreaSizeBytes; 757 BasicFrameOffset += SpillAreaSizeBytes;
768 758
769 const VarList &Args = Func->getArgs(); 759 const VarList &Args = Func->getArgs();
770 size_t InArgsSizeBytes = 0; 760 size_t InArgsSizeBytes = 0;
771 TargetARM32::CallingConv CC; 761 TargetARM32::CallingConv CC;
772 for (Variable *Arg : Args) { 762 for (Variable *Arg : Args) {
773 Type Ty = Arg->getType(); 763 Type Ty = Arg->getType();
(...skipping 49 matching lines...)
823 void TargetARM32::addEpilog(CfgNode *Node) { 813 void TargetARM32::addEpilog(CfgNode *Node) {
824 InstList &Insts = Node->getInsts(); 814 InstList &Insts = Node->getInsts();
825 InstList::reverse_iterator RI, E; 815 InstList::reverse_iterator RI, E;
826 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { 816 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
827 if (llvm::isa<InstARM32Ret>(*RI)) 817 if (llvm::isa<InstARM32Ret>(*RI))
828 break; 818 break;
829 } 819 }
830 if (RI == E) 820 if (RI == E)
831 return; 821 return;
832 822
833 // Convert the reverse_iterator position into its corresponding 823 // Convert the reverse_iterator position into its corresponding (forward)
834 // (forward) iterator position. 824 // iterator position.
835 InstList::iterator InsertPoint = RI.base(); 825 InstList::iterator InsertPoint = RI.base();
836 --InsertPoint; 826 --InsertPoint;
837 Context.init(Node); 827 Context.init(Node);
838 Context.setInsertPoint(InsertPoint); 828 Context.setInsertPoint(InsertPoint);
839 829
840 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 830 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
841 if (UsesFramePointer) { 831 if (UsesFramePointer) {
842 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); 832 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
843 // For late-stage liveness analysis (e.g. asm-verbose mode), 833 // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
844 // adding a fake use of SP before the assignment of SP=FP keeps 834 // use of SP before the assignment of SP=FP keeps previous SP adjustments
845 // previous SP adjustments from being dead-code eliminated. 835 // from being dead-code eliminated.
846 Context.insert(InstFakeUse::create(Func, SP)); 836 Context.insert(InstFakeUse::create(Func, SP));
847 _mov(SP, FP); 837 _mov(SP, FP);
848 } else { 838 } else {
849 // add SP, SpillAreaSizeBytes 839 // add SP, SpillAreaSizeBytes
850 if (SpillAreaSizeBytes) { 840 if (SpillAreaSizeBytes) {
851 // Use the scratch register if needed to legalize the immediate. 841 // Use the scratch register if needed to legalize the immediate.
852 Operand *AddAmount = 842 Operand *AddAmount =
853 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), 843 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
854 Legal_Reg | Legal_Flex, getReservedTmpReg()); 844 Legal_Reg | Legal_Flex, getReservedTmpReg());
855 _add(SP, SP, AddAmount); 845 _add(SP, SP, AddAmount);
856 } 846 }
857 } 847 }
858 848
859 // Add pop instructions for preserved registers. 849 // Add pop instructions for preserved registers.
860 llvm::SmallBitVector CalleeSaves = 850 llvm::SmallBitVector CalleeSaves =
861 getRegisterSet(RegSet_CalleeSave, RegSet_None); 851 getRegisterSet(RegSet_CalleeSave, RegSet_None);
862 VarList GPRsToRestore; 852 VarList GPRsToRestore;
863 GPRsToRestore.reserve(CalleeSaves.size()); 853 GPRsToRestore.reserve(CalleeSaves.size());
864 // Consider FP and LR as callee-save / used as needed. 854 // Consider FP and LR as callee-save / used as needed.
865 if (UsesFramePointer) { 855 if (UsesFramePointer) {
866 CalleeSaves[RegARM32::Reg_fp] = true; 856 CalleeSaves[RegARM32::Reg_fp] = true;
867 } 857 }
868 if (!MaybeLeafFunc) { 858 if (!MaybeLeafFunc) {
869 CalleeSaves[RegARM32::Reg_lr] = true; 859 CalleeSaves[RegARM32::Reg_lr] = true;
870 } 860 }
871 // Pop registers in ascending order just like push 861 // Pop registers in ascending order just like push (instead of in reverse
872 // (instead of in reverse order). 862 // order).
873 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 863 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
874 if (CalleeSaves[i] && RegsUsed[i]) { 864 if (CalleeSaves[i] && RegsUsed[i]) {
875 GPRsToRestore.push_back(getPhysicalRegister(i)); 865 GPRsToRestore.push_back(getPhysicalRegister(i));
876 } 866 }
877 } 867 }
878 if (!GPRsToRestore.empty()) 868 if (!GPRsToRestore.empty())
879 _pop(GPRsToRestore); 869 _pop(GPRsToRestore);
880 870
881 if (!Ctx->getFlags().getUseSandboxing()) 871 if (!Ctx->getFlags().getUseSandboxing())
882 return; 872 return;
(...skipping 13 matching lines...)
896 RetValue = llvm::cast<Variable>(RI->getSrc(0)); 886 RetValue = llvm::cast<Variable>(RI->getSrc(0));
897 _bundle_lock(); 887 _bundle_lock();
898 _bic(LR, LR, RetMask); 888 _bic(LR, LR, RetMask);
899 _ret(LR, RetValue); 889 _ret(LR, RetValue);
900 _bundle_unlock(); 890 _bundle_unlock();
901 RI->setDeleted(); 891 RI->setDeleted();
902 } 892 }
903 893
904 bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const { 894 bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const {
905 constexpr bool SignExt = false; 895 constexpr bool SignExt = false;
906 // TODO(jvoung): vldr of FP stack slots has a different limit from the 896 // TODO(jvoung): vldr of FP stack slots has a different limit from the plain
907 // plain stackSlotType(). 897 // stackSlotType().
908 return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset); 898 return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset);
909 } 899 }
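canHoldOffset checks whether Offset fits in the immediate field of the load/store encoding for the given type. As a rough illustration of why FP stack slots have a tighter limit than plain stackSlotType() (assumed A32 encodings: a 12-bit byte offset for word ldr/str, an 8-bit word-scaled offset for vldr/vstr):

#include <cstdint>
#include <cstdlib>

// Plain ldr/str of a word: 12-bit immediate, magnitude up to 4095 bytes.
bool canEncodeWordOffset(int32_t Offset) { return std::abs(Offset) <= 4095; }

// vldr/vstr: 8-bit immediate scaled by 4, so a multiple of 4 up to 1020 bytes.
bool canEncodeVldrOffset(int32_t Offset) {
  return (Offset % 4) == 0 && std::abs(Offset) <= 1020;
}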
910 900
911 StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var, 901 StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var,
912 Variable *OrigBaseReg) { 902 Variable *OrigBaseReg) {
913 int32_t Offset = Var->getStackOffset(); 903 int32_t Offset = Var->getStackOffset();
914 // Legalize will likely need a movw/movt combination, but if the top 904 // Legalize will likely need a movw/movt combination, but if the top bits are
915 // bits are all 0 from negating the offset and subtracting, we could 905 // all 0 from negating the offset and subtracting, we could use that instead.
916 // use that instead.
917 bool ShouldSub = (-Offset & 0xFFFF0000) == 0; 906 bool ShouldSub = (-Offset & 0xFFFF0000) == 0;
918 if (ShouldSub) 907 if (ShouldSub)
919 Offset = -Offset; 908 Offset = -Offset;
920 Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset), 909 Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset),
921 Legal_Reg | Legal_Flex, getReservedTmpReg()); 910 Legal_Reg | Legal_Flex, getReservedTmpReg());
922 Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg()); 911 Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg());
923 if (ShouldSub) 912 if (ShouldSub)
924 _sub(ScratchReg, OrigBaseReg, OffsetVal); 913 _sub(ScratchReg, OrigBaseReg, OffsetVal);
925 else 914 else
926 _add(ScratchReg, OrigBaseReg, OffsetVal); 915 _add(ScratchReg, OrigBaseReg, OffsetVal);
(...skipping 15 matching lines...)
942 // 931 //
943 // This is safe because we have reserved TMP, and add for ARM does not 932 // This is safe because we have reserved TMP, and add for ARM does not
944 // clobber the flags register. 933 // clobber the flags register.
945 Func->dump("Before legalizeStackSlots"); 934 Func->dump("Before legalizeStackSlots");
946 assert(hasComputedFrame()); 935 assert(hasComputedFrame());
947 // Early exit, if SpillAreaSizeBytes is really small. 936 // Early exit, if SpillAreaSizeBytes is really small.
948 if (isLegalVariableStackOffset(SpillAreaSizeBytes)) 937 if (isLegalVariableStackOffset(SpillAreaSizeBytes))
949 return; 938 return;
950 Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); 939 Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg());
951 int32_t StackAdjust = 0; 940 int32_t StackAdjust = 0;
952 // Do a fairly naive greedy clustering for now. Pick the first stack slot 941 // Do a fairly naive greedy clustering for now. Pick the first stack slot
953 // that's out of bounds and make a new base reg using the architecture's temp 942 // that's out of bounds and make a new base reg using the architecture's temp
954 // register. If that works for the next slot, then great. Otherwise, create 943 // register. If that works for the next slot, then great. Otherwise, create a
955 // a new base register, clobbering the previous base register. Never share a 944 // new base register, clobbering the previous base register. Never share a
956 // base reg across different basic blocks. This isn't ideal if local and 945 // base reg across different basic blocks. This isn't ideal if local and
957 // multi-block variables are far apart and their references are interspersed. 946 // multi-block variables are far apart and their references are interspersed.
958 // It may help to be more coordinated about assign stack slot numbers 947 // It may help to be more coordinated about assign stack slot numbers and may
959 // and may help to assign smaller offsets to higher-weight variables 948 // help to assign smaller offsets to higher-weight variables so that they
960 // so that they don't depend on this legalization. 949 // don't depend on this legalization.
961 for (CfgNode *Node : Func->getNodes()) { 950 for (CfgNode *Node : Func->getNodes()) {
962 Context.init(Node); 951 Context.init(Node);
963 StackVariable *NewBaseReg = nullptr; 952 StackVariable *NewBaseReg = nullptr;
964 int32_t NewBaseOffset = 0; 953 int32_t NewBaseOffset = 0;
965 while (!Context.atEnd()) { 954 while (!Context.atEnd()) {
966 PostIncrLoweringContext PostIncrement(Context); 955 PostIncrLoweringContext PostIncrement(Context);
967 Inst *CurInstr = Context.getCur(); 956 Inst *CurInstr = Context.getCur();
968 Variable *Dest = CurInstr->getDest(); 957 Variable *Dest = CurInstr->getDest();
969 // Check if the previous NewBaseReg is clobbered, and reset if needed. 958 // Check if the previous NewBaseReg is clobbered, and reset if needed.
970 if ((Dest && NewBaseReg && Dest->hasReg() && 959 if ((Dest && NewBaseReg && Dest->hasReg() &&
971 Dest->getRegNum() == NewBaseReg->getBaseRegNum()) || 960 Dest->getRegNum() == NewBaseReg->getBaseRegNum()) ||
972 llvm::isa<InstFakeKill>(CurInstr)) { 961 llvm::isa<InstFakeKill>(CurInstr)) {
973 NewBaseReg = nullptr; 962 NewBaseReg = nullptr;
974 NewBaseOffset = 0; 963 NewBaseOffset = 0;
975 } 964 }
976 // The stack adjustment only matters if we are using SP instead of FP. 965 // The stack adjustment only matters if we are using SP instead of FP.
977 if (!hasFramePointer()) { 966 if (!hasFramePointer()) {
978 if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) { 967 if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) {
979 StackAdjust += AdjInst->getAmount(); 968 StackAdjust += AdjInst->getAmount();
980 NewBaseOffset += AdjInst->getAmount(); 969 NewBaseOffset += AdjInst->getAmount();
981 continue; 970 continue;
982 } 971 }
983 if (llvm::isa<InstARM32Call>(CurInstr)) { 972 if (llvm::isa<InstARM32Call>(CurInstr)) {
984 NewBaseOffset -= StackAdjust; 973 NewBaseOffset -= StackAdjust;
985 StackAdjust = 0; 974 StackAdjust = 0;
986 continue; 975 continue;
987 } 976 }
988 } 977 }
989 // For now, only Mov instructions can have stack variables. We need to 978 // For now, only Mov instructions can have stack variables. We need to
990 // know the type of instruction because we currently create a fresh one 979 // know the type of instruction because we currently create a fresh one
991 // to replace Dest/Source, rather than mutate in place. 980 // to replace Dest/Source, rather than mutate in place.
992 auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr); 981 auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr);
993 if (!MovInst) { 982 if (!MovInst) {
994 continue; 983 continue;
995 } 984 }
996 if (!Dest->hasReg()) { 985 if (!Dest->hasReg()) {
997 int32_t Offset = Dest->getStackOffset(); 986 int32_t Offset = Dest->getStackOffset();
998 Offset += StackAdjust; 987 Offset += StackAdjust;
999 if (!isLegalVariableStackOffset(Offset)) { 988 if (!isLegalVariableStackOffset(Offset)) {
(...skipping 110 matching lines...)
1110 return Operand; 1099 return Operand;
1111 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { 1100 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
1112 split64(Var); 1101 split64(Var);
1113 return Var->getHi(); 1102 return Var->getHi();
1114 } 1103 }
1115 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 1104 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1116 return Ctx->getConstantInt32( 1105 return Ctx->getConstantInt32(
1117 static_cast<uint32_t>(Const->getValue() >> 32)); 1106 static_cast<uint32_t>(Const->getValue() >> 32));
1118 } 1107 }
1119 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) { 1108 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
1120 // Conservatively disallow memory operands with side-effects 1109 // Conservatively disallow memory operands with side-effects in case of
1121 // in case of duplication. 1110 // duplication.
1122 assert(Mem->getAddrMode() == OperandARM32Mem::Offset || 1111 assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
1123 Mem->getAddrMode() == OperandARM32Mem::NegOffset); 1112 Mem->getAddrMode() == OperandARM32Mem::NegOffset);
1124 const Type SplitType = IceType_i32; 1113 const Type SplitType = IceType_i32;
1125 if (Mem->isRegReg()) { 1114 if (Mem->isRegReg()) {
1126 // We have to make a temp variable T, and add 4 to either Base or Index. 1115 // We have to make a temp variable T, and add 4 to either Base or Index.
1127 // The Index may be shifted, so adding 4 can mean something else. 1116 // The Index may be shifted, so adding 4 can mean something else. Thus,
1128 // Thus, prefer T := Base + 4, and use T as the new Base. 1117 // prefer T := Base + 4, and use T as the new Base.
1129 Variable *Base = Mem->getBase(); 1118 Variable *Base = Mem->getBase();
1130 Constant *Four = Ctx->getConstantInt32(4); 1119 Constant *Four = Ctx->getConstantInt32(4);
1131 Variable *NewBase = Func->makeVariable(Base->getType()); 1120 Variable *NewBase = Func->makeVariable(Base->getType());
1132 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase, 1121 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
1133 Base, Four)); 1122 Base, Four));
1134 return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(), 1123 return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(),
1135 Mem->getShiftOp(), Mem->getShiftAmt(), 1124 Mem->getShiftOp(), Mem->getShiftAmt(),
1136 Mem->getAddrMode()); 1125 Mem->getAddrMode());
1137 } else { 1126 } else {
1138 Variable *Base = Mem->getBase(); 1127 Variable *Base = Mem->getBase();
1139 ConstantInteger32 *Offset = Mem->getOffset(); 1128 ConstantInteger32 *Offset = Mem->getOffset();
1140 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4)); 1129 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
1141 int32_t NextOffsetVal = Offset->getValue() + 4; 1130 int32_t NextOffsetVal = Offset->getValue() + 4;
1142 const bool SignExt = false; 1131 const bool SignExt = false;
1143 if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) { 1132 if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
1144 // We have to make a temp variable and add 4 to either Base or Offset. 1133 // We have to make a temp variable and add 4 to either Base or Offset.
1145 // If we add 4 to Offset, this will convert a non-RegReg addressing 1134 // If we add 4 to Offset, this will convert a non-RegReg addressing
1146 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows 1135 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
1147 // RegReg addressing modes, prefer adding to base and replacing instead. 1136 // RegReg addressing modes, prefer adding to base and replacing
1148 // Thus we leave the old offset alone. 1137 // instead. Thus we leave the old offset alone.
1149 Constant *Four = Ctx->getConstantInt32(4); 1138 Constant *Four = Ctx->getConstantInt32(4);
1150 Variable *NewBase = Func->makeVariable(Base->getType()); 1139 Variable *NewBase = Func->makeVariable(Base->getType());
1151 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, 1140 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
1152 NewBase, Base, Four)); 1141 NewBase, Base, Four));
1153 Base = NewBase; 1142 Base = NewBase;
1154 } else { 1143 } else {
1155 Offset = 1144 Offset =
1156 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal)); 1145 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
1157 } 1146 }
1158 return OperandARM32Mem::create(Func, SplitType, Base, Offset, 1147 return OperandARM32Mem::create(Func, SplitType, Base, Offset,
(...skipping 29 matching lines...)
1188 1177
1189 REGARM32_TABLE 1178 REGARM32_TABLE
1190 1179
1191 #undef X 1180 #undef X
1192 1181
1193 return Registers; 1182 return Registers;
1194 } 1183 }
1195 1184
1196 void TargetARM32::lowerAlloca(const InstAlloca *Inst) { 1185 void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
1197 UsesFramePointer = true; 1186 UsesFramePointer = true;
1198 // Conservatively require the stack to be aligned. Some stack 1187 // Conservatively require the stack to be aligned. Some stack adjustment
1199 // adjustment operations implemented below assume that the stack is 1188 // operations implemented below assume that the stack is aligned before the
1200 // aligned before the alloca. All the alloca code ensures that the 1189 // alloca. All the alloca code ensures that the stack alignment is preserved
1201 // stack alignment is preserved after the alloca. The stack alignment 1190 // after the alloca. The stack alignment restriction can be relaxed in some
1202 // restriction can be relaxed in some cases. 1191 // cases.
1203 NeedsStackAlignment = true; 1192 NeedsStackAlignment = true;
1204 1193
1205 // TODO(stichnot): minimize the number of adjustments of SP, etc. 1194 // TODO(stichnot): minimize the number of adjustments of SP, etc.
1206 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 1195 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1207 Variable *Dest = Inst->getDest(); 1196 Variable *Dest = Inst->getDest();
1208 uint32_t AlignmentParam = Inst->getAlignInBytes(); 1197 uint32_t AlignmentParam = Inst->getAlignInBytes();
1209 // For default align=0, set it to the real value 1, to avoid any 1198 // For default align=0, set it to the real value 1, to avoid any
1210 // bit-manipulation problems below. 1199 // bit-manipulation problems below.
1211 AlignmentParam = std::max(AlignmentParam, 1u); 1200 AlignmentParam = std::max(AlignmentParam, 1u);
1212 1201
1213 // LLVM enforces power of 2 alignment. 1202 // LLVM enforces power of 2 alignment.
1214 assert(llvm::isPowerOf2_32(AlignmentParam)); 1203 assert(llvm::isPowerOf2_32(AlignmentParam));
1215 assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES)); 1204 assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));
1216 1205
1217 uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES); 1206 uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
1218 if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) { 1207 if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) {
1219 alignRegisterPow2(SP, Alignment); 1208 alignRegisterPow2(SP, Alignment);
1220 } 1209 }
1221 Operand *TotalSize = Inst->getSizeInBytes(); 1210 Operand *TotalSize = Inst->getSizeInBytes();
1222 if (const auto *ConstantTotalSize = 1211 if (const auto *ConstantTotalSize =
1223 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { 1212 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
1224 uint32_t Value = ConstantTotalSize->getValue(); 1213 uint32_t Value = ConstantTotalSize->getValue();
1225 Value = Utils::applyAlignment(Value, Alignment); 1214 Value = Utils::applyAlignment(Value, Alignment);
1226 Operand *SubAmount = legalize(Ctx->getConstantInt32(Value)); 1215 Operand *SubAmount = legalize(Ctx->getConstantInt32(Value));
1227 _sub(SP, SP, SubAmount); 1216 _sub(SP, SP, SubAmount);
1228 } else { 1217 } else {
1229 // Non-constant sizes need to be adjusted to the next highest 1218 // Non-constant sizes need to be adjusted to the next highest multiple of
1230 // multiple of the required alignment at runtime. 1219 // the required alignment at runtime.
1231 TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex); 1220 TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
1232 Variable *T = makeReg(IceType_i32); 1221 Variable *T = makeReg(IceType_i32);
1233 _mov(T, TotalSize); 1222 _mov(T, TotalSize);
1234 Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1)); 1223 Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
1235 _add(T, T, AddAmount); 1224 _add(T, T, AddAmount);
1236 alignRegisterPow2(T, Alignment); 1225 alignRegisterPow2(T, Alignment);
1237 _sub(SP, SP, T); 1226 _sub(SP, SP, T);
1238 } 1227 }
1239 _mov(Dest, SP); 1228 _mov(Dest, SP);
1240 } 1229 }
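As a reference for the alignment handling in lowerAlloca above, here is a minimal standalone sketch (not code from this patch) of the round-up arithmetic that Utils::applyAlignment performs for constant sizes and that the add/mask pair (_add plus alignRegisterPow2) performs for dynamic sizes; the helper name below is invented for illustration.

#include <cassert>
#include <cstdint>

// Round Size up to the next multiple of Align, which must be a power of two.
// Same arithmetic as: T = Size + (Align - 1); T &= ~(Align - 1).
static uint32_t roundUpToAlignment(uint32_t Size, uint32_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "power-of-two alignment");
  return (Size + Align - 1) & ~(Align - 1);
}

int main() {
  assert(roundUpToAlignment(0, 16) == 0);
  assert(roundUpToAlignment(1, 16) == 16);
  assert(roundUpToAlignment(24, 16) == 32);
  assert(roundUpToAlignment(32, 16) == 32);
  return 0;
}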
(...skipping 17 matching lines...)
1258 _tst(SrcLoReg, Mask); 1247 _tst(SrcLoReg, Mask);
1259 break; 1248 break;
1260 } 1249 }
1261 case IceType_i32: { 1250 case IceType_i32: {
1262 _tst(SrcLoReg, SrcLoReg); 1251 _tst(SrcLoReg, SrcLoReg);
1263 break; 1252 break;
1264 } 1253 }
1265 case IceType_i64: { 1254 case IceType_i64: {
1266 Variable *ScratchReg = makeReg(IceType_i32); 1255 Variable *ScratchReg = makeReg(IceType_i32);
1267 _orrs(ScratchReg, SrcLoReg, SrcHi); 1256 _orrs(ScratchReg, SrcLoReg, SrcHi);
1268 // ScratchReg isn't going to be used, but we need the 1257 // ScratchReg isn't going to be used, but we need the side-effect of
1269 // side-effect of setting flags from this operation. 1258 // setting flags from this operation.
1270 Context.insert(InstFakeUse::create(Func, ScratchReg)); 1259 Context.insert(InstFakeUse::create(Func, ScratchReg));
1271 } 1260 }
1272 } 1261 }
1273 InstARM32Label *Label = InstARM32Label::create(Func, this); 1262 InstARM32Label *Label = InstARM32Label::create(Func, this);
1274 _br(Label, CondARM32::NE); 1263 _br(Label, CondARM32::NE);
1275 _trap(); 1264 _trap();
1276 Context.insert(Label); 1265 Context.insert(Label);
1277 } 1266 }
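The i64 branch of div0Check above only needs the flags from orring the two halves together, which is why the scratch register is kept alive with an InstFakeUse. A tiny sketch of the equivalent zero test, assuming nothing beyond standard C++:

#include <cassert>
#include <cstdint>

// A 64-bit value is zero exactly when the bitwise OR of its halves is zero;
// the lowering computes this OR with orrs purely for its flag side-effect.
static bool isZero64(uint32_t Lo, uint32_t Hi) { return (Lo | Hi) == 0; }

int main() {
  assert(isZero64(0, 0));
  assert(!isZero64(1, 0));
  assert(!isZero64(0, 1));
  return 0;
}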
1278 1267
1279 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, 1268 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
(...skipping 23 matching lines...)
1303 InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs); 1292 InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs);
1304 Call->addArg(T0R); 1293 Call->addArg(T0R);
1305 Call->addArg(T1R); 1294 Call->addArg(T1R);
1306 lowerCall(Call); 1295 lowerCall(Call);
1307 } 1296 }
1308 return; 1297 return;
1309 } 1298 }
1310 1299
1311 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { 1300 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
1312 Variable *Dest = Inst->getDest(); 1301 Variable *Dest = Inst->getDest();
1313 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier 1302 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to
1314 // to legalize Src0 to flex or Src1 to flex and there is a reversible 1303 // legalize Src0 to flex or Src1 to flex and there is a reversible
1315 // instruction. E.g., reverse subtract with immediate, register vs 1304 // instruction. E.g., reverse subtract with immediate, register vs register,
1316 // register, immediate. 1305 // immediate.
1317 // Or it may be the case that the operands aren't swapped, but the 1306 // Or it may be the case that the operands aren't swapped, but the bits can
1318 // bits can be flipped and a different operation applied. 1307 // be flipped and a different operation applied. E.g., use BIC (bit clear)
1319 // E.g., use BIC (bit clear) instead of AND for some masks. 1308 // instead of AND for some masks.
1320 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 1309 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
1321 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); 1310 Operand *Src1 = legalizeUndef(Inst->getSrc(1));
1322 if (Dest->getType() == IceType_i64) { 1311 if (Dest->getType() == IceType_i64) {
1323 // These helper-call-involved instructions are lowered in this 1312 // These helper-call-involved instructions are lowered in this separate
1324 // separate switch. This is because we would otherwise assume that 1313 // switch. This is because we would otherwise assume that we need to
1325 // we need to legalize Src0 to Src0RLo and Src0Hi. However, those go unused 1314 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with
1326 // with helper calls, and such unused/redundant instructions will fail 1315 // helper calls, and such unused/redundant instructions will fail liveness
1327 // liveness analysis under -Om1 setting. 1316 // analysis under -Om1 setting.
1328 switch (Inst->getOp()) { 1317 switch (Inst->getOp()) {
1329 default: 1318 default:
1330 break; 1319 break;
1331 case InstArithmetic::Udiv: 1320 case InstArithmetic::Udiv:
1332 case InstArithmetic::Sdiv: 1321 case InstArithmetic::Sdiv:
1333 case InstArithmetic::Urem: 1322 case InstArithmetic::Urem:
1334 case InstArithmetic::Srem: { 1323 case InstArithmetic::Srem: {
1335 // Check for divide by 0 (ARM normally doesn't trap, but we want it 1324 // Check for divide by 0 (ARM normally doesn't trap, but we want it to
1336 // to trap for NaCl). Src1Lo and Src1Hi may have already been legalized 1325 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
1337 // to a register, which will hide a constant source operand. 1326 // register, which will hide a constant source operand. Instead, check
1338 // Instead, check the not-yet-legalized Src1 to optimize-out a divide 1327 // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
1339 // by 0 check.
1340 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { 1328 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
1341 if (C64->getValue() == 0) { 1329 if (C64->getValue() == 0) {
1342 _trap(); 1330 _trap();
1343 return; 1331 return;
1344 } 1332 }
1345 } else { 1333 } else {
1346 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); 1334 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1347 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); 1335 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1348 div0Check(IceType_i64, Src1Lo, Src1Hi); 1336 div0Check(IceType_i64, Src1Lo, Src1Hi);
1349 } 1337 }
1350 // Technically, ARM has their own aeabi routines, but we can use the 1338 // Technically, ARM has their own aeabi routines, but we can use the
1351 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, 1339 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
1352 // but uses the more standard __moddi3 for rem. 1340 // the more standard __moddi3 for rem.
1353 const char *HelperName = ""; 1341 const char *HelperName = "";
1354 switch (Inst->getOp()) { 1342 switch (Inst->getOp()) {
1355 default: 1343 default:
1356 llvm_unreachable("Should have only matched div ops."); 1344 llvm_unreachable("Should have only matched div ops.");
1357 break; 1345 break;
1358 case InstArithmetic::Udiv: 1346 case InstArithmetic::Udiv:
1359 HelperName = H_udiv_i64; 1347 HelperName = H_udiv_i64;
1360 break; 1348 break;
1361 case InstArithmetic::Sdiv: 1349 case InstArithmetic::Sdiv:
1362 HelperName = H_sdiv_i64; 1350 HelperName = H_sdiv_i64;
(...skipping 102 matching lines...)
1465 // a=b<<c ==> 1453 // a=b<<c ==>
1466 // GCC 4.8 does: 1454 // GCC 4.8 does:
1467 // sub t_c1, c.lo, #32 1455 // sub t_c1, c.lo, #32
1468 // lsl t_hi, b.hi, c.lo 1456 // lsl t_hi, b.hi, c.lo
1469 // orr t_hi, t_hi, b.lo, lsl t_c1 1457 // orr t_hi, t_hi, b.lo, lsl t_c1
1470 // rsb t_c2, c.lo, #32 1458 // rsb t_c2, c.lo, #32
1471 // orr t_hi, t_hi, b.lo, lsr t_c2 1459 // orr t_hi, t_hi, b.lo, lsr t_c2
1472 // lsl t_lo, b.lo, c.lo 1460 // lsl t_lo, b.lo, c.lo
1473 // a.lo = t_lo 1461 // a.lo = t_lo
1474 // a.hi = t_hi 1462 // a.hi = t_hi
1475 // Can be strength-reduced for constant-shifts, but we don't do 1463 // Can be strength-reduced for constant-shifts, but we don't do that for
1476 // that for now. 1464 // now.
1477 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. 1465 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
1478 // On ARM, shifts only take the lower 8 bits of the shift register, 1466 // ARM, shifts only take the lower 8 bits of the shift register, and
1479 // and saturate to the range 0-32, so the negative value will 1467 // saturate to the range 0-32, so the negative value will saturate to 32.
1480 // saturate to 32.
1481 Variable *T_Hi = makeReg(IceType_i32); 1468 Variable *T_Hi = makeReg(IceType_i32);
1482 Variable *Src1RLo = legalizeToReg(Src1Lo); 1469 Variable *Src1RLo = legalizeToReg(Src1Lo);
1483 Constant *ThirtyTwo = Ctx->getConstantInt32(32); 1470 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
1484 Variable *T_C1 = makeReg(IceType_i32); 1471 Variable *T_C1 = makeReg(IceType_i32);
1485 Variable *T_C2 = makeReg(IceType_i32); 1472 Variable *T_C2 = makeReg(IceType_i32);
1486 _sub(T_C1, Src1RLo, ThirtyTwo); 1473 _sub(T_C1, Src1RLo, ThirtyTwo);
1487 _lsl(T_Hi, Src0RHi, Src1RLo); 1474 _lsl(T_Hi, Src0RHi, Src1RLo);
1488 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, 1475 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1489 OperandARM32::LSL, T_C1)); 1476 OperandARM32::LSL, T_C1));
1490 _rsb(T_C2, Src1RLo, ThirtyTwo); 1477 _rsb(T_C2, Src1RLo, ThirtyTwo);
1491 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, 1478 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1492 OperandARM32::LSR, T_C2)); 1479 OperandARM32::LSR, T_C2));
1493 _mov(DestHi, T_Hi); 1480 _mov(DestHi, T_Hi);
1494 Variable *T_Lo = makeReg(IceType_i32); 1481 Variable *T_Lo = makeReg(IceType_i32);
1495 // _mov seems to sometimes have better register preferencing than lsl. 1482 // _mov seems to sometimes have better register preferencing than lsl.
1496 // Otherwise mov w/ lsl shifted register is a pseudo-instruction 1483 // Otherwise mov w/ lsl shifted register is a pseudo-instruction that
1497 // that maps to lsl. 1484 // maps to lsl.
1498 _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, 1485 _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1499 OperandARM32::LSL, Src1RLo)); 1486 OperandARM32::LSL, Src1RLo));
1500 _mov(DestLo, T_Lo); 1487 _mov(DestLo, T_Lo);
1501 return; 1488 return;
1502 } 1489 }
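The Shl sequence above leans on the register-shift behavior described in the comment: only the low byte of the shift register is consumed, and amounts of 32 or more produce zero. A standalone sketch (not the lowering code itself) that emulates the emitted sequence and checks it against a plain 64-bit shift:

#include <cassert>
#include <cstdint>

// ARM register-specified LSL/LSR: only the low byte of the shift amount is
// used, and any amount of 32 or more yields zero.
static uint32_t lslReg(uint32_t V, uint32_t Amt) {
  Amt &= 0xFF;
  return Amt >= 32 ? 0 : V << Amt;
}
static uint32_t lsrReg(uint32_t V, uint32_t Amt) {
  Amt &= 0xFF;
  return Amt >= 32 ? 0 : V >> Amt;
}

// a = b << c for i64, following the GCC 4.8 sequence quoted above.
static uint64_t shl64(uint64_t B, uint32_t CLo) {
  uint32_t BLo = static_cast<uint32_t>(B);
  uint32_t BHi = static_cast<uint32_t>(B >> 32);
  uint32_t TC1 = CLo - 32;         // sub t_c1, c.lo, #32
  uint32_t THi = lslReg(BHi, CLo); // lsl t_hi, b.hi, c.lo
  THi |= lslReg(BLo, TC1);         // orr t_hi, t_hi, b.lo, lsl t_c1
  uint32_t TC2 = 32 - CLo;         // rsb t_c2, c.lo, #32
  THi |= lsrReg(BLo, TC2);         // orr t_hi, t_hi, b.lo, lsr t_c2
  uint32_t TLo = lslReg(BLo, CLo); // lsl t_lo, b.lo, c.lo
  return (static_cast<uint64_t>(THi) << 32) | TLo;
}

int main() {
  const uint64_t B = 0x123456789ABCDEF0ull;
  for (uint32_t C = 0; C < 64; ++C)
    assert(shl64(B, C) == B << C);
  return 0;
}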
1503 case InstArithmetic::Lshr: 1490 case InstArithmetic::Lshr:
1504 // a=b>>c (unsigned) ==> 1491 // a=b>>c (unsigned) ==>
1505 // GCC 4.8 does: 1492 // GCC 4.8 does:
1506 // rsb t_c1, c.lo, #32 1493 // rsb t_c1, c.lo, #32
1507 // lsr t_lo, b.lo, c.lo 1494 // lsr t_lo, b.lo, c.lo
1508 // orr t_lo, t_lo, b.hi, lsl t_c1 1495 // orr t_lo, t_lo, b.hi, lsl t_c1
1509 // sub t_c2, c.lo, #32 1496 // sub t_c2, c.lo, #32
1510 // orr t_lo, t_lo, b.hi, lsr t_c2 1497 // orr t_lo, t_lo, b.hi, lsr t_c2
1511 // lsr t_hi, b.hi, c.lo 1498 // lsr t_hi, b.hi, c.lo
1512 // a.lo = t_lo 1499 // a.lo = t_lo
1513 // a.hi = t_hi 1500 // a.hi = t_hi
1514 case InstArithmetic::Ashr: { 1501 case InstArithmetic::Ashr: {
1515 // a=b>>c (signed) ==> ... 1502 // a=b>>c (signed) ==> ...
1516 // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, 1503 // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, and the
1517 // and the next orr should be conditioned on PLUS. The last two 1504 // next orr should be conditioned on PLUS. The last two right shifts
1518 // right shifts should also be arithmetic. 1505 // should also be arithmetic.
1519 bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; 1506 bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
1520 Variable *T_Lo = makeReg(IceType_i32); 1507 Variable *T_Lo = makeReg(IceType_i32);
1521 Variable *Src1RLo = legalizeToReg(Src1Lo); 1508 Variable *Src1RLo = legalizeToReg(Src1Lo);
1522 Constant *ThirtyTwo = Ctx->getConstantInt32(32); 1509 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
1523 Variable *T_C1 = makeReg(IceType_i32); 1510 Variable *T_C1 = makeReg(IceType_i32);
1524 Variable *T_C2 = makeReg(IceType_i32); 1511 Variable *T_C2 = makeReg(IceType_i32);
1525 _rsb(T_C1, Src1RLo, ThirtyTwo); 1512 _rsb(T_C1, Src1RLo, ThirtyTwo);
1526 _lsr(T_Lo, Src0RLo, Src1RLo); 1513 _lsr(T_Lo, Src0RLo, Src1RLo);
1527 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, 1514 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1528 OperandARM32::LSL, T_C1)); 1515 OperandARM32::LSL, T_C1));
(...skipping 187 matching lines...)
1716 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1703 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1717 Variable *T_Lo = nullptr, *T_Hi = nullptr; 1704 Variable *T_Lo = nullptr, *T_Hi = nullptr;
1718 _mov(T_Lo, Src0Lo); 1705 _mov(T_Lo, Src0Lo);
1719 _mov(DestLo, T_Lo); 1706 _mov(DestLo, T_Lo);
1720 _mov(T_Hi, Src0Hi); 1707 _mov(T_Hi, Src0Hi);
1721 _mov(DestHi, T_Hi); 1708 _mov(DestHi, T_Hi);
1722 } else { 1709 } else {
1723 Operand *NewSrc; 1710 Operand *NewSrc;
1724 if (Dest->hasReg()) { 1711 if (Dest->hasReg()) {
1725 // If Dest already has a physical register, then legalize the Src operand 1712 // If Dest already has a physical register, then legalize the Src operand
1726 // into a Variable with the same register assignment. This especially 1713 // into a Variable with the same register assignment. This especially
1727 // helps allow the use of Flex operands. 1714 // helps allow the use of Flex operands.
1728 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); 1715 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
1729 } else { 1716 } else {
1730 // Dest could be a stack operand. Since we could potentially need 1717 // Dest could be a stack operand. Since we could potentially need to do a
1731 // to do a Store (and store can only have Register operands), 1718 // Store (and store can only have Register operands), legalize this to a
1732 // legalize this to a register. 1719 // register.
1733 NewSrc = legalize(Src0, Legal_Reg); 1720 NewSrc = legalize(Src0, Legal_Reg);
1734 } 1721 }
1735 if (isVectorType(Dest->getType())) { 1722 if (isVectorType(Dest->getType())) {
1736 UnimplementedError(Func->getContext()->getFlags()); 1723 UnimplementedError(Func->getContext()->getFlags());
1737 } else if (isFloatingType(Dest->getType())) { 1724 } else if (isFloatingType(Dest->getType())) {
1738 Variable *SrcR = legalizeToReg(NewSrc); 1725 Variable *SrcR = legalizeToReg(NewSrc);
1739 _vmov(Dest, SrcR); 1726 _vmov(Dest, SrcR);
1740 } else { 1727 } else {
1741 _mov(Dest, NewSrc); 1728 _mov(Dest, NewSrc);
1742 } 1729 }
(...skipping 60 matching lines...)
1803 } 1790 }
1804 1791
1805 if (!InRegs) { 1792 if (!InRegs) {
1806 ParameterAreaSizeBytes = 1793 ParameterAreaSizeBytes =
1807 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty); 1794 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
1808 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes)); 1795 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
1809 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); 1796 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
1810 } 1797 }
1811 } 1798 }
1812 1799
1813 // Adjust the parameter area so that the stack is aligned. It is 1800 // Adjust the parameter area so that the stack is aligned. It is assumed that
1814 // assumed that the stack is already aligned at the start of the 1801 // the stack is already aligned at the start of the calling sequence.
1815 // calling sequence.
1816 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); 1802 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1817 1803
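Putting the two alignment steps above together, here is a small standalone sketch of how stack-argument offsets accumulate: each argument is placed at the running size rounded up to its own alignment (applyStackAlignmentTy), and the whole parameter area is then rounded up to the stack alignment, assumed here to be the usual AAPCS 8 bytes. The sizes below are example values, not Subzero's type tables.

#include <cassert>
#include <cstdint>
#include <vector>

static uint32_t alignUp(uint32_t N, uint32_t A) { return (N + A - 1) & ~(A - 1); }

int main() {
  struct Arg { uint32_t Size, Align; };
  const std::vector<Arg> Args = {{4, 4}, {8, 8}, {4, 4}}; // e.g. i32, i64, i32
  std::vector<uint32_t> Offsets;
  uint32_t AreaSize = 0;
  for (const Arg &A : Args) {
    AreaSize = alignUp(AreaSize, A.Align); // per-type alignment
    Offsets.push_back(AreaSize);
    AreaSize += A.Size;
  }
  AreaSize = alignUp(AreaSize, 8); // align the whole parameter area
  assert(Offsets[0] == 0 && Offsets[1] == 8 && Offsets[2] == 16);
  assert(AreaSize == 24);
  return 0;
}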
1818 // Subtract the appropriate amount for the argument area. This also 1804 // Subtract the appropriate amount for the argument area. This also takes
1819 // takes care of setting the stack adjustment during emission. 1805 // care of setting the stack adjustment during emission.
1820 // 1806 //
1821 // TODO: If for some reason the call instruction gets dead-code 1807 // TODO: If for some reason the call instruction gets dead-code eliminated
1822 // eliminated after lowering, we would need to ensure that the 1808 // after lowering, we would need to ensure that the pre-call and the
1823 // pre-call and the post-call esp adjustment get eliminated as well. 1809 // post-call esp adjustment get eliminated as well.
1824 if (ParameterAreaSizeBytes) { 1810 if (ParameterAreaSizeBytes) {
1825 Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), 1811 Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
1826 Legal_Reg | Legal_Flex); 1812 Legal_Reg | Legal_Flex);
1827 _adjust_stack(ParameterAreaSizeBytes, SubAmount); 1813 _adjust_stack(ParameterAreaSizeBytes, SubAmount);
1828 } 1814 }
1829 1815
1830 // Copy arguments that are passed on the stack to the appropriate 1816 // Copy arguments that are passed on the stack to the appropriate stack
1831 // stack locations. 1817 // locations.
1832 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 1818 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1833 for (auto &StackArg : StackArgs) { 1819 for (auto &StackArg : StackArgs) {
1834 ConstantInteger32 *Loc = 1820 ConstantInteger32 *Loc =
1835 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second)); 1821 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
1836 Type Ty = StackArg.first->getType(); 1822 Type Ty = StackArg.first->getType();
1837 OperandARM32Mem *Addr; 1823 OperandARM32Mem *Addr;
1838 constexpr bool SignExt = false; 1824 constexpr bool SignExt = false;
1839 if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) { 1825 if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
1840 Addr = OperandARM32Mem::create(Func, Ty, SP, Loc); 1826 Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
1841 } else { 1827 } else {
1842 Variable *NewBase = Func->makeVariable(SP->getType()); 1828 Variable *NewBase = Func->makeVariable(SP->getType());
1843 lowerArithmetic( 1829 lowerArithmetic(
1844 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc)); 1830 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
1845 Addr = formMemoryOperand(NewBase, Ty); 1831 Addr = formMemoryOperand(NewBase, Ty);
1846 } 1832 }
1847 lowerStore(InstStore::create(Func, StackArg.first, Addr)); 1833 lowerStore(InstStore::create(Func, StackArg.first, Addr));
1848 } 1834 }
1849 1835
1850 // Copy arguments to be passed in registers to the appropriate registers. 1836 // Copy arguments to be passed in registers to the appropriate registers.
1851 for (auto &GPRArg : GPRArgs) { 1837 for (auto &GPRArg : GPRArgs) {
1852 Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second); 1838 Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second);
1853 // Generate a FakeUse of register arguments so that they do not get 1839 // Generate a FakeUse of register arguments so that they do not get dead
1854 // dead code eliminated as a result of the FakeKill of scratch 1840 // code eliminated as a result of the FakeKill of scratch registers after
1855 // registers after the call. 1841 // the call.
1856 Context.insert(InstFakeUse::create(Func, Reg)); 1842 Context.insert(InstFakeUse::create(Func, Reg));
1857 } 1843 }
1858 for (auto &FPArg : FPArgs) { 1844 for (auto &FPArg : FPArgs) {
1859 Variable *Reg = legalizeToReg(FPArg.first, FPArg.second); 1845 Variable *Reg = legalizeToReg(FPArg.first, FPArg.second);
1860 Context.insert(InstFakeUse::create(Func, Reg)); 1846 Context.insert(InstFakeUse::create(Func, Reg));
1861 } 1847 }
1862 1848
1863 // Generate the call instruction. Assign its result to a temporary 1849 // Generate the call instruction. Assign its result to a temporary with high
1864 // with high register allocation weight. 1850 // register allocation weight.
1865 Variable *Dest = Instr->getDest(); 1851 Variable *Dest = Instr->getDest();
1866 // ReturnReg doubles as ReturnRegLo as necessary. 1852 // ReturnReg doubles as ReturnRegLo as necessary.
1867 Variable *ReturnReg = nullptr; 1853 Variable *ReturnReg = nullptr;
1868 Variable *ReturnRegHi = nullptr; 1854 Variable *ReturnRegHi = nullptr;
1869 if (Dest) { 1855 if (Dest) {
1870 switch (Dest->getType()) { 1856 switch (Dest->getType()) {
1871 case IceType_NUM: 1857 case IceType_NUM:
1872 llvm_unreachable("Invalid Call dest type"); 1858 llvm_unreachable("Invalid Call dest type");
1873 break; 1859 break;
1874 case IceType_void: 1860 case IceType_void:
(...skipping 19 matching lines...)
1894 case IceType_v16i1: 1880 case IceType_v16i1:
1895 case IceType_v16i8: 1881 case IceType_v16i8:
1896 case IceType_v8i16: 1882 case IceType_v8i16:
1897 case IceType_v4i32: 1883 case IceType_v4i32:
1898 case IceType_v4f32: 1884 case IceType_v4f32:
1899 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0); 1885 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0);
1900 break; 1886 break;
1901 } 1887 }
1902 } 1888 }
1903 Operand *CallTarget = Instr->getCallTarget(); 1889 Operand *CallTarget = Instr->getCallTarget();
1904 // TODO(jvoung): Handle sandboxing. 1890 // TODO(jvoung): Handle sandboxing. const bool NeedSandboxing =
1905 // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); 1891 // Ctx->getFlags().getUseSandboxing();
1906 1892
1907 // Allow ConstantRelocatable to be left alone as a direct call, 1893 // Allow ConstantRelocatable to be left alone as a direct call, but force
1908 // but force other constants like ConstantInteger32 to be in 1894 // other constants like ConstantInteger32 to be in a register and make it an
1909 // a register and make it an indirect call. 1895 // indirect call.
1910 if (!llvm::isa<ConstantRelocatable>(CallTarget)) { 1896 if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
1911 CallTarget = legalize(CallTarget, Legal_Reg); 1897 CallTarget = legalize(CallTarget, Legal_Reg);
1912 } 1898 }
1913 Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget); 1899 Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
1914 Context.insert(NewCall); 1900 Context.insert(NewCall);
1915 if (ReturnRegHi) 1901 if (ReturnRegHi)
1916 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); 1902 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
1917 1903
1918 // Add the appropriate offset to SP. The call instruction takes care 1904 // Add the appropriate offset to SP. The call instruction takes care of
1919 // of resetting the stack offset during emission. 1905 // resetting the stack offset during emission.
1920 if (ParameterAreaSizeBytes) { 1906 if (ParameterAreaSizeBytes) {
1921 Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), 1907 Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
1922 Legal_Reg | Legal_Flex); 1908 Legal_Reg | Legal_Flex);
1923 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 1909 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1924 _add(SP, SP, AddAmount); 1910 _add(SP, SP, AddAmount);
1925 } 1911 }
1926 1912
1927 // Insert a register-kill pseudo instruction. 1913 // Insert a register-kill pseudo instruction.
1928 Context.insert(InstFakeKill::create(Func, NewCall)); 1914 Context.insert(InstFakeKill::create(Func, NewCall));
1929 1915
(...skipping 87 matching lines...)
2017 } 2003 }
2018 case InstCast::Zext: { 2004 case InstCast::Zext: {
2019 if (isVectorType(Dest->getType())) { 2005 if (isVectorType(Dest->getType())) {
2020 UnimplementedError(Func->getContext()->getFlags()); 2006 UnimplementedError(Func->getContext()->getFlags());
2021 } else if (Dest->getType() == IceType_i64) { 2007 } else if (Dest->getType() == IceType_i64) {
2022 // t1=uxtb src; dst.lo=t1; dst.hi=0 2008 // t1=uxtb src; dst.lo=t1; dst.hi=0
2023 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2009 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2024 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2010 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2025 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2011 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2026 Variable *T_Lo = makeReg(DestLo->getType()); 2012 Variable *T_Lo = makeReg(DestLo->getType());
2027 // i32 and i1 can just take up the whole register. 2013 // i32 and i1 can just take up the whole register. i32 doesn't need uxt,
2028 // i32 doesn't need uxt, while i1 will have an and mask later anyway. 2014 // while i1 will have an and mask later anyway.
2029 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) { 2015 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
2030 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); 2016 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
2031 _mov(T_Lo, Src0RF); 2017 _mov(T_Lo, Src0RF);
2032 } else { 2018 } else {
2033 Variable *Src0R = legalizeToReg(Src0); 2019 Variable *Src0R = legalizeToReg(Src0);
2034 _uxt(T_Lo, Src0R); 2020 _uxt(T_Lo, Src0R);
2035 } 2021 }
2036 if (Src0->getType() == IceType_i1) { 2022 if (Src0->getType() == IceType_i1) {
2037 Constant *One = Ctx->getConstantInt32(1); 2023 Constant *One = Ctx->getConstantInt32(1);
2038 _and(T_Lo, T_Lo, One); 2024 _and(T_Lo, T_Lo, One);
2039 } 2025 }
2040 _mov(DestLo, T_Lo); 2026 _mov(DestLo, T_Lo);
2041 Variable *T_Hi = makeReg(DestLo->getType()); 2027 Variable *T_Hi = makeReg(DestLo->getType());
2042 _mov(T_Hi, Zero); 2028 _mov(T_Hi, Zero);
2043 _mov(DestHi, T_Hi); 2029 _mov(DestHi, T_Hi);
2044 } else if (Src0->getType() == IceType_i1) { 2030 } else if (Src0->getType() == IceType_i1) {
2045 // t = Src0; t &= 1; Dest = t 2031 // t = Src0; t &= 1; Dest = t
2046 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); 2032 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
2047 Constant *One = Ctx->getConstantInt32(1); 2033 Constant *One = Ctx->getConstantInt32(1);
2048 Variable *T = makeReg(Dest->getType()); 2034 Variable *T = makeReg(Dest->getType());
2049 // Just use _mov instead of _uxt since all registers are 32-bit. 2035 // Just use _mov instead of _uxt since all registers are 32-bit. _uxt
2050 // _uxt requires the source to be a register so could have required 2036 // requires the source to be a register so could have required a _mov
2051 // a _mov from legalize anyway. 2037 // from legalize anyway.
2052 _mov(T, Src0RF); 2038 _mov(T, Src0RF);
2053 _and(T, T, One); 2039 _and(T, T, One);
2054 _mov(Dest, T); 2040 _mov(Dest, T);
2055 } else { 2041 } else {
2056 // t1 = uxt src; dst = t1 2042 // t1 = uxt src; dst = t1
2057 Variable *Src0R = legalizeToReg(Src0); 2043 Variable *Src0R = legalizeToReg(Src0);
2058 Variable *T = makeReg(Dest->getType()); 2044 Variable *T = makeReg(Dest->getType());
2059 _uxt(T, Src0R); 2045 _uxt(T, Src0R);
2060 _mov(Dest, T); 2046 _mov(Dest, T);
2061 } 2047 }
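The Zext paths above reduce to: build the low word (uxt, or a plain mov plus an and-with-1 for i1), then write zero into the high word for i64 destinations. A small sketch of the resulting values, assuming nothing beyond standard C++; the function names are invented:

#include <cassert>
#include <cstdint>

// i8 -> i64: low word is uxtb of the source, high word is zero.
static uint64_t zext8To64(uint8_t Src) {
  uint32_t Lo = Src; // uxtb
  uint32_t Hi = 0;   // mov #0
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}

// i1 -> i64: the source occupies a whole register, so mask with 1 instead of uxt.
static uint64_t zext1To64(uint32_t Src) {
  uint32_t Lo = Src & 1;
  uint32_t Hi = 0;
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}

int main() {
  assert(zext8To64(0xAB) == 0xABu);
  assert(zext1To64(0xFFFFFFFFu) == 1u);
  return 0;
}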
(...skipping 143 matching lines...)
2205 } 2191 }
2206 2192
2207 // a=icmp cond, b, c ==> 2193 // a=icmp cond, b, c ==>
2208 // GCC does: 2194 // GCC does:
2209 // cmp b.hi, c.hi or cmp b.lo, c.lo 2195 // cmp b.hi, c.hi or cmp b.lo, c.lo
2210 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi 2196 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi
2211 // mov.<C1> t, #1 mov.<C1> t, #1 2197 // mov.<C1> t, #1 mov.<C1> t, #1
2212 // mov.<C2> t, #0 mov.<C2> t, #0 2198 // mov.<C2> t, #0 mov.<C2> t, #0
2213 // mov a, t mov a, t 2199 // mov a, t mov a, t
2214 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" 2200 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
2215 // is used for signed compares. In some cases, b and c need to be swapped 2201 // is used for signed compares. In some cases, b and c need to be swapped as
2216 // as well. 2202 // well.
2217 // 2203 //
2218 // LLVM does: 2204 // LLVM does:
2219 // for EQ and NE: 2205 // for EQ and NE:
2220 // eor t1, b.hi, c.hi 2206 // eor t1, b.hi, c.hi
2221 // eor t2, b.lo, c.hi 2207 // eor t2, b.lo, c.hi
2222 // orrs t, t1, t2 2208 // orrs t, t1, t2
2223 // mov.<C> t, #1 2209 // mov.<C> t, #1
2224 // mov a, t 2210 // mov a, t
2225 // 2211 //
2226 // that's nice in that it's just as short but has fewer dependencies 2212 // that's nice in that it's just as short but has fewer dependencies for
2227 // for better ILP at the cost of more registers. 2213 // better ILP at the cost of more registers.
2228 // 2214 //
2229 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with 2215 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
2230 // two unconditional mov #0, two cmps, two conditional mov #1, 2216 // unconditional mov #0, two cmps, two conditional mov #1, and one conditonal
Jim Stichnoth 2015/09/16 00:01:29 conditional
ascull 2015/09/16 18:30:09 Done.
2231 // and one conditonal reg mov. That has few dependencies for good ILP, 2217 // reg mov. That has few dependencies for good ILP, but is a longer sequence.
2232 // but is a longer sequence.
2233 // 2218 //
2234 // So, we are going with the GCC version since it's usually better (except 2219 // So, we are going with the GCC version since it's usually better (except
2235 // perhaps for eq/ne). We could revisit special-casing eq/ne later. 2220 // perhaps for eq/ne). We could revisit special-casing eq/ne later.
2236 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2221 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2237 Constant *One = Ctx->getConstantInt32(1); 2222 Constant *One = Ctx->getConstantInt32(1);
2238 if (Src0->getType() == IceType_i64) { 2223 if (Src0->getType() == IceType_i64) {
2239 InstIcmp::ICond Conditon = Inst->getCondition(); 2224 InstIcmp::ICond Conditon = Inst->getCondition();
2240 size_t Index = static_cast<size_t>(Conditon); 2225 size_t Index = static_cast<size_t>(Conditon);
2241 assert(Index < llvm::array_lengthof(TableIcmp64)); 2226 assert(Index < llvm::array_lengthof(TableIcmp64));
2242 Variable *Src0Lo, *Src0Hi; 2227 Variable *Src0Lo, *Src0Hi;
2243 Operand *Src1LoRF, *Src1HiRF; 2228 Operand *Src1LoRF, *Src1HiRF;
2244 if (TableIcmp64[Index].Swapped) { 2229 if (TableIcmp64[Index].Swapped) {
2245 Src0Lo = legalizeToReg(loOperand(Src1)); 2230 Src0Lo = legalizeToReg(loOperand(Src1));
2246 Src0Hi = legalizeToReg(hiOperand(Src1)); 2231 Src0Hi = legalizeToReg(hiOperand(Src1));
2247 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); 2232 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
2248 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); 2233 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
2249 } else { 2234 } else {
2250 Src0Lo = legalizeToReg(loOperand(Src0)); 2235 Src0Lo = legalizeToReg(loOperand(Src0));
2251 Src0Hi = legalizeToReg(hiOperand(Src0)); 2236 Src0Hi = legalizeToReg(hiOperand(Src0));
2252 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); 2237 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
2253 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); 2238 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
2254 } 2239 }
2255 Variable *T = makeReg(IceType_i32); 2240 Variable *T = makeReg(IceType_i32);
2256 if (TableIcmp64[Index].IsSigned) { 2241 if (TableIcmp64[Index].IsSigned) {
2257 Variable *ScratchReg = makeReg(IceType_i32); 2242 Variable *ScratchReg = makeReg(IceType_i32);
2258 _cmp(Src0Lo, Src1LoRF); 2243 _cmp(Src0Lo, Src1LoRF);
2259 _sbcs(ScratchReg, Src0Hi, Src1HiRF); 2244 _sbcs(ScratchReg, Src0Hi, Src1HiRF);
2260 // ScratchReg isn't going to be used, but we need the 2245 // ScratchReg isn't going to be used, but we need the side-effect of
2261 // side-effect of setting flags from this operation. 2246 // setting flags from this operation.
2262 Context.insert(InstFakeUse::create(Func, ScratchReg)); 2247 Context.insert(InstFakeUse::create(Func, ScratchReg));
2263 } else { 2248 } else {
2264 _cmp(Src0Hi, Src1HiRF); 2249 _cmp(Src0Hi, Src1HiRF);
2265 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ); 2250 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
2266 } 2251 }
2267 _mov(T, One, TableIcmp64[Index].C1); 2252 _mov(T, One, TableIcmp64[Index].C1);
2268 _mov_nonkillable(T, Zero, TableIcmp64[Index].C2); 2253 _mov_nonkillable(T, Zero, TableIcmp64[Index].C2);
2269 _mov(Dest, T); 2254 _mov(Dest, T);
2270 return; 2255 return;
2271 } 2256 }
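For the signed half of the i64 compare above, the flags left by cmp on the low words followed by sbcs on the high words are exactly those of a full 64-bit signed comparison, which is what the two conditional moves then consume. A standalone sketch of that flag computation (illustrative only; signedLessThan64 is a made-up name and only the LT condition is modeled):

#include <cassert>
#include <cstdint>

// Signed a < b on i64 using only 32-bit pieces, mirroring
// "cmp a.lo, b.lo; sbcs t, a.hi, b.hi" and then testing LT (N != V).
static bool signedLessThan64(int64_t A, int64_t B) {
  uint32_t ALo = static_cast<uint32_t>(A), BLo = static_cast<uint32_t>(B);
  int32_t AHi = static_cast<int32_t>(static_cast<uint64_t>(A) >> 32);
  int32_t BHi = static_cast<int32_t>(static_cast<uint64_t>(B) >> 32);
  int64_t Borrow = (ALo < BLo) ? 1 : 0;       // cmp a.lo, b.lo: carry clear => borrow
  int64_t Wide = static_cast<int64_t>(AHi) - BHi - Borrow;
  int32_t T = static_cast<int32_t>(Wide);     // sbcs t, a.hi, b.hi
  bool N = (T < 0);                           // sign flag of the truncated result
  bool V = (Wide != static_cast<int64_t>(T)); // signed overflow of the 32-bit subtract
  return N != V;                              // ARM condition LT
}

int main() {
  const int64_t Samples[] = {0, 1, -1, 42, -42, INT64_MIN, INT64_MAX,
                             0x100000000LL, -0x100000000LL};
  for (int64_t A : Samples)
    for (int64_t B : Samples)
      assert(signedLessThan64(A, B) == (A < B));
  return 0;
}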
2272 2257
2273 // a=icmp cond b, c ==> 2258 // a=icmp cond b, c ==>
2274 // GCC does: 2259 // GCC does:
2275 // <u/s>xtb tb, b 2260 // <u/s>xtb tb, b
2276 // <u/s>xtb tc, c 2261 // <u/s>xtb tc, c
2277 // cmp tb, tc 2262 // cmp tb, tc
2278 // mov.C1 t, #0 2263 // mov.C1 t, #0
2279 // mov.C2 t, #1 2264 // mov.C2 t, #1
2280 // mov a, t 2265 // mov a, t
2281 // where the unsigned/sign extension is not needed for 32-bit. 2266 // where the unsigned/sign extension is not needed for 32-bit. They also have
2282 // They also have special cases for EQ and NE. E.g., for NE: 2267 // special cases for EQ and NE. E.g., for NE:
2283 // <extend to tb, tc> 2268 // <extend to tb, tc>
2284 // subs t, tb, tc 2269 // subs t, tb, tc
2285 // movne t, #1 2270 // movne t, #1
2286 // mov a, t 2271 // mov a, t
2287 // 2272 //
2288 // LLVM does: 2273 // LLVM does:
2289 // lsl tb, b, #<N> 2274 // lsl tb, b, #<N>
2290 // mov t, #0 2275 // mov t, #0
2291 // cmp tb, c, lsl #<N> 2276 // cmp tb, c, lsl #<N>
2292 // mov.<C> t, #1 2277 // mov.<C> t, #1
2293 // mov a, t 2278 // mov a, t
2294 // 2279 //
2295 // the left shift is by 0, 16, or 24, which allows the comparison to focus 2280 // the left shift is by 0, 16, or 24, which allows the comparison to focus on
2296 // on the digits that actually matter (for 16-bit or 8-bit signed/unsigned). 2281 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For
2297 // For the unsigned case, for some reason it does similar to GCC and does 2282 // the unsigned case, for some reason it does similar to GCC and does a uxtb
2298 // a uxtb first. It's not clear to me why that special-casing is needed. 2283 // first. It's not clear to me why that special-casing is needed.
2299 // 2284 //
2300 // We'll go with the LLVM way for now, since it's shorter and has just as 2285 // We'll go with the LLVM way for now, since it's shorter and has just as few
2301 // few dependencies. 2286 // dependencies.
2302 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); 2287 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
2303 assert(ShiftAmt >= 0); 2288 assert(ShiftAmt >= 0);
2304 Constant *ShiftConst = nullptr; 2289 Constant *ShiftConst = nullptr;
2305 Variable *Src0R = nullptr; 2290 Variable *Src0R = nullptr;
2306 Variable *T = makeReg(IceType_i32); 2291 Variable *T = makeReg(IceType_i32);
2307 if (ShiftAmt) { 2292 if (ShiftAmt) {
2308 ShiftConst = Ctx->getConstantInt32(ShiftAmt); 2293 ShiftConst = Ctx->getConstantInt32(ShiftAmt);
2309 Src0R = makeReg(IceType_i32); 2294 Src0R = makeReg(IceType_i32);
2310 _lsl(Src0R, legalizeToReg(Src0), ShiftConst); 2295 _lsl(Src0R, legalizeToReg(Src0), ShiftConst);
2311 } else { 2296 } else {
(...skipping 22 matching lines...)
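The lsl-by-(32 - width) trick used above works because shifting both narrow operands into the top bits of a 32-bit register preserves their ordering, signed or unsigned, so a single 32-bit cmp yields the right flags. A small standalone illustration (the function names are invented):

#include <cassert>
#include <cstdint>

// Compare two i16 values by moving them into the top 16 bits of an i32 and
// comparing the i32s, as in: lsl tb, b, #16; cmp tb, c, lsl #16.
static bool signedLess16(int16_t A, int16_t B) {
  int32_t TA = static_cast<int32_t>(static_cast<uint32_t>(A) << 16);
  int32_t TB = static_cast<int32_t>(static_cast<uint32_t>(B) << 16);
  return TA < TB;
}
static bool unsignedLess16(uint16_t A, uint16_t B) {
  uint32_t TA = static_cast<uint32_t>(A) << 16;
  uint32_t TB = static_cast<uint32_t>(B) << 16;
  return TA < TB;
}

int main() {
  assert(signedLess16(-1, 0) && !signedLess16(0, -1));
  assert(signedLess16(-32768, 32767));
  assert(unsignedLess16(0, 0xFFFF) && !unsignedLess16(0xFFFF, 0));
  return 0;
}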
2334 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { 2319 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
2335 switch (Instr->getIntrinsicInfo().ID) { 2320 switch (Instr->getIntrinsicInfo().ID) {
2336 case Intrinsics::AtomicCmpxchg: { 2321 case Intrinsics::AtomicCmpxchg: {
2337 UnimplementedError(Func->getContext()->getFlags()); 2322 UnimplementedError(Func->getContext()->getFlags());
2338 return; 2323 return;
2339 } 2324 }
2340 case Intrinsics::AtomicFence: 2325 case Intrinsics::AtomicFence:
2341 UnimplementedError(Func->getContext()->getFlags()); 2326 UnimplementedError(Func->getContext()->getFlags());
2342 return; 2327 return;
2343 case Intrinsics::AtomicFenceAll: 2328 case Intrinsics::AtomicFenceAll:
2344 // NOTE: FenceAll should prevent any load/store from being moved 2329 // NOTE: FenceAll should prevent any load/store from being moved across the
2345 // across the fence (both atomic and non-atomic). The InstARM32Mfence 2330 // fence (both atomic and non-atomic). The InstARM32Mfence instruction is
2346 // instruction is currently marked coarsely as "HasSideEffects". 2331 // currently marked coarsely as "HasSideEffects".
2347 UnimplementedError(Func->getContext()->getFlags()); 2332 UnimplementedError(Func->getContext()->getFlags());
2348 return; 2333 return;
2349 case Intrinsics::AtomicIsLockFree: { 2334 case Intrinsics::AtomicIsLockFree: {
2350 UnimplementedError(Func->getContext()->getFlags()); 2335 UnimplementedError(Func->getContext()->getFlags());
2351 return; 2336 return;
2352 } 2337 }
2353 case Intrinsics::AtomicLoad: { 2338 case Intrinsics::AtomicLoad: {
2354 UnimplementedError(Func->getContext()->getFlags()); 2339 UnimplementedError(Func->getContext()->getFlags());
2355 return; 2340 return;
2356 } 2341 }
(...skipping 37 matching lines...)
2394 case Intrinsics::Ctpop: { 2379 case Intrinsics::Ctpop: {
2395 Variable *Dest = Instr->getDest(); 2380 Variable *Dest = Instr->getDest();
2396 Operand *Val = Instr->getArg(0); 2381 Operand *Val = Instr->getArg(0);
2397 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) 2382 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
2398 ? H_call_ctpop_i32 2383 ? H_call_ctpop_i32
2399 : H_call_ctpop_i64, 2384 : H_call_ctpop_i64,
2400 Dest, 1); 2385 Dest, 1);
2401 Call->addArg(Val); 2386 Call->addArg(Val);
2402 lowerCall(Call); 2387 lowerCall(Call);
2403 // The popcount helpers always return 32-bit values, while the intrinsic's 2388 // The popcount helpers always return 32-bit values, while the intrinsic's
2404 // signature matches some 64-bit platform's native instructions and 2389 // signature matches some 64-bit platform's native instructions and expect
2405 // expect to fill a 64-bit reg. Thus, clear the upper bits of the dest 2390 // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in
2406 // just in case the user doesn't do that in the IR or doesn't toss the bits 2391 // case the user doesn't do that in the IR or doesn't toss the bits via
2407 // via truncate. 2392 // truncate.
2408 if (Val->getType() == IceType_i64) { 2393 if (Val->getType() == IceType_i64) {
2409 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2394 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2410 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2395 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2411 Variable *T = nullptr; 2396 Variable *T = nullptr;
2412 _mov(T, Zero); 2397 _mov(T, Zero);
2413 _mov(DestHi, T); 2398 _mov(DestHi, T);
2414 } 2399 }
2415 return; 2400 return;
2416 } 2401 }
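As the comment above notes, the popcount helper returns a 32-bit count even for the i64 intrinsic, so the lowering explicitly zeroes the high word of the destination. A rough sketch of the net effect, with popcount32 standing in for the helper call (this is not the actual H_call_ctpop_* routine):

#include <cassert>
#include <cstdint>

// Portable 32-bit popcount standing in for the helper call.
static uint32_t popcount32(uint32_t V) {
  uint32_t N = 0;
  for (; V != 0; V &= V - 1)
    ++N;
  return N;
}

// i64 popcount: the count fits in 32 bits, and the lowering then writes zero
// into the high word of the i64 destination.
static uint64_t popcount64(uint64_t V) {
  uint32_t Count = popcount32(static_cast<uint32_t>(V)) +
                   popcount32(static_cast<uint32_t>(V >> 32));
  return static_cast<uint64_t>(Count); // DestHi = 0
}

int main() {
  assert(popcount64(0) == 0);
  assert(popcount64(0xFFFFFFFFFFFFFFFFull) == 64);
  assert(popcount64(0x8000000000000001ull) == 2);
  return 0;
}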
2417 case Intrinsics::Ctlz: { 2402 case Intrinsics::Ctlz: {
2418 // The "is zero undef" parameter is ignored and we always return 2403 // The "is zero undef" parameter is ignored and we always return a
2419 // a well-defined value. 2404 // well-defined value.
2420 Operand *Val = Instr->getArg(0); 2405 Operand *Val = Instr->getArg(0);
2421 Variable *ValLoR; 2406 Variable *ValLoR;
2422 Variable *ValHiR = nullptr; 2407 Variable *ValHiR = nullptr;
2423 if (Val->getType() == IceType_i64) { 2408 if (Val->getType() == IceType_i64) {
2424 Val = legalizeUndef(Val); 2409 Val = legalizeUndef(Val);
2425 ValLoR = legalizeToReg(loOperand(Val)); 2410 ValLoR = legalizeToReg(loOperand(Val));
2426 ValHiR = legalizeToReg(hiOperand(Val)); 2411 ValHiR = legalizeToReg(hiOperand(Val));
2427 } else { 2412 } else {
2428 ValLoR = legalizeToReg(Val); 2413 ValLoR = legalizeToReg(Val);
2429 } 2414 }
(...skipping 126 matching lines...)
2556 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2541 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2557 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2542 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2558 Operand *Zero = 2543 Operand *Zero =
2559 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); 2544 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2560 Operand *ThirtyTwo = 2545 Operand *ThirtyTwo =
2561 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); 2546 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2562 _cmp(ValHiR, Zero); 2547 _cmp(ValHiR, Zero);
2563 Variable *T2 = makeReg(IceType_i32); 2548 Variable *T2 = makeReg(IceType_i32);
2564 _add(T2, T, ThirtyTwo); 2549 _add(T2, T, ThirtyTwo);
2565 _clz(T2, ValHiR, CondARM32::NE); 2550 _clz(T2, ValHiR, CondARM32::NE);
2566 // T2 is actually a source as well when the predicate is not AL 2551 // T2 is actually a source as well when the predicate is not AL (since it
2567 // (since it may leave T2 alone). We use set_dest_nonkillable to 2552 // may leave T2 alone). We use set_dest_nonkillable to prolong the liveness
2568 // prolong the liveness of T2 as if it was used as a source. 2553 // of T2 as if it was used as a source.
2569 _set_dest_nonkillable(); 2554 _set_dest_nonkillable();
2570 _mov(DestLo, T2); 2555 _mov(DestLo, T2);
2571 Variable *T3 = nullptr; 2556 Variable *T3 = nullptr;
2572 _mov(T3, Zero); 2557 _mov(T3, Zero);
2573 _mov(DestHi, T3); 2558 _mov(DestHi, T3);
2574 return; 2559 return;
2575 } 2560 }
2576 _mov(Dest, T); 2561 _mov(Dest, T);
2577 return; 2562 return;
2578 } 2563 }
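The i64 clz sequence above (with T holding the 32-bit clz of the low word, computed in the lines elided above) selects clz(hi) when the high word is non-zero and clz(lo) + 32 otherwise, then zeroes the high word of the result. A standalone sketch, with clz32 standing in for the clz instruction:

#include <cassert>
#include <cstdint>

// 32-bit count-leading-zeros; clz(0) = 32, matching the ARM instruction.
static uint32_t clz32(uint32_t V) {
  uint32_t N = 0;
  for (uint32_t Bit = 0x80000000u; Bit != 0 && (V & Bit) == 0; Bit >>= 1)
    ++N;
  return N;
}

// Mirrors the lowering: T = clz(lo); T2 = T + 32; if (hi != 0) T2 = clz(hi).
static uint64_t clz64(uint32_t Lo, uint32_t Hi) {
  uint32_t T = clz32(Lo);
  uint32_t T2 = T + 32;
  if (Hi != 0)
    T2 = clz32(Hi);
  return T2; // DestHi = 0
}

int main() {
  assert(clz64(0, 0) == 64);
  assert(clz64(1, 0) == 63);
  assert(clz64(0, 1) == 31);
  assert(clz64(0, 0x80000000u) == 0);
  return 0;
}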
2579 2564
2580 void TargetARM32::lowerLoad(const InstLoad *Load) { 2565 void TargetARM32::lowerLoad(const InstLoad *Load) {
2581 // A Load instruction can be treated the same as an Assign 2566 // A Load instruction can be treated the same as an Assign instruction, after
2582 // instruction, after the source operand is transformed into an 2567 // the source operand is transformed into an OperandARM32Mem operand.
2583 // OperandARM32Mem operand.
2584 Type Ty = Load->getDest()->getType(); 2568 Type Ty = Load->getDest()->getType();
2585 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); 2569 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
2586 Variable *DestLoad = Load->getDest(); 2570 Variable *DestLoad = Load->getDest();
2587 2571
2588 // TODO(jvoung): handle folding opportunities. Sign and zero extension 2572 // TODO(jvoung): handle folding opportunities. Sign and zero extension can
2589 // can be folded into a load. 2573 // be folded into a load.
2590 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); 2574 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
2591 lowerAssign(Assign); 2575 lowerAssign(Assign);
2592 } 2576 }
2593 2577
2594 void TargetARM32::doAddressOptLoad() { 2578 void TargetARM32::doAddressOptLoad() {
2595 UnimplementedError(Func->getContext()->getFlags()); 2579 UnimplementedError(Func->getContext()->getFlags());
2596 } 2580 }
2597 2581
2598 void TargetARM32::randomlyInsertNop(float Probability, 2582 void TargetARM32::randomlyInsertNop(float Probability,
2599 RandomNumberGenerator &RNG) { 2583 RandomNumberGenerator &RNG) {
(...skipping 25 matching lines...)
2625 Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0); 2609 Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0);
2626 Reg = D0; 2610 Reg = D0;
2627 } else if (isVectorType(Src0->getType())) { 2611 } else if (isVectorType(Src0->getType())) {
2628 Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0); 2612 Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0);
2629 Reg = Q0; 2613 Reg = Q0;
2630 } else { 2614 } else {
2631 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex); 2615 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
2632 _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0); 2616 _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0);
2633 } 2617 }
2634 } 2618 }
2635 // Add a ret instruction even if sandboxing is enabled, because 2619 // Add a ret instruction even if sandboxing is enabled, because addEpilog
2636 // addEpilog explicitly looks for a ret instruction as a marker for 2620 // explicitly looks for a ret instruction as a marker for where to insert the
2637 // where to insert the frame removal instructions. 2621 // frame removal instructions. addEpilog is responsible for restoring the
2638 // addEpilog is responsible for restoring the "lr" register as needed 2622 // "lr" register as needed prior to this ret instruction.
2639 // prior to this ret instruction.
2640 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg); 2623 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
2641 // Add a fake use of sp to make sure sp stays alive for the entire 2624 // Add a fake use of sp to make sure sp stays alive for the entire function.
2642 // function. Otherwise post-call sp adjustments get dead-code 2625 // Otherwise post-call sp adjustments get dead-code eliminated.
2643 // eliminated. TODO: Are there more places where the fake use 2626 // TODO: Are there more places where the fake use should be inserted? E.g.
2644 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not 2627 // "void f(int n){while(1) g(n);}" may not have a ret instruction.
2645 // have a ret instruction.
2646 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 2628 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
2647 Context.insert(InstFakeUse::create(Func, SP)); 2629 Context.insert(InstFakeUse::create(Func, SP));
2648 } 2630 }
2649 2631
2650 void TargetARM32::lowerSelect(const InstSelect *Inst) { 2632 void TargetARM32::lowerSelect(const InstSelect *Inst) {
2651 Variable *Dest = Inst->getDest(); 2633 Variable *Dest = Inst->getDest();
2652 Type DestTy = Dest->getType(); 2634 Type DestTy = Dest->getType();
2653 Operand *SrcT = Inst->getTrueOperand(); 2635 Operand *SrcT = Inst->getTrueOperand();
2654 Operand *SrcF = Inst->getFalseOperand(); 2636 Operand *SrcF = Inst->getFalseOperand();
2655 Operand *Condition = Inst->getCondition(); 2637 Operand *Condition = Inst->getCondition();
(...skipping 113 matching lines...)
2769 } 2751 }
2770 2752
2771 // Helper for legalize() to emit the right code to lower an operand to a 2753 // Helper for legalize() to emit the right code to lower an operand to a
2772 // register of the appropriate type. 2754 // register of the appropriate type.
2773 Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) { 2755 Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
2774 Type Ty = Src->getType(); 2756 Type Ty = Src->getType();
2775 Variable *Reg = makeReg(Ty, RegNum); 2757 Variable *Reg = makeReg(Ty, RegNum);
2776 if (isVectorType(Ty) || isFloatingType(Ty)) { 2758 if (isVectorType(Ty) || isFloatingType(Ty)) {
2777 _vmov(Reg, Src); 2759 _vmov(Reg, Src);
2778 } else { 2760 } else {
2779 // Mov's Src operand can really only be the flexible second operand type 2761 // Mov's Src operand can really only be the flexible second operand type or
2780 // or a register. Users should guarantee that. 2762 // a register. Users should guarantee that.
2781 _mov(Reg, Src); 2763 _mov(Reg, Src);
2782 } 2764 }
2783 return Reg; 2765 return Reg;
2784 } 2766 }
2785 2767
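The legalize() routine below checks whether a 32-bit constant (or its bitwise inverse, for mvn) fits the flexible second operand before falling back to a movw/movt pair. For reference, here is a standalone sketch of the A32 modified-immediate rule that check corresponds to: an 8-bit value rotated right by an even amount. The helper names are invented; this is not OperandARM32FlexImm::canHoldImm itself.

#include <cassert>
#include <cstdint>

// Rotate a 32-bit value left by Amt (0..31).
static uint32_t rol32(uint32_t V, uint32_t Amt) {
  Amt &= 31;
  return Amt == 0 ? V : (V << Amt) | (V >> (32 - Amt));
}

// True if Value can be written as an 8-bit constant rotated right by an even
// amount, i.e. it fits an ARM A32 data-processing immediate.
static bool encodesAsARMImmediate(uint32_t Value, uint32_t *RotateAmt,
                                  uint32_t *Immed8) {
  for (uint32_t Rot = 0; Rot < 32; Rot += 2) {
    uint32_t Candidate = rol32(Value, Rot); // undoes a rotate-right by Rot
    if (Candidate <= 0xFF) {
      *RotateAmt = Rot;
      *Immed8 = Candidate;
      return true;
    }
  }
  return false;
}

int main() {
  uint32_t Rot, Imm;
  assert(encodesAsARMImmediate(0xFF, &Rot, &Imm) && Rot == 0 && Imm == 0xFF);
  assert(encodesAsARMImmediate(0xFF000000u, &Rot, &Imm) && Imm == 0xFF);
  assert(!encodesAsARMImmediate(0x101u, &Rot, &Imm)); // needs mvn or movw/movt
  return 0;
}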
2786 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, 2768 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
2787 int32_t RegNum) { 2769 int32_t RegNum) {
2788 Type Ty = From->getType(); 2770 Type Ty = From->getType();
2789 // Assert that a physical register is allowed. To date, all calls 2771 // Assert that a physical register is allowed. To date, all calls to
2790 // to legalize() allow a physical register. Legal_Flex converts 2772 // legalize() allow a physical register. Legal_Flex converts registers to the
2791 // registers to the right type OperandARM32FlexReg as needed. 2773 // right type OperandARM32FlexReg as needed.
2792 assert(Allowed & Legal_Reg); 2774 assert(Allowed & Legal_Reg);
2793 // Go through the various types of operands: 2775 // Go through the various types of operands: OperandARM32Mem,
2794 // OperandARM32Mem, OperandARM32Flex, Constant, and Variable. 2776 // OperandARM32Flex, Constant, and Variable. Given the above assertion, if
2795 // Given the above assertion, if type of operand is not legal 2777 // type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we
2796 // (e.g., OperandARM32Mem and !Legal_Mem), we can always copy 2778 // can always copy to a register.
2797 // to a register.
2798 if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) { 2779 if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
2799 // Before doing anything with a Mem operand, we need to ensure 2780 // Before doing anything with a Mem operand, we need to ensure that the
2800 // that the Base and Index components are in physical registers. 2781 // Base and Index components are in physical registers.
2801 Variable *Base = Mem->getBase(); 2782 Variable *Base = Mem->getBase();
2802 Variable *Index = Mem->getIndex(); 2783 Variable *Index = Mem->getIndex();
2803 Variable *RegBase = nullptr; 2784 Variable *RegBase = nullptr;
2804 Variable *RegIndex = nullptr; 2785 Variable *RegIndex = nullptr;
2805 if (Base) { 2786 if (Base) {
2806 RegBase = legalizeToReg(Base); 2787 RegBase = legalizeToReg(Base);
2807 } 2788 }
2808 if (Index) { 2789 if (Index) {
2809 RegIndex = legalizeToReg(Index); 2790 RegIndex = legalizeToReg(Index);
2810 } 2791 }
(...skipping 24 matching lines...)
2835 From = Mem; 2816 From = Mem;
2836 } 2817 }
2837 return From; 2818 return From;
2838 } 2819 }
2839 2820
2840 if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) { 2821 if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
2841 if (!(Allowed & Legal_Flex)) { 2822 if (!(Allowed & Legal_Flex)) {
2842 if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) { 2823 if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
2843 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) { 2824 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
2844 From = FlexReg->getReg(); 2825 From = FlexReg->getReg();
2845 // Fall through and let From be checked as a Variable below, 2826 // Fall through and let From be checked as a Variable below, where it
2846 // where it may or may not need a register. 2827 // may or may not need a register.
2847 } else { 2828 } else {
2848 return copyToReg(Flex, RegNum); 2829 return copyToReg(Flex, RegNum);
2849 } 2830 }
2850 } else { 2831 } else {
2851 return copyToReg(Flex, RegNum); 2832 return copyToReg(Flex, RegNum);
2852 } 2833 }
2853 } else { 2834 } else {
2854 return From; 2835 return From;
2855 } 2836 }
2856 } 2837 }
2857 2838
2858 if (llvm::isa<Constant>(From)) { 2839 if (llvm::isa<Constant>(From)) {
2859 if (llvm::isa<ConstantUndef>(From)) { 2840 if (llvm::isa<ConstantUndef>(From)) {
2860 From = legalizeUndef(From, RegNum); 2841 From = legalizeUndef(From, RegNum);
2861 if (isVectorType(Ty)) 2842 if (isVectorType(Ty))
2862 return From; 2843 return From;
2863 } 2844 }
2864 // There should be no constants of vector type (other than undef). 2845 // There should be no constants of vector type (other than undef).
2865 assert(!isVectorType(Ty)); 2846 assert(!isVectorType(Ty));
2866 bool CanBeFlex = Allowed & Legal_Flex; 2847 bool CanBeFlex = Allowed & Legal_Flex;
2867 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) { 2848 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
2868 uint32_t RotateAmt; 2849 uint32_t RotateAmt;
2869 uint32_t Immed_8; 2850 uint32_t Immed_8;
2870 uint32_t Value = static_cast<uint32_t>(C32->getValue()); 2851 uint32_t Value = static_cast<uint32_t>(C32->getValue());
2871 // Check if the immediate will fit in a Flexible second operand, 2852 // Check if the immediate will fit in a Flexible second operand, if a
2872 // if a Flexible second operand is allowed. We need to know the exact 2853 // Flexible second operand is allowed. We need to know the exact value,
2873 // value, so that rules out relocatable constants. 2854 // so that rules out relocatable constants. Also try the inverse and use
2874 // Also try the inverse and use MVN if possible. 2855 // MVN if possible.
2875 if (CanBeFlex && 2856 if (CanBeFlex &&
2876 OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) { 2857 OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
2877 return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); 2858 return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
2878 } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm( 2859 } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm(
2879 ~Value, &RotateAmt, &Immed_8)) { 2860 ~Value, &RotateAmt, &Immed_8)) {
2880 auto InvertedFlex = 2861 auto InvertedFlex =
2881 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); 2862 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
2882 Variable *Reg = makeReg(Ty, RegNum); 2863 Variable *Reg = makeReg(Ty, RegNum);
2883 _mvn(Reg, InvertedFlex); 2864 _mvn(Reg, InvertedFlex);
2884 return Reg; 2865 return Reg;
2885 } else { 2866 } else {
2886 // Do a movw/movt to a register. 2867 // Do a movw/movt to a register.
2887 Variable *Reg = makeReg(Ty, RegNum); 2868 Variable *Reg = makeReg(Ty, RegNum);
2888 uint32_t UpperBits = (Value >> 16) & 0xFFFF; 2869 uint32_t UpperBits = (Value >> 16) & 0xFFFF;
2889 _movw(Reg, 2870 _movw(Reg,
2890 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); 2871 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
2891 if (UpperBits != 0) { 2872 if (UpperBits != 0) {
2892 _movt(Reg, Ctx->getConstantInt32(UpperBits)); 2873 _movt(Reg, Ctx->getConstantInt32(UpperBits));
2893 } 2874 }
2894 return Reg; 2875 return Reg;
2895 } 2876 }
2896 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { 2877 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
2897 Variable *Reg = makeReg(Ty, RegNum); 2878 Variable *Reg = makeReg(Ty, RegNum);
2898 _movw(Reg, C); 2879 _movw(Reg, C);
2899 _movt(Reg, C); 2880 _movt(Reg, C);
2900 return Reg; 2881 return Reg;
2901 } else { 2882 } else {
2902 assert(isScalarFloatingType(Ty)); 2883 assert(isScalarFloatingType(Ty));
2903 // Load floats/doubles from literal pool. 2884 // Load floats/doubles from literal pool.
2904 // TODO(jvoung): Allow certain immediates to be encoded directly in 2885 // TODO(jvoung): Allow certain immediates to be encoded directly in an
2905 // an operand. See Table A7-18 of the ARM manual: 2886 // operand. See Table A7-18 of the ARM manual: "Floating-point modified
2906 // "Floating-point modified immediate constants". 2887 // immediate constants". Or, for 32-bit floating point numbers, just
2907 // Or, for 32-bit floating point numbers, just encode the raw bits 2888 // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG,
2908 // into a movw/movt pair to GPR, and vmov to an SREG, instead of using 2889 // instead of using a movw/movt pair to get the const-pool address then
2909 // a movw/movt pair to get the const-pool address then loading to SREG. 2890 // loading to SREG.
2910 std::string Buffer; 2891 std::string Buffer;
2911 llvm::raw_string_ostream StrBuf(Buffer); 2892 llvm::raw_string_ostream StrBuf(Buffer);
2912 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf); 2893 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
2913 llvm::cast<Constant>(From)->setShouldBePooled(true); 2894 llvm::cast<Constant>(From)->setShouldBePooled(true);
2914 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); 2895 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
2915 Variable *BaseReg = makeReg(getPointerType()); 2896 Variable *BaseReg = makeReg(getPointerType());
2916 _movw(BaseReg, Offset); 2897 _movw(BaseReg, Offset);
2917 _movt(BaseReg, Offset); 2898 _movt(BaseReg, Offset);
2918 From = formMemoryOperand(BaseReg, Ty); 2899 From = formMemoryOperand(BaseReg, Ty);
2919 return copyToReg(From, RegNum); 2900 return copyToReg(From, RegNum);
2920 } 2901 }
2921 } 2902 }
2922 2903
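The integer-constant branch above defers the actual encoding test to OperandARM32FlexImm::canHoldImm, whose body is outside this diff. As a rough sketch only, assuming the standard A32 modified-immediate rule (an 8-bit value rotated right by an even amount) and using a hypothetical helper name rather than the real Subzero API, the check and the resulting materialization order look roughly like this:

    #include <cstdint>

    // Illustrative stand-in for OperandARM32FlexImm::canHoldImm: an A32
    // data-processing immediate is an 8-bit value rotated right by an even
    // amount (0, 2, ..., 30).
    static bool canEncodeA32Immediate(uint32_t Value, uint32_t *RotateAmt,
                                      uint32_t *Immed8) {
      for (uint32_t Rot = 0; Rot < 32; Rot += 2) {
        // Rotating Value left by Rot undoes a right-rotation by Rot.
        uint32_t Unrotated = (Value << Rot) | (Value >> ((32 - Rot) & 31));
        if (Unrotated <= 0xFF) {
          *RotateAmt = Rot;
          *Immed8 = Unrotated;
          return true;
        }
      }
      return false;
    }

    // Materialization order mirrored from the branch above:
    //   1) Value encodes directly  -> flexible second operand (no extra insn).
    //   2) ~Value encodes          -> MVN of the inverted immediate.
    //   3) otherwise               -> movw of the low 16 bits, plus movt of the
    //                                 high 16 bits when they are nonzero.

Callers that can accept a flexible operand request it with Legal_Reg | Legal_Flex, as alignRegisterPow2 does further down; otherwise the constant always ends up in a register.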
2923 if (auto Var = llvm::dyn_cast<Variable>(From)) { 2904 if (auto Var = llvm::dyn_cast<Variable>(From)) {
2924 // Check if the variable is guaranteed a physical register. This 2905 // Check if the variable is guaranteed a physical register. This can happen
2925 // can happen either when the variable is pre-colored or when it is 2906 // either when the variable is pre-colored or when it is assigned infinite
2926 // assigned infinite weight. 2907 // weight.
2927 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); 2908 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
2928 // We need a new physical register for the operand if: 2909 // We need a new physical register for the operand if:
2929 // Mem is not allowed and Var isn't guaranteed a physical 2910 // Mem is not allowed and Var isn't guaranteed a physical
2930 // register, or 2911 // register, or
2931 // RegNum is required and Var->getRegNum() doesn't match. 2912 // RegNum is required and Var->getRegNum() doesn't match.
2932 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || 2913 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
2933 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { 2914 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
2934 From = copyToReg(From, RegNum); 2915 From = copyToReg(From, RegNum);
2935 } 2916 }
2936 return From; 2917 return From;
2937 } 2918 }
2938 llvm_unreachable("Unhandled operand kind in legalize()"); 2919 llvm_unreachable("Unhandled operand kind in legalize()");
2939 2920
2940 return From; 2921 return From;
2941 } 2922 }
2942 2923
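The TODO in the scalar floating-point branch sketches an alternative to the literal-pool load for 32-bit floats: put the raw IEEE-754 bit pattern in a GPR with a movw/movt pair and vmov it to an S register. A minimal illustration of the bit split that approach would need (hypothetical helper, not part of this CL):

    #include <cstdint>
    #include <cstring>

    // Split a float's raw bits into the halfwords a movw/movt pair would carry.
    static void splitFloatBits(float F, uint32_t *Lo16, uint32_t *Hi16) {
      uint32_t Bits;
      static_assert(sizeof(Bits) == sizeof(F), "expects a 32-bit float");
      std::memcpy(&Bits, &F, sizeof(Bits)); // bit-cast without aliasing issues
      *Lo16 = Bits & 0xFFFF;         // movw operand
      *Hi16 = (Bits >> 16) & 0xFFFF; // movt operand
    }

The lowering would then be a movw/movt of these bits followed by a vmov to an S register, instead of a movw/movt of the pool label followed by a load, avoiding the constant-pool entry and the memory access.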
2943 /// Provide a trivial wrapper to legalize() for this common usage. 2924 /// Provide a trivial wrapper to legalize() for this common usage.
2944 Variable *TargetARM32::legalizeToReg(Operand *From, int32_t RegNum) { 2925 Variable *TargetARM32::legalizeToReg(Operand *From, int32_t RegNum) {
2945 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); 2926 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
2946 } 2927 }
2947 2928
2948 /// Legalize undef values to concrete values. 2929 /// Legalize undef values to concrete values.
2949 Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) { 2930 Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) {
2950 Type Ty = From->getType(); 2931 Type Ty = From->getType();
2951 if (llvm::isa<ConstantUndef>(From)) { 2932 if (llvm::isa<ConstantUndef>(From)) {
2952 // Lower undefs to zero. Another option is to lower undefs to an 2933 // Lower undefs to zero. Another option is to lower undefs to an
2953 // uninitialized register; however, using an uninitialized register 2934 // uninitialized register; however, using an uninitialized register results
2954 // results in less predictable code. 2935 // in less predictable code.
2955 // 2936 //
2956 // If in the future the implementation is changed to lower undef 2937 // If in the future the implementation is changed to lower undef values to
2957 // values to uninitialized registers, a FakeDef will be needed: 2938 // uninitialized registers, a FakeDef will be needed:
2958 // Context.insert(InstFakeDef::create(Func, Reg)); 2939 // Context.insert(InstFakeDef::create(Func, Reg)); This is in order to
2959 // This is in order to ensure that the live range of Reg is not 2940 // ensure that the live range of Reg is not overestimated. If the constant
2960 // overestimated. If the constant being lowered is a 64 bit value, 2941 // being lowered is a 64 bit value, then the result should be split and the
2961 // then the result should be split and the lo and hi components will 2942 // lo and hi components will need to go in uninitialized registers.
2962 // need to go in uninitialized registers.
2963 if (isVectorType(Ty)) 2943 if (isVectorType(Ty))
2964 return makeVectorOfZeros(Ty, RegNum); 2944 return makeVectorOfZeros(Ty, RegNum);
2965 return Ctx->getConstantZero(Ty); 2945 return Ctx->getConstantZero(Ty);
2966 } 2946 }
2967 return From; 2947 return From;
2968 } 2948 }
2969 2949
2970 OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) { 2950 OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
2971 OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand); 2951 OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
2972 // It may be the case that address mode optimization already creates 2952 // It may be the case that address mode optimization already creates an
2973 // an OperandARM32Mem, so in that case it wouldn't need another level 2953 // OperandARM32Mem, so in that case it wouldn't need another level of
2974 // of transformation. 2954 // transformation.
2975 if (Mem) { 2955 if (Mem) {
2976 return llvm::cast<OperandARM32Mem>(legalize(Mem)); 2956 return llvm::cast<OperandARM32Mem>(legalize(Mem));
2977 } 2957 }
2978 // If we didn't do address mode optimization, then we only 2958 // If we didn't do address mode optimization, then we only have a base/offset
2979 // have a base/offset to work with. ARM always requires a base 2959 // to work with. ARM always requires a base register, so just use that to
2980 // register, so just use that to hold the operand. 2960 // hold the operand.
2981 Variable *Base = legalizeToReg(Operand); 2961 Variable *Base = legalizeToReg(Operand);
2982 return OperandARM32Mem::create( 2962 return OperandARM32Mem::create(
2983 Func, Ty, Base, 2963 Func, Ty, Base,
2984 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); 2964 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
2985 } 2965 }
2986 2966
2987 Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) { 2967 Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
2988 // There aren't any 64-bit integer registers for ARM32. 2968 // There aren't any 64-bit integer registers for ARM32.
2989 assert(Type != IceType_i64); 2969 assert(Type != IceType_i64);
2990 Variable *Reg = Func->makeVariable(Type); 2970 Variable *Reg = Func->makeVariable(Type);
2991 if (RegNum == Variable::NoRegister) 2971 if (RegNum == Variable::NoRegister)
2992 Reg->setMustHaveReg(); 2972 Reg->setMustHaveReg();
2993 else 2973 else
2994 Reg->setRegNum(RegNum); 2974 Reg->setRegNum(RegNum);
2995 return Reg; 2975 return Reg;
2996 } 2976 }
2997 2977
2998 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) { 2978 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
2999 assert(llvm::isPowerOf2_32(Align)); 2979 assert(llvm::isPowerOf2_32(Align));
3000 uint32_t RotateAmt; 2980 uint32_t RotateAmt;
3001 uint32_t Immed_8; 2981 uint32_t Immed_8;
3002 Operand *Mask; 2982 Operand *Mask;
3003 // Use AND or BIC to mask off the bits, depending on which immediate fits 2983 // Use AND or BIC to mask off the bits, depending on which immediate fits (if
3004 // (if it fits at all). Assume Align is usually small, in which case BIC 2984 // it fits at all). Assume Align is usually small, in which case BIC works
3005 // works better. Thus, this rounds down to the alignment. 2985 // better. Thus, this rounds down to the alignment.
3006 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) { 2986 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
3007 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex); 2987 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
3008 _bic(Reg, Reg, Mask); 2988 _bic(Reg, Reg, Mask);
3009 } else { 2989 } else {
3010 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex); 2990 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex);
3011 _and(Reg, Reg, Mask); 2991 _and(Reg, Reg, Mask);
3012 } 2992 }
3013 } 2993 }
3014 2994
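alignRegisterPow2 rounds the register down to a power-of-two alignment; AND with -Align and BIC with Align - 1 compute the same mask, and the choice only affects which of the two immediates can be encoded. A small model of the arithmetic (plain C++, not Subzero code):

    #include <cassert>
    #include <cstdint>

    // Round Value down to a power-of-two Align. The two forms used above:
    //   Value & -Align         (the AND path)
    //   Value & ~(Align - 1)   (what BIC with Align - 1 computes)
    static uint32_t alignDownPow2(uint32_t Value, uint32_t Align) {
      assert(Align != 0 && (Align & (Align - 1)) == 0 && "Align must be a power of 2");
      return Value & ~(Align - 1);
    }

For example, alignDownPow2(0x1237, 16) == 0x1230. For a small Align, the mask Align - 1 (e.g. 15) encodes as an ARM immediate far more often than -Align (e.g. 0xFFFFFFF0), which is why the BIC form is tried first.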
3015 void TargetARM32::postLower() { 2995 void TargetARM32::postLower() {
(...skipping 71 matching lines...)
3087 UnimplementedError(Ctx->getFlags()); 3067 UnimplementedError(Ctx->getFlags());
3088 } 3068 }
3089 3069
3090 TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx) 3070 TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
3091 : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {} 3071 : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {}
3092 3072
3093 void TargetHeaderARM32::lower() { 3073 void TargetHeaderARM32::lower() {
3094 OstreamLocker L(Ctx); 3074 OstreamLocker L(Ctx);
3095 Ostream &Str = Ctx->getStrEmit(); 3075 Ostream &Str = Ctx->getStrEmit();
3096 Str << ".syntax unified\n"; 3076 Str << ".syntax unified\n";
3097 // Emit build attributes in format: .eabi_attribute TAG, VALUE. 3077 // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of
3098 // See Sec. 2 of "Addenda to, and Errata in the ABI for the ARM architecture" 3078 // "Addenda to, and Errata in the ABI for the ARM architecture"
3099 // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf 3079 // http://infocenter.arm.com
3080 // /help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
3100 // 3081 //
3101 // Tag_conformance should be emitted first in a file-scope 3082 // Tag_conformance should be emitted first in a file-scope sub-subsection
3102 // sub-subsection of the first public subsection of the attributes. 3083 // of the first public subsection of the attributes.
3103 Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n"; 3084 Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
3104 // Chromebooks are at least A15, but do A9 for higher compat. 3085 // Chromebooks are at least A15, but do A9 for higher compat. For some
3105 // For some reason, the LLVM ARM asm parser has the .cpu directive override 3086 // reason, the LLVM ARM asm parser has the .cpu directive override the mattr
3106 // the mattr specified on the commandline. So to test hwdiv, we need to set 3087 // specified on the commandline. So to test hwdiv, we need to set the .cpu
3107 // the .cpu directive higher (can't just rely on --mattr=...). 3088 // directive higher (can't just rely on --mattr=...).
3108 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { 3089 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
3109 Str << ".cpu cortex-a15\n"; 3090 Str << ".cpu cortex-a15\n";
3110 } else { 3091 } else {
3111 Str << ".cpu cortex-a9\n"; 3092 Str << ".cpu cortex-a9\n";
3112 } 3093 }
3113 Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n" 3094 Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
3114 << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n"; 3095 << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
3115 Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n" 3096 Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
3116 << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n"; 3097 << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
3117 Str << ".fpu neon\n" 3098 Str << ".fpu neon\n"
(...skipping 11 matching lines...)
3129 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; 3110 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
3130 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { 3111 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
3131 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; 3112 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
3132 } 3113 }
3133 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 3114 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
3134 // However, for compatibility with current NaCl LLVM, don't claim that. 3115 // However, for compatibility with current NaCl LLVM, don't claim that.
3135 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 3116 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
3136 } 3117 }
3137 3118
3138 } // end of namespace Ice 3119 } // end of namespace Ice