OLD | NEW |
---|---|
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 29 matching lines...) | |
40 do { \ | 40 do { \ |
41 if (!static_cast<const ClFlags &>(Flags).getSkipUnimplemented()) { \ | 41 if (!static_cast<const ClFlags &>(Flags).getSkipUnimplemented()) { \ |
42 /* Use llvm_unreachable instead of report_fatal_error, which gives \ | 42 /* Use llvm_unreachable instead of report_fatal_error, which gives \ |
43 better stack traces. */ \ | 43 better stack traces. */ \ |
44 llvm_unreachable("Not yet implemented"); \ | 44 llvm_unreachable("Not yet implemented"); \ |
45 abort(); \ | 45 abort(); \ |
46 } \ | 46 } \ |
47 } while (0) | 47 } while (0) |
48 | 48 |
49 // The following table summarizes the logic for lowering the icmp instruction | 49 // The following table summarizes the logic for lowering the icmp instruction |
50 // for i32 and narrower types. Each icmp condition has a clear mapping to an | 50 // for i32 and narrower types. Each icmp condition has a clear mapping to an |
51 // ARM32 conditional move instruction. | 51 // ARM32 conditional move instruction. |
52 | 52 |
53 const struct TableIcmp32_ { | 53 const struct TableIcmp32_ { |
54 CondARM32::Cond Mapping; | 54 CondARM32::Cond Mapping; |
55 } TableIcmp32[] = { | 55 } TableIcmp32[] = { |
56 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \ | 56 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \ |
57 { CondARM32::C_32 } \ | 57 { CondARM32::C_32 } \ |
58 , | 58 , |
59 ICMPARM32_TABLE | 59 ICMPARM32_TABLE |
60 #undef X | 60 #undef X |
61 }; | 61 }; |
62 | 62 |
63 // The following table summarizes the logic for lowering the icmp instruction | 63 // The following table summarizes the logic for lowering the icmp instruction |
64 // for the i64 type. Two conditional moves are needed for setting to 1 or 0. | 64 // for the i64 type. Two conditional moves are needed for setting to 1 or 0. |
65 // The operands may need to be swapped, and there is a slight difference | 65 // The operands may need to be swapped, and there is a slight difference for |
66 // for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc). | 66 // signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc). |
67 const struct TableIcmp64_ { | 67 const struct TableIcmp64_ { |
68 bool IsSigned; | 68 bool IsSigned; |
69 bool Swapped; | 69 bool Swapped; |
70 CondARM32::Cond C1, C2; | 70 CondARM32::Cond C1, C2; |
71 } TableIcmp64[] = { | 71 } TableIcmp64[] = { |
72 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \ | 72 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \ |
73 { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 } \ | 73 { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 } \ |
74 , | 74 , |
75 ICMPARM32_TABLE | 75 ICMPARM32_TABLE |
76 #undef X | 76 #undef X |
77 }; | 77 }; |
78 | 78 |
79 CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) { | 79 CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) { |
80 size_t Index = static_cast<size_t>(Cond); | 80 size_t Index = static_cast<size_t>(Cond); |
81 assert(Index < llvm::array_lengthof(TableIcmp32)); | 81 assert(Index < llvm::array_lengthof(TableIcmp32)); |
82 return TableIcmp32[Index].Mapping; | 82 return TableIcmp32[Index].Mapping; |
83 } | 83 } |
84 | 84 |
85 // In some cases, there are x-macros tables for both high-level and | 85 // In some cases, there are x-macros tables for both high-level and low-level |
86 // low-level instructions/operands that use the same enum key value. | 86 // instructions/operands that use the same enum key value. The tables are kept |
87 // The tables are kept separate to maintain a proper separation | 87 // separate to maintain a proper separation between abstraction layers. There |
88 // between abstraction layers. There is a risk that the tables could | 88 // is a risk that the tables could get out of sync if enum values are reordered |
89 // get out of sync if enum values are reordered or if entries are | 89 // or if entries are added or deleted. The following dummy namespaces use |
90 // added or deleted. The following dummy namespaces use | |
91 // static_asserts to ensure everything is kept in sync. | 90 // static_asserts to ensure everything is kept in sync. |
92 | 91 |
93 // Validate the enum values in ICMPARM32_TABLE. | 92 // Validate the enum values in ICMPARM32_TABLE. |
94 namespace dummy1 { | 93 namespace dummy1 { |
95 // Define a temporary set of enum values based on low-level table | 94 // Define a temporary set of enum values based on low-level table entries. |
96 // entries. | |
97 enum _tmp_enum { | 95 enum _tmp_enum { |
98 #define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val, | 96 #define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val, |
99 ICMPARM32_TABLE | 97 ICMPARM32_TABLE |
100 #undef X | 98 #undef X |
101 _num | 99 _num |
102 }; | 100 }; |
103 // Define a set of constants based on high-level table entries. | 101 // Define a set of constants based on high-level table entries. |
104 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; | 102 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; |
105 ICEINSTICMP_TABLE | 103 ICEINSTICMP_TABLE |
106 #undef X | 104 #undef X |
107 // Define a set of constants based on low-level table entries, and | 105 // Define a set of constants based on low-level table entries, and ensure the |
108 // ensure the table entry keys are consistent. | 106 // table entry keys are consistent. |
109 #define X(val, signed, swapped64, C_32, C1_64, C2_64) \ | 107 #define X(val, signed, swapped64, C_32, C1_64, C2_64) \ |
110 static const int _table2_##val = _tmp_##val; \ | 108 static const int _table2_##val = _tmp_##val; \ |
111 static_assert( \ | 109 static_assert( \ |
112 _table1_##val == _table2_##val, \ | 110 _table1_##val == _table2_##val, \ |
113 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE"); | 111 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE"); |
114 ICMPARM32_TABLE | 112 ICMPARM32_TABLE |
115 #undef X | 113 #undef X |
116 // Repeat the static asserts with respect to the high-level table | 114 // Repeat the static asserts with respect to the high-level table entries in |
117 // entries in case the high-level table has extra entries. | 115 // case the high-level table has extra entries. |
118 #define X(tag, str) \ | 116 #define X(tag, str) \ |
119 static_assert( \ | 117 static_assert( \ |
120 _table1_##tag == _table2_##tag, \ | 118 _table1_##tag == _table2_##tag, \ |
121 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE"); | 119 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE"); |
122 ICEINSTICMP_TABLE | 120 ICEINSTICMP_TABLE |
123 #undef X | 121 #undef X |
124 } // end of namespace dummy1 | 122 } // end of namespace dummy1 |
125 | 123 |
126 // Stack alignment | 124 // Stack alignment |
127 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16; | 125 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16; |
128 | 126 |
129 // Value is in bytes. Return Value adjusted to the next highest multiple | 127 // Value is in bytes. Return Value adjusted to the next highest multiple of the |
130 // of the stack alignment. | 128 // stack alignment. |
131 uint32_t applyStackAlignment(uint32_t Value) { | 129 uint32_t applyStackAlignment(uint32_t Value) { |
132 return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES); | 130 return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES); |
133 } | 131 } |
134 | 132 |
135 // Value is in bytes. Return Value adjusted to the next highest multiple | 133 // Value is in bytes. Return Value adjusted to the next highest multiple of the |
136 // of the stack alignment required for the given type. | 134 // stack alignment required for the given type. |
137 uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) { | 135 uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) { |
138 // Use natural alignment, except that normally (non-NaCl) ARM only | 136 // Use natural alignment, except that normally (non-NaCl) ARM only aligns |
139 // aligns vectors to 8 bytes. | 137 // vectors to 8 bytes. |
140 // TODO(jvoung): Check this ... | 138 // TODO(jvoung): Check this ... |
141 size_t typeAlignInBytes = typeWidthInBytes(Ty); | 139 size_t typeAlignInBytes = typeWidthInBytes(Ty); |
142 if (isVectorType(Ty)) | 140 if (isVectorType(Ty)) |
143 typeAlignInBytes = 8; | 141 typeAlignInBytes = 8; |
144 return Utils::applyAlignment(Value, typeAlignInBytes); | 142 return Utils::applyAlignment(Value, typeAlignInBytes); |
145 } | 143 } |
146 | 144 |
147 // Conservatively check if at compile time we know that the operand is | 145 // Conservatively check if at compile time we know that the operand is |
148 // definitely a non-zero integer. | 146 // definitely a non-zero integer. |
149 bool isGuaranteedNonzeroInt(const Operand *Op) { | 147 bool isGuaranteedNonzeroInt(const Operand *Op) { |
(...skipping 15 matching lines...) | |
165 TargetInstructionSet::BaseInstructionSet) { | 163 TargetInstructionSet::BaseInstructionSet) { |
166 InstructionSet = static_cast<ARM32InstructionSet>( | 164 InstructionSet = static_cast<ARM32InstructionSet>( |
167 (Flags.getTargetInstructionSet() - | 165 (Flags.getTargetInstructionSet() - |
168 TargetInstructionSet::ARM32InstructionSet_Begin) + | 166 TargetInstructionSet::ARM32InstructionSet_Begin) + |
169 ARM32InstructionSet::Begin); | 167 ARM32InstructionSet::Begin); |
170 } | 168 } |
171 } | 169 } |
172 | 170 |
173 TargetARM32::TargetARM32(Cfg *Func) | 171 TargetARM32::TargetARM32(Cfg *Func) |
174 : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) { | 172 : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) { |
175 // TODO: Don't initialize IntegerRegisters and friends every time. | 173 // TODO: Don't initialize IntegerRegisters and friends every time. Instead, |
176 // Instead, initialize in some sort of static initializer for the | 174 // initialize in some sort of static initializer for the class. |
177 // class. | |
178 // Limit this size (or do all bitsets need to be the same width)??? | 175 // Limit this size (or do all bitsets need to be the same width)??? |
179 llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM); | 176 llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM); |
180 llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM); | 177 llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM); |
181 llvm::SmallBitVector Float64Registers(RegARM32::Reg_NUM); | 178 llvm::SmallBitVector Float64Registers(RegARM32::Reg_NUM); |
182 llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM); | 179 llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM); |
183 llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM); | 180 llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM); |
184 ScratchRegs.resize(RegARM32::Reg_NUM); | 181 ScratchRegs.resize(RegARM32::Reg_NUM); |
185 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ | 182 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ |
186 isFP32, isFP64, isVec128, alias_init) \ | 183 isFP32, isFP64, isVec128, alias_init) \ |
187 IntegerRegisters[RegARM32::val] = isInt; \ | 184 IntegerRegisters[RegARM32::val] = isInt; \ |
(...skipping 48 matching lines...) | |
236 Func->dump("After Phi lowering"); | 233 Func->dump("After Phi lowering"); |
237 } | 234 } |
238 | 235 |
239 // Address mode optimization. | 236 // Address mode optimization. |
240 Func->getVMetadata()->init(VMK_SingleDefs); | 237 Func->getVMetadata()->init(VMK_SingleDefs); |
241 Func->doAddressOpt(); | 238 Func->doAddressOpt(); |
242 | 239 |
243 // Argument lowering | 240 // Argument lowering |
244 Func->doArgLowering(); | 241 Func->doArgLowering(); |
245 | 242 |
246 // Target lowering. This requires liveness analysis for some parts | 243 // Target lowering. This requires liveness analysis for some parts of the |
247 // of the lowering decisions, such as compare/branch fusing. If | 244 // lowering decisions, such as compare/branch fusing. If non-lightweight |
248 // non-lightweight liveness analysis is used, the instructions need | 245 // liveness analysis is used, the instructions need to be renumbered first. |
249 // to be renumbered first. TODO: This renumbering should only be | 246 // TODO: This renumbering should only be necessary if we're actually |
250 // necessary if we're actually calculating live intervals, which we | 247 // calculating live intervals, which we only do for register allocation. |
251 // only do for register allocation. | |
252 Func->renumberInstructions(); | 248 Func->renumberInstructions(); |
253 if (Func->hasError()) | 249 if (Func->hasError()) |
254 return; | 250 return; |
255 | 251 |
256 // TODO: It should be sufficient to use the fastest liveness | 252 // TODO: It should be sufficient to use the fastest liveness calculation, |
257 // calculation, i.e. livenessLightweight(). However, for some | 253 // i.e. livenessLightweight(). However, for some reason that slows down the |
258 // reason that slows down the rest of the translation. Investigate. | 254 // rest of the translation. Investigate. |
259 Func->liveness(Liveness_Basic); | 255 Func->liveness(Liveness_Basic); |
260 if (Func->hasError()) | 256 if (Func->hasError()) |
261 return; | 257 return; |
262 Func->dump("After ARM32 address mode opt"); | 258 Func->dump("After ARM32 address mode opt"); |
263 | 259 |
264 Func->genCode(); | 260 Func->genCode(); |
265 if (Func->hasError()) | 261 if (Func->hasError()) |
266 return; | 262 return; |
267 Func->dump("After ARM32 codegen"); | 263 Func->dump("After ARM32 codegen"); |
268 | 264 |
269 // Register allocation. This requires instruction renumbering and | 265 // Register allocation. This requires instruction renumbering and full |
270 // full liveness analysis. | 266 // liveness analysis. |
271 Func->renumberInstructions(); | 267 Func->renumberInstructions(); |
272 if (Func->hasError()) | 268 if (Func->hasError()) |
273 return; | 269 return; |
274 Func->liveness(Liveness_Intervals); | 270 Func->liveness(Liveness_Intervals); |
275 if (Func->hasError()) | 271 if (Func->hasError()) |
276 return; | 272 return; |
277 // Validate the live range computations. The expensive validation | 273 // Validate the live range computations. The expensive validation call is |
278 // call is deliberately only made when assertions are enabled. | 274 // deliberately only made when assertions are enabled. |
279 assert(Func->validateLiveness()); | 275 assert(Func->validateLiveness()); |
280 // The post-codegen dump is done here, after liveness analysis and | 276 // The post-codegen dump is done here, after liveness analysis and associated |
281 // associated cleanup, to make the dump cleaner and more useful. | 277 // cleanup, to make the dump cleaner and more useful. |
282 Func->dump("After initial ARM32 codegen"); | 278 Func->dump("After initial ARM32 codegen"); |
283 Func->getVMetadata()->init(VMK_All); | 279 Func->getVMetadata()->init(VMK_All); |
284 regAlloc(RAK_Global); | 280 regAlloc(RAK_Global); |
285 if (Func->hasError()) | 281 if (Func->hasError()) |
286 return; | 282 return; |
287 Func->dump("After linear scan regalloc"); | 283 Func->dump("After linear scan regalloc"); |
288 | 284 |
289 if (Ctx->getFlags().getPhiEdgeSplit()) { | 285 if (Ctx->getFlags().getPhiEdgeSplit()) { |
290 Func->advancedPhiLowering(); | 286 Func->advancedPhiLowering(); |
291 Func->dump("After advanced Phi lowering"); | 287 Func->dump("After advanced Phi lowering"); |
292 } | 288 } |
293 | 289 |
294 // Stack frame mapping. | 290 // Stack frame mapping. |
295 Func->genFrame(); | 291 Func->genFrame(); |
296 if (Func->hasError()) | 292 if (Func->hasError()) |
297 return; | 293 return; |
298 Func->dump("After stack frame mapping"); | 294 Func->dump("After stack frame mapping"); |
299 | 295 |
300 legalizeStackSlots(); | 296 legalizeStackSlots(); |
301 if (Func->hasError()) | 297 if (Func->hasError()) |
302 return; | 298 return; |
303 Func->dump("After legalizeStackSlots"); | 299 Func->dump("After legalizeStackSlots"); |
304 | 300 |
305 Func->contractEmptyNodes(); | 301 Func->contractEmptyNodes(); |
306 Func->reorderNodes(); | 302 Func->reorderNodes(); |
307 | 303 |
308 // Branch optimization. This needs to be done just before code | 304 // Branch optimization. This needs to be done just before code emission. In |
309 // emission. In particular, no transformations that insert or | 305 // particular, no transformations that insert or reorder CfgNodes should be |
310 // reorder CfgNodes should be done after branch optimization. We go | 306 // done after branch optimization. We go ahead and do it before nop insertion |
311 // ahead and do it before nop insertion to reduce the amount of work | 307 // to reduce the amount of work needed for searching for opportunities. |
312 // needed for searching for opportunities. | |
313 Func->doBranchOpt(); | 308 Func->doBranchOpt(); |
314 Func->dump("After branch optimization"); | 309 Func->dump("After branch optimization"); |
315 | 310 |
316 // Nop insertion | 311 // Nop insertion |
317 if (Ctx->getFlags().shouldDoNopInsertion()) { | 312 if (Ctx->getFlags().shouldDoNopInsertion()) { |
318 Func->doNopInsertion(); | 313 Func->doNopInsertion(); |
319 } | 314 } |
320 } | 315 } |
321 | 316 |
322 void TargetARM32::translateOm1() { | 317 void TargetARM32::translateOm1() { |
(...skipping 65 matching lines...) | |
388 if (Ty == IceType_void) | 383 if (Ty == IceType_void) |
389 Ty = IceType_i32; | 384 Ty = IceType_i32; |
390 if (PhysicalRegisters[Ty].empty()) | 385 if (PhysicalRegisters[Ty].empty()) |
391 PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM); | 386 PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM); |
392 assert(RegNum < PhysicalRegisters[Ty].size()); | 387 assert(RegNum < PhysicalRegisters[Ty].size()); |
393 Variable *Reg = PhysicalRegisters[Ty][RegNum]; | 388 Variable *Reg = PhysicalRegisters[Ty][RegNum]; |
394 if (Reg == nullptr) { | 389 if (Reg == nullptr) { |
395 Reg = Func->makeVariable(Ty); | 390 Reg = Func->makeVariable(Ty); |
396 Reg->setRegNum(RegNum); | 391 Reg->setRegNum(RegNum); |
397 PhysicalRegisters[Ty][RegNum] = Reg; | 392 PhysicalRegisters[Ty][RegNum] = Reg; |
398 // Specially mark SP and LR as an "argument" so that it is considered | 393 // Specially mark SP and LR as an "argument" so that it is considered live |
399 // live upon function entry. | 394 // upon function entry. |
400 if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) { | 395 if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) { |
401 Func->addImplicitArg(Reg); | 396 Func->addImplicitArg(Reg); |
402 Reg->setIgnoreLiveness(); | 397 Reg->setIgnoreLiveness(); |
403 } | 398 } |
404 } | 399 } |
405 return Reg; | 400 return Reg; |
406 } | 401 } |
407 | 402 |
408 void TargetARM32::emitJumpTable(const Cfg *Func, | 403 void TargetARM32::emitJumpTable(const Cfg *Func, |
409 const InstJumpTable *JumpTable) const { | 404 const InstJumpTable *JumpTable) const { |
(...skipping 28 matching lines...) | |
438 if (Offset != 0) { | 433 if (Offset != 0) { |
439 Str << ", " << getConstantPrefix() << Offset; | 434 Str << ", " << getConstantPrefix() << Offset; |
440 } | 435 } |
441 Str << "]"; | 436 Str << "]"; |
442 } | 437 } |
443 | 438 |
444 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) { | 439 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) { |
445 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) | 440 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) |
446 return false; | 441 return false; |
447 int32_t RegLo, RegHi; | 442 int32_t RegLo, RegHi; |
448 // Always start i64 registers at an even register, so this may end | 443 // Always start i64 registers at an even register, so this may end up padding |
449 // up padding away a register. | 444 // away a register. |
450 NumGPRRegsUsed = Utils::applyAlignment(NumGPRRegsUsed, 2); | 445 NumGPRRegsUsed = Utils::applyAlignment(NumGPRRegsUsed, 2); |
451 RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed; | 446 RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed; |
452 ++NumGPRRegsUsed; | 447 ++NumGPRRegsUsed; |
453 RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed; | 448 RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed; |
454 ++NumGPRRegsUsed; | 449 ++NumGPRRegsUsed; |
455 // If this bumps us past the boundary, don't allocate to a register | 450 // If this bumps us past the boundary, don't allocate to a register and leave |
456 // and leave any previously speculatively consumed registers as consumed. | 451 // any previously speculatively consumed registers as consumed. |
457 if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG) | 452 if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG) |
458 return false; | 453 return false; |
459 Regs->first = RegLo; | 454 Regs->first = RegLo; |
460 Regs->second = RegHi; | 455 Regs->second = RegHi; |
461 return true; | 456 return true; |
462 } | 457 } |
463 | 458 |
464 bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) { | 459 bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) { |
465 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) | 460 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) |
466 return false; | 461 return false; |
467 *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed; | 462 *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed; |
468 ++NumGPRRegsUsed; | 463 ++NumGPRRegsUsed; |
469 return true; | 464 return true; |
470 } | 465 } |
471 | 466 |
472 bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { | 467 bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { |
473 if (NumFPRegUnits >= ARM32_MAX_FP_REG_UNITS) | 468 if (NumFPRegUnits >= ARM32_MAX_FP_REG_UNITS) |
474 return false; | 469 return false; |
475 if (isVectorType(Ty)) { | 470 if (isVectorType(Ty)) { |
476 NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 4); | 471 NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 4); |
477 // Q registers are declared in reverse order, so | 472 // Q registers are declared in reverse order, so RegARM32::Reg_q0 > |
478 // RegARM32::Reg_q0 > RegARM32::Reg_q1. Therefore, we need to subtract | 473 // RegARM32::Reg_q1. Therefore, we need to subtract NumFPRegUnits from |
479 // NumFPRegUnits from Reg_q0. Same thing goes for D registers. | 474 // Reg_q0. Same thing goes for D registers. |
480 static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1, | 475 static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1, |
481 "ARM32 Q registers are possibly declared incorrectly."); | 476 "ARM32 Q registers are possibly declared incorrectly."); |
482 *Reg = RegARM32::Reg_q0 - (NumFPRegUnits / 4); | 477 *Reg = RegARM32::Reg_q0 - (NumFPRegUnits / 4); |
483 NumFPRegUnits += 4; | 478 NumFPRegUnits += 4; |
484 // If this bumps us past the boundary, don't allocate to a register | 479 // If this bumps us past the boundary, don't allocate to a register and |
485 // and leave any previously speculatively consumed registers as consumed. | 480 // leave any previously speculatively consumed registers as consumed. |
486 if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS) | 481 if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS) |
487 return false; | 482 return false; |
488 } else if (Ty == IceType_f64) { | 483 } else if (Ty == IceType_f64) { |
489 static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1, | 484 static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1, |
490 "ARM32 D registers are possibly declared incorrectly."); | 485 "ARM32 D registers are possibly declared incorrectly."); |
491 NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 2); | 486 NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 2); |
492 *Reg = RegARM32::Reg_d0 - (NumFPRegUnits / 2); | 487 *Reg = RegARM32::Reg_d0 - (NumFPRegUnits / 2); |
493 NumFPRegUnits += 2; | 488 NumFPRegUnits += 2; |
494 // If this bumps us past the boundary, don't allocate to a register | 489 // If this bumps us past the boundary, don't allocate to a register and |
495 // and leave any previously speculatively consumed registers as consumed. | 490 // leave any previously speculatively consumed registers as consumed. |
496 if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS) | 491 if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS) |
497 return false; | 492 return false; |
498 } else { | 493 } else { |
499 static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1, | 494 static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1, |
500 "ARM32 S registers are possibly declared incorrectly."); | 495 "ARM32 S registers are possibly declared incorrectly."); |
501 assert(Ty == IceType_f32); | 496 assert(Ty == IceType_f32); |
502 *Reg = RegARM32::Reg_s0 + NumFPRegUnits; | 497 *Reg = RegARM32::Reg_s0 + NumFPRegUnits; |
503 ++NumFPRegUnits; | 498 ++NumFPRegUnits; |
504 } | 499 } |
505 return true; | 500 return true; |
506 } | 501 } |
507 | 502 |
508 void TargetARM32::lowerArguments() { | 503 void TargetARM32::lowerArguments() { |
509 VarList &Args = Func->getArgs(); | 504 VarList &Args = Func->getArgs(); |
510 TargetARM32::CallingConv CC; | 505 TargetARM32::CallingConv CC; |
511 | 506 |
512 // For each register argument, replace Arg in the argument list with the | 507 // For each register argument, replace Arg in the argument list with the home |
513 // home register. Then generate an instruction in the prolog to copy the | 508 // register. Then generate an instruction in the prolog to copy the home |
514 // home register to the assigned location of Arg. | 509 // register to the assigned location of Arg. |
515 Context.init(Func->getEntryNode()); | 510 Context.init(Func->getEntryNode()); |
516 Context.setInsertPoint(Context.getCur()); | 511 Context.setInsertPoint(Context.getCur()); |
517 | 512 |
518 for (SizeT I = 0, E = Args.size(); I < E; ++I) { | 513 for (SizeT I = 0, E = Args.size(); I < E; ++I) { |
519 Variable *Arg = Args[I]; | 514 Variable *Arg = Args[I]; |
520 Type Ty = Arg->getType(); | 515 Type Ty = Arg->getType(); |
521 if (Ty == IceType_i64) { | 516 if (Ty == IceType_i64) { |
522 std::pair<int32_t, int32_t> RegPair; | 517 std::pair<int32_t, int32_t> RegPair; |
523 if (!CC.I64InRegs(&RegPair)) | 518 if (!CC.I64InRegs(&RegPair)) |
524 continue; | 519 continue; |
(...skipping 36 matching lines...) | |
561 | 556 |
562 Args[I] = RegisterArg; | 557 Args[I] = RegisterArg; |
563 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); | 558 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); |
564 continue; | 559 continue; |
565 } | 560 } |
566 } | 561 } |
567 } | 562 } |
568 | 563 |
569 // Helper function for addProlog(). | 564 // Helper function for addProlog(). |
570 // | 565 // |
571 // This assumes Arg is an argument passed on the stack. This sets the | 566 // This assumes Arg is an argument passed on the stack. This sets the frame |
572 // frame offset for Arg and updates InArgsSizeBytes according to Arg's | 567 // offset for Arg and updates InArgsSizeBytes according to Arg's width. For an |
573 // width. For an I64 arg that has been split into Lo and Hi components, | 568 // I64 arg that has been split into Lo and Hi components, it calls itself |
574 // it calls itself recursively on the components, taking care to handle | 569 // recursively on the components, taking care to handle Lo first because of the |
575 // Lo first because of the little-endian architecture. Lastly, this | 570 // little-endian architecture. Lastly, this function generates an instruction |
576 // function generates an instruction to copy Arg into its assigned | 571 // to copy Arg into its assigned register if applicable. |
577 // register if applicable. | |
578 void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, | 572 void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
579 size_t BasicFrameOffset, | 573 size_t BasicFrameOffset, |
580 size_t &InArgsSizeBytes) { | 574 size_t &InArgsSizeBytes) { |
581 Variable *Lo = Arg->getLo(); | 575 Variable *Lo = Arg->getLo(); |
582 Variable *Hi = Arg->getHi(); | 576 Variable *Hi = Arg->getHi(); |
583 Type Ty = Arg->getType(); | 577 Type Ty = Arg->getType(); |
584 if (Lo && Hi && Ty == IceType_i64) { | 578 if (Lo && Hi && Ty == IceType_i64) { |
585 assert(Lo->getType() != IceType_i64); // don't want infinite recursion | 579 assert(Lo->getType() != IceType_i64); // don't want infinite recursion |
586 assert(Hi->getType() != IceType_i64); // don't want infinite recursion | 580 assert(Hi->getType() != IceType_i64); // don't want infinite recursion |
587 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 581 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
588 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 582 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
589 return; | 583 return; |
590 } | 584 } |
591 InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty); | 585 InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty); |
592 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); | 586 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
593 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 587 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
594 // If the argument variable has been assigned a register, we need to load | 588 // If the argument variable has been assigned a register, we need to load the |
595 // the value from the stack slot. | 589 // value from the stack slot. |
596 if (Arg->hasReg()) { | 590 if (Arg->hasReg()) { |
597 assert(Ty != IceType_i64); | 591 assert(Ty != IceType_i64); |
598 OperandARM32Mem *Mem = OperandARM32Mem::create( | 592 OperandARM32Mem *Mem = OperandARM32Mem::create( |
599 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( | 593 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( |
600 Ctx->getConstantInt32(Arg->getStackOffset()))); | 594 Ctx->getConstantInt32(Arg->getStackOffset()))); |
601 if (isVectorType(Arg->getType())) { | 595 if (isVectorType(Arg->getType())) { |
602 // Use vld1.$elem or something? | 596 // Use vld1.$elem or something? |
603 UnimplementedError(Func->getContext()->getFlags()); | 597 UnimplementedError(Func->getContext()->getFlags()); |
604 } else if (isFloatingType(Arg->getType())) { | 598 } else if (isFloatingType(Arg->getType())) { |
605 _vldr(Arg, Mem); | 599 _vldr(Arg, Mem); |
606 } else { | 600 } else { |
607 _ldr(Arg, Mem); | 601 _ldr(Arg, Mem); |
608 } | 602 } |
609 // This argument-copying instruction uses an explicit | 603 // This argument-copying instruction uses an explicit OperandARM32Mem |
610 // OperandARM32Mem operand instead of a Variable, so its | 604 // operand instead of a Variable, so its fill-from-stack operation has to |
611 // fill-from-stack operation has to be tracked separately for | 605 // be tracked separately for statistics. |
612 // statistics. | |
613 Ctx->statsUpdateFills(); | 606 Ctx->statsUpdateFills(); |
614 } | 607 } |
615 } | 608 } |
616 | 609 |
617 Type TargetARM32::stackSlotType() { return IceType_i32; } | 610 Type TargetARM32::stackSlotType() { return IceType_i32; } |
618 | 611 |
619 void TargetARM32::addProlog(CfgNode *Node) { | 612 void TargetARM32::addProlog(CfgNode *Node) { |
620 // Stack frame layout: | 613 // Stack frame layout: |
621 // | 614 // |
622 // +------------------------+ | 615 // +------------------------+ |
(...skipping 12 matching lines...) Expand all Loading... | |
635 // | 7. allocas | | 628 // | 7. allocas | |
636 // +------------------------+ <--- StackPointer | 629 // +------------------------+ <--- StackPointer |
637 // | 630 // |
638 // The following variables record the size in bytes of the given areas: | 631 // The following variables record the size in bytes of the given areas: |
639 // * PreservedRegsSizeBytes: area 1 | 632 // * PreservedRegsSizeBytes: area 1 |
640 // * SpillAreaPaddingBytes: area 2 | 633 // * SpillAreaPaddingBytes: area 2 |
641 // * GlobalsSize: area 3 | 634 // * GlobalsSize: area 3 |
642 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 | 635 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 |
643 // * LocalsSpillAreaSize: area 5 | 636 // * LocalsSpillAreaSize: area 5 |
644 // * SpillAreaSizeBytes: areas 2 - 6 | 637 // * SpillAreaSizeBytes: areas 2 - 6 |
645 // Determine stack frame offsets for each Variable without a | 638 // Determine stack frame offsets for each Variable without a register |
646 // register assignment. This can be done as one variable per stack | 639 // assignment. This can be done as one variable per stack slot. Or, do |
647 // slot. Or, do coalescing by running the register allocator again | 640 // coalescing by running the register allocator again with an infinite set of |
648 // with an infinite set of registers (as a side effect, this gives | 641 // registers (as a side effect, this gives variables a second chance at |
649 // variables a second chance at physical register assignment). | 642 // physical register assignment). |
650 // | 643 // |
651 // A middle ground approach is to leverage sparsity and allocate one | 644 // A middle ground approach is to leverage sparsity and allocate one block of |
652 // block of space on the frame for globals (variables with | 645 // space on the frame for globals (variables with multi-block lifetime), and |
653 // multi-block lifetime), and one block to share for locals | 646 // one block to share for locals (single-block lifetime). |
654 // (single-block lifetime). | |
655 | 647 |
656 Context.init(Node); | 648 Context.init(Node); |
657 Context.setInsertPoint(Context.getCur()); | 649 Context.setInsertPoint(Context.getCur()); |
658 | 650 |
659 llvm::SmallBitVector CalleeSaves = | 651 llvm::SmallBitVector CalleeSaves = |
660 getRegisterSet(RegSet_CalleeSave, RegSet_None); | 652 getRegisterSet(RegSet_CalleeSave, RegSet_None); |
661 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); | 653 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); |
662 VarList SortedSpilledVariables; | 654 VarList SortedSpilledVariables; |
663 size_t GlobalsSize = 0; | 655 size_t GlobalsSize = 0; |
664 // If there is a separate locals area, this represents that area. | 656 // If there is a separate locals area, this represents that area. Otherwise |
665 // Otherwise it counts any variable not counted by GlobalsSize. | 657 // it counts any variable not counted by GlobalsSize. |
666 SpillAreaSizeBytes = 0; | 658 SpillAreaSizeBytes = 0; |
667 // If there is a separate locals area, this specifies the alignment | 659 // If there is a separate locals area, this specifies the alignment for it. |
668 // for it. | |
669 uint32_t LocalsSlotsAlignmentBytes = 0; | 660 uint32_t LocalsSlotsAlignmentBytes = 0; |
670 // The entire spill locations area gets aligned to largest natural | 661 // The entire spill locations area gets aligned to largest natural alignment |
671 // alignment of the variables that have a spill slot. | 662 // of the variables that have a spill slot. |
672 uint32_t SpillAreaAlignmentBytes = 0; | 663 uint32_t SpillAreaAlignmentBytes = 0; |
673 // For now, we don't have target-specific variables that need special | 664 // For now, we don't have target-specific variables that need special |
674 // treatment (no stack-slot-linked SpillVariable type). | 665 // treatment (no stack-slot-linked SpillVariable type). |
675 std::function<bool(Variable *)> TargetVarHook = | 666 std::function<bool(Variable *)> TargetVarHook = |
676 [](Variable *) { return false; }; | 667 [](Variable *) { return false; }; |
677 | 668 |
678 // Compute the list of spilled variables and bounds for GlobalsSize, etc. | 669 // Compute the list of spilled variables and bounds for GlobalsSize, etc. |
679 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, | 670 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, |
680 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, | 671 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, |
681 &LocalsSlotsAlignmentBytes, TargetVarHook); | 672 &LocalsSlotsAlignmentBytes, TargetVarHook); |
682 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; | 673 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; |
683 SpillAreaSizeBytes += GlobalsSize; | 674 SpillAreaSizeBytes += GlobalsSize; |
684 | 675 |
685 // Add push instructions for preserved registers. | 676 // Add push instructions for preserved registers. On ARM, "push" can push a |
686 // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15). | 677 // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has |
687 // Unlike x86, ARM also has callee-saved float/vector registers. | 678 // callee-saved float/vector registers. The "vpush" instruction can handle a |
688 // The "vpush" instruction can handle a whole list of float/vector | 679 // whole list of float/vector registers, but it only handles contiguous |
689 // registers, but it only handles contiguous sequences of registers | 680 // sequences of registers by specifying the start and the length. |
690 // by specifying the start and the length. | |
691 VarList GPRsToPreserve; | 681 VarList GPRsToPreserve; |
692 GPRsToPreserve.reserve(CalleeSaves.size()); | 682 GPRsToPreserve.reserve(CalleeSaves.size()); |
693 uint32_t NumCallee = 0; | 683 uint32_t NumCallee = 0; |
694 size_t PreservedRegsSizeBytes = 0; | 684 size_t PreservedRegsSizeBytes = 0; |
695 // Consider FP and LR as callee-save / used as needed. | 685 // Consider FP and LR as callee-save / used as needed. |
696 if (UsesFramePointer) { | 686 if (UsesFramePointer) { |
697 CalleeSaves[RegARM32::Reg_fp] = true; | 687 CalleeSaves[RegARM32::Reg_fp] = true; |
698 assert(RegsUsed[RegARM32::Reg_fp] == false); | 688 assert(RegsUsed[RegARM32::Reg_fp] == false); |
699 RegsUsed[RegARM32::Reg_fp] = true; | 689 RegsUsed[RegARM32::Reg_fp] = true; |
700 } | 690 } |
701 if (!MaybeLeafFunc) { | 691 if (!MaybeLeafFunc) { |
702 CalleeSaves[RegARM32::Reg_lr] = true; | 692 CalleeSaves[RegARM32::Reg_lr] = true; |
703 RegsUsed[RegARM32::Reg_lr] = true; | 693 RegsUsed[RegARM32::Reg_lr] = true; |
704 } | 694 } |
705 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | 695 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
706 if (CalleeSaves[i] && RegsUsed[i]) { | 696 if (CalleeSaves[i] && RegsUsed[i]) { |
707 // TODO(jvoung): do separate vpush for each floating point | 697 // TODO(jvoung): do separate vpush for each floating point register |
708 // register segment and += 4, or 8 depending on type. | 698 // segment and += 4, or 8 depending on type. |
709 ++NumCallee; | 699 ++NumCallee; |
710 PreservedRegsSizeBytes += 4; | 700 PreservedRegsSizeBytes += 4; |
711 GPRsToPreserve.push_back(getPhysicalRegister(i)); | 701 GPRsToPreserve.push_back(getPhysicalRegister(i)); |
712 } | 702 } |
713 } | 703 } |
714 Ctx->statsUpdateRegistersSaved(NumCallee); | 704 Ctx->statsUpdateRegistersSaved(NumCallee); |
715 if (!GPRsToPreserve.empty()) | 705 if (!GPRsToPreserve.empty()) |
716 _push(GPRsToPreserve); | 706 _push(GPRsToPreserve); |
717 | 707 |
718 // Generate "mov FP, SP" if needed. | 708 // Generate "mov FP, SP" if needed. |
719 if (UsesFramePointer) { | 709 if (UsesFramePointer) { |
720 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); | 710 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); |
721 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 711 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
722 _mov(FP, SP); | 712 _mov(FP, SP); |
723 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). | 713 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). |
724 Context.insert(InstFakeUse::create(Func, FP)); | 714 Context.insert(InstFakeUse::create(Func, FP)); |
725 } | 715 } |
726 | 716 |
727 // Align the variables area. SpillAreaPaddingBytes is the size of | 717 // Align the variables area. SpillAreaPaddingBytes is the size of the region |
728 // the region after the preserved registers and before the spill areas. | 718 // after the preserved registers and before the spill areas. |
729 // LocalsSlotsPaddingBytes is the amount of padding between the globals | 719 // LocalsSlotsPaddingBytes is the amount of padding between the globals and |
730 // and locals area if they are separate. | 720 // locals area if they are separate. |
731 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES); | 721 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES); |
732 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); | 722 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
733 uint32_t SpillAreaPaddingBytes = 0; | 723 uint32_t SpillAreaPaddingBytes = 0; |
734 uint32_t LocalsSlotsPaddingBytes = 0; | 724 uint32_t LocalsSlotsPaddingBytes = 0; |
735 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, | 725 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, |
736 GlobalsSize, LocalsSlotsAlignmentBytes, | 726 GlobalsSize, LocalsSlotsAlignmentBytes, |
737 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); | 727 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); |
738 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; | 728 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; |
739 uint32_t GlobalsAndSubsequentPaddingSize = | 729 uint32_t GlobalsAndSubsequentPaddingSize = |
740 GlobalsSize + LocalsSlotsPaddingBytes; | 730 GlobalsSize + LocalsSlotsPaddingBytes; |
(...skipping 10 matching lines...) | |
751 // Use the scratch register if needed to legalize the immediate. | 741 // Use the scratch register if needed to legalize the immediate. |
752 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), | 742 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), |
753 Legal_Reg | Legal_Flex, getReservedTmpReg()); | 743 Legal_Reg | Legal_Flex, getReservedTmpReg()); |
754 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 744 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
755 _sub(SP, SP, SubAmount); | 745 _sub(SP, SP, SubAmount); |
756 } | 746 } |
757 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | 747 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
758 | 748 |
759 resetStackAdjustment(); | 749 resetStackAdjustment(); |
760 | 750 |
761 // Fill in stack offsets for stack args, and copy args into registers | 751 // Fill in stack offsets for stack args, and copy args into registers for |
762 // for those that were register-allocated. Args are pushed right to | 752 // those that were register-allocated. Args are pushed right to left, so |
763 // left, so Arg[0] is closest to the stack/frame pointer. | 753 // Arg[0] is closest to the stack/frame pointer. |
764 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 754 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
765 size_t BasicFrameOffset = PreservedRegsSizeBytes; | 755 size_t BasicFrameOffset = PreservedRegsSizeBytes; |
766 if (!UsesFramePointer) | 756 if (!UsesFramePointer) |
767 BasicFrameOffset += SpillAreaSizeBytes; | 757 BasicFrameOffset += SpillAreaSizeBytes; |
768 | 758 |
769 const VarList &Args = Func->getArgs(); | 759 const VarList &Args = Func->getArgs(); |
770 size_t InArgsSizeBytes = 0; | 760 size_t InArgsSizeBytes = 0; |
771 TargetARM32::CallingConv CC; | 761 TargetARM32::CallingConv CC; |
772 for (Variable *Arg : Args) { | 762 for (Variable *Arg : Args) { |
773 Type Ty = Arg->getType(); | 763 Type Ty = Arg->getType(); |
(...skipping 49 matching lines...) | |
823 void TargetARM32::addEpilog(CfgNode *Node) { | 813 void TargetARM32::addEpilog(CfgNode *Node) { |
824 InstList &Insts = Node->getInsts(); | 814 InstList &Insts = Node->getInsts(); |
825 InstList::reverse_iterator RI, E; | 815 InstList::reverse_iterator RI, E; |
826 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { | 816 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { |
827 if (llvm::isa<InstARM32Ret>(*RI)) | 817 if (llvm::isa<InstARM32Ret>(*RI)) |
828 break; | 818 break; |
829 } | 819 } |
830 if (RI == E) | 820 if (RI == E) |
831 return; | 821 return; |
832 | 822 |
833 // Convert the reverse_iterator position into its corresponding | 823 // Convert the reverse_iterator position into its corresponding (forward) |
834 // (forward) iterator position. | 824 // iterator position. |
835 InstList::iterator InsertPoint = RI.base(); | 825 InstList::iterator InsertPoint = RI.base(); |
836 --InsertPoint; | 826 --InsertPoint; |
837 Context.init(Node); | 827 Context.init(Node); |
838 Context.setInsertPoint(InsertPoint); | 828 Context.setInsertPoint(InsertPoint); |
839 | 829 |
840 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 830 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
841 if (UsesFramePointer) { | 831 if (UsesFramePointer) { |
842 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); | 832 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); |
843 // For late-stage liveness analysis (e.g. asm-verbose mode), | 833 // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake |
844 // adding a fake use of SP before the assignment of SP=FP keeps | 834 // use of SP before the assignment of SP=FP keeps previous SP adjustments |
845 // previous SP adjustments from being dead-code eliminated. | 835 // from being dead-code eliminated. |
846 Context.insert(InstFakeUse::create(Func, SP)); | 836 Context.insert(InstFakeUse::create(Func, SP)); |
847 _mov(SP, FP); | 837 _mov(SP, FP); |
848 } else { | 838 } else { |
849 // add SP, SpillAreaSizeBytes | 839 // add SP, SpillAreaSizeBytes |
850 if (SpillAreaSizeBytes) { | 840 if (SpillAreaSizeBytes) { |
851 // Use the scratch register if needed to legalize the immediate. | 841 // Use the scratch register if needed to legalize the immediate. |
852 Operand *AddAmount = | 842 Operand *AddAmount = |
853 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), | 843 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), |
854 Legal_Reg | Legal_Flex, getReservedTmpReg()); | 844 Legal_Reg | Legal_Flex, getReservedTmpReg()); |
855 _add(SP, SP, AddAmount); | 845 _add(SP, SP, AddAmount); |
856 } | 846 } |
857 } | 847 } |
858 | 848 |
859 // Add pop instructions for preserved registers. | 849 // Add pop instructions for preserved registers. |
860 llvm::SmallBitVector CalleeSaves = | 850 llvm::SmallBitVector CalleeSaves = |
861 getRegisterSet(RegSet_CalleeSave, RegSet_None); | 851 getRegisterSet(RegSet_CalleeSave, RegSet_None); |
862 VarList GPRsToRestore; | 852 VarList GPRsToRestore; |
863 GPRsToRestore.reserve(CalleeSaves.size()); | 853 GPRsToRestore.reserve(CalleeSaves.size()); |
864 // Consider FP and LR as callee-save / used as needed. | 854 // Consider FP and LR as callee-save / used as needed. |
865 if (UsesFramePointer) { | 855 if (UsesFramePointer) { |
866 CalleeSaves[RegARM32::Reg_fp] = true; | 856 CalleeSaves[RegARM32::Reg_fp] = true; |
867 } | 857 } |
868 if (!MaybeLeafFunc) { | 858 if (!MaybeLeafFunc) { |
869 CalleeSaves[RegARM32::Reg_lr] = true; | 859 CalleeSaves[RegARM32::Reg_lr] = true; |
870 } | 860 } |
871 // Pop registers in ascending order just like push | 861 // Pop registers in ascending order just like push (instead of in reverse |
872 // (instead of in reverse order). | 862 // order). |
873 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | 863 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
874 if (CalleeSaves[i] && RegsUsed[i]) { | 864 if (CalleeSaves[i] && RegsUsed[i]) { |
875 GPRsToRestore.push_back(getPhysicalRegister(i)); | 865 GPRsToRestore.push_back(getPhysicalRegister(i)); |
876 } | 866 } |
877 } | 867 } |
878 if (!GPRsToRestore.empty()) | 868 if (!GPRsToRestore.empty()) |
879 _pop(GPRsToRestore); | 869 _pop(GPRsToRestore); |
880 | 870 |
881 if (!Ctx->getFlags().getUseSandboxing()) | 871 if (!Ctx->getFlags().getUseSandboxing()) |
882 return; | 872 return; |
(...skipping 13 matching lines...) | |
896 RetValue = llvm::cast<Variable>(RI->getSrc(0)); | 886 RetValue = llvm::cast<Variable>(RI->getSrc(0)); |
897 _bundle_lock(); | 887 _bundle_lock(); |
898 _bic(LR, LR, RetMask); | 888 _bic(LR, LR, RetMask); |
899 _ret(LR, RetValue); | 889 _ret(LR, RetValue); |
900 _bundle_unlock(); | 890 _bundle_unlock(); |
901 RI->setDeleted(); | 891 RI->setDeleted(); |
902 } | 892 } |
903 | 893 |
904 bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const { | 894 bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const { |
905 constexpr bool SignExt = false; | 895 constexpr bool SignExt = false; |
906 // TODO(jvoung): vldr of FP stack slots has a different limit from the | 896 // TODO(jvoung): vldr of FP stack slots has a different limit from the plain |
907 // plain stackSlotType(). | 897 // stackSlotType(). |
908 return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset); | 898 return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset); |
909 } | 899 } |
910 | 900 |
911 StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var, | 901 StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var, |
912 Variable *OrigBaseReg) { | 902 Variable *OrigBaseReg) { |
913 int32_t Offset = Var->getStackOffset(); | 903 int32_t Offset = Var->getStackOffset(); |
914 // Legalize will likely need a movw/movt combination, but if the top | 904 // Legalize will likely need a movw/movt combination, but if the top bits are |
915 // bits are all 0 from negating the offset and subtracting, we could | 905 // all 0 from negating the offset and subtracting, we could use that instead. |
916 // use that instead. | |
917 bool ShouldSub = (-Offset & 0xFFFF0000) == 0; | 906 bool ShouldSub = (-Offset & 0xFFFF0000) == 0; |
918 if (ShouldSub) | 907 if (ShouldSub) |
919 Offset = -Offset; | 908 Offset = -Offset; |
920 Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset), | 909 Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset), |
921 Legal_Reg | Legal_Flex, getReservedTmpReg()); | 910 Legal_Reg | Legal_Flex, getReservedTmpReg()); |
922 Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg()); | 911 Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg()); |
923 if (ShouldSub) | 912 if (ShouldSub) |
924 _sub(ScratchReg, OrigBaseReg, OffsetVal); | 913 _sub(ScratchReg, OrigBaseReg, OffsetVal); |
925 else | 914 else |
926 _add(ScratchReg, OrigBaseReg, OffsetVal); | 915 _add(ScratchReg, OrigBaseReg, OffsetVal); |
(...skipping 15 matching lines...) | |
942 // | 931 // |
943 // This is safe because we have reserved TMP, and add for ARM does not | 932 // This is safe because we have reserved TMP, and add for ARM does not |
944 // clobber the flags register. | 933 // clobber the flags register. |
945 Func->dump("Before legalizeStackSlots"); | 934 Func->dump("Before legalizeStackSlots"); |
946 assert(hasComputedFrame()); | 935 assert(hasComputedFrame()); |
947 // Early exit, if SpillAreaSizeBytes is really small. | 936 // Early exit, if SpillAreaSizeBytes is really small. |
948 if (isLegalVariableStackOffset(SpillAreaSizeBytes)) | 937 if (isLegalVariableStackOffset(SpillAreaSizeBytes)) |
949 return; | 938 return; |
950 Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); | 939 Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); |
951 int32_t StackAdjust = 0; | 940 int32_t StackAdjust = 0; |
952 // Do a fairly naive greedy clustering for now. Pick the first stack slot | 941 // Do a fairly naive greedy clustering for now. Pick the first stack slot |
953 // that's out of bounds and make a new base reg using the architecture's temp | 942 // that's out of bounds and make a new base reg using the architecture's temp |
954 // register. If that works for the next slot, then great. Otherwise, create | 943 // register. If that works for the next slot, then great. Otherwise, create a |
955 // a new base register, clobbering the previous base register. Never share a | 944 // new base register, clobbering the previous base register. Never share a |
956 // base reg across different basic blocks. This isn't ideal if local and | 945 // base reg across different basic blocks. This isn't ideal if local and |
957 // multi-block variables are far apart and their references are interspersed. | 946 // multi-block variables are far apart and their references are interspersed. |
958 // It may help to be more coordinated about assign stack slot numbers | 947 // It may help to be more coordinated about assign stack slot numbers and may |
959 // and may help to assign smaller offsets to higher-weight variables | 948 // help to assign smaller offsets to higher-weight variables so that they |
960 // so that they don't depend on this legalization. | 949 // don't depend on this legalization. |
961 for (CfgNode *Node : Func->getNodes()) { | 950 for (CfgNode *Node : Func->getNodes()) { |
962 Context.init(Node); | 951 Context.init(Node); |
963 StackVariable *NewBaseReg = nullptr; | 952 StackVariable *NewBaseReg = nullptr; |
964 int32_t NewBaseOffset = 0; | 953 int32_t NewBaseOffset = 0; |
965 while (!Context.atEnd()) { | 954 while (!Context.atEnd()) { |
966 PostIncrLoweringContext PostIncrement(Context); | 955 PostIncrLoweringContext PostIncrement(Context); |
967 Inst *CurInstr = Context.getCur(); | 956 Inst *CurInstr = Context.getCur(); |
968 Variable *Dest = CurInstr->getDest(); | 957 Variable *Dest = CurInstr->getDest(); |
969 // Check if the previous NewBaseReg is clobbered, and reset if needed. | 958 // Check if the previous NewBaseReg is clobbered, and reset if needed. |
970 if ((Dest && NewBaseReg && Dest->hasReg() && | 959 if ((Dest && NewBaseReg && Dest->hasReg() && |
971 Dest->getRegNum() == NewBaseReg->getBaseRegNum()) || | 960 Dest->getRegNum() == NewBaseReg->getBaseRegNum()) || |
972 llvm::isa<InstFakeKill>(CurInstr)) { | 961 llvm::isa<InstFakeKill>(CurInstr)) { |
973 NewBaseReg = nullptr; | 962 NewBaseReg = nullptr; |
974 NewBaseOffset = 0; | 963 NewBaseOffset = 0; |
975 } | 964 } |
976 // The stack adjustment only matters if we are using SP instead of FP. | 965 // The stack adjustment only matters if we are using SP instead of FP. |
977 if (!hasFramePointer()) { | 966 if (!hasFramePointer()) { |
978 if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) { | 967 if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) { |
979 StackAdjust += AdjInst->getAmount(); | 968 StackAdjust += AdjInst->getAmount(); |
980 NewBaseOffset += AdjInst->getAmount(); | 969 NewBaseOffset += AdjInst->getAmount(); |
981 continue; | 970 continue; |
982 } | 971 } |
983 if (llvm::isa<InstARM32Call>(CurInstr)) { | 972 if (llvm::isa<InstARM32Call>(CurInstr)) { |
984 NewBaseOffset -= StackAdjust; | 973 NewBaseOffset -= StackAdjust; |
985 StackAdjust = 0; | 974 StackAdjust = 0; |
986 continue; | 975 continue; |
987 } | 976 } |
988 } | 977 } |
989 // For now, only Mov instructions can have stack variables. We need to | 978 // For now, only Mov instructions can have stack variables. We need to |
990 // know the type of instruction because we currently create a fresh one | 979 // know the type of instruction because we currently create a fresh one |
991 // to replace Dest/Source, rather than mutate in place. | 980 // to replace Dest/Source, rather than mutate in place. |
992 auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr); | 981 auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr); |
993 if (!MovInst) { | 982 if (!MovInst) { |
994 continue; | 983 continue; |
995 } | 984 } |
996 if (!Dest->hasReg()) { | 985 if (!Dest->hasReg()) { |
997 int32_t Offset = Dest->getStackOffset(); | 986 int32_t Offset = Dest->getStackOffset(); |
998 Offset += StackAdjust; | 987 Offset += StackAdjust; |
999 if (!isLegalVariableStackOffset(Offset)) { | 988 if (!isLegalVariableStackOffset(Offset)) { |
(...skipping 110 matching lines...) | |
1110 return Operand; | 1099 return Operand; |
1111 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { | 1100 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { |
1112 split64(Var); | 1101 split64(Var); |
1113 return Var->getHi(); | 1102 return Var->getHi(); |
1114 } | 1103 } |
1115 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 1104 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
1116 return Ctx->getConstantInt32( | 1105 return Ctx->getConstantInt32( |
1117 static_cast<uint32_t>(Const->getValue() >> 32)); | 1106 static_cast<uint32_t>(Const->getValue() >> 32)); |
1118 } | 1107 } |
1119 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) { | 1108 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) { |
1120 // Conservatively disallow memory operands with side-effects | 1109 // Conservatively disallow memory operands with side-effects in case of |
1121 // in case of duplication. | 1110 // duplication. |
1122 assert(Mem->getAddrMode() == OperandARM32Mem::Offset || | 1111 assert(Mem->getAddrMode() == OperandARM32Mem::Offset || |
1123 Mem->getAddrMode() == OperandARM32Mem::NegOffset); | 1112 Mem->getAddrMode() == OperandARM32Mem::NegOffset); |
1124 const Type SplitType = IceType_i32; | 1113 const Type SplitType = IceType_i32; |
1125 if (Mem->isRegReg()) { | 1114 if (Mem->isRegReg()) { |
1126 // We have to make a temp variable T, and add 4 to either Base or Index. | 1115 // We have to make a temp variable T, and add 4 to either Base or Index. |
1127 // The Index may be shifted, so adding 4 can mean something else. | 1116 // The Index may be shifted, so adding 4 can mean something else. Thus, |
1128 // Thus, prefer T := Base + 4, and use T as the new Base. | 1117 // prefer T := Base + 4, and use T as the new Base. |
1129 Variable *Base = Mem->getBase(); | 1118 Variable *Base = Mem->getBase(); |
1130 Constant *Four = Ctx->getConstantInt32(4); | 1119 Constant *Four = Ctx->getConstantInt32(4); |
1131 Variable *NewBase = Func->makeVariable(Base->getType()); | 1120 Variable *NewBase = Func->makeVariable(Base->getType()); |
1132 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase, | 1121 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase, |
1133 Base, Four)); | 1122 Base, Four)); |
1134 return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(), | 1123 return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(), |
1135 Mem->getShiftOp(), Mem->getShiftAmt(), | 1124 Mem->getShiftOp(), Mem->getShiftAmt(), |
1136 Mem->getAddrMode()); | 1125 Mem->getAddrMode()); |
1137 } else { | 1126 } else { |
1138 Variable *Base = Mem->getBase(); | 1127 Variable *Base = Mem->getBase(); |
1139 ConstantInteger32 *Offset = Mem->getOffset(); | 1128 ConstantInteger32 *Offset = Mem->getOffset(); |
1140 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4)); | 1129 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4)); |
1141 int32_t NextOffsetVal = Offset->getValue() + 4; | 1130 int32_t NextOffsetVal = Offset->getValue() + 4; |
1142 const bool SignExt = false; | 1131 const bool SignExt = false; |
1143 if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) { | 1132 if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) { |
1144 // We have to make a temp variable and add 4 to either Base or Offset. | 1133 // We have to make a temp variable and add 4 to either Base or Offset. |
1145 // If we add 4 to Offset, this will convert a non-RegReg addressing | 1134 // If we add 4 to Offset, this will convert a non-RegReg addressing |
1146 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows | 1135 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows |
1147 // RegReg addressing modes, prefer adding to base and replacing instead. | 1136 // RegReg addressing modes, prefer adding to base and replacing |
1148 // Thus we leave the old offset alone. | 1137 // instead. Thus we leave the old offset alone. |
1149 Constant *Four = Ctx->getConstantInt32(4); | 1138 Constant *Four = Ctx->getConstantInt32(4); |
1150 Variable *NewBase = Func->makeVariable(Base->getType()); | 1139 Variable *NewBase = Func->makeVariable(Base->getType()); |
1151 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, | 1140 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, |
1152 NewBase, Base, Four)); | 1141 NewBase, Base, Four)); |
1153 Base = NewBase; | 1142 Base = NewBase; |
1154 } else { | 1143 } else { |
1155 Offset = | 1144 Offset = |
1156 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal)); | 1145 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal)); |
1157 } | 1146 } |
1158 return OperandARM32Mem::create(Func, SplitType, Base, Offset, | 1147 return OperandARM32Mem::create(Func, SplitType, Base, Offset, |
(...skipping 29 matching lines...) | |
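The splitting above reduces, for constants, to taking the low and high 32-bit words of the i64 value. A rough host-side illustration (the helper below is hypothetical and not part of this CL):

    #include <cstdint>

    // What loOperand()/hiOperand() yield for a ConstantInteger64: the value is
    // simply cut into its two i32 words.
    static void splitI64(uint64_t V, uint32_t &Lo, uint32_t &Hi) {
      Lo = static_cast<uint32_t>(V);
      Hi = static_cast<uint32_t>(V >> 32);
    }

For memory operands the high word lives at the original address plus 4, which is why the code above either bumps the immediate offset or, when the larger offset would no longer encode (or the operand is RegReg), folds the +4 into a freshly computed base register.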
1188 | 1177 |
1189 REGARM32_TABLE | 1178 REGARM32_TABLE |
1190 | 1179 |
1191 #undef X | 1180 #undef X |
1192 | 1181 |
1193 return Registers; | 1182 return Registers; |
1194 } | 1183 } |
1195 | 1184 |
1196 void TargetARM32::lowerAlloca(const InstAlloca *Inst) { | 1185 void TargetARM32::lowerAlloca(const InstAlloca *Inst) { |
1197 UsesFramePointer = true; | 1186 UsesFramePointer = true; |
1198 // Conservatively require the stack to be aligned. Some stack | 1187 // Conservatively require the stack to be aligned. Some stack adjustment |
1199 // adjustment operations implemented below assume that the stack is | 1188 // operations implemented below assume that the stack is aligned before the |
1200 // aligned before the alloca. All the alloca code ensures that the | 1189 // alloca. All the alloca code ensures that the stack alignment is preserved |
1201 // stack alignment is preserved after the alloca. The stack alignment | 1190 // after the alloca. The stack alignment restriction can be relaxed in some |
1202 // restriction can be relaxed in some cases. | 1191 // cases. |
1203 NeedsStackAlignment = true; | 1192 NeedsStackAlignment = true; |
1204 | 1193 |
1205 // TODO(stichnot): minimize the number of adjustments of SP, etc. | 1194 // TODO(stichnot): minimize the number of adjustments of SP, etc. |
1206 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 1195 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
1207 Variable *Dest = Inst->getDest(); | 1196 Variable *Dest = Inst->getDest(); |
1208 uint32_t AlignmentParam = Inst->getAlignInBytes(); | 1197 uint32_t AlignmentParam = Inst->getAlignInBytes(); |
1209 // For default align=0, set it to the real value 1, to avoid any | 1198 // For default align=0, set it to the real value 1, to avoid any |
1210 // bit-manipulation problems below. | 1199 // bit-manipulation problems below. |
1211 AlignmentParam = std::max(AlignmentParam, 1u); | 1200 AlignmentParam = std::max(AlignmentParam, 1u); |
1212 | 1201 |
1213 // LLVM enforces power of 2 alignment. | 1202 // LLVM enforces power of 2 alignment. |
1214 assert(llvm::isPowerOf2_32(AlignmentParam)); | 1203 assert(llvm::isPowerOf2_32(AlignmentParam)); |
1215 assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES)); | 1204 assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES)); |
1216 | 1205 |
1217 uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES); | 1206 uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES); |
1218 if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) { | 1207 if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) { |
1219 alignRegisterPow2(SP, Alignment); | 1208 alignRegisterPow2(SP, Alignment); |
1220 } | 1209 } |
1221 Operand *TotalSize = Inst->getSizeInBytes(); | 1210 Operand *TotalSize = Inst->getSizeInBytes(); |
1222 if (const auto *ConstantTotalSize = | 1211 if (const auto *ConstantTotalSize = |
1223 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { | 1212 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { |
1224 uint32_t Value = ConstantTotalSize->getValue(); | 1213 uint32_t Value = ConstantTotalSize->getValue(); |
1225 Value = Utils::applyAlignment(Value, Alignment); | 1214 Value = Utils::applyAlignment(Value, Alignment); |
1226 Operand *SubAmount = legalize(Ctx->getConstantInt32(Value)); | 1215 Operand *SubAmount = legalize(Ctx->getConstantInt32(Value)); |
1227 _sub(SP, SP, SubAmount); | 1216 _sub(SP, SP, SubAmount); |
1228 } else { | 1217 } else { |
1229 // Non-constant sizes need to be adjusted to the next highest | 1218 // Non-constant sizes need to be adjusted to the next highest multiple of |
1230 // multiple of the required alignment at runtime. | 1219 // the required alignment at runtime. |
1231 TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex); | 1220 TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex); |
1232 Variable *T = makeReg(IceType_i32); | 1221 Variable *T = makeReg(IceType_i32); |
1233 _mov(T, TotalSize); | 1222 _mov(T, TotalSize); |
1234 Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1)); | 1223 Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1)); |
1235 _add(T, T, AddAmount); | 1224 _add(T, T, AddAmount); |
1236 alignRegisterPow2(T, Alignment); | 1225 alignRegisterPow2(T, Alignment); |
1237 _sub(SP, SP, T); | 1226 _sub(SP, SP, T); |
1238 } | 1227 } |
1239 _mov(Dest, SP); | 1228 _mov(Dest, SP); |
1240 } | 1229 } |
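The variable-size path rounds the requested size up to the alignment at run time (add Alignment - 1, then clear the low bits via alignRegisterPow2). A minimal sketch of that arithmetic, assuming Alignment is a power of two as asserted above:

    #include <cstdint>

    // Round Size up to the next multiple of Alignment (a power of two); this is
    // the same computation the emitted add + mask sequence performs on T.
    static uint32_t alignUp(uint32_t Size, uint32_t Alignment) {
      return (Size + Alignment - 1) & ~(Alignment - 1);
    }

For example, alignUp(20, 16) == 32, so an "alloca 20, align 16" reserves 32 bytes and SP remains 16-byte aligned afterwards.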
(...skipping 17 matching lines...) | |
1258 _tst(SrcLoReg, Mask); | 1247 _tst(SrcLoReg, Mask); |
1259 break; | 1248 break; |
1260 } | 1249 } |
1261 case IceType_i32: { | 1250 case IceType_i32: { |
1262 _tst(SrcLoReg, SrcLoReg); | 1251 _tst(SrcLoReg, SrcLoReg); |
1263 break; | 1252 break; |
1264 } | 1253 } |
1265 case IceType_i64: { | 1254 case IceType_i64: { |
1266 Variable *ScratchReg = makeReg(IceType_i32); | 1255 Variable *ScratchReg = makeReg(IceType_i32); |
1267 _orrs(ScratchReg, SrcLoReg, SrcHi); | 1256 _orrs(ScratchReg, SrcLoReg, SrcHi); |
1268 // ScratchReg isn't going to be used, but we need the | 1257 // ScratchReg isn't going to be used, but we need the side-effect of |
1269 // side-effect of setting flags from this operation. | 1258 // setting flags from this operation. |
1270 Context.insert(InstFakeUse::create(Func, ScratchReg)); | 1259 Context.insert(InstFakeUse::create(Func, ScratchReg)); |
1271 } | 1260 } |
1272 } | 1261 } |
1273 InstARM32Label *Label = InstARM32Label::create(Func, this); | 1262 InstARM32Label *Label = InstARM32Label::create(Func, this); |
1274 _br(Label, CondARM32::NE); | 1263 _br(Label, CondARM32::NE); |
1275 _trap(); | 1264 _trap(); |
1276 Context.insert(Label); | 1265 Context.insert(Label); |
1277 } | 1266 } |
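For the i64 case, the orrs of the two halves sets the Z flag exactly when the full 64-bit value is zero, and the NE branch skips the trap otherwise. A C-level sketch of the run-time check (hypothetical helper, not the Subzero API):

    #include <cstdint>
    #include <cstdlib>

    // Equivalent of "orrs scratch, lo, hi ; bne skip ; trap ; skip:" above.
    static void div0Check64(uint32_t SrcLo, uint32_t SrcHi) {
      if ((SrcLo | SrcHi) == 0)
        std::abort(); // stands in for _trap()
    }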
1278 | 1267 |
1279 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, | 1268 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, |
(...skipping 23 matching lines...) | |
1303 InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs); | 1292 InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs); |
1304 Call->addArg(T0R); | 1293 Call->addArg(T0R); |
1305 Call->addArg(T1R); | 1294 Call->addArg(T1R); |
1306 lowerCall(Call); | 1295 lowerCall(Call); |
1307 } | 1296 } |
1308 return; | 1297 return; |
1309 } | 1298 } |
1310 | 1299 |
1311 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { | 1300 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
1312 Variable *Dest = Inst->getDest(); | 1301 Variable *Dest = Inst->getDest(); |
1313 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier | 1302 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to |
1314 // to legalize Src0 to flex or Src1 to flex and there is a reversible | 1303 // legalize Src0 to flex or Src1 to flex and there is a reversible |
1315 // instruction. E.g., reverse subtract with immediate, register vs | 1304 // instruction. E.g., reverse subtract with immediate, register vs register, |
1316 // register, immediate. | 1305 // immediate. |
1317 // Or it may be the case that the operands aren't swapped, but the | 1306 // Or it may be the case that the operands aren't swapped, but the bits can |
1318 // bits can be flipped and a different operation applied. | 1307 // be flipped and a different operation applied. E.g., use BIC (bit clear) |
1319 // E.g., use BIC (bit clear) instead of AND for some masks. | 1308 // instead of AND for some masks. |
1320 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | 1309 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
1321 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); | 1310 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); |
1322 if (Dest->getType() == IceType_i64) { | 1311 if (Dest->getType() == IceType_i64) { |
1323 // These helper-call-involved instructions are lowered in this | 1312 // These helper-call-involved instructions are lowered in this separate |
1324 // separate switch. This is because we would otherwise assume that | 1313 // switch. This is because we would otherwise assume that we need to |
1325 // we need to legalize Src0 to Src0RLo and Src0Hi. However, those go unused | 1314 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with |
1326 // with helper calls, and such unused/redundant instructions will fail | 1315 // helper calls, and such unused/redundant instructions will fail liveness |
1327 // liveness analysis under -Om1 setting. | 1316 // analysis under -Om1 setting. |
1328 switch (Inst->getOp()) { | 1317 switch (Inst->getOp()) { |
1329 default: | 1318 default: |
1330 break; | 1319 break; |
1331 case InstArithmetic::Udiv: | 1320 case InstArithmetic::Udiv: |
1332 case InstArithmetic::Sdiv: | 1321 case InstArithmetic::Sdiv: |
1333 case InstArithmetic::Urem: | 1322 case InstArithmetic::Urem: |
1334 case InstArithmetic::Srem: { | 1323 case InstArithmetic::Srem: { |
1335 // Check for divide by 0 (ARM normally doesn't trap, but we want it | 1324 // Check for divide by 0 (ARM normally doesn't trap, but we want it to |
1336 // to trap for NaCl). Src1Lo and Src1Hi may have already been legalized | 1325 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a |
1337 // to a register, which will hide a constant source operand. | 1326 // register, which will hide a constant source operand. Instead, check |
1338 // Instead, check the not-yet-legalized Src1 to optimize-out a divide | 1327 // the not-yet-legalized Src1 to optimize-out a divide by 0 check. |
1339 // by 0 check. | |
1340 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { | 1328 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { |
1341 if (C64->getValue() == 0) { | 1329 if (C64->getValue() == 0) { |
1342 _trap(); | 1330 _trap(); |
1343 return; | 1331 return; |
1344 } | 1332 } |
1345 } else { | 1333 } else { |
1346 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); | 1334 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); |
1347 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); | 1335 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); |
1348 div0Check(IceType_i64, Src1Lo, Src1Hi); | 1336 div0Check(IceType_i64, Src1Lo, Src1Hi); |
1349 } | 1337 } |
1350 // Technically, ARM has their own aeabi routines, but we can use the | 1338 // Technically, ARM has their own aeabi routines, but we can use the |
1351 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, | 1339 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses |
1352 // but uses the more standard __moddi3 for rem. | 1340 // the more standard __moddi3 for rem. |
1353 const char *HelperName = ""; | 1341 const char *HelperName = ""; |
1354 switch (Inst->getOp()) { | 1342 switch (Inst->getOp()) { |
1355 default: | 1343 default: |
1356 llvm_unreachable("Should have only matched div ops."); | 1344 llvm_unreachable("Should have only matched div ops."); |
1357 break; | 1345 break; |
1358 case InstArithmetic::Udiv: | 1346 case InstArithmetic::Udiv: |
1359 HelperName = H_udiv_i64; | 1347 HelperName = H_udiv_i64; |
1360 break; | 1348 break; |
1361 case InstArithmetic::Sdiv: | 1349 case InstArithmetic::Sdiv: |
1362 HelperName = H_sdiv_i64; | 1350 HelperName = H_sdiv_i64; |
(...skipping 102 matching lines...) | |
1465 // a=b<<c ==> | 1453 // a=b<<c ==> |
1466 // GCC 4.8 does: | 1454 // GCC 4.8 does: |
1467 // sub t_c1, c.lo, #32 | 1455 // sub t_c1, c.lo, #32 |
1468 // lsl t_hi, b.hi, c.lo | 1456 // lsl t_hi, b.hi, c.lo |
1469 // orr t_hi, t_hi, b.lo, lsl t_c1 | 1457 // orr t_hi, t_hi, b.lo, lsl t_c1 |
1470 // rsb t_c2, c.lo, #32 | 1458 // rsb t_c2, c.lo, #32 |
1471 // orr t_hi, t_hi, b.lo, lsr t_c2 | 1459 // orr t_hi, t_hi, b.lo, lsr t_c2 |
1472 // lsl t_lo, b.lo, c.lo | 1460 // lsl t_lo, b.lo, c.lo |
1473 // a.lo = t_lo | 1461 // a.lo = t_lo |
1474 // a.hi = t_hi | 1462 // a.hi = t_hi |
1475 // Can be strength-reduced for constant-shifts, but we don't do | 1463 // Can be strength-reduced for constant-shifts, but we don't do that for |
1476 // that for now. | 1464 // now. |
1477 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. | 1465 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On |
1478 // On ARM, shifts only take the lower 8 bits of the shift register, | 1466 // ARM, shifts only take the lower 8 bits of the shift register, and |
1479 // and saturate to the range 0-32, so the negative value will | 1467 // saturate to the range 0-32, so the negative value will saturate to 32. |
1480 // saturate to 32. | |
1481 Variable *T_Hi = makeReg(IceType_i32); | 1468 Variable *T_Hi = makeReg(IceType_i32); |
1482 Variable *Src1RLo = legalizeToReg(Src1Lo); | 1469 Variable *Src1RLo = legalizeToReg(Src1Lo); |
1483 Constant *ThirtyTwo = Ctx->getConstantInt32(32); | 1470 Constant *ThirtyTwo = Ctx->getConstantInt32(32); |
1484 Variable *T_C1 = makeReg(IceType_i32); | 1471 Variable *T_C1 = makeReg(IceType_i32); |
1485 Variable *T_C2 = makeReg(IceType_i32); | 1472 Variable *T_C2 = makeReg(IceType_i32); |
1486 _sub(T_C1, Src1RLo, ThirtyTwo); | 1473 _sub(T_C1, Src1RLo, ThirtyTwo); |
1487 _lsl(T_Hi, Src0RHi, Src1RLo); | 1474 _lsl(T_Hi, Src0RHi, Src1RLo); |
1488 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, | 1475 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, |
1489 OperandARM32::LSL, T_C1)); | 1476 OperandARM32::LSL, T_C1)); |
1490 _rsb(T_C2, Src1RLo, ThirtyTwo); | 1477 _rsb(T_C2, Src1RLo, ThirtyTwo); |
1491 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, | 1478 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, |
1492 OperandARM32::LSR, T_C2)); | 1479 OperandARM32::LSR, T_C2)); |
1493 _mov(DestHi, T_Hi); | 1480 _mov(DestHi, T_Hi); |
1494 Variable *T_Lo = makeReg(IceType_i32); | 1481 Variable *T_Lo = makeReg(IceType_i32); |
1495 // _mov seems to sometimes have better register preferencing than lsl. | 1482 // _mov seems to sometimes have better register preferencing than lsl. |
1496 // Otherwise mov w/ lsl shifted register is a pseudo-instruction | 1483 // Otherwise mov w/ lsl shifted register is a pseudo-instruction that |
1497 // that maps to lsl. | 1484 // maps to lsl. |
1498 _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, | 1485 _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, |
1499 OperandARM32::LSL, Src1RLo)); | 1486 OperandARM32::LSL, Src1RLo)); |
1500 _mov(DestLo, T_Lo); | 1487 _mov(DestLo, T_Lo); |
1501 return; | 1488 return; |
1502 } | 1489 } |
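A host-side emulation of the sequence above may make the flow easier to follow. It is a sketch only (the helper is hypothetical, and CLo is assumed to be below 64 as the IR requires); the lambdas model the ARM behavior that register-specified LSL/LSR produce 0 once the shift amount reaches 32, which is what the sub/rsb #32 trick relies on:

    #include <cstdint>

    static uint64_t shl64(uint32_t BLo, uint32_t BHi, uint32_t CLo) {
      // Register-specified shifts: amounts of 32 or more yield 0. The unsigned
      // wrap-around of CLo - 32u below lands in that branch, matching the
      // negative t_c1 saturating on ARM.
      auto lsl = [](uint32_t V, uint32_t S) { return S >= 32 ? 0u : V << S; };
      auto lsr = [](uint32_t V, uint32_t S) { return S >= 32 ? 0u : V >> S; };
      uint32_t THi = lsl(BHi, CLo);        // lsl t_hi, b.hi, c.lo
      THi |= lsl(BLo, CLo - 32u);          // orr t_hi, t_hi, b.lo, lsl t_c1
      THi |= lsr(BLo, 32u - CLo);          // orr t_hi, t_hi, b.lo, lsr t_c2
      uint32_t TLo = lsl(BLo, CLo);        // lsl t_lo, b.lo, c.lo
      return (uint64_t(THi) << 32) | TLo;  // a.hi = t_hi, a.lo = t_lo
    }

The Lshr/Ashr cases below mirror this with the roles of the halves swapped, plus the flag-setting sub and PL-predicated orr for the arithmetic variant.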
1503 case InstArithmetic::Lshr: | 1490 case InstArithmetic::Lshr: |
1504 // a=b>>c (unsigned) ==> | 1491 // a=b>>c (unsigned) ==> |
1505 // GCC 4.8 does: | 1492 // GCC 4.8 does: |
1506 // rsb t_c1, c.lo, #32 | 1493 // rsb t_c1, c.lo, #32 |
1507 // lsr t_lo, b.lo, c.lo | 1494 // lsr t_lo, b.lo, c.lo |
1508 // orr t_lo, t_lo, b.hi, lsl t_c1 | 1495 // orr t_lo, t_lo, b.hi, lsl t_c1 |
1509 // sub t_c2, c.lo, #32 | 1496 // sub t_c2, c.lo, #32 |
1510 // orr t_lo, t_lo, b.hi, lsr t_c2 | 1497 // orr t_lo, t_lo, b.hi, lsr t_c2 |
1511 // lsr t_hi, b.hi, c.lo | 1498 // lsr t_hi, b.hi, c.lo |
1512 // a.lo = t_lo | 1499 // a.lo = t_lo |
1513 // a.hi = t_hi | 1500 // a.hi = t_hi |
1514 case InstArithmetic::Ashr: { | 1501 case InstArithmetic::Ashr: { |
1515 // a=b>>c (signed) ==> ... | 1502 // a=b>>c (signed) ==> ... |
1516 // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, | 1503 // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, and the |
1517 // and the next orr should be conditioned on PLUS. The last two | 1504 // next orr should be conditioned on PLUS. The last two right shifts |
1518 // right shifts should also be arithmetic. | 1505 // should also be arithmetic. |
1519 bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; | 1506 bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; |
1520 Variable *T_Lo = makeReg(IceType_i32); | 1507 Variable *T_Lo = makeReg(IceType_i32); |
1521 Variable *Src1RLo = legalizeToReg(Src1Lo); | 1508 Variable *Src1RLo = legalizeToReg(Src1Lo); |
1522 Constant *ThirtyTwo = Ctx->getConstantInt32(32); | 1509 Constant *ThirtyTwo = Ctx->getConstantInt32(32); |
1523 Variable *T_C1 = makeReg(IceType_i32); | 1510 Variable *T_C1 = makeReg(IceType_i32); |
1524 Variable *T_C2 = makeReg(IceType_i32); | 1511 Variable *T_C2 = makeReg(IceType_i32); |
1525 _rsb(T_C1, Src1RLo, ThirtyTwo); | 1512 _rsb(T_C1, Src1RLo, ThirtyTwo); |
1526 _lsr(T_Lo, Src0RLo, Src1RLo); | 1513 _lsr(T_Lo, Src0RLo, Src1RLo); |
1527 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | 1514 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
1528 OperandARM32::LSL, T_C1)); | 1515 OperandARM32::LSL, T_C1)); |
(...skipping 187 matching lines...) | |
1716 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1703 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
1717 Variable *T_Lo = nullptr, *T_Hi = nullptr; | 1704 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
1718 _mov(T_Lo, Src0Lo); | 1705 _mov(T_Lo, Src0Lo); |
1719 _mov(DestLo, T_Lo); | 1706 _mov(DestLo, T_Lo); |
1720 _mov(T_Hi, Src0Hi); | 1707 _mov(T_Hi, Src0Hi); |
1721 _mov(DestHi, T_Hi); | 1708 _mov(DestHi, T_Hi); |
1722 } else { | 1709 } else { |
1723 Operand *NewSrc; | 1710 Operand *NewSrc; |
1724 if (Dest->hasReg()) { | 1711 if (Dest->hasReg()) { |
1725 // If Dest already has a physical register, then legalize the Src operand | 1712 // If Dest already has a physical register, then legalize the Src operand |
1726 // into a Variable with the same register assignment. This especially | 1713 // into a Variable with the same register assignment. This especially |
1727 // helps allow the use of Flex operands. | 1714 // helps allow the use of Flex operands. |
1728 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); | 1715 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); |
1729 } else { | 1716 } else { |
1730 // Dest could be a stack operand. Since we could potentially need | 1717 // Dest could be a stack operand. Since we could potentially need to do a |
1731 // to do a Store (and store can only have Register operands), | 1718 // Store (and store can only have Register operands), legalize this to a |
1732 // legalize this to a register. | 1719 // register. |
1733 NewSrc = legalize(Src0, Legal_Reg); | 1720 NewSrc = legalize(Src0, Legal_Reg); |
1734 } | 1721 } |
1735 if (isVectorType(Dest->getType())) { | 1722 if (isVectorType(Dest->getType())) { |
1736 UnimplementedError(Func->getContext()->getFlags()); | 1723 UnimplementedError(Func->getContext()->getFlags()); |
1737 } else if (isFloatingType(Dest->getType())) { | 1724 } else if (isFloatingType(Dest->getType())) { |
1738 Variable *SrcR = legalizeToReg(NewSrc); | 1725 Variable *SrcR = legalizeToReg(NewSrc); |
1739 _vmov(Dest, SrcR); | 1726 _vmov(Dest, SrcR); |
1740 } else { | 1727 } else { |
1741 _mov(Dest, NewSrc); | 1728 _mov(Dest, NewSrc); |
1742 } | 1729 } |
(...skipping 60 matching lines...) | |
1803 } | 1790 } |
1804 | 1791 |
1805 if (!InRegs) { | 1792 if (!InRegs) { |
1806 ParameterAreaSizeBytes = | 1793 ParameterAreaSizeBytes = |
1807 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty); | 1794 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty); |
1808 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes)); | 1795 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes)); |
1809 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | 1796 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); |
1810 } | 1797 } |
1811 } | 1798 } |
1812 | 1799 |
1813 // Adjust the parameter area so that the stack is aligned. It is | 1800 // Adjust the parameter area so that the stack is aligned. It is assumed that |
1814 // assumed that the stack is already aligned at the start of the | 1801 // the stack is already aligned at the start of the calling sequence. |
1815 // calling sequence. | |
1816 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); | 1802 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); |
1817 | 1803 |
1818 // Subtract the appropriate amount for the argument area. This also | 1804 // Subtract the appropriate amount for the argument area. This also takes |
1819 // takes care of setting the stack adjustment during emission. | 1805 // care of setting the stack adjustment during emission. |
1820 // | 1806 // |
1821 // TODO: If for some reason the call instruction gets dead-code | 1807 // TODO: If for some reason the call instruction gets dead-code eliminated |
1822 // eliminated after lowering, we would need to ensure that the | 1808 // after lowering, we would need to ensure that the pre-call and the |
1823 // pre-call and the post-call esp adjustment get eliminated as well. | 1809 // post-call esp adjustment get eliminated as well. |
1824 if (ParameterAreaSizeBytes) { | 1810 if (ParameterAreaSizeBytes) { |
1825 Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), | 1811 Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), |
1826 Legal_Reg | Legal_Flex); | 1812 Legal_Reg | Legal_Flex); |
1827 _adjust_stack(ParameterAreaSizeBytes, SubAmount); | 1813 _adjust_stack(ParameterAreaSizeBytes, SubAmount); |
1828 } | 1814 } |
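Put together, each stack-passed argument receives an offset aligned for its type, and the whole area is then rounded up to the stack alignment before SP is dropped. A hedged sketch of that bookkeeping (hypothetical helper; the per-type alignment and the 8-byte stack alignment below are illustrative stand-ins for applyStackAlignmentTy/applyStackAlignment):

    #include <cstdint>
    #include <vector>

    static uint32_t alignTo(uint32_t N, uint32_t A) { // A is a power of two
      return (N + A - 1) & ~(A - 1);
    }

    // Returns the total parameter-area size and fills Offsets with each
    // argument's SP-relative location, mirroring the loop and adjustment above.
    static uint32_t layoutParameterArea(const std::vector<uint32_t> &ArgSizes,
                                        std::vector<uint32_t> &Offsets) {
      uint32_t Area = 0;
      for (uint32_t Size : ArgSizes) {
        Area = alignTo(Area, Size);  // e.g. i64/f64 want 8-byte slots
        Offsets.push_back(Area);
        Area += Size;
      }
      return alignTo(Area, 8);       // keep SP aligned across the call
    }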
1829 | 1815 |
1830 // Copy arguments that are passed on the stack to the appropriate | 1816 // Copy arguments that are passed on the stack to the appropriate stack |
1831 // stack locations. | 1817 // locations. |
1832 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 1818 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
1833 for (auto &StackArg : StackArgs) { | 1819 for (auto &StackArg : StackArgs) { |
1834 ConstantInteger32 *Loc = | 1820 ConstantInteger32 *Loc = |
1835 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second)); | 1821 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second)); |
1836 Type Ty = StackArg.first->getType(); | 1822 Type Ty = StackArg.first->getType(); |
1837 OperandARM32Mem *Addr; | 1823 OperandARM32Mem *Addr; |
1838 constexpr bool SignExt = false; | 1824 constexpr bool SignExt = false; |
1839 if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) { | 1825 if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) { |
1840 Addr = OperandARM32Mem::create(Func, Ty, SP, Loc); | 1826 Addr = OperandARM32Mem::create(Func, Ty, SP, Loc); |
1841 } else { | 1827 } else { |
1842 Variable *NewBase = Func->makeVariable(SP->getType()); | 1828 Variable *NewBase = Func->makeVariable(SP->getType()); |
1843 lowerArithmetic( | 1829 lowerArithmetic( |
1844 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc)); | 1830 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc)); |
1845 Addr = formMemoryOperand(NewBase, Ty); | 1831 Addr = formMemoryOperand(NewBase, Ty); |
1846 } | 1832 } |
1847 lowerStore(InstStore::create(Func, StackArg.first, Addr)); | 1833 lowerStore(InstStore::create(Func, StackArg.first, Addr)); |
1848 } | 1834 } |
1849 | 1835 |
1850 // Copy arguments to be passed in registers to the appropriate registers. | 1836 // Copy arguments to be passed in registers to the appropriate registers. |
1851 for (auto &GPRArg : GPRArgs) { | 1837 for (auto &GPRArg : GPRArgs) { |
1852 Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second); | 1838 Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second); |
1853 // Generate a FakeUse of register arguments so that they do not get | 1839 // Generate a FakeUse of register arguments so that they do not get dead |
1854 // dead code eliminated as a result of the FakeKill of scratch | 1840 // code eliminated as a result of the FakeKill of scratch registers after |
1855 // registers after the call. | 1841 // the call. |
1856 Context.insert(InstFakeUse::create(Func, Reg)); | 1842 Context.insert(InstFakeUse::create(Func, Reg)); |
1857 } | 1843 } |
1858 for (auto &FPArg : FPArgs) { | 1844 for (auto &FPArg : FPArgs) { |
1859 Variable *Reg = legalizeToReg(FPArg.first, FPArg.second); | 1845 Variable *Reg = legalizeToReg(FPArg.first, FPArg.second); |
1860 Context.insert(InstFakeUse::create(Func, Reg)); | 1846 Context.insert(InstFakeUse::create(Func, Reg)); |
1861 } | 1847 } |
1862 | 1848 |
1863 // Generate the call instruction. Assign its result to a temporary | 1849 // Generate the call instruction. Assign its result to a temporary with high |
1864 // with high register allocation weight. | 1850 // register allocation weight. |
1865 Variable *Dest = Instr->getDest(); | 1851 Variable *Dest = Instr->getDest(); |
1866 // ReturnReg doubles as ReturnRegLo as necessary. | 1852 // ReturnReg doubles as ReturnRegLo as necessary. |
1867 Variable *ReturnReg = nullptr; | 1853 Variable *ReturnReg = nullptr; |
1868 Variable *ReturnRegHi = nullptr; | 1854 Variable *ReturnRegHi = nullptr; |
1869 if (Dest) { | 1855 if (Dest) { |
1870 switch (Dest->getType()) { | 1856 switch (Dest->getType()) { |
1871 case IceType_NUM: | 1857 case IceType_NUM: |
1872 llvm_unreachable("Invalid Call dest type"); | 1858 llvm_unreachable("Invalid Call dest type"); |
1873 break; | 1859 break; |
1874 case IceType_void: | 1860 case IceType_void: |
(...skipping 19 matching lines...) | |
1894 case IceType_v16i1: | 1880 case IceType_v16i1: |
1895 case IceType_v16i8: | 1881 case IceType_v16i8: |
1896 case IceType_v8i16: | 1882 case IceType_v8i16: |
1897 case IceType_v4i32: | 1883 case IceType_v4i32: |
1898 case IceType_v4f32: | 1884 case IceType_v4f32: |
1899 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0); | 1885 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0); |
1900 break; | 1886 break; |
1901 } | 1887 } |
1902 } | 1888 } |
1903 Operand *CallTarget = Instr->getCallTarget(); | 1889 Operand *CallTarget = Instr->getCallTarget(); |
 1904 // TODO(jvoung): Handle sandboxing. | 1890 // TODO(jvoung): Handle sandboxing.
 1905 // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); | 1891 // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
1906 | 1892 |
1907 // Allow ConstantRelocatable to be left alone as a direct call, | 1893 // Allow ConstantRelocatable to be left alone as a direct call, but force |
1908 // but force other constants like ConstantInteger32 to be in | 1894 // other constants like ConstantInteger32 to be in a register and make it an |
1909 // a register and make it an indirect call. | 1895 // indirect call. |
1910 if (!llvm::isa<ConstantRelocatable>(CallTarget)) { | 1896 if (!llvm::isa<ConstantRelocatable>(CallTarget)) { |
1911 CallTarget = legalize(CallTarget, Legal_Reg); | 1897 CallTarget = legalize(CallTarget, Legal_Reg); |
1912 } | 1898 } |
1913 Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget); | 1899 Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget); |
1914 Context.insert(NewCall); | 1900 Context.insert(NewCall); |
1915 if (ReturnRegHi) | 1901 if (ReturnRegHi) |
1916 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | 1902 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
1917 | 1903 |
1918 // Add the appropriate offset to SP. The call instruction takes care | 1904 // Add the appropriate offset to SP. The call instruction takes care of |
1919 // of resetting the stack offset during emission. | 1905 // resetting the stack offset during emission. |
1920 if (ParameterAreaSizeBytes) { | 1906 if (ParameterAreaSizeBytes) { |
1921 Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), | 1907 Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), |
1922 Legal_Reg | Legal_Flex); | 1908 Legal_Reg | Legal_Flex); |
1923 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 1909 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
1924 _add(SP, SP, AddAmount); | 1910 _add(SP, SP, AddAmount); |
1925 } | 1911 } |
1926 | 1912 |
1927 // Insert a register-kill pseudo instruction. | 1913 // Insert a register-kill pseudo instruction. |
1928 Context.insert(InstFakeKill::create(Func, NewCall)); | 1914 Context.insert(InstFakeKill::create(Func, NewCall)); |
1929 | 1915 |
(...skipping 87 matching lines...) | |
2017 } | 2003 } |
2018 case InstCast::Zext: { | 2004 case InstCast::Zext: { |
2019 if (isVectorType(Dest->getType())) { | 2005 if (isVectorType(Dest->getType())) { |
2020 UnimplementedError(Func->getContext()->getFlags()); | 2006 UnimplementedError(Func->getContext()->getFlags()); |
2021 } else if (Dest->getType() == IceType_i64) { | 2007 } else if (Dest->getType() == IceType_i64) { |
2022 // t1=uxtb src; dst.lo=t1; dst.hi=0 | 2008 // t1=uxtb src; dst.lo=t1; dst.hi=0 |
2023 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2009 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
2024 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 2010 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
2025 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2011 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
2026 Variable *T_Lo = makeReg(DestLo->getType()); | 2012 Variable *T_Lo = makeReg(DestLo->getType()); |
2027 // i32 and i1 can just take up the whole register. | 2013 // i32 and i1 can just take up the whole register. i32 doesn't need uxt, |
2028 // i32 doesn't need uxt, while i1 will have an and mask later anyway. | 2014 // while i1 will have an and mask later anyway. |
2029 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) { | 2015 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) { |
2030 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); | 2016 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
2031 _mov(T_Lo, Src0RF); | 2017 _mov(T_Lo, Src0RF); |
2032 } else { | 2018 } else { |
2033 Variable *Src0R = legalizeToReg(Src0); | 2019 Variable *Src0R = legalizeToReg(Src0); |
2034 _uxt(T_Lo, Src0R); | 2020 _uxt(T_Lo, Src0R); |
2035 } | 2021 } |
2036 if (Src0->getType() == IceType_i1) { | 2022 if (Src0->getType() == IceType_i1) { |
2037 Constant *One = Ctx->getConstantInt32(1); | 2023 Constant *One = Ctx->getConstantInt32(1); |
2038 _and(T_Lo, T_Lo, One); | 2024 _and(T_Lo, T_Lo, One); |
2039 } | 2025 } |
2040 _mov(DestLo, T_Lo); | 2026 _mov(DestLo, T_Lo); |
2041 Variable *T_Hi = makeReg(DestLo->getType()); | 2027 Variable *T_Hi = makeReg(DestLo->getType()); |
2042 _mov(T_Hi, Zero); | 2028 _mov(T_Hi, Zero); |
2043 _mov(DestHi, T_Hi); | 2029 _mov(DestHi, T_Hi); |
2044 } else if (Src0->getType() == IceType_i1) { | 2030 } else if (Src0->getType() == IceType_i1) { |
2045 // t = Src0; t &= 1; Dest = t | 2031 // t = Src0; t &= 1; Dest = t |
2046 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); | 2032 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
2047 Constant *One = Ctx->getConstantInt32(1); | 2033 Constant *One = Ctx->getConstantInt32(1); |
2048 Variable *T = makeReg(Dest->getType()); | 2034 Variable *T = makeReg(Dest->getType()); |
2049 // Just use _mov instead of _uxt since all registers are 32-bit. | 2035 // Just use _mov instead of _uxt since all registers are 32-bit. _uxt |
2050 // _uxt requires the source to be a register so could have required | 2036 // requires the source to be a register so could have required a _mov |
2051 // a _mov from legalize anyway. | 2037 // from legalize anyway. |
2052 _mov(T, Src0RF); | 2038 _mov(T, Src0RF); |
2053 _and(T, T, One); | 2039 _and(T, T, One); |
2054 _mov(Dest, T); | 2040 _mov(Dest, T); |
2055 } else { | 2041 } else { |
2056 // t1 = uxt src; dst = t1 | 2042 // t1 = uxt src; dst = t1 |
2057 Variable *Src0R = legalizeToReg(Src0); | 2043 Variable *Src0R = legalizeToReg(Src0); |
2058 Variable *T = makeReg(Dest->getType()); | 2044 Variable *T = makeReg(Dest->getType()); |
2059 _uxt(T, Src0R); | 2045 _uxt(T, Src0R); |
2060 _mov(Dest, T); | 2046 _mov(Dest, T); |
2061 } | 2047 } |
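Summarizing the cases above: i1 sources are masked with 1, i8/i16 go through uxt, i32 is a plain move, and an i64 destination additionally gets a zeroed high word. A hedged sketch of the value this computes (hypothetical helper):

    #include <cstdint>

    // SrcBits is 1, 8, 16, or 32; the mask models and/uxtb/uxth respectively.
    static uint64_t zextToI64(uint32_t Src, unsigned SrcBits) {
      uint32_t Lo;
      if (SrcBits == 1)
        Lo = Src & 1u;                     // and t, t, #1
      else if (SrcBits == 32)
        Lo = Src;                          // plain mov, no uxt needed
      else
        Lo = Src & ((1u << SrcBits) - 1u); // uxtb / uxth
      return uint64_t(Lo);                 // dst.hi = 0 for i64 destinations
    }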
(...skipping 143 matching lines...) | |
2205 } | 2191 } |
2206 | 2192 |
2207 // a=icmp cond, b, c ==> | 2193 // a=icmp cond, b, c ==> |
2208 // GCC does: | 2194 // GCC does: |
2209 // cmp b.hi, c.hi or cmp b.lo, c.lo | 2195 // cmp b.hi, c.hi or cmp b.lo, c.lo |
2210 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi | 2196 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi |
2211 // mov.<C1> t, #1 mov.<C1> t, #1 | 2197 // mov.<C1> t, #1 mov.<C1> t, #1 |
2212 // mov.<C2> t, #0 mov.<C2> t, #0 | 2198 // mov.<C2> t, #0 mov.<C2> t, #0 |
2213 // mov a, t mov a, t | 2199 // mov a, t mov a, t |
2214 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" | 2200 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" |
2215 // is used for signed compares. In some cases, b and c need to be swapped | 2201 // is used for signed compares. In some cases, b and c need to be swapped as |
2216 // as well. | 2202 // well. |
2217 // | 2203 // |
2218 // LLVM does: | 2204 // LLVM does: |
2219 // for EQ and NE: | 2205 // for EQ and NE: |
2220 // eor t1, b.hi, c.hi | 2206 // eor t1, b.hi, c.hi |
2221 // eor t2, b.lo, c.hi | 2207 // eor t2, b.lo, c.hi |
2222 // orrs t, t1, t2 | 2208 // orrs t, t1, t2 |
2223 // mov.<C> t, #1 | 2209 // mov.<C> t, #1 |
2224 // mov a, t | 2210 // mov a, t |
2225 // | 2211 // |
2226 // that's nice in that it's just as short but has fewer dependencies | 2212 // that's nice in that it's just as short but has fewer dependencies for |
2227 // for better ILP at the cost of more registers. | 2213 // better ILP at the cost of more registers. |
2228 // | 2214 // |
2229 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with | 2215 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two |
2230 // two unconditional mov #0, two cmps, two conditional mov #1, | 2216 // unconditional mov #0, two cmps, two conditional mov #1, and one conditonal |
Jim Stichnoth 2015/09/16 00:01:29: conditional
ascull 2015/09/16 18:30:09: Done.
2231 // and one conditonal reg mov. That has few dependencies for good ILP, | 2217 // reg mov. That has few dependencies for good ILP, but is a longer sequence. |
2232 // but is a longer sequence. | |
2233 // | 2218 // |
2234 // So, we are going with the GCC version since it's usually better (except | 2219 // So, we are going with the GCC version since it's usually better (except |
2235 // perhaps for eq/ne). We could revisit special-casing eq/ne later. | 2220 // perhaps for eq/ne). We could revisit special-casing eq/ne later. |
2236 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2221 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
2237 Constant *One = Ctx->getConstantInt32(1); | 2222 Constant *One = Ctx->getConstantInt32(1); |
2238 if (Src0->getType() == IceType_i64) { | 2223 if (Src0->getType() == IceType_i64) { |
2239 InstIcmp::ICond Conditon = Inst->getCondition(); | 2224 InstIcmp::ICond Conditon = Inst->getCondition(); |
2240 size_t Index = static_cast<size_t>(Conditon); | 2225 size_t Index = static_cast<size_t>(Conditon); |
2241 assert(Index < llvm::array_lengthof(TableIcmp64)); | 2226 assert(Index < llvm::array_lengthof(TableIcmp64)); |
2242 Variable *Src0Lo, *Src0Hi; | 2227 Variable *Src0Lo, *Src0Hi; |
2243 Operand *Src1LoRF, *Src1HiRF; | 2228 Operand *Src1LoRF, *Src1HiRF; |
2244 if (TableIcmp64[Index].Swapped) { | 2229 if (TableIcmp64[Index].Swapped) { |
2245 Src0Lo = legalizeToReg(loOperand(Src1)); | 2230 Src0Lo = legalizeToReg(loOperand(Src1)); |
2246 Src0Hi = legalizeToReg(hiOperand(Src1)); | 2231 Src0Hi = legalizeToReg(hiOperand(Src1)); |
2247 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); | 2232 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
2248 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | 2233 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); |
2249 } else { | 2234 } else { |
2250 Src0Lo = legalizeToReg(loOperand(Src0)); | 2235 Src0Lo = legalizeToReg(loOperand(Src0)); |
2251 Src0Hi = legalizeToReg(hiOperand(Src0)); | 2236 Src0Hi = legalizeToReg(hiOperand(Src0)); |
2252 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); | 2237 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); |
2253 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); | 2238 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); |
2254 } | 2239 } |
2255 Variable *T = makeReg(IceType_i32); | 2240 Variable *T = makeReg(IceType_i32); |
2256 if (TableIcmp64[Index].IsSigned) { | 2241 if (TableIcmp64[Index].IsSigned) { |
2257 Variable *ScratchReg = makeReg(IceType_i32); | 2242 Variable *ScratchReg = makeReg(IceType_i32); |
2258 _cmp(Src0Lo, Src1LoRF); | 2243 _cmp(Src0Lo, Src1LoRF); |
2259 _sbcs(ScratchReg, Src0Hi, Src1HiRF); | 2244 _sbcs(ScratchReg, Src0Hi, Src1HiRF); |
2260 // ScratchReg isn't going to be used, but we need the | 2245 // ScratchReg isn't going to be used, but we need the side-effect of |
2261 // side-effect of setting flags from this operation. | 2246 // setting flags from this operation. |
2262 Context.insert(InstFakeUse::create(Func, ScratchReg)); | 2247 Context.insert(InstFakeUse::create(Func, ScratchReg)); |
2263 } else { | 2248 } else { |
2264 _cmp(Src0Hi, Src1HiRF); | 2249 _cmp(Src0Hi, Src1HiRF); |
2265 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ); | 2250 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ); |
2266 } | 2251 } |
2267 _mov(T, One, TableIcmp64[Index].C1); | 2252 _mov(T, One, TableIcmp64[Index].C1); |
2268 _mov_nonkillable(T, Zero, TableIcmp64[Index].C2); | 2253 _mov_nonkillable(T, Zero, TableIcmp64[Index].C2); |
2269 _mov(Dest, T); | 2254 _mov(Dest, T); |
2270 return; | 2255 return; |
2271 } | 2256 } |
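In the unsigned case, the cmp on the high words decides the ordering outright unless they are equal, and only then does the EQ-predicated cmp on the low words supply the flags that the two predicated movs turn into 0 or 1. A C-level sketch of what that computes for an unsigned less-than (hypothetical helper; the signed orderings use the cmp/sbcs pair instead):

    #include <cstdint>

    // cmp b.hi, c.hi ; cmp.eq b.lo, c.lo ; then predicated movs pick 1 or 0.
    static bool icmpUlt64(uint32_t BLo, uint32_t BHi, uint32_t CLo,
                          uint32_t CHi) {
      if (BHi != CHi)
        return BHi < CHi; // high words differ: they decide the result
      return BLo < CLo;   // high words equal: low-word compare sets the flags
    }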
2272 | 2257 |
2273 // a=icmp cond b, c ==> | 2258 // a=icmp cond b, c ==> |
2274 // GCC does: | 2259 // GCC does: |
2275 // <u/s>xtb tb, b | 2260 // <u/s>xtb tb, b |
2276 // <u/s>xtb tc, c | 2261 // <u/s>xtb tc, c |
2277 // cmp tb, tc | 2262 // cmp tb, tc |
2278 // mov.C1 t, #0 | 2263 // mov.C1 t, #0 |
2279 // mov.C2 t, #1 | 2264 // mov.C2 t, #1 |
2280 // mov a, t | 2265 // mov a, t |
2281 // where the unsigned/sign extension is not needed for 32-bit. | 2266 // where the unsigned/sign extension is not needed for 32-bit. They also have |
2282 // They also have special cases for EQ and NE. E.g., for NE: | 2267 // special cases for EQ and NE. E.g., for NE: |
2283 // <extend to tb, tc> | 2268 // <extend to tb, tc> |
2284 // subs t, tb, tc | 2269 // subs t, tb, tc |
2285 // movne t, #1 | 2270 // movne t, #1 |
2286 // mov a, t | 2271 // mov a, t |
2287 // | 2272 // |
2288 // LLVM does: | 2273 // LLVM does: |
2289 // lsl tb, b, #<N> | 2274 // lsl tb, b, #<N> |
2290 // mov t, #0 | 2275 // mov t, #0 |
2291 // cmp tb, c, lsl #<N> | 2276 // cmp tb, c, lsl #<N> |
2292 // mov.<C> t, #1 | 2277 // mov.<C> t, #1 |
2293 // mov a, t | 2278 // mov a, t |
2294 // | 2279 // |
2295 // the left shift is by 0, 16, or 24, which allows the comparison to focus | 2280 // the left shift is by 0, 16, or 24, which allows the comparison to focus on |
2296 // on the digits that actually matter (for 16-bit or 8-bit signed/unsigned). | 2281 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For |
2297 // For the unsigned case, for some reason it does similar to GCC and does | 2282 // the unsigned case, for some reason it does similar to GCC and does a uxtb |
2298 // a uxtb first. It's not clear to me why that special-casing is needed. | 2283 // first. It's not clear to me why that special-casing is needed. |
2299 // | 2284 // |
2300 // We'll go with the LLVM way for now, since it's shorter and has just as | 2285 // We'll go with the LLVM way for now, since it's shorter and has just as few |
2301 // few dependencies. | 2286 // dependencies. |
2302 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); | 2287 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); |
2303 assert(ShiftAmt >= 0); | 2288 assert(ShiftAmt >= 0); |
2304 Constant *ShiftConst = nullptr; | 2289 Constant *ShiftConst = nullptr; |
2305 Variable *Src0R = nullptr; | 2290 Variable *Src0R = nullptr; |
2306 Variable *T = makeReg(IceType_i32); | 2291 Variable *T = makeReg(IceType_i32); |
2307 if (ShiftAmt) { | 2292 if (ShiftAmt) { |
2308 ShiftConst = Ctx->getConstantInt32(ShiftAmt); | 2293 ShiftConst = Ctx->getConstantInt32(ShiftAmt); |
2309 Src0R = makeReg(IceType_i32); | 2294 Src0R = makeReg(IceType_i32); |
2310 _lsl(Src0R, legalizeToReg(Src0), ShiftConst); | 2295 _lsl(Src0R, legalizeToReg(Src0), ShiftConst); |
2311 } else { | 2296 } else { |
(...skipping 22 matching lines...) | |
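The lsl-by-(32 - N) trick promotes an i8/i16 compare to a full 32-bit cmp without an explicit extension, because moving the significant bits to the top of the register preserves both the signed and the unsigned ordering. A minimal sketch for a signed 16-bit compare (hypothetical helper; the casts rely on two's-complement narrowing, implementation-defined before C++20 but universal in practice):

    #include <cstdint>

    // lsl ta, a, #16 ; cmp ta, b, lsl #16 ; predicated mov materializes 0/1.
    static bool icmpSlt16(int16_t A, int16_t B) {
      int32_t TA = static_cast<int32_t>(
          static_cast<uint32_t>(static_cast<uint16_t>(A)) << 16);
      int32_t TB = static_cast<int32_t>(
          static_cast<uint32_t>(static_cast<uint16_t>(B)) << 16);
      return TA < TB; // the 32-bit flags now reflect the 16-bit signed compare
    }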
2334 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | 2319 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
2335 switch (Instr->getIntrinsicInfo().ID) { | 2320 switch (Instr->getIntrinsicInfo().ID) { |
2336 case Intrinsics::AtomicCmpxchg: { | 2321 case Intrinsics::AtomicCmpxchg: { |
2337 UnimplementedError(Func->getContext()->getFlags()); | 2322 UnimplementedError(Func->getContext()->getFlags()); |
2338 return; | 2323 return; |
2339 } | 2324 } |
2340 case Intrinsics::AtomicFence: | 2325 case Intrinsics::AtomicFence: |
2341 UnimplementedError(Func->getContext()->getFlags()); | 2326 UnimplementedError(Func->getContext()->getFlags()); |
2342 return; | 2327 return; |
2343 case Intrinsics::AtomicFenceAll: | 2328 case Intrinsics::AtomicFenceAll: |
 2344 // NOTE: FenceAll should prevent any load/store from being moved | 2329 // NOTE: FenceAll should prevent any load/store from being moved across the
2345 // across the fence (both atomic and non-atomic). The InstARM32Mfence | 2330 // fence (both atomic and non-atomic). The InstARM32Mfence instruction is |
2346 // instruction is currently marked coarsely as "HasSideEffects". | 2331 // currently marked coarsely as "HasSideEffects". |
2347 UnimplementedError(Func->getContext()->getFlags()); | 2332 UnimplementedError(Func->getContext()->getFlags()); |
2348 return; | 2333 return; |
2349 case Intrinsics::AtomicIsLockFree: { | 2334 case Intrinsics::AtomicIsLockFree: { |
2350 UnimplementedError(Func->getContext()->getFlags()); | 2335 UnimplementedError(Func->getContext()->getFlags()); |
2351 return; | 2336 return; |
2352 } | 2337 } |
2353 case Intrinsics::AtomicLoad: { | 2338 case Intrinsics::AtomicLoad: { |
2354 UnimplementedError(Func->getContext()->getFlags()); | 2339 UnimplementedError(Func->getContext()->getFlags()); |
2355 return; | 2340 return; |
2356 } | 2341 } |
(...skipping 37 matching lines...) | |
2394 case Intrinsics::Ctpop: { | 2379 case Intrinsics::Ctpop: { |
2395 Variable *Dest = Instr->getDest(); | 2380 Variable *Dest = Instr->getDest(); |
2396 Operand *Val = Instr->getArg(0); | 2381 Operand *Val = Instr->getArg(0); |
2397 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) | 2382 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) |
2398 ? H_call_ctpop_i32 | 2383 ? H_call_ctpop_i32 |
2399 : H_call_ctpop_i64, | 2384 : H_call_ctpop_i64, |
2400 Dest, 1); | 2385 Dest, 1); |
2401 Call->addArg(Val); | 2386 Call->addArg(Val); |
2402 lowerCall(Call); | 2387 lowerCall(Call); |
2403 // The popcount helpers always return 32-bit values, while the intrinsic's | 2388 // The popcount helpers always return 32-bit values, while the intrinsic's |
2404 // signature matches some 64-bit platform's native instructions and | 2389 // signature matches some 64-bit platform's native instructions and expect |
2405 // expect to fill a 64-bit reg. Thus, clear the upper bits of the dest | 2390 // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in |
2406 // just in case the user doesn't do that in the IR or doesn't toss the bits | 2391 // case the user doesn't do that in the IR or doesn't toss the bits via |
2407 // via truncate. | 2392 // truncate. |
2408 if (Val->getType() == IceType_i64) { | 2393 if (Val->getType() == IceType_i64) { |
2409 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2394 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
2410 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2395 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
2411 Variable *T = nullptr; | 2396 Variable *T = nullptr; |
2412 _mov(T, Zero); | 2397 _mov(T, Zero); |
2413 _mov(DestHi, T); | 2398 _mov(DestHi, T); |
2414 } | 2399 } |
2415 return; | 2400 return; |
2416 } | 2401 } |
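The helper call computes the population count; since that result always fits in 32 bits, the i64 form only needs the high word of the destination cleared, which is what the extra mov of zero does. A sketch of what a __popcount-style helper computes (the body below is an assumption, not Subzero or compiler-rt code):

    #include <cstdint>

    // Kernighan's method: each iteration clears the lowest set bit.
    static uint32_t popcount64(uint64_t V) {
      uint32_t N = 0;
      for (; V != 0; V &= V - 1)
        ++N;
      return N; // at most 64, so the upper 32 result bits are always zero
    }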
2417 case Intrinsics::Ctlz: { | 2402 case Intrinsics::Ctlz: { |
2418 // The "is zero undef" parameter is ignored and we always return | 2403 // The "is zero undef" parameter is ignored and we always return a |
2419 // a well-defined value. | 2404 // well-defined value. |
2420 Operand *Val = Instr->getArg(0); | 2405 Operand *Val = Instr->getArg(0); |
2421 Variable *ValLoR; | 2406 Variable *ValLoR; |
2422 Variable *ValHiR = nullptr; | 2407 Variable *ValHiR = nullptr; |
2423 if (Val->getType() == IceType_i64) { | 2408 if (Val->getType() == IceType_i64) { |
2424 Val = legalizeUndef(Val); | 2409 Val = legalizeUndef(Val); |
2425 ValLoR = legalizeToReg(loOperand(Val)); | 2410 ValLoR = legalizeToReg(loOperand(Val)); |
2426 ValHiR = legalizeToReg(hiOperand(Val)); | 2411 ValHiR = legalizeToReg(hiOperand(Val)); |
2427 } else { | 2412 } else { |
2428 ValLoR = legalizeToReg(Val); | 2413 ValLoR = legalizeToReg(Val); |
2429 } | 2414 } |
(...skipping 126 matching lines...) | |
2556 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 2541 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
2557 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2542 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
2558 Operand *Zero = | 2543 Operand *Zero = |
2559 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); | 2544 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); |
2560 Operand *ThirtyTwo = | 2545 Operand *ThirtyTwo = |
2561 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); | 2546 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); |
2562 _cmp(ValHiR, Zero); | 2547 _cmp(ValHiR, Zero); |
2563 Variable *T2 = makeReg(IceType_i32); | 2548 Variable *T2 = makeReg(IceType_i32); |
2564 _add(T2, T, ThirtyTwo); | 2549 _add(T2, T, ThirtyTwo); |
2565 _clz(T2, ValHiR, CondARM32::NE); | 2550 _clz(T2, ValHiR, CondARM32::NE); |
2566 // T2 is actually a source as well when the predicate is not AL | 2551 // T2 is actually a source as well when the predicate is not AL (since it |
2567 // (since it may leave T2 alone). We use set_dest_nonkillable to | 2552 // may leave T2 alone). We use set_dest_nonkillable to prolong the liveness |
2568 // prolong the liveness of T2 as if it was used as a source. | 2553 // of T2 as if it was used as a source. |
2569 _set_dest_nonkillable(); | 2554 _set_dest_nonkillable(); |
2570 _mov(DestLo, T2); | 2555 _mov(DestLo, T2); |
2571 Variable *T3 = nullptr; | 2556 Variable *T3 = nullptr; |
2572 _mov(T3, Zero); | 2557 _mov(T3, Zero); |
2573 _mov(DestHi, T3); | 2558 _mov(DestHi, T3); |
2574 return; | 2559 return; |
2575 } | 2560 } |
2576 _mov(Dest, T); | 2561 _mov(Dest, T); |
2577 return; | 2562 return; |
2578 } | 2563 } |
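For i64 the result is clz(lo) + 32 when the high word is zero and clz(hi) otherwise; the add into T2 followed by the NE-predicated clz implements exactly that select (T is assumed to hold clz of the low word, computed in the lines skipped above). A host-side sketch with hypothetical helpers:

    #include <cstdint>

    // Portable stand-in for the ARM CLZ instruction; returns 32 for input 0.
    static uint32_t clz32(uint32_t V) {
      uint32_t N = 0;
      for (uint32_t Bit = 0x80000000u; Bit != 0 && (V & Bit) == 0; Bit >>= 1)
        ++N;
      return N;
    }

    // clz t, lo ; add t2, t, #32 ; cmp hi, #0 ; clzne t2, hi
    static uint32_t ctlz64(uint32_t Lo, uint32_t Hi) {
      uint32_t T2 = clz32(Lo) + 32; // taken when the high word is zero
      if (Hi != 0)
        T2 = clz32(Hi);
      return T2;                    // dest.hi is then set to 0
    }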
2579 | 2564 |
2580 void TargetARM32::lowerLoad(const InstLoad *Load) { | 2565 void TargetARM32::lowerLoad(const InstLoad *Load) { |
2581 // A Load instruction can be treated the same as an Assign | 2566 // A Load instruction can be treated the same as an Assign instruction, after |
2582 // instruction, after the source operand is transformed into an | 2567 // the source operand is transformed into an OperandARM32Mem operand. |
2583 // OperandARM32Mem operand. | |
2584 Type Ty = Load->getDest()->getType(); | 2568 Type Ty = Load->getDest()->getType(); |
2585 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); | 2569 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); |
2586 Variable *DestLoad = Load->getDest(); | 2570 Variable *DestLoad = Load->getDest(); |
2587 | 2571 |
 2588 // TODO(jvoung): handle folding opportunities. Sign and zero extension | 2572 // TODO(jvoung): handle folding opportunities. Sign and zero extension can
2589 // can be folded into a load. | 2573 // be folded into a load. |
2590 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); | 2574 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); |
2591 lowerAssign(Assign); | 2575 lowerAssign(Assign); |
2592 } | 2576 } |
2593 | 2577 |
2594 void TargetARM32::doAddressOptLoad() { | 2578 void TargetARM32::doAddressOptLoad() { |
2595 UnimplementedError(Func->getContext()->getFlags()); | 2579 UnimplementedError(Func->getContext()->getFlags()); |
2596 } | 2580 } |
2597 | 2581 |
2598 void TargetARM32::randomlyInsertNop(float Probability, | 2582 void TargetARM32::randomlyInsertNop(float Probability, |
2599 RandomNumberGenerator &RNG) { | 2583 RandomNumberGenerator &RNG) { |
(...skipping 25 matching lines...) | |
2625 Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0); | 2609 Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0); |
2626 Reg = D0; | 2610 Reg = D0; |
2627 } else if (isVectorType(Src0->getType())) { | 2611 } else if (isVectorType(Src0->getType())) { |
2628 Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0); | 2612 Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0); |
2629 Reg = Q0; | 2613 Reg = Q0; |
2630 } else { | 2614 } else { |
2631 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex); | 2615 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex); |
2632 _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0); | 2616 _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0); |
2633 } | 2617 } |
2634 } | 2618 } |
2635 // Add a ret instruction even if sandboxing is enabled, because | 2619 // Add a ret instruction even if sandboxing is enabled, because addEpilog |
2636 // addEpilog explicitly looks for a ret instruction as a marker for | 2620 // explicitly looks for a ret instruction as a marker for where to insert the |
2637 // where to insert the frame removal instructions. | 2621 // frame removal instructions. addEpilog is responsible for restoring the |
2638 // addEpilog is responsible for restoring the "lr" register as needed | 2622 // "lr" register as needed prior to this ret instruction. |
2639 // prior to this ret instruction. | |
2640 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg); | 2623 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg); |
2641 // Add a fake use of sp to make sure sp stays alive for the entire | 2624 // Add a fake use of sp to make sure sp stays alive for the entire function. |
2642 // function. Otherwise post-call sp adjustments get dead-code | 2625 // Otherwise post-call sp adjustments get dead-code eliminated. |
2643 // eliminated. TODO: Are there more places where the fake use | 2626 // TODO: Are there more places where the fake use should be inserted? E.g. |
2644 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not | 2627 // "void f(int n){while(1) g(n);}" may not have a ret instruction. |
2645 // have a ret instruction. | |
2646 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 2628 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
2647 Context.insert(InstFakeUse::create(Func, SP)); | 2629 Context.insert(InstFakeUse::create(Func, SP)); |
2648 } | 2630 } |
2649 | 2631 |
2650 void TargetARM32::lowerSelect(const InstSelect *Inst) { | 2632 void TargetARM32::lowerSelect(const InstSelect *Inst) { |
2651 Variable *Dest = Inst->getDest(); | 2633 Variable *Dest = Inst->getDest(); |
2652 Type DestTy = Dest->getType(); | 2634 Type DestTy = Dest->getType(); |
2653 Operand *SrcT = Inst->getTrueOperand(); | 2635 Operand *SrcT = Inst->getTrueOperand(); |
2654 Operand *SrcF = Inst->getFalseOperand(); | 2636 Operand *SrcF = Inst->getFalseOperand(); |
2655 Operand *Condition = Inst->getCondition(); | 2637 Operand *Condition = Inst->getCondition(); |
(...skipping 113 matching lines...) | |
2769 } | 2751 } |
2770 | 2752 |
2771 // Helper for legalize() to emit the right code to lower an operand to a | 2753 // Helper for legalize() to emit the right code to lower an operand to a |
2772 // register of the appropriate type. | 2754 // register of the appropriate type. |
2773 Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) { | 2755 Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) { |
2774 Type Ty = Src->getType(); | 2756 Type Ty = Src->getType(); |
2775 Variable *Reg = makeReg(Ty, RegNum); | 2757 Variable *Reg = makeReg(Ty, RegNum); |
2776 if (isVectorType(Ty) || isFloatingType(Ty)) { | 2758 if (isVectorType(Ty) || isFloatingType(Ty)) { |
2777 _vmov(Reg, Src); | 2759 _vmov(Reg, Src); |
2778 } else { | 2760 } else { |
2779 // Mov's Src operand can really only be the flexible second operand type | 2761 // Mov's Src operand can really only be the flexible second operand type or |
2780 // or a register. Users should guarantee that. | 2762 // a register. Users should guarantee that. |
2781 _mov(Reg, Src); | 2763 _mov(Reg, Src); |
2782 } | 2764 } |
2783 return Reg; | 2765 return Reg; |
2784 } | 2766 } |
2785 | 2767 |
2786 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, | 2768 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, |
2787 int32_t RegNum) { | 2769 int32_t RegNum) { |
2788 Type Ty = From->getType(); | 2770 Type Ty = From->getType(); |
2789 // Assert that a physical register is allowed. To date, all calls | 2771 // Assert that a physical register is allowed. To date, all calls to |
2790 // to legalize() allow a physical register. Legal_Flex converts | 2772 // legalize() allow a physical register. Legal_Flex converts registers to the |
2791 // registers to the right type OperandARM32FlexReg as needed. | 2773 // right type OperandARM32FlexReg as needed. |
2792 assert(Allowed & Legal_Reg); | 2774 assert(Allowed & Legal_Reg); |
2793 // Go through the various types of operands: | 2775 // Go through the various types of operands: OperandARM32Mem, |
2794 // OperandARM32Mem, OperandARM32Flex, Constant, and Variable. | 2776 // OperandARM32Flex, Constant, and Variable. Given the above assertion, if |
2795 // Given the above assertion, if type of operand is not legal | 2777 // type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we |
2796 // (e.g., OperandARM32Mem and !Legal_Mem), we can always copy | 2778 // can always copy to a register. |
2797 // to a register. | |
2798 if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) { | 2779 if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) { |
2799 // Before doing anything with a Mem operand, we need to ensure | 2780 // Before doing anything with a Mem operand, we need to ensure that the |
2800 // that the Base and Index components are in physical registers. | 2781 // Base and Index components are in physical registers. |
2801 Variable *Base = Mem->getBase(); | 2782 Variable *Base = Mem->getBase(); |
2802 Variable *Index = Mem->getIndex(); | 2783 Variable *Index = Mem->getIndex(); |
2803 Variable *RegBase = nullptr; | 2784 Variable *RegBase = nullptr; |
2804 Variable *RegIndex = nullptr; | 2785 Variable *RegIndex = nullptr; |
2805 if (Base) { | 2786 if (Base) { |
2806 RegBase = legalizeToReg(Base); | 2787 RegBase = legalizeToReg(Base); |
2807 } | 2788 } |
2808 if (Index) { | 2789 if (Index) { |
2809 RegIndex = legalizeToReg(Index); | 2790 RegIndex = legalizeToReg(Index); |
2810 } | 2791 } |
(...skipping 24 matching lines...)
2835 From = Mem; | 2816 From = Mem; |
2836 } | 2817 } |
2837 return From; | 2818 return From; |
2838 } | 2819 } |
2839 | 2820 |
2840 if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) { | 2821 if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) { |
2841 if (!(Allowed & Legal_Flex)) { | 2822 if (!(Allowed & Legal_Flex)) { |
2842 if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) { | 2823 if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) { |
2843 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) { | 2824 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) { |
2844 From = FlexReg->getReg(); | 2825 From = FlexReg->getReg(); |
2845 // Fall through and let From be checked as a Variable below, | 2826 // Fall through and let From be checked as a Variable below, where it |
2846 // where it may or may not need a register. | 2827 // may or may not need a register. |
2847 } else { | 2828 } else { |
2848 return copyToReg(Flex, RegNum); | 2829 return copyToReg(Flex, RegNum); |
2849 } | 2830 } |
2850 } else { | 2831 } else { |
2851 return copyToReg(Flex, RegNum); | 2832 return copyToReg(Flex, RegNum); |
2852 } | 2833 } |
2853 } else { | 2834 } else { |
2854 return From; | 2835 return From; |
2855 } | 2836 } |
2856 } | 2837 } |
2857 | 2838 |
2858 if (llvm::isa<Constant>(From)) { | 2839 if (llvm::isa<Constant>(From)) { |
2859 if (llvm::isa<ConstantUndef>(From)) { | 2840 if (llvm::isa<ConstantUndef>(From)) { |
2860 From = legalizeUndef(From, RegNum); | 2841 From = legalizeUndef(From, RegNum); |
2861 if (isVectorType(Ty)) | 2842 if (isVectorType(Ty)) |
2862 return From; | 2843 return From; |
2863 } | 2844 } |
2864 // There should be no constants of vector type (other than undef). | 2845 // There should be no constants of vector type (other than undef). |
2865 assert(!isVectorType(Ty)); | 2846 assert(!isVectorType(Ty)); |
2866 bool CanBeFlex = Allowed & Legal_Flex; | 2847 bool CanBeFlex = Allowed & Legal_Flex; |
2867 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) { | 2848 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) { |
2868 uint32_t RotateAmt; | 2849 uint32_t RotateAmt; |
2869 uint32_t Immed_8; | 2850 uint32_t Immed_8; |
2870 uint32_t Value = static_cast<uint32_t>(C32->getValue()); | 2851 uint32_t Value = static_cast<uint32_t>(C32->getValue()); |
2871 // Check if the immediate will fit in a Flexible second operand, | 2852 // Check if the immediate will fit in a Flexible second operand, if a |
2872 // if a Flexible second operand is allowed. We need to know the exact | 2853 // Flexible second operand is allowed. We need to know the exact value, |
2873 // value, so that rules out relocatable constants. | 2854 // so that rules out relocatable constants. Also try the inverse and use |
2874 // Also try the inverse and use MVN if possible. | 2855 // MVN if possible. |
2875 if (CanBeFlex && | 2856 if (CanBeFlex && |
2876 OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) { | 2857 OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) { |
2877 return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); | 2858 return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); |
2878 } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm( | 2859 } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm( |
2879 ~Value, &RotateAmt, &Immed_8)) { | 2860 ~Value, &RotateAmt, &Immed_8)) { |
2880 auto InvertedFlex = | 2861 auto InvertedFlex = |
2881 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); | 2862 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); |
2882 Variable *Reg = makeReg(Ty, RegNum); | 2863 Variable *Reg = makeReg(Ty, RegNum); |
2883 _mvn(Reg, InvertedFlex); | 2864 _mvn(Reg, InvertedFlex); |
2884 return Reg; | 2865 return Reg; |
2885 } else { | 2866 } else { |
2886 // Do a movw/movt to a register. | 2867 // Do a movw/movt to a register. |
2887 Variable *Reg = makeReg(Ty, RegNum); | 2868 Variable *Reg = makeReg(Ty, RegNum); |
2888 uint32_t UpperBits = (Value >> 16) & 0xFFFF; | 2869 uint32_t UpperBits = (Value >> 16) & 0xFFFF; |
2889 _movw(Reg, | 2870 _movw(Reg, |
2890 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); | 2871 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); |
2891 if (UpperBits != 0) { | 2872 if (UpperBits != 0) { |
2892 _movt(Reg, Ctx->getConstantInt32(UpperBits)); | 2873 _movt(Reg, Ctx->getConstantInt32(UpperBits)); |
2893 } | 2874 } |
2894 return Reg; | 2875 return Reg; |
2895 } | 2876 } |
2896 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { | 2877 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { |
2897 Variable *Reg = makeReg(Ty, RegNum); | 2878 Variable *Reg = makeReg(Ty, RegNum); |
2898 _movw(Reg, C); | 2879 _movw(Reg, C); |
2899 _movt(Reg, C); | 2880 _movt(Reg, C); |
2900 return Reg; | 2881 return Reg; |
2901 } else { | 2882 } else { |
2902 assert(isScalarFloatingType(Ty)); | 2883 assert(isScalarFloatingType(Ty)); |
2903 // Load floats/doubles from literal pool. | 2884 // Load floats/doubles from literal pool. |
2904 // TODO(jvoung): Allow certain immediates to be encoded directly in | 2885 // TODO(jvoung): Allow certain immediates to be encoded directly in an |
2905 // an operand. See Table A7-18 of the ARM manual: | 2886 // operand. See Table A7-18 of the ARM manual: "Floating-point modified |
2906 // "Floating-point modified immediate constants". | 2887 // immediate constants". Or, for 32-bit floating point numbers, just |
2907 // Or, for 32-bit floating point numbers, just encode the raw bits | 2888 // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG, |
2908 // into a movw/movt pair to GPR, and vmov to an SREG, instead of using | 2889 // instead of using a movw/movt pair to get the const-pool address then |
2909 // a movw/movt pair to get the const-pool address then loading to SREG. | 2890 // loading to SREG. |
2910 std::string Buffer; | 2891 std::string Buffer; |
2911 llvm::raw_string_ostream StrBuf(Buffer); | 2892 llvm::raw_string_ostream StrBuf(Buffer); |
2912 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf); | 2893 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf); |
2913 llvm::cast<Constant>(From)->setShouldBePooled(true); | 2894 llvm::cast<Constant>(From)->setShouldBePooled(true); |
2914 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); | 2895 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); |
2915 Variable *BaseReg = makeReg(getPointerType()); | 2896 Variable *BaseReg = makeReg(getPointerType()); |
2916 _movw(BaseReg, Offset); | 2897 _movw(BaseReg, Offset); |
2917 _movt(BaseReg, Offset); | 2898 _movt(BaseReg, Offset); |
2918 From = formMemoryOperand(BaseReg, Ty); | 2899 From = formMemoryOperand(BaseReg, Ty); |
2919 return copyToReg(From, RegNum); | 2900 return copyToReg(From, RegNum); |
2920 } | 2901 } |
2921 } | 2902 } |
2922 | 2903 |
2923 if (auto Var = llvm::dyn_cast<Variable>(From)) { | 2904 if (auto Var = llvm::dyn_cast<Variable>(From)) { |
2924 // Check if the variable is guaranteed a physical register. This | 2905 // Check if the variable is guaranteed a physical register. This can happen |
2925 // can happen either when the variable is pre-colored or when it is | 2906 // either when the variable is pre-colored or when it is assigned infinite |
2926 // assigned infinite weight. | 2907 // weight. |
2927 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); | 2908 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); |
2928 // We need a new physical register for the operand if: | 2909 // We need a new physical register for the operand if: |
2929 // Mem is not allowed and Var isn't guaranteed a physical | 2910 // Mem is not allowed and Var isn't guaranteed a physical |
2930 // register, or | 2911 // register, or |
2931 // RegNum is required and Var->getRegNum() doesn't match. | 2912 // RegNum is required and Var->getRegNum() doesn't match. |
2932 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || | 2913 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || |
2933 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { | 2914 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { |
2934 From = copyToReg(From, RegNum); | 2915 From = copyToReg(From, RegNum); |
2935 } | 2916 } |
2936 return From; | 2917 return From; |
2937 } | 2918 } |
2938 llvm_unreachable("Unhandled operand kind in legalize()"); | 2919 llvm_unreachable("Unhandled operand kind in legalize()"); |
2939 | 2920 |
2940 return From; | 2921 return From; |
2941 } | 2922 } |
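The constant path above hinges on whether a 32-bit value (or its bitwise inverse, for the MVN case) fits ARM's flexible second operand encoding: an 8-bit immediate rotated right by an even amount. Below is a minimal standalone sketch of that test; the name canEncodeAsFlexImm is hypothetical and only illustrates the check that OperandARM32FlexImm::canHoldImm performs in the code above, it is not Subzero's implementation. Values for which neither Value nor ~Value passes fall back to the movw/movt pair shown above.

#include <cstdint>

// Returns true if Value can be written as an 8-bit immediate rotated right
// by an even amount (0, 2, ..., 30), i.e. as an ARM32 flexible immediate.
// On success, *Immed8 holds the 8-bit payload and *RotateAmt the rotation
// (the instruction encoding stores RotateAmt / 2 in a 4-bit field).
static bool canEncodeAsFlexImm(uint32_t Value, uint32_t *RotateAmt,
                               uint32_t *Immed8) {
  for (uint32_t Rot = 0; Rot < 32; Rot += 2) {
    // Rotating Value left by Rot undoes a right-rotation by the same amount.
    uint32_t Unrotated = (Value << Rot) | (Value >> ((32 - Rot) & 31));
    if (Unrotated <= 0xFF) {
      *RotateAmt = Rot;
      *Immed8 = Unrotated;
      return true;
    }
  }
  return false;
}

For example, 0xFF000000 is encodable (Immed8 = 0xFF, RotateAmt = 8), while 0x12345678 has significant bits spread over more than eight positions and would instead be materialized with a movw/movt pair.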
2942 | 2923 |
2943 /// Provide a trivial wrapper to legalize() for this common usage. | 2924 /// Provide a trivial wrapper to legalize() for this common usage. |
2944 Variable *TargetARM32::legalizeToReg(Operand *From, int32_t RegNum) { | 2925 Variable *TargetARM32::legalizeToReg(Operand *From, int32_t RegNum) { |
2945 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); | 2926 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); |
2946 } | 2927 } |
2947 | 2928 |
2948 /// Legalize undef values to concrete values. | 2929 /// Legalize undef values to concrete values. |
2949 Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) { | 2930 Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) { |
2950 Type Ty = From->getType(); | 2931 Type Ty = From->getType(); |
2951 if (llvm::isa<ConstantUndef>(From)) { | 2932 if (llvm::isa<ConstantUndef>(From)) { |
2952 // Lower undefs to zero. Another option is to lower undefs to an | 2933 // Lower undefs to zero. Another option is to lower undefs to an |
2953 // uninitialized register; however, using an uninitialized register | 2934 // uninitialized register; however, using an uninitialized register results |
2954 // results in less predictable code. | 2935 // in less predictable code. |
2955 // | 2936 // |
2956 // If in the future the implementation is changed to lower undef | 2937 // If in the future the implementation is changed to lower undef values to |
2957 // values to uninitialized registers, a FakeDef will be needed: | 2938 // uninitialized registers, a FakeDef will be needed: |
2958 // Context.insert(InstFakeDef::create(Func, Reg)); | 2939 // Context.insert(InstFakeDef::create(Func, Reg)); This is in order to |
2959 // This is in order to ensure that the live range of Reg is not | 2940 // ensure that the live range of Reg is not overestimated. If the constant |
2960 // overestimated. If the constant being lowered is a 64 bit value, | 2941 // being lowered is a 64 bit value, then the result should be split and the |
2961 // then the result should be split and the lo and hi components will | 2942 // lo and hi components will need to go in uninitialized registers. |
2962 // need to go in uninitialized registers. | |
2963 if (isVectorType(Ty)) | 2943 if (isVectorType(Ty)) |
2964 return makeVectorOfZeros(Ty, RegNum); | 2944 return makeVectorOfZeros(Ty, RegNum); |
2965 return Ctx->getConstantZero(Ty); | 2945 return Ctx->getConstantZero(Ty); |
2966 } | 2946 } |
2967 return From; | 2947 return From; |
2968 } | 2948 } |
2969 | 2949 |
2970 OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) { | 2950 OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) { |
2971 OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand); | 2951 OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand); |
2972 // It may be the case that address mode optimization already creates | 2952 // It may be the case that address mode optimization already creates an |
2973 // an OperandARM32Mem, so in that case it wouldn't need another level | 2953 // OperandARM32Mem, so in that case it wouldn't need another level of |
2974 // of transformation. | 2954 // transformation. |
2975 if (Mem) { | 2955 if (Mem) { |
2976 return llvm::cast<OperandARM32Mem>(legalize(Mem)); | 2956 return llvm::cast<OperandARM32Mem>(legalize(Mem)); |
2977 } | 2957 } |
2978 // If we didn't do address mode optimization, then we only | 2958 // If we didn't do address mode optimization, then we only have a base/offset |
2979 // have a base/offset to work with. ARM always requires a base | 2959 // to work with. ARM always requires a base register, so just use that to |
2980 // register, so just use that to hold the operand. | 2960 // hold the operand. |
2981 Variable *Base = legalizeToReg(Operand); | 2961 Variable *Base = legalizeToReg(Operand); |
2982 return OperandARM32Mem::create( | 2962 return OperandARM32Mem::create( |
2983 Func, Ty, Base, | 2963 Func, Ty, Base, |
2984 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); | 2964 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); |
2985 } | 2965 } |
2986 | 2966 |
2987 Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) { | 2967 Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) { |
2988 // There aren't any 64-bit integer registers for ARM32. | 2968 // There aren't any 64-bit integer registers for ARM32. |
2989 assert(Type != IceType_i64); | 2969 assert(Type != IceType_i64); |
2990 Variable *Reg = Func->makeVariable(Type); | 2970 Variable *Reg = Func->makeVariable(Type); |
2991 if (RegNum == Variable::NoRegister) | 2971 if (RegNum == Variable::NoRegister) |
2992 Reg->setMustHaveReg(); | 2972 Reg->setMustHaveReg(); |
2993 else | 2973 else |
2994 Reg->setRegNum(RegNum); | 2974 Reg->setRegNum(RegNum); |
2995 return Reg; | 2975 return Reg; |
2996 } | 2976 } |
2997 | 2977 |
2998 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) { | 2978 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) { |
2999 assert(llvm::isPowerOf2_32(Align)); | 2979 assert(llvm::isPowerOf2_32(Align)); |
3000 uint32_t RotateAmt; | 2980 uint32_t RotateAmt; |
3001 uint32_t Immed_8; | 2981 uint32_t Immed_8; |
3002 Operand *Mask; | 2982 Operand *Mask; |
3003 // Use AND or BIC to mask off the bits, depending on which immediate fits | 2983 // Use AND or BIC to mask off the bits, depending on which immediate fits (if |
3004 // (if it fits at all). Assume Align is usually small, in which case BIC | 2984 // it fits at all). Assume Align is usually small, in which case BIC works |
3005 // works better. Thus, this rounds down to the alignment. | 2985 // better. Thus, this rounds down to the alignment. |
3006 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) { | 2986 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) { |
3007 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex); | 2987 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex); |
3008 _bic(Reg, Reg, Mask); | 2988 _bic(Reg, Reg, Mask); |
3009 } else { | 2989 } else { |
3010 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex); | 2990 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex); |
3011 _and(Reg, Reg, Mask); | 2991 _and(Reg, Reg, Mask); |
3012 } | 2992 } |
3013 } | 2993 } |
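The masking above rounds a register value down to a power-of-two alignment. A minimal sketch of the equivalence, assuming Align is a power of two (the helper name alignDownPow2 is hypothetical, for illustration only):

#include <cassert>
#include <cstdint>

static uint32_t alignDownPow2(uint32_t Value, uint32_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "Align must be 2^n");
  // BIC Reg, Reg, #(Align - 1) clears the low log2(Align) bits ...
  uint32_t ViaBic = Value & ~(Align - 1);
  // ... and AND Reg, Reg, #-Align computes the same result; the lowering
  // simply picks whichever mask fits the flexible-immediate encoding.
  uint32_t ViaAnd = Value & (0u - Align);
  assert(ViaBic == ViaAnd);
  return ViaBic;
}

For example, alignDownPow2(0x1007, 16) yields 0x1000. Small alignments make Align - 1 a small immediate, so the BIC form is usually the encodable one, which is why it is tried first.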
3014 | 2994 |
3015 void TargetARM32::postLower() { | 2995 void TargetARM32::postLower() { |
(...skipping 71 matching lines...)
3087 UnimplementedError(Ctx->getFlags()); | 3067 UnimplementedError(Ctx->getFlags()); |
3088 } | 3068 } |
3089 | 3069 |
3090 TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx) | 3070 TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx) |
3091 : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {} | 3071 : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {} |
3092 | 3072 |
3093 void TargetHeaderARM32::lower() { | 3073 void TargetHeaderARM32::lower() { |
3094 OstreamLocker L(Ctx); | 3074 OstreamLocker L(Ctx); |
3095 Ostream &Str = Ctx->getStrEmit(); | 3075 Ostream &Str = Ctx->getStrEmit(); |
3096 Str << ".syntax unified\n"; | 3076 Str << ".syntax unified\n"; |
3097 // Emit build attributes in format: .eabi_attribute TAG, VALUE. | 3077 // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of |
3098 // See Sec. 2 of "Addenda to, and Errata in the ABI for the ARM architecture" | 3078 // "Addenda to, and Errata in the ABI for the ARM architecture" |
3099 // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf | 3079 // http://infocenter.arm.com
3080 // /help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf | |
3100 // | 3081 // |
3101 // Tag_conformance should be emitted first in a file-scope | 3082 // Tag_conformance should be emitted first in a file-scope sub-subsection
3102 // sub-subsection of the first public subsection of the attributes. | 3083 // of the first public subsection of the attributes. |
3103 Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n"; | 3084 Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n"; |
3104 // Chromebooks are at least A15, but do A9 for higher compat. | 3085 // Chromebooks are at least A15, but do A9 for higher compat. For some |
3105 // For some reason, the LLVM ARM asm parser has the .cpu directive override | 3086 // reason, the LLVM ARM asm parser has the .cpu directive override the mattr |
3106 // the mattr specified on the commandline. So to test hwdiv, we need to set | 3087 // specified on the commandline. So to test hwdiv, we need to set the .cpu |
3107 // the .cpu directive higher (can't just rely on --mattr=...). | 3088 // directive higher (can't just rely on --mattr=...). |
3108 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { | 3089 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { |
3109 Str << ".cpu cortex-a15\n"; | 3090 Str << ".cpu cortex-a15\n"; |
3110 } else { | 3091 } else { |
3111 Str << ".cpu cortex-a9\n"; | 3092 Str << ".cpu cortex-a9\n"; |
3112 } | 3093 } |
3113 Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n" | 3094 Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n" |
3114 << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n"; | 3095 << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n"; |
3115 Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n" | 3096 Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n" |
3116 << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n"; | 3097 << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n"; |
3117 Str << ".fpu neon\n" | 3098 Str << ".fpu neon\n" |
(...skipping 11 matching lines...)
3129 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; | 3110 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; |
3130 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { | 3111 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { |
3131 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; | 3112 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; |
3132 } | 3113 } |
3133 // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 3114 // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
3134 // However, for compatibility with current NaCl LLVM, don't claim that. | 3115 // However, for compatibility with current NaCl LLVM, don't claim that. |
3135 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 3116 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
3136 } | 3117 } |
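Piecing the Str << fragments together, the emitted file header for a target with hardware integer divide would look roughly like the sketch below; attributes produced by the lines skipped above are elided rather than guessed.

.syntax unified
.eabi_attribute 67, "2.09" @ Tag_conformance
.cpu cortex-a15
.eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7
.eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile
.eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes
.eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2
.fpu neon
@ ... attributes emitted by the skipped lines ...
.eabi_attribute 68, 1 @ Tag_Virtualization_use
.eabi_attribute 44, 2 @ Tag_DIV_use
.eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used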
3137 | 3118 |
3138 } // end of namespace Ice | 3119 } // end of namespace Ice |