OLD | NEW |
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// | 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 /// | 9 /// |
10 /// \file | 10 /// \file |
(...skipping 29 matching lines...) |
40 do { \ | 40 do { \ |
41 if (!static_cast<const ClFlags &>(Flags).getSkipUnimplemented()) { \ | 41 if (!static_cast<const ClFlags &>(Flags).getSkipUnimplemented()) { \ |
42 /* Use llvm_unreachable instead of report_fatal_error, which gives \ | 42 /* Use llvm_unreachable instead of report_fatal_error, which gives \ |
43 better stack traces. */ \ | 43 better stack traces. */ \ |
44 llvm_unreachable("Not yet implemented"); \ | 44 llvm_unreachable("Not yet implemented"); \ |
45 abort(); \ | 45 abort(); \ |
46 } \ | 46 } \ |
47 } while (0) | 47 } while (0) |
48 | 48 |
49 // The following table summarizes the logic for lowering the icmp instruction | 49 // The following table summarizes the logic for lowering the icmp instruction |
50 // for i32 and narrower types. Each icmp condition has a clear mapping to an | 50 // for i32 and narrower types. Each icmp condition has a clear mapping to an |
51 // ARM32 conditional move instruction. | 51 // ARM32 conditional move instruction. |
52 | 52 |
53 const struct TableIcmp32_ { | 53 const struct TableIcmp32_ { |
54 CondARM32::Cond Mapping; | 54 CondARM32::Cond Mapping; |
55 } TableIcmp32[] = { | 55 } TableIcmp32[] = { |
56 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \ | 56 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \ |
57 { CondARM32::C_32 } \ | 57 { CondARM32::C_32 } \ |
58 , | 58 , |
59 ICMPARM32_TABLE | 59 ICMPARM32_TABLE |
60 #undef X | 60 #undef X |
61 }; | 61 }; |
62 | 62 |
63 // The following table summarizes the logic for lowering the icmp instruction | 63 // The following table summarizes the logic for lowering the icmp instruction |
64 // for the i64 type. Two conditional moves are needed for setting to 1 or 0. | 64 // for the i64 type. Two conditional moves are needed for setting to 1 or 0. |
65 // The operands may need to be swapped, and there is a slight difference | 65 // The operands may need to be swapped, and there is a slight difference for |
66 // for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc). | 66 // signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc). |
67 const struct TableIcmp64_ { | 67 const struct TableIcmp64_ { |
68 bool IsSigned; | 68 bool IsSigned; |
69 bool Swapped; | 69 bool Swapped; |
70 CondARM32::Cond C1, C2; | 70 CondARM32::Cond C1, C2; |
71 } TableIcmp64[] = { | 71 } TableIcmp64[] = { |
72 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \ | 72 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \ |
73 { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 } \ | 73 { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 } \ |
74 , | 74 , |
75 ICMPARM32_TABLE | 75 ICMPARM32_TABLE |
76 #undef X | 76 #undef X |
77 }; | 77 }; |
78 | 78 |
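For readers new to the split-register scheme, here is a minimal standalone C++ sketch (an editorial illustration, not part of the diff, and deliberately independent of the exact ARM instruction sequence Subzero emits) of why an i64 compare needs the two-step, hi-then-lo logic that the C1/C2 condition pair in TableIcmp64 encodes:

#include <cassert>
#include <cstdint>

// Decide A < B (unsigned) using only the 32-bit halves: the high words decide
// the result unless they are equal, in which case the low words decide.
static bool ult64ViaHalves(uint64_t A, uint64_t B) {
  uint32_t AHi = static_cast<uint32_t>(A >> 32), ALo = static_cast<uint32_t>(A);
  uint32_t BHi = static_cast<uint32_t>(B >> 32), BLo = static_cast<uint32_t>(B);
  if (AHi != BHi)
    return AHi < BHi;
  return ALo < BLo;
}

int main() {
  assert(ult64ViaHalves(0x00000001FFFFFFFFull, 0x0000000200000000ull));
  assert(!ult64ViaHalves(0x0000000200000000ull, 0x00000001FFFFFFFFull));
  assert(!ult64ViaHalves(5, 5));
  return 0;
}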
79 CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) { | 79 CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) { |
80 size_t Index = static_cast<size_t>(Cond); | 80 size_t Index = static_cast<size_t>(Cond); |
81 assert(Index < llvm::array_lengthof(TableIcmp32)); | 81 assert(Index < llvm::array_lengthof(TableIcmp32)); |
82 return TableIcmp32[Index].Mapping; | 82 return TableIcmp32[Index].Mapping; |
83 } | 83 } |
84 | 84 |
85 // In some cases, there are x-macros tables for both high-level and | 85 // In some cases, there are x-macros tables for both high-level and low-level |
86 // low-level instructions/operands that use the same enum key value. | 86 // instructions/operands that use the same enum key value. The tables are kept |
87 // The tables are kept separate to maintain a proper separation | 87 // separate to maintain a proper separation between abstraction layers. There |
88 // between abstraction layers. There is a risk that the tables could | 88 // is a risk that the tables could get out of sync if enum values are reordered |
89 // get out of sync if enum values are reordered or if entries are | 89 // or if entries are added or deleted. The following dummy namespaces use |
90 // added or deleted. The following dummy namespaces use | |
91 // static_asserts to ensure everything is kept in sync. | 90 // static_asserts to ensure everything is kept in sync. |
92 | 91 |
93 // Validate the enum values in ICMPARM32_TABLE. | 92 // Validate the enum values in ICMPARM32_TABLE. |
94 namespace dummy1 { | 93 namespace dummy1 { |
95 // Define a temporary set of enum values based on low-level table | 94 // Define a temporary set of enum values based on low-level table entries. |
96 // entries. | |
97 enum _tmp_enum { | 95 enum _tmp_enum { |
98 #define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val, | 96 #define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val, |
99 ICMPARM32_TABLE | 97 ICMPARM32_TABLE |
100 #undef X | 98 #undef X |
101 _num | 99 _num |
102 }; | 100 }; |
103 // Define a set of constants based on high-level table entries. | 101 // Define a set of constants based on high-level table entries. |
104 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; | 102 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; |
105 ICEINSTICMP_TABLE | 103 ICEINSTICMP_TABLE |
106 #undef X | 104 #undef X |
107 // Define a set of constants based on low-level table entries, and | 105 // Define a set of constants based on low-level table entries, and ensure the |
108 // ensure the table entry keys are consistent. | 106 // table entry keys are consistent. |
109 #define X(val, signed, swapped64, C_32, C1_64, C2_64) \ | 107 #define X(val, signed, swapped64, C_32, C1_64, C2_64) \ |
110 static const int _table2_##val = _tmp_##val; \ | 108 static const int _table2_##val = _tmp_##val; \ |
111 static_assert( \ | 109 static_assert( \ |
112 _table1_##val == _table2_##val, \ | 110 _table1_##val == _table2_##val, \ |
113 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE"); | 111 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE"); |
114 ICMPARM32_TABLE | 112 ICMPARM32_TABLE |
115 #undef X | 113 #undef X |
116 // Repeat the static asserts with respect to the high-level table | 114 // Repeat the static asserts with respect to the high-level table entries in |
117 // entries in case the high-level table has extra entries. | 115 // case the high-level table has extra entries. |
118 #define X(tag, str) \ | 116 #define X(tag, str) \ |
119 static_assert( \ | 117 static_assert( \ |
120 _table1_##tag == _table2_##tag, \ | 118 _table1_##tag == _table2_##tag, \ |
121 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE"); | 119 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE"); |
122 ICEINSTICMP_TABLE | 120 ICEINSTICMP_TABLE |
123 #undef X | 121 #undef X |
124 } // end of namespace dummy1 | 122 } // end of namespace dummy1 |
125 | 123 |
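The dummy1 idiom above generalizes to any pair of x-macro tables that must stay keyed identically. A self-contained sketch of the same cross-checking trick, using hypothetical HIGH_TABLE/LOW_TABLE names rather than the real ICEINSTICMP_TABLE/ICMPARM32_TABLE:

// Two hypothetical tables that must list the same tags in the same order.
#define HIGH_TABLE                                                             \
  X(Eq)                                                                        \
  X(Ne)                                                                        \
  X(Ult)

#define LOW_TABLE                                                              \
  X(Eq)                                                                        \
  X(Ne)                                                                        \
  X(Ult)

// Enum keyed by each table.
enum HighCond {
#define X(tag) High_##tag,
  HIGH_TABLE
#undef X
};
enum LowCond {
#define X(tag) Low_##tag,
  LOW_TABLE
#undef X
};

// If one table is reordered relative to the other, or an entry referenced here
// is missing from HIGH_TABLE, compilation fails.
#define X(tag)                                                                 \
  static_assert(static_cast<int>(High_##tag) == static_cast<int>(Low_##tag),   \
                "HIGH_TABLE and LOW_TABLE are out of sync");
LOW_TABLE
#undef X

int main() { return 0; }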
126 // Stack alignment | 124 // Stack alignment |
127 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16; | 125 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16; |
128 | 126 |
129 // Value is in bytes. Return Value adjusted to the next highest multiple | 127 // Value is in bytes. Return Value adjusted to the next highest multiple of the |
130 // of the stack alignment. | 128 // stack alignment. |
131 uint32_t applyStackAlignment(uint32_t Value) { | 129 uint32_t applyStackAlignment(uint32_t Value) { |
132 return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES); | 130 return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES); |
133 } | 131 } |
134 | 132 |
135 // Value is in bytes. Return Value adjusted to the next highest multiple | 133 // Value is in bytes. Return Value adjusted to the next highest multiple of the |
136 // of the stack alignment required for the given type. | 134 // stack alignment required for the given type. |
137 uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) { | 135 uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) { |
138 // Use natural alignment, except that normally (non-NaCl) ARM only | 136 // Use natural alignment, except that normally (non-NaCl) ARM only aligns |
139 // aligns vectors to 8 bytes. | 137 // vectors to 8 bytes. |
140 // TODO(jvoung): Check this ... | 138 // TODO(jvoung): Check this ... |
141 size_t typeAlignInBytes = typeWidthInBytes(Ty); | 139 size_t typeAlignInBytes = typeWidthInBytes(Ty); |
142 if (isVectorType(Ty)) | 140 if (isVectorType(Ty)) |
143 typeAlignInBytes = 8; | 141 typeAlignInBytes = 8; |
144 return Utils::applyAlignment(Value, typeAlignInBytes); | 142 return Utils::applyAlignment(Value, typeAlignInBytes); |
145 } | 143 } |
146 | 144 |
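A quick numeric check of the rounding these helpers perform. Utils::applyAlignment is modelled here by a local roundUp (an assumption for the sketch; the real utility lives in Subzero's Utils):

#include <cassert>
#include <cstdint>

// Round Value up to the next multiple of Align (power-of-two Align assumed,
// which holds for the 16-byte stack alignment and the 1/2/4/8-byte type cases).
static uint32_t roundUp(uint32_t Value, uint32_t Align) {
  return (Value + Align - 1) & ~(Align - 1);
}

int main() {
  assert(roundUp(0, 16) == 0);
  assert(roundUp(20, 16) == 32); // applyStackAlignment(20) -> 32
  assert(roundUp(12, 8) == 16);  // a vector slot, aligned to 8 bytes on ARM
  assert(roundUp(5, 4) == 8);    // an i32 slot placed after a 5-byte offset
  return 0;
}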
147 // Conservatively check if at compile time we know that the operand is | 145 // Conservatively check if at compile time we know that the operand is |
148 // definitely a non-zero integer. | 146 // definitely a non-zero integer. |
149 bool isGuaranteedNonzeroInt(const Operand *Op) { | 147 bool isGuaranteedNonzeroInt(const Operand *Op) { |
(...skipping 15 matching lines...) |
165 TargetInstructionSet::BaseInstructionSet) { | 163 TargetInstructionSet::BaseInstructionSet) { |
166 InstructionSet = static_cast<ARM32InstructionSet>( | 164 InstructionSet = static_cast<ARM32InstructionSet>( |
167 (Flags.getTargetInstructionSet() - | 165 (Flags.getTargetInstructionSet() - |
168 TargetInstructionSet::ARM32InstructionSet_Begin) + | 166 TargetInstructionSet::ARM32InstructionSet_Begin) + |
169 ARM32InstructionSet::Begin); | 167 ARM32InstructionSet::Begin); |
170 } | 168 } |
171 } | 169 } |
172 | 170 |
173 TargetARM32::TargetARM32(Cfg *Func) | 171 TargetARM32::TargetARM32(Cfg *Func) |
174 : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) { | 172 : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) { |
175 // TODO: Don't initialize IntegerRegisters and friends every time. | 173 // TODO: Don't initialize IntegerRegisters and friends every time. Instead, |
176 // Instead, initialize in some sort of static initializer for the | 174 // initialize in some sort of static initializer for the class. |
177 // class. | |
178 // Limit this size (or do all bitsets need to be the same width)??? | 175 // Limit this size (or do all bitsets need to be the same width)??? |
179 llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM); | 176 llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM); |
180 llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM); | 177 llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM); |
181 llvm::SmallBitVector Float64Registers(RegARM32::Reg_NUM); | 178 llvm::SmallBitVector Float64Registers(RegARM32::Reg_NUM); |
182 llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM); | 179 llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM); |
183 llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM); | 180 llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM); |
184 ScratchRegs.resize(RegARM32::Reg_NUM); | 181 ScratchRegs.resize(RegARM32::Reg_NUM); |
185 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ | 182 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ |
186 isFP32, isFP64, isVec128, alias_init) \ | 183 isFP32, isFP64, isVec128, alias_init) \ |
187 IntegerRegisters[RegARM32::val] = isInt; \ | 184 IntegerRegisters[RegARM32::val] = isInt; \ |
(...skipping 48 matching lines...) |
236 Func->dump("After Phi lowering"); | 233 Func->dump("After Phi lowering"); |
237 } | 234 } |
238 | 235 |
239 // Address mode optimization. | 236 // Address mode optimization. |
240 Func->getVMetadata()->init(VMK_SingleDefs); | 237 Func->getVMetadata()->init(VMK_SingleDefs); |
241 Func->doAddressOpt(); | 238 Func->doAddressOpt(); |
242 | 239 |
243 // Argument lowering | 240 // Argument lowering |
244 Func->doArgLowering(); | 241 Func->doArgLowering(); |
245 | 242 |
246 // Target lowering. This requires liveness analysis for some parts | 243 // Target lowering. This requires liveness analysis for some parts of the |
247 // of the lowering decisions, such as compare/branch fusing. If | 244 // lowering decisions, such as compare/branch fusing. If non-lightweight |
248 // non-lightweight liveness analysis is used, the instructions need | 245 // liveness analysis is used, the instructions need to be renumbered first. |
249 // to be renumbered first. TODO: This renumbering should only be | 246 // TODO: This renumbering should only be necessary if we're actually |
250 // necessary if we're actually calculating live intervals, which we | 247 // calculating live intervals, which we only do for register allocation. |
251 // only do for register allocation. | |
252 Func->renumberInstructions(); | 248 Func->renumberInstructions(); |
253 if (Func->hasError()) | 249 if (Func->hasError()) |
254 return; | 250 return; |
255 | 251 |
256 // TODO: It should be sufficient to use the fastest liveness | 252 // TODO: It should be sufficient to use the fastest liveness calculation, |
257 // calculation, i.e. livenessLightweight(). However, for some | 253 // i.e. livenessLightweight(). However, for some reason that slows down the |
258 // reason that slows down the rest of the translation. Investigate. | 254 // rest of the translation. Investigate. |
259 Func->liveness(Liveness_Basic); | 255 Func->liveness(Liveness_Basic); |
260 if (Func->hasError()) | 256 if (Func->hasError()) |
261 return; | 257 return; |
262 Func->dump("After ARM32 address mode opt"); | 258 Func->dump("After ARM32 address mode opt"); |
263 | 259 |
264 Func->genCode(); | 260 Func->genCode(); |
265 if (Func->hasError()) | 261 if (Func->hasError()) |
266 return; | 262 return; |
267 Func->dump("After ARM32 codegen"); | 263 Func->dump("After ARM32 codegen"); |
268 | 264 |
269 // Register allocation. This requires instruction renumbering and | 265 // Register allocation. This requires instruction renumbering and full |
270 // full liveness analysis. | 266 // liveness analysis. |
271 Func->renumberInstructions(); | 267 Func->renumberInstructions(); |
272 if (Func->hasError()) | 268 if (Func->hasError()) |
273 return; | 269 return; |
274 Func->liveness(Liveness_Intervals); | 270 Func->liveness(Liveness_Intervals); |
275 if (Func->hasError()) | 271 if (Func->hasError()) |
276 return; | 272 return; |
277 // Validate the live range computations. The expensive validation | 273 // Validate the live range computations. The expensive validation call is |
278 // call is deliberately only made when assertions are enabled. | 274 // deliberately only made when assertions are enabled. |
279 assert(Func->validateLiveness()); | 275 assert(Func->validateLiveness()); |
280 // The post-codegen dump is done here, after liveness analysis and | 276 // The post-codegen dump is done here, after liveness analysis and associated |
281 // associated cleanup, to make the dump cleaner and more useful. | 277 // cleanup, to make the dump cleaner and more useful. |
282 Func->dump("After initial ARM32 codegen"); | 278 Func->dump("After initial ARM32 codegen"); |
283 Func->getVMetadata()->init(VMK_All); | 279 Func->getVMetadata()->init(VMK_All); |
284 regAlloc(RAK_Global); | 280 regAlloc(RAK_Global); |
285 if (Func->hasError()) | 281 if (Func->hasError()) |
286 return; | 282 return; |
287 Func->dump("After linear scan regalloc"); | 283 Func->dump("After linear scan regalloc"); |
288 | 284 |
289 if (Ctx->getFlags().getPhiEdgeSplit()) { | 285 if (Ctx->getFlags().getPhiEdgeSplit()) { |
290 Func->advancedPhiLowering(); | 286 Func->advancedPhiLowering(); |
291 Func->dump("After advanced Phi lowering"); | 287 Func->dump("After advanced Phi lowering"); |
292 } | 288 } |
293 | 289 |
294 // Stack frame mapping. | 290 // Stack frame mapping. |
295 Func->genFrame(); | 291 Func->genFrame(); |
296 if (Func->hasError()) | 292 if (Func->hasError()) |
297 return; | 293 return; |
298 Func->dump("After stack frame mapping"); | 294 Func->dump("After stack frame mapping"); |
299 | 295 |
300 legalizeStackSlots(); | 296 legalizeStackSlots(); |
301 if (Func->hasError()) | 297 if (Func->hasError()) |
302 return; | 298 return; |
303 Func->dump("After legalizeStackSlots"); | 299 Func->dump("After legalizeStackSlots"); |
304 | 300 |
305 Func->contractEmptyNodes(); | 301 Func->contractEmptyNodes(); |
306 Func->reorderNodes(); | 302 Func->reorderNodes(); |
307 | 303 |
308 // Branch optimization. This needs to be done just before code | 304 // Branch optimization. This needs to be done just before code emission. In |
309 // emission. In particular, no transformations that insert or | 305 // particular, no transformations that insert or reorder CfgNodes should be |
310 // reorder CfgNodes should be done after branch optimization. We go | 306 // done after branch optimization. We go ahead and do it before nop insertion |
311 // ahead and do it before nop insertion to reduce the amount of work | 307 // to reduce the amount of work needed for searching for opportunities. |
312 // needed for searching for opportunities. | |
313 Func->doBranchOpt(); | 308 Func->doBranchOpt(); |
314 Func->dump("After branch optimization"); | 309 Func->dump("After branch optimization"); |
315 | 310 |
316 // Nop insertion | 311 // Nop insertion |
317 if (Ctx->getFlags().shouldDoNopInsertion()) { | 312 if (Ctx->getFlags().shouldDoNopInsertion()) { |
318 Func->doNopInsertion(); | 313 Func->doNopInsertion(); |
319 } | 314 } |
320 } | 315 } |
321 | 316 |
322 void TargetARM32::translateOm1() { | 317 void TargetARM32::translateOm1() { |
(...skipping 65 matching lines...) |
388 if (Ty == IceType_void) | 383 if (Ty == IceType_void) |
389 Ty = IceType_i32; | 384 Ty = IceType_i32; |
390 if (PhysicalRegisters[Ty].empty()) | 385 if (PhysicalRegisters[Ty].empty()) |
391 PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM); | 386 PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM); |
392 assert(RegNum < PhysicalRegisters[Ty].size()); | 387 assert(RegNum < PhysicalRegisters[Ty].size()); |
393 Variable *Reg = PhysicalRegisters[Ty][RegNum]; | 388 Variable *Reg = PhysicalRegisters[Ty][RegNum]; |
394 if (Reg == nullptr) { | 389 if (Reg == nullptr) { |
395 Reg = Func->makeVariable(Ty); | 390 Reg = Func->makeVariable(Ty); |
396 Reg->setRegNum(RegNum); | 391 Reg->setRegNum(RegNum); |
397 PhysicalRegisters[Ty][RegNum] = Reg; | 392 PhysicalRegisters[Ty][RegNum] = Reg; |
398 // Specially mark SP and LR as an "argument" so that it is considered | 393 // Specially mark SP and LR as an "argument" so that it is considered live |
399 // live upon function entry. | 394 // upon function entry. |
400 if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) { | 395 if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) { |
401 Func->addImplicitArg(Reg); | 396 Func->addImplicitArg(Reg); |
402 Reg->setIgnoreLiveness(); | 397 Reg->setIgnoreLiveness(); |
403 } | 398 } |
404 } | 399 } |
405 return Reg; | 400 return Reg; |
406 } | 401 } |
407 | 402 |
408 void TargetARM32::emitJumpTable(const Cfg *Func, | 403 void TargetARM32::emitJumpTable(const Cfg *Func, |
409 const InstJumpTable *JumpTable) const { | 404 const InstJumpTable *JumpTable) const { |
(...skipping 28 matching lines...) |
438 if (Offset != 0) { | 433 if (Offset != 0) { |
439 Str << ", " << getConstantPrefix() << Offset; | 434 Str << ", " << getConstantPrefix() << Offset; |
440 } | 435 } |
441 Str << "]"; | 436 Str << "]"; |
442 } | 437 } |
443 | 438 |
444 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) { | 439 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) { |
445 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) | 440 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) |
446 return false; | 441 return false; |
447 int32_t RegLo, RegHi; | 442 int32_t RegLo, RegHi; |
448 // Always start i64 registers at an even register, so this may end | 443 // Always start i64 registers at an even register, so this may end up padding |
449 // up padding away a register. | 444 // away a register. |
450 NumGPRRegsUsed = Utils::applyAlignment(NumGPRRegsUsed, 2); | 445 NumGPRRegsUsed = Utils::applyAlignment(NumGPRRegsUsed, 2); |
451 RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed; | 446 RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed; |
452 ++NumGPRRegsUsed; | 447 ++NumGPRRegsUsed; |
453 RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed; | 448 RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed; |
454 ++NumGPRRegsUsed; | 449 ++NumGPRRegsUsed; |
455 // If this bumps us past the boundary, don't allocate to a register | 450 // If this bumps us past the boundary, don't allocate to a register and leave |
456 // and leave any previously speculatively consumed registers as consumed. | 451 // any previously speculatively consumed registers as consumed. |
457 if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG) | 452 if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG) |
458 return false; | 453 return false; |
459 Regs->first = RegLo; | 454 Regs->first = RegLo; |
460 Regs->second = RegHi; | 455 Regs->second = RegHi; |
461 return true; | 456 return true; |
462 } | 457 } |
463 | 458 |
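The even-register rule above is easiest to see with a concrete sequence. Below is a hedged standalone model (a made-up GPRAssigner class, not the real TargetARM32::CallingConv) showing an i32 taking r0 and a following i64 skipping r1 to land in the r2/r3 pair:

#include <cassert>

namespace {
constexpr int MaxGPRArgs = 4; // r0..r3, mirroring ARM32_MAX_GPR_ARG

// Minimal model of GPR argument assignment; indices 0..3 stand for r0..r3.
struct GPRAssigner {
  int Used = 0;
  bool i32InReg(int *Reg) {
    if (Used >= MaxGPRArgs)
      return false;
    *Reg = Used++;
    return true;
  }
  bool i64InRegs(int *Lo, int *Hi) {
    if (Used >= MaxGPRArgs)
      return false;
    Used = (Used + 1) & ~1; // always start an i64 at an even register
    *Lo = Used++;
    *Hi = Used++;
    return Used <= MaxGPRArgs; // on failure, the registers stay consumed
  }
};
} // namespace

int main() {
  GPRAssigner CC;
  int R, Lo, Hi;
  assert(CC.i32InReg(&R) && R == 0);                    // i32 -> r0
  assert(CC.i64InRegs(&Lo, &Hi) && Lo == 2 && Hi == 3); // i64 skips r1 -> r2/r3
  assert(!CC.i32InReg(&R));                             // no GPR left; goes to stack
  return 0;
}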
464 bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) { | 459 bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) { |
465 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) | 460 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) |
466 return false; | 461 return false; |
467 *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed; | 462 *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed; |
468 ++NumGPRRegsUsed; | 463 ++NumGPRRegsUsed; |
469 return true; | 464 return true; |
470 } | 465 } |
471 | 466 |
472 bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { | 467 bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { |
473 if (NumFPRegUnits >= ARM32_MAX_FP_REG_UNITS) | 468 if (NumFPRegUnits >= ARM32_MAX_FP_REG_UNITS) |
474 return false; | 469 return false; |
475 if (isVectorType(Ty)) { | 470 if (isVectorType(Ty)) { |
476 NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 4); | 471 NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 4); |
477 // Q registers are declared in reverse order, so | 472 // Q registers are declared in reverse order, so RegARM32::Reg_q0 > |
478 // RegARM32::Reg_q0 > RegARM32::Reg_q1. Therefore, we need to subtract | 473 // RegARM32::Reg_q1. Therefore, we need to subtract NumFPRegUnits from |
479 // NumFPRegUnits from Reg_q0. Same thing goes for D registers. | 474 // Reg_q0. Same thing goes for D registers. |
480 static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1, | 475 static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1, |
481 "ARM32 Q registers are possibly declared incorrectly."); | 476 "ARM32 Q registers are possibly declared incorrectly."); |
482 *Reg = RegARM32::Reg_q0 - (NumFPRegUnits / 4); | 477 *Reg = RegARM32::Reg_q0 - (NumFPRegUnits / 4); |
483 NumFPRegUnits += 4; | 478 NumFPRegUnits += 4; |
484 // If this bumps us past the boundary, don't allocate to a register | 479 // If this bumps us past the boundary, don't allocate to a register and |
485 // and leave any previously speculatively consumed registers as consumed. | 480 // leave any previously speculatively consumed registers as consumed. |
486 if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS) | 481 if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS) |
487 return false; | 482 return false; |
488 } else if (Ty == IceType_f64) { | 483 } else if (Ty == IceType_f64) { |
489 static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1, | 484 static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1, |
490 "ARM32 D registers are possibly declared incorrectly."); | 485 "ARM32 D registers are possibly declared incorrectly."); |
491 NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 2); | 486 NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 2); |
492 *Reg = RegARM32::Reg_d0 - (NumFPRegUnits / 2); | 487 *Reg = RegARM32::Reg_d0 - (NumFPRegUnits / 2); |
493 NumFPRegUnits += 2; | 488 NumFPRegUnits += 2; |
494 // If this bumps us past the boundary, don't allocate to a register | 489 // If this bumps us past the boundary, don't allocate to a register and |
495 // and leave any previously speculatively consumed registers as consumed. | 490 // leave any previously speculatively consumed registers as consumed. |
496 if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS) | 491 if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS) |
497 return false; | 492 return false; |
498 } else { | 493 } else { |
499 static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1, | 494 static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1, |
500 "ARM32 S registers are possibly declared incorrectly."); | 495 "ARM32 S registers are possibly declared incorrectly."); |
501 assert(Ty == IceType_f32); | 496 assert(Ty == IceType_f32); |
502 *Reg = RegARM32::Reg_s0 + NumFPRegUnits; | 497 *Reg = RegARM32::Reg_s0 + NumFPRegUnits; |
503 ++NumFPRegUnits; | 498 ++NumFPRegUnits; |
504 } | 499 } |
505 return true; | 500 return true; |
506 } | 501 } |
507 | 502 |
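FPInReg counts in single-precision register units. As a worked illustration, here is a standalone model with a made-up FPAssigner; it assumes 16 units, s0 through s15, which is the AAPCS hard-float argument limit (the real bound is ARM32_MAX_FP_REG_UNITS):

#include <cassert>

namespace {
constexpr int MaxFPUnits = 16; // s0..s15 (assumed value for the sketch)

// f32 takes 1 unit, f64 takes 2 units aligned to 2, and a 128-bit vector takes
// 4 units aligned to 4.
struct FPAssigner {
  int Units = 0;
  bool alloc(int Width, int *FirstUnit) {
    if (Units >= MaxFPUnits)
      return false;
    Units = (Units + Width - 1) / Width * Width; // align to the unit width
    *FirstUnit = Units;
    Units += Width;
    return Units <= MaxFPUnits;
  }
};
} // namespace

int main() {
  FPAssigner CC;
  int U;
  assert(CC.alloc(1, &U) && U == 0); // f32 -> s0
  assert(CC.alloc(2, &U) && U == 2); // f64 -> d1 (s2/s3), skipping s1
  assert(CC.alloc(4, &U) && U == 4); // 128-bit vector -> q1 (s4..s7)
  assert(CC.alloc(1, &U) && U == 8); // next f32 -> s8
  return 0;
}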
508 void TargetARM32::lowerArguments() { | 503 void TargetARM32::lowerArguments() { |
509 VarList &Args = Func->getArgs(); | 504 VarList &Args = Func->getArgs(); |
510 TargetARM32::CallingConv CC; | 505 TargetARM32::CallingConv CC; |
511 | 506 |
512 // For each register argument, replace Arg in the argument list with the | 507 // For each register argument, replace Arg in the argument list with the home |
513 // home register. Then generate an instruction in the prolog to copy the | 508 // register. Then generate an instruction in the prolog to copy the home |
514 // home register to the assigned location of Arg. | 509 // register to the assigned location of Arg. |
515 Context.init(Func->getEntryNode()); | 510 Context.init(Func->getEntryNode()); |
516 Context.setInsertPoint(Context.getCur()); | 511 Context.setInsertPoint(Context.getCur()); |
517 | 512 |
518 for (SizeT I = 0, E = Args.size(); I < E; ++I) { | 513 for (SizeT I = 0, E = Args.size(); I < E; ++I) { |
519 Variable *Arg = Args[I]; | 514 Variable *Arg = Args[I]; |
520 Type Ty = Arg->getType(); | 515 Type Ty = Arg->getType(); |
521 if (Ty == IceType_i64) { | 516 if (Ty == IceType_i64) { |
522 std::pair<int32_t, int32_t> RegPair; | 517 std::pair<int32_t, int32_t> RegPair; |
523 if (!CC.I64InRegs(&RegPair)) | 518 if (!CC.I64InRegs(&RegPair)) |
524 continue; | 519 continue; |
(...skipping 36 matching lines...) |
561 | 556 |
562 Args[I] = RegisterArg; | 557 Args[I] = RegisterArg; |
563 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); | 558 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); |
564 continue; | 559 continue; |
565 } | 560 } |
566 } | 561 } |
567 } | 562 } |
568 | 563 |
569 // Helper function for addProlog(). | 564 // Helper function for addProlog(). |
570 // | 565 // |
571 // This assumes Arg is an argument passed on the stack. This sets the | 566 // This assumes Arg is an argument passed on the stack. This sets the frame |
572 // frame offset for Arg and updates InArgsSizeBytes according to Arg's | 567 // offset for Arg and updates InArgsSizeBytes according to Arg's width. For an |
573 // width. For an I64 arg that has been split into Lo and Hi components, | 568 // I64 arg that has been split into Lo and Hi components, it calls itself |
574 // it calls itself recursively on the components, taking care to handle | 569 // recursively on the components, taking care to handle Lo first because of the |
575 // Lo first because of the little-endian architecture. Lastly, this | 570 // little-endian architecture. Lastly, this function generates an instruction |
576 // function generates an instruction to copy Arg into its assigned | 571 // to copy Arg into its assigned register if applicable. |
577 // register if applicable. | |
578 void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, | 572 void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
579 size_t BasicFrameOffset, | 573 size_t BasicFrameOffset, |
580 size_t &InArgsSizeBytes) { | 574 size_t &InArgsSizeBytes) { |
581 Variable *Lo = Arg->getLo(); | 575 Variable *Lo = Arg->getLo(); |
582 Variable *Hi = Arg->getHi(); | 576 Variable *Hi = Arg->getHi(); |
583 Type Ty = Arg->getType(); | 577 Type Ty = Arg->getType(); |
584 if (Lo && Hi && Ty == IceType_i64) { | 578 if (Lo && Hi && Ty == IceType_i64) { |
585 assert(Lo->getType() != IceType_i64); // don't want infinite recursion | 579 assert(Lo->getType() != IceType_i64); // don't want infinite recursion |
586 assert(Hi->getType() != IceType_i64); // don't want infinite recursion | 580 assert(Hi->getType() != IceType_i64); // don't want infinite recursion |
587 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 581 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
588 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); | 582 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); |
589 return; | 583 return; |
590 } | 584 } |
591 InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty); | 585 InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty); |
592 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); | 586 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
593 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | 587 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
594 // If the argument variable has been assigned a register, we need to load | 588 // If the argument variable has been assigned a register, we need to load the |
595 // the value from the stack slot. | 589 // value from the stack slot. |
596 if (Arg->hasReg()) { | 590 if (Arg->hasReg()) { |
597 assert(Ty != IceType_i64); | 591 assert(Ty != IceType_i64); |
598 OperandARM32Mem *Mem = OperandARM32Mem::create( | 592 OperandARM32Mem *Mem = OperandARM32Mem::create( |
599 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( | 593 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( |
600 Ctx->getConstantInt32(Arg->getStackOffset()))); | 594 Ctx->getConstantInt32(Arg->getStackOffset()))); |
601 if (isVectorType(Arg->getType())) { | 595 if (isVectorType(Arg->getType())) { |
602 // Use vld1.$elem or something? | 596 // Use vld1.$elem or something? |
603 UnimplementedError(Func->getContext()->getFlags()); | 597 UnimplementedError(Func->getContext()->getFlags()); |
604 } else if (isFloatingType(Arg->getType())) { | 598 } else if (isFloatingType(Arg->getType())) { |
605 _vldr(Arg, Mem); | 599 _vldr(Arg, Mem); |
606 } else { | 600 } else { |
607 _ldr(Arg, Mem); | 601 _ldr(Arg, Mem); |
608 } | 602 } |
609 // This argument-copying instruction uses an explicit | 603 // This argument-copying instruction uses an explicit OperandARM32Mem |
610 // OperandARM32Mem operand instead of a Variable, so its | 604 // operand instead of a Variable, so its fill-from-stack operation has to |
611 // fill-from-stack operation has to be tracked separately for | 605 // be tracked separately for statistics. |
612 // statistics. | |
613 Ctx->statsUpdateFills(); | 606 Ctx->statsUpdateFills(); |
614 } | 607 } |
615 } | 608 } |
616 | 609 |
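A small worked example of the offset bookkeeping in finishArgumentLowering (modelled with plain numbers; the real widths and alignments come from typeWidthInBytesOnStack and applyStackAlignmentTy): an i32 stack argument followed by a split i64 lands at offsets 0, 4 (Lo), and 8 (Hi) relative to BasicFrameOffset.

#include <cassert>
#include <cstddef>

// Align the running in-args size, record the argument's offset, then advance
// by the argument's stack width, mirroring the three steps the function performs.
static size_t placeArg(size_t &InArgsSizeBytes, size_t Align, size_t Width) {
  InArgsSizeBytes = (InArgsSizeBytes + Align - 1) & ~(Align - 1);
  size_t Offset = InArgsSizeBytes; // add BasicFrameOffset for the final offset
  InArgsSizeBytes += Width;
  return Offset;
}

int main() {
  size_t InArgs = 0;
  assert(placeArg(InArgs, 4, 4) == 0); // i32 arg
  assert(placeArg(InArgs, 4, 4) == 4); // i64 Lo half (placed first: little-endian)
  assert(placeArg(InArgs, 4, 4) == 8); // i64 Hi half
  return 0;
}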
617 Type TargetARM32::stackSlotType() { return IceType_i32; } | 610 Type TargetARM32::stackSlotType() { return IceType_i32; } |
618 | 611 |
619 void TargetARM32::addProlog(CfgNode *Node) { | 612 void TargetARM32::addProlog(CfgNode *Node) { |
620 // Stack frame layout: | 613 // Stack frame layout: |
621 // | 614 // |
622 // +------------------------+ | 615 // +------------------------+ |
(...skipping 12 matching lines...) |
635 // | 7. allocas | | 628 // | 7. allocas | |
636 // +------------------------+ <--- StackPointer | 629 // +------------------------+ <--- StackPointer |
637 // | 630 // |
638 // The following variables record the size in bytes of the given areas: | 631 // The following variables record the size in bytes of the given areas: |
639 // * PreservedRegsSizeBytes: area 1 | 632 // * PreservedRegsSizeBytes: area 1 |
640 // * SpillAreaPaddingBytes: area 2 | 633 // * SpillAreaPaddingBytes: area 2 |
641 // * GlobalsSize: area 3 | 634 // * GlobalsSize: area 3 |
642 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 | 635 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 |
643 // * LocalsSpillAreaSize: area 5 | 636 // * LocalsSpillAreaSize: area 5 |
644 // * SpillAreaSizeBytes: areas 2 - 6 | 637 // * SpillAreaSizeBytes: areas 2 - 6 |
645 // Determine stack frame offsets for each Variable without a | 638 // Determine stack frame offsets for each Variable without a register |
646 // register assignment. This can be done as one variable per stack | 639 // assignment. This can be done as one variable per stack slot. Or, do |
647 // slot. Or, do coalescing by running the register allocator again | 640 // coalescing by running the register allocator again with an infinite set of |
648 // with an infinite set of registers (as a side effect, this gives | 641 // registers (as a side effect, this gives variables a second chance at |
649 // variables a second chance at physical register assignment). | 642 // physical register assignment). |
650 // | 643 // |
651 // A middle ground approach is to leverage sparsity and allocate one | 644 // A middle ground approach is to leverage sparsity and allocate one block of |
652 // block of space on the frame for globals (variables with | 645 // space on the frame for globals (variables with multi-block lifetime), and |
653 // multi-block lifetime), and one block to share for locals | 646 // one block to share for locals (single-block lifetime). |
654 // (single-block lifetime). | |
655 | 647 |
656 Context.init(Node); | 648 Context.init(Node); |
657 Context.setInsertPoint(Context.getCur()); | 649 Context.setInsertPoint(Context.getCur()); |
658 | 650 |
659 llvm::SmallBitVector CalleeSaves = | 651 llvm::SmallBitVector CalleeSaves = |
660 getRegisterSet(RegSet_CalleeSave, RegSet_None); | 652 getRegisterSet(RegSet_CalleeSave, RegSet_None); |
661 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); | 653 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); |
662 VarList SortedSpilledVariables; | 654 VarList SortedSpilledVariables; |
663 size_t GlobalsSize = 0; | 655 size_t GlobalsSize = 0; |
664 // If there is a separate locals area, this represents that area. | 656 // If there is a separate locals area, this represents that area. Otherwise |
665 // Otherwise it counts any variable not counted by GlobalsSize. | 657 // it counts any variable not counted by GlobalsSize. |
666 SpillAreaSizeBytes = 0; | 658 SpillAreaSizeBytes = 0; |
667 // If there is a separate locals area, this specifies the alignment | 659 // If there is a separate locals area, this specifies the alignment for it. |
668 // for it. | |
669 uint32_t LocalsSlotsAlignmentBytes = 0; | 660 uint32_t LocalsSlotsAlignmentBytes = 0; |
670 // The entire spill locations area gets aligned to largest natural | 661 // The entire spill locations area gets aligned to largest natural alignment |
671 // alignment of the variables that have a spill slot. | 662 // of the variables that have a spill slot. |
672 uint32_t SpillAreaAlignmentBytes = 0; | 663 uint32_t SpillAreaAlignmentBytes = 0; |
673 // For now, we don't have target-specific variables that need special | 664 // For now, we don't have target-specific variables that need special |
674 // treatment (no stack-slot-linked SpillVariable type). | 665 // treatment (no stack-slot-linked SpillVariable type). |
675 std::function<bool(Variable *)> TargetVarHook = | 666 std::function<bool(Variable *)> TargetVarHook = |
676 [](Variable *) { return false; }; | 667 [](Variable *) { return false; }; |
677 | 668 |
678 // Compute the list of spilled variables and bounds for GlobalsSize, etc. | 669 // Compute the list of spilled variables and bounds for GlobalsSize, etc. |
679 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, | 670 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, |
680 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, | 671 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, |
681 &LocalsSlotsAlignmentBytes, TargetVarHook); | 672 &LocalsSlotsAlignmentBytes, TargetVarHook); |
682 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; | 673 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; |
683 SpillAreaSizeBytes += GlobalsSize; | 674 SpillAreaSizeBytes += GlobalsSize; |
684 | 675 |
685 // Add push instructions for preserved registers. | 676 // Add push instructions for preserved registers. On ARM, "push" can push a |
686 // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15). | 677 // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has |
687 // Unlike x86, ARM also has callee-saved float/vector registers. | 678 // callee-saved float/vector registers. The "vpush" instruction can handle a |
688 // The "vpush" instruction can handle a whole list of float/vector | 679 // whole list of float/vector registers, but it only handles contiguous |
689 // registers, but it only handles contiguous sequences of registers | 680 // sequences of registers by specifying the start and the length. |
690 // by specifying the start and the length. | |
691 VarList GPRsToPreserve; | 681 VarList GPRsToPreserve; |
692 GPRsToPreserve.reserve(CalleeSaves.size()); | 682 GPRsToPreserve.reserve(CalleeSaves.size()); |
693 uint32_t NumCallee = 0; | 683 uint32_t NumCallee = 0; |
694 size_t PreservedRegsSizeBytes = 0; | 684 size_t PreservedRegsSizeBytes = 0; |
695 // Consider FP and LR as callee-save / used as needed. | 685 // Consider FP and LR as callee-save / used as needed. |
696 if (UsesFramePointer) { | 686 if (UsesFramePointer) { |
697 CalleeSaves[RegARM32::Reg_fp] = true; | 687 CalleeSaves[RegARM32::Reg_fp] = true; |
698 assert(RegsUsed[RegARM32::Reg_fp] == false); | 688 assert(RegsUsed[RegARM32::Reg_fp] == false); |
699 RegsUsed[RegARM32::Reg_fp] = true; | 689 RegsUsed[RegARM32::Reg_fp] = true; |
700 } | 690 } |
701 if (!MaybeLeafFunc) { | 691 if (!MaybeLeafFunc) { |
702 CalleeSaves[RegARM32::Reg_lr] = true; | 692 CalleeSaves[RegARM32::Reg_lr] = true; |
703 RegsUsed[RegARM32::Reg_lr] = true; | 693 RegsUsed[RegARM32::Reg_lr] = true; |
704 } | 694 } |
705 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | 695 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
706 if (CalleeSaves[i] && RegsUsed[i]) { | 696 if (CalleeSaves[i] && RegsUsed[i]) { |
707 // TODO(jvoung): do separate vpush for each floating point | 697 // TODO(jvoung): do separate vpush for each floating point register |
708 // register segment and += 4, or 8 depending on type. | 698 // segment and += 4, or 8 depending on type. |
709 ++NumCallee; | 699 ++NumCallee; |
710 PreservedRegsSizeBytes += 4; | 700 PreservedRegsSizeBytes += 4; |
711 GPRsToPreserve.push_back(getPhysicalRegister(i)); | 701 GPRsToPreserve.push_back(getPhysicalRegister(i)); |
712 } | 702 } |
713 } | 703 } |
714 Ctx->statsUpdateRegistersSaved(NumCallee); | 704 Ctx->statsUpdateRegistersSaved(NumCallee); |
715 if (!GPRsToPreserve.empty()) | 705 if (!GPRsToPreserve.empty()) |
716 _push(GPRsToPreserve); | 706 _push(GPRsToPreserve); |
717 | 707 |
718 // Generate "mov FP, SP" if needed. | 708 // Generate "mov FP, SP" if needed. |
719 if (UsesFramePointer) { | 709 if (UsesFramePointer) { |
720 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); | 710 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); |
721 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 711 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
722 _mov(FP, SP); | 712 _mov(FP, SP); |
723 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). | 713 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). |
724 Context.insert(InstFakeUse::create(Func, FP)); | 714 Context.insert(InstFakeUse::create(Func, FP)); |
725 } | 715 } |
726 | 716 |
727 // Align the variables area. SpillAreaPaddingBytes is the size of | 717 // Align the variables area. SpillAreaPaddingBytes is the size of the region |
728 // the region after the preserved registers and before the spill areas. | 718 // after the preserved registers and before the spill areas. |
729 // LocalsSlotsPaddingBytes is the amount of padding between the globals | 719 // LocalsSlotsPaddingBytes is the amount of padding between the globals and |
730 // and locals area if they are separate. | 720 // locals area if they are separate. |
731 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES); | 721 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES); |
732 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); | 722 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
733 uint32_t SpillAreaPaddingBytes = 0; | 723 uint32_t SpillAreaPaddingBytes = 0; |
734 uint32_t LocalsSlotsPaddingBytes = 0; | 724 uint32_t LocalsSlotsPaddingBytes = 0; |
735 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, | 725 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, |
736 GlobalsSize, LocalsSlotsAlignmentBytes, | 726 GlobalsSize, LocalsSlotsAlignmentBytes, |
737 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); | 727 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); |
738 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; | 728 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; |
739 uint32_t GlobalsAndSubsequentPaddingSize = | 729 uint32_t GlobalsAndSubsequentPaddingSize = |
740 GlobalsSize + LocalsSlotsPaddingBytes; | 730 GlobalsSize + LocalsSlotsPaddingBytes; |
(...skipping 10 matching lines...) |
751 // Use the scratch register if needed to legalize the immediate. | 741 // Use the scratch register if needed to legalize the immediate. |
752 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), | 742 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), |
753 Legal_Reg | Legal_Flex, getReservedTmpReg()); | 743 Legal_Reg | Legal_Flex, getReservedTmpReg()); |
754 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 744 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
755 _sub(SP, SP, SubAmount); | 745 _sub(SP, SP, SubAmount); |
756 } | 746 } |
757 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | 747 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
758 | 748 |
759 resetStackAdjustment(); | 749 resetStackAdjustment(); |
760 | 750 |
761 // Fill in stack offsets for stack args, and copy args into registers | 751 // Fill in stack offsets for stack args, and copy args into registers for |
762 // for those that were register-allocated. Args are pushed right to | 752 // those that were register-allocated. Args are pushed right to left, so |
763 // left, so Arg[0] is closest to the stack/frame pointer. | 753 // Arg[0] is closest to the stack/frame pointer. |
764 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 754 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
765 size_t BasicFrameOffset = PreservedRegsSizeBytes; | 755 size_t BasicFrameOffset = PreservedRegsSizeBytes; |
766 if (!UsesFramePointer) | 756 if (!UsesFramePointer) |
767 BasicFrameOffset += SpillAreaSizeBytes; | 757 BasicFrameOffset += SpillAreaSizeBytes; |
768 | 758 |
769 const VarList &Args = Func->getArgs(); | 759 const VarList &Args = Func->getArgs(); |
770 size_t InArgsSizeBytes = 0; | 760 size_t InArgsSizeBytes = 0; |
771 TargetARM32::CallingConv CC; | 761 TargetARM32::CallingConv CC; |
772 for (Variable *Arg : Args) { | 762 for (Variable *Arg : Args) { |
773 Type Ty = Arg->getType(); | 763 Type Ty = Arg->getType(); |
(...skipping 49 matching lines...) |
823 void TargetARM32::addEpilog(CfgNode *Node) { | 813 void TargetARM32::addEpilog(CfgNode *Node) { |
824 InstList &Insts = Node->getInsts(); | 814 InstList &Insts = Node->getInsts(); |
825 InstList::reverse_iterator RI, E; | 815 InstList::reverse_iterator RI, E; |
826 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { | 816 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { |
827 if (llvm::isa<InstARM32Ret>(*RI)) | 817 if (llvm::isa<InstARM32Ret>(*RI)) |
828 break; | 818 break; |
829 } | 819 } |
830 if (RI == E) | 820 if (RI == E) |
831 return; | 821 return; |
832 | 822 |
833 // Convert the reverse_iterator position into its corresponding | 823 // Convert the reverse_iterator position into its corresponding (forward) |
834 // (forward) iterator position. | 824 // iterator position. |
835 InstList::iterator InsertPoint = RI.base(); | 825 InstList::iterator InsertPoint = RI.base(); |
836 --InsertPoint; | 826 --InsertPoint; |
837 Context.init(Node); | 827 Context.init(Node); |
838 Context.setInsertPoint(InsertPoint); | 828 Context.setInsertPoint(InsertPoint); |
839 | 829 |
840 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 830 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
841 if (UsesFramePointer) { | 831 if (UsesFramePointer) { |
842 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); | 832 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); |
843 // For late-stage liveness analysis (e.g. asm-verbose mode), | 833 // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake |
844 // adding a fake use of SP before the assignment of SP=FP keeps | 834 // use of SP before the assignment of SP=FP keeps previous SP adjustments |
845 // previous SP adjustments from being dead-code eliminated. | 835 // from being dead-code eliminated. |
846 Context.insert(InstFakeUse::create(Func, SP)); | 836 Context.insert(InstFakeUse::create(Func, SP)); |
847 _mov(SP, FP); | 837 _mov(SP, FP); |
848 } else { | 838 } else { |
849 // add SP, SpillAreaSizeBytes | 839 // add SP, SpillAreaSizeBytes |
850 if (SpillAreaSizeBytes) { | 840 if (SpillAreaSizeBytes) { |
851 // Use the scratch register if needed to legalize the immediate. | 841 // Use the scratch register if needed to legalize the immediate. |
852 Operand *AddAmount = | 842 Operand *AddAmount = |
853 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), | 843 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), |
854 Legal_Reg | Legal_Flex, getReservedTmpReg()); | 844 Legal_Reg | Legal_Flex, getReservedTmpReg()); |
855 _add(SP, SP, AddAmount); | 845 _add(SP, SP, AddAmount); |
856 } | 846 } |
857 } | 847 } |
858 | 848 |
859 // Add pop instructions for preserved registers. | 849 // Add pop instructions for preserved registers. |
860 llvm::SmallBitVector CalleeSaves = | 850 llvm::SmallBitVector CalleeSaves = |
861 getRegisterSet(RegSet_CalleeSave, RegSet_None); | 851 getRegisterSet(RegSet_CalleeSave, RegSet_None); |
862 VarList GPRsToRestore; | 852 VarList GPRsToRestore; |
863 GPRsToRestore.reserve(CalleeSaves.size()); | 853 GPRsToRestore.reserve(CalleeSaves.size()); |
864 // Consider FP and LR as callee-save / used as needed. | 854 // Consider FP and LR as callee-save / used as needed. |
865 if (UsesFramePointer) { | 855 if (UsesFramePointer) { |
866 CalleeSaves[RegARM32::Reg_fp] = true; | 856 CalleeSaves[RegARM32::Reg_fp] = true; |
867 } | 857 } |
868 if (!MaybeLeafFunc) { | 858 if (!MaybeLeafFunc) { |
869 CalleeSaves[RegARM32::Reg_lr] = true; | 859 CalleeSaves[RegARM32::Reg_lr] = true; |
870 } | 860 } |
871 // Pop registers in ascending order just like push | 861 // Pop registers in ascending order just like push (instead of in reverse |
872 // (instead of in reverse order). | 862 // order). |
873 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | 863 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
874 if (CalleeSaves[i] && RegsUsed[i]) { | 864 if (CalleeSaves[i] && RegsUsed[i]) { |
875 GPRsToRestore.push_back(getPhysicalRegister(i)); | 865 GPRsToRestore.push_back(getPhysicalRegister(i)); |
876 } | 866 } |
877 } | 867 } |
878 if (!GPRsToRestore.empty()) | 868 if (!GPRsToRestore.empty()) |
879 _pop(GPRsToRestore); | 869 _pop(GPRsToRestore); |
880 | 870 |
881 if (!Ctx->getFlags().getUseSandboxing()) | 871 if (!Ctx->getFlags().getUseSandboxing()) |
882 return; | 872 return; |
(...skipping 13 matching lines...) |
896 RetValue = llvm::cast<Variable>(RI->getSrc(0)); | 886 RetValue = llvm::cast<Variable>(RI->getSrc(0)); |
897 _bundle_lock(); | 887 _bundle_lock(); |
898 _bic(LR, LR, RetMask); | 888 _bic(LR, LR, RetMask); |
899 _ret(LR, RetValue); | 889 _ret(LR, RetValue); |
900 _bundle_unlock(); | 890 _bundle_unlock(); |
901 RI->setDeleted(); | 891 RI->setDeleted(); |
902 } | 892 } |
903 | 893 |
904 bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const { | 894 bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const { |
905 constexpr bool SignExt = false; | 895 constexpr bool SignExt = false; |
906 // TODO(jvoung): vldr of FP stack slots has a different limit from the | 896 // TODO(jvoung): vldr of FP stack slots has a different limit from the plain |
907 // plain stackSlotType(). | 897 // stackSlotType(). |
908 return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset); | 898 return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset); |
909 } | 899 } |
910 | 900 |
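The offset check matters because ARM immediate-offset addressing is range-limited: a plain 32-bit ldr/str encodes a 12-bit immediate (|offset| <= 4095), while vldr/vstr encodes an 8-bit immediate scaled by 4 (|offset| <= 1020), which is what the TODO above alludes to. An illustrative standalone check in the same spirit as OperandARM32Mem::canHoldOffset, not its actual implementation:

#include <cassert>
#include <cstdint>
#include <cstdlib>

// Rough encodability checks for word-sized loads/stores on ARM A32.
static bool fitsLdrStrWord(int32_t Offset) { return std::abs(Offset) <= 4095; }
static bool fitsVldrVstr(int32_t Offset) {
  return std::abs(Offset) <= 1020 && Offset % 4 == 0;
}

int main() {
  assert(fitsLdrStrWord(4000) && !fitsLdrStrWord(5000));
  assert(fitsVldrVstr(1020) && !fitsVldrVstr(1024) && !fitsVldrVstr(2));
  return 0;
}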
911 StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var, | 901 StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var, |
912 Variable *OrigBaseReg) { | 902 Variable *OrigBaseReg) { |
913 int32_t Offset = Var->getStackOffset(); | 903 int32_t Offset = Var->getStackOffset(); |
914 // Legalize will likely need a movw/movt combination, but if the top | 904 // Legalize will likely need a movw/movt combination, but if the top bits are |
915 // bits are all 0 from negating the offset and subtracting, we could | 905 // all 0 from negating the offset and subtracting, we could use that instead. |
916 // use that instead. | |
917 bool ShouldSub = (-Offset & 0xFFFF0000) == 0; | 906 bool ShouldSub = (-Offset & 0xFFFF0000) == 0; |
918 if (ShouldSub) | 907 if (ShouldSub) |
919 Offset = -Offset; | 908 Offset = -Offset; |
920 Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset), | 909 Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset), |
921 Legal_Reg | Legal_Flex, getReservedTmpReg()); | 910 Legal_Reg | Legal_Flex, getReservedTmpReg()); |
922 Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg()); | 911 Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg()); |
923 if (ShouldSub) | 912 if (ShouldSub) |
924 _sub(ScratchReg, OrigBaseReg, OffsetVal); | 913 _sub(ScratchReg, OrigBaseReg, OffsetVal); |
925 else | 914 else |
926 _add(ScratchReg, OrigBaseReg, OffsetVal); | 915 _add(ScratchReg, OrigBaseReg, OffsetVal); |
(...skipping 15 matching lines...) |
942 // | 931 // |
943 // This is safe because we have reserved TMP, and add for ARM does not | 932 // This is safe because we have reserved TMP, and add for ARM does not |
944 // clobber the flags register. | 933 // clobber the flags register. |
945 Func->dump("Before legalizeStackSlots"); | 934 Func->dump("Before legalizeStackSlots"); |
946 assert(hasComputedFrame()); | 935 assert(hasComputedFrame()); |
947 // Early exit, if SpillAreaSizeBytes is really small. | 936 // Early exit, if SpillAreaSizeBytes is really small. |
948 if (isLegalVariableStackOffset(SpillAreaSizeBytes)) | 937 if (isLegalVariableStackOffset(SpillAreaSizeBytes)) |
949 return; | 938 return; |
950 Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); | 939 Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); |
951 int32_t StackAdjust = 0; | 940 int32_t StackAdjust = 0; |
952 // Do a fairly naive greedy clustering for now. Pick the first stack slot | 941 // Do a fairly naive greedy clustering for now. Pick the first stack slot |
953 // that's out of bounds and make a new base reg using the architecture's temp | 942 // that's out of bounds and make a new base reg using the architecture's temp |
954 // register. If that works for the next slot, then great. Otherwise, create | 943 // register. If that works for the next slot, then great. Otherwise, create a |
955 // a new base register, clobbering the previous base register. Never share a | 944 // new base register, clobbering the previous base register. Never share a |
956 // base reg across different basic blocks. This isn't ideal if local and | 945 // base reg across different basic blocks. This isn't ideal if local and |
957 // multi-block variables are far apart and their references are interspersed. | 946 // multi-block variables are far apart and their references are interspersed. |
958 // It may help to be more coordinated about assign stack slot numbers | 947 // It may help to be more coordinated about assign stack slot numbers and may |
959 // and may help to assign smaller offsets to higher-weight variables | 948 // help to assign smaller offsets to higher-weight variables so that they |
960 // so that they don't depend on this legalization. | 949 // don't depend on this legalization. |
961 for (CfgNode *Node : Func->getNodes()) { | 950 for (CfgNode *Node : Func->getNodes()) { |
962 Context.init(Node); | 951 Context.init(Node); |
963 StackVariable *NewBaseReg = nullptr; | 952 StackVariable *NewBaseReg = nullptr; |
964 int32_t NewBaseOffset = 0; | 953 int32_t NewBaseOffset = 0; |
965 while (!Context.atEnd()) { | 954 while (!Context.atEnd()) { |
966 PostIncrLoweringContext PostIncrement(Context); | 955 PostIncrLoweringContext PostIncrement(Context); |
967 Inst *CurInstr = Context.getCur(); | 956 Inst *CurInstr = Context.getCur(); |
968 Variable *Dest = CurInstr->getDest(); | 957 Variable *Dest = CurInstr->getDest(); |
969 // Check if the previous NewBaseReg is clobbered, and reset if needed. | 958 // Check if the previous NewBaseReg is clobbered, and reset if needed. |
970 if ((Dest && NewBaseReg && Dest->hasReg() && | 959 if ((Dest && NewBaseReg && Dest->hasReg() && |
971 Dest->getRegNum() == NewBaseReg->getBaseRegNum()) || | 960 Dest->getRegNum() == NewBaseReg->getBaseRegNum()) || |
972 llvm::isa<InstFakeKill>(CurInstr)) { | 961 llvm::isa<InstFakeKill>(CurInstr)) { |
973 NewBaseReg = nullptr; | 962 NewBaseReg = nullptr; |
974 NewBaseOffset = 0; | 963 NewBaseOffset = 0; |
975 } | 964 } |
976 // The stack adjustment only matters if we are using SP instead of FP. | 965 // The stack adjustment only matters if we are using SP instead of FP. |
977 if (!hasFramePointer()) { | 966 if (!hasFramePointer()) { |
978 if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) { | 967 if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) { |
979 StackAdjust += AdjInst->getAmount(); | 968 StackAdjust += AdjInst->getAmount(); |
980 NewBaseOffset += AdjInst->getAmount(); | 969 NewBaseOffset += AdjInst->getAmount(); |
981 continue; | 970 continue; |
982 } | 971 } |
983 if (llvm::isa<InstARM32Call>(CurInstr)) { | 972 if (llvm::isa<InstARM32Call>(CurInstr)) { |
984 NewBaseOffset -= StackAdjust; | 973 NewBaseOffset -= StackAdjust; |
985 StackAdjust = 0; | 974 StackAdjust = 0; |
986 continue; | 975 continue; |
987 } | 976 } |
988 } | 977 } |
989 // For now, only Mov instructions can have stack variables. We need to | 978 // For now, only Mov instructions can have stack variables. We need to |
990 // know the type of instruction because we currently create a fresh one | 979 // know the type of instruction because we currently create a fresh one |
991 // to replace Dest/Source, rather than mutate in place. | 980 // to replace Dest/Source, rather than mutate in place. |
992 auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr); | 981 auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr); |
993 if (!MovInst) { | 982 if (!MovInst) { |
994 continue; | 983 continue; |
995 } | 984 } |
996 if (!Dest->hasReg()) { | 985 if (!Dest->hasReg()) { |
997 int32_t Offset = Dest->getStackOffset(); | 986 int32_t Offset = Dest->getStackOffset(); |
998 Offset += StackAdjust; | 987 Offset += StackAdjust; |
999 if (!isLegalVariableStackOffset(Offset)) { | 988 if (!isLegalVariableStackOffset(Offset)) { |
(...skipping 110 matching lines...) |
1110 return Operand; | 1099 return Operand; |
1111 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { | 1100 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { |
1112 split64(Var); | 1101 split64(Var); |
1113 return Var->getHi(); | 1102 return Var->getHi(); |
1114 } | 1103 } |
1115 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { | 1104 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { |
1116 return Ctx->getConstantInt32( | 1105 return Ctx->getConstantInt32( |
1117 static_cast<uint32_t>(Const->getValue() >> 32)); | 1106 static_cast<uint32_t>(Const->getValue() >> 32)); |
1118 } | 1107 } |
1119 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) { | 1108 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) { |
1120 // Conservatively disallow memory operands with side-effects | 1109 // Conservatively disallow memory operands with side-effects in case of |
1121 // in case of duplication. | 1110 // duplication. |
1122 assert(Mem->getAddrMode() == OperandARM32Mem::Offset || | 1111 assert(Mem->getAddrMode() == OperandARM32Mem::Offset || |
1123 Mem->getAddrMode() == OperandARM32Mem::NegOffset); | 1112 Mem->getAddrMode() == OperandARM32Mem::NegOffset); |
1124 const Type SplitType = IceType_i32; | 1113 const Type SplitType = IceType_i32; |
1125 if (Mem->isRegReg()) { | 1114 if (Mem->isRegReg()) { |
1126 // We have to make a temp variable T, and add 4 to either Base or Index. | 1115 // We have to make a temp variable T, and add 4 to either Base or Index. |
1127 // The Index may be shifted, so adding 4 can mean something else. | 1116 // The Index may be shifted, so adding 4 can mean something else. Thus, |
1128 // Thus, prefer T := Base + 4, and use T as the new Base. | 1117 // prefer T := Base + 4, and use T as the new Base. |
1129 Variable *Base = Mem->getBase(); | 1118 Variable *Base = Mem->getBase(); |
1130 Constant *Four = Ctx->getConstantInt32(4); | 1119 Constant *Four = Ctx->getConstantInt32(4); |
1131 Variable *NewBase = Func->makeVariable(Base->getType()); | 1120 Variable *NewBase = Func->makeVariable(Base->getType()); |
1132 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase, | 1121 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase, |
1133 Base, Four)); | 1122 Base, Four)); |
1134 return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(), | 1123 return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(), |
1135 Mem->getShiftOp(), Mem->getShiftAmt(), | 1124 Mem->getShiftOp(), Mem->getShiftAmt(), |
1136 Mem->getAddrMode()); | 1125 Mem->getAddrMode()); |
1137 } else { | 1126 } else { |
1138 Variable *Base = Mem->getBase(); | 1127 Variable *Base = Mem->getBase(); |
1139 ConstantInteger32 *Offset = Mem->getOffset(); | 1128 ConstantInteger32 *Offset = Mem->getOffset(); |
1140 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4)); | 1129 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4)); |
1141 int32_t NextOffsetVal = Offset->getValue() + 4; | 1130 int32_t NextOffsetVal = Offset->getValue() + 4; |
1142 const bool SignExt = false; | 1131 const bool SignExt = false; |
1143 if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) { | 1132 if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) { |
1144 // We have to make a temp variable and add 4 to either Base or Offset. | 1133 // We have to make a temp variable and add 4 to either Base or Offset. |
1145 // If we add 4 to Offset, this will convert a non-RegReg addressing | 1134 // If we add 4 to Offset, this will convert a non-RegReg addressing |
1146 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows | 1135 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows |
1147 // RegReg addressing modes, prefer adding to base and replacing instead. | 1136 // RegReg addressing modes, prefer adding to base and replacing |
1148 // Thus we leave the old offset alone. | 1137 // instead. Thus we leave the old offset alone. |
1149 Constant *Four = Ctx->getConstantInt32(4); | 1138 Constant *Four = Ctx->getConstantInt32(4); |
1150 Variable *NewBase = Func->makeVariable(Base->getType()); | 1139 Variable *NewBase = Func->makeVariable(Base->getType()); |
1151 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, | 1140 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, |
1152 NewBase, Base, Four)); | 1141 NewBase, Base, Four)); |
1153 Base = NewBase; | 1142 Base = NewBase; |
1154 } else { | 1143 } else { |
1155 Offset = | 1144 Offset = |
1156 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal)); | 1145 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal)); |
1157 } | 1146 } |
1158 return OperandARM32Mem::create(Func, SplitType, Base, Offset, | 1147 return OperandARM32Mem::create(Func, SplitType, Base, Offset, |
(...skipping 29 matching lines...)
1188 | 1177 |
1189 REGARM32_TABLE | 1178 REGARM32_TABLE |
1190 | 1179 |
1191 #undef X | 1180 #undef X |
1192 | 1181 |
1193 return Registers; | 1182 return Registers; |
1194 } | 1183 } |
1195 | 1184 |
1196 void TargetARM32::lowerAlloca(const InstAlloca *Inst) { | 1185 void TargetARM32::lowerAlloca(const InstAlloca *Inst) { |
1197 UsesFramePointer = true; | 1186 UsesFramePointer = true; |
1198 // Conservatively require the stack to be aligned. Some stack | 1187 // Conservatively require the stack to be aligned. Some stack adjustment |
1199 // adjustment operations implemented below assume that the stack is | 1188 // operations implemented below assume that the stack is aligned before the |
1200 // aligned before the alloca. All the alloca code ensures that the | 1189 // alloca. All the alloca code ensures that the stack alignment is preserved |
1201 // stack alignment is preserved after the alloca. The stack alignment | 1190 // after the alloca. The stack alignment restriction can be relaxed in some |
1202 // restriction can be relaxed in some cases. | 1191 // cases. |
1203 NeedsStackAlignment = true; | 1192 NeedsStackAlignment = true; |
1204 | 1193 |
1205 // TODO(stichnot): minimize the number of adjustments of SP, etc. | 1194 // TODO(stichnot): minimize the number of adjustments of SP, etc. |
1206 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 1195 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
1207 Variable *Dest = Inst->getDest(); | 1196 Variable *Dest = Inst->getDest(); |
1208 uint32_t AlignmentParam = Inst->getAlignInBytes(); | 1197 uint32_t AlignmentParam = Inst->getAlignInBytes(); |
1209 // For default align=0, set it to the real value 1, to avoid any | 1198 // For default align=0, set it to the real value 1, to avoid any |
1210 // bit-manipulation problems below. | 1199 // bit-manipulation problems below. |
1211 AlignmentParam = std::max(AlignmentParam, 1u); | 1200 AlignmentParam = std::max(AlignmentParam, 1u); |
1212 | 1201 |
1213 // LLVM enforces power of 2 alignment. | 1202 // LLVM enforces power of 2 alignment. |
1214 assert(llvm::isPowerOf2_32(AlignmentParam)); | 1203 assert(llvm::isPowerOf2_32(AlignmentParam)); |
1215 assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES)); | 1204 assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES)); |
1216 | 1205 |
1217 uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES); | 1206 uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES); |
1218 if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) { | 1207 if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) { |
1219 alignRegisterPow2(SP, Alignment); | 1208 alignRegisterPow2(SP, Alignment); |
1220 } | 1209 } |
1221 Operand *TotalSize = Inst->getSizeInBytes(); | 1210 Operand *TotalSize = Inst->getSizeInBytes(); |
1222 if (const auto *ConstantTotalSize = | 1211 if (const auto *ConstantTotalSize = |
1223 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { | 1212 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { |
1224 uint32_t Value = ConstantTotalSize->getValue(); | 1213 uint32_t Value = ConstantTotalSize->getValue(); |
1225 Value = Utils::applyAlignment(Value, Alignment); | 1214 Value = Utils::applyAlignment(Value, Alignment); |
1226 Operand *SubAmount = legalize(Ctx->getConstantInt32(Value)); | 1215 Operand *SubAmount = legalize(Ctx->getConstantInt32(Value)); |
1227 _sub(SP, SP, SubAmount); | 1216 _sub(SP, SP, SubAmount); |
1228 } else { | 1217 } else { |
1229 // Non-constant sizes need to be adjusted to the next highest | 1218 // Non-constant sizes need to be adjusted to the next highest multiple of |
1230 // multiple of the required alignment at runtime. | 1219 // the required alignment at runtime. |
1231 TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex); | 1220 TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex); |
1232 Variable *T = makeReg(IceType_i32); | 1221 Variable *T = makeReg(IceType_i32); |
1233 _mov(T, TotalSize); | 1222 _mov(T, TotalSize); |
1234 Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1)); | 1223 Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1)); |
1235 _add(T, T, AddAmount); | 1224 _add(T, T, AddAmount); |
1236 alignRegisterPow2(T, Alignment); | 1225 alignRegisterPow2(T, Alignment); |
1237 _sub(SP, SP, T); | 1226 _sub(SP, SP, T); |
1238 } | 1227 } |
1239 _mov(Dest, SP); | 1228 _mov(Dest, SP); |
1240 } | 1229 } |
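// A minimal sketch of the align-up math used by both paths above: constant
// sizes are folded at compile time, non-constant sizes use an add of
// Alignment - 1 followed by masking the low bits (which alignRegisterPow2
// presumably emits). Plain C++ for illustration, assuming Alignment is a
// power of 2 as asserted; this is not Subzero's Utils::applyAlignment itself.
static uint32_t alignUpPow2(uint32_t Size, uint32_t Alignment) {
  return (Size + Alignment - 1) & ~(Alignment - 1);
}
// e.g. alignUpPow2(20, 16) == 32, matching "add T, T, #15" followed by
// clearing the low four bits of T before "sub sp, sp, T".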
(...skipping 17 matching lines...)
1258 _tst(SrcLoReg, Mask); | 1247 _tst(SrcLoReg, Mask); |
1259 break; | 1248 break; |
1260 } | 1249 } |
1261 case IceType_i32: { | 1250 case IceType_i32: { |
1262 _tst(SrcLoReg, SrcLoReg); | 1251 _tst(SrcLoReg, SrcLoReg); |
1263 break; | 1252 break; |
1264 } | 1253 } |
1265 case IceType_i64: { | 1254 case IceType_i64: { |
1266 Variable *ScratchReg = makeReg(IceType_i32); | 1255 Variable *ScratchReg = makeReg(IceType_i32); |
1267 _orrs(ScratchReg, SrcLoReg, SrcHi); | 1256 _orrs(ScratchReg, SrcLoReg, SrcHi); |
1268 // ScratchReg isn't going to be used, but we need the | 1257 // ScratchReg isn't going to be used, but we need the side-effect of |
1269 // side-effect of setting flags from this operation. | 1258 // setting flags from this operation. |
1270 Context.insert(InstFakeUse::create(Func, ScratchReg)); | 1259 Context.insert(InstFakeUse::create(Func, ScratchReg)); |
1271 } | 1260 } |
1272 } | 1261 } |
1273 InstARM32Label *Label = InstARM32Label::create(Func, this); | 1262 InstARM32Label *Label = InstARM32Label::create(Func, this); |
1274 _br(Label, CondARM32::NE); | 1263 _br(Label, CondARM32::NE); |
1275 _trap(); | 1264 _trap(); |
1276 Context.insert(Label); | 1265 Context.insert(Label); |
1277 } | 1266 } |
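// A small model of the divide-by-zero test emitted above (illustration only,
// not Subzero API): for i64 the two halves are ORed so a single flag-setting
// operation covers the whole value; a zero result falls through to the trap.
static bool divisorIsZero64(uint32_t Lo, uint32_t Hi) { return (Lo | Hi) == 0; }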
1278 | 1267 |
1279 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, | 1268 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, |
(...skipping 23 matching lines...)
1303 InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs); | 1292 InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs); |
1304 Call->addArg(T0R); | 1293 Call->addArg(T0R); |
1305 Call->addArg(T1R); | 1294 Call->addArg(T1R); |
1306 lowerCall(Call); | 1295 lowerCall(Call); |
1307 } | 1296 } |
1308 return; | 1297 return; |
1309 } | 1298 } |
1310 | 1299 |
1311 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { | 1300 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
1312 Variable *Dest = Inst->getDest(); | 1301 Variable *Dest = Inst->getDest(); |
1313 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier | 1302 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to |
1314 // to legalize Src0 to flex or Src1 to flex and there is a reversible | 1303 // legalize Src0 to flex or Src1 to flex and there is a reversible |
1315 // instruction. E.g., reverse subtract with immediate, register vs | 1304 // instruction. E.g., reverse subtract with immediate, register vs register, |
1316 // register, immediate. | 1305 // immediate. |
1317 // Or it may be the case that the operands aren't swapped, but the | 1306 // Or it may be the case that the operands aren't swapped, but the bits can |
1318 // bits can be flipped and a different operation applied. | 1307 // be flipped and a different operation applied. E.g., use BIC (bit clear) |
1319 // E.g., use BIC (bit clear) instead of AND for some masks. | 1308 // instead of AND for some masks. |
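  // For example, "and r0, r0, #0xFFFFFF00" has no encodable modified
  // immediate, while the equivalent "bic r0, r0, #0xFF" does. (Illustration of
  // the idea only; this lowering does not perform that rewrite yet.)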
1320 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); | 1309 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
1321 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); | 1310 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); |
1322 if (Dest->getType() == IceType_i64) { | 1311 if (Dest->getType() == IceType_i64) { |
1323 // These helper-call-involved instructions are lowered in this | 1312 // These helper-call-involved instructions are lowered in this separate |
1324 // separate switch. This is because we would otherwise assume that | 1313 // switch. This is because we would otherwise assume that we need to |
1325 // we need to legalize Src0 to Src0RLo and Src0Hi. However, those go unused | 1314 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with |
1326 // with helper calls, and such unused/redundant instructions will fail | 1315 // helper calls, and such unused/redundant instructions will fail liveness |
1327 // liveness analysis under -Om1 setting. | 1316 // analysis under -Om1 setting. |
1328 switch (Inst->getOp()) { | 1317 switch (Inst->getOp()) { |
1329 default: | 1318 default: |
1330 break; | 1319 break; |
1331 case InstArithmetic::Udiv: | 1320 case InstArithmetic::Udiv: |
1332 case InstArithmetic::Sdiv: | 1321 case InstArithmetic::Sdiv: |
1333 case InstArithmetic::Urem: | 1322 case InstArithmetic::Urem: |
1334 case InstArithmetic::Srem: { | 1323 case InstArithmetic::Srem: { |
1335 // Check for divide by 0 (ARM normally doesn't trap, but we want it | 1324 // Check for divide by 0 (ARM normally doesn't trap, but we want it to |
1336 // to trap for NaCl). Src1Lo and Src1Hi may have already been legalized | 1325 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a |
1337 // to a register, which will hide a constant source operand. | 1326 // register, which will hide a constant source operand. Instead, check |
1338 // Instead, check the not-yet-legalized Src1 to optimize-out a divide | 1327 // the not-yet-legalized Src1 to optimize-out a divide by 0 check. |
1339 // by 0 check. | |
1340 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { | 1328 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { |
1341 if (C64->getValue() == 0) { | 1329 if (C64->getValue() == 0) { |
1342 _trap(); | 1330 _trap(); |
1343 return; | 1331 return; |
1344 } | 1332 } |
1345 } else { | 1333 } else { |
1346 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); | 1334 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); |
1347 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); | 1335 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); |
1348 div0Check(IceType_i64, Src1Lo, Src1Hi); | 1336 div0Check(IceType_i64, Src1Lo, Src1Hi); |
1349 } | 1337 } |
1350 // Technically, ARM has their own aeabi routines, but we can use the | 1338 // Technically, ARM has their own aeabi routines, but we can use the |
1351 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, | 1339 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses |
1352 // but uses the more standard __moddi3 for rem. | 1340 // the more standard __moddi3 for rem. |
1353 const char *HelperName = ""; | 1341 const char *HelperName = ""; |
1354 switch (Inst->getOp()) { | 1342 switch (Inst->getOp()) { |
1355 default: | 1343 default: |
1356 llvm_unreachable("Should have only matched div ops."); | 1344 llvm_unreachable("Should have only matched div ops."); |
1357 break; | 1345 break; |
1358 case InstArithmetic::Udiv: | 1346 case InstArithmetic::Udiv: |
1359 HelperName = H_udiv_i64; | 1347 HelperName = H_udiv_i64; |
1360 break; | 1348 break; |
1361 case InstArithmetic::Sdiv: | 1349 case InstArithmetic::Sdiv: |
1362 HelperName = H_sdiv_i64; | 1350 HelperName = H_sdiv_i64; |
(...skipping 102 matching lines...)
1465 // a=b<<c ==> | 1453 // a=b<<c ==> |
1466 // GCC 4.8 does: | 1454 // GCC 4.8 does: |
1467 // sub t_c1, c.lo, #32 | 1455 // sub t_c1, c.lo, #32 |
1468 // lsl t_hi, b.hi, c.lo | 1456 // lsl t_hi, b.hi, c.lo |
1469 // orr t_hi, t_hi, b.lo, lsl t_c1 | 1457 // orr t_hi, t_hi, b.lo, lsl t_c1 |
1470 // rsb t_c2, c.lo, #32 | 1458 // rsb t_c2, c.lo, #32 |
1471 // orr t_hi, t_hi, b.lo, lsr t_c2 | 1459 // orr t_hi, t_hi, b.lo, lsr t_c2 |
1472 // lsl t_lo, b.lo, c.lo | 1460 // lsl t_lo, b.lo, c.lo |
1473 // a.lo = t_lo | 1461 // a.lo = t_lo |
1474 // a.hi = t_hi | 1462 // a.hi = t_hi |
1475 // Can be strength-reduced for constant-shifts, but we don't do | 1463 // Can be strength-reduced for constant-shifts, but we don't do that for |
1476 // that for now. | 1464 // now. |
1477 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. | 1465 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On |
1478 // On ARM, shifts only take the lower 8 bits of the shift register, | 1466 // ARM, shifts only take the lower 8 bits of the shift register, and |
1479 // and saturate to the range 0-32, so the negative value will | 1467 // saturate to the range 0-32, so the negative value will saturate to 32. |
1480 // saturate to 32. | |
1481 Variable *T_Hi = makeReg(IceType_i32); | 1468 Variable *T_Hi = makeReg(IceType_i32); |
1482 Variable *Src1RLo = legalizeToReg(Src1Lo); | 1469 Variable *Src1RLo = legalizeToReg(Src1Lo); |
1483 Constant *ThirtyTwo = Ctx->getConstantInt32(32); | 1470 Constant *ThirtyTwo = Ctx->getConstantInt32(32); |
1484 Variable *T_C1 = makeReg(IceType_i32); | 1471 Variable *T_C1 = makeReg(IceType_i32); |
1485 Variable *T_C2 = makeReg(IceType_i32); | 1472 Variable *T_C2 = makeReg(IceType_i32); |
1486 _sub(T_C1, Src1RLo, ThirtyTwo); | 1473 _sub(T_C1, Src1RLo, ThirtyTwo); |
1487 _lsl(T_Hi, Src0RHi, Src1RLo); | 1474 _lsl(T_Hi, Src0RHi, Src1RLo); |
1488 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, | 1475 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, |
1489 OperandARM32::LSL, T_C1)); | 1476 OperandARM32::LSL, T_C1)); |
1490 _rsb(T_C2, Src1RLo, ThirtyTwo); | 1477 _rsb(T_C2, Src1RLo, ThirtyTwo); |
1491 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, | 1478 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, |
1492 OperandARM32::LSR, T_C2)); | 1479 OperandARM32::LSR, T_C2)); |
1493 _mov(DestHi, T_Hi); | 1480 _mov(DestHi, T_Hi); |
1494 Variable *T_Lo = makeReg(IceType_i32); | 1481 Variable *T_Lo = makeReg(IceType_i32); |
1495 // _mov seems to sometimes have better register preferencing than lsl. | 1482 // _mov seems to sometimes have better register preferencing than lsl. |
1496 // Otherwise mov w/ lsl shifted register is a pseudo-instruction | 1483 // Otherwise mov w/ lsl shifted register is a pseudo-instruction that |
1497 // that maps to lsl. | 1484 // maps to lsl. |
1498 _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, | 1485 _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, |
1499 OperandARM32::LSL, Src1RLo)); | 1486 OperandARM32::LSL, Src1RLo)); |
1500 _mov(DestLo, T_Lo); | 1487 _mov(DestLo, T_Lo); |
1501 return; | 1488 return; |
1502 } | 1489 } |
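// A self-contained model of the Shl sequence above (illustration only, not
// Subzero API). ARM register-specified LSL/LSR use the bottom byte of the
// shift register and produce 0 for amounts of 32 or more, which is what makes
// the three ORed terms for the high word correct for any c in [0, 63].
static uint32_t lslSat(uint32_t X, uint32_t Amt) { return Amt >= 32 ? 0 : X << Amt; }
static uint32_t lsrSat(uint32_t X, uint32_t Amt) { return Amt >= 32 ? 0 : X >> Amt; }
static uint64_t shl64Model(uint64_t B, uint32_t C) {
  const uint32_t Lo = static_cast<uint32_t>(B);
  const uint32_t Hi = static_cast<uint32_t>(B >> 32);
  const uint32_t TC1 = (C - 32) & 0xFF; // sub t_c1, c.lo, #32 (negative => saturates)
  const uint32_t TC2 = (32 - C) & 0xFF; // rsb t_c2, c.lo, #32
  const uint32_t THi = lslSat(Hi, C) | lslSat(Lo, TC1) | lsrSat(Lo, TC2);
  const uint32_t TLo = lslSat(Lo, C);
  return (static_cast<uint64_t>(THi) << 32) | TLo;
}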
1503 case InstArithmetic::Lshr: | 1490 case InstArithmetic::Lshr: |
1504 // a=b>>c (unsigned) ==> | 1491 // a=b>>c (unsigned) ==> |
1505 // GCC 4.8 does: | 1492 // GCC 4.8 does: |
1506 // rsb t_c1, c.lo, #32 | 1493 // rsb t_c1, c.lo, #32 |
1507 // lsr t_lo, b.lo, c.lo | 1494 // lsr t_lo, b.lo, c.lo |
1508 // orr t_lo, t_lo, b.hi, lsl t_c1 | 1495 // orr t_lo, t_lo, b.hi, lsl t_c1 |
1509 // sub t_c2, c.lo, #32 | 1496 // sub t_c2, c.lo, #32 |
1510 // orr t_lo, t_lo, b.hi, lsr t_c2 | 1497 // orr t_lo, t_lo, b.hi, lsr t_c2 |
1511 // lsr t_hi, b.hi, c.lo | 1498 // lsr t_hi, b.hi, c.lo |
1512 // a.lo = t_lo | 1499 // a.lo = t_lo |
1513 // a.hi = t_hi | 1500 // a.hi = t_hi |
1514 case InstArithmetic::Ashr: { | 1501 case InstArithmetic::Ashr: { |
1515 // a=b>>c (signed) ==> ... | 1502 // a=b>>c (signed) ==> ... |
1516 // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, | 1503 // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, and the |
1517 // and the next orr should be conditioned on PLUS. The last two | 1504 // next orr should be conditioned on PLUS. The last two right shifts |
1518 // right shifts should also be arithmetic. | 1505 // should also be arithmetic. |
1519 bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; | 1506 bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; |
1520 Variable *T_Lo = makeReg(IceType_i32); | 1507 Variable *T_Lo = makeReg(IceType_i32); |
1521 Variable *Src1RLo = legalizeToReg(Src1Lo); | 1508 Variable *Src1RLo = legalizeToReg(Src1Lo); |
1522 Constant *ThirtyTwo = Ctx->getConstantInt32(32); | 1509 Constant *ThirtyTwo = Ctx->getConstantInt32(32); |
1523 Variable *T_C1 = makeReg(IceType_i32); | 1510 Variable *T_C1 = makeReg(IceType_i32); |
1524 Variable *T_C2 = makeReg(IceType_i32); | 1511 Variable *T_C2 = makeReg(IceType_i32); |
1525 _rsb(T_C1, Src1RLo, ThirtyTwo); | 1512 _rsb(T_C1, Src1RLo, ThirtyTwo); |
1526 _lsr(T_Lo, Src0RLo, Src1RLo); | 1513 _lsr(T_Lo, Src0RLo, Src1RLo); |
1527 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, | 1514 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, |
1528 OperandARM32::LSL, T_C1)); | 1515 OperandARM32::LSL, T_C1)); |
(...skipping 187 matching lines...)
1716 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 1703 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
1717 Variable *T_Lo = nullptr, *T_Hi = nullptr; | 1704 Variable *T_Lo = nullptr, *T_Hi = nullptr; |
1718 _mov(T_Lo, Src0Lo); | 1705 _mov(T_Lo, Src0Lo); |
1719 _mov(DestLo, T_Lo); | 1706 _mov(DestLo, T_Lo); |
1720 _mov(T_Hi, Src0Hi); | 1707 _mov(T_Hi, Src0Hi); |
1721 _mov(DestHi, T_Hi); | 1708 _mov(DestHi, T_Hi); |
1722 } else { | 1709 } else { |
1723 Operand *NewSrc; | 1710 Operand *NewSrc; |
1724 if (Dest->hasReg()) { | 1711 if (Dest->hasReg()) { |
1725 // If Dest already has a physical register, then legalize the Src operand | 1712 // If Dest already has a physical register, then legalize the Src operand |
1726 // into a Variable with the same register assignment. This especially | 1713 // into a Variable with the same register assignment. This especially |
1727 // helps allow the use of Flex operands. | 1714 // helps allow the use of Flex operands. |
1728 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); | 1715 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); |
1729 } else { | 1716 } else { |
1730 // Dest could be a stack operand. Since we could potentially need | 1717 // Dest could be a stack operand. Since we could potentially need to do a |
1731 // to do a Store (and store can only have Register operands), | 1718 // Store (and store can only have Register operands), legalize this to a |
1732 // legalize this to a register. | 1719 // register. |
1733 NewSrc = legalize(Src0, Legal_Reg); | 1720 NewSrc = legalize(Src0, Legal_Reg); |
1734 } | 1721 } |
1735 if (isVectorType(Dest->getType())) { | 1722 if (isVectorType(Dest->getType())) { |
1736 UnimplementedError(Func->getContext()->getFlags()); | 1723 UnimplementedError(Func->getContext()->getFlags()); |
1737 } else if (isFloatingType(Dest->getType())) { | 1724 } else if (isFloatingType(Dest->getType())) { |
1738 Variable *SrcR = legalizeToReg(NewSrc); | 1725 Variable *SrcR = legalizeToReg(NewSrc); |
1739 _vmov(Dest, SrcR); | 1726 _vmov(Dest, SrcR); |
1740 } else { | 1727 } else { |
1741 _mov(Dest, NewSrc); | 1728 _mov(Dest, NewSrc); |
1742 } | 1729 } |
(...skipping 60 matching lines...)
1803 } | 1790 } |
1804 | 1791 |
1805 if (!InRegs) { | 1792 if (!InRegs) { |
1806 ParameterAreaSizeBytes = | 1793 ParameterAreaSizeBytes = |
1807 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty); | 1794 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty); |
1808 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes)); | 1795 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes)); |
1809 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | 1796 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); |
1810 } | 1797 } |
1811 } | 1798 } |
1812 | 1799 |
1813 // Adjust the parameter area so that the stack is aligned. It is | 1800 // Adjust the parameter area so that the stack is aligned. It is assumed that |
1814 // assumed that the stack is already aligned at the start of the | 1801 // the stack is already aligned at the start of the calling sequence. |
1815 // calling sequence. | |
1816 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); | 1802 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); |
1817 | 1803 |
1818 // Subtract the appropriate amount for the argument area. This also | 1804 // Subtract the appropriate amount for the argument area. This also takes |
1819 // takes care of setting the stack adjustment during emission. | 1805 // care of setting the stack adjustment during emission. |
1820 // | 1806 // |
1821 // TODO: If for some reason the call instruction gets dead-code | 1807 // TODO: If for some reason the call instruction gets dead-code eliminated |
1822 // eliminated after lowering, we would need to ensure that the | 1808 // after lowering, we would need to ensure that the pre-call and the |
1823 // pre-call and the post-call esp adjustment get eliminated as well. | 1809 // post-call esp adjustment get eliminated as well. |
1824 if (ParameterAreaSizeBytes) { | 1810 if (ParameterAreaSizeBytes) { |
1825 Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), | 1811 Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), |
1826 Legal_Reg | Legal_Flex); | 1812 Legal_Reg | Legal_Flex); |
1827 _adjust_stack(ParameterAreaSizeBytes, SubAmount); | 1813 _adjust_stack(ParameterAreaSizeBytes, SubAmount); |
1828 } | 1814 } |
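  // For example (illustration only, assuming an 8-byte stack requirement for
  // i64): an i32 argument spilled at offset 0 followed by an i64 argument
  // leaves a 4-byte pad because applyStackAlignmentTy rounds the running
  // offset up to 8, and the final ParameterAreaSizeBytes is then rounded up to
  // the full stack alignment before the _adjust_stack above.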
1829 | 1815 |
1830 // Copy arguments that are passed on the stack to the appropriate | 1816 // Copy arguments that are passed on the stack to the appropriate stack |
1831 // stack locations. | 1817 // locations. |
1832 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 1818 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
1833 for (auto &StackArg : StackArgs) { | 1819 for (auto &StackArg : StackArgs) { |
1834 ConstantInteger32 *Loc = | 1820 ConstantInteger32 *Loc = |
1835 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second)); | 1821 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second)); |
1836 Type Ty = StackArg.first->getType(); | 1822 Type Ty = StackArg.first->getType(); |
1837 OperandARM32Mem *Addr; | 1823 OperandARM32Mem *Addr; |
1838 constexpr bool SignExt = false; | 1824 constexpr bool SignExt = false; |
1839 if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) { | 1825 if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) { |
1840 Addr = OperandARM32Mem::create(Func, Ty, SP, Loc); | 1826 Addr = OperandARM32Mem::create(Func, Ty, SP, Loc); |
1841 } else { | 1827 } else { |
1842 Variable *NewBase = Func->makeVariable(SP->getType()); | 1828 Variable *NewBase = Func->makeVariable(SP->getType()); |
1843 lowerArithmetic( | 1829 lowerArithmetic( |
1844 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc)); | 1830 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc)); |
1845 Addr = formMemoryOperand(NewBase, Ty); | 1831 Addr = formMemoryOperand(NewBase, Ty); |
1846 } | 1832 } |
1847 lowerStore(InstStore::create(Func, StackArg.first, Addr)); | 1833 lowerStore(InstStore::create(Func, StackArg.first, Addr)); |
1848 } | 1834 } |
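// A rough model of why canHoldOffset() is consulted above (assumed A32
// addressing-mode limits; an illustration, not Subzero's implementation):
// plain word/byte loads and stores take a 12-bit immediate offset, halfword
// and doubleword forms only 8 bits, and VFP transfers 8 bits scaled by 4.
enum class XferKindModel { WordOrByte, HalfOrDword, Vfp };
static bool fitsImmOffsetModel(XferKindModel K, int32_t Offset) {
  switch (K) {
  case XferKindModel::WordOrByte:
    return Offset > -4096 && Offset < 4096;
  case XferKindModel::HalfOrDword:
    return Offset > -256 && Offset < 256;
  case XferKindModel::Vfp:
    return Offset > -1024 && Offset < 1024 && (Offset % 4) == 0;
  }
  return false;
}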
1849 | 1835 |
1850 // Copy arguments to be passed in registers to the appropriate registers. | 1836 // Copy arguments to be passed in registers to the appropriate registers. |
1851 for (auto &GPRArg : GPRArgs) { | 1837 for (auto &GPRArg : GPRArgs) { |
1852 Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second); | 1838 Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second); |
1853 // Generate a FakeUse of register arguments so that they do not get | 1839 // Generate a FakeUse of register arguments so that they do not get dead |
1854 // dead code eliminated as a result of the FakeKill of scratch | 1840 // code eliminated as a result of the FakeKill of scratch registers after |
1855 // registers after the call. | 1841 // the call. |
1856 Context.insert(InstFakeUse::create(Func, Reg)); | 1842 Context.insert(InstFakeUse::create(Func, Reg)); |
1857 } | 1843 } |
1858 for (auto &FPArg : FPArgs) { | 1844 for (auto &FPArg : FPArgs) { |
1859 Variable *Reg = legalizeToReg(FPArg.first, FPArg.second); | 1845 Variable *Reg = legalizeToReg(FPArg.first, FPArg.second); |
1860 Context.insert(InstFakeUse::create(Func, Reg)); | 1846 Context.insert(InstFakeUse::create(Func, Reg)); |
1861 } | 1847 } |
1862 | 1848 |
1863 // Generate the call instruction. Assign its result to a temporary | 1849 // Generate the call instruction. Assign its result to a temporary with high |
1864 // with high register allocation weight. | 1850 // register allocation weight. |
1865 Variable *Dest = Instr->getDest(); | 1851 Variable *Dest = Instr->getDest(); |
1866 // ReturnReg doubles as ReturnRegLo as necessary. | 1852 // ReturnReg doubles as ReturnRegLo as necessary. |
1867 Variable *ReturnReg = nullptr; | 1853 Variable *ReturnReg = nullptr; |
1868 Variable *ReturnRegHi = nullptr; | 1854 Variable *ReturnRegHi = nullptr; |
1869 if (Dest) { | 1855 if (Dest) { |
1870 switch (Dest->getType()) { | 1856 switch (Dest->getType()) { |
1871 case IceType_NUM: | 1857 case IceType_NUM: |
1872 llvm_unreachable("Invalid Call dest type"); | 1858 llvm_unreachable("Invalid Call dest type"); |
1873 break; | 1859 break; |
1874 case IceType_void: | 1860 case IceType_void: |
(...skipping 19 matching lines...)
1894 case IceType_v16i1: | 1880 case IceType_v16i1: |
1895 case IceType_v16i8: | 1881 case IceType_v16i8: |
1896 case IceType_v8i16: | 1882 case IceType_v8i16: |
1897 case IceType_v4i32: | 1883 case IceType_v4i32: |
1898 case IceType_v4f32: | 1884 case IceType_v4f32: |
1899 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0); | 1885 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0); |
1900 break; | 1886 break; |
1901 } | 1887 } |
1902 } | 1888 } |
1903 Operand *CallTarget = Instr->getCallTarget(); | 1889 Operand *CallTarget = Instr->getCallTarget(); |
1904 // TODO(jvoung): Handle sandboxing. | 1890 // TODO(jvoung): Handle sandboxing. |
1905 // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); | 1891 // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); |
1906 | 1892 |
1907 // Allow ConstantRelocatable to be left alone as a direct call, | 1893 // Allow ConstantRelocatable to be left alone as a direct call, but force |
1908 // but force other constants like ConstantInteger32 to be in | 1894 // other constants like ConstantInteger32 to be in a register and make it an |
1909 // a register and make it an indirect call. | 1895 // indirect call. |
1910 if (!llvm::isa<ConstantRelocatable>(CallTarget)) { | 1896 if (!llvm::isa<ConstantRelocatable>(CallTarget)) { |
1911 CallTarget = legalize(CallTarget, Legal_Reg); | 1897 CallTarget = legalize(CallTarget, Legal_Reg); |
1912 } | 1898 } |
1913 Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget); | 1899 Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget); |
1914 Context.insert(NewCall); | 1900 Context.insert(NewCall); |
1915 if (ReturnRegHi) | 1901 if (ReturnRegHi) |
1916 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | 1902 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
1917 | 1903 |
1918 // Add the appropriate offset to SP. The call instruction takes care | 1904 // Add the appropriate offset to SP. The call instruction takes care of |
1919 // of resetting the stack offset during emission. | 1905 // resetting the stack offset during emission. |
1920 if (ParameterAreaSizeBytes) { | 1906 if (ParameterAreaSizeBytes) { |
1921 Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), | 1907 Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), |
1922 Legal_Reg | Legal_Flex); | 1908 Legal_Reg | Legal_Flex); |
1923 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 1909 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
1924 _add(SP, SP, AddAmount); | 1910 _add(SP, SP, AddAmount); |
1925 } | 1911 } |
1926 | 1912 |
1927 // Insert a register-kill pseudo instruction. | 1913 // Insert a register-kill pseudo instruction. |
1928 Context.insert(InstFakeKill::create(Func, NewCall)); | 1914 Context.insert(InstFakeKill::create(Func, NewCall)); |
1929 | 1915 |
(...skipping 87 matching lines...)
2017 } | 2003 } |
2018 case InstCast::Zext: { | 2004 case InstCast::Zext: { |
2019 if (isVectorType(Dest->getType())) { | 2005 if (isVectorType(Dest->getType())) { |
2020 UnimplementedError(Func->getContext()->getFlags()); | 2006 UnimplementedError(Func->getContext()->getFlags()); |
2021 } else if (Dest->getType() == IceType_i64) { | 2007 } else if (Dest->getType() == IceType_i64) { |
2022 // t1=uxtb src; dst.lo=t1; dst.hi=0 | 2008 // t1=uxtb src; dst.lo=t1; dst.hi=0 |
2023 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2009 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
2024 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 2010 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
2025 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2011 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
2026 Variable *T_Lo = makeReg(DestLo->getType()); | 2012 Variable *T_Lo = makeReg(DestLo->getType()); |
2027 // i32 and i1 can just take up the whole register. | 2013 // i32 and i1 can just take up the whole register. i32 doesn't need uxt, |
2028 // i32 doesn't need uxt, while i1 will have an and mask later anyway. | 2014 // while i1 will have an and mask later anyway. |
2029 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) { | 2015 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) { |
2030 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); | 2016 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
2031 _mov(T_Lo, Src0RF); | 2017 _mov(T_Lo, Src0RF); |
2032 } else { | 2018 } else { |
2033 Variable *Src0R = legalizeToReg(Src0); | 2019 Variable *Src0R = legalizeToReg(Src0); |
2034 _uxt(T_Lo, Src0R); | 2020 _uxt(T_Lo, Src0R); |
2035 } | 2021 } |
2036 if (Src0->getType() == IceType_i1) { | 2022 if (Src0->getType() == IceType_i1) { |
2037 Constant *One = Ctx->getConstantInt32(1); | 2023 Constant *One = Ctx->getConstantInt32(1); |
2038 _and(T_Lo, T_Lo, One); | 2024 _and(T_Lo, T_Lo, One); |
2039 } | 2025 } |
2040 _mov(DestLo, T_Lo); | 2026 _mov(DestLo, T_Lo); |
2041 Variable *T_Hi = makeReg(DestLo->getType()); | 2027 Variable *T_Hi = makeReg(DestLo->getType()); |
2042 _mov(T_Hi, Zero); | 2028 _mov(T_Hi, Zero); |
2043 _mov(DestHi, T_Hi); | 2029 _mov(DestHi, T_Hi); |
2044 } else if (Src0->getType() == IceType_i1) { | 2030 } else if (Src0->getType() == IceType_i1) { |
2045 // t = Src0; t &= 1; Dest = t | 2031 // t = Src0; t &= 1; Dest = t |
2046 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); | 2032 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
2047 Constant *One = Ctx->getConstantInt32(1); | 2033 Constant *One = Ctx->getConstantInt32(1); |
2048 Variable *T = makeReg(Dest->getType()); | 2034 Variable *T = makeReg(Dest->getType()); |
2049 // Just use _mov instead of _uxt since all registers are 32-bit. | 2035 // Just use _mov instead of _uxt since all registers are 32-bit. _uxt |
2050 // _uxt requires the source to be a register so could have required | 2036 // requires the source to be a register so could have required a _mov |
2051 // a _mov from legalize anyway. | 2037 // from legalize anyway. |
2052 _mov(T, Src0RF); | 2038 _mov(T, Src0RF); |
2053 _and(T, T, One); | 2039 _and(T, T, One); |
2054 _mov(Dest, T); | 2040 _mov(Dest, T); |
2055 } else { | 2041 } else { |
2056 // t1 = uxt src; dst = t1 | 2042 // t1 = uxt src; dst = t1 |
2057 Variable *Src0R = legalizeToReg(Src0); | 2043 Variable *Src0R = legalizeToReg(Src0); |
2058 Variable *T = makeReg(Dest->getType()); | 2044 Variable *T = makeReg(Dest->getType()); |
2059 _uxt(T, Src0R); | 2045 _uxt(T, Src0R); |
2060 _mov(Dest, T); | 2046 _mov(Dest, T); |
2061 } | 2047 } |
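// A compact model of the scalar zext cases above (illustration only): i1 is
// masked with 1, i8/i16 go through uxtb/uxth, i32 is a plain move, and the
// i64 destination additionally gets a zero high word.
static uint64_t zextModel(uint32_t Src, unsigned SrcBits) {
  const uint32_t Lo = SrcBits >= 32 ? Src : (Src & ((1u << SrcBits) - 1u));
  return static_cast<uint64_t>(Lo); // high word is zero
}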
(...skipping 219 matching lines...)
2281 } | 2267 } |
2282 | 2268 |
2283 // a=icmp cond, b, c ==> | 2269 // a=icmp cond, b, c ==> |
2284 // GCC does: | 2270 // GCC does: |
2285 // cmp b.hi, c.hi or cmp b.lo, c.lo | 2271 // cmp b.hi, c.hi or cmp b.lo, c.lo |
2286 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi | 2272 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi |
2287 // mov.<C1> t, #1 mov.<C1> t, #1 | 2273 // mov.<C1> t, #1 mov.<C1> t, #1 |
2288 // mov.<C2> t, #0 mov.<C2> t, #0 | 2274 // mov.<C2> t, #0 mov.<C2> t, #0 |
2289 // mov a, t mov a, t | 2275 // mov a, t mov a, t |
2290 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" | 2276 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" |
2291 // is used for signed compares. In some cases, b and c need to be swapped | 2277 // is used for signed compares. In some cases, b and c need to be swapped as |
2292 // as well. | 2278 // well. |
2293 // | 2279 // |
2294 // LLVM does: | 2280 // LLVM does: |
2295 // for EQ and NE: | 2281 // for EQ and NE: |
2296 // eor t1, b.hi, c.hi | 2282 // eor t1, b.hi, c.hi |
2297 // eor t2, b.lo, c.lo | 2283 // eor t2, b.lo, c.lo |
2298 // orrs t, t1, t2 | 2284 // orrs t, t1, t2 |
2299 // mov.<C> t, #1 | 2285 // mov.<C> t, #1 |
2300 // mov a, t | 2286 // mov a, t |
2301 // | 2287 // |
2302 // that's nice in that it's just as short but has fewer dependencies | 2288 // that's nice in that it's just as short but has fewer dependencies for |
2303 // for better ILP at the cost of more registers. | 2289 // better ILP at the cost of more registers. |
2304 // | 2290 // |
2305 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with | 2291 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two |
2306 // two unconditional mov #0, two cmps, two conditional mov #1, | 2292 // unconditional mov #0, two cmps, two conditional mov #1, and one |
2307 // and one conditonal reg mov. That has few dependencies for good ILP, | 2293 // conditional reg mov. That has few dependencies for good ILP, but is a |
2308 // but is a longer sequence. | 2294 // longer sequence. |
2309 // | 2295 // |
2310 // So, we are going with the GCC version since it's usually better (except | 2296 // So, we are going with the GCC version since it's usually better (except |
2311 // perhaps for eq/ne). We could revisit special-casing eq/ne later. | 2297 // perhaps for eq/ne). We could revisit special-casing eq/ne later. |
2312 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2298 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
2313 Constant *One = Ctx->getConstantInt32(1); | 2299 Constant *One = Ctx->getConstantInt32(1); |
2314 if (Src0->getType() == IceType_i64) { | 2300 if (Src0->getType() == IceType_i64) { |
2315 InstIcmp::ICond Condition = Inst->getCondition(); | 2301 InstIcmp::ICond Condition = Inst->getCondition(); |
2316 size_t Index = static_cast<size_t>(Condition); | 2302 size_t Index = static_cast<size_t>(Condition); |
2317 assert(Index < llvm::array_lengthof(TableIcmp64)); | 2303 assert(Index < llvm::array_lengthof(TableIcmp64)); |
2318 Variable *Src0Lo, *Src0Hi; | 2304 Variable *Src0Lo, *Src0Hi; |
2319 Operand *Src1LoRF, *Src1HiRF; | 2305 Operand *Src1LoRF, *Src1HiRF; |
2320 if (TableIcmp64[Index].Swapped) { | 2306 if (TableIcmp64[Index].Swapped) { |
2321 Src0Lo = legalizeToReg(loOperand(Src1)); | 2307 Src0Lo = legalizeToReg(loOperand(Src1)); |
2322 Src0Hi = legalizeToReg(hiOperand(Src1)); | 2308 Src0Hi = legalizeToReg(hiOperand(Src1)); |
2323 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); | 2309 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); |
2324 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); | 2310 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); |
2325 } else { | 2311 } else { |
2326 Src0Lo = legalizeToReg(loOperand(Src0)); | 2312 Src0Lo = legalizeToReg(loOperand(Src0)); |
2327 Src0Hi = legalizeToReg(hiOperand(Src0)); | 2313 Src0Hi = legalizeToReg(hiOperand(Src0)); |
2328 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); | 2314 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); |
2329 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); | 2315 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); |
2330 } | 2316 } |
2331 Variable *T = makeReg(IceType_i32); | 2317 Variable *T = makeReg(IceType_i32); |
2332 if (TableIcmp64[Index].IsSigned) { | 2318 if (TableIcmp64[Index].IsSigned) { |
2333 Variable *ScratchReg = makeReg(IceType_i32); | 2319 Variable *ScratchReg = makeReg(IceType_i32); |
2334 _cmp(Src0Lo, Src1LoRF); | 2320 _cmp(Src0Lo, Src1LoRF); |
2335 _sbcs(ScratchReg, Src0Hi, Src1HiRF); | 2321 _sbcs(ScratchReg, Src0Hi, Src1HiRF); |
2336 // ScratchReg isn't going to be used, but we need the | 2322 // ScratchReg isn't going to be used, but we need the side-effect of |
2337 // side-effect of setting flags from this operation. | 2323 // setting flags from this operation. |
2338 Context.insert(InstFakeUse::create(Func, ScratchReg)); | 2324 Context.insert(InstFakeUse::create(Func, ScratchReg)); |
2339 } else { | 2325 } else { |
2340 _cmp(Src0Hi, Src1HiRF); | 2326 _cmp(Src0Hi, Src1HiRF); |
2341 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ); | 2327 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ); |
2342 } | 2328 } |
2343 _mov(T, One, TableIcmp64[Index].C1); | 2329 _mov(T, One, TableIcmp64[Index].C1); |
2344 _mov_nonkillable(T, Zero, TableIcmp64[Index].C2); | 2330 _mov_nonkillable(T, Zero, TableIcmp64[Index].C2); |
2345 _mov(Dest, T); | 2331 _mov(Dest, T); |
2346 return; | 2332 return; |
2347 } | 2333 } |
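// A model of the unsigned half of the i64 scheme above (illustration only):
// the conditional "cmp.eq lo" only replaces the flags when the high words were
// equal, so the final flags describe the high words unless they tie, in which
// case they describe the low words. The signed variant instead uses cmp + sbcs
// so the flags reflect a full 64-bit subtraction.
static bool icmpUlt64Model(uint64_t A, uint64_t B) {
  const uint32_t AHi = static_cast<uint32_t>(A >> 32);
  const uint32_t BHi = static_cast<uint32_t>(B >> 32);
  const uint32_t ALo = static_cast<uint32_t>(A);
  const uint32_t BLo = static_cast<uint32_t>(B);
  if (AHi != BHi)
    return AHi < BHi; // flags come from "cmp a.hi, b.hi"
  return ALo < BLo;   // flags come from the conditional "cmp.eq a.lo, b.lo"
}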
2348 | 2334 |
2349 // a=icmp cond b, c ==> | 2335 // a=icmp cond b, c ==> |
2350 // GCC does: | 2336 // GCC does: |
2351 // <u/s>xtb tb, b | 2337 // <u/s>xtb tb, b |
2352 // <u/s>xtb tc, c | 2338 // <u/s>xtb tc, c |
2353 // cmp tb, tc | 2339 // cmp tb, tc |
2354 // mov.C1 t, #0 | 2340 // mov.C1 t, #0 |
2355 // mov.C2 t, #1 | 2341 // mov.C2 t, #1 |
2356 // mov a, t | 2342 // mov a, t |
2357 // where the unsigned/sign extension is not needed for 32-bit. | 2343 // where the unsigned/sign extension is not needed for 32-bit. They also have |
2358 // They also have special cases for EQ and NE. E.g., for NE: | 2344 // special cases for EQ and NE. E.g., for NE: |
2359 // <extend to tb, tc> | 2345 // <extend to tb, tc> |
2360 // subs t, tb, tc | 2346 // subs t, tb, tc |
2361 // movne t, #1 | 2347 // movne t, #1 |
2362 // mov a, t | 2348 // mov a, t |
2363 // | 2349 // |
2364 // LLVM does: | 2350 // LLVM does: |
2365 // lsl tb, b, #<N> | 2351 // lsl tb, b, #<N> |
2366 // mov t, #0 | 2352 // mov t, #0 |
2367 // cmp tb, c, lsl #<N> | 2353 // cmp tb, c, lsl #<N> |
2368 // mov.<C> t, #1 | 2354 // mov.<C> t, #1 |
2369 // mov a, t | 2355 // mov a, t |
2370 // | 2356 // |
2371 // the left shift is by 0, 16, or 24, which allows the comparison to focus | 2357 // the left shift is by 0, 16, or 24, which allows the comparison to focus on |
2372 // on the digits that actually matter (for 16-bit or 8-bit signed/unsigned). | 2358 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For |
2373 // For the unsigned case, for some reason it does similar to GCC and does | 2359 // the unsigned case, for some reason it does similar to GCC and does a uxtb |
2374 // a uxtb first. It's not clear to me why that special-casing is needed. | 2360 // first. It's not clear to me why that special-casing is needed. |
2375 // | 2361 // |
2376 // We'll go with the LLVM way for now, since it's shorter and has just as | 2362 // We'll go with the LLVM way for now, since it's shorter and has just as few |
2377 // few dependencies. | 2363 // dependencies. |
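  // For example (i8, unsigned): with b = 0x80 and c = 0x01 the sequence is
  //   lsl tb, b, #24        ; tb = 0x80000000
  //   cmp tb, c, lsl #24    ; compares 0x80000000 with 0x01000000
  // so only the 8 significant bits participate and the low 24 bits of the
  // registers cannot affect the result.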
2378 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); | 2364 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); |
2379 assert(ShiftAmt >= 0); | 2365 assert(ShiftAmt >= 0); |
2380 Constant *ShiftConst = nullptr; | 2366 Constant *ShiftConst = nullptr; |
2381 Variable *Src0R = nullptr; | 2367 Variable *Src0R = nullptr; |
2382 Variable *T = makeReg(IceType_i32); | 2368 Variable *T = makeReg(IceType_i32); |
2383 if (ShiftAmt) { | 2369 if (ShiftAmt) { |
2384 ShiftConst = Ctx->getConstantInt32(ShiftAmt); | 2370 ShiftConst = Ctx->getConstantInt32(ShiftAmt); |
2385 Src0R = makeReg(IceType_i32); | 2371 Src0R = makeReg(IceType_i32); |
2386 _lsl(Src0R, legalizeToReg(Src0), ShiftConst); | 2372 _lsl(Src0R, legalizeToReg(Src0), ShiftConst); |
2387 } else { | 2373 } else { |
(...skipping 22 matching lines...) Expand all Loading... |
2410 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { | 2396 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
2411 switch (Instr->getIntrinsicInfo().ID) { | 2397 switch (Instr->getIntrinsicInfo().ID) { |
2412 case Intrinsics::AtomicCmpxchg: { | 2398 case Intrinsics::AtomicCmpxchg: { |
2413 UnimplementedError(Func->getContext()->getFlags()); | 2399 UnimplementedError(Func->getContext()->getFlags()); |
2414 return; | 2400 return; |
2415 } | 2401 } |
2416 case Intrinsics::AtomicFence: | 2402 case Intrinsics::AtomicFence: |
2417 UnimplementedError(Func->getContext()->getFlags()); | 2403 UnimplementedError(Func->getContext()->getFlags()); |
2418 return; | 2404 return; |
2419 case Intrinsics::AtomicFenceAll: | 2405 case Intrinsics::AtomicFenceAll: |
2420 // NOTE: FenceAll should prevent any load/store from being moved | 2406 // NOTE: FenceAll should prevent any load/store from being moved across the |
2421 // across the fence (both atomic and non-atomic). The InstARM32Mfence | 2407 // fence (both atomic and non-atomic). The InstARM32Mfence instruction is |
2422 // instruction is currently marked coarsely as "HasSideEffects". | 2408 // currently marked coarsely as "HasSideEffects". |
2423 UnimplementedError(Func->getContext()->getFlags()); | 2409 UnimplementedError(Func->getContext()->getFlags()); |
2424 return; | 2410 return; |
2425 case Intrinsics::AtomicIsLockFree: { | 2411 case Intrinsics::AtomicIsLockFree: { |
2426 UnimplementedError(Func->getContext()->getFlags()); | 2412 UnimplementedError(Func->getContext()->getFlags()); |
2427 return; | 2413 return; |
2428 } | 2414 } |
2429 case Intrinsics::AtomicLoad: { | 2415 case Intrinsics::AtomicLoad: { |
2430 UnimplementedError(Func->getContext()->getFlags()); | 2416 UnimplementedError(Func->getContext()->getFlags()); |
2431 return; | 2417 return; |
2432 } | 2418 } |
(...skipping 37 matching lines...)
2470 case Intrinsics::Ctpop: { | 2456 case Intrinsics::Ctpop: { |
2471 Variable *Dest = Instr->getDest(); | 2457 Variable *Dest = Instr->getDest(); |
2472 Operand *Val = Instr->getArg(0); | 2458 Operand *Val = Instr->getArg(0); |
2473 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) | 2459 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) |
2474 ? H_call_ctpop_i32 | 2460 ? H_call_ctpop_i32 |
2475 : H_call_ctpop_i64, | 2461 : H_call_ctpop_i64, |
2476 Dest, 1); | 2462 Dest, 1); |
2477 Call->addArg(Val); | 2463 Call->addArg(Val); |
2478 lowerCall(Call); | 2464 lowerCall(Call); |
2479 // The popcount helpers always return 32-bit values, while the intrinsic's | 2465 // The popcount helpers always return 32-bit values, while the intrinsic's |
2480 // signature matches some 64-bit platform's native instructions and | 2466 // signature matches some 64-bit platform's native instructions and expects |
2481 // expects to fill a 64-bit reg. Thus, clear the upper bits of the dest | 2467 // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in |
2482 // just in case the user doesn't do that in the IR or doesn't toss the bits | 2468 // case the user doesn't do that in the IR or doesn't toss the bits via |
2483 // via truncate. | 2469 // truncate. |
2484 if (Val->getType() == IceType_i64) { | 2470 if (Val->getType() == IceType_i64) { |
2485 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2471 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
2486 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 2472 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
2487 Variable *T = nullptr; | 2473 Variable *T = nullptr; |
2488 _mov(T, Zero); | 2474 _mov(T, Zero); |
2489 _mov(DestHi, T); | 2475 _mov(DestHi, T); |
2490 } | 2476 } |
2491 return; | 2477 return; |
2492 } | 2478 } |
2493 case Intrinsics::Ctlz: { | 2479 case Intrinsics::Ctlz: { |
2494 // The "is zero undef" parameter is ignored and we always return | 2480 // The "is zero undef" parameter is ignored and we always return a |
2495 // a well-defined value. | 2481 // well-defined value. |
2496 Operand *Val = Instr->getArg(0); | 2482 Operand *Val = Instr->getArg(0); |
2497 Variable *ValLoR; | 2483 Variable *ValLoR; |
2498 Variable *ValHiR = nullptr; | 2484 Variable *ValHiR = nullptr; |
2499 if (Val->getType() == IceType_i64) { | 2485 if (Val->getType() == IceType_i64) { |
2500 Val = legalizeUndef(Val); | 2486 Val = legalizeUndef(Val); |
2501 ValLoR = legalizeToReg(loOperand(Val)); | 2487 ValLoR = legalizeToReg(loOperand(Val)); |
2502 ValHiR = legalizeToReg(hiOperand(Val)); | 2488 ValHiR = legalizeToReg(hiOperand(Val)); |
2503 } else { | 2489 } else { |
2504 ValLoR = legalizeToReg(Val); | 2490 ValLoR = legalizeToReg(Val); |
2505 } | 2491 } |
(...skipping 126 matching lines...)
2632 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 2618 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
2633 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 2619 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
2634 Operand *Zero = | 2620 Operand *Zero = |
2635 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); | 2621 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); |
2636 Operand *ThirtyTwo = | 2622 Operand *ThirtyTwo = |
2637 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); | 2623 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); |
2638 _cmp(ValHiR, Zero); | 2624 _cmp(ValHiR, Zero); |
2639 Variable *T2 = makeReg(IceType_i32); | 2625 Variable *T2 = makeReg(IceType_i32); |
2640 _add(T2, T, ThirtyTwo); | 2626 _add(T2, T, ThirtyTwo); |
2641 _clz(T2, ValHiR, CondARM32::NE); | 2627 _clz(T2, ValHiR, CondARM32::NE); |
2642 // T2 is actually a source as well when the predicate is not AL | 2628 // T2 is actually a source as well when the predicate is not AL (since it |
2643 // (since it may leave T2 alone). We use set_dest_nonkillable to | 2629 // may leave T2 alone). We use set_dest_nonkillable to prolong the liveness |
2644 // prolong the liveness of T2 as if it was used as a source. | 2630 // of T2 as if it was used as a source. |
2645 _set_dest_nonkillable(); | 2631 _set_dest_nonkillable(); |
2646 _mov(DestLo, T2); | 2632 _mov(DestLo, T2); |
2647 Variable *T3 = nullptr; | 2633 Variable *T3 = nullptr; |
2648 _mov(T3, Zero); | 2634 _mov(T3, Zero); |
2649 _mov(DestHi, T3); | 2635 _mov(DestHi, T3); |
2650 return; | 2636 return; |
2651 } | 2637 } |
2652 _mov(Dest, T); | 2638 _mov(Dest, T); |
2653 return; | 2639 return; |
2654 } | 2640 } |
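// A self-contained model of the 64-bit ctlz composition above (illustration
// only): use the high word's count when it is non-zero, otherwise 32 plus the
// low word's count, which is what the conditional clz into T2 implements.
static uint32_t ctlz64Model(uint32_t Lo, uint32_t Hi) {
  // Unlike __builtin_clz, the ARM clz instruction is well defined for 0 and
  // returns 32, so guard the builtin here.
  const uint32_t ClzLo = Lo == 0 ? 32 : static_cast<uint32_t>(__builtin_clz(Lo));
  const uint32_t ClzHi = Hi == 0 ? 32 : static_cast<uint32_t>(__builtin_clz(Hi));
  return Hi != 0 ? ClzHi : 32 + ClzLo;
}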
2655 | 2641 |
2656 void TargetARM32::lowerLoad(const InstLoad *Load) { | 2642 void TargetARM32::lowerLoad(const InstLoad *Load) { |
2657 // A Load instruction can be treated the same as an Assign | 2643 // A Load instruction can be treated the same as an Assign instruction, after |
2658 // instruction, after the source operand is transformed into an | 2644 // the source operand is transformed into an OperandARM32Mem operand. |
2659 // OperandARM32Mem operand. | |
2660 Type Ty = Load->getDest()->getType(); | 2645 Type Ty = Load->getDest()->getType(); |
2661 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); | 2646 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); |
2662 Variable *DestLoad = Load->getDest(); | 2647 Variable *DestLoad = Load->getDest(); |
2663 | 2648 |
2664 // TODO(jvoung): handle folding opportunities. Sign and zero extension | 2649 // TODO(jvoung): handle folding opportunities. Sign and zero extension can |
2665 // can be folded into a load. | 2650 // be folded into a load. |
2666 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); | 2651 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); |
2667 lowerAssign(Assign); | 2652 lowerAssign(Assign); |
2668 } | 2653 } |
2669 | 2654 |
2670 void TargetARM32::doAddressOptLoad() { | 2655 void TargetARM32::doAddressOptLoad() { |
2671 UnimplementedError(Func->getContext()->getFlags()); | 2656 UnimplementedError(Func->getContext()->getFlags()); |
2672 } | 2657 } |
2673 | 2658 |
2674 void TargetARM32::randomlyInsertNop(float Probability, | 2659 void TargetARM32::randomlyInsertNop(float Probability, |
2675 RandomNumberGenerator &RNG) { | 2660 RandomNumberGenerator &RNG) { |
(...skipping 25 matching lines...)
2701 Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0); | 2686 Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0); |
2702 Reg = D0; | 2687 Reg = D0; |
2703 } else if (isVectorType(Src0->getType())) { | 2688 } else if (isVectorType(Src0->getType())) { |
2704 Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0); | 2689 Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0); |
2705 Reg = Q0; | 2690 Reg = Q0; |
2706 } else { | 2691 } else { |
2707 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex); | 2692 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex); |
2708 _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0); | 2693 _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0); |
2709 } | 2694 } |
2710 } | 2695 } |
2711 // Add a ret instruction even if sandboxing is enabled, because | 2696 // Add a ret instruction even if sandboxing is enabled, because addEpilog |
2712 // addEpilog explicitly looks for a ret instruction as a marker for | 2697 // explicitly looks for a ret instruction as a marker for where to insert the |
2713 // where to insert the frame removal instructions. | 2698 // frame removal instructions. addEpilog is responsible for restoring the |
2714 // addEpilog is responsible for restoring the "lr" register as needed | 2699 // "lr" register as needed prior to this ret instruction. |
2715 // prior to this ret instruction. | |
2716 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg); | 2700 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg); |
2717 // Add a fake use of sp to make sure sp stays alive for the entire | 2701 // Add a fake use of sp to make sure sp stays alive for the entire function. |
2718 // function. Otherwise post-call sp adjustments get dead-code | 2702 // Otherwise post-call sp adjustments get dead-code eliminated. |
2719 // eliminated. TODO: Are there more places where the fake use | 2703 // TODO: Are there more places where the fake use should be inserted? E.g. |
2720 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not | 2704 // "void f(int n){while(1) g(n);}" may not have a ret instruction. |
2721 // have a ret instruction. | |
2722 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); | 2705 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
2723 Context.insert(InstFakeUse::create(Func, SP)); | 2706 Context.insert(InstFakeUse::create(Func, SP)); |
2724 } | 2707 } |
2725 | 2708 |
2726 void TargetARM32::lowerSelect(const InstSelect *Inst) { | 2709 void TargetARM32::lowerSelect(const InstSelect *Inst) { |
2727 Variable *Dest = Inst->getDest(); | 2710 Variable *Dest = Inst->getDest(); |
2728 Type DestTy = Dest->getType(); | 2711 Type DestTy = Dest->getType(); |
2729 Operand *SrcT = Inst->getTrueOperand(); | 2712 Operand *SrcT = Inst->getTrueOperand(); |
2730 Operand *SrcF = Inst->getFalseOperand(); | 2713 Operand *SrcF = Inst->getFalseOperand(); |
2731 Operand *Condition = Inst->getCondition(); | 2714 Operand *Condition = Inst->getCondition(); |
(...skipping 113 matching lines...)
2845 } | 2828 } |
2846 | 2829 |
2847 // Helper for legalize() to emit the right code to lower an operand to a | 2830 // Helper for legalize() to emit the right code to lower an operand to a |
2848 // register of the appropriate type. | 2831 // register of the appropriate type. |
2849 Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) { | 2832 Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) { |
2850 Type Ty = Src->getType(); | 2833 Type Ty = Src->getType(); |
2851 Variable *Reg = makeReg(Ty, RegNum); | 2834 Variable *Reg = makeReg(Ty, RegNum); |
2852 if (isVectorType(Ty) || isFloatingType(Ty)) { | 2835 if (isVectorType(Ty) || isFloatingType(Ty)) { |
2853 _vmov(Reg, Src); | 2836 _vmov(Reg, Src); |
2854 } else { | 2837 } else { |
2855 // Mov's Src operand can really only be the flexible second operand type | 2838 // Mov's Src operand can really only be the flexible second operand type or |
2856 // or a register. Users should guarantee that. | 2839 // a register. Users should guarantee that. |
2857 _mov(Reg, Src); | 2840 _mov(Reg, Src); |
2858 } | 2841 } |
2859 return Reg; | 2842 return Reg; |
2860 } | 2843 } |
2861 | 2844 |
2862 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, | 2845 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, |
2863 int32_t RegNum) { | 2846 int32_t RegNum) { |
2864 Type Ty = From->getType(); | 2847 Type Ty = From->getType(); |
2865 // Assert that a physical register is allowed. To date, all calls | 2848 // Assert that a physical register is allowed. To date, all calls to |
2866 // to legalize() allow a physical register. Legal_Flex converts | 2849 // legalize() allow a physical register. Legal_Flex converts registers to the |
2867 // registers to the right type OperandARM32FlexReg as needed. | 2850 // right type OperandARM32FlexReg as needed. |
2868 assert(Allowed & Legal_Reg); | 2851 assert(Allowed & Legal_Reg); |
2869 // Go through the various types of operands: | 2852 // Go through the various types of operands: OperandARM32Mem, |
2870 // OperandARM32Mem, OperandARM32Flex, Constant, and Variable. | 2853 // OperandARM32Flex, Constant, and Variable. Given the above assertion, if |
2871 // Given the above assertion, if type of operand is not legal | 2854 // type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we |
2872 // (e.g., OperandARM32Mem and !Legal_Mem), we can always copy | 2855 // can always copy to a register. |
2873 // to a register. | |
2874 if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) { | 2856 if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) { |
2875 // Before doing anything with a Mem operand, we need to ensure | 2857 // Before doing anything with a Mem operand, we need to ensure that the |
2876 // that the Base and Index components are in physical registers. | 2858 // Base and Index components are in physical registers. |
2877 Variable *Base = Mem->getBase(); | 2859 Variable *Base = Mem->getBase(); |
2878 Variable *Index = Mem->getIndex(); | 2860 Variable *Index = Mem->getIndex(); |
2879 Variable *RegBase = nullptr; | 2861 Variable *RegBase = nullptr; |
2880 Variable *RegIndex = nullptr; | 2862 Variable *RegIndex = nullptr; |
2881 if (Base) { | 2863 if (Base) { |
2882 RegBase = legalizeToReg(Base); | 2864 RegBase = legalizeToReg(Base); |
2883 } | 2865 } |
2884 if (Index) { | 2866 if (Index) { |
2885 RegIndex = legalizeToReg(Index); | 2867 RegIndex = legalizeToReg(Index); |
2886 } | 2868 } |
(...skipping 24 matching lines...) |
2911 From = Mem; | 2893 From = Mem; |
2912 } | 2894 } |
2913 return From; | 2895 return From; |
2914 } | 2896 } |
2915 | 2897 |
2916 if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) { | 2898 if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) { |
2917 if (!(Allowed & Legal_Flex)) { | 2899 if (!(Allowed & Legal_Flex)) { |
2918 if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) { | 2900 if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) { |
2919 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) { | 2901 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) { |
2920 From = FlexReg->getReg(); | 2902 From = FlexReg->getReg(); |
2921 // Fall through and let From be checked as a Variable below, | 2903 // Fall through and let From be checked as a Variable below, where it |
2922 // where it may or may not need a register. | 2904 // may or may not need a register. |
2923 } else { | 2905 } else { |
2924 return copyToReg(Flex, RegNum); | 2906 return copyToReg(Flex, RegNum); |
2925 } | 2907 } |
2926 } else { | 2908 } else { |
2927 return copyToReg(Flex, RegNum); | 2909 return copyToReg(Flex, RegNum); |
2928 } | 2910 } |
2929 } else { | 2911 } else { |
2930 return From; | 2912 return From; |
2931 } | 2913 } |
2932 } | 2914 } |
2933 | 2915 |
2934 if (llvm::isa<Constant>(From)) { | 2916 if (llvm::isa<Constant>(From)) { |
2935 if (llvm::isa<ConstantUndef>(From)) { | 2917 if (llvm::isa<ConstantUndef>(From)) { |
2936 From = legalizeUndef(From, RegNum); | 2918 From = legalizeUndef(From, RegNum); |
2937 if (isVectorType(Ty)) | 2919 if (isVectorType(Ty)) |
2938 return From; | 2920 return From; |
2939 } | 2921 } |
2940 // There should be no constants of vector type (other than undef). | 2922 // There should be no constants of vector type (other than undef). |
2941 assert(!isVectorType(Ty)); | 2923 assert(!isVectorType(Ty)); |
2942 bool CanBeFlex = Allowed & Legal_Flex; | 2924 bool CanBeFlex = Allowed & Legal_Flex; |
2943 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) { | 2925 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) { |
2944 uint32_t RotateAmt; | 2926 uint32_t RotateAmt; |
2945 uint32_t Immed_8; | 2927 uint32_t Immed_8; |
2946 uint32_t Value = static_cast<uint32_t>(C32->getValue()); | 2928 uint32_t Value = static_cast<uint32_t>(C32->getValue()); |
2947 // Check if the immediate will fit in a Flexible second operand, | 2929 // Check if the immediate will fit in a Flexible second operand, if a |
2948 // if a Flexible second operand is allowed. We need to know the exact | 2930 // Flexible second operand is allowed. We need to know the exact value, |
2949 // value, so that rules out relocatable constants. | 2931 // so that rules out relocatable constants. Also try the inverse and use |
2950 // Also try the inverse and use MVN if possible. | 2932 // MVN if possible. |
2951 if (CanBeFlex && | 2933 if (CanBeFlex && |
2952 OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) { | 2934 OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) { |
2953 return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); | 2935 return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); |
2954 } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm( | 2936 } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm( |
2955 ~Value, &RotateAmt, &Immed_8)) { | 2937 ~Value, &RotateAmt, &Immed_8)) { |
2956 auto InvertedFlex = | 2938 auto InvertedFlex = |
2957 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); | 2939 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); |
2958 Variable *Reg = makeReg(Ty, RegNum); | 2940 Variable *Reg = makeReg(Ty, RegNum); |
2959 _mvn(Reg, InvertedFlex); | 2941 _mvn(Reg, InvertedFlex); |
2960 return Reg; | 2942 return Reg; |
2961 } else { | 2943 } else { |
2962 // Do a movw/movt to a register. | 2944 // Do a movw/movt to a register. |
2963 Variable *Reg = makeReg(Ty, RegNum); | 2945 Variable *Reg = makeReg(Ty, RegNum); |
2964 uint32_t UpperBits = (Value >> 16) & 0xFFFF; | 2946 uint32_t UpperBits = (Value >> 16) & 0xFFFF; |
2965 _movw(Reg, | 2947 _movw(Reg, |
2966 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); | 2948 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); |
2967 if (UpperBits != 0) { | 2949 if (UpperBits != 0) { |
2968 _movt(Reg, Ctx->getConstantInt32(UpperBits)); | 2950 _movt(Reg, Ctx->getConstantInt32(UpperBits)); |
2969 } | 2951 } |
2970 return Reg; | 2952 return Reg; |
2971 } | 2953 } |
2972 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { | 2954 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { |
2973 Variable *Reg = makeReg(Ty, RegNum); | 2955 Variable *Reg = makeReg(Ty, RegNum); |
2974 _movw(Reg, C); | 2956 _movw(Reg, C); |
2975 _movt(Reg, C); | 2957 _movt(Reg, C); |
2976 return Reg; | 2958 return Reg; |
2977 } else { | 2959 } else { |
2978 assert(isScalarFloatingType(Ty)); | 2960 assert(isScalarFloatingType(Ty)); |
2979 // Load floats/doubles from literal pool. | 2961 // Load floats/doubles from literal pool. |
2980 // TODO(jvoung): Allow certain immediates to be encoded directly in | 2962 // TODO(jvoung): Allow certain immediates to be encoded directly in an |
2981 // an operand. See Table A7-18 of the ARM manual: | 2963 // operand. See Table A7-18 of the ARM manual: "Floating-point modified |
2982 // "Floating-point modified immediate constants". | 2964 // immediate constants". Or, for 32-bit floating point numbers, just |
2983 // Or, for 32-bit floating point numbers, just encode the raw bits | 2965 // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG, |
2984 // into a movw/movt pair to GPR, and vmov to an SREG, instead of using | 2966 // instead of using a movw/movt pair to get the const-pool address then |
2985 // a movw/movt pair to get the const-pool address then loading to SREG. | 2967 // loading to SREG. |
2986 std::string Buffer; | 2968 std::string Buffer; |
2987 llvm::raw_string_ostream StrBuf(Buffer); | 2969 llvm::raw_string_ostream StrBuf(Buffer); |
2988 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf); | 2970 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf); |
2989 llvm::cast<Constant>(From)->setShouldBePooled(true); | 2971 llvm::cast<Constant>(From)->setShouldBePooled(true); |
2990 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); | 2972 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); |
2991 Variable *BaseReg = makeReg(getPointerType()); | 2973 Variable *BaseReg = makeReg(getPointerType()); |
2992 _movw(BaseReg, Offset); | 2974 _movw(BaseReg, Offset); |
2993 _movt(BaseReg, Offset); | 2975 _movt(BaseReg, Offset); |
2994 From = formMemoryOperand(BaseReg, Ty); | 2976 From = formMemoryOperand(BaseReg, Ty); |
2995 return copyToReg(From, RegNum); | 2977 return copyToReg(From, RegNum); |
2996 } | 2978 } |
2997 } | 2979 } |
2998 | 2980 |
2999 if (auto Var = llvm::dyn_cast<Variable>(From)) { | 2981 if (auto Var = llvm::dyn_cast<Variable>(From)) { |
3000 // Check if the variable is guaranteed a physical register. This | 2982 // Check if the variable is guaranteed a physical register. This can happen |
3001 // can happen either when the variable is pre-colored or when it is | 2983 // either when the variable is pre-colored or when it is assigned infinite |
3002 // assigned infinite weight. | 2984 // weight. |
3003 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); | 2985 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); |
3004 // We need a new physical register for the operand if: | 2986 // We need a new physical register for the operand if: |
3005 // Mem is not allowed and Var isn't guaranteed a physical | 2987 // Mem is not allowed and Var isn't guaranteed a physical |
3006 // register, or | 2988 // register, or |
3007 // RegNum is required and Var->getRegNum() doesn't match. | 2989 // RegNum is required and Var->getRegNum() doesn't match. |
3008 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || | 2990 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || |
3009 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { | 2991 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { |
3010 From = copyToReg(From, RegNum); | 2992 From = copyToReg(From, RegNum); |
3011 } | 2993 } |
3012 return From; | 2994 return From; |
3013 } | 2995 } |
3014 llvm_unreachable("Unhandled operand kind in legalize()"); | 2996 llvm_unreachable("Unhandled operand kind in legalize()"); |
3015 | 2997 |
3016 return From; | 2998 return From; |
3017 } | 2999 } |
3018 | 3000 |
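For readers unfamiliar with the flexible-operand test used in the constant path of legalize() above, the following is a minimal standalone sketch of the decision a check like OperandARM32FlexImm::canHoldImm has to make; fitsFlexImm is a hypothetical name used purely for illustration and is not the Subzero API. An A32 flexible immediate is an 8-bit constant rotated right by an even amount in the range 0..30.

#include <cstdint>

// Hypothetical stand-in for the flexible-immediate test (illustrative only).
// Value is encodable iff some even right-rotation of an 8-bit constant
// reproduces it; equivalently, rotating Value left by that amount must
// leave a result that fits in 8 bits.
static bool fitsFlexImm(uint32_t Value, uint32_t *RotateAmt, uint32_t *Imm8) {
  for (uint32_t Rot = 0; Rot < 32; Rot += 2) {
    // Rotate Value left by Rot; the "& 31" keeps the Rot == 0 case defined.
    uint32_t RotL = (Value << Rot) | (Value >> ((32 - Rot) & 31));
    if (RotL <= 0xFF) {
      *RotateAmt = Rot;
      *Imm8 = RotL;
      return true;
    }
  }
  return false;
}

int main() {
  uint32_t Rot, Imm8;
  bool A = fitsFlexImm(0xFF000000u, &Rot, &Imm8); // true: Imm8 = 0xFF, Rot = 8
  bool B = fitsFlexImm(0x12345678u, &Rot, &Imm8); // false: no 8-bit window works
  return (A && !B) ? 0 : 1;
}

Under that encoding, 0xFF000000 can be moved directly, 0xFFFFFF00 cannot but its inverse 0x000000FF can (hence the mvn path above), and a value such as 0x12345678 fits neither way, which is when legalize() falls back to the movw/movt pair.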
3019 /// Provide a trivial wrapper to legalize() for this common usage. | 3001 /// Provide a trivial wrapper to legalize() for this common usage. |
3020 Variable *TargetARM32::legalizeToReg(Operand *From, int32_t RegNum) { | 3002 Variable *TargetARM32::legalizeToReg(Operand *From, int32_t RegNum) { |
3021 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); | 3003 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); |
3022 } | 3004 } |
3023 | 3005 |
3024 /// Legalize undef values to concrete values. | 3006 /// Legalize undef values to concrete values. |
3025 Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) { | 3007 Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) { |
3026 Type Ty = From->getType(); | 3008 Type Ty = From->getType(); |
3027 if (llvm::isa<ConstantUndef>(From)) { | 3009 if (llvm::isa<ConstantUndef>(From)) { |
3028 // Lower undefs to zero. Another option is to lower undefs to an | 3010 // Lower undefs to zero. Another option is to lower undefs to an |
3029 // uninitialized register; however, using an uninitialized register | 3011 // uninitialized register; however, using an uninitialized register results |
3030 // results in less predictable code. | 3012 // in less predictable code. |
3031 // | 3013 // |
3032 // If in the future the implementation is changed to lower undef | 3014 // If in the future the implementation is changed to lower undef values to |
3033 // values to uninitialized registers, a FakeDef will be needed: | 3015 // uninitialized registers, a FakeDef will be needed: |
3034 // Context.insert(InstFakeDef::create(Func, Reg)); | 3016 // Context.insert(InstFakeDef::create(Func, Reg)); This is in order to |
3035 // This is in order to ensure that the live range of Reg is not | 3017 // ensure that the live range of Reg is not overestimated. If the constant |
3036 // overestimated. If the constant being lowered is a 64 bit value, | 3018 // being lowered is a 64 bit value, then the result should be split and the |
3037 // then the result should be split and the lo and hi components will | 3019 // lo and hi components will need to go in uninitialized registers. |
3038 // need to go in uninitialized registers. | |
3039 if (isVectorType(Ty)) | 3020 if (isVectorType(Ty)) |
3040 return makeVectorOfZeros(Ty, RegNum); | 3021 return makeVectorOfZeros(Ty, RegNum); |
3041 return Ctx->getConstantZero(Ty); | 3022 return Ctx->getConstantZero(Ty); |
3042 } | 3023 } |
3043 return From; | 3024 return From; |
3044 } | 3025 } |
3045 | 3026 |
3046 OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) { | 3027 OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) { |
3047 OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand); | 3028 OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand); |
3048 // It may be the case that address mode optimization already creates | 3029 // It may be the case that address mode optimization already creates an |
3049 // an OperandARM32Mem, so in that case it wouldn't need another level | 3030 // OperandARM32Mem, so in that case it wouldn't need another level of |
3050 // of transformation. | 3031 // transformation. |
3051 if (Mem) { | 3032 if (Mem) { |
3052 return llvm::cast<OperandARM32Mem>(legalize(Mem)); | 3033 return llvm::cast<OperandARM32Mem>(legalize(Mem)); |
3053 } | 3034 } |
3054 // If we didn't do address mode optimization, then we only | 3035 // If we didn't do address mode optimization, then we only have a base/offset |
3055 // have a base/offset to work with. ARM always requires a base | 3036 // to work with. ARM always requires a base register, so just use that to |
3056 // register, so just use that to hold the operand. | 3037 // hold the operand. |
3057 Variable *Base = legalizeToReg(Operand); | 3038 Variable *Base = legalizeToReg(Operand); |
3058 return OperandARM32Mem::create( | 3039 return OperandARM32Mem::create( |
3059 Func, Ty, Base, | 3040 Func, Ty, Base, |
3060 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); | 3041 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); |
3061 } | 3042 } |
3062 | 3043 |
3063 Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) { | 3044 Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) { |
3064 // There aren't any 64-bit integer registers for ARM32. | 3045 // There aren't any 64-bit integer registers for ARM32. |
3065 assert(Type != IceType_i64); | 3046 assert(Type != IceType_i64); |
3066 Variable *Reg = Func->makeVariable(Type); | 3047 Variable *Reg = Func->makeVariable(Type); |
3067 if (RegNum == Variable::NoRegister) | 3048 if (RegNum == Variable::NoRegister) |
3068 Reg->setMustHaveReg(); | 3049 Reg->setMustHaveReg(); |
3069 else | 3050 else |
3070 Reg->setRegNum(RegNum); | 3051 Reg->setRegNum(RegNum); |
3071 return Reg; | 3052 return Reg; |
3072 } | 3053 } |
3073 | 3054 |
3074 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) { | 3055 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) { |
3075 assert(llvm::isPowerOf2_32(Align)); | 3056 assert(llvm::isPowerOf2_32(Align)); |
3076 uint32_t RotateAmt; | 3057 uint32_t RotateAmt; |
3077 uint32_t Immed_8; | 3058 uint32_t Immed_8; |
3078 Operand *Mask; | 3059 Operand *Mask; |
3079 // Use AND or BIC to mask off the bits, depending on which immediate fits | 3060 // Use AND or BIC to mask off the bits, depending on which immediate fits (if |
3080 // (if it fits at all). Assume Align is usually small, in which case BIC | 3061 // it fits at all). Assume Align is usually small, in which case BIC works |
3081 // works better. Thus, this rounds down to the alignment. | 3062 // better. Thus, this rounds down to the alignment. |
3082 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) { | 3063 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) { |
3083 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex); | 3064 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex); |
3084 _bic(Reg, Reg, Mask); | 3065 _bic(Reg, Reg, Mask); |
3085 } else { | 3066 } else { |
3086 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex); | 3067 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex); |
3087 _and(Reg, Reg, Mask); | 3068 _and(Reg, Reg, Mask); |
3088 } | 3069 } |
3089 } | 3070 } |
3090 | 3071 |
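As a quick, illustrative sanity check on the masking in alignRegisterPow2 (not part of the lowering code): rounding a register value down to a power-of-two Align can be done either by clearing the low bits with BIC #(Align - 1) or by AND-ing with #-Align, and the two forms give the same result.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Reg = 0x1005;
  uint32_t Align = 16;
  uint32_t ViaBic = Reg & ~(Align - 1); // bic reg, reg, #(Align - 1)
  uint32_t ViaAnd = Reg & (0u - Align); // and reg, reg, #-Align
  assert(ViaBic == 0x1000 && ViaAnd == 0x1000);
  return 0;
}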
3091 void TargetARM32::postLower() { | 3072 void TargetARM32::postLower() { |
(...skipping 71 matching lines...) |
3163 UnimplementedError(Ctx->getFlags()); | 3144 UnimplementedError(Ctx->getFlags()); |
3164 } | 3145 } |
3165 | 3146 |
3166 TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx) | 3147 TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx) |
3167 : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {} | 3148 : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {} |
3168 | 3149 |
3169 void TargetHeaderARM32::lower() { | 3150 void TargetHeaderARM32::lower() { |
3170 OstreamLocker L(Ctx); | 3151 OstreamLocker L(Ctx); |
3171 Ostream &Str = Ctx->getStrEmit(); | 3152 Ostream &Str = Ctx->getStrEmit(); |
3172 Str << ".syntax unified\n"; | 3153 Str << ".syntax unified\n"; |
3173 // Emit build attributes in format: .eabi_attribute TAG, VALUE. | 3154 // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of |
3174 // See Sec. 2 of "Addenda to, and Errata in the ABI for the ARM architecture" | 3155 // "Addenda to, and Errata in the ABI for the ARM architecture" |
3175 // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf | 3156 // http://infocenter.arm.com |
 | 3157 // /help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf |
3176 // | 3158 // |
3177 // Tag_conformance should be emitted first in a file-scope | 3159 // Tag_conformance should be emitted first in a file-scope sub-subsection |
3178 // sub-subsection of the first public subsection of the attributes. | 3160 // of the first public subsection of the attributes. |
3179 Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n"; | 3161 Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n"; |
3180 // Chromebooks are at least A15, but do A9 for higher compat. | 3162 // Chromebooks are at least A15, but do A9 for higher compat. For some |
3181 // For some reason, the LLVM ARM asm parser has the .cpu directive override | 3163 // reason, the LLVM ARM asm parser has the .cpu directive override the mattr |
3182 // the mattr specified on the commandline. So to test hwdiv, we need to set | 3164 // specified on the commandline. So to test hwdiv, we need to set the .cpu |
3183 // the .cpu directive higher (can't just rely on --mattr=...). | 3165 // directive higher (can't just rely on --mattr=...). |
3184 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { | 3166 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { |
3185 Str << ".cpu cortex-a15\n"; | 3167 Str << ".cpu cortex-a15\n"; |
3186 } else { | 3168 } else { |
3187 Str << ".cpu cortex-a9\n"; | 3169 Str << ".cpu cortex-a9\n"; |
3188 } | 3170 } |
3189 Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n" | 3171 Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n" |
3190 << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n"; | 3172 << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n"; |
3191 Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n" | 3173 Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n" |
3192 << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n"; | 3174 << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n"; |
3193 Str << ".fpu neon\n" | 3175 Str << ".fpu neon\n" |
(...skipping 11 matching lines...) |
3205 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; | 3187 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; |
3206 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { | 3188 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { |
3207 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; | 3189 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; |
3208 } | 3190 } |
3209 // Technically R9 is used for TLS with Sandboxing, and we reserve it. | 3191 // Technically R9 is used for TLS with Sandboxing, and we reserve it. |
3210 // However, for compatibility with current NaCl LLVM, don't claim that. | 3192 // However, for compatibility with current NaCl LLVM, don't claim that. |
3211 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; | 3193 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; |
3212 } | 3194 } |
3213 | 3195 |
3214 } // end of namespace Ice | 3196 } // end of namespace Ice |