Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringARM32.cpp

Issue 1341423002: Reflow comments to use the full width. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fix spelling and rebase Created 5 years, 3 months ago
OLD | NEW
1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===// 1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 /// 9 ///
10 /// \file 10 /// \file
(...skipping 29 matching lines...)
40 do { \ 40 do { \
41 if (!static_cast<const ClFlags &>(Flags).getSkipUnimplemented()) { \ 41 if (!static_cast<const ClFlags &>(Flags).getSkipUnimplemented()) { \
42 /* Use llvm_unreachable instead of report_fatal_error, which gives \ 42 /* Use llvm_unreachable instead of report_fatal_error, which gives \
43 better stack traces. */ \ 43 better stack traces. */ \
44 llvm_unreachable("Not yet implemented"); \ 44 llvm_unreachable("Not yet implemented"); \
45 abort(); \ 45 abort(); \
46 } \ 46 } \
47 } while (0) 47 } while (0)
48 48
49 // The following table summarizes the logic for lowering the icmp instruction 49 // The following table summarizes the logic for lowering the icmp instruction
50 // for i32 and narrower types. Each icmp condition has a clear mapping to an 50 // for i32 and narrower types. Each icmp condition has a clear mapping to an
51 // ARM32 conditional move instruction. 51 // ARM32 conditional move instruction.
52 52
53 const struct TableIcmp32_ { 53 const struct TableIcmp32_ {
54 CondARM32::Cond Mapping; 54 CondARM32::Cond Mapping;
55 } TableIcmp32[] = { 55 } TableIcmp32[] = {
56 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \ 56 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \
57 { CondARM32::C_32 } \ 57 { CondARM32::C_32 } \
58 , 58 ,
59 ICMPARM32_TABLE 59 ICMPARM32_TABLE
60 #undef X 60 #undef X
61 }; 61 };
62 62
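For orientation, a minimal standalone sketch of the icmp-to-condition-code mapping a table like TableIcmp32 encodes, using the standard ARM condition names rather than the actual ICMPARM32_TABLE entries (illustrative only; the real table is generated by the X-macro above):

enum class IcmpCond { Eq, Ne, Ugt, Uge, Ult, Ule, Sgt, Sge, Slt, Sle };

const char *icmp32ToArmCond(IcmpCond Cond) {
  switch (Cond) {
  case IcmpCond::Eq:  return "eq"; // equal
  case IcmpCond::Ne:  return "ne"; // not equal
  case IcmpCond::Ugt: return "hi"; // unsigned higher
  case IcmpCond::Uge: return "hs"; // unsigned higher or same
  case IcmpCond::Ult: return "lo"; // unsigned lower
  case IcmpCond::Ule: return "ls"; // unsigned lower or same
  case IcmpCond::Sgt: return "gt"; // signed greater than
  case IcmpCond::Sge: return "ge"; // signed greater or equal
  case IcmpCond::Slt: return "lt"; // signed less than
  case IcmpCond::Sle: return "le"; // signed less or equal
  }
  return "al"; // unreachable; keeps -Wreturn-type quiet
}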
63 // The following table summarizes the logic for lowering the icmp instruction 63 // The following table summarizes the logic for lowering the icmp instruction
64 // for the i64 type. Two conditional moves are needed for setting to 1 or 0. 64 // for the i64 type. Two conditional moves are needed for setting to 1 or 0.
65 // The operands may need to be swapped, and there is a slight difference 65 // The operands may need to be swapped, and there is a slight difference for
66 // for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc). 66 // signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
67 const struct TableIcmp64_ { 67 const struct TableIcmp64_ {
68 bool IsSigned; 68 bool IsSigned;
69 bool Swapped; 69 bool Swapped;
70 CondARM32::Cond C1, C2; 70 CondARM32::Cond C1, C2;
71 } TableIcmp64[] = { 71 } TableIcmp64[] = {
72 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \ 72 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64) \
73 { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 } \ 73 { is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64 } \
74 , 74 ,
75 ICMPARM32_TABLE 75 ICMPARM32_TABLE
76 #undef X 76 #undef X
77 }; 77 };
78 78
79 CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) { 79 CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
80 size_t Index = static_cast<size_t>(Cond); 80 size_t Index = static_cast<size_t>(Cond);
81 assert(Index < llvm::array_lengthof(TableIcmp32)); 81 assert(Index < llvm::array_lengthof(TableIcmp32));
82 return TableIcmp32[Index].Mapping; 82 return TableIcmp32[Index].Mapping;
83 } 83 }
84 84
85 // In some cases, there are x-macros tables for both high-level and 85 // In some cases, there are x-macros tables for both high-level and low-level
86 // low-level instructions/operands that use the same enum key value. 86 // instructions/operands that use the same enum key value. The tables are kept
87 // The tables are kept separate to maintain a proper separation 87 // separate to maintain a proper separation between abstraction layers. There
88 // between abstraction layers. There is a risk that the tables could 88 // is a risk that the tables could get out of sync if enum values are reordered
89 // get out of sync if enum values are reordered or if entries are 89 // or if entries are added or deleted. The following dummy namespaces use
90 // added or deleted. The following dummy namespaces use
91 // static_asserts to ensure everything is kept in sync. 90 // static_asserts to ensure everything is kept in sync.
92 91
93 // Validate the enum values in ICMPARM32_TABLE. 92 // Validate the enum values in ICMPARM32_TABLE.
94 namespace dummy1 { 93 namespace dummy1 {
95 // Define a temporary set of enum values based on low-level table 94 // Define a temporary set of enum values based on low-level table entries.
96 // entries.
97 enum _tmp_enum { 95 enum _tmp_enum {
98 #define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val, 96 #define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val,
99 ICMPARM32_TABLE 97 ICMPARM32_TABLE
100 #undef X 98 #undef X
101 _num 99 _num
102 }; 100 };
103 // Define a set of constants based on high-level table entries. 101 // Define a set of constants based on high-level table entries.
104 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; 102 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
105 ICEINSTICMP_TABLE 103 ICEINSTICMP_TABLE
106 #undef X 104 #undef X
107 // Define a set of constants based on low-level table entries, and 105 // Define a set of constants based on low-level table entries, and ensure the
108 // ensure the table entry keys are consistent. 106 // table entry keys are consistent.
109 #define X(val, signed, swapped64, C_32, C1_64, C2_64) \ 107 #define X(val, signed, swapped64, C_32, C1_64, C2_64) \
110 static const int _table2_##val = _tmp_##val; \ 108 static const int _table2_##val = _tmp_##val; \
111 static_assert( \ 109 static_assert( \
112 _table1_##val == _table2_##val, \ 110 _table1_##val == _table2_##val, \
113 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE"); 111 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
114 ICMPARM32_TABLE 112 ICMPARM32_TABLE
115 #undef X 113 #undef X
116 // Repeat the static asserts with respect to the high-level table 114 // Repeat the static asserts with respect to the high-level table entries in
117 // entries in case the high-level table has extra entries. 115 // case the high-level table has extra entries.
118 #define X(tag, str) \ 116 #define X(tag, str) \
119 static_assert( \ 117 static_assert( \
120 _table1_##tag == _table2_##tag, \ 118 _table1_##tag == _table2_##tag, \
121 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE"); 119 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
122 ICEINSTICMP_TABLE 120 ICEINSTICMP_TABLE
123 #undef X 121 #undef X
124 } // end of namespace dummy1 122 } // end of namespace dummy1
125 123
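The dummy1 namespace above is an instance of a general x-macro cross-check idiom. A self-contained sketch with two hypothetical two-entry tables (stand-ins for ICEINSTICMP_TABLE and ICMPARM32_TABLE) shows the mechanics:

// Hypothetical tables; both must list the same keys in the same order.
#define HIGH_LEVEL_TABLE X(Eq, "eq") X(Ne, "ne")
#define LOW_LEVEL_TABLE  X(Eq, EQ)   X(Ne, NE)

namespace check {
// Enumerate the low-level keys to capture their positions.
enum _tmp_enum {
#define X(tag, cc) _tmp_##tag,
  LOW_LEVEL_TABLE
#undef X
  _num
};
enum HighLevel { Eq, Ne }; // stands in for the high-level instruction enum
// Capture the positions of the high-level keys.
#define X(tag, str) static const int _table1_##tag = HighLevel::tag;
HIGH_LEVEL_TABLE
#undef X
// Assert that each key occupies the same position in both tables.
#define X(tag, cc)                                                            \
  static_assert(_table1_##tag == _tmp_##tag,                                  \
                "HIGH_LEVEL_TABLE and LOW_LEVEL_TABLE are out of sync");
LOW_LEVEL_TABLE
#undef X
} // namespace check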
126 // Stack alignment 124 // Stack alignment
127 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16; 125 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;
128 126
129 // Value is in bytes. Return Value adjusted to the next highest multiple 127 // Value is in bytes. Return Value adjusted to the next highest multiple of the
130 // of the stack alignment. 128 // stack alignment.
131 uint32_t applyStackAlignment(uint32_t Value) { 129 uint32_t applyStackAlignment(uint32_t Value) {
132 return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES); 130 return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
133 } 131 }
134 132
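Utils::applyAlignment rounds a byte count up to a multiple of the requested alignment; one standard way to compute that, assuming a power-of-two alignment, is:

#include <cstdint>

// Round Value up to the next multiple of Align, where Align is a power of 2.
// Examples: alignUp(0, 16) == 0, alignUp(1, 16) == 16, alignUp(17, 16) == 32.
uint32_t alignUp(uint32_t Value, uint32_t Align) {
  return (Value + Align - 1) & ~(Align - 1);
}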
135 // Value is in bytes. Return Value adjusted to the next highest multiple 133 // Value is in bytes. Return Value adjusted to the next highest multiple of the
136 // of the stack alignment required for the given type. 134 // stack alignment required for the given type.
137 uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) { 135 uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
138 // Use natural alignment, except that normally (non-NaCl) ARM only 136 // Use natural alignment, except that normally (non-NaCl) ARM only aligns
139 // aligns vectors to 8 bytes. 137 // vectors to 8 bytes.
140 // TODO(jvoung): Check this ... 138 // TODO(jvoung): Check this ...
141 size_t typeAlignInBytes = typeWidthInBytes(Ty); 139 size_t typeAlignInBytes = typeWidthInBytes(Ty);
142 if (isVectorType(Ty)) 140 if (isVectorType(Ty))
143 typeAlignInBytes = 8; 141 typeAlignInBytes = 8;
144 return Utils::applyAlignment(Value, typeAlignInBytes); 142 return Utils::applyAlignment(Value, typeAlignInBytes);
145 } 143 }
146 144
147 // Conservatively check if at compile time we know that the operand is 145 // Conservatively check if at compile time we know that the operand is
148 // definitely a non-zero integer. 146 // definitely a non-zero integer.
149 bool isGuaranteedNonzeroInt(const Operand *Op) { 147 bool isGuaranteedNonzeroInt(const Operand *Op) {
(...skipping 15 matching lines...)
165 TargetInstructionSet::BaseInstructionSet) { 163 TargetInstructionSet::BaseInstructionSet) {
166 InstructionSet = static_cast<ARM32InstructionSet>( 164 InstructionSet = static_cast<ARM32InstructionSet>(
167 (Flags.getTargetInstructionSet() - 165 (Flags.getTargetInstructionSet() -
168 TargetInstructionSet::ARM32InstructionSet_Begin) + 166 TargetInstructionSet::ARM32InstructionSet_Begin) +
169 ARM32InstructionSet::Begin); 167 ARM32InstructionSet::Begin);
170 } 168 }
171 } 169 }
172 170
173 TargetARM32::TargetARM32(Cfg *Func) 171 TargetARM32::TargetARM32(Cfg *Func)
174 : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) { 172 : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) {
175 // TODO: Don't initialize IntegerRegisters and friends every time. 173 // TODO: Don't initialize IntegerRegisters and friends every time. Instead,
176 // Instead, initialize in some sort of static initializer for the 174 // initialize in some sort of static initializer for the class.
177 // class.
178 // Limit this size (or do all bitsets need to be the same width)??? 175 // Limit this size (or do all bitsets need to be the same width)???
179 llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM); 176 llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
180 llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM); 177 llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM);
181 llvm::SmallBitVector Float64Registers(RegARM32::Reg_NUM); 178 llvm::SmallBitVector Float64Registers(RegARM32::Reg_NUM);
182 llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM); 179 llvm::SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
183 llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM); 180 llvm::SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
184 ScratchRegs.resize(RegARM32::Reg_NUM); 181 ScratchRegs.resize(RegARM32::Reg_NUM);
185 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \ 182 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
186 isFP32, isFP64, isVec128, alias_init) \ 183 isFP32, isFP64, isVec128, alias_init) \
187 IntegerRegisters[RegARM32::val] = isInt; \ 184 IntegerRegisters[RegARM32::val] = isInt; \
(...skipping 48 matching lines...)
236 Func->dump("After Phi lowering"); 233 Func->dump("After Phi lowering");
237 } 234 }
238 235
239 // Address mode optimization. 236 // Address mode optimization.
240 Func->getVMetadata()->init(VMK_SingleDefs); 237 Func->getVMetadata()->init(VMK_SingleDefs);
241 Func->doAddressOpt(); 238 Func->doAddressOpt();
242 239
243 // Argument lowering 240 // Argument lowering
244 Func->doArgLowering(); 241 Func->doArgLowering();
245 242
246 // Target lowering. This requires liveness analysis for some parts 243 // Target lowering. This requires liveness analysis for some parts of the
247 // of the lowering decisions, such as compare/branch fusing. If 244 // lowering decisions, such as compare/branch fusing. If non-lightweight
248 // non-lightweight liveness analysis is used, the instructions need 245 // liveness analysis is used, the instructions need to be renumbered first.
249 // to be renumbered first. TODO: This renumbering should only be 246 // TODO: This renumbering should only be necessary if we're actually
250 // necessary if we're actually calculating live intervals, which we 247 // calculating live intervals, which we only do for register allocation.
251 // only do for register allocation.
252 Func->renumberInstructions(); 248 Func->renumberInstructions();
253 if (Func->hasError()) 249 if (Func->hasError())
254 return; 250 return;
255 251
256 // TODO: It should be sufficient to use the fastest liveness 252 // TODO: It should be sufficient to use the fastest liveness calculation,
257 // calculation, i.e. livenessLightweight(). However, for some 253 // i.e. livenessLightweight(). However, for some reason that slows down the
258 // reason that slows down the rest of the translation. Investigate. 254 // rest of the translation. Investigate.
259 Func->liveness(Liveness_Basic); 255 Func->liveness(Liveness_Basic);
260 if (Func->hasError()) 256 if (Func->hasError())
261 return; 257 return;
262 Func->dump("After ARM32 address mode opt"); 258 Func->dump("After ARM32 address mode opt");
263 259
264 Func->genCode(); 260 Func->genCode();
265 if (Func->hasError()) 261 if (Func->hasError())
266 return; 262 return;
267 Func->dump("After ARM32 codegen"); 263 Func->dump("After ARM32 codegen");
268 264
269 // Register allocation. This requires instruction renumbering and 265 // Register allocation. This requires instruction renumbering and full
270 // full liveness analysis. 266 // liveness analysis.
271 Func->renumberInstructions(); 267 Func->renumberInstructions();
272 if (Func->hasError()) 268 if (Func->hasError())
273 return; 269 return;
274 Func->liveness(Liveness_Intervals); 270 Func->liveness(Liveness_Intervals);
275 if (Func->hasError()) 271 if (Func->hasError())
276 return; 272 return;
277 // Validate the live range computations. The expensive validation 273 // Validate the live range computations. The expensive validation call is
278 // call is deliberately only made when assertions are enabled. 274 // deliberately only made when assertions are enabled.
279 assert(Func->validateLiveness()); 275 assert(Func->validateLiveness());
280 // The post-codegen dump is done here, after liveness analysis and 276 // The post-codegen dump is done here, after liveness analysis and associated
281 // associated cleanup, to make the dump cleaner and more useful. 277 // cleanup, to make the dump cleaner and more useful.
282 Func->dump("After initial ARM32 codegen"); 278 Func->dump("After initial ARM32 codegen");
283 Func->getVMetadata()->init(VMK_All); 279 Func->getVMetadata()->init(VMK_All);
284 regAlloc(RAK_Global); 280 regAlloc(RAK_Global);
285 if (Func->hasError()) 281 if (Func->hasError())
286 return; 282 return;
287 Func->dump("After linear scan regalloc"); 283 Func->dump("After linear scan regalloc");
288 284
289 if (Ctx->getFlags().getPhiEdgeSplit()) { 285 if (Ctx->getFlags().getPhiEdgeSplit()) {
290 Func->advancedPhiLowering(); 286 Func->advancedPhiLowering();
291 Func->dump("After advanced Phi lowering"); 287 Func->dump("After advanced Phi lowering");
292 } 288 }
293 289
294 // Stack frame mapping. 290 // Stack frame mapping.
295 Func->genFrame(); 291 Func->genFrame();
296 if (Func->hasError()) 292 if (Func->hasError())
297 return; 293 return;
298 Func->dump("After stack frame mapping"); 294 Func->dump("After stack frame mapping");
299 295
300 legalizeStackSlots(); 296 legalizeStackSlots();
301 if (Func->hasError()) 297 if (Func->hasError())
302 return; 298 return;
303 Func->dump("After legalizeStackSlots"); 299 Func->dump("After legalizeStackSlots");
304 300
305 Func->contractEmptyNodes(); 301 Func->contractEmptyNodes();
306 Func->reorderNodes(); 302 Func->reorderNodes();
307 303
308 // Branch optimization. This needs to be done just before code 304 // Branch optimization. This needs to be done just before code emission. In
309 // emission. In particular, no transformations that insert or 305 // particular, no transformations that insert or reorder CfgNodes should be
310 // reorder CfgNodes should be done after branch optimization. We go 306 // done after branch optimization. We go ahead and do it before nop insertion
311 // ahead and do it before nop insertion to reduce the amount of work 307 // to reduce the amount of work needed for searching for opportunities.
312 // needed for searching for opportunities.
313 Func->doBranchOpt(); 308 Func->doBranchOpt();
314 Func->dump("After branch optimization"); 309 Func->dump("After branch optimization");
315 310
316 // Nop insertion 311 // Nop insertion
317 if (Ctx->getFlags().shouldDoNopInsertion()) { 312 if (Ctx->getFlags().shouldDoNopInsertion()) {
318 Func->doNopInsertion(); 313 Func->doNopInsertion();
319 } 314 }
320 } 315 }
321 316
322 void TargetARM32::translateOm1() { 317 void TargetARM32::translateOm1() {
(...skipping 65 matching lines...)
388 if (Ty == IceType_void) 383 if (Ty == IceType_void)
389 Ty = IceType_i32; 384 Ty = IceType_i32;
390 if (PhysicalRegisters[Ty].empty()) 385 if (PhysicalRegisters[Ty].empty())
391 PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM); 386 PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
392 assert(RegNum < PhysicalRegisters[Ty].size()); 387 assert(RegNum < PhysicalRegisters[Ty].size());
393 Variable *Reg = PhysicalRegisters[Ty][RegNum]; 388 Variable *Reg = PhysicalRegisters[Ty][RegNum];
394 if (Reg == nullptr) { 389 if (Reg == nullptr) {
395 Reg = Func->makeVariable(Ty); 390 Reg = Func->makeVariable(Ty);
396 Reg->setRegNum(RegNum); 391 Reg->setRegNum(RegNum);
397 PhysicalRegisters[Ty][RegNum] = Reg; 392 PhysicalRegisters[Ty][RegNum] = Reg;
398 // Specially mark SP and LR as an "argument" so that it is considered 393 // Specially mark SP and LR as an "argument" so that it is considered live
399 // live upon function entry. 394 // upon function entry.
400 if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) { 395 if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) {
401 Func->addImplicitArg(Reg); 396 Func->addImplicitArg(Reg);
402 Reg->setIgnoreLiveness(); 397 Reg->setIgnoreLiveness();
403 } 398 }
404 } 399 }
405 return Reg; 400 return Reg;
406 } 401 }
407 402
408 void TargetARM32::emitJumpTable(const Cfg *Func, 403 void TargetARM32::emitJumpTable(const Cfg *Func,
409 const InstJumpTable *JumpTable) const { 404 const InstJumpTable *JumpTable) const {
(...skipping 28 matching lines...)
438 if (Offset != 0) { 433 if (Offset != 0) {
439 Str << ", " << getConstantPrefix() << Offset; 434 Str << ", " << getConstantPrefix() << Offset;
440 } 435 }
441 Str << "]"; 436 Str << "]";
442 } 437 }
443 438
444 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) { 439 bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
445 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) 440 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
446 return false; 441 return false;
447 int32_t RegLo, RegHi; 442 int32_t RegLo, RegHi;
448 // Always start i64 registers at an even register, so this may end 443 // Always start i64 registers at an even register, so this may end up padding
449 // up padding away a register. 444 // away a register.
450 NumGPRRegsUsed = Utils::applyAlignment(NumGPRRegsUsed, 2); 445 NumGPRRegsUsed = Utils::applyAlignment(NumGPRRegsUsed, 2);
451 RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed; 446 RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
452 ++NumGPRRegsUsed; 447 ++NumGPRRegsUsed;
453 RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed; 448 RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
454 ++NumGPRRegsUsed; 449 ++NumGPRRegsUsed;
455 // If this bumps us past the boundary, don't allocate to a register 450 // If this bumps us past the boundary, don't allocate to a register and leave
456 // and leave any previously speculatively consumed registers as consumed. 451 // any previously speculatively consumed registers as consumed.
457 if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG) 452 if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
458 return false; 453 return false;
459 Regs->first = RegLo; 454 Regs->first = RegLo;
460 Regs->second = RegHi; 455 Regs->second = RegHi;
461 return true; 456 return true;
462 } 457 }
463 458
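To make the even-register rule above concrete: for a hypothetical signature f(i32 a, i64 b, i32 c), a is assigned r0; b first aligns the GPR count to an even register, so r1 is padded away and b takes the r2:r3 pair; by then all four GPR argument registers (r0-r3) count as consumed, so c is passed on the stack.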
464 bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) { 459 bool TargetARM32::CallingConv::I32InReg(int32_t *Reg) {
465 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) 460 if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
466 return false; 461 return false;
467 *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed; 462 *Reg = RegARM32::Reg_r0 + NumGPRRegsUsed;
468 ++NumGPRRegsUsed; 463 ++NumGPRRegsUsed;
469 return true; 464 return true;
470 } 465 }
471 466
472 bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { 467 bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
473 if (NumFPRegUnits >= ARM32_MAX_FP_REG_UNITS) 468 if (NumFPRegUnits >= ARM32_MAX_FP_REG_UNITS)
474 return false; 469 return false;
475 if (isVectorType(Ty)) { 470 if (isVectorType(Ty)) {
476 NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 4); 471 NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 4);
477 // Q registers are declared in reverse order, so 472 // Q registers are declared in reverse order, so RegARM32::Reg_q0 >
478 // RegARM32::Reg_q0 > RegARM32::Reg_q1. Therefore, we need to subtract 473 // RegARM32::Reg_q1. Therefore, we need to subtract NumFPRegUnits from
479 // NumFPRegUnits from Reg_q0. Same thing goes for D registers. 474 // Reg_q0. Same thing goes for D registers.
480 static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1, 475 static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1,
481 "ARM32 Q registers are possibly declared incorrectly."); 476 "ARM32 Q registers are possibly declared incorrectly.");
482 *Reg = RegARM32::Reg_q0 - (NumFPRegUnits / 4); 477 *Reg = RegARM32::Reg_q0 - (NumFPRegUnits / 4);
483 NumFPRegUnits += 4; 478 NumFPRegUnits += 4;
484 // If this bumps us past the boundary, don't allocate to a register 479 // If this bumps us past the boundary, don't allocate to a register and
485 // and leave any previously speculatively consumed registers as consumed. 480 // leave any previously speculatively consumed registers as consumed.
486 if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS) 481 if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
487 return false; 482 return false;
488 } else if (Ty == IceType_f64) { 483 } else if (Ty == IceType_f64) {
489 static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1, 484 static_assert(RegARM32::Reg_d0 > RegARM32::Reg_d1,
490 "ARM32 D registers are possibly declared incorrectly."); 485 "ARM32 D registers are possibly declared incorrectly.");
491 NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 2); 486 NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 2);
492 *Reg = RegARM32::Reg_d0 - (NumFPRegUnits / 2); 487 *Reg = RegARM32::Reg_d0 - (NumFPRegUnits / 2);
493 NumFPRegUnits += 2; 488 NumFPRegUnits += 2;
494 // If this bumps us past the boundary, don't allocate to a register 489 // If this bumps us past the boundary, don't allocate to a register and
495 // and leave any previously speculatively consumed registers as consumed. 490 // leave any previously speculatively consumed registers as consumed.
496 if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS) 491 if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
497 return false; 492 return false;
498 } else { 493 } else {
499 static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1, 494 static_assert(RegARM32::Reg_s0 < RegARM32::Reg_s1,
500 "ARM32 S registers are possibly declared incorrectly."); 495 "ARM32 S registers are possibly declared incorrectly.");
501 assert(Ty == IceType_f32); 496 assert(Ty == IceType_f32);
502 *Reg = RegARM32::Reg_s0 + NumFPRegUnits; 497 *Reg = RegARM32::Reg_s0 + NumFPRegUnits;
503 ++NumFPRegUnits; 498 ++NumFPRegUnits;
504 } 499 }
505 return true; 500 return true;
506 } 501 }
507 502
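A worked illustration of the S-unit accounting above, for a hypothetical signature f(float a, double b, float c): a takes s0 (one unit); b aligns the unit count to 2 and occupies two units, which the reversed D-register numbering maps to d1; c is then assigned from the running unit count and lands in s4, leaving s1 unused under this scheme. A 128-bit vector argument would align the count to 4 and map to a Q register in the same way.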
508 void TargetARM32::lowerArguments() { 503 void TargetARM32::lowerArguments() {
509 VarList &Args = Func->getArgs(); 504 VarList &Args = Func->getArgs();
510 TargetARM32::CallingConv CC; 505 TargetARM32::CallingConv CC;
511 506
512 // For each register argument, replace Arg in the argument list with the 507 // For each register argument, replace Arg in the argument list with the home
513 // home register. Then generate an instruction in the prolog to copy the 508 // register. Then generate an instruction in the prolog to copy the home
514 // home register to the assigned location of Arg. 509 // register to the assigned location of Arg.
515 Context.init(Func->getEntryNode()); 510 Context.init(Func->getEntryNode());
516 Context.setInsertPoint(Context.getCur()); 511 Context.setInsertPoint(Context.getCur());
517 512
518 for (SizeT I = 0, E = Args.size(); I < E; ++I) { 513 for (SizeT I = 0, E = Args.size(); I < E; ++I) {
519 Variable *Arg = Args[I]; 514 Variable *Arg = Args[I];
520 Type Ty = Arg->getType(); 515 Type Ty = Arg->getType();
521 if (Ty == IceType_i64) { 516 if (Ty == IceType_i64) {
522 std::pair<int32_t, int32_t> RegPair; 517 std::pair<int32_t, int32_t> RegPair;
523 if (!CC.I64InRegs(&RegPair)) 518 if (!CC.I64InRegs(&RegPair))
524 continue; 519 continue;
(...skipping 36 matching lines...)
561 556
562 Args[I] = RegisterArg; 557 Args[I] = RegisterArg;
563 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); 558 Context.insert(InstAssign::create(Func, Arg, RegisterArg));
564 continue; 559 continue;
565 } 560 }
566 } 561 }
567 } 562 }
568 563
569 // Helper function for addProlog(). 564 // Helper function for addProlog().
570 // 565 //
571 // This assumes Arg is an argument passed on the stack. This sets the 566 // This assumes Arg is an argument passed on the stack. This sets the frame
572 // frame offset for Arg and updates InArgsSizeBytes according to Arg's 567 // offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
573 // width. For an I64 arg that has been split into Lo and Hi components, 568 // I64 arg that has been split into Lo and Hi components, it calls itself
574 // it calls itself recursively on the components, taking care to handle 569 // recursively on the components, taking care to handle Lo first because of the
575 // Lo first because of the little-endian architecture. Lastly, this 570 // little-endian architecture. Lastly, this function generates an instruction
576 // function generates an instruction to copy Arg into its assigned 571 // to copy Arg into its assigned register if applicable.
577 // register if applicable.
578 void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, 572 void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
579 size_t BasicFrameOffset, 573 size_t BasicFrameOffset,
580 size_t &InArgsSizeBytes) { 574 size_t &InArgsSizeBytes) {
581 Variable *Lo = Arg->getLo(); 575 Variable *Lo = Arg->getLo();
582 Variable *Hi = Arg->getHi(); 576 Variable *Hi = Arg->getHi();
583 Type Ty = Arg->getType(); 577 Type Ty = Arg->getType();
584 if (Lo && Hi && Ty == IceType_i64) { 578 if (Lo && Hi && Ty == IceType_i64) {
585 assert(Lo->getType() != IceType_i64); // don't want infinite recursion 579 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
586 assert(Hi->getType() != IceType_i64); // don't want infinite recursion 580 assert(Hi->getType() != IceType_i64); // don't want infinite recursion
587 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); 581 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
588 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); 582 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
589 return; 583 return;
590 } 584 }
591 InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty); 585 InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty);
592 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); 586 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
593 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); 587 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
594 // If the argument variable has been assigned a register, we need to load 588 // If the argument variable has been assigned a register, we need to load the
595 // the value from the stack slot. 589 // value from the stack slot.
596 if (Arg->hasReg()) { 590 if (Arg->hasReg()) {
597 assert(Ty != IceType_i64); 591 assert(Ty != IceType_i64);
598 OperandARM32Mem *Mem = OperandARM32Mem::create( 592 OperandARM32Mem *Mem = OperandARM32Mem::create(
599 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>( 593 Func, Ty, FramePtr, llvm::cast<ConstantInteger32>(
600 Ctx->getConstantInt32(Arg->getStackOffset()))); 594 Ctx->getConstantInt32(Arg->getStackOffset())));
601 if (isVectorType(Arg->getType())) { 595 if (isVectorType(Arg->getType())) {
602 // Use vld1.$elem or something? 596 // Use vld1.$elem or something?
603 UnimplementedError(Func->getContext()->getFlags()); 597 UnimplementedError(Func->getContext()->getFlags());
604 } else if (isFloatingType(Arg->getType())) { 598 } else if (isFloatingType(Arg->getType())) {
605 _vldr(Arg, Mem); 599 _vldr(Arg, Mem);
606 } else { 600 } else {
607 _ldr(Arg, Mem); 601 _ldr(Arg, Mem);
608 } 602 }
609 // This argument-copying instruction uses an explicit 603 // This argument-copying instruction uses an explicit OperandARM32Mem
610 // OperandARM32Mem operand instead of a Variable, so its 604 // operand instead of a Variable, so its fill-from-stack operation has to
611 // fill-from-stack operation has to be tracked separately for 605 // be tracked separately for statistics.
612 // statistics.
613 Ctx->statsUpdateFills(); 606 Ctx->statsUpdateFills();
614 } 607 }
615 } 608 }
616 609
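As an example of the little-endian ordering noted above: if a split i64 stack argument's low half is assigned frame offset N, processing Lo before Hi gives Lo offset N and Hi offset N+4, matching the layout in which the caller stored the 64-bit value.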
617 Type TargetARM32::stackSlotType() { return IceType_i32; } 610 Type TargetARM32::stackSlotType() { return IceType_i32; }
618 611
619 void TargetARM32::addProlog(CfgNode *Node) { 612 void TargetARM32::addProlog(CfgNode *Node) {
620 // Stack frame layout: 613 // Stack frame layout:
621 // 614 //
622 // +------------------------+ 615 // +------------------------+
(...skipping 12 matching lines...) Expand all
635 // | 7. allocas | 628 // | 7. allocas |
636 // +------------------------+ <--- StackPointer 629 // +------------------------+ <--- StackPointer
637 // 630 //
638 // The following variables record the size in bytes of the given areas: 631 // The following variables record the size in bytes of the given areas:
639 // * PreservedRegsSizeBytes: area 1 632 // * PreservedRegsSizeBytes: area 1
640 // * SpillAreaPaddingBytes: area 2 633 // * SpillAreaPaddingBytes: area 2
641 // * GlobalsSize: area 3 634 // * GlobalsSize: area 3
642 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 635 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4
643 // * LocalsSpillAreaSize: area 5 636 // * LocalsSpillAreaSize: area 5
644 // * SpillAreaSizeBytes: areas 2 - 6 637 // * SpillAreaSizeBytes: areas 2 - 6
645 // Determine stack frame offsets for each Variable without a 638 // Determine stack frame offsets for each Variable without a register
646 // register assignment. This can be done as one variable per stack 639 // assignment. This can be done as one variable per stack slot. Or, do
647 // slot. Or, do coalescing by running the register allocator again 640 // coalescing by running the register allocator again with an infinite set of
648 // with an infinite set of registers (as a side effect, this gives 641 // registers (as a side effect, this gives variables a second chance at
649 // variables a second chance at physical register assignment). 642 // physical register assignment).
650 // 643 //
651 // A middle ground approach is to leverage sparsity and allocate one 644 // A middle ground approach is to leverage sparsity and allocate one block of
652 // block of space on the frame for globals (variables with 645 // space on the frame for globals (variables with multi-block lifetime), and
653 // multi-block lifetime), and one block to share for locals 646 // one block to share for locals (single-block lifetime).
654 // (single-block lifetime).
655 647
656 Context.init(Node); 648 Context.init(Node);
657 Context.setInsertPoint(Context.getCur()); 649 Context.setInsertPoint(Context.getCur());
658 650
659 llvm::SmallBitVector CalleeSaves = 651 llvm::SmallBitVector CalleeSaves =
660 getRegisterSet(RegSet_CalleeSave, RegSet_None); 652 getRegisterSet(RegSet_CalleeSave, RegSet_None);
661 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); 653 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
662 VarList SortedSpilledVariables; 654 VarList SortedSpilledVariables;
663 size_t GlobalsSize = 0; 655 size_t GlobalsSize = 0;
664 // If there is a separate locals area, this represents that area. 656 // If there is a separate locals area, this represents that area. Otherwise
665 // Otherwise it counts any variable not counted by GlobalsSize. 657 // it counts any variable not counted by GlobalsSize.
666 SpillAreaSizeBytes = 0; 658 SpillAreaSizeBytes = 0;
667 // If there is a separate locals area, this specifies the alignment 659 // If there is a separate locals area, this specifies the alignment for it.
668 // for it.
669 uint32_t LocalsSlotsAlignmentBytes = 0; 660 uint32_t LocalsSlotsAlignmentBytes = 0;
670 // The entire spill locations area gets aligned to largest natural 661 // The entire spill locations area gets aligned to largest natural alignment
671 // alignment of the variables that have a spill slot. 662 // of the variables that have a spill slot.
672 uint32_t SpillAreaAlignmentBytes = 0; 663 uint32_t SpillAreaAlignmentBytes = 0;
673 // For now, we don't have target-specific variables that need special 664 // For now, we don't have target-specific variables that need special
674 // treatment (no stack-slot-linked SpillVariable type). 665 // treatment (no stack-slot-linked SpillVariable type).
675 std::function<bool(Variable *)> TargetVarHook = 666 std::function<bool(Variable *)> TargetVarHook =
676 [](Variable *) { return false; }; 667 [](Variable *) { return false; };
677 668
678 // Compute the list of spilled variables and bounds for GlobalsSize, etc. 669 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
679 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize, 670 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
680 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes, 671 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
681 &LocalsSlotsAlignmentBytes, TargetVarHook); 672 &LocalsSlotsAlignmentBytes, TargetVarHook);
682 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; 673 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
683 SpillAreaSizeBytes += GlobalsSize; 674 SpillAreaSizeBytes += GlobalsSize;
684 675
685 // Add push instructions for preserved registers. 676 // Add push instructions for preserved registers. On ARM, "push" can push a
686 // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15). 677 // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
687 // Unlike x86, ARM also has callee-saved float/vector registers. 678 // callee-saved float/vector registers. The "vpush" instruction can handle a
688 // The "vpush" instruction can handle a whole list of float/vector 679 // whole list of float/vector registers, but it only handles contiguous
689 // registers, but it only handles contiguous sequences of registers 680 // sequences of registers by specifying the start and the length.
690 // by specifying the start and the length.
691 VarList GPRsToPreserve; 681 VarList GPRsToPreserve;
692 GPRsToPreserve.reserve(CalleeSaves.size()); 682 GPRsToPreserve.reserve(CalleeSaves.size());
693 uint32_t NumCallee = 0; 683 uint32_t NumCallee = 0;
694 size_t PreservedRegsSizeBytes = 0; 684 size_t PreservedRegsSizeBytes = 0;
695 // Consider FP and LR as callee-save / used as needed. 685 // Consider FP and LR as callee-save / used as needed.
696 if (UsesFramePointer) { 686 if (UsesFramePointer) {
697 CalleeSaves[RegARM32::Reg_fp] = true; 687 CalleeSaves[RegARM32::Reg_fp] = true;
698 assert(RegsUsed[RegARM32::Reg_fp] == false); 688 assert(RegsUsed[RegARM32::Reg_fp] == false);
699 RegsUsed[RegARM32::Reg_fp] = true; 689 RegsUsed[RegARM32::Reg_fp] = true;
700 } 690 }
701 if (!MaybeLeafFunc) { 691 if (!MaybeLeafFunc) {
702 CalleeSaves[RegARM32::Reg_lr] = true; 692 CalleeSaves[RegARM32::Reg_lr] = true;
703 RegsUsed[RegARM32::Reg_lr] = true; 693 RegsUsed[RegARM32::Reg_lr] = true;
704 } 694 }
705 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 695 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
706 if (CalleeSaves[i] && RegsUsed[i]) { 696 if (CalleeSaves[i] && RegsUsed[i]) {
707 // TODO(jvoung): do separate vpush for each floating point 697 // TODO(jvoung): do separate vpush for each floating point register
708 // register segment and += 4, or 8 depending on type. 698 // segment and += 4, or 8 depending on type.
709 ++NumCallee; 699 ++NumCallee;
710 PreservedRegsSizeBytes += 4; 700 PreservedRegsSizeBytes += 4;
711 GPRsToPreserve.push_back(getPhysicalRegister(i)); 701 GPRsToPreserve.push_back(getPhysicalRegister(i));
712 } 702 }
713 } 703 }
714 Ctx->statsUpdateRegistersSaved(NumCallee); 704 Ctx->statsUpdateRegistersSaved(NumCallee);
715 if (!GPRsToPreserve.empty()) 705 if (!GPRsToPreserve.empty())
716 _push(GPRsToPreserve); 706 _push(GPRsToPreserve);
717 707
718 // Generate "mov FP, SP" if needed. 708 // Generate "mov FP, SP" if needed.
719 if (UsesFramePointer) { 709 if (UsesFramePointer) {
720 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); 710 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
721 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 711 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
722 _mov(FP, SP); 712 _mov(FP, SP);
723 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode). 713 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
724 Context.insert(InstFakeUse::create(Func, FP)); 714 Context.insert(InstFakeUse::create(Func, FP));
725 } 715 }
726 716
727 // Align the variables area. SpillAreaPaddingBytes is the size of 717 // Align the variables area. SpillAreaPaddingBytes is the size of the region
728 // the region after the preserved registers and before the spill areas. 718 // after the preserved registers and before the spill areas.
729 // LocalsSlotsPaddingBytes is the amount of padding between the globals 719 // LocalsSlotsPaddingBytes is the amount of padding between the globals and
730 // and locals area if they are separate. 720 // locals area if they are separate.
731 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES); 721 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
732 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); 722 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
733 uint32_t SpillAreaPaddingBytes = 0; 723 uint32_t SpillAreaPaddingBytes = 0;
734 uint32_t LocalsSlotsPaddingBytes = 0; 724 uint32_t LocalsSlotsPaddingBytes = 0;
735 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes, 725 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
736 GlobalsSize, LocalsSlotsAlignmentBytes, 726 GlobalsSize, LocalsSlotsAlignmentBytes,
737 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes); 727 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
738 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes; 728 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
739 uint32_t GlobalsAndSubsequentPaddingSize = 729 uint32_t GlobalsAndSubsequentPaddingSize =
740 GlobalsSize + LocalsSlotsPaddingBytes; 730 GlobalsSize + LocalsSlotsPaddingBytes;
(...skipping 10 matching lines...)
751 // Use the scratch register if needed to legalize the immediate. 741 // Use the scratch register if needed to legalize the immediate.
752 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), 742 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
753 Legal_Reg | Legal_Flex, getReservedTmpReg()); 743 Legal_Reg | Legal_Flex, getReservedTmpReg());
754 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 744 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
755 _sub(SP, SP, SubAmount); 745 _sub(SP, SP, SubAmount);
756 } 746 }
757 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); 747 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
758 748
759 resetStackAdjustment(); 749 resetStackAdjustment();
760 750
761 // Fill in stack offsets for stack args, and copy args into registers 751 // Fill in stack offsets for stack args, and copy args into registers for
762 // for those that were register-allocated. Args are pushed right to 752 // those that were register-allocated. Args are pushed right to left, so
763 // left, so Arg[0] is closest to the stack/frame pointer. 753 // Arg[0] is closest to the stack/frame pointer.
764 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 754 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
765 size_t BasicFrameOffset = PreservedRegsSizeBytes; 755 size_t BasicFrameOffset = PreservedRegsSizeBytes;
766 if (!UsesFramePointer) 756 if (!UsesFramePointer)
767 BasicFrameOffset += SpillAreaSizeBytes; 757 BasicFrameOffset += SpillAreaSizeBytes;
768 758
769 const VarList &Args = Func->getArgs(); 759 const VarList &Args = Func->getArgs();
770 size_t InArgsSizeBytes = 0; 760 size_t InArgsSizeBytes = 0;
771 TargetARM32::CallingConv CC; 761 TargetARM32::CallingConv CC;
772 for (Variable *Arg : Args) { 762 for (Variable *Arg : Args) {
773 Type Ty = Arg->getType(); 763 Type Ty = Arg->getType();
(...skipping 49 matching lines...)
823 void TargetARM32::addEpilog(CfgNode *Node) { 813 void TargetARM32::addEpilog(CfgNode *Node) {
824 InstList &Insts = Node->getInsts(); 814 InstList &Insts = Node->getInsts();
825 InstList::reverse_iterator RI, E; 815 InstList::reverse_iterator RI, E;
826 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { 816 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
827 if (llvm::isa<InstARM32Ret>(*RI)) 817 if (llvm::isa<InstARM32Ret>(*RI))
828 break; 818 break;
829 } 819 }
830 if (RI == E) 820 if (RI == E)
831 return; 821 return;
832 822
833 // Convert the reverse_iterator position into its corresponding 823 // Convert the reverse_iterator position into its corresponding (forward)
834 // (forward) iterator position. 824 // iterator position.
835 InstList::iterator InsertPoint = RI.base(); 825 InstList::iterator InsertPoint = RI.base();
836 --InsertPoint; 826 --InsertPoint;
837 Context.init(Node); 827 Context.init(Node);
838 Context.setInsertPoint(InsertPoint); 828 Context.setInsertPoint(InsertPoint);
839 829
840 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 830 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
841 if (UsesFramePointer) { 831 if (UsesFramePointer) {
842 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); 832 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
843 // For late-stage liveness analysis (e.g. asm-verbose mode), 833 // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
844 // adding a fake use of SP before the assignment of SP=FP keeps 834 // use of SP before the assignment of SP=FP keeps previous SP adjustments
845 // previous SP adjustments from being dead-code eliminated. 835 // from being dead-code eliminated.
846 Context.insert(InstFakeUse::create(Func, SP)); 836 Context.insert(InstFakeUse::create(Func, SP));
847 _mov(SP, FP); 837 _mov(SP, FP);
848 } else { 838 } else {
849 // add SP, SpillAreaSizeBytes 839 // add SP, SpillAreaSizeBytes
850 if (SpillAreaSizeBytes) { 840 if (SpillAreaSizeBytes) {
851 // Use the scratch register if needed to legalize the immediate. 841 // Use the scratch register if needed to legalize the immediate.
852 Operand *AddAmount = 842 Operand *AddAmount =
853 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes), 843 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
854 Legal_Reg | Legal_Flex, getReservedTmpReg()); 844 Legal_Reg | Legal_Flex, getReservedTmpReg());
855 _add(SP, SP, AddAmount); 845 _add(SP, SP, AddAmount);
856 } 846 }
857 } 847 }
858 848
859 // Add pop instructions for preserved registers. 849 // Add pop instructions for preserved registers.
860 llvm::SmallBitVector CalleeSaves = 850 llvm::SmallBitVector CalleeSaves =
861 getRegisterSet(RegSet_CalleeSave, RegSet_None); 851 getRegisterSet(RegSet_CalleeSave, RegSet_None);
862 VarList GPRsToRestore; 852 VarList GPRsToRestore;
863 GPRsToRestore.reserve(CalleeSaves.size()); 853 GPRsToRestore.reserve(CalleeSaves.size());
864 // Consider FP and LR as callee-save / used as needed. 854 // Consider FP and LR as callee-save / used as needed.
865 if (UsesFramePointer) { 855 if (UsesFramePointer) {
866 CalleeSaves[RegARM32::Reg_fp] = true; 856 CalleeSaves[RegARM32::Reg_fp] = true;
867 } 857 }
868 if (!MaybeLeafFunc) { 858 if (!MaybeLeafFunc) {
869 CalleeSaves[RegARM32::Reg_lr] = true; 859 CalleeSaves[RegARM32::Reg_lr] = true;
870 } 860 }
871 // Pop registers in ascending order just like push 861 // Pop registers in ascending order just like push (instead of in reverse
872 // (instead of in reverse order). 862 // order).
873 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 863 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
874 if (CalleeSaves[i] && RegsUsed[i]) { 864 if (CalleeSaves[i] && RegsUsed[i]) {
875 GPRsToRestore.push_back(getPhysicalRegister(i)); 865 GPRsToRestore.push_back(getPhysicalRegister(i));
876 } 866 }
877 } 867 }
878 if (!GPRsToRestore.empty()) 868 if (!GPRsToRestore.empty())
879 _pop(GPRsToRestore); 869 _pop(GPRsToRestore);
880 870
881 if (!Ctx->getFlags().getUseSandboxing()) 871 if (!Ctx->getFlags().getUseSandboxing())
882 return; 872 return;
(...skipping 13 matching lines...)
896 RetValue = llvm::cast<Variable>(RI->getSrc(0)); 886 RetValue = llvm::cast<Variable>(RI->getSrc(0));
897 _bundle_lock(); 887 _bundle_lock();
898 _bic(LR, LR, RetMask); 888 _bic(LR, LR, RetMask);
899 _ret(LR, RetValue); 889 _ret(LR, RetValue);
900 _bundle_unlock(); 890 _bundle_unlock();
901 RI->setDeleted(); 891 RI->setDeleted();
902 } 892 }
903 893
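The RI.base() / --InsertPoint step above relies on the standard off-by-one relationship between a reverse_iterator and its underlying forward iterator; a minimal standalone illustration of that relationship:

#include <cassert>
#include <list>

int main() {
  std::list<int> Insts = {1, 2, 3, 4};
  // Scan backwards for the "ret"-like element (here, the value 3).
  auto RI = Insts.rbegin();
  while (RI != Insts.rend() && *RI != 3)
    ++RI;
  // RI.base() refers to the element *after* the one RI points at, so it is
  // decremented once to obtain a forward iterator to the same element.
  auto InsertPoint = RI.base();
  --InsertPoint;
  assert(*InsertPoint == *RI);
  return 0;
}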
904 bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const { 894 bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const {
905 constexpr bool SignExt = false; 895 constexpr bool SignExt = false;
906 // TODO(jvoung): vldr of FP stack slots has a different limit from the 896 // TODO(jvoung): vldr of FP stack slots has a different limit from the plain
907 // plain stackSlotType(). 897 // stackSlotType().
908 return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset); 898 return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset);
909 } 899 }
910 900
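For context on canHoldOffset and the TODO above: different A32 load/store encodings have different immediate-offset windows. As a rough reminder (not a substitute for the ARM ARM), ldr/str of words and bytes take a 12-bit immediate (about +/-4095); the halfword/dual forms (ldrh, ldrsb, ldrd, and friends) take an 8-bit immediate (about +/-255); and vldr/vstr take an 8-bit immediate scaled by 4 (about +/-1020, word-aligned), which is why FP stack slots have a tighter limit than plain stackSlotType() slots.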
911 StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var, 901 StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var,
912 Variable *OrigBaseReg) { 902 Variable *OrigBaseReg) {
913 int32_t Offset = Var->getStackOffset(); 903 int32_t Offset = Var->getStackOffset();
914 // Legalize will likely need a movw/movt combination, but if the top 904 // Legalize will likely need a movw/movt combination, but if the top bits are
915 // bits are all 0 from negating the offset and subtracting, we could 905 // all 0 from negating the offset and subtracting, we could use that instead.
916 // use that instead.
917 bool ShouldSub = (-Offset & 0xFFFF0000) == 0; 906 bool ShouldSub = (-Offset & 0xFFFF0000) == 0;
918 if (ShouldSub) 907 if (ShouldSub)
919 Offset = -Offset; 908 Offset = -Offset;
920 Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset), 909 Operand *OffsetVal = legalize(Ctx->getConstantInt32(Offset),
921 Legal_Reg | Legal_Flex, getReservedTmpReg()); 910 Legal_Reg | Legal_Flex, getReservedTmpReg());
922 Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg()); 911 Variable *ScratchReg = makeReg(IceType_i32, getReservedTmpReg());
923 if (ShouldSub) 912 if (ShouldSub)
924 _sub(ScratchReg, OrigBaseReg, OffsetVal); 913 _sub(ScratchReg, OrigBaseReg, OffsetVal);
925 else 914 else
926 _add(ScratchReg, OrigBaseReg, OffsetVal); 915 _add(ScratchReg, OrigBaseReg, OffsetVal);
(...skipping 15 matching lines...)
942 // 931 //
943 // This is safe because we have reserved TMP, and add for ARM does not 932 // This is safe because we have reserved TMP, and add for ARM does not
944 // clobber the flags register. 933 // clobber the flags register.
945 Func->dump("Before legalizeStackSlots"); 934 Func->dump("Before legalizeStackSlots");
946 assert(hasComputedFrame()); 935 assert(hasComputedFrame());
947 // Early exit, if SpillAreaSizeBytes is really small. 936 // Early exit, if SpillAreaSizeBytes is really small.
948 if (isLegalVariableStackOffset(SpillAreaSizeBytes)) 937 if (isLegalVariableStackOffset(SpillAreaSizeBytes))
949 return; 938 return;
950 Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); 939 Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg());
951 int32_t StackAdjust = 0; 940 int32_t StackAdjust = 0;
952 // Do a fairly naive greedy clustering for now. Pick the first stack slot 941 // Do a fairly naive greedy clustering for now. Pick the first stack slot
953 // that's out of bounds and make a new base reg using the architecture's temp 942 // that's out of bounds and make a new base reg using the architecture's temp
954 // register. If that works for the next slot, then great. Otherwise, create 943 // register. If that works for the next slot, then great. Otherwise, create a
955 // a new base register, clobbering the previous base register. Never share a 944 // new base register, clobbering the previous base register. Never share a
956 // base reg across different basic blocks. This isn't ideal if local and 945 // base reg across different basic blocks. This isn't ideal if local and
957 // multi-block variables are far apart and their references are interspersed. 946 // multi-block variables are far apart and their references are interspersed.
958 // It may help to be more coordinated about assign stack slot numbers 947 // It may help to be more coordinated about assign stack slot numbers and may
959 // and may help to assign smaller offsets to higher-weight variables 948 // help to assign smaller offsets to higher-weight variables so that they
960 // so that they don't depend on this legalization. 949 // don't depend on this legalization.
961 for (CfgNode *Node : Func->getNodes()) { 950 for (CfgNode *Node : Func->getNodes()) {
962 Context.init(Node); 951 Context.init(Node);
963 StackVariable *NewBaseReg = nullptr; 952 StackVariable *NewBaseReg = nullptr;
964 int32_t NewBaseOffset = 0; 953 int32_t NewBaseOffset = 0;
965 while (!Context.atEnd()) { 954 while (!Context.atEnd()) {
966 PostIncrLoweringContext PostIncrement(Context); 955 PostIncrLoweringContext PostIncrement(Context);
967 Inst *CurInstr = Context.getCur(); 956 Inst *CurInstr = Context.getCur();
968 Variable *Dest = CurInstr->getDest(); 957 Variable *Dest = CurInstr->getDest();
969 // Check if the previous NewBaseReg is clobbered, and reset if needed. 958 // Check if the previous NewBaseReg is clobbered, and reset if needed.
970 if ((Dest && NewBaseReg && Dest->hasReg() && 959 if ((Dest && NewBaseReg && Dest->hasReg() &&
971 Dest->getRegNum() == NewBaseReg->getBaseRegNum()) || 960 Dest->getRegNum() == NewBaseReg->getBaseRegNum()) ||
972 llvm::isa<InstFakeKill>(CurInstr)) { 961 llvm::isa<InstFakeKill>(CurInstr)) {
973 NewBaseReg = nullptr; 962 NewBaseReg = nullptr;
974 NewBaseOffset = 0; 963 NewBaseOffset = 0;
975 } 964 }
976 // The stack adjustment only matters if we are using SP instead of FP. 965 // The stack adjustment only matters if we are using SP instead of FP.
977 if (!hasFramePointer()) { 966 if (!hasFramePointer()) {
978 if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) { 967 if (auto *AdjInst = llvm::dyn_cast<InstARM32AdjustStack>(CurInstr)) {
979 StackAdjust += AdjInst->getAmount(); 968 StackAdjust += AdjInst->getAmount();
980 NewBaseOffset += AdjInst->getAmount(); 969 NewBaseOffset += AdjInst->getAmount();
981 continue; 970 continue;
982 } 971 }
983 if (llvm::isa<InstARM32Call>(CurInstr)) { 972 if (llvm::isa<InstARM32Call>(CurInstr)) {
984 NewBaseOffset -= StackAdjust; 973 NewBaseOffset -= StackAdjust;
985 StackAdjust = 0; 974 StackAdjust = 0;
986 continue; 975 continue;
987 } 976 }
988 } 977 }
989 // For now, only Mov instructions can have stack variables. We need to 978 // For now, only Mov instructions can have stack variables. We need to
990 // know the type of instruction because we currently create a fresh one 979 // know the type of instruction because we currently create a fresh one
991 // to replace Dest/Source, rather than mutate in place. 980 // to replace Dest/Source, rather than mutate in place.
992 auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr); 981 auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr);
993 if (!MovInst) { 982 if (!MovInst) {
994 continue; 983 continue;
995 } 984 }
996 if (!Dest->hasReg()) { 985 if (!Dest->hasReg()) {
997 int32_t Offset = Dest->getStackOffset(); 986 int32_t Offset = Dest->getStackOffset();
998 Offset += StackAdjust; 987 Offset += StackAdjust;
999 if (!isLegalVariableStackOffset(Offset)) { 988 if (!isLegalVariableStackOffset(Offset)) {
(...skipping 110 matching lines...)
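Roughly, the clustering above means: when a spill slot's fp/sp-relative offset (say 70000) no longer fits the load/store immediate field, the reserved temp register is loaded with the large offset (movw/movt if needed) and added to the original base once; that slot and nearby out-of-range slots are then addressed off the new temp base with small offsets, until the base is invalidated or a new cluster is started.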
1110 return Operand; 1099 return Operand;
1111 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) { 1100 if (auto *Var = llvm::dyn_cast<Variable>(Operand)) {
1112 split64(Var); 1101 split64(Var);
1113 return Var->getHi(); 1102 return Var->getHi();
1114 } 1103 }
1115 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) { 1104 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
1116 return Ctx->getConstantInt32( 1105 return Ctx->getConstantInt32(
1117 static_cast<uint32_t>(Const->getValue() >> 32)); 1106 static_cast<uint32_t>(Const->getValue() >> 32));
1118 } 1107 }
1119 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) { 1108 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
1120 // Conservatively disallow memory operands with side-effects 1109 // Conservatively disallow memory operands with side-effects in case of
1121 // in case of duplication. 1110 // duplication.
1122 assert(Mem->getAddrMode() == OperandARM32Mem::Offset || 1111 assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
1123 Mem->getAddrMode() == OperandARM32Mem::NegOffset); 1112 Mem->getAddrMode() == OperandARM32Mem::NegOffset);
1124 const Type SplitType = IceType_i32; 1113 const Type SplitType = IceType_i32;
1125 if (Mem->isRegReg()) { 1114 if (Mem->isRegReg()) {
1126 // We have to make a temp variable T, and add 4 to either Base or Index. 1115 // We have to make a temp variable T, and add 4 to either Base or Index.
1127 // The Index may be shifted, so adding 4 can mean something else. 1116 // The Index may be shifted, so adding 4 can mean something else. Thus,
1128 // Thus, prefer T := Base + 4, and use T as the new Base. 1117 // prefer T := Base + 4, and use T as the new Base.
1129 Variable *Base = Mem->getBase(); 1118 Variable *Base = Mem->getBase();
1130 Constant *Four = Ctx->getConstantInt32(4); 1119 Constant *Four = Ctx->getConstantInt32(4);
1131 Variable *NewBase = Func->makeVariable(Base->getType()); 1120 Variable *NewBase = Func->makeVariable(Base->getType());
1132 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase, 1121 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
1133 Base, Four)); 1122 Base, Four));
1134 return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(), 1123 return OperandARM32Mem::create(Func, SplitType, NewBase, Mem->getIndex(),
1135 Mem->getShiftOp(), Mem->getShiftAmt(), 1124 Mem->getShiftOp(), Mem->getShiftAmt(),
1136 Mem->getAddrMode()); 1125 Mem->getAddrMode());
1137 } else { 1126 } else {
1138 Variable *Base = Mem->getBase(); 1127 Variable *Base = Mem->getBase();
1139 ConstantInteger32 *Offset = Mem->getOffset(); 1128 ConstantInteger32 *Offset = Mem->getOffset();
1140 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4)); 1129 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
1141 int32_t NextOffsetVal = Offset->getValue() + 4; 1130 int32_t NextOffsetVal = Offset->getValue() + 4;
1142 const bool SignExt = false; 1131 const bool SignExt = false;
1143 if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) { 1132 if (!OperandARM32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
1144 // We have to make a temp variable and add 4 to either Base or Offset. 1133 // We have to make a temp variable and add 4 to either Base or Offset.
1145 // If we add 4 to Offset, this will convert a non-RegReg addressing 1134 // If we add 4 to Offset, this will convert a non-RegReg addressing
1146 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows 1135 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
1147 // RegReg addressing modes, prefer adding to base and replacing instead. 1136 // RegReg addressing modes, prefer adding to base and replacing
1148 // Thus we leave the old offset alone. 1137 // instead. Thus we leave the old offset alone.
1149 Constant *Four = Ctx->getConstantInt32(4); 1138 Constant *Four = Ctx->getConstantInt32(4);
1150 Variable *NewBase = Func->makeVariable(Base->getType()); 1139 Variable *NewBase = Func->makeVariable(Base->getType());
1151 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, 1140 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
1152 NewBase, Base, Four)); 1141 NewBase, Base, Four));
1153 Base = NewBase; 1142 Base = NewBase;
1154 } else { 1143 } else {
1155 Offset = 1144 Offset =
1156 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal)); 1145 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
1157 } 1146 }
1158 return OperandARM32Mem::create(Func, SplitType, Base, Offset, 1147 return OperandARM32Mem::create(Func, SplitType, Base, Offset,
(...skipping 29 matching lines...)
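The hiOperand()/loOperand() splitting above amounts to simple half-word arithmetic; a minimal sketch of the assumed little-endian layout (helper names are illustrative, not part of the patch):

#include <cstdint>

// Low/high 32-bit halves of a 64-bit constant, as used when splitting i64
// operands; for a memory operand the high half is instead read from addr + 4.
static uint32_t loHalf(uint64_t C) { return static_cast<uint32_t>(C); }
static uint32_t hiHalf(uint64_t C) { return static_cast<uint32_t>(C >> 32); }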
1188 1177
1189 REGARM32_TABLE 1178 REGARM32_TABLE
1190 1179
1191 #undef X 1180 #undef X
1192 1181
1193 return Registers; 1182 return Registers;
1194 } 1183 }
1195 1184
1196 void TargetARM32::lowerAlloca(const InstAlloca *Inst) { 1185 void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
1197 UsesFramePointer = true; 1186 UsesFramePointer = true;
1198 // Conservatively require the stack to be aligned. Some stack 1187 // Conservatively require the stack to be aligned. Some stack adjustment
1199 // adjustment operations implemented below assume that the stack is 1188 // operations implemented below assume that the stack is aligned before the
1200 // aligned before the alloca. All the alloca code ensures that the 1189 // alloca. All the alloca code ensures that the stack alignment is preserved
1201 // stack alignment is preserved after the alloca. The stack alignment 1190 // after the alloca. The stack alignment restriction can be relaxed in some
1202 // restriction can be relaxed in some cases. 1191 // cases.
1203 NeedsStackAlignment = true; 1192 NeedsStackAlignment = true;
1204 1193
1205 // TODO(stichnot): minimize the number of adjustments of SP, etc. 1194 // TODO(stichnot): minimize the number of adjustments of SP, etc.
1206 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 1195 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1207 Variable *Dest = Inst->getDest(); 1196 Variable *Dest = Inst->getDest();
1208 uint32_t AlignmentParam = Inst->getAlignInBytes(); 1197 uint32_t AlignmentParam = Inst->getAlignInBytes();
1209 // For default align=0, set it to the real value 1, to avoid any 1198 // For default align=0, set it to the real value 1, to avoid any
1210 // bit-manipulation problems below. 1199 // bit-manipulation problems below.
1211 AlignmentParam = std::max(AlignmentParam, 1u); 1200 AlignmentParam = std::max(AlignmentParam, 1u);
1212 1201
1213 // LLVM enforces power of 2 alignment. 1202 // LLVM enforces power of 2 alignment.
1214 assert(llvm::isPowerOf2_32(AlignmentParam)); 1203 assert(llvm::isPowerOf2_32(AlignmentParam));
1215 assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES)); 1204 assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));
1216 1205
1217 uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES); 1206 uint32_t Alignment = std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
1218 if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) { 1207 if (Alignment > ARM32_STACK_ALIGNMENT_BYTES) {
1219 alignRegisterPow2(SP, Alignment); 1208 alignRegisterPow2(SP, Alignment);
1220 } 1209 }
1221 Operand *TotalSize = Inst->getSizeInBytes(); 1210 Operand *TotalSize = Inst->getSizeInBytes();
1222 if (const auto *ConstantTotalSize = 1211 if (const auto *ConstantTotalSize =
1223 llvm::dyn_cast<ConstantInteger32>(TotalSize)) { 1212 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
1224 uint32_t Value = ConstantTotalSize->getValue(); 1213 uint32_t Value = ConstantTotalSize->getValue();
1225 Value = Utils::applyAlignment(Value, Alignment); 1214 Value = Utils::applyAlignment(Value, Alignment);
1226 Operand *SubAmount = legalize(Ctx->getConstantInt32(Value)); 1215 Operand *SubAmount = legalize(Ctx->getConstantInt32(Value));
1227 _sub(SP, SP, SubAmount); 1216 _sub(SP, SP, SubAmount);
1228 } else { 1217 } else {
1229 // Non-constant sizes need to be adjusted to the next highest 1218 // Non-constant sizes need to be adjusted to the next highest multiple of
1230 // multiple of the required alignment at runtime. 1219 // the required alignment at runtime.
1231 TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex); 1220 TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
1232 Variable *T = makeReg(IceType_i32); 1221 Variable *T = makeReg(IceType_i32);
1233 _mov(T, TotalSize); 1222 _mov(T, TotalSize);
1234 Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1)); 1223 Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
1235 _add(T, T, AddAmount); 1224 _add(T, T, AddAmount);
1236 alignRegisterPow2(T, Alignment); 1225 alignRegisterPow2(T, Alignment);
1237 _sub(SP, SP, T); 1226 _sub(SP, SP, T);
1238 } 1227 }
1239 _mov(Dest, SP); 1228 _mov(Dest, SP);
1240 } 1229 }
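A minimal sketch of the round-up that the constant-size path above relies on, assuming Utils::applyAlignment performs the usual power-of-two rounding (the helper name below is illustrative):

#include <cassert>
#include <cstdint>

// Round Value up to the next multiple of a power-of-two Alignment, mirroring
// what the constant-size alloca path is expected to compute.
static uint32_t roundUpToAlignment(uint32_t Value, uint32_t Alignment) {
  assert(Alignment != 0 && (Alignment & (Alignment - 1)) == 0);
  return (Value + Alignment - 1) & ~(Alignment - 1);
}
// Example: roundUpToAlignment(20, 16) == 32, so an "alloca 20, align 16"
// reserves 32 bytes.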
(...skipping 17 matching lines...)
1258 _tst(SrcLoReg, Mask); 1247 _tst(SrcLoReg, Mask);
1259 break; 1248 break;
1260 } 1249 }
1261 case IceType_i32: { 1250 case IceType_i32: {
1262 _tst(SrcLoReg, SrcLoReg); 1251 _tst(SrcLoReg, SrcLoReg);
1263 break; 1252 break;
1264 } 1253 }
1265 case IceType_i64: { 1254 case IceType_i64: {
1266 Variable *ScratchReg = makeReg(IceType_i32); 1255 Variable *ScratchReg = makeReg(IceType_i32);
1267 _orrs(ScratchReg, SrcLoReg, SrcHi); 1256 _orrs(ScratchReg, SrcLoReg, SrcHi);
1268 // ScratchReg isn't going to be used, but we need the 1257 // ScratchReg isn't going to be used, but we need the side-effect of
1269 // side-effect of setting flags from this operation. 1258 // setting flags from this operation.
1270 Context.insert(InstFakeUse::create(Func, ScratchReg)); 1259 Context.insert(InstFakeUse::create(Func, ScratchReg));
1271 } 1260 }
1272 } 1261 }
1273 InstARM32Label *Label = InstARM32Label::create(Func, this); 1262 InstARM32Label *Label = InstARM32Label::create(Func, this);
1274 _br(Label, CondARM32::NE); 1263 _br(Label, CondARM32::NE);
1275 _trap(); 1264 _trap();
1276 Context.insert(Label); 1265 Context.insert(Label);
1277 } 1266 }
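A C-level sketch of what the divide-by-zero check guards against, with __builtin_trap standing in for the emitted trap instruction (illustrative only):

#include <cstdint>

// For i64 the check ORs the two halves and traps when the result is zero;
// narrower types test the (masked) low register directly.
static void div0CheckModel(uint32_t DivisorLo, uint32_t DivisorHi) {
  if ((DivisorLo | DivisorHi) == 0)
    __builtin_trap(); // stands in for the trap emitted after the branch
}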
1278 1267
1279 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, 1268 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
(...skipping 23 matching lines...)
1303 InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs); 1292 InstCall *Call = makeHelperCall(DivHelperName, Dest, MaxSrcs);
1304 Call->addArg(T0R); 1293 Call->addArg(T0R);
1305 Call->addArg(T1R); 1294 Call->addArg(T1R);
1306 lowerCall(Call); 1295 lowerCall(Call);
1307 } 1296 }
1308 return; 1297 return;
1309 } 1298 }
1310 1299
1311 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { 1300 void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
1312 Variable *Dest = Inst->getDest(); 1301 Variable *Dest = Inst->getDest();
1313 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier 1302 // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to
1314 // to legalize Src0 to flex or Src1 to flex and there is a reversible 1303 // legalize Src0 to flex or Src1 to flex and there is a reversible
1315 // instruction. E.g., reverse subtract with immediate, register vs 1304 // instruction. E.g., reverse subtract with immediate, register vs register,
1316 // register, immediate. 1305 // immediate.
1317 // Or it may be the case that the operands aren't swapped, but the 1306 // Or it may be the case that the operands aren't swapped, but the bits can
1318 // bits can be flipped and a different operation applied. 1307 // be flipped and a different operation applied. E.g., use BIC (bit clear)
1319 // E.g., use BIC (bit clear) instead of AND for some masks. 1308 // instead of AND for some masks.
1320 Operand *Src0 = legalizeUndef(Inst->getSrc(0)); 1309 Operand *Src0 = legalizeUndef(Inst->getSrc(0));
1321 Operand *Src1 = legalizeUndef(Inst->getSrc(1)); 1310 Operand *Src1 = legalizeUndef(Inst->getSrc(1));
1322 if (Dest->getType() == IceType_i64) { 1311 if (Dest->getType() == IceType_i64) {
1323 // These helper-call-involved instructions are lowered in this 1312 // These helper-call-involved instructions are lowered in this separate
1324 // separate switch. This is because we would otherwise assume that 1313 // switch. This is because we would otherwise assume that we need to
1325 // we need to legalize Src0 to Src0RLo and Src0Hi. However, those go unused 1314 // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with
1326 // with helper calls, and such unused/redundant instructions will fail 1315 // helper calls, and such unused/redundant instructions will fail liveness
1327 // liveness analysis under -Om1 setting. 1316 // analysis under -Om1 setting.
1328 switch (Inst->getOp()) { 1317 switch (Inst->getOp()) {
1329 default: 1318 default:
1330 break; 1319 break;
1331 case InstArithmetic::Udiv: 1320 case InstArithmetic::Udiv:
1332 case InstArithmetic::Sdiv: 1321 case InstArithmetic::Sdiv:
1333 case InstArithmetic::Urem: 1322 case InstArithmetic::Urem:
1334 case InstArithmetic::Srem: { 1323 case InstArithmetic::Srem: {
1335 // Check for divide by 0 (ARM normally doesn't trap, but we want it 1324 // Check for divide by 0 (ARM normally doesn't trap, but we want it to
1336 // to trap for NaCl). Src1Lo and Src1Hi may have already been legalized 1325 // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
1337 // to a register, which will hide a constant source operand. 1326 // register, which will hide a constant source operand. Instead, check
1338 // Instead, check the not-yet-legalized Src1 to optimize-out a divide 1327 // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
1339 // by 0 check.
1340 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { 1328 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
1341 if (C64->getValue() == 0) { 1329 if (C64->getValue() == 0) {
1342 _trap(); 1330 _trap();
1343 return; 1331 return;
1344 } 1332 }
1345 } else { 1333 } else {
1346 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); 1334 Operand *Src1Lo = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
1347 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); 1335 Operand *Src1Hi = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
1348 div0Check(IceType_i64, Src1Lo, Src1Hi); 1336 div0Check(IceType_i64, Src1Lo, Src1Hi);
1349 } 1337 }
1350 // Technically, ARM has their own aeabi routines, but we can use the 1338 // Technically, ARM has their own aeabi routines, but we can use the
1351 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, 1339 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
1352 // but uses the more standard __moddi3 for rem. 1340 // the more standard __moddi3 for rem.
1353 const char *HelperName = ""; 1341 const char *HelperName = "";
1354 switch (Inst->getOp()) { 1342 switch (Inst->getOp()) {
1355 default: 1343 default:
1356 llvm_unreachable("Should have only matched div ops."); 1344 llvm_unreachable("Should have only matched div ops.");
1357 break; 1345 break;
1358 case InstArithmetic::Udiv: 1346 case InstArithmetic::Udiv:
1359 HelperName = H_udiv_i64; 1347 HelperName = H_udiv_i64;
1360 break; 1348 break;
1361 case InstArithmetic::Sdiv: 1349 case InstArithmetic::Sdiv:
1362 HelperName = H_sdiv_i64; 1350 HelperName = H_sdiv_i64;
(...skipping 102 matching lines...)
1465 // a=b<<c ==> 1453 // a=b<<c ==>
1466 // GCC 4.8 does: 1454 // GCC 4.8 does:
1467 // sub t_c1, c.lo, #32 1455 // sub t_c1, c.lo, #32
1468 // lsl t_hi, b.hi, c.lo 1456 // lsl t_hi, b.hi, c.lo
1469 // orr t_hi, t_hi, b.lo, lsl t_c1 1457 // orr t_hi, t_hi, b.lo, lsl t_c1
1470 // rsb t_c2, c.lo, #32 1458 // rsb t_c2, c.lo, #32
1471 // orr t_hi, t_hi, b.lo, lsr t_c2 1459 // orr t_hi, t_hi, b.lo, lsr t_c2
1472 // lsl t_lo, b.lo, c.lo 1460 // lsl t_lo, b.lo, c.lo
1473 // a.lo = t_lo 1461 // a.lo = t_lo
1474 // a.hi = t_hi 1462 // a.hi = t_hi
1475 // Can be strength-reduced for constant-shifts, but we don't do 1463 // Can be strength-reduced for constant-shifts, but we don't do that for
1476 // that for now. 1464 // now.
1477 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. 1465 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
1478 // On ARM, shifts only take the lower 8 bits of the shift register, 1466 // ARM, shifts only take the lower 8 bits of the shift register, and
1479 // and saturate to the range 0-32, so the negative value will 1467 // saturate to the range 0-32, so the negative value will saturate to 32.
1480 // saturate to 32.
1481 Variable *T_Hi = makeReg(IceType_i32); 1468 Variable *T_Hi = makeReg(IceType_i32);
1482 Variable *Src1RLo = legalizeToReg(Src1Lo); 1469 Variable *Src1RLo = legalizeToReg(Src1Lo);
1483 Constant *ThirtyTwo = Ctx->getConstantInt32(32); 1470 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
1484 Variable *T_C1 = makeReg(IceType_i32); 1471 Variable *T_C1 = makeReg(IceType_i32);
1485 Variable *T_C2 = makeReg(IceType_i32); 1472 Variable *T_C2 = makeReg(IceType_i32);
1486 _sub(T_C1, Src1RLo, ThirtyTwo); 1473 _sub(T_C1, Src1RLo, ThirtyTwo);
1487 _lsl(T_Hi, Src0RHi, Src1RLo); 1474 _lsl(T_Hi, Src0RHi, Src1RLo);
1488 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, 1475 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1489 OperandARM32::LSL, T_C1)); 1476 OperandARM32::LSL, T_C1));
1490 _rsb(T_C2, Src1RLo, ThirtyTwo); 1477 _rsb(T_C2, Src1RLo, ThirtyTwo);
1491 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, 1478 _orr(T_Hi, T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1492 OperandARM32::LSR, T_C2)); 1479 OperandARM32::LSR, T_C2));
1493 _mov(DestHi, T_Hi); 1480 _mov(DestHi, T_Hi);
1494 Variable *T_Lo = makeReg(IceType_i32); 1481 Variable *T_Lo = makeReg(IceType_i32);
1495 // _mov seems to sometimes have better register preferencing than lsl. 1482 // _mov seems to sometimes have better register preferencing than lsl.
1496 // Otherwise mov w/ lsl shifted register is a pseudo-instruction 1483 // Otherwise mov w/ lsl shifted register is a pseudo-instruction that
1497 // that maps to lsl. 1484 // maps to lsl.
1498 _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, 1485 _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
1499 OperandARM32::LSL, Src1RLo)); 1486 OperandARM32::LSL, Src1RLo));
1500 _mov(DestLo, T_Lo); 1487 _mov(DestLo, T_Lo);
1501 return; 1488 return;
1502 } 1489 }
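A sketch of why the GCC 4.8 sequence works, assuming the saturation behavior the comment describes for register-specified shifts (helper names are illustrative and the code only models the lowering):

#include <cstdint>

// Register-specified LSL/LSR model: amounts of 32 or more yield 0.
static uint32_t lslModel(uint32_t X, uint32_t S) { return S >= 32 ? 0 : X << S; }
static uint32_t lsrModel(uint32_t X, uint32_t S) { return S >= 32 ? 0 : X >> S; }

// 64-bit shift-left from 32-bit halves, following the commented sequence.
// Valid for shift amounts C in [0, 63]; the "negative" intermediate amounts
// wrap to large values and therefore contribute zero, as on ARM.
static void shl64Model(uint32_t BLo, uint32_t BHi, uint32_t C, uint32_t *ALo,
                       uint32_t *AHi) {
  uint32_t THi = lslModel(BHi, C);   // lsl t_hi, b.hi, c.lo
  THi |= lslModel(BLo, C - 32);      // orr t_hi, t_hi, b.lo, lsl t_c1
  THi |= lsrModel(BLo, 32 - C);      // orr t_hi, t_hi, b.lo, lsr t_c2
  *ALo = lslModel(BLo, C);           // lsl t_lo, b.lo, c.lo
  *AHi = THi;
}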
1503 case InstArithmetic::Lshr: 1490 case InstArithmetic::Lshr:
1504 // a=b>>c (unsigned) ==> 1491 // a=b>>c (unsigned) ==>
1505 // GCC 4.8 does: 1492 // GCC 4.8 does:
1506 // rsb t_c1, c.lo, #32 1493 // rsb t_c1, c.lo, #32
1507 // lsr t_lo, b.lo, c.lo 1494 // lsr t_lo, b.lo, c.lo
1508 // orr t_lo, t_lo, b.hi, lsl t_c1 1495 // orr t_lo, t_lo, b.hi, lsl t_c1
1509 // sub t_c2, c.lo, #32 1496 // sub t_c2, c.lo, #32
1510 // orr t_lo, t_lo, b.hi, lsr t_c2 1497 // orr t_lo, t_lo, b.hi, lsr t_c2
1511 // lsr t_hi, b.hi, c.lo 1498 // lsr t_hi, b.hi, c.lo
1512 // a.lo = t_lo 1499 // a.lo = t_lo
1513 // a.hi = t_hi 1500 // a.hi = t_hi
1514 case InstArithmetic::Ashr: { 1501 case InstArithmetic::Ashr: {
1515 // a=b>>c (signed) ==> ... 1502 // a=b>>c (signed) ==> ...
1516 // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, 1503 // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, and the
1517 // and the next orr should be conditioned on PLUS. The last two 1504 // next orr should be conditioned on PLUS. The last two right shifts
1518 // right shifts should also be arithmetic. 1505 // should also be arithmetic.
1519 bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; 1506 bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
1520 Variable *T_Lo = makeReg(IceType_i32); 1507 Variable *T_Lo = makeReg(IceType_i32);
1521 Variable *Src1RLo = legalizeToReg(Src1Lo); 1508 Variable *Src1RLo = legalizeToReg(Src1Lo);
1522 Constant *ThirtyTwo = Ctx->getConstantInt32(32); 1509 Constant *ThirtyTwo = Ctx->getConstantInt32(32);
1523 Variable *T_C1 = makeReg(IceType_i32); 1510 Variable *T_C1 = makeReg(IceType_i32);
1524 Variable *T_C2 = makeReg(IceType_i32); 1511 Variable *T_C2 = makeReg(IceType_i32);
1525 _rsb(T_C1, Src1RLo, ThirtyTwo); 1512 _rsb(T_C1, Src1RLo, ThirtyTwo);
1526 _lsr(T_Lo, Src0RLo, Src1RLo); 1513 _lsr(T_Lo, Src0RLo, Src1RLo);
1527 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi, 1514 _orr(T_Lo, T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
1528 OperandARM32::LSL, T_C1)); 1515 OperandARM32::LSL, T_C1));
(...skipping 187 matching lines...)
1716 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 1703 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1717 Variable *T_Lo = nullptr, *T_Hi = nullptr; 1704 Variable *T_Lo = nullptr, *T_Hi = nullptr;
1718 _mov(T_Lo, Src0Lo); 1705 _mov(T_Lo, Src0Lo);
1719 _mov(DestLo, T_Lo); 1706 _mov(DestLo, T_Lo);
1720 _mov(T_Hi, Src0Hi); 1707 _mov(T_Hi, Src0Hi);
1721 _mov(DestHi, T_Hi); 1708 _mov(DestHi, T_Hi);
1722 } else { 1709 } else {
1723 Operand *NewSrc; 1710 Operand *NewSrc;
1724 if (Dest->hasReg()) { 1711 if (Dest->hasReg()) {
1725 // If Dest already has a physical register, then legalize the Src operand 1712 // If Dest already has a physical register, then legalize the Src operand
1726 // into a Variable with the same register assignment. This especially 1713 // into a Variable with the same register assignment. This especially
1727 // helps allow the use of Flex operands. 1714 // helps allow the use of Flex operands.
1728 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); 1715 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
1729 } else { 1716 } else {
1730 // Dest could be a stack operand. Since we could potentially need 1717 // Dest could be a stack operand. Since we could potentially need to do a
1731 // to do a Store (and store can only have Register operands), 1718 // Store (and store can only have Register operands), legalize this to a
1732 // legalize this to a register. 1719 // register.
1733 NewSrc = legalize(Src0, Legal_Reg); 1720 NewSrc = legalize(Src0, Legal_Reg);
1734 } 1721 }
1735 if (isVectorType(Dest->getType())) { 1722 if (isVectorType(Dest->getType())) {
1736 UnimplementedError(Func->getContext()->getFlags()); 1723 UnimplementedError(Func->getContext()->getFlags());
1737 } else if (isFloatingType(Dest->getType())) { 1724 } else if (isFloatingType(Dest->getType())) {
1738 Variable *SrcR = legalizeToReg(NewSrc); 1725 Variable *SrcR = legalizeToReg(NewSrc);
1739 _vmov(Dest, SrcR); 1726 _vmov(Dest, SrcR);
1740 } else { 1727 } else {
1741 _mov(Dest, NewSrc); 1728 _mov(Dest, NewSrc);
1742 } 1729 }
(...skipping 60 matching lines...)
1803 } 1790 }
1804 1791
1805 if (!InRegs) { 1792 if (!InRegs) {
1806 ParameterAreaSizeBytes = 1793 ParameterAreaSizeBytes =
1807 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty); 1794 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
1808 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes)); 1795 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
1809 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); 1796 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
1810 } 1797 }
1811 } 1798 }
1812 1799
1813 // Adjust the parameter area so that the stack is aligned. It is 1800 // Adjust the parameter area so that the stack is aligned. It is assumed that
1814 // assumed that the stack is already aligned at the start of the 1801 // the stack is already aligned at the start of the calling sequence.
1815 // calling sequence.
1816 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); 1802 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1817 1803
1818 // Subtract the appropriate amount for the argument area. This also 1804 // Subtract the appropriate amount for the argument area. This also takes
1819 // takes care of setting the stack adjustment during emission. 1805 // care of setting the stack adjustment during emission.
1820 // 1806 //
1821 // TODO: If for some reason the call instruction gets dead-code 1807 // TODO: If for some reason the call instruction gets dead-code eliminated
1822 // eliminated after lowering, we would need to ensure that the 1808 // after lowering, we would need to ensure that the pre-call and the
1823 // pre-call and the post-call esp adjustment get eliminated as well. 1809 // post-call esp adjustment get eliminated as well.
1824 if (ParameterAreaSizeBytes) { 1810 if (ParameterAreaSizeBytes) {
1825 Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), 1811 Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
1826 Legal_Reg | Legal_Flex); 1812 Legal_Reg | Legal_Flex);
1827 _adjust_stack(ParameterAreaSizeBytes, SubAmount); 1813 _adjust_stack(ParameterAreaSizeBytes, SubAmount);
1828 } 1814 }
1829 1815
1830 // Copy arguments that are passed on the stack to the appropriate 1816 // Copy arguments that are passed on the stack to the appropriate stack
1831 // stack locations. 1817 // locations.
1832 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 1818 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1833 for (auto &StackArg : StackArgs) { 1819 for (auto &StackArg : StackArgs) {
1834 ConstantInteger32 *Loc = 1820 ConstantInteger32 *Loc =
1835 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second)); 1821 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
1836 Type Ty = StackArg.first->getType(); 1822 Type Ty = StackArg.first->getType();
1837 OperandARM32Mem *Addr; 1823 OperandARM32Mem *Addr;
1838 constexpr bool SignExt = false; 1824 constexpr bool SignExt = false;
1839 if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) { 1825 if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
1840 Addr = OperandARM32Mem::create(Func, Ty, SP, Loc); 1826 Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
1841 } else { 1827 } else {
1842 Variable *NewBase = Func->makeVariable(SP->getType()); 1828 Variable *NewBase = Func->makeVariable(SP->getType());
1843 lowerArithmetic( 1829 lowerArithmetic(
1844 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc)); 1830 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
1845 Addr = formMemoryOperand(NewBase, Ty); 1831 Addr = formMemoryOperand(NewBase, Ty);
1846 } 1832 }
1847 lowerStore(InstStore::create(Func, StackArg.first, Addr)); 1833 lowerStore(InstStore::create(Func, StackArg.first, Addr));
1848 } 1834 }
1849 1835
1850 // Copy arguments to be passed in registers to the appropriate registers. 1836 // Copy arguments to be passed in registers to the appropriate registers.
1851 for (auto &GPRArg : GPRArgs) { 1837 for (auto &GPRArg : GPRArgs) {
1852 Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second); 1838 Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second);
1853 // Generate a FakeUse of register arguments so that they do not get 1839 // Generate a FakeUse of register arguments so that they do not get dead
1854 // dead code eliminated as a result of the FakeKill of scratch 1840 // code eliminated as a result of the FakeKill of scratch registers after
1855 // registers after the call. 1841 // the call.
1856 Context.insert(InstFakeUse::create(Func, Reg)); 1842 Context.insert(InstFakeUse::create(Func, Reg));
1857 } 1843 }
1858 for (auto &FPArg : FPArgs) { 1844 for (auto &FPArg : FPArgs) {
1859 Variable *Reg = legalizeToReg(FPArg.first, FPArg.second); 1845 Variable *Reg = legalizeToReg(FPArg.first, FPArg.second);
1860 Context.insert(InstFakeUse::create(Func, Reg)); 1846 Context.insert(InstFakeUse::create(Func, Reg));
1861 } 1847 }
1862 1848
1863 // Generate the call instruction. Assign its result to a temporary 1849 // Generate the call instruction. Assign its result to a temporary with high
1864 // with high register allocation weight. 1850 // register allocation weight.
1865 Variable *Dest = Instr->getDest(); 1851 Variable *Dest = Instr->getDest();
1866 // ReturnReg doubles as ReturnRegLo as necessary. 1852 // ReturnReg doubles as ReturnRegLo as necessary.
1867 Variable *ReturnReg = nullptr; 1853 Variable *ReturnReg = nullptr;
1868 Variable *ReturnRegHi = nullptr; 1854 Variable *ReturnRegHi = nullptr;
1869 if (Dest) { 1855 if (Dest) {
1870 switch (Dest->getType()) { 1856 switch (Dest->getType()) {
1871 case IceType_NUM: 1857 case IceType_NUM:
1872 llvm_unreachable("Invalid Call dest type"); 1858 llvm_unreachable("Invalid Call dest type");
1873 break; 1859 break;
1874 case IceType_void: 1860 case IceType_void:
(...skipping 19 matching lines...) Expand all
1894 case IceType_v16i1: 1880 case IceType_v16i1:
1895 case IceType_v16i8: 1881 case IceType_v16i8:
1896 case IceType_v8i16: 1882 case IceType_v8i16:
1897 case IceType_v4i32: 1883 case IceType_v4i32:
1898 case IceType_v4f32: 1884 case IceType_v4f32:
1899 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0); 1885 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0);
1900 break; 1886 break;
1901 } 1887 }
1902 } 1888 }
1903 Operand *CallTarget = Instr->getCallTarget(); 1889 Operand *CallTarget = Instr->getCallTarget();
1904 // TODO(jvoung): Handle sandboxing. 1890 // TODO(jvoung): Handle sandboxing. const bool NeedSandboxing =
1905 // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); 1891 // Ctx->getFlags().getUseSandboxing();
1906 1892
1907 // Allow ConstantRelocatable to be left alone as a direct call, 1893 // Allow ConstantRelocatable to be left alone as a direct call, but force
1908 // but force other constants like ConstantInteger32 to be in 1894 // other constants like ConstantInteger32 to be in a register and make it an
1909 // a register and make it an indirect call. 1895 // indirect call.
1910 if (!llvm::isa<ConstantRelocatable>(CallTarget)) { 1896 if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
1911 CallTarget = legalize(CallTarget, Legal_Reg); 1897 CallTarget = legalize(CallTarget, Legal_Reg);
1912 } 1898 }
1913 Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget); 1899 Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
1914 Context.insert(NewCall); 1900 Context.insert(NewCall);
1915 if (ReturnRegHi) 1901 if (ReturnRegHi)
1916 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); 1902 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
1917 1903
1918 // Add the appropriate offset to SP. The call instruction takes care 1904 // Add the appropriate offset to SP. The call instruction takes care of
1919 // of resetting the stack offset during emission. 1905 // resetting the stack offset during emission.
1920 if (ParameterAreaSizeBytes) { 1906 if (ParameterAreaSizeBytes) {
1921 Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), 1907 Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
1922 Legal_Reg | Legal_Flex); 1908 Legal_Reg | Legal_Flex);
1923 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 1909 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1924 _add(SP, SP, AddAmount); 1910 _add(SP, SP, AddAmount);
1925 } 1911 }
1926 1912
1927 // Insert a register-kill pseudo instruction. 1913 // Insert a register-kill pseudo instruction.
1928 Context.insert(InstFakeKill::create(Func, NewCall)); 1914 Context.insert(InstFakeKill::create(Func, NewCall));
1929 1915
(...skipping 87 matching lines...)
2017 } 2003 }
2018 case InstCast::Zext: { 2004 case InstCast::Zext: {
2019 if (isVectorType(Dest->getType())) { 2005 if (isVectorType(Dest->getType())) {
2020 UnimplementedError(Func->getContext()->getFlags()); 2006 UnimplementedError(Func->getContext()->getFlags());
2021 } else if (Dest->getType() == IceType_i64) { 2007 } else if (Dest->getType() == IceType_i64) {
2022 // t1=uxtb src; dst.lo=t1; dst.hi=0 2008 // t1=uxtb src; dst.lo=t1; dst.hi=0
2023 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2009 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2024 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2010 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2025 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2011 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2026 Variable *T_Lo = makeReg(DestLo->getType()); 2012 Variable *T_Lo = makeReg(DestLo->getType());
2027 // i32 and i1 can just take up the whole register. 2013 // i32 and i1 can just take up the whole register. i32 doesn't need uxt,
2028 // i32 doesn't need uxt, while i1 will have an and mask later anyway. 2014 // while i1 will have an and mask later anyway.
2029 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) { 2015 if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
2030 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); 2016 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
2031 _mov(T_Lo, Src0RF); 2017 _mov(T_Lo, Src0RF);
2032 } else { 2018 } else {
2033 Variable *Src0R = legalizeToReg(Src0); 2019 Variable *Src0R = legalizeToReg(Src0);
2034 _uxt(T_Lo, Src0R); 2020 _uxt(T_Lo, Src0R);
2035 } 2021 }
2036 if (Src0->getType() == IceType_i1) { 2022 if (Src0->getType() == IceType_i1) {
2037 Constant *One = Ctx->getConstantInt32(1); 2023 Constant *One = Ctx->getConstantInt32(1);
2038 _and(T_Lo, T_Lo, One); 2024 _and(T_Lo, T_Lo, One);
2039 } 2025 }
2040 _mov(DestLo, T_Lo); 2026 _mov(DestLo, T_Lo);
2041 Variable *T_Hi = makeReg(DestLo->getType()); 2027 Variable *T_Hi = makeReg(DestLo->getType());
2042 _mov(T_Hi, Zero); 2028 _mov(T_Hi, Zero);
2043 _mov(DestHi, T_Hi); 2029 _mov(DestHi, T_Hi);
2044 } else if (Src0->getType() == IceType_i1) { 2030 } else if (Src0->getType() == IceType_i1) {
2045 // t = Src0; t &= 1; Dest = t 2031 // t = Src0; t &= 1; Dest = t
2046 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); 2032 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
2047 Constant *One = Ctx->getConstantInt32(1); 2033 Constant *One = Ctx->getConstantInt32(1);
2048 Variable *T = makeReg(Dest->getType()); 2034 Variable *T = makeReg(Dest->getType());
2049 // Just use _mov instead of _uxt since all registers are 32-bit. 2035 // Just use _mov instead of _uxt since all registers are 32-bit. _uxt
2050 // _uxt requires the source to be a register so could have required 2036 // requires the source to be a register so could have required a _mov
2051 // a _mov from legalize anyway. 2037 // from legalize anyway.
2052 _mov(T, Src0RF); 2038 _mov(T, Src0RF);
2053 _and(T, T, One); 2039 _and(T, T, One);
2054 _mov(Dest, T); 2040 _mov(Dest, T);
2055 } else { 2041 } else {
2056 // t1 = uxt src; dst = t1 2042 // t1 = uxt src; dst = t1
2057 Variable *Src0R = legalizeToReg(Src0); 2043 Variable *Src0R = legalizeToReg(Src0);
2058 Variable *T = makeReg(Dest->getType()); 2044 Variable *T = makeReg(Dest->getType());
2059 _uxt(T, Src0R); 2045 _uxt(T, Src0R);
2060 _mov(Dest, T); 2046 _mov(Dest, T);
2061 } 2047 }
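A tiny model of the i1-to-i64 zext path above, masking the low word and zeroing the high word (illustrative only):

#include <cstdint>

// zext i1 to i64: low word is the source masked with 1, high word is zero.
static uint64_t zext1To64Model(uint32_t Src) {
  uint32_t TLo = Src & 1; // and t_lo, t_lo, #1
  uint32_t THi = 0;       // mov t_hi, #0
  return (static_cast<uint64_t>(THi) << 32) | TLo;
}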
(...skipping 219 matching lines...)
2281 } 2267 }
2282 2268
2283 // a=icmp cond, b, c ==> 2269 // a=icmp cond, b, c ==>
2284 // GCC does: 2270 // GCC does:
2285 // cmp b.hi, c.hi or cmp b.lo, c.lo 2271 // cmp b.hi, c.hi or cmp b.lo, c.lo
2286 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi 2272 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi
2287 // mov.<C1> t, #1 mov.<C1> t, #1 2273 // mov.<C1> t, #1 mov.<C1> t, #1
2288 // mov.<C2> t, #0 mov.<C2> t, #0 2274 // mov.<C2> t, #0 mov.<C2> t, #0
2289 // mov a, t mov a, t 2275 // mov a, t mov a, t
2290 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" 2276 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
2291 // is used for signed compares. In some cases, b and c need to be swapped 2277 // is used for signed compares. In some cases, b and c need to be swapped as
2292 // as well. 2278 // well.
2293 // 2279 //
2294 // LLVM does: 2280 // LLVM does:
2295 // for EQ and NE: 2281 // for EQ and NE:
2296 // eor t1, b.hi, c.hi 2282 // eor t1, b.hi, c.hi
2297 // eor t2, b.lo, c.hi 2283 // eor t2, b.lo, c.hi
2298 // orrs t, t1, t2 2284 // orrs t, t1, t2
2299 // mov.<C> t, #1 2285 // mov.<C> t, #1
2300 // mov a, t 2286 // mov a, t
2301 // 2287 //
2302 // that's nice in that it's just as short but has fewer dependencies 2288 // that's nice in that it's just as short but has fewer dependencies for
2303 // for better ILP at the cost of more registers. 2289 // better ILP at the cost of more registers.
2304 // 2290 //
2305 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with 2291 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
2306 // two unconditional mov #0, two cmps, two conditional mov #1, 2292 // unconditional mov #0, two cmps, two conditional mov #1, and one
2307 // and one conditonal reg mov. That has few dependencies for good ILP, 2293 // conditional reg mov. That has few dependencies for good ILP, but is a
2308 // but is a longer sequence. 2294 // longer sequence.
2309 // 2295 //
2310 // So, we are going with the GCC version since it's usually better (except 2296 // So, we are going with the GCC version since it's usually better (except
2311 // perhaps for eq/ne). We could revisit special-casing eq/ne later. 2297 // perhaps for eq/ne). We could revisit special-casing eq/ne later.
2312 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2298 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2313 Constant *One = Ctx->getConstantInt32(1); 2299 Constant *One = Ctx->getConstantInt32(1);
2314 if (Src0->getType() == IceType_i64) { 2300 if (Src0->getType() == IceType_i64) {
2315 InstIcmp::ICond Condition = Inst->getCondition(); 2301 InstIcmp::ICond Condition = Inst->getCondition();
2316 size_t Index = static_cast<size_t>(Condition); 2302 size_t Index = static_cast<size_t>(Condition);
2317 assert(Index < llvm::array_lengthof(TableIcmp64)); 2303 assert(Index < llvm::array_lengthof(TableIcmp64));
2318 Variable *Src0Lo, *Src0Hi; 2304 Variable *Src0Lo, *Src0Hi;
2319 Operand *Src1LoRF, *Src1HiRF; 2305 Operand *Src1LoRF, *Src1HiRF;
2320 if (TableIcmp64[Index].Swapped) { 2306 if (TableIcmp64[Index].Swapped) {
2321 Src0Lo = legalizeToReg(loOperand(Src1)); 2307 Src0Lo = legalizeToReg(loOperand(Src1));
2322 Src0Hi = legalizeToReg(hiOperand(Src1)); 2308 Src0Hi = legalizeToReg(hiOperand(Src1));
2323 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex); 2309 Src1LoRF = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
2324 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex); 2310 Src1HiRF = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
2325 } else { 2311 } else {
2326 Src0Lo = legalizeToReg(loOperand(Src0)); 2312 Src0Lo = legalizeToReg(loOperand(Src0));
2327 Src0Hi = legalizeToReg(hiOperand(Src0)); 2313 Src0Hi = legalizeToReg(hiOperand(Src0));
2328 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex); 2314 Src1LoRF = legalize(loOperand(Src1), Legal_Reg | Legal_Flex);
2329 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex); 2315 Src1HiRF = legalize(hiOperand(Src1), Legal_Reg | Legal_Flex);
2330 } 2316 }
2331 Variable *T = makeReg(IceType_i32); 2317 Variable *T = makeReg(IceType_i32);
2332 if (TableIcmp64[Index].IsSigned) { 2318 if (TableIcmp64[Index].IsSigned) {
2333 Variable *ScratchReg = makeReg(IceType_i32); 2319 Variable *ScratchReg = makeReg(IceType_i32);
2334 _cmp(Src0Lo, Src1LoRF); 2320 _cmp(Src0Lo, Src1LoRF);
2335 _sbcs(ScratchReg, Src0Hi, Src1HiRF); 2321 _sbcs(ScratchReg, Src0Hi, Src1HiRF);
2336 // ScratchReg isn't going to be used, but we need the 2322 // ScratchReg isn't going to be used, but we need the side-effect of
2337 // side-effect of setting flags from this operation. 2323 // setting flags from this operation.
2338 Context.insert(InstFakeUse::create(Func, ScratchReg)); 2324 Context.insert(InstFakeUse::create(Func, ScratchReg));
2339 } else { 2325 } else {
2340 _cmp(Src0Hi, Src1HiRF); 2326 _cmp(Src0Hi, Src1HiRF);
2341 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ); 2327 _cmp(Src0Lo, Src1LoRF, CondARM32::EQ);
2342 } 2328 }
2343 _mov(T, One, TableIcmp64[Index].C1); 2329 _mov(T, One, TableIcmp64[Index].C1);
2344 _mov_nonkillable(T, Zero, TableIcmp64[Index].C2); 2330 _mov_nonkillable(T, Zero, TableIcmp64[Index].C2);
2345 _mov(Dest, T); 2331 _mov(Dest, T);
2346 return; 2332 return;
2347 } 2333 }
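A plain C++ model of the two flag-setting idioms chosen above, i.e. the result they are intended to produce (names are illustrative, not part of the patch):

#include <cstdint>

// Unsigned i64 less-than (cmp hi; cmp.eq lo): the high-word compare decides
// unless the highs are equal, in which case the low-word compare decides.
static bool ult64Model(uint32_t ALo, uint32_t AHi, uint32_t BLo, uint32_t BHi) {
  if (AHi != BHi)
    return AHi < BHi;
  return ALo < BLo;
}

// Signed i64 less-than (cmp lo; sbcs hi): behaves like a full 64-bit signed
// comparison.
static bool slt64Model(uint32_t ALo, uint32_t AHi, uint32_t BLo, uint32_t BHi) {
  int64_t A = static_cast<int64_t>((static_cast<uint64_t>(AHi) << 32) | ALo);
  int64_t B = static_cast<int64_t>((static_cast<uint64_t>(BHi) << 32) | BLo);
  return A < B;
}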
2348 2334
2349 // a=icmp cond b, c ==> 2335 // a=icmp cond b, c ==>
2350 // GCC does: 2336 // GCC does:
2351 // <u/s>xtb tb, b 2337 // <u/s>xtb tb, b
2352 // <u/s>xtb tc, c 2338 // <u/s>xtb tc, c
2353 // cmp tb, tc 2339 // cmp tb, tc
2354 // mov.C1 t, #0 2340 // mov.C1 t, #0
2355 // mov.C2 t, #1 2341 // mov.C2 t, #1
2356 // mov a, t 2342 // mov a, t
2357 // where the unsigned/sign extension is not needed for 32-bit. 2343 // where the unsigned/sign extension is not needed for 32-bit. They also have
2358 // They also have special cases for EQ and NE. E.g., for NE: 2344 // special cases for EQ and NE. E.g., for NE:
2359 // <extend to tb, tc> 2345 // <extend to tb, tc>
2360 // subs t, tb, tc 2346 // subs t, tb, tc
2361 // movne t, #1 2347 // movne t, #1
2362 // mov a, t 2348 // mov a, t
2363 // 2349 //
2364 // LLVM does: 2350 // LLVM does:
2365 // lsl tb, b, #<N> 2351 // lsl tb, b, #<N>
2366 // mov t, #0 2352 // mov t, #0
2367 // cmp tb, c, lsl #<N> 2353 // cmp tb, c, lsl #<N>
2368 // mov.<C> t, #1 2354 // mov.<C> t, #1
2369 // mov a, t 2355 // mov a, t
2370 // 2356 //
2371 // the left shift is by 0, 16, or 24, which allows the comparison to focus 2357 // the left shift is by 0, 16, or 24, which allows the comparison to focus on
2372 // on the digits that actually matter (for 16-bit or 8-bit signed/unsigned). 2358 // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For
2373 // For the unsigned case, for some reason it does similar to GCC and does 2359 // the unsigned case, for some reason it does similar to GCC and does a uxtb
2374 // a uxtb first. It's not clear to me why that special-casing is needed. 2360 // first. It's not clear to me why that special-casing is needed.
2375 // 2361 //
2376 // We'll go with the LLVM way for now, since it's shorter and has just as 2362 // We'll go with the LLVM way for now, since it's shorter and has just as few
2377 // few dependencies. 2363 // dependencies.
2378 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); 2364 int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
2379 assert(ShiftAmt >= 0); 2365 assert(ShiftAmt >= 0);
2380 Constant *ShiftConst = nullptr; 2366 Constant *ShiftConst = nullptr;
2381 Variable *Src0R = nullptr; 2367 Variable *Src0R = nullptr;
2382 Variable *T = makeReg(IceType_i32); 2368 Variable *T = makeReg(IceType_i32);
2383 if (ShiftAmt) { 2369 if (ShiftAmt) {
2384 ShiftConst = Ctx->getConstantInt32(ShiftAmt); 2370 ShiftConst = Ctx->getConstantInt32(ShiftAmt);
2385 Src0R = makeReg(IceType_i32); 2371 Src0R = makeReg(IceType_i32);
2386 _lsl(Src0R, legalizeToReg(Src0), ShiftConst); 2372 _lsl(Src0R, legalizeToReg(Src0), ShiftConst);
2387 } else { 2373 } else {
(...skipping 22 matching lines...)
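A small model of the left-shift compare trick described before the code above, for the unsigned case (illustrative only):

#include <cstdint>

// i8 unsigned greater-than via the shift-by-24 trick: both operands are
// shifted left so only the 8 meaningful bits participate in the compare.
static bool ugt8ViaShiftModel(uint8_t A, uint8_t B) {
  const int ShiftAmt = 32 - 8; // 24 for i8, 16 for i16, 0 for i32
  uint32_t AShifted = static_cast<uint32_t>(A) << ShiftAmt;
  uint32_t BShifted = static_cast<uint32_t>(B) << ShiftAmt;
  return AShifted > BShifted; // same result as A > B on the original values
}
// The signed variants work the same way because the narrow sign bit ends up in
// bit 31 after the shift.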
2410 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { 2396 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
2411 switch (Instr->getIntrinsicInfo().ID) { 2397 switch (Instr->getIntrinsicInfo().ID) {
2412 case Intrinsics::AtomicCmpxchg: { 2398 case Intrinsics::AtomicCmpxchg: {
2413 UnimplementedError(Func->getContext()->getFlags()); 2399 UnimplementedError(Func->getContext()->getFlags());
2414 return; 2400 return;
2415 } 2401 }
2416 case Intrinsics::AtomicFence: 2402 case Intrinsics::AtomicFence:
2417 UnimplementedError(Func->getContext()->getFlags()); 2403 UnimplementedError(Func->getContext()->getFlags());
2418 return; 2404 return;
2419 case Intrinsics::AtomicFenceAll: 2405 case Intrinsics::AtomicFenceAll:
2420 // NOTE: FenceAll should prevent any load/store from being moved 2406 // NOTE: FenceAll should prevent any load/store from being moved across the
2421 // across the fence (both atomic and non-atomic). The InstARM32Mfence 2407 // fence (both atomic and non-atomic). The InstARM32Mfence instruction is
2422 // instruction is currently marked coarsely as "HasSideEffects". 2408 // currently marked coarsely as "HasSideEffects".
2423 UnimplementedError(Func->getContext()->getFlags()); 2409 UnimplementedError(Func->getContext()->getFlags());
2424 return; 2410 return;
2425 case Intrinsics::AtomicIsLockFree: { 2411 case Intrinsics::AtomicIsLockFree: {
2426 UnimplementedError(Func->getContext()->getFlags()); 2412 UnimplementedError(Func->getContext()->getFlags());
2427 return; 2413 return;
2428 } 2414 }
2429 case Intrinsics::AtomicLoad: { 2415 case Intrinsics::AtomicLoad: {
2430 UnimplementedError(Func->getContext()->getFlags()); 2416 UnimplementedError(Func->getContext()->getFlags());
2431 return; 2417 return;
2432 } 2418 }
(...skipping 37 matching lines...)
2470 case Intrinsics::Ctpop: { 2456 case Intrinsics::Ctpop: {
2471 Variable *Dest = Instr->getDest(); 2457 Variable *Dest = Instr->getDest();
2472 Operand *Val = Instr->getArg(0); 2458 Operand *Val = Instr->getArg(0);
2473 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType()) 2459 InstCall *Call = makeHelperCall(isInt32Asserting32Or64(Val->getType())
2474 ? H_call_ctpop_i32 2460 ? H_call_ctpop_i32
2475 : H_call_ctpop_i64, 2461 : H_call_ctpop_i64,
2476 Dest, 1); 2462 Dest, 1);
2477 Call->addArg(Val); 2463 Call->addArg(Val);
2478 lowerCall(Call); 2464 lowerCall(Call);
2479 // The popcount helpers always return 32-bit values, while the intrinsic's 2465 // The popcount helpers always return 32-bit values, while the intrinsic's
2480 // signature matches some 64-bit platform's native instructions and 2466 // signature matches some 64-bit platform's native instructions and expect
2481 // expect to fill a 64-bit reg. Thus, clear the upper bits of the dest 2467 // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in
2482 // just in case the user doesn't do that in the IR or doesn't toss the bits 2468 // case the user doesn't do that in the IR or doesn't toss the bits via
2483 // via truncate. 2469 // truncate.
2484 if (Val->getType() == IceType_i64) { 2470 if (Val->getType() == IceType_i64) {
2485 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2471 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2486 Constant *Zero = Ctx->getConstantZero(IceType_i32); 2472 Constant *Zero = Ctx->getConstantZero(IceType_i32);
2487 Variable *T = nullptr; 2473 Variable *T = nullptr;
2488 _mov(T, Zero); 2474 _mov(T, Zero);
2489 _mov(DestHi, T); 2475 _mov(DestHi, T);
2490 } 2476 }
2491 return; 2477 return;
2492 } 2478 }
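A model of the i64 ctpop result handling, with a compiler builtin standing in for the helper call (illustrative only):

#include <cstdint>

// The helper produces a 32-bit count; the 64-bit destination's high word is
// then explicitly zeroed, which the zero-extended return value models.
static uint64_t ctpop64Model(uint64_t Val) {
  uint32_t Count = __builtin_popcountll(Val); // stands in for the helper call
  return static_cast<uint64_t>(Count);        // DestLo = Count, DestHi = 0
}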
2493 case Intrinsics::Ctlz: { 2479 case Intrinsics::Ctlz: {
2494 // The "is zero undef" parameter is ignored and we always return 2480 // The "is zero undef" parameter is ignored and we always return a
2495 // a well-defined value. 2481 // well-defined value.
2496 Operand *Val = Instr->getArg(0); 2482 Operand *Val = Instr->getArg(0);
2497 Variable *ValLoR; 2483 Variable *ValLoR;
2498 Variable *ValHiR = nullptr; 2484 Variable *ValHiR = nullptr;
2499 if (Val->getType() == IceType_i64) { 2485 if (Val->getType() == IceType_i64) {
2500 Val = legalizeUndef(Val); 2486 Val = legalizeUndef(Val);
2501 ValLoR = legalizeToReg(loOperand(Val)); 2487 ValLoR = legalizeToReg(loOperand(Val));
2502 ValHiR = legalizeToReg(hiOperand(Val)); 2488 ValHiR = legalizeToReg(hiOperand(Val));
2503 } else { 2489 } else {
2504 ValLoR = legalizeToReg(Val); 2490 ValLoR = legalizeToReg(Val);
2505 } 2491 }
(...skipping 126 matching lines...)
2632 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 2618 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
2633 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 2619 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2634 Operand *Zero = 2620 Operand *Zero =
2635 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex); 2621 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2636 Operand *ThirtyTwo = 2622 Operand *ThirtyTwo =
2637 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex); 2623 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2638 _cmp(ValHiR, Zero); 2624 _cmp(ValHiR, Zero);
2639 Variable *T2 = makeReg(IceType_i32); 2625 Variable *T2 = makeReg(IceType_i32);
2640 _add(T2, T, ThirtyTwo); 2626 _add(T2, T, ThirtyTwo);
2641 _clz(T2, ValHiR, CondARM32::NE); 2627 _clz(T2, ValHiR, CondARM32::NE);
2642 // T2 is actually a source as well when the predicate is not AL 2628 // T2 is actually a source as well when the predicate is not AL (since it
2643 // (since it may leave T2 alone). We use set_dest_nonkillable to 2629 // may leave T2 alone). We use set_dest_nonkillable to prolong the liveness
2644 // prolong the liveness of T2 as if it was used as a source. 2630 // of T2 as if it was used as a source.
2645 _set_dest_nonkillable(); 2631 _set_dest_nonkillable();
2646 _mov(DestLo, T2); 2632 _mov(DestLo, T2);
2647 Variable *T3 = nullptr; 2633 Variable *T3 = nullptr;
2648 _mov(T3, Zero); 2634 _mov(T3, Zero);
2649 _mov(DestHi, T3); 2635 _mov(DestHi, T3);
2650 return; 2636 return;
2651 } 2637 }
2652 _mov(Dest, T); 2638 _mov(Dest, T);
2653 return; 2639 return;
2654 } 2640 }
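A model of the i64 combination above, assuming T already holds the low word's leading-zero count as computed just before this excerpt, and that clz of 0 yields 32 as on ARM (illustrative only):

#include <cstdint>

static uint32_t clz32Model(uint32_t X) { return X == 0 ? 32 : __builtin_clz(X); }

// ctlz of a 64-bit value from two 32-bit counts: use the high word's count
// when the high word is nonzero, otherwise 32 plus the low word's count.
static uint32_t ctlz64Model(uint32_t ValLo, uint32_t ValHi) {
  uint32_t T = clz32Model(ValLo);
  uint32_t T2 = T + 32;        // add t2, t, #32
  if (ValHi != 0)              // cmp valhi, #0
    T2 = clz32Model(ValHi);    // clz t2, valhi (predicated on NE)
  return T2;                   // becomes DestLo; DestHi is set to zero
}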
2655 2641
2656 void TargetARM32::lowerLoad(const InstLoad *Load) { 2642 void TargetARM32::lowerLoad(const InstLoad *Load) {
2657 // A Load instruction can be treated the same as an Assign 2643 // A Load instruction can be treated the same as an Assign instruction, after
2658 // instruction, after the source operand is transformed into an 2644 // the source operand is transformed into an OperandARM32Mem operand.
2659 // OperandARM32Mem operand.
2660 Type Ty = Load->getDest()->getType(); 2645 Type Ty = Load->getDest()->getType();
2661 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); 2646 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
2662 Variable *DestLoad = Load->getDest(); 2647 Variable *DestLoad = Load->getDest();
2663 2648
2664 // TODO(jvoung): handle folding opportunities. Sign and zero extension 2649 // TODO(jvoung): handle folding opportunities. Sign and zero extension can
2665 // can be folded into a load. 2650 // be folded into a load.
2666 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); 2651 InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
2667 lowerAssign(Assign); 2652 lowerAssign(Assign);
2668 } 2653 }
2669 2654
2670 void TargetARM32::doAddressOptLoad() { 2655 void TargetARM32::doAddressOptLoad() {
2671 UnimplementedError(Func->getContext()->getFlags()); 2656 UnimplementedError(Func->getContext()->getFlags());
2672 } 2657 }
2673 2658
2674 void TargetARM32::randomlyInsertNop(float Probability, 2659 void TargetARM32::randomlyInsertNop(float Probability,
2675 RandomNumberGenerator &RNG) { 2660 RandomNumberGenerator &RNG) {
(...skipping 25 matching lines...)
2701 Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0); 2686 Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0);
2702 Reg = D0; 2687 Reg = D0;
2703 } else if (isVectorType(Src0->getType())) { 2688 } else if (isVectorType(Src0->getType())) {
2704 Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0); 2689 Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0);
2705 Reg = Q0; 2690 Reg = Q0;
2706 } else { 2691 } else {
2707 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex); 2692 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
2708 _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0); 2693 _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0);
2709 } 2694 }
2710 } 2695 }
2711 // Add a ret instruction even if sandboxing is enabled, because 2696 // Add a ret instruction even if sandboxing is enabled, because addEpilog
2712 // addEpilog explicitly looks for a ret instruction as a marker for 2697 // explicitly looks for a ret instruction as a marker for where to insert the
2713 // where to insert the frame removal instructions. 2698 // frame removal instructions. addEpilog is responsible for restoring the
2714 // addEpilog is responsible for restoring the "lr" register as needed 2699 // "lr" register as needed prior to this ret instruction.
2715 // prior to this ret instruction.
2716 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg); 2700 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
2717 // Add a fake use of sp to make sure sp stays alive for the entire 2701 // Add a fake use of sp to make sure sp stays alive for the entire function.
2718 // function. Otherwise post-call sp adjustments get dead-code 2702 // Otherwise post-call sp adjustments get dead-code eliminated.
2719 // eliminated. TODO: Are there more places where the fake use 2703 // TODO: Are there more places where the fake use should be inserted? E.g.
2720 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not 2704 // "void f(int n){while(1) g(n);}" may not have a ret instruction.
2721 // have a ret instruction.
2722 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); 2705 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
2723 Context.insert(InstFakeUse::create(Func, SP)); 2706 Context.insert(InstFakeUse::create(Func, SP));
2724 } 2707 }
2725 2708
2726 void TargetARM32::lowerSelect(const InstSelect *Inst) { 2709 void TargetARM32::lowerSelect(const InstSelect *Inst) {
2727 Variable *Dest = Inst->getDest(); 2710 Variable *Dest = Inst->getDest();
2728 Type DestTy = Dest->getType(); 2711 Type DestTy = Dest->getType();
2729 Operand *SrcT = Inst->getTrueOperand(); 2712 Operand *SrcT = Inst->getTrueOperand();
2730 Operand *SrcF = Inst->getFalseOperand(); 2713 Operand *SrcF = Inst->getFalseOperand();
2731 Operand *Condition = Inst->getCondition(); 2714 Operand *Condition = Inst->getCondition();
(...skipping 113 matching lines...)
2845 } 2828 }
2846 2829
2847 // Helper for legalize() to emit the right code to lower an operand to a 2830 // Helper for legalize() to emit the right code to lower an operand to a
2848 // register of the appropriate type. 2831 // register of the appropriate type.
2849 Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) { 2832 Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
2850 Type Ty = Src->getType(); 2833 Type Ty = Src->getType();
2851 Variable *Reg = makeReg(Ty, RegNum); 2834 Variable *Reg = makeReg(Ty, RegNum);
2852 if (isVectorType(Ty) || isFloatingType(Ty)) { 2835 if (isVectorType(Ty) || isFloatingType(Ty)) {
2853 _vmov(Reg, Src); 2836 _vmov(Reg, Src);
2854 } else { 2837 } else {
2855 // Mov's Src operand can really only be the flexible second operand type 2838 // Mov's Src operand can really only be the flexible second operand type or
2856 // or a register. Users should guarantee that. 2839 // a register. Users should guarantee that.
2857 _mov(Reg, Src); 2840 _mov(Reg, Src);
2858 } 2841 }
2859 return Reg; 2842 return Reg;
2860 } 2843 }
2861 2844
2862 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, 2845 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
2863 int32_t RegNum) { 2846 int32_t RegNum) {
2864 Type Ty = From->getType(); 2847 Type Ty = From->getType();
2865 // Assert that a physical register is allowed. To date, all calls 2848 // Assert that a physical register is allowed. To date, all calls to
2866 // to legalize() allow a physical register. Legal_Flex converts 2849 // legalize() allow a physical register. Legal_Flex converts registers to the
2867 // registers to the right type OperandARM32FlexReg as needed. 2850 // right type OperandARM32FlexReg as needed.
2868 assert(Allowed & Legal_Reg); 2851 assert(Allowed & Legal_Reg);
2869 // Go through the various types of operands: 2852 // Go through the various types of operands: OperandARM32Mem,
2870 // OperandARM32Mem, OperandARM32Flex, Constant, and Variable. 2853 // OperandARM32Flex, Constant, and Variable. Given the above assertion, if
2871 // Given the above assertion, if type of operand is not legal 2854 // type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we
2872 // (e.g., OperandARM32Mem and !Legal_Mem), we can always copy 2855 // can always copy to a register.
2873 // to a register.
2874 if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) { 2856 if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
2875 // Before doing anything with a Mem operand, we need to ensure 2857 // Before doing anything with a Mem operand, we need to ensure that the
2876 // that the Base and Index components are in physical registers. 2858 // Base and Index components are in physical registers.
2877 Variable *Base = Mem->getBase(); 2859 Variable *Base = Mem->getBase();
2878 Variable *Index = Mem->getIndex(); 2860 Variable *Index = Mem->getIndex();
2879 Variable *RegBase = nullptr; 2861 Variable *RegBase = nullptr;
2880 Variable *RegIndex = nullptr; 2862 Variable *RegIndex = nullptr;
2881 if (Base) { 2863 if (Base) {
2882 RegBase = legalizeToReg(Base); 2864 RegBase = legalizeToReg(Base);
2883 } 2865 }
2884 if (Index) { 2866 if (Index) {
2885 RegIndex = legalizeToReg(Index); 2867 RegIndex = legalizeToReg(Index);
2886 } 2868 }
(...skipping 24 matching lines...)
2911 From = Mem; 2893 From = Mem;
2912 } 2894 }
2913 return From; 2895 return From;
2914 } 2896 }
2915 2897
2916 if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) { 2898 if (auto Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
2917 if (!(Allowed & Legal_Flex)) { 2899 if (!(Allowed & Legal_Flex)) {
2918 if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) { 2900 if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
2919 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) { 2901 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
2920 From = FlexReg->getReg(); 2902 From = FlexReg->getReg();
2921 // Fall through and let From be checked as a Variable below, 2903 // Fall through and let From be checked as a Variable below, where it
2922 // where it may or may not need a register. 2904 // may or may not need a register.
2923 } else { 2905 } else {
2924 return copyToReg(Flex, RegNum); 2906 return copyToReg(Flex, RegNum);
2925 } 2907 }
2926 } else { 2908 } else {
2927 return copyToReg(Flex, RegNum); 2909 return copyToReg(Flex, RegNum);
2928 } 2910 }
2929 } else { 2911 } else {
2930 return From; 2912 return From;
2931 } 2913 }
2932 } 2914 }
2933 2915
2934 if (llvm::isa<Constant>(From)) { 2916 if (llvm::isa<Constant>(From)) {
2935 if (llvm::isa<ConstantUndef>(From)) { 2917 if (llvm::isa<ConstantUndef>(From)) {
2936 From = legalizeUndef(From, RegNum); 2918 From = legalizeUndef(From, RegNum);
2937 if (isVectorType(Ty)) 2919 if (isVectorType(Ty))
2938 return From; 2920 return From;
2939 } 2921 }
2940 // There should be no constants of vector type (other than undef). 2922 // There should be no constants of vector type (other than undef).
2941 assert(!isVectorType(Ty)); 2923 assert(!isVectorType(Ty));
2942 bool CanBeFlex = Allowed & Legal_Flex; 2924 bool CanBeFlex = Allowed & Legal_Flex;
2943 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) { 2925 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
2944 uint32_t RotateAmt; 2926 uint32_t RotateAmt;
2945 uint32_t Immed_8; 2927 uint32_t Immed_8;
2946 uint32_t Value = static_cast<uint32_t>(C32->getValue()); 2928 uint32_t Value = static_cast<uint32_t>(C32->getValue());
2947 // Check if the immediate will fit in a Flexible second operand, 2929 // Check if the immediate will fit in a Flexible second operand, if a
2948 // if a Flexible second operand is allowed. We need to know the exact 2930 // Flexible second operand is allowed. We need to know the exact value,
2949 // value, so that rules out relocatable constants. 2931 // so that rules out relocatable constants. Also try the inverse and use
2950 // Also try the inverse and use MVN if possible. 2932 // MVN if possible.
2951 if (CanBeFlex && 2933 if (CanBeFlex &&
2952 OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) { 2934 OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
2953 return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); 2935 return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
2954 } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm( 2936 } else if (CanBeFlex && OperandARM32FlexImm::canHoldImm(
2955 ~Value, &RotateAmt, &Immed_8)) { 2937 ~Value, &RotateAmt, &Immed_8)) {
2956 auto InvertedFlex = 2938 auto InvertedFlex =
2957 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); 2939 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
2958 Variable *Reg = makeReg(Ty, RegNum); 2940 Variable *Reg = makeReg(Ty, RegNum);
2959 _mvn(Reg, InvertedFlex); 2941 _mvn(Reg, InvertedFlex);
2960 return Reg; 2942 return Reg;
2961 } else { 2943 } else {
2962 // Do a movw/movt to a register. 2944 // Do a movw/movt to a register.
2963 Variable *Reg = makeReg(Ty, RegNum); 2945 Variable *Reg = makeReg(Ty, RegNum);
2964 uint32_t UpperBits = (Value >> 16) & 0xFFFF; 2946 uint32_t UpperBits = (Value >> 16) & 0xFFFF;
2965 _movw(Reg, 2947 _movw(Reg,
2966 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32); 2948 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
2967 if (UpperBits != 0) { 2949 if (UpperBits != 0) {
2968 _movt(Reg, Ctx->getConstantInt32(UpperBits)); 2950 _movt(Reg, Ctx->getConstantInt32(UpperBits));
2969 } 2951 }
2970 return Reg; 2952 return Reg;
2971 } 2953 }
2972 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) { 2954 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
2973 Variable *Reg = makeReg(Ty, RegNum); 2955 Variable *Reg = makeReg(Ty, RegNum);
2974 _movw(Reg, C); 2956 _movw(Reg, C);
2975 _movt(Reg, C); 2957 _movt(Reg, C);
2976 return Reg; 2958 return Reg;
2977 } else { 2959 } else {
2978 assert(isScalarFloatingType(Ty)); 2960 assert(isScalarFloatingType(Ty));
2979 // Load floats/doubles from literal pool. 2961 // Load floats/doubles from literal pool.
2980 // TODO(jvoung): Allow certain immediates to be encoded directly in 2962 // TODO(jvoung): Allow certain immediates to be encoded directly in an
2981 // an operand. See Table A7-18 of the ARM manual: 2963 // operand. See Table A7-18 of the ARM manual: "Floating-point modified
2982 // "Floating-point modified immediate constants". 2964 // immediate constants". Or, for 32-bit floating point numbers, just
2983 // Or, for 32-bit floating point numbers, just encode the raw bits 2965 // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG,
2984 // into a movw/movt pair to GPR, and vmov to an SREG, instead of using 2966 // instead of using a movw/movt pair to get the const-pool address then
2985 // a movw/movt pair to get the const-pool address then loading to SREG. 2967 // loading to SREG.
2986 std::string Buffer; 2968 std::string Buffer;
2987 llvm::raw_string_ostream StrBuf(Buffer); 2969 llvm::raw_string_ostream StrBuf(Buffer);
2988 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf); 2970 llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
2989 llvm::cast<Constant>(From)->setShouldBePooled(true); 2971 llvm::cast<Constant>(From)->setShouldBePooled(true);
2990 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true); 2972 Constant *Offset = Ctx->getConstantSym(0, StrBuf.str(), true);
2991 Variable *BaseReg = makeReg(getPointerType()); 2973 Variable *BaseReg = makeReg(getPointerType());
2992 _movw(BaseReg, Offset); 2974 _movw(BaseReg, Offset);
2993 _movt(BaseReg, Offset); 2975 _movt(BaseReg, Offset);
2994 From = formMemoryOperand(BaseReg, Ty); 2976 From = formMemoryOperand(BaseReg, Ty);
2995 return copyToReg(From, RegNum); 2977 return copyToReg(From, RegNum);
2996 } 2978 }
2997 } 2979 }
2998 2980
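The integer branch above follows a three-tier strategy: encode the value as an A32 flexible immediate if it fits, otherwise try MVN on the bitwise inverse, otherwise synthesize it with a movw/movt pair; scalar floats and doubles instead come from the literal pool via a movw/movt of the pool label. The flexible-immediate test is the interesting bit-pattern check; the following is a from-scratch restatement of it for illustration, not OperandARM32FlexImm::canHoldImm itself:

    #include <cstdint>

    // An A32 "modified immediate" is an 8-bit value rotated right by an even amount.
    static bool fitsFlexImm(uint32_t value, uint32_t *rotateAmt, uint32_t *imm8) {
      for (uint32_t rot = 0; rot < 32; rot += 2) {
        // Rotating left by rot undoes a right-rotation of the same amount.
        uint32_t undone = (value << rot) | (value >> ((32 - rot) & 31));
        if (undone <= 0xFF) {
          *rotateAmt = rot;
          *imm8 = undone;
          return true;
        }
      }
      return false;
    }

    // Tiers, using example constants:
    //   0x00FF0000 -> fits directly      -> usable as a flexible second operand
    //   0xFFFFFF00 -> ~value (0xFF) fits -> "mvn  r0, #0xFF"
    //   0x12345678 -> neither fits       -> "movw r0, #0x5678" then "movt r0, #0x1234"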
2999 if (auto Var = llvm::dyn_cast<Variable>(From)) { 2981 if (auto Var = llvm::dyn_cast<Variable>(From)) {
3000 // Check if the variable is guaranteed a physical register. This 2982 // Check if the variable is guaranteed a physical register. This can happen
3001 // can happen either when the variable is pre-colored or when it is 2983 // either when the variable is pre-colored or when it is assigned infinite
3002 // assigned infinite weight. 2984 // weight.
3003 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); 2985 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
3004 // We need a new physical register for the operand if: 2986 // We need a new physical register for the operand if:
3005 // Mem is not allowed and Var isn't guaranteed a physical 2987 // Mem is not allowed and Var isn't guaranteed a physical
3006 // register, or 2988 // register, or
3007 // RegNum is required and Var->getRegNum() doesn't match. 2989 // RegNum is required and Var->getRegNum() doesn't match.
3008 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || 2990 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
3009 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { 2991 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
3010 From = copyToReg(From, RegNum); 2992 From = copyToReg(From, RegNum);
3011 } 2993 }
3012 return From; 2994 return From;
3013 } 2995 }
3014 llvm_unreachable("Unhandled operand kind in legalize()"); 2996 llvm_unreachable("Unhandled operand kind in legalize()");
3015 2997
3016 return From; 2998 return From;
3017 } 2999 }
3018 3000
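For the Variable case, the copy decision above is a two-clause predicate. Restated on plain values (NoRegister and the parameter names are stand-ins for the Variable API, not the real interface):

    constexpr int NoRegister = -1;

    static bool needsCopyToReg(bool memAllowed, bool mustHaveRegister,
                               int requestedRegNum, int currentRegNum) {
      // 1. A stack-resident variable is only legal when a memory operand is allowed.
      if (!memAllowed && !mustHaveRegister)
        return true;
      // 2. A specific register was requested and the variable is not already in it.
      if (requestedRegNum != NoRegister && requestedRegNum != currentRegNum)
        return true;
      return false;
    }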
3019 /// Provide a trivial wrapper to legalize() for this common usage. 3001 /// Provide a trivial wrapper to legalize() for this common usage.
3020 Variable *TargetARM32::legalizeToReg(Operand *From, int32_t RegNum) { 3002 Variable *TargetARM32::legalizeToReg(Operand *From, int32_t RegNum) {
3021 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); 3003 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
3022 } 3004 }
3023 3005
3024 /// Legalize undef values to concrete values. 3006 /// Legalize undef values to concrete values.
3025 Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) { 3007 Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) {
3026 Type Ty = From->getType(); 3008 Type Ty = From->getType();
3027 if (llvm::isa<ConstantUndef>(From)) { 3009 if (llvm::isa<ConstantUndef>(From)) {
3028 // Lower undefs to zero. Another option is to lower undefs to an 3010 // Lower undefs to zero. Another option is to lower undefs to an
3029 // uninitialized register; however, using an uninitialized register 3011 // uninitialized register; however, using an uninitialized register results
3030 // results in less predictable code. 3012 // in less predictable code.
3031 // 3013 //
3032 // If in the future the implementation is changed to lower undef 3014 // If in the future the implementation is changed to lower undef values to
3033 // values to uninitialized registers, a FakeDef will be needed: 3015 // uninitialized registers, a FakeDef will be needed:
3034 // Context.insert(InstFakeDef::create(Func, Reg)); 3016 // Context.insert(InstFakeDef::create(Func, Reg)); This is in order to
3035 // This is in order to ensure that the live range of Reg is not 3017 // ensure that the live range of Reg is not overestimated. If the constant
3036 // overestimated. If the constant being lowered is a 64 bit value, 3018 // being lowered is a 64 bit value, then the result should be split and the
3037 // then the result should be split and the lo and hi components will 3019 // lo and hi components will need to go in uninitialized registers.
3038 // need to go in uninitialized registers.
3039 if (isVectorType(Ty)) 3020 if (isVectorType(Ty))
3040 return makeVectorOfZeros(Ty, RegNum); 3021 return makeVectorOfZeros(Ty, RegNum);
3041 return Ctx->getConstantZero(Ty); 3022 return Ctx->getConstantZero(Ty);
3042 } 3023 }
3043 return From; 3024 return From;
3044 } 3025 }
3045 3026
3046 OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) { 3027 OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
3047 OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand); 3028 OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
3048 // It may be the case that address mode optimization already creates 3029 // It may be the case that address mode optimization already creates an
3049 // an OperandARM32Mem, so in that case it wouldn't need another level 3030 // OperandARM32Mem, so in that case it wouldn't need another level of
3050 // of transformation. 3031 // transformation.
3051 if (Mem) { 3032 if (Mem) {
3052 return llvm::cast<OperandARM32Mem>(legalize(Mem)); 3033 return llvm::cast<OperandARM32Mem>(legalize(Mem));
3053 } 3034 }
3054 // If we didn't do address mode optimization, then we only 3035 // If we didn't do address mode optimization, then we only have a base/offset
3055 // have a base/offset to work with. ARM always requires a base 3036 // to work with. ARM always requires a base register, so just use that to
3056 // register, so just use that to hold the operand. 3037 // hold the operand.
3057 Variable *Base = legalizeToReg(Operand); 3038 Variable *Base = legalizeToReg(Operand);
3058 return OperandARM32Mem::create( 3039 return OperandARM32Mem::create(
3059 Func, Ty, Base, 3040 Func, Ty, Base,
3060 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))); 3041 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
3061 } 3042 }
3062 3043
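In the fallback path above, whatever the address operand was, it ends up as a base register with a zero immediate offset, i.e. the ARM32 addressing form [rN, #0]. A tiny sketch with a simplified stand-in type (SimpleMem is not the Subzero class):

    #include <cstdint>

    struct SimpleMem { int baseReg; int32_t offset; };

    // Mirror of the fallback: the address is legalized to a register and the
    // offset is zero, so the access becomes e.g. "ldr r0, [r4, #0]".
    static SimpleMem formMemoryOperandSketch(int legalizedBaseReg) {
      return SimpleMem{legalizedBaseReg, 0};
    }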
3063 Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) { 3044 Variable *TargetARM32::makeReg(Type Type, int32_t RegNum) {
3064 // There aren't any 64-bit integer registers for ARM32. 3045 // There aren't any 64-bit integer registers for ARM32.
3065 assert(Type != IceType_i64); 3046 assert(Type != IceType_i64);
3066 Variable *Reg = Func->makeVariable(Type); 3047 Variable *Reg = Func->makeVariable(Type);
3067 if (RegNum == Variable::NoRegister) 3048 if (RegNum == Variable::NoRegister)
3068 Reg->setMustHaveReg(); 3049 Reg->setMustHaveReg();
3069 else 3050 else
3070 Reg->setRegNum(RegNum); 3051 Reg->setRegNum(RegNum);
3071 return Reg; 3052 return Reg;
3072 } 3053 }
3073 3054
3074 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) { 3055 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
3075 assert(llvm::isPowerOf2_32(Align)); 3056 assert(llvm::isPowerOf2_32(Align));
3076 uint32_t RotateAmt; 3057 uint32_t RotateAmt;
3077 uint32_t Immed_8; 3058 uint32_t Immed_8;
3078 Operand *Mask; 3059 Operand *Mask;
3079 // Use AND or BIC to mask off the bits, depending on which immediate fits 3060 // Use AND or BIC to mask off the bits, depending on which immediate fits (if
3080 // (if it fits at all). Assume Align is usually small, in which case BIC 3061 // it fits at all). Assume Align is usually small, in which case BIC works
3081 // works better. Thus, this rounds down to the alignment. 3062 // better. Thus, this rounds down to the alignment.
3082 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) { 3063 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
3083 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex); 3064 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
3084 _bic(Reg, Reg, Mask); 3065 _bic(Reg, Reg, Mask);
3085 } else { 3066 } else {
3086 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex); 3067 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex);
3087 _and(Reg, Reg, Mask); 3068 _and(Reg, Reg, Mask);
3088 } 3069 }
3089 } 3070 }
3090 3071
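The masking above is the usual round-down-to-a-power-of-two idiom, expressed either as BIC with (Align - 1) or AND with -Align, whichever immediate encodes as a flexible operand. A stand-alone restatement with a worked example:

    #include <cassert>
    #include <cstdint>

    static uint32_t alignDownPow2(uint32_t addr, uint32_t align) {
      assert((align & (align - 1)) == 0 && "alignment must be a power of two");
      // "bic reg, reg, #(align-1)" and "and reg, reg, #-align" compute the same result.
      return addr & ~(align - 1);
    }

    // Example: alignDownPow2(0x1007, 16) == 0x1000; since 15 encodes as an 8-bit
    // flexible immediate, the lowering would emit "bic r0, r0, #15".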
3091 void TargetARM32::postLower() { 3072 void TargetARM32::postLower() {
(...skipping 71 matching lines...)
3163 UnimplementedError(Ctx->getFlags()); 3144 UnimplementedError(Ctx->getFlags());
3164 } 3145 }
3165 3146
3166 TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx) 3147 TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
3167 : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {} 3148 : TargetHeaderLowering(Ctx), CPUFeatures(Ctx->getFlags()) {}
3168 3149
3169 void TargetHeaderARM32::lower() { 3150 void TargetHeaderARM32::lower() {
3170 OstreamLocker L(Ctx); 3151 OstreamLocker L(Ctx);
3171 Ostream &Str = Ctx->getStrEmit(); 3152 Ostream &Str = Ctx->getStrEmit();
3172 Str << ".syntax unified\n"; 3153 Str << ".syntax unified\n";
3173 // Emit build attributes in format: .eabi_attribute TAG, VALUE. 3154 // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of
3174 // See Sec. 2 of "Addenda to, and Errata in the ABI for the ARM architecture" 3155 // "Addenda to, and Errata in the ABI for the ARM architecture"
3175 // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf 3156 // http://infocenter.arm.com
3157 // /help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
3176 // 3158 //
3177 // Tag_conformance should be emitted first in a file-scope 3159 // Tag_conformance should be emitted first in a file-scope sub-subsection
3178 // sub-subsection of the first public subsection of the attributes. 3160 // of the first public subsection of the attributes.
3179 Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n"; 3161 Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
3180 // Chromebooks are at least A15, but do A9 for higher compat. 3162 // Chromebooks are at least A15, but do A9 for higher compat. For some
3181 // For some reason, the LLVM ARM asm parser has the .cpu directive override 3163 // reason, the LLVM ARM asm parser has the .cpu directive override the mattr
3182 // the mattr specified on the commandline. So to test hwdiv, we need to set 3164 // specified on the commandline. So to test hwdiv, we need to set the .cpu
3183 // the .cpu directive higher (can't just rely on --mattr=...). 3165 // directive higher (can't just rely on --mattr=...).
3184 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { 3166 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
3185 Str << ".cpu cortex-a15\n"; 3167 Str << ".cpu cortex-a15\n";
3186 } else { 3168 } else {
3187 Str << ".cpu cortex-a9\n"; 3169 Str << ".cpu cortex-a9\n";
3188 } 3170 }
3189 Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n" 3171 Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
3190 << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n"; 3172 << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
3191 Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n" 3173 Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
3192 << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n"; 3174 << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
3193 Str << ".fpu neon\n" 3175 Str << ".fpu neon\n"
(...skipping 11 matching lines...)
3205 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n"; 3187 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
3206 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { 3188 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
3207 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n"; 3189 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
3208 } 3190 }
3209 // Technically R9 is used for TLS with Sandboxing, and we reserve it. 3191 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
3210 // However, for compatibility with current NaCl LLVM, don't claim that. 3192 // However, for compatibility with current NaCl LLVM, don't claim that.
3211 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n"; 3193 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
3212 } 3194 }
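For reference, the directives that lower() writes for a target without hardware integer divide begin roughly as follows (assembled from the string literals above; attributes emitted by the elided lines are not shown):

    .syntax unified
    .eabi_attribute 67, "2.09" @ Tag_conformance
    .cpu cortex-a9
    .eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7
    .eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile
    .eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes
    .eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2
    .fpu neon
    @ ... attributes from the elided lines ...
    .eabi_attribute 68, 1 @ Tag_Virtualization_use
    .eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used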
3213 3195
3214 } // end of namespace Ice 3196 } // end of namespace Ice