OLD | NEW |
---|---|
(Empty) | |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | |
2 // | |
3 // The Subzero Code Generator | |
4 // | |
5 // This file is distributed under the University of Illinois Open Source | |
6 // License. See LICENSE.TXT for details. | |
7 // | |
8 //===----------------------------------------------------------------------===// | |
9 // | |
10 // This file implements the TargetLoweringX8632 class, which | |
11 // consists almost entirely of the lowering sequence for each | |
12 // high-level instruction. It also implements | |
13 // TargetX8632Fast::postLower() which does the simplest possible | |
14 // register allocation for the "fast" target. | |
15 // | |
16 //===----------------------------------------------------------------------===// | |
17 | |
18 #include "IceDefs.h" | |
19 #include "IceCfg.h" | |
20 #include "IceCfgNode.h" | |
21 #include "IceInstX8632.h" | |
22 #include "IceOperand.h" | |
23 #include "IceTargetLoweringX8632.def" | |
24 #include "IceTargetLoweringX8632.h" | |
25 | |
26 namespace Ice { | |
27 | |
28 namespace { | |
29 | |
30 // The following table summarizes the logic for lowering the fcmp instruction. | |
31 // There is one table entry for each of the 16 conditions. A comment in | |
32 // lowerFcmp() describes the lowering template. In the most general case, there | |
33 // is a compare followed by two conditional branches, because some fcmp | |
34 // conditions don't map to a single x86 conditional branch. However, in many | |
35 // cases it is possible to swap the operands in the comparison and have a single | |
36 // conditional branch. Since it's quite tedious to validate the table by hand, | |
37 // good execution tests are helpful. | |
38 | |
39 const struct TableFcmp_ { | |
40 uint32_t Default; | |
41 bool SwapOperands; | |
42 InstX8632Br::BrCond C1, C2; | |
43 } TableFcmp[] = { | |
44 #define X(val, dflt, swap, C1, C2) \ | |
45 { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \ | |
46 , | |
47 FCMPX8632_TABLE | |
48 #undef X | |
49 }; | |
50 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp); | |
51 | |
52 // The following table summarizes the logic for lowering the icmp instruction | |
53 // for i32 and narrower types. Each icmp condition has a clear mapping to an | |
54 // x86 conditional branch instruction. | |
55 | |
56 const struct TableIcmp32_ { | |
57 InstX8632Br::BrCond Mapping; | |
58 } TableIcmp32[] = { | |
59 #define X(val, C_32, C1_64, C2_64, C3_64) \ | |
60 { InstX8632Br::C_32 } \ | |
61 , | |
62 ICMPX8632_TABLE | |
63 #undef X | |
64 }; | |
65 const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32); | |
66 | |
67 // The following table summarizes the logic for lowering the icmp instruction | |
68 // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and | |
69 // conditional branches are needed. For the other conditions, three separate | |
70 // conditional branches are needed. | |
71 const struct TableIcmp64_ { | |
72 InstX8632Br::BrCond C1, C2, C3; | |
73 } TableIcmp64[] = { | |
74 #define X(val, C_32, C1_64, C2_64, C3_64) \ | |
75 { InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 } \ | |
76 , | |
77 ICMPX8632_TABLE | |
78 #undef X | |
79 }; | |
80 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64); | |
81 | |
82 InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { | |
83 size_t Index = static_cast<size_t>(Cond); | |
84 assert(Index < TableIcmp32Size); | |
85 return TableIcmp32[Index].Mapping; | |
86 } | |
87 | |
88 // In some cases, there are x-macros tables for both high-level and | |
89 // low-level instructions/operands that use the same enum key value. | |
90 // The tables are kept separate to maintain a proper separation | |
91 // between abstraction layers. There is a risk that the tables | |
92 // could get out of sync if enum values are reordered or if entries | |
93 // are added or deleted. This dummy function uses static_assert to | |
94 // ensure everything is kept in sync. | |
95 void xMacroIntegrityCheck() { | |
96 // Validate the enum values in FCMPX8632_TABLE. | |
97 { | |
98 // Define a temporary set of enum values based on low-level | |
99 // table entries. | |
100 enum _tmp_enum { | |
101 #define X(val, dflt, swap, C1, C2) _tmp_##val, | |
102 FCMPX8632_TABLE | |
103 #undef X | |
104 }; | |
105 // Define a set of constants based on high-level table entries. | |
106 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag; | |
107 ICEINSTFCMP_TABLE; | |
108 #undef X | |
109 // Define a set of constants based on low-level table entries, | |
110 // and ensure the table entry keys are consistent. | |
111 #define X(val, dflt, swap, C1, C2) \ | |
112 static const int _table2_##val = _tmp_##val; \ | |
113 STATIC_ASSERT(_table1_##val == _table2_##val); | |
114 FCMPX8632_TABLE; | |
115 #undef X | |
116 // Repeat the static asserts with respect to the high-level | |
117 // table entries in case the high-level table has extra entries. | |
118 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); | |
119 ICEINSTFCMP_TABLE; | |
120 #undef X | |
121 } | |
122 | |
123 // Validate the enum values in ICMPX8632_TABLE. | |
124 { | |
125 // Define a temporary set of enum values based on low-level | |
126 // table entries. | |
127 enum _tmp_enum { | |
128 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val, | |
129 ICMPX8632_TABLE | |
130 #undef X | |
131 }; | |
132 // Define a set of constants based on high-level table entries. | |
133 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; | |
134 ICEINSTICMP_TABLE; | |
135 #undef X | |
136 // Define a set of constants based on low-level table entries, | |
137 // and ensure the table entry keys are consistent. | |
138 #define X(val, C_32, C1_64, C2_64, C3_64) \ | |
139 static const int _table2_##val = _tmp_##val; \ | |
140 STATIC_ASSERT(_table1_##val == _table2_##val); | |
141 ICMPX8632_TABLE; | |
142 #undef X | |
143 // Repeat the static asserts with respect to the high-level | |
144 // table entries in case the high-level table has extra entries. | |
145 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); | |
146 ICEINSTICMP_TABLE; | |
147 #undef X | |
148 } | |
149 | |
150 // Validate the enum values in ICETYPEX8632_TABLE. | |
151 { | |
152 // Define a temporary set of enum values based on low-level | |
153 // table entries. | |
154 enum _tmp_enum { | |
155 #define X(tag, cvt, sdss, width) _tmp_##tag, | |
156 ICETYPEX8632_TABLE | |
157 #undef X | |
158 }; | |
159 // Define a set of constants based on high-level table entries. | |
160 #define X(tag, size, align, str) static const int _table1_##tag = tag; | |
161 ICETYPE_TABLE; | |
162 #undef X | |
163 // Define a set of constants based on low-level table entries, | |
164 // and ensure the table entry keys are consistent. | |
165 #define X(tag, cvt, sdss, width) \ | |
166 static const int _table2_##tag = _tmp_##tag; \ | |
167 STATIC_ASSERT(_table1_##tag == _table2_##tag); | |
168 ICETYPEX8632_TABLE; | |
169 #undef X | |
170 // Repeat the static asserts with respect to the high-level | |
171 // table entries in case the high-level table has extra entries. | |
172 #define X(tag, size, align, str) STATIC_ASSERT(_table1_##tag == _table2_##tag); | |
173 ICETYPE_TABLE; | |
174 #undef X | |
175 } | |
176 } | |
177 | |
178 } // end of anonymous namespace | |
179 | |
180 TargetX8632::TargetX8632(Cfg *Func) | |
181 : TargetLowering(Func), IsEbpBasedFrame(false), FrameSizeLocals(0), | |
182 LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false), | |
183 PhysicalRegisters(VarList(Reg_NUM)) { | |
184 // TODO: Don't initialize IntegerRegisters and friends every time. | |
185 // Instead, initialize in some sort of static initializer for the | |
186 // class. | |
187 llvm::SmallBitVector IntegerRegisters(Reg_NUM); | |
188 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM); | |
189 llvm::SmallBitVector FloatRegisters(Reg_NUM); | |
190 llvm::SmallBitVector InvalidRegisters(Reg_NUM); | |
191 ScratchRegs.resize(Reg_NUM); | |
192 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \ | |
193 frameptr, isI8, isInt, isFP) \ | |
194 IntegerRegisters[val] = isInt; \ | |
195 IntegerRegistersI8[val] = isI8; \ | |
196 FloatRegisters[val] = isFP; \ | |
197 ScratchRegs[val] = scratch; | |
198 REGX8632_TABLE; | |
199 #undef X | |
200 TypeToRegisterSet[IceType_void] = InvalidRegisters; | |
201 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8; | |
202 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8; | |
203 TypeToRegisterSet[IceType_i16] = IntegerRegisters; | |
204 TypeToRegisterSet[IceType_i32] = IntegerRegisters; | |
205 TypeToRegisterSet[IceType_i64] = IntegerRegisters; | |
206 TypeToRegisterSet[IceType_f32] = FloatRegisters; | |
207 TypeToRegisterSet[IceType_f64] = FloatRegisters; | |
208 } | |
209 | |
210 void TargetX8632::translateOm1() { | |
211 GlobalContext *Context = Func->getContext(); | |
212 Ostream &Str = Context->getStrDump(); | |
213 Timer T_placePhiLoads; | |
214 Func->placePhiLoads(); | |
215 if (Func->hasError()) | |
216 return; | |
217 T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()"); | |
218 Timer T_placePhiStores; | |
219 Func->placePhiStores(); | |
220 if (Func->hasError()) | |
221 return; | |
222 T_placePhiStores.printElapsedUs(Context, "placePhiStores()"); | |
223 Timer T_deletePhis; | |
224 Func->deletePhis(); | |
225 if (Func->hasError()) | |
226 return; | |
227 T_deletePhis.printElapsedUs(Context, "deletePhis()"); | |
228 if (Context->isVerbose()) { | |
229 Str << "================ After Phi lowering ================\n"; | |
230 Func->dump(); | |
231 } | |
232 | |
233 Timer T_genCode; | |
234 Func->genCode(); | |
235 if (Func->hasError()) | |
236 return; | |
237 T_genCode.printElapsedUs(Context, "genCode()"); | |
238 if (Context->isVerbose()) { | |
239 Str << "================ After initial x8632 codegen ================\n"; | |
240 Func->dump(); | |
241 } | |
242 | |
243 Timer T_genFrame; | |
244 Func->genFrame(); | |
245 if (Func->hasError()) | |
246 return; | |
247 T_genFrame.printElapsedUs(Context, "genFrame()"); | |
248 if (Context->isVerbose()) { | |
249 Str << "================ After stack frame mapping ================\n"; | |
250 Func->dump(); | |
251 } | |
252 } | |
253 | |
254 IceString TargetX8632::RegNames[] = { | |
255 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \ | |
256 frameptr, isI8, isInt, isFP) \ | |
257 name, | |
258 REGX8632_TABLE | |
259 #undef X | |
260 }; | |
261 | |
262 Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) { | |
263 assert(RegNum < PhysicalRegisters.size()); | |
264 Variable *Reg = PhysicalRegisters[RegNum]; | |
265 if (Reg == NULL) { | |
266 CfgNode *Node = NULL; // NULL means multi-block lifetime | |
267 Reg = Func->makeVariable(IceType_i32, Node); | |
268 Reg->setRegNum(RegNum); | |
269 PhysicalRegisters[RegNum] = Reg; | |
270 } | |
271 return Reg; | |
272 } | |
273 | |
274 IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const { | |
275 assert(RegNum < Reg_NUM); | |
276 static IceString RegNames8[] = { | |
277 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \ | |
278 frameptr, isI8, isInt, isFP) \ | |
279 "" name8, | |
280 REGX8632_TABLE | |
281 #undef X | |
282 }; | |
283 static IceString RegNames16[] = { | |
284 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \ | |
285 frameptr, isI8, isInt, isFP) \ | |
286 "" name16, | |
287 REGX8632_TABLE | |
288 #undef X | |
289 }; | |
290 switch (Ty) { | |
291 case IceType_i1: | |
292 case IceType_i8: | |
293 return RegNames8[RegNum]; | |
294 case IceType_i16: | |
295 return RegNames16[RegNum]; | |
296 default: | |
297 return RegNames[RegNum]; | |
298 } | |
299 } | |
300 | |
301 void TargetX8632::emitVariable(const Variable *Var, const Cfg *Func) const { | |
302 Ostream &Str = Ctx->getStrEmit(); | |
303 assert(Var->getLocalUseNode() == NULL || | |
304 Var->getLocalUseNode() == Func->getCurrentNode()); | |
305 if (Var->hasReg()) { | |
306 Str << getRegName(Var->getRegNum(), Var->getType()); | |
307 return; | |
308 } | |
309 Str << InstX8632::getWidthString(Var->getType()); | |
310 Str << " [" << getRegName(getFrameOrStackReg(), IceType_i32); | |
311 int32_t Offset = Var->getStackOffset() + getStackAdjustment(); | |
312 if (Offset) { | |
313 if (Offset > 0) | |
314 Str << "+"; | |
315 Str << Offset; | |
316 } | |
317 Str << "]"; | |
318 } | |
319 | |
320 // Helper function for addProlog(). Sets the frame offset for Arg, | |
321 // updates InArgsSizeBytes according to Arg's width, and generates an | |
322 // instruction to copy Arg into its assigned register if applicable. | |
323 // For an I64 arg that has been split into Lo and Hi components, it | |
324 // calls itself recursively on the components, taking care to handle | |
325 // Lo first because of the little-endian architecture. | |
326 void TargetX8632::setArgOffsetAndCopy(Variable *Arg, Variable *FramePtr, | |
327 int32_t BasicFrameOffset, | |
328 int32_t &InArgsSizeBytes) { | |
329 Variable *Lo = Arg->getLo(); | |
330 Variable *Hi = Arg->getHi(); | |
331 Type Ty = Arg->getType(); | |
332 if (Lo && Hi && Ty == IceType_i64) { | |
333 assert(Lo->getType() != IceType_i64); // don't want infinite recursion | |
334 assert(Hi->getType() != IceType_i64); // don't want infinite recursion | |
335 setArgOffsetAndCopy(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes); | |
336 setArgOffsetAndCopy(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes); | |
337 return; | |
338 } | |
339 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); | |
340 if (Arg->hasReg()) { | |
341 assert(Ty != IceType_i64); | |
342 OperandX8632Mem *Mem = OperandX8632Mem::create( | |
343 Func, Ty, FramePtr, | |
344 Ctx->getConstantInt(IceType_i32, Arg->getStackOffset())); | |
345 _mov(Arg, Mem); | |
346 } | |
347 InArgsSizeBytes += typeWidthInBytesOnStack(Ty); | |
348 } | |
349 | |
350 void TargetX8632::addProlog(CfgNode *Node) { | |
351 // If SimpleCoalescing is false, each variable without a register | |
352 // gets its own unique stack slot, which leads to large stack | |
353 // frames. If SimpleCoalescing is true, then each "global" variable | |
354 // without a register gets its own slot, but "local" variable slots | |
355 // are reused across basic blocks. E.g., if A and B are local to | |
356 // block 1 and C is local to block 2, then C may share a slot with A | |
357 // or B. | |
358 const bool SimpleCoalescing = true; | |
359 int32_t InArgsSizeBytes = 0; | |
360 int32_t RetIpSizeBytes = 4; | |
361 int32_t PreservedRegsSizeBytes = 0; | |
362 LocalsSizeBytes = 0; | |
363 Context.init(Node); | |
364 Context.setInsertPoint(Context.getCur()); | |
365 | |
366 // Determine stack frame offsets for each Variable without a | |
367 // register assignment. This can be done as one variable per stack | |
368 // slot. Or, do coalescing by running the register allocator again | |
369 // with an infinite set of registers (as a side effect, this gives | |
370 // variables a second chance at physical register assignment). | |
371 // | |
372 // A middle ground approach is to leverage sparsity and allocate one | |
373 // block of space on the frame for globals (variables with | |
374 // multi-block lifetime), and one block to share for locals | |
375 // (single-block lifetime). | |
376 | |
377 llvm::SmallBitVector CalleeSaves = | |
378 getRegisterSet(RegSet_CalleeSave, RegSet_None); | |
379 | |
380 int32_t GlobalsSize = 0; | |
381 std::vector<int> LocalsSize(Func->getNumNodes()); | |
382 | |
383 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and | |
384 // LocalsSizeBytes. | |
385 RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); | |
386 const VarList &Variables = Func->getVariables(); | |
387 const VarList &Args = Func->getArgs(); | |
388 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); | |
389 I != E; ++I) { | |
390 Variable *Var = *I; | |
391 if (Var->hasReg()) { | |
392 RegsUsed[Var->getRegNum()] = true; | |
393 continue; | |
394 } | |
395 // An argument passed on the stack already has a stack slot. | |
396 if (Var->getIsArg()) | |
397 continue; | |
398 // A spill slot linked to a variable with a stack slot should reuse | |
399 // that stack slot. | |
400 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { | |
401 if (Variable *Linked = Var->getPreferredRegister()) { | |
402 if (!Linked->hasReg()) | |
403 continue; | |
404 } | |
405 } | |
406 int32_t Increment = typeWidthInBytesOnStack(Var->getType()); | |
407 if (SimpleCoalescing) { | |
408 if (Var->isMultiblockLife()) { | |
409 GlobalsSize += Increment; | |
410 } else { | |
411 SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); | |
412 LocalsSize[NodeIndex] += Increment; | |
413 if (LocalsSize[NodeIndex] > LocalsSizeBytes) | |
414 LocalsSizeBytes = LocalsSize[NodeIndex]; | |
415 } | |
416 } else { | |
417 LocalsSizeBytes += Increment; | |
418 } | |
419 } | |
420 LocalsSizeBytes += GlobalsSize; | |
421 | |
422 // Add push instructions for preserved registers. | |
423 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | |
424 if (CalleeSaves[i] && RegsUsed[i]) { | |
425 PreservedRegsSizeBytes += 4; | |
426 const bool SuppressStackAdjustment = true; | |
427 _push(getPhysicalRegister(i), SuppressStackAdjustment); | |
428 } | |
429 } | |
430 | |
431 // Generate "push ebp; mov ebp, esp" | |
432 if (IsEbpBasedFrame) { | |
433 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) | |
434 .count() == 0); | |
435 PreservedRegsSizeBytes += 4; | |
436 Variable *ebp = getPhysicalRegister(Reg_ebp); | |
437 Variable *esp = getPhysicalRegister(Reg_esp); | |
438 const bool SuppressStackAdjustment = true; | |
439 _push(ebp, SuppressStackAdjustment); | |
440 _mov(ebp, esp); | |
441 } | |
442 | |
443 // Generate "sub esp, LocalsSizeBytes" | |
444 if (LocalsSizeBytes) | |
445 _sub(getPhysicalRegister(Reg_esp), | |
446 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes)); | |
447 | |
448 resetStackAdjustment(); | |
449 | |
450 // Fill in stack offsets for args, and copy args into registers for | |
451 // those that were register-allocated. Args are pushed right to | |
452 // left, so Arg[0] is closest to the stack/frame pointer. | |
453 // | |
454 // TODO: Make this right for different width args, calling | |
455 // conventions, etc. For one thing, args passed in registers will | |
456 // need to be copied/shuffled to their home registers (the | |
457 // RegManager code may have some permutation logic to leverage), | |
458 // and if they have no home register, home space will need to be | |
459 // allocated on the stack to copy into. | |
460 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | |
461 int32_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes; | |
462 if (!IsEbpBasedFrame) | |
463 BasicFrameOffset += LocalsSizeBytes; | |
464 for (SizeT i = 0; i < Args.size(); ++i) { | |
465 Variable *Arg = Args[i]; | |
466 setArgOffsetAndCopy(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes); | |
467 } | |
468 | |
469 // Fill in stack offsets for locals. | |
470 int32_t TotalGlobalsSize = GlobalsSize; | |
471 GlobalsSize = 0; | |
472 LocalsSize.assign(LocalsSize.size(), 0); | |
473 int32_t NextStackOffset = 0; | |
474 for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); | |
475 I != E; ++I) { | |
476 Variable *Var = *I; | |
477 if (Var->hasReg()) { | |
478 RegsUsed[Var->getRegNum()] = true; | |
479 continue; | |
480 } | |
481 if (Var->getIsArg()) | |
482 continue; | |
483 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { | |
484 if (Variable *Linked = Var->getPreferredRegister()) { | |
485 if (!Linked->hasReg()) { | |
486 // TODO: Make sure Linked has already been assigned a stack | |
487 // slot. | |
488 Var->setStackOffset(Linked->getStackOffset()); | |
489 continue; | |
490 } | |
491 } | |
492 } | |
493 int32_t Increment = typeWidthInBytesOnStack(Var->getType()); | |
494 if (SimpleCoalescing) { | |
495 if (Var->isMultiblockLife()) { | |
496 GlobalsSize += Increment; | |
497 NextStackOffset = GlobalsSize; | |
498 } else { | |
499 SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); | |
500 LocalsSize[NodeIndex] += Increment; | |
501 NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex]; | |
502 } | |
503 } else { | |
504 NextStackOffset += Increment; | |
505 } | |
506 if (IsEbpBasedFrame) | |
507 Var->setStackOffset(-NextStackOffset); | |
508 else | |
509 Var->setStackOffset(LocalsSizeBytes - NextStackOffset); | |
510 } | |
511 this->FrameSizeLocals = NextStackOffset; | |
512 this->HasComputedFrame = true; | |
513 | |
514 if (Func->getContext()->isVerbose(IceV_Frame)) { | |
515 Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes | |
516 << "\n" | |
517 << "InArgsSizeBytes=" << InArgsSizeBytes | |
518 << "\n" | |
519 << "PreservedRegsSizeBytes=" | |
520 << PreservedRegsSizeBytes << "\n"; | |
521 } | |
522 } | |
523 | |
524 void TargetX8632::addEpilog(CfgNode *Node) { | |
525 InstList &Insts = Node->getInsts(); | |
526 InstList::reverse_iterator RI, E; | |
527 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) { | |
528 if (llvm::isa<InstX8632Ret>(*RI)) | |
529 break; | |
530 } | |
531 if (RI == E) | |
532 return; | |
533 | |
534 // Convert the reverse_iterator position into its corresponding | |
535 // (forward) iterator position. | |
536 InstList::iterator InsertPoint = RI.base(); | |
537 --InsertPoint; | |
538 Context.init(Node); | |
539 Context.setInsertPoint(InsertPoint); | |
540 | |
541 Variable *esp = getPhysicalRegister(Reg_esp); | |
542 if (IsEbpBasedFrame) { | |
543 Variable *ebp = getPhysicalRegister(Reg_ebp); | |
544 _mov(esp, ebp); | |
545 _pop(ebp); | |
546 } else { | |
547 // add esp, LocalsSizeBytes | |
548 if (LocalsSizeBytes) | |
549 _add(esp, Ctx->getConstantInt(IceType_i32, LocalsSizeBytes)); | |
550 } | |
551 | |
552 // Add pop instructions for preserved registers. | |
553 llvm::SmallBitVector CalleeSaves = | |
554 getRegisterSet(RegSet_CalleeSave, RegSet_None); | |
555 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | |
556 SizeT j = CalleeSaves.size() - i - 1; | |
557 if (j == Reg_ebp && IsEbpBasedFrame) | |
558 continue; | |
559 if (CalleeSaves[j] && RegsUsed[j]) { | |
560 _pop(getPhysicalRegister(j)); | |
561 } | |
562 } | |
563 } | |
564 | |
565 void TargetX8632::split64(Variable *Var) { | |
566 switch (Var->getType()) { | |
567 default: | |
568 return; | |
569 case IceType_i64: | |
570 // TODO: Only consider F64 if we need to push each half when | |
571 // passing as an argument to a function call. Note that each half | |
572 // is still typed as I32. | |
573 case IceType_f64: | |
574 break; | |
575 } | |
576 Variable *Lo = Var->getLo(); | |
577 Variable *Hi = Var->getHi(); | |
578 if (Lo) { | |
579 assert(Hi); | |
580 return; | |
581 } | |
582 assert(Hi == NULL); | |
583 Lo = Func->makeVariable(IceType_i32, Context.getNode(), | |
584 Var->getName() + "__lo"); | |
585 Hi = Func->makeVariable(IceType_i32, Context.getNode(), | |
586 Var->getName() + "__hi"); | |
587 Var->setLoHi(Lo, Hi); | |
588 if (Var->getIsArg()) { | |
589 Lo->setIsArg(Func); | |
590 Hi->setIsArg(Func); | |
591 } | |
592 } | |
593 | |
594 Operand *TargetX8632::loOperand(Operand *Operand) { | |
595 assert(Operand->getType() == IceType_i64); | |
596 if (Operand->getType() != IceType_i64) | |
597 return Operand; | |
598 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { | |
599 split64(Var); | |
600 return Var->getLo(); | |
601 } | |
602 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) { | |
603 uint64_t Mask = (1ull << 32) - 1; | |
604 return Ctx->getConstantInt(IceType_i32, Const->getValue() & Mask); | |
605 } | |
606 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { | |
607 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), | |
608 Mem->getOffset(), Mem->getIndex(), | |
609 Mem->getShift()); | |
610 } | |
611 llvm_unreachable("Unsupported operand type"); | |
612 return NULL; | |
613 } | |
614 | |
615 Operand *TargetX8632::hiOperand(Operand *Operand) { | |
616 assert(Operand->getType() == IceType_i64); | |
617 if (Operand->getType() != IceType_i64) | |
618 return Operand; | |
619 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) { | |
620 split64(Var); | |
621 return Var->getHi(); | |
622 } | |
623 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) { | |
624 return Ctx->getConstantInt(IceType_i32, Const->getValue() >> 32); | |
625 } | |
626 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) { | |
627 Constant *Offset = Mem->getOffset(); | |
628 if (Offset == NULL) | |
629 Offset = Ctx->getConstantInt(IceType_i32, 4); | |
630 else if (ConstantInteger *IntOffset = | |
631 llvm::dyn_cast<ConstantInteger>(Offset)) { | |
632 Offset = Ctx->getConstantInt(IceType_i32, 4 + IntOffset->getValue()); | |
633 } else if (ConstantRelocatable *SymOffset = | |
634 llvm::dyn_cast<ConstantRelocatable>(Offset)) { | |
635 Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(), | |
636 SymOffset->getName()); | |
637 } | |
638 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset, | |
639 Mem->getIndex(), Mem->getShift()); | |
640 } | |
641 llvm_unreachable("Unsupported operand type"); | |
642 return NULL; | |
643 } | |
644 | |
645 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include, | |
646 RegSetMask Exclude) const { | |
647 llvm::SmallBitVector Registers(Reg_NUM); | |
648 | |
649 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \ | |
650 frameptr, isI8, isInt, isFP) \ | |
651 if (scratch && (Include & RegSet_CallerSave)) \ | |
652 Registers[val] = true; \ | |
653 if (preserved && (Include & RegSet_CalleeSave)) \ | |
654 Registers[val] = true; \ | |
655 if (stackptr && (Include & RegSet_StackPointer)) \ | |
656 Registers[val] = true; \ | |
657 if (frameptr && (Include & RegSet_FramePointer)) \ | |
658 Registers[val] = true; \ | |
659 if (scratch && (Exclude & RegSet_CallerSave)) \ | |
660 Registers[val] = false; \ | |
661 if (preserved && (Exclude & RegSet_CalleeSave)) \ | |
662 Registers[val] = false; \ | |
663 if (stackptr && (Exclude & RegSet_StackPointer)) \ | |
664 Registers[val] = false; \ | |
665 if (frameptr && (Exclude & RegSet_FramePointer)) \ | |
666 Registers[val] = false; | |
667 | |
668 REGX8632_TABLE | |
669 | |
670 #undef X | |
671 | |
672 return Registers; | |
673 } | |
674 | |
675 void TargetX8632::lowerAlloca(const InstAlloca *Inst) { | |
676 IsEbpBasedFrame = true; | |
677 // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize | |
678 // the number of adjustments of esp, etc. | |
679 Variable *esp = getPhysicalRegister(Reg_esp); | |
680 Operand *TotalSize = legalize(Inst->getSizeInBytes()); | |
681 Variable *Dest = Inst->getDest(); | |
682 _sub(esp, TotalSize); | |
683 _mov(Dest, esp); | |
684 } | |
685 | |
686 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | |
687 Variable *Dest = Inst->getDest(); | |
688 Operand *Src0 = legalize(Inst->getSrc(0)); | |
689 Operand *Src1 = legalize(Inst->getSrc(1)); | |
690 if (Dest->getType() == IceType_i64) { | |
691 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
692 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
693 Operand *Src0Lo = loOperand(Src0); | |
694 Operand *Src0Hi = hiOperand(Src0); | |
695 Operand *Src1Lo = loOperand(Src1); | |
696 Operand *Src1Hi = hiOperand(Src1); | |
697 Variable *T_Lo = NULL, *T_Hi = NULL; | |
698 switch (Inst->getOp()) { | |
699 case InstArithmetic::Add: | |
700 _mov(T_Lo, Src0Lo); | |
701 _add(T_Lo, Src1Lo); | |
702 _mov(DestLo, T_Lo); | |
703 _mov(T_Hi, Src0Hi); | |
704 _adc(T_Hi, Src1Hi); | |
705 _mov(DestHi, T_Hi); | |
706 break; | |
707 case InstArithmetic::And: | |
708 _mov(T_Lo, Src0Lo); | |
709 _and(T_Lo, Src1Lo); | |
710 _mov(DestLo, T_Lo); | |
711 _mov(T_Hi, Src0Hi); | |
712 _and(T_Hi, Src1Hi); | |
713 _mov(DestHi, T_Hi); | |
714 break; | |
715 case InstArithmetic::Or: | |
716 _mov(T_Lo, Src0Lo); | |
717 _or(T_Lo, Src1Lo); | |
718 _mov(DestLo, T_Lo); | |
719 _mov(T_Hi, Src0Hi); | |
720 _or(T_Hi, Src1Hi); | |
721 _mov(DestHi, T_Hi); | |
722 break; | |
723 case InstArithmetic::Xor: | |
724 _mov(T_Lo, Src0Lo); | |
725 _xor(T_Lo, Src1Lo); | |
726 _mov(DestLo, T_Lo); | |
727 _mov(T_Hi, Src0Hi); | |
728 _xor(T_Hi, Src1Hi); | |
729 _mov(DestHi, T_Hi); | |
730 break; | |
731 case InstArithmetic::Sub: | |
732 _mov(T_Lo, Src0Lo); | |
733 _sub(T_Lo, Src1Lo); | |
734 _mov(DestLo, T_Lo); | |
735 _mov(T_Hi, Src0Hi); | |
736 _sbb(T_Hi, Src1Hi); | |
737 _mov(DestHi, T_Hi); | |
738 break; | |
739 case InstArithmetic::Mul: { | |
740 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; | |
741 Variable *T_4Lo = makeReg(IceType_i32, Reg_eax); | |
742 Variable *T_4Hi = makeReg(IceType_i32, Reg_edx); | |
743 // gcc does the following: | |
744 // a=b*c ==> | |
745 // t1 = b.hi; t1 *=(imul) c.lo | |
746 // t2 = c.hi; t2 *=(imul) b.lo | |
747 // t3:eax = b.lo | |
748 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo | |
749 // a.lo = t4.lo | |
750 // t4.hi += t1 | |
751 // t4.hi += t2 | |
752 // a.hi = t4.hi | |
753 _mov(T_1, Src0Hi); | |
754 _imul(T_1, Src1Lo); | |
755 _mov(T_2, Src1Hi); | |
756 _imul(T_2, Src0Lo); | |
757 _mov(T_3, Src0Lo, Reg_eax); | |
758 _mul(T_4Lo, T_3, Src1Lo); | |
759 // The mul instruction produces two dest variables, edx:eax. We | |
760 // create a fake definition of edx to account for this. | |
761 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); | |
762 _mov(DestLo, T_4Lo); | |
763 _add(T_4Hi, T_1); | |
764 _add(T_4Hi, T_2); | |
765 _mov(DestHi, T_4Hi); | |
766 } break; | |
767 case InstArithmetic::Shl: { | |
768 // TODO: Refactor the similarities between Shl, Lshr, and Ashr. | |
769 // gcc does the following: | |
770 // a=b<<c ==> | |
771 // t1:ecx = c.lo & 0xff | |
772 // t2 = b.lo | |
773 // t3 = b.hi | |
774 // t3 = shld t3, t2, t1 | |
775 // t2 = shl t2, t1 | |
776 // test t1, 0x20 | |
777 // je L1 | |
778 // use(t3) | |
779 // t3 = t2 | |
780 // t2 = 0 | |
781 // L1: | |
782 // a.lo = t2 | |
783 // a.hi = t3 | |
784 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; | |
785 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20); | |
786 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | |
787 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
788 _mov(T_1, Src1Lo, Reg_ecx); | |
789 _mov(T_2, Src0Lo); | |
790 _mov(T_3, Src0Hi); | |
791 _shld(T_3, T_2, T_1); | |
792 _shl(T_2, T_1); | |
793 _test(T_1, BitTest); | |
794 _br(InstX8632Br::Br_e, Label); | |
795 // Because of the intra-block control flow, we need to fake a use | |
796 // of T_3 to prevent its earlier definition from being dead-code | |
797 // eliminated in the presence of its later definition. | |
798 Context.insert(InstFakeUse::create(Func, T_3)); | |
799 _mov(T_3, T_2); | |
800 _mov(T_2, Zero); | |
801 Context.insert(Label); | |
802 _mov(DestLo, T_2); | |
803 _mov(DestHi, T_3); | |
804 } break; | |
805 case InstArithmetic::Lshr: { | |
806 // a=b>>c (unsigned) ==> | |
807 // t1:ecx = c.lo & 0xff | |
808 // t2 = b.lo | |
809 // t3 = b.hi | |
810 // t2 = shrd t2, t3, t1 | |
811 // t3 = shr t3, t1 | |
812 // test t1, 0x20 | |
813 // je L1 | |
814 // use(t2) | |
815 // t2 = t3 | |
816 // t3 = 0 | |
817 // L1: | |
818 // a.lo = t2 | |
819 // a.hi = t3 | |
820 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; | |
821 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20); | |
822 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | |
823 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
824 _mov(T_1, Src1Lo, Reg_ecx); | |
825 _mov(T_2, Src0Lo); | |
826 _mov(T_3, Src0Hi); | |
827 _shrd(T_2, T_3, T_1); | |
828 _shr(T_3, T_1); | |
829 _test(T_1, BitTest); | |
830 _br(InstX8632Br::Br_e, Label); | |
831 // Because of the intra-block control flow, we need to fake a use | |
832 // of T_2 to prevent its earlier definition from being dead-code | |
833 // eliminated in the presence of its later definition. | |
834 Context.insert(InstFakeUse::create(Func, T_2)); | |
835 _mov(T_2, T_3); | |
836 _mov(T_3, Zero); | |
837 Context.insert(Label); | |
838 _mov(DestLo, T_2); | |
839 _mov(DestHi, T_3); | |
840 } break; | |
841 case InstArithmetic::Ashr: { | |
842 // a=b>>c (signed) ==> | |
843 // t1:ecx = c.lo & 0xff | |
844 // t2 = b.lo | |
845 // t3 = b.hi | |
846 // t2 = shrd t2, t3, t1 | |
847 // t3 = sar t3, t1 | |
848 // test t1, 0x20 | |
849 // je L1 | |
850 // use(t2) | |
851 // t2 = t3 | |
852 // t3 = sar t3, 0x1f | |
853 // L1: | |
854 // a.lo = t2 | |
855 // a.hi = t3 | |
856 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; | |
857 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20); | |
858 Constant *SignExtend = Ctx->getConstantInt(IceType_i32, 0x1f); | |
859 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
860 _mov(T_1, Src1Lo, Reg_ecx); | |
861 _mov(T_2, Src0Lo); | |
862 _mov(T_3, Src0Hi); | |
863 _shrd(T_2, T_3, T_1); | |
864 _sar(T_3, T_1); | |
865 _test(T_1, BitTest); | |
866 _br(InstX8632Br::Br_e, Label); | |
867 // Because of the intra-block control flow, we need to fake a use | |
868 // of T_2 to prevent its earlier definition from being dead-code | |
869 // eliminated in the presence of its later definition. | |
870 Context.insert(InstFakeUse::create(Func, T_2)); | |
871 _mov(T_2, T_3); | |
872 _sar(T_3, SignExtend); | |
873 Context.insert(Label); | |
874 _mov(DestLo, T_2); | |
875 _mov(DestHi, T_3); | |
876 } break; | |
877 case InstArithmetic::Udiv: { | |
878 const SizeT MaxSrcs = 2; | |
879 InstCall *Call = makeHelperCall("__udivdi3", Dest, MaxSrcs); | |
880 Call->addArg(Inst->getSrc(0)); | |
881 Call->addArg(Inst->getSrc(1)); | |
882 lowerCall(Call); | |
883 } break; | |
884 case InstArithmetic::Sdiv: { | |
885 const SizeT MaxSrcs = 2; | |
886 InstCall *Call = makeHelperCall("__divdi3", Dest, MaxSrcs); | |
887 Call->addArg(Inst->getSrc(0)); | |
888 Call->addArg(Inst->getSrc(1)); | |
889 lowerCall(Call); | |
890 } break; | |
891 case InstArithmetic::Urem: { | |
892 const SizeT MaxSrcs = 2; | |
893 InstCall *Call = makeHelperCall("__umoddi3", Dest, MaxSrcs); | |
894 Call->addArg(Inst->getSrc(0)); | |
895 Call->addArg(Inst->getSrc(1)); | |
896 lowerCall(Call); | |
897 } break; | |
898 case InstArithmetic::Srem: { | |
899 const SizeT MaxSrcs = 2; | |
900 InstCall *Call = makeHelperCall("__moddi3", Dest, MaxSrcs); | |
901 Call->addArg(Inst->getSrc(0)); | |
902 Call->addArg(Inst->getSrc(1)); | |
903 lowerCall(Call); | |
904 } break; | |
905 case InstArithmetic::Fadd: | |
906 case InstArithmetic::Fsub: | |
907 case InstArithmetic::Fmul: | |
908 case InstArithmetic::Fdiv: | |
909 case InstArithmetic::Frem: | |
910 llvm_unreachable("FP instruction with i64 type"); | |
911 break; | |
912 } | |
913 } else { // Dest->getType() != IceType_i64 | |
914 Variable *T_edx = NULL; | |
915 Variable *T = NULL; | |
916 switch (Inst->getOp()) { | |
917 case InstArithmetic::Add: | |
918 _mov(T, Src0); | |
919 _add(T, Src1); | |
920 _mov(Dest, T); | |
921 break; | |
922 case InstArithmetic::And: | |
923 _mov(T, Src0); | |
924 _and(T, Src1); | |
925 _mov(Dest, T); | |
926 break; | |
927 case InstArithmetic::Or: | |
928 _mov(T, Src0); | |
929 _or(T, Src1); | |
930 _mov(Dest, T); | |
931 break; | |
932 case InstArithmetic::Xor: | |
933 _mov(T, Src0); | |
934 _xor(T, Src1); | |
935 _mov(Dest, T); | |
936 break; | |
937 case InstArithmetic::Sub: | |
938 _mov(T, Src0); | |
939 _sub(T, Src1); | |
940 _mov(Dest, T); | |
941 break; | |
942 case InstArithmetic::Mul: | |
943 // TODO: Optimize for llvm::isa<Constant>(Src1) | |
944 // TODO: Strength-reduce multiplications by a constant, | |
945 // particularly -1 and powers of 2. Advanced: use lea to | |
946 // multiply by 3, 5, 9. | |
947 // | |
948 // The 8-bit version of imul only allows the form "imul r/m8" | |
949 // where T must be in eax. | |
950 if (Dest->getType() == IceType_i8) | |
951 _mov(T, Src0, Reg_eax); | |
952 else | |
953 _mov(T, Src0); | |
954 _imul(T, Src1); | |
955 _mov(Dest, T); | |
956 break; | |
957 case InstArithmetic::Shl: | |
958 _mov(T, Src0); | |
959 if (!llvm::isa<Constant>(Src1)) | |
960 Src1 = legalizeToVar(Src1, false, Reg_ecx); | |
961 _shl(T, Src1); | |
962 _mov(Dest, T); | |
963 break; | |
964 case InstArithmetic::Lshr: | |
965 _mov(T, Src0); | |
966 if (!llvm::isa<Constant>(Src1)) | |
967 Src1 = legalizeToVar(Src1, false, Reg_ecx); | |
968 _shr(T, Src1); | |
969 _mov(Dest, T); | |
970 break; | |
971 case InstArithmetic::Ashr: | |
972 _mov(T, Src0); | |
973 if (!llvm::isa<Constant>(Src1)) | |
974 Src1 = legalizeToVar(Src1, false, Reg_ecx); | |
975 _sar(T, Src1); | |
976 _mov(Dest, T); | |
977 break; | |
978 case InstArithmetic::Udiv: | |
979 if (Dest->getType() == IceType_i8) { | |
980 Variable *T_ah = NULL; | |
981 Constant *Zero = Ctx->getConstantInt(IceType_i8, 0); | |
982 _mov(T, Src0, Reg_eax); | |
983 _mov(T_ah, Zero, Reg_ah); | |
984 _div(T, Src1, T_ah); | |
985 _mov(Dest, T); | |
986 } else { | |
987 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | |
988 _mov(T, Src0, Reg_eax); | |
989 _mov(T_edx, Zero, Reg_edx); | |
990 _div(T, Src1, T_edx); | |
991 _mov(Dest, T); | |
992 } | |
993 break; | |
994 case InstArithmetic::Sdiv: | |
995 T_edx = makeReg(IceType_i32, Reg_edx); | |
996 _mov(T, Src0, Reg_eax); | |
997 _cdq(T_edx, T); | |
998 _idiv(T, Src1, T_edx); | |
999 _mov(Dest, T); | |
1000 break; | |
1001 case InstArithmetic::Urem: | |
1002 if (Dest->getType() == IceType_i8) { | |
1003 Variable *T_ah = NULL; | |
1004 Constant *Zero = Ctx->getConstantInt(IceType_i8, 0); | |
1005 _mov(T, Src0, Reg_eax); | |
1006 _mov(T_ah, Zero, Reg_ah); | |
1007 _div(T_ah, Src1, T); | |
1008 _mov(Dest, T_ah); | |
1009 } else { | |
1010 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | |
1011 _mov(T_edx, Zero, Reg_edx); | |
1012 _mov(T, Src0, Reg_eax); | |
1013 _div(T_edx, Src1, T); | |
1014 _mov(Dest, T_edx); | |
1015 } | |
1016 break; | |
1017 case InstArithmetic::Srem: | |
1018 T_edx = makeReg(IceType_i32, Reg_edx); | |
1019 _mov(T, Src0, Reg_eax); | |
1020 _cdq(T_edx, T); | |
1021 _idiv(T_edx, Src1, T); | |
1022 _mov(Dest, T_edx); | |
1023 break; | |
1024 case InstArithmetic::Fadd: | |
1025 _mov(T, Src0); | |
1026 _addss(T, Src1); | |
1027 _mov(Dest, T); | |
1028 break; | |
1029 case InstArithmetic::Fsub: | |
1030 _mov(T, Src0); | |
1031 _subss(T, Src1); | |
1032 _mov(Dest, T); | |
1033 break; | |
1034 case InstArithmetic::Fmul: | |
1035 _mov(T, Src0); | |
1036 _mulss(T, Src1); | |
1037 _mov(Dest, T); | |
1038 break; | |
1039 case InstArithmetic::Fdiv: | |
1040 _mov(T, Src0); | |
1041 _divss(T, Src1); | |
1042 _mov(Dest, T); | |
1043 break; | |
1044 case InstArithmetic::Frem: { | |
1045 const SizeT MaxSrcs = 2; | |
1046 Type Ty = Dest->getType(); | |
1047 InstCall *Call = | |
1048 makeHelperCall(Ty == IceType_f32 ? "fmodf" : "fmod", Dest, MaxSrcs); | |
1049 Call->addArg(Src0); | |
1050 Call->addArg(Src1); | |
1051 return lowerCall(Call); | |
1052 } break; | |
1053 } | |
1054 } | |
1055 } | |
1056 | |
1057 void TargetX8632::lowerAssign(const InstAssign *Inst) { | |
1058 Variable *Dest = Inst->getDest(); | |
1059 Operand *Src0 = Inst->getSrc(0); | |
1060 assert(Dest->getType() == Src0->getType()); | |
1061 if (Dest->getType() == IceType_i64) { | |
1062 Src0 = legalize(Src0); | |
1063 Operand *Src0Lo = loOperand(Src0); | |
1064 Operand *Src0Hi = hiOperand(Src0); | |
1065 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
1066 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
1067 Variable *T_Lo = NULL, *T_Hi = NULL; | |
1068 _mov(T_Lo, Src0Lo); | |
1069 _mov(DestLo, T_Lo); | |
1070 _mov(T_Hi, Src0Hi); | |
1071 _mov(DestHi, T_Hi); | |
1072 } else { | |
1073 const bool AllowOverlap = true; | |
1074 // RI is either a physical register or an immediate. | |
1075 Operand *RI = legalize(Src0, Legal_Reg | Legal_Imm, AllowOverlap); | |
1076 _mov(Dest, RI); | |
1077 } | |
1078 } | |
1079 | |
1080 void TargetX8632::lowerBr(const InstBr *Inst) { | |
1081 if (Inst->isUnconditional()) { | |
1082 _br(Inst->getTargetUnconditional()); | |
1083 } else { | |
1084 Operand *Src0 = legalize(Inst->getCondition()); | |
1085 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | |
1086 _cmp(Src0, Zero); | |
1087 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); | |
1088 } | |
1089 } | |
1090 | |
1091 void TargetX8632::lowerCall(const InstCall *Instr) { | |
1092 // Generate a sequence of push instructions, pushing right to left, | |
1093 // keeping track of stack offsets in case a push involves a stack | |
1094 // operand and we are using an esp-based frame. | |
1095 uint32_t StackOffset = 0; | |
1096 // TODO: If for some reason the call instruction gets dead-code | |
1097 // eliminated after lowering, we would need to ensure that the | |
1098 // pre-call push instructions and the post-call esp adjustment get | |
1099 // eliminated as well. | |
1100 for (SizeT NumArgs = Instr->getNumArgs(), i = 0; i < NumArgs; ++i) { | |
1101 Operand *Arg = legalize(Instr->getArg(NumArgs - i - 1)); | |
1102 if (Arg->getType() == IceType_i64) { | |
1103 _push(hiOperand(Arg)); | |
1104 _push(loOperand(Arg)); | |
1105 } else if (Arg->getType() == IceType_f64) { | |
1106 // If the Arg turns out to be a memory operand, we need to push | |
1107 // 8 bytes, which requires two push instructions. This ends up | |
1108 // being somewhat clumsy in the current IR, so we use a | |
1109 // workaround. Force the operand into a (xmm) register, and | |
1110 // then push the register. An xmm register push is actually not | |
1111 // possible in x86, but the Push instruction emitter handles | |
1112 // this by decrementing the stack pointer and directly writing | |
1113 // the xmm register value. | |
1114 Variable *T = NULL; | |
1115 _mov(T, Arg); | |
1116 _push(T); | |
1117 } else { | |
1118 _push(Arg); | |
1119 } | |
1120 StackOffset += typeWidthInBytesOnStack(Arg->getType()); | |
1121 } | |
1122 // Generate the call instruction. Assign its result to a temporary | |
1123 // with high register allocation weight. | |
1124 Variable *Dest = Instr->getDest(); | |
1125 Variable *eax = NULL; // doubles as RegLo as necessary | |
1126 Variable *edx = NULL; | |
1127 if (Dest) { | |
1128 switch (Dest->getType()) { | |
1129 case IceType_NUM: | |
1130 llvm_unreachable("Invalid Call dest type"); | |
1131 break; | |
1132 case IceType_void: | |
1133 break; | |
1134 case IceType_i1: | |
1135 case IceType_i8: | |
1136 case IceType_i16: | |
1137 case IceType_i32: | |
1138 eax = makeReg(Dest->getType(), Reg_eax); | |
1139 break; | |
1140 case IceType_i64: | |
1141 eax = makeReg(IceType_i32, Reg_eax); | |
1142 edx = makeReg(IceType_i32, Reg_edx); | |
1143 break; | |
1144 case IceType_f32: | |
1145 case IceType_f64: | |
1146 // Leave eax==edx==NULL, and capture the result with the fstp | |
1147 // instruction. | |
1148 break; | |
1149 } | |
1150 } | |
1151 Operand *CallTarget = legalize(Instr->getCallTarget()); | |
1152 Inst *NewCall = InstX8632Call::create(Func, eax, CallTarget); | |
1153 Context.insert(NewCall); | |
1154 if (edx) | |
1155 Context.insert(InstFakeDef::create(Func, edx)); | |
1156 | |
1157 // Add the appropriate offset to esp. | |
1158 if (StackOffset) { | |
1159 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); | |
1160 _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset)); | |
1161 } | |
1162 | |
1163 // Insert a register-kill pseudo instruction. | |
1164 VarList KilledRegs; | |
1165 for (SizeT i = 0; i < ScratchRegs.size(); ++i) { | |
1166 if (ScratchRegs[i]) | |
1167 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i)); | |
1168 } | |
1169 Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall)); | |
1170 | |
1171 // Generate a FakeUse to keep the call live if necessary. | |
1172 if (Instr->hasSideEffects() && eax) { | |
1173 Inst *FakeUse = InstFakeUse::create(Func, eax); | |
1174 Context.insert(FakeUse); | |
1175 } | |
1176 | |
1177 // Generate Dest=eax assignment. | |
1178 if (Dest && eax) { | |
1179 if (edx) { | |
1180 split64(Dest); | |
1181 Variable *DestLo = Dest->getLo(); | |
1182 Variable *DestHi = Dest->getHi(); | |
1183 DestLo->setPreferredRegister(eax, false); | |
1184 DestHi->setPreferredRegister(edx, false); | |
1185 _mov(DestLo, eax); | |
1186 _mov(DestHi, edx); | |
1187 } else { | |
1188 Dest->setPreferredRegister(eax, false); | |
1189 _mov(Dest, eax); | |
1190 } | |
1191 } | |
1192 | |
1193 // Special treatment for an FP function which returns its result in | |
1194 // st(0). | |
1195 if (Dest && | |
1196 (Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64)) { | |
1197 _fstp(Dest); | |
1198 // If Dest ends up being a physical xmm register, the fstp emit | |
1199 // code will route st(0) through a temporary stack slot. | |
1200 } | |
1201 } | |
1202 | |
1203 void TargetX8632::lowerCast(const InstCast *Inst) { | |
1204 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap) | |
1205 InstCast::OpKind CastKind = Inst->getCastKind(); | |
1206 Variable *Dest = Inst->getDest(); | |
1207 // Src0RM is the source operand legalized to physical register or memory, but | |
1208 // not immediate, since the relevant x86 native instructions don't allow an | |
1209 // immediate operand. If the operand is an immediate, we could consider | |
1210 // computing the strength-reduced result at translation time, but we're | |
1211 // unlikely to see something like that in the bitcode that the optimizer | |
1212 // wouldn't have already taken care of. | |
1213 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem, true); | |
1214 switch (CastKind) { | |
1215 default: | |
1216 Func->setError("Cast type not supported"); | |
1217 return; | |
1218 case InstCast::Sext: | |
1219 if (Dest->getType() == IceType_i64) { | |
1220 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2 | |
1221 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
1222 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
1223 Variable *T_Lo = makeReg(DestLo->getType()); | |
1224 if (Src0RM->getType() == IceType_i32) | |
1225 _mov(T_Lo, Src0RM); | |
1226 else | |
1227 _movsx(T_Lo, Src0RM); | |
1228 _mov(DestLo, T_Lo); | |
1229 Variable *T_Hi = NULL; | |
1230 Constant *Shift = Ctx->getConstantInt(IceType_i32, 31); | |
1231 _mov(T_Hi, T_Lo); | |
1232 _sar(T_Hi, Shift); | |
1233 _mov(DestHi, T_Hi); | |
1234 } else { | |
1235 // TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and | |
1236 // also copy to the high operand of a 64-bit variable. | |
1237 // t1 = movsx src; dst = t1 | |
1238 Variable *T = makeReg(Dest->getType()); | |
1239 _movsx(T, Src0RM); | |
1240 _mov(Dest, T); | |
1241 } | |
1242 break; | |
1243 case InstCast::Zext: | |
1244 if (Dest->getType() == IceType_i64) { | |
1245 // t1=movzx src; dst.lo=t1; dst.hi=0 | |
1246 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | |
1247 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
1248 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
1249 Variable *Tmp = makeReg(DestLo->getType()); | |
1250 if (Src0RM->getType() == IceType_i32) | |
1251 _mov(Tmp, Src0RM); | |
1252 else | |
1253 _movzx(Tmp, Src0RM); | |
1254 _mov(DestLo, Tmp); | |
1255 _mov(DestHi, Zero); | |
1256 } else if (Src0RM->getType() == IceType_i1) { | |
1257 // t = Src0RM; t &= 1; Dest = t | |
1258 Operand *One = Ctx->getConstantInt(IceType_i32, 1); | |
1259 Variable *T = makeReg(IceType_i32); | |
1260 _movzx(T, Src0RM); | |
1261 _and(T, One); | |
1262 _mov(Dest, T); | |
1263 } else { | |
1264 // t1 = movzx src; dst = t1 | |
1265 Variable *T = makeReg(Dest->getType()); | |
1266 _movzx(T, Src0RM); | |
1267 _mov(Dest, T); | |
1268 } | |
1269 break; | |
1270 case InstCast::Trunc: { | |
1271 if (Src0RM->getType() == IceType_i64) | |
1272 Src0RM = loOperand(Src0RM); | |
1273 // t1 = trunc Src0RM; Dest = t1 | |
1274 Variable *T = NULL; | |
1275 _mov(T, Src0RM); | |
1276 _mov(Dest, T); | |
1277 break; | |
1278 } | |
1279 case InstCast::Fptrunc: | |
1280 case InstCast::Fpext: { | |
1281 // t1 = cvt Src0RM; Dest = t1 | |
1282 Variable *T = makeReg(Dest->getType()); | |
1283 _cvt(T, Src0RM); | |
1284 _mov(Dest, T); | |
1285 break; | |
1286 } | |
1287 case InstCast::Fptosi: | |
1288 if (Dest->getType() == IceType_i64) { | |
1289 // Use a helper for converting floating-point values to 64-bit | |
1290 // integers. SSE2 appears to have no way to convert from xmm | |
1291 // registers to something like the edx:eax register pair, and | |
1292 // gcc and clang both want to use x87 instructions complete with | |
1293 // temporary manipulation of the status word. This helper is | |
1294 // not needed for x86-64. | |
1295 split64(Dest); | |
1296 const SizeT MaxSrcs = 1; | |
1297 Type SrcType = Inst->getSrc(0)->getType(); | |
1298 InstCall *Call = makeHelperCall( | |
1299 SrcType == IceType_f32 ? "cvtftosi64" : "cvtdtosi64", Dest, MaxSrcs); | |
1300 // TODO: Call the correct compiler-rt helper function. | |
1301 Call->addArg(Inst->getSrc(0)); | |
1302 lowerCall(Call); | |
1303 } else { | |
1304 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type | |
1305 Variable *T_1 = makeReg(IceType_i32); | |
1306 Variable *T_2 = makeReg(Dest->getType()); | |
1307 _cvt(T_1, Src0RM); | |
1308 _mov(T_2, T_1); // T_1 and T_2 may have different integer types | |
1309 _mov(Dest, T_2); | |
1310 T_2->setPreferredRegister(T_1, true); | |
1311 } | |
1312 break; | |
1313 case InstCast::Fptoui: | |
1314 if (Dest->getType() == IceType_i64 || Dest->getType() == IceType_i32) { | |
1315 // Use a helper for both x86-32 and x86-64. | |
1316 split64(Dest); | |
1317 const SizeT MaxSrcs = 1; | |
1318 Type DestType = Dest->getType(); | |
1319 Type SrcType = Src0RM->getType(); | |
1320 IceString DstSubstring = (DestType == IceType_i64 ? "64" : "32"); | |
1321 IceString SrcSubstring = (SrcType == IceType_f32 ? "f" : "d"); | |
1322 // Possibilities are cvtftoui32, cvtdtoui32, cvtftoui64, cvtdtoui64 | |
1323 IceString TargetString = "cvt" + SrcSubstring + "toui" + DstSubstring; | |
1324 // TODO: Call the correct compiler-rt helper function. | |
1325 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); | |
1326 Call->addArg(Inst->getSrc(0)); | |
1327 lowerCall(Call); | |
1328 return; | |
1329 } else { | |
1330 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type | |
1331 Variable *T_1 = makeReg(IceType_i32); | |
1332 Variable *T_2 = makeReg(Dest->getType()); | |
1333 _cvt(T_1, Src0RM); | |
1334 _mov(T_2, T_1); // T_1 and T_2 may have different integer types | |
1335 _mov(Dest, T_2); | |
1336 T_2->setPreferredRegister(T_1, true); | |
1337 } | |
1338 break; | |
1339 case InstCast::Sitofp: | |
1340 if (Src0RM->getType() == IceType_i64) { | |
1341 // Use a helper for x86-32. | |
1342 const SizeT MaxSrcs = 1; | |
1343 Type DestType = Dest->getType(); | |
1344 InstCall *Call = makeHelperCall( | |
1345 DestType == IceType_f32 ? "cvtsi64tof" : "cvtsi64tod", Dest, MaxSrcs); | |
1346 // TODO: Call the correct compiler-rt helper function. | |
1347 Call->addArg(Inst->getSrc(0)); | |
1348 lowerCall(Call); | |
1349 return; | |
1350 } else { | |
1351 // Sign-extend the operand. | |
1352 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2 | |
1353 Variable *T_1 = makeReg(IceType_i32); | |
1354 Variable *T_2 = makeReg(Dest->getType()); | |
1355 if (Src0RM->getType() == IceType_i32) | |
1356 _mov(T_1, Src0RM); | |
1357 else | |
1358 _movsx(T_1, Src0RM); | |
1359 _cvt(T_2, T_1); | |
1360 _mov(Dest, T_2); | |
1361 } | |
1362 break; | |
1363 case InstCast::Uitofp: | |
1364 if (Src0RM->getType() == IceType_i64 || Src0RM->getType() == IceType_i32) { | |
1365 // Use a helper for x86-32 and x86-64. Also use a helper for | |
1366 // i32 on x86-32. | |
1367 const SizeT MaxSrcs = 1; | |
1368 Type DestType = Dest->getType(); | |
1369 IceString SrcSubstring = (Src0RM->getType() == IceType_i64 ? "64" : "32"); | |
1370 IceString DstSubstring = (DestType == IceType_f32 ? "f" : "d"); | |
1371 // Possibilities are cvtui32tof, cvtui32tod, cvtui64tof, cvtui64tod | |
1372 IceString TargetString = "cvtui" + SrcSubstring + "to" + DstSubstring; | |
1373 // TODO: Call the correct compiler-rt helper function. | |
1374 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs); | |
1375 Call->addArg(Inst->getSrc(0)); | |
1376 lowerCall(Call); | |
1377 return; | |
1378 } else { | |
1379 // Zero-extend the operand. | |
1380 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2 | |
1381 Variable *T_1 = makeReg(IceType_i32); | |
1382 Variable *T_2 = makeReg(Dest->getType()); | |
1383 if (Src0RM->getType() == IceType_i32) | |
1384 _mov(T_1, Src0RM); | |
1385 else | |
1386 _movzx(T_1, Src0RM); | |
1387 _cvt(T_2, T_1); | |
1388 _mov(Dest, T_2); | |
1389 } | |
1390 break; | |
1391 case InstCast::Bitcast: | |
1392 if (Dest->getType() == Src0RM->getType()) { | |
1393 InstAssign *Assign = InstAssign::create(Func, Dest, Src0RM); | |
1394 lowerAssign(Assign); | |
1395 llvm_unreachable("Pointer bitcasts aren't lowered correctly."); | |
1396 return; | |
1397 } | |
1398 switch (Dest->getType()) { | |
1399 default: | |
1400 llvm_unreachable("Unexpected Bitcast dest type"); | |
1401 case IceType_i32: | |
1402 case IceType_f32: { | |
1403 Type DestType = Dest->getType(); | |
1404 Type SrcType = Src0RM->getType(); | |
1405 assert((DestType == IceType_i32 && SrcType == IceType_f32) || | |
1406 (DestType == IceType_f32 && SrcType == IceType_i32)); | |
1407 // a.i32 = bitcast b.f32 ==> | |
1408 // t.f32 = b.f32 | |
1409 // s.f32 = spill t.f32 | |
1410 // a.i32 = s.f32 | |
1411 Variable *T = NULL; | |
1412 // TODO: Should be able to force a spill setup by calling legalize() with | |
1413 // Legal_Mem and not Legal_Reg or Legal_Imm. | |
1414 Variable *Spill = Func->makeVariable(SrcType, Context.getNode()); | |
1415 Spill->setWeight(RegWeight::Zero); | |
1416 Spill->setPreferredRegister(Dest, true); | |
1417 _mov(T, Src0RM); | |
1418 _mov(Spill, T); | |
1419 _mov(Dest, Spill); | |
1420 } break; | |
1421 case IceType_i64: { | |
1422 assert(Src0RM->getType() == IceType_f64); | |
1423 // a.i64 = bitcast b.f64 ==> | |
1424 // s.f64 = spill b.f64 | |
1425 // t_lo.i32 = lo(s.f64) | |
1426 // a_lo.i32 = t_lo.i32 | |
1427 // t_hi.i32 = hi(s.f64) | |
1428 // a_hi.i32 = t_hi.i32 | |
1429 Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode()); | |
1430 Spill->setWeight(RegWeight::Zero); | |
1431 Spill->setPreferredRegister(llvm::dyn_cast<Variable>(Src0RM), true); | |
1432 _mov(Spill, Src0RM); | |
1433 | |
1434 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
1435 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
1436 Variable *T_Lo = makeReg(IceType_i32); | |
1437 Variable *T_Hi = makeReg(IceType_i32); | |
1438 VariableSplit *SpillLo = | |
1439 VariableSplit::create(Func, Spill, VariableSplit::Low); | |
1440 VariableSplit *SpillHi = | |
1441 VariableSplit::create(Func, Spill, VariableSplit::High); | |
1442 | |
1443 _mov(T_Lo, SpillLo); | |
1444 _mov(DestLo, T_Lo); | |
1445 _mov(T_Hi, SpillHi); | |
1446 _mov(DestHi, T_Hi); | |
1447 } break; | |
1448 case IceType_f64: { | |
1449 assert(Src0RM->getType() == IceType_i64); | |
1450 // a.f64 = bitcast b.i64 ==> | |
1451 // t_lo.i32 = b_lo.i32 | |
1452 // lo(s.f64) = t_lo.i32 | |
1453 // FakeUse(s.f64) | |
1454 // t_hi.i32 = b_hi.i32 | |
1455 // hi(s.f64) = t_hi.i32 | |
1456 // a.f64 = s.f64 | |
1457 Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode()); | |
1458 Spill->setWeight(RegWeight::Zero); | |
1459 Spill->setPreferredRegister(Dest, true); | |
1460 | |
1461 Context.insert(InstFakeDef::create(Func, Spill)); | |
1462 | |
1463 Variable *T_Lo = NULL, *T_Hi = NULL; | |
1464 VariableSplit *SpillLo = | |
1465 VariableSplit::create(Func, Spill, VariableSplit::Low); | |
1466 VariableSplit *SpillHi = | |
1467 VariableSplit::create(Func, Spill, VariableSplit::High); | |
1468 _mov(T_Lo, loOperand(Src0RM)); | |
1469 _store(T_Lo, SpillLo); | |
1470 _mov(T_Hi, hiOperand(Src0RM)); | |
1471 _store(T_Hi, SpillHi); | |
1472 _mov(Dest, Spill); | |
1473 } break; | |
1474 } | |
1475 break; | |
1476 } | |
1477 } | |
1478 | |
1479 void TargetX8632::lowerFcmp(const InstFcmp *Inst) { | |
1480 Operand *Src0 = Inst->getSrc(0); | |
1481 Operand *Src1 = Inst->getSrc(1); | |
1482 Variable *Dest = Inst->getDest(); | |
1483 // Lowering a = fcmp cond, b, c | |
1484 // ucomiss b, c /* only if C1 != Br_None */ | |
1485 // /* but swap b,c order if SwapOperands==true */ | |
1486 // mov a, <default> | |
1487 // j<C1> label /* only if C1 != Br_None */ | |
1488 // j<C2> label /* only if C2 != Br_None */ | |
1489 // FakeUse(a) /* only if C1 != Br_None */ | |
1490 // mov a, !<default> /* only if C1 != Br_None */ | |
1491 // label: /* only if C1 != Br_None */ | |
1492 InstFcmp::FCond Condition = Inst->getCondition(); | |
1493 size_t Index = static_cast<size_t>(Condition); | |
1494 assert(Index < TableFcmpSize); | |
1495 if (TableFcmp[Index].SwapOperands) { | |
1496 Operand *Tmp = Src0; | |
1497 Src0 = Src1; | |
1498 Src1 = Tmp; | |
1499 } | |
1500 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None); | |
1501 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None); | |
1502 if (HasC1) { | |
1503 Src0 = legalize(Src0); | |
1504 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | |
1505 Variable *T = NULL; | |
1506 _mov(T, Src0); | |
1507 _ucomiss(T, Src1RM); | |
1508 } | |
1509 Constant *Default = | |
1510 Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default); | |
1511 _mov(Dest, Default); | |
1512 if (HasC1) { | |
1513 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
1514 _br(TableFcmp[Index].C1, Label); | |
1515 if (HasC2) { | |
1516 _br(TableFcmp[Index].C2, Label); | |
1517 } | |
1518 Context.insert(InstFakeUse::create(Func, Dest)); | |
1519 Constant *NonDefault = | |
1520 Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default); | |
1521 _mov(Dest, NonDefault); | |
1522 Context.insert(Label); | |
1523 } | |
1524 } | |
1525 | |
1526 void TargetX8632::lowerIcmp(const InstIcmp *Inst) { | |
1527 Operand *Src0 = legalize(Inst->getSrc(0)); | |
1528 Operand *Src1 = legalize(Inst->getSrc(1)); | |
1529 Variable *Dest = Inst->getDest(); | |
1530 | |
1531 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1: | |
1532 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | |
1533 Constant *One = Ctx->getConstantInt(IceType_i32, 1); | |
1534 if (Src0->getType() == IceType_i64) { | |
1535 InstIcmp::ICond Condition = Inst->getCondition(); | |
1536 size_t Index = static_cast<size_t>(Condition); | |
1537 assert(Index < TableIcmp64Size); | |
1538 // The table is indexed by InstIcmp::Condition. Make sure it didn't fall | |
jvoung (off chromium)
2014/05/22 18:02:38
Also from before a wild xMacroIntegrityCheck appea
Jim Stichnoth
2014/05/22 20:19:15
Humph. I thought I had searched for more instance
| |
1539 // out of order. | |
1540 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); | |
1541 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); | |
1542 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) { | |
1543 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
1544 _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One)); | |
1545 _cmp(loOperand(Src0), Src1LoRI); | |
1546 _br(InstX8632Br::Br_ne, Label); | |
1547 _cmp(hiOperand(Src0), Src1HiRI); | |
1548 _br(InstX8632Br::Br_ne, Label); | |
1549 Context.insert(InstFakeUse::create(Func, Dest)); | |
1550 _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero)); | |
1551 Context.insert(Label); | |
1552 } else { | |
1553 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); | |
1554 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); | |
1555 _mov(Dest, One); | |
1556 _cmp(hiOperand(Src0), Src1HiRI); | |
1557 _br(TableIcmp64[Index].C1, LabelTrue); | |
1558 _br(TableIcmp64[Index].C2, LabelFalse); | |
1559 _cmp(loOperand(Src0), Src1LoRI); | |
1560 _br(TableIcmp64[Index].C3, LabelTrue); | |
1561 Context.insert(LabelFalse); | |
1562 Context.insert(InstFakeUse::create(Func, Dest)); | |
1563 _mov(Dest, Zero); | |
1564 Context.insert(LabelTrue); | |
1565 } | |
1566 return; | |
1567 } | |
1568 | |
1569 // If Src1 is an immediate, or known to be a physical register, we can | |
1570 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into | |
1571 // a physical register. (Actually, either Src0 or Src1 can be chosen for | |
1572 // the physical register, but unfortunately we have to commit to one or | |
1573 // the other before register allocation.) | |
1574 bool IsSrc1ImmOrReg = false; | |
1575 if (llvm::isa<Constant>(Src1)) { | |
1576 IsSrc1ImmOrReg = true; | |
1577 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { | |
1578 if (Var->hasReg()) | |
1579 IsSrc1ImmOrReg = true; | |
1580 } | |
1581 | |
1582 // cmp b, c | |
1583 Operand *Src0New = | |
1584 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true); | |
1585 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
1586 _cmp(Src0New, Src1); | |
1587 _mov(Dest, One); | |
1588 _br(getIcmp32Mapping(Inst->getCondition()), Label); | |
1589 Context.insert(InstFakeUse::create(Func, Dest)); | |
1590 _mov(Dest, Zero); | |
1591 Context.insert(Label); | |
1592 } | |
1593 | |
1594 void TargetX8632::lowerLoad(const InstLoad *Inst) { | |
1595 // A Load instruction can be treated the same as an Assign | |
1596 // instruction, after the source operand is transformed into an | |
1597 // OperandX8632Mem operand. Note that the address mode | |
1598 // optimization already creates an OperandX8632Mem operand, so it | |
1599 // doesn't need another level of transformation. | |
1600 Type Ty = Inst->getDest()->getType(); | |
1601 Operand *Src0 = Inst->getSourceAddress(); | |
1602 // Address mode optimization already creates an OperandX8632Mem | |
1603 // operand, so it doesn't need another level of transformation. | |
1604 if (!llvm::isa<OperandX8632Mem>(Src0)) { | |
1605 Variable *Base = llvm::dyn_cast<Variable>(Src0); | |
1606 Constant *Offset = llvm::dyn_cast<Constant>(Src0); | |
1607 assert(Base || Offset); | |
1608 Src0 = OperandX8632Mem::create(Func, Ty, Base, Offset); | |
1609 } | |
1610 | |
1611 InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0); | |
1612 lowerAssign(Assign); | |
1613 } | |
1614 | |
1615 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) { | |
1616 Func->setError("Phi found in regular instruction list"); | |
1617 } | |
1618 | |
1619 void TargetX8632::lowerRet(const InstRet *Inst) { | |
1620 Variable *Reg = NULL; | |
1621 if (Inst->hasRetValue()) { | |
1622 Operand *Src0 = legalize(Inst->getRetValue()); | |
1623 if (Src0->getType() == IceType_i64) { | |
1624 Variable *eax = legalizeToVar(loOperand(Src0), false, Reg_eax); | |
1625 Variable *edx = legalizeToVar(hiOperand(Src0), false, Reg_edx); | |
1626 Reg = eax; | |
1627 Context.insert(InstFakeUse::create(Func, edx)); | |
1628 } else if (Src0->getType() == IceType_f32 || | |
1629 Src0->getType() == IceType_f64) { | |
1630 _fld(Src0); | |
1631 } else { | |
1632 _mov(Reg, Src0, Reg_eax); | |
1633 } | |
1634 } | |
1635 _ret(Reg); | |
1636 // Add a fake use of esp to make sure esp stays alive for the entire | |
1637 // function. Otherwise post-call esp adjustments get dead-code | |
1638 // eliminated. TODO: Are there more places where the fake use | |
1639 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not | |
1640 // have a ret instruction. | |
1641 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); | |
1642 Context.insert(InstFakeUse::create(Func, esp)); | |
1643 } | |
1644 | |
1645 void TargetX8632::lowerSelect(const InstSelect *Inst) { | |
1646 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1: | |
1647 Variable *Dest = Inst->getDest(); | |
1648 Operand *SrcT = Inst->getTrueOperand(); | |
1649 Operand *SrcF = Inst->getFalseOperand(); | |
1650 Operand *Condition = legalize(Inst->getCondition()); | |
1651 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | |
1652 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
1653 | |
1654 if (Dest->getType() == IceType_i64) { | |
1655 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
1656 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
1657 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true); | |
1658 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true); | |
1659 _cmp(Condition, Zero); | |
1660 _mov(DestLo, SrcLoRI); | |
1661 _mov(DestHi, SrcHiRI); | |
1662 _br(InstX8632Br::Br_ne, Label); | |
1663 Context.insert(InstFakeUse::create(Func, DestLo)); | |
1664 Context.insert(InstFakeUse::create(Func, DestHi)); | |
1665 Operand *SrcFLo = loOperand(SrcF); | |
1666 Operand *SrcFHi = hiOperand(SrcF); | |
1667 SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true); | |
1668 SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true); | |
1669 _mov(DestLo, SrcLoRI); | |
1670 _mov(DestHi, SrcHiRI); | |
1671 } else { | |
1672 _cmp(Condition, Zero); | |
1673 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true); | |
1674 _mov(Dest, SrcT); | |
1675 _br(InstX8632Br::Br_ne, Label); | |
1676 Context.insert(InstFakeUse::create(Func, Dest)); | |
1677 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true); | |
1678 _mov(Dest, SrcF); | |
1679 } | |
1680 | |
1681 Context.insert(Label); | |
1682 } | |
1683 | |
1684 void TargetX8632::lowerStore(const InstStore *Inst) { | |
1685 Operand *Value = Inst->getData(); | |
1686 Operand *Addr = Inst->getAddr(); | |
1687 OperandX8632Mem *NewAddr = llvm::dyn_cast<OperandX8632Mem>(Addr); | |
1688 // Address mode optimization already creates an OperandX8632Mem | |
1689 // operand, so it doesn't need another level of transformation. | |
1690 if (!NewAddr) { | |
1691 // The address will be either a constant (which represents a global | |
1692 // variable) or a variable, so either the Base or Offset component | |
1693 // of the OperandX8632Mem will be set. | |
1694 Variable *Base = llvm::dyn_cast<Variable>(Addr); | |
1695 Constant *Offset = llvm::dyn_cast<Constant>(Addr); | |
1696 assert(Base || Offset); | |
1697 NewAddr = OperandX8632Mem::create(Func, Value->getType(), Base, Offset); | |
1698 } | |
1699 NewAddr = llvm::cast<OperandX8632Mem>(legalize(NewAddr)); | |
1700 | |
1701 if (NewAddr->getType() == IceType_i64) { | |
1702 Value = legalize(Value); | |
1703 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true); | |
1704 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true); | |
1705 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); | |
1706 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); | |
1707 } else { | |
1708 Value = legalize(Value, Legal_Reg | Legal_Imm, true); | |
1709 _store(Value, NewAddr); | |
1710 } | |
1711 } | |
1712 | |
1713 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { | |
1714 // This implements the most naive possible lowering. | |
1715 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default | |
1716 Operand *Src0 = Inst->getComparison(); | |
1717 SizeT NumCases = Inst->getNumCases(); | |
1718 // OK, we'll be slightly less naive by forcing Src into a physical | |
1719 // register if there are 2 or more uses. | |
1720 if (NumCases >= 2) | |
1721 Src0 = legalizeToVar(Src0, true); | |
1722 else | |
1723 Src0 = legalize(Src0, Legal_All, true); | |
1724 for (SizeT I = 0; I < NumCases; ++I) { | |
1725 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I)); | |
1726 _cmp(Src0, Value); | |
1727 _br(InstX8632Br::Br_e, Inst->getLabel(I)); | |
1728 } | |
1729 | |
1730 _br(Inst->getLabelDefault()); | |
1731 } | |
1732 | |
1733 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { | |
1734 const SizeT MaxSrcs = 0; | |
1735 Variable *Dest = NULL; | |
1736 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs); | |
1737 lowerCall(Call); | |
1738 } | |
1739 | |
1740 Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed, | |
1741 bool AllowOverlap, int32_t RegNum) { | |
1742 // Assert that a physical register is allowed. To date, all calls | |
1743 // to legalize() allow a physical register. If a physical register | |
1744 // needs to be explicitly disallowed, then new code will need to be | |
1745 // written to force a spill. | |
1746 assert(Allowed & Legal_Reg); | |
1747 // If we're asking for a specific physical register, make sure we're | |
1748 // not allowing any other operand kinds. (This could be future | |
1749 // work, e.g. allow the shl shift amount to be either an immediate | |
1750 // or in ecx.) | |
1751 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg); | |
1752 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(From)) { | |
1753 // Before doing anything with a Mem operand, we need to ensure | |
1754 // that the Base and Index components are in physical registers. | |
1755 Variable *Base = Mem->getBase(); | |
1756 Variable *Index = Mem->getIndex(); | |
1757 Variable *RegBase = NULL; | |
1758 Variable *RegIndex = NULL; | |
1759 if (Base) { | |
1760 RegBase = legalizeToVar(Base, true); | |
1761 } | |
1762 if (Index) { | |
1763 RegIndex = legalizeToVar(Index, true); | |
1764 } | |
1765 if (Base != RegBase || Index != RegIndex) { | |
1766 From = | |
1767 OperandX8632Mem::create(Func, Mem->getType(), RegBase, | |
1768 Mem->getOffset(), RegIndex, Mem->getShift()); | |
1769 } | |
1770 | |
1771 if (!(Allowed & Legal_Mem)) { | |
1772 Variable *Reg = makeReg(From->getType(), RegNum); | |
1773 _mov(Reg, From, RegNum); | |
1774 From = Reg; | |
1775 } | |
1776 return From; | |
1777 } | |
1778 if (llvm::isa<Constant>(From)) { | |
1779 if (!(Allowed & Legal_Imm)) { | |
1780 Variable *Reg = makeReg(From->getType(), RegNum); | |
1781 _mov(Reg, From); | |
1782 From = Reg; | |
1783 } | |
1784 return From; | |
1785 } | |
1786 if (Variable *Var = llvm::dyn_cast<Variable>(From)) { | |
1787 // We need a new physical register for the operand if: | |
1788 // Mem is not allowed and Var->getRegNum() is unknown, or | |
1789 // RegNum is required and Var->getRegNum() doesn't match. | |
1790 if ((!(Allowed & Legal_Mem) && !Var->hasReg()) || | |
1791 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { | |
1792 Variable *Reg = makeReg(From->getType(), RegNum); | |
1793 if (RegNum == Variable::NoRegister) { | |
1794 Reg->setPreferredRegister(Var, AllowOverlap); | |
1795 } | |
1796 _mov(Reg, From); | |
1797 From = Reg; | |
1798 } | |
1799 return From; | |
1800 } | |
1801 llvm_unreachable("Unhandled operand kind in legalize()"); | |
1802 return From; | |
1803 } | |
1804 | |
1805 // Provide a trivial wrapper to legalize() for this common usage. | |
1806 Variable *TargetX8632::legalizeToVar(Operand *From, bool AllowOverlap, | |
1807 int32_t RegNum) { | |
1808 return llvm::cast<Variable>(legalize(From, Legal_Reg, AllowOverlap, RegNum)); | |
1809 } | |
1810 | |
1811 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { | |
1812 Variable *Reg = Func->makeVariable(Type, Context.getNode()); | |
1813 if (RegNum == Variable::NoRegister) | |
1814 Reg->setWeightInfinite(); | |
1815 else | |
1816 Reg->setRegNum(RegNum); | |
1817 return Reg; | |
1818 } | |
1819 | |
1820 void TargetX8632::postLower() { | |
1821 if (Ctx->getOptLevel() != Opt_m1) | |
1822 return; | |
1823 // TODO: Avoid recomputing WhiteList every instruction. | |
1824 llvm::SmallBitVector WhiteList = getRegisterSet(RegSet_All, RegSet_None); | |
1825 // Make one pass to black-list pre-colored registers. TODO: If | |
1826 // there was some prior register allocation pass that made register | |
1827 // assignments, those registers need to be black-listed here as | |
1828 // well. | |
1829 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E; | |
1830 ++I) { | |
1831 const Inst *Inst = *I; | |
1832 if (Inst->isDeleted()) | |
1833 continue; | |
1834 if (llvm::isa<InstFakeKill>(Inst)) | |
1835 continue; | |
1836 SizeT VarIndex = 0; | |
1837 for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) { | |
1838 Operand *Src = Inst->getSrc(SrcNum); | |
1839 SizeT NumVars = Src->getNumVars(); | |
1840 for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) { | |
1841 const Variable *Var = Src->getVar(J); | |
1842 if (!Var->hasReg()) | |
1843 continue; | |
1844 WhiteList[Var->getRegNum()] = false; | |
1845 } | |
1846 } | |
1847 } | |
1848 // The second pass colors infinite-weight variables. | |
1849 llvm::SmallBitVector AvailableRegisters = WhiteList; | |
1850 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E; | |
1851 ++I) { | |
1852 const Inst *Inst = *I; | |
1853 if (Inst->isDeleted()) | |
1854 continue; | |
1855 SizeT VarIndex = 0; | |
1856 for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) { | |
1857 Operand *Src = Inst->getSrc(SrcNum); | |
1858 SizeT NumVars = Src->getNumVars(); | |
1859 for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) { | |
1860 Variable *Var = Src->getVar(J); | |
1861 if (Var->hasReg()) | |
1862 continue; | |
1863 if (!Var->getWeight().isInf()) | |
1864 continue; | |
1865 llvm::SmallBitVector AvailableTypedRegisters = | |
1866 AvailableRegisters & getRegisterSetForType(Var->getType()); | |
1867 if (!AvailableTypedRegisters.any()) { | |
1868 // This is a hack in case we run out of physical registers | |
1869 // due to an excessive number of "push" instructions from | |
1870 // lowering a call. | |
1871 AvailableRegisters = WhiteList; | |
1872 AvailableTypedRegisters = | |
1873 AvailableRegisters & getRegisterSetForType(Var->getType()); | |
1874 } | |
1875 assert(AvailableTypedRegisters.any()); | |
1876 int32_t RegNum = AvailableTypedRegisters.find_first(); | |
1877 Var->setRegNum(RegNum); | |
1878 AvailableRegisters[RegNum] = false; | |
1879 } | |
1880 } | |
1881 } | |
1882 } | |
1883 | |
1884 } // end of namespace Ice | |
OLD | NEW |