OLD | NEW |
---|---|
(Empty) | |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | |
2 // | |
3 // The Subzero Code Generator | |
4 // | |
5 // This file is distributed under the University of Illinois Open Source | |
6 // License. See LICENSE.TXT for details. | |
7 // | |
8 //===----------------------------------------------------------------------===// | |
9 // | |
10 // This file implements the TargetLoweringX8632 class, which | |
11 // consists almost entirely of the lowering sequence for each | |
12 // high-level instruction. It also implements | |
13 // TargetX8632Fast::postLower() which does the simplest possible | |
14 // register allocation for the "fast" target. | |
15 // | |
16 //===----------------------------------------------------------------------===// | |
17 | |
18 #include "IceDefs.h" | |
19 #include "IceCfg.h" | |
20 #include "IceCfgNode.h" | |
21 #include "IceInstX8632.h" | |
22 #include "IceOperand.h" | |
23 #include "IceTargetLoweringX8632.def" | |
24 #include "IceTargetLoweringX8632.h" | |
25 | |
26 namespace Ice { | |
27 | |
28 namespace { | |
29 | |
30 // The following table summarizes the logic for lowering the fcmp instruction. | |
31 // There is one table entry for each of the 16 conditions. A comment in | |
32 // lowerFcmp() describes the lowering template. In the most general case, there | |
33 // is a compare followed by two conditional branches, because some fcmp | |
34 // conditions don't map to a single x86 conditional branch. However, in many | |
35 // cases it is possible to swap the operands in the comparison and have a single | |
36 // conditional branch. Since it's quite tedious to validate the table by hand, | |
37 // good execution tests are helpful. | |
38 | |
// Per-condition lowering data for fcmp: the default result value, whether
// the compare operands should be swapped, and the branch condition(s) C1/C2
// emitted after the compare.  Entries are generated from FCMPX8632_TABLE so
// they stay keyed by the fcmp condition enum.
const struct TableFcmp_ {
  uint32_t Default;
  bool SwapOperands;
  InstX8632Br::BrCond C1, C2;
} TableFcmp[] = {
#define X(val, dflt, swap, C1, C2)                                             \
  { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 }                             \
  ,
    FCMPX8632_TABLE
#undef X
};
const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
51 | |
52 // The following table summarizes the logic for lowering the icmp instruction | |
53 // for i32 and narrower types. Each icmp condition has a clear mapping to an | |
54 // x86 conditional branch instruction. | |
55 | |
// Maps each icmp condition (for i32 and narrower operands) to its single
// x86 branch condition.  Generated from ICMPX8632_TABLE, indexed by the
// icmp condition enum.
const struct TableIcmp32_ {
  InstX8632Br::BrCond Mapping;
} TableIcmp32[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { InstX8632Br::C_32 }                                                        \
  ,
    ICMPX8632_TABLE
#undef X
};
const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
66 | |
67 // The following table summarizes the logic for lowering the icmp instruction | |
68 // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and | |
69 // conditional branches are needed. For the other conditions, three separate | |
70 // conditional branches are needed. | |
// Per-condition branch triples for i64 icmp lowering.  Generated from
// ICMPX8632_TABLE, indexed by the icmp condition enum; up to three
// conditional branches (C1..C3) may be emitted per comparison.
const struct TableIcmp64_ {
  InstX8632Br::BrCond C1, C2, C3;
} TableIcmp64[] = {
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  { InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 }               \
  ,
    ICMPX8632_TABLE
#undef X
};
const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
81 | |
82 InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { | |
83 size_t Index = static_cast<size_t>(Cond); | |
84 assert(Index < TableIcmp32Size); | |
85 return TableIcmp32[Index].Mapping; | |
86 } | |
87 | |
88 // In some cases, there are x-macros tables for both high-level and | |
89 // low-level instructions/operands that use the same enum key value. | |
90 // The tables are kept separate to maintain a proper separation | |
91 // between abstraction layers. There is a risk that the tables | |
92 // could get out of sync if enum values are reordered or if entries | |
93 // are added or deleted. This dummy function uses static_assert to | |
94 // ensure everything is kept in sync. | |
// Compile-time cross-check of the x-macro table pairs: for each pair of
// high-level/low-level tables, the entry keys must appear in the same order
// with the same enum values.  All checks are STATIC_ASSERTs, so this
// function generates no runtime code.
void xMacroIntegrityCheck() {
  // Validate the enum values in FCMPX8632_TABLE.
  {
    // Define a temporary set of enum values based on low-level
    // table entries.
    enum _tmp_enum {
#define X(val, dflt, swap, C1, C2) _tmp_##val,
      FCMPX8632_TABLE
#undef X
    };
    // Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
    ICEINSTFCMP_TABLE;
#undef X
    // Define a set of constants based on low-level table entries,
    // and ensure the table entry keys are consistent.
#define X(val, dflt, swap, C1, C2)                                             \
  static const int _table2_##val = _tmp_##val;                                 \
  STATIC_ASSERT(_table1_##val == _table2_##val);
    FCMPX8632_TABLE;
#undef X
    // Repeat the static asserts with respect to the high-level
    // table entries in case the high-level table has extra entries.
#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICEINSTFCMP_TABLE;
#undef X
  }

  // Validate the enum values in ICMPX8632_TABLE.
  {
    // Define a temporary set of enum values based on low-level
    // table entries.
    enum _tmp_enum {
#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
      ICMPX8632_TABLE
#undef X
    };
    // Define a set of constants based on high-level table entries.
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
    ICEINSTICMP_TABLE;
#undef X
    // Define a set of constants based on low-level table entries,
    // and ensure the table entry keys are consistent.
#define X(val, C_32, C1_64, C2_64, C3_64)                                      \
  static const int _table2_##val = _tmp_##val;                                 \
  STATIC_ASSERT(_table1_##val == _table2_##val);
    ICMPX8632_TABLE;
#undef X
    // Repeat the static asserts with respect to the high-level
    // table entries in case the high-level table has extra entries.
#define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICEINSTICMP_TABLE;
#undef X
  }

  // Validate the enum values in ICETYPEX8632_TABLE.
  {
    // Define a temporary set of enum values based on low-level
    // table entries.
    enum _tmp_enum {
#define X(tag, cvt, sdss, width) _tmp_##tag,
      ICETYPEX8632_TABLE
#undef X
    };
    // Define a set of constants based on high-level table entries.
#define X(tag, size, align, str) static const int _table1_##tag = tag;
    ICETYPE_TABLE;
#undef X
    // Define a set of constants based on low-level table entries,
    // and ensure the table entry keys are consistent.
#define X(tag, cvt, sdss, width)                                               \
  static const int _table2_##tag = _tmp_##tag;                                 \
  STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICETYPEX8632_TABLE;
#undef X
    // Repeat the static asserts with respect to the high-level
    // table entries in case the high-level table has extra entries.
#define X(tag, size, align, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
    ICETYPE_TABLE;
#undef X
  }
}
177 | |
178 } // end of anonymous namespace | |
179 | |
// Constructs the x86-32 lowering target: initializes frame-related state
// and builds the per-type register sets from REGX8632_TABLE.
TargetX8632::TargetX8632(Cfg *Func)
    : TargetLowering(Func), IsEbpBasedFrame(false), FrameSizeLocals(0),
      LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
      PhysicalRegisters(VarList(Reg_NUM)) {
  // Masks of registers usable for each class of type, filled in from the
  // attribute columns of the register table below.
  llvm::SmallBitVector IntegerRegisters(Reg_NUM);
  llvm::SmallBitVector IntegerRegistersI8(Reg_NUM);
  llvm::SmallBitVector FloatRegisters(Reg_NUM);
  llvm::SmallBitVector InvalidRegisters(Reg_NUM);
  ScratchRegs.resize(Reg_NUM);
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  IntegerRegisters[val] = isInt;                                               \
  IntegerRegistersI8[val] = isI8;                                              \
  FloatRegisters[val] = isFP;                                                  \
  ScratchRegs[val] = scratch;
  REGX8632_TABLE;
#undef X
  // TODO(review): this eager per-type initialization could be done
  // lazily/on demand instead (see code-review discussion).
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  // i64 values are split into 32-bit halves, so they draw from the same
  // integer register pool.
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = FloatRegisters;
  TypeToRegisterSet[IceType_f64] = FloatRegisters;
}
206 | |
// The "Om1" (fast) translation pipeline: lower phis, generate target code,
// then lay out the stack frame.  Each phase is timed, and translation
// aborts as soon as the Cfg records an error.
void TargetX8632::translateOm1() {
  GlobalContext *Context = Func->getContext();
  Ostream &Str = Context->getStrDump();
  Timer T_placePhiLoads;
  Func->placePhiLoads();
  if (Func->hasError())
    return;
  T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
  Timer T_placePhiStores;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
  Timer T_deletePhis;
  Func->deletePhis();
  if (Func->hasError())
    return;
  T_deletePhis.printElapsedUs(Context, "deletePhis()");
  // Only the banner is verbosity-gated here; Func->dump() is presumably
  // verbosity-aware internally — TODO confirm.
  if (Context->isVerbose())
    Str << "================ After Phi lowering ================\n";
  Func->dump();

  Timer T_genCode;
  Func->genCode();
  if (Func->hasError())
    return;
  T_genCode.printElapsedUs(Context, "genCode()");
  if (Context->isVerbose())
    Str << "================ After initial x8632 codegen ================\n";
  Func->dump();

  Timer T_genFrame;
  Func->genFrame();
  if (Func->hasError())
    return;
  T_genFrame.printElapsedUs(Context, "genFrame()");
  if (Context->isVerbose())
    Str << "================ After stack frame mapping ================\n";
  Func->dump();
}
247 | |
// 32-bit register names, indexed by register number (generated from
// REGX8632_TABLE).  The 8/16-bit name variants live in getRegName().
IceString TargetX8632::RegNames[] = {
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  name,
    REGX8632_TABLE
#undef X
};
255 | |
256 Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) { | |
257 assert(RegNum < PhysicalRegisters.size()); | |
258 Variable *Reg = PhysicalRegisters[RegNum]; | |
259 if (Reg == NULL) { | |
260 CfgNode *Node = NULL; // NULL means multi-block lifetime | |
261 Reg = Func->makeVariable(IceType_i32, Node); | |
262 Reg->setRegNum(RegNum); | |
263 PhysicalRegisters[RegNum] = Reg; | |
264 } | |
265 return Reg; | |
266 } | |
267 | |
// Returns the name of register RegNum sized for Ty: the 8-bit name for
// i1/i8, the 16-bit name for i16, and the 32-bit name otherwise.
IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
  assert(RegNum < Reg_NUM);
  // The "" prefix concatenates with the table's name literal — presumably
  // to force a compile error if an entry is not a string literal.
  static IceString RegNames8[] = {
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  "" name8,
      REGX8632_TABLE
#undef X
  };
  static IceString RegNames16[] = {
#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  "" name16,
      REGX8632_TABLE
#undef X
  };
  switch (Ty) {
  case IceType_i1:
  case IceType_i8:
    return RegNames8[RegNum];
  case IceType_i16:
    return RegNames16[RegNum];
  default:
    return RegNames[RegNum];
  }
}
294 | |
295 void TargetX8632::emitVariable(const Variable *Var, const Cfg *Func) const { | |
296 Ostream &Str = Ctx->getStrEmit(); | |
297 assert(Var->getLocalUseNode() == NULL || | |
298 Var->getLocalUseNode() == Func->getCurrentNode()); | |
299 if (Var->hasReg()) { | |
300 Str << getRegName(Var->getRegNum(), Var->getType()); | |
301 return; | |
302 } | |
303 Str << InstX8632::getWidthString(Var->getType()); | |
304 Str << " [" << getRegName(getFrameOrStackReg(), IceType_i32); | |
305 int32_t Offset = Var->getStackOffset() + getStackAdjustment(); | |
306 if (Offset) { | |
307 if (Offset > 0) | |
308 Str << "+"; | |
309 Str << Offset; | |
310 } | |
311 Str << "]"; | |
312 } | |
313 | |
314 // Helper function for addProlog(). Sets the frame offset for Arg, | |
315 // updates InArgsSizeBytes according to Arg's width, and generates an | |
316 // instruction to copy Arg into its assigned register if applicable. | |
317 // For an I64 arg that has been split into Lo and Hi components, it | |
318 // calls itself recursively on the components, taking care to handle | |
319 // Lo first because of the little-endian architecture. | |
void TargetX8632::setArgOffsetAndCopy(Variable *Arg, Variable *FramePtr,
                                      int32_t BasicFrameOffset,
                                      int32_t &InArgsSizeBytes) {
  Variable *Lo = Arg->getLo();
  Variable *Hi = Arg->getHi();
  Type Ty = Arg->getType();
  // A split i64 arg is handled by recursing on its halves, Lo first so
  // the low word gets the lower address (little-endian layout).
  if (Lo && Hi && Ty == IceType_i64) {
    assert(Lo->getType() != IceType_i64); // don't want infinite recursion
    assert(Hi->getType() != IceType_i64); // don't want infinite recursion
    setArgOffsetAndCopy(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    setArgOffsetAndCopy(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
    return;
  }
  Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
  // If the arg was register-allocated, copy it from its stack home into
  // the assigned register.
  if (Arg->hasReg()) {
    assert(Ty != IceType_i64);
    OperandX8632Mem *Mem = OperandX8632Mem::create(
        Func, Ty, FramePtr,
        Ctx->getConstantInt(IceType_i32, Arg->getStackOffset()));
    _mov(Arg, Mem);
  }
  InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
}
343 | |
// Emits the function prolog into Node: pushes preserved registers, sets up
// the (optional) ebp frame, reserves locals space, and assigns stack
// offsets to args and register-less variables.
void TargetX8632::addProlog(CfgNode *Node) {
  // If SimpleCoalescing is false, each variable without a register
  // gets its own unique stack slot, which leads to large stack
  // frames.  If SimpleCoalescing is true, then each "global" variable
  // without a register gets its own slot, but "local" variable slots
  // are reused across basic blocks.  E.g., if A and B are local to
  // block 1 and C is local to block 2, then C may share a slot with A
  // or B.
  // NOTE(review): deliberately a compile-time constant rather than a
  // command-line flag; the 'false' path is kept for experimentation.
  const bool SimpleCoalescing = true;
  int32_t InArgsSizeBytes = 0;
  int32_t RetIpSizeBytes = 4; // size of the pushed return address
  int32_t PreservedRegsSizeBytes = 0;
  LocalsSizeBytes = 0;
  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  // Determine stack frame offsets for each Variable without a
  // register assignment.  This can be done as one variable per stack
  // slot.  Or, do coalescing by running the register allocator again
  // with an infinite set of registers (as a side effect, this gives
  // variables a second chance at physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one
  // block of space on the frame for globals (variables with
  // multi-block lifetime), and one block to share for locals
  // (single-block lifetime).

  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);

  int32_t GlobalsSize = 0;
  // Per-node running total of locals space; only the maximum across nodes
  // is needed because local slots are shared between nodes.
  std::vector<int> LocalsSize(Func->getNumNodes());

  // Prepass.  Compute RegsUsed, PreservedRegsSizeBytes, and
  // LocalsSizeBytes.
  RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
  const VarList &Variables = Func->getVariables();
  const VarList &Args = Func->getArgs();
  for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
       I != E; ++I) {
    Variable *Var = *I;
    if (Var->hasReg()) {
      RegsUsed[Var->getRegNum()] = true;
      continue;
    }
    // An argument passed on the stack already has a stack slot.
    if (Var->getIsArg())
      continue;
    // A spill slot linked to a variable with a stack slot should reuse
    // that stack slot.
    if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
      if (Variable *Linked = Var->getPreferredRegister()) {
        if (!Linked->hasReg())
          continue;
      }
    }
    int32_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing) {
      if (Var->isMultiblockLife()) {
        GlobalsSize += Increment;
      } else {
        SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
        LocalsSize[NodeIndex] += Increment;
        if (LocalsSize[NodeIndex] > LocalsSizeBytes)
          LocalsSizeBytes = LocalsSize[NodeIndex];
      }
    } else {
      LocalsSizeBytes += Increment;
    }
  }
  LocalsSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers.
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (CalleeSaves[i] && RegsUsed[i]) {
      PreservedRegsSizeBytes += 4;
      const bool SuppressStackAdjustment = true;
      _push(getPhysicalRegister(i), SuppressStackAdjustment);
    }
  }

  // Generate "push ebp; mov ebp, esp"
  if (IsEbpBasedFrame) {
    assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
               .count() == 0);
    PreservedRegsSizeBytes += 4;
    Variable *ebp = getPhysicalRegister(Reg_ebp);
    Variable *esp = getPhysicalRegister(Reg_esp);
    const bool SuppressStackAdjustment = true;
    _push(ebp, SuppressStackAdjustment);
    _mov(ebp, esp);
  }

  // Generate "sub esp, LocalsSizeBytes"
  if (LocalsSizeBytes)
    _sub(getPhysicalRegister(Reg_esp),
         Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));

  // NOTE(review): the stack adjustment should already be zero at this
  // point; the reset is defensive.
  resetStackAdjustment();

  // Fill in stack offsets for args, and copy args into registers for
  // those that were register-allocated.  Args are pushed right to
  // left, so Arg[0] is closest to the stack/frame pointer.
  //
  // TODO: Make this right for different width args, calling
  // conventions, etc.  For one thing, args passed in registers will
  // need to be copied/shuffled to their home registers (the
  // RegManager code may have some permutation logic to leverage),
  // and if they have no home register, home space will need to be
  // allocated on the stack to copy into.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  int32_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;
  if (!IsEbpBasedFrame)
    BasicFrameOffset += LocalsSizeBytes;
  for (SizeT i = 0; i < Args.size(); ++i) {
    Variable *Arg = Args[i];
    setArgOffsetAndCopy(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.  This is a second pass over the
  // same variables, now assigning concrete offsets using the sizes
  // computed in the prepass.
  int32_t TotalGlobalsSize = GlobalsSize;
  GlobalsSize = 0;
  LocalsSize.assign(LocalsSize.size(), 0);
  int32_t NextStackOffset = 0;
  for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
       I != E; ++I) {
    Variable *Var = *I;
    if (Var->hasReg()) {
      RegsUsed[Var->getRegNum()] = true;
      continue;
    }
    if (Var->getIsArg())
      continue;
    if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
      if (Variable *Linked = Var->getPreferredRegister()) {
        if (!Linked->hasReg()) {
          // TODO: Make sure Linked has already been assigned a stack
          // slot.
          Var->setStackOffset(Linked->getStackOffset());
          continue;
        }
      }
    }
    int32_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing) {
      if (Var->isMultiblockLife()) {
        GlobalsSize += Increment;
        NextStackOffset = GlobalsSize;
      } else {
        SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
        LocalsSize[NodeIndex] += Increment;
        NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex];
      }
    } else {
      NextStackOffset += Increment;
    }
    // Offsets are negative relative to ebp, or positive relative to esp.
    if (IsEbpBasedFrame)
      Var->setStackOffset(-NextStackOffset);
    else
      Var->setStackOffset(LocalsSizeBytes - NextStackOffset);
  }
  this->FrameSizeLocals = NextStackOffset;
  this->HasComputedFrame = true;

  if (Func->getContext()->isVerbose(IceV_Frame)) {
    Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes
                                     << "\n"
                                     << "InArgsSizeBytes=" << InArgsSizeBytes
                                     << "\n"
                                     << "PreservedRegsSizeBytes="
                                     << PreservedRegsSizeBytes << "\n";
  }
}
517 | |
// Emits the function epilog (frame teardown plus pops of preserved
// registers) immediately before the block's ret instruction, if any.
void TargetX8632::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  // Scan backward for the ret: it is not necessarily the last instruction
  // (per review discussion, lowerRet() adds a FakeUse of esp after it —
  // TODO confirm).
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstX8632Ret>(*RI))
      break;
  }
  // No ret in this node: nothing to do.
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding
  // (forward) iterator position.
  InstList::iterator InsertPoint = RI.base();
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *esp = getPhysicalRegister(Reg_esp);
  if (IsEbpBasedFrame) {
    // mov esp, ebp; pop ebp
    Variable *ebp = getPhysicalRegister(Reg_ebp);
    _mov(esp, ebp);
    _pop(ebp);
  } else {
    // add esp, LocalsSizeBytes
    if (LocalsSizeBytes)
      _add(esp, Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
  }

  // Add pop instructions for preserved registers, in reverse order of the
  // prolog's pushes.  ebp was already popped above for ebp-based frames.
  llvm::SmallBitVector CalleeSaves =
      getRegisterSet(RegSet_CalleeSave, RegSet_None);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    SizeT j = CalleeSaves.size() - i - 1;
    if (j == Reg_ebp && IsEbpBasedFrame)
      continue;
    if (CalleeSaves[j] && RegsUsed[j]) {
      _pop(getPhysicalRegister(j));
    }
  }
}
558 | |
559 void TargetX8632::split64(Variable *Var) { | |
560 switch (Var->getType()) { | |
561 default: | |
562 return; | |
563 case IceType_i64: | |
564 // TODO: Only consider F64 if we need to push each half when | |
565 // passing as an argument to a function call. Note that each half | |
566 // is still typed as I32. | |
567 case IceType_f64: | |
568 break; | |
569 } | |
570 Variable *Lo = Var->getLo(); | |
571 Variable *Hi = Var->getHi(); | |
572 if (Lo) { | |
573 assert(Hi); | |
574 return; | |
575 } | |
576 assert(Hi == NULL); | |
577 Lo = Func->makeVariable(IceType_i32, Context.getNode(), | |
578 Var->getName() + "__lo"); | |
579 Hi = Func->makeVariable(IceType_i32, Context.getNode(), | |
580 Var->getName() + "__hi"); | |
581 Var->setLoHi(Lo, Hi); | |
582 if (Var->getIsArg()) { | |
583 Lo->setIsArg(Func); | |
584 Hi->setIsArg(Func); | |
585 } | |
586 } | |
587 | |
// Returns an i32 operand representing the low 32 bits of a 64-bit operand:
// the Lo half of a (possibly newly split) variable, the masked low word of
// a constant, or a memory operand at the original address.
Operand *TargetX8632::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  // Defensive fallback for non-debug builds.
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getLo();
  }
  if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
    // Mask down to the low 32 bits.
    uint64_t Mask = (1ull << 32) - 1;
    return Ctx->getConstantInt(IceType_i32, Const->getValue() & Mask);
  }
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
    // Little-endian: the low word is at the unmodified address.
    return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
                                   Mem->getOffset(), Mem->getIndex(),
                                   Mem->getShift());
  }
  llvm_unreachable("Unsupported operand type");
  return NULL;
}
608 | |
// Returns an i32 operand representing the high 32 bits of a 64-bit
// operand: the Hi half of a (possibly newly split) variable, the shifted
// high word of a constant, or a memory operand at offset+4.
Operand *TargetX8632::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  // Defensive fallback for non-debug builds.
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
    split64(Var);
    return Var->getHi();
  }
  if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
    return Ctx->getConstantInt(IceType_i32, Const->getValue() >> 32);
  }
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
    // Little-endian: the high word lives 4 bytes past the low word, so
    // fold +4 into whatever form the offset takes (absent, integer, or
    // relocatable symbol).
    Constant *Offset = Mem->getOffset();
    if (Offset == NULL)
      Offset = Ctx->getConstantInt(IceType_i32, 4);
    else if (ConstantInteger *IntOffset =
                 llvm::dyn_cast<ConstantInteger>(Offset)) {
      Offset = Ctx->getConstantInt(IceType_i32, 4 + IntOffset->getValue());
    } else if (ConstantRelocatable *SymOffset =
                   llvm::dyn_cast<ConstantRelocatable>(Offset)) {
      Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(),
                                   SymOffset->getName());
    }
    return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
                                   Mem->getIndex(), Mem->getShift());
  }
  llvm_unreachable("Unsupported operand type");
  return NULL;
}
638 | |
// Builds the set of physical registers selected by the Include mask and
// then filtered by the Exclude mask (Exclude wins when both match), using
// the attribute columns of REGX8632_TABLE.
// NOTE(review): this is more general than most callers strictly need; the
// generality is cheap and kept for flexibility.
llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
                                                 RegSetMask Exclude) const {
  llvm::SmallBitVector Registers(Reg_NUM);

#define X(val, init, name, name16, name8, scratch, preserved, stackptr,        \
          frameptr, isI8, isInt, isFP)                                         \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[val] = true;                                                     \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[val] = true;                                                     \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[val] = true;                                                     \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[val] = true;                                                     \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[val] = false;                                                    \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[val] = false;                                                    \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[val] = false;                                                    \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[val] = false;

  REGX8632_TABLE

#undef X

  return Registers;
}
668 | |
669 void TargetX8632::lowerAlloca(const InstAlloca *Inst) { | |
670 IsEbpBasedFrame = true; | |
671 // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize | |
672 // the number of adjustments of esp, etc. | |
673 Variable *esp = getPhysicalRegister(Reg_esp); | |
674 Operand *TotalSize = legalize(Inst->getSizeInBytes()); | |
675 Variable *Dest = Inst->getDest(); | |
676 _sub(esp, TotalSize); | |
677 _mov(Dest, esp); | |
678 } | |
679 | |
680 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | |
681 Variable *Dest = Inst->getDest(); | |
682 Operand *Src0 = legalize(Inst->getSrc(0)); | |
683 Operand *Src1 = legalize(Inst->getSrc(1)); | |
684 if (Dest->getType() == IceType_i64) { | |
685 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
686 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
687 Operand *Src0Lo = loOperand(Src0); | |
688 Operand *Src0Hi = hiOperand(Src0); | |
689 Operand *Src1Lo = loOperand(Src1); | |
690 Operand *Src1Hi = hiOperand(Src1); | |
691 Variable *T_Lo = NULL, *T_Hi = NULL; | |
692 switch (Inst->getOp()) { | |
693 case InstArithmetic::Add: | |
694 _mov(T_Lo, Src0Lo); | |
695 _add(T_Lo, Src1Lo); | |
696 _mov(DestLo, T_Lo); | |
697 _mov(T_Hi, Src0Hi); | |
698 _adc(T_Hi, Src1Hi); | |
699 _mov(DestHi, T_Hi); | |
700 break; | |
701 case InstArithmetic::And: | |
702 _mov(T_Lo, Src0Lo); | |
703 _and(T_Lo, Src1Lo); | |
704 _mov(DestLo, T_Lo); | |
705 _mov(T_Hi, Src0Hi); | |
706 _and(T_Hi, Src1Hi); | |
707 _mov(DestHi, T_Hi); | |
708 break; | |
709 case InstArithmetic::Or: | |
710 _mov(T_Lo, Src0Lo); | |
711 _or(T_Lo, Src1Lo); | |
712 _mov(DestLo, T_Lo); | |
713 _mov(T_Hi, Src0Hi); | |
714 _or(T_Hi, Src1Hi); | |
715 _mov(DestHi, T_Hi); | |
716 break; | |
717 case InstArithmetic::Xor: | |
718 _mov(T_Lo, Src0Lo); | |
719 _xor(T_Lo, Src1Lo); | |
720 _mov(DestLo, T_Lo); | |
721 _mov(T_Hi, Src0Hi); | |
722 _xor(T_Hi, Src1Hi); | |
723 _mov(DestHi, T_Hi); | |
724 break; | |
725 case InstArithmetic::Sub: | |
726 _mov(T_Lo, Src0Lo); | |
727 _sub(T_Lo, Src1Lo); | |
728 _mov(DestLo, T_Lo); | |
729 _mov(T_Hi, Src0Hi); | |
730 _sbb(T_Hi, Src1Hi); | |
731 _mov(DestHi, T_Hi); | |
732 break; | |
733 case InstArithmetic::Mul: { | |
734 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; | |
735 Variable *T_4Lo = makeReg(IceType_i32, Reg_eax); | |
jvoung (off chromium)
2014/05/15 23:47:34
When is it appropriate to use getPhysicalRegister(
Jim Stichnoth
2014/05/17 14:14:32
That's right. getPhysicalRegister() is used for t
| |
736 Variable *T_4Hi = makeReg(IceType_i32, Reg_edx); | |
737 // gcc does the following: | |
738 // a=b*c ==> | |
739 // t1 = b.hi; t1 *=(imul) c.lo | |
740 // t2 = c.hi; t2 *=(imul) b.lo | |
741 // t3:eax = b.lo | |
742 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo | |
743 // a.lo = t4.lo | |
744 // t4.hi += t1 | |
745 // t4.hi += t2 | |
746 // a.hi = t4.hi | |
747 _mov(T_1, Src0Hi); | |
748 _imul(T_1, Src1Lo); | |
749 _mov(T_2, Src1Hi); | |
750 _imul(T_2, Src0Lo); | |
751 _mov(T_3, Src0Lo, Reg_eax); | |
752 _mul(T_4Lo, T_3, Src1Lo); | |
753 // The mul instruction produces two dest variables, edx:eax. We | |
754 // create a fake definition of edx to account for this. | |
755 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); | |
756 _mov(DestLo, T_4Lo); | |
757 _add(T_4Hi, T_1); | |
758 _add(T_4Hi, T_2); | |
759 _mov(DestHi, T_4Hi); | |
760 } break; | |
761 case InstArithmetic::Shl: { | |
762 // TODO: Refactor the similarities between Shl, Lshr, and Ashr. | |
763 // gcc does the following: | |
764 // a=b<<c ==> | |
765 // t1:ecx = c.lo & 0xff | |
766 // t2 = b.lo | |
767 // t3 = b.hi | |
768 // t3 = shld t3, t2, t1 | |
769 // t2 = shl t2, t1 | |
770 // test t1, 0x20 | |
771 // je L1 | |
772 // use(t3) | |
773 // t3 = t2 | |
774 // t2 = 0 | |
775 // L1: | |
776 // a.lo = t2 | |
777 // a.hi = t3 | |
778 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; | |
779 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20); | |
780 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | |
781 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
782 _mov(T_1, Src1Lo, Reg_ecx); | |
783 _mov(T_2, Src0Lo); | |
784 _mov(T_3, Src0Hi); | |
785 _shld(T_3, T_2, T_1); | |
786 _shl(T_2, T_1); | |
787 _test(T_1, BitTest); | |
788 _br(InstX8632Br::Br_e, Label); | |
789 // Because of the intra-block control flow, we need to fake a use | |
790 // of T_3 to prevent its earlier definition from being dead-code | |
791 // eliminated in the presence of its later definition. | |
792 Context.insert(InstFakeUse::create(Func, T_3)); | |
793 _mov(T_3, T_2); | |
794 _mov(T_2, Zero); | |
795 Context.insert(Label); | |
796 _mov(DestLo, T_2); | |
797 _mov(DestHi, T_3); | |
798 } break; | |
799 case InstArithmetic::Lshr: { | |
800 // a=b>>c (unsigned) ==> | |
801 // t1:ecx = c.lo & 0xff | |
802 // t2 = b.lo | |
803 // t3 = b.hi | |
804 // t2 = shrd t2, t3, t1 | |
805 // t3 = shr t3, t1 | |
806 // test t1, 0x20 | |
807 // je L1 | |
808 // use(t2) | |
809 // t2 = t3 | |
810 // t3 = 0 | |
811 // L1: | |
812 // a.lo = t2 | |
813 // a.hi = t3 | |
814 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; | |
815 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20); | |
816 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | |
817 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
818 _mov(T_1, Src1Lo, Reg_ecx); | |
819 _mov(T_2, Src0Lo); | |
820 _mov(T_3, Src0Hi); | |
821 _shrd(T_2, T_3, T_1); | |
822 _shr(T_3, T_1); | |
823 _test(T_1, BitTest); | |
824 _br(InstX8632Br::Br_e, Label); | |
825 // Because of the intra-block control flow, we need to fake a use | |
826 // of T_3 to prevent its earlier definition from being dead-code | |
827 // eliminated in the presence of its later definition. | |
828 Context.insert(InstFakeUse::create(Func, T_2)); | |
829 _mov(T_2, T_3); | |
830 _mov(T_3, Zero); | |
831 Context.insert(Label); | |
832 _mov(DestLo, T_2); | |
833 _mov(DestHi, T_3); | |
834 } break; | |
835 case InstArithmetic::Ashr: { | |
836 // a=b>>c (signed) ==> | |
837 // t1:ecx = c.lo & 0xff | |
838 // t2 = b.lo | |
839 // t3 = b.hi | |
840 // t2 = shrd t2, t3, t1 | |
841 // t3 = sar t3, t1 | |
842 // test t1, 0x20 | |
843 // je L1 | |
844 // use(t2) | |
845 // t2 = t3 | |
846 // t3 = sar t3, 0x1f | |
847 // L1: | |
848 // a.lo = t2 | |
849 // a.hi = t3 | |
850 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; | |
851 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20); | |
852 Constant *SignExtend = Ctx->getConstantInt(IceType_i32, 0x1f); | |
853 InstX8632Label *Label = InstX8632Label::create(Func, this); | |
854 _mov(T_1, Src1Lo, Reg_ecx); | |
855 _mov(T_2, Src0Lo); | |
856 _mov(T_3, Src0Hi); | |
857 _shrd(T_2, T_3, T_1); | |
858 _sar(T_3, T_1); | |
859 _test(T_1, BitTest); | |
860 _br(InstX8632Br::Br_e, Label); | |
861 // Because of the intra-block control flow, we need to fake a use | |
862 // of T_3 to prevent its earlier definition from being dead-code | |
863 // eliminated in the presence of its later definition. | |
864 Context.insert(InstFakeUse::create(Func, T_2)); | |
865 _mov(T_2, T_3); | |
866 _sar(T_3, SignExtend); | |
867 Context.insert(Label); | |
868 _mov(DestLo, T_2); | |
869 _mov(DestHi, T_3); | |
870 } break; | |
871 case InstArithmetic::Udiv: { | |
872 const SizeT MaxSrcs = 2; | |
873 InstCall *Call = makeHelperCall("__udivdi3", Dest, MaxSrcs); | |
874 Call->addArg(Inst->getSrc(0)); | |
875 Call->addArg(Inst->getSrc(1)); | |
876 lowerCall(Call); | |
877 } break; | |
878 case InstArithmetic::Sdiv: { | |
879 const SizeT MaxSrcs = 2; | |
880 InstCall *Call = makeHelperCall("__divdi3", Dest, MaxSrcs); | |
881 Call->addArg(Inst->getSrc(0)); | |
882 Call->addArg(Inst->getSrc(1)); | |
883 lowerCall(Call); | |
884 } break; | |
885 case InstArithmetic::Urem: { | |
886 const SizeT MaxSrcs = 2; | |
887 InstCall *Call = makeHelperCall("__umoddi3", Dest, MaxSrcs); | |
888 Call->addArg(Inst->getSrc(0)); | |
889 Call->addArg(Inst->getSrc(1)); | |
890 lowerCall(Call); | |
891 } break; | |
892 case InstArithmetic::Srem: { | |
893 const SizeT MaxSrcs = 2; | |
894 InstCall *Call = makeHelperCall("__moddi3", Dest, MaxSrcs); | |
895 Call->addArg(Inst->getSrc(0)); | |
896 Call->addArg(Inst->getSrc(1)); | |
897 lowerCall(Call); | |
898 } break; | |
899 case InstArithmetic::Fadd: | |
900 case InstArithmetic::Fsub: | |
901 case InstArithmetic::Fmul: | |
902 case InstArithmetic::Fdiv: | |
903 case InstArithmetic::Frem: | |
904 llvm_unreachable("FP instruction with i64 type"); | |
905 break; | |
906 } | |
907 } else { // Dest->getType() != IceType_i64 | |
908 Variable *T_edx = NULL; | |
909 Variable *T = NULL; | |
910 switch (Inst->getOp()) { | |
911 case InstArithmetic::Add: | |
912 _mov(T, Src0); | |
913 _add(T, Src1); | |
914 _mov(Dest, T); | |
915 break; | |
916 case InstArithmetic::And: | |
917 _mov(T, Src0); | |
918 _and(T, Src1); | |
919 _mov(Dest, T); | |
920 break; | |
921 case InstArithmetic::Or: | |
922 _mov(T, Src0); | |
923 _or(T, Src1); | |
924 _mov(Dest, T); | |
925 break; | |
926 case InstArithmetic::Xor: | |
927 _mov(T, Src0); | |
928 _xor(T, Src1); | |
929 _mov(Dest, T); | |
930 break; | |
931 case InstArithmetic::Sub: | |
932 _mov(T, Src0); | |
933 _sub(T, Src1); | |
934 _mov(Dest, T); | |
935 break; | |
936 case InstArithmetic::Mul: | |
937 // TODO: Optimize for llvm::isa<Constant>(Src1) | |
938 // TODO: Strength-reduce multiplications by a constant, | |
939 // particularly -1 and powers of 2. Advanced: use lea to | |
940 // multiply by 3, 5, 9. | |
941 // | |
942 // The 8-bit version of imul only allows the form "imul r/m8" | |
943 // where T must be in eax. | |
944 if (Dest->getType() == IceType_i8) | |
945 _mov(T, Src0, Reg_eax); | |
946 else | |
947 _mov(T, Src0); | |
948 _imul(T, Src1); | |
949 _mov(Dest, T); | |
950 break; | |
951 case InstArithmetic::Shl: | |
952 _mov(T, Src0); | |
953 if (!llvm::isa<Constant>(Src1)) | |
954 Src1 = legalizeToVar(Src1, false, Reg_ecx); | |
955 _shl(T, Src1); | |
956 _mov(Dest, T); | |
957 break; | |
958 case InstArithmetic::Lshr: | |
959 _mov(T, Src0); | |
960 if (!llvm::isa<Constant>(Src1)) | |
961 Src1 = legalizeToVar(Src1, false, Reg_ecx); | |
962 _shr(T, Src1); | |
963 _mov(Dest, T); | |
964 break; | |
965 case InstArithmetic::Ashr: | |
966 _mov(T, Src0); | |
967 if (!llvm::isa<Constant>(Src1)) | |
968 Src1 = legalizeToVar(Src1, false, Reg_ecx); | |
969 _sar(T, Src1); | |
970 _mov(Dest, T); | |
971 break; | |
972 case InstArithmetic::Udiv: | |
973 if (Dest->getType() == IceType_i8) { | |
974 Variable *T_ah = NULL; | |
975 Constant *Zero = Ctx->getConstantInt(IceType_i8, 0); | |
976 _mov(T, Src0, Reg_eax); | |
977 _mov(T_ah, Zero, Reg_ah); | |
978 _div(T_ah, Src1, T); | |
jvoung (off chromium)
2014/05/15 23:47:34
I'm probably missing something:
why is it _div(T_
Jim Stichnoth
2014/05/17 14:14:32
Nice - your simpler suggestion seems to work. :)
| |
979 Context.insert(InstFakeUse::create(Func, T_ah)); | |
980 _mov(Dest, T); | |
981 } else { | |
982 // TODO: fix for 8-bit, see Urem | |
jvoung (off chromium)
2014/05/15 23:47:34
Should the TODO be under the above branch for == I
Jim Stichnoth
2014/05/17 14:14:32
Done. I think that TODO was left in by accident.
| |
983 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | |
984 _mov(T, Src0, Reg_eax); | |
985 _mov(T_edx, Zero, Reg_edx); | |
986 _div(T, Src1, T_edx); | |
987 _mov(Dest, T); | |
988 } | |
989 break; | |
990 case InstArithmetic::Sdiv: | |
991 T_edx = makeReg(IceType_i32, Reg_edx); | |
992 _mov(T, Src0, Reg_eax); | |
993 _cdq(T_edx, T); | |
994 _idiv(T, Src1, T_edx); | |
995 _mov(Dest, T); | |
996 break; | |
997 case InstArithmetic::Urem: | |
998 if (Dest->getType() == IceType_i8) { | |
999 Variable *T_ah = NULL; | |
1000 Constant *Zero = Ctx->getConstantInt(IceType_i8, 0); | |
1001 _mov(T, Src0, Reg_eax); | |
1002 _mov(T_ah, Zero, Reg_ah); | |
1003 _div(T_ah, Src1, T); | |
1004 _mov(Dest, T_ah); | |
1005 } else { | |
1006 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | |
1007 _mov(T_edx, Zero, Reg_edx); | |
1008 _mov(T, Src0, Reg_eax); | |
1009 _div(T_edx, Src1, T); | |
1010 _mov(Dest, T_edx); | |
1011 } | |
1012 break; | |
1013 case InstArithmetic::Srem: | |
1014 T_edx = makeReg(IceType_i32, Reg_edx); | |
1015 _mov(T, Src0, Reg_eax); | |
1016 _cdq(T_edx, T); | |
1017 _idiv(T_edx, Src1, T); | |
1018 _mov(Dest, T_edx); | |
1019 break; | |
1020 case InstArithmetic::Fadd: | |
1021 _mov(T, Src0); | |
1022 _addss(T, Src1); | |
1023 _mov(Dest, T); | |
1024 break; | |
1025 case InstArithmetic::Fsub: | |
1026 _mov(T, Src0); | |
1027 _subss(T, Src1); | |
1028 _mov(Dest, T); | |
1029 break; | |
1030 case InstArithmetic::Fmul: | |
1031 _mov(T, Src0); | |
1032 _mulss(T, Src1); | |
1033 _mov(Dest, T); | |
1034 break; | |
1035 case InstArithmetic::Fdiv: | |
1036 _mov(T, Src0); | |
1037 _divss(T, Src1); | |
1038 _mov(Dest, T); | |
1039 break; | |
1040 case InstArithmetic::Frem: { | |
1041 const SizeT MaxSrcs = 2; | |
1042 Type Ty = Dest->getType(); | |
1043 InstCall *Call = | |
1044 makeHelperCall(Ty == IceType_f32 ? "fmodf" : "fmod", Dest, MaxSrcs); | |
1045 Call->addArg(Src0); | |
1046 Call->addArg(Src1); | |
1047 return lowerCall(Call); | |
1048 } break; | |
1049 } | |
1050 } | |
1051 } | |
1052 | |
1053 void TargetX8632::lowerAssign(const InstAssign *Inst) { | |
1054 Variable *Dest = Inst->getDest(); | |
1055 Operand *Src0 = legalize(Inst->getSrc(0)); | |
jvoung (off chromium)
2014/05/19 20:28:54
Probably on your mind already since you noted a TO
Jim Stichnoth
2014/05/20 18:20:08
My thought on these kinds of opportunities (e.g. s
| |
1056 assert(Dest->getType() == Src0->getType()); | |
1057 if (Dest->getType() == IceType_i64) { | |
1058 Operand *Src0Lo = loOperand(Src0); | |
1059 Operand *Src0Hi = hiOperand(Src0); | |
1060 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | |
1061 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | |
1062 Variable *T_Lo = NULL, *T_Hi = NULL; | |
1063 _mov(T_Lo, Src0Lo); | |
1064 _mov(DestLo, T_Lo); | |
1065 _mov(T_Hi, Src0Hi); | |
1066 _mov(DestHi, T_Hi); | |
1067 } else { | |
1068 const bool AllowOverlap = true; | |
1069 // RI is either a physical register or an immediate. | |
1070 Operand *RI = legalize(Src0, Legal_Reg | Legal_Imm, AllowOverlap); | |
jvoung (off chromium)
2014/05/19 20:28:54
Does this mean that Inst->getSrc(0) gets legalized
Jim Stichnoth
2014/05/20 18:20:08
Done.
| |
1071 _mov(Dest, RI); | |
1072 } | |
1073 } | |
1074 | |
1075 void TargetX8632::lowerBr(const InstBr *Inst) { | |
1076 if (Inst->isUnconditional()) { | |
1077 _br(Inst->getTargetUnconditional()); | |
1078 } else { | |
1079 Operand *Src0 = legalize(Inst->getCondition()); | |
1080 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0); | |
1081 _cmp(Src0, Zero); | |
jvoung (off chromium)
2014/05/19 20:28:54
Is it better to _test reg,reg than _cmp reg, zero?
Jim Stichnoth
2014/05/20 18:20:08
That's right. That could be done as a peephole as
| |
1082 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); | |
1083 } | |
1084 } | |
1085 | |
void TargetX8632::lowerCall(const InstCall *Instr) {
  // Lowers a call: pushes arguments right-to-left, emits the call,
  // models the ABI's register side effects with fake instructions, and
  // copies any return value out of eax / edx:eax / st(0).
  //
  // Generate a sequence of push instructions, pushing right to left,
  // keeping track of stack offsets in case a push involves a stack
  // operand and we are using an esp-based frame.
  uint32_t StackOffset = 0;
  // TODO: If for some reason the call instruction gets dead-code
  // eliminated after lowering, we would need to ensure that the
  // pre-call push instructions and the post-call esp adjustment get
  // eliminated as well.
  for (SizeT NumArgs = Instr->getNumArgs(), i = 0; i < NumArgs; ++i) {
    Operand *Arg = legalize(Instr->getArg(NumArgs - i - 1));
    if (Arg->getType() == IceType_i64) {
      // 64-bit integers are pushed as two 32-bit halves, high half first.
      _push(hiOperand(Arg));
      _push(loOperand(Arg));
    } else if (Arg->getType() == IceType_f64) {
      // If the Arg turns out to be a memory operand, we need to push
      // 8 bytes, which requires two push instructions.  This ends up
      // being somewhat clumsy in the current IR, so we use a
      // workaround.  Force the operand into a (xmm) register, and
      // then push the register.  An xmm register push is actually not
      // possible in x86, but the Push instruction emitter handles
      // this by decrementing the stack pointer and directly writing
      // the xmm register value.
      Variable *T = NULL;
      _mov(T, Arg);
      _push(T);
    } else {
      _push(Arg);
    }
    StackOffset += typeWidthInBytesOnStack(Arg->getType());
  }
  // Generate the call instruction.  Assign its result to a temporary
  // with high register allocation weight.
  Variable *Dest = Instr->getDest();
  Variable *eax = NULL; // doubles as RegLo as necessary
  Variable *edx = NULL;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      break;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      // Integer results up to 32 bits come back in eax.
      eax = makeReg(Dest->getType(), Reg_eax);
      break;
    case IceType_i64:
      // 64-bit results come back in the edx:eax pair.
      eax = makeReg(IceType_i32, Reg_eax);
      edx = makeReg(IceType_i32, Reg_edx);
      break;
    case IceType_f32:
    case IceType_f64:
      // Leave eax==edx==NULL, and capture the result with the fstp
      // instruction.
      break;
    }
  }
  Operand *CallTarget = legalize(Instr->getCallTarget());
  Inst *NewCall = InstX8632Call::create(Func, eax, CallTarget);
  Context.insert(NewCall);
  // The call instruction itself only names eax as a dest; fake a
  // definition of edx so the high half of a 64-bit result is not
  // treated as undefined/dead by later passes.
  if (edx)
    Context.insert(InstFakeDef::create(Func, edx));

  // Add the appropriate offset to esp to pop the pushed arguments.
  if (StackOffset) {
    Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
    _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset));
  }

  // Insert a register-kill pseudo instruction, modeling that the
  // callee may clobber any of the scratch registers.
  VarList KilledRegs;
  for (SizeT i = 0; i < ScratchRegs.size(); ++i) {
    if (ScratchRegs[i])
      KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));
  }
  // On x86 ScratchRegs is never all-empty in practice (per the review
  // thread); the guard is kept for hypothetical targets without
  // scratch registers.
  if (!KilledRegs.empty()) {
    Inst *Kill = InstFakeKill::create(Func, KilledRegs, NewCall);
    Context.insert(Kill);
  }

  // Generate a FakeUse to keep the call live if necessary.
  // (hasSideEffects() is currently always true for calls -- the check
  // anticipates future support for pure functions; see review thread.)
  if (Instr->hasSideEffects() && eax) {
    Inst *FakeUse = InstFakeUse::create(Func, eax);
    Context.insert(FakeUse);
  }

  // Generate Dest=eax assignment.
  if (Dest && eax) {
    if (edx) {
      // 64-bit result: split Dest into lo/hi halves and copy each,
      // hinting the register allocator toward eax/edx so the copies
      // can be coalesced away.
      split64(Dest);
      Variable *DestLo = Dest->getLo();
      Variable *DestHi = Dest->getHi();
      DestLo->setPreferredRegister(eax, false);
      DestHi->setPreferredRegister(edx, false);
      _mov(DestLo, eax);
      _mov(DestHi, edx);
    } else {
      Dest->setPreferredRegister(eax, false);
      _mov(Dest, eax);
    }
  }

  // Special treatment for an FP function which returns its result in
  // st(0).
  if (Dest &&
      (Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64)) {
    _fstp(Dest);
    // If Dest ends up being a physical xmm register, the fstp emit
    // code will route st(0) through a temporary stack slot.
  }
}
1200 | |
void TargetX8632::lowerCast(const InstCast *Inst) {
  // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
  InstCast::OpKind CastKind = Inst->getCastKind();
  Variable *Dest = Inst->getDest();
  // Src0RM is the source operand legalized to physical register or memory, but
  // not immediate, since the relevant x86 native instructions don't allow an
  // immediate operand.  If the operand is an immediate, we could consider
  // computing the strength-reduced result at translation time, but we're
  // unlikely to see something like that in the bitcode that the optimizer
  // wouldn't have already taken care of.
  Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem, true);
  switch (CastKind) {
  default:
    Func->setError("Cast type not supported");
    return;
  case InstCast::Sext:
    if (Dest->getType() == IceType_i64) {
      // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(DestLo->getType());
      // A 32-bit source needs no sign extension for the low half; a
      // plain mov suffices.
      if (Src0RM->getType() == IceType_i32)
        _mov(T_Lo, Src0RM);
      else
        _movsx(T_Lo, Src0RM);
      _mov(DestLo, T_Lo);
      Variable *T_Hi = NULL;
      Constant *Shift = Ctx->getConstantInt(IceType_i32, 31);
      // The high half is the sign bit replicated: arithmetic shift
      // right by 31.
      _mov(T_Hi, T_Lo);
      _sar(T_Hi, Shift);
      _mov(DestHi, T_Hi);
    } else {
      // TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and
      // also copy to the high operand of a 64-bit variable.
      // t1 = movsx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movsx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  case InstCast::Zext:
    if (Dest->getType() == IceType_i64) {
      // t1=movzx src; dst.lo=t1; dst.hi=0
      Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *Tmp = makeReg(DestLo->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(Tmp, Src0RM);
      else
        _movzx(Tmp, Src0RM);
      _mov(DestLo, Tmp);
      _mov(DestHi, Zero);
    } else if (Src0RM->getType() == IceType_i1) {
      // t = Src0RM; t &= 1; Dest = t
      // Mask with 1 so that only bit 0 of the i1 value survives.
      Operand *One = Ctx->getConstantInt(IceType_i32, 1);
      Variable *T = makeReg(IceType_i32);
      _movzx(T, Src0RM);
      _and(T, One);
      _mov(Dest, T);
    } else {
      // t1 = movzx src; dst = t1
      Variable *T = makeReg(Dest->getType());
      _movzx(T, Src0RM);
      _mov(Dest, T);
    }
    break;
  case InstCast::Trunc: {
    // Truncation just copies the low-order portion of the source.
    if (Src0RM->getType() == IceType_i64)
      Src0RM = loOperand(Src0RM);
    // t1 = trunc Src0RM; Dest = t1
    Variable *T = NULL;
    _mov(T, Src0RM);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptrunc:
  case InstCast::Fpext: {
    // t1 = cvt Src0RM; Dest = t1
    Variable *T = makeReg(Dest->getType());
    _cvt(T, Src0RM);
    _mov(Dest, T);
    break;
  }
  case InstCast::Fptosi:
    if (Dest->getType() == IceType_i64) {
      // Use a helper for converting floating-point values to 64-bit
      // integers.  SSE2 appears to have no way to convert from xmm
      // registers to something like the edx:eax register pair, and
      // gcc and clang both want to use x87 instructions complete with
      // temporary manipulation of the status word.  This helper is
      // not needed for x86-64.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type SrcType = Inst->getSrc(0)->getType();
      InstCall *Call = makeHelperCall(
          SrcType == IceType_f32 ? "cvtftosi64" : "cvtdtosi64", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
    } else {
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      _mov(Dest, T_2);
      T_2->setPreferredRegister(T_1, true);
    }
    break;
  case InstCast::Fptoui:
    if (Dest->getType() == IceType_i64 || Dest->getType() == IceType_i32) {
      // Use a helper for both x86-32 and x86-64.
      // NOTE(review): split64 is invoked even in the i32 case --
      // presumably a no-op for non-i64 variables; confirm.
      split64(Dest);
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      IceString DstSubstring = (DestType == IceType_i64 ? "64" : "32");
      IceString SrcSubstring = (SrcType == IceType_f32 ? "f" : "d");
      // Possibilities are cvtftoui32, cvtdtoui32, cvtftoui64, cvtdtoui64
      IceString TargetString = "cvt" + SrcSubstring + "toui" + DstSubstring;
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      _cvt(T_1, Src0RM);
      _mov(T_2, T_1); // T_1 and T_2 may have different integer types
      _mov(Dest, T_2);
      T_2->setPreferredRegister(T_1, true);
    }
    break;
  case InstCast::Sitofp:
    if (Src0RM->getType() == IceType_i64) {
      // Use a helper for x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      InstCall *Call = makeHelperCall(
          DestType == IceType_f32 ? "cvtsi64tof" : "cvtsi64tod", Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // Sign-extend the operand.
      // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movsx(T_1, Src0RM);
      _cvt(T_2, T_1);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Uitofp:
    if (Src0RM->getType() == IceType_i64 || Src0RM->getType() == IceType_i32) {
      // Use a helper for x86-32 and x86-64.  Also use a helper for
      // i32 on x86-32.
      const SizeT MaxSrcs = 1;
      Type DestType = Dest->getType();
      IceString SrcSubstring = (Src0RM->getType() == IceType_i64 ? "64" : "32");
      IceString DstSubstring = (DestType == IceType_f32 ? "f" : "d");
      // Possibilities are cvtui32tof, cvtui32tod, cvtui64tof, cvtui64tod
      IceString TargetString = "cvtui" + SrcSubstring + "to" + DstSubstring;
      InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
      Call->addArg(Inst->getSrc(0));
      lowerCall(Call);
      return;
    } else {
      // Zero-extend the operand.
      // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
      Variable *T_1 = makeReg(IceType_i32);
      Variable *T_2 = makeReg(Dest->getType());
      if (Src0RM->getType() == IceType_i32)
        _mov(T_1, Src0RM);
      else
        _movzx(T_1, Src0RM);
      _cvt(T_2, T_1);
      _mov(Dest, T_2);
    }
    break;
  case InstCast::Bitcast:
    if (Dest->getType() == Src0RM->getType()) {
      InstAssign *Assign = InstAssign::create(Func, Dest, Src0RM);
      lowerAssign(Assign);
      // NOTE(review): reaching here aborts in debug builds even though
      // the assignment above was already emitted; same-type (pointer)
      // bitcasts are flagged as unsupported for now -- confirm intent.
      llvm_unreachable("Pointer bitcasts aren't lowered correctly.");
      return;
    }
    switch (Dest->getType()) {
    default:
      llvm_unreachable("Unexpected Bitcast dest type");
    case IceType_i32:
    case IceType_f32: {
      Type DestType = Dest->getType();
      Type SrcType = Src0RM->getType();
      assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
             (DestType == IceType_f32 && SrcType == IceType_i32));
      // a.i32 = bitcast b.f32 ==>
      //   t.f32 = b.f32
      //   s.f32 = spill t.f32
      //   a.i32 = s.f32
      // The zero-weight Spill variable forces the value through a
      // stack slot, which reinterprets the bits across register files.
      Variable *T = NULL;
      // TODO: Should be able to force a spill setup by calling legalize() with
      // Legal_Mem and not Legal_Reg or Legal_Imm.
      Variable *Spill = Func->makeVariable(SrcType, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(Dest, true);
      _mov(T, Src0RM);
      _mov(Spill, T);
      _mov(Dest, Spill);
    } break;
    case IceType_i64: {
      assert(Src0RM->getType() == IceType_f64);
      // a.i64 = bitcast b.f64 ==>
      //   s.f64 = spill b.f64
      //   t_lo.i32 = lo(s.f64)
      //   a_lo.i32 = t_lo.i32
      //   t_hi.i32 = hi(s.f64)
      //   a_hi.i32 = t_hi.i32
      Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(llvm::dyn_cast<Variable>(Src0RM), true);
      _mov(Spill, Src0RM);

      Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
      Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *T_Hi = makeReg(IceType_i32);
      // VariableSplit views address the low/high 32-bit halves of the
      // 64-bit stack slot.
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);

      _mov(T_Lo, SpillLo);
      _mov(DestLo, T_Lo);
      _mov(T_Hi, SpillHi);
      _mov(DestHi, T_Hi);
    } break;
    case IceType_f64: {
      assert(Src0RM->getType() == IceType_i64);
      // a.f64 = bitcast b.i64 ==>
      //   t_lo.i32 = b_lo.i32
      //   lo(s.f64) = t_lo.i32
      //   FakeUse(s.f64)
      //   t_hi.i32 = b_hi.i32
      //   hi(s.f64) = t_hi.i32
      //   a.f64 = s.f64
      Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
      Spill->setWeight(RegWeight::Zero);
      Spill->setPreferredRegister(Dest, true);

      // The FakeDef establishes Spill as defined before its halves are
      // stored through the VariableSplit views below.
      Context.insert(InstFakeDef::create(Func, Spill));

      Variable *T_Lo = NULL, *T_Hi = NULL;
      VariableSplit *SpillLo =
          VariableSplit::create(Func, Spill, VariableSplit::Low);
      VariableSplit *SpillHi =
          VariableSplit::create(Func, Spill, VariableSplit::High);
      _mov(T_Lo, loOperand(Src0RM));
      _store(T_Lo, SpillLo);
      _mov(T_Hi, hiOperand(Src0RM));
      _store(T_Hi, SpillHi);
      _mov(Dest, Spill);
    } break;
    }
    break;
  }
}
1472 | |
void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
  Operand *Src0 = Inst->getSrc(0);
  Operand *Src1 = Inst->getSrc(1);
  Variable *Dest = Inst->getDest();
  // Lowering a = fcmp cond, b, c
  //   ucomiss b, c       /* only if C1 != Br_None */
  //   /* but swap b,c order if SwapOperands==true */
  //   mov a, <default>
  //   j<C1> label        /* only if C1 != Br_None */
  //   j<C2> label        /* only if C2 != Br_None */
  //   FakeUse(a)         /* only if C1 != Br_None */
  //   mov a, !<default>  /* only if C1 != Br_None */
  //   label:             /* only if C1 != Br_None */
  InstFcmp::FCond Condition = Inst->getCondition();
  size_t Index = static_cast<size_t>(Condition);
  assert(Index < TableFcmpSize);
  // The table is indexed by InstFcmp::Condition.  Make sure it didn't fall
  // out of order.
  if (TableFcmp[Index].SwapOperands) {
    // Per the table, some conditions map onto fewer branches once the
    // comparison operands are swapped.
    Operand *Tmp = Src0;
    Src0 = Src1;
    Src1 = Tmp;
  }
  bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);
  bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);
  if (HasC1) {
    // Force the first ucomiss operand through a register temporary;
    // the second may be register or memory.
    Src0 = legalize(Src0);
    Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
    Variable *T = NULL;
    _mov(T, Src0);
    _ucomiss(T, Src1RM);
  }
  Constant *Default =
      Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default);
  _mov(Dest, Default);
  if (HasC1) {
    InstX8632Label *Label = InstX8632Label::create(Func, this);
    _br(TableFcmp[Index].C1, Label);
    if (HasC2) {
      _br(TableFcmp[Index].C2, Label);
    }
    // The FakeUse keeps the "mov Dest, Default" above from being
    // dead-code eliminated despite the redefinition below; the
    // intra-block branches hide the real use.
    Context.insert(InstFakeUse::create(Func, Dest));
    Constant *NonDefault =
        Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default);
    _mov(Dest, NonDefault);
    Context.insert(Label);
  }
}
1521 | |
void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
  Operand *Src0 = legalize(Inst->getSrc(0));
  Operand *Src1 = legalize(Inst->getSrc(1));
  Variable *Dest = Inst->getDest();

  // If Src1 is an immediate, or known to be a physical register, we can
  // allow Src0 to be a memory operand.  Otherwise, Src0 must be copied into
  // a physical register.  (Actually, either Src0 or Src1 can be chosen for
  // the physical register, but unfortunately we have to commit to one or
  // the other before register allocation.)
  bool IsSrc1ImmOrReg = false;
  if (llvm::isa<Constant>(Src1))
    IsSrc1ImmOrReg = true;
  else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
    if (Var->hasReg())
      IsSrc1ImmOrReg = true;
  }

  // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
  Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
  Constant *One = Ctx->getConstantInt(IceType_i32, 1);
  if (Src0->getType() == IceType_i64) {
    InstIcmp::ICond Condition = Inst->getCondition();
    size_t Index = static_cast<size_t>(Condition);
    assert(Index < TableIcmp64Size);
    // The table is indexed by InstIcmp::Condition.  Make sure it didn't fall
    // out of order.
    Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
    Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
    if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
      // 64-bit (in)equality: the answer is known as soon as either
      // half differs, so a single label suffices.
      InstX8632Label *Label = InstX8632Label::create(Func, this);
      _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
      _cmp(loOperand(Src0), Src1LoRI);
      _br(InstX8632Br::Br_ne, Label);
      _cmp(hiOperand(Src0), Src1HiRI);
      _br(InstX8632Br::Br_ne, Label);
      // FakeUse keeps the first mov of Dest alive despite the
      // redefinition below (the branches hide the real control flow).
      Context.insert(InstFakeUse::create(Func, Dest));
      _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));
      Context.insert(Label);
    } else {
      // 64-bit ordered comparison: the high halves are compared first
      // (C1/C2); only if neither branch fires does the low-half
      // comparison (C3) decide the result.
      InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
      InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
      _mov(Dest, One);
      _cmp(hiOperand(Src0), Src1HiRI);
      _br(TableIcmp64[Index].C1, LabelTrue);
      _br(TableIcmp64[Index].C2, LabelFalse);
      _cmp(loOperand(Src0), Src1LoRI);
      _br(TableIcmp64[Index].C3, LabelTrue);
      Context.insert(LabelFalse);
      Context.insert(InstFakeUse::create(Func, Dest));
      _mov(Dest, Zero);
      Context.insert(LabelTrue);
    }
    return;
  }
  // cmp b, c
  Operand *Src0New =
      legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
  InstX8632Label *Label = InstX8632Label::create(Func, this);
  _cmp(Src0New, Src1);
  _mov(Dest, One);
  _br(getIcmp32Mapping(Inst->getCondition()), Label);
  Context.insert(InstFakeUse::create(Func, Dest));
  _mov(Dest, Zero);
  Context.insert(Label);
}
1588 | |
void TargetX8632::lowerLoad(const InstLoad *Inst) {
  // A Load instruction can be treated the same as an Assign
  // instruction, after the source operand is transformed into an
  // OperandX8632Mem operand.  Note that the address mode
  // optimization already creates an OperandX8632Mem operand, so it
  // doesn't need another level of transformation.
  Type Ty = Inst->getDest()->getType();
  Operand *Src0 = Inst->getSourceAddress();
  if (!llvm::isa<OperandX8632Mem>(Src0)) {
    // The address is either a constant (representing a global) or a
    // variable, so exactly one of Base/Offset ends up non-NULL.
    Variable *Base = llvm::dyn_cast<Variable>(Src0);
    Constant *Offset = llvm::dyn_cast<Constant>(Src0);
    assert(Base || Offset);
    Src0 = OperandX8632Mem::create(Func, Ty, Base, Offset);
  }

  // Reuse the Assign lowering path for the actual move.
  InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);
  lowerAssign(Assign);
}
1609 | |
1610 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) { | |
1611 Func->setError("Phi lowering not implemented"); | |
jvoung (off chromium)
2014/05/15 23:47:34
nit: "not implemented" sounds like something will
Jim Stichnoth
2014/05/17 14:14:32
My longer-term plan is to delay phi lowering until
| |
1612 } | |
1613 | |
1614 void TargetX8632::lowerRet(const InstRet *Inst) { | |
1615 Variable *Reg = NULL; | |
1616 if (Inst->hasRetValue()) { | |
1617 Operand *Src0 = legalize(Inst->getRetValue()); | |
1618 if (Src0->getType() == IceType_i64) { | |
1619 Variable *eax = legalizeToVar(loOperand(Src0), false, Reg_eax); | |
1620 Variable *edx = legalizeToVar(hiOperand(Src0), false, Reg_edx); | |
1621 Reg = eax; | |
1622 Context.insert(InstFakeUse::create(Func, edx)); | |
1623 } else if (Src0->getType() == IceType_f32 || | |
1624 Src0->getType() == IceType_f64) { | |
1625 _fld(Src0); | |
1626 } else { | |
1627 _mov(Reg, Src0, Reg_eax); | |
1628 } | |
1629 } | |
1630 _ret(Reg); | |
1631 // Add a fake use of esp to make sure esp stays alive for the entire | |
1632 // function. Otherwise post-call esp adjustments get dead-code | |
1633 // eliminated. TODO: Are there more places where the fake use | |
1634 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not | |
1635 // have a ret instruction. | |
1636 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); | |
1637 Context.insert(InstFakeUse::create(Func, esp)); | |
1638 } | |
1639 | |
void TargetX8632::lowerSelect(const InstSelect *Inst) {
  // Lowering template:
  //   a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
  // i.e. unconditionally assign the "true" value, then skip the
  // "false" assignment when the condition is nonzero.
  Variable *Dest = Inst->getDest();
  Operand *SrcT = Inst->getTrueOperand();
  Operand *SrcF = Inst->getFalseOperand();
  Operand *Condition = legalize(Inst->getCondition());
  Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
  InstX8632Label *Label = InstX8632Label::create(Func, this);

  if (Dest->getType() == IceType_i64) {
    // 64-bit case: apply the same template to the lo/hi halves
    // separately.
    Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
    Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true);
    Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true);
    _cmp(Condition, Zero);
    _mov(DestLo, SrcLoRI);
    _mov(DestHi, SrcHiRI);
    _br(InstX8632Br::Br_ne, Label);
    // The fake uses keep the first pair of assignments from being
    // dead-code eliminated (the second pair overwrites them on the
    // fall-through path).
    Context.insert(InstFakeUse::create(Func, DestLo));
    Context.insert(InstFakeUse::create(Func, DestHi));
    Operand *SrcFLo = loOperand(SrcF);
    Operand *SrcFHi = hiOperand(SrcF);
    SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true);
    SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true);
    _mov(DestLo, SrcLoRI);
    _mov(DestHi, SrcHiRI);
  } else {
    _cmp(Condition, Zero);
    SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true);
    _mov(Dest, SrcT);
    _br(InstX8632Br::Br_ne, Label);
    // Fake use so the first assignment isn't dead-code eliminated.
    Context.insert(InstFakeUse::create(Func, Dest));
    SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true);
    _mov(Dest, SrcF);
  }

  Context.insert(Label);
}
1678 | |
1679 void TargetX8632::lowerStore(const InstStore *Inst) { | |
1680 Operand *Value = Inst->getData(); | |
1681 Operand *Addr = Inst->getAddr(); | |
1682 OperandX8632Mem *NewAddr = llvm::dyn_cast<OperandX8632Mem>(Addr); | |
1683 // Address mode optimization already creates an OperandX8632Mem | |
1684 // operand, so it doesn't need another level of transformation. | |
1685 if (!NewAddr) { | |
1686 // The address will be either a constant (which represents a global | |
1687 // variable) or a variable, so either the Base or Offset component | |
1688 // of the OperandX8632Mem will be set. | |
1689 Variable *Base = llvm::dyn_cast<Variable>(Addr); | |
1690 Constant *Offset = llvm::dyn_cast<Constant>(Addr); | |
1691 assert(Base || Offset); | |
1692 NewAddr = OperandX8632Mem::create(Func, Value->getType(), Base, Offset); | |
1693 } | |
1694 NewAddr = llvm::cast<OperandX8632Mem>(legalize(NewAddr)); | |
1695 | |
1696 if (NewAddr->getType() == IceType_i64) { | |
1697 Value = legalize(Value); | |
1698 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true); | |
1699 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true); | |
1700 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr))); | |
1701 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr))); | |
1702 } else { | |
1703 Value = legalize(Value, Legal_Reg | Legal_Imm, true); | |
1704 _store(Value, NewAddr); | |
1705 } | |
1706 } | |
1707 | |
1708 void TargetX8632::lowerSwitch(const InstSwitch *Inst) { | |
1709 // This implements the most naive possible lowering. | |
1710 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default | |
1711 Operand *Src0 = Inst->getComparison(); | |
1712 SizeT NumCases = Inst->getNumCases(); | |
1713 // OK, we'll be slightly less naive by forcing Src into a physical | |
1714 // register if there are 2 or more uses. | |
1715 if (NumCases >= 2) | |
1716 Src0 = legalizeToVar(Src0, true); | |
1717 else | |
1718 Src0 = legalize(Src0, Legal_All, true); | |
1719 for (SizeT I = 0; I < NumCases; ++I) { | |
1720 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I)); | |
1721 _cmp(Src0, Value); | |
1722 _br(InstX8632Br::Br_e, Inst->getLabel(I)); | |
1723 } | |
1724 | |
1725 _br(Inst->getLabelDefault()); | |
1726 } | |
1727 | |
1728 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) { | |
1729 const SizeT MaxSrcs = 0; | |
1730 Variable *Dest = NULL; | |
1731 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs); | |
1732 lowerCall(Call); | |
1733 } | |
1734 | |
Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
                               bool AllowOverlap, int32_t RegNum) {
  // Return an operand equivalent to From that satisfies the Allowed
  // mask, inserting moves into fresh registers where necessary.
  // Legal_Reg must always be allowed as the fallback form; a specific
  // RegNum may only be requested together with Allowed == Legal_Reg.
  assert(Allowed & Legal_Reg);
  assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
  if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
    // Memory operand: the base and index components must themselves
    // live in physical registers, so legalize each non-NULL component
    // and rebuild the operand only if something changed.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    Variable *RegBase = Base;
    Variable *RegIndex = Index;
    if (Base) {
      RegBase = legalizeToVar(Base, true);
    }
    if (Index) {
      RegIndex = legalizeToVar(Index, true);
    }
    if (Base != RegBase || Index != RegIndex) {
      From =
          OperandX8632Mem::create(Func, Mem->getType(), RegBase,
                                  Mem->getOffset(), RegIndex, Mem->getShift());
    }

    // If a memory form isn't acceptable, load it into a register.
    if (!(Allowed & Legal_Mem)) {
      Variable *Reg = makeReg(From->getType(), RegNum);
      _mov(Reg, From, RegNum);
      From = Reg;
    }
    return From;
  }
  if (llvm::isa<Constant>(From)) {
    // Constant: acceptable as-is when immediates are allowed,
    // otherwise materialize it into a register.
    if (!(Allowed & Legal_Imm)) {
      Variable *Reg = makeReg(From->getType(), RegNum);
      _mov(Reg, From);
      From = Reg;
    }
    return From;
  }
  if (Variable *Var = llvm::dyn_cast<Variable>(From)) {
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var->getRegNum() is unknown, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !Var->hasReg()) ||
        (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
      Variable *Reg = makeReg(From->getType(), RegNum);
      if (RegNum == Variable::NoRegister) {
        // No specific register required: hint the allocator to reuse
        // Var's register (possibly overlapping live ranges).
        Reg->setPreferredRegister(Var, AllowOverlap);
      }
      _mov(Reg, From);
      From = Reg;
    }
    return From;
  }
  llvm_unreachable("Unhandled operand kind in legalize()");
  return From;
}
1789 | |
1790 Variable *TargetX8632::legalizeToVar(Operand *From, bool AllowOverlap, | |
1791 int32_t RegNum) { | |
1792 return llvm::cast<Variable>(legalize(From, Legal_Reg, AllowOverlap, RegNum)); | |
1793 } | |
1794 | |
1795 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) { | |
1796 Variable *Reg = Func->makeVariable(Type, Context.getNode()); | |
1797 if (RegNum == Variable::NoRegister) | |
1798 Reg->setWeightInfinite(); | |
1799 else | |
1800 Reg->setRegNum(RegNum); | |
1801 return Reg; | |
1802 } | |
1803 | |
void TargetX8632::postLower() {
  // Simplest-possible per-instruction-context register assignment for
  // the "fast" (-Om1) target: only infinite-weight (must-have-register)
  // variables get colored, from the set of registers not already
  // pre-colored in this context.
  if (Ctx->getOptLevel() != Opt_m1)
    return;
  // TODO: Avoid recomputing WhiteList every instruction.
  llvm::SmallBitVector WhiteList = getRegisterSet(RegSet_All, RegSet_None);
  // Make one pass to black-list pre-colored registers.  TODO: If
  // there was some prior register allocation pass that made register
  // assignments, those registers need to be black-listed here as
  // well.
  for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
       ++I) {
    const Inst *Inst = *I;
    if (Inst->isDeleted())
      continue;
    // FakeKill instructions don't constrain register availability.
    if (llvm::isa<InstFakeKill>(Inst))
      continue;
    // NOTE(review): VarIndex is incremented but never read in this
    // pass; presumably kept for symmetry with similar operand walks.
    SizeT VarIndex = 0;
    for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
      Operand *Src = Inst->getSrc(SrcNum);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {
        const Variable *Var = Src->getVar(J);
        if (!Var->hasReg())
          continue;
        // This register is already claimed; remove it from the pool.
        WhiteList[Var->getRegNum()] = false;
      }
    }
  }
  // The second pass colors infinite-weight variables.
  llvm::SmallBitVector AvailableRegisters = WhiteList;
  for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
       ++I) {
    const Inst *Inst = *I;
    if (Inst->isDeleted())
      continue;
    SizeT VarIndex = 0;
    for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
      Operand *Src = Inst->getSrc(SrcNum);
      SizeT NumVars = Src->getNumVars();
      for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {
        Variable *Var = Src->getVar(J);
        // Only color variables that don't yet have a register but are
        // required to get one (infinite weight).
        if (Var->hasReg())
          continue;
        if (!Var->getWeight().isInf())
          continue;
        llvm::SmallBitVector AvailableTypedRegisters =
            AvailableRegisters & getRegisterSetForType(Var->getType());
        if (!AvailableTypedRegisters.any()) {
          // This is a hack in case we run out of physical registers
          // due to an excessive number of "push" instructions from
          // lowering a call.
          AvailableRegisters = WhiteList;
          AvailableTypedRegisters =
              AvailableRegisters & getRegisterSetForType(Var->getType());
        }
        assert(AvailableTypedRegisters.any());
        // Greedily take the first suitable register and retire it from
        // the available pool.
        int32_t RegNum = AvailableTypedRegisters.find_first();
        Var->setRegNum(RegNum);
        AvailableRegisters[RegNum] = false;
      }
    }
  }
}
1867 | |
1868 } // end of namespace Ice | |
OLD | NEW |