Chromium Code Reviews

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 265703002: Add Om1 lowering with no optimizations (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Add frem test; add LOWERING.rst file Created 6 years, 7 months ago
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 //
3 // The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the TargetLoweringX8632 class, which
11 // consists almost entirely of the lowering sequence for each
12 // high-level instruction. It also implements
13 // TargetX8632Fast::postLower() which does the simplest possible
14 // register allocation for the "fast" target.
15 //
16 //===----------------------------------------------------------------------===//
17
18 #include "IceDefs.h"
19 #include "IceCfg.h"
20 #include "IceCfgNode.h"
21 #include "IceInstX8632.h"
22 #include "IceOperand.h"
23 #include "IceTargetLoweringX8632.def"
24 #include "IceTargetLoweringX8632.h"
25
26 namespace Ice {
27
28 namespace {
29
30 // The following table summarizes the logic for lowering the fcmp instruction.
31 // There is one table entry for each of the 16 conditions. A comment in
32 // lowerFcmp() describes the lowering template. In the most general case, there
33 // is a compare followed by two conditional branches, because some fcmp
34 // conditions don't map to a single x86 conditional branch. However, in many
35 // cases it is possible to swap the operands in the comparison and have a single
36 // conditional branch. Since it's quite tedious to validate the table by hand,
37 // good execution tests are helpful.
38
39 const struct TableFcmp_ {
40 uint32_t Default;
41 bool SwapOperands;
42 InstX8632Br::BrCond C1, C2;
43 } TableFcmp[] = {
44 #define X(val, dflt, swap, C1, C2) \
45 { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \
46 ,
47 FCMPX8632_TABLE
48 #undef X
49 };
50 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
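To make the X-macro mechanics concrete: a hypothetical .def row such as X(Oeq, 0, false, Br_ne, Br_p) (illustrative only; the real rows live in IceTargetLoweringX8632.def) would expand inside TableFcmp[] to

    { 0, false, InstX8632Br::Br_ne, InstX8632Br::Br_p },

i.e. a default result of 0, no operand swap, and the two conditional branches jne and jp.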
51
52 // The following table summarizes the logic for lowering the icmp instruction
53 // for i32 and narrower types. Each icmp condition has a clear mapping to an
54 // x86 conditional branch instruction.
55
56 const struct TableIcmp32_ {
57 InstX8632Br::BrCond Mapping;
58 } TableIcmp32[] = {
59 #define X(val, C_32, C1_64, C2_64, C3_64) \
60 { InstX8632Br::C_32 } \
61 ,
62 ICMPX8632_TABLE
63 #undef X
64 };
65 const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
66
67 // The following table summarizes the logic for lowering the icmp instruction
68 // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
69 // conditional branches are needed. For the other conditions, three separate
70 // conditional branches are needed.
71 const struct TableIcmp64_ {
72 InstX8632Br::BrCond C1, C2, C3;
73 } TableIcmp64[] = {
74 #define X(val, C_32, C1_64, C2_64, C3_64) \
75 { InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 } \
76 ,
77 ICMPX8632_TABLE
78 #undef X
79 };
80 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
81
82 InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
83 size_t Index = static_cast<size_t>(Cond);
84 assert(Index < TableIcmp32Size);
85 return TableIcmp32[Index].Mapping;
86 }
87
88 // In some cases, there are X-macro tables for both high-level and
89 // low-level instructions/operands that use the same enum key value.
90 // The tables are kept separate to maintain a proper separation
91 // between abstraction layers. There is a risk that the tables
92 // could get out of sync if enum values are reordered or if entries
93 // are added or deleted. This dummy function uses static_assert to
94 // ensure everything is kept in sync.
95 void xMacroIntegrityCheck() {
96 // Validate the enum values in FCMPX8632_TABLE.
97 {
98 // Define a temporary set of enum values based on low-level
99 // table entries.
100 enum _tmp_enum {
101 #define X(val, dflt, swap, C1, C2) _tmp_##val,
102 FCMPX8632_TABLE
103 #undef X
104 };
105 // Define a set of constants based on high-level table entries.
106 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
107 ICEINSTFCMP_TABLE;
108 #undef X
109 // Define a set of constants based on low-level table entries,
110 // and ensure the table entry keys are consistent.
111 #define X(val, dflt, swap, C1, C2) \
112 static const int _table2_##val = _tmp_##val; \
113 STATIC_ASSERT(_table1_##val == _table2_##val);
114 FCMPX8632_TABLE;
115 #undef X
116 // Repeat the static asserts with respect to the high-level
117 // table entries in case the high-level table has extra entries.
118 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
119 ICEINSTFCMP_TABLE;
120 #undef X
121 }
122
123 // Validate the enum values in ICMPX8632_TABLE.
124 {
125 // Define a temporary set of enum values based on low-level
126 // table entries.
127 enum _tmp_enum {
128 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
129 ICMPX8632_TABLE
130 #undef X
131 };
132 // Define a set of constants based on high-level table entries.
133 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
134 ICEINSTICMP_TABLE;
135 #undef X
136 // Define a set of constants based on low-level table entries,
137 // and ensure the table entry keys are consistent.
138 #define X(val, C_32, C1_64, C2_64, C3_64) \
139 static const int _table2_##val = _tmp_##val; \
140 STATIC_ASSERT(_table1_##val == _table2_##val);
141 ICMPX8632_TABLE;
142 #undef X
143 // Repeat the static asserts with respect to the high-level
144 // table entries in case the high-level table has extra entries.
145 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
146 ICEINSTICMP_TABLE;
147 #undef X
148 }
149
150 // Validate the enum values in ICETYPEX8632_TABLE.
151 {
152 // Define a temporary set of enum values based on low-level
153 // table entries.
154 enum _tmp_enum {
155 #define X(tag, cvt, sdss, width) _tmp_##tag,
156 ICETYPEX8632_TABLE
157 #undef X
158 };
159 // Define a set of constants based on high-level table entries.
160 #define X(tag, size, align, str) static const int _table1_##tag = tag;
161 ICETYPE_TABLE;
162 #undef X
163 // Define a set of constants based on low-level table entries,
164 // and ensure the table entry keys are consistent.
165 #define X(tag, cvt, sdss, width) \
166 static const int _table2_##tag = _tmp_##tag; \
167 STATIC_ASSERT(_table1_##tag == _table2_##tag);
168 ICETYPEX8632_TABLE;
169 #undef X
170 // Repeat the static asserts with respect to the high-level
171 // table entries in case the high-level table has extra entries.
172 #define X(tag, size, align, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
173 ICETYPE_TABLE;
174 #undef X
175 }
176 }
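As a sketch of what the integrity check boils down to for a single hypothetical key Oeq shared by ICEINSTFCMP_TABLE and FCMPX8632_TABLE, the macros above expand to roughly:

    enum _tmp_enum { _tmp_Oeq /*, ... */ };         // low-level table order
    static const int _table1_Oeq = InstFcmp::Oeq;   // high-level enum value
    static const int _table2_Oeq = _tmp_Oeq;        // low-level table position
    STATIC_ASSERT(_table1_Oeq == _table2_Oeq);      // fires if the tables diverge

so any reordering or insertion that is not applied to both tables fails to compile.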
177
178 } // end of anonymous namespace
179
180 TargetX8632::TargetX8632(Cfg *Func)
181 : TargetLowering(Func), IsEbpBasedFrame(false), FrameSizeLocals(0),
182 LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
183 PhysicalRegisters(VarList(Reg_NUM)) {
184 llvm::SmallBitVector IntegerRegisters(Reg_NUM);
185 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM);
186 llvm::SmallBitVector FloatRegisters(Reg_NUM);
187 llvm::SmallBitVector InvalidRegisters(Reg_NUM);
188 ScratchRegs.resize(Reg_NUM);
189 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
190 frameptr, isI8, isInt, isFP) \
191 IntegerRegisters[val] = isInt; \
192 IntegerRegistersI8[val] = isI8; \
193 FloatRegisters[val] = isFP; \
194 ScratchRegs[val] = scratch;
195 REGX8632_TABLE;
196 #undef X
197 TypeToRegisterSet[IceType_void] = InvalidRegisters;
jvoung (off chromium) 2014/05/15 23:47:34 Maybe at some point some of this be initialized on
Jim Stichnoth 2014/05/17 14:14:32 This is all true. For now, I'd like to leave a TO
198 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
199 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
200 TypeToRegisterSet[IceType_i16] = IntegerRegisters;
201 TypeToRegisterSet[IceType_i32] = IntegerRegisters;
202 TypeToRegisterSet[IceType_i64] = IntegerRegisters;
203 TypeToRegisterSet[IceType_f32] = FloatRegisters;
204 TypeToRegisterSet[IceType_f64] = FloatRegisters;
205 }
206
207 void TargetX8632::translateOm1() {
208 GlobalContext *Context = Func->getContext();
209 Ostream &Str = Context->getStrDump();
210 Timer T_placePhiLoads;
211 Func->placePhiLoads();
212 if (Func->hasError())
213 return;
214 T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
215 Timer T_placePhiStores;
216 Func->placePhiStores();
217 if (Func->hasError())
218 return;
219 T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
220 Timer T_deletePhis;
221 Func->deletePhis();
222 if (Func->hasError())
223 return;
224 T_deletePhis.printElapsedUs(Context, "deletePhis()");
225 if (Context->isVerbose())
226 Str << "================ After Phi lowering ================\n";
227 Func->dump();
228
229 Timer T_genCode;
230 Func->genCode();
231 if (Func->hasError())
232 return;
233 T_genCode.printElapsedUs(Context, "genCode()");
234 if (Context->isVerbose())
235 Str << "================ After initial x8632 codegen ================\n";
236 Func->dump();
237
238 Timer T_genFrame;
239 Func->genFrame();
240 if (Func->hasError())
241 return;
242 T_genFrame.printElapsedUs(Context, "genFrame()");
243 if (Context->isVerbose())
244 Str << "================ After stack frame mapping ================\n";
245 Func->dump();
246 }
247
248 IceString TargetX8632::RegNames[] = {
249 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
250 frameptr, isI8, isInt, isFP) \
251 name,
252 REGX8632_TABLE
253 #undef X
254 };
255
256 Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) {
257 assert(RegNum < PhysicalRegisters.size());
258 Variable *Reg = PhysicalRegisters[RegNum];
259 if (Reg == NULL) {
260 CfgNode *Node = NULL; // NULL means multi-block lifetime
261 Reg = Func->makeVariable(IceType_i32, Node);
262 Reg->setRegNum(RegNum);
263 PhysicalRegisters[RegNum] = Reg;
264 }
265 return Reg;
266 }
267
268 IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
269 assert(RegNum < Reg_NUM);
270 static IceString RegNames8[] = {
271 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
272 frameptr, isI8, isInt, isFP) \
273 "" name8,
274 REGX8632_TABLE
275 #undef X
276 };
277 static IceString RegNames16[] = {
278 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
279 frameptr, isI8, isInt, isFP) \
280 "" name16,
281 REGX8632_TABLE
282 #undef X
283 };
284 switch (Ty) {
285 case IceType_i1:
286 case IceType_i8:
287 return RegNames8[RegNum];
288 case IceType_i16:
289 return RegNames16[RegNum];
290 default:
291 return RegNames[RegNum];
292 }
293 }
294
295 void TargetX8632::emitVariable(const Variable *Var, const Cfg *Func) const {
296 Ostream &Str = Ctx->getStrEmit();
297 assert(Var->getLocalUseNode() == NULL ||
298 Var->getLocalUseNode() == Func->getCurrentNode());
299 if (Var->hasReg()) {
300 Str << getRegName(Var->getRegNum(), Var->getType());
301 return;
302 }
303 Str << InstX8632::getWidthString(Var->getType());
304 Str << " [" << getRegName(getFrameOrStackReg(), IceType_i32);
305 int32_t Offset = Var->getStackOffset() + getStackAdjustment();
306 if (Offset) {
307 if (Offset > 0)
308 Str << "+";
309 Str << Offset;
310 }
311 Str << "]";
312 }
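For reference, this produces two emission forms (a sketch; the exact width strings come from InstX8632::getWidthString and the register names from getRegName): a register-allocated variable emits as just the register, e.g. eax, while a spilled i32 at stack offset 8 on an ebp-based frame emits as something like

    dword ptr [ebp+8]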
313
314 // Helper function for addProlog(). Sets the frame offset for Arg,
315 // updates InArgsSizeBytes according to Arg's width, and generates an
316 // instruction to copy Arg into its assigned register if applicable.
317 // For an I64 arg that has been split into Lo and Hi components, it
318 // calls itself recursively on the components, taking care to handle
319 // Lo first because of the little-endian architecture.
320 void TargetX8632::setArgOffsetAndCopy(Variable *Arg, Variable *FramePtr,
321 int32_t BasicFrameOffset,
322 int32_t &InArgsSizeBytes) {
323 Variable *Lo = Arg->getLo();
324 Variable *Hi = Arg->getHi();
325 Type Ty = Arg->getType();
326 if (Lo && Hi && Ty == IceType_i64) {
327 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
328 assert(Hi->getType() != IceType_i64); // don't want infinite recursion
329 setArgOffsetAndCopy(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
330 setArgOffsetAndCopy(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
331 return;
332 }
333 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
334 if (Arg->hasReg()) {
335 assert(Ty != IceType_i64);
336 OperandX8632Mem *Mem = OperandX8632Mem::create(
337 Func, Ty, FramePtr,
338 Ctx->getConstantInt(IceType_i32, Arg->getStackOffset()));
339 _mov(Arg, Mem);
340 }
341 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
342 }
343
344 void TargetX8632::addProlog(CfgNode *Node) {
345 // If SimpleCoalescing is false, each variable without a register
346 // gets its own unique stack slot, which leads to large stack
347 // frames. If SimpleCoalescing is true, then each "global" variable
348 // without a register gets its own slot, but "local" variable slots
349 // are reused across basic blocks. E.g., if A and B are local to
350 // block 1 and C is local to block 2, then C may share a slot with A
351 // or B.
352 const bool SimpleCoalescing = true;
jvoung (off chromium) 2014/05/15 23:47:34 Doesn't seem like this will ever be set to false.
Jim Stichnoth 2014/05/17 14:14:32 I didn't plan to expose this by a flag, since Simp
353 int32_t InArgsSizeBytes = 0;
354 int32_t RetIpSizeBytes = 4;
355 int32_t PreservedRegsSizeBytes = 0;
356 LocalsSizeBytes = 0;
357 Context.init(Node);
358 Context.setInsertPoint(Context.getCur());
359
360 // Determine stack frame offsets for each Variable without a
361 // register assignment. This can be done as one variable per stack
362 // slot. Or, do coalescing by running the register allocator again
363 // with an infinite set of registers (as a side effect, this gives
364 // variables a second chance at physical register assignment).
365 //
366 // A middle ground approach is to leverage sparsity and allocate one
367 // block of space on the frame for globals (variables with
368 // multi-block lifetime), and one block to share for locals
369 // (single-block lifetime).
370
371 llvm::SmallBitVector CalleeSaves =
372 getRegisterSet(RegSet_CalleeSave, RegSet_None);
373
374 int32_t GlobalsSize = 0;
375 std::vector<int> LocalsSize(Func->getNumNodes());
376
377 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and
378 // LocalsSizeBytes.
379 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
380 const VarList &Variables = Func->getVariables();
381 const VarList &Args = Func->getArgs();
382 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
383 I != E; ++I) {
384 Variable *Var = *I;
385 if (Var->hasReg()) {
386 RegsUsed[Var->getRegNum()] = true;
387 continue;
388 }
389 // An argument passed on the stack already has a stack slot.
390 if (Var->getIsArg())
391 continue;
392 // A spill slot linked to a variable with a stack slot should reuse
393 // that stack slot.
394 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
395 if (Variable *Linked = Var->getPreferredRegister()) {
396 if (!Linked->hasReg())
397 continue;
398 }
399 }
400 int32_t Increment = typeWidthInBytesOnStack(Var->getType());
401 if (SimpleCoalescing) {
402 if (Var->isMultiblockLife()) {
403 GlobalsSize += Increment;
404 } else {
405 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
406 LocalsSize[NodeIndex] += Increment;
407 if (LocalsSize[NodeIndex] > LocalsSizeBytes)
408 LocalsSizeBytes = LocalsSize[NodeIndex];
409 }
410 } else {
411 LocalsSizeBytes += Increment;
412 }
413 }
414 LocalsSizeBytes += GlobalsSize;
415
416 // Add push instructions for preserved registers.
417 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
418 if (CalleeSaves[i] && RegsUsed[i]) {
419 PreservedRegsSizeBytes += 4;
420 const bool SuppressStackAdjustment = true;
421 _push(getPhysicalRegister(i), SuppressStackAdjustment);
422 }
423 }
424
425 // Generate "push ebp; mov ebp, esp"
426 if (IsEbpBasedFrame) {
427 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
428 .count() == 0);
429 PreservedRegsSizeBytes += 4;
430 Variable *ebp = getPhysicalRegister(Reg_ebp);
431 Variable *esp = getPhysicalRegister(Reg_esp);
432 const bool SuppressStackAdjustment = true;
433 _push(ebp, SuppressStackAdjustment);
434 _mov(ebp, esp);
435 }
436
437 // Generate "sub esp, LocalsSizeBytes"
438 if (LocalsSizeBytes)
439 _sub(getPhysicalRegister(Reg_esp),
440 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
441
442 resetStackAdjustment();
jvoung (off chromium) 2014/05/15 23:47:34 question: when will the StackAdjustment be non-zer
Jim Stichnoth 2014/05/17 14:14:32 Currently it should always be zero at this point.
443
444 // Fill in stack offsets for args, and copy args into registers for
445 // those that were register-allocated. Args are pushed right to
446 // left, so Arg[0] is closest to the stack/frame pointer.
447 //
448 // TODO: Make this right for different width args, calling
449 // conventions, etc. For one thing, args passed in registers will
450 // need to be copied/shuffled to their home registers (the
451 // RegManager code may have some permutation logic to leverage),
452 // and if they have no home register, home space will need to be
453 // allocated on the stack to copy into.
454 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
455 int32_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;
456 if (!IsEbpBasedFrame)
457 BasicFrameOffset += LocalsSizeBytes;
458 for (SizeT i = 0; i < Args.size(); ++i) {
459 Variable *Arg = Args[i];
460 setArgOffsetAndCopy(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
461 }
462
463 // Fill in stack offsets for locals.
464 int32_t TotalGlobalsSize = GlobalsSize;
465 GlobalsSize = 0;
466 LocalsSize.assign(LocalsSize.size(), 0);
467 int32_t NextStackOffset = 0;
468 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
469 I != E; ++I) {
470 Variable *Var = *I;
471 if (Var->hasReg()) {
472 RegsUsed[Var->getRegNum()] = true;
473 continue;
474 }
475 if (Var->getIsArg())
476 continue;
477 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
478 if (Variable *Linked = Var->getPreferredRegister()) {
479 if (!Linked->hasReg()) {
480 // TODO: Make sure Linked has already been assigned a stack
481 // slot.
482 Var->setStackOffset(Linked->getStackOffset());
483 continue;
484 }
485 }
486 }
487 int32_t Increment = typeWidthInBytesOnStack(Var->getType());
488 if (SimpleCoalescing) {
489 if (Var->isMultiblockLife()) {
490 GlobalsSize += Increment;
491 NextStackOffset = GlobalsSize;
492 } else {
493 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
494 LocalsSize[NodeIndex] += Increment;
495 NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex];
496 }
497 } else {
498 NextStackOffset += Increment;
499 }
500 if (IsEbpBasedFrame)
501 Var->setStackOffset(-NextStackOffset);
502 else
503 Var->setStackOffset(LocalsSizeBytes - NextStackOffset);
504 }
505 this->FrameSizeLocals = NextStackOffset;
506 this->HasComputedFrame = true;
507
508 if (Func->getContext()->isVerbose(IceV_Frame)) {
509 Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes
510 << "\n"
511 << "InArgsSizeBytes=" << InArgsSizeBytes
512 << "\n"
513 << "PreservedRegsSizeBytes="
514 << PreservedRegsSizeBytes << "\n";
515 }
516 }
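Putting the pieces together, the prolog for an ebp-based frame that uses one callee-save register and has a non-zero LocalsSizeBytes would come out roughly as (a sketch, not verbatim emitter output):

    push ebx                  ; preserved register
    push ebp                  ; set up the frame pointer
    mov  ebp, esp
    sub  esp, LocalsSizeBytes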
517
518 void TargetX8632::addEpilog(CfgNode *Node) {
519 InstList &Insts = Node->getInsts();
520 InstList::reverse_iterator RI, E;
521 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
522 if (llvm::isa<InstX8632Ret>(*RI))
523 break;
jvoung (off chromium) 2014/05/15 23:47:34 What are possible instructions at the end of a CFG
Jim Stichnoth 2014/05/17 14:14:32 TargetX8632::lowerRet() adds a FakeUse of esp at t
524 }
525 if (RI == E)
526 return;
527
528 // Convert the reverse_iterator position into its corresponding
529 // (forward) iterator position.
530 InstList::iterator InsertPoint = RI.base();
531 --InsertPoint;
532 Context.init(Node);
533 Context.setInsertPoint(InsertPoint);
534
535 Variable *esp = getPhysicalRegister(Reg_esp);
536 if (IsEbpBasedFrame) {
537 Variable *ebp = getPhysicalRegister(Reg_ebp);
538 _mov(esp, ebp);
539 _pop(ebp);
540 } else {
541 // add esp, LocalsSizeBytes
542 if (LocalsSizeBytes)
543 _add(esp, Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
544 }
545
546 // Add pop instructions for preserved registers.
547 llvm::SmallBitVector CalleeSaves =
548 getRegisterSet(RegSet_CalleeSave, RegSet_None);
549 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
550 SizeT j = CalleeSaves.size() - i - 1;
551 if (j == Reg_ebp && IsEbpBasedFrame)
552 continue;
553 if (CalleeSaves[j] && RegsUsed[j]) {
554 _pop(getPhysicalRegister(j));
555 }
556 }
557 }
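The matching epilog, inserted just before the ret located above, is roughly (sketch):

    mov  esp, ebp             ; ebp-based frame: restore esp
    pop  ebp
    pop  ebx                  ; preserved registers, in reverse order of the pushes

For an esp-based frame, the mov/pop pair is replaced by add esp, LocalsSizeBytes.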
558
559 void TargetX8632::split64(Variable *Var) {
560 switch (Var->getType()) {
561 default:
562 return;
563 case IceType_i64:
564 // TODO: Only consider F64 if we need to push each half when
565 // passing as an argument to a function call. Note that each half
566 // is still typed as I32.
567 case IceType_f64:
568 break;
569 }
570 Variable *Lo = Var->getLo();
571 Variable *Hi = Var->getHi();
572 if (Lo) {
573 assert(Hi);
574 return;
575 }
576 assert(Hi == NULL);
577 Lo = Func->makeVariable(IceType_i32, Context.getNode(),
578 Var->getName() + "__lo");
579 Hi = Func->makeVariable(IceType_i32, Context.getNode(),
580 Var->getName() + "__hi");
581 Var->setLoHi(Lo, Hi);
582 if (Var->getIsArg()) {
583 Lo->setIsArg(Func);
584 Hi->setIsArg(Func);
585 }
586 }
587
588 Operand *TargetX8632::loOperand(Operand *Operand) {
589 assert(Operand->getType() == IceType_i64);
590 if (Operand->getType() != IceType_i64)
591 return Operand;
592 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
593 split64(Var);
594 return Var->getLo();
595 }
596 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
597 uint64_t Mask = (1ull << 32) - 1;
598 return Ctx->getConstantInt(IceType_i32, Const->getValue() & Mask);
599 }
600 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
601 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
602 Mem->getOffset(), Mem->getIndex(),
603 Mem->getShift());
604 }
605 llvm_unreachable("Unsupported operand type");
606 return NULL;
607 }
608
609 Operand *TargetX8632::hiOperand(Operand *Operand) {
610 assert(Operand->getType() == IceType_i64);
611 if (Operand->getType() != IceType_i64)
612 return Operand;
613 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
614 split64(Var);
615 return Var->getHi();
616 }
617 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
618 return Ctx->getConstantInt(IceType_i32, Const->getValue() >> 32);
619 }
620 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
621 Constant *Offset = Mem->getOffset();
622 if (Offset == NULL)
623 Offset = Ctx->getConstantInt(IceType_i32, 4);
624 else if (ConstantInteger *IntOffset =
625 llvm::dyn_cast<ConstantInteger>(Offset)) {
626 Offset = Ctx->getConstantInt(IceType_i32, 4 + IntOffset->getValue());
627 } else if (ConstantRelocatable *SymOffset =
628 llvm::dyn_cast<ConstantRelocatable>(Offset)) {
629 Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(),
630 SymOffset->getName());
631 }
632 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
633 Mem->getIndex(), Mem->getShift());
634 }
635 llvm_unreachable("Unsupported operand type");
636 return NULL;
637 }
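A small worked example of the splitting above, using a hypothetical 64-bit constant and memory operand:

    loOperand(0x1122334455667788) --> i32 constant 0x55667788   (value & 0xFFFFFFFF)
    hiOperand(0x1122334455667788) --> i32 constant 0x11223344   (value >> 32)
    hiOperand([eax+8])            --> [eax+12]                  (offset bumped by 4)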
638
639 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
jvoung (off chromium) 2014/05/15 23:47:34 I'm wondering if this needs to be so general. Is t
Jim Stichnoth 2014/05/17 14:14:32 Yes, that instance of getRegisterSet() is kind of
640 RegSetMask Exclude) const {
641 llvm::SmallBitVector Registers(Reg_NUM);
642
643 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
644 frameptr, isI8, isInt, isFP) \
645 if (scratch && (Include & RegSet_CallerSave)) \
646 Registers[val] = true; \
647 if (preserved && (Include & RegSet_CalleeSave)) \
648 Registers[val] = true; \
649 if (stackptr && (Include & RegSet_StackPointer)) \
650 Registers[val] = true; \
651 if (frameptr && (Include & RegSet_FramePointer)) \
652 Registers[val] = true; \
653 if (scratch && (Exclude & RegSet_CallerSave)) \
654 Registers[val] = false; \
655 if (preserved && (Exclude & RegSet_CalleeSave)) \
656 Registers[val] = false; \
657 if (stackptr && (Exclude & RegSet_StackPointer)) \
658 Registers[val] = false; \
659 if (frameptr && (Exclude & RegSet_FramePointer)) \
660 Registers[val] = false;
661
662 REGX8632_TABLE
663
664 #undef X
665
666 return Registers;
667 }
668
669 void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
670 IsEbpBasedFrame = true;
671 // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize
672 // the number of adjustments of esp, etc.
673 Variable *esp = getPhysicalRegister(Reg_esp);
674 Operand *TotalSize = legalize(Inst->getSizeInBytes());
675 Variable *Dest = Inst->getDest();
676 _sub(esp, TotalSize);
677 _mov(Dest, esp);
678 }
679
680 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
681 Variable *Dest = Inst->getDest();
682 Operand *Src0 = legalize(Inst->getSrc(0));
683 Operand *Src1 = legalize(Inst->getSrc(1));
684 if (Dest->getType() == IceType_i64) {
685 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
686 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
687 Operand *Src0Lo = loOperand(Src0);
688 Operand *Src0Hi = hiOperand(Src0);
689 Operand *Src1Lo = loOperand(Src1);
690 Operand *Src1Hi = hiOperand(Src1);
691 Variable *T_Lo = NULL, *T_Hi = NULL;
692 switch (Inst->getOp()) {
693 case InstArithmetic::Add:
694 _mov(T_Lo, Src0Lo);
695 _add(T_Lo, Src1Lo);
696 _mov(DestLo, T_Lo);
697 _mov(T_Hi, Src0Hi);
698 _adc(T_Hi, Src1Hi);
699 _mov(DestHi, T_Hi);
700 break;
701 case InstArithmetic::And:
702 _mov(T_Lo, Src0Lo);
703 _and(T_Lo, Src1Lo);
704 _mov(DestLo, T_Lo);
705 _mov(T_Hi, Src0Hi);
706 _and(T_Hi, Src1Hi);
707 _mov(DestHi, T_Hi);
708 break;
709 case InstArithmetic::Or:
710 _mov(T_Lo, Src0Lo);
711 _or(T_Lo, Src1Lo);
712 _mov(DestLo, T_Lo);
713 _mov(T_Hi, Src0Hi);
714 _or(T_Hi, Src1Hi);
715 _mov(DestHi, T_Hi);
716 break;
717 case InstArithmetic::Xor:
718 _mov(T_Lo, Src0Lo);
719 _xor(T_Lo, Src1Lo);
720 _mov(DestLo, T_Lo);
721 _mov(T_Hi, Src0Hi);
722 _xor(T_Hi, Src1Hi);
723 _mov(DestHi, T_Hi);
724 break;
725 case InstArithmetic::Sub:
726 _mov(T_Lo, Src0Lo);
727 _sub(T_Lo, Src1Lo);
728 _mov(DestLo, T_Lo);
729 _mov(T_Hi, Src0Hi);
730 _sbb(T_Hi, Src1Hi);
731 _mov(DestHi, T_Hi);
732 break;
733 case InstArithmetic::Mul: {
734 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
735 Variable *T_4Lo = makeReg(IceType_i32, Reg_eax);
jvoung (off chromium) 2014/05/15 23:47:34 When is it appropriate to use getPhysicalRegister(
Jim Stichnoth 2014/05/17 14:14:32 That's right. getPhysicalRegister() is used for t
736 Variable *T_4Hi = makeReg(IceType_i32, Reg_edx);
737 // gcc does the following:
738 // a=b*c ==>
739 // t1 = b.hi; t1 *=(imul) c.lo
740 // t2 = c.hi; t2 *=(imul) b.lo
741 // t3:eax = b.lo
742 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
743 // a.lo = t4.lo
744 // t4.hi += t1
745 // t4.hi += t2
746 // a.hi = t4.hi
747 _mov(T_1, Src0Hi);
748 _imul(T_1, Src1Lo);
749 _mov(T_2, Src1Hi);
750 _imul(T_2, Src0Lo);
751 _mov(T_3, Src0Lo, Reg_eax);
752 _mul(T_4Lo, T_3, Src1Lo);
753 // The mul instruction produces two dest variables, edx:eax. We
754 // create a fake definition of edx to account for this.
755 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
756 _mov(DestLo, T_4Lo);
757 _add(T_4Hi, T_1);
758 _add(T_4Hi, T_2);
759 _mov(DestHi, T_4Hi);
760 } break;
761 case InstArithmetic::Shl: {
762 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
763 // gcc does the following:
764 // a=b<<c ==>
765 // t1:ecx = c.lo & 0xff
766 // t2 = b.lo
767 // t3 = b.hi
768 // t3 = shld t3, t2, t1
769 // t2 = shl t2, t1
770 // test t1, 0x20
771 // je L1
772 // use(t3)
773 // t3 = t2
774 // t2 = 0
775 // L1:
776 // a.lo = t2
777 // a.hi = t3
778 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
779 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
780 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
781 InstX8632Label *Label = InstX8632Label::create(Func, this);
782 _mov(T_1, Src1Lo, Reg_ecx);
783 _mov(T_2, Src0Lo);
784 _mov(T_3, Src0Hi);
785 _shld(T_3, T_2, T_1);
786 _shl(T_2, T_1);
787 _test(T_1, BitTest);
788 _br(InstX8632Br::Br_e, Label);
789 // Because of the intra-block control flow, we need to fake a use
790 // of T_3 to prevent its earlier definition from being dead-code
791 // eliminated in the presence of its later definition.
792 Context.insert(InstFakeUse::create(Func, T_3));
793 _mov(T_3, T_2);
794 _mov(T_2, Zero);
795 Context.insert(Label);
796 _mov(DestLo, T_2);
797 _mov(DestHi, T_3);
798 } break;
799 case InstArithmetic::Lshr: {
800 // a=b>>c (unsigned) ==>
801 // t1:ecx = c.lo & 0xff
802 // t2 = b.lo
803 // t3 = b.hi
804 // t2 = shrd t2, t3, t1
805 // t3 = shr t3, t1
806 // test t1, 0x20
807 // je L1
808 // use(t2)
809 // t2 = t3
810 // t3 = 0
811 // L1:
812 // a.lo = t2
813 // a.hi = t3
814 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
815 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
816 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
817 InstX8632Label *Label = InstX8632Label::create(Func, this);
818 _mov(T_1, Src1Lo, Reg_ecx);
819 _mov(T_2, Src0Lo);
820 _mov(T_3, Src0Hi);
821 _shrd(T_2, T_3, T_1);
822 _shr(T_3, T_1);
823 _test(T_1, BitTest);
824 _br(InstX8632Br::Br_e, Label);
825 // Because of the intra-block control flow, we need to fake a use
826 // of T_3 to prevent its earlier definition from being dead-code
827 // eliminated in the presence of its later definition.
828 Context.insert(InstFakeUse::create(Func, T_2));
829 _mov(T_2, T_3);
830 _mov(T_3, Zero);
831 Context.insert(Label);
832 _mov(DestLo, T_2);
833 _mov(DestHi, T_3);
834 } break;
835 case InstArithmetic::Ashr: {
836 // a=b>>c (signed) ==>
837 // t1:ecx = c.lo & 0xff
838 // t2 = b.lo
839 // t3 = b.hi
840 // t2 = shrd t2, t3, t1
841 // t3 = sar t3, t1
842 // test t1, 0x20
843 // je L1
844 // use(t2)
845 // t2 = t3
846 // t3 = sar t3, 0x1f
847 // L1:
848 // a.lo = t2
849 // a.hi = t3
850 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
851 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
852 Constant *SignExtend = Ctx->getConstantInt(IceType_i32, 0x1f);
853 InstX8632Label *Label = InstX8632Label::create(Func, this);
854 _mov(T_1, Src1Lo, Reg_ecx);
855 _mov(T_2, Src0Lo);
856 _mov(T_3, Src0Hi);
857 _shrd(T_2, T_3, T_1);
858 _sar(T_3, T_1);
859 _test(T_1, BitTest);
860 _br(InstX8632Br::Br_e, Label);
861 // Because of the intra-block control flow, we need to fake a use
862 // of T_3 to prevent its earlier definition from being dead-code
863 // eliminated in the presence of its later definition.
864 Context.insert(InstFakeUse::create(Func, T_2));
865 _mov(T_2, T_3);
866 _sar(T_3, SignExtend);
867 Context.insert(Label);
868 _mov(DestLo, T_2);
869 _mov(DestHi, T_3);
870 } break;
871 case InstArithmetic::Udiv: {
872 const SizeT MaxSrcs = 2;
873 InstCall *Call = makeHelperCall("__udivdi3", Dest, MaxSrcs);
874 Call->addArg(Inst->getSrc(0));
875 Call->addArg(Inst->getSrc(1));
876 lowerCall(Call);
877 } break;
878 case InstArithmetic::Sdiv: {
879 const SizeT MaxSrcs = 2;
880 InstCall *Call = makeHelperCall("__divdi3", Dest, MaxSrcs);
881 Call->addArg(Inst->getSrc(0));
882 Call->addArg(Inst->getSrc(1));
883 lowerCall(Call);
884 } break;
885 case InstArithmetic::Urem: {
886 const SizeT MaxSrcs = 2;
887 InstCall *Call = makeHelperCall("__umoddi3", Dest, MaxSrcs);
888 Call->addArg(Inst->getSrc(0));
889 Call->addArg(Inst->getSrc(1));
890 lowerCall(Call);
891 } break;
892 case InstArithmetic::Srem: {
893 const SizeT MaxSrcs = 2;
894 InstCall *Call = makeHelperCall("__moddi3", Dest, MaxSrcs);
895 Call->addArg(Inst->getSrc(0));
896 Call->addArg(Inst->getSrc(1));
897 lowerCall(Call);
898 } break;
899 case InstArithmetic::Fadd:
900 case InstArithmetic::Fsub:
901 case InstArithmetic::Fmul:
902 case InstArithmetic::Fdiv:
903 case InstArithmetic::Frem:
904 llvm_unreachable("FP instruction with i64 type");
905 break;
906 }
907 } else { // Dest->getType() != IceType_i64
908 Variable *T_edx = NULL;
909 Variable *T = NULL;
910 switch (Inst->getOp()) {
911 case InstArithmetic::Add:
912 _mov(T, Src0);
913 _add(T, Src1);
914 _mov(Dest, T);
915 break;
916 case InstArithmetic::And:
917 _mov(T, Src0);
918 _and(T, Src1);
919 _mov(Dest, T);
920 break;
921 case InstArithmetic::Or:
922 _mov(T, Src0);
923 _or(T, Src1);
924 _mov(Dest, T);
925 break;
926 case InstArithmetic::Xor:
927 _mov(T, Src0);
928 _xor(T, Src1);
929 _mov(Dest, T);
930 break;
931 case InstArithmetic::Sub:
932 _mov(T, Src0);
933 _sub(T, Src1);
934 _mov(Dest, T);
935 break;
936 case InstArithmetic::Mul:
937 // TODO: Optimize for llvm::isa<Constant>(Src1)
938 // TODO: Strength-reduce multiplications by a constant,
939 // particularly -1 and powers of 2. Advanced: use lea to
940 // multiply by 3, 5, 9.
941 //
942 // The 8-bit version of imul only allows the form "imul r/m8"
943 // where T must be in eax.
944 if (Dest->getType() == IceType_i8)
945 _mov(T, Src0, Reg_eax);
946 else
947 _mov(T, Src0);
948 _imul(T, Src1);
949 _mov(Dest, T);
950 break;
951 case InstArithmetic::Shl:
952 _mov(T, Src0);
953 if (!llvm::isa<Constant>(Src1))
954 Src1 = legalizeToVar(Src1, false, Reg_ecx);
955 _shl(T, Src1);
956 _mov(Dest, T);
957 break;
958 case InstArithmetic::Lshr:
959 _mov(T, Src0);
960 if (!llvm::isa<Constant>(Src1))
961 Src1 = legalizeToVar(Src1, false, Reg_ecx);
962 _shr(T, Src1);
963 _mov(Dest, T);
964 break;
965 case InstArithmetic::Ashr:
966 _mov(T, Src0);
967 if (!llvm::isa<Constant>(Src1))
968 Src1 = legalizeToVar(Src1, false, Reg_ecx);
969 _sar(T, Src1);
970 _mov(Dest, T);
971 break;
972 case InstArithmetic::Udiv:
973 if (Dest->getType() == IceType_i8) {
974 Variable *T_ah = NULL;
975 Constant *Zero = Ctx->getConstantInt(IceType_i8, 0);
976 _mov(T, Src0, Reg_eax);
977 _mov(T_ah, Zero, Reg_ah);
978 _div(T_ah, Src1, T);
jvoung (off chromium) 2014/05/15 23:47:34 I'm probably missing something: why is it _div(T_
Jim Stichnoth 2014/05/17 14:14:32 Nice - your simpler suggestion seems to work. :)
979 Context.insert(InstFakeUse::create(Func, T_ah));
980 _mov(Dest, T);
981 } else {
982 // TODO: fix for 8-bit, see Urem
jvoung (off chromium) 2014/05/15 23:47:34 Should the TODO be under the above branch for == I
Jim Stichnoth 2014/05/17 14:14:32 Done. I think that TODO was left in by accident.
983 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
984 _mov(T, Src0, Reg_eax);
985 _mov(T_edx, Zero, Reg_edx);
986 _div(T, Src1, T_edx);
987 _mov(Dest, T);
988 }
989 break;
990 case InstArithmetic::Sdiv:
991 T_edx = makeReg(IceType_i32, Reg_edx);
992 _mov(T, Src0, Reg_eax);
993 _cdq(T_edx, T);
994 _idiv(T, Src1, T_edx);
995 _mov(Dest, T);
996 break;
997 case InstArithmetic::Urem:
998 if (Dest->getType() == IceType_i8) {
999 Variable *T_ah = NULL;
1000 Constant *Zero = Ctx->getConstantInt(IceType_i8, 0);
1001 _mov(T, Src0, Reg_eax);
1002 _mov(T_ah, Zero, Reg_ah);
1003 _div(T_ah, Src1, T);
1004 _mov(Dest, T_ah);
1005 } else {
1006 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
1007 _mov(T_edx, Zero, Reg_edx);
1008 _mov(T, Src0, Reg_eax);
1009 _div(T_edx, Src1, T);
1010 _mov(Dest, T_edx);
1011 }
1012 break;
1013 case InstArithmetic::Srem:
1014 T_edx = makeReg(IceType_i32, Reg_edx);
1015 _mov(T, Src0, Reg_eax);
1016 _cdq(T_edx, T);
1017 _idiv(T_edx, Src1, T);
1018 _mov(Dest, T_edx);
1019 break;
1020 case InstArithmetic::Fadd:
1021 _mov(T, Src0);
1022 _addss(T, Src1);
1023 _mov(Dest, T);
1024 break;
1025 case InstArithmetic::Fsub:
1026 _mov(T, Src0);
1027 _subss(T, Src1);
1028 _mov(Dest, T);
1029 break;
1030 case InstArithmetic::Fmul:
1031 _mov(T, Src0);
1032 _mulss(T, Src1);
1033 _mov(Dest, T);
1034 break;
1035 case InstArithmetic::Fdiv:
1036 _mov(T, Src0);
1037 _divss(T, Src1);
1038 _mov(Dest, T);
1039 break;
1040 case InstArithmetic::Frem: {
1041 const SizeT MaxSrcs = 2;
1042 Type Ty = Dest->getType();
1043 InstCall *Call =
1044 makeHelperCall(Ty == IceType_f32 ? "fmodf" : "fmod", Dest, MaxSrcs);
1045 Call->addArg(Src0);
1046 Call->addArg(Src1);
1047 return lowerCall(Call);
1048 } break;
1049 }
1050 }
1051 }
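For the common 32-bit two-operand cases, the mov/op/mov pattern means e.g. a = add i32 b, c lowers to roughly (a sketch; actual register assignment happens later):

    mov  t, b
    add  t, c
    mov  a, t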
1052
1053 void TargetX8632::lowerAssign(const InstAssign *Inst) {
1054 Variable *Dest = Inst->getDest();
1055 Operand *Src0 = legalize(Inst->getSrc(0));
jvoung (off chromium) 2014/05/19 20:28:54 Probably on your mind already since you noted a TO
Jim Stichnoth 2014/05/20 18:20:08 My thought on these kinds of opportunities (e.g. s
1056 assert(Dest->getType() == Src0->getType());
1057 if (Dest->getType() == IceType_i64) {
1058 Operand *Src0Lo = loOperand(Src0);
1059 Operand *Src0Hi = hiOperand(Src0);
1060 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1061 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1062 Variable *T_Lo = NULL, *T_Hi = NULL;
1063 _mov(T_Lo, Src0Lo);
1064 _mov(DestLo, T_Lo);
1065 _mov(T_Hi, Src0Hi);
1066 _mov(DestHi, T_Hi);
1067 } else {
1068 const bool AllowOverlap = true;
1069 // RI is either a physical register or an immediate.
1070 Operand *RI = legalize(Src0, Legal_Reg | Legal_Imm, AllowOverlap);
jvoung (off chromium) 2014/05/19 20:28:54 Does this mean that Inst->getSrc(0) gets legalized
Jim Stichnoth 2014/05/20 18:20:08 Done.
1071 _mov(Dest, RI);
1072 }
1073 }
1074
1075 void TargetX8632::lowerBr(const InstBr *Inst) {
1076 if (Inst->isUnconditional()) {
1077 _br(Inst->getTargetUnconditional());
1078 } else {
1079 Operand *Src0 = legalize(Inst->getCondition());
1080 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
1081 _cmp(Src0, Zero);
jvoung (off chromium) 2014/05/19 20:28:54 Is it better to _test reg,reg than _cmp reg, zero?
Jim Stichnoth 2014/05/20 18:20:08 That's right. That could be done as a peephole as
1082 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
1083 }
1084 }
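So a conditional br i1 %cond, label %t, label %f becomes roughly (a sketch; whether the trailing jmp is needed depends on block layout):

    cmp  %cond, 0
    jne  .Lt
    jmp  .Lf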
1085
1086 void TargetX8632::lowerCall(const InstCall *Instr) {
1087 // Generate a sequence of push instructions, pushing right to left,
1088 // keeping track of stack offsets in case a push involves a stack
1089 // operand and we are using an esp-based frame.
1090 uint32_t StackOffset = 0;
1091 // TODO: If for some reason the call instruction gets dead-code
1092 // eliminated after lowering, we would need to ensure that the
1093 // pre-call push instructions and the post-call esp adjustment get
1094 // eliminated as well.
1095 for (SizeT NumArgs = Instr->getNumArgs(), i = 0; i < NumArgs; ++i) {
1096 Operand *Arg = legalize(Instr->getArg(NumArgs - i - 1));
1097 if (Arg->getType() == IceType_i64) {
1098 _push(hiOperand(Arg));
1099 _push(loOperand(Arg));
1100 } else if (Arg->getType() == IceType_f64) {
1101 // If the Arg turns out to be a memory operand, we need to push
1102 // 8 bytes, which requires two push instructions. This ends up
1103 // being somewhat clumsy in the current IR, so we use a
1104 // workaround. Force the operand into a (xmm) register, and
1105 // then push the register. An xmm register push is actually not
1106 // possible in x86, but the Push instruction emitter handles
1107 // this by decrementing the stack pointer and directly writing
1108 // the xmm register value.
1109 Variable *T = NULL;
1110 _mov(T, Arg);
1111 _push(T);
1112 } else {
1113 _push(Arg);
1114 }
1115 StackOffset += typeWidthInBytesOnStack(Arg->getType());
1116 }
1117 // Generate the call instruction. Assign its result to a temporary
1118 // with high register allocation weight.
1119 Variable *Dest = Instr->getDest();
1120 Variable *eax = NULL; // doubles as RegLo as necessary
1121 Variable *edx = NULL;
1122 if (Dest) {
1123 switch (Dest->getType()) {
1124 case IceType_NUM:
1125 llvm_unreachable("Invalid Call dest type");
1126 break;
1127 case IceType_void:
1128 break;
1129 case IceType_i1:
1130 case IceType_i8:
1131 case IceType_i16:
1132 case IceType_i32:
1133 eax = makeReg(Dest->getType(), Reg_eax);
1134 break;
1135 case IceType_i64:
1136 eax = makeReg(IceType_i32, Reg_eax);
1137 edx = makeReg(IceType_i32, Reg_edx);
1138 break;
1139 case IceType_f32:
1140 case IceType_f64:
1141 // Leave eax==edx==NULL, and capture the result with the fstp
1142 // instruction.
1143 break;
1144 }
1145 }
1146 Operand *CallTarget = legalize(Instr->getCallTarget());
1147 Inst *NewCall = InstX8632Call::create(Func, eax, CallTarget);
1148 Context.insert(NewCall);
1149 if (edx)
1150 Context.insert(InstFakeDef::create(Func, edx));
1151
1152 // Add the appropriate offset to esp.
1153 if (StackOffset) {
1154 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
1155 _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset));
1156 }
1157
1158 // Insert a register-kill pseudo instruction.
1159 VarList KilledRegs;
1160 for (SizeT i = 0; i < ScratchRegs.size(); ++i) {
1161 if (ScratchRegs[i])
1162 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));
1163 }
1164 if (!KilledRegs.empty()) {
jvoung (off chromium) 2014/05/19 20:28:54 For x86 this is always true because there's always
Jim Stichnoth 2014/05/20 18:20:08 A bit of pedantry, I guess, in case we ever try ou
1165 Inst *Kill = InstFakeKill::create(Func, KilledRegs, NewCall);
1166 Context.insert(Kill);
1167 }
1168
1169 // Generate a FakeUse to keep the call live if necessary.
1170 if (Instr->hasSideEffects() && eax) {
jvoung (off chromium) 2014/05/19 20:28:54 for InstrCall, hasSideEffects() is always true?
Jim Stichnoth 2014/05/20 18:20:08 Today it's true, but I'm looking ahead to allowing
1171 Inst *FakeUse = InstFakeUse::create(Func, eax);
1172 Context.insert(FakeUse);
1173 }
1174
1175 // Generate Dest=eax assignment.
1176 if (Dest && eax) {
1177 if (edx) {
1178 split64(Dest);
1179 Variable *DestLo = Dest->getLo();
1180 Variable *DestHi = Dest->getHi();
1181 DestLo->setPreferredRegister(eax, false);
1182 DestHi->setPreferredRegister(edx, false);
1183 _mov(DestLo, eax);
1184 _mov(DestHi, edx);
1185 } else {
1186 Dest->setPreferredRegister(eax, false);
1187 _mov(Dest, eax);
1188 }
1189 }
1190
1191 // Special treatment for an FP function which returns its result in
1192 // st(0).
1193 if (Dest &&
1194 (Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64)) {
1195 _fstp(Dest);
1196 // If Dest ends up being a physical xmm register, the fstp emit
1197 // code will route st(0) through a temporary stack slot.
1198 }
1199 }
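End to end, a call like %r = call i32 @f(i32 %a, i64 %b) lowers to roughly the following (a sketch; the FakeDef/FakeKill/FakeUse pseudo-instructions emit nothing and only model register effects):

    push <b.hi>               ; args pushed right to left
    push <b.lo>
    push <a>
    call f
    add  esp, 12
    mov  <r>, eax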
1200
1201 void TargetX8632::lowerCast(const InstCast *Inst) {
1202 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
1203 InstCast::OpKind CastKind = Inst->getCastKind();
1204 Variable *Dest = Inst->getDest();
1205 // Src0RM is the source operand legalized to physical register or memory, but
1206 // not immediate, since the relevant x86 native instructions don't allow an
1207 // immediate operand. If the operand is an immediate, we could consider
1208 // computing the strength-reduced result at translation time, but we're
1209 // unlikely to see something like that in the bitcode that the optimizer
1210 // wouldn't have already taken care of.
1211 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem, true);
1212 switch (CastKind) {
1213 default:
1214 Func->setError("Cast type not supported");
1215 return;
1216 case InstCast::Sext:
1217 if (Dest->getType() == IceType_i64) {
1218 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
1219 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1220 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1221 Variable *T_Lo = makeReg(DestLo->getType());
1222 if (Src0RM->getType() == IceType_i32)
1223 _mov(T_Lo, Src0RM);
1224 else
1225 _movsx(T_Lo, Src0RM);
1226 _mov(DestLo, T_Lo);
1227 Variable *T_Hi = NULL;
1228 Constant *Shift = Ctx->getConstantInt(IceType_i32, 31);
1229 _mov(T_Hi, T_Lo);
1230 _sar(T_Hi, Shift);
1231 _mov(DestHi, T_Hi);
1232 } else {
1233 // TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and
1234 // also copy to the high operand of a 64-bit variable.
1235 // t1 = movsx src; dst = t1
1236 Variable *T = makeReg(Dest->getType());
1237 _movsx(T, Src0RM);
1238 _mov(Dest, T);
1239 }
1240 break;
1241 case InstCast::Zext:
1242 if (Dest->getType() == IceType_i64) {
1243 // t1=movzx src; dst.lo=t1; dst.hi=0
1244 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
1245 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1246 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1247 Variable *Tmp = makeReg(DestLo->getType());
1248 if (Src0RM->getType() == IceType_i32)
1249 _mov(Tmp, Src0RM);
1250 else
1251 _movzx(Tmp, Src0RM);
1252 _mov(DestLo, Tmp);
1253 _mov(DestHi, Zero);
1254 } else if (Src0RM->getType() == IceType_i1) {
1255 // t = Src0RM; t &= 1; Dest = t
1256 Operand *One = Ctx->getConstantInt(IceType_i32, 1);
1257 Variable *T = makeReg(IceType_i32);
1258 _movzx(T, Src0RM);
1259 _and(T, One);
1260 _mov(Dest, T);
1261 } else {
1262 // t1 = movzx src; dst = t1
1263 Variable *T = makeReg(Dest->getType());
1264 _movzx(T, Src0RM);
1265 _mov(Dest, T);
1266 }
1267 break;
1268 case InstCast::Trunc: {
1269 if (Src0RM->getType() == IceType_i64)
1270 Src0RM = loOperand(Src0RM);
1271 // t1 = trunc Src0RM; Dest = t1
1272 Variable *T = NULL;
1273 _mov(T, Src0RM);
1274 _mov(Dest, T);
1275 break;
1276 }
1277 case InstCast::Fptrunc:
1278 case InstCast::Fpext: {
1279 // t1 = cvt Src0RM; Dest = t1
1280 Variable *T = makeReg(Dest->getType());
1281 _cvt(T, Src0RM);
1282 _mov(Dest, T);
1283 break;
1284 }
1285 case InstCast::Fptosi:
1286 if (Dest->getType() == IceType_i64) {
1287 // Use a helper for converting floating-point values to 64-bit
1288 // integers. SSE2 appears to have no way to convert from xmm
1289 // registers to something like the edx:eax register pair, and
1290 // gcc and clang both want to use x87 instructions complete with
1291 // temporary manipulation of the status word. This helper is
1292 // not needed for x86-64.
1293 split64(Dest);
1294 const SizeT MaxSrcs = 1;
1295 Type SrcType = Inst->getSrc(0)->getType();
1296 InstCall *Call = makeHelperCall(
1297 SrcType == IceType_f32 ? "cvtftosi64" : "cvtdtosi64", Dest, MaxSrcs);
1298 Call->addArg(Inst->getSrc(0));
1299 lowerCall(Call);
1300 } else {
1301 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
1302 Variable *T_1 = makeReg(IceType_i32);
1303 Variable *T_2 = makeReg(Dest->getType());
1304 _cvt(T_1, Src0RM);
1305 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
1306 _mov(Dest, T_2);
1307 T_2->setPreferredRegister(T_1, true);
1308 }
1309 break;
1310 case InstCast::Fptoui:
1311 if (Dest->getType() == IceType_i64 || Dest->getType() == IceType_i32) {
1312 // Use a helper for both x86-32 and x86-64.
1313 split64(Dest);
1314 const SizeT MaxSrcs = 1;
1315 Type DestType = Dest->getType();
1316 Type SrcType = Src0RM->getType();
1317 IceString DstSubstring = (DestType == IceType_i64 ? "64" : "32");
1318 IceString SrcSubstring = (SrcType == IceType_f32 ? "f" : "d");
1319 // Possibilities are cvtftoui32, cvtdtoui32, cvtftoui64, cvtdtoui64
jvoung (off chromium) 2014/05/19 20:28:54 For the cases where gcc would have just invoked a
Jim Stichnoth 2014/05/20 18:20:08 Actually, these helper function names may resemble
1320 IceString TargetString = "cvt" + SrcSubstring + "toui" + DstSubstring;
1321 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
1322 Call->addArg(Inst->getSrc(0));
1323 lowerCall(Call);
1324 return;
1325 } else {
1326 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
1327 Variable *T_1 = makeReg(IceType_i32);
1328 Variable *T_2 = makeReg(Dest->getType());
1329 _cvt(T_1, Src0RM);
1330 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
1331 _mov(Dest, T_2);
1332 T_2->setPreferredRegister(T_1, true);
1333 }
1334 break;
1335 case InstCast::Sitofp:
1336 if (Src0RM->getType() == IceType_i64) {
1337 // Use a helper for x86-32.
1338 const SizeT MaxSrcs = 1;
1339 Type DestType = Dest->getType();
1340 InstCall *Call = makeHelperCall(
1341 DestType == IceType_f32 ? "cvtsi64tof" : "cvtsi64tod", Dest, MaxSrcs);
1342 Call->addArg(Inst->getSrc(0));
1343 lowerCall(Call);
1344 return;
1345 } else {
1346 // Sign-extend the operand.
1347 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
1348 Variable *T_1 = makeReg(IceType_i32);
1349 Variable *T_2 = makeReg(Dest->getType());
1350 if (Src0RM->getType() == IceType_i32)
1351 _mov(T_1, Src0RM);
1352 else
1353 _movsx(T_1, Src0RM);
1354 _cvt(T_2, T_1);
1355 _mov(Dest, T_2);
1356 }
1357 break;
1358 case InstCast::Uitofp:
1359 if (Src0RM->getType() == IceType_i64 || Src0RM->getType() == IceType_i32) {
1360 // Use a helper for x86-32 and x86-64. Also use a helper for
1361 // i32 on x86-32.
1362 const SizeT MaxSrcs = 1;
1363 Type DestType = Dest->getType();
1364 IceString SrcSubstring = (Src0RM->getType() == IceType_i64 ? "64" : "32");
1365 IceString DstSubstring = (DestType == IceType_f32 ? "f" : "d");
1366 // Possibilities are cvtui32tof, cvtui32tod, cvtui64tof, cvtui64tod
1367 IceString TargetString = "cvtui" + SrcSubstring + "to" + DstSubstring;
1368 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
1369 Call->addArg(Inst->getSrc(0));
1370 lowerCall(Call);
1371 return;
1372 } else {
1373 // Zero-extend the operand.
1374 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
1375 Variable *T_1 = makeReg(IceType_i32);
1376 Variable *T_2 = makeReg(Dest->getType());
1377 if (Src0RM->getType() == IceType_i32)
1378 _mov(T_1, Src0RM);
1379 else
1380 _movzx(T_1, Src0RM);
1381 _cvt(T_2, T_1);
1382 _mov(Dest, T_2);
1383 }
1384 break;
1385 case InstCast::Bitcast:
1386 if (Dest->getType() == Src0RM->getType()) {
1387 InstAssign *Assign = InstAssign::create(Func, Dest, Src0RM);
1388 lowerAssign(Assign);
1389 llvm_unreachable("Pointer bitcasts aren't lowered correctly.");
1390 return;
1391 }
1392 switch (Dest->getType()) {
1393 default:
1394 llvm_unreachable("Unexpected Bitcast dest type");
1395 case IceType_i32:
1396 case IceType_f32: {
1397 Type DestType = Dest->getType();
1398 Type SrcType = Src0RM->getType();
1399 assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
1400 (DestType == IceType_f32 && SrcType == IceType_i32));
1401 // a.i32 = bitcast b.f32 ==>
1402 // t.f32 = b.f32
1403 // s.f32 = spill t.f32
1404 // a.i32 = s.f32
1405 Variable *T = NULL;
1406 // TODO: Should be able to force a spill setup by calling legalize() with
1407 // Legal_Mem and not Legal_Reg or Legal_Imm.
1408 Variable *Spill = Func->makeVariable(SrcType, Context.getNode());
1409 Spill->setWeight(RegWeight::Zero);
1410 Spill->setPreferredRegister(Dest, true);
1411 _mov(T, Src0RM);
1412 _mov(Spill, T);
1413 _mov(Dest, Spill);
1414 } break;
1415 case IceType_i64: {
1416 assert(Src0RM->getType() == IceType_f64);
1417 // a.i64 = bitcast b.f64 ==>
1418 // s.f64 = spill b.f64
1419 // t_lo.i32 = lo(s.f64)
1420 // a_lo.i32 = t_lo.i32
1421 // t_hi.i32 = hi(s.f64)
1422 // a_hi.i32 = t_hi.i32
1423 Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
1424 Spill->setWeight(RegWeight::Zero);
1425 Spill->setPreferredRegister(llvm::dyn_cast<Variable>(Src0RM), true);
1426 _mov(Spill, Src0RM);
1427
1428 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1429 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1430 Variable *T_Lo = makeReg(IceType_i32);
1431 Variable *T_Hi = makeReg(IceType_i32);
1432 VariableSplit *SpillLo =
1433 VariableSplit::create(Func, Spill, VariableSplit::Low);
1434 VariableSplit *SpillHi =
1435 VariableSplit::create(Func, Spill, VariableSplit::High);
1436
1437 _mov(T_Lo, SpillLo);
1438 _mov(DestLo, T_Lo);
1439 _mov(T_Hi, SpillHi);
1440 _mov(DestHi, T_Hi);
1441 } break;
1442 case IceType_f64: {
1443 assert(Src0RM->getType() == IceType_i64);
1444 // a.f64 = bitcast b.i64 ==>
1445 // t_lo.i32 = b_lo.i32
1446 // lo(s.f64) = t_lo.i32
1447 // FakeUse(s.f64)
1448 // t_hi.i32 = b_hi.i32
1449 // hi(s.f64) = t_hi.i32
1450 // a.f64 = s.f64
1451 Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
1452 Spill->setWeight(RegWeight::Zero);
1453 Spill->setPreferredRegister(Dest, true);
1454
1455 Context.insert(InstFakeDef::create(Func, Spill));
1456
1457 Variable *T_Lo = NULL, *T_Hi = NULL;
1458 VariableSplit *SpillLo =
1459 VariableSplit::create(Func, Spill, VariableSplit::Low);
1460 VariableSplit *SpillHi =
1461 VariableSplit::create(Func, Spill, VariableSplit::High);
1462 _mov(T_Lo, loOperand(Src0RM));
1463 _store(T_Lo, SpillLo);
1464 _mov(T_Hi, hiOperand(Src0RM));
1465 _store(T_Hi, SpillHi);
1466 _mov(Dest, Spill);
1467 } break;
1468 }
1469 break;
1470 }
1471 }
1472
1473 void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
1474 Operand *Src0 = Inst->getSrc(0);
1475 Operand *Src1 = Inst->getSrc(1);
1476 Variable *Dest = Inst->getDest();
1477 // Lowering a = fcmp cond, b, c
1478 // ucomiss b, c /* only if C1 != Br_None */
1479 // /* but swap b,c order if SwapOperands==true */
1480 // mov a, <default>
1481 // j<C1> label /* only if C1 != Br_None */
1482 // j<C2> label /* only if C2 != Br_None */
1483 // FakeUse(a) /* only if C1 != Br_None */
1484 // mov a, !<default> /* only if C1 != Br_None */
1485 // label: /* only if C1 != Br_None */
1486 InstFcmp::FCond Condition = Inst->getCondition();
1487 size_t Index = static_cast<size_t>(Condition);
1488 assert(Index < TableFcmpSize);
1489 // The table is indexed by InstFcmp::Condition. Make sure it didn't fall
1490 // out of order.
1491 if (TableFcmp[Index].SwapOperands) {
1492 Operand *Tmp = Src0;
1493 Src0 = Src1;
1494 Src1 = Tmp;
1495 }
1496 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);
1497 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);
1498 if (HasC1) {
1499 Src0 = legalize(Src0);
1500 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
1501 Variable *T = NULL;
1502 _mov(T, Src0);
1503 _ucomiss(T, Src1RM);
1504 }
1505 Constant *Default =
1506 Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default);
1507 _mov(Dest, Default);
1508 if (HasC1) {
1509 InstX8632Label *Label = InstX8632Label::create(Func, this);
1510 _br(TableFcmp[Index].C1, Label);
1511 if (HasC2) {
1512 _br(TableFcmp[Index].C2, Label);
1513 }
1514 Context.insert(InstFakeUse::create(Func, Dest));
1515 Constant *NonDefault =
1516 Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default);
1517 _mov(Dest, NonDefault);
1518 Context.insert(Label);
1519 }
1520 }
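As one concrete instance of the template, assume a condition whose table row has Default=0, SwapOperands=false, C1=Br_ne, C2=Br_p (ordered-equal behaves this way; the actual values come from FCMPX8632_TABLE). The emitted code is then roughly:

    ucomiss t, c              ; t holds b
    mov     a, 0
    jne     L1                ; not equal -> keep 0
    jp      L1                ; unordered -> keep 0
    mov     a, 1
    L1: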
1521
1522 void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
1523 Operand *Src0 = legalize(Inst->getSrc(0));
1524 Operand *Src1 = legalize(Inst->getSrc(1));
1525 Variable *Dest = Inst->getDest();
1526
1527 // If Src1 is an immediate, or known to be a physical register, we can
1528 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
1529 // a physical register. (Actually, either Src0 or Src1 can be chosen for
1530 // the physical register, but unfortunately we have to commit to one or
1531 // the other before register allocation.)
1532 bool IsSrc1ImmOrReg = false;
1533 if (llvm::isa<Constant>(Src1))
1534 IsSrc1ImmOrReg = true;
1535 else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
1536 if (Var->hasReg())
1537 IsSrc1ImmOrReg = true;
1538 }
1539
1540 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
1541 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
1542 Constant *One = Ctx->getConstantInt(IceType_i32, 1);
1543 if (Src0->getType() == IceType_i64) {
1544 InstIcmp::ICond Condition = Inst->getCondition();
1545 size_t Index = static_cast<size_t>(Condition);
1546 assert(Index < TableIcmp64Size);
1547 // The table is indexed by InstIcmp::Condition. Make sure it didn't fall
1548 // out of order.
1549 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
1550 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
1551 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
1552 InstX8632Label *Label = InstX8632Label::create(Func, this);
1553 _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
1554 _cmp(loOperand(Src0), Src1LoRI);
1555 _br(InstX8632Br::Br_ne, Label);
1556 _cmp(hiOperand(Src0), Src1HiRI);
1557 _br(InstX8632Br::Br_ne, Label);
1558 Context.insert(InstFakeUse::create(Func, Dest));
1559 _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));
1560 Context.insert(Label);
1561 } else {
1562 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
1563 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
1564 _mov(Dest, One);
1565 _cmp(hiOperand(Src0), Src1HiRI);
1566 _br(TableIcmp64[Index].C1, LabelTrue);
1567 _br(TableIcmp64[Index].C2, LabelFalse);
1568 _cmp(loOperand(Src0), Src1LoRI);
1569 _br(TableIcmp64[Index].C3, LabelTrue);
1570 Context.insert(LabelFalse);
1571 Context.insert(InstFakeUse::create(Func, Dest));
1572 _mov(Dest, Zero);
1573 Context.insert(LabelTrue);
1574 }
1575 return;
1576 }
1577 // cmp b, c
1578 Operand *Src0New =
1579 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
1580 InstX8632Label *Label = InstX8632Label::create(Func, this);
1581 _cmp(Src0New, Src1);
1582 _mov(Dest, One);
1583 _br(getIcmp32Mapping(Inst->getCondition()), Label);
1584 Context.insert(InstFakeUse::create(Func, Dest));
1585 _mov(Dest, Zero);
1586 Context.insert(Label);
1587 }
1588
1589 void TargetX8632::lowerLoad(const InstLoad *Inst) {
1590 // A Load instruction can be treated the same as an Assign
1591 // instruction, after the source operand is transformed into an
1592 // OperandX8632Mem operand.
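// Illustrative example: for "x = load i32* @g" (a hypothetical global),
// the address is a Constant, so Src0 becomes an OperandX8632Mem with a
// NULL Base and Offset == @g, and the load is then lowered as the
// assignment "x = [g]".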
1595 Type Ty = Inst->getDest()->getType();
1596 Operand *Src0 = Inst->getSourceAddress();
1597 // Address mode optimization already creates an OperandX8632Mem
1598 // operand, so it doesn't need another level of transformation.
1599 if (!llvm::isa<OperandX8632Mem>(Src0)) {
1600 Variable *Base = llvm::dyn_cast<Variable>(Src0);
1601 Constant *Offset = llvm::dyn_cast<Constant>(Src0);
1602 assert(Base || Offset);
1603 Src0 = OperandX8632Mem::create(Func, Ty, Base, Offset);
1604 }
1605
1606 InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);
1607 lowerAssign(Assign);
1608 }
1609
1610 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
1611 Func->setError("Phi lowering not implemented");
jvoung (off chromium) 2014/05/15 23:47:34 nit: "not implemented" sounds like something will
Jim Stichnoth 2014/05/17 14:14:32 My longer-term plan is to delay phi lowering until
1612 }
1613
1614 void TargetX8632::lowerRet(const InstRet *Inst) {
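// Per the x86-32 calling convention, i64 results are returned in
// edx:eax, f32/f64 results are returned on the x87 floating point stack
// (hence the fld), and other integer results are returned in eax.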
1615 Variable *Reg = NULL;
1616 if (Inst->hasRetValue()) {
1617 Operand *Src0 = legalize(Inst->getRetValue());
1618 if (Src0->getType() == IceType_i64) {
1619 Variable *eax = legalizeToVar(loOperand(Src0), false, Reg_eax);
1620 Variable *edx = legalizeToVar(hiOperand(Src0), false, Reg_edx);
1621 Reg = eax;
1622 Context.insert(InstFakeUse::create(Func, edx));
1623 } else if (Src0->getType() == IceType_f32 ||
1624 Src0->getType() == IceType_f64) {
1625 _fld(Src0);
1626 } else {
1627 _mov(Reg, Src0, Reg_eax);
1628 }
1629 }
1630 _ret(Reg);
1631 // Add a fake use of esp to make sure esp stays alive for the entire
1632 // function. Otherwise post-call esp adjustments get dead-code
1633 // eliminated. TODO: Are there more places where the fake use
1634 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
1635 // have a ret instruction.
1636 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
1637 Context.insert(InstFakeUse::create(Func, esp));
1638 }
1639
1640 void TargetX8632::lowerSelect(const InstSelect *Inst) {
1641 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
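// For i64, the same pattern is used, but both 32-bit halves of the true
// and false operands are moved, so the FakeUse/overwrite is applied to
// DestLo and DestHi separately.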
1642 Variable *Dest = Inst->getDest();
1643 Operand *SrcT = Inst->getTrueOperand();
1644 Operand *SrcF = Inst->getFalseOperand();
1645 Operand *Condition = legalize(Inst->getCondition());
1646 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
1647 InstX8632Label *Label = InstX8632Label::create(Func, this);
1648
1649 if (Dest->getType() == IceType_i64) {
1650 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1651 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1652 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true);
1653 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true);
1654 _cmp(Condition, Zero);
1655 _mov(DestLo, SrcLoRI);
1656 _mov(DestHi, SrcHiRI);
1657 _br(InstX8632Br::Br_ne, Label);
1658 Context.insert(InstFakeUse::create(Func, DestLo));
1659 Context.insert(InstFakeUse::create(Func, DestHi));
1660 Operand *SrcFLo = loOperand(SrcF);
1661 Operand *SrcFHi = hiOperand(SrcF);
1662 SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true);
1663 SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true);
1664 _mov(DestLo, SrcLoRI);
1665 _mov(DestHi, SrcHiRI);
1666 } else {
1667 _cmp(Condition, Zero);
1668 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true);
1669 _mov(Dest, SrcT);
1670 _br(InstX8632Br::Br_ne, Label);
1671 Context.insert(InstFakeUse::create(Func, Dest));
1672 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true);
1673 _mov(Dest, SrcF);
1674 }
1675
1676 Context.insert(Label);
1677 }
1678
1679 void TargetX8632::lowerStore(const InstStore *Inst) {
1680 Operand *Value = Inst->getData();
1681 Operand *Addr = Inst->getAddr();
1682 OperandX8632Mem *NewAddr = llvm::dyn_cast<OperandX8632Mem>(Addr);
1683 // Address mode optimization already creates an OperandX8632Mem
1684 // operand, so it doesn't need another level of transformation.
1685 if (!NewAddr) {
1686 // The address will be either a constant (which represents a global
1687 // variable) or a variable, so either the Base or Offset component
1688 // of the OperandX8632Mem will be set.
1689 Variable *Base = llvm::dyn_cast<Variable>(Addr);
1690 Constant *Offset = llvm::dyn_cast<Constant>(Addr);
1691 assert(Base || Offset);
1692 NewAddr = OperandX8632Mem::create(Func, Value->getType(), Base, Offset);
1693 }
1694 NewAddr = llvm::cast<OperandX8632Mem>(legalize(NewAddr));
1695
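// Illustrative example: "store i64 v, i64* p" is split into two 32-bit
// stores, roughly "mov [p+4], v.hi" followed by "mov [p], v.lo".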
1696 if (NewAddr->getType() == IceType_i64) {
1697 Value = legalize(Value);
1698 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true);
1699 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true);
1700 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
1701 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
1702 } else {
1703 Value = legalize(Value, Legal_Reg | Legal_Imm, true);
1704 _store(Value, NewAddr);
1705 }
1706 }
1707
1708 void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
1709 // This implements the most naive possible lowering.
1710 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
1711 Operand *Src0 = Inst->getComparison();
1712 SizeT NumCases = Inst->getNumCases();
1713 // OK, we'll be slightly less naive by forcing Src0 into a physical
1714 // register if there are 2 or more uses.
1715 if (NumCases >= 2)
1716 Src0 = legalizeToVar(Src0, true);
1717 else
1718 Src0 = legalize(Src0, Legal_All, true);
1719 for (SizeT I = 0; I < NumCases; ++I) {
1720 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I));
1721 _cmp(Src0, Value);
1722 _br(InstX8632Br::Br_e, Inst->getLabel(I));
1723 }
1724
1725 _br(Inst->getLabelDefault());
1726 }
1727
1728 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
1729 const SizeT MaxSrcs = 0;
1730 Variable *Dest = NULL;
1731 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);
1732 lowerCall(Call);
1733 }
1734
1735 Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
1736 bool AllowOverlap, int32_t RegNum) {
1737 assert(Allowed & Legal_Reg);
1738 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
1739 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
1740 Variable *Base = Mem->getBase();
1741 Variable *Index = Mem->getIndex();
1742 Variable *RegBase = Base;
1743 Variable *RegIndex = Index;
1744 if (Base) {
1745 RegBase = legalizeToVar(Base, true);
1746 }
1747 if (Index) {
1748 RegIndex = legalizeToVar(Index, true);
1749 }
1750 if (Base != RegBase || Index != RegIndex) {
1751 From =
1752 OperandX8632Mem::create(Func, Mem->getType(), RegBase,
1753 Mem->getOffset(), RegIndex, Mem->getShift());
1754 }
1755
1756 if (!(Allowed & Legal_Mem)) {
1757 Variable *Reg = makeReg(From->getType(), RegNum);
1758 _mov(Reg, From, RegNum);
1759 From = Reg;
1760 }
1761 return From;
1762 }
1763 if (llvm::isa<Constant>(From)) {
1764 if (!(Allowed & Legal_Imm)) {
1765 Variable *Reg = makeReg(From->getType(), RegNum);
1766 _mov(Reg, From);
1767 From = Reg;
1768 }
1769 return From;
1770 }
1771 if (Variable *Var = llvm::dyn_cast<Variable>(From)) {
1772 // We need a new physical register for the operand if:
1773 // Mem is not allowed and Var->getRegNum() is unknown, or
1774 // RegNum is required and Var->getRegNum() doesn't match.
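// Illustrative examples (StackVar and X are hypothetical operands):
// legalize(StackVar, Legal_Reg) copies a variable with no assigned
// register into a new infinite-weight temporary, so the consuming
// instruction sees a register operand; legalizeToVar(X, false, Reg_eax)
// additionally forces a copy if X is not already in eax.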
1775 if ((!(Allowed & Legal_Mem) && !Var->hasReg()) ||
1776 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
1777 Variable *Reg = makeReg(From->getType(), RegNum);
1778 if (RegNum == Variable::NoRegister) {
1779 Reg->setPreferredRegister(Var, AllowOverlap);
1780 }
1781 _mov(Reg, From);
1782 From = Reg;
1783 }
1784 return From;
1785 }
1786 llvm_unreachable("Unhandled operand kind in legalize()");
1787 return From;
1788 }
1789
1790 Variable *TargetX8632::legalizeToVar(Operand *From, bool AllowOverlap,
1791 int32_t RegNum) {
1792 return llvm::cast<Variable>(legalize(From, Legal_Reg, AllowOverlap, RegNum));
1793 }
1794
1795 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
1796 Variable *Reg = Func->makeVariable(Type, Context.getNode());
1797 if (RegNum == Variable::NoRegister)
1798 Reg->setWeightInfinite();
1799 else
1800 Reg->setRegNum(RegNum);
1801 return Reg;
1802 }
1803
1804 void TargetX8632::postLower() {
1805 if (Ctx->getOptLevel() != Opt_m1)
1806 return;
1807 // TODO: Avoid recomputing WhiteList every instruction.
1808 llvm::SmallBitVector WhiteList = getRegisterSet(RegSet_All, RegSet_None);
1809 // Make one pass to black-list pre-colored registers. TODO: If
1810 // there was some prior register allocation pass that made register
1811 // assignments, those registers need to be black-listed here as
1812 // well.
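// Illustrative example: if some source variable is pre-colored to eax,
// the first pass clears eax from WhiteList; the second pass then gives
// each remaining infinite-weight source variable the lowest-numbered
// register still available for its type.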
1813 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
1814 ++I) {
1815 const Inst *Inst = *I;
1816 if (Inst->isDeleted())
1817 continue;
1818 if (llvm::isa<InstFakeKill>(Inst))
1819 continue;
1820 SizeT VarIndex = 0;
1821 for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
1822 Operand *Src = Inst->getSrc(SrcNum);
1823 SizeT NumVars = Src->getNumVars();
1824 for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {
1825 const Variable *Var = Src->getVar(J);
1826 if (!Var->hasReg())
1827 continue;
1828 WhiteList[Var->getRegNum()] = false;
1829 }
1830 }
1831 }
1832 // The second pass colors infinite-weight variables.
1833 llvm::SmallBitVector AvailableRegisters = WhiteList;
1834 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
1835 ++I) {
1836 const Inst *Inst = *I;
1837 if (Inst->isDeleted())
1838 continue;
1839 SizeT VarIndex = 0;
1840 for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
1841 Operand *Src = Inst->getSrc(SrcNum);
1842 SizeT NumVars = Src->getNumVars();
1843 for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {
1844 Variable *Var = Src->getVar(J);
1845 if (Var->hasReg())
1846 continue;
1847 if (!Var->getWeight().isInf())
1848 continue;
1849 llvm::SmallBitVector AvailableTypedRegisters =
1850 AvailableRegisters & getRegisterSetForType(Var->getType());
1851 if (!AvailableTypedRegisters.any()) {
1852 // This is a hack in case we run out of physical registers
1853 // due to an excessive number of "push" instructions from
1854 // lowering a call.
1855 AvailableRegisters = WhiteList;
1856 AvailableTypedRegisters =
1857 AvailableRegisters & getRegisterSetForType(Var->getType());
1858 }
1859 assert(AvailableTypedRegisters.any());
1860 int32_t RegNum = AvailableTypedRegisters.find_first();
1861 Var->setRegNum(RegNum);
1862 AvailableRegisters[RegNum] = false;
1863 }
1864 }
1865 }
1866 }
1867
1868 } // end of namespace Ice