Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(540)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 265703002: Add Om1 lowering with no optimizations (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 //
3 // The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the TargetLoweringX8632 class, which
11 // consists almost entirely of the lowering sequence for each
12 // high-level instruction. It also implements
13 // TargetX8632Fast::postLower() which does the simplest possible
14 // register allocation for the "fast" target.
15 //
16 //===----------------------------------------------------------------------===//
17
18 #include "IceDefs.h"
19 #include "IceCfg.h"
20 #include "IceCfgNode.h"
21 #include "IceInstX8632.h"
22 #include "IceOperand.h"
23 #include "IceTargetLoweringX8632.def"
24 #include "IceTargetLoweringX8632.h"
25
26 namespace Ice {
27
28 namespace {
29
30 // The following table summarizes the logic for lowering the fcmp instruction.
31 // There is one table entry for each of the 16 conditions. A comment in
32 // lowerFcmp() describes the lowering template. In the most general case, there
33 // is a compare followed by two conditional branches, because some fcmp
34 // conditions don't map to a single x86 conditional branch. However, in many
35 // cases it is possible to swap the operands in the comparison and have a single
36 // conditional branch. Since it's quite tedious to validate the table by hand,
37 // good execution tests are helpful.
38
39 const struct _TableFcmp {
40 uint32_t Default;
41 bool SwapOperands;
42 InstX8632Br::BrCond C1, C2;
43 } TableFcmp[] = {
44 #define X(val, dflt, swap, C1, C2) \
45 { dflt, swap, InstX8632Br::C1, InstX8632Br::C2 } \
46 ,
47 FCMPX8632_TABLE
48 #undef X
49 };
50 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
51
52 // The following table summarizes the logic for lowering the icmp instruction
53 // for i32 and narrower types. Each icmp condition has a clear mapping to an
54 // x86 conditional branch instruction.
55
56 const struct _TableIcmp32 {
57 InstX8632Br::BrCond Mapping;
58 } TableIcmp32[] = {
59 #define X(val, C_32, C1_64, C2_64, C3_64) \
60 { InstX8632Br::C_32 } \
61 ,
62 ICMPX8632_TABLE
63 #undef X
64 };
65 const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
66
67 // The following table summarizes the logic for lowering the icmp instruction
68 // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
69 // conditional branches are needed. For the other conditions, three separate
70 // conditional branches are needed.
71 const struct _TableIcmp64 {
72 InstX8632Br::BrCond C1, C2, C3;
73 } TableIcmp64[] = {
74 #define X(val, C_32, C1_64, C2_64, C3_64) \
75 { InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 } \
76 ,
77 ICMPX8632_TABLE
78 #undef X
79 };
80 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
81
82 InstX8632Br::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
83 size_t Index = static_cast<size_t>(Cond);
84 assert(Index < TableIcmp32Size);
85 return TableIcmp32[Index].Mapping;
86 }
87
88 // In some cases, there are x-macros tables for both high-level and
89 // low-level instructions/operands that use the same enum key value.
90 // The tables are kept separate to maintain a proper separation
91 // between abstraction layers. There is a risk that the tables
92 // could get out of sync if enum values are reordered or if entries
93 // are added or deleted. This dummy function uses static_assert to
94 // ensure everything is kept in sync.
// Compile-time consistency check between the high-level and the x86-32
// x-macro tables.  The body contains only declarations and STATIC_ASSERTs;
// it consists of three scoped sections (fcmp conditions, icmp conditions,
// types) that all follow the same recipe:
//   1. an enum numbers the low-level table entries in table order;
//   2. a _table1_<key> constant is defined per high-level entry;
//   3. a _table2_<key> constant is defined per low-level entry and asserted
//      equal to _table1_<key>;
//   4. the assert is repeated per high-level entry, so a key present only in
//      the high-level table fails to compile (no _table2_<key> exists).
95 void xMacroIntegrityCheck() {
96 // Validate the enum values in FCMPX8632_TABLE.
97 {
98 // Define a temporary set of enum values based on low-level
99 // table entries.
100 enum _tmp_enum {
101 #define X(val, dflt, swap, C1, C2) _tmp_##val,
102 FCMPX8632_TABLE
103 #undef X
104 };
105 // Define a set of constants based on high-level table entries.
106 #define X(tag, str) static const int _table1_##tag = InstFcmp::tag;
107 ICEINSTFCMP_TABLE;
108 #undef X
109 // Define a set of constants based on low-level table entries,
110 // and ensure the table entry keys are consistent.
111 #define X(val, dflt, swap, C1, C2) \
112 static const int _table2_##val = _tmp_##val; \
113 STATIC_ASSERT(_table1_##val == _table2_##val);
114 FCMPX8632_TABLE;
115 #undef X
116 // Repeat the static asserts with respect to the high-level
117 // table entries in case the high-level table has extra entries.
118 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
119 ICEINSTFCMP_TABLE;
120 #undef X
121 }
122
123 // Validate the enum values in ICMPX8632_TABLE.
// Same recipe as the fcmp section, with InstIcmp as the high-level source.
124 {
125 // Define a temporary set of enum values based on low-level
126 // table entries.
127 enum _tmp_enum {
128 #define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val,
129 ICMPX8632_TABLE
130 #undef X
131 };
132 // Define a set of constants based on high-level table entries.
133 #define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
134 ICEINSTICMP_TABLE;
135 #undef X
136 // Define a set of constants based on low-level table entries,
137 // and ensure the table entry keys are consistent.
138 #define X(val, C_32, C1_64, C2_64, C3_64) \
139 static const int _table2_##val = _tmp_##val; \
140 STATIC_ASSERT(_table1_##val == _table2_##val);
141 ICMPX8632_TABLE;
142 #undef X
143 // Repeat the static asserts with respect to the high-level
144 // table entries in case the high-level table has extra entries.
145 #define X(tag, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
146 ICEINSTICMP_TABLE;
147 #undef X
148 }
149
150 // Validate the enum values in ICETYPEX8632_TABLE.
// Same recipe again; here the high-level constants are initialized from the
// unqualified type tags listed in ICETYPE_TABLE.
151 {
152 // Define a temporary set of enum values based on low-level
153 // table entries.
154 enum _tmp_enum {
155 #define X(tag, cvt, sdss, width) _tmp_##tag,
156 ICETYPEX8632_TABLE
157 #undef X
158 };
159 // Define a set of constants based on high-level table entries.
160 #define X(tag, size, align, str) static const int _table1_##tag = tag;
161 ICETYPE_TABLE;
162 #undef X
163 // Define a set of constants based on low-level table entries,
164 // and ensure the table entry keys are consistent.
165 #define X(tag, cvt, sdss, width) \
166 static const int _table2_##tag = _tmp_##tag; \
167 STATIC_ASSERT(_table1_##tag == _table2_##tag);
168 ICETYPEX8632_TABLE;
169 #undef X
170 // Repeat the static asserts with respect to the high-level
171 // table entries in case the high-level table has extra entries.
172 #define X(tag, size, align, str) STATIC_ASSERT(_table1_##tag == _table2_##tag);
173 ICETYPE_TABLE;
174 #undef X
175 }
176 }
177
178 } // end of anonymous namespace
179
180 TargetX8632::TargetX8632(Cfg *Func)
181 : TargetLowering(Func), IsEbpBasedFrame(false), FrameSizeLocals(0),
182 LocalsSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
183 PhysicalRegisters(VarList(Reg_NUM)) {
184 llvm::SmallBitVector IntegerRegisters(Reg_NUM);
185 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM);
186 llvm::SmallBitVector FloatRegisters(Reg_NUM);
187 llvm::SmallBitVector InvalidRegisters(Reg_NUM);
188 ScratchRegs.resize(Reg_NUM);
189 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
190 frameptr, isI8, isInt, isFP) \
191 IntegerRegisters[val] = isInt; \
192 IntegerRegistersI8[val] = isI8; \
193 FloatRegisters[val] = isFP; \
194 ScratchRegs[val] = scratch;
195 REGX8632_TABLE;
196 #undef X
197 TypeToRegisterSet[IceType_void] = InvalidRegisters;
198 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
199 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
200 TypeToRegisterSet[IceType_i16] = IntegerRegisters;
201 TypeToRegisterSet[IceType_i32] = IntegerRegisters;
202 TypeToRegisterSet[IceType_i64] = IntegerRegisters;
203 TypeToRegisterSet[IceType_f32] = FloatRegisters;
204 TypeToRegisterSet[IceType_f64] = FloatRegisters;
205 }
206
207 void TargetX8632::translateOm1() {
208 GlobalContext *Context = Func->getContext();
209 Ostream &Str = Context->getStrDump();
210 Timer T_placePhiLoads;
211 Func->placePhiLoads();
212 if (Func->hasError())
213 return;
214 T_placePhiLoads.printElapsedUs(Context, "placePhiLoads()");
215 Timer T_placePhiStores;
216 Func->placePhiStores();
217 if (Func->hasError())
218 return;
219 T_placePhiStores.printElapsedUs(Context, "placePhiStores()");
220 Timer T_deletePhis;
221 Func->deletePhis();
222 if (Func->hasError())
223 return;
224 T_deletePhis.printElapsedUs(Context, "deletePhis()");
225 if (Context->isVerbose())
226 Str << "================ After Phi lowering ================\n";
227 Func->dump();
228
229 Timer T_genCode;
230 Func->genCode();
231 if (Func->hasError())
232 return;
233 T_genCode.printElapsedUs(Context, "genCode()");
234 if (Context->isVerbose())
235 Str << "================ After initial x8632 codegen ================\n";
236 Func->dump();
237
238 Timer T_genFrame;
239 Func->genFrame();
240 if (Func->hasError())
241 return;
242 T_genFrame.printElapsedUs(Context, "genFrame()");
243 if (Context->isVerbose())
244 Str << "================ After stack frame mapping ================\n";
245 Func->dump();
246 }
247
// Register name table built from the `name` column of REGX8632_TABLE, one
// entry per table row in table order.  getRegName() indexes it by register
// number for every type other than i1, i8 and i16.
248 IceString TargetX8632::RegNames[] = {
249 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
250 frameptr, isI8, isInt, isFP) \
251 name,
252 REGX8632_TABLE
253 #undef X
254 };
255
256 Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) {
257 assert(RegNum < PhysicalRegisters.size());
258 Variable *Reg = PhysicalRegisters[RegNum];
259 if (Reg == NULL) {
260 CfgNode *Node = NULL; // NULL means multi-block lifetime
261 Reg = Func->makeVariable(IceType_i32, Node);
262 Reg->setRegNum(RegNum);
263 PhysicalRegisters[RegNum] = Reg;
264 }
265 return Reg;
266 }
267
268 IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
269 assert(RegNum < Reg_NUM);
270 static IceString RegNames8[] = {
271 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
272 frameptr, isI8, isInt, isFP) \
273 "" name8,
274 REGX8632_TABLE
275 #undef X
276 };
277 static IceString RegNames16[] = {
278 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
279 frameptr, isI8, isInt, isFP) \
280 "" name16,
281 REGX8632_TABLE
282 #undef X
283 };
284 switch (Ty) {
285 case IceType_i1:
286 case IceType_i8:
287 return RegNames8[RegNum];
288 case IceType_i16:
289 return RegNames16[RegNum];
290 default:
291 return RegNames[RegNum];
292 }
293 }
294
295 void TargetX8632::emitVariable(const Variable *Var,
296 const Cfg *Func) const {
297 Ostream &Str = Ctx->getStrEmit();
298 assert(Var->getLocalUseNode() == NULL || Var->getLocalUseNode() == Func->getCu rrentNode());
299 if (Var->hasReg()) {
300 Str << getRegName(Var->getRegNum(), Var->getType());
301 return;
302 }
303 Str << InstX8632::getWidthString(Var->getType());
304 Str << " [" << getRegName(
305 getFrameOrStackReg(), IceType_i32);
306 int32_t Offset = Var->getStackOffset() + getStackAdjustment();
307 if (Offset) {
308 if (Offset > 0)
309 Str << "+";
310 Str << Offset;
311 }
312 Str << "]";
313 }
314
315 // Helper function for addProlog(). Sets the frame offset for Arg,
316 // updates InArgsSizeBytes according to Arg's width, and generates an
317 // instruction to copy Arg into its assigned register if applicable.
318 // For an I64 arg that has been split into Lo and Hi components, it
319 // calls itself recursively on the components, taking care to handle
320 // Lo first because of the little-endian architecture.
321 void TargetX8632::setArgOffsetAndCopy(Variable *Arg, Variable *FramePtr,
322 int32_t BasicFrameOffset,
323 int32_t &InArgsSizeBytes) {
324 Variable *Lo = Arg->getLo();
325 Variable *Hi = Arg->getHi();
326 Type Ty = Arg->getType();
327 if (Lo && Hi && Ty == IceType_i64) {
328 assert(Lo->getType() != IceType_i64); // don't want infinite recursion
329 assert(Hi->getType() != IceType_i64); // don't want infinite recursion
330 setArgOffsetAndCopy(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
331 setArgOffsetAndCopy(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
332 return;
333 }
334 Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
335 if (Arg->hasReg()) {
336 assert(Ty != IceType_i64);
337 OperandX8632Mem *Mem = OperandX8632Mem::create(
338 Func, Ty, FramePtr,
339 Ctx->getConstantInt(IceType_i32, Arg->getStackOffset()));
340 _mov(Arg, Mem);
341 }
342 InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
343 }
344
// Emits the function prolog at the start of Node and assigns a stack offset
// to every variable that did not receive a register.  In order, it:
//   1. walks all variables to compute RegsUsed and the size of the locals
//      area (LocalsSizeBytes);
//   2. pushes each callee-saved register that is in RegsUsed;
//   3. for an ebp-based frame, emits "push ebp; mov ebp, esp";
//   4. emits "sub esp, LocalsSizeBytes" when that size is nonzero;
//   5. assigns frame offsets to the arguments via setArgOffsetAndCopy();
//   6. walks all variables again to assign each stack-resident one its
//      offset, using the same skip conditions as the sizing pass.
345 void TargetX8632::addProlog(CfgNode *Node) {
346 // If SimpleCoalescing is false, each variable without a register
347 // gets its own unique stack slot, which leads to large stack
348 // frames. If SimpleCoalescing is true, then each "global" variable
349 // without a register gets its own slot, but "local" variable slots
350 // are reused across basic blocks. E.g., if A and B are local to
351 // block 1 and C is local to block 2, then C may share a slot with A
352 // or B.
353 const bool SimpleCoalescing = true;
354 int32_t InArgsSizeBytes = 0;
// NOTE(review): presumably the 4-byte return address that sits between the
// saved registers and the incoming arguments -- confirm.
355 int32_t RetIpSizeBytes = 4;
356 int32_t PreservedRegsSizeBytes = 0;
357 LocalsSizeBytes = 0;
358 Context.init(Node);
359 Context.setInsertPoint(Context.getCur());
360
361 // Determine stack frame offsets for each Variable without a
362 // register assignment. This can be done as one variable per stack
363 // slot. Or, do coalescing by running the register allocator again
364 // with an infinite set of registers (as a side effect, this gives
365 // variables a second chance at physical register assignment).
366 //
367 // A middle ground approach is to leverage sparsity and allocate one
368 // block of space on the frame for globals (variables with
369 // multi-block lifetime), and one block to share for locals
370 // (single-block lifetime).
371
372 llvm::SmallBitVector CalleeSaves =
373 getRegisterSet(RegSet_CalleeSave, RegSet_None);
374
375 int32_t GlobalsSize = 0;
// Running byte total of single-block variables, indexed by node index.
376 std::vector<int> LocalsSize(Func->getNumNodes());
377
378 // Prepass. Compute RegsUsed, PreservedRegsSizeBytes, and
379 // LocalsSizeBytes.
380 RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
381 const VarList &Variables = Func->getVariables();
382 const VarList &Args = Func->getArgs();
383 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
384 I != E; ++I) {
385 Variable *Var = *I;
386 if (Var->hasReg()) {
387 RegsUsed[Var->getRegNum()] = true;
388 continue;
389 }
390 // An argument passed on the stack already has a stack slot.
391 if (Var->getIsArg())
392 continue;
393 // A spill slot linked to a variable with a stack slot should reuse
394 // that stack slot.
395 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
396 if (Variable *Linked = Var->getPreferredRegister()) {
397 if (!Linked->hasReg())
398 continue;
399 }
400 }
401 int32_t Increment = typeWidthInBytesOnStack(Var->getType());
402 if (SimpleCoalescing) {
403 if (Var->isMultiblockLife()) {
404 GlobalsSize += Increment;
405 } else {
// Single-block variables only need room for the largest per-node total,
// so LocalsSizeBytes tracks the maximum over all nodes.
406 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
407 LocalsSize[NodeIndex] += Increment;
408 if (LocalsSize[NodeIndex] > LocalsSizeBytes)
409 LocalsSizeBytes = LocalsSize[NodeIndex];
410 }
411 } else {
412 LocalsSizeBytes += Increment;
413 }
414 }
415 LocalsSizeBytes += GlobalsSize;
416
417 // Add push instructions for preserved registers.
418 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
419 if (CalleeSaves[i] && RegsUsed[i]) {
420 PreservedRegsSizeBytes += 4;
421 const bool SuppressStackAdjustment = true;
422 _push(getPhysicalRegister(i), SuppressStackAdjustment);
423 }
424 }
425
426 // Generate "push ebp; mov ebp, esp"
427 if (IsEbpBasedFrame) {
// The frame pointer must not have been handed out as an ordinary register.
428 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
429 .count() == 0);
430 PreservedRegsSizeBytes += 4;
431 Variable *ebp = getPhysicalRegister(Reg_ebp);
432 Variable *esp = getPhysicalRegister(Reg_esp);
433 const bool SuppressStackAdjustment = true;
434 _push(ebp, SuppressStackAdjustment);
435 _mov(ebp, esp);
436 }
437
438 // Generate "sub esp, LocalsSizeBytes"
439 if (LocalsSizeBytes)
440 _sub(getPhysicalRegister(Reg_esp),
441 Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
442
443 resetStackAdjustment();
444
445 // Fill in stack offsets for args, and copy args into registers for
446 // those that were register-allocated. Args are pushed right to
447 // left, so Arg[0] is closest to the stack/frame pointer.
448 //
449 // TODO: Make this right for different width args, calling
450 // conventions, etc. For one thing, args passed in registers will
451 // need to be copied/shuffled to their home registers (the
452 // RegManager code may have some permutation logic to leverage),
453 // and if they have no home register, home space will need to be
454 // allocated on the stack to copy into.
455 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
456 int32_t BasicFrameOffset = PreservedRegsSizeBytes + RetIpSizeBytes;
// Without an ebp frame, offsets are relative to esp after the locals area
// has been subtracted, so the locals size is added to reach the arguments.
457 if (!IsEbpBasedFrame)
458 BasicFrameOffset += LocalsSizeBytes;
459 for (SizeT i = 0; i < Args.size(); ++i) {
460 Variable *Arg = Args[i];
461 setArgOffsetAndCopy(Arg, FramePtr, BasicFrameOffset, InArgsSizeBytes);
462 }
463
464 // Fill in stack offsets for locals.
// The running totals are reset and recomputed here so that each variable
// can be given the offset that corresponds to its position in the totals.
465 int32_t TotalGlobalsSize = GlobalsSize;
466 GlobalsSize = 0;
467 LocalsSize.assign(LocalsSize.size(), 0);
468 int32_t NextStackOffset = 0;
469 for (VarList::const_iterator I = Variables.begin(), E = Variables.end();
470 I != E; ++I) {
471 Variable *Var = *I;
472 if (Var->hasReg()) {
// Repeats the assignment already made by the prepass above.
473 RegsUsed[Var->getRegNum()] = true;
474 continue;
475 }
476 if (Var->getIsArg())
477 continue;
478 if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) {
479 if (Variable *Linked = Var->getPreferredRegister()) {
480 if (!Linked->hasReg()) {
481 // TODO: Make sure Linked has already been assigned a stack
482 // slot.
483 Var->setStackOffset(Linked->getStackOffset());
484 continue;
485 }
486 }
487 }
488 int32_t Increment = typeWidthInBytesOnStack(Var->getType());
489 if (SimpleCoalescing) {
490 if (Var->isMultiblockLife()) {
491 GlobalsSize += Increment;
492 NextStackOffset = GlobalsSize;
493 } else {
// Single-block slots are placed after the whole multi-block area.
494 SizeT NodeIndex = Var->getLocalUseNode()->getIndex();
495 LocalsSize[NodeIndex] += Increment;
496 NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex];
497 }
498 } else {
499 NextStackOffset += Increment;
500 }
// ebp-based frames address locals at negative offsets from ebp; otherwise
// the offset is measured up from esp across the locals area.
501 if (IsEbpBasedFrame)
502 Var->setStackOffset(-NextStackOffset);
503 else
504 Var->setStackOffset(LocalsSizeBytes - NextStackOffset);
505 }
// NOTE(review): with SimpleCoalescing, NextStackOffset holds the value
// computed for the last variable visited, which is not necessarily the
// largest offset assigned -- confirm that this is what FrameSizeLocals is
// meant to record.
506 this->FrameSizeLocals = NextStackOffset;
507 this->HasComputedFrame = true;
508
509 if (Func->getContext()->isVerbose(IceV_Frame)) {
510 Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes
511 << "\n"
512 << "InArgsSizeBytes=" << InArgsSizeBytes
513 << "\n"
514 << "PreservedRegsSizeBytes="
515 << PreservedRegsSizeBytes << "\n";
516 }
517 }
518
519 void TargetX8632::addEpilog(CfgNode *Node) {
520 InstList &Insts = Node->getInsts();
521 InstList::reverse_iterator RI, E;
522 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
523 if (llvm::isa<InstX8632Ret>(*RI))
524 break;
525 }
526 if (RI == E)
527 return;
528
529 // Convert the reverse_iterator position into its corresponding
530 // (forward) iterator position.
531 InstList::iterator InsertPoint = RI.base();
532 --InsertPoint;
533 Context.init(Node);
534 Context.setInsertPoint(InsertPoint);
535
536 Variable *esp = getPhysicalRegister(Reg_esp);
537 if (IsEbpBasedFrame) {
538 Variable *ebp = getPhysicalRegister(Reg_ebp);
539 _mov(esp, ebp);
540 _pop(ebp);
541 } else {
542 // add esp, LocalsSizeBytes
543 if (LocalsSizeBytes)
544 _add(esp, Ctx->getConstantInt(IceType_i32, LocalsSizeBytes));
545 }
546
547 // Add pop instructions for preserved registers.
548 llvm::SmallBitVector CalleeSaves =
549 getRegisterSet(RegSet_CalleeSave, RegSet_None);
550 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
551 SizeT j = CalleeSaves.size() - i - 1;
552 if (j == Reg_ebp && IsEbpBasedFrame)
553 continue;
554 if (CalleeSaves[j] && RegsUsed[j]) {
555 _pop(getPhysicalRegister(j));
556 }
557 }
558 }
559
560 void TargetX8632::split64(Variable *Var) {
561 switch (Var->getType()) {
562 default:
563 return;
564 case IceType_i64:
565 // TODO: Only consider F64 if we need to push each half when
566 // passing as an argument to a function call. Note that each half
567 // is still typed as I32.
568 case IceType_f64:
569 break;
570 }
571 Variable *Lo = Var->getLo();
572 Variable *Hi = Var->getHi();
573 if (Lo) {
574 assert(Hi);
575 return;
576 }
577 assert(Hi == NULL);
578 Lo = Func->makeVariable(IceType_i32, Context.getNode(),
579 Var->getName() + "__lo");
580 Hi = Func->makeVariable(IceType_i32, Context.getNode(),
581 Var->getName() + "__hi");
582 Var->setLoHi(Lo, Hi);
583 if (Var->getIsArg()) {
584 Lo->setIsArg(Func);
585 Hi->setIsArg(Func);
586 }
587 }
588
589 Operand *TargetX8632::loOperand(Operand *Operand) {
590 assert(Operand->getType() == IceType_i64);
591 if (Operand->getType() != IceType_i64)
592 return Operand;
593 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
594 split64(Var);
595 return Var->getLo();
596 }
597 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
598 uint64_t Mask = (1ull << 32) - 1;
599 return Ctx->getConstantInt(IceType_i32, Const->getValue() & Mask);
600 }
601 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
602 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(),
603 Mem->getOffset(), Mem->getIndex(),
604 Mem->getShift());
605 }
606 llvm_unreachable("Unsupported operand type");
607 return NULL;
608 }
609
610 Operand *TargetX8632::hiOperand(Operand *Operand) {
611 assert(Operand->getType() == IceType_i64);
612 if (Operand->getType() != IceType_i64)
613 return Operand;
614 if (Variable *Var = llvm::dyn_cast<Variable>(Operand)) {
615 split64(Var);
616 return Var->getHi();
617 }
618 if (ConstantInteger *Const = llvm::dyn_cast<ConstantInteger>(Operand)) {
619 return Ctx->getConstantInt(IceType_i32, Const->getValue() >> 32);
620 }
621 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(Operand)) {
622 Constant *Offset = Mem->getOffset();
623 if (Offset == NULL)
624 Offset = Ctx->getConstantInt(IceType_i32, 4);
625 else if (ConstantInteger *IntOffset =
626 llvm::dyn_cast<ConstantInteger>(Offset)) {
627 Offset = Ctx->getConstantInt(IceType_i32, 4 + IntOffset->getValue());
628 } else if (ConstantRelocatable *SymOffset =
629 llvm::dyn_cast<ConstantRelocatable>(Offset)) {
630 Offset = Ctx->getConstantSym(IceType_i32, 4 + SymOffset->getOffset(),
631 SymOffset->getName());
632 }
633 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
634 Mem->getIndex(), Mem->getShift());
635 }
636 llvm_unreachable("Unsupported operand type");
637 return NULL;
638 }
639
640 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
641 RegSetMask Exclude) const {
642 llvm::SmallBitVector Registers(Reg_NUM);
643
644 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \
645 frameptr, isI8, isInt, isFP) \
646 if (scratch && (Include & RegSet_CallerSave)) \
647 Registers[val] = true; \
648 if (preserved && (Include & RegSet_CalleeSave)) \
649 Registers[val] = true; \
650 if (stackptr && (Include & RegSet_StackPointer)) \
651 Registers[val] = true; \
652 if (frameptr && (Include & RegSet_FramePointer)) \
653 Registers[val] = true; \
654 if (scratch && (Exclude & RegSet_CallerSave)) \
655 Registers[val] = false; \
656 if (preserved && (Exclude & RegSet_CalleeSave)) \
657 Registers[val] = false; \
658 if (stackptr && (Exclude & RegSet_StackPointer)) \
659 Registers[val] = false; \
660 if (frameptr && (Exclude & RegSet_FramePointer)) \
661 Registers[val] = false;
662
663 REGX8632_TABLE
664
665 #undef X
666
667 return Registers;
668 }
669
670 void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
671 IsEbpBasedFrame = true;
672 // TODO(sehr,stichnot): align allocated memory, keep stack aligned, minimize
673 // the number of adjustments of esp, etc.
674 Variable *esp = getPhysicalRegister(Reg_esp);
675 Operand *TotalSize = legalize(Inst->getSizeInBytes());
676 Variable *Dest = Inst->getDest();
677 _sub(esp, TotalSize);
678 _mov(Dest, esp);
679 }
680
681 void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) {
682 Variable *Dest = Inst->getDest();
683 Operand *Src0 = legalize(Inst->getSrc(0));
684 Operand *Src1 = legalize(Inst->getSrc(1));
685 if (Dest->getType() == IceType_i64) {
686 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
687 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
688 Operand *Src0Lo = loOperand(Src0);
689 Operand *Src0Hi = hiOperand(Src0);
690 Operand *Src1Lo = loOperand(Src1);
691 Operand *Src1Hi = hiOperand(Src1);
692 Variable *T_Lo = NULL, *T_Hi = NULL;
693 switch (Inst->getOp()) {
694 case InstArithmetic::Add:
695 _mov(T_Lo, Src0Lo);
696 _add(T_Lo, Src1Lo);
697 _mov(DestLo, T_Lo);
698 _mov(T_Hi, Src0Hi);
699 _adc(T_Hi, Src1Hi);
700 _mov(DestHi, T_Hi);
701 break;
702 case InstArithmetic::And:
703 _mov(T_Lo, Src0Lo);
704 _and(T_Lo, Src1Lo);
705 _mov(DestLo, T_Lo);
706 _mov(T_Hi, Src0Hi);
707 _and(T_Hi, Src1Hi);
708 _mov(DestHi, T_Hi);
709 break;
710 case InstArithmetic::Or:
711 _mov(T_Lo, Src0Lo);
712 _or(T_Lo, Src1Lo);
713 _mov(DestLo, T_Lo);
714 _mov(T_Hi, Src0Hi);
715 _or(T_Hi, Src1Hi);
716 _mov(DestHi, T_Hi);
717 break;
718 case InstArithmetic::Xor:
719 _mov(T_Lo, Src0Lo);
720 _xor(T_Lo, Src1Lo);
721 _mov(DestLo, T_Lo);
722 _mov(T_Hi, Src0Hi);
723 _xor(T_Hi, Src1Hi);
724 _mov(DestHi, T_Hi);
725 break;
726 case InstArithmetic::Sub:
727 _mov(T_Lo, Src0Lo);
728 _sub(T_Lo, Src1Lo);
729 _mov(DestLo, T_Lo);
730 _mov(T_Hi, Src0Hi);
731 _sbb(T_Hi, Src1Hi);
732 _mov(DestHi, T_Hi);
733 break;
734 case InstArithmetic::Mul: {
735 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
736 Variable *T_4Lo = makeReg(IceType_i32, Reg_eax);
737 Variable *T_4Hi = makeReg(IceType_i32, Reg_edx);
738 // gcc does the following:
739 // a=b*c ==>
740 // t1 = b.hi; t1 *=(imul) c.lo
741 // t2 = c.hi; t2 *=(imul) b.lo
742 // t3:eax = b.lo
743 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
744 // a.lo = t4.lo
745 // t4.hi += t1
746 // t4.hi += t2
747 // a.hi = t4.hi
748 _mov(T_1, Src0Hi);
749 _imul(T_1, Src1Lo);
750 _mov(T_2, Src1Hi);
751 _imul(T_2, Src0Lo);
752 _mov(T_3, Src0Lo, Reg_eax);
753 _mul(T_4Lo, T_3, Src1Lo);
754 // The mul instruction produces two dest variables, edx:eax. We
755 // create a fake definition of edx to account for this.
756 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
757 _mov(DestLo, T_4Lo);
758 _add(T_4Hi, T_1);
759 _add(T_4Hi, T_2);
760 _mov(DestHi, T_4Hi);
761 } break;
762 case InstArithmetic::Shl: {
763 // TODO: Refactor the similarities between Shl, Lshr, and Ashr.
764 // gcc does the following:
765 // a=b<<c ==>
766 // t1:ecx = c.lo & 0xff
767 // t2 = b.lo
768 // t3 = b.hi
769 // t3 = shld t3, t2, t1
770 // t2 = shl t2, t1
771 // test t1, 0x20
772 // je L1
773 // use(t3)
774 // t3 = t2
775 // t2 = 0
776 // L1:
777 // a.lo = t2
778 // a.hi = t3
779 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
780 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
781 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
782 InstX8632Label *Label = InstX8632Label::create(Func, this);
783 _mov(T_1, Src1Lo, Reg_ecx);
784 _mov(T_2, Src0Lo);
785 _mov(T_3, Src0Hi);
786 _shld(T_3, T_2, T_1);
787 _shl(T_2, T_1);
788 _test(T_1, BitTest);
789 _br(InstX8632Br::Br_e, Label);
790 // Because of the intra-block control flow, we need to fake a use
791 // of T_3 to prevent its earlier definition from being dead-code
792 // eliminated in the presence of its later definition.
793 Context.insert(InstFakeUse::create(Func, T_3));
794 _mov(T_3, T_2);
795 _mov(T_2, Zero);
796 Context.insert(Label);
797 _mov(DestLo, T_2);
798 _mov(DestHi, T_3);
799 } break;
800 case InstArithmetic::Lshr: {
801 // a=b>>c (unsigned) ==>
802 // t1:ecx = c.lo & 0xff
803 // t2 = b.lo
804 // t3 = b.hi
805 // t2 = shrd t2, t3, t1
806 // t3 = shr t3, t1
807 // test t1, 0x20
808 // je L1
809 // use(t2)
810 // t2 = t3
811 // t3 = 0
812 // L1:
813 // a.lo = t2
814 // a.hi = t3
815 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
816 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
817 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
818 InstX8632Label *Label = InstX8632Label::create(Func, this);
819 _mov(T_1, Src1Lo, Reg_ecx);
820 _mov(T_2, Src0Lo);
821 _mov(T_3, Src0Hi);
822 _shrd(T_2, T_3, T_1);
823 _shr(T_3, T_1);
824 _test(T_1, BitTest);
825 _br(InstX8632Br::Br_e, Label);
826 // Because of the intra-block control flow, we need to fake a use
827 // of T_2 to prevent its earlier definition from being dead-code
828 // eliminated in the presence of its later definition.
829 Context.insert(InstFakeUse::create(Func, T_2));
830 _mov(T_2, T_3);
831 _mov(T_3, Zero);
832 Context.insert(Label);
833 _mov(DestLo, T_2);
834 _mov(DestHi, T_3);
835 } break;
836 case InstArithmetic::Ashr: {
837 // a=b>>c (signed) ==>
838 // t1:ecx = c.lo & 0xff
839 // t2 = b.lo
840 // t3 = b.hi
841 // t2 = shrd t2, t3, t1
842 // t3 = sar t3, t1
843 // test t1, 0x20
844 // je L1
845 // use(t2)
846 // t2 = t3
847 // t3 = sar t3, 0x1f
848 // L1:
849 // a.lo = t2
850 // a.hi = t3
851 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
852 Constant *BitTest = Ctx->getConstantInt(IceType_i32, 0x20);
853 Constant *SignExtend = Ctx->getConstantInt(IceType_i32, 0x1f);
854 InstX8632Label *Label = InstX8632Label::create(Func, this);
855 _mov(T_1, Src1Lo, Reg_ecx);
856 _mov(T_2, Src0Lo);
857 _mov(T_3, Src0Hi);
858 _shrd(T_2, T_3, T_1);
859 _sar(T_3, T_1);
860 _test(T_1, BitTest);
861 _br(InstX8632Br::Br_e, Label);
862 // Because of the intra-block control flow, we need to fake a use
863 // of T_3 to prevent its earlier definition from being dead-code
864 // eliminated in the presence of its later definition.
865 Context.insert(InstFakeUse::create(Func, T_2));
866 _mov(T_2, T_3);
867 _sar(T_3, SignExtend);
868 Context.insert(Label);
869 _mov(DestLo, T_2);
870 _mov(DestHi, T_3);
871 } break;
872 case InstArithmetic::Udiv: {
873 const SizeT MaxSrcs = 2;
874 InstCall *Call = makeHelperCall("__udivdi3", Dest, MaxSrcs);
875 Call->addArg(Inst->getSrc(0));
876 Call->addArg(Inst->getSrc(1));
877 lowerCall(Call);
878 } break;
879 case InstArithmetic::Sdiv: {
880 const SizeT MaxSrcs = 2;
881 InstCall *Call = makeHelperCall("__divdi3", Dest, MaxSrcs);
882 Call->addArg(Inst->getSrc(0));
883 Call->addArg(Inst->getSrc(1));
884 lowerCall(Call);
885 } break;
886 case InstArithmetic::Urem: {
887 const SizeT MaxSrcs = 2;
888 InstCall *Call = makeHelperCall("__umoddi3", Dest, MaxSrcs);
889 Call->addArg(Inst->getSrc(0));
890 Call->addArg(Inst->getSrc(1));
891 lowerCall(Call);
892 } break;
893 case InstArithmetic::Srem: {
894 const SizeT MaxSrcs = 2;
895 InstCall *Call = makeHelperCall("__moddi3", Dest, MaxSrcs);
896 Call->addArg(Inst->getSrc(0));
897 Call->addArg(Inst->getSrc(1));
898 lowerCall(Call);
899 } break;
900 case InstArithmetic::Fadd:
901 case InstArithmetic::Fsub:
902 case InstArithmetic::Fmul:
903 case InstArithmetic::Fdiv:
904 case InstArithmetic::Frem:
905 llvm_unreachable("FP instruction with i64 type");
906 break;
907 }
908 } else { // Dest->getType() != IceType_i64
909 Variable *T_edx = NULL;
910 Variable *T = NULL;
911 switch (Inst->getOp()) {
912 case InstArithmetic::Add:
913 _mov(T, Src0);
914 _add(T, Src1);
915 _mov(Dest, T);
916 break;
917 case InstArithmetic::And:
918 _mov(T, Src0);
919 _and(T, Src1);
920 _mov(Dest, T);
921 break;
922 case InstArithmetic::Or:
923 _mov(T, Src0);
924 _or(T, Src1);
925 _mov(Dest, T);
926 break;
927 case InstArithmetic::Xor:
928 _mov(T, Src0);
929 _xor(T, Src1);
930 _mov(Dest, T);
931 break;
932 case InstArithmetic::Sub:
933 _mov(T, Src0);
934 _sub(T, Src1);
935 _mov(Dest, T);
936 break;
937 case InstArithmetic::Mul:
938 // TODO: Optimize for llvm::isa<Constant>(Src1)
939 // TODO: Strength-reduce multiplications by a constant,
940 // particularly -1 and powers of 2. Advanced: use lea to
941 // multiply by 3, 5, 9.
942 //
943 // The 8-bit version of imul only allows the form "imul r/m8"
944 // where T must be in eax.
945 if (Dest->getType() == IceType_i8)
946 _mov(T, Src0, Reg_eax);
947 else
948 _mov(T, Src0);
949 _imul(T, Src1);
950 _mov(Dest, T);
951 break;
952 case InstArithmetic::Shl:
953 _mov(T, Src0);
954 if (!llvm::isa<Constant>(Src1))
955 Src1 = legalizeToVar(Src1, false, Reg_ecx);
956 _shl(T, Src1);
957 _mov(Dest, T);
958 break;
959 case InstArithmetic::Lshr:
960 _mov(T, Src0);
961 if (!llvm::isa<Constant>(Src1))
962 Src1 = legalizeToVar(Src1, false, Reg_ecx);
963 _shr(T, Src1);
964 _mov(Dest, T);
965 break;
966 case InstArithmetic::Ashr:
967 _mov(T, Src0);
968 if (!llvm::isa<Constant>(Src1))
969 Src1 = legalizeToVar(Src1, false, Reg_ecx);
970 _sar(T, Src1);
971 _mov(Dest, T);
972 break;
973 case InstArithmetic::Udiv:
974 if (Dest->getType() == IceType_i8) {
975 Variable *T_ah = NULL;
976 Constant *Zero = Ctx->getConstantInt(IceType_i8, 0);
977 _mov(T, Src0, Reg_eax);
978 _mov(T_ah, Zero, Reg_ah);
979 _div(T_ah, Src1, T);
980 Context.insert(InstFakeUse::create(Func, T_ah));
981 _mov(Dest, T);
982 } else {
983 // TODO: fix for 8-bit, see Urem
984 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
985 _mov(T, Src0, Reg_eax);
986 _mov(T_edx, Zero, Reg_edx);
987 _div(T, Src1, T_edx);
988 _mov(Dest, T);
989 }
990 break;
991 case InstArithmetic::Sdiv:
992 T_edx = makeReg(IceType_i32, Reg_edx);
993 _mov(T, Src0, Reg_eax);
994 _cdq(T_edx, T);
995 _idiv(T, Src1, T_edx);
996 _mov(Dest, T);
997 break;
998 case InstArithmetic::Urem:
999 if (Dest->getType() == IceType_i8) {
1000 Variable *T_ah = NULL;
1001 Constant *Zero = Ctx->getConstantInt(IceType_i8, 0);
1002 _mov(T, Src0, Reg_eax);
1003 _mov(T_ah, Zero, Reg_ah);
1004 _div(T_ah, Src1, T);
1005 _mov(Dest, T_ah);
1006 } else {
1007 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
1008 _mov(T_edx, Zero, Reg_edx);
1009 _mov(T, Src0, Reg_eax);
1010 _div(T_edx, Src1, T);
1011 _mov(Dest, T_edx);
1012 }
1013 break;
1014 case InstArithmetic::Srem:
1015 T_edx = makeReg(IceType_i32, Reg_edx);
1016 _mov(T, Src0, Reg_eax);
1017 _cdq(T_edx, T);
1018 _idiv(T_edx, Src1, T);
1019 _mov(Dest, T_edx);
1020 break;
1021 case InstArithmetic::Fadd:
1022 _mov(T, Src0);
1023 _addss(T, Src1);
1024 _mov(Dest, T);
1025 break;
1026 case InstArithmetic::Fsub:
1027 _mov(T, Src0);
1028 _subss(T, Src1);
1029 _mov(Dest, T);
1030 break;
1031 case InstArithmetic::Fmul:
1032 _mov(T, Src0);
1033 _mulss(T, Src1);
1034 _mov(Dest, T);
1035 break;
1036 case InstArithmetic::Fdiv:
1037 _mov(T, Src0);
1038 _divss(T, Src1);
1039 _mov(Dest, T);
1040 break;
1041 case InstArithmetic::Frem: {
1042 const SizeT MaxSrcs = 2;
1043 Type Ty = Dest->getType();
1044 InstCall *Call =
1045 makeHelperCall(Ty == IceType_f32 ? "fmodf" : "fmod", Dest, MaxSrcs);
1046 Call->addArg(Src0);
1047 Call->addArg(Src1);
1048 return lowerCall(Call);
1049 } break;
1050 }
1051 }
1052 }
1053
1054 void TargetX8632::lowerAssign(const InstAssign *Inst) {
1055 Variable *Dest = Inst->getDest();
1056 Operand *Src0 = legalize(Inst->getSrc(0));
1057 assert(Dest->getType() == Src0->getType());
1058 if (Dest->getType() == IceType_i64) {
1059 Operand *Src0Lo = loOperand(Src0);
1060 Operand *Src0Hi = hiOperand(Src0);
1061 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1062 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1063 Variable *T_Lo = NULL, *T_Hi = NULL;
1064 _mov(T_Lo, Src0Lo);
1065 _mov(DestLo, T_Lo);
1066 _mov(T_Hi, Src0Hi);
1067 _mov(DestHi, T_Hi);
1068 } else {
1069 const bool AllowOverlap = true;
1070 // RI is either a physical register or an immediate.
1071 Operand *RI = legalize(Src0, Legal_Reg | Legal_Imm, AllowOverlap);
1072 _mov(Dest, RI);
1073 }
1074 }
1075
1076 void TargetX8632::lowerBr(const InstBr *Inst) {
1077 if (Inst->isUnconditional()) {
1078 _br(Inst->getTargetUnconditional());
1079 } else {
1080 Operand *Src0 = legalize(Inst->getCondition());
1081 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
1082 _cmp(Src0, Zero);
1083 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
1084 }
1085 }
1086
1087 void TargetX8632::lowerCall(const InstCall *Instr) {
1088 // Generate a sequence of push instructions, pushing right to left,
1089 // keeping track of stack offsets in case a push involves a stack
1090 // operand and we are using an esp-based frame.
1091 uint32_t StackOffset = 0;
1092 // TODO: If for some reason the call instruction gets dead-code
1093 // eliminated after lowering, we would need to ensure that the
1094 // pre-call push instructions and the post-call esp adjustment get
1095 // eliminated as well.
1096 for (SizeT NumArgs = Instr->getNumArgs(), i = 0; i < NumArgs; ++i) {
1097 Operand *Arg = legalize(Instr->getArg(NumArgs - i - 1));
1098 if (Arg->getType() == IceType_i64) {
1099 _push(hiOperand(Arg));
1100 _push(loOperand(Arg));
1101 } else if (Arg->getType() == IceType_f64) {
1102 // If the Arg turns out to be a memory operand, we need to push
1103 // 8 bytes, which requires two push instructions. This ends up
1104 // being somewhat clumsy in the current IR, so we use a
1105 // workaround. Force the operand into a (xmm) register, and
1106 // then push the register. An xmm register push is actually not
1107 // possible in x86, but the Push instruction emitter handles
1108 // this by decrementing the stack pointer and directly writing
1109 // the xmm register value.
1110 Variable *T = NULL;
1111 _mov(T, Arg);
1112 _push(T);
1113 } else {
1114 _push(Arg);
1115 }
1116 StackOffset += typeWidthInBytesOnStack(Arg->getType());
1117 }
1118 // Generate the call instruction. Assign its result to a temporary
1119 // with high register allocation weight.
1120 Variable *Dest = Instr->getDest();
1121 Variable *eax = NULL; // doubles as RegLo as necessary
1122 Variable *edx = NULL;
1123 if (Dest) {
1124 switch (Dest->getType()) {
1125 case IceType_NUM:
1126 llvm_unreachable("Invalid Call dest type");
1127 break;
1128 case IceType_void:
1129 break;
1130 case IceType_i1:
1131 case IceType_i8:
1132 case IceType_i16:
1133 case IceType_i32:
1134 eax = makeReg(Dest->getType(), Reg_eax);
1135 break;
1136 case IceType_i64:
1137 eax = makeReg(IceType_i32, Reg_eax);
1138 edx = makeReg(IceType_i32, Reg_edx);
1139 break;
1140 case IceType_f32:
1141 case IceType_f64:
1142 // Leave eax==edx==NULL, and capture the result with the fstp
1143 // instruction.
1144 break;
1145 }
1146 }
1147 Operand *CallTarget = legalize(Instr->getCallTarget());
1148 Inst *NewCall = InstX8632Call::create(Func, eax, CallTarget);
1149 Context.insert(NewCall);
1150 if (edx)
1151 Context.insert(InstFakeDef::create(Func, edx));
1152
1153 // Add the appropriate offset to esp.
1154 if (StackOffset) {
1155 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
1156 _add(esp, Ctx->getConstantInt(IceType_i32, StackOffset));
1157 }
1158
1159 // Insert a register-kill pseudo instruction.
1160 VarList KilledRegs;
1161 for (SizeT i = 0; i < ScratchRegs.size(); ++i) {
1162 if (ScratchRegs[i])
1163 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));
1164 }
1165 if (!KilledRegs.empty()) {
1166 Inst *Kill = InstFakeKill::create(Func, KilledRegs, NewCall);
1167 Context.insert(Kill);
1168 }
1169
1170 // Generate a FakeUse to keep the call live if necessary.
1171 if (Instr->hasSideEffects() && eax) {
1172 Inst *FakeUse = InstFakeUse::create(Func, eax);
1173 Context.insert(FakeUse);
1174 }
1175
1176 // Generate Dest=eax assignment.
1177 if (Dest && eax) {
1178 if (edx) {
1179 split64(Dest);
1180 Variable *DestLo = Dest->getLo();
1181 Variable *DestHi = Dest->getHi();
1182 DestLo->setPreferredRegister(eax, false);
1183 DestHi->setPreferredRegister(edx, false);
1184 _mov(DestLo, eax);
1185 _mov(DestHi, edx);
1186 } else {
1187 Dest->setPreferredRegister(eax, false);
1188 _mov(Dest, eax);
1189 }
1190 }
1191
1192 // Special treatment for an FP function which returns its result in
1193 // st(0).
1194 if (Dest &&
1195 (Dest->getType() == IceType_f32 || Dest->getType() == IceType_f64)) {
1196 _fstp(Dest);
1197 // If Dest ends up being a physical xmm register, the fstp emit
1198 // code will route st(0) through a temporary stack slot.
1199 }
1200 }
1201
1202 void TargetX8632::lowerCast(const InstCast *Inst) {
1203 // a = cast(b) ==> t=cast(b); a=t; (link t->b, link a->t, no overlap)
1204 InstCast::OpKind CastKind = Inst->getCastKind();
1205 Variable *Dest = Inst->getDest();
1206 // Src0RM is the source operand legalized to physical register or memory, but
1207 // not immediate, since the relevant x86 native instructions don't allow an
1208 // immediate operand. If the operand is an immediate, we could consider
1209 // computing the strength-reduced result at translation time, but we're
1210 // unlikely to see something like that in the bitcode that the optimizer
1211 // wouldn't have already taken care of.
1212 Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem, true);
1213 switch (CastKind) {
1214 default:
1215 Func->setError("Cast type not supported");
1216 return;
1217 case InstCast::Sext:
1218 if (Dest->getType() == IceType_i64) {
1219 // t1=movsx src; t2=t1; t2=sar t2, 31; dst.lo=t1; dst.hi=t2
1220 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1221 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1222 Variable *T_Lo = makeReg(DestLo->getType());
1223 if (Src0RM->getType() == IceType_i32)
1224 _mov(T_Lo, Src0RM);
1225 else
1226 _movsx(T_Lo, Src0RM);
1227 _mov(DestLo, T_Lo);
1228 Variable *T_Hi = NULL;
1229 Constant *Shift = Ctx->getConstantInt(IceType_i32, 31);
1230 _mov(T_Hi, T_Lo);
1231 _sar(T_Hi, Shift);
1232 _mov(DestHi, T_Hi);
1233 } else {
1234 // TODO: Sign-extend an i1 via "shl reg, 31; sar reg, 31", and
1235 // also copy to the high operand of a 64-bit variable.
1236 // t1 = movsx src; dst = t1
1237 Variable *T = makeReg(Dest->getType());
1238 _movsx(T, Src0RM);
1239 _mov(Dest, T);
1240 }
1241 break;
1242 case InstCast::Zext:
1243 if (Dest->getType() == IceType_i64) {
1244 // t1=movzx src; dst.lo=t1; dst.hi=0
1245 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
1246 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1247 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1248 Variable *Tmp = makeReg(DestLo->getType());
1249 if (Src0RM->getType() == IceType_i32)
1250 _mov(Tmp, Src0RM);
1251 else
1252 _movzx(Tmp, Src0RM);
1253 _mov(DestLo, Tmp);
1254 _mov(DestHi, Zero);
1255 } else if (Src0RM->getType() == IceType_i1) {
1256 // t = Src0RM; t &= 1; Dest = t
1257 Operand *One = Ctx->getConstantInt(IceType_i32, 1);
1258 Variable *T = makeReg(IceType_i32);
1259 _movzx(T, Src0RM);
1260 _and(T, One);
1261 _mov(Dest, T);
1262 } else {
1263 // t1 = movzx src; dst = t1
1264 Variable *T = makeReg(Dest->getType());
1265 _movzx(T, Src0RM);
1266 _mov(Dest, T);
1267 }
1268 break;
1269 case InstCast::Trunc: {
1270 if (Src0RM->getType() == IceType_i64)
1271 Src0RM = loOperand(Src0RM);
1272 // t1 = trunc Src0RM; Dest = t1
1273 Variable *T = NULL;
1274 _mov(T, Src0RM);
1275 _mov(Dest, T);
1276 break;
1277 }
1278 case InstCast::Fptrunc:
1279 case InstCast::Fpext: {
1280 // t1 = cvt Src0RM; Dest = t1
1281 Variable *T = makeReg(Dest->getType());
1282 _cvt(T, Src0RM);
1283 _mov(Dest, T);
1284 break;
1285 }
1286 case InstCast::Fptosi:
1287 if (Dest->getType() == IceType_i64) {
1288 // Use a helper for converting floating-point values to 64-bit
1289 // integers. SSE2 appears to have no way to convert from xmm
1290 // registers to something like the edx:eax register pair, and
1291 // gcc and clang both want to use x87 instructions complete with
1292 // temporary manipulation of the status word. This helper is
1293 // not needed for x86-64.
1294 split64(Dest);
1295 const SizeT MaxSrcs = 1;
1296 Type SrcType = Inst->getSrc(0)->getType();
1297 InstCall *Call = makeHelperCall(
1298 SrcType == IceType_f32 ? "cvtftosi64" : "cvtdtosi64", Dest, MaxSrcs);
1299 Call->addArg(Inst->getSrc(0));
1300 lowerCall(Call);
1301 } else {
1302 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
1303 Variable *T_1 = makeReg(IceType_i32);
1304 Variable *T_2 = makeReg(Dest->getType());
1305 _cvt(T_1, Src0RM);
1306 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
1307 _mov(Dest, T_2);
1308 T_2->setPreferredRegister(T_1, true);
1309 }
1310 break;
1311 case InstCast::Fptoui:
1312 if (Dest->getType() == IceType_i64 || Dest->getType() == IceType_i32) {
1313 // Use a helper for both x86-32 and x86-64.
1314 split64(Dest);
1315 const SizeT MaxSrcs = 1;
1316 Type DestType = Dest->getType();
1317 Type SrcType = Src0RM->getType();
1318 IceString DstSubstring = (DestType == IceType_i64 ? "64" : "32");
1319 IceString SrcSubstring = (SrcType == IceType_f32 ? "f" : "d");
1320 // Possibilities are cvtftoui32, cvtdtoui32, cvtftoui64, cvtdtoui64
1321 IceString TargetString = "cvt" + SrcSubstring + "toui" + DstSubstring;
1322 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
1323 Call->addArg(Inst->getSrc(0));
1324 lowerCall(Call);
1325 return;
1326 } else {
1327 // t1.i32 = cvt Src0RM; t2.dest_type = t1; Dest = t2.dest_type
1328 Variable *T_1 = makeReg(IceType_i32);
1329 Variable *T_2 = makeReg(Dest->getType());
1330 _cvt(T_1, Src0RM);
1331 _mov(T_2, T_1); // T_1 and T_2 may have different integer types
1332 _mov(Dest, T_2);
1333 T_2->setPreferredRegister(T_1, true);
1334 }
1335 break;
1336 case InstCast::Sitofp:
1337 if (Src0RM->getType() == IceType_i64) {
1338 // Use a helper for x86-32.
1339 const SizeT MaxSrcs = 1;
1340 Type DestType = Dest->getType();
1341 InstCall *Call = makeHelperCall(
1342 DestType == IceType_f32 ? "cvtsi64tof" : "cvtsi64tod", Dest, MaxSrcs);
1343 Call->addArg(Inst->getSrc(0));
1344 lowerCall(Call);
1345 return;
1346 } else {
1347 // Sign-extend the operand.
1348 // t1.i32 = movsx Src0RM; t2 = Cvt t1.i32; Dest = t2
1349 Variable *T_1 = makeReg(IceType_i32);
1350 Variable *T_2 = makeReg(Dest->getType());
1351 if (Src0RM->getType() == IceType_i32)
1352 _mov(T_1, Src0RM);
1353 else
1354 _movsx(T_1, Src0RM);
1355 _cvt(T_2, T_1);
1356 _mov(Dest, T_2);
1357 }
1358 break;
1359 case InstCast::Uitofp:
1360 if (Src0RM->getType() == IceType_i64 || Src0RM->getType() == IceType_i32) {
1361 // Use a helper for x86-32 and x86-64. Also use a helper for
1362 // i32 on x86-32.
1363 const SizeT MaxSrcs = 1;
1364 Type DestType = Dest->getType();
1365 IceString SrcSubstring = (Src0RM->getType() == IceType_i64 ? "64" : "32");
1366 IceString DstSubstring = (DestType == IceType_f32 ? "f" : "d");
1367 // Possibilities are cvtui32tof, cvtui32tod, cvtui64tof, cvtui64tod
1368 IceString TargetString = "cvtui" + SrcSubstring + "to" + DstSubstring;
1369 InstCall *Call = makeHelperCall(TargetString, Dest, MaxSrcs);
1370 Call->addArg(Inst->getSrc(0));
1371 lowerCall(Call);
1372 return;
1373 } else {
1374 // Zero-extend the operand.
1375 // t1.i32 = movzx Src0RM; t2 = Cvt t1.i32; Dest = t2
1376 Variable *T_1 = makeReg(IceType_i32);
1377 Variable *T_2 = makeReg(Dest->getType());
1378 if (Src0RM->getType() == IceType_i32)
1379 _mov(T_1, Src0RM);
1380 else
1381 _movzx(T_1, Src0RM);
1382 _cvt(T_2, T_1);
1383 _mov(Dest, T_2);
1384 }
1385 break;
1386 case InstCast::Bitcast:
1387 if (Dest->getType() == Src0RM->getType()) {
1388 InstAssign *Assign = InstAssign::create(Func, Dest, Src0RM);
1389 lowerAssign(Assign);
1390 llvm_unreachable("Pointer bitcasts aren't lowered correctly.");
1391 return;
1392 }
1393 switch (Dest->getType()) {
1394 default:
1395 llvm_unreachable("Unexpected Bitcast dest type");
1396 case IceType_i32:
1397 case IceType_f32: {
1398 Type DestType = Dest->getType();
1399 Type SrcType = Src0RM->getType();
1400 assert((DestType == IceType_i32 && SrcType == IceType_f32) ||
1401 (DestType == IceType_f32 && SrcType == IceType_i32));
1402 // a.i32 = bitcast b.f32 ==>
1403 // t.f32 = b.f32
1404 // s.f32 = spill t.f32
1405 // a.i32 = s.f32
1406 Variable *T = NULL;
1407 // TODO: Should be able to force a spill setup by calling legalize() with
1408 // Legal_Mem and not Legal_Reg or Legal_Imm.
1409 Variable *Spill = Func->makeVariable(SrcType, Context.getNode());
1410 Spill->setWeight(RegWeight::Zero);
1411 Spill->setPreferredRegister(Dest, true);
1412 _mov(T, Src0RM);
1413 _mov(Spill, T);
1414 _mov(Dest, Spill);
1415 } break;
1416 case IceType_i64: {
1417 assert(Src0RM->getType() == IceType_f64);
1418 // a.i64 = bitcast b.f64 ==>
1419 // s.f64 = spill b.f64
1420 // t_lo.i32 = lo(s.f64)
1421 // a_lo.i32 = t_lo.i32
1422 // t_hi.i32 = hi(s.f64)
1423 // a_hi.i32 = t_hi.i32
1424 Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
1425 Spill->setWeight(RegWeight::Zero);
1426 Spill->setPreferredRegister(llvm::dyn_cast<Variable>(Src0RM), true);
1427 _mov(Spill, Src0RM);
1428
1429 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1430 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1431 Variable *T_Lo = makeReg(IceType_i32);
1432 Variable *T_Hi = makeReg(IceType_i32);
1433 VariableSplit *SpillLo =
1434 VariableSplit::create(Func, Spill, VariableSplit::Low);
1435 VariableSplit *SpillHi =
1436 VariableSplit::create(Func, Spill, VariableSplit::High);
1437
1438 _mov(T_Lo, SpillLo);
1439 _mov(DestLo, T_Lo);
1440 _mov(T_Hi, SpillHi);
1441 _mov(DestHi, T_Hi);
1442 } break;
1443 case IceType_f64: {
1444 assert(Src0RM->getType() == IceType_i64);
1445 // a.f64 = bitcast b.i64 ==>
1446 // t_lo.i32 = b_lo.i32
1447 // lo(s.f64) = t_lo.i32
1448 // FakeUse(s.f64)
1449 // t_hi.i32 = b_hi.i32
1450 // hi(s.f64) = t_hi.i32
1451 // a.f64 = s.f64
1452 Variable *Spill = Func->makeVariable(IceType_f64, Context.getNode());
1453 Spill->setWeight(RegWeight::Zero);
1454 Spill->setPreferredRegister(Dest, true);
1455
1456 Context.insert(InstFakeDef::create(Func, Spill));
1457
1458 Variable *T_Lo = NULL, *T_Hi = NULL;
1459 VariableSplit *SpillLo =
1460 VariableSplit::create(Func, Spill, VariableSplit::Low);
1461 VariableSplit *SpillHi =
1462 VariableSplit::create(Func, Spill, VariableSplit::High);
1463 _mov(T_Lo, loOperand(Src0RM));
1464 _store(T_Lo, SpillLo);
1465 _mov(T_Hi, hiOperand(Src0RM));
1466 _store(T_Hi, SpillHi);
1467 _mov(Dest, Spill);
1468 } break;
1469 }
1470 break;
1471 }
1472 }
1473
1474 void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
1475 Operand *Src0 = Inst->getSrc(0);
1476 Operand *Src1 = Inst->getSrc(1);
1477 Variable *Dest = Inst->getDest();
1478 // Lowering a = fcmp cond, b, c
1479 // ucomiss b, c /* only if C1 != Br_None */
1480 // /* but swap b,c order if SwapOperands==true */
1481 // mov a, <default>
1482 // j<C1> label /* only if C1 != Br_None */
1483 // j<C2> label /* only if C2 != Br_None */
1484 // FakeUse(a) /* only if C1 != Br_None */
1485 // mov a, !<default> /* only if C1 != Br_None */
1486 // label: /* only if C1 != Br_None */
1487 InstFcmp::FCond Condition = Inst->getCondition();
1488 size_t Index = static_cast<size_t>(Condition);
1489 assert(Index < TableFcmpSize);
1490 // The table is indexed by InstFcmp::Condition. Make sure it didn't fall
1491 // out of order.
1492 if (TableFcmp[Index].SwapOperands) {
1493 Operand *Tmp = Src0;
1494 Src0 = Src1;
1495 Src1 = Tmp;
1496 }
1497 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);
1498 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);
1499 if (HasC1) {
1500 Src0 = legalize(Src0);
1501 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
1502 Variable *T = NULL;
1503 _mov(T, Src0);
1504 _ucomiss(T, Src1RM);
1505 }
1506 Constant *Default =
1507 Ctx->getConstantInt(IceType_i32, TableFcmp[Index].Default);
1508 _mov(Dest, Default);
1509 if (HasC1) {
1510 InstX8632Label *Label = InstX8632Label::create(Func, this);
1511 _br(TableFcmp[Index].C1, Label);
1512 if (HasC2) {
1513 _br(TableFcmp[Index].C2, Label);
1514 }
1515 Context.insert(InstFakeUse::create(Func, Dest));
1516 Constant *NonDefault =
1517 Ctx->getConstantInt(IceType_i32, !TableFcmp[Index].Default);
1518 _mov(Dest, NonDefault);
1519 Context.insert(Label);
1520 }
1521 }
1522
1523 void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
1524 Operand *Src0 = legalize(Inst->getSrc(0));
1525 Operand *Src1 = legalize(Inst->getSrc(1));
1526 Variable *Dest = Inst->getDest();
1527
1528 // If Src1 is an immediate, or known to be a physical register, we can
1529 // allow Src0 to be a memory operand. Otherwise, Src0 must be copied into
1530 // a physical register. (Actually, either Src0 or Src1 can be chosen for
1531 // the physical register, but unfortunately we have to commit to one or
1532 // the other before register allocation.)
1533 bool IsSrc1ImmOrReg = false;
1534 if (llvm::isa<Constant>(Src1))
1535 IsSrc1ImmOrReg = true;
1536 else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) {
1537 if (Var->hasReg())
1538 IsSrc1ImmOrReg = true;
1539 }
1540
1541 // a=icmp cond, b, c ==> cmp b,c; a=1; br cond,L1; FakeUse(a); a=0; L1:
1542 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
1543 Constant *One = Ctx->getConstantInt(IceType_i32, 1);
1544 if (Src0->getType() == IceType_i64) {
1545 InstIcmp::ICond Condition = Inst->getCondition();
1546 size_t Index = static_cast<size_t>(Condition);
1547 assert(Index < TableIcmp64Size);
1548 // The table is indexed by InstIcmp::Condition. Make sure it didn't fall
1549 // out of order.
1550 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
1551 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
1552 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
1553 InstX8632Label *Label = InstX8632Label::create(Func, this);
1554 _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
1555 _cmp(loOperand(Src0), Src1LoRI);
1556 _br(InstX8632Br::Br_ne, Label);
1557 _cmp(hiOperand(Src0), Src1HiRI);
1558 _br(InstX8632Br::Br_ne, Label);
1559 Context.insert(InstFakeUse::create(Func, Dest));
1560 _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));
1561 Context.insert(Label);
1562 } else {
1563 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
1564 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
1565 _mov(Dest, One);
1566 _cmp(hiOperand(Src0), Src1HiRI);
1567 _br(TableIcmp64[Index].C1, LabelTrue);
1568 _br(TableIcmp64[Index].C2, LabelFalse);
1569 _cmp(loOperand(Src0), Src1LoRI);
1570 _br(TableIcmp64[Index].C3, LabelTrue);
1571 Context.insert(LabelFalse);
1572 Context.insert(InstFakeUse::create(Func, Dest));
1573 _mov(Dest, Zero);
1574 Context.insert(LabelTrue);
1575 }
1576 return;
1577 }
1578 // cmp b, c
1579 Operand *Src0New =
1580 legalize(Src0, IsSrc1ImmOrReg ? Legal_All : Legal_Reg, true);
1581 InstX8632Label *Label = InstX8632Label::create(Func, this);
1582 _cmp(Src0New, Src1);
1583 _mov(Dest, One);
1584 _br(getIcmp32Mapping(Inst->getCondition()), Label);
1585 Context.insert(InstFakeUse::create(Func, Dest));
1586 _mov(Dest, Zero);
1587 Context.insert(Label);
1588 }
1589
1590 void TargetX8632::lowerLoad(const InstLoad *Inst) {
1591 // A Load instruction can be treated the same as an Assign
1592 // instruction, after the source operand is transformed into an
1593 // OperandX8632Mem operand. Note that the address mode
1594 // optimization already creates an OperandX8632Mem operand, so it
1595 // doesn't need another level of transformation.
1596 Type Ty = Inst->getDest()->getType();
1597 Operand *Src0 = Inst->getSourceAddress();
1598 // Address mode optimization already creates an OperandX8632Mem
1599 // operand, so it doesn't need another level of transformation.
1600 if (!llvm::isa<OperandX8632Mem>(Src0)) {
1601 Variable *Base = llvm::dyn_cast<Variable>(Src0);
1602 Constant *Offset = llvm::dyn_cast<Constant>(Src0);
1603 assert(Base || Offset);
1604 Src0 = OperandX8632Mem::create(Func, Ty, Base, Offset);
1605 }
1606
1607 InstAssign *Assign = InstAssign::create(Func, Inst->getDest(), Src0);
1608 lowerAssign(Assign);
1609 }
1610
1611 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
1612 Func->setError("Phi lowering not implemented");
1613 }
1614
1615 void TargetX8632::lowerRet(const InstRet *Inst) {
1616 Variable *Reg = NULL;
1617 if (Inst->hasRetValue()) {
1618 Operand *Src0 = legalize(Inst->getRetValue());
1619 if (Src0->getType() == IceType_i64) {
1620 Variable *eax = legalizeToVar(loOperand(Src0), false, Reg_eax);
1621 Variable *edx = legalizeToVar(hiOperand(Src0), false, Reg_edx);
1622 Reg = eax;
1623 Context.insert(InstFakeUse::create(Func, edx));
1624 } else if (Src0->getType() == IceType_f32 ||
1625 Src0->getType() == IceType_f64) {
1626 _fld(Src0);
1627 } else {
1628 _mov(Reg, Src0, Reg_eax);
1629 }
1630 }
1631 _ret(Reg);
1632 // Add a fake use of esp to make sure esp stays alive for the entire
1633 // function. Otherwise post-call esp adjustments get dead-code
1634 // eliminated. TODO: Are there more places where the fake use
1635 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
1636 // have a ret instruction.
1637 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);
1638 Context.insert(InstFakeUse::create(Func, esp));
1639 }
1640
1641 void TargetX8632::lowerSelect(const InstSelect *Inst) {
1642 // a=d?b:c ==> cmp d,0; a=b; jne L1; FakeUse(a); a=c; L1:
1643 Variable *Dest = Inst->getDest();
1644 Operand *SrcT = Inst->getTrueOperand();
1645 Operand *SrcF = Inst->getFalseOperand();
1646 Operand *Condition = legalize(Inst->getCondition());
1647 Constant *Zero = Ctx->getConstantInt(IceType_i32, 0);
1648 InstX8632Label *Label = InstX8632Label::create(Func, this);
1649
1650 if (Dest->getType() == IceType_i64) {
1651 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
1652 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
1653 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true);
1654 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true);
1655 _cmp(Condition, Zero);
1656 _mov(DestLo, SrcLoRI);
1657 _mov(DestHi, SrcHiRI);
1658 _br(InstX8632Br::Br_ne, Label);
1659 Context.insert(InstFakeUse::create(Func, DestLo));
1660 Context.insert(InstFakeUse::create(Func, DestHi));
1661 Operand *SrcFLo = loOperand(SrcF);
1662 Operand *SrcFHi = hiOperand(SrcF);
1663 SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true);
1664 SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true);
1665 _mov(DestLo, SrcLoRI);
1666 _mov(DestHi, SrcHiRI);
1667 } else {
1668 _cmp(Condition, Zero);
1669 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true);
1670 _mov(Dest, SrcT);
1671 _br(InstX8632Br::Br_ne, Label);
1672 Context.insert(InstFakeUse::create(Func, Dest));
1673 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true);
1674 _mov(Dest, SrcF);
1675 }
1676
1677 Context.insert(Label);
1678 }
1679
1680 void TargetX8632::lowerStore(const InstStore *Inst) {
1681 Operand *Value = Inst->getData();
1682 Operand *Addr = Inst->getAddr();
1683 OperandX8632Mem *NewAddr = llvm::dyn_cast<OperandX8632Mem>(Addr);
1684 // Address mode optimization already creates an OperandX8632Mem
1685 // operand, so it doesn't need another level of transformation.
1686 if (!NewAddr) {
1687 // The address will be either a constant (which represents a global
1688 // variable) or a variable, so either the Base or Offset component
1689 // of the OperandX8632Mem will be set.
1690 Variable *Base = llvm::dyn_cast<Variable>(Addr);
1691 Constant *Offset = llvm::dyn_cast<Constant>(Addr);
1692 assert(Base || Offset);
1693 NewAddr = OperandX8632Mem::create(Func, Value->getType(), Base, Offset);
1694 }
1695 NewAddr = llvm::cast<OperandX8632Mem>(legalize(NewAddr));
1696
1697 if (NewAddr->getType() == IceType_i64) {
1698 Value = legalize(Value);
1699 Operand *ValueHi = legalize(hiOperand(Value), Legal_Reg | Legal_Imm, true);
1700 Operand *ValueLo = legalize(loOperand(Value), Legal_Reg | Legal_Imm, true);
1701 _store(ValueHi, llvm::cast<OperandX8632Mem>(hiOperand(NewAddr)));
1702 _store(ValueLo, llvm::cast<OperandX8632Mem>(loOperand(NewAddr)));
1703 } else {
1704 Value = legalize(Value, Legal_Reg | Legal_Imm, true);
1705 _store(Value, NewAddr);
1706 }
1707 }
1708
1709 void TargetX8632::lowerSwitch(const InstSwitch *Inst) {
1710 // This implements the most naive possible lowering.
1711 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
1712 Operand *Src0 = Inst->getComparison();
1713 SizeT NumCases = Inst->getNumCases();
1714 // OK, we'll be slightly less naive by forcing Src into a physical
1715 // register if there are 2 or more uses.
1716 if (NumCases >= 2)
1717 Src0 = legalizeToVar(Src0, true);
1718 else
1719 Src0 = legalize(Src0, Legal_All, true);
1720 for (SizeT I = 0; I < NumCases; ++I) {
1721 Operand *Value = Ctx->getConstantInt(IceType_i32, Inst->getValue(I));
1722 _cmp(Src0, Value);
1723 _br(InstX8632Br::Br_e, Inst->getLabel(I));
1724 }
1725
1726 _br(Inst->getLabelDefault());
1727 }
1728
1729 void TargetX8632::lowerUnreachable(const InstUnreachable * /*Inst*/) {
1730 const SizeT MaxSrcs = 0;
1731 Variable *Dest = NULL;
1732 InstCall *Call = makeHelperCall("ice_unreachable", Dest, MaxSrcs);
1733 lowerCall(Call);
1734 }
1735
1736 Operand *TargetX8632::legalize(Operand *From, LegalMask Allowed,
1737 bool AllowOverlap, int32_t RegNum) {
1738 assert(Allowed & Legal_Reg);
1739 assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg);
1740 if (OperandX8632Mem *Mem = llvm::dyn_cast<OperandX8632Mem>(From)) {
1741 Variable *Base = Mem->getBase();
1742 Variable *Index = Mem->getIndex();
1743 Variable *RegBase = Base;
1744 Variable *RegIndex = Index;
1745 if (Base) {
1746 RegBase = legalizeToVar(Base, true);
1747 }
1748 if (Index) {
1749 RegIndex = legalizeToVar(Index, true);
1750 }
1751 if (Base != RegBase || Index != RegIndex) {
1752 From =
1753 OperandX8632Mem::create(Func, Mem->getType(), RegBase,
1754 Mem->getOffset(), RegIndex, Mem->getShift());
1755 }
1756
1757 if (!(Allowed & Legal_Mem)) {
1758 Variable *Reg = makeReg(From->getType(), RegNum);
1759 _mov(Reg, From, RegNum);
1760 From = Reg;
1761 }
1762 return From;
1763 }
1764 if (llvm::isa<Constant>(From)) {
1765 if (!(Allowed & Legal_Imm)) {
1766 Variable *Reg = makeReg(From->getType(), RegNum);
1767 _mov(Reg, From);
1768 From = Reg;
1769 }
1770 return From;
1771 }
1772 if (Variable *Var = llvm::dyn_cast<Variable>(From)) {
1773 // We need a new physical register for the operand if:
1774 // Mem is not allowed and Var->getRegNum() is unknown, or
1775 // RegNum is required and Var->getRegNum() doesn't match.
1776 if ((!(Allowed & Legal_Mem) && !Var->hasReg()) ||
1777 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) {
1778 Variable *Reg = makeReg(From->getType(), RegNum);
1779 if (RegNum == Variable::NoRegister) {
1780 Reg->setPreferredRegister(Var, AllowOverlap);
1781 }
1782 _mov(Reg, From);
1783 From = Reg;
1784 }
1785 return From;
1786 }
1787 llvm_unreachable("Unhandled operand kind in legalize()");
1788 return From;
1789 }
1790
1791 Variable *TargetX8632::legalizeToVar(Operand *From, bool AllowOverlap,
1792 int32_t RegNum) {
1793 return llvm::cast<Variable>(legalize(From, Legal_Reg, AllowOverlap, RegNum));
1794 }
1795
1796 Variable *TargetX8632::makeReg(Type Type, int32_t RegNum) {
1797 Variable *Reg = Func->makeVariable(Type, Context.getNode());
1798 if (RegNum == Variable::NoRegister)
1799 Reg->setWeightInfinite();
1800 else
1801 Reg->setRegNum(RegNum);
1802 return Reg;
1803 }
1804
1805 void TargetX8632::postLower() {
1806 if (Ctx->getOptLevel() != Opt_m1)
1807 return;
1808 // TODO: Avoid recomputing WhiteList every instruction.
1809 llvm::SmallBitVector WhiteList = getRegisterSet(RegSet_All, RegSet_None);
1810 // Make one pass to black-list pre-colored registers. TODO: If
1811 // there was some prior register allocation pass that made register
1812 // assignments, those registers need to be black-listed here as
1813 // well.
1814 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
1815 ++I) {
1816 const Inst *Inst = *I;
1817 if (Inst->isDeleted())
1818 continue;
1819 if (llvm::isa<InstFakeKill>(Inst))
1820 continue;
1821 SizeT VarIndex = 0;
1822 for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
1823 Operand *Src = Inst->getSrc(SrcNum);
1824 SizeT NumVars = Src->getNumVars();
1825 for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {
1826 const Variable *Var = Src->getVar(J);
1827 if (!Var->hasReg())
1828 continue;
1829 WhiteList[Var->getRegNum()] = false;
1830 }
1831 }
1832 }
1833 // The second pass colors infinite-weight variables.
1834 llvm::SmallBitVector AvailableRegisters = WhiteList;
1835 for (InstList::iterator I = Context.getCur(), E = Context.getEnd(); I != E;
1836 ++I) {
1837 const Inst *Inst = *I;
1838 if (Inst->isDeleted())
1839 continue;
1840 SizeT VarIndex = 0;
1841 for (SizeT SrcNum = 0; SrcNum < Inst->getSrcSize(); ++SrcNum) {
1842 Operand *Src = Inst->getSrc(SrcNum);
1843 SizeT NumVars = Src->getNumVars();
1844 for (SizeT J = 0; J < NumVars; ++J, ++VarIndex) {
1845 Variable *Var = Src->getVar(J);
1846 if (Var->hasReg())
1847 continue;
1848 if (!Var->getWeight().isInf())
1849 continue;
1850 llvm::SmallBitVector AvailableTypedRegisters =
1851 AvailableRegisters & getRegisterSetForType(Var->getType());
1852 if (!AvailableTypedRegisters.any()) {
1853 // This is a hack in case we run out of physical registers
1854 // due to an excessive number of "push" instructions from
1855 // lowering a call.
1856 AvailableRegisters = WhiteList;
1857 AvailableTypedRegisters =
1858 AvailableRegisters & getRegisterSetForType(Var->getType());
1859 }
1860 assert(AvailableTypedRegisters.any());
1861 int32_t RegNum = AvailableTypedRegisters.find_first();
1862 Var->setRegNum(RegNum);
1863 AvailableRegisters[RegNum] = false;
1864 }
1865 }
1866 }
1867 }
1868
1869 } // end of namespace Ice
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698