OLD | NEW |
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// | 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// |
2 // | 2 // |
3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
4 // | 4 // |
5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
7 // | 7 // |
8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
9 // | 9 // |
10 // This file implements the TargetLoweringX8632 class, which | 10 // This file implements the TargetLoweringX8632 class, which |
11 // consists almost entirely of the lowering sequence for each | 11 // consists almost entirely of the lowering sequence for each |
12 // high-level instruction. It also implements | 12 // high-level instruction. It also implements |
13 // TargetX8632Fast::postLower() which does the simplest possible | 13 // TargetX8632Fast::postLower() which does the simplest possible |
14 // register allocation for the "fast" target. | 14 // register allocation for the "fast" target. |
15 // | 15 // |
16 //===----------------------------------------------------------------------===// | 16 //===----------------------------------------------------------------------===// |
17 | 17 |
18 #include "IceDefs.h" | 18 #include "IceDefs.h" |
19 #include "IceCfg.h" | 19 #include "IceCfg.h" |
20 #include "IceCfgNode.h" | 20 #include "IceCfgNode.h" |
21 #include "IceClFlags.h" | 21 #include "IceClFlags.h" |
22 #include "IceInstX8632.h" | 22 #include "IceInstX8632.h" |
23 #include "IceOperand.h" | 23 #include "IceOperand.h" |
| 24 #include "IceRegistersX8632.h" |
24 #include "IceTargetLoweringX8632.def" | 25 #include "IceTargetLoweringX8632.def" |
25 #include "IceTargetLoweringX8632.h" | 26 #include "IceTargetLoweringX8632.h" |
26 #include "llvm/ADT/DenseMap.h" | 27 #include "llvm/ADT/DenseMap.h" |
27 #include "llvm/Support/MathExtras.h" | 28 #include "llvm/Support/MathExtras.h" |
28 #include "llvm/Support/CommandLine.h" | 29 #include "llvm/Support/CommandLine.h" |
29 | 30 |
30 namespace Ice { | 31 namespace Ice { |
31 | 32 |
32 namespace { | 33 namespace { |
33 | 34 |
(...skipping 10 matching lines...) Expand all Loading... |
44 // table by hand, good execution tests are helpful. | 45 // table by hand, good execution tests are helpful. |
45 // | 46 // |
46 // The last two columns describe the case when the operands are vectors | 47 // The last two columns describe the case when the operands are vectors |
47 // of floating point values. For most fcmp conditions, there is a clear | 48 // of floating point values. For most fcmp conditions, there is a clear |
48 // mapping to a single x86 cmpps instruction variant. Some fcmp | 49 // mapping to a single x86 cmpps instruction variant. Some fcmp |
49 // conditions require special code to handle and these are marked in the | 50 // conditions require special code to handle and these are marked in the |
50 // table with a Cmpps_Invalid predicate. | 51 // table with a Cmpps_Invalid predicate. |
51 const struct TableFcmp_ { | 52 const struct TableFcmp_ { |
52 uint32_t Default; | 53 uint32_t Default; |
53 bool SwapScalarOperands; | 54 bool SwapScalarOperands; |
54 InstX8632::BrCond C1, C2; | 55 CondX86::BrCond C1, C2; |
55 bool SwapVectorOperands; | 56 bool SwapVectorOperands; |
56 InstX8632Cmpps::CmppsCond Predicate; | 57 CondX86::CmppsCond Predicate; |
57 } TableFcmp[] = { | 58 } TableFcmp[] = { |
58 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ | 59 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ |
59 { \ | 60 { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \ |
60 dflt, swapS, InstX8632Br::C1, InstX8632Br::C2, swapV, InstX8632Cmpps::pred \ | |
61 } \ | |
62 , | 61 , |
63 FCMPX8632_TABLE | 62 FCMPX8632_TABLE |
64 #undef X | 63 #undef X |
65 }; | 64 }; |
66 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp); | 65 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp); |
67 | 66 |
68 // The following table summarizes the logic for lowering the icmp instruction | 67 // The following table summarizes the logic for lowering the icmp instruction |
69 // for i32 and narrower types. Each icmp condition has a clear mapping to an | 68 // for i32 and narrower types. Each icmp condition has a clear mapping to an |
70 // x86 conditional branch instruction. | 69 // x86 conditional branch instruction. |
71 | 70 |
72 const struct TableIcmp32_ { | 71 const struct TableIcmp32_ { |
73 InstX8632::BrCond Mapping; | 72 CondX86::BrCond Mapping; |
74 } TableIcmp32[] = { | 73 } TableIcmp32[] = { |
75 #define X(val, C_32, C1_64, C2_64, C3_64) \ | 74 #define X(val, C_32, C1_64, C2_64, C3_64) \ |
76 { InstX8632Br::C_32 } \ | 75 { CondX86::C_32 } \ |
77 , | 76 , |
78 ICMPX8632_TABLE | 77 ICMPX8632_TABLE |
79 #undef X | 78 #undef X |
80 }; | 79 }; |
81 const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32); | 80 const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32); |
82 | 81 |
83 // The following table summarizes the logic for lowering the icmp instruction | 82 // The following table summarizes the logic for lowering the icmp instruction |
84 // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and | 83 // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and |
85 // conditional branches are needed. For the other conditions, three separate | 84 // conditional branches are needed. For the other conditions, three separate |
86 // conditional branches are needed. | 85 // conditional branches are needed. |
87 const struct TableIcmp64_ { | 86 const struct TableIcmp64_ { |
88 InstX8632::BrCond C1, C2, C3; | 87 CondX86::BrCond C1, C2, C3; |
89 } TableIcmp64[] = { | 88 } TableIcmp64[] = { |
90 #define X(val, C_32, C1_64, C2_64, C3_64) \ | 89 #define X(val, C_32, C1_64, C2_64, C3_64) \ |
91 { InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 } \ | 90 { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 } \ |
92 , | 91 , |
93 ICMPX8632_TABLE | 92 ICMPX8632_TABLE |
94 #undef X | 93 #undef X |
95 }; | 94 }; |
96 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64); | 95 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64); |
97 | 96 |
98 InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { | 97 CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { |
99 size_t Index = static_cast<size_t>(Cond); | 98 size_t Index = static_cast<size_t>(Cond); |
100 assert(Index < TableIcmp32Size); | 99 assert(Index < TableIcmp32Size); |
101 return TableIcmp32[Index].Mapping; | 100 return TableIcmp32[Index].Mapping; |
102 } | 101 } |
103 | 102 |
104 const struct TableTypeX8632Attributes_ { | 103 const struct TableTypeX8632Attributes_ { |
105 Type InVectorElementType; | 104 Type InVectorElementType; |
106 } TableTypeX8632Attributes[] = { | 105 } TableTypeX8632Attributes[] = { |
107 #define X(tag, elementty, cvt, sdss, pack, width) \ | 106 #define X(tag, elementty, cvt, sdss, pack, width) \ |
108 { elementty } \ | 107 { elementty } \ |
(...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
257 #undef X | 256 #undef X |
258 } | 257 } |
259 } | 258 } |
260 | 259 |
261 } // end of anonymous namespace | 260 } // end of anonymous namespace |
262 | 261 |
263 TargetX8632::TargetX8632(Cfg *Func) | 262 TargetX8632::TargetX8632(Cfg *Func) |
264 : TargetLowering(Func), InstructionSet(CLInstructionSet), | 263 : TargetLowering(Func), InstructionSet(CLInstructionSet), |
265 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), | 264 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), |
266 SpillAreaSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false), | 265 SpillAreaSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false), |
267 PhysicalRegisters(VarList(Reg_NUM)) { | 266 PhysicalRegisters(VarList(RegX8632::Reg_NUM)) { |
268 // TODO: Don't initialize IntegerRegisters and friends every time. | 267 // TODO: Don't initialize IntegerRegisters and friends every time. |
269 // Instead, initialize in some sort of static initializer for the | 268 // Instead, initialize in some sort of static initializer for the |
270 // class. | 269 // class. |
271 llvm::SmallBitVector IntegerRegisters(Reg_NUM); | 270 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM); |
272 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM); | 271 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM); |
273 llvm::SmallBitVector FloatRegisters(Reg_NUM); | 272 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM); |
274 llvm::SmallBitVector VectorRegisters(Reg_NUM); | 273 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM); |
275 llvm::SmallBitVector InvalidRegisters(Reg_NUM); | 274 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM); |
276 ScratchRegs.resize(Reg_NUM); | 275 ScratchRegs.resize(RegX8632::Reg_NUM); |
277 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \ | 276 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
278 frameptr, isI8, isInt, isFP) \ | 277 frameptr, isI8, isInt, isFP) \ |
279 IntegerRegisters[val] = isInt; \ | 278 IntegerRegisters[RegX8632::val] = isInt; \ |
280 IntegerRegistersI8[val] = isI8; \ | 279 IntegerRegistersI8[RegX8632::val] = isI8; \ |
281 FloatRegisters[val] = isFP; \ | 280 FloatRegisters[RegX8632::val] = isFP; \ |
282 VectorRegisters[val] = isFP; \ | 281 VectorRegisters[RegX8632::val] = isFP; \ |
283 ScratchRegs[val] = scratch; | 282 ScratchRegs[RegX8632::val] = scratch; |
284 REGX8632_TABLE; | 283 REGX8632_TABLE; |
285 #undef X | 284 #undef X |
286 TypeToRegisterSet[IceType_void] = InvalidRegisters; | 285 TypeToRegisterSet[IceType_void] = InvalidRegisters; |
287 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8; | 286 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8; |
288 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8; | 287 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8; |
289 TypeToRegisterSet[IceType_i16] = IntegerRegisters; | 288 TypeToRegisterSet[IceType_i16] = IntegerRegisters; |
290 TypeToRegisterSet[IceType_i32] = IntegerRegisters; | 289 TypeToRegisterSet[IceType_i32] = IntegerRegisters; |
291 TypeToRegisterSet[IceType_i64] = IntegerRegisters; | 290 TypeToRegisterSet[IceType_i64] = IntegerRegisters; |
292 TypeToRegisterSet[IceType_f32] = FloatRegisters; | 291 TypeToRegisterSet[IceType_f32] = FloatRegisters; |
293 TypeToRegisterSet[IceType_f64] = FloatRegisters; | 292 TypeToRegisterSet[IceType_f64] = FloatRegisters; |
(...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
453 } | 452 } |
454 | 453 |
455 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) { | 454 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) { |
456 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { | 455 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { |
457 return Br->optimizeBranch(NextNode); | 456 return Br->optimizeBranch(NextNode); |
458 } | 457 } |
459 return false; | 458 return false; |
460 } | 459 } |
461 | 460 |
462 IceString TargetX8632::RegNames[] = { | 461 IceString TargetX8632::RegNames[] = { |
463 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \ | 462 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
464 frameptr, isI8, isInt, isFP) \ | 463 frameptr, isI8, isInt, isFP) \ |
465 name, | 464 name, |
466 REGX8632_TABLE | 465 REGX8632_TABLE |
467 #undef X | 466 #undef X |
468 }; | 467 }; |
469 | 468 |
470 Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) { | 469 Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) { |
471 assert(RegNum < PhysicalRegisters.size()); | 470 assert(RegNum < PhysicalRegisters.size()); |
472 Variable *Reg = PhysicalRegisters[RegNum]; | 471 Variable *Reg = PhysicalRegisters[RegNum]; |
473 if (Reg == NULL) { | 472 if (Reg == NULL) { |
474 CfgNode *Node = NULL; // NULL means multi-block lifetime | 473 CfgNode *Node = NULL; // NULL means multi-block lifetime |
475 Reg = Func->makeVariable(IceType_i32, Node); | 474 Reg = Func->makeVariable(IceType_i32, Node); |
476 Reg->setRegNum(RegNum); | 475 Reg->setRegNum(RegNum); |
477 PhysicalRegisters[RegNum] = Reg; | 476 PhysicalRegisters[RegNum] = Reg; |
478 } | 477 } |
479 return Reg; | 478 return Reg; |
480 } | 479 } |
481 | 480 |
482 IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const { | 481 IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const { |
483 assert(RegNum < Reg_NUM); | 482 assert(RegNum < RegX8632::Reg_NUM); |
484 static IceString RegNames8[] = { | 483 static IceString RegNames8[] = { |
485 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \ | 484 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
486 frameptr, isI8, isInt, isFP) \ | 485 frameptr, isI8, isInt, isFP) \ |
487 name8, | 486 name8, |
488 REGX8632_TABLE | 487 REGX8632_TABLE |
489 #undef X | 488 #undef X |
490 }; | 489 }; |
491 static IceString RegNames16[] = { | 490 static IceString RegNames16[] = { |
492 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \ | 491 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
493 frameptr, isI8, isInt, isFP) \ | 492 frameptr, isI8, isInt, isFP) \ |
494 name16, | 493 name16, |
495 REGX8632_TABLE | 494 REGX8632_TABLE |
496 #undef X | 495 #undef X |
497 }; | 496 }; |
498 switch (Ty) { | 497 switch (Ty) { |
499 case IceType_i1: | 498 case IceType_i1: |
500 case IceType_i8: | 499 case IceType_i8: |
501 return RegNames8[RegNum]; | 500 return RegNames8[RegNum]; |
502 case IceType_i16: | 501 case IceType_i16: |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
539 | 538 |
540 for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS; | 539 for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS; |
541 ++I) { | 540 ++I) { |
542 Variable *Arg = Args[I]; | 541 Variable *Arg = Args[I]; |
543 Type Ty = Arg->getType(); | 542 Type Ty = Arg->getType(); |
544 if (!isVectorType(Ty)) | 543 if (!isVectorType(Ty)) |
545 continue; | 544 continue; |
546 // Replace Arg in the argument list with the home register. Then | 545 // Replace Arg in the argument list with the home register. Then |
547 // generate an instruction in the prolog to copy the home register | 546 // generate an instruction in the prolog to copy the home register |
548 // to the assigned location of Arg. | 547 // to the assigned location of Arg. |
549 int32_t RegNum = Reg_xmm0 + NumXmmArgs; | 548 int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs; |
550 ++NumXmmArgs; | 549 ++NumXmmArgs; |
551 IceString Name = "home_reg:" + Arg->getName(); | 550 IceString Name = "home_reg:" + Arg->getName(); |
552 const CfgNode *DefNode = NULL; | 551 const CfgNode *DefNode = NULL; |
553 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name); | 552 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name); |
554 RegisterArg->setRegNum(RegNum); | 553 RegisterArg->setRegNum(RegNum); |
555 RegisterArg->setIsArg(Func); | 554 RegisterArg->setIsArg(Func); |
556 Arg->setIsArg(Func, false); | 555 Arg->setIsArg(Func, false); |
557 | 556 |
558 Args[I] = RegisterArg; | 557 Args[I] = RegisterArg; |
559 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); | 558 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); |
(...skipping 215 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
775 _push(getPhysicalRegister(i), SuppressStackAdjustment); | 774 _push(getPhysicalRegister(i), SuppressStackAdjustment); |
776 } | 775 } |
777 } | 776 } |
778 Ctx->statsUpdateRegistersSaved(NumCallee); | 777 Ctx->statsUpdateRegistersSaved(NumCallee); |
779 | 778 |
780 // Generate "push ebp; mov ebp, esp" | 779 // Generate "push ebp; mov ebp, esp" |
781 if (IsEbpBasedFrame) { | 780 if (IsEbpBasedFrame) { |
782 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) | 781 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) |
783 .count() == 0); | 782 .count() == 0); |
784 PreservedRegsSizeBytes += 4; | 783 PreservedRegsSizeBytes += 4; |
785 Variable *ebp = getPhysicalRegister(Reg_ebp); | 784 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp); |
786 Variable *esp = getPhysicalRegister(Reg_esp); | 785 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); |
787 const bool SuppressStackAdjustment = true; | 786 const bool SuppressStackAdjustment = true; |
788 _push(ebp, SuppressStackAdjustment); | 787 _push(ebp, SuppressStackAdjustment); |
789 _mov(ebp, esp); | 788 _mov(ebp, esp); |
790 } | 789 } |
791 | 790 |
792 // Align the variables area. SpillAreaPaddingBytes is the size of | 791 // Align the variables area. SpillAreaPaddingBytes is the size of |
793 // the region after the preserved registers and before the spill | 792 // the region after the preserved registers and before the spill |
794 // areas. | 793 // areas. |
795 uint32_t SpillAreaPaddingBytes = 0; | 794 uint32_t SpillAreaPaddingBytes = 0; |
796 if (SpillAreaAlignmentBytes) { | 795 if (SpillAreaAlignmentBytes) { |
(...skipping 17 matching lines...) Expand all Loading... |
814 | 813 |
815 // Align esp if necessary. | 814 // Align esp if necessary. |
816 if (NeedsStackAlignment) { | 815 if (NeedsStackAlignment) { |
817 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; | 816 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; |
818 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); | 817 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); |
819 SpillAreaSizeBytes = StackSize - StackOffset; | 818 SpillAreaSizeBytes = StackSize - StackOffset; |
820 } | 819 } |
821 | 820 |
822 // Generate "sub esp, SpillAreaSizeBytes" | 821 // Generate "sub esp, SpillAreaSizeBytes" |
823 if (SpillAreaSizeBytes) | 822 if (SpillAreaSizeBytes) |
824 _sub(getPhysicalRegister(Reg_esp), | 823 _sub(getPhysicalRegister(RegX8632::Reg_esp), |
825 Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes)); | 824 Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes)); |
826 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); | 825 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); |
827 | 826 |
828 resetStackAdjustment(); | 827 resetStackAdjustment(); |
829 | 828 |
830 // Fill in stack offsets for stack args, and copy args into registers | 829 // Fill in stack offsets for stack args, and copy args into registers |
831 // for those that were register-allocated. Args are pushed right to | 830 // for those that were register-allocated. Args are pushed right to |
832 // left, so Arg[0] is closest to the stack/frame pointer. | 831 // left, so Arg[0] is closest to the stack/frame pointer. |
833 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); | 832 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
834 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; | 833 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; |
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
924 if (RI == E) | 923 if (RI == E) |
925 return; | 924 return; |
926 | 925 |
927 // Convert the reverse_iterator position into its corresponding | 926 // Convert the reverse_iterator position into its corresponding |
928 // (forward) iterator position. | 927 // (forward) iterator position. |
929 InstList::iterator InsertPoint = RI.base(); | 928 InstList::iterator InsertPoint = RI.base(); |
930 --InsertPoint; | 929 --InsertPoint; |
931 Context.init(Node); | 930 Context.init(Node); |
932 Context.setInsertPoint(InsertPoint); | 931 Context.setInsertPoint(InsertPoint); |
933 | 932 |
934 Variable *esp = getPhysicalRegister(Reg_esp); | 933 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); |
935 if (IsEbpBasedFrame) { | 934 if (IsEbpBasedFrame) { |
936 Variable *ebp = getPhysicalRegister(Reg_ebp); | 935 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp); |
937 _mov(esp, ebp); | 936 _mov(esp, ebp); |
938 _pop(ebp); | 937 _pop(ebp); |
939 } else { | 938 } else { |
940 // add esp, SpillAreaSizeBytes | 939 // add esp, SpillAreaSizeBytes |
941 if (SpillAreaSizeBytes) | 940 if (SpillAreaSizeBytes) |
942 _add(esp, Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes)); | 941 _add(esp, Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes)); |
943 } | 942 } |
944 | 943 |
945 // Add pop instructions for preserved registers. | 944 // Add pop instructions for preserved registers. |
946 llvm::SmallBitVector CalleeSaves = | 945 llvm::SmallBitVector CalleeSaves = |
947 getRegisterSet(RegSet_CalleeSave, RegSet_None); | 946 getRegisterSet(RegSet_CalleeSave, RegSet_None); |
948 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { | 947 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
949 SizeT j = CalleeSaves.size() - i - 1; | 948 SizeT j = CalleeSaves.size() - i - 1; |
950 if (j == Reg_ebp && IsEbpBasedFrame) | 949 if (j == RegX8632::Reg_ebp && IsEbpBasedFrame) |
951 continue; | 950 continue; |
952 if (CalleeSaves[j] && RegsUsed[j]) { | 951 if (CalleeSaves[j] && RegsUsed[j]) { |
953 _pop(getPhysicalRegister(j)); | 952 _pop(getPhysicalRegister(j)); |
954 } | 953 } |
955 } | 954 } |
956 } | 955 } |
957 | 956 |
958 template <typename T> struct PoolTypeConverter {}; | 957 template <typename T> struct PoolTypeConverter {}; |
959 | 958 |
960 template <> struct PoolTypeConverter<float> { | 959 template <> struct PoolTypeConverter<float> { |
(...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1097 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset, | 1096 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset, |
1098 Mem->getIndex(), Mem->getShift(), | 1097 Mem->getIndex(), Mem->getShift(), |
1099 Mem->getSegmentRegister()); | 1098 Mem->getSegmentRegister()); |
1100 } | 1099 } |
1101 llvm_unreachable("Unsupported operand type"); | 1100 llvm_unreachable("Unsupported operand type"); |
1102 return NULL; | 1101 return NULL; |
1103 } | 1102 } |
1104 | 1103 |
1105 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include, | 1104 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include, |
1106 RegSetMask Exclude) const { | 1105 RegSetMask Exclude) const { |
1107 llvm::SmallBitVector Registers(Reg_NUM); | 1106 llvm::SmallBitVector Registers(RegX8632::Reg_NUM); |
1108 | 1107 |
1109 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \ | 1108 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \ |
1110 frameptr, isI8, isInt, isFP) \ | 1109 frameptr, isI8, isInt, isFP) \ |
1111 if (scratch && (Include & RegSet_CallerSave)) \ | 1110 if (scratch && (Include & RegSet_CallerSave)) \ |
1112 Registers[val] = true; \ | 1111 Registers[RegX8632::val] = true; \ |
1113 if (preserved && (Include & RegSet_CalleeSave)) \ | 1112 if (preserved && (Include & RegSet_CalleeSave)) \ |
1114 Registers[val] = true; \ | 1113 Registers[RegX8632::val] = true; \ |
1115 if (stackptr && (Include & RegSet_StackPointer)) \ | 1114 if (stackptr && (Include & RegSet_StackPointer)) \ |
1116 Registers[val] = true; \ | 1115 Registers[RegX8632::val] = true; \ |
1117 if (frameptr && (Include & RegSet_FramePointer)) \ | 1116 if (frameptr && (Include & RegSet_FramePointer)) \ |
1118 Registers[val] = true; \ | 1117 Registers[RegX8632::val] = true; \ |
1119 if (scratch && (Exclude & RegSet_CallerSave)) \ | 1118 if (scratch && (Exclude & RegSet_CallerSave)) \ |
1120 Registers[val] = false; \ | 1119 Registers[RegX8632::val] = false; \ |
1121 if (preserved && (Exclude & RegSet_CalleeSave)) \ | 1120 if (preserved && (Exclude & RegSet_CalleeSave)) \ |
1122 Registers[val] = false; \ | 1121 Registers[RegX8632::val] = false; \ |
1123 if (stackptr && (Exclude & RegSet_StackPointer)) \ | 1122 if (stackptr && (Exclude & RegSet_StackPointer)) \ |
1124 Registers[val] = false; \ | 1123 Registers[RegX8632::val] = false; \ |
1125 if (frameptr && (Exclude & RegSet_FramePointer)) \ | 1124 if (frameptr && (Exclude & RegSet_FramePointer)) \ |
1126 Registers[val] = false; | 1125 Registers[RegX8632::val] = false; |
1127 | 1126 |
1128 REGX8632_TABLE | 1127 REGX8632_TABLE |
1129 | 1128 |
1130 #undef X | 1129 #undef X |
1131 | 1130 |
1132 return Registers; | 1131 return Registers; |
1133 } | 1132 } |
1134 | 1133 |
1135 void TargetX8632::lowerAlloca(const InstAlloca *Inst) { | 1134 void TargetX8632::lowerAlloca(const InstAlloca *Inst) { |
1136 IsEbpBasedFrame = true; | 1135 IsEbpBasedFrame = true; |
1137 // Conservatively require the stack to be aligned. Some stack | 1136 // Conservatively require the stack to be aligned. Some stack |
1138 // adjustment operations implemented below assume that the stack is | 1137 // adjustment operations implemented below assume that the stack is |
1139 // aligned before the alloca. All the alloca code ensures that the | 1138 // aligned before the alloca. All the alloca code ensures that the |
1140 // stack alignment is preserved after the alloca. The stack alignment | 1139 // stack alignment is preserved after the alloca. The stack alignment |
1141 // restriction can be relaxed in some cases. | 1140 // restriction can be relaxed in some cases. |
1142 NeedsStackAlignment = true; | 1141 NeedsStackAlignment = true; |
1143 | 1142 |
1144 // TODO(sehr,stichnot): minimize the number of adjustments of esp, etc. | 1143 // TODO(sehr,stichnot): minimize the number of adjustments of esp, etc. |
1145 Variable *esp = getPhysicalRegister(Reg_esp); | 1144 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp); |
1146 Operand *TotalSize = legalize(Inst->getSizeInBytes()); | 1145 Operand *TotalSize = legalize(Inst->getSizeInBytes()); |
1147 Variable *Dest = Inst->getDest(); | 1146 Variable *Dest = Inst->getDest(); |
1148 uint32_t AlignmentParam = Inst->getAlignInBytes(); | 1147 uint32_t AlignmentParam = Inst->getAlignInBytes(); |
1149 // For default align=0, set it to the real value 1, to avoid any | 1148 // For default align=0, set it to the real value 1, to avoid any |
1150 // bit-manipulation problems below. | 1149 // bit-manipulation problems below. |
1151 AlignmentParam = std::max(AlignmentParam, 1u); | 1150 AlignmentParam = std::max(AlignmentParam, 1u); |
1152 | 1151 |
1153 // LLVM enforces power of 2 alignment. | 1152 // LLVM enforces power of 2 alignment. |
1154 assert((AlignmentParam & (AlignmentParam - 1)) == 0); | 1153 assert((AlignmentParam & (AlignmentParam - 1)) == 0); |
1155 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); | 1154 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); |
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1226 case InstArithmetic::Sub: | 1225 case InstArithmetic::Sub: |
1227 _mov(T_Lo, Src0Lo); | 1226 _mov(T_Lo, Src0Lo); |
1228 _sub(T_Lo, Src1Lo); | 1227 _sub(T_Lo, Src1Lo); |
1229 _mov(DestLo, T_Lo); | 1228 _mov(DestLo, T_Lo); |
1230 _mov(T_Hi, Src0Hi); | 1229 _mov(T_Hi, Src0Hi); |
1231 _sbb(T_Hi, Src1Hi); | 1230 _sbb(T_Hi, Src1Hi); |
1232 _mov(DestHi, T_Hi); | 1231 _mov(DestHi, T_Hi); |
1233 break; | 1232 break; |
1234 case InstArithmetic::Mul: { | 1233 case InstArithmetic::Mul: { |
1235 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; | 1234 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; |
1236 Variable *T_4Lo = makeReg(IceType_i32, Reg_eax); | 1235 Variable *T_4Lo = makeReg(IceType_i32, RegX8632::Reg_eax); |
1237 Variable *T_4Hi = makeReg(IceType_i32, Reg_edx); | 1236 Variable *T_4Hi = makeReg(IceType_i32, RegX8632::Reg_edx); |
1238 // gcc does the following: | 1237 // gcc does the following: |
1239 // a=b*c ==> | 1238 // a=b*c ==> |
1240 // t1 = b.hi; t1 *=(imul) c.lo | 1239 // t1 = b.hi; t1 *=(imul) c.lo |
1241 // t2 = c.hi; t2 *=(imul) b.lo | 1240 // t2 = c.hi; t2 *=(imul) b.lo |
1242 // t3:eax = b.lo | 1241 // t3:eax = b.lo |
1243 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo | 1242 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo |
1244 // a.lo = t4.lo | 1243 // a.lo = t4.lo |
1245 // t4.hi += t1 | 1244 // t4.hi += t1 |
1246 // t4.hi += t2 | 1245 // t4.hi += t2 |
1247 // a.hi = t4.hi | 1246 // a.hi = t4.hi |
1248 // The mul instruction cannot take an immediate operand. | 1247 // The mul instruction cannot take an immediate operand. |
1249 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem); | 1248 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem); |
1250 _mov(T_1, Src0Hi); | 1249 _mov(T_1, Src0Hi); |
1251 _imul(T_1, Src1Lo); | 1250 _imul(T_1, Src1Lo); |
1252 _mov(T_2, Src1Hi); | 1251 _mov(T_2, Src1Hi); |
1253 _imul(T_2, Src0Lo); | 1252 _imul(T_2, Src0Lo); |
1254 _mov(T_3, Src0Lo, Reg_eax); | 1253 _mov(T_3, Src0Lo, RegX8632::Reg_eax); |
1255 _mul(T_4Lo, T_3, Src1Lo); | 1254 _mul(T_4Lo, T_3, Src1Lo); |
1256 // The mul instruction produces two dest variables, edx:eax. We | 1255 // The mul instruction produces two dest variables, edx:eax. We |
1257 // create a fake definition of edx to account for this. | 1256 // create a fake definition of edx to account for this. |
1258 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); | 1257 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); |
1259 _mov(DestLo, T_4Lo); | 1258 _mov(DestLo, T_4Lo); |
1260 _add(T_4Hi, T_1); | 1259 _add(T_4Hi, T_1); |
1261 _add(T_4Hi, T_2); | 1260 _add(T_4Hi, T_2); |
1262 _mov(DestHi, T_4Hi); | 1261 _mov(DestHi, T_4Hi); |
1263 } break; | 1262 } break; |
1264 case InstArithmetic::Shl: { | 1263 case InstArithmetic::Shl: { |
(...skipping 10 matching lines...) Expand all Loading... |
1275 // use(t3) | 1274 // use(t3) |
1276 // t3 = t2 | 1275 // t3 = t2 |
1277 // t2 = 0 | 1276 // t2 = 0 |
1278 // L1: | 1277 // L1: |
1279 // a.lo = t2 | 1278 // a.lo = t2 |
1280 // a.hi = t3 | 1279 // a.hi = t3 |
1281 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; | 1280 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; |
1282 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); | 1281 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); |
1283 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1282 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1284 InstX8632Label *Label = InstX8632Label::create(Func, this); | 1283 InstX8632Label *Label = InstX8632Label::create(Func, this); |
1285 _mov(T_1, Src1Lo, Reg_ecx); | 1284 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); |
1286 _mov(T_2, Src0Lo); | 1285 _mov(T_2, Src0Lo); |
1287 _mov(T_3, Src0Hi); | 1286 _mov(T_3, Src0Hi); |
1288 _shld(T_3, T_2, T_1); | 1287 _shld(T_3, T_2, T_1); |
1289 _shl(T_2, T_1); | 1288 _shl(T_2, T_1); |
1290 _test(T_1, BitTest); | 1289 _test(T_1, BitTest); |
1291 _br(InstX8632Br::Br_e, Label); | 1290 _br(CondX86::Br_e, Label); |
1292 // Because of the intra-block control flow, we need to fake a use | 1291 // Because of the intra-block control flow, we need to fake a use |
1293 // of T_3 to prevent its earlier definition from being dead-code | 1292 // of T_3 to prevent its earlier definition from being dead-code |
1294 // eliminated in the presence of its later definition. | 1293 // eliminated in the presence of its later definition. |
1295 Context.insert(InstFakeUse::create(Func, T_3)); | 1294 Context.insert(InstFakeUse::create(Func, T_3)); |
1296 _mov(T_3, T_2); | 1295 _mov(T_3, T_2); |
1297 _mov(T_2, Zero); | 1296 _mov(T_2, Zero); |
1298 Context.insert(Label); | 1297 Context.insert(Label); |
1299 _mov(DestLo, T_2); | 1298 _mov(DestLo, T_2); |
1300 _mov(DestHi, T_3); | 1299 _mov(DestHi, T_3); |
1301 } break; | 1300 } break; |
1302 case InstArithmetic::Lshr: { | 1301 case InstArithmetic::Lshr: { |
1303 // a=b>>c (unsigned) ==> | 1302 // a=b>>c (unsigned) ==> |
1304 // t1:ecx = c.lo & 0xff | 1303 // t1:ecx = c.lo & 0xff |
1305 // t2 = b.lo | 1304 // t2 = b.lo |
1306 // t3 = b.hi | 1305 // t3 = b.hi |
1307 // t2 = shrd t2, t3, t1 | 1306 // t2 = shrd t2, t3, t1 |
1308 // t3 = shr t3, t1 | 1307 // t3 = shr t3, t1 |
1309 // test t1, 0x20 | 1308 // test t1, 0x20 |
1310 // je L1 | 1309 // je L1 |
1311 // use(t2) | 1310 // use(t2) |
1312 // t2 = t3 | 1311 // t2 = t3 |
1313 // t3 = 0 | 1312 // t3 = 0 |
1314 // L1: | 1313 // L1: |
1315 // a.lo = t2 | 1314 // a.lo = t2 |
1316 // a.hi = t3 | 1315 // a.hi = t3 |
1317 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; | 1316 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; |
1318 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); | 1317 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); |
1319 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1318 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1320 InstX8632Label *Label = InstX8632Label::create(Func, this); | 1319 InstX8632Label *Label = InstX8632Label::create(Func, this); |
1321 _mov(T_1, Src1Lo, Reg_ecx); | 1320 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); |
1322 _mov(T_2, Src0Lo); | 1321 _mov(T_2, Src0Lo); |
1323 _mov(T_3, Src0Hi); | 1322 _mov(T_3, Src0Hi); |
1324 _shrd(T_2, T_3, T_1); | 1323 _shrd(T_2, T_3, T_1); |
1325 _shr(T_3, T_1); | 1324 _shr(T_3, T_1); |
1326 _test(T_1, BitTest); | 1325 _test(T_1, BitTest); |
1327 _br(InstX8632Br::Br_e, Label); | 1326 _br(CondX86::Br_e, Label); |
1328 // Because of the intra-block control flow, we need to fake a use | 1327 // Because of the intra-block control flow, we need to fake a use |
1329 // of T_2 to prevent its earlier definition from being dead-code | 1328 // of T_2 to prevent its earlier definition from being dead-code |
1330 // eliminated in the presence of its later definition. | 1329 // eliminated in the presence of its later definition. |
1331 Context.insert(InstFakeUse::create(Func, T_2)); | 1330 Context.insert(InstFakeUse::create(Func, T_2)); |
1332 _mov(T_2, T_3); | 1331 _mov(T_2, T_3); |
1333 _mov(T_3, Zero); | 1332 _mov(T_3, Zero); |
1334 Context.insert(Label); | 1333 Context.insert(Label); |
1335 _mov(DestLo, T_2); | 1334 _mov(DestLo, T_2); |
1336 _mov(DestHi, T_3); | 1335 _mov(DestHi, T_3); |
1337 } break; | 1336 } break; |
1338 case InstArithmetic::Ashr: { | 1337 case InstArithmetic::Ashr: { |
1339 // a=b>>c (signed) ==> | 1338 // a=b>>c (signed) ==> |
1340 // t1:ecx = c.lo & 0xff | 1339 // t1:ecx = c.lo & 0xff |
1341 // t2 = b.lo | 1340 // t2 = b.lo |
1342 // t3 = b.hi | 1341 // t3 = b.hi |
1343 // t2 = shrd t2, t3, t1 | 1342 // t2 = shrd t2, t3, t1 |
1344 // t3 = sar t3, t1 | 1343 // t3 = sar t3, t1 |
1345 // test t1, 0x20 | 1344 // test t1, 0x20 |
1346 // je L1 | 1345 // je L1 |
1347 // use(t2) | 1346 // use(t2) |
1348 // t2 = t3 | 1347 // t2 = t3 |
1349 // t3 = sar t3, 0x1f | 1348 // t3 = sar t3, 0x1f |
1350 // L1: | 1349 // L1: |
1351 // a.lo = t2 | 1350 // a.lo = t2 |
1352 // a.hi = t3 | 1351 // a.hi = t3 |
1353 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; | 1352 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; |
1354 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); | 1353 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); |
1355 Constant *SignExtend = Ctx->getConstantInt32(IceType_i32, 0x1f); | 1354 Constant *SignExtend = Ctx->getConstantInt32(IceType_i32, 0x1f); |
1356 InstX8632Label *Label = InstX8632Label::create(Func, this); | 1355 InstX8632Label *Label = InstX8632Label::create(Func, this); |
1357 _mov(T_1, Src1Lo, Reg_ecx); | 1356 _mov(T_1, Src1Lo, RegX8632::Reg_ecx); |
1358 _mov(T_2, Src0Lo); | 1357 _mov(T_2, Src0Lo); |
1359 _mov(T_3, Src0Hi); | 1358 _mov(T_3, Src0Hi); |
1360 _shrd(T_2, T_3, T_1); | 1359 _shrd(T_2, T_3, T_1); |
1361 _sar(T_3, T_1); | 1360 _sar(T_3, T_1); |
1362 _test(T_1, BitTest); | 1361 _test(T_1, BitTest); |
1363 _br(InstX8632Br::Br_e, Label); | 1362 _br(CondX86::Br_e, Label); |
1364 // Because of the intra-block control flow, we need to fake a use | 1363 // Because of the intra-block control flow, we need to fake a use |
1365 // of T_2 to prevent its earlier definition from being dead-code | 1364 // of T_2 to prevent its earlier definition from being dead-code |
1366 // eliminated in the presence of its later definition. | 1365 // eliminated in the presence of its later definition. |
1367 Context.insert(InstFakeUse::create(Func, T_2)); | 1366 Context.insert(InstFakeUse::create(Func, T_2)); |
1368 _mov(T_2, T_3); | 1367 _mov(T_2, T_3); |
1369 _sar(T_3, SignExtend); | 1368 _sar(T_3, SignExtend); |
1370 Context.insert(Label); | 1369 Context.insert(Label); |
1371 _mov(DestLo, T_2); | 1370 _mov(DestLo, T_2); |
1372 _mov(DestHi, T_3); | 1371 _mov(DestHi, T_3); |
1373 } break; | 1372 } break; |
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1568 break; | 1567 break; |
1569 case InstArithmetic::Mul: | 1568 case InstArithmetic::Mul: |
1570 // TODO: Optimize for llvm::isa<Constant>(Src1) | 1569 // TODO: Optimize for llvm::isa<Constant>(Src1) |
1571 // TODO: Strength-reduce multiplications by a constant, | 1570 // TODO: Strength-reduce multiplications by a constant, |
1572 // particularly -1 and powers of 2. Advanced: use lea to | 1571 // particularly -1 and powers of 2. Advanced: use lea to |
1573 // multiply by 3, 5, 9. | 1572 // multiply by 3, 5, 9. |
1574 // | 1573 // |
1575 // The 8-bit version of imul only allows the form "imul r/m8" | 1574 // The 8-bit version of imul only allows the form "imul r/m8" |
1576 // where T must be in eax. | 1575 // where T must be in eax. |
1577 if (Dest->getType() == IceType_i8) | 1576 if (Dest->getType() == IceType_i8) |
1578 _mov(T, Src0, Reg_eax); | 1577 _mov(T, Src0, RegX8632::Reg_eax); |
1579 else | 1578 else |
1580 _mov(T, Src0); | 1579 _mov(T, Src0); |
1581 _imul(T, Src1); | 1580 _imul(T, Src1); |
1582 _mov(Dest, T); | 1581 _mov(Dest, T); |
1583 break; | 1582 break; |
1584 case InstArithmetic::Shl: | 1583 case InstArithmetic::Shl: |
1585 _mov(T, Src0); | 1584 _mov(T, Src0); |
1586 if (!llvm::isa<Constant>(Src1)) | 1585 if (!llvm::isa<Constant>(Src1)) |
1587 Src1 = legalizeToVar(Src1, false, Reg_ecx); | 1586 Src1 = legalizeToVar(Src1, false, RegX8632::Reg_ecx); |
1588 _shl(T, Src1); | 1587 _shl(T, Src1); |
1589 _mov(Dest, T); | 1588 _mov(Dest, T); |
1590 break; | 1589 break; |
1591 case InstArithmetic::Lshr: | 1590 case InstArithmetic::Lshr: |
1592 _mov(T, Src0); | 1591 _mov(T, Src0); |
1593 if (!llvm::isa<Constant>(Src1)) | 1592 if (!llvm::isa<Constant>(Src1)) |
1594 Src1 = legalizeToVar(Src1, false, Reg_ecx); | 1593 Src1 = legalizeToVar(Src1, false, RegX8632::Reg_ecx); |
1595 _shr(T, Src1); | 1594 _shr(T, Src1); |
1596 _mov(Dest, T); | 1595 _mov(Dest, T); |
1597 break; | 1596 break; |
1598 case InstArithmetic::Ashr: | 1597 case InstArithmetic::Ashr: |
1599 _mov(T, Src0); | 1598 _mov(T, Src0); |
1600 if (!llvm::isa<Constant>(Src1)) | 1599 if (!llvm::isa<Constant>(Src1)) |
1601 Src1 = legalizeToVar(Src1, false, Reg_ecx); | 1600 Src1 = legalizeToVar(Src1, false, RegX8632::Reg_ecx); |
1602 _sar(T, Src1); | 1601 _sar(T, Src1); |
1603 _mov(Dest, T); | 1602 _mov(Dest, T); |
1604 break; | 1603 break; |
1605 case InstArithmetic::Udiv: | 1604 case InstArithmetic::Udiv: |
1606 // div and idiv are the few arithmetic operators that do not allow | 1605 // div and idiv are the few arithmetic operators that do not allow |
1607 // immediates as the operand. | 1606 // immediates as the operand. |
1608 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1607 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1609 if (Dest->getType() == IceType_i8) { | 1608 if (Dest->getType() == IceType_i8) { |
1610 Variable *T_ah = NULL; | 1609 Variable *T_ah = NULL; |
1611 Constant *Zero = Ctx->getConstantZero(IceType_i8); | 1610 Constant *Zero = Ctx->getConstantZero(IceType_i8); |
1612 _mov(T, Src0, Reg_eax); | 1611 _mov(T, Src0, RegX8632::Reg_eax); |
1613 _mov(T_ah, Zero, Reg_ah); | 1612 _mov(T_ah, Zero, RegX8632::Reg_ah); |
1614 _div(T, Src1, T_ah); | 1613 _div(T, Src1, T_ah); |
1615 _mov(Dest, T); | 1614 _mov(Dest, T); |
1616 } else { | 1615 } else { |
1617 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1616 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1618 _mov(T, Src0, Reg_eax); | 1617 _mov(T, Src0, RegX8632::Reg_eax); |
1619 _mov(T_edx, Zero, Reg_edx); | 1618 _mov(T_edx, Zero, RegX8632::Reg_edx); |
1620 _div(T, Src1, T_edx); | 1619 _div(T, Src1, T_edx); |
1621 _mov(Dest, T); | 1620 _mov(Dest, T); |
1622 } | 1621 } |
1623 break; | 1622 break; |
1624 case InstArithmetic::Sdiv: | 1623 case InstArithmetic::Sdiv: |
1625 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1624 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1626 if (Dest->getType() == IceType_i8) { | 1625 if (Dest->getType() == IceType_i8) { |
1627 _mov(T, Src0, Reg_eax); | 1626 _mov(T, Src0, RegX8632::Reg_eax); |
1628 _cbwdq(T, T); | 1627 _cbwdq(T, T); |
1629 _idiv(T, Src1, T); | 1628 _idiv(T, Src1, T); |
1630 _mov(Dest, T); | 1629 _mov(Dest, T); |
1631 } else { | 1630 } else { |
1632 T_edx = makeReg(IceType_i32, Reg_edx); | 1631 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); |
1633 _mov(T, Src0, Reg_eax); | 1632 _mov(T, Src0, RegX8632::Reg_eax); |
1634 _cbwdq(T_edx, T); | 1633 _cbwdq(T_edx, T); |
1635 _idiv(T, Src1, T_edx); | 1634 _idiv(T, Src1, T_edx); |
1636 _mov(Dest, T); | 1635 _mov(Dest, T); |
1637 } | 1636 } |
1638 break; | 1637 break; |
1639 case InstArithmetic::Urem: | 1638 case InstArithmetic::Urem: |
1640 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1639 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1641 if (Dest->getType() == IceType_i8) { | 1640 if (Dest->getType() == IceType_i8) { |
1642 Variable *T_ah = NULL; | 1641 Variable *T_ah = NULL; |
1643 Constant *Zero = Ctx->getConstantZero(IceType_i8); | 1642 Constant *Zero = Ctx->getConstantZero(IceType_i8); |
1644 _mov(T, Src0, Reg_eax); | 1643 _mov(T, Src0, RegX8632::Reg_eax); |
1645 _mov(T_ah, Zero, Reg_ah); | 1644 _mov(T_ah, Zero, RegX8632::Reg_ah); |
1646 _div(T_ah, Src1, T); | 1645 _div(T_ah, Src1, T); |
1647 _mov(Dest, T_ah); | 1646 _mov(Dest, T_ah); |
1648 } else { | 1647 } else { |
1649 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1648 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1650 _mov(T_edx, Zero, Reg_edx); | 1649 _mov(T_edx, Zero, RegX8632::Reg_edx); |
1651 _mov(T, Src0, Reg_eax); | 1650 _mov(T, Src0, RegX8632::Reg_eax); |
1652 _div(T_edx, Src1, T); | 1651 _div(T_edx, Src1, T); |
1653 _mov(Dest, T_edx); | 1652 _mov(Dest, T_edx); |
1654 } | 1653 } |
1655 break; | 1654 break; |
1656 case InstArithmetic::Srem: | 1655 case InstArithmetic::Srem: |
1657 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); | 1656 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
1658 if (Dest->getType() == IceType_i8) { | 1657 if (Dest->getType() == IceType_i8) { |
1659 Variable *T_ah = makeReg(IceType_i8, Reg_ah); | 1658 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah); |
1660 _mov(T, Src0, Reg_eax); | 1659 _mov(T, Src0, RegX8632::Reg_eax); |
1661 _cbwdq(T, T); | 1660 _cbwdq(T, T); |
1662 Context.insert(InstFakeDef::create(Func, T_ah)); | 1661 Context.insert(InstFakeDef::create(Func, T_ah)); |
1663 _idiv(T_ah, Src1, T); | 1662 _idiv(T_ah, Src1, T); |
1664 _mov(Dest, T_ah); | 1663 _mov(Dest, T_ah); |
1665 } else { | 1664 } else { |
1666 T_edx = makeReg(IceType_i32, Reg_edx); | 1665 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); |
1667 _mov(T, Src0, Reg_eax); | 1666 _mov(T, Src0, RegX8632::Reg_eax); |
1668 _cbwdq(T_edx, T); | 1667 _cbwdq(T_edx, T); |
1669 _idiv(T_edx, Src1, T); | 1668 _idiv(T_edx, Src1, T); |
1670 _mov(Dest, T_edx); | 1669 _mov(Dest, T_edx); |
1671 } | 1670 } |
1672 break; | 1671 break; |
1673 case InstArithmetic::Fadd: | 1672 case InstArithmetic::Fadd: |
1674 _mov(T, Src0); | 1673 _mov(T, Src0); |
1675 _addss(T, Src1); | 1674 _addss(T, Src1); |
1676 _mov(Dest, T); | 1675 _mov(Dest, T); |
1677 break; | 1676 break; |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1729 } | 1728 } |
1730 } | 1729 } |
1731 | 1730 |
// Lowers a high-level InstBr into x86-32 branch instruction(s). | // Lowers a high-level InstBr into x86-32 branch instruction(s). |
// Unconditional: a single _br to getTargetUnconditional(). | // Unconditional: a single _br to getTargetUnconditional(). |
// Conditional: compare the condition with zero, then _br on Br_ne. | // Conditional: compare the condition with zero, then _br on Br_ne. |
1732 void TargetX8632::lowerBr(const InstBr *Inst) { | 1731 void TargetX8632::lowerBr(const InstBr *Inst) { |
1733 if (Inst->isUnconditional()) { | 1732 if (Inst->isUnconditional()) { |
1734 _br(Inst->getTargetUnconditional()); | 1733 _br(Inst->getTargetUnconditional()); |
1735 } else { | 1734 } else { |
// The condition is legalized first, then compared with an i32 zero. | // The condition is legalized first, then compared with an i32 zero. |
1736 Operand *Src0 = legalize(Inst->getCondition(), Legal_Reg | Legal_Mem); | 1735 Operand *Src0 = legalize(Inst->getCondition(), Legal_Reg | Legal_Mem); |
1737 Constant *Zero = Ctx->getConstantZero(IceType_i32); | 1736 Constant *Zero = Ctx->getConstantZero(IceType_i32); |
1738 _cmp(Src0, Zero); | 1737 _cmp(Src0, Zero); |
// Presumably: nonzero condition goes to the true target, else false. | // Presumably: nonzero condition goes to the true target, else false. |
1739 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); | 1738 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); |
1740 } | 1739 } |
1741 } | 1740 } |
1742 | 1741 |
1743 void TargetX8632::lowerCall(const InstCall *Instr) { | 1742 void TargetX8632::lowerCall(const InstCall *Instr) { |
1744 // x86-32 calling convention: | 1743 // x86-32 calling convention: |
1745 // | 1744 // |
1746 // * At the point before the call, the stack must be aligned to 16 | 1745 // * At the point before the call, the stack must be aligned to 16 |
1747 // bytes. | 1746 // bytes. |
1748 // | 1747 // |
1749 // * The first four arguments of vector type, regardless of their | 1748 // * The first four arguments of vector type, regardless of their |
(...skipping 25 matching lines...) Expand all Loading... |
1775 // The PNaCl ABI requires the width of arguments to be at least 32 bits. | 1774 // The PNaCl ABI requires the width of arguments to be at least 32 bits. |
1776 assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 || | 1775 assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 || |
1777 Ty == IceType_f64 || isVectorType(Ty)); | 1776 Ty == IceType_f64 || isVectorType(Ty)); |
1778 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { | 1777 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { |
1779 XmmArgs.push_back(Arg); | 1778 XmmArgs.push_back(Arg); |
1780 } else { | 1779 } else { |
1781 StackArgs.push_back(Arg); | 1780 StackArgs.push_back(Arg); |
1782 if (isVectorType(Arg->getType())) { | 1781 if (isVectorType(Arg->getType())) { |
1783 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); | 1782 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); |
1784 } | 1783 } |
1785 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); | 1784 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); |
1786 Constant *Loc = | 1785 Constant *Loc = |
1787 Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes); | 1786 Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes); |
1788 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); | 1787 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); |
1789 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); | 1788 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); |
1790 } | 1789 } |
1791 } | 1790 } |
1792 | 1791 |
1793 // Adjust the parameter area so that the stack is aligned. It is | 1792 // Adjust the parameter area so that the stack is aligned. It is |
1794 // assumed that the stack is already aligned at the start of the | 1793 // assumed that the stack is already aligned at the start of the |
1795 // calling sequence. | 1794 // calling sequence. |
(...skipping 21 matching lines...) Expand all Loading... |
1817 // Copy arguments to be passed in registers to the appropriate | 1816 // Copy arguments to be passed in registers to the appropriate |
1818 // registers. | 1817 // registers. |
1819 // TODO: Investigate the impact of lowering arguments passed in | 1818 // TODO: Investigate the impact of lowering arguments passed in |
1820 // registers after lowering stack arguments as opposed to the other | 1819 // registers after lowering stack arguments as opposed to the other |
1821 // way around. Lowering register arguments after stack arguments may | 1820 // way around. Lowering register arguments after stack arguments may |
1822 // reduce register pressure. On the other hand, lowering register | 1821 // reduce register pressure. On the other hand, lowering register |
1823 // arguments first (before stack arguments) may result in more compact | 1822 // arguments first (before stack arguments) may result in more compact |
1824 // code, as the memory operand displacements may end up being smaller | 1823 // code, as the memory operand displacements may end up being smaller |
1825 // before any stack adjustment is done. | 1824 // before any stack adjustment is done. |
1826 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { | 1825 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { |
1827 Variable *Reg = legalizeToVar(XmmArgs[i], false, Reg_xmm0 + i); | 1826 Variable *Reg = legalizeToVar(XmmArgs[i], false, RegX8632::Reg_xmm0 + i); |
1828 // Generate a FakeUse of register arguments so that they do not get | 1827 // Generate a FakeUse of register arguments so that they do not get |
1829 // dead code eliminated as a result of the FakeKill of scratch | 1828 // dead code eliminated as a result of the FakeKill of scratch |
1830 // registers after the call. | 1829 // registers after the call. |
1831 Context.insert(InstFakeUse::create(Func, Reg)); | 1830 Context.insert(InstFakeUse::create(Func, Reg)); |
1832 } | 1831 } |
1833 // Generate the call instruction. Assign its result to a temporary | 1832 // Generate the call instruction. Assign its result to a temporary |
1834 // with high register allocation weight. | 1833 // with high register allocation weight. |
1835 Variable *Dest = Instr->getDest(); | 1834 Variable *Dest = Instr->getDest(); |
1836 // ReturnReg doubles as ReturnRegLo as necessary. | 1835 // ReturnReg doubles as ReturnRegLo as necessary. |
1837 Variable *ReturnReg = NULL; | 1836 Variable *ReturnReg = NULL; |
1838 Variable *ReturnRegHi = NULL; | 1837 Variable *ReturnRegHi = NULL; |
1839 if (Dest) { | 1838 if (Dest) { |
1840 switch (Dest->getType()) { | 1839 switch (Dest->getType()) { |
1841 case IceType_NUM: | 1840 case IceType_NUM: |
1842 llvm_unreachable("Invalid Call dest type"); | 1841 llvm_unreachable("Invalid Call dest type"); |
1843 break; | 1842 break; |
1844 case IceType_void: | 1843 case IceType_void: |
1845 break; | 1844 break; |
1846 case IceType_i1: | 1845 case IceType_i1: |
1847 case IceType_i8: | 1846 case IceType_i8: |
1848 case IceType_i16: | 1847 case IceType_i16: |
1849 case IceType_i32: | 1848 case IceType_i32: |
1850 ReturnReg = makeReg(Dest->getType(), Reg_eax); | 1849 ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_eax); |
1851 break; | 1850 break; |
1852 case IceType_i64: | 1851 case IceType_i64: |
1853 ReturnReg = makeReg(IceType_i32, Reg_eax); | 1852 ReturnReg = makeReg(IceType_i32, RegX8632::Reg_eax); |
1854 ReturnRegHi = makeReg(IceType_i32, Reg_edx); | 1853 ReturnRegHi = makeReg(IceType_i32, RegX8632::Reg_edx); |
1855 break; | 1854 break; |
1856 case IceType_f32: | 1855 case IceType_f32: |
1857 case IceType_f64: | 1856 case IceType_f64: |
1858 // Leave ReturnReg==ReturnRegHi==NULL, and capture the result with | 1857 // Leave ReturnReg==ReturnRegHi==NULL, and capture the result with |
1859 // the fstp instruction. | 1858 // the fstp instruction. |
1860 break; | 1859 break; |
1861 case IceType_v4i1: | 1860 case IceType_v4i1: |
1862 case IceType_v8i1: | 1861 case IceType_v8i1: |
1863 case IceType_v16i1: | 1862 case IceType_v16i1: |
1864 case IceType_v16i8: | 1863 case IceType_v16i8: |
1865 case IceType_v8i16: | 1864 case IceType_v8i16: |
1866 case IceType_v4i32: | 1865 case IceType_v4i32: |
1867 case IceType_v4f32: | 1866 case IceType_v4f32: |
1868 ReturnReg = makeReg(Dest->getType(), Reg_xmm0); | 1867 ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_xmm0); |
1869 break; | 1868 break; |
1870 } | 1869 } |
1871 } | 1870 } |
1872 // TODO(stichnot): LEAHACK: remove Legal_All (and use default) once | 1871 // TODO(stichnot): LEAHACK: remove Legal_All (and use default) once |
1873 // a proper emitter is used. | 1872 // a proper emitter is used. |
1874 Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All); | 1873 Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All); |
1875 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); | 1874 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); |
1876 Context.insert(NewCall); | 1875 Context.insert(NewCall); |
1877 if (ReturnRegHi) | 1876 if (ReturnRegHi) |
1878 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); | 1877 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
1879 | 1878 |
1880 // Add the appropriate offset to esp. The call instruction takes care | 1879 // Add the appropriate offset to esp. The call instruction takes care |
1881 // of resetting the stack offset during emission. | 1880 // of resetting the stack offset during emission. |
1882 if (ParameterAreaSizeBytes) { | 1881 if (ParameterAreaSizeBytes) { |
1883 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); | 1882 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); |
1884 _add(esp, Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes)); | 1883 _add(esp, Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes)); |
1885 } | 1884 } |
1886 | 1885 |
1887 // Insert a register-kill pseudo instruction. | 1886 // Insert a register-kill pseudo instruction. |
1888 VarList KilledRegs; | 1887 VarList KilledRegs; |
1889 for (SizeT i = 0; i < ScratchRegs.size(); ++i) { | 1888 for (SizeT i = 0; i < ScratchRegs.size(); ++i) { |
1890 if (ScratchRegs[i]) | 1889 if (ScratchRegs[i]) |
1891 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i)); | 1890 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i)); |
1892 } | 1891 } |
1893 Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall)); | 1892 Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall)); |
(...skipping 579 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2473 // makeVectorOfOnes() requires an integer vector type. | 2472 // makeVectorOfOnes() requires an integer vector type. |
2474 T = makeVectorOfMinusOnes(IceType_v4i32); | 2473 T = makeVectorOfMinusOnes(IceType_v4i32); |
2475 } else if (Condition == InstFcmp::False) { | 2474 } else if (Condition == InstFcmp::False) { |
2476 T = makeVectorOfZeros(Dest->getType()); | 2475 T = makeVectorOfZeros(Dest->getType()); |
2477 } else { | 2476 } else { |
2478 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 2477 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
2479 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 2478 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
2480 | 2479 |
2481 switch (Condition) { | 2480 switch (Condition) { |
2482 default: { | 2481 default: { |
2483 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; | 2482 CondX86::CmppsCond Predicate = TableFcmp[Index].Predicate; |
2484 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); | 2483 assert(Predicate != CondX86::Cmpps_Invalid); |
2485 T = makeReg(Src0RM->getType()); | 2484 T = makeReg(Src0RM->getType()); |
2486 _movp(T, Src0RM); | 2485 _movp(T, Src0RM); |
2487 _cmpps(T, Src1RM, Predicate); | 2486 _cmpps(T, Src1RM, Predicate); |
2488 } break; | 2487 } break; |
2489 case InstFcmp::One: { | 2488 case InstFcmp::One: { |
2490 // Check both unequal and ordered. | 2489 // Check both unequal and ordered. |
2491 T = makeReg(Src0RM->getType()); | 2490 T = makeReg(Src0RM->getType()); |
2492 Variable *T2 = makeReg(Src0RM->getType()); | 2491 Variable *T2 = makeReg(Src0RM->getType()); |
2493 _movp(T, Src0RM); | 2492 _movp(T, Src0RM); |
2494 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq); | 2493 _cmpps(T, Src1RM, CondX86::Cmpps_neq); |
2495 _movp(T2, Src0RM); | 2494 _movp(T2, Src0RM); |
2496 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord); | 2495 _cmpps(T2, Src1RM, CondX86::Cmpps_ord); |
2497 _pand(T, T2); | 2496 _pand(T, T2); |
2498 } break; | 2497 } break; |
2499 case InstFcmp::Ueq: { | 2498 case InstFcmp::Ueq: { |
2500 // Check both equal or unordered. | 2499 // Check both equal or unordered. |
2501 T = makeReg(Src0RM->getType()); | 2500 T = makeReg(Src0RM->getType()); |
2502 Variable *T2 = makeReg(Src0RM->getType()); | 2501 Variable *T2 = makeReg(Src0RM->getType()); |
2503 _movp(T, Src0RM); | 2502 _movp(T, Src0RM); |
2504 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq); | 2503 _cmpps(T, Src1RM, CondX86::Cmpps_eq); |
2505 _movp(T2, Src0RM); | 2504 _movp(T2, Src0RM); |
2506 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord); | 2505 _cmpps(T2, Src1RM, CondX86::Cmpps_unord); |
2507 _por(T, T2); | 2506 _por(T, T2); |
2508 } break; | 2507 } break; |
2509 } | 2508 } |
2510 } | 2509 } |
2511 | 2510 |
2512 _movp(Dest, T); | 2511 _movp(Dest, T); |
2513 eliminateNextVectorSextInstruction(Dest); | 2512 eliminateNextVectorSextInstruction(Dest); |
2514 return; | 2513 return; |
2515 } | 2514 } |
2516 | 2515 |
2517 // Lowering a = fcmp cond, b, c | 2516 // Lowering a = fcmp cond, b, c |
2518 // ucomiss b, c /* only if C1 != Br_None */ | 2517 // ucomiss b, c /* only if C1 != Br_None */ |
2519 // /* but swap b,c order if SwapOperands==true */ | 2518 // /* but swap b,c order if SwapOperands==true */ |
2520 // mov a, <default> | 2519 // mov a, <default> |
2521 // j<C1> label /* only if C1 != Br_None */ | 2520 // j<C1> label /* only if C1 != Br_None */ |
2522 // j<C2> label /* only if C2 != Br_None */ | 2521 // j<C2> label /* only if C2 != Br_None */ |
2523 // FakeUse(a) /* only if C1 != Br_None */ | 2522 // FakeUse(a) /* only if C1 != Br_None */ |
2524 // mov a, !<default> /* only if C1 != Br_None */ | 2523 // mov a, !<default> /* only if C1 != Br_None */ |
2525 // label: /* only if C1 != Br_None */ | 2524 // label: /* only if C1 != Br_None */ |
2526 InstFcmp::FCond Condition = Inst->getCondition(); | 2525 InstFcmp::FCond Condition = Inst->getCondition(); |
2527 size_t Index = static_cast<size_t>(Condition); | 2526 size_t Index = static_cast<size_t>(Condition); |
2528 assert(Index < TableFcmpSize); | 2527 assert(Index < TableFcmpSize); |
2529 if (TableFcmp[Index].SwapScalarOperands) { | 2528 if (TableFcmp[Index].SwapScalarOperands) { |
2530 Operand *Tmp = Src0; | 2529 Operand *Tmp = Src0; |
2531 Src0 = Src1; | 2530 Src0 = Src1; |
2532 Src1 = Tmp; | 2531 Src1 = Tmp; |
2533 } | 2532 } |
2534 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None); | 2533 bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None); |
2535 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None); | 2534 bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None); |
2536 if (HasC1) { | 2535 if (HasC1) { |
2537 Src0 = legalize(Src0); | 2536 Src0 = legalize(Src0); |
2538 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 2537 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
2539 Variable *T = NULL; | 2538 Variable *T = NULL; |
2540 _mov(T, Src0); | 2539 _mov(T, Src0); |
2541 _ucomiss(T, Src1RM); | 2540 _ucomiss(T, Src1RM); |
2542 } | 2541 } |
2543 Constant *Default = | 2542 Constant *Default = |
2544 Ctx->getConstantInt32(IceType_i32, TableFcmp[Index].Default); | 2543 Ctx->getConstantInt32(IceType_i32, TableFcmp[Index].Default); |
2545 _mov(Dest, Default); | 2544 _mov(Dest, Default); |
(...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2698 size_t Index = static_cast<size_t>(Condition); | 2697 size_t Index = static_cast<size_t>(Condition); |
2699 assert(Index < TableIcmp64Size); | 2698 assert(Index < TableIcmp64Size); |
2700 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); | 2699 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); |
2701 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); | 2700 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); |
2702 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); | 2701 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); |
2703 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); | 2702 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); |
2704 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) { | 2703 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) { |
2705 InstX8632Label *Label = InstX8632Label::create(Func, this); | 2704 InstX8632Label *Label = InstX8632Label::create(Func, this); |
2706 _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One)); | 2705 _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One)); |
2707 _cmp(Src0LoRM, Src1LoRI); | 2706 _cmp(Src0LoRM, Src1LoRI); |
2708 _br(InstX8632Br::Br_ne, Label); | 2707 _br(CondX86::Br_ne, Label); |
2709 _cmp(Src0HiRM, Src1HiRI); | 2708 _cmp(Src0HiRM, Src1HiRI); |
2710 _br(InstX8632Br::Br_ne, Label); | 2709 _br(CondX86::Br_ne, Label); |
2711 Context.insert(InstFakeUse::create(Func, Dest)); | 2710 Context.insert(InstFakeUse::create(Func, Dest)); |
2712 _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero)); | 2711 _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero)); |
2713 Context.insert(Label); | 2712 Context.insert(Label); |
2714 } else { | 2713 } else { |
2715 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); | 2714 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); |
2716 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); | 2715 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); |
2717 _mov(Dest, One); | 2716 _mov(Dest, One); |
2718 _cmp(Src0HiRM, Src1HiRI); | 2717 _cmp(Src0HiRM, Src1HiRI); |
2719 _br(TableIcmp64[Index].C1, LabelTrue); | 2718 _br(TableIcmp64[Index].C1, LabelTrue); |
2720 _br(TableIcmp64[Index].C2, LabelFalse); | 2719 _br(TableIcmp64[Index].C2, LabelFalse); |
(...skipping 432 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3153 } | 3152 } |
3154 case Intrinsics::Sqrt: { | 3153 case Intrinsics::Sqrt: { |
3155 Operand *Src = legalize(Instr->getArg(0)); | 3154 Operand *Src = legalize(Instr->getArg(0)); |
3156 Variable *Dest = Instr->getDest(); | 3155 Variable *Dest = Instr->getDest(); |
3157 Variable *T = makeReg(Dest->getType()); | 3156 Variable *T = makeReg(Dest->getType()); |
3158 _sqrtss(T, Src); | 3157 _sqrtss(T, Src); |
3159 _mov(Dest, T); | 3158 _mov(Dest, T); |
3160 return; | 3159 return; |
3161 } | 3160 } |
3162 case Intrinsics::Stacksave: { | 3161 case Intrinsics::Stacksave: { |
3163 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); | 3162 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); |
3164 Variable *Dest = Instr->getDest(); | 3163 Variable *Dest = Instr->getDest(); |
3165 _mov(Dest, esp); | 3164 _mov(Dest, esp); |
3166 return; | 3165 return; |
3167 } | 3166 } |
3168 case Intrinsics::Stackrestore: { | 3167 case Intrinsics::Stackrestore: { |
3169 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); | 3168 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); |
3170 _mov(esp, Instr->getArg(0)); | 3169 _mov(esp, Instr->getArg(0)); |
3171 return; | 3170 return; |
3172 } | 3171 } |
3173 case Intrinsics::Trap: | 3172 case Intrinsics::Trap: |
3174 _ud2(); | 3173 _ud2(); |
3175 return; | 3174 return; |
3176 case Intrinsics::UnknownIntrinsic: | 3175 case Intrinsics::UnknownIntrinsic: |
3177 Func->setError("Should not be lowering UnknownIntrinsic"); | 3176 Func->setError("Should not be lowering UnknownIntrinsic"); |
3178 return; | 3177 return; |
3179 } | 3178 } |
3180 return; | 3179 return; |
3181 } | 3180 } |
3182 | 3181 |
3183 void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, | 3182 void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, |
3184 Operand *Expected, Operand *Desired) { | 3183 Operand *Expected, Operand *Desired) { |
3185 if (Expected->getType() == IceType_i64) { | 3184 if (Expected->getType() == IceType_i64) { |
3186 // Reserve the pre-colored registers first, before adding any more | 3185 // Reserve the pre-colored registers first, before adding any more |
3187 // infinite-weight variables from FormMemoryOperand's legalization. | 3186 // infinite-weight variables from FormMemoryOperand's legalization. |
3188 Variable *T_edx = makeReg(IceType_i32, Reg_edx); | 3187 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); |
3189 Variable *T_eax = makeReg(IceType_i32, Reg_eax); | 3188 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax); |
3190 Variable *T_ecx = makeReg(IceType_i32, Reg_ecx); | 3189 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); |
3191 Variable *T_ebx = makeReg(IceType_i32, Reg_ebx); | 3190 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx); |
3192 _mov(T_eax, loOperand(Expected)); | 3191 _mov(T_eax, loOperand(Expected)); |
3193 _mov(T_edx, hiOperand(Expected)); | 3192 _mov(T_edx, hiOperand(Expected)); |
3194 _mov(T_ebx, loOperand(Desired)); | 3193 _mov(T_ebx, loOperand(Desired)); |
3195 _mov(T_ecx, hiOperand(Desired)); | 3194 _mov(T_ecx, hiOperand(Desired)); |
3196 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); | 3195 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); |
3197 const bool Locked = true; | 3196 const bool Locked = true; |
3198 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 3197 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
3199 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); | 3198 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); |
3200 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); | 3199 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); |
3201 _mov(DestLo, T_eax); | 3200 _mov(DestLo, T_eax); |
3202 _mov(DestHi, T_edx); | 3201 _mov(DestHi, T_edx); |
3203 return; | 3202 return; |
3204 } | 3203 } |
3205 Variable *T_eax = makeReg(Expected->getType(), Reg_eax); | 3204 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax); |
3206 _mov(T_eax, Expected); | 3205 _mov(T_eax, Expected); |
3207 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); | 3206 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); |
3208 Variable *DesiredReg = legalizeToVar(Desired); | 3207 Variable *DesiredReg = legalizeToVar(Desired); |
3209 const bool Locked = true; | 3208 const bool Locked = true; |
3210 _cmpxchg(Addr, T_eax, DesiredReg, Locked); | 3209 _cmpxchg(Addr, T_eax, DesiredReg, Locked); |
3211 _mov(DestPrev, T_eax); | 3210 _mov(DestPrev, T_eax); |
3212 } | 3211 } |
3213 | 3212 |
3214 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem, | 3213 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem, |
3215 Operand *Expected, | 3214 Operand *Expected, |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3264 NextBr->isLastUse(NextCmp->getDest())) { | 3263 NextBr->isLastUse(NextCmp->getDest())) { |
3265 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired); | 3264 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired); |
3266 for (size_t i = 0; i < PhiAssigns.size(); ++i) { | 3265 for (size_t i = 0; i < PhiAssigns.size(); ++i) { |
3267 // Lower the phi assignments now, before the branch (same placement | 3266 // Lower the phi assignments now, before the branch (same placement |
3268 // as before). | 3267 // as before). |
3269 InstAssign *PhiAssign = PhiAssigns[i]; | 3268 InstAssign *PhiAssign = PhiAssigns[i]; |
3270 PhiAssign->setDeleted(); | 3269 PhiAssign->setDeleted(); |
3271 lowerAssign(PhiAssign); | 3270 lowerAssign(PhiAssign); |
3272 Context.advanceNext(); | 3271 Context.advanceNext(); |
3273 } | 3272 } |
3274 _br(InstX8632::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse()); | 3273 _br(CondX86::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse()); |
3275 // Skip over the old compare and branch, by deleting them. | 3274 // Skip over the old compare and branch, by deleting them. |
3276 NextCmp->setDeleted(); | 3275 NextCmp->setDeleted(); |
3277 NextBr->setDeleted(); | 3276 NextBr->setDeleted(); |
3278 Context.advanceNext(); | 3277 Context.advanceNext(); |
3279 Context.advanceNext(); | 3278 Context.advanceNext(); |
3280 return true; | 3279 return true; |
3281 } | 3280 } |
3282 } | 3281 } |
3283 } | 3282 } |
3284 return false; | 3283 return false; |
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3391 // mov <reg>, eax | 3390 // mov <reg>, eax |
3392 // op <reg>, [desired_adj] | 3391 // op <reg>, [desired_adj] |
3393 // lock cmpxchg [ptr], <reg> | 3392 // lock cmpxchg [ptr], <reg> |
3394 // jne .LABEL | 3393 // jne .LABEL |
3395 // mov <dest>, eax | 3394 // mov <dest>, eax |
3396 // | 3395 // |
3397 // If Op_{Lo,Hi} are NULL, then just copy the value. | 3396 // If Op_{Lo,Hi} are NULL, then just copy the value. |
3398 Val = legalize(Val); | 3397 Val = legalize(Val); |
3399 Type Ty = Val->getType(); | 3398 Type Ty = Val->getType(); |
3400 if (Ty == IceType_i64) { | 3399 if (Ty == IceType_i64) { |
3401 Variable *T_edx = makeReg(IceType_i32, Reg_edx); | 3400 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx); |
3402 Variable *T_eax = makeReg(IceType_i32, Reg_eax); | 3401 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax); |
3403 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty); | 3402 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty); |
3404 _mov(T_eax, loOperand(Addr)); | 3403 _mov(T_eax, loOperand(Addr)); |
3405 _mov(T_edx, hiOperand(Addr)); | 3404 _mov(T_edx, hiOperand(Addr)); |
3406 Variable *T_ecx = makeReg(IceType_i32, Reg_ecx); | 3405 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); |
3407 Variable *T_ebx = makeReg(IceType_i32, Reg_ebx); | 3406 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx); |
3408 InstX8632Label *Label = InstX8632Label::create(Func, this); | 3407 InstX8632Label *Label = InstX8632Label::create(Func, this); |
3409 const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL; | 3408 const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL; |
3410 if (!IsXchg8b) { | 3409 if (!IsXchg8b) { |
3411 Context.insert(Label); | 3410 Context.insert(Label); |
3412 _mov(T_ebx, T_eax); | 3411 _mov(T_ebx, T_eax); |
3413 (this->*Op_Lo)(T_ebx, loOperand(Val)); | 3412 (this->*Op_Lo)(T_ebx, loOperand(Val)); |
3414 _mov(T_ecx, T_edx); | 3413 _mov(T_ecx, T_edx); |
3415 (this->*Op_Hi)(T_ecx, hiOperand(Val)); | 3414 (this->*Op_Hi)(T_ecx, hiOperand(Val)); |
3416 } else { | 3415 } else { |
3417 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. | 3416 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. |
3418 // It just needs the Val loaded into ebx and ecx. | 3417 // It just needs the Val loaded into ebx and ecx. |
3419 // That can also be done before the loop. | 3418 // That can also be done before the loop. |
3420 _mov(T_ebx, loOperand(Val)); | 3419 _mov(T_ebx, loOperand(Val)); |
3421 _mov(T_ecx, hiOperand(Val)); | 3420 _mov(T_ecx, hiOperand(Val)); |
3422 Context.insert(Label); | 3421 Context.insert(Label); |
3423 } | 3422 } |
3424 const bool Locked = true; | 3423 const bool Locked = true; |
3425 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); | 3424 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); |
3426 _br(InstX8632Br::Br_ne, Label); | 3425 _br(CondX86::Br_ne, Label); |
3427 if (!IsXchg8b) { | 3426 if (!IsXchg8b) { |
3428 // If Val is a variable, model the extended live range of Val through | 3427 // If Val is a variable, model the extended live range of Val through |
3429 // the end of the loop, since it will be re-used by the loop. | 3428 // the end of the loop, since it will be re-used by the loop. |
3430 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { | 3429 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { |
3431 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar)); | 3430 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar)); |
3432 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); | 3431 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); |
3433 Context.insert(InstFakeUse::create(Func, ValLo)); | 3432 Context.insert(InstFakeUse::create(Func, ValLo)); |
3434 Context.insert(InstFakeUse::create(Func, ValHi)); | 3433 Context.insert(InstFakeUse::create(Func, ValHi)); |
3435 } | 3434 } |
3436 } else { | 3435 } else { |
3437 // For xchg, the loop is slightly smaller and ebx/ecx are used. | 3436 // For xchg, the loop is slightly smaller and ebx/ecx are used. |
3438 Context.insert(InstFakeUse::create(Func, T_ebx)); | 3437 Context.insert(InstFakeUse::create(Func, T_ebx)); |
3439 Context.insert(InstFakeUse::create(Func, T_ecx)); | 3438 Context.insert(InstFakeUse::create(Func, T_ecx)); |
3440 } | 3439 } |
3441 // The address base is also reused in the loop. | 3440 // The address base is also reused in the loop. |
3442 Context.insert(InstFakeUse::create(Func, Addr->getBase())); | 3441 Context.insert(InstFakeUse::create(Func, Addr->getBase())); |
3443 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3442 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
3444 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3443 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
3445 _mov(DestLo, T_eax); | 3444 _mov(DestLo, T_eax); |
3446 _mov(DestHi, T_edx); | 3445 _mov(DestHi, T_edx); |
3447 return; | 3446 return; |
3448 } | 3447 } |
3449 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty); | 3448 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty); |
3450 Variable *T_eax = makeReg(Ty, Reg_eax); | 3449 Variable *T_eax = makeReg(Ty, RegX8632::Reg_eax); |
3451 _mov(T_eax, Addr); | 3450 _mov(T_eax, Addr); |
3452 InstX8632Label *Label = InstX8632Label::create(Func, this); | 3451 InstX8632Label *Label = InstX8632Label::create(Func, this); |
3453 Context.insert(Label); | 3452 Context.insert(Label); |
3454 // We want to pick a different register for T than Eax, so don't use | 3453 // We want to pick a different register for T than Eax, so don't use |
3455 // _mov(T == NULL, T_eax). | 3454 // _mov(T == NULL, T_eax). |
3456 Variable *T = makeReg(Ty); | 3455 Variable *T = makeReg(Ty); |
3457 _mov(T, T_eax); | 3456 _mov(T, T_eax); |
3458 (this->*Op_Lo)(T, Val); | 3457 (this->*Op_Lo)(T, Val); |
3459 const bool Locked = true; | 3458 const bool Locked = true; |
3460 _cmpxchg(Addr, T_eax, T, Locked); | 3459 _cmpxchg(Addr, T_eax, T, Locked); |
3461 _br(InstX8632Br::Br_ne, Label); | 3460 _br(CondX86::Br_ne, Label); |
3462 // If Val is a variable, model the extended live range of Val through | 3461 // If Val is a variable, model the extended live range of Val through |
3463 // the end of the loop, since it will be re-used by the loop. | 3462 // the end of the loop, since it will be re-used by the loop. |
3464 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { | 3463 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { |
3465 Context.insert(InstFakeUse::create(Func, ValVar)); | 3464 Context.insert(InstFakeUse::create(Func, ValVar)); |
3466 } | 3465 } |
3467 // The address base is also reused in the loop. | 3466 // The address base is also reused in the loop. |
3468 Context.insert(InstFakeUse::create(Func, Addr->getBase())); | 3467 Context.insert(InstFakeUse::create(Func, Addr->getBase())); |
3469 _mov(Dest, T_eax); | 3468 _mov(Dest, T_eax); |
3470 } | 3469 } |
3471 | 3470 |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3512 } | 3511 } |
3513 Variable *T_Dest = makeReg(IceType_i32); | 3512 Variable *T_Dest = makeReg(IceType_i32); |
3514 Constant *ThirtyTwo = Ctx->getConstantInt32(IceType_i32, 32); | 3513 Constant *ThirtyTwo = Ctx->getConstantInt32(IceType_i32, 32); |
3515 Constant *ThirtyOne = Ctx->getConstantInt32(IceType_i32, 31); | 3514 Constant *ThirtyOne = Ctx->getConstantInt32(IceType_i32, 31); |
3516 if (Cttz) { | 3515 if (Cttz) { |
3517 _mov(T_Dest, ThirtyTwo); | 3516 _mov(T_Dest, ThirtyTwo); |
3518 } else { | 3517 } else { |
3519 Constant *SixtyThree = Ctx->getConstantInt32(IceType_i32, 63); | 3518 Constant *SixtyThree = Ctx->getConstantInt32(IceType_i32, 63); |
3520 _mov(T_Dest, SixtyThree); | 3519 _mov(T_Dest, SixtyThree); |
3521 } | 3520 } |
3522 _cmov(T_Dest, T, InstX8632::Br_ne); | 3521 _cmov(T_Dest, T, CondX86::Br_ne); |
3523 if (!Cttz) { | 3522 if (!Cttz) { |
3524 _xor(T_Dest, ThirtyOne); | 3523 _xor(T_Dest, ThirtyOne); |
3525 } | 3524 } |
3526 if (Ty == IceType_i32) { | 3525 if (Ty == IceType_i32) { |
3527 _mov(Dest, T_Dest); | 3526 _mov(Dest, T_Dest); |
3528 return; | 3527 return; |
3529 } | 3528 } |
3530 _add(T_Dest, ThirtyTwo); | 3529 _add(T_Dest, ThirtyTwo); |
3531 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3530 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
3532 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3531 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
3533 // Will be using "test" on this, so we need a registerized variable. | 3532 // Will be using "test" on this, so we need a registerized variable. |
3534 Variable *SecondVar = legalizeToVar(SecondVal); | 3533 Variable *SecondVar = legalizeToVar(SecondVal); |
3535 Variable *T_Dest2 = makeReg(IceType_i32); | 3534 Variable *T_Dest2 = makeReg(IceType_i32); |
3536 if (Cttz) { | 3535 if (Cttz) { |
3537 _bsf(T_Dest2, SecondVar); | 3536 _bsf(T_Dest2, SecondVar); |
3538 } else { | 3537 } else { |
3539 _bsr(T_Dest2, SecondVar); | 3538 _bsr(T_Dest2, SecondVar); |
3540 _xor(T_Dest2, ThirtyOne); | 3539 _xor(T_Dest2, ThirtyOne); |
3541 } | 3540 } |
3542 _test(SecondVar, SecondVar); | 3541 _test(SecondVar, SecondVar); |
3543 _cmov(T_Dest2, T_Dest, InstX8632::Br_e); | 3542 _cmov(T_Dest2, T_Dest, CondX86::Br_e); |
3544 _mov(DestLo, T_Dest2); | 3543 _mov(DestLo, T_Dest2); |
3545 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); | 3544 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); |
3546 } | 3545 } |
3547 | 3546 |
3548 namespace { | 3547 namespace { |
3549 | 3548 |
3550 bool isAdd(const Inst *Inst) { | 3549 bool isAdd(const Inst *Inst) { |
3551 if (const InstArithmetic *Arith = | 3550 if (const InstArithmetic *Arith = |
3552 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { | 3551 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { |
3553 return (Arith->getOp() == InstArithmetic::Add); | 3552 return (Arith->getOp() == InstArithmetic::Add); |
(...skipping 306 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3860 | 3859 |
3861 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) { | 3860 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) { |
3862 Func->setError("Phi found in regular instruction list"); | 3861 Func->setError("Phi found in regular instruction list"); |
3863 } | 3862 } |
3864 | 3863 |
3865 void TargetX8632::lowerRet(const InstRet *Inst) { | 3864 void TargetX8632::lowerRet(const InstRet *Inst) { |
3866 Variable *Reg = NULL; | 3865 Variable *Reg = NULL; |
3867 if (Inst->hasRetValue()) { | 3866 if (Inst->hasRetValue()) { |
3868 Operand *Src0 = legalize(Inst->getRetValue()); | 3867 Operand *Src0 = legalize(Inst->getRetValue()); |
3869 if (Src0->getType() == IceType_i64) { | 3868 if (Src0->getType() == IceType_i64) { |
3870 Variable *eax = legalizeToVar(loOperand(Src0), false, Reg_eax); | 3869 Variable *eax = legalizeToVar(loOperand(Src0), false, RegX8632::Reg_eax); |
3871 Variable *edx = legalizeToVar(hiOperand(Src0), false, Reg_edx); | 3870 Variable *edx = legalizeToVar(hiOperand(Src0), false, RegX8632::Reg_edx); |
3872 Reg = eax; | 3871 Reg = eax; |
3873 Context.insert(InstFakeUse::create(Func, edx)); | 3872 Context.insert(InstFakeUse::create(Func, edx)); |
3874 } else if (Src0->getType() == IceType_f32 || | 3873 } else if (Src0->getType() == IceType_f32 || |
3875 Src0->getType() == IceType_f64) { | 3874 Src0->getType() == IceType_f64) { |
3876 _fld(Src0); | 3875 _fld(Src0); |
3877 } else if (isVectorType(Src0->getType())) { | 3876 } else if (isVectorType(Src0->getType())) { |
3878 Reg = legalizeToVar(Src0, false, Reg_xmm0); | 3877 Reg = legalizeToVar(Src0, false, RegX8632::Reg_xmm0); |
3879 } else { | 3878 } else { |
3880 _mov(Reg, Src0, Reg_eax); | 3879 _mov(Reg, Src0, RegX8632::Reg_eax); |
3881 } | 3880 } |
3882 } | 3881 } |
3883 _ret(Reg); | 3882 _ret(Reg); |
3884 // Add a fake use of esp to make sure esp stays alive for the entire | 3883 // Add a fake use of esp to make sure esp stays alive for the entire |
3885 // function. Otherwise post-call esp adjustments get dead-code | 3884 // function. Otherwise post-call esp adjustments get dead-code |
3886 // eliminated. TODO: Are there more places where the fake use | 3885 // eliminated. TODO: Are there more places where the fake use |
3887 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not | 3886 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not |
3888 // have a ret instruction. | 3887 // have a ret instruction. |
3889 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); | 3888 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp); |
3890 Context.insert(InstFakeUse::create(Func, esp)); | 3889 Context.insert(InstFakeUse::create(Func, esp)); |
3891 } | 3890 } |
3892 | 3891 |
3893 void TargetX8632::lowerSelect(const InstSelect *Inst) { | 3892 void TargetX8632::lowerSelect(const InstSelect *Inst) { |
3894 Variable *Dest = Inst->getDest(); | 3893 Variable *Dest = Inst->getDest(); |
3895 Operand *SrcT = Inst->getTrueOperand(); | 3894 Operand *SrcT = Inst->getTrueOperand(); |
3896 Operand *SrcF = Inst->getFalseOperand(); | 3895 Operand *SrcF = Inst->getFalseOperand(); |
3897 Operand *Condition = Inst->getCondition(); | 3896 Operand *Condition = Inst->getCondition(); |
3898 | 3897 |
3899 if (isVectorType(Dest->getType())) { | 3898 if (isVectorType(Dest->getType())) { |
3900 Type SrcTy = SrcT->getType(); | 3899 Type SrcTy = SrcT->getType(); |
3901 Variable *T = makeReg(SrcTy); | 3900 Variable *T = makeReg(SrcTy); |
3902 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); | 3901 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); |
3903 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); | 3902 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); |
3904 if (InstructionSet >= SSE4_1) { | 3903 if (InstructionSet >= SSE4_1) { |
3905 // TODO(wala): If the condition operand is a constant, use blendps | 3904 // TODO(wala): If the condition operand is a constant, use blendps |
3906 // or pblendw. | 3905 // or pblendw. |
3907 // | 3906 // |
3908 // Use blendvps or pblendvb to implement select. | 3907 // Use blendvps or pblendvb to implement select. |
3909 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || | 3908 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || |
3910 SrcTy == IceType_v4f32) { | 3909 SrcTy == IceType_v4f32) { |
3911 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); | 3910 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); |
3912 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); | 3911 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0); |
3913 _movp(xmm0, ConditionRM); | 3912 _movp(xmm0, ConditionRM); |
3914 _psll(xmm0, Ctx->getConstantInt32(IceType_i8, 31)); | 3913 _psll(xmm0, Ctx->getConstantInt32(IceType_i8, 31)); |
3915 _movp(T, SrcFRM); | 3914 _movp(T, SrcFRM); |
3916 _blendvps(T, SrcTRM, xmm0); | 3915 _blendvps(T, SrcTRM, xmm0); |
3917 _movp(Dest, T); | 3916 _movp(Dest, T); |
3918 } else { | 3917 } else { |
3919 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); | 3918 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); |
3920 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 | 3919 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 |
3921 : IceType_v16i8; | 3920 : IceType_v16i8; |
3922 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0); | 3921 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0); |
3923 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); | 3922 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); |
3924 _movp(T, SrcFRM); | 3923 _movp(T, SrcFRM); |
3925 _pblendvb(T, SrcTRM, xmm0); | 3924 _pblendvb(T, SrcTRM, xmm0); |
3926 _movp(Dest, T); | 3925 _movp(Dest, T); |
3927 } | 3926 } |
3928 return; | 3927 return; |
3929 } | 3928 } |
3930 // Lower select without SSE4.1: | 3929 // Lower select without SSE4.1: |
3931 // a=d?b:c ==> | 3930 // a=d?b:c ==> |
3932 // if elementtype(d) != i1: | 3931 // if elementtype(d) != i1: |
(...skipping 27 matching lines...) Expand all Loading... |
3960 InstX8632Label *Label = InstX8632Label::create(Func, this); | 3959 InstX8632Label *Label = InstX8632Label::create(Func, this); |
3961 | 3960 |
3962 if (Dest->getType() == IceType_i64) { | 3961 if (Dest->getType() == IceType_i64) { |
3963 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); | 3962 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
3964 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); | 3963 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
3965 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true); | 3964 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true); |
3966 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true); | 3965 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true); |
3967 _cmp(ConditionRM, Zero); | 3966 _cmp(ConditionRM, Zero); |
3968 _mov(DestLo, SrcLoRI); | 3967 _mov(DestLo, SrcLoRI); |
3969 _mov(DestHi, SrcHiRI); | 3968 _mov(DestHi, SrcHiRI); |
3970 _br(InstX8632Br::Br_ne, Label); | 3969 _br(CondX86::Br_ne, Label); |
3971 Context.insert(InstFakeUse::create(Func, DestLo)); | 3970 Context.insert(InstFakeUse::create(Func, DestLo)); |
3972 Context.insert(InstFakeUse::create(Func, DestHi)); | 3971 Context.insert(InstFakeUse::create(Func, DestHi)); |
3973 Operand *SrcFLo = loOperand(SrcF); | 3972 Operand *SrcFLo = loOperand(SrcF); |
3974 Operand *SrcFHi = hiOperand(SrcF); | 3973 Operand *SrcFHi = hiOperand(SrcF); |
3975 SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true); | 3974 SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true); |
3976 SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true); | 3975 SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true); |
3977 _mov(DestLo, SrcLoRI); | 3976 _mov(DestLo, SrcLoRI); |
3978 _mov(DestHi, SrcHiRI); | 3977 _mov(DestHi, SrcHiRI); |
3979 } else { | 3978 } else { |
3980 _cmp(ConditionRM, Zero); | 3979 _cmp(ConditionRM, Zero); |
3981 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true); | 3980 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true); |
3982 _mov(Dest, SrcT); | 3981 _mov(Dest, SrcT); |
3983 _br(InstX8632Br::Br_ne, Label); | 3982 _br(CondX86::Br_ne, Label); |
3984 Context.insert(InstFakeUse::create(Func, Dest)); | 3983 Context.insert(InstFakeUse::create(Func, Dest)); |
3985 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true); | 3984 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true); |
3986 _mov(Dest, SrcF); | 3985 _mov(Dest, SrcF); |
3987 } | 3986 } |
3988 | 3987 |
3989 Context.insert(Label); | 3988 Context.insert(Label); |
3990 } | 3989 } |
3991 | 3990 |
3992 void TargetX8632::lowerStore(const InstStore *Inst) { | 3991 void TargetX8632::lowerStore(const InstStore *Inst) { |
3993 Operand *Value = Inst->getData(); | 3992 Operand *Value = Inst->getData(); |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4041 // OK, we'll be slightly less naive by forcing Src into a physical | 4040 // OK, we'll be slightly less naive by forcing Src into a physical |
4042 // register if there are 2 or more uses. | 4041 // register if there are 2 or more uses. |
4043 if (NumCases >= 2) | 4042 if (NumCases >= 2) |
4044 Src0 = legalizeToVar(Src0, true); | 4043 Src0 = legalizeToVar(Src0, true); |
4045 else | 4044 else |
4046 Src0 = legalize(Src0, Legal_Reg | Legal_Mem, true); | 4045 Src0 = legalize(Src0, Legal_Reg | Legal_Mem, true); |
4047 for (SizeT I = 0; I < NumCases; ++I) { | 4046 for (SizeT I = 0; I < NumCases; ++I) { |
4048 // TODO(stichnot): Correct lowering for IceType_i64. | 4047 // TODO(stichnot): Correct lowering for IceType_i64. |
4049 Constant *Value = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I)); | 4048 Constant *Value = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I)); |
4050 _cmp(Src0, Value); | 4049 _cmp(Src0, Value); |
4051 _br(InstX8632Br::Br_e, Inst->getLabel(I)); | 4050 _br(CondX86::Br_e, Inst->getLabel(I)); |
4052 } | 4051 } |
4053 | 4052 |
4054 _br(Inst->getLabelDefault()); | 4053 _br(Inst->getLabelDefault()); |
4055 } | 4054 } |
4056 | 4055 |
4057 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind, | 4056 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind, |
4058 Variable *Dest, Operand *Src0, | 4057 Variable *Dest, Operand *Src0, |
4059 Operand *Src1) { | 4058 Operand *Src1) { |
4060 assert(isVectorType(Dest->getType())); | 4059 assert(isVectorType(Dest->getType())); |
4061 Type Ty = Dest->getType(); | 4060 Type Ty = Dest->getType(); |
(...skipping 487 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4549 Str << "\t.align\t" << Align << "\n"; | 4548 Str << "\t.align\t" << Align << "\n"; |
4550 Str << MangledName << ":\n"; | 4549 Str << MangledName << ":\n"; |
4551 for (SizeT i = 0; i < Size; ++i) { | 4550 for (SizeT i = 0; i < Size; ++i) { |
4552 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; | 4551 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; |
4553 } | 4552 } |
4554 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; | 4553 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; |
4555 } | 4554 } |
4556 } | 4555 } |
4557 | 4556 |
4558 } // end of namespace Ice | 4557 } // end of namespace Ice |
OLD | NEW |