Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(198)

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 582113003: Lift register and condition code enums out into their own file. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: add first and last | Created 6 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===// 1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//
2 // 2 //
3 // The Subzero Code Generator 3 // The Subzero Code Generator
4 // 4 //
5 // This file is distributed under the University of Illinois Open Source 5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details. 6 // License. See LICENSE.TXT for details.
7 // 7 //
8 //===----------------------------------------------------------------------===// 8 //===----------------------------------------------------------------------===//
9 // 9 //
10 // This file implements the TargetLoweringX8632 class, which 10 // This file implements the TargetLoweringX8632 class, which
11 // consists almost entirely of the lowering sequence for each 11 // consists almost entirely of the lowering sequence for each
12 // high-level instruction. It also implements 12 // high-level instruction. It also implements
13 // TargetX8632Fast::postLower() which does the simplest possible 13 // TargetX8632Fast::postLower() which does the simplest possible
14 // register allocation for the "fast" target. 14 // register allocation for the "fast" target.
15 // 15 //
16 //===----------------------------------------------------------------------===// 16 //===----------------------------------------------------------------------===//
17 17
18 #include "IceDefs.h" 18 #include "IceDefs.h"
19 #include "IceCfg.h" 19 #include "IceCfg.h"
20 #include "IceCfgNode.h" 20 #include "IceCfgNode.h"
21 #include "IceClFlags.h" 21 #include "IceClFlags.h"
22 #include "IceInstX8632.h" 22 #include "IceInstX8632.h"
23 #include "IceOperand.h" 23 #include "IceOperand.h"
24 #include "IceRegistersX8632.h"
24 #include "IceTargetLoweringX8632.def" 25 #include "IceTargetLoweringX8632.def"
25 #include "IceTargetLoweringX8632.h" 26 #include "IceTargetLoweringX8632.h"
26 #include "llvm/ADT/DenseMap.h" 27 #include "llvm/ADT/DenseMap.h"
27 #include "llvm/Support/MathExtras.h" 28 #include "llvm/Support/MathExtras.h"
28 #include "llvm/Support/CommandLine.h" 29 #include "llvm/Support/CommandLine.h"
29 30
30 namespace Ice { 31 namespace Ice {
31 32
32 namespace { 33 namespace {
33 34
(...skipping 10 matching lines...) Expand all
44 // table by hand, good execution tests are helpful. 45 // table by hand, good execution tests are helpful.
45 // 46 //
46 // The last two columns describe the case when the operands are vectors 47 // The last two columns describe the case when the operands are vectors
47 // of floating point values. For most fcmp conditions, there is a clear 48 // of floating point values. For most fcmp conditions, there is a clear
48 // mapping to a single x86 cmpps instruction variant. Some fcmp 49 // mapping to a single x86 cmpps instruction variant. Some fcmp
49 // conditions require special code to handle and these are marked in the 50 // conditions require special code to handle and these are marked in the
50 // table with a Cmpps_Invalid predicate. 51 // table with a Cmpps_Invalid predicate.
51 const struct TableFcmp_ { 52 const struct TableFcmp_ {
52 uint32_t Default; 53 uint32_t Default;
53 bool SwapScalarOperands; 54 bool SwapScalarOperands;
54 InstX8632::BrCond C1, C2; 55 CondX86::BrCond C1, C2;
55 bool SwapVectorOperands; 56 bool SwapVectorOperands;
56 InstX8632Cmpps::CmppsCond Predicate; 57 CondX86::CmppsCond Predicate;
57 } TableFcmp[] = { 58 } TableFcmp[] = {
58 #define X(val, dflt, swapS, C1, C2, swapV, pred) \ 59 #define X(val, dflt, swapS, C1, C2, swapV, pred) \
59 { \ 60 { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \
60 dflt, swapS, InstX8632Br::C1, InstX8632Br::C2, swapV, InstX8632Cmpps::pred \
61 } \
62 , 61 ,
63 FCMPX8632_TABLE 62 FCMPX8632_TABLE
64 #undef X 63 #undef X
65 }; 64 };
66 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp); 65 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);
67 66
68 // The following table summarizes the logic for lowering the icmp instruction 67 // The following table summarizes the logic for lowering the icmp instruction
69 // for i32 and narrower types. Each icmp condition has a clear mapping to an 68 // for i32 and narrower types. Each icmp condition has a clear mapping to an
70 // x86 conditional branch instruction. 69 // x86 conditional branch instruction.
71 70
72 const struct TableIcmp32_ { 71 const struct TableIcmp32_ {
73 InstX8632::BrCond Mapping; 72 CondX86::BrCond Mapping;
74 } TableIcmp32[] = { 73 } TableIcmp32[] = {
75 #define X(val, C_32, C1_64, C2_64, C3_64) \ 74 #define X(val, C_32, C1_64, C2_64, C3_64) \
76 { InstX8632Br::C_32 } \ 75 { CondX86::C_32 } \
77 , 76 ,
78 ICMPX8632_TABLE 77 ICMPX8632_TABLE
79 #undef X 78 #undef X
80 }; 79 };
81 const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32); 80 const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);
82 81
83 // The following table summarizes the logic for lowering the icmp instruction 82 // The following table summarizes the logic for lowering the icmp instruction
84 // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and 83 // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and
85 // conditional branches are needed. For the other conditions, three separate 84 // conditional branches are needed. For the other conditions, three separate
86 // conditional branches are needed. 85 // conditional branches are needed.
87 const struct TableIcmp64_ { 86 const struct TableIcmp64_ {
88 InstX8632::BrCond C1, C2, C3; 87 CondX86::BrCond C1, C2, C3;
89 } TableIcmp64[] = { 88 } TableIcmp64[] = {
90 #define X(val, C_32, C1_64, C2_64, C3_64) \ 89 #define X(val, C_32, C1_64, C2_64, C3_64) \
91 { InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 } \ 90 { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 } \
92 , 91 ,
93 ICMPX8632_TABLE 92 ICMPX8632_TABLE
94 #undef X 93 #undef X
95 }; 94 };
96 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64); 95 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);
97 96
98 InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) { 97 CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
99 size_t Index = static_cast<size_t>(Cond); 98 size_t Index = static_cast<size_t>(Cond);
100 assert(Index < TableIcmp32Size); 99 assert(Index < TableIcmp32Size);
101 return TableIcmp32[Index].Mapping; 100 return TableIcmp32[Index].Mapping;
102 } 101 }
103 102
104 const struct TableTypeX8632Attributes_ { 103 const struct TableTypeX8632Attributes_ {
105 Type InVectorElementType; 104 Type InVectorElementType;
106 } TableTypeX8632Attributes[] = { 105 } TableTypeX8632Attributes[] = {
107 #define X(tag, elementty, cvt, sdss, pack, width) \ 106 #define X(tag, elementty, cvt, sdss, pack, width) \
108 { elementty } \ 107 { elementty } \
(...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after
257 #undef X 256 #undef X
258 } 257 }
259 } 258 }
260 259
261 } // end of anonymous namespace 260 } // end of anonymous namespace
262 261
263 TargetX8632::TargetX8632(Cfg *Func) 262 TargetX8632::TargetX8632(Cfg *Func)
264 : TargetLowering(Func), InstructionSet(CLInstructionSet), 263 : TargetLowering(Func), InstructionSet(CLInstructionSet),
265 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0), 264 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0),
266 SpillAreaSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false), 265 SpillAreaSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),
267 PhysicalRegisters(VarList(Reg_NUM)) { 266 PhysicalRegisters(VarList(RegX8632::Reg_NUM)) {
268 // TODO: Don't initialize IntegerRegisters and friends every time. 267 // TODO: Don't initialize IntegerRegisters and friends every time.
269 // Instead, initialize in some sort of static initializer for the 268 // Instead, initialize in some sort of static initializer for the
270 // class. 269 // class.
271 llvm::SmallBitVector IntegerRegisters(Reg_NUM); 270 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM);
272 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM); 271 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM);
273 llvm::SmallBitVector FloatRegisters(Reg_NUM); 272 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM);
274 llvm::SmallBitVector VectorRegisters(Reg_NUM); 273 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM);
275 llvm::SmallBitVector InvalidRegisters(Reg_NUM); 274 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM);
276 ScratchRegs.resize(Reg_NUM); 275 ScratchRegs.resize(RegX8632::Reg_NUM);
277 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \ 276 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
278 frameptr, isI8, isInt, isFP) \ 277 frameptr, isI8, isInt, isFP) \
279 IntegerRegisters[val] = isInt; \ 278 IntegerRegisters[RegX8632::val] = isInt; \
280 IntegerRegistersI8[val] = isI8; \ 279 IntegerRegistersI8[RegX8632::val] = isI8; \
281 FloatRegisters[val] = isFP; \ 280 FloatRegisters[RegX8632::val] = isFP; \
282 VectorRegisters[val] = isFP; \ 281 VectorRegisters[RegX8632::val] = isFP; \
283 ScratchRegs[val] = scratch; 282 ScratchRegs[RegX8632::val] = scratch;
284 REGX8632_TABLE; 283 REGX8632_TABLE;
285 #undef X 284 #undef X
286 TypeToRegisterSet[IceType_void] = InvalidRegisters; 285 TypeToRegisterSet[IceType_void] = InvalidRegisters;
287 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8; 286 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;
288 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8; 287 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;
289 TypeToRegisterSet[IceType_i16] = IntegerRegisters; 288 TypeToRegisterSet[IceType_i16] = IntegerRegisters;
290 TypeToRegisterSet[IceType_i32] = IntegerRegisters; 289 TypeToRegisterSet[IceType_i32] = IntegerRegisters;
291 TypeToRegisterSet[IceType_i64] = IntegerRegisters; 290 TypeToRegisterSet[IceType_i64] = IntegerRegisters;
292 TypeToRegisterSet[IceType_f32] = FloatRegisters; 291 TypeToRegisterSet[IceType_f32] = FloatRegisters;
293 TypeToRegisterSet[IceType_f64] = FloatRegisters; 292 TypeToRegisterSet[IceType_f64] = FloatRegisters;
(...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after
453 } 452 }
454 453
455 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) { 454 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {
456 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) { 455 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {
457 return Br->optimizeBranch(NextNode); 456 return Br->optimizeBranch(NextNode);
458 } 457 }
459 return false; 458 return false;
460 } 459 }
461 460
462 IceString TargetX8632::RegNames[] = { 461 IceString TargetX8632::RegNames[] = {
463 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \ 462 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
464 frameptr, isI8, isInt, isFP) \ 463 frameptr, isI8, isInt, isFP) \
465 name, 464 name,
466 REGX8632_TABLE 465 REGX8632_TABLE
467 #undef X 466 #undef X
468 }; 467 };
469 468
470 Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) { 469 Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) {
471 assert(RegNum < PhysicalRegisters.size()); 470 assert(RegNum < PhysicalRegisters.size());
472 Variable *Reg = PhysicalRegisters[RegNum]; 471 Variable *Reg = PhysicalRegisters[RegNum];
473 if (Reg == NULL) { 472 if (Reg == NULL) {
474 CfgNode *Node = NULL; // NULL means multi-block lifetime 473 CfgNode *Node = NULL; // NULL means multi-block lifetime
475 Reg = Func->makeVariable(IceType_i32, Node); 474 Reg = Func->makeVariable(IceType_i32, Node);
476 Reg->setRegNum(RegNum); 475 Reg->setRegNum(RegNum);
477 PhysicalRegisters[RegNum] = Reg; 476 PhysicalRegisters[RegNum] = Reg;
478 } 477 }
479 return Reg; 478 return Reg;
480 } 479 }
481 480
482 IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const { 481 IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {
483 assert(RegNum < Reg_NUM); 482 assert(RegNum < RegX8632::Reg_NUM);
484 static IceString RegNames8[] = { 483 static IceString RegNames8[] = {
485 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \ 484 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
486 frameptr, isI8, isInt, isFP) \ 485 frameptr, isI8, isInt, isFP) \
487 name8, 486 name8,
488 REGX8632_TABLE 487 REGX8632_TABLE
489 #undef X 488 #undef X
490 }; 489 };
491 static IceString RegNames16[] = { 490 static IceString RegNames16[] = {
492 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \ 491 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
493 frameptr, isI8, isInt, isFP) \ 492 frameptr, isI8, isInt, isFP) \
494 name16, 493 name16,
495 REGX8632_TABLE 494 REGX8632_TABLE
496 #undef X 495 #undef X
497 }; 496 };
498 switch (Ty) { 497 switch (Ty) {
499 case IceType_i1: 498 case IceType_i1:
500 case IceType_i8: 499 case IceType_i8:
501 return RegNames8[RegNum]; 500 return RegNames8[RegNum];
502 case IceType_i16: 501 case IceType_i16:
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
539 538
540 for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS; 539 for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS;
541 ++I) { 540 ++I) {
542 Variable *Arg = Args[I]; 541 Variable *Arg = Args[I];
543 Type Ty = Arg->getType(); 542 Type Ty = Arg->getType();
544 if (!isVectorType(Ty)) 543 if (!isVectorType(Ty))
545 continue; 544 continue;
546 // Replace Arg in the argument list with the home register. Then 545 // Replace Arg in the argument list with the home register. Then
547 // generate an instruction in the prolog to copy the home register 546 // generate an instruction in the prolog to copy the home register
548 // to the assigned location of Arg. 547 // to the assigned location of Arg.
549 int32_t RegNum = Reg_xmm0 + NumXmmArgs; 548 int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs;
550 ++NumXmmArgs; 549 ++NumXmmArgs;
551 IceString Name = "home_reg:" + Arg->getName(); 550 IceString Name = "home_reg:" + Arg->getName();
552 const CfgNode *DefNode = NULL; 551 const CfgNode *DefNode = NULL;
553 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name); 552 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name);
554 RegisterArg->setRegNum(RegNum); 553 RegisterArg->setRegNum(RegNum);
555 RegisterArg->setIsArg(Func); 554 RegisterArg->setIsArg(Func);
556 Arg->setIsArg(Func, false); 555 Arg->setIsArg(Func, false);
557 556
558 Args[I] = RegisterArg; 557 Args[I] = RegisterArg;
559 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); 558 Context.insert(InstAssign::create(Func, Arg, RegisterArg));
(...skipping 215 matching lines...) Expand 10 before | Expand all | Expand 10 after
775 _push(getPhysicalRegister(i), SuppressStackAdjustment); 774 _push(getPhysicalRegister(i), SuppressStackAdjustment);
776 } 775 }
777 } 776 }
778 Ctx->statsUpdateRegistersSaved(NumCallee); 777 Ctx->statsUpdateRegistersSaved(NumCallee);
779 778
780 // Generate "push ebp; mov ebp, esp" 779 // Generate "push ebp; mov ebp, esp"
781 if (IsEbpBasedFrame) { 780 if (IsEbpBasedFrame) {
782 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None)) 781 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))
783 .count() == 0); 782 .count() == 0);
784 PreservedRegsSizeBytes += 4; 783 PreservedRegsSizeBytes += 4;
785 Variable *ebp = getPhysicalRegister(Reg_ebp); 784 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
786 Variable *esp = getPhysicalRegister(Reg_esp); 785 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
787 const bool SuppressStackAdjustment = true; 786 const bool SuppressStackAdjustment = true;
788 _push(ebp, SuppressStackAdjustment); 787 _push(ebp, SuppressStackAdjustment);
789 _mov(ebp, esp); 788 _mov(ebp, esp);
790 } 789 }
791 790
792 // Align the variables area. SpillAreaPaddingBytes is the size of 791 // Align the variables area. SpillAreaPaddingBytes is the size of
793 // the region after the preserved registers and before the spill 792 // the region after the preserved registers and before the spill
794 // areas. 793 // areas.
795 uint32_t SpillAreaPaddingBytes = 0; 794 uint32_t SpillAreaPaddingBytes = 0;
796 if (SpillAreaAlignmentBytes) { 795 if (SpillAreaAlignmentBytes) {
(...skipping 17 matching lines...) Expand all
814 813
815 // Align esp if necessary. 814 // Align esp if necessary.
816 if (NeedsStackAlignment) { 815 if (NeedsStackAlignment) {
817 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; 816 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;
818 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes); 817 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
819 SpillAreaSizeBytes = StackSize - StackOffset; 818 SpillAreaSizeBytes = StackSize - StackOffset;
820 } 819 }
821 820
822 // Generate "sub esp, SpillAreaSizeBytes" 821 // Generate "sub esp, SpillAreaSizeBytes"
823 if (SpillAreaSizeBytes) 822 if (SpillAreaSizeBytes)
824 _sub(getPhysicalRegister(Reg_esp), 823 _sub(getPhysicalRegister(RegX8632::Reg_esp),
825 Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes)); 824 Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));
826 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes); 825 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
827 826
828 resetStackAdjustment(); 827 resetStackAdjustment();
829 828
830 // Fill in stack offsets for stack args, and copy args into registers 829 // Fill in stack offsets for stack args, and copy args into registers
831 // for those that were register-allocated. Args are pushed right to 830 // for those that were register-allocated. Args are pushed right to
832 // left, so Arg[0] is closest to the stack/frame pointer. 831 // left, so Arg[0] is closest to the stack/frame pointer.
833 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); 832 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
834 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES; 833 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;
(...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after
924 if (RI == E) 923 if (RI == E)
925 return; 924 return;
926 925
927 // Convert the reverse_iterator position into its corresponding 926 // Convert the reverse_iterator position into its corresponding
928 // (forward) iterator position. 927 // (forward) iterator position.
929 InstList::iterator InsertPoint = RI.base(); 928 InstList::iterator InsertPoint = RI.base();
930 --InsertPoint; 929 --InsertPoint;
931 Context.init(Node); 930 Context.init(Node);
932 Context.setInsertPoint(InsertPoint); 931 Context.setInsertPoint(InsertPoint);
933 932
934 Variable *esp = getPhysicalRegister(Reg_esp); 933 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
935 if (IsEbpBasedFrame) { 934 if (IsEbpBasedFrame) {
936 Variable *ebp = getPhysicalRegister(Reg_ebp); 935 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);
937 _mov(esp, ebp); 936 _mov(esp, ebp);
938 _pop(ebp); 937 _pop(ebp);
939 } else { 938 } else {
940 // add esp, SpillAreaSizeBytes 939 // add esp, SpillAreaSizeBytes
941 if (SpillAreaSizeBytes) 940 if (SpillAreaSizeBytes)
942 _add(esp, Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes)); 941 _add(esp, Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));
943 } 942 }
944 943
945 // Add pop instructions for preserved registers. 944 // Add pop instructions for preserved registers.
946 llvm::SmallBitVector CalleeSaves = 945 llvm::SmallBitVector CalleeSaves =
947 getRegisterSet(RegSet_CalleeSave, RegSet_None); 946 getRegisterSet(RegSet_CalleeSave, RegSet_None);
948 for (SizeT i = 0; i < CalleeSaves.size(); ++i) { 947 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
949 SizeT j = CalleeSaves.size() - i - 1; 948 SizeT j = CalleeSaves.size() - i - 1;
950 if (j == Reg_ebp && IsEbpBasedFrame) 949 if (j == RegX8632::Reg_ebp && IsEbpBasedFrame)
951 continue; 950 continue;
952 if (CalleeSaves[j] && RegsUsed[j]) { 951 if (CalleeSaves[j] && RegsUsed[j]) {
953 _pop(getPhysicalRegister(j)); 952 _pop(getPhysicalRegister(j));
954 } 953 }
955 } 954 }
956 } 955 }
957 956
958 template <typename T> struct PoolTypeConverter {}; 957 template <typename T> struct PoolTypeConverter {};
959 958
960 template <> struct PoolTypeConverter<float> { 959 template <> struct PoolTypeConverter<float> {
(...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after
1097 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset, 1096 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,
1098 Mem->getIndex(), Mem->getShift(), 1097 Mem->getIndex(), Mem->getShift(),
1099 Mem->getSegmentRegister()); 1098 Mem->getSegmentRegister());
1100 } 1099 }
1101 llvm_unreachable("Unsupported operand type"); 1100 llvm_unreachable("Unsupported operand type");
1102 return NULL; 1101 return NULL;
1103 } 1102 }
1104 1103
1105 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include, 1104 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,
1106 RegSetMask Exclude) const { 1105 RegSetMask Exclude) const {
1107 llvm::SmallBitVector Registers(Reg_NUM); 1106 llvm::SmallBitVector Registers(RegX8632::Reg_NUM);
1108 1107
1109 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \ 1108 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \
1110 frameptr, isI8, isInt, isFP) \ 1109 frameptr, isI8, isInt, isFP) \
1111 if (scratch && (Include & RegSet_CallerSave)) \ 1110 if (scratch && (Include & RegSet_CallerSave)) \
1112 Registers[val] = true; \ 1111 Registers[RegX8632::val] = true; \
1113 if (preserved && (Include & RegSet_CalleeSave)) \ 1112 if (preserved && (Include & RegSet_CalleeSave)) \
1114 Registers[val] = true; \ 1113 Registers[RegX8632::val] = true; \
1115 if (stackptr && (Include & RegSet_StackPointer)) \ 1114 if (stackptr && (Include & RegSet_StackPointer)) \
1116 Registers[val] = true; \ 1115 Registers[RegX8632::val] = true; \
1117 if (frameptr && (Include & RegSet_FramePointer)) \ 1116 if (frameptr && (Include & RegSet_FramePointer)) \
1118 Registers[val] = true; \ 1117 Registers[RegX8632::val] = true; \
1119 if (scratch && (Exclude & RegSet_CallerSave)) \ 1118 if (scratch && (Exclude & RegSet_CallerSave)) \
1120 Registers[val] = false; \ 1119 Registers[RegX8632::val] = false; \
1121 if (preserved && (Exclude & RegSet_CalleeSave)) \ 1120 if (preserved && (Exclude & RegSet_CalleeSave)) \
1122 Registers[val] = false; \ 1121 Registers[RegX8632::val] = false; \
1123 if (stackptr && (Exclude & RegSet_StackPointer)) \ 1122 if (stackptr && (Exclude & RegSet_StackPointer)) \
1124 Registers[val] = false; \ 1123 Registers[RegX8632::val] = false; \
1125 if (frameptr && (Exclude & RegSet_FramePointer)) \ 1124 if (frameptr && (Exclude & RegSet_FramePointer)) \
1126 Registers[val] = false; 1125 Registers[RegX8632::val] = false;
1127 1126
1128 REGX8632_TABLE 1127 REGX8632_TABLE
1129 1128
1130 #undef X 1129 #undef X
1131 1130
1132 return Registers; 1131 return Registers;
1133 } 1132 }
1134 1133
1135 void TargetX8632::lowerAlloca(const InstAlloca *Inst) { 1134 void TargetX8632::lowerAlloca(const InstAlloca *Inst) {
1136 IsEbpBasedFrame = true; 1135 IsEbpBasedFrame = true;
1137 // Conservatively require the stack to be aligned. Some stack 1136 // Conservatively require the stack to be aligned. Some stack
1138 // adjustment operations implemented below assume that the stack is 1137 // adjustment operations implemented below assume that the stack is
1139 // aligned before the alloca. All the alloca code ensures that the 1138 // aligned before the alloca. All the alloca code ensures that the
1140 // stack alignment is preserved after the alloca. The stack alignment 1139 // stack alignment is preserved after the alloca. The stack alignment
1141 // restriction can be relaxed in some cases. 1140 // restriction can be relaxed in some cases.
1142 NeedsStackAlignment = true; 1141 NeedsStackAlignment = true;
1143 1142
1144 // TODO(sehr,stichnot): minimize the number of adjustments of esp, etc. 1143 // TODO(sehr,stichnot): minimize the number of adjustments of esp, etc.
1145 Variable *esp = getPhysicalRegister(Reg_esp); 1144 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);
1146 Operand *TotalSize = legalize(Inst->getSizeInBytes()); 1145 Operand *TotalSize = legalize(Inst->getSizeInBytes());
1147 Variable *Dest = Inst->getDest(); 1146 Variable *Dest = Inst->getDest();
1148 uint32_t AlignmentParam = Inst->getAlignInBytes(); 1147 uint32_t AlignmentParam = Inst->getAlignInBytes();
1149 // For default align=0, set it to the real value 1, to avoid any 1148 // For default align=0, set it to the real value 1, to avoid any
1150 // bit-manipulation problems below. 1149 // bit-manipulation problems below.
1151 AlignmentParam = std::max(AlignmentParam, 1u); 1150 AlignmentParam = std::max(AlignmentParam, 1u);
1152 1151
1153 // LLVM enforces power of 2 alignment. 1152 // LLVM enforces power of 2 alignment.
1154 assert((AlignmentParam & (AlignmentParam - 1)) == 0); 1153 assert((AlignmentParam & (AlignmentParam - 1)) == 0);
1155 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); 1154 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
1226 case InstArithmetic::Sub: 1225 case InstArithmetic::Sub:
1227 _mov(T_Lo, Src0Lo); 1226 _mov(T_Lo, Src0Lo);
1228 _sub(T_Lo, Src1Lo); 1227 _sub(T_Lo, Src1Lo);
1229 _mov(DestLo, T_Lo); 1228 _mov(DestLo, T_Lo);
1230 _mov(T_Hi, Src0Hi); 1229 _mov(T_Hi, Src0Hi);
1231 _sbb(T_Hi, Src1Hi); 1230 _sbb(T_Hi, Src1Hi);
1232 _mov(DestHi, T_Hi); 1231 _mov(DestHi, T_Hi);
1233 break; 1232 break;
1234 case InstArithmetic::Mul: { 1233 case InstArithmetic::Mul: {
1235 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; 1234 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
1236 Variable *T_4Lo = makeReg(IceType_i32, Reg_eax); 1235 Variable *T_4Lo = makeReg(IceType_i32, RegX8632::Reg_eax);
1237 Variable *T_4Hi = makeReg(IceType_i32, Reg_edx); 1236 Variable *T_4Hi = makeReg(IceType_i32, RegX8632::Reg_edx);
1238 // gcc does the following: 1237 // gcc does the following:
1239 // a=b*c ==> 1238 // a=b*c ==>
1240 // t1 = b.hi; t1 *=(imul) c.lo 1239 // t1 = b.hi; t1 *=(imul) c.lo
1241 // t2 = c.hi; t2 *=(imul) b.lo 1240 // t2 = c.hi; t2 *=(imul) b.lo
1242 // t3:eax = b.lo 1241 // t3:eax = b.lo
1243 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo 1242 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo
1244 // a.lo = t4.lo 1243 // a.lo = t4.lo
1245 // t4.hi += t1 1244 // t4.hi += t1
1246 // t4.hi += t2 1245 // t4.hi += t2
1247 // a.hi = t4.hi 1246 // a.hi = t4.hi
1248 // The mul instruction cannot take an immediate operand. 1247 // The mul instruction cannot take an immediate operand.
1249 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem); 1248 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);
1250 _mov(T_1, Src0Hi); 1249 _mov(T_1, Src0Hi);
1251 _imul(T_1, Src1Lo); 1250 _imul(T_1, Src1Lo);
1252 _mov(T_2, Src1Hi); 1251 _mov(T_2, Src1Hi);
1253 _imul(T_2, Src0Lo); 1252 _imul(T_2, Src0Lo);
1254 _mov(T_3, Src0Lo, Reg_eax); 1253 _mov(T_3, Src0Lo, RegX8632::Reg_eax);
1255 _mul(T_4Lo, T_3, Src1Lo); 1254 _mul(T_4Lo, T_3, Src1Lo);
1256 // The mul instruction produces two dest variables, edx:eax. We 1255 // The mul instruction produces two dest variables, edx:eax. We
1257 // create a fake definition of edx to account for this. 1256 // create a fake definition of edx to account for this.
1258 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); 1257 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));
1259 _mov(DestLo, T_4Lo); 1258 _mov(DestLo, T_4Lo);
1260 _add(T_4Hi, T_1); 1259 _add(T_4Hi, T_1);
1261 _add(T_4Hi, T_2); 1260 _add(T_4Hi, T_2);
1262 _mov(DestHi, T_4Hi); 1261 _mov(DestHi, T_4Hi);
1263 } break; 1262 } break;
1264 case InstArithmetic::Shl: { 1263 case InstArithmetic::Shl: {
(...skipping 10 matching lines...) Expand all
1275 // use(t3) 1274 // use(t3)
1276 // t3 = t2 1275 // t3 = t2
1277 // t2 = 0 1276 // t2 = 0
1278 // L1: 1277 // L1:
1279 // a.lo = t2 1278 // a.lo = t2
1280 // a.hi = t3 1279 // a.hi = t3
1281 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; 1280 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
1282 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); 1281 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);
1283 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1282 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1284 InstX8632Label *Label = InstX8632Label::create(Func, this); 1283 InstX8632Label *Label = InstX8632Label::create(Func, this);
1285 _mov(T_1, Src1Lo, Reg_ecx); 1284 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
1286 _mov(T_2, Src0Lo); 1285 _mov(T_2, Src0Lo);
1287 _mov(T_3, Src0Hi); 1286 _mov(T_3, Src0Hi);
1288 _shld(T_3, T_2, T_1); 1287 _shld(T_3, T_2, T_1);
1289 _shl(T_2, T_1); 1288 _shl(T_2, T_1);
1290 _test(T_1, BitTest); 1289 _test(T_1, BitTest);
1291 _br(InstX8632Br::Br_e, Label); 1290 _br(CondX86::Br_e, Label);
1292 // Because of the intra-block control flow, we need to fake a use 1291 // Because of the intra-block control flow, we need to fake a use
1293 // of T_3 to prevent its earlier definition from being dead-code 1292 // of T_3 to prevent its earlier definition from being dead-code
1294 // eliminated in the presence of its later definition. 1293 // eliminated in the presence of its later definition.
1295 Context.insert(InstFakeUse::create(Func, T_3)); 1294 Context.insert(InstFakeUse::create(Func, T_3));
1296 _mov(T_3, T_2); 1295 _mov(T_3, T_2);
1297 _mov(T_2, Zero); 1296 _mov(T_2, Zero);
1298 Context.insert(Label); 1297 Context.insert(Label);
1299 _mov(DestLo, T_2); 1298 _mov(DestLo, T_2);
1300 _mov(DestHi, T_3); 1299 _mov(DestHi, T_3);
1301 } break; 1300 } break;
1302 case InstArithmetic::Lshr: { 1301 case InstArithmetic::Lshr: {
1303 // a=b>>c (unsigned) ==> 1302 // a=b>>c (unsigned) ==>
1304 // t1:ecx = c.lo & 0xff 1303 // t1:ecx = c.lo & 0xff
1305 // t2 = b.lo 1304 // t2 = b.lo
1306 // t3 = b.hi 1305 // t3 = b.hi
1307 // t2 = shrd t2, t3, t1 1306 // t2 = shrd t2, t3, t1
1308 // t3 = shr t3, t1 1307 // t3 = shr t3, t1
1309 // test t1, 0x20 1308 // test t1, 0x20
1310 // je L1 1309 // je L1
1311 // use(t2) 1310 // use(t2)
1312 // t2 = t3 1311 // t2 = t3
1313 // t3 = 0 1312 // t3 = 0
1314 // L1: 1313 // L1:
1315 // a.lo = t2 1314 // a.lo = t2
1316 // a.hi = t3 1315 // a.hi = t3
1317 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; 1316 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
1318 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); 1317 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);
1319 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1318 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1320 InstX8632Label *Label = InstX8632Label::create(Func, this); 1319 InstX8632Label *Label = InstX8632Label::create(Func, this);
1321 _mov(T_1, Src1Lo, Reg_ecx); 1320 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
1322 _mov(T_2, Src0Lo); 1321 _mov(T_2, Src0Lo);
1323 _mov(T_3, Src0Hi); 1322 _mov(T_3, Src0Hi);
1324 _shrd(T_2, T_3, T_1); 1323 _shrd(T_2, T_3, T_1);
1325 _shr(T_3, T_1); 1324 _shr(T_3, T_1);
1326 _test(T_1, BitTest); 1325 _test(T_1, BitTest);
1327 _br(InstX8632Br::Br_e, Label); 1326 _br(CondX86::Br_e, Label);
1328 // Because of the intra-block control flow, we need to fake a use 1327 // Because of the intra-block control flow, we need to fake a use
1329 // of T_2 to prevent its earlier definition from being dead-code 1328 // of T_2 to prevent its earlier definition from being dead-code
1330 // eliminated in the presence of its later definition. 1329 // eliminated in the presence of its later definition.
1331 Context.insert(InstFakeUse::create(Func, T_2)); 1330 Context.insert(InstFakeUse::create(Func, T_2));
1332 _mov(T_2, T_3); 1331 _mov(T_2, T_3);
1333 _mov(T_3, Zero); 1332 _mov(T_3, Zero);
1334 Context.insert(Label); 1333 Context.insert(Label);
1335 _mov(DestLo, T_2); 1334 _mov(DestLo, T_2);
1336 _mov(DestHi, T_3); 1335 _mov(DestHi, T_3);
1337 } break; 1336 } break;
1338 case InstArithmetic::Ashr: { 1337 case InstArithmetic::Ashr: {
1339 // a=b>>c (signed) ==> 1338 // a=b>>c (signed) ==>
1340 // t1:ecx = c.lo & 0xff 1339 // t1:ecx = c.lo & 0xff
1341 // t2 = b.lo 1340 // t2 = b.lo
1342 // t3 = b.hi 1341 // t3 = b.hi
1343 // t2 = shrd t2, t3, t1 1342 // t2 = shrd t2, t3, t1
1344 // t3 = sar t3, t1 1343 // t3 = sar t3, t1
1345 // test t1, 0x20 1344 // test t1, 0x20
1346 // je L1 1345 // je L1
1347 // use(t2) 1346 // use(t2)
1348 // t2 = t3 1347 // t2 = t3
1349 // t3 = sar t3, 0x1f 1348 // t3 = sar t3, 0x1f
1350 // L1: 1349 // L1:
1351 // a.lo = t2 1350 // a.lo = t2
1352 // a.hi = t3 1351 // a.hi = t3
1353 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL; 1352 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;
1354 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20); 1353 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);
1355 Constant *SignExtend = Ctx->getConstantInt32(IceType_i32, 0x1f); 1354 Constant *SignExtend = Ctx->getConstantInt32(IceType_i32, 0x1f);
1356 InstX8632Label *Label = InstX8632Label::create(Func, this); 1355 InstX8632Label *Label = InstX8632Label::create(Func, this);
1357 _mov(T_1, Src1Lo, Reg_ecx); 1356 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);
1358 _mov(T_2, Src0Lo); 1357 _mov(T_2, Src0Lo);
1359 _mov(T_3, Src0Hi); 1358 _mov(T_3, Src0Hi);
1360 _shrd(T_2, T_3, T_1); 1359 _shrd(T_2, T_3, T_1);
1361 _sar(T_3, T_1); 1360 _sar(T_3, T_1);
1362 _test(T_1, BitTest); 1361 _test(T_1, BitTest);
1363 _br(InstX8632Br::Br_e, Label); 1362 _br(CondX86::Br_e, Label);
1364 // Because of the intra-block control flow, we need to fake a use 1363 // Because of the intra-block control flow, we need to fake a use
1365 // of T_2 to prevent its earlier definition from being dead-code 1364 // of T_2 to prevent its earlier definition from being dead-code
1366 // eliminated in the presence of its later definition. 1365 // eliminated in the presence of its later definition.
1367 Context.insert(InstFakeUse::create(Func, T_2)); 1366 Context.insert(InstFakeUse::create(Func, T_2));
1368 _mov(T_2, T_3); 1367 _mov(T_2, T_3);
1369 _sar(T_3, SignExtend); 1368 _sar(T_3, SignExtend);
1370 Context.insert(Label); 1369 Context.insert(Label);
1371 _mov(DestLo, T_2); 1370 _mov(DestLo, T_2);
1372 _mov(DestHi, T_3); 1371 _mov(DestHi, T_3);
1373 } break; 1372 } break;
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after
1568 break; 1567 break;
1569 case InstArithmetic::Mul: 1568 case InstArithmetic::Mul:
1570 // TODO: Optimize for llvm::isa<Constant>(Src1) 1569 // TODO: Optimize for llvm::isa<Constant>(Src1)
1571 // TODO: Strength-reduce multiplications by a constant, 1570 // TODO: Strength-reduce multiplications by a constant,
1572 // particularly -1 and powers of 2. Advanced: use lea to 1571 // particularly -1 and powers of 2. Advanced: use lea to
1573 // multiply by 3, 5, 9. 1572 // multiply by 3, 5, 9.
1574 // 1573 //
1575 // The 8-bit version of imul only allows the form "imul r/m8" 1574 // The 8-bit version of imul only allows the form "imul r/m8"
1576 // where T must be in eax. 1575 // where T must be in eax.
1577 if (Dest->getType() == IceType_i8) 1576 if (Dest->getType() == IceType_i8)
1578 _mov(T, Src0, Reg_eax); 1577 _mov(T, Src0, RegX8632::Reg_eax);
1579 else 1578 else
1580 _mov(T, Src0); 1579 _mov(T, Src0);
1581 _imul(T, Src1); 1580 _imul(T, Src1);
1582 _mov(Dest, T); 1581 _mov(Dest, T);
1583 break; 1582 break;
1584 case InstArithmetic::Shl: 1583 case InstArithmetic::Shl:
1585 _mov(T, Src0); 1584 _mov(T, Src0);
1586 if (!llvm::isa<Constant>(Src1)) 1585 if (!llvm::isa<Constant>(Src1))
1587 Src1 = legalizeToVar(Src1, false, Reg_ecx); 1586 Src1 = legalizeToVar(Src1, false, RegX8632::Reg_ecx);
1588 _shl(T, Src1); 1587 _shl(T, Src1);
1589 _mov(Dest, T); 1588 _mov(Dest, T);
1590 break; 1589 break;
1591 case InstArithmetic::Lshr: 1590 case InstArithmetic::Lshr:
1592 _mov(T, Src0); 1591 _mov(T, Src0);
1593 if (!llvm::isa<Constant>(Src1)) 1592 if (!llvm::isa<Constant>(Src1))
1594 Src1 = legalizeToVar(Src1, false, Reg_ecx); 1593 Src1 = legalizeToVar(Src1, false, RegX8632::Reg_ecx);
1595 _shr(T, Src1); 1594 _shr(T, Src1);
1596 _mov(Dest, T); 1595 _mov(Dest, T);
1597 break; 1596 break;
1598 case InstArithmetic::Ashr: 1597 case InstArithmetic::Ashr:
1599 _mov(T, Src0); 1598 _mov(T, Src0);
1600 if (!llvm::isa<Constant>(Src1)) 1599 if (!llvm::isa<Constant>(Src1))
1601 Src1 = legalizeToVar(Src1, false, Reg_ecx); 1600 Src1 = legalizeToVar(Src1, false, RegX8632::Reg_ecx);
1602 _sar(T, Src1); 1601 _sar(T, Src1);
1603 _mov(Dest, T); 1602 _mov(Dest, T);
1604 break; 1603 break;
1605 case InstArithmetic::Udiv: 1604 case InstArithmetic::Udiv:
1606 // div and idiv are the few arithmetic operators that do not allow 1605 // div and idiv are the few arithmetic operators that do not allow
1607 // immediates as the operand. 1606 // immediates as the operand.
1608 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1607 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1609 if (Dest->getType() == IceType_i8) { 1608 if (Dest->getType() == IceType_i8) {
1610 Variable *T_ah = NULL; 1609 Variable *T_ah = NULL;
1611 Constant *Zero = Ctx->getConstantZero(IceType_i8); 1610 Constant *Zero = Ctx->getConstantZero(IceType_i8);
1612 _mov(T, Src0, Reg_eax); 1611 _mov(T, Src0, RegX8632::Reg_eax);
1613 _mov(T_ah, Zero, Reg_ah); 1612 _mov(T_ah, Zero, RegX8632::Reg_ah);
1614 _div(T, Src1, T_ah); 1613 _div(T, Src1, T_ah);
1615 _mov(Dest, T); 1614 _mov(Dest, T);
1616 } else { 1615 } else {
1617 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1616 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1618 _mov(T, Src0, Reg_eax); 1617 _mov(T, Src0, RegX8632::Reg_eax);
1619 _mov(T_edx, Zero, Reg_edx); 1618 _mov(T_edx, Zero, RegX8632::Reg_edx);
1620 _div(T, Src1, T_edx); 1619 _div(T, Src1, T_edx);
1621 _mov(Dest, T); 1620 _mov(Dest, T);
1622 } 1621 }
1623 break; 1622 break;
1624 case InstArithmetic::Sdiv: 1623 case InstArithmetic::Sdiv:
1625 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1624 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1626 if (Dest->getType() == IceType_i8) { 1625 if (Dest->getType() == IceType_i8) {
1627 _mov(T, Src0, Reg_eax); 1626 _mov(T, Src0, RegX8632::Reg_eax);
1628 _cbwdq(T, T); 1627 _cbwdq(T, T);
1629 _idiv(T, Src1, T); 1628 _idiv(T, Src1, T);
1630 _mov(Dest, T); 1629 _mov(Dest, T);
1631 } else { 1630 } else {
1632 T_edx = makeReg(IceType_i32, Reg_edx); 1631 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
1633 _mov(T, Src0, Reg_eax); 1632 _mov(T, Src0, RegX8632::Reg_eax);
1634 _cbwdq(T_edx, T); 1633 _cbwdq(T_edx, T);
1635 _idiv(T, Src1, T_edx); 1634 _idiv(T, Src1, T_edx);
1636 _mov(Dest, T); 1635 _mov(Dest, T);
1637 } 1636 }
1638 break; 1637 break;
1639 case InstArithmetic::Urem: 1638 case InstArithmetic::Urem:
1640 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1639 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1641 if (Dest->getType() == IceType_i8) { 1640 if (Dest->getType() == IceType_i8) {
1642 Variable *T_ah = NULL; 1641 Variable *T_ah = NULL;
1643 Constant *Zero = Ctx->getConstantZero(IceType_i8); 1642 Constant *Zero = Ctx->getConstantZero(IceType_i8);
1644 _mov(T, Src0, Reg_eax); 1643 _mov(T, Src0, RegX8632::Reg_eax);
1645 _mov(T_ah, Zero, Reg_ah); 1644 _mov(T_ah, Zero, RegX8632::Reg_ah);
1646 _div(T_ah, Src1, T); 1645 _div(T_ah, Src1, T);
1647 _mov(Dest, T_ah); 1646 _mov(Dest, T_ah);
1648 } else { 1647 } else {
1649 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1648 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1650 _mov(T_edx, Zero, Reg_edx); 1649 _mov(T_edx, Zero, RegX8632::Reg_edx);
1651 _mov(T, Src0, Reg_eax); 1650 _mov(T, Src0, RegX8632::Reg_eax);
1652 _div(T_edx, Src1, T); 1651 _div(T_edx, Src1, T);
1653 _mov(Dest, T_edx); 1652 _mov(Dest, T_edx);
1654 } 1653 }
1655 break; 1654 break;
1656 case InstArithmetic::Srem: 1655 case InstArithmetic::Srem:
1657 Src1 = legalize(Src1, Legal_Reg | Legal_Mem); 1656 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);
1658 if (Dest->getType() == IceType_i8) { 1657 if (Dest->getType() == IceType_i8) {
1659 Variable *T_ah = makeReg(IceType_i8, Reg_ah); 1658 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah);
1660 _mov(T, Src0, Reg_eax); 1659 _mov(T, Src0, RegX8632::Reg_eax);
1661 _cbwdq(T, T); 1660 _cbwdq(T, T);
1662 Context.insert(InstFakeDef::create(Func, T_ah)); 1661 Context.insert(InstFakeDef::create(Func, T_ah));
1663 _idiv(T_ah, Src1, T); 1662 _idiv(T_ah, Src1, T);
1664 _mov(Dest, T_ah); 1663 _mov(Dest, T_ah);
1665 } else { 1664 } else {
1666 T_edx = makeReg(IceType_i32, Reg_edx); 1665 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
1667 _mov(T, Src0, Reg_eax); 1666 _mov(T, Src0, RegX8632::Reg_eax);
1668 _cbwdq(T_edx, T); 1667 _cbwdq(T_edx, T);
1669 _idiv(T_edx, Src1, T); 1668 _idiv(T_edx, Src1, T);
1670 _mov(Dest, T_edx); 1669 _mov(Dest, T_edx);
1671 } 1670 }
1672 break; 1671 break;
1673 case InstArithmetic::Fadd: 1672 case InstArithmetic::Fadd:
1674 _mov(T, Src0); 1673 _mov(T, Src0);
1675 _addss(T, Src1); 1674 _addss(T, Src1);
1676 _mov(Dest, T); 1675 _mov(Dest, T);
1677 break; 1676 break;
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
1729 } 1728 }
1730 } 1729 }
1731 1730
1732 void TargetX8632::lowerBr(const InstBr *Inst) { 1731 void TargetX8632::lowerBr(const InstBr *Inst) {
1733 if (Inst->isUnconditional()) { 1732 if (Inst->isUnconditional()) {
1734 _br(Inst->getTargetUnconditional()); 1733 _br(Inst->getTargetUnconditional());
1735 } else { 1734 } else {
1736 Operand *Src0 = legalize(Inst->getCondition(), Legal_Reg | Legal_Mem); 1735 Operand *Src0 = legalize(Inst->getCondition(), Legal_Reg | Legal_Mem);
1737 Constant *Zero = Ctx->getConstantZero(IceType_i32); 1736 Constant *Zero = Ctx->getConstantZero(IceType_i32);
1738 _cmp(Src0, Zero); 1737 _cmp(Src0, Zero);
1739 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse()); 1738 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());
1740 } 1739 }
1741 } 1740 }
1742 1741
1743 void TargetX8632::lowerCall(const InstCall *Instr) { 1742 void TargetX8632::lowerCall(const InstCall *Instr) {
1744 // x86-32 calling convention: 1743 // x86-32 calling convention:
1745 // 1744 //
1746 // * At the point before the call, the stack must be aligned to 16 1745 // * At the point before the call, the stack must be aligned to 16
1747 // bytes. 1746 // bytes.
1748 // 1747 //
1749 // * The first four arguments of vector type, regardless of their 1748 // * The first four arguments of vector type, regardless of their
(...skipping 25 matching lines...) Expand all
1775 // The PNaCl ABI requires the width of arguments to be at least 32 bits. 1774 // The PNaCl ABI requires the width of arguments to be at least 32 bits.
1776 assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 || 1775 assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 ||
1777 Ty == IceType_f64 || isVectorType(Ty)); 1776 Ty == IceType_f64 || isVectorType(Ty));
1778 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) { 1777 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {
1779 XmmArgs.push_back(Arg); 1778 XmmArgs.push_back(Arg);
1780 } else { 1779 } else {
1781 StackArgs.push_back(Arg); 1780 StackArgs.push_back(Arg);
1782 if (isVectorType(Arg->getType())) { 1781 if (isVectorType(Arg->getType())) {
1783 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); 1782 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
1784 } 1783 }
1785 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); 1784 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
1786 Constant *Loc = 1785 Constant *Loc =
1787 Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes); 1786 Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes);
1788 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc)); 1787 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));
1789 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType()); 1788 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());
1790 } 1789 }
1791 } 1790 }
1792 1791
1793 // Adjust the parameter area so that the stack is aligned. It is 1792 // Adjust the parameter area so that the stack is aligned. It is
1794 // assumed that the stack is already aligned at the start of the 1793 // assumed that the stack is already aligned at the start of the
1795 // calling sequence. 1794 // calling sequence.
(...skipping 21 matching lines...) Expand all
1817 // Copy arguments to be passed in registers to the appropriate 1816 // Copy arguments to be passed in registers to the appropriate
1818 // registers. 1817 // registers.
1819 // TODO: Investigate the impact of lowering arguments passed in 1818 // TODO: Investigate the impact of lowering arguments passed in
1820 // registers after lowering stack arguments as opposed to the other 1819 // registers after lowering stack arguments as opposed to the other
1821 // way around. Lowering register arguments after stack arguments may 1820 // way around. Lowering register arguments after stack arguments may
1822 // reduce register pressure. On the other hand, lowering register 1821 // reduce register pressure. On the other hand, lowering register
1823 // arguments first (before stack arguments) may result in more compact 1822 // arguments first (before stack arguments) may result in more compact
1824 // code, as the memory operand displacements may end up being smaller 1823 // code, as the memory operand displacements may end up being smaller
1825 // before any stack adjustment is done. 1824 // before any stack adjustment is done.
1826 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { 1825 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {
1827 Variable *Reg = legalizeToVar(XmmArgs[i], false, Reg_xmm0 + i); 1826 Variable *Reg = legalizeToVar(XmmArgs[i], false, RegX8632::Reg_xmm0 + i);
1828 // Generate a FakeUse of register arguments so that they do not get 1827 // Generate a FakeUse of register arguments so that they do not get
1829 // dead code eliminated as a result of the FakeKill of scratch 1828 // dead code eliminated as a result of the FakeKill of scratch
1830 // registers after the call. 1829 // registers after the call.
1831 Context.insert(InstFakeUse::create(Func, Reg)); 1830 Context.insert(InstFakeUse::create(Func, Reg));
1832 } 1831 }
1833 // Generate the call instruction. Assign its result to a temporary 1832 // Generate the call instruction. Assign its result to a temporary
1834 // with high register allocation weight. 1833 // with high register allocation weight.
1835 Variable *Dest = Instr->getDest(); 1834 Variable *Dest = Instr->getDest();
1836 // ReturnReg doubles as ReturnRegLo as necessary. 1835 // ReturnReg doubles as ReturnRegLo as necessary.
1837 Variable *ReturnReg = NULL; 1836 Variable *ReturnReg = NULL;
1838 Variable *ReturnRegHi = NULL; 1837 Variable *ReturnRegHi = NULL;
1839 if (Dest) { 1838 if (Dest) {
1840 switch (Dest->getType()) { 1839 switch (Dest->getType()) {
1841 case IceType_NUM: 1840 case IceType_NUM:
1842 llvm_unreachable("Invalid Call dest type"); 1841 llvm_unreachable("Invalid Call dest type");
1843 break; 1842 break;
1844 case IceType_void: 1843 case IceType_void:
1845 break; 1844 break;
1846 case IceType_i1: 1845 case IceType_i1:
1847 case IceType_i8: 1846 case IceType_i8:
1848 case IceType_i16: 1847 case IceType_i16:
1849 case IceType_i32: 1848 case IceType_i32:
1850 ReturnReg = makeReg(Dest->getType(), Reg_eax); 1849 ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_eax);
1851 break; 1850 break;
1852 case IceType_i64: 1851 case IceType_i64:
1853 ReturnReg = makeReg(IceType_i32, Reg_eax); 1852 ReturnReg = makeReg(IceType_i32, RegX8632::Reg_eax);
1854 ReturnRegHi = makeReg(IceType_i32, Reg_edx); 1853 ReturnRegHi = makeReg(IceType_i32, RegX8632::Reg_edx);
1855 break; 1854 break;
1856 case IceType_f32: 1855 case IceType_f32:
1857 case IceType_f64: 1856 case IceType_f64:
1858 // Leave ReturnReg==ReturnRegHi==NULL, and capture the result with 1857 // Leave ReturnReg==ReturnRegHi==NULL, and capture the result with
1859 // the fstp instruction. 1858 // the fstp instruction.
1860 break; 1859 break;
1861 case IceType_v4i1: 1860 case IceType_v4i1:
1862 case IceType_v8i1: 1861 case IceType_v8i1:
1863 case IceType_v16i1: 1862 case IceType_v16i1:
1864 case IceType_v16i8: 1863 case IceType_v16i8:
1865 case IceType_v8i16: 1864 case IceType_v8i16:
1866 case IceType_v4i32: 1865 case IceType_v4i32:
1867 case IceType_v4f32: 1866 case IceType_v4f32:
1868 ReturnReg = makeReg(Dest->getType(), Reg_xmm0); 1867 ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_xmm0);
1869 break; 1868 break;
1870 } 1869 }
1871 } 1870 }
1872 // TODO(stichnot): LEAHACK: remove Legal_All (and use default) once 1871 // TODO(stichnot): LEAHACK: remove Legal_All (and use default) once
1873 // a proper emitter is used. 1872 // a proper emitter is used.
1874 Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All); 1873 Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All);
1875 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget); 1874 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);
1876 Context.insert(NewCall); 1875 Context.insert(NewCall);
1877 if (ReturnRegHi) 1876 if (ReturnRegHi)
1878 Context.insert(InstFakeDef::create(Func, ReturnRegHi)); 1877 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
1879 1878
1880 // Add the appropriate offset to esp. The call instruction takes care 1879 // Add the appropriate offset to esp. The call instruction takes care
1881 // of resetting the stack offset during emission. 1880 // of resetting the stack offset during emission.
1882 if (ParameterAreaSizeBytes) { 1881 if (ParameterAreaSizeBytes) {
1883 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); 1882 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
1884 _add(esp, Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes)); 1883 _add(esp, Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes));
1885 } 1884 }
1886 1885
1887 // Insert a register-kill pseudo instruction. 1886 // Insert a register-kill pseudo instruction.
1888 VarList KilledRegs; 1887 VarList KilledRegs;
1889 for (SizeT i = 0; i < ScratchRegs.size(); ++i) { 1888 for (SizeT i = 0; i < ScratchRegs.size(); ++i) {
1890 if (ScratchRegs[i]) 1889 if (ScratchRegs[i])
1891 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i)); 1890 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));
1892 } 1891 }
1893 Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall)); 1892 Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall));
(...skipping 579 matching lines...) Expand 10 before | Expand all | Expand 10 after
2473 // makeVectorOfMinusOnes() requires an integer vector type. 2472 // makeVectorOfMinusOnes() requires an integer vector type.
2474 T = makeVectorOfMinusOnes(IceType_v4i32); 2473 T = makeVectorOfMinusOnes(IceType_v4i32);
2475 } else if (Condition == InstFcmp::False) { 2474 } else if (Condition == InstFcmp::False) {
2476 T = makeVectorOfZeros(Dest->getType()); 2475 T = makeVectorOfZeros(Dest->getType());
2477 } else { 2476 } else {
2478 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); 2477 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);
2479 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2478 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2480 2479
2481 switch (Condition) { 2480 switch (Condition) {
2482 default: { 2481 default: {
2483 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; 2482 CondX86::CmppsCond Predicate = TableFcmp[Index].Predicate;
2484 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); 2483 assert(Predicate != CondX86::Cmpps_Invalid);
2485 T = makeReg(Src0RM->getType()); 2484 T = makeReg(Src0RM->getType());
2486 _movp(T, Src0RM); 2485 _movp(T, Src0RM);
2487 _cmpps(T, Src1RM, Predicate); 2486 _cmpps(T, Src1RM, Predicate);
2488 } break; 2487 } break;
2489 case InstFcmp::One: { 2488 case InstFcmp::One: {
2490 // Check both unequal and ordered. 2489 // Check both unequal and ordered.
2491 T = makeReg(Src0RM->getType()); 2490 T = makeReg(Src0RM->getType());
2492 Variable *T2 = makeReg(Src0RM->getType()); 2491 Variable *T2 = makeReg(Src0RM->getType());
2493 _movp(T, Src0RM); 2492 _movp(T, Src0RM);
2494 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq); 2493 _cmpps(T, Src1RM, CondX86::Cmpps_neq);
2495 _movp(T2, Src0RM); 2494 _movp(T2, Src0RM);
2496 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord); 2495 _cmpps(T2, Src1RM, CondX86::Cmpps_ord);
2497 _pand(T, T2); 2496 _pand(T, T2);
2498 } break; 2497 } break;
2499 case InstFcmp::Ueq: { 2498 case InstFcmp::Ueq: {
2500 // Check both equal or unordered. 2499 // Check both equal or unordered.
2501 T = makeReg(Src0RM->getType()); 2500 T = makeReg(Src0RM->getType());
2502 Variable *T2 = makeReg(Src0RM->getType()); 2501 Variable *T2 = makeReg(Src0RM->getType());
2503 _movp(T, Src0RM); 2502 _movp(T, Src0RM);
2504 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq); 2503 _cmpps(T, Src1RM, CondX86::Cmpps_eq);
2505 _movp(T2, Src0RM); 2504 _movp(T2, Src0RM);
2506 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord); 2505 _cmpps(T2, Src1RM, CondX86::Cmpps_unord);
2507 _por(T, T2); 2506 _por(T, T2);
2508 } break; 2507 } break;
2509 } 2508 }
2510 } 2509 }
2511 2510
2512 _movp(Dest, T); 2511 _movp(Dest, T);
2513 eliminateNextVectorSextInstruction(Dest); 2512 eliminateNextVectorSextInstruction(Dest);
2514 return; 2513 return;
2515 } 2514 }
2516 2515
2517 // Lowering a = fcmp cond, b, c 2516 // Lowering a = fcmp cond, b, c
2518 // ucomiss b, c /* only if C1 != Br_None */ 2517 // ucomiss b, c /* only if C1 != Br_None */
2519 // /* but swap b,c order if SwapOperands==true */ 2518 // /* but swap b,c order if SwapOperands==true */
2520 // mov a, <default> 2519 // mov a, <default>
2521 // j<C1> label /* only if C1 != Br_None */ 2520 // j<C1> label /* only if C1 != Br_None */
2522 // j<C2> label /* only if C2 != Br_None */ 2521 // j<C2> label /* only if C2 != Br_None */
2523 // FakeUse(a) /* only if C1 != Br_None */ 2522 // FakeUse(a) /* only if C1 != Br_None */
2524 // mov a, !<default> /* only if C1 != Br_None */ 2523 // mov a, !<default> /* only if C1 != Br_None */
2525 // label: /* only if C1 != Br_None */ 2524 // label: /* only if C1 != Br_None */
2526 InstFcmp::FCond Condition = Inst->getCondition(); 2525 InstFcmp::FCond Condition = Inst->getCondition();
2527 size_t Index = static_cast<size_t>(Condition); 2526 size_t Index = static_cast<size_t>(Condition);
2528 assert(Index < TableFcmpSize); 2527 assert(Index < TableFcmpSize);
2529 if (TableFcmp[Index].SwapScalarOperands) { 2528 if (TableFcmp[Index].SwapScalarOperands) {
2530 Operand *Tmp = Src0; 2529 Operand *Tmp = Src0;
2531 Src0 = Src1; 2530 Src0 = Src1;
2532 Src1 = Tmp; 2531 Src1 = Tmp;
2533 } 2532 }
2534 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None); 2533 bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None);
2535 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None); 2534 bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None);
2536 if (HasC1) { 2535 if (HasC1) {
2537 Src0 = legalize(Src0); 2536 Src0 = legalize(Src0);
2538 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); 2537 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);
2539 Variable *T = NULL; 2538 Variable *T = NULL;
2540 _mov(T, Src0); 2539 _mov(T, Src0);
2541 _ucomiss(T, Src1RM); 2540 _ucomiss(T, Src1RM);
2542 } 2541 }
2543 Constant *Default = 2542 Constant *Default =
2544 Ctx->getConstantInt32(IceType_i32, TableFcmp[Index].Default); 2543 Ctx->getConstantInt32(IceType_i32, TableFcmp[Index].Default);
2545 _mov(Dest, Default); 2544 _mov(Dest, Default);
(...skipping 152 matching lines...) Expand 10 before | Expand all | Expand 10 after
2698 size_t Index = static_cast<size_t>(Condition); 2697 size_t Index = static_cast<size_t>(Condition);
2699 assert(Index < TableIcmp64Size); 2698 assert(Index < TableIcmp64Size);
2700 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem); 2699 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);
2701 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem); 2700 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);
2702 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm); 2701 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);
2703 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm); 2702 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);
2704 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) { 2703 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {
2705 InstX8632Label *Label = InstX8632Label::create(Func, this); 2704 InstX8632Label *Label = InstX8632Label::create(Func, this);
2706 _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One)); 2705 _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));
2707 _cmp(Src0LoRM, Src1LoRI); 2706 _cmp(Src0LoRM, Src1LoRI);
2708 _br(InstX8632Br::Br_ne, Label); 2707 _br(CondX86::Br_ne, Label);
2709 _cmp(Src0HiRM, Src1HiRI); 2708 _cmp(Src0HiRM, Src1HiRI);
2710 _br(InstX8632Br::Br_ne, Label); 2709 _br(CondX86::Br_ne, Label);
2711 Context.insert(InstFakeUse::create(Func, Dest)); 2710 Context.insert(InstFakeUse::create(Func, Dest));
2712 _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero)); 2711 _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));
2713 Context.insert(Label); 2712 Context.insert(Label);
2714 } else { 2713 } else {
2715 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this); 2714 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);
2716 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this); 2715 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);
2717 _mov(Dest, One); 2716 _mov(Dest, One);
2718 _cmp(Src0HiRM, Src1HiRI); 2717 _cmp(Src0HiRM, Src1HiRI);
2719 _br(TableIcmp64[Index].C1, LabelTrue); 2718 _br(TableIcmp64[Index].C1, LabelTrue);
2720 _br(TableIcmp64[Index].C2, LabelFalse); 2719 _br(TableIcmp64[Index].C2, LabelFalse);
(...skipping 432 matching lines...) Expand 10 before | Expand all | Expand 10 after
3153 } 3152 }
3154 case Intrinsics::Sqrt: { 3153 case Intrinsics::Sqrt: {
3155 Operand *Src = legalize(Instr->getArg(0)); 3154 Operand *Src = legalize(Instr->getArg(0));
3156 Variable *Dest = Instr->getDest(); 3155 Variable *Dest = Instr->getDest();
3157 Variable *T = makeReg(Dest->getType()); 3156 Variable *T = makeReg(Dest->getType());
3158 _sqrtss(T, Src); 3157 _sqrtss(T, Src);
3159 _mov(Dest, T); 3158 _mov(Dest, T);
3160 return; 3159 return;
3161 } 3160 }
3162 case Intrinsics::Stacksave: { 3161 case Intrinsics::Stacksave: {
3163 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); 3162 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
3164 Variable *Dest = Instr->getDest(); 3163 Variable *Dest = Instr->getDest();
3165 _mov(Dest, esp); 3164 _mov(Dest, esp);
3166 return; 3165 return;
3167 } 3166 }
3168 case Intrinsics::Stackrestore: { 3167 case Intrinsics::Stackrestore: {
3169 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); 3168 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
3170 _mov(esp, Instr->getArg(0)); 3169 _mov(esp, Instr->getArg(0));
3171 return; 3170 return;
3172 } 3171 }
3173 case Intrinsics::Trap: 3172 case Intrinsics::Trap:
3174 _ud2(); 3173 _ud2();
3175 return; 3174 return;
3176 case Intrinsics::UnknownIntrinsic: 3175 case Intrinsics::UnknownIntrinsic:
3177 Func->setError("Should not be lowering UnknownIntrinsic"); 3176 Func->setError("Should not be lowering UnknownIntrinsic");
3178 return; 3177 return;
3179 } 3178 }
3180 return; 3179 return;
3181 } 3180 }
3182 3181
3183 void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr, 3182 void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,
3184 Operand *Expected, Operand *Desired) { 3183 Operand *Expected, Operand *Desired) {
3185 if (Expected->getType() == IceType_i64) { 3184 if (Expected->getType() == IceType_i64) {
3186 // Reserve the pre-colored registers first, before adding any more 3185 // Reserve the pre-colored registers first, before adding any more
3187 // infinite-weight variables from FormMemoryOperand's legalization. 3186 // infinite-weight variables from FormMemoryOperand's legalization.
3188 Variable *T_edx = makeReg(IceType_i32, Reg_edx); 3187 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
3189 Variable *T_eax = makeReg(IceType_i32, Reg_eax); 3188 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
3190 Variable *T_ecx = makeReg(IceType_i32, Reg_ecx); 3189 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
3191 Variable *T_ebx = makeReg(IceType_i32, Reg_ebx); 3190 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
3192 _mov(T_eax, loOperand(Expected)); 3191 _mov(T_eax, loOperand(Expected));
3193 _mov(T_edx, hiOperand(Expected)); 3192 _mov(T_edx, hiOperand(Expected));
3194 _mov(T_ebx, loOperand(Desired)); 3193 _mov(T_ebx, loOperand(Desired));
3195 _mov(T_ecx, hiOperand(Desired)); 3194 _mov(T_ecx, hiOperand(Desired));
3196 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); 3195 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
3197 const bool Locked = true; 3196 const bool Locked = true;
3198 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); 3197 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3199 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev)); 3198 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));
3200 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev)); 3199 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));
3201 _mov(DestLo, T_eax); 3200 _mov(DestLo, T_eax);
3202 _mov(DestHi, T_edx); 3201 _mov(DestHi, T_edx);
3203 return; 3202 return;
3204 } 3203 }
3205 Variable *T_eax = makeReg(Expected->getType(), Reg_eax); 3204 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax);
3206 _mov(T_eax, Expected); 3205 _mov(T_eax, Expected);
3207 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType()); 3206 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());
3208 Variable *DesiredReg = legalizeToVar(Desired); 3207 Variable *DesiredReg = legalizeToVar(Desired);
3209 const bool Locked = true; 3208 const bool Locked = true;
3210 _cmpxchg(Addr, T_eax, DesiredReg, Locked); 3209 _cmpxchg(Addr, T_eax, DesiredReg, Locked);
3211 _mov(DestPrev, T_eax); 3210 _mov(DestPrev, T_eax);
3212 } 3211 }
3213 3212
3214 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem, 3213 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,
3215 Operand *Expected, 3214 Operand *Expected,
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
3264 NextBr->isLastUse(NextCmp->getDest())) { 3263 NextBr->isLastUse(NextCmp->getDest())) {
3265 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired); 3264 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);
3266 for (size_t i = 0; i < PhiAssigns.size(); ++i) { 3265 for (size_t i = 0; i < PhiAssigns.size(); ++i) {
3267 // Lower the phi assignments now, before the branch (same placement 3266 // Lower the phi assignments now, before the branch (same placement
3268 // as before). 3267 // as before).
3269 InstAssign *PhiAssign = PhiAssigns[i]; 3268 InstAssign *PhiAssign = PhiAssigns[i];
3270 PhiAssign->setDeleted(); 3269 PhiAssign->setDeleted();
3271 lowerAssign(PhiAssign); 3270 lowerAssign(PhiAssign);
3272 Context.advanceNext(); 3271 Context.advanceNext();
3273 } 3272 }
3274 _br(InstX8632::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse()); 3273 _br(CondX86::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse());
3275 // Skip over the old compare and branch, by deleting them. 3274 // Skip over the old compare and branch, by deleting them.
3276 NextCmp->setDeleted(); 3275 NextCmp->setDeleted();
3277 NextBr->setDeleted(); 3276 NextBr->setDeleted();
3278 Context.advanceNext(); 3277 Context.advanceNext();
3279 Context.advanceNext(); 3278 Context.advanceNext();
3280 return true; 3279 return true;
3281 } 3280 }
3282 } 3281 }
3283 } 3282 }
3284 return false; 3283 return false;
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
3391 // mov <reg>, eax 3390 // mov <reg>, eax
3392 // op <reg>, [desired_adj] 3391 // op <reg>, [desired_adj]
3393 // lock cmpxchg [ptr], <reg> 3392 // lock cmpxchg [ptr], <reg>
3394 // jne .LABEL 3393 // jne .LABEL
3395 // mov <dest>, eax 3394 // mov <dest>, eax
3396 // 3395 //
3397 // If Op_{Lo,Hi} are NULL, then just copy the value. 3396 // If Op_{Lo,Hi} are NULL, then just copy the value.
3398 Val = legalize(Val); 3397 Val = legalize(Val);
3399 Type Ty = Val->getType(); 3398 Type Ty = Val->getType();
3400 if (Ty == IceType_i64) { 3399 if (Ty == IceType_i64) {
3401 Variable *T_edx = makeReg(IceType_i32, Reg_edx); 3400 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);
3402 Variable *T_eax = makeReg(IceType_i32, Reg_eax); 3401 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);
3403 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty); 3402 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
3404 _mov(T_eax, loOperand(Addr)); 3403 _mov(T_eax, loOperand(Addr));
3405 _mov(T_edx, hiOperand(Addr)); 3404 _mov(T_edx, hiOperand(Addr));
3406 Variable *T_ecx = makeReg(IceType_i32, Reg_ecx); 3405 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);
3407 Variable *T_ebx = makeReg(IceType_i32, Reg_ebx); 3406 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);
3408 InstX8632Label *Label = InstX8632Label::create(Func, this); 3407 InstX8632Label *Label = InstX8632Label::create(Func, this);
3409 const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL; 3408 const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL;
3410 if (!IsXchg8b) { 3409 if (!IsXchg8b) {
3411 Context.insert(Label); 3410 Context.insert(Label);
3412 _mov(T_ebx, T_eax); 3411 _mov(T_ebx, T_eax);
3413 (this->*Op_Lo)(T_ebx, loOperand(Val)); 3412 (this->*Op_Lo)(T_ebx, loOperand(Val));
3414 _mov(T_ecx, T_edx); 3413 _mov(T_ecx, T_edx);
3415 (this->*Op_Hi)(T_ecx, hiOperand(Val)); 3414 (this->*Op_Hi)(T_ecx, hiOperand(Val));
3416 } else { 3415 } else {
3417 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi. 3416 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.
3418 // It just needs the Val loaded into ebx and ecx. 3417 // It just needs the Val loaded into ebx and ecx.
3419 // That can also be done before the loop. 3418 // That can also be done before the loop.
3420 _mov(T_ebx, loOperand(Val)); 3419 _mov(T_ebx, loOperand(Val));
3421 _mov(T_ecx, hiOperand(Val)); 3420 _mov(T_ecx, hiOperand(Val));
3422 Context.insert(Label); 3421 Context.insert(Label);
3423 } 3422 }
3424 const bool Locked = true; 3423 const bool Locked = true;
3425 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked); 3424 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);
3426 _br(InstX8632Br::Br_ne, Label); 3425 _br(CondX86::Br_ne, Label);
3427 if (!IsXchg8b) { 3426 if (!IsXchg8b) {
3428 // If Val is a variable, model the extended live range of Val through 3427 // If Val is a variable, model the extended live range of Val through
3429 // the end of the loop, since it will be re-used by the loop. 3428 // the end of the loop, since it will be re-used by the loop.
3430 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { 3429 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3431 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar)); 3430 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));
3432 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar)); 3431 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));
3433 Context.insert(InstFakeUse::create(Func, ValLo)); 3432 Context.insert(InstFakeUse::create(Func, ValLo));
3434 Context.insert(InstFakeUse::create(Func, ValHi)); 3433 Context.insert(InstFakeUse::create(Func, ValHi));
3435 } 3434 }
3436 } else { 3435 } else {
3437 // For xchg, the loop is slightly smaller and ebx/ecx are used. 3436 // For xchg, the loop is slightly smaller and ebx/ecx are used.
3438 Context.insert(InstFakeUse::create(Func, T_ebx)); 3437 Context.insert(InstFakeUse::create(Func, T_ebx));
3439 Context.insert(InstFakeUse::create(Func, T_ecx)); 3438 Context.insert(InstFakeUse::create(Func, T_ecx));
3440 } 3439 }
3441 // The address base is also reused in the loop. 3440 // The address base is also reused in the loop.
3442 Context.insert(InstFakeUse::create(Func, Addr->getBase())); 3441 Context.insert(InstFakeUse::create(Func, Addr->getBase()));
3443 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3442 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3444 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3443 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3445 _mov(DestLo, T_eax); 3444 _mov(DestLo, T_eax);
3446 _mov(DestHi, T_edx); 3445 _mov(DestHi, T_edx);
3447 return; 3446 return;
3448 } 3447 }
3449 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty); 3448 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);
3450 Variable *T_eax = makeReg(Ty, Reg_eax); 3449 Variable *T_eax = makeReg(Ty, RegX8632::Reg_eax);
3451 _mov(T_eax, Addr); 3450 _mov(T_eax, Addr);
3452 InstX8632Label *Label = InstX8632Label::create(Func, this); 3451 InstX8632Label *Label = InstX8632Label::create(Func, this);
3453 Context.insert(Label); 3452 Context.insert(Label);
3454 // We want to pick a different register for T than Eax, so don't use 3453 // We want to pick a different register for T than Eax, so don't use
3455 // _mov(T == NULL, T_eax). 3454 // _mov(T == NULL, T_eax).
3456 Variable *T = makeReg(Ty); 3455 Variable *T = makeReg(Ty);
3457 _mov(T, T_eax); 3456 _mov(T, T_eax);
3458 (this->*Op_Lo)(T, Val); 3457 (this->*Op_Lo)(T, Val);
3459 const bool Locked = true; 3458 const bool Locked = true;
3460 _cmpxchg(Addr, T_eax, T, Locked); 3459 _cmpxchg(Addr, T_eax, T, Locked);
3461 _br(InstX8632Br::Br_ne, Label); 3460 _br(CondX86::Br_ne, Label);
3462 // If Val is a variable, model the extended live range of Val through 3461 // If Val is a variable, model the extended live range of Val through
3463 // the end of the loop, since it will be re-used by the loop. 3462 // the end of the loop, since it will be re-used by the loop.
3464 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { 3463 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {
3465 Context.insert(InstFakeUse::create(Func, ValVar)); 3464 Context.insert(InstFakeUse::create(Func, ValVar));
3466 } 3465 }
3467 // The address base is also reused in the loop. 3466 // The address base is also reused in the loop.
3468 Context.insert(InstFakeUse::create(Func, Addr->getBase())); 3467 Context.insert(InstFakeUse::create(Func, Addr->getBase()));
3469 _mov(Dest, T_eax); 3468 _mov(Dest, T_eax);
3470 } 3469 }
3471 3470
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after
3512 } 3511 }
3513 Variable *T_Dest = makeReg(IceType_i32); 3512 Variable *T_Dest = makeReg(IceType_i32);
3514 Constant *ThirtyTwo = Ctx->getConstantInt32(IceType_i32, 32); 3513 Constant *ThirtyTwo = Ctx->getConstantInt32(IceType_i32, 32);
3515 Constant *ThirtyOne = Ctx->getConstantInt32(IceType_i32, 31); 3514 Constant *ThirtyOne = Ctx->getConstantInt32(IceType_i32, 31);
3516 if (Cttz) { 3515 if (Cttz) {
3517 _mov(T_Dest, ThirtyTwo); 3516 _mov(T_Dest, ThirtyTwo);
3518 } else { 3517 } else {
3519 Constant *SixtyThree = Ctx->getConstantInt32(IceType_i32, 63); 3518 Constant *SixtyThree = Ctx->getConstantInt32(IceType_i32, 63);
3520 _mov(T_Dest, SixtyThree); 3519 _mov(T_Dest, SixtyThree);
3521 } 3520 }
3522 _cmov(T_Dest, T, InstX8632::Br_ne); 3521 _cmov(T_Dest, T, CondX86::Br_ne);
3523 if (!Cttz) { 3522 if (!Cttz) {
3524 _xor(T_Dest, ThirtyOne); 3523 _xor(T_Dest, ThirtyOne);
3525 } 3524 }
3526 if (Ty == IceType_i32) { 3525 if (Ty == IceType_i32) {
3527 _mov(Dest, T_Dest); 3526 _mov(Dest, T_Dest);
3528 return; 3527 return;
3529 } 3528 }
3530 _add(T_Dest, ThirtyTwo); 3529 _add(T_Dest, ThirtyTwo);
3531 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3530 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3532 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3531 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3533 // Will be using "test" on this, so we need a registerized variable. 3532 // Will be using "test" on this, so we need a registerized variable.
3534 Variable *SecondVar = legalizeToVar(SecondVal); 3533 Variable *SecondVar = legalizeToVar(SecondVal);
3535 Variable *T_Dest2 = makeReg(IceType_i32); 3534 Variable *T_Dest2 = makeReg(IceType_i32);
3536 if (Cttz) { 3535 if (Cttz) {
3537 _bsf(T_Dest2, SecondVar); 3536 _bsf(T_Dest2, SecondVar);
3538 } else { 3537 } else {
3539 _bsr(T_Dest2, SecondVar); 3538 _bsr(T_Dest2, SecondVar);
3540 _xor(T_Dest2, ThirtyOne); 3539 _xor(T_Dest2, ThirtyOne);
3541 } 3540 }
3542 _test(SecondVar, SecondVar); 3541 _test(SecondVar, SecondVar);
3543 _cmov(T_Dest2, T_Dest, InstX8632::Br_e); 3542 _cmov(T_Dest2, T_Dest, CondX86::Br_e);
3544 _mov(DestLo, T_Dest2); 3543 _mov(DestLo, T_Dest2);
3545 _mov(DestHi, Ctx->getConstantZero(IceType_i32)); 3544 _mov(DestHi, Ctx->getConstantZero(IceType_i32));
3546 } 3545 }
3547 3546
3548 namespace { 3547 namespace {
3549 3548
3550 bool isAdd(const Inst *Inst) { 3549 bool isAdd(const Inst *Inst) {
3551 if (const InstArithmetic *Arith = 3550 if (const InstArithmetic *Arith =
3552 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) { 3551 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {
3553 return (Arith->getOp() == InstArithmetic::Add); 3552 return (Arith->getOp() == InstArithmetic::Add);
(...skipping 306 matching lines...) Expand 10 before | Expand all | Expand 10 after
3860 3859
3861 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) { 3860 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {
3862 Func->setError("Phi found in regular instruction list"); 3861 Func->setError("Phi found in regular instruction list");
3863 } 3862 }
3864 3863
3865 void TargetX8632::lowerRet(const InstRet *Inst) { 3864 void TargetX8632::lowerRet(const InstRet *Inst) {
3866 Variable *Reg = NULL; 3865 Variable *Reg = NULL;
3867 if (Inst->hasRetValue()) { 3866 if (Inst->hasRetValue()) {
3868 Operand *Src0 = legalize(Inst->getRetValue()); 3867 Operand *Src0 = legalize(Inst->getRetValue());
3869 if (Src0->getType() == IceType_i64) { 3868 if (Src0->getType() == IceType_i64) {
3870 Variable *eax = legalizeToVar(loOperand(Src0), false, Reg_eax); 3869 Variable *eax = legalizeToVar(loOperand(Src0), false, RegX8632::Reg_eax);
3871 Variable *edx = legalizeToVar(hiOperand(Src0), false, Reg_edx); 3870 Variable *edx = legalizeToVar(hiOperand(Src0), false, RegX8632::Reg_edx);
3872 Reg = eax; 3871 Reg = eax;
3873 Context.insert(InstFakeUse::create(Func, edx)); 3872 Context.insert(InstFakeUse::create(Func, edx));
3874 } else if (Src0->getType() == IceType_f32 || 3873 } else if (Src0->getType() == IceType_f32 ||
3875 Src0->getType() == IceType_f64) { 3874 Src0->getType() == IceType_f64) {
3876 _fld(Src0); 3875 _fld(Src0);
3877 } else if (isVectorType(Src0->getType())) { 3876 } else if (isVectorType(Src0->getType())) {
3878 Reg = legalizeToVar(Src0, false, Reg_xmm0); 3877 Reg = legalizeToVar(Src0, false, RegX8632::Reg_xmm0);
3879 } else { 3878 } else {
3880 _mov(Reg, Src0, Reg_eax); 3879 _mov(Reg, Src0, RegX8632::Reg_eax);
3881 } 3880 }
3882 } 3881 }
3883 _ret(Reg); 3882 _ret(Reg);
3884 // Add a fake use of esp to make sure esp stays alive for the entire 3883 // Add a fake use of esp to make sure esp stays alive for the entire
3885 // function. Otherwise post-call esp adjustments get dead-code 3884 // function. Otherwise post-call esp adjustments get dead-code
3886 // eliminated. TODO: Are there more places where the fake use 3885 // eliminated. TODO: Are there more places where the fake use
3887 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not 3886 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
3888 // have a ret instruction. 3887 // have a ret instruction.
3889 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp); 3888 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);
3890 Context.insert(InstFakeUse::create(Func, esp)); 3889 Context.insert(InstFakeUse::create(Func, esp));
3891 } 3890 }
3892 3891
3893 void TargetX8632::lowerSelect(const InstSelect *Inst) { 3892 void TargetX8632::lowerSelect(const InstSelect *Inst) {
3894 Variable *Dest = Inst->getDest(); 3893 Variable *Dest = Inst->getDest();
3895 Operand *SrcT = Inst->getTrueOperand(); 3894 Operand *SrcT = Inst->getTrueOperand();
3896 Operand *SrcF = Inst->getFalseOperand(); 3895 Operand *SrcF = Inst->getFalseOperand();
3897 Operand *Condition = Inst->getCondition(); 3896 Operand *Condition = Inst->getCondition();
3898 3897
3899 if (isVectorType(Dest->getType())) { 3898 if (isVectorType(Dest->getType())) {
3900 Type SrcTy = SrcT->getType(); 3899 Type SrcTy = SrcT->getType();
3901 Variable *T = makeReg(SrcTy); 3900 Variable *T = makeReg(SrcTy);
3902 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); 3901 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);
3903 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); 3902 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);
3904 if (InstructionSet >= SSE4_1) { 3903 if (InstructionSet >= SSE4_1) {
3905 // TODO(wala): If the condition operand is a constant, use blendps 3904 // TODO(wala): If the condition operand is a constant, use blendps
3906 // or pblendw. 3905 // or pblendw.
3907 // 3906 //
3908 // Use blendvps or pblendvb to implement select. 3907 // Use blendvps or pblendvb to implement select.
3909 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || 3908 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||
3910 SrcTy == IceType_v4f32) { 3909 SrcTy == IceType_v4f32) {
3911 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem); 3910 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);
3912 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0); 3911 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0);
3913 _movp(xmm0, ConditionRM); 3912 _movp(xmm0, ConditionRM);
3914 _psll(xmm0, Ctx->getConstantInt32(IceType_i8, 31)); 3913 _psll(xmm0, Ctx->getConstantInt32(IceType_i8, 31));
3915 _movp(T, SrcFRM); 3914 _movp(T, SrcFRM);
3916 _blendvps(T, SrcTRM, xmm0); 3915 _blendvps(T, SrcTRM, xmm0);
3917 _movp(Dest, T); 3916 _movp(Dest, T);
3918 } else { 3917 } else {
3919 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); 3918 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);
3920 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16 3919 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16
3921 : IceType_v16i8; 3920 : IceType_v16i8;
3922 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0); 3921 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0);
3923 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); 3922 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));
3924 _movp(T, SrcFRM); 3923 _movp(T, SrcFRM);
3925 _pblendvb(T, SrcTRM, xmm0); 3924 _pblendvb(T, SrcTRM, xmm0);
3926 _movp(Dest, T); 3925 _movp(Dest, T);
3927 } 3926 }
3928 return; 3927 return;
3929 } 3928 }
3930 // Lower select without SSE4.1: 3929 // Lower select without SSE4.1:
3931 // a=d?b:c ==> 3930 // a=d?b:c ==>
3932 // if elementtype(d) != i1: 3931 // if elementtype(d) != i1:
(...skipping 27 matching lines...) Expand all
3960 InstX8632Label *Label = InstX8632Label::create(Func, this); 3959 InstX8632Label *Label = InstX8632Label::create(Func, this);
3961 3960
3962 if (Dest->getType() == IceType_i64) { 3961 if (Dest->getType() == IceType_i64) {
3963 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); 3962 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
3964 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); 3963 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3965 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true); 3964 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true);
3966 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true); 3965 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true);
3967 _cmp(ConditionRM, Zero); 3966 _cmp(ConditionRM, Zero);
3968 _mov(DestLo, SrcLoRI); 3967 _mov(DestLo, SrcLoRI);
3969 _mov(DestHi, SrcHiRI); 3968 _mov(DestHi, SrcHiRI);
3970 _br(InstX8632Br::Br_ne, Label); 3969 _br(CondX86::Br_ne, Label);
3971 Context.insert(InstFakeUse::create(Func, DestLo)); 3970 Context.insert(InstFakeUse::create(Func, DestLo));
3972 Context.insert(InstFakeUse::create(Func, DestHi)); 3971 Context.insert(InstFakeUse::create(Func, DestHi));
3973 Operand *SrcFLo = loOperand(SrcF); 3972 Operand *SrcFLo = loOperand(SrcF);
3974 Operand *SrcFHi = hiOperand(SrcF); 3973 Operand *SrcFHi = hiOperand(SrcF);
3975 SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true); 3974 SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true);
3976 SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true); 3975 SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true);
3977 _mov(DestLo, SrcLoRI); 3976 _mov(DestLo, SrcLoRI);
3978 _mov(DestHi, SrcHiRI); 3977 _mov(DestHi, SrcHiRI);
3979 } else { 3978 } else {
3980 _cmp(ConditionRM, Zero); 3979 _cmp(ConditionRM, Zero);
3981 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true); 3980 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true);
3982 _mov(Dest, SrcT); 3981 _mov(Dest, SrcT);
3983 _br(InstX8632Br::Br_ne, Label); 3982 _br(CondX86::Br_ne, Label);
3984 Context.insert(InstFakeUse::create(Func, Dest)); 3983 Context.insert(InstFakeUse::create(Func, Dest));
3985 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true); 3984 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true);
3986 _mov(Dest, SrcF); 3985 _mov(Dest, SrcF);
3987 } 3986 }
3988 3987
3989 Context.insert(Label); 3988 Context.insert(Label);
3990 } 3989 }
3991 3990
3992 void TargetX8632::lowerStore(const InstStore *Inst) { 3991 void TargetX8632::lowerStore(const InstStore *Inst) {
3993 Operand *Value = Inst->getData(); 3992 Operand *Value = Inst->getData();
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
4041 // OK, we'll be slightly less naive by forcing Src into a physical 4040 // OK, we'll be slightly less naive by forcing Src into a physical
4042 // register if there are 2 or more uses. 4041 // register if there are 2 or more uses.
4043 if (NumCases >= 2) 4042 if (NumCases >= 2)
4044 Src0 = legalizeToVar(Src0, true); 4043 Src0 = legalizeToVar(Src0, true);
4045 else 4044 else
4046 Src0 = legalize(Src0, Legal_Reg | Legal_Mem, true); 4045 Src0 = legalize(Src0, Legal_Reg | Legal_Mem, true);
4047 for (SizeT I = 0; I < NumCases; ++I) { 4046 for (SizeT I = 0; I < NumCases; ++I) {
4048 // TODO(stichnot): Correct lowering for IceType_i64. 4047 // TODO(stichnot): Correct lowering for IceType_i64.
4049 Constant *Value = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I)); 4048 Constant *Value = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I));
4050 _cmp(Src0, Value); 4049 _cmp(Src0, Value);
4051 _br(InstX8632Br::Br_e, Inst->getLabel(I)); 4050 _br(CondX86::Br_e, Inst->getLabel(I));
4052 } 4051 }
4053 4052
4054 _br(Inst->getLabelDefault()); 4053 _br(Inst->getLabelDefault());
4055 } 4054 }
4056 4055
4057 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind, 4056 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,
4058 Variable *Dest, Operand *Src0, 4057 Variable *Dest, Operand *Src0,
4059 Operand *Src1) { 4058 Operand *Src1) {
4060 assert(isVectorType(Dest->getType())); 4059 assert(isVectorType(Dest->getType()));
4061 Type Ty = Dest->getType(); 4060 Type Ty = Dest->getType();
(...skipping 487 matching lines...) Expand 10 before | Expand all | Expand 10 after
4549 Str << "\t.align\t" << Align << "\n"; 4548 Str << "\t.align\t" << Align << "\n";
4550 Str << MangledName << ":\n"; 4549 Str << MangledName << ":\n";
4551 for (SizeT i = 0; i < Size; ++i) { 4550 for (SizeT i = 0; i < Size; ++i) {
4552 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n"; 4551 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";
4553 } 4552 }
4554 Str << "\t.size\t" << MangledName << ", " << Size << "\n"; 4553 Str << "\t.size\t" << MangledName << ", " << Size << "\n";
4555 } 4554 }
4556 } 4555 }
4557 4556
4558 } // end of namespace Ice 4557 } // end of namespace Ice
OLDNEW
« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698