src/IceTargetLoweringX8632.cpp - Issue 582113003: Lift register and condition code enums out into their own file.

Side by Side Diff: src/IceTargetLoweringX8632.cpp

Issue 582113003: Lift register and condition code enums out into their own file. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master

Patch Set: add first and last Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//	1 //===- subzero/src/IceTargetLoweringX8632.cpp - x86-32 lowering -----------===//

2 //	2 //

3 // The Subzero Code Generator	3 // The Subzero Code Generator

4 //	4 //

5 // This file is distributed under the University of Illinois Open Source	5 // This file is distributed under the University of Illinois Open Source

6 // License. See LICENSE.TXT for details.	6 // License. See LICENSE.TXT for details.

7 //	7 //

8 //===----------------------------------------------------------------------===//	8 //===----------------------------------------------------------------------===//

9 //	9 //

10 // This file implements the TargetLoweringX8632 class, which	10 // This file implements the TargetLoweringX8632 class, which

11 // consists almost entirely of the lowering sequence for each	11 // consists almost entirely of the lowering sequence for each

12 // high-level instruction. It also implements	12 // high-level instruction. It also implements

13 // TargetX8632Fast::postLower() which does the simplest possible	13 // TargetX8632Fast::postLower() which does the simplest possible

14 // register allocation for the "fast" target.	14 // register allocation for the "fast" target.

15 //	15 //

16 //===----------------------------------------------------------------------===//	16 //===----------------------------------------------------------------------===//

17	17

18 #include "IceDefs.h"	18 #include "IceDefs.h"

19 #include "IceCfg.h"	19 #include "IceCfg.h"

20 #include "IceCfgNode.h"	20 #include "IceCfgNode.h"

21 #include "IceClFlags.h"	21 #include "IceClFlags.h"

22 #include "IceInstX8632.h"	22 #include "IceInstX8632.h"

23 #include "IceOperand.h"	23 #include "IceOperand.h"

	24 #include "IceRegistersX8632.h"

24 #include "IceTargetLoweringX8632.def"	25 #include "IceTargetLoweringX8632.def"

25 #include "IceTargetLoweringX8632.h"	26 #include "IceTargetLoweringX8632.h"

26 #include "llvm/ADT/DenseMap.h"	27 #include "llvm/ADT/DenseMap.h"

27 #include "llvm/Support/MathExtras.h"	28 #include "llvm/Support/MathExtras.h"

28 #include "llvm/Support/CommandLine.h"	29 #include "llvm/Support/CommandLine.h"

29	30

30 namespace Ice {	31 namespace Ice {

31	32

32 namespace {	33 namespace {

33	34

(...skipping 10 matching lines...) Expand all Loading...
44 // table by hand, good execution tests are helpful.	45 // table by hand, good execution tests are helpful.

45 //	46 //

46 // The last two columns describe the case when the operands are vectors	47 // The last two columns describe the case when the operands are vectors

47 // of floating point values. For most fcmp conditions, there is a clear	48 // of floating point values. For most fcmp conditions, there is a clear

48 // mapping to a single x86 cmpps instruction variant. Some fcmp	49 // mapping to a single x86 cmpps instruction variant. Some fcmp

49 // conditions require special code to handle and these are marked in the	50 // conditions require special code to handle and these are marked in the

50 // table with a Cmpps_Invalid predicate.	51 // table with a Cmpps_Invalid predicate.

51 const struct TableFcmp_ {	52 const struct TableFcmp_ {

52 uint32_t Default;	53 uint32_t Default;

53 bool SwapScalarOperands;	54 bool SwapScalarOperands;

54 InstX8632::BrCond C1, C2;	55 CondX86::BrCond C1, C2;

55 bool SwapVectorOperands;	56 bool SwapVectorOperands;

56 InstX8632Cmpps::CmppsCond Predicate;	57 CondX86::CmppsCond Predicate;

57 } TableFcmp[] = {	58 } TableFcmp[] = {

58 #define X(val, dflt, swapS, C1, C2, swapV, pred) \	59 #define X(val, dflt, swapS, C1, C2, swapV, pred) \

59 { \	60 { dflt, swapS, CondX86::C1, CondX86::C2, swapV, CondX86::pred } \

60 dflt, swapS, InstX8632Br::C1, InstX8632Br::C2, swapV, InstX8632Cmpps::pred \

61 } \

62 ,	61 ,

63 FCMPX8632_TABLE	62 FCMPX8632_TABLE

64 #undef X	63 #undef X

65 };	64 };

66 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);	65 const size_t TableFcmpSize = llvm::array_lengthof(TableFcmp);

67	66

68 // The following table summarizes the logic for lowering the icmp instruction	67 // The following table summarizes the logic for lowering the icmp instruction

69 // for i32 and narrower types. Each icmp condition has a clear mapping to an	68 // for i32 and narrower types. Each icmp condition has a clear mapping to an

70 // x86 conditional branch instruction.	69 // x86 conditional branch instruction.

71	70

72 const struct TableIcmp32_ {	71 const struct TableIcmp32_ {

73 InstX8632::BrCond Mapping;	72 CondX86::BrCond Mapping;

74 } TableIcmp32[] = {	73 } TableIcmp32[] = {

75 #define X(val, C_32, C1_64, C2_64, C3_64) \	74 #define X(val, C_32, C1_64, C2_64, C3_64) \

76 { InstX8632Br::C_32 } \	75 { CondX86::C_32 } \

77 ,	76 ,

78 ICMPX8632_TABLE	77 ICMPX8632_TABLE

79 #undef X	78 #undef X

80 };	79 };

81 const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);	80 const size_t TableIcmp32Size = llvm::array_lengthof(TableIcmp32);

82	81

83 // The following table summarizes the logic for lowering the icmp instruction	82 // The following table summarizes the logic for lowering the icmp instruction

84 // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and	83 // for the i64 type. For Eq and Ne, two separate 32-bit comparisons and

85 // conditional branches are needed. For the other conditions, three separate	84 // conditional branches are needed. For the other conditions, three separate

86 // conditional branches are needed.	85 // conditional branches are needed.

87 const struct TableIcmp64_ {	86 const struct TableIcmp64_ {

88 InstX8632::BrCond C1, C2, C3;	87 CondX86::BrCond C1, C2, C3;

89 } TableIcmp64[] = {	88 } TableIcmp64[] = {

90 #define X(val, C_32, C1_64, C2_64, C3_64) \	89 #define X(val, C_32, C1_64, C2_64, C3_64) \

91 { InstX8632Br::C1_64, InstX8632Br::C2_64, InstX8632Br::C3_64 } \	90 { CondX86::C1_64, CondX86::C2_64, CondX86::C3_64 } \

92 ,	91 ,

93 ICMPX8632_TABLE	92 ICMPX8632_TABLE

94 #undef X	93 #undef X

95 };	94 };

96 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);	95 const size_t TableIcmp64Size = llvm::array_lengthof(TableIcmp64);

97	96

98 InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {	97 CondX86::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {

99 size_t Index = static_cast<size_t>(Cond);	98 size_t Index = static_cast<size_t>(Cond);

100 assert(Index < TableIcmp32Size);	99 assert(Index < TableIcmp32Size);

101 return TableIcmp32[Index].Mapping;	100 return TableIcmp32[Index].Mapping;

102 }	101 }

103	102

104 const struct TableTypeX8632Attributes_ {	103 const struct TableTypeX8632Attributes_ {

105 Type InVectorElementType;	104 Type InVectorElementType;

106 } TableTypeX8632Attributes[] = {	105 } TableTypeX8632Attributes[] = {

107 #define X(tag, elementty, cvt, sdss, pack, width) \	106 #define X(tag, elementty, cvt, sdss, pack, width) \

108 { elementty } \	107 { elementty } \

(...skipping 148 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
257 #undef X	256 #undef X

258 }	257 }

259 }	258 }

260	259

261 } // end of anonymous namespace	260 } // end of anonymous namespace

262	261

263 TargetX8632::TargetX8632(Cfg *Func)	262 TargetX8632::TargetX8632(Cfg *Func)

264 : TargetLowering(Func), InstructionSet(CLInstructionSet),	263 : TargetLowering(Func), InstructionSet(CLInstructionSet),

265 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0),	264 IsEbpBasedFrame(false), NeedsStackAlignment(false), FrameSizeLocals(0),

266 SpillAreaSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),	265 SpillAreaSizeBytes(0), NextLabelNumber(0), ComputedLiveRanges(false),

267 PhysicalRegisters(VarList(Reg_NUM)) {	266 PhysicalRegisters(VarList(RegX8632::Reg_NUM)) {

268 // TODO: Don't initialize IntegerRegisters and friends every time.	267 // TODO: Don't initialize IntegerRegisters and friends every time.

269 // Instead, initialize in some sort of static initializer for the	268 // Instead, initialize in some sort of static initializer for the

270 // class.	269 // class.

271 llvm::SmallBitVector IntegerRegisters(Reg_NUM);	270 llvm::SmallBitVector IntegerRegisters(RegX8632::Reg_NUM);

272 llvm::SmallBitVector IntegerRegistersI8(Reg_NUM);	271 llvm::SmallBitVector IntegerRegistersI8(RegX8632::Reg_NUM);

273 llvm::SmallBitVector FloatRegisters(Reg_NUM);	272 llvm::SmallBitVector FloatRegisters(RegX8632::Reg_NUM);

274 llvm::SmallBitVector VectorRegisters(Reg_NUM);	273 llvm::SmallBitVector VectorRegisters(RegX8632::Reg_NUM);

275 llvm::SmallBitVector InvalidRegisters(Reg_NUM);	274 llvm::SmallBitVector InvalidRegisters(RegX8632::Reg_NUM);

276 ScratchRegs.resize(Reg_NUM);	275 ScratchRegs.resize(RegX8632::Reg_NUM);

277 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \	276 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \

278 frameptr, isI8, isInt, isFP) \	277 frameptr, isI8, isInt, isFP) \

279 IntegerRegisters[val] = isInt; \	278 IntegerRegisters[RegX8632::val] = isInt; \

280 IntegerRegistersI8[val] = isI8; \	279 IntegerRegistersI8[RegX8632::val] = isI8; \

281 FloatRegisters[val] = isFP; \	280 FloatRegisters[RegX8632::val] = isFP; \

282 VectorRegisters[val] = isFP; \	281 VectorRegisters[RegX8632::val] = isFP; \

283 ScratchRegs[val] = scratch;	282 ScratchRegs[RegX8632::val] = scratch;

284 REGX8632_TABLE;	283 REGX8632_TABLE;

285 #undef X	284 #undef X

286 TypeToRegisterSet[IceType_void] = InvalidRegisters;	285 TypeToRegisterSet[IceType_void] = InvalidRegisters;

287 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;	286 TypeToRegisterSet[IceType_i1] = IntegerRegistersI8;

288 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;	287 TypeToRegisterSet[IceType_i8] = IntegerRegistersI8;

289 TypeToRegisterSet[IceType_i16] = IntegerRegisters;	288 TypeToRegisterSet[IceType_i16] = IntegerRegisters;

290 TypeToRegisterSet[IceType_i32] = IntegerRegisters;	289 TypeToRegisterSet[IceType_i32] = IntegerRegisters;

291 TypeToRegisterSet[IceType_i64] = IntegerRegisters;	290 TypeToRegisterSet[IceType_i64] = IntegerRegisters;

292 TypeToRegisterSet[IceType_f32] = FloatRegisters;	291 TypeToRegisterSet[IceType_f32] = FloatRegisters;

293 TypeToRegisterSet[IceType_f64] = FloatRegisters;	292 TypeToRegisterSet[IceType_f64] = FloatRegisters;

(...skipping 159 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
453 }	452 }

454	453

455 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {	454 bool TargetX8632::doBranchOpt(Inst *I, const CfgNode *NextNode) {

456 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {	455 if (InstX8632Br *Br = llvm::dyn_cast<InstX8632Br>(I)) {

457 return Br->optimizeBranch(NextNode);	456 return Br->optimizeBranch(NextNode);

458 }	457 }

459 return false;	458 return false;

460 }	459 }

461	460

462 IceString TargetX8632::RegNames[] = {	461 IceString TargetX8632::RegNames[] = {

463 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \	462 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \

464 frameptr, isI8, isInt, isFP) \	463 frameptr, isI8, isInt, isFP) \

465 name,	464 name,

466 REGX8632_TABLE	465 REGX8632_TABLE

467 #undef X	466 #undef X

468 };	467 };

469	468

470 Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) {	469 Variable *TargetX8632::getPhysicalRegister(SizeT RegNum) {

471 assert(RegNum < PhysicalRegisters.size());	470 assert(RegNum < PhysicalRegisters.size());

472 Variable *Reg = PhysicalRegisters[RegNum];	471 Variable *Reg = PhysicalRegisters[RegNum];

473 if (Reg == NULL) {	472 if (Reg == NULL) {

474 CfgNode *Node = NULL; // NULL means multi-block lifetime	473 CfgNode *Node = NULL; // NULL means multi-block lifetime

475 Reg = Func->makeVariable(IceType_i32, Node);	474 Reg = Func->makeVariable(IceType_i32, Node);

476 Reg->setRegNum(RegNum);	475 Reg->setRegNum(RegNum);

477 PhysicalRegisters[RegNum] = Reg;	476 PhysicalRegisters[RegNum] = Reg;

478 }	477 }

479 return Reg;	478 return Reg;

480 }	479 }

481	480

482 IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {	481 IceString TargetX8632::getRegName(SizeT RegNum, Type Ty) const {

483 assert(RegNum < Reg_NUM);	482 assert(RegNum < RegX8632::Reg_NUM);

484 static IceString RegNames8[] = {	483 static IceString RegNames8[] = {

485 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \	484 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \

486 frameptr, isI8, isInt, isFP) \	485 frameptr, isI8, isInt, isFP) \

487 name8,	486 name8,

488 REGX8632_TABLE	487 REGX8632_TABLE

489 #undef X	488 #undef X

490 };	489 };

491 static IceString RegNames16[] = {	490 static IceString RegNames16[] = {

492 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \	491 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \

493 frameptr, isI8, isInt, isFP) \	492 frameptr, isI8, isInt, isFP) \

494 name16,	493 name16,

495 REGX8632_TABLE	494 REGX8632_TABLE

496 #undef X	495 #undef X

497 };	496 };

498 switch (Ty) {	497 switch (Ty) {

499 case IceType_i1:	498 case IceType_i1:

500 case IceType_i8:	499 case IceType_i8:

501 return RegNames8[RegNum];	500 return RegNames8[RegNum];

502 case IceType_i16:	501 case IceType_i16:

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
539	538

540 for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS;	539 for (SizeT I = 0, E = Args.size(); I < E && NumXmmArgs < X86_MAX_XMM_ARGS;

541 ++I) {	540 ++I) {

542 Variable *Arg = Args[I];	541 Variable *Arg = Args[I];

543 Type Ty = Arg->getType();	542 Type Ty = Arg->getType();

544 if (!isVectorType(Ty))	543 if (!isVectorType(Ty))

545 continue;	544 continue;

546 // Replace Arg in the argument list with the home register. Then	545 // Replace Arg in the argument list with the home register. Then

547 // generate an instruction in the prolog to copy the home register	546 // generate an instruction in the prolog to copy the home register

548 // to the assigned location of Arg.	547 // to the assigned location of Arg.

549 int32_t RegNum = Reg_xmm0 + NumXmmArgs;	548 int32_t RegNum = RegX8632::Reg_xmm0 + NumXmmArgs;

550 ++NumXmmArgs;	549 ++NumXmmArgs;

551 IceString Name = "home_reg:" + Arg->getName();	550 IceString Name = "home_reg:" + Arg->getName();

552 const CfgNode *DefNode = NULL;	551 const CfgNode *DefNode = NULL;

553 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name);	552 Variable *RegisterArg = Func->makeVariable(Ty, DefNode, Name);

554 RegisterArg->setRegNum(RegNum);	553 RegisterArg->setRegNum(RegNum);

555 RegisterArg->setIsArg(Func);	554 RegisterArg->setIsArg(Func);

556 Arg->setIsArg(Func, false);	555 Arg->setIsArg(Func, false);

557	556

558 Args[I] = RegisterArg;	557 Args[I] = RegisterArg;

559 Context.insert(InstAssign::create(Func, Arg, RegisterArg));	558 Context.insert(InstAssign::create(Func, Arg, RegisterArg));

(...skipping 215 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
775 _push(getPhysicalRegister(i), SuppressStackAdjustment);	774 _push(getPhysicalRegister(i), SuppressStackAdjustment);

776 }	775 }

777 }	776 }

778 Ctx->statsUpdateRegistersSaved(NumCallee);	777 Ctx->statsUpdateRegistersSaved(NumCallee);

779	778

780 // Generate "push ebp; mov ebp, esp"	779 // Generate "push ebp; mov ebp, esp"

781 if (IsEbpBasedFrame) {	780 if (IsEbpBasedFrame) {

782 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))	781 assert((RegsUsed & getRegisterSet(RegSet_FramePointer, RegSet_None))

783 .count() == 0);	782 .count() == 0);

784 PreservedRegsSizeBytes += 4;	783 PreservedRegsSizeBytes += 4;

785 Variable *ebp = getPhysicalRegister(Reg_ebp);	784 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);

786 Variable *esp = getPhysicalRegister(Reg_esp);	785 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);

787 const bool SuppressStackAdjustment = true;	786 const bool SuppressStackAdjustment = true;

788 _push(ebp, SuppressStackAdjustment);	787 _push(ebp, SuppressStackAdjustment);

789 _mov(ebp, esp);	788 _mov(ebp, esp);

790 }	789 }

791	790

792 // Align the variables area. SpillAreaPaddingBytes is the size of	791 // Align the variables area. SpillAreaPaddingBytes is the size of

793 // the region after the preserved registers and before the spill	792 // the region after the preserved registers and before the spill

794 // areas.	793 // areas.

795 uint32_t SpillAreaPaddingBytes = 0;	794 uint32_t SpillAreaPaddingBytes = 0;

796 if (SpillAreaAlignmentBytes) {	795 if (SpillAreaAlignmentBytes) {

(...skipping 17 matching lines...) Expand all Loading...
814	813

815 // Align esp if necessary.	814 // Align esp if necessary.

816 if (NeedsStackAlignment) {	815 if (NeedsStackAlignment) {

817 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;	816 uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes;

818 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);	817 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);

819 SpillAreaSizeBytes = StackSize - StackOffset;	818 SpillAreaSizeBytes = StackSize - StackOffset;

820 }	819 }

821	820

822 // Generate "sub esp, SpillAreaSizeBytes"	821 // Generate "sub esp, SpillAreaSizeBytes"

823 if (SpillAreaSizeBytes)	822 if (SpillAreaSizeBytes)

824 _sub(getPhysicalRegister(Reg_esp),	823 _sub(getPhysicalRegister(RegX8632::Reg_esp),

825 Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));	824 Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));

826 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);	825 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

827	826

828 resetStackAdjustment();	827 resetStackAdjustment();

829	828

830 // Fill in stack offsets for stack args, and copy args into registers	829 // Fill in stack offsets for stack args, and copy args into registers

831 // for those that were register-allocated. Args are pushed right to	830 // for those that were register-allocated. Args are pushed right to

832 // left, so Arg[0] is closest to the stack/frame pointer.	831 // left, so Arg[0] is closest to the stack/frame pointer.

833 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());	832 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());

834 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;	833 size_t BasicFrameOffset = PreservedRegsSizeBytes + X86_RET_IP_SIZE_BYTES;

(...skipping 89 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
924 if (RI == E)	923 if (RI == E)

925 return;	924 return;

926	925

927 // Convert the reverse_iterator position into its corresponding	926 // Convert the reverse_iterator position into its corresponding

928 // (forward) iterator position.	927 // (forward) iterator position.

929 InstList::iterator InsertPoint = RI.base();	928 InstList::iterator InsertPoint = RI.base();

930 --InsertPoint;	929 --InsertPoint;

931 Context.init(Node);	930 Context.init(Node);

932 Context.setInsertPoint(InsertPoint);	931 Context.setInsertPoint(InsertPoint);

933	932

934 Variable *esp = getPhysicalRegister(Reg_esp);	933 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);

935 if (IsEbpBasedFrame) {	934 if (IsEbpBasedFrame) {

936 Variable *ebp = getPhysicalRegister(Reg_ebp);	935 Variable *ebp = getPhysicalRegister(RegX8632::Reg_ebp);

937 _mov(esp, ebp);	936 _mov(esp, ebp);

938 _pop(ebp);	937 _pop(ebp);

939 } else {	938 } else {

940 // add esp, SpillAreaSizeBytes	939 // add esp, SpillAreaSizeBytes

941 if (SpillAreaSizeBytes)	940 if (SpillAreaSizeBytes)

942 _add(esp, Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));	941 _add(esp, Ctx->getConstantInt32(IceType_i32, SpillAreaSizeBytes));

943 }	942 }

944	943

945 // Add pop instructions for preserved registers.	944 // Add pop instructions for preserved registers.

946 llvm::SmallBitVector CalleeSaves =	945 llvm::SmallBitVector CalleeSaves =

947 getRegisterSet(RegSet_CalleeSave, RegSet_None);	946 getRegisterSet(RegSet_CalleeSave, RegSet_None);

948 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {	947 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {

949 SizeT j = CalleeSaves.size() - i - 1;	948 SizeT j = CalleeSaves.size() - i - 1;

950 if (j == Reg_ebp && IsEbpBasedFrame)	949 if (j == RegX8632::Reg_ebp && IsEbpBasedFrame)

951 continue;	950 continue;

952 if (CalleeSaves[j] && RegsUsed[j]) {	951 if (CalleeSaves[j] && RegsUsed[j]) {

953 _pop(getPhysicalRegister(j));	952 _pop(getPhysicalRegister(j));

954 }	953 }

955 }	954 }

956 }	955 }

957	956

958 template <typename T> struct PoolTypeConverter {};	957 template <typename T> struct PoolTypeConverter {};

959	958

960 template <> struct PoolTypeConverter<float> {	959 template <> struct PoolTypeConverter<float> {

(...skipping 136 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1097 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,	1096 return OperandX8632Mem::create(Func, IceType_i32, Mem->getBase(), Offset,

1098 Mem->getIndex(), Mem->getShift(),	1097 Mem->getIndex(), Mem->getShift(),

1099 Mem->getSegmentRegister());	1098 Mem->getSegmentRegister());

1100 }	1099 }

1101 llvm_unreachable("Unsupported operand type");	1100 llvm_unreachable("Unsupported operand type");

1102 return NULL;	1101 return NULL;

1103 }	1102 }

1104	1103

1105 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,	1104 llvm::SmallBitVector TargetX8632::getRegisterSet(RegSetMask Include,

1106 RegSetMask Exclude) const {	1105 RegSetMask Exclude) const {

1107 llvm::SmallBitVector Registers(Reg_NUM);	1106 llvm::SmallBitVector Registers(RegX8632::Reg_NUM);

1108	1107

1109 #define X(val, init, name, name16, name8, scratch, preserved, stackptr, \	1108 #define X(val, encode, name, name16, name8, scratch, preserved, stackptr, \

1110 frameptr, isI8, isInt, isFP) \	1109 frameptr, isI8, isInt, isFP) \

1111 if (scratch && (Include & RegSet_CallerSave)) \	1110 if (scratch && (Include & RegSet_CallerSave)) \

1112 Registers[val] = true; \	1111 Registers[RegX8632::val] = true; \

1113 if (preserved && (Include & RegSet_CalleeSave)) \	1112 if (preserved && (Include & RegSet_CalleeSave)) \

1114 Registers[val] = true; \	1113 Registers[RegX8632::val] = true; \

1115 if (stackptr && (Include & RegSet_StackPointer)) \	1114 if (stackptr && (Include & RegSet_StackPointer)) \

1116 Registers[val] = true; \	1115 Registers[RegX8632::val] = true; \

1117 if (frameptr && (Include & RegSet_FramePointer)) \	1116 if (frameptr && (Include & RegSet_FramePointer)) \

1118 Registers[val] = true; \	1117 Registers[RegX8632::val] = true; \

1119 if (scratch && (Exclude & RegSet_CallerSave)) \	1118 if (scratch && (Exclude & RegSet_CallerSave)) \

1120 Registers[val] = false; \	1119 Registers[RegX8632::val] = false; \

1121 if (preserved && (Exclude & RegSet_CalleeSave)) \	1120 if (preserved && (Exclude & RegSet_CalleeSave)) \

1122 Registers[val] = false; \	1121 Registers[RegX8632::val] = false; \

1123 if (stackptr && (Exclude & RegSet_StackPointer)) \	1122 if (stackptr && (Exclude & RegSet_StackPointer)) \

1124 Registers[val] = false; \	1123 Registers[RegX8632::val] = false; \

1125 if (frameptr && (Exclude & RegSet_FramePointer)) \	1124 if (frameptr && (Exclude & RegSet_FramePointer)) \

1126 Registers[val] = false;	1125 Registers[RegX8632::val] = false;

1127	1126

1128 REGX8632_TABLE	1127 REGX8632_TABLE

1129	1128

1130 #undef X	1129 #undef X

1131	1130

1132 return Registers;	1131 return Registers;

1133 }	1132 }

1134	1133

1135 void TargetX8632::lowerAlloca(const InstAlloca *Inst) {	1134 void TargetX8632::lowerAlloca(const InstAlloca *Inst) {

1136 IsEbpBasedFrame = true;	1135 IsEbpBasedFrame = true;

1137 // Conservatively require the stack to be aligned. Some stack	1136 // Conservatively require the stack to be aligned. Some stack

1138 // adjustment operations implemented below assume that the stack is	1137 // adjustment operations implemented below assume that the stack is

1139 // aligned before the alloca. All the alloca code ensures that the	1138 // aligned before the alloca. All the alloca code ensures that the

1140 // stack alignment is preserved after the alloca. The stack alignment	1139 // stack alignment is preserved after the alloca. The stack alignment

1141 // restriction can be relaxed in some cases.	1140 // restriction can be relaxed in some cases.

1142 NeedsStackAlignment = true;	1141 NeedsStackAlignment = true;

1143	1142

1144 // TODO(sehr,stichnot): minimize the number of adjustments of esp, etc.	1143 // TODO(sehr,stichnot): minimize the number of adjustments of esp, etc.

1145 Variable *esp = getPhysicalRegister(Reg_esp);	1144 Variable *esp = getPhysicalRegister(RegX8632::Reg_esp);

1146 Operand *TotalSize = legalize(Inst->getSizeInBytes());	1145 Operand *TotalSize = legalize(Inst->getSizeInBytes());

1147 Variable *Dest = Inst->getDest();	1146 Variable *Dest = Inst->getDest();

1148 uint32_t AlignmentParam = Inst->getAlignInBytes();	1147 uint32_t AlignmentParam = Inst->getAlignInBytes();

1149 // For default align=0, set it to the real value 1, to avoid any	1148 // For default align=0, set it to the real value 1, to avoid any

1150 // bit-manipulation problems below.	1149 // bit-manipulation problems below.

1151 AlignmentParam = std::max(AlignmentParam, 1u);	1150 AlignmentParam = std::max(AlignmentParam, 1u);

1152	1151

1153 // LLVM enforces power of 2 alignment.	1152 // LLVM enforces power of 2 alignment.

1154 assert((AlignmentParam & (AlignmentParam - 1)) == 0);	1153 assert((AlignmentParam & (AlignmentParam - 1)) == 0);

1155 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);	1154 assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0);

(...skipping 70 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1226 case InstArithmetic::Sub:	1225 case InstArithmetic::Sub:

1227 _mov(T_Lo, Src0Lo);	1226 _mov(T_Lo, Src0Lo);

1228 _sub(T_Lo, Src1Lo);	1227 _sub(T_Lo, Src1Lo);

1229 _mov(DestLo, T_Lo);	1228 _mov(DestLo, T_Lo);

1230 _mov(T_Hi, Src0Hi);	1229 _mov(T_Hi, Src0Hi);

1231 _sbb(T_Hi, Src1Hi);	1230 _sbb(T_Hi, Src1Hi);

1232 _mov(DestHi, T_Hi);	1231 _mov(DestHi, T_Hi);

1233 break;	1232 break;

1234 case InstArithmetic::Mul: {	1233 case InstArithmetic::Mul: {

1235 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;	1234 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;

1236 Variable *T_4Lo = makeReg(IceType_i32, Reg_eax);	1235 Variable *T_4Lo = makeReg(IceType_i32, RegX8632::Reg_eax);

1237 Variable *T_4Hi = makeReg(IceType_i32, Reg_edx);	1236 Variable *T_4Hi = makeReg(IceType_i32, RegX8632::Reg_edx);

1238 // gcc does the following:	1237 // gcc does the following:

1239 // a=b*c ==>	1238 // a=b*c ==>

1240 // t1 = b.hi; t1 *=(imul) c.lo	1239 // t1 = b.hi; t1 *=(imul) c.lo

1241 // t2 = c.hi; t2 *=(imul) b.lo	1240 // t2 = c.hi; t2 *=(imul) b.lo

1242 // t3:eax = b.lo	1241 // t3:eax = b.lo

1243 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo	1242 // t4.hi:edx,t4.lo:eax = t3:eax *(mul) c.lo

1244 // a.lo = t4.lo	1243 // a.lo = t4.lo

1245 // t4.hi += t1	1244 // t4.hi += t1

1246 // t4.hi += t2	1245 // t4.hi += t2

1247 // a.hi = t4.hi	1246 // a.hi = t4.hi

1248 // The mul instruction cannot take an immediate operand.	1247 // The mul instruction cannot take an immediate operand.

1249 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);	1248 Src1Lo = legalize(Src1Lo, Legal_Reg | Legal_Mem);

1250 _mov(T_1, Src0Hi);	1249 _mov(T_1, Src0Hi);

1251 _imul(T_1, Src1Lo);	1250 _imul(T_1, Src1Lo);

1252 _mov(T_2, Src1Hi);	1251 _mov(T_2, Src1Hi);

1253 _imul(T_2, Src0Lo);	1252 _imul(T_2, Src0Lo);

1254 _mov(T_3, Src0Lo, Reg_eax);	1253 _mov(T_3, Src0Lo, RegX8632::Reg_eax);

1255 _mul(T_4Lo, T_3, Src1Lo);	1254 _mul(T_4Lo, T_3, Src1Lo);

1256 // The mul instruction produces two dest variables, edx:eax. We	1255 // The mul instruction produces two dest variables, edx:eax. We

1257 // create a fake definition of edx to account for this.	1256 // create a fake definition of edx to account for this.

1258 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));	1257 Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo));

1259 _mov(DestLo, T_4Lo);	1258 _mov(DestLo, T_4Lo);

1260 _add(T_4Hi, T_1);	1259 _add(T_4Hi, T_1);

1261 _add(T_4Hi, T_2);	1260 _add(T_4Hi, T_2);

1262 _mov(DestHi, T_4Hi);	1261 _mov(DestHi, T_4Hi);

1263 } break;	1262 } break;

1264 case InstArithmetic::Shl: {	1263 case InstArithmetic::Shl: {

(...skipping 10 matching lines...) Expand all Loading...
1275 // use(t3)	1274 // use(t3)

1276 // t3 = t2	1275 // t3 = t2

1277 // t2 = 0	1276 // t2 = 0

1278 // L1:	1277 // L1:

1279 // a.lo = t2	1278 // a.lo = t2

1280 // a.hi = t3	1279 // a.hi = t3

1281 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;	1280 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;

1282 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);	1281 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);

1283 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1282 Constant *Zero = Ctx->getConstantZero(IceType_i32);

1284 InstX8632Label *Label = InstX8632Label::create(Func, this);	1283 InstX8632Label *Label = InstX8632Label::create(Func, this);

1285 _mov(T_1, Src1Lo, Reg_ecx);	1284 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);

1286 _mov(T_2, Src0Lo);	1285 _mov(T_2, Src0Lo);

1287 _mov(T_3, Src0Hi);	1286 _mov(T_3, Src0Hi);

1288 _shld(T_3, T_2, T_1);	1287 _shld(T_3, T_2, T_1);

1289 _shl(T_2, T_1);	1288 _shl(T_2, T_1);

1290 _test(T_1, BitTest);	1289 _test(T_1, BitTest);

1291 _br(InstX8632Br::Br_e, Label);	1290 _br(CondX86::Br_e, Label);

1292 // Because of the intra-block control flow, we need to fake a use	1291 // Because of the intra-block control flow, we need to fake a use

1293 // of T_3 to prevent its earlier definition from being dead-code	1292 // of T_3 to prevent its earlier definition from being dead-code

1294 // eliminated in the presence of its later definition.	1293 // eliminated in the presence of its later definition.

1295 Context.insert(InstFakeUse::create(Func, T_3));	1294 Context.insert(InstFakeUse::create(Func, T_3));

1296 _mov(T_3, T_2);	1295 _mov(T_3, T_2);

1297 _mov(T_2, Zero);	1296 _mov(T_2, Zero);

1298 Context.insert(Label);	1297 Context.insert(Label);

1299 _mov(DestLo, T_2);	1298 _mov(DestLo, T_2);

1300 _mov(DestHi, T_3);	1299 _mov(DestHi, T_3);

1301 } break;	1300 } break;

1302 case InstArithmetic::Lshr: {	1301 case InstArithmetic::Lshr: {

1303 // a=b>>c (unsigned) ==>	1302 // a=b>>c (unsigned) ==>

1304 // t1:ecx = c.lo & 0xff	1303 // t1:ecx = c.lo & 0xff

1305 // t2 = b.lo	1304 // t2 = b.lo

1306 // t3 = b.hi	1305 // t3 = b.hi

1307 // t2 = shrd t2, t3, t1	1306 // t2 = shrd t2, t3, t1

1308 // t3 = shr t3, t1	1307 // t3 = shr t3, t1

1309 // test t1, 0x20	1308 // test t1, 0x20

1310 // je L1	1309 // je L1

1311 // use(t2)	1310 // use(t2)

1312 // t2 = t3	1311 // t2 = t3

1313 // t3 = 0	1312 // t3 = 0

1314 // L1:	1313 // L1:

1315 // a.lo = t2	1314 // a.lo = t2

1316 // a.hi = t3	1315 // a.hi = t3

1317 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;	1316 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;

1318 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);	1317 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);

1319 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1318 Constant *Zero = Ctx->getConstantZero(IceType_i32);

1320 InstX8632Label *Label = InstX8632Label::create(Func, this);	1319 InstX8632Label *Label = InstX8632Label::create(Func, this);

1321 _mov(T_1, Src1Lo, Reg_ecx);	1320 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);

1322 _mov(T_2, Src0Lo);	1321 _mov(T_2, Src0Lo);

1323 _mov(T_3, Src0Hi);	1322 _mov(T_3, Src0Hi);

1324 _shrd(T_2, T_3, T_1);	1323 _shrd(T_2, T_3, T_1);

1325 _shr(T_3, T_1);	1324 _shr(T_3, T_1);

1326 _test(T_1, BitTest);	1325 _test(T_1, BitTest);

1327 _br(InstX8632Br::Br_e, Label);	1326 _br(CondX86::Br_e, Label);

1328 // Because of the intra-block control flow, we need to fake a use	1327 // Because of the intra-block control flow, we need to fake a use

1329 // of T_3 to prevent its earlier definition from being dead-code	1328 // of T_3 to prevent its earlier definition from being dead-code

1330 // eliminated in the presence of its later definition.	1329 // eliminated in the presence of its later definition.

1331 Context.insert(InstFakeUse::create(Func, T_2));	1330 Context.insert(InstFakeUse::create(Func, T_2));

1332 _mov(T_2, T_3);	1331 _mov(T_2, T_3);

1333 _mov(T_3, Zero);	1332 _mov(T_3, Zero);

1334 Context.insert(Label);	1333 Context.insert(Label);

1335 _mov(DestLo, T_2);	1334 _mov(DestLo, T_2);

1336 _mov(DestHi, T_3);	1335 _mov(DestHi, T_3);

1337 } break;	1336 } break;

1338 case InstArithmetic::Ashr: {	1337 case InstArithmetic::Ashr: {

1339 // a=b>>c (signed) ==>	1338 // a=b>>c (signed) ==>

1340 // t1:ecx = c.lo & 0xff	1339 // t1:ecx = c.lo & 0xff

1341 // t2 = b.lo	1340 // t2 = b.lo

1342 // t3 = b.hi	1341 // t3 = b.hi

1343 // t2 = shrd t2, t3, t1	1342 // t2 = shrd t2, t3, t1

1344 // t3 = sar t3, t1	1343 // t3 = sar t3, t1

1345 // test t1, 0x20	1344 // test t1, 0x20

1346 // je L1	1345 // je L1

1347 // use(t2)	1346 // use(t2)

1348 // t2 = t3	1347 // t2 = t3

1349 // t3 = sar t3, 0x1f	1348 // t3 = sar t3, 0x1f

1350 // L1:	1349 // L1:

1351 // a.lo = t2	1350 // a.lo = t2

1352 // a.hi = t3	1351 // a.hi = t3

1353 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;	1352 Variable *T_1 = NULL, *T_2 = NULL, *T_3 = NULL;

1354 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);	1353 Constant *BitTest = Ctx->getConstantInt32(IceType_i32, 0x20);

1355 Constant *SignExtend = Ctx->getConstantInt32(IceType_i32, 0x1f);	1354 Constant *SignExtend = Ctx->getConstantInt32(IceType_i32, 0x1f);

1356 InstX8632Label *Label = InstX8632Label::create(Func, this);	1355 InstX8632Label *Label = InstX8632Label::create(Func, this);

1357 _mov(T_1, Src1Lo, Reg_ecx);	1356 _mov(T_1, Src1Lo, RegX8632::Reg_ecx);

1358 _mov(T_2, Src0Lo);	1357 _mov(T_2, Src0Lo);

1359 _mov(T_3, Src0Hi);	1358 _mov(T_3, Src0Hi);

1360 _shrd(T_2, T_3, T_1);	1359 _shrd(T_2, T_3, T_1);

1361 _sar(T_3, T_1);	1360 _sar(T_3, T_1);

1362 _test(T_1, BitTest);	1361 _test(T_1, BitTest);

1363 _br(InstX8632Br::Br_e, Label);	1362 _br(CondX86::Br_e, Label);

1364 // Because of the intra-block control flow, we need to fake a use	1363 // Because of the intra-block control flow, we need to fake a use

1365 // of T_3 to prevent its earlier definition from being dead-code	1364 // of T_3 to prevent its earlier definition from being dead-code

1366 // eliminated in the presence of its later definition.	1365 // eliminated in the presence of its later definition.

1367 Context.insert(InstFakeUse::create(Func, T_2));	1366 Context.insert(InstFakeUse::create(Func, T_2));

1368 _mov(T_2, T_3);	1367 _mov(T_2, T_3);

1369 _sar(T_3, SignExtend);	1368 _sar(T_3, SignExtend);

1370 Context.insert(Label);	1369 Context.insert(Label);

1371 _mov(DestLo, T_2);	1370 _mov(DestLo, T_2);

1372 _mov(DestHi, T_3);	1371 _mov(DestHi, T_3);

1373 } break;	1372 } break;

(...skipping 194 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1568 break;	1567 break;

1569 case InstArithmetic::Mul:	1568 case InstArithmetic::Mul:

1570 // TODO: Optimize for llvm::isa<Constant>(Src1)	1569 // TODO: Optimize for llvm::isa<Constant>(Src1)

1571 // TODO: Strength-reduce multiplications by a constant,	1570 // TODO: Strength-reduce multiplications by a constant,

1572 // particularly -1 and powers of 2. Advanced: use lea to	1571 // particularly -1 and powers of 2. Advanced: use lea to

1573 // multiply by 3, 5, 9.	1572 // multiply by 3, 5, 9.

1574 //	1573 //

1575 // The 8-bit version of imul only allows the form "imul r/m8"	1574 // The 8-bit version of imul only allows the form "imul r/m8"

1576 // where T must be in eax.	1575 // where T must be in eax.

1577 if (Dest->getType() == IceType_i8)	1576 if (Dest->getType() == IceType_i8)

1578 _mov(T, Src0, Reg_eax);	1577 _mov(T, Src0, RegX8632::Reg_eax);

1579 else	1578 else

1580 _mov(T, Src0);	1579 _mov(T, Src0);

1581 _imul(T, Src1);	1580 _imul(T, Src1);

1582 _mov(Dest, T);	1581 _mov(Dest, T);

1583 break;	1582 break;

1584 case InstArithmetic::Shl:	1583 case InstArithmetic::Shl:

1585 _mov(T, Src0);	1584 _mov(T, Src0);

1586 if (!llvm::isa<Constant>(Src1))	1585 if (!llvm::isa<Constant>(Src1))

1587 Src1 = legalizeToVar(Src1, false, Reg_ecx);	1586 Src1 = legalizeToVar(Src1, false, RegX8632::Reg_ecx);

1588 _shl(T, Src1);	1587 _shl(T, Src1);

1589 _mov(Dest, T);	1588 _mov(Dest, T);

1590 break;	1589 break;

1591 case InstArithmetic::Lshr:	1590 case InstArithmetic::Lshr:

1592 _mov(T, Src0);	1591 _mov(T, Src0);

1593 if (!llvm::isa<Constant>(Src1))	1592 if (!llvm::isa<Constant>(Src1))

1594 Src1 = legalizeToVar(Src1, false, Reg_ecx);	1593 Src1 = legalizeToVar(Src1, false, RegX8632::Reg_ecx);

1595 _shr(T, Src1);	1594 _shr(T, Src1);

1596 _mov(Dest, T);	1595 _mov(Dest, T);

1597 break;	1596 break;

1598 case InstArithmetic::Ashr:	1597 case InstArithmetic::Ashr:

1599 _mov(T, Src0);	1598 _mov(T, Src0);

1600 if (!llvm::isa<Constant>(Src1))	1599 if (!llvm::isa<Constant>(Src1))

1601 Src1 = legalizeToVar(Src1, false, Reg_ecx);	1600 Src1 = legalizeToVar(Src1, false, RegX8632::Reg_ecx);

1602 _sar(T, Src1);	1601 _sar(T, Src1);

1603 _mov(Dest, T);	1602 _mov(Dest, T);

1604 break;	1603 break;

1605 case InstArithmetic::Udiv:	1604 case InstArithmetic::Udiv:

1606 // div and idiv are the few arithmetic operators that do not allow	1605 // div and idiv are the few arithmetic operators that do not allow

1607 // immediates as the operand.	1606 // immediates as the operand.

1608 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);	1607 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);

1609 if (Dest->getType() == IceType_i8) {	1608 if (Dest->getType() == IceType_i8) {

1610 Variable *T_ah = NULL;	1609 Variable *T_ah = NULL;

1611 Constant *Zero = Ctx->getConstantZero(IceType_i8);	1610 Constant *Zero = Ctx->getConstantZero(IceType_i8);

1612 _mov(T, Src0, Reg_eax);	1611 _mov(T, Src0, RegX8632::Reg_eax);

1613 _mov(T_ah, Zero, Reg_ah);	1612 _mov(T_ah, Zero, RegX8632::Reg_ah);

1614 _div(T, Src1, T_ah);	1613 _div(T, Src1, T_ah);

1615 _mov(Dest, T);	1614 _mov(Dest, T);

1616 } else {	1615 } else {

1617 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1616 Constant *Zero = Ctx->getConstantZero(IceType_i32);

1618 _mov(T, Src0, Reg_eax);	1617 _mov(T, Src0, RegX8632::Reg_eax);

1619 _mov(T_edx, Zero, Reg_edx);	1618 _mov(T_edx, Zero, RegX8632::Reg_edx);

1620 _div(T, Src1, T_edx);	1619 _div(T, Src1, T_edx);

1621 _mov(Dest, T);	1620 _mov(Dest, T);

1622 }	1621 }

1623 break;	1622 break;

1624 case InstArithmetic::Sdiv:	1623 case InstArithmetic::Sdiv:

1625 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);	1624 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);

1626 if (Dest->getType() == IceType_i8) {	1625 if (Dest->getType() == IceType_i8) {

1627 _mov(T, Src0, Reg_eax);	1626 _mov(T, Src0, RegX8632::Reg_eax);

1628 _cbwdq(T, T);	1627 _cbwdq(T, T);

1629 _idiv(T, Src1, T);	1628 _idiv(T, Src1, T);

1630 _mov(Dest, T);	1629 _mov(Dest, T);

1631 } else {	1630 } else {

1632 T_edx = makeReg(IceType_i32, Reg_edx);	1631 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);

1633 _mov(T, Src0, Reg_eax);	1632 _mov(T, Src0, RegX8632::Reg_eax);

1634 _cbwdq(T_edx, T);	1633 _cbwdq(T_edx, T);

1635 _idiv(T, Src1, T_edx);	1634 _idiv(T, Src1, T_edx);

1636 _mov(Dest, T);	1635 _mov(Dest, T);

1637 }	1636 }

1638 break;	1637 break;

1639 case InstArithmetic::Urem:	1638 case InstArithmetic::Urem:

1640 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);	1639 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);

1641 if (Dest->getType() == IceType_i8) {	1640 if (Dest->getType() == IceType_i8) {

1642 Variable *T_ah = NULL;	1641 Variable *T_ah = NULL;

1643 Constant *Zero = Ctx->getConstantZero(IceType_i8);	1642 Constant *Zero = Ctx->getConstantZero(IceType_i8);

1644 _mov(T, Src0, Reg_eax);	1643 _mov(T, Src0, RegX8632::Reg_eax);

1645 _mov(T_ah, Zero, Reg_ah);	1644 _mov(T_ah, Zero, RegX8632::Reg_ah);

1646 _div(T_ah, Src1, T);	1645 _div(T_ah, Src1, T);

1647 _mov(Dest, T_ah);	1646 _mov(Dest, T_ah);

1648 } else {	1647 } else {

1649 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1648 Constant *Zero = Ctx->getConstantZero(IceType_i32);

1650 _mov(T_edx, Zero, Reg_edx);	1649 _mov(T_edx, Zero, RegX8632::Reg_edx);

1651 _mov(T, Src0, Reg_eax);	1650 _mov(T, Src0, RegX8632::Reg_eax);

1652 _div(T_edx, Src1, T);	1651 _div(T_edx, Src1, T);

1653 _mov(Dest, T_edx);	1652 _mov(Dest, T_edx);

1654 }	1653 }

1655 break;	1654 break;

1656 case InstArithmetic::Srem:	1655 case InstArithmetic::Srem:

1657 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);	1656 Src1 = legalize(Src1, Legal_Reg | Legal_Mem);

1658 if (Dest->getType() == IceType_i8) {	1657 if (Dest->getType() == IceType_i8) {

1659 Variable *T_ah = makeReg(IceType_i8, Reg_ah);	1658 Variable *T_ah = makeReg(IceType_i8, RegX8632::Reg_ah);

1660 _mov(T, Src0, Reg_eax);	1659 _mov(T, Src0, RegX8632::Reg_eax);

1661 _cbwdq(T, T);	1660 _cbwdq(T, T);

1662 Context.insert(InstFakeDef::create(Func, T_ah));	1661 Context.insert(InstFakeDef::create(Func, T_ah));

1663 _idiv(T_ah, Src1, T);	1662 _idiv(T_ah, Src1, T);

1664 _mov(Dest, T_ah);	1663 _mov(Dest, T_ah);

1665 } else {	1664 } else {

1666 T_edx = makeReg(IceType_i32, Reg_edx);	1665 T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);

1667 _mov(T, Src0, Reg_eax);	1666 _mov(T, Src0, RegX8632::Reg_eax);

1668 _cbwdq(T_edx, T);	1667 _cbwdq(T_edx, T);

1669 _idiv(T_edx, Src1, T);	1668 _idiv(T_edx, Src1, T);

1670 _mov(Dest, T_edx);	1669 _mov(Dest, T_edx);

1671 }	1670 }

1672 break;	1671 break;

1673 case InstArithmetic::Fadd:	1672 case InstArithmetic::Fadd:

1674 _mov(T, Src0);	1673 _mov(T, Src0);

1675 _addss(T, Src1);	1674 _addss(T, Src1);

1676 _mov(Dest, T);	1675 _mov(Dest, T);

1677 break;	1676 break;

(...skipping 51 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1729 }	1728 }

1730 }	1729 }

1731	1730

1732 void TargetX8632::lowerBr(const InstBr *Inst) {	1731 void TargetX8632::lowerBr(const InstBr *Inst) {

1733 if (Inst->isUnconditional()) {	1732 if (Inst->isUnconditional()) {

1734 _br(Inst->getTargetUnconditional());	1733 _br(Inst->getTargetUnconditional());

1735 } else {	1734 } else {

1736 Operand *Src0 = legalize(Inst->getCondition(), Legal_Reg | Legal_Mem);	1735 Operand *Src0 = legalize(Inst->getCondition(), Legal_Reg | Legal_Mem);

1737 Constant *Zero = Ctx->getConstantZero(IceType_i32);	1736 Constant *Zero = Ctx->getConstantZero(IceType_i32);

1738 _cmp(Src0, Zero);	1737 _cmp(Src0, Zero);

1739 _br(InstX8632Br::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());	1738 _br(CondX86::Br_ne, Inst->getTargetTrue(), Inst->getTargetFalse());

1740 }	1739 }

1741 }	1740 }

1742	1741

1743 void TargetX8632::lowerCall(const InstCall *Instr) {	1742 void TargetX8632::lowerCall(const InstCall *Instr) {

1744 // x86-32 calling convention:	1743 // x86-32 calling convention:

1745 //	1744 //

1746 // * At the point before the call, the stack must be aligned to 16	1745 // * At the point before the call, the stack must be aligned to 16

1747 // bytes.	1746 // bytes.

1748 //	1747 //

1749 // * The first four arguments of vector type, regardless of their	1748 // * The first four arguments of vector type, regardless of their

(...skipping 25 matching lines...) Expand all Loading...
1775 // The PNaCl ABI requires the width of arguments to be at least 32 bits.	1774 // The PNaCl ABI requires the width of arguments to be at least 32 bits.

1776 assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 ||	1775 assert(Ty == IceType_i32 || Ty == IceType_f32 || Ty == IceType_i64 ||

1777 Ty == IceType_f64 || isVectorType(Ty));	1776 Ty == IceType_f64 || isVectorType(Ty));

1778 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {	1777 if (isVectorType(Ty) && XmmArgs.size() < X86_MAX_XMM_ARGS) {

1779 XmmArgs.push_back(Arg);	1778 XmmArgs.push_back(Arg);

1780 } else {	1779 } else {

1781 StackArgs.push_back(Arg);	1780 StackArgs.push_back(Arg);

1782 if (isVectorType(Arg->getType())) {	1781 if (isVectorType(Arg->getType())) {

1783 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);	1782 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

1784 }	1783 }

1785 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);	1784 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);

1786 Constant *Loc =	1785 Constant *Loc =

1787 Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes);	1786 Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes);

1788 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));	1787 StackArgLocations.push_back(OperandX8632Mem::create(Func, Ty, esp, Loc));

1789 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());	1788 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Arg->getType());

1790 }	1789 }

1791 }	1790 }

1792	1791

1793 // Adjust the parameter area so that the stack is aligned. It is	1792 // Adjust the parameter area so that the stack is aligned. It is

1794 // assumed that the stack is already aligned at the start of the	1793 // assumed that the stack is already aligned at the start of the

1795 // calling sequence.	1794 // calling sequence.

(...skipping 21 matching lines...) Expand all Loading...
1817 // Copy arguments to be passed in registers to the appropriate	1816 // Copy arguments to be passed in registers to the appropriate

1818 // registers.	1817 // registers.

1819 // TODO: Investigate the impact of lowering arguments passed in	1818 // TODO: Investigate the impact of lowering arguments passed in

1820 // registers after lowering stack arguments as opposed to the other	1819 // registers after lowering stack arguments as opposed to the other

1821 // way around. Lowering register arguments after stack arguments may	1820 // way around. Lowering register arguments after stack arguments may

1822 // reduce register pressure. On the other hand, lowering register	1821 // reduce register pressure. On the other hand, lowering register

1823 // arguments first (before stack arguments) may result in more compact	1822 // arguments first (before stack arguments) may result in more compact

1824 // code, as the memory operand displacements may end up being smaller	1823 // code, as the memory operand displacements may end up being smaller

1825 // before any stack adjustment is done.	1824 // before any stack adjustment is done.

1826 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {	1825 for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) {

1827 Variable *Reg = legalizeToVar(XmmArgs[i], false, Reg_xmm0 + i);	1826 Variable *Reg = legalizeToVar(XmmArgs[i], false, RegX8632::Reg_xmm0 + i);

1828 // Generate a FakeUse of register arguments so that they do not get	1827 // Generate a FakeUse of register arguments so that they do not get

1829 // dead code eliminated as a result of the FakeKill of scratch	1828 // dead code eliminated as a result of the FakeKill of scratch

1830 // registers after the call.	1829 // registers after the call.

1831 Context.insert(InstFakeUse::create(Func, Reg));	1830 Context.insert(InstFakeUse::create(Func, Reg));

1832 }	1831 }

1833 // Generate the call instruction. Assign its result to a temporary	1832 // Generate the call instruction. Assign its result to a temporary

1834 // with high register allocation weight.	1833 // with high register allocation weight.

1835 Variable *Dest = Instr->getDest();	1834 Variable *Dest = Instr->getDest();

1836 // ReturnReg doubles as ReturnRegLo as necessary.	1835 // ReturnReg doubles as ReturnRegLo as necessary.

1837 Variable *ReturnReg = NULL;	1836 Variable *ReturnReg = NULL;

1838 Variable *ReturnRegHi = NULL;	1837 Variable *ReturnRegHi = NULL;

1839 if (Dest) {	1838 if (Dest) {

1840 switch (Dest->getType()) {	1839 switch (Dest->getType()) {

1841 case IceType_NUM:	1840 case IceType_NUM:

1842 llvm_unreachable("Invalid Call dest type");	1841 llvm_unreachable("Invalid Call dest type");

1843 break;	1842 break;

1844 case IceType_void:	1843 case IceType_void:

1845 break;	1844 break;

1846 case IceType_i1:	1845 case IceType_i1:

1847 case IceType_i8:	1846 case IceType_i8:

1848 case IceType_i16:	1847 case IceType_i16:

1849 case IceType_i32:	1848 case IceType_i32:

1850 ReturnReg = makeReg(Dest->getType(), Reg_eax);	1849 ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_eax);

1851 break;	1850 break;

1852 case IceType_i64:	1851 case IceType_i64:

1853 ReturnReg = makeReg(IceType_i32, Reg_eax);	1852 ReturnReg = makeReg(IceType_i32, RegX8632::Reg_eax);

1854 ReturnRegHi = makeReg(IceType_i32, Reg_edx);	1853 ReturnRegHi = makeReg(IceType_i32, RegX8632::Reg_edx);

1855 break;	1854 break;

1856 case IceType_f32:	1855 case IceType_f32:

1857 case IceType_f64:	1856 case IceType_f64:

1858 // Leave ReturnReg==ReturnRegHi==NULL, and capture the result with	1857 // Leave ReturnReg==ReturnRegHi==NULL, and capture the result with

1859 // the fstp instruction.	1858 // the fstp instruction.

1860 break;	1859 break;

1861 case IceType_v4i1:	1860 case IceType_v4i1:

1862 case IceType_v8i1:	1861 case IceType_v8i1:

1863 case IceType_v16i1:	1862 case IceType_v16i1:

1864 case IceType_v16i8:	1863 case IceType_v16i8:

1865 case IceType_v8i16:	1864 case IceType_v8i16:

1866 case IceType_v4i32:	1865 case IceType_v4i32:

1867 case IceType_v4f32:	1866 case IceType_v4f32:

1868 ReturnReg = makeReg(Dest->getType(), Reg_xmm0);	1867 ReturnReg = makeReg(Dest->getType(), RegX8632::Reg_xmm0);

1869 break;	1868 break;

1870 }	1869 }

1871 }	1870 }

1872 // TODO(stichnot): LEAHACK: remove Legal_All (and use default) once	1871 // TODO(stichnot): LEAHACK: remove Legal_All (and use default) once

1873 // a proper emitter is used.	1872 // a proper emitter is used.

1874 Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All);	1873 Operand *CallTarget = legalize(Instr->getCallTarget(), Legal_All);

1875 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);	1874 Inst *NewCall = InstX8632Call::create(Func, ReturnReg, CallTarget);

1876 Context.insert(NewCall);	1875 Context.insert(NewCall);

1877 if (ReturnRegHi)	1876 if (ReturnRegHi)

1878 Context.insert(InstFakeDef::create(Func, ReturnRegHi));	1877 Context.insert(InstFakeDef::create(Func, ReturnRegHi));

1879	1878

1880 // Add the appropriate offset to esp. The call instruction takes care	1879 // Add the appropriate offset to esp. The call instruction takes care

1881 // of resetting the stack offset during emission.	1880 // of resetting the stack offset during emission.

1882 if (ParameterAreaSizeBytes) {	1881 if (ParameterAreaSizeBytes) {

1883 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);	1882 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);

1884 _add(esp, Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes));	1883 _add(esp, Ctx->getConstantInt32(IceType_i32, ParameterAreaSizeBytes));

1885 }	1884 }

1886	1885

1887 // Insert a register-kill pseudo instruction.	1886 // Insert a register-kill pseudo instruction.

1888 VarList KilledRegs;	1887 VarList KilledRegs;

1889 for (SizeT i = 0; i < ScratchRegs.size(); ++i) {	1888 for (SizeT i = 0; i < ScratchRegs.size(); ++i) {

1890 if (ScratchRegs[i])	1889 if (ScratchRegs[i])

1891 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));	1890 KilledRegs.push_back(Func->getTarget()->getPhysicalRegister(i));

1892 }	1891 }

1893 Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall));	1892 Context.insert(InstFakeKill::create(Func, KilledRegs, NewCall));

(...skipping 579 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2473 // makeVectorOfOnes() requires an integer vector type.	2472 // makeVectorOfOnes() requires an integer vector type.

2474 T = makeVectorOfMinusOnes(IceType_v4i32);	2473 T = makeVectorOfMinusOnes(IceType_v4i32);

2475 } else if (Condition == InstFcmp::False) {	2474 } else if (Condition == InstFcmp::False) {

2476 T = makeVectorOfZeros(Dest->getType());	2475 T = makeVectorOfZeros(Dest->getType());

2477 } else {	2476 } else {

2478 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);	2477 Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem);

2479 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);	2478 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

2480	2479

2481 switch (Condition) {	2480 switch (Condition) {

2482 default: {	2481 default: {

2483 InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate;	2482 CondX86::CmppsCond Predicate = TableFcmp[Index].Predicate;

2484 assert(Predicate != InstX8632Cmpps::Cmpps_Invalid);	2483 assert(Predicate != CondX86::Cmpps_Invalid);

2485 T = makeReg(Src0RM->getType());	2484 T = makeReg(Src0RM->getType());

2486 _movp(T, Src0RM);	2485 _movp(T, Src0RM);

2487 _cmpps(T, Src1RM, Predicate);	2486 _cmpps(T, Src1RM, Predicate);

2488 } break;	2487 } break;

2489 case InstFcmp::One: {	2488 case InstFcmp::One: {

2490 // Check both unequal and ordered.	2489 // Check both unequal and ordered.

2491 T = makeReg(Src0RM->getType());	2490 T = makeReg(Src0RM->getType());

2492 Variable *T2 = makeReg(Src0RM->getType());	2491 Variable *T2 = makeReg(Src0RM->getType());

2493 _movp(T, Src0RM);	2492 _movp(T, Src0RM);

2494 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq);	2493 _cmpps(T, Src1RM, CondX86::Cmpps_neq);

2495 _movp(T2, Src0RM);	2494 _movp(T2, Src0RM);

2496 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_ord);	2495 _cmpps(T2, Src1RM, CondX86::Cmpps_ord);

2497 _pand(T, T2);	2496 _pand(T, T2);

2498 } break;	2497 } break;

2499 case InstFcmp::Ueq: {	2498 case InstFcmp::Ueq: {

2500 // Check both equal or unordered.	2499 // Check both equal or unordered.

2501 T = makeReg(Src0RM->getType());	2500 T = makeReg(Src0RM->getType());

2502 Variable *T2 = makeReg(Src0RM->getType());	2501 Variable *T2 = makeReg(Src0RM->getType());

2503 _movp(T, Src0RM);	2502 _movp(T, Src0RM);

2504 _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq);	2503 _cmpps(T, Src1RM, CondX86::Cmpps_eq);

2505 _movp(T2, Src0RM);	2504 _movp(T2, Src0RM);

2506 _cmpps(T2, Src1RM, InstX8632Cmpps::Cmpps_unord);	2505 _cmpps(T2, Src1RM, CondX86::Cmpps_unord);

2507 _por(T, T2);	2506 _por(T, T2);

2508 } break;	2507 } break;

2509 }	2508 }

2510 }	2509 }

2511	2510

2512 _movp(Dest, T);	2511 _movp(Dest, T);

2513 eliminateNextVectorSextInstruction(Dest);	2512 eliminateNextVectorSextInstruction(Dest);

2514 return;	2513 return;

2515 }	2514 }

2516	2515

2517 // Lowering a = fcmp cond, b, c	2516 // Lowering a = fcmp cond, b, c

2518 // ucomiss b, c /* only if C1 != Br_None */	2517 // ucomiss b, c /* only if C1 != Br_None */

2519 // /* but swap b,c order if SwapOperands==true */	2518 // /* but swap b,c order if SwapOperands==true */

2520 // mov a, <default>	2519 // mov a, <default>

2521 // j<C1> label /* only if C1 != Br_None */	2520 // j<C1> label /* only if C1 != Br_None */

2522 // j<C2> label /* only if C2 != Br_None */	2521 // j<C2> label /* only if C2 != Br_None */

2523 // FakeUse(a) /* only if C1 != Br_None */	2522 // FakeUse(a) /* only if C1 != Br_None */

2524 // mov a, !<default> /* only if C1 != Br_None */	2523 // mov a, !<default> /* only if C1 != Br_None */

2525 // label: /* only if C1 != Br_None */	2524 // label: /* only if C1 != Br_None */

2526 InstFcmp::FCond Condition = Inst->getCondition();	2525 InstFcmp::FCond Condition = Inst->getCondition();

2527 size_t Index = static_cast<size_t>(Condition);	2526 size_t Index = static_cast<size_t>(Condition);

2528 assert(Index < TableFcmpSize);	2527 assert(Index < TableFcmpSize);

2529 if (TableFcmp[Index].SwapScalarOperands) {	2528 if (TableFcmp[Index].SwapScalarOperands) {

2530 Operand *Tmp = Src0;	2529 Operand *Tmp = Src0;

2531 Src0 = Src1;	2530 Src0 = Src1;

2532 Src1 = Tmp;	2531 Src1 = Tmp;

2533 }	2532 }

2534 bool HasC1 = (TableFcmp[Index].C1 != InstX8632Br::Br_None);	2533 bool HasC1 = (TableFcmp[Index].C1 != CondX86::Br_None);

2535 bool HasC2 = (TableFcmp[Index].C2 != InstX8632Br::Br_None);	2534 bool HasC2 = (TableFcmp[Index].C2 != CondX86::Br_None);

2536 if (HasC1) {	2535 if (HasC1) {

2537 Src0 = legalize(Src0);	2536 Src0 = legalize(Src0);

2538 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);	2537 Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem);

2539 Variable *T = NULL;	2538 Variable *T = NULL;

2540 _mov(T, Src0);	2539 _mov(T, Src0);

2541 _ucomiss(T, Src1RM);	2540 _ucomiss(T, Src1RM);

2542 }	2541 }

2543 Constant *Default =	2542 Constant *Default =

2544 Ctx->getConstantInt32(IceType_i32, TableFcmp[Index].Default);	2543 Ctx->getConstantInt32(IceType_i32, TableFcmp[Index].Default);

2545 _mov(Dest, Default);	2544 _mov(Dest, Default);

(...skipping 152 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
2698 size_t Index = static_cast<size_t>(Condition);	2697 size_t Index = static_cast<size_t>(Condition);

2699 assert(Index < TableIcmp64Size);	2698 assert(Index < TableIcmp64Size);

2700 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);	2699 Operand *Src0LoRM = legalize(loOperand(Src0), Legal_Reg | Legal_Mem);

2701 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);	2700 Operand *Src0HiRM = legalize(hiOperand(Src0), Legal_Reg | Legal_Mem);

2702 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);	2701 Operand *Src1LoRI = legalize(loOperand(Src1), Legal_Reg | Legal_Imm);

2703 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);	2702 Operand *Src1HiRI = legalize(hiOperand(Src1), Legal_Reg | Legal_Imm);

2704 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {	2703 if (Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) {

2705 InstX8632Label *Label = InstX8632Label::create(Func, this);	2704 InstX8632Label *Label = InstX8632Label::create(Func, this);

2706 _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));	2705 _mov(Dest, (Condition == InstIcmp::Eq ? Zero : One));

2707 _cmp(Src0LoRM, Src1LoRI);	2706 _cmp(Src0LoRM, Src1LoRI);

2708 _br(InstX8632Br::Br_ne, Label);	2707 _br(CondX86::Br_ne, Label);

2709 _cmp(Src0HiRM, Src1HiRI);	2708 _cmp(Src0HiRM, Src1HiRI);

2710 _br(InstX8632Br::Br_ne, Label);	2709 _br(CondX86::Br_ne, Label);

2711 Context.insert(InstFakeUse::create(Func, Dest));	2710 Context.insert(InstFakeUse::create(Func, Dest));

2712 _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));	2711 _mov(Dest, (Condition == InstIcmp::Eq ? One : Zero));

2713 Context.insert(Label);	2712 Context.insert(Label);

2714 } else {	2713 } else {

2715 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);	2714 InstX8632Label *LabelFalse = InstX8632Label::create(Func, this);

2716 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);	2715 InstX8632Label *LabelTrue = InstX8632Label::create(Func, this);

2717 _mov(Dest, One);	2716 _mov(Dest, One);

2718 _cmp(Src0HiRM, Src1HiRI);	2717 _cmp(Src0HiRM, Src1HiRI);

2719 _br(TableIcmp64[Index].C1, LabelTrue);	2718 _br(TableIcmp64[Index].C1, LabelTrue);

2720 _br(TableIcmp64[Index].C2, LabelFalse);	2719 _br(TableIcmp64[Index].C2, LabelFalse);

(...skipping 432 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3153 }	3152 }

3154 case Intrinsics::Sqrt: {	3153 case Intrinsics::Sqrt: {

3155 Operand *Src = legalize(Instr->getArg(0));	3154 Operand *Src = legalize(Instr->getArg(0));

3156 Variable *Dest = Instr->getDest();	3155 Variable *Dest = Instr->getDest();

3157 Variable *T = makeReg(Dest->getType());	3156 Variable *T = makeReg(Dest->getType());

3158 _sqrtss(T, Src);	3157 _sqrtss(T, Src);

3159 _mov(Dest, T);	3158 _mov(Dest, T);

3160 return;	3159 return;

3161 }	3160 }

3162 case Intrinsics::Stacksave: {	3161 case Intrinsics::Stacksave: {

3163 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);	3162 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);

3164 Variable *Dest = Instr->getDest();	3163 Variable *Dest = Instr->getDest();

3165 _mov(Dest, esp);	3164 _mov(Dest, esp);

3166 return;	3165 return;

3167 }	3166 }

3168 case Intrinsics::Stackrestore: {	3167 case Intrinsics::Stackrestore: {

3169 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);	3168 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);

3170 _mov(esp, Instr->getArg(0));	3169 _mov(esp, Instr->getArg(0));

3171 return;	3170 return;

3172 }	3171 }

3173 case Intrinsics::Trap:	3172 case Intrinsics::Trap:

3174 _ud2();	3173 _ud2();

3175 return;	3174 return;

3176 case Intrinsics::UnknownIntrinsic:	3175 case Intrinsics::UnknownIntrinsic:

3177 Func->setError("Should not be lowering UnknownIntrinsic");	3176 Func->setError("Should not be lowering UnknownIntrinsic");

3178 return;	3177 return;

3179 }	3178 }

3180 return;	3179 return;

3181 }	3180 }

3182	3181

3183 void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,	3182 void TargetX8632::lowerAtomicCmpxchg(Variable *DestPrev, Operand *Ptr,

3184 Operand *Expected, Operand *Desired) {	3183 Operand *Expected, Operand *Desired) {

3185 if (Expected->getType() == IceType_i64) {	3184 if (Expected->getType() == IceType_i64) {

3186 // Reserve the pre-colored registers first, before adding any more	3185 // Reserve the pre-colored registers first, before adding any more

3187 // infinite-weight variables from FormMemoryOperand's legalization.	3186 // infinite-weight variables from FormMemoryOperand's legalization.

3188 Variable *T_edx = makeReg(IceType_i32, Reg_edx);	3187 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);

3189 Variable *T_eax = makeReg(IceType_i32, Reg_eax);	3188 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);

3190 Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);	3189 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);

3191 Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);	3190 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);

3192 _mov(T_eax, loOperand(Expected));	3191 _mov(T_eax, loOperand(Expected));

3193 _mov(T_edx, hiOperand(Expected));	3192 _mov(T_edx, hiOperand(Expected));

3194 _mov(T_ebx, loOperand(Desired));	3193 _mov(T_ebx, loOperand(Desired));

3195 _mov(T_ecx, hiOperand(Desired));	3194 _mov(T_ecx, hiOperand(Desired));

3196 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());	3195 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());

3197 const bool Locked = true;	3196 const bool Locked = true;

3198 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);	3197 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);

3199 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));	3198 Variable *DestLo = llvm::cast<Variable>(loOperand(DestPrev));

3200 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));	3199 Variable *DestHi = llvm::cast<Variable>(hiOperand(DestPrev));

3201 _mov(DestLo, T_eax);	3200 _mov(DestLo, T_eax);

3202 _mov(DestHi, T_edx);	3201 _mov(DestHi, T_edx);

3203 return;	3202 return;

3204 }	3203 }

3205 Variable *T_eax = makeReg(Expected->getType(), Reg_eax);	3204 Variable *T_eax = makeReg(Expected->getType(), RegX8632::Reg_eax);

3206 _mov(T_eax, Expected);	3205 _mov(T_eax, Expected);

3207 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());	3206 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Expected->getType());

3208 Variable *DesiredReg = legalizeToVar(Desired);	3207 Variable *DesiredReg = legalizeToVar(Desired);

3209 const bool Locked = true;	3208 const bool Locked = true;

3210 _cmpxchg(Addr, T_eax, DesiredReg, Locked);	3209 _cmpxchg(Addr, T_eax, DesiredReg, Locked);

3211 _mov(DestPrev, T_eax);	3210 _mov(DestPrev, T_eax);

3212 }	3211 }

3213	3212

3214 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,	3213 bool TargetX8632::tryOptimizedCmpxchgCmpBr(Variable *Dest, Operand *PtrToMem,

3215 Operand *Expected,	3214 Operand *Expected,

(...skipping 48 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3264 NextBr->isLastUse(NextCmp->getDest())) {	3263 NextBr->isLastUse(NextCmp->getDest())) {

3265 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);	3264 lowerAtomicCmpxchg(Dest, PtrToMem, Expected, Desired);

3266 for (size_t i = 0; i < PhiAssigns.size(); ++i) {	3265 for (size_t i = 0; i < PhiAssigns.size(); ++i) {

3267 // Lower the phi assignments now, before the branch (same placement	3266 // Lower the phi assignments now, before the branch (same placement

3268 // as before).	3267 // as before).

3269 InstAssign *PhiAssign = PhiAssigns[i];	3268 InstAssign *PhiAssign = PhiAssigns[i];

3270 PhiAssign->setDeleted();	3269 PhiAssign->setDeleted();

3271 lowerAssign(PhiAssign);	3270 lowerAssign(PhiAssign);

3272 Context.advanceNext();	3271 Context.advanceNext();

3273 }	3272 }

3274 _br(InstX8632::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse());	3273 _br(CondX86::Br_e, NextBr->getTargetTrue(), NextBr->getTargetFalse());

3275 // Skip over the old compare and branch, by deleting them.	3274 // Skip over the old compare and branch, by deleting them.

3276 NextCmp->setDeleted();	3275 NextCmp->setDeleted();

3277 NextBr->setDeleted();	3276 NextBr->setDeleted();

3278 Context.advanceNext();	3277 Context.advanceNext();

3279 Context.advanceNext();	3278 Context.advanceNext();

3280 return true;	3279 return true;

3281 }	3280 }

3282 }	3281 }

3283 }	3282 }

3284 return false;	3283 return false;

(...skipping 106 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3391 // mov <reg>, eax	3390 // mov <reg>, eax

3392 // op <reg>, [desired_adj]	3391 // op <reg>, [desired_adj]

3393 // lock cmpxchg [ptr], <reg>	3392 // lock cmpxchg [ptr], <reg>

3394 // jne .LABEL	3393 // jne .LABEL

3395 // mov <dest>, eax	3394 // mov <dest>, eax

3396 //	3395 //

3397 // If Op_{Lo,Hi} are NULL, then just copy the value.	3396 // If Op_{Lo,Hi} are NULL, then just copy the value.

3398 Val = legalize(Val);	3397 Val = legalize(Val);

3399 Type Ty = Val->getType();	3398 Type Ty = Val->getType();

3400 if (Ty == IceType_i64) {	3399 if (Ty == IceType_i64) {

3401 Variable *T_edx = makeReg(IceType_i32, Reg_edx);	3400 Variable *T_edx = makeReg(IceType_i32, RegX8632::Reg_edx);

3402 Variable *T_eax = makeReg(IceType_i32, Reg_eax);	3401 Variable *T_eax = makeReg(IceType_i32, RegX8632::Reg_eax);

3403 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);	3402 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);

3404 _mov(T_eax, loOperand(Addr));	3403 _mov(T_eax, loOperand(Addr));

3405 _mov(T_edx, hiOperand(Addr));	3404 _mov(T_edx, hiOperand(Addr));

3406 Variable *T_ecx = makeReg(IceType_i32, Reg_ecx);	3405 Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx);

3407 Variable *T_ebx = makeReg(IceType_i32, Reg_ebx);	3406 Variable *T_ebx = makeReg(IceType_i32, RegX8632::Reg_ebx);

3408 InstX8632Label *Label = InstX8632Label::create(Func, this);	3407 InstX8632Label *Label = InstX8632Label::create(Func, this);

3409 const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL;	3408 const bool IsXchg8b = Op_Lo == NULL && Op_Hi == NULL;

3410 if (!IsXchg8b) {	3409 if (!IsXchg8b) {

3411 Context.insert(Label);	3410 Context.insert(Label);

3412 _mov(T_ebx, T_eax);	3411 _mov(T_ebx, T_eax);

3413 (this->*Op_Lo)(T_ebx, loOperand(Val));	3412 (this->*Op_Lo)(T_ebx, loOperand(Val));

3414 _mov(T_ecx, T_edx);	3413 _mov(T_ecx, T_edx);

3415 (this->*Op_Hi)(T_ecx, hiOperand(Val));	3414 (this->*Op_Hi)(T_ecx, hiOperand(Val));

3416 } else {	3415 } else {

3417 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.	3416 // This is for xchg, which doesn't need an actual Op_Lo/Op_Hi.

3418 // It just needs the Val loaded into ebx and ecx.	3417 // It just needs the Val loaded into ebx and ecx.

3419 // That can also be done before the loop.	3418 // That can also be done before the loop.

3420 _mov(T_ebx, loOperand(Val));	3419 _mov(T_ebx, loOperand(Val));

3421 _mov(T_ecx, hiOperand(Val));	3420 _mov(T_ecx, hiOperand(Val));

3422 Context.insert(Label);	3421 Context.insert(Label);

3423 }	3422 }

3424 const bool Locked = true;	3423 const bool Locked = true;

3425 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);	3424 _cmpxchg8b(Addr, T_edx, T_eax, T_ecx, T_ebx, Locked);

3426 _br(InstX8632Br::Br_ne, Label);	3425 _br(CondX86::Br_ne, Label);

3427 if (!IsXchg8b) {	3426 if (!IsXchg8b) {

3428 // If Val is a variable, model the extended live range of Val through	3427 // If Val is a variable, model the extended live range of Val through

3429 // the end of the loop, since it will be re-used by the loop.	3428 // the end of the loop, since it will be re-used by the loop.

3430 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {	3429 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {

3431 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));	3430 Variable *ValLo = llvm::cast<Variable>(loOperand(ValVar));

3432 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));	3431 Variable *ValHi = llvm::cast<Variable>(hiOperand(ValVar));

3433 Context.insert(InstFakeUse::create(Func, ValLo));	3432 Context.insert(InstFakeUse::create(Func, ValLo));

3434 Context.insert(InstFakeUse::create(Func, ValHi));	3433 Context.insert(InstFakeUse::create(Func, ValHi));

3435 }	3434 }

3436 } else {	3435 } else {

3437 // For xchg, the loop is slightly smaller and ebx/ecx are used.	3436 // For xchg, the loop is slightly smaller and ebx/ecx are used.

3438 Context.insert(InstFakeUse::create(Func, T_ebx));	3437 Context.insert(InstFakeUse::create(Func, T_ebx));

3439 Context.insert(InstFakeUse::create(Func, T_ecx));	3438 Context.insert(InstFakeUse::create(Func, T_ecx));

3440 }	3439 }

3441 // The address base is also reused in the loop.	3440 // The address base is also reused in the loop.

3442 Context.insert(InstFakeUse::create(Func, Addr->getBase()));	3441 Context.insert(InstFakeUse::create(Func, Addr->getBase()));

3443 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));	3442 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

3444 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));	3443 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

3445 _mov(DestLo, T_eax);	3444 _mov(DestLo, T_eax);

3446 _mov(DestHi, T_edx);	3445 _mov(DestHi, T_edx);

3447 return;	3446 return;

3448 }	3447 }

3449 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);	3448 OperandX8632Mem *Addr = FormMemoryOperand(Ptr, Ty);

3450 Variable *T_eax = makeReg(Ty, Reg_eax);	3449 Variable *T_eax = makeReg(Ty, RegX8632::Reg_eax);

3451 _mov(T_eax, Addr);	3450 _mov(T_eax, Addr);

3452 InstX8632Label *Label = InstX8632Label::create(Func, this);	3451 InstX8632Label *Label = InstX8632Label::create(Func, this);

3453 Context.insert(Label);	3452 Context.insert(Label);

3454 // We want to pick a different register for T than Eax, so don't use	3453 // We want to pick a different register for T than Eax, so don't use

3455 // _mov(T == NULL, T_eax).	3454 // _mov(T == NULL, T_eax).

3456 Variable *T = makeReg(Ty);	3455 Variable *T = makeReg(Ty);

3457 _mov(T, T_eax);	3456 _mov(T, T_eax);

3458 (this->*Op_Lo)(T, Val);	3457 (this->*Op_Lo)(T, Val);

3459 const bool Locked = true;	3458 const bool Locked = true;

3460 _cmpxchg(Addr, T_eax, T, Locked);	3459 _cmpxchg(Addr, T_eax, T, Locked);

3461 _br(InstX8632Br::Br_ne, Label);	3460 _br(CondX86::Br_ne, Label);

3462 // If Val is a variable, model the extended live range of Val through	3461 // If Val is a variable, model the extended live range of Val through

3463 // the end of the loop, since it will be re-used by the loop.	3462 // the end of the loop, since it will be re-used by the loop.

3464 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {	3463 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) {

3465 Context.insert(InstFakeUse::create(Func, ValVar));	3464 Context.insert(InstFakeUse::create(Func, ValVar));

3466 }	3465 }

3467 // The address base is also reused in the loop.	3466 // The address base is also reused in the loop.

3468 Context.insert(InstFakeUse::create(Func, Addr->getBase()));	3467 Context.insert(InstFakeUse::create(Func, Addr->getBase()));

3469 _mov(Dest, T_eax);	3468 _mov(Dest, T_eax);

3470 }	3469 }

3471	3470

(...skipping 40 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3512 }	3511 }

3513 Variable *T_Dest = makeReg(IceType_i32);	3512 Variable *T_Dest = makeReg(IceType_i32);

3514 Constant *ThirtyTwo = Ctx->getConstantInt32(IceType_i32, 32);	3513 Constant *ThirtyTwo = Ctx->getConstantInt32(IceType_i32, 32);

3515 Constant *ThirtyOne = Ctx->getConstantInt32(IceType_i32, 31);	3514 Constant *ThirtyOne = Ctx->getConstantInt32(IceType_i32, 31);

3516 if (Cttz) {	3515 if (Cttz) {

3517 _mov(T_Dest, ThirtyTwo);	3516 _mov(T_Dest, ThirtyTwo);

3518 } else {	3517 } else {

3519 Constant *SixtyThree = Ctx->getConstantInt32(IceType_i32, 63);	3518 Constant *SixtyThree = Ctx->getConstantInt32(IceType_i32, 63);

3520 _mov(T_Dest, SixtyThree);	3519 _mov(T_Dest, SixtyThree);

3521 }	3520 }

3522 _cmov(T_Dest, T, InstX8632::Br_ne);	3521 _cmov(T_Dest, T, CondX86::Br_ne);

3523 if (!Cttz) {	3522 if (!Cttz) {

3524 _xor(T_Dest, ThirtyOne);	3523 _xor(T_Dest, ThirtyOne);

3525 }	3524 }

3526 if (Ty == IceType_i32) {	3525 if (Ty == IceType_i32) {

3527 _mov(Dest, T_Dest);	3526 _mov(Dest, T_Dest);

3528 return;	3527 return;

3529 }	3528 }

3530 _add(T_Dest, ThirtyTwo);	3529 _add(T_Dest, ThirtyTwo);

3531 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));	3530 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

3532 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));	3531 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

3533 // Will be using "test" on this, so we need a registerized variable.	3532 // Will be using "test" on this, so we need a registerized variable.

3534 Variable *SecondVar = legalizeToVar(SecondVal);	3533 Variable *SecondVar = legalizeToVar(SecondVal);

3535 Variable *T_Dest2 = makeReg(IceType_i32);	3534 Variable *T_Dest2 = makeReg(IceType_i32);

3536 if (Cttz) {	3535 if (Cttz) {

3537 _bsf(T_Dest2, SecondVar);	3536 _bsf(T_Dest2, SecondVar);

3538 } else {	3537 } else {

3539 _bsr(T_Dest2, SecondVar);	3538 _bsr(T_Dest2, SecondVar);

3540 _xor(T_Dest2, ThirtyOne);	3539 _xor(T_Dest2, ThirtyOne);

3541 }	3540 }

3542 _test(SecondVar, SecondVar);	3541 _test(SecondVar, SecondVar);

3543 _cmov(T_Dest2, T_Dest, InstX8632::Br_e);	3542 _cmov(T_Dest2, T_Dest, CondX86::Br_e);

3544 _mov(DestLo, T_Dest2);	3543 _mov(DestLo, T_Dest2);

3545 _mov(DestHi, Ctx->getConstantZero(IceType_i32));	3544 _mov(DestHi, Ctx->getConstantZero(IceType_i32));

3546 }	3545 }

3547	3546

3548 namespace {	3547 namespace {

3549	3548

3550 bool isAdd(const Inst *Inst) {	3549 bool isAdd(const Inst *Inst) {

3551 if (const InstArithmetic *Arith =	3550 if (const InstArithmetic *Arith =

3552 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {	3551 llvm::dyn_cast_or_null<const InstArithmetic>(Inst)) {

3553 return (Arith->getOp() == InstArithmetic::Add);	3552 return (Arith->getOp() == InstArithmetic::Add);

(...skipping 306 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
3860	3859

3861 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {	3860 void TargetX8632::lowerPhi(const InstPhi * /*Inst*/) {

3862 Func->setError("Phi found in regular instruction list");	3861 Func->setError("Phi found in regular instruction list");

3863 }	3862 }

3864	3863

3865 void TargetX8632::lowerRet(const InstRet *Inst) {	3864 void TargetX8632::lowerRet(const InstRet *Inst) {

3866 Variable *Reg = NULL;	3865 Variable *Reg = NULL;

3867 if (Inst->hasRetValue()) {	3866 if (Inst->hasRetValue()) {

3868 Operand *Src0 = legalize(Inst->getRetValue());	3867 Operand *Src0 = legalize(Inst->getRetValue());

3869 if (Src0->getType() == IceType_i64) {	3868 if (Src0->getType() == IceType_i64) {

3870 Variable *eax = legalizeToVar(loOperand(Src0), false, Reg_eax);	3869 Variable *eax = legalizeToVar(loOperand(Src0), false, RegX8632::Reg_eax);

3871 Variable *edx = legalizeToVar(hiOperand(Src0), false, Reg_edx);	3870 Variable *edx = legalizeToVar(hiOperand(Src0), false, RegX8632::Reg_edx);

3872 Reg = eax;	3871 Reg = eax;

3873 Context.insert(InstFakeUse::create(Func, edx));	3872 Context.insert(InstFakeUse::create(Func, edx));

3874 } else if (Src0->getType() == IceType_f32 ||	3873 } else if (Src0->getType() == IceType_f32 ||

3875 Src0->getType() == IceType_f64) {	3874 Src0->getType() == IceType_f64) {

3876 _fld(Src0);	3875 _fld(Src0);

3877 } else if (isVectorType(Src0->getType())) {	3876 } else if (isVectorType(Src0->getType())) {

3878 Reg = legalizeToVar(Src0, false, Reg_xmm0);	3877 Reg = legalizeToVar(Src0, false, RegX8632::Reg_xmm0);

3879 } else {	3878 } else {

3880 _mov(Reg, Src0, Reg_eax);	3879 _mov(Reg, Src0, RegX8632::Reg_eax);

3881 }	3880 }

3882 }	3881 }

3883 _ret(Reg);	3882 _ret(Reg);

3884 // Add a fake use of esp to make sure esp stays alive for the entire	3883 // Add a fake use of esp to make sure esp stays alive for the entire

3885 // function. Otherwise post-call esp adjustments get dead-code	3884 // function. Otherwise post-call esp adjustments get dead-code

3886 // eliminated. TODO: Are there more places where the fake use	3885 // eliminated. TODO: Are there more places where the fake use

3887 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not	3886 // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not

3888 // have a ret instruction.	3887 // have a ret instruction.

3889 Variable *esp = Func->getTarget()->getPhysicalRegister(Reg_esp);	3888 Variable *esp = Func->getTarget()->getPhysicalRegister(RegX8632::Reg_esp);

3890 Context.insert(InstFakeUse::create(Func, esp));	3889 Context.insert(InstFakeUse::create(Func, esp));

3891 }	3890 }

3892	3891

3893 void TargetX8632::lowerSelect(const InstSelect *Inst) {	3892 void TargetX8632::lowerSelect(const InstSelect *Inst) {

3894 Variable *Dest = Inst->getDest();	3893 Variable *Dest = Inst->getDest();

3895 Operand *SrcT = Inst->getTrueOperand();	3894 Operand *SrcT = Inst->getTrueOperand();

3896 Operand *SrcF = Inst->getFalseOperand();	3895 Operand *SrcF = Inst->getFalseOperand();

3897 Operand *Condition = Inst->getCondition();	3896 Operand *Condition = Inst->getCondition();

3898	3897

3899 if (isVectorType(Dest->getType())) {	3898 if (isVectorType(Dest->getType())) {

3900 Type SrcTy = SrcT->getType();	3899 Type SrcTy = SrcT->getType();

3901 Variable *T = makeReg(SrcTy);	3900 Variable *T = makeReg(SrcTy);

3902 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);	3901 Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem);

3903 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);	3902 Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem);

3904 if (InstructionSet >= SSE4_1) {	3903 if (InstructionSet >= SSE4_1) {

3905 // TODO(wala): If the condition operand is a constant, use blendps	3904 // TODO(wala): If the condition operand is a constant, use blendps

3906 // or pblendw.	3905 // or pblendw.

3907 //	3906 //

3908 // Use blendvps or pblendvb to implement select.	3907 // Use blendvps or pblendvb to implement select.

3909 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||	3908 if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 ||

3910 SrcTy == IceType_v4f32) {	3909 SrcTy == IceType_v4f32) {

3911 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);	3910 Operand *ConditionRM = legalize(Condition, Legal_Reg | Legal_Mem);

3912 Variable *xmm0 = makeReg(IceType_v4i32, Reg_xmm0);	3911 Variable *xmm0 = makeReg(IceType_v4i32, RegX8632::Reg_xmm0);

3913 _movp(xmm0, ConditionRM);	3912 _movp(xmm0, ConditionRM);

3914 _psll(xmm0, Ctx->getConstantInt32(IceType_i8, 31));	3913 _psll(xmm0, Ctx->getConstantInt32(IceType_i8, 31));

3915 _movp(T, SrcFRM);	3914 _movp(T, SrcFRM);

3916 _blendvps(T, SrcTRM, xmm0);	3915 _blendvps(T, SrcTRM, xmm0);

3917 _movp(Dest, T);	3916 _movp(Dest, T);

3918 } else {	3917 } else {

3919 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);	3918 assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16);

3920 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16	3919 Type SignExtTy = Condition->getType() == IceType_v8i1 ? IceType_v8i16

3921 : IceType_v16i8;	3920 : IceType_v16i8;

3922 Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0);	3921 Variable *xmm0 = makeReg(SignExtTy, RegX8632::Reg_xmm0);

3923 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));	3922 lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition));

3924 _movp(T, SrcFRM);	3923 _movp(T, SrcFRM);

3925 _pblendvb(T, SrcTRM, xmm0);	3924 _pblendvb(T, SrcTRM, xmm0);

3926 _movp(Dest, T);	3925 _movp(Dest, T);

3927 }	3926 }

3928 return;	3927 return;

3929 }	3928 }

3930 // Lower select without SSE4.1:	3929 // Lower select without SSE4.1:

3931 // a=d?b:c ==>	3930 // a=d?b:c ==>

3932 // if elementtype(d) != i1:	3931 // if elementtype(d) != i1:

(...skipping 27 matching lines...) Expand all Loading...
3960 InstX8632Label *Label = InstX8632Label::create(Func, this);	3959 InstX8632Label *Label = InstX8632Label::create(Func, this);

3961	3960

3962 if (Dest->getType() == IceType_i64) {	3961 if (Dest->getType() == IceType_i64) {

3963 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));	3962 Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));

3964 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));	3963 Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));

3965 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true);	3964 Operand *SrcLoRI = legalize(loOperand(SrcT), Legal_Reg | Legal_Imm, true);

3966 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true);	3965 Operand *SrcHiRI = legalize(hiOperand(SrcT), Legal_Reg | Legal_Imm, true);

3967 _cmp(ConditionRM, Zero);	3966 _cmp(ConditionRM, Zero);

3968 _mov(DestLo, SrcLoRI);	3967 _mov(DestLo, SrcLoRI);

3969 _mov(DestHi, SrcHiRI);	3968 _mov(DestHi, SrcHiRI);

3970 _br(InstX8632Br::Br_ne, Label);	3969 _br(CondX86::Br_ne, Label);

3971 Context.insert(InstFakeUse::create(Func, DestLo));	3970 Context.insert(InstFakeUse::create(Func, DestLo));

3972 Context.insert(InstFakeUse::create(Func, DestHi));	3971 Context.insert(InstFakeUse::create(Func, DestHi));

3973 Operand *SrcFLo = loOperand(SrcF);	3972 Operand *SrcFLo = loOperand(SrcF);

3974 Operand *SrcFHi = hiOperand(SrcF);	3973 Operand *SrcFHi = hiOperand(SrcF);

3975 SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true);	3974 SrcLoRI = legalize(SrcFLo, Legal_Reg | Legal_Imm, true);

3976 SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true);	3975 SrcHiRI = legalize(SrcFHi, Legal_Reg | Legal_Imm, true);

3977 _mov(DestLo, SrcLoRI);	3976 _mov(DestLo, SrcLoRI);

3978 _mov(DestHi, SrcHiRI);	3977 _mov(DestHi, SrcHiRI);

3979 } else {	3978 } else {

3980 _cmp(ConditionRM, Zero);	3979 _cmp(ConditionRM, Zero);

3981 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true);	3980 SrcT = legalize(SrcT, Legal_Reg | Legal_Imm, true);

3982 _mov(Dest, SrcT);	3981 _mov(Dest, SrcT);

3983 _br(InstX8632Br::Br_ne, Label);	3982 _br(CondX86::Br_ne, Label);

3984 Context.insert(InstFakeUse::create(Func, Dest));	3983 Context.insert(InstFakeUse::create(Func, Dest));

3985 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true);	3984 SrcF = legalize(SrcF, Legal_Reg | Legal_Imm, true);

3986 _mov(Dest, SrcF);	3985 _mov(Dest, SrcF);

3987 }	3986 }

3988	3987

3989 Context.insert(Label);	3988 Context.insert(Label);

3990 }	3989 }

3991	3990

3992 void TargetX8632::lowerStore(const InstStore *Inst) {	3991 void TargetX8632::lowerStore(const InstStore *Inst) {

3993 Operand *Value = Inst->getData();	3992 Operand *Value = Inst->getData();

(...skipping 47 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4041 // OK, we'll be slightly less naive by forcing Src into a physical	4040 // OK, we'll be slightly less naive by forcing Src into a physical

4042 // register if there are 2 or more uses.	4041 // register if there are 2 or more uses.

4043 if (NumCases >= 2)	4042 if (NumCases >= 2)

4044 Src0 = legalizeToVar(Src0, true);	4043 Src0 = legalizeToVar(Src0, true);

4045 else	4044 else

4046 Src0 = legalize(Src0, Legal_Reg | Legal_Mem, true);	4045 Src0 = legalize(Src0, Legal_Reg | Legal_Mem, true);

4047 for (SizeT I = 0; I < NumCases; ++I) {	4046 for (SizeT I = 0; I < NumCases; ++I) {

4048 // TODO(stichnot): Correct lowering for IceType_i64.	4047 // TODO(stichnot): Correct lowering for IceType_i64.

4049 Constant *Value = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I));	4048 Constant *Value = Ctx->getConstantInt32(IceType_i32, Inst->getValue(I));

4050 _cmp(Src0, Value);	4049 _cmp(Src0, Value);

4051 _br(InstX8632Br::Br_e, Inst->getLabel(I));	4050 _br(CondX86::Br_e, Inst->getLabel(I));

4052 }	4051 }

4053	4052

4054 _br(Inst->getLabelDefault());	4053 _br(Inst->getLabelDefault());

4055 }	4054 }

4056	4055

4057 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,	4056 void TargetX8632::scalarizeArithmetic(InstArithmetic::OpKind Kind,

4058 Variable *Dest, Operand *Src0,	4057 Variable *Dest, Operand *Src0,

4059 Operand *Src1) {	4058 Operand *Src1) {

4060 assert(isVectorType(Dest->getType()));	4059 assert(isVectorType(Dest->getType()));

4061 Type Ty = Dest->getType();	4060 Type Ty = Dest->getType();

(...skipping 487 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
4549 Str << "\t.align\t" << Align << "\n";	4548 Str << "\t.align\t" << Align << "\n";

4550 Str << MangledName << ":\n";	4549 Str << MangledName << ":\n";

4551 for (SizeT i = 0; i < Size; ++i) {	4550 for (SizeT i = 0; i < Size; ++i) {

4552 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";	4551 Str << "\t.byte\t" << (((unsigned)Data[i]) & 0xff) << "\n";

4553 }	4552 }

4554 Str << "\t.size\t" << MangledName << ", " << Size << "\n";	4553 Str << "\t.size\t" << MangledName << ", " << Size << "\n";

4555 }	4554 }

4556 }	4555 }

4557	4556

4558 } // end of namespace Ice	4557 } // end of namespace Ice

OLD	NEW

« no previous file with comments | « src/IceTargetLoweringX8632.h ('k') | no next file » | no next file with comments »