| OLD | NEW |
| 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// | 1 //===- subzero/src/IceTargetLoweringX86BaseImpl.h - x86 lowering -*- C++ -*-==// |
| 2 // | 2 // |
| 3 // The Subzero Code Generator | 3 // The Subzero Code Generator |
| 4 // | 4 // |
| 5 // This file is distributed under the University of Illinois Open Source | 5 // This file is distributed under the University of Illinois Open Source |
| 6 // License. See LICENSE.TXT for details. | 6 // License. See LICENSE.TXT for details. |
| 7 // | 7 // |
| 8 //===----------------------------------------------------------------------===// | 8 //===----------------------------------------------------------------------===// |
| 9 // | 9 /// |
| 10 // This file implements the TargetLoweringX86Base class, which | 10 /// \file |
| 11 // consists almost entirely of the lowering sequence for each | 11 /// This file implements the TargetLoweringX86Base class, which |
| 12 // high-level instruction. | 12 /// consists almost entirely of the lowering sequence for each |
| 13 // | 13 /// high-level instruction. |
| 14 /// |
| 14 //===----------------------------------------------------------------------===// | 15 //===----------------------------------------------------------------------===// |
| 15 | 16 |
| 16 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 17 #ifndef SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| 17 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 18 #define SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| 18 | 19 |
| 19 #include "IceCfg.h" | 20 #include "IceCfg.h" |
| 20 #include "IceCfgNode.h" | 21 #include "IceCfgNode.h" |
| 21 #include "IceClFlags.h" | 22 #include "IceClFlags.h" |
| 22 #include "IceDefs.h" | 23 #include "IceDefs.h" |
| 23 #include "IceELFObjectWriter.h" | 24 #include "IceELFObjectWriter.h" |
| 24 #include "IceGlobalInits.h" | 25 #include "IceGlobalInits.h" |
| 25 #include "IceInstX8632.h" | 26 #include "IceInstX8632.h" |
| 26 #include "IceLiveness.h" | 27 #include "IceLiveness.h" |
| 27 #include "IceOperand.h" | 28 #include "IceOperand.h" |
| 28 #include "IceRegistersX8632.h" | 29 #include "IceRegistersX8632.h" |
| 29 #include "IceTargetLoweringX8632.def" | 30 #include "IceTargetLoweringX8632.def" |
| 30 #include "IceTargetLoweringX8632.h" | 31 #include "IceTargetLoweringX8632.h" |
| 31 #include "IceUtils.h" | 32 #include "IceUtils.h" |
| 32 #include "llvm/Support/MathExtras.h" | 33 #include "llvm/Support/MathExtras.h" |
| 33 | 34 |
| 34 namespace Ice { | 35 namespace Ice { |
| 35 namespace X86Internal { | 36 namespace X86Internal { |
| 36 | 37 |
| 37 // A helper class to ease the settings of RandomizationPoolingPause | 38 /// A helper class to ease the settings of RandomizationPoolingPause |
| 38 // to disable constant blinding or pooling for some translation phases. | 39 /// to disable constant blinding or pooling for some translation phases. |
| 39 class BoolFlagSaver { | 40 class BoolFlagSaver { |
| 40 BoolFlagSaver() = delete; | 41 BoolFlagSaver() = delete; |
| 41 BoolFlagSaver(const BoolFlagSaver &) = delete; | 42 BoolFlagSaver(const BoolFlagSaver &) = delete; |
| 42 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete; | 43 BoolFlagSaver &operator=(const BoolFlagSaver &) = delete; |
| 43 | 44 |
| 44 public: | 45 public: |
| 45 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; } | 46 BoolFlagSaver(bool &F, bool NewValue) : OldValue(F), Flag(F) { F = NewValue; } |
| 46 ~BoolFlagSaver() { Flag = OldValue; } | 47 ~BoolFlagSaver() { Flag = OldValue; } |
| 47 | 48 |
| 48 private: | 49 private: |
| 49 const bool OldValue; | 50 const bool OldValue; |
| 50 bool &Flag; | 51 bool &Flag; |
| 51 }; | 52 }; |
| 52 | 53 |
| 53 template <class MachineTraits> class BoolFoldingEntry { | 54 template <class MachineTraits> class BoolFoldingEntry { |
| 54 BoolFoldingEntry(const BoolFoldingEntry &) = delete; | 55 BoolFoldingEntry(const BoolFoldingEntry &) = delete; |
| 55 | 56 |
| 56 public: | 57 public: |
| 57 BoolFoldingEntry() = default; | 58 BoolFoldingEntry() = default; |
| 58 explicit BoolFoldingEntry(Inst *I); | 59 explicit BoolFoldingEntry(Inst *I); |
| 59 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; | 60 BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; |
| 60 // Instr is the instruction producing the i1-type variable of interest. | 61 /// Instr is the instruction producing the i1-type variable of interest. |
| 61 Inst *Instr = nullptr; | 62 Inst *Instr = nullptr; |
| 62 // IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). | 63 /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). |
| 63 bool IsComplex = false; | 64 bool IsComplex = false; |
| 64 // IsLiveOut is initialized conservatively to true, and is set to false when | 65 /// IsLiveOut is initialized conservatively to true, and is set to false when |
| 65 // we encounter an instruction that ends Var's live range. We disable the | 66 /// we encounter an instruction that ends Var's live range. We disable the |
| 66 // folding optimization when Var is live beyond this basic block. Note that | 67 /// folding optimization when Var is live beyond this basic block. Note that |
| 67 // if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will | 68 /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will |
| 68 // always be true and the folding optimization will never be performed. | 69 /// always be true and the folding optimization will never be performed. |
| 69 bool IsLiveOut = true; | 70 bool IsLiveOut = true; |
| 70 // NumUses counts the number of times Var is used as a source operand in the | 71 /// NumUses counts the number of times Var is used as a source operand in the |
| 71 // basic block. If IsComplex is true and there is more than one use of Var, | 72 /// basic block. If IsComplex is true and there is more than one use of Var, |
| 72 // then the folding optimization is disabled for Var. | 73 /// then the folding optimization is disabled for Var. |
| 73 uint32_t NumUses = 0; | 74 uint32_t NumUses = 0; |
| 74 }; | 75 }; |
| 75 | 76 |
| 76 template <class MachineTraits> class BoolFolding { | 77 template <class MachineTraits> class BoolFolding { |
| 77 public: | 78 public: |
| 78 enum BoolFoldingProducerKind { | 79 enum BoolFoldingProducerKind { |
| 79 PK_None, | 80 PK_None, |
| 80 PK_Icmp32, | 81 PK_Icmp32, |
| 81 PK_Icmp64, | 82 PK_Icmp64, |
| 82 PK_Fcmp, | 83 PK_Fcmp, |
| 83 PK_Trunc | 84 PK_Trunc |
| 84 }; | 85 }; |
| 85 | 86 |
| 86 // Currently the actual enum values are not used (other than CK_None), but we | 87 /// Currently the actual enum values are not used (other than CK_None), but we |
| 87 // go | 88 /// go |
| 88 // ahead and produce them anyway for symmetry with the | 89 /// ahead and produce them anyway for symmetry with the |
| 89 // BoolFoldingProducerKind. | 90 /// BoolFoldingProducerKind. |
| 90 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; | 91 enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; |
| 91 | 92 |
| 92 private: | 93 private: |
| 93 BoolFolding(const BoolFolding &) = delete; | 94 BoolFolding(const BoolFolding &) = delete; |
| 94 BoolFolding &operator=(const BoolFolding &) = delete; | 95 BoolFolding &operator=(const BoolFolding &) = delete; |
| 95 | 96 |
| 96 public: | 97 public: |
| 97 BoolFolding() = default; | 98 BoolFolding() = default; |
| 98 static BoolFoldingProducerKind getProducerKind(const Inst *Instr); | 99 static BoolFoldingProducerKind getProducerKind(const Inst *Instr); |
| 99 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr); | 100 static BoolFoldingConsumerKind getConsumerKind(const Inst *Instr); |
| 100 static bool hasComplexLowering(const Inst *Instr); | 101 static bool hasComplexLowering(const Inst *Instr); |
| 101 void init(CfgNode *Node); | 102 void init(CfgNode *Node); |
| 102 const Inst *getProducerFor(const Operand *Opnd) const; | 103 const Inst *getProducerFor(const Operand *Opnd) const; |
| 103 void dump(const Cfg *Func) const; | 104 void dump(const Cfg *Func) const; |
| 104 | 105 |
| 105 private: | 106 private: |
| 106 // Returns true if Producers contains a valid entry for the given VarNum. | 107 /// Returns true if Producers contains a valid entry for the given VarNum. |
| 107 bool containsValid(SizeT VarNum) const { | 108 bool containsValid(SizeT VarNum) const { |
| 108 auto Element = Producers.find(VarNum); | 109 auto Element = Producers.find(VarNum); |
| 109 return Element != Producers.end() && Element->second.Instr != nullptr; | 110 return Element != Producers.end() && Element->second.Instr != nullptr; |
| 110 } | 111 } |
| 111 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; } | 112 void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; } |
| 112 // Producers maps Variable::Number to a BoolFoldingEntry. | 113 /// Producers maps Variable::Number to a BoolFoldingEntry. |
| 113 std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers; | 114 std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers; |
| 114 }; | 115 }; |
| 115 | 116 |
| 116 template <class MachineTraits> | 117 template <class MachineTraits> |
| 117 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) | 118 BoolFoldingEntry<MachineTraits>::BoolFoldingEntry(Inst *I) |
| 118 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} | 119 : Instr(I), IsComplex(BoolFolding<MachineTraits>::hasComplexLowering(I)) {} |
| 119 | 120 |
| 120 template <class MachineTraits> | 121 template <class MachineTraits> |
| 121 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind | 122 typename BoolFolding<MachineTraits>::BoolFoldingProducerKind |
| 122 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { | 123 BoolFolding<MachineTraits>::getProducerKind(const Inst *Instr) { |
| (...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 155 return CK_None; | 156 return CK_None; |
| 156 case InstCast::Sext: | 157 case InstCast::Sext: |
| 157 return CK_Sext; | 158 return CK_Sext; |
| 158 case InstCast::Zext: | 159 case InstCast::Zext: |
| 159 return CK_Zext; | 160 return CK_Zext; |
| 160 } | 161 } |
| 161 } | 162 } |
| 162 return CK_None; | 163 return CK_None; |
| 163 } | 164 } |
| 164 | 165 |
| 165 // Returns true if the producing instruction has a "complex" lowering | 166 /// Returns true if the producing instruction has a "complex" lowering |
| 166 // sequence. This generally means that its lowering sequence requires | 167 /// sequence. This generally means that its lowering sequence requires |
| 167 // more than one conditional branch, namely 64-bit integer compares | 168 /// more than one conditional branch, namely 64-bit integer compares |
| 168 // and some floating-point compares. When this is true, and there is | 169 /// and some floating-point compares. When this is true, and there is |
| 169 // more than one consumer, we prefer to disable the folding | 170 /// more than one consumer, we prefer to disable the folding |
| 170 // optimization because it minimizes branches. | 171 /// optimization because it minimizes branches. |
| 171 template <class MachineTraits> | 172 template <class MachineTraits> |
| 172 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { | 173 bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { |
| 173 switch (getProducerKind(Instr)) { | 174 switch (getProducerKind(Instr)) { |
| 174 default: | 175 default: |
| 175 return false; | 176 return false; |
| 176 case PK_Icmp64: | 177 case PK_Icmp64: |
| 177 return true; | 178 return true; |
| 178 case PK_Fcmp: | 179 case PK_Fcmp: |
| 179 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] | 180 return MachineTraits::TableFcmp[llvm::cast<InstFcmp>(Instr)->getCondition()] |
| 180 .C2 != MachineTraits::Cond::Br_None; | 181 .C2 != MachineTraits::Cond::Br_None; |
| (...skipping 422 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 603 } | 604 } |
| 604 | 605 |
| 605 // Converts a ConstantInteger32 operand into its constant value, or | 606 /// Converts a ConstantInteger32 operand into its constant value, or |
| 606 // MemoryOrderInvalid if the operand is not a ConstantInteger32. | 607 /// MemoryOrderInvalid if the operand is not a ConstantInteger32. |
| 607 uint64_t getConstantMemoryOrder(Operand *Opnd) { | 608 uint64_t getConstantMemoryOrder(Operand *Opnd) { |
| 608 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) | 609 if (auto Integer = llvm::dyn_cast<ConstantInteger32>(Opnd)) |
| 609 return Integer->getValue(); | 610 return Integer->getValue(); |
| 610 return Intrinsics::MemoryOrderInvalid; | 611 return Intrinsics::MemoryOrderInvalid; |
| 611 } | 612 } |
| 612 | 613 |
| 613 // Determines whether the dest of a Load instruction can be folded | 614 /// Determines whether the dest of a Load instruction can be folded |
| 614 // into one of the src operands of a 2-operand instruction. This is | 615 /// into one of the src operands of a 2-operand instruction. This is |
| 615 // true as long as the load dest matches exactly one of the binary | 616 /// true as long as the load dest matches exactly one of the binary |
| 616 // instruction's src operands. Replaces Src0 or Src1 with LoadSrc if | 617 /// instruction's src operands. Replaces Src0 or Src1 with LoadSrc if |
| 617 // the answer is true. | 618 /// the answer is true. |
| 618 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, | 619 bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, |
| 619 Operand *&Src0, Operand *&Src1) { | 620 Operand *&Src0, Operand *&Src1) { |
| 620 if (Src0 == LoadDest && Src1 != LoadDest) { | 621 if (Src0 == LoadDest && Src1 != LoadDest) { |
| 621 Src0 = LoadSrc; | 622 Src0 = LoadSrc; |
| 622 return true; | 623 return true; |
| 623 } | 624 } |
| 624 if (Src0 != LoadDest && Src1 == LoadDest) { | 625 if (Src0 != LoadDest && Src1 == LoadDest) { |
| 625 Src1 = LoadSrc; | 626 Src1 = LoadSrc; |
| 626 return true; | 627 return true; |
| 627 } | 628 } |
| (...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 845 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); | 846 RegisterArg->setName(Func, "home_reg:" + Arg->getName(Func)); |
| 846 RegisterArg->setRegNum(RegNum); | 847 RegisterArg->setRegNum(RegNum); |
| 847 RegisterArg->setIsArg(); | 848 RegisterArg->setIsArg(); |
| 848 Arg->setIsArg(false); | 849 Arg->setIsArg(false); |
| 849 | 850 |
| 850 Args[I] = RegisterArg; | 851 Args[I] = RegisterArg; |
| 851 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); | 852 Context.insert(InstAssign::create(Func, Arg, RegisterArg)); |
| 852 } | 853 } |
| 853 } | 854 } |
| 854 | 855 |
| 855 // Helper function for addProlog(). | 856 /// Helper function for addProlog(). |
| 856 // | 857 /// |
| 857 // This assumes Arg is an argument passed on the stack. This sets the | 858 /// This assumes Arg is an argument passed on the stack. This sets the |
| 858 // frame offset for Arg and updates InArgsSizeBytes according to Arg's | 859 /// frame offset for Arg and updates InArgsSizeBytes according to Arg's |
| 859 // width. For an I64 arg that has been split into Lo and Hi components, | 860 /// width. For an I64 arg that has been split into Lo and Hi components, |
| 860 // it calls itself recursively on the components, taking care to handle | 861 /// it calls itself recursively on the components, taking care to handle |
| 861 // Lo first because of the little-endian architecture. Lastly, this | 862 /// Lo first because of the little-endian architecture. Lastly, this |
| 862 // function generates an instruction to copy Arg into its assigned | 863 /// function generates an instruction to copy Arg into its assigned |
| 863 // register if applicable. | 864 /// register if applicable. |
| 864 template <class Machine> | 865 template <class Machine> |
| 865 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, | 866 void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, |
| 866 Variable *FramePtr, | 867 Variable *FramePtr, |
| 867 size_t BasicFrameOffset, | 868 size_t BasicFrameOffset, |
| 868 size_t &InArgsSizeBytes) { | 869 size_t &InArgsSizeBytes) { |
| 869 Variable *Lo = Arg->getLo(); | 870 Variable *Lo = Arg->getLo(); |
| 870 Variable *Hi = Arg->getHi(); | 871 Variable *Hi = Arg->getHi(); |
| 871 Type Ty = Arg->getType(); | 872 Type Ty = Arg->getType(); |
| 872 if (Lo && Hi && Ty == IceType_i64) { | 873 if (Lo && Hi && Ty == IceType_i64) { |
| 873 assert(Lo->getType() != IceType_i64); // don't want infinite recursion | 874 assert(Lo->getType() != IceType_i64); // don't want infinite recursion |
| (...skipping 466 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1340 // multiple of the required alignment at runtime. | 1341 // multiple of the required alignment at runtime. |
| 1341 Variable *T = makeReg(IceType_i32); | 1342 Variable *T = makeReg(IceType_i32); |
| 1342 _mov(T, TotalSize); | 1343 _mov(T, TotalSize); |
| 1343 _add(T, Ctx->getConstantInt32(Alignment - 1)); | 1344 _add(T, Ctx->getConstantInt32(Alignment - 1)); |
| 1344 _and(T, Ctx->getConstantInt32(-Alignment)); | 1345 _and(T, Ctx->getConstantInt32(-Alignment)); |
| 1345 _sub(esp, T); | 1346 _sub(esp, T); |
| 1346 } | 1347 } |
| 1347 _mov(Dest, esp); | 1348 _mov(Dest, esp); |
| 1348 } | 1349 } |
| 1349 | 1350 |
| 1350 // Strength-reduce scalar integer multiplication by a constant (for | 1351 /// Strength-reduce scalar integer multiplication by a constant (for |
| 1351 // i32 or narrower) for certain constants. The lea instruction can be | 1352 /// i32 or narrower) for certain constants. The lea instruction can be |
| 1352 // used to multiply by 3, 5, or 9, and the lsh instruction can be used | 1353 /// used to multiply by 3, 5, or 9, and the lsh instruction can be used |
| 1353 // to multiply by powers of 2. These can be combined such that | 1354 /// to multiply by powers of 2. These can be combined such that |
| 1354 // e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, | 1355 /// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, |
| 1355 // combined with left-shifting by 2. | 1356 /// combined with left-shifting by 2. |
| 1356 template <class Machine> | 1357 template <class Machine> |
| 1357 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, | 1358 bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
| 1358 int32_t Src1) { | 1359 int32_t Src1) { |
| 1359 // Disable this optimization for Om1 and O0, just to keep things | 1360 // Disable this optimization for Om1 and O0, just to keep things |
| 1360 // simple there. | 1361 // simple there. |
| 1361 if (Ctx->getFlags().getOptLevel() < Opt_1) | 1362 if (Ctx->getFlags().getOptLevel() < Opt_1) |
| 1362 return false; | 1363 return false; |
| 1363 Type Ty = Dest->getType(); | 1364 Type Ty = Dest->getType(); |
| 1364 Variable *T = nullptr; | 1365 Variable *T = nullptr; |
| 1365 if (Src1 == -1) { | 1366 if (Src1 == -1) { |
| (...skipping 1018 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2384 if (DestTy == IceType_v16i8) { | 2385 if (DestTy == IceType_v16i8) { |
| 2385 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 | 2386 // onemask = materialize(1,1,...); dst = (src & onemask) > 0 |
| 2386 Variable *OneMask = makeVectorOfOnes(Dest->getType()); | 2387 Variable *OneMask = makeVectorOfOnes(Dest->getType()); |
| 2387 Variable *T = makeReg(DestTy); | 2388 Variable *T = makeReg(DestTy); |
| 2388 _movp(T, Src0RM); | 2389 _movp(T, Src0RM); |
| 2389 _pand(T, OneMask); | 2390 _pand(T, OneMask); |
| 2390 Variable *Zeros = makeVectorOfZeros(Dest->getType()); | 2391 Variable *Zeros = makeVectorOfZeros(Dest->getType()); |
| 2391 _pcmpgt(T, Zeros); | 2392 _pcmpgt(T, Zeros); |
| 2392 _movp(Dest, T); | 2393 _movp(Dest, T); |
| 2393 } else { | 2394 } else { |
| 2394 // width = width(elty) - 1; dest = (src << width) >> width | 2395 // width = width(elty) - 1; dest = (src << width) >> width |
| 2395 SizeT ShiftAmount = | 2396 SizeT ShiftAmount = |
| 2396 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - | 2397 Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - |
| 2397 1; | 2398 1; |
| 2398 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); | 2399 Constant *ShiftConstant = Ctx->getConstantInt8(ShiftAmount); |
| 2399 Variable *T = makeReg(DestTy); | 2400 Variable *T = makeReg(DestTy); |
| 2400 _movp(T, Src0RM); | 2401 _movp(T, Src0RM); |
| 2401 _psll(T, ShiftConstant); | 2402 _psll(T, ShiftConstant); |
| 2402 _psra(T, ShiftConstant); | 2403 _psra(T, ShiftConstant); |
| 2403 _movp(Dest, T); | 2404 _movp(Dest, T); |
| 2404 } | 2405 } |
| (...skipping 1527 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 3932 // the end of the loop, since it will be re-used by the loop. | 3933 // the end of the loop, since it will be re-used by the loop. |
| 3933 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { | 3934 if (Variable *ValVar = llvm::dyn_cast<Variable>(Val)) { |
| 3934 Context.insert(InstFakeUse::create(Func, ValVar)); | 3935 Context.insert(InstFakeUse::create(Func, ValVar)); |
| 3935 } | 3936 } |
| 3936 // The address base (if any) is also reused in the loop. | 3937 // The address base (if any) is also reused in the loop. |
| 3937 if (Variable *Base = Addr->getBase()) | 3938 if (Variable *Base = Addr->getBase()) |
| 3938 Context.insert(InstFakeUse::create(Func, Base)); | 3939 Context.insert(InstFakeUse::create(Func, Base)); |
| 3939 _mov(Dest, T_eax); | 3940 _mov(Dest, T_eax); |
| 3940 } | 3941 } |
| 3941 | 3942 |
| 3942 // Lowers count {trailing, leading} zeros intrinsic. | 3943 /// Lowers count {trailing, leading} zeros intrinsic. |
| 3943 // | 3944 /// |
| 3944 // We could do constant folding here, but that should have | 3945 /// We could do constant folding here, but that should have |
| 3945 // been done by the front-end/middle-end optimizations. | 3946 /// been done by the front-end/middle-end optimizations. |
| 3946 template <class Machine> | 3947 template <class Machine> |
| 3947 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, | 3948 void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, |
| 3948 Operand *FirstVal, | 3949 Operand *FirstVal, |
| 3949 Operand *SecondVal) { | 3950 Operand *SecondVal) { |
| 3950 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). | 3951 // TODO(jvoung): Determine if the user CPU supports LZCNT (BMI). |
| 3951 // Then the instructions will handle the Val == 0 case much more simply | 3952 // Then the instructions will handle the Val == 0 case much more simply |
| 3952 // and won't require conversion from bit position to number of zeros. | 3953 // and won't require conversion from bit position to number of zeros. |
| 3953 // | 3954 // |
| 3954 // Otherwise: | 3955 // Otherwise: |
| 3955 // bsr IF_NOT_ZERO, Val | 3956 // bsr IF_NOT_ZERO, Val |
| (...skipping 645 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4601 | 4602 |
| 4602 // Insert the result into position. | 4603 // Insert the result into position. |
| 4603 Variable *DestT = Func->template makeVariable(Ty); | 4604 Variable *DestT = Func->template makeVariable(Ty); |
| 4604 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); | 4605 lowerInsertElement(InstInsertElement::create(Func, DestT, T, Res, Index)); |
| 4605 T = DestT; | 4606 T = DestT; |
| 4606 } | 4607 } |
| 4607 | 4608 |
| 4608 lowerAssign(InstAssign::create(Func, Dest, T)); | 4609 lowerAssign(InstAssign::create(Func, Dest, T)); |
| 4609 } | 4610 } |
| 4610 | 4611 |
| 4611 // The following pattern occurs often in lowered C and C++ code: | 4612 /// The following pattern occurs often in lowered C and C++ code: |
| 4612 // | 4613 /// |
| 4613 // %cmp = fcmp/icmp pred <n x ty> %src0, %src1 | 4614 /// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
| 4614 // %cmp.ext = sext <n x i1> %cmp to <n x ty> | 4615 /// %cmp.ext = sext <n x i1> %cmp to <n x ty> |
| 4615 // | 4616 /// |
| 4616 // We can eliminate the sext operation by copying the result of pcmpeqd, | 4617 /// We can eliminate the sext operation by copying the result of pcmpeqd, |
| 4617 // pcmpgtd, or cmpps (which produce sign extended results) to the result | 4618 /// pcmpgtd, or cmpps (which produce sign extended results) to the result |
| 4618 // of the sext operation. | 4619 /// of the sext operation. |
| 4619 template <class Machine> | 4620 template <class Machine> |
| 4620 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction( | 4621 void TargetX86Base<Machine>::eliminateNextVectorSextInstruction( |
| 4621 Variable *SignExtendedResult) { | 4622 Variable *SignExtendedResult) { |
| 4622 if (InstCast *NextCast = | 4623 if (InstCast *NextCast = |
| 4623 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { | 4624 llvm::dyn_cast_or_null<InstCast>(Context.getNextInst())) { |
| 4624 if (NextCast->getCastKind() == InstCast::Sext && | 4625 if (NextCast->getCastKind() == InstCast::Sext && |
| 4625 NextCast->getSrc(0) == SignExtendedResult) { | 4626 NextCast->getSrc(0) == SignExtendedResult) { |
| 4626 NextCast->setDeleted(); | 4627 NextCast->setDeleted(); |
| 4627 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); | 4628 _movp(NextCast->getDest(), legalizeToVar(SignExtendedResult)); |
| 4628 // Skip over the instruction. | 4629 // Skip over the instruction. |
| (...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4714 | 4715 |
| 4715 template <class Machine> | 4716 template <class Machine> |
| 4716 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { | 4717 void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { |
| 4717 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { | 4718 if (const auto *RMW = llvm::dyn_cast<InstX8632FakeRMW>(Instr)) { |
| 4718 lowerRMW(RMW); | 4719 lowerRMW(RMW); |
| 4719 } else { | 4720 } else { |
| 4720 TargetLowering::lowerOther(Instr); | 4721 TargetLowering::lowerOther(Instr); |
| 4721 } | 4722 } |
| 4722 } | 4723 } |
| 4723 | 4724 |
| 4724 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to | 4725 /// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to |
| 4725 // preserve integrity of liveness analysis. Undef values are also | 4726 /// preserve integrity of liveness analysis. Undef values are also |
| 4726 // turned into zeroes, since loOperand() and hiOperand() don't expect | 4727 /// turned into zeroes, since loOperand() and hiOperand() don't expect |
| 4727 // Undef input. | 4728 /// Undef input. |
| 4728 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { | 4729 template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { |
| 4729 // Pause constant blinding or pooling, blinding or pooling will be done later | 4730 // Pause constant blinding or pooling, blinding or pooling will be done later |
| 4730 // during phi lowering assignments | 4731 // during phi lowering assignments |
| 4731 BoolFlagSaver B(RandomizationPoolingPaused, true); | 4732 BoolFlagSaver B(RandomizationPoolingPaused, true); |
| 4732 | 4733 |
| 4733 CfgNode *Node = Context.getNode(); | 4734 CfgNode *Node = Context.getNode(); |
| 4734 for (Inst &I : Node->getPhis()) { | 4735 for (Inst &I : Node->getPhis()) { |
| 4735 auto Phi = llvm::dyn_cast<InstPhi>(&I); | 4736 auto Phi = llvm::dyn_cast<InstPhi>(&I); |
| 4736 if (Phi->isDeleted()) | 4737 if (Phi->isDeleted()) |
| 4737 continue; | 4738 continue; |
| (...skipping 25 matching lines...) Expand all Loading... |
| 4763 // because they do in fact need a register to materialize the vector | 4764 // because they do in fact need a register to materialize the vector |
| 4764 // of zeroes into. | 4765 // of zeroes into. |
| 4765 if (llvm::isa<ConstantUndef>(Opnd)) | 4766 if (llvm::isa<ConstantUndef>(Opnd)) |
| 4766 return isScalarFloatingType(Opnd->getType()) || | 4767 return isScalarFloatingType(Opnd->getType()) || |
| 4767 isVectorType(Opnd->getType()); | 4768 isVectorType(Opnd->getType()); |
| 4768 if (llvm::isa<Constant>(Opnd)) | 4769 if (llvm::isa<Constant>(Opnd)) |
| 4769 return isScalarFloatingType(Opnd->getType()); | 4770 return isScalarFloatingType(Opnd->getType()); |
| 4770 return true; | 4771 return true; |
| 4771 } | 4772 } |
| 4772 | 4773 |
| 4773 // Lower the pre-ordered list of assignments into mov instructions. | 4774 /// Lower the pre-ordered list of assignments into mov instructions. |
| 4774 // Also has to do some ad-hoc register allocation as necessary. | 4775 /// Also has to do some ad-hoc register allocation as necessary. |
| 4775 template <class Machine> | 4776 template <class Machine> |
| 4776 void TargetX86Base<Machine>::lowerPhiAssignments( | 4777 void TargetX86Base<Machine>::lowerPhiAssignments( |
| 4777 CfgNode *Node, const AssignList &Assignments) { | 4778 CfgNode *Node, const AssignList &Assignments) { |
| 4778 // Check that this is a properly initialized shell of a node. | 4779 // Check that this is a properly initialized shell of a node. |
| 4779 assert(Node->getOutEdges().size() == 1); | 4780 assert(Node->getOutEdges().size() == 1); |
| 4780 assert(Node->getInsts().empty()); | 4781 assert(Node->getInsts().empty()); |
| 4781 assert(Node->getPhis().empty()); | 4782 assert(Node->getPhis().empty()); |
| 4782 CfgNode *Succ = Node->getOutEdges().front(); | 4783 CfgNode *Succ = Node->getOutEdges().front(); |
| 4783 getContext().init(Node); | 4784 getContext().init(Node); |
| 4784 // Register set setup similar to regAlloc(). | 4785 // Register set setup similar to regAlloc(). |
| (...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4968 // SSE has no left shift operation for vectors of 8 bit integers. | 4969 // SSE has no left shift operation for vectors of 8 bit integers. |
| 4969 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; | 4970 const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; |
| 4970 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); | 4971 Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); |
| 4971 Variable *Reg = makeReg(Ty, RegNum); | 4972 Variable *Reg = makeReg(Ty, RegNum); |
| 4972 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); | 4973 _movd(Reg, legalize(ConstantMask, Legal_Reg | Legal_Mem)); |
| 4973 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); | 4974 _pshufd(Reg, Reg, Ctx->getConstantZero(IceType_i8)); |
| 4974 return Reg; | 4975 return Reg; |
| 4975 } | 4976 } |
| 4976 } | 4977 } |
| 4977 | 4978 |
| 4978 // Construct a mask in a register that can be and'ed with a | 4979 /// Construct a mask in a register that can be and'ed with a |
| 4979 // floating-point value to mask off its sign bit. The value will be | 4980 /// floating-point value to mask off its sign bit. The value will be |
| 4980 // <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> | 4981 /// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> |
| 4981 // for f64. Construct it as vector of ones logically right shifted | 4982 /// for f64. Construct it as vector of ones logically right shifted |
| 4982 // one bit. TODO(stichnot): Fix the wala TODO above, to represent | 4983 /// one bit. TODO(stichnot): Fix the wala TODO above, to represent |
| 4983 // vector constants in memory. | 4984 /// vector constants in memory. |
| 4984 template <class Machine> | 4985 template <class Machine> |
| 4985 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, | 4986 Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, |
| 4986 int32_t RegNum) { | 4987 int32_t RegNum) { |
| 4987 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); | 4988 Variable *Reg = makeVectorOfMinusOnes(Ty, RegNum); |
| 4988 _psrl(Reg, Ctx->getConstantInt8(1)); | 4989 _psrl(Reg, Ctx->getConstantInt8(1)); |
| 4989 return Reg; | 4990 return Reg; |
| 4990 } | 4991 } |
| 4991 | 4992 |
| 4992 template <class Machine> | 4993 template <class Machine> |
| 4993 OperandX8632Mem * | 4994 OperandX8632Mem * |
| 4994 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, | 4995 TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, |
| 4995 uint32_t Offset) { | 4996 uint32_t Offset) { |
| 4996 // Ensure that Loc is a stack slot. | 4997 // Ensure that Loc is a stack slot. |
| 4997 assert(Slot->getWeight().isZero()); | 4998 assert(Slot->getWeight().isZero()); |
| 4998 assert(Slot->getRegNum() == Variable::NoRegister); | 4999 assert(Slot->getRegNum() == Variable::NoRegister); |
| 4999 // Compute the location of Loc in memory. | 5000 // Compute the location of Loc in memory. |
| 5000 // TODO(wala,stichnot): lea should not be required. The address of | 5001 // TODO(wala,stichnot): lea should not be required. The address of |
| 5001 // the stack slot is known at compile time (although not until after | 5002 // the stack slot is known at compile time (although not until after |
| 5002 // addProlog()). | 5003 // addProlog()). |
| 5003 const Type PointerType = IceType_i32; | 5004 const Type PointerType = IceType_i32; |
| 5004 Variable *Loc = makeReg(PointerType); | 5005 Variable *Loc = makeReg(PointerType); |
| 5005 _lea(Loc, Slot); | 5006 _lea(Loc, Slot); |
| 5006 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); | 5007 Constant *ConstantOffset = Ctx->getConstantInt32(Offset); |
| 5007 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); | 5008 return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); |
| 5008 } | 5009 } |
| 5009 | 5010 |
| 5010 // Helper for legalize() to emit the right code to lower an operand to a | 5011 /// Helper for legalize() to emit the right code to lower an operand to a |
| 5011 // register of the appropriate type. | 5012 /// register of the appropriate type. |
| 5012 template <class Machine> | 5013 template <class Machine> |
| 5013 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { | 5014 Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { |
| 5014 Type Ty = Src->getType(); | 5015 Type Ty = Src->getType(); |
| 5015 Variable *Reg = makeReg(Ty, RegNum); | 5016 Variable *Reg = makeReg(Ty, RegNum); |
| 5016 if (isVectorType(Ty)) { | 5017 if (isVectorType(Ty)) { |
| 5017 _movp(Reg, Src); | 5018 _movp(Reg, Src); |
| 5018 } else { | 5019 } else { |
| 5019 _mov(Reg, Src); | 5020 _mov(Reg, Src); |
| 5020 } | 5021 } |
| 5021 return Reg; | 5022 return Reg; |
| (...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5128 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || | 5129 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) || |
| 5129 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { | 5130 (RegNum != Variable::NoRegister && RegNum != Var->getRegNum())) { |
| 5130 From = copyToReg(From, RegNum); | 5131 From = copyToReg(From, RegNum); |
| 5131 } | 5132 } |
| 5132 return From; | 5133 return From; |
| 5133 } | 5134 } |
| 5134 llvm_unreachable("Unhandled operand kind in legalize()"); | 5135 llvm_unreachable("Unhandled operand kind in legalize()"); |
| 5135 return From; | 5136 return From; |
| 5136 } | 5137 } |
| 5137 | 5138 |
| 5138 // Provide a trivial wrapper to legalize() for this common usage. | 5139 /// Provide a trivial wrapper to legalize() for this common usage. |
| 5139 template <class Machine> | 5140 template <class Machine> |
| 5140 Variable *TargetX86Base<Machine>::legalizeToVar(Operand *From, int32_t RegNum) { | 5141 Variable *TargetX86Base<Machine>::legalizeToVar(Operand *From, int32_t RegNum) { |
| 5141 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); | 5142 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); |
| 5142 } | 5143 } |
| 5143 | 5144 |
| 5144 // For the cmp instruction, if Src1 is an immediate, or known to be a | 5145 /// For the cmp instruction, if Src1 is an immediate, or known to be a |
| 5145 // physical register, we can allow Src0 to be a memory operand. | 5146 /// physical register, we can allow Src0 to be a memory operand. |
| 5146 // Otherwise, Src0 must be copied into a physical register. | 5147 /// Otherwise, Src0 must be copied into a physical register. |
| 5147 // (Actually, either Src0 or Src1 can be chosen for the physical | 5148 /// (Actually, either Src0 or Src1 can be chosen for the physical |
| 5148 // register, but unfortunately we have to commit to one or the other | 5149 /// register, but unfortunately we have to commit to one or the other |
| 5149 // before register allocation.) | 5150 /// before register allocation.) |
| 5150 template <class Machine> | 5151 template <class Machine> |
| 5151 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0, | 5152 Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0, |
| 5152 Operand *Src1) { | 5153 Operand *Src1) { |
| 5153 bool IsSrc1ImmOrReg = false; | 5154 bool IsSrc1ImmOrReg = false; |
| 5154 if (llvm::isa<Constant>(Src1)) { | 5155 if (llvm::isa<Constant>(Src1)) { |
| 5155 IsSrc1ImmOrReg = true; | 5156 IsSrc1ImmOrReg = true; |
| 5156 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { | 5157 } else if (Variable *Var = llvm::dyn_cast<Variable>(Src1)) { |
| 5157 if (Var->hasReg()) | 5158 if (Var->hasReg()) |
| 5158 IsSrc1ImmOrReg = true; | 5159 IsSrc1ImmOrReg = true; |
| 5159 } | 5160 } |
| (...skipping 148 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5308 return; | 5309 return; |
| 5309 Ostream &Str = Ctx->getStrEmit(); | 5310 Ostream &Str = Ctx->getStrEmit(); |
| 5310 C->emitPoolLabel(Str); | 5311 C->emitPoolLabel(Str); |
| 5311 } | 5312 } |
| 5312 | 5313 |
| 5313 template <class Machine> | 5314 template <class Machine> |
| 5314 void TargetX86Base<Machine>::emit(const ConstantUndef *) const { | 5315 void TargetX86Base<Machine>::emit(const ConstantUndef *) const { |
| 5315 llvm::report_fatal_error("undef value encountered by emitter."); | 5316 llvm::report_fatal_error("undef value encountered by emitter."); |
| 5316 } | 5317 } |
| 5317 | 5318 |
| 5318 // Randomize or pool an Immediate. | 5319 /// Randomize or pool an Immediate. |
| 5319 template <class Machine> | 5320 template <class Machine> |
| 5320 Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate, | 5321 Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate, |
| 5321 int32_t RegNum) { | 5322 int32_t RegNum) { |
| 5322 assert(llvm::isa<ConstantInteger32>(Immediate) || | 5323 assert(llvm::isa<ConstantInteger32>(Immediate) || |
| 5323 llvm::isa<ConstantRelocatable>(Immediate)); | 5324 llvm::isa<ConstantRelocatable>(Immediate)); |
| 5324 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || | 5325 if (Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == RPI_None || |
| 5325 RandomizationPoolingPaused == true) { | 5326 RandomizationPoolingPaused == true) { |
| 5326 // Immediates randomization/pooling off or paused | 5327 // Immediates randomization/pooling off or paused |
| 5327 return Immediate; | 5328 return Immediate; |
| 5328 } | 5329 } |
| (...skipping 178 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5507 } | 5508 } |
| 5508 // the offset is not eligible for blinding or pooling, return the original | 5509 // the offset is not eligible for blinding or pooling, return the original |
| 5509 // mem operand | 5510 // mem operand |
| 5510 return MemOperand; | 5511 return MemOperand; |
| 5511 } | 5512 } |
| 5512 | 5513 |
| 5513 } // end of namespace X86Internal | 5514 } // end of namespace X86Internal |
| 5514 } // end of namespace Ice | 5515 } // end of namespace Ice |
| 5515 | 5516 |
| 5516 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H | 5517 #endif // SUBZERO_SRC_ICETARGETLOWERINGX86BASEIMPL_H |
| OLD | NEW |