Chromium Code Reviews — Index: src/IceTargetLoweringX86BaseImpl.h |
| diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h |
| index a2bf75f6885b065de8b756ec2f9f470425c2201c..22e2dcb261899192db65b10331c0a8dbd0e71338 100644 |
| --- a/src/IceTargetLoweringX86BaseImpl.h |
| +++ b/src/IceTargetLoweringX86BaseImpl.h |
| @@ -34,8 +34,8 @@ |
| namespace Ice { |
| namespace X86Internal { |
| -// A helper class to ease the settings of RandomizationPoolingPause |
| -// to disable constant blinding or pooling for some translation phases. |
| +/// A helper class to ease the settings of RandomizationPoolingPause |
| +/// to disable constant blinding or pooling for some translation phases. |
| class BoolFlagSaver { |
| BoolFlagSaver() = delete; |
| BoolFlagSaver(const BoolFlagSaver &) = delete; |
| @@ -57,15 +57,15 @@ public: |
| BoolFoldingEntry() = default; |
| explicit BoolFoldingEntry(Inst *I); |
| BoolFoldingEntry &operator=(const BoolFoldingEntry &) = default; |
| - // Instr is the instruction producing the i1-type variable of interest. |
| + /// Instr is the instruction producing the i1-type variable of interest. |
| Inst *Instr = nullptr; |
| - // IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). |
| + /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). |
| bool IsComplex = false; |
| - // IsLiveOut is initialized conservatively to true, and is set to false when |
| - // we encounter an instruction that ends Var's live range. We disable the |
| - // folding optimization when Var is live beyond this basic block. Note that |
| - // if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will |
| - // always be true and the folding optimization will never be performed. |
| + /// IsLiveOut is initialized conservatively to true, and is set to false when |
| + /// we encounter an instruction that ends Var's live range. We disable the |
| + /// folding optimization when Var is live beyond this basic block. Note that |
| + /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will |
| + /// always be true and the folding optimization will never be performed. |
| bool IsLiveOut = true; |
| // NumUses counts the number of times Var is used as a source operand in the |
| // basic block. If IsComplex is true and there is more than one use of Var, |
| @@ -83,10 +83,10 @@ public: |
| PK_Trunc |
| }; |
| - // Currently the actual enum values are not used (other than CK_None), but we |
| - // go |
| - // ahead and produce them anyway for symmetry with the |
| - // BoolFoldingProducerKind. |
| + /// Currently the actual enum values are not used (other than CK_None), but we |
| + /// go |
| + /// ahead and produce them anyway for symmetry with the |
| + /// BoolFoldingProducerKind. |
| enum BoolFoldingConsumerKind { CK_None, CK_Br, CK_Select, CK_Sext, CK_Zext }; |
| private: |
| @@ -103,13 +103,13 @@ public: |
| void dump(const Cfg *Func) const; |
| private: |
| - // Returns true if Producers contains a valid entry for the given VarNum. |
| + /// Returns true if Producers contains a valid entry for the given VarNum. |
| bool containsValid(SizeT VarNum) const { |
| auto Element = Producers.find(VarNum); |
| return Element != Producers.end() && Element->second.Instr != nullptr; |
| } |
| void setInvalid(SizeT VarNum) { Producers[VarNum].Instr = nullptr; } |
| - // Producers maps Variable::Number to a BoolFoldingEntry. |
| + /// Producers maps Variable::Number to a BoolFoldingEntry. |
| std::unordered_map<SizeT, BoolFoldingEntry<MachineTraits>> Producers; |
| }; |
| @@ -162,12 +162,12 @@ BoolFolding<MachineTraits>::getConsumerKind(const Inst *Instr) { |
| return CK_None; |
| } |
| -// Returns true if the producing instruction has a "complex" lowering |
| -// sequence. This generally means that its lowering sequence requires |
| -// more than one conditional branch, namely 64-bit integer compares |
| -// and some floating-point compares. When this is true, and there is |
| -// more than one consumer, we prefer to disable the folding |
| -// optimization because it minimizes branches. |
| +/// Returns true if the producing instruction has a "complex" lowering |
| +/// sequence. This generally means that its lowering sequence requires |
| +/// more than one conditional branch, namely 64-bit integer compares |
| +/// and some floating-point compares. When this is true, and there is |
| +/// more than one consumer, we prefer to disable the folding |
| +/// optimization because it minimizes branches. |
| template <class MachineTraits> |
| bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { |
| switch (getProducerKind(Instr)) { |
| @@ -610,11 +610,11 @@ uint64_t getConstantMemoryOrder(Operand *Opnd) { |
| return Intrinsics::MemoryOrderInvalid; |
| } |
| -// Determines whether the dest of a Load instruction can be folded |
| -// into one of the src operands of a 2-operand instruction. This is |
| -// true as long as the load dest matches exactly one of the binary |
| -// instruction's src operands. Replaces Src0 or Src1 with LoadSrc if |
| -// the answer is true. |
| +/// Determines whether the dest of a Load instruction can be folded |
| +/// into one of the src operands of a 2-operand instruction. This is |
| +/// true as long as the load dest matches exactly one of the binary |
| +/// instruction's src operands. Replaces Src0 or Src1 with LoadSrc if |
| +/// the answer is true. |
| bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, |
| Operand *&Src0, Operand *&Src1) { |
| if (Src0 == LoadDest && Src1 != LoadDest) { |
| @@ -851,15 +851,15 @@ template <class Machine> void TargetX86Base<Machine>::lowerArguments() { |
| } |
| } |
| -// Helper function for addProlog(). |
| -// |
| -// This assumes Arg is an argument passed on the stack. This sets the |
| -// frame offset for Arg and updates InArgsSizeBytes according to Arg's |
| -// width. For an I64 arg that has been split into Lo and Hi components, |
| -// it calls itself recursively on the components, taking care to handle |
| -// Lo first because of the little-endian architecture. Lastly, this |
| -// function generates an instruction to copy Arg into its assigned |
| -// register if applicable. |
| +/// Helper function for addProlog(). |
| +/// |
| +/// This assumes Arg is an argument passed on the stack. This sets the |
| +/// frame offset for Arg and updates InArgsSizeBytes according to Arg's |
| +/// width. For an I64 arg that has been split into Lo and Hi components, |
| +/// it calls itself recursively on the components, taking care to handle |
| +/// Lo first because of the little-endian architecture. Lastly, this |
| +/// function generates an instruction to copy Arg into its assigned |
| +/// register if applicable. |
| template <class Machine> |
| void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, |
| Variable *FramePtr, |
| @@ -1146,13 +1146,13 @@ template <class Machine> void TargetX86Base<Machine>::addEpilog(CfgNode *Node) { |
| if (!Ctx->getFlags().getUseSandboxing()) |
| return; |
| - // Change the original ret instruction into a sandboxed return sequence. |
| - // t:ecx = pop |
| - // bundle_lock |
| - // and t, ~31 |
| - // jmp *t |
| - // bundle_unlock |
| - // FakeUse <original_ret_operand> |
| + /// Change the original ret instruction into a sandboxed return sequence. |
|
Karl
2015/07/06 18:08:49
This is in the middle of a method. Should it have doxygen comments?
ascull
2015/07/06 19:29:09
Done.
|
| + /// t:ecx = pop |
| + /// bundle_lock |
| + /// and t, ~31 |
| + /// jmp *t |
| + /// bundle_unlock |
| + /// FakeUse <original_ret_operand> |
| const SizeT BundleSize = |
| 1 << Func->template getAssembler<>()->getBundleAlignLog2Bytes(); |
| Variable *T_ecx = makeReg(IceType_i32, RegX8632::Reg_ecx); |
| @@ -1346,12 +1346,12 @@ void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { |
| _mov(Dest, esp); |
| } |
| -// Strength-reduce scalar integer multiplication by a constant (for |
| -// i32 or narrower) for certain constants. The lea instruction can be |
| -// used to multiply by 3, 5, or 9, and the lsh instruction can be used |
| -// to multiply by powers of 2. These can be combined such that |
| -// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, |
| -// combined with left-shifting by 2. |
| +/// Strength-reduce scalar integer multiplication by a constant (for |
| +/// i32 or narrower) for certain constants. The lea instruction can be |
| +/// used to multiply by 3, 5, or 9, and the lsh instruction can be used |
| +/// to multiply by powers of 2. These can be combined such that |
| +/// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, |
| +/// combined with left-shifting by 2. |
| template <class Machine> |
| bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
| int32_t Src1) { |
| @@ -1412,8 +1412,8 @@ bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
| // Lea optimization only works for i16 and i32 types, not i8. |
| if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) |
| return false; |
| - // Limit the number of lea/shl operations for a single multiply, to |
| - // a somewhat arbitrary choice of 3. |
| + /// Limit the number of lea/shl operations for a single multiply, to |
|
Karl
2015/07/06 18:08:49
Again, this is within a method. Should it have doxygen comments?
ascull
2015/07/06 19:29:09
Done.
|
| + /// a somewhat arbitrary choice of 3. |
| const uint32_t MaxOpsForOptimizedMul = 3; |
| if (CountOps > MaxOpsForOptimizedMul) |
| return false; |
| @@ -1769,15 +1769,15 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { |
| // pshufd T4, T1, {0,2,1,3} |
| // movups Dest, T4 |
| - // Mask that directs pshufd to create a vector with entries |
| - // Src[1, 0, 3, 0] |
| + /// Mask that directs pshufd to create a vector with entries |
|
Karl
2015/07/06 18:08:48
Similar question here (not at declaration level).
ascull
2015/07/06 19:29:09
Done.
|
| + /// Src[1, 0, 3, 0] |
| const unsigned Constant1030 = 0x31; |
| Constant *Mask1030 = Ctx->getConstantInt32(Constant1030); |
| - // Mask that directs shufps to create a vector with entries |
| - // Dest[0, 2], Src[0, 2] |
| + /// Mask that directs shufps to create a vector with entries |
| + /// Dest[0, 2], Src[0, 2] |
| const unsigned Mask0202 = 0x88; |
| - // Mask that directs pshufd to create a vector with entries |
| - // Src[0, 2, 1, 3] |
| + /// Mask that directs pshufd to create a vector with entries |
| + /// Src[0, 2, 1, 3] |
| const unsigned Mask0213 = 0xd8; |
| Variable *T1 = makeReg(IceType_v4i32); |
| Variable *T2 = makeReg(IceType_v4i32); |
| @@ -2387,7 +2387,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { |
| _pcmpgt(T, Zeros); |
| _movp(Dest, T); |
| } else { |
| - // width = width(elty) - 1; dest = (src << width) >> width |
| + /// width = width(elty) - 1; dest = (src << width) >> width |
| SizeT ShiftAmount = |
| Traits::X86_CHAR_BIT * typeWidthInBytes(typeElementType(DestTy)) - |
| 1; |
| @@ -2619,7 +2619,7 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { |
| _cvt(T, Src0RM, InstX8632Cvt::Dq2ps); |
| _movp(Dest, T); |
| } else if (Inst->getSrc(0)->getType() == IceType_i64) { |
| - // Use a helper for x86-32. |
| + /// Use a helper for x86-32. |
|
Karl
2015/07/06 18:08:49
Should this be a doxygen comment (inside method).
ascull
2015/07/06 19:29:09
Done.
|
| const SizeT MaxSrcs = 1; |
| Type DestType = Dest->getType(); |
| InstCall *Call = |
| @@ -2655,8 +2655,8 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { |
| lowerCall(Call); |
| } else if (Src0->getType() == IceType_i64 || |
| Src0->getType() == IceType_i32) { |
| - // Use a helper for x86-32 and x86-64. Also use a helper for |
| - // i32 on x86-32. |
| + /// Use a helper for x86-32 and x86-64. Also use a helper for |
| + /// i32 on x86-32. |
|
Karl
2015/07/06 18:08:48
Again, not a declaration. Should it be /// ?
ascull
2015/07/06 19:29:09
Done.
|
| const SizeT MaxSrcs = 1; |
| Type DestType = Dest->getType(); |
| IceString TargetString; |
| @@ -3235,28 +3235,28 @@ void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { |
| return; |
| } |
| - // shufps treats the source and desination operands as vectors of |
| - // four doublewords. The destination's two high doublewords are |
| - // selected from the source operand and the two low doublewords are |
| - // selected from the (original value of) the destination operand. |
| - // An insertelement operation can be effected with a sequence of two |
| - // shufps operations with appropriate masks. In all cases below, |
| - // Element[0] is being inserted into SourceVectOperand. Indices are |
| - // ordered from left to right. |
| - // |
| - // insertelement into index 1 (result is stored in ElementR): |
| - // ElementR := ElementR[0, 0] SourceVectRM[0, 0] |
| - // ElementR := ElementR[3, 0] SourceVectRM[2, 3] |
| - // |
| - // insertelement into index 2 (result is stored in T): |
| - // T := SourceVectRM |
| - // ElementR := ElementR[0, 0] T[0, 3] |
| - // T := T[0, 1] ElementR[0, 3] |
| - // |
| - // insertelement into index 3 (result is stored in T): |
| - // T := SourceVectRM |
| - // ElementR := ElementR[0, 0] T[0, 2] |
| - // T := T[0, 1] ElementR[3, 0] |
| + /// shufps treats the source and desination operands as vectors of |
| + /// four doublewords. The destination's two high doublewords are |
| + /// selected from the source operand and the two low doublewords are |
| + /// selected from the (original value of) the destination operand. |
| + /// An insertelement operation can be effected with a sequence of two |
| + /// shufps operations with appropriate masks. In all cases below, |
| + /// Element[0] is being inserted into SourceVectOperand. Indices are |
| + /// ordered from left to right. |
| + /// |
| + /// insertelement into index 1 (result is stored in ElementR): |
| + /// ElementR := ElementR[0, 0] SourceVectRM[0, 0] |
| + /// ElementR := ElementR[3, 0] SourceVectRM[2, 3] |
| + /// |
| + /// insertelement into index 2 (result is stored in T): |
| + /// T := SourceVectRM |
| + /// ElementR := ElementR[0, 0] T[0, 3] |
| + /// T := T[0, 1] ElementR[0, 3] |
| + /// |
| + /// insertelement into index 3 (result is stored in T): |
| + /// T := SourceVectRM |
| + /// ElementR := ElementR[0, 0] T[0, 2] |
| + /// T := T[0, 1] ElementR[3, 0] |
|
Karl
2015/07/06 18:08:49
Again, inside method. Should this be /// ?
ascull
2015/07/06 19:29:09
Done.
|
| const unsigned char Mask1[3] = {0, 192, 128}; |
| const unsigned char Mask2[3] = {227, 196, 52}; |
| @@ -3931,10 +3931,10 @@ void TargetX86Base<Machine>::expandAtomicRMWAsCmpxchg(LowerBinOp Op_Lo, |
| _mov(Dest, T_eax); |
| } |
| -// Lowers count {trailing, leading} zeros intrinsic. |
| -// |
| -// We could do constant folding here, but that should have |
| -// been done by the front-end/middle-end optimizations. |
| +/// Lowers count {trailing, leading} zeros intrinsic. |
| +/// |
| +/// We could do constant folding here, but that should have |
| +/// been done by the front-end/middle-end optimizations. |
| template <class Machine> |
| void TargetX86Base<Machine>::lowerCountZeros(bool Cttz, Type Ty, Variable *Dest, |
| Operand *FirstVal, |
| @@ -4270,10 +4270,10 @@ template <class Machine> void TargetX86Base<Machine>::doAddressOptLoad() { |
| Variable *Index = nullptr; |
| uint16_t Shift = 0; |
| int32_t Offset = 0; // TODO: make Constant |
| - // Vanilla ICE load instructions should not use the segment registers, |
| - // and computeAddressOpt only works at the level of Variables and Constants, |
| - // not other OperandX8632Mem, so there should be no mention of segment |
| - // registers there either. |
| + /// Vanilla ICE load instructions should not use the segment registers, |
| + /// and computeAddressOpt only works at the level of Variables and Constants, |
| + /// not other OperandX8632Mem, so there should be no mention of segment |
| + /// registers there either. |
|
Karl
2015/07/06 18:08:48
Should this be /// (inside method).
ascull
2015/07/06 19:29:09
Done.
|
| const OperandX8632Mem::SegmentRegisters SegmentReg = |
| OperandX8632Mem::DefaultSegment; |
| Variable *Base = llvm::dyn_cast<Variable>(Addr); |
| @@ -4501,10 +4501,10 @@ template <class Machine> void TargetX86Base<Machine>::doAddressOptStore() { |
| uint16_t Shift = 0; |
| int32_t Offset = 0; // TODO: make Constant |
| Variable *Base = llvm::dyn_cast<Variable>(Addr); |
| - // Vanilla ICE store instructions should not use the segment registers, |
| - // and computeAddressOpt only works at the level of Variables and Constants, |
| - // not other OperandX8632Mem, so there should be no mention of segment |
| - // registers there either. |
| + /// Vanilla ICE store instructions should not use the segment registers, |
| + /// and computeAddressOpt only works at the level of Variables and Constants, |
| + /// not other OperandX8632Mem, so there should be no mention of segment |
| + /// registers there either. |
|
Karl
2015/07/06 18:08:48
Similar here (inside method).
ascull
2015/07/06 19:29:10
Done.
|
| const OperandX8632Mem::SegmentRegisters SegmentReg = |
| OperandX8632Mem::DefaultSegment; |
| computeAddressOpt(Func, Inst, Base, Index, Shift, Offset); |
| @@ -4597,14 +4597,14 @@ void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind, |
| lowerAssign(InstAssign::create(Func, Dest, T)); |
| } |
| -// The following pattern occurs often in lowered C and C++ code: |
| -// |
| -// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
| -// %cmp.ext = sext <n x i1> %cmp to <n x ty> |
| -// |
| -// We can eliminate the sext operation by copying the result of pcmpeqd, |
| -// pcmpgtd, or cmpps (which produce sign extended results) to the result |
| -// of the sext operation. |
| +/// The following pattern occurs often in lowered C and C++ code: |
| +/// |
| +/// %cmp = fcmp/icmp pred <n x ty> %src0, %src1 |
| +/// %cmp.ext = sext <n x i1> %cmp to <n x ty> |
| +/// |
| +/// We can eliminate the sext operation by copying the result of pcmpeqd, |
| +/// pcmpgtd, or cmpps (which produce sign extended results) to the result |
| +/// of the sext operation. |
| template <class Machine> |
| void TargetX86Base<Machine>::eliminateNextVectorSextInstruction( |
| Variable *SignExtendedResult) { |
| @@ -4710,10 +4710,10 @@ void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { |
| } |
| } |
| -// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to |
| -// preserve integrity of liveness analysis. Undef values are also |
| -// turned into zeroes, since loOperand() and hiOperand() don't expect |
| -// Undef input. |
| +/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to |
| +/// preserve integrity of liveness analysis. Undef values are also |
| +/// turned into zeroes, since loOperand() and hiOperand() don't expect |
| +/// Undef input. |
| template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { |
| // Pause constant blinding or pooling, blinding or pooling will be done later |
| // during phi lowering assignments |
| @@ -4759,8 +4759,8 @@ bool isMemoryOperand(const Operand *Opnd) { |
| return true; |
| } |
| -// Lower the pre-ordered list of assignments into mov instructions. |
| -// Also has to do some ad-hoc register allocation as necessary. |
| +/// Lower the pre-ordered list of assignments into mov instructions. |
| +/// Also has to do some ad-hoc register allocation as necessary. |
| template <class Machine> |
| void TargetX86Base<Machine>::lowerPhiAssignments( |
| CfgNode *Node, const AssignList &Assignments) { |
| @@ -4954,7 +4954,7 @@ Variable *TargetX86Base<Machine>::makeVectorOfHighOrderBits(Type Ty, |
| _psll(Reg, Ctx->getConstantInt8(Shift)); |
| return Reg; |
| } else { |
| - // SSE has no left shift operation for vectors of 8 bit integers. |
| + /// SSE has no left shift operation for vectors of 8 bit integers. |
|
Karl
2015/07/06 18:08:49
Why? (inside method).
ascull
2015/07/06 19:29:09
Done.
|
| const uint32_t HIGH_ORDER_BITS_MASK = 0x80808080; |
| Constant *ConstantMask = Ctx->getConstantInt32(HIGH_ORDER_BITS_MASK); |
| Variable *Reg = makeReg(Ty, RegNum); |
| @@ -4964,12 +4964,12 @@ Variable *TargetX86Base<Machine>::makeVectorOfHighOrderBits(Type Ty, |
| } |
| } |
| -// Construct a mask in a register that can be and'ed with a |
| -// floating-point value to mask off its sign bit. The value will be |
| -// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> |
| -// for f64. Construct it as vector of ones logically right shifted |
| -// one bit. TODO(stichnot): Fix the wala TODO above, to represent |
| -// vector constants in memory. |
| +/// Construct a mask in a register that can be and'ed with a |
| +/// floating-point value to mask off its sign bit. The value will be |
| +/// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> |
| +/// for f64. Construct it as vector of ones logically right shifted |
| +/// one bit. TODO(stichnot): Fix the wala TODO above, to represent |
| +/// vector constants in memory. |
| template <class Machine> |
| Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, |
| int32_t RegNum) { |
| @@ -4985,7 +4985,7 @@ TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, |
| // Ensure that Loc is a stack slot. |
| assert(Slot->getWeight().isZero()); |
| assert(Slot->getRegNum() == Variable::NoRegister); |
| - // Compute the location of Loc in memory. |
| + /// Compute the location of Loc in memory. |
|
Karl
2015/07/06 18:08:49
Why? (inside method).
ascull
2015/07/06 19:29:09
Done.
|
| // TODO(wala,stichnot): lea should not be required. The address of |
| // the stack slot is known at compile time (although not until after |
| // addProlog()). |
| @@ -4996,8 +4996,8 @@ TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, |
| return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset); |
| } |
| -// Helper for legalize() to emit the right code to lower an operand to a |
| -// register of the appropriate type. |
| +/// Helper for legalize() to emit the right code to lower an operand to a |
| +/// register of the appropriate type. |
| template <class Machine> |
| Variable *TargetX86Base<Machine>::copyToReg(Operand *Src, int32_t RegNum) { |
| Type Ty = Src->getType(); |
| @@ -5106,9 +5106,9 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, |
| return From; |
| } |
| if (auto Var = llvm::dyn_cast<Variable>(From)) { |
| - // Check if the variable is guaranteed a physical register. This |
| - // can happen either when the variable is pre-colored or when it is |
| - // assigned infinite weight. |
| + /// Check if the variable is guaranteed a physical register. This |
| + /// can happen either when the variable is pre-colored or when it is |
| + /// assigned infinite weight. |
|
Karl
2015/07/06 18:08:49
Why? (inside method).
ascull
2015/07/06 19:29:09
Done.
|
| bool MustHaveRegister = (Var->hasReg() || Var->getWeight().isInf()); |
| // We need a new physical register for the operand if: |
| // Mem is not allowed and Var isn't guaranteed a physical |
| @@ -5124,18 +5124,18 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, |
| return From; |
| } |
| -// Provide a trivial wrapper to legalize() for this common usage. |
| +/// Provide a trivial wrapper to legalize() for this common usage. |
| template <class Machine> |
| Variable *TargetX86Base<Machine>::legalizeToVar(Operand *From, int32_t RegNum) { |
| return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum)); |
| } |
| -// For the cmp instruction, if Src1 is an immediate, or known to be a |
| -// physical register, we can allow Src0 to be a memory operand. |
| -// Otherwise, Src0 must be copied into a physical register. |
| -// (Actually, either Src0 or Src1 can be chosen for the physical |
| -// register, but unfortunately we have to commit to one or the other |
| -// before register allocation.) |
| +/// For the cmp instruction, if Src1 is an immediate, or known to be a |
| +/// physical register, we can allow Src0 to be a memory operand. |
| +/// Otherwise, Src0 must be copied into a physical register. |
| +/// (Actually, either Src0 or Src1 can be chosen for the physical |
| +/// register, but unfortunately we have to commit to one or the other |
| +/// before register allocation.) |
| template <class Machine> |
| Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0, |
| Operand *Src1) { |
| @@ -5209,9 +5209,9 @@ void TargetX86Base<Machine>::makeRandomRegisterPermutation( |
| // TODO(stichnot): Declaring Permutation this way loses type/size |
| // information. Fix this in conjunction with the caller-side TODO. |
| assert(Permutation.size() >= RegX8632::Reg_NUM); |
| - // Expected upper bound on the number of registers in a single |
| - // equivalence class. For x86-32, this would comprise the 8 XMM |
| - // registers. This is for performance, not correctness. |
| + /// Expected upper bound on the number of registers in a single |
| + /// equivalence class. For x86-32, this would comprise the 8 XMM |
| + /// registers. This is for performance, not correctness. |
|
Karl
2015/07/06 18:08:49
Why ? (inside method).
ascull
2015/07/06 19:29:09
Done.
|
| static const unsigned MaxEquivalenceClassSize = 8; |
| typedef llvm::SmallVector<int32_t, MaxEquivalenceClassSize> RegisterList; |
| typedef std::map<uint32_t, RegisterList> EquivalenceClassMap; |
| @@ -5304,7 +5304,7 @@ void TargetX86Base<Machine>::emit(const ConstantUndef *) const { |
| llvm::report_fatal_error("undef value encountered by emitter."); |
| } |
| -// Randomize or pool an Immediate. |
| +/// Randomize or pool an Immediate. |
| template <class Machine> |
| Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate, |
| int32_t RegNum) { |