Chromium Code Reviews
| Index: src/IceTargetLoweringX86BaseImpl.h |
| diff --git a/src/IceTargetLoweringX86BaseImpl.h b/src/IceTargetLoweringX86BaseImpl.h |
| index 99a1aeb2a8080cbf0885681a8848e190e8bbf6f0..73f8fa8b8e2987fa6d6bbb2956753a17559de8d1 100644 |
| --- a/src/IceTargetLoweringX86BaseImpl.h |
| +++ b/src/IceTargetLoweringX86BaseImpl.h |
| @@ -8,9 +8,8 @@ |
| //===----------------------------------------------------------------------===// |
| /// |
| /// \file |
| -/// This file implements the TargetLoweringX86Base class, which |
| -/// consists almost entirely of the lowering sequence for each |
| -/// high-level instruction. |
| +/// This file implements the TargetLoweringX86Base class, which consists almost |
| +/// entirely of the lowering sequence for each high-level instruction. |
| /// |
| //===----------------------------------------------------------------------===// |
| @@ -63,13 +62,13 @@ public: |
| /// IsComplex is the cached result of BoolFolding::hasComplexLowering(Instr). |
| bool IsComplex = false; |
| /// IsLiveOut is initialized conservatively to true, and is set to false when |
| - /// we encounter an instruction that ends Var's live range. We disable the |
| - /// folding optimization when Var is live beyond this basic block. Note that |
| + /// we encounter an instruction that ends Var's live range. We disable the |
| + /// folding optimization when Var is live beyond this basic block. Note that |
| /// if liveness analysis is not performed (e.g. in Om1 mode), IsLiveOut will |
| /// always be true and the folding optimization will never be performed. |
| bool IsLiveOut = true; |
| // NumUses counts the number of times Var is used as a source operand in the |
| - // basic block. If IsComplex is true and there is more than one use of Var, |
| + // basic block. If IsComplex is true and there is more than one use of Var, |
| // then the folding optimization is disabled for Var. |
| uint32_t NumUses = 0; |
| }; |
| @@ -166,7 +165,7 @@ BoolFolding<MachineTraits>::getConsumerKind(const Inst *Instr) { |
| /// Returns true if the producing instruction has a "complex" lowering sequence. |
| /// This generally means that its lowering sequence requires more than one |
| /// conditional branch, namely 64-bit integer compares and some floating-point |
| -/// compares. When this is true, and there is more than one consumer, we prefer |
| +/// compares. When this is true, and there is more than one consumer, we prefer |
| /// to disable the folding optimization because it minimizes branches. |
| template <class MachineTraits> |
| bool BoolFolding<MachineTraits>::hasComplexLowering(const Inst *Instr) { |
| @@ -222,9 +221,9 @@ void BoolFolding<MachineTraits>::init(CfgNode *Node) { |
| setInvalid(I.first); |
| continue; |
| } |
| - // Mark as "dead" rather than outright deleting. This is so that other |
| + // Mark as "dead" rather than outright deleting. This is so that other |
| // peephole style optimizations during or before lowering have access to |
| - // this instruction in undeleted form. See for example |
| + // this instruction in undeleted form. See for example |
| // tryOptimizedCmpxchgCmpBr(). |
| I.second.Instr->setDead(); |
| } |
| @@ -303,8 +302,9 @@ template <class Machine> void TargetX86Base<Machine>::translateO2() { |
| // Run this early so it can be used to focus optimizations on potentially hot |
| // code. |
| - // TODO(stichnot,ascull): currently only used for regalloc not expensive high |
| - // level optimizations which could be focused on potentially hot code. |
| + // TODO(stichnot,ascull): currently only used for regalloc not |
| + // expensive high level optimizations which could be focused on potentially |
| + // hot code. |
| Func->computeLoopNestDepth(); |
| Func->dump("After loop nest depth analysis"); |
| @@ -312,7 +312,7 @@ template <class Machine> void TargetX86Base<Machine>::translateO2() { |
| Func->getVMetadata()->init(VMK_SingleDefs); |
| Func->doAddressOpt(); |
| - // Find read-modify-write opportunities. Do this after address mode |
| + // Find read-modify-write opportunities. Do this after address mode |
| // optimization so that doAddressOpt() doesn't need to be applied to RMW |
| // instructions as well. |
| findRMW(); |
| @@ -321,8 +321,8 @@ template <class Machine> void TargetX86Base<Machine>::translateO2() { |
| // Argument lowering |
| Func->doArgLowering(); |
| - // Target lowering. This requires liveness analysis for some parts of the |
| - // lowering decisions, such as compare/branch fusing. If non-lightweight |
| + // Target lowering. This requires liveness analysis for some parts of the |
| + // lowering decisions, such as compare/branch fusing. If non-lightweight |
| // liveness analysis is used, the instructions need to be renumbered first |
| // TODO: This renumbering should only be necessary if we're actually |
| // calculating live intervals, which we only do for register allocation. |
| @@ -330,9 +330,9 @@ template <class Machine> void TargetX86Base<Machine>::translateO2() { |
| if (Func->hasError()) |
| return; |
| - // TODO: It should be sufficient to use the fastest liveness calculation, i.e. |
| - // livenessLightweight(). However, for some reason that slows down the rest |
| - // of the translation. Investigate. |
| + // TODO: It should be sufficient to use the fastest liveness calculation, |
| + // i.e. livenessLightweight(). However, for some reason that slows down the |
| + // rest of the translation. Investigate. |
| Func->liveness(Liveness_Basic); |
| if (Func->hasError()) |
| return; |
| @@ -357,7 +357,7 @@ template <class Machine> void TargetX86Base<Machine>::translateO2() { |
| Func->liveness(Liveness_Intervals); |
| if (Func->hasError()) |
| return; |
| - // Validate the live range computations. The expensive validation call is |
| + // Validate the live range computations. The expensive validation call is |
| // deliberately only made when assertions are enabled. |
| assert(Func->validateLiveness()); |
| // The post-codegen dump is done here, after liveness analysis and associated |
| @@ -386,9 +386,9 @@ template <class Machine> void TargetX86Base<Machine>::translateO2() { |
| // Shuffle basic block order if -reorder-basic-blocks is enabled. |
| Func->shuffleNodes(); |
| - // Branch optimization. This needs to be done just before code emission. In |
| + // Branch optimization. This needs to be done just before code emission. In |
| // particular, no transformations that insert or reorder CfgNodes should be |
| - // done after branch optimization. We go ahead and do it before nop insertion |
| + // done after branch optimization. We go ahead and do it before nop insertion |
| // to reduce the amount of work needed for searching for opportunities. |
| Func->doBranchOpt(); |
| Func->dump("After branch optimization"); |
| @@ -495,10 +495,10 @@ template <class Machine> void TargetX86Base<Machine>::findRMW() { |
| Ostream &Str = Func->getContext()->getStrDump(); |
| for (CfgNode *Node : Func->getNodes()) { |
| // Walk through the instructions, considering each sequence of 3 |
| - // instructions, and look for the particular RMW pattern. Note that this |
| - // search can be "broken" (false negatives) if there are intervening deleted |
| - // instructions, or intervening instructions that could be safely moved out |
| - // of the way to reveal an RMW pattern. |
| + // instructions, and look for the particular RMW pattern. Note that this |
| + // search can be "broken" (false negatives) if there are intervening |
| + // deleted instructions, or intervening instructions that could be safely |
| + // moved out of the way to reveal an RMW pattern. |
| auto E = Node->getInsts().end(); |
| auto I1 = E, I2 = E, I3 = Node->getInsts().begin(); |
| for (; I3 != E; I1 = I2, I2 = I3, ++I3) { |
| @@ -528,21 +528,21 @@ template <class Machine> void TargetX86Base<Machine>::findRMW() { |
| // problems later. |
| // |
| // With this transformation, the Store instruction acquires a Dest |
| - // variable and is now subject to dead code elimination if there are |
| - // no more uses of "b". Variable "x" is a beacon for determining |
| - // whether the Store instruction gets dead-code eliminated. If the |
| - // Store instruction is eliminated, then it must be the case that |
| - // the RMW instruction ends x's live range, and therefore the RMW |
| - // instruction will be retained and later lowered. On the other |
| - // hand, if the RMW instruction does not end x's live range, then |
| - // the Store instruction must still be present, and therefore the |
| - // RMW instruction is ignored during lowering because it is |
| - // redundant with the Store instruction. |
| + // variable and is now subject to dead code elimination if there |
| + // are no more uses of "b". Variable "x" is a beacon for |
| + // determining whether the Store instruction gets dead-code |
| + // eliminated. If the Store instruction is eliminated, then it |
| + // must be the case that the RMW instruction ends x's live range, |
| + // and therefore the RMW instruction will be retained and later |
| + // lowered. On the other hand, if the RMW instruction does not end |
| + // x's live range, then the Store instruction must still be |
| + // present, and therefore the RMW instruction is ignored during |
| + // lowering because it is redundant with the Store instruction. |
| // |
| // Note that if "a" has further uses, the RMW transformation may |
| // still trigger, resulting in two loads and one store, which is |
| - // worse than the original one load and one store. However, this is |
| - // probably rare, and caching probably keeps it just as fast. |
| + // worse than the original one load and one store. However, this |
| + // is probably rare, and caching probably keeps it just as fast. |
| if (!isSameMemAddressOperand<Machine>(Load->getSourceAddress(), |
| Store->getAddr())) |
| continue; |
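
For readers unfamiliar with the pattern the comments above describe, here is a minimal, hypothetical C++ illustration of the Load/Arithmetic/Store triple that findRMW() searches for; the names and the lowered form shown in the comment are assumptions for illustration, not code from this patch.

#include <cstdint>

// Hypothetical source-level shape of the triple findRMW() looks for. If "a"
// and "x" have no other uses, the triple can be expressed as a single
// read-modify-write memory instruction, with "x" acting as the beacon.
void rmw_candidate(int32_t *p, int32_t c) {
  int32_t a = *p;     // a = Load p
  int32_t x = a + c;  // x = a + c   ("x" later serves as the beacon variable)
  *p = x;             // Store x, p
  // Possible lowered form on x86 (illustrative): add DWORD PTR [p], c
}
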
| @@ -589,11 +589,10 @@ inline uint64_t getConstantMemoryOrder(Operand *Opnd) { |
| return Intrinsics::MemoryOrderInvalid; |
| } |
| -/// Determines whether the dest of a Load instruction can be folded |
| -/// into one of the src operands of a 2-operand instruction. This is |
| -/// true as long as the load dest matches exactly one of the binary |
| -/// instruction's src operands. Replaces Src0 or Src1 with LoadSrc if |
| -/// the answer is true. |
| +/// Determines whether the dest of a Load instruction can be folded into one of |
| +/// the src operands of a 2-operand instruction. This is true as long as the |
| +/// load dest matches exactly one of the binary instruction's src operands. |
| +/// Replaces Src0 or Src1 with LoadSrc if the answer is true. |
| inline bool canFoldLoadIntoBinaryInst(Operand *LoadSrc, Variable *LoadDest, |
| Operand *&Src0, Operand *&Src1) { |
| if (Src0 == LoadDest && Src1 != LoadDest) { |
| @@ -615,8 +614,8 @@ template <class Machine> void TargetX86Base<Machine>::doLoadOpt() { |
| Operand *LoadSrc = nullptr; |
| Inst *CurInst = Context.getCur(); |
| Inst *Next = Context.getNextInst(); |
| - // Determine whether the current instruction is a Load |
| - // instruction or equivalent. |
| + // Determine whether the current instruction is a Load instruction or |
| + // equivalent. |
| if (auto *Load = llvm::dyn_cast<InstLoad>(CurInst)) { |
| // An InstLoad always qualifies. |
| LoadDest = Load->getDest(); |
| @@ -624,9 +623,9 @@ template <class Machine> void TargetX86Base<Machine>::doLoadOpt() { |
| LoadSrc = formMemoryOperand(Load->getSourceAddress(), |
| LoadDest->getType(), DoLegalize); |
| } else if (auto *Intrin = llvm::dyn_cast<InstIntrinsicCall>(CurInst)) { |
| - // An AtomicLoad intrinsic qualifies as long as it has a valid |
| - // memory ordering, and can be implemented in a single |
| - // instruction (i.e., not i64 on x86-32). |
| + // An AtomicLoad intrinsic qualifies as long as it has a valid memory |
| + // ordering, and can be implemented in a single instruction (i.e., not |
| + // i64 on x86-32). |
| Intrinsics::IntrinsicID ID = Intrin->getIntrinsicInfo().ID; |
| if (ID == Intrinsics::AtomicLoad && |
| (Traits::Is64Bit || Intrin->getDest()->getType() != IceType_i64) && |
| @@ -638,9 +637,9 @@ template <class Machine> void TargetX86Base<Machine>::doLoadOpt() { |
| DoLegalize); |
| } |
| } |
| - // A Load instruction can be folded into the following |
| - // instruction only if the following instruction ends the Load's |
| - // Dest variable's live range. |
| + // A Load instruction can be folded into the following instruction only |
| + // if the following instruction ends the Load's Dest variable's live |
| + // range. |
| if (LoadDest && Next && Next->isLastUse(LoadDest)) { |
| assert(LoadSrc); |
| Inst *NewInst = nullptr; |
| @@ -673,8 +672,7 @@ template <class Machine> void TargetX86Base<Machine>::doLoadOpt() { |
| Select->getCondition(), Src0, Src1); |
| } |
| } else if (auto *Cast = llvm::dyn_cast<InstCast>(Next)) { |
| - // The load dest can always be folded into a Cast |
| - // instruction. |
| + // The load dest can always be folded into a Cast instruction. |
| Variable *Src0 = llvm::dyn_cast<Variable>(Cast->getSrc(0)); |
| if (Src0 == LoadDest) { |
| NewInst = InstCast::create(Func, Cast->getCastKind(), |
| @@ -685,8 +683,8 @@ template <class Machine> void TargetX86Base<Machine>::doLoadOpt() { |
| CurInst->setDeleted(); |
| Next->setDeleted(); |
| Context.insert(NewInst); |
| - // Update NewInst->LiveRangesEnded so that target lowering |
| - // may benefit. Also update NewInst->HasSideEffects. |
| + // Update NewInst->LiveRangesEnded so that target lowering may |
| + // benefit. Also update NewInst->HasSideEffects. |
| NewInst->spliceLivenessInfo(Next, CurInst); |
| } |
| } |
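
A minimal sketch of the load-folding idea discussed in this hunk, assuming the usual x86 memory-operand form for the folded instruction; the concrete names are illustrative only.

#include <cstdint>

// Hypothetical illustration of doLoadOpt(): when the instruction that follows
// a load is the last use of the load's destination, the load can be folded
// into that instruction as a memory operand.
int32_t load_then_add(const int32_t *p, int32_t b) {
  int32_t a = *p;   // a = Load p
  return a + b;     // c = a + b, and this is a's last use
  // Folded form (illustrative): c = add b, DWORD PTR [p]
}
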
| @@ -721,8 +719,8 @@ Variable *TargetX86Base<Machine>::getPhysicalRegister(SizeT RegNum, Type Ty) { |
| Reg = Func->makeVariable(Ty); |
| Reg->setRegNum(RegNum); |
| PhysicalRegisters[Ty][RegNum] = Reg; |
| - // Specially mark esp as an "argument" so that it is considered |
| - // live upon function entry. |
| + // Specially mark esp as an "argument" so that it is considered live upon |
| + // function entry. |
| if (RegNum == Traits::RegisterSet::Reg_esp) { |
| Func->addImplicitArg(Reg); |
| Reg->setIgnoreLiveness(); |
| @@ -782,13 +780,12 @@ TargetX86Base<Machine>::stackVarToAsmOperand(const Variable *Var) const { |
| /// Helper function for addProlog(). |
| /// |
| -/// This assumes Arg is an argument passed on the stack. This sets the |
| -/// frame offset for Arg and updates InArgsSizeBytes according to Arg's |
| -/// width. For an I64 arg that has been split into Lo and Hi components, |
| -/// it calls itself recursively on the components, taking care to handle |
| -/// Lo first because of the little-endian architecture. Lastly, this |
| -/// function generates an instruction to copy Arg into its assigned |
| -/// register if applicable. |
| +/// This assumes Arg is an argument passed on the stack. This sets the frame |
| +/// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an |
| +/// I64 arg that has been split into Lo and Hi components, it calls itself |
| +/// recursively on the components, taking care to handle Lo first because of the |
| +/// little-endian architecture. Lastly, this function generates an instruction |
| +/// to copy Arg into its assigned register if applicable. |
| template <class Machine> |
| void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, |
| Variable *FramePtr, |
| @@ -819,8 +816,8 @@ void TargetX86Base<Machine>::finishArgumentLowering(Variable *Arg, |
| _mov(Arg, Mem); |
| } |
| // This argument-copying instruction uses an explicit Traits::X86OperandMem |
| - // operand instead of a Variable, so its fill-from-stack operation has to be |
| - // tracked separately for statistics. |
| + // operand instead of a Variable, so its fill-from-stack operation has to |
| + // be tracked separately for statistics. |
| Ctx->statsUpdateFills(); |
| } |
| } |
| @@ -837,9 +834,8 @@ TargetX86Base<Machine>::split64(Variable *Var) { |
| default: |
| return; |
| case IceType_i64: |
| - // TODO: Only consider F64 if we need to push each half when |
| - // passing as an argument to a function call. Note that each half |
| - // is still typed as I32. |
| + // TODO: Only consider F64 if we need to push each half when passing as an |
| + // argument to a function call. Note that each half is still typed as I32. |
| case IceType_f64: |
| break; |
| } |
| @@ -946,11 +942,11 @@ TargetX86Base<Machine>::getRegisterSet(RegSetMask Include, |
| template <class Machine> |
| void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { |
| IsEbpBasedFrame = true; |
| - // Conservatively require the stack to be aligned. Some stack |
| - // adjustment operations implemented below assume that the stack is |
| - // aligned before the alloca. All the alloca code ensures that the |
| - // stack alignment is preserved after the alloca. The stack alignment |
| - // restriction can be relaxed in some cases. |
| + // Conservatively require the stack to be aligned. Some stack adjustment |
| + // operations implemented below assume that the stack is aligned before the |
| + // alloca. All the alloca code ensures that the stack alignment is preserved |
| + // after the alloca. The stack alignment restriction can be relaxed in some |
| + // cases. |
| NeedsStackAlignment = true; |
| // TODO(stichnot): minimize the number of adjustments of esp, etc. |
| @@ -977,8 +973,8 @@ void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { |
| Value = Utils::applyAlignment(Value, Alignment); |
| _sub(esp, Ctx->getConstantInt32(Value)); |
| } else { |
| - // Non-constant sizes need to be adjusted to the next highest |
| - // multiple of the required alignment at runtime. |
| + // Non-constant sizes need to be adjusted to the next highest multiple of |
| + // the required alignment at runtime. |
| Variable *T = makeReg(IceType_i32); |
| _mov(T, TotalSize); |
| _add(T, Ctx->getConstantInt32(Alignment - 1)); |
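
The runtime adjustment in the non-constant-size path above follows the usual round-up idiom; a small sketch, assuming a power-of-two alignment and the masking step that follows the add in the full function (only the add is visible in this hunk).

#include <cstdint>

// Round a runtime size up to the next multiple of a power-of-two alignment.
uint32_t round_up(uint32_t size, uint32_t alignment) {
  return (size + alignment - 1) & ~(alignment - 1);  // add, then mask
}
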
| @@ -988,17 +984,16 @@ void TargetX86Base<Machine>::lowerAlloca(const InstAlloca *Inst) { |
| _mov(Dest, esp); |
| } |
| -/// Strength-reduce scalar integer multiplication by a constant (for |
| -/// i32 or narrower) for certain constants. The lea instruction can be |
| -/// used to multiply by 3, 5, or 9, and the lsh instruction can be used |
| -/// to multiply by powers of 2. These can be combined such that |
| -/// e.g. multiplying by 100 can be done as 2 lea-based multiplies by 5, |
| -/// combined with left-shifting by 2. |
| +/// Strength-reduce scalar integer multiplication by a constant (for i32 or |
| +/// narrower) for certain constants. The lea instruction can be used to multiply |
| +/// by 3, 5, or 9, and the lsh instruction can be used to multiply by powers of |
| +/// 2. These can be combined such that e.g. multiplying by 100 can be done as 2 |
| +/// lea-based multiplies by 5, combined with left-shifting by 2. |
| template <class Machine> |
| bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
| int32_t Src1) { |
| - // Disable this optimization for Om1 and O0, just to keep things |
| - // simple there. |
| + // Disable this optimization for Om1 and O0, just to keep things simple |
| + // there. |
| if (Ctx->getFlags().getOptLevel() < Opt_1) |
| return false; |
| Type Ty = Dest->getType(); |
| @@ -1054,8 +1049,8 @@ bool TargetX86Base<Machine>::optimizeScalarMul(Variable *Dest, Operand *Src0, |
| // Lea optimization only works for i16 and i32 types, not i8. |
| if (Ty != IceType_i16 && Ty != IceType_i32 && (Count3 || Count5 || Count9)) |
| return false; |
| - // Limit the number of lea/shl operations for a single multiply, to |
| - // a somewhat arbitrary choice of 3. |
| + // Limit the number of lea/shl operations for a single multiply, to a |
| + // somewhat arbitrary choice of 3. |
| const uint32_t MaxOpsForOptimizedMul = 3; |
| if (CountOps > MaxOpsForOptimizedMul) |
| return false; |
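
As a concrete instance of the strength reduction described in the doc comment above, multiplying by 100 decomposes into two multiply-by-5 steps (each a single lea) plus a shift by 2; this sketch is illustrative and not taken from the patch.

#include <cstdint>

// x * 100 == ((x * 5) * 5) << 2: two lea-based multiplies by 5 followed by a
// left shift by 2 -- three operations, within MaxOpsForOptimizedMul.
uint32_t mul_by_100(uint32_t x) {
  uint32_t t = x * 5;  // lea t, [x + 4*x]
  t = t * 5;           // lea t, [t + 4*t]
  return t << 2;       // shl t, 2
}
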
| @@ -1101,11 +1096,11 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { |
| } |
| if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| // These x86-32 helper-call-involved instructions are lowered in this |
| - // separate switch. This is because loOperand() and hiOperand() |
| - // may insert redundant instructions for constant blinding and |
| - // pooling. Such redundant instructions will fail liveness analysis |
| - // under -Om1 setting. And, actually these arguments do not need |
| - // to be processed with loOperand() and hiOperand() to be used. |
| + // separate switch. This is because loOperand() and hiOperand() may insert |
| + // redundant instructions for constant blinding and pooling. Such redundant |
| + // instructions will fail liveness analysis under -Om1 setting. And, |
| + // actually these arguments do not need to be processed with loOperand() |
| + // and hiOperand() to be used. |
| switch (Inst->getOp()) { |
| case InstArithmetic::Udiv: { |
| const SizeT MaxSrcs = 2; |
| @@ -1216,8 +1211,8 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { |
| _imul(T_2, Src0Lo); |
| _mov(T_3, Src0Lo, Traits::RegisterSet::Reg_eax); |
| _mul(T_4Lo, T_3, Src1Lo); |
| - // The mul instruction produces two dest variables, edx:eax. We |
| - // create a fake definition of edx to account for this. |
| + // The mul instruction produces two dest variables, edx:eax. We create a |
| + // fake definition of edx to account for this. |
| Context.insert(InstFakeDef::create(Func, T_4Hi, T_4Lo)); |
| _mov(DestLo, T_4Lo); |
| _add(T_4Hi, T_1); |
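
For context, the imul/mul/add sequence above builds a 64-bit product from 32-bit halves; a hedged scalar sketch of the arithmetic it implements (the helper name and parameter layout are assumptions).

#include <cstdint>

// Low 64 bits of (aHi:aLo) * (bHi:bLo). Only the low half of the full product
// is kept, so the aHi*bHi term can be dropped entirely.
uint64_t mul64_from_32(uint32_t aLo, uint32_t aHi, uint32_t bLo, uint32_t bHi) {
  uint64_t lo = (uint64_t)aLo * bLo;     // mul: edx:eax = aLo * bLo
  uint32_t hi = (uint32_t)(lo >> 32)
              + aHi * bLo                // imul
              + aLo * bHi;               // imul
  return ((uint64_t)hi << 32) | (uint32_t)lo;
}
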
| @@ -1253,9 +1248,9 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { |
| _shl(T_2, T_1); |
| _test(T_1, BitTest); |
| _br(Traits::Cond::Br_e, Label); |
| - // T_2 and T_3 are being assigned again because of the |
| - // intra-block control flow, so we need the _mov_nonkillable |
| - // variant to avoid liveness problems. |
| + // T_2 and T_3 are being assigned again because of the intra-block |
| + // control flow, so we need the _mov_nonkillable variant to avoid |
| + // liveness problems. |
| _mov_nonkillable(T_3, T_2); |
| _mov_nonkillable(T_2, Zero); |
| Context.insert(Label); |
| @@ -1289,9 +1284,9 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { |
| _shr(T_3, T_1); |
| _test(T_1, BitTest); |
| _br(Traits::Cond::Br_e, Label); |
| - // T_2 and T_3 are being assigned again because of the |
| - // intra-block control flow, so we need the _mov_nonkillable |
| - // variant to avoid liveness problems. |
| + // T_2 and T_3 are being assigned again because of the intra-block |
| + // control flow, so we need the _mov_nonkillable variant to avoid |
| + // liveness problems. |
| _mov_nonkillable(T_2, T_3); |
| _mov_nonkillable(T_3, Zero); |
| Context.insert(Label); |
| @@ -1325,10 +1320,10 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { |
| _sar(T_3, T_1); |
| _test(T_1, BitTest); |
| _br(Traits::Cond::Br_e, Label); |
| - // T_2 and T_3 are being assigned again because of the |
| - // intra-block control flow, so T_2 needs the _mov_nonkillable |
| - // variant to avoid liveness problems. T_3 doesn't need special |
| - // treatment because it is reassigned via _sar instead of _mov. |
| + // T_2 and T_3 are being assigned again because of the intra-block |
| + // control flow, so T_2 needs the _mov_nonkillable variant to avoid |
| + // liveness problems. T_3 doesn't need special treatment because it is |
| + // reassigned via _sar instead of _mov. |
| _mov_nonkillable(T_2, T_3); |
| _sar(T_3, SignExtend); |
| Context.insert(Label); |
| @@ -1353,8 +1348,8 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { |
| return; |
| } |
| if (isVectorType(Dest->getType())) { |
| - // TODO: Trap on integer divide and integer modulo by zero. |
| - // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 |
| + // TODO: Trap on integer divide and integer modulo by zero. See: |
| + // https://code.google.com/p/nativeclient/issues/detail?id=3899 |
| if (llvm::isa<typename Traits::X86OperandMem>(Src1)) |
| Src1 = legalizeToReg(Src1); |
| switch (Inst->getOp()) { |
| @@ -1519,8 +1514,8 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { |
| if (optimizeScalarMul(Dest, Src0, C->getValue())) |
| return; |
| } |
| - // The 8-bit version of imul only allows the form "imul r/m8" |
| - // where T must be in eax. |
| + // The 8-bit version of imul only allows the form "imul r/m8" where T must |
| + // be in eax. |
| if (isByteSizedArithType(Dest->getType())) { |
| _mov(T, Src0, Traits::RegisterSet::Reg_eax); |
| Src1 = legalize(Src1, Legal_Reg | Legal_Mem); |
| @@ -1580,11 +1575,11 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { |
| } |
| break; |
| case InstArithmetic::Sdiv: |
| - // TODO(stichnot): Enable this after doing better performance |
| - // and cross testing. |
| + // TODO(stichnot): Enable this after doing better performance and cross |
| + // testing. |
| if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
| - // Optimize division by constant power of 2, but not for Om1 |
| - // or O0, just to keep things simple there. |
| + // Optimize division by constant power of 2, but not for Om1 or O0, just |
| + // to keep things simple there. |
| if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
| int32_t Divisor = C->getValue(); |
| uint32_t UDivisor = static_cast<uint32_t>(Divisor); |
| @@ -1600,8 +1595,8 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { |
| // dest=t |
| uint32_t TypeWidth = Traits::X86_CHAR_BIT * typeWidthInBytes(Ty); |
| _mov(T, Src0); |
| - // If for some reason we are dividing by 1, just treat it |
| - // like an assignment. |
| + // If for some reason we are dividing by 1, just treat it like an |
| + // assignment. |
| if (LogDiv > 0) { |
| // The initial sar is unnecessary when dividing by 2. |
| if (LogDiv > 1) |
| @@ -1656,11 +1651,11 @@ void TargetX86Base<Machine>::lowerArithmetic(const InstArithmetic *Inst) { |
| } |
| break; |
| case InstArithmetic::Srem: |
| - // TODO(stichnot): Enable this after doing better performance |
| - // and cross testing. |
| + // TODO(stichnot): Enable this after doing better performance and cross |
| + // testing. |
| if (false && Ctx->getFlags().getOptLevel() >= Opt_1) { |
| - // Optimize mod by constant power of 2, but not for Om1 or O0, |
| - // just to keep things simple there. |
| + // Optimize mod by constant power of 2, but not for Om1 or O0, just to |
| + // keep things simple there. |
| if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) { |
| int32_t Divisor = C->getValue(); |
| uint32_t UDivisor = static_cast<uint32_t>(Divisor); |
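
The power-of-two signed-division sequence sketched above (sar to isolate the sign, shr to turn it into a bias, add, then sar by LogDiv) corresponds to the following scalar idiom; this is an illustrative reconstruction, not code from the patch, and it assumes two's-complement arithmetic shifts.

#include <cstdint>

// Signed divide by 1 << LogDiv, rounding toward zero (assumes 0 < LogDiv < 32).
int32_t sdiv_pow2(int32_t x, uint32_t LogDiv) {
  int32_t sign = x >> 31;                           // sar: 0 or -1
  uint32_t bias = (uint32_t)sign >> (32 - LogDiv);  // shr: (1 << LogDiv) - 1 or 0
  return (x + (int32_t)bias) >> LogDiv;             // add, then sar by LogDiv
}
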
| @@ -1777,8 +1772,8 @@ void TargetX86Base<Machine>::lowerAssign(const InstAssign *Inst) { |
| // memory. |
| Src0Legal = legalize(Src0); |
| } else { |
| - // If Dest could be a stack operand, then RI must be a physical |
| - // register or a scalar integer immediate. |
| + // If Dest could be a stack operand, then RI must be a physical register |
| + // or a scalar integer immediate. |
| Src0Legal = legalize(Src0, Legal_Reg | Legal_Imm); |
| } |
| if (isVectorType(Dest->getType())) |
| @@ -1803,8 +1798,8 @@ void TargetX86Base<Machine>::lowerBr(const InstBr *Inst) { |
| default: |
| break; |
| case BoolFolding::PK_Icmp32: { |
| - // TODO(stichnot): Refactor similarities between this block and |
| - // the corresponding code in lowerIcmp(). |
| + // TODO(stichnot): Refactor similarities between this block and the |
| + // corresponding code in lowerIcmp(). |
| auto *Cmp = llvm::dyn_cast<InstIcmp>(Producer); |
| Operand *Src0 = Producer->getSrc(0); |
| Operand *Src1 = legalize(Producer->getSrc(1)); |
| @@ -1835,10 +1830,10 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { |
| case InstCast::Sext: { |
| // Src0RM is the source operand legalized to physical register or memory, |
| // but not immediate, since the relevant x86 native instructions don't |
| - // allow an immediate operand. If the operand is an immediate, we could |
| - // consider computing the strength-reduced result at translation time, |
| - // but we're unlikely to see something like that in the bitcode that |
| - // the optimizer wouldn't have already taken care of. |
| + // allow an immediate operand. If the operand is an immediate, we could |
| + // consider computing the strength-reduced result at translation time, but |
| + // we're unlikely to see something like that in the bitcode that the |
| + // optimizer wouldn't have already taken care of. |
| Operand *Src0RM = legalize(Inst->getSrc(0), Legal_Reg | Legal_Mem); |
| if (isVectorType(Dest->getType())) { |
| Type DestTy = Dest->getType(); |
| @@ -1898,8 +1893,8 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { |
| typeWidthInBytes(Src0RM->getType())) { |
| _mov(T, Src0RM); |
| } else { |
| - // Widen the source using movsx or movzx. (It doesn't matter |
| - // which one, since the following shl/sar overwrite the bits.) |
| + // Widen the source using movsx or movzx. (It doesn't matter which one, |
| + // since the following shl/sar overwrite the bits.) |
| _movzx(T, Src0RM); |
| } |
| _shl(T, ShiftAmount); |
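
The widen-then-shift idea in this hunk can be shown with a tiny scalar example, assuming the usual two's-complement shift behaviour (shown here for sign-extending a 1-bit value inside a 32-bit register).

#include <cstdint>

// Sign-extend a narrow value by shifting it to the top of the register and
// arithmetic-shifting it back down.
int32_t sext_i1(uint32_t bit) {
  uint32_t t = bit;         // movzx/mov: widening; upper bits don't matter
  t <<= 31;                 // shl t, 31
  return (int32_t)t >> 31;  // sar t, 31 -> 0 or -1
}
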
| @@ -2010,12 +2005,11 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { |
| _cvt(T, Src0RM, Traits::Insts::Cvt::Tps2dq); |
| _movp(Dest, T); |
| } else if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| - // Use a helper for converting floating-point values to 64-bit |
| - // integers. SSE2 appears to have no way to convert from xmm |
| - // registers to something like the edx:eax register pair, and |
| - // gcc and clang both want to use x87 instructions complete with |
| - // temporary manipulation of the status word. This helper is |
| - // not needed for x86-64. |
| + // Use a helper for converting floating-point values to 64-bit integers. |
| + // SSE2 appears to have no way to convert from xmm registers to something |
| + // like the edx:eax register pair, and gcc and clang both want to use x87 |
| + // instructions complete with temporary manipulation of the status word. |
| + // This helper is not needed for x86-64. |
| split64(Dest); |
| const SizeT MaxSrcs = 1; |
| Type SrcType = Inst->getSrc(0)->getType(); |
| @@ -2150,8 +2144,8 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { |
| lowerCall(Call); |
| } else if (Src0->getType() == IceType_i64 || |
| (!Traits::Is64Bit && Src0->getType() == IceType_i32)) { |
| - // Use a helper for x86-32 and x86-64. Also use a helper for |
| - // i32 on x86-32. |
| + // Use a helper for x86-32 and x86-64. Also use a helper for i32 on |
| + // x86-32. |
| const SizeT MaxSrcs = 1; |
| Type DestType = Dest->getType(); |
| IceString TargetString; |
| @@ -2285,8 +2279,8 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { |
| if (Traits::Is64Bit) { |
| Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| Variable *T = makeReg(IceType_f64); |
| - // Movd requires its fp argument (in this case, the bitcast destination) |
| - // to be an xmm register. |
| + // Movd requires its fp argument (in this case, the bitcast |
| + // destination) to be an xmm register. |
| T->setMustHaveReg(); |
| _movd(T, Src0RM); |
| _mov(Dest, T); |
| @@ -2318,8 +2312,8 @@ void TargetX86Base<Machine>::lowerCast(const InstCast *Inst) { |
| Func, Spill, Traits::VariableSplit::High); |
| _mov(T_Lo, loOperand(Src0)); |
| // Technically, the Spill is defined after the _store happens, but |
| - // SpillLo is considered a "use" of Spill so define Spill before it |
| - // is used. |
| + // SpillLo is considered a "use" of Spill so define Spill before it is |
| + // used. |
| Context.insert(InstFakeDef::create(Func, Spill)); |
| _store(T_Lo, SpillLo); |
| _mov(T_Hi, hiOperand(Src0)); |
| @@ -2384,8 +2378,8 @@ void TargetX86Base<Machine>::lowerExtractElement( |
| // Use pshufd and movd/movss. |
| Variable *T = nullptr; |
| if (Index) { |
| - // The shuffle only needs to occur if the element to be extracted |
| - // is not at the lowest index. |
| + // The shuffle only needs to occur if the element to be extracted is not |
| + // at the lowest index. |
| Constant *Mask = Ctx->getConstantInt32(Index); |
| T = makeReg(Ty); |
| _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask); |
| @@ -2396,11 +2390,11 @@ void TargetX86Base<Machine>::lowerExtractElement( |
| if (InVectorElementTy == IceType_i32) { |
| _movd(ExtractedElementR, T); |
| } else { // Ty == IceType_f32 |
| - // TODO(wala): _movss is only used here because _mov does not |
| - // allow a vector source and a scalar destination. _mov should be |
| - // able to be used here. |
| - // _movss is a binary instruction, so the FakeDef is needed to |
| - // keep the live range analysis consistent. |
| + // TODO(wala): _movss is only used here because _mov does not allow a |
| + // vector source and a scalar destination. _mov should be able to be |
| + // used here. |
| + // _movss is a binary instruction, so the FakeDef is needed to keep the |
| + // live range analysis consistent. |
| Context.insert(InstFakeDef::create(Func, ExtractedElementR)); |
| _movss(ExtractedElementR, T); |
| } |
| @@ -2408,8 +2402,8 @@ void TargetX86Base<Machine>::lowerExtractElement( |
| assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); |
| // Spill the value to a stack slot and do the extraction in memory. |
| // |
| - // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when |
| - // support for legalizing to mem is implemented. |
| + // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support |
| + // for legalizing to mem is implemented. |
| Variable *Slot = Func->makeVariable(Ty); |
| Slot->setMustNotHaveReg(); |
| _movp(Slot, legalizeToReg(SourceVectNotLegalized)); |
| @@ -2589,9 +2583,9 @@ void TargetX86Base<Machine>::lowerIcmp(const InstIcmp *Inst) { |
| Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| - // SSE2 only has signed comparison operations. Transform unsigned |
| - // inputs in a manner that allows for the use of signed comparison |
| - // operations by flipping the high order bits. |
| + // SSE2 only has signed comparison operations. Transform unsigned inputs in |
| + // a manner that allows for the use of signed comparison operations by |
| + // flipping the high order bits. |
| if (Condition == InstIcmp::Ugt || Condition == InstIcmp::Uge || |
| Condition == InstIcmp::Ult || Condition == InstIcmp::Ule) { |
| Variable *T0 = makeReg(Ty); |
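
The high-order-bit flip mentioned above turns an unsigned comparison into an equivalent signed one; a minimal scalar illustration (the vector lowering applies the same flip lane-wise before the signed compare).

#include <cstdint>

// (a < b) as unsigned  <==>  (a ^ 0x80000000) < (b ^ 0x80000000) as signed.
bool ult_via_signed(uint32_t a, uint32_t b) {
  int32_t sa = (int32_t)(a ^ 0x80000000u);  // flip the high-order bit
  int32_t sb = (int32_t)(b ^ 0x80000000u);
  return sa < sb;
}
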
| @@ -2726,8 +2720,8 @@ void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { |
| Type InVectorElementTy = Traits::getInVectorElementType(Ty); |
| if (ElementTy == IceType_i1) { |
| - // Expand the element to the appropriate size for it to be inserted |
| - // in the vector. |
| + // Expand the element to the appropriate size for it to be inserted in the |
| + // vector. |
| Variable *Expanded = Func->makeVariable(InVectorElementTy); |
| InstCast *Cast = InstCast::create(Func, InstCast::Zext, Expanded, |
| ElementToInsertNotLegalized); |
| @@ -2773,14 +2767,13 @@ void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { |
| return; |
| } |
| - // shufps treats the source and desination operands as vectors of |
| - // four doublewords. The destination's two high doublewords are |
| - // selected from the source operand and the two low doublewords are |
| - // selected from the (original value of) the destination operand. |
| - // An insertelement operation can be effected with a sequence of two |
| - // shufps operations with appropriate masks. In all cases below, |
| - // Element[0] is being inserted into SourceVectOperand. Indices are |
| - // ordered from left to right. |
| + // shufps treats the source and desination operands as vectors of four |
|
Jim Stichnoth, 2015/09/16 00:01:29: destination
ascull, 2015/09/16 18:30:09: Done.
| + // doublewords. The destination's two high doublewords are selected from |
| + // the source operand and the two low doublewords are selected from the |
| + // (original value of) the destination operand. An insertelement operation |
| + // can be effected with a sequence of two shufps operations with |
| + // appropriate masks. In all cases below, Element[0] is being inserted |
| + // into SourceVectOperand. Indices are ordered from left to right. |
| // |
| // insertelement into index 1 (result is stored in ElementR): |
| // ElementR := ElementR[0, 0] SourceVectRM[0, 0] |
| @@ -2814,11 +2807,10 @@ void TargetX86Base<Machine>::lowerInsertElement(const InstInsertElement *Inst) { |
| } |
| } else { |
| assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); |
| - // Spill the value to a stack slot and perform the insertion in |
| - // memory. |
| + // Spill the value to a stack slot and perform the insertion in memory. |
| // |
| - // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when |
| - // support for legalizing to mem is implemented. |
| + // TODO(wala): use legalize(SourceVectNotLegalized, Legal_Mem) when support |
| + // for legalizing to mem is implemented. |
| Variable *Slot = Func->makeVariable(Ty); |
| Slot->setMustNotHaveReg(); |
| _movp(Slot, legalizeToReg(SourceVectNotLegalized)); |
| @@ -2864,25 +2856,25 @@ void TargetX86Base<Machine>::lowerIntrinsicCall( |
| _mfence(); |
| return; |
| case Intrinsics::AtomicFenceAll: |
| - // NOTE: FenceAll should prevent and load/store from being moved |
| - // across the fence (both atomic and non-atomic). The InstX8632Mfence |
| - // instruction is currently marked coarsely as "HasSideEffects". |
| + // NOTE: FenceAll should prevent and load/store from being moved across the |
| + // fence (both atomic and non-atomic). The InstX8632Mfence instruction is |
| + // currently marked coarsely as "HasSideEffects". |
| _mfence(); |
| return; |
| case Intrinsics::AtomicIsLockFree: { |
| // X86 is always lock free for 8/16/32/64 bit accesses. |
| - // TODO(jvoung): Since the result is constant when given a constant |
| - // byte size, this opens up DCE opportunities. |
| + // TODO(jvoung): Since the result is constant when given a constant byte |
| + // size, this opens up DCE opportunities. |
| Operand *ByteSize = Instr->getArg(0); |
| Variable *Dest = Instr->getDest(); |
| if (ConstantInteger32 *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize)) { |
| Constant *Result; |
| switch (CI->getValue()) { |
| default: |
| - // Some x86-64 processors support the cmpxchg16b intruction, which |
| - // can make 16-byte operations lock free (when used with the LOCK |
| - // prefix). However, that's not supported in 32-bit mode, so just |
| - // return 0 even for large sizes. |
| + // Some x86-64 processors support the cmpxchg16b intruction, which can |
|
Jim Stichnoth, 2015/09/16 00:01:29: instruction
ascull, 2015/09/16 18:30:09: Done.
| + // make 16-byte operations lock free (when used with the LOCK prefix). |
| + // However, that's not supported in 32-bit mode, so just return 0 even |
| + // for large sizes. |
| Result = Ctx->getConstantZero(IceType_i32); |
| break; |
| case 1: |
| @@ -2900,8 +2892,8 @@ void TargetX86Base<Machine>::lowerIntrinsicCall( |
| return; |
| } |
| case Intrinsics::AtomicLoad: { |
| - // We require the memory address to be naturally aligned. |
| - // Given that is the case, then normal loads are atomic. |
| + // We require the memory address to be naturally aligned. Given that is the |
| + // case, then normal loads are atomic. |
| if (!Intrinsics::isMemoryOrderValid( |
| ID, getConstantMemoryOrder(Instr->getArg(1)))) { |
| Func->setError("Unexpected memory ordering for AtomicLoad"); |
| @@ -2910,10 +2902,10 @@ void TargetX86Base<Machine>::lowerIntrinsicCall( |
| Variable *Dest = Instr->getDest(); |
| if (!Traits::Is64Bit && Dest->getType() == IceType_i64) { |
| // Follow what GCC does and use a movq instead of what lowerLoad() |
| - // normally does (split the load into two). |
| - // Thus, this skips load/arithmetic op folding. Load/arithmetic folding |
| - // can't happen anyway, since this is x86-32 and integer arithmetic only |
| - // happens on 32-bit quantities. |
| + // normally does (split the load into two). Thus, this skips |
| + // load/arithmetic op folding. Load/arithmetic folding can't happen |
| + // anyway, since this is x86-32 and integer arithmetic only happens on |
| + // 32-bit quantities. |
| Variable *T = makeReg(IceType_f64); |
| typename Traits::X86OperandMem *Addr = |
| formMemoryOperand(Instr->getArg(0), IceType_f64); |
| @@ -2929,8 +2921,8 @@ void TargetX86Base<Machine>::lowerIntrinsicCall( |
| InstLoad *Load = InstLoad::create(Func, Dest, Instr->getArg(0)); |
| lowerLoad(Load); |
| // Make sure the atomic load isn't elided when unused, by adding a FakeUse. |
| - // Since lowerLoad may fuse the load w/ an arithmetic instruction, |
| - // insert the FakeUse on the last-inserted instruction's dest. |
| + // Since lowerLoad may fuse the load w/ an arithmetic instruction, insert |
| + // the FakeUse on the last-inserted instruction's dest. |
| Context.insert( |
| InstFakeUse::create(Func, Context.getLastInserted()->getDest())); |
| return; |
| @@ -2953,15 +2945,15 @@ void TargetX86Base<Machine>::lowerIntrinsicCall( |
| Func->setError("Unexpected memory ordering for AtomicStore"); |
| return; |
| } |
| - // We require the memory address to be naturally aligned. |
| - // Given that is the case, then normal stores are atomic. |
| - // Add a fence after the store to make it visible. |
| + // We require the memory address to be naturally aligned. Given that is the |
| + // case, then normal stores are atomic. Add a fence after the store to make |
| + // it visible. |
| Operand *Value = Instr->getArg(0); |
| Operand *Ptr = Instr->getArg(1); |
| if (!Traits::Is64Bit && Value->getType() == IceType_i64) { |
| - // Use a movq instead of what lowerStore() normally does |
| - // (split the store into two), following what GCC does. |
| - // Cast the bits from int -> to an xmm register first. |
| + // Use a movq instead of what lowerStore() normally does (split the store |
| + // into two), following what GCC does. Cast the bits from int -> to an |
| + // xmm register first. |
| Variable *T = makeReg(IceType_f64); |
| InstCast *Cast = InstCast::create(Func, InstCast::Bitcast, T, Value); |
| lowerCast(Cast); |
| @@ -2980,8 +2972,8 @@ void TargetX86Base<Machine>::lowerIntrinsicCall( |
| case Intrinsics::Bswap: { |
| Variable *Dest = Instr->getDest(); |
| Operand *Val = Instr->getArg(0); |
| - // In 32-bit mode, bswap only works on 32-bit arguments, and the |
| - // argument must be a register. Use rotate left for 16-bit bswap. |
| + // In 32-bit mode, bswap only works on 32-bit arguments, and the argument |
| + // must be a register. Use rotate left for 16-bit bswap. |
| if (!Traits::Is64Bit && Val->getType() == IceType_i64) { |
| Val = legalizeUndef(Val); |
| Variable *T_Lo = legalizeToReg(loOperand(Val)); |
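
As the comment above notes, the 16-bit case uses a rotate rather than bswap; a tiny illustrative sketch.

#include <cstdint>

// Swapping the two bytes of a 16-bit value is a rotate by 8 (rol/ror).
uint16_t bswap16_via_rotate(uint16_t v) {
  return (uint16_t)((v << 8) | (v >> 8));  // rol v, 8
}
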
| @@ -3070,8 +3062,8 @@ void TargetX86Base<Machine>::lowerIntrinsicCall( |
| return; |
| } |
| case Intrinsics::Ctlz: { |
| - // The "is zero undef" parameter is ignored and we always return |
| - // a well-defined value. |
| + // The "is zero undef" parameter is ignored and we always return a |
| + // well-defined value. |
| Operand *Val = legalize(Instr->getArg(0)); |
| Operand *FirstVal; |
| Operand *SecondVal = nullptr; |
| @@ -3087,8 +3079,8 @@ void TargetX86Base<Machine>::lowerIntrinsicCall( |
| return; |
| } |
| case Intrinsics::Cttz: { |
| - // The "is zero undef" parameter is ignored and we always return |
| - // a well-defined value. |
| + // The "is zero undef" parameter is ignored and we always return a |
| + // well-defined value. |
| Operand *Val = legalize(Instr->getArg(0)); |
| Operand *FirstVal; |
| Operand *SecondVal = nullptr; |
| @@ -3108,8 +3100,8 @@ void TargetX86Base<Machine>::lowerIntrinsicCall( |
| Type Ty = Src->getType(); |
| Variable *Dest = Instr->getDest(); |
| Variable *T = makeVectorOfFabsMask(Ty); |
| - // The pand instruction operates on an m128 memory operand, so if |
| - // Src is an f32 or f64, we need to make sure it's in a register. |
| + // The pand instruction operates on an m128 memory operand, so if Src is an |
| + // f32 or f64, we need to make sure it's in a register. |
| if (isVectorType(Ty)) { |
| if (llvm::isa<typename Traits::X86OperandMem>(Src)) |
| Src = legalizeToReg(Src); |
| @@ -3694,8 +3686,8 @@ void TargetX86Base<Machine>::lowerMemmove(Operand *Dest, Operand *Src, |
| Variable *Reg; |
| // Copy the data into registers as the source and destination could overlap |
| - // so make sure not to clobber the memory. This also means overlapping moves |
| - // can be used as we are taking a safe snapshot of the memory. |
| + // so make sure not to clobber the memory. This also means overlapping |
| + // moves can be used as we are taking a safe snapshot of the memory. |
| Type Ty = largestTypeInSize(CountValue); |
| uint32_t TyWidth = typeWidthInBytes(Ty); |
| @@ -3896,8 +3888,7 @@ inline void dumpAddressOpt(const Cfg *Func, const Variable *Base, |
| inline bool matchTransitiveAssign(const VariablesMetadata *VMetadata, |
| Variable *&Var, const Inst *&Reason) { |
| - // Var originates from Var=SrcVar ==> |
| - // set Var:=SrcVar |
| + // Var originates from Var=SrcVar ==> set Var:=SrcVar |
| if (Var == nullptr) |
| return false; |
| if (const Inst *VarAssign = VMetadata->getSingleDefinition(Var)) { |
| @@ -4059,10 +4050,10 @@ inline void computeAddressOpt(Cfg *Func, const Inst *Instr, Variable *&Base, |
| (void)Offset; // TODO: pattern-match for non-zero offsets. |
| if (Base == nullptr) |
| return; |
| - // If the Base has more than one use or is live across multiple |
| - // blocks, then don't go further. Alternatively (?), never consider |
| - // a transformation that would change a variable that is currently |
| - // *not* live across basic block boundaries into one that *is*. |
| + // If the Base has more than one use or is live across multiple blocks, then |
| + // don't go further. Alternatively (?), never consider a transformation that |
| + // would change a variable that is currently *not* live across basic block |
| + // boundaries into one that *is*. |
| if (Func->getVMetadata()->isMultiBlock(Base) /* || Base->getUseCount() > 1*/) |
| return; |
| @@ -4170,8 +4161,8 @@ void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) { |
| Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); |
| Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); |
| if (InstructionSet >= Traits::SSE4_1) { |
| - // TODO(wala): If the condition operand is a constant, use blendps |
| - // or pblendw. |
| + // TODO(wala): If the condition operand is a constant, use blendps or |
| + // pblendw. |
| // |
| // Use blendvps or pblendvb to implement select. |
| if (SrcTy == IceType_v4i1 || SrcTy == IceType_v4i32 || |
| @@ -4248,8 +4239,8 @@ void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) { |
| _cmp(CmpOpnd0, CmpOpnd1); |
| if (typeWidthInBytes(DestTy) == 1 || isFloatingType(DestTy)) { |
| - // The cmov instruction doesn't allow 8-bit or FP operands, so |
| - // we need explicit control flow. |
| + // The cmov instruction doesn't allow 8-bit or FP operands, so we need |
| + // explicit control flow. |
| // d=cmp e,f; a=d?b:c ==> cmp e,f; a=b; jne L1; a=c; L1: |
| typename Traits::Insts::Label *Label = |
| Traits::Insts::Label::create(Func, this); |
| @@ -4262,8 +4253,8 @@ void TargetX86Base<Machine>::lowerSelect(const InstSelect *Inst) { |
| return; |
| } |
| // mov t, SrcF; cmov_cond t, SrcT; mov dest, t |
| - // But if SrcT is immediate, we might be able to do better, as |
| - // the cmov instruction doesn't allow an immediate operand: |
| + // But if SrcT is immediate, we might be able to do better, as the cmov |
| + // instruction doesn't allow an immediate operand: |
| // mov t, SrcT; cmov_!cond t, SrcF; mov dest, t |
| if (llvm::isa<Constant>(SrcT) && !llvm::isa<Constant>(SrcF)) { |
| std::swap(SrcT, SrcF); |
| @@ -4623,8 +4614,8 @@ void TargetX86Base<Machine>::scalarizeArithmetic(InstArithmetic::OpKind Kind, |
| /// %cmp.ext = sext <n x i1> %cmp to <n x ty> |
| /// |
| /// We can eliminate the sext operation by copying the result of pcmpeqd, |
| -/// pcmpgtd, or cmpps (which produce sign extended results) to the result |
| -/// of the sext operation. |
| +/// pcmpgtd, or cmpps (which produce sign extended results) to the result of the |
| +/// sext operation. |
| template <class Machine> |
| void TargetX86Base<Machine>::eliminateNextVectorSextInstruction( |
| Variable *SignExtendedResult) { |
| @@ -4649,13 +4640,12 @@ void TargetX86Base<Machine>::lowerUnreachable( |
| template <class Machine> |
| void TargetX86Base<Machine>::lowerRMW( |
| const typename Traits::Insts::FakeRMW *RMW) { |
| - // If the beacon variable's live range does not end in this |
| - // instruction, then it must end in the modified Store instruction |
| - // that follows. This means that the original Store instruction is |
| - // still there, either because the value being stored is used beyond |
| - // the Store instruction, or because dead code elimination did not |
| - // happen. In either case, we cancel RMW lowering (and the caller |
| - // deletes the RMW instruction). |
| + // If the beacon variable's live range does not end in this instruction, then |
| + // it must end in the modified Store instruction that follows. This means |
| + // that the original Store instruction is still there, either because the |
| + // value being stored is used beyond the Store instruction, or because dead |
| + // code elimination did not happen. In either case, we cancel RMW lowering |
| + // (and the caller deletes the RMW instruction). |
| if (!RMW->isLastUse(RMW->getBeacon())) |
| return; |
| Operand *Src = RMW->getData(); |
| @@ -4736,10 +4726,9 @@ void TargetX86Base<Machine>::lowerOther(const Inst *Instr) { |
| } |
| } |
| -/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to |
| -/// preserve integrity of liveness analysis. Undef values are also |
| -/// turned into zeroes, since loOperand() and hiOperand() don't expect |
| -/// Undef input. |
| +/// Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve |
| +/// integrity of liveness analysis. Undef values are also turned into zeroes, |
| +/// since loOperand() and hiOperand() don't expect Undef input. |
| template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { |
| if (Traits::Is64Bit) { |
| // On x86-64 we don't need to prelower phis -- the architecture can handle |
| @@ -4747,25 +4736,25 @@ template <class Machine> void TargetX86Base<Machine>::prelowerPhis() { |
| return; |
| } |
| - // Pause constant blinding or pooling, blinding or pooling will be done |
| - // later during phi lowering assignments |
| + // Pause constant blinding or pooling, blinding or pooling will be done later |
| + // during phi lowering assignments |
| BoolFlagSaver B(RandomizationPoolingPaused, true); |
| PhiLowering::prelowerPhis32Bit<TargetX86Base<Machine>>( |
| this, Context.getNode(), Func); |
| } |
| -// There is no support for loading or emitting vector constants, so the |
| -// vector values returned from makeVectorOfZeros, makeVectorOfOnes, |
| -// etc. are initialized with register operations. |
| +// There is no support for loading or emitting vector constants, so the vector |
| +// values returned from makeVectorOfZeros, makeVectorOfOnes, etc. are |
| +// initialized with register operations. |
| // |
| -// TODO(wala): Add limited support for vector constants so that |
| -// complex initialization in registers is unnecessary. |
| +// TODO(wala): Add limited support for vector constants so that complex |
| +// initialization in registers is unnecessary. |
| template <class Machine> |
| Variable *TargetX86Base<Machine>::makeVectorOfZeros(Type Ty, int32_t RegNum) { |
| Variable *Reg = makeReg(Ty, RegNum); |
| - // Insert a FakeDef, since otherwise the live range of Reg might |
| - // be overestimated. |
| + // Insert a FakeDef, since otherwise the live range of Reg might be |
| + // overestimated. |
| Context.insert(InstFakeDef::create(Func, Reg)); |
| _pxor(Reg, Reg); |
| return Reg; |
| @@ -4811,12 +4800,12 @@ Variable *TargetX86Base<Machine>::makeVectorOfHighOrderBits(Type Ty, |
| } |
| } |
| -/// Construct a mask in a register that can be and'ed with a |
| -/// floating-point value to mask off its sign bit. The value will be |
| -/// <4 x 0x7fffffff> for f32 and v4f32, and <2 x 0x7fffffffffffffff> |
| -/// for f64. Construct it as vector of ones logically right shifted |
| -/// one bit. TODO(stichnot): Fix the wala TODO above, to represent |
| -/// vector constants in memory. |
| +/// Construct a mask in a register that can be and'ed with a floating-point |
| +/// value to mask off its sign bit. The value will be <4 x 0x7fffffff> for f32 |
| +/// and v4f32, and <2 x 0x7fffffffffffffff> for f64. Construct it as vector of |
| +/// ones logically right shifted one bit. |
| +// TODO(stichnot): Fix the wala |
| +// TODO: above, to represent vector constants in memory. |
| template <class Machine> |
| Variable *TargetX86Base<Machine>::makeVectorOfFabsMask(Type Ty, |
| int32_t RegNum) { |
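
The mask described above clears the sign bit of each element; a scalar sketch of the same idea (the vector version builds the <0x7fffffff...> constant in a register, since vector constants cannot yet be loaded from memory, as the TODO notes).

#include <cstdint>
#include <cstring>

// fabs for a single float via the 0x7fffffff mask (all ones logically shifted
// right by one) -- per lane, this is what the pand with the fabs mask does.
float fabs_via_mask(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof bits);
  bits &= 0x7fffffffu;  // all ones >> 1: clears the sign bit
  std::memcpy(&x, &bits, sizeof x);
  return x;
}
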
| @@ -4833,9 +4822,9 @@ TargetX86Base<Machine>::getMemoryOperandForStackSlot(Type Ty, Variable *Slot, |
| assert(Slot->mustNotHaveReg()); |
| assert(Slot->getRegNum() == Variable::NoRegister); |
| // Compute the location of Loc in memory. |
| - // TODO(wala,stichnot): lea should not be required. The address of |
| - // the stack slot is known at compile time (although not until after |
| - // addProlog()). |
| + // TODO(wala,stichnot): lea should not |
| + // be required. The address of the stack slot is known at compile time |
| + // (although not until after addProlog()). |
| const Type PointerType = IceType_i32; |
| Variable *Loc = makeReg(PointerType); |
| _lea(Loc, Slot); |
| @@ -4861,20 +4850,19 @@ template <class Machine> |
| Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, |
| int32_t RegNum) { |
| Type Ty = From->getType(); |
| - // Assert that a physical register is allowed. To date, all calls |
| - // to legalize() allow a physical register. If a physical register |
| - // needs to be explicitly disallowed, then new code will need to be |
| - // written to force a spill. |
| + // Assert that a physical register is allowed. To date, all calls to |
| + // legalize() allow a physical register. If a physical register needs to be |
| + // explicitly disallowed, then new code will need to be written to force a |
| + // spill. |
| assert(Allowed & Legal_Reg); |
| - // If we're asking for a specific physical register, make sure we're |
| - // not allowing any other operand kinds. (This could be future |
| - // work, e.g. allow the shl shift amount to be either an immediate |
| - // or in ecx.) |
| + // If we're asking for a specific physical register, make sure we're not |
| + // allowing any other operand kinds. (This could be future work, e.g. allow |
| + // the shl shift amount to be either an immediate or in ecx.) |
| assert(RegNum == Variable::NoRegister || Allowed == Legal_Reg); |
| if (auto Mem = llvm::dyn_cast<typename Traits::X86OperandMem>(From)) { |
| - // Before doing anything with a Mem operand, we need to ensure |
| - // that the Base and Index components are in physical registers. |
| + // Before doing anything with a Mem operand, we need to ensure that the |
| + // Base and Index components are in physical registers. |
| Variable *Base = Mem->getBase(); |
| Variable *Index = Mem->getIndex(); |
| Variable *RegBase = nullptr; |
| @@ -4919,8 +4907,8 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, |
| } |
| } |
| - // If the operand is an 32 bit constant integer, we should check |
| - // whether we need to randomize it or pool it. |
| + // If the operand is an 32 bit constant integer, we should check whether we |
| + // need to randomize it or pool it. |
| if (ConstantInteger32 *C = llvm::dyn_cast<ConstantInteger32>(Const)) { |
| Operand *NewConst = randomizeOrPoolImmediate(C, RegNum); |
| if (NewConst != Const) { |
| @@ -4928,8 +4916,8 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, |
| } |
| } |
| - // Convert a scalar floating point constant into an explicit |
| - // memory operand. |
| + // Convert a scalar floating point constant into an explicit memory |
| + // operand. |
| if (isScalarFloatingType(Ty)) { |
| Variable *Base = nullptr; |
| std::string Buffer; |
| @@ -4952,9 +4940,9 @@ Operand *TargetX86Base<Machine>::legalize(Operand *From, LegalMask Allowed, |
| return From; |
| } |
| if (auto Var = llvm::dyn_cast<Variable>(From)) { |
| - // Check if the variable is guaranteed a physical register. This |
| - // can happen either when the variable is pre-colored or when it is |
| - // assigned infinite weight. |
| + // Check if the variable is guaranteed a physical register. This can happen |
| + // either when the variable is pre-colored or when it is assigned infinite |
| + // weight. |
| bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); |
| // We need a new physical register for the operand if: |
| // Mem is not allowed and Var isn't guaranteed a physical |
| @@ -4982,16 +4970,16 @@ Operand *TargetX86Base<Machine>::legalizeUndef(Operand *From, int32_t RegNum) { |
| Type Ty = From->getType(); |
| if (llvm::isa<ConstantUndef>(From)) { |
| // Lower undefs to zero. Another option is to lower undefs to an |
| - // uninitialized register; however, using an uninitialized register |
| - // results in less predictable code. |
| + // uninitialized register; however, using an uninitialized register results |
| + // in less predictable code. |
| // |
| - // If in the future the implementation is changed to lower undef |
| - // values to uninitialized registers, a FakeDef will be needed: |
| + // If in the future the implementation is changed to lower undef values to |
| + // uninitialized registers, a FakeDef will be needed: |
| // Context.insert(InstFakeDef::create(Func, Reg)); |
| // This is in order to ensure that the live range of Reg is not |
| - // overestimated. If the constant being lowered is a 64 bit value, |
| - // then the result should be split and the lo and hi components will |
| - // need to go in uninitialized registers. |
| + // overestimated. If the constant being lowered is a 64 bit value, then |
| + // the result should be split and the lo and hi components will need to go |
| + // in uninitialized registers. |
| if (isVectorType(Ty)) |
| return makeVectorOfZeros(Ty, RegNum); |
| return Ctx->getConstantZero(Ty); |
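For illustration only: the alternative sketched in the comment, lowering undef to an uninitialized register pinned by a FakeDef, would look roughly like this for the scalar case. It is hypothetical and not what this patch does.

    // Hypothetical alternative: give the undef an uninitialized register and
    // insert a FakeDef so that Reg's live range is not overestimated.
    Variable *Reg = makeReg(Ty, RegNum);
    Context.insert(InstFakeDef::create(Func, Reg));
    return Reg;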
| @@ -4999,12 +4987,11 @@ Operand *TargetX86Base<Machine>::legalizeUndef(Operand *From, int32_t RegNum) { |
| return From; |
| } |
| -/// For the cmp instruction, if Src1 is an immediate, or known to be a |
| -/// physical register, we can allow Src0 to be a memory operand. |
| -/// Otherwise, Src0 must be copied into a physical register. |
| -/// (Actually, either Src0 or Src1 can be chosen for the physical |
| -/// register, but unfortunately we have to commit to one or the other |
| -/// before register allocation.) |
| +/// For the cmp instruction, if Src1 is an immediate, or known to be a physical |
| +/// register, we can allow Src0 to be a memory operand. Otherwise, Src0 must be |
| +/// copied into a physical register. (Actually, either Src0 or Src1 can be |
| +/// chosen for the physical register, but unfortunately we have to commit to one |
| +/// or the other before register allocation.) |
| template <class Machine> |
| Operand *TargetX86Base<Machine>::legalizeSrc0ForCmp(Operand *Src0, |
| Operand *Src1) { |
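For illustration only: the body of legalizeSrc0ForCmp() is not part of this hunk, but the rule in the comment reduces to a check on Src1 followed by a call to legalize() with the appropriate mask. A minimal sketch, reusing the Legal_Reg/Legal_Mem masks from legalize():

    bool IsSrc1ImmOrReg = false;
    if (llvm::isa<Constant>(Src1)) {
      IsSrc1ImmOrReg = true;
    } else if (auto Var = llvm::dyn_cast<Variable>(Src1)) {
      if (Var->hasReg())
        IsSrc1ImmOrReg = true;
    }
    // Src0 may stay in memory only when Src1 is an immediate or is already in
    // a physical register; otherwise force Src0 into a register.
    return legalize(Src0, IsSrc1ImmOrReg ? (Legal_Reg | Legal_Mem) : Legal_Reg);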
| @@ -5031,11 +5018,10 @@ TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, Type Ty, |
| Constant *Offset = llvm::dyn_cast<Constant>(Opnd); |
| assert(Base || Offset); |
| if (Offset) { |
| - // During memory operand building, we do not blind or pool |
| - // the constant offset, we will work on the whole memory |
| - // operand later as one entity later, this save one instruction. |
| - // By turning blinding and pooling off, we guarantee |
| - // legalize(Offset) will return a Constant*. |
| + // During memory operand building, we do not blind or pool the constant |
| + // offset; we will work on the whole memory operand as one entity later, |
| + // which saves one instruction. By turning blinding and pooling off, we |
| + // guarantee legalize(Offset) will return a Constant*. |
| { |
| BoolFlagSaver B(RandomizationPoolingPaused, true); |
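For illustration only: the scoped BoolFlagSaver is what pauses blinding/pooling for the duration of the legalize(Offset) call. A plausible shape for such an RAII helper, shown as a sketch rather than the definition from this change:

    class BoolFlagSaver {
    public:
      BoolFlagSaver(bool &F, bool NewValue) : OldState(F), Flag(F) {
        F = NewValue; // override the flag for the enclosing scope
      }
      ~BoolFlagSaver() { Flag = OldState; } // restore on scope exit
    private:
      const bool OldState;
      bool &Flag;
    };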
| @@ -5047,8 +5033,8 @@ TargetX86Base<Machine>::formMemoryOperand(Operand *Opnd, Type Ty, |
| } |
| Mem = Traits::X86OperandMem::create(Func, Ty, Base, Offset); |
| } |
| - // Do legalization, which contains randomization/pooling |
| - // or do randomization/pooling. |
| + // Do legalization, which includes randomization/pooling, or do just the |
| + // randomization/pooling. |
| return llvm::cast<typename Traits::X86OperandMem>( |
| DoLegalize ? legalize(Mem) : randomizeOrPoolImmediate(Mem)); |
| } |
| @@ -5173,9 +5159,8 @@ Operand *TargetX86Base<Machine>::randomizeOrPoolImmediate(Constant *Immediate, |
| // If we have already assigned a phy register, we must come from |
| // andvancedPhiLowering()=>lowerAssign(). In this case we should reuse |
|
Jim Stichnoth
2015/09/16 00:01:29
advancedPhiLowering
ascull
2015/09/16 18:30:09
Done.
|
| // the assigned register as this assignment is that start of its use-def |
| - // chain. So we add RegNum argument here. |
| - // Note we use 'lea' instruction instead of 'xor' to avoid affecting |
| - // the flags. |
| + // chain. So we add the RegNum argument here. Note that we use the 'lea' |
| + // instruction instead of 'xor' to avoid affecting the flags. |
| Variable *Reg = makeReg(IceType_i32, RegNum); |
| ConstantInteger32 *Integer = llvm::cast<ConstantInteger32>(Immediate); |
| uint32_t Value = Integer->getValue(); |
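For illustration only: to make the 'lea' versus 'xor' remark concrete, blinding emits the constant in disguised form and then undoes the disguise with an address computation, because lea leaves EFLAGS untouched while an xor/add fix-up would clobber it. A hypothetical continuation of the sequence above; the cookie accessor name is an assumption.

    uint32_t Cookie = Ctx->getRandomizationCookie(); // assumed accessor name
    _mov(Reg, Ctx->getConstantInt32(Value + Cookie)); // disguised immediate
    Constant *Offset = Ctx->getConstantInt32(0 - Cookie);
    _lea(Reg, Traits::X86OperandMem::create(Func, IceType_i32, Reg, Offset));
    // An 'xor Reg, Cookie' would also recover Value, but xor writes the flags,
    // which this path must avoid.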
| @@ -5238,8 +5223,8 @@ TargetX86Base<Machine>::randomizeOrPoolImmediate( |
| return MemOperand; |
| } |
| - // If this memory operand is already a randommized one, we do |
| - // not randomize it again. |
| + // If this memory operand is already a randommized one, we do not randomize |
|
Jim Stichnoth
2015/09/16 00:01:29
randomized
ascull
2015/09/16 18:30:09
Done.
|
| + // it again. |
| if (MemOperand->getRandomized()) |
| return MemOperand; |
| @@ -5274,9 +5259,8 @@ TargetX86Base<Machine>::randomizeOrPoolImmediate( |
| Variable *RegTemp = makeReg(MemOperand->getOffset()->getType(), RegNum); |
| _lea(RegTemp, TempMemOperand); |
| // As source operand doesn't use the dstreg, we don't need to add |
| - // _set_dest_nonkillable(). |
| - // But if we use the same Dest Reg, that is, with RegNum |
| - // assigned, we should add this _set_dest_nonkillable() |
| + // _set_dest_nonkillable(). But if we use the same Dest Reg, that is, |
| + // with RegNum assigned, we should add _set_dest_nonkillable(). |
| if (RegNum != Variable::NoRegister) |
| _set_dest_nonkillable(); |
| @@ -5302,12 +5286,11 @@ TargetX86Base<Machine>::randomizeOrPoolImmediate( |
| // =>[RegTemp, index, shift] |
| assert(Ctx->getFlags().getRandomizeAndPoolImmediatesOption() == |
| RPI_Pool); |
| - // Memory operand should never exist as source operands in phi |
| - // lowering assignments, so there is no need to reuse any registers |
| - // here. For phi lowering, we should not ask for new physical |
| - // registers in general. |
| - // However, if we do meet Memory Operand during phi lowering, we |
| - // should not blind or pool the immediates for now. |
| + // Memory operands should never appear as source operands in phi lowering |
| + // assignments, so there is no need to reuse any registers here. For phi |
| + // lowering, we should not ask for new physical registers in general. |
| + // However, if we do meet a memory operand during phi lowering, we should |
| + // not blind or pool the immediates for now. |
| if (RegNum != Variable::NoRegister) |
| return MemOperand; |
| Variable *RegTemp = makeReg(IceType_i32); |