| Index: src/IceTargetLoweringARM32.cpp |
| diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp |
| index 3e92637123220274a9f35c7207861d874bce6a4f..f8d9d4519ac70eac144e1422ed176f510ccc73ed 100644 |
| --- a/src/IceTargetLoweringARM32.cpp |
| +++ b/src/IceTargetLoweringARM32.cpp |
| @@ -47,7 +47,7 @@ namespace { |
| } while (0) |
| // The following table summarizes the logic for lowering the icmp instruction |
| -// for i32 and narrower types. Each icmp condition has a clear mapping to an |
| +// for i32 and narrower types. Each icmp condition has a clear mapping to an |
| // ARM32 conditional move instruction. |
| const struct TableIcmp32_ { |
| @@ -62,8 +62,8 @@ const struct TableIcmp32_ { |
| // The following table summarizes the logic for lowering the icmp instruction |
| // for the i64 type. Two conditional moves are needed for setting to 1 or 0. |
| -// The operands may need to be swapped, and there is a slight difference |
| -// for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc). |
| +// The operands may need to be swapped, and there is a slight difference for |
| +// signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc). |
| const struct TableIcmp64_ { |
| bool IsSigned; |
| bool Swapped; |
| @@ -82,18 +82,16 @@ CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) { |
| return TableIcmp32[Index].Mapping; |
| } |
| -// In some cases, there are x-macros tables for both high-level and |
| -// low-level instructions/operands that use the same enum key value. |
| -// The tables are kept separate to maintain a proper separation |
| -// between abstraction layers. There is a risk that the tables could |
| -// get out of sync if enum values are reordered or if entries are |
| -// added or deleted. The following dummy namespaces use |
| +// In some cases, there are x-macros tables for both high-level and low-level |
| +// instructions/operands that use the same enum key value. The tables are kept |
| +// separate to maintain a proper separation between abstraction layers. There |
| +// is a risk that the tables could get out of sync if enum values are reordered |
| +// or if entries are added or deleted. The following dummy namespaces use |
| // static_asserts to ensure everything is kept in sync. |
| // Validate the enum values in ICMPARM32_TABLE. |
| namespace dummy1 { |
| -// Define a temporary set of enum values based on low-level table |
| -// entries. |
| +// Define a temporary set of enum values based on low-level table entries. |
| enum _tmp_enum { |
| #define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val, |
| ICMPARM32_TABLE |
| @@ -104,8 +102,8 @@ enum _tmp_enum { |
| #define X(tag, str) static const int _table1_##tag = InstIcmp::tag; |
| ICEINSTICMP_TABLE |
| #undef X |
| -// Define a set of constants based on low-level table entries, and |
| -// ensure the table entry keys are consistent. |
| +// Define a set of constants based on low-level table entries, and ensure the |
| +// table entry keys are consistent. |
| #define X(val, signed, swapped64, C_32, C1_64, C2_64) \ |
| static const int _table2_##val = _tmp_##val; \ |
| static_assert( \ |
| @@ -113,8 +111,8 @@ ICEINSTICMP_TABLE |
| "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE"); |
| ICMPARM32_TABLE |
| #undef X |
| -// Repeat the static asserts with respect to the high-level table |
| -// entries in case the high-level table has extra entries. |
| +// Repeat the static asserts with respect to the high-level table entries in |
| +// case the high-level table has extra entries. |
| #define X(tag, str) \ |
| static_assert( \ |
| _table1_##tag == _table2_##tag, \ |
| @@ -126,17 +124,17 @@ ICEINSTICMP_TABLE |
| // Stack alignment |
| const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16; |
| -// Value is in bytes. Return Value adjusted to the next highest multiple |
| -// of the stack alignment. |
| +// Value is in bytes. Return Value adjusted to the next highest multiple of the |
| +// stack alignment. |
| uint32_t applyStackAlignment(uint32_t Value) { |
| return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES); |
| } |
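The two alignment helpers above reduce to the standard round-up-to-power-of-two idiom. A minimal standalone sketch (the function name and assert are illustrative, not the Utils API):

    #include <cassert>
    #include <cstdint>

    // Round Value up to the next multiple of Align; Align must be a power of 2.
    uint32_t roundUpToAlignment(uint32_t Value, uint32_t Align) {
      assert(Align != 0 && (Align & (Align - 1)) == 0); // power of two
      return (Value + Align - 1) & ~(Align - 1);
    }

    // With ARM32_STACK_ALIGNMENT_BYTES == 16:
    //   roundUpToAlignment(1, 16) == 16, roundUpToAlignment(17, 16) == 32.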
| -// Value is in bytes. Return Value adjusted to the next highest multiple |
| -// of the stack alignment required for the given type. |
| +// Value is in bytes. Return Value adjusted to the next highest multiple of the |
| +// stack alignment required for the given type. |
| uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) { |
| - // Use natural alignment, except that normally (non-NaCl) ARM only |
| - // aligns vectors to 8 bytes. |
| + // Use natural alignment, except that normally (non-NaCl) ARM only aligns |
| + // vectors to 8 bytes. |
| // TODO(jvoung): Check this ... |
| size_t typeAlignInBytes = typeWidthInBytes(Ty); |
| if (isVectorType(Ty)) |
| @@ -172,9 +170,8 @@ TargetARM32Features::TargetARM32Features(const ClFlags &Flags) { |
| TargetARM32::TargetARM32(Cfg *Func) |
| : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) { |
| - // TODO: Don't initialize IntegerRegisters and friends every time. |
| - // Instead, initialize in some sort of static initializer for the |
| - // class. |
| + // TODO: Don't initialize IntegerRegisters and friends every time. Instead, |
| + // initialize in some sort of static initializer for the class. |
| // Limit this size (or do all bitsets need to be the same width)??? |
| llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM); |
| llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM); |
| @@ -243,19 +240,18 @@ void TargetARM32::translateO2() { |
| // Argument lowering |
| Func->doArgLowering(); |
| - // Target lowering. This requires liveness analysis for some parts |
| - // of the lowering decisions, such as compare/branch fusing. If |
| - // non-lightweight liveness analysis is used, the instructions need |
| - // to be renumbered first. TODO: This renumbering should only be |
| - // necessary if we're actually calculating live intervals, which we |
| - // only do for register allocation. |
| + // Target lowering. This requires liveness analysis for some parts of the |
| + // lowering decisions, such as compare/branch fusing. If non-lightweight |
| + // liveness analysis is used, the instructions need to be renumbered first. |
| + // TODO: This renumbering should only be necessary if we're actually |
| + // calculating live intervals, which we only do for register allocation. |
| Func->renumberInstructions(); |
| if (Func->hasError()) |
| return; |
| - // TODO: It should be sufficient to use the fastest liveness |
| - // calculation, i.e. livenessLightweight(). However, for some |
| - // reason that slows down the rest of the translation. Investigate. |
| + // TODO: It should be sufficient to use the fastest liveness calculation, |
| + // i.e. livenessLightweight(). However, for some reason that slows down the |
| + // rest of the translation. Investigate. |
| Func->liveness(Liveness_Basic); |
| if (Func->hasError()) |
| return; |
| @@ -266,19 +262,19 @@ void TargetARM32::translateO2() { |
| return; |
| Func->dump("After ARM32 codegen"); |
| - // Register allocation. This requires instruction renumbering and |
| - // full liveness analysis. |
| + // Register allocation. This requires instruction renumbering and full |
| + // liveness analysis. |
| Func->renumberInstructions(); |
| if (Func->hasError()) |
| return; |
| Func->liveness(Liveness_Intervals); |
| if (Func->hasError()) |
| return; |
| - // Validate the live range computations. The expensive validation |
| - // call is deliberately only made when assertions are enabled. |
| + // Validate the live range computations. The expensive validation call is |
| + // deliberately only made when assertions are enabled. |
| assert(Func->validateLiveness()); |
| - // The post-codegen dump is done here, after liveness analysis and |
| - // associated cleanup, to make the dump cleaner and more useful. |
| + // The post-codegen dump is done here, after liveness analysis and associated |
| + // cleanup, to make the dump cleaner and more useful. |
| Func->dump("After initial ARM32 codegen"); |
| Func->getVMetadata()->init(VMK_All); |
| regAlloc(RAK_Global); |
| @@ -305,11 +301,10 @@ void TargetARM32::translateO2() { |
| Func->contractEmptyNodes(); |
| Func->reorderNodes(); |
| - // Branch optimization. This needs to be done just before code |
| - // emission. In particular, no transformations that insert or |
| - // reorder CfgNodes should be done after branch optimization. We go |
| - // ahead and do it before nop insertion to reduce the amount of work |
| - // needed for searching for opportunities. |
| + // Branch optimization. This needs to be done just before code emission. In |
| + // particular, no transformations that insert or reorder CfgNodes should be |
| + // done after branch optimization. We go ahead and do it before nop insertion |
| + // to reduce the amount of work needed for searching for opportunities. |
| Func->doBranchOpt(); |
| Func->dump("After branch optimization"); |
| @@ -395,8 +390,8 @@ Variable *TargetARM32::getPhysicalRegister(SizeT RegNum, Type Ty) { |
| Reg = Func->makeVariable(Ty); |
| Reg->setRegNum(RegNum); |
| PhysicalRegisters[Ty][RegNum] = Reg; |
| - // Specially mark SP and LR as an "argument" so that it is considered |
| - // live upon function entry. |
| + // Specially mark SP and LR as "arguments" so that they are considered live |
| + // upon function entry. |
| if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) { |
| Func->addImplicitArg(Reg); |
| Reg->setIgnoreLiveness(); |
| @@ -445,15 +440,15 @@ bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) { |
| if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG) |
| return false; |
| int32_t RegLo, RegHi; |
| - // Always start i64 registers at an even register, so this may end |
| - // up padding away a register. |
| + // Always start i64 registers at an even register, so this may end up padding |
| + // away a register. |
| NumGPRRegsUsed = Utils::applyAlignment(NumGPRRegsUsed, 2); |
| RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed; |
| ++NumGPRRegsUsed; |
| RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed; |
| ++NumGPRRegsUsed; |
| - // If this bumps us past the boundary, don't allocate to a register |
| - // and leave any previously speculatively consumed registers as consumed. |
| + // If this bumps us past the boundary, don't allocate to a register and leave |
| + // any previously speculatively consumed registers as consumed. |
| if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG) |
| return false; |
| Regs->first = RegLo; |
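The even-register pairing above follows the AAPCS convention for 64-bit arguments in r0-r3. A self-contained sketch of the same bookkeeping (the register indices and 4-register cap stand in for r0-r3; this is not the class in this file):

    #include <cstdint>
    #include <utility>

    constexpr int32_t MaxGPRArgs = 4; // r0..r3

    // Returns true and sets Regs to the (lo, hi) register indices, or returns
    // false, leaving any speculatively consumed registers consumed.
    bool assignI64Pair(int32_t &NumGPRRegsUsed,
                       std::pair<int32_t, int32_t> &Regs) {
      if (NumGPRRegsUsed >= MaxGPRArgs)
        return false;
      NumGPRRegsUsed = (NumGPRRegsUsed + 1) & ~1; // align to an even register
      const int32_t Lo = NumGPRRegsUsed++;
      const int32_t Hi = NumGPRRegsUsed++;
      if (NumGPRRegsUsed > MaxGPRArgs)
        return false;
      Regs = {Lo, Hi};
      return true;
    }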
| @@ -474,15 +469,15 @@ bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { |
| return false; |
| if (isVectorType(Ty)) { |
| NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 4); |
| - // Q registers are declared in reverse order, so |
| - // RegARM32::Reg_q0 > RegARM32::Reg_q1. Therefore, we need to subtract |
| - // NumFPRegUnits from Reg_q0. Same thing goes for D registers. |
| + // Q registers are declared in reverse order, so RegARM32::Reg_q0 > |
| + // RegARM32::Reg_q1. Therefore, we need to subtract NumFPRegUnits from |
| + // Reg_q0. Same thing goes for D registers. |
| static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1, |
| "ARM32 Q registers are possibly declared incorrectly."); |
| *Reg = RegARM32::Reg_q0 - (NumFPRegUnits / 4); |
| NumFPRegUnits += 4; |
| - // If this bumps us past the boundary, don't allocate to a register |
| - // and leave any previously speculatively consumed registers as consumed. |
| + // If this bumps us past the boundary, don't allocate to a register and |
| + // leave any previously speculatively consumed registers as consumed. |
| if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS) |
| return false; |
| } else if (Ty == IceType_f64) { |
| @@ -491,8 +486,8 @@ bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) { |
| NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 2); |
| *Reg = RegARM32::Reg_d0 - (NumFPRegUnits / 2); |
| NumFPRegUnits += 2; |
| - // If this bumps us past the boundary, don't allocate to a register |
| - // and leave any previously speculatively consumed registers as consumed. |
| + // If this bumps us past the boundary, don't allocate to a register and |
| + // leave any previously speculatively consumed registers as consumed. |
| if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS) |
| return false; |
| } else { |
| @@ -509,9 +504,9 @@ void TargetARM32::lowerArguments() { |
| VarList &Args = Func->getArgs(); |
| TargetARM32::CallingConv CC; |
| - // For each register argument, replace Arg in the argument list with the |
| - // home register. Then generate an instruction in the prolog to copy the |
| - // home register to the assigned location of Arg. |
| + // For each register argument, replace Arg in the argument list with the home |
| + // register. Then generate an instruction in the prolog to copy the home |
| + // register to the assigned location of Arg. |
| Context.init(Func->getEntryNode()); |
| Context.setInsertPoint(Context.getCur()); |
| @@ -568,13 +563,12 @@ void TargetARM32::lowerArguments() { |
| // Helper function for addProlog(). |
| // |
| -// This assumes Arg is an argument passed on the stack. This sets the |
| -// frame offset for Arg and updates InArgsSizeBytes according to Arg's |
| -// width. For an I64 arg that has been split into Lo and Hi components, |
| -// it calls itself recursively on the components, taking care to handle |
| -// Lo first because of the little-endian architecture. Lastly, this |
| -// function generates an instruction to copy Arg into its assigned |
| -// register if applicable. |
| +// This assumes Arg is an argument passed on the stack. This sets the frame |
| +// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an |
| +// I64 arg that has been split into Lo and Hi components, it calls itself |
| +// recursively on the components, taking care to handle Lo first because of the |
| +// little-endian architecture. Lastly, this function generates an instruction |
| +// to copy Arg into its assigned register if applicable. |
| void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
| size_t BasicFrameOffset, |
| size_t &InArgsSizeBytes) { |
| @@ -591,8 +585,8 @@ void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
| InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty); |
| Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes); |
| InArgsSizeBytes += typeWidthInBytesOnStack(Ty); |
| - // If the argument variable has been assigned a register, we need to load |
| - // the value from the stack slot. |
| + // If the argument variable has been assigned a register, we need to load the |
| + // value from the stack slot. |
| if (Arg->hasReg()) { |
| assert(Ty != IceType_i64); |
| OperandARM32Mem *Mem = OperandARM32Mem::create( |
| @@ -606,10 +600,9 @@ void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
| } else { |
| _ldr(Arg, Mem); |
| } |
| - // This argument-copying instruction uses an explicit |
| - // OperandARM32Mem operand instead of a Variable, so its |
| - // fill-from-stack operation has to be tracked separately for |
| - // statistics. |
| + // This argument-copying instruction uses an explicit OperandARM32Mem |
| + // operand instead of a Variable, so its fill-from-stack operation has to |
| + // be tracked separately for statistics. |
| Ctx->statsUpdateFills(); |
| } |
| } |
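The offset bookkeeping described above can be restated compactly: align the running in-args size to the type's requirement, record the slot, then advance by the on-stack width. A sketch under those assumptions (names are illustrative):

    #include <cstdint>

    uint32_t alignTo(uint32_t V, uint32_t A) { return (V + A - 1) & ~(A - 1); }

    // Returns the frame offset assigned to the argument and advances
    // InArgsSizeBytes past it.
    uint32_t placeStackArg(uint32_t BasicFrameOffset, uint32_t &InArgsSizeBytes,
                           uint32_t TypeAlign, uint32_t TypeWidthOnStack) {
      InArgsSizeBytes = alignTo(InArgsSizeBytes, TypeAlign);
      const uint32_t Offset = BasicFrameOffset + InArgsSizeBytes;
      InArgsSizeBytes += TypeWidthOnStack;
      return Offset;
    }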
| @@ -642,16 +635,15 @@ void TargetARM32::addProlog(CfgNode *Node) { |
| // * GlobalsAndSubsequentPaddingSize: areas 3 - 4 |
| // * LocalsSpillAreaSize: area 5 |
| // * SpillAreaSizeBytes: areas 2 - 6 |
| - // Determine stack frame offsets for each Variable without a |
| - // register assignment. This can be done as one variable per stack |
| - // slot. Or, do coalescing by running the register allocator again |
| - // with an infinite set of registers (as a side effect, this gives |
| - // variables a second chance at physical register assignment). |
| + // Determine stack frame offsets for each Variable without a register |
| + // assignment. This can be done as one variable per stack slot. Or, do |
| + // coalescing by running the register allocator again with an infinite set of |
| + // registers (as a side effect, this gives variables a second chance at |
| + // physical register assignment). |
| // |
| - // A middle ground approach is to leverage sparsity and allocate one |
| - // block of space on the frame for globals (variables with |
| - // multi-block lifetime), and one block to share for locals |
| - // (single-block lifetime). |
| + // A middle ground approach is to leverage sparsity and allocate one block of |
| + // space on the frame for globals (variables with multi-block lifetime), and |
| + // one block to share for locals (single-block lifetime). |
| Context.init(Node); |
| Context.setInsertPoint(Context.getCur()); |
| @@ -661,14 +653,13 @@ void TargetARM32::addProlog(CfgNode *Node) { |
| RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); |
| VarList SortedSpilledVariables; |
| size_t GlobalsSize = 0; |
| - // If there is a separate locals area, this represents that area. |
| - // Otherwise it counts any variable not counted by GlobalsSize. |
| + // If there is a separate locals area, this represents that area. Otherwise |
| + // it counts any variable not counted by GlobalsSize. |
| SpillAreaSizeBytes = 0; |
| - // If there is a separate locals area, this specifies the alignment |
| - // for it. |
| + // If there is a separate locals area, this specifies the alignment for it. |
| uint32_t LocalsSlotsAlignmentBytes = 0; |
| - // The entire spill locations area gets aligned to largest natural |
| - // alignment of the variables that have a spill slot. |
| + // The entire spill locations area gets aligned to largest natural alignment |
| + // of the variables that have a spill slot. |
| uint32_t SpillAreaAlignmentBytes = 0; |
| // For now, we don't have target-specific variables that need special |
| // treatment (no stack-slot-linked SpillVariable type). |
| @@ -682,12 +673,11 @@ void TargetARM32::addProlog(CfgNode *Node) { |
| uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes; |
| SpillAreaSizeBytes += GlobalsSize; |
| - // Add push instructions for preserved registers. |
| - // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15). |
| - // Unlike x86, ARM also has callee-saved float/vector registers. |
| - // The "vpush" instruction can handle a whole list of float/vector |
| - // registers, but it only handles contiguous sequences of registers |
| - // by specifying the start and the length. |
| + // Add push instructions for preserved registers. On ARM, "push" can push a |
| + // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has |
| + // callee-saved float/vector registers. The "vpush" instruction can handle a |
| + // whole list of float/vector registers, but it only handles contiguous |
| + // sequences of registers by specifying the start and the length. |
| VarList GPRsToPreserve; |
| GPRsToPreserve.reserve(CalleeSaves.size()); |
| uint32_t NumCallee = 0; |
| @@ -704,8 +694,8 @@ void TargetARM32::addProlog(CfgNode *Node) { |
| } |
| for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
| if (CalleeSaves[i] && RegsUsed[i]) { |
| - // TODO(jvoung): do separate vpush for each floating point |
| - // register segment and += 4, or 8 depending on type. |
| + // TODO(jvoung): do separate vpush for each floating point register |
| + // segment and += 4, or 8 depending on type. |
| ++NumCallee; |
| PreservedRegsSizeBytes += 4; |
| GPRsToPreserve.push_back(getPhysicalRegister(i)); |
| @@ -724,10 +714,10 @@ void TargetARM32::addProlog(CfgNode *Node) { |
| Context.insert(InstFakeUse::create(Func, FP)); |
| } |
| - // Align the variables area. SpillAreaPaddingBytes is the size of |
| - // the region after the preserved registers and before the spill areas. |
| - // LocalsSlotsPaddingBytes is the amount of padding between the globals |
| - // and locals area if they are separate. |
| + // Align the variables area. SpillAreaPaddingBytes is the size of the region |
| + // after the preserved registers and before the spill areas. |
| + // LocalsSlotsPaddingBytes is the amount of padding between the globals and |
| + // locals area if they are separate. |
| assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES); |
| assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
| uint32_t SpillAreaPaddingBytes = 0; |
| @@ -758,9 +748,9 @@ void TargetARM32::addProlog(CfgNode *Node) { |
| resetStackAdjustment(); |
| - // Fill in stack offsets for stack args, and copy args into registers |
| - // for those that were register-allocated. Args are pushed right to |
| - // left, so Arg[0] is closest to the stack/frame pointer. |
| + // Fill in stack offsets for stack args, and copy args into registers for |
| + // those that were register-allocated. Args are pushed right to left, so |
| + // Arg[0] is closest to the stack/frame pointer. |
| Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
| size_t BasicFrameOffset = PreservedRegsSizeBytes; |
| if (!UsesFramePointer) |
| @@ -830,8 +820,8 @@ void TargetARM32::addEpilog(CfgNode *Node) { |
| if (RI == E) |
| return; |
| - // Convert the reverse_iterator position into its corresponding |
| - // (forward) iterator position. |
| + // Convert the reverse_iterator position into its corresponding (forward) |
| + // iterator position. |
| InstList::iterator InsertPoint = RI.base(); |
| --InsertPoint; |
| Context.init(Node); |
| @@ -840,9 +830,9 @@ void TargetARM32::addEpilog(CfgNode *Node) { |
| Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
| if (UsesFramePointer) { |
| Variable *FP = getPhysicalRegister(RegARM32::Reg_fp); |
| - // For late-stage liveness analysis (e.g. asm-verbose mode), |
| - // adding a fake use of SP before the assignment of SP=FP keeps |
| - // previous SP adjustments from being dead-code eliminated. |
| + // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake |
| + // use of SP before the assignment of SP=FP keeps previous SP adjustments |
| + // from being dead-code eliminated. |
| Context.insert(InstFakeUse::create(Func, SP)); |
| _mov(SP, FP); |
| } else { |
| @@ -868,8 +858,8 @@ void TargetARM32::addEpilog(CfgNode *Node) { |
| if (!MaybeLeafFunc) { |
| CalleeSaves[RegARM32::Reg_lr] = true; |
| } |
| - // Pop registers in ascending order just like push |
| - // (instead of in reverse order). |
| + // Pop registers in ascending order just like push (instead of in reverse |
| + // order). |
| for (SizeT i = 0; i < CalleeSaves.size(); ++i) { |
| if (CalleeSaves[i] && RegsUsed[i]) { |
| GPRsToRestore.push_back(getPhysicalRegister(i)); |
| @@ -903,17 +893,16 @@ void TargetARM32::addEpilog(CfgNode *Node) { |
| bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const { |
| constexpr bool SignExt = false; |
| - // TODO(jvoung): vldr of FP stack slots has a different limit from the |
| - // plain stackSlotType(). |
| + // TODO(jvoung): vldr of FP stack slots has a different limit from the plain |
| + // stackSlotType(). |
| return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset); |
| } |
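For plain integer loads/stores, the legality test above amounts to the 12-bit immediate-offset range of ARM's ldr/str encodings; FP slots use vldr, whose immediate is an 8-bit word count (hence the TODO). A rough model of those ranges, not the canHoldOffset implementation:

    #include <cstdint>

    // ldr/str with immediate offset: magnitude fits in 12 bits.
    bool canHoldWordOffset(int32_t Offset) {
      return Offset >= -4095 && Offset <= 4095;
    }

    // vldr/vstr: +/-1020 bytes, in multiples of 4.
    bool canHoldVldrOffset(int32_t Offset) {
      return Offset >= -1020 && Offset <= 1020 && (Offset % 4) == 0;
    }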
| StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var, |
| Variable *OrigBaseReg) { |
| int32_t Offset = Var->getStackOffset(); |
| - // Legalize will likely need a movw/movt combination, but if the top |
| - // bits are all 0 from negating the offset and subtracting, we could |
| - // use that instead. |
| + // Legalize will likely need a movw/movt combination, but if the top bits are |
| + // all 0 from negating the offset and subtracting, we could use that instead. |
| bool ShouldSub = (-Offset & 0xFFFF0000) == 0; |
| if (ShouldSub) |
| Offset = -Offset; |
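The ShouldSub test asks whether the negated offset fits in 16 bits, in which case a single movw plus a subtract can replace the movw/movt pair. Restated on its own (using unsigned negation to keep the sketch free of signed overflow):

    #include <cstdint>

    bool fitsAsNegatedU16(int32_t Offset) {
      const uint32_t Neg = 0u - static_cast<uint32_t>(Offset);
      return (Neg & 0xFFFF0000u) == 0; // top 16 bits clear after negation
    }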
| @@ -949,15 +938,15 @@ void TargetARM32::legalizeStackSlots() { |
| return; |
| Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg()); |
| int32_t StackAdjust = 0; |
| - // Do a fairly naive greedy clustering for now. Pick the first stack slot |
| + // Do a fairly naive greedy clustering for now. Pick the first stack slot |
| // that's out of bounds and make a new base reg using the architecture's temp |
| - // register. If that works for the next slot, then great. Otherwise, create |
| - // a new base register, clobbering the previous base register. Never share a |
| - // base reg across different basic blocks. This isn't ideal if local and |
| + // register. If that works for the next slot, then great. Otherwise, create a |
| + // new base register, clobbering the previous base register. Never share a |
| + // base reg across different basic blocks. This isn't ideal if local and |
| // multi-block variables are far apart and their references are interspersed. |
| - // It may help to be more coordinated about assign stack slot numbers |
| - // and may help to assign smaller offsets to higher-weight variables |
| - // so that they don't depend on this legalization. |
| + // It may help to be more coordinated about assigning stack slot numbers and |
| + // help to assign smaller offsets to higher-weight variables so that they |
| + // don't depend on this legalization. |
| for (CfgNode *Node : Func->getNodes()) { |
| Context.init(Node); |
| StackVariable *NewBaseReg = nullptr; |
| @@ -986,7 +975,7 @@ void TargetARM32::legalizeStackSlots() { |
| continue; |
| } |
| } |
| - // For now, only Mov instructions can have stack variables. We need to |
| + // For now, only Mov instructions can have stack variables. We need to |
| // know the type of instruction because we currently create a fresh one |
| // to replace Dest/Source, rather than mutate in place. |
| auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr); |
| @@ -1117,15 +1106,15 @@ Operand *TargetARM32::hiOperand(Operand *Operand) { |
| static_cast<uint32_t>(Const->getValue() >> 32)); |
| } |
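For 64-bit constants, loOperand and hiOperand simply split the value into 32-bit halves, as the cast above shows. A one-liner sketch of the same split:

    #include <cstdint>
    #include <utility>

    std::pair<uint32_t, uint32_t> splitI64(uint64_t V) {
      return {static_cast<uint32_t>(V),        // lo: bits 0..31
              static_cast<uint32_t>(V >> 32)}; // hi: bits 32..63
    }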
| if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) { |
| - // Conservatively disallow memory operands with side-effects |
| - // in case of duplication. |
| + // Conservatively disallow memory operands with side-effects in case of |
| + // duplication. |
| assert(Mem->getAddrMode() == OperandARM32Mem::Offset || |
| Mem->getAddrMode() == OperandARM32Mem::NegOffset); |
| const Type SplitType = IceType_i32; |
| if (Mem->isRegReg()) { |
| // We have to make a temp variable T, and add 4 to either Base or Index. |
| - // The Index may be shifted, so adding 4 can mean something else. |
| - // Thus, prefer T := Base + 4, and use T as the new Base. |
| + // The Index may be shifted, so adding 4 can mean something else. Thus, |
| + // prefer T := Base + 4, and use T as the new Base. |
| Variable *Base = Mem->getBase(); |
| Constant *Four = Ctx->getConstantInt32(4); |
| Variable *NewBase = Func->makeVariable(Base->getType()); |
| @@ -1144,8 +1133,8 @@ Operand *TargetARM32::hiOperand(Operand *Operand) { |
| // We have to make a temp variable and add 4 to either Base or Offset. |
| // If we add 4 to Offset, this will convert a non-RegReg addressing |
| // mode into a RegReg addressing mode. Since NaCl sandboxing disallows |
| - // RegReg addressing modes, prefer adding to base and replacing instead. |
| - // Thus we leave the old offset alone. |
| + // RegReg addressing modes, prefer adding to base and replacing |
| + // instead. Thus we leave the old offset alone. |
| Constant *Four = Ctx->getConstantInt32(4); |
| Variable *NewBase = Func->makeVariable(Base->getType()); |
| lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, |
| @@ -1195,11 +1184,11 @@ llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include, |
| void TargetARM32::lowerAlloca(const InstAlloca *Inst) { |
| UsesFramePointer = true; |
| - // Conservatively require the stack to be aligned. Some stack |
| - // adjustment operations implemented below assume that the stack is |
| - // aligned before the alloca. All the alloca code ensures that the |
| - // stack alignment is preserved after the alloca. The stack alignment |
| - // restriction can be relaxed in some cases. |
| + // Conservatively require the stack to be aligned. Some stack adjustment |
| + // operations implemented below assume that the stack is aligned before the |
| + // alloca. All the alloca code ensures that the stack alignment is preserved |
| + // after the alloca. The stack alignment restriction can be relaxed in some |
| + // cases. |
| NeedsStackAlignment = true; |
| // TODO(stichnot): minimize the number of adjustments of SP, etc. |
| @@ -1226,8 +1215,8 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) { |
| Operand *SubAmount = legalize(Ctx->getConstantInt32(Value)); |
| _sub(SP, SP, SubAmount); |
| } else { |
| - // Non-constant sizes need to be adjusted to the next highest |
| - // multiple of the required alignment at runtime. |
| + // Non-constant sizes need to be adjusted to the next highest multiple of |
| + // the required alignment at runtime. |
| TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex); |
| Variable *T = makeReg(IceType_i32); |
| _mov(T, TotalSize); |
| @@ -1265,8 +1254,8 @@ void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) { |
| case IceType_i64: { |
| Variable *ScratchReg = makeReg(IceType_i32); |
| _orrs(ScratchReg, SrcLoReg, SrcHi); |
| - // ScratchReg isn't going to be used, but we need the |
| - // side-effect of setting flags from this operation. |
| + // ScratchReg isn't going to be used, but we need the side-effect of |
| + // setting flags from this operation. |
| Context.insert(InstFakeUse::create(Func, ScratchReg)); |
| } |
| } |
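The orrs trick relies on a 64-bit value being zero exactly when the OR of its halves is zero, so one flag-setting instruction replaces two compares ahead of the trap. In plain C:

    #include <cstdint>

    bool isZeroI64(uint32_t Lo, uint32_t Hi) {
      return (Lo | Hi) == 0; // orrs scratch, lo, hi; take the trap path on EQ
    }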
| @@ -1310,21 +1299,21 @@ void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, |
| void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| Variable *Dest = Inst->getDest(); |
| - // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier |
| - // to legalize Src0 to flex or Src1 to flex and there is a reversible |
| - // instruction. E.g., reverse subtract with immediate, register vs |
| - // register, immediate. |
| - // Or it may be the case that the operands aren't swapped, but the |
| - // bits can be flipped and a different operation applied. |
| - // E.g., use BIC (bit clear) instead of AND for some masks. |
| + // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to |
| + // legalize Src0 to flex or Src1 to flex and there is a reversible |
| + // instruction. E.g., reverse subtract with immediate, register vs register, |
| + // immediate. |
| + // Or it may be the case that the operands aren't swapped, but the bits can |
| + // be flipped and a different operation applied. E.g., use BIC (bit clear) |
| + // instead of AND for some masks. |
| Operand *Src0 = legalizeUndef(Inst->getSrc(0)); |
| Operand *Src1 = legalizeUndef(Inst->getSrc(1)); |
| if (Dest->getType() == IceType_i64) { |
| - // These helper-call-involved instructions are lowered in this |
| - // separate switch. This is because we would otherwise assume that |
| - // we need to legalize Src0 to Src0RLo and Src0Hi. However, those go unused |
| - // with helper calls, and such unused/redundant instructions will fail |
| - // liveness analysis under -Om1 setting. |
| + // These helper-call-involved instructions are lowered in this separate |
| + // switch. This is because we would otherwise assume that we need to |
| + // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with |
| + // helper calls, and such unused/redundant instructions will fail liveness |
| + // analysis under -Om1 setting. |
| switch (Inst->getOp()) { |
| default: |
| break; |
| @@ -1332,11 +1321,10 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| case InstArithmetic::Sdiv: |
| case InstArithmetic::Urem: |
| case InstArithmetic::Srem: { |
| - // Check for divide by 0 (ARM normally doesn't trap, but we want it |
| - // to trap for NaCl). Src1Lo and Src1Hi may have already been legalized |
| - // to a register, which will hide a constant source operand. |
| - // Instead, check the not-yet-legalized Src1 to optimize-out a divide |
| - // by 0 check. |
| + // Check for divide by 0 (ARM normally doesn't trap, but we want it to |
| + // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a |
| + // register, which will hide a constant source operand. Instead, check |
| + // the not-yet-legalized Src1 to optimize-out a divide by 0 check. |
| if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) { |
| if (C64->getValue() == 0) { |
| _trap(); |
| @@ -1348,8 +1336,8 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| div0Check(IceType_i64, Src1Lo, Src1Hi); |
| } |
| // Technically, ARM has their own aeabi routines, but we can use the |
| - // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, |
| - // but uses the more standard __moddi3 for rem. |
| + // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses |
| + // the more standard __moddi3 for rem. |
| const char *HelperName = ""; |
| switch (Inst->getOp()) { |
| default: |
| @@ -1472,12 +1460,11 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| // lsl t_lo, b.lo, c.lo |
| // a.lo = t_lo |
| // a.hi = t_hi |
| - // Can be strength-reduced for constant-shifts, but we don't do |
| - // that for now. |
| - // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. |
| - // On ARM, shifts only take the lower 8 bits of the shift register, |
| - // and saturate to the range 0-32, so the negative value will |
| - // saturate to 32. |
| + // Can be strength-reduced for constant-shifts, but we don't do that for |
| + // now. |
| + // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On |
| + // ARM, shifts only take the lower 8 bits of the shift register, and |
| + // saturate to the range 0-32, so the negative value will saturate to 32. |
| Variable *T_Hi = makeReg(IceType_i32); |
| Variable *Src1RLo = legalizeToReg(Src1Lo); |
| Constant *ThirtyTwo = Ctx->getConstantInt32(32); |
| @@ -1493,8 +1480,8 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| _mov(DestHi, T_Hi); |
| Variable *T_Lo = makeReg(IceType_i32); |
| // _mov seems to sometimes have better register preferencing than lsl. |
| - // Otherwise mov w/ lsl shifted register is a pseudo-instruction |
| - // that maps to lsl. |
| + // Otherwise mov w/ lsl shifted register is a pseudo-instruction that |
| + // maps to lsl. |
| _mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo, |
| OperandARM32::LSL, Src1RLo)); |
| _mov(DestLo, T_Lo); |
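A C restatement of that shift-left lowering may help: the hi word is built from three shifted terms (mirroring the lsl/orr sequence), where out-of-range shift amounts contribute zero just as ARM's saturating shifter does. This models the emitted sequence, not the lowering code itself:

    #include <cstdint>

    uint64_t shl64(uint32_t BLo, uint32_t BHi, int32_t C) { // C in [0, 63]
      auto lsl = [](uint32_t V, int32_t S) -> uint32_t {
        return (S < 0 || S >= 32) ? 0 : V << S; // saturates like the hardware
      };
      auto lsr = [](uint32_t V, int32_t S) -> uint32_t {
        return (S < 0 || S >= 32) ? 0 : V >> S;
      };
      const uint32_t THi = lsl(BHi, C)          // t_hi  = b.hi << c
                           | lsr(BLo, 32 - C)   //       | b.lo >> (32 - c)
                           | lsl(BLo, C - 32);  //       | b.lo << (c - 32)
      const uint32_t TLo = lsl(BLo, C);
      return (static_cast<uint64_t>(THi) << 32) | TLo;
    }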
| @@ -1513,9 +1500,9 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) { |
| // a.hi = t_hi |
| case InstArithmetic::Ashr: { |
| // a=b>>c (signed) ==> ... |
| - // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, |
| - // and the next orr should be conditioned on PLUS. The last two |
| - // right shifts should also be arithmetic. |
| + // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, and the |
| + // next orr should be conditioned on PLUS. The last two right shifts |
| + // should also be arithmetic. |
| bool IsAshr = Inst->getOp() == InstArithmetic::Ashr; |
| Variable *T_Lo = makeReg(IceType_i32); |
| Variable *Src1RLo = legalizeToReg(Src1Lo); |
| @@ -1723,13 +1710,13 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) { |
| Operand *NewSrc; |
| if (Dest->hasReg()) { |
| // If Dest already has a physical register, then legalize the Src operand |
| - // into a Variable with the same register assignment. This especially |
| + // into a Variable with the same register assignment. This especially |
| // helps allow the use of Flex operands. |
| NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum()); |
| } else { |
| - // Dest could be a stack operand. Since we could potentially need |
| - // to do a Store (and store can only have Register operands), |
| - // legalize this to a register. |
| + // Dest could be a stack operand. Since we could potentially need to do a |
| + // Store (and store can only have Register operands), legalize this to a |
| + // register. |
| NewSrc = legalize(Src0, Legal_Reg); |
| } |
| if (isVectorType(Dest->getType())) { |
| @@ -1810,25 +1797,24 @@ void TargetARM32::lowerCall(const InstCall *Instr) { |
| } |
| } |
| - // Adjust the parameter area so that the stack is aligned. It is |
| - // assumed that the stack is already aligned at the start of the |
| - // calling sequence. |
| + // Adjust the parameter area so that the stack is aligned. It is assumed that |
| + // the stack is already aligned at the start of the calling sequence. |
| ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes); |
| - // Subtract the appropriate amount for the argument area. This also |
| - // takes care of setting the stack adjustment during emission. |
| + // Subtract the appropriate amount for the argument area. This also takes |
| + // care of setting the stack adjustment during emission. |
| // |
| - // TODO: If for some reason the call instruction gets dead-code |
| - // eliminated after lowering, we would need to ensure that the |
| - // pre-call and the post-call esp adjustment get eliminated as well. |
| + // TODO: If for some reason the call instruction gets dead-code eliminated |
| + // after lowering, we would need to ensure that the pre-call and the |
| + // post-call esp adjustment get eliminated as well. |
| if (ParameterAreaSizeBytes) { |
| Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), |
| Legal_Reg | Legal_Flex); |
| _adjust_stack(ParameterAreaSizeBytes, SubAmount); |
| } |
| - // Copy arguments that are passed on the stack to the appropriate |
| - // stack locations. |
| + // Copy arguments that are passed on the stack to the appropriate stack |
| + // locations. |
| Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
| for (auto &StackArg : StackArgs) { |
| ConstantInteger32 *Loc = |
| @@ -1850,9 +1836,9 @@ void TargetARM32::lowerCall(const InstCall *Instr) { |
| // Copy arguments to be passed in registers to the appropriate registers. |
| for (auto &GPRArg : GPRArgs) { |
| Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second); |
| - // Generate a FakeUse of register arguments so that they do not get |
| - // dead code eliminated as a result of the FakeKill of scratch |
| - // registers after the call. |
| + // Generate a FakeUse of register arguments so that they do not get dead |
| + // code eliminated as a result of the FakeKill of scratch registers after |
| + // the call. |
| Context.insert(InstFakeUse::create(Func, Reg)); |
| } |
| for (auto &FPArg : FPArgs) { |
| @@ -1860,8 +1846,8 @@ void TargetARM32::lowerCall(const InstCall *Instr) { |
| Context.insert(InstFakeUse::create(Func, Reg)); |
| } |
| - // Generate the call instruction. Assign its result to a temporary |
| - // with high register allocation weight. |
| + // Generate the call instruction. Assign its result to a temporary with high |
| + // register allocation weight. |
| Variable *Dest = Instr->getDest(); |
| // ReturnReg doubles as ReturnRegLo as necessary. |
| Variable *ReturnReg = nullptr; |
| @@ -1901,12 +1887,12 @@ void TargetARM32::lowerCall(const InstCall *Instr) { |
| } |
| } |
| Operand *CallTarget = Instr->getCallTarget(); |
| - // TODO(jvoung): Handle sandboxing. |
| - // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); |
| + // TODO(jvoung): Handle sandboxing. |
| + // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing(); |
| - // Allow ConstantRelocatable to be left alone as a direct call, |
| - // but force other constants like ConstantInteger32 to be in |
| - // a register and make it an indirect call. |
| + // Allow ConstantRelocatable to be left alone as a direct call, but force |
| + // other constants like ConstantInteger32 to be in a register and make it an |
| + // indirect call. |
| if (!llvm::isa<ConstantRelocatable>(CallTarget)) { |
| CallTarget = legalize(CallTarget, Legal_Reg); |
| } |
| @@ -1915,8 +1901,8 @@ void TargetARM32::lowerCall(const InstCall *Instr) { |
| if (ReturnRegHi) |
| Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
| - // Add the appropriate offset to SP. The call instruction takes care |
| - // of resetting the stack offset during emission. |
| + // Add the appropriate offset to SP. The call instruction takes care of |
| + // resetting the stack offset during emission. |
| if (ParameterAreaSizeBytes) { |
| Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes), |
| Legal_Reg | Legal_Flex); |
| @@ -2024,8 +2010,8 @@ void TargetARM32::lowerCast(const InstCast *Inst) { |
| Variable *DestLo = llvm::cast<Variable>(loOperand(Dest)); |
| Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| Variable *T_Lo = makeReg(DestLo->getType()); |
| - // i32 and i1 can just take up the whole register. |
| - // i32 doesn't need uxt, while i1 will have an and mask later anyway. |
| + // i32 and i1 can just take up the whole register. i32 doesn't need uxt, |
| + // while i1 will have an and mask later anyway. |
| if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) { |
| Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
| _mov(T_Lo, Src0RF); |
| @@ -2046,9 +2032,9 @@ void TargetARM32::lowerCast(const InstCast *Inst) { |
| Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex); |
| Constant *One = Ctx->getConstantInt32(1); |
| Variable *T = makeReg(Dest->getType()); |
| - // Just use _mov instead of _uxt since all registers are 32-bit. |
| - // _uxt requires the source to be a register so could have required |
| - // a _mov from legalize anyway. |
| + // Just use _mov instead of _uxt since all registers are 32-bit. _uxt |
| + // requires the source to be a register so could have required a _mov |
| + // from legalize anyway. |
| _mov(T, Src0RF); |
| _and(T, T, One); |
| _mov(Dest, T); |
| @@ -2212,8 +2198,8 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
| // mov.<C2> t, #0 mov.<C2> t, #0 |
| // mov a, t mov a, t |
| // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi" |
| - // is used for signed compares. In some cases, b and c need to be swapped |
| - // as well. |
| + // is used for signed compares. In some cases, b and c need to be swapped as |
| + // well. |
| // |
| // LLVM does: |
| // for EQ and NE: |
| @@ -2223,13 +2209,12 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
| // mov.<C> t, #1 |
| // mov a, t |
| // |
| - // that's nice in that it's just as short but has fewer dependencies |
| - // for better ILP at the cost of more registers. |
| + // that's nice in that it's just as short but has fewer dependencies for |
| + // better ILP at the cost of more registers. |
| // |
| - // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with |
| - // two unconditional mov #0, two cmps, two conditional mov #1, |
| - // and one conditonal reg mov. That has few dependencies for good ILP, |
| - // but is a longer sequence. |
| + // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two |
| + // unconditional mov #0, two cmps, two conditional mov #1, and one conditonal |
|
Jim Stichnoth
2015/09/16 00:01:29
conditional
ascull
2015/09/16 18:30:09
Done.
|
| + // reg mov. That has few dependencies for good ILP, but is a longer sequence. |
| // |
| // So, we are going with the GCC version since it's usually better (except |
| // perhaps for eq/ne). We could revisit special-casing eq/ne later. |
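The flag-reading in the GCC-style sequence can be modeled directly: the low-word compare sets the borrow, sbcs folds it into the high-word subtraction, and the signed conditions read N and V from that result. A sketch of signed "<" under this model (a model of the flag behavior, not the lowered instructions):

    #include <cstdint>

    bool sltI64(uint32_t ALo, uint32_t AHi, uint32_t BLo, uint32_t BHi) {
      const uint32_t Borrow = (ALo < BLo) ? 1 : 0; // cmp a.lo, b.lo
      // sbcs t, a.hi, b.hi: a 32-bit result plus the N and V flags.
      const int64_t Wide = static_cast<int64_t>(static_cast<int32_t>(AHi)) -
                           static_cast<int32_t>(BHi) - Borrow;
      const bool N = (static_cast<uint32_t>(Wide) >> 31) != 0; // sign bit
      const bool V = Wide < INT32_MIN || Wide > INT32_MAX;     // overflow
      return N != V; // the LT condition
    }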
| @@ -2257,8 +2242,8 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
| Variable *ScratchReg = makeReg(IceType_i32); |
| _cmp(Src0Lo, Src1LoRF); |
| _sbcs(ScratchReg, Src0Hi, Src1HiRF); |
| - // ScratchReg isn't going to be used, but we need the |
| - // side-effect of setting flags from this operation. |
| + // ScratchReg isn't going to be used, but we need the side-effect of |
| + // setting flags from this operation. |
| Context.insert(InstFakeUse::create(Func, ScratchReg)); |
| } else { |
| _cmp(Src0Hi, Src1HiRF); |
| @@ -2278,8 +2263,8 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
| // mov.C1 t, #0 |
| // mov.C2 t, #1 |
| // mov a, t |
| - // where the unsigned/sign extension is not needed for 32-bit. |
| - // They also have special cases for EQ and NE. E.g., for NE: |
| + // where the unsigned/sign extension is not needed for 32-bit. They also have |
| + // special cases for EQ and NE. E.g., for NE: |
| // <extend to tb, tc> |
| // subs t, tb, tc |
| // movne t, #1 |
| @@ -2292,13 +2277,13 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) { |
| // mov.<C> t, #1 |
| // mov a, t |
| // |
| - // the left shift is by 0, 16, or 24, which allows the comparison to focus |
| - // on the digits that actually matter (for 16-bit or 8-bit signed/unsigned). |
| - // For the unsigned case, for some reason it does similar to GCC and does |
| - // a uxtb first. It's not clear to me why that special-casing is needed. |
| + // the left shift is by 0, 16, or 24, which allows the comparison to focus on |
| + // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For |
| + // the unsigned case, for some reason it does something similar to GCC and |
| + // does a uxtb first. It's not clear to me why that special-casing is needed. |
| // |
| - // We'll go with the LLVM way for now, since it's shorter and has just as |
| - // few dependencies. |
| + // We'll go with the LLVM way for now, since it's shorter and has just as few |
| + // dependencies. |
| int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType()); |
| assert(ShiftAmt >= 0); |
| Constant *ShiftConst = nullptr; |
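The left-shift trick works because shifting both operands by the same amount moves the significant i8/i16 bits to the top of the register, so a single 32-bit cmp decides the narrow compare; the identical shift preserves both signed and unsigned order. A sketch for signed "<" (assuming BitWidth is 8, 16, or 32):

    #include <cstdint>

    bool sltNarrow(int32_t A, int32_t B, uint32_t BitWidth) {
      const uint32_t ShiftAmt = 32 - BitWidth; // 24 for i8, 16 for i16
      const int32_t AS =
          static_cast<int32_t>(static_cast<uint32_t>(A) << ShiftAmt);
      const int32_t BS =
          static_cast<int32_t>(static_cast<uint32_t>(B) << ShiftAmt);
      return AS < BS;
    }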
| @@ -2341,9 +2326,9 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
| UnimplementedError(Func->getContext()->getFlags()); |
| return; |
| case Intrinsics::AtomicFenceAll: |
| - // NOTE: FenceAll should prevent and load/store from being moved |
| - // across the fence (both atomic and non-atomic). The InstARM32Mfence |
| - // instruction is currently marked coarsely as "HasSideEffects". |
| + // NOTE: FenceAll should prevent any load/store from being moved across the |
| + // fence (both atomic and non-atomic). The InstARM32Mfence instruction is |
| + // currently marked coarsely as "HasSideEffects". |
| UnimplementedError(Func->getContext()->getFlags()); |
| return; |
| case Intrinsics::AtomicIsLockFree: { |
| @@ -2401,10 +2386,10 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
| Call->addArg(Val); |
| lowerCall(Call); |
| // The popcount helpers always return 32-bit values, while the intrinsic's |
| - // signature matches some 64-bit platform's native instructions and |
| - // expect to fill a 64-bit reg. Thus, clear the upper bits of the dest |
| - // just in case the user doesn't do that in the IR or doesn't toss the bits |
| - // via truncate. |
| + // signature matches some 64-bit platform's native instructions and expects |
| + // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in |
| + // case the user doesn't do that in the IR or doesn't toss the bits via |
| + // truncate. |
| if (Val->getType() == IceType_i64) { |
| Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest)); |
| Constant *Zero = Ctx->getConstantZero(IceType_i32); |
| @@ -2415,8 +2400,8 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) { |
| return; |
| } |
| case Intrinsics::Ctlz: { |
| - // The "is zero undef" parameter is ignored and we always return |
| - // a well-defined value. |
| + // The "is zero undef" parameter is ignored and we always return a |
| + // well-defined value. |
| Operand *Val = Instr->getArg(0); |
| Variable *ValLoR; |
| Variable *ValHiR = nullptr; |
| @@ -2563,9 +2548,9 @@ void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) { |
| Variable *T2 = makeReg(IceType_i32); |
| _add(T2, T, ThirtyTwo); |
| _clz(T2, ValHiR, CondARM32::NE); |
| - // T2 is actually a source as well when the predicate is not AL |
| - // (since it may leave T2 alone). We use set_dest_nonkillable to |
| - // prolong the liveness of T2 as if it was used as a source. |
| + // T2 is actually a source as well when the predicate is not AL (since it |
| + // may leave T2 alone). We use set_dest_nonkillable to prolong the liveness |
| + // of T2 as if it was used as a source. |
| _set_dest_nonkillable(); |
| _mov(DestLo, T2); |
| Variable *T3 = nullptr; |
| @@ -2578,15 +2563,14 @@ void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) { |
| } |
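The i64 ctlz composition above reads, in plain C: take clz of the high word when it is nonzero, otherwise 32 plus clz of the low word (the lowered code computes the low-word form first, then conditionally overwrites it when hi != 0). A software stand-in for the clz instruction is included to keep the sketch self-contained:

    #include <cstdint>

    uint32_t clz32(uint32_t X) {
      if (X == 0)
        return 32;
      uint32_t N = 0;
      while ((X & 0x80000000u) == 0) { // shift until the top bit is set
        X <<= 1;
        ++N;
      }
      return N;
    }

    uint32_t clz64(uint32_t Lo, uint32_t Hi) {
      return Hi != 0 ? clz32(Hi) : 32 + clz32(Lo);
    }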
| void TargetARM32::lowerLoad(const InstLoad *Load) { |
| - // A Load instruction can be treated the same as an Assign |
| - // instruction, after the source operand is transformed into an |
| - // OperandARM32Mem operand. |
| + // A Load instruction can be treated the same as an Assign instruction, after |
| + // the source operand is transformed into an OperandARM32Mem operand. |
| Type Ty = Load->getDest()->getType(); |
| Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty); |
| Variable *DestLoad = Load->getDest(); |
| - // TODO(jvoung): handled folding opportunities. Sign and zero extension |
| - // can be folded into a load. |
| + // TODO(jvoung): handle folding opportunities. Sign and zero extension can |
| + // be folded into a load. |
| InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0); |
| lowerAssign(Assign); |
| } |
| @@ -2632,17 +2616,15 @@ void TargetARM32::lowerRet(const InstRet *Inst) { |
| _mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0); |
| } |
| } |
| - // Add a ret instruction even if sandboxing is enabled, because |
| - // addEpilog explicitly looks for a ret instruction as a marker for |
| - // where to insert the frame removal instructions. |
| - // addEpilog is responsible for restoring the "lr" register as needed |
| - // prior to this ret instruction. |
| + // Add a ret instruction even if sandboxing is enabled, because addEpilog |
| + // explicitly looks for a ret instruction as a marker for where to insert the |
| + // frame removal instructions. addEpilog is responsible for restoring the |
| + // "lr" register as needed prior to this ret instruction. |
| _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg); |
| - // Add a fake use of sp to make sure sp stays alive for the entire |
| - // function. Otherwise post-call sp adjustments get dead-code |
| - // eliminated. TODO: Are there more places where the fake use |
| - // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not |
| - // have a ret instruction. |
| + // Add a fake use of sp to make sure sp stays alive for the entire function. |
| + // Otherwise post-call sp adjustments get dead-code eliminated. |
| + // TODO: Are there more places where the fake use should be inserted? E.g. |
| + // "void f(int n){while(1) g(n);}" may not have a ret instruction. |
| Variable *SP = getPhysicalRegister(RegARM32::Reg_sp); |
| Context.insert(InstFakeUse::create(Func, SP)); |
| } |
| @@ -2776,8 +2758,8 @@ Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) { |
| if (isVectorType(Ty) || isFloatingType(Ty)) { |
| _vmov(Reg, Src); |
| } else { |
| - // Mov's Src operand can really only be the flexible second operand type |
| - // or a register. Users should guarantee that. |
| + // Mov's Src operand can really only be the flexible second operand type or |
| + // a register. Users should guarantee that. |
| _mov(Reg, Src); |
| } |
| return Reg; |
| @@ -2786,18 +2768,17 @@ Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) { |
| Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, |
| int32_t RegNum) { |
| Type Ty = From->getType(); |
| - // Assert that a physical register is allowed. To date, all calls |
| - // to legalize() allow a physical register. Legal_Flex converts |
| - // registers to the right type OperandARM32FlexReg as needed. |
| + // Assert that a physical register is allowed. To date, all calls to |
| + // legalize() allow a physical register. Legal_Flex converts registers to the |
| + // right type OperandARM32FlexReg as needed. |
| assert(Allowed & Legal_Reg); |
| - // Go through the various types of operands: |
| - // OperandARM32Mem, OperandARM32Flex, Constant, and Variable. |
| - // Given the above assertion, if type of operand is not legal |
| - // (e.g., OperandARM32Mem and !Legal_Mem), we can always copy |
| - // to a register. |
| + // Go through the various types of operands: OperandARM32Mem, |
| + // OperandARM32Flex, Constant, and Variable. Given the above assertion, if |
| + // type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we |
| + // can always copy to a register. |
| if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) { |
| - // Before doing anything with a Mem operand, we need to ensure |
| - // that the Base and Index components are in physical registers. |
| + // Before doing anything with a Mem operand, we need to ensure that the |
| + // Base and Index components are in physical registers. |
| Variable *Base = Mem->getBase(); |
| Variable *Index = Mem->getIndex(); |
| Variable *RegBase = nullptr; |
| @@ -2842,8 +2823,8 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, |
| if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) { |
| if (FlexReg->getShiftOp() == OperandARM32::kNoShift) { |
| From = FlexReg->getReg(); |
| - // Fall through and let From be checked as a Variable below, |
| - // where it may or may not need a register. |
| + // Fall through and let From be checked as a Variable below, where it |
| + // may or may not need a register. |
| } else { |
| return copyToReg(Flex, RegNum); |
| } |
| @@ -2868,10 +2849,10 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, |
| uint32_t RotateAmt; |
| uint32_t Immed_8; |
| uint32_t Value = static_cast<uint32_t>(C32->getValue()); |
| - // Check if the immediate will fit in a Flexible second operand, |
| - // if a Flexible second operand is allowed. We need to know the exact |
| - // value, so that rules out relocatable constants. |
| - // Also try the inverse and use MVN if possible. |
| + // Check if the immediate will fit in a Flexible second operand, if a |
| + // Flexible second operand is allowed. We need to know the exact value, |
| + // so that rules out relocatable constants. Also try the inverse and use |
| + // MVN if possible. |
| if (CanBeFlex && |
| OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) { |
| return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt); |
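The canHoldImm test corresponds to ARM's modified-immediate rule: a value is encodable as a flexible second operand if it equals an 8-bit constant rotated right by an even amount. A standalone sketch (whether the helper reports the rotation as Rot or as the encoded 4-bit field Rot/2 is an assumption here):

    #include <cstdint>

    bool canEncodeAsFlexImm(uint32_t Value, uint32_t *RotateAmt,
                            uint32_t *Immed8) {
      for (uint32_t Rot = 0; Rot < 32; Rot += 2) {
        // Rotating left by Rot undoes a rotate-right-by-Rot encoding.
        const uint32_t Undone =
            (Value << Rot) | (Value >> ((32 - Rot) & 31));
        if (Undone <= 0xFF) {
          *RotateAmt = Rot; // the encoded 4-bit field would hold Rot / 2
          *Immed8 = Undone;
          return true;
        }
      }
      return false;
    }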
| @@ -2901,12 +2882,12 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, |
| } else { |
| assert(isScalarFloatingType(Ty)); |
| // Load floats/doubles from literal pool. |
| - // TODO(jvoung): Allow certain immediates to be encoded directly in |
| - // an operand. See Table A7-18 of the ARM manual: |
| - // "Floating-point modified immediate constants". |
| - // Or, for 32-bit floating point numbers, just encode the raw bits |
| - // into a movw/movt pair to GPR, and vmov to an SREG, instead of using |
| - // a movw/movt pair to get the const-pool address then loading to SREG. |
| + // TODO(jvoung): Allow certain immediates to be encoded directly in an |
| + // operand. See Table A7-18 of the ARM manual: "Floating-point modified |
| + // immediate constants". Or, for 32-bit floating point numbers, just |
| + // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG, |
| + // instead of using a movw/movt pair to get the const-pool address then |
| + // loading to SREG. |
| std::string Buffer; |
| llvm::raw_string_ostream StrBuf(Buffer); |
| llvm::cast<Constant>(From)->emitPoolLabel(StrBuf); |
| @@ -2921,9 +2902,9 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed, |
| } |
| if (auto Var = llvm::dyn_cast<Variable>(From)) { |
| - // Check if the variable is guaranteed a physical register. This |
| - // can happen either when the variable is pre-colored or when it is |
| - // assigned infinite weight. |
| + // Check if the variable is guaranteed a physical register. This can happen |
| + // either when the variable is pre-colored or when it is assigned infinite |
| + // weight. |
| bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg()); |
| // We need a new physical register for the operand if: |
| // Mem is not allowed and Var isn't guaranteed a physical |
| @@ -2949,17 +2930,16 @@ Variable *TargetARM32::legalizeToReg(Operand *From, int32_t RegNum) { |
| Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) { |
| Type Ty = From->getType(); |
| if (llvm::isa<ConstantUndef>(From)) { |
| - // Lower undefs to zero. Another option is to lower undefs to an |
| - // uninitialized register; however, using an uninitialized register |
| - // results in less predictable code. |
| + // Lower undefs to zero. Another option is to lower undefs to an |
| + // uninitialized register; however, using an uninitialized register results |
| + // in less predictable code. |
| // |
| - // If in the future the implementation is changed to lower undef |
| - // values to uninitialized registers, a FakeDef will be needed: |
| - // Context.insert(InstFakeDef::create(Func, Reg)); |
| - // This is in order to ensure that the live range of Reg is not |
| - // overestimated. If the constant being lowered is a 64 bit value, |
| - // then the result should be split and the lo and hi components will |
| - // need to go in uninitialized registers. |
| + // If in the future the implementation is changed to lower undef values to |
| + // uninitialized registers, a FakeDef will be needed: |
| + //   Context.insert(InstFakeDef::create(Func, Reg)); |
| + // This is in order to ensure that the live range of Reg is not |
| + // overestimated. If the constant being lowered is a 64 bit value, then the |
| + // result should be split and the lo and hi components will need to go in |
| + // uninitialized registers. |
| if (isVectorType(Ty)) |
| return makeVectorOfZeros(Ty, RegNum); |
| return Ctx->getConstantZero(Ty); |
| @@ -2969,15 +2949,15 @@ Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) { |
| OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) { |
| OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand); |
| - // It may be the case that address mode optimization already creates |
| - // an OperandARM32Mem, so in that case it wouldn't need another level |
| - // of transformation. |
| + // It may be the case that address mode optimization already creates an |
| + // OperandARM32Mem, so in that case it wouldn't need another level of |
| + // transformation. |
| if (Mem) { |
| return llvm::cast<OperandARM32Mem>(legalize(Mem)); |
| } |
| - // If we didn't do address mode optimization, then we only |
| - // have a base/offset to work with. ARM always requires a base |
| - // register, so just use that to hold the operand. |
| + // If we didn't do address mode optimization, then we only have a base/offset |
| + // to work with. ARM always requires a base register, so just use that to |
| + // hold the operand. |
| Variable *Base = legalizeToReg(Operand); |
| return OperandARM32Mem::create( |
| Func, Ty, Base, |
| @@ -3000,9 +2980,9 @@ void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) { |
| uint32_t RotateAmt; |
| uint32_t Immed_8; |
| Operand *Mask; |
| - // Use AND or BIC to mask off the bits, depending on which immediate fits |
| - // (if it fits at all). Assume Align is usually small, in which case BIC |
| - // works better. Thus, this rounds down to the alignment. |
| + // Use AND or BIC to mask off the bits, depending on which immediate fits (if |
| + // it fits at all). Assume Align is usually small, in which case BIC works |
| + // better. Thus, this rounds down to the alignment. |
| if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) { |
| Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex); |
| _bic(Reg, Reg, Mask); |
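Both mask choices compute the same round-down: BIC clears the bits set in (Align - 1), while AND keeps the bits set in its complement; the only question is which of the two masks happens to encode as a flexible immediate. The underlying computation:

    #include <cstdint>

    // Round Value down to a power-of-two alignment.
    uint32_t alignDownPow2(uint32_t Value, uint32_t Align) {
      return Value & ~(Align - 1); // bic Value, Value, #(Align - 1)
    }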
| @@ -3094,17 +3074,18 @@ void TargetHeaderARM32::lower() { |
| OstreamLocker L(Ctx); |
| Ostream &Str = Ctx->getStrEmit(); |
| Str << ".syntax unified\n"; |
| - // Emit build attributes in format: .eabi_attribute TAG, VALUE. |
| - // See Sec. 2 of "Addenda to, and Errata in the ABI for the ARM architecture" |
| - // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf |
| + // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of |
| + // "Addenda to, and Errata in the ABI for the ARM architecture" |
| + // http://infocenter.arm.com |
| + // /help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf |
| // |
| - // Tag_conformance should be be emitted first in a file-scope |
| - // sub-subsection of the first public subsection of the attributes. |
| + // Tag_conformance should be emitted first in a file-scope sub-subsection |
| + // of the first public subsection of the attributes. |
| Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n"; |
| - // Chromebooks are at least A15, but do A9 for higher compat. |
| - // For some reason, the LLVM ARM asm parser has the .cpu directive override |
| - // the mattr specified on the commandline. So to test hwdiv, we need to set |
| - // the .cpu directive higher (can't just rely on --mattr=...). |
| + // Chromebooks are at least A15, but do A9 for higher compat. For some |
| + // reason, the LLVM ARM asm parser has the .cpu directive override the mattr |
| + // specified on the commandline. So to test hwdiv, we need to set the .cpu |
| + // directive higher (can't just rely on --mattr=...). |
| if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) { |
| Str << ".cpu cortex-a15\n"; |
| } else { |