Chromium Code Reviews

Unified Diff: src/IceTargetLoweringARM32.cpp

Issue 1341423002: Reflow comments to use the full width. (Closed) Base URL: https://chromium.googlesource.com/native_client/pnacl-subzero.git@master
Patch Set: Fix spelling and rebase (created 5 years, 3 months ago)
Index: src/IceTargetLoweringARM32.cpp
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index 0634e452d90e6e340a3b4a54b8a745beba732efe..fef145f3b47dec98fea1db86b5da96d9b5ab4de3 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -47,7 +47,7 @@ namespace {
} while (0)
// The following table summarizes the logic for lowering the icmp instruction
-// for i32 and narrower types. Each icmp condition has a clear mapping to an
+// for i32 and narrower types. Each icmp condition has a clear mapping to an
// ARM32 conditional move instruction.
const struct TableIcmp32_ {
@@ -62,8 +62,8 @@ const struct TableIcmp32_ {
// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type. Two conditional moves are needed for setting to 1 or 0.
-// The operands may need to be swapped, and there is a slight difference
-// for signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
+// The operands may need to be swapped, and there is a slight difference for
+// signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
const struct TableIcmp64_ {
bool IsSigned;
bool Swapped;
@@ -82,18 +82,16 @@ CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
return TableIcmp32[Index].Mapping;
}
-// In some cases, there are x-macros tables for both high-level and
-// low-level instructions/operands that use the same enum key value.
-// The tables are kept separate to maintain a proper separation
-// between abstraction layers. There is a risk that the tables could
-// get out of sync if enum values are reordered or if entries are
-// added or deleted. The following dummy namespaces use
+// In some cases, there are x-macros tables for both high-level and low-level
+// instructions/operands that use the same enum key value. The tables are kept
+// separate to maintain a proper separation between abstraction layers. There
+// is a risk that the tables could get out of sync if enum values are reordered
+// or if entries are added or deleted. The following dummy namespaces use
// static_asserts to ensure everything is kept in sync.
// Validate the enum values in ICMPARM32_TABLE.
namespace dummy1 {
-// Define a temporary set of enum values based on low-level table
-// entries.
+// Define a temporary set of enum values based on low-level table entries.
enum _tmp_enum {
#define X(val, signed, swapped64, C_32, C1_64, C2_64) _tmp_##val,
ICMPARM32_TABLE
@@ -104,8 +102,8 @@ enum _tmp_enum {
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
-// Define a set of constants based on low-level table entries, and
-// ensure the table entry keys are consistent.
+// Define a set of constants based on low-level table entries, and ensure the
+// table entry keys are consistent.
#define X(val, signed, swapped64, C_32, C1_64, C2_64) \
static const int _table2_##val = _tmp_##val; \
static_assert( \
@@ -113,8 +111,8 @@ ICEINSTICMP_TABLE
"Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE");
ICMPARM32_TABLE
#undef X
-// Repeat the static asserts with respect to the high-level table
-// entries in case the high-level table has extra entries.
+// Repeat the static asserts with respect to the high-level table entries in
+// case the high-level table has extra entries.
#define X(tag, str) \
static_assert( \
_table1_##tag == _table2_##tag, \
@@ -126,17 +124,17 @@ ICEINSTICMP_TABLE
// Stack alignment
const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;
-// Value is in bytes. Return Value adjusted to the next highest multiple
-// of the stack alignment.
+// Value is in bytes. Return Value adjusted to the next highest multiple of the
+// stack alignment.
uint32_t applyStackAlignment(uint32_t Value) {
return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
}
-// Value is in bytes. Return Value adjusted to the next highest multiple
-// of the stack alignment required for the given type.
+// Value is in bytes. Return Value adjusted to the next highest multiple of the
+// stack alignment required for the given type.
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
- // Use natural alignment, except that normally (non-NaCl) ARM only
- // aligns vectors to 8 bytes.
+ // Use natural alignment, except that normally (non-NaCl) ARM only aligns
+ // vectors to 8 bytes.
// TODO(jvoung): Check this ...
size_t typeAlignInBytes = typeWidthInBytes(Ty);
if (isVectorType(Ty))
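The rounding these helpers perform is ordinary round-up-to-a-multiple arithmetic. A minimal sketch of what the comment describes, using a local helper rather than Utils::applyAlignment (whose exact definition is not shown in this hunk):

    #include <cassert>
    #include <cstdint>

    // Round Value up to the next multiple of Align (Align is a power of two).
    static uint32_t roundUpToMultiple(uint32_t Value, uint32_t Align) {
      return (Value + Align - 1) & ~(Align - 1);
    }

    int main() {
      // With ARM32_STACK_ALIGNMENT_BYTES == 16:
      assert(roundUpToMultiple(20, 16) == 32); // 20 bytes of stack -> 32
      assert(roundUpToMultiple(48, 16) == 48); // already aligned, unchanged
      return 0;
    }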
@@ -172,9 +170,8 @@ TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
TargetARM32::TargetARM32(Cfg *Func)
: TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) {
- // TODO: Don't initialize IntegerRegisters and friends every time.
- // Instead, initialize in some sort of static initializer for the
- // class.
+ // TODO: Don't initialize IntegerRegisters and friends every time. Instead,
+ // initialize in some sort of static initializer for the class.
// Limit this size (or do all bitsets need to be the same width)???
llvm::SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
llvm::SmallBitVector Float32Registers(RegARM32::Reg_NUM);
@@ -243,19 +240,18 @@ void TargetARM32::translateO2() {
// Argument lowering
Func->doArgLowering();
- // Target lowering. This requires liveness analysis for some parts
- // of the lowering decisions, such as compare/branch fusing. If
- // non-lightweight liveness analysis is used, the instructions need
- // to be renumbered first. TODO: This renumbering should only be
- // necessary if we're actually calculating live intervals, which we
- // only do for register allocation.
+ // Target lowering. This requires liveness analysis for some parts of the
+ // lowering decisions, such as compare/branch fusing. If non-lightweight
+ // liveness analysis is used, the instructions need to be renumbered first.
+ // TODO: This renumbering should only be necessary if we're actually
+ // calculating live intervals, which we only do for register allocation.
Func->renumberInstructions();
if (Func->hasError())
return;
- // TODO: It should be sufficient to use the fastest liveness
- // calculation, i.e. livenessLightweight(). However, for some
- // reason that slows down the rest of the translation. Investigate.
+ // TODO: It should be sufficient to use the fastest liveness calculation,
+ // i.e. livenessLightweight(). However, for some reason that slows down the
+ // rest of the translation. Investigate.
Func->liveness(Liveness_Basic);
if (Func->hasError())
return;
@@ -266,19 +262,19 @@ void TargetARM32::translateO2() {
return;
Func->dump("After ARM32 codegen");
- // Register allocation. This requires instruction renumbering and
- // full liveness analysis.
+ // Register allocation. This requires instruction renumbering and full
+ // liveness analysis.
Func->renumberInstructions();
if (Func->hasError())
return;
Func->liveness(Liveness_Intervals);
if (Func->hasError())
return;
- // Validate the live range computations. The expensive validation
- // call is deliberately only made when assertions are enabled.
+ // Validate the live range computations. The expensive validation call is
+ // deliberately only made when assertions are enabled.
assert(Func->validateLiveness());
- // The post-codegen dump is done here, after liveness analysis and
- // associated cleanup, to make the dump cleaner and more useful.
+ // The post-codegen dump is done here, after liveness analysis and associated
+ // cleanup, to make the dump cleaner and more useful.
Func->dump("After initial ARM32 codegen");
Func->getVMetadata()->init(VMK_All);
regAlloc(RAK_Global);
@@ -305,11 +301,10 @@ void TargetARM32::translateO2() {
Func->contractEmptyNodes();
Func->reorderNodes();
- // Branch optimization. This needs to be done just before code
- // emission. In particular, no transformations that insert or
- // reorder CfgNodes should be done after branch optimization. We go
- // ahead and do it before nop insertion to reduce the amount of work
- // needed for searching for opportunities.
+ // Branch optimization. This needs to be done just before code emission. In
+ // particular, no transformations that insert or reorder CfgNodes should be
+ // done after branch optimization. We go ahead and do it before nop insertion
+ // to reduce the amount of work needed for searching for opportunities.
Func->doBranchOpt();
Func->dump("After branch optimization");
@@ -395,8 +390,8 @@ Variable *TargetARM32::getPhysicalRegister(SizeT RegNum, Type Ty) {
Reg = Func->makeVariable(Ty);
Reg->setRegNum(RegNum);
PhysicalRegisters[Ty][RegNum] = Reg;
- // Specially mark SP and LR as an "argument" so that it is considered
- // live upon function entry.
+ // Specially mark SP and LR as an "argument" so that it is considered live
+ // upon function entry.
if (RegNum == RegARM32::Reg_sp || RegNum == RegARM32::Reg_lr) {
Func->addImplicitArg(Reg);
Reg->setIgnoreLiveness();
@@ -445,15 +440,15 @@ bool TargetARM32::CallingConv::I64InRegs(std::pair<int32_t, int32_t> *Regs) {
if (NumGPRRegsUsed >= ARM32_MAX_GPR_ARG)
return false;
int32_t RegLo, RegHi;
- // Always start i64 registers at an even register, so this may end
- // up padding away a register.
+ // Always start i64 registers at an even register, so this may end up padding
+ // away a register.
NumGPRRegsUsed = Utils::applyAlignment(NumGPRRegsUsed, 2);
RegLo = RegARM32::Reg_r0 + NumGPRRegsUsed;
++NumGPRRegsUsed;
RegHi = RegARM32::Reg_r0 + NumGPRRegsUsed;
++NumGPRRegsUsed;
- // If this bumps us past the boundary, don't allocate to a register
- // and leave any previously speculatively consumed registers as consumed.
+ // If this bumps us past the boundary, don't allocate to a register and leave
+ // any previously speculatively consumed registers as consumed.
if (NumGPRRegsUsed > ARM32_MAX_GPR_ARG)
return false;
Regs->first = RegLo;
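The even-register rule described above can pad away a GPR. A small sketch of the book-keeping, using plain register indices relative to r0 (an illustration of the rule, not the actual CallingConv implementation):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t NumGPRRegsUsed = 1;                 // r0 already holds an i32 arg
      NumGPRRegsUsed = (NumGPRRegsUsed + 1) & ~1u; // align to an even register
      uint32_t RegLo = NumGPRRegsUsed++;           // r2 (r1 is padded away)
      uint32_t RegHi = NumGPRRegsUsed++;           // r3
      assert(RegLo == 2 && RegHi == 3);
      return 0;
    }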
@@ -474,15 +469,15 @@ bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
return false;
if (isVectorType(Ty)) {
NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 4);
- // Q registers are declared in reverse order, so
- // RegARM32::Reg_q0 > RegARM32::Reg_q1. Therefore, we need to subtract
- // NumFPRegUnits from Reg_q0. Same thing goes for D registers.
+ // Q registers are declared in reverse order, so RegARM32::Reg_q0 >
+ // RegARM32::Reg_q1. Therefore, we need to subtract NumFPRegUnits from
+ // Reg_q0. Same thing goes for D registers.
static_assert(RegARM32::Reg_q0 > RegARM32::Reg_q1,
"ARM32 Q registers are possibly declared incorrectly.");
*Reg = RegARM32::Reg_q0 - (NumFPRegUnits / 4);
NumFPRegUnits += 4;
- // If this bumps us past the boundary, don't allocate to a register
- // and leave any previously speculatively consumed registers as consumed.
+ // If this bumps us past the boundary, don't allocate to a register and
+ // leave any previously speculatively consumed registers as consumed.
if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
return false;
} else if (Ty == IceType_f64) {
@@ -491,8 +486,8 @@ bool TargetARM32::CallingConv::FPInReg(Type Ty, int32_t *Reg) {
NumFPRegUnits = Utils::applyAlignment(NumFPRegUnits, 2);
*Reg = RegARM32::Reg_d0 - (NumFPRegUnits / 2);
NumFPRegUnits += 2;
- // If this bumps us past the boundary, don't allocate to a register
- // and leave any previously speculatively consumed registers as consumed.
+ // If this bumps us past the boundary, don't allocate to a register and
+ // leave any previously speculatively consumed registers as consumed.
if (NumFPRegUnits > ARM32_MAX_FP_REG_UNITS)
return false;
} else {
@@ -509,9 +504,9 @@ void TargetARM32::lowerArguments() {
VarList &Args = Func->getArgs();
TargetARM32::CallingConv CC;
- // For each register argument, replace Arg in the argument list with the
- // home register. Then generate an instruction in the prolog to copy the
- // home register to the assigned location of Arg.
+ // For each register argument, replace Arg in the argument list with the home
+ // register. Then generate an instruction in the prolog to copy the home
+ // register to the assigned location of Arg.
Context.init(Func->getEntryNode());
Context.setInsertPoint(Context.getCur());
@@ -568,13 +563,12 @@ void TargetARM32::lowerArguments() {
// Helper function for addProlog().
//
-// This assumes Arg is an argument passed on the stack. This sets the
-// frame offset for Arg and updates InArgsSizeBytes according to Arg's
-// width. For an I64 arg that has been split into Lo and Hi components,
-// it calls itself recursively on the components, taking care to handle
-// Lo first because of the little-endian architecture. Lastly, this
-// function generates an instruction to copy Arg into its assigned
-// register if applicable.
+// This assumes Arg is an argument passed on the stack. This sets the frame
+// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
+// I64 arg that has been split into Lo and Hi components, it calls itself
+// recursively on the components, taking care to handle Lo first because of the
+// little-endian architecture. Lastly, this function generates an instruction
+// to copy Arg into its assigned register if applicable.
void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
size_t BasicFrameOffset,
size_t &InArgsSizeBytes) {
@@ -591,8 +585,8 @@ void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
InArgsSizeBytes = applyStackAlignmentTy(InArgsSizeBytes, Ty);
Arg->setStackOffset(BasicFrameOffset + InArgsSizeBytes);
InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
- // If the argument variable has been assigned a register, we need to load
- // the value from the stack slot.
+ // If the argument variable has been assigned a register, we need to load the
+ // value from the stack slot.
if (Arg->hasReg()) {
assert(Ty != IceType_i64);
OperandARM32Mem *Mem = OperandARM32Mem::create(
@@ -606,10 +600,9 @@ void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
} else {
_ldr(Arg, Mem);
}
- // This argument-copying instruction uses an explicit
- // OperandARM32Mem operand instead of a Variable, so its
- // fill-from-stack operation has to be tracked separately for
- // statistics.
+ // This argument-copying instruction uses an explicit OperandARM32Mem
+ // operand instead of a Variable, so its fill-from-stack operation has to
+ // be tracked separately for statistics.
Ctx->statsUpdateFills();
}
}
@@ -642,16 +635,15 @@ void TargetARM32::addProlog(CfgNode *Node) {
// * GlobalsAndSubsequentPaddingSize: areas 3 - 4
// * LocalsSpillAreaSize: area 5
// * SpillAreaSizeBytes: areas 2 - 6
- // Determine stack frame offsets for each Variable without a
- // register assignment. This can be done as one variable per stack
- // slot. Or, do coalescing by running the register allocator again
- // with an infinite set of registers (as a side effect, this gives
- // variables a second chance at physical register assignment).
+ // Determine stack frame offsets for each Variable without a register
+ // assignment. This can be done as one variable per stack slot. Or, do
+ // coalescing by running the register allocator again with an infinite set of
+ // registers (as a side effect, this gives variables a second chance at
+ // physical register assignment).
//
- // A middle ground approach is to leverage sparsity and allocate one
- // block of space on the frame for globals (variables with
- // multi-block lifetime), and one block to share for locals
- // (single-block lifetime).
+ // A middle ground approach is to leverage sparsity and allocate one block of
+ // space on the frame for globals (variables with multi-block lifetime), and
+ // one block to share for locals (single-block lifetime).
Context.init(Node);
Context.setInsertPoint(Context.getCur());
@@ -661,14 +653,13 @@ void TargetARM32::addProlog(CfgNode *Node) {
RegsUsed = llvm::SmallBitVector(CalleeSaves.size());
VarList SortedSpilledVariables;
size_t GlobalsSize = 0;
- // If there is a separate locals area, this represents that area.
- // Otherwise it counts any variable not counted by GlobalsSize.
+ // If there is a separate locals area, this represents that area. Otherwise
+ // it counts any variable not counted by GlobalsSize.
SpillAreaSizeBytes = 0;
- // If there is a separate locals area, this specifies the alignment
- // for it.
+ // If there is a separate locals area, this specifies the alignment for it.
uint32_t LocalsSlotsAlignmentBytes = 0;
- // The entire spill locations area gets aligned to largest natural
- // alignment of the variables that have a spill slot.
+ // The entire spill locations area gets aligned to largest natural alignment
+ // of the variables that have a spill slot.
uint32_t SpillAreaAlignmentBytes = 0;
// For now, we don't have target-specific variables that need special
// treatment (no stack-slot-linked SpillVariable type).
@@ -682,12 +673,11 @@ void TargetARM32::addProlog(CfgNode *Node) {
uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
SpillAreaSizeBytes += GlobalsSize;
- // Add push instructions for preserved registers.
- // On ARM, "push" can push a whole list of GPRs via a bitmask (0-15).
- // Unlike x86, ARM also has callee-saved float/vector registers.
- // The "vpush" instruction can handle a whole list of float/vector
- // registers, but it only handles contiguous sequences of registers
- // by specifying the start and the length.
+ // Add push instructions for preserved registers. On ARM, "push" can push a
+ // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
+ // callee-saved float/vector registers. The "vpush" instruction can handle a
+ // whole list of float/vector registers, but it only handles contiguous
+ // sequences of registers by specifying the start and the length.
VarList GPRsToPreserve;
GPRsToPreserve.reserve(CalleeSaves.size());
uint32_t NumCallee = 0;
@@ -704,8 +694,8 @@ void TargetARM32::addProlog(CfgNode *Node) {
}
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
if (CalleeSaves[i] && RegsUsed[i]) {
- // TODO(jvoung): do separate vpush for each floating point
- // register segment and += 4, or 8 depending on type.
+ // TODO(jvoung): do separate vpush for each floating point register
+ // segment and += 4, or 8 depending on type.
++NumCallee;
PreservedRegsSizeBytes += 4;
GPRsToPreserve.push_back(getPhysicalRegister(i));
@@ -724,10 +714,10 @@ void TargetARM32::addProlog(CfgNode *Node) {
Context.insert(InstFakeUse::create(Func, FP));
}
- // Align the variables area. SpillAreaPaddingBytes is the size of
- // the region after the preserved registers and before the spill areas.
- // LocalsSlotsPaddingBytes is the amount of padding between the globals
- // and locals area if they are separate.
+ // Align the variables area. SpillAreaPaddingBytes is the size of the region
+ // after the preserved registers and before the spill areas.
+ // LocalsSlotsPaddingBytes is the amount of padding between the globals and
+ // locals area if they are separate.
assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
uint32_t SpillAreaPaddingBytes = 0;
@@ -758,9 +748,9 @@ void TargetARM32::addProlog(CfgNode *Node) {
resetStackAdjustment();
- // Fill in stack offsets for stack args, and copy args into registers
- // for those that were register-allocated. Args are pushed right to
- // left, so Arg[0] is closest to the stack/frame pointer.
+ // Fill in stack offsets for stack args, and copy args into registers for
+ // those that were register-allocated. Args are pushed right to left, so
+ // Arg[0] is closest to the stack/frame pointer.
Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
size_t BasicFrameOffset = PreservedRegsSizeBytes;
if (!UsesFramePointer)
@@ -830,8 +820,8 @@ void TargetARM32::addEpilog(CfgNode *Node) {
if (RI == E)
return;
- // Convert the reverse_iterator position into its corresponding
- // (forward) iterator position.
+ // Convert the reverse_iterator position into its corresponding (forward)
+ // iterator position.
InstList::iterator InsertPoint = RI.base();
--InsertPoint;
Context.init(Node);
@@ -840,9 +830,9 @@ void TargetARM32::addEpilog(CfgNode *Node) {
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
if (UsesFramePointer) {
Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
- // For late-stage liveness analysis (e.g. asm-verbose mode),
- // adding a fake use of SP before the assignment of SP=FP keeps
- // previous SP adjustments from being dead-code eliminated.
+ // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
+ // use of SP before the assignment of SP=FP keeps previous SP adjustments
+ // from being dead-code eliminated.
Context.insert(InstFakeUse::create(Func, SP));
_mov(SP, FP);
} else {
@@ -868,8 +858,8 @@ void TargetARM32::addEpilog(CfgNode *Node) {
if (!MaybeLeafFunc) {
CalleeSaves[RegARM32::Reg_lr] = true;
}
- // Pop registers in ascending order just like push
- // (instead of in reverse order).
+ // Pop registers in ascending order just like push (instead of in reverse
+ // order).
for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
if (CalleeSaves[i] && RegsUsed[i]) {
GPRsToRestore.push_back(getPhysicalRegister(i));
@@ -903,17 +893,16 @@ void TargetARM32::addEpilog(CfgNode *Node) {
bool TargetARM32::isLegalVariableStackOffset(int32_t Offset) const {
constexpr bool SignExt = false;
- // TODO(jvoung): vldr of FP stack slots has a different limit from the
- // plain stackSlotType().
+ // TODO(jvoung): vldr of FP stack slots has a different limit from the plain
+ // stackSlotType().
return OperandARM32Mem::canHoldOffset(stackSlotType(), SignExt, Offset);
}
StackVariable *TargetARM32::legalizeVariableSlot(Variable *Var,
Variable *OrigBaseReg) {
int32_t Offset = Var->getStackOffset();
- // Legalize will likely need a movw/movt combination, but if the top
- // bits are all 0 from negating the offset and subtracting, we could
- // use that instead.
+ // Legalize will likely need a movw/movt combination, but if the top bits are
+ // all 0 from negating the offset and subtracting, we could use that instead.
bool ShouldSub = (-Offset & 0xFFFF0000) == 0;
if (ShouldSub)
Offset = -Offset;
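A worked example of the ShouldSub test above: spill-slot offsets are typically negative relative to the base register, and when the magnitude fits in 16 bits a single movw plus sub can replace a movw/movt pair plus add. A sketch of just the arithmetic:

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t Offset = -0x1234;                     // e.g. a slot below the base
      bool ShouldSub = (-Offset & 0xFFFF0000) == 0; // magnitude fits in 16 bits
      assert(ShouldSub);
      if (ShouldSub)
        Offset = -Offset;                           // use +0x1234 with a sub
      assert(Offset == 0x1234);
      return 0;
    }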
@@ -949,15 +938,15 @@ void TargetARM32::legalizeStackSlots() {
return;
Variable *OrigBaseReg = getPhysicalRegister(getFrameOrStackReg());
int32_t StackAdjust = 0;
- // Do a fairly naive greedy clustering for now. Pick the first stack slot
+ // Do a fairly naive greedy clustering for now. Pick the first stack slot
// that's out of bounds and make a new base reg using the architecture's temp
- // register. If that works for the next slot, then great. Otherwise, create
- // a new base register, clobbering the previous base register. Never share a
- // base reg across different basic blocks. This isn't ideal if local and
+ // register. If that works for the next slot, then great. Otherwise, create a
+ // new base register, clobbering the previous base register. Never share a
+ // base reg across different basic blocks. This isn't ideal if local and
// multi-block variables are far apart and their references are interspersed.
- // It may help to be more coordinated about assign stack slot numbers
- // and may help to assign smaller offsets to higher-weight variables
- // so that they don't depend on this legalization.
+ // It may help to be more coordinated about assign stack slot numbers and may
+ // help to assign smaller offsets to higher-weight variables so that they
+ // don't depend on this legalization.
for (CfgNode *Node : Func->getNodes()) {
Context.init(Node);
StackVariable *NewBaseReg = nullptr;
@@ -986,7 +975,7 @@ void TargetARM32::legalizeStackSlots() {
continue;
}
}
- // For now, only Mov instructions can have stack variables. We need to
+ // For now, only Mov instructions can have stack variables. We need to
// know the type of instruction because we currently create a fresh one
// to replace Dest/Source, rather than mutate in place.
auto *MovInst = llvm::dyn_cast<InstARM32Mov>(CurInstr);
@@ -1117,15 +1106,15 @@ Operand *TargetARM32::hiOperand(Operand *Operand) {
static_cast<uint32_t>(Const->getValue() >> 32));
}
if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
- // Conservatively disallow memory operands with side-effects
- // in case of duplication.
+ // Conservatively disallow memory operands with side-effects in case of
+ // duplication.
assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
Mem->getAddrMode() == OperandARM32Mem::NegOffset);
const Type SplitType = IceType_i32;
if (Mem->isRegReg()) {
// We have to make a temp variable T, and add 4 to either Base or Index.
- // The Index may be shifted, so adding 4 can mean something else.
- // Thus, prefer T := Base + 4, and use T as the new Base.
+ // The Index may be shifted, so adding 4 can mean something else. Thus,
+ // prefer T := Base + 4, and use T as the new Base.
Variable *Base = Mem->getBase();
Constant *Four = Ctx->getConstantInt32(4);
Variable *NewBase = Func->makeVariable(Base->getType());
@@ -1144,8 +1133,8 @@ Operand *TargetARM32::hiOperand(Operand *Operand) {
// We have to make a temp variable and add 4 to either Base or Offset.
// If we add 4 to Offset, this will convert a non-RegReg addressing
// mode into a RegReg addressing mode. Since NaCl sandboxing disallows
- // RegReg addressing modes, prefer adding to base and replacing instead.
- // Thus we leave the old offset alone.
+ // RegReg addressing modes, prefer adding to base and replacing
+ // instead. Thus we leave the old offset alone.
Constant *Four = Ctx->getConstantInt32(4);
Variable *NewBase = Func->makeVariable(Base->getType());
lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
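The 4 being added throughout this hunk is the little-endian split of an i64 memory operand: the low word lives at the original address and the high word 4 bytes above it. A sketch of that layout assumption (the asserts hold on a little-endian machine, as ARM is configured here):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint64_t V = 0x0123456789ABCDEFull;
      uint32_t Words[2];
      std::memcpy(Words, &V, sizeof(V)); // little-endian target assumed
      assert(Words[0] == 0x89ABCDEF);    // lo word at [base]
      assert(Words[1] == 0x01234567);    // hi word at [base + 4]
      return 0;
    }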
@@ -1195,11 +1184,11 @@ llvm::SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
UsesFramePointer = true;
- // Conservatively require the stack to be aligned. Some stack
- // adjustment operations implemented below assume that the stack is
- // aligned before the alloca. All the alloca code ensures that the
- // stack alignment is preserved after the alloca. The stack alignment
- // restriction can be relaxed in some cases.
+ // Conservatively require the stack to be aligned. Some stack adjustment
+ // operations implemented below assume that the stack is aligned before the
+ // alloca. All the alloca code ensures that the stack alignment is preserved
+ // after the alloca. The stack alignment restriction can be relaxed in some
+ // cases.
NeedsStackAlignment = true;
// TODO(stichnot): minimize the number of adjustments of SP, etc.
@@ -1226,8 +1215,8 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
Operand *SubAmount = legalize(Ctx->getConstantInt32(Value));
_sub(SP, SP, SubAmount);
} else {
- // Non-constant sizes need to be adjusted to the next highest
- // multiple of the required alignment at runtime.
+ // Non-constant sizes need to be adjusted to the next highest multiple of
+ // the required alignment at runtime.
TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
Variable *T = makeReg(IceType_i32);
_mov(T, TotalSize);
@@ -1265,8 +1254,8 @@ void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
case IceType_i64: {
Variable *ScratchReg = makeReg(IceType_i32);
_orrs(ScratchReg, SrcLoReg, SrcHi);
- // ScratchReg isn't going to be used, but we need the
- // side-effect of setting flags from this operation.
+ // ScratchReg isn't going to be used, but we need the side-effect of
+ // setting flags from this operation.
Context.insert(InstFakeUse::create(Func, ScratchReg));
}
}
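The orrs above is the usual way to test a 64-bit value for zero with 32-bit registers: the value is zero exactly when the OR of its two halves is zero, and the S-suffixed OR sets the Z flag from that result. The same predicate in plain C++ (illustration only):

    #include <cstdint>

    inline bool isZero64(uint32_t Lo, uint32_t Hi) { return (Lo | Hi) == 0; }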
@@ -1310,21 +1299,21 @@ void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
Variable *Dest = Inst->getDest();
- // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier
- // to legalize Src0 to flex or Src1 to flex and there is a reversible
- // instruction. E.g., reverse subtract with immediate, register vs
- // register, immediate.
- // Or it may be the case that the operands aren't swapped, but the
- // bits can be flipped and a different operation applied.
- // E.g., use BIC (bit clear) instead of AND for some masks.
+ // TODO(jvoung): Should be able to flip Src0 and Src1 if it is easier to
+ // legalize Src0 to flex or Src1 to flex and there is a reversible
+ // instruction. E.g., reverse subtract with immediate, register vs register,
+ // immediate.
+ // Or it may be the case that the operands aren't swapped, but the bits can
+ // be flipped and a different operation applied. E.g., use BIC (bit clear)
+ // instead of AND for some masks.
Operand *Src0 = legalizeUndef(Inst->getSrc(0));
Operand *Src1 = legalizeUndef(Inst->getSrc(1));
if (Dest->getType() == IceType_i64) {
- // These helper-call-involved instructions are lowered in this
- // separate switch. This is because we would otherwise assume that
- // we need to legalize Src0 to Src0RLo and Src0Hi. However, those go unused
- // with helper calls, and such unused/redundant instructions will fail
- // liveness analysis under -Om1 setting.
+ // These helper-call-involved instructions are lowered in this separate
+ // switch. This is because we would otherwise assume that we need to
+ // legalize Src0 to Src0RLo and Src0Hi. However, those go unused with
+ // helper calls, and such unused/redundant instructions will fail liveness
+ // analysis under -Om1 setting.
switch (Inst->getOp()) {
default:
break;
@@ -1332,11 +1321,10 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
case InstArithmetic::Sdiv:
case InstArithmetic::Urem:
case InstArithmetic::Srem: {
- // Check for divide by 0 (ARM normally doesn't trap, but we want it
- // to trap for NaCl). Src1Lo and Src1Hi may have already been legalized
- // to a register, which will hide a constant source operand.
- // Instead, check the not-yet-legalized Src1 to optimize-out a divide
- // by 0 check.
+ // Check for divide by 0 (ARM normally doesn't trap, but we want it to
+ // trap for NaCl). Src1Lo and Src1Hi may have already been legalized to a
+ // register, which will hide a constant source operand. Instead, check
+ // the not-yet-legalized Src1 to optimize-out a divide by 0 check.
if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src1)) {
if (C64->getValue() == 0) {
_trap();
@@ -1348,8 +1336,8 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
div0Check(IceType_i64, Src1Lo, Src1Hi);
}
// Technically, ARM has their own aeabi routines, but we can use the
- // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div,
- // but uses the more standard __moddi3 for rem.
+ // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
+ // the more standard __moddi3 for rem.
const char *HelperName = "";
switch (Inst->getOp()) {
default:
@@ -1472,12 +1460,11 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
// lsl t_lo, b.lo, c.lo
// a.lo = t_lo
// a.hi = t_hi
- // Can be strength-reduced for constant-shifts, but we don't do
- // that for now.
- // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative.
- // On ARM, shifts only take the lower 8 bits of the shift register,
- // and saturate to the range 0-32, so the negative value will
- // saturate to 32.
+ // Can be strength-reduced for constant-shifts, but we don't do that for
+ // now.
+ // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
+ // ARM, shifts only take the lower 8 bits of the shift register, and
+ // saturate to the range 0-32, so the negative value will saturate to 32.
Variable *T_Hi = makeReg(IceType_i32);
Variable *Src1RLo = legalizeToReg(Src1Lo);
Constant *ThirtyTwo = Ctx->getConstantInt32(32);
@@ -1493,8 +1480,8 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
_mov(DestHi, T_Hi);
Variable *T_Lo = makeReg(IceType_i32);
// _mov seems to sometimes have better register preferencing than lsl.
- // Otherwise mov w/ lsl shifted register is a pseudo-instruction
- // that maps to lsl.
+ // Otherwise mov w/ lsl shifted register is a pseudo-instruction that
+ // maps to lsl.
_mov(T_Lo, OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
OperandARM32::LSL, Src1RLo));
_mov(DestLo, T_Lo);
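For readers following the shift discussion, here is a reference model of the sub/rsb-plus-orr decomposition the comments walk through, with the "shift amounts of 32 or more produce 0" behavior folded into two helpers. This is a sketch of the idea in plain C++, assuming the GCC-style sequence the comment refers to, not the emitted instructions:

    #include <cassert>
    #include <cstdint>

    static uint32_t lslOrZero(uint32_t V, int32_t Amt) {
      return (Amt >= 0 && Amt < 32) ? (V << Amt) : 0; // saturates like ARM
    }
    static uint32_t lsrOrZero(uint32_t V, int32_t Amt) {
      return (Amt >= 0 && Amt < 32) ? (V >> Amt) : 0;
    }

    int main() {
      const uint32_t BLo = 0x89ABCDEF, BHi = 0x01234567;
      for (int32_t C = 0; C < 64; ++C) {
        int32_t TC1 = C - 32;             // sub t_c1, c.lo, #32
        uint32_t THi = lslOrZero(BHi, C); // lsl t_hi, b.hi, c.lo
        THi |= lslOrZero(BLo, TC1);       // orr t_hi, t_hi, b.lo, lsl t_c1
        int32_t TC2 = 32 - C;             // rsb t_c2, c.lo, #32
        THi |= lsrOrZero(BLo, TC2);       // orr t_hi, t_hi, b.lo, lsr t_c2
        uint32_t TLo = lslOrZero(BLo, C); // lsl t_lo, b.lo, c.lo
        uint64_t Got = (uint64_t(THi) << 32) | TLo;
        uint64_t Want = ((uint64_t(BHi) << 32) | BLo) << C;
        assert(Got == Want);              // matches a true 64-bit shift
      }
      return 0;
    }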
@@ -1513,9 +1500,9 @@ void TargetARM32::lowerArithmetic(const InstArithmetic *Inst) {
// a.hi = t_hi
case InstArithmetic::Ashr: {
// a=b>>c (signed) ==> ...
- // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags,
- // and the next orr should be conditioned on PLUS. The last two
- // right shifts should also be arithmetic.
+ // Ashr is similar, but the sub t_c2, c.lo, #32 should set flags, and the
+ // next orr should be conditioned on PLUS. The last two right shifts
+ // should also be arithmetic.
bool IsAshr = Inst->getOp() == InstArithmetic::Ashr;
Variable *T_Lo = makeReg(IceType_i32);
Variable *Src1RLo = legalizeToReg(Src1Lo);
@@ -1723,13 +1710,13 @@ void TargetARM32::lowerAssign(const InstAssign *Inst) {
Operand *NewSrc;
if (Dest->hasReg()) {
// If Dest already has a physical register, then legalize the Src operand
- // into a Variable with the same register assignment. This especially
+ // into a Variable with the same register assignment. This especially
// helps allow the use of Flex operands.
NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
} else {
- // Dest could be a stack operand. Since we could potentially need
- // to do a Store (and store can only have Register operands),
- // legalize this to a register.
+ // Dest could be a stack operand. Since we could potentially need to do a
+ // Store (and store can only have Register operands), legalize this to a
+ // register.
NewSrc = legalize(Src0, Legal_Reg);
}
if (isVectorType(Dest->getType())) {
@@ -1810,25 +1797,24 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
}
}
- // Adjust the parameter area so that the stack is aligned. It is
- // assumed that the stack is already aligned at the start of the
- // calling sequence.
+ // Adjust the parameter area so that the stack is aligned. It is assumed that
+ // the stack is already aligned at the start of the calling sequence.
ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
- // Subtract the appropriate amount for the argument area. This also
- // takes care of setting the stack adjustment during emission.
+ // Subtract the appropriate amount for the argument area. This also takes
+ // care of setting the stack adjustment during emission.
//
- // TODO: If for some reason the call instruction gets dead-code
- // eliminated after lowering, we would need to ensure that the
- // pre-call and the post-call esp adjustment get eliminated as well.
+ // TODO: If for some reason the call instruction gets dead-code eliminated
+ // after lowering, we would need to ensure that the pre-call and the
+ // post-call esp adjustment get eliminated as well.
if (ParameterAreaSizeBytes) {
Operand *SubAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
Legal_Reg | Legal_Flex);
_adjust_stack(ParameterAreaSizeBytes, SubAmount);
}
- // Copy arguments that are passed on the stack to the appropriate
- // stack locations.
+ // Copy arguments that are passed on the stack to the appropriate stack
+ // locations.
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
for (auto &StackArg : StackArgs) {
ConstantInteger32 *Loc =
@@ -1850,9 +1836,9 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
// Copy arguments to be passed in registers to the appropriate registers.
for (auto &GPRArg : GPRArgs) {
Variable *Reg = legalizeToReg(GPRArg.first, GPRArg.second);
- // Generate a FakeUse of register arguments so that they do not get
- // dead code eliminated as a result of the FakeKill of scratch
- // registers after the call.
+ // Generate a FakeUse of register arguments so that they do not get dead
+ // code eliminated as a result of the FakeKill of scratch registers after
+ // the call.
Context.insert(InstFakeUse::create(Func, Reg));
}
for (auto &FPArg : FPArgs) {
@@ -1860,8 +1846,8 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
Context.insert(InstFakeUse::create(Func, Reg));
}
- // Generate the call instruction. Assign its result to a temporary
- // with high register allocation weight.
+ // Generate the call instruction. Assign its result to a temporary with high
+ // register allocation weight.
Variable *Dest = Instr->getDest();
// ReturnReg doubles as ReturnRegLo as necessary.
Variable *ReturnReg = nullptr;
@@ -1901,12 +1887,12 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
}
}
Operand *CallTarget = Instr->getCallTarget();
- // TODO(jvoung): Handle sandboxing.
- // const bool NeedSandboxing = Ctx->getFlags().getUseSandboxing();
+ // TODO(jvoung): Handle sandboxing. const bool NeedSandboxing =
+ // Ctx->getFlags().getUseSandboxing();
- // Allow ConstantRelocatable to be left alone as a direct call,
- // but force other constants like ConstantInteger32 to be in
- // a register and make it an indirect call.
+ // Allow ConstantRelocatable to be left alone as a direct call, but force
+ // other constants like ConstantInteger32 to be in a register and make it an
+ // indirect call.
if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
CallTarget = legalize(CallTarget, Legal_Reg);
}
@@ -1915,8 +1901,8 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
if (ReturnRegHi)
Context.insert(InstFakeDef::create(Func, ReturnRegHi));
- // Add the appropriate offset to SP. The call instruction takes care
- // of resetting the stack offset during emission.
+ // Add the appropriate offset to SP. The call instruction takes care of
+ // resetting the stack offset during emission.
if (ParameterAreaSizeBytes) {
Operand *AddAmount = legalize(Ctx->getConstantInt32(ParameterAreaSizeBytes),
Legal_Reg | Legal_Flex);
@@ -2024,8 +2010,8 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
Variable *DestLo = llvm::cast<Variable>(loOperand(Dest));
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Variable *T_Lo = makeReg(DestLo->getType());
- // i32 and i1 can just take up the whole register.
- // i32 doesn't need uxt, while i1 will have an and mask later anyway.
+ // i32 and i1 can just take up the whole register. i32 doesn't need uxt,
+ // while i1 will have an and mask later anyway.
if (Src0->getType() == IceType_i32 || Src0->getType() == IceType_i1) {
Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
_mov(T_Lo, Src0RF);
@@ -2046,9 +2032,9 @@ void TargetARM32::lowerCast(const InstCast *Inst) {
Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
Constant *One = Ctx->getConstantInt32(1);
Variable *T = makeReg(Dest->getType());
- // Just use _mov instead of _uxt since all registers are 32-bit.
- // _uxt requires the source to be a register so could have required
- // a _mov from legalize anyway.
+ // Just use _mov instead of _uxt since all registers are 32-bit. _uxt
+ // requires the source to be a register so could have required a _mov
+ // from legalize anyway.
_mov(T, Src0RF);
_and(T, T, One);
_mov(Dest, T);
@@ -2288,8 +2274,8 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
// mov.<C2> t, #0 mov.<C2> t, #0
// mov a, t mov a, t
// where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
- // is used for signed compares. In some cases, b and c need to be swapped
- // as well.
+ // is used for signed compares. In some cases, b and c need to be swapped as
+ // well.
//
// LLVM does:
// for EQ and NE:
@@ -2299,13 +2285,13 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
// mov.<C> t, #1
// mov a, t
//
- // that's nice in that it's just as short but has fewer dependencies
- // for better ILP at the cost of more registers.
+ // that's nice in that it's just as short but has fewer dependencies for
+ // better ILP at the cost of more registers.
//
- // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with
- // two unconditional mov #0, two cmps, two conditional mov #1,
- // and one conditonal reg mov. That has few dependencies for good ILP,
- // but is a longer sequence.
+ // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
+ // unconditional mov #0, two cmps, two conditional mov #1, and one
+ // conditional reg mov. That has few dependencies for good ILP, but is a
+ // longer sequence.
//
// So, we are going with the GCC version since it's usually better (except
// perhaps for eq/ne). We could revisit special-casing eq/ne later.
@@ -2333,8 +2319,8 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
Variable *ScratchReg = makeReg(IceType_i32);
_cmp(Src0Lo, Src1LoRF);
_sbcs(ScratchReg, Src0Hi, Src1HiRF);
- // ScratchReg isn't going to be used, but we need the
- // side-effect of setting flags from this operation.
+ // ScratchReg isn't going to be used, but we need the side-effect of
+ // setting flags from this operation.
Context.insert(InstFakeUse::create(Func, ScratchReg));
} else {
_cmp(Src0Hi, Src1HiRF);
@@ -2354,8 +2340,8 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
// mov.C1 t, #0
// mov.C2 t, #1
// mov a, t
- // where the unsigned/sign extension is not needed for 32-bit.
- // They also have special cases for EQ and NE. E.g., for NE:
+ // where the unsigned/sign extension is not needed for 32-bit. They also have
+ // special cases for EQ and NE. E.g., for NE:
// <extend to tb, tc>
// subs t, tb, tc
// movne t, #1
@@ -2368,13 +2354,13 @@ void TargetARM32::lowerIcmp(const InstIcmp *Inst) {
// mov.<C> t, #1
// mov a, t
//
- // the left shift is by 0, 16, or 24, which allows the comparison to focus
- // on the digits that actually matter (for 16-bit or 8-bit signed/unsigned).
- // For the unsigned case, for some reason it does similar to GCC and does
- // a uxtb first. It's not clear to me why that special-casing is needed.
+ // the left shift is by 0, 16, or 24, which allows the comparison to focus on
+ // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For
+ // the unsigned case, for some reason it does similar to GCC and does a uxtb
+ // first. It's not clear to me why that special-casing is needed.
//
- // We'll go with the LLVM way for now, since it's shorter and has just as
- // few dependencies.
+ // We'll go with the LLVM way for now, since it's shorter and has just as few
+ // dependencies.
int32_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
assert(ShiftAmt >= 0);
Constant *ShiftConst = nullptr;
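The shift trick described above lets a single 32-bit signed compare stand in for a narrower one: shifting both operands left by 32 minus the operand width moves the significant bits to the top, so their order as i32 matches their order as i16 or i8. A small check of that claim for signed i16 (illustration only):

    #include <cassert>
    #include <cstdint>

    int main() {
      int16_t B = -2, C = 3; // ShiftAmt = 32 - 16 = 16
      int32_t ShiftedB = int32_t(uint32_t(uint16_t(B)) << 16);
      int32_t ShiftedC = int32_t(uint32_t(uint16_t(C)) << 16);
      assert((ShiftedB < ShiftedC) == (B < C));
      return 0;
    }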
@@ -2417,9 +2403,9 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
UnimplementedError(Func->getContext()->getFlags());
return;
case Intrinsics::AtomicFenceAll:
- // NOTE: FenceAll should prevent and load/store from being moved
- // across the fence (both atomic and non-atomic). The InstARM32Mfence
- // instruction is currently marked coarsely as "HasSideEffects".
+ // NOTE: FenceAll should prevent and load/store from being moved across the
+ // fence (both atomic and non-atomic). The InstARM32Mfence instruction is
+ // currently marked coarsely as "HasSideEffects".
UnimplementedError(Func->getContext()->getFlags());
return;
case Intrinsics::AtomicIsLockFree: {
@@ -2477,10 +2463,10 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
Call->addArg(Val);
lowerCall(Call);
// The popcount helpers always return 32-bit values, while the intrinsic's
- // signature matches some 64-bit platform's native instructions and
- // expect to fill a 64-bit reg. Thus, clear the upper bits of the dest
- // just in case the user doesn't do that in the IR or doesn't toss the bits
- // via truncate.
+ // signature matches some 64-bit platform's native instructions and expect
+ // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in
+ // case the user doesn't do that in the IR or doesn't toss the bits via
+ // truncate.
if (Val->getType() == IceType_i64) {
Variable *DestHi = llvm::cast<Variable>(hiOperand(Dest));
Constant *Zero = Ctx->getConstantZero(IceType_i32);
@@ -2491,8 +2477,8 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
return;
}
case Intrinsics::Ctlz: {
- // The "is zero undef" parameter is ignored and we always return
- // a well-defined value.
+ // The "is zero undef" parameter is ignored and we always return a
+ // well-defined value.
Operand *Val = Instr->getArg(0);
Variable *ValLoR;
Variable *ValHiR = nullptr;
@@ -2639,9 +2625,9 @@ void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) {
Variable *T2 = makeReg(IceType_i32);
_add(T2, T, ThirtyTwo);
_clz(T2, ValHiR, CondARM32::NE);
- // T2 is actually a source as well when the predicate is not AL
- // (since it may leave T2 alone). We use set_dest_nonkillable to
- // prolong the liveness of T2 as if it was used as a source.
+ // T2 is actually a source as well when the predicate is not AL (since it
+ // may leave T2 alone). We use set_dest_nonkillable to prolong the liveness
+ // of T2 as if it was used as a source.
_set_dest_nonkillable();
_mov(DestLo, T2);
Variable *T3 = nullptr;
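What the add/conditional-clz pair computes is the usual split for a 64-bit count-leading-zeros: clz of the high word when it is nonzero, otherwise 32 plus clz of the low word. A reference expression (a sketch; __builtin_clz is the GCC/Clang builtin, used here only for illustration and only on nonzero arguments):

    #include <cassert>
    #include <cstdint>

    static uint32_t clz64(uint32_t Lo, uint32_t Hi) {
      if (Hi != 0)
        return __builtin_clz(Hi);
      if (Lo != 0)
        return 32 + __builtin_clz(Lo);
      return 64;
    }

    int main() {
      assert(clz64(0x00000001, 0x00000000) == 63);
      assert(clz64(0x00000000, 0x00010000) == 15);
      return 0;
    }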
@@ -2654,15 +2640,14 @@ void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) {
}
void TargetARM32::lowerLoad(const InstLoad *Load) {
- // A Load instruction can be treated the same as an Assign
- // instruction, after the source operand is transformed into an
- // OperandARM32Mem operand.
+ // A Load instruction can be treated the same as an Assign instruction, after
+ // the source operand is transformed into an OperandARM32Mem operand.
Type Ty = Load->getDest()->getType();
Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
Variable *DestLoad = Load->getDest();
- // TODO(jvoung): handled folding opportunities. Sign and zero extension
- // can be folded into a load.
+ // TODO(jvoung): handled folding opportunities. Sign and zero extension can
+ // be folded into a load.
InstAssign *Assign = InstAssign::create(Func, DestLoad, Src0);
lowerAssign(Assign);
}
@@ -2708,17 +2693,15 @@ void TargetARM32::lowerRet(const InstRet *Inst) {
_mov(Reg, Src0F, CondARM32::AL, RegARM32::Reg_r0);
}
}
- // Add a ret instruction even if sandboxing is enabled, because
- // addEpilog explicitly looks for a ret instruction as a marker for
- // where to insert the frame removal instructions.
- // addEpilog is responsible for restoring the "lr" register as needed
- // prior to this ret instruction.
+ // Add a ret instruction even if sandboxing is enabled, because addEpilog
+ // explicitly looks for a ret instruction as a marker for where to insert the
+ // frame removal instructions. addEpilog is responsible for restoring the
+ // "lr" register as needed prior to this ret instruction.
_ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
- // Add a fake use of sp to make sure sp stays alive for the entire
- // function. Otherwise post-call sp adjustments get dead-code
- // eliminated. TODO: Are there more places where the fake use
- // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not
- // have a ret instruction.
+ // Add a fake use of sp to make sure sp stays alive for the entire function.
+ // Otherwise post-call sp adjustments get dead-code eliminated.
+ // TODO: Are there more places where the fake use should be inserted? E.g.
+ // "void f(int n){while(1) g(n);}" may not have a ret instruction.
Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
Context.insert(InstFakeUse::create(Func, SP));
}
@@ -2852,8 +2835,8 @@ Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
if (isVectorType(Ty) || isFloatingType(Ty)) {
_vmov(Reg, Src);
} else {
- // Mov's Src operand can really only be the flexible second operand type
- // or a register. Users should guarantee that.
+ // Mov's Src operand can really only be the flexible second operand type or
+ // a register. Users should guarantee that.
_mov(Reg, Src);
}
return Reg;
@@ -2862,18 +2845,17 @@ Variable *TargetARM32::copyToReg(Operand *Src, int32_t RegNum) {
Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
int32_t RegNum) {
Type Ty = From->getType();
- // Assert that a physical register is allowed. To date, all calls
- // to legalize() allow a physical register. Legal_Flex converts
- // registers to the right type OperandARM32FlexReg as needed.
+ // Assert that a physical register is allowed. To date, all calls to
+ // legalize() allow a physical register. Legal_Flex converts registers to the
+ // right type OperandARM32FlexReg as needed.
assert(Allowed & Legal_Reg);
- // Go through the various types of operands:
- // OperandARM32Mem, OperandARM32Flex, Constant, and Variable.
- // Given the above assertion, if type of operand is not legal
- // (e.g., OperandARM32Mem and !Legal_Mem), we can always copy
- // to a register.
+ // Go through the various types of operands: OperandARM32Mem,
+ // OperandARM32Flex, Constant, and Variable. Given the above assertion, if
+ // type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we
+ // can always copy to a register.
if (auto Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
- // Before doing anything with a Mem operand, we need to ensure
- // that the Base and Index components are in physical registers.
+ // Before doing anything with a Mem operand, we need to ensure that the
+ // Base and Index components are in physical registers.
Variable *Base = Mem->getBase();
Variable *Index = Mem->getIndex();
Variable *RegBase = nullptr;
@@ -2918,8 +2900,8 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
if (auto FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
From = FlexReg->getReg();
- // Fall through and let From be checked as a Variable below,
- // where it may or may not need a register.
+ // Fall through and let From be checked as a Variable below, where it
+ // may or may not need a register.
} else {
return copyToReg(Flex, RegNum);
}
@@ -2944,10 +2926,10 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
uint32_t RotateAmt;
uint32_t Immed_8;
uint32_t Value = static_cast<uint32_t>(C32->getValue());
- // Check if the immediate will fit in a Flexible second operand,
- // if a Flexible second operand is allowed. We need to know the exact
- // value, so that rules out relocatable constants.
- // Also try the inverse and use MVN if possible.
+ // Check if the immediate will fit in a Flexible second operand, if a
+ // Flexible second operand is allowed. We need to know the exact value,
+ // so that rules out relocatable constants. Also try the inverse and use
+ // MVN if possible.
if (CanBeFlex &&
OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
return OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
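The "Flexible second operand" immediate referred to here is ARM's 8-bit-value-rotated-right-by-an-even-amount encoding, which is why the exact value must be known (ruling out relocatables). A sketch of that check, assuming this encoding; the real test is OperandARM32FlexImm::canHoldImm:

    #include <cassert>
    #include <cstdint>

    // True if Value equals some 8-bit constant rotated right by an even amount.
    static bool canEncodeAsRotatedImm8(uint32_t Value) {
      for (uint32_t Rot = 0; Rot < 32; Rot += 2) {
        // Rotate left by Rot to undo a right-rotation by Rot.
        uint32_t Undone = (Value << Rot) | (Rot ? (Value >> (32 - Rot)) : 0);
        if (Undone <= 0xFF)
          return true;
      }
      return false;
    }

    int main() {
      assert(canEncodeAsRotatedImm8(0x000000FF));  // 0xFF, no rotation
      assert(canEncodeAsRotatedImm8(0xFF000000));  // 0xFF rotated right by 8
      assert(!canEncodeAsRotatedImm8(0x00000101)); // set bits span 9 bits
      return 0;
    }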
@@ -2977,12 +2959,12 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
} else {
assert(isScalarFloatingType(Ty));
// Load floats/doubles from literal pool.
- // TODO(jvoung): Allow certain immediates to be encoded directly in
- // an operand. See Table A7-18 of the ARM manual:
- // "Floating-point modified immediate constants".
- // Or, for 32-bit floating point numbers, just encode the raw bits
- // into a movw/movt pair to GPR, and vmov to an SREG, instead of using
- // a movw/movt pair to get the const-pool address then loading to SREG.
+ // TODO(jvoung): Allow certain immediates to be encoded directly in an
+ // operand. See Table A7-18 of the ARM manual: "Floating-point modified
+ // immediate constants". Or, for 32-bit floating point numbers, just
+ // encode the raw bits into a movw/movt pair to GPR, and vmov to an SREG,
+ // instead of using a movw/movt pair to get the const-pool address then
+ // loading to SREG.
std::string Buffer;
llvm::raw_string_ostream StrBuf(Buffer);
llvm::cast<Constant>(From)->emitPoolLabel(StrBuf);
@@ -2997,9 +2979,9 @@ Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
}
if (auto Var = llvm::dyn_cast<Variable>(From)) {
- // Check if the variable is guaranteed a physical register. This
- // can happen either when the variable is pre-colored or when it is
- // assigned infinite weight.
+ // Check if the variable is guaranteed a physical register. This can happen
+ // either when the variable is pre-colored or when it is assigned infinite
+ // weight.
bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
// We need a new physical register for the operand if:
// Mem is not allowed and Var isn't guaranteed a physical
@@ -3025,17 +3007,16 @@ Variable *TargetARM32::legalizeToReg(Operand *From, int32_t RegNum) {
Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) {
Type Ty = From->getType();
if (llvm::isa<ConstantUndef>(From)) {
- // Lower undefs to zero. Another option is to lower undefs to an
- // uninitialized register; however, using an uninitialized register
- // results in less predictable code.
+ // Lower undefs to zero. Another option is to lower undefs to an
+ // uninitialized register; however, using an uninitialized register results
+ // in less predictable code.
//
- // If in the future the implementation is changed to lower undef
- // values to uninitialized registers, a FakeDef will be needed:
- // Context.insert(InstFakeDef::create(Func, Reg));
- // This is in order to ensure that the live range of Reg is not
- // overestimated. If the constant being lowered is a 64 bit value,
- // then the result should be split and the lo and hi components will
- // need to go in uninitialized registers.
+ // If in the future the implementation is changed to lower undef values to
+ // uninitialized registers, a FakeDef will be needed:
+ // Context.insert(InstFakeDef::create(Func, Reg)); This is in order to
+ // ensure that the live range of Reg is not overestimated. If the constant
+ // being lowered is a 64 bit value, then the result should be split and the
+ // lo and hi components will need to go in uninitialized registers.
if (isVectorType(Ty))
return makeVectorOfZeros(Ty, RegNum);
return Ctx->getConstantZero(Ty);
@@ -3045,15 +3026,15 @@ Operand *TargetARM32::legalizeUndef(Operand *From, int32_t RegNum) {
OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
OperandARM32Mem *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
- // It may be the case that address mode optimization already creates
- // an OperandARM32Mem, so in that case it wouldn't need another level
- // of transformation.
+ // It may be the case that address mode optimization already creates an
+ // OperandARM32Mem, so in that case it wouldn't need another level of
+ // transformation.
if (Mem) {
return llvm::cast<OperandARM32Mem>(legalize(Mem));
}
- // If we didn't do address mode optimization, then we only
- // have a base/offset to work with. ARM always requires a base
- // register, so just use that to hold the operand.
+ // If we didn't do address mode optimization, then we only have a base/offset
+ // to work with. ARM always requires a base register, so just use that to
+ // hold the operand.
Variable *Base = legalizeToReg(Operand);
return OperandARM32Mem::create(
Func, Ty, Base,
@@ -3076,9 +3057,9 @@ void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align) {
uint32_t RotateAmt;
uint32_t Immed_8;
Operand *Mask;
- // Use AND or BIC to mask off the bits, depending on which immediate fits
- // (if it fits at all). Assume Align is usually small, in which case BIC
- // works better. Thus, this rounds down to the alignment.
+ // Use AND or BIC to mask off the bits, depending on which immediate fits (if
+ // it fits at all). Assume Align is usually small, in which case BIC works
+ // better. Thus, this rounds down to the alignment.
if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex);
_bic(Reg, Reg, Mask);
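Both mask choices compute the same rounding: BIC clears the low bits using (Align - 1) directly, while AND uses its complement, and whichever constant fits the flexible-immediate encoding gets used. The net effect, sketched in C++:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Align = 16;     // power-of-two alignment
      uint32_t Reg = 0x1007;
      Reg &= ~(Align - 1);     // BIC Reg, #(Align-1) == AND Reg, #~(Align-1)
      assert(Reg == 0x1000);   // rounded down to the alignment
      return 0;
    }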
@@ -3170,17 +3151,18 @@ void TargetHeaderARM32::lower() {
OstreamLocker L(Ctx);
Ostream &Str = Ctx->getStrEmit();
Str << ".syntax unified\n";
- // Emit build attributes in format: .eabi_attribute TAG, VALUE.
- // See Sec. 2 of "Addenda to, and Errata in the ABI for the ARM architecture"
- // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
+ // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of
+ // "Addenda to, and Errata in the ABI for the ARM architecture"
+ // http://infocenter.arm.com
+ // /help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
//
- // Tag_conformance should be be emitted first in a file-scope
- // sub-subsection of the first public subsection of the attributes.
+ // Tag_conformance should be be emitted first in a file-scope sub-subsection
+ // of the first public subsection of the attributes.
Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
- // Chromebooks are at least A15, but do A9 for higher compat.
- // For some reason, the LLVM ARM asm parser has the .cpu directive override
- // the mattr specified on the commandline. So to test hwdiv, we need to set
- // the .cpu directive higher (can't just rely on --mattr=...).
+ // Chromebooks are at least A15, but do A9 for higher compat. For some
+ // reason, the LLVM ARM asm parser has the .cpu directive override the mattr
+ // specified on the commandline. So to test hwdiv, we need to set the .cpu
+ // directive higher (can't just rely on --mattr=...).
if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
Str << ".cpu cortex-a15\n";
} else {