Index: src/IceTargetLoweringX8632.cpp |
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp |
index f06150fc1b5dc9370997fe3ea5ad613e79224a85..47f6ae15781a23983fe4e4102f87bc663bd7f9bb 100644 |
--- a/src/IceTargetLoweringX8632.cpp |
+++ b/src/IceTargetLoweringX8632.cpp |
@@ -8,9 +8,8 @@ |
//===----------------------------------------------------------------------===// |
/// |
/// \file |
-/// This file implements the TargetLoweringX8632 class, which |
-/// consists almost entirely of the lowering sequence for each |
-/// high-level instruction. |
+/// This file implements the TargetLoweringX8632 class, which consists almost |
+/// entirely of the lowering sequence for each high-level instruction. |
/// |
//===----------------------------------------------------------------------===// |
@@ -100,24 +99,21 @@ const char *MachineTraits<TargetX8632>::TargetName = "X8632"; |
void TargetX8632::lowerCall(const InstCall *Instr) { |
// x86-32 calling convention: |
// |
- // * At the point before the call, the stack must be aligned to 16 |
- // bytes. |
+ // * At the point before the call, the stack must be aligned to 16 bytes. |
// |
- // * The first four arguments of vector type, regardless of their |
- // position relative to the other arguments in the argument list, are |
- // placed in registers xmm0 - xmm3. |
+ // * The first four arguments of vector type, regardless of their position |
+ // relative to the other arguments in the argument list, are placed in |
+ // registers xmm0 - xmm3. |
// |
- // * Other arguments are pushed onto the stack in right-to-left order, |
- // such that the left-most argument ends up on the top of the stack at |
- // the lowest memory address. |
+ // * Other arguments are pushed onto the stack in right-to-left order, such |
+ // that the left-most argument ends up on the top of the stack at the lowest |
+ // memory address. |
// |
- // * Stack arguments of vector type are aligned to start at the next |
- // highest multiple of 16 bytes. Other stack arguments are aligned to |
- // 4 bytes. |
+ // * Stack arguments of vector type are aligned to start at the next highest |
+ // multiple of 16 bytes. Other stack arguments are aligned to 4 bytes. |
// |
- // This intends to match the section "IA-32 Function Calling |
- // Convention" of the document "OS X ABI Function Call Guide" by |
- // Apple. |
+ // This intends to match the section "IA-32 Function Calling Convention" of |
+ // the document "OS X ABI Function Call Guide" by Apple. |
NeedsStackAlignment = true; |
using OperandList = std::vector<Operand *>; |
@@ -149,46 +145,44 @@ void TargetX8632::lowerCall(const InstCall *Instr) { |
} |
} |
- // Adjust the parameter area so that the stack is aligned. It is |
- // assumed that the stack is already aligned at the start of the |
- // calling sequence. |
+ // Adjust the parameter area so that the stack is aligned. It is assumed that |
+ // the stack is already aligned at the start of the calling sequence. |
ParameterAreaSizeBytes = Traits::applyStackAlignment(ParameterAreaSizeBytes); |
- // Subtract the appropriate amount for the argument area. This also |
- // takes care of setting the stack adjustment during emission. |
+ // Subtract the appropriate amount for the argument area. This also takes |
+ // care of setting the stack adjustment during emission. |
// |
- // TODO: If for some reason the call instruction gets dead-code |
- // eliminated after lowering, we would need to ensure that the |
- // pre-call and the post-call esp adjustment get eliminated as well. |
+ // TODO: If for some reason the call instruction gets dead-code eliminated |
+ // after lowering, we would need to ensure that the pre-call and the |
+ // post-call esp adjustment get eliminated as well. |
if (ParameterAreaSizeBytes) { |
_adjust_stack(ParameterAreaSizeBytes); |
} |
- // Copy arguments that are passed on the stack to the appropriate |
- // stack locations. |
+ // Copy arguments that are passed on the stack to the appropriate stack |
+ // locations. |
for (SizeT i = 0, e = StackArgs.size(); i < e; ++i) { |
lowerStore(InstStore::create(Func, StackArgs[i], StackArgLocations[i])); |
} |
- // Copy arguments to be passed in registers to the appropriate |
- // registers. |
- // TODO: Investigate the impact of lowering arguments passed in |
- // registers after lowering stack arguments as opposed to the other |
- // way around. Lowering register arguments after stack arguments may |
- // reduce register pressure. On the other hand, lowering register |
- // arguments first (before stack arguments) may result in more compact |
- // code, as the memory operand displacements may end up being smaller |
- // before any stack adjustment is done. |
+ // Copy arguments to be passed in registers to the appropriate registers. |
+ // TODO: Investigate the impact of lowering arguments passed in registers |
+ // after lowering stack arguments as opposed to the other way around. |
+ // Lowering register arguments after stack arguments may reduce register |
+ // pressure. On the other hand, lowering register arguments first (before |
+ // stack arguments) may result in more compact code, as the memory operand |
+ // displacements may end up being smaller before any stack adjustment is |
+ // done. |
for (SizeT i = 0, NumXmmArgs = XmmArgs.size(); i < NumXmmArgs; ++i) { |
Variable *Reg = |
legalizeToReg(XmmArgs[i], Traits::RegisterSet::Reg_xmm0 + i); |
- // Generate a FakeUse of register arguments so that they do not get |
- // dead code eliminated as a result of the FakeKill of scratch |
- // registers after the call. |
+ // Generate a FakeUse of register arguments so that they do not get dead |
+ // code eliminated as a result of the FakeKill of scratch registers after |
+ // the call. |
Context.insert(InstFakeUse::create(Func, Reg)); |
} |
- // Generate the call instruction. Assign its result to a temporary |
- // with high register allocation weight. |
+ // Generate the call instruction. Assign its result to a temporary with high |
+ // register allocation weight. |
Variable *Dest = Instr->getDest(); |
// ReturnReg doubles as ReturnRegLo as necessary. |
Variable *ReturnReg = nullptr; |
@@ -211,8 +205,8 @@ void TargetX8632::lowerCall(const InstCall *Instr) { |
break; |
case IceType_f32: |
case IceType_f64: |
- // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with |
- // the fstp instruction. |
+ // Leave ReturnReg==ReturnRegHi==nullptr, and capture the result with the |
+ // fstp instruction. |
break; |
case IceType_v4i1: |
case IceType_v8i1: |
@@ -247,8 +241,8 @@ void TargetX8632::lowerCall(const InstCall *Instr) { |
if (ReturnRegHi) |
Context.insert(InstFakeDef::create(Func, ReturnRegHi)); |
- // Add the appropriate offset to esp. The call instruction takes care |
- // of resetting the stack offset during emission. |
+ // Add the appropriate offset to esp. The call instruction takes care of |
+ // resetting the stack offset during emission. |
if (ParameterAreaSizeBytes) { |
Variable *esp = |
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
@@ -287,22 +281,21 @@ void TargetX8632::lowerCall(const InstCall *Instr) { |
} |
} |
} else if (isScalarFloatingType(Dest->getType())) { |
- // Special treatment for an FP function which returns its result in |
- // st(0). |
- // If Dest ends up being a physical xmm register, the fstp emit code |
- // will route st(0) through a temporary stack slot. |
+ // Special treatment for an FP function which returns its result in st(0). |
+ // If Dest ends up being a physical xmm register, the fstp emit code will |
+ // route st(0) through a temporary stack slot. |
_fstp(Dest); |
- // Create a fake use of Dest in case it actually isn't used, |
- // because st(0) still needs to be popped. |
+ // Create a fake use of Dest in case it actually isn't used, because st(0) |
+ // still needs to be popped. |
Context.insert(InstFakeUse::create(Func, Dest)); |
} |
} |
void TargetX8632::lowerArguments() { |
VarList &Args = Func->getArgs(); |
- // The first four arguments of vector type, regardless of their |
- // position relative to the other arguments in the argument list, are |
- // passed in registers xmm0 - xmm3. |
+ // The first four arguments of vector type, regardless of their position |
+ // relative to the other arguments in the argument list, are passed in |
+ // registers xmm0 - xmm3. |
unsigned NumXmmArgs = 0; |
Context.init(Func->getEntryNode()); |
@@ -314,9 +307,9 @@ void TargetX8632::lowerArguments() { |
Type Ty = Arg->getType(); |
if (!isVectorType(Ty)) |
continue; |
- // Replace Arg in the argument list with the home register. Then |
- // generate an instruction in the prolog to copy the home register |
- // to the assigned location of Arg. |
+ // Replace Arg in the argument list with the home register. Then generate |
+ // an instruction in the prolog to copy the home register to the assigned |
+ // location of Arg. |
int32_t RegNum = Traits::RegisterSet::Reg_xmm0 + NumXmmArgs; |
++NumXmmArgs; |
Variable *RegisterArg = Func->makeVariable(Ty); |
@@ -351,15 +344,14 @@ void TargetX8632::lowerRet(const InstRet *Inst) { |
_mov(Reg, Src0, Traits::RegisterSet::Reg_eax); |
} |
} |
- // Add a ret instruction even if sandboxing is enabled, because |
- // addEpilog explicitly looks for a ret instruction as a marker for |
- // where to insert the frame removal instructions. |
+ // Add a ret instruction even if sandboxing is enabled, because addEpilog |
+ // explicitly looks for a ret instruction as a marker for where to insert the |
+ // frame removal instructions. |
_ret(Reg); |
// Add a fake use of esp to make sure esp stays alive for the entire |
- // function. Otherwise post-call esp adjustments get dead-code |
- // eliminated. TODO: Are there more places where the fake use |
- // should be inserted? E.g. "void f(int n){while(1) g(n);}" may not |
- // have a ret instruction. |
+ // function. Otherwise post-call esp adjustments get dead-code eliminated. |
+ // TODO: Are there more places where the fake use should be inserted? E.g. |
+ // "void f(int n){while(1) g(n);}" may not have a ret instruction. |
Variable *esp = |
Func->getTarget()->getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
Context.insert(InstFakeUse::create(Func, esp)); |
@@ -395,16 +387,15 @@ void TargetX8632::addProlog(CfgNode *Node) { |
// * LocalsSpillAreaSize: area 6 |
// * SpillAreaSizeBytes: areas 3 - 7 |
- // Determine stack frame offsets for each Variable without a |
- // register assignment. This can be done as one variable per stack |
- // slot. Or, do coalescing by running the register allocator again |
- // with an infinite set of registers (as a side effect, this gives |
- // variables a second chance at physical register assignment). |
+ // Determine stack frame offsets for each Variable without a register |
+ // assignment. This can be done as one variable per stack slot. Or, do |
+ // coalescing by running the register allocator again with an infinite set of |
+ // registers (as a side effect, this gives variables a second chance at |
+ // physical register assignment). |
// |
- // A middle ground approach is to leverage sparsity and allocate one |
- // block of space on the frame for globals (variables with |
- // multi-block lifetime), and one block to share for locals |
- // (single-block lifetime). |
+ // A middle ground approach is to leverage sparsity and allocate one block of |
+ // space on the frame for globals (variables with multi-block lifetime), and |
+ // one block to share for locals (single-block lifetime). |
Context.init(Node); |
Context.setInsertPoint(Context.getCur()); |
@@ -414,17 +405,16 @@ void TargetX8632::addProlog(CfgNode *Node) { |
RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); |
VarList SortedSpilledVariables, VariablesLinkedToSpillSlots; |
size_t GlobalsSize = 0; |
- // If there is a separate locals area, this represents that area. |
- // Otherwise it counts any variable not counted by GlobalsSize. |
+ // If there is a separate locals area, this represents that area. Otherwise |
+ // it counts any variable not counted by GlobalsSize. |
SpillAreaSizeBytes = 0; |
- // If there is a separate locals area, this specifies the alignment |
- // for it. |
+ // If there is a separate locals area, this specifies the alignment for it. |
uint32_t LocalsSlotsAlignmentBytes = 0; |
- // The entire spill locations area gets aligned to largest natural |
- // alignment of the variables that have a spill slot. |
+ // The entire spill locations area gets aligned to the largest natural |
+ // alignment of the variables that have a spill slot. |
uint32_t SpillAreaAlignmentBytes = 0; |
- // A spill slot linked to a variable with a stack slot should reuse |
- // that stack slot. |
+ // A spill slot linked to a variable with a stack slot should reuse that |
+ // stack slot. |
std::function<bool(Variable *)> TargetVarHook = |
[&VariablesLinkedToSpillSlots](Variable *Var) { |
if (auto *SpillVar = |
@@ -466,15 +456,14 @@ void TargetX8632::addProlog(CfgNode *Node) { |
Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
_push(ebp); |
_mov(ebp, esp); |
- // Keep ebp live for late-stage liveness analysis |
- // (e.g. asm-verbose mode). |
+ // Keep ebp live for late-stage liveness analysis (e.g. asm-verbose mode). |
Context.insert(InstFakeUse::create(Func, ebp)); |
} |
- // Align the variables area. SpillAreaPaddingBytes is the size of |
- // the region after the preserved registers and before the spill areas. |
- // LocalsSlotsPaddingBytes is the amount of padding between the globals |
- // and locals area if they are separate. |
+ // Align the variables area. SpillAreaPaddingBytes is the size of the region |
+ // after the preserved registers and before the spill areas. |
+ // LocalsSlotsPaddingBytes is the amount of padding between the globals and |
+ // locals area if they are separate. |
assert(SpillAreaAlignmentBytes <= Traits::X86_STACK_ALIGNMENT_BYTES); |
assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
uint32_t SpillAreaPaddingBytes = 0; |
@@ -504,9 +493,9 @@ void TargetX8632::addProlog(CfgNode *Node) { |
resetStackAdjustment(); |
- // Fill in stack offsets for stack args, and copy args into registers |
- // for those that were register-allocated. Args are pushed right to |
- // left, so Arg[0] is closest to the stack/frame pointer. |
+ // Fill in stack offsets for stack args, and copy args into registers for |
+ // those that were register-allocated. Args are pushed right to left, so |
+ // Arg[0] is closest to the stack/frame pointer. |
Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg()); |
size_t BasicFrameOffset = |
PreservedRegsSizeBytes + Traits::X86_RET_IP_SIZE_BYTES; |
@@ -576,8 +565,8 @@ void TargetX8632::addEpilog(CfgNode *Node) { |
if (RI == E) |
return; |
- // Convert the reverse_iterator position into its corresponding |
- // (forward) iterator position. |
+ // Convert the reverse_iterator position into its corresponding (forward) |
+ // iterator position. |
InstList::iterator InsertPoint = RI.base(); |
--InsertPoint; |
Context.init(Node); |
@@ -586,9 +575,9 @@ void TargetX8632::addEpilog(CfgNode *Node) { |
Variable *esp = getPhysicalRegister(Traits::RegisterSet::Reg_esp); |
if (IsEbpBasedFrame) { |
Variable *ebp = getPhysicalRegister(Traits::RegisterSet::Reg_ebp); |
- // For late-stage liveness analysis (e.g. asm-verbose mode), |
- // adding a fake use of esp before the assignment of esp=ebp keeps |
- // previous esp adjustments from being dead-code eliminated. |
+ // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake |
+ // use of esp before the assignment of esp=ebp keeps previous esp |
+ // adjustments from being dead-code eliminated. |
Context.insert(InstFakeUse::create(Func, esp)); |
_mov(esp, ebp); |
_pop(ebp); |
@@ -747,8 +736,8 @@ void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) { |
continue; |
typename T::IceType *Const = llvm::cast<typename T::IceType>(C); |
typename T::IceType::PrimType Value = Const->getValue(); |
- // Use memcpy() to copy bits from Value into RawValue in a way |
- // that avoids breaking strict-aliasing rules. |
+ // Use memcpy() to copy bits from Value into RawValue in a way that avoids |
+ // breaking strict-aliasing rules. |
typename T::PrimitiveIntType RawValue; |
memcpy(&RawValue, &Value, sizeof(Value)); |
char buf[30]; |
@@ -766,8 +755,8 @@ void TargetDataX8632::emitConstantPool(GlobalContext *Ctx) { |
void TargetDataX8632::lowerConstants() { |
if (Ctx->getFlags().getDisableTranslation()) |
return; |
- // No need to emit constants from the int pool since (for x86) they |
- // are embedded as immediates in the instructions, just emit float/double. |
+ // No need to emit constants from the int pool since (for x86) they are |
+ // embedded as immediates in the instructions; just emit float/double. |
switch (Ctx->getFlags().getOutFileType()) { |
case FT_Elf: { |
ELFObjectWriter *Writer = Ctx->getObjectWriter(); |
@@ -846,19 +835,17 @@ void TargetDataX8632::lowerGlobals(const VariableDeclarationList &Vars, |
TargetHeaderX8632::TargetHeaderX8632(GlobalContext *Ctx) |
: TargetHeaderLowering(Ctx) {} |
-// In some cases, there are x-macros tables for both high-level and |
-// low-level instructions/operands that use the same enum key value. |
-// The tables are kept separate to maintain a proper separation |
-// between abstraction layers. There is a risk that the tables could |
-// get out of sync if enum values are reordered or if entries are |
-// added or deleted. The following dummy namespaces use |
+// In some cases, there are x-macro tables for both high-level and low-level |
+// instructions/operands that use the same enum key value. The tables are kept |
+// separate to maintain a proper separation between abstraction layers. There |
+// is a risk that the tables could get out of sync if enum values are reordered |
+// or if entries are added or deleted. The following dummy namespaces use |
// static_asserts to ensure everything is kept in sync. |
namespace { |
// Validate the enum values in FCMPX8632_TABLE. |
namespace dummy1 { |
-// Define a temporary set of enum values based on low-level table |
-// entries. |
+// Define a temporary set of enum values based on low-level table entries. |
enum _tmp_enum { |
#define X(val, dflt, swapS, C1, C2, swapV, pred) _tmp_##val, |
FCMPX8632_TABLE |
@@ -869,8 +856,8 @@ enum _tmp_enum { |
#define X(tag, str) static const int _table1_##tag = InstFcmp::tag; |
ICEINSTFCMP_TABLE |
#undef X |
-// Define a set of constants based on low-level table entries, and |
-// ensure the table entry keys are consistent. |
+// Define a set of constants based on low-level table entries, and ensure the |
+// table entry keys are consistent. |
#define X(val, dflt, swapS, C1, C2, swapV, pred) \ |
static const int _table2_##val = _tmp_##val; \ |
static_assert( \ |
@@ -878,8 +865,8 @@ ICEINSTFCMP_TABLE |
"Inconsistency between FCMPX8632_TABLE and ICEINSTFCMP_TABLE"); |
FCMPX8632_TABLE |
#undef X |
-// Repeat the static asserts with respect to the high-level table |
-// entries in case the high-level table has extra entries. |
+// Repeat the static asserts with respect to the high-level table entries in |
+// case the high-level table has extra entries. |
#define X(tag, str) \ |
static_assert( \ |
_table1_##tag == _table2_##tag, \ |
@@ -890,8 +877,7 @@ ICEINSTFCMP_TABLE |
// Validate the enum values in ICMPX8632_TABLE. |
namespace dummy2 { |
-// Define a temporary set of enum values based on low-level table |
-// entries. |
+// Define a temporary set of enum values based on low-level table entries. |
enum _tmp_enum { |
#define X(val, C_32, C1_64, C2_64, C3_64) _tmp_##val, |
ICMPX8632_TABLE |
@@ -902,8 +888,8 @@ enum _tmp_enum { |
#define X(tag, str) static const int _table1_##tag = InstIcmp::tag; |
ICEINSTICMP_TABLE |
#undef X |
-// Define a set of constants based on low-level table entries, and |
-// ensure the table entry keys are consistent. |
+// Define a set of constants based on low-level table entries, and ensure the |
+// table entry keys are consistent. |
#define X(val, C_32, C1_64, C2_64, C3_64) \ |
static const int _table2_##val = _tmp_##val; \ |
static_assert( \ |
@@ -911,8 +897,8 @@ ICEINSTICMP_TABLE |
"Inconsistency between ICMPX8632_TABLE and ICEINSTICMP_TABLE"); |
ICMPX8632_TABLE |
#undef X |
-// Repeat the static asserts with respect to the high-level table |
-// entries in case the high-level table has extra entries. |
+// Repeat the static asserts with respect to the high-level table entries in |
+// case the high-level table has extra entries. |
#define X(tag, str) \ |
static_assert( \ |
_table1_##tag == _table2_##tag, \ |
@@ -923,8 +909,7 @@ ICEINSTICMP_TABLE |
// Validate the enum values in ICETYPEX8632_TABLE. |
namespace dummy3 { |
-// Define a temporary set of enum values based on low-level table |
-// entries. |
+// Define a temporary set of enum values based on low-level table entries. |
enum _tmp_enum { |
#define X(tag, elementty, cvt, sdss, pack, width, fld) _tmp_##tag, |
ICETYPEX8632_TABLE |
@@ -936,16 +921,16 @@ enum _tmp_enum { |
static const int _table1_##tag = tag; |
ICETYPE_TABLE |
#undef X |
-// Define a set of constants based on low-level table entries, and |
-// ensure the table entry keys are consistent. |
+// Define a set of constants based on low-level table entries, and ensure the |
+// table entry keys are consistent. |
#define X(tag, elementty, cvt, sdss, pack, width, fld) \ |
static const int _table2_##tag = _tmp_##tag; \ |
static_assert(_table1_##tag == _table2_##tag, \ |
"Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |
ICETYPEX8632_TABLE |
#undef X |
-// Repeat the static asserts with respect to the high-level table |
-// entries in case the high-level table has extra entries. |
+// Repeat the static asserts with respect to the high-level table entries in |
+// case the high-level table has extra entries. |
#define X(tag, sizeLog2, align, elts, elty, str) \ |
static_assert(_table1_##tag == _table2_##tag, \ |
"Inconsistency between ICETYPEX8632_TABLE and ICETYPE_TABLE"); |