Chromium Code Reviews| Index: src/IceTargetLoweringX8632.cpp |
| diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp |
| index a78e21a86cc4801c1e764b5c03939a17ce91a51e..d3a3cee00869736ca60ed1245af190fa3e84e27a 100644 |
| --- a/src/IceTargetLoweringX8632.cpp |
| +++ b/src/IceTargetLoweringX8632.cpp |
| @@ -24,6 +24,8 @@ |
| #include "IceTargetLoweringX8632.h" |
| #include "llvm/Support/CommandLine.h" |
| +#include <strings.h> |
| + |
| namespace Ice { |
| namespace { |
| @@ -128,13 +130,23 @@ const uint32_t X86_CHAR_BIT = 8; |
| const uint32_t X86_STACK_ALIGNMENT_BYTES = 16; |
| // Size of the return address on the stack |
| const uint32_t X86_RET_IP_SIZE_BYTES = 4; |
| +// The base 2 logarithm of the width in bytes of the smallest stack slot |
| +const uint32_t X86_LOG2_OF_MIN_STACK_SLOT_SIZE = 2; |
| +// The base 2 logarithm of the width in bytes of the largest stack slot |
| +const uint32_t X86_LOG2_OF_MAX_STACK_SLOT_SIZE = 4; |
| + |
| +// Value and Alignment are in bytes. Return Value rounded up to the nearest |
| +// multiple of Alignment (unchanged if Value is already a multiple). |
| +uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) { |
| + // Alignment must be a power of 2 (note: 0 also passes this check). |
| + assert((Alignment & (Alignment - 1)) == 0); |
| + return (Value + Alignment - 1) & -Alignment; |
| +} |
| -// Value is a size in bytes. Return Value adjusted to the next highest |
| -// multiple of the stack alignment. |
| +// Value is in bytes. Return Value rounded up to the nearest multiple of |
| +// the stack alignment, X86_STACK_ALIGNMENT_BYTES. |
| uint32_t applyStackAlignment(uint32_t Value) { |
| - // power of 2 |
| - assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); |
| - return (Value + X86_STACK_ALIGNMENT_BYTES - 1) & -X86_STACK_ALIGNMENT_BYTES; |
| + return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); |
| } |
| // Instruction set options |
| @@ -520,6 +532,30 @@ void TargetX8632::lowerArguments() { |
| } |
| } |
| +void TargetX8632::sortByAlignment(VarList &Dest, const VarList &Source) const { |

Jim Stichnoth
2014/08/14 18:21:19
Does this need to be part of TargetX8632, or can i
wala
2014/08/14 18:24:10
It needs access to TargetLoweringX8632::typeWidthI
|
| + // Bucket the variables by the log of their on-stack width in bytes, |
| + // then append the buckets to Dest from widest to narrowest. |
| + const SizeT NumBuckets = |
| + X86_LOG2_OF_MAX_STACK_SLOT_SIZE - X86_LOG2_OF_MIN_STACK_SLOT_SIZE + 1; |
| + VarList Buckets[NumBuckets]; |
| + |
| + for (VarList::const_iterator I = Source.begin(), E = Source.end(); I != E; |
| + ++I) { |
| + Variable *Var = *I; |
| + uint32_t NaturalAlignment = typeWidthInBytesOnStack(Var->getType()); |
| + SizeT LogNaturalAlignment = ffs(NaturalAlignment) - 1; |
| + assert(LogNaturalAlignment >= X86_LOG2_OF_MIN_STACK_SLOT_SIZE); |
| + assert(LogNaturalAlignment <= X86_LOG2_OF_MAX_STACK_SLOT_SIZE); |
| + SizeT BucketIndex = LogNaturalAlignment - X86_LOG2_OF_MIN_STACK_SLOT_SIZE; |
| + Buckets[BucketIndex].push_back(Var); |
| + } |
| + |
| + for (SizeT I = 0, E = NumBuckets; I < E; ++I) { |
| + VarList &List = Buckets[NumBuckets - I - 1]; |
| + Dest.insert(Dest.end(), List.begin(), List.end()); |
| + } |
| +} |
| + |
| // Helper function for addProlog(). |
| // |
| // This assumes Arg is an argument passed on the stack. This sets the |
| @@ -563,6 +599,35 @@ void TargetX8632::finishArgumentLowering(Variable *Arg, Variable *FramePtr, |
| Type TargetX8632::stackSlotType() { return IceType_i32; } |
| void TargetX8632::addProlog(CfgNode *Node) { |
| + // Stack frame layout: |
| + // |
| + // +------------------------+ |
| + // | 1. return address | |
| + // +------------------------+ |
| + // | 2. preserved registers | |
| + // +------------------------+ |
| + // | 3. padding | |
| + // +------------------------+ |
| + // | 4. global spill area | |
| + // +------------------------+ |
| + // | 5. padding | |
| + // +------------------------+ |
| + // | 6. local spill area | |
| + // +------------------------+ |
| + // | 7. padding | |
| + // +------------------------+ |
| + // | 8. local variables | |
| + // +------------------------+ |
| + // |
| + // The following variables record the size in bytes of the given areas: |
| + // * X86_RET_IP_SIZE_BYTES: area 1 |
| + // * PreservedRegsSizeBytes: area 2 |
| + // * SpillAreaPaddingBytes: area 3 |
| + // * GlobalsSize: area 4 |
| + // * GlobalsAndSubsequentPaddingSize: areas 4 - 5 |
| + // * LocalsSpillAreaSize: area 6 |
| + // * LocalsSizeBytes: areas 3 - 7 |
|
jvoung (off chromium)
2014/08/14 18:40:59
There's a couple of notions of Locals here, local-
wala
2014/08/14 19:47:01
Done.
|
| + |
| // If SimpleCoalescing is false, each variable without a register |
| // gets its own unique stack slot, which leads to large stack |
| // frames. If SimpleCoalescing is true, then each "global" variable |
| @@ -599,6 +664,15 @@ void TargetX8632::addProlog(CfgNode *Node) { |
| RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); |
| const VarList &Variables = Func->getVariables(); |
| const VarList &Args = Func->getArgs(); |
| + VarList SpilledVariables, SortedSpilledVariables, |
| + VariablesLinkedToSpillSplots; |
| + |
| + // If there is a separate locals area, this specifies the alignment |
| + // for it. |
| + uint32_t LocalsSlotsAlignmentBytes = 0; |
| + // The entire spill locations area gets aligned to the largest natural |
| + // alignment of the variables that have a spill slot. |
| + uint32_t SpillAreaAlignmentBytes = 0; |
| for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); |
| I != E; ++I) { |
| Variable *Var = *I; |
| @@ -617,11 +691,23 @@ void TargetX8632::addProlog(CfgNode *Node) { |
| // that stack slot. |
| if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { |
| if (Variable *Linked = Var->getPreferredRegister()) { |
| - if (!Linked->hasReg()) |
| + if (!Linked->hasReg()) { |
| + VariablesLinkedToSpillSplots.push_back(Var); |
| continue; |
| + } |
| } |
| } |
| + SpilledVariables.push_back(Var); |
| + } |
| + |
| + sortByAlignment(SortedSpilledVariables, SpilledVariables); |
| + for (VarList::const_iterator I = SortedSpilledVariables.begin(), |
| + E = SortedSpilledVariables.end(); |
| + I != E; ++I) { |
| + Variable *Var = *I; |
| size_t Increment = typeWidthInBytesOnStack(Var->getType()); |
| + if (!SpillAreaAlignmentBytes) |
| + SpillAreaAlignmentBytes = Increment; |
| if (SimpleCoalescing) { |
| if (Var->isMultiblockLife()) { |
| GlobalsSize += Increment; |
| @@ -630,11 +716,15 @@ void TargetX8632::addProlog(CfgNode *Node) { |
| LocalsSize[NodeIndex] += Increment; |
| if (LocalsSize[NodeIndex] > LocalsSizeBytes) |
| LocalsSizeBytes = LocalsSize[NodeIndex]; |
| + if (!LocalsSlotsAlignmentBytes) |
| + LocalsSlotsAlignmentBytes = Increment; |
| } |
| } else { |
| LocalsSizeBytes += Increment; |
| } |
| } |
| + uint32_t LocalsSpillAreaSize = LocalsSizeBytes; |
| + |
| LocalsSizeBytes += GlobalsSize; |
| // Add push instructions for preserved registers. |
| @@ -658,11 +748,34 @@ void TargetX8632::addProlog(CfgNode *Node) { |
| _mov(ebp, esp); |
| } |
| + // Align the variables area. SpillAreaPaddingBytes is the size of |
| + // the region after the preserved registers and before the spill |
| + // areas. |
| + uint32_t SpillAreaPaddingBytes = 0; |
| + if (SpillAreaAlignmentBytes) { |
| + assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES); |
| + uint32_t PaddingStart = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; |
| + uint32_t SpillAreaStart = |
| + applyAlignment(PaddingStart, SpillAreaAlignmentBytes); |
| + SpillAreaPaddingBytes = SpillAreaStart - PaddingStart; |
| + LocalsSizeBytes += SpillAreaPaddingBytes; |
| + } |
| + |
| + // If there are separate globals and locals areas, make sure the |
| + // locals area is aligned by padding the end of the globals area. |
| + uint32_t GlobalsAndSubsequentPaddingSize = GlobalsSize; |
| + if (LocalsSlotsAlignmentBytes) { |
| + assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
| + GlobalsAndSubsequentPaddingSize = |
| + applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes); |
| + LocalsSizeBytes += GlobalsAndSubsequentPaddingSize - GlobalsSize; |
| + } |
| + |
| + // Align esp if necessary. |
| if (NeedsStackAlignment) { |
| - uint32_t StackSize = applyStackAlignment( |
| - X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes + LocalsSizeBytes); |
| - LocalsSizeBytes = |
| - StackSize - X86_RET_IP_SIZE_BYTES - PreservedRegsSizeBytes; |
| + uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; |
| + uint32_t StackSize = applyStackAlignment(StackOffset + LocalsSizeBytes); |
| + LocalsSizeBytes = StackSize - StackOffset; |
| } |
| // Generate "sub esp, LocalsSizeBytes" |
| @@ -692,40 +805,24 @@ void TargetX8632::addProlog(CfgNode *Node) { |
| } |
| // Fill in stack offsets for locals. |
| - size_t TotalGlobalsSize = GlobalsSize; |
| - GlobalsSize = 0; |
| + size_t GlobalsSpaceUsed = SpillAreaPaddingBytes; |
| LocalsSize.assign(LocalsSize.size(), 0); |
| - size_t NextStackOffset = 0; |
| - for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); |
| + size_t NextStackOffset = GlobalsSpaceUsed; |
| + for (VarList::const_iterator I = SortedSpilledVariables.begin(), |
| + E = SortedSpilledVariables.end(); |
| I != E; ++I) { |
| Variable *Var = *I; |
| - if (Var->hasReg()) { |
| - RegsUsed[Var->getRegNum()] = true; |
| - continue; |
| - } |
| - if (Var->getIsArg()) |
| - continue; |
| - if (ComputedLiveRanges && Var->getLiveRange().isEmpty()) |
| - continue; |
| - if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { |
| - if (Variable *Linked = Var->getPreferredRegister()) { |
| - if (!Linked->hasReg()) { |
| - // TODO: Make sure Linked has already been assigned a stack |
| - // slot. |
| - Var->setStackOffset(Linked->getStackOffset()); |
| - continue; |
| - } |
| - } |
| - } |
| size_t Increment = typeWidthInBytesOnStack(Var->getType()); |
| if (SimpleCoalescing) { |
| if (Var->isMultiblockLife()) { |
| - GlobalsSize += Increment; |
| - NextStackOffset = GlobalsSize; |
| + GlobalsSpaceUsed += Increment; |
| + NextStackOffset = GlobalsSpaceUsed; |
| } else { |
| SizeT NodeIndex = Var->getLocalUseNode()->getIndex(); |
| LocalsSize[NodeIndex] += Increment; |
| - NextStackOffset = TotalGlobalsSize + LocalsSize[NodeIndex]; |
| + NextStackOffset = SpillAreaPaddingBytes + |
| + GlobalsAndSubsequentPaddingSize + |
| + LocalsSize[NodeIndex]; |
| } |
| } else { |
| NextStackOffset += Increment; |
| @@ -735,16 +832,43 @@ void TargetX8632::addProlog(CfgNode *Node) { |
| else |
| Var->setStackOffset(LocalsSizeBytes - NextStackOffset); |
| } |
| - this->FrameSizeLocals = NextStackOffset; |
| + this->FrameSizeLocals = NextStackOffset - SpillAreaPaddingBytes; |
| this->HasComputedFrame = true; |
| + // Assign stack offsets to variables that have been linked to spilled |
| + // variables. |
| + for (VarList::const_iterator I = VariablesLinkedToSpillSplots.begin(), |
| + E = VariablesLinkedToSpillSplots.end(); |
| + I != E; ++I) { |
| + Variable *Var = *I; |
| + Variable *Linked = Var->getPreferredRegister(); |
| + Var->setStackOffset(Linked->getStackOffset()); |
| + } |
| + |
| if (Func->getContext()->isVerbose(IceV_Frame)) { |
| - Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes |
| - << "\n" |
| - << "InArgsSizeBytes=" << InArgsSizeBytes |
| - << "\n" |
| - << "PreservedRegsSizeBytes=" |
| - << PreservedRegsSizeBytes << "\n"; |
| + Ostream &Str = Func->getContext()->getStrDump(); |
| + |
| + Str << "Stack layout:\n"; |
| + uint32_t EspAdjustmentPaddingSize = LocalsSizeBytes - LocalsSpillAreaSize - |
| + GlobalsAndSubsequentPaddingSize - |
| + SpillAreaPaddingBytes; |
| + Str << " in-args = " << InArgsSizeBytes << " bytes\n" |
| + << " return address = " << X86_RET_IP_SIZE_BYTES << " bytes\n" |
| + << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n" |
| + << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n" |
| + << " globals spill area = " << GlobalsSize << " bytes\n" |
| + << " globals-locals spill areas intermediate padding = " |
| + << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n" |
| + << " locals spill area = " << LocalsSpillAreaSize << " bytes\n" |
| + << " esp alignment padding = " << EspAdjustmentPaddingSize |
| + << " bytes\n"; |
| + |
| + Str << "Stack details:\n" |
| + << " esp adjustment = " << LocalsSizeBytes << " bytes\n" |
| + << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n" |
| + << " locals spill area alignment = " << LocalsSlotsAlignmentBytes |
| + << " bytes\n" |
| + << " is ebp based = " << IsEbpBasedFrame << "\n"; |
| } |
| } |
| @@ -991,8 +1115,7 @@ void TargetX8632::lowerAlloca(const InstAlloca *Inst) { |
| if (ConstantInteger *ConstantTotalSize = |
| llvm::dyn_cast<ConstantInteger>(TotalSize)) { |
| uint32_t Value = ConstantTotalSize->getValue(); |
| - // Round Value up to the next highest multiple of the alignment. |
| - Value = (Value + Alignment - 1) & -Alignment; |
| + Value = applyAlignment(Value, Alignment); |
| _sub(esp, Ctx->getConstantInt(IceType_i32, Value)); |
| } else { |
| // Non-constant sizes need to be adjusted to the next highest |
| @@ -1239,12 +1362,6 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
| } else if (isVectorType(Dest->getType())) { |
| // TODO: Trap on integer divide and integer modulo by zero. |
| // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 |
| - // |
| - // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in |
| - // registers. This is a workaround of the fact that there is no |
| - // support for aligning stack operands. Once there is support, |
| - // remove LEGAL_HACK. |
| -#define LEGAL_HACK(s) legalizeToVar((s)) |
| switch (Inst->getOp()) { |
| case InstArithmetic::_num: |
| llvm_unreachable("Unknown arithmetic operator"); |
| @@ -1252,31 +1369,31 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
| case InstArithmetic::Add: { |
| Variable *T = makeReg(Dest->getType()); |
| _movp(T, Src0); |
| - _padd(T, LEGAL_HACK(Src1)); |
| + _padd(T, Src1); |
| _movp(Dest, T); |
| } break; |
| case InstArithmetic::And: { |
| Variable *T = makeReg(Dest->getType()); |
| _movp(T, Src0); |
| - _pand(T, LEGAL_HACK(Src1)); |
| + _pand(T, Src1); |
| _movp(Dest, T); |
| } break; |
| case InstArithmetic::Or: { |
| Variable *T = makeReg(Dest->getType()); |
| _movp(T, Src0); |
| - _por(T, LEGAL_HACK(Src1)); |
| + _por(T, Src1); |
| _movp(Dest, T); |
| } break; |
| case InstArithmetic::Xor: { |
| Variable *T = makeReg(Dest->getType()); |
| _movp(T, Src0); |
| - _pxor(T, LEGAL_HACK(Src1)); |
| + _pxor(T, Src1); |
| _movp(Dest, T); |
| } break; |
| case InstArithmetic::Sub: { |
| Variable *T = makeReg(Dest->getType()); |
| _movp(T, Src0); |
| - _psub(T, LEGAL_HACK(Src1)); |
| + _psub(T, Src1); |
| _movp(Dest, T); |
| } break; |
| case InstArithmetic::Mul: { |
| @@ -1287,7 +1404,7 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
| if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { |
| Variable *T = makeReg(Dest->getType()); |
| _movp(T, Src0); |
| - _pmull(T, LEGAL_HACK(Src1)); |
| + _pmull(T, Src1); |
| _movp(Dest, T); |
| } else if (Dest->getType() == IceType_v4i32) { |
| // Lowering sequence: |
| @@ -1320,14 +1437,9 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
| Variable *T3 = makeReg(IceType_v4i32); |
| Variable *T4 = makeReg(IceType_v4i32); |
| _movp(T1, Src0); |
| - // TODO(wala): ALIGHNHACK: Replace Src0R with Src0 and Src1R |
| - // with Src1 after stack operand alignment support is |
| - // implemented. |
| - Variable *Src0R = LEGAL_HACK(Src0); |
| - Variable *Src1R = LEGAL_HACK(Src1); |
| - _pshufd(T2, Src0R, Mask1030); |
| - _pshufd(T3, Src1R, Mask1030); |
| - _pmuludq(T1, Src1R); |
| + _pshufd(T2, Src0, Mask1030); |
| + _pshufd(T3, Src1, Mask1030); |
| + _pmuludq(T1, Src1); |
| _pmuludq(T2, T3); |
| _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); |
| _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); |
| @@ -1349,32 +1461,31 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
| case InstArithmetic::Fadd: { |
| Variable *T = makeReg(Dest->getType()); |
| _movp(T, Src0); |
| - _addps(T, LEGAL_HACK(Src1)); |
| + _addps(T, Src1); |
| _movp(Dest, T); |
| } break; |
| case InstArithmetic::Fsub: { |
| Variable *T = makeReg(Dest->getType()); |
| _movp(T, Src0); |
| - _subps(T, LEGAL_HACK(Src1)); |
| + _subps(T, Src1); |
| _movp(Dest, T); |
| } break; |
| case InstArithmetic::Fmul: { |
| Variable *T = makeReg(Dest->getType()); |
| _movp(T, Src0); |
| - _mulps(T, LEGAL_HACK(Src1)); |
| + _mulps(T, Src1); |
| _movp(Dest, T); |
| } break; |
| case InstArithmetic::Fdiv: { |
| Variable *T = makeReg(Dest->getType()); |
| _movp(T, Src0); |
| - _divps(T, LEGAL_HACK(Src1)); |
| + _divps(T, Src1); |
| _movp(Dest, T); |
| } break; |
| case InstArithmetic::Frem: |
| scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
| break; |
| } |
| -#undef LEGAL_HACK |
| } else { // Dest->getType() is non-i64 scalar |
| Variable *T_edx = NULL; |
| Variable *T = NULL; |
| @@ -2199,22 +2310,15 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { |
| _pextr(ExtractedElementR, SourceVectR, Mask); |
| } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
| // Use pshufd and movd/movss. |
| - // |
| - // ALIGNHACK: Force vector operands to registers in instructions |
| - // that require aligned memory operands until support for data |
| - // alignment is implemented. |
| -#define ALIGN_HACK(Vect) legalizeToVar((Vect)) |
| - Operand *SourceVectRM = |
| - legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
| Variable *T = NULL; |
| if (Index) { |
| // The shuffle only needs to occur if the element to be extracted |
| // is not at the lowest index. |
| Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); |
| T = makeReg(Ty); |
| - _pshufd(T, ALIGN_HACK(SourceVectRM), Mask); |
| + _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask); |
| } else { |
| - T = ALIGN_HACK(SourceVectRM); |
| + T = legalizeToVar(SourceVectNotLegalized); |
| } |
| if (InVectorElementTy == IceType_i32) { |
| @@ -2228,7 +2332,6 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { |
| Context.insert(InstFakeDef::create(Func, ExtractedElementR)); |
| _movss(ExtractedElementR, T); |
| } |
| -#undef ALIGN_HACK |
| } else { |
| assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); |
| // Spill the value to a stack slot and do the extraction in memory. |
| @@ -2287,23 +2390,18 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { |
| Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
| Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
| - // ALIGNHACK: Without support for data alignment, both operands to |
| - // cmpps need to be forced into registers. Once support for data |
| - // alignment is implemented, remove LEGAL_HACK. |
| -#define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
| switch (Condition) { |
| default: { |
| InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; |
| assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); |
| T = makeReg(Src0RM->getType()); |
| _movp(T, Src0RM); |
| - _cmpps(T, LEGAL_HACK(Src1RM), Predicate); |
| + _cmpps(T, Src1RM, Predicate); |
| } break; |
| case InstFcmp::One: { |
| // Check both unequal and ordered. |
| T = makeReg(Src0RM->getType()); |
| Variable *T2 = makeReg(Src0RM->getType()); |
| - Src1RM = LEGAL_HACK(Src1RM); |
| _movp(T, Src0RM); |
| _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq); |
| _movp(T2, Src0RM); |
| @@ -2314,7 +2412,6 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { |
| // Check both equal or unordered. |
| T = makeReg(Src0RM->getType()); |
| Variable *T2 = makeReg(Src0RM->getType()); |
| - Src1RM = LEGAL_HACK(Src1RM); |
| _movp(T, Src0RM); |
| _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq); |
| _movp(T2, Src0RM); |
| @@ -2322,7 +2419,6 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { |
| _por(T, T2); |
| } break; |
| } |
| -#undef LEGAL_HACK |
| } |
| _movp(Dest, T); |
| @@ -2427,10 +2523,6 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
| Src1RM = T1; |
| } |
| - // TODO: ALIGNHACK: Both operands to compare instructions need to be |
| - // in registers until data alignment support is implemented. Once |
| - // there is support for data alignment, LEGAL_HACK can be removed. |
| -#define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
| Variable *T = makeReg(Ty); |
| switch (Condition) { |
| default: |
| @@ -2438,42 +2530,41 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
| break; |
| case InstIcmp::Eq: { |
| _movp(T, Src0RM); |
| - _pcmpeq(T, LEGAL_HACK(Src1RM)); |
| + _pcmpeq(T, Src1RM); |
| } break; |
| case InstIcmp::Ne: { |
| _movp(T, Src0RM); |
| - _pcmpeq(T, LEGAL_HACK(Src1RM)); |
| + _pcmpeq(T, Src1RM); |
| Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| _pxor(T, MinusOne); |
| } break; |
| case InstIcmp::Ugt: |
| case InstIcmp::Sgt: { |
| _movp(T, Src0RM); |
| - _pcmpgt(T, LEGAL_HACK(Src1RM)); |
| + _pcmpgt(T, Src1RM); |
| } break; |
| case InstIcmp::Uge: |
| case InstIcmp::Sge: { |
| // !(Src1RM > Src0RM) |
| _movp(T, Src1RM); |
| - _pcmpgt(T, LEGAL_HACK(Src0RM)); |
| + _pcmpgt(T, Src0RM); |
| Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| _pxor(T, MinusOne); |
| } break; |
| case InstIcmp::Ult: |
| case InstIcmp::Slt: { |
| _movp(T, Src1RM); |
| - _pcmpgt(T, LEGAL_HACK(Src0RM)); |
| + _pcmpgt(T, Src0RM); |
| } break; |
| case InstIcmp::Ule: |
| case InstIcmp::Sle: { |
| // !(Src0RM > Src1RM) |
| _movp(T, Src0RM); |
| - _pcmpgt(T, LEGAL_HACK(Src1RM)); |
| + _pcmpgt(T, Src1RM); |
| Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
| _pxor(T, MinusOne); |
| } break; |
| } |
| -#undef LEGAL_HACK |
| _movp(Dest, T); |
| eliminateNextVectorSextInstruction(Dest); |
| @@ -2649,12 +2740,7 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { |
| Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); |
| Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); |
| - // ALIGNHACK: Force vector operands to registers in instructions |
| - // that require aligned memory operands until support for data |
| - // alignment is implemented. |
| -#define ALIGN_HACK(Vect) legalizeToVar((Vect)) |
| if (Index == 1) { |
| - SourceVectRM = ALIGN_HACK(SourceVectRM); |
| _shufps(ElementR, SourceVectRM, Mask1Constant); |
| _shufps(ElementR, SourceVectRM, Mask2Constant); |
| _movp(Inst->getDest(), ElementR); |
| @@ -2665,7 +2751,6 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { |
| _shufps(T, ElementR, Mask2Constant); |
| _movp(Inst->getDest(), T); |
| } |
| -#undef ALIGN_HACK |
| } else { |
| assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); |
| // Spill the value to a stack slot and perform the insertion in |
| @@ -3627,10 +3712,6 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { |
| Variable *T = makeReg(SrcTy); |
| Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); |
| Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); |
| - // ALIGNHACK: Until data alignment support is implemented, vector |
| - // instructions need to have vector operands in registers. Once |
| - // there is support for data alignment, LEGAL_HACK can be removed. |
| -#define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
| if (InstructionSet >= SSE4_1) { |
| // TODO(wala): If the condition operand is a constant, use blendps |
| // or pblendw. |
| @@ -3643,7 +3724,7 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { |
| _movp(xmm0, ConditionRM); |
| _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31)); |
| _movp(T, SrcFRM); |
| - _blendvps(T, LEGAL_HACK(SrcTRM), xmm0); |
| + _blendvps(T, SrcTRM, xmm0); |
| _movp(Dest, T); |
| } else { |
| assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); |
| @@ -3652,7 +3733,7 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { |
| Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0); |
| lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); |
| _movp(T, SrcFRM); |
| - _pblendvb(T, LEGAL_HACK(SrcTRM), xmm0); |
| + _pblendvb(T, SrcTRM, xmm0); |
| _movp(Dest, T); |
| } |
| return; |
| @@ -3676,11 +3757,10 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { |
| _movp(T, ConditionRM); |
| } |
| _movp(T2, T); |
| - _pand(T, LEGAL_HACK(SrcTRM)); |
| - _pandn(T2, LEGAL_HACK(SrcFRM)); |
| + _pand(T, SrcTRM); |
| + _pandn(T2, SrcFRM); |
| _por(T, T2); |
| _movp(Dest, T); |
| -#undef LEGAL_HACK |
| return; |
| } |