 Chromium Code Reviews
 Chromium Code Reviews Issue 465413003:
  Subzero: Align spill locations to natural alignment.  (Closed) 
  Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
    
  
    Issue 465413003:
  Subzero: Align spill locations to natural alignment.  (Closed) 
  Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master| Index: src/IceTargetLoweringX8632.cpp | 
| diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp | 
| index a78e21a86cc4801c1e764b5c03939a17ce91a51e..773f400f267ab07105d8b8e769aab5aeb79be460 100644 | 
| --- a/src/IceTargetLoweringX8632.cpp | 
| +++ b/src/IceTargetLoweringX8632.cpp | 
| @@ -129,12 +129,18 @@ const uint32_t X86_STACK_ALIGNMENT_BYTES = 16; | 
| // Size of the return address on the stack | 
| const uint32_t X86_RET_IP_SIZE_BYTES = 4; | 
| -// Value is a size in bytes. Return Value adjusted to the next highest | 
| -// multiple of the stack alignment. | 
| -uint32_t applyStackAlignment(uint32_t Value) { | 
| +// Value and Alignment are in bytes. Return Value adjusted to the next | 
| +// highest multiple of Alignment. | 
| +uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) { | 
| // power of 2 | 
| - assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); | 
| - return (Value + X86_STACK_ALIGNMENT_BYTES - 1) & -X86_STACK_ALIGNMENT_BYTES; | 
| + assert((Alignment & (Alignment - 1)) == 0); | 
| + return (Value + Alignment - 1) & -Alignment; | 
| +} | 
| + | 
| +// Value is in bytes. Return Value adjusted to the next highest multiple | 
| +// of the stack alignment. | 
| +uint32_t applyStackAlignment(uint32_t Value) { | 
| + return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); | 
| } | 
| // Instruction set options | 
| @@ -520,6 +526,23 @@ void TargetX8632::lowerArguments() { | 
| } | 
| } | 
| +void TargetX8632::sortByAlignment(VarList &Dest, const VarList &Source) const { | 
| + typedef std::map<uint32_t, VarList> BucketMap; | 
| 
Jim Stichnoth
2014/08/13 23:59:34
Use a list<> or vector<> instead of map<> ?
 
Jim Stichnoth
2014/08/14 16:48:29
Never mind this comment, I misread the code.
 
wala
2014/08/14 17:31:24
There are only 4 possible buckets (corresponding t
 
wala
2014/08/14 17:51:59
Actually, there are 3 usable buckets (4 bytes, 8 b
 | 
| + BucketMap Buckets; | 
| + | 
| + for (VarList::const_iterator I = Source.begin(), E = Source.end(); I != E; | 
| + ++I) { | 
| + uint32_t NaturalAlignment = typeWidthInBytesOnStack((*I)->getType()); | 
| + Buckets[NaturalAlignment].push_back(*I); | 
| + } | 
| + | 
| + for (BucketMap::reverse_iterator I = Buckets.rbegin(), E = Buckets.rend(); | 
| + I != E; ++I) { | 
| + VarList &List = I->second; | 
| + Dest.insert(Dest.end(), List.begin(), List.end()); | 
| + } | 
| +} | 
| + | 
| // Helper function for addProlog(). | 
| // | 
| // This assumes Arg is an argument passed on the stack. This sets the | 
| @@ -599,6 +622,15 @@ void TargetX8632::addProlog(CfgNode *Node) { | 
| RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); | 
| const VarList &Variables = Func->getVariables(); | 
| const VarList &Args = Func->getArgs(); | 
| + VarList SpilledVariables, SortedSpilledVariables, | 
| + VariablesLinkedToSpillSplots; | 
| + | 
| + // If there is a separate locals area, this specifies the alignment | 
| + // for it. | 
| + uint32_t LocalsSlotsAlignmentBytes = 0; | 
| 
Jim Stichnoth
2014/08/13 23:59:34
At this point, the stack frame layout is starting
 
wala
2014/08/14 17:31:24
Done.
 | 
| + // The entire spill locations area gets aligned to largest natural | 
| + // alignment of the variables that have a spill slot. | 
| + uint32_t SpillAreaAlignmentBytes = 0; | 
| for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); | 
| I != E; ++I) { | 
| Variable *Var = *I; | 
| @@ -617,11 +649,23 @@ void TargetX8632::addProlog(CfgNode *Node) { | 
| // that stack slot. | 
| if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { | 
| if (Variable *Linked = Var->getPreferredRegister()) { | 
| - if (!Linked->hasReg()) | 
| + if (!Linked->hasReg()) { | 
| + VariablesLinkedToSpillSplots.push_back(Var); | 
| continue; | 
| + } | 
| } | 
| } | 
| + SpilledVariables.push_back(Var); | 
| + } | 
| + | 
| + sortByAlignment(SortedSpilledVariables, SpilledVariables); | 
| + for (VarList::const_iterator I = SortedSpilledVariables.begin(), | 
| + E = SortedSpilledVariables.end(); | 
| + I != E; ++I) { | 
| + Variable *Var = *I; | 
| size_t Increment = typeWidthInBytesOnStack(Var->getType()); | 
| + if (!SpillAreaAlignmentBytes) | 
| + SpillAreaAlignmentBytes = Increment; | 
| if (SimpleCoalescing) { | 
| if (Var->isMultiblockLife()) { | 
| GlobalsSize += Increment; | 
| @@ -630,6 +674,8 @@ void TargetX8632::addProlog(CfgNode *Node) { | 
| LocalsSize[NodeIndex] += Increment; | 
| if (LocalsSize[NodeIndex] > LocalsSizeBytes) | 
| LocalsSizeBytes = LocalsSize[NodeIndex]; | 
| + if (!LocalsSlotsAlignmentBytes) | 
| + LocalsSlotsAlignmentBytes = Increment; | 
| } | 
| } else { | 
| LocalsSizeBytes += Increment; | 
| @@ -658,11 +704,32 @@ void TargetX8632::addProlog(CfgNode *Node) { | 
| _mov(ebp, esp); | 
| } | 
| + // Align the variables area. | 
| + uint32_t SpillAreaPaddingBytes = 0; | 
| + if (SpillAreaAlignmentBytes) { | 
| + assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES); | 
| + uint32_t SpillAreaOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; | 
| + uint32_t SpillAreaStart = | 
| + applyAlignment(SpillAreaOffset, SpillAreaAlignmentBytes); | 
| + SpillAreaPaddingBytes = SpillAreaStart - SpillAreaOffset; | 
| + LocalsSizeBytes += SpillAreaPaddingBytes; | 
| + } | 
| + | 
| + // If there are separate globals and locals areas, make sure the | 
| + // locals area is aligned by padding the end of the globals area. | 
| + if (LocalsSlotsAlignmentBytes) { | 
| + assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); | 
| + uint32_t NewGlobalsSize = | 
| + applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes); | 
| + GlobalsSize = NewGlobalsSize; | 
| + LocalsSizeBytes += NewGlobalsSize - GlobalsSize; | 
| 
jvoung (off chromium)
2014/08/14 00:48:49
Isn't this difference always going to be zero?
Do
 
wala
2014/08/14 17:31:24
Ouch, I'm surprised all the tests still passed.
D
 | 
| + } | 
| + | 
| + // Align esp if necessary. | 
| if (NeedsStackAlignment) { | 
| - uint32_t StackSize = applyStackAlignment( | 
| - X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes + LocalsSizeBytes); | 
| - LocalsSizeBytes = | 
| - StackSize - X86_RET_IP_SIZE_BYTES - PreservedRegsSizeBytes; | 
| + uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; | 
| + uint32_t StackSize = applyStackAlignment(StackOffset + LocalsSizeBytes); | 
| + LocalsSizeBytes = StackSize - StackOffset; | 
| } | 
| // Generate "sub esp, LocalsSizeBytes" | 
| @@ -696,17 +763,10 @@ void TargetX8632::addProlog(CfgNode *Node) { | 
| GlobalsSize = 0; | 
| LocalsSize.assign(LocalsSize.size(), 0); | 
| size_t NextStackOffset = 0; | 
| - for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); | 
| + for (VarList::const_iterator I = SortedSpilledVariables.begin(), | 
| + E = SortedSpilledVariables.end(); | 
| I != E; ++I) { | 
| Variable *Var = *I; | 
| - if (Var->hasReg()) { | 
| - RegsUsed[Var->getRegNum()] = true; | 
| - continue; | 
| - } | 
| - if (Var->getIsArg()) | 
| - continue; | 
| - if (ComputedLiveRanges && Var->getLiveRange().isEmpty()) | 
| - continue; | 
| if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { | 
| 
wala
2014/08/14 17:31:24
Since I've separated out the list of variables lin
 | 
| if (Variable *Linked = Var->getPreferredRegister()) { | 
| if (!Linked->hasReg()) { | 
| @@ -731,13 +791,23 @@ void TargetX8632::addProlog(CfgNode *Node) { | 
| NextStackOffset += Increment; | 
| } | 
| if (IsEbpBasedFrame) | 
| - Var->setStackOffset(-NextStackOffset); | 
| - else | 
| - Var->setStackOffset(LocalsSizeBytes - NextStackOffset); | 
| + Var->setStackOffset(-NextStackOffset - SpillAreaPaddingBytes); | 
| + else { | 
| 
Jim Stichnoth
2014/08/13 23:59:34
Probably best to have braces around both clauses o
 
wala
2014/08/14 17:31:24
Done.
 | 
| + Var->setStackOffset(LocalsSizeBytes - NextStackOffset - | 
| + SpillAreaPaddingBytes); | 
| + } | 
| } | 
| this->FrameSizeLocals = NextStackOffset; | 
| this->HasComputedFrame = true; | 
| + for (VarList::const_iterator I = VariablesLinkedToSpillSplots.begin(), | 
| + E = VariablesLinkedToSpillSplots.end(); | 
| + I != E; ++I) { | 
| + Variable *Var = *I; | 
| + Variable *Linked = Var->getPreferredRegister(); | 
| + Var->setStackOffset(Linked->getStackOffset()); | 
| + } | 
| + | 
| if (Func->getContext()->isVerbose(IceV_Frame)) { | 
| Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes | 
| << "\n" | 
| @@ -991,8 +1061,7 @@ void TargetX8632::lowerAlloca(const InstAlloca *Inst) { | 
| if (ConstantInteger *ConstantTotalSize = | 
| llvm::dyn_cast<ConstantInteger>(TotalSize)) { | 
| uint32_t Value = ConstantTotalSize->getValue(); | 
| - // Round Value up to the next highest multiple of the alignment. | 
| - Value = (Value + Alignment - 1) & -Alignment; | 
| + Value = applyAlignment(Value, Alignment); | 
| _sub(esp, Ctx->getConstantInt(IceType_i32, Value)); | 
| } else { | 
| // Non-constant sizes need to be adjusted to the next highest | 
| @@ -1239,12 +1308,6 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | 
| } else if (isVectorType(Dest->getType())) { | 
| // TODO: Trap on integer divide and integer modulo by zero. | 
| // See: https://code.google.com/p/nativeclient/issues/detail?id=3899 | 
| - // | 
| - // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in | 
| - // registers. This is a workaround of the fact that there is no | 
| - // support for aligning stack operands. Once there is support, | 
| - // remove LEGAL_HACK. | 
| -#define LEGAL_HACK(s) legalizeToVar((s)) | 
| switch (Inst->getOp()) { | 
| case InstArithmetic::_num: | 
| llvm_unreachable("Unknown arithmetic operator"); | 
| @@ -1252,31 +1315,31 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | 
| case InstArithmetic::Add: { | 
| Variable *T = makeReg(Dest->getType()); | 
| _movp(T, Src0); | 
| - _padd(T, LEGAL_HACK(Src1)); | 
| + _padd(T, Src1); | 
| _movp(Dest, T); | 
| } break; | 
| case InstArithmetic::And: { | 
| Variable *T = makeReg(Dest->getType()); | 
| _movp(T, Src0); | 
| - _pand(T, LEGAL_HACK(Src1)); | 
| + _pand(T, Src1); | 
| _movp(Dest, T); | 
| } break; | 
| case InstArithmetic::Or: { | 
| Variable *T = makeReg(Dest->getType()); | 
| _movp(T, Src0); | 
| - _por(T, LEGAL_HACK(Src1)); | 
| + _por(T, Src1); | 
| _movp(Dest, T); | 
| } break; | 
| case InstArithmetic::Xor: { | 
| Variable *T = makeReg(Dest->getType()); | 
| _movp(T, Src0); | 
| - _pxor(T, LEGAL_HACK(Src1)); | 
| + _pxor(T, Src1); | 
| _movp(Dest, T); | 
| } break; | 
| case InstArithmetic::Sub: { | 
| Variable *T = makeReg(Dest->getType()); | 
| _movp(T, Src0); | 
| - _psub(T, LEGAL_HACK(Src1)); | 
| + _psub(T, Src1); | 
| _movp(Dest, T); | 
| } break; | 
| case InstArithmetic::Mul: { | 
| @@ -1287,7 +1350,7 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | 
| if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { | 
| Variable *T = makeReg(Dest->getType()); | 
| _movp(T, Src0); | 
| - _pmull(T, LEGAL_HACK(Src1)); | 
| + _pmull(T, Src1); | 
| _movp(Dest, T); | 
| } else if (Dest->getType() == IceType_v4i32) { | 
| // Lowering sequence: | 
| @@ -1320,14 +1383,9 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | 
| Variable *T3 = makeReg(IceType_v4i32); | 
| Variable *T4 = makeReg(IceType_v4i32); | 
| _movp(T1, Src0); | 
| - // TODO(wala): ALIGHNHACK: Replace Src0R with Src0 and Src1R | 
| - // with Src1 after stack operand alignment support is | 
| - // implemented. | 
| - Variable *Src0R = LEGAL_HACK(Src0); | 
| - Variable *Src1R = LEGAL_HACK(Src1); | 
| - _pshufd(T2, Src0R, Mask1030); | 
| - _pshufd(T3, Src1R, Mask1030); | 
| - _pmuludq(T1, Src1R); | 
| + _pshufd(T2, Src0, Mask1030); | 
| + _pshufd(T3, Src1, Mask1030); | 
| + _pmuludq(T1, Src1); | 
| _pmuludq(T2, T3); | 
| _shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); | 
| _pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); | 
| @@ -1349,32 +1407,31 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { | 
| case InstArithmetic::Fadd: { | 
| Variable *T = makeReg(Dest->getType()); | 
| _movp(T, Src0); | 
| - _addps(T, LEGAL_HACK(Src1)); | 
| + _addps(T, Src1); | 
| _movp(Dest, T); | 
| } break; | 
| case InstArithmetic::Fsub: { | 
| Variable *T = makeReg(Dest->getType()); | 
| _movp(T, Src0); | 
| - _subps(T, LEGAL_HACK(Src1)); | 
| + _subps(T, Src1); | 
| _movp(Dest, T); | 
| } break; | 
| case InstArithmetic::Fmul: { | 
| Variable *T = makeReg(Dest->getType()); | 
| _movp(T, Src0); | 
| - _mulps(T, LEGAL_HACK(Src1)); | 
| + _mulps(T, Src1); | 
| _movp(Dest, T); | 
| } break; | 
| case InstArithmetic::Fdiv: { | 
| Variable *T = makeReg(Dest->getType()); | 
| _movp(T, Src0); | 
| - _divps(T, LEGAL_HACK(Src1)); | 
| + _divps(T, Src1); | 
| _movp(Dest, T); | 
| } break; | 
| case InstArithmetic::Frem: | 
| scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); | 
| break; | 
| } | 
| -#undef LEGAL_HACK | 
| } else { // Dest->getType() is non-i64 scalar | 
| Variable *T_edx = NULL; | 
| Variable *T = NULL; | 
| @@ -2199,22 +2256,15 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { | 
| _pextr(ExtractedElementR, SourceVectR, Mask); | 
| } else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { | 
| // Use pshufd and movd/movss. | 
| - // | 
| - // ALIGNHACK: Force vector operands to registers in instructions | 
| - // that require aligned memory operands until support for data | 
| - // alignment is implemented. | 
| -#define ALIGN_HACK(Vect) legalizeToVar((Vect)) | 
| - Operand *SourceVectRM = | 
| - legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); | 
| Variable *T = NULL; | 
| if (Index) { | 
| // The shuffle only needs to occur if the element to be extracted | 
| // is not at the lowest index. | 
| Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); | 
| T = makeReg(Ty); | 
| - _pshufd(T, ALIGN_HACK(SourceVectRM), Mask); | 
| + _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask); | 
| } else { | 
| - T = ALIGN_HACK(SourceVectRM); | 
| + T = legalizeToVar(SourceVectNotLegalized); | 
| } | 
| if (InVectorElementTy == IceType_i32) { | 
| @@ -2228,7 +2278,6 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { | 
| Context.insert(InstFakeDef::create(Func, ExtractedElementR)); | 
| _movss(ExtractedElementR, T); | 
| } | 
| -#undef ALIGN_HACK | 
| } else { | 
| assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); | 
| // Spill the value to a stack slot and do the extraction in memory. | 
| @@ -2287,23 +2336,18 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { | 
| Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); | 
| Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); | 
| - // ALIGNHACK: Without support for data alignment, both operands to | 
| - // cmpps need to be forced into registers. Once support for data | 
| - // alignment is implemented, remove LEGAL_HACK. | 
| -#define LEGAL_HACK(Vect) legalizeToVar((Vect)) | 
| switch (Condition) { | 
| default: { | 
| InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; | 
| assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); | 
| T = makeReg(Src0RM->getType()); | 
| _movp(T, Src0RM); | 
| - _cmpps(T, LEGAL_HACK(Src1RM), Predicate); | 
| + _cmpps(T, Src1RM, Predicate); | 
| } break; | 
| case InstFcmp::One: { | 
| // Check both unequal and ordered. | 
| T = makeReg(Src0RM->getType()); | 
| Variable *T2 = makeReg(Src0RM->getType()); | 
| - Src1RM = LEGAL_HACK(Src1RM); | 
| _movp(T, Src0RM); | 
| _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq); | 
| _movp(T2, Src0RM); | 
| @@ -2314,7 +2358,6 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { | 
| // Check both equal or unordered. | 
| T = makeReg(Src0RM->getType()); | 
| Variable *T2 = makeReg(Src0RM->getType()); | 
| - Src1RM = LEGAL_HACK(Src1RM); | 
| _movp(T, Src0RM); | 
| _cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq); | 
| _movp(T2, Src0RM); | 
| @@ -2322,7 +2365,6 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { | 
| _por(T, T2); | 
| } break; | 
| } | 
| -#undef LEGAL_HACK | 
| } | 
| _movp(Dest, T); | 
| @@ -2427,10 +2469,6 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { | 
| Src1RM = T1; | 
| } | 
| - // TODO: ALIGNHACK: Both operands to compare instructions need to be | 
| - // in registers until data alignment support is implemented. Once | 
| - // there is support for data alignment, LEGAL_HACK can be removed. | 
| -#define LEGAL_HACK(Vect) legalizeToVar((Vect)) | 
| Variable *T = makeReg(Ty); | 
| switch (Condition) { | 
| default: | 
| @@ -2438,42 +2476,41 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { | 
| break; | 
| case InstIcmp::Eq: { | 
| _movp(T, Src0RM); | 
| - _pcmpeq(T, LEGAL_HACK(Src1RM)); | 
| + _pcmpeq(T, Src1RM); | 
| } break; | 
| case InstIcmp::Ne: { | 
| _movp(T, Src0RM); | 
| - _pcmpeq(T, LEGAL_HACK(Src1RM)); | 
| + _pcmpeq(T, Src1RM); | 
| Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 
| _pxor(T, MinusOne); | 
| } break; | 
| case InstIcmp::Ugt: | 
| case InstIcmp::Sgt: { | 
| _movp(T, Src0RM); | 
| - _pcmpgt(T, LEGAL_HACK(Src1RM)); | 
| + _pcmpgt(T, Src1RM); | 
| } break; | 
| case InstIcmp::Uge: | 
| case InstIcmp::Sge: { | 
| // !(Src1RM > Src0RM) | 
| _movp(T, Src1RM); | 
| - _pcmpgt(T, LEGAL_HACK(Src0RM)); | 
| + _pcmpgt(T, Src0RM); | 
| Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 
| _pxor(T, MinusOne); | 
| } break; | 
| case InstIcmp::Ult: | 
| case InstIcmp::Slt: { | 
| _movp(T, Src1RM); | 
| - _pcmpgt(T, LEGAL_HACK(Src0RM)); | 
| + _pcmpgt(T, Src0RM); | 
| } break; | 
| case InstIcmp::Ule: | 
| case InstIcmp::Sle: { | 
| // !(Src0RM > Src1RM) | 
| _movp(T, Src0RM); | 
| - _pcmpgt(T, LEGAL_HACK(Src1RM)); | 
| + _pcmpgt(T, Src1RM); | 
| Variable *MinusOne = makeVectorOfMinusOnes(Ty); | 
| _pxor(T, MinusOne); | 
| } break; | 
| } | 
| -#undef LEGAL_HACK | 
| _movp(Dest, T); | 
| eliminateNextVectorSextInstruction(Dest); | 
| @@ -2649,12 +2686,7 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { | 
| Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); | 
| Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); | 
| - // ALIGNHACK: Force vector operands to registers in instructions | 
| - // that require aligned memory operands until support for data | 
| - // alignment is implemented. | 
| -#define ALIGN_HACK(Vect) legalizeToVar((Vect)) | 
| if (Index == 1) { | 
| - SourceVectRM = ALIGN_HACK(SourceVectRM); | 
| _shufps(ElementR, SourceVectRM, Mask1Constant); | 
| _shufps(ElementR, SourceVectRM, Mask2Constant); | 
| _movp(Inst->getDest(), ElementR); | 
| @@ -2665,7 +2697,6 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { | 
| _shufps(T, ElementR, Mask2Constant); | 
| _movp(Inst->getDest(), T); | 
| } | 
| -#undef ALIGN_HACK | 
| } else { | 
| assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); | 
| // Spill the value to a stack slot and perform the insertion in | 
| @@ -3627,10 +3658,6 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { | 
| Variable *T = makeReg(SrcTy); | 
| Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); | 
| Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); | 
| - // ALIGNHACK: Until data alignment support is implemented, vector | 
| - // instructions need to have vector operands in registers. Once | 
| - // there is support for data alignment, LEGAL_HACK can be removed. | 
| -#define LEGAL_HACK(Vect) legalizeToVar((Vect)) | 
| if (InstructionSet >= SSE4_1) { | 
| // TODO(wala): If the condition operand is a constant, use blendps | 
| // or pblendw. | 
| @@ -3643,7 +3670,7 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { | 
| _movp(xmm0, ConditionRM); | 
| _psll(xmm0, Ctx->getConstantInt(IceType_i8, 31)); | 
| _movp(T, SrcFRM); | 
| - _blendvps(T, LEGAL_HACK(SrcTRM), xmm0); | 
| + _blendvps(T, SrcTRM, xmm0); | 
| _movp(Dest, T); | 
| } else { | 
| assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); | 
| @@ -3652,7 +3679,7 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { | 
| Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0); | 
| lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); | 
| _movp(T, SrcFRM); | 
| - _pblendvb(T, LEGAL_HACK(SrcTRM), xmm0); | 
| + _pblendvb(T, SrcTRM, xmm0); | 
| _movp(Dest, T); | 
| } | 
| return; | 
| @@ -3676,11 +3703,10 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { | 
| _movp(T, ConditionRM); | 
| } | 
| _movp(T2, T); | 
| - _pand(T, LEGAL_HACK(SrcTRM)); | 
| - _pandn(T2, LEGAL_HACK(SrcFRM)); | 
| + _pand(T, SrcTRM); | 
| + _pandn(T2, SrcFRM); | 
| _por(T, T2); | 
| _movp(Dest, T); | 
| -#undef LEGAL_HACK | 
| return; | 
| } |