Index: src/IceTargetLoweringX8632.cpp |
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp |
index a78e21a86cc4801c1e764b5c03939a17ce91a51e..773f400f267ab07105d8b8e769aab5aeb79be460 100644 |
--- a/src/IceTargetLoweringX8632.cpp |
+++ b/src/IceTargetLoweringX8632.cpp |
@@ -129,12 +129,18 @@ const uint32_t X86_STACK_ALIGNMENT_BYTES = 16; |
// Size of the return address on the stack |
const uint32_t X86_RET_IP_SIZE_BYTES = 4; |
-// Value is a size in bytes. Return Value adjusted to the next highest |
-// multiple of the stack alignment. |
-uint32_t applyStackAlignment(uint32_t Value) { |
+// Value and Alignment are in bytes. Return Value adjusted to the next |
+// highest multiple of Alignment. |
+uint32_t applyAlignment(uint32_t Value, uint32_t Alignment) { |
// power of 2 |
- assert((X86_STACK_ALIGNMENT_BYTES & (X86_STACK_ALIGNMENT_BYTES - 1)) == 0); |
- return (Value + X86_STACK_ALIGNMENT_BYTES - 1) & -X86_STACK_ALIGNMENT_BYTES; |
+ assert((Alignment & (Alignment - 1)) == 0); |
+ return (Value + Alignment - 1) & -Alignment; |
+} |
+ |
+// Value is in bytes. Return Value adjusted to the next highest multiple |
+// of the stack alignment. |
+uint32_t applyStackAlignment(uint32_t Value) { |
+ return applyAlignment(Value, X86_STACK_ALIGNMENT_BYTES); |
} |
// Instruction set options |
@@ -520,6 +526,23 @@ void TargetX8632::lowerArguments() { |
} |
} |
+void TargetX8632::sortByAlignment(VarList &Dest, const VarList &Source) const { |
+ typedef std::map<uint32_t, VarList> BucketMap; |
Jim Stichnoth
2014/08/13 23:59:34
Use a list<> or vector<> instead of map<> ?
Jim Stichnoth
2014/08/14 16:48:29
Never mind this comment, I misread the code.
wala
2014/08/14 17:31:24
There are only 4 possible buckets (corresponding t
wala
2014/08/14 17:51:59
Actually, there are 3 usable buckets (4 bytes, 8 b
|
+ BucketMap Buckets; |
+ |
+ for (VarList::const_iterator I = Source.begin(), E = Source.end(); I != E; |
+ ++I) { |
+ uint32_t NaturalAlignment = typeWidthInBytesOnStack((*I)->getType()); |
+ Buckets[NaturalAlignment].push_back(*I); |
+ } |
+ |
+ for (BucketMap::reverse_iterator I = Buckets.rbegin(), E = Buckets.rend(); |
+ I != E; ++I) { |
+ VarList &List = I->second; |
+ Dest.insert(Dest.end(), List.begin(), List.end()); |
+ } |
+} |
+ |
// Helper function for addProlog(). |
// |
// This assumes Arg is an argument passed on the stack. This sets the |
@@ -599,6 +622,15 @@ void TargetX8632::addProlog(CfgNode *Node) { |
RegsUsed = llvm::SmallBitVector(CalleeSaves.size()); |
const VarList &Variables = Func->getVariables(); |
const VarList &Args = Func->getArgs(); |
+ VarList SpilledVariables, SortedSpilledVariables, |
+ VariablesLinkedToSpillSplots; |
+ |
+ // If there is a separate locals area, this specifies the alignment |
+ // for it. |
+ uint32_t LocalsSlotsAlignmentBytes = 0; |
Jim Stichnoth
2014/08/13 23:59:34
At this point, the stack frame layout is starting
wala
2014/08/14 17:31:24
Done.
|
+ // The entire spill locations area gets aligned to largest natural |
+ // alignment of the variables that have a spill slot. |
+ uint32_t SpillAreaAlignmentBytes = 0; |
for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); |
I != E; ++I) { |
Variable *Var = *I; |
@@ -617,11 +649,23 @@ void TargetX8632::addProlog(CfgNode *Node) { |
// that stack slot. |
if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { |
if (Variable *Linked = Var->getPreferredRegister()) { |
- if (!Linked->hasReg()) |
+ if (!Linked->hasReg()) { |
+ VariablesLinkedToSpillSplots.push_back(Var); |
continue; |
+ } |
} |
} |
+ SpilledVariables.push_back(Var); |
+ } |
+ |
+ sortByAlignment(SortedSpilledVariables, SpilledVariables); |
+ for (VarList::const_iterator I = SortedSpilledVariables.begin(), |
+ E = SortedSpilledVariables.end(); |
+ I != E; ++I) { |
+ Variable *Var = *I; |
size_t Increment = typeWidthInBytesOnStack(Var->getType()); |
+ if (!SpillAreaAlignmentBytes) |
+ SpillAreaAlignmentBytes = Increment; |
if (SimpleCoalescing) { |
if (Var->isMultiblockLife()) { |
GlobalsSize += Increment; |
@@ -630,6 +674,8 @@ void TargetX8632::addProlog(CfgNode *Node) { |
LocalsSize[NodeIndex] += Increment; |
if (LocalsSize[NodeIndex] > LocalsSizeBytes) |
LocalsSizeBytes = LocalsSize[NodeIndex]; |
+ if (!LocalsSlotsAlignmentBytes) |
+ LocalsSlotsAlignmentBytes = Increment; |
} |
} else { |
LocalsSizeBytes += Increment; |
@@ -658,11 +704,32 @@ void TargetX8632::addProlog(CfgNode *Node) { |
_mov(ebp, esp); |
} |
+ // Align the variables area. |
+ uint32_t SpillAreaPaddingBytes = 0; |
+ if (SpillAreaAlignmentBytes) { |
+ assert(SpillAreaAlignmentBytes <= X86_STACK_ALIGNMENT_BYTES); |
+ uint32_t SpillAreaOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; |
+ uint32_t SpillAreaStart = |
+ applyAlignment(SpillAreaOffset, SpillAreaAlignmentBytes); |
+ SpillAreaPaddingBytes = SpillAreaStart - SpillAreaOffset; |
+ LocalsSizeBytes += SpillAreaPaddingBytes; |
+ } |
+ |
+ // If there are separate globals and locals areas, make sure the |
+ // locals area is aligned by padding the end of the globals area. |
+ if (LocalsSlotsAlignmentBytes) { |
+ assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes); |
+ uint32_t NewGlobalsSize = |
+ applyAlignment(GlobalsSize, LocalsSlotsAlignmentBytes); |
+ GlobalsSize = NewGlobalsSize; |
+ LocalsSizeBytes += NewGlobalsSize - GlobalsSize; |
jvoung (off chromium)
2014/08/14 00:48:49
Isn't this difference always going to be zero?
Do
wala
2014/08/14 17:31:24
Ouch, I'm surprised all the tests still passed.
D
|
+ } |
+ |
+ // Align esp if necessary. |
if (NeedsStackAlignment) { |
- uint32_t StackSize = applyStackAlignment( |
- X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes + LocalsSizeBytes); |
- LocalsSizeBytes = |
- StackSize - X86_RET_IP_SIZE_BYTES - PreservedRegsSizeBytes; |
+ uint32_t StackOffset = X86_RET_IP_SIZE_BYTES + PreservedRegsSizeBytes; |
+ uint32_t StackSize = applyStackAlignment(StackOffset + LocalsSizeBytes); |
+ LocalsSizeBytes = StackSize - StackOffset; |
} |
// Generate "sub esp, LocalsSizeBytes" |
@@ -696,17 +763,10 @@ void TargetX8632::addProlog(CfgNode *Node) { |
GlobalsSize = 0; |
LocalsSize.assign(LocalsSize.size(), 0); |
size_t NextStackOffset = 0; |
- for (VarList::const_iterator I = Variables.begin(), E = Variables.end(); |
+ for (VarList::const_iterator I = SortedSpilledVariables.begin(), |
+ E = SortedSpilledVariables.end(); |
I != E; ++I) { |
Variable *Var = *I; |
- if (Var->hasReg()) { |
- RegsUsed[Var->getRegNum()] = true; |
- continue; |
- } |
- if (Var->getIsArg()) |
- continue; |
- if (ComputedLiveRanges && Var->getLiveRange().isEmpty()) |
- continue; |
if (Var->getWeight() == RegWeight::Zero && Var->getRegisterOverlap()) { |
wala
2014/08/14 17:31:24
Since I've separated out the list of variables lin
|
if (Variable *Linked = Var->getPreferredRegister()) { |
if (!Linked->hasReg()) { |
@@ -731,13 +791,23 @@ void TargetX8632::addProlog(CfgNode *Node) { |
NextStackOffset += Increment; |
} |
if (IsEbpBasedFrame) |
- Var->setStackOffset(-NextStackOffset); |
- else |
- Var->setStackOffset(LocalsSizeBytes - NextStackOffset); |
+ Var->setStackOffset(-NextStackOffset - SpillAreaPaddingBytes); |
+ else { |
Jim Stichnoth
2014/08/13 23:59:34
Probably best to have braces around both clauses o
wala
2014/08/14 17:31:24
Done.
|
+ Var->setStackOffset(LocalsSizeBytes - NextStackOffset - |
+ SpillAreaPaddingBytes); |
+ } |
} |
this->FrameSizeLocals = NextStackOffset; |
this->HasComputedFrame = true; |
+ for (VarList::const_iterator I = VariablesLinkedToSpillSplots.begin(), |
+ E = VariablesLinkedToSpillSplots.end(); |
+ I != E; ++I) { |
+ Variable *Var = *I; |
+ Variable *Linked = Var->getPreferredRegister(); |
+ Var->setStackOffset(Linked->getStackOffset()); |
+ } |
+ |
if (Func->getContext()->isVerbose(IceV_Frame)) { |
Func->getContext()->getStrDump() << "LocalsSizeBytes=" << LocalsSizeBytes |
<< "\n" |
@@ -991,8 +1061,7 @@ void TargetX8632::lowerAlloca(const InstAlloca *Inst) { |
if (ConstantInteger *ConstantTotalSize = |
llvm::dyn_cast<ConstantInteger>(TotalSize)) { |
uint32_t Value = ConstantTotalSize->getValue(); |
- // Round Value up to the next highest multiple of the alignment. |
- Value = (Value + Alignment - 1) & -Alignment; |
+ Value = applyAlignment(Value, Alignment); |
_sub(esp, Ctx->getConstantInt(IceType_i32, Value)); |
} else { |
// Non-constant sizes need to be adjusted to the next highest |
@@ -1239,12 +1308,6 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
} else if (isVectorType(Dest->getType())) { |
// TODO: Trap on integer divide and integer modulo by zero. |
// See: https://code.google.com/p/nativeclient/issues/detail?id=3899 |
- // |
- // TODO(wala): ALIGNHACK: All vector arithmetic is currently done in |
- // registers. This is a workaround of the fact that there is no |
- // support for aligning stack operands. Once there is support, |
- // remove LEGAL_HACK. |
-#define LEGAL_HACK(s) legalizeToVar((s)) |
switch (Inst->getOp()) { |
case InstArithmetic::_num: |
llvm_unreachable("Unknown arithmetic operator"); |
@@ -1252,31 +1315,31 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
case InstArithmetic::Add: { |
Variable *T = makeReg(Dest->getType()); |
_movp(T, Src0); |
- _padd(T, LEGAL_HACK(Src1)); |
+ _padd(T, Src1); |
_movp(Dest, T); |
} break; |
case InstArithmetic::And: { |
Variable *T = makeReg(Dest->getType()); |
_movp(T, Src0); |
- _pand(T, LEGAL_HACK(Src1)); |
+ _pand(T, Src1); |
_movp(Dest, T); |
} break; |
case InstArithmetic::Or: { |
Variable *T = makeReg(Dest->getType()); |
_movp(T, Src0); |
- _por(T, LEGAL_HACK(Src1)); |
+ _por(T, Src1); |
_movp(Dest, T); |
} break; |
case InstArithmetic::Xor: { |
Variable *T = makeReg(Dest->getType()); |
_movp(T, Src0); |
- _pxor(T, LEGAL_HACK(Src1)); |
+ _pxor(T, Src1); |
_movp(Dest, T); |
} break; |
case InstArithmetic::Sub: { |
Variable *T = makeReg(Dest->getType()); |
_movp(T, Src0); |
- _psub(T, LEGAL_HACK(Src1)); |
+ _psub(T, Src1); |
_movp(Dest, T); |
} break; |
case InstArithmetic::Mul: { |
@@ -1287,7 +1350,7 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
if (TypesAreValidForPmull && InstructionSetIsValidForPmull) { |
Variable *T = makeReg(Dest->getType()); |
_movp(T, Src0); |
- _pmull(T, LEGAL_HACK(Src1)); |
+ _pmull(T, Src1); |
_movp(Dest, T); |
} else if (Dest->getType() == IceType_v4i32) { |
// Lowering sequence: |
@@ -1320,14 +1383,9 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
Variable *T3 = makeReg(IceType_v4i32); |
Variable *T4 = makeReg(IceType_v4i32); |
_movp(T1, Src0); |
- // TODO(wala): ALIGHNHACK: Replace Src0R with Src0 and Src1R |
- // with Src1 after stack operand alignment support is |
- // implemented. |
- Variable *Src0R = LEGAL_HACK(Src0); |
- Variable *Src1R = LEGAL_HACK(Src1); |
- _pshufd(T2, Src0R, Mask1030); |
- _pshufd(T3, Src1R, Mask1030); |
- _pmuludq(T1, Src1R); |
+ _pshufd(T2, Src0, Mask1030); |
+ _pshufd(T3, Src1, Mask1030); |
+ _pmuludq(T1, Src1); |
_pmuludq(T2, T3); |
_shufps(T1, T2, Ctx->getConstantInt(IceType_i8, Mask0202)); |
_pshufd(T4, T1, Ctx->getConstantInt(IceType_i8, Mask0213)); |
@@ -1349,32 +1407,31 @@ void TargetX8632::lowerArithmetic(const InstArithmetic *Inst) { |
case InstArithmetic::Fadd: { |
Variable *T = makeReg(Dest->getType()); |
_movp(T, Src0); |
- _addps(T, LEGAL_HACK(Src1)); |
+ _addps(T, Src1); |
_movp(Dest, T); |
} break; |
case InstArithmetic::Fsub: { |
Variable *T = makeReg(Dest->getType()); |
_movp(T, Src0); |
- _subps(T, LEGAL_HACK(Src1)); |
+ _subps(T, Src1); |
_movp(Dest, T); |
} break; |
case InstArithmetic::Fmul: { |
Variable *T = makeReg(Dest->getType()); |
_movp(T, Src0); |
- _mulps(T, LEGAL_HACK(Src1)); |
+ _mulps(T, Src1); |
_movp(Dest, T); |
} break; |
case InstArithmetic::Fdiv: { |
Variable *T = makeReg(Dest->getType()); |
_movp(T, Src0); |
- _divps(T, LEGAL_HACK(Src1)); |
+ _divps(T, Src1); |
_movp(Dest, T); |
} break; |
case InstArithmetic::Frem: |
scalarizeArithmetic(Inst->getOp(), Dest, Src0, Src1); |
break; |
} |
-#undef LEGAL_HACK |
} else { // Dest->getType() is non-i64 scalar |
Variable *T_edx = NULL; |
Variable *T = NULL; |
@@ -2199,22 +2256,15 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { |
_pextr(ExtractedElementR, SourceVectR, Mask); |
} else if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
// Use pshufd and movd/movss. |
- // |
- // ALIGNHACK: Force vector operands to registers in instructions |
- // that require aligned memory operands until support for data |
- // alignment is implemented. |
-#define ALIGN_HACK(Vect) legalizeToVar((Vect)) |
- Operand *SourceVectRM = |
- legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem); |
Variable *T = NULL; |
if (Index) { |
// The shuffle only needs to occur if the element to be extracted |
// is not at the lowest index. |
Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); |
T = makeReg(Ty); |
- _pshufd(T, ALIGN_HACK(SourceVectRM), Mask); |
+ _pshufd(T, legalize(SourceVectNotLegalized, Legal_Reg | Legal_Mem), Mask); |
} else { |
- T = ALIGN_HACK(SourceVectRM); |
+ T = legalizeToVar(SourceVectNotLegalized); |
} |
if (InVectorElementTy == IceType_i32) { |
@@ -2228,7 +2278,6 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { |
Context.insert(InstFakeDef::create(Func, ExtractedElementR)); |
_movss(ExtractedElementR, T); |
} |
-#undef ALIGN_HACK |
} else { |
assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); |
// Spill the value to a stack slot and do the extraction in memory. |
@@ -2287,23 +2336,18 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { |
Operand *Src0RM = legalize(Src0, Legal_Reg | Legal_Mem); |
Operand *Src1RM = legalize(Src1, Legal_Reg | Legal_Mem); |
- // ALIGNHACK: Without support for data alignment, both operands to |
- // cmpps need to be forced into registers. Once support for data |
- // alignment is implemented, remove LEGAL_HACK. |
-#define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
switch (Condition) { |
default: { |
InstX8632Cmpps::CmppsCond Predicate = TableFcmp[Index].Predicate; |
assert(Predicate != InstX8632Cmpps::Cmpps_Invalid); |
T = makeReg(Src0RM->getType()); |
_movp(T, Src0RM); |
- _cmpps(T, LEGAL_HACK(Src1RM), Predicate); |
+ _cmpps(T, Src1RM, Predicate); |
} break; |
case InstFcmp::One: { |
// Check both unequal and ordered. |
T = makeReg(Src0RM->getType()); |
Variable *T2 = makeReg(Src0RM->getType()); |
- Src1RM = LEGAL_HACK(Src1RM); |
_movp(T, Src0RM); |
_cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_neq); |
_movp(T2, Src0RM); |
@@ -2314,7 +2358,6 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { |
// Check both equal or unordered. |
T = makeReg(Src0RM->getType()); |
Variable *T2 = makeReg(Src0RM->getType()); |
- Src1RM = LEGAL_HACK(Src1RM); |
_movp(T, Src0RM); |
_cmpps(T, Src1RM, InstX8632Cmpps::Cmpps_eq); |
_movp(T2, Src0RM); |
@@ -2322,7 +2365,6 @@ void TargetX8632::lowerFcmp(const InstFcmp *Inst) { |
_por(T, T2); |
} break; |
} |
-#undef LEGAL_HACK |
} |
_movp(Dest, T); |
@@ -2427,10 +2469,6 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
Src1RM = T1; |
} |
- // TODO: ALIGNHACK: Both operands to compare instructions need to be |
- // in registers until data alignment support is implemented. Once |
- // there is support for data alignment, LEGAL_HACK can be removed. |
-#define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
Variable *T = makeReg(Ty); |
switch (Condition) { |
default: |
@@ -2438,42 +2476,41 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) { |
break; |
case InstIcmp::Eq: { |
_movp(T, Src0RM); |
- _pcmpeq(T, LEGAL_HACK(Src1RM)); |
+ _pcmpeq(T, Src1RM); |
} break; |
case InstIcmp::Ne: { |
_movp(T, Src0RM); |
- _pcmpeq(T, LEGAL_HACK(Src1RM)); |
+ _pcmpeq(T, Src1RM); |
Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
_pxor(T, MinusOne); |
} break; |
case InstIcmp::Ugt: |
case InstIcmp::Sgt: { |
_movp(T, Src0RM); |
- _pcmpgt(T, LEGAL_HACK(Src1RM)); |
+ _pcmpgt(T, Src1RM); |
} break; |
case InstIcmp::Uge: |
case InstIcmp::Sge: { |
// !(Src1RM > Src0RM) |
_movp(T, Src1RM); |
- _pcmpgt(T, LEGAL_HACK(Src0RM)); |
+ _pcmpgt(T, Src0RM); |
Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
_pxor(T, MinusOne); |
} break; |
case InstIcmp::Ult: |
case InstIcmp::Slt: { |
_movp(T, Src1RM); |
- _pcmpgt(T, LEGAL_HACK(Src0RM)); |
+ _pcmpgt(T, Src0RM); |
} break; |
case InstIcmp::Ule: |
case InstIcmp::Sle: { |
// !(Src0RM > Src1RM) |
_movp(T, Src0RM); |
- _pcmpgt(T, LEGAL_HACK(Src1RM)); |
+ _pcmpgt(T, Src1RM); |
Variable *MinusOne = makeVectorOfMinusOnes(Ty); |
_pxor(T, MinusOne); |
} break; |
} |
-#undef LEGAL_HACK |
_movp(Dest, T); |
eliminateNextVectorSextInstruction(Dest); |
@@ -2649,12 +2686,7 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { |
Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); |
Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); |
- // ALIGNHACK: Force vector operands to registers in instructions |
- // that require aligned memory operands until support for data |
- // alignment is implemented. |
-#define ALIGN_HACK(Vect) legalizeToVar((Vect)) |
if (Index == 1) { |
- SourceVectRM = ALIGN_HACK(SourceVectRM); |
_shufps(ElementR, SourceVectRM, Mask1Constant); |
_shufps(ElementR, SourceVectRM, Mask2Constant); |
_movp(Inst->getDest(), ElementR); |
@@ -2665,7 +2697,6 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { |
_shufps(T, ElementR, Mask2Constant); |
_movp(Inst->getDest(), T); |
} |
-#undef ALIGN_HACK |
} else { |
assert(Ty == IceType_v16i8 || Ty == IceType_v16i1); |
// Spill the value to a stack slot and perform the insertion in |
@@ -3627,10 +3658,6 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { |
Variable *T = makeReg(SrcTy); |
Operand *SrcTRM = legalize(SrcT, Legal_Reg | Legal_Mem); |
Operand *SrcFRM = legalize(SrcF, Legal_Reg | Legal_Mem); |
- // ALIGNHACK: Until data alignment support is implemented, vector |
- // instructions need to have vector operands in registers. Once |
- // there is support for data alignment, LEGAL_HACK can be removed. |
-#define LEGAL_HACK(Vect) legalizeToVar((Vect)) |
if (InstructionSet >= SSE4_1) { |
// TODO(wala): If the condition operand is a constant, use blendps |
// or pblendw. |
@@ -3643,7 +3670,7 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { |
_movp(xmm0, ConditionRM); |
_psll(xmm0, Ctx->getConstantInt(IceType_i8, 31)); |
_movp(T, SrcFRM); |
- _blendvps(T, LEGAL_HACK(SrcTRM), xmm0); |
+ _blendvps(T, SrcTRM, xmm0); |
_movp(Dest, T); |
} else { |
assert(typeNumElements(SrcTy) == 8 || typeNumElements(SrcTy) == 16); |
@@ -3652,7 +3679,7 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { |
Variable *xmm0 = makeReg(SignExtTy, Reg_xmm0); |
lowerCast(InstCast::create(Func, InstCast::Sext, xmm0, Condition)); |
_movp(T, SrcFRM); |
- _pblendvb(T, LEGAL_HACK(SrcTRM), xmm0); |
+ _pblendvb(T, SrcTRM, xmm0); |
_movp(Dest, T); |
} |
return; |
@@ -3676,11 +3703,10 @@ void TargetX8632::lowerSelect(const InstSelect *Inst) { |
_movp(T, ConditionRM); |
} |
_movp(T2, T); |
- _pand(T, LEGAL_HACK(SrcTRM)); |
- _pandn(T2, LEGAL_HACK(SrcFRM)); |
+ _pand(T, SrcTRM); |
+ _pandn(T2, SrcFRM); |
_por(T, T2); |
_movp(Dest, T); |
-#undef LEGAL_HACK |
return; |
} |