Index: src/IceTargetLoweringX8632.cpp |
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp |
index 4a719d447c26eeb3e59bc6cee1d1c5971d38ee45..83dc5bd7482527ea4f0e2c6f1ac28fbba7ab91ff 100644 |
--- a/src/IceTargetLoweringX8632.cpp |
+++ b/src/IceTargetLoweringX8632.cpp |
@@ -2165,24 +2165,26 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) { |
// require aligned memory operands until support for stack alignment |
// is implemented. |
#define ALIGN_HACK(Vect) legalizeToVar((Vect)) |
- Operand *T = NULL; |
+ Variable *T = NULL; |
if (Index) { |
// The shuffle only needs to occur if the element to be extracted |
// is not at the lowest index. |
Constant *Mask = Ctx->getConstantInt(IceType_i8, Index); |
T = makeReg(Ty); |
- _pshufd(llvm::cast<Variable>(T), ALIGN_HACK(SourceVectOperand), Mask); |
+ _pshufd(T, ALIGN_HACK(SourceVectOperand), Mask); |
} else { |
- // TODO(wala): If SourceVectOperand is in memory, express it as |
- // mem32 so that the call to legalizeToVar() is made unnecessary. |
- // _movd and _movss only take mem32 memory operands. |
T = legalizeToVar(SourceVectOperand); |
} |
if (InVectorElementTy == IceType_i32) { |
_movd(ExtractedElement, T); |
- } else { // InVectorElementTy == IceType_f32 |
- // TODO: _mov should be able to be used here. |
+ } else { // InVectorElementTy == IceType_f32 |
+ // TODO(wala): _movss is only used here because _mov does not |
+ // allow a vector source and a scalar destination. _mov should be |
+ // able to be used here. |
+ // _movss is a binary instruction, so the FakeDef is needed to |
+ // keep the live range analysis consistent. |
+ Context.insert(InstFakeDef::create(Func, ExtractedElement)); |
_movss(ExtractedElement, T); |
} |
#undef ALIGN_HACK |
@@ -2521,6 +2523,7 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { |
// Only constant indices are allowed in PNaCl IR. |
assert(ElementIndex); |
unsigned Index = ElementIndex->getValue(); |
+ assert(Index < typeNumElements(SourceVectOperand->getType())); |
Type Ty = SourceVectOperand->getType(); |
Type ElementTy = typeElementType(Ty); |
@@ -2538,7 +2541,8 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { |
} |
if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) { |
- // Lower insertelement with 32-bit wide elements using shufps. |
+ // Lower insertelement with 32-bit wide elements using shufps or |
+ // movss. |
// TODO(wala): SSE4.1 has pinsrd and insertps. |
Variable *Element = NULL; |
if (InVectorElementTy == IceType_f32) { |
@@ -2551,6 +2555,14 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { |
_movd(Element, T); |
} |
+ if (Index == 0) { |
+ Variable *T = makeReg(Ty); |
+ _movp(T, SourceVectOperand); |
+ _movss(T, Element); |
+ _movp(Inst->getDest(), T); |
+ return; |
+ } |
+ |
// shufps treats the source and desination operands as vectors of |
// four doublewords. The destination's two high doublewords are |
// selected from the source operand and the two low doublewords are |
@@ -2560,10 +2572,6 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { |
// Element[0] is being inserted into SourceVectOperand. Indices are |
// ordered from left to right. |
// |
- // insertelement into index 0 (result is stored in Element): |
- // Element := Element[0, 0] SourceVectOperand[0, 1] |
- // Element := Element[0, 3] SourceVectOperand[2, 3] |
- // |
// insertelement into index 1 (result is stored in Element): |
// Element := Element[0, 0] SourceVectOperand[0, 0] |
// Element := Element[3, 0] SourceVectOperand[2, 3] |
@@ -2577,17 +2585,17 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) { |
// T := SourceVectOperand |
// Element := Element[0, 0] T[0, 2] |
// T := T[0, 1] Element[3, 0] |
- const unsigned char Mask1[4] = {64, 0, 192, 128}; |
- const unsigned char Mask2[4] = {236, 227, 196, 52}; |
+ const unsigned char Mask1[3] = {0, 192, 128}; |
+ const unsigned char Mask2[3] = {227, 196, 52}; |
- Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index]); |
- Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index]); |
+ Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]); |
+ Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]); |
// ALIGNHACK: Force vector operands to registers in instructions that |
// require aligned memory operands until support for stack alignment |
// is implemented. |
#define ALIGN_HACK(Vect) legalizeToVar((Vect)) |
- if (Index < 2) { |
+ if (Index == 1) { |
SourceVectOperand = ALIGN_HACK(SourceVectOperand); |
_shufps(Element, SourceVectOperand, Mask1Constant); |
_shufps(Element, SourceVectOperand, Mask2Constant); |