Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5)

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 412353005: Use movss to implement insertelement when elements = 4 and index = 0. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Add comment about the usage of movss as a binary op Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/IceInstX8632.cpp ('k') | tests_lit/llvm2ice_tests/vector-ops.ll » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/IceTargetLoweringX8632.cpp
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 4a719d447c26eeb3e59bc6cee1d1c5971d38ee45..83dc5bd7482527ea4f0e2c6f1ac28fbba7ab91ff 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -2165,24 +2165,26 @@ void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
// require aligned memory operands until support for stack alignment
// is implemented.
#define ALIGN_HACK(Vect) legalizeToVar((Vect))
- Operand *T = NULL;
+ Variable *T = NULL;
if (Index) {
// The shuffle only needs to occur if the element to be extracted
// is not at the lowest index.
Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
T = makeReg(Ty);
- _pshufd(llvm::cast<Variable>(T), ALIGN_HACK(SourceVectOperand), Mask);
+ _pshufd(T, ALIGN_HACK(SourceVectOperand), Mask);
} else {
- // TODO(wala): If SourceVectOperand is in memory, express it as
- // mem32 so that the call to legalizeToVar() is made unnecessary.
- // _movd and _movss only take mem32 memory operands.
T = legalizeToVar(SourceVectOperand);
}
if (InVectorElementTy == IceType_i32) {
_movd(ExtractedElement, T);
- } else { // InVectorElementTy == IceType_f32
- // TODO: _mov should be able to be used here.
+ } else { // InVectorElementTy == IceType_f32
+ // TODO(wala): _movss is only used here because _mov does not
+ // allow a vector source and a scalar destination. _mov should be
+ // able to be used here.
+ // _movss is a binary instruction, so the FakeDef is needed to
+ // keep the live range analysis consistent.
+ Context.insert(InstFakeDef::create(Func, ExtractedElement));
_movss(ExtractedElement, T);
}
#undef ALIGN_HACK
@@ -2521,6 +2523,7 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
// Only constant indices are allowed in PNaCl IR.
assert(ElementIndex);
unsigned Index = ElementIndex->getValue();
+ assert(Index < typeNumElements(SourceVectOperand->getType()));
Type Ty = SourceVectOperand->getType();
Type ElementTy = typeElementType(Ty);
@@ -2538,7 +2541,8 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
}
if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
- // Lower insertelement with 32-bit wide elements using shufps.
+ // Lower insertelement with 32-bit wide elements using shufps or
+ // movss.
// TODO(wala): SSE4.1 has pinsrd and insertps.
Variable *Element = NULL;
if (InVectorElementTy == IceType_f32) {
@@ -2551,6 +2555,14 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
_movd(Element, T);
}
+ if (Index == 0) {
+ Variable *T = makeReg(Ty);
+ _movp(T, SourceVectOperand);
+ _movss(T, Element);
+ _movp(Inst->getDest(), T);
+ return;
+ }
+
// shufps treats the source and destination operands as vectors of
// four doublewords. The destination's two high doublewords are
// selected from the source operand and the two low doublewords are
@@ -2560,10 +2572,6 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
// Element[0] is being inserted into SourceVectOperand. Indices are
// ordered from left to right.
//
- // insertelement into index 0 (result is stored in Element):
- // Element := Element[0, 0] SourceVectOperand[0, 1]
- // Element := Element[0, 3] SourceVectOperand[2, 3]
- //
// insertelement into index 1 (result is stored in Element):
// Element := Element[0, 0] SourceVectOperand[0, 0]
// Element := Element[3, 0] SourceVectOperand[2, 3]
@@ -2577,17 +2585,17 @@ void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
// T := SourceVectOperand
// Element := Element[0, 0] T[0, 2]
// T := T[0, 1] Element[3, 0]
- const unsigned char Mask1[4] = {64, 0, 192, 128};
- const unsigned char Mask2[4] = {236, 227, 196, 52};
+ const unsigned char Mask1[3] = {0, 192, 128};
+ const unsigned char Mask2[3] = {227, 196, 52};
- Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index]);
- Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index]);
+ Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index - 1]);
+ Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index - 1]);
// ALIGNHACK: Force vector operands to registers in instructions that
// require aligned memory operands until support for stack alignment
// is implemented.
#define ALIGN_HACK(Vect) legalizeToVar((Vect))
- if (Index < 2) {
+ if (Index == 1) {
SourceVectOperand = ALIGN_HACK(SourceVectOperand);
_shufps(Element, SourceVectOperand, Mask1Constant);
_shufps(Element, SourceVectOperand, Mask2Constant);
« no previous file with comments | « src/IceInstX8632.cpp ('k') | tests_lit/llvm2ice_tests/vector-ops.ll » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698