Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(23)

Unified Diff: src/IceTargetLoweringX8632.cpp

Issue 401523003: Lower insertelement and extractelement. (Closed) Base URL: https://gerrit.chromium.org/gerrit/p/native_client/pnacl-subzero.git@master
Patch Set: Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« src/IceInstX8632.cpp ('K') | « src/IceTargetLoweringX8632.h ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/IceTargetLoweringX8632.cpp
diff --git a/src/IceTargetLoweringX8632.cpp b/src/IceTargetLoweringX8632.cpp
index 19a1256ea38df0a24b6041707338a4394a1308dd..9484e3c3e2d34950cb7e4528150ef305523cc249 100644
--- a/src/IceTargetLoweringX8632.cpp
+++ b/src/IceTargetLoweringX8632.cpp
@@ -85,6 +85,22 @@ InstX8632::BrCond getIcmp32Mapping(InstIcmp::ICond Cond) {
return TableIcmp32[Index].Mapping;
}
+// Return the type which the elements of the vector have in the X86
+// representation.
+Type getLoweredElementType(Type Ty) {
+ assert(isVectorType(Ty));
+ switch (Ty) {
+ default:
Jim Stichnoth 2014/07/17 19:49:01 Can this be encoded in ICETYPEX8632_TABLE?
wala 2014/07/17 22:14:12 Done. I've changed the name of this function to g
+ return typeElementType(Ty);
+ case IceType_v4i1:
+ return IceType_i32;
+ case IceType_v8i1:
+ return IceType_i16;
+ case IceType_v16i1:
+ return IceType_i8;
+ }
+}
+
// The maximum number of arguments to pass in XMM registers
const unsigned X86_MAX_XMM_ARGS = 4;
// The number of bits in a byte
@@ -1945,6 +1961,84 @@ void TargetX8632::lowerCast(const InstCast *Inst) {
}
}
+void TargetX8632::lowerExtractElement(const InstExtractElement *Inst) {
+ Operand *SourceVectOperand = Inst->getSrc(0);
+ ConstantInteger *ElementIndex =
+ llvm::dyn_cast<ConstantInteger>(Inst->getSrc(1));
+ // Only constant indices are allowed in PNaCl IR.
+ assert(ElementIndex);
+
+ unsigned Index = ElementIndex->getValue();
+ Type Ty = SourceVectOperand->getType();
+ Type ElementTy = typeElementType(Ty);
+ Type LoweredElementTy = getLoweredElementType(Ty);
+ Variable *ExtractedElement = makeReg(LoweredElementTy);
+
+ // TODO(wala): Determine the best lowering sequences for each type.
+ if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
+ // Lower extractelement operations where the element is 32 bits
+ // wide with pshufd.
+ // TODO(wala): SSE4.1 has extractps and pextrd
+//
+// ALIGNHACK: Force vector operands to registers in instructions that
+// require aligned memory operands until support for stack alignment
+// is implemented.
+#define ALIGN_HACK(Vect) legalizeToVar(Vect)
Jim Stichnoth 2014/07/17 19:49:01 To be absolutely macro-safe, I think this should b
wala 2014/07/17 22:14:12 Done.
+ Operand *T = NULL;
+ if (Index) {
+ // The shuffle only needs to occur if the element to be extracted
+ // is not at the lowest index.
+ Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
+ T = makeReg(Ty);
+ _pshufd(llvm::cast<Variable>(T), ALIGN_HACK(SourceVectOperand), Mask);
+ } else {
+ // TODO(wala): If SourceVectOperand is in memory, express it as
+ // mem32 so that the call to legalizeToVar() is made unnecessary.
+ // _movd and _movss only take mem32 memory operands.
+ T = legalizeToVar(SourceVectOperand);
+ }
+
+ if (LoweredElementTy == IceType_i32) {
+ _movd(ExtractedElement, T);
+ } else { // LoweredElementTy == IceType_f32
+ _movss(ExtractedElement, T);
+ }
+#undef ALIGN_HACK
+ } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) {
+ Constant *Mask = Ctx->getConstantInt(IceType_i8, Index);
+ _pextrw(ExtractedElement, legalizeToVar(SourceVectOperand), Mask);
+ } else {
+ assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
+ // Spill the value to a stack slot and do the extraction in memory.
+ // TODO(wala): SSE4.1 has pextrb.
+ //
+ // TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when
+ // support for legalizing to mem is implemented.
+ Variable *Slot = Func->makeVariable(Ty, Context.getNode());
+ Slot->setWeight(RegWeight::Zero);
+ _movp(Slot, legalizeToVar(SourceVectOperand));
+
+ // Compute the location of the element in memory.
+ unsigned Offset = Index * typeWidthInBytes(LoweredElementTy);
+ OperandX8632Mem *Loc =
+ getMemoryOperandForStackSlot(LoweredElementTy, Slot, Offset);
+ _mov(ExtractedElement, Loc);
+ }
+
+ if (ElementTy == IceType_i1) {
+ // Truncate extracted integers to i1s if necessary.
+ Variable *T = makeReg(IceType_i1);
+ InstCast *Cast =
+ InstCast::create(Func, InstCast::Trunc, T, ExtractedElement);
+ lowerCast(Cast);
+ ExtractedElement = T;
+ }
+
+ // Copy the element to the destination.
+ Variable *Dest = Inst->getDest();
+ _mov(Dest, ExtractedElement);
+}
+
void TargetX8632::lowerFcmp(const InstFcmp *Inst) {
Operand *Src0 = Inst->getSrc(0);
Operand *Src1 = Inst->getSrc(1);
@@ -2076,6 +2170,125 @@ void TargetX8632::lowerIcmp(const InstIcmp *Inst) {
Context.insert(Label);
}
+void TargetX8632::lowerInsertElement(const InstInsertElement *Inst) {
+ Operand *SourceVectOperand = Inst->getSrc(0);
+ Operand *ElementToInsert = Inst->getSrc(1);
+ ConstantInteger *ElementIndex =
+ llvm::dyn_cast<ConstantInteger>(Inst->getSrc(2));
+ // Only constant indices are allowed in PNaCl IR.
+ assert(ElementIndex);
+ unsigned Index = ElementIndex->getValue();
+
+ Type Ty = SourceVectOperand->getType();
+ Type ElementTy = typeElementType(Ty);
+ Type LoweredElementTy = getLoweredElementType(Ty);
+
+ if (ElementTy == IceType_i1) {
+ // Expand the element to the appropriate size for it to be inserted
+ // in the vector.
+ Variable *Expanded =
+ Func->makeVariable(LoweredElementTy, Context.getNode());
+ InstCast *Cast =
+ InstCast::create(Func, InstCast::Zext, Expanded, ElementToInsert);
+ lowerCast(Cast);
+ ElementToInsert = Expanded;
+ }
+
+ if (Ty == IceType_v4i32 || Ty == IceType_v4f32 || Ty == IceType_v4i1) {
+ // Lower insertelement with 32-bit wide elements using shufps.
+ // TODO(wala): SSE4.1 has pinsrd and insertps.
+//
+// ALIGNHACK: Force vector operands to registers in instructions that
+// require aligned memory operands until support for stack alignment
+// is implemented.
+#define ALIGN_HACK(Vect) legalizeToVar(Vect)
jvoung (off chromium) 2014/07/17 19:36:38 Could move ALIGN_HACK down (near the Index < 2), i
wala 2014/07/17 22:14:12 Done.
+
+ Variable *Element = NULL;
+ if (LoweredElementTy == IceType_f32) {
+ // Element will be in an XMM register since it is floating point.
+ Element = legalizeToVar(ElementToInsert);
+ } else {
+ // Copy an integer to an XMM register.
+ Operand *T = legalize(ElementToInsert, Legal_Reg | Legal_Mem);
+ Element = makeReg(Ty);
+ _movd(Element, T);
+ }
+
+ // shufps treats the source and desination operands as vectors of
+ // four doublewords. The destination's two high doublewords are
+ // selected from the source operand and the two low doublewords are
+ // selected from the (original value of) the destination operand.
+ // An insertelement operation can be effected with a sequence of two
+ // shufps operations with appropriate masks. In all cases below,
+ // Element[0] is being inserted into SourceVectOperand. Indices are
+ // ordered from left to right.
+ //
+ // insertelement into index 0 (result is stored in Element):
+ // Element := Element[0, 0] SourceVectOperand[0, 1]
+ // Element := Element[0, 3] SourceVectOperand[2, 3]
+ //
+ // insertelement into index 1 (result is stored in Element):
+ // Element := Element[0, 0] SourceVectOperand[0, 0]
+ // Element := Element[3, 0] SourceVectOperand[2, 3]
+ //
+ // insertelement into index 2 (result is stored in T):
+ // T := SourceVectOperand
+ // Element := Element[0, 0] T[0, 3]
+ // T := T[0, 1] Element[0, 3]
+ //
+ // insertelement into index 3 (result is stored in T):
+ // T := SourceVectOperand
+ // Element := Element[0, 0] T[0, 2]
+ // T := T[0, 1] Element[3, 0]
+ const unsigned char Mask1[4] = {64, 0, 192, 128};
+ const unsigned char Mask2[4] = {236, 227, 196, 52};
+
+ Constant *Mask1Constant = Ctx->getConstantInt(IceType_i8, Mask1[Index]);
+ Constant *Mask2Constant = Ctx->getConstantInt(IceType_i8, Mask2[Index]);
+
+ if (Index < 2) {
+ SourceVectOperand = ALIGN_HACK(SourceVectOperand);
+ _shufps(Element, SourceVectOperand, Mask1Constant);
+ _shufps(Element, SourceVectOperand, Mask2Constant);
+ _movp(Inst->getDest(), Element);
+ } else {
+ Variable *T = makeReg(Ty);
+ _movp(T, SourceVectOperand);
+ _shufps(Element, T, Mask1Constant);
+ _shufps(T, Element, Mask2Constant);
+ _movp(Inst->getDest(), T);
+ }
+#undef ALIGN_HACK
+ } else if (Ty == IceType_v8i16 || Ty == IceType_v8i1) {
+ Operand *Element = legalize(ElementToInsert, Legal_Mem | Legal_Reg);
+ Variable *T = makeReg(Ty);
+ _movp(T, SourceVectOperand);
+ _pinsrw(T, Element, Ctx->getConstantInt(IceType_i8, Index));
+ _movp(Inst->getDest(), T);
+ } else {
+ assert(Ty == IceType_v16i8 || Ty == IceType_v16i1);
+ // Spill the value to a stack slot and perform the insertion in
+ // memory.
+ // TODO(wala): SSE4.1 has pinsrb.
+ //
+ // TODO(wala): use legalize(SourceVectOperand, Legal_Mem) when
+ // support for legalizing to mem is implemented.
+ Variable *Slot = Func->makeVariable(Ty, Context.getNode());
+ Slot->setWeight(RegWeight::Zero);
+ _movp(Slot, legalizeToVar(SourceVectOperand));
+
+ // Compute the location of the position to insert in memory.
+ unsigned Offset = Index * typeWidthInBytes(LoweredElementTy);
+ OperandX8632Mem *Loc =
+ getMemoryOperandForStackSlot(LoweredElementTy, Slot, Offset);
+ _store(legalizeToVar(ElementToInsert), Loc);
+
+ Variable *T = makeReg(Ty);
+ _movp(T, Slot);
+ _movp(Inst->getDest(), T);
+ }
+}
+
void TargetX8632::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
switch (Instr->getIntrinsicInfo().ID) {
case Intrinsics::AtomicCmpxchg: {
@@ -3007,6 +3220,22 @@ Variable *TargetX8632::makeVectorOfOnes(Type Ty, int32_t RegNum) {
return Dest;
}
+OperandX8632Mem *TargetX8632::getMemoryOperandForStackSlot(Type Ty,
+ Variable *Slot,
+ uint32_t Offset) {
+ // Ensure that Loc is a stack slot.
+ assert(Slot->getWeight() == RegWeight::Zero);
Jim Stichnoth 2014/07/17 19:49:01 May also want to assert NoRegister.
wala 2014/07/17 22:14:12 Done.
+ // Compute the location of Loc in memory.
+ // TODO(wala,stichnot): lea should not be required. The address of
+ // the stack slot is known at compile time (although not until after
+ // addProlog()).
+ const Type PointerType = IceType_i32;
+ Variable *Loc = makeReg(PointerType);
+ _lea(Loc, Slot);
+ Constant *ConstantOffset = Ctx->getConstantInt(IceType_i32, Offset);
+ return OperandX8632Mem::create(Func, Ty, Loc, ConstantOffset);
+}
+
// Helper for legalize() to emit the right code to lower an operand to a
// register of the appropriate type.
Variable *TargetX8632::copyToReg(Operand *Src, int32_t RegNum) {
« src/IceInstX8632.cpp ('K') | « src/IceTargetLoweringX8632.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698